diff options
Diffstat (limited to 'arch')
167 files changed, 4803 insertions, 8491 deletions
diff --git a/arch/alpha/boot/misc.c b/arch/alpha/boot/misc.c index 1d65adf5691..c00646b25f6 100644 --- a/arch/alpha/boot/misc.c +++ b/arch/alpha/boot/misc.c | |||
@@ -98,7 +98,7 @@ extern int end; | |||
98 | static ulg free_mem_ptr; | 98 | static ulg free_mem_ptr; |
99 | static ulg free_mem_ptr_end; | 99 | static ulg free_mem_ptr_end; |
100 | 100 | ||
101 | #define HEAP_SIZE 0x2000 | 101 | #define HEAP_SIZE 0x3000 |
102 | 102 | ||
103 | #include "../../../lib/inflate.c" | 103 | #include "../../../lib/inflate.c" |
104 | 104 | ||
diff --git a/arch/alpha/kernel/vmlinux.lds.S b/arch/alpha/kernel/vmlinux.lds.S index 4cc44bd33d3..cf1e6fc6c68 100644 --- a/arch/alpha/kernel/vmlinux.lds.S +++ b/arch/alpha/kernel/vmlinux.lds.S | |||
@@ -69,7 +69,7 @@ SECTIONS | |||
69 | . = ALIGN(8); | 69 | . = ALIGN(8); |
70 | SECURITY_INIT | 70 | SECURITY_INIT |
71 | 71 | ||
72 | . = ALIGN(64); | 72 | . = ALIGN(8192); |
73 | __per_cpu_start = .; | 73 | __per_cpu_start = .; |
74 | .data.percpu : { *(.data.percpu) } | 74 | .data.percpu : { *(.data.percpu) } |
75 | __per_cpu_end = .; | 75 | __per_cpu_end = .; |
diff --git a/arch/arm/boot/compressed/misc.c b/arch/arm/boot/compressed/misc.c index 283891c736c..9b444022cb9 100644 --- a/arch/arm/boot/compressed/misc.c +++ b/arch/arm/boot/compressed/misc.c | |||
@@ -239,7 +239,7 @@ extern int end; | |||
239 | static ulg free_mem_ptr; | 239 | static ulg free_mem_ptr; |
240 | static ulg free_mem_ptr_end; | 240 | static ulg free_mem_ptr_end; |
241 | 241 | ||
242 | #define HEAP_SIZE 0x2000 | 242 | #define HEAP_SIZE 0x3000 |
243 | 243 | ||
244 | #include "../../../../lib/inflate.c" | 244 | #include "../../../../lib/inflate.c" |
245 | 245 | ||
diff --git a/arch/arm/kernel/vmlinux.lds.S b/arch/arm/kernel/vmlinux.lds.S index ddbdad48f5b..d1a6a597ed9 100644 --- a/arch/arm/kernel/vmlinux.lds.S +++ b/arch/arm/kernel/vmlinux.lds.S | |||
@@ -59,7 +59,7 @@ SECTIONS | |||
59 | usr/built-in.o(.init.ramfs) | 59 | usr/built-in.o(.init.ramfs) |
60 | __initramfs_end = .; | 60 | __initramfs_end = .; |
61 | #endif | 61 | #endif |
62 | . = ALIGN(64); | 62 | . = ALIGN(4096); |
63 | __per_cpu_start = .; | 63 | __per_cpu_start = .; |
64 | *(.data.percpu) | 64 | *(.data.percpu) |
65 | __per_cpu_end = .; | 65 | __per_cpu_end = .; |
diff --git a/arch/arm26/boot/compressed/misc.c b/arch/arm26/boot/compressed/misc.c index f17f50e5516..0714d19c577 100644 --- a/arch/arm26/boot/compressed/misc.c +++ b/arch/arm26/boot/compressed/misc.c | |||
@@ -182,7 +182,7 @@ extern int end; | |||
182 | static ulg free_mem_ptr; | 182 | static ulg free_mem_ptr; |
183 | static ulg free_mem_ptr_end; | 183 | static ulg free_mem_ptr_end; |
184 | 184 | ||
185 | #define HEAP_SIZE 0x2000 | 185 | #define HEAP_SIZE 0x3000 |
186 | 186 | ||
187 | #include "../../../../lib/inflate.c" | 187 | #include "../../../../lib/inflate.c" |
188 | 188 | ||
diff --git a/arch/cris/arch-v32/vmlinux.lds.S b/arch/cris/arch-v32/vmlinux.lds.S index e124fcd766d..dfa25e1542b 100644 --- a/arch/cris/arch-v32/vmlinux.lds.S +++ b/arch/cris/arch-v32/vmlinux.lds.S | |||
@@ -91,6 +91,7 @@ SECTIONS | |||
91 | } | 91 | } |
92 | SECURITY_INIT | 92 | SECURITY_INIT |
93 | 93 | ||
94 | . = ALIGN (8192); | ||
94 | __per_cpu_start = .; | 95 | __per_cpu_start = .; |
95 | .data.percpu : { *(.data.percpu) } | 96 | .data.percpu : { *(.data.percpu) } |
96 | __per_cpu_end = .; | 97 | __per_cpu_end = .; |
diff --git a/arch/frv/kernel/vmlinux.lds.S b/arch/frv/kernel/vmlinux.lds.S index 97910e01682..28eae9735ad 100644 --- a/arch/frv/kernel/vmlinux.lds.S +++ b/arch/frv/kernel/vmlinux.lds.S | |||
@@ -57,6 +57,7 @@ SECTIONS | |||
57 | __alt_instructions_end = .; | 57 | __alt_instructions_end = .; |
58 | .altinstr_replacement : { *(.altinstr_replacement) } | 58 | .altinstr_replacement : { *(.altinstr_replacement) } |
59 | 59 | ||
60 | . = ALIGN(4096); | ||
60 | __per_cpu_start = .; | 61 | __per_cpu_start = .; |
61 | .data.percpu : { *(.data.percpu) } | 62 | .data.percpu : { *(.data.percpu) } |
62 | __per_cpu_end = .; | 63 | __per_cpu_end = .; |
diff --git a/arch/i386/Kconfig b/arch/i386/Kconfig index bcf2fc408a1..a9af760c7e5 100644 --- a/arch/i386/Kconfig +++ b/arch/i386/Kconfig | |||
@@ -220,7 +220,7 @@ config PARAVIRT | |||
220 | 220 | ||
221 | config VMI | 221 | config VMI |
222 | bool "VMI Paravirt-ops support" | 222 | bool "VMI Paravirt-ops support" |
223 | depends on PARAVIRT && !COMPAT_VDSO | 223 | depends on PARAVIRT |
224 | help | 224 | help |
225 | VMI provides a paravirtualized interface to the VMware ESX server | 225 | VMI provides a paravirtualized interface to the VMware ESX server |
226 | (it could be used by other hypervisors in theory too, but is not | 226 | (it could be used by other hypervisors in theory too, but is not |
@@ -571,6 +571,9 @@ choice | |||
571 | bool "3G/1G user/kernel split (for full 1G low memory)" | 571 | bool "3G/1G user/kernel split (for full 1G low memory)" |
572 | config VMSPLIT_2G | 572 | config VMSPLIT_2G |
573 | bool "2G/2G user/kernel split" | 573 | bool "2G/2G user/kernel split" |
574 | config VMSPLIT_2G_OPT | ||
575 | depends on !HIGHMEM | ||
576 | bool "2G/2G user/kernel split (for full 2G low memory)" | ||
574 | config VMSPLIT_1G | 577 | config VMSPLIT_1G |
575 | bool "1G/3G user/kernel split" | 578 | bool "1G/3G user/kernel split" |
576 | endchoice | 579 | endchoice |
@@ -578,7 +581,8 @@ endchoice | |||
578 | config PAGE_OFFSET | 581 | config PAGE_OFFSET |
579 | hex | 582 | hex |
580 | default 0xB0000000 if VMSPLIT_3G_OPT | 583 | default 0xB0000000 if VMSPLIT_3G_OPT |
581 | default 0x78000000 if VMSPLIT_2G | 584 | default 0x80000000 if VMSPLIT_2G |
585 | default 0x78000000 if VMSPLIT_2G_OPT | ||
582 | default 0x40000000 if VMSPLIT_1G | 586 | default 0x40000000 if VMSPLIT_1G |
583 | default 0xC0000000 | 587 | default 0xC0000000 |
584 | 588 | ||
@@ -915,12 +919,9 @@ source kernel/power/Kconfig | |||
915 | 919 | ||
916 | source "drivers/acpi/Kconfig" | 920 | source "drivers/acpi/Kconfig" |
917 | 921 | ||
918 | menu "APM (Advanced Power Management) BIOS Support" | 922 | menuconfig APM |
919 | depends on PM && !X86_VISWS | ||
920 | |||
921 | config APM | ||
922 | tristate "APM (Advanced Power Management) BIOS support" | 923 | tristate "APM (Advanced Power Management) BIOS support" |
923 | depends on PM | 924 | depends on PM && !X86_VISWS |
924 | ---help--- | 925 | ---help--- |
925 | APM is a BIOS specification for saving power using several different | 926 | APM is a BIOS specification for saving power using several different |
926 | techniques. This is mostly useful for battery powered laptops with | 927 | techniques. This is mostly useful for battery powered laptops with |
@@ -977,9 +978,10 @@ config APM | |||
977 | To compile this driver as a module, choose M here: the | 978 | To compile this driver as a module, choose M here: the |
978 | module will be called apm. | 979 | module will be called apm. |
979 | 980 | ||
981 | if APM | ||
982 | |||
980 | config APM_IGNORE_USER_SUSPEND | 983 | config APM_IGNORE_USER_SUSPEND |
981 | bool "Ignore USER SUSPEND" | 984 | bool "Ignore USER SUSPEND" |
982 | depends on APM | ||
983 | help | 985 | help |
984 | This option will ignore USER SUSPEND requests. On machines with a | 986 | This option will ignore USER SUSPEND requests. On machines with a |
985 | compliant APM BIOS, you want to say N. However, on the NEC Versa M | 987 | compliant APM BIOS, you want to say N. However, on the NEC Versa M |
@@ -987,7 +989,6 @@ config APM_IGNORE_USER_SUSPEND | |||
987 | 989 | ||
988 | config APM_DO_ENABLE | 990 | config APM_DO_ENABLE |
989 | bool "Enable PM at boot time" | 991 | bool "Enable PM at boot time" |
990 | depends on APM | ||
991 | ---help--- | 992 | ---help--- |
992 | Enable APM features at boot time. From page 36 of the APM BIOS | 993 | Enable APM features at boot time. From page 36 of the APM BIOS |
993 | specification: "When disabled, the APM BIOS does not automatically | 994 | specification: "When disabled, the APM BIOS does not automatically |
@@ -1005,7 +1006,6 @@ config APM_DO_ENABLE | |||
1005 | 1006 | ||
1006 | config APM_CPU_IDLE | 1007 | config APM_CPU_IDLE |
1007 | bool "Make CPU Idle calls when idle" | 1008 | bool "Make CPU Idle calls when idle" |
1008 | depends on APM | ||
1009 | help | 1009 | help |
1010 | Enable calls to APM CPU Idle/CPU Busy inside the kernel's idle loop. | 1010 | Enable calls to APM CPU Idle/CPU Busy inside the kernel's idle loop. |
1011 | On some machines, this can activate improved power savings, such as | 1011 | On some machines, this can activate improved power savings, such as |
@@ -1017,7 +1017,6 @@ config APM_CPU_IDLE | |||
1017 | 1017 | ||
1018 | config APM_DISPLAY_BLANK | 1018 | config APM_DISPLAY_BLANK |
1019 | bool "Enable console blanking using APM" | 1019 | bool "Enable console blanking using APM" |
1020 | depends on APM | ||
1021 | help | 1020 | help |
1022 | Enable console blanking using the APM. Some laptops can use this to | 1021 | Enable console blanking using the APM. Some laptops can use this to |
1023 | turn off the LCD backlight when the screen blanker of the Linux | 1022 | turn off the LCD backlight when the screen blanker of the Linux |
@@ -1029,22 +1028,8 @@ config APM_DISPLAY_BLANK | |||
1029 | backlight at all, or it might print a lot of errors to the console, | 1028 | backlight at all, or it might print a lot of errors to the console, |
1030 | especially if you are using gpm. | 1029 | especially if you are using gpm. |
1031 | 1030 | ||
1032 | config APM_RTC_IS_GMT | ||
1033 | bool "RTC stores time in GMT" | ||
1034 | depends on APM | ||
1035 | help | ||
1036 | Say Y here if your RTC (Real Time Clock a.k.a. hardware clock) | ||
1037 | stores the time in GMT (Greenwich Mean Time). Say N if your RTC | ||
1038 | stores localtime. | ||
1039 | |||
1040 | It is in fact recommended to store GMT in your RTC, because then you | ||
1041 | don't have to worry about daylight savings time changes. The only | ||
1042 | reason not to use GMT in your RTC is if you also run a broken OS | ||
1043 | that doesn't understand GMT. | ||
1044 | |||
1045 | config APM_ALLOW_INTS | 1031 | config APM_ALLOW_INTS |
1046 | bool "Allow interrupts during APM BIOS calls" | 1032 | bool "Allow interrupts during APM BIOS calls" |
1047 | depends on APM | ||
1048 | help | 1033 | help |
1049 | Normally we disable external interrupts while we are making calls to | 1034 | Normally we disable external interrupts while we are making calls to |
1050 | the APM BIOS as a measure to lessen the effects of a badly behaving | 1035 | the APM BIOS as a measure to lessen the effects of a badly behaving |
@@ -1055,13 +1040,12 @@ config APM_ALLOW_INTS | |||
1055 | 1040 | ||
1056 | config APM_REAL_MODE_POWER_OFF | 1041 | config APM_REAL_MODE_POWER_OFF |
1057 | bool "Use real mode APM BIOS call to power off" | 1042 | bool "Use real mode APM BIOS call to power off" |
1058 | depends on APM | ||
1059 | help | 1043 | help |
1060 | Use real mode APM BIOS calls to switch off the computer. This is | 1044 | Use real mode APM BIOS calls to switch off the computer. This is |
1061 | a work-around for a number of buggy BIOSes. Switch this option on if | 1045 | a work-around for a number of buggy BIOSes. Switch this option on if |
1062 | your computer crashes instead of powering off properly. | 1046 | your computer crashes instead of powering off properly. |
1063 | 1047 | ||
1064 | endmenu | 1048 | endif # APM |
1065 | 1049 | ||
1066 | source "arch/i386/kernel/cpu/cpufreq/Kconfig" | 1050 | source "arch/i386/kernel/cpu/cpufreq/Kconfig" |
1067 | 1051 | ||
diff --git a/arch/i386/Kconfig.cpu b/arch/i386/Kconfig.cpu index b99c0e2a4e6..dce6124cb84 100644 --- a/arch/i386/Kconfig.cpu +++ b/arch/i386/Kconfig.cpu | |||
@@ -43,6 +43,7 @@ config M386 | |||
43 | - "Geode GX/LX" For AMD Geode GX and LX processors. | 43 | - "Geode GX/LX" For AMD Geode GX and LX processors. |
44 | - "CyrixIII/VIA C3" for VIA Cyrix III or VIA C3. | 44 | - "CyrixIII/VIA C3" for VIA Cyrix III or VIA C3. |
45 | - "VIA C3-2" for VIA C3-2 "Nehemiah" (model 9 and above). | 45 | - "VIA C3-2" for VIA C3-2 "Nehemiah" (model 9 and above). |
46 | - "VIA C7" for VIA C7. | ||
46 | 47 | ||
47 | If you don't know what to do, choose "386". | 48 | If you don't know what to do, choose "386". |
48 | 49 | ||
@@ -203,6 +204,12 @@ config MVIAC3_2 | |||
203 | of SSE and tells gcc to treat the CPU as a 686. | 204 | of SSE and tells gcc to treat the CPU as a 686. |
204 | Note, this kernel will not boot on older (pre model 9) C3s. | 205 | Note, this kernel will not boot on older (pre model 9) C3s. |
205 | 206 | ||
207 | config MVIAC7 | ||
208 | bool "VIA C7" | ||
209 | help | ||
210 | Select this for a VIA C7. Selecting this uses the correct cache | ||
211 | shift and tells gcc to treat the CPU as a 686. | ||
212 | |||
206 | endchoice | 213 | endchoice |
207 | 214 | ||
208 | config X86_GENERIC | 215 | config X86_GENERIC |
@@ -231,16 +238,21 @@ config X86_L1_CACHE_SHIFT | |||
231 | default "7" if MPENTIUM4 || X86_GENERIC | 238 | default "7" if MPENTIUM4 || X86_GENERIC |
232 | default "4" if X86_ELAN || M486 || M386 || MGEODEGX1 | 239 | default "4" if X86_ELAN || M486 || M386 || MGEODEGX1 |
233 | default "5" if MWINCHIP3D || MWINCHIP2 || MWINCHIPC6 || MCRUSOE || MEFFICEON || MCYRIXIII || MK6 || MPENTIUMIII || MPENTIUMII || M686 || M586MMX || M586TSC || M586 || MVIAC3_2 || MGEODE_LX | 240 | default "5" if MWINCHIP3D || MWINCHIP2 || MWINCHIPC6 || MCRUSOE || MEFFICEON || MCYRIXIII || MK6 || MPENTIUMIII || MPENTIUMII || M686 || M586MMX || M586TSC || M586 || MVIAC3_2 || MGEODE_LX |
234 | default "6" if MK7 || MK8 || MPENTIUMM || MCORE2 | 241 | default "6" if MK7 || MK8 || MPENTIUMM || MCORE2 || MVIAC7 |
242 | |||
243 | config X86_XADD | ||
244 | bool | ||
245 | depends on !M386 | ||
246 | default y | ||
235 | 247 | ||
236 | config RWSEM_GENERIC_SPINLOCK | 248 | config RWSEM_GENERIC_SPINLOCK |
237 | bool | 249 | bool |
238 | depends on M386 | 250 | depends on !X86_XADD |
239 | default y | 251 | default y |
240 | 252 | ||
241 | config RWSEM_XCHGADD_ALGORITHM | 253 | config RWSEM_XCHGADD_ALGORITHM |
242 | bool | 254 | bool |
243 | depends on !M386 | 255 | depends on X86_XADD |
244 | default y | 256 | default y |
245 | 257 | ||
246 | config ARCH_HAS_ILOG2_U32 | 258 | config ARCH_HAS_ILOG2_U32 |
@@ -297,7 +309,7 @@ config X86_ALIGNMENT_16 | |||
297 | 309 | ||
298 | config X86_GOOD_APIC | 310 | config X86_GOOD_APIC |
299 | bool | 311 | bool |
300 | depends on MK7 || MPENTIUM4 || MPENTIUMM || MPENTIUMIII || MPENTIUMII || M686 || M586MMX || MK8 || MEFFICEON || MCORE2 | 312 | depends on MK7 || MPENTIUM4 || MPENTIUMM || MPENTIUMIII || MPENTIUMII || M686 || M586MMX || MK8 || MEFFICEON || MCORE2 || MVIAC7 |
301 | default y | 313 | default y |
302 | 314 | ||
303 | config X86_INTEL_USERCOPY | 315 | config X86_INTEL_USERCOPY |
@@ -322,5 +334,18 @@ config X86_OOSTORE | |||
322 | 334 | ||
323 | config X86_TSC | 335 | config X86_TSC |
324 | bool | 336 | bool |
325 | depends on (MWINCHIP3D || MWINCHIP2 || MCRUSOE || MEFFICEON || MCYRIXIII || MK7 || MK6 || MPENTIUM4 || MPENTIUMM || MPENTIUMIII || MPENTIUMII || M686 || M586MMX || M586TSC || MK8 || MVIAC3_2 || MGEODEGX1 || MGEODE_LX || MCORE2) && !X86_NUMAQ | 337 | depends on (MWINCHIP3D || MWINCHIP2 || MCRUSOE || MEFFICEON || MCYRIXIII || MK7 || MK6 || MPENTIUM4 || MPENTIUMM || MPENTIUMIII || MPENTIUMII || M686 || M586MMX || M586TSC || MK8 || MVIAC3_2 || MVIAC7 || MGEODEGX1 || MGEODE_LX || MCORE2) && !X86_NUMAQ |
326 | default y | 338 | default y |
339 | |||
340 | # this should be set for all -march=.. options where the compiler | ||
341 | # generates cmov. | ||
342 | config X86_CMOV | ||
343 | bool | ||
344 | depends on (MK7 || MPENTIUM4 || MPENTIUMM || MPENTIUMIII || MPENTIUMII || M686 || MVIAC3_2 || MVIAC7) | ||
345 | default y | ||
346 | |||
347 | config X86_MINIMUM_CPU_MODEL | ||
348 | int | ||
349 | default "4" if X86_XADD || X86_CMPXCHG || X86_BSWAP | ||
350 | default "0" | ||
351 | |||
diff --git a/arch/i386/Kconfig.debug b/arch/i386/Kconfig.debug index 458bc161193..b31c0802e1c 100644 --- a/arch/i386/Kconfig.debug +++ b/arch/i386/Kconfig.debug | |||
@@ -85,14 +85,4 @@ config DOUBLEFAULT | |||
85 | option saves about 4k and might cause you much additional grey | 85 | option saves about 4k and might cause you much additional grey |
86 | hair. | 86 | hair. |
87 | 87 | ||
88 | config DEBUG_PARAVIRT | ||
89 | bool "Enable some paravirtualization debugging" | ||
90 | default n | ||
91 | depends on PARAVIRT && DEBUG_KERNEL | ||
92 | help | ||
93 | Currently deliberately clobbers regs which are allowed to be | ||
94 | clobbered in inlined paravirt hooks, even in native mode. | ||
95 | If turning this off solves a problem, then DISABLE_INTERRUPTS() or | ||
96 | ENABLE_INTERRUPTS() is lying about what registers can be clobbered. | ||
97 | |||
98 | endmenu | 88 | endmenu |
diff --git a/arch/i386/Makefile b/arch/i386/Makefile index bd28f9f9b4b..6dc5e5d90fe 100644 --- a/arch/i386/Makefile +++ b/arch/i386/Makefile | |||
@@ -34,7 +34,7 @@ CHECKFLAGS += -D__i386__ | |||
34 | CFLAGS += -pipe -msoft-float -mregparm=3 -freg-struct-return | 34 | CFLAGS += -pipe -msoft-float -mregparm=3 -freg-struct-return |
35 | 35 | ||
36 | # prevent gcc from keeping the stack 16 byte aligned | 36 | # prevent gcc from keeping the stack 16 byte aligned |
37 | CFLAGS += $(call cc-option,-mpreferred-stack-boundary=2) | 37 | CFLAGS += -mpreferred-stack-boundary=4 |
38 | 38 | ||
39 | # CPU-specific tuning. Anything which can be shared with UML should go here. | 39 | # CPU-specific tuning. Anything which can be shared with UML should go here. |
40 | include $(srctree)/arch/i386/Makefile.cpu | 40 | include $(srctree)/arch/i386/Makefile.cpu |
diff --git a/arch/i386/Makefile.cpu b/arch/i386/Makefile.cpu index a32c031c90d..e372b584e91 100644 --- a/arch/i386/Makefile.cpu +++ b/arch/i386/Makefile.cpu | |||
@@ -4,9 +4,9 @@ | |||
4 | #-mtune exists since gcc 3.4 | 4 | #-mtune exists since gcc 3.4 |
5 | HAS_MTUNE := $(call cc-option-yn, -mtune=i386) | 5 | HAS_MTUNE := $(call cc-option-yn, -mtune=i386) |
6 | ifeq ($(HAS_MTUNE),y) | 6 | ifeq ($(HAS_MTUNE),y) |
7 | tune = $(call cc-option,-mtune=$(1),) | 7 | tune = $(call cc-option,-mtune=$(1),$(2)) |
8 | else | 8 | else |
9 | tune = $(call cc-option,-mcpu=$(1),) | 9 | tune = $(call cc-option,-mcpu=$(1),$(2)) |
10 | endif | 10 | endif |
11 | 11 | ||
12 | align := $(cc-option-align) | 12 | align := $(cc-option-align) |
@@ -32,7 +32,8 @@ cflags-$(CONFIG_MWINCHIP2) += $(call cc-option,-march=winchip2,-march=i586) | |||
32 | cflags-$(CONFIG_MWINCHIP3D) += $(call cc-option,-march=winchip2,-march=i586) | 32 | cflags-$(CONFIG_MWINCHIP3D) += $(call cc-option,-march=winchip2,-march=i586) |
33 | cflags-$(CONFIG_MCYRIXIII) += $(call cc-option,-march=c3,-march=i486) $(align)-functions=0 $(align)-jumps=0 $(align)-loops=0 | 33 | cflags-$(CONFIG_MCYRIXIII) += $(call cc-option,-march=c3,-march=i486) $(align)-functions=0 $(align)-jumps=0 $(align)-loops=0 |
34 | cflags-$(CONFIG_MVIAC3_2) += $(call cc-option,-march=c3-2,-march=i686) | 34 | cflags-$(CONFIG_MVIAC3_2) += $(call cc-option,-march=c3-2,-march=i686) |
35 | cflags-$(CONFIG_MCORE2) += -march=i686 $(call cc-option,-mtune=core2,$(call cc-option,-mtune=generic,-mtune=i686)) | 35 | cflags-$(CONFIG_MVIAC7) += -march=i686 |
36 | cflags-$(CONFIG_MCORE2) += -march=i686 $(call tune,core2) | ||
36 | 37 | ||
37 | # AMD Elan support | 38 | # AMD Elan support |
38 | cflags-$(CONFIG_X86_ELAN) += -march=i486 | 39 | cflags-$(CONFIG_X86_ELAN) += -march=i486 |
@@ -42,5 +43,5 @@ cflags-$(CONFIG_MGEODEGX1) += -march=pentium-mmx | |||
42 | 43 | ||
43 | # add at the end to overwrite eventual tuning options from earlier | 44 | # add at the end to overwrite eventual tuning options from earlier |
44 | # cpu entries | 45 | # cpu entries |
45 | cflags-$(CONFIG_X86_GENERIC) += $(call tune,generic) | 46 | cflags-$(CONFIG_X86_GENERIC) += $(call tune,generic,$(call tune,i686)) |
46 | 47 | ||
diff --git a/arch/i386/boot/Makefile b/arch/i386/boot/Makefile index e9794662606..bfbc32098a4 100644 --- a/arch/i386/boot/Makefile +++ b/arch/i386/boot/Makefile | |||
@@ -36,9 +36,9 @@ HOSTCFLAGS_build.o := $(LINUXINCLUDE) | |||
36 | # --------------------------------------------------------------------------- | 36 | # --------------------------------------------------------------------------- |
37 | 37 | ||
38 | $(obj)/zImage: IMAGE_OFFSET := 0x1000 | 38 | $(obj)/zImage: IMAGE_OFFSET := 0x1000 |
39 | $(obj)/zImage: EXTRA_AFLAGS := -traditional $(SVGA_MODE) $(RAMDISK) | 39 | $(obj)/zImage: EXTRA_AFLAGS := $(SVGA_MODE) $(RAMDISK) |
40 | $(obj)/bzImage: IMAGE_OFFSET := 0x100000 | 40 | $(obj)/bzImage: IMAGE_OFFSET := 0x100000 |
41 | $(obj)/bzImage: EXTRA_AFLAGS := -traditional $(SVGA_MODE) $(RAMDISK) -D__BIG_KERNEL__ | 41 | $(obj)/bzImage: EXTRA_AFLAGS := $(SVGA_MODE) $(RAMDISK) -D__BIG_KERNEL__ |
42 | $(obj)/bzImage: BUILDFLAGS := -b | 42 | $(obj)/bzImage: BUILDFLAGS := -b |
43 | 43 | ||
44 | quiet_cmd_image = BUILD $@ | 44 | quiet_cmd_image = BUILD $@ |
diff --git a/arch/i386/boot/compressed/misc.c b/arch/i386/boot/compressed/misc.c index 1ce7017fd62..b28505c544c 100644 --- a/arch/i386/boot/compressed/misc.c +++ b/arch/i386/boot/compressed/misc.c | |||
@@ -189,7 +189,7 @@ static void putstr(const char *); | |||
189 | static unsigned long free_mem_ptr; | 189 | static unsigned long free_mem_ptr; |
190 | static unsigned long free_mem_end_ptr; | 190 | static unsigned long free_mem_end_ptr; |
191 | 191 | ||
192 | #define HEAP_SIZE 0x3000 | 192 | #define HEAP_SIZE 0x4000 |
193 | 193 | ||
194 | static char *vidmem = (char *)0xb8000; | 194 | static char *vidmem = (char *)0xb8000; |
195 | static int vidport; | 195 | static int vidport; |
diff --git a/arch/i386/boot/setup.S b/arch/i386/boot/setup.S index 06edf1c6624..f8b3b9cda2b 100644 --- a/arch/i386/boot/setup.S +++ b/arch/i386/boot/setup.S | |||
@@ -52,6 +52,7 @@ | |||
52 | #include <asm/boot.h> | 52 | #include <asm/boot.h> |
53 | #include <asm/e820.h> | 53 | #include <asm/e820.h> |
54 | #include <asm/page.h> | 54 | #include <asm/page.h> |
55 | #include <asm/setup.h> | ||
55 | 56 | ||
56 | /* Signature words to ensure LILO loaded us right */ | 57 | /* Signature words to ensure LILO loaded us right */ |
57 | #define SIG1 0xAA55 | 58 | #define SIG1 0xAA55 |
@@ -81,7 +82,7 @@ start: | |||
81 | # This is the setup header, and it must start at %cs:2 (old 0x9020:2) | 82 | # This is the setup header, and it must start at %cs:2 (old 0x9020:2) |
82 | 83 | ||
83 | .ascii "HdrS" # header signature | 84 | .ascii "HdrS" # header signature |
84 | .word 0x0205 # header version number (>= 0x0105) | 85 | .word 0x0206 # header version number (>= 0x0105) |
85 | # or else old loadlin-1.5 will fail) | 86 | # or else old loadlin-1.5 will fail) |
86 | realmode_swtch: .word 0, 0 # default_switch, SETUPSEG | 87 | realmode_swtch: .word 0, 0 # default_switch, SETUPSEG |
87 | start_sys_seg: .word SYSSEG | 88 | start_sys_seg: .word SYSSEG |
@@ -171,6 +172,10 @@ relocatable_kernel: .byte 0 | |||
171 | pad2: .byte 0 | 172 | pad2: .byte 0 |
172 | pad3: .word 0 | 173 | pad3: .word 0 |
173 | 174 | ||
175 | cmdline_size: .long COMMAND_LINE_SIZE-1 #length of the command line, | ||
176 | #added with boot protocol | ||
177 | #version 2.06 | ||
178 | |||
174 | trampoline: call start_of_setup | 179 | trampoline: call start_of_setup |
175 | .align 16 | 180 | .align 16 |
176 | # The offset at this point is 0x240 | 181 | # The offset at this point is 0x240 |
@@ -297,7 +302,24 @@ good_sig: | |||
297 | 302 | ||
298 | loader_panic_mess: .string "Wrong loader, giving up..." | 303 | loader_panic_mess: .string "Wrong loader, giving up..." |
299 | 304 | ||
305 | # check minimum cpuid | ||
306 | # we do this here because it is the last place we can actually | ||
307 | # show a user visible error message. Later the video mode | ||
308 | # might already be messed up. | ||
300 | loader_ok: | 309 | loader_ok: |
310 | call verify_cpu | ||
311 | testl %eax,%eax | ||
312 | jz cpu_ok | ||
313 | lea cpu_panic_mess,%si | ||
314 | call prtstr | ||
315 | 1: jmp 1b | ||
316 | |||
317 | cpu_panic_mess: | ||
318 | .asciz "PANIC: CPU too old for this kernel." | ||
319 | |||
320 | #include "../kernel/verify_cpu.S" | ||
321 | |||
322 | cpu_ok: | ||
301 | # Get memory size (extended mem, kB) | 323 | # Get memory size (extended mem, kB) |
302 | 324 | ||
303 | xorl %eax, %eax | 325 | xorl %eax, %eax |
diff --git a/arch/i386/defconfig b/arch/i386/defconfig index c96911c37ae..9da84412a83 100644 --- a/arch/i386/defconfig +++ b/arch/i386/defconfig | |||
@@ -1,7 +1,7 @@ | |||
1 | # | 1 | # |
2 | # Automatically generated make config: don't edit | 2 | # Automatically generated make config: don't edit |
3 | # Linux kernel version: 2.6.21-rc3 | 3 | # Linux kernel version: 2.6.21-git3 |
4 | # Wed Mar 7 15:29:47 2007 | 4 | # Tue May 1 07:30:51 2007 |
5 | # | 5 | # |
6 | CONFIG_X86_32=y | 6 | CONFIG_X86_32=y |
7 | CONFIG_GENERIC_TIME=y | 7 | CONFIG_GENERIC_TIME=y |
@@ -108,9 +108,9 @@ CONFIG_DEFAULT_IOSCHED="anticipatory" | |||
108 | # | 108 | # |
109 | # Processor type and features | 109 | # Processor type and features |
110 | # | 110 | # |
111 | # CONFIG_TICK_ONESHOT is not set | 111 | CONFIG_TICK_ONESHOT=y |
112 | # CONFIG_NO_HZ is not set | 112 | CONFIG_NO_HZ=y |
113 | # CONFIG_HIGH_RES_TIMERS is not set | 113 | CONFIG_HIGH_RES_TIMERS=y |
114 | CONFIG_SMP=y | 114 | CONFIG_SMP=y |
115 | # CONFIG_X86_PC is not set | 115 | # CONFIG_X86_PC is not set |
116 | # CONFIG_X86_ELAN is not set | 116 | # CONFIG_X86_ELAN is not set |
@@ -146,9 +146,11 @@ CONFIG_MPENTIUMIII=y | |||
146 | # CONFIG_MGEODE_LX is not set | 146 | # CONFIG_MGEODE_LX is not set |
147 | # CONFIG_MCYRIXIII is not set | 147 | # CONFIG_MCYRIXIII is not set |
148 | # CONFIG_MVIAC3_2 is not set | 148 | # CONFIG_MVIAC3_2 is not set |
149 | # CONFIG_MVIAC7 is not set | ||
149 | CONFIG_X86_GENERIC=y | 150 | CONFIG_X86_GENERIC=y |
150 | CONFIG_X86_CMPXCHG=y | 151 | CONFIG_X86_CMPXCHG=y |
151 | CONFIG_X86_L1_CACHE_SHIFT=7 | 152 | CONFIG_X86_L1_CACHE_SHIFT=7 |
153 | CONFIG_X86_XADD=y | ||
152 | CONFIG_RWSEM_XCHGADD_ALGORITHM=y | 154 | CONFIG_RWSEM_XCHGADD_ALGORITHM=y |
153 | # CONFIG_ARCH_HAS_ILOG2_U32 is not set | 155 | # CONFIG_ARCH_HAS_ILOG2_U32 is not set |
154 | # CONFIG_ARCH_HAS_ILOG2_U64 is not set | 156 | # CONFIG_ARCH_HAS_ILOG2_U64 is not set |
@@ -162,6 +164,8 @@ CONFIG_X86_GOOD_APIC=y | |||
162 | CONFIG_X86_INTEL_USERCOPY=y | 164 | CONFIG_X86_INTEL_USERCOPY=y |
163 | CONFIG_X86_USE_PPRO_CHECKSUM=y | 165 | CONFIG_X86_USE_PPRO_CHECKSUM=y |
164 | CONFIG_X86_TSC=y | 166 | CONFIG_X86_TSC=y |
167 | CONFIG_X86_CMOV=y | ||
168 | CONFIG_X86_MINIMUM_CPU_MODEL=4 | ||
165 | CONFIG_HPET_TIMER=y | 169 | CONFIG_HPET_TIMER=y |
166 | CONFIG_HPET_EMULATE_RTC=y | 170 | CONFIG_HPET_EMULATE_RTC=y |
167 | CONFIG_NR_CPUS=32 | 171 | CONFIG_NR_CPUS=32 |
@@ -248,7 +252,6 @@ CONFIG_ACPI_FAN=y | |||
248 | CONFIG_ACPI_PROCESSOR=y | 252 | CONFIG_ACPI_PROCESSOR=y |
249 | CONFIG_ACPI_THERMAL=y | 253 | CONFIG_ACPI_THERMAL=y |
250 | # CONFIG_ACPI_ASUS is not set | 254 | # CONFIG_ACPI_ASUS is not set |
251 | # CONFIG_ACPI_IBM is not set | ||
252 | # CONFIG_ACPI_TOSHIBA is not set | 255 | # CONFIG_ACPI_TOSHIBA is not set |
253 | CONFIG_ACPI_BLACKLIST_YEAR=2001 | 256 | CONFIG_ACPI_BLACKLIST_YEAR=2001 |
254 | CONFIG_ACPI_DEBUG=y | 257 | CONFIG_ACPI_DEBUG=y |
@@ -257,10 +260,7 @@ CONFIG_ACPI_POWER=y | |||
257 | CONFIG_ACPI_SYSTEM=y | 260 | CONFIG_ACPI_SYSTEM=y |
258 | CONFIG_X86_PM_TIMER=y | 261 | CONFIG_X86_PM_TIMER=y |
259 | # CONFIG_ACPI_CONTAINER is not set | 262 | # CONFIG_ACPI_CONTAINER is not set |
260 | 263 | # CONFIG_ACPI_SBS is not set | |
261 | # | ||
262 | # APM (Advanced Power Management) BIOS Support | ||
263 | # | ||
264 | # CONFIG_APM is not set | 264 | # CONFIG_APM is not set |
265 | 265 | ||
266 | # | 266 | # |
@@ -277,7 +277,7 @@ CONFIG_CPU_FREQ_GOV_PERFORMANCE=y | |||
277 | # CONFIG_CPU_FREQ_GOV_POWERSAVE is not set | 277 | # CONFIG_CPU_FREQ_GOV_POWERSAVE is not set |
278 | CONFIG_CPU_FREQ_GOV_USERSPACE=y | 278 | CONFIG_CPU_FREQ_GOV_USERSPACE=y |
279 | CONFIG_CPU_FREQ_GOV_ONDEMAND=y | 279 | CONFIG_CPU_FREQ_GOV_ONDEMAND=y |
280 | # CONFIG_CPU_FREQ_GOV_CONSERVATIVE is not set | 280 | CONFIG_CPU_FREQ_GOV_CONSERVATIVE=y |
281 | 281 | ||
282 | # | 282 | # |
283 | # CPUFreq processor drivers | 283 | # CPUFreq processor drivers |
@@ -349,7 +349,6 @@ CONFIG_NET=y | |||
349 | # | 349 | # |
350 | # Networking options | 350 | # Networking options |
351 | # | 351 | # |
352 | # CONFIG_NETDEBUG is not set | ||
353 | CONFIG_PACKET=y | 352 | CONFIG_PACKET=y |
354 | # CONFIG_PACKET_MMAP is not set | 353 | # CONFIG_PACKET_MMAP is not set |
355 | CONFIG_UNIX=y | 354 | CONFIG_UNIX=y |
@@ -388,6 +387,7 @@ CONFIG_DEFAULT_TCP_CONG="cubic" | |||
388 | CONFIG_IPV6=y | 387 | CONFIG_IPV6=y |
389 | # CONFIG_IPV6_PRIVACY is not set | 388 | # CONFIG_IPV6_PRIVACY is not set |
390 | # CONFIG_IPV6_ROUTER_PREF is not set | 389 | # CONFIG_IPV6_ROUTER_PREF is not set |
390 | # CONFIG_IPV6_OPTIMISTIC_DAD is not set | ||
391 | # CONFIG_INET6_AH is not set | 391 | # CONFIG_INET6_AH is not set |
392 | # CONFIG_INET6_ESP is not set | 392 | # CONFIG_INET6_ESP is not set |
393 | # CONFIG_INET6_IPCOMP is not set | 393 | # CONFIG_INET6_IPCOMP is not set |
@@ -443,6 +443,13 @@ CONFIG_IPV6_SIT=y | |||
443 | # CONFIG_HAMRADIO is not set | 443 | # CONFIG_HAMRADIO is not set |
444 | # CONFIG_IRDA is not set | 444 | # CONFIG_IRDA is not set |
445 | # CONFIG_BT is not set | 445 | # CONFIG_BT is not set |
446 | # CONFIG_AF_RXRPC is not set | ||
447 | |||
448 | # | ||
449 | # Wireless | ||
450 | # | ||
451 | # CONFIG_CFG80211 is not set | ||
452 | # CONFIG_WIRELESS_EXT is not set | ||
446 | # CONFIG_IEEE80211 is not set | 453 | # CONFIG_IEEE80211 is not set |
447 | 454 | ||
448 | # | 455 | # |
@@ -463,10 +470,6 @@ CONFIG_FW_LOADER=y | |||
463 | # Connector - unified userspace <-> kernelspace linker | 470 | # Connector - unified userspace <-> kernelspace linker |
464 | # | 471 | # |
465 | # CONFIG_CONNECTOR is not set | 472 | # CONFIG_CONNECTOR is not set |
466 | |||
467 | # | ||
468 | # Memory Technology Devices (MTD) | ||
469 | # | ||
470 | # CONFIG_MTD is not set | 473 | # CONFIG_MTD is not set |
471 | 474 | ||
472 | # | 475 | # |
@@ -513,6 +516,7 @@ CONFIG_BLK_DEV_RAM_BLOCKSIZE=1024 | |||
513 | # CONFIG_SGI_IOC4 is not set | 516 | # CONFIG_SGI_IOC4 is not set |
514 | # CONFIG_TIFM_CORE is not set | 517 | # CONFIG_TIFM_CORE is not set |
515 | # CONFIG_SONY_LAPTOP is not set | 518 | # CONFIG_SONY_LAPTOP is not set |
519 | # CONFIG_THINKPAD_ACPI is not set | ||
516 | 520 | ||
517 | # | 521 | # |
518 | # ATA/ATAPI/MFM/RLL support | 522 | # ATA/ATAPI/MFM/RLL support |
@@ -548,7 +552,6 @@ CONFIG_BLK_DEV_IDEPCI=y | |||
548 | # CONFIG_BLK_DEV_RZ1000 is not set | 552 | # CONFIG_BLK_DEV_RZ1000 is not set |
549 | CONFIG_BLK_DEV_IDEDMA_PCI=y | 553 | CONFIG_BLK_DEV_IDEDMA_PCI=y |
550 | # CONFIG_BLK_DEV_IDEDMA_FORCED is not set | 554 | # CONFIG_BLK_DEV_IDEDMA_FORCED is not set |
551 | CONFIG_IDEDMA_PCI_AUTO=y | ||
552 | # CONFIG_IDEDMA_ONLYDISK is not set | 555 | # CONFIG_IDEDMA_ONLYDISK is not set |
553 | # CONFIG_BLK_DEV_AEC62XX is not set | 556 | # CONFIG_BLK_DEV_AEC62XX is not set |
554 | # CONFIG_BLK_DEV_ALI15X3 is not set | 557 | # CONFIG_BLK_DEV_ALI15X3 is not set |
@@ -580,7 +583,6 @@ CONFIG_BLK_DEV_PIIX=y | |||
580 | # CONFIG_IDE_ARM is not set | 583 | # CONFIG_IDE_ARM is not set |
581 | CONFIG_BLK_DEV_IDEDMA=y | 584 | CONFIG_BLK_DEV_IDEDMA=y |
582 | # CONFIG_IDEDMA_IVB is not set | 585 | # CONFIG_IDEDMA_IVB is not set |
583 | CONFIG_IDEDMA_AUTO=y | ||
584 | # CONFIG_BLK_DEV_HD is not set | 586 | # CONFIG_BLK_DEV_HD is not set |
585 | 587 | ||
586 | # | 588 | # |
@@ -669,6 +671,7 @@ CONFIG_AIC79XX_DEBUG_MASK=0 | |||
669 | # CONFIG_SCSI_DC390T is not set | 671 | # CONFIG_SCSI_DC390T is not set |
670 | # CONFIG_SCSI_NSP32 is not set | 672 | # CONFIG_SCSI_NSP32 is not set |
671 | # CONFIG_SCSI_DEBUG is not set | 673 | # CONFIG_SCSI_DEBUG is not set |
674 | # CONFIG_SCSI_ESP_CORE is not set | ||
672 | # CONFIG_SCSI_SRP is not set | 675 | # CONFIG_SCSI_SRP is not set |
673 | 676 | ||
674 | # | 677 | # |
@@ -697,6 +700,7 @@ CONFIG_SATA_ACPI=y | |||
697 | # CONFIG_PATA_AMD is not set | 700 | # CONFIG_PATA_AMD is not set |
698 | # CONFIG_PATA_ARTOP is not set | 701 | # CONFIG_PATA_ARTOP is not set |
699 | # CONFIG_PATA_ATIIXP is not set | 702 | # CONFIG_PATA_ATIIXP is not set |
703 | # CONFIG_PATA_CMD640_PCI is not set | ||
700 | # CONFIG_PATA_CMD64X is not set | 704 | # CONFIG_PATA_CMD64X is not set |
701 | # CONFIG_PATA_CS5520 is not set | 705 | # CONFIG_PATA_CS5520 is not set |
702 | # CONFIG_PATA_CS5530 is not set | 706 | # CONFIG_PATA_CS5530 is not set |
@@ -762,10 +766,9 @@ CONFIG_IEEE1394=y | |||
762 | # Subsystem Options | 766 | # Subsystem Options |
763 | # | 767 | # |
764 | # CONFIG_IEEE1394_VERBOSEDEBUG is not set | 768 | # CONFIG_IEEE1394_VERBOSEDEBUG is not set |
765 | # CONFIG_IEEE1394_EXTRA_CONFIG_ROMS is not set | ||
766 | 769 | ||
767 | # | 770 | # |
768 | # Device Drivers | 771 | # Controllers |
769 | # | 772 | # |
770 | 773 | ||
771 | # | 774 | # |
@@ -774,10 +777,11 @@ CONFIG_IEEE1394=y | |||
774 | CONFIG_IEEE1394_OHCI1394=y | 777 | CONFIG_IEEE1394_OHCI1394=y |
775 | 778 | ||
776 | # | 779 | # |
777 | # Protocol Drivers | 780 | # Protocols |
778 | # | 781 | # |
779 | # CONFIG_IEEE1394_VIDEO1394 is not set | 782 | # CONFIG_IEEE1394_VIDEO1394 is not set |
780 | # CONFIG_IEEE1394_SBP2 is not set | 783 | # CONFIG_IEEE1394_SBP2 is not set |
784 | # CONFIG_IEEE1394_ETH1394_ROM_ENTRY is not set | ||
781 | # CONFIG_IEEE1394_ETH1394 is not set | 785 | # CONFIG_IEEE1394_ETH1394 is not set |
782 | # CONFIG_IEEE1394_DV1394 is not set | 786 | # CONFIG_IEEE1394_DV1394 is not set |
783 | CONFIG_IEEE1394_RAWIO=y | 787 | CONFIG_IEEE1394_RAWIO=y |
@@ -820,7 +824,9 @@ CONFIG_MII=y | |||
820 | # CONFIG_HAPPYMEAL is not set | 824 | # CONFIG_HAPPYMEAL is not set |
821 | # CONFIG_SUNGEM is not set | 825 | # CONFIG_SUNGEM is not set |
822 | # CONFIG_CASSINI is not set | 826 | # CONFIG_CASSINI is not set |
823 | # CONFIG_NET_VENDOR_3COM is not set | 827 | CONFIG_NET_VENDOR_3COM=y |
828 | CONFIG_VORTEX=y | ||
829 | # CONFIG_TYPHOON is not set | ||
824 | 830 | ||
825 | # | 831 | # |
826 | # Tulip family network device support | 832 | # Tulip family network device support |
@@ -901,9 +907,10 @@ CONFIG_BNX2=y | |||
901 | # CONFIG_TR is not set | 907 | # CONFIG_TR is not set |
902 | 908 | ||
903 | # | 909 | # |
904 | # Wireless LAN (non-hamradio) | 910 | # Wireless LAN |
905 | # | 911 | # |
906 | # CONFIG_NET_RADIO is not set | 912 | # CONFIG_WLAN_PRE80211 is not set |
913 | # CONFIG_WLAN_80211 is not set | ||
907 | 914 | ||
908 | # | 915 | # |
909 | # Wan interfaces | 916 | # Wan interfaces |
@@ -917,7 +924,6 @@ CONFIG_BNX2=y | |||
917 | # CONFIG_SHAPER is not set | 924 | # CONFIG_SHAPER is not set |
918 | CONFIG_NETCONSOLE=y | 925 | CONFIG_NETCONSOLE=y |
919 | CONFIG_NETPOLL=y | 926 | CONFIG_NETPOLL=y |
920 | # CONFIG_NETPOLL_RX is not set | ||
921 | # CONFIG_NETPOLL_TRAP is not set | 927 | # CONFIG_NETPOLL_TRAP is not set |
922 | CONFIG_NET_POLL_CONTROLLER=y | 928 | CONFIG_NET_POLL_CONTROLLER=y |
923 | 929 | ||
@@ -1050,7 +1056,7 @@ CONFIG_MAX_RAW_DEVS=256 | |||
1050 | CONFIG_HPET=y | 1056 | CONFIG_HPET=y |
1051 | # CONFIG_HPET_RTC_IRQ is not set | 1057 | # CONFIG_HPET_RTC_IRQ is not set |
1052 | CONFIG_HPET_MMAP=y | 1058 | CONFIG_HPET_MMAP=y |
1053 | CONFIG_HANGCHECK_TIMER=y | 1059 | # CONFIG_HANGCHECK_TIMER is not set |
1054 | 1060 | ||
1055 | # | 1061 | # |
1056 | # TPM devices | 1062 | # TPM devices |
@@ -1142,6 +1148,14 @@ CONFIG_HID=y | |||
1142 | # CONFIG_HID_DEBUG is not set | 1148 | # CONFIG_HID_DEBUG is not set |
1143 | 1149 | ||
1144 | # | 1150 | # |
1151 | # USB Input Devices | ||
1152 | # | ||
1153 | CONFIG_USB_HID=y | ||
1154 | # CONFIG_USB_HIDINPUT_POWERBOOK is not set | ||
1155 | # CONFIG_HID_FF is not set | ||
1156 | # CONFIG_USB_HIDDEV is not set | ||
1157 | |||
1158 | # | ||
1145 | # USB support | 1159 | # USB support |
1146 | # | 1160 | # |
1147 | CONFIG_USB_ARCH_HAS_HCD=y | 1161 | CONFIG_USB_ARCH_HAS_HCD=y |
@@ -1154,6 +1168,7 @@ CONFIG_USB=y | |||
1154 | # Miscellaneous USB options | 1168 | # Miscellaneous USB options |
1155 | # | 1169 | # |
1156 | CONFIG_USB_DEVICEFS=y | 1170 | CONFIG_USB_DEVICEFS=y |
1171 | # CONFIG_USB_DEVICE_CLASS is not set | ||
1157 | # CONFIG_USB_DYNAMIC_MINORS is not set | 1172 | # CONFIG_USB_DYNAMIC_MINORS is not set |
1158 | # CONFIG_USB_SUSPEND is not set | 1173 | # CONFIG_USB_SUSPEND is not set |
1159 | # CONFIG_USB_OTG is not set | 1174 | # CONFIG_USB_OTG is not set |
@@ -1204,10 +1219,6 @@ CONFIG_USB_STORAGE=y | |||
1204 | # | 1219 | # |
1205 | # USB Input Devices | 1220 | # USB Input Devices |
1206 | # | 1221 | # |
1207 | CONFIG_USB_HID=y | ||
1208 | # CONFIG_USB_HIDINPUT_POWERBOOK is not set | ||
1209 | # CONFIG_HID_FF is not set | ||
1210 | # CONFIG_USB_HIDDEV is not set | ||
1211 | # CONFIG_USB_AIPTEK is not set | 1222 | # CONFIG_USB_AIPTEK is not set |
1212 | # CONFIG_USB_WACOM is not set | 1223 | # CONFIG_USB_WACOM is not set |
1213 | # CONFIG_USB_ACECAD is not set | 1224 | # CONFIG_USB_ACECAD is not set |
@@ -1528,7 +1539,7 @@ CONFIG_DEBUG_KERNEL=y | |||
1528 | CONFIG_LOG_BUF_SHIFT=18 | 1539 | CONFIG_LOG_BUF_SHIFT=18 |
1529 | CONFIG_DETECT_SOFTLOCKUP=y | 1540 | CONFIG_DETECT_SOFTLOCKUP=y |
1530 | # CONFIG_SCHEDSTATS is not set | 1541 | # CONFIG_SCHEDSTATS is not set |
1531 | # CONFIG_TIMER_STATS is not set | 1542 | CONFIG_TIMER_STATS=y |
1532 | # CONFIG_DEBUG_SLAB is not set | 1543 | # CONFIG_DEBUG_SLAB is not set |
1533 | # CONFIG_DEBUG_RT_MUTEXES is not set | 1544 | # CONFIG_DEBUG_RT_MUTEXES is not set |
1534 | # CONFIG_RT_MUTEX_TESTER is not set | 1545 | # CONFIG_RT_MUTEX_TESTER is not set |
diff --git a/arch/i386/kernel/Makefile b/arch/i386/kernel/Makefile index 4ae3dcf1d2f..4f98516b9f9 100644 --- a/arch/i386/kernel/Makefile +++ b/arch/i386/kernel/Makefile | |||
@@ -39,12 +39,10 @@ obj-$(CONFIG_EARLY_PRINTK) += early_printk.o | |||
39 | obj-$(CONFIG_HPET_TIMER) += hpet.o | 39 | obj-$(CONFIG_HPET_TIMER) += hpet.o |
40 | obj-$(CONFIG_K8_NB) += k8.o | 40 | obj-$(CONFIG_K8_NB) += k8.o |
41 | 41 | ||
42 | obj-$(CONFIG_VMI) += vmi.o vmitime.o | 42 | obj-$(CONFIG_VMI) += vmi.o vmiclock.o |
43 | obj-$(CONFIG_PARAVIRT) += paravirt.o | 43 | obj-$(CONFIG_PARAVIRT) += paravirt.o |
44 | obj-y += pcspeaker.o | 44 | obj-y += pcspeaker.o |
45 | 45 | ||
46 | EXTRA_AFLAGS := -traditional | ||
47 | |||
48 | obj-$(CONFIG_SCx200) += scx200.o | 46 | obj-$(CONFIG_SCx200) += scx200.o |
49 | 47 | ||
50 | # vsyscall.o contains the vsyscall DSO images as __initdata. | 48 | # vsyscall.o contains the vsyscall DSO images as __initdata. |
diff --git a/arch/i386/kernel/acpi/boot.c b/arch/i386/kernel/acpi/boot.c index 9ea5b8ecc7e..280898b045b 100644 --- a/arch/i386/kernel/acpi/boot.c +++ b/arch/i386/kernel/acpi/boot.c | |||
@@ -874,7 +874,7 @@ static void __init acpi_process_madt(void) | |||
874 | acpi_ioapic = 1; | 874 | acpi_ioapic = 1; |
875 | 875 | ||
876 | smp_found_config = 1; | 876 | smp_found_config = 1; |
877 | clustered_apic_check(); | 877 | setup_apic_routing(); |
878 | } | 878 | } |
879 | } | 879 | } |
880 | if (error == -EINVAL) { | 880 | if (error == -EINVAL) { |
diff --git a/arch/i386/kernel/acpi/earlyquirk.c b/arch/i386/kernel/acpi/earlyquirk.c index 8f7efd38254..23f78efc577 100644 --- a/arch/i386/kernel/acpi/earlyquirk.c +++ b/arch/i386/kernel/acpi/earlyquirk.c | |||
@@ -10,7 +10,6 @@ | |||
10 | #include <asm/pci-direct.h> | 10 | #include <asm/pci-direct.h> |
11 | #include <asm/acpi.h> | 11 | #include <asm/acpi.h> |
12 | #include <asm/apic.h> | 12 | #include <asm/apic.h> |
13 | #include <asm/irq.h> | ||
14 | 13 | ||
15 | #ifdef CONFIG_ACPI | 14 | #ifdef CONFIG_ACPI |
16 | 15 | ||
@@ -48,24 +47,6 @@ static int __init check_bridge(int vendor, int device) | |||
48 | return 0; | 47 | return 0; |
49 | } | 48 | } |
50 | 49 | ||
51 | static void check_intel(void) | ||
52 | { | ||
53 | u16 vendor, device; | ||
54 | |||
55 | vendor = read_pci_config_16(0, 0, 0, PCI_VENDOR_ID); | ||
56 | |||
57 | if (vendor != PCI_VENDOR_ID_INTEL) | ||
58 | return; | ||
59 | |||
60 | device = read_pci_config_16(0, 0, 0, PCI_DEVICE_ID); | ||
61 | #ifdef CONFIG_SMP | ||
62 | if (device == PCI_DEVICE_ID_INTEL_E7320_MCH || | ||
63 | device == PCI_DEVICE_ID_INTEL_E7520_MCH || | ||
64 | device == PCI_DEVICE_ID_INTEL_E7525_MCH) | ||
65 | quirk_intel_irqbalance(); | ||
66 | #endif | ||
67 | } | ||
68 | |||
69 | void __init check_acpi_pci(void) | 50 | void __init check_acpi_pci(void) |
70 | { | 51 | { |
71 | int num, slot, func; | 52 | int num, slot, func; |
@@ -77,8 +58,6 @@ void __init check_acpi_pci(void) | |||
77 | if (!early_pci_allowed()) | 58 | if (!early_pci_allowed()) |
78 | return; | 59 | return; |
79 | 60 | ||
80 | check_intel(); | ||
81 | |||
82 | /* Poor man's PCI discovery */ | 61 | /* Poor man's PCI discovery */ |
83 | for (num = 0; num < 32; num++) { | 62 | for (num = 0; num < 32; num++) { |
84 | for (slot = 0; slot < 32; slot++) { | 63 | for (slot = 0; slot < 32; slot++) { |
diff --git a/arch/i386/kernel/alternative.c b/arch/i386/kernel/alternative.c index 426f59b0106..e5cec6685cc 100644 --- a/arch/i386/kernel/alternative.c +++ b/arch/i386/kernel/alternative.c | |||
@@ -5,6 +5,7 @@ | |||
5 | #include <asm/alternative.h> | 5 | #include <asm/alternative.h> |
6 | #include <asm/sections.h> | 6 | #include <asm/sections.h> |
7 | 7 | ||
8 | static int noreplace_smp = 0; | ||
8 | static int smp_alt_once = 0; | 9 | static int smp_alt_once = 0; |
9 | static int debug_alternative = 0; | 10 | static int debug_alternative = 0; |
10 | 11 | ||
@@ -13,15 +14,33 @@ static int __init bootonly(char *str) | |||
13 | smp_alt_once = 1; | 14 | smp_alt_once = 1; |
14 | return 1; | 15 | return 1; |
15 | } | 16 | } |
17 | __setup("smp-alt-boot", bootonly); | ||
18 | |||
16 | static int __init debug_alt(char *str) | 19 | static int __init debug_alt(char *str) |
17 | { | 20 | { |
18 | debug_alternative = 1; | 21 | debug_alternative = 1; |
19 | return 1; | 22 | return 1; |
20 | } | 23 | } |
21 | |||
22 | __setup("smp-alt-boot", bootonly); | ||
23 | __setup("debug-alternative", debug_alt); | 24 | __setup("debug-alternative", debug_alt); |
24 | 25 | ||
26 | static int __init setup_noreplace_smp(char *str) | ||
27 | { | ||
28 | noreplace_smp = 1; | ||
29 | return 1; | ||
30 | } | ||
31 | __setup("noreplace-smp", setup_noreplace_smp); | ||
32 | |||
33 | #ifdef CONFIG_PARAVIRT | ||
34 | static int noreplace_paravirt = 0; | ||
35 | |||
36 | static int __init setup_noreplace_paravirt(char *str) | ||
37 | { | ||
38 | noreplace_paravirt = 1; | ||
39 | return 1; | ||
40 | } | ||
41 | __setup("noreplace-paravirt", setup_noreplace_paravirt); | ||
42 | #endif | ||
43 | |||
25 | #define DPRINTK(fmt, args...) if (debug_alternative) \ | 44 | #define DPRINTK(fmt, args...) if (debug_alternative) \ |
26 | printk(KERN_DEBUG fmt, args) | 45 | printk(KERN_DEBUG fmt, args) |
27 | 46 | ||
@@ -132,11 +151,8 @@ static void nop_out(void *insns, unsigned int len) | |||
132 | } | 151 | } |
133 | 152 | ||
134 | extern struct alt_instr __alt_instructions[], __alt_instructions_end[]; | 153 | extern struct alt_instr __alt_instructions[], __alt_instructions_end[]; |
135 | extern struct alt_instr __smp_alt_instructions[], __smp_alt_instructions_end[]; | ||
136 | extern u8 *__smp_locks[], *__smp_locks_end[]; | 154 | extern u8 *__smp_locks[], *__smp_locks_end[]; |
137 | 155 | ||
138 | extern u8 __smp_alt_begin[], __smp_alt_end[]; | ||
139 | |||
140 | /* Replace instructions with better alternatives for this CPU type. | 156 | /* Replace instructions with better alternatives for this CPU type. |
141 | This runs before SMP is initialized to avoid SMP problems with | 157 | This runs before SMP is initialized to avoid SMP problems with |
142 | self modifying code. This implies that assymetric systems where | 158 | self modifying code. This implies that assymetric systems where |
@@ -171,29 +187,6 @@ void apply_alternatives(struct alt_instr *start, struct alt_instr *end) | |||
171 | 187 | ||
172 | #ifdef CONFIG_SMP | 188 | #ifdef CONFIG_SMP |
173 | 189 | ||
174 | static void alternatives_smp_save(struct alt_instr *start, struct alt_instr *end) | ||
175 | { | ||
176 | struct alt_instr *a; | ||
177 | |||
178 | DPRINTK("%s: alt table %p-%p\n", __FUNCTION__, start, end); | ||
179 | for (a = start; a < end; a++) { | ||
180 | memcpy(a->replacement + a->replacementlen, | ||
181 | a->instr, | ||
182 | a->instrlen); | ||
183 | } | ||
184 | } | ||
185 | |||
186 | static void alternatives_smp_apply(struct alt_instr *start, struct alt_instr *end) | ||
187 | { | ||
188 | struct alt_instr *a; | ||
189 | |||
190 | for (a = start; a < end; a++) { | ||
191 | memcpy(a->instr, | ||
192 | a->replacement + a->replacementlen, | ||
193 | a->instrlen); | ||
194 | } | ||
195 | } | ||
196 | |||
197 | static void alternatives_smp_lock(u8 **start, u8 **end, u8 *text, u8 *text_end) | 190 | static void alternatives_smp_lock(u8 **start, u8 **end, u8 *text, u8 *text_end) |
198 | { | 191 | { |
199 | u8 **ptr; | 192 | u8 **ptr; |
@@ -211,6 +204,9 @@ static void alternatives_smp_unlock(u8 **start, u8 **end, u8 *text, u8 *text_end | |||
211 | { | 204 | { |
212 | u8 **ptr; | 205 | u8 **ptr; |
213 | 206 | ||
207 | if (noreplace_smp) | ||
208 | return; | ||
209 | |||
214 | for (ptr = start; ptr < end; ptr++) { | 210 | for (ptr = start; ptr < end; ptr++) { |
215 | if (*ptr < text) | 211 | if (*ptr < text) |
216 | continue; | 212 | continue; |
@@ -245,6 +241,9 @@ void alternatives_smp_module_add(struct module *mod, char *name, | |||
245 | struct smp_alt_module *smp; | 241 | struct smp_alt_module *smp; |
246 | unsigned long flags; | 242 | unsigned long flags; |
247 | 243 | ||
244 | if (noreplace_smp) | ||
245 | return; | ||
246 | |||
248 | if (smp_alt_once) { | 247 | if (smp_alt_once) { |
249 | if (boot_cpu_has(X86_FEATURE_UP)) | 248 | if (boot_cpu_has(X86_FEATURE_UP)) |
250 | alternatives_smp_unlock(locks, locks_end, | 249 | alternatives_smp_unlock(locks, locks_end, |
@@ -279,7 +278,7 @@ void alternatives_smp_module_del(struct module *mod) | |||
279 | struct smp_alt_module *item; | 278 | struct smp_alt_module *item; |
280 | unsigned long flags; | 279 | unsigned long flags; |
281 | 280 | ||
282 | if (smp_alt_once) | 281 | if (smp_alt_once || noreplace_smp) |
283 | return; | 282 | return; |
284 | 283 | ||
285 | spin_lock_irqsave(&smp_alt, flags); | 284 | spin_lock_irqsave(&smp_alt, flags); |
@@ -310,7 +309,7 @@ void alternatives_smp_switch(int smp) | |||
310 | return; | 309 | return; |
311 | #endif | 310 | #endif |
312 | 311 | ||
313 | if (smp_alt_once) | 312 | if (noreplace_smp || smp_alt_once) |
314 | return; | 313 | return; |
315 | BUG_ON(!smp && (num_online_cpus() > 1)); | 314 | BUG_ON(!smp && (num_online_cpus() > 1)); |
316 | 315 | ||
@@ -319,8 +318,6 @@ void alternatives_smp_switch(int smp) | |||
319 | printk(KERN_INFO "SMP alternatives: switching to SMP code\n"); | 318 | printk(KERN_INFO "SMP alternatives: switching to SMP code\n"); |
320 | clear_bit(X86_FEATURE_UP, boot_cpu_data.x86_capability); | 319 | clear_bit(X86_FEATURE_UP, boot_cpu_data.x86_capability); |
321 | clear_bit(X86_FEATURE_UP, cpu_data[0].x86_capability); | 320 | clear_bit(X86_FEATURE_UP, cpu_data[0].x86_capability); |
322 | alternatives_smp_apply(__smp_alt_instructions, | ||
323 | __smp_alt_instructions_end); | ||
324 | list_for_each_entry(mod, &smp_alt_modules, next) | 321 | list_for_each_entry(mod, &smp_alt_modules, next) |
325 | alternatives_smp_lock(mod->locks, mod->locks_end, | 322 | alternatives_smp_lock(mod->locks, mod->locks_end, |
326 | mod->text, mod->text_end); | 323 | mod->text, mod->text_end); |
@@ -328,8 +325,6 @@ void alternatives_smp_switch(int smp) | |||
328 | printk(KERN_INFO "SMP alternatives: switching to UP code\n"); | 325 | printk(KERN_INFO "SMP alternatives: switching to UP code\n"); |
329 | set_bit(X86_FEATURE_UP, boot_cpu_data.x86_capability); | 326 | set_bit(X86_FEATURE_UP, boot_cpu_data.x86_capability); |
330 | set_bit(X86_FEATURE_UP, cpu_data[0].x86_capability); | 327 | set_bit(X86_FEATURE_UP, cpu_data[0].x86_capability); |
331 | apply_alternatives(__smp_alt_instructions, | ||
332 | __smp_alt_instructions_end); | ||
333 | list_for_each_entry(mod, &smp_alt_modules, next) | 328 | list_for_each_entry(mod, &smp_alt_modules, next) |
334 | alternatives_smp_unlock(mod->locks, mod->locks_end, | 329 | alternatives_smp_unlock(mod->locks, mod->locks_end, |
335 | mod->text, mod->text_end); | 330 | mod->text, mod->text_end); |
@@ -340,36 +335,31 @@ void alternatives_smp_switch(int smp) | |||
340 | #endif | 335 | #endif |
341 | 336 | ||
342 | #ifdef CONFIG_PARAVIRT | 337 | #ifdef CONFIG_PARAVIRT |
343 | void apply_paravirt(struct paravirt_patch *start, struct paravirt_patch *end) | 338 | void apply_paravirt(struct paravirt_patch_site *start, |
339 | struct paravirt_patch_site *end) | ||
344 | { | 340 | { |
345 | struct paravirt_patch *p; | 341 | struct paravirt_patch_site *p; |
342 | |||
343 | if (noreplace_paravirt) | ||
344 | return; | ||
346 | 345 | ||
347 | for (p = start; p < end; p++) { | 346 | for (p = start; p < end; p++) { |
348 | unsigned int used; | 347 | unsigned int used; |
349 | 348 | ||
350 | used = paravirt_ops.patch(p->instrtype, p->clobbers, p->instr, | 349 | used = paravirt_ops.patch(p->instrtype, p->clobbers, p->instr, |
351 | p->len); | 350 | p->len); |
352 | #ifdef CONFIG_DEBUG_PARAVIRT | 351 | |
353 | { | 352 | BUG_ON(used > p->len); |
354 | int i; | 353 | |
355 | /* Deliberately clobber regs using "not %reg" to find bugs. */ | ||
356 | for (i = 0; i < 3; i++) { | ||
357 | if (p->len - used >= 2 && (p->clobbers & (1 << i))) { | ||
358 | memcpy(p->instr + used, "\xf7\xd0", 2); | ||
359 | p->instr[used+1] |= i; | ||
360 | used += 2; | ||
361 | } | ||
362 | } | ||
363 | } | ||
364 | #endif | ||
365 | /* Pad the rest with nops */ | 354 | /* Pad the rest with nops */ |
366 | nop_out(p->instr + used, p->len - used); | 355 | nop_out(p->instr + used, p->len - used); |
367 | } | 356 | } |
368 | 357 | ||
369 | /* Sync to be conservative, in case we patched following instructions */ | 358 | /* Sync to be conservative, in case we patched following |
359 | * instructions */ | ||
370 | sync_core(); | 360 | sync_core(); |
371 | } | 361 | } |
372 | extern struct paravirt_patch __start_parainstructions[], | 362 | extern struct paravirt_patch_site __start_parainstructions[], |
373 | __stop_parainstructions[]; | 363 | __stop_parainstructions[]; |
374 | #endif /* CONFIG_PARAVIRT */ | 364 | #endif /* CONFIG_PARAVIRT */ |
375 | 365 | ||
@@ -396,23 +386,19 @@ void __init alternative_instructions(void) | |||
396 | printk(KERN_INFO "SMP alternatives: switching to UP code\n"); | 386 | printk(KERN_INFO "SMP alternatives: switching to UP code\n"); |
397 | set_bit(X86_FEATURE_UP, boot_cpu_data.x86_capability); | 387 | set_bit(X86_FEATURE_UP, boot_cpu_data.x86_capability); |
398 | set_bit(X86_FEATURE_UP, cpu_data[0].x86_capability); | 388 | set_bit(X86_FEATURE_UP, cpu_data[0].x86_capability); |
399 | apply_alternatives(__smp_alt_instructions, | ||
400 | __smp_alt_instructions_end); | ||
401 | alternatives_smp_unlock(__smp_locks, __smp_locks_end, | 389 | alternatives_smp_unlock(__smp_locks, __smp_locks_end, |
402 | _text, _etext); | 390 | _text, _etext); |
403 | } | 391 | } |
404 | free_init_pages("SMP alternatives", | 392 | free_init_pages("SMP alternatives", |
405 | (unsigned long)__smp_alt_begin, | 393 | __pa_symbol(&__smp_locks), |
406 | (unsigned long)__smp_alt_end); | 394 | __pa_symbol(&__smp_locks_end)); |
407 | } else { | 395 | } else { |
408 | alternatives_smp_save(__smp_alt_instructions, | ||
409 | __smp_alt_instructions_end); | ||
410 | alternatives_smp_module_add(NULL, "core kernel", | 396 | alternatives_smp_module_add(NULL, "core kernel", |
411 | __smp_locks, __smp_locks_end, | 397 | __smp_locks, __smp_locks_end, |
412 | _text, _etext); | 398 | _text, _etext); |
413 | alternatives_smp_switch(0); | 399 | alternatives_smp_switch(0); |
414 | } | 400 | } |
415 | #endif | 401 | #endif |
416 | apply_paravirt(__start_parainstructions, __stop_parainstructions); | 402 | apply_paravirt(__parainstructions, __parainstructions_end); |
417 | local_irq_restore(flags); | 403 | local_irq_restore(flags); |
418 | } | 404 | } |
diff --git a/arch/i386/kernel/apic.c b/arch/i386/kernel/apic.c index 93aa911646a..aca054cc055 100644 --- a/arch/i386/kernel/apic.c +++ b/arch/i386/kernel/apic.c | |||
@@ -129,6 +129,28 @@ static int modern_apic(void) | |||
129 | return lapic_get_version() >= 0x14; | 129 | return lapic_get_version() >= 0x14; |
130 | } | 130 | } |
131 | 131 | ||
132 | void apic_wait_icr_idle(void) | ||
133 | { | ||
134 | while (apic_read(APIC_ICR) & APIC_ICR_BUSY) | ||
135 | cpu_relax(); | ||
136 | } | ||
137 | |||
138 | unsigned long safe_apic_wait_icr_idle(void) | ||
139 | { | ||
140 | unsigned long send_status; | ||
141 | int timeout; | ||
142 | |||
143 | timeout = 0; | ||
144 | do { | ||
145 | send_status = apic_read(APIC_ICR) & APIC_ICR_BUSY; | ||
146 | if (!send_status) | ||
147 | break; | ||
148 | udelay(100); | ||
149 | } while (timeout++ < 1000); | ||
150 | |||
151 | return send_status; | ||
152 | } | ||
153 | |||
132 | /** | 154 | /** |
133 | * enable_NMI_through_LVT0 - enable NMI through local vector table 0 | 155 | * enable_NMI_through_LVT0 - enable NMI through local vector table 0 |
134 | */ | 156 | */ |
diff --git a/arch/i386/kernel/apm.c b/arch/i386/kernel/apm.c index 064bbf2861f..367ff1d930c 100644 --- a/arch/i386/kernel/apm.c +++ b/arch/i386/kernel/apm.c | |||
@@ -233,11 +233,10 @@ | |||
233 | #include <asm/desc.h> | 233 | #include <asm/desc.h> |
234 | #include <asm/i8253.h> | 234 | #include <asm/i8253.h> |
235 | #include <asm/paravirt.h> | 235 | #include <asm/paravirt.h> |
236 | #include <asm/reboot.h> | ||
236 | 237 | ||
237 | #include "io_ports.h" | 238 | #include "io_ports.h" |
238 | 239 | ||
239 | extern void machine_real_restart(unsigned char *, int); | ||
240 | |||
241 | #if defined(CONFIG_APM_DISPLAY_BLANK) && defined(CONFIG_VT) | 240 | #if defined(CONFIG_APM_DISPLAY_BLANK) && defined(CONFIG_VT) |
242 | extern int (*console_blank_hook)(int); | 241 | extern int (*console_blank_hook)(int); |
243 | #endif | 242 | #endif |
@@ -384,13 +383,6 @@ static int ignore_sys_suspend; | |||
384 | static int ignore_normal_resume; | 383 | static int ignore_normal_resume; |
385 | static int bounce_interval __read_mostly = DEFAULT_BOUNCE_INTERVAL; | 384 | static int bounce_interval __read_mostly = DEFAULT_BOUNCE_INTERVAL; |
386 | 385 | ||
387 | #ifdef CONFIG_APM_RTC_IS_GMT | ||
388 | # define clock_cmos_diff 0 | ||
389 | # define got_clock_diff 1 | ||
390 | #else | ||
391 | static long clock_cmos_diff; | ||
392 | static int got_clock_diff; | ||
393 | #endif | ||
394 | static int debug __read_mostly; | 386 | static int debug __read_mostly; |
395 | static int smp __read_mostly; | 387 | static int smp __read_mostly; |
396 | static int apm_disabled = -1; | 388 | static int apm_disabled = -1; |
diff --git a/arch/i386/kernel/asm-offsets.c b/arch/i386/kernel/asm-offsets.c index c37535163bf..27a776c9044 100644 --- a/arch/i386/kernel/asm-offsets.c +++ b/arch/i386/kernel/asm-offsets.c | |||
@@ -11,11 +11,11 @@ | |||
11 | #include <linux/suspend.h> | 11 | #include <linux/suspend.h> |
12 | #include <asm/ucontext.h> | 12 | #include <asm/ucontext.h> |
13 | #include "sigframe.h" | 13 | #include "sigframe.h" |
14 | #include <asm/pgtable.h> | ||
14 | #include <asm/fixmap.h> | 15 | #include <asm/fixmap.h> |
15 | #include <asm/processor.h> | 16 | #include <asm/processor.h> |
16 | #include <asm/thread_info.h> | 17 | #include <asm/thread_info.h> |
17 | #include <asm/elf.h> | 18 | #include <asm/elf.h> |
18 | #include <asm/pda.h> | ||
19 | 19 | ||
20 | #define DEFINE(sym, val) \ | 20 | #define DEFINE(sym, val) \ |
21 | asm volatile("\n->" #sym " %0 " #val : : "i" (val)) | 21 | asm volatile("\n->" #sym " %0 " #val : : "i" (val)) |
@@ -25,6 +25,9 @@ | |||
25 | #define OFFSET(sym, str, mem) \ | 25 | #define OFFSET(sym, str, mem) \ |
26 | DEFINE(sym, offsetof(struct str, mem)); | 26 | DEFINE(sym, offsetof(struct str, mem)); |
27 | 27 | ||
28 | /* workaround for a warning with -Wmissing-prototypes */ | ||
29 | void foo(void); | ||
30 | |||
28 | void foo(void) | 31 | void foo(void) |
29 | { | 32 | { |
30 | OFFSET(SIGCONTEXT_eax, sigcontext, eax); | 33 | OFFSET(SIGCONTEXT_eax, sigcontext, eax); |
@@ -90,17 +93,18 @@ void foo(void) | |||
90 | OFFSET(pbe_next, pbe, next); | 93 | OFFSET(pbe_next, pbe, next); |
91 | 94 | ||
92 | /* Offset from the sysenter stack to tss.esp0 */ | 95 | /* Offset from the sysenter stack to tss.esp0 */ |
93 | DEFINE(TSS_sysenter_esp0, offsetof(struct tss_struct, esp0) - | 96 | DEFINE(TSS_sysenter_esp0, offsetof(struct tss_struct, x86_tss.esp0) - |
94 | sizeof(struct tss_struct)); | 97 | sizeof(struct tss_struct)); |
95 | 98 | ||
96 | DEFINE(PAGE_SIZE_asm, PAGE_SIZE); | 99 | DEFINE(PAGE_SIZE_asm, PAGE_SIZE); |
97 | DEFINE(VDSO_PRELINK, VDSO_PRELINK); | 100 | DEFINE(PAGE_SHIFT_asm, PAGE_SHIFT); |
101 | DEFINE(PTRS_PER_PTE, PTRS_PER_PTE); | ||
102 | DEFINE(PTRS_PER_PMD, PTRS_PER_PMD); | ||
103 | DEFINE(PTRS_PER_PGD, PTRS_PER_PGD); | ||
98 | 104 | ||
99 | OFFSET(crypto_tfm_ctx_offset, crypto_tfm, __crt_ctx); | 105 | DEFINE(VDSO_PRELINK_asm, VDSO_PRELINK); |
100 | 106 | ||
101 | BLANK(); | 107 | OFFSET(crypto_tfm_ctx_offset, crypto_tfm, __crt_ctx); |
102 | OFFSET(PDA_cpu, i386_pda, cpu_number); | ||
103 | OFFSET(PDA_pcurrent, i386_pda, pcurrent); | ||
104 | 108 | ||
105 | #ifdef CONFIG_PARAVIRT | 109 | #ifdef CONFIG_PARAVIRT |
106 | BLANK(); | 110 | BLANK(); |
diff --git a/arch/i386/kernel/cpu/Makefile b/arch/i386/kernel/cpu/Makefile index 010aecfffbc..74f27a463db 100644 --- a/arch/i386/kernel/cpu/Makefile +++ b/arch/i386/kernel/cpu/Makefile | |||
@@ -2,7 +2,7 @@ | |||
2 | # Makefile for x86-compatible CPU details and quirks | 2 | # Makefile for x86-compatible CPU details and quirks |
3 | # | 3 | # |
4 | 4 | ||
5 | obj-y := common.o proc.o | 5 | obj-y := common.o proc.o bugs.o |
6 | 6 | ||
7 | obj-y += amd.o | 7 | obj-y += amd.o |
8 | obj-y += cyrix.o | 8 | obj-y += cyrix.o |
@@ -17,3 +17,5 @@ obj-$(CONFIG_X86_MCE) += mcheck/ | |||
17 | 17 | ||
18 | obj-$(CONFIG_MTRR) += mtrr/ | 18 | obj-$(CONFIG_MTRR) += mtrr/ |
19 | obj-$(CONFIG_CPU_FREQ) += cpufreq/ | 19 | obj-$(CONFIG_CPU_FREQ) += cpufreq/ |
20 | |||
21 | obj-$(CONFIG_X86_LOCAL_APIC) += perfctr-watchdog.o | ||
diff --git a/arch/i386/kernel/cpu/amd.c b/arch/i386/kernel/cpu/amd.c index 2d47db48297..4fec702afd7 100644 --- a/arch/i386/kernel/cpu/amd.c +++ b/arch/i386/kernel/cpu/amd.c | |||
@@ -53,6 +53,8 @@ static __cpuinit int amd_apic_timer_broken(void) | |||
53 | return 0; | 53 | return 0; |
54 | } | 54 | } |
55 | 55 | ||
56 | int force_mwait __cpuinitdata; | ||
57 | |||
56 | static void __cpuinit init_amd(struct cpuinfo_x86 *c) | 58 | static void __cpuinit init_amd(struct cpuinfo_x86 *c) |
57 | { | 59 | { |
58 | u32 l, h; | 60 | u32 l, h; |
@@ -275,6 +277,9 @@ static void __cpuinit init_amd(struct cpuinfo_x86 *c) | |||
275 | 277 | ||
276 | if (amd_apic_timer_broken()) | 278 | if (amd_apic_timer_broken()) |
277 | set_bit(X86_FEATURE_LAPIC_TIMER_BROKEN, c->x86_capability); | 279 | set_bit(X86_FEATURE_LAPIC_TIMER_BROKEN, c->x86_capability); |
280 | |||
281 | if (c->x86 == 0x10 && !force_mwait) | ||
282 | clear_bit(X86_FEATURE_MWAIT, c->x86_capability); | ||
278 | } | 283 | } |
279 | 284 | ||
280 | static unsigned int __cpuinit amd_size_cache(struct cpuinfo_x86 * c, unsigned int size) | 285 | static unsigned int __cpuinit amd_size_cache(struct cpuinfo_x86 * c, unsigned int size) |
@@ -314,13 +319,3 @@ int __init amd_init_cpu(void) | |||
314 | cpu_devs[X86_VENDOR_AMD] = &amd_cpu_dev; | 319 | cpu_devs[X86_VENDOR_AMD] = &amd_cpu_dev; |
315 | return 0; | 320 | return 0; |
316 | } | 321 | } |
317 | |||
318 | //early_arch_initcall(amd_init_cpu); | ||
319 | |||
320 | static int __init amd_exit_cpu(void) | ||
321 | { | ||
322 | cpu_devs[X86_VENDOR_AMD] = NULL; | ||
323 | return 0; | ||
324 | } | ||
325 | |||
326 | late_initcall(amd_exit_cpu); | ||
diff --git a/arch/i386/kernel/cpu/bugs.c b/arch/i386/kernel/cpu/bugs.c new file mode 100644 index 00000000000..54428a2500f --- /dev/null +++ b/arch/i386/kernel/cpu/bugs.c | |||
@@ -0,0 +1,191 @@ | |||
1 | /* | ||
2 | * arch/i386/cpu/bugs.c | ||
3 | * | ||
4 | * Copyright (C) 1994 Linus Torvalds | ||
5 | * | ||
6 | * Cyrix stuff, June 1998 by: | ||
7 | * - Rafael R. Reilova (moved everything from head.S), | ||
8 | * <rreilova@ececs.uc.edu> | ||
9 | * - Channing Corn (tests & fixes), | ||
10 | * - Andrew D. Balsa (code cleanup). | ||
11 | */ | ||
12 | #include <linux/init.h> | ||
13 | #include <linux/utsname.h> | ||
14 | #include <asm/processor.h> | ||
15 | #include <asm/i387.h> | ||
16 | #include <asm/msr.h> | ||
17 | #include <asm/paravirt.h> | ||
18 | #include <asm/alternative.h> | ||
19 | |||
20 | static int __init no_halt(char *s) | ||
21 | { | ||
22 | boot_cpu_data.hlt_works_ok = 0; | ||
23 | return 1; | ||
24 | } | ||
25 | |||
26 | __setup("no-hlt", no_halt); | ||
27 | |||
28 | static int __init mca_pentium(char *s) | ||
29 | { | ||
30 | mca_pentium_flag = 1; | ||
31 | return 1; | ||
32 | } | ||
33 | |||
34 | __setup("mca-pentium", mca_pentium); | ||
35 | |||
36 | static int __init no_387(char *s) | ||
37 | { | ||
38 | boot_cpu_data.hard_math = 0; | ||
39 | write_cr0(0xE | read_cr0()); | ||
40 | return 1; | ||
41 | } | ||
42 | |||
43 | __setup("no387", no_387); | ||
44 | |||
45 | static double __initdata x = 4195835.0; | ||
46 | static double __initdata y = 3145727.0; | ||
47 | |||
48 | /* | ||
49 | * This used to check for exceptions.. | ||
50 | * However, it turns out that to support that, | ||
51 | * the XMM trap handlers basically had to | ||
52 | * be buggy. So let's have a correct XMM trap | ||
53 | * handler, and forget about printing out | ||
54 | * some status at boot. | ||
55 | * | ||
56 | * We should really only care about bugs here | ||
57 | * anyway. Not features. | ||
58 | */ | ||
59 | static void __init check_fpu(void) | ||
60 | { | ||
61 | if (!boot_cpu_data.hard_math) { | ||
62 | #ifndef CONFIG_MATH_EMULATION | ||
63 | printk(KERN_EMERG "No coprocessor found and no math emulation present.\n"); | ||
64 | printk(KERN_EMERG "Giving up.\n"); | ||
65 | for (;;) ; | ||
66 | #endif | ||
67 | return; | ||
68 | } | ||
69 | |||
70 | /* trap_init() enabled FXSR and company _before_ testing for FP problems here. */ | ||
71 | /* Test for the divl bug.. */ | ||
72 | __asm__("fninit\n\t" | ||
73 | "fldl %1\n\t" | ||
74 | "fdivl %2\n\t" | ||
75 | "fmull %2\n\t" | ||
76 | "fldl %1\n\t" | ||
77 | "fsubp %%st,%%st(1)\n\t" | ||
78 | "fistpl %0\n\t" | ||
79 | "fwait\n\t" | ||
80 | "fninit" | ||
81 | : "=m" (*&boot_cpu_data.fdiv_bug) | ||
82 | : "m" (*&x), "m" (*&y)); | ||
83 | if (boot_cpu_data.fdiv_bug) | ||
84 | printk("Hmm, FPU with FDIV bug.\n"); | ||
85 | } | ||
86 | |||
87 | static void __init check_hlt(void) | ||
88 | { | ||
89 | if (paravirt_enabled()) | ||
90 | return; | ||
91 | |||
92 | printk(KERN_INFO "Checking 'hlt' instruction... "); | ||
93 | if (!boot_cpu_data.hlt_works_ok) { | ||
94 | printk("disabled\n"); | ||
95 | return; | ||
96 | } | ||
97 | halt(); | ||
98 | halt(); | ||
99 | halt(); | ||
100 | halt(); | ||
101 | printk("OK.\n"); | ||
102 | } | ||
103 | |||
104 | /* | ||
105 | * Most 386 processors have a bug where a POPAD can lock the | ||
106 | * machine even from user space. | ||
107 | */ | ||
108 | |||
109 | static void __init check_popad(void) | ||
110 | { | ||
111 | #ifndef CONFIG_X86_POPAD_OK | ||
112 | int res, inp = (int) &res; | ||
113 | |||
114 | printk(KERN_INFO "Checking for popad bug... "); | ||
115 | __asm__ __volatile__( | ||
116 | "movl $12345678,%%eax; movl $0,%%edi; pusha; popa; movl (%%edx,%%edi),%%ecx " | ||
117 | : "=&a" (res) | ||
118 | : "d" (inp) | ||
119 | : "ecx", "edi" ); | ||
120 | /* If this fails, it means that any user program may lock the CPU hard. Too bad. */ | ||
121 | if (res != 12345678) printk( "Buggy.\n" ); | ||
122 | else printk( "OK.\n" ); | ||
123 | #endif | ||
124 | } | ||
125 | |||
126 | /* | ||
127 | * Check whether we are able to run this kernel safely on SMP. | ||
128 | * | ||
129 | * - In order to run on a i386, we need to be compiled for i386 | ||
130 | * (for due to lack of "invlpg" and working WP on a i386) | ||
131 | * - In order to run on anything without a TSC, we need to be | ||
132 | * compiled for a i486. | ||
133 | * - In order to support the local APIC on a buggy Pentium machine, | ||
134 | * we need to be compiled with CONFIG_X86_GOOD_APIC disabled, | ||
135 | * which happens implicitly if compiled for a Pentium or lower | ||
136 | * (unless an advanced selection of CPU features is used) as an | ||
137 | * otherwise config implies a properly working local APIC without | ||
138 | * the need to do extra reads from the APIC. | ||
139 | */ | ||
140 | |||
141 | static void __init check_config(void) | ||
142 | { | ||
143 | /* | ||
144 | * We'd better not be a i386 if we're configured to use some | ||
145 | * i486+ only features! (WP works in supervisor mode and the | ||
146 | * new "invlpg" and "bswap" instructions) | ||
147 | */ | ||
148 | #if defined(CONFIG_X86_WP_WORKS_OK) || defined(CONFIG_X86_INVLPG) || defined(CONFIG_X86_BSWAP) | ||
149 | if (boot_cpu_data.x86 == 3) | ||
150 | panic("Kernel requires i486+ for 'invlpg' and other features"); | ||
151 | #endif | ||
152 | |||
153 | /* | ||
154 | * If we configured ourselves for a TSC, we'd better have one! | ||
155 | */ | ||
156 | #ifdef CONFIG_X86_TSC | ||
157 | if (!cpu_has_tsc && !tsc_disable) | ||
158 | panic("Kernel compiled for Pentium+, requires TSC feature!"); | ||
159 | #endif | ||
160 | |||
161 | /* | ||
162 | * If we were told we had a good local APIC, check for buggy Pentia, | ||
163 | * i.e. all B steppings and the C2 stepping of P54C when using their | ||
164 | * integrated APIC (see 11AP erratum in "Pentium Processor | ||
165 | * Specification Update"). | ||
166 | */ | ||
167 | #if defined(CONFIG_X86_LOCAL_APIC) && defined(CONFIG_X86_GOOD_APIC) | ||
168 | if (boot_cpu_data.x86_vendor == X86_VENDOR_INTEL | ||
169 | && cpu_has_apic | ||
170 | && boot_cpu_data.x86 == 5 | ||
171 | && boot_cpu_data.x86_model == 2 | ||
172 | && (boot_cpu_data.x86_mask < 6 || boot_cpu_data.x86_mask == 11)) | ||
173 | panic("Kernel compiled for PMMX+, assumes a local APIC without the read-before-write bug!"); | ||
174 | #endif | ||
175 | } | ||
176 | |||
177 | |||
178 | void __init check_bugs(void) | ||
179 | { | ||
180 | identify_boot_cpu(); | ||
181 | #ifndef CONFIG_SMP | ||
182 | printk("CPU: "); | ||
183 | print_cpu_info(&boot_cpu_data); | ||
184 | #endif | ||
185 | check_config(); | ||
186 | check_fpu(); | ||
187 | check_hlt(); | ||
188 | check_popad(); | ||
189 | init_utsname()->machine[1] = '0' + (boot_cpu_data.x86 > 6 ? 6 : boot_cpu_data.x86); | ||
190 | alternative_instructions(); | ||
191 | } | ||
diff --git a/arch/i386/kernel/cpu/centaur.c b/arch/i386/kernel/cpu/centaur.c index 8c25047975c..473eac883c7 100644 --- a/arch/i386/kernel/cpu/centaur.c +++ b/arch/i386/kernel/cpu/centaur.c | |||
@@ -469,13 +469,3 @@ int __init centaur_init_cpu(void) | |||
469 | cpu_devs[X86_VENDOR_CENTAUR] = &centaur_cpu_dev; | 469 | cpu_devs[X86_VENDOR_CENTAUR] = &centaur_cpu_dev; |
470 | return 0; | 470 | return 0; |
471 | } | 471 | } |
472 | |||
473 | //early_arch_initcall(centaur_init_cpu); | ||
474 | |||
475 | static int __init centaur_exit_cpu(void) | ||
476 | { | ||
477 | cpu_devs[X86_VENDOR_CENTAUR] = NULL; | ||
478 | return 0; | ||
479 | } | ||
480 | |||
481 | late_initcall(centaur_exit_cpu); | ||
diff --git a/arch/i386/kernel/cpu/common.c b/arch/i386/kernel/cpu/common.c index dcbbd0a8bfc..794d593c47e 100644 --- a/arch/i386/kernel/cpu/common.c +++ b/arch/i386/kernel/cpu/common.c | |||
@@ -18,15 +18,37 @@ | |||
18 | #include <asm/apic.h> | 18 | #include <asm/apic.h> |
19 | #include <mach_apic.h> | 19 | #include <mach_apic.h> |
20 | #endif | 20 | #endif |
21 | #include <asm/pda.h> | ||
22 | 21 | ||
23 | #include "cpu.h" | 22 | #include "cpu.h" |
24 | 23 | ||
25 | DEFINE_PER_CPU(struct Xgt_desc_struct, cpu_gdt_descr); | 24 | DEFINE_PER_CPU(struct gdt_page, gdt_page) = { .gdt = { |
26 | EXPORT_PER_CPU_SYMBOL(cpu_gdt_descr); | 25 | [GDT_ENTRY_KERNEL_CS] = { 0x0000ffff, 0x00cf9a00 }, |
26 | [GDT_ENTRY_KERNEL_DS] = { 0x0000ffff, 0x00cf9200 }, | ||
27 | [GDT_ENTRY_DEFAULT_USER_CS] = { 0x0000ffff, 0x00cffa00 }, | ||
28 | [GDT_ENTRY_DEFAULT_USER_DS] = { 0x0000ffff, 0x00cff200 }, | ||
29 | /* | ||
30 | * Segments used for calling PnP BIOS have byte granularity. | ||
31 | * The code segments and data segments have fixed 64k limits, | ||
32 | * the transfer segment sizes are set at run time. | ||
33 | */ | ||
34 | [GDT_ENTRY_PNPBIOS_CS32] = { 0x0000ffff, 0x00409a00 },/* 32-bit code */ | ||
35 | [GDT_ENTRY_PNPBIOS_CS16] = { 0x0000ffff, 0x00009a00 },/* 16-bit code */ | ||
36 | [GDT_ENTRY_PNPBIOS_DS] = { 0x0000ffff, 0x00009200 }, /* 16-bit data */ | ||
37 | [GDT_ENTRY_PNPBIOS_TS1] = { 0x00000000, 0x00009200 },/* 16-bit data */ | ||
38 | [GDT_ENTRY_PNPBIOS_TS2] = { 0x00000000, 0x00009200 },/* 16-bit data */ | ||
39 | /* | ||
40 | * The APM segments have byte granularity and their bases | ||
41 | * are set at run time. All have 64k limits. | ||
42 | */ | ||
43 | [GDT_ENTRY_APMBIOS_BASE] = { 0x0000ffff, 0x00409a00 },/* 32-bit code */ | ||
44 | /* 16-bit code */ | ||
45 | [GDT_ENTRY_APMBIOS_BASE+1] = { 0x0000ffff, 0x00009a00 }, | ||
46 | [GDT_ENTRY_APMBIOS_BASE+2] = { 0x0000ffff, 0x00409200 }, /* data */ | ||
27 | 47 | ||
28 | struct i386_pda *_cpu_pda[NR_CPUS] __read_mostly; | 48 | [GDT_ENTRY_ESPFIX_SS] = { 0x00000000, 0x00c09200 }, |
29 | EXPORT_SYMBOL(_cpu_pda); | 49 | [GDT_ENTRY_PERCPU] = { 0x00000000, 0x00000000 }, |
50 | } }; | ||
51 | EXPORT_PER_CPU_SYMBOL_GPL(gdt_page); | ||
30 | 52 | ||
31 | static int cachesize_override __cpuinitdata = -1; | 53 | static int cachesize_override __cpuinitdata = -1; |
32 | static int disable_x86_fxsr __cpuinitdata; | 54 | static int disable_x86_fxsr __cpuinitdata; |
@@ -368,7 +390,7 @@ __setup("serialnumber", x86_serial_nr_setup); | |||
368 | /* | 390 | /* |
369 | * This does the hard work of actually picking apart the CPU stuff... | 391 | * This does the hard work of actually picking apart the CPU stuff... |
370 | */ | 392 | */ |
371 | void __cpuinit identify_cpu(struct cpuinfo_x86 *c) | 393 | static void __cpuinit identify_cpu(struct cpuinfo_x86 *c) |
372 | { | 394 | { |
373 | int i; | 395 | int i; |
374 | 396 | ||
@@ -479,15 +501,22 @@ void __cpuinit identify_cpu(struct cpuinfo_x86 *c) | |||
479 | 501 | ||
480 | /* Init Machine Check Exception if available. */ | 502 | /* Init Machine Check Exception if available. */ |
481 | mcheck_init(c); | 503 | mcheck_init(c); |
504 | } | ||
482 | 505 | ||
483 | if (c == &boot_cpu_data) | 506 | void __init identify_boot_cpu(void) |
484 | sysenter_setup(); | 507 | { |
508 | identify_cpu(&boot_cpu_data); | ||
509 | sysenter_setup(); | ||
485 | enable_sep_cpu(); | 510 | enable_sep_cpu(); |
511 | mtrr_bp_init(); | ||
512 | } | ||
486 | 513 | ||
487 | if (c == &boot_cpu_data) | 514 | void __cpuinit identify_secondary_cpu(struct cpuinfo_x86 *c) |
488 | mtrr_bp_init(); | 515 | { |
489 | else | 516 | BUG_ON(c == &boot_cpu_data); |
490 | mtrr_ap_init(); | 517 | identify_cpu(c); |
518 | enable_sep_cpu(); | ||
519 | mtrr_ap_init(); | ||
491 | } | 520 | } |
492 | 521 | ||
493 | #ifdef CONFIG_X86_HT | 522 | #ifdef CONFIG_X86_HT |
@@ -601,129 +630,36 @@ void __init early_cpu_init(void) | |||
601 | #endif | 630 | #endif |
602 | } | 631 | } |
603 | 632 | ||
604 | /* Make sure %gs is initialized properly in idle threads */ | 633 | /* Make sure %fs is initialized properly in idle threads */ |
605 | struct pt_regs * __devinit idle_regs(struct pt_regs *regs) | 634 | struct pt_regs * __devinit idle_regs(struct pt_regs *regs) |
606 | { | 635 | { |
607 | memset(regs, 0, sizeof(struct pt_regs)); | 636 | memset(regs, 0, sizeof(struct pt_regs)); |
608 | regs->xfs = __KERNEL_PDA; | 637 | regs->xfs = __KERNEL_PERCPU; |
609 | return regs; | 638 | return regs; |
610 | } | 639 | } |
611 | 640 | ||
612 | static __cpuinit int alloc_gdt(int cpu) | 641 | /* Current gdt points %fs at the "master" per-cpu area: after this, |
642 | * it's on the real one. */ | ||
643 | void switch_to_new_gdt(void) | ||
613 | { | 644 | { |
614 | struct Xgt_desc_struct *cpu_gdt_descr = &per_cpu(cpu_gdt_descr, cpu); | 645 | struct Xgt_desc_struct gdt_descr; |
615 | struct desc_struct *gdt; | ||
616 | struct i386_pda *pda; | ||
617 | |||
618 | gdt = (struct desc_struct *)cpu_gdt_descr->address; | ||
619 | pda = cpu_pda(cpu); | ||
620 | |||
621 | /* | ||
622 | * This is a horrible hack to allocate the GDT. The problem | ||
623 | * is that cpu_init() is called really early for the boot CPU | ||
624 | * (and hence needs bootmem) but much later for the secondary | ||
625 | * CPUs, when bootmem will have gone away | ||
626 | */ | ||
627 | if (NODE_DATA(0)->bdata->node_bootmem_map) { | ||
628 | BUG_ON(gdt != NULL || pda != NULL); | ||
629 | |||
630 | gdt = alloc_bootmem_pages(PAGE_SIZE); | ||
631 | pda = alloc_bootmem(sizeof(*pda)); | ||
632 | /* alloc_bootmem(_pages) panics on failure, so no check */ | ||
633 | |||
634 | memset(gdt, 0, PAGE_SIZE); | ||
635 | memset(pda, 0, sizeof(*pda)); | ||
636 | } else { | ||
637 | /* GDT and PDA might already have been allocated if | ||
638 | this is a CPU hotplug re-insertion. */ | ||
639 | if (gdt == NULL) | ||
640 | gdt = (struct desc_struct *)get_zeroed_page(GFP_KERNEL); | ||
641 | |||
642 | if (pda == NULL) | ||
643 | pda = kmalloc_node(sizeof(*pda), GFP_KERNEL, cpu_to_node(cpu)); | ||
644 | |||
645 | if (unlikely(!gdt || !pda)) { | ||
646 | free_pages((unsigned long)gdt, 0); | ||
647 | kfree(pda); | ||
648 | return 0; | ||
649 | } | ||
650 | } | ||
651 | |||
652 | cpu_gdt_descr->address = (unsigned long)gdt; | ||
653 | cpu_pda(cpu) = pda; | ||
654 | |||
655 | return 1; | ||
656 | } | ||
657 | 646 | ||
658 | /* Initial PDA used by boot CPU */ | 647 | gdt_descr.address = (long)get_cpu_gdt_table(smp_processor_id()); |
659 | struct i386_pda boot_pda = { | 648 | gdt_descr.size = GDT_SIZE - 1; |
660 | ._pda = &boot_pda, | 649 | load_gdt(&gdt_descr); |
661 | .cpu_number = 0, | 650 | asm("mov %0, %%fs" : : "r" (__KERNEL_PERCPU) : "memory"); |
662 | .pcurrent = &init_task, | ||
663 | }; | ||
664 | |||
665 | static inline void set_kernel_fs(void) | ||
666 | { | ||
667 | /* Set %fs for this CPU's PDA. Memory clobber is to create a | ||
668 | barrier with respect to any PDA operations, so the compiler | ||
669 | doesn't move any before here. */ | ||
670 | asm volatile ("mov %0, %%fs" : : "r" (__KERNEL_PDA) : "memory"); | ||
671 | } | 651 | } |
672 | 652 | ||
673 | /* Initialize the CPU's GDT and PDA. The boot CPU does this for | 653 | /* |
674 | itself, but secondaries find this done for them. */ | 654 | * cpu_init() initializes state that is per-CPU. Some data is already |
675 | __cpuinit int init_gdt(int cpu, struct task_struct *idle) | 655 | * initialized (naturally) in the bootstrap process, such as the GDT |
676 | { | 656 | * and IDT. We reload them nevertheless, this function acts as a |
677 | struct Xgt_desc_struct *cpu_gdt_descr = &per_cpu(cpu_gdt_descr, cpu); | 657 | * 'CPU state barrier', nothing should get across. |
678 | struct desc_struct *gdt; | 658 | */ |
679 | struct i386_pda *pda; | 659 | void __cpuinit cpu_init(void) |
680 | |||
681 | /* For non-boot CPUs, the GDT and PDA should already have been | ||
682 | allocated. */ | ||
683 | if (!alloc_gdt(cpu)) { | ||
684 | printk(KERN_CRIT "CPU%d failed to allocate GDT or PDA\n", cpu); | ||
685 | return 0; | ||
686 | } | ||
687 | |||
688 | gdt = (struct desc_struct *)cpu_gdt_descr->address; | ||
689 | pda = cpu_pda(cpu); | ||
690 | |||
691 | BUG_ON(gdt == NULL || pda == NULL); | ||
692 | |||
693 | /* | ||
694 | * Initialize the per-CPU GDT with the boot GDT, | ||
695 | * and set up the GDT descriptor: | ||
696 | */ | ||
697 | memcpy(gdt, cpu_gdt_table, GDT_SIZE); | ||
698 | cpu_gdt_descr->size = GDT_SIZE - 1; | ||
699 | |||
700 | pack_descriptor((u32 *)&gdt[GDT_ENTRY_PDA].a, | ||
701 | (u32 *)&gdt[GDT_ENTRY_PDA].b, | ||
702 | (unsigned long)pda, sizeof(*pda) - 1, | ||
703 | 0x80 | DESCTYPE_S | 0x2, 0); /* present read-write data segment */ | ||
704 | |||
705 | memset(pda, 0, sizeof(*pda)); | ||
706 | pda->_pda = pda; | ||
707 | pda->cpu_number = cpu; | ||
708 | pda->pcurrent = idle; | ||
709 | |||
710 | return 1; | ||
711 | } | ||
712 | |||
713 | void __cpuinit cpu_set_gdt(int cpu) | ||
714 | { | ||
715 | struct Xgt_desc_struct *cpu_gdt_descr = &per_cpu(cpu_gdt_descr, cpu); | ||
716 | |||
717 | /* Reinit these anyway, even if they've already been done (on | ||
718 | the boot CPU, this will transition from the boot gdt+pda to | ||
719 | the real ones). */ | ||
720 | load_gdt(cpu_gdt_descr); | ||
721 | set_kernel_fs(); | ||
722 | } | ||
723 | |||
724 | /* Common CPU init for both boot and secondary CPUs */ | ||
725 | static void __cpuinit _cpu_init(int cpu, struct task_struct *curr) | ||
726 | { | 660 | { |
661 | int cpu = smp_processor_id(); | ||
662 | struct task_struct *curr = current; | ||
727 | struct tss_struct * t = &per_cpu(init_tss, cpu); | 663 | struct tss_struct * t = &per_cpu(init_tss, cpu); |
728 | struct thread_struct *thread = &curr->thread; | 664 | struct thread_struct *thread = &curr->thread; |
729 | 665 | ||
@@ -744,6 +680,7 @@ static void __cpuinit _cpu_init(int cpu, struct task_struct *curr) | |||
744 | } | 680 | } |
745 | 681 | ||
746 | load_idt(&idt_descr); | 682 | load_idt(&idt_descr); |
683 | switch_to_new_gdt(); | ||
747 | 684 | ||
748 | /* | 685 | /* |
749 | * Set up and load the per-CPU TSS and LDT | 686 | * Set up and load the per-CPU TSS and LDT |
@@ -783,38 +720,6 @@ static void __cpuinit _cpu_init(int cpu, struct task_struct *curr) | |||
783 | mxcsr_feature_mask_init(); | 720 | mxcsr_feature_mask_init(); |
784 | } | 721 | } |
785 | 722 | ||
786 | /* Entrypoint to initialize secondary CPU */ | ||
787 | void __cpuinit secondary_cpu_init(void) | ||
788 | { | ||
789 | int cpu = smp_processor_id(); | ||
790 | struct task_struct *curr = current; | ||
791 | |||
792 | _cpu_init(cpu, curr); | ||
793 | } | ||
794 | |||
795 | /* | ||
796 | * cpu_init() initializes state that is per-CPU. Some data is already | ||
797 | * initialized (naturally) in the bootstrap process, such as the GDT | ||
798 | * and IDT. We reload them nevertheless, this function acts as a | ||
799 | * 'CPU state barrier', nothing should get across. | ||
800 | */ | ||
801 | void __cpuinit cpu_init(void) | ||
802 | { | ||
803 | int cpu = smp_processor_id(); | ||
804 | struct task_struct *curr = current; | ||
805 | |||
806 | /* Set up the real GDT and PDA, so we can transition from the | ||
807 | boot versions. */ | ||
808 | if (!init_gdt(cpu, curr)) { | ||
809 | /* failed to allocate something; not much we can do... */ | ||
810 | for (;;) | ||
811 | local_irq_enable(); | ||
812 | } | ||
813 | |||
814 | cpu_set_gdt(cpu); | ||
815 | _cpu_init(cpu, curr); | ||
816 | } | ||
817 | |||
818 | #ifdef CONFIG_HOTPLUG_CPU | 723 | #ifdef CONFIG_HOTPLUG_CPU |
819 | void __cpuinit cpu_uninit(void) | 724 | void __cpuinit cpu_uninit(void) |
820 | { | 725 | { |
diff --git a/arch/i386/kernel/cpu/cyrix.c b/arch/i386/kernel/cpu/cyrix.c index de27bd07bc9..0b8411a864f 100644 --- a/arch/i386/kernel/cpu/cyrix.c +++ b/arch/i386/kernel/cpu/cyrix.c | |||
@@ -279,7 +279,7 @@ static void __cpuinit init_cyrix(struct cpuinfo_x86 *c) | |||
279 | */ | 279 | */ |
280 | if (vendor == PCI_VENDOR_ID_CYRIX && | 280 | if (vendor == PCI_VENDOR_ID_CYRIX && |
281 | (device == PCI_DEVICE_ID_CYRIX_5510 || device == PCI_DEVICE_ID_CYRIX_5520)) | 281 | (device == PCI_DEVICE_ID_CYRIX_5510 || device == PCI_DEVICE_ID_CYRIX_5520)) |
282 | pit_latch_buggy = 1; | 282 | mark_tsc_unstable("cyrix 5510/5520 detected"); |
283 | } | 283 | } |
284 | #endif | 284 | #endif |
285 | c->x86_cache_size=16; /* Yep 16K integrated cache thats it */ | 285 | c->x86_cache_size=16; /* Yep 16K integrated cache thats it */ |
@@ -448,16 +448,6 @@ int __init cyrix_init_cpu(void) | |||
448 | return 0; | 448 | return 0; |
449 | } | 449 | } |
450 | 450 | ||
451 | //early_arch_initcall(cyrix_init_cpu); | ||
452 | |||
453 | static int __init cyrix_exit_cpu(void) | ||
454 | { | ||
455 | cpu_devs[X86_VENDOR_CYRIX] = NULL; | ||
456 | return 0; | ||
457 | } | ||
458 | |||
459 | late_initcall(cyrix_exit_cpu); | ||
460 | |||
461 | static struct cpu_dev nsc_cpu_dev __cpuinitdata = { | 451 | static struct cpu_dev nsc_cpu_dev __cpuinitdata = { |
462 | .c_vendor = "NSC", | 452 | .c_vendor = "NSC", |
463 | .c_ident = { "Geode by NSC" }, | 453 | .c_ident = { "Geode by NSC" }, |
@@ -470,12 +460,3 @@ int __init nsc_init_cpu(void) | |||
470 | return 0; | 460 | return 0; |
471 | } | 461 | } |
472 | 462 | ||
473 | //early_arch_initcall(nsc_init_cpu); | ||
474 | |||
475 | static int __init nsc_exit_cpu(void) | ||
476 | { | ||
477 | cpu_devs[X86_VENDOR_NSC] = NULL; | ||
478 | return 0; | ||
479 | } | ||
480 | |||
481 | late_initcall(nsc_exit_cpu); | ||
diff --git a/arch/i386/kernel/cpu/intel.c b/arch/i386/kernel/cpu/intel.c index 56fe2658495..dc4e08147b1 100644 --- a/arch/i386/kernel/cpu/intel.c +++ b/arch/i386/kernel/cpu/intel.c | |||
@@ -188,8 +188,10 @@ static void __cpuinit init_intel(struct cpuinfo_x86 *c) | |||
188 | } | 188 | } |
189 | #endif | 189 | #endif |
190 | 190 | ||
191 | if (c->x86 == 15) | 191 | if (c->x86 == 15) { |
192 | set_bit(X86_FEATURE_P4, c->x86_capability); | 192 | set_bit(X86_FEATURE_P4, c->x86_capability); |
193 | set_bit(X86_FEATURE_SYNC_RDTSC, c->x86_capability); | ||
194 | } | ||
193 | if (c->x86 == 6) | 195 | if (c->x86 == 6) |
194 | set_bit(X86_FEATURE_P3, c->x86_capability); | 196 | set_bit(X86_FEATURE_P3, c->x86_capability); |
195 | if ((c->x86 == 0xf && c->x86_model >= 0x03) || | 197 | if ((c->x86 == 0xf && c->x86_model >= 0x03) || |
diff --git a/arch/i386/kernel/cpu/mcheck/k7.c b/arch/i386/kernel/cpu/mcheck/k7.c index b0862af595a..f9fa4142551 100644 --- a/arch/i386/kernel/cpu/mcheck/k7.c +++ b/arch/i386/kernel/cpu/mcheck/k7.c | |||
@@ -75,6 +75,9 @@ void amd_mcheck_init(struct cpuinfo_x86 *c) | |||
75 | machine_check_vector = k7_machine_check; | 75 | machine_check_vector = k7_machine_check; |
76 | wmb(); | 76 | wmb(); |
77 | 77 | ||
78 | if (!cpu_has(c, X86_FEATURE_MCE)) | ||
79 | return; | ||
80 | |||
78 | printk (KERN_INFO "Intel machine check architecture supported.\n"); | 81 | printk (KERN_INFO "Intel machine check architecture supported.\n"); |
79 | rdmsr (MSR_IA32_MCG_CAP, l, h); | 82 | rdmsr (MSR_IA32_MCG_CAP, l, h); |
80 | if (l & (1<<8)) /* Control register present ? */ | 83 | if (l & (1<<8)) /* Control register present ? */ |
@@ -82,9 +85,13 @@ void amd_mcheck_init(struct cpuinfo_x86 *c) | |||
82 | nr_mce_banks = l & 0xff; | 85 | nr_mce_banks = l & 0xff; |
83 | 86 | ||
84 | /* Clear status for MC index 0 separately, we don't touch CTL, | 87 | /* Clear status for MC index 0 separately, we don't touch CTL, |
85 | * as some Athlons cause spurious MCEs when its enabled. */ | 88 | * as some K7 Athlons cause spurious MCEs when its enabled. */ |
86 | wrmsr (MSR_IA32_MC0_STATUS, 0x0, 0x0); | 89 | if (boot_cpu_data.x86 == 6) { |
87 | for (i=1; i<nr_mce_banks; i++) { | 90 | wrmsr (MSR_IA32_MC0_STATUS, 0x0, 0x0); |
91 | i = 1; | ||
92 | } else | ||
93 | i = 0; | ||
94 | for (; i<nr_mce_banks; i++) { | ||
88 | wrmsr (MSR_IA32_MC0_CTL+4*i, 0xffffffff, 0xffffffff); | 95 | wrmsr (MSR_IA32_MC0_CTL+4*i, 0xffffffff, 0xffffffff); |
89 | wrmsr (MSR_IA32_MC0_STATUS+4*i, 0x0, 0x0); | 96 | wrmsr (MSR_IA32_MC0_STATUS+4*i, 0x0, 0x0); |
90 | } | 97 | } |
diff --git a/arch/i386/kernel/cpu/mcheck/mce.c b/arch/i386/kernel/cpu/mcheck/mce.c index 4f10c62d180..56cd485b127 100644 --- a/arch/i386/kernel/cpu/mcheck/mce.c +++ b/arch/i386/kernel/cpu/mcheck/mce.c | |||
@@ -38,8 +38,7 @@ void mcheck_init(struct cpuinfo_x86 *c) | |||
38 | 38 | ||
39 | switch (c->x86_vendor) { | 39 | switch (c->x86_vendor) { |
40 | case X86_VENDOR_AMD: | 40 | case X86_VENDOR_AMD: |
41 | if (c->x86==6 || c->x86==15) | 41 | amd_mcheck_init(c); |
42 | amd_mcheck_init(c); | ||
43 | break; | 42 | break; |
44 | 43 | ||
45 | case X86_VENDOR_INTEL: | 44 | case X86_VENDOR_INTEL: |
diff --git a/arch/i386/kernel/cpu/mcheck/p4.c b/arch/i386/kernel/cpu/mcheck/p4.c index 504434a4601..1509edfb231 100644 --- a/arch/i386/kernel/cpu/mcheck/p4.c +++ b/arch/i386/kernel/cpu/mcheck/p4.c | |||
@@ -124,13 +124,10 @@ static void intel_init_thermal(struct cpuinfo_x86 *c) | |||
124 | 124 | ||
125 | 125 | ||
126 | /* P4/Xeon Extended MCE MSR retrieval, return 0 if unsupported */ | 126 | /* P4/Xeon Extended MCE MSR retrieval, return 0 if unsupported */ |
127 | static inline int intel_get_extended_msrs(struct intel_mce_extended_msrs *r) | 127 | static inline void intel_get_extended_msrs(struct intel_mce_extended_msrs *r) |
128 | { | 128 | { |
129 | u32 h; | 129 | u32 h; |
130 | 130 | ||
131 | if (mce_num_extended_msrs == 0) | ||
132 | goto done; | ||
133 | |||
134 | rdmsr (MSR_IA32_MCG_EAX, r->eax, h); | 131 | rdmsr (MSR_IA32_MCG_EAX, r->eax, h); |
135 | rdmsr (MSR_IA32_MCG_EBX, r->ebx, h); | 132 | rdmsr (MSR_IA32_MCG_EBX, r->ebx, h); |
136 | rdmsr (MSR_IA32_MCG_ECX, r->ecx, h); | 133 | rdmsr (MSR_IA32_MCG_ECX, r->ecx, h); |
@@ -141,12 +138,6 @@ static inline int intel_get_extended_msrs(struct intel_mce_extended_msrs *r) | |||
141 | rdmsr (MSR_IA32_MCG_ESP, r->esp, h); | 138 | rdmsr (MSR_IA32_MCG_ESP, r->esp, h); |
142 | rdmsr (MSR_IA32_MCG_EFLAGS, r->eflags, h); | 139 | rdmsr (MSR_IA32_MCG_EFLAGS, r->eflags, h); |
143 | rdmsr (MSR_IA32_MCG_EIP, r->eip, h); | 140 | rdmsr (MSR_IA32_MCG_EIP, r->eip, h); |
144 | |||
145 | /* can we rely on kmalloc to do a dynamic | ||
146 | * allocation for the reserved registers? | ||
147 | */ | ||
148 | done: | ||
149 | return mce_num_extended_msrs; | ||
150 | } | 141 | } |
151 | 142 | ||
152 | static fastcall void intel_machine_check(struct pt_regs * regs, long error_code) | 143 | static fastcall void intel_machine_check(struct pt_regs * regs, long error_code) |
@@ -155,7 +146,6 @@ static fastcall void intel_machine_check(struct pt_regs * regs, long error_code) | |||
155 | u32 alow, ahigh, high, low; | 146 | u32 alow, ahigh, high, low; |
156 | u32 mcgstl, mcgsth; | 147 | u32 mcgstl, mcgsth; |
157 | int i; | 148 | int i; |
158 | struct intel_mce_extended_msrs dbg; | ||
159 | 149 | ||
160 | rdmsr (MSR_IA32_MCG_STATUS, mcgstl, mcgsth); | 150 | rdmsr (MSR_IA32_MCG_STATUS, mcgstl, mcgsth); |
161 | if (mcgstl & (1<<0)) /* Recoverable ? */ | 151 | if (mcgstl & (1<<0)) /* Recoverable ? */ |
@@ -164,7 +154,9 @@ static fastcall void intel_machine_check(struct pt_regs * regs, long error_code) | |||
164 | printk (KERN_EMERG "CPU %d: Machine Check Exception: %08x%08x\n", | 154 | printk (KERN_EMERG "CPU %d: Machine Check Exception: %08x%08x\n", |
165 | smp_processor_id(), mcgsth, mcgstl); | 155 | smp_processor_id(), mcgsth, mcgstl); |
166 | 156 | ||
167 | if (intel_get_extended_msrs(&dbg)) { | 157 | if (mce_num_extended_msrs > 0) { |
158 | struct intel_mce_extended_msrs dbg; | ||
159 | intel_get_extended_msrs(&dbg); | ||
168 | printk (KERN_DEBUG "CPU %d: EIP: %08x EFLAGS: %08x\n", | 160 | printk (KERN_DEBUG "CPU %d: EIP: %08x EFLAGS: %08x\n", |
169 | smp_processor_id(), dbg.eip, dbg.eflags); | 161 | smp_processor_id(), dbg.eip, dbg.eflags); |
170 | printk (KERN_DEBUG "\teax: %08x ebx: %08x ecx: %08x edx: %08x\n", | 162 | printk (KERN_DEBUG "\teax: %08x ebx: %08x ecx: %08x edx: %08x\n", |
diff --git a/arch/i386/kernel/cpu/mtrr/generic.c b/arch/i386/kernel/cpu/mtrr/generic.c index f77fc53db65..5367e32e040 100644 --- a/arch/i386/kernel/cpu/mtrr/generic.c +++ b/arch/i386/kernel/cpu/mtrr/generic.c | |||
@@ -20,13 +20,25 @@ struct mtrr_state { | |||
20 | mtrr_type def_type; | 20 | mtrr_type def_type; |
21 | }; | 21 | }; |
22 | 22 | ||
23 | struct fixed_range_block { | ||
24 | int base_msr; /* start address of an MTRR block */ | ||
25 | int ranges; /* number of MTRRs in this block */ | ||
26 | }; | ||
27 | |||
28 | static struct fixed_range_block fixed_range_blocks[] = { | ||
29 | { MTRRfix64K_00000_MSR, 1 }, /* one 64k MTRR */ | ||
30 | { MTRRfix16K_80000_MSR, 2 }, /* two 16k MTRRs */ | ||
31 | { MTRRfix4K_C0000_MSR, 8 }, /* eight 4k MTRRs */ | ||
32 | {} | ||
33 | }; | ||
34 | |||
23 | static unsigned long smp_changes_mask; | 35 | static unsigned long smp_changes_mask; |
24 | static struct mtrr_state mtrr_state = {}; | 36 | static struct mtrr_state mtrr_state = {}; |
25 | 37 | ||
26 | #undef MODULE_PARAM_PREFIX | 38 | #undef MODULE_PARAM_PREFIX |
27 | #define MODULE_PARAM_PREFIX "mtrr." | 39 | #define MODULE_PARAM_PREFIX "mtrr." |
28 | 40 | ||
29 | static __initdata int mtrr_show; | 41 | static int mtrr_show; |
30 | module_param_named(show, mtrr_show, bool, 0); | 42 | module_param_named(show, mtrr_show, bool, 0); |
31 | 43 | ||
32 | /* Get the MSR pair relating to a var range */ | 44 | /* Get the MSR pair relating to a var range */ |
@@ -37,7 +49,7 @@ get_mtrr_var_range(unsigned int index, struct mtrr_var_range *vr) | |||
37 | rdmsr(MTRRphysMask_MSR(index), vr->mask_lo, vr->mask_hi); | 49 | rdmsr(MTRRphysMask_MSR(index), vr->mask_lo, vr->mask_hi); |
38 | } | 50 | } |
39 | 51 | ||
40 | static void __init | 52 | static void |
41 | get_fixed_ranges(mtrr_type * frs) | 53 | get_fixed_ranges(mtrr_type * frs) |
42 | { | 54 | { |
43 | unsigned int *p = (unsigned int *) frs; | 55 | unsigned int *p = (unsigned int *) frs; |
@@ -51,12 +63,18 @@ get_fixed_ranges(mtrr_type * frs) | |||
51 | rdmsr(MTRRfix4K_C0000_MSR + i, p[6 + i * 2], p[7 + i * 2]); | 63 | rdmsr(MTRRfix4K_C0000_MSR + i, p[6 + i * 2], p[7 + i * 2]); |
52 | } | 64 | } |
53 | 65 | ||
54 | static void __init print_fixed(unsigned base, unsigned step, const mtrr_type*types) | 66 | void mtrr_save_fixed_ranges(void *info) |
67 | { | ||
68 | get_fixed_ranges(mtrr_state.fixed_ranges); | ||
69 | } | ||
70 | |||
71 | static void __cpuinit print_fixed(unsigned base, unsigned step, const mtrr_type*types) | ||
55 | { | 72 | { |
56 | unsigned i; | 73 | unsigned i; |
57 | 74 | ||
58 | for (i = 0; i < 8; ++i, ++types, base += step) | 75 | for (i = 0; i < 8; ++i, ++types, base += step) |
59 | printk(KERN_INFO "MTRR %05X-%05X %s\n", base, base + step - 1, mtrr_attrib_to_str(*types)); | 76 | printk(KERN_INFO "MTRR %05X-%05X %s\n", |
77 | base, base + step - 1, mtrr_attrib_to_str(*types)); | ||
60 | } | 78 | } |
61 | 79 | ||
62 | /* Grab all of the MTRR state for this CPU into *state */ | 80 | /* Grab all of the MTRR state for this CPU into *state */ |
@@ -147,6 +165,44 @@ void mtrr_wrmsr(unsigned msr, unsigned a, unsigned b) | |||
147 | smp_processor_id(), msr, a, b); | 165 | smp_processor_id(), msr, a, b); |
148 | } | 166 | } |
149 | 167 | ||
168 | /** | ||
169 | * Enable and allow read/write of extended fixed-range MTRR bits on K8 CPUs | ||
170 | * see AMD publication no. 24593, chapter 3.2.1 for more information | ||
171 | */ | ||
172 | static inline void k8_enable_fixed_iorrs(void) | ||
173 | { | ||
174 | unsigned lo, hi; | ||
175 | |||
176 | rdmsr(MSR_K8_SYSCFG, lo, hi); | ||
177 | mtrr_wrmsr(MSR_K8_SYSCFG, lo | ||
178 | | K8_MTRRFIXRANGE_DRAM_ENABLE | ||
179 | | K8_MTRRFIXRANGE_DRAM_MODIFY, hi); | ||
180 | } | ||
181 | |||
182 | /** | ||
183 | * Checks and updates a fixed-range MTRR if it differs from the value it | ||
184 | * should have. If K8 extensions are wanted, update the K8 SYSCFG MSR also. | ||
185 | * See AMD publication no. 24593, chapter 7.8.1, page 233 for more information | ||
186 | * \param msr MSR address of the MTRR which should be checked and updated | ||
187 | * \param changed pointer which indicates whether the MTRR needed to be changed | ||
188 | * \param msrwords pointer to the MSR values which the MSR should have | ||
189 | */ | ||
190 | static void set_fixed_range(int msr, int * changed, unsigned int * msrwords) | ||
191 | { | ||
192 | unsigned lo, hi; | ||
193 | |||
194 | rdmsr(msr, lo, hi); | ||
195 | |||
196 | if (lo != msrwords[0] || hi != msrwords[1]) { | ||
197 | if (boot_cpu_data.x86_vendor == X86_VENDOR_AMD && | ||
198 | boot_cpu_data.x86 == 15 && | ||
199 | ((msrwords[0] | msrwords[1]) & K8_MTRR_RDMEM_WRMEM_MASK)) | ||
200 | k8_enable_fixed_iorrs(); | ||
201 | mtrr_wrmsr(msr, msrwords[0], msrwords[1]); | ||
202 | *changed = TRUE; | ||
203 | } | ||
204 | } | ||
205 | |||
150 | int generic_get_free_region(unsigned long base, unsigned long size, int replace_reg) | 206 | int generic_get_free_region(unsigned long base, unsigned long size, int replace_reg) |
151 | /* [SUMMARY] Get a free MTRR. | 207 | /* [SUMMARY] Get a free MTRR. |
152 | <base> The starting (base) address of the region. | 208 | <base> The starting (base) address of the region. |
@@ -196,36 +252,21 @@ static void generic_get_mtrr(unsigned int reg, unsigned long *base, | |||
196 | *type = base_lo & 0xff; | 252 | *type = base_lo & 0xff; |
197 | } | 253 | } |
198 | 254 | ||
255 | /** | ||
256 | * Checks and updates the fixed-range MTRRs if they differ from the saved set | ||
257 | * \param frs pointer to fixed-range MTRR values, saved by get_fixed_ranges() | ||
258 | */ | ||
199 | static int set_fixed_ranges(mtrr_type * frs) | 259 | static int set_fixed_ranges(mtrr_type * frs) |
200 | { | 260 | { |
201 | unsigned int *p = (unsigned int *) frs; | 261 | unsigned long long *saved = (unsigned long long *) frs; |
202 | int changed = FALSE; | 262 | int changed = FALSE; |
203 | int i; | 263 | int block=-1, range; |
204 | unsigned int lo, hi; | ||
205 | 264 | ||
206 | rdmsr(MTRRfix64K_00000_MSR, lo, hi); | 265 | while (fixed_range_blocks[++block].ranges) |
207 | if (p[0] != lo || p[1] != hi) { | 266 | for (range=0; range < fixed_range_blocks[block].ranges; range++) |
208 | mtrr_wrmsr(MTRRfix64K_00000_MSR, p[0], p[1]); | 267 | set_fixed_range(fixed_range_blocks[block].base_msr + range, |
209 | changed = TRUE; | 268 | &changed, (unsigned int *) saved++); |
210 | } | ||
211 | 269 | ||
212 | for (i = 0; i < 2; i++) { | ||
213 | rdmsr(MTRRfix16K_80000_MSR + i, lo, hi); | ||
214 | if (p[2 + i * 2] != lo || p[3 + i * 2] != hi) { | ||
215 | mtrr_wrmsr(MTRRfix16K_80000_MSR + i, p[2 + i * 2], | ||
216 | p[3 + i * 2]); | ||
217 | changed = TRUE; | ||
218 | } | ||
219 | } | ||
220 | |||
221 | for (i = 0; i < 8; i++) { | ||
222 | rdmsr(MTRRfix4K_C0000_MSR + i, lo, hi); | ||
223 | if (p[6 + i * 2] != lo || p[7 + i * 2] != hi) { | ||
224 | mtrr_wrmsr(MTRRfix4K_C0000_MSR + i, p[6 + i * 2], | ||
225 | p[7 + i * 2]); | ||
226 | changed = TRUE; | ||
227 | } | ||
228 | } | ||
229 | return changed; | 270 | return changed; |
230 | } | 271 | } |
231 | 272 | ||
@@ -428,7 +469,7 @@ int generic_validate_add_page(unsigned long base, unsigned long size, unsigned i | |||
428 | } | 469 | } |
429 | } | 470 | } |
430 | 471 | ||
431 | if (base + size < 0x100) { | 472 | if (base < 0x100) { |
432 | printk(KERN_WARNING "mtrr: cannot set region below 1 MiB (0x%lx000,0x%lx000)\n", | 473 | printk(KERN_WARNING "mtrr: cannot set region below 1 MiB (0x%lx000,0x%lx000)\n", |
433 | base, size); | 474 | base, size); |
434 | return -EINVAL; | 475 | return -EINVAL; |
diff --git a/arch/i386/kernel/cpu/mtrr/main.c b/arch/i386/kernel/cpu/mtrr/main.c index 0acfb6a5a22..02a2f39e5e0 100644 --- a/arch/i386/kernel/cpu/mtrr/main.c +++ b/arch/i386/kernel/cpu/mtrr/main.c | |||
@@ -729,6 +729,17 @@ void mtrr_ap_init(void) | |||
729 | local_irq_restore(flags); | 729 | local_irq_restore(flags); |
730 | } | 730 | } |
731 | 731 | ||
732 | /** | ||
733 | * Save current fixed-range MTRR state of the BSP | ||
734 | */ | ||
735 | void mtrr_save_state(void) | ||
736 | { | ||
737 | if (smp_processor_id() == 0) | ||
738 | mtrr_save_fixed_ranges(NULL); | ||
739 | else | ||
740 | smp_call_function_single(0, mtrr_save_fixed_ranges, NULL, 1, 1); | ||
741 | } | ||
742 | |||
732 | static int __init mtrr_init_finialize(void) | 743 | static int __init mtrr_init_finialize(void) |
733 | { | 744 | { |
734 | if (!mtrr_if) | 745 | if (!mtrr_if) |
diff --git a/arch/i386/kernel/cpu/nexgen.c b/arch/i386/kernel/cpu/nexgen.c index 8bf23cc80c6..961fbe1a748 100644 --- a/arch/i386/kernel/cpu/nexgen.c +++ b/arch/i386/kernel/cpu/nexgen.c | |||
@@ -58,13 +58,3 @@ int __init nexgen_init_cpu(void) | |||
58 | cpu_devs[X86_VENDOR_NEXGEN] = &nexgen_cpu_dev; | 58 | cpu_devs[X86_VENDOR_NEXGEN] = &nexgen_cpu_dev; |
59 | return 0; | 59 | return 0; |
60 | } | 60 | } |
61 | |||
62 | //early_arch_initcall(nexgen_init_cpu); | ||
63 | |||
64 | static int __init nexgen_exit_cpu(void) | ||
65 | { | ||
66 | cpu_devs[X86_VENDOR_NEXGEN] = NULL; | ||
67 | return 0; | ||
68 | } | ||
69 | |||
70 | late_initcall(nexgen_exit_cpu); | ||
diff --git a/arch/i386/kernel/cpu/perfctr-watchdog.c b/arch/i386/kernel/cpu/perfctr-watchdog.c new file mode 100644 index 00000000000..2b04c8f1db6 --- /dev/null +++ b/arch/i386/kernel/cpu/perfctr-watchdog.c | |||
@@ -0,0 +1,658 @@ | |||
1 | /* local apic based NMI watchdog for various CPUs. | ||
2 | This file also handles reservation of performance counters for coordination | ||
3 | with other users (like oprofile). | ||
4 | |||
5 | Note that these events normally don't tick when the CPU idles. This means | ||
6 | the frequency varies with CPU load. | ||
7 | |||
8 | Original code for K7/P6 written by Keith Owens */ | ||
9 | |||
10 | #include <linux/percpu.h> | ||
11 | #include <linux/module.h> | ||
12 | #include <linux/kernel.h> | ||
13 | #include <linux/bitops.h> | ||
14 | #include <linux/smp.h> | ||
15 | #include <linux/nmi.h> | ||
16 | #include <asm/apic.h> | ||
17 | #include <asm/intel_arch_perfmon.h> | ||
18 | |||
/* Per-CPU record of the MSRs the watchdog setup routine programmed. */
struct nmi_watchdog_ctlblk {
	/* CCCR MSR; written by the P4 setup, set to 0 (unused) by the others */
	unsigned int cccr_msr;
	/* the MSR to reset in NMI handler */
	unsigned int perfctr_msr;
	/* the MSR to select the events to handle */
	unsigned int evntsel_msr;
};
24 | |||
25 | /* Interface defining a CPU specific perfctr watchdog */ | ||
26 | struct wd_ops { | ||
27 | int (*reserve)(void); | ||
28 | void (*unreserve)(void); | ||
29 | int (*setup)(unsigned nmi_hz); | ||
30 | void (*rearm)(struct nmi_watchdog_ctlblk *wd, unsigned nmi_hz); | ||
31 | void (*stop)(void *); | ||
32 | unsigned perfctr; | ||
33 | unsigned evntsel; | ||
34 | u64 checkbit; | ||
35 | }; | ||
36 | |||
37 | static struct wd_ops *wd_ops; | ||
38 | |||
39 | /* this number is calculated from Intel's MSR_P4_CRU_ESCR5 register and its | ||
40 | * offset from MSR_P4_BSU_ESCR0. It will be the max for all platforms (for now) | ||
41 | */ | ||
42 | #define NMI_MAX_COUNTER_BITS 66 | ||
43 | |||
44 | /* perfctr_nmi_owner tracks the ownership of the perfctr registers: | ||
45 | * evtsel_nmi_owner tracks the ownership of the event selection | ||
46 | * - different performance counters/ event selection may be reserved for | ||
47 | * different subsystems this reservation system just tries to coordinate | ||
48 | * things a little | ||
49 | */ | ||
50 | static DECLARE_BITMAP(perfctr_nmi_owner, NMI_MAX_COUNTER_BITS); | ||
51 | static DECLARE_BITMAP(evntsel_nmi_owner, NMI_MAX_COUNTER_BITS); | ||
52 | |||
53 | static DEFINE_PER_CPU(struct nmi_watchdog_ctlblk, nmi_watchdog_ctlblk); | ||
54 | |||
55 | /* converts an msr to an appropriate reservation bit */ | ||
56 | static inline unsigned int nmi_perfctr_msr_to_bit(unsigned int msr) | ||
57 | { | ||
58 | return wd_ops ? msr - wd_ops->perfctr : 0; | ||
59 | } | ||
60 | |||
61 | /* converts an msr to an appropriate reservation bit */ | ||
62 | /* returns the bit offset of the event selection register */ | ||
63 | static inline unsigned int nmi_evntsel_msr_to_bit(unsigned int msr) | ||
64 | { | ||
65 | return wd_ops ? msr - wd_ops->evntsel : 0; | ||
66 | } | ||
67 | |||
68 | /* checks for a bit availability (hack for oprofile) */ | ||
69 | int avail_to_resrv_perfctr_nmi_bit(unsigned int counter) | ||
70 | { | ||
71 | BUG_ON(counter > NMI_MAX_COUNTER_BITS); | ||
72 | |||
73 | return (!test_bit(counter, perfctr_nmi_owner)); | ||
74 | } | ||
75 | |||
76 | /* checks the an msr for availability */ | ||
77 | int avail_to_resrv_perfctr_nmi(unsigned int msr) | ||
78 | { | ||
79 | unsigned int counter; | ||
80 | |||
81 | counter = nmi_perfctr_msr_to_bit(msr); | ||
82 | BUG_ON(counter > NMI_MAX_COUNTER_BITS); | ||
83 | |||
84 | return (!test_bit(counter, perfctr_nmi_owner)); | ||
85 | } | ||
86 | |||
87 | int reserve_perfctr_nmi(unsigned int msr) | ||
88 | { | ||
89 | unsigned int counter; | ||
90 | |||
91 | counter = nmi_perfctr_msr_to_bit(msr); | ||
92 | BUG_ON(counter > NMI_MAX_COUNTER_BITS); | ||
93 | |||
94 | if (!test_and_set_bit(counter, perfctr_nmi_owner)) | ||
95 | return 1; | ||
96 | return 0; | ||
97 | } | ||
98 | |||
99 | void release_perfctr_nmi(unsigned int msr) | ||
100 | { | ||
101 | unsigned int counter; | ||
102 | |||
103 | counter = nmi_perfctr_msr_to_bit(msr); | ||
104 | BUG_ON(counter > NMI_MAX_COUNTER_BITS); | ||
105 | |||
106 | clear_bit(counter, perfctr_nmi_owner); | ||
107 | } | ||
108 | |||
109 | int reserve_evntsel_nmi(unsigned int msr) | ||
110 | { | ||
111 | unsigned int counter; | ||
112 | |||
113 | counter = nmi_evntsel_msr_to_bit(msr); | ||
114 | BUG_ON(counter > NMI_MAX_COUNTER_BITS); | ||
115 | |||
116 | if (!test_and_set_bit(counter, evntsel_nmi_owner)) | ||
117 | return 1; | ||
118 | return 0; | ||
119 | } | ||
120 | |||
121 | void release_evntsel_nmi(unsigned int msr) | ||
122 | { | ||
123 | unsigned int counter; | ||
124 | |||
125 | counter = nmi_evntsel_msr_to_bit(msr); | ||
126 | BUG_ON(counter > NMI_MAX_COUNTER_BITS); | ||
127 | |||
128 | clear_bit(counter, evntsel_nmi_owner); | ||
129 | } | ||
130 | |||
131 | EXPORT_SYMBOL(avail_to_resrv_perfctr_nmi); | ||
132 | EXPORT_SYMBOL(avail_to_resrv_perfctr_nmi_bit); | ||
133 | EXPORT_SYMBOL(reserve_perfctr_nmi); | ||
134 | EXPORT_SYMBOL(release_perfctr_nmi); | ||
135 | EXPORT_SYMBOL(reserve_evntsel_nmi); | ||
136 | EXPORT_SYMBOL(release_evntsel_nmi); | ||
137 | |||
138 | void disable_lapic_nmi_watchdog(void) | ||
139 | { | ||
140 | BUG_ON(nmi_watchdog != NMI_LOCAL_APIC); | ||
141 | |||
142 | if (atomic_read(&nmi_active) <= 0) | ||
143 | return; | ||
144 | |||
145 | on_each_cpu(wd_ops->stop, NULL, 0, 1); | ||
146 | wd_ops->unreserve(); | ||
147 | |||
148 | BUG_ON(atomic_read(&nmi_active) != 0); | ||
149 | } | ||
150 | |||
151 | void enable_lapic_nmi_watchdog(void) | ||
152 | { | ||
153 | BUG_ON(nmi_watchdog != NMI_LOCAL_APIC); | ||
154 | |||
155 | /* are we already enabled */ | ||
156 | if (atomic_read(&nmi_active) != 0) | ||
157 | return; | ||
158 | |||
159 | /* are we lapic aware */ | ||
160 | if (!wd_ops) | ||
161 | return; | ||
162 | if (!wd_ops->reserve()) { | ||
163 | printk(KERN_ERR "NMI watchdog: cannot reserve perfctrs\n"); | ||
164 | return; | ||
165 | } | ||
166 | |||
167 | on_each_cpu(setup_apic_nmi_watchdog, NULL, 0, 1); | ||
168 | touch_nmi_watchdog(); | ||
169 | } | ||
170 | |||
171 | /* | ||
172 | * Activate the NMI watchdog via the local APIC. | ||
173 | */ | ||
174 | |||
175 | static unsigned int adjust_for_32bit_ctr(unsigned int hz) | ||
176 | { | ||
177 | u64 counter_val; | ||
178 | unsigned int retval = hz; | ||
179 | |||
180 | /* | ||
181 | * On Intel CPUs with P6/ARCH_PERFMON only 32 bits in the counter | ||
182 | * are writable, with higher bits sign extending from bit 31. | ||
183 | * So, we can only program the counter with 31 bit values and | ||
184 | * 32nd bit should be 1, for 33.. to be 1. | ||
185 | * Find the appropriate nmi_hz | ||
186 | */ | ||
187 | counter_val = (u64)cpu_khz * 1000; | ||
188 | do_div(counter_val, retval); | ||
189 | if (counter_val > 0x7fffffffULL) { | ||
190 | u64 count = (u64)cpu_khz * 1000; | ||
191 | do_div(count, 0x7fffffffUL); | ||
192 | retval = count + 1; | ||
193 | } | ||
194 | return retval; | ||
195 | } | ||
196 | |||
197 | static void | ||
198 | write_watchdog_counter(unsigned int perfctr_msr, const char *descr, unsigned nmi_hz) | ||
199 | { | ||
200 | u64 count = (u64)cpu_khz * 1000; | ||
201 | |||
202 | do_div(count, nmi_hz); | ||
203 | if(descr) | ||
204 | Dprintk("setting %s to -0x%08Lx\n", descr, count); | ||
205 | wrmsrl(perfctr_msr, 0 - count); | ||
206 | } | ||
207 | |||
208 | static void write_watchdog_counter32(unsigned int perfctr_msr, | ||
209 | const char *descr, unsigned nmi_hz) | ||
210 | { | ||
211 | u64 count = (u64)cpu_khz * 1000; | ||
212 | |||
213 | do_div(count, nmi_hz); | ||
214 | if(descr) | ||
215 | Dprintk("setting %s to -0x%08Lx\n", descr, count); | ||
216 | wrmsr(perfctr_msr, (u32)(-count), 0); | ||
217 | } | ||
218 | |||
219 | /* AMD K7/K8/Family10h/Family11h support. AMD keeps this interface | ||
220 | nicely stable so there is not much variety */ | ||
221 | |||
222 | #define K7_EVNTSEL_ENABLE (1 << 22) | ||
223 | #define K7_EVNTSEL_INT (1 << 20) | ||
224 | #define K7_EVNTSEL_OS (1 << 17) | ||
225 | #define K7_EVNTSEL_USR (1 << 16) | ||
226 | #define K7_EVENT_CYCLES_PROCESSOR_IS_RUNNING 0x76 | ||
227 | #define K7_NMI_EVENT K7_EVENT_CYCLES_PROCESSOR_IS_RUNNING | ||
228 | |||
229 | static int setup_k7_watchdog(unsigned nmi_hz) | ||
230 | { | ||
231 | unsigned int perfctr_msr, evntsel_msr; | ||
232 | unsigned int evntsel; | ||
233 | struct nmi_watchdog_ctlblk *wd = &__get_cpu_var(nmi_watchdog_ctlblk); | ||
234 | |||
235 | perfctr_msr = MSR_K7_PERFCTR0; | ||
236 | evntsel_msr = MSR_K7_EVNTSEL0; | ||
237 | |||
238 | wrmsrl(perfctr_msr, 0UL); | ||
239 | |||
240 | evntsel = K7_EVNTSEL_INT | ||
241 | | K7_EVNTSEL_OS | ||
242 | | K7_EVNTSEL_USR | ||
243 | | K7_NMI_EVENT; | ||
244 | |||
245 | /* setup the timer */ | ||
246 | wrmsr(evntsel_msr, evntsel, 0); | ||
247 | write_watchdog_counter(perfctr_msr, "K7_PERFCTR0",nmi_hz); | ||
248 | apic_write(APIC_LVTPC, APIC_DM_NMI); | ||
249 | evntsel |= K7_EVNTSEL_ENABLE; | ||
250 | wrmsr(evntsel_msr, evntsel, 0); | ||
251 | |||
252 | wd->perfctr_msr = perfctr_msr; | ||
253 | wd->evntsel_msr = evntsel_msr; | ||
254 | wd->cccr_msr = 0; //unused | ||
255 | return 1; | ||
256 | } | ||
257 | |||
258 | static void single_msr_stop_watchdog(void *arg) | ||
259 | { | ||
260 | struct nmi_watchdog_ctlblk *wd = &__get_cpu_var(nmi_watchdog_ctlblk); | ||
261 | |||
262 | wrmsr(wd->evntsel_msr, 0, 0); | ||
263 | } | ||
264 | |||
265 | static int single_msr_reserve(void) | ||
266 | { | ||
267 | if (!reserve_perfctr_nmi(wd_ops->perfctr)) | ||
268 | return 0; | ||
269 | |||
270 | if (!reserve_evntsel_nmi(wd_ops->evntsel)) { | ||
271 | release_perfctr_nmi(wd_ops->perfctr); | ||
272 | return 0; | ||
273 | } | ||
274 | return 1; | ||
275 | } | ||
276 | |||
277 | static void single_msr_unreserve(void) | ||
278 | { | ||
279 | release_evntsel_nmi(wd_ops->perfctr); | ||
280 | release_perfctr_nmi(wd_ops->evntsel); | ||
281 | } | ||
282 | |||
283 | static void single_msr_rearm(struct nmi_watchdog_ctlblk *wd, unsigned nmi_hz) | ||
284 | { | ||
285 | /* start the cycle over again */ | ||
286 | write_watchdog_counter(wd->perfctr_msr, NULL, nmi_hz); | ||
287 | } | ||
288 | |||
289 | static struct wd_ops k7_wd_ops = { | ||
290 | .reserve = single_msr_reserve, | ||
291 | .unreserve = single_msr_unreserve, | ||
292 | .setup = setup_k7_watchdog, | ||
293 | .rearm = single_msr_rearm, | ||
294 | .stop = single_msr_stop_watchdog, | ||
295 | .perfctr = MSR_K7_PERFCTR0, | ||
296 | .evntsel = MSR_K7_EVNTSEL0, | ||
297 | .checkbit = 1ULL<<63, | ||
298 | }; | ||
299 | |||
300 | /* Intel Model 6 (PPro+,P2,P3,P-M,Core1) */ | ||
301 | |||
302 | #define P6_EVNTSEL0_ENABLE (1 << 22) | ||
303 | #define P6_EVNTSEL_INT (1 << 20) | ||
304 | #define P6_EVNTSEL_OS (1 << 17) | ||
305 | #define P6_EVNTSEL_USR (1 << 16) | ||
306 | #define P6_EVENT_CPU_CLOCKS_NOT_HALTED 0x79 | ||
307 | #define P6_NMI_EVENT P6_EVENT_CPU_CLOCKS_NOT_HALTED | ||
308 | |||
309 | static int setup_p6_watchdog(unsigned nmi_hz) | ||
310 | { | ||
311 | unsigned int perfctr_msr, evntsel_msr; | ||
312 | unsigned int evntsel; | ||
313 | struct nmi_watchdog_ctlblk *wd = &__get_cpu_var(nmi_watchdog_ctlblk); | ||
314 | |||
315 | perfctr_msr = MSR_P6_PERFCTR0; | ||
316 | evntsel_msr = MSR_P6_EVNTSEL0; | ||
317 | |||
318 | wrmsrl(perfctr_msr, 0UL); | ||
319 | |||
320 | evntsel = P6_EVNTSEL_INT | ||
321 | | P6_EVNTSEL_OS | ||
322 | | P6_EVNTSEL_USR | ||
323 | | P6_NMI_EVENT; | ||
324 | |||
325 | /* setup the timer */ | ||
326 | wrmsr(evntsel_msr, evntsel, 0); | ||
327 | nmi_hz = adjust_for_32bit_ctr(nmi_hz); | ||
328 | write_watchdog_counter32(perfctr_msr, "P6_PERFCTR0",nmi_hz); | ||
329 | apic_write(APIC_LVTPC, APIC_DM_NMI); | ||
330 | evntsel |= P6_EVNTSEL0_ENABLE; | ||
331 | wrmsr(evntsel_msr, evntsel, 0); | ||
332 | |||
333 | wd->perfctr_msr = perfctr_msr; | ||
334 | wd->evntsel_msr = evntsel_msr; | ||
335 | wd->cccr_msr = 0; //unused | ||
336 | return 1; | ||
337 | } | ||
338 | |||
339 | static void p6_rearm(struct nmi_watchdog_ctlblk *wd, unsigned nmi_hz) | ||
340 | { | ||
341 | /* P6 based Pentium M need to re-unmask | ||
342 | * the apic vector but it doesn't hurt | ||
343 | * other P6 variant. | ||
344 | * ArchPerfom/Core Duo also needs this */ | ||
345 | apic_write(APIC_LVTPC, APIC_DM_NMI); | ||
346 | /* P6/ARCH_PERFMON has 32 bit counter write */ | ||
347 | write_watchdog_counter32(wd->perfctr_msr, NULL,nmi_hz); | ||
348 | } | ||
349 | |||
350 | static struct wd_ops p6_wd_ops = { | ||
351 | .reserve = single_msr_reserve, | ||
352 | .unreserve = single_msr_unreserve, | ||
353 | .setup = setup_p6_watchdog, | ||
354 | .rearm = p6_rearm, | ||
355 | .stop = single_msr_stop_watchdog, | ||
356 | .perfctr = MSR_P6_PERFCTR0, | ||
357 | .evntsel = MSR_P6_EVNTSEL0, | ||
358 | .checkbit = 1ULL<<39, | ||
359 | }; | ||
360 | |||
361 | /* Intel P4 performance counters. By far the most complicated of all. */ | ||
362 | |||
/*
 * Bit definitions for IA32_MISC_ENABLE and the P4 ESCR/CCCR MSRs.
 *
 * P4_CCCR_OVF is bit 31: it is built from an unsigned 1 because shifting
 * a signed int 1 into the sign bit is undefined and produces a negative
 * constant in practice.
 */
#define MSR_P4_MISC_ENABLE_PERF_AVAIL	(1<<7)
#define P4_ESCR_EVENT_SELECT(N)	((N)<<25)
#define P4_ESCR_OS		(1<<3)
#define P4_ESCR_USR		(1<<2)
#define P4_CCCR_OVF_PMI0	(1<<26)
#define P4_CCCR_OVF_PMI1	(1<<27)
#define P4_CCCR_THRESHOLD(N)	((N)<<20)
#define P4_CCCR_COMPLEMENT	(1<<19)
#define P4_CCCR_COMPARE		(1<<18)
#define P4_CCCR_REQUIRED	(3<<16)
#define P4_CCCR_ESCR_SELECT(N)	((N)<<13)
#define P4_CCCR_ENABLE		(1<<12)
#define P4_CCCR_OVF		(1U<<31)
376 | |||
377 | /* Set up IQ_COUNTER0 to behave like a clock, by having IQ_CCCR0 filter | ||
378 | CRU_ESCR0 (with any non-null event selector) through a complemented | ||
379 | max threshold. [IA32-Vol3, Section 14.9.9] */ | ||
380 | |||
381 | static int setup_p4_watchdog(unsigned nmi_hz) | ||
382 | { | ||
383 | unsigned int perfctr_msr, evntsel_msr, cccr_msr; | ||
384 | unsigned int evntsel, cccr_val; | ||
385 | unsigned int misc_enable, dummy; | ||
386 | unsigned int ht_num; | ||
387 | struct nmi_watchdog_ctlblk *wd = &__get_cpu_var(nmi_watchdog_ctlblk); | ||
388 | |||
389 | rdmsr(MSR_IA32_MISC_ENABLE, misc_enable, dummy); | ||
390 | if (!(misc_enable & MSR_P4_MISC_ENABLE_PERF_AVAIL)) | ||
391 | return 0; | ||
392 | |||
393 | #ifdef CONFIG_SMP | ||
394 | /* detect which hyperthread we are on */ | ||
395 | if (smp_num_siblings == 2) { | ||
396 | unsigned int ebx, apicid; | ||
397 | |||
398 | ebx = cpuid_ebx(1); | ||
399 | apicid = (ebx >> 24) & 0xff; | ||
400 | ht_num = apicid & 1; | ||
401 | } else | ||
402 | #endif | ||
403 | ht_num = 0; | ||
404 | |||
405 | /* performance counters are shared resources | ||
406 | * assign each hyperthread its own set | ||
407 | * (re-use the ESCR0 register, seems safe | ||
408 | * and keeps the cccr_val the same) | ||
409 | */ | ||
410 | if (!ht_num) { | ||
411 | /* logical cpu 0 */ | ||
412 | perfctr_msr = MSR_P4_IQ_PERFCTR0; | ||
413 | evntsel_msr = MSR_P4_CRU_ESCR0; | ||
414 | cccr_msr = MSR_P4_IQ_CCCR0; | ||
415 | cccr_val = P4_CCCR_OVF_PMI0 | P4_CCCR_ESCR_SELECT(4); | ||
416 | } else { | ||
417 | /* logical cpu 1 */ | ||
418 | perfctr_msr = MSR_P4_IQ_PERFCTR1; | ||
419 | evntsel_msr = MSR_P4_CRU_ESCR0; | ||
420 | cccr_msr = MSR_P4_IQ_CCCR1; | ||
421 | cccr_val = P4_CCCR_OVF_PMI1 | P4_CCCR_ESCR_SELECT(4); | ||
422 | } | ||
423 | |||
424 | evntsel = P4_ESCR_EVENT_SELECT(0x3F) | ||
425 | | P4_ESCR_OS | ||
426 | | P4_ESCR_USR; | ||
427 | |||
428 | cccr_val |= P4_CCCR_THRESHOLD(15) | ||
429 | | P4_CCCR_COMPLEMENT | ||
430 | | P4_CCCR_COMPARE | ||
431 | | P4_CCCR_REQUIRED; | ||
432 | |||
433 | wrmsr(evntsel_msr, evntsel, 0); | ||
434 | wrmsr(cccr_msr, cccr_val, 0); | ||
435 | write_watchdog_counter(perfctr_msr, "P4_IQ_COUNTER0", nmi_hz); | ||
436 | apic_write(APIC_LVTPC, APIC_DM_NMI); | ||
437 | cccr_val |= P4_CCCR_ENABLE; | ||
438 | wrmsr(cccr_msr, cccr_val, 0); | ||
439 | wd->perfctr_msr = perfctr_msr; | ||
440 | wd->evntsel_msr = evntsel_msr; | ||
441 | wd->cccr_msr = cccr_msr; | ||
442 | return 1; | ||
443 | } | ||
444 | |||
445 | static void stop_p4_watchdog(void *arg) | ||
446 | { | ||
447 | struct nmi_watchdog_ctlblk *wd = &__get_cpu_var(nmi_watchdog_ctlblk); | ||
448 | wrmsr(wd->cccr_msr, 0, 0); | ||
449 | wrmsr(wd->evntsel_msr, 0, 0); | ||
450 | } | ||
451 | |||
452 | static int p4_reserve(void) | ||
453 | { | ||
454 | if (!reserve_perfctr_nmi(MSR_P4_IQ_PERFCTR0)) | ||
455 | return 0; | ||
456 | #ifdef CONFIG_SMP | ||
457 | if (smp_num_siblings > 1 && !reserve_perfctr_nmi(MSR_P4_IQ_PERFCTR1)) | ||
458 | goto fail1; | ||
459 | #endif | ||
460 | if (!reserve_evntsel_nmi(MSR_P4_CRU_ESCR0)) | ||
461 | goto fail2; | ||
462 | /* RED-PEN why is ESCR1 not reserved here? */ | ||
463 | return 1; | ||
464 | fail2: | ||
465 | #ifdef CONFIG_SMP | ||
466 | if (smp_num_siblings > 1) | ||
467 | release_perfctr_nmi(MSR_P4_IQ_PERFCTR1); | ||
468 | fail1: | ||
469 | #endif | ||
470 | release_perfctr_nmi(MSR_P4_IQ_PERFCTR0); | ||
471 | return 0; | ||
472 | } | ||
473 | |||
474 | static void p4_unreserve(void) | ||
475 | { | ||
476 | #ifdef CONFIG_SMP | ||
477 | if (smp_num_siblings > 1) | ||
478 | release_evntsel_nmi(MSR_P4_IQ_PERFCTR1); | ||
479 | #endif | ||
480 | release_evntsel_nmi(MSR_P4_IQ_PERFCTR0); | ||
481 | release_perfctr_nmi(MSR_P4_CRU_ESCR0); | ||
482 | } | ||
483 | |||
484 | static void p4_rearm(struct nmi_watchdog_ctlblk *wd, unsigned nmi_hz) | ||
485 | { | ||
486 | unsigned dummy; | ||
487 | /* | ||
488 | * P4 quirks: | ||
489 | * - An overflown perfctr will assert its interrupt | ||
490 | * until the OVF flag in its CCCR is cleared. | ||
491 | * - LVTPC is masked on interrupt and must be | ||
492 | * unmasked by the LVTPC handler. | ||
493 | */ | ||
494 | rdmsrl(wd->cccr_msr, dummy); | ||
495 | dummy &= ~P4_CCCR_OVF; | ||
496 | wrmsrl(wd->cccr_msr, dummy); | ||
497 | apic_write(APIC_LVTPC, APIC_DM_NMI); | ||
498 | /* start the cycle over again */ | ||
499 | write_watchdog_counter(wd->perfctr_msr, NULL, nmi_hz); | ||
500 | } | ||
501 | |||
502 | static struct wd_ops p4_wd_ops = { | ||
503 | .reserve = p4_reserve, | ||
504 | .unreserve = p4_unreserve, | ||
505 | .setup = setup_p4_watchdog, | ||
506 | .rearm = p4_rearm, | ||
507 | .stop = stop_p4_watchdog, | ||
508 | /* RED-PEN this is wrong for the other sibling */ | ||
509 | .perfctr = MSR_P4_BPU_PERFCTR0, | ||
510 | .evntsel = MSR_P4_BSU_ESCR0, | ||
511 | .checkbit = 1ULL<<39, | ||
512 | }; | ||
513 | |||
514 | /* Watchdog using the Intel architected PerfMon. Used for Core2 and hopefully | ||
515 | all future Intel CPUs. */ | ||
516 | |||
517 | #define ARCH_PERFMON_NMI_EVENT_SEL ARCH_PERFMON_UNHALTED_CORE_CYCLES_SEL | ||
518 | #define ARCH_PERFMON_NMI_EVENT_UMASK ARCH_PERFMON_UNHALTED_CORE_CYCLES_UMASK | ||
519 | |||
520 | static int setup_intel_arch_watchdog(unsigned nmi_hz) | ||
521 | { | ||
522 | unsigned int ebx; | ||
523 | union cpuid10_eax eax; | ||
524 | unsigned int unused; | ||
525 | unsigned int perfctr_msr, evntsel_msr; | ||
526 | unsigned int evntsel; | ||
527 | struct nmi_watchdog_ctlblk *wd = &__get_cpu_var(nmi_watchdog_ctlblk); | ||
528 | |||
529 | /* | ||
530 | * Check whether the Architectural PerfMon supports | ||
531 | * Unhalted Core Cycles Event or not. | ||
532 | * NOTE: Corresponding bit = 0 in ebx indicates event present. | ||
533 | */ | ||
534 | cpuid(10, &(eax.full), &ebx, &unused, &unused); | ||
535 | if ((eax.split.mask_length < (ARCH_PERFMON_UNHALTED_CORE_CYCLES_INDEX+1)) || | ||
536 | (ebx & ARCH_PERFMON_UNHALTED_CORE_CYCLES_PRESENT)) | ||
537 | return 0; | ||
538 | |||
539 | perfctr_msr = MSR_ARCH_PERFMON_PERFCTR1; | ||
540 | evntsel_msr = MSR_ARCH_PERFMON_EVENTSEL1; | ||
541 | |||
542 | wrmsrl(perfctr_msr, 0UL); | ||
543 | |||
544 | evntsel = ARCH_PERFMON_EVENTSEL_INT | ||
545 | | ARCH_PERFMON_EVENTSEL_OS | ||
546 | | ARCH_PERFMON_EVENTSEL_USR | ||
547 | | ARCH_PERFMON_NMI_EVENT_SEL | ||
548 | | ARCH_PERFMON_NMI_EVENT_UMASK; | ||
549 | |||
550 | /* setup the timer */ | ||
551 | wrmsr(evntsel_msr, evntsel, 0); | ||
552 | nmi_hz = adjust_for_32bit_ctr(nmi_hz); | ||
553 | write_watchdog_counter32(perfctr_msr, "INTEL_ARCH_PERFCTR0", nmi_hz); | ||
554 | apic_write(APIC_LVTPC, APIC_DM_NMI); | ||
555 | evntsel |= ARCH_PERFMON_EVENTSEL0_ENABLE; | ||
556 | wrmsr(evntsel_msr, evntsel, 0); | ||
557 | |||
558 | wd->perfctr_msr = perfctr_msr; | ||
559 | wd->evntsel_msr = evntsel_msr; | ||
560 | wd->cccr_msr = 0; //unused | ||
561 | wd_ops->checkbit = 1ULL << (eax.split.bit_width - 1); | ||
562 | return 1; | ||
563 | } | ||
564 | |||
565 | static struct wd_ops intel_arch_wd_ops = { | ||
566 | .reserve = single_msr_reserve, | ||
567 | .unreserve = single_msr_unreserve, | ||
568 | .setup = setup_intel_arch_watchdog, | ||
569 | .rearm = p6_rearm, | ||
570 | .stop = single_msr_stop_watchdog, | ||
571 | .perfctr = MSR_ARCH_PERFMON_PERFCTR0, | ||
572 | .evntsel = MSR_ARCH_PERFMON_EVENTSEL0, | ||
573 | }; | ||
574 | |||
575 | static void probe_nmi_watchdog(void) | ||
576 | { | ||
577 | switch (boot_cpu_data.x86_vendor) { | ||
578 | case X86_VENDOR_AMD: | ||
579 | if (boot_cpu_data.x86 != 6 && boot_cpu_data.x86 != 15 && | ||
580 | boot_cpu_data.x86 != 16) | ||
581 | return; | ||
582 | wd_ops = &k7_wd_ops; | ||
583 | break; | ||
584 | case X86_VENDOR_INTEL: | ||
585 | if (cpu_has(&boot_cpu_data, X86_FEATURE_ARCH_PERFMON)) { | ||
586 | wd_ops = &intel_arch_wd_ops; | ||
587 | break; | ||
588 | } | ||
589 | switch (boot_cpu_data.x86) { | ||
590 | case 6: | ||
591 | if (boot_cpu_data.x86_model > 0xd) | ||
592 | return; | ||
593 | |||
594 | wd_ops = &p6_wd_ops; | ||
595 | break; | ||
596 | case 15: | ||
597 | if (boot_cpu_data.x86_model > 0x4) | ||
598 | return; | ||
599 | |||
600 | wd_ops = &p4_wd_ops; | ||
601 | break; | ||
602 | default: | ||
603 | return; | ||
604 | } | ||
605 | break; | ||
606 | } | ||
607 | } | ||
608 | |||
609 | /* Interface to nmi.c */ | ||
610 | |||
611 | int lapic_watchdog_init(unsigned nmi_hz) | ||
612 | { | ||
613 | if (!wd_ops) { | ||
614 | probe_nmi_watchdog(); | ||
615 | if (!wd_ops) | ||
616 | return -1; | ||
617 | } | ||
618 | |||
619 | if (!(wd_ops->setup(nmi_hz))) { | ||
620 | printk(KERN_ERR "Cannot setup NMI watchdog on CPU %d\n", | ||
621 | raw_smp_processor_id()); | ||
622 | return -1; | ||
623 | } | ||
624 | |||
625 | return 0; | ||
626 | } | ||
627 | |||
628 | void lapic_watchdog_stop(void) | ||
629 | { | ||
630 | if (wd_ops) | ||
631 | wd_ops->stop(NULL); | ||
632 | } | ||
633 | |||
634 | unsigned lapic_adjust_nmi_hz(unsigned hz) | ||
635 | { | ||
636 | struct nmi_watchdog_ctlblk *wd = &__get_cpu_var(nmi_watchdog_ctlblk); | ||
637 | if (wd->perfctr_msr == MSR_P6_PERFCTR0 || | ||
638 | wd->perfctr_msr == MSR_ARCH_PERFMON_PERFCTR1) | ||
639 | hz = adjust_for_32bit_ctr(hz); | ||
640 | return hz; | ||
641 | } | ||
642 | |||
643 | int lapic_wd_event(unsigned nmi_hz) | ||
644 | { | ||
645 | struct nmi_watchdog_ctlblk *wd = &__get_cpu_var(nmi_watchdog_ctlblk); | ||
646 | u64 ctr; | ||
647 | rdmsrl(wd->perfctr_msr, ctr); | ||
648 | if (ctr & wd_ops->checkbit) { /* perfctr still running? */ | ||
649 | return 0; | ||
650 | } | ||
651 | wd_ops->rearm(wd, nmi_hz); | ||
652 | return 1; | ||
653 | } | ||
654 | |||
655 | int lapic_watchdog_ok(void) | ||
656 | { | ||
657 | return wd_ops != NULL; | ||
658 | } | ||
diff --git a/arch/i386/kernel/cpu/proc.c b/arch/i386/kernel/cpu/proc.c index 47e3ebbfb28..89d91e6cc97 100644 --- a/arch/i386/kernel/cpu/proc.c +++ b/arch/i386/kernel/cpu/proc.c | |||
@@ -72,8 +72,7 @@ static int show_cpuinfo(struct seq_file *m, void *v) | |||
72 | "stc", | 72 | "stc", |
73 | "100mhzsteps", | 73 | "100mhzsteps", |
74 | "hwpstate", | 74 | "hwpstate", |
75 | NULL, | 75 | "", /* constant_tsc - moved to flags */ |
76 | NULL, /* constant_tsc - moved to flags */ | ||
77 | /* nothing */ | 76 | /* nothing */ |
78 | }; | 77 | }; |
79 | struct cpuinfo_x86 *c = v; | 78 | struct cpuinfo_x86 *c = v; |
diff --git a/arch/i386/kernel/cpu/rise.c b/arch/i386/kernel/cpu/rise.c index 9317f741498..50076f22e90 100644 --- a/arch/i386/kernel/cpu/rise.c +++ b/arch/i386/kernel/cpu/rise.c | |||
@@ -50,12 +50,3 @@ int __init rise_init_cpu(void) | |||
50 | return 0; | 50 | return 0; |
51 | } | 51 | } |
52 | 52 | ||
53 | //early_arch_initcall(rise_init_cpu); | ||
54 | |||
55 | static int __init rise_exit_cpu(void) | ||
56 | { | ||
57 | cpu_devs[X86_VENDOR_RISE] = NULL; | ||
58 | return 0; | ||
59 | } | ||
60 | |||
61 | late_initcall(rise_exit_cpu); | ||
diff --git a/arch/i386/kernel/cpu/transmeta.c b/arch/i386/kernel/cpu/transmeta.c index 5678d46863c..6471a5a1320 100644 --- a/arch/i386/kernel/cpu/transmeta.c +++ b/arch/i386/kernel/cpu/transmeta.c | |||
@@ -112,13 +112,3 @@ int __init transmeta_init_cpu(void) | |||
112 | cpu_devs[X86_VENDOR_TRANSMETA] = &transmeta_cpu_dev; | 112 | cpu_devs[X86_VENDOR_TRANSMETA] = &transmeta_cpu_dev; |
113 | return 0; | 113 | return 0; |
114 | } | 114 | } |
115 | |||
116 | //early_arch_initcall(transmeta_init_cpu); | ||
117 | |||
118 | static int __init transmeta_exit_cpu(void) | ||
119 | { | ||
120 | cpu_devs[X86_VENDOR_TRANSMETA] = NULL; | ||
121 | return 0; | ||
122 | } | ||
123 | |||
124 | late_initcall(transmeta_exit_cpu); | ||
diff --git a/arch/i386/kernel/cpu/umc.c b/arch/i386/kernel/cpu/umc.c index 1bf3f87e9c5..a7a4e75bdcd 100644 --- a/arch/i386/kernel/cpu/umc.c +++ b/arch/i386/kernel/cpu/umc.c | |||
@@ -24,13 +24,3 @@ int __init umc_init_cpu(void) | |||
24 | cpu_devs[X86_VENDOR_UMC] = &umc_cpu_dev; | 24 | cpu_devs[X86_VENDOR_UMC] = &umc_cpu_dev; |
25 | return 0; | 25 | return 0; |
26 | } | 26 | } |
27 | |||
28 | //early_arch_initcall(umc_init_cpu); | ||
29 | |||
30 | static int __init umc_exit_cpu(void) | ||
31 | { | ||
32 | cpu_devs[X86_VENDOR_UMC] = NULL; | ||
33 | return 0; | ||
34 | } | ||
35 | |||
36 | late_initcall(umc_exit_cpu); | ||
diff --git a/arch/i386/kernel/doublefault.c b/arch/i386/kernel/doublefault.c index b4d14c2eb34..265c5597efb 100644 --- a/arch/i386/kernel/doublefault.c +++ b/arch/i386/kernel/doublefault.c | |||
@@ -33,7 +33,7 @@ static void doublefault_fn(void) | |||
33 | printk("double fault, tss at %08lx\n", tss); | 33 | printk("double fault, tss at %08lx\n", tss); |
34 | 34 | ||
35 | if (ptr_ok(tss)) { | 35 | if (ptr_ok(tss)) { |
36 | struct tss_struct *t = (struct tss_struct *)tss; | 36 | struct i386_hw_tss *t = (struct i386_hw_tss *)tss; |
37 | 37 | ||
38 | printk("eip = %08lx, esp = %08lx\n", t->eip, t->esp); | 38 | printk("eip = %08lx, esp = %08lx\n", t->eip, t->esp); |
39 | 39 | ||
@@ -49,18 +49,21 @@ static void doublefault_fn(void) | |||
49 | } | 49 | } |
50 | 50 | ||
51 | struct tss_struct doublefault_tss __cacheline_aligned = { | 51 | struct tss_struct doublefault_tss __cacheline_aligned = { |
52 | .esp0 = STACK_START, | 52 | .x86_tss = { |
53 | .ss0 = __KERNEL_DS, | 53 | .esp0 = STACK_START, |
54 | .ldt = 0, | 54 | .ss0 = __KERNEL_DS, |
55 | .io_bitmap_base = INVALID_IO_BITMAP_OFFSET, | 55 | .ldt = 0, |
56 | .io_bitmap_base = INVALID_IO_BITMAP_OFFSET, | ||
56 | 57 | ||
57 | .eip = (unsigned long) doublefault_fn, | 58 | .eip = (unsigned long) doublefault_fn, |
58 | .eflags = X86_EFLAGS_SF | 0x2, /* 0x2 bit is always set */ | 59 | /* 0x2 bit is always set */ |
59 | .esp = STACK_START, | 60 | .eflags = X86_EFLAGS_SF | 0x2, |
60 | .es = __USER_DS, | 61 | .esp = STACK_START, |
61 | .cs = __KERNEL_CS, | 62 | .es = __USER_DS, |
62 | .ss = __KERNEL_DS, | 63 | .cs = __KERNEL_CS, |
63 | .ds = __USER_DS, | 64 | .ss = __KERNEL_DS, |
65 | .ds = __USER_DS, | ||
64 | 66 | ||
65 | .__cr3 = __pa(swapper_pg_dir) | 67 | .__cr3 = __pa(swapper_pg_dir) |
68 | } | ||
66 | }; | 69 | }; |
diff --git a/arch/i386/kernel/e820.c b/arch/i386/kernel/e820.c index 70f39560846..9645bb51f76 100644 --- a/arch/i386/kernel/e820.c +++ b/arch/i386/kernel/e820.c | |||
@@ -161,26 +161,27 @@ static struct resource standard_io_resources[] = { { | |||
161 | 161 | ||
162 | static int __init romsignature(const unsigned char *rom) | 162 | static int __init romsignature(const unsigned char *rom) |
163 | { | 163 | { |
164 | const unsigned short * const ptr = (const unsigned short *)rom; | ||
164 | unsigned short sig; | 165 | unsigned short sig; |
165 | 166 | ||
166 | return probe_kernel_address((const unsigned short *)rom, sig) == 0 && | 167 | return probe_kernel_address(ptr, sig) == 0 && sig == ROMSIGNATURE; |
167 | sig == ROMSIGNATURE; | ||
168 | } | 168 | } |
169 | 169 | ||
170 | static int __init romchecksum(unsigned char *rom, unsigned long length) | 170 | static int __init romchecksum(const unsigned char *rom, unsigned long length) |
171 | { | 171 | { |
172 | unsigned char sum; | 172 | unsigned char sum, c; |
173 | 173 | ||
174 | for (sum = 0; length; length--) | 174 | for (sum = 0; length && probe_kernel_address(rom++, c) == 0; length--) |
175 | sum += *rom++; | 175 | sum += c; |
176 | return sum == 0; | 176 | return !length && !sum; |
177 | } | 177 | } |
178 | 178 | ||
179 | static void __init probe_roms(void) | 179 | static void __init probe_roms(void) |
180 | { | 180 | { |
181 | const unsigned char *rom; | ||
181 | unsigned long start, length, upper; | 182 | unsigned long start, length, upper; |
182 | unsigned char *rom; | 183 | unsigned char c; |
183 | int i; | 184 | int i; |
184 | 185 | ||
185 | /* video rom */ | 186 | /* video rom */ |
186 | upper = adapter_rom_resources[0].start; | 187 | upper = adapter_rom_resources[0].start; |
@@ -191,8 +192,11 @@ static void __init probe_roms(void) | |||
191 | 192 | ||
192 | video_rom_resource.start = start; | 193 | video_rom_resource.start = start; |
193 | 194 | ||
195 | if (probe_kernel_address(rom + 2, c) != 0) | ||
196 | continue; | ||
197 | |||
194 | /* 0 < length <= 0x7f * 512, historically */ | 198 | /* 0 < length <= 0x7f * 512, historically */ |
195 | length = rom[2] * 512; | 199 | length = c * 512; |
196 | 200 | ||
197 | /* if checksum okay, trust length byte */ | 201 | /* if checksum okay, trust length byte */ |
198 | if (length && romchecksum(rom, length)) | 202 | if (length && romchecksum(rom, length)) |
@@ -226,8 +230,11 @@ static void __init probe_roms(void) | |||
226 | if (!romsignature(rom)) | 230 | if (!romsignature(rom)) |
227 | continue; | 231 | continue; |
228 | 232 | ||
233 | if (probe_kernel_address(rom + 2, c) != 0) | ||
234 | continue; | ||
235 | |||
229 | /* 0 < length <= 0x7f * 512, historically */ | 236 | /* 0 < length <= 0x7f * 512, historically */ |
230 | length = rom[2] * 512; | 237 | length = c * 512; |
231 | 238 | ||
232 | /* but accept any length that fits if checksum okay */ | 239 | /* but accept any length that fits if checksum okay */ |
233 | if (!length || start + length > upper || !romchecksum(rom, length)) | 240 | if (!length || start + length > upper || !romchecksum(rom, length)) |
@@ -386,10 +393,8 @@ int __init sanitize_e820_map(struct e820entry * biosmap, char * pnr_map) | |||
386 | ____________________33__ | 393 | ____________________33__ |
387 | ______________________4_ | 394 | ______________________4_ |
388 | */ | 395 | */ |
389 | printk("sanitize start\n"); | ||
390 | /* if there's only one memory region, don't bother */ | 396 | /* if there's only one memory region, don't bother */ |
391 | if (*pnr_map < 2) { | 397 | if (*pnr_map < 2) { |
392 | printk("sanitize bail 0\n"); | ||
393 | return -1; | 398 | return -1; |
394 | } | 399 | } |
395 | 400 | ||
@@ -398,7 +403,6 @@ int __init sanitize_e820_map(struct e820entry * biosmap, char * pnr_map) | |||
398 | /* bail out if we find any unreasonable addresses in bios map */ | 403 | /* bail out if we find any unreasonable addresses in bios map */ |
399 | for (i=0; i<old_nr; i++) | 404 | for (i=0; i<old_nr; i++) |
400 | if (biosmap[i].addr + biosmap[i].size < biosmap[i].addr) { | 405 | if (biosmap[i].addr + biosmap[i].size < biosmap[i].addr) { |
401 | printk("sanitize bail 1\n"); | ||
402 | return -1; | 406 | return -1; |
403 | } | 407 | } |
404 | 408 | ||
@@ -494,7 +498,6 @@ int __init sanitize_e820_map(struct e820entry * biosmap, char * pnr_map) | |||
494 | memcpy(biosmap, new_bios, new_nr*sizeof(struct e820entry)); | 498 | memcpy(biosmap, new_bios, new_nr*sizeof(struct e820entry)); |
495 | *pnr_map = new_nr; | 499 | *pnr_map = new_nr; |
496 | 500 | ||
497 | printk("sanitize end\n"); | ||
498 | return 0; | 501 | return 0; |
499 | } | 502 | } |
500 | 503 | ||
@@ -525,7 +528,6 @@ int __init copy_e820_map(struct e820entry * biosmap, int nr_map) | |||
525 | unsigned long long size = biosmap->size; | 528 | unsigned long long size = biosmap->size; |
526 | unsigned long long end = start + size; | 529 | unsigned long long end = start + size; |
527 | unsigned long type = biosmap->type; | 530 | unsigned long type = biosmap->type; |
528 | printk("copy_e820_map() start: %016Lx size: %016Lx end: %016Lx type: %ld\n", start, size, end, type); | ||
529 | 531 | ||
530 | /* Overflow in 64 bits? Ignore the memory map. */ | 532 | /* Overflow in 64 bits? Ignore the memory map. */ |
531 | if (start > end) | 533 | if (start > end) |
@@ -536,17 +538,11 @@ int __init copy_e820_map(struct e820entry * biosmap, int nr_map) | |||
536 | * Not right. Fix it up. | 538 | * Not right. Fix it up. |
537 | */ | 539 | */ |
538 | if (type == E820_RAM) { | 540 | if (type == E820_RAM) { |
539 | printk("copy_e820_map() type is E820_RAM\n"); | ||
540 | if (start < 0x100000ULL && end > 0xA0000ULL) { | 541 | if (start < 0x100000ULL && end > 0xA0000ULL) { |
541 | printk("copy_e820_map() lies in range...\n"); | 542 | if (start < 0xA0000ULL) |
542 | if (start < 0xA0000ULL) { | ||
543 | printk("copy_e820_map() start < 0xA0000ULL\n"); | ||
544 | add_memory_region(start, 0xA0000ULL-start, type); | 543 | add_memory_region(start, 0xA0000ULL-start, type); |
545 | } | 544 | if (end <= 0x100000ULL) |
546 | if (end <= 0x100000ULL) { | ||
547 | printk("copy_e820_map() end <= 0x100000ULL\n"); | ||
548 | continue; | 545 | continue; |
549 | } | ||
550 | start = 0x100000ULL; | 546 | start = 0x100000ULL; |
551 | size = end - start; | 547 | size = end - start; |
552 | } | 548 | } |
@@ -818,6 +814,26 @@ void __init limit_regions(unsigned long long size) | |||
818 | print_memory_map("limit_regions endfunc"); | 814 | print_memory_map("limit_regions endfunc"); |
819 | } | 815 | } |
820 | 816 | ||
817 | /* | ||
818 | * This function checks if any part of the range <start,end> is mapped | ||
819 | * with type. | ||
820 | */ | ||
821 | int | ||
822 | e820_any_mapped(u64 start, u64 end, unsigned type) | ||
823 | { | ||
824 | int i; | ||
825 | for (i = 0; i < e820.nr_map; i++) { | ||
826 | const struct e820entry *ei = &e820.map[i]; | ||
827 | if (type && ei->type != type) | ||
828 | continue; | ||
829 | if (ei->addr >= end || ei->addr + ei->size <= start) | ||
830 | continue; | ||
831 | return 1; | ||
832 | } | ||
833 | return 0; | ||
834 | } | ||
835 | EXPORT_SYMBOL_GPL(e820_any_mapped); | ||
836 | |||
821 | /* | 837 | /* |
822 | * This function checks if the entire range <start,end> is mapped with type. | 838 | * This function checks if the entire range <start,end> is mapped with type. |
823 | * | 839 | * |
diff --git a/arch/i386/kernel/efi.c b/arch/i386/kernel/efi.c index 8f9c624ace6..dd9e7faafa7 100644 --- a/arch/i386/kernel/efi.c +++ b/arch/i386/kernel/efi.c | |||
@@ -69,13 +69,11 @@ static void efi_call_phys_prelog(void) __acquires(efi_rt_lock) | |||
69 | { | 69 | { |
70 | unsigned long cr4; | 70 | unsigned long cr4; |
71 | unsigned long temp; | 71 | unsigned long temp; |
72 | struct Xgt_desc_struct *cpu_gdt_descr; | 72 | struct Xgt_desc_struct gdt_descr; |
73 | 73 | ||
74 | spin_lock(&efi_rt_lock); | 74 | spin_lock(&efi_rt_lock); |
75 | local_irq_save(efi_rt_eflags); | 75 | local_irq_save(efi_rt_eflags); |
76 | 76 | ||
77 | cpu_gdt_descr = &per_cpu(cpu_gdt_descr, 0); | ||
78 | |||
79 | /* | 77 | /* |
80 | * If I don't have PSE, I should just duplicate two entries in page | 78 | * If I don't have PSE, I should just duplicate two entries in page |
81 | * directory. If I have PSE, I just need to duplicate one entry in | 79 | * directory. If I have PSE, I just need to duplicate one entry in |
@@ -105,17 +103,19 @@ static void efi_call_phys_prelog(void) __acquires(efi_rt_lock) | |||
105 | */ | 103 | */ |
106 | local_flush_tlb(); | 104 | local_flush_tlb(); |
107 | 105 | ||
108 | cpu_gdt_descr->address = __pa(cpu_gdt_descr->address); | 106 | gdt_descr.address = __pa(get_cpu_gdt_table(0)); |
109 | load_gdt(cpu_gdt_descr); | 107 | gdt_descr.size = GDT_SIZE - 1; |
108 | load_gdt(&gdt_descr); | ||
110 | } | 109 | } |
111 | 110 | ||
112 | static void efi_call_phys_epilog(void) __releases(efi_rt_lock) | 111 | static void efi_call_phys_epilog(void) __releases(efi_rt_lock) |
113 | { | 112 | { |
114 | unsigned long cr4; | 113 | unsigned long cr4; |
115 | struct Xgt_desc_struct *cpu_gdt_descr = &per_cpu(cpu_gdt_descr, 0); | 114 | struct Xgt_desc_struct gdt_descr; |
116 | 115 | ||
117 | cpu_gdt_descr->address = (unsigned long)__va(cpu_gdt_descr->address); | 116 | gdt_descr.address = (unsigned long)get_cpu_gdt_table(0); |
118 | load_gdt(cpu_gdt_descr); | 117 | gdt_descr.size = GDT_SIZE - 1; |
118 | load_gdt(&gdt_descr); | ||
119 | 119 | ||
120 | cr4 = read_cr4(); | 120 | cr4 = read_cr4(); |
121 | 121 | ||
diff --git a/arch/i386/kernel/entry.S b/arch/i386/kernel/entry.S index 18bddcb8e9e..b1f16ee65e4 100644 --- a/arch/i386/kernel/entry.S +++ b/arch/i386/kernel/entry.S | |||
@@ -15,7 +15,7 @@ | |||
15 | * I changed all the .align's to 4 (16 byte alignment), as that's faster | 15 | * I changed all the .align's to 4 (16 byte alignment), as that's faster |
16 | * on a 486. | 16 | * on a 486. |
17 | * | 17 | * |
18 | * Stack layout in 'ret_from_system_call': | 18 | * Stack layout in 'syscall_exit': |
19 | * ptrace needs to have all regs on the stack. | 19 | * ptrace needs to have all regs on the stack. |
20 | * if the order here is changed, it needs to be | 20 | * if the order here is changed, it needs to be |
21 | * updated in fork.c:copy_process, signal.c:do_signal, | 21 | * updated in fork.c:copy_process, signal.c:do_signal, |
@@ -132,7 +132,7 @@ VM_MASK = 0x00020000 | |||
132 | movl $(__USER_DS), %edx; \ | 132 | movl $(__USER_DS), %edx; \ |
133 | movl %edx, %ds; \ | 133 | movl %edx, %ds; \ |
134 | movl %edx, %es; \ | 134 | movl %edx, %es; \ |
135 | movl $(__KERNEL_PDA), %edx; \ | 135 | movl $(__KERNEL_PERCPU), %edx; \ |
136 | movl %edx, %fs | 136 | movl %edx, %fs |
137 | 137 | ||
138 | #define RESTORE_INT_REGS \ | 138 | #define RESTORE_INT_REGS \ |
@@ -305,16 +305,12 @@ sysenter_past_esp: | |||
305 | pushl $(__USER_CS) | 305 | pushl $(__USER_CS) |
306 | CFI_ADJUST_CFA_OFFSET 4 | 306 | CFI_ADJUST_CFA_OFFSET 4 |
307 | /*CFI_REL_OFFSET cs, 0*/ | 307 | /*CFI_REL_OFFSET cs, 0*/ |
308 | #ifndef CONFIG_COMPAT_VDSO | ||
309 | /* | 308 | /* |
310 | * Push current_thread_info()->sysenter_return to the stack. | 309 | * Push current_thread_info()->sysenter_return to the stack. |
311 | * A tiny bit of offset fixup is necessary - 4*4 means the 4 words | 310 | * A tiny bit of offset fixup is necessary - 4*4 means the 4 words |
312 | * pushed above; +8 corresponds to copy_thread's esp0 setting. | 311 | * pushed above; +8 corresponds to copy_thread's esp0 setting. |
313 | */ | 312 | */ |
314 | pushl (TI_sysenter_return-THREAD_SIZE+8+4*4)(%esp) | 313 | pushl (TI_sysenter_return-THREAD_SIZE+8+4*4)(%esp) |
315 | #else | ||
316 | pushl $SYSENTER_RETURN | ||
317 | #endif | ||
318 | CFI_ADJUST_CFA_OFFSET 4 | 314 | CFI_ADJUST_CFA_OFFSET 4 |
319 | CFI_REL_OFFSET eip, 0 | 315 | CFI_REL_OFFSET eip, 0 |
320 | 316 | ||
@@ -342,7 +338,7 @@ sysenter_past_esp: | |||
342 | jae syscall_badsys | 338 | jae syscall_badsys |
343 | call *sys_call_table(,%eax,4) | 339 | call *sys_call_table(,%eax,4) |
344 | movl %eax,PT_EAX(%esp) | 340 | movl %eax,PT_EAX(%esp) |
345 | DISABLE_INTERRUPTS(CLBR_ECX|CLBR_EDX) | 341 | DISABLE_INTERRUPTS(CLBR_ANY) |
346 | TRACE_IRQS_OFF | 342 | TRACE_IRQS_OFF |
347 | movl TI_flags(%ebp), %ecx | 343 | movl TI_flags(%ebp), %ecx |
348 | testw $_TIF_ALLWORK_MASK, %cx | 344 | testw $_TIF_ALLWORK_MASK, %cx |
@@ -560,9 +556,7 @@ END(syscall_badsys) | |||
560 | 556 | ||
561 | #define FIXUP_ESPFIX_STACK \ | 557 | #define FIXUP_ESPFIX_STACK \ |
562 | /* since we are on a wrong stack, we cant make it a C code :( */ \ | 558 | /* since we are on a wrong stack, we cant make it a C code :( */ \ |
563 | movl %fs:PDA_cpu, %ebx; \ | 559 | PER_CPU(gdt_page, %ebx); \ |
564 | PER_CPU(cpu_gdt_descr, %ebx); \ | ||
565 | movl GDS_address(%ebx), %ebx; \ | ||
566 | GET_DESC_BASE(GDT_ENTRY_ESPFIX_SS, %ebx, %eax, %ax, %al, %ah); \ | 560 | GET_DESC_BASE(GDT_ENTRY_ESPFIX_SS, %ebx, %eax, %ax, %al, %ah); \ |
567 | addl %esp, %eax; \ | 561 | addl %esp, %eax; \ |
568 | pushl $__KERNEL_DS; \ | 562 | pushl $__KERNEL_DS; \ |
@@ -635,7 +629,7 @@ ENTRY(name) \ | |||
635 | SAVE_ALL; \ | 629 | SAVE_ALL; \ |
636 | TRACE_IRQS_OFF \ | 630 | TRACE_IRQS_OFF \ |
637 | movl %esp,%eax; \ | 631 | movl %esp,%eax; \ |
638 | call smp_/**/name; \ | 632 | call smp_##name; \ |
639 | jmp ret_from_intr; \ | 633 | jmp ret_from_intr; \ |
640 | CFI_ENDPROC; \ | 634 | CFI_ENDPROC; \ |
641 | ENDPROC(name) | 635 | ENDPROC(name) |
@@ -643,11 +637,6 @@ ENDPROC(name) | |||
643 | /* The include is where all of the SMP etc. interrupts come from */ | 637 | /* The include is where all of the SMP etc. interrupts come from */ |
644 | #include "entry_arch.h" | 638 | #include "entry_arch.h" |
645 | 639 | ||
646 | /* This alternate entry is needed because we hijack the apic LVTT */ | ||
647 | #if defined(CONFIG_VMI) && defined(CONFIG_X86_LOCAL_APIC) | ||
648 | BUILD_INTERRUPT(apic_vmi_timer_interrupt,LOCAL_TIMER_VECTOR) | ||
649 | #endif | ||
650 | |||
651 | KPROBE_ENTRY(page_fault) | 640 | KPROBE_ENTRY(page_fault) |
652 | RING0_EC_FRAME | 641 | RING0_EC_FRAME |
653 | pushl $do_page_fault | 642 | pushl $do_page_fault |
@@ -686,7 +675,7 @@ error_code: | |||
686 | pushl %fs | 675 | pushl %fs |
687 | CFI_ADJUST_CFA_OFFSET 4 | 676 | CFI_ADJUST_CFA_OFFSET 4 |
688 | /*CFI_REL_OFFSET fs, 0*/ | 677 | /*CFI_REL_OFFSET fs, 0*/ |
689 | movl $(__KERNEL_PDA), %ecx | 678 | movl $(__KERNEL_PERCPU), %ecx |
690 | movl %ecx, %fs | 679 | movl %ecx, %fs |
691 | UNWIND_ESPFIX_STACK | 680 | UNWIND_ESPFIX_STACK |
692 | popl %ecx | 681 | popl %ecx |
diff --git a/arch/i386/kernel/head.S b/arch/i386/kernel/head.S index 3fa7f9389af..9b10af65faa 100644 --- a/arch/i386/kernel/head.S +++ b/arch/i386/kernel/head.S | |||
@@ -34,17 +34,32 @@ | |||
34 | 34 | ||
35 | /* | 35 | /* |
36 | * This is how much memory *in addition to the memory covered up to | 36 | * This is how much memory *in addition to the memory covered up to |
37 | * and including _end* we need mapped initially. We need one bit for | 37 | * and including _end* we need mapped initially. |
38 | * each possible page, but only in low memory, which means | 38 | * We need: |
39 | * 2^32/4096/8 = 128K worst case (4G/4G split.) | 39 | * - one bit for each possible page, but only in low memory, which means |
40 | * 2^32/4096/8 = 128K worst case (4G/4G split.) | ||
41 | * - enough space to map all low memory, which means | ||
42 | * (2^32/4096) / 1024 pages (worst case, non PAE) | ||
43 | * (2^32/4096) / 512 + 4 pages (worst case for PAE) | ||
44 | * - a few pages for allocator use before the kernel pagetable has | ||
45 | * been set up | ||
40 | * | 46 | * |
41 | * Modulo rounding, each megabyte assigned here requires a kilobyte of | 47 | * Modulo rounding, each megabyte assigned here requires a kilobyte of |
42 | * memory, which is currently unreclaimed. | 48 | * memory, which is currently unreclaimed. |
43 | * | 49 | * |
44 | * This should be a multiple of a page. | 50 | * This should be a multiple of a page. |
45 | */ | 51 | */ |
46 | #define INIT_MAP_BEYOND_END (128*1024) | 52 | LOW_PAGES = 1<<(32-PAGE_SHIFT_asm) |
47 | 53 | ||
54 | #if PTRS_PER_PMD > 1 | ||
55 | PAGE_TABLE_SIZE = (LOW_PAGES / PTRS_PER_PMD) + PTRS_PER_PGD | ||
56 | #else | ||
57 | PAGE_TABLE_SIZE = (LOW_PAGES / PTRS_PER_PGD) | ||
58 | #endif | ||
59 | BOOTBITMAP_SIZE = LOW_PAGES / 8 | ||
60 | ALLOCATOR_SLOP = 4 | ||
61 | |||
62 | INIT_MAP_BEYOND_END = BOOTBITMAP_SIZE + (PAGE_TABLE_SIZE + ALLOCATOR_SLOP)*PAGE_SIZE_asm | ||
48 | 63 | ||
49 | /* | 64 | /* |
50 | * 32-bit kernel entrypoint; only used by the boot CPU. On entry, | 65 | * 32-bit kernel entrypoint; only used by the boot CPU. On entry, |
@@ -147,8 +162,7 @@ page_pde_offset = (__PAGE_OFFSET >> 20); | |||
147 | /* | 162 | /* |
148 | * Non-boot CPU entry point; entered from trampoline.S | 163 | * Non-boot CPU entry point; entered from trampoline.S |
149 | * We can't lgdt here, because lgdt itself uses a data segment, but | 164 | * We can't lgdt here, because lgdt itself uses a data segment, but |
150 | * we know the trampoline has already loaded the boot_gdt_table GDT | 165 | * we know the trampoline has already loaded the boot_gdt for us. |
151 | * for us. | ||
152 | * | 166 | * |
153 | * If cpu hotplug is not supported then this code can go in init section | 167 | * If cpu hotplug is not supported then this code can go in init section |
154 | * which will be freed later | 168 | * which will be freed later |
@@ -318,12 +332,12 @@ is386: movl $2,%ecx # set MP | |||
318 | movl %eax,%cr0 | 332 | movl %eax,%cr0 |
319 | 333 | ||
320 | call check_x87 | 334 | call check_x87 |
321 | call setup_pda | ||
322 | lgdt early_gdt_descr | 335 | lgdt early_gdt_descr |
323 | lidt idt_descr | 336 | lidt idt_descr |
324 | ljmp $(__KERNEL_CS),$1f | 337 | ljmp $(__KERNEL_CS),$1f |
325 | 1: movl $(__KERNEL_DS),%eax # reload all the segment registers | 338 | 1: movl $(__KERNEL_DS),%eax # reload all the segment registers |
326 | movl %eax,%ss # after changing gdt. | 339 | movl %eax,%ss # after changing gdt. |
340 | movl %eax,%fs # gets reset once there's real percpu | ||
327 | 341 | ||
328 | movl $(__USER_DS),%eax # DS/ES contains default USER segment | 342 | movl $(__USER_DS),%eax # DS/ES contains default USER segment |
329 | movl %eax,%ds | 343 | movl %eax,%ds |
@@ -333,16 +347,17 @@ is386: movl $2,%ecx # set MP | |||
333 | movl %eax,%gs | 347 | movl %eax,%gs |
334 | lldt %ax | 348 | lldt %ax |
335 | 349 | ||
336 | movl $(__KERNEL_PDA),%eax | ||
337 | mov %eax,%fs | ||
338 | |||
339 | cld # gcc2 wants the direction flag cleared at all times | 350 | cld # gcc2 wants the direction flag cleared at all times |
340 | pushl $0 # fake return address for unwinder | 351 | pushl $0 # fake return address for unwinder |
341 | #ifdef CONFIG_SMP | 352 | #ifdef CONFIG_SMP |
342 | movb ready, %cl | 353 | movb ready, %cl |
343 | movb $1, ready | 354 | movb $1, ready |
344 | cmpb $0,%cl # the first CPU calls start_kernel | 355 | cmpb $0,%cl # the first CPU calls start_kernel |
345 | jne initialize_secondary # all other CPUs call initialize_secondary | 356 | je 1f |
357 | movl $(__KERNEL_PERCPU), %eax | ||
358 | movl %eax,%fs # set this cpu's percpu | ||
359 | jmp initialize_secondary # all other CPUs call initialize_secondary | ||
360 | 1: | ||
346 | #endif /* CONFIG_SMP */ | 361 | #endif /* CONFIG_SMP */ |
347 | jmp start_kernel | 362 | jmp start_kernel |
348 | 363 | ||
@@ -366,23 +381,6 @@ check_x87: | |||
366 | ret | 381 | ret |
367 | 382 | ||
368 | /* | 383 | /* |
369 | * Point the GDT at this CPU's PDA. On boot this will be | ||
370 | * cpu_gdt_table and boot_pda; for secondary CPUs, these will be | ||
371 | * that CPU's GDT and PDA. | ||
372 | */ | ||
373 | ENTRY(setup_pda) | ||
374 | /* get the PDA pointer */ | ||
375 | movl start_pda, %eax | ||
376 | |||
377 | /* slot the PDA address into the GDT */ | ||
378 | mov early_gdt_descr+2, %ecx | ||
379 | mov %ax, (__KERNEL_PDA+0+2)(%ecx) /* base & 0x0000ffff */ | ||
380 | shr $16, %eax | ||
381 | mov %al, (__KERNEL_PDA+4+0)(%ecx) /* base & 0x00ff0000 */ | ||
382 | mov %ah, (__KERNEL_PDA+4+3)(%ecx) /* base & 0xff000000 */ | ||
383 | ret | ||
384 | |||
385 | /* | ||
386 | * setup_idt | 384 | * setup_idt |
387 | * | 385 | * |
388 | * sets up a idt with 256 entries pointing to | 386 | * sets up a idt with 256 entries pointing to |
@@ -554,9 +552,6 @@ ENTRY(empty_zero_page) | |||
554 | * This starts the data section. | 552 | * This starts the data section. |
555 | */ | 553 | */ |
556 | .data | 554 | .data |
557 | ENTRY(start_pda) | ||
558 | .long boot_pda | ||
559 | |||
560 | ENTRY(stack_start) | 555 | ENTRY(stack_start) |
561 | .long init_thread_union+THREAD_SIZE | 556 | .long init_thread_union+THREAD_SIZE |
562 | .long __BOOT_DS | 557 | .long __BOOT_DS |
@@ -588,7 +583,7 @@ fault_msg: | |||
588 | .word 0 # 32 bit align gdt_desc.address | 583 | .word 0 # 32 bit align gdt_desc.address |
589 | boot_gdt_descr: | 584 | boot_gdt_descr: |
590 | .word __BOOT_DS+7 | 585 | .word __BOOT_DS+7 |
591 | .long boot_gdt_table - __PAGE_OFFSET | 586 | .long boot_gdt - __PAGE_OFFSET |
592 | 587 | ||
593 | .word 0 # 32-bit align idt_desc.address | 588 | .word 0 # 32-bit align idt_desc.address |
594 | idt_descr: | 589 | idt_descr: |
@@ -599,67 +594,14 @@ idt_descr: | |||
599 | .word 0 # 32 bit align gdt_desc.address | 594 | .word 0 # 32 bit align gdt_desc.address |
600 | ENTRY(early_gdt_descr) | 595 | ENTRY(early_gdt_descr) |
601 | .word GDT_ENTRIES*8-1 | 596 | .word GDT_ENTRIES*8-1 |
602 | .long cpu_gdt_table | 597 | .long per_cpu__gdt_page /* Overwritten for secondary CPUs */ |
603 | 598 | ||
604 | /* | 599 | /* |
605 | * The boot_gdt_table must mirror the equivalent in setup.S and is | 600 | * The boot_gdt must mirror the equivalent in setup.S and is |
606 | * used only for booting. | 601 | * used only for booting. |
607 | */ | 602 | */ |
608 | .align L1_CACHE_BYTES | 603 | .align L1_CACHE_BYTES |
609 | ENTRY(boot_gdt_table) | 604 | ENTRY(boot_gdt) |
610 | .fill GDT_ENTRY_BOOT_CS,8,0 | 605 | .fill GDT_ENTRY_BOOT_CS,8,0 |
611 | .quad 0x00cf9a000000ffff /* kernel 4GB code at 0x00000000 */ | 606 | .quad 0x00cf9a000000ffff /* kernel 4GB code at 0x00000000 */ |
612 | .quad 0x00cf92000000ffff /* kernel 4GB data at 0x00000000 */ | 607 | .quad 0x00cf92000000ffff /* kernel 4GB data at 0x00000000 */ |
613 | |||
614 | /* | ||
615 | * The Global Descriptor Table contains 28 quadwords, per-CPU. | ||
616 | */ | ||
617 | .align L1_CACHE_BYTES | ||
618 | ENTRY(cpu_gdt_table) | ||
619 | .quad 0x0000000000000000 /* NULL descriptor */ | ||
620 | .quad 0x0000000000000000 /* 0x0b reserved */ | ||
621 | .quad 0x0000000000000000 /* 0x13 reserved */ | ||
622 | .quad 0x0000000000000000 /* 0x1b reserved */ | ||
623 | .quad 0x0000000000000000 /* 0x20 unused */ | ||
624 | .quad 0x0000000000000000 /* 0x28 unused */ | ||
625 | .quad 0x0000000000000000 /* 0x33 TLS entry 1 */ | ||
626 | .quad 0x0000000000000000 /* 0x3b TLS entry 2 */ | ||
627 | .quad 0x0000000000000000 /* 0x43 TLS entry 3 */ | ||
628 | .quad 0x0000000000000000 /* 0x4b reserved */ | ||
629 | .quad 0x0000000000000000 /* 0x53 reserved */ | ||
630 | .quad 0x0000000000000000 /* 0x5b reserved */ | ||
631 | |||
632 | .quad 0x00cf9a000000ffff /* 0x60 kernel 4GB code at 0x00000000 */ | ||
633 | .quad 0x00cf92000000ffff /* 0x68 kernel 4GB data at 0x00000000 */ | ||
634 | .quad 0x00cffa000000ffff /* 0x73 user 4GB code at 0x00000000 */ | ||
635 | .quad 0x00cff2000000ffff /* 0x7b user 4GB data at 0x00000000 */ | ||
636 | |||
637 | .quad 0x0000000000000000 /* 0x80 TSS descriptor */ | ||
638 | .quad 0x0000000000000000 /* 0x88 LDT descriptor */ | ||
639 | |||
640 | /* | ||
641 | * Segments used for calling PnP BIOS have byte granularity. | ||
642 | * They code segments and data segments have fixed 64k limits, | ||
643 | * the transfer segment sizes are set at run time. | ||
644 | */ | ||
645 | .quad 0x00409a000000ffff /* 0x90 32-bit code */ | ||
646 | .quad 0x00009a000000ffff /* 0x98 16-bit code */ | ||
647 | .quad 0x000092000000ffff /* 0xa0 16-bit data */ | ||
648 | .quad 0x0000920000000000 /* 0xa8 16-bit data */ | ||
649 | .quad 0x0000920000000000 /* 0xb0 16-bit data */ | ||
650 | |||
651 | /* | ||
652 | * The APM segments have byte granularity and their bases | ||
653 | * are set at run time. All have 64k limits. | ||
654 | */ | ||
655 | .quad 0x00409a000000ffff /* 0xb8 APM CS code */ | ||
656 | .quad 0x00009a000000ffff /* 0xc0 APM CS 16 code (16 bit) */ | ||
657 | .quad 0x004092000000ffff /* 0xc8 APM DS data */ | ||
658 | |||
659 | .quad 0x00c0920000000000 /* 0xd0 - ESPFIX SS */ | ||
660 | .quad 0x00cf92000000ffff /* 0xd8 - PDA */ | ||
661 | .quad 0x0000000000000000 /* 0xe0 - unused */ | ||
662 | .quad 0x0000000000000000 /* 0xe8 - unused */ | ||
663 | .quad 0x0000000000000000 /* 0xf0 - unused */ | ||
664 | .quad 0x0000000000000000 /* 0xf8 - GDT entry 31: double-fault TSS */ | ||
665 | |||
diff --git a/arch/i386/kernel/i386_ksyms.c b/arch/i386/kernel/i386_ksyms.c index 4afe26e8626..e3d4b73bfdb 100644 --- a/arch/i386/kernel/i386_ksyms.c +++ b/arch/i386/kernel/i386_ksyms.c | |||
@@ -28,5 +28,3 @@ EXPORT_SYMBOL(__read_lock_failed); | |||
28 | #endif | 28 | #endif |
29 | 29 | ||
30 | EXPORT_SYMBOL(csum_partial); | 30 | EXPORT_SYMBOL(csum_partial); |
31 | |||
32 | EXPORT_SYMBOL(_proxy_pda); | ||
diff --git a/arch/i386/kernel/io_apic.c b/arch/i386/kernel/io_apic.c index 89d85d24492..1b623cda3a6 100644 --- a/arch/i386/kernel/io_apic.c +++ b/arch/i386/kernel/io_apic.c | |||
@@ -35,6 +35,7 @@ | |||
35 | #include <linux/msi.h> | 35 | #include <linux/msi.h> |
36 | #include <linux/htirq.h> | 36 | #include <linux/htirq.h> |
37 | #include <linux/freezer.h> | 37 | #include <linux/freezer.h> |
38 | #include <linux/kthread.h> | ||
38 | 39 | ||
39 | #include <asm/io.h> | 40 | #include <asm/io.h> |
40 | #include <asm/smp.h> | 41 | #include <asm/smp.h> |
@@ -661,8 +662,6 @@ static int balanced_irq(void *unused) | |||
661 | unsigned long prev_balance_time = jiffies; | 662 | unsigned long prev_balance_time = jiffies; |
662 | long time_remaining = balanced_irq_interval; | 663 | long time_remaining = balanced_irq_interval; |
663 | 664 | ||
664 | daemonize("kirqd"); | ||
665 | |||
666 | /* push everything to CPU 0 to give us a starting point. */ | 665 | /* push everything to CPU 0 to give us a starting point. */ |
667 | for (i = 0 ; i < NR_IRQS ; i++) { | 666 | for (i = 0 ; i < NR_IRQS ; i++) { |
668 | irq_desc[i].pending_mask = cpumask_of_cpu(0); | 667 | irq_desc[i].pending_mask = cpumask_of_cpu(0); |
@@ -722,10 +721,9 @@ static int __init balanced_irq_init(void) | |||
722 | } | 721 | } |
723 | 722 | ||
724 | printk(KERN_INFO "Starting balanced_irq\n"); | 723 | printk(KERN_INFO "Starting balanced_irq\n"); |
725 | if (kernel_thread(balanced_irq, NULL, CLONE_KERNEL) >= 0) | 724 | if (!IS_ERR(kthread_run(balanced_irq, NULL, "kirqd"))) |
726 | return 0; | 725 | return 0; |
727 | else | 726 | printk(KERN_ERR "balanced_irq_init: failed to spawn balanced_irq"); |
728 | printk(KERN_ERR "balanced_irq_init: failed to spawn balanced_irq"); | ||
729 | failed: | 727 | failed: |
730 | for_each_possible_cpu(i) { | 728 | for_each_possible_cpu(i) { |
731 | kfree(irq_cpu_data[i].irq_delta); | 729 | kfree(irq_cpu_data[i].irq_delta); |
@@ -1403,10 +1401,6 @@ static void __init setup_ExtINT_IRQ0_pin(unsigned int apic, unsigned int pin, in | |||
1403 | enable_8259A_irq(0); | 1401 | enable_8259A_irq(0); |
1404 | } | 1402 | } |
1405 | 1403 | ||
1406 | static inline void UNEXPECTED_IO_APIC(void) | ||
1407 | { | ||
1408 | } | ||
1409 | |||
1410 | void __init print_IO_APIC(void) | 1404 | void __init print_IO_APIC(void) |
1411 | { | 1405 | { |
1412 | int apic, i; | 1406 | int apic, i; |
@@ -1446,34 +1440,12 @@ void __init print_IO_APIC(void) | |||
1446 | printk(KERN_DEBUG "....... : physical APIC id: %02X\n", reg_00.bits.ID); | 1440 | printk(KERN_DEBUG "....... : physical APIC id: %02X\n", reg_00.bits.ID); |
1447 | printk(KERN_DEBUG "....... : Delivery Type: %X\n", reg_00.bits.delivery_type); | 1441 | printk(KERN_DEBUG "....... : Delivery Type: %X\n", reg_00.bits.delivery_type); |
1448 | printk(KERN_DEBUG "....... : LTS : %X\n", reg_00.bits.LTS); | 1442 | printk(KERN_DEBUG "....... : LTS : %X\n", reg_00.bits.LTS); |
1449 | if (reg_00.bits.ID >= get_physical_broadcast()) | ||
1450 | UNEXPECTED_IO_APIC(); | ||
1451 | if (reg_00.bits.__reserved_1 || reg_00.bits.__reserved_2) | ||
1452 | UNEXPECTED_IO_APIC(); | ||
1453 | 1443 | ||
1454 | printk(KERN_DEBUG ".... register #01: %08X\n", reg_01.raw); | 1444 | printk(KERN_DEBUG ".... register #01: %08X\n", reg_01.raw); |
1455 | printk(KERN_DEBUG "....... : max redirection entries: %04X\n", reg_01.bits.entries); | 1445 | printk(KERN_DEBUG "....... : max redirection entries: %04X\n", reg_01.bits.entries); |
1456 | if ( (reg_01.bits.entries != 0x0f) && /* older (Neptune) boards */ | ||
1457 | (reg_01.bits.entries != 0x17) && /* typical ISA+PCI boards */ | ||
1458 | (reg_01.bits.entries != 0x1b) && /* Compaq Proliant boards */ | ||
1459 | (reg_01.bits.entries != 0x1f) && /* dual Xeon boards */ | ||
1460 | (reg_01.bits.entries != 0x22) && /* bigger Xeon boards */ | ||
1461 | (reg_01.bits.entries != 0x2E) && | ||
1462 | (reg_01.bits.entries != 0x3F) | ||
1463 | ) | ||
1464 | UNEXPECTED_IO_APIC(); | ||
1465 | 1446 | ||
1466 | printk(KERN_DEBUG "....... : PRQ implemented: %X\n", reg_01.bits.PRQ); | 1447 | printk(KERN_DEBUG "....... : PRQ implemented: %X\n", reg_01.bits.PRQ); |
1467 | printk(KERN_DEBUG "....... : IO APIC version: %04X\n", reg_01.bits.version); | 1448 | printk(KERN_DEBUG "....... : IO APIC version: %04X\n", reg_01.bits.version); |
1468 | if ( (reg_01.bits.version != 0x01) && /* 82489DX IO-APICs */ | ||
1469 | (reg_01.bits.version != 0x10) && /* oldest IO-APICs */ | ||
1470 | (reg_01.bits.version != 0x11) && /* Pentium/Pro IO-APICs */ | ||
1471 | (reg_01.bits.version != 0x13) && /* Xeon IO-APICs */ | ||
1472 | (reg_01.bits.version != 0x20) /* Intel P64H (82806 AA) */ | ||
1473 | ) | ||
1474 | UNEXPECTED_IO_APIC(); | ||
1475 | if (reg_01.bits.__reserved_1 || reg_01.bits.__reserved_2) | ||
1476 | UNEXPECTED_IO_APIC(); | ||
1477 | 1449 | ||
1478 | /* | 1450 | /* |
1479 | * Some Intel chipsets with IO APIC VERSION of 0x1? don't have reg_02, | 1451 | * Some Intel chipsets with IO APIC VERSION of 0x1? don't have reg_02, |
@@ -1483,8 +1455,6 @@ void __init print_IO_APIC(void) | |||
1483 | if (reg_01.bits.version >= 0x10 && reg_02.raw != reg_01.raw) { | 1455 | if (reg_01.bits.version >= 0x10 && reg_02.raw != reg_01.raw) { |
1484 | printk(KERN_DEBUG ".... register #02: %08X\n", reg_02.raw); | 1456 | printk(KERN_DEBUG ".... register #02: %08X\n", reg_02.raw); |
1485 | printk(KERN_DEBUG "....... : arbitration: %02X\n", reg_02.bits.arbitration); | 1457 | printk(KERN_DEBUG "....... : arbitration: %02X\n", reg_02.bits.arbitration); |
1486 | if (reg_02.bits.__reserved_1 || reg_02.bits.__reserved_2) | ||
1487 | UNEXPECTED_IO_APIC(); | ||
1488 | } | 1458 | } |
1489 | 1459 | ||
1490 | /* | 1460 | /* |
@@ -1496,8 +1466,6 @@ void __init print_IO_APIC(void) | |||
1496 | reg_03.raw != reg_01.raw) { | 1466 | reg_03.raw != reg_01.raw) { |
1497 | printk(KERN_DEBUG ".... register #03: %08X\n", reg_03.raw); | 1467 | printk(KERN_DEBUG ".... register #03: %08X\n", reg_03.raw); |
1498 | printk(KERN_DEBUG "....... : Boot DT : %X\n", reg_03.bits.boot_DT); | 1468 | printk(KERN_DEBUG "....... : Boot DT : %X\n", reg_03.bits.boot_DT); |
1499 | if (reg_03.bits.__reserved_1) | ||
1500 | UNEXPECTED_IO_APIC(); | ||
1501 | } | 1469 | } |
1502 | 1470 | ||
1503 | printk(KERN_DEBUG ".... IRQ redirection table:\n"); | 1471 | printk(KERN_DEBUG ".... IRQ redirection table:\n"); |
diff --git a/arch/i386/kernel/ioport.c b/arch/i386/kernel/ioport.c index 498e8bc197d..d1e42e0dbe6 100644 --- a/arch/i386/kernel/ioport.c +++ b/arch/i386/kernel/ioport.c | |||
@@ -16,6 +16,7 @@ | |||
16 | #include <linux/stddef.h> | 16 | #include <linux/stddef.h> |
17 | #include <linux/slab.h> | 17 | #include <linux/slab.h> |
18 | #include <linux/thread_info.h> | 18 | #include <linux/thread_info.h> |
19 | #include <linux/syscalls.h> | ||
19 | 20 | ||
20 | /* Set EXTENT bits starting at BASE in BITMAP to value TURN_ON. */ | 21 | /* Set EXTENT bits starting at BASE in BITMAP to value TURN_ON. */ |
21 | static void set_bitmap(unsigned long *bitmap, unsigned int base, unsigned int extent, int new_value) | 22 | static void set_bitmap(unsigned long *bitmap, unsigned int base, unsigned int extent, int new_value) |
@@ -113,7 +114,7 @@ asmlinkage long sys_ioperm(unsigned long from, unsigned long num, int turn_on) | |||
113 | * Reset the owner so that a process switch will not set | 114 | * Reset the owner so that a process switch will not set |
114 | * tss->io_bitmap_base to IO_BITMAP_OFFSET. | 115 | * tss->io_bitmap_base to IO_BITMAP_OFFSET. |
115 | */ | 116 | */ |
116 | tss->io_bitmap_base = INVALID_IO_BITMAP_OFFSET_LAZY; | 117 | tss->x86_tss.io_bitmap_base = INVALID_IO_BITMAP_OFFSET_LAZY; |
117 | tss->io_bitmap_owner = NULL; | 118 | tss->io_bitmap_owner = NULL; |
118 | 119 | ||
119 | put_cpu(); | 120 | put_cpu(); |
diff --git a/arch/i386/kernel/irq.c b/arch/i386/kernel/irq.c index 8db8d514c9c..d2daf672f4a 100644 --- a/arch/i386/kernel/irq.c +++ b/arch/i386/kernel/irq.c | |||
@@ -24,6 +24,9 @@ | |||
24 | DEFINE_PER_CPU(irq_cpustat_t, irq_stat) ____cacheline_internodealigned_in_smp; | 24 | DEFINE_PER_CPU(irq_cpustat_t, irq_stat) ____cacheline_internodealigned_in_smp; |
25 | EXPORT_PER_CPU_SYMBOL(irq_stat); | 25 | EXPORT_PER_CPU_SYMBOL(irq_stat); |
26 | 26 | ||
27 | DEFINE_PER_CPU(struct pt_regs *, irq_regs); | ||
28 | EXPORT_PER_CPU_SYMBOL(irq_regs); | ||
29 | |||
27 | /* | 30 | /* |
28 | * 'what should we do if we get a hw irq event on an illegal vector'. | 31 | * 'what should we do if we get a hw irq event on an illegal vector'. |
29 | * each architecture has to answer this themselves. | 32 | * each architecture has to answer this themselves. |
diff --git a/arch/i386/kernel/mpparse.c b/arch/i386/kernel/mpparse.c index 4f5983c9866..0952eccd8f2 100644 --- a/arch/i386/kernel/mpparse.c +++ b/arch/i386/kernel/mpparse.c | |||
@@ -477,7 +477,7 @@ static int __init smp_read_mpc(struct mp_config_table *mpc) | |||
477 | } | 477 | } |
478 | ++mpc_record; | 478 | ++mpc_record; |
479 | } | 479 | } |
480 | clustered_apic_check(); | 480 | setup_apic_routing(); |
481 | if (!num_processors) | 481 | if (!num_processors) |
482 | printk(KERN_ERR "SMP mptable: no processors registered!\n"); | 482 | printk(KERN_ERR "SMP mptable: no processors registered!\n"); |
483 | return num_processors; | 483 | return num_processors; |
diff --git a/arch/i386/kernel/nmi.c b/arch/i386/kernel/nmi.c index 84c3497efb6..33cf2f3c444 100644 --- a/arch/i386/kernel/nmi.c +++ b/arch/i386/kernel/nmi.c | |||
@@ -20,7 +20,6 @@ | |||
20 | #include <linux/sysdev.h> | 20 | #include <linux/sysdev.h> |
21 | #include <linux/sysctl.h> | 21 | #include <linux/sysctl.h> |
22 | #include <linux/percpu.h> | 22 | #include <linux/percpu.h> |
23 | #include <linux/dmi.h> | ||
24 | #include <linux/kprobes.h> | 23 | #include <linux/kprobes.h> |
25 | #include <linux/cpumask.h> | 24 | #include <linux/cpumask.h> |
26 | #include <linux/kernel_stat.h> | 25 | #include <linux/kernel_stat.h> |
@@ -28,30 +27,14 @@ | |||
28 | #include <asm/smp.h> | 27 | #include <asm/smp.h> |
29 | #include <asm/nmi.h> | 28 | #include <asm/nmi.h> |
30 | #include <asm/kdebug.h> | 29 | #include <asm/kdebug.h> |
31 | #include <asm/intel_arch_perfmon.h> | ||
32 | 30 | ||
33 | #include "mach_traps.h" | 31 | #include "mach_traps.h" |
34 | 32 | ||
35 | int unknown_nmi_panic; | 33 | int unknown_nmi_panic; |
36 | int nmi_watchdog_enabled; | 34 | int nmi_watchdog_enabled; |
37 | 35 | ||
38 | /* perfctr_nmi_owner tracks the ownership of the perfctr registers: | ||
39 | * evtsel_nmi_owner tracks the ownership of the event selection | ||
40 | * - different performance counters/ event selection may be reserved for | ||
41 | * different subsystems this reservation system just tries to coordinate | ||
42 | * things a little | ||
43 | */ | ||
44 | |||
45 | /* this number is calculated from Intel's MSR_P4_CRU_ESCR5 register and it's | ||
46 | * offset from MSR_P4_BSU_ESCR0. It will be the max for all platforms (for now) | ||
47 | */ | ||
48 | #define NMI_MAX_COUNTER_BITS 66 | ||
49 | #define NMI_MAX_COUNTER_LONGS BITS_TO_LONGS(NMI_MAX_COUNTER_BITS) | ||
50 | |||
51 | static DEFINE_PER_CPU(unsigned long, perfctr_nmi_owner[NMI_MAX_COUNTER_LONGS]); | ||
52 | static DEFINE_PER_CPU(unsigned long, evntsel_nmi_owner[NMI_MAX_COUNTER_LONGS]); | ||
53 | |||
54 | static cpumask_t backtrace_mask = CPU_MASK_NONE; | 36 | static cpumask_t backtrace_mask = CPU_MASK_NONE; |
37 | |||
55 | /* nmi_active: | 38 | /* nmi_active: |
56 | * >0: the lapic NMI watchdog is active, but can be disabled | 39 | * >0: the lapic NMI watchdog is active, but can be disabled |
57 | * <0: the lapic NMI watchdog has not been set up, and cannot | 40 | * <0: the lapic NMI watchdog has not been set up, and cannot |
@@ -63,206 +46,11 @@ atomic_t nmi_active = ATOMIC_INIT(0); /* oprofile uses this */ | |||
63 | unsigned int nmi_watchdog = NMI_DEFAULT; | 46 | unsigned int nmi_watchdog = NMI_DEFAULT; |
64 | static unsigned int nmi_hz = HZ; | 47 | static unsigned int nmi_hz = HZ; |
65 | 48 | ||
66 | struct nmi_watchdog_ctlblk { | 49 | static DEFINE_PER_CPU(short, wd_enabled); |
67 | int enabled; | ||
68 | u64 check_bit; | ||
69 | unsigned int cccr_msr; | ||
70 | unsigned int perfctr_msr; /* the MSR to reset in NMI handler */ | ||
71 | unsigned int evntsel_msr; /* the MSR to select the events to handle */ | ||
72 | }; | ||
73 | static DEFINE_PER_CPU(struct nmi_watchdog_ctlblk, nmi_watchdog_ctlblk); | ||
74 | 50 | ||
75 | /* local prototypes */ | 51 | /* local prototypes */ |
76 | static int unknown_nmi_panic_callback(struct pt_regs *regs, int cpu); | 52 | static int unknown_nmi_panic_callback(struct pt_regs *regs, int cpu); |
77 | 53 | ||
78 | extern void show_registers(struct pt_regs *regs); | ||
79 | extern int unknown_nmi_panic; | ||
80 | |||
81 | /* converts an msr to an appropriate reservation bit */ | ||
82 | static inline unsigned int nmi_perfctr_msr_to_bit(unsigned int msr) | ||
83 | { | ||
84 | /* returns the bit offset of the performance counter register */ | ||
85 | switch (boot_cpu_data.x86_vendor) { | ||
86 | case X86_VENDOR_AMD: | ||
87 | return (msr - MSR_K7_PERFCTR0); | ||
88 | case X86_VENDOR_INTEL: | ||
89 | if (cpu_has(&boot_cpu_data, X86_FEATURE_ARCH_PERFMON)) | ||
90 | return (msr - MSR_ARCH_PERFMON_PERFCTR0); | ||
91 | |||
92 | switch (boot_cpu_data.x86) { | ||
93 | case 6: | ||
94 | return (msr - MSR_P6_PERFCTR0); | ||
95 | case 15: | ||
96 | return (msr - MSR_P4_BPU_PERFCTR0); | ||
97 | } | ||
98 | } | ||
99 | return 0; | ||
100 | } | ||
101 | |||
102 | /* converts an msr to an appropriate reservation bit */ | ||
103 | static inline unsigned int nmi_evntsel_msr_to_bit(unsigned int msr) | ||
104 | { | ||
105 | /* returns the bit offset of the event selection register */ | ||
106 | switch (boot_cpu_data.x86_vendor) { | ||
107 | case X86_VENDOR_AMD: | ||
108 | return (msr - MSR_K7_EVNTSEL0); | ||
109 | case X86_VENDOR_INTEL: | ||
110 | if (cpu_has(&boot_cpu_data, X86_FEATURE_ARCH_PERFMON)) | ||
111 | return (msr - MSR_ARCH_PERFMON_EVENTSEL0); | ||
112 | |||
113 | switch (boot_cpu_data.x86) { | ||
114 | case 6: | ||
115 | return (msr - MSR_P6_EVNTSEL0); | ||
116 | case 15: | ||
117 | return (msr - MSR_P4_BSU_ESCR0); | ||
118 | } | ||
119 | } | ||
120 | return 0; | ||
121 | } | ||
122 | |||
123 | /* checks for a bit availability (hack for oprofile) */ | ||
124 | int avail_to_resrv_perfctr_nmi_bit(unsigned int counter) | ||
125 | { | ||
126 | int cpu; | ||
127 | BUG_ON(counter > NMI_MAX_COUNTER_BITS); | ||
128 | for_each_possible_cpu (cpu) { | ||
129 | if (test_bit(counter, &per_cpu(perfctr_nmi_owner, cpu)[0])) | ||
130 | return 0; | ||
131 | } | ||
132 | return 1; | ||
133 | } | ||
134 | |||
135 | /* checks the an msr for availability */ | ||
136 | int avail_to_resrv_perfctr_nmi(unsigned int msr) | ||
137 | { | ||
138 | unsigned int counter; | ||
139 | int cpu; | ||
140 | |||
141 | counter = nmi_perfctr_msr_to_bit(msr); | ||
142 | BUG_ON(counter > NMI_MAX_COUNTER_BITS); | ||
143 | |||
144 | for_each_possible_cpu (cpu) { | ||
145 | if (test_bit(counter, &per_cpu(perfctr_nmi_owner, cpu)[0])) | ||
146 | return 0; | ||
147 | } | ||
148 | return 1; | ||
149 | } | ||
150 | |||
151 | static int __reserve_perfctr_nmi(int cpu, unsigned int msr) | ||
152 | { | ||
153 | unsigned int counter; | ||
154 | if (cpu < 0) | ||
155 | cpu = smp_processor_id(); | ||
156 | |||
157 | counter = nmi_perfctr_msr_to_bit(msr); | ||
158 | BUG_ON(counter > NMI_MAX_COUNTER_BITS); | ||
159 | |||
160 | if (!test_and_set_bit(counter, &per_cpu(perfctr_nmi_owner, cpu)[0])) | ||
161 | return 1; | ||
162 | return 0; | ||
163 | } | ||
164 | |||
165 | static void __release_perfctr_nmi(int cpu, unsigned int msr) | ||
166 | { | ||
167 | unsigned int counter; | ||
168 | if (cpu < 0) | ||
169 | cpu = smp_processor_id(); | ||
170 | |||
171 | counter = nmi_perfctr_msr_to_bit(msr); | ||
172 | BUG_ON(counter > NMI_MAX_COUNTER_BITS); | ||
173 | |||
174 | clear_bit(counter, &per_cpu(perfctr_nmi_owner, cpu)[0]); | ||
175 | } | ||
176 | |||
177 | int reserve_perfctr_nmi(unsigned int msr) | ||
178 | { | ||
179 | int cpu, i; | ||
180 | for_each_possible_cpu (cpu) { | ||
181 | if (!__reserve_perfctr_nmi(cpu, msr)) { | ||
182 | for_each_possible_cpu (i) { | ||
183 | if (i >= cpu) | ||
184 | break; | ||
185 | __release_perfctr_nmi(i, msr); | ||
186 | } | ||
187 | return 0; | ||
188 | } | ||
189 | } | ||
190 | return 1; | ||
191 | } | ||
192 | |||
193 | void release_perfctr_nmi(unsigned int msr) | ||
194 | { | ||
195 | int cpu; | ||
196 | for_each_possible_cpu (cpu) { | ||
197 | __release_perfctr_nmi(cpu, msr); | ||
198 | } | ||
199 | } | ||
200 | |||
201 | int __reserve_evntsel_nmi(int cpu, unsigned int msr) | ||
202 | { | ||
203 | unsigned int counter; | ||
204 | if (cpu < 0) | ||
205 | cpu = smp_processor_id(); | ||
206 | |||
207 | counter = nmi_evntsel_msr_to_bit(msr); | ||
208 | BUG_ON(counter > NMI_MAX_COUNTER_BITS); | ||
209 | |||
210 | if (!test_and_set_bit(counter, &per_cpu(evntsel_nmi_owner, cpu)[0])) | ||
211 | return 1; | ||
212 | return 0; | ||
213 | } | ||
214 | |||
215 | static void __release_evntsel_nmi(int cpu, unsigned int msr) | ||
216 | { | ||
217 | unsigned int counter; | ||
218 | if (cpu < 0) | ||
219 | cpu = smp_processor_id(); | ||
220 | |||
221 | counter = nmi_evntsel_msr_to_bit(msr); | ||
222 | BUG_ON(counter > NMI_MAX_COUNTER_BITS); | ||
223 | |||
224 | clear_bit(counter, &per_cpu(evntsel_nmi_owner, cpu)[0]); | ||
225 | } | ||
226 | |||
227 | int reserve_evntsel_nmi(unsigned int msr) | ||
228 | { | ||
229 | int cpu, i; | ||
230 | for_each_possible_cpu (cpu) { | ||
231 | if (!__reserve_evntsel_nmi(cpu, msr)) { | ||
232 | for_each_possible_cpu (i) { | ||
233 | if (i >= cpu) | ||
234 | break; | ||
235 | __release_evntsel_nmi(i, msr); | ||
236 | } | ||
237 | return 0; | ||
238 | } | ||
239 | } | ||
240 | return 1; | ||
241 | } | ||
242 | |||
243 | void release_evntsel_nmi(unsigned int msr) | ||
244 | { | ||
245 | int cpu; | ||
246 | for_each_possible_cpu (cpu) { | ||
247 | __release_evntsel_nmi(cpu, msr); | ||
248 | } | ||
249 | } | ||
250 | |||
251 | static __cpuinit inline int nmi_known_cpu(void) | ||
252 | { | ||
253 | switch (boot_cpu_data.x86_vendor) { | ||
254 | case X86_VENDOR_AMD: | ||
255 | return ((boot_cpu_data.x86 == 15) || (boot_cpu_data.x86 == 6) | ||
256 | || (boot_cpu_data.x86 == 16)); | ||
257 | case X86_VENDOR_INTEL: | ||
258 | if (cpu_has(&boot_cpu_data, X86_FEATURE_ARCH_PERFMON)) | ||
259 | return 1; | ||
260 | else | ||
261 | return ((boot_cpu_data.x86 == 15) || (boot_cpu_data.x86 == 6)); | ||
262 | } | ||
263 | return 0; | ||
264 | } | ||
265 | |||
266 | static int endflag __initdata = 0; | 54 | static int endflag __initdata = 0; |
267 | 55 | ||
268 | #ifdef CONFIG_SMP | 56 | #ifdef CONFIG_SMP |
@@ -284,28 +72,6 @@ static __init void nmi_cpu_busy(void *data) | |||
284 | } | 72 | } |
285 | #endif | 73 | #endif |
286 | 74 | ||
287 | static unsigned int adjust_for_32bit_ctr(unsigned int hz) | ||
288 | { | ||
289 | u64 counter_val; | ||
290 | unsigned int retval = hz; | ||
291 | |||
292 | /* | ||
293 | * On Intel CPUs with P6/ARCH_PERFMON only 32 bits in the counter | ||
294 | * are writable, with higher bits sign extending from bit 31. | ||
295 | * So, we can only program the counter with 31 bit values and | ||
296 | * 32nd bit should be 1, for 33.. to be 1. | ||
297 | * Find the appropriate nmi_hz | ||
298 | */ | ||
299 | counter_val = (u64)cpu_khz * 1000; | ||
300 | do_div(counter_val, retval); | ||
301 | if (counter_val > 0x7fffffffULL) { | ||
302 | u64 count = (u64)cpu_khz * 1000; | ||
303 | do_div(count, 0x7fffffffUL); | ||
304 | retval = count + 1; | ||
305 | } | ||
306 | return retval; | ||
307 | } | ||
308 | |||
309 | static int __init check_nmi_watchdog(void) | 75 | static int __init check_nmi_watchdog(void) |
310 | { | 76 | { |
311 | unsigned int *prev_nmi_count; | 77 | unsigned int *prev_nmi_count; |
@@ -338,14 +104,14 @@ static int __init check_nmi_watchdog(void) | |||
338 | if (!cpu_isset(cpu, cpu_callin_map)) | 104 | if (!cpu_isset(cpu, cpu_callin_map)) |
339 | continue; | 105 | continue; |
340 | #endif | 106 | #endif |
341 | if (!per_cpu(nmi_watchdog_ctlblk, cpu).enabled) | 107 | if (!per_cpu(wd_enabled, cpu)) |
342 | continue; | 108 | continue; |
343 | if (nmi_count(cpu) - prev_nmi_count[cpu] <= 5) { | 109 | if (nmi_count(cpu) - prev_nmi_count[cpu] <= 5) { |
344 | printk("CPU#%d: NMI appears to be stuck (%d->%d)!\n", | 110 | printk("CPU#%d: NMI appears to be stuck (%d->%d)!\n", |
345 | cpu, | 111 | cpu, |
346 | prev_nmi_count[cpu], | 112 | prev_nmi_count[cpu], |
347 | nmi_count(cpu)); | 113 | nmi_count(cpu)); |
348 | per_cpu(nmi_watchdog_ctlblk, cpu).enabled = 0; | 114 | per_cpu(wd_enabled, cpu) = 0; |
349 | atomic_dec(&nmi_active); | 115 | atomic_dec(&nmi_active); |
350 | } | 116 | } |
351 | } | 117 | } |
@@ -359,16 +125,8 @@ static int __init check_nmi_watchdog(void) | |||
359 | 125 | ||
360 | /* now that we know it works we can reduce NMI frequency to | 126 | /* now that we know it works we can reduce NMI frequency to |
361 | something more reasonable; makes a difference in some configs */ | 127 | something more reasonable; makes a difference in some configs */ |
362 | if (nmi_watchdog == NMI_LOCAL_APIC) { | 128 | if (nmi_watchdog == NMI_LOCAL_APIC) |
363 | struct nmi_watchdog_ctlblk *wd = &__get_cpu_var(nmi_watchdog_ctlblk); | 129 | nmi_hz = lapic_adjust_nmi_hz(1); |
364 | |||
365 | nmi_hz = 1; | ||
366 | |||
367 | if (wd->perfctr_msr == MSR_P6_PERFCTR0 || | ||
368 | wd->perfctr_msr == MSR_ARCH_PERFMON_PERFCTR0) { | ||
369 | nmi_hz = adjust_for_32bit_ctr(nmi_hz); | ||
370 | } | ||
371 | } | ||
372 | 130 | ||
373 | kfree(prev_nmi_count); | 131 | kfree(prev_nmi_count); |
374 | return 0; | 132 | return 0; |
@@ -391,85 +149,8 @@ static int __init setup_nmi_watchdog(char *str) | |||
391 | 149 | ||
392 | __setup("nmi_watchdog=", setup_nmi_watchdog); | 150 | __setup("nmi_watchdog=", setup_nmi_watchdog); |
393 | 151 | ||
394 | static void disable_lapic_nmi_watchdog(void) | ||
395 | { | ||
396 | BUG_ON(nmi_watchdog != NMI_LOCAL_APIC); | ||
397 | |||
398 | if (atomic_read(&nmi_active) <= 0) | ||
399 | return; | ||
400 | |||
401 | on_each_cpu(stop_apic_nmi_watchdog, NULL, 0, 1); | ||
402 | |||
403 | BUG_ON(atomic_read(&nmi_active) != 0); | ||
404 | } | ||
405 | |||
406 | static void enable_lapic_nmi_watchdog(void) | ||
407 | { | ||
408 | BUG_ON(nmi_watchdog != NMI_LOCAL_APIC); | ||
409 | |||
410 | /* are we already enabled */ | ||
411 | if (atomic_read(&nmi_active) != 0) | ||
412 | return; | ||
413 | |||
414 | /* are we lapic aware */ | ||
415 | if (nmi_known_cpu() <= 0) | ||
416 | return; | ||
417 | 152 | ||
418 | on_each_cpu(setup_apic_nmi_watchdog, NULL, 0, 1); | 153 | /* Suspend/resume support */ |
419 | touch_nmi_watchdog(); | ||
420 | } | ||
421 | |||
422 | void disable_timer_nmi_watchdog(void) | ||
423 | { | ||
424 | BUG_ON(nmi_watchdog != NMI_IO_APIC); | ||
425 | |||
426 | if (atomic_read(&nmi_active) <= 0) | ||
427 | return; | ||
428 | |||
429 | disable_irq(0); | ||
430 | on_each_cpu(stop_apic_nmi_watchdog, NULL, 0, 1); | ||
431 | |||
432 | BUG_ON(atomic_read(&nmi_active) != 0); | ||
433 | } | ||
434 | |||
435 | void enable_timer_nmi_watchdog(void) | ||
436 | { | ||
437 | BUG_ON(nmi_watchdog != NMI_IO_APIC); | ||
438 | |||
439 | if (atomic_read(&nmi_active) == 0) { | ||
440 | touch_nmi_watchdog(); | ||
441 | on_each_cpu(setup_apic_nmi_watchdog, NULL, 0, 1); | ||
442 | enable_irq(0); | ||
443 | } | ||
444 | } | ||
445 | |||
446 | static void __acpi_nmi_disable(void *__unused) | ||
447 | { | ||
448 | apic_write_around(APIC_LVT0, APIC_DM_NMI | APIC_LVT_MASKED); | ||
449 | } | ||
450 | |||
451 | /* | ||
452 | * Disable timer based NMIs on all CPUs: | ||
453 | */ | ||
454 | void acpi_nmi_disable(void) | ||
455 | { | ||
456 | if (atomic_read(&nmi_active) && nmi_watchdog == NMI_IO_APIC) | ||
457 | on_each_cpu(__acpi_nmi_disable, NULL, 0, 1); | ||
458 | } | ||
459 | |||
460 | static void __acpi_nmi_enable(void *__unused) | ||
461 | { | ||
462 | apic_write_around(APIC_LVT0, APIC_DM_NMI); | ||
463 | } | ||
464 | |||
465 | /* | ||
466 | * Enable timer based NMIs on all CPUs: | ||
467 | */ | ||
468 | void acpi_nmi_enable(void) | ||
469 | { | ||
470 | if (atomic_read(&nmi_active) && nmi_watchdog == NMI_IO_APIC) | ||
471 | on_each_cpu(__acpi_nmi_enable, NULL, 0, 1); | ||
472 | } | ||
473 | 154 | ||
474 | #ifdef CONFIG_PM | 155 | #ifdef CONFIG_PM |
475 | 156 | ||
@@ -516,7 +197,7 @@ static int __init init_lapic_nmi_sysfs(void) | |||
516 | if (nmi_watchdog != NMI_LOCAL_APIC) | 197 | if (nmi_watchdog != NMI_LOCAL_APIC) |
517 | return 0; | 198 | return 0; |
518 | 199 | ||
519 | if ( atomic_read(&nmi_active) < 0 ) | 200 | if (atomic_read(&nmi_active) < 0) |
520 | return 0; | 201 | return 0; |
521 | 202 | ||
522 | error = sysdev_class_register(&nmi_sysclass); | 203 | error = sysdev_class_register(&nmi_sysclass); |
@@ -529,433 +210,69 @@ late_initcall(init_lapic_nmi_sysfs); | |||
529 | 210 | ||
530 | #endif /* CONFIG_PM */ | 211 | #endif /* CONFIG_PM */ |
531 | 212 | ||
532 | /* | 213 | static void __acpi_nmi_enable(void *__unused) |
533 | * Activate the NMI watchdog via the local APIC. | ||
534 | * Original code written by Keith Owens. | ||
535 | */ | ||
536 | |||
537 | static void write_watchdog_counter(unsigned int perfctr_msr, const char *descr) | ||
538 | { | ||
539 | u64 count = (u64)cpu_khz * 1000; | ||
540 | |||
541 | do_div(count, nmi_hz); | ||
542 | if(descr) | ||
543 | Dprintk("setting %s to -0x%08Lx\n", descr, count); | ||
544 | wrmsrl(perfctr_msr, 0 - count); | ||
545 | } | ||
546 | |||
547 | static void write_watchdog_counter32(unsigned int perfctr_msr, | ||
548 | const char *descr) | ||
549 | { | ||
550 | u64 count = (u64)cpu_khz * 1000; | ||
551 | |||
552 | do_div(count, nmi_hz); | ||
553 | if(descr) | ||
554 | Dprintk("setting %s to -0x%08Lx\n", descr, count); | ||
555 | wrmsr(perfctr_msr, (u32)(-count), 0); | ||
556 | } | ||
557 | |||
558 | /* Note that these events don't tick when the CPU idles. This means | ||
559 | the frequency varies with CPU load. */ | ||
560 | |||
561 | #define K7_EVNTSEL_ENABLE (1 << 22) | ||
562 | #define K7_EVNTSEL_INT (1 << 20) | ||
563 | #define K7_EVNTSEL_OS (1 << 17) | ||
564 | #define K7_EVNTSEL_USR (1 << 16) | ||
565 | #define K7_EVENT_CYCLES_PROCESSOR_IS_RUNNING 0x76 | ||
566 | #define K7_NMI_EVENT K7_EVENT_CYCLES_PROCESSOR_IS_RUNNING | ||
567 | |||
568 | static int setup_k7_watchdog(void) | ||
569 | { | ||
570 | unsigned int perfctr_msr, evntsel_msr; | ||
571 | unsigned int evntsel; | ||
572 | struct nmi_watchdog_ctlblk *wd = &__get_cpu_var(nmi_watchdog_ctlblk); | ||
573 | |||
574 | perfctr_msr = MSR_K7_PERFCTR0; | ||
575 | evntsel_msr = MSR_K7_EVNTSEL0; | ||
576 | if (!__reserve_perfctr_nmi(-1, perfctr_msr)) | ||
577 | goto fail; | ||
578 | |||
579 | if (!__reserve_evntsel_nmi(-1, evntsel_msr)) | ||
580 | goto fail1; | ||
581 | |||
582 | wrmsrl(perfctr_msr, 0UL); | ||
583 | |||
584 | evntsel = K7_EVNTSEL_INT | ||
585 | | K7_EVNTSEL_OS | ||
586 | | K7_EVNTSEL_USR | ||
587 | | K7_NMI_EVENT; | ||
588 | |||
589 | /* setup the timer */ | ||
590 | wrmsr(evntsel_msr, evntsel, 0); | ||
591 | write_watchdog_counter(perfctr_msr, "K7_PERFCTR0"); | ||
592 | apic_write(APIC_LVTPC, APIC_DM_NMI); | ||
593 | evntsel |= K7_EVNTSEL_ENABLE; | ||
594 | wrmsr(evntsel_msr, evntsel, 0); | ||
595 | |||
596 | wd->perfctr_msr = perfctr_msr; | ||
597 | wd->evntsel_msr = evntsel_msr; | ||
598 | wd->cccr_msr = 0; //unused | ||
599 | wd->check_bit = 1ULL<<63; | ||
600 | return 1; | ||
601 | fail1: | ||
602 | __release_perfctr_nmi(-1, perfctr_msr); | ||
603 | fail: | ||
604 | return 0; | ||
605 | } | ||
606 | |||
607 | static void stop_k7_watchdog(void) | ||
608 | { | ||
609 | struct nmi_watchdog_ctlblk *wd = &__get_cpu_var(nmi_watchdog_ctlblk); | ||
610 | |||
611 | wrmsr(wd->evntsel_msr, 0, 0); | ||
612 | |||
613 | __release_evntsel_nmi(-1, wd->evntsel_msr); | ||
614 | __release_perfctr_nmi(-1, wd->perfctr_msr); | ||
615 | } | ||
616 | |||
617 | #define P6_EVNTSEL0_ENABLE (1 << 22) | ||
618 | #define P6_EVNTSEL_INT (1 << 20) | ||
619 | #define P6_EVNTSEL_OS (1 << 17) | ||
620 | #define P6_EVNTSEL_USR (1 << 16) | ||
621 | #define P6_EVENT_CPU_CLOCKS_NOT_HALTED 0x79 | ||
622 | #define P6_NMI_EVENT P6_EVENT_CPU_CLOCKS_NOT_HALTED | ||
623 | |||
624 | static int setup_p6_watchdog(void) | ||
625 | { | ||
626 | unsigned int perfctr_msr, evntsel_msr; | ||
627 | unsigned int evntsel; | ||
628 | struct nmi_watchdog_ctlblk *wd = &__get_cpu_var(nmi_watchdog_ctlblk); | ||
629 | |||
630 | perfctr_msr = MSR_P6_PERFCTR0; | ||
631 | evntsel_msr = MSR_P6_EVNTSEL0; | ||
632 | if (!__reserve_perfctr_nmi(-1, perfctr_msr)) | ||
633 | goto fail; | ||
634 | |||
635 | if (!__reserve_evntsel_nmi(-1, evntsel_msr)) | ||
636 | goto fail1; | ||
637 | |||
638 | wrmsrl(perfctr_msr, 0UL); | ||
639 | |||
640 | evntsel = P6_EVNTSEL_INT | ||
641 | | P6_EVNTSEL_OS | ||
642 | | P6_EVNTSEL_USR | ||
643 | | P6_NMI_EVENT; | ||
644 | |||
645 | /* setup the timer */ | ||
646 | wrmsr(evntsel_msr, evntsel, 0); | ||
647 | nmi_hz = adjust_for_32bit_ctr(nmi_hz); | ||
648 | write_watchdog_counter32(perfctr_msr, "P6_PERFCTR0"); | ||
649 | apic_write(APIC_LVTPC, APIC_DM_NMI); | ||
650 | evntsel |= P6_EVNTSEL0_ENABLE; | ||
651 | wrmsr(evntsel_msr, evntsel, 0); | ||
652 | |||
653 | wd->perfctr_msr = perfctr_msr; | ||
654 | wd->evntsel_msr = evntsel_msr; | ||
655 | wd->cccr_msr = 0; //unused | ||
656 | wd->check_bit = 1ULL<<39; | ||
657 | return 1; | ||
658 | fail1: | ||
659 | __release_perfctr_nmi(-1, perfctr_msr); | ||
660 | fail: | ||
661 | return 0; | ||
662 | } | ||
663 | |||
664 | static void stop_p6_watchdog(void) | ||
665 | { | ||
666 | struct nmi_watchdog_ctlblk *wd = &__get_cpu_var(nmi_watchdog_ctlblk); | ||
667 | |||
668 | wrmsr(wd->evntsel_msr, 0, 0); | ||
669 | |||
670 | __release_evntsel_nmi(-1, wd->evntsel_msr); | ||
671 | __release_perfctr_nmi(-1, wd->perfctr_msr); | ||
672 | } | ||
673 | |||
674 | /* Note that these events don't tick when the CPU idles. This means | ||
675 | the frequency varies with CPU load. */ | ||
676 | |||
677 | #define MSR_P4_MISC_ENABLE_PERF_AVAIL (1<<7) | ||
678 | #define P4_ESCR_EVENT_SELECT(N) ((N)<<25) | ||
679 | #define P4_ESCR_OS (1<<3) | ||
680 | #define P4_ESCR_USR (1<<2) | ||
681 | #define P4_CCCR_OVF_PMI0 (1<<26) | ||
682 | #define P4_CCCR_OVF_PMI1 (1<<27) | ||
683 | #define P4_CCCR_THRESHOLD(N) ((N)<<20) | ||
684 | #define P4_CCCR_COMPLEMENT (1<<19) | ||
685 | #define P4_CCCR_COMPARE (1<<18) | ||
686 | #define P4_CCCR_REQUIRED (3<<16) | ||
687 | #define P4_CCCR_ESCR_SELECT(N) ((N)<<13) | ||
688 | #define P4_CCCR_ENABLE (1<<12) | ||
689 | #define P4_CCCR_OVF (1<<31) | ||
690 | /* Set up IQ_COUNTER0 to behave like a clock, by having IQ_CCCR0 filter | ||
691 | CRU_ESCR0 (with any non-null event selector) through a complemented | ||
692 | max threshold. [IA32-Vol3, Section 14.9.9] */ | ||
693 | |||
694 | static int setup_p4_watchdog(void) | ||
695 | { | 214 | { |
696 | unsigned int perfctr_msr, evntsel_msr, cccr_msr; | 215 | apic_write_around(APIC_LVT0, APIC_DM_NMI); |
697 | unsigned int evntsel, cccr_val; | ||
698 | unsigned int misc_enable, dummy; | ||
699 | unsigned int ht_num; | ||
700 | struct nmi_watchdog_ctlblk *wd = &__get_cpu_var(nmi_watchdog_ctlblk); | ||
701 | |||
702 | rdmsr(MSR_IA32_MISC_ENABLE, misc_enable, dummy); | ||
703 | if (!(misc_enable & MSR_P4_MISC_ENABLE_PERF_AVAIL)) | ||
704 | return 0; | ||
705 | |||
706 | #ifdef CONFIG_SMP | ||
707 | /* detect which hyperthread we are on */ | ||
708 | if (smp_num_siblings == 2) { | ||
709 | unsigned int ebx, apicid; | ||
710 | |||
711 | ebx = cpuid_ebx(1); | ||
712 | apicid = (ebx >> 24) & 0xff; | ||
713 | ht_num = apicid & 1; | ||
714 | } else | ||
715 | #endif | ||
716 | ht_num = 0; | ||
717 | |||
718 | /* performance counters are shared resources | ||
719 | * assign each hyperthread its own set | ||
720 | * (re-use the ESCR0 register, seems safe | ||
721 | * and keeps the cccr_val the same) | ||
722 | */ | ||
723 | if (!ht_num) { | ||
724 | /* logical cpu 0 */ | ||
725 | perfctr_msr = MSR_P4_IQ_PERFCTR0; | ||
726 | evntsel_msr = MSR_P4_CRU_ESCR0; | ||
727 | cccr_msr = MSR_P4_IQ_CCCR0; | ||
728 | cccr_val = P4_CCCR_OVF_PMI0 | P4_CCCR_ESCR_SELECT(4); | ||
729 | } else { | ||
730 | /* logical cpu 1 */ | ||
731 | perfctr_msr = MSR_P4_IQ_PERFCTR1; | ||
732 | evntsel_msr = MSR_P4_CRU_ESCR0; | ||
733 | cccr_msr = MSR_P4_IQ_CCCR1; | ||
734 | cccr_val = P4_CCCR_OVF_PMI1 | P4_CCCR_ESCR_SELECT(4); | ||
735 | } | ||
736 | |||
737 | if (!__reserve_perfctr_nmi(-1, perfctr_msr)) | ||
738 | goto fail; | ||
739 | |||
740 | if (!__reserve_evntsel_nmi(-1, evntsel_msr)) | ||
741 | goto fail1; | ||
742 | |||
743 | evntsel = P4_ESCR_EVENT_SELECT(0x3F) | ||
744 | | P4_ESCR_OS | ||
745 | | P4_ESCR_USR; | ||
746 | |||
747 | cccr_val |= P4_CCCR_THRESHOLD(15) | ||
748 | | P4_CCCR_COMPLEMENT | ||
749 | | P4_CCCR_COMPARE | ||
750 | | P4_CCCR_REQUIRED; | ||
751 | |||
752 | wrmsr(evntsel_msr, evntsel, 0); | ||
753 | wrmsr(cccr_msr, cccr_val, 0); | ||
754 | write_watchdog_counter(perfctr_msr, "P4_IQ_COUNTER0"); | ||
755 | apic_write(APIC_LVTPC, APIC_DM_NMI); | ||
756 | cccr_val |= P4_CCCR_ENABLE; | ||
757 | wrmsr(cccr_msr, cccr_val, 0); | ||
758 | wd->perfctr_msr = perfctr_msr; | ||
759 | wd->evntsel_msr = evntsel_msr; | ||
760 | wd->cccr_msr = cccr_msr; | ||
761 | wd->check_bit = 1ULL<<39; | ||
762 | return 1; | ||
763 | fail1: | ||
764 | __release_perfctr_nmi(-1, perfctr_msr); | ||
765 | fail: | ||
766 | return 0; | ||
767 | } | 216 | } |
768 | 217 | ||
769 | static void stop_p4_watchdog(void) | 218 | /* |
219 | * Enable timer based NMIs on all CPUs: | ||
220 | */ | ||
221 | void acpi_nmi_enable(void) | ||
770 | { | 222 | { |
771 | struct nmi_watchdog_ctlblk *wd = &__get_cpu_var(nmi_watchdog_ctlblk); | 223 | if (atomic_read(&nmi_active) && nmi_watchdog == NMI_IO_APIC) |
772 | 224 | on_each_cpu(__acpi_nmi_enable, NULL, 0, 1); | |
773 | wrmsr(wd->cccr_msr, 0, 0); | ||
774 | wrmsr(wd->evntsel_msr, 0, 0); | ||
775 | |||
776 | __release_evntsel_nmi(-1, wd->evntsel_msr); | ||
777 | __release_perfctr_nmi(-1, wd->perfctr_msr); | ||
778 | } | 225 | } |
779 | 226 | ||
780 | #define ARCH_PERFMON_NMI_EVENT_SEL ARCH_PERFMON_UNHALTED_CORE_CYCLES_SEL | 227 | static void __acpi_nmi_disable(void *__unused) |
781 | #define ARCH_PERFMON_NMI_EVENT_UMASK ARCH_PERFMON_UNHALTED_CORE_CYCLES_UMASK | ||
782 | |||
783 | static int setup_intel_arch_watchdog(void) | ||
784 | { | 228 | { |
785 | unsigned int ebx; | 229 | apic_write(APIC_LVT0, APIC_DM_NMI | APIC_LVT_MASKED); |
786 | union cpuid10_eax eax; | ||
787 | unsigned int unused; | ||
788 | unsigned int perfctr_msr, evntsel_msr; | ||
789 | unsigned int evntsel; | ||
790 | struct nmi_watchdog_ctlblk *wd = &__get_cpu_var(nmi_watchdog_ctlblk); | ||
791 | |||
792 | /* | ||
793 | * Check whether the Architectural PerfMon supports | ||
794 | * Unhalted Core Cycles Event or not. | ||
795 | * NOTE: Corresponding bit = 0 in ebx indicates event present. | ||
796 | */ | ||
797 | cpuid(10, &(eax.full), &ebx, &unused, &unused); | ||
798 | if ((eax.split.mask_length < (ARCH_PERFMON_UNHALTED_CORE_CYCLES_INDEX+1)) || | ||
799 | (ebx & ARCH_PERFMON_UNHALTED_CORE_CYCLES_PRESENT)) | ||
800 | goto fail; | ||
801 | |||
802 | perfctr_msr = MSR_ARCH_PERFMON_PERFCTR0; | ||
803 | evntsel_msr = MSR_ARCH_PERFMON_EVENTSEL0; | ||
804 | |||
805 | if (!__reserve_perfctr_nmi(-1, perfctr_msr)) | ||
806 | goto fail; | ||
807 | |||
808 | if (!__reserve_evntsel_nmi(-1, evntsel_msr)) | ||
809 | goto fail1; | ||
810 | |||
811 | wrmsrl(perfctr_msr, 0UL); | ||
812 | |||
813 | evntsel = ARCH_PERFMON_EVENTSEL_INT | ||
814 | | ARCH_PERFMON_EVENTSEL_OS | ||
815 | | ARCH_PERFMON_EVENTSEL_USR | ||
816 | | ARCH_PERFMON_NMI_EVENT_SEL | ||
817 | | ARCH_PERFMON_NMI_EVENT_UMASK; | ||
818 | |||
819 | /* setup the timer */ | ||
820 | wrmsr(evntsel_msr, evntsel, 0); | ||
821 | nmi_hz = adjust_for_32bit_ctr(nmi_hz); | ||
822 | write_watchdog_counter32(perfctr_msr, "INTEL_ARCH_PERFCTR0"); | ||
823 | apic_write(APIC_LVTPC, APIC_DM_NMI); | ||
824 | evntsel |= ARCH_PERFMON_EVENTSEL0_ENABLE; | ||
825 | wrmsr(evntsel_msr, evntsel, 0); | ||
826 | |||
827 | wd->perfctr_msr = perfctr_msr; | ||
828 | wd->evntsel_msr = evntsel_msr; | ||
829 | wd->cccr_msr = 0; //unused | ||
830 | wd->check_bit = 1ULL << (eax.split.bit_width - 1); | ||
831 | return 1; | ||
832 | fail1: | ||
833 | __release_perfctr_nmi(-1, perfctr_msr); | ||
834 | fail: | ||
835 | return 0; | ||
836 | } | 230 | } |
837 | 231 | ||
838 | static void stop_intel_arch_watchdog(void) | 232 | /* |
233 | * Disable timer based NMIs on all CPUs: | ||
234 | */ | ||
235 | void acpi_nmi_disable(void) | ||
839 | { | 236 | { |
840 | unsigned int ebx; | 237 | if (atomic_read(&nmi_active) && nmi_watchdog == NMI_IO_APIC) |
841 | union cpuid10_eax eax; | 238 | on_each_cpu(__acpi_nmi_disable, NULL, 0, 1); |
842 | unsigned int unused; | ||
843 | struct nmi_watchdog_ctlblk *wd = &__get_cpu_var(nmi_watchdog_ctlblk); | ||
844 | |||
845 | /* | ||
846 | * Check whether the Architectural PerfMon supports | ||
847 | * Unhalted Core Cycles Event or not. | ||
848 | * NOTE: Corresponding bit = 0 in ebx indicates event present. | ||
849 | */ | ||
850 | cpuid(10, &(eax.full), &ebx, &unused, &unused); | ||
851 | if ((eax.split.mask_length < (ARCH_PERFMON_UNHALTED_CORE_CYCLES_INDEX+1)) || | ||
852 | (ebx & ARCH_PERFMON_UNHALTED_CORE_CYCLES_PRESENT)) | ||
853 | return; | ||
854 | |||
855 | wrmsr(wd->evntsel_msr, 0, 0); | ||
856 | __release_evntsel_nmi(-1, wd->evntsel_msr); | ||
857 | __release_perfctr_nmi(-1, wd->perfctr_msr); | ||
858 | } | 239 | } |
859 | 240 | ||
860 | void setup_apic_nmi_watchdog (void *unused) | 241 | void setup_apic_nmi_watchdog (void *unused) |
861 | { | 242 | { |
862 | struct nmi_watchdog_ctlblk *wd = &__get_cpu_var(nmi_watchdog_ctlblk); | 243 | if (__get_cpu_var(wd_enabled)) |
863 | 244 | return; | |
864 | /* only support LOCAL and IO APICs for now */ | ||
865 | if ((nmi_watchdog != NMI_LOCAL_APIC) && | ||
866 | (nmi_watchdog != NMI_IO_APIC)) | ||
867 | return; | ||
868 | |||
869 | if (wd->enabled == 1) | ||
870 | return; | ||
871 | 245 | ||
872 | /* cheap hack to support suspend/resume */ | 246 | /* cheap hack to support suspend/resume */ |
873 | /* if cpu0 is not active neither should the other cpus */ | 247 | /* if cpu0 is not active neither should the other cpus */ |
874 | if ((smp_processor_id() != 0) && (atomic_read(&nmi_active) <= 0)) | 248 | if ((smp_processor_id() != 0) && (atomic_read(&nmi_active) <= 0)) |
875 | return; | 249 | return; |
876 | 250 | ||
877 | if (nmi_watchdog == NMI_LOCAL_APIC) { | 251 | switch (nmi_watchdog) { |
878 | switch (boot_cpu_data.x86_vendor) { | 252 | case NMI_LOCAL_APIC: |
879 | case X86_VENDOR_AMD: | 253 | __get_cpu_var(wd_enabled) = 1; /* enable it before to avoid race with handler */ |
880 | if (boot_cpu_data.x86 != 6 && boot_cpu_data.x86 != 15 && | 254 | if (lapic_watchdog_init(nmi_hz) < 0) { |
881 | boot_cpu_data.x86 != 16) | 255 | __get_cpu_var(wd_enabled) = 0; |
882 | return; | ||
883 | if (!setup_k7_watchdog()) | ||
884 | return; | ||
885 | break; | ||
886 | case X86_VENDOR_INTEL: | ||
887 | if (cpu_has(&boot_cpu_data, X86_FEATURE_ARCH_PERFMON)) { | ||
888 | if (!setup_intel_arch_watchdog()) | ||
889 | return; | ||
890 | break; | ||
891 | } | ||
892 | switch (boot_cpu_data.x86) { | ||
893 | case 6: | ||
894 | if (boot_cpu_data.x86_model > 0xd) | ||
895 | return; | ||
896 | |||
897 | if (!setup_p6_watchdog()) | ||
898 | return; | ||
899 | break; | ||
900 | case 15: | ||
901 | if (boot_cpu_data.x86_model > 0x4) | ||
902 | return; | ||
903 | |||
904 | if (!setup_p4_watchdog()) | ||
905 | return; | ||
906 | break; | ||
907 | default: | ||
908 | return; | ||
909 | } | ||
910 | break; | ||
911 | default: | ||
912 | return; | 256 | return; |
913 | } | 257 | } |
258 | /* FALL THROUGH */ | ||
259 | case NMI_IO_APIC: | ||
260 | __get_cpu_var(wd_enabled) = 1; | ||
261 | atomic_inc(&nmi_active); | ||
914 | } | 262 | } |
915 | wd->enabled = 1; | ||
916 | atomic_inc(&nmi_active); | ||
917 | } | 263 | } |
918 | 264 | ||
919 | void stop_apic_nmi_watchdog(void *unused) | 265 | void stop_apic_nmi_watchdog(void *unused) |
920 | { | 266 | { |
921 | struct nmi_watchdog_ctlblk *wd = &__get_cpu_var(nmi_watchdog_ctlblk); | ||
922 | |||
923 | /* only support LOCAL and IO APICs for now */ | 267 | /* only support LOCAL and IO APICs for now */ |
924 | if ((nmi_watchdog != NMI_LOCAL_APIC) && | 268 | if ((nmi_watchdog != NMI_LOCAL_APIC) && |
925 | (nmi_watchdog != NMI_IO_APIC)) | 269 | (nmi_watchdog != NMI_IO_APIC)) |
926 | return; | 270 | return; |
927 | 271 | if (__get_cpu_var(wd_enabled) == 0) | |
928 | if (wd->enabled == 0) | ||
929 | return; | 272 | return; |
930 | 273 | if (nmi_watchdog == NMI_LOCAL_APIC) | |
931 | if (nmi_watchdog == NMI_LOCAL_APIC) { | 274 | lapic_watchdog_stop(); |
932 | switch (boot_cpu_data.x86_vendor) { | 275 | __get_cpu_var(wd_enabled) = 0; |
933 | case X86_VENDOR_AMD: | ||
934 | stop_k7_watchdog(); | ||
935 | break; | ||
936 | case X86_VENDOR_INTEL: | ||
937 | if (cpu_has(&boot_cpu_data, X86_FEATURE_ARCH_PERFMON)) { | ||
938 | stop_intel_arch_watchdog(); | ||
939 | break; | ||
940 | } | ||
941 | switch (boot_cpu_data.x86) { | ||
942 | case 6: | ||
943 | if (boot_cpu_data.x86_model > 0xd) | ||
944 | break; | ||
945 | stop_p6_watchdog(); | ||
946 | break; | ||
947 | case 15: | ||
948 | if (boot_cpu_data.x86_model > 0x4) | ||
949 | break; | ||
950 | stop_p4_watchdog(); | ||
951 | break; | ||
952 | } | ||
953 | break; | ||
954 | default: | ||
955 | return; | ||
956 | } | ||
957 | } | ||
958 | wd->enabled = 0; | ||
959 | atomic_dec(&nmi_active); | 276 | atomic_dec(&nmi_active); |
960 | } | 277 | } |
961 | 278 | ||
@@ -1011,8 +328,6 @@ __kprobes int nmi_watchdog_tick(struct pt_regs * regs, unsigned reason) | |||
1011 | unsigned int sum; | 328 | unsigned int sum; |
1012 | int touched = 0; | 329 | int touched = 0; |
1013 | int cpu = smp_processor_id(); | 330 | int cpu = smp_processor_id(); |
1014 | struct nmi_watchdog_ctlblk *wd = &__get_cpu_var(nmi_watchdog_ctlblk); | ||
1015 | u64 dummy; | ||
1016 | int rc=0; | 331 | int rc=0; |
1017 | 332 | ||
1018 | /* check for other users first */ | 333 | /* check for other users first */ |
@@ -1055,53 +370,20 @@ __kprobes int nmi_watchdog_tick(struct pt_regs * regs, unsigned reason) | |||
1055 | alert_counter[cpu] = 0; | 370 | alert_counter[cpu] = 0; |
1056 | } | 371 | } |
1057 | /* see if the nmi watchdog went off */ | 372 | /* see if the nmi watchdog went off */ |
1058 | if (wd->enabled) { | 373 | if (!__get_cpu_var(wd_enabled)) |
1059 | if (nmi_watchdog == NMI_LOCAL_APIC) { | 374 | return rc; |
1060 | rdmsrl(wd->perfctr_msr, dummy); | 375 | switch (nmi_watchdog) { |
1061 | if (dummy & wd->check_bit){ | 376 | case NMI_LOCAL_APIC: |
1062 | /* this wasn't a watchdog timer interrupt */ | 377 | rc |= lapic_wd_event(nmi_hz); |
1063 | goto done; | 378 | break; |
1064 | } | 379 | case NMI_IO_APIC: |
1065 | 380 | /* don't know how to accurately check for this. | |
1066 | /* only Intel P4 uses the cccr msr */ | 381 | * just assume it was a watchdog timer interrupt |
1067 | if (wd->cccr_msr != 0) { | 382 | * This matches the old behaviour. |
1068 | /* | 383 | */ |
1069 | * P4 quirks: | 384 | rc = 1; |
1070 | * - An overflown perfctr will assert its interrupt | 385 | break; |
1071 | * until the OVF flag in its CCCR is cleared. | ||
1072 | * - LVTPC is masked on interrupt and must be | ||
1073 | * unmasked by the LVTPC handler. | ||
1074 | */ | ||
1075 | rdmsrl(wd->cccr_msr, dummy); | ||
1076 | dummy &= ~P4_CCCR_OVF; | ||
1077 | wrmsrl(wd->cccr_msr, dummy); | ||
1078 | apic_write(APIC_LVTPC, APIC_DM_NMI); | ||
1079 | /* start the cycle over again */ | ||
1080 | write_watchdog_counter(wd->perfctr_msr, NULL); | ||
1081 | } | ||
1082 | else if (wd->perfctr_msr == MSR_P6_PERFCTR0 || | ||
1083 | wd->perfctr_msr == MSR_ARCH_PERFMON_PERFCTR0) { | ||
1084 | /* P6 based Pentium M need to re-unmask | ||
1085 | * the apic vector but it doesn't hurt | ||
1086 | * other P6 variant. | ||
1087 | * ArchPerfom/Core Duo also needs this */ | ||
1088 | apic_write(APIC_LVTPC, APIC_DM_NMI); | ||
1089 | /* P6/ARCH_PERFMON has 32 bit counter write */ | ||
1090 | write_watchdog_counter32(wd->perfctr_msr, NULL); | ||
1091 | } else { | ||
1092 | /* start the cycle over again */ | ||
1093 | write_watchdog_counter(wd->perfctr_msr, NULL); | ||
1094 | } | ||
1095 | rc = 1; | ||
1096 | } else if (nmi_watchdog == NMI_IO_APIC) { | ||
1097 | /* don't know how to accurately check for this. | ||
1098 | * just assume it was a watchdog timer interrupt | ||
1099 | * This matches the old behaviour. | ||
1100 | */ | ||
1101 | rc = 1; | ||
1102 | } | ||
1103 | } | 386 | } |
1104 | done: | ||
1105 | return rc; | 387 | return rc; |
1106 | } | 388 | } |
1107 | 389 | ||
@@ -1146,7 +428,7 @@ int proc_nmi_enabled(struct ctl_table *table, int write, struct file *file, | |||
1146 | } | 428 | } |
1147 | 429 | ||
1148 | if (nmi_watchdog == NMI_DEFAULT) { | 430 | if (nmi_watchdog == NMI_DEFAULT) { |
1149 | if (nmi_known_cpu() > 0) | 431 | if (lapic_watchdog_ok()) |
1150 | nmi_watchdog = NMI_LOCAL_APIC; | 432 | nmi_watchdog = NMI_LOCAL_APIC; |
1151 | else | 433 | else |
1152 | nmi_watchdog = NMI_IO_APIC; | 434 | nmi_watchdog = NMI_IO_APIC; |
@@ -1182,11 +464,3 @@ void __trigger_all_cpu_backtrace(void) | |||
1182 | 464 | ||
1183 | EXPORT_SYMBOL(nmi_active); | 465 | EXPORT_SYMBOL(nmi_active); |
1184 | EXPORT_SYMBOL(nmi_watchdog); | 466 | EXPORT_SYMBOL(nmi_watchdog); |
1185 | EXPORT_SYMBOL(avail_to_resrv_perfctr_nmi); | ||
1186 | EXPORT_SYMBOL(avail_to_resrv_perfctr_nmi_bit); | ||
1187 | EXPORT_SYMBOL(reserve_perfctr_nmi); | ||
1188 | EXPORT_SYMBOL(release_perfctr_nmi); | ||
1189 | EXPORT_SYMBOL(reserve_evntsel_nmi); | ||
1190 | EXPORT_SYMBOL(release_evntsel_nmi); | ||
1191 | EXPORT_SYMBOL(disable_timer_nmi_watchdog); | ||
1192 | EXPORT_SYMBOL(enable_timer_nmi_watchdog); | ||
diff --git a/arch/i386/kernel/paravirt.c b/arch/i386/kernel/paravirt.c index 2ec331e03fa..5c10f376bce 100644 --- a/arch/i386/kernel/paravirt.c +++ b/arch/i386/kernel/paravirt.c | |||
@@ -20,6 +20,7 @@ | |||
20 | #include <linux/efi.h> | 20 | #include <linux/efi.h> |
21 | #include <linux/bcd.h> | 21 | #include <linux/bcd.h> |
22 | #include <linux/start_kernel.h> | 22 | #include <linux/start_kernel.h> |
23 | #include <linux/highmem.h> | ||
23 | 24 | ||
24 | #include <asm/bug.h> | 25 | #include <asm/bug.h> |
25 | #include <asm/paravirt.h> | 26 | #include <asm/paravirt.h> |
@@ -35,7 +36,7 @@ | |||
35 | #include <asm/timer.h> | 36 | #include <asm/timer.h> |
36 | 37 | ||
37 | /* nop stub */ | 38 | /* nop stub */ |
38 | static void native_nop(void) | 39 | void _paravirt_nop(void) |
39 | { | 40 | { |
40 | } | 41 | } |
41 | 42 | ||
@@ -54,331 +55,148 @@ char *memory_setup(void) | |||
54 | #define DEF_NATIVE(name, code) \ | 55 | #define DEF_NATIVE(name, code) \ |
55 | extern const char start_##name[], end_##name[]; \ | 56 | extern const char start_##name[], end_##name[]; \ |
56 | asm("start_" #name ": " code "; end_" #name ":") | 57 | asm("start_" #name ": " code "; end_" #name ":") |
57 | DEF_NATIVE(cli, "cli"); | 58 | |
58 | DEF_NATIVE(sti, "sti"); | 59 | DEF_NATIVE(irq_disable, "cli"); |
59 | DEF_NATIVE(popf, "push %eax; popf"); | 60 | DEF_NATIVE(irq_enable, "sti"); |
60 | DEF_NATIVE(pushf, "pushf; pop %eax"); | 61 | DEF_NATIVE(restore_fl, "push %eax; popf"); |
61 | DEF_NATIVE(pushf_cli, "pushf; pop %eax; cli"); | 62 | DEF_NATIVE(save_fl, "pushf; pop %eax"); |
62 | DEF_NATIVE(iret, "iret"); | 63 | DEF_NATIVE(iret, "iret"); |
63 | DEF_NATIVE(sti_sysexit, "sti; sysexit"); | 64 | DEF_NATIVE(irq_enable_sysexit, "sti; sysexit"); |
65 | DEF_NATIVE(read_cr2, "mov %cr2, %eax"); | ||
66 | DEF_NATIVE(write_cr3, "mov %eax, %cr3"); | ||
67 | DEF_NATIVE(read_cr3, "mov %cr3, %eax"); | ||
68 | DEF_NATIVE(clts, "clts"); | ||
69 | DEF_NATIVE(read_tsc, "rdtsc"); | ||
64 | 70 | ||
65 | static const struct native_insns | 71 | DEF_NATIVE(ud2a, "ud2a"); |
66 | { | ||
67 | const char *start, *end; | ||
68 | } native_insns[] = { | ||
69 | [PARAVIRT_IRQ_DISABLE] = { start_cli, end_cli }, | ||
70 | [PARAVIRT_IRQ_ENABLE] = { start_sti, end_sti }, | ||
71 | [PARAVIRT_RESTORE_FLAGS] = { start_popf, end_popf }, | ||
72 | [PARAVIRT_SAVE_FLAGS] = { start_pushf, end_pushf }, | ||
73 | [PARAVIRT_SAVE_FLAGS_IRQ_DISABLE] = { start_pushf_cli, end_pushf_cli }, | ||
74 | [PARAVIRT_INTERRUPT_RETURN] = { start_iret, end_iret }, | ||
75 | [PARAVIRT_STI_SYSEXIT] = { start_sti_sysexit, end_sti_sysexit }, | ||
76 | }; | ||
77 | 72 | ||
78 | static unsigned native_patch(u8 type, u16 clobbers, void *insns, unsigned len) | 73 | static unsigned native_patch(u8 type, u16 clobbers, void *insns, unsigned len) |
79 | { | 74 | { |
80 | unsigned int insn_len; | 75 | const unsigned char *start, *end; |
81 | 76 | unsigned ret; | |
82 | /* Don't touch it if we don't have a replacement */ | 77 | |
83 | if (type >= ARRAY_SIZE(native_insns) || !native_insns[type].start) | 78 | switch(type) { |
84 | return len; | 79 | #define SITE(x) case PARAVIRT_PATCH(x): start = start_##x; end = end_##x; goto patch_site |
85 | 80 | SITE(irq_disable); | |
86 | insn_len = native_insns[type].end - native_insns[type].start; | 81 | SITE(irq_enable); |
87 | 82 | SITE(restore_fl); | |
88 | /* Similarly if we can't fit replacement. */ | 83 | SITE(save_fl); |
89 | if (len < insn_len) | 84 | SITE(iret); |
90 | return len; | 85 | SITE(irq_enable_sysexit); |
86 | SITE(read_cr2); | ||
87 | SITE(read_cr3); | ||
88 | SITE(write_cr3); | ||
89 | SITE(clts); | ||
90 | SITE(read_tsc); | ||
91 | #undef SITE | ||
92 | |||
93 | patch_site: | ||
94 | ret = paravirt_patch_insns(insns, len, start, end); | ||
95 | break; | ||
91 | 96 | ||
92 | memcpy(insns, native_insns[type].start, insn_len); | 97 | case PARAVIRT_PATCH(make_pgd): |
93 | return insn_len; | 98 | case PARAVIRT_PATCH(make_pte): |
94 | } | 99 | case PARAVIRT_PATCH(pgd_val): |
100 | case PARAVIRT_PATCH(pte_val): | ||
101 | #ifdef CONFIG_X86_PAE | ||
102 | case PARAVIRT_PATCH(make_pmd): | ||
103 | case PARAVIRT_PATCH(pmd_val): | ||
104 | #endif | ||
105 | /* These functions end up returning exactly what | ||
106 | they're passed, in the same registers. */ | ||
107 | ret = paravirt_patch_nop(); | ||
108 | break; | ||
95 | 109 | ||
96 | static unsigned long native_get_debugreg(int regno) | ||
97 | { | ||
98 | unsigned long val = 0; /* Damn you, gcc! */ | ||
99 | |||
100 | switch (regno) { | ||
101 | case 0: | ||
102 | asm("movl %%db0, %0" :"=r" (val)); break; | ||
103 | case 1: | ||
104 | asm("movl %%db1, %0" :"=r" (val)); break; | ||
105 | case 2: | ||
106 | asm("movl %%db2, %0" :"=r" (val)); break; | ||
107 | case 3: | ||
108 | asm("movl %%db3, %0" :"=r" (val)); break; | ||
109 | case 6: | ||
110 | asm("movl %%db6, %0" :"=r" (val)); break; | ||
111 | case 7: | ||
112 | asm("movl %%db7, %0" :"=r" (val)); break; | ||
113 | default: | 110 | default: |
114 | BUG(); | 111 | ret = paravirt_patch_default(type, clobbers, insns, len); |
115 | } | ||
116 | return val; | ||
117 | } | ||
118 | |||
119 | static void native_set_debugreg(int regno, unsigned long value) | ||
120 | { | ||
121 | switch (regno) { | ||
122 | case 0: | ||
123 | asm("movl %0,%%db0" : /* no output */ :"r" (value)); | ||
124 | break; | ||
125 | case 1: | ||
126 | asm("movl %0,%%db1" : /* no output */ :"r" (value)); | ||
127 | break; | ||
128 | case 2: | ||
129 | asm("movl %0,%%db2" : /* no output */ :"r" (value)); | ||
130 | break; | 112 | break; |
131 | case 3: | ||
132 | asm("movl %0,%%db3" : /* no output */ :"r" (value)); | ||
133 | break; | ||
134 | case 6: | ||
135 | asm("movl %0,%%db6" : /* no output */ :"r" (value)); | ||
136 | break; | ||
137 | case 7: | ||
138 | asm("movl %0,%%db7" : /* no output */ :"r" (value)); | ||
139 | break; | ||
140 | default: | ||
141 | BUG(); | ||
142 | } | 113 | } |
143 | } | ||
144 | |||
145 | void init_IRQ(void) | ||
146 | { | ||
147 | paravirt_ops.init_IRQ(); | ||
148 | } | ||
149 | |||
150 | static void native_clts(void) | ||
151 | { | ||
152 | asm volatile ("clts"); | ||
153 | } | ||
154 | |||
155 | static unsigned long native_read_cr0(void) | ||
156 | { | ||
157 | unsigned long val; | ||
158 | asm volatile("movl %%cr0,%0\n\t" :"=r" (val)); | ||
159 | return val; | ||
160 | } | ||
161 | |||
162 | static void native_write_cr0(unsigned long val) | ||
163 | { | ||
164 | asm volatile("movl %0,%%cr0": :"r" (val)); | ||
165 | } | ||
166 | |||
167 | static unsigned long native_read_cr2(void) | ||
168 | { | ||
169 | unsigned long val; | ||
170 | asm volatile("movl %%cr2,%0\n\t" :"=r" (val)); | ||
171 | return val; | ||
172 | } | ||
173 | |||
174 | static void native_write_cr2(unsigned long val) | ||
175 | { | ||
176 | asm volatile("movl %0,%%cr2": :"r" (val)); | ||
177 | } | ||
178 | |||
179 | static unsigned long native_read_cr3(void) | ||
180 | { | ||
181 | unsigned long val; | ||
182 | asm volatile("movl %%cr3,%0\n\t" :"=r" (val)); | ||
183 | return val; | ||
184 | } | ||
185 | |||
186 | static void native_write_cr3(unsigned long val) | ||
187 | { | ||
188 | asm volatile("movl %0,%%cr3": :"r" (val)); | ||
189 | } | ||
190 | |||
191 | static unsigned long native_read_cr4(void) | ||
192 | { | ||
193 | unsigned long val; | ||
194 | asm volatile("movl %%cr4,%0\n\t" :"=r" (val)); | ||
195 | return val; | ||
196 | } | ||
197 | |||
198 | static unsigned long native_read_cr4_safe(void) | ||
199 | { | ||
200 | unsigned long val; | ||
201 | /* This could fault if %cr4 does not exist */ | ||
202 | asm("1: movl %%cr4, %0 \n" | ||
203 | "2: \n" | ||
204 | ".section __ex_table,\"a\" \n" | ||
205 | ".long 1b,2b \n" | ||
206 | ".previous \n" | ||
207 | : "=r" (val): "0" (0)); | ||
208 | return val; | ||
209 | } | ||
210 | |||
211 | static void native_write_cr4(unsigned long val) | ||
212 | { | ||
213 | asm volatile("movl %0,%%cr4": :"r" (val)); | ||
214 | } | ||
215 | |||
216 | static unsigned long native_save_fl(void) | ||
217 | { | ||
218 | unsigned long f; | ||
219 | asm volatile("pushfl ; popl %0":"=g" (f): /* no input */); | ||
220 | return f; | ||
221 | } | ||
222 | |||
223 | static void native_restore_fl(unsigned long f) | ||
224 | { | ||
225 | asm volatile("pushl %0 ; popfl": /* no output */ | ||
226 | :"g" (f) | ||
227 | :"memory", "cc"); | ||
228 | } | ||
229 | |||
230 | static void native_irq_disable(void) | ||
231 | { | ||
232 | asm volatile("cli": : :"memory"); | ||
233 | } | ||
234 | |||
235 | static void native_irq_enable(void) | ||
236 | { | ||
237 | asm volatile("sti": : :"memory"); | ||
238 | } | ||
239 | |||
240 | static void native_safe_halt(void) | ||
241 | { | ||
242 | asm volatile("sti; hlt": : :"memory"); | ||
243 | } | ||
244 | 114 | ||
245 | static void native_halt(void) | 115 | return ret; |
246 | { | ||
247 | asm volatile("hlt": : :"memory"); | ||
248 | } | 116 | } |
249 | 117 | ||
250 | static void native_wbinvd(void) | 118 | unsigned paravirt_patch_nop(void) |
251 | { | 119 | { |
252 | asm volatile("wbinvd": : :"memory"); | 120 | return 0; |
253 | } | 121 | } |
254 | 122 | ||
255 | static unsigned long long native_read_msr(unsigned int msr, int *err) | 123 | unsigned paravirt_patch_ignore(unsigned len) |
256 | { | 124 | { |
257 | unsigned long long val; | 125 | return len; |
258 | |||
259 | asm volatile("2: rdmsr ; xorl %0,%0\n" | ||
260 | "1:\n\t" | ||
261 | ".section .fixup,\"ax\"\n\t" | ||
262 | "3: movl %3,%0 ; jmp 1b\n\t" | ||
263 | ".previous\n\t" | ||
264 | ".section __ex_table,\"a\"\n" | ||
265 | " .align 4\n\t" | ||
266 | " .long 2b,3b\n\t" | ||
267 | ".previous" | ||
268 | : "=r" (*err), "=A" (val) | ||
269 | : "c" (msr), "i" (-EFAULT)); | ||
270 | |||
271 | return val; | ||
272 | } | 126 | } |
273 | 127 | ||
274 | static int native_write_msr(unsigned int msr, unsigned long long val) | 128 | unsigned paravirt_patch_call(void *target, u16 tgt_clobbers, |
129 | void *site, u16 site_clobbers, | ||
130 | unsigned len) | ||
275 | { | 131 | { |
276 | int err; | 132 | unsigned char *call = site; |
277 | asm volatile("2: wrmsr ; xorl %0,%0\n" | 133 | unsigned long delta = (unsigned long)target - (unsigned long)(call+5); |
278 | "1:\n\t" | ||
279 | ".section .fixup,\"ax\"\n\t" | ||
280 | "3: movl %4,%0 ; jmp 1b\n\t" | ||
281 | ".previous\n\t" | ||
282 | ".section __ex_table,\"a\"\n" | ||
283 | " .align 4\n\t" | ||
284 | " .long 2b,3b\n\t" | ||
285 | ".previous" | ||
286 | : "=a" (err) | ||
287 | : "c" (msr), "0" ((u32)val), "d" ((u32)(val>>32)), | ||
288 | "i" (-EFAULT)); | ||
289 | return err; | ||
290 | } | ||
291 | 134 | ||
292 | static unsigned long long native_read_tsc(void) | 135 | if (tgt_clobbers & ~site_clobbers) |
293 | { | 136 | return len; /* target would clobber too much for this site */ |
294 | unsigned long long val; | 137 | if (len < 5) |
295 | asm volatile("rdtsc" : "=A" (val)); | 138 | return len; /* call too long for patch site */ |
296 | return val; | ||
297 | } | ||
298 | 139 | ||
299 | static unsigned long long native_read_pmc(void) | 140 | *call++ = 0xe8; /* call */ |
300 | { | 141 | *(unsigned long *)call = delta; |
301 | unsigned long long val; | ||
302 | asm volatile("rdpmc" : "=A" (val)); | ||
303 | return val; | ||
304 | } | ||
305 | 142 | ||
306 | static void native_load_tr_desc(void) | 143 | return 5; |
307 | { | ||
308 | asm volatile("ltr %w0"::"q" (GDT_ENTRY_TSS*8)); | ||
309 | } | 144 | } |
310 | 145 | ||
311 | static void native_load_gdt(const struct Xgt_desc_struct *dtr) | 146 | unsigned paravirt_patch_jmp(void *target, void *site, unsigned len) |
312 | { | 147 | { |
313 | asm volatile("lgdt %0"::"m" (*dtr)); | 148 | unsigned char *jmp = site; |
314 | } | 149 | unsigned long delta = (unsigned long)target - (unsigned long)(jmp+5); |
315 | 150 | ||
316 | static void native_load_idt(const struct Xgt_desc_struct *dtr) | 151 | if (len < 5) |
317 | { | 152 | return len; /* call too long for patch site */ |
318 | asm volatile("lidt %0"::"m" (*dtr)); | ||
319 | } | ||
320 | 153 | ||
321 | static void native_store_gdt(struct Xgt_desc_struct *dtr) | 154 | *jmp++ = 0xe9; /* jmp */ |
322 | { | 155 | *(unsigned long *)jmp = delta; |
323 | asm ("sgdt %0":"=m" (*dtr)); | ||
324 | } | ||
325 | 156 | ||
326 | static void native_store_idt(struct Xgt_desc_struct *dtr) | 157 | return 5; |
327 | { | ||
328 | asm ("sidt %0":"=m" (*dtr)); | ||
329 | } | 158 | } |
330 | 159 | ||
331 | static unsigned long native_store_tr(void) | 160 | unsigned paravirt_patch_default(u8 type, u16 clobbers, void *site, unsigned len) |
332 | { | 161 | { |
333 | unsigned long tr; | 162 | void *opfunc = *((void **)¶virt_ops + type); |
334 | asm ("str %0":"=r" (tr)); | 163 | unsigned ret; |
335 | return tr; | ||
336 | } | ||
337 | 164 | ||
338 | static void native_load_tls(struct thread_struct *t, unsigned int cpu) | 165 | if (opfunc == NULL) |
339 | { | 166 | /* If there's no function, patch it with a ud2a (BUG) */ |
340 | #define C(i) get_cpu_gdt_table(cpu)[GDT_ENTRY_TLS_MIN + i] = t->tls_array[i] | 167 | ret = paravirt_patch_insns(site, len, start_ud2a, end_ud2a); |
341 | C(0); C(1); C(2); | 168 | else if (opfunc == paravirt_nop) |
342 | #undef C | 169 | /* If the operation is a nop, then nop the callsite */ |
343 | } | 170 | ret = paravirt_patch_nop(); |
171 | else if (type == PARAVIRT_PATCH(iret) || | ||
172 | type == PARAVIRT_PATCH(irq_enable_sysexit)) | ||
173 | /* If operation requires a jmp, then jmp */ | ||
174 | ret = paravirt_patch_jmp(opfunc, site, len); | ||
175 | else | ||
176 | /* Otherwise call the function; assume target could | ||
177 | clobber any caller-save reg */ | ||
178 | ret = paravirt_patch_call(opfunc, CLBR_ANY, | ||
179 | site, clobbers, len); | ||
344 | 180 | ||
345 | static inline void native_write_dt_entry(void *dt, int entry, u32 entry_low, u32 entry_high) | 181 | return ret; |
346 | { | ||
347 | u32 *lp = (u32 *)((char *)dt + entry*8); | ||
348 | lp[0] = entry_low; | ||
349 | lp[1] = entry_high; | ||
350 | } | 182 | } |
351 | 183 | ||
352 | static void native_write_ldt_entry(void *dt, int entrynum, u32 low, u32 high) | 184 | unsigned paravirt_patch_insns(void *site, unsigned len, |
185 | const char *start, const char *end) | ||
353 | { | 186 | { |
354 | native_write_dt_entry(dt, entrynum, low, high); | 187 | unsigned insn_len = end - start; |
355 | } | ||
356 | 188 | ||
357 | static void native_write_gdt_entry(void *dt, int entrynum, u32 low, u32 high) | 189 | if (insn_len > len || start == NULL) |
358 | { | 190 | insn_len = len; |
359 | native_write_dt_entry(dt, entrynum, low, high); | 191 | else |
360 | } | 192 | memcpy(site, start, insn_len); |
361 | |||
362 | static void native_write_idt_entry(void *dt, int entrynum, u32 low, u32 high) | ||
363 | { | ||
364 | native_write_dt_entry(dt, entrynum, low, high); | ||
365 | } | ||
366 | 193 | ||
367 | static void native_load_esp0(struct tss_struct *tss, | 194 | return insn_len; |
368 | struct thread_struct *thread) | ||
369 | { | ||
370 | tss->esp0 = thread->esp0; | ||
371 | |||
372 | /* This can only happen when SEP is enabled, no need to test "SEP"arately */ | ||
373 | if (unlikely(tss->ss1 != thread->sysenter_cs)) { | ||
374 | tss->ss1 = thread->sysenter_cs; | ||
375 | wrmsr(MSR_IA32_SYSENTER_CS, thread->sysenter_cs, 0); | ||
376 | } | ||
377 | } | 195 | } |
378 | 196 | ||
379 | static void native_io_delay(void) | 197 | void init_IRQ(void) |
380 | { | 198 | { |
381 | asm volatile("outb %al,$0x80"); | 199 | paravirt_ops.init_IRQ(); |
382 | } | 200 | } |
383 | 201 | ||
384 | static void native_flush_tlb(void) | 202 | static void native_flush_tlb(void) |
@@ -395,83 +213,11 @@ static void native_flush_tlb_global(void) | |||
395 | __native_flush_tlb_global(); | 213 | __native_flush_tlb_global(); |
396 | } | 214 | } |
397 | 215 | ||
398 | static void native_flush_tlb_single(u32 addr) | 216 | static void native_flush_tlb_single(unsigned long addr) |
399 | { | 217 | { |
400 | __native_flush_tlb_single(addr); | 218 | __native_flush_tlb_single(addr); |
401 | } | 219 | } |
402 | 220 | ||
403 | #ifndef CONFIG_X86_PAE | ||
404 | static void native_set_pte(pte_t *ptep, pte_t pteval) | ||
405 | { | ||
406 | *ptep = pteval; | ||
407 | } | ||
408 | |||
409 | static void native_set_pte_at(struct mm_struct *mm, u32 addr, pte_t *ptep, pte_t pteval) | ||
410 | { | ||
411 | *ptep = pteval; | ||
412 | } | ||
413 | |||
414 | static void native_set_pmd(pmd_t *pmdp, pmd_t pmdval) | ||
415 | { | ||
416 | *pmdp = pmdval; | ||
417 | } | ||
418 | |||
419 | #else /* CONFIG_X86_PAE */ | ||
420 | |||
421 | static void native_set_pte(pte_t *ptep, pte_t pte) | ||
422 | { | ||
423 | ptep->pte_high = pte.pte_high; | ||
424 | smp_wmb(); | ||
425 | ptep->pte_low = pte.pte_low; | ||
426 | } | ||
427 | |||
428 | static void native_set_pte_at(struct mm_struct *mm, u32 addr, pte_t *ptep, pte_t pte) | ||
429 | { | ||
430 | ptep->pte_high = pte.pte_high; | ||
431 | smp_wmb(); | ||
432 | ptep->pte_low = pte.pte_low; | ||
433 | } | ||
434 | |||
435 | static void native_set_pte_present(struct mm_struct *mm, unsigned long addr, pte_t *ptep, pte_t pte) | ||
436 | { | ||
437 | ptep->pte_low = 0; | ||
438 | smp_wmb(); | ||
439 | ptep->pte_high = pte.pte_high; | ||
440 | smp_wmb(); | ||
441 | ptep->pte_low = pte.pte_low; | ||
442 | } | ||
443 | |||
444 | static void native_set_pte_atomic(pte_t *ptep, pte_t pteval) | ||
445 | { | ||
446 | set_64bit((unsigned long long *)ptep,pte_val(pteval)); | ||
447 | } | ||
448 | |||
449 | static void native_set_pmd(pmd_t *pmdp, pmd_t pmdval) | ||
450 | { | ||
451 | set_64bit((unsigned long long *)pmdp,pmd_val(pmdval)); | ||
452 | } | ||
453 | |||
454 | static void native_set_pud(pud_t *pudp, pud_t pudval) | ||
455 | { | ||
456 | *pudp = pudval; | ||
457 | } | ||
458 | |||
459 | static void native_pte_clear(struct mm_struct *mm, unsigned long addr, pte_t *ptep) | ||
460 | { | ||
461 | ptep->pte_low = 0; | ||
462 | smp_wmb(); | ||
463 | ptep->pte_high = 0; | ||
464 | } | ||
465 | |||
466 | static void native_pmd_clear(pmd_t *pmd) | ||
467 | { | ||
468 | u32 *tmp = (u32 *)pmd; | ||
469 | *tmp = 0; | ||
470 | smp_wmb(); | ||
471 | *(tmp + 1) = 0; | ||
472 | } | ||
473 | #endif /* CONFIG_X86_PAE */ | ||
474 | |||
475 | /* These are in entry.S */ | 221 | /* These are in entry.S */ |
476 | extern void native_iret(void); | 222 | extern void native_iret(void); |
477 | extern void native_irq_enable_sysexit(void); | 223 | extern void native_irq_enable_sysexit(void); |
@@ -487,10 +233,11 @@ struct paravirt_ops paravirt_ops = { | |||
487 | .name = "bare hardware", | 233 | .name = "bare hardware", |
488 | .paravirt_enabled = 0, | 234 | .paravirt_enabled = 0, |
489 | .kernel_rpl = 0, | 235 | .kernel_rpl = 0, |
236 | .shared_kernel_pmd = 1, /* Only used when CONFIG_X86_PAE is set */ | ||
490 | 237 | ||
491 | .patch = native_patch, | 238 | .patch = native_patch, |
492 | .banner = default_banner, | 239 | .banner = default_banner, |
493 | .arch_setup = native_nop, | 240 | .arch_setup = paravirt_nop, |
494 | .memory_setup = machine_specific_memory_setup, | 241 | .memory_setup = machine_specific_memory_setup, |
495 | .get_wallclock = native_get_wallclock, | 242 | .get_wallclock = native_get_wallclock, |
496 | .set_wallclock = native_set_wallclock, | 243 | .set_wallclock = native_set_wallclock, |
@@ -517,8 +264,8 @@ struct paravirt_ops paravirt_ops = { | |||
517 | .safe_halt = native_safe_halt, | 264 | .safe_halt = native_safe_halt, |
518 | .halt = native_halt, | 265 | .halt = native_halt, |
519 | .wbinvd = native_wbinvd, | 266 | .wbinvd = native_wbinvd, |
520 | .read_msr = native_read_msr, | 267 | .read_msr = native_read_msr_safe, |
521 | .write_msr = native_write_msr, | 268 | .write_msr = native_write_msr_safe, |
522 | .read_tsc = native_read_tsc, | 269 | .read_tsc = native_read_tsc, |
523 | .read_pmc = native_read_pmc, | 270 | .read_pmc = native_read_pmc, |
524 | .get_scheduled_cycles = native_read_tsc, | 271 | .get_scheduled_cycles = native_read_tsc, |
@@ -531,9 +278,9 @@ struct paravirt_ops paravirt_ops = { | |||
531 | .store_idt = native_store_idt, | 278 | .store_idt = native_store_idt, |
532 | .store_tr = native_store_tr, | 279 | .store_tr = native_store_tr, |
533 | .load_tls = native_load_tls, | 280 | .load_tls = native_load_tls, |
534 | .write_ldt_entry = native_write_ldt_entry, | 281 | .write_ldt_entry = write_dt_entry, |
535 | .write_gdt_entry = native_write_gdt_entry, | 282 | .write_gdt_entry = write_dt_entry, |
536 | .write_idt_entry = native_write_idt_entry, | 283 | .write_idt_entry = write_dt_entry, |
537 | .load_esp0 = native_load_esp0, | 284 | .load_esp0 = native_load_esp0, |
538 | 285 | ||
539 | .set_iopl_mask = native_set_iopl_mask, | 286 | .set_iopl_mask = native_set_iopl_mask, |
@@ -545,44 +292,57 @@ struct paravirt_ops paravirt_ops = { | |||
545 | .apic_read = native_apic_read, | 292 | .apic_read = native_apic_read, |
546 | .setup_boot_clock = setup_boot_APIC_clock, | 293 | .setup_boot_clock = setup_boot_APIC_clock, |
547 | .setup_secondary_clock = setup_secondary_APIC_clock, | 294 | .setup_secondary_clock = setup_secondary_APIC_clock, |
295 | .startup_ipi_hook = paravirt_nop, | ||
548 | #endif | 296 | #endif |
549 | .set_lazy_mode = (void *)native_nop, | 297 | .set_lazy_mode = paravirt_nop, |
298 | |||
299 | .pagetable_setup_start = native_pagetable_setup_start, | ||
300 | .pagetable_setup_done = native_pagetable_setup_done, | ||
550 | 301 | ||
551 | .flush_tlb_user = native_flush_tlb, | 302 | .flush_tlb_user = native_flush_tlb, |
552 | .flush_tlb_kernel = native_flush_tlb_global, | 303 | .flush_tlb_kernel = native_flush_tlb_global, |
553 | .flush_tlb_single = native_flush_tlb_single, | 304 | .flush_tlb_single = native_flush_tlb_single, |
305 | .flush_tlb_others = native_flush_tlb_others, | ||
554 | 306 | ||
555 | .map_pt_hook = (void *)native_nop, | 307 | .alloc_pt = paravirt_nop, |
556 | 308 | .alloc_pd = paravirt_nop, | |
557 | .alloc_pt = (void *)native_nop, | 309 | .alloc_pd_clone = paravirt_nop, |
558 | .alloc_pd = (void *)native_nop, | 310 | .release_pt = paravirt_nop, |
559 | .alloc_pd_clone = (void *)native_nop, | 311 | .release_pd = paravirt_nop, |
560 | .release_pt = (void *)native_nop, | ||
561 | .release_pd = (void *)native_nop, | ||
562 | 312 | ||
563 | .set_pte = native_set_pte, | 313 | .set_pte = native_set_pte, |
564 | .set_pte_at = native_set_pte_at, | 314 | .set_pte_at = native_set_pte_at, |
565 | .set_pmd = native_set_pmd, | 315 | .set_pmd = native_set_pmd, |
566 | .pte_update = (void *)native_nop, | 316 | .pte_update = paravirt_nop, |
567 | .pte_update_defer = (void *)native_nop, | 317 | .pte_update_defer = paravirt_nop, |
318 | |||
319 | #ifdef CONFIG_HIGHPTE | ||
320 | .kmap_atomic_pte = kmap_atomic, | ||
321 | #endif | ||
322 | |||
568 | #ifdef CONFIG_X86_PAE | 323 | #ifdef CONFIG_X86_PAE |
569 | .set_pte_atomic = native_set_pte_atomic, | 324 | .set_pte_atomic = native_set_pte_atomic, |
570 | .set_pte_present = native_set_pte_present, | 325 | .set_pte_present = native_set_pte_present, |
571 | .set_pud = native_set_pud, | 326 | .set_pud = native_set_pud, |
572 | .pte_clear = native_pte_clear, | 327 | .pte_clear = native_pte_clear, |
573 | .pmd_clear = native_pmd_clear, | 328 | .pmd_clear = native_pmd_clear, |
329 | |||
330 | .pmd_val = native_pmd_val, | ||
331 | .make_pmd = native_make_pmd, | ||
574 | #endif | 332 | #endif |
575 | 333 | ||
334 | .pte_val = native_pte_val, | ||
335 | .pgd_val = native_pgd_val, | ||
336 | |||
337 | .make_pte = native_make_pte, | ||
338 | .make_pgd = native_make_pgd, | ||
339 | |||
576 | .irq_enable_sysexit = native_irq_enable_sysexit, | 340 | .irq_enable_sysexit = native_irq_enable_sysexit, |
577 | .iret = native_iret, | 341 | .iret = native_iret, |
578 | 342 | ||
579 | .startup_ipi_hook = (void *)native_nop, | 343 | .dup_mmap = paravirt_nop, |
344 | .exit_mmap = paravirt_nop, | ||
345 | .activate_mm = paravirt_nop, | ||
580 | }; | 346 | }; |
581 | 347 | ||
582 | /* | 348 | EXPORT_SYMBOL(paravirt_ops); |
583 | * NOTE: CONFIG_PARAVIRT is experimental and the paravirt_ops | ||
584 | * semantics are subject to change. Hence we only do this | ||
585 | * internal-only export of this, until it gets sorted out and | ||
586 | * all lowlevel CPU ops used by modules are separately exported. | ||
587 | */ | ||
588 | EXPORT_SYMBOL_GPL(paravirt_ops); | ||
diff --git a/arch/i386/kernel/process.c b/arch/i386/kernel/process.c index 393a67d5d94..61999479b7a 100644 --- a/arch/i386/kernel/process.c +++ b/arch/i386/kernel/process.c | |||
@@ -39,6 +39,7 @@ | |||
39 | #include <linux/random.h> | 39 | #include <linux/random.h> |
40 | #include <linux/personality.h> | 40 | #include <linux/personality.h> |
41 | #include <linux/tick.h> | 41 | #include <linux/tick.h> |
42 | #include <linux/percpu.h> | ||
42 | 43 | ||
43 | #include <asm/uaccess.h> | 44 | #include <asm/uaccess.h> |
44 | #include <asm/pgtable.h> | 45 | #include <asm/pgtable.h> |
@@ -57,7 +58,6 @@ | |||
57 | 58 | ||
58 | #include <asm/tlbflush.h> | 59 | #include <asm/tlbflush.h> |
59 | #include <asm/cpu.h> | 60 | #include <asm/cpu.h> |
60 | #include <asm/pda.h> | ||
61 | 61 | ||
62 | asmlinkage void ret_from_fork(void) __asm__("ret_from_fork"); | 62 | asmlinkage void ret_from_fork(void) __asm__("ret_from_fork"); |
63 | 63 | ||
@@ -66,6 +66,12 @@ static int hlt_counter; | |||
66 | unsigned long boot_option_idle_override = 0; | 66 | unsigned long boot_option_idle_override = 0; |
67 | EXPORT_SYMBOL(boot_option_idle_override); | 67 | EXPORT_SYMBOL(boot_option_idle_override); |
68 | 68 | ||
69 | DEFINE_PER_CPU(struct task_struct *, current_task) = &init_task; | ||
70 | EXPORT_PER_CPU_SYMBOL(current_task); | ||
71 | |||
72 | DEFINE_PER_CPU(int, cpu_number); | ||
73 | EXPORT_PER_CPU_SYMBOL(cpu_number); | ||
74 | |||
69 | /* | 75 | /* |
70 | * Return saved PC of a blocked thread. | 76 | * Return saved PC of a blocked thread. |
71 | */ | 77 | */ |
@@ -272,25 +278,24 @@ void __devinit select_idle_routine(const struct cpuinfo_x86 *c) | |||
272 | } | 278 | } |
273 | } | 279 | } |
274 | 280 | ||
275 | static int __init idle_setup (char *str) | 281 | static int __init idle_setup(char *str) |
276 | { | 282 | { |
277 | if (!strncmp(str, "poll", 4)) { | 283 | if (!strcmp(str, "poll")) { |
278 | printk("using polling idle threads.\n"); | 284 | printk("using polling idle threads.\n"); |
279 | pm_idle = poll_idle; | 285 | pm_idle = poll_idle; |
280 | #ifdef CONFIG_X86_SMP | 286 | #ifdef CONFIG_X86_SMP |
281 | if (smp_num_siblings > 1) | 287 | if (smp_num_siblings > 1) |
282 | printk("WARNING: polling idle and HT enabled, performance may degrade.\n"); | 288 | printk("WARNING: polling idle and HT enabled, performance may degrade.\n"); |
283 | #endif | 289 | #endif |
284 | } else if (!strncmp(str, "halt", 4)) { | 290 | } else if (!strcmp(str, "mwait")) |
285 | printk("using halt in idle threads.\n"); | 291 | force_mwait = 1; |
286 | pm_idle = default_idle; | 292 | else |
287 | } | 293 | return -1; |
288 | 294 | ||
289 | boot_option_idle_override = 1; | 295 | boot_option_idle_override = 1; |
290 | return 1; | 296 | return 0; |
291 | } | 297 | } |
292 | 298 | early_param("idle", idle_setup); | |
293 | __setup("idle=", idle_setup); | ||
294 | 299 | ||
295 | void show_regs(struct pt_regs * regs) | 300 | void show_regs(struct pt_regs * regs) |
296 | { | 301 | { |
@@ -343,7 +348,7 @@ int kernel_thread(int (*fn)(void *), void * arg, unsigned long flags) | |||
343 | 348 | ||
344 | regs.xds = __USER_DS; | 349 | regs.xds = __USER_DS; |
345 | regs.xes = __USER_DS; | 350 | regs.xes = __USER_DS; |
346 | regs.xfs = __KERNEL_PDA; | 351 | regs.xfs = __KERNEL_PERCPU; |
347 | regs.orig_eax = -1; | 352 | regs.orig_eax = -1; |
348 | regs.eip = (unsigned long) kernel_thread_helper; | 353 | regs.eip = (unsigned long) kernel_thread_helper; |
349 | regs.xcs = __KERNEL_CS | get_kernel_rpl(); | 354 | regs.xcs = __KERNEL_CS | get_kernel_rpl(); |
@@ -376,7 +381,7 @@ void exit_thread(void) | |||
376 | t->io_bitmap_max = 0; | 381 | t->io_bitmap_max = 0; |
377 | tss->io_bitmap_owner = NULL; | 382 | tss->io_bitmap_owner = NULL; |
378 | tss->io_bitmap_max = 0; | 383 | tss->io_bitmap_max = 0; |
379 | tss->io_bitmap_base = INVALID_IO_BITMAP_OFFSET; | 384 | tss->x86_tss.io_bitmap_base = INVALID_IO_BITMAP_OFFSET; |
380 | put_cpu(); | 385 | put_cpu(); |
381 | } | 386 | } |
382 | } | 387 | } |
@@ -555,7 +560,7 @@ static noinline void __switch_to_xtra(struct task_struct *next_p, | |||
555 | * Disable the bitmap via an invalid offset. We still cache | 560 | * Disable the bitmap via an invalid offset. We still cache |
556 | * the previous bitmap owner and the IO bitmap contents: | 561 | * the previous bitmap owner and the IO bitmap contents: |
557 | */ | 562 | */ |
558 | tss->io_bitmap_base = INVALID_IO_BITMAP_OFFSET; | 563 | tss->x86_tss.io_bitmap_base = INVALID_IO_BITMAP_OFFSET; |
559 | return; | 564 | return; |
560 | } | 565 | } |
561 | 566 | ||
@@ -565,7 +570,7 @@ static noinline void __switch_to_xtra(struct task_struct *next_p, | |||
565 | * matches the next task, we dont have to do anything but | 570 | * matches the next task, we dont have to do anything but |
566 | * to set a valid offset in the TSS: | 571 | * to set a valid offset in the TSS: |
567 | */ | 572 | */ |
568 | tss->io_bitmap_base = IO_BITMAP_OFFSET; | 573 | tss->x86_tss.io_bitmap_base = IO_BITMAP_OFFSET; |
569 | return; | 574 | return; |
570 | } | 575 | } |
571 | /* | 576 | /* |
@@ -577,7 +582,7 @@ static noinline void __switch_to_xtra(struct task_struct *next_p, | |||
577 | * redundant copies when the currently switched task does not | 582 | * redundant copies when the currently switched task does not |
578 | * perform any I/O during its timeslice. | 583 | * perform any I/O during its timeslice. |
579 | */ | 584 | */ |
580 | tss->io_bitmap_base = INVALID_IO_BITMAP_OFFSET_LAZY; | 585 | tss->x86_tss.io_bitmap_base = INVALID_IO_BITMAP_OFFSET_LAZY; |
581 | } | 586 | } |
582 | 587 | ||
583 | /* | 588 | /* |
@@ -712,7 +717,7 @@ struct task_struct fastcall * __switch_to(struct task_struct *prev_p, struct tas | |||
712 | if (prev->gs | next->gs) | 717 | if (prev->gs | next->gs) |
713 | loadsegment(gs, next->gs); | 718 | loadsegment(gs, next->gs); |
714 | 719 | ||
715 | write_pda(pcurrent, next_p); | 720 | x86_write_percpu(current_task, next_p); |
716 | 721 | ||
717 | return prev_p; | 722 | return prev_p; |
718 | } | 723 | } |
diff --git a/arch/i386/kernel/quirks.c b/arch/i386/kernel/quirks.c index 34874c398b4..9f6ab1789bb 100644 --- a/arch/i386/kernel/quirks.c +++ b/arch/i386/kernel/quirks.c | |||
@@ -3,12 +3,10 @@ | |||
3 | */ | 3 | */ |
4 | #include <linux/pci.h> | 4 | #include <linux/pci.h> |
5 | #include <linux/irq.h> | 5 | #include <linux/irq.h> |
6 | #include <asm/pci-direct.h> | ||
7 | #include <asm/genapic.h> | ||
8 | #include <asm/cpu.h> | ||
9 | 6 | ||
10 | #if defined(CONFIG_X86_IO_APIC) && defined(CONFIG_SMP) && defined(CONFIG_PCI) | 7 | #if defined(CONFIG_X86_IO_APIC) && defined(CONFIG_SMP) && defined(CONFIG_PCI) |
11 | static void __devinit verify_quirk_intel_irqbalance(struct pci_dev *dev) | 8 | |
9 | static void __devinit quirk_intel_irqbalance(struct pci_dev *dev) | ||
12 | { | 10 | { |
13 | u8 config, rev; | 11 | u8 config, rev; |
14 | u32 word; | 12 | u32 word; |
@@ -16,12 +14,14 @@ static void __devinit verify_quirk_intel_irqbalance(struct pci_dev *dev) | |||
16 | /* BIOS may enable hardware IRQ balancing for | 14 | /* BIOS may enable hardware IRQ balancing for |
17 | * E7520/E7320/E7525(revision ID 0x9 and below) | 15 | * E7520/E7320/E7525(revision ID 0x9 and below) |
18 | * based platforms. | 16 | * based platforms. |
19 | * For those platforms, make sure that the genapic is set to 'flat' | 17 | * Disable SW irqbalance/affinity on those platforms. |
20 | */ | 18 | */ |
21 | pci_read_config_byte(dev, PCI_CLASS_REVISION, &rev); | 19 | pci_read_config_byte(dev, PCI_CLASS_REVISION, &rev); |
22 | if (rev > 0x9) | 20 | if (rev > 0x9) |
23 | return; | 21 | return; |
24 | 22 | ||
23 | printk(KERN_INFO "Intel E7520/7320/7525 detected."); | ||
24 | |||
25 | /* enable access to config space*/ | 25 | /* enable access to config space*/ |
26 | pci_read_config_byte(dev, 0xf4, &config); | 26 | pci_read_config_byte(dev, 0xf4, &config); |
27 | pci_write_config_byte(dev, 0xf4, config|0x2); | 27 | pci_write_config_byte(dev, 0xf4, config|0x2); |
@@ -30,44 +30,6 @@ static void __devinit verify_quirk_intel_irqbalance(struct pci_dev *dev) | |||
30 | raw_pci_ops->read(0, 0, 0x40, 0x4c, 2, &word); | 30 | raw_pci_ops->read(0, 0, 0x40, 0x4c, 2, &word); |
31 | 31 | ||
32 | if (!(word & (1 << 13))) { | 32 | if (!(word & (1 << 13))) { |
33 | #ifdef CONFIG_X86_64 | ||
34 | if (genapic != &apic_flat) | ||
35 | panic("APIC mode must be flat on this system\n"); | ||
36 | #elif defined(CONFIG_X86_GENERICARCH) | ||
37 | if (genapic != &apic_default) | ||
38 | panic("APIC mode must be default(flat) on this system. Use apic=default\n"); | ||
39 | #endif | ||
40 | } | ||
41 | |||
42 | /* put back the original value for config space*/ | ||
43 | if (!(config & 0x2)) | ||
44 | pci_write_config_byte(dev, 0xf4, config); | ||
45 | } | ||
46 | |||
47 | void __init quirk_intel_irqbalance(void) | ||
48 | { | ||
49 | u8 config, rev; | ||
50 | u32 word; | ||
51 | |||
52 | /* BIOS may enable hardware IRQ balancing for | ||
53 | * E7520/E7320/E7525(revision ID 0x9 and below) | ||
54 | * based platforms. | ||
55 | * Disable SW irqbalance/affinity on those platforms. | ||
56 | */ | ||
57 | rev = read_pci_config_byte(0, 0, 0, PCI_CLASS_REVISION); | ||
58 | if (rev > 0x9) | ||
59 | return; | ||
60 | |||
61 | printk(KERN_INFO "Intel E7520/7320/7525 detected."); | ||
62 | |||
63 | /* enable access to config space */ | ||
64 | config = read_pci_config_byte(0, 0, 0, 0xf4); | ||
65 | write_pci_config_byte(0, 0, 0, 0xf4, config|0x2); | ||
66 | |||
67 | /* read xTPR register */ | ||
68 | word = read_pci_config_16(0, 0, 0x40, 0x4c); | ||
69 | |||
70 | if (!(word & (1 << 13))) { | ||
71 | printk(KERN_INFO "Disabling irq balancing and affinity\n"); | 33 | printk(KERN_INFO "Disabling irq balancing and affinity\n"); |
72 | #ifdef CONFIG_IRQBALANCE | 34 | #ifdef CONFIG_IRQBALANCE |
73 | irqbalance_disable(""); | 35 | irqbalance_disable(""); |
@@ -76,24 +38,13 @@ void __init quirk_intel_irqbalance(void) | |||
76 | #ifdef CONFIG_PROC_FS | 38 | #ifdef CONFIG_PROC_FS |
77 | no_irq_affinity = 1; | 39 | no_irq_affinity = 1; |
78 | #endif | 40 | #endif |
79 | #ifdef CONFIG_HOTPLUG_CPU | ||
80 | printk(KERN_INFO "Disabling cpu hotplug control\n"); | ||
81 | enable_cpu_hotplug = 0; | ||
82 | #endif | ||
83 | #ifdef CONFIG_X86_64 | ||
84 | /* force the genapic selection to flat mode so that | ||
85 | * interrupts can be redirected to more than one CPU. | ||
86 | */ | ||
87 | genapic_force = &apic_flat; | ||
88 | #endif | ||
89 | } | 41 | } |
90 | 42 | ||
91 | /* put back the original value for config space */ | 43 | /* put back the original value for config space*/ |
92 | if (!(config & 0x2)) | 44 | if (!(config & 0x2)) |
93 | write_pci_config_byte(0, 0, 0, 0xf4, config); | 45 | pci_write_config_byte(dev, 0xf4, config); |
94 | } | 46 | } |
95 | DECLARE_PCI_FIXUP_FINAL(PCI_VENDOR_ID_INTEL, PCI_DEVICE_ID_INTEL_E7320_MCH, verify_quirk_intel_irqbalance); | 47 | DECLARE_PCI_FIXUP_FINAL(PCI_VENDOR_ID_INTEL, PCI_DEVICE_ID_INTEL_E7320_MCH, quirk_intel_irqbalance); |
96 | DECLARE_PCI_FIXUP_FINAL(PCI_VENDOR_ID_INTEL, PCI_DEVICE_ID_INTEL_E7525_MCH, verify_quirk_intel_irqbalance); | 48 | DECLARE_PCI_FIXUP_FINAL(PCI_VENDOR_ID_INTEL, PCI_DEVICE_ID_INTEL_E7525_MCH, quirk_intel_irqbalance); |
97 | DECLARE_PCI_FIXUP_FINAL(PCI_VENDOR_ID_INTEL, PCI_DEVICE_ID_INTEL_E7520_MCH, verify_quirk_intel_irqbalance); | 49 | DECLARE_PCI_FIXUP_FINAL(PCI_VENDOR_ID_INTEL, PCI_DEVICE_ID_INTEL_E7520_MCH, quirk_intel_irqbalance); |
98 | |||
99 | #endif | 50 | #endif |
diff --git a/arch/i386/kernel/reboot.c b/arch/i386/kernel/reboot.c index 3514b4153f7..50dfc65319c 100644 --- a/arch/i386/kernel/reboot.c +++ b/arch/i386/kernel/reboot.c | |||
@@ -17,7 +17,8 @@ | |||
17 | #include <asm/apic.h> | 17 | #include <asm/apic.h> |
18 | #include <asm/desc.h> | 18 | #include <asm/desc.h> |
19 | #include "mach_reboot.h" | 19 | #include "mach_reboot.h" |
20 | #include <linux/reboot_fixups.h> | 20 | #include <asm/reboot_fixups.h> |
21 | #include <asm/reboot.h> | ||
21 | 22 | ||
22 | /* | 23 | /* |
23 | * Power off function, if any | 24 | * Power off function, if any |
@@ -197,8 +198,6 @@ static unsigned char jump_to_bios [] = | |||
197 | */ | 198 | */ |
198 | void machine_real_restart(unsigned char *code, int length) | 199 | void machine_real_restart(unsigned char *code, int length) |
199 | { | 200 | { |
200 | unsigned long flags; | ||
201 | |||
202 | local_irq_disable(); | 201 | local_irq_disable(); |
203 | 202 | ||
204 | /* Write zero to CMOS register number 0x0f, which the BIOS POST | 203 | /* Write zero to CMOS register number 0x0f, which the BIOS POST |
@@ -211,9 +210,9 @@ void machine_real_restart(unsigned char *code, int length) | |||
211 | safe side. (Yes, CMOS_WRITE does outb_p's. - Paul G.) | 210 | safe side. (Yes, CMOS_WRITE does outb_p's. - Paul G.) |
212 | */ | 211 | */ |
213 | 212 | ||
214 | spin_lock_irqsave(&rtc_lock, flags); | 213 | spin_lock(&rtc_lock); |
215 | CMOS_WRITE(0x00, 0x8f); | 214 | CMOS_WRITE(0x00, 0x8f); |
216 | spin_unlock_irqrestore(&rtc_lock, flags); | 215 | spin_unlock(&rtc_lock); |
217 | 216 | ||
218 | /* Remap the kernel at virtual address zero, as well as offset zero | 217 | /* Remap the kernel at virtual address zero, as well as offset zero |
219 | from the kernel segment. This assumes the kernel segment starts at | 218 | from the kernel segment. This assumes the kernel segment starts at |
@@ -280,7 +279,7 @@ void machine_real_restart(unsigned char *code, int length) | |||
280 | EXPORT_SYMBOL(machine_real_restart); | 279 | EXPORT_SYMBOL(machine_real_restart); |
281 | #endif | 280 | #endif |
282 | 281 | ||
283 | void machine_shutdown(void) | 282 | static void native_machine_shutdown(void) |
284 | { | 283 | { |
285 | #ifdef CONFIG_SMP | 284 | #ifdef CONFIG_SMP |
286 | int reboot_cpu_id; | 285 | int reboot_cpu_id; |
@@ -316,7 +315,11 @@ void machine_shutdown(void) | |||
316 | #endif | 315 | #endif |
317 | } | 316 | } |
318 | 317 | ||
319 | void machine_emergency_restart(void) | 318 | void __attribute__((weak)) mach_reboot_fixups(void) |
319 | { | ||
320 | } | ||
321 | |||
322 | static void native_machine_emergency_restart(void) | ||
320 | { | 323 | { |
321 | if (!reboot_thru_bios) { | 324 | if (!reboot_thru_bios) { |
322 | if (efi_enabled) { | 325 | if (efi_enabled) { |
@@ -340,17 +343,17 @@ void machine_emergency_restart(void) | |||
340 | machine_real_restart(jump_to_bios, sizeof(jump_to_bios)); | 343 | machine_real_restart(jump_to_bios, sizeof(jump_to_bios)); |
341 | } | 344 | } |
342 | 345 | ||
343 | void machine_restart(char * __unused) | 346 | static void native_machine_restart(char * __unused) |
344 | { | 347 | { |
345 | machine_shutdown(); | 348 | machine_shutdown(); |
346 | machine_emergency_restart(); | 349 | machine_emergency_restart(); |
347 | } | 350 | } |
348 | 351 | ||
349 | void machine_halt(void) | 352 | static void native_machine_halt(void) |
350 | { | 353 | { |
351 | } | 354 | } |
352 | 355 | ||
353 | void machine_power_off(void) | 356 | static void native_machine_power_off(void) |
354 | { | 357 | { |
355 | if (pm_power_off) { | 358 | if (pm_power_off) { |
356 | machine_shutdown(); | 359 | machine_shutdown(); |
@@ -359,3 +362,35 @@ void machine_power_off(void) | |||
359 | } | 362 | } |
360 | 363 | ||
361 | 364 | ||
365 | struct machine_ops machine_ops = { | ||
366 | .power_off = native_machine_power_off, | ||
367 | .shutdown = native_machine_shutdown, | ||
368 | .emergency_restart = native_machine_emergency_restart, | ||
369 | .restart = native_machine_restart, | ||
370 | .halt = native_machine_halt, | ||
371 | }; | ||
372 | |||
373 | void machine_power_off(void) | ||
374 | { | ||
375 | machine_ops.power_off(); | ||
376 | } | ||
377 | |||
378 | void machine_shutdown(void) | ||
379 | { | ||
380 | machine_ops.shutdown(); | ||
381 | } | ||
382 | |||
383 | void machine_emergency_restart(void) | ||
384 | { | ||
385 | machine_ops.emergency_restart(); | ||
386 | } | ||
387 | |||
388 | void machine_restart(char *cmd) | ||
389 | { | ||
390 | machine_ops.restart(cmd); | ||
391 | } | ||
392 | |||
393 | void machine_halt(void) | ||
394 | { | ||
395 | machine_ops.halt(); | ||
396 | } | ||
diff --git a/arch/i386/kernel/reboot_fixups.c b/arch/i386/kernel/reboot_fixups.c index 99aab41a05b..2d78d918340 100644 --- a/arch/i386/kernel/reboot_fixups.c +++ b/arch/i386/kernel/reboot_fixups.c | |||
@@ -10,7 +10,7 @@ | |||
10 | 10 | ||
11 | #include <asm/delay.h> | 11 | #include <asm/delay.h> |
12 | #include <linux/pci.h> | 12 | #include <linux/pci.h> |
13 | #include <linux/reboot_fixups.h> | 13 | #include <asm/reboot_fixups.h> |
14 | 14 | ||
15 | static void cs5530a_warm_reset(struct pci_dev *dev) | 15 | static void cs5530a_warm_reset(struct pci_dev *dev) |
16 | { | 16 | { |
diff --git a/arch/i386/kernel/smp.c b/arch/i386/kernel/smp.c index 0e8977871b1..89a45a9ddcd 100644 --- a/arch/i386/kernel/smp.c +++ b/arch/i386/kernel/smp.c | |||
@@ -165,20 +165,20 @@ void fastcall send_IPI_self(int vector) | |||
165 | } | 165 | } |
166 | 166 | ||
167 | /* | 167 | /* |
168 | * This is only used on smaller machines. | 168 | * This is used to send an IPI with no shorthand notation (the destination is |
169 | * specified in bits 56 to 63 of the ICR). | ||
169 | */ | 170 | */ |
170 | void send_IPI_mask_bitmask(cpumask_t cpumask, int vector) | 171 | static inline void __send_IPI_dest_field(unsigned long mask, int vector) |
171 | { | 172 | { |
172 | unsigned long mask = cpus_addr(cpumask)[0]; | ||
173 | unsigned long cfg; | 173 | unsigned long cfg; |
174 | unsigned long flags; | ||
175 | 174 | ||
176 | local_irq_save(flags); | ||
177 | WARN_ON(mask & ~cpus_addr(cpu_online_map)[0]); | ||
178 | /* | 175 | /* |
179 | * Wait for idle. | 176 | * Wait for idle. |
180 | */ | 177 | */ |
181 | apic_wait_icr_idle(); | 178 | if (unlikely(vector == NMI_VECTOR)) |
179 | safe_apic_wait_icr_idle(); | ||
180 | else | ||
181 | apic_wait_icr_idle(); | ||
182 | 182 | ||
183 | /* | 183 | /* |
184 | * prepare target chip field | 184 | * prepare target chip field |
@@ -195,13 +195,25 @@ void send_IPI_mask_bitmask(cpumask_t cpumask, int vector) | |||
195 | * Send the IPI. The write to APIC_ICR fires this off. | 195 | * Send the IPI. The write to APIC_ICR fires this off. |
196 | */ | 196 | */ |
197 | apic_write_around(APIC_ICR, cfg); | 197 | apic_write_around(APIC_ICR, cfg); |
198 | } | ||
199 | |||
200 | /* | ||
201 | * This is only used on smaller machines. | ||
202 | */ | ||
203 | void send_IPI_mask_bitmask(cpumask_t cpumask, int vector) | ||
204 | { | ||
205 | unsigned long mask = cpus_addr(cpumask)[0]; | ||
206 | unsigned long flags; | ||
198 | 207 | ||
208 | local_irq_save(flags); | ||
209 | WARN_ON(mask & ~cpus_addr(cpu_online_map)[0]); | ||
210 | __send_IPI_dest_field(mask, vector); | ||
199 | local_irq_restore(flags); | 211 | local_irq_restore(flags); |
200 | } | 212 | } |
201 | 213 | ||
202 | void send_IPI_mask_sequence(cpumask_t mask, int vector) | 214 | void send_IPI_mask_sequence(cpumask_t mask, int vector) |
203 | { | 215 | { |
204 | unsigned long cfg, flags; | 216 | unsigned long flags; |
205 | unsigned int query_cpu; | 217 | unsigned int query_cpu; |
206 | 218 | ||
207 | /* | 219 | /* |
@@ -211,30 +223,10 @@ void send_IPI_mask_sequence(cpumask_t mask, int vector) | |||
211 | */ | 223 | */ |
212 | 224 | ||
213 | local_irq_save(flags); | 225 | local_irq_save(flags); |
214 | |||
215 | for (query_cpu = 0; query_cpu < NR_CPUS; ++query_cpu) { | 226 | for (query_cpu = 0; query_cpu < NR_CPUS; ++query_cpu) { |
216 | if (cpu_isset(query_cpu, mask)) { | 227 | if (cpu_isset(query_cpu, mask)) { |
217 | 228 | __send_IPI_dest_field(cpu_to_logical_apicid(query_cpu), | |
218 | /* | 229 | vector); |
219 | * Wait for idle. | ||
220 | */ | ||
221 | apic_wait_icr_idle(); | ||
222 | |||
223 | /* | ||
224 | * prepare target chip field | ||
225 | */ | ||
226 | cfg = __prepare_ICR2(cpu_to_logical_apicid(query_cpu)); | ||
227 | apic_write_around(APIC_ICR2, cfg); | ||
228 | |||
229 | /* | ||
230 | * program the ICR | ||
231 | */ | ||
232 | cfg = __prepare_ICR(0, vector); | ||
233 | |||
234 | /* | ||
235 | * Send the IPI. The write to APIC_ICR fires this off. | ||
236 | */ | ||
237 | apic_write_around(APIC_ICR, cfg); | ||
238 | } | 230 | } |
239 | } | 231 | } |
240 | local_irq_restore(flags); | 232 | local_irq_restore(flags); |
@@ -256,7 +248,6 @@ static cpumask_t flush_cpumask; | |||
256 | static struct mm_struct * flush_mm; | 248 | static struct mm_struct * flush_mm; |
257 | static unsigned long flush_va; | 249 | static unsigned long flush_va; |
258 | static DEFINE_SPINLOCK(tlbstate_lock); | 250 | static DEFINE_SPINLOCK(tlbstate_lock); |
259 | #define FLUSH_ALL 0xffffffff | ||
260 | 251 | ||
261 | /* | 252 | /* |
262 | * We cannot call mmdrop() because we are in interrupt context, | 253 | * We cannot call mmdrop() because we are in interrupt context, |
@@ -338,7 +329,7 @@ fastcall void smp_invalidate_interrupt(struct pt_regs *regs) | |||
338 | 329 | ||
339 | if (flush_mm == per_cpu(cpu_tlbstate, cpu).active_mm) { | 330 | if (flush_mm == per_cpu(cpu_tlbstate, cpu).active_mm) { |
340 | if (per_cpu(cpu_tlbstate, cpu).state == TLBSTATE_OK) { | 331 | if (per_cpu(cpu_tlbstate, cpu).state == TLBSTATE_OK) { |
341 | if (flush_va == FLUSH_ALL) | 332 | if (flush_va == TLB_FLUSH_ALL) |
342 | local_flush_tlb(); | 333 | local_flush_tlb(); |
343 | else | 334 | else |
344 | __flush_tlb_one(flush_va); | 335 | __flush_tlb_one(flush_va); |
@@ -353,9 +344,11 @@ out: | |||
353 | put_cpu_no_resched(); | 344 | put_cpu_no_resched(); |
354 | } | 345 | } |
355 | 346 | ||
356 | static void flush_tlb_others(cpumask_t cpumask, struct mm_struct *mm, | 347 | void native_flush_tlb_others(const cpumask_t *cpumaskp, struct mm_struct *mm, |
357 | unsigned long va) | 348 | unsigned long va) |
358 | { | 349 | { |
350 | cpumask_t cpumask = *cpumaskp; | ||
351 | |||
359 | /* | 352 | /* |
360 | * A couple of (to be removed) sanity checks: | 353 | * A couple of (to be removed) sanity checks: |
361 | * | 354 | * |
@@ -366,10 +359,12 @@ static void flush_tlb_others(cpumask_t cpumask, struct mm_struct *mm, | |||
366 | BUG_ON(cpu_isset(smp_processor_id(), cpumask)); | 359 | BUG_ON(cpu_isset(smp_processor_id(), cpumask)); |
367 | BUG_ON(!mm); | 360 | BUG_ON(!mm); |
368 | 361 | ||
362 | #ifdef CONFIG_HOTPLUG_CPU | ||
369 | /* If a CPU which we ran on has gone down, OK. */ | 363 | /* If a CPU which we ran on has gone down, OK. */ |
370 | cpus_and(cpumask, cpumask, cpu_online_map); | 364 | cpus_and(cpumask, cpumask, cpu_online_map); |
371 | if (cpus_empty(cpumask)) | 365 | if (unlikely(cpus_empty(cpumask))) |
372 | return; | 366 | return; |
367 | #endif | ||
373 | 368 | ||
374 | /* | 369 | /* |
375 | * i'm not happy about this global shared spinlock in the | 370 | * i'm not happy about this global shared spinlock in the |
@@ -380,17 +375,7 @@ static void flush_tlb_others(cpumask_t cpumask, struct mm_struct *mm, | |||
380 | 375 | ||
381 | flush_mm = mm; | 376 | flush_mm = mm; |
382 | flush_va = va; | 377 | flush_va = va; |
383 | #if NR_CPUS <= BITS_PER_LONG | 378 | cpus_or(flush_cpumask, cpumask, flush_cpumask); |
384 | atomic_set_mask(cpumask, &flush_cpumask); | ||
385 | #else | ||
386 | { | ||
387 | int k; | ||
388 | unsigned long *flush_mask = (unsigned long *)&flush_cpumask; | ||
389 | unsigned long *cpu_mask = (unsigned long *)&cpumask; | ||
390 | for (k = 0; k < BITS_TO_LONGS(NR_CPUS); ++k) | ||
391 | atomic_set_mask(cpu_mask[k], &flush_mask[k]); | ||
392 | } | ||
393 | #endif | ||
394 | /* | 379 | /* |
395 | * We have to send the IPI only to | 380 | * We have to send the IPI only to |
396 | * CPUs affected. | 381 | * CPUs affected. |
@@ -417,7 +402,7 @@ void flush_tlb_current_task(void) | |||
417 | 402 | ||
418 | local_flush_tlb(); | 403 | local_flush_tlb(); |
419 | if (!cpus_empty(cpu_mask)) | 404 | if (!cpus_empty(cpu_mask)) |
420 | flush_tlb_others(cpu_mask, mm, FLUSH_ALL); | 405 | flush_tlb_others(cpu_mask, mm, TLB_FLUSH_ALL); |
421 | preempt_enable(); | 406 | preempt_enable(); |
422 | } | 407 | } |
423 | 408 | ||
@@ -436,7 +421,7 @@ void flush_tlb_mm (struct mm_struct * mm) | |||
436 | leave_mm(smp_processor_id()); | 421 | leave_mm(smp_processor_id()); |
437 | } | 422 | } |
438 | if (!cpus_empty(cpu_mask)) | 423 | if (!cpus_empty(cpu_mask)) |
439 | flush_tlb_others(cpu_mask, mm, FLUSH_ALL); | 424 | flush_tlb_others(cpu_mask, mm, TLB_FLUSH_ALL); |
440 | 425 | ||
441 | preempt_enable(); | 426 | preempt_enable(); |
442 | } | 427 | } |
@@ -483,7 +468,7 @@ void flush_tlb_all(void) | |||
483 | * it goes straight through and wastes no time serializing | 468 | * it goes straight through and wastes no time serializing |
484 | * anything. Worst case is that we lose a reschedule ... | 469 | * anything. Worst case is that we lose a reschedule ... |
485 | */ | 470 | */ |
486 | void smp_send_reschedule(int cpu) | 471 | void native_smp_send_reschedule(int cpu) |
487 | { | 472 | { |
488 | WARN_ON(cpu_is_offline(cpu)); | 473 | WARN_ON(cpu_is_offline(cpu)); |
489 | send_IPI_mask(cpumask_of_cpu(cpu), RESCHEDULE_VECTOR); | 474 | send_IPI_mask(cpumask_of_cpu(cpu), RESCHEDULE_VECTOR); |
@@ -515,36 +500,78 @@ void unlock_ipi_call_lock(void) | |||
515 | 500 | ||
516 | static struct call_data_struct *call_data; | 501 | static struct call_data_struct *call_data; |
517 | 502 | ||
503 | static void __smp_call_function(void (*func) (void *info), void *info, | ||
504 | int nonatomic, int wait) | ||
505 | { | ||
506 | struct call_data_struct data; | ||
507 | int cpus = num_online_cpus() - 1; | ||
508 | |||
509 | if (!cpus) | ||
510 | return; | ||
511 | |||
512 | data.func = func; | ||
513 | data.info = info; | ||
514 | atomic_set(&data.started, 0); | ||
515 | data.wait = wait; | ||
516 | if (wait) | ||
517 | atomic_set(&data.finished, 0); | ||
518 | |||
519 | call_data = &data; | ||
520 | mb(); | ||
521 | |||
522 | /* Send a message to all other CPUs and wait for them to respond */ | ||
523 | send_IPI_allbutself(CALL_FUNCTION_VECTOR); | ||
524 | |||
525 | /* Wait for response */ | ||
526 | while (atomic_read(&data.started) != cpus) | ||
527 | cpu_relax(); | ||
528 | |||
529 | if (wait) | ||
530 | while (atomic_read(&data.finished) != cpus) | ||
531 | cpu_relax(); | ||
532 | } | ||
533 | |||
534 | |||
518 | /** | 535 | /** |
519 | * smp_call_function(): Run a function on all other CPUs. | 536 | * smp_call_function_mask(): Run a function on a set of other CPUs. |
537 | * @mask: The set of cpus to run on. Must not include the current cpu. | ||
520 | * @func: The function to run. This must be fast and non-blocking. | 538 | * @func: The function to run. This must be fast and non-blocking. |
521 | * @info: An arbitrary pointer to pass to the function. | 539 | * @info: An arbitrary pointer to pass to the function. |
522 | * @nonatomic: currently unused. | ||
523 | * @wait: If true, wait (atomically) until function has completed on other CPUs. | 540 | * @wait: If true, wait (atomically) until function has completed on other CPUs. |
524 | * | 541 | * |
525 | * Returns 0 on success, else a negative status code. Does not return until | 542 | * Returns 0 on success, else a negative status code. |
526 | * remote CPUs are nearly ready to execute <<func>> or are or have executed. | 543 | * |
544 | * If @wait is true, then returns once @func has returned; otherwise | ||
545 | * it returns just before the target cpu calls @func. | ||
527 | * | 546 | * |
528 | * You must not call this function with disabled interrupts or from a | 547 | * You must not call this function with disabled interrupts or from a |
529 | * hardware interrupt handler or from a bottom half handler. | 548 | * hardware interrupt handler or from a bottom half handler. |
530 | */ | 549 | */ |
531 | int smp_call_function (void (*func) (void *info), void *info, int nonatomic, | 550 | int native_smp_call_function_mask(cpumask_t mask, |
532 | int wait) | 551 | void (*func)(void *), void *info, |
552 | int wait) | ||
533 | { | 553 | { |
534 | struct call_data_struct data; | 554 | struct call_data_struct data; |
555 | cpumask_t allbutself; | ||
535 | int cpus; | 556 | int cpus; |
536 | 557 | ||
558 | /* Can deadlock when called with interrupts disabled */ | ||
559 | WARN_ON(irqs_disabled()); | ||
560 | |||
537 | /* Holding any lock stops cpus from going down. */ | 561 | /* Holding any lock stops cpus from going down. */ |
538 | spin_lock(&call_lock); | 562 | spin_lock(&call_lock); |
539 | cpus = num_online_cpus() - 1; | 563 | |
564 | allbutself = cpu_online_map; | ||
565 | cpu_clear(smp_processor_id(), allbutself); | ||
566 | |||
567 | cpus_and(mask, mask, allbutself); | ||
568 | cpus = cpus_weight(mask); | ||
569 | |||
540 | if (!cpus) { | 570 | if (!cpus) { |
541 | spin_unlock(&call_lock); | 571 | spin_unlock(&call_lock); |
542 | return 0; | 572 | return 0; |
543 | } | 573 | } |
544 | 574 | ||
545 | /* Can deadlock when called with interrupts disabled */ | ||
546 | WARN_ON(irqs_disabled()); | ||
547 | |||
548 | data.func = func; | 575 | data.func = func; |
549 | data.info = info; | 576 | data.info = info; |
550 | atomic_set(&data.started, 0); | 577 | atomic_set(&data.started, 0); |
@@ -554,9 +581,12 @@ int smp_call_function (void (*func) (void *info), void *info, int nonatomic, | |||
554 | 581 | ||
555 | call_data = &data; | 582 | call_data = &data; |
556 | mb(); | 583 | mb(); |
557 | 584 | ||
558 | /* Send a message to all other CPUs and wait for them to respond */ | 585 | /* Send a message to other CPUs */ |
559 | send_IPI_allbutself(CALL_FUNCTION_VECTOR); | 586 | if (cpus_equal(mask, allbutself)) |
587 | send_IPI_allbutself(CALL_FUNCTION_VECTOR); | ||
588 | else | ||
589 | send_IPI_mask(mask, CALL_FUNCTION_VECTOR); | ||
560 | 590 | ||
561 | /* Wait for response */ | 591 | /* Wait for response */ |
562 | while (atomic_read(&data.started) != cpus) | 592 | while (atomic_read(&data.started) != cpus) |
@@ -569,15 +599,68 @@ int smp_call_function (void (*func) (void *info), void *info, int nonatomic, | |||
569 | 599 | ||
570 | return 0; | 600 | return 0; |
571 | } | 601 | } |
602 | |||
603 | /** | ||
604 | * smp_call_function(): Run a function on all other CPUs. | ||
605 | * @func: The function to run. This must be fast and non-blocking. | ||
606 | * @info: An arbitrary pointer to pass to the function. | ||
607 | * @nonatomic: Unused. | ||
608 | * @wait: If true, wait (atomically) until function has completed on other CPUs. | ||
609 | * | ||
610 | * Returns 0 on success, else a negative status code. | ||
611 | * | ||
612 | * If @wait is true, then returns once @func has returned; otherwise | ||
613 | * it returns just before the target cpu calls @func. | ||
614 | * | ||
615 | * You must not call this function with disabled interrupts or from a | ||
616 | * hardware interrupt handler or from a bottom half handler. | ||
617 | */ | ||
618 | int smp_call_function(void (*func) (void *info), void *info, int nonatomic, | ||
619 | int wait) | ||
620 | { | ||
621 | return smp_call_function_mask(cpu_online_map, func, info, wait); | ||
622 | } | ||
572 | EXPORT_SYMBOL(smp_call_function); | 623 | EXPORT_SYMBOL(smp_call_function); |
573 | 624 | ||
625 | /** | ||
626 | * smp_call_function_single - Run a function on another CPU | ||
627 | * @cpu: The target CPU. Cannot be the calling CPU. | ||
628 | * @func: The function to run. This must be fast and non-blocking. | ||
629 | * @info: An arbitrary pointer to pass to the function. | ||
630 | * @nonatomic: Unused. | ||
631 | * @wait: If true, wait until function has completed on other CPUs. | ||
632 | * | ||
633 | * Returns 0 on success, else a negative status code. | ||
634 | * | ||
635 | * If @wait is true, then returns once @func has returned; otherwise | ||
636 | * it returns just before the target cpu calls @func. | ||
637 | */ | ||
638 | int smp_call_function_single(int cpu, void (*func) (void *info), void *info, | ||
639 | int nonatomic, int wait) | ||
640 | { | ||
641 | /* prevent preemption and reschedule on another processor */ | ||
642 | int ret; | ||
643 | int me = get_cpu(); | ||
644 | if (cpu == me) { | ||
645 | WARN_ON(1); | ||
646 | put_cpu(); | ||
647 | return -EBUSY; | ||
648 | } | ||
649 | |||
650 | ret = smp_call_function_mask(cpumask_of_cpu(cpu), func, info, wait); | ||
651 | |||
652 | put_cpu(); | ||
653 | return ret; | ||
654 | } | ||
655 | EXPORT_SYMBOL(smp_call_function_single); | ||
656 | |||
574 | static void stop_this_cpu (void * dummy) | 657 | static void stop_this_cpu (void * dummy) |
575 | { | 658 | { |
659 | local_irq_disable(); | ||
576 | /* | 660 | /* |
577 | * Remove this CPU: | 661 | * Remove this CPU: |
578 | */ | 662 | */ |
579 | cpu_clear(smp_processor_id(), cpu_online_map); | 663 | cpu_clear(smp_processor_id(), cpu_online_map); |
580 | local_irq_disable(); | ||
581 | disable_local_APIC(); | 664 | disable_local_APIC(); |
582 | if (cpu_data[smp_processor_id()].hlt_works_ok) | 665 | if (cpu_data[smp_processor_id()].hlt_works_ok) |
583 | for(;;) halt(); | 666 | for(;;) halt(); |
@@ -588,13 +671,18 @@ static void stop_this_cpu (void * dummy) | |||
588 | * this function calls the 'stop' function on all other CPUs in the system. | 671 | * this function calls the 'stop' function on all other CPUs in the system. |
589 | */ | 672 | */ |
590 | 673 | ||
591 | void smp_send_stop(void) | 674 | void native_smp_send_stop(void) |
592 | { | 675 | { |
593 | smp_call_function(stop_this_cpu, NULL, 1, 0); | 676 | /* Don't deadlock on the call lock in panic */ |
677 | int nolock = !spin_trylock(&call_lock); | ||
678 | unsigned long flags; | ||
594 | 679 | ||
595 | local_irq_disable(); | 680 | local_irq_save(flags); |
681 | __smp_call_function(stop_this_cpu, NULL, 0, 0); | ||
682 | if (!nolock) | ||
683 | spin_unlock(&call_lock); | ||
596 | disable_local_APIC(); | 684 | disable_local_APIC(); |
597 | local_irq_enable(); | 685 | local_irq_restore(flags); |
598 | } | 686 | } |
599 | 687 | ||
600 | /* | 688 | /* |
@@ -633,77 +721,6 @@ fastcall void smp_call_function_interrupt(struct pt_regs *regs) | |||
633 | } | 721 | } |
634 | } | 722 | } |
635 | 723 | ||
636 | /* | ||
637 | * this function sends a 'generic call function' IPI to one other CPU | ||
638 | * in the system. | ||
639 | * | ||
640 | * cpu is a standard Linux logical CPU number. | ||
641 | */ | ||
642 | static void | ||
643 | __smp_call_function_single(int cpu, void (*func) (void *info), void *info, | ||
644 | int nonatomic, int wait) | ||
645 | { | ||
646 | struct call_data_struct data; | ||
647 | int cpus = 1; | ||
648 | |||
649 | data.func = func; | ||
650 | data.info = info; | ||
651 | atomic_set(&data.started, 0); | ||
652 | data.wait = wait; | ||
653 | if (wait) | ||
654 | atomic_set(&data.finished, 0); | ||
655 | |||
656 | call_data = &data; | ||
657 | wmb(); | ||
658 | /* Send a message to all other CPUs and wait for them to respond */ | ||
659 | send_IPI_mask(cpumask_of_cpu(cpu), CALL_FUNCTION_VECTOR); | ||
660 | |||
661 | /* Wait for response */ | ||
662 | while (atomic_read(&data.started) != cpus) | ||
663 | cpu_relax(); | ||
664 | |||
665 | if (!wait) | ||
666 | return; | ||
667 | |||
668 | while (atomic_read(&data.finished) != cpus) | ||
669 | cpu_relax(); | ||
670 | } | ||
671 | |||
672 | /* | ||
673 | * smp_call_function_single - Run a function on another CPU | ||
674 | * @func: The function to run. This must be fast and non-blocking. | ||
675 | * @info: An arbitrary pointer to pass to the function. | ||
676 | * @nonatomic: Currently unused. | ||
677 | * @wait: If true, wait until function has completed on other CPUs. | ||
678 | * | ||
679 | * Retrurns 0 on success, else a negative status code. | ||
680 | * | ||
681 | * Does not return until the remote CPU is nearly ready to execute <func> | ||
682 | * or is or has executed. | ||
683 | */ | ||
684 | |||
685 | int smp_call_function_single(int cpu, void (*func) (void *info), void *info, | ||
686 | int nonatomic, int wait) | ||
687 | { | ||
688 | /* prevent preemption and reschedule on another processor */ | ||
689 | int me = get_cpu(); | ||
690 | if (cpu == me) { | ||
691 | WARN_ON(1); | ||
692 | put_cpu(); | ||
693 | return -EBUSY; | ||
694 | } | ||
695 | |||
696 | /* Can deadlock when called with interrupts disabled */ | ||
697 | WARN_ON(irqs_disabled()); | ||
698 | |||
699 | spin_lock_bh(&call_lock); | ||
700 | __smp_call_function_single(cpu, func, info, nonatomic, wait); | ||
701 | spin_unlock_bh(&call_lock); | ||
702 | put_cpu(); | ||
703 | return 0; | ||
704 | } | ||
705 | EXPORT_SYMBOL(smp_call_function_single); | ||
706 | |||
707 | static int convert_apicid_to_cpu(int apic_id) | 724 | static int convert_apicid_to_cpu(int apic_id) |
708 | { | 725 | { |
709 | int i; | 726 | int i; |
@@ -730,3 +747,14 @@ int safe_smp_processor_id(void) | |||
730 | 747 | ||
731 | return cpuid >= 0 ? cpuid : 0; | 748 | return cpuid >= 0 ? cpuid : 0; |
732 | } | 749 | } |
750 | |||
751 | struct smp_ops smp_ops = { | ||
752 | .smp_prepare_boot_cpu = native_smp_prepare_boot_cpu, | ||
753 | .smp_prepare_cpus = native_smp_prepare_cpus, | ||
754 | .cpu_up = native_cpu_up, | ||
755 | .smp_cpus_done = native_smp_cpus_done, | ||
756 | |||
757 | .smp_send_stop = native_smp_send_stop, | ||
758 | .smp_send_reschedule = native_smp_send_reschedule, | ||
759 | .smp_call_function_mask = native_smp_call_function_mask, | ||
760 | }; | ||
diff --git a/arch/i386/kernel/smpboot.c b/arch/i386/kernel/smpboot.c index 4ff55e67557..a4b7ad283f4 100644 --- a/arch/i386/kernel/smpboot.c +++ b/arch/i386/kernel/smpboot.c | |||
@@ -53,13 +53,12 @@ | |||
53 | #include <asm/desc.h> | 53 | #include <asm/desc.h> |
54 | #include <asm/arch_hooks.h> | 54 | #include <asm/arch_hooks.h> |
55 | #include <asm/nmi.h> | 55 | #include <asm/nmi.h> |
56 | #include <asm/pda.h> | ||
57 | #include <asm/genapic.h> | ||
58 | 56 | ||
59 | #include <mach_apic.h> | 57 | #include <mach_apic.h> |
60 | #include <mach_wakecpu.h> | 58 | #include <mach_wakecpu.h> |
61 | #include <smpboot_hooks.h> | 59 | #include <smpboot_hooks.h> |
62 | #include <asm/vmi.h> | 60 | #include <asm/vmi.h> |
61 | #include <asm/mtrr.h> | ||
63 | 62 | ||
64 | /* Set if we find a B stepping CPU */ | 63 | /* Set if we find a B stepping CPU */ |
65 | static int __devinitdata smp_b_stepping; | 64 | static int __devinitdata smp_b_stepping; |
@@ -100,6 +99,9 @@ EXPORT_SYMBOL(x86_cpu_to_apicid); | |||
100 | 99 | ||
101 | u8 apicid_2_node[MAX_APICID]; | 100 | u8 apicid_2_node[MAX_APICID]; |
102 | 101 | ||
102 | DEFINE_PER_CPU(unsigned long, this_cpu_off); | ||
103 | EXPORT_PER_CPU_SYMBOL(this_cpu_off); | ||
104 | |||
103 | /* | 105 | /* |
104 | * Trampoline 80x86 program as an array. | 106 | * Trampoline 80x86 program as an array. |
105 | */ | 107 | */ |
@@ -156,7 +158,7 @@ static void __cpuinit smp_store_cpu_info(int id) | |||
156 | 158 | ||
157 | *c = boot_cpu_data; | 159 | *c = boot_cpu_data; |
158 | if (id!=0) | 160 | if (id!=0) |
159 | identify_cpu(c); | 161 | identify_secondary_cpu(c); |
160 | /* | 162 | /* |
161 | * Mask B, Pentium, but not Pentium MMX | 163 | * Mask B, Pentium, but not Pentium MMX |
162 | */ | 164 | */ |
@@ -379,14 +381,14 @@ set_cpu_sibling_map(int cpu) | |||
379 | static void __cpuinit start_secondary(void *unused) | 381 | static void __cpuinit start_secondary(void *unused) |
380 | { | 382 | { |
381 | /* | 383 | /* |
382 | * Don't put *anything* before secondary_cpu_init(), SMP | 384 | * Don't put *anything* before cpu_init(), SMP booting is too |
383 | * booting is too fragile that we want to limit the | 385 | * fragile that we want to limit the things done here to the |
384 | * things done here to the most necessary things. | 386 | * most necessary things. |
385 | */ | 387 | */ |
386 | #ifdef CONFIG_VMI | 388 | #ifdef CONFIG_VMI |
387 | vmi_bringup(); | 389 | vmi_bringup(); |
388 | #endif | 390 | #endif |
389 | secondary_cpu_init(); | 391 | cpu_init(); |
390 | preempt_disable(); | 392 | preempt_disable(); |
391 | smp_callin(); | 393 | smp_callin(); |
392 | while (!cpu_isset(smp_processor_id(), smp_commenced_mask)) | 394 | while (!cpu_isset(smp_processor_id(), smp_commenced_mask)) |
@@ -441,12 +443,6 @@ static void __cpuinit start_secondary(void *unused) | |||
441 | void __devinit initialize_secondary(void) | 443 | void __devinit initialize_secondary(void) |
442 | { | 444 | { |
443 | /* | 445 | /* |
444 | * switch to the per CPU GDT we already set up | ||
445 | * in do_boot_cpu() | ||
446 | */ | ||
447 | cpu_set_gdt(current_thread_info()->cpu); | ||
448 | |||
449 | /* | ||
450 | * We don't actually need to load the full TSS, | 446 | * We don't actually need to load the full TSS, |
451 | * basically just the stack pointer and the eip. | 447 | * basically just the stack pointer and the eip. |
452 | */ | 448 | */ |
@@ -463,7 +459,6 @@ extern struct { | |||
463 | void * esp; | 459 | void * esp; |
464 | unsigned short ss; | 460 | unsigned short ss; |
465 | } stack_start; | 461 | } stack_start; |
466 | extern struct i386_pda *start_pda; | ||
467 | 462 | ||
468 | #ifdef CONFIG_NUMA | 463 | #ifdef CONFIG_NUMA |
469 | 464 | ||
@@ -521,12 +516,12 @@ static void unmap_cpu_to_logical_apicid(int cpu) | |||
521 | unmap_cpu_to_node(cpu); | 516 | unmap_cpu_to_node(cpu); |
522 | } | 517 | } |
523 | 518 | ||
524 | #if APIC_DEBUG | ||
525 | static inline void __inquire_remote_apic(int apicid) | 519 | static inline void __inquire_remote_apic(int apicid) |
526 | { | 520 | { |
527 | int i, regs[] = { APIC_ID >> 4, APIC_LVR >> 4, APIC_SPIV >> 4 }; | 521 | int i, regs[] = { APIC_ID >> 4, APIC_LVR >> 4, APIC_SPIV >> 4 }; |
528 | char *names[] = { "ID", "VERSION", "SPIV" }; | 522 | char *names[] = { "ID", "VERSION", "SPIV" }; |
529 | int timeout, status; | 523 | int timeout; |
524 | unsigned long status; | ||
530 | 525 | ||
531 | printk("Inquiring remote APIC #%d...\n", apicid); | 526 | printk("Inquiring remote APIC #%d...\n", apicid); |
532 | 527 | ||
@@ -536,7 +531,9 @@ static inline void __inquire_remote_apic(int apicid) | |||
536 | /* | 531 | /* |
537 | * Wait for idle. | 532 | * Wait for idle. |
538 | */ | 533 | */ |
539 | apic_wait_icr_idle(); | 534 | status = safe_apic_wait_icr_idle(); |
535 | if (status) | ||
536 | printk("a previous APIC delivery may have failed\n"); | ||
540 | 537 | ||
541 | apic_write_around(APIC_ICR2, SET_APIC_DEST_FIELD(apicid)); | 538 | apic_write_around(APIC_ICR2, SET_APIC_DEST_FIELD(apicid)); |
542 | apic_write_around(APIC_ICR, APIC_DM_REMRD | regs[i]); | 539 | apic_write_around(APIC_ICR, APIC_DM_REMRD | regs[i]); |
@@ -550,14 +547,13 @@ static inline void __inquire_remote_apic(int apicid) | |||
550 | switch (status) { | 547 | switch (status) { |
551 | case APIC_ICR_RR_VALID: | 548 | case APIC_ICR_RR_VALID: |
552 | status = apic_read(APIC_RRR); | 549 | status = apic_read(APIC_RRR); |
553 | printk("%08x\n", status); | 550 | printk("%lx\n", status); |
554 | break; | 551 | break; |
555 | default: | 552 | default: |
556 | printk("failed\n"); | 553 | printk("failed\n"); |
557 | } | 554 | } |
558 | } | 555 | } |
559 | } | 556 | } |
560 | #endif | ||
561 | 557 | ||
562 | #ifdef WAKE_SECONDARY_VIA_NMI | 558 | #ifdef WAKE_SECONDARY_VIA_NMI |
563 | /* | 559 | /* |
@@ -568,8 +564,8 @@ static inline void __inquire_remote_apic(int apicid) | |||
568 | static int __devinit | 564 | static int __devinit |
569 | wakeup_secondary_cpu(int logical_apicid, unsigned long start_eip) | 565 | wakeup_secondary_cpu(int logical_apicid, unsigned long start_eip) |
570 | { | 566 | { |
571 | unsigned long send_status = 0, accept_status = 0; | 567 | unsigned long send_status, accept_status = 0; |
572 | int timeout, maxlvt; | 568 | int maxlvt; |
573 | 569 | ||
574 | /* Target chip */ | 570 | /* Target chip */ |
575 | apic_write_around(APIC_ICR2, SET_APIC_DEST_FIELD(logical_apicid)); | 571 | apic_write_around(APIC_ICR2, SET_APIC_DEST_FIELD(logical_apicid)); |
@@ -579,12 +575,7 @@ wakeup_secondary_cpu(int logical_apicid, unsigned long start_eip) | |||
579 | apic_write_around(APIC_ICR, APIC_DM_NMI | APIC_DEST_LOGICAL); | 575 | apic_write_around(APIC_ICR, APIC_DM_NMI | APIC_DEST_LOGICAL); |
580 | 576 | ||
581 | Dprintk("Waiting for send to finish...\n"); | 577 | Dprintk("Waiting for send to finish...\n"); |
582 | timeout = 0; | 578 | send_status = safe_apic_wait_icr_idle(); |
583 | do { | ||
584 | Dprintk("+"); | ||
585 | udelay(100); | ||
586 | send_status = apic_read(APIC_ICR) & APIC_ICR_BUSY; | ||
587 | } while (send_status && (timeout++ < 1000)); | ||
588 | 579 | ||
589 | /* | 580 | /* |
590 | * Give the other CPU some time to accept the IPI. | 581 | * Give the other CPU some time to accept the IPI. |
@@ -614,8 +605,8 @@ wakeup_secondary_cpu(int logical_apicid, unsigned long start_eip) | |||
614 | static int __devinit | 605 | static int __devinit |
615 | wakeup_secondary_cpu(int phys_apicid, unsigned long start_eip) | 606 | wakeup_secondary_cpu(int phys_apicid, unsigned long start_eip) |
616 | { | 607 | { |
617 | unsigned long send_status = 0, accept_status = 0; | 608 | unsigned long send_status, accept_status = 0; |
618 | int maxlvt, timeout, num_starts, j; | 609 | int maxlvt, num_starts, j; |
619 | 610 | ||
620 | /* | 611 | /* |
621 | * Be paranoid about clearing APIC errors. | 612 | * Be paranoid about clearing APIC errors. |
@@ -640,12 +631,7 @@ wakeup_secondary_cpu(int phys_apicid, unsigned long start_eip) | |||
640 | | APIC_DM_INIT); | 631 | | APIC_DM_INIT); |
641 | 632 | ||
642 | Dprintk("Waiting for send to finish...\n"); | 633 | Dprintk("Waiting for send to finish...\n"); |
643 | timeout = 0; | 634 | send_status = safe_apic_wait_icr_idle(); |
644 | do { | ||
645 | Dprintk("+"); | ||
646 | udelay(100); | ||
647 | send_status = apic_read(APIC_ICR) & APIC_ICR_BUSY; | ||
648 | } while (send_status && (timeout++ < 1000)); | ||
649 | 635 | ||
650 | mdelay(10); | 636 | mdelay(10); |
651 | 637 | ||
@@ -658,12 +644,7 @@ wakeup_secondary_cpu(int phys_apicid, unsigned long start_eip) | |||
658 | apic_write_around(APIC_ICR, APIC_INT_LEVELTRIG | APIC_DM_INIT); | 644 | apic_write_around(APIC_ICR, APIC_INT_LEVELTRIG | APIC_DM_INIT); |
659 | 645 | ||
660 | Dprintk("Waiting for send to finish...\n"); | 646 | Dprintk("Waiting for send to finish...\n"); |
661 | timeout = 0; | 647 | send_status = safe_apic_wait_icr_idle(); |
662 | do { | ||
663 | Dprintk("+"); | ||
664 | udelay(100); | ||
665 | send_status = apic_read(APIC_ICR) & APIC_ICR_BUSY; | ||
666 | } while (send_status && (timeout++ < 1000)); | ||
667 | 648 | ||
668 | atomic_set(&init_deasserted, 1); | 649 | atomic_set(&init_deasserted, 1); |
669 | 650 | ||
@@ -719,12 +700,7 @@ wakeup_secondary_cpu(int phys_apicid, unsigned long start_eip) | |||
719 | Dprintk("Startup point 1.\n"); | 700 | Dprintk("Startup point 1.\n"); |
720 | 701 | ||
721 | Dprintk("Waiting for send to finish...\n"); | 702 | Dprintk("Waiting for send to finish...\n"); |
722 | timeout = 0; | 703 | send_status = safe_apic_wait_icr_idle(); |
723 | do { | ||
724 | Dprintk("+"); | ||
725 | udelay(100); | ||
726 | send_status = apic_read(APIC_ICR) & APIC_ICR_BUSY; | ||
727 | } while (send_status && (timeout++ < 1000)); | ||
728 | 704 | ||
729 | /* | 705 | /* |
730 | * Give the other CPU some time to accept the IPI. | 706 | * Give the other CPU some time to accept the IPI. |
@@ -788,6 +764,25 @@ static inline struct task_struct * alloc_idle_task(int cpu) | |||
788 | #define alloc_idle_task(cpu) fork_idle(cpu) | 764 | #define alloc_idle_task(cpu) fork_idle(cpu) |
789 | #endif | 765 | #endif |
790 | 766 | ||
767 | /* Initialize the CPU's GDT. This is either the boot CPU doing itself | ||
768 | (still using the master per-cpu area), or a CPU doing it for a | ||
769 | secondary which will soon come up. */ | ||
770 | static __cpuinit void init_gdt(int cpu) | ||
771 | { | ||
772 | struct desc_struct *gdt = get_cpu_gdt_table(cpu); | ||
773 | |||
774 | pack_descriptor((u32 *)&gdt[GDT_ENTRY_PERCPU].a, | ||
775 | (u32 *)&gdt[GDT_ENTRY_PERCPU].b, | ||
776 | __per_cpu_offset[cpu], 0xFFFFF, | ||
777 | 0x80 | DESCTYPE_S | 0x2, 0x8); | ||
778 | |||
779 | per_cpu(this_cpu_off, cpu) = __per_cpu_offset[cpu]; | ||
780 | per_cpu(cpu_number, cpu) = cpu; | ||
781 | } | ||
782 | |||
783 | /* Defined in head.S */ | ||
784 | extern struct Xgt_desc_struct early_gdt_descr; | ||
785 | |||
791 | static int __cpuinit do_boot_cpu(int apicid, int cpu) | 786 | static int __cpuinit do_boot_cpu(int apicid, int cpu) |
792 | /* | 787 | /* |
793 | * NOTE - on most systems this is a PHYSICAL apic ID, but on multiquad | 788 | * NOTE - on most systems this is a PHYSICAL apic ID, but on multiquad |
@@ -802,6 +797,12 @@ static int __cpuinit do_boot_cpu(int apicid, int cpu) | |||
802 | unsigned short nmi_high = 0, nmi_low = 0; | 797 | unsigned short nmi_high = 0, nmi_low = 0; |
803 | 798 | ||
804 | /* | 799 | /* |
800 | * Save current MTRR state in case it was changed since early boot | ||
801 | * (e.g. by the ACPI SMI) to initialize new CPUs with MTRRs in sync: | ||
802 | */ | ||
803 | mtrr_save_state(); | ||
804 | |||
805 | /* | ||
805 | * We can't use kernel_thread since we must avoid to | 806 | * We can't use kernel_thread since we must avoid to |
806 | * reschedule the child. | 807 | * reschedule the child. |
807 | */ | 808 | */ |
@@ -809,13 +810,9 @@ static int __cpuinit do_boot_cpu(int apicid, int cpu) | |||
809 | if (IS_ERR(idle)) | 810 | if (IS_ERR(idle)) |
810 | panic("failed fork for CPU %d", cpu); | 811 | panic("failed fork for CPU %d", cpu); |
811 | 812 | ||
812 | /* Pre-allocate and initialize the CPU's GDT and PDA so it | 813 | init_gdt(cpu); |
813 | doesn't have to do any memory allocation during the | 814 | per_cpu(current_task, cpu) = idle; |
814 | delicate CPU-bringup phase. */ | 815 | early_gdt_descr.address = (unsigned long)get_cpu_gdt_table(cpu); |
815 | if (!init_gdt(cpu, idle)) { | ||
816 | printk(KERN_INFO "Couldn't allocate GDT/PDA for CPU %d\n", cpu); | ||
817 | return -1; /* ? */ | ||
818 | } | ||
819 | 816 | ||
820 | idle->thread.eip = (unsigned long) start_secondary; | 817 | idle->thread.eip = (unsigned long) start_secondary; |
821 | /* start_eip had better be page-aligned! */ | 818 | /* start_eip had better be page-aligned! */ |
@@ -941,7 +938,6 @@ static int __cpuinit __smp_prepare_cpu(int cpu) | |||
941 | DECLARE_COMPLETION_ONSTACK(done); | 938 | DECLARE_COMPLETION_ONSTACK(done); |
942 | struct warm_boot_cpu_info info; | 939 | struct warm_boot_cpu_info info; |
943 | int apicid, ret; | 940 | int apicid, ret; |
944 | struct Xgt_desc_struct *cpu_gdt_descr = &per_cpu(cpu_gdt_descr, cpu); | ||
945 | 941 | ||
946 | apicid = x86_cpu_to_apicid[cpu]; | 942 | apicid = x86_cpu_to_apicid[cpu]; |
947 | if (apicid == BAD_APICID) { | 943 | if (apicid == BAD_APICID) { |
@@ -949,18 +945,6 @@ static int __cpuinit __smp_prepare_cpu(int cpu) | |||
949 | goto exit; | 945 | goto exit; |
950 | } | 946 | } |
951 | 947 | ||
952 | /* | ||
953 | * the CPU isn't initialized at boot time, allocate gdt table here. | ||
954 | * cpu_init will initialize it | ||
955 | */ | ||
956 | if (!cpu_gdt_descr->address) { | ||
957 | cpu_gdt_descr->address = get_zeroed_page(GFP_KERNEL); | ||
958 | if (!cpu_gdt_descr->address) | ||
959 | printk(KERN_CRIT "CPU%d failed to allocate GDT\n", cpu); | ||
960 | ret = -ENOMEM; | ||
961 | goto exit; | ||
962 | } | ||
963 | |||
964 | info.complete = &done; | 948 | info.complete = &done; |
965 | info.apicid = apicid; | 949 | info.apicid = apicid; |
966 | info.cpu = cpu; | 950 | info.cpu = cpu; |
@@ -1173,7 +1157,7 @@ static void __init smp_boot_cpus(unsigned int max_cpus) | |||
1173 | 1157 | ||
1174 | /* These are wrappers to interface to the new boot process. Someone | 1158 | /* These are wrappers to interface to the new boot process. Someone |
1175 | who understands all this stuff should rewrite it properly. --RR 15/Jul/02 */ | 1159 | who understands all this stuff should rewrite it properly. --RR 15/Jul/02 */ |
1176 | void __init smp_prepare_cpus(unsigned int max_cpus) | 1160 | void __init native_smp_prepare_cpus(unsigned int max_cpus) |
1177 | { | 1161 | { |
1178 | smp_commenced_mask = cpumask_of_cpu(0); | 1162 | smp_commenced_mask = cpumask_of_cpu(0); |
1179 | cpu_callin_map = cpumask_of_cpu(0); | 1163 | cpu_callin_map = cpumask_of_cpu(0); |
@@ -1181,13 +1165,18 @@ void __init smp_prepare_cpus(unsigned int max_cpus) | |||
1181 | smp_boot_cpus(max_cpus); | 1165 | smp_boot_cpus(max_cpus); |
1182 | } | 1166 | } |
1183 | 1167 | ||
1184 | void __devinit smp_prepare_boot_cpu(void) | 1168 | void __init native_smp_prepare_boot_cpu(void) |
1185 | { | 1169 | { |
1186 | cpu_set(smp_processor_id(), cpu_online_map); | 1170 | unsigned int cpu = smp_processor_id(); |
1187 | cpu_set(smp_processor_id(), cpu_callout_map); | 1171 | |
1188 | cpu_set(smp_processor_id(), cpu_present_map); | 1172 | init_gdt(cpu); |
1189 | cpu_set(smp_processor_id(), cpu_possible_map); | 1173 | switch_to_new_gdt(); |
1190 | per_cpu(cpu_state, smp_processor_id()) = CPU_ONLINE; | 1174 | |
1175 | cpu_set(cpu, cpu_online_map); | ||
1176 | cpu_set(cpu, cpu_callout_map); | ||
1177 | cpu_set(cpu, cpu_present_map); | ||
1178 | cpu_set(cpu, cpu_possible_map); | ||
1179 | __get_cpu_var(cpu_state) = CPU_ONLINE; | ||
1191 | } | 1180 | } |
1192 | 1181 | ||
1193 | #ifdef CONFIG_HOTPLUG_CPU | 1182 | #ifdef CONFIG_HOTPLUG_CPU |
@@ -1277,7 +1266,7 @@ void __cpu_die(unsigned int cpu) | |||
1277 | } | 1266 | } |
1278 | #endif /* CONFIG_HOTPLUG_CPU */ | 1267 | #endif /* CONFIG_HOTPLUG_CPU */ |
1279 | 1268 | ||
1280 | int __cpuinit __cpu_up(unsigned int cpu) | 1269 | int __cpuinit native_cpu_up(unsigned int cpu) |
1281 | { | 1270 | { |
1282 | unsigned long flags; | 1271 | unsigned long flags; |
1283 | #ifdef CONFIG_HOTPLUG_CPU | 1272 | #ifdef CONFIG_HOTPLUG_CPU |
@@ -1319,15 +1308,10 @@ int __cpuinit __cpu_up(unsigned int cpu) | |||
1319 | touch_nmi_watchdog(); | 1308 | touch_nmi_watchdog(); |
1320 | } | 1309 | } |
1321 | 1310 | ||
1322 | #ifdef CONFIG_X86_GENERICARCH | ||
1323 | if (num_online_cpus() > 8 && genapic == &apic_default) | ||
1324 | panic("Default flat APIC routing can't be used with > 8 cpus\n"); | ||
1325 | #endif | ||
1326 | |||
1327 | return 0; | 1311 | return 0; |
1328 | } | 1312 | } |
1329 | 1313 | ||
1330 | void __init smp_cpus_done(unsigned int max_cpus) | 1314 | void __init native_smp_cpus_done(unsigned int max_cpus) |
1331 | { | 1315 | { |
1332 | #ifdef CONFIG_X86_IO_APIC | 1316 | #ifdef CONFIG_X86_IO_APIC |
1333 | setup_ioapic_dest(); | 1317 | setup_ioapic_dest(); |
diff --git a/arch/i386/kernel/sysenter.c b/arch/i386/kernel/sysenter.c index 13ca54a85a1..ff4ee6f3326 100644 --- a/arch/i386/kernel/sysenter.c +++ b/arch/i386/kernel/sysenter.c | |||
@@ -22,16 +22,26 @@ | |||
22 | #include <asm/msr.h> | 22 | #include <asm/msr.h> |
23 | #include <asm/pgtable.h> | 23 | #include <asm/pgtable.h> |
24 | #include <asm/unistd.h> | 24 | #include <asm/unistd.h> |
25 | #include <asm/elf.h> | ||
26 | #include <asm/tlbflush.h> | ||
27 | |||
28 | enum { | ||
29 | VDSO_DISABLED = 0, | ||
30 | VDSO_ENABLED = 1, | ||
31 | VDSO_COMPAT = 2, | ||
32 | }; | ||
33 | |||
34 | #ifdef CONFIG_COMPAT_VDSO | ||
35 | #define VDSO_DEFAULT VDSO_COMPAT | ||
36 | #else | ||
37 | #define VDSO_DEFAULT VDSO_ENABLED | ||
38 | #endif | ||
25 | 39 | ||
26 | /* | 40 | /* |
27 | * Should the kernel map a VDSO page into processes and pass its | 41 | * Should the kernel map a VDSO page into processes and pass its |
28 | * address down to glibc upon exec()? | 42 | * address down to glibc upon exec()? |
29 | */ | 43 | */ |
30 | #ifdef CONFIG_PARAVIRT | 44 | unsigned int __read_mostly vdso_enabled = VDSO_DEFAULT; |
31 | unsigned int __read_mostly vdso_enabled = 0; | ||
32 | #else | ||
33 | unsigned int __read_mostly vdso_enabled = 1; | ||
34 | #endif | ||
35 | 45 | ||
36 | EXPORT_SYMBOL_GPL(vdso_enabled); | 46 | EXPORT_SYMBOL_GPL(vdso_enabled); |
37 | 47 | ||
@@ -46,6 +56,123 @@ __setup("vdso=", vdso_setup); | |||
46 | 56 | ||
47 | extern asmlinkage void sysenter_entry(void); | 57 | extern asmlinkage void sysenter_entry(void); |
48 | 58 | ||
59 | static __init void reloc_symtab(Elf32_Ehdr *ehdr, | ||
60 | unsigned offset, unsigned size) | ||
61 | { | ||
62 | Elf32_Sym *sym = (void *)ehdr + offset; | ||
63 | unsigned nsym = size / sizeof(*sym); | ||
64 | unsigned i; | ||
65 | |||
66 | for(i = 0; i < nsym; i++, sym++) { | ||
67 | if (sym->st_shndx == SHN_UNDEF || | ||
68 | sym->st_shndx == SHN_ABS) | ||
69 | continue; /* skip */ | ||
70 | |||
71 | if (sym->st_shndx > SHN_LORESERVE) { | ||
72 | printk(KERN_INFO "VDSO: unexpected st_shndx %x\n", | ||
73 | sym->st_shndx); | ||
74 | continue; | ||
75 | } | ||
76 | |||
77 | switch(ELF_ST_TYPE(sym->st_info)) { | ||
78 | case STT_OBJECT: | ||
79 | case STT_FUNC: | ||
80 | case STT_SECTION: | ||
81 | case STT_FILE: | ||
82 | sym->st_value += VDSO_HIGH_BASE; | ||
83 | } | ||
84 | } | ||
85 | } | ||
86 | |||
87 | static __init void reloc_dyn(Elf32_Ehdr *ehdr, unsigned offset) | ||
88 | { | ||
89 | Elf32_Dyn *dyn = (void *)ehdr + offset; | ||
90 | |||
91 | for(; dyn->d_tag != DT_NULL; dyn++) | ||
92 | switch(dyn->d_tag) { | ||
93 | case DT_PLTGOT: | ||
94 | case DT_HASH: | ||
95 | case DT_STRTAB: | ||
96 | case DT_SYMTAB: | ||
97 | case DT_RELA: | ||
98 | case DT_INIT: | ||
99 | case DT_FINI: | ||
100 | case DT_REL: | ||
101 | case DT_DEBUG: | ||
102 | case DT_JMPREL: | ||
103 | case DT_VERSYM: | ||
104 | case DT_VERDEF: | ||
105 | case DT_VERNEED: | ||
106 | case DT_ADDRRNGLO ... DT_ADDRRNGHI: | ||
107 | /* definitely pointers needing relocation */ | ||
108 | dyn->d_un.d_ptr += VDSO_HIGH_BASE; | ||
109 | break; | ||
110 | |||
111 | case DT_ENCODING ... OLD_DT_LOOS-1: | ||
112 | case DT_LOOS ... DT_HIOS-1: | ||
113 | /* Tags above DT_ENCODING are pointers if | ||
114 | they're even */ | ||
115 | if (dyn->d_tag >= DT_ENCODING && | ||
116 | (dyn->d_tag & 1) == 0) | ||
117 | dyn->d_un.d_ptr += VDSO_HIGH_BASE; | ||
118 | break; | ||
119 | |||
120 | case DT_VERDEFNUM: | ||
121 | case DT_VERNEEDNUM: | ||
122 | case DT_FLAGS_1: | ||
123 | case DT_RELACOUNT: | ||
124 | case DT_RELCOUNT: | ||
125 | case DT_VALRNGLO ... DT_VALRNGHI: | ||
126 | /* definitely not pointers */ | ||
127 | break; | ||
128 | |||
129 | case OLD_DT_LOOS ... DT_LOOS-1: | ||
130 | case DT_HIOS ... DT_VALRNGLO-1: | ||
131 | default: | ||
132 | if (dyn->d_tag > DT_ENCODING) | ||
133 | printk(KERN_INFO "VDSO: unexpected DT_tag %x\n", | ||
134 | dyn->d_tag); | ||
135 | break; | ||
136 | } | ||
137 | } | ||
138 | |||
139 | static __init void relocate_vdso(Elf32_Ehdr *ehdr) | ||
140 | { | ||
141 | Elf32_Phdr *phdr; | ||
142 | Elf32_Shdr *shdr; | ||
143 | int i; | ||
144 | |||
145 | BUG_ON(memcmp(ehdr->e_ident, ELFMAG, 4) != 0 || | ||
146 | !elf_check_arch(ehdr) || | ||
147 | ehdr->e_type != ET_DYN); | ||
148 | |||
149 | ehdr->e_entry += VDSO_HIGH_BASE; | ||
150 | |||
151 | /* rebase phdrs */ | ||
152 | phdr = (void *)ehdr + ehdr->e_phoff; | ||
153 | for (i = 0; i < ehdr->e_phnum; i++) { | ||
154 | phdr[i].p_vaddr += VDSO_HIGH_BASE; | ||
155 | |||
156 | /* relocate dynamic stuff */ | ||
157 | if (phdr[i].p_type == PT_DYNAMIC) | ||
158 | reloc_dyn(ehdr, phdr[i].p_offset); | ||
159 | } | ||
160 | |||
161 | /* rebase sections */ | ||
162 | shdr = (void *)ehdr + ehdr->e_shoff; | ||
163 | for(i = 0; i < ehdr->e_shnum; i++) { | ||
164 | if (!(shdr[i].sh_flags & SHF_ALLOC)) | ||
165 | continue; | ||
166 | |||
167 | shdr[i].sh_addr += VDSO_HIGH_BASE; | ||
168 | |||
169 | if (shdr[i].sh_type == SHT_SYMTAB || | ||
170 | shdr[i].sh_type == SHT_DYNSYM) | ||
171 | reloc_symtab(ehdr, shdr[i].sh_offset, | ||
172 | shdr[i].sh_size); | ||
173 | } | ||
174 | } | ||
175 | |||
49 | void enable_sep_cpu(void) | 176 | void enable_sep_cpu(void) |
50 | { | 177 | { |
51 | int cpu = get_cpu(); | 178 | int cpu = get_cpu(); |
@@ -56,14 +183,33 @@ void enable_sep_cpu(void) | |||
56 | return; | 183 | return; |
57 | } | 184 | } |
58 | 185 | ||
59 | tss->ss1 = __KERNEL_CS; | 186 | tss->x86_tss.ss1 = __KERNEL_CS; |
60 | tss->esp1 = sizeof(struct tss_struct) + (unsigned long) tss; | 187 | tss->x86_tss.esp1 = sizeof(struct tss_struct) + (unsigned long) tss; |
61 | wrmsr(MSR_IA32_SYSENTER_CS, __KERNEL_CS, 0); | 188 | wrmsr(MSR_IA32_SYSENTER_CS, __KERNEL_CS, 0); |
62 | wrmsr(MSR_IA32_SYSENTER_ESP, tss->esp1, 0); | 189 | wrmsr(MSR_IA32_SYSENTER_ESP, tss->x86_tss.esp1, 0); |
63 | wrmsr(MSR_IA32_SYSENTER_EIP, (unsigned long) sysenter_entry, 0); | 190 | wrmsr(MSR_IA32_SYSENTER_EIP, (unsigned long) sysenter_entry, 0); |
64 | put_cpu(); | 191 | put_cpu(); |
65 | } | 192 | } |
66 | 193 | ||
194 | static struct vm_area_struct gate_vma; | ||
195 | |||
196 | static int __init gate_vma_init(void) | ||
197 | { | ||
198 | gate_vma.vm_mm = NULL; | ||
199 | gate_vma.vm_start = FIXADDR_USER_START; | ||
200 | gate_vma.vm_end = FIXADDR_USER_END; | ||
201 | gate_vma.vm_flags = VM_READ | VM_MAYREAD | VM_EXEC | VM_MAYEXEC; | ||
202 | gate_vma.vm_page_prot = __P101; | ||
203 | /* | ||
204 | * Make sure the vDSO gets into every core dump. | ||
205 | * Dumping its contents makes post-mortem fully interpretable later | ||
206 | * without matching up the same kernel and hardware config to see | ||
207 | * what PC values meant. | ||
208 | */ | ||
209 | gate_vma.vm_flags |= VM_ALWAYSDUMP; | ||
210 | return 0; | ||
211 | } | ||
212 | |||
67 | /* | 213 | /* |
68 | * These symbols are defined by vsyscall.o to mark the bounds | 214 | * These symbols are defined by vsyscall.o to mark the bounds |
69 | * of the ELF DSO images included therein. | 215 | * of the ELF DSO images included therein. |
@@ -72,31 +218,48 @@ extern const char vsyscall_int80_start, vsyscall_int80_end; | |||
72 | extern const char vsyscall_sysenter_start, vsyscall_sysenter_end; | 218 | extern const char vsyscall_sysenter_start, vsyscall_sysenter_end; |
73 | static struct page *syscall_pages[1]; | 219 | static struct page *syscall_pages[1]; |
74 | 220 | ||
221 | static void map_compat_vdso(int map) | ||
222 | { | ||
223 | static int vdso_mapped; | ||
224 | |||
225 | if (map == vdso_mapped) | ||
226 | return; | ||
227 | |||
228 | vdso_mapped = map; | ||
229 | |||
230 | __set_fixmap(FIX_VDSO, page_to_pfn(syscall_pages[0]) << PAGE_SHIFT, | ||
231 | map ? PAGE_READONLY_EXEC : PAGE_NONE); | ||
232 | |||
233 | /* flush stray tlbs */ | ||
234 | flush_tlb_all(); | ||
235 | } | ||
236 | |||
75 | int __init sysenter_setup(void) | 237 | int __init sysenter_setup(void) |
76 | { | 238 | { |
77 | void *syscall_page = (void *)get_zeroed_page(GFP_ATOMIC); | 239 | void *syscall_page = (void *)get_zeroed_page(GFP_ATOMIC); |
240 | const void *vsyscall; | ||
241 | size_t vsyscall_len; | ||
242 | |||
78 | syscall_pages[0] = virt_to_page(syscall_page); | 243 | syscall_pages[0] = virt_to_page(syscall_page); |
79 | 244 | ||
80 | #ifdef CONFIG_COMPAT_VDSO | 245 | gate_vma_init(); |
81 | __set_fixmap(FIX_VDSO, __pa(syscall_page), PAGE_READONLY_EXEC); | 246 | |
82 | printk("Compat vDSO mapped to %08lx.\n", __fix_to_virt(FIX_VDSO)); | 247 | printk("Compat vDSO mapped to %08lx.\n", __fix_to_virt(FIX_VDSO)); |
83 | #endif | ||
84 | 248 | ||
85 | if (!boot_cpu_has(X86_FEATURE_SEP)) { | 249 | if (!boot_cpu_has(X86_FEATURE_SEP)) { |
86 | memcpy(syscall_page, | 250 | vsyscall = &vsyscall_int80_start; |
87 | &vsyscall_int80_start, | 251 | vsyscall_len = &vsyscall_int80_end - &vsyscall_int80_start; |
88 | &vsyscall_int80_end - &vsyscall_int80_start); | 252 | } else { |
89 | return 0; | 253 | vsyscall = &vsyscall_sysenter_start; |
254 | vsyscall_len = &vsyscall_sysenter_end - &vsyscall_sysenter_start; | ||
90 | } | 255 | } |
91 | 256 | ||
92 | memcpy(syscall_page, | 257 | memcpy(syscall_page, vsyscall, vsyscall_len); |
93 | &vsyscall_sysenter_start, | 258 | relocate_vdso(syscall_page); |
94 | &vsyscall_sysenter_end - &vsyscall_sysenter_start); | ||
95 | 259 | ||
96 | return 0; | 260 | return 0; |
97 | } | 261 | } |
98 | 262 | ||
99 | #ifndef CONFIG_COMPAT_VDSO | ||
100 | /* Defined in vsyscall-sysenter.S */ | 263 | /* Defined in vsyscall-sysenter.S */ |
101 | extern void SYSENTER_RETURN; | 264 | extern void SYSENTER_RETURN; |
102 | 265 | ||
@@ -105,36 +268,52 @@ int arch_setup_additional_pages(struct linux_binprm *bprm, int exstack) | |||
105 | { | 268 | { |
106 | struct mm_struct *mm = current->mm; | 269 | struct mm_struct *mm = current->mm; |
107 | unsigned long addr; | 270 | unsigned long addr; |
108 | int ret; | 271 | int ret = 0; |
272 | bool compat; | ||
109 | 273 | ||
110 | down_write(&mm->mmap_sem); | 274 | down_write(&mm->mmap_sem); |
111 | addr = get_unmapped_area(NULL, 0, PAGE_SIZE, 0, 0); | ||
112 | if (IS_ERR_VALUE(addr)) { | ||
113 | ret = addr; | ||
114 | goto up_fail; | ||
115 | } | ||
116 | 275 | ||
117 | /* | 276 | /* Test compat mode once here, in case someone |
118 | * MAYWRITE to allow gdb to COW and set breakpoints | 277 | changes it via sysctl */ |
119 | * | 278 | compat = (vdso_enabled == VDSO_COMPAT); |
120 | * Make sure the vDSO gets into every core dump. | 279 | |
121 | * Dumping its contents makes post-mortem fully interpretable later | 280 | map_compat_vdso(compat); |
122 | * without matching up the same kernel and hardware config to see | 281 | |
123 | * what PC values meant. | 282 | if (compat) |
124 | */ | 283 | addr = VDSO_HIGH_BASE; |
125 | ret = install_special_mapping(mm, addr, PAGE_SIZE, | 284 | else { |
126 | VM_READ|VM_EXEC| | 285 | addr = get_unmapped_area(NULL, 0, PAGE_SIZE, 0, 0); |
127 | VM_MAYREAD|VM_MAYWRITE|VM_MAYEXEC| | 286 | if (IS_ERR_VALUE(addr)) { |
128 | VM_ALWAYSDUMP, | 287 | ret = addr; |
129 | syscall_pages); | 288 | goto up_fail; |
130 | if (ret) | 289 | } |
131 | goto up_fail; | 290 | |
291 | /* | ||
292 | * MAYWRITE to allow gdb to COW and set breakpoints | ||
293 | * | ||
294 | * Make sure the vDSO gets into every core dump. | ||
295 | * Dumping its contents makes post-mortem fully | ||
296 | * interpretable later without matching up the same | ||
297 | * kernel and hardware config to see what PC values | ||
298 | * meant. | ||
299 | */ | ||
300 | ret = install_special_mapping(mm, addr, PAGE_SIZE, | ||
301 | VM_READ|VM_EXEC| | ||
302 | VM_MAYREAD|VM_MAYWRITE|VM_MAYEXEC| | ||
303 | VM_ALWAYSDUMP, | ||
304 | syscall_pages); | ||
305 | |||
306 | if (ret) | ||
307 | goto up_fail; | ||
308 | } | ||
132 | 309 | ||
133 | current->mm->context.vdso = (void *)addr; | 310 | current->mm->context.vdso = (void *)addr; |
134 | current_thread_info()->sysenter_return = | 311 | current_thread_info()->sysenter_return = |
135 | (void *)VDSO_SYM(&SYSENTER_RETURN); | 312 | (void *)VDSO_SYM(&SYSENTER_RETURN); |
136 | up_fail: | 313 | |
314 | up_fail: | ||
137 | up_write(&mm->mmap_sem); | 315 | up_write(&mm->mmap_sem); |
316 | |||
138 | return ret; | 317 | return ret; |
139 | } | 318 | } |
140 | 319 | ||
@@ -147,6 +326,11 @@ const char *arch_vma_name(struct vm_area_struct *vma) | |||
147 | 326 | ||
148 | struct vm_area_struct *get_gate_vma(struct task_struct *tsk) | 327 | struct vm_area_struct *get_gate_vma(struct task_struct *tsk) |
149 | { | 328 | { |
329 | struct mm_struct *mm = tsk->mm; | ||
330 | |||
331 | /* Check to see if this task was created in compat vdso mode */ | ||
332 | if (mm && mm->context.vdso == (void *)VDSO_HIGH_BASE) | ||
333 | return &gate_vma; | ||
150 | return NULL; | 334 | return NULL; |
151 | } | 335 | } |
152 | 336 | ||
@@ -159,4 +343,3 @@ int in_gate_area_no_task(unsigned long addr) | |||
159 | { | 343 | { |
160 | return 0; | 344 | return 0; |
161 | } | 345 | } |
162 | #endif | ||
diff --git a/arch/i386/kernel/time.c b/arch/i386/kernel/time.c index 94e5cb09110..a665df61f08 100644 --- a/arch/i386/kernel/time.c +++ b/arch/i386/kernel/time.c | |||
@@ -70,8 +70,6 @@ | |||
70 | 70 | ||
71 | #include <asm/i8259.h> | 71 | #include <asm/i8259.h> |
72 | 72 | ||
73 | int pit_latch_buggy; /* extern */ | ||
74 | |||
75 | #include "do_timer.h" | 73 | #include "do_timer.h" |
76 | 74 | ||
77 | unsigned int cpu_khz; /* Detected as we calibrate the TSC */ | 75 | unsigned int cpu_khz; /* Detected as we calibrate the TSC */ |
diff --git a/arch/i386/kernel/trampoline.S b/arch/i386/kernel/trampoline.S index 2f1814c5cfd..f62815f8d06 100644 --- a/arch/i386/kernel/trampoline.S +++ b/arch/i386/kernel/trampoline.S | |||
@@ -29,7 +29,7 @@ | |||
29 | * | 29 | * |
30 | * TYPE VALUE | 30 | * TYPE VALUE |
31 | * R_386_32 startup_32_smp | 31 | * R_386_32 startup_32_smp |
32 | * R_386_32 boot_gdt_table | 32 | * R_386_32 boot_gdt |
33 | */ | 33 | */ |
34 | 34 | ||
35 | #include <linux/linkage.h> | 35 | #include <linux/linkage.h> |
@@ -62,8 +62,8 @@ r_base = . | |||
62 | * to 32 bit. | 62 | * to 32 bit. |
63 | */ | 63 | */ |
64 | 64 | ||
65 | lidtl boot_idt - r_base # load idt with 0, 0 | 65 | lidtl boot_idt_descr - r_base # load idt with 0, 0 |
66 | lgdtl boot_gdt - r_base # load gdt with whatever is appropriate | 66 | lgdtl boot_gdt_descr - r_base # load gdt with whatever is appropriate |
67 | 67 | ||
68 | xor %ax, %ax | 68 | xor %ax, %ax |
69 | inc %ax # protected mode (PE) bit | 69 | inc %ax # protected mode (PE) bit |
@@ -73,11 +73,11 @@ r_base = . | |||
73 | 73 | ||
74 | # These need to be in the same 64K segment as the above; | 74 | # These need to be in the same 64K segment as the above; |
75 | # hence we don't use the boot_gdt_descr defined in head.S | 75 | # hence we don't use the boot_gdt_descr defined in head.S |
76 | boot_gdt: | 76 | boot_gdt_descr: |
77 | .word __BOOT_DS + 7 # gdt limit | 77 | .word __BOOT_DS + 7 # gdt limit |
78 | .long boot_gdt_table-__PAGE_OFFSET # gdt base | 78 | .long boot_gdt - __PAGE_OFFSET # gdt base |
79 | 79 | ||
80 | boot_idt: | 80 | boot_idt_descr: |
81 | .word 0 # idt limit = 0 | 81 | .word 0 # idt limit = 0 |
82 | .long 0 # idt base = 0L | 82 | .long 0 # idt base = 0L |
83 | 83 | ||
diff --git a/arch/i386/kernel/traps.c b/arch/i386/kernel/traps.c index af0d3f70a81..f21b41e7770 100644 --- a/arch/i386/kernel/traps.c +++ b/arch/i386/kernel/traps.c | |||
@@ -476,8 +476,6 @@ static void __kprobes do_trap(int trapnr, int signr, char *str, int vm86, | |||
476 | siginfo_t *info) | 476 | siginfo_t *info) |
477 | { | 477 | { |
478 | struct task_struct *tsk = current; | 478 | struct task_struct *tsk = current; |
479 | tsk->thread.error_code = error_code; | ||
480 | tsk->thread.trap_no = trapnr; | ||
481 | 479 | ||
482 | if (regs->eflags & VM_MASK) { | 480 | if (regs->eflags & VM_MASK) { |
483 | if (vm86) | 481 | if (vm86) |
@@ -489,6 +487,18 @@ static void __kprobes do_trap(int trapnr, int signr, char *str, int vm86, | |||
489 | goto kernel_trap; | 487 | goto kernel_trap; |
490 | 488 | ||
491 | trap_signal: { | 489 | trap_signal: { |
490 | /* | ||
491 | * We want error_code and trap_no set for userspace faults and | ||
492 | * kernelspace faults which result in die(), but not | ||
493 | * kernelspace faults which are fixed up. die() gives the | ||
494 | * process no chance to handle the signal and notice the | ||
495 | * kernel fault information, so that won't result in polluting | ||
496 | * the information about previously queued, but not yet | ||
497 | * delivered, faults. See also do_general_protection below. | ||
498 | */ | ||
499 | tsk->thread.error_code = error_code; | ||
500 | tsk->thread.trap_no = trapnr; | ||
501 | |||
492 | if (info) | 502 | if (info) |
493 | force_sig_info(signr, info, tsk); | 503 | force_sig_info(signr, info, tsk); |
494 | else | 504 | else |
@@ -497,8 +507,11 @@ static void __kprobes do_trap(int trapnr, int signr, char *str, int vm86, | |||
497 | } | 507 | } |
498 | 508 | ||
499 | kernel_trap: { | 509 | kernel_trap: { |
500 | if (!fixup_exception(regs)) | 510 | if (!fixup_exception(regs)) { |
511 | tsk->thread.error_code = error_code; | ||
512 | tsk->thread.trap_no = trapnr; | ||
501 | die(str, regs, error_code); | 513 | die(str, regs, error_code); |
514 | } | ||
502 | return; | 515 | return; |
503 | } | 516 | } |
504 | 517 | ||
@@ -583,7 +596,7 @@ fastcall void __kprobes do_general_protection(struct pt_regs * regs, | |||
583 | * and we set the offset field correctly. Then we let the CPU to | 596 | * and we set the offset field correctly. Then we let the CPU to |
584 | * restart the faulting instruction. | 597 | * restart the faulting instruction. |
585 | */ | 598 | */ |
586 | if (tss->io_bitmap_base == INVALID_IO_BITMAP_OFFSET_LAZY && | 599 | if (tss->x86_tss.io_bitmap_base == INVALID_IO_BITMAP_OFFSET_LAZY && |
587 | thread->io_bitmap_ptr) { | 600 | thread->io_bitmap_ptr) { |
588 | memcpy(tss->io_bitmap, thread->io_bitmap_ptr, | 601 | memcpy(tss->io_bitmap, thread->io_bitmap_ptr, |
589 | thread->io_bitmap_max); | 602 | thread->io_bitmap_max); |
@@ -596,16 +609,13 @@ fastcall void __kprobes do_general_protection(struct pt_regs * regs, | |||
596 | thread->io_bitmap_max, 0xff, | 609 | thread->io_bitmap_max, 0xff, |
597 | tss->io_bitmap_max - thread->io_bitmap_max); | 610 | tss->io_bitmap_max - thread->io_bitmap_max); |
598 | tss->io_bitmap_max = thread->io_bitmap_max; | 611 | tss->io_bitmap_max = thread->io_bitmap_max; |
599 | tss->io_bitmap_base = IO_BITMAP_OFFSET; | 612 | tss->x86_tss.io_bitmap_base = IO_BITMAP_OFFSET; |
600 | tss->io_bitmap_owner = thread; | 613 | tss->io_bitmap_owner = thread; |
601 | put_cpu(); | 614 | put_cpu(); |
602 | return; | 615 | return; |
603 | } | 616 | } |
604 | put_cpu(); | 617 | put_cpu(); |
605 | 618 | ||
606 | current->thread.error_code = error_code; | ||
607 | current->thread.trap_no = 13; | ||
608 | |||
609 | if (regs->eflags & VM_MASK) | 619 | if (regs->eflags & VM_MASK) |
610 | goto gp_in_vm86; | 620 | goto gp_in_vm86; |
611 | 621 | ||
@@ -624,6 +634,8 @@ gp_in_vm86: | |||
624 | 634 | ||
625 | gp_in_kernel: | 635 | gp_in_kernel: |
626 | if (!fixup_exception(regs)) { | 636 | if (!fixup_exception(regs)) { |
637 | current->thread.error_code = error_code; | ||
638 | current->thread.trap_no = 13; | ||
627 | if (notify_die(DIE_GPF, "general protection fault", regs, | 639 | if (notify_die(DIE_GPF, "general protection fault", regs, |
628 | error_code, 13, SIGSEGV) == NOTIFY_STOP) | 640 | error_code, 13, SIGSEGV) == NOTIFY_STOP) |
629 | return; | 641 | return; |
@@ -1018,9 +1030,7 @@ fastcall void do_spurious_interrupt_bug(struct pt_regs * regs, | |||
1018 | fastcall unsigned long patch_espfix_desc(unsigned long uesp, | 1030 | fastcall unsigned long patch_espfix_desc(unsigned long uesp, |
1019 | unsigned long kesp) | 1031 | unsigned long kesp) |
1020 | { | 1032 | { |
1021 | int cpu = smp_processor_id(); | 1033 | struct desc_struct *gdt = __get_cpu_var(gdt_page).gdt; |
1022 | struct Xgt_desc_struct *cpu_gdt_descr = &per_cpu(cpu_gdt_descr, cpu); | ||
1023 | struct desc_struct *gdt = (struct desc_struct *)cpu_gdt_descr->address; | ||
1024 | unsigned long base = (kesp - uesp) & -THREAD_SIZE; | 1034 | unsigned long base = (kesp - uesp) & -THREAD_SIZE; |
1025 | unsigned long new_kesp = kesp - base; | 1035 | unsigned long new_kesp = kesp - base; |
1026 | unsigned long lim_pages = (new_kesp | (THREAD_SIZE - 1)) >> PAGE_SHIFT; | 1036 | unsigned long lim_pages = (new_kesp | (THREAD_SIZE - 1)) >> PAGE_SHIFT; |
diff --git a/arch/i386/kernel/tsc.c b/arch/i386/kernel/tsc.c index 6cb8f533673..f64b81f3033 100644 --- a/arch/i386/kernel/tsc.c +++ b/arch/i386/kernel/tsc.c | |||
@@ -200,13 +200,10 @@ time_cpufreq_notifier(struct notifier_block *nb, unsigned long val, void *data) | |||
200 | { | 200 | { |
201 | struct cpufreq_freqs *freq = data; | 201 | struct cpufreq_freqs *freq = data; |
202 | 202 | ||
203 | if (val != CPUFREQ_RESUMECHANGE && val != CPUFREQ_SUSPENDCHANGE) | ||
204 | write_seqlock_irq(&xtime_lock); | ||
205 | |||
206 | if (!ref_freq) { | 203 | if (!ref_freq) { |
207 | if (!freq->old){ | 204 | if (!freq->old){ |
208 | ref_freq = freq->new; | 205 | ref_freq = freq->new; |
209 | goto end; | 206 | return 0; |
210 | } | 207 | } |
211 | ref_freq = freq->old; | 208 | ref_freq = freq->old; |
212 | loops_per_jiffy_ref = cpu_data[freq->cpu].loops_per_jiffy; | 209 | loops_per_jiffy_ref = cpu_data[freq->cpu].loops_per_jiffy; |
@@ -233,13 +230,10 @@ time_cpufreq_notifier(struct notifier_block *nb, unsigned long val, void *data) | |||
233 | * TSC based sched_clock turns | 230 | * TSC based sched_clock turns |
234 | * to junk w/ cpufreq | 231 | * to junk w/ cpufreq |
235 | */ | 232 | */ |
236 | mark_tsc_unstable(); | 233 | mark_tsc_unstable("cpufreq changes"); |
237 | } | 234 | } |
238 | } | 235 | } |
239 | } | 236 | } |
240 | end: | ||
241 | if (val != CPUFREQ_RESUMECHANGE && val != CPUFREQ_SUSPENDCHANGE) | ||
242 | write_sequnlock_irq(&xtime_lock); | ||
243 | 237 | ||
244 | return 0; | 238 | return 0; |
245 | } | 239 | } |
@@ -281,11 +275,12 @@ static struct clocksource clocksource_tsc = { | |||
281 | CLOCK_SOURCE_MUST_VERIFY, | 275 | CLOCK_SOURCE_MUST_VERIFY, |
282 | }; | 276 | }; |
283 | 277 | ||
284 | void mark_tsc_unstable(void) | 278 | void mark_tsc_unstable(char *reason) |
285 | { | 279 | { |
286 | if (!tsc_unstable) { | 280 | if (!tsc_unstable) { |
287 | tsc_unstable = 1; | 281 | tsc_unstable = 1; |
288 | tsc_enabled = 0; | 282 | tsc_enabled = 0; |
283 | printk("Marking TSC unstable due to: %s.\n", reason); | ||
289 | /* Can be called before registration */ | 284 | /* Can be called before registration */ |
290 | if (clocksource_tsc.mult) | 285 | if (clocksource_tsc.mult) |
291 | clocksource_change_rating(&clocksource_tsc, 0); | 286 | clocksource_change_rating(&clocksource_tsc, 0); |
diff --git a/arch/i386/kernel/verify_cpu.S b/arch/i386/kernel/verify_cpu.S new file mode 100644 index 00000000000..e51a8695d54 --- /dev/null +++ b/arch/i386/kernel/verify_cpu.S | |||
@@ -0,0 +1,65 @@ | |||
1 | /* Check if CPU has some minimum CPUID bits | ||
2 | This runs in 16bit mode so that the caller can still use the BIOS | ||
3 | to output errors on the screen */ | ||
4 | #include <asm/cpufeature.h> | ||
5 | |||
6 | verify_cpu: | ||
7 | pushfl # Save caller passed flags | ||
8 | pushl $0 # Kill any dangerous flags | ||
9 | popfl | ||
10 | |||
11 | #if CONFIG_X86_MINIMUM_CPU_MODEL >= 4 | ||
12 | pushfl | ||
13 | orl $(1<<18),(%esp) # try setting AC | ||
14 | popfl | ||
15 | pushfl | ||
16 | popl %eax | ||
17 | testl $(1<<18),%eax | ||
18 | jz bad | ||
19 | #endif | ||
20 | #if REQUIRED_MASK1 != 0 | ||
21 | pushfl # standard way to check for cpuid | ||
22 | popl %eax | ||
23 | movl %eax,%ebx | ||
24 | xorl $0x200000,%eax | ||
25 | pushl %eax | ||
26 | popfl | ||
27 | pushfl | ||
28 | popl %eax | ||
29 | cmpl %eax,%ebx | ||
30 | pushfl # standard way to check for cpuid | ||
31 | popl %eax | ||
32 | movl %eax,%ebx | ||
33 | xorl $0x200000,%eax | ||
34 | pushl %eax | ||
35 | popfl | ||
36 | pushfl | ||
37 | popl %eax | ||
38 | cmpl %eax,%ebx | ||
39 | jz bad # REQUIRED_MASK1 != 0 requires CPUID | ||
40 | |||
41 | movl $0x0,%eax # See if cpuid 1 is implemented | ||
42 | cpuid | ||
43 | cmpl $0x1,%eax | ||
44 | jb bad # no cpuid 1 | ||
45 | |||
46 | movl $0x1,%eax # Does the cpu have what it takes | ||
47 | cpuid | ||
48 | |||
49 | #if CONFIG_X86_MINIMUM_CPU_MODEL > 4 | ||
50 | #error add proper model checking here | ||
51 | #endif | ||
52 | |||
53 | andl $REQUIRED_MASK1,%edx | ||
54 | xorl $REQUIRED_MASK1,%edx | ||
55 | jnz bad | ||
56 | #endif /* REQUIRED_MASK1 */ | ||
57 | |||
58 | popfl | ||
59 | xor %eax,%eax | ||
60 | ret | ||
61 | |||
62 | bad: | ||
63 | popfl | ||
64 | movl $1,%eax | ||
65 | ret | ||
diff --git a/arch/i386/kernel/vmi.c b/arch/i386/kernel/vmi.c index 697a70e8c0c..c8726c424b3 100644 --- a/arch/i386/kernel/vmi.c +++ b/arch/i386/kernel/vmi.c | |||
@@ -26,6 +26,7 @@ | |||
26 | #include <linux/cpu.h> | 26 | #include <linux/cpu.h> |
27 | #include <linux/bootmem.h> | 27 | #include <linux/bootmem.h> |
28 | #include <linux/mm.h> | 28 | #include <linux/mm.h> |
29 | #include <linux/highmem.h> | ||
29 | #include <asm/vmi.h> | 30 | #include <asm/vmi.h> |
30 | #include <asm/io.h> | 31 | #include <asm/io.h> |
31 | #include <asm/fixmap.h> | 32 | #include <asm/fixmap.h> |
@@ -56,7 +57,7 @@ static int disable_noidle; | |||
56 | static int disable_vmi_timer; | 57 | static int disable_vmi_timer; |
57 | 58 | ||
58 | /* Cached VMI operations */ | 59 | /* Cached VMI operations */ |
59 | struct { | 60 | static struct { |
60 | void (*cpuid)(void /* non-c */); | 61 | void (*cpuid)(void /* non-c */); |
61 | void (*_set_ldt)(u32 selector); | 62 | void (*_set_ldt)(u32 selector); |
62 | void (*set_tr)(u32 selector); | 63 | void (*set_tr)(u32 selector); |
@@ -65,16 +66,15 @@ struct { | |||
65 | void (*release_page)(u32, u32); | 66 | void (*release_page)(u32, u32); |
66 | void (*set_pte)(pte_t, pte_t *, unsigned); | 67 | void (*set_pte)(pte_t, pte_t *, unsigned); |
67 | void (*update_pte)(pte_t *, unsigned); | 68 | void (*update_pte)(pte_t *, unsigned); |
68 | void (*set_linear_mapping)(int, u32, u32, u32); | 69 | void (*set_linear_mapping)(int, void *, u32, u32); |
69 | void (*flush_tlb)(int); | 70 | void (*_flush_tlb)(int); |
70 | void (*set_initial_ap_state)(int, int); | 71 | void (*set_initial_ap_state)(int, int); |
71 | void (*halt)(void); | 72 | void (*halt)(void); |
72 | void (*set_lazy_mode)(int mode); | 73 | void (*set_lazy_mode)(int mode); |
73 | } vmi_ops; | 74 | } vmi_ops; |
74 | 75 | ||
75 | /* XXX move this to alternative.h */ | 76 | /* Cached VMI operations */ |
76 | extern struct paravirt_patch __start_parainstructions[], | 77 | struct vmi_timer_ops vmi_timer_ops; |
77 | __stop_parainstructions[]; | ||
78 | 78 | ||
79 | /* | 79 | /* |
80 | * VMI patching routines. | 80 | * VMI patching routines. |
@@ -83,11 +83,6 @@ extern struct paravirt_patch __start_parainstructions[], | |||
83 | #define MNEM_JMP 0xe9 | 83 | #define MNEM_JMP 0xe9 |
84 | #define MNEM_RET 0xc3 | 84 | #define MNEM_RET 0xc3 |
85 | 85 | ||
86 | static char irq_save_disable_callout[] = { | ||
87 | MNEM_CALL, 0, 0, 0, 0, | ||
88 | MNEM_CALL, 0, 0, 0, 0, | ||
89 | MNEM_RET | ||
90 | }; | ||
91 | #define IRQ_PATCH_INT_MASK 0 | 86 | #define IRQ_PATCH_INT_MASK 0 |
92 | #define IRQ_PATCH_DISABLE 5 | 87 | #define IRQ_PATCH_DISABLE 5 |
93 | 88 | ||
@@ -135,33 +130,17 @@ static unsigned patch_internal(int call, unsigned len, void *insns) | |||
135 | static unsigned vmi_patch(u8 type, u16 clobbers, void *insns, unsigned len) | 130 | static unsigned vmi_patch(u8 type, u16 clobbers, void *insns, unsigned len) |
136 | { | 131 | { |
137 | switch (type) { | 132 | switch (type) { |
138 | case PARAVIRT_IRQ_DISABLE: | 133 | case PARAVIRT_PATCH(irq_disable): |
139 | return patch_internal(VMI_CALL_DisableInterrupts, len, insns); | 134 | return patch_internal(VMI_CALL_DisableInterrupts, len, insns); |
140 | case PARAVIRT_IRQ_ENABLE: | 135 | case PARAVIRT_PATCH(irq_enable): |
141 | return patch_internal(VMI_CALL_EnableInterrupts, len, insns); | 136 | return patch_internal(VMI_CALL_EnableInterrupts, len, insns); |
142 | case PARAVIRT_RESTORE_FLAGS: | 137 | case PARAVIRT_PATCH(restore_fl): |
143 | return patch_internal(VMI_CALL_SetInterruptMask, len, insns); | 138 | return patch_internal(VMI_CALL_SetInterruptMask, len, insns); |
144 | case PARAVIRT_SAVE_FLAGS: | 139 | case PARAVIRT_PATCH(save_fl): |
145 | return patch_internal(VMI_CALL_GetInterruptMask, len, insns); | 140 | return patch_internal(VMI_CALL_GetInterruptMask, len, insns); |
146 | case PARAVIRT_SAVE_FLAGS_IRQ_DISABLE: | 141 | case PARAVIRT_PATCH(iret): |
147 | if (len >= 10) { | ||
148 | patch_internal(VMI_CALL_GetInterruptMask, len, insns); | ||
149 | patch_internal(VMI_CALL_DisableInterrupts, len-5, insns+5); | ||
150 | return 10; | ||
151 | } else { | ||
152 | /* | ||
153 | * You bastards didn't leave enough room to | ||
154 | * patch save_flags_irq_disable inline. Patch | ||
155 | * to a helper | ||
156 | */ | ||
157 | BUG_ON(len < 5); | ||
158 | *(char *)insns = MNEM_CALL; | ||
159 | patch_offset(insns, irq_save_disable_callout); | ||
160 | return 5; | ||
161 | } | ||
162 | case PARAVIRT_INTERRUPT_RETURN: | ||
163 | return patch_internal(VMI_CALL_IRET, len, insns); | 142 | return patch_internal(VMI_CALL_IRET, len, insns); |
164 | case PARAVIRT_STI_SYSEXIT: | 143 | case PARAVIRT_PATCH(irq_enable_sysexit): |
165 | return patch_internal(VMI_CALL_SYSEXIT, len, insns); | 144 | return patch_internal(VMI_CALL_SYSEXIT, len, insns); |
166 | default: | 145 | default: |
167 | break; | 146 | break; |
@@ -230,24 +209,24 @@ static void vmi_set_tr(void) | |||
230 | static void vmi_load_esp0(struct tss_struct *tss, | 209 | static void vmi_load_esp0(struct tss_struct *tss, |
231 | struct thread_struct *thread) | 210 | struct thread_struct *thread) |
232 | { | 211 | { |
233 | tss->esp0 = thread->esp0; | 212 | tss->x86_tss.esp0 = thread->esp0; |
234 | 213 | ||
235 | /* This can only happen when SEP is enabled, no need to test "SEP"arately */ | 214 | /* This can only happen when SEP is enabled, no need to test "SEP"arately */ |
236 | if (unlikely(tss->ss1 != thread->sysenter_cs)) { | 215 | if (unlikely(tss->x86_tss.ss1 != thread->sysenter_cs)) { |
237 | tss->ss1 = thread->sysenter_cs; | 216 | tss->x86_tss.ss1 = thread->sysenter_cs; |
238 | wrmsr(MSR_IA32_SYSENTER_CS, thread->sysenter_cs, 0); | 217 | wrmsr(MSR_IA32_SYSENTER_CS, thread->sysenter_cs, 0); |
239 | } | 218 | } |
240 | vmi_ops.set_kernel_stack(__KERNEL_DS, tss->esp0); | 219 | vmi_ops.set_kernel_stack(__KERNEL_DS, tss->x86_tss.esp0); |
241 | } | 220 | } |
242 | 221 | ||
243 | static void vmi_flush_tlb_user(void) | 222 | static void vmi_flush_tlb_user(void) |
244 | { | 223 | { |
245 | vmi_ops.flush_tlb(VMI_FLUSH_TLB); | 224 | vmi_ops._flush_tlb(VMI_FLUSH_TLB); |
246 | } | 225 | } |
247 | 226 | ||
248 | static void vmi_flush_tlb_kernel(void) | 227 | static void vmi_flush_tlb_kernel(void) |
249 | { | 228 | { |
250 | vmi_ops.flush_tlb(VMI_FLUSH_TLB | VMI_FLUSH_GLOBAL); | 229 | vmi_ops._flush_tlb(VMI_FLUSH_TLB | VMI_FLUSH_GLOBAL); |
251 | } | 230 | } |
252 | 231 | ||
253 | /* Stub to do nothing at all; used for delays and unimplemented calls */ | 232 | /* Stub to do nothing at all; used for delays and unimplemented calls */ |
@@ -255,18 +234,6 @@ static void vmi_nop(void) | |||
255 | { | 234 | { |
256 | } | 235 | } |
257 | 236 | ||
258 | /* For NO_IDLE_HZ, we stop the clock when halting the kernel */ | ||
259 | static fastcall void vmi_safe_halt(void) | ||
260 | { | ||
261 | int idle = vmi_stop_hz_timer(); | ||
262 | vmi_ops.halt(); | ||
263 | if (idle) { | ||
264 | local_irq_disable(); | ||
265 | vmi_account_time_restart_hz_timer(); | ||
266 | local_irq_enable(); | ||
267 | } | ||
268 | } | ||
269 | |||
270 | #ifdef CONFIG_DEBUG_PAGE_TYPE | 237 | #ifdef CONFIG_DEBUG_PAGE_TYPE |
271 | 238 | ||
272 | #ifdef CONFIG_X86_PAE | 239 | #ifdef CONFIG_X86_PAE |
@@ -370,8 +337,11 @@ static void vmi_check_page_type(u32 pfn, int type) | |||
370 | #define vmi_check_page_type(p,t) do { } while (0) | 337 | #define vmi_check_page_type(p,t) do { } while (0) |
371 | #endif | 338 | #endif |
372 | 339 | ||
373 | static void vmi_map_pt_hook(int type, pte_t *va, u32 pfn) | 340 | #ifdef CONFIG_HIGHPTE |
341 | static void *vmi_kmap_atomic_pte(struct page *page, enum km_type type) | ||
374 | { | 342 | { |
343 | void *va = kmap_atomic(page, type); | ||
344 | |||
375 | /* | 345 | /* |
376 | * Internally, the VMI ROM must map virtual addresses to physical | 346 | * Internally, the VMI ROM must map virtual addresses to physical |
377 | * addresses for processing MMU updates. By the time MMU updates | 347 | * addresses for processing MMU updates. By the time MMU updates |
@@ -385,8 +355,11 @@ static void vmi_map_pt_hook(int type, pte_t *va, u32 pfn) | |||
385 | * args: SLOT VA COUNT PFN | 355 | * args: SLOT VA COUNT PFN |
386 | */ | 356 | */ |
387 | BUG_ON(type != KM_PTE0 && type != KM_PTE1); | 357 | BUG_ON(type != KM_PTE0 && type != KM_PTE1); |
388 | vmi_ops.set_linear_mapping((type - KM_PTE0)+1, (u32)va, 1, pfn); | 358 | vmi_ops.set_linear_mapping((type - KM_PTE0)+1, va, 1, page_to_pfn(page)); |
359 | |||
360 | return va; | ||
389 | } | 361 | } |
362 | #endif | ||
390 | 363 | ||
391 | static void vmi_allocate_pt(u32 pfn) | 364 | static void vmi_allocate_pt(u32 pfn) |
392 | { | 365 | { |
@@ -443,13 +416,13 @@ static void vmi_release_pd(u32 pfn) | |||
443 | ((level) | (is_current_as(mm, user) ? \ | 416 | ((level) | (is_current_as(mm, user) ? \ |
444 | (VMI_PAGE_DEFER | VMI_PAGE_CURRENT_AS | ((addr) & VMI_PAGE_VA_MASK)) : 0)) | 417 | (VMI_PAGE_DEFER | VMI_PAGE_CURRENT_AS | ((addr) & VMI_PAGE_VA_MASK)) : 0)) |
445 | 418 | ||
446 | static void vmi_update_pte(struct mm_struct *mm, u32 addr, pte_t *ptep) | 419 | static void vmi_update_pte(struct mm_struct *mm, unsigned long addr, pte_t *ptep) |
447 | { | 420 | { |
448 | vmi_check_page_type(__pa(ptep) >> PAGE_SHIFT, VMI_PAGE_PTE); | 421 | vmi_check_page_type(__pa(ptep) >> PAGE_SHIFT, VMI_PAGE_PTE); |
449 | vmi_ops.update_pte(ptep, vmi_flags_addr(mm, addr, VMI_PAGE_PT, 0)); | 422 | vmi_ops.update_pte(ptep, vmi_flags_addr(mm, addr, VMI_PAGE_PT, 0)); |
450 | } | 423 | } |
451 | 424 | ||
452 | static void vmi_update_pte_defer(struct mm_struct *mm, u32 addr, pte_t *ptep) | 425 | static void vmi_update_pte_defer(struct mm_struct *mm, unsigned long addr, pte_t *ptep) |
453 | { | 426 | { |
454 | vmi_check_page_type(__pa(ptep) >> PAGE_SHIFT, VMI_PAGE_PTE); | 427 | vmi_check_page_type(__pa(ptep) >> PAGE_SHIFT, VMI_PAGE_PTE); |
455 | vmi_ops.update_pte(ptep, vmi_flags_addr_defer(mm, addr, VMI_PAGE_PT, 0)); | 428 | vmi_ops.update_pte(ptep, vmi_flags_addr_defer(mm, addr, VMI_PAGE_PT, 0)); |
@@ -462,7 +435,7 @@ static void vmi_set_pte(pte_t *ptep, pte_t pte) | |||
462 | vmi_ops.set_pte(pte, ptep, VMI_PAGE_PT); | 435 | vmi_ops.set_pte(pte, ptep, VMI_PAGE_PT); |
463 | } | 436 | } |
464 | 437 | ||
465 | static void vmi_set_pte_at(struct mm_struct *mm, u32 addr, pte_t *ptep, pte_t pte) | 438 | static void vmi_set_pte_at(struct mm_struct *mm, unsigned long addr, pte_t *ptep, pte_t pte) |
466 | { | 439 | { |
467 | vmi_check_page_type(__pa(ptep) >> PAGE_SHIFT, VMI_PAGE_PTE); | 440 | vmi_check_page_type(__pa(ptep) >> PAGE_SHIFT, VMI_PAGE_PTE); |
468 | vmi_ops.set_pte(pte, ptep, vmi_flags_addr(mm, addr, VMI_PAGE_PT, 0)); | 441 | vmi_ops.set_pte(pte, ptep, vmi_flags_addr(mm, addr, VMI_PAGE_PT, 0)); |
@@ -516,7 +489,7 @@ static void vmi_pte_clear(struct mm_struct *mm, unsigned long addr, pte_t *ptep) | |||
516 | vmi_ops.set_pte(pte, ptep, vmi_flags_addr(mm, addr, VMI_PAGE_PT, 0)); | 489 | vmi_ops.set_pte(pte, ptep, vmi_flags_addr(mm, addr, VMI_PAGE_PT, 0)); |
517 | } | 490 | } |
518 | 491 | ||
519 | void vmi_pmd_clear(pmd_t *pmd) | 492 | static void vmi_pmd_clear(pmd_t *pmd) |
520 | { | 493 | { |
521 | const pte_t pte = { 0 }; | 494 | const pte_t pte = { 0 }; |
522 | vmi_check_page_type(__pa(pmd) >> PAGE_SHIFT, VMI_PAGE_PMD); | 495 | vmi_check_page_type(__pa(pmd) >> PAGE_SHIFT, VMI_PAGE_PMD); |
@@ -525,8 +498,6 @@ void vmi_pmd_clear(pmd_t *pmd) | |||
525 | #endif | 498 | #endif |
526 | 499 | ||
527 | #ifdef CONFIG_SMP | 500 | #ifdef CONFIG_SMP |
528 | extern void setup_pda(void); | ||
529 | |||
530 | static void __devinit | 501 | static void __devinit |
531 | vmi_startup_ipi_hook(int phys_apicid, unsigned long start_eip, | 502 | vmi_startup_ipi_hook(int phys_apicid, unsigned long start_eip, |
532 | unsigned long start_esp) | 503 | unsigned long start_esp) |
@@ -551,13 +522,11 @@ vmi_startup_ipi_hook(int phys_apicid, unsigned long start_eip, | |||
551 | 522 | ||
552 | ap.ds = __USER_DS; | 523 | ap.ds = __USER_DS; |
553 | ap.es = __USER_DS; | 524 | ap.es = __USER_DS; |
554 | ap.fs = __KERNEL_PDA; | 525 | ap.fs = __KERNEL_PERCPU; |
555 | ap.gs = 0; | 526 | ap.gs = 0; |
556 | 527 | ||
557 | ap.eflags = 0; | 528 | ap.eflags = 0; |
558 | 529 | ||
559 | setup_pda(); | ||
560 | |||
561 | #ifdef CONFIG_X86_PAE | 530 | #ifdef CONFIG_X86_PAE |
562 | /* efer should match BSP efer. */ | 531 | /* efer should match BSP efer. */ |
563 | if (cpu_has_nx) { | 532 | if (cpu_has_nx) { |
@@ -575,9 +544,9 @@ vmi_startup_ipi_hook(int phys_apicid, unsigned long start_eip, | |||
575 | } | 544 | } |
576 | #endif | 545 | #endif |
577 | 546 | ||
578 | static void vmi_set_lazy_mode(int mode) | 547 | static void vmi_set_lazy_mode(enum paravirt_lazy_mode mode) |
579 | { | 548 | { |
580 | static DEFINE_PER_CPU(int, lazy_mode); | 549 | static DEFINE_PER_CPU(enum paravirt_lazy_mode, lazy_mode); |
581 | 550 | ||
582 | if (!vmi_ops.set_lazy_mode) | 551 | if (!vmi_ops.set_lazy_mode) |
583 | return; | 552 | return; |
@@ -685,7 +654,7 @@ void vmi_bringup(void) | |||
685 | { | 654 | { |
686 | /* We must establish the lowmem mapping for MMU ops to work */ | 655 | /* We must establish the lowmem mapping for MMU ops to work */ |
687 | if (vmi_ops.set_linear_mapping) | 656 | if (vmi_ops.set_linear_mapping) |
688 | vmi_ops.set_linear_mapping(0, __PAGE_OFFSET, max_low_pfn, 0); | 657 | vmi_ops.set_linear_mapping(0, (void *)__PAGE_OFFSET, max_low_pfn, 0); |
689 | } | 658 | } |
690 | 659 | ||
691 | /* | 660 | /* |
@@ -740,7 +709,6 @@ do { \ | |||
740 | } \ | 709 | } \ |
741 | } while (0) | 710 | } while (0) |
742 | 711 | ||
743 | |||
744 | /* | 712 | /* |
745 | * Activate the VMI interface and switch into paravirtualized mode | 713 | * Activate the VMI interface and switch into paravirtualized mode |
746 | */ | 714 | */ |
@@ -796,12 +764,6 @@ static inline int __init activate_vmi(void) | |||
796 | para_fill(irq_disable, DisableInterrupts); | 764 | para_fill(irq_disable, DisableInterrupts); |
797 | para_fill(irq_enable, EnableInterrupts); | 765 | para_fill(irq_enable, EnableInterrupts); |
798 | 766 | ||
799 | /* irq_save_disable !!! sheer pain */ | ||
800 | patch_offset(&irq_save_disable_callout[IRQ_PATCH_INT_MASK], | ||
801 | (char *)paravirt_ops.save_fl); | ||
802 | patch_offset(&irq_save_disable_callout[IRQ_PATCH_DISABLE], | ||
803 | (char *)paravirt_ops.irq_disable); | ||
804 | |||
805 | para_fill(wbinvd, WBINVD); | 767 | para_fill(wbinvd, WBINVD); |
806 | para_fill(read_tsc, RDTSC); | 768 | para_fill(read_tsc, RDTSC); |
807 | 769 | ||
@@ -831,8 +793,8 @@ static inline int __init activate_vmi(void) | |||
831 | para_wrap(set_lazy_mode, vmi_set_lazy_mode, set_lazy_mode, SetLazyMode); | 793 | para_wrap(set_lazy_mode, vmi_set_lazy_mode, set_lazy_mode, SetLazyMode); |
832 | 794 | ||
833 | /* user and kernel flush are just handled with different flags to FlushTLB */ | 795 | /* user and kernel flush are just handled with different flags to FlushTLB */ |
834 | para_wrap(flush_tlb_user, vmi_flush_tlb_user, flush_tlb, FlushTLB); | 796 | para_wrap(flush_tlb_user, vmi_flush_tlb_user, _flush_tlb, FlushTLB); |
835 | para_wrap(flush_tlb_kernel, vmi_flush_tlb_kernel, flush_tlb, FlushTLB); | 797 | para_wrap(flush_tlb_kernel, vmi_flush_tlb_kernel, _flush_tlb, FlushTLB); |
836 | para_fill(flush_tlb_single, InvalPage); | 798 | para_fill(flush_tlb_single, InvalPage); |
837 | 799 | ||
838 | /* | 800 | /* |
@@ -878,8 +840,13 @@ static inline int __init activate_vmi(void) | |||
878 | paravirt_ops.release_pt = vmi_release_pt; | 840 | paravirt_ops.release_pt = vmi_release_pt; |
879 | paravirt_ops.release_pd = vmi_release_pd; | 841 | paravirt_ops.release_pd = vmi_release_pd; |
880 | } | 842 | } |
881 | para_wrap(map_pt_hook, vmi_map_pt_hook, set_linear_mapping, | 843 | |
882 | SetLinearMapping); | 844 | /* Set linear is needed in all cases */ |
845 | vmi_ops.set_linear_mapping = vmi_get_function(VMI_CALL_SetLinearMapping); | ||
846 | #ifdef CONFIG_HIGHPTE | ||
847 | if (vmi_ops.set_linear_mapping) | ||
848 | paravirt_ops.kmap_atomic_pte = vmi_kmap_atomic_pte; | ||
849 | #endif | ||
883 | 850 | ||
884 | /* | 851 | /* |
885 | * These MUST always be patched. Don't support indirect jumps | 852 | * These MUST always be patched. Don't support indirect jumps |
@@ -920,8 +887,8 @@ static inline int __init activate_vmi(void) | |||
920 | paravirt_ops.get_wallclock = vmi_get_wallclock; | 887 | paravirt_ops.get_wallclock = vmi_get_wallclock; |
921 | paravirt_ops.set_wallclock = vmi_set_wallclock; | 888 | paravirt_ops.set_wallclock = vmi_set_wallclock; |
922 | #ifdef CONFIG_X86_LOCAL_APIC | 889 | #ifdef CONFIG_X86_LOCAL_APIC |
923 | paravirt_ops.setup_boot_clock = vmi_timer_setup_boot_alarm; | 890 | paravirt_ops.setup_boot_clock = vmi_time_bsp_init; |
924 | paravirt_ops.setup_secondary_clock = vmi_timer_setup_secondary_alarm; | 891 | paravirt_ops.setup_secondary_clock = vmi_time_ap_init; |
925 | #endif | 892 | #endif |
926 | paravirt_ops.get_scheduled_cycles = vmi_get_sched_cycles; | 893 | paravirt_ops.get_scheduled_cycles = vmi_get_sched_cycles; |
927 | paravirt_ops.get_cpu_khz = vmi_cpu_khz; | 894 | paravirt_ops.get_cpu_khz = vmi_cpu_khz; |
@@ -933,11 +900,7 @@ static inline int __init activate_vmi(void) | |||
933 | disable_vmi_timer = 1; | 900 | disable_vmi_timer = 1; |
934 | } | 901 | } |
935 | 902 | ||
936 | /* No idle HZ mode only works if VMI timer and no idle is enabled */ | 903 | para_fill(safe_halt, Halt); |
937 | if (disable_noidle || disable_vmi_timer) | ||
938 | para_fill(safe_halt, Halt); | ||
939 | else | ||
940 | para_wrap(safe_halt, vmi_safe_halt, halt, Halt); | ||
941 | 904 | ||
942 | /* | 905 | /* |
943 | * Alternative instruction rewriting doesn't happen soon enough | 906 | * Alternative instruction rewriting doesn't happen soon enough |
@@ -945,7 +908,7 @@ static inline int __init activate_vmi(void) | |||
945 | * to do this before IRQs get reenabled. Fortunately, it is | 908 | * to do this before IRQs get reenabled. Fortunately, it is |
946 | * idempotent. | 909 | * idempotent. |
947 | */ | 910 | */ |
948 | apply_paravirt(__start_parainstructions, __stop_parainstructions); | 911 | apply_paravirt(__parainstructions, __parainstructions_end); |
949 | 912 | ||
950 | vmi_bringup(); | 913 | vmi_bringup(); |
951 | 914 | ||
diff --git a/arch/i386/kernel/vmiclock.c b/arch/i386/kernel/vmiclock.c new file mode 100644 index 00000000000..26a37f8a876 --- /dev/null +++ b/arch/i386/kernel/vmiclock.c | |||
@@ -0,0 +1,318 @@ | |||
1 | /* | ||
2 | * VMI paravirtual timer support routines. | ||
3 | * | ||
4 | * Copyright (C) 2007, VMware, Inc. | ||
5 | * | ||
6 | * This program is free software; you can redistribute it and/or modify | ||
7 | * it under the terms of the GNU General Public License as published by | ||
8 | * the Free Software Foundation; either version 2 of the License, or | ||
9 | * (at your option) any later version. | ||
10 | * | ||
11 | * This program is distributed in the hope that it will be useful, but | ||
12 | * WITHOUT ANY WARRANTY; without even the implied warranty of | ||
13 | * MERCHANTABILITY OR FITNESS FOR A PARTICULAR PURPOSE, GOOD TITLE or | ||
14 | * NON INFRINGEMENT. See the GNU General Public License for more | ||
15 | * details. | ||
16 | * | ||
17 | * You should have received a copy of the GNU General Public License | ||
18 | * along with this program; if not, write to the Free Software | ||
19 | * Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA. | ||
20 | * | ||
21 | */ | ||
22 | |||
23 | #include <linux/smp.h> | ||
24 | #include <linux/interrupt.h> | ||
25 | #include <linux/cpumask.h> | ||
26 | #include <linux/clocksource.h> | ||
27 | #include <linux/clockchips.h> | ||
28 | |||
29 | #include <asm/vmi.h> | ||
30 | #include <asm/vmi_time.h> | ||
31 | #include <asm/arch_hooks.h> | ||
32 | #include <asm/apicdef.h> | ||
33 | #include <asm/apic.h> | ||
34 | #include <asm/timer.h> | ||
35 | |||
36 | #include <irq_vectors.h> | ||
37 | #include "io_ports.h" | ||
38 | |||
39 | #define VMI_ONESHOT (VMI_ALARM_IS_ONESHOT | VMI_CYCLES_REAL | vmi_get_alarm_wiring()) | ||
40 | #define VMI_PERIODIC (VMI_ALARM_IS_PERIODIC | VMI_CYCLES_REAL | vmi_get_alarm_wiring()) | ||
41 | |||
42 | static DEFINE_PER_CPU(struct clock_event_device, local_events); | ||
43 | |||
44 | static inline u32 vmi_counter(u32 flags) | ||
45 | { | ||
46 | /* Given VMI_ONESHOT or VMI_PERIODIC, return the corresponding | ||
47 | * cycle counter. */ | ||
48 | return flags & VMI_ALARM_COUNTER_MASK; | ||
49 | } | ||
50 | |||
51 | /* paravirt_ops.get_wallclock = vmi_get_wallclock */ | ||
52 | unsigned long vmi_get_wallclock(void) | ||
53 | { | ||
54 | unsigned long long wallclock; | ||
55 | wallclock = vmi_timer_ops.get_wallclock(); // nsec | ||
56 | (void)do_div(wallclock, 1000000000); // sec | ||
57 | |||
58 | return wallclock; | ||
59 | } | ||
60 | |||
61 | /* paravirt_ops.set_wallclock = vmi_set_wallclock */ | ||
62 | int vmi_set_wallclock(unsigned long now) | ||
63 | { | ||
64 | return 0; | ||
65 | } | ||
66 | |||
67 | /* paravirt_ops.get_scheduled_cycles = vmi_get_sched_cycles */ | ||
68 | unsigned long long vmi_get_sched_cycles(void) | ||
69 | { | ||
70 | return vmi_timer_ops.get_cycle_counter(VMI_CYCLES_AVAILABLE); | ||
71 | } | ||
72 | |||
73 | /* paravirt_ops.get_cpu_khz = vmi_cpu_khz */ | ||
74 | unsigned long vmi_cpu_khz(void) | ||
75 | { | ||
76 | unsigned long long khz; | ||
77 | khz = vmi_timer_ops.get_cycle_frequency(); | ||
78 | (void)do_div(khz, 1000); | ||
79 | return khz; | ||
80 | } | ||
81 | |||
82 | static inline unsigned int vmi_get_timer_vector(void) | ||
83 | { | ||
84 | #ifdef CONFIG_X86_IO_APIC | ||
85 | return FIRST_DEVICE_VECTOR; | ||
86 | #else | ||
87 | return FIRST_EXTERNAL_VECTOR; | ||
88 | #endif | ||
89 | } | ||
90 | |||
91 | /** vmi clockchip */ | ||
92 | #ifdef CONFIG_X86_LOCAL_APIC | ||
93 | static unsigned int startup_timer_irq(unsigned int irq) | ||
94 | { | ||
95 | unsigned long val = apic_read(APIC_LVTT); | ||
96 | apic_write(APIC_LVTT, vmi_get_timer_vector()); | ||
97 | |||
98 | return (val & APIC_SEND_PENDING); | ||
99 | } | ||
100 | |||
101 | static void mask_timer_irq(unsigned int irq) | ||
102 | { | ||
103 | unsigned long val = apic_read(APIC_LVTT); | ||
104 | apic_write(APIC_LVTT, val | APIC_LVT_MASKED); | ||
105 | } | ||
106 | |||
107 | static void unmask_timer_irq(unsigned int irq) | ||
108 | { | ||
109 | unsigned long val = apic_read(APIC_LVTT); | ||
110 | apic_write(APIC_LVTT, val & ~APIC_LVT_MASKED); | ||
111 | } | ||
112 | |||
113 | static void ack_timer_irq(unsigned int irq) | ||
114 | { | ||
115 | ack_APIC_irq(); | ||
116 | } | ||
117 | |||
118 | static struct irq_chip vmi_chip __read_mostly = { | ||
119 | .name = "VMI-LOCAL", | ||
120 | .startup = startup_timer_irq, | ||
121 | .mask = mask_timer_irq, | ||
122 | .unmask = unmask_timer_irq, | ||
123 | .ack = ack_timer_irq | ||
124 | }; | ||
125 | #endif | ||
126 | |||
127 | /** vmi clockevent */ | ||
128 | #define VMI_ALARM_WIRED_IRQ0 0x00000000 | ||
129 | #define VMI_ALARM_WIRED_LVTT 0x00010000 | ||
130 | static int vmi_wiring = VMI_ALARM_WIRED_IRQ0; | ||
131 | |||
132 | static inline int vmi_get_alarm_wiring(void) | ||
133 | { | ||
134 | return vmi_wiring; | ||
135 | } | ||
136 | |||
137 | static void vmi_timer_set_mode(enum clock_event_mode mode, | ||
138 | struct clock_event_device *evt) | ||
139 | { | ||
140 | cycle_t now, cycles_per_hz; | ||
141 | BUG_ON(!irqs_disabled()); | ||
142 | |||
143 | switch (mode) { | ||
144 | case CLOCK_EVT_MODE_ONESHOT: | ||
145 | break; | ||
146 | case CLOCK_EVT_MODE_PERIODIC: | ||
147 | cycles_per_hz = vmi_timer_ops.get_cycle_frequency(); | ||
148 | (void)do_div(cycles_per_hz, HZ); | ||
149 | now = vmi_timer_ops.get_cycle_counter(vmi_counter(VMI_PERIODIC)); | ||
150 | vmi_timer_ops.set_alarm(VMI_PERIODIC, now, cycles_per_hz); | ||
151 | break; | ||
152 | case CLOCK_EVT_MODE_UNUSED: | ||
153 | case CLOCK_EVT_MODE_SHUTDOWN: | ||
154 | switch (evt->mode) { | ||
155 | case CLOCK_EVT_MODE_ONESHOT: | ||
156 | vmi_timer_ops.cancel_alarm(VMI_ONESHOT); | ||
157 | break; | ||
158 | case CLOCK_EVT_MODE_PERIODIC: | ||
159 | vmi_timer_ops.cancel_alarm(VMI_PERIODIC); | ||
160 | break; | ||
161 | default: | ||
162 | break; | ||
163 | } | ||
164 | break; | ||
165 | default: | ||
166 | break; | ||
167 | } | ||
168 | } | ||
169 | |||
170 | static int vmi_timer_next_event(unsigned long delta, | ||
171 | struct clock_event_device *evt) | ||
172 | { | ||
173 | /* Unfortunately, set_next_event interface only passes relative | ||
174 | * expiry, but we want absolute expiry. It'd be better if we | |
175 | * were passed an absolute expiry, since a bunch of time may | |
176 | * have been stolen between the time the delta is computed and | ||
177 | * when we set the alarm below. */ | ||
178 | cycle_t now = vmi_timer_ops.get_cycle_counter(vmi_counter(VMI_ONESHOT)); | ||
179 | |||
180 | BUG_ON(evt->mode != CLOCK_EVT_MODE_ONESHOT); | ||
181 | vmi_timer_ops.set_alarm(VMI_ONESHOT, now + delta, 0); | ||
182 | return 0; | ||
183 | } | ||
184 | |||
185 | static struct clock_event_device vmi_clockevent = { | ||
186 | .name = "vmi-timer", | ||
187 | .features = CLOCK_EVT_FEAT_PERIODIC | CLOCK_EVT_FEAT_ONESHOT, | ||
188 | .shift = 22, | ||
189 | .set_mode = vmi_timer_set_mode, | ||
190 | .set_next_event = vmi_timer_next_event, | ||
191 | .rating = 1000, | ||
192 | .irq = 0, | ||
193 | }; | ||
194 | |||
195 | static irqreturn_t vmi_timer_interrupt(int irq, void *dev_id) | ||
196 | { | ||
197 | struct clock_event_device *evt = &__get_cpu_var(local_events); | ||
198 | evt->event_handler(evt); | ||
199 | return IRQ_HANDLED; | ||
200 | } | ||
201 | |||
202 | static struct irqaction vmi_clock_action = { | ||
203 | .name = "vmi-timer", | ||
204 | .handler = vmi_timer_interrupt, | ||
205 | .flags = IRQF_DISABLED | IRQF_NOBALANCING, | ||
206 | .mask = CPU_MASK_ALL, | ||
207 | }; | ||
208 | |||
209 | static void __devinit vmi_time_init_clockevent(void) | ||
210 | { | ||
211 | cycle_t cycles_per_msec; | ||
212 | struct clock_event_device *evt; | ||
213 | |||
214 | int cpu = smp_processor_id(); | ||
215 | evt = &__get_cpu_var(local_events); | ||
216 | |||
217 | /* Use cycles_per_msec since div_sc params are 32-bits. */ | ||
218 | cycles_per_msec = vmi_timer_ops.get_cycle_frequency(); | ||
219 | (void)do_div(cycles_per_msec, 1000); | ||
220 | |||
221 | memcpy(evt, &vmi_clockevent, sizeof(*evt)); | ||
222 | /* Must pick .shift such that .mult fits in 32-bits. Choosing | ||
223 | * .shift to be 22 allows 2^(32-22) cycles per nanosecond | |
224 | * before overflow. */ | ||
225 | evt->mult = div_sc(cycles_per_msec, NSEC_PER_MSEC, evt->shift); | ||
226 | /* Upper bound is clockevent's use of ulong for cycle deltas. */ | ||
227 | evt->max_delta_ns = clockevent_delta2ns(ULONG_MAX, evt); | ||
228 | evt->min_delta_ns = clockevent_delta2ns(1, evt); | ||
229 | evt->cpumask = cpumask_of_cpu(cpu); | ||
230 | |||
231 | printk(KERN_WARNING "vmi: registering clock event %s. mult=%lu shift=%u\n", | ||
232 | evt->name, evt->mult, evt->shift); | ||
233 | clockevents_register_device(evt); | ||
234 | } | ||
235 | |||
236 | void __init vmi_time_init(void) | ||
237 | { | ||
238 | /* Disable PIT: BIOSes start PIT CH0 with 18.2hz periodic. */ | |
239 | outb_p(0x3a, PIT_MODE); /* binary, mode 5, LSB/MSB, ch 0 */ | ||
240 | |||
241 | vmi_time_init_clockevent(); | ||
242 | setup_irq(0, &vmi_clock_action); | ||
243 | } | ||
244 | |||
245 | #ifdef CONFIG_X86_LOCAL_APIC | ||
246 | void __devinit vmi_time_bsp_init(void) | ||
247 | { | ||
248 | /* | ||
249 | * On APIC systems, we want local timers to fire on each cpu. We do | ||
250 | * this by programming LVTT to deliver timer events to the IRQ handler | ||
251 | * for IRQ-0, since we can't re-use the APIC local timer handler | ||
252 | * without interfering with that code. | ||
253 | */ | ||
254 | clockevents_notify(CLOCK_EVT_NOTIFY_SUSPEND, NULL); | ||
255 | local_irq_disable(); | ||
256 | #ifdef CONFIG_X86_SMP | ||
257 | /* | ||
258 | * XXX handle_percpu_irq only defined for SMP; we need to switch over | ||
259 | * to using it, since this is a local interrupt, which each CPU must | ||
260 | * handle individually without locking out or dropping simultaneous | ||
261 | * local timers on other CPUs. We also don't want to trigger the | ||
262 | * quirk workaround code for interrupts which gets invoked from | ||
263 | * handle_percpu_irq via eoi, so we use our own IRQ chip. | ||
264 | */ | ||
265 | set_irq_chip_and_handler_name(0, &vmi_chip, handle_percpu_irq, "lvtt"); | ||
266 | #else | ||
267 | set_irq_chip_and_handler_name(0, &vmi_chip, handle_edge_irq, "lvtt"); | ||
268 | #endif | ||
269 | vmi_wiring = VMI_ALARM_WIRED_LVTT; | ||
270 | apic_write(APIC_LVTT, vmi_get_timer_vector()); | ||
271 | local_irq_enable(); | ||
272 | clockevents_notify(CLOCK_EVT_NOTIFY_RESUME, NULL); | ||
273 | } | ||
274 | |||
275 | void __devinit vmi_time_ap_init(void) | ||
276 | { | ||
277 | vmi_time_init_clockevent(); | ||
278 | apic_write(APIC_LVTT, vmi_get_timer_vector()); | ||
279 | } | ||
280 | #endif | ||
281 | |||
282 | /** vmi clocksource */ | ||
283 | |||
284 | static cycle_t read_real_cycles(void) | ||
285 | { | ||
286 | return vmi_timer_ops.get_cycle_counter(VMI_CYCLES_REAL); | ||
287 | } | ||
288 | |||
289 | static struct clocksource clocksource_vmi = { | ||
290 | .name = "vmi-timer", | ||
291 | .rating = 450, | ||
292 | .read = read_real_cycles, | ||
293 | .mask = CLOCKSOURCE_MASK(64), | ||
294 | .mult = 0, /* to be set */ | ||
295 | .shift = 22, | ||
296 | .flags = CLOCK_SOURCE_IS_CONTINUOUS, | ||
297 | }; | ||
298 | |||
299 | static int __init init_vmi_clocksource(void) | ||
300 | { | ||
301 | cycle_t cycles_per_msec; | ||
302 | |||
303 | if (!vmi_timer_ops.get_cycle_frequency) | ||
304 | return 0; | ||
305 | /* Use khz2mult rather than hz2mult since hz arg is only 32-bits. */ | ||
306 | cycles_per_msec = vmi_timer_ops.get_cycle_frequency(); | ||
307 | (void)do_div(cycles_per_msec, 1000); | ||
308 | |||
309 | /* Note that clocksource.{mult, shift} converts in the opposite direction | ||
310 | * as clockevents. */ | ||
311 | clocksource_vmi.mult = clocksource_khz2mult(cycles_per_msec, | ||
312 | clocksource_vmi.shift); | ||
313 | |||
314 | printk(KERN_WARNING "vmi: registering clock source khz=%lld\n", cycles_per_msec); | ||
315 | return clocksource_register(&clocksource_vmi); | ||
316 | |||
317 | } | ||
318 | module_init(init_vmi_clocksource); | ||
diff --git a/arch/i386/kernel/vmitime.c b/arch/i386/kernel/vmitime.c deleted file mode 100644 index 9dfb17739b6..00000000000 --- a/arch/i386/kernel/vmitime.c +++ /dev/null | |||
@@ -1,482 +0,0 @@ | |||
1 | /* | ||
2 | * VMI paravirtual timer support routines. | ||
3 | * | ||
4 | * Copyright (C) 2005, VMware, Inc. | ||
5 | * | ||
6 | * This program is free software; you can redistribute it and/or modify | ||
7 | * it under the terms of the GNU General Public License as published by | ||
8 | * the Free Software Foundation; either version 2 of the License, or | ||
9 | * (at your option) any later version. | ||
10 | * | ||
11 | * This program is distributed in the hope that it will be useful, but | ||
12 | * WITHOUT ANY WARRANTY; without even the implied warranty of | ||
13 | * MERCHANTABILITY OR FITNESS FOR A PARTICULAR PURPOSE, GOOD TITLE or | ||
14 | * NON INFRINGEMENT. See the GNU General Public License for more | ||
15 | * details. | ||
16 | * | ||
17 | * You should have received a copy of the GNU General Public License | ||
18 | * along with this program; if not, write to the Free Software | ||
19 | * Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA. | ||
20 | * | ||
21 | * Send feedback to dhecht@vmware.com | ||
22 | * | ||
23 | */ | ||
24 | |||
25 | /* | ||
26 | * Portions of this code from arch/i386/kernel/timers/timer_tsc.c. | ||
27 | * Portions of the CONFIG_NO_IDLE_HZ code from arch/s390/kernel/time.c. | ||
28 | * See comments there for proper credits. | ||
29 | */ | ||
30 | |||
31 | #include <linux/spinlock.h> | ||
32 | #include <linux/init.h> | ||
33 | #include <linux/errno.h> | ||
34 | #include <linux/jiffies.h> | ||
35 | #include <linux/interrupt.h> | ||
36 | #include <linux/kernel_stat.h> | ||
37 | #include <linux/rcupdate.h> | ||
38 | #include <linux/clocksource.h> | ||
39 | |||
40 | #include <asm/timer.h> | ||
41 | #include <asm/io.h> | ||
42 | #include <asm/apic.h> | ||
43 | #include <asm/div64.h> | ||
44 | #include <asm/timer.h> | ||
45 | #include <asm/desc.h> | ||
46 | |||
47 | #include <asm/vmi.h> | ||
48 | #include <asm/vmi_time.h> | ||
49 | |||
50 | #include <mach_timer.h> | ||
51 | #include <io_ports.h> | ||
52 | |||
53 | #ifdef CONFIG_X86_LOCAL_APIC | ||
54 | #define VMI_ALARM_WIRING VMI_ALARM_WIRED_LVTT | ||
55 | #else | ||
56 | #define VMI_ALARM_WIRING VMI_ALARM_WIRED_IRQ0 | ||
57 | #endif | ||
58 | |||
59 | /* Cached VMI operations */ | ||
60 | struct vmi_timer_ops vmi_timer_ops; | ||
61 | |||
62 | #ifdef CONFIG_NO_IDLE_HZ | ||
63 | |||
64 | /* /proc/sys/kernel/hz_timer state. */ | ||
65 | int sysctl_hz_timer; | ||
66 | |||
67 | /* Some stats */ | ||
68 | static DEFINE_PER_CPU(unsigned long, vmi_idle_no_hz_irqs); | ||
69 | static DEFINE_PER_CPU(unsigned long, vmi_idle_no_hz_jiffies); | ||
70 | static DEFINE_PER_CPU(unsigned long, idle_start_jiffies); | ||
71 | |||
72 | #endif /* CONFIG_NO_IDLE_HZ */ | ||
73 | |||
74 | /* Number of alarms per second. By default this is CONFIG_VMI_ALARM_HZ. */ | ||
75 | static int alarm_hz = CONFIG_VMI_ALARM_HZ; | ||
76 | |||
77 | /* Cache of the value get_cycle_frequency / HZ. */ | ||
78 | static signed long long cycles_per_jiffy; | ||
79 | |||
80 | /* Cache of the value get_cycle_frequency / alarm_hz. */ | ||
81 | static signed long long cycles_per_alarm; | ||
82 | |||
83 | /* The number of cycles accounted for by the 'jiffies'/'xtime' count. | ||
84 | * Protected by xtime_lock. */ | ||
85 | static unsigned long long real_cycles_accounted_system; | ||
86 | |||
87 | /* The number of cycles accounted for by update_process_times(), per cpu. */ | ||
88 | static DEFINE_PER_CPU(unsigned long long, process_times_cycles_accounted_cpu); | ||
89 | |||
90 | /* The number of stolen cycles accounted, per cpu. */ | ||
91 | static DEFINE_PER_CPU(unsigned long long, stolen_cycles_accounted_cpu); | ||
92 | |||
93 | /* Clock source. */ | ||
94 | static cycle_t read_real_cycles(void) | ||
95 | { | ||
96 | return vmi_timer_ops.get_cycle_counter(VMI_CYCLES_REAL); | ||
97 | } | ||
98 | |||
99 | static cycle_t read_available_cycles(void) | ||
100 | { | ||
101 | return vmi_timer_ops.get_cycle_counter(VMI_CYCLES_AVAILABLE); | ||
102 | } | ||
103 | |||
104 | #if 0 | ||
105 | static cycle_t read_stolen_cycles(void) | ||
106 | { | ||
107 | return vmi_timer_ops.get_cycle_counter(VMI_CYCLES_STOLEN); | ||
108 | } | ||
109 | #endif /* 0 */ | ||
110 | |||
111 | static struct clocksource clocksource_vmi = { | ||
112 | .name = "vmi-timer", | ||
113 | .rating = 450, | ||
114 | .read = read_real_cycles, | ||
115 | .mask = CLOCKSOURCE_MASK(64), | ||
116 | .mult = 0, /* to be set */ | ||
117 | .shift = 22, | ||
118 | .flags = CLOCK_SOURCE_IS_CONTINUOUS, | ||
119 | }; | ||
120 | |||
121 | |||
122 | /* Timer interrupt handler. */ | ||
123 | static irqreturn_t vmi_timer_interrupt(int irq, void *dev_id); | ||
124 | |||
125 | static struct irqaction vmi_timer_irq = { | ||
126 | .handler = vmi_timer_interrupt, | ||
127 | .flags = IRQF_DISABLED, | ||
128 | .mask = CPU_MASK_NONE, | ||
129 | .name = "VMI-alarm", | ||
130 | }; | ||
131 | |||
132 | /* Alarm rate */ | ||
133 | static int __init vmi_timer_alarm_rate_setup(char* str) | ||
134 | { | ||
135 | int alarm_rate; | ||
136 | if (get_option(&str, &alarm_rate) == 1 && alarm_rate > 0) { | ||
137 | alarm_hz = alarm_rate; | ||
138 | printk(KERN_WARNING "VMI timer alarm HZ set to %d\n", alarm_hz); | ||
139 | } | ||
140 | return 1; | ||
141 | } | ||
142 | __setup("vmi_timer_alarm_hz=", vmi_timer_alarm_rate_setup); | ||
143 | |||
144 | |||
145 | /* Initialization */ | ||
146 | static void vmi_get_wallclock_ts(struct timespec *ts) | ||
147 | { | ||
148 | unsigned long long wallclock; | ||
149 | wallclock = vmi_timer_ops.get_wallclock(); // nsec units | ||
150 | ts->tv_nsec = do_div(wallclock, 1000000000); | ||
151 | ts->tv_sec = wallclock; | ||
152 | } | ||
153 | |||
154 | unsigned long vmi_get_wallclock(void) | ||
155 | { | ||
156 | struct timespec ts; | ||
157 | vmi_get_wallclock_ts(&ts); | ||
158 | return ts.tv_sec; | ||
159 | } | ||
160 | |||
161 | int vmi_set_wallclock(unsigned long now) | ||
162 | { | ||
163 | return -1; | ||
164 | } | ||
165 | |||
166 | unsigned long long vmi_get_sched_cycles(void) | ||
167 | { | ||
168 | return read_available_cycles(); | ||
169 | } | ||
170 | |||
171 | unsigned long vmi_cpu_khz(void) | ||
172 | { | ||
173 | unsigned long long khz; | ||
174 | |||
175 | khz = vmi_timer_ops.get_cycle_frequency(); | ||
176 | (void)do_div(khz, 1000); | ||
177 | return khz; | ||
178 | } | ||
179 | |||
180 | void __init vmi_time_init(void) | ||
181 | { | ||
182 | unsigned long long cycles_per_sec, cycles_per_msec; | ||
183 | unsigned long flags; | ||
184 | |||
185 | local_irq_save(flags); | ||
186 | setup_irq(0, &vmi_timer_irq); | ||
187 | #ifdef CONFIG_X86_LOCAL_APIC | ||
188 | set_intr_gate(LOCAL_TIMER_VECTOR, apic_vmi_timer_interrupt); | ||
189 | #endif | ||
190 | |||
191 | real_cycles_accounted_system = read_real_cycles(); | ||
192 | per_cpu(process_times_cycles_accounted_cpu, 0) = read_available_cycles(); | ||
193 | |||
194 | cycles_per_sec = vmi_timer_ops.get_cycle_frequency(); | ||
195 | cycles_per_jiffy = cycles_per_sec; | ||
196 | (void)do_div(cycles_per_jiffy, HZ); | ||
197 | cycles_per_alarm = cycles_per_sec; | ||
198 | (void)do_div(cycles_per_alarm, alarm_hz); | ||
199 | cycles_per_msec = cycles_per_sec; | ||
200 | (void)do_div(cycles_per_msec, 1000); | ||
201 | |||
202 | printk(KERN_WARNING "VMI timer cycles/sec = %llu ; cycles/jiffy = %llu ;" | ||
203 | "cycles/alarm = %llu\n", cycles_per_sec, cycles_per_jiffy, | ||
204 | cycles_per_alarm); | ||
205 | |||
206 | clocksource_vmi.mult = clocksource_khz2mult(cycles_per_msec, | ||
207 | clocksource_vmi.shift); | ||
208 | if (clocksource_register(&clocksource_vmi)) | ||
209 | printk(KERN_WARNING "Error registering VMITIME clocksource."); | ||
210 | |||
211 | /* Disable PIT. */ | ||
212 | outb_p(0x3a, PIT_MODE); /* binary, mode 5, LSB/MSB, ch 0 */ | ||
213 | |||
214 | /* schedule the alarm. do this in phase with process_times_cycles_accounted_cpu | ||
215 | * to reduce the latency of calling update_process_times. */ | |
216 | vmi_timer_ops.set_alarm( | ||
217 | VMI_ALARM_WIRED_IRQ0 | VMI_ALARM_IS_PERIODIC | VMI_CYCLES_AVAILABLE, | ||
218 | per_cpu(process_times_cycles_accounted_cpu, 0) + cycles_per_alarm, | ||
219 | cycles_per_alarm); | ||
220 | |||
221 | local_irq_restore(flags); | ||
222 | } | ||
223 | |||
224 | #ifdef CONFIG_X86_LOCAL_APIC | ||
225 | |||
226 | void __init vmi_timer_setup_boot_alarm(void) | ||
227 | { | ||
228 | local_irq_disable(); | ||
229 | |||
230 | /* Route the interrupt to the correct vector. */ | ||
231 | apic_write_around(APIC_LVTT, LOCAL_TIMER_VECTOR); | ||
232 | |||
233 | /* Cancel the IRQ0 wired alarm, and setup the LVTT alarm. */ | ||
234 | vmi_timer_ops.cancel_alarm(VMI_CYCLES_AVAILABLE); | ||
235 | vmi_timer_ops.set_alarm( | ||
236 | VMI_ALARM_WIRED_LVTT | VMI_ALARM_IS_PERIODIC | VMI_CYCLES_AVAILABLE, | ||
237 | per_cpu(process_times_cycles_accounted_cpu, 0) + cycles_per_alarm, | ||
238 | cycles_per_alarm); | ||
239 | local_irq_enable(); | ||
240 | } | ||
241 | |||
242 | /* Initialize the time accounting variables for an AP on an SMP system. | ||
243 | * Also, set the local alarm for the AP. */ | ||
244 | void __devinit vmi_timer_setup_secondary_alarm(void) | ||
245 | { | ||
246 | int cpu = smp_processor_id(); | ||
247 | |||
248 | /* Route the interrupt to the correct vector. */ | ||
249 | apic_write_around(APIC_LVTT, LOCAL_TIMER_VECTOR); | ||
250 | |||
251 | per_cpu(process_times_cycles_accounted_cpu, cpu) = read_available_cycles(); | ||
252 | |||
253 | vmi_timer_ops.set_alarm( | ||
254 | VMI_ALARM_WIRED_LVTT | VMI_ALARM_IS_PERIODIC | VMI_CYCLES_AVAILABLE, | ||
255 | per_cpu(process_times_cycles_accounted_cpu, cpu) + cycles_per_alarm, | ||
256 | cycles_per_alarm); | ||
257 | } | ||
258 | |||
259 | #endif | ||
260 | |||
261 | /* Update system wide (real) time accounting (e.g. jiffies, xtime). */ | ||
262 | static void vmi_account_real_cycles(unsigned long long cur_real_cycles) | ||
263 | { | ||
264 | long long cycles_not_accounted; | ||
265 | |||
266 | write_seqlock(&xtime_lock); | ||
267 | |||
268 | cycles_not_accounted = cur_real_cycles - real_cycles_accounted_system; | ||
269 | while (cycles_not_accounted >= cycles_per_jiffy) { | ||
270 | /* system-wide jiffies. */ | |
271 | do_timer(1); | ||
272 | |||
273 | cycles_not_accounted -= cycles_per_jiffy; | ||
274 | real_cycles_accounted_system += cycles_per_jiffy; | ||
275 | } | ||
276 | |||
277 | write_sequnlock(&xtime_lock); | ||
278 | } | ||
279 | |||
280 | /* Update per-cpu process times. */ | ||
281 | static void vmi_account_process_times_cycles(struct pt_regs *regs, int cpu, | ||
282 | unsigned long long cur_process_times_cycles) | ||
283 | { | ||
284 | long long cycles_not_accounted; | ||
285 | cycles_not_accounted = cur_process_times_cycles - | ||
286 | per_cpu(process_times_cycles_accounted_cpu, cpu); | ||
287 | |||
288 | while (cycles_not_accounted >= cycles_per_jiffy) { | ||
289 | /* Account time to the current process. This includes | ||
290 | * calling into the scheduler to decrement the timeslice | ||
291 | * and possibly reschedule.*/ | ||
292 | update_process_times(user_mode(regs)); | ||
293 | /* XXX handle /proc/profile multiplier. */ | ||
294 | profile_tick(CPU_PROFILING); | ||
295 | |||
296 | cycles_not_accounted -= cycles_per_jiffy; | ||
297 | per_cpu(process_times_cycles_accounted_cpu, cpu) += cycles_per_jiffy; | ||
298 | } | ||
299 | } | ||
300 | |||
301 | #ifdef CONFIG_NO_IDLE_HZ | ||
302 | /* Update per-cpu idle times. Used when a no-hz halt is ended. */ | ||
303 | static void vmi_account_no_hz_idle_cycles(int cpu, | ||
304 | unsigned long long cur_process_times_cycles) | ||
305 | { | ||
306 | long long cycles_not_accounted; | ||
307 | unsigned long no_idle_hz_jiffies = 0; | ||
308 | |||
309 | cycles_not_accounted = cur_process_times_cycles - | ||
310 | per_cpu(process_times_cycles_accounted_cpu, cpu); | ||
311 | |||
312 | while (cycles_not_accounted >= cycles_per_jiffy) { | ||
313 | no_idle_hz_jiffies++; | ||
314 | cycles_not_accounted -= cycles_per_jiffy; | ||
315 | per_cpu(process_times_cycles_accounted_cpu, cpu) += cycles_per_jiffy; | ||
316 | } | ||
317 | /* Account time to the idle process. */ | ||
318 | account_steal_time(idle_task(cpu), jiffies_to_cputime(no_idle_hz_jiffies)); | ||
319 | } | ||
320 | #endif | ||
321 | |||
322 | /* Update per-cpu stolen time. */ | ||
323 | static void vmi_account_stolen_cycles(int cpu, | ||
324 | unsigned long long cur_real_cycles, | ||
325 | unsigned long long cur_avail_cycles) | ||
326 | { | ||
327 | long long stolen_cycles_not_accounted; | ||
328 | unsigned long stolen_jiffies = 0; | ||
329 | |||
330 | if (cur_real_cycles < cur_avail_cycles) | ||
331 | return; | ||
332 | |||
333 | stolen_cycles_not_accounted = cur_real_cycles - cur_avail_cycles - | ||
334 | per_cpu(stolen_cycles_accounted_cpu, cpu); | ||
335 | |||
336 | while (stolen_cycles_not_accounted >= cycles_per_jiffy) { | ||
337 | stolen_jiffies++; | ||
338 | stolen_cycles_not_accounted -= cycles_per_jiffy; | ||
339 | per_cpu(stolen_cycles_accounted_cpu, cpu) += cycles_per_jiffy; | ||
340 | } | ||
341 | /* HACK: pass NULL to force time onto cpustat->steal. */ | ||
342 | account_steal_time(NULL, jiffies_to_cputime(stolen_jiffies)); | ||
343 | } | ||
344 | |||
345 | /* Body of either IRQ0 interrupt handler (UP no local-APIC) or | ||
346 | * local-APIC LVTT interrupt handler (UP & local-APIC or SMP). */ | ||
347 | static void vmi_local_timer_interrupt(int cpu) | ||
348 | { | ||
349 | unsigned long long cur_real_cycles, cur_process_times_cycles; | ||
350 | |||
351 | cur_real_cycles = read_real_cycles(); | ||
352 | cur_process_times_cycles = read_available_cycles(); | ||
353 | /* Update system wide (real) time state (xtime, jiffies). */ | ||
354 | vmi_account_real_cycles(cur_real_cycles); | ||
355 | /* Update per-cpu process times. */ | ||
356 | vmi_account_process_times_cycles(get_irq_regs(), cpu, cur_process_times_cycles); | ||
357 | /* Update time stolen from this cpu by the hypervisor. */ | ||
358 | vmi_account_stolen_cycles(cpu, cur_real_cycles, cur_process_times_cycles); | ||
359 | } | ||
360 | |||
361 | #ifdef CONFIG_NO_IDLE_HZ | ||
362 | |||
363 | /* Must be called only from idle loop, with interrupts disabled. */ | ||
364 | int vmi_stop_hz_timer(void) | ||
365 | { | ||
366 | /* Note that cpu_set, cpu_clear are (SMP safe) atomic on x86. */ | ||
367 | |||
368 | unsigned long seq, next; | ||
369 | unsigned long long real_cycles_expiry; | ||
370 | int cpu = smp_processor_id(); | ||
371 | |||
372 | BUG_ON(!irqs_disabled()); | ||
373 | if (sysctl_hz_timer != 0) | ||
374 | return 0; | ||
375 | |||
376 | cpu_set(cpu, nohz_cpu_mask); | ||
377 | smp_mb(); | ||
378 | |||
379 | if (rcu_needs_cpu(cpu) || local_softirq_pending() || | ||
380 | (next = next_timer_interrupt(), | ||
381 | time_before_eq(next, jiffies + HZ/CONFIG_VMI_ALARM_HZ))) { | ||
382 | cpu_clear(cpu, nohz_cpu_mask); | ||
383 | return 0; | ||
384 | } | ||
385 | |||
386 | /* Convert jiffies to the real cycle counter. */ | ||
387 | do { | ||
388 | seq = read_seqbegin(&xtime_lock); | ||
389 | real_cycles_expiry = real_cycles_accounted_system + | ||
390 | (long)(next - jiffies) * cycles_per_jiffy; | ||
391 | } while (read_seqretry(&xtime_lock, seq)); | ||
392 | |||
393 | /* This cpu is going idle. Disable the periodic alarm. */ | ||
394 | vmi_timer_ops.cancel_alarm(VMI_CYCLES_AVAILABLE); | ||
395 | per_cpu(idle_start_jiffies, cpu) = jiffies; | ||
396 | /* Set the real time alarm to expire at the next event. */ | ||
397 | vmi_timer_ops.set_alarm( | ||
398 | VMI_ALARM_WIRING | VMI_ALARM_IS_ONESHOT | VMI_CYCLES_REAL, | ||
399 | real_cycles_expiry, 0); | ||
400 | return 1; | ||
401 | } | ||
402 | |||
403 | static void vmi_reenable_hz_timer(int cpu) | ||
404 | { | ||
405 | /* For /proc/vmi/info idle_hz stat. */ | ||
406 | per_cpu(vmi_idle_no_hz_jiffies, cpu) += jiffies - per_cpu(idle_start_jiffies, cpu); | ||
407 | per_cpu(vmi_idle_no_hz_irqs, cpu)++; | ||
408 | |||
409 | /* Don't bother explicitly cancelling the one-shot alarm -- at | ||
410 | * worst we will receive a spurious timer interrupt. */ | |
411 | vmi_timer_ops.set_alarm( | ||
412 | VMI_ALARM_WIRING | VMI_ALARM_IS_PERIODIC | VMI_CYCLES_AVAILABLE, | ||
413 | per_cpu(process_times_cycles_accounted_cpu, cpu) + cycles_per_alarm, | ||
414 | cycles_per_alarm); | ||
415 | /* Indicate this cpu is no longer nohz idle. */ | ||
416 | cpu_clear(cpu, nohz_cpu_mask); | ||
417 | } | ||
418 | |||
419 | /* Called from interrupt handlers when (local) HZ timer is disabled. */ | ||
420 | void vmi_account_time_restart_hz_timer(void) | ||
421 | { | ||
422 | unsigned long long cur_real_cycles, cur_process_times_cycles; | ||
423 | int cpu = smp_processor_id(); | ||
424 | |||
425 | BUG_ON(!irqs_disabled()); | ||
426 | /* Account the time during which the HZ timer was disabled. */ | ||
427 | cur_real_cycles = read_real_cycles(); | ||
428 | cur_process_times_cycles = read_available_cycles(); | ||
429 | /* Update system wide (real) time state (xtime, jiffies). */ | ||
430 | vmi_account_real_cycles(cur_real_cycles); | ||
431 | /* Update per-cpu idle times. */ | ||
432 | vmi_account_no_hz_idle_cycles(cpu, cur_process_times_cycles); | ||
433 | /* Update time stolen from this cpu by the hypervisor. */ | ||
434 | vmi_account_stolen_cycles(cpu, cur_real_cycles, cur_process_times_cycles); | ||
435 | /* Reenable the hz timer. */ | ||
436 | vmi_reenable_hz_timer(cpu); | ||
437 | } | ||
438 | |||
439 | #endif /* CONFIG_NO_IDLE_HZ */ | ||
440 | |||
441 | /* UP (and no local-APIC) VMI-timer alarm interrupt handler. | ||
442 | * Handler for IRQ0. Not used when SMP or X86_LOCAL_APIC after | ||
443 | * APIC setup and setup_boot_vmi_alarm() is called. */ | ||
444 | static irqreturn_t vmi_timer_interrupt(int irq, void *dev_id) | ||
445 | { | ||
446 | vmi_local_timer_interrupt(smp_processor_id()); | ||
447 | return IRQ_HANDLED; | ||
448 | } | ||
449 | |||
450 | #ifdef CONFIG_X86_LOCAL_APIC | ||
451 | |||
452 | /* SMP VMI-timer alarm interrupt handler. Handler for LVTT vector. | ||
453 | * Also used in UP when CONFIG_X86_LOCAL_APIC. | ||
454 | * The wrapper code is from arch/i386/kernel/apic.c#smp_apic_timer_interrupt. */ | ||
455 | void smp_apic_vmi_timer_interrupt(struct pt_regs *regs) | ||
456 | { | ||
457 | struct pt_regs *old_regs = set_irq_regs(regs); | ||
458 | int cpu = smp_processor_id(); | ||
459 | |||
460 | /* | ||
461 | * the NMI deadlock-detector uses this. | ||
462 | */ | ||
463 | per_cpu(irq_stat,cpu).apic_timer_irqs++; | ||
464 | |||
465 | /* | ||
466 | * NOTE! We'd better ACK the irq immediately, | ||
467 | * because timer handling can be slow. | ||
468 | */ | ||
469 | ack_APIC_irq(); | ||
470 | |||
471 | /* | ||
472 | * update_process_times() expects us to have done irq_enter(). | ||
473 | * Besides, if we don't, timer interrupts ignore the global | |
474 | * interrupt lock, which is the WrongThing (tm) to do. | ||
475 | */ | ||
476 | irq_enter(); | ||
477 | vmi_local_timer_interrupt(cpu); | ||
478 | irq_exit(); | ||
479 | set_irq_regs(old_regs); | ||
480 | } | ||
481 | |||
482 | #endif /* CONFIG_X86_LOCAL_APIC */ | ||
diff --git a/arch/i386/kernel/vmlinux.lds.S b/arch/i386/kernel/vmlinux.lds.S index 6f38f818380..23e8614edee 100644 --- a/arch/i386/kernel/vmlinux.lds.S +++ b/arch/i386/kernel/vmlinux.lds.S | |||
@@ -26,12 +26,11 @@ OUTPUT_FORMAT("elf32-i386", "elf32-i386", "elf32-i386") | |||
26 | OUTPUT_ARCH(i386) | 26 | OUTPUT_ARCH(i386) |
27 | ENTRY(phys_startup_32) | 27 | ENTRY(phys_startup_32) |
28 | jiffies = jiffies_64; | 28 | jiffies = jiffies_64; |
29 | _proxy_pda = 1; | ||
30 | 29 | ||
31 | PHDRS { | 30 | PHDRS { |
32 | text PT_LOAD FLAGS(5); /* R_E */ | 31 | text PT_LOAD FLAGS(5); /* R_E */ |
33 | data PT_LOAD FLAGS(7); /* RWE */ | 32 | data PT_LOAD FLAGS(7); /* RWE */ |
34 | note PT_NOTE FLAGS(4); /* R__ */ | 33 | note PT_NOTE FLAGS(0); /* ___ */ |
35 | } | 34 | } |
36 | SECTIONS | 35 | SECTIONS |
37 | { | 36 | { |
@@ -61,8 +60,6 @@ SECTIONS | |||
61 | __stop___ex_table = .; | 60 | __stop___ex_table = .; |
62 | } | 61 | } |
63 | 62 | ||
64 | RODATA | ||
65 | |||
66 | BUG_TABLE | 63 | BUG_TABLE |
67 | 64 | ||
68 | . = ALIGN(4); | 65 | . = ALIGN(4); |
@@ -72,6 +69,8 @@ SECTIONS | |||
72 | __tracedata_end = .; | 69 | __tracedata_end = .; |
73 | } | 70 | } |
74 | 71 | ||
72 | RODATA | ||
73 | |||
75 | /* writeable */ | 74 | /* writeable */ |
76 | . = ALIGN(4096); | 75 | . = ALIGN(4096); |
77 | .data : AT(ADDR(.data) - LOAD_OFFSET) { /* Data */ | 76 | .data : AT(ADDR(.data) - LOAD_OFFSET) { /* Data */ |
@@ -117,22 +116,11 @@ SECTIONS | |||
117 | 116 | ||
118 | /* might get freed after init */ | 117 | /* might get freed after init */ |
119 | . = ALIGN(4096); | 118 | . = ALIGN(4096); |
120 | .smp_altinstructions : AT(ADDR(.smp_altinstructions) - LOAD_OFFSET) { | ||
121 | __smp_alt_begin = .; | ||
122 | __smp_alt_instructions = .; | ||
123 | *(.smp_altinstructions) | ||
124 | __smp_alt_instructions_end = .; | ||
125 | } | ||
126 | . = ALIGN(4); | ||
127 | .smp_locks : AT(ADDR(.smp_locks) - LOAD_OFFSET) { | 119 | .smp_locks : AT(ADDR(.smp_locks) - LOAD_OFFSET) { |
128 | __smp_locks = .; | 120 | __smp_locks = .; |
129 | *(.smp_locks) | 121 | *(.smp_locks) |
130 | __smp_locks_end = .; | 122 | __smp_locks_end = .; |
131 | } | 123 | } |
132 | .smp_altinstr_replacement : AT(ADDR(.smp_altinstr_replacement) - LOAD_OFFSET) { | ||
133 | *(.smp_altinstr_replacement) | ||
134 | __smp_alt_end = .; | ||
135 | } | ||
136 | /* will be freed after init | 124 | /* will be freed after init |
137 | * Following ALIGN() is required to make sure no other data falls on the | 125 | * Following ALIGN() is required to make sure no other data falls on the |
138 | * same page where __smp_alt_end is pointing as that page might be freed | 126 | * same page where __smp_alt_end is pointing as that page might be freed |
@@ -178,9 +166,9 @@ SECTIONS | |||
178 | } | 166 | } |
179 | . = ALIGN(4); | 167 | . = ALIGN(4); |
180 | .parainstructions : AT(ADDR(.parainstructions) - LOAD_OFFSET) { | 168 | .parainstructions : AT(ADDR(.parainstructions) - LOAD_OFFSET) { |
181 | __start_parainstructions = .; | 169 | __parainstructions = .; |
182 | *(.parainstructions) | 170 | *(.parainstructions) |
183 | __stop_parainstructions = .; | 171 | __parainstructions_end = .; |
184 | } | 172 | } |
185 | /* .exit.text is discarded at runtime, not link time, to deal with references | 173 | /* .exit.text is discarded at runtime, not link time, to deal with references |
186 | from .altinstructions and .eh_frame */ | 174 | from .altinstructions and .eh_frame */ |
@@ -194,7 +182,7 @@ SECTIONS | |||
194 | __initramfs_end = .; | 182 | __initramfs_end = .; |
195 | } | 183 | } |
196 | #endif | 184 | #endif |
197 | . = ALIGN(L1_CACHE_BYTES); | 185 | . = ALIGN(4096); |
198 | .data.percpu : AT(ADDR(.data.percpu) - LOAD_OFFSET) { | 186 | .data.percpu : AT(ADDR(.data.percpu) - LOAD_OFFSET) { |
199 | __per_cpu_start = .; | 187 | __per_cpu_start = .; |
200 | *(.data.percpu) | 188 | *(.data.percpu) |
diff --git a/arch/i386/kernel/vsyscall.lds.S b/arch/i386/kernel/vsyscall.lds.S index f66cd11adb7..4a8b0ed9b8f 100644 --- a/arch/i386/kernel/vsyscall.lds.S +++ b/arch/i386/kernel/vsyscall.lds.S | |||
@@ -7,7 +7,7 @@ | |||
7 | 7 | ||
8 | SECTIONS | 8 | SECTIONS |
9 | { | 9 | { |
10 | . = VDSO_PRELINK + SIZEOF_HEADERS; | 10 | . = VDSO_PRELINK_asm + SIZEOF_HEADERS; |
11 | 11 | ||
12 | .hash : { *(.hash) } :text | 12 | .hash : { *(.hash) } :text |
13 | .gnu.hash : { *(.gnu.hash) } | 13 | .gnu.hash : { *(.gnu.hash) } |
@@ -21,7 +21,7 @@ SECTIONS | |||
21 | For the layouts to match, we need to skip more than enough | 21 | For the layouts to match, we need to skip more than enough |
22 | space for the dynamic symbol table et al. If this amount | 22 | space for the dynamic symbol table et al. If this amount |
23 | is insufficient, ld -shared will barf. Just increase it here. */ | 23 | is insufficient, ld -shared will barf. Just increase it here. */ |
24 | . = VDSO_PRELINK + 0x400; | 24 | . = VDSO_PRELINK_asm + 0x400; |
25 | 25 | ||
26 | .text : { *(.text) } :text =0x90909090 | 26 | .text : { *(.text) } :text =0x90909090 |
27 | .note : { *(.note.*) } :text :note | 27 | .note : { *(.note.*) } :text :note |
diff --git a/arch/i386/lib/bitops.c b/arch/i386/lib/bitops.c index 97db3853dc8..afd0045595d 100644 --- a/arch/i386/lib/bitops.c +++ b/arch/i386/lib/bitops.c | |||
@@ -43,7 +43,7 @@ EXPORT_SYMBOL(find_next_bit); | |||
43 | */ | 43 | */ |
44 | int find_next_zero_bit(const unsigned long *addr, int size, int offset) | 44 | int find_next_zero_bit(const unsigned long *addr, int size, int offset) |
45 | { | 45 | { |
46 | unsigned long * p = ((unsigned long *) addr) + (offset >> 5); | 46 | const unsigned long *p = addr + (offset >> 5); |
47 | int set = 0, bit = offset & 31, res; | 47 | int set = 0, bit = offset & 31, res; |
48 | 48 | ||
49 | if (bit) { | 49 | if (bit) { |
@@ -64,7 +64,7 @@ int find_next_zero_bit(const unsigned long *addr, int size, int offset) | |||
64 | /* | 64 | /* |
65 | * No zero yet, search remaining full bytes for a zero | 65 | * No zero yet, search remaining full bytes for a zero |
66 | */ | 66 | */ |
67 | res = find_first_zero_bit (p, size - 32 * (p - (unsigned long *) addr)); | 67 | res = find_first_zero_bit(p, size - 32 * (p - addr)); |
68 | return (offset + set + res); | 68 | return (offset + set + res); |
69 | } | 69 | } |
70 | EXPORT_SYMBOL(find_next_zero_bit); | 70 | EXPORT_SYMBOL(find_next_zero_bit); |
diff --git a/arch/i386/lib/checksum.S b/arch/i386/lib/checksum.S index 75ffd02654f..adbccd0bbb7 100644 --- a/arch/i386/lib/checksum.S +++ b/arch/i386/lib/checksum.S | |||
@@ -25,6 +25,8 @@ | |||
25 | * 2 of the License, or (at your option) any later version. | 25 | * 2 of the License, or (at your option) any later version. |
26 | */ | 26 | */ |
27 | 27 | ||
28 | #include <linux/linkage.h> | ||
29 | #include <asm/dwarf2.h> | ||
28 | #include <asm/errno.h> | 30 | #include <asm/errno.h> |
29 | 31 | ||
30 | /* | 32 | /* |
@@ -36,8 +38,6 @@ unsigned int csum_partial(const unsigned char * buff, int len, unsigned int sum) | |||
36 | */ | 38 | */ |
37 | 39 | ||
38 | .text | 40 | .text |
39 | .align 4 | ||
40 | .globl csum_partial | ||
41 | 41 | ||
42 | #ifndef CONFIG_X86_USE_PPRO_CHECKSUM | 42 | #ifndef CONFIG_X86_USE_PPRO_CHECKSUM |
43 | 43 | ||
@@ -48,9 +48,14 @@ unsigned int csum_partial(const unsigned char * buff, int len, unsigned int sum) | |||
48 | * Fortunately, it is easy to convert 2-byte alignment to 4-byte | 48 | * Fortunately, it is easy to convert 2-byte alignment to 4-byte |
49 | * alignment for the unrolled loop. | 49 | * alignment for the unrolled loop. |
50 | */ | 50 | */ |
51 | csum_partial: | 51 | ENTRY(csum_partial) |
52 | CFI_STARTPROC | ||
52 | pushl %esi | 53 | pushl %esi |
54 | CFI_ADJUST_CFA_OFFSET 4 | ||
55 | CFI_REL_OFFSET esi, 0 | ||
53 | pushl %ebx | 56 | pushl %ebx |
57 | CFI_ADJUST_CFA_OFFSET 4 | ||
58 | CFI_REL_OFFSET ebx, 0 | ||
54 | movl 20(%esp),%eax # Function arg: unsigned int sum | 59 | movl 20(%esp),%eax # Function arg: unsigned int sum |
55 | movl 16(%esp),%ecx # Function arg: int len | 60 | movl 16(%esp),%ecx # Function arg: int len |
56 | movl 12(%esp),%esi # Function arg: unsigned char *buff | 61 | movl 12(%esp),%esi # Function arg: unsigned char *buff |
@@ -128,16 +133,27 @@ csum_partial: | |||
128 | roll $8, %eax | 133 | roll $8, %eax |
129 | 8: | 134 | 8: |
130 | popl %ebx | 135 | popl %ebx |
136 | CFI_ADJUST_CFA_OFFSET -4 | ||
137 | CFI_RESTORE ebx | ||
131 | popl %esi | 138 | popl %esi |
139 | CFI_ADJUST_CFA_OFFSET -4 | ||
140 | CFI_RESTORE esi | ||
132 | ret | 141 | ret |
142 | CFI_ENDPROC | ||
143 | ENDPROC(csum_partial) | ||
133 | 144 | ||
134 | #else | 145 | #else |
135 | 146 | ||
136 | /* Version for PentiumII/PPro */ | 147 | /* Version for PentiumII/PPro */ |
137 | 148 | ||
138 | csum_partial: | 149 | ENTRY(csum_partial) |
150 | CFI_STARTPROC | ||
139 | pushl %esi | 151 | pushl %esi |
152 | CFI_ADJUST_CFA_OFFSET 4 | ||
153 | CFI_REL_OFFSET esi, 0 | ||
140 | pushl %ebx | 154 | pushl %ebx |
155 | CFI_ADJUST_CFA_OFFSET 4 | ||
156 | CFI_REL_OFFSET ebx, 0 | ||
141 | movl 20(%esp),%eax # Function arg: unsigned int sum | 157 | movl 20(%esp),%eax # Function arg: unsigned int sum |
142 | movl 16(%esp),%ecx # Function arg: int len | 158 | movl 16(%esp),%ecx # Function arg: int len |
143 | movl 12(%esp),%esi # Function arg: const unsigned char *buf | 159 | movl 12(%esp),%esi # Function arg: const unsigned char *buf |
@@ -245,8 +261,14 @@ csum_partial: | |||
245 | roll $8, %eax | 261 | roll $8, %eax |
246 | 90: | 262 | 90: |
247 | popl %ebx | 263 | popl %ebx |
264 | CFI_ADJUST_CFA_OFFSET -4 | ||
265 | CFI_RESTORE ebx | ||
248 | popl %esi | 266 | popl %esi |
267 | CFI_ADJUST_CFA_OFFSET -4 | ||
268 | CFI_RESTORE esi | ||
249 | ret | 269 | ret |
270 | CFI_ENDPROC | ||
271 | ENDPROC(csum_partial) | ||
250 | 272 | ||
251 | #endif | 273 | #endif |
252 | 274 | ||
@@ -278,19 +300,24 @@ unsigned int csum_partial_copy_generic (const char *src, char *dst, | |||
278 | .long 9999b, 6002f ; \ | 300 | .long 9999b, 6002f ; \ |
279 | .previous | 301 | .previous |
280 | 302 | ||
281 | .align 4 | ||
282 | .globl csum_partial_copy_generic | ||
283 | |||
284 | #ifndef CONFIG_X86_USE_PPRO_CHECKSUM | 303 | #ifndef CONFIG_X86_USE_PPRO_CHECKSUM |
285 | 304 | ||
286 | #define ARGBASE 16 | 305 | #define ARGBASE 16 |
287 | #define FP 12 | 306 | #define FP 12 |
288 | 307 | ||
289 | csum_partial_copy_generic: | 308 | ENTRY(csum_partial_copy_generic) |
309 | CFI_STARTPROC | ||
290 | subl $4,%esp | 310 | subl $4,%esp |
311 | CFI_ADJUST_CFA_OFFSET 4 | ||
291 | pushl %edi | 312 | pushl %edi |
313 | CFI_ADJUST_CFA_OFFSET 4 | ||
314 | CFI_REL_OFFSET edi, 0 | ||
292 | pushl %esi | 315 | pushl %esi |
316 | CFI_ADJUST_CFA_OFFSET 4 | ||
317 | CFI_REL_OFFSET esi, 0 | ||
293 | pushl %ebx | 318 | pushl %ebx |
319 | CFI_ADJUST_CFA_OFFSET 4 | ||
320 | CFI_REL_OFFSET ebx, 0 | ||
294 | movl ARGBASE+16(%esp),%eax # sum | 321 | movl ARGBASE+16(%esp),%eax # sum |
295 | movl ARGBASE+12(%esp),%ecx # len | 322 | movl ARGBASE+12(%esp),%ecx # len |
296 | movl ARGBASE+4(%esp),%esi # src | 323 | movl ARGBASE+4(%esp),%esi # src |
@@ -400,10 +427,19 @@ DST( movb %cl, (%edi) ) | |||
400 | .previous | 427 | .previous |
401 | 428 | ||
402 | popl %ebx | 429 | popl %ebx |
430 | CFI_ADJUST_CFA_OFFSET -4 | ||
431 | CFI_RESTORE ebx | ||
403 | popl %esi | 432 | popl %esi |
433 | CFI_ADJUST_CFA_OFFSET -4 | ||
434 | CFI_RESTORE esi | ||
404 | popl %edi | 435 | popl %edi |
436 | CFI_ADJUST_CFA_OFFSET -4 | ||
437 | CFI_RESTORE edi | ||
405 | popl %ecx # equivalent to addl $4,%esp | 438 | popl %ecx # equivalent to addl $4,%esp |
439 | CFI_ADJUST_CFA_OFFSET -4 | ||
406 | ret | 440 | ret |
441 | CFI_ENDPROC | ||
442 | ENDPROC(csum_partial_copy_generic) | ||
407 | 443 | ||
408 | #else | 444 | #else |
409 | 445 | ||
@@ -421,10 +457,17 @@ DST( movb %cl, (%edi) ) | |||
421 | 457 | ||
422 | #define ARGBASE 12 | 458 | #define ARGBASE 12 |
423 | 459 | ||
424 | csum_partial_copy_generic: | 460 | ENTRY(csum_partial_copy_generic) |
461 | CFI_STARTPROC | ||
425 | pushl %ebx | 462 | pushl %ebx |
463 | CFI_ADJUST_CFA_OFFSET 4 | ||
464 | CFI_REL_OFFSET ebx, 0 | ||
426 | pushl %edi | 465 | pushl %edi |
466 | CFI_ADJUST_CFA_OFFSET 4 | ||
467 | CFI_REL_OFFSET edi, 0 | ||
427 | pushl %esi | 468 | pushl %esi |
469 | CFI_ADJUST_CFA_OFFSET 4 | ||
470 | CFI_REL_OFFSET esi, 0 | ||
428 | movl ARGBASE+4(%esp),%esi #src | 471 | movl ARGBASE+4(%esp),%esi #src |
429 | movl ARGBASE+8(%esp),%edi #dst | 472 | movl ARGBASE+8(%esp),%edi #dst |
430 | movl ARGBASE+12(%esp),%ecx #len | 473 | movl ARGBASE+12(%esp),%ecx #len |
@@ -485,9 +528,17 @@ DST( movb %dl, (%edi) ) | |||
485 | .previous | 528 | .previous |
486 | 529 | ||
487 | popl %esi | 530 | popl %esi |
531 | CFI_ADJUST_CFA_OFFSET -4 | ||
532 | CFI_RESTORE esi | ||
488 | popl %edi | 533 | popl %edi |
534 | CFI_ADJUST_CFA_OFFSET -4 | ||
535 | CFI_RESTORE edi | ||
489 | popl %ebx | 536 | popl %ebx |
537 | CFI_ADJUST_CFA_OFFSET -4 | ||
538 | CFI_RESTORE ebx | ||
490 | ret | 539 | ret |
540 | CFI_ENDPROC | ||
541 | ENDPROC(csum_partial_copy_generic) | ||
491 | 542 | ||
492 | #undef ROUND | 543 | #undef ROUND |
493 | #undef ROUND1 | 544 | #undef ROUND1 |
diff --git a/arch/i386/lib/getuser.S b/arch/i386/lib/getuser.S index 62d7f178a32..6d84b53f12a 100644 --- a/arch/i386/lib/getuser.S +++ b/arch/i386/lib/getuser.S | |||
@@ -8,6 +8,8 @@ | |||
8 | * return an error value in addition to the "real" | 8 | * return an error value in addition to the "real" |
9 | * return value. | 9 | * return value. |
10 | */ | 10 | */ |
11 | #include <linux/linkage.h> | ||
12 | #include <asm/dwarf2.h> | ||
11 | #include <asm/thread_info.h> | 13 | #include <asm/thread_info.h> |
12 | 14 | ||
13 | 15 | ||
@@ -24,19 +26,19 @@ | |||
24 | */ | 26 | */ |
25 | 27 | ||
26 | .text | 28 | .text |
27 | .align 4 | 29 | ENTRY(__get_user_1) |
28 | .globl __get_user_1 | 30 | CFI_STARTPROC |
29 | __get_user_1: | ||
30 | GET_THREAD_INFO(%edx) | 31 | GET_THREAD_INFO(%edx) |
31 | cmpl TI_addr_limit(%edx),%eax | 32 | cmpl TI_addr_limit(%edx),%eax |
32 | jae bad_get_user | 33 | jae bad_get_user |
33 | 1: movzbl (%eax),%edx | 34 | 1: movzbl (%eax),%edx |
34 | xorl %eax,%eax | 35 | xorl %eax,%eax |
35 | ret | 36 | ret |
37 | CFI_ENDPROC | ||
38 | ENDPROC(__get_user_1) | ||
36 | 39 | ||
37 | .align 4 | 40 | ENTRY(__get_user_2) |
38 | .globl __get_user_2 | 41 | CFI_STARTPROC |
39 | __get_user_2: | ||
40 | addl $1,%eax | 42 | addl $1,%eax |
41 | jc bad_get_user | 43 | jc bad_get_user |
42 | GET_THREAD_INFO(%edx) | 44 | GET_THREAD_INFO(%edx) |
@@ -45,10 +47,11 @@ __get_user_2: | |||
45 | 2: movzwl -1(%eax),%edx | 47 | 2: movzwl -1(%eax),%edx |
46 | xorl %eax,%eax | 48 | xorl %eax,%eax |
47 | ret | 49 | ret |
50 | CFI_ENDPROC | ||
51 | ENDPROC(__get_user_2) | ||
48 | 52 | ||
49 | .align 4 | 53 | ENTRY(__get_user_4) |
50 | .globl __get_user_4 | 54 | CFI_STARTPROC |
51 | __get_user_4: | ||
52 | addl $3,%eax | 55 | addl $3,%eax |
53 | jc bad_get_user | 56 | jc bad_get_user |
54 | GET_THREAD_INFO(%edx) | 57 | GET_THREAD_INFO(%edx) |
@@ -57,11 +60,16 @@ __get_user_4: | |||
57 | 3: movl -3(%eax),%edx | 60 | 3: movl -3(%eax),%edx |
58 | xorl %eax,%eax | 61 | xorl %eax,%eax |
59 | ret | 62 | ret |
63 | CFI_ENDPROC | ||
64 | ENDPROC(__get_user_4) | ||
60 | 65 | ||
61 | bad_get_user: | 66 | bad_get_user: |
67 | CFI_STARTPROC | ||
62 | xorl %edx,%edx | 68 | xorl %edx,%edx |
63 | movl $-14,%eax | 69 | movl $-14,%eax |
64 | ret | 70 | ret |
71 | CFI_ENDPROC | ||
72 | END(bad_get_user) | ||
65 | 73 | ||
66 | .section __ex_table,"a" | 74 | .section __ex_table,"a" |
67 | .long 1b,bad_get_user | 75 | .long 1b,bad_get_user |
diff --git a/arch/i386/lib/putuser.S b/arch/i386/lib/putuser.S index a32d9f570f4..f58fba109d1 100644 --- a/arch/i386/lib/putuser.S +++ b/arch/i386/lib/putuser.S | |||
@@ -8,6 +8,8 @@ | |||
8 | * return an error value in addition to the "real" | 8 | * return an error value in addition to the "real" |
9 | * return value. | 9 | * return value. |
10 | */ | 10 | */ |
11 | #include <linux/linkage.h> | ||
12 | #include <asm/dwarf2.h> | ||
11 | #include <asm/thread_info.h> | 13 | #include <asm/thread_info.h> |
12 | 14 | ||
13 | 15 | ||
@@ -23,23 +25,28 @@ | |||
23 | * as they get called from within inline assembly. | 25 | * as they get called from within inline assembly. |
24 | */ | 26 | */ |
25 | 27 | ||
26 | #define ENTER pushl %ebx ; GET_THREAD_INFO(%ebx) | 28 | #define ENTER CFI_STARTPROC ; \ |
27 | #define EXIT popl %ebx ; ret | 29 | pushl %ebx ; \ |
30 | CFI_ADJUST_CFA_OFFSET 4 ; \ | ||
31 | CFI_REL_OFFSET ebx, 0 ; \ | ||
32 | GET_THREAD_INFO(%ebx) | ||
33 | #define EXIT popl %ebx ; \ | ||
34 | CFI_ADJUST_CFA_OFFSET -4 ; \ | ||
35 | CFI_RESTORE ebx ; \ | ||
36 | ret ; \ | ||
37 | CFI_ENDPROC | ||
28 | 38 | ||
29 | .text | 39 | .text |
30 | .align 4 | 40 | ENTRY(__put_user_1) |
31 | .globl __put_user_1 | ||
32 | __put_user_1: | ||
33 | ENTER | 41 | ENTER |
34 | cmpl TI_addr_limit(%ebx),%ecx | 42 | cmpl TI_addr_limit(%ebx),%ecx |
35 | jae bad_put_user | 43 | jae bad_put_user |
36 | 1: movb %al,(%ecx) | 44 | 1: movb %al,(%ecx) |
37 | xorl %eax,%eax | 45 | xorl %eax,%eax |
38 | EXIT | 46 | EXIT |
47 | ENDPROC(__put_user_1) | ||
39 | 48 | ||
40 | .align 4 | 49 | ENTRY(__put_user_2) |
41 | .globl __put_user_2 | ||
42 | __put_user_2: | ||
43 | ENTER | 50 | ENTER |
44 | movl TI_addr_limit(%ebx),%ebx | 51 | movl TI_addr_limit(%ebx),%ebx |
45 | subl $1,%ebx | 52 | subl $1,%ebx |
@@ -48,10 +55,9 @@ __put_user_2: | |||
48 | 2: movw %ax,(%ecx) | 55 | 2: movw %ax,(%ecx) |
49 | xorl %eax,%eax | 56 | xorl %eax,%eax |
50 | EXIT | 57 | EXIT |
58 | ENDPROC(__put_user_2) | ||
51 | 59 | ||
52 | .align 4 | 60 | ENTRY(__put_user_4) |
53 | .globl __put_user_4 | ||
54 | __put_user_4: | ||
55 | ENTER | 61 | ENTER |
56 | movl TI_addr_limit(%ebx),%ebx | 62 | movl TI_addr_limit(%ebx),%ebx |
57 | subl $3,%ebx | 63 | subl $3,%ebx |
@@ -60,10 +66,9 @@ __put_user_4: | |||
60 | 3: movl %eax,(%ecx) | 66 | 3: movl %eax,(%ecx) |
61 | xorl %eax,%eax | 67 | xorl %eax,%eax |
62 | EXIT | 68 | EXIT |
69 | ENDPROC(__put_user_4) | ||
63 | 70 | ||
64 | .align 4 | 71 | ENTRY(__put_user_8) |
65 | .globl __put_user_8 | ||
66 | __put_user_8: | ||
67 | ENTER | 72 | ENTER |
68 | movl TI_addr_limit(%ebx),%ebx | 73 | movl TI_addr_limit(%ebx),%ebx |
69 | subl $7,%ebx | 74 | subl $7,%ebx |
@@ -73,10 +78,16 @@ __put_user_8: | |||
73 | 5: movl %edx,4(%ecx) | 78 | 5: movl %edx,4(%ecx) |
74 | xorl %eax,%eax | 79 | xorl %eax,%eax |
75 | EXIT | 80 | EXIT |
81 | ENDPROC(__put_user_8) | ||
76 | 82 | ||
77 | bad_put_user: | 83 | bad_put_user: |
84 | CFI_STARTPROC simple | ||
85 | CFI_DEF_CFA esp, 2*4 | ||
86 | CFI_OFFSET eip, -1*4 | ||
87 | CFI_OFFSET ebx, -2*4 | ||
78 | movl $-14,%eax | 88 | movl $-14,%eax |
79 | EXIT | 89 | EXIT |
90 | END(bad_put_user) | ||
80 | 91 | ||
81 | .section __ex_table,"a" | 92 | .section __ex_table,"a" |
82 | .long 1b,bad_put_user | 93 | .long 1b,bad_put_user |
diff --git a/arch/i386/lib/usercopy.c b/arch/i386/lib/usercopy.c index 086b3726862..9f38b12b4af 100644 --- a/arch/i386/lib/usercopy.c +++ b/arch/i386/lib/usercopy.c | |||
@@ -716,7 +716,6 @@ do { \ | |||
716 | unsigned long __copy_to_user_ll(void __user *to, const void *from, | 716 | unsigned long __copy_to_user_ll(void __user *to, const void *from, |
717 | unsigned long n) | 717 | unsigned long n) |
718 | { | 718 | { |
719 | BUG_ON((long) n < 0); | ||
720 | #ifndef CONFIG_X86_WP_WORKS_OK | 719 | #ifndef CONFIG_X86_WP_WORKS_OK |
721 | if (unlikely(boot_cpu_data.wp_works_ok == 0) && | 720 | if (unlikely(boot_cpu_data.wp_works_ok == 0) && |
722 | ((unsigned long )to) < TASK_SIZE) { | 721 | ((unsigned long )to) < TASK_SIZE) { |
@@ -785,7 +784,6 @@ EXPORT_SYMBOL(__copy_to_user_ll); | |||
785 | unsigned long __copy_from_user_ll(void *to, const void __user *from, | 784 | unsigned long __copy_from_user_ll(void *to, const void __user *from, |
786 | unsigned long n) | 785 | unsigned long n) |
787 | { | 786 | { |
788 | BUG_ON((long)n < 0); | ||
789 | if (movsl_is_ok(to, from, n)) | 787 | if (movsl_is_ok(to, from, n)) |
790 | __copy_user_zeroing(to, from, n); | 788 | __copy_user_zeroing(to, from, n); |
791 | else | 789 | else |
@@ -797,7 +795,6 @@ EXPORT_SYMBOL(__copy_from_user_ll); | |||
797 | unsigned long __copy_from_user_ll_nozero(void *to, const void __user *from, | 795 | unsigned long __copy_from_user_ll_nozero(void *to, const void __user *from, |
798 | unsigned long n) | 796 | unsigned long n) |
799 | { | 797 | { |
800 | BUG_ON((long)n < 0); | ||
801 | if (movsl_is_ok(to, from, n)) | 798 | if (movsl_is_ok(to, from, n)) |
802 | __copy_user(to, from, n); | 799 | __copy_user(to, from, n); |
803 | else | 800 | else |
@@ -810,7 +807,6 @@ EXPORT_SYMBOL(__copy_from_user_ll_nozero); | |||
810 | unsigned long __copy_from_user_ll_nocache(void *to, const void __user *from, | 807 | unsigned long __copy_from_user_ll_nocache(void *to, const void __user *from, |
811 | unsigned long n) | 808 | unsigned long n) |
812 | { | 809 | { |
813 | BUG_ON((long)n < 0); | ||
814 | #ifdef CONFIG_X86_INTEL_USERCOPY | 810 | #ifdef CONFIG_X86_INTEL_USERCOPY |
815 | if ( n > 64 && cpu_has_xmm2) | 811 | if ( n > 64 && cpu_has_xmm2) |
816 | n = __copy_user_zeroing_intel_nocache(to, from, n); | 812 | n = __copy_user_zeroing_intel_nocache(to, from, n); |
@@ -825,7 +821,6 @@ unsigned long __copy_from_user_ll_nocache(void *to, const void __user *from, | |||
825 | unsigned long __copy_from_user_ll_nocache_nozero(void *to, const void __user *from, | 821 | unsigned long __copy_from_user_ll_nocache_nozero(void *to, const void __user *from, |
826 | unsigned long n) | 822 | unsigned long n) |
827 | { | 823 | { |
828 | BUG_ON((long)n < 0); | ||
829 | #ifdef CONFIG_X86_INTEL_USERCOPY | 824 | #ifdef CONFIG_X86_INTEL_USERCOPY |
830 | if ( n > 64 && cpu_has_xmm2) | 825 | if ( n > 64 && cpu_has_xmm2) |
831 | n = __copy_user_intel_nocache(to, from, n); | 826 | n = __copy_user_intel_nocache(to, from, n); |
@@ -853,7 +848,6 @@ unsigned long __copy_from_user_ll_nocache_nozero(void *to, const void __user *fr | |||
853 | unsigned long | 848 | unsigned long |
854 | copy_to_user(void __user *to, const void *from, unsigned long n) | 849 | copy_to_user(void __user *to, const void *from, unsigned long n) |
855 | { | 850 | { |
856 | BUG_ON((long) n < 0); | ||
857 | if (access_ok(VERIFY_WRITE, to, n)) | 851 | if (access_ok(VERIFY_WRITE, to, n)) |
858 | n = __copy_to_user(to, from, n); | 852 | n = __copy_to_user(to, from, n); |
859 | return n; | 853 | return n; |
@@ -879,7 +873,6 @@ EXPORT_SYMBOL(copy_to_user); | |||
879 | unsigned long | 873 | unsigned long |
880 | copy_from_user(void *to, const void __user *from, unsigned long n) | 874 | copy_from_user(void *to, const void __user *from, unsigned long n) |
881 | { | 875 | { |
882 | BUG_ON((long) n < 0); | ||
883 | if (access_ok(VERIFY_READ, from, n)) | 876 | if (access_ok(VERIFY_READ, from, n)) |
884 | n = __copy_from_user(to, from, n); | 877 | n = __copy_from_user(to, from, n); |
885 | else | 878 | else |
diff --git a/arch/i386/mach-generic/bigsmp.c b/arch/i386/mach-generic/bigsmp.c index 8a210fa915b..e932d3485ae 100644 --- a/arch/i386/mach-generic/bigsmp.c +++ b/arch/i386/mach-generic/bigsmp.c | |||
@@ -45,7 +45,7 @@ static struct dmi_system_id __initdata bigsmp_dmi_table[] = { | |||
45 | }; | 45 | }; |
46 | 46 | ||
47 | 47 | ||
48 | static int probe_bigsmp(void) | 48 | static int __init probe_bigsmp(void) |
49 | { | 49 | { |
50 | if (def_to_bigsmp) | 50 | if (def_to_bigsmp) |
51 | dmi_bigsmp = 1; | 51 | dmi_bigsmp = 1; |
diff --git a/arch/i386/mach-generic/es7000.c b/arch/i386/mach-generic/es7000.c index b8963a5a3b2..b47f951c0ec 100644 --- a/arch/i386/mach-generic/es7000.c +++ b/arch/i386/mach-generic/es7000.c | |||
@@ -25,4 +25,45 @@ static int probe_es7000(void) | |||
25 | return 0; | 25 | return 0; |
26 | } | 26 | } |
27 | 27 | ||
28 | extern void es7000_sw_apic(void); | ||
29 | static void __init enable_apic_mode(void) | ||
30 | { | ||
31 | es7000_sw_apic(); | ||
32 | return; | ||
33 | } | ||
34 | |||
35 | static __init int mps_oem_check(struct mp_config_table *mpc, char *oem, | ||
36 | char *productid) | ||
37 | { | ||
38 | if (mpc->mpc_oemptr) { | ||
39 | struct mp_config_oemtable *oem_table = | ||
40 | (struct mp_config_oemtable *)mpc->mpc_oemptr; | ||
41 | if (!strncmp(oem, "UNISYS", 6)) | ||
42 | return parse_unisys_oem((char *)oem_table); | ||
43 | } | ||
44 | return 0; | ||
45 | } | ||
46 | |||
47 | #ifdef CONFIG_ACPI | ||
48 | /* Hook from generic ACPI tables.c */ | ||
49 | static int __init acpi_madt_oem_check(char *oem_id, char *oem_table_id) | ||
50 | { | ||
51 | unsigned long oem_addr; | ||
52 | if (!find_unisys_acpi_oem_table(&oem_addr)) { | ||
53 | if (es7000_check_dsdt()) | ||
54 | return parse_unisys_oem((char *)oem_addr); | ||
55 | else { | ||
56 | setup_unisys(); | ||
57 | return 1; | ||
58 | } | ||
59 | } | ||
60 | return 0; | ||
61 | } | ||
62 | #else | ||
63 | static int __init acpi_madt_oem_check(char *oem_id, char *oem_table_id) | ||
64 | { | ||
65 | return 0; | ||
66 | } | ||
67 | #endif | ||
68 | |||
28 | struct genapic apic_es7000 = APIC_INIT("es7000", probe_es7000); | 69 | struct genapic apic_es7000 = APIC_INIT("es7000", probe_es7000); |
diff --git a/arch/i386/mach-voyager/voyager_smp.c b/arch/i386/mach-voyager/voyager_smp.c index fe0ed393294..1a5e448a29c 100644 --- a/arch/i386/mach-voyager/voyager_smp.c +++ b/arch/i386/mach-voyager/voyager_smp.c | |||
@@ -573,15 +573,7 @@ do_boot_cpu(__u8 cpu) | |||
573 | /* init_tasks (in sched.c) is indexed logically */ | 573 | /* init_tasks (in sched.c) is indexed logically */ |
574 | stack_start.esp = (void *) idle->thread.esp; | 574 | stack_start.esp = (void *) idle->thread.esp; |
575 | 575 | ||
576 | /* Pre-allocate and initialize the CPU's GDT and PDA so it | 576 | init_gdt(cpu, idle); |
577 | doesn't have to do any memory allocation during the | ||
578 | delicate CPU-bringup phase. */ | ||
579 | if (!init_gdt(cpu, idle)) { | ||
580 | printk(KERN_INFO "Couldn't allocate GDT/PDA for CPU %d\n", cpu); | ||
581 | cpucount--; | ||
582 | return; | ||
583 | } | ||
584 | |||
585 | irq_ctx_init(cpu); | 577 | irq_ctx_init(cpu); |
586 | 578 | ||
587 | /* Note: Don't modify initial ss override */ | 579 | /* Note: Don't modify initial ss override */ |
@@ -749,12 +741,6 @@ initialize_secondary(void) | |||
749 | #endif | 741 | #endif |
750 | 742 | ||
751 | /* | 743 | /* |
752 | * switch to the per CPU GDT we already set up | ||
753 | * in do_boot_cpu() | ||
754 | */ | ||
755 | cpu_set_gdt(current_thread_info()->cpu); | ||
756 | |||
757 | /* | ||
758 | * We don't actually need to load the full TSS, | 744 | * We don't actually need to load the full TSS, |
759 | * basically just the stack pointer and the eip. | 745 | * basically just the stack pointer and the eip. |
760 | */ | 746 | */ |
diff --git a/arch/i386/mm/fault.c b/arch/i386/mm/fault.c index b8c4e259fc8..f534c29e80b 100644 --- a/arch/i386/mm/fault.c +++ b/arch/i386/mm/fault.c | |||
@@ -20,6 +20,7 @@ | |||
20 | #include <linux/tty.h> | 20 | #include <linux/tty.h> |
21 | #include <linux/vt_kern.h> /* For unblank_screen() */ | 21 | #include <linux/vt_kern.h> /* For unblank_screen() */ |
22 | #include <linux/highmem.h> | 22 | #include <linux/highmem.h> |
23 | #include <linux/bootmem.h> /* for max_low_pfn */ | ||
23 | #include <linux/module.h> | 24 | #include <linux/module.h> |
24 | #include <linux/kprobes.h> | 25 | #include <linux/kprobes.h> |
25 | #include <linux/uaccess.h> | 26 | #include <linux/uaccess.h> |
@@ -301,7 +302,6 @@ fastcall void __kprobes do_page_fault(struct pt_regs *regs, | |||
301 | struct mm_struct *mm; | 302 | struct mm_struct *mm; |
302 | struct vm_area_struct * vma; | 303 | struct vm_area_struct * vma; |
303 | unsigned long address; | 304 | unsigned long address; |
304 | unsigned long page; | ||
305 | int write, si_code; | 305 | int write, si_code; |
306 | 306 | ||
307 | /* get the address */ | 307 | /* get the address */ |
@@ -510,7 +510,9 @@ no_context: | |||
510 | bust_spinlocks(1); | 510 | bust_spinlocks(1); |
511 | 511 | ||
512 | if (oops_may_print()) { | 512 | if (oops_may_print()) { |
513 | #ifdef CONFIG_X86_PAE | 513 | __typeof__(pte_val(__pte(0))) page; |
514 | |||
515 | #ifdef CONFIG_X86_PAE | ||
514 | if (error_code & 16) { | 516 | if (error_code & 16) { |
515 | pte_t *pte = lookup_address(address); | 517 | pte_t *pte = lookup_address(address); |
516 | 518 | ||
@@ -519,7 +521,7 @@ no_context: | |||
519 | "NX-protected page - exploit attempt? " | 521 | "NX-protected page - exploit attempt? " |
520 | "(uid: %d)\n", current->uid); | 522 | "(uid: %d)\n", current->uid); |
521 | } | 523 | } |
522 | #endif | 524 | #endif |
523 | if (address < PAGE_SIZE) | 525 | if (address < PAGE_SIZE) |
524 | printk(KERN_ALERT "BUG: unable to handle kernel NULL " | 526 | printk(KERN_ALERT "BUG: unable to handle kernel NULL " |
525 | "pointer dereference"); | 527 | "pointer dereference"); |
@@ -529,25 +531,38 @@ no_context: | |||
529 | printk(" at virtual address %08lx\n",address); | 531 | printk(" at virtual address %08lx\n",address); |
530 | printk(KERN_ALERT " printing eip:\n"); | 532 | printk(KERN_ALERT " printing eip:\n"); |
531 | printk("%08lx\n", regs->eip); | 533 | printk("%08lx\n", regs->eip); |
532 | } | 534 | |
533 | page = read_cr3(); | 535 | page = read_cr3(); |
534 | page = ((unsigned long *) __va(page))[address >> 22]; | 536 | page = ((__typeof__(page) *) __va(page))[address >> PGDIR_SHIFT]; |
535 | if (oops_may_print()) | 537 | #ifdef CONFIG_X86_PAE |
538 | printk(KERN_ALERT "*pdpt = %016Lx\n", page); | ||
539 | if ((page >> PAGE_SHIFT) < max_low_pfn | ||
540 | && page & _PAGE_PRESENT) { | ||
541 | page &= PAGE_MASK; | ||
542 | page = ((__typeof__(page) *) __va(page))[(address >> PMD_SHIFT) | ||
543 | & (PTRS_PER_PMD - 1)]; | ||
544 | printk(KERN_ALERT "*pde = %016Lx\n", page); | ||
545 | page &= ~_PAGE_NX; | ||
546 | } | ||
547 | #else | ||
536 | printk(KERN_ALERT "*pde = %08lx\n", page); | 548 | printk(KERN_ALERT "*pde = %08lx\n", page); |
537 | /* | ||
538 | * We must not directly access the pte in the highpte | ||
539 | * case, the page table might be allocated in highmem. | ||
540 | * And lets rather not kmap-atomic the pte, just in case | ||
541 | * it's allocated already. | ||
542 | */ | ||
543 | #ifndef CONFIG_HIGHPTE | ||
544 | if ((page & 1) && oops_may_print()) { | ||
545 | page &= PAGE_MASK; | ||
546 | address &= 0x003ff000; | ||
547 | page = ((unsigned long *) __va(page))[address >> PAGE_SHIFT]; | ||
548 | printk(KERN_ALERT "*pte = %08lx\n", page); | ||
549 | } | ||
550 | #endif | 549 | #endif |
550 | |||
551 | /* | ||
552 | * We must not directly access the pte in the highpte | ||
553 | * case if the page table is located in highmem. | ||
554 | * And let's rather not kmap-atomic the pte, just in case | ||
555 | * it's allocated already. | ||
556 | */ | ||
557 | if ((page >> PAGE_SHIFT) < max_low_pfn | ||
558 | && (page & _PAGE_PRESENT)) { | ||
559 | page &= PAGE_MASK; | ||
560 | page = ((__typeof__(page) *) __va(page))[(address >> PAGE_SHIFT) | ||
561 | & (PTRS_PER_PTE - 1)]; | ||
562 | printk(KERN_ALERT "*pte = %0*Lx\n", sizeof(page)*2, (u64)page); | ||
563 | } | ||
564 | } | ||
565 | |||
551 | tsk->thread.cr2 = address; | 566 | tsk->thread.cr2 = address; |
552 | tsk->thread.trap_no = 14; | 567 | tsk->thread.trap_no = 14; |
553 | tsk->thread.error_code = error_code; | 568 | tsk->thread.error_code = error_code; |
@@ -588,7 +603,6 @@ do_sigbus: | |||
588 | force_sig_info_fault(SIGBUS, BUS_ADRERR, address, tsk); | 603 | force_sig_info_fault(SIGBUS, BUS_ADRERR, address, tsk); |
589 | } | 604 | } |
590 | 605 | ||
591 | #ifndef CONFIG_X86_PAE | ||
592 | void vmalloc_sync_all(void) | 606 | void vmalloc_sync_all(void) |
593 | { | 607 | { |
594 | /* | 608 | /* |
@@ -601,6 +615,9 @@ void vmalloc_sync_all(void) | |||
601 | static unsigned long start = TASK_SIZE; | 615 | static unsigned long start = TASK_SIZE; |
602 | unsigned long address; | 616 | unsigned long address; |
603 | 617 | ||
618 | if (SHARED_KERNEL_PMD) | ||
619 | return; | ||
620 | |||
604 | BUILD_BUG_ON(TASK_SIZE & ~PGDIR_MASK); | 621 | BUILD_BUG_ON(TASK_SIZE & ~PGDIR_MASK); |
605 | for (address = start; address >= TASK_SIZE; address += PGDIR_SIZE) { | 622 | for (address = start; address >= TASK_SIZE; address += PGDIR_SIZE) { |
606 | if (!test_bit(pgd_index(address), insync)) { | 623 | if (!test_bit(pgd_index(address), insync)) { |
@@ -623,4 +640,3 @@ void vmalloc_sync_all(void) | |||
623 | start = address + PGDIR_SIZE; | 640 | start = address + PGDIR_SIZE; |
624 | } | 641 | } |
625 | } | 642 | } |
626 | #endif | ||
diff --git a/arch/i386/mm/highmem.c b/arch/i386/mm/highmem.c index ac70d09df7e..ad8d86cc683 100644 --- a/arch/i386/mm/highmem.c +++ b/arch/i386/mm/highmem.c | |||
@@ -26,7 +26,7 @@ void kunmap(struct page *page) | |||
26 | * However when holding an atomic kmap is is not legal to sleep, so atomic | 26 | * However when holding an atomic kmap is is not legal to sleep, so atomic |
27 | * kmaps are appropriate for short, tight code paths only. | 27 | * kmaps are appropriate for short, tight code paths only. |
28 | */ | 28 | */ |
29 | void *kmap_atomic(struct page *page, enum km_type type) | 29 | void *kmap_atomic_prot(struct page *page, enum km_type type, pgprot_t prot) |
30 | { | 30 | { |
31 | enum fixed_addresses idx; | 31 | enum fixed_addresses idx; |
32 | unsigned long vaddr; | 32 | unsigned long vaddr; |
@@ -41,12 +41,17 @@ void *kmap_atomic(struct page *page, enum km_type type) | |||
41 | return page_address(page); | 41 | return page_address(page); |
42 | 42 | ||
43 | vaddr = __fix_to_virt(FIX_KMAP_BEGIN + idx); | 43 | vaddr = __fix_to_virt(FIX_KMAP_BEGIN + idx); |
44 | set_pte(kmap_pte-idx, mk_pte(page, kmap_prot)); | 44 | set_pte(kmap_pte-idx, mk_pte(page, prot)); |
45 | arch_flush_lazy_mmu_mode(); | 45 | arch_flush_lazy_mmu_mode(); |
46 | 46 | ||
47 | return (void*) vaddr; | 47 | return (void*) vaddr; |
48 | } | 48 | } |
49 | 49 | ||
50 | void *kmap_atomic(struct page *page, enum km_type type) | ||
51 | { | ||
52 | return kmap_atomic_prot(page, type, kmap_prot); | ||
53 | } | ||
54 | |||
50 | void kunmap_atomic(void *kvaddr, enum km_type type) | 55 | void kunmap_atomic(void *kvaddr, enum km_type type) |
51 | { | 56 | { |
52 | unsigned long vaddr = (unsigned long) kvaddr & PAGE_MASK; | 57 | unsigned long vaddr = (unsigned long) kvaddr & PAGE_MASK; |
@@ -67,6 +72,7 @@ void kunmap_atomic(void *kvaddr, enum km_type type) | |||
67 | #endif | 72 | #endif |
68 | } | 73 | } |
69 | 74 | ||
75 | arch_flush_lazy_mmu_mode(); | ||
70 | pagefault_enable(); | 76 | pagefault_enable(); |
71 | } | 77 | } |
72 | 78 | ||
diff --git a/arch/i386/mm/init.c b/arch/i386/mm/init.c index ae436882af7..dbe16f63a56 100644 --- a/arch/i386/mm/init.c +++ b/arch/i386/mm/init.c | |||
@@ -22,6 +22,7 @@ | |||
22 | #include <linux/init.h> | 22 | #include <linux/init.h> |
23 | #include <linux/highmem.h> | 23 | #include <linux/highmem.h> |
24 | #include <linux/pagemap.h> | 24 | #include <linux/pagemap.h> |
25 | #include <linux/pfn.h> | ||
25 | #include <linux/poison.h> | 26 | #include <linux/poison.h> |
26 | #include <linux/bootmem.h> | 27 | #include <linux/bootmem.h> |
27 | #include <linux/slab.h> | 28 | #include <linux/slab.h> |
@@ -42,6 +43,7 @@ | |||
42 | #include <asm/tlb.h> | 43 | #include <asm/tlb.h> |
43 | #include <asm/tlbflush.h> | 44 | #include <asm/tlbflush.h> |
44 | #include <asm/sections.h> | 45 | #include <asm/sections.h> |
46 | #include <asm/paravirt.h> | ||
45 | 47 | ||
46 | unsigned int __VMALLOC_RESERVE = 128 << 20; | 48 | unsigned int __VMALLOC_RESERVE = 128 << 20; |
47 | 49 | ||
@@ -61,17 +63,18 @@ static pmd_t * __init one_md_table_init(pgd_t *pgd) | |||
61 | pmd_t *pmd_table; | 63 | pmd_t *pmd_table; |
62 | 64 | ||
63 | #ifdef CONFIG_X86_PAE | 65 | #ifdef CONFIG_X86_PAE |
64 | pmd_table = (pmd_t *) alloc_bootmem_low_pages(PAGE_SIZE); | 66 | if (!(pgd_val(*pgd) & _PAGE_PRESENT)) { |
65 | paravirt_alloc_pd(__pa(pmd_table) >> PAGE_SHIFT); | 67 | pmd_table = (pmd_t *) alloc_bootmem_low_pages(PAGE_SIZE); |
66 | set_pgd(pgd, __pgd(__pa(pmd_table) | _PAGE_PRESENT)); | 68 | |
67 | pud = pud_offset(pgd, 0); | 69 | paravirt_alloc_pd(__pa(pmd_table) >> PAGE_SHIFT); |
68 | if (pmd_table != pmd_offset(pud, 0)) | 70 | set_pgd(pgd, __pgd(__pa(pmd_table) | _PAGE_PRESENT)); |
69 | BUG(); | 71 | pud = pud_offset(pgd, 0); |
70 | #else | 72 | if (pmd_table != pmd_offset(pud, 0)) |
73 | BUG(); | ||
74 | } | ||
75 | #endif | ||
71 | pud = pud_offset(pgd, 0); | 76 | pud = pud_offset(pgd, 0); |
72 | pmd_table = pmd_offset(pud, 0); | 77 | pmd_table = pmd_offset(pud, 0); |
73 | #endif | ||
74 | |||
75 | return pmd_table; | 78 | return pmd_table; |
76 | } | 79 | } |
77 | 80 | ||
@@ -81,14 +84,12 @@ static pmd_t * __init one_md_table_init(pgd_t *pgd) | |||
81 | */ | 84 | */ |
82 | static pte_t * __init one_page_table_init(pmd_t *pmd) | 85 | static pte_t * __init one_page_table_init(pmd_t *pmd) |
83 | { | 86 | { |
84 | if (pmd_none(*pmd)) { | 87 | if (!(pmd_val(*pmd) & _PAGE_PRESENT)) { |
85 | pte_t *page_table = (pte_t *) alloc_bootmem_low_pages(PAGE_SIZE); | 88 | pte_t *page_table = (pte_t *) alloc_bootmem_low_pages(PAGE_SIZE); |
89 | |||
86 | paravirt_alloc_pt(__pa(page_table) >> PAGE_SHIFT); | 90 | paravirt_alloc_pt(__pa(page_table) >> PAGE_SHIFT); |
87 | set_pmd(pmd, __pmd(__pa(page_table) | _PAGE_TABLE)); | 91 | set_pmd(pmd, __pmd(__pa(page_table) | _PAGE_TABLE)); |
88 | if (page_table != pte_offset_kernel(pmd, 0)) | 92 | BUG_ON(page_table != pte_offset_kernel(pmd, 0)); |
89 | BUG(); | ||
90 | |||
91 | return page_table; | ||
92 | } | 93 | } |
93 | 94 | ||
94 | return pte_offset_kernel(pmd, 0); | 95 | return pte_offset_kernel(pmd, 0); |
@@ -108,7 +109,6 @@ static pte_t * __init one_page_table_init(pmd_t *pmd) | |||
108 | static void __init page_table_range_init (unsigned long start, unsigned long end, pgd_t *pgd_base) | 109 | static void __init page_table_range_init (unsigned long start, unsigned long end, pgd_t *pgd_base) |
109 | { | 110 | { |
110 | pgd_t *pgd; | 111 | pgd_t *pgd; |
111 | pud_t *pud; | ||
112 | pmd_t *pmd; | 112 | pmd_t *pmd; |
113 | int pgd_idx, pmd_idx; | 113 | int pgd_idx, pmd_idx; |
114 | unsigned long vaddr; | 114 | unsigned long vaddr; |
@@ -119,13 +119,10 @@ static void __init page_table_range_init (unsigned long start, unsigned long end | |||
119 | pgd = pgd_base + pgd_idx; | 119 | pgd = pgd_base + pgd_idx; |
120 | 120 | ||
121 | for ( ; (pgd_idx < PTRS_PER_PGD) && (vaddr != end); pgd++, pgd_idx++) { | 121 | for ( ; (pgd_idx < PTRS_PER_PGD) && (vaddr != end); pgd++, pgd_idx++) { |
122 | if (pgd_none(*pgd)) | 122 | pmd = one_md_table_init(pgd); |
123 | one_md_table_init(pgd); | 123 | pmd = pmd + pmd_index(vaddr); |
124 | pud = pud_offset(pgd, vaddr); | ||
125 | pmd = pmd_offset(pud, vaddr); | ||
126 | for (; (pmd_idx < PTRS_PER_PMD) && (vaddr != end); pmd++, pmd_idx++) { | 124 | for (; (pmd_idx < PTRS_PER_PMD) && (vaddr != end); pmd++, pmd_idx++) { |
127 | if (pmd_none(*pmd)) | 125 | one_page_table_init(pmd); |
128 | one_page_table_init(pmd); | ||
129 | 126 | ||
130 | vaddr += PMD_SIZE; | 127 | vaddr += PMD_SIZE; |
131 | } | 128 | } |
@@ -167,20 +164,22 @@ static void __init kernel_physical_mapping_init(pgd_t *pgd_base) | |||
167 | /* Map with big pages if possible, otherwise create normal page tables. */ | 164 | /* Map with big pages if possible, otherwise create normal page tables. */ |
168 | if (cpu_has_pse) { | 165 | if (cpu_has_pse) { |
169 | unsigned int address2 = (pfn + PTRS_PER_PTE - 1) * PAGE_SIZE + PAGE_OFFSET + PAGE_SIZE-1; | 166 | unsigned int address2 = (pfn + PTRS_PER_PTE - 1) * PAGE_SIZE + PAGE_OFFSET + PAGE_SIZE-1; |
170 | |||
171 | if (is_kernel_text(address) || is_kernel_text(address2)) | 167 | if (is_kernel_text(address) || is_kernel_text(address2)) |
172 | set_pmd(pmd, pfn_pmd(pfn, PAGE_KERNEL_LARGE_EXEC)); | 168 | set_pmd(pmd, pfn_pmd(pfn, PAGE_KERNEL_LARGE_EXEC)); |
173 | else | 169 | else |
174 | set_pmd(pmd, pfn_pmd(pfn, PAGE_KERNEL_LARGE)); | 170 | set_pmd(pmd, pfn_pmd(pfn, PAGE_KERNEL_LARGE)); |
171 | |||
175 | pfn += PTRS_PER_PTE; | 172 | pfn += PTRS_PER_PTE; |
176 | } else { | 173 | } else { |
177 | pte = one_page_table_init(pmd); | 174 | pte = one_page_table_init(pmd); |
178 | 175 | ||
179 | for (pte_ofs = 0; pte_ofs < PTRS_PER_PTE && pfn < max_low_pfn; pte++, pfn++, pte_ofs++) { | 176 | for (pte_ofs = 0; |
180 | if (is_kernel_text(address)) | 177 | pte_ofs < PTRS_PER_PTE && pfn < max_low_pfn; |
181 | set_pte(pte, pfn_pte(pfn, PAGE_KERNEL_EXEC)); | 178 | pte++, pfn++, pte_ofs++, address += PAGE_SIZE) { |
182 | else | 179 | if (is_kernel_text(address)) |
183 | set_pte(pte, pfn_pte(pfn, PAGE_KERNEL)); | 180 | set_pte(pte, pfn_pte(pfn, PAGE_KERNEL_EXEC)); |
181 | else | ||
182 | set_pte(pte, pfn_pte(pfn, PAGE_KERNEL)); | ||
184 | } | 183 | } |
185 | } | 184 | } |
186 | } | 185 | } |
@@ -337,24 +336,78 @@ extern void __init remap_numa_kva(void); | |||
337 | #define remap_numa_kva() do {} while (0) | 336 | #define remap_numa_kva() do {} while (0) |
338 | #endif | 337 | #endif |
339 | 338 | ||
340 | static void __init pagetable_init (void) | 339 | void __init native_pagetable_setup_start(pgd_t *base) |
341 | { | 340 | { |
342 | unsigned long vaddr; | ||
343 | pgd_t *pgd_base = swapper_pg_dir; | ||
344 | |||
345 | #ifdef CONFIG_X86_PAE | 341 | #ifdef CONFIG_X86_PAE |
346 | int i; | 342 | int i; |
347 | /* Init entries of the first-level page table to the zero page */ | 343 | |
348 | for (i = 0; i < PTRS_PER_PGD; i++) | 344 | /* |
349 | set_pgd(pgd_base + i, __pgd(__pa(empty_zero_page) | _PAGE_PRESENT)); | 345 | * Init entries of the first-level page table to the |
346 | * zero page, if they haven't already been set up. | ||
347 | * | ||
348 | * In a normal native boot, we'll be running on a | ||
349 | * pagetable rooted in swapper_pg_dir, but not in PAE | ||
350 | * mode, so this will end up clobbering the mappings | ||
351 | * for the lower 24Mbytes of the address space, | ||
352 | * without affecting the kernel address space. | ||
353 | */ | ||
354 | for (i = 0; i < USER_PTRS_PER_PGD; i++) | ||
355 | set_pgd(&base[i], | ||
356 | __pgd(__pa(empty_zero_page) | _PAGE_PRESENT)); | ||
357 | |||
358 | /* Make sure kernel address space is empty so that a pagetable | ||
359 | will be allocated for it. */ | ||
360 | memset(&base[USER_PTRS_PER_PGD], 0, | ||
361 | KERNEL_PGD_PTRS * sizeof(pgd_t)); | ||
350 | #else | 362 | #else |
351 | paravirt_alloc_pd(__pa(swapper_pg_dir) >> PAGE_SHIFT); | 363 | paravirt_alloc_pd(__pa(swapper_pg_dir) >> PAGE_SHIFT); |
352 | #endif | 364 | #endif |
365 | } | ||
366 | |||
367 | void __init native_pagetable_setup_done(pgd_t *base) | ||
368 | { | ||
369 | #ifdef CONFIG_X86_PAE | ||
370 | /* | ||
371 | * Add low memory identity-mappings - SMP needs it when | ||
372 | * starting up on an AP from real-mode. In the non-PAE | ||
373 | * case we already have these mappings through head.S. | ||
374 | * All user-space mappings are explicitly cleared after | ||
375 | * SMP startup. | ||
376 | */ | ||
377 | set_pgd(&base[0], base[USER_PTRS_PER_PGD]); | ||
378 | #endif | ||
379 | } | ||
380 | |||
381 | /* | ||
382 | * Build a proper pagetable for the kernel mappings. Up until this | ||
383 | * point, we've been running on some set of pagetables constructed by | ||
384 | * the boot process. | ||
385 | * | ||
386 | * If we're booting on native hardware, this will be a pagetable | ||
387 | * constructed in arch/i386/kernel/head.S, and not running in PAE mode | ||
388 | * (even if we'll end up running in PAE). The root of the pagetable | ||
389 | * will be swapper_pg_dir. | ||
390 | * | ||
391 | * If we're booting paravirtualized under a hypervisor, then there are | ||
392 | * more options: we may already be running PAE, and the pagetable may | ||
393 | * or may not be based in swapper_pg_dir. In any case, | ||
394 | * paravirt_pagetable_setup_start() will set up swapper_pg_dir | ||
395 | * appropriately for the rest of the initialization to work. | ||
396 | * | ||
397 | * In general, pagetable_init() assumes that the pagetable may already | ||
398 | * be partially populated, and so it avoids stomping on any existing | ||
399 | * mappings. | ||
400 | */ | ||
401 | static void __init pagetable_init (void) | ||
402 | { | ||
403 | unsigned long vaddr, end; | ||
404 | pgd_t *pgd_base = swapper_pg_dir; | ||
405 | |||
406 | paravirt_pagetable_setup_start(pgd_base); | ||
353 | 407 | ||
354 | /* Enable PSE if available */ | 408 | /* Enable PSE if available */ |
355 | if (cpu_has_pse) { | 409 | if (cpu_has_pse) |
356 | set_in_cr4(X86_CR4_PSE); | 410 | set_in_cr4(X86_CR4_PSE); |
357 | } | ||
358 | 411 | ||
359 | /* Enable PGE if available */ | 412 | /* Enable PGE if available */ |
360 | if (cpu_has_pge) { | 413 | if (cpu_has_pge) { |
@@ -371,20 +424,12 @@ static void __init pagetable_init (void) | |||
371 | * created - mappings will be set by set_fixmap(): | 424 | * created - mappings will be set by set_fixmap(): |
372 | */ | 425 | */ |
373 | vaddr = __fix_to_virt(__end_of_fixed_addresses - 1) & PMD_MASK; | 426 | vaddr = __fix_to_virt(__end_of_fixed_addresses - 1) & PMD_MASK; |
374 | page_table_range_init(vaddr, 0, pgd_base); | 427 | end = (FIXADDR_TOP + PMD_SIZE - 1) & PMD_MASK; |
428 | page_table_range_init(vaddr, end, pgd_base); | ||
375 | 429 | ||
376 | permanent_kmaps_init(pgd_base); | 430 | permanent_kmaps_init(pgd_base); |
377 | 431 | ||
378 | #ifdef CONFIG_X86_PAE | 432 | paravirt_pagetable_setup_done(pgd_base); |
379 | /* | ||
380 | * Add low memory identity-mappings - SMP needs it when | ||
381 | * starting up on an AP from real-mode. In the non-PAE | ||
382 | * case we already have these mappings through head.S. | ||
383 | * All user-space mappings are explicitly cleared after | ||
384 | * SMP startup. | ||
385 | */ | ||
386 | set_pgd(&pgd_base[0], pgd_base[USER_PTRS_PER_PGD]); | ||
387 | #endif | ||
388 | } | 433 | } |
389 | 434 | ||
390 | #if defined(CONFIG_SOFTWARE_SUSPEND) || defined(CONFIG_ACPI_SLEEP) | 435 | #if defined(CONFIG_SOFTWARE_SUSPEND) || defined(CONFIG_ACPI_SLEEP) |
@@ -700,6 +745,8 @@ struct kmem_cache *pmd_cache; | |||
700 | 745 | ||
701 | void __init pgtable_cache_init(void) | 746 | void __init pgtable_cache_init(void) |
702 | { | 747 | { |
748 | size_t pgd_size = PTRS_PER_PGD*sizeof(pgd_t); | ||
749 | |||
703 | if (PTRS_PER_PMD > 1) { | 750 | if (PTRS_PER_PMD > 1) { |
704 | pmd_cache = kmem_cache_create("pmd", | 751 | pmd_cache = kmem_cache_create("pmd", |
705 | PTRS_PER_PMD*sizeof(pmd_t), | 752 | PTRS_PER_PMD*sizeof(pmd_t), |
@@ -709,13 +756,23 @@ void __init pgtable_cache_init(void) | |||
709 | NULL); | 756 | NULL); |
710 | if (!pmd_cache) | 757 | if (!pmd_cache) |
711 | panic("pgtable_cache_init(): cannot create pmd cache"); | 758 | panic("pgtable_cache_init(): cannot create pmd cache"); |
759 | |||
760 | if (!SHARED_KERNEL_PMD) { | ||
761 | /* If we're in PAE mode and have a non-shared | ||
762 | kernel pmd, then the pgd size must be a | ||
763 | page size. This is because the pgd_list | ||
764 | links through the page structure, so there | ||
765 | can only be one pgd per page for this to | ||
766 | work. */ | ||
767 | pgd_size = PAGE_SIZE; | ||
768 | } | ||
712 | } | 769 | } |
713 | pgd_cache = kmem_cache_create("pgd", | 770 | pgd_cache = kmem_cache_create("pgd", |
714 | PTRS_PER_PGD*sizeof(pgd_t), | 771 | pgd_size, |
715 | PTRS_PER_PGD*sizeof(pgd_t), | 772 | pgd_size, |
716 | 0, | 773 | 0, |
717 | pgd_ctor, | 774 | pgd_ctor, |
718 | PTRS_PER_PMD == 1 ? pgd_dtor : NULL); | 775 | (!SHARED_KERNEL_PMD) ? pgd_dtor : NULL); |
719 | if (!pgd_cache) | 776 | if (!pgd_cache) |
720 | panic("pgtable_cache_init(): Cannot create pgd cache"); | 777 | panic("pgtable_cache_init(): Cannot create pgd cache"); |
721 | } | 778 | } |
@@ -751,13 +808,25 @@ static int noinline do_test_wp_bit(void) | |||
751 | 808 | ||
752 | void mark_rodata_ro(void) | 809 | void mark_rodata_ro(void) |
753 | { | 810 | { |
754 | unsigned long addr = (unsigned long)__start_rodata; | 811 | unsigned long start = PFN_ALIGN(_text); |
812 | unsigned long size = PFN_ALIGN(_etext) - start; | ||
755 | 813 | ||
756 | for (; addr < (unsigned long)__end_rodata; addr += PAGE_SIZE) | 814 | #ifdef CONFIG_HOTPLUG_CPU |
757 | change_page_attr(virt_to_page(addr), 1, PAGE_KERNEL_RO); | 815 | /* It must still be possible to apply SMP alternatives. */ |
816 | if (num_possible_cpus() <= 1) | ||
817 | #endif | ||
818 | { | ||
819 | change_page_attr(virt_to_page(start), | ||
820 | size >> PAGE_SHIFT, PAGE_KERNEL_RX); | ||
821 | printk("Write protecting the kernel text: %luk\n", size >> 10); | ||
822 | } | ||
758 | 823 | ||
759 | printk("Write protecting the kernel read-only data: %uk\n", | 824 | start += size; |
760 | (__end_rodata - __start_rodata) >> 10); | 825 | size = (unsigned long)__end_rodata - start; |
826 | change_page_attr(virt_to_page(start), | ||
827 | size >> PAGE_SHIFT, PAGE_KERNEL_RO); | ||
828 | printk("Write protecting the kernel read-only data: %luk\n", | ||
829 | size >> 10); | ||
761 | 830 | ||
762 | /* | 831 | /* |
763 | * change_page_attr() requires a global_flush_tlb() call after it. | 832 | * change_page_attr() requires a global_flush_tlb() call after it. |
@@ -774,26 +843,27 @@ void free_init_pages(char *what, unsigned long begin, unsigned long end) | |||
774 | unsigned long addr; | 843 | unsigned long addr; |
775 | 844 | ||
776 | for (addr = begin; addr < end; addr += PAGE_SIZE) { | 845 | for (addr = begin; addr < end; addr += PAGE_SIZE) { |
777 | ClearPageReserved(virt_to_page(addr)); | 846 | struct page *page = pfn_to_page(addr >> PAGE_SHIFT); |
778 | init_page_count(virt_to_page(addr)); | 847 | ClearPageReserved(page); |
779 | memset((void *)addr, POISON_FREE_INITMEM, PAGE_SIZE); | 848 | init_page_count(page); |
780 | free_page(addr); | 849 | memset(page_address(page), POISON_FREE_INITMEM, PAGE_SIZE); |
850 | __free_page(page); | ||
781 | totalram_pages++; | 851 | totalram_pages++; |
782 | } | 852 | } |
783 | printk(KERN_INFO "Freeing %s: %ldk freed\n", what, (end - begin) >> 10); | 853 | printk(KERN_INFO "Freeing %s: %luk freed\n", what, (end - begin) >> 10); |
784 | } | 854 | } |
785 | 855 | ||
786 | void free_initmem(void) | 856 | void free_initmem(void) |
787 | { | 857 | { |
788 | free_init_pages("unused kernel memory", | 858 | free_init_pages("unused kernel memory", |
789 | (unsigned long)(&__init_begin), | 859 | __pa_symbol(&__init_begin), |
790 | (unsigned long)(&__init_end)); | 860 | __pa_symbol(&__init_end)); |
791 | } | 861 | } |
792 | 862 | ||
793 | #ifdef CONFIG_BLK_DEV_INITRD | 863 | #ifdef CONFIG_BLK_DEV_INITRD |
794 | void free_initrd_mem(unsigned long start, unsigned long end) | 864 | void free_initrd_mem(unsigned long start, unsigned long end) |
795 | { | 865 | { |
796 | free_init_pages("initrd memory", start, end); | 866 | free_init_pages("initrd memory", __pa(start), __pa(end)); |
797 | } | 867 | } |
798 | #endif | 868 | #endif |
799 | 869 | ||
diff --git a/arch/i386/mm/pageattr.c b/arch/i386/mm/pageattr.c index 412ebbd8adb..47bd477c8ec 100644 --- a/arch/i386/mm/pageattr.c +++ b/arch/i386/mm/pageattr.c | |||
@@ -91,7 +91,7 @@ static void set_pmd_pte(pte_t *kpte, unsigned long address, pte_t pte) | |||
91 | unsigned long flags; | 91 | unsigned long flags; |
92 | 92 | ||
93 | set_pte_atomic(kpte, pte); /* change init_mm */ | 93 | set_pte_atomic(kpte, pte); /* change init_mm */ |
94 | if (PTRS_PER_PMD > 1) | 94 | if (SHARED_KERNEL_PMD) |
95 | return; | 95 | return; |
96 | 96 | ||
97 | spin_lock_irqsave(&pgd_lock, flags); | 97 | spin_lock_irqsave(&pgd_lock, flags); |
@@ -142,7 +142,7 @@ __change_page_attr(struct page *page, pgprot_t prot) | |||
142 | return -EINVAL; | 142 | return -EINVAL; |
143 | kpte_page = virt_to_page(kpte); | 143 | kpte_page = virt_to_page(kpte); |
144 | if (pgprot_val(prot) != pgprot_val(PAGE_KERNEL)) { | 144 | if (pgprot_val(prot) != pgprot_val(PAGE_KERNEL)) { |
145 | if ((pte_val(*kpte) & _PAGE_PSE) == 0) { | 145 | if (!pte_huge(*kpte)) { |
146 | set_pte_atomic(kpte, mk_pte(page, prot)); | 146 | set_pte_atomic(kpte, mk_pte(page, prot)); |
147 | } else { | 147 | } else { |
148 | pgprot_t ref_prot; | 148 | pgprot_t ref_prot; |
@@ -158,7 +158,7 @@ __change_page_attr(struct page *page, pgprot_t prot) | |||
158 | kpte_page = split; | 158 | kpte_page = split; |
159 | } | 159 | } |
160 | page_private(kpte_page)++; | 160 | page_private(kpte_page)++; |
161 | } else if ((pte_val(*kpte) & _PAGE_PSE) == 0) { | 161 | } else if (!pte_huge(*kpte)) { |
162 | set_pte_atomic(kpte, mk_pte(page, PAGE_KERNEL)); | 162 | set_pte_atomic(kpte, mk_pte(page, PAGE_KERNEL)); |
163 | BUG_ON(page_private(kpte_page) == 0); | 163 | BUG_ON(page_private(kpte_page) == 0); |
164 | page_private(kpte_page)--; | 164 | page_private(kpte_page)--; |
diff --git a/arch/i386/mm/pgtable.c b/arch/i386/mm/pgtable.c index fa0cfbd551e..9a96c164742 100644 --- a/arch/i386/mm/pgtable.c +++ b/arch/i386/mm/pgtable.c | |||
@@ -144,10 +144,8 @@ void set_pmd_pfn(unsigned long vaddr, unsigned long pfn, pgprot_t flags) | |||
144 | } | 144 | } |
145 | 145 | ||
146 | static int fixmaps; | 146 | static int fixmaps; |
147 | #ifndef CONFIG_COMPAT_VDSO | ||
148 | unsigned long __FIXADDR_TOP = 0xfffff000; | 147 | unsigned long __FIXADDR_TOP = 0xfffff000; |
149 | EXPORT_SYMBOL(__FIXADDR_TOP); | 148 | EXPORT_SYMBOL(__FIXADDR_TOP); |
150 | #endif | ||
151 | 149 | ||
152 | void __set_fixmap (enum fixed_addresses idx, unsigned long phys, pgprot_t flags) | 150 | void __set_fixmap (enum fixed_addresses idx, unsigned long phys, pgprot_t flags) |
153 | { | 151 | { |
@@ -173,12 +171,8 @@ void reserve_top_address(unsigned long reserve) | |||
173 | BUG_ON(fixmaps > 0); | 171 | BUG_ON(fixmaps > 0); |
174 | printk(KERN_INFO "Reserving virtual address space above 0x%08x\n", | 172 | printk(KERN_INFO "Reserving virtual address space above 0x%08x\n", |
175 | (int)-reserve); | 173 | (int)-reserve); |
176 | #ifdef CONFIG_COMPAT_VDSO | ||
177 | BUG_ON(reserve != 0); | ||
178 | #else | ||
179 | __FIXADDR_TOP = -reserve - PAGE_SIZE; | 174 | __FIXADDR_TOP = -reserve - PAGE_SIZE; |
180 | __VMALLOC_RESERVE += reserve; | 175 | __VMALLOC_RESERVE += reserve; |
181 | #endif | ||
182 | } | 176 | } |
183 | 177 | ||
184 | pte_t *pte_alloc_one_kernel(struct mm_struct *mm, unsigned long address) | 178 | pte_t *pte_alloc_one_kernel(struct mm_struct *mm, unsigned long address) |
@@ -238,42 +232,92 @@ static inline void pgd_list_del(pgd_t *pgd) | |||
238 | set_page_private(next, (unsigned long)pprev); | 232 | set_page_private(next, (unsigned long)pprev); |
239 | } | 233 | } |
240 | 234 | ||
235 | #if (PTRS_PER_PMD == 1) | ||
236 | /* Non-PAE pgd constructor */ | ||
241 | void pgd_ctor(void *pgd, struct kmem_cache *cache, unsigned long unused) | 237 | void pgd_ctor(void *pgd, struct kmem_cache *cache, unsigned long unused) |
242 | { | 238 | { |
243 | unsigned long flags; | 239 | unsigned long flags; |
244 | 240 | ||
245 | if (PTRS_PER_PMD == 1) { | 241 | /* !PAE, no pagetable sharing */ |
246 | memset(pgd, 0, USER_PTRS_PER_PGD*sizeof(pgd_t)); | 242 | memset(pgd, 0, USER_PTRS_PER_PGD*sizeof(pgd_t)); |
247 | spin_lock_irqsave(&pgd_lock, flags); | 243 | |
248 | } | 244 | spin_lock_irqsave(&pgd_lock, flags); |
249 | 245 | ||
246 | /* must happen under lock */ | ||
250 | clone_pgd_range((pgd_t *)pgd + USER_PTRS_PER_PGD, | 247 | clone_pgd_range((pgd_t *)pgd + USER_PTRS_PER_PGD, |
251 | swapper_pg_dir + USER_PTRS_PER_PGD, | 248 | swapper_pg_dir + USER_PTRS_PER_PGD, |
252 | KERNEL_PGD_PTRS); | 249 | KERNEL_PGD_PTRS); |
253 | |||
254 | if (PTRS_PER_PMD > 1) | ||
255 | return; | ||
256 | |||
257 | /* must happen under lock */ | ||
258 | paravirt_alloc_pd_clone(__pa(pgd) >> PAGE_SHIFT, | 250 | paravirt_alloc_pd_clone(__pa(pgd) >> PAGE_SHIFT, |
259 | __pa(swapper_pg_dir) >> PAGE_SHIFT, | 251 | __pa(swapper_pg_dir) >> PAGE_SHIFT, |
260 | USER_PTRS_PER_PGD, PTRS_PER_PGD - USER_PTRS_PER_PGD); | 252 | USER_PTRS_PER_PGD, |
261 | 253 | KERNEL_PGD_PTRS); | |
262 | pgd_list_add(pgd); | 254 | pgd_list_add(pgd); |
263 | spin_unlock_irqrestore(&pgd_lock, flags); | 255 | spin_unlock_irqrestore(&pgd_lock, flags); |
264 | } | 256 | } |
257 | #else /* PTRS_PER_PMD > 1 */ | ||
258 | /* PAE pgd constructor */ | ||
259 | void pgd_ctor(void *pgd, struct kmem_cache *cache, unsigned long unused) | ||
260 | { | ||
261 | /* PAE, kernel PMD may be shared */ | ||
262 | |||
263 | if (SHARED_KERNEL_PMD) { | ||
264 | clone_pgd_range((pgd_t *)pgd + USER_PTRS_PER_PGD, | ||
265 | swapper_pg_dir + USER_PTRS_PER_PGD, | ||
266 | KERNEL_PGD_PTRS); | ||
267 | } else { | ||
268 | unsigned long flags; | ||
269 | |||
270 | memset(pgd, 0, USER_PTRS_PER_PGD*sizeof(pgd_t)); | ||
271 | spin_lock_irqsave(&pgd_lock, flags); | ||
272 | pgd_list_add(pgd); | ||
273 | spin_unlock_irqrestore(&pgd_lock, flags); | ||
274 | } | ||
275 | } | ||
276 | #endif /* PTRS_PER_PMD */ | ||
265 | 277 | ||
266 | /* never called when PTRS_PER_PMD > 1 */ | ||
267 | void pgd_dtor(void *pgd, struct kmem_cache *cache, unsigned long unused) | 278 | void pgd_dtor(void *pgd, struct kmem_cache *cache, unsigned long unused) |
268 | { | 279 | { |
269 | unsigned long flags; /* can be called from interrupt context */ | 280 | unsigned long flags; /* can be called from interrupt context */ |
270 | 281 | ||
282 | BUG_ON(SHARED_KERNEL_PMD); | ||
283 | |||
271 | paravirt_release_pd(__pa(pgd) >> PAGE_SHIFT); | 284 | paravirt_release_pd(__pa(pgd) >> PAGE_SHIFT); |
272 | spin_lock_irqsave(&pgd_lock, flags); | 285 | spin_lock_irqsave(&pgd_lock, flags); |
273 | pgd_list_del(pgd); | 286 | pgd_list_del(pgd); |
274 | spin_unlock_irqrestore(&pgd_lock, flags); | 287 | spin_unlock_irqrestore(&pgd_lock, flags); |
275 | } | 288 | } |
276 | 289 | ||
290 | #define UNSHARED_PTRS_PER_PGD \ | ||
291 | (SHARED_KERNEL_PMD ? USER_PTRS_PER_PGD : PTRS_PER_PGD) | ||
292 | |||
293 | /* If we allocate a pmd for part of the kernel address space, then | ||
294 | make sure its initialized with the appropriate kernel mappings. | ||
295 | Otherwise use a cached zeroed pmd. */ | ||
296 | static pmd_t *pmd_cache_alloc(int idx) | ||
297 | { | ||
298 | pmd_t *pmd; | ||
299 | |||
300 | if (idx >= USER_PTRS_PER_PGD) { | ||
301 | pmd = (pmd_t *)__get_free_page(GFP_KERNEL); | ||
302 | |||
303 | if (pmd) | ||
304 | memcpy(pmd, | ||
305 | (void *)pgd_page_vaddr(swapper_pg_dir[idx]), | ||
306 | sizeof(pmd_t) * PTRS_PER_PMD); | ||
307 | } else | ||
308 | pmd = kmem_cache_alloc(pmd_cache, GFP_KERNEL); | ||
309 | |||
310 | return pmd; | ||
311 | } | ||
312 | |||
313 | static void pmd_cache_free(pmd_t *pmd, int idx) | ||
314 | { | ||
315 | if (idx >= USER_PTRS_PER_PGD) | ||
316 | free_page((unsigned long)pmd); | ||
317 | else | ||
318 | kmem_cache_free(pmd_cache, pmd); | ||
319 | } | ||
320 | |||
277 | pgd_t *pgd_alloc(struct mm_struct *mm) | 321 | pgd_t *pgd_alloc(struct mm_struct *mm) |
278 | { | 322 | { |
279 | int i; | 323 | int i; |
@@ -282,10 +326,12 @@ pgd_t *pgd_alloc(struct mm_struct *mm) | |||
282 | if (PTRS_PER_PMD == 1 || !pgd) | 326 | if (PTRS_PER_PMD == 1 || !pgd) |
283 | return pgd; | 327 | return pgd; |
284 | 328 | ||
285 | for (i = 0; i < USER_PTRS_PER_PGD; ++i) { | 329 | for (i = 0; i < UNSHARED_PTRS_PER_PGD; ++i) { |
286 | pmd_t *pmd = kmem_cache_alloc(pmd_cache, GFP_KERNEL); | 330 | pmd_t *pmd = pmd_cache_alloc(i); |
331 | |||
287 | if (!pmd) | 332 | if (!pmd) |
288 | goto out_oom; | 333 | goto out_oom; |
334 | |||
289 | paravirt_alloc_pd(__pa(pmd) >> PAGE_SHIFT); | 335 | paravirt_alloc_pd(__pa(pmd) >> PAGE_SHIFT); |
290 | set_pgd(&pgd[i], __pgd(1 + __pa(pmd))); | 336 | set_pgd(&pgd[i], __pgd(1 + __pa(pmd))); |
291 | } | 337 | } |
@@ -296,7 +342,7 @@ out_oom: | |||
296 | pgd_t pgdent = pgd[i]; | 342 | pgd_t pgdent = pgd[i]; |
297 | void* pmd = (void *)__va(pgd_val(pgdent)-1); | 343 | void* pmd = (void *)__va(pgd_val(pgdent)-1); |
298 | paravirt_release_pd(__pa(pmd) >> PAGE_SHIFT); | 344 | paravirt_release_pd(__pa(pmd) >> PAGE_SHIFT); |
299 | kmem_cache_free(pmd_cache, pmd); | 345 | pmd_cache_free(pmd, i); |
300 | } | 346 | } |
301 | kmem_cache_free(pgd_cache, pgd); | 347 | kmem_cache_free(pgd_cache, pgd); |
302 | return NULL; | 348 | return NULL; |
@@ -308,11 +354,11 @@ void pgd_free(pgd_t *pgd) | |||
308 | 354 | ||
309 | /* in the PAE case user pgd entries are overwritten before usage */ | 355 | /* in the PAE case user pgd entries are overwritten before usage */ |
310 | if (PTRS_PER_PMD > 1) | 356 | if (PTRS_PER_PMD > 1) |
311 | for (i = 0; i < USER_PTRS_PER_PGD; ++i) { | 357 | for (i = 0; i < UNSHARED_PTRS_PER_PGD; ++i) { |
312 | pgd_t pgdent = pgd[i]; | 358 | pgd_t pgdent = pgd[i]; |
313 | void* pmd = (void *)__va(pgd_val(pgdent)-1); | 359 | void* pmd = (void *)__va(pgd_val(pgdent)-1); |
314 | paravirt_release_pd(__pa(pmd) >> PAGE_SHIFT); | 360 | paravirt_release_pd(__pa(pmd) >> PAGE_SHIFT); |
315 | kmem_cache_free(pmd_cache, pmd); | 361 | pmd_cache_free(pmd, i); |
316 | } | 362 | } |
317 | /* in the non-PAE case, free_pgtables() clears user pgd entries */ | 363 | /* in the non-PAE case, free_pgtables() clears user pgd entries */ |
318 | kmem_cache_free(pgd_cache, pgd); | 364 | kmem_cache_free(pgd_cache, pgd); |
diff --git a/arch/i386/oprofile/nmi_int.c b/arch/i386/oprofile/nmi_int.c index 8fda7be9dd4..695f737516a 100644 --- a/arch/i386/oprofile/nmi_int.c +++ b/arch/i386/oprofile/nmi_int.c | |||
@@ -414,6 +414,10 @@ int __init op_nmi_init(struct oprofile_operations *ops) | |||
414 | user space an consistent name. */ | 414 | user space an consistent name. */ |
415 | cpu_type = "x86-64/hammer"; | 415 | cpu_type = "x86-64/hammer"; |
416 | break; | 416 | break; |
417 | case 0x10: | ||
418 | model = &op_athlon_spec; | ||
419 | cpu_type = "x86-64/family10"; | ||
420 | break; | ||
417 | } | 421 | } |
418 | break; | 422 | break; |
419 | 423 | ||
diff --git a/arch/i386/pci/init.c b/arch/i386/pci/init.c index b21b6da8ab1..1cf11af96de 100644 --- a/arch/i386/pci/init.c +++ b/arch/i386/pci/init.c | |||
@@ -6,7 +6,7 @@ | |||
6 | in the right sequence from here. */ | 6 | in the right sequence from here. */ |
7 | static __init int pci_access_init(void) | 7 | static __init int pci_access_init(void) |
8 | { | 8 | { |
9 | int type = 0; | 9 | int type __attribute__((unused)) = 0; |
10 | 10 | ||
11 | #ifdef CONFIG_PCI_DIRECT | 11 | #ifdef CONFIG_PCI_DIRECT |
12 | type = pci_direct_probe(); | 12 | type = pci_direct_probe(); |
diff --git a/arch/i386/pci/mmconfig-shared.c b/arch/i386/pci/mmconfig-shared.c index 747d8c63b0c..c7cabeed4d7 100644 --- a/arch/i386/pci/mmconfig-shared.c +++ b/arch/i386/pci/mmconfig-shared.c | |||
@@ -60,14 +60,19 @@ static const char __init *pci_mmcfg_e7520(void) | |||
60 | u32 win; | 60 | u32 win; |
61 | pci_conf1_read(0, 0, PCI_DEVFN(0,0), 0xce, 2, &win); | 61 | pci_conf1_read(0, 0, PCI_DEVFN(0,0), 0xce, 2, &win); |
62 | 62 | ||
63 | pci_mmcfg_config_num = 1; | 63 | win = win & 0xf000; |
64 | pci_mmcfg_config = kzalloc(sizeof(pci_mmcfg_config[0]), GFP_KERNEL); | 64 | if(win == 0x0000 || win == 0xf000) |
65 | if (!pci_mmcfg_config) | 65 | pci_mmcfg_config_num = 0; |
66 | return NULL; | 66 | else { |
67 | pci_mmcfg_config[0].address = (win & 0xf000) << 16; | 67 | pci_mmcfg_config_num = 1; |
68 | pci_mmcfg_config[0].pci_segment = 0; | 68 | pci_mmcfg_config = kzalloc(sizeof(pci_mmcfg_config[0]), GFP_KERNEL); |
69 | pci_mmcfg_config[0].start_bus_number = 0; | 69 | if (!pci_mmcfg_config) |
70 | pci_mmcfg_config[0].end_bus_number = 255; | 70 | return NULL; |
71 | pci_mmcfg_config[0].address = win << 16; | ||
72 | pci_mmcfg_config[0].pci_segment = 0; | ||
73 | pci_mmcfg_config[0].start_bus_number = 0; | ||
74 | pci_mmcfg_config[0].end_bus_number = 255; | ||
75 | } | ||
71 | 76 | ||
72 | return "Intel Corporation E7520 Memory Controller Hub"; | 77 | return "Intel Corporation E7520 Memory Controller Hub"; |
73 | } | 78 | } |
@@ -108,6 +113,10 @@ static const char __init *pci_mmcfg_intel_945(void) | |||
108 | if ((pciexbar & mask) & 0x0fffffffU) | 113 | if ((pciexbar & mask) & 0x0fffffffU) |
109 | pci_mmcfg_config_num = 0; | 114 | pci_mmcfg_config_num = 0; |
110 | 115 | ||
116 | /* Don't hit the APIC registers and their friends */ | ||
117 | if ((pciexbar & mask) >= 0xf0000000U) | ||
118 | pci_mmcfg_config_num = 0; | ||
119 | |||
111 | if (pci_mmcfg_config_num) { | 120 | if (pci_mmcfg_config_num) { |
112 | pci_mmcfg_config = kzalloc(sizeof(pci_mmcfg_config[0]), GFP_KERNEL); | 121 | pci_mmcfg_config = kzalloc(sizeof(pci_mmcfg_config[0]), GFP_KERNEL); |
113 | if (!pci_mmcfg_config) | 122 | if (!pci_mmcfg_config) |
diff --git a/arch/i386/power/cpu.c b/arch/i386/power/cpu.c index 2c15500f871..998fd3ec0d6 100644 --- a/arch/i386/power/cpu.c +++ b/arch/i386/power/cpu.c | |||
@@ -21,6 +21,7 @@ unsigned long saved_context_eflags; | |||
21 | 21 | ||
22 | void __save_processor_state(struct saved_context *ctxt) | 22 | void __save_processor_state(struct saved_context *ctxt) |
23 | { | 23 | { |
24 | mtrr_save_fixed_ranges(NULL); | ||
24 | kernel_fpu_begin(); | 25 | kernel_fpu_begin(); |
25 | 26 | ||
26 | /* | 27 | /* |
diff --git a/arch/i386/power/suspend.c b/arch/i386/power/suspend.c index db5e98d2eb7..a0020b913f3 100644 --- a/arch/i386/power/suspend.c +++ b/arch/i386/power/suspend.c | |||
@@ -16,6 +16,9 @@ | |||
16 | /* Defined in arch/i386/power/swsusp.S */ | 16 | /* Defined in arch/i386/power/swsusp.S */ |
17 | extern int restore_image(void); | 17 | extern int restore_image(void); |
18 | 18 | ||
19 | /* References to section boundaries */ | ||
20 | extern const void __nosave_begin, __nosave_end; | ||
21 | |||
19 | /* Pointer to the temporary resume page tables */ | 22 | /* Pointer to the temporary resume page tables */ |
20 | pgd_t *resume_pg_dir; | 23 | pgd_t *resume_pg_dir; |
21 | 24 | ||
@@ -156,3 +159,14 @@ int swsusp_arch_resume(void) | |||
156 | restore_image(); | 159 | restore_image(); |
157 | return 0; | 160 | return 0; |
158 | } | 161 | } |
162 | |||
163 | /* | ||
164 | * pfn_is_nosave - check if given pfn is in the 'nosave' section | ||
165 | */ | ||
166 | |||
167 | int pfn_is_nosave(unsigned long pfn) | ||
168 | { | ||
169 | unsigned long nosave_begin_pfn = __pa_symbol(&__nosave_begin) >> PAGE_SHIFT; | ||
170 | unsigned long nosave_end_pfn = PAGE_ALIGN(__pa_symbol(&__nosave_end)) >> PAGE_SHIFT; | ||
171 | return (pfn >= nosave_begin_pfn) && (pfn < nosave_end_pfn); | ||
172 | } | ||
diff --git a/arch/m32r/kernel/vmlinux.lds.S b/arch/m32r/kernel/vmlinux.lds.S index 439cc257cd1..6c73bca3f47 100644 --- a/arch/m32r/kernel/vmlinux.lds.S +++ b/arch/m32r/kernel/vmlinux.lds.S | |||
@@ -110,7 +110,7 @@ SECTIONS | |||
110 | __initramfs_end = .; | 110 | __initramfs_end = .; |
111 | #endif | 111 | #endif |
112 | 112 | ||
113 | . = ALIGN(32); | 113 | . = ALIGN(4096); |
114 | __per_cpu_start = .; | 114 | __per_cpu_start = .; |
115 | .data.percpu : { *(.data.percpu) } | 115 | .data.percpu : { *(.data.percpu) } |
116 | __per_cpu_end = .; | 116 | __per_cpu_end = .; |
diff --git a/arch/mips/kernel/vmlinux.lds.S b/arch/mips/kernel/vmlinux.lds.S index c76b793310c..043f637e3d1 100644 --- a/arch/mips/kernel/vmlinux.lds.S +++ b/arch/mips/kernel/vmlinux.lds.S | |||
@@ -119,7 +119,7 @@ SECTIONS | |||
119 | .init.ramfs : { *(.init.ramfs) } | 119 | .init.ramfs : { *(.init.ramfs) } |
120 | __initramfs_end = .; | 120 | __initramfs_end = .; |
121 | #endif | 121 | #endif |
122 | . = ALIGN(32); | 122 | . = ALIGN(_PAGE_SIZE); |
123 | __per_cpu_start = .; | 123 | __per_cpu_start = .; |
124 | .data.percpu : { *(.data.percpu) } | 124 | .data.percpu : { *(.data.percpu) } |
125 | __per_cpu_end = .; | 125 | __per_cpu_end = .; |
diff --git a/arch/parisc/kernel/vmlinux.lds.S b/arch/parisc/kernel/vmlinux.lds.S index 2a8253358c6..c7458599059 100644 --- a/arch/parisc/kernel/vmlinux.lds.S +++ b/arch/parisc/kernel/vmlinux.lds.S | |||
@@ -181,7 +181,7 @@ SECTIONS | |||
181 | .init.ramfs : { *(.init.ramfs) } | 181 | .init.ramfs : { *(.init.ramfs) } |
182 | __initramfs_end = .; | 182 | __initramfs_end = .; |
183 | #endif | 183 | #endif |
184 | . = ALIGN(32); | 184 | . = ALIGN(ASM_PAGE_SIZE); |
185 | __per_cpu_start = .; | 185 | __per_cpu_start = .; |
186 | .data.percpu : { *(.data.percpu) } | 186 | .data.percpu : { *(.data.percpu) } |
187 | __per_cpu_end = .; | 187 | __per_cpu_end = .; |
diff --git a/arch/powerpc/kernel/Makefile b/arch/powerpc/kernel/Makefile index e0fa80eca36..aa693d0f151 100644 --- a/arch/powerpc/kernel/Makefile +++ b/arch/powerpc/kernel/Makefile | |||
@@ -37,6 +37,7 @@ obj-$(CONFIG_CRASH_DUMP) += crash_dump.o | |||
37 | obj-$(CONFIG_6xx) += idle_6xx.o l2cr_6xx.o cpu_setup_6xx.o | 37 | obj-$(CONFIG_6xx) += idle_6xx.o l2cr_6xx.o cpu_setup_6xx.o |
38 | obj-$(CONFIG_TAU) += tau_6xx.o | 38 | obj-$(CONFIG_TAU) += tau_6xx.o |
39 | obj32-$(CONFIG_SOFTWARE_SUSPEND) += swsusp_32.o | 39 | obj32-$(CONFIG_SOFTWARE_SUSPEND) += swsusp_32.o |
40 | obj-$(CONFIG_SOFTWARE_SUSPEND) += suspend.o | ||
40 | obj32-$(CONFIG_MODULES) += module_32.o | 41 | obj32-$(CONFIG_MODULES) += module_32.o |
41 | 42 | ||
42 | ifeq ($(CONFIG_PPC_MERGE),y) | 43 | ifeq ($(CONFIG_PPC_MERGE),y) |
diff --git a/arch/powerpc/kernel/setup_64.c b/arch/powerpc/kernel/setup_64.c index 22083ce3cc3..6018178708a 100644 --- a/arch/powerpc/kernel/setup_64.c +++ b/arch/powerpc/kernel/setup_64.c | |||
@@ -582,14 +582,14 @@ void __init setup_per_cpu_areas(void) | |||
582 | char *ptr; | 582 | char *ptr; |
583 | 583 | ||
584 | /* Copy section for each CPU (we discard the original) */ | 584 | /* Copy section for each CPU (we discard the original) */ |
585 | size = ALIGN(__per_cpu_end - __per_cpu_start, SMP_CACHE_BYTES); | 585 | size = ALIGN(__per_cpu_end - __per_cpu_start, PAGE_SIZE); |
586 | #ifdef CONFIG_MODULES | 586 | #ifdef CONFIG_MODULES |
587 | if (size < PERCPU_ENOUGH_ROOM) | 587 | if (size < PERCPU_ENOUGH_ROOM) |
588 | size = PERCPU_ENOUGH_ROOM; | 588 | size = PERCPU_ENOUGH_ROOM; |
589 | #endif | 589 | #endif |
590 | 590 | ||
591 | for_each_possible_cpu(i) { | 591 | for_each_possible_cpu(i) { |
592 | ptr = alloc_bootmem_node(NODE_DATA(cpu_to_node(i)), size); | 592 | ptr = alloc_bootmem_pages_node(NODE_DATA(cpu_to_node(i)), size); |
593 | if (!ptr) | 593 | if (!ptr) |
594 | panic("Cannot allocate cpu data for CPU %d\n", i); | 594 | panic("Cannot allocate cpu data for CPU %d\n", i); |
595 | 595 | ||
diff --git a/arch/powerpc/kernel/suspend.c b/arch/powerpc/kernel/suspend.c new file mode 100644 index 00000000000..8cee5710754 --- /dev/null +++ b/arch/powerpc/kernel/suspend.c | |||
@@ -0,0 +1,24 @@ | |||
1 | /* | ||
2 | * Suspend support specific for power. | ||
3 | * | ||
4 | * Distribute under GPLv2 | ||
5 | * | ||
6 | * Copyright (c) 2002 Pavel Machek <pavel@suse.cz> | ||
7 | * Copyright (c) 2001 Patrick Mochel <mochel@osdl.org> | ||
8 | */ | ||
9 | |||
10 | #include <asm/page.h> | ||
11 | |||
12 | /* References to section boundaries */ | ||
13 | extern const void __nosave_begin, __nosave_end; | ||
14 | |||
15 | /* | ||
16 | * pfn_is_nosave - check if given pfn is in the 'nosave' section | ||
17 | */ | ||
18 | |||
19 | int pfn_is_nosave(unsigned long pfn) | ||
20 | { | ||
21 | unsigned long nosave_begin_pfn = __pa(&__nosave_begin) >> PAGE_SHIFT; | ||
22 | unsigned long nosave_end_pfn = PAGE_ALIGN(__pa(&__nosave_end)) >> PAGE_SHIFT; | ||
23 | return (pfn >= nosave_begin_pfn) && (pfn < nosave_end_pfn); | ||
24 | } | ||
diff --git a/arch/powerpc/kernel/vmlinux.lds.S b/arch/powerpc/kernel/vmlinux.lds.S index 7eefeb4a30e..13206731314 100644 --- a/arch/powerpc/kernel/vmlinux.lds.S +++ b/arch/powerpc/kernel/vmlinux.lds.S | |||
@@ -139,11 +139,7 @@ SECTIONS | |||
139 | __initramfs_end = .; | 139 | __initramfs_end = .; |
140 | } | 140 | } |
141 | #endif | 141 | #endif |
142 | #ifdef CONFIG_PPC32 | 142 | . = ALIGN(PAGE_SIZE); |
143 | . = ALIGN(32); | ||
144 | #else | ||
145 | . = ALIGN(128); | ||
146 | #endif | ||
147 | .data.percpu : { | 143 | .data.percpu : { |
148 | __per_cpu_start = .; | 144 | __per_cpu_start = .; |
149 | *(.data.percpu) | 145 | *(.data.percpu) |
diff --git a/arch/ppc/kernel/vmlinux.lds.S b/arch/ppc/kernel/vmlinux.lds.S index a0625562a44..44cd128fb71 100644 --- a/arch/ppc/kernel/vmlinux.lds.S +++ b/arch/ppc/kernel/vmlinux.lds.S | |||
@@ -130,7 +130,7 @@ SECTIONS | |||
130 | __ftr_fixup : { *(__ftr_fixup) } | 130 | __ftr_fixup : { *(__ftr_fixup) } |
131 | __stop___ftr_fixup = .; | 131 | __stop___ftr_fixup = .; |
132 | 132 | ||
133 | . = ALIGN(32); | 133 | . = ALIGN(4096); |
134 | __per_cpu_start = .; | 134 | __per_cpu_start = .; |
135 | .data.percpu : { *(.data.percpu) } | 135 | .data.percpu : { *(.data.percpu) } |
136 | __per_cpu_end = .; | 136 | __per_cpu_end = .; |
diff --git a/arch/s390/kernel/vmlinux.lds.S b/arch/s390/kernel/vmlinux.lds.S index 418f6426a94..e9d3432aba6 100644 --- a/arch/s390/kernel/vmlinux.lds.S +++ b/arch/s390/kernel/vmlinux.lds.S | |||
@@ -107,7 +107,7 @@ SECTIONS | |||
107 | . = ALIGN(2); | 107 | . = ALIGN(2); |
108 | __initramfs_end = .; | 108 | __initramfs_end = .; |
109 | #endif | 109 | #endif |
110 | . = ALIGN(256); | 110 | . = ALIGN(4096); |
111 | __per_cpu_start = .; | 111 | __per_cpu_start = .; |
112 | .data.percpu : { *(.data.percpu) } | 112 | .data.percpu : { *(.data.percpu) } |
113 | __per_cpu_end = .; | 113 | __per_cpu_end = .; |
diff --git a/arch/sh/kernel/vmlinux.lds.S b/arch/sh/kernel/vmlinux.lds.S index 78a6c09875b..2f606d0ce1f 100644 --- a/arch/sh/kernel/vmlinux.lds.S +++ b/arch/sh/kernel/vmlinux.lds.S | |||
@@ -54,7 +54,7 @@ SECTIONS | |||
54 | . = ALIGN(PAGE_SIZE); | 54 | . = ALIGN(PAGE_SIZE); |
55 | .data.page_aligned : { *(.data.page_aligned) } | 55 | .data.page_aligned : { *(.data.page_aligned) } |
56 | 56 | ||
57 | . = ALIGN(L1_CACHE_BYTES); | 57 | . = ALIGN(PAGE_SIZE); |
58 | __per_cpu_start = .; | 58 | __per_cpu_start = .; |
59 | .data.percpu : { *(.data.percpu) } | 59 | .data.percpu : { *(.data.percpu) } |
60 | __per_cpu_end = .; | 60 | __per_cpu_end = .; |
diff --git a/arch/sh64/kernel/vmlinux.lds.S b/arch/sh64/kernel/vmlinux.lds.S index a59c5e99813..4f9616f3983 100644 --- a/arch/sh64/kernel/vmlinux.lds.S +++ b/arch/sh64/kernel/vmlinux.lds.S | |||
@@ -85,7 +85,7 @@ SECTIONS | |||
85 | . = ALIGN(PAGE_SIZE); | 85 | . = ALIGN(PAGE_SIZE); |
86 | .data.page_aligned : C_PHYS(.data.page_aligned) { *(.data.page_aligned) } | 86 | .data.page_aligned : C_PHYS(.data.page_aligned) { *(.data.page_aligned) } |
87 | 87 | ||
88 | . = ALIGN(L1_CACHE_BYTES); | 88 | . = ALIGN(PAGE_SIZE); |
89 | __per_cpu_start = .; | 89 | __per_cpu_start = .; |
90 | .data.percpu : C_PHYS(.data.percpu) { *(.data.percpu) } | 90 | .data.percpu : C_PHYS(.data.percpu) { *(.data.percpu) } |
91 | __per_cpu_end = . ; | 91 | __per_cpu_end = . ; |
diff --git a/arch/sparc/kernel/vmlinux.lds.S b/arch/sparc/kernel/vmlinux.lds.S index e5c24e0521d..f0bb6e60e62 100644 --- a/arch/sparc/kernel/vmlinux.lds.S +++ b/arch/sparc/kernel/vmlinux.lds.S | |||
@@ -65,7 +65,7 @@ SECTIONS | |||
65 | __initramfs_end = .; | 65 | __initramfs_end = .; |
66 | #endif | 66 | #endif |
67 | 67 | ||
68 | . = ALIGN(32); | 68 | . = ALIGN(4096); |
69 | __per_cpu_start = .; | 69 | __per_cpu_start = .; |
70 | .data.percpu : { *(.data.percpu) } | 70 | .data.percpu : { *(.data.percpu) } |
71 | __per_cpu_end = .; | 71 | __per_cpu_end = .; |
diff --git a/arch/sparc64/kernel/smp.c b/arch/sparc64/kernel/smp.c index d4f0a70f484..1fac215252e 100644 --- a/arch/sparc64/kernel/smp.c +++ b/arch/sparc64/kernel/smp.c | |||
@@ -1343,11 +1343,11 @@ void __init setup_per_cpu_areas(void) | |||
1343 | /* Copy section for each CPU (we discard the original) */ | 1343 | /* Copy section for each CPU (we discard the original) */ |
1344 | goal = PERCPU_ENOUGH_ROOM; | 1344 | goal = PERCPU_ENOUGH_ROOM; |
1345 | 1345 | ||
1346 | __per_cpu_shift = 0; | 1346 | __per_cpu_shift = PAGE_SHIFT; |
1347 | for (size = 1UL; size < goal; size <<= 1UL) | 1347 | for (size = PAGE_SIZE; size < goal; size <<= 1UL) |
1348 | __per_cpu_shift++; | 1348 | __per_cpu_shift++; |
1349 | 1349 | ||
1350 | ptr = alloc_bootmem(size * NR_CPUS); | 1350 | ptr = alloc_bootmem_pages(size * NR_CPUS); |
1351 | 1351 | ||
1352 | __per_cpu_base = ptr - __per_cpu_start; | 1352 | __per_cpu_base = ptr - __per_cpu_start; |
1353 | 1353 | ||
diff --git a/arch/um/defconfig b/arch/um/defconfig index 780cc0a4a12..f938fa82214 100644 --- a/arch/um/defconfig +++ b/arch/um/defconfig | |||
@@ -41,6 +41,7 @@ CONFIG_M686=y | |||
41 | # CONFIG_MGEODE_LX is not set | 41 | # CONFIG_MGEODE_LX is not set |
42 | # CONFIG_MCYRIXIII is not set | 42 | # CONFIG_MCYRIXIII is not set |
43 | # CONFIG_MVIAC3_2 is not set | 43 | # CONFIG_MVIAC3_2 is not set |
44 | # CONFIG_MVIAC7 is not set | ||
44 | # CONFIG_X86_GENERIC is not set | 45 | # CONFIG_X86_GENERIC is not set |
45 | CONFIG_X86_CMPXCHG=y | 46 | CONFIG_X86_CMPXCHG=y |
46 | CONFIG_X86_XADD=y | 47 | CONFIG_X86_XADD=y |
diff --git a/arch/x86_64/Kconfig b/arch/x86_64/Kconfig index e9b4f058a49..145bb824b2a 100644 --- a/arch/x86_64/Kconfig +++ b/arch/x86_64/Kconfig | |||
@@ -415,13 +415,13 @@ config OUT_OF_LINE_PFN_TO_PAGE | |||
415 | depends on DISCONTIGMEM | 415 | depends on DISCONTIGMEM |
416 | 416 | ||
417 | config NR_CPUS | 417 | config NR_CPUS |
418 | int "Maximum number of CPUs (2-256)" | 418 | int "Maximum number of CPUs (2-255)" |
419 | range 2 255 | 419 | range 2 255 |
420 | depends on SMP | 420 | depends on SMP |
421 | default "8" | 421 | default "8" |
422 | help | 422 | help |
423 | This allows you to specify the maximum number of CPUs which this | 423 | This allows you to specify the maximum number of CPUs which this |
424 | kernel will support. Current maximum is 256 CPUs due to | 424 | kernel will support. Current maximum is 255 CPUs due to |
425 | APIC addressing limits. Less depending on the hardware. | 425 | APIC addressing limits. Less depending on the hardware. |
426 | 426 | ||
427 | This is purely to save memory - each supported CPU requires | 427 | This is purely to save memory - each supported CPU requires |
@@ -565,23 +565,56 @@ config CRASH_DUMP | |||
565 | PHYSICAL_START. | 565 | PHYSICAL_START. |
566 | For more details see Documentation/kdump/kdump.txt | 566 | For more details see Documentation/kdump/kdump.txt |
567 | 567 | ||
568 | config RELOCATABLE | ||
569 | bool "Build a relocatable kernel(EXPERIMENTAL)" | ||
570 | depends on EXPERIMENTAL | ||
571 | help | ||
572 | Builds a relocatable kernel. This enables loading and running | ||
573 | a kernel binary from a different physical address than it has | ||
574 | been compiled for. | ||
575 | |||
576 | One use is for the kexec on panic case where the recovery kernel | ||
577 | must live at a different physical address than the primary | ||
578 | kernel. | ||
579 | |||
580 | Note: If CONFIG_RELOCATABLE=y, then the kernel runs from the address | ||
581 | it has been loaded at and compile time physical address | ||
582 | (CONFIG_PHYSICAL_START) is ignored. | ||
583 | |||
568 | config PHYSICAL_START | 584 | config PHYSICAL_START |
569 | hex "Physical address where the kernel is loaded" if (EMBEDDED || CRASH_DUMP) | 585 | hex "Physical address where the kernel is loaded" if (EMBEDDED || CRASH_DUMP) |
570 | default "0x1000000" if CRASH_DUMP | ||
571 | default "0x200000" | 586 | default "0x200000" |
572 | help | 587 | help |
573 | This gives the physical address where the kernel is loaded. Normally | 588 | This gives the physical address where the kernel is loaded. It |
574 | for regular kernels this value is 0x200000 (2MB). But in the case | 589 | should be aligned to 2MB boundary. |
575 | of kexec on panic the fail safe kernel needs to run at a different | 590 | |
576 | address than the panic-ed kernel. This option is used to set the load | 591 | If the kernel is not relocatable (CONFIG_RELOCATABLE=n) then |
577 | address for kernels used to capture crash dump on being kexec'ed | 592 | bzImage will decompress itself to above physical address and |
578 | after panic. The default value for crash dump kernels is | 593 | run from there. Otherwise, bzImage will run from the address where |
579 | 0x1000000 (16MB). This can also be set based on the "X" value as | 594 | it has been loaded by the boot loader and will ignore above physical |
595 | address. | ||
596 | |||
597 | In normal kdump cases one does not have to set/change this option | ||
598 | as now bzImage can be compiled as a completely relocatable image | ||
599 | (CONFIG_RELOCATABLE=y) and be used to load and run from a different | ||
600 | address. This option is mainly useful for the folks who don't want | ||
601 | to use a bzImage for capturing the crash dump and want to use a | ||
602 | vmlinux instead. | ||
603 | |||
604 | So if you are using bzImage for capturing the crash dump, leave | ||
605 | the value here unchanged to 0x200000 and set CONFIG_RELOCATABLE=y. | ||
606 | Otherwise if you plan to use vmlinux for capturing the crash dump | ||
607 | change this value to start of the reserved region (Typically 16MB | ||
608 | 0x1000000). In other words, it can be set based on the "X" value as | ||
580 | specified in the "crashkernel=YM@XM" command line boot parameter | 609 | specified in the "crashkernel=YM@XM" command line boot parameter |
581 | passed to the panic-ed kernel. Typically this parameter is set as | 610 | passed to the panic-ed kernel. Typically this parameter is set as |
582 | crashkernel=64M@16M. Please take a look at | 611 | crashkernel=64M@16M. Please take a look at |
583 | Documentation/kdump/kdump.txt for more details about crash dumps. | 612 | Documentation/kdump/kdump.txt for more details about crash dumps. |
584 | 613 | ||
614 | Usage of bzImage for capturing the crash dump is advantageous as | ||
615 | one does not have to build two kernels. Same kernel can be used | ||
616 | as production kernel and capture kernel. | ||
617 | |||
585 | Don't change this unless you know what you are doing. | 618 | Don't change this unless you know what you are doing. |
586 | 619 | ||
587 | config SECCOMP | 620 | config SECCOMP |
@@ -627,14 +660,6 @@ config CC_STACKPROTECTOR_ALL | |||
627 | 660 | ||
628 | source kernel/Kconfig.hz | 661 | source kernel/Kconfig.hz |
629 | 662 | ||
630 | config REORDER | ||
631 | bool "Function reordering" | ||
632 | default n | ||
633 | help | ||
634 | This option enables the toolchain to reorder functions for a more | ||
635 | optimal TLB usage. If you have pretty much any version of binutils, | ||
636 | this can increase your kernel build time by roughly one minute. | ||
637 | |||
638 | config K8_NB | 663 | config K8_NB |
639 | def_bool y | 664 | def_bool y |
640 | depends on AGP_AMD64 || IOMMU || (PCI && NUMA) | 665 | depends on AGP_AMD64 || IOMMU || (PCI && NUMA) |
diff --git a/arch/x86_64/Makefile b/arch/x86_64/Makefile index 2941a915d4e..29617ae3926 100644 --- a/arch/x86_64/Makefile +++ b/arch/x86_64/Makefile | |||
@@ -40,10 +40,6 @@ cflags-y += -m64 | |||
40 | cflags-y += -mno-red-zone | 40 | cflags-y += -mno-red-zone |
41 | cflags-y += -mcmodel=kernel | 41 | cflags-y += -mcmodel=kernel |
42 | cflags-y += -pipe | 42 | cflags-y += -pipe |
43 | cflags-kernel-$(CONFIG_REORDER) += -ffunction-sections | ||
44 | # this makes reading assembly source easier, but produces worse code | ||
45 | # actually it makes the kernel smaller too. | ||
46 | cflags-y += -fno-reorder-blocks | ||
47 | cflags-y += -Wno-sign-compare | 43 | cflags-y += -Wno-sign-compare |
48 | cflags-y += -fno-asynchronous-unwind-tables | 44 | cflags-y += -fno-asynchronous-unwind-tables |
49 | ifneq ($(CONFIG_DEBUG_INFO),y) | 45 | ifneq ($(CONFIG_DEBUG_INFO),y) |
diff --git a/arch/x86_64/boot/Makefile b/arch/x86_64/boot/Makefile index deb063e7762..ee6f6505f95 100644 --- a/arch/x86_64/boot/Makefile +++ b/arch/x86_64/boot/Makefile | |||
@@ -36,7 +36,7 @@ subdir- := compressed/ #Let make clean descend in compressed/ | |||
36 | # --------------------------------------------------------------------------- | 36 | # --------------------------------------------------------------------------- |
37 | 37 | ||
38 | $(obj)/bzImage: IMAGE_OFFSET := 0x100000 | 38 | $(obj)/bzImage: IMAGE_OFFSET := 0x100000 |
39 | $(obj)/bzImage: EXTRA_AFLAGS := -traditional $(SVGA_MODE) $(RAMDISK) -D__BIG_KERNEL__ | 39 | $(obj)/bzImage: EXTRA_AFLAGS := $(SVGA_MODE) $(RAMDISK) -D__BIG_KERNEL__ |
40 | $(obj)/bzImage: BUILDFLAGS := -b | 40 | $(obj)/bzImage: BUILDFLAGS := -b |
41 | 41 | ||
42 | quiet_cmd_image = BUILD $@ | 42 | quiet_cmd_image = BUILD $@ |
diff --git a/arch/x86_64/boot/compressed/Makefile b/arch/x86_64/boot/compressed/Makefile index e70fa6e1da0..705a3e33d7e 100644 --- a/arch/x86_64/boot/compressed/Makefile +++ b/arch/x86_64/boot/compressed/Makefile | |||
@@ -8,16 +8,14 @@ | |||
8 | 8 | ||
9 | targets := vmlinux vmlinux.bin vmlinux.bin.gz head.o misc.o piggy.o | 9 | targets := vmlinux vmlinux.bin vmlinux.bin.gz head.o misc.o piggy.o |
10 | EXTRA_AFLAGS := -traditional | 10 | EXTRA_AFLAGS := -traditional |
11 | AFLAGS := $(subst -m64,-m32,$(AFLAGS)) | ||
12 | 11 | ||
13 | # cannot use EXTRA_CFLAGS because base CFLAGS contains -mkernel which conflicts with | 12 | # cannot use EXTRA_CFLAGS because base CFLAGS contains -mkernel which conflicts with |
14 | # -m32 | 13 | # -m32 |
15 | CFLAGS := -m32 -D__KERNEL__ -Iinclude -O2 -fno-strict-aliasing | 14 | CFLAGS := -m64 -D__KERNEL__ -Iinclude -O2 -fno-strict-aliasing -fPIC -mcmodel=small -fno-builtin |
16 | LDFLAGS := -m elf_i386 | 15 | LDFLAGS := -m elf_x86_64 |
17 | 16 | ||
18 | LDFLAGS_vmlinux := -Ttext $(IMAGE_OFFSET) -e startup_32 -m elf_i386 | 17 | LDFLAGS_vmlinux := -T |
19 | 18 | $(obj)/vmlinux: $(src)/vmlinux.lds $(obj)/head.o $(obj)/misc.o $(obj)/piggy.o FORCE | |
20 | $(obj)/vmlinux: $(obj)/head.o $(obj)/misc.o $(obj)/piggy.o FORCE | ||
21 | $(call if_changed,ld) | 19 | $(call if_changed,ld) |
22 | @: | 20 | @: |
23 | 21 | ||
@@ -27,7 +25,7 @@ $(obj)/vmlinux.bin: vmlinux FORCE | |||
27 | $(obj)/vmlinux.bin.gz: $(obj)/vmlinux.bin FORCE | 25 | $(obj)/vmlinux.bin.gz: $(obj)/vmlinux.bin FORCE |
28 | $(call if_changed,gzip) | 26 | $(call if_changed,gzip) |
29 | 27 | ||
30 | LDFLAGS_piggy.o := -r --format binary --oformat elf32-i386 -T | 28 | LDFLAGS_piggy.o := -r --format binary --oformat elf64-x86-64 -T |
31 | 29 | ||
32 | $(obj)/piggy.o: $(obj)/vmlinux.scr $(obj)/vmlinux.bin.gz FORCE | 30 | $(obj)/piggy.o: $(obj)/vmlinux.scr $(obj)/vmlinux.bin.gz FORCE |
33 | $(call if_changed,ld) | 31 | $(call if_changed,ld) |
diff --git a/arch/x86_64/boot/compressed/head.S b/arch/x86_64/boot/compressed/head.S index 6f55565e4d4..f9d5692a010 100644 --- a/arch/x86_64/boot/compressed/head.S +++ b/arch/x86_64/boot/compressed/head.S | |||
@@ -26,116 +26,279 @@ | |||
26 | 26 | ||
27 | #include <linux/linkage.h> | 27 | #include <linux/linkage.h> |
28 | #include <asm/segment.h> | 28 | #include <asm/segment.h> |
29 | #include <asm/pgtable.h> | ||
29 | #include <asm/page.h> | 30 | #include <asm/page.h> |
31 | #include <asm/msr.h> | ||
30 | 32 | ||
33 | .section ".text.head" | ||
31 | .code32 | 34 | .code32 |
32 | .globl startup_32 | 35 | .globl startup_32 |
33 | 36 | ||
34 | startup_32: | 37 | startup_32: |
35 | cld | 38 | cld |
36 | cli | 39 | cli |
37 | movl $(__KERNEL_DS),%eax | 40 | movl $(__KERNEL_DS), %eax |
38 | movl %eax,%ds | 41 | movl %eax, %ds |
39 | movl %eax,%es | 42 | movl %eax, %es |
40 | movl %eax,%fs | 43 | movl %eax, %ss |
41 | movl %eax,%gs | 44 | |
42 | 45 | /* Calculate the delta between where we were compiled to run | |
43 | lss stack_start,%esp | 46 | * at and where we were actually loaded at. This can only be done |
44 | xorl %eax,%eax | 47 | * with a short local call on x86. Nothing else will tell us what |
45 | 1: incl %eax # check that A20 really IS enabled | 48 | * address we are running at. The reserved chunk of the real-mode |
46 | movl %eax,0x000000 # loop forever if it isn't | 49 | * data at 0x34-0x3f are used as the stack for this calculation. |
47 | cmpl %eax,0x100000 | 50 | * Only 4 bytes are needed. |
48 | je 1b | 51 | */ |
52 | leal 0x40(%esi), %esp | ||
53 | call 1f | ||
54 | 1: popl %ebp | ||
55 | subl $1b, %ebp | ||
56 | |||
57 | /* setup a stack and make sure cpu supports long mode. */ | ||
58 | movl $user_stack_end, %eax | ||
59 | addl %ebp, %eax | ||
60 | movl %eax, %esp | ||
61 | |||
62 | call verify_cpu | ||
63 | testl %eax, %eax | ||
64 | jnz no_longmode | ||
65 | |||
66 | /* Compute the delta between where we were compiled to run at | ||
67 | * and where the code will actually run at. | ||
68 | */ | ||
69 | /* %ebp contains the address we are loaded at by the boot loader and %ebx | ||
70 | * contains the address where we should move the kernel image temporarily | ||
71 | * for safe in-place decompression. | ||
72 | */ | ||
73 | |||
74 | #ifdef CONFIG_RELOCATABLE | ||
75 | movl %ebp, %ebx | ||
76 | addl $(LARGE_PAGE_SIZE -1), %ebx | ||
77 | andl $LARGE_PAGE_MASK, %ebx | ||
78 | #else | ||
79 | movl $CONFIG_PHYSICAL_START, %ebx | ||
80 | #endif | ||
81 | |||
82 | /* Replace the compressed data size with the uncompressed size */ | ||
83 | subl input_len(%ebp), %ebx | ||
84 | movl output_len(%ebp), %eax | ||
85 | addl %eax, %ebx | ||
86 | /* Add 8 bytes for every 32K input block */ | ||
87 | shrl $12, %eax | ||
88 | addl %eax, %ebx | ||
89 | /* Add 32K + 18 bytes of extra slack and align on a 4K boundary */ | ||
90 | addl $(32768 + 18 + 4095), %ebx | ||
91 | andl $~4095, %ebx | ||
49 | 92 | ||
50 | /* | 93 | /* |
51 | * Initialize eflags. Some BIOS's leave bits like NT set. This would | 94 | * Prepare for entering 64 bit mode |
52 | * confuse the debugger if this code is traced. | ||
53 | * XXX - best to initialize before switching to protected mode. | ||
54 | */ | 95 | */ |
55 | pushl $0 | 96 | |
56 | popfl | 97 | /* Load new GDT with the 64bit segments using 32bit descriptor */ |
98 | leal gdt(%ebp), %eax | ||
99 | movl %eax, gdt+2(%ebp) | ||
100 | lgdt gdt(%ebp) | ||
101 | |||
102 | /* Enable PAE mode */ | ||
103 | xorl %eax, %eax | ||
104 | orl $(1 << 5), %eax | ||
105 | movl %eax, %cr4 | ||
106 | |||
107 | /* | ||
108 | * Build early 4G boot pagetable | ||
109 | */ | ||
110 | /* Initialize Page tables to 0*/ | ||
111 | leal pgtable(%ebx), %edi | ||
112 | xorl %eax, %eax | ||
113 | movl $((4096*6)/4), %ecx | ||
114 | rep stosl | ||
115 | |||
116 | /* Build Level 4 */ | ||
117 | leal pgtable + 0(%ebx), %edi | ||
118 | leal 0x1007 (%edi), %eax | ||
119 | movl %eax, 0(%edi) | ||
120 | |||
121 | /* Build Level 3 */ | ||
122 | leal pgtable + 0x1000(%ebx), %edi | ||
123 | leal 0x1007(%edi), %eax | ||
124 | movl $4, %ecx | ||
125 | 1: movl %eax, 0x00(%edi) | ||
126 | addl $0x00001000, %eax | ||
127 | addl $8, %edi | ||
128 | decl %ecx | ||
129 | jnz 1b | ||
130 | |||
131 | /* Build Level 2 */ | ||
132 | leal pgtable + 0x2000(%ebx), %edi | ||
133 | movl $0x00000183, %eax | ||
134 | movl $2048, %ecx | ||
135 | 1: movl %eax, 0(%edi) | ||
136 | addl $0x00200000, %eax | ||
137 | addl $8, %edi | ||
138 | decl %ecx | ||
139 | jnz 1b | ||
140 | |||
141 | /* Enable the boot page tables */ | ||
142 | leal pgtable(%ebx), %eax | ||
143 | movl %eax, %cr3 | ||
144 | |||
145 | /* Enable Long mode in EFER (Extended Feature Enable Register) */ | ||
146 | movl $MSR_EFER, %ecx | ||
147 | rdmsr | ||
148 | btsl $_EFER_LME, %eax | ||
149 | wrmsr | ||
150 | |||
151 | /* Setup for the jump to 64bit mode | ||
152 | * | ||
153 | * When the jump is performed we will be in long mode but | ||
154 | * in 32bit compatibility mode with EFER.LME = 1, CS.L = 0, CS.D = 1 | ||
155 | * (and in turn EFER.LMA = 1). To jump into 64bit mode we use | ||
156 | * the new gdt/idt that has __KERNEL_CS with CS.L = 1. | ||
157 | * We place all of the values on our mini stack so lret can | ||
158 | * be used to perform that far jump. | ||
159 | */ | ||
160 | pushl $__KERNEL_CS | ||
161 | leal startup_64(%ebp), %eax | ||
162 | pushl %eax | ||
163 | |||
164 | /* Enter paged protected Mode, activating Long Mode */ | ||
165 | movl $0x80000001, %eax /* Enable Paging and Protected mode */ | ||
166 | movl %eax, %cr0 | ||
167 | |||
168 | /* Jump from 32bit compatibility mode into 64bit mode. */ | ||
169 | lret | ||
170 | |||
171 | no_longmode: | ||
172 | /* This isn't an x86-64 CPU so hang */ | ||
173 | 1: | ||
174 | hlt | ||
175 | jmp 1b | ||
176 | |||
177 | #include "../../kernel/verify_cpu.S" | ||
178 | |||
179 | /* Be careful here startup_64 needs to be at a predictable | ||
180 | * address so I can export it in an ELF header. Bootloaders | ||
181 | * should look at the ELF header to find this address, as | ||
182 | * it may change in the future. | ||
183 | */ | ||
184 | .code64 | ||
185 | .org 0x200 | ||
186 | ENTRY(startup_64) | ||
187 | /* We come here either from startup_32 or directly from a | ||
188 | * 64bit bootloader. If we come here from a bootloader we depend on | ||
189 | * an identity mapped page table being provided that maps our | ||
190 | * entire text+data+bss and hopefully all of memory. | ||
191 | */ | ||
192 | |||
193 | /* Setup data segments. */ | ||
194 | xorl %eax, %eax | ||
195 | movl %eax, %ds | ||
196 | movl %eax, %es | ||
197 | movl %eax, %ss | ||
198 | |||
199 | /* Compute the decompressed kernel start address. It is where | ||
200 | * we were loaded at aligned to a 2M boundary. %rbp contains the | ||
201 | * decompressed kernel start address. | ||
202 | * | ||
203 | * If it is a relocatable kernel then decompress and run the kernel | ||
204 | * from load address aligned to 2MB addr, otherwise decompress and | ||
205 | * run the kernel from CONFIG_PHYSICAL_START | ||
206 | */ | ||
207 | |||
208 | /* Start with the delta to where the kernel will run at. */ | ||
209 | #ifdef CONFIG_RELOCATABLE | ||
210 | leaq startup_32(%rip) /* - $startup_32 */, %rbp | ||
211 | addq $(LARGE_PAGE_SIZE - 1), %rbp | ||
212 | andq $LARGE_PAGE_MASK, %rbp | ||
213 | movq %rbp, %rbx | ||
214 | #else | ||
215 | movq $CONFIG_PHYSICAL_START, %rbp | ||
216 | movq %rbp, %rbx | ||
217 | #endif | ||
218 | |||
219 | /* Replace the compressed data size with the uncompressed size */ | ||
220 | movl input_len(%rip), %eax | ||
221 | subq %rax, %rbx | ||
222 | movl output_len(%rip), %eax | ||
223 | addq %rax, %rbx | ||
224 | /* Add 8 bytes for every 32K input block */ | ||
225 | shrq $12, %rax | ||
226 | addq %rax, %rbx | ||
227 | /* Add 32K + 18 bytes of extra slack and align on a 4K boundary */ | ||
228 | addq $(32768 + 18 + 4095), %rbx | ||
229 | andq $~4095, %rbx | ||
230 | |||
231 | /* Copy the compressed kernel to the end of our buffer | ||
232 | * where decompression in place becomes safe. | ||
233 | */ | ||
234 | leaq _end(%rip), %r8 | ||
235 | leaq _end(%rbx), %r9 | ||
236 | movq $_end /* - $startup_32 */, %rcx | ||
237 | 1: subq $8, %r8 | ||
238 | subq $8, %r9 | ||
239 | movq 0(%r8), %rax | ||
240 | movq %rax, 0(%r9) | ||
241 | subq $8, %rcx | ||
242 | jnz 1b | ||
243 | |||
244 | /* | ||
245 | * Jump to the relocated address. | ||
246 | */ | ||
247 | leaq relocated(%rbx), %rax | ||
248 | jmp *%rax | ||
249 | |||
250 | .section ".text" | ||
251 | relocated: | ||
252 | |||
57 | /* | 253 | /* |
58 | * Clear BSS | 254 | * Clear BSS |
59 | */ | 255 | */ |
60 | xorl %eax,%eax | 256 | xorq %rax, %rax |
61 | movl $_edata,%edi | 257 | leaq _edata(%rbx), %rdi |
62 | movl $_end,%ecx | 258 | leaq _end(%rbx), %rcx |
63 | subl %edi,%ecx | 259 | subq %rdi, %rcx |
64 | cld | 260 | cld |
65 | rep | 261 | rep |
66 | stosb | 262 | stosb |
263 | |||
264 | /* Setup the stack */ | ||
265 | leaq user_stack_end(%rip), %rsp | ||
266 | |||
267 | /* zero EFLAGS after setting rsp */ | ||
268 | pushq $0 | ||
269 | popfq | ||
270 | |||
67 | /* | 271 | /* |
68 | * Do the decompression, and jump to the new kernel.. | 272 | * Do the decompression, and jump to the new kernel.. |
69 | */ | 273 | */ |
70 | subl $16,%esp # place for structure on the stack | 274 | pushq %rsi # Save the real mode argument |
71 | movl %esp,%eax | 275 | movq %rsi, %rdi # real mode address |
72 | pushl %esi # real mode pointer as second arg | 276 | leaq _heap(%rip), %rsi # _heap |
73 | pushl %eax # address of structure as first arg | 277 | leaq input_data(%rip), %rdx # input_data |
74 | call decompress_kernel | 278 | movl input_len(%rip), %eax |
75 | orl %eax,%eax | 279 | movq %rax, %rcx # input_len |
76 | jnz 3f | 280 | movq %rbp, %r8 # output |
77 | addl $8,%esp | 281 | call decompress_kernel |
78 | xorl %ebx,%ebx | 282 | popq %rsi |
79 | ljmp $(__KERNEL_CS), $__PHYSICAL_START | ||
80 | 283 | ||
81 | /* | ||
82 | * We come here, if we were loaded high. | ||
83 | * We need to move the move-in-place routine down to 0x1000 | ||
84 | * and then start it with the buffer addresses in registers, | ||
85 | * which we got from the stack. | ||
86 | */ | ||
87 | 3: | ||
88 | movl %esi,%ebx | ||
89 | movl $move_routine_start,%esi | ||
90 | movl $0x1000,%edi | ||
91 | movl $move_routine_end,%ecx | ||
92 | subl %esi,%ecx | ||
93 | addl $3,%ecx | ||
94 | shrl $2,%ecx | ||
95 | cld | ||
96 | rep | ||
97 | movsl | ||
98 | |||
99 | popl %esi # discard the address | ||
100 | addl $4,%esp # real mode pointer | ||
101 | popl %esi # low_buffer_start | ||
102 | popl %ecx # lcount | ||
103 | popl %edx # high_buffer_start | ||
104 | popl %eax # hcount | ||
105 | movl $__PHYSICAL_START,%edi | ||
106 | cli # make sure we don't get interrupted | ||
107 | ljmp $(__KERNEL_CS), $0x1000 # and jump to the move routine | ||
108 | 284 | ||
109 | /* | 285 | /* |
110 | * Routine (template) for moving the decompressed kernel in place, | 286 | * Jump to the decompressed kernel. |
111 | * if we were high loaded. This _must_ PIC-code ! | ||
112 | */ | 287 | */ |
113 | move_routine_start: | 288 | jmp *%rbp |
114 | movl %ecx,%ebp | ||
115 | shrl $2,%ecx | ||
116 | rep | ||
117 | movsl | ||
118 | movl %ebp,%ecx | ||
119 | andl $3,%ecx | ||
120 | rep | ||
121 | movsb | ||
122 | movl %edx,%esi | ||
123 | movl %eax,%ecx # NOTE: rep movsb won't move if %ecx == 0 | ||
124 | addl $3,%ecx | ||
125 | shrl $2,%ecx | ||
126 | rep | ||
127 | movsl | ||
128 | movl %ebx,%esi # Restore setup pointer | ||
129 | xorl %ebx,%ebx | ||
130 | ljmp $(__KERNEL_CS), $__PHYSICAL_START | ||
131 | move_routine_end: | ||
132 | 289 | ||
133 | 290 | .data | |
134 | /* Stack for uncompression */ | 291 | gdt: |
135 | .align 32 | 292 | .word gdt_end - gdt |
136 | user_stack: | 293 | .long gdt |
294 | .word 0 | ||
295 | .quad 0x0000000000000000 /* NULL descriptor */ | ||
296 | .quad 0x00af9a000000ffff /* __KERNEL_CS */ | ||
297 | .quad 0x00cf92000000ffff /* __KERNEL_DS */ | ||
298 | gdt_end: | ||
299 | .bss | ||
300 | /* Stack for uncompression */ | ||
301 | .balign 4 | ||
302 | user_stack: | ||
137 | .fill 4096,4,0 | 303 | .fill 4096,4,0 |
138 | stack_start: | 304 | user_stack_end: |
139 | .long user_stack+4096 | ||
140 | .word __KERNEL_DS | ||
141 | |||
diff --git a/arch/x86_64/boot/compressed/misc.c b/arch/x86_64/boot/compressed/misc.c index 3755b2e394d..f932b0e8909 100644 --- a/arch/x86_64/boot/compressed/misc.c +++ b/arch/x86_64/boot/compressed/misc.c | |||
@@ -9,10 +9,95 @@ | |||
9 | * High loaded stuff by Hans Lermen & Werner Almesberger, Feb. 1996 | 9 | * High loaded stuff by Hans Lermen & Werner Almesberger, Feb. 1996 |
10 | */ | 10 | */ |
11 | 11 | ||
12 | #define _LINUX_STRING_H_ 1 | ||
13 | #define __LINUX_BITMAP_H 1 | ||
14 | |||
15 | #include <linux/linkage.h> | ||
12 | #include <linux/screen_info.h> | 16 | #include <linux/screen_info.h> |
13 | #include <asm/io.h> | 17 | #include <asm/io.h> |
14 | #include <asm/page.h> | 18 | #include <asm/page.h> |
15 | 19 | ||
20 | /* WARNING!! | ||
21 | * This code is compiled with -fPIC and it is relocated dynamically | ||
22 | * at run time, but no relocation processing is performed. | ||
23 | * This means that it is not safe to place pointers in static structures. | ||
24 | */ | ||
25 | |||
26 | /* | ||
27 | * Getting to provable safe in place decompression is hard. | ||
28 | * Worst case behaviours need to be analyzed. | ||
29 | * Background information: | ||
30 | * | ||
31 | * The file layout is: | ||
32 | * magic[2] | ||
33 | * method[1] | ||
34 | * flags[1] | ||
35 | * timestamp[4] | ||
36 | * extraflags[1] | ||
37 | * os[1] | ||
38 | * compressed data blocks[N] | ||
39 | * crc[4] orig_len[4] | ||
40 | * | ||
41 | * resulting in 18 bytes of non compressed data overhead. | ||
42 | * | ||
43 | * Files divided into blocks | ||
44 | * 1 bit (last block flag) | ||
45 | * 2 bits (block type) | ||
46 | * | ||
47 | * 1 block occurs every 32K -1 bytes or when 50% compression has been achieved. | ||
48 | * The smallest block type encoding is always used. | ||
49 | * | ||
50 | * stored: | ||
51 | * 32 bits length in bytes. | ||
52 | * | ||
53 | * fixed: | ||
54 | * magic fixed tree. | ||
55 | * symbols. | ||
56 | * | ||
57 | * dynamic: | ||
58 | * dynamic tree encoding. | ||
59 | * symbols. | ||
60 | * | ||
61 | * | ||
62 | * The buffer for decompression in place is the length of the | ||
63 | * uncompressed data, plus a small amount extra to keep the algorithm safe. | ||
64 | * The compressed data is placed at the end of the buffer. The output | ||
65 | * pointer is placed at the start of the buffer and the input pointer | ||
66 | * is placed where the compressed data starts. Problems will occur | ||
67 | * when the output pointer overruns the input pointer. | ||
68 | * | ||
69 | * The output pointer can only overrun the input pointer if the input | ||
70 | * pointer is moving faster than the output pointer. A condition only | ||
71 | * triggered by data whose compressed form is larger than the uncompressed | ||
72 | * form. | ||
73 | * | ||
74 | * The worst case at the block level is a growth of the compressed data | ||
75 | * of 5 bytes per 32767 bytes. | ||
76 | * | ||
77 | * The worst case internal to a compressed block is very hard to figure. | ||
78 | * The worst case can at least be bounded by having one bit that represents | ||
79 | * 32764 bytes and then all of the rest of the bytes representing the very | ||
80 | * very last byte. | ||
81 | * | ||
82 | * All of which is enough to compute an amount of extra data that is required | ||
83 | * to be safe. To avoid problems at the block level allocating 5 extra bytes | ||
84 | * per 32767 bytes of data is sufficient. To avoid problems internal to a block | ||
85 | * adding an extra 32767 bytes (the worst case uncompressed block size) is | ||
86 | * sufficient, to ensure that in the worst case the decompressed data for | ||
87 | * block will stop the byte before the compressed data for a block begins. | ||
88 | * To avoid problems with the compressed data's meta information an extra 18 | ||
89 | * bytes are needed. Leading to the formula: | ||
90 | * | ||
91 | * extra_bytes = (uncompressed_size >> 12) + 32768 + 18 + decompressor_size. | ||
92 | * | ||
93 | * Adding 8 bytes per 32K is a bit excessive but much easier to calculate. | ||
94 | * Adding 32768 instead of 32767 just makes for round numbers. | ||
95 | * Adding the decompressor_size is necessary as it must live after all | ||
96 | * of the data as well. Last I measured the decompressor is about 14K. | ||
97 | * 10K of actual data and 4K of bss. | ||
98 | * | ||
99 | */ | ||
100 | |||
16 | /* | 101 | /* |
17 | * gzip declarations | 102 | * gzip declarations |
18 | */ | 103 | */ |
@@ -28,15 +113,20 @@ typedef unsigned char uch; | |||
28 | typedef unsigned short ush; | 113 | typedef unsigned short ush; |
29 | typedef unsigned long ulg; | 114 | typedef unsigned long ulg; |
30 | 115 | ||
31 | #define WSIZE 0x8000 /* Window size must be at least 32k, */ | 116 | #define WSIZE 0x80000000 /* Window size must be at least 32k, |
32 | /* and a power of two */ | 117 | * and a power of two |
118 | * We don't actually have a window just | ||
119 | * a huge output buffer so I report | ||
120 | * a 2G window size, as that should | ||
121 | * always be larger than our output buffer. | ||
122 | */ | ||
33 | 123 | ||
34 | static uch *inbuf; /* input buffer */ | 124 | static uch *inbuf; /* input buffer */ |
35 | static uch window[WSIZE]; /* Sliding window buffer */ | 125 | static uch *window; /* Sliding window buffer, (and final output buffer) */ |
36 | 126 | ||
37 | static unsigned insize = 0; /* valid bytes in inbuf */ | 127 | static unsigned insize; /* valid bytes in inbuf */ |
38 | static unsigned inptr = 0; /* index of next byte to be processed in inbuf */ | 128 | static unsigned inptr; /* index of next byte to be processed in inbuf */ |
39 | static unsigned outcnt = 0; /* bytes in output buffer */ | 129 | static unsigned outcnt; /* bytes in output buffer */ |
40 | 130 | ||
41 | /* gzip flag byte */ | 131 | /* gzip flag byte */ |
42 | #define ASCII_FLAG 0x01 /* bit 0 set: file probably ASCII text */ | 132 | #define ASCII_FLAG 0x01 /* bit 0 set: file probably ASCII text */ |
@@ -87,8 +177,6 @@ extern unsigned char input_data[]; | |||
87 | extern int input_len; | 177 | extern int input_len; |
88 | 178 | ||
89 | static long bytes_out = 0; | 179 | static long bytes_out = 0; |
90 | static uch *output_data; | ||
91 | static unsigned long output_ptr = 0; | ||
92 | 180 | ||
93 | static void *malloc(int size); | 181 | static void *malloc(int size); |
94 | static void free(void *where); | 182 | static void free(void *where); |
@@ -98,17 +186,10 @@ static void *memcpy(void *dest, const void *src, unsigned n); | |||
98 | 186 | ||
99 | static void putstr(const char *); | 187 | static void putstr(const char *); |
100 | 188 | ||
101 | extern int end; | 189 | static long free_mem_ptr; |
102 | static long free_mem_ptr = (long)&end; | ||
103 | static long free_mem_end_ptr; | 190 | static long free_mem_end_ptr; |
104 | 191 | ||
105 | #define INPLACE_MOVE_ROUTINE 0x1000 | 192 | #define HEAP_SIZE 0x7000 |
106 | #define LOW_BUFFER_START 0x2000 | ||
107 | #define LOW_BUFFER_MAX 0x90000 | ||
108 | #define HEAP_SIZE 0x3000 | ||
109 | static unsigned int low_buffer_end, low_buffer_size; | ||
110 | static int high_loaded =0; | ||
111 | static uch *high_buffer_start /* = (uch *)(((ulg)&end) + HEAP_SIZE)*/; | ||
112 | 193 | ||
113 | static char *vidmem = (char *)0xb8000; | 194 | static char *vidmem = (char *)0xb8000; |
114 | static int vidport; | 195 | static int vidport; |
@@ -218,58 +299,31 @@ static void* memcpy(void* dest, const void* src, unsigned n) | |||
218 | */ | 299 | */ |
219 | static int fill_inbuf(void) | 300 | static int fill_inbuf(void) |
220 | { | 301 | { |
221 | if (insize != 0) { | 302 | error("ran out of input data"); |
222 | error("ran out of input data"); | 303 | return 0; |
223 | } | ||
224 | |||
225 | inbuf = input_data; | ||
226 | insize = input_len; | ||
227 | inptr = 1; | ||
228 | return inbuf[0]; | ||
229 | } | 304 | } |
230 | 305 | ||
231 | /* =========================================================================== | 306 | /* =========================================================================== |
232 | * Write the output window window[0..outcnt-1] and update crc and bytes_out. | 307 | * Write the output window window[0..outcnt-1] and update crc and bytes_out. |
233 | * (Used for the decompressed data only.) | 308 | * (Used for the decompressed data only.) |
234 | */ | 309 | */ |
235 | static void flush_window_low(void) | ||
236 | { | ||
237 | ulg c = crc; /* temporary variable */ | ||
238 | unsigned n; | ||
239 | uch *in, *out, ch; | ||
240 | |||
241 | in = window; | ||
242 | out = &output_data[output_ptr]; | ||
243 | for (n = 0; n < outcnt; n++) { | ||
244 | ch = *out++ = *in++; | ||
245 | c = crc_32_tab[((int)c ^ ch) & 0xff] ^ (c >> 8); | ||
246 | } | ||
247 | crc = c; | ||
248 | bytes_out += (ulg)outcnt; | ||
249 | output_ptr += (ulg)outcnt; | ||
250 | outcnt = 0; | ||
251 | } | ||
252 | |||
253 | static void flush_window_high(void) | ||
254 | { | ||
255 | ulg c = crc; /* temporary variable */ | ||
256 | unsigned n; | ||
257 | uch *in, ch; | ||
258 | in = window; | ||
259 | for (n = 0; n < outcnt; n++) { | ||
260 | ch = *output_data++ = *in++; | ||
261 | if ((ulg)output_data == low_buffer_end) output_data=high_buffer_start; | ||
262 | c = crc_32_tab[((int)c ^ ch) & 0xff] ^ (c >> 8); | ||
263 | } | ||
264 | crc = c; | ||
265 | bytes_out += (ulg)outcnt; | ||
266 | outcnt = 0; | ||
267 | } | ||
268 | |||
269 | static void flush_window(void) | 310 | static void flush_window(void) |
270 | { | 311 | { |
271 | if (high_loaded) flush_window_high(); | 312 | /* With my window equal to my output buffer |
272 | else flush_window_low(); | 313 | * I only need to compute the crc here. |
314 | */ | ||
315 | ulg c = crc; /* temporary variable */ | ||
316 | unsigned n; | ||
317 | uch *in, ch; | ||
318 | |||
319 | in = window; | ||
320 | for (n = 0; n < outcnt; n++) { | ||
321 | ch = *in++; | ||
322 | c = crc_32_tab[((int)c ^ ch) & 0xff] ^ (c >> 8); | ||
323 | } | ||
324 | crc = c; | ||
325 | bytes_out += (ulg)outcnt; | ||
326 | outcnt = 0; | ||
273 | } | 327 | } |
274 | 328 | ||
275 | static void error(char *x) | 329 | static void error(char *x) |
@@ -281,57 +335,8 @@ static void error(char *x) | |||
281 | while(1); /* Halt */ | 335 | while(1); /* Halt */ |
282 | } | 336 | } |
283 | 337 | ||
284 | static void setup_normal_output_buffer(void) | 338 | asmlinkage void decompress_kernel(void *rmode, unsigned long heap, |
285 | { | 339 | uch *input_data, unsigned long input_len, uch *output) |
286 | #ifdef STANDARD_MEMORY_BIOS_CALL | ||
287 | if (RM_EXT_MEM_K < 1024) error("Less than 2MB of memory"); | ||
288 | #else | ||
289 | if ((RM_ALT_MEM_K > RM_EXT_MEM_K ? RM_ALT_MEM_K : RM_EXT_MEM_K) < 1024) error("Less than 2MB of memory"); | ||
290 | #endif | ||
291 | output_data = (unsigned char *)__PHYSICAL_START; /* Normally Points to 1M */ | ||
292 | free_mem_end_ptr = (long)real_mode; | ||
293 | } | ||
294 | |||
295 | struct moveparams { | ||
296 | uch *low_buffer_start; int lcount; | ||
297 | uch *high_buffer_start; int hcount; | ||
298 | }; | ||
299 | |||
300 | static void setup_output_buffer_if_we_run_high(struct moveparams *mv) | ||
301 | { | ||
302 | high_buffer_start = (uch *)(((ulg)&end) + HEAP_SIZE); | ||
303 | #ifdef STANDARD_MEMORY_BIOS_CALL | ||
304 | if (RM_EXT_MEM_K < (3*1024)) error("Less than 4MB of memory"); | ||
305 | #else | ||
306 | if ((RM_ALT_MEM_K > RM_EXT_MEM_K ? RM_ALT_MEM_K : RM_EXT_MEM_K) < (3*1024)) error("Less than 4MB of memory"); | ||
307 | #endif | ||
308 | mv->low_buffer_start = output_data = (unsigned char *)LOW_BUFFER_START; | ||
309 | low_buffer_end = ((unsigned int)real_mode > LOW_BUFFER_MAX | ||
310 | ? LOW_BUFFER_MAX : (unsigned int)real_mode) & ~0xfff; | ||
311 | low_buffer_size = low_buffer_end - LOW_BUFFER_START; | ||
312 | high_loaded = 1; | ||
313 | free_mem_end_ptr = (long)high_buffer_start; | ||
314 | if ( (__PHYSICAL_START + low_buffer_size) > ((ulg)high_buffer_start)) { | ||
315 | high_buffer_start = (uch *)(__PHYSICAL_START + low_buffer_size); | ||
316 | mv->hcount = 0; /* say: we need not to move high_buffer */ | ||
317 | } | ||
318 | else mv->hcount = -1; | ||
319 | mv->high_buffer_start = high_buffer_start; | ||
320 | } | ||
321 | |||
322 | static void close_output_buffer_if_we_run_high(struct moveparams *mv) | ||
323 | { | ||
324 | if (bytes_out > low_buffer_size) { | ||
325 | mv->lcount = low_buffer_size; | ||
326 | if (mv->hcount) | ||
327 | mv->hcount = bytes_out - low_buffer_size; | ||
328 | } else { | ||
329 | mv->lcount = bytes_out; | ||
330 | mv->hcount = 0; | ||
331 | } | ||
332 | } | ||
333 | |||
334 | int decompress_kernel(struct moveparams *mv, void *rmode) | ||
335 | { | 340 | { |
336 | real_mode = rmode; | 341 | real_mode = rmode; |
337 | 342 | ||
@@ -346,13 +351,21 @@ int decompress_kernel(struct moveparams *mv, void *rmode) | |||
346 | lines = RM_SCREEN_INFO.orig_video_lines; | 351 | lines = RM_SCREEN_INFO.orig_video_lines; |
347 | cols = RM_SCREEN_INFO.orig_video_cols; | 352 | cols = RM_SCREEN_INFO.orig_video_cols; |
348 | 353 | ||
349 | if (free_mem_ptr < 0x100000) setup_normal_output_buffer(); | 354 | window = output; /* Output buffer (Normally at 1M) */ |
350 | else setup_output_buffer_if_we_run_high(mv); | 355 | free_mem_ptr = heap; /* Heap */ |
356 | free_mem_end_ptr = heap + HEAP_SIZE; | ||
357 | inbuf = input_data; /* Input buffer */ | ||
358 | insize = input_len; | ||
359 | inptr = 0; | ||
360 | |||
361 | if ((ulg)output & (__KERNEL_ALIGN - 1)) | ||
362 | error("Destination address not 2M aligned"); | ||
363 | if ((ulg)output >= 0xffffffffffUL) | ||
364 | error("Destination address too large"); | ||
351 | 365 | ||
352 | makecrc(); | 366 | makecrc(); |
353 | putstr(".\nDecompressing Linux..."); | 367 | putstr(".\nDecompressing Linux..."); |
354 | gunzip(); | 368 | gunzip(); |
355 | putstr("done.\nBooting the kernel.\n"); | 369 | putstr("done.\nBooting the kernel.\n"); |
356 | if (high_loaded) close_output_buffer_if_we_run_high(mv); | 370 | return; |
357 | return high_loaded; | ||
358 | } | 371 | } |
diff --git a/arch/x86_64/boot/compressed/vmlinux.lds b/arch/x86_64/boot/compressed/vmlinux.lds new file mode 100644 index 00000000000..94c13e557fb --- /dev/null +++ b/arch/x86_64/boot/compressed/vmlinux.lds | |||
@@ -0,0 +1,44 @@ | |||
1 | OUTPUT_FORMAT("elf64-x86-64", "elf64-x86-64", "elf64-x86-64") | ||
2 | OUTPUT_ARCH(i386:x86-64) | ||
3 | ENTRY(startup_64) | ||
4 | SECTIONS | ||
5 | { | ||
6 | /* Be careful parts of head.S assume startup_32 is at | ||
7 | * address 0. | ||
8 | */ | ||
9 | . = 0; | ||
10 | .text : { | ||
11 | _head = . ; | ||
12 | *(.text.head) | ||
13 | _ehead = . ; | ||
14 | *(.text.compressed) | ||
15 | _text = .; /* Text */ | ||
16 | *(.text) | ||
17 | *(.text.*) | ||
18 | _etext = . ; | ||
19 | } | ||
20 | .rodata : { | ||
21 | _rodata = . ; | ||
22 | *(.rodata) /* read-only data */ | ||
23 | *(.rodata.*) | ||
24 | _erodata = . ; | ||
25 | } | ||
26 | .data : { | ||
27 | _data = . ; | ||
28 | *(.data) | ||
29 | *(.data.*) | ||
30 | _edata = . ; | ||
31 | } | ||
32 | .bss : { | ||
33 | _bss = . ; | ||
34 | *(.bss) | ||
35 | *(.bss.*) | ||
36 | *(COMMON) | ||
37 | . = ALIGN(8); | ||
38 | _end = . ; | ||
39 | . = ALIGN(4096); | ||
40 | pgtable = . ; | ||
41 | . = . + 4096 * 6; | ||
42 | _heap = .; | ||
43 | } | ||
44 | } | ||
diff --git a/arch/x86_64/boot/compressed/vmlinux.scr b/arch/x86_64/boot/compressed/vmlinux.scr index 1ed9d791f86..bd1429ce193 100644 --- a/arch/x86_64/boot/compressed/vmlinux.scr +++ b/arch/x86_64/boot/compressed/vmlinux.scr | |||
@@ -1,9 +1,10 @@ | |||
1 | SECTIONS | 1 | SECTIONS |
2 | { | 2 | { |
3 | .data : { | 3 | .text.compressed : { |
4 | input_len = .; | 4 | input_len = .; |
5 | LONG(input_data_end - input_data) input_data = .; | 5 | LONG(input_data_end - input_data) input_data = .; |
6 | *(.data) | 6 | *(.data) |
7 | input_data_end = .; | 7 | output_len = . - 4; |
8 | input_data_end = .; | ||
8 | } | 9 | } |
9 | } | 10 | } |
diff --git a/arch/x86_64/boot/setup.S b/arch/x86_64/boot/setup.S index 770940cc010..e9e33f94969 100644 --- a/arch/x86_64/boot/setup.S +++ b/arch/x86_64/boot/setup.S | |||
@@ -51,6 +51,7 @@ | |||
51 | #include <asm/boot.h> | 51 | #include <asm/boot.h> |
52 | #include <asm/e820.h> | 52 | #include <asm/e820.h> |
53 | #include <asm/page.h> | 53 | #include <asm/page.h> |
54 | #include <asm/setup.h> | ||
54 | 55 | ||
55 | /* Signature words to ensure LILO loaded us right */ | 56 | /* Signature words to ensure LILO loaded us right */ |
56 | #define SIG1 0xAA55 | 57 | #define SIG1 0xAA55 |
@@ -80,7 +81,7 @@ start: | |||
80 | # This is the setup header, and it must start at %cs:2 (old 0x9020:2) | 81 | # This is the setup header, and it must start at %cs:2 (old 0x9020:2) |
81 | 82 | ||
82 | .ascii "HdrS" # header signature | 83 | .ascii "HdrS" # header signature |
83 | .word 0x0204 # header version number (>= 0x0105) | 84 | .word 0x0206 # header version number (>= 0x0105) |
84 | # or else old loadlin-1.5 will fail) | 85 | # or else old loadlin-1.5 will fail) |
85 | realmode_swtch: .word 0, 0 # default_switch, SETUPSEG | 86 | realmode_swtch: .word 0, 0 # default_switch, SETUPSEG |
86 | start_sys_seg: .word SYSSEG | 87 | start_sys_seg: .word SYSSEG |
@@ -155,7 +156,20 @@ cmd_line_ptr: .long 0 # (Header version 0x0202 or later) | |||
155 | # low memory 0x10000 or higher. | 156 | # low memory 0x10000 or higher. |
156 | 157 | ||
157 | ramdisk_max: .long 0xffffffff | 158 | ramdisk_max: .long 0xffffffff |
158 | 159 | kernel_alignment: .long 0x200000 # physical addr alignment required for | |
160 | # protected mode relocatable kernel | ||
161 | #ifdef CONFIG_RELOCATABLE | ||
162 | relocatable_kernel: .byte 1 | ||
163 | #else | ||
164 | relocatable_kernel: .byte 0 | ||
165 | #endif | ||
166 | pad2: .byte 0 | ||
167 | pad3: .word 0 | ||
168 | |||
169 | cmdline_size: .long COMMAND_LINE_SIZE-1 #length of the command line, | ||
170 | #added with boot protocol | ||
171 | #version 2.06 | ||
172 | |||
159 | trampoline: call start_of_setup | 173 | trampoline: call start_of_setup |
160 | .align 16 | 174 | .align 16 |
161 | # The offset at this point is 0x240 | 175 | # The offset at this point is 0x240 |
@@ -290,64 +304,10 @@ loader_ok: | |||
290 | movw %cs,%ax | 304 | movw %cs,%ax |
291 | movw %ax,%ds | 305 | movw %ax,%ds |
292 | 306 | ||
293 | /* minimum CPUID flags for x86-64 */ | 307 | call verify_cpu |
294 | /* see http://www.x86-64.org/lists/discuss/msg02971.html */ | 308 | testl %eax,%eax |
295 | #define SSE_MASK ((1<<25)|(1<<26)) | 309 | jz sse_ok |
296 | #define REQUIRED_MASK1 ((1<<0)|(1<<3)|(1<<4)|(1<<5)|(1<<6)|(1<<8)|\ | 310 | |
297 | (1<<13)|(1<<15)|(1<<24)) | ||
298 | #define REQUIRED_MASK2 (1<<29) | ||
299 | |||
300 | pushfl /* standard way to check for cpuid */ | ||
301 | popl %eax | ||
302 | movl %eax,%ebx | ||
303 | xorl $0x200000,%eax | ||
304 | pushl %eax | ||
305 | popfl | ||
306 | pushfl | ||
307 | popl %eax | ||
308 | cmpl %eax,%ebx | ||
309 | jz no_longmode /* cpu has no cpuid */ | ||
310 | movl $0x0,%eax | ||
311 | cpuid | ||
312 | cmpl $0x1,%eax | ||
313 | jb no_longmode /* no cpuid 1 */ | ||
314 | xor %di,%di | ||
315 | cmpl $0x68747541,%ebx /* AuthenticAMD */ | ||
316 | jnz noamd | ||
317 | cmpl $0x69746e65,%edx | ||
318 | jnz noamd | ||
319 | cmpl $0x444d4163,%ecx | ||
320 | jnz noamd | ||
321 | mov $1,%di /* cpu is from AMD */ | ||
322 | noamd: | ||
323 | movl $0x1,%eax | ||
324 | cpuid | ||
325 | andl $REQUIRED_MASK1,%edx | ||
326 | xorl $REQUIRED_MASK1,%edx | ||
327 | jnz no_longmode | ||
328 | movl $0x80000000,%eax | ||
329 | cpuid | ||
330 | cmpl $0x80000001,%eax | ||
331 | jb no_longmode /* no extended cpuid */ | ||
332 | movl $0x80000001,%eax | ||
333 | cpuid | ||
334 | andl $REQUIRED_MASK2,%edx | ||
335 | xorl $REQUIRED_MASK2,%edx | ||
336 | jnz no_longmode | ||
337 | sse_test: | ||
338 | movl $1,%eax | ||
339 | cpuid | ||
340 | andl $SSE_MASK,%edx | ||
341 | cmpl $SSE_MASK,%edx | ||
342 | je sse_ok | ||
343 | test %di,%di | ||
344 | jz no_longmode /* only try to force SSE on AMD */ | ||
345 | movl $0xc0010015,%ecx /* HWCR */ | ||
346 | rdmsr | ||
347 | btr $15,%eax /* enable SSE */ | ||
348 | wrmsr | ||
349 | xor %di,%di /* don't loop */ | ||
350 | jmp sse_test /* try again */ | ||
351 | no_longmode: | 311 | no_longmode: |
352 | call beep | 312 | call beep |
353 | lea long_mode_panic,%si | 313 | lea long_mode_panic,%si |
@@ -357,7 +317,8 @@ no_longmode_loop: | |||
357 | long_mode_panic: | 317 | long_mode_panic: |
358 | .string "Your CPU does not support long mode. Use a 32bit distribution." | 318 | .string "Your CPU does not support long mode. Use a 32bit distribution." |
359 | .byte 0 | 319 | .byte 0 |
360 | 320 | ||
321 | #include "../kernel/verify_cpu.S" | ||
361 | sse_ok: | 322 | sse_ok: |
362 | popw %ds | 323 | popw %ds |
363 | 324 | ||
@@ -846,7 +807,7 @@ gdt_48: | |||
846 | 807 | ||
847 | # Include video setup & detection code | 808 | # Include video setup & detection code |
848 | 809 | ||
849 | #include "video.S" | 810 | #include "../../i386/boot/video.S" |
850 | 811 | ||
851 | # Setup signature -- must be last | 812 | # Setup signature -- must be last |
852 | setup_sig1: .word SIG1 | 813 | setup_sig1: .word SIG1 |
diff --git a/arch/x86_64/boot/video.S b/arch/x86_64/boot/video.S deleted file mode 100644 index 6090516c9c7..00000000000 --- a/arch/x86_64/boot/video.S +++ /dev/null | |||
@@ -1,2043 +0,0 @@ | |||
1 | /* video.S | ||
2 | * | ||
3 | * Display adapter & video mode setup, version 2.13 (14-May-99) | ||
4 | * | ||
5 | * Copyright (C) 1995 -- 1998 Martin Mares <mj@ucw.cz> | ||
6 | * Based on the original setup.S code (C) Linus Torvalds and Mats Anderson | ||
7 | * | ||
8 | * Rewritten to use GNU 'as' by Chris Noe <stiker@northlink.com> May 1999 | ||
9 | * | ||
10 | * For further information, look at Documentation/svga.txt. | ||
11 | * | ||
12 | */ | ||
13 | |||
14 | /* Enable autodetection of SVGA adapters and modes. */ | ||
15 | #undef CONFIG_VIDEO_SVGA | ||
16 | |||
17 | /* Enable autodetection of VESA modes */ | ||
18 | #define CONFIG_VIDEO_VESA | ||
19 | |||
20 | /* Enable compacting of mode table */ | ||
21 | #define CONFIG_VIDEO_COMPACT | ||
22 | |||
23 | /* Retain screen contents when switching modes */ | ||
24 | #define CONFIG_VIDEO_RETAIN | ||
25 | |||
26 | /* Enable local mode list */ | ||
27 | #undef CONFIG_VIDEO_LOCAL | ||
28 | |||
29 | /* Force 400 scan lines for standard modes (hack to fix bad BIOS behaviour */ | ||
30 | #undef CONFIG_VIDEO_400_HACK | ||
31 | |||
32 | /* Hack that lets you force specific BIOS mode ID and specific dimensions */ | ||
33 | #undef CONFIG_VIDEO_GFX_HACK | ||
34 | #define VIDEO_GFX_BIOS_AX 0x4f02 /* 800x600 on ThinkPad */ | ||
35 | #define VIDEO_GFX_BIOS_BX 0x0102 | ||
36 | #define VIDEO_GFX_DUMMY_RESOLUTION 0x6425 /* 100x37 */ | ||
37 | |||
38 | /* This code uses an extended set of video mode numbers. These include: | ||
39 | * Aliases for standard modes | ||
40 | * NORMAL_VGA (-1) | ||
41 | * EXTENDED_VGA (-2) | ||
42 | * ASK_VGA (-3) | ||
43 | * Video modes numbered by menu position -- NOT RECOMMENDED because of lack | ||
44 | * of compatibility when extending the table. These are between 0x00 and 0xff. | ||
45 | */ | ||
46 | #define VIDEO_FIRST_MENU 0x0000 | ||
47 | |||
48 | /* Standard BIOS video modes (BIOS number + 0x0100) */ | ||
49 | #define VIDEO_FIRST_BIOS 0x0100 | ||
50 | |||
51 | /* VESA BIOS video modes (VESA number + 0x0200) */ | ||
52 | #define VIDEO_FIRST_VESA 0x0200 | ||
53 | |||
54 | /* Video7 special modes (BIOS number + 0x0900) */ | ||
55 | #define VIDEO_FIRST_V7 0x0900 | ||
56 | |||
57 | /* Special video modes */ | ||
58 | #define VIDEO_FIRST_SPECIAL 0x0f00 | ||
59 | #define VIDEO_80x25 0x0f00 | ||
60 | #define VIDEO_8POINT 0x0f01 | ||
61 | #define VIDEO_80x43 0x0f02 | ||
62 | #define VIDEO_80x28 0x0f03 | ||
63 | #define VIDEO_CURRENT_MODE 0x0f04 | ||
64 | #define VIDEO_80x30 0x0f05 | ||
65 | #define VIDEO_80x34 0x0f06 | ||
66 | #define VIDEO_80x60 0x0f07 | ||
67 | #define VIDEO_GFX_HACK 0x0f08 | ||
68 | #define VIDEO_LAST_SPECIAL 0x0f09 | ||
69 | |||
70 | /* Video modes given by resolution */ | ||
71 | #define VIDEO_FIRST_RESOLUTION 0x1000 | ||
72 | |||
73 | /* The "recalculate timings" flag */ | ||
74 | #define VIDEO_RECALC 0x8000 | ||
75 | |||
76 | /* Positions of various video parameters passed to the kernel */ | ||
77 | /* (see also include/linux/tty.h) */ | ||
78 | #define PARAM_CURSOR_POS 0x00 | ||
79 | #define PARAM_VIDEO_PAGE 0x04 | ||
80 | #define PARAM_VIDEO_MODE 0x06 | ||
81 | #define PARAM_VIDEO_COLS 0x07 | ||
82 | #define PARAM_VIDEO_EGA_BX 0x0a | ||
83 | #define PARAM_VIDEO_LINES 0x0e | ||
84 | #define PARAM_HAVE_VGA 0x0f | ||
85 | #define PARAM_FONT_POINTS 0x10 | ||
86 | |||
87 | #define PARAM_LFB_WIDTH 0x12 | ||
88 | #define PARAM_LFB_HEIGHT 0x14 | ||
89 | #define PARAM_LFB_DEPTH 0x16 | ||
90 | #define PARAM_LFB_BASE 0x18 | ||
91 | #define PARAM_LFB_SIZE 0x1c | ||
92 | #define PARAM_LFB_LINELENGTH 0x24 | ||
93 | #define PARAM_LFB_COLORS 0x26 | ||
94 | #define PARAM_VESAPM_SEG 0x2e | ||
95 | #define PARAM_VESAPM_OFF 0x30 | ||
96 | #define PARAM_LFB_PAGES 0x32 | ||
97 | #define PARAM_VESA_ATTRIB 0x34 | ||
98 | #define PARAM_CAPABILITIES 0x36 | ||
99 | |||
100 | /* Define DO_STORE according to CONFIG_VIDEO_RETAIN */ | ||
101 | #ifdef CONFIG_VIDEO_RETAIN | ||
102 | #define DO_STORE call store_screen | ||
103 | #else | ||
104 | #define DO_STORE | ||
105 | #endif /* CONFIG_VIDEO_RETAIN */ | ||
106 | |||
107 | # This is the main entry point called by setup.S | ||
108 | # %ds *must* be pointing to the bootsector | ||
109 | video: pushw %ds # We use different segments | ||
110 | pushw %ds # FS contains original DS | ||
111 | popw %fs | ||
112 | pushw %cs # DS is equal to CS | ||
113 | popw %ds | ||
114 | pushw %cs # ES is equal to CS | ||
115 | popw %es | ||
116 | xorw %ax, %ax | ||
117 | movw %ax, %gs # GS is zero | ||
118 | cld | ||
119 | call basic_detect # Basic adapter type testing (EGA/VGA/MDA/CGA) | ||
120 | #ifdef CONFIG_VIDEO_SELECT | ||
121 | movw %fs:(0x01fa), %ax # User selected video mode | ||
122 | cmpw $ASK_VGA, %ax # Bring up the menu | ||
123 | jz vid2 | ||
124 | |||
125 | call mode_set # Set the mode | ||
126 | jc vid1 | ||
127 | |||
128 | leaw badmdt, %si # Invalid mode ID | ||
129 | call prtstr | ||
130 | vid2: call mode_menu | ||
131 | vid1: | ||
132 | #ifdef CONFIG_VIDEO_RETAIN | ||
133 | call restore_screen # Restore screen contents | ||
134 | #endif /* CONFIG_VIDEO_RETAIN */ | ||
135 | call store_edid | ||
136 | #endif /* CONFIG_VIDEO_SELECT */ | ||
137 | call mode_params # Store mode parameters | ||
138 | popw %ds # Restore original DS | ||
139 | ret | ||
140 | |||
141 | # Detect if we have CGA, MDA, EGA or VGA and pass it to the kernel. | ||
142 | basic_detect: | ||
143 | movb $0, %fs:(PARAM_HAVE_VGA) | ||
144 | movb $0x12, %ah # Check EGA/VGA | ||
145 | movb $0x10, %bl | ||
146 | int $0x10 | ||
147 | movw %bx, %fs:(PARAM_VIDEO_EGA_BX) # Identifies EGA to the kernel | ||
148 | cmpb $0x10, %bl # No, it's a CGA/MDA/HGA card. | ||
149 | je basret | ||
150 | |||
151 | incb adapter | ||
152 | movw $0x1a00, %ax # Check EGA or VGA? | ||
153 | int $0x10 | ||
154 | cmpb $0x1a, %al # 1a means VGA... | ||
155 | jne basret # anything else is EGA. | ||
156 | |||
157 | incb %fs:(PARAM_HAVE_VGA) # We've detected a VGA | ||
158 | incb adapter | ||
159 | basret: ret | ||
160 | |||
161 | # Store the video mode parameters for later usage by the kernel. | ||
162 | # This is done by asking the BIOS except for the rows/columns | ||
163 | # parameters in the default 80x25 mode -- these are set directly, | ||
164 | # because some very obscure BIOSes supply insane values. | ||
165 | mode_params: | ||
166 | #ifdef CONFIG_VIDEO_SELECT | ||
167 | cmpb $0, graphic_mode | ||
168 | jnz mopar_gr | ||
169 | #endif | ||
170 | movb $0x03, %ah # Read cursor position | ||
171 | xorb %bh, %bh | ||
172 | int $0x10 | ||
173 | movw %dx, %fs:(PARAM_CURSOR_POS) | ||
174 | movb $0x0f, %ah # Read page/mode/width | ||
175 | int $0x10 | ||
176 | movw %bx, %fs:(PARAM_VIDEO_PAGE) | ||
177 | movw %ax, %fs:(PARAM_VIDEO_MODE) # Video mode and screen width | ||
178 | cmpb $0x7, %al # MDA/HGA => segment differs | ||
179 | jnz mopar0 | ||
180 | |||
181 | movw $0xb000, video_segment | ||
182 | mopar0: movw %gs:(0x485), %ax # Font size | ||
183 | movw %ax, %fs:(PARAM_FONT_POINTS) # (valid only on EGA/VGA) | ||
184 | movw force_size, %ax # Forced size? | ||
185 | orw %ax, %ax | ||
186 | jz mopar1 | ||
187 | |||
188 | movb %ah, %fs:(PARAM_VIDEO_COLS) | ||
189 | movb %al, %fs:(PARAM_VIDEO_LINES) | ||
190 | ret | ||
191 | |||
192 | mopar1: movb $25, %al | ||
193 | cmpb $0, adapter # If we are on CGA/MDA/HGA, the | ||
194 | jz mopar2 # screen must have 25 lines. | ||
195 | |||
196 | movb %gs:(0x484), %al # On EGA/VGA, use the EGA+ BIOS | ||
197 | incb %al # location of max lines. | ||
198 | mopar2: movb %al, %fs:(PARAM_VIDEO_LINES) | ||
199 | ret | ||
200 | |||
201 | #ifdef CONFIG_VIDEO_SELECT | ||
202 | # Fetching of VESA frame buffer parameters | ||
203 | mopar_gr: | ||
204 | leaw modelist+1024, %di | ||
205 | movb $0x23, %fs:(PARAM_HAVE_VGA) | ||
206 | movw 16(%di), %ax | ||
207 | movw %ax, %fs:(PARAM_LFB_LINELENGTH) | ||
208 | movw 18(%di), %ax | ||
209 | movw %ax, %fs:(PARAM_LFB_WIDTH) | ||
210 | movw 20(%di), %ax | ||
211 | movw %ax, %fs:(PARAM_LFB_HEIGHT) | ||
212 | movb 25(%di), %al | ||
213 | movb $0, %ah | ||
214 | movw %ax, %fs:(PARAM_LFB_DEPTH) | ||
215 | movb 29(%di), %al | ||
216 | movb $0, %ah | ||
217 | movw %ax, %fs:(PARAM_LFB_PAGES) | ||
218 | movl 40(%di), %eax | ||
219 | movl %eax, %fs:(PARAM_LFB_BASE) | ||
220 | movl 31(%di), %eax | ||
221 | movl %eax, %fs:(PARAM_LFB_COLORS) | ||
222 | movl 35(%di), %eax | ||
223 | movl %eax, %fs:(PARAM_LFB_COLORS+4) | ||
224 | movw 0(%di), %ax | ||
225 | movw %ax, %fs:(PARAM_VESA_ATTRIB) | ||
226 | |||
227 | # get video mem size | ||
228 | leaw modelist+1024, %di | ||
229 | movw $0x4f00, %ax | ||
230 | int $0x10 | ||
231 | xorl %eax, %eax | ||
232 | movw 18(%di), %ax | ||
233 | movl %eax, %fs:(PARAM_LFB_SIZE) | ||
234 | |||
235 | # store mode capabilities | ||
236 | movl 10(%di), %eax | ||
237 | movl %eax, %fs:(PARAM_CAPABILITIES) | ||
238 | |||
239 | # switching the DAC to 8-bit is for <= 8 bpp only | ||
240 | movw %fs:(PARAM_LFB_DEPTH), %ax | ||
241 | cmpw $8, %ax | ||
242 | jg dac_done | ||
243 | |||
244 | # get DAC switching capability | ||
245 | xorl %eax, %eax | ||
246 | movb 10(%di), %al | ||
247 | testb $1, %al | ||
248 | jz dac_set | ||
249 | |||
250 | # attempt to switch DAC to 8-bit | ||
251 | movw $0x4f08, %ax | ||
252 | movw $0x0800, %bx | ||
253 | int $0x10 | ||
254 | cmpw $0x004f, %ax | ||
255 | jne dac_set | ||
256 | movb %bh, dac_size # store actual DAC size | ||
257 | |||
258 | dac_set: | ||
259 | # set color size to DAC size | ||
260 | movb dac_size, %al | ||
261 | movb %al, %fs:(PARAM_LFB_COLORS+0) | ||
262 | movb %al, %fs:(PARAM_LFB_COLORS+2) | ||
263 | movb %al, %fs:(PARAM_LFB_COLORS+4) | ||
264 | movb %al, %fs:(PARAM_LFB_COLORS+6) | ||
265 | |||
266 | # set color offsets to 0 | ||
267 | movb $0, %fs:(PARAM_LFB_COLORS+1) | ||
268 | movb $0, %fs:(PARAM_LFB_COLORS+3) | ||
269 | movb $0, %fs:(PARAM_LFB_COLORS+5) | ||
270 | movb $0, %fs:(PARAM_LFB_COLORS+7) | ||
271 | |||
272 | dac_done: | ||
273 | # get protected mode interface informations | ||
274 | movw $0x4f0a, %ax | ||
275 | xorw %bx, %bx | ||
276 | xorw %di, %di | ||
277 | int $0x10 | ||
278 | cmp $0x004f, %ax | ||
279 | jnz no_pm | ||
280 | |||
281 | movw %es, %fs:(PARAM_VESAPM_SEG) | ||
282 | movw %di, %fs:(PARAM_VESAPM_OFF) | ||
283 | no_pm: ret | ||
284 | |||
285 | # The video mode menu | ||
286 | mode_menu: | ||
287 | leaw keymsg, %si # "Return/Space/Timeout" message | ||
288 | call prtstr | ||
289 | call flush | ||
290 | nokey: call getkt | ||
291 | |||
292 | cmpb $0x0d, %al # ENTER ? | ||
293 | je listm # yes - manual mode selection | ||
294 | |||
295 | cmpb $0x20, %al # SPACE ? | ||
296 | je defmd1 # no - repeat | ||
297 | |||
298 | call beep | ||
299 | jmp nokey | ||
300 | |||
301 | defmd1: ret # No mode chosen? Default 80x25 | ||
302 | |||
303 | listm: call mode_table # Build the mode table, then list it | ||
304 | listm0: leaw name_bann, %si # Print adapter name: card_name if it is set, otherwise a name picked by the adapter type | ||
305 | call prtstr | ||
306 | movw card_name, %si | ||
307 | orw %si, %si | ||
308 | jnz an2 | ||
309 | |||
310 | movb adapter, %al | ||
311 | leaw old_name, %si | ||
312 | orb %al, %al | ||
313 | jz an1 | ||
314 | |||
315 | leaw ega_name, %si | ||
316 | decb %al | ||
317 | jz an1 | ||
318 | |||
319 | leaw vga_name, %si | ||
320 | jmp an1 | ||
321 | |||
322 | an2: call prtstr | ||
323 | leaw svga_name, %si | ||
324 | an1: call prtstr | ||
325 | leaw listhdr, %si # Table header | ||
326 | call prtstr | ||
327 | movb $0x30, %dl # DL holds the menu character, starting at '0' | ||
328 | leaw modelist, %si | ||
329 | lm1: cmpw $ASK_VGA, (%si) # End marker reached? | ||
330 | jz lm2 | ||
331 | |||
332 | movb %dl, %al # Menu selection character | ||
333 | call prtchr | ||
334 | call prtsp2 | ||
335 | lodsw | ||
336 | call prthw # Mode ID | ||
337 | call prtsp2 | ||
338 | movb 0x1(%si), %al | ||
339 | call prtdec # Columns (records store rows first, so 1(%si) is the column count) | ||
340 | movb $0x78, %al # the letter 'x' | ||
341 | call prtchr | ||
342 | lodsw | ||
343 | call prtdec # Rows (first byte of the size word, now in AL) | ||
344 | movb $0x0d, %al # New line | ||
345 | call prtchr | ||
346 | movb $0x0a, %al | ||
347 | call prtchr | ||
348 | incb %dl # Next character; after '9' continue with 'a' | ||
349 | cmpb $0x3a, %dl | ||
350 | jnz lm1 | ||
351 | |||
352 | movb $0x61, %dl | ||
353 | jmp lm1 | ||
354 | |||
355 | lm2: leaw prompt, %si # Mode prompt, followed by a small line editor that fills edit_buf | ||
356 | call prtstr | ||
357 | leaw edit_buf, %di # Editor buffer (DI is the write position) | ||
358 | lm3: call getkey | ||
359 | cmpb $0x0d, %al # Enter? Then parse what was typed | ||
360 | jz lment | ||
361 | |||
362 | cmpb $0x08, %al # Backspace? | ||
363 | jz lmbs | ||
364 | |||
365 | cmpb $0x20, %al # Printable? Codes below space are ignored | ||
366 | jc lm3 | ||
367 | |||
368 | cmpw $edit_buf+4, %di # Enough space? At most 4 characters are accepted | ||
369 | jz lm3 | ||
370 | |||
371 | stosb | ||
372 | call prtchr | ||
373 | jmp lm3 | ||
374 | |||
375 | lmbs: cmpw $edit_buf, %di # Backspace: nothing to erase when the buffer is empty | ||
376 | jz lm3 | ||
377 | |||
378 | decw %di | ||
379 | movb $0x08, %al | ||
380 | call prtchr | ||
381 | call prtspc | ||
382 | movb $0x08, %al | ||
383 | call prtchr | ||
384 | jmp lm3 | ||
385 | |||
386 | lment: movb $0, (%di) | ||
387 | leaw crlft, %si | ||
388 | call prtstr | ||
389 | leaw edit_buf, %si | ||
390 | cmpb $0, (%si) # Empty string = default mode (return without setting one) | ||
391 | jz lmdef | ||
392 | |||
393 | cmpb $0, 1(%si) # One character = menu selection | ||
394 | jz mnusel | ||
395 | |||
396 | cmpw $0x6373, (%si) # "scan" => mode scanning ("sc" is tested here, "an" below) | ||
397 | jnz lmhx | ||
398 | |||
399 | cmpw $0x6e61, 2(%si) | ||
400 | jz lmscan | ||
401 | |||
402 | lmhx: xorw %bx, %bx # Else => mode ID in hex, accumulated in BX one digit at a time | ||
403 | lmhex: lodsb | ||
404 | orb %al, %al | ||
405 | jz lmuse1 | ||
406 | |||
407 | subb $0x30, %al | ||
408 | jc lmbad | ||
409 | |||
410 | cmpb $10, %al | ||
411 | jc lmhx1 | ||
412 | |||
413 | subb $7, %al | ||
414 | andb $0xdf, %al | ||
415 | cmpb $10, %al | ||
416 | jc lmbad | ||
417 | |||
418 | cmpb $16, %al | ||
419 | jnc lmbad | ||
420 | |||
421 | lmhx1: shlw $4, %bx | ||
422 | orb %al, %bl | ||
423 | jmp lmhex | ||
424 | |||
425 | lmuse1: movw %bx, %ax | ||
426 | jmp lmuse | ||
427 | |||
428 | mnusel: lodsb # Menu selection: '0'-'9' => 0-9, 'a'-'z' => 10-35, anything else is rejected | ||
429 | xorb %ah, %ah | ||
430 | subb $0x30, %al | ||
431 | jc lmbad | ||
432 | |||
433 | cmpb $10, %al | ||
434 | jc lmuse | ||
435 | |||
436 | cmpb $0x61-0x30, %al | ||
437 | jc lmbad | ||
438 | |||
439 | subb $0x61-0x30-10, %al | ||
440 | cmpb $36, %al | ||
441 | jnc lmbad | ||
442 | |||
443 | lmuse: call mode_set | ||
444 | jc lmdef | ||
445 | |||
446 | lmbad: leaw unknt, %si | ||
447 | call prtstr | ||
448 | jmp lm2 | ||
449 | lmscan: cmpb $0, adapter # Scanning only on EGA/VGA (adapter 0 is rejected as a bad entry) | ||
450 | jz lmbad | ||
451 | |||
452 | movw $0, mt_end # Scanning of modes is done by clearing mt_end so that | ||
453 | movb $1, scanning # mode_table rebuilds the table, as a new autodetection. | ||
454 | call mode_table | ||
455 | jmp listm0 | ||
456 | lmdef: ret | ||
457 | |||
458 | # Additional parts of mode_set, placed ahead of it (relative jumps, you know) | ||
459 | setv7: # Video7 extended modes: strip the V7 prefix from BH, issue INT 10h AX=0x6f05, always return CF=1 | ||
460 | DO_STORE | ||
461 | subb $VIDEO_FIRST_V7>>8, %bh | ||
462 | movw $0x6f05, %ax | ||
463 | int $0x10 | ||
464 | stc | ||
465 | ret | ||
466 | |||
467 | _setrec: jmp setrec # Trampoline to setrec. Ugly... | ||
468 | _set_80x25: jmp set_80x25 | ||
469 | |||
470 | # Aliases for backward compatibility. Entered from mode_set when AH=0xff, with BX=mode ID: 0xffff selects VIDEO_80x25, 0xfffe replaces AL with VIDEO_8POINT-VIDEO_FIRST_SPECIAL, anything else goes to setbad. | ||
471 | setalias: | ||
472 | movw $VIDEO_80x25, %ax | ||
473 | incw %bx | ||
474 | jz mode_set | ||
475 | |||
476 | movb $VIDEO_8POINT-VIDEO_FIRST_SPECIAL, %al | ||
477 | incw %bx | ||
478 | jnz setbad # Fall-through into mode_set when BX was 0xfffe! | ||
479 | |||
480 | # Setting of user mode (AX=mode ID) => CF=success. Dispatches on the high byte of the ID; IDs that match no class fall into setbad, which returns with CF clear. | ||
481 | mode_set: | ||
482 | movw %ax, %fs:(0x01fa) # Store mode for use in acpi_wakeup.S | ||
483 | movw %ax, %bx | ||
484 | cmpb $0xff, %ah | ||
485 | jz setalias | ||
486 | |||
487 | testb $VIDEO_RECALC>>8, %ah | ||
488 | jnz _setrec | ||
489 | |||
490 | cmpb $VIDEO_FIRST_RESOLUTION>>8, %ah | ||
491 | jnc setres | ||
492 | |||
493 | cmpb $VIDEO_FIRST_SPECIAL>>8, %ah | ||
494 | jz setspc | ||
495 | |||
496 | cmpb $VIDEO_FIRST_V7>>8, %ah | ||
497 | jz setv7 | ||
498 | |||
499 | cmpb $VIDEO_FIRST_VESA>>8, %ah | ||
500 | jnc check_vesa | ||
501 | |||
502 | orb %ah, %ah | ||
503 | jz setmenu | ||
504 | |||
505 | decb %ah | ||
506 | jz setbios | ||
507 | |||
508 | setbad: clc | ||
509 | movb $0, do_restore # The screen needn't be restored (failure exit, CF=0) | ||
510 | ret | ||
511 | |||
512 | setvesa: | ||
513 | DO_STORE | ||
514 | subb $VIDEO_FIRST_VESA>>8, %bh | ||
515 | movw $0x4f02, %ax # VESA BIOS mode set call, BX=mode number with the VIDEO_FIRST_VESA prefix removed | ||
516 | int $0x10 | ||
517 | cmpw $0x004f, %ax # AL=4f if implemented | ||
518 | jnz setbad # AH=0 if OK; anything else is reported as failure | ||
519 | |||
520 | stc | ||
521 | ret | ||
522 | |||
523 | setbios: | ||
524 | DO_STORE | ||
525 | int $0x10 # Standard BIOS mode set call (mode_set arrives here with AH decremented to 0; assumes DO_STORE keeps AX) | ||
526 | pushw %bx | ||
527 | movb $0x0f, %ah # Check if really set: read the current mode back and compare it with BL | ||
528 | int $0x10 | ||
529 | popw %bx | ||
530 | cmpb %bl, %al | ||
531 | jnz setbad | ||
532 | |||
533 | stc | ||
534 | ret | ||
535 | |||
536 | setspc: xorb %bh, %bh # Set special mode: BL selects a word entry of spec_inits, out-of-range values go to setbad | ||
537 | cmpb $VIDEO_LAST_SPECIAL-VIDEO_FIRST_SPECIAL, %bl | ||
538 | jnc setbad | ||
539 | |||
540 | addw %bx, %bx | ||
541 | jmp *spec_inits(%bx) | ||
542 | |||
543 | setmenu: | ||
544 | orb %al, %al # 80x25 is an exception | ||
545 | jz _set_80x25 | ||
546 | |||
547 | pushw %bx # Set mode chosen from menu (BX=menu index) | ||
548 | call mode_table # Build the mode table (returns SI=start, DI=end) | ||
549 | popw %ax | ||
550 | shlw $2, %ax | ||
551 | addw %ax, %si | ||
552 | cmpw %di, %si | ||
553 | jnc setbad | ||
554 | |||
555 | movw (%si), %ax # Fetch mode ID of the selected 4-byte entry | ||
556 | _m_s: jmp mode_set | ||
557 | |||
558 | setres: pushw %bx # Set mode chosen by resolution: search the table for a matching size word | ||
559 | call mode_table | ||
560 | popw %bx | ||
561 | xchgb %bl, %bh | ||
562 | setr1: lodsw | ||
563 | cmpw $ASK_VGA, %ax # End of the list? Then the resolution is unknown | ||
564 | jz setbad | ||
565 | |||
566 | lodsw | ||
567 | cmpw %bx, %ax | ||
568 | jnz setr1 | ||
569 | |||
570 | movw -4(%si), %ax # Fetch mode ID (SI is already 4 bytes past the start of the matching entry) | ||
571 | jmp _m_s | ||
572 | |||
573 | check_vesa: | ||
574 | #ifdef CONFIG_FIRMWARE_EDID | ||
575 | leaw modelist+1024, %di | ||
576 | movw $0x4f00, %ax | ||
577 | int $0x10 | ||
578 | cmpw $0x004f, %ax | ||
579 | jnz setbad | ||
580 | |||
581 | movw 4(%di), %ax | ||
582 | movw %ax, vbe_version | ||
583 | #endif | ||
584 | leaw modelist+1024, %di | ||
585 | subb $VIDEO_FIRST_VESA>>8, %bh | ||
586 | movw %bx, %cx # Get mode information structure for mode CX into the buffer at modelist+1024 | ||
587 | movw $0x4f01, %ax | ||
588 | int $0x10 | ||
589 | addb $VIDEO_FIRST_VESA>>8, %bh | ||
590 | cmpw $0x004f, %ax | ||
591 | jnz setbad | ||
592 | |||
593 | movb (%di), %al # Check capabilities: first byte masked with 0x19 must equal 0x09 for the text path | ||
594 | andb $0x19, %al | ||
595 | cmpb $0x09, %al | ||
596 | jz setvesa # This is a text mode | ||
597 | |||
598 | movb (%di), %al # Check capabilities again: all bits of 0x99 must be set for the graphics path | ||
599 | andb $0x99, %al | ||
600 | cmpb $0x99, %al | ||
601 | jnz _setbad # Doh! No linear frame buffer. | ||
602 | |||
603 | subb $VIDEO_FIRST_VESA>>8, %bh | ||
604 | orw $0x4000, %bx # Use linear frame buffer | ||
605 | movw $0x4f02, %ax # VESA BIOS mode set call | ||
606 | int $0x10 | ||
607 | cmpw $0x004f, %ax # AL=4f if implemented | ||
608 | jnz _setbad # AH=0 if OK | ||
609 | |||
610 | movb $1, graphic_mode # flag graphic mode | ||
611 | movb $0, do_restore # no screen restore | ||
612 | stc | ||
613 | ret | ||
614 | |||
615 | _setbad: jmp setbad # Trampoline to setbad. Ugly... | ||
616 | |||
617 | # Recalculate vertical display end registers -- this fixes various | ||
618 | # inconsistencies of extended modes on many adapters. Called when | ||
619 | # the VIDEO_RECALC flag is set in the mode ID. Returns CF=1 on success; if setting the base mode fails it returns at once with CF clear. | ||
620 | |||
621 | setrec: subb $VIDEO_RECALC>>8, %ah # Remove the flag and set the base mode | ||
622 | call mode_set | ||
623 | jnc rct3 | ||
624 | |||
625 | movw %gs:(0x485), %ax # Font size in pixels | ||
626 | movb %gs:(0x484), %bl # Number of rows | ||
627 | incb %bl | ||
628 | mulb %bl # Number of visible | ||
629 | decw %ax # scan lines - 1 | ||
630 | movw $0x3d4, %dx | ||
631 | movw %ax, %bx | ||
632 | movb $0x12, %al # Lower 8 bits go to index 0x12 of port 0x3d4 | ||
633 | movb %bl, %ah | ||
634 | outw %ax, %dx | ||
635 | movb $0x07, %al # Bits 8 and 9 in the overflow register (index 0x07): bit 8 => 0x02, bit 9 => 0x40 | ||
636 | call inidx | ||
637 | xchgb %al, %ah | ||
638 | andb $0xbd, %ah | ||
639 | shrb %bh | ||
640 | jnc rct1 | ||
641 | orb $0x02, %ah | ||
642 | rct1: shrb %bh | ||
643 | jnc rct2 | ||
644 | orb $0x40, %ah | ||
645 | rct2: movb $0x07, %al | ||
646 | outw %ax, %dx | ||
647 | stc | ||
648 | rct3: ret | ||
649 | |||
650 | # Table of routines for setting of the special modes. setspc jumps through it, indexed by the low byte of the mode ID times 2. | ||
651 | spec_inits: | ||
652 | .word set_80x25 | ||
653 | .word set_8pixel | ||
654 | .word set_80x43 | ||
655 | .word set_80x28 | ||
656 | .word set_current | ||
657 | .word set_80x30 | ||
658 | .word set_80x34 | ||
659 | .word set_80x60 | ||
660 | .word set_gfx | ||
661 | |||
662 | # Set the 80x25 mode. If already set, do nothing. Always returns CF=1. use_80x25 is the entry used by the other set_* routines; it skips the force_size override. | ||
663 | set_80x25: | ||
664 | movw $0x5019, force_size # Override possibly broken BIOS | ||
665 | use_80x25: | ||
666 | #ifdef CONFIG_VIDEO_400_HACK | ||
667 | movw $0x1202, %ax # Force 400 scan lines | ||
668 | movb $0x30, %bl | ||
669 | int $0x10 | ||
670 | #else | ||
671 | movb $0x0f, %ah # Get current mode ID | ||
672 | int $0x10 | ||
673 | cmpw $0x5007, %ax # Mode 7 (80x25 mono) is the only one available | ||
674 | jz st80 # on CGA/MDA/HGA and is also available on EGAM | ||
675 | |||
676 | cmpw $0x5003, %ax # Unknown mode (neither 80-column mode 7 nor mode 3), force 80x25 color | ||
677 | jnz force3 | ||
678 | |||
679 | st80: cmpb $0, adapter # CGA/MDA/HGA => mode 3/7 is always 80x25 | ||
680 | jz set80 | ||
681 | |||
682 | movb %gs:(0x0484), %al # This is EGA+ -- beware of 80x50 etc. | ||
683 | orb %al, %al # Some buggy BIOS'es set 0 rows | ||
684 | jz set80 | ||
685 | |||
686 | cmpb $24, %al # It's hopefully correct (the byte at 0x484 is rows - 1) | ||
687 | jz set80 | ||
688 | #endif /* CONFIG_VIDEO_400_HACK */ | ||
689 | force3: DO_STORE | ||
690 | movw $0x0003, %ax # Forced set | ||
691 | int $0x10 | ||
692 | set80: stc | ||
693 | ret | ||
694 | |||
695 | # Set the 80x50/80x43 8-pixel mode. Simple BIOS calls. set_8pt is also entered from set_80x43 and set_80x60; set_current only reports success (CF=1). | ||
696 | set_8pixel: | ||
697 | DO_STORE | ||
698 | call use_80x25 # The base is 80x25 | ||
699 | set_8pt: | ||
700 | movw $0x1112, %ax # Use 8x8 font | ||
701 | xorb %bl, %bl | ||
702 | int $0x10 | ||
703 | movw $0x1200, %ax # Use alternate print screen | ||
704 | movb $0x20, %bl | ||
705 | int $0x10 | ||
706 | movw $0x1201, %ax # Turn off cursor emulation | ||
707 | movb $0x34, %bl | ||
708 | int $0x10 | ||
709 | movb $0x01, %ah # Define cursor scan lines 6-7 | ||
710 | movw $0x0607, %cx | ||
711 | int $0x10 | ||
712 | set_current: | ||
713 | stc | ||
714 | ret | ||
715 | |||
716 | # Set the 80x28 mode. This mode works on all VGAs, because it's a standard | ||
717 | # 80x25 mode with 14-point fonts instead of 16-point. set14 is also called by set_80x34. Returns CF=1. | ||
718 | set_80x28: | ||
719 | DO_STORE | ||
720 | call use_80x25 # The base is 80x25 | ||
721 | set14: movw $0x1111, %ax # Use 9x14 font | ||
722 | xorb %bl, %bl | ||
723 | int $0x10 | ||
724 | movb $0x01, %ah # Define cursor scan lines 11-12 | ||
725 | movw $0x0b0c, %cx | ||
726 | int $0x10 | ||
727 | stc | ||
728 | ret | ||
729 | |||
730 | # Set the 80x43 mode. This mode works on all VGAs. | ||
731 | # It's a 350-scanline mode with 8-pixel font. | ||
732 | set_80x43: | ||
733 | DO_STORE | ||
734 | movw $0x1201, %ax # Set 350 scans | ||
735 | movb $0x30, %bl | ||
736 | int $0x10 | ||
737 | movw $0x0003, %ax # Reset video mode | ||
738 | int $0x10 | ||
739 | jmp set_8pt # Use 8-pixel font (set_8pt returns with CF=1) | ||
740 | |||
741 | # Set the 80x30 mode (all VGAs). 480 scanlines, 16-pixel font. Programs the vertical timing registers directly; returns CF=1 with DX still holding the index port. | ||
742 | set_80x30: | ||
743 | call use_80x25 # Start with real 80x25 | ||
744 | DO_STORE | ||
745 | movw $0x3cc, %dx # Get CRTC port: read port 0x3cc, bit 0 set selects 0x3d4, clear selects 0x3b4 | ||
746 | inb %dx, %al | ||
747 | movb $0xd4, %dl | ||
748 | rorb %al # Mono or color? | ||
749 | jc set48a | ||
750 | |||
751 | movb $0xb4, %dl | ||
752 | set48a: movw $0x0c11, %ax # Vertical sync end (also unlocks CR0-7) | ||
753 | call outidx | ||
754 | movw $0x0b06, %ax # Vertical total | ||
755 | call outidx | ||
756 | movw $0x3e07, %ax # (Vertical) overflow | ||
757 | call outidx | ||
758 | movw $0xea10, %ax # Vertical sync start | ||
759 | call outidx | ||
760 | movw $0xdf12, %ax # Vertical display end | ||
761 | call outidx | ||
762 | movw $0xe715, %ax # Vertical blank start | ||
763 | call outidx | ||
764 | movw $0x0416, %ax # Vertical blank end | ||
765 | call outidx | ||
766 | pushw %dx | ||
767 | movb $0xcc, %dl # Misc output register (read) | ||
768 | inb %dx, %al | ||
769 | movb $0xc2, %dl # (write) | ||
770 | andb $0x0d, %al # Preserve clock select bits and color bit | ||
771 | orb $0xe2, %al # Set correct sync polarity | ||
772 | outb %al, %dx | ||
773 | popw %dx | ||
774 | movw $0x501e, force_size | ||
775 | stc # That's all. | ||
776 | ret | ||
777 | |||
778 | # Set the 80x34 mode (all VGAs). 480 scans, 14-pixel font. setvde writes AX through outidx and returns CF=1; set_80x60 shares it. | ||
779 | set_80x34: | ||
780 | call set_80x30 # Set 480 scans | ||
781 | call set14 # And 14-pt font | ||
782 | movw $0xdb12, %ax # VGA vertical display end | ||
783 | movw $0x5022, force_size | ||
784 | setvde: call outidx | ||
785 | stc | ||
786 | ret | ||
787 | |||
788 | # Set the 80x60 mode (all VGAs). 480 scans, 8-pixel font. Finishes through setvde, which returns CF=1. | ||
789 | set_80x60: | ||
790 | call set_80x30 # Set 480 scans | ||
791 | call set_8pt # And 8-pt font | ||
792 | movw $0xdf12, %ax # VGA vertical display end | ||
793 | movw $0x503c, force_size | ||
794 | jmp setvde | ||
795 | |||
796 | # Special hack for ThinkPad graphics. Without CONFIG_VIDEO_GFX_HACK this routine is a bare ret and never executes stc. | ||
797 | set_gfx: | ||
798 | #ifdef CONFIG_VIDEO_GFX_HACK | ||
799 | movw $VIDEO_GFX_BIOS_AX, %ax | ||
800 | movw $VIDEO_GFX_BIOS_BX, %bx | ||
801 | int $0x10 | ||
802 | movw $VIDEO_GFX_DUMMY_RESOLUTION, force_size | ||
803 | stc | ||
804 | #endif | ||
805 | ret | ||
806 | |||
807 | #ifdef CONFIG_VIDEO_RETAIN | ||
808 | |||
809 | # Store screen contents to temporary buffer at modelist+1024: cursor position, dimensions, then the character cells. Does nothing if already stored, if CAN_USE_HEAP is clear in loadflags, or if the image would not fit below heap_end_ptr. AX and BX are preserved. | ||
810 | store_screen: | ||
811 | cmpb $0, do_restore # Already stored? | ||
812 | jnz stsr | ||
813 | |||
814 | testb $CAN_USE_HEAP, loadflags # Have we space for storing? | ||
815 | jz stsr | ||
816 | |||
817 | pushw %ax | ||
818 | pushw %bx | ||
819 | pushw force_size # Don't force specific size | ||
820 | movw $0, force_size | ||
821 | call mode_params # Obtain params of current mode | ||
822 | popw force_size | ||
823 | movb %fs:(PARAM_VIDEO_LINES), %ah | ||
824 | movb %fs:(PARAM_VIDEO_COLS), %al | ||
825 | movw %ax, %bx # BX=dimensions (lines in BH, columns in BL) | ||
826 | mulb %ah | ||
827 | movw %ax, %cx # CX=number of characters | ||
828 | addw %ax, %ax # Calculate image size (2 bytes per character) | ||
829 | addw $modelist+1024+4, %ax | ||
830 | cmpw heap_end_ptr, %ax | ||
831 | jnc sts1 # Unfortunately, out of memory | ||
832 | |||
833 | movw %fs:(PARAM_CURSOR_POS), %ax # Store mode params | ||
834 | leaw modelist+1024, %di | ||
835 | stosw | ||
836 | movw %bx, %ax | ||
837 | stosw | ||
838 | pushw %ds # Store the screen: copy CX words from video_segment:0 | ||
839 | movw video_segment, %ds | ||
840 | xorw %si, %si | ||
841 | rep | ||
842 | movsw | ||
843 | popw %ds | ||
844 | incb do_restore # Screen will be restored later | ||
845 | sts1: popw %bx | ||
846 | popw %ax | ||
847 | stsr: ret | ||
848 | |||
849 | # Restore screen contents from temporary buffer at modelist+1024, clipping the saved image and cursor to the size of the current mode. Does nothing when do_restore is 0. | ||
850 | restore_screen: | ||
851 | cmpb $0, do_restore # Has the screen been stored? | ||
852 | jz res1 | ||
853 | |||
854 | call mode_params # Get parameters of current mode | ||
855 | movb %fs:(PARAM_VIDEO_LINES), %cl | ||
856 | movb %fs:(PARAM_VIDEO_COLS), %ch | ||
857 | leaw modelist+1024, %si # Screen buffer | ||
858 | lodsw # Set cursor position, clamped to the last line/column of the current mode | ||
859 | movw %ax, %dx | ||
860 | cmpb %cl, %dh | ||
861 | jc res2 | ||
862 | |||
863 | movb %cl, %dh | ||
864 | decb %dh | ||
865 | res2: cmpb %ch, %dl | ||
866 | jc res3 | ||
867 | |||
868 | movb %ch, %dl | ||
869 | decb %dl | ||
870 | res3: movb $0x02, %ah | ||
871 | movb $0x00, %bh | ||
872 | int $0x10 | ||
873 | lodsw # Display size | ||
874 | movb %ah, %dl # DL=number of lines | ||
875 | movb $0, %ah # BX=phys. length of orig. line | ||
876 | movw %ax, %bx | ||
877 | cmpb %cl, %dl # Too many? Then skip the extra lines at the top of the saved image | ||
878 | jc res4 | ||
879 | |||
880 | pushw %ax | ||
881 | movb %dl, %al | ||
882 | subb %cl, %al | ||
883 | mulb %bl | ||
884 | addw %ax, %si | ||
885 | addw %ax, %si | ||
886 | popw %ax | ||
887 | movb %cl, %dl | ||
888 | res4: cmpb %ch, %al # Too wide? | ||
889 | jc res5 | ||
890 | |||
891 | movb %ch, %al # AX=width of src. line | ||
892 | res5: movb $0, %cl | ||
893 | xchgb %ch, %cl | ||
894 | movw %cx, %bp # BP=width of dest. line | ||
895 | pushw %es | ||
896 | movw video_segment, %es | ||
897 | xorw %di, %di # Move the data | ||
898 | addw %bx, %bx # Convert BX and BP to _bytes_ | ||
899 | addw %bp, %bp | ||
900 | res6: pushw %si | ||
901 | pushw %di | ||
902 | movw %ax, %cx | ||
903 | rep | ||
904 | movsw | ||
905 | popw %di | ||
906 | popw %si | ||
907 | addw %bp, %di | ||
908 | addw %bx, %si | ||
909 | decb %dl | ||
910 | jnz res6 | ||
911 | |||
912 | popw %es # Done | ||
913 | res1: ret | ||
914 | #endif /* CONFIG_VIDEO_RETAIN */ | ||
915 | |||
916 | # Write to indexed VGA register (AL=index, AH=data, DX=index reg. port). The data byte goes to port DX+1; AX and DX are preserved. | ||
917 | outidx: outb %al, %dx | ||
918 | pushw %ax | ||
919 | movb %ah, %al | ||
920 | incw %dx | ||
921 | outb %al, %dx | ||
922 | decw %dx | ||
923 | popw %ax | ||
924 | ret | ||
925 | |||
926 | # Build the table of video modes (stored after the setup.S code at the | ||
927 | # `modelist' label). Each video mode record looks like: | ||
928 | # .word MODE-ID (our special mode ID (see above)) | ||
929 | # .byte rows (number of rows) | ||
930 | # .byte columns (number of columns) | ||
931 | # Returns SI=modelist and the address of the end of the table in DI; the end is marked | ||
932 | # with an ASK_VGA ID. The table is built once: mt_end caches the end address and a non-zero value skips the rebuild. | ||
933 | mode_table: | ||
934 | movw mt_end, %di # Already filled? | ||
935 | orw %di, %di | ||
936 | jnz mtab1x | ||
937 | |||
938 | leaw modelist, %di # Store standard modes: | ||
939 | movl $VIDEO_80x25 + 0x50190000, %eax # The 80x25 mode (ALL) | ||
940 | stosl | ||
941 | movb adapter, %al # CGA/MDA/HGA -- no more modes | ||
942 | orb %al, %al | ||
943 | jz mtabe | ||
944 | |||
945 | decb %al | ||
946 | jnz mtabv | ||
947 | |||
948 | movl $VIDEO_8POINT + 0x502b0000, %eax # The 80x43 EGA mode | ||
949 | stosl | ||
950 | jmp mtabe | ||
951 | |||
952 | mtab1x: jmp mtab1 | ||
953 | |||
954 | mtabv: leaw vga_modes, %si # All modes for std VGA | ||
955 | movw $vga_modes_end-vga_modes, %cx | ||
956 | rep # I'm unable to use movsw as I don't know how to store a half | ||
957 | movsb # of the expression above to cx without using explicit shr. | ||
958 | |||
959 | cmpb $0, scanning # Mode scan requested? | ||
960 | jz mscan1 | ||
961 | |||
962 | call mode_scan | ||
963 | mscan1: | ||
964 | |||
965 | #ifdef CONFIG_VIDEO_LOCAL | ||
966 | call local_modes | ||
967 | #endif /* CONFIG_VIDEO_LOCAL */ | ||
968 | |||
969 | #ifdef CONFIG_VIDEO_VESA | ||
970 | call vesa_modes # Detect VESA VGA modes | ||
971 | #endif /* CONFIG_VIDEO_VESA */ | ||
972 | |||
973 | #ifdef CONFIG_VIDEO_SVGA | ||
974 | cmpb $0, scanning # Bypass when scanning | ||
975 | jnz mscan2 | ||
976 | |||
977 | call svga_modes # Detect SVGA cards & modes | ||
978 | mscan2: | ||
979 | #endif /* CONFIG_VIDEO_SVGA */ | ||
980 | |||
981 | mtabe: | ||
982 | |||
983 | #ifdef CONFIG_VIDEO_COMPACT | ||
984 | leaw modelist, %si | ||
985 | movw %di, %dx | ||
986 | movw %si, %di | ||
987 | cmt1: cmpw %dx, %si # Scan all modes, dropping entries whose size word repeats an earlier one | ||
988 | jz cmt2 | ||
989 | |||
990 | leaw modelist, %bx # Find in previous entries | ||
991 | movw 2(%si), %cx | ||
992 | cmt3: cmpw %bx, %si | ||
993 | jz cmt4 | ||
994 | |||
995 | cmpw 2(%bx), %cx # Found => don't copy this entry | ||
996 | jz cmt5 | ||
997 | |||
998 | addw $4, %bx | ||
999 | jmp cmt3 | ||
1000 | |||
1001 | cmt4: movsl # Copy entry | ||
1002 | jmp cmt1 | ||
1003 | |||
1004 | cmt5: addw $4, %si # Skip entry | ||
1005 | jmp cmt1 | ||
1006 | |||
1007 | cmt2: | ||
1008 | #endif /* CONFIG_VIDEO_COMPACT */ | ||
1009 | |||
1010 | movw $ASK_VGA, (%di) # End marker | ||
1011 | movw %di, mt_end | ||
1012 | mtab1: leaw modelist, %si # SI=mode list, DI=list end | ||
1013 | ret0: ret | ||
1014 | |||
1015 | # Modes usable on all standard VGAs, as (mode ID, size word) pairs with size = columns<<8 + rows. mode_table copies the bytes up to vga_modes_end into the mode table. | ||
1016 | vga_modes: | ||
1017 | .word VIDEO_8POINT | ||
1018 | .word 0x5032 # 80x50 | ||
1019 | .word VIDEO_80x43 | ||
1020 | .word 0x502b # 80x43 | ||
1021 | .word VIDEO_80x28 | ||
1022 | .word 0x501c # 80x28 | ||
1023 | .word VIDEO_80x30 | ||
1024 | .word 0x501e # 80x30 | ||
1025 | .word VIDEO_80x34 | ||
1026 | .word 0x5022 # 80x34 | ||
1027 | .word VIDEO_80x60 | ||
1028 | .word 0x503c # 80x60 | ||
1029 | #ifdef CONFIG_VIDEO_GFX_HACK | ||
1030 | .word VIDEO_GFX_HACK | ||
1031 | .word VIDEO_GFX_DUMMY_RESOLUTION | ||
1032 | #endif | ||
1033 | |||
1034 | vga_modes_end: | ||
1035 | # Detect VESA modes: append the color text modes reported by the VESA BIOS to the mode table at DI. On the error path the table end is reset to its value on entry. | ||
1036 | |||
1037 | #ifdef CONFIG_VIDEO_VESA | ||
1038 | vesa_modes: | ||
1039 | cmpb $2, adapter # VGA only | ||
1040 | jnz ret0 | ||
1041 | |||
1042 | movw %di, %bp # BP=original mode table end | ||
1043 | addw $0x200, %di # Buffer space: the card info block goes 0x200 bytes past the table end | ||
1044 | movw $0x4f00, %ax # VESA Get card info call | ||
1045 | int $0x10 | ||
1046 | movw %bp, %di | ||
1047 | cmpw $0x004f, %ax # Successful? | ||
1048 | jnz ret0 | ||
1049 | |||
1050 | cmpw $0x4556, 0x200(%di) | ||
1051 | jnz ret0 | ||
1052 | |||
1053 | cmpw $0x4153, 0x202(%di) | ||
1054 | jnz ret0 | ||
1055 | |||
1056 | movw $vesa_name, card_name # Set name to "VESA VGA" | ||
1057 | pushw %gs | ||
1058 | lgsw 0x20e(%di), %si # GS:SI=mode list | ||
1059 | movw $128, %cx # Iteration limit | ||
1060 | vesa1: | ||
1061 | # gas version 2.9.1, using BFD version 2.9.1.0.23 buggers the next inst. | ||
1062 | # XXX: lodsw %gs:(%si), %ax # Get next mode in the list | ||
1063 | gs; lodsw | ||
1064 | cmpw $0xffff, %ax # End of the table? | ||
1065 | jz vesar | ||
1066 | |||
1067 | cmpw $0x0080, %ax # Check validity of mode ID | ||
1068 | jc vesa2 | ||
1069 | |||
1070 | orb %ah, %ah # Valid IDs: 0x0000-0x007f/0x0100-0x07ff | ||
1071 | jz vesan # Certain BIOSes report 0x80-0xff! | ||
1072 | |||
1073 | cmpw $0x0800, %ax | ||
1074 | jnc vesae | ||
1075 | |||
1076 | vesa2: pushw %cx | ||
1077 | movw %ax, %cx # Get mode information structure | ||
1078 | movw $0x4f01, %ax | ||
1079 | int $0x10 | ||
1080 | movw %cx, %bx # BX=mode number | ||
1081 | addb $VIDEO_FIRST_VESA>>8, %bh | ||
1082 | popw %cx | ||
1083 | cmpw $0x004f, %ax | ||
1084 | jnz vesan # Don't report errors (buggy BIOSES) | ||
1085 | |||
1086 | movb (%di), %al # Check capabilities. We require | ||
1087 | andb $0x19, %al # a color text mode. | ||
1088 | cmpb $0x09, %al | ||
1089 | jnz vesan | ||
1090 | |||
1091 | cmpw $0xb800, 8(%di) # Standard video memory address required | ||
1092 | jnz vesan | ||
1093 | |||
1094 | testb $2, (%di) # Mode characteristics supplied? | ||
1095 | movw %bx, (%di) # Store mode number (movw leaves the flags from testb intact for the jz below) | ||
1096 | jz vesa3 | ||
1097 | |||
1098 | xorw %dx, %dx | ||
1099 | movw 0x12(%di), %bx # Width, must fit in one byte | ||
1100 | orb %bh, %bh | ||
1101 | jnz vesan | ||
1102 | |||
1103 | movb %bl, 0x3(%di) | ||
1104 | movw 0x14(%di), %ax # Height, must fit in one byte | ||
1105 | orb %ah, %ah | ||
1106 | jnz vesan | ||
1107 | |||
1108 | movb %al, 2(%di) | ||
1109 | mulb %bl | ||
1110 | cmpw $8193, %ax # Small enough for Linux console driver? | ||
1111 | jnc vesan | ||
1112 | |||
1113 | jmp vesaok | ||
1114 | |||
1115 | vesa3: subw $0x8108, %bx # This mode has no detailed info specified, | ||
1116 | jc vesan # so it must be a standard VESA mode. NOTE(review): BX is used below as a byte offset into vesa_text_mode_table without being doubled, although the entries are 2 bytes each -- confirm the indexing and the 0x8108 base | ||
1117 | |||
1118 | cmpw $5, %bx | ||
1119 | jnc vesan | ||
1120 | |||
1121 | movw vesa_text_mode_table(%bx), %ax | ||
1122 | movw %ax, 2(%di) | ||
1123 | vesaok: addw $4, %di # The mode is valid. Store it. | ||
1124 | vesan: loop vesa1 # Next mode. Limit exceeded => error | ||
1125 | vesae: leaw vesaer, %si | ||
1126 | call prtstr | ||
1127 | movw %bp, %di # Discard already found modes. | ||
1128 | vesar: popw %gs | ||
1129 | ret | ||
1130 | |||
1131 | # Dimensions of standard VESA text modes, one (rows, columns) byte pair per mode. The vesa3 path of vesa_modes reads a word from here. | ||
1132 | vesa_text_mode_table: | ||
1133 | .byte 60, 80 # 0108 | ||
1134 | .byte 25, 132 # 0109 | ||
1135 | .byte 43, 132 # 010A | ||
1136 | .byte 50, 132 # 010B | ||
1137 | .byte 60, 132 # 010C | ||
1138 | #endif /* CONFIG_VIDEO_VESA */ | ||
1139 | |||
1140 | # Scan for video modes. A bit dirty, but should work. Tries BIOS modes 0x00-0x7f, appends a 4-byte record at DI for each one that passes the text-mode checks, and finally switches back to mode 3. | ||
1141 | mode_scan: | ||
1142 | movw $0x0100, %cx # Start with mode 0 (CL=mode; CH=1 becomes the high byte of the stored ID) | ||
1143 | scm1: movb $0, %ah # Test the mode | ||
1144 | movb %cl, %al | ||
1145 | int $0x10 | ||
1146 | movb $0x0f, %ah | ||
1147 | int $0x10 | ||
1148 | cmpb %cl, %al | ||
1149 | jnz scm2 # Mode not set | ||
1150 | |||
1151 | movw $0x3c0, %dx # Test if it's a text mode | ||
1152 | movb $0x10, %al # Mode bits | ||
1153 | call inidx | ||
1154 | andb $0x03, %al | ||
1155 | jnz scm2 | ||
1156 | |||
1157 | movb $0xce, %dl # Another set of mode bits | ||
1158 | movb $0x06, %al | ||
1159 | call inidx | ||
1160 | shrb %al | ||
1161 | jc scm2 | ||
1162 | |||
1163 | movb $0xd4, %dl # Cursor location | ||
1164 | movb $0x0f, %al | ||
1165 | call inidx | ||
1166 | orb %al, %al | ||
1167 | jnz scm2 | ||
1168 | |||
1169 | movw %cx, %ax # Ok, store the mode | ||
1170 | stosw | ||
1171 | movb %gs:(0x484), %al # Number of rows (the stored byte is rows - 1, hence the incb) | ||
1172 | incb %al | ||
1173 | stosb | ||
1174 | movw %gs:(0x44a), %ax # Number of columns (only the low byte is stored) | ||
1175 | stosb | ||
1176 | scm2: incb %cl | ||
1177 | jns scm1 | ||
1178 | |||
1179 | movw $0x0003, %ax # Return back to mode 3 | ||
1180 | int $0x10 | ||
1181 | ret | ||
1182 | |||
1183 | tstidx: outw %ax, %dx # OUT DX,AX (index in AL, data in AH), then fall into inidx to read the register back | ||
1184 | inidx: outb %al, %dx # Read from indexed VGA register | ||
1185 | incw %dx # AL=index, DX=index reg port -> AL=data | ||
1186 | inb %dx, %al | ||
1187 | decw %dx | ||
1188 | ret | ||
1189 | |||
1190 | # Try to detect type of SVGA card and supply (usually approximate) video | ||
1191 | # mode table for it. Walks svga_table until a test routine leaves BP non-zero, then appends that card's modes at DI and records its name in card_name. | ||
1192 | |||
1193 | #ifdef CONFIG_VIDEO_SVGA | ||
1194 | svga_modes: | ||
1195 | leaw svga_table, %si # Test all known SVGA adapters | ||
1196 | dosvga: lodsw | ||
1197 | movw %ax, %bp # Default mode table; a zero word ends svga_table | ||
1198 | orw %ax, %ax | ||
1199 | jz didsv1 | ||
1200 | |||
1201 | lodsw # Pointer to test routine | ||
1202 | pushw %si | ||
1203 | pushw %di | ||
1204 | pushw %es | ||
1205 | movw $0xc000, %bx | ||
1206 | movw %bx, %es | ||
1207 | call *%ax # Call test routine with ES=0xc000; it clears BP when the card is not found | ||
1208 | popw %es | ||
1209 | popw %di | ||
1210 | popw %si | ||
1211 | orw %bp, %bp | ||
1212 | jz dosvga | ||
1213 | |||
1214 | movw %bp, %si # Found, copy the modes: each 3-byte entry becomes an ID word (svga_prefix in AH, first byte in AL) plus a size word | ||
1215 | movb svga_prefix, %ah | ||
1216 | cpsvga: lodsb | ||
1217 | orb %al, %al | ||
1218 | jz didsv | ||
1219 | |||
1220 | stosw | ||
1221 | movsw | ||
1222 | jmp cpsvga | ||
1223 | |||
1224 | didsv: movw %si, card_name # Store pointer to card name (the string that follows the table's zero terminator) | ||
1225 | didsv1: ret | ||
1226 | |||
1227 | # Table of all known SVGA cards. For each card, we store a pointer to | ||
1228 | # a table of video modes supported by the card and a pointer to a routine | ||
1229 | # used to test for the presence of the card. The video mode table is always | ||
1230 | # followed by the name of the card or the chipset. A zero word ends the table. | ||
1231 | svga_table: | ||
1232 | .word ati_md, ati_test | ||
1233 | .word oak_md, oak_test | ||
1234 | .word paradise_md, paradise_test | ||
1235 | .word realtek_md, realtek_test | ||
1236 | .word s3_md, s3_test | ||
1237 | .word chips_md, chips_test | ||
1238 | .word video7_md, video7_test | ||
1239 | .word cirrus5_md, cirrus5_test | ||
1240 | .word cirrus6_md, cirrus6_test | ||
1241 | .word cirrus1_md, cirrus1_test | ||
1242 | .word ahead_md, ahead_test | ||
1243 | .word everex_md, everex_test | ||
1244 | .word genoa_md, genoa_test | ||
1245 | .word trident_md, trident_test | ||
1246 | .word tseng_md, tseng_test | ||
1247 | .word 0 | ||
1248 | |||
1249 | # Test routines and mode tables: | ||
1250 | |||
1251 | # S3 - The test algorithm was taken from the SuperProbe package | ||
1252 | # for XFree86 1.2.1. Report bugs to Christoph.Niemann@linux.org | ||
1253 | s3_test: | ||
1254 | movw $0x0f35, %cx # we store some constants in cl/ch (CL=0x35 register index, CH=0x0f mask) | ||
1255 | movw $0x03d4, %dx | ||
1256 | movb $0x38, %al | ||
1257 | call inidx | ||
1258 | movb %al, %bh # store current CRT-register 0x38 | ||
1259 | movw $0x0038, %ax | ||
1260 | call outidx # disable writing to special regs | ||
1261 | movb %cl, %al # check whether we can write special reg 0x35 | ||
1262 | call inidx | ||
1263 | movb %al, %bl # save the current value of CRT reg 0x35 | ||
1264 | andb $0xf0, %al # clear bits 0-3 | ||
1265 | movb %al, %ah | ||
1266 | movb %cl, %al # and write it to CRT reg 0x35 | ||
1267 | call outidx | ||
1268 | call inidx # now read it back | ||
1269 | andb %ch, %al # clear the upper 4 bits | ||
1270 | jz s3_2 # the first test failed. But we have a | ||
1271 | |||
1272 | movb %bl, %ah # second chance | ||
1273 | movb %cl, %al | ||
1274 | call outidx | ||
1275 | jmp s3_1 # do the other tests | ||
1276 | |||
1277 | s3_2: movw %cx, %ax # load ah with 0xf and al with 0x35 | ||
1278 | orb %bl, %ah # set the upper 4 bits of ah with the orig value | ||
1279 | call outidx # write ... | ||
1280 | call inidx # ... and reread | ||
1281 | andb %cl, %al # turn off the upper 4 bits. NOTE(review): the mask here is CL (0x35), not CH (0x0f) as this comment and the matching step in s3_1 suggest -- confirm | ||
1282 | pushw %ax | ||
1283 | movb %bl, %ah # restore old value in register 0x35 | ||
1284 | movb %cl, %al | ||
1285 | call outidx | ||
1286 | popw %ax | ||
1287 | cmpb %ch, %al # setting lower 4 bits was successful => bad | ||
1288 | je no_s3 # writing is allowed => this is not an S3 | ||
1289 | |||
1290 | s3_1: movw $0x4838, %ax # allow writing to special regs by putting | ||
1291 | call outidx # magic number into CRT-register 0x38 | ||
1292 | movb %cl, %al # check whether we can write special reg 0x35 | ||
1293 | call inidx | ||
1294 | movb %al, %bl | ||
1295 | andb $0xf0, %al | ||
1296 | movb %al, %ah | ||
1297 | movb %cl, %al | ||
1298 | call outidx | ||
1299 | call inidx | ||
1300 | andb %ch, %al | ||
1301 | jnz no_s3 # no, we can't write => no S3 | ||
1302 | |||
1303 | movw %cx, %ax | ||
1304 | orb %bl, %ah | ||
1305 | call outidx | ||
1306 | call inidx | ||
1307 | andb %ch, %al | ||
1308 | pushw %ax | ||
1309 | movb %bl, %ah # restore old value in register 0x35 | ||
1310 | movb %cl, %al | ||
1311 | call outidx | ||
1312 | popw %ax | ||
1313 | cmpb %ch, %al | ||
1314 | jne no_s31 # writing not possible => no S3 | ||
1315 | movb $0x30, %al | ||
1316 | call inidx # now get the S3 id ... and search the 16 bytes at idS3 for it. NOTE(review): the je after repne scasb jumps to no_s31 when a match is found, and scasb reads through ES, which svga_modes set to 0xc000 -- confirm both are intended | ||
1317 | leaw idS3, %di | ||
1318 | movw $0x10, %cx | ||
1319 | repne | ||
1320 | scasb | ||
1321 | je no_s31 | ||
1322 | |||
1323 | movb %bh, %ah | ||
1324 | movb $0x38, %al | ||
1325 | jmp s3rest | ||
1326 | |||
1327 | no_s3: movb $0x35, %al # restore CRT register 0x35 | ||
1328 | movb %bl, %ah | ||
1329 | call outidx | ||
1330 | no_s31: xorw %bp, %bp # Detection failed | ||
1331 | s3rest: movb %bh, %ah | ||
1332 | movb $0x38, %al # restore old value of CRT register 0x38 | ||
1333 | jmp outidx | ||
1334 | |||
1335 | idS3: .byte 0x81, 0x82, 0x90, 0x91, 0x92, 0x93, 0x94, 0x95 | ||
1336 | .byte 0xa0, 0xa1, 0xa2, 0xa3, 0xa4, 0xa5, 0xa8, 0xb0 | ||
1337 | |||
1338 | s3_md: .byte 0x54, 0x2b, 0x84 | ||
1339 | .byte 0x55, 0x19, 0x84 | ||
1340 | .byte 0 | ||
1341 | .ascii "S3" | ||
1342 | .byte 0 | ||
1343 | |||
1344 | # ATI cards. Compares the 9-byte signature idati with the bytes at ES:0x31 (svga_modes calls this with ES=0xc000); BP is cleared on a mismatch. | ||
1345 | ati_test: | ||
1346 | leaw idati, %si | ||
1347 | movw $0x31, %di | ||
1348 | movw $0x09, %cx | ||
1349 | repe | ||
1350 | cmpsb | ||
1351 | je atiok | ||
1352 | |||
1353 | xorw %bp, %bp | ||
1354 | atiok: ret | ||
1355 | |||
1356 | idati: .ascii "761295520" | ||
1357 | |||
1358 | ati_md: .byte 0x23, 0x19, 0x84 | ||
1359 | .byte 0x33, 0x2c, 0x84 | ||
1360 | .byte 0x22, 0x1e, 0x64 | ||
1361 | .byte 0x21, 0x19, 0x64 | ||
1362 | .byte 0x58, 0x21, 0x50 | ||
1363 | .byte 0x5b, 0x1e, 0x50 | ||
1364 | .byte 0 | ||
1365 | .ascii "ATI" | ||
1366 | .byte 0 | ||
1367 | |||
1368 | # AHEAD: outw 0x200f to port 0x3ce (AL=0x0f goes to 0x3ce, AH=0x20 to 0x3cf), read port 0x3cf back and accept 0x20 or 0x21; any other value clears BP. ahead_md: 3-byte entries, a 0 byte, then the NUL-terminated adapter name. | ||
1369 | ahead_test: | ||
1370 | movw $0x200f, %ax | ||
1371 | movw $0x3ce, %dx | ||
1372 | outw %ax, %dx | ||
1373 | incw %dx | ||
1374 | inb %dx, %al | ||
1375 | cmpb $0x20, %al | ||
1376 | je isahed | ||
1377 | |||
1378 | cmpb $0x21, %al | ||
1379 | je isahed | ||
1380 | |||
1381 | xorw %bp, %bp | ||
1382 | isahed: ret | ||
1383 | |||
1384 | ahead_md: | ||
1385 | .byte 0x22, 0x2c, 0x84 | ||
1386 | .byte 0x23, 0x19, 0x84 | ||
1387 | .byte 0x24, 0x1c, 0x84 | ||
1388 | .byte 0x2f, 0x32, 0xa0 | ||
1389 | .byte 0x32, 0x22, 0x50 | ||
1390 | .byte 0x34, 0x42, 0x50 | ||
1391 | .byte 0 | ||
1392 | .ascii "Ahead" | ||
1393 | .byte 0 | ||
1394 | |||
1395 | # Chips & Tech.: set bit 4 of port 0x3c3, read port 0x104 into BL, then clear bit 4 of port 0x3c3 again; BP is cleared unless the byte read was 0xa5. chips_md: 3-byte entries, a 0 byte, then the NUL-terminated adapter name. | ||
1396 | chips_test: | ||
1397 | movw $0x3c3, %dx | ||
1398 | inb %dx, %al | ||
1399 | orb $0x10, %al | ||
1400 | outb %al, %dx | ||
1401 | movw $0x104, %dx | ||
1402 | inb %dx, %al | ||
1403 | movb %al, %bl | ||
1404 | movw $0x3c3, %dx | ||
1405 | inb %dx, %al | ||
1406 | andb $0xef, %al | ||
1407 | outb %al, %dx | ||
1408 | cmpb $0xa5, %bl | ||
1409 | je cantok | ||
1410 | |||
1411 | xorw %bp, %bp | ||
1412 | cantok: ret | ||
1413 | |||
1414 | chips_md: | ||
1415 | .byte 0x60, 0x19, 0x84 | ||
1416 | .byte 0x61, 0x32, 0x84 | ||
1417 | .byte 0 | ||
1418 | .ascii "Chips & Technologies" | ||
1419 | .byte 0 | ||
1420 | |||
1421 | # Cirrus Logic 5X0: save register 0x0c of index port 0x3d4 in BL and zero it, read register 0x1f into BH, write BH with its nibbles swapped to register 6 of index port 0x3c4 and expect 0 back, then write BH itself and expect 1 back. BP is cleared on failure; register 0x0c is written back from BL on both paths. | ||
1422 | cirrus1_test: | ||
1423 | movw $0x3d4, %dx | ||
1424 | movb $0x0c, %al | ||
1425 | outb %al, %dx | ||
1426 | incw %dx | ||
1427 | inb %dx, %al | ||
1428 | movb %al, %bl | ||
1429 | xorb %al, %al | ||
1430 | outb %al, %dx | ||
1431 | decw %dx | ||
1432 | movb $0x1f, %al | ||
1433 | outb %al, %dx | ||
1434 | incw %dx | ||
1435 | inb %dx, %al | ||
1436 | movb %al, %bh | ||
1437 | xorb %ah, %ah | ||
1438 | shlb $4, %al | ||
1439 | movw %ax, %cx | ||
1440 | movb %bh, %al | ||
1441 | shrb $4, %al | ||
1442 | addw %ax, %cx | ||
1443 | shlw $8, %cx | ||
1444 | addw $6, %cx | ||
1445 | movw %cx, %ax | ||
1446 | movw $0x3c4, %dx | ||
1447 | outw %ax, %dx | ||
1448 | incw %dx | ||
1449 | inb %dx, %al | ||
1450 | andb %al, %al | ||
1451 | jnz nocirr | ||
1452 | |||
1453 | movb %bh, %al | ||
1454 | outb %al, %dx | ||
1455 | inb %dx, %al | ||
1456 | cmpb $0x01, %al | ||
1457 | je iscirr | ||
1458 | |||
1459 | nocirr: xorw %bp, %bp | ||
1460 | iscirr: movw $0x3d4, %dx | ||
1461 | movb %bl, %al | ||
1462 | xorb %ah, %ah | ||
1463 | shlw $8, %ax | ||
1464 | addw $0x0c, %ax | ||
1465 | outw %ax, %dx | ||
1466 | ret | ||
1467 | |||
1468 | cirrus1_md: | ||
1469 | .byte 0x1f, 0x19, 0x84 | ||
1470 | .byte 0x20, 0x2c, 0x84 | ||
1471 | .byte 0x22, 0x1e, 0x84 | ||
1472 | .byte 0x31, 0x25, 0x64 | ||
1473 | .byte 0 | ||
1474 | .ascii "Cirrus Logic 5X0" | ||
1475 | .byte 0 | ||
1476 | |||
1477 | # Cirrus Logic 54XX: probes registers 6 and 0x1e of index port 0x3c4 through the inidx/tstidx helpers (defined outside this view); BP is cleared when a check fails. Register 6 is always written back from BL, register 0x1e from BH once it has been read. | ||
1478 | cirrus5_test: | ||
1479 | movw $0x3c4, %dx | ||
1480 | movb $6, %al | ||
1481 | call inidx | ||
1482 | movb %al, %bl # BL=backup of register 6 | ||
1483 | movw $6, %ax | ||
1484 | call tstidx | ||
1485 | cmpb $0x0f, %al | ||
1486 | jne c5fail | ||
1487 | |||
1488 | movw $0x1206, %ax | ||
1489 | call tstidx | ||
1490 | cmpb $0x12, %al | ||
1491 | jne c5fail | ||
1492 | |||
1493 | movb $0x1e, %al | ||
1494 | call inidx | ||
1495 | movb %al, %bh | ||
1496 | movb %bh, %ah | ||
1497 | andb $0xc0, %ah | ||
1498 | movb $0x1e, %al | ||
1499 | call tstidx | ||
1500 | andb $0x3f, %al | ||
1501 | jne c5xx | ||
1502 | |||
1503 | movb $0x1e, %al | ||
1504 | movb %bh, %ah | ||
1505 | orb $0x3f, %ah | ||
1506 | call tstidx | ||
1507 | xorb $0x3f, %al | ||
1508 | andb $0x3f, %al | ||
1509 | c5xx: pushf | ||
1510 | movb $0x1e, %al | ||
1511 | movb %bh, %ah | ||
1512 | outw %ax, %dx | ||
1513 | popf | ||
1514 | je c5done | ||
1515 | |||
1516 | c5fail: xorw %bp, %bp | ||
1517 | c5done: movb $6, %al | ||
1518 | movb %bl, %ah | ||
1519 | outw %ax, %dx | ||
1520 | ret | ||
1521 | |||
1522 | cirrus5_md: | ||
1523 | .byte 0x14, 0x19, 0x84 | ||
1524 | .byte 0x54, 0x2b, 0x84 | ||
1525 | .byte 0 | ||
1526 | .ascii "Cirrus Logic 54XX" | ||
1527 | .byte 0 | ||
1528 | |||
1529 | # Cirrus Logic 64XX -- no known extra modes, but must be identified, because | ||
1530 | # it's misidentified by the Ahead test. Register 0x0a of index port 0x3ce is written back from BL on exit; BP is cleared on failure. inidx/tstidx are helpers defined outside this view. | ||
1531 | cirrus6_test: | ||
1532 | movw $0x3ce, %dx | ||
1533 | movb $0x0a, %al | ||
1534 | call inidx | ||
1535 | movb %al, %bl # BL=backup of register 0x0a | ||
1536 | movw $0xce0a, %ax | ||
1537 | call tstidx | ||
1538 | orb %al, %al | ||
1539 | jne c2fail | ||
1540 | |||
1541 | movw $0xec0a, %ax | ||
1542 | call tstidx | ||
1543 | cmpb $0x01, %al | ||
1544 | jne c2fail | ||
1545 | |||
1546 | movb $0xaa, %al | ||
1547 | call inidx # 4X, 5X, 7X and 8X are valid 64XX chip IDs (high nibble of register 0xaa). | ||
1548 | shrb $4, %al | ||
1549 | subb $4, %al | ||
1550 | jz c6done | ||
1551 | |||
1552 | decb %al | ||
1553 | jz c6done | ||
1554 | |||
1555 | subb $2, %al | ||
1556 | jz c6done | ||
1557 | |||
1558 | decb %al | ||
1559 | jz c6done | ||
1560 | |||
1561 | c2fail: xorw %bp, %bp | ||
1562 | c6done: movb $0x0a, %al | ||
1563 | movb %bl, %ah | ||
1564 | outw %ax, %dx | ||
1565 | ret | ||
1566 | |||
1567 | cirrus6_md: | ||
1568 | .byte 0 | ||
1569 | .ascii "Cirrus Logic 64XX" | ||
1570 | .byte 0 | ||
1571 | |||
1572 | # Everex / Trident: call int 0x10 with AX=0x7000, BX=0. If AL is not 0x70 on return, BP is cleared (no Everex). Otherwise a DX>>4 value of 0x678 or 0x236 switches BP to trident_md; any other value leaves BP unchanged. | ||
1573 | everex_test: | ||
1574 | movw $0x7000, %ax | ||
1575 | xorw %bx, %bx | ||
1576 | int $0x10 | ||
1577 | cmpb $0x70, %al | ||
1578 | jne noevrx | ||
1579 | |||
1580 | shrw $4, %dx | ||
1581 | cmpw $0x678, %dx | ||
1582 | je evtrid | ||
1583 | |||
1584 | cmpw $0x236, %dx | ||
1585 | jne evrxok | ||
1586 | |||
1587 | evtrid: leaw trident_md, %bp | ||
1588 | evrxok: ret | ||
1589 | |||
1590 | noevrx: xorw %bp, %bp | ||
1591 | ret | ||
1592 | |||
1593 | everex_md: | ||
1594 | .byte 0x03, 0x22, 0x50 | ||
1595 | .byte 0x04, 0x3c, 0x50 | ||
1596 | .byte 0x07, 0x2b, 0x64 | ||
1597 | .byte 0x08, 0x4b, 0x64 | ||
1598 | .byte 0x0a, 0x19, 0x84 | ||
1599 | .byte 0x0b, 0x2c, 0x84 | ||
1600 | .byte 0x16, 0x1e, 0x50 | ||
1601 | .byte 0x18, 0x1b, 0x64 | ||
1602 | .byte 0x21, 0x40, 0xa0 | ||
1603 | .byte 0x40, 0x1e, 0x84 | ||
1604 | .byte 0 | ||
1605 | .ascii "Everex/Trident" | ||
1606 | .byte 0 | ||
1607 | |||
1608 | # Genoa: the byte at ES:0x37 is used as an offset, and the 4 bytes found there are compared with idgenoa; a zero byte in idgenoa matches anything. BP is cleared on failure. NOTE(review): loope leaves CX=0 after the fourth byte whatever its compare result, so a mismatch in the last byte is not detected - confirm whether that is intended. | ||
1609 | genoa_test: | ||
1610 | leaw idgenoa, %si # Check Genoa 'clues' | ||
1611 | xorw %ax, %ax | ||
1612 | movb %es:(0x37), %al | ||
1613 | movw %ax, %di | ||
1614 | movw $0x04, %cx | ||
1615 | decw %si | ||
1616 | decw %di | ||
1617 | l1: incw %si | ||
1618 | incw %di | ||
1619 | movb (%si), %al | ||
1620 | testb %al, %al | ||
1621 | jz l2 | ||
1622 | |||
1623 | cmpb %es:(%di), %al | ||
1624 | l2: loope l1 | ||
1625 | orw %cx, %cx | ||
1626 | je isgen | ||
1627 | |||
1628 | xorw %bp, %bp | ||
1629 | isgen: ret | ||
1630 | |||
1631 | idgenoa: .byte 0x77, 0x00, 0x99, 0x66 | ||
1632 | |||
1633 | genoa_md: | ||
1634 | .byte 0x58, 0x20, 0x50 | ||
1635 | .byte 0x5a, 0x2a, 0x64 | ||
1636 | .byte 0x60, 0x19, 0x84 | ||
1637 | .byte 0x61, 0x1d, 0x84 | ||
1638 | .byte 0x62, 0x20, 0x84 | ||
1639 | .byte 0x63, 0x2c, 0x84 | ||
1640 | .byte 0x64, 0x3c, 0x84 | ||
1641 | .byte 0x6b, 0x4f, 0x64 | ||
1642 | .byte 0x72, 0x3c, 0x50 | ||
1643 | .byte 0x74, 0x42, 0x50 | ||
1644 | .byte 0x78, 0x4b, 0x64 | ||
1645 | .byte 0 | ||
1646 | .ascii "Genoa" | ||
1647 | .byte 0 | ||
1648 | |||
1649 | # OAK: compare the 8-byte signature at idoakvga with the bytes at ES:0x08; BP is cleared on mismatch. ES is not set here - presumably the caller points it at the video BIOS, confirm. | ||
1650 | oak_test: | ||
1651 | leaw idoakvga, %si | ||
1652 | movw $0x08, %di | ||
1653 | movw $0x08, %cx | ||
1654 | repe | ||
1655 | cmpsb | ||
1656 | je isoak | ||
1657 | |||
1658 | xorw %bp, %bp | ||
1659 | isoak: ret | ||
1660 | |||
1661 | idoakvga: .ascii "OAK VGA " | ||
1662 | |||
1663 | oak_md: .byte 0x4e, 0x3c, 0x50 | ||
1664 | .byte 0x4f, 0x3c, 0x84 | ||
1665 | .byte 0x50, 0x19, 0x84 | ||
1666 | .byte 0x51, 0x2b, 0x84 | ||
1667 | .byte 0 | ||
1668 | .ascii "OAK" | ||
1669 | .byte 0 | ||
1670 | |||
1671 | # WD Paradise: compare the 4-byte signature at idparadise with the bytes at ES:0x7d; BP is cleared on mismatch. ES is not set here - presumably the caller points it at the video BIOS, confirm. | ||
1672 | paradise_test: | ||
1673 | leaw idparadise, %si | ||
1674 | movw $0x7d, %di | ||
1675 | movw $0x04, %cx | ||
1676 | repe | ||
1677 | cmpsb | ||
1678 | je ispara | ||
1679 | |||
1680 | xorw %bp, %bp | ||
1681 | ispara: ret | ||
1682 | |||
1683 | idparadise: .ascii "VGA=" | ||
1684 | |||
1685 | paradise_md: | ||
1686 | .byte 0x41, 0x22, 0x50 | ||
1687 | .byte 0x47, 0x1c, 0x84 | ||
1688 | .byte 0x55, 0x19, 0x84 | ||
1689 | .byte 0x54, 0x2c, 0x84 | ||
1690 | .byte 0 | ||
1691 | .ascii "Paradise" | ||
1692 | .byte 0 | ||
1693 | |||
1694 | # Trident: write 0 to register 0x0e of index port 0x3c4 and read it back into AH; the card is accepted when the low nibble read back is 2, otherwise BP is cleared. The original register value is written back with bit 1 inverted (see the remark below). | ||
1695 | trident_test: | ||
1696 | movw $0x3c4, %dx | ||
1697 | movb $0x0e, %al | ||
1698 | outb %al, %dx | ||
1699 | incw %dx | ||
1700 | inb %dx, %al | ||
1701 | xchgb %al, %ah | ||
1702 | xorb %al, %al | ||
1703 | outb %al, %dx | ||
1704 | inb %dx, %al | ||
1705 | xchgb %ah, %al | ||
1706 | movb %al, %bl # Strange thing ... in the book this wasn't | ||
1707 | andb $0x02, %bl # necessary but it worked on my card which | ||
1708 | jz setb2 # is a trident. Without it the screen goes | ||
1709 | # blurred ... | ||
1710 | andb $0xfd, %al | ||
1711 | jmp clrb2 | ||
1712 | |||
1713 | setb2: orb $0x02, %al | ||
1714 | clrb2: outb %al, %dx | ||
1715 | andb $0x0f, %ah | ||
1716 | cmpb $0x02, %ah | ||
1717 | je istrid | ||
1718 | |||
1719 | xorw %bp, %bp | ||
1720 | istrid: ret | ||
1721 | |||
1722 | trident_md: | ||
1723 | .byte 0x50, 0x1e, 0x50 | ||
1724 | .byte 0x51, 0x2b, 0x50 | ||
1725 | .byte 0x52, 0x3c, 0x50 | ||
1726 | .byte 0x57, 0x19, 0x84 | ||
1727 | .byte 0x58, 0x1e, 0x84 | ||
1728 | .byte 0x59, 0x2b, 0x84 | ||
1729 | .byte 0x5a, 0x3c, 0x84 | ||
1730 | .byte 0 | ||
1731 | .ascii "Trident" | ||
1732 | .byte 0 | ||
1733 | |||
1734 | # Tseng: save port 0x3cd in BL, write 0x55, read it back into AH and restore the old value; BP is cleared unless 0x55 was read back. The isnot label is also the failure exit of video7_test. | ||
1735 | tseng_test: | ||
1736 | movw $0x3cd, %dx | ||
1737 | inb %dx, %al # Could things be this simple ! :-) | ||
1738 | movb %al, %bl | ||
1739 | movb $0x55, %al | ||
1740 | outb %al, %dx | ||
1741 | inb %dx, %al | ||
1742 | movb %al, %ah | ||
1743 | movb %bl, %al | ||
1744 | outb %al, %dx | ||
1745 | cmpb $0x55, %ah | ||
1746 | je istsen | ||
1747 | |||
1748 | isnot: xorw %bp, %bp | ||
1749 | istsen: ret | ||
1750 | |||
1751 | tseng_md: | ||
1752 | .byte 0x26, 0x3c, 0x50 | ||
1753 | .byte 0x2a, 0x28, 0x64 | ||
1754 | .byte 0x23, 0x19, 0x84 | ||
1755 | .byte 0x24, 0x1c, 0x84 | ||
1756 | .byte 0x22, 0x2c, 0x84 | ||
1757 | .byte 0x21, 0x3c, 0x84 | ||
1758 | .byte 0 | ||
1759 | .ascii "Tseng" | ||
1760 | .byte 0 | ||
1761 | |||
1762 | # Video7: pick index port 0x3b4 or 0x3d4 from bit 0 of port 0x3cc, save register 0x0c in BL, write 0x55 to it, read register 0x1f into BH and restore register 0x0c. Register 0x1f must read 0x55 xor 0xea; otherwise the routine leaves through isnot (in tseng_test), which clears BP. | ||
1763 | video7_test: | ||
1764 | movw $0x3cc, %dx | ||
1765 | inb %dx, %al | ||
1766 | movw $0x3b4, %dx | ||
1767 | andb $0x01, %al | ||
1768 | jz even7 | ||
1769 | |||
1770 | movw $0x3d4, %dx | ||
1771 | even7: movb $0x0c, %al | ||
1772 | outb %al, %dx | ||
1773 | incw %dx | ||
1774 | inb %dx, %al | ||
1775 | movb %al, %bl | ||
1776 | movb $0x55, %al | ||
1777 | outb %al, %dx | ||
1778 | inb %dx, %al | ||
1779 | decw %dx | ||
1780 | movb $0x1f, %al | ||
1781 | outb %al, %dx | ||
1782 | incw %dx | ||
1783 | inb %dx, %al | ||
1784 | movb %al, %bh | ||
1785 | decw %dx | ||
1786 | movb $0x0c, %al | ||
1787 | outb %al, %dx | ||
1788 | incw %dx | ||
1789 | movb %bl, %al | ||
1790 | outb %al, %dx | ||
1791 | movb $0x55, %al | ||
1792 | xorb $0xea, %al | ||
1793 | cmpb %bh, %al | ||
1794 | jne isnot | ||
1795 | |||
1796 | movb $VIDEO_FIRST_V7>>8, svga_prefix # Use special mode switching: replace the default svga_prefix with the V7 one | ||
1797 | ret | ||
1798 | |||
1799 | video7_md: | ||
1800 | .byte 0x40, 0x2b, 0x50 | ||
1801 | .byte 0x43, 0x3c, 0x50 | ||
1802 | .byte 0x44, 0x3c, 0x64 | ||
1803 | .byte 0x41, 0x19, 0x84 | ||
1804 | .byte 0x42, 0x2c, 0x84 | ||
1805 | .byte 0x45, 0x1c, 0x84 | ||
1806 | .byte 0 | ||
1807 | .ascii "Video 7" | ||
1808 | .byte 0 | ||
1809 | |||
1810 | # Realtek VGA: compare the 11-byte signature at idrtvga with the bytes at ES:0x45; BP is cleared on mismatch. ES is not set here - presumably the caller points it at the video BIOS, confirm. | ||
1811 | realtek_test: | ||
1812 | leaw idrtvga, %si | ||
1813 | movw $0x45, %di | ||
1814 | movw $0x0b, %cx | ||
1815 | repe | ||
1816 | cmpsb | ||
1817 | je isrt | ||
1818 | |||
1819 | xorw %bp, %bp | ||
1820 | isrt: ret | ||
1821 | |||
1822 | idrtvga: .ascii "REALTEK VGA" | ||
1823 | |||
1824 | realtek_md: | ||
1825 | .byte 0x1a, 0x3c, 0x50 | ||
1826 | .byte 0x1b, 0x19, 0x84 | ||
1827 | .byte 0x1c, 0x1e, 0x84 | ||
1828 | .byte 0x1d, 0x2b, 0x84 | ||
1829 | .byte 0x1e, 0x3c, 0x84 | ||
1830 | .byte 0 | ||
1831 | .ascii "REALTEK" | ||
1832 | .byte 0 | ||
1833 | |||
1834 | #endif /* CONFIG_VIDEO_SVGA */ | ||
1835 | |||
1836 | # User-defined local mode table (VGA only). local_modes copies the 4-byte entries of local_mode_table from DS:SI to ES:DI (DI is supplied by the caller) and stops at the zero word, which is not copied. | ||
1837 | #ifdef CONFIG_VIDEO_LOCAL | ||
1838 | local_modes: | ||
1839 | leaw local_mode_table, %si | ||
1840 | locm1: lodsw | ||
1841 | orw %ax, %ax | ||
1842 | jz locm2 | ||
1843 | |||
1844 | stosw | ||
1845 | movsw | ||
1846 | jmp locm1 | ||
1847 | |||
1848 | locm2: ret | ||
1849 | |||
1850 | # This is the table of local video modes which can be supplied manually | ||
1851 | # by the user. Each entry consists of mode ID (word) and dimensions | ||
1852 | # (byte for column count and another byte for row count). These modes | ||
1853 | # are placed before all SVGA and VESA modes and override them if table | ||
1854 | # compacting is enabled. The table must end with a zero word followed | ||
1855 | # by a NUL-terminated video adapter name. | ||
1856 | local_mode_table: | ||
1857 | .word 0x0100 # Example: 40x25 | ||
1858 | .byte 25,40 | ||
1859 | .word 0 | ||
1860 | .ascii "Local" | ||
1861 | .byte 0 | ||
1862 | #endif /* CONFIG_VIDEO_LOCAL */ | ||
1863 | |||
1864 | # Read a key (int 0x16 with AH=0) and return the ASCII code in al, scan code in ah | ||
1865 | getkey: xorb %ah, %ah | ||
1866 | int $0x16 | ||
1867 | ret | ||
1868 | |||
1869 | # Read a key with a timeout of 30 seconds; a space is returned in al if no key arrives in time. | ||
1870 | # The hardware clock is used to get the time: gettime (defined outside this view) is expected to return the current second in al - confirm. The deadline second is kept in cl. | ||
1871 | getkt: call gettime | ||
1872 | addb $30, %al # Deadline = now + 30 seconds, wrapped below into 0..59 | ||
1873 | cmpb $60, %al | ||
1874 | jl lminute | ||
1875 | |||
1876 | subb $60, %al | ||
1877 | lminute: | ||
1878 | movb %al, %cl | ||
1879 | again: movb $0x01, %ah | ||
1880 | int $0x16 | ||
1881 | jnz getkey # key pressed, so get it (getkey returns straight to our caller) | ||
1882 | |||
1883 | call gettime | ||
1884 | cmpb %cl, %al | ||
1885 | jne again | ||
1886 | |||
1887 | movb $0x20, %al # timeout, return `space' | ||
1888 | ret | ||
1889 | |||
1890 | # Flush the keyboard buffer: poll with int 0x16/AH=1 and consume keys with int 0x16/AH=0 until none is pending | ||
1891 | flush: movb $0x01, %ah | ||
1892 | int $0x16 | ||
1893 | jz empty | ||
1894 | |||
1895 | xorb %ah, %ah | ||
1896 | int $0x16 | ||
1897 | jmp flush | ||
1898 | |||
1899 | empty: ret | ||
1900 | |||
1901 | # Print hexadecimal number: prthw prints ax as 4 digits, prthb prints al as 2 digits, prthn prints the value 0..15 in al as one uppercase hex digit. Each entry falls through to the next and ends with a jump to prtchr (defined outside this view). | ||
1902 | prthw: pushw %ax | ||
1903 | movb %ah, %al | ||
1904 | call prthb | ||
1905 | popw %ax | ||
1906 | prthb: pushw %ax | ||
1907 | shrb $4, %al | ||
1908 | call prthn | ||
1909 | popw %ax | ||
1910 | andb $0x0f, %al | ||
1911 | prthn: cmpb $0x0a, %al | ||
1912 | jc prth1 | ||
1913 | |||
1914 | addb $0x07, %al | ||
1915 | prth1: addb $0x30, %al | ||
1916 | jmp prtchr | ||
1917 | |||
1918 | # Print decimal number in al: divide by 10, recurse while the quotient is above 9, then print the remainder digit. Values below 10 come out with a leading 0. AX and CX are saved and restored; digits go out through prtchr (defined outside this view). | ||
1919 | prtdec: pushw %ax | ||
1920 | pushw %cx | ||
1921 | xorb %ah, %ah | ||
1922 | movb $0x0a, %cl | ||
1923 | idivb %cl | ||
1924 | cmpb $0x09, %al | ||
1925 | jbe lt100 | ||
1926 | |||
1927 | call prtdec | ||
1928 | jmp skip10 | ||
1929 | |||
1930 | lt100: addb $0x30, %al | ||
1931 | call prtchr | ||
1932 | skip10: movb %ah, %al | ||
1933 | addb $0x30, %al | ||
1934 | call prtchr | ||
1935 | popw %cx | ||
1936 | popw %ax | ||
1937 | ret | ||
1938 | |||
1939 | store_edid: | ||
1940 | #ifdef CONFIG_FIRMWARE_EDID | ||
1941 | pushw %es # save all registers used below; the routine works on the 128-byte block at FS:0x140 | ||
1942 | pushw %ax | ||
1943 | pushw %bx | ||
1944 | pushw %cx | ||
1945 | pushw %dx | ||
1946 | pushw %di | ||
1947 | |||
1948 | pushw %fs | ||
1949 | popw %es | ||
1950 | |||
1951 | movl $0x13131313, %eax # fill the block (32 dwords at ES:0x140, ES=FS) with 0x13 | ||
1952 | movw $32, %cx | ||
1953 | movw $0x140, %di | ||
1954 | cld | ||
1955 | rep | ||
1956 | stosl | ||
1957 | |||
1958 | cmpw $0x0200, vbe_version # only do EDID on >= VBE2.0 | ||
1959 | jl no_edid | ||
1960 | |||
1961 | pushw %es # save ES | ||
1962 | xorw %di, %di # Report Capability (int 0x10, AX=0x4f15, BX=0) | ||
1963 | pushw %di | ||
1964 | popw %es # ES:DI must be 0:0 | ||
1965 | movw $0x4f15, %ax | ||
1966 | xorw %bx, %bx | ||
1967 | xorw %cx, %cx | ||
1968 | int $0x10 | ||
1969 | popw %es # restore ES | ||
1970 | |||
1971 | cmpb $0x00, %ah # call successful | ||
1972 | jne no_edid | ||
1973 | |||
1974 | cmpb $0x4f, %al # function supported | ||
1975 | jne no_edid | ||
1976 | |||
1977 | movw $0x4f15, %ax # do VBE/DDC with BX=1, DX=1 and ES:DI = FS:0x140 (presumably the buffer the BIOS fills - confirm) | ||
1978 | movw $0x01, %bx | ||
1979 | movw $0x00, %cx | ||
1980 | movw $0x01, %dx | ||
1981 | movw $0x140, %di | ||
1982 | int $0x10 | ||
1983 | |||
1984 | no_edid: | ||
1985 | popw %di # restore all registers | ||
1986 | popw %dx | ||
1987 | popw %cx | ||
1988 | popw %bx | ||
1989 | popw %ax | ||
1990 | popw %es | ||
1991 | #endif | ||
1992 | ret | ||
1993 | |||
1994 | # VIDEO_SELECT-only variables | ||
1995 | mt_end: .word 0 # End of video mode table if built (0 initially) | ||
1996 | edit_buf: .space 6 # Line editor buffer (6 bytes) | ||
1997 | card_name: .word 0 # Pointer to adapter name | ||
1998 | scanning: .byte 0 # Performing mode scan | ||
1999 | do_restore: .byte 0 # Screen contents altered during mode change | ||
2000 | svga_prefix: .byte VIDEO_FIRST_BIOS>>8 # Default prefix for BIOS modes; video7_test overwrites it with VIDEO_FIRST_V7>>8 | ||
2001 | graphic_mode: .byte 0 # Graphic mode with a linear frame buffer | ||
2002 | dac_size: .byte 6 # DAC bit depth (initial value 6) | ||
2003 | vbe_version: .word 0 # VBE bios version; store_edid compares it against 0x0200 | ||
2004 | |||
2005 | # Status messages. listhdr and svga_name have no terminator of their own: listhdr runs on into crlft, and svga_name (a single space) runs on into vga_name. | ||
2006 | keymsg: .ascii "Press <RETURN> to see video modes available, " | ||
2007 | .ascii "<SPACE> to continue or wait 30 secs" | ||
2008 | .byte 0x0d, 0x0a, 0 | ||
2009 | |||
2010 | listhdr: .byte 0x0d, 0x0a | ||
2011 | .ascii "Mode: COLSxROWS:" | ||
2012 | |||
2013 | crlft: .byte 0x0d, 0x0a, 0 | ||
2014 | |||
2015 | prompt: .byte 0x0d, 0x0a | ||
2016 | .asciz "Enter mode number or `scan': " | ||
2017 | |||
2018 | unknt: .asciz "Unknown mode ID. Try again." | ||
2019 | |||
2020 | badmdt: .ascii "You passed an undefined mode number." | ||
2021 | .byte 0x0d, 0x0a, 0 | ||
2022 | |||
2023 | vesaer: .ascii "Error: Scanning of VESA modes failed. Please " | ||
2024 | .ascii "report to <mj@ucw.cz>." | ||
2025 | .byte 0x0d, 0x0a, 0 | ||
2026 | |||
2027 | old_name: .asciz "CGA/MDA/HGA" | ||
2028 | |||
2029 | ega_name: .asciz "EGA" | ||
2030 | |||
2031 | svga_name: .ascii " " | ||
2032 | |||
2033 | vga_name: .asciz "VGA" | ||
2034 | |||
2035 | vesa_name: .asciz "VESA" | ||
2036 | |||
2037 | name_bann: .asciz "Video adapter: " | ||
2038 | #endif /* CONFIG_VIDEO_SELECT */ | ||
2039 | |||
2040 | # Other variables (defined outside the CONFIG_VIDEO_SELECT conditional that ends just above): | ||
2041 | adapter: .byte 0 # Video adapter: 0=CGA/MDA/HGA,1=EGA,2=VGA | ||
2042 | video_segment: .word 0xb800 # Video memory segment (initial value 0xb800) | ||
2043 | force_size: .word 0 # Use this size instead of the one in BIOS vars (0 initially) | ||
diff --git a/arch/x86_64/defconfig b/arch/x86_64/defconfig index b26378815b9..941a7e3aa5f 100644 --- a/arch/x86_64/defconfig +++ b/arch/x86_64/defconfig | |||
@@ -1,7 +1,7 @@ | |||
1 | # | 1 | # |
2 | # Automatically generated make config: don't edit | 2 | # Automatically generated make config: don't edit |
3 | # Linux kernel version: 2.6.21-rc3 | 3 | # Linux kernel version: 2.6.21-git3 |
4 | # Wed Mar 7 15:29:47 2007 | 4 | # Tue May 1 07:30:48 2007 |
5 | # | 5 | # |
6 | CONFIG_X86_64=y | 6 | CONFIG_X86_64=y |
7 | CONFIG_64BIT=y | 7 | CONFIG_64BIT=y |
@@ -118,11 +118,11 @@ CONFIG_X86_PC=y | |||
118 | # CONFIG_X86_VSMP is not set | 118 | # CONFIG_X86_VSMP is not set |
119 | # CONFIG_MK8 is not set | 119 | # CONFIG_MK8 is not set |
120 | # CONFIG_MPSC is not set | 120 | # CONFIG_MPSC is not set |
121 | # CONFIG_MCORE2 is not set | 121 | CONFIG_MCORE2=y |
122 | CONFIG_GENERIC_CPU=y | 122 | # CONFIG_GENERIC_CPU is not set |
123 | CONFIG_X86_L1_CACHE_BYTES=128 | 123 | CONFIG_X86_L1_CACHE_BYTES=64 |
124 | CONFIG_X86_L1_CACHE_SHIFT=7 | 124 | CONFIG_X86_L1_CACHE_SHIFT=6 |
125 | CONFIG_X86_INTERNODE_CACHE_BYTES=128 | 125 | CONFIG_X86_INTERNODE_CACHE_BYTES=64 |
126 | CONFIG_X86_TSC=y | 126 | CONFIG_X86_TSC=y |
127 | CONFIG_X86_GOOD_APIC=y | 127 | CONFIG_X86_GOOD_APIC=y |
128 | # CONFIG_MICROCODE is not set | 128 | # CONFIG_MICROCODE is not set |
@@ -174,6 +174,7 @@ CONFIG_X86_MCE_INTEL=y | |||
174 | CONFIG_X86_MCE_AMD=y | 174 | CONFIG_X86_MCE_AMD=y |
175 | # CONFIG_KEXEC is not set | 175 | # CONFIG_KEXEC is not set |
176 | # CONFIG_CRASH_DUMP is not set | 176 | # CONFIG_CRASH_DUMP is not set |
177 | # CONFIG_RELOCATABLE is not set | ||
177 | CONFIG_PHYSICAL_START=0x200000 | 178 | CONFIG_PHYSICAL_START=0x200000 |
178 | CONFIG_SECCOMP=y | 179 | CONFIG_SECCOMP=y |
179 | # CONFIG_CC_STACKPROTECTOR is not set | 180 | # CONFIG_CC_STACKPROTECTOR is not set |
@@ -182,7 +183,6 @@ CONFIG_HZ_250=y | |||
182 | # CONFIG_HZ_300 is not set | 183 | # CONFIG_HZ_300 is not set |
183 | # CONFIG_HZ_1000 is not set | 184 | # CONFIG_HZ_1000 is not set |
184 | CONFIG_HZ=250 | 185 | CONFIG_HZ=250 |
185 | # CONFIG_REORDER is not set | ||
186 | CONFIG_K8_NB=y | 186 | CONFIG_K8_NB=y |
187 | CONFIG_GENERIC_HARDIRQS=y | 187 | CONFIG_GENERIC_HARDIRQS=y |
188 | CONFIG_GENERIC_IRQ_PROBE=y | 188 | CONFIG_GENERIC_IRQ_PROBE=y |
@@ -218,7 +218,6 @@ CONFIG_ACPI_HOTPLUG_CPU=y | |||
218 | CONFIG_ACPI_THERMAL=y | 218 | CONFIG_ACPI_THERMAL=y |
219 | CONFIG_ACPI_NUMA=y | 219 | CONFIG_ACPI_NUMA=y |
220 | # CONFIG_ACPI_ASUS is not set | 220 | # CONFIG_ACPI_ASUS is not set |
221 | # CONFIG_ACPI_IBM is not set | ||
222 | # CONFIG_ACPI_TOSHIBA is not set | 221 | # CONFIG_ACPI_TOSHIBA is not set |
223 | CONFIG_ACPI_BLACKLIST_YEAR=0 | 222 | CONFIG_ACPI_BLACKLIST_YEAR=0 |
224 | # CONFIG_ACPI_DEBUG is not set | 223 | # CONFIG_ACPI_DEBUG is not set |
@@ -243,7 +242,7 @@ CONFIG_CPU_FREQ_GOV_PERFORMANCE=y | |||
243 | # CONFIG_CPU_FREQ_GOV_POWERSAVE is not set | 242 | # CONFIG_CPU_FREQ_GOV_POWERSAVE is not set |
244 | CONFIG_CPU_FREQ_GOV_USERSPACE=y | 243 | CONFIG_CPU_FREQ_GOV_USERSPACE=y |
245 | CONFIG_CPU_FREQ_GOV_ONDEMAND=y | 244 | CONFIG_CPU_FREQ_GOV_ONDEMAND=y |
246 | # CONFIG_CPU_FREQ_GOV_CONSERVATIVE is not set | 245 | CONFIG_CPU_FREQ_GOV_CONSERVATIVE=y |
247 | 246 | ||
248 | # | 247 | # |
249 | # CPUFreq processor drivers | 248 | # CPUFreq processor drivers |
@@ -299,7 +298,6 @@ CONFIG_NET=y | |||
299 | # | 298 | # |
300 | # Networking options | 299 | # Networking options |
301 | # | 300 | # |
302 | # CONFIG_NETDEBUG is not set | ||
303 | CONFIG_PACKET=y | 301 | CONFIG_PACKET=y |
304 | # CONFIG_PACKET_MMAP is not set | 302 | # CONFIG_PACKET_MMAP is not set |
305 | CONFIG_UNIX=y | 303 | CONFIG_UNIX=y |
@@ -334,6 +332,7 @@ CONFIG_DEFAULT_TCP_CONG="cubic" | |||
334 | CONFIG_IPV6=y | 332 | CONFIG_IPV6=y |
335 | # CONFIG_IPV6_PRIVACY is not set | 333 | # CONFIG_IPV6_PRIVACY is not set |
336 | # CONFIG_IPV6_ROUTER_PREF is not set | 334 | # CONFIG_IPV6_ROUTER_PREF is not set |
335 | # CONFIG_IPV6_OPTIMISTIC_DAD is not set | ||
337 | # CONFIG_INET6_AH is not set | 336 | # CONFIG_INET6_AH is not set |
338 | # CONFIG_INET6_ESP is not set | 337 | # CONFIG_INET6_ESP is not set |
339 | # CONFIG_INET6_IPCOMP is not set | 338 | # CONFIG_INET6_IPCOMP is not set |
@@ -389,6 +388,13 @@ CONFIG_IPV6_SIT=y | |||
389 | # CONFIG_HAMRADIO is not set | 388 | # CONFIG_HAMRADIO is not set |
390 | # CONFIG_IRDA is not set | 389 | # CONFIG_IRDA is not set |
391 | # CONFIG_BT is not set | 390 | # CONFIG_BT is not set |
391 | # CONFIG_AF_RXRPC is not set | ||
392 | |||
393 | # | ||
394 | # Wireless | ||
395 | # | ||
396 | # CONFIG_CFG80211 is not set | ||
397 | # CONFIG_WIRELESS_EXT is not set | ||
392 | # CONFIG_IEEE80211 is not set | 398 | # CONFIG_IEEE80211 is not set |
393 | 399 | ||
394 | # | 400 | # |
@@ -409,10 +415,6 @@ CONFIG_FW_LOADER=y | |||
409 | # Connector - unified userspace <-> kernelspace linker | 415 | # Connector - unified userspace <-> kernelspace linker |
410 | # | 416 | # |
411 | # CONFIG_CONNECTOR is not set | 417 | # CONFIG_CONNECTOR is not set |
412 | |||
413 | # | ||
414 | # Memory Technology Devices (MTD) | ||
415 | # | ||
416 | # CONFIG_MTD is not set | 418 | # CONFIG_MTD is not set |
417 | 419 | ||
418 | # | 420 | # |
@@ -459,6 +461,7 @@ CONFIG_BLK_DEV_RAM_BLOCKSIZE=1024 | |||
459 | # CONFIG_SGI_IOC4 is not set | 461 | # CONFIG_SGI_IOC4 is not set |
460 | # CONFIG_TIFM_CORE is not set | 462 | # CONFIG_TIFM_CORE is not set |
461 | # CONFIG_SONY_LAPTOP is not set | 463 | # CONFIG_SONY_LAPTOP is not set |
464 | # CONFIG_THINKPAD_ACPI is not set | ||
462 | 465 | ||
463 | # | 466 | # |
464 | # ATA/ATAPI/MFM/RLL support | 467 | # ATA/ATAPI/MFM/RLL support |
@@ -494,7 +497,6 @@ CONFIG_BLK_DEV_IDEPCI=y | |||
494 | # CONFIG_BLK_DEV_RZ1000 is not set | 497 | # CONFIG_BLK_DEV_RZ1000 is not set |
495 | CONFIG_BLK_DEV_IDEDMA_PCI=y | 498 | CONFIG_BLK_DEV_IDEDMA_PCI=y |
496 | # CONFIG_BLK_DEV_IDEDMA_FORCED is not set | 499 | # CONFIG_BLK_DEV_IDEDMA_FORCED is not set |
497 | CONFIG_IDEDMA_PCI_AUTO=y | ||
498 | # CONFIG_IDEDMA_ONLYDISK is not set | 500 | # CONFIG_IDEDMA_ONLYDISK is not set |
499 | # CONFIG_BLK_DEV_AEC62XX is not set | 501 | # CONFIG_BLK_DEV_AEC62XX is not set |
500 | # CONFIG_BLK_DEV_ALI15X3 is not set | 502 | # CONFIG_BLK_DEV_ALI15X3 is not set |
@@ -525,7 +527,6 @@ CONFIG_BLK_DEV_PDC202XX_NEW=y | |||
525 | # CONFIG_IDE_ARM is not set | 527 | # CONFIG_IDE_ARM is not set |
526 | CONFIG_BLK_DEV_IDEDMA=y | 528 | CONFIG_BLK_DEV_IDEDMA=y |
527 | # CONFIG_IDEDMA_IVB is not set | 529 | # CONFIG_IDEDMA_IVB is not set |
528 | CONFIG_IDEDMA_AUTO=y | ||
529 | # CONFIG_BLK_DEV_HD is not set | 530 | # CONFIG_BLK_DEV_HD is not set |
530 | 531 | ||
531 | # | 532 | # |
@@ -584,11 +585,9 @@ CONFIG_AIC79XX_DEBUG_MASK=0 | |||
584 | # CONFIG_AIC79XX_REG_PRETTY_PRINT is not set | 585 | # CONFIG_AIC79XX_REG_PRETTY_PRINT is not set |
585 | # CONFIG_SCSI_AIC94XX is not set | 586 | # CONFIG_SCSI_AIC94XX is not set |
586 | # CONFIG_SCSI_ARCMSR is not set | 587 | # CONFIG_SCSI_ARCMSR is not set |
587 | CONFIG_MEGARAID_NEWGEN=y | 588 | # CONFIG_MEGARAID_NEWGEN is not set |
588 | CONFIG_MEGARAID_MM=y | ||
589 | CONFIG_MEGARAID_MAILBOX=y | ||
590 | # CONFIG_MEGARAID_LEGACY is not set | 589 | # CONFIG_MEGARAID_LEGACY is not set |
591 | CONFIG_MEGARAID_SAS=y | 590 | # CONFIG_MEGARAID_SAS is not set |
592 | # CONFIG_SCSI_HPTIOP is not set | 591 | # CONFIG_SCSI_HPTIOP is not set |
593 | # CONFIG_SCSI_BUSLOGIC is not set | 592 | # CONFIG_SCSI_BUSLOGIC is not set |
594 | # CONFIG_SCSI_DMX3191D is not set | 593 | # CONFIG_SCSI_DMX3191D is not set |
@@ -608,6 +607,7 @@ CONFIG_MEGARAID_SAS=y | |||
608 | # CONFIG_SCSI_DC395x is not set | 607 | # CONFIG_SCSI_DC395x is not set |
609 | # CONFIG_SCSI_DC390T is not set | 608 | # CONFIG_SCSI_DC390T is not set |
610 | # CONFIG_SCSI_DEBUG is not set | 609 | # CONFIG_SCSI_DEBUG is not set |
610 | # CONFIG_SCSI_ESP_CORE is not set | ||
611 | # CONFIG_SCSI_SRP is not set | 611 | # CONFIG_SCSI_SRP is not set |
612 | 612 | ||
613 | # | 613 | # |
@@ -636,6 +636,7 @@ CONFIG_SATA_ACPI=y | |||
636 | # CONFIG_PATA_AMD is not set | 636 | # CONFIG_PATA_AMD is not set |
637 | # CONFIG_PATA_ARTOP is not set | 637 | # CONFIG_PATA_ARTOP is not set |
638 | # CONFIG_PATA_ATIIXP is not set | 638 | # CONFIG_PATA_ATIIXP is not set |
639 | # CONFIG_PATA_CMD640_PCI is not set | ||
639 | # CONFIG_PATA_CMD64X is not set | 640 | # CONFIG_PATA_CMD64X is not set |
640 | # CONFIG_PATA_CS5520 is not set | 641 | # CONFIG_PATA_CS5520 is not set |
641 | # CONFIG_PATA_CS5530 is not set | 642 | # CONFIG_PATA_CS5530 is not set |
@@ -687,7 +688,7 @@ CONFIG_BLK_DEV_DM=y | |||
687 | CONFIG_FUSION=y | 688 | CONFIG_FUSION=y |
688 | CONFIG_FUSION_SPI=y | 689 | CONFIG_FUSION_SPI=y |
689 | # CONFIG_FUSION_FC is not set | 690 | # CONFIG_FUSION_FC is not set |
690 | CONFIG_FUSION_SAS=y | 691 | # CONFIG_FUSION_SAS is not set |
691 | CONFIG_FUSION_MAX_SGE=128 | 692 | CONFIG_FUSION_MAX_SGE=128 |
692 | # CONFIG_FUSION_CTL is not set | 693 | # CONFIG_FUSION_CTL is not set |
693 | 694 | ||
@@ -700,19 +701,22 @@ CONFIG_IEEE1394=y | |||
700 | # Subsystem Options | 701 | # Subsystem Options |
701 | # | 702 | # |
702 | # CONFIG_IEEE1394_VERBOSEDEBUG is not set | 703 | # CONFIG_IEEE1394_VERBOSEDEBUG is not set |
703 | # CONFIG_IEEE1394_EXTRA_CONFIG_ROMS is not set | ||
704 | 704 | ||
705 | # | 705 | # |
706 | # Device Drivers | 706 | # Controllers |
707 | # | ||
708 | |||
709 | # | ||
710 | # Texas Instruments PCILynx requires I2C | ||
707 | # | 711 | # |
708 | # CONFIG_IEEE1394_PCILYNX is not set | ||
709 | CONFIG_IEEE1394_OHCI1394=y | 712 | CONFIG_IEEE1394_OHCI1394=y |
710 | 713 | ||
711 | # | 714 | # |
712 | # Protocol Drivers | 715 | # Protocols |
713 | # | 716 | # |
714 | # CONFIG_IEEE1394_VIDEO1394 is not set | 717 | # CONFIG_IEEE1394_VIDEO1394 is not set |
715 | # CONFIG_IEEE1394_SBP2 is not set | 718 | # CONFIG_IEEE1394_SBP2 is not set |
719 | # CONFIG_IEEE1394_ETH1394_ROM_ENTRY is not set | ||
716 | # CONFIG_IEEE1394_ETH1394 is not set | 720 | # CONFIG_IEEE1394_ETH1394 is not set |
717 | # CONFIG_IEEE1394_DV1394 is not set | 721 | # CONFIG_IEEE1394_DV1394 is not set |
718 | CONFIG_IEEE1394_RAWIO=y | 722 | CONFIG_IEEE1394_RAWIO=y |
@@ -775,7 +779,8 @@ CONFIG_TULIP=y | |||
775 | # CONFIG_HP100 is not set | 779 | # CONFIG_HP100 is not set |
776 | CONFIG_NET_PCI=y | 780 | CONFIG_NET_PCI=y |
777 | # CONFIG_PCNET32 is not set | 781 | # CONFIG_PCNET32 is not set |
778 | # CONFIG_AMD8111_ETH is not set | 782 | CONFIG_AMD8111_ETH=y |
783 | # CONFIG_AMD8111E_NAPI is not set | ||
779 | # CONFIG_ADAPTEC_STARFIRE is not set | 784 | # CONFIG_ADAPTEC_STARFIRE is not set |
780 | CONFIG_B44=y | 785 | CONFIG_B44=y |
781 | CONFIG_FORCEDETH=y | 786 | CONFIG_FORCEDETH=y |
@@ -837,9 +842,10 @@ CONFIG_S2IO=m | |||
837 | # CONFIG_TR is not set | 842 | # CONFIG_TR is not set |
838 | 843 | ||
839 | # | 844 | # |
840 | # Wireless LAN (non-hamradio) | 845 | # Wireless LAN |
841 | # | 846 | # |
842 | # CONFIG_NET_RADIO is not set | 847 | # CONFIG_WLAN_PRE80211 is not set |
848 | # CONFIG_WLAN_80211 is not set | ||
843 | 849 | ||
844 | # | 850 | # |
845 | # Wan interfaces | 851 | # Wan interfaces |
@@ -853,7 +859,6 @@ CONFIG_S2IO=m | |||
853 | # CONFIG_SHAPER is not set | 859 | # CONFIG_SHAPER is not set |
854 | CONFIG_NETCONSOLE=y | 860 | CONFIG_NETCONSOLE=y |
855 | CONFIG_NETPOLL=y | 861 | CONFIG_NETPOLL=y |
856 | # CONFIG_NETPOLL_RX is not set | ||
857 | # CONFIG_NETPOLL_TRAP is not set | 862 | # CONFIG_NETPOLL_TRAP is not set |
858 | CONFIG_NET_POLL_CONTROLLER=y | 863 | CONFIG_NET_POLL_CONTROLLER=y |
859 | 864 | ||
@@ -987,57 +992,7 @@ CONFIG_HPET_MMAP=y | |||
987 | # | 992 | # |
988 | # I2C support | 993 | # I2C support |
989 | # | 994 | # |
990 | CONFIG_I2C=m | 995 | # CONFIG_I2C is not set |
991 | CONFIG_I2C_CHARDEV=m | ||
992 | |||
993 | # | ||
994 | # I2C Algorithms | ||
995 | # | ||
996 | # CONFIG_I2C_ALGOBIT is not set | ||
997 | # CONFIG_I2C_ALGOPCF is not set | ||
998 | # CONFIG_I2C_ALGOPCA is not set | ||
999 | |||
1000 | # | ||
1001 | # I2C Hardware Bus support | ||
1002 | # | ||
1003 | # CONFIG_I2C_ALI1535 is not set | ||
1004 | # CONFIG_I2C_ALI1563 is not set | ||
1005 | # CONFIG_I2C_ALI15X3 is not set | ||
1006 | # CONFIG_I2C_AMD756 is not set | ||
1007 | # CONFIG_I2C_AMD8111 is not set | ||
1008 | # CONFIG_I2C_I801 is not set | ||
1009 | # CONFIG_I2C_I810 is not set | ||
1010 | # CONFIG_I2C_PIIX4 is not set | ||
1011 | CONFIG_I2C_ISA=m | ||
1012 | # CONFIG_I2C_NFORCE2 is not set | ||
1013 | # CONFIG_I2C_OCORES is not set | ||
1014 | # CONFIG_I2C_PARPORT_LIGHT is not set | ||
1015 | # CONFIG_I2C_PASEMI is not set | ||
1016 | # CONFIG_I2C_PROSAVAGE is not set | ||
1017 | # CONFIG_I2C_SAVAGE4 is not set | ||
1018 | # CONFIG_I2C_SIS5595 is not set | ||
1019 | # CONFIG_I2C_SIS630 is not set | ||
1020 | # CONFIG_I2C_SIS96X is not set | ||
1021 | # CONFIG_I2C_STUB is not set | ||
1022 | # CONFIG_I2C_VIA is not set | ||
1023 | # CONFIG_I2C_VIAPRO is not set | ||
1024 | # CONFIG_I2C_VOODOO3 is not set | ||
1025 | # CONFIG_I2C_PCA_ISA is not set | ||
1026 | |||
1027 | # | ||
1028 | # Miscellaneous I2C Chip support | ||
1029 | # | ||
1030 | # CONFIG_SENSORS_DS1337 is not set | ||
1031 | # CONFIG_SENSORS_DS1374 is not set | ||
1032 | # CONFIG_SENSORS_EEPROM is not set | ||
1033 | # CONFIG_SENSORS_PCF8574 is not set | ||
1034 | # CONFIG_SENSORS_PCA9539 is not set | ||
1035 | # CONFIG_SENSORS_PCF8591 is not set | ||
1036 | # CONFIG_SENSORS_MAX6875 is not set | ||
1037 | # CONFIG_I2C_DEBUG_CORE is not set | ||
1038 | # CONFIG_I2C_DEBUG_ALGO is not set | ||
1039 | # CONFIG_I2C_DEBUG_BUS is not set | ||
1040 | # CONFIG_I2C_DEBUG_CHIP is not set | ||
1041 | 996 | ||
1042 | # | 997 | # |
1043 | # SPI support | 998 | # SPI support |
@@ -1053,54 +1008,8 @@ CONFIG_I2C_ISA=m | |||
1053 | # | 1008 | # |
1054 | # Hardware Monitoring support | 1009 | # Hardware Monitoring support |
1055 | # | 1010 | # |
1056 | CONFIG_HWMON=y | 1011 | # CONFIG_HWMON is not set |
1057 | # CONFIG_HWMON_VID is not set | 1012 | # CONFIG_HWMON_VID is not set |
1058 | # CONFIG_SENSORS_ABITUGURU is not set | ||
1059 | # CONFIG_SENSORS_ADM1021 is not set | ||
1060 | # CONFIG_SENSORS_ADM1025 is not set | ||
1061 | # CONFIG_SENSORS_ADM1026 is not set | ||
1062 | # CONFIG_SENSORS_ADM1029 is not set | ||
1063 | # CONFIG_SENSORS_ADM1031 is not set | ||
1064 | # CONFIG_SENSORS_ADM9240 is not set | ||
1065 | # CONFIG_SENSORS_K8TEMP is not set | ||
1066 | # CONFIG_SENSORS_ASB100 is not set | ||
1067 | # CONFIG_SENSORS_ATXP1 is not set | ||
1068 | # CONFIG_SENSORS_DS1621 is not set | ||
1069 | # CONFIG_SENSORS_F71805F is not set | ||
1070 | # CONFIG_SENSORS_FSCHER is not set | ||
1071 | # CONFIG_SENSORS_FSCPOS is not set | ||
1072 | # CONFIG_SENSORS_GL518SM is not set | ||
1073 | # CONFIG_SENSORS_GL520SM is not set | ||
1074 | # CONFIG_SENSORS_IT87 is not set | ||
1075 | # CONFIG_SENSORS_LM63 is not set | ||
1076 | # CONFIG_SENSORS_LM75 is not set | ||
1077 | # CONFIG_SENSORS_LM77 is not set | ||
1078 | # CONFIG_SENSORS_LM78 is not set | ||
1079 | # CONFIG_SENSORS_LM80 is not set | ||
1080 | # CONFIG_SENSORS_LM83 is not set | ||
1081 | # CONFIG_SENSORS_LM85 is not set | ||
1082 | # CONFIG_SENSORS_LM87 is not set | ||
1083 | # CONFIG_SENSORS_LM90 is not set | ||
1084 | # CONFIG_SENSORS_LM92 is not set | ||
1085 | # CONFIG_SENSORS_MAX1619 is not set | ||
1086 | # CONFIG_SENSORS_PC87360 is not set | ||
1087 | # CONFIG_SENSORS_PC87427 is not set | ||
1088 | # CONFIG_SENSORS_SIS5595 is not set | ||
1089 | # CONFIG_SENSORS_SMSC47M1 is not set | ||
1090 | # CONFIG_SENSORS_SMSC47M192 is not set | ||
1091 | CONFIG_SENSORS_SMSC47B397=m | ||
1092 | # CONFIG_SENSORS_VIA686A is not set | ||
1093 | # CONFIG_SENSORS_VT1211 is not set | ||
1094 | # CONFIG_SENSORS_VT8231 is not set | ||
1095 | # CONFIG_SENSORS_W83781D is not set | ||
1096 | # CONFIG_SENSORS_W83791D is not set | ||
1097 | # CONFIG_SENSORS_W83792D is not set | ||
1098 | # CONFIG_SENSORS_W83793 is not set | ||
1099 | # CONFIG_SENSORS_W83L785TS is not set | ||
1100 | # CONFIG_SENSORS_W83627HF is not set | ||
1101 | # CONFIG_SENSORS_W83627EHF is not set | ||
1102 | # CONFIG_SENSORS_HDAPS is not set | ||
1103 | # CONFIG_HWMON_DEBUG_CHIP is not set | ||
1104 | 1013 | ||
1105 | # | 1014 | # |
1106 | # Multifunction device drivers | 1015 | # Multifunction device drivers |
@@ -1147,8 +1056,9 @@ CONFIG_SOUND=y | |||
1147 | # Open Sound System | 1056 | # Open Sound System |
1148 | # | 1057 | # |
1149 | CONFIG_SOUND_PRIME=y | 1058 | CONFIG_SOUND_PRIME=y |
1150 | # CONFIG_OBSOLETE_OSS is not set | 1059 | CONFIG_OBSOLETE_OSS=y |
1151 | # CONFIG_SOUND_BT878 is not set | 1060 | # CONFIG_SOUND_BT878 is not set |
1061 | # CONFIG_SOUND_ES1371 is not set | ||
1152 | CONFIG_SOUND_ICH=y | 1062 | CONFIG_SOUND_ICH=y |
1153 | # CONFIG_SOUND_TRIDENT is not set | 1063 | # CONFIG_SOUND_TRIDENT is not set |
1154 | # CONFIG_SOUND_MSNDCLAS is not set | 1064 | # CONFIG_SOUND_MSNDCLAS is not set |
@@ -1163,6 +1073,14 @@ CONFIG_HID=y | |||
1163 | # CONFIG_HID_DEBUG is not set | 1073 | # CONFIG_HID_DEBUG is not set |
1164 | 1074 | ||
1165 | # | 1075 | # |
1076 | # USB Input Devices | ||
1077 | # | ||
1078 | CONFIG_USB_HID=y | ||
1079 | # CONFIG_USB_HIDINPUT_POWERBOOK is not set | ||
1080 | # CONFIG_HID_FF is not set | ||
1081 | # CONFIG_USB_HIDDEV is not set | ||
1082 | |||
1083 | # | ||
1166 | # USB support | 1084 | # USB support |
1167 | # | 1085 | # |
1168 | CONFIG_USB_ARCH_HAS_HCD=y | 1086 | CONFIG_USB_ARCH_HAS_HCD=y |
@@ -1175,6 +1093,7 @@ CONFIG_USB=y | |||
1175 | # Miscellaneous USB options | 1093 | # Miscellaneous USB options |
1176 | # | 1094 | # |
1177 | CONFIG_USB_DEVICEFS=y | 1095 | CONFIG_USB_DEVICEFS=y |
1096 | # CONFIG_USB_DEVICE_CLASS is not set | ||
1178 | # CONFIG_USB_DYNAMIC_MINORS is not set | 1097 | # CONFIG_USB_DYNAMIC_MINORS is not set |
1179 | # CONFIG_USB_SUSPEND is not set | 1098 | # CONFIG_USB_SUSPEND is not set |
1180 | # CONFIG_USB_OTG is not set | 1099 | # CONFIG_USB_OTG is not set |
@@ -1225,10 +1144,6 @@ CONFIG_USB_STORAGE=y | |||
1225 | # | 1144 | # |
1226 | # USB Input Devices | 1145 | # USB Input Devices |
1227 | # | 1146 | # |
1228 | CONFIG_USB_HID=y | ||
1229 | # CONFIG_USB_HIDINPUT_POWERBOOK is not set | ||
1230 | # CONFIG_HID_FF is not set | ||
1231 | # CONFIG_USB_HIDDEV is not set | ||
1232 | # CONFIG_USB_AIPTEK is not set | 1147 | # CONFIG_USB_AIPTEK is not set |
1233 | # CONFIG_USB_WACOM is not set | 1148 | # CONFIG_USB_WACOM is not set |
1234 | # CONFIG_USB_ACECAD is not set | 1149 | # CONFIG_USB_ACECAD is not set |
@@ -1556,7 +1471,7 @@ CONFIG_DEBUG_KERNEL=y | |||
1556 | CONFIG_LOG_BUF_SHIFT=18 | 1471 | CONFIG_LOG_BUF_SHIFT=18 |
1557 | CONFIG_DETECT_SOFTLOCKUP=y | 1472 | CONFIG_DETECT_SOFTLOCKUP=y |
1558 | # CONFIG_SCHEDSTATS is not set | 1473 | # CONFIG_SCHEDSTATS is not set |
1559 | # CONFIG_TIMER_STATS is not set | 1474 | CONFIG_TIMER_STATS=y |
1560 | # CONFIG_DEBUG_SLAB is not set | 1475 | # CONFIG_DEBUG_SLAB is not set |
1561 | # CONFIG_DEBUG_RT_MUTEXES is not set | 1476 | # CONFIG_DEBUG_RT_MUTEXES is not set |
1562 | # CONFIG_RT_MUTEX_TESTER is not set | 1477 | # CONFIG_RT_MUTEX_TESTER is not set |
diff --git a/arch/x86_64/ia32/ia32_binfmt.c b/arch/x86_64/ia32/ia32_binfmt.c index 071100ea125..185399baaf6 100644 --- a/arch/x86_64/ia32/ia32_binfmt.c +++ b/arch/x86_64/ia32/ia32_binfmt.c | |||
@@ -5,6 +5,11 @@ | |||
5 | * This tricks binfmt_elf.c into loading 32bit binaries using lots | 5 | * This tricks binfmt_elf.c into loading 32bit binaries using lots |
6 | * of ugly preprocessor tricks. Talk about very very poor man's inheritance. | 6 | * of ugly preprocessor tricks. Talk about very very poor man's inheritance. |
7 | */ | 7 | */ |
8 | #define __ASM_X86_64_ELF_H 1 | ||
9 | |||
10 | #undef ELF_CLASS | ||
11 | #define ELF_CLASS ELFCLASS32 | ||
12 | |||
8 | #include <linux/types.h> | 13 | #include <linux/types.h> |
9 | #include <linux/stddef.h> | 14 | #include <linux/stddef.h> |
10 | #include <linux/rwsem.h> | 15 | #include <linux/rwsem.h> |
@@ -50,9 +55,6 @@ struct elf_phdr; | |||
50 | #undef ELF_ARCH | 55 | #undef ELF_ARCH |
51 | #define ELF_ARCH EM_386 | 56 | #define ELF_ARCH EM_386 |
52 | 57 | ||
53 | #undef ELF_CLASS | ||
54 | #define ELF_CLASS ELFCLASS32 | ||
55 | |||
56 | #define ELF_DATA ELFDATA2LSB | 58 | #define ELF_DATA ELFDATA2LSB |
57 | 59 | ||
58 | #define USE_ELF_CORE_DUMP 1 | 60 | #define USE_ELF_CORE_DUMP 1 |
@@ -136,7 +138,7 @@ struct elf_prpsinfo | |||
136 | 138 | ||
137 | #define user user32 | 139 | #define user user32 |
138 | 140 | ||
139 | #define __ASM_X86_64_ELF_H 1 | 141 | #undef elf_read_implies_exec |
140 | #define elf_read_implies_exec(ex, executable_stack) (executable_stack != EXSTACK_DISABLE_X) | 142 | #define elf_read_implies_exec(ex, executable_stack) (executable_stack != EXSTACK_DISABLE_X) |
141 | //#include <asm/ia32.h> | 143 | //#include <asm/ia32.h> |
142 | #include <linux/elf.h> | 144 | #include <linux/elf.h> |
diff --git a/arch/x86_64/ia32/ia32entry.S b/arch/x86_64/ia32/ia32entry.S index 796df6992f6..c48087db6f7 100644 --- a/arch/x86_64/ia32/ia32entry.S +++ b/arch/x86_64/ia32/ia32entry.S | |||
@@ -481,11 +481,7 @@ ia32_sys_call_table: | |||
481 | .quad sys_symlink | 481 | .quad sys_symlink |
482 | .quad sys_lstat | 482 | .quad sys_lstat |
483 | .quad sys_readlink /* 85 */ | 483 | .quad sys_readlink /* 85 */ |
484 | #ifdef CONFIG_IA32_AOUT | ||
485 | .quad sys_uselib | 484 | .quad sys_uselib |
486 | #else | ||
487 | .quad quiet_ni_syscall | ||
488 | #endif | ||
489 | .quad sys_swapon | 485 | .quad sys_swapon |
490 | .quad sys_reboot | 486 | .quad sys_reboot |
491 | .quad compat_sys_old_readdir | 487 | .quad compat_sys_old_readdir |
diff --git a/arch/x86_64/ia32/syscall32.c b/arch/x86_64/ia32/syscall32.c index 568ff0df89e..fc4419ff035 100644 --- a/arch/x86_64/ia32/syscall32.c +++ b/arch/x86_64/ia32/syscall32.c | |||
@@ -13,6 +13,7 @@ | |||
13 | #include <asm/proto.h> | 13 | #include <asm/proto.h> |
14 | #include <asm/tlbflush.h> | 14 | #include <asm/tlbflush.h> |
15 | #include <asm/ia32_unistd.h> | 15 | #include <asm/ia32_unistd.h> |
16 | #include <asm/vsyscall32.h> | ||
16 | 17 | ||
17 | extern unsigned char syscall32_syscall[], syscall32_syscall_end[]; | 18 | extern unsigned char syscall32_syscall[], syscall32_syscall_end[]; |
18 | extern unsigned char syscall32_sysenter[], syscall32_sysenter_end[]; | 19 | extern unsigned char syscall32_sysenter[], syscall32_sysenter_end[]; |
diff --git a/arch/x86_64/kernel/Makefile b/arch/x86_64/kernel/Makefile index bb47e86f3d0..4d94c51803d 100644 --- a/arch/x86_64/kernel/Makefile +++ b/arch/x86_64/kernel/Makefile | |||
@@ -8,7 +8,8 @@ obj-y := process.o signal.o entry.o traps.o irq.o \ | |||
8 | ptrace.o time.o ioport.o ldt.o setup.o i8259.o sys_x86_64.o \ | 8 | ptrace.o time.o ioport.o ldt.o setup.o i8259.o sys_x86_64.o \ |
9 | x8664_ksyms.o i387.o syscall.o vsyscall.o \ | 9 | x8664_ksyms.o i387.o syscall.o vsyscall.o \ |
10 | setup64.o bootflag.o e820.o reboot.o quirks.o i8237.o \ | 10 | setup64.o bootflag.o e820.o reboot.o quirks.o i8237.o \ |
11 | pci-dma.o pci-nommu.o alternative.o hpet.o tsc.o | 11 | pci-dma.o pci-nommu.o alternative.o hpet.o tsc.o bugs.o \ |
12 | perfctr-watchdog.o | ||
12 | 13 | ||
13 | obj-$(CONFIG_STACKTRACE) += stacktrace.o | 14 | obj-$(CONFIG_STACKTRACE) += stacktrace.o |
14 | obj-$(CONFIG_X86_MCE) += mce.o therm_throt.o | 15 | obj-$(CONFIG_X86_MCE) += mce.o therm_throt.o |
@@ -21,8 +22,7 @@ obj-$(CONFIG_MICROCODE) += microcode.o | |||
21 | obj-$(CONFIG_X86_CPUID) += cpuid.o | 22 | obj-$(CONFIG_X86_CPUID) += cpuid.o |
22 | obj-$(CONFIG_SMP) += smp.o smpboot.o trampoline.o tsc_sync.o | 23 | obj-$(CONFIG_SMP) += smp.o smpboot.o trampoline.o tsc_sync.o |
23 | obj-y += apic.o nmi.o | 24 | obj-y += apic.o nmi.o |
24 | obj-y += io_apic.o mpparse.o \ | 25 | obj-y += io_apic.o mpparse.o genapic.o genapic_flat.o |
25 | genapic.o genapic_cluster.o genapic_flat.o | ||
26 | obj-$(CONFIG_KEXEC) += machine_kexec.o relocate_kernel.o crash.o | 26 | obj-$(CONFIG_KEXEC) += machine_kexec.o relocate_kernel.o crash.o |
27 | obj-$(CONFIG_CRASH_DUMP) += crash_dump.o | 27 | obj-$(CONFIG_CRASH_DUMP) += crash_dump.o |
28 | obj-$(CONFIG_PM) += suspend.o | 28 | obj-$(CONFIG_PM) += suspend.o |
@@ -58,3 +58,4 @@ i8237-y += ../../i386/kernel/i8237.o | |||
58 | msr-$(subst m,y,$(CONFIG_X86_MSR)) += ../../i386/kernel/msr.o | 58 | msr-$(subst m,y,$(CONFIG_X86_MSR)) += ../../i386/kernel/msr.o |
59 | alternative-y += ../../i386/kernel/alternative.o | 59 | alternative-y += ../../i386/kernel/alternative.o |
60 | pcspeaker-y += ../../i386/kernel/pcspeaker.o | 60 | pcspeaker-y += ../../i386/kernel/pcspeaker.o |
61 | perfctr-watchdog-y += ../../i386/kernel/cpu/perfctr-watchdog.o | ||
diff --git a/arch/x86_64/kernel/acpi/sleep.c b/arch/x86_64/kernel/acpi/sleep.c index e1548fbe95a..195b7034a14 100644 --- a/arch/x86_64/kernel/acpi/sleep.c +++ b/arch/x86_64/kernel/acpi/sleep.c | |||
@@ -60,19 +60,6 @@ extern char wakeup_start, wakeup_end; | |||
60 | 60 | ||
61 | extern unsigned long acpi_copy_wakeup_routine(unsigned long); | 61 | extern unsigned long acpi_copy_wakeup_routine(unsigned long); |
62 | 62 | ||
63 | static pgd_t low_ptr; | ||
64 | |||
65 | static void init_low_mapping(void) | ||
66 | { | ||
67 | pgd_t *slot0 = pgd_offset(current->mm, 0UL); | ||
68 | low_ptr = *slot0; | ||
69 | /* FIXME: We're playing with the current task's page tables here, which | ||
70 | * is potentially dangerous on SMP systems. | ||
71 | */ | ||
72 | set_pgd(slot0, *pgd_offset(current->mm, PAGE_OFFSET)); | ||
73 | local_flush_tlb(); | ||
74 | } | ||
75 | |||
76 | /** | 63 | /** |
77 | * acpi_save_state_mem - save kernel state | 64 | * acpi_save_state_mem - save kernel state |
78 | * | 65 | * |
@@ -81,8 +68,6 @@ static void init_low_mapping(void) | |||
81 | */ | 68 | */ |
82 | int acpi_save_state_mem(void) | 69 | int acpi_save_state_mem(void) |
83 | { | 70 | { |
84 | init_low_mapping(); | ||
85 | |||
86 | memcpy((void *)acpi_wakeup_address, &wakeup_start, | 71 | memcpy((void *)acpi_wakeup_address, &wakeup_start, |
87 | &wakeup_end - &wakeup_start); | 72 | &wakeup_end - &wakeup_start); |
88 | acpi_copy_wakeup_routine(acpi_wakeup_address); | 73 | acpi_copy_wakeup_routine(acpi_wakeup_address); |
@@ -95,8 +80,6 @@ int acpi_save_state_mem(void) | |||
95 | */ | 80 | */ |
96 | void acpi_restore_state_mem(void) | 81 | void acpi_restore_state_mem(void) |
97 | { | 82 | { |
98 | set_pgd(pgd_offset(current->mm, 0UL), low_ptr); | ||
99 | local_flush_tlb(); | ||
100 | } | 83 | } |
101 | 84 | ||
102 | /** | 85 | /** |
@@ -109,10 +92,11 @@ void acpi_restore_state_mem(void) | |||
109 | */ | 92 | */ |
110 | void __init acpi_reserve_bootmem(void) | 93 | void __init acpi_reserve_bootmem(void) |
111 | { | 94 | { |
112 | acpi_wakeup_address = (unsigned long)alloc_bootmem_low(PAGE_SIZE); | 95 | acpi_wakeup_address = (unsigned long)alloc_bootmem_low(PAGE_SIZE*2); |
113 | if ((&wakeup_end - &wakeup_start) > PAGE_SIZE) | 96 | if ((&wakeup_end - &wakeup_start) > (PAGE_SIZE*2)) |
114 | printk(KERN_CRIT | 97 | printk(KERN_CRIT |
115 | "ACPI: Wakeup code way too big, will crash on attempt to suspend\n"); | 98 | "ACPI: Wakeup code way too big, will crash on attempt" |
99 | " to suspend\n"); | ||
116 | } | 100 | } |
117 | 101 | ||
118 | static int __init acpi_sleep_setup(char *str) | 102 | static int __init acpi_sleep_setup(char *str) |
diff --git a/arch/x86_64/kernel/acpi/wakeup.S b/arch/x86_64/kernel/acpi/wakeup.S index 185faa911db..8550a6ffa27 100644 --- a/arch/x86_64/kernel/acpi/wakeup.S +++ b/arch/x86_64/kernel/acpi/wakeup.S | |||
@@ -1,6 +1,7 @@ | |||
1 | .text | 1 | .text |
2 | #include <linux/linkage.h> | 2 | #include <linux/linkage.h> |
3 | #include <asm/segment.h> | 3 | #include <asm/segment.h> |
4 | #include <asm/pgtable.h> | ||
4 | #include <asm/page.h> | 5 | #include <asm/page.h> |
5 | #include <asm/msr.h> | 6 | #include <asm/msr.h> |
6 | 7 | ||
@@ -30,22 +31,28 @@ wakeup_code: | |||
30 | cld | 31 | cld |
31 | # setup data segment | 32 | # setup data segment |
32 | movw %cs, %ax | 33 | movw %cs, %ax |
33 | movw %ax, %ds # Make ds:0 point to wakeup_start | 34 | movw %ax, %ds # Make ds:0 point to wakeup_start |
34 | movw %ax, %ss | 35 | movw %ax, %ss |
35 | mov $(wakeup_stack - wakeup_code), %sp # Private stack is needed for ASUS board | 36 | # Private stack is needed for ASUS board |
37 | mov $(wakeup_stack - wakeup_code), %sp | ||
36 | 38 | ||
37 | pushl $0 # Kill any dangerous flags | 39 | pushl $0 # Kill any dangerous flags |
38 | popfl | 40 | popfl |
39 | 41 | ||
40 | movl real_magic - wakeup_code, %eax | 42 | movl real_magic - wakeup_code, %eax |
41 | cmpl $0x12345678, %eax | 43 | cmpl $0x12345678, %eax |
42 | jne bogus_real_magic | 44 | jne bogus_real_magic |
43 | 45 | ||
46 | call verify_cpu # Verify the cpu supports long | ||
47 | # mode | ||
48 | testl %eax, %eax | ||
49 | jnz no_longmode | ||
50 | |||
44 | testl $1, video_flags - wakeup_code | 51 | testl $1, video_flags - wakeup_code |
45 | jz 1f | 52 | jz 1f |
46 | lcall $0xc000,$3 | 53 | lcall $0xc000,$3 |
47 | movw %cs, %ax | 54 | movw %cs, %ax |
48 | movw %ax, %ds # Bios might have played with that | 55 | movw %ax, %ds # Bios might have played with that |
49 | movw %ax, %ss | 56 | movw %ax, %ss |
50 | 1: | 57 | 1: |
51 | 58 | ||
@@ -61,12 +68,15 @@ wakeup_code: | |||
61 | 68 | ||
62 | movb $0xa2, %al ; outb %al, $0x80 | 69 | movb $0xa2, %al ; outb %al, $0x80 |
63 | 70 | ||
64 | lidt %ds:idt_48a - wakeup_code | 71 | mov %ds, %ax # Find 32bit wakeup_code addr |
65 | xorl %eax, %eax | 72 | movzx %ax, %esi # (Convert %ds:gdt to a linear ptr) |
66 | movw %ds, %ax # (Convert %ds:gdt to a linear ptr) | 73 | shll $4, %esi |
67 | shll $4, %eax | 74 | # Fix up the vectors |
68 | addl $(gdta - wakeup_code), %eax | 75 | addl %esi, wakeup_32_vector - wakeup_code |
69 | movl %eax, gdt_48a +2 - wakeup_code | 76 | addl %esi, wakeup_long64_vector - wakeup_code |
77 | addl %esi, gdt_48a + 2 - wakeup_code # Fixup the gdt pointer | ||
78 | |||
79 | lidtl %ds:idt_48a - wakeup_code | ||
70 | lgdtl %ds:gdt_48a - wakeup_code # load gdt with whatever is | 80 | lgdtl %ds:gdt_48a - wakeup_code # load gdt with whatever is |
71 | # appropriate | 81 | # appropriate |
72 | 82 | ||
@@ -75,86 +85,63 @@ wakeup_code: | |||
75 | jmp 1f | 85 | jmp 1f |
76 | 1: | 86 | 1: |
77 | 87 | ||
78 | .byte 0x66, 0xea # prefix + jmpi-opcode | 88 | ljmpl *(wakeup_32_vector - wakeup_code) |
79 | .long wakeup_32 - __START_KERNEL_map | 89 | |
80 | .word __KERNEL_CS | 90 | .balign 4 |
91 | wakeup_32_vector: | ||
92 | .long wakeup_32 - wakeup_code | ||
93 | .word __KERNEL32_CS, 0 | ||
81 | 94 | ||
82 | .code32 | 95 | .code32 |
83 | wakeup_32: | 96 | wakeup_32: |
84 | # Running in this code, but at low address; paging is not yet turned on. | 97 | # Running in this code, but at low address; paging is not yet turned on. |
85 | movb $0xa5, %al ; outb %al, $0x80 | 98 | movb $0xa5, %al ; outb %al, $0x80 |
86 | 99 | ||
87 | /* Check if extended functions are implemented */ | 100 | movl $__KERNEL_DS, %eax |
88 | movl $0x80000000, %eax | 101 | movl %eax, %ds |
89 | cpuid | ||
90 | cmpl $0x80000000, %eax | ||
91 | jbe bogus_cpu | ||
92 | wbinvd | ||
93 | mov $0x80000001, %eax | ||
94 | cpuid | ||
95 | btl $29, %edx | ||
96 | jnc bogus_cpu | ||
97 | movl %edx,%edi | ||
98 | |||
99 | movw $__KERNEL_DS, %ax | ||
100 | movw %ax, %ds | ||
101 | movw %ax, %es | ||
102 | movw %ax, %fs | ||
103 | movw %ax, %gs | ||
104 | |||
105 | movw $__KERNEL_DS, %ax | ||
106 | movw %ax, %ss | ||
107 | 102 | ||
108 | mov $(wakeup_stack - __START_KERNEL_map), %esp | 103 | movw $0x0e00 + 'i', %ds:(0xb8012) |
109 | movl saved_magic - __START_KERNEL_map, %eax | 104 | movb $0xa8, %al ; outb %al, $0x80; |
110 | cmpl $0x9abcdef0, %eax | ||
111 | jne bogus_32_magic | ||
112 | 105 | ||
113 | /* | 106 | /* |
114 | * Prepare for entering 64bits mode | 107 | * Prepare for entering 64bits mode |
115 | */ | 108 | */ |
116 | 109 | ||
117 | /* Enable PAE mode and PGE */ | 110 | /* Enable PAE */ |
118 | xorl %eax, %eax | 111 | xorl %eax, %eax |
119 | btsl $5, %eax | 112 | btsl $5, %eax |
120 | btsl $7, %eax | ||
121 | movl %eax, %cr4 | 113 | movl %eax, %cr4 |
122 | 114 | ||
123 | /* Setup early boot stage 4 level pagetables */ | 115 | /* Setup early boot stage 4 level pagetables */ |
124 | movl $(wakeup_level4_pgt - __START_KERNEL_map), %eax | 116 | leal (wakeup_level4_pgt - wakeup_code)(%esi), %eax |
125 | movl %eax, %cr3 | 117 | movl %eax, %cr3 |
126 | 118 | ||
127 | /* Setup EFER (Extended Feature Enable Register) */ | 119 | /* Check if nx is implemented */ |
128 | movl $MSR_EFER, %ecx | 120 | movl $0x80000001, %eax |
129 | rdmsr | 121 | cpuid |
130 | /* Fool rdmsr and reset %eax to avoid dependences */ | 122 | movl %edx,%edi |
131 | xorl %eax, %eax | 123 | |
132 | /* Enable Long Mode */ | 124 | /* Enable Long Mode */ |
125 | xorl %eax, %eax | ||
133 | btsl $_EFER_LME, %eax | 126 | btsl $_EFER_LME, %eax |
134 | /* Enable System Call */ | ||
135 | btsl $_EFER_SCE, %eax | ||
136 | 127 | ||
137 | /* No Execute supported? */ | 128 | /* No Execute supported? */ |
138 | btl $20,%edi | 129 | btl $20,%edi |
139 | jnc 1f | 130 | jnc 1f |
140 | btsl $_EFER_NX, %eax | 131 | btsl $_EFER_NX, %eax |
141 | 1: | ||
142 | 132 | ||
143 | /* Make changes effective */ | 133 | /* Make changes effective */ |
134 | 1: movl $MSR_EFER, %ecx | ||
135 | xorl %edx, %edx | ||
144 | wrmsr | 136 | wrmsr |
145 | wbinvd | ||
146 | 137 | ||
147 | xorl %eax, %eax | 138 | xorl %eax, %eax |
148 | btsl $31, %eax /* Enable paging and in turn activate Long Mode */ | 139 | btsl $31, %eax /* Enable paging and in turn activate Long Mode */ |
149 | btsl $0, %eax /* Enable protected mode */ | 140 | btsl $0, %eax /* Enable protected mode */ |
150 | btsl $1, %eax /* Enable MP */ | ||
151 | btsl $4, %eax /* Enable ET */ | ||
152 | btsl $5, %eax /* Enable NE */ | ||
153 | btsl $16, %eax /* Enable WP */ | ||
154 | btsl $18, %eax /* Enable AM */ | ||
155 | 141 | ||
156 | /* Make changes effective */ | 142 | /* Make changes effective */ |
157 | movl %eax, %cr0 | 143 | movl %eax, %cr0 |
144 | |||
158 | /* At this point: | 145 | /* At this point: |
159 | CR4.PAE must be 1 | 146 | CR4.PAE must be 1 |
160 | CS.L must be 0 | 147 | CS.L must be 0 |
@@ -162,11 +149,6 @@ wakeup_32: | |||
162 | Next instruction must be a branch | 149 | Next instruction must be a branch |
163 | This must be on identity-mapped page | 150 | This must be on identity-mapped page |
164 | */ | 151 | */ |
165 | jmp reach_compatibility_mode | ||
166 | reach_compatibility_mode: | ||
167 | movw $0x0e00 + 'i', %ds:(0xb8012) | ||
168 | movb $0xa8, %al ; outb %al, $0x80; | ||
169 | |||
170 | /* | 152 | /* |
171 | * At this point we're in long mode but in 32bit compatibility mode | 153 | * At this point we're in long mode but in 32bit compatibility mode |
172 | * with EFER.LME = 1, CS.L = 0, CS.D = 1 (and in turn | 154 | * with EFER.LME = 1, CS.L = 0, CS.D = 1 (and in turn |
@@ -174,24 +156,19 @@ reach_compatibility_mode: | |||
174 | * the new gdt/idt that has __KERNEL_CS with CS.L = 1. | 156 | * the new gdt/idt that has __KERNEL_CS with CS.L = 1. |
175 | */ | 157 | */ |
176 | 158 | ||
177 | movw $0x0e00 + 'n', %ds:(0xb8014) | ||
178 | movb $0xa9, %al ; outb %al, $0x80 | ||
179 | |||
180 | /* Load new GDT with the 64bit segment using 32bit descriptor */ | ||
181 | movl $(pGDT32 - __START_KERNEL_map), %eax | ||
182 | lgdt (%eax) | ||
183 | |||
184 | movl $(wakeup_jumpvector - __START_KERNEL_map), %eax | ||
185 | /* Finally jump in 64bit mode */ | 159 | /* Finally jump in 64bit mode */ |
186 | ljmp *(%eax) | 160 | ljmp *(wakeup_long64_vector - wakeup_code)(%esi) |
187 | 161 | ||
188 | wakeup_jumpvector: | 162 | .balign 4 |
189 | .long wakeup_long64 - __START_KERNEL_map | 163 | wakeup_long64_vector: |
190 | .word __KERNEL_CS | 164 | .long wakeup_long64 - wakeup_code |
165 | .word __KERNEL_CS, 0 | ||
191 | 166 | ||
192 | .code64 | 167 | .code64 |
193 | 168 | ||
194 | /* Hooray, we are in Long 64-bit mode (but still running in low memory) */ | 169 | /* Hooray, we are in Long 64-bit mode (but still running in |
170 | * low memory) | ||
171 | */ | ||
195 | wakeup_long64: | 172 | wakeup_long64: |
196 | /* | 173 | /* |
197 | * We must switch to a new descriptor in kernel space for the GDT | 174 | * We must switch to a new descriptor in kernel space for the GDT |
@@ -199,7 +176,15 @@ wakeup_long64: | |||
199 | * addresses where we're currently running on. We have to do that here | 176 | * addresses where we're currently running on. We have to do that here |
200 | * because in 32bit we couldn't load a 64bit linear address. | 177 | * because in 32bit we couldn't load a 64bit linear address. |
201 | */ | 178 | */ |
202 | lgdt cpu_gdt_descr - __START_KERNEL_map | 179 | lgdt cpu_gdt_descr |
180 | |||
181 | movw $0x0e00 + 'n', %ds:(0xb8014) | ||
182 | movb $0xa9, %al ; outb %al, $0x80 | ||
183 | |||
184 | movq saved_magic, %rax | ||
185 | movq $0x123456789abcdef0, %rdx | ||
186 | cmpq %rdx, %rax | ||
187 | jne bogus_64_magic | ||
203 | 188 | ||
204 | movw $0x0e00 + 'u', %ds:(0xb8016) | 189 | movw $0x0e00 + 'u', %ds:(0xb8016) |
205 | 190 | ||
@@ -211,75 +196,58 @@ wakeup_long64: | |||
211 | movw %ax, %es | 196 | movw %ax, %es |
212 | movw %ax, %fs | 197 | movw %ax, %fs |
213 | movw %ax, %gs | 198 | movw %ax, %gs |
214 | movq saved_esp, %rsp | 199 | movq saved_rsp, %rsp |
215 | 200 | ||
216 | movw $0x0e00 + 'x', %ds:(0xb8018) | 201 | movw $0x0e00 + 'x', %ds:(0xb8018) |
217 | movq saved_ebx, %rbx | 202 | movq saved_rbx, %rbx |
218 | movq saved_edi, %rdi | 203 | movq saved_rdi, %rdi |
219 | movq saved_esi, %rsi | 204 | movq saved_rsi, %rsi |
220 | movq saved_ebp, %rbp | 205 | movq saved_rbp, %rbp |
221 | 206 | ||
222 | movw $0x0e00 + '!', %ds:(0xb801a) | 207 | movw $0x0e00 + '!', %ds:(0xb801a) |
223 | movq saved_eip, %rax | 208 | movq saved_rip, %rax |
224 | jmp *%rax | 209 | jmp *%rax |
225 | 210 | ||
226 | .code32 | 211 | .code32 |
227 | 212 | ||
228 | .align 64 | 213 | .align 64 |
229 | gdta: | 214 | gdta: |
215 | /* It's good to keep the gdt in sync with the one in trampoline.S */ | ||
230 | .word 0, 0, 0, 0 # dummy | 216 | .word 0, 0, 0, 0 # dummy |
231 | 217 | /* ??? Why do I need the accessed bit set in order for this to work? */ |
232 | .word 0, 0, 0, 0 # unused | 218 | .quad 0x00cf9b000000ffff # __KERNEL32_CS |
233 | 219 | .quad 0x00af9b000000ffff # __KERNEL_CS | |
234 | .word 0xFFFF # 4Gb - (0x100000*0x1000 = 4Gb) | 220 | .quad 0x00cf93000000ffff # __KERNEL_DS |
235 | .word 0 # base address = 0 | ||
236 | .word 0x9B00 # code read/exec. ??? Why I need 0x9B00 (as opposed to 0x9A00 in order for this to work?) | ||
237 | .word 0x00CF # granularity = 4096, 386 | ||
238 | # (+5th nibble of limit) | ||
239 | |||
240 | .word 0xFFFF # 4Gb - (0x100000*0x1000 = 4Gb) | ||
241 | .word 0 # base address = 0 | ||
242 | .word 0x9200 # data read/write | ||
243 | .word 0x00CF # granularity = 4096, 386 | ||
244 | # (+5th nibble of limit) | ||
245 | # this is 64bit descriptor for code | ||
246 | .word 0xFFFF | ||
247 | .word 0 | ||
248 | .word 0x9A00 # code read/exec | ||
249 | .word 0x00AF # as above, but it is long mode and with D=0 | ||
250 | 221 | ||
251 | idt_48a: | 222 | idt_48a: |
252 | .word 0 # idt limit = 0 | 223 | .word 0 # idt limit = 0 |
253 | .word 0, 0 # idt base = 0L | 224 | .word 0, 0 # idt base = 0L |
254 | 225 | ||
255 | gdt_48a: | 226 | gdt_48a: |
256 | .word 0x8000 # gdt limit=2048, | 227 | .word 0x800 # gdt limit=2048, |
257 | # 256 GDT entries | 228 | # 256 GDT entries |
258 | .word 0, 0 # gdt base (filled in later) | 229 | .long gdta - wakeup_code # gdt base (relocated later) |
259 | |||
260 | 230 | ||
261 | real_save_gdt: .word 0 | ||
262 | .quad 0 | ||
263 | real_magic: .quad 0 | 231 | real_magic: .quad 0 |
264 | video_mode: .quad 0 | 232 | video_mode: .quad 0 |
265 | video_flags: .quad 0 | 233 | video_flags: .quad 0 |
266 | 234 | ||
235 | .code16 | ||
267 | bogus_real_magic: | 236 | bogus_real_magic: |
268 | movb $0xba,%al ; outb %al,$0x80 | 237 | movb $0xba,%al ; outb %al,$0x80 |
269 | jmp bogus_real_magic | 238 | jmp bogus_real_magic |
270 | 239 | ||
271 | bogus_32_magic: | 240 | .code64 |
241 | bogus_64_magic: | ||
272 | movb $0xb3,%al ; outb %al,$0x80 | 242 | movb $0xb3,%al ; outb %al,$0x80 |
273 | jmp bogus_32_magic | 243 | jmp bogus_64_magic |
274 | 244 | ||
275 | bogus_31_magic: | 245 | .code16 |
276 | movb $0xb1,%al ; outb %al,$0x80 | 246 | no_longmode: |
277 | jmp bogus_31_magic | 247 | movb $0xbc,%al ; outb %al,$0x80 |
278 | 248 | jmp no_longmode | |
279 | bogus_cpu: | ||
280 | movb $0xbc,%al ; outb %al,$0x80 | ||
281 | jmp bogus_cpu | ||
282 | 249 | ||
250 | #include "../verify_cpu.S" | ||
283 | 251 | ||
284 | /* This code uses an extended set of video mode numbers. These include: | 252 | /* This code uses an extended set of video mode numbers. These include: |
285 | * Aliases for standard modes | 253 | * Aliases for standard modes |
@@ -301,6 +269,7 @@ bogus_cpu: | |||
301 | #define VIDEO_FIRST_V7 0x0900 | 269 | #define VIDEO_FIRST_V7 0x0900 |
302 | 270 | ||
303 | # Setting of user mode (AX=mode ID) => CF=success | 271 | # Setting of user mode (AX=mode ID) => CF=success |
272 | .code16 | ||
304 | mode_seta: | 273 | mode_seta: |
305 | movw %ax, %bx | 274 | movw %ax, %bx |
306 | #if 0 | 275 | #if 0 |
@@ -346,21 +315,18 @@ check_vesaa: | |||
346 | 315 | ||
347 | _setbada: jmp setbada | 316 | _setbada: jmp setbada |
348 | 317 | ||
349 | .code64 | ||
350 | bogus_magic: | ||
351 | movw $0x0e00 + 'B', %ds:(0xb8018) | ||
352 | jmp bogus_magic | ||
353 | |||
354 | bogus_magic2: | ||
355 | movw $0x0e00 + '2', %ds:(0xb8018) | ||
356 | jmp bogus_magic2 | ||
357 | |||
358 | |||
359 | wakeup_stack_begin: # Stack grows down | 318 | wakeup_stack_begin: # Stack grows down |
360 | 319 | ||
361 | .org 0xff0 | 320 | .org 0xff0 |
362 | wakeup_stack: # Just below end of page | 321 | wakeup_stack: # Just below end of page |
363 | 322 | ||
323 | .org 0x1000 | ||
324 | ENTRY(wakeup_level4_pgt) | ||
325 | .quad level3_ident_pgt - __START_KERNEL_map + _KERNPG_TABLE | ||
326 | .fill 510,8,0 | ||
327 | /* (2^48-(2*1024*1024*1024))/(2^39) = 511 */ | ||
328 | .quad level3_kernel_pgt - __START_KERNEL_map + _KERNPG_TABLE | ||
329 | |||
364 | ENTRY(wakeup_end) | 330 | ENTRY(wakeup_end) |
365 | 331 | ||
366 | ## | 332 | ## |
@@ -373,28 +339,11 @@ ENTRY(wakeup_end) | |||
373 | # | 339 | # |
374 | # Returned address is location of code in low memory (past data and stack) | 340 | # Returned address is location of code in low memory (past data and stack) |
375 | # | 341 | # |
342 | .code64 | ||
376 | ENTRY(acpi_copy_wakeup_routine) | 343 | ENTRY(acpi_copy_wakeup_routine) |
377 | pushq %rax | 344 | pushq %rax |
378 | pushq %rcx | ||
379 | pushq %rdx | 345 | pushq %rdx |
380 | 346 | ||
381 | sgdt saved_gdt | ||
382 | sidt saved_idt | ||
383 | sldt saved_ldt | ||
384 | str saved_tss | ||
385 | |||
386 | movq %cr3, %rdx | ||
387 | movq %rdx, saved_cr3 | ||
388 | movq %cr4, %rdx | ||
389 | movq %rdx, saved_cr4 | ||
390 | movq %cr0, %rdx | ||
391 | movq %rdx, saved_cr0 | ||
392 | sgdt real_save_gdt - wakeup_start (,%rdi) | ||
393 | movl $MSR_EFER, %ecx | ||
394 | rdmsr | ||
395 | movl %eax, saved_efer | ||
396 | movl %edx, saved_efer2 | ||
397 | |||
398 | movl saved_video_mode, %edx | 347 | movl saved_video_mode, %edx |
399 | movl %edx, video_mode - wakeup_start (,%rdi) | 348 | movl %edx, video_mode - wakeup_start (,%rdi) |
400 | movl acpi_video_flags, %edx | 349 | movl acpi_video_flags, %edx |
@@ -403,21 +352,13 @@ ENTRY(acpi_copy_wakeup_routine) | |||
403 | movq $0x123456789abcdef0, %rdx | 352 | movq $0x123456789abcdef0, %rdx |
404 | movq %rdx, saved_magic | 353 | movq %rdx, saved_magic |
405 | 354 | ||
406 | movl saved_magic - __START_KERNEL_map, %eax | 355 | movq saved_magic, %rax |
407 | cmpl $0x9abcdef0, %eax | 356 | movq $0x123456789abcdef0, %rdx |
408 | jne bogus_32_magic | 357 | cmpq %rdx, %rax |
409 | 358 | jne bogus_64_magic | |
410 | # make sure %cr4 is set correctly (features, etc) | ||
411 | movl saved_cr4 - __START_KERNEL_map, %eax | ||
412 | movq %rax, %cr4 | ||
413 | 359 | ||
414 | movl saved_cr0 - __START_KERNEL_map, %eax | ||
415 | movq %rax, %cr0 | ||
416 | jmp 1f # Flush pipelines | ||
417 | 1: | ||
418 | # restore the regs we used | 360 | # restore the regs we used |
419 | popq %rdx | 361 | popq %rdx |
420 | popq %rcx | ||
421 | popq %rax | 362 | popq %rax |
422 | ENTRY(do_suspend_lowlevel_s4bios) | 363 | ENTRY(do_suspend_lowlevel_s4bios) |
423 | ret | 364 | ret |
@@ -450,13 +391,13 @@ do_suspend_lowlevel: | |||
450 | movq %r15, saved_context_r15(%rip) | 391 | movq %r15, saved_context_r15(%rip) |
451 | pushfq ; popq saved_context_eflags(%rip) | 392 | pushfq ; popq saved_context_eflags(%rip) |
452 | 393 | ||
453 | movq $.L97, saved_eip(%rip) | 394 | movq $.L97, saved_rip(%rip) |
454 | 395 | ||
455 | movq %rsp,saved_esp | 396 | movq %rsp,saved_rsp |
456 | movq %rbp,saved_ebp | 397 | movq %rbp,saved_rbp |
457 | movq %rbx,saved_ebx | 398 | movq %rbx,saved_rbx |
458 | movq %rdi,saved_edi | 399 | movq %rdi,saved_rdi |
459 | movq %rsi,saved_esi | 400 | movq %rsi,saved_rsi |
460 | 401 | ||
461 | addq $8, %rsp | 402 | addq $8, %rsp |
462 | movl $3, %edi | 403 | movl $3, %edi |
@@ -503,25 +444,12 @@ do_suspend_lowlevel: | |||
503 | 444 | ||
504 | .data | 445 | .data |
505 | ALIGN | 446 | ALIGN |
506 | ENTRY(saved_ebp) .quad 0 | 447 | ENTRY(saved_rbp) .quad 0 |
507 | ENTRY(saved_esi) .quad 0 | 448 | ENTRY(saved_rsi) .quad 0 |
508 | ENTRY(saved_edi) .quad 0 | 449 | ENTRY(saved_rdi) .quad 0 |
509 | ENTRY(saved_ebx) .quad 0 | 450 | ENTRY(saved_rbx) .quad 0 |
510 | 451 | ||
511 | ENTRY(saved_eip) .quad 0 | 452 | ENTRY(saved_rip) .quad 0 |
512 | ENTRY(saved_esp) .quad 0 | 453 | ENTRY(saved_rsp) .quad 0 |
513 | 454 | ||
514 | ENTRY(saved_magic) .quad 0 | 455 | ENTRY(saved_magic) .quad 0 |
515 | |||
516 | ALIGN | ||
517 | # saved registers | ||
518 | saved_gdt: .quad 0,0 | ||
519 | saved_idt: .quad 0,0 | ||
520 | saved_ldt: .quad 0 | ||
521 | saved_tss: .quad 0 | ||
522 | |||
523 | saved_cr0: .quad 0 | ||
524 | saved_cr3: .quad 0 | ||
525 | saved_cr4: .quad 0 | ||
526 | saved_efer: .quad 0 | ||
527 | saved_efer2: .quad 0 | ||
diff --git a/arch/x86_64/kernel/aperture.c b/arch/x86_64/kernel/aperture.c index b487396c4c5..a52af582059 100644 --- a/arch/x86_64/kernel/aperture.c +++ b/arch/x86_64/kernel/aperture.c | |||
@@ -51,7 +51,6 @@ static void __init insert_aperture_resource(u32 aper_base, u32 aper_size) | |||
51 | 51 | ||
52 | static u32 __init allocate_aperture(void) | 52 | static u32 __init allocate_aperture(void) |
53 | { | 53 | { |
54 | pg_data_t *nd0 = NODE_DATA(0); | ||
55 | u32 aper_size; | 54 | u32 aper_size; |
56 | void *p; | 55 | void *p; |
57 | 56 | ||
@@ -65,12 +64,12 @@ static u32 __init allocate_aperture(void) | |||
65 | * Unfortunately we cannot move it up because that would make the | 64 | * Unfortunately we cannot move it up because that would make the |
66 | * IOMMU useless. | 65 | * IOMMU useless. |
67 | */ | 66 | */ |
68 | p = __alloc_bootmem_node(nd0, aper_size, aper_size, 0); | 67 | p = __alloc_bootmem_nopanic(aper_size, aper_size, 0); |
69 | if (!p || __pa(p)+aper_size > 0xffffffff) { | 68 | if (!p || __pa(p)+aper_size > 0xffffffff) { |
70 | printk("Cannot allocate aperture memory hole (%p,%uK)\n", | 69 | printk("Cannot allocate aperture memory hole (%p,%uK)\n", |
71 | p, aper_size>>10); | 70 | p, aper_size>>10); |
72 | if (p) | 71 | if (p) |
73 | free_bootmem_node(nd0, __pa(p), aper_size); | 72 | free_bootmem(__pa(p), aper_size); |
74 | return 0; | 73 | return 0; |
75 | } | 74 | } |
76 | printk("Mapping aperture over %d KB of RAM @ %lx\n", | 75 | printk("Mapping aperture over %d KB of RAM @ %lx\n", |
diff --git a/arch/x86_64/kernel/apic.c b/arch/x86_64/kernel/apic.c index bd3e45d47c3..d198f7d82e5 100644 --- a/arch/x86_64/kernel/apic.c +++ b/arch/x86_64/kernel/apic.c | |||
@@ -68,6 +68,28 @@ int using_apic_timer __read_mostly = 0; | |||
68 | 68 | ||
69 | static void apic_pm_activate(void); | 69 | static void apic_pm_activate(void); |
70 | 70 | ||
71 | void apic_wait_icr_idle(void) | ||
72 | { | ||
73 | while (apic_read(APIC_ICR) & APIC_ICR_BUSY) | ||
74 | cpu_relax(); | ||
75 | } | ||
76 | |||
77 | unsigned int safe_apic_wait_icr_idle(void) | ||
78 | { | ||
79 | unsigned int send_status; | ||
80 | int timeout; | ||
81 | |||
82 | timeout = 0; | ||
83 | do { | ||
84 | send_status = apic_read(APIC_ICR) & APIC_ICR_BUSY; | ||
85 | if (!send_status) | ||
86 | break; | ||
87 | udelay(100); | ||
88 | } while (timeout++ < 1000); | ||
89 | |||
90 | return send_status; | ||
91 | } | ||
92 | |||
71 | void enable_NMI_through_LVT0 (void * dummy) | 93 | void enable_NMI_through_LVT0 (void * dummy) |
72 | { | 94 | { |
73 | unsigned int v; | 95 | unsigned int v; |
@@ -817,14 +839,15 @@ static void setup_APIC_timer(unsigned int clocks) | |||
817 | 839 | ||
818 | static int __init calibrate_APIC_clock(void) | 840 | static int __init calibrate_APIC_clock(void) |
819 | { | 841 | { |
820 | int apic, apic_start, tsc, tsc_start; | 842 | unsigned apic, apic_start; |
843 | unsigned long tsc, tsc_start; | ||
821 | int result; | 844 | int result; |
822 | /* | 845 | /* |
823 | * Put whatever arbitrary (but long enough) timeout | 846 | * Put whatever arbitrary (but long enough) timeout |
824 | * value into the APIC clock, we just want to get the | 847 | * value into the APIC clock, we just want to get the |
825 | * counter running for calibration. | 848 | * counter running for calibration. |
826 | */ | 849 | */ |
827 | __setup_APIC_LVTT(1000000000); | 850 | __setup_APIC_LVTT(4000000000); |
828 | 851 | ||
829 | apic_start = apic_read(APIC_TMCCT); | 852 | apic_start = apic_read(APIC_TMCCT); |
830 | #ifdef CONFIG_X86_PM_TIMER | 853 | #ifdef CONFIG_X86_PM_TIMER |
@@ -835,15 +858,15 @@ static int __init calibrate_APIC_clock(void) | |||
835 | } else | 858 | } else |
836 | #endif | 859 | #endif |
837 | { | 860 | { |
838 | rdtscl(tsc_start); | 861 | rdtscll(tsc_start); |
839 | 862 | ||
840 | do { | 863 | do { |
841 | apic = apic_read(APIC_TMCCT); | 864 | apic = apic_read(APIC_TMCCT); |
842 | rdtscl(tsc); | 865 | rdtscll(tsc); |
843 | } while ((tsc - tsc_start) < TICK_COUNT && | 866 | } while ((tsc - tsc_start) < TICK_COUNT && |
844 | (apic - apic_start) < TICK_COUNT); | 867 | (apic_start - apic) < TICK_COUNT); |
845 | 868 | ||
846 | result = (apic_start - apic) * 1000L * cpu_khz / | 869 | result = (apic_start - apic) * 1000L * tsc_khz / |
847 | (tsc - tsc_start); | 870 | (tsc - tsc_start); |
848 | } | 871 | } |
849 | printk("result %d\n", result); | 872 | printk("result %d\n", result); |
diff --git a/arch/x86_64/kernel/asm-offsets.c b/arch/x86_64/kernel/asm-offsets.c index 96687e2beb2..778953bc636 100644 --- a/arch/x86_64/kernel/asm-offsets.c +++ b/arch/x86_64/kernel/asm-offsets.c | |||
@@ -21,6 +21,14 @@ | |||
21 | 21 | ||
22 | #define BLANK() asm volatile("\n->" : : ) | 22 | #define BLANK() asm volatile("\n->" : : ) |
23 | 23 | ||
24 | #define __NO_STUBS 1 | ||
25 | #undef __SYSCALL | ||
26 | #undef _ASM_X86_64_UNISTD_H_ | ||
27 | #define __SYSCALL(nr, sym) [nr] = 1, | ||
28 | static char syscalls[] = { | ||
29 | #include <asm/unistd.h> | ||
30 | }; | ||
31 | |||
24 | int main(void) | 32 | int main(void) |
25 | { | 33 | { |
26 | #define ENTRY(entry) DEFINE(tsk_ ## entry, offsetof(struct task_struct, entry)) | 34 | #define ENTRY(entry) DEFINE(tsk_ ## entry, offsetof(struct task_struct, entry)) |
@@ -71,5 +79,7 @@ int main(void) | |||
71 | DEFINE(TSS_ist, offsetof(struct tss_struct, ist)); | 79 | DEFINE(TSS_ist, offsetof(struct tss_struct, ist)); |
72 | BLANK(); | 80 | BLANK(); |
73 | DEFINE(crypto_tfm_ctx_offset, offsetof(struct crypto_tfm, __crt_ctx)); | 81 | DEFINE(crypto_tfm_ctx_offset, offsetof(struct crypto_tfm, __crt_ctx)); |
82 | BLANK(); | ||
83 | DEFINE(__NR_syscall_max, sizeof(syscalls) - 1); | ||
74 | return 0; | 84 | return 0; |
75 | } | 85 | } |
diff --git a/arch/x86_64/kernel/bugs.c b/arch/x86_64/kernel/bugs.c new file mode 100644 index 00000000000..12b585b5345 --- /dev/null +++ b/arch/x86_64/kernel/bugs.c | |||
@@ -0,0 +1,21 @@ | |||
1 | /* | ||
2 | * arch/x86_64/kernel/bugs.c | ||
3 | * | ||
4 | * Copyright (C) 1994 Linus Torvalds | ||
5 | * Copyright (C) 2000 SuSE | ||
6 | */ | ||
7 | |||
8 | #include <linux/kernel.h> | ||
9 | #include <linux/init.h> | ||
10 | #include <asm/alternative.h> | ||
11 | #include <asm/processor.h> | ||
12 | |||
13 | void __init check_bugs(void) | ||
14 | { | ||
15 | identify_cpu(&boot_cpu_data); | ||
16 | #if !defined(CONFIG_SMP) | ||
17 | printk("CPU: "); | ||
18 | print_cpu_info(&boot_cpu_data); | ||
19 | #endif | ||
20 | alternative_instructions(); | ||
21 | } | ||
diff --git a/arch/x86_64/kernel/e820.c b/arch/x86_64/kernel/e820.c index a490fabfcf4..be8965427a9 100644 --- a/arch/x86_64/kernel/e820.c +++ b/arch/x86_64/kernel/e820.c | |||
@@ -25,7 +25,7 @@ | |||
25 | #include <asm/bootsetup.h> | 25 | #include <asm/bootsetup.h> |
26 | #include <asm/sections.h> | 26 | #include <asm/sections.h> |
27 | 27 | ||
28 | struct e820map e820 __initdata; | 28 | struct e820map e820; |
29 | 29 | ||
30 | /* | 30 | /* |
31 | * PFN of last memory page. | 31 | * PFN of last memory page. |
@@ -98,7 +98,7 @@ static inline int bad_addr(unsigned long *addrp, unsigned long size) | |||
98 | * This function checks if any part of the range <start,end> is mapped | 98 | * This function checks if any part of the range <start,end> is mapped |
99 | * with type. | 99 | * with type. |
100 | */ | 100 | */ |
101 | int __meminit | 101 | int |
102 | e820_any_mapped(unsigned long start, unsigned long end, unsigned type) | 102 | e820_any_mapped(unsigned long start, unsigned long end, unsigned type) |
103 | { | 103 | { |
104 | int i; | 104 | int i; |
@@ -112,6 +112,7 @@ e820_any_mapped(unsigned long start, unsigned long end, unsigned type) | |||
112 | } | 112 | } |
113 | return 0; | 113 | return 0; |
114 | } | 114 | } |
115 | EXPORT_SYMBOL_GPL(e820_any_mapped); | ||
115 | 116 | ||
116 | /* | 117 | /* |
117 | * This function checks if the entire range <start,end> is mapped with type. | 118 | * This function checks if the entire range <start,end> is mapped with type. |
diff --git a/arch/x86_64/kernel/early-quirks.c b/arch/x86_64/kernel/early-quirks.c index fede55a5399..990d9c218a5 100644 --- a/arch/x86_64/kernel/early-quirks.c +++ b/arch/x86_64/kernel/early-quirks.c | |||
@@ -71,18 +71,6 @@ static void __init ati_bugs(void) | |||
71 | } | 71 | } |
72 | } | 72 | } |
73 | 73 | ||
74 | static void intel_bugs(void) | ||
75 | { | ||
76 | u16 device = read_pci_config_16(0, 0, 0, PCI_DEVICE_ID); | ||
77 | |||
78 | #ifdef CONFIG_SMP | ||
79 | if (device == PCI_DEVICE_ID_INTEL_E7320_MCH || | ||
80 | device == PCI_DEVICE_ID_INTEL_E7520_MCH || | ||
81 | device == PCI_DEVICE_ID_INTEL_E7525_MCH) | ||
82 | quirk_intel_irqbalance(); | ||
83 | #endif | ||
84 | } | ||
85 | |||
86 | struct chipset { | 74 | struct chipset { |
87 | u16 vendor; | 75 | u16 vendor; |
88 | void (*f)(void); | 76 | void (*f)(void); |
@@ -92,7 +80,6 @@ static struct chipset early_qrk[] __initdata = { | |||
92 | { PCI_VENDOR_ID_NVIDIA, nvidia_bugs }, | 80 | { PCI_VENDOR_ID_NVIDIA, nvidia_bugs }, |
93 | { PCI_VENDOR_ID_VIA, via_bugs }, | 81 | { PCI_VENDOR_ID_VIA, via_bugs }, |
94 | { PCI_VENDOR_ID_ATI, ati_bugs }, | 82 | { PCI_VENDOR_ID_ATI, ati_bugs }, |
95 | { PCI_VENDOR_ID_INTEL, intel_bugs}, | ||
96 | {} | 83 | {} |
97 | }; | 84 | }; |
98 | 85 | ||
diff --git a/arch/x86_64/kernel/early_printk.c b/arch/x86_64/kernel/early_printk.c index 47b6d90349d..92213d2b7c1 100644 --- a/arch/x86_64/kernel/early_printk.c +++ b/arch/x86_64/kernel/early_printk.c | |||
@@ -11,11 +11,10 @@ | |||
11 | 11 | ||
12 | #ifdef __i386__ | 12 | #ifdef __i386__ |
13 | #include <asm/setup.h> | 13 | #include <asm/setup.h> |
14 | #define VGABASE (__ISA_IO_base + 0xb8000) | ||
15 | #else | 14 | #else |
16 | #include <asm/bootsetup.h> | 15 | #include <asm/bootsetup.h> |
17 | #define VGABASE ((void __iomem *)0xffffffff800b8000UL) | ||
18 | #endif | 16 | #endif |
17 | #define VGABASE (__ISA_IO_base + 0xb8000) | ||
19 | 18 | ||
20 | static int max_ypos = 25, max_xpos = 80; | 19 | static int max_ypos = 25, max_xpos = 80; |
21 | static int current_ypos = 25, current_xpos = 0; | 20 | static int current_ypos = 25, current_xpos = 0; |
@@ -176,7 +175,7 @@ static noinline long simnow(long cmd, long a, long b, long c) | |||
176 | return ret; | 175 | return ret; |
177 | } | 176 | } |
178 | 177 | ||
179 | void __init simnow_init(char *str) | 178 | static void __init simnow_init(char *str) |
180 | { | 179 | { |
181 | char *fn = "klog"; | 180 | char *fn = "klog"; |
182 | if (*str == '=') | 181 | if (*str == '=') |
diff --git a/arch/x86_64/kernel/entry.S b/arch/x86_64/kernel/entry.S index ed4350ced3d..fa984b53e7e 100644 --- a/arch/x86_64/kernel/entry.S +++ b/arch/x86_64/kernel/entry.S | |||
@@ -701,6 +701,7 @@ END(spurious_interrupt) | |||
701 | CFI_ADJUST_CFA_OFFSET 8 | 701 | CFI_ADJUST_CFA_OFFSET 8 |
702 | pushq %rax /* push real oldrax to the rdi slot */ | 702 | pushq %rax /* push real oldrax to the rdi slot */ |
703 | CFI_ADJUST_CFA_OFFSET 8 | 703 | CFI_ADJUST_CFA_OFFSET 8 |
704 | CFI_REL_OFFSET rax,0 | ||
704 | leaq \sym(%rip),%rax | 705 | leaq \sym(%rip),%rax |
705 | jmp error_entry | 706 | jmp error_entry |
706 | CFI_ENDPROC | 707 | CFI_ENDPROC |
@@ -710,6 +711,7 @@ END(spurious_interrupt) | |||
710 | XCPT_FRAME | 711 | XCPT_FRAME |
711 | pushq %rax | 712 | pushq %rax |
712 | CFI_ADJUST_CFA_OFFSET 8 | 713 | CFI_ADJUST_CFA_OFFSET 8 |
714 | CFI_REL_OFFSET rax,0 | ||
713 | leaq \sym(%rip),%rax | 715 | leaq \sym(%rip),%rax |
714 | jmp error_entry | 716 | jmp error_entry |
715 | CFI_ENDPROC | 717 | CFI_ENDPROC |
@@ -817,6 +819,7 @@ paranoid_schedule\trace: | |||
817 | */ | 819 | */ |
818 | KPROBE_ENTRY(error_entry) | 820 | KPROBE_ENTRY(error_entry) |
819 | _frame RDI | 821 | _frame RDI |
822 | CFI_REL_OFFSET rax,0 | ||
820 | /* rdi slot contains rax, oldrax contains error code */ | 823 | /* rdi slot contains rax, oldrax contains error code */ |
821 | cld | 824 | cld |
822 | subq $14*8,%rsp | 825 | subq $14*8,%rsp |
@@ -824,6 +827,7 @@ KPROBE_ENTRY(error_entry) | |||
824 | movq %rsi,13*8(%rsp) | 827 | movq %rsi,13*8(%rsp) |
825 | CFI_REL_OFFSET rsi,RSI | 828 | CFI_REL_OFFSET rsi,RSI |
826 | movq 14*8(%rsp),%rsi /* load rax from rdi slot */ | 829 | movq 14*8(%rsp),%rsi /* load rax from rdi slot */ |
830 | CFI_REGISTER rax,rsi | ||
827 | movq %rdx,12*8(%rsp) | 831 | movq %rdx,12*8(%rsp) |
828 | CFI_REL_OFFSET rdx,RDX | 832 | CFI_REL_OFFSET rdx,RDX |
829 | movq %rcx,11*8(%rsp) | 833 | movq %rcx,11*8(%rsp) |
@@ -857,6 +861,7 @@ error_swapgs: | |||
857 | swapgs | 861 | swapgs |
858 | error_sti: | 862 | error_sti: |
859 | movq %rdi,RDI(%rsp) | 863 | movq %rdi,RDI(%rsp) |
864 | CFI_REL_OFFSET rdi,RDI | ||
860 | movq %rsp,%rdi | 865 | movq %rsp,%rdi |
861 | movq ORIG_RAX(%rsp),%rsi /* get error code */ | 866 | movq ORIG_RAX(%rsp),%rsi /* get error code */ |
862 | movq $-1,ORIG_RAX(%rsp) | 867 | movq $-1,ORIG_RAX(%rsp) |
diff --git a/arch/x86_64/kernel/functionlist b/arch/x86_64/kernel/functionlist deleted file mode 100644 index 7ae18ec1245..00000000000 --- a/arch/x86_64/kernel/functionlist +++ /dev/null | |||
@@ -1,1284 +0,0 @@ | |||
1 | *(.text.flush_thread) | ||
2 | *(.text.check_poison_obj) | ||
3 | *(.text.copy_page) | ||
4 | *(.text.__set_personality) | ||
5 | *(.text.gart_map_sg) | ||
6 | *(.text.kmem_cache_free) | ||
7 | *(.text.find_get_page) | ||
8 | *(.text._raw_spin_lock) | ||
9 | *(.text.ide_outb) | ||
10 | *(.text.unmap_vmas) | ||
11 | *(.text.copy_page_range) | ||
12 | *(.text.kprobe_handler) | ||
13 | *(.text.__handle_mm_fault) | ||
14 | *(.text.__d_lookup) | ||
15 | *(.text.copy_user_generic) | ||
16 | *(.text.__link_path_walk) | ||
17 | *(.text.get_page_from_freelist) | ||
18 | *(.text.kmem_cache_alloc) | ||
19 | *(.text.drive_cmd_intr) | ||
20 | *(.text.ia32_setup_sigcontext) | ||
21 | *(.text.huge_pte_offset) | ||
22 | *(.text.do_page_fault) | ||
23 | *(.text.page_remove_rmap) | ||
24 | *(.text.release_pages) | ||
25 | *(.text.ide_end_request) | ||
26 | *(.text.__mutex_lock_slowpath) | ||
27 | *(.text.__find_get_block) | ||
28 | *(.text.kfree) | ||
29 | *(.text.vfs_read) | ||
30 | *(.text._raw_spin_unlock) | ||
31 | *(.text.free_hot_cold_page) | ||
32 | *(.text.fget_light) | ||
33 | *(.text.schedule) | ||
34 | *(.text.memcmp) | ||
35 | *(.text.touch_atime) | ||
36 | *(.text.__might_sleep) | ||
37 | *(.text.__down_read_trylock) | ||
38 | *(.text.arch_pick_mmap_layout) | ||
39 | *(.text.find_vma) | ||
40 | *(.text.__make_request) | ||
41 | *(.text.do_generic_mapping_read) | ||
42 | *(.text.mutex_lock_interruptible) | ||
43 | *(.text.__generic_file_aio_read) | ||
44 | *(.text._atomic_dec_and_lock) | ||
45 | *(.text.__wake_up_bit) | ||
46 | *(.text.add_to_page_cache) | ||
47 | *(.text.cache_alloc_debugcheck_after) | ||
48 | *(.text.vm_normal_page) | ||
49 | *(.text.mutex_debug_check_no_locks_freed) | ||
50 | *(.text.net_rx_action) | ||
51 | *(.text.__find_first_zero_bit) | ||
52 | *(.text.put_page) | ||
53 | *(.text._raw_read_lock) | ||
54 | *(.text.__delay) | ||
55 | *(.text.dnotify_parent) | ||
56 | *(.text.do_path_lookup) | ||
57 | *(.text.do_sync_read) | ||
58 | *(.text.do_lookup) | ||
59 | *(.text.bit_waitqueue) | ||
60 | *(.text.file_read_actor) | ||
61 | *(.text.strncpy_from_user) | ||
62 | *(.text.__pagevec_lru_add_active) | ||
63 | *(.text.fget) | ||
64 | *(.text.dput) | ||
65 | *(.text.__strnlen_user) | ||
66 | *(.text.inotify_inode_queue_event) | ||
67 | *(.text.rw_verify_area) | ||
68 | *(.text.ide_intr) | ||
69 | *(.text.inotify_dentry_parent_queue_event) | ||
70 | *(.text.permission) | ||
71 | *(.text.memscan) | ||
72 | *(.text.hpet_rtc_interrupt) | ||
73 | *(.text.do_mmap_pgoff) | ||
74 | *(.text.current_fs_time) | ||
75 | *(.text.vfs_getattr) | ||
76 | *(.text.kmem_flagcheck) | ||
77 | *(.text.mark_page_accessed) | ||
78 | *(.text.free_pages_and_swap_cache) | ||
79 | *(.text.generic_fillattr) | ||
80 | *(.text.__block_prepare_write) | ||
81 | *(.text.__set_page_dirty_nobuffers) | ||
82 | *(.text.link_path_walk) | ||
83 | *(.text.find_get_pages_tag) | ||
84 | *(.text.ide_do_request) | ||
85 | *(.text.__alloc_pages) | ||
86 | *(.text.generic_permission) | ||
87 | *(.text.mod_page_state_offset) | ||
88 | *(.text.free_pgd_range) | ||
89 | *(.text.generic_file_buffered_write) | ||
90 | *(.text.number) | ||
91 | *(.text.ide_do_rw_disk) | ||
92 | *(.text.__brelse) | ||
93 | *(.text.__mod_page_state_offset) | ||
94 | *(.text.rotate_reclaimable_page) | ||
95 | *(.text.find_vma_prepare) | ||
96 | *(.text.find_vma_prev) | ||
97 | *(.text.lru_cache_add_active) | ||
98 | *(.text.__kmalloc_track_caller) | ||
99 | *(.text.smp_invalidate_interrupt) | ||
100 | *(.text.handle_IRQ_event) | ||
101 | *(.text.__find_get_block_slow) | ||
102 | *(.text.do_wp_page) | ||
103 | *(.text.do_select) | ||
104 | *(.text.set_user_nice) | ||
105 | *(.text.sys_read) | ||
106 | *(.text.do_munmap) | ||
107 | *(.text.csum_partial) | ||
108 | *(.text.__do_softirq) | ||
109 | *(.text.may_open) | ||
110 | *(.text.getname) | ||
111 | *(.text.get_empty_filp) | ||
112 | *(.text.__fput) | ||
113 | *(.text.remove_mapping) | ||
114 | *(.text.filp_ctor) | ||
115 | *(.text.poison_obj) | ||
116 | *(.text.unmap_region) | ||
117 | *(.text.test_set_page_writeback) | ||
118 | *(.text.__do_page_cache_readahead) | ||
119 | *(.text.sock_def_readable) | ||
120 | *(.text.ide_outl) | ||
121 | *(.text.shrink_zone) | ||
122 | *(.text.rb_insert_color) | ||
123 | *(.text.get_request) | ||
124 | *(.text.sys_pread64) | ||
125 | *(.text.spin_bug) | ||
126 | *(.text.ide_outsl) | ||
127 | *(.text.mask_and_ack_8259A) | ||
128 | *(.text.filemap_nopage) | ||
129 | *(.text.page_add_file_rmap) | ||
130 | *(.text.find_lock_page) | ||
131 | *(.text.tcp_poll) | ||
132 | *(.text.__mark_inode_dirty) | ||
133 | *(.text.file_ra_state_init) | ||
134 | *(.text.generic_file_llseek) | ||
135 | *(.text.__pagevec_lru_add) | ||
136 | *(.text.page_cache_readahead) | ||
137 | *(.text.n_tty_receive_buf) | ||
138 | *(.text.zonelist_policy) | ||
139 | *(.text.vma_adjust) | ||
140 | *(.text.test_clear_page_dirty) | ||
141 | *(.text.sync_buffer) | ||
142 | *(.text.do_exit) | ||
143 | *(.text.__bitmap_weight) | ||
144 | *(.text.alloc_pages_current) | ||
145 | *(.text.get_unused_fd) | ||
146 | *(.text.zone_watermark_ok) | ||
147 | *(.text.cpuset_update_task_memory_state) | ||
148 | *(.text.__bitmap_empty) | ||
149 | *(.text.sys_munmap) | ||
150 | *(.text.__inode_dir_notify) | ||
151 | *(.text.__generic_file_aio_write_nolock) | ||
152 | *(.text.__pte_alloc) | ||
153 | *(.text.sys_select) | ||
154 | *(.text.vm_acct_memory) | ||
155 | *(.text.vfs_write) | ||
156 | *(.text.__lru_add_drain) | ||
157 | *(.text.prio_tree_insert) | ||
158 | *(.text.generic_file_aio_read) | ||
159 | *(.text.vma_merge) | ||
160 | *(.text.block_write_full_page) | ||
161 | *(.text.__page_set_anon_rmap) | ||
162 | *(.text.apic_timer_interrupt) | ||
163 | *(.text.release_console_sem) | ||
164 | *(.text.sys_write) | ||
165 | *(.text.sys_brk) | ||
166 | *(.text.dup_mm) | ||
167 | *(.text.read_current_timer) | ||
168 | *(.text.ll_rw_block) | ||
169 | *(.text.blk_rq_map_sg) | ||
170 | *(.text.dbg_userword) | ||
171 | *(.text.__block_commit_write) | ||
172 | *(.text.cache_grow) | ||
173 | *(.text.copy_strings) | ||
174 | *(.text.release_task) | ||
175 | *(.text.do_sync_write) | ||
176 | *(.text.unlock_page) | ||
177 | *(.text.load_elf_binary) | ||
178 | *(.text.__follow_mount) | ||
179 | *(.text.__getblk) | ||
180 | *(.text.do_sys_open) | ||
181 | *(.text.current_kernel_time) | ||
182 | *(.text.call_rcu) | ||
183 | *(.text.write_chan) | ||
184 | *(.text.vsnprintf) | ||
185 | *(.text.dummy_inode_setsecurity) | ||
186 | *(.text.submit_bh) | ||
187 | *(.text.poll_freewait) | ||
188 | *(.text.bio_alloc_bioset) | ||
189 | *(.text.skb_clone) | ||
190 | *(.text.page_waitqueue) | ||
191 | *(.text.__mutex_lock_interruptible_slowpath) | ||
192 | *(.text.get_index) | ||
193 | *(.text.csum_partial_copy_generic) | ||
194 | *(.text.bad_range) | ||
195 | *(.text.remove_vma) | ||
196 | *(.text.cp_new_stat) | ||
197 | *(.text.alloc_arraycache) | ||
198 | *(.text.test_clear_page_writeback) | ||
199 | *(.text.strsep) | ||
200 | *(.text.open_namei) | ||
201 | *(.text._raw_read_unlock) | ||
202 | *(.text.get_vma_policy) | ||
203 | *(.text.__down_write_trylock) | ||
204 | *(.text.find_get_pages) | ||
205 | *(.text.tcp_rcv_established) | ||
206 | *(.text.generic_make_request) | ||
207 | *(.text.__block_write_full_page) | ||
208 | *(.text.cfq_set_request) | ||
209 | *(.text.sys_inotify_init) | ||
210 | *(.text.split_vma) | ||
211 | *(.text.__mod_timer) | ||
212 | *(.text.get_options) | ||
213 | *(.text.vma_link) | ||
214 | *(.text.mpage_writepages) | ||
215 | *(.text.truncate_complete_page) | ||
216 | *(.text.tcp_recvmsg) | ||
217 | *(.text.sigprocmask) | ||
218 | *(.text.filemap_populate) | ||
219 | *(.text.sys_close) | ||
220 | *(.text.inotify_dev_queue_event) | ||
221 | *(.text.do_task_stat) | ||
222 | *(.text.__dentry_open) | ||
223 | *(.text.unlink_file_vma) | ||
224 | *(.text.__pollwait) | ||
225 | *(.text.packet_rcv_spkt) | ||
226 | *(.text.drop_buffers) | ||
227 | *(.text.free_pgtables) | ||
228 | *(.text.generic_file_direct_write) | ||
229 | *(.text.copy_process) | ||
230 | *(.text.netif_receive_skb) | ||
231 | *(.text.dnotify_flush) | ||
232 | *(.text.print_bad_pte) | ||
233 | *(.text.anon_vma_unlink) | ||
234 | *(.text.sys_mprotect) | ||
235 | *(.text.sync_sb_inodes) | ||
236 | *(.text.find_inode_fast) | ||
237 | *(.text.dummy_inode_readlink) | ||
238 | *(.text.putname) | ||
239 | *(.text.init_smp_flush) | ||
240 | *(.text.dbg_redzone2) | ||
241 | *(.text.sk_run_filter) | ||
242 | *(.text.may_expand_vm) | ||
243 | *(.text.generic_file_aio_write) | ||
244 | *(.text.find_next_zero_bit) | ||
245 | *(.text.file_kill) | ||
246 | *(.text.audit_getname) | ||
247 | *(.text.arch_unmap_area_topdown) | ||
248 | *(.text.alloc_page_vma) | ||
249 | *(.text.tcp_transmit_skb) | ||
250 | *(.text.rb_next) | ||
251 | *(.text.dbg_redzone1) | ||
252 | *(.text.generic_file_mmap) | ||
253 | *(.text.vfs_fstat) | ||
254 | *(.text.sys_time) | ||
255 | *(.text.page_lock_anon_vma) | ||
256 | *(.text.get_unmapped_area) | ||
257 | *(.text.remote_llseek) | ||
258 | *(.text.__up_read) | ||
259 | *(.text.fd_install) | ||
260 | *(.text.eventpoll_init_file) | ||
261 | *(.text.dma_alloc_coherent) | ||
262 | *(.text.create_empty_buffers) | ||
263 | *(.text.__mutex_unlock_slowpath) | ||
264 | *(.text.dup_fd) | ||
265 | *(.text.d_alloc) | ||
266 | *(.text.tty_ldisc_try) | ||
267 | *(.text.sys_stime) | ||
268 | *(.text.__rb_rotate_right) | ||
269 | *(.text.d_validate) | ||
270 | *(.text.rb_erase) | ||
271 | *(.text.path_release) | ||
272 | *(.text.memmove) | ||
273 | *(.text.invalidate_complete_page) | ||
274 | *(.text.clear_inode) | ||
275 | *(.text.cache_estimate) | ||
276 | *(.text.alloc_buffer_head) | ||
277 | *(.text.smp_call_function_interrupt) | ||
278 | *(.text.flush_tlb_others) | ||
279 | *(.text.file_move) | ||
280 | *(.text.balance_dirty_pages_ratelimited) | ||
281 | *(.text.vma_prio_tree_add) | ||
282 | *(.text.timespec_trunc) | ||
283 | *(.text.mempool_alloc) | ||
284 | *(.text.iget_locked) | ||
285 | *(.text.d_alloc_root) | ||
286 | *(.text.cpuset_populate_dir) | ||
287 | *(.text.anon_vma_prepare) | ||
288 | *(.text.sys_newstat) | ||
289 | *(.text.alloc_page_interleave) | ||
290 | *(.text.__path_lookup_intent_open) | ||
291 | *(.text.__pagevec_free) | ||
292 | *(.text.inode_init_once) | ||
293 | *(.text.free_vfsmnt) | ||
294 | *(.text.__user_walk_fd) | ||
295 | *(.text.cfq_idle_slice_timer) | ||
296 | *(.text.sys_mmap) | ||
297 | *(.text.sys_llseek) | ||
298 | *(.text.prio_tree_remove) | ||
299 | *(.text.filp_close) | ||
300 | *(.text.file_permission) | ||
301 | *(.text.vma_prio_tree_remove) | ||
302 | *(.text.tcp_ack) | ||
303 | *(.text.nameidata_to_filp) | ||
304 | *(.text.sys_lseek) | ||
305 | *(.text.percpu_counter_mod) | ||
306 | *(.text.igrab) | ||
307 | *(.text.__bread) | ||
308 | *(.text.alloc_inode) | ||
309 | *(.text.filldir) | ||
310 | *(.text.__rb_rotate_left) | ||
311 | *(.text.irq_affinity_write_proc) | ||
312 | *(.text.init_request_from_bio) | ||
313 | *(.text.find_or_create_page) | ||
314 | *(.text.tty_poll) | ||
315 | *(.text.tcp_sendmsg) | ||
316 | *(.text.ide_wait_stat) | ||
317 | *(.text.free_buffer_head) | ||
318 | *(.text.flush_signal_handlers) | ||
319 | *(.text.tcp_v4_rcv) | ||
320 | *(.text.nr_blockdev_pages) | ||
321 | *(.text.locks_remove_flock) | ||
322 | *(.text.__iowrite32_copy) | ||
323 | *(.text.do_filp_open) | ||
324 | *(.text.try_to_release_page) | ||
325 | *(.text.page_add_new_anon_rmap) | ||
326 | *(.text.kmem_cache_size) | ||
327 | *(.text.eth_type_trans) | ||
328 | *(.text.try_to_free_buffers) | ||
329 | *(.text.schedule_tail) | ||
330 | *(.text.proc_lookup) | ||
331 | *(.text.no_llseek) | ||
332 | *(.text.kfree_skbmem) | ||
333 | *(.text.do_wait) | ||
334 | *(.text.do_mpage_readpage) | ||
335 | *(.text.vfs_stat_fd) | ||
336 | *(.text.tty_write) | ||
337 | *(.text.705) | ||
338 | *(.text.sync_page) | ||
339 | *(.text.__remove_shared_vm_struct) | ||
340 | *(.text.__kfree_skb) | ||
341 | *(.text.sock_poll) | ||
342 | *(.text.get_request_wait) | ||
343 | *(.text.do_sigaction) | ||
344 | *(.text.do_brk) | ||
345 | *(.text.tcp_event_data_recv) | ||
346 | *(.text.read_chan) | ||
347 | *(.text.pipe_writev) | ||
348 | *(.text.__emul_lookup_dentry) | ||
349 | *(.text.rtc_get_rtc_time) | ||
350 | *(.text.print_objinfo) | ||
351 | *(.text.file_update_time) | ||
352 | *(.text.do_signal) | ||
353 | *(.text.disable_8259A_irq) | ||
354 | *(.text.blk_queue_bounce) | ||
355 | *(.text.__anon_vma_link) | ||
356 | *(.text.__vma_link) | ||
357 | *(.text.vfs_rename) | ||
358 | *(.text.sys_newlstat) | ||
359 | *(.text.sys_newfstat) | ||
360 | *(.text.sys_mknod) | ||
361 | *(.text.__show_regs) | ||
362 | *(.text.iput) | ||
363 | *(.text.get_signal_to_deliver) | ||
364 | *(.text.flush_tlb_page) | ||
365 | *(.text.debug_mutex_wake_waiter) | ||
366 | *(.text.copy_thread) | ||
367 | *(.text.clear_page_dirty_for_io) | ||
368 | *(.text.buffer_io_error) | ||
369 | *(.text.vfs_permission) | ||
370 | *(.text.truncate_inode_pages_range) | ||
371 | *(.text.sys_recvfrom) | ||
372 | *(.text.remove_suid) | ||
373 | *(.text.mark_buffer_dirty) | ||
374 | *(.text.local_bh_enable) | ||
375 | *(.text.get_zeroed_page) | ||
376 | *(.text.get_vmalloc_info) | ||
377 | *(.text.flush_old_exec) | ||
378 | *(.text.dummy_inode_permission) | ||
379 | *(.text.__bio_add_page) | ||
380 | *(.text.prio_tree_replace) | ||
381 | *(.text.notify_change) | ||
382 | *(.text.mntput_no_expire) | ||
383 | *(.text.fput) | ||
384 | *(.text.__end_that_request_first) | ||
385 | *(.text.wake_up_bit) | ||
386 | *(.text.unuse_mm) | ||
387 | *(.text.shrink_icache_memory) | ||
388 | *(.text.sched_balance_self) | ||
389 | *(.text.__pmd_alloc) | ||
390 | *(.text.pipe_poll) | ||
391 | *(.text.normal_poll) | ||
392 | *(.text.__free_pages) | ||
393 | *(.text.follow_mount) | ||
394 | *(.text.cdrom_start_packet_command) | ||
395 | *(.text.blk_recount_segments) | ||
396 | *(.text.bio_put) | ||
397 | *(.text.__alloc_skb) | ||
398 | *(.text.__wake_up) | ||
399 | *(.text.vm_stat_account) | ||
400 | *(.text.sys_fcntl) | ||
401 | *(.text.sys_fadvise64) | ||
402 | *(.text._raw_write_unlock) | ||
403 | *(.text.__pud_alloc) | ||
404 | *(.text.alloc_page_buffers) | ||
405 | *(.text.vfs_llseek) | ||
406 | *(.text.sockfd_lookup) | ||
407 | *(.text._raw_write_lock) | ||
408 | *(.text.put_compound_page) | ||
409 | *(.text.prune_dcache) | ||
410 | *(.text.pipe_readv) | ||
411 | *(.text.mempool_free) | ||
412 | *(.text.make_ahead_window) | ||
413 | *(.text.lru_add_drain) | ||
414 | *(.text.constant_test_bit) | ||
415 | *(.text.__clear_user) | ||
416 | *(.text.arch_unmap_area) | ||
417 | *(.text.anon_vma_link) | ||
418 | *(.text.sys_chroot) | ||
419 | *(.text.setup_arg_pages) | ||
420 | *(.text.radix_tree_preload) | ||
421 | *(.text.init_rwsem) | ||
422 | *(.text.generic_osync_inode) | ||
423 | *(.text.generic_delete_inode) | ||
424 | *(.text.do_sys_poll) | ||
425 | *(.text.dev_queue_xmit) | ||
426 | *(.text.default_llseek) | ||
427 | *(.text.__writeback_single_inode) | ||
428 | *(.text.vfs_ioctl) | ||
429 | *(.text.__up_write) | ||
430 | *(.text.unix_poll) | ||
431 | *(.text.sys_rt_sigprocmask) | ||
432 | *(.text.sock_recvmsg) | ||
433 | *(.text.recalc_bh_state) | ||
434 | *(.text.__put_unused_fd) | ||
435 | *(.text.process_backlog) | ||
436 | *(.text.locks_remove_posix) | ||
437 | *(.text.lease_modify) | ||
438 | *(.text.expand_files) | ||
439 | *(.text.end_buffer_read_nobh) | ||
440 | *(.text.d_splice_alias) | ||
441 | *(.text.debug_mutex_init_waiter) | ||
442 | *(.text.copy_from_user) | ||
443 | *(.text.cap_vm_enough_memory) | ||
444 | *(.text.show_vfsmnt) | ||
445 | *(.text.release_sock) | ||
446 | *(.text.pfifo_fast_enqueue) | ||
447 | *(.text.half_md4_transform) | ||
448 | *(.text.fs_may_remount_ro) | ||
449 | *(.text.do_fork) | ||
450 | *(.text.copy_hugetlb_page_range) | ||
451 | *(.text.cache_free_debugcheck) | ||
452 | *(.text.__tcp_select_window) | ||
453 | *(.text.task_handoff_register) | ||
454 | *(.text.sys_open) | ||
455 | *(.text.strlcpy) | ||
456 | *(.text.skb_copy_datagram_iovec) | ||
457 | *(.text.set_up_list3s) | ||
458 | *(.text.release_open_intent) | ||
459 | *(.text.qdisc_restart) | ||
460 | *(.text.n_tty_chars_in_buffer) | ||
461 | *(.text.inode_change_ok) | ||
462 | *(.text.__downgrade_write) | ||
463 | *(.text.debug_mutex_unlock) | ||
464 | *(.text.add_timer_randomness) | ||
465 | *(.text.sock_common_recvmsg) | ||
466 | *(.text.set_bh_page) | ||
467 | *(.text.printk_lock) | ||
468 | *(.text.path_release_on_umount) | ||
469 | *(.text.ip_output) | ||
470 | *(.text.ide_build_dmatable) | ||
471 | *(.text.__get_user_8) | ||
472 | *(.text.end_buffer_read_sync) | ||
473 | *(.text.__d_path) | ||
474 | *(.text.d_move) | ||
475 | *(.text.del_timer) | ||
476 | *(.text.constant_test_bit) | ||
477 | *(.text.blockable_page_cache_readahead) | ||
478 | *(.text.tty_read) | ||
479 | *(.text.sys_readlink) | ||
480 | *(.text.sys_faccessat) | ||
481 | *(.text.read_swap_cache_async) | ||
482 | *(.text.pty_write_room) | ||
483 | *(.text.page_address_in_vma) | ||
484 | *(.text.kthread) | ||
485 | *(.text.cfq_exit_io_context) | ||
486 | *(.text.__tcp_push_pending_frames) | ||
487 | *(.text.sys_pipe) | ||
488 | *(.text.submit_bio) | ||
489 | *(.text.pid_revalidate) | ||
490 | *(.text.page_referenced_file) | ||
491 | *(.text.lock_sock) | ||
492 | *(.text.get_page_state_node) | ||
493 | *(.text.generic_block_bmap) | ||
494 | *(.text.do_setitimer) | ||
495 | *(.text.dev_queue_xmit_nit) | ||
496 | *(.text.copy_from_read_buf) | ||
497 | *(.text.__const_udelay) | ||
498 | *(.text.console_conditional_schedule) | ||
499 | *(.text.wake_up_new_task) | ||
500 | *(.text.wait_for_completion_interruptible) | ||
501 | *(.text.tcp_rcv_rtt_update) | ||
502 | *(.text.sys_mlockall) | ||
503 | *(.text.set_fs_altroot) | ||
504 | *(.text.schedule_timeout) | ||
505 | *(.text.nr_free_pagecache_pages) | ||
506 | *(.text.nf_iterate) | ||
507 | *(.text.mapping_tagged) | ||
508 | *(.text.ip_queue_xmit) | ||
509 | *(.text.ip_local_deliver) | ||
510 | *(.text.follow_page) | ||
511 | *(.text.elf_map) | ||
512 | *(.text.dummy_file_permission) | ||
513 | *(.text.dispose_list) | ||
514 | *(.text.dentry_open) | ||
515 | *(.text.dentry_iput) | ||
516 | *(.text.bio_alloc) | ||
517 | *(.text.wait_on_page_bit) | ||
518 | *(.text.vfs_readdir) | ||
519 | *(.text.vfs_lstat) | ||
520 | *(.text.seq_escape) | ||
521 | *(.text.__posix_lock_file) | ||
522 | *(.text.mm_release) | ||
523 | *(.text.kref_put) | ||
524 | *(.text.ip_rcv) | ||
525 | *(.text.__iget) | ||
526 | *(.text.free_pages) | ||
527 | *(.text.find_mergeable_anon_vma) | ||
528 | *(.text.find_extend_vma) | ||
529 | *(.text.dummy_inode_listsecurity) | ||
530 | *(.text.bio_add_page) | ||
531 | *(.text.__vm_enough_memory) | ||
532 | *(.text.vfs_stat) | ||
533 | *(.text.tty_paranoia_check) | ||
534 | *(.text.tcp_read_sock) | ||
535 | *(.text.tcp_data_queue) | ||
536 | *(.text.sys_uname) | ||
537 | *(.text.sys_renameat) | ||
538 | *(.text.__strncpy_from_user) | ||
539 | *(.text.__mutex_init) | ||
540 | *(.text.__lookup_hash) | ||
541 | *(.text.kref_get) | ||
542 | *(.text.ip_route_input) | ||
543 | *(.text.__insert_inode_hash) | ||
544 | *(.text.do_sock_write) | ||
545 | *(.text.blk_done_softirq) | ||
546 | *(.text.__wake_up_sync) | ||
547 | *(.text.__vma_link_rb) | ||
548 | *(.text.tty_ioctl) | ||
549 | *(.text.tracesys) | ||
550 | *(.text.sys_getdents) | ||
551 | *(.text.sys_dup) | ||
552 | *(.text.stub_execve) | ||
553 | *(.text.sha_transform) | ||
554 | *(.text.radix_tree_tag_clear) | ||
555 | *(.text.put_unused_fd) | ||
556 | *(.text.put_files_struct) | ||
557 | *(.text.mpage_readpages) | ||
558 | *(.text.may_delete) | ||
559 | *(.text.kmem_cache_create) | ||
560 | *(.text.ip_mc_output) | ||
561 | *(.text.interleave_nodes) | ||
562 | *(.text.groups_search) | ||
563 | *(.text.generic_drop_inode) | ||
564 | *(.text.generic_commit_write) | ||
565 | *(.text.fcntl_setlk) | ||
566 | *(.text.exit_mmap) | ||
567 | *(.text.end_page_writeback) | ||
568 | *(.text.__d_rehash) | ||
569 | *(.text.debug_mutex_free_waiter) | ||
570 | *(.text.csum_ipv6_magic) | ||
571 | *(.text.count) | ||
572 | *(.text.cleanup_rbuf) | ||
573 | *(.text.check_spinlock_acquired_node) | ||
574 | *(.text.can_vma_merge_after) | ||
575 | *(.text.bio_endio) | ||
576 | *(.text.alloc_pidmap) | ||
577 | *(.text.write_ldt) | ||
578 | *(.text.vmtruncate_range) | ||
579 | *(.text.vfs_create) | ||
580 | *(.text.__user_walk) | ||
581 | *(.text.update_send_head) | ||
582 | *(.text.unmap_underlying_metadata) | ||
583 | *(.text.tty_ldisc_deref) | ||
584 | *(.text.tcp_setsockopt) | ||
585 | *(.text.tcp_send_ack) | ||
586 | *(.text.sys_pause) | ||
587 | *(.text.sys_gettimeofday) | ||
588 | *(.text.sync_dirty_buffer) | ||
589 | *(.text.strncmp) | ||
590 | *(.text.release_posix_timer) | ||
591 | *(.text.proc_file_read) | ||
592 | *(.text.prepare_to_wait) | ||
593 | *(.text.locks_mandatory_locked) | ||
594 | *(.text.interruptible_sleep_on_timeout) | ||
595 | *(.text.inode_sub_bytes) | ||
596 | *(.text.in_group_p) | ||
597 | *(.text.hrtimer_try_to_cancel) | ||
598 | *(.text.filldir64) | ||
599 | *(.text.fasync_helper) | ||
600 | *(.text.dummy_sb_pivotroot) | ||
601 | *(.text.d_lookup) | ||
602 | *(.text.d_instantiate) | ||
603 | *(.text.__d_find_alias) | ||
604 | *(.text.cpu_idle_wait) | ||
605 | *(.text.cond_resched_lock) | ||
606 | *(.text.chown_common) | ||
607 | *(.text.blk_congestion_wait) | ||
608 | *(.text.activate_page) | ||
609 | *(.text.unlock_buffer) | ||
610 | *(.text.tty_wakeup) | ||
611 | *(.text.tcp_v4_do_rcv) | ||
612 | *(.text.tcp_current_mss) | ||
613 | *(.text.sys_openat) | ||
614 | *(.text.sys_fchdir) | ||
615 | *(.text.strnlen_user) | ||
616 | *(.text.strnlen) | ||
617 | *(.text.strchr) | ||
618 | *(.text.sock_common_getsockopt) | ||
619 | *(.text.skb_checksum) | ||
620 | *(.text.remove_wait_queue) | ||
621 | *(.text.rb_replace_node) | ||
622 | *(.text.radix_tree_node_ctor) | ||
623 | *(.text.pty_chars_in_buffer) | ||
624 | *(.text.profile_hit) | ||
625 | *(.text.prio_tree_left) | ||
626 | *(.text.pgd_clear_bad) | ||
627 | *(.text.pfifo_fast_dequeue) | ||
628 | *(.text.page_referenced) | ||
629 | *(.text.open_exec) | ||
630 | *(.text.mmput) | ||
631 | *(.text.mm_init) | ||
632 | *(.text.__ide_dma_off_quietly) | ||
633 | *(.text.ide_dma_intr) | ||
634 | *(.text.hrtimer_start) | ||
635 | *(.text.get_io_context) | ||
636 | *(.text.__get_free_pages) | ||
637 | *(.text.find_first_zero_bit) | ||
638 | *(.text.file_free_rcu) | ||
639 | *(.text.dummy_socket_sendmsg) | ||
640 | *(.text.do_unlinkat) | ||
641 | *(.text.do_arch_prctl) | ||
642 | *(.text.destroy_inode) | ||
643 | *(.text.can_vma_merge_before) | ||
644 | *(.text.block_sync_page) | ||
645 | *(.text.block_prepare_write) | ||
646 | *(.text.bio_init) | ||
647 | *(.text.arch_ptrace) | ||
648 | *(.text.wake_up_inode) | ||
649 | *(.text.wait_on_retry_sync_kiocb) | ||
650 | *(.text.vma_prio_tree_next) | ||
651 | *(.text.tcp_rcv_space_adjust) | ||
652 | *(.text.__tcp_ack_snd_check) | ||
653 | *(.text.sys_utime) | ||
654 | *(.text.sys_recvmsg) | ||
655 | *(.text.sys_mremap) | ||
656 | *(.text.sys_bdflush) | ||
657 | *(.text.sleep_on) | ||
658 | *(.text.set_page_dirty_lock) | ||
659 | *(.text.seq_path) | ||
660 | *(.text.schedule_timeout_interruptible) | ||
661 | *(.text.sched_fork) | ||
662 | *(.text.rt_run_flush) | ||
663 | *(.text.profile_munmap) | ||
664 | *(.text.prepare_binprm) | ||
665 | *(.text.__pagevec_release_nonlru) | ||
666 | *(.text.m_show) | ||
667 | *(.text.lookup_mnt) | ||
668 | *(.text.__lookup_mnt) | ||
669 | *(.text.lock_timer_base) | ||
670 | *(.text.is_subdir) | ||
671 | *(.text.invalidate_bh_lru) | ||
672 | *(.text.init_buffer_head) | ||
673 | *(.text.ifind_fast) | ||
674 | *(.text.ide_dma_start) | ||
675 | *(.text.__get_page_state) | ||
676 | *(.text.flock_to_posix_lock) | ||
677 | *(.text.__find_symbol) | ||
678 | *(.text.do_futex) | ||
679 | *(.text.do_execve) | ||
680 | *(.text.dirty_writeback_centisecs_handler) | ||
681 | *(.text.dev_watchdog) | ||
682 | *(.text.can_share_swap_page) | ||
683 | *(.text.blkdev_put) | ||
684 | *(.text.bio_get_nr_vecs) | ||
685 | *(.text.xfrm_compile_policy) | ||
686 | *(.text.vma_prio_tree_insert) | ||
687 | *(.text.vfs_lstat_fd) | ||
688 | *(.text.__user_path_lookup_open) | ||
689 | *(.text.thread_return) | ||
690 | *(.text.tcp_send_delayed_ack) | ||
691 | *(.text.sock_def_error_report) | ||
692 | *(.text.shrink_slab) | ||
693 | *(.text.serial_out) | ||
694 | *(.text.seq_read) | ||
695 | *(.text.secure_ip_id) | ||
696 | *(.text.search_binary_handler) | ||
697 | *(.text.proc_pid_unhash) | ||
698 | *(.text.pagevec_lookup) | ||
699 | *(.text.new_inode) | ||
700 | *(.text.memcpy_toiovec) | ||
701 | *(.text.locks_free_lock) | ||
702 | *(.text.__lock_page) | ||
703 | *(.text.__lock_buffer) | ||
704 | *(.text.load_module) | ||
705 | *(.text.is_bad_inode) | ||
706 | *(.text.invalidate_inode_buffers) | ||
707 | *(.text.insert_vm_struct) | ||
708 | *(.text.inode_setattr) | ||
709 | *(.text.inode_add_bytes) | ||
710 | *(.text.ide_read_24) | ||
711 | *(.text.ide_get_error_location) | ||
712 | *(.text.ide_do_drive_cmd) | ||
713 | *(.text.get_locked_pte) | ||
714 | *(.text.get_filesystem_list) | ||
715 | *(.text.generic_file_open) | ||
716 | *(.text.follow_down) | ||
717 | *(.text.find_next_bit) | ||
718 | *(.text.__find_first_bit) | ||
719 | *(.text.exit_mm) | ||
720 | *(.text.exec_keys) | ||
721 | *(.text.end_buffer_write_sync) | ||
722 | *(.text.end_bio_bh_io_sync) | ||
723 | *(.text.dummy_socket_shutdown) | ||
724 | *(.text.d_rehash) | ||
725 | *(.text.d_path) | ||
726 | *(.text.do_ioctl) | ||
727 | *(.text.dget_locked) | ||
728 | *(.text.copy_thread_group_keys) | ||
729 | *(.text.cdrom_end_request) | ||
730 | *(.text.cap_bprm_apply_creds) | ||
731 | *(.text.blk_rq_bio_prep) | ||
732 | *(.text.__bitmap_intersects) | ||
733 | *(.text.bio_phys_segments) | ||
734 | *(.text.bio_free) | ||
735 | *(.text.arch_get_unmapped_area_topdown) | ||
736 | *(.text.writeback_in_progress) | ||
737 | *(.text.vfs_follow_link) | ||
738 | *(.text.tcp_rcv_state_process) | ||
739 | *(.text.tcp_check_space) | ||
740 | *(.text.sys_stat) | ||
741 | *(.text.sys_rt_sigreturn) | ||
742 | *(.text.sys_rt_sigaction) | ||
743 | *(.text.sys_remap_file_pages) | ||
744 | *(.text.sys_pwrite64) | ||
745 | *(.text.sys_fchownat) | ||
746 | *(.text.sys_fchmodat) | ||
747 | *(.text.strncat) | ||
748 | *(.text.strlcat) | ||
749 | *(.text.strcmp) | ||
750 | *(.text.steal_locks) | ||
751 | *(.text.sock_create) | ||
752 | *(.text.sk_stream_rfree) | ||
753 | *(.text.sk_stream_mem_schedule) | ||
754 | *(.text.skip_atoi) | ||
755 | *(.text.sk_alloc) | ||
756 | *(.text.show_stat) | ||
757 | *(.text.set_fs_pwd) | ||
758 | *(.text.set_binfmt) | ||
759 | *(.text.pty_unthrottle) | ||
760 | *(.text.proc_symlink) | ||
761 | *(.text.pipe_release) | ||
762 | *(.text.pageout) | ||
763 | *(.text.n_tty_write_wakeup) | ||
764 | *(.text.n_tty_ioctl) | ||
765 | *(.text.nr_free_zone_pages) | ||
766 | *(.text.migration_thread) | ||
767 | *(.text.mempool_free_slab) | ||
768 | *(.text.meminfo_read_proc) | ||
769 | *(.text.max_sane_readahead) | ||
770 | *(.text.lru_cache_add) | ||
771 | *(.text.kill_fasync) | ||
772 | *(.text.kernel_read) | ||
773 | *(.text.invalidate_mapping_pages) | ||
774 | *(.text.inode_has_buffers) | ||
775 | *(.text.init_once) | ||
776 | *(.text.inet_sendmsg) | ||
777 | *(.text.idedisk_issue_flush) | ||
778 | *(.text.generic_file_write) | ||
779 | *(.text.free_more_memory) | ||
780 | *(.text.__free_fdtable) | ||
781 | *(.text.filp_dtor) | ||
782 | *(.text.exit_sem) | ||
783 | *(.text.exit_itimers) | ||
784 | *(.text.error_interrupt) | ||
785 | *(.text.end_buffer_async_write) | ||
786 | *(.text.eligible_child) | ||
787 | *(.text.elf_map) | ||
788 | *(.text.dump_task_regs) | ||
789 | *(.text.dummy_task_setscheduler) | ||
790 | *(.text.dummy_socket_accept) | ||
791 | *(.text.dummy_file_free_security) | ||
792 | *(.text.__down_read) | ||
793 | *(.text.do_sock_read) | ||
794 | *(.text.do_sigaltstack) | ||
795 | *(.text.do_mremap) | ||
796 | *(.text.current_io_context) | ||
797 | *(.text.cpu_swap_callback) | ||
798 | *(.text.copy_vma) | ||
799 | *(.text.cap_bprm_set_security) | ||
800 | *(.text.blk_insert_request) | ||
801 | *(.text.bio_map_kern_endio) | ||
802 | *(.text.bio_hw_segments) | ||
803 | *(.text.bictcp_cong_avoid) | ||
804 | *(.text.add_interrupt_randomness) | ||
805 | *(.text.wait_for_completion) | ||
806 | *(.text.version_read_proc) | ||
807 | *(.text.unix_write_space) | ||
808 | *(.text.tty_ldisc_ref_wait) | ||
809 | *(.text.tty_ldisc_put) | ||
810 | *(.text.try_to_wake_up) | ||
811 | *(.text.tcp_v4_tw_remember_stamp) | ||
812 | *(.text.tcp_try_undo_dsack) | ||
813 | *(.text.tcp_may_send_now) | ||
814 | *(.text.sys_waitid) | ||
815 | *(.text.sys_sched_getparam) | ||
816 | *(.text.sys_getppid) | ||
817 | *(.text.sys_getcwd) | ||
818 | *(.text.sys_dup2) | ||
819 | *(.text.sys_chmod) | ||
820 | *(.text.sys_chdir) | ||
821 | *(.text.sprintf) | ||
822 | *(.text.sock_wfree) | ||
823 | *(.text.sock_aio_write) | ||
824 | *(.text.skb_drop_fraglist) | ||
825 | *(.text.skb_dequeue) | ||
826 | *(.text.set_close_on_exec) | ||
827 | *(.text.set_brk) | ||
828 | *(.text.seq_puts) | ||
829 | *(.text.SELECT_DRIVE) | ||
830 | *(.text.sched_exec) | ||
831 | *(.text.return_EIO) | ||
832 | *(.text.remove_from_page_cache) | ||
833 | *(.text.rcu_start_batch) | ||
834 | *(.text.__put_task_struct) | ||
835 | *(.text.proc_pid_readdir) | ||
836 | *(.text.proc_get_inode) | ||
837 | *(.text.prepare_to_wait_exclusive) | ||
838 | *(.text.pipe_wait) | ||
839 | *(.text.pipe_new) | ||
840 | *(.text.pdflush_operation) | ||
841 | *(.text.__pagevec_release) | ||
842 | *(.text.pagevec_lookup_tag) | ||
843 | *(.text.packet_rcv) | ||
844 | *(.text.n_tty_set_room) | ||
845 | *(.text.nr_free_pages) | ||
846 | *(.text.__net_timestamp) | ||
847 | *(.text.mpage_end_io_read) | ||
848 | *(.text.mod_timer) | ||
849 | *(.text.__memcpy) | ||
850 | *(.text.mb_cache_shrink_fn) | ||
851 | *(.text.lock_rename) | ||
852 | *(.text.kstrdup) | ||
853 | *(.text.is_ignored) | ||
854 | *(.text.int_very_careful) | ||
855 | *(.text.inotify_inode_is_dead) | ||
856 | *(.text.inotify_get_cookie) | ||
857 | *(.text.inode_get_bytes) | ||
858 | *(.text.init_timer) | ||
859 | *(.text.init_dev) | ||
860 | *(.text.inet_getname) | ||
861 | *(.text.ide_map_sg) | ||
862 | *(.text.__ide_dma_end) | ||
863 | *(.text.hrtimer_get_remaining) | ||
864 | *(.text.get_task_mm) | ||
865 | *(.text.get_random_int) | ||
866 | *(.text.free_pipe_info) | ||
867 | *(.text.filemap_write_and_wait_range) | ||
868 | *(.text.exit_thread) | ||
869 | *(.text.enter_idle) | ||
870 | *(.text.end_that_request_first) | ||
871 | *(.text.end_8259A_irq) | ||
872 | *(.text.dummy_file_alloc_security) | ||
873 | *(.text.do_group_exit) | ||
874 | *(.text.debug_mutex_init) | ||
875 | *(.text.cpuset_exit) | ||
876 | *(.text.cpu_idle) | ||
877 | *(.text.copy_semundo) | ||
878 | *(.text.copy_files) | ||
879 | *(.text.chrdev_open) | ||
880 | *(.text.cdrom_transfer_packet_command) | ||
881 | *(.text.cdrom_mode_sense) | ||
882 | *(.text.blk_phys_contig_segment) | ||
883 | *(.text.blk_get_queue) | ||
884 | *(.text.bio_split) | ||
885 | *(.text.audit_alloc) | ||
886 | *(.text.anon_pipe_buf_release) | ||
887 | *(.text.add_wait_queue_exclusive) | ||
888 | *(.text.add_wait_queue) | ||
889 | *(.text.acct_process) | ||
890 | *(.text.account) | ||
891 | *(.text.zeromap_page_range) | ||
892 | *(.text.yield) | ||
893 | *(.text.writeback_acquire) | ||
894 | *(.text.worker_thread) | ||
895 | *(.text.wait_on_page_writeback_range) | ||
896 | *(.text.__wait_on_buffer) | ||
897 | *(.text.vscnprintf) | ||
898 | *(.text.vmalloc_to_pfn) | ||
899 | *(.text.vgacon_save_screen) | ||
900 | *(.text.vfs_unlink) | ||
901 | *(.text.vfs_rmdir) | ||
902 | *(.text.unregister_md_personality) | ||
903 | *(.text.unlock_new_inode) | ||
904 | *(.text.unix_stream_sendmsg) | ||
905 | *(.text.unix_stream_recvmsg) | ||
906 | *(.text.unhash_process) | ||
907 | *(.text.udp_v4_lookup_longway) | ||
908 | *(.text.tty_ldisc_flush) | ||
909 | *(.text.tty_ldisc_enable) | ||
910 | *(.text.tty_hung_up_p) | ||
911 | *(.text.tty_buffer_free_all) | ||
912 | *(.text.tso_fragment) | ||
913 | *(.text.try_to_del_timer_sync) | ||
914 | *(.text.tcp_v4_err) | ||
915 | *(.text.tcp_unhash) | ||
916 | *(.text.tcp_seq_next) | ||
917 | *(.text.tcp_select_initial_window) | ||
918 | *(.text.tcp_sacktag_write_queue) | ||
919 | *(.text.tcp_cwnd_validate) | ||
920 | *(.text.sys_vhangup) | ||
921 | *(.text.sys_uselib) | ||
922 | *(.text.sys_symlink) | ||
923 | *(.text.sys_signal) | ||
924 | *(.text.sys_poll) | ||
925 | *(.text.sys_mount) | ||
926 | *(.text.sys_kill) | ||
927 | *(.text.sys_ioctl) | ||
928 | *(.text.sys_inotify_add_watch) | ||
929 | *(.text.sys_getuid) | ||
930 | *(.text.sys_getrlimit) | ||
931 | *(.text.sys_getitimer) | ||
932 | *(.text.sys_getgroups) | ||
933 | *(.text.sys_ftruncate) | ||
934 | *(.text.sysfs_lookup) | ||
935 | *(.text.sys_exit_group) | ||
936 | *(.text.stub_fork) | ||
937 | *(.text.sscanf) | ||
938 | *(.text.sock_map_fd) | ||
939 | *(.text.sock_get_timestamp) | ||
940 | *(.text.__sock_create) | ||
941 | *(.text.smp_call_function_single) | ||
942 | *(.text.sk_stop_timer) | ||
943 | *(.text.skb_copy_and_csum_datagram) | ||
944 | *(.text.__skb_checksum_complete) | ||
945 | *(.text.single_next) | ||
946 | *(.text.sigqueue_alloc) | ||
947 | *(.text.shrink_dcache_parent) | ||
948 | *(.text.select_idle_routine) | ||
949 | *(.text.run_workqueue) | ||
950 | *(.text.run_local_timers) | ||
951 | *(.text.remove_inode_hash) | ||
952 | *(.text.remove_dquot_ref) | ||
953 | *(.text.register_binfmt) | ||
954 | *(.text.read_cache_pages) | ||
955 | *(.text.rb_last) | ||
956 | *(.text.pty_open) | ||
957 | *(.text.proc_root_readdir) | ||
958 | *(.text.proc_pid_flush) | ||
959 | *(.text.proc_pident_lookup) | ||
960 | *(.text.proc_fill_super) | ||
961 | *(.text.proc_exe_link) | ||
962 | *(.text.posix_locks_deadlock) | ||
963 | *(.text.pipe_iov_copy_from_user) | ||
964 | *(.text.opost) | ||
965 | *(.text.nf_register_hook) | ||
966 | *(.text.netif_rx_ni) | ||
967 | *(.text.m_start) | ||
968 | *(.text.mpage_writepage) | ||
969 | *(.text.mm_alloc) | ||
970 | *(.text.memory_open) | ||
971 | *(.text.mark_buffer_async_write) | ||
972 | *(.text.lru_add_drain_all) | ||
973 | *(.text.locks_init_lock) | ||
974 | *(.text.locks_delete_lock) | ||
975 | *(.text.lock_hrtimer_base) | ||
976 | *(.text.load_script) | ||
977 | *(.text.__kill_fasync) | ||
978 | *(.text.ip_mc_sf_allow) | ||
979 | *(.text.__ioremap) | ||
980 | *(.text.int_with_check) | ||
981 | *(.text.int_sqrt) | ||
982 | *(.text.install_thread_keyring) | ||
983 | *(.text.init_page_buffers) | ||
984 | *(.text.inet_sock_destruct) | ||
985 | *(.text.idle_notifier_register) | ||
986 | *(.text.ide_execute_command) | ||
987 | *(.text.ide_end_drive_cmd) | ||
988 | *(.text.__ide_dma_host_on) | ||
989 | *(.text.hrtimer_run_queues) | ||
990 | *(.text.hpet_mask_rtc_irq_bit) | ||
991 | *(.text.__get_zone_counts) | ||
992 | *(.text.get_zone_counts) | ||
993 | *(.text.get_write_access) | ||
994 | *(.text.get_fs_struct) | ||
995 | *(.text.get_dirty_limits) | ||
996 | *(.text.generic_readlink) | ||
997 | *(.text.free_hot_page) | ||
998 | *(.text.finish_wait) | ||
999 | *(.text.find_inode) | ||
1000 | *(.text.find_first_bit) | ||
1001 | *(.text.__filemap_fdatawrite_range) | ||
1002 | *(.text.__filemap_copy_from_user_iovec) | ||
1003 | *(.text.exit_aio) | ||
1004 | *(.text.elv_set_request) | ||
1005 | *(.text.elv_former_request) | ||
1006 | *(.text.dup_namespace) | ||
1007 | *(.text.dupfd) | ||
1008 | *(.text.dummy_socket_getsockopt) | ||
1009 | *(.text.dummy_sb_post_mountroot) | ||
1010 | *(.text.dummy_quotactl) | ||
1011 | *(.text.dummy_inode_rename) | ||
1012 | *(.text.__do_SAK) | ||
1013 | *(.text.do_pipe) | ||
1014 | *(.text.do_fsync) | ||
1015 | *(.text.d_instantiate_unique) | ||
1016 | *(.text.d_find_alias) | ||
1017 | *(.text.deny_write_access) | ||
1018 | *(.text.dentry_unhash) | ||
1019 | *(.text.d_delete) | ||
1020 | *(.text.datagram_poll) | ||
1021 | *(.text.cpuset_fork) | ||
1022 | *(.text.cpuid_read) | ||
1023 | *(.text.copy_namespace) | ||
1024 | *(.text.cond_resched) | ||
1025 | *(.text.check_version) | ||
1026 | *(.text.__change_page_attr) | ||
1027 | *(.text.cfq_slab_kill) | ||
1028 | *(.text.cfq_completed_request) | ||
1029 | *(.text.cdrom_pc_intr) | ||
1030 | *(.text.cdrom_decode_status) | ||
1031 | *(.text.cap_capset_check) | ||
1032 | *(.text.blk_put_request) | ||
1033 | *(.text.bio_fs_destructor) | ||
1034 | *(.text.bictcp_min_cwnd) | ||
1035 | *(.text.alloc_chrdev_region) | ||
1036 | *(.text.add_element) | ||
1037 | *(.text.acct_update_integrals) | ||
1038 | *(.text.write_boundary_block) | ||
1039 | *(.text.writeback_release) | ||
1040 | *(.text.writeback_inodes) | ||
1041 | *(.text.wake_up_state) | ||
1042 | *(.text.__wake_up_locked) | ||
1043 | *(.text.wake_futex) | ||
1044 | *(.text.wait_task_inactive) | ||
1045 | *(.text.__wait_on_freeing_inode) | ||
1046 | *(.text.wait_noreap_copyout) | ||
1047 | *(.text.vmstat_start) | ||
1048 | *(.text.vgacon_do_font_op) | ||
1049 | *(.text.vfs_readv) | ||
1050 | *(.text.vfs_quota_sync) | ||
1051 | *(.text.update_queue) | ||
1052 | *(.text.unshare_files) | ||
1053 | *(.text.unmap_vm_area) | ||
1054 | *(.text.unix_socketpair) | ||
1055 | *(.text.unix_release_sock) | ||
1056 | *(.text.unix_detach_fds) | ||
1057 | *(.text.unix_create1) | ||
1058 | *(.text.unix_bind) | ||
1059 | *(.text.udp_sendmsg) | ||
1060 | *(.text.udp_rcv) | ||
1061 | *(.text.udp_queue_rcv_skb) | ||
1062 | *(.text.uart_write) | ||
1063 | *(.text.uart_startup) | ||
1064 | *(.text.uart_open) | ||
1065 | *(.text.tty_vhangup) | ||
1066 | *(.text.tty_termios_baud_rate) | ||
1067 | *(.text.tty_release) | ||
1068 | *(.text.tty_ldisc_ref) | ||
1069 | *(.text.throttle_vm_writeout) | ||
1070 | *(.text.058) | ||
1071 | *(.text.tcp_xmit_probe_skb) | ||
1072 | *(.text.tcp_v4_send_check) | ||
1073 | *(.text.tcp_v4_destroy_sock) | ||
1074 | *(.text.tcp_sync_mss) | ||
1075 | *(.text.tcp_snd_test) | ||
1076 | *(.text.tcp_slow_start) | ||
1077 | *(.text.tcp_send_fin) | ||
1078 | *(.text.tcp_rtt_estimator) | ||
1079 | *(.text.tcp_parse_options) | ||
1080 | *(.text.tcp_ioctl) | ||
1081 | *(.text.tcp_init_tso_segs) | ||
1082 | *(.text.tcp_init_cwnd) | ||
1083 | *(.text.tcp_getsockopt) | ||
1084 | *(.text.tcp_fin) | ||
1085 | *(.text.tcp_connect) | ||
1086 | *(.text.tcp_cong_avoid) | ||
1087 | *(.text.__tcp_checksum_complete_user) | ||
1088 | *(.text.task_dumpable) | ||
1089 | *(.text.sys_wait4) | ||
1090 | *(.text.sys_utimes) | ||
1091 | *(.text.sys_symlinkat) | ||
1092 | *(.text.sys_socketpair) | ||
1093 | *(.text.sys_rmdir) | ||
1094 | *(.text.sys_readahead) | ||
1095 | *(.text.sys_nanosleep) | ||
1096 | *(.text.sys_linkat) | ||
1097 | *(.text.sys_fstat) | ||
1098 | *(.text.sysfs_readdir) | ||
1099 | *(.text.sys_execve) | ||
1100 | *(.text.sysenter_tracesys) | ||
1101 | *(.text.sys_chown) | ||
1102 | *(.text.stub_clone) | ||
1103 | *(.text.strrchr) | ||
1104 | *(.text.strncpy) | ||
1105 | *(.text.stopmachine_set_state) | ||
1106 | *(.text.sock_sendmsg) | ||
1107 | *(.text.sock_release) | ||
1108 | *(.text.sock_fasync) | ||
1109 | *(.text.sock_close) | ||
1110 | *(.text.sk_stream_write_space) | ||
1111 | *(.text.sk_reset_timer) | ||
1112 | *(.text.skb_split) | ||
1113 | *(.text.skb_recv_datagram) | ||
1114 | *(.text.skb_queue_tail) | ||
1115 | *(.text.sk_attach_filter) | ||
1116 | *(.text.si_swapinfo) | ||
1117 | *(.text.simple_strtoll) | ||
1118 | *(.text.set_termios) | ||
1119 | *(.text.set_task_comm) | ||
1120 | *(.text.set_shrinker) | ||
1121 | *(.text.set_normalized_timespec) | ||
1122 | *(.text.set_brk) | ||
1123 | *(.text.serial_in) | ||
1124 | *(.text.seq_printf) | ||
1125 | *(.text.secure_dccp_sequence_number) | ||
1126 | *(.text.rwlock_bug) | ||
1127 | *(.text.rt_hash_code) | ||
1128 | *(.text.__rta_fill) | ||
1129 | *(.text.__request_resource) | ||
1130 | *(.text.relocate_new_kernel) | ||
1131 | *(.text.release_thread) | ||
1132 | *(.text.release_mem) | ||
1133 | *(.text.rb_prev) | ||
1134 | *(.text.rb_first) | ||
1135 | *(.text.random_poll) | ||
1136 | *(.text.__put_super_and_need_restart) | ||
1137 | *(.text.pty_write) | ||
1138 | *(.text.ptrace_stop) | ||
1139 | *(.text.proc_self_readlink) | ||
1140 | *(.text.proc_root_lookup) | ||
1141 | *(.text.proc_root_link) | ||
1142 | *(.text.proc_pid_make_inode) | ||
1143 | *(.text.proc_pid_attr_write) | ||
1144 | *(.text.proc_lookupfd) | ||
1145 | *(.text.proc_delete_inode) | ||
1146 | *(.text.posix_same_owner) | ||
1147 | *(.text.posix_block_lock) | ||
1148 | *(.text.poll_initwait) | ||
1149 | *(.text.pipe_write) | ||
1150 | *(.text.pipe_read_fasync) | ||
1151 | *(.text.pipe_ioctl) | ||
1152 | *(.text.pdflush) | ||
1153 | *(.text.pci_user_read_config_dword) | ||
1154 | *(.text.page_readlink) | ||
1155 | *(.text.null_lseek) | ||
1156 | *(.text.nf_hook_slow) | ||
1157 | *(.text.netlink_sock_destruct) | ||
1158 | *(.text.netlink_broadcast) | ||
1159 | *(.text.neigh_resolve_output) | ||
1160 | *(.text.name_to_int) | ||
1161 | *(.text.mwait_idle) | ||
1162 | *(.text.mutex_trylock) | ||
1163 | *(.text.mutex_debug_check_no_locks_held) | ||
1164 | *(.text.m_stop) | ||
1165 | *(.text.mpage_end_io_write) | ||
1166 | *(.text.mpage_alloc) | ||
1167 | *(.text.move_page_tables) | ||
1168 | *(.text.mounts_open) | ||
1169 | *(.text.__memset) | ||
1170 | *(.text.memcpy_fromiovec) | ||
1171 | *(.text.make_8259A_irq) | ||
1172 | *(.text.lookup_user_key_possessed) | ||
1173 | *(.text.lookup_create) | ||
1174 | *(.text.locks_insert_lock) | ||
1175 | *(.text.locks_alloc_lock) | ||
1176 | *(.text.kthread_should_stop) | ||
1177 | *(.text.kswapd) | ||
1178 | *(.text.kobject_uevent) | ||
1179 | *(.text.kobject_get_path) | ||
1180 | *(.text.kobject_get) | ||
1181 | *(.text.klist_children_put) | ||
1182 | *(.text.__ip_route_output_key) | ||
1183 | *(.text.ip_flush_pending_frames) | ||
1184 | *(.text.ip_compute_csum) | ||
1185 | *(.text.ip_append_data) | ||
1186 | *(.text.ioc_set_batching) | ||
1187 | *(.text.invalidate_inode_pages) | ||
1188 | *(.text.__invalidate_device) | ||
1189 | *(.text.install_arg_page) | ||
1190 | *(.text.in_sched_functions) | ||
1191 | *(.text.inotify_unmount_inodes) | ||
1192 | *(.text.init_once) | ||
1193 | *(.text.init_cdrom_command) | ||
1194 | *(.text.inet_stream_connect) | ||
1195 | *(.text.inet_sk_rebuild_header) | ||
1196 | *(.text.inet_csk_addr2sockaddr) | ||
1197 | *(.text.inet_create) | ||
1198 | *(.text.ifind) | ||
1199 | *(.text.ide_setup_dma) | ||
1200 | *(.text.ide_outsw) | ||
1201 | *(.text.ide_fixstring) | ||
1202 | *(.text.ide_dma_setup) | ||
1203 | *(.text.ide_cdrom_packet) | ||
1204 | *(.text.ide_cd_put) | ||
1205 | *(.text.ide_build_sglist) | ||
1206 | *(.text.i8259A_shutdown) | ||
1207 | *(.text.hung_up_tty_ioctl) | ||
1208 | *(.text.hrtimer_nanosleep) | ||
1209 | *(.text.hrtimer_init) | ||
1210 | *(.text.hrtimer_cancel) | ||
1211 | *(.text.hash_futex) | ||
1212 | *(.text.group_send_sig_info) | ||
1213 | *(.text.grab_cache_page_nowait) | ||
1214 | *(.text.get_wchan) | ||
1215 | *(.text.get_stack) | ||
1216 | *(.text.get_page_state) | ||
1217 | *(.text.getnstimeofday) | ||
1218 | *(.text.get_node) | ||
1219 | *(.text.get_kprobe) | ||
1220 | *(.text.generic_unplug_device) | ||
1221 | *(.text.free_task) | ||
1222 | *(.text.frag_show) | ||
1223 | *(.text.find_next_zero_string) | ||
1224 | *(.text.filp_open) | ||
1225 | *(.text.fillonedir) | ||
1226 | *(.text.exit_io_context) | ||
1227 | *(.text.exit_idle) | ||
1228 | *(.text.exact_lock) | ||
1229 | *(.text.eth_header) | ||
1230 | *(.text.dummy_unregister_security) | ||
1231 | *(.text.dummy_socket_post_create) | ||
1232 | *(.text.dummy_socket_listen) | ||
1233 | *(.text.dummy_quota_on) | ||
1234 | *(.text.dummy_inode_follow_link) | ||
1235 | *(.text.dummy_file_receive) | ||
1236 | *(.text.dummy_file_mprotect) | ||
1237 | *(.text.dummy_file_lock) | ||
1238 | *(.text.dummy_file_ioctl) | ||
1239 | *(.text.dummy_bprm_post_apply_creds) | ||
1240 | *(.text.do_writepages) | ||
1241 | *(.text.__down_interruptible) | ||
1242 | *(.text.do_notify_resume) | ||
1243 | *(.text.do_acct_process) | ||
1244 | *(.text.del_timer_sync) | ||
1245 | *(.text.default_rebuild_header) | ||
1246 | *(.text.d_callback) | ||
1247 | *(.text.dcache_readdir) | ||
1248 | *(.text.ctrl_dumpfamily) | ||
1249 | *(.text.cpuset_rmdir) | ||
1250 | *(.text.copy_strings_kernel) | ||
1251 | *(.text.con_write_room) | ||
1252 | *(.text.complete_all) | ||
1253 | *(.text.collect_sigign_sigcatch) | ||
1254 | *(.text.clear_user) | ||
1255 | *(.text.check_unthrottle) | ||
1256 | *(.text.cdrom_release) | ||
1257 | *(.text.cdrom_newpc_intr) | ||
1258 | *(.text.cdrom_ioctl) | ||
1259 | *(.text.cdrom_check_status) | ||
1260 | *(.text.cdev_put) | ||
1261 | *(.text.cdev_add) | ||
1262 | *(.text.cap_ptrace) | ||
1263 | *(.text.cap_bprm_secureexec) | ||
1264 | *(.text.cache_alloc_refill) | ||
1265 | *(.text.bmap) | ||
1266 | *(.text.blk_run_queue) | ||
1267 | *(.text.blk_queue_dma_alignment) | ||
1268 | *(.text.blk_ordered_req_seq) | ||
1269 | *(.text.blk_backing_dev_unplug) | ||
1270 | *(.text.__bitmap_subset) | ||
1271 | *(.text.__bitmap_and) | ||
1272 | *(.text.bio_unmap_user) | ||
1273 | *(.text.__bforget) | ||
1274 | *(.text.bd_forget) | ||
1275 | *(.text.bad_pipe_w) | ||
1276 | *(.text.bad_get_user) | ||
1277 | *(.text.audit_free) | ||
1278 | *(.text.anon_vma_ctor) | ||
1279 | *(.text.anon_pipe_buf_map) | ||
1280 | *(.text.alloc_sock_iocb) | ||
1281 | *(.text.alloc_fdset) | ||
1282 | *(.text.aio_kick_handler) | ||
1283 | *(.text.__add_entropy_words) | ||
1284 | *(.text.add_disk_randomness) | ||
diff --git a/arch/x86_64/kernel/genapic.c b/arch/x86_64/kernel/genapic.c index 0b3603adf56..47496a40e84 100644 --- a/arch/x86_64/kernel/genapic.c +++ b/arch/x86_64/kernel/genapic.c | |||
@@ -11,120 +11,54 @@ | |||
11 | #include <linux/threads.h> | 11 | #include <linux/threads.h> |
12 | #include <linux/cpumask.h> | 12 | #include <linux/cpumask.h> |
13 | #include <linux/string.h> | 13 | #include <linux/string.h> |
14 | #include <linux/module.h> | ||
14 | #include <linux/kernel.h> | 15 | #include <linux/kernel.h> |
15 | #include <linux/ctype.h> | 16 | #include <linux/ctype.h> |
16 | #include <linux/init.h> | 17 | #include <linux/init.h> |
17 | #include <linux/module.h> | ||
18 | 18 | ||
19 | #include <asm/smp.h> | 19 | #include <asm/smp.h> |
20 | #include <asm/ipi.h> | 20 | #include <asm/ipi.h> |
21 | #include <asm/genapic.h> | ||
21 | 22 | ||
22 | #if defined(CONFIG_ACPI) | 23 | #ifdef CONFIG_ACPI |
23 | #include <acpi/acpi_bus.h> | 24 | #include <acpi/acpi_bus.h> |
24 | #endif | 25 | #endif |
25 | 26 | ||
26 | /* which logical CPU number maps to which CPU (physical APIC ID) */ | 27 | /* which logical CPU number maps to which CPU (physical APIC ID) */ |
27 | u8 x86_cpu_to_apicid[NR_CPUS] __read_mostly = { [0 ... NR_CPUS-1] = BAD_APICID }; | 28 | u8 x86_cpu_to_apicid[NR_CPUS] __read_mostly |
29 | = { [0 ... NR_CPUS-1] = BAD_APICID }; | ||
28 | EXPORT_SYMBOL(x86_cpu_to_apicid); | 30 | EXPORT_SYMBOL(x86_cpu_to_apicid); |
29 | u8 x86_cpu_to_log_apicid[NR_CPUS] = { [0 ... NR_CPUS-1] = BAD_APICID }; | ||
30 | 31 | ||
31 | extern struct genapic apic_cluster; | 32 | u8 x86_cpu_to_log_apicid[NR_CPUS] = { [0 ... NR_CPUS-1] = BAD_APICID }; |
32 | extern struct genapic apic_flat; | ||
33 | extern struct genapic apic_physflat; | ||
34 | 33 | ||
35 | struct genapic *genapic = &apic_flat; | 34 | struct genapic __read_mostly *genapic = &apic_flat; |
36 | struct genapic *genapic_force; | ||
37 | 35 | ||
38 | /* | 36 | /* |
39 | * Check the APIC IDs in bios_cpu_apicid and choose the APIC mode. | 37 | * Check the APIC IDs in bios_cpu_apicid and choose the APIC mode. |
40 | */ | 38 | */ |
41 | void __init clustered_apic_check(void) | 39 | void __init setup_apic_routing(void) |
42 | { | 40 | { |
43 | long i; | 41 | #ifdef CONFIG_ACPI |
44 | u8 clusters, max_cluster; | ||
45 | u8 id; | ||
46 | u8 cluster_cnt[NUM_APIC_CLUSTERS]; | ||
47 | int max_apic = 0; | ||
48 | |||
49 | /* genapic selection can be forced because of certain quirks. | ||
50 | */ | ||
51 | if (genapic_force) { | ||
52 | genapic = genapic_force; | ||
53 | goto print; | ||
54 | } | ||
55 | |||
56 | #if defined(CONFIG_ACPI) | ||
57 | /* | 42 | /* |
58 | * Some x86_64 machines use physical APIC mode regardless of how many | 43 | * Quirk: some x86_64 machines can only use physical APIC mode |
59 | * procs/clusters are present (x86_64 ES7000 is an example). | 44 | * regardless of how many processors are present (x86_64 ES7000 |
45 | * is an example). | ||
60 | */ | 46 | */ |
61 | if (acpi_gbl_FADT.header.revision > FADT2_REVISION_ID) | 47 | if (acpi_gbl_FADT.header.revision > FADT2_REVISION_ID && |
62 | if (acpi_gbl_FADT.flags & ACPI_FADT_APIC_PHYSICAL) { | 48 | (acpi_gbl_FADT.flags & ACPI_FADT_APIC_PHYSICAL)) |
63 | genapic = &apic_cluster; | ||
64 | goto print; | ||
65 | } | ||
66 | #endif | ||
67 | |||
68 | memset(cluster_cnt, 0, sizeof(cluster_cnt)); | ||
69 | for (i = 0; i < NR_CPUS; i++) { | ||
70 | id = bios_cpu_apicid[i]; | ||
71 | if (id == BAD_APICID) | ||
72 | continue; | ||
73 | if (id > max_apic) | ||
74 | max_apic = id; | ||
75 | cluster_cnt[APIC_CLUSTERID(id)]++; | ||
76 | } | ||
77 | |||
78 | /* Don't use clustered mode on AMD platforms. */ | ||
79 | if (boot_cpu_data.x86_vendor == X86_VENDOR_AMD) { | ||
80 | genapic = &apic_physflat; | 49 | genapic = &apic_physflat; |
81 | #ifndef CONFIG_HOTPLUG_CPU | 50 | else |
82 | /* In the CPU hotplug case we cannot use broadcast mode | ||
83 | because that opens a race when a CPU is removed. | ||
84 | Stay at physflat mode in this case. | ||
85 | It is bad to do this unconditionally though. Once | ||
86 | we have ACPI platform support for CPU hotplug | ||
87 | we should detect hotplug capablity from ACPI tables and | ||
88 | only do this when really needed. -AK */ | ||
89 | if (max_apic <= 8) | ||
90 | genapic = &apic_flat; | ||
91 | #endif | 51 | #endif |
92 | goto print; | ||
93 | } | ||
94 | 52 | ||
95 | clusters = 0; | 53 | if (cpus_weight(cpu_possible_map) <= 8) |
96 | max_cluster = 0; | ||
97 | |||
98 | for (i = 0; i < NUM_APIC_CLUSTERS; i++) { | ||
99 | if (cluster_cnt[i] > 0) { | ||
100 | ++clusters; | ||
101 | if (cluster_cnt[i] > max_cluster) | ||
102 | max_cluster = cluster_cnt[i]; | ||
103 | } | ||
104 | } | ||
105 | |||
106 | /* | ||
107 | * If we have clusters <= 1 and CPUs <= 8 in cluster 0, then flat mode, | ||
108 | * else if max_cluster <= 4 and cluster_cnt[15] == 0, clustered logical | ||
109 | * else physical mode. | ||
110 | * (We don't use lowest priority delivery + HW APIC IRQ steering, so | ||
111 | * can ignore the clustered logical case and go straight to physical.) | ||
112 | */ | ||
113 | if (clusters <= 1 && max_cluster <= 8 && cluster_cnt[0] == max_cluster) { | ||
114 | #ifdef CONFIG_HOTPLUG_CPU | ||
115 | /* Don't use APIC shortcuts in CPU hotplug to avoid races */ | ||
116 | genapic = &apic_physflat; | ||
117 | #else | ||
118 | genapic = &apic_flat; | 54 | genapic = &apic_flat; |
119 | #endif | 55 | else |
120 | } else | 56 | genapic = &apic_physflat; |
121 | genapic = &apic_cluster; | ||
122 | 57 | ||
123 | print: | ||
124 | printk(KERN_INFO "Setting APIC routing to %s\n", genapic->name); | 58 | printk(KERN_INFO "Setting APIC routing to %s\n", genapic->name); |
125 | } | 59 | } |
126 | 60 | ||
127 | /* Same for both flat and clustered. */ | 61 | /* Same for both flat and physical. */ |
128 | 62 | ||
129 | void send_IPI_self(int vector) | 63 | void send_IPI_self(int vector) |
130 | { | 64 | { |
diff --git a/arch/x86_64/kernel/genapic_cluster.c b/arch/x86_64/kernel/genapic_cluster.c deleted file mode 100644 index 73d76308b95..00000000000 --- a/arch/x86_64/kernel/genapic_cluster.c +++ /dev/null | |||
@@ -1,137 +0,0 @@ | |||
1 | /* | ||
2 | * Copyright 2004 James Cleverdon, IBM. | ||
3 | * Subject to the GNU Public License, v.2 | ||
4 | * | ||
5 | * Clustered APIC subarch code. Up to 255 CPUs, physical delivery. | ||
6 | * (A more realistic maximum is around 230 CPUs.) | ||
7 | * | ||
8 | * Hacked for x86-64 by James Cleverdon from i386 architecture code by | ||
9 | * Martin Bligh, Andi Kleen, James Bottomley, John Stultz, and | ||
10 | * James Cleverdon. | ||
11 | */ | ||
12 | #include <linux/threads.h> | ||
13 | #include <linux/cpumask.h> | ||
14 | #include <linux/string.h> | ||
15 | #include <linux/kernel.h> | ||
16 | #include <linux/ctype.h> | ||
17 | #include <linux/init.h> | ||
18 | #include <asm/smp.h> | ||
19 | #include <asm/ipi.h> | ||
20 | |||
21 | |||
22 | /* | ||
23 | * Set up the logical destination ID. | ||
24 | * | ||
25 | * Intel recommends to set DFR, LDR and TPR before enabling | ||
26 | * an APIC. See e.g. "AP-388 82489DX User's Manual" (Intel | ||
27 | * document number 292116). So here it goes... | ||
28 | */ | ||
29 | static void cluster_init_apic_ldr(void) | ||
30 | { | ||
31 | unsigned long val, id; | ||
32 | long i, count; | ||
33 | u8 lid; | ||
34 | u8 my_id = hard_smp_processor_id(); | ||
35 | u8 my_cluster = APIC_CLUSTER(my_id); | ||
36 | |||
37 | /* Create logical APIC IDs by counting CPUs already in cluster. */ | ||
38 | for (count = 0, i = NR_CPUS; --i >= 0; ) { | ||
39 | lid = x86_cpu_to_log_apicid[i]; | ||
40 | if (lid != BAD_APICID && APIC_CLUSTER(lid) == my_cluster) | ||
41 | ++count; | ||
42 | } | ||
43 | /* | ||
44 | * We only have a 4 wide bitmap in cluster mode. There's no way | ||
45 | * to get above 60 CPUs and still give each one it's own bit. | ||
46 | * But, we're using physical IRQ delivery, so we don't care. | ||
47 | * Use bit 3 for the 4th through Nth CPU in each cluster. | ||
48 | */ | ||
49 | if (count >= XAPIC_DEST_CPUS_SHIFT) | ||
50 | count = 3; | ||
51 | id = my_cluster | (1UL << count); | ||
52 | x86_cpu_to_log_apicid[smp_processor_id()] = id; | ||
53 | apic_write(APIC_DFR, APIC_DFR_CLUSTER); | ||
54 | val = apic_read(APIC_LDR) & ~APIC_LDR_MASK; | ||
55 | val |= SET_APIC_LOGICAL_ID(id); | ||
56 | apic_write(APIC_LDR, val); | ||
57 | } | ||
58 | |||
59 | /* Start with all IRQs pointing to boot CPU. IRQ balancing will shift them. */ | ||
60 | |||
61 | static cpumask_t cluster_target_cpus(void) | ||
62 | { | ||
63 | return cpumask_of_cpu(0); | ||
64 | } | ||
65 | |||
66 | static cpumask_t cluster_vector_allocation_domain(int cpu) | ||
67 | { | ||
68 | cpumask_t domain = CPU_MASK_NONE; | ||
69 | cpu_set(cpu, domain); | ||
70 | return domain; | ||
71 | } | ||
72 | |||
73 | static void cluster_send_IPI_mask(cpumask_t mask, int vector) | ||
74 | { | ||
75 | send_IPI_mask_sequence(mask, vector); | ||
76 | } | ||
77 | |||
78 | static void cluster_send_IPI_allbutself(int vector) | ||
79 | { | ||
80 | cpumask_t mask = cpu_online_map; | ||
81 | |||
82 | cpu_clear(smp_processor_id(), mask); | ||
83 | |||
84 | if (!cpus_empty(mask)) | ||
85 | cluster_send_IPI_mask(mask, vector); | ||
86 | } | ||
87 | |||
88 | static void cluster_send_IPI_all(int vector) | ||
89 | { | ||
90 | cluster_send_IPI_mask(cpu_online_map, vector); | ||
91 | } | ||
92 | |||
93 | static int cluster_apic_id_registered(void) | ||
94 | { | ||
95 | return 1; | ||
96 | } | ||
97 | |||
98 | static unsigned int cluster_cpu_mask_to_apicid(cpumask_t cpumask) | ||
99 | { | ||
100 | int cpu; | ||
101 | |||
102 | /* | ||
103 | * We're using fixed IRQ delivery, can only return one phys APIC ID. | ||
104 | * May as well be the first. | ||
105 | */ | ||
106 | cpu = first_cpu(cpumask); | ||
107 | if ((unsigned)cpu < NR_CPUS) | ||
108 | return x86_cpu_to_apicid[cpu]; | ||
109 | else | ||
110 | return BAD_APICID; | ||
111 | } | ||
112 | |||
113 | /* cpuid returns the value latched in the HW at reset, not the APIC ID | ||
114 | * register's value. For any box whose BIOS changes APIC IDs, like | ||
115 | * clustered APIC systems, we must use hard_smp_processor_id. | ||
116 | * | ||
117 | * See Intel's IA-32 SW Dev's Manual Vol2 under CPUID. | ||
118 | */ | ||
119 | static unsigned int phys_pkg_id(int index_msb) | ||
120 | { | ||
121 | return hard_smp_processor_id() >> index_msb; | ||
122 | } | ||
123 | |||
124 | struct genapic apic_cluster = { | ||
125 | .name = "clustered", | ||
126 | .int_delivery_mode = dest_Fixed, | ||
127 | .int_dest_mode = (APIC_DEST_PHYSICAL != 0), | ||
128 | .target_cpus = cluster_target_cpus, | ||
129 | .vector_allocation_domain = cluster_vector_allocation_domain, | ||
130 | .apic_id_registered = cluster_apic_id_registered, | ||
131 | .init_apic_ldr = cluster_init_apic_ldr, | ||
132 | .send_IPI_all = cluster_send_IPI_all, | ||
133 | .send_IPI_allbutself = cluster_send_IPI_allbutself, | ||
134 | .send_IPI_mask = cluster_send_IPI_mask, | ||
135 | .cpu_mask_to_apicid = cluster_cpu_mask_to_apicid, | ||
136 | .phys_pkg_id = phys_pkg_id, | ||
137 | }; | ||
diff --git a/arch/x86_64/kernel/genapic_flat.c b/arch/x86_64/kernel/genapic_flat.c index 7c01db8fa9d..ecb01eefdd2 100644 --- a/arch/x86_64/kernel/genapic_flat.c +++ b/arch/x86_64/kernel/genapic_flat.c | |||
@@ -8,6 +8,7 @@ | |||
8 | * Martin Bligh, Andi Kleen, James Bottomley, John Stultz, and | 8 | * Martin Bligh, Andi Kleen, James Bottomley, John Stultz, and |
9 | * James Cleverdon. | 9 | * James Cleverdon. |
10 | */ | 10 | */ |
11 | #include <linux/errno.h> | ||
11 | #include <linux/threads.h> | 12 | #include <linux/threads.h> |
12 | #include <linux/cpumask.h> | 13 | #include <linux/cpumask.h> |
13 | #include <linux/string.h> | 14 | #include <linux/string.h> |
@@ -16,6 +17,7 @@ | |||
16 | #include <linux/init.h> | 17 | #include <linux/init.h> |
17 | #include <asm/smp.h> | 18 | #include <asm/smp.h> |
18 | #include <asm/ipi.h> | 19 | #include <asm/ipi.h> |
20 | #include <asm/genapic.h> | ||
19 | 21 | ||
20 | static cpumask_t flat_target_cpus(void) | 22 | static cpumask_t flat_target_cpus(void) |
21 | { | 23 | { |
@@ -60,31 +62,10 @@ static void flat_init_apic_ldr(void) | |||
60 | static void flat_send_IPI_mask(cpumask_t cpumask, int vector) | 62 | static void flat_send_IPI_mask(cpumask_t cpumask, int vector) |
61 | { | 63 | { |
62 | unsigned long mask = cpus_addr(cpumask)[0]; | 64 | unsigned long mask = cpus_addr(cpumask)[0]; |
63 | unsigned long cfg; | ||
64 | unsigned long flags; | 65 | unsigned long flags; |
65 | 66 | ||
66 | local_irq_save(flags); | 67 | local_irq_save(flags); |
67 | 68 | __send_IPI_dest_field(mask, vector, APIC_DEST_LOGICAL); | |
68 | /* | ||
69 | * Wait for idle. | ||
70 | */ | ||
71 | apic_wait_icr_idle(); | ||
72 | |||
73 | /* | ||
74 | * prepare target chip field | ||
75 | */ | ||
76 | cfg = __prepare_ICR2(mask); | ||
77 | apic_write(APIC_ICR2, cfg); | ||
78 | |||
79 | /* | ||
80 | * program the ICR | ||
81 | */ | ||
82 | cfg = __prepare_ICR(0, vector, APIC_DEST_LOGICAL); | ||
83 | |||
84 | /* | ||
85 | * Send the IPI. The write to APIC_ICR fires this off. | ||
86 | */ | ||
87 | apic_write(APIC_ICR, cfg); | ||
88 | local_irq_restore(flags); | 69 | local_irq_restore(flags); |
89 | } | 70 | } |
90 | 71 | ||
diff --git a/arch/x86_64/kernel/head.S b/arch/x86_64/kernel/head.S index 598a4d0351f..1fab487dee8 100644 --- a/arch/x86_64/kernel/head.S +++ b/arch/x86_64/kernel/head.S | |||
@@ -5,6 +5,7 @@ | |||
5 | * Copyright (C) 2000 Pavel Machek <pavel@suse.cz> | 5 | * Copyright (C) 2000 Pavel Machek <pavel@suse.cz> |
6 | * Copyright (C) 2000 Karsten Keil <kkeil@suse.de> | 6 | * Copyright (C) 2000 Karsten Keil <kkeil@suse.de> |
7 | * Copyright (C) 2001,2002 Andi Kleen <ak@suse.de> | 7 | * Copyright (C) 2001,2002 Andi Kleen <ak@suse.de> |
8 | * Copyright (C) 2005 Eric Biederman <ebiederm@xmission.com> | ||
8 | */ | 9 | */ |
9 | 10 | ||
10 | 11 | ||
@@ -13,97 +14,131 @@ | |||
13 | #include <linux/init.h> | 14 | #include <linux/init.h> |
14 | #include <asm/desc.h> | 15 | #include <asm/desc.h> |
15 | #include <asm/segment.h> | 16 | #include <asm/segment.h> |
17 | #include <asm/pgtable.h> | ||
16 | #include <asm/page.h> | 18 | #include <asm/page.h> |
17 | #include <asm/msr.h> | 19 | #include <asm/msr.h> |
18 | #include <asm/cache.h> | 20 | #include <asm/cache.h> |
19 | 21 | ||
20 | /* we are not able to switch in one step to the final KERNEL ADRESS SPACE | 22 | /* we are not able to switch in one step to the final KERNEL ADRESS SPACE |
21 | * because we need identity-mapped pages on setup so define __START_KERNEL to | 23 | * because we need identity-mapped pages. |
22 | * 0x100000 for this stage | 24 | * |
23 | * | ||
24 | */ | 25 | */ |
25 | 26 | ||
26 | .text | 27 | .text |
27 | .section .bootstrap.text | 28 | .section .bootstrap.text |
28 | .code32 | 29 | .code64 |
29 | .globl startup_32 | 30 | .globl startup_64 |
30 | /* %bx: 1 if coming from smp trampoline on secondary cpu */ | 31 | startup_64: |
31 | startup_32: | 32 | |
32 | |||
33 | /* | 33 | /* |
34 | * At this point the CPU runs in 32bit protected mode (CS.D = 1) with | 34 | * At this point the CPU runs in 64bit mode CS.L = 1 CS.D = 1, |
35 | * paging disabled and the point of this file is to switch to 64bit | 35 | * and someone has loaded an identity mapped page table |
36 | * long mode with a kernel mapping for kerneland to jump into the | 36 | * for us. These identity mapped page tables map all of the |
37 | * kernel virtual addresses. | 37 | * kernel pages and possibly all of memory. |
38 | * There is no stack until we set one up. | 38 | * |
39 | * %esi holds a physical pointer to real_mode_data. | ||
40 | * | ||
41 | * We come here either directly from a 64bit bootloader, or from | ||
42 | * arch/x86_64/boot/compressed/head.S. | ||
43 | * | ||
44 | * We only come here initially at boot nothing else comes here. | ||
45 | * | ||
46 | * Since we may be loaded at an address different from what we were | ||
47 | * compiled to run at we first fixup the physical addresses in our page | ||
48 | * tables and then reload them. | ||
39 | */ | 49 | */ |
40 | 50 | ||
41 | /* Initialize the %ds segment register */ | 51 | /* Compute the delta between the address I am compiled to run at and the |
42 | movl $__KERNEL_DS,%eax | 52 | * address I am actually running at. |
43 | movl %eax,%ds | ||
44 | |||
45 | /* Load new GDT with the 64bit segments using 32bit descriptor */ | ||
46 | lgdt pGDT32 - __START_KERNEL_map | ||
47 | |||
48 | /* If the CPU doesn't support CPUID this will double fault. | ||
49 | * Unfortunately it is hard to check for CPUID without a stack. | ||
50 | */ | 53 | */ |
51 | 54 | leaq _text(%rip), %rbp | |
52 | /* Check if extended functions are implemented */ | 55 | subq $_text - __START_KERNEL_map, %rbp |
53 | movl $0x80000000, %eax | 56 | |
54 | cpuid | 57 | /* Is the address not 2M aligned? */ |
55 | cmpl $0x80000000, %eax | 58 | movq %rbp, %rax |
56 | jbe no_long_mode | 59 | andl $~LARGE_PAGE_MASK, %eax |
57 | /* Check if long mode is implemented */ | 60 | testl %eax, %eax |
58 | mov $0x80000001, %eax | 61 | jnz bad_address |
59 | cpuid | 62 | |
60 | btl $29, %edx | 63 | /* Is the address too large? */ |
61 | jnc no_long_mode | 64 | leaq _text(%rip), %rdx |
62 | 65 | movq $PGDIR_SIZE, %rax | |
63 | /* | 66 | cmpq %rax, %rdx |
64 | * Prepare for entering 64bits mode | 67 | jae bad_address |
68 | |||
69 | /* Fixup the physical addresses in the page table | ||
65 | */ | 70 | */ |
71 | addq %rbp, init_level4_pgt + 0(%rip) | ||
72 | addq %rbp, init_level4_pgt + (258*8)(%rip) | ||
73 | addq %rbp, init_level4_pgt + (511*8)(%rip) | ||
74 | |||
75 | addq %rbp, level3_ident_pgt + 0(%rip) | ||
76 | addq %rbp, level3_kernel_pgt + (510*8)(%rip) | ||
77 | |||
78 | /* Add an Identity mapping if I am above 1G */ | ||
79 | leaq _text(%rip), %rdi | ||
80 | andq $LARGE_PAGE_MASK, %rdi | ||
81 | |||
82 | movq %rdi, %rax | ||
83 | shrq $PUD_SHIFT, %rax | ||
84 | andq $(PTRS_PER_PUD - 1), %rax | ||
85 | jz ident_complete | ||
86 | |||
87 | leaq (level2_spare_pgt - __START_KERNEL_map + _KERNPG_TABLE)(%rbp), %rdx | ||
88 | leaq level3_ident_pgt(%rip), %rbx | ||
89 | movq %rdx, 0(%rbx, %rax, 8) | ||
90 | |||
91 | movq %rdi, %rax | ||
92 | shrq $PMD_SHIFT, %rax | ||
93 | andq $(PTRS_PER_PMD - 1), %rax | ||
94 | leaq __PAGE_KERNEL_LARGE_EXEC(%rdi), %rdx | ||
95 | leaq level2_spare_pgt(%rip), %rbx | ||
96 | movq %rdx, 0(%rbx, %rax, 8) | ||
97 | ident_complete: | ||
98 | |||
99 | /* Fixup the kernel text+data virtual addresses | ||
100 | */ | ||
101 | leaq level2_kernel_pgt(%rip), %rdi | ||
102 | leaq 4096(%rdi), %r8 | ||
103 | /* See if it is a valid page table entry */ | ||
104 | 1: testq $1, 0(%rdi) | ||
105 | jz 2f | ||
106 | addq %rbp, 0(%rdi) | ||
107 | /* Go to the next page */ | ||
108 | 2: addq $8, %rdi | ||
109 | cmp %r8, %rdi | ||
110 | jne 1b | ||
111 | |||
112 | /* Fixup phys_base */ | ||
113 | addq %rbp, phys_base(%rip) | ||
66 | 114 | ||
67 | /* Enable PAE mode */ | 115 | #ifdef CONFIG_SMP |
68 | xorl %eax, %eax | 116 | addq %rbp, trampoline_level4_pgt + 0(%rip) |
69 | btsl $5, %eax | 117 | addq %rbp, trampoline_level4_pgt + (511*8)(%rip) |
70 | movl %eax, %cr4 | 118 | #endif |
71 | 119 | #ifdef CONFIG_ACPI_SLEEP | |
72 | /* Setup early boot stage 4 level pagetables */ | 120 | addq %rbp, wakeup_level4_pgt + 0(%rip) |
73 | movl $(boot_level4_pgt - __START_KERNEL_map), %eax | 121 | addq %rbp, wakeup_level4_pgt + (511*8)(%rip) |
74 | movl %eax, %cr3 | 122 | #endif |
75 | |||
76 | /* Setup EFER (Extended Feature Enable Register) */ | ||
77 | movl $MSR_EFER, %ecx | ||
78 | rdmsr | ||
79 | |||
80 | /* Enable Long Mode */ | ||
81 | btsl $_EFER_LME, %eax | ||
82 | |||
83 | /* Make changes effective */ | ||
84 | wrmsr | ||
85 | 123 | ||
86 | xorl %eax, %eax | 124 | /* Due to ENTRY(), sometimes the empty space gets filled with |
87 | btsl $31, %eax /* Enable paging and in turn activate Long Mode */ | 125 | * zeros. Better take a jmp than relying on empty space being |
88 | btsl $0, %eax /* Enable protected mode */ | 126 | * filled with 0x90 (nop) |
89 | /* Make changes effective */ | ||
90 | movl %eax, %cr0 | ||
91 | /* | ||
92 | * At this point we're in long mode but in 32bit compatibility mode | ||
93 | * with EFER.LME = 1, CS.L = 0, CS.D = 1 (and in turn | ||
94 | * EFER.LMA = 1). Now we want to jump in 64bit mode, to do that we use | ||
95 | * the new gdt/idt that has __KERNEL_CS with CS.L = 1. | ||
96 | */ | 127 | */ |
97 | ljmp $__KERNEL_CS, $(startup_64 - __START_KERNEL_map) | 128 | jmp secondary_startup_64 |
98 | 129 | ENTRY(secondary_startup_64) | |
99 | .code64 | 130 | /* |
100 | .org 0x100 | 131 | * At this point the CPU runs in 64bit mode CS.L = 1 CS.D = 1, |
101 | .globl startup_64 | 132 | * and someone has loaded a mapped page table. |
102 | startup_64: | 133 | * |
103 | /* We come here either from startup_32 | 134 | * %esi holds a physical pointer to real_mode_data. |
104 | * or directly from a 64bit bootloader. | 135 | * |
105 | * Since we may have come directly from a bootloader we | 136 | * We come here either from startup_64 (using physical addresses) |
106 | * reload the page tables here. | 137 | * or from trampoline.S (using virtual addresses). |
138 | * | ||
139 | * Using virtual addresses from trampoline.S removes the need | ||
140 | * to have any identity mapped pages in the kernel page table | ||
141 | * after the boot processor executes this code. | ||
107 | */ | 142 | */ |
108 | 143 | ||
109 | /* Enable PAE mode and PGE */ | 144 | /* Enable PAE mode and PGE */ |
@@ -113,9 +148,15 @@ startup_64: | |||
113 | movq %rax, %cr4 | 148 | movq %rax, %cr4 |
114 | 149 | ||
115 | /* Setup early boot stage 4 level pagetables. */ | 150 | /* Setup early boot stage 4 level pagetables. */ |
116 | movq $(boot_level4_pgt - __START_KERNEL_map), %rax | 151 | movq $(init_level4_pgt - __START_KERNEL_map), %rax |
152 | addq phys_base(%rip), %rax | ||
117 | movq %rax, %cr3 | 153 | movq %rax, %cr3 |
118 | 154 | ||
155 | /* Ensure I am executing from virtual addresses */ | ||
156 | movq $1f, %rax | ||
157 | jmp *%rax | ||
158 | 1: | ||
159 | |||
119 | /* Check if nx is implemented */ | 160 | /* Check if nx is implemented */ |
120 | movl $0x80000001, %eax | 161 | movl $0x80000001, %eax |
121 | cpuid | 162 | cpuid |
@@ -124,17 +165,11 @@ startup_64: | |||
124 | /* Setup EFER (Extended Feature Enable Register) */ | 165 | /* Setup EFER (Extended Feature Enable Register) */ |
125 | movl $MSR_EFER, %ecx | 166 | movl $MSR_EFER, %ecx |
126 | rdmsr | 167 | rdmsr |
127 | 168 | btsl $_EFER_SCE, %eax /* Enable System Call */ | |
128 | /* Enable System Call */ | 169 | btl $20,%edi /* No Execute supported? */ |
129 | btsl $_EFER_SCE, %eax | ||
130 | |||
131 | /* No Execute supported? */ | ||
132 | btl $20,%edi | ||
133 | jnc 1f | 170 | jnc 1f |
134 | btsl $_EFER_NX, %eax | 171 | btsl $_EFER_NX, %eax |
135 | 1: | 172 | 1: wrmsr /* Make changes effective */ |
136 | /* Make changes effective */ | ||
137 | wrmsr | ||
138 | 173 | ||
139 | /* Setup cr0 */ | 174 | /* Setup cr0 */ |
140 | #define CR0_PM 1 /* protected mode */ | 175 | #define CR0_PM 1 /* protected mode */ |
@@ -161,7 +196,7 @@ startup_64: | |||
161 | * addresses where we're currently running on. We have to do that here | 196 | * addresses where we're currently running on. We have to do that here |
162 | * because in 32bit we couldn't load a 64bit linear address. | 197 | * because in 32bit we couldn't load a 64bit linear address. |
163 | */ | 198 | */ |
164 | lgdt cpu_gdt_descr | 199 | lgdt cpu_gdt_descr(%rip) |
165 | 200 | ||
166 | /* set up data segments. actually 0 would do too */ | 201 | /* set up data segments. actually 0 would do too */ |
167 | movl $__KERNEL_DS,%eax | 202 | movl $__KERNEL_DS,%eax |
@@ -212,6 +247,9 @@ initial_code: | |||
212 | init_rsp: | 247 | init_rsp: |
213 | .quad init_thread_union+THREAD_SIZE-8 | 248 | .quad init_thread_union+THREAD_SIZE-8 |
214 | 249 | ||
250 | bad_address: | ||
251 | jmp bad_address | ||
252 | |||
215 | ENTRY(early_idt_handler) | 253 | ENTRY(early_idt_handler) |
216 | cmpl $2,early_recursion_flag(%rip) | 254 | cmpl $2,early_recursion_flag(%rip) |
217 | jz 1f | 255 | jz 1f |
@@ -240,110 +278,66 @@ early_idt_msg: | |||
240 | early_idt_ripmsg: | 278 | early_idt_ripmsg: |
241 | .asciz "RIP %s\n" | 279 | .asciz "RIP %s\n" |
242 | 280 | ||
243 | .code32 | 281 | .balign PAGE_SIZE |
244 | ENTRY(no_long_mode) | ||
245 | /* This isn't an x86-64 CPU so hang */ | ||
246 | 1: | ||
247 | jmp 1b | ||
248 | |||
249 | .org 0xf00 | ||
250 | .globl pGDT32 | ||
251 | pGDT32: | ||
252 | .word gdt_end-cpu_gdt_table-1 | ||
253 | .long cpu_gdt_table-__START_KERNEL_map | ||
254 | |||
255 | .org 0xf10 | ||
256 | ljumpvector: | ||
257 | .long startup_64-__START_KERNEL_map | ||
258 | .word __KERNEL_CS | ||
259 | 282 | ||
260 | ENTRY(stext) | ||
261 | ENTRY(_stext) | ||
262 | |||
263 | $page = 0 | ||
264 | #define NEXT_PAGE(name) \ | 283 | #define NEXT_PAGE(name) \ |
265 | $page = $page + 1; \ | 284 | .balign PAGE_SIZE; \ |
266 | .org $page * 0x1000; \ | ||
267 | phys_/**/name = $page * 0x1000 + __PHYSICAL_START; \ | ||
268 | ENTRY(name) | 285 | ENTRY(name) |
269 | 286 | ||
287 | /* Automate the creation of 1 to 1 mapping pmd entries */ | ||
288 | #define PMDS(START, PERM, COUNT) \ | ||
289 | i = 0 ; \ | ||
290 | .rept (COUNT) ; \ | ||
291 | .quad (START) + (i << 21) + (PERM) ; \ | ||
292 | i = i + 1 ; \ | ||
293 | .endr | ||
294 | |||
295 | /* | ||
296 | * This default setting generates an ident mapping at address 0x100000 | ||
297 | * and a mapping for the kernel that precisely maps virtual address | ||
298 | * 0xffffffff80000000 to physical address 0x000000. (always using | ||
299 | * 2Mbyte large pages provided by PAE mode) | ||
300 | */ | ||
270 | NEXT_PAGE(init_level4_pgt) | 301 | NEXT_PAGE(init_level4_pgt) |
271 | /* This gets initialized in x86_64_start_kernel */ | 302 | .quad level3_ident_pgt - __START_KERNEL_map + _KERNPG_TABLE |
272 | .fill 512,8,0 | 303 | .fill 257,8,0 |
304 | .quad level3_ident_pgt - __START_KERNEL_map + _KERNPG_TABLE | ||
305 | .fill 252,8,0 | ||
306 | /* (2^48-(2*1024*1024*1024))/(2^39) = 511 */ | ||
307 | .quad level3_kernel_pgt - __START_KERNEL_map + _PAGE_TABLE | ||
273 | 308 | ||
274 | NEXT_PAGE(level3_ident_pgt) | 309 | NEXT_PAGE(level3_ident_pgt) |
275 | .quad phys_level2_ident_pgt | 0x007 | 310 | .quad level2_ident_pgt - __START_KERNEL_map + _KERNPG_TABLE |
276 | .fill 511,8,0 | 311 | .fill 511,8,0 |
277 | 312 | ||
278 | NEXT_PAGE(level3_kernel_pgt) | 313 | NEXT_PAGE(level3_kernel_pgt) |
279 | .fill 510,8,0 | 314 | .fill 510,8,0 |
280 | /* (2^48-(2*1024*1024*1024)-((2^39)*511))/(2^30) = 510 */ | 315 | /* (2^48-(2*1024*1024*1024)-((2^39)*511))/(2^30) = 510 */ |
281 | .quad phys_level2_kernel_pgt | 0x007 | 316 | .quad level2_kernel_pgt - __START_KERNEL_map + _KERNPG_TABLE |
282 | .fill 1,8,0 | 317 | .fill 1,8,0 |
283 | 318 | ||
284 | NEXT_PAGE(level2_ident_pgt) | 319 | NEXT_PAGE(level2_ident_pgt) |
285 | /* 40MB for bootup. */ | 320 | /* Since I easily can, map the first 1G. |
286 | i = 0 | 321 | * Don't set NX because code runs from these pages. |
287 | .rept 20 | 322 | */ |
288 | .quad i << 21 | 0x083 | 323 | PMDS(0x0000000000000000, __PAGE_KERNEL_LARGE_EXEC, PTRS_PER_PMD) |
289 | i = i + 1 | 324 | |
290 | .endr | ||
291 | /* Temporary mappings for the super early allocator in arch/x86_64/mm/init.c */ | ||
292 | .globl temp_boot_pmds | ||
293 | temp_boot_pmds: | ||
294 | .fill 492,8,0 | ||
295 | |||
296 | NEXT_PAGE(level2_kernel_pgt) | 325 | NEXT_PAGE(level2_kernel_pgt) |
297 | /* 40MB kernel mapping. The kernel code cannot be bigger than that. | 326 | /* 40MB kernel mapping. The kernel code cannot be bigger than that. |
298 | When you change this change KERNEL_TEXT_SIZE in page.h too. */ | 327 | When you change this change KERNEL_TEXT_SIZE in page.h too. */ |
299 | /* (2^48-(2*1024*1024*1024)-((2^39)*511)-((2^30)*510)) = 0 */ | 328 | /* (2^48-(2*1024*1024*1024)-((2^39)*511)-((2^30)*510)) = 0 */ |
300 | i = 0 | 329 | PMDS(0x0000000000000000, __PAGE_KERNEL_LARGE_EXEC|_PAGE_GLOBAL, |
301 | .rept 20 | 330 | KERNEL_TEXT_SIZE/PMD_SIZE) |
302 | .quad i << 21 | 0x183 | ||
303 | i = i + 1 | ||
304 | .endr | ||
305 | /* Module mapping starts here */ | 331 | /* Module mapping starts here */ |
306 | .fill 492,8,0 | 332 | .fill (PTRS_PER_PMD - (KERNEL_TEXT_SIZE/PMD_SIZE)),8,0 |
307 | 333 | ||
308 | NEXT_PAGE(level3_physmem_pgt) | 334 | NEXT_PAGE(level2_spare_pgt) |
309 | .quad phys_level2_kernel_pgt | 0x007 /* so that __va works even before pagetable_init */ | 335 | .fill 512,8,0 |
310 | .fill 511,8,0 | ||
311 | 336 | ||
337 | #undef PMDS | ||
312 | #undef NEXT_PAGE | 338 | #undef NEXT_PAGE |
313 | 339 | ||
314 | .data | 340 | .data |
315 | |||
316 | #ifdef CONFIG_ACPI_SLEEP | ||
317 | .align PAGE_SIZE | ||
318 | ENTRY(wakeup_level4_pgt) | ||
319 | .quad phys_level3_ident_pgt | 0x007 | ||
320 | .fill 255,8,0 | ||
321 | .quad phys_level3_physmem_pgt | 0x007 | ||
322 | .fill 254,8,0 | ||
323 | /* (2^48-(2*1024*1024*1024))/(2^39) = 511 */ | ||
324 | .quad phys_level3_kernel_pgt | 0x007 | ||
325 | #endif | ||
326 | |||
327 | #ifndef CONFIG_HOTPLUG_CPU | ||
328 | __INITDATA | ||
329 | #endif | ||
330 | /* | ||
331 | * This default setting generates an ident mapping at address 0x100000 | ||
332 | * and a mapping for the kernel that precisely maps virtual address | ||
333 | * 0xffffffff80000000 to physical address 0x000000. (always using | ||
334 | * 2Mbyte large pages provided by PAE mode) | ||
335 | */ | ||
336 | .align PAGE_SIZE | ||
337 | ENTRY(boot_level4_pgt) | ||
338 | .quad phys_level3_ident_pgt | 0x007 | ||
339 | .fill 255,8,0 | ||
340 | .quad phys_level3_physmem_pgt | 0x007 | ||
341 | .fill 254,8,0 | ||
342 | /* (2^48-(2*1024*1024*1024))/(2^39) = 511 */ | ||
343 | .quad phys_level3_kernel_pgt | 0x007 | ||
344 | |||
345 | .data | ||
346 | |||
347 | .align 16 | 341 | .align 16 |
348 | .globl cpu_gdt_descr | 342 | .globl cpu_gdt_descr |
349 | cpu_gdt_descr: | 343 | cpu_gdt_descr: |
@@ -357,6 +351,10 @@ gdt: | |||
357 | .endr | 351 | .endr |
358 | #endif | 352 | #endif |
359 | 353 | ||
354 | ENTRY(phys_base) | ||
355 | /* This must match the first entry in level2_kernel_pgt */ | ||
356 | .quad 0x0000000000000000 | ||
357 | |||
360 | /* We need valid kernel segments for data and code in long mode too | 358 | /* We need valid kernel segments for data and code in long mode too |
361 | * IRET will check the segment types kkeil 2000/10/28 | 359 | * IRET will check the segment types kkeil 2000/10/28 |
362 | * Also sysret mandates a special GDT layout | 360 | * Also sysret mandates a special GDT layout |
@@ -370,13 +368,13 @@ gdt: | |||
370 | 368 | ||
371 | ENTRY(cpu_gdt_table) | 369 | ENTRY(cpu_gdt_table) |
372 | .quad 0x0000000000000000 /* NULL descriptor */ | 370 | .quad 0x0000000000000000 /* NULL descriptor */ |
371 | .quad 0x00cf9b000000ffff /* __KERNEL32_CS */ | ||
372 | .quad 0x00af9b000000ffff /* __KERNEL_CS */ | ||
373 | .quad 0x00cf93000000ffff /* __KERNEL_DS */ | ||
374 | .quad 0x00cffb000000ffff /* __USER32_CS */ | ||
375 | .quad 0x00cff3000000ffff /* __USER_DS, __USER32_DS */ | ||
376 | .quad 0x00affb000000ffff /* __USER_CS */ | ||
373 | .quad 0x0 /* unused */ | 377 | .quad 0x0 /* unused */ |
374 | .quad 0x00af9a000000ffff /* __KERNEL_CS */ | ||
375 | .quad 0x00cf92000000ffff /* __KERNEL_DS */ | ||
376 | .quad 0x00cffa000000ffff /* __USER32_CS */ | ||
377 | .quad 0x00cff2000000ffff /* __USER_DS, __USER32_DS */ | ||
378 | .quad 0x00affa000000ffff /* __USER_CS */ | ||
379 | .quad 0x00cf9a000000ffff /* __KERNEL32_CS */ | ||
380 | .quad 0,0 /* TSS */ | 378 | .quad 0,0 /* TSS */ |
381 | .quad 0,0 /* LDT */ | 379 | .quad 0,0 /* LDT */ |
382 | .quad 0,0,0 /* three TLS descriptors */ | 380 | .quad 0,0,0 /* three TLS descriptors */ |
diff --git a/arch/x86_64/kernel/head64.c b/arch/x86_64/kernel/head64.c index 5f197b0a330..213d90e0475 100644 --- a/arch/x86_64/kernel/head64.c +++ b/arch/x86_64/kernel/head64.c | |||
@@ -18,8 +18,16 @@ | |||
18 | #include <asm/setup.h> | 18 | #include <asm/setup.h> |
19 | #include <asm/desc.h> | 19 | #include <asm/desc.h> |
20 | #include <asm/pgtable.h> | 20 | #include <asm/pgtable.h> |
21 | #include <asm/tlbflush.h> | ||
21 | #include <asm/sections.h> | 22 | #include <asm/sections.h> |
22 | 23 | ||
24 | static void __init zap_identity_mappings(void) | ||
25 | { | ||
26 | pgd_t *pgd = pgd_offset_k(0UL); | ||
27 | pgd_clear(pgd); | ||
28 | __flush_tlb(); | ||
29 | } | ||
30 | |||
23 | /* Don't add a printk in there. printk relies on the PDA which is not initialized | 31 | /* Don't add a printk in there. printk relies on the PDA which is not initialized |
24 | yet. */ | 32 | yet. */ |
25 | static void __init clear_bss(void) | 33 | static void __init clear_bss(void) |
@@ -29,25 +37,24 @@ static void __init clear_bss(void) | |||
29 | } | 37 | } |
30 | 38 | ||
31 | #define NEW_CL_POINTER 0x228 /* Relative to real mode data */ | 39 | #define NEW_CL_POINTER 0x228 /* Relative to real mode data */ |
32 | #define OLD_CL_MAGIC_ADDR 0x90020 | 40 | #define OLD_CL_MAGIC_ADDR 0x20 |
33 | #define OLD_CL_MAGIC 0xA33F | 41 | #define OLD_CL_MAGIC 0xA33F |
34 | #define OLD_CL_BASE_ADDR 0x90000 | 42 | #define OLD_CL_OFFSET 0x22 |
35 | #define OLD_CL_OFFSET 0x90022 | ||
36 | 43 | ||
37 | static void __init copy_bootdata(char *real_mode_data) | 44 | static void __init copy_bootdata(char *real_mode_data) |
38 | { | 45 | { |
39 | int new_data; | 46 | unsigned long new_data; |
40 | char * command_line; | 47 | char * command_line; |
41 | 48 | ||
42 | memcpy(x86_boot_params, real_mode_data, BOOT_PARAM_SIZE); | 49 | memcpy(x86_boot_params, real_mode_data, BOOT_PARAM_SIZE); |
43 | new_data = *(int *) (x86_boot_params + NEW_CL_POINTER); | 50 | new_data = *(u32 *) (x86_boot_params + NEW_CL_POINTER); |
44 | if (!new_data) { | 51 | if (!new_data) { |
45 | if (OLD_CL_MAGIC != * (u16 *) OLD_CL_MAGIC_ADDR) { | 52 | if (OLD_CL_MAGIC != *(u16 *)(real_mode_data + OLD_CL_MAGIC_ADDR)) { |
46 | return; | 53 | return; |
47 | } | 54 | } |
48 | new_data = OLD_CL_BASE_ADDR + * (u16 *) OLD_CL_OFFSET; | 55 | new_data = __pa(real_mode_data) + *(u16 *)(real_mode_data + OLD_CL_OFFSET); |
49 | } | 56 | } |
50 | command_line = (char *) ((u64)(new_data)); | 57 | command_line = __va(new_data); |
51 | memcpy(boot_command_line, command_line, COMMAND_LINE_SIZE); | 58 | memcpy(boot_command_line, command_line, COMMAND_LINE_SIZE); |
52 | } | 59 | } |
53 | 60 | ||
@@ -55,26 +62,30 @@ void __init x86_64_start_kernel(char * real_mode_data) | |||
55 | { | 62 | { |
56 | int i; | 63 | int i; |
57 | 64 | ||
65 | /* | ||
66 | * Make sure kernel is aligned to 2MB address. Catching it at compile | ||
67 | * time is better. Change your config file and compile the kernel | ||
68 | * for a 2MB aligned address (CONFIG_PHYSICAL_START) | ||
69 | */ | ||
70 | BUILD_BUG_ON(CONFIG_PHYSICAL_START & (__KERNEL_ALIGN - 1)); | ||
71 | |||
58 | /* clear bss before set_intr_gate with early_idt_handler */ | 72 | /* clear bss before set_intr_gate with early_idt_handler */ |
59 | clear_bss(); | 73 | clear_bss(); |
60 | 74 | ||
75 | /* Make NULL pointers segfault */ | ||
76 | zap_identity_mappings(); | ||
77 | |||
61 | for (i = 0; i < IDT_ENTRIES; i++) | 78 | for (i = 0; i < IDT_ENTRIES; i++) |
62 | set_intr_gate(i, early_idt_handler); | 79 | set_intr_gate(i, early_idt_handler); |
63 | asm volatile("lidt %0" :: "m" (idt_descr)); | 80 | asm volatile("lidt %0" :: "m" (idt_descr)); |
64 | 81 | ||
65 | early_printk("Kernel alive\n"); | 82 | early_printk("Kernel alive\n"); |
66 | 83 | ||
67 | /* | ||
68 | * switch to init_level4_pgt from boot_level4_pgt | ||
69 | */ | ||
70 | memcpy(init_level4_pgt, boot_level4_pgt, PTRS_PER_PGD*sizeof(pgd_t)); | ||
71 | asm volatile("movq %0,%%cr3" :: "r" (__pa_symbol(&init_level4_pgt))); | ||
72 | |||
73 | for (i = 0; i < NR_CPUS; i++) | 84 | for (i = 0; i < NR_CPUS; i++) |
74 | cpu_pda(i) = &boot_cpu_pda[i]; | 85 | cpu_pda(i) = &boot_cpu_pda[i]; |
75 | 86 | ||
76 | pda_init(0); | 87 | pda_init(0); |
77 | copy_bootdata(real_mode_data); | 88 | copy_bootdata(__va(real_mode_data)); |
78 | #ifdef CONFIG_SMP | 89 | #ifdef CONFIG_SMP |
79 | cpu_set(0, cpu_online_map); | 90 | cpu_set(0, cpu_online_map); |
80 | #endif | 91 | #endif |
diff --git a/arch/x86_64/kernel/io_apic.c b/arch/x86_64/kernel/io_apic.c index b7d2b76b92d..2a2df14dab7 100644 --- a/arch/x86_64/kernel/io_apic.c +++ b/arch/x86_64/kernel/io_apic.c | |||
@@ -907,10 +907,6 @@ static void __init setup_ExtINT_IRQ0_pin(unsigned int apic, unsigned int pin, in | |||
907 | enable_8259A_irq(0); | 907 | enable_8259A_irq(0); |
908 | } | 908 | } |
909 | 909 | ||
910 | void __init UNEXPECTED_IO_APIC(void) | ||
911 | { | ||
912 | } | ||
913 | |||
914 | void __apicdebuginit print_IO_APIC(void) | 910 | void __apicdebuginit print_IO_APIC(void) |
915 | { | 911 | { |
916 | int apic, i; | 912 | int apic, i; |
@@ -946,40 +942,16 @@ void __apicdebuginit print_IO_APIC(void) | |||
946 | printk(KERN_DEBUG "IO APIC #%d......\n", mp_ioapics[apic].mpc_apicid); | 942 | printk(KERN_DEBUG "IO APIC #%d......\n", mp_ioapics[apic].mpc_apicid); |
947 | printk(KERN_DEBUG ".... register #00: %08X\n", reg_00.raw); | 943 | printk(KERN_DEBUG ".... register #00: %08X\n", reg_00.raw); |
948 | printk(KERN_DEBUG "....... : physical APIC id: %02X\n", reg_00.bits.ID); | 944 | printk(KERN_DEBUG "....... : physical APIC id: %02X\n", reg_00.bits.ID); |
949 | if (reg_00.bits.__reserved_1 || reg_00.bits.__reserved_2) | ||
950 | UNEXPECTED_IO_APIC(); | ||
951 | 945 | ||
952 | printk(KERN_DEBUG ".... register #01: %08X\n", *(int *)&reg_01); | 946 | printk(KERN_DEBUG ".... register #01: %08X\n", *(int *)&reg_01); |
953 | printk(KERN_DEBUG "....... : max redirection entries: %04X\n", reg_01.bits.entries); | 947 | printk(KERN_DEBUG "....... : max redirection entries: %04X\n", reg_01.bits.entries); |
954 | if ( (reg_01.bits.entries != 0x0f) && /* older (Neptune) boards */ | ||
955 | (reg_01.bits.entries != 0x17) && /* typical ISA+PCI boards */ | ||
956 | (reg_01.bits.entries != 0x1b) && /* Compaq Proliant boards */ | ||
957 | (reg_01.bits.entries != 0x1f) && /* dual Xeon boards */ | ||
958 | (reg_01.bits.entries != 0x22) && /* bigger Xeon boards */ | ||
959 | (reg_01.bits.entries != 0x2E) && | ||
960 | (reg_01.bits.entries != 0x3F) && | ||
961 | (reg_01.bits.entries != 0x03) | ||
962 | ) | ||
963 | UNEXPECTED_IO_APIC(); | ||
964 | 948 | ||
965 | printk(KERN_DEBUG "....... : PRQ implemented: %X\n", reg_01.bits.PRQ); | 949 | printk(KERN_DEBUG "....... : PRQ implemented: %X\n", reg_01.bits.PRQ); |
966 | printk(KERN_DEBUG "....... : IO APIC version: %04X\n", reg_01.bits.version); | 950 | printk(KERN_DEBUG "....... : IO APIC version: %04X\n", reg_01.bits.version); |
967 | if ( (reg_01.bits.version != 0x01) && /* 82489DX IO-APICs */ | ||
968 | (reg_01.bits.version != 0x02) && /* 82801BA IO-APICs (ICH2) */ | ||
969 | (reg_01.bits.version != 0x10) && /* oldest IO-APICs */ | ||
970 | (reg_01.bits.version != 0x11) && /* Pentium/Pro IO-APICs */ | ||
971 | (reg_01.bits.version != 0x13) && /* Xeon IO-APICs */ | ||
972 | (reg_01.bits.version != 0x20) /* Intel P64H (82806 AA) */ | ||
973 | ) | ||
974 | UNEXPECTED_IO_APIC(); | ||
975 | if (reg_01.bits.__reserved_1 || reg_01.bits.__reserved_2) | ||
976 | UNEXPECTED_IO_APIC(); | ||
977 | 951 | ||
978 | if (reg_01.bits.version >= 0x10) { | 952 | if (reg_01.bits.version >= 0x10) { |
979 | printk(KERN_DEBUG ".... register #02: %08X\n", reg_02.raw); | 953 | printk(KERN_DEBUG ".... register #02: %08X\n", reg_02.raw); |
980 | printk(KERN_DEBUG "....... : arbitration: %02X\n", reg_02.bits.arbitration); | 954 | printk(KERN_DEBUG "....... : arbitration: %02X\n", reg_02.bits.arbitration); |
981 | if (reg_02.bits.__reserved_1 || reg_02.bits.__reserved_2) | ||
982 | UNEXPECTED_IO_APIC(); | ||
983 | } | 955 | } |
984 | 956 | ||
985 | printk(KERN_DEBUG ".... IRQ redirection table:\n"); | 957 | printk(KERN_DEBUG ".... IRQ redirection table:\n"); |
@@ -1407,8 +1379,7 @@ static void irq_complete_move(unsigned int irq) | |||
1407 | 1379 | ||
1408 | vector = ~get_irq_regs()->orig_rax; | 1380 | vector = ~get_irq_regs()->orig_rax; |
1409 | me = smp_processor_id(); | 1381 | me = smp_processor_id(); |
1410 | if ((vector == cfg->vector) && | 1382 | if ((vector == cfg->vector) && cpu_isset(me, cfg->domain)) { |
1411 | cpu_isset(smp_processor_id(), cfg->domain)) { | ||
1412 | cpumask_t cleanup_mask; | 1383 | cpumask_t cleanup_mask; |
1413 | 1384 | ||
1414 | cpus_and(cleanup_mask, cfg->old_domain, cpu_online_map); | 1385 | cpus_and(cleanup_mask, cfg->old_domain, cpu_online_map); |
diff --git a/arch/x86_64/kernel/ioport.c b/arch/x86_64/kernel/ioport.c index 745b1f0f494..387d347b0e0 100644 --- a/arch/x86_64/kernel/ioport.c +++ b/arch/x86_64/kernel/ioport.c | |||
@@ -16,6 +16,7 @@ | |||
16 | #include <linux/stddef.h> | 16 | #include <linux/stddef.h> |
17 | #include <linux/slab.h> | 17 | #include <linux/slab.h> |
18 | #include <linux/thread_info.h> | 18 | #include <linux/thread_info.h> |
19 | #include <linux/syscalls.h> | ||
19 | 20 | ||
20 | /* Set EXTENT bits starting at BASE in BITMAP to value TURN_ON. */ | 21 | /* Set EXTENT bits starting at BASE in BITMAP to value TURN_ON. */ |
21 | static void set_bitmap(unsigned long *bitmap, unsigned int base, unsigned int extent, int new_value) | 22 | static void set_bitmap(unsigned long *bitmap, unsigned int base, unsigned int extent, int new_value) |
diff --git a/arch/x86_64/kernel/machine_kexec.c b/arch/x86_64/kernel/machine_kexec.c index 0497e3bd5bf..a8bb33c1a8f 100644 --- a/arch/x86_64/kernel/machine_kexec.c +++ b/arch/x86_64/kernel/machine_kexec.c | |||
@@ -191,19 +191,19 @@ NORET_TYPE void machine_kexec(struct kimage *image) | |||
191 | 191 | ||
192 | page_list[PA_CONTROL_PAGE] = __pa(control_page); | 192 | page_list[PA_CONTROL_PAGE] = __pa(control_page); |
193 | page_list[VA_CONTROL_PAGE] = (unsigned long)relocate_kernel; | 193 | page_list[VA_CONTROL_PAGE] = (unsigned long)relocate_kernel; |
194 | page_list[PA_PGD] = __pa(kexec_pgd); | 194 | page_list[PA_PGD] = __pa_symbol(&kexec_pgd); |
195 | page_list[VA_PGD] = (unsigned long)kexec_pgd; | 195 | page_list[VA_PGD] = (unsigned long)kexec_pgd; |
196 | page_list[PA_PUD_0] = __pa(kexec_pud0); | 196 | page_list[PA_PUD_0] = __pa_symbol(&kexec_pud0); |
197 | page_list[VA_PUD_0] = (unsigned long)kexec_pud0; | 197 | page_list[VA_PUD_0] = (unsigned long)kexec_pud0; |
198 | page_list[PA_PMD_0] = __pa(kexec_pmd0); | 198 | page_list[PA_PMD_0] = __pa_symbol(&kexec_pmd0); |
199 | page_list[VA_PMD_0] = (unsigned long)kexec_pmd0; | 199 | page_list[VA_PMD_0] = (unsigned long)kexec_pmd0; |
200 | page_list[PA_PTE_0] = __pa(kexec_pte0); | 200 | page_list[PA_PTE_0] = __pa_symbol(&kexec_pte0); |
201 | page_list[VA_PTE_0] = (unsigned long)kexec_pte0; | 201 | page_list[VA_PTE_0] = (unsigned long)kexec_pte0; |
202 | page_list[PA_PUD_1] = __pa(kexec_pud1); | 202 | page_list[PA_PUD_1] = __pa_symbol(&kexec_pud1); |
203 | page_list[VA_PUD_1] = (unsigned long)kexec_pud1; | 203 | page_list[VA_PUD_1] = (unsigned long)kexec_pud1; |
204 | page_list[PA_PMD_1] = __pa(kexec_pmd1); | 204 | page_list[PA_PMD_1] = __pa_symbol(&kexec_pmd1); |
205 | page_list[VA_PMD_1] = (unsigned long)kexec_pmd1; | 205 | page_list[VA_PMD_1] = (unsigned long)kexec_pmd1; |
206 | page_list[PA_PTE_1] = __pa(kexec_pte1); | 206 | page_list[PA_PTE_1] = __pa_symbol(&kexec_pte1); |
207 | page_list[VA_PTE_1] = (unsigned long)kexec_pte1; | 207 | page_list[VA_PTE_1] = (unsigned long)kexec_pte1; |
208 | 208 | ||
209 | page_list[PA_TABLE_PAGE] = | 209 | page_list[PA_TABLE_PAGE] = |
diff --git a/arch/x86_64/kernel/mce.c b/arch/x86_64/kernel/mce.c index 8011a8e1c7d..fa267268247 100644 --- a/arch/x86_64/kernel/mce.c +++ b/arch/x86_64/kernel/mce.c | |||
@@ -323,10 +323,13 @@ void mce_log_therm_throt_event(unsigned int cpu, __u64 status) | |||
323 | #endif /* CONFIG_X86_MCE_INTEL */ | 323 | #endif /* CONFIG_X86_MCE_INTEL */ |
324 | 324 | ||
325 | /* | 325 | /* |
326 | * Periodic polling timer for "silent" machine check errors. | 326 | * Periodic polling timer for "silent" machine check errors. If the |
327 | * poller finds an MCE, poll 2x faster. When the poller finds no more | ||
328 | * errors, poll 2x slower (up to check_interval seconds). | ||
327 | */ | 329 | */ |
328 | 330 | ||
329 | static int check_interval = 5 * 60; /* 5 minutes */ | 331 | static int check_interval = 5 * 60; /* 5 minutes */ |
332 | static int next_interval; /* in jiffies */ | ||
330 | static void mcheck_timer(struct work_struct *work); | 333 | static void mcheck_timer(struct work_struct *work); |
331 | static DECLARE_DELAYED_WORK(mcheck_work, mcheck_timer); | 334 | static DECLARE_DELAYED_WORK(mcheck_work, mcheck_timer); |
332 | 335 | ||
@@ -339,7 +342,6 @@ static void mcheck_check_cpu(void *info) | |||
339 | static void mcheck_timer(struct work_struct *work) | 342 | static void mcheck_timer(struct work_struct *work) |
340 | { | 343 | { |
341 | on_each_cpu(mcheck_check_cpu, NULL, 1, 1); | 344 | on_each_cpu(mcheck_check_cpu, NULL, 1, 1); |
342 | schedule_delayed_work(&mcheck_work, check_interval * HZ); | ||
343 | 345 | ||
344 | /* | 346 | /* |
345 | * It's ok to read stale data here for notify_user and | 347 | * It's ok to read stale data here for notify_user and |
@@ -349,17 +351,30 @@ static void mcheck_timer(struct work_struct *work) | |||
349 | * writes. | 351 | * writes. |
350 | */ | 352 | */ |
351 | if (notify_user && console_logged) { | 353 | if (notify_user && console_logged) { |
354 | static unsigned long last_print; | ||
355 | unsigned long now = jiffies; | ||
356 | |||
357 | /* if we logged an MCE, reduce the polling interval */ | ||
358 | next_interval = max(next_interval/2, HZ/100); | ||
352 | notify_user = 0; | 359 | notify_user = 0; |
353 | clear_bit(0, &console_logged); | 360 | clear_bit(0, &console_logged); |
354 | printk(KERN_INFO "Machine check events logged\n"); | 361 | if (time_after_eq(now, last_print + (check_interval*HZ))) { |
362 | last_print = now; | ||
363 | printk(KERN_INFO "Machine check events logged\n"); | ||
364 | } | ||
365 | } else { | ||
366 | next_interval = min(next_interval*2, check_interval*HZ); | ||
355 | } | 367 | } |
368 | |||
369 | schedule_delayed_work(&mcheck_work, next_interval); | ||
356 | } | 370 | } |
357 | 371 | ||
358 | 372 | ||
359 | static __init int periodic_mcheck_init(void) | 373 | static __init int periodic_mcheck_init(void) |
360 | { | 374 | { |
361 | if (check_interval) | 375 | next_interval = check_interval * HZ; |
362 | schedule_delayed_work(&mcheck_work, check_interval*HZ); | 376 | if (next_interval) |
377 | schedule_delayed_work(&mcheck_work, next_interval); | ||
363 | return 0; | 378 | return 0; |
364 | } | 379 | } |
365 | __initcall(periodic_mcheck_init); | 380 | __initcall(periodic_mcheck_init); |
@@ -597,12 +612,13 @@ static int mce_resume(struct sys_device *dev) | |||
597 | /* Reinit MCEs after user configuration changes */ | 612 | /* Reinit MCEs after user configuration changes */ |
598 | static void mce_restart(void) | 613 | static void mce_restart(void) |
599 | { | 614 | { |
600 | if (check_interval) | 615 | if (next_interval) |
601 | cancel_delayed_work(&mcheck_work); | 616 | cancel_delayed_work(&mcheck_work); |
602 | /* Timer race is harmless here */ | 617 | /* Timer race is harmless here */ |
603 | on_each_cpu(mce_init, NULL, 1, 1); | 618 | on_each_cpu(mce_init, NULL, 1, 1); |
604 | if (check_interval) | 619 | next_interval = check_interval * HZ; |
605 | schedule_delayed_work(&mcheck_work, check_interval*HZ); | 620 | if (next_interval) |
621 | schedule_delayed_work(&mcheck_work, next_interval); | ||
606 | } | 622 | } |
607 | 623 | ||
608 | static struct sysdev_class mce_sysclass = { | 624 | static struct sysdev_class mce_sysclass = { |
diff --git a/arch/x86_64/kernel/mpparse.c b/arch/x86_64/kernel/mpparse.c index 455aa0b932f..d0dc4891599 100644 --- a/arch/x86_64/kernel/mpparse.c +++ b/arch/x86_64/kernel/mpparse.c | |||
@@ -300,7 +300,7 @@ static int __init smp_read_mpc(struct mp_config_table *mpc) | |||
300 | } | 300 | } |
301 | } | 301 | } |
302 | } | 302 | } |
303 | clustered_apic_check(); | 303 | setup_apic_routing(); |
304 | if (!num_processors) | 304 | if (!num_processors) |
305 | printk(KERN_ERR "MPTABLE: no processors registered!\n"); | 305 | printk(KERN_ERR "MPTABLE: no processors registered!\n"); |
306 | return num_processors; | 306 | return num_processors; |
diff --git a/arch/x86_64/kernel/nmi.c b/arch/x86_64/kernel/nmi.c index dfab9f16736..6cd2b30e2ff 100644 --- a/arch/x86_64/kernel/nmi.c +++ b/arch/x86_64/kernel/nmi.c | |||
@@ -27,28 +27,11 @@ | |||
27 | #include <asm/proto.h> | 27 | #include <asm/proto.h> |
28 | #include <asm/kdebug.h> | 28 | #include <asm/kdebug.h> |
29 | #include <asm/mce.h> | 29 | #include <asm/mce.h> |
30 | #include <asm/intel_arch_perfmon.h> | ||
31 | 30 | ||
32 | int unknown_nmi_panic; | 31 | int unknown_nmi_panic; |
33 | int nmi_watchdog_enabled; | 32 | int nmi_watchdog_enabled; |
34 | int panic_on_unrecovered_nmi; | 33 | int panic_on_unrecovered_nmi; |
35 | 34 | ||
36 | /* perfctr_nmi_owner tracks the ownership of the perfctr registers: | ||
37 | * evtsel_nmi_owner tracks the ownership of the event selection | ||
38 | * - different performance counters/ event selection may be reserved for | ||
39 | * different subsystems this reservation system just tries to coordinate | ||
40 | * things a little | ||
41 | */ | ||
42 | |||
43 | /* this number is calculated from Intel's MSR_P4_CRU_ESCR5 register and it's | ||
44 | * offset from MSR_P4_BSU_ESCR0. It will be the max for all platforms (for now) | ||
45 | */ | ||
46 | #define NMI_MAX_COUNTER_BITS 66 | ||
47 | #define NMI_MAX_COUNTER_LONGS BITS_TO_LONGS(NMI_MAX_COUNTER_BITS) | ||
48 | |||
49 | static DEFINE_PER_CPU(unsigned, perfctr_nmi_owner[NMI_MAX_COUNTER_LONGS]); | ||
50 | static DEFINE_PER_CPU(unsigned, evntsel_nmi_owner[NMI_MAX_COUNTER_LONGS]); | ||
51 | |||
52 | static cpumask_t backtrace_mask = CPU_MASK_NONE; | 35 | static cpumask_t backtrace_mask = CPU_MASK_NONE; |
53 | 36 | ||
54 | /* nmi_active: | 37 | /* nmi_active: |
@@ -63,191 +46,11 @@ int panic_on_timeout; | |||
63 | unsigned int nmi_watchdog = NMI_DEFAULT; | 46 | unsigned int nmi_watchdog = NMI_DEFAULT; |
64 | static unsigned int nmi_hz = HZ; | 47 | static unsigned int nmi_hz = HZ; |
65 | 48 | ||
66 | struct nmi_watchdog_ctlblk { | 49 | static DEFINE_PER_CPU(short, wd_enabled); |
67 | int enabled; | ||
68 | u64 check_bit; | ||
69 | unsigned int cccr_msr; | ||
70 | unsigned int perfctr_msr; /* the MSR to reset in NMI handler */ | ||
71 | unsigned int evntsel_msr; /* the MSR to select the events to handle */ | ||
72 | }; | ||
73 | static DEFINE_PER_CPU(struct nmi_watchdog_ctlblk, nmi_watchdog_ctlblk); | ||
74 | 50 | ||
75 | /* local prototypes */ | 51 | /* local prototypes */ |
76 | static int unknown_nmi_panic_callback(struct pt_regs *regs, int cpu); | 52 | static int unknown_nmi_panic_callback(struct pt_regs *regs, int cpu); |
77 | 53 | ||
78 | /* converts an msr to an appropriate reservation bit */ | ||
79 | static inline unsigned int nmi_perfctr_msr_to_bit(unsigned int msr) | ||
80 | { | ||
81 | /* returns the bit offset of the performance counter register */ | ||
82 | switch (boot_cpu_data.x86_vendor) { | ||
83 | case X86_VENDOR_AMD: | ||
84 | return (msr - MSR_K7_PERFCTR0); | ||
85 | case X86_VENDOR_INTEL: | ||
86 | if (cpu_has(&boot_cpu_data, X86_FEATURE_ARCH_PERFMON)) | ||
87 | return (msr - MSR_ARCH_PERFMON_PERFCTR0); | ||
88 | else | ||
89 | return (msr - MSR_P4_BPU_PERFCTR0); | ||
90 | } | ||
91 | return 0; | ||
92 | } | ||
93 | |||
94 | /* converts an msr to an appropriate reservation bit */ | ||
95 | static inline unsigned int nmi_evntsel_msr_to_bit(unsigned int msr) | ||
96 | { | ||
97 | /* returns the bit offset of the event selection register */ | ||
98 | switch (boot_cpu_data.x86_vendor) { | ||
99 | case X86_VENDOR_AMD: | ||
100 | return (msr - MSR_K7_EVNTSEL0); | ||
101 | case X86_VENDOR_INTEL: | ||
102 | if (cpu_has(&boot_cpu_data, X86_FEATURE_ARCH_PERFMON)) | ||
103 | return (msr - MSR_ARCH_PERFMON_EVENTSEL0); | ||
104 | else | ||
105 | return (msr - MSR_P4_BSU_ESCR0); | ||
106 | } | ||
107 | return 0; | ||
108 | } | ||
109 | |||
110 | /* checks for a bit availability (hack for oprofile) */ | ||
111 | int avail_to_resrv_perfctr_nmi_bit(unsigned int counter) | ||
112 | { | ||
113 | int cpu; | ||
114 | BUG_ON(counter > NMI_MAX_COUNTER_BITS); | ||
115 | for_each_possible_cpu (cpu) { | ||
116 | if (test_bit(counter, &per_cpu(perfctr_nmi_owner, cpu))) | ||
117 | return 0; | ||
118 | } | ||
119 | return 1; | ||
120 | } | ||
121 | |||
122 | /* checks the an msr for availability */ | ||
123 | int avail_to_resrv_perfctr_nmi(unsigned int msr) | ||
124 | { | ||
125 | unsigned int counter; | ||
126 | int cpu; | ||
127 | |||
128 | counter = nmi_perfctr_msr_to_bit(msr); | ||
129 | BUG_ON(counter > NMI_MAX_COUNTER_BITS); | ||
130 | |||
131 | for_each_possible_cpu (cpu) { | ||
132 | if (test_bit(counter, &per_cpu(perfctr_nmi_owner, cpu))) | ||
133 | return 0; | ||
134 | } | ||
135 | return 1; | ||
136 | } | ||
137 | |||
138 | static int __reserve_perfctr_nmi(int cpu, unsigned int msr) | ||
139 | { | ||
140 | unsigned int counter; | ||
141 | if (cpu < 0) | ||
142 | cpu = smp_processor_id(); | ||
143 | |||
144 | counter = nmi_perfctr_msr_to_bit(msr); | ||
145 | BUG_ON(counter > NMI_MAX_COUNTER_BITS); | ||
146 | |||
147 | if (!test_and_set_bit(counter, &per_cpu(perfctr_nmi_owner, cpu))) | ||
148 | return 1; | ||
149 | return 0; | ||
150 | } | ||
151 | |||
152 | static void __release_perfctr_nmi(int cpu, unsigned int msr) | ||
153 | { | ||
154 | unsigned int counter; | ||
155 | if (cpu < 0) | ||
156 | cpu = smp_processor_id(); | ||
157 | |||
158 | counter = nmi_perfctr_msr_to_bit(msr); | ||
159 | BUG_ON(counter > NMI_MAX_COUNTER_BITS); | ||
160 | |||
161 | clear_bit(counter, &per_cpu(perfctr_nmi_owner, cpu)); | ||
162 | } | ||
163 | |||
164 | int reserve_perfctr_nmi(unsigned int msr) | ||
165 | { | ||
166 | int cpu, i; | ||
167 | for_each_possible_cpu (cpu) { | ||
168 | if (!__reserve_perfctr_nmi(cpu, msr)) { | ||
169 | for_each_possible_cpu (i) { | ||
170 | if (i >= cpu) | ||
171 | break; | ||
172 | __release_perfctr_nmi(i, msr); | ||
173 | } | ||
174 | return 0; | ||
175 | } | ||
176 | } | ||
177 | return 1; | ||
178 | } | ||
179 | |||
180 | void release_perfctr_nmi(unsigned int msr) | ||
181 | { | ||
182 | int cpu; | ||
183 | for_each_possible_cpu (cpu) | ||
184 | __release_perfctr_nmi(cpu, msr); | ||
185 | } | ||
186 | |||
187 | int __reserve_evntsel_nmi(int cpu, unsigned int msr) | ||
188 | { | ||
189 | unsigned int counter; | ||
190 | if (cpu < 0) | ||
191 | cpu = smp_processor_id(); | ||
192 | |||
193 | counter = nmi_evntsel_msr_to_bit(msr); | ||
194 | BUG_ON(counter > NMI_MAX_COUNTER_BITS); | ||
195 | |||
196 | if (!test_and_set_bit(counter, &per_cpu(evntsel_nmi_owner, cpu)[0])) | ||
197 | return 1; | ||
198 | return 0; | ||
199 | } | ||
200 | |||
201 | static void __release_evntsel_nmi(int cpu, unsigned int msr) | ||
202 | { | ||
203 | unsigned int counter; | ||
204 | if (cpu < 0) | ||
205 | cpu = smp_processor_id(); | ||
206 | |||
207 | counter = nmi_evntsel_msr_to_bit(msr); | ||
208 | BUG_ON(counter > NMI_MAX_COUNTER_BITS); | ||
209 | |||
210 | clear_bit(counter, &per_cpu(evntsel_nmi_owner, cpu)[0]); | ||
211 | } | ||
212 | |||
213 | int reserve_evntsel_nmi(unsigned int msr) | ||
214 | { | ||
215 | int cpu, i; | ||
216 | for_each_possible_cpu (cpu) { | ||
217 | if (!__reserve_evntsel_nmi(cpu, msr)) { | ||
218 | for_each_possible_cpu (i) { | ||
219 | if (i >= cpu) | ||
220 | break; | ||
221 | __release_evntsel_nmi(i, msr); | ||
222 | } | ||
223 | return 0; | ||
224 | } | ||
225 | } | ||
226 | return 1; | ||
227 | } | ||
228 | |||
229 | void release_evntsel_nmi(unsigned int msr) | ||
230 | { | ||
231 | int cpu; | ||
232 | for_each_possible_cpu (cpu) { | ||
233 | __release_evntsel_nmi(cpu, msr); | ||
234 | } | ||
235 | } | ||
236 | |||
237 | static __cpuinit inline int nmi_known_cpu(void) | ||
238 | { | ||
239 | switch (boot_cpu_data.x86_vendor) { | ||
240 | case X86_VENDOR_AMD: | ||
241 | return boot_cpu_data.x86 == 15 || boot_cpu_data.x86 == 16; | ||
242 | case X86_VENDOR_INTEL: | ||
243 | if (cpu_has(&boot_cpu_data, X86_FEATURE_ARCH_PERFMON)) | ||
244 | return 1; | ||
245 | else | ||
246 | return (boot_cpu_data.x86 == 15); | ||
247 | } | ||
248 | return 0; | ||
249 | } | ||
250 | |||
251 | /* Run after command line and cpu_init init, but before all other checks */ | 54 | /* Run after command line and cpu_init init, but before all other checks */ |
252 | void nmi_watchdog_default(void) | 55 | void nmi_watchdog_default(void) |
253 | { | 56 | { |
@@ -277,23 +80,6 @@ static __init void nmi_cpu_busy(void *data) | |||
277 | } | 80 | } |
278 | #endif | 81 | #endif |
279 | 82 | ||
280 | static unsigned int adjust_for_32bit_ctr(unsigned int hz) | ||
281 | { | ||
282 | unsigned int retval = hz; | ||
283 | |||
284 | /* | ||
285 | * On Intel CPUs with ARCH_PERFMON only 32 bits in the counter | ||
286 | * are writable, with higher bits sign extending from bit 31. | ||
287 | * So, we can only program the counter with 31 bit values and | ||
288 | * 32nd bit should be 1, for 33.. to be 1. | ||
289 | * Find the appropriate nmi_hz | ||
290 | */ | ||
291 | if ((((u64)cpu_khz * 1000) / retval) > 0x7fffffffULL) { | ||
292 | retval = ((u64)cpu_khz * 1000) / 0x7fffffffUL + 1; | ||
293 | } | ||
294 | return retval; | ||
295 | } | ||
296 | |||
297 | int __init check_nmi_watchdog (void) | 83 | int __init check_nmi_watchdog (void) |
298 | { | 84 | { |
299 | int *counts; | 85 | int *counts; |
@@ -322,14 +108,14 @@ int __init check_nmi_watchdog (void) | |||
322 | mdelay((20*1000)/nmi_hz); // wait 20 ticks | 108 | mdelay((20*1000)/nmi_hz); // wait 20 ticks |
323 | 109 | ||
324 | for_each_online_cpu(cpu) { | 110 | for_each_online_cpu(cpu) { |
325 | if (!per_cpu(nmi_watchdog_ctlblk, cpu).enabled) | 111 | if (!per_cpu(wd_enabled, cpu)) |
326 | continue; | 112 | continue; |
327 | if (cpu_pda(cpu)->__nmi_count - counts[cpu] <= 5) { | 113 | if (cpu_pda(cpu)->__nmi_count - counts[cpu] <= 5) { |
328 | printk("CPU#%d: NMI appears to be stuck (%d->%d)!\n", | 114 | printk("CPU#%d: NMI appears to be stuck (%d->%d)!\n", |
329 | cpu, | 115 | cpu, |
330 | counts[cpu], | 116 | counts[cpu], |
331 | cpu_pda(cpu)->__nmi_count); | 117 | cpu_pda(cpu)->__nmi_count); |
332 | per_cpu(nmi_watchdog_ctlblk, cpu).enabled = 0; | 118 | per_cpu(wd_enabled, cpu) = 0; |
333 | atomic_dec(&nmi_active); | 119 | atomic_dec(&nmi_active); |
334 | } | 120 | } |
335 | } | 121 | } |
@@ -344,13 +130,8 @@ int __init check_nmi_watchdog (void) | |||
344 | 130 | ||
345 | /* now that we know it works we can reduce NMI frequency to | 131 | /* now that we know it works we can reduce NMI frequency to |
346 | something more reasonable; makes a difference in some configs */ | 132 | something more reasonable; makes a difference in some configs */ |
347 | if (nmi_watchdog == NMI_LOCAL_APIC) { | 133 | if (nmi_watchdog == NMI_LOCAL_APIC) |
348 | struct nmi_watchdog_ctlblk *wd = &__get_cpu_var(nmi_watchdog_ctlblk); | 134 | nmi_hz = lapic_adjust_nmi_hz(1); |
349 | |||
350 | nmi_hz = 1; | ||
351 | if (wd->perfctr_msr == MSR_ARCH_PERFMON_PERFCTR0) | ||
352 | nmi_hz = adjust_for_32bit_ctr(nmi_hz); | ||
353 | } | ||
354 | 135 | ||
355 | kfree(counts); | 136 | kfree(counts); |
356 | return 0; | 137 | return 0; |
@@ -379,57 +160,6 @@ int __init setup_nmi_watchdog(char *str) | |||
379 | 160 | ||
380 | __setup("nmi_watchdog=", setup_nmi_watchdog); | 161 | __setup("nmi_watchdog=", setup_nmi_watchdog); |
381 | 162 | ||
382 | static void disable_lapic_nmi_watchdog(void) | ||
383 | { | ||
384 | BUG_ON(nmi_watchdog != NMI_LOCAL_APIC); | ||
385 | |||
386 | if (atomic_read(&nmi_active) <= 0) | ||
387 | return; | ||
388 | |||
389 | on_each_cpu(stop_apic_nmi_watchdog, NULL, 0, 1); | ||
390 | |||
391 | BUG_ON(atomic_read(&nmi_active) != 0); | ||
392 | } | ||
393 | |||
394 | static void enable_lapic_nmi_watchdog(void) | ||
395 | { | ||
396 | BUG_ON(nmi_watchdog != NMI_LOCAL_APIC); | ||
397 | |||
398 | /* are we already enabled */ | ||
399 | if (atomic_read(&nmi_active) != 0) | ||
400 | return; | ||
401 | |||
402 | /* are we lapic aware */ | ||
403 | if (nmi_known_cpu() <= 0) | ||
404 | return; | ||
405 | |||
406 | on_each_cpu(setup_apic_nmi_watchdog, NULL, 0, 1); | ||
407 | touch_nmi_watchdog(); | ||
408 | } | ||
409 | |||
410 | void disable_timer_nmi_watchdog(void) | ||
411 | { | ||
412 | BUG_ON(nmi_watchdog != NMI_IO_APIC); | ||
413 | |||
414 | if (atomic_read(&nmi_active) <= 0) | ||
415 | return; | ||
416 | |||
417 | disable_irq(0); | ||
418 | on_each_cpu(stop_apic_nmi_watchdog, NULL, 0, 1); | ||
419 | |||
420 | BUG_ON(atomic_read(&nmi_active) != 0); | ||
421 | } | ||
422 | |||
423 | void enable_timer_nmi_watchdog(void) | ||
424 | { | ||
425 | BUG_ON(nmi_watchdog != NMI_IO_APIC); | ||
426 | |||
427 | if (atomic_read(&nmi_active) == 0) { | ||
428 | touch_nmi_watchdog(); | ||
429 | on_each_cpu(setup_apic_nmi_watchdog, NULL, 0, 1); | ||
430 | enable_irq(0); | ||
431 | } | ||
432 | } | ||
433 | 163 | ||
434 | static void __acpi_nmi_disable(void *__unused) | 164 | static void __acpi_nmi_disable(void *__unused) |
435 | { | 165 | { |
@@ -515,275 +245,9 @@ late_initcall(init_lapic_nmi_sysfs); | |||
515 | 245 | ||
516 | #endif /* CONFIG_PM */ | 246 | #endif /* CONFIG_PM */ |
517 | 247 | ||
518 | /* | ||
519 | * Activate the NMI watchdog via the local APIC. | ||
520 | * Original code written by Keith Owens. | ||
521 | */ | ||
522 | |||
523 | /* Note that these events don't tick when the CPU idles. This means | ||
524 | the frequency varies with CPU load. */ | ||
525 | |||
526 | #define K7_EVNTSEL_ENABLE (1 << 22) | ||
527 | #define K7_EVNTSEL_INT (1 << 20) | ||
528 | #define K7_EVNTSEL_OS (1 << 17) | ||
529 | #define K7_EVNTSEL_USR (1 << 16) | ||
530 | #define K7_EVENT_CYCLES_PROCESSOR_IS_RUNNING 0x76 | ||
531 | #define K7_NMI_EVENT K7_EVENT_CYCLES_PROCESSOR_IS_RUNNING | ||
532 | |||
533 | static int setup_k7_watchdog(void) | ||
534 | { | ||
535 | unsigned int perfctr_msr, evntsel_msr; | ||
536 | unsigned int evntsel; | ||
537 | struct nmi_watchdog_ctlblk *wd = &__get_cpu_var(nmi_watchdog_ctlblk); | ||
538 | |||
539 | perfctr_msr = MSR_K7_PERFCTR0; | ||
540 | evntsel_msr = MSR_K7_EVNTSEL0; | ||
541 | if (!__reserve_perfctr_nmi(-1, perfctr_msr)) | ||
542 | goto fail; | ||
543 | |||
544 | if (!__reserve_evntsel_nmi(-1, evntsel_msr)) | ||
545 | goto fail1; | ||
546 | |||
547 | /* Simulator may not support it */ | ||
548 | if (checking_wrmsrl(evntsel_msr, 0UL)) | ||
549 | goto fail2; | ||
550 | wrmsrl(perfctr_msr, 0UL); | ||
551 | |||
552 | evntsel = K7_EVNTSEL_INT | ||
553 | | K7_EVNTSEL_OS | ||
554 | | K7_EVNTSEL_USR | ||
555 | | K7_NMI_EVENT; | ||
556 | |||
557 | /* setup the timer */ | ||
558 | wrmsr(evntsel_msr, evntsel, 0); | ||
559 | wrmsrl(perfctr_msr, -((u64)cpu_khz * 1000 / nmi_hz)); | ||
560 | apic_write(APIC_LVTPC, APIC_DM_NMI); | ||
561 | evntsel |= K7_EVNTSEL_ENABLE; | ||
562 | wrmsr(evntsel_msr, evntsel, 0); | ||
563 | |||
564 | wd->perfctr_msr = perfctr_msr; | ||
565 | wd->evntsel_msr = evntsel_msr; | ||
566 | wd->cccr_msr = 0; //unused | ||
567 | wd->check_bit = 1ULL<<63; | ||
568 | return 1; | ||
569 | fail2: | ||
570 | __release_evntsel_nmi(-1, evntsel_msr); | ||
571 | fail1: | ||
572 | __release_perfctr_nmi(-1, perfctr_msr); | ||
573 | fail: | ||
574 | return 0; | ||
575 | } | ||
576 | |||
577 | static void stop_k7_watchdog(void) | ||
578 | { | ||
579 | struct nmi_watchdog_ctlblk *wd = &__get_cpu_var(nmi_watchdog_ctlblk); | ||
580 | |||
581 | wrmsr(wd->evntsel_msr, 0, 0); | ||
582 | |||
583 | __release_evntsel_nmi(-1, wd->evntsel_msr); | ||
584 | __release_perfctr_nmi(-1, wd->perfctr_msr); | ||
585 | } | ||
586 | |||
587 | /* Note that these events don't tick when the CPU idles. This means | ||
588 | the frequency varies with CPU load. */ | ||
589 | |||
590 | #define MSR_P4_MISC_ENABLE_PERF_AVAIL (1<<7) | ||
591 | #define P4_ESCR_EVENT_SELECT(N) ((N)<<25) | ||
592 | #define P4_ESCR_OS (1<<3) | ||
593 | #define P4_ESCR_USR (1<<2) | ||
594 | #define P4_CCCR_OVF_PMI0 (1<<26) | ||
595 | #define P4_CCCR_OVF_PMI1 (1<<27) | ||
596 | #define P4_CCCR_THRESHOLD(N) ((N)<<20) | ||
597 | #define P4_CCCR_COMPLEMENT (1<<19) | ||
598 | #define P4_CCCR_COMPARE (1<<18) | ||
599 | #define P4_CCCR_REQUIRED (3<<16) | ||
600 | #define P4_CCCR_ESCR_SELECT(N) ((N)<<13) | ||
601 | #define P4_CCCR_ENABLE (1<<12) | ||
602 | #define P4_CCCR_OVF (1<<31) | ||
603 | /* Set up IQ_COUNTER0 to behave like a clock, by having IQ_CCCR0 filter | ||
604 | CRU_ESCR0 (with any non-null event selector) through a complemented | ||
605 | max threshold. [IA32-Vol3, Section 14.9.9] */ | ||
606 | |||
607 | static int setup_p4_watchdog(void) | ||
608 | { | ||
609 | unsigned int perfctr_msr, evntsel_msr, cccr_msr; | ||
610 | unsigned int evntsel, cccr_val; | ||
611 | unsigned int misc_enable, dummy; | ||
612 | unsigned int ht_num; | ||
613 | struct nmi_watchdog_ctlblk *wd = &__get_cpu_var(nmi_watchdog_ctlblk); | ||
614 | |||
615 | rdmsr(MSR_IA32_MISC_ENABLE, misc_enable, dummy); | ||
616 | if (!(misc_enable & MSR_P4_MISC_ENABLE_PERF_AVAIL)) | ||
617 | return 0; | ||
618 | |||
619 | #ifdef CONFIG_SMP | ||
620 | /* detect which hyperthread we are on */ | ||
621 | if (smp_num_siblings == 2) { | ||
622 | unsigned int ebx, apicid; | ||
623 | |||
624 | ebx = cpuid_ebx(1); | ||
625 | apicid = (ebx >> 24) & 0xff; | ||
626 | ht_num = apicid & 1; | ||
627 | } else | ||
628 | #endif | ||
629 | ht_num = 0; | ||
630 | |||
631 | /* performance counters are shared resources | ||
632 | * assign each hyperthread its own set | ||
633 | * (re-use the ESCR0 register, seems safe | ||
634 | * and keeps the cccr_val the same) | ||
635 | */ | ||
636 | if (!ht_num) { | ||
637 | /* logical cpu 0 */ | ||
638 | perfctr_msr = MSR_P4_IQ_PERFCTR0; | ||
639 | evntsel_msr = MSR_P4_CRU_ESCR0; | ||
640 | cccr_msr = MSR_P4_IQ_CCCR0; | ||
641 | cccr_val = P4_CCCR_OVF_PMI0 | P4_CCCR_ESCR_SELECT(4); | ||
642 | } else { | ||
643 | /* logical cpu 1 */ | ||
644 | perfctr_msr = MSR_P4_IQ_PERFCTR1; | ||
645 | evntsel_msr = MSR_P4_CRU_ESCR0; | ||
646 | cccr_msr = MSR_P4_IQ_CCCR1; | ||
647 | cccr_val = P4_CCCR_OVF_PMI1 | P4_CCCR_ESCR_SELECT(4); | ||
648 | } | ||
649 | |||
650 | if (!__reserve_perfctr_nmi(-1, perfctr_msr)) | ||
651 | goto fail; | ||
652 | |||
653 | if (!__reserve_evntsel_nmi(-1, evntsel_msr)) | ||
654 | goto fail1; | ||
655 | |||
656 | evntsel = P4_ESCR_EVENT_SELECT(0x3F) | ||
657 | | P4_ESCR_OS | ||
658 | | P4_ESCR_USR; | ||
659 | |||
660 | cccr_val |= P4_CCCR_THRESHOLD(15) | ||
661 | | P4_CCCR_COMPLEMENT | ||
662 | | P4_CCCR_COMPARE | ||
663 | | P4_CCCR_REQUIRED; | ||
664 | |||
665 | wrmsr(evntsel_msr, evntsel, 0); | ||
666 | wrmsr(cccr_msr, cccr_val, 0); | ||
667 | wrmsrl(perfctr_msr, -((u64)cpu_khz * 1000 / nmi_hz)); | ||
668 | apic_write(APIC_LVTPC, APIC_DM_NMI); | ||
669 | cccr_val |= P4_CCCR_ENABLE; | ||
670 | wrmsr(cccr_msr, cccr_val, 0); | ||
671 | |||
672 | wd->perfctr_msr = perfctr_msr; | ||
673 | wd->evntsel_msr = evntsel_msr; | ||
674 | wd->cccr_msr = cccr_msr; | ||
675 | wd->check_bit = 1ULL<<39; | ||
676 | return 1; | ||
677 | fail1: | ||
678 | __release_perfctr_nmi(-1, perfctr_msr); | ||
679 | fail: | ||
680 | return 0; | ||
681 | } | ||
682 | |||
683 | static void stop_p4_watchdog(void) | ||
684 | { | ||
685 | struct nmi_watchdog_ctlblk *wd = &__get_cpu_var(nmi_watchdog_ctlblk); | ||
686 | |||
687 | wrmsr(wd->cccr_msr, 0, 0); | ||
688 | wrmsr(wd->evntsel_msr, 0, 0); | ||
689 | |||
690 | __release_evntsel_nmi(-1, wd->evntsel_msr); | ||
691 | __release_perfctr_nmi(-1, wd->perfctr_msr); | ||
692 | } | ||
693 | |||
694 | #define ARCH_PERFMON_NMI_EVENT_SEL ARCH_PERFMON_UNHALTED_CORE_CYCLES_SEL | ||
695 | #define ARCH_PERFMON_NMI_EVENT_UMASK ARCH_PERFMON_UNHALTED_CORE_CYCLES_UMASK | ||
696 | |||
697 | static int setup_intel_arch_watchdog(void) | ||
698 | { | ||
699 | unsigned int ebx; | ||
700 | union cpuid10_eax eax; | ||
701 | unsigned int unused; | ||
702 | unsigned int perfctr_msr, evntsel_msr; | ||
703 | unsigned int evntsel; | ||
704 | struct nmi_watchdog_ctlblk *wd = &__get_cpu_var(nmi_watchdog_ctlblk); | ||
705 | |||
706 | /* | ||
707 | * Check whether the Architectural PerfMon supports | ||
708 | * Unhalted Core Cycles Event or not. | ||
709 | * NOTE: Corresponding bit = 0 in ebx indicates event present. | ||
710 | */ | ||
711 | cpuid(10, &(eax.full), &ebx, &unused, &unused); | ||
712 | if ((eax.split.mask_length < (ARCH_PERFMON_UNHALTED_CORE_CYCLES_INDEX+1)) || | ||
713 | (ebx & ARCH_PERFMON_UNHALTED_CORE_CYCLES_PRESENT)) | ||
714 | goto fail; | ||
715 | |||
716 | perfctr_msr = MSR_ARCH_PERFMON_PERFCTR0; | ||
717 | evntsel_msr = MSR_ARCH_PERFMON_EVENTSEL0; | ||
718 | |||
719 | if (!__reserve_perfctr_nmi(-1, perfctr_msr)) | ||
720 | goto fail; | ||
721 | |||
722 | if (!__reserve_evntsel_nmi(-1, evntsel_msr)) | ||
723 | goto fail1; | ||
724 | |||
725 | wrmsrl(perfctr_msr, 0UL); | ||
726 | |||
727 | evntsel = ARCH_PERFMON_EVENTSEL_INT | ||
728 | | ARCH_PERFMON_EVENTSEL_OS | ||
729 | | ARCH_PERFMON_EVENTSEL_USR | ||
730 | | ARCH_PERFMON_NMI_EVENT_SEL | ||
731 | | ARCH_PERFMON_NMI_EVENT_UMASK; | ||
732 | |||
733 | /* setup the timer */ | ||
734 | wrmsr(evntsel_msr, evntsel, 0); | ||
735 | |||
736 | nmi_hz = adjust_for_32bit_ctr(nmi_hz); | ||
737 | wrmsr(perfctr_msr, (u32)(-((u64)cpu_khz * 1000 / nmi_hz)), 0); | ||
738 | |||
739 | apic_write(APIC_LVTPC, APIC_DM_NMI); | ||
740 | evntsel |= ARCH_PERFMON_EVENTSEL0_ENABLE; | ||
741 | wrmsr(evntsel_msr, evntsel, 0); | ||
742 | |||
743 | wd->perfctr_msr = perfctr_msr; | ||
744 | wd->evntsel_msr = evntsel_msr; | ||
745 | wd->cccr_msr = 0; //unused | ||
746 | wd->check_bit = 1ULL << (eax.split.bit_width - 1); | ||
747 | return 1; | ||
748 | fail1: | ||
749 | __release_perfctr_nmi(-1, perfctr_msr); | ||
750 | fail: | ||
751 | return 0; | ||
752 | } | ||
753 | |||
754 | static void stop_intel_arch_watchdog(void) | ||
755 | { | ||
756 | unsigned int ebx; | ||
757 | union cpuid10_eax eax; | ||
758 | unsigned int unused; | ||
759 | struct nmi_watchdog_ctlblk *wd = &__get_cpu_var(nmi_watchdog_ctlblk); | ||
760 | |||
761 | /* | ||
762 | * Check whether the Architectural PerfMon supports | ||
763 | * Unhalted Core Cycles Event or not. | ||
764 | * NOTE: Corresponding bit = 0 in ebx indicates event present. | ||
765 | */ | ||
766 | cpuid(10, &(eax.full), &ebx, &unused, &unused); | ||
767 | if ((eax.split.mask_length < (ARCH_PERFMON_UNHALTED_CORE_CYCLES_INDEX+1)) || | ||
768 | (ebx & ARCH_PERFMON_UNHALTED_CORE_CYCLES_PRESENT)) | ||
769 | return; | ||
770 | |||
771 | wrmsr(wd->evntsel_msr, 0, 0); | ||
772 | |||
773 | __release_evntsel_nmi(-1, wd->evntsel_msr); | ||
774 | __release_perfctr_nmi(-1, wd->perfctr_msr); | ||
775 | } | ||
776 | |||
777 | void setup_apic_nmi_watchdog(void *unused) | 248 | void setup_apic_nmi_watchdog(void *unused) |
778 | { | 249 | { |
779 | struct nmi_watchdog_ctlblk *wd = &__get_cpu_var(nmi_watchdog_ctlblk); | 250 | if (__get_cpu_var(wd_enabled) == 1) |
780 | |||
781 | /* only support LOCAL and IO APICs for now */ | ||
782 | if ((nmi_watchdog != NMI_LOCAL_APIC) && | ||
783 | (nmi_watchdog != NMI_IO_APIC)) | ||
784 | return; | ||
785 | |||
786 | if (wd->enabled == 1) | ||
787 | return; | 251 | return; |
788 | 252 | ||
789 | /* cheap hack to support suspend/resume */ | 253 | /* cheap hack to support suspend/resume */ |
@@ -791,62 +255,31 @@ void setup_apic_nmi_watchdog(void *unused) | |||
791 | if ((smp_processor_id() != 0) && (atomic_read(&nmi_active) <= 0)) | 255 | if ((smp_processor_id() != 0) && (atomic_read(&nmi_active) <= 0)) |
792 | return; | 256 | return; |
793 | 257 | ||
794 | if (nmi_watchdog == NMI_LOCAL_APIC) { | 258 | switch (nmi_watchdog) { |
795 | switch (boot_cpu_data.x86_vendor) { | 259 | case NMI_LOCAL_APIC: |
796 | case X86_VENDOR_AMD: | 260 | __get_cpu_var(wd_enabled) = 1; |
797 | if (strstr(boot_cpu_data.x86_model_id, "Screwdriver")) | 261 | if (lapic_watchdog_init(nmi_hz) < 0) { |
798 | return; | 262 | __get_cpu_var(wd_enabled) = 0; |
799 | if (!setup_k7_watchdog()) | ||
800 | return; | ||
801 | break; | ||
802 | case X86_VENDOR_INTEL: | ||
803 | if (cpu_has(&boot_cpu_data, X86_FEATURE_ARCH_PERFMON)) { | ||
804 | if (!setup_intel_arch_watchdog()) | ||
805 | return; | ||
806 | break; | ||
807 | } | ||
808 | if (!setup_p4_watchdog()) | ||
809 | return; | ||
810 | break; | ||
811 | default: | ||
812 | return; | 263 | return; |
813 | } | 264 | } |
265 | /* FALL THROUGH */ | ||
266 | case NMI_IO_APIC: | ||
267 | __get_cpu_var(wd_enabled) = 1; | ||
268 | atomic_inc(&nmi_active); | ||
814 | } | 269 | } |
815 | wd->enabled = 1; | ||
816 | atomic_inc(&nmi_active); | ||
817 | } | 270 | } |
818 | 271 | ||
819 | void stop_apic_nmi_watchdog(void *unused) | 272 | void stop_apic_nmi_watchdog(void *unused) |
820 | { | 273 | { |
821 | struct nmi_watchdog_ctlblk *wd = &__get_cpu_var(nmi_watchdog_ctlblk); | ||
822 | |||
823 | /* only support LOCAL and IO APICs for now */ | 274 | /* only support LOCAL and IO APICs for now */ |
824 | if ((nmi_watchdog != NMI_LOCAL_APIC) && | 275 | if ((nmi_watchdog != NMI_LOCAL_APIC) && |
825 | (nmi_watchdog != NMI_IO_APIC)) | 276 | (nmi_watchdog != NMI_IO_APIC)) |
826 | return; | 277 | return; |
827 | 278 | if (__get_cpu_var(wd_enabled) == 0) | |
828 | if (wd->enabled == 0) | ||
829 | return; | 279 | return; |
830 | 280 | if (nmi_watchdog == NMI_LOCAL_APIC) | |
831 | if (nmi_watchdog == NMI_LOCAL_APIC) { | 281 | lapic_watchdog_stop(); |
832 | switch (boot_cpu_data.x86_vendor) { | 282 | __get_cpu_var(wd_enabled) = 0; |
833 | case X86_VENDOR_AMD: | ||
834 | if (strstr(boot_cpu_data.x86_model_id, "Screwdriver")) | ||
835 | return; | ||
836 | stop_k7_watchdog(); | ||
837 | break; | ||
838 | case X86_VENDOR_INTEL: | ||
839 | if (cpu_has(&boot_cpu_data, X86_FEATURE_ARCH_PERFMON)) { | ||
840 | stop_intel_arch_watchdog(); | ||
841 | break; | ||
842 | } | ||
843 | stop_p4_watchdog(); | ||
844 | break; | ||
845 | default: | ||
846 | return; | ||
847 | } | ||
848 | } | ||
849 | wd->enabled = 0; | ||
850 | atomic_dec(&nmi_active); | 283 | atomic_dec(&nmi_active); |
851 | } | 284 | } |
852 | 285 | ||
@@ -885,9 +318,7 @@ int __kprobes nmi_watchdog_tick(struct pt_regs * regs, unsigned reason) | |||
885 | int sum; | 318 | int sum; |
886 | int touched = 0; | 319 | int touched = 0; |
887 | int cpu = smp_processor_id(); | 320 | int cpu = smp_processor_id(); |
888 | struct nmi_watchdog_ctlblk *wd = &__get_cpu_var(nmi_watchdog_ctlblk); | 321 | int rc = 0; |
889 | u64 dummy; | ||
890 | int rc=0; | ||
891 | 322 | ||
892 | /* check for other users first */ | 323 | /* check for other users first */ |
893 | if (notify_die(DIE_NMI, "nmi", regs, reason, 2, SIGINT) | 324 | if (notify_die(DIE_NMI, "nmi", regs, reason, 2, SIGINT) |
@@ -934,55 +365,20 @@ int __kprobes nmi_watchdog_tick(struct pt_regs * regs, unsigned reason) | |||
934 | } | 365 | } |
935 | 366 | ||
936 | /* see if the nmi watchdog went off */ | 367 | /* see if the nmi watchdog went off */ |
937 | if (wd->enabled) { | 368 | if (!__get_cpu_var(wd_enabled)) |
938 | if (nmi_watchdog == NMI_LOCAL_APIC) { | 369 | return rc; |
939 | rdmsrl(wd->perfctr_msr, dummy); | 370 | switch (nmi_watchdog) { |
940 | if (dummy & wd->check_bit){ | 371 | case NMI_LOCAL_APIC: |
941 | /* this wasn't a watchdog timer interrupt */ | 372 | rc |= lapic_wd_event(nmi_hz); |
942 | goto done; | 373 | break; |
943 | } | 374 | case NMI_IO_APIC: |
944 | 375 | /* don't know how to accurately check for this. | |
945 | /* only Intel uses the cccr msr */ | 376 | * just assume it was a watchdog timer interrupt |
946 | if (wd->cccr_msr != 0) { | 377 | * This matches the old behaviour. |
947 | /* | 378 | */ |
948 | * P4 quirks: | 379 | rc = 1; |
949 | * - An overflown perfctr will assert its interrupt | 380 | break; |
950 | * until the OVF flag in its CCCR is cleared. | ||
951 | * - LVTPC is masked on interrupt and must be | ||
952 | * unmasked by the LVTPC handler. | ||
953 | */ | ||
954 | rdmsrl(wd->cccr_msr, dummy); | ||
955 | dummy &= ~P4_CCCR_OVF; | ||
956 | wrmsrl(wd->cccr_msr, dummy); | ||
957 | apic_write(APIC_LVTPC, APIC_DM_NMI); | ||
958 | /* start the cycle over again */ | ||
959 | wrmsrl(wd->perfctr_msr, | ||
960 | -((u64)cpu_khz * 1000 / nmi_hz)); | ||
961 | } else if (wd->perfctr_msr == MSR_ARCH_PERFMON_PERFCTR0) { | ||
962 | /* | ||
963 | * ArchPerfom/Core Duo needs to re-unmask | ||
964 | * the apic vector | ||
965 | */ | ||
966 | apic_write(APIC_LVTPC, APIC_DM_NMI); | ||
967 | /* ARCH_PERFMON has 32 bit counter writes */ | ||
968 | wrmsr(wd->perfctr_msr, | ||
969 | (u32)(-((u64)cpu_khz * 1000 / nmi_hz)), 0); | ||
970 | } else { | ||
971 | /* start the cycle over again */ | ||
972 | wrmsrl(wd->perfctr_msr, | ||
973 | -((u64)cpu_khz * 1000 / nmi_hz)); | ||
974 | } | ||
975 | rc = 1; | ||
976 | } else if (nmi_watchdog == NMI_IO_APIC) { | ||
977 | /* don't know how to accurately check for this. | ||
978 | * just assume it was a watchdog timer interrupt | ||
979 | * This matches the old behaviour. | ||
980 | */ | ||
981 | rc = 1; | ||
982 | } else | ||
983 | printk(KERN_WARNING "Unknown enabled NMI hardware?!\n"); | ||
984 | } | 381 | } |
985 | done: | ||
986 | return rc; | 382 | return rc; |
987 | } | 383 | } |
988 | 384 | ||
@@ -1067,12 +463,4 @@ void __trigger_all_cpu_backtrace(void) | |||
1067 | 463 | ||
1068 | EXPORT_SYMBOL(nmi_active); | 464 | EXPORT_SYMBOL(nmi_active); |
1069 | EXPORT_SYMBOL(nmi_watchdog); | 465 | EXPORT_SYMBOL(nmi_watchdog); |
1070 | EXPORT_SYMBOL(avail_to_resrv_perfctr_nmi); | ||
1071 | EXPORT_SYMBOL(avail_to_resrv_perfctr_nmi_bit); | ||
1072 | EXPORT_SYMBOL(reserve_perfctr_nmi); | ||
1073 | EXPORT_SYMBOL(release_perfctr_nmi); | ||
1074 | EXPORT_SYMBOL(reserve_evntsel_nmi); | ||
1075 | EXPORT_SYMBOL(release_evntsel_nmi); | ||
1076 | EXPORT_SYMBOL(disable_timer_nmi_watchdog); | ||
1077 | EXPORT_SYMBOL(enable_timer_nmi_watchdog); | ||
1078 | EXPORT_SYMBOL(touch_nmi_watchdog); | 466 | EXPORT_SYMBOL(touch_nmi_watchdog); |
diff --git a/arch/x86_64/kernel/pci-calgary.c b/arch/x86_64/kernel/pci-calgary.c index 04480c3b68f..5bd20b542c1 100644 --- a/arch/x86_64/kernel/pci-calgary.c +++ b/arch/x86_64/kernel/pci-calgary.c | |||
@@ -507,7 +507,7 @@ error: | |||
507 | return ret; | 507 | return ret; |
508 | } | 508 | } |
509 | 509 | ||
510 | static struct dma_mapping_ops calgary_dma_ops = { | 510 | static const struct dma_mapping_ops calgary_dma_ops = { |
511 | .alloc_coherent = calgary_alloc_coherent, | 511 | .alloc_coherent = calgary_alloc_coherent, |
512 | .map_single = calgary_map_single, | 512 | .map_single = calgary_map_single, |
513 | .unmap_single = calgary_unmap_single, | 513 | .unmap_single = calgary_unmap_single, |
diff --git a/arch/x86_64/kernel/pci-gart.c b/arch/x86_64/kernel/pci-gart.c index 0bae862e9a5..0a762e10f2b 100644 --- a/arch/x86_64/kernel/pci-gart.c +++ b/arch/x86_64/kernel/pci-gart.c | |||
@@ -556,7 +556,7 @@ static __init int init_k8_gatt(struct agp_kern_info *info) | |||
556 | 556 | ||
557 | extern int agp_amd64_init(void); | 557 | extern int agp_amd64_init(void); |
558 | 558 | ||
559 | static struct dma_mapping_ops gart_dma_ops = { | 559 | static const struct dma_mapping_ops gart_dma_ops = { |
560 | .mapping_error = NULL, | 560 | .mapping_error = NULL, |
561 | .map_single = gart_map_single, | 561 | .map_single = gart_map_single, |
562 | .map_simple = gart_map_simple, | 562 | .map_simple = gart_map_simple, |
diff --git a/arch/x86_64/kernel/pci-nommu.c b/arch/x86_64/kernel/pci-nommu.c index df09ab05a1b..6dade0c867c 100644 --- a/arch/x86_64/kernel/pci-nommu.c +++ b/arch/x86_64/kernel/pci-nommu.c | |||
@@ -79,7 +79,7 @@ void nommu_unmap_sg(struct device *dev, struct scatterlist *sg, | |||
79 | { | 79 | { |
80 | } | 80 | } |
81 | 81 | ||
82 | struct dma_mapping_ops nommu_dma_ops = { | 82 | const struct dma_mapping_ops nommu_dma_ops = { |
83 | .map_single = nommu_map_single, | 83 | .map_single = nommu_map_single, |
84 | .unmap_single = nommu_unmap_single, | 84 | .unmap_single = nommu_unmap_single, |
85 | .map_sg = nommu_map_sg, | 85 | .map_sg = nommu_map_sg, |
diff --git a/arch/x86_64/kernel/pci-swiotlb.c b/arch/x86_64/kernel/pci-swiotlb.c index eb18be5a656..4b4569abc60 100644 --- a/arch/x86_64/kernel/pci-swiotlb.c +++ b/arch/x86_64/kernel/pci-swiotlb.c | |||
@@ -12,7 +12,7 @@ | |||
12 | int swiotlb __read_mostly; | 12 | int swiotlb __read_mostly; |
13 | EXPORT_SYMBOL(swiotlb); | 13 | EXPORT_SYMBOL(swiotlb); |
14 | 14 | ||
15 | struct dma_mapping_ops swiotlb_dma_ops = { | 15 | const struct dma_mapping_ops swiotlb_dma_ops = { |
16 | .mapping_error = swiotlb_dma_mapping_error, | 16 | .mapping_error = swiotlb_dma_mapping_error, |
17 | .alloc_coherent = swiotlb_alloc_coherent, | 17 | .alloc_coherent = swiotlb_alloc_coherent, |
18 | .free_coherent = swiotlb_free_coherent, | 18 | .free_coherent = swiotlb_free_coherent, |
diff --git a/arch/x86_64/kernel/process.c b/arch/x86_64/kernel/process.c index d8d5ccc245c..4f21765078b 100644 --- a/arch/x86_64/kernel/process.c +++ b/arch/x86_64/kernel/process.c | |||
@@ -288,16 +288,18 @@ void __cpuinit select_idle_routine(const struct cpuinfo_x86 *c) | |||
288 | 288 | ||
289 | static int __init idle_setup (char *str) | 289 | static int __init idle_setup (char *str) |
290 | { | 290 | { |
291 | if (!strncmp(str, "poll", 4)) { | 291 | if (!strcmp(str, "poll")) { |
292 | printk("using polling idle threads.\n"); | 292 | printk("using polling idle threads.\n"); |
293 | pm_idle = poll_idle; | 293 | pm_idle = poll_idle; |
294 | } | 294 | } else if (!strcmp(str, "mwait")) |
295 | force_mwait = 1; | ||
296 | else | ||
297 | return -1; | ||
295 | 298 | ||
296 | boot_option_idle_override = 1; | 299 | boot_option_idle_override = 1; |
297 | return 1; | 300 | return 0; |
298 | } | 301 | } |
299 | 302 | early_param("idle", idle_setup); | |
300 | __setup("idle=", idle_setup); | ||
301 | 303 | ||
302 | /* Prints also some state that isn't saved in the pt_regs */ | 304 | /* Prints also some state that isn't saved in the pt_regs */ |
303 | void __show_regs(struct pt_regs * regs) | 305 | void __show_regs(struct pt_regs * regs) |
diff --git a/arch/x86_64/kernel/setup.c b/arch/x86_64/kernel/setup.c index 3d98b696881..db30b5bcef6 100644 --- a/arch/x86_64/kernel/setup.c +++ b/arch/x86_64/kernel/setup.c | |||
@@ -79,6 +79,8 @@ int bootloader_type; | |||
79 | 79 | ||
80 | unsigned long saved_video_mode; | 80 | unsigned long saved_video_mode; |
81 | 81 | ||
82 | int force_mwait __cpuinitdata; | ||
83 | |||
82 | /* | 84 | /* |
83 | * Early DMI memory | 85 | * Early DMI memory |
84 | */ | 86 | */ |
@@ -205,10 +207,10 @@ static void discover_ebda(void) | |||
205 | * there is a real-mode segmented pointer pointing to the | 207 | * there is a real-mode segmented pointer pointing to the |
206 | * 4K EBDA area at 0x40E | 208 | * 4K EBDA area at 0x40E |
207 | */ | 209 | */ |
208 | ebda_addr = *(unsigned short *)EBDA_ADDR_POINTER; | 210 | ebda_addr = *(unsigned short *)__va(EBDA_ADDR_POINTER); |
209 | ebda_addr <<= 4; | 211 | ebda_addr <<= 4; |
210 | 212 | ||
211 | ebda_size = *(unsigned short *)(unsigned long)ebda_addr; | 213 | ebda_size = *(unsigned short *)__va(ebda_addr); |
212 | 214 | ||
213 | /* Round EBDA up to pages */ | 215 | /* Round EBDA up to pages */ |
214 | if (ebda_size == 0) | 216 | if (ebda_size == 0) |
@@ -243,11 +245,12 @@ void __init setup_arch(char **cmdline_p) | |||
243 | init_mm.end_code = (unsigned long) &_etext; | 245 | init_mm.end_code = (unsigned long) &_etext; |
244 | init_mm.end_data = (unsigned long) &_edata; | 246 | init_mm.end_data = (unsigned long) &_edata; |
245 | init_mm.brk = (unsigned long) &_end; | 247 | init_mm.brk = (unsigned long) &_end; |
248 | init_mm.pgd = __va(__pa_symbol(&init_level4_pgt)); | ||
246 | 249 | ||
247 | code_resource.start = virt_to_phys(&_text); | 250 | code_resource.start = __pa_symbol(&_text); |
248 | code_resource.end = virt_to_phys(&_etext)-1; | 251 | code_resource.end = __pa_symbol(&_etext)-1; |
249 | data_resource.start = virt_to_phys(&_etext); | 252 | data_resource.start = __pa_symbol(&_etext); |
250 | data_resource.end = virt_to_phys(&_edata)-1; | 253 | data_resource.end = __pa_symbol(&_edata)-1; |
251 | 254 | ||
252 | early_identify_cpu(&boot_cpu_data); | 255 | early_identify_cpu(&boot_cpu_data); |
253 | 256 | ||
@@ -274,8 +277,6 @@ void __init setup_arch(char **cmdline_p) | |||
274 | 277 | ||
275 | dmi_scan_machine(); | 278 | dmi_scan_machine(); |
276 | 279 | ||
277 | zap_low_mappings(0); | ||
278 | |||
279 | #ifdef CONFIG_ACPI | 280 | #ifdef CONFIG_ACPI |
280 | /* | 281 | /* |
281 | * Initialize the ACPI boot-time table parser (gets the RSDP and SDT). | 282 | * Initialize the ACPI boot-time table parser (gets the RSDP and SDT). |
@@ -329,15 +330,8 @@ void __init setup_arch(char **cmdline_p) | |||
329 | #endif | 330 | #endif |
330 | 331 | ||
331 | #ifdef CONFIG_SMP | 332 | #ifdef CONFIG_SMP |
332 | /* | ||
333 | * But first pinch a few for the stack/trampoline stuff | ||
334 | * FIXME: Don't need the extra page at 4K, but need to fix | ||
335 | * trampoline before removing it. (see the GDT stuff) | ||
336 | */ | ||
337 | reserve_bootmem_generic(PAGE_SIZE, PAGE_SIZE); | ||
338 | |||
339 | /* Reserve SMP trampoline */ | 333 | /* Reserve SMP trampoline */ |
340 | reserve_bootmem_generic(SMP_TRAMPOLINE_BASE, PAGE_SIZE); | 334 | reserve_bootmem_generic(SMP_TRAMPOLINE_BASE, 2*PAGE_SIZE); |
341 | #endif | 335 | #endif |
342 | 336 | ||
343 | #ifdef CONFIG_ACPI_SLEEP | 337 | #ifdef CONFIG_ACPI_SLEEP |
@@ -612,6 +606,10 @@ static void __cpuinit init_amd(struct cpuinfo_x86 *c) | |||
612 | 606 | ||
613 | /* RDTSC can be speculated around */ | 607 | /* RDTSC can be speculated around */ |
614 | clear_bit(X86_FEATURE_SYNC_RDTSC, &c->x86_capability); | 608 | clear_bit(X86_FEATURE_SYNC_RDTSC, &c->x86_capability); |
609 | |||
610 | /* Family 10 doesn't support C states in MWAIT so don't use it */ | ||
611 | if (c->x86 == 0x10 && !force_mwait) | ||
612 | clear_bit(X86_FEATURE_MWAIT, &c->x86_capability); | ||
615 | } | 613 | } |
616 | 614 | ||
617 | static void __cpuinit detect_ht(struct cpuinfo_x86 *c) | 615 | static void __cpuinit detect_ht(struct cpuinfo_x86 *c) |
@@ -987,9 +985,8 @@ static int show_cpuinfo(struct seq_file *m, void *v) | |||
987 | "stc", | 985 | "stc", |
988 | "100mhzsteps", | 986 | "100mhzsteps", |
989 | "hwpstate", | 987 | "hwpstate", |
990 | NULL, /* tsc invariant mapped to constant_tsc */ | 988 | "", /* tsc invariant mapped to constant_tsc */ |
991 | NULL, | 989 | /* nothing */ |
992 | /* nothing */ /* constant_tsc - moved to flags */ | ||
993 | }; | 990 | }; |
994 | 991 | ||
995 | 992 | ||
diff --git a/arch/x86_64/kernel/setup64.c b/arch/x86_64/kernel/setup64.c index 6a70b55f719..64379a80d76 100644 --- a/arch/x86_64/kernel/setup64.c +++ b/arch/x86_64/kernel/setup64.c | |||
@@ -103,9 +103,9 @@ void __init setup_per_cpu_areas(void) | |||
103 | if (!NODE_DATA(cpu_to_node(i))) { | 103 | if (!NODE_DATA(cpu_to_node(i))) { |
104 | printk("cpu with no node %d, num_online_nodes %d\n", | 104 | printk("cpu with no node %d, num_online_nodes %d\n", |
105 | i, num_online_nodes()); | 105 | i, num_online_nodes()); |
106 | ptr = alloc_bootmem(size); | 106 | ptr = alloc_bootmem_pages(size); |
107 | } else { | 107 | } else { |
108 | ptr = alloc_bootmem_node(NODE_DATA(cpu_to_node(i)), size); | 108 | ptr = alloc_bootmem_pages_node(NODE_DATA(cpu_to_node(i)), size); |
109 | } | 109 | } |
110 | if (!ptr) | 110 | if (!ptr) |
111 | panic("Cannot allocate cpu data for CPU %d\n", i); | 111 | panic("Cannot allocate cpu data for CPU %d\n", i); |
@@ -201,7 +201,6 @@ void __cpuinit cpu_init (void) | |||
201 | /* CPU 0 is initialised in head64.c */ | 201 | /* CPU 0 is initialised in head64.c */ |
202 | if (cpu != 0) { | 202 | if (cpu != 0) { |
203 | pda_init(cpu); | 203 | pda_init(cpu); |
204 | zap_low_mappings(cpu); | ||
205 | } else | 204 | } else |
206 | estacks = boot_exception_stacks; | 205 | estacks = boot_exception_stacks; |
207 | 206 | ||
diff --git a/arch/x86_64/kernel/signal.c b/arch/x86_64/kernel/signal.c index 49ec324cd14..c819625f331 100644 --- a/arch/x86_64/kernel/signal.c +++ b/arch/x86_64/kernel/signal.c | |||
@@ -141,7 +141,7 @@ asmlinkage long sys_rt_sigreturn(struct pt_regs *regs) | |||
141 | goto badframe; | 141 | goto badframe; |
142 | 142 | ||
143 | #ifdef DEBUG_SIG | 143 | #ifdef DEBUG_SIG |
144 | printk("%d sigreturn rip:%lx rsp:%lx frame:%p rax:%lx\n",current->pid,regs.rip,regs.rsp,frame,eax); | 144 | printk("%d sigreturn rip:%lx rsp:%lx frame:%p rax:%lx\n",current->pid,regs->rip,regs->rsp,frame,eax); |
145 | #endif | 145 | #endif |
146 | 146 | ||
147 | if (do_sigaltstack(&frame->uc.uc_stack, NULL, regs->rsp) == -EFAULT) | 147 | if (do_sigaltstack(&frame->uc.uc_stack, NULL, regs->rsp) == -EFAULT) |
@@ -301,7 +301,7 @@ static int setup_rt_frame(int sig, struct k_sigaction *ka, siginfo_t *info, | |||
301 | if (test_thread_flag(TIF_SINGLESTEP)) | 301 | if (test_thread_flag(TIF_SINGLESTEP)) |
302 | ptrace_notify(SIGTRAP); | 302 | ptrace_notify(SIGTRAP); |
303 | #ifdef DEBUG_SIG | 303 | #ifdef DEBUG_SIG |
304 | printk("SIG deliver (%s:%d): sp=%p pc=%p ra=%p\n", | 304 | printk("SIG deliver (%s:%d): sp=%p pc=%lx ra=%p\n", |
305 | current->comm, current->pid, frame, regs->rip, frame->pretcode); | 305 | current->comm, current->pid, frame, regs->rip, frame->pretcode); |
306 | #endif | 306 | #endif |
307 | 307 | ||
@@ -463,7 +463,7 @@ void | |||
463 | do_notify_resume(struct pt_regs *regs, void *unused, __u32 thread_info_flags) | 463 | do_notify_resume(struct pt_regs *regs, void *unused, __u32 thread_info_flags) |
464 | { | 464 | { |
465 | #ifdef DEBUG_SIG | 465 | #ifdef DEBUG_SIG |
466 | printk("do_notify_resume flags:%x rip:%lx rsp:%lx caller:%lx pending:%lx\n", | 466 | printk("do_notify_resume flags:%x rip:%lx rsp:%lx caller:%p pending:%x\n", |
467 | thread_info_flags, regs->rip, regs->rsp, __builtin_return_address(0),signal_pending(current)); | 467 | thread_info_flags, regs->rip, regs->rsp, __builtin_return_address(0),signal_pending(current)); |
468 | #endif | 468 | #endif |
469 | 469 | ||
diff --git a/arch/x86_64/kernel/smp.c b/arch/x86_64/kernel/smp.c index af1ec4d23cf..22abae4e9f3 100644 --- a/arch/x86_64/kernel/smp.c +++ b/arch/x86_64/kernel/smp.c | |||
@@ -76,7 +76,7 @@ static inline void leave_mm(int cpu) | |||
76 | if (read_pda(mmu_state) == TLBSTATE_OK) | 76 | if (read_pda(mmu_state) == TLBSTATE_OK) |
77 | BUG(); | 77 | BUG(); |
78 | cpu_clear(cpu, read_pda(active_mm)->cpu_vm_mask); | 78 | cpu_clear(cpu, read_pda(active_mm)->cpu_vm_mask); |
79 | load_cr3(swapper_pg_dir); | 79 | load_cr3(init_mm.pgd); |
80 | } | 80 | } |
81 | 81 | ||
82 | /* | 82 | /* |
@@ -452,42 +452,34 @@ int smp_call_function (void (*func) (void *info), void *info, int nonatomic, | |||
452 | } | 452 | } |
453 | EXPORT_SYMBOL(smp_call_function); | 453 | EXPORT_SYMBOL(smp_call_function); |
454 | 454 | ||
455 | void smp_stop_cpu(void) | 455 | static void stop_this_cpu(void *dummy) |
456 | { | 456 | { |
457 | unsigned long flags; | 457 | local_irq_disable(); |
458 | /* | 458 | /* |
459 | * Remove this CPU: | 459 | * Remove this CPU: |
460 | */ | 460 | */ |
461 | cpu_clear(smp_processor_id(), cpu_online_map); | 461 | cpu_clear(smp_processor_id(), cpu_online_map); |
462 | local_irq_save(flags); | ||
463 | disable_local_APIC(); | 462 | disable_local_APIC(); |
464 | local_irq_restore(flags); | ||
465 | } | ||
466 | |||
467 | static void smp_really_stop_cpu(void *dummy) | ||
468 | { | ||
469 | smp_stop_cpu(); | ||
470 | for (;;) | 463 | for (;;) |
471 | halt(); | 464 | halt(); |
472 | } | 465 | } |
473 | 466 | ||
474 | void smp_send_stop(void) | 467 | void smp_send_stop(void) |
475 | { | 468 | { |
476 | int nolock = 0; | 469 | int nolock; |
470 | unsigned long flags; | ||
471 | |||
477 | if (reboot_force) | 472 | if (reboot_force) |
478 | return; | 473 | return; |
474 | |||
479 | /* Don't deadlock on the call lock in panic */ | 475 | /* Don't deadlock on the call lock in panic */ |
480 | if (!spin_trylock(&call_lock)) { | 476 | nolock = !spin_trylock(&call_lock); |
481 | /* ignore locking because we have panicked anyways */ | 477 | local_irq_save(flags); |
482 | nolock = 1; | 478 | __smp_call_function(stop_this_cpu, NULL, 0, 0); |
483 | } | ||
484 | __smp_call_function(smp_really_stop_cpu, NULL, 0, 0); | ||
485 | if (!nolock) | 479 | if (!nolock) |
486 | spin_unlock(&call_lock); | 480 | spin_unlock(&call_lock); |
487 | |||
488 | local_irq_disable(); | ||
489 | disable_local_APIC(); | 481 | disable_local_APIC(); |
490 | local_irq_enable(); | 482 | local_irq_restore(flags); |
491 | } | 483 | } |
492 | 484 | ||
493 | /* | 485 | /* |
diff --git a/arch/x86_64/kernel/smpboot.c b/arch/x86_64/kernel/smpboot.c index cd4643a3702..4d9dacfae57 100644 --- a/arch/x86_64/kernel/smpboot.c +++ b/arch/x86_64/kernel/smpboot.c | |||
@@ -60,7 +60,6 @@ | |||
60 | #include <asm/irq.h> | 60 | #include <asm/irq.h> |
61 | #include <asm/hw_irq.h> | 61 | #include <asm/hw_irq.h> |
62 | #include <asm/numa.h> | 62 | #include <asm/numa.h> |
63 | #include <asm/genapic.h> | ||
64 | 63 | ||
65 | /* Number of siblings per CPU package */ | 64 | /* Number of siblings per CPU package */ |
66 | int smp_num_siblings = 1; | 65 | int smp_num_siblings = 1; |
@@ -68,7 +67,6 @@ EXPORT_SYMBOL(smp_num_siblings); | |||
68 | 67 | ||
69 | /* Last level cache ID of each logical CPU */ | 68 | /* Last level cache ID of each logical CPU */ |
70 | u8 cpu_llc_id[NR_CPUS] __cpuinitdata = {[0 ... NR_CPUS-1] = BAD_APICID}; | 69 | u8 cpu_llc_id[NR_CPUS] __cpuinitdata = {[0 ... NR_CPUS-1] = BAD_APICID}; |
71 | EXPORT_SYMBOL(cpu_llc_id); | ||
72 | 70 | ||
73 | /* Bitmask of currently online CPUs */ | 71 | /* Bitmask of currently online CPUs */ |
74 | cpumask_t cpu_online_map __read_mostly; | 72 | cpumask_t cpu_online_map __read_mostly; |
@@ -392,7 +390,8 @@ static void inquire_remote_apic(int apicid) | |||
392 | { | 390 | { |
393 | unsigned i, regs[] = { APIC_ID >> 4, APIC_LVR >> 4, APIC_SPIV >> 4 }; | 391 | unsigned i, regs[] = { APIC_ID >> 4, APIC_LVR >> 4, APIC_SPIV >> 4 }; |
394 | char *names[] = { "ID", "VERSION", "SPIV" }; | 392 | char *names[] = { "ID", "VERSION", "SPIV" }; |
395 | int timeout, status; | 393 | int timeout; |
394 | unsigned int status; | ||
396 | 395 | ||
397 | printk(KERN_INFO "Inquiring remote APIC #%d...\n", apicid); | 396 | printk(KERN_INFO "Inquiring remote APIC #%d...\n", apicid); |
398 | 397 | ||
@@ -402,7 +401,9 @@ static void inquire_remote_apic(int apicid) | |||
402 | /* | 401 | /* |
403 | * Wait for idle. | 402 | * Wait for idle. |
404 | */ | 403 | */ |
405 | apic_wait_icr_idle(); | 404 | status = safe_apic_wait_icr_idle(); |
405 | if (status) | ||
406 | printk("a previous APIC delivery may have failed\n"); | ||
406 | 407 | ||
407 | apic_write(APIC_ICR2, SET_APIC_DEST_FIELD(apicid)); | 408 | apic_write(APIC_ICR2, SET_APIC_DEST_FIELD(apicid)); |
408 | apic_write(APIC_ICR, APIC_DM_REMRD | regs[i]); | 409 | apic_write(APIC_ICR, APIC_DM_REMRD | regs[i]); |
@@ -430,8 +431,8 @@ static void inquire_remote_apic(int apicid) | |||
430 | */ | 431 | */ |
431 | static int __cpuinit wakeup_secondary_via_INIT(int phys_apicid, unsigned int start_rip) | 432 | static int __cpuinit wakeup_secondary_via_INIT(int phys_apicid, unsigned int start_rip) |
432 | { | 433 | { |
433 | unsigned long send_status = 0, accept_status = 0; | 434 | unsigned long send_status, accept_status = 0; |
434 | int maxlvt, timeout, num_starts, j; | 435 | int maxlvt, num_starts, j; |
435 | 436 | ||
436 | Dprintk("Asserting INIT.\n"); | 437 | Dprintk("Asserting INIT.\n"); |
437 | 438 | ||
@@ -447,12 +448,7 @@ static int __cpuinit wakeup_secondary_via_INIT(int phys_apicid, unsigned int sta | |||
447 | | APIC_DM_INIT); | 448 | | APIC_DM_INIT); |
448 | 449 | ||
449 | Dprintk("Waiting for send to finish...\n"); | 450 | Dprintk("Waiting for send to finish...\n"); |
450 | timeout = 0; | 451 | send_status = safe_apic_wait_icr_idle(); |
451 | do { | ||
452 | Dprintk("+"); | ||
453 | udelay(100); | ||
454 | send_status = apic_read(APIC_ICR) & APIC_ICR_BUSY; | ||
455 | } while (send_status && (timeout++ < 1000)); | ||
456 | 452 | ||
457 | mdelay(10); | 453 | mdelay(10); |
458 | 454 | ||
@@ -465,12 +461,7 @@ static int __cpuinit wakeup_secondary_via_INIT(int phys_apicid, unsigned int sta | |||
465 | apic_write(APIC_ICR, APIC_INT_LEVELTRIG | APIC_DM_INIT); | 461 | apic_write(APIC_ICR, APIC_INT_LEVELTRIG | APIC_DM_INIT); |
466 | 462 | ||
467 | Dprintk("Waiting for send to finish...\n"); | 463 | Dprintk("Waiting for send to finish...\n"); |
468 | timeout = 0; | 464 | send_status = safe_apic_wait_icr_idle(); |
469 | do { | ||
470 | Dprintk("+"); | ||
471 | udelay(100); | ||
472 | send_status = apic_read(APIC_ICR) & APIC_ICR_BUSY; | ||
473 | } while (send_status && (timeout++ < 1000)); | ||
474 | 465 | ||
475 | mb(); | 466 | mb(); |
476 | atomic_set(&init_deasserted, 1); | 467 | atomic_set(&init_deasserted, 1); |
@@ -509,12 +500,7 @@ static int __cpuinit wakeup_secondary_via_INIT(int phys_apicid, unsigned int sta | |||
509 | Dprintk("Startup point 1.\n"); | 500 | Dprintk("Startup point 1.\n"); |
510 | 501 | ||
511 | Dprintk("Waiting for send to finish...\n"); | 502 | Dprintk("Waiting for send to finish...\n"); |
512 | timeout = 0; | 503 | send_status = safe_apic_wait_icr_idle(); |
513 | do { | ||
514 | Dprintk("+"); | ||
515 | udelay(100); | ||
516 | send_status = apic_read(APIC_ICR) & APIC_ICR_BUSY; | ||
517 | } while (send_status && (timeout++ < 1000)); | ||
518 | 504 | ||
519 | /* | 505 | /* |
520 | * Give the other CPU some time to accept the IPI. | 506 | * Give the other CPU some time to accept the IPI. |
@@ -945,6 +931,12 @@ int __cpuinit __cpu_up(unsigned int cpu) | |||
945 | return -ENOSYS; | 931 | return -ENOSYS; |
946 | } | 932 | } |
947 | 933 | ||
934 | /* | ||
935 | * Save current MTRR state in case it was changed since early boot | ||
936 | * (e.g. by the ACPI SMI) to initialize new CPUs with MTRRs in sync: | ||
937 | */ | ||
938 | mtrr_save_state(); | ||
939 | |||
948 | per_cpu(cpu_state, cpu) = CPU_UP_PREPARE; | 940 | per_cpu(cpu_state, cpu) = CPU_UP_PREPARE; |
949 | /* Boot it! */ | 941 | /* Boot it! */ |
950 | err = do_boot_cpu(cpu, apicid); | 942 | err = do_boot_cpu(cpu, apicid); |
@@ -965,13 +957,6 @@ int __cpuinit __cpu_up(unsigned int cpu) | |||
965 | 957 | ||
966 | while (!cpu_isset(cpu, cpu_online_map)) | 958 | while (!cpu_isset(cpu, cpu_online_map)) |
967 | cpu_relax(); | 959 | cpu_relax(); |
968 | |||
969 | if (num_online_cpus() > 8 && genapic == &apic_flat) { | ||
970 | printk(KERN_WARNING | ||
971 | "flat APIC routing can't be used with > 8 cpus\n"); | ||
972 | BUG(); | ||
973 | } | ||
974 | |||
975 | err = 0; | 960 | err = 0; |
976 | 961 | ||
977 | return err; | 962 | return err; |
diff --git a/arch/x86_64/kernel/suspend.c b/arch/x86_64/kernel/suspend.c index 91f7e678bae..6a5a98f2a75 100644 --- a/arch/x86_64/kernel/suspend.c +++ b/arch/x86_64/kernel/suspend.c | |||
@@ -12,6 +12,10 @@ | |||
12 | #include <asm/proto.h> | 12 | #include <asm/proto.h> |
13 | #include <asm/page.h> | 13 | #include <asm/page.h> |
14 | #include <asm/pgtable.h> | 14 | #include <asm/pgtable.h> |
15 | #include <asm/mtrr.h> | ||
16 | |||
17 | /* References to section boundaries */ | ||
18 | extern const void __nosave_begin, __nosave_end; | ||
15 | 19 | ||
16 | struct saved_context saved_context; | 20 | struct saved_context saved_context; |
17 | 21 | ||
@@ -33,7 +37,6 @@ void __save_processor_state(struct saved_context *ctxt) | |||
33 | asm volatile ("str %0" : "=m" (ctxt->tr)); | 37 | asm volatile ("str %0" : "=m" (ctxt->tr)); |
34 | 38 | ||
35 | /* XMM0..XMM15 should be handled by kernel_fpu_begin(). */ | 39 | /* XMM0..XMM15 should be handled by kernel_fpu_begin(). */ |
36 | /* EFER should be constant for kernel version, no need to handle it. */ | ||
37 | /* | 40 | /* |
38 | * segment registers | 41 | * segment registers |
39 | */ | 42 | */ |
@@ -46,10 +49,12 @@ void __save_processor_state(struct saved_context *ctxt) | |||
46 | rdmsrl(MSR_FS_BASE, ctxt->fs_base); | 49 | rdmsrl(MSR_FS_BASE, ctxt->fs_base); |
47 | rdmsrl(MSR_GS_BASE, ctxt->gs_base); | 50 | rdmsrl(MSR_GS_BASE, ctxt->gs_base); |
48 | rdmsrl(MSR_KERNEL_GS_BASE, ctxt->gs_kernel_base); | 51 | rdmsrl(MSR_KERNEL_GS_BASE, ctxt->gs_kernel_base); |
52 | mtrr_save_fixed_ranges(NULL); | ||
49 | 53 | ||
50 | /* | 54 | /* |
51 | * control registers | 55 | * control registers |
52 | */ | 56 | */ |
57 | rdmsrl(MSR_EFER, ctxt->efer); | ||
53 | asm volatile ("movq %%cr0, %0" : "=r" (ctxt->cr0)); | 58 | asm volatile ("movq %%cr0, %0" : "=r" (ctxt->cr0)); |
54 | asm volatile ("movq %%cr2, %0" : "=r" (ctxt->cr2)); | 59 | asm volatile ("movq %%cr2, %0" : "=r" (ctxt->cr2)); |
55 | asm volatile ("movq %%cr3, %0" : "=r" (ctxt->cr3)); | 60 | asm volatile ("movq %%cr3, %0" : "=r" (ctxt->cr3)); |
@@ -75,6 +80,7 @@ void __restore_processor_state(struct saved_context *ctxt) | |||
75 | /* | 80 | /* |
76 | * control registers | 81 | * control registers |
77 | */ | 82 | */ |
83 | wrmsrl(MSR_EFER, ctxt->efer); | ||
78 | asm volatile ("movq %0, %%cr8" :: "r" (ctxt->cr8)); | 84 | asm volatile ("movq %0, %%cr8" :: "r" (ctxt->cr8)); |
79 | asm volatile ("movq %0, %%cr4" :: "r" (ctxt->cr4)); | 85 | asm volatile ("movq %0, %%cr4" :: "r" (ctxt->cr4)); |
80 | asm volatile ("movq %0, %%cr3" :: "r" (ctxt->cr3)); | 86 | asm volatile ("movq %0, %%cr3" :: "r" (ctxt->cr3)); |
@@ -219,4 +225,15 @@ int swsusp_arch_resume(void) | |||
219 | restore_image(); | 225 | restore_image(); |
220 | return 0; | 226 | return 0; |
221 | } | 227 | } |
228 | |||
229 | /* | ||
230 | * pfn_is_nosave - check if given pfn is in the 'nosave' section | ||
231 | */ | ||
232 | |||
233 | int pfn_is_nosave(unsigned long pfn) | ||
234 | { | ||
235 | unsigned long nosave_begin_pfn = __pa_symbol(&__nosave_begin) >> PAGE_SHIFT; | ||
236 | unsigned long nosave_end_pfn = PAGE_ALIGN(__pa_symbol(&__nosave_end)) >> PAGE_SHIFT; | ||
237 | return (pfn >= nosave_begin_pfn) && (pfn < nosave_end_pfn); | ||
238 | } | ||
222 | #endif /* CONFIG_SOFTWARE_SUSPEND */ | 239 | #endif /* CONFIG_SOFTWARE_SUSPEND */ |
diff --git a/arch/x86_64/kernel/suspend_asm.S b/arch/x86_64/kernel/suspend_asm.S index bfbe00763c6..16d183f67bc 100644 --- a/arch/x86_64/kernel/suspend_asm.S +++ b/arch/x86_64/kernel/suspend_asm.S | |||
@@ -71,9 +71,10 @@ loop: | |||
71 | jmp loop | 71 | jmp loop |
72 | done: | 72 | done: |
73 | /* go back to the original page tables */ | 73 | /* go back to the original page tables */ |
74 | leaq init_level4_pgt(%rip), %rax | 74 | movq $(init_level4_pgt - __START_KERNEL_map), %rax |
75 | subq $__START_KERNEL_map, %rax | 75 | addq phys_base(%rip), %rax |
76 | movq %rax, %cr3 | 76 | movq %rax, %cr3 |
77 | |||
77 | /* Flush TLB, including "global" things (vmalloc) */ | 78 | /* Flush TLB, including "global" things (vmalloc) */ |
78 | movq mmu_cr4_features(%rip), %rax | 79 | movq mmu_cr4_features(%rip), %rax |
79 | movq %rax, %rdx | 80 | movq %rax, %rdx |
diff --git a/arch/x86_64/kernel/syscall.c b/arch/x86_64/kernel/syscall.c index 213fd6ab789..63d592c276c 100644 --- a/arch/x86_64/kernel/syscall.c +++ b/arch/x86_64/kernel/syscall.c | |||
@@ -3,6 +3,7 @@ | |||
3 | #include <linux/linkage.h> | 3 | #include <linux/linkage.h> |
4 | #include <linux/sys.h> | 4 | #include <linux/sys.h> |
5 | #include <linux/cache.h> | 5 | #include <linux/cache.h> |
6 | #include <asm/asm-offsets.h> | ||
6 | 7 | ||
7 | #define __NO_STUBS | 8 | #define __NO_STUBS |
8 | 9 | ||
diff --git a/arch/x86_64/kernel/time.c b/arch/x86_64/kernel/time.c index 75d73a9aa9f..0652e173813 100644 --- a/arch/x86_64/kernel/time.c +++ b/arch/x86_64/kernel/time.c | |||
@@ -39,13 +39,11 @@ | |||
39 | #include <asm/proto.h> | 39 | #include <asm/proto.h> |
40 | #include <asm/hpet.h> | 40 | #include <asm/hpet.h> |
41 | #include <asm/sections.h> | 41 | #include <asm/sections.h> |
42 | #include <linux/cpufreq.h> | ||
43 | #include <linux/hpet.h> | 42 | #include <linux/hpet.h> |
44 | #include <asm/apic.h> | 43 | #include <asm/apic.h> |
45 | #include <asm/hpet.h> | 44 | #include <asm/hpet.h> |
46 | 45 | #include <asm/mpspec.h> | |
47 | extern void i8254_timer_resume(void); | 46 | #include <asm/nmi.h> |
48 | extern int using_apic_timer; | ||
49 | 47 | ||
50 | static char *timename = NULL; | 48 | static char *timename = NULL; |
51 | 49 | ||
@@ -252,6 +250,51 @@ static unsigned long get_cmos_time(void) | |||
252 | return mktime(year, mon, day, hour, min, sec); | 250 | return mktime(year, mon, day, hour, min, sec); |
253 | } | 251 | } |
254 | 252 | ||
253 | /* tsc_calibrate_cpu_khz is used on systems with fixed rate TSCs to determine | ||
254 | * processor frequency */ | ||
255 | #define TICK_COUNT 100000000 | ||
256 | static unsigned int __init tsc_calibrate_cpu_khz(void) | ||
257 | { | ||
258 | int tsc_start, tsc_now; | ||
259 | int i, no_ctr_free; | ||
260 | unsigned long evntsel3 = 0, pmc3 = 0, pmc_now = 0; | ||
261 | unsigned long flags; | ||
262 | |||
263 | for (i = 0; i < 4; i++) | ||
264 | if (avail_to_resrv_perfctr_nmi_bit(i)) | ||
265 | break; | ||
266 | no_ctr_free = (i == 4); | ||
267 | if (no_ctr_free) { | ||
268 | i = 3; | ||
269 | rdmsrl(MSR_K7_EVNTSEL3, evntsel3); | ||
270 | wrmsrl(MSR_K7_EVNTSEL3, 0); | ||
271 | rdmsrl(MSR_K7_PERFCTR3, pmc3); | ||
272 | } else { | ||
273 | reserve_perfctr_nmi(MSR_K7_PERFCTR0 + i); | ||
274 | reserve_evntsel_nmi(MSR_K7_EVNTSEL0 + i); | ||
275 | } | ||
276 | local_irq_save(flags); | ||
277 | /* start measuring cycles, incrementing from 0 */ | ||
278 | wrmsrl(MSR_K7_PERFCTR0 + i, 0); | ||
279 | wrmsrl(MSR_K7_EVNTSEL0 + i, 1 << 22 | 3 << 16 | 0x76); | ||
280 | rdtscl(tsc_start); | ||
281 | do { | ||
282 | rdmsrl(MSR_K7_PERFCTR0 + i, pmc_now); | ||
283 | tsc_now = get_cycles_sync(); | ||
284 | } while ((tsc_now - tsc_start) < TICK_COUNT); | ||
285 | |||
286 | local_irq_restore(flags); | ||
287 | if (no_ctr_free) { | ||
288 | wrmsrl(MSR_K7_EVNTSEL3, 0); | ||
289 | wrmsrl(MSR_K7_PERFCTR3, pmc3); | ||
290 | wrmsrl(MSR_K7_EVNTSEL3, evntsel3); | ||
291 | } else { | ||
292 | release_perfctr_nmi(MSR_K7_PERFCTR0 + i); | ||
293 | release_evntsel_nmi(MSR_K7_EVNTSEL0 + i); | ||
294 | } | ||
295 | |||
296 | return pmc_now * tsc_khz / (tsc_now - tsc_start); | ||
297 | } | ||
255 | 298 | ||
256 | /* | 299 | /* |
257 | * pit_calibrate_tsc() uses the speaker output (channel 2) of | 300 | * pit_calibrate_tsc() uses the speaker output (channel 2) of |
@@ -285,7 +328,7 @@ static unsigned int __init pit_calibrate_tsc(void) | |||
285 | #define PIT_MODE 0x43 | 328 | #define PIT_MODE 0x43 |
286 | #define PIT_CH0 0x40 | 329 | #define PIT_CH0 0x40 |
287 | 330 | ||
288 | static void __init __pit_init(int val, u8 mode) | 331 | static void __pit_init(int val, u8 mode) |
289 | { | 332 | { |
290 | unsigned long flags; | 333 | unsigned long flags; |
291 | 334 | ||
@@ -301,12 +344,12 @@ void __init pit_init(void) | |||
301 | __pit_init(LATCH, 0x34); /* binary, mode 2, LSB/MSB, ch 0 */ | 344 | __pit_init(LATCH, 0x34); /* binary, mode 2, LSB/MSB, ch 0 */ |
302 | } | 345 | } |
303 | 346 | ||
304 | void __init pit_stop_interrupt(void) | 347 | void pit_stop_interrupt(void) |
305 | { | 348 | { |
306 | __pit_init(0, 0x30); /* mode 0 */ | 349 | __pit_init(0, 0x30); /* mode 0 */ |
307 | } | 350 | } |
308 | 351 | ||
309 | void __init stop_timer_interrupt(void) | 352 | void stop_timer_interrupt(void) |
310 | { | 353 | { |
311 | char *name; | 354 | char *name; |
312 | if (hpet_address) { | 355 | if (hpet_address) { |
@@ -339,23 +382,29 @@ void __init time_init(void) | |||
339 | if (hpet_use_timer) { | 382 | if (hpet_use_timer) { |
340 | /* set tick_nsec to use the proper rate for HPET */ | 383 | /* set tick_nsec to use the proper rate for HPET */ |
341 | tick_nsec = TICK_NSEC_HPET; | 384 | tick_nsec = TICK_NSEC_HPET; |
342 | cpu_khz = hpet_calibrate_tsc(); | 385 | tsc_khz = hpet_calibrate_tsc(); |
343 | timename = "HPET"; | 386 | timename = "HPET"; |
344 | } else { | 387 | } else { |
345 | pit_init(); | 388 | pit_init(); |
346 | cpu_khz = pit_calibrate_tsc(); | 389 | tsc_khz = pit_calibrate_tsc(); |
347 | timename = "PIT"; | 390 | timename = "PIT"; |
348 | } | 391 | } |
349 | 392 | ||
393 | cpu_khz = tsc_khz; | ||
394 | if (cpu_has(&boot_cpu_data, X86_FEATURE_CONSTANT_TSC) && | ||
395 | boot_cpu_data.x86_vendor == X86_VENDOR_AMD && | ||
396 | boot_cpu_data.x86 == 16) | ||
397 | cpu_khz = tsc_calibrate_cpu_khz(); | ||
398 | |||
350 | if (unsynchronized_tsc()) | 399 | if (unsynchronized_tsc()) |
351 | mark_tsc_unstable(); | 400 | mark_tsc_unstable("TSCs unsynchronized"); |
352 | 401 | ||
353 | if (cpu_has(&boot_cpu_data, X86_FEATURE_RDTSCP)) | 402 | if (cpu_has(&boot_cpu_data, X86_FEATURE_RDTSCP)) |
354 | vgetcpu_mode = VGETCPU_RDTSCP; | 403 | vgetcpu_mode = VGETCPU_RDTSCP; |
355 | else | 404 | else |
356 | vgetcpu_mode = VGETCPU_LSL; | 405 | vgetcpu_mode = VGETCPU_LSL; |
357 | 406 | ||
358 | set_cyc2ns_scale(cpu_khz); | 407 | set_cyc2ns_scale(tsc_khz); |
359 | printk(KERN_INFO "time.c: Detected %d.%03d MHz processor.\n", | 408 | printk(KERN_INFO "time.c: Detected %d.%03d MHz processor.\n", |
360 | cpu_khz / 1000, cpu_khz % 1000); | 409 | cpu_khz / 1000, cpu_khz % 1000); |
361 | init_tsc_clocksource(); | 410 | init_tsc_clocksource(); |
diff --git a/arch/x86_64/kernel/trampoline.S b/arch/x86_64/kernel/trampoline.S index c79b99a9e2f..e7e2764c461 100644 --- a/arch/x86_64/kernel/trampoline.S +++ b/arch/x86_64/kernel/trampoline.S | |||
@@ -3,6 +3,7 @@ | |||
3 | * Trampoline.S Derived from Setup.S by Linus Torvalds | 3 | * Trampoline.S Derived from Setup.S by Linus Torvalds |
4 | * | 4 | * |
5 | * 4 Jan 1997 Michael Chastain: changed to gnu as. | 5 | * 4 Jan 1997 Michael Chastain: changed to gnu as. |
6 | * 15 Sept 2005 Eric Biederman: 64bit PIC support | ||
6 | * | 7 | * |
7 | * Entry: CS:IP point to the start of our code, we are | 8 | * Entry: CS:IP point to the start of our code, we are |
8 | * in real mode with no stack, but the rest of the | 9 | * in real mode with no stack, but the rest of the |
@@ -17,15 +18,20 @@ | |||
17 | * and IP is zero. Thus, data addresses need to be absolute | 18 | * and IP is zero. Thus, data addresses need to be absolute |
18 | * (no relocation) and are taken with regard to r_base. | 19 | * (no relocation) and are taken with regard to r_base. |
19 | * | 20 | * |
21 | * With the addition of trampoline_level4_pgt this code can | ||
22 | * now enter a 64bit kernel that lives at arbitrary 64bit | ||
23 | * physical addresses. | ||
24 | * | ||
20 | * If you work on this file, check the object module with objdump | 25 | * If you work on this file, check the object module with objdump |
21 | * --full-contents --reloc to make sure there are no relocation | 26 | * --full-contents --reloc to make sure there are no relocation |
22 | * entries. For the GDT entry we do hand relocation in smpboot.c | 27 | * entries. |
23 | * because of 64bit linker limitations. | ||
24 | */ | 28 | */ |
25 | 29 | ||
26 | #include <linux/linkage.h> | 30 | #include <linux/linkage.h> |
27 | #include <asm/segment.h> | 31 | #include <asm/pgtable.h> |
28 | #include <asm/page.h> | 32 | #include <asm/page.h> |
33 | #include <asm/msr.h> | ||
34 | #include <asm/segment.h> | ||
29 | 35 | ||
30 | .data | 36 | .data |
31 | 37 | ||
@@ -33,15 +39,33 @@ | |||
33 | 39 | ||
34 | ENTRY(trampoline_data) | 40 | ENTRY(trampoline_data) |
35 | r_base = . | 41 | r_base = . |
42 | cli # We should be safe anyway | ||
36 | wbinvd | 43 | wbinvd |
37 | mov %cs, %ax # Code and data in the same place | 44 | mov %cs, %ax # Code and data in the same place |
38 | mov %ax, %ds | 45 | mov %ax, %ds |
46 | mov %ax, %es | ||
47 | mov %ax, %ss | ||
39 | 48 | ||
40 | cli # We should be safe anyway | ||
41 | 49 | ||
42 | movl $0xA5A5A5A5, trampoline_data - r_base | 50 | movl $0xA5A5A5A5, trampoline_data - r_base |
43 | # write marker for master knows we're running | 51 | # write marker for master knows we're running |
44 | 52 | ||
53 | # Setup stack | ||
54 | movw $(trampoline_stack_end - r_base), %sp | ||
55 | |||
56 | call verify_cpu # Verify the cpu supports long mode | ||
57 | testl %eax, %eax # Check for return code | ||
58 | jnz no_longmode | ||
59 | |||
60 | mov %cs, %ax | ||
61 | movzx %ax, %esi # Find the 32bit trampoline location | ||
62 | shll $4, %esi | ||
63 | |||
64 | # Fixup the vectors | ||
65 | addl %esi, startup_32_vector - r_base | ||
66 | addl %esi, startup_64_vector - r_base | ||
67 | addl %esi, tgdt + 2 - r_base # Fixup the gdt pointer | ||
68 | |||
45 | /* | 69 | /* |
46 | * GDT tables in non default location kernel can be beyond 16MB and | 70 | * GDT tables in non default location kernel can be beyond 16MB and |
47 | * lgdt will not be able to load the address as in real mode default | 71 | * lgdt will not be able to load the address as in real mode default |
@@ -49,23 +73,94 @@ r_base = . | |||
49 | * to 32 bit. | 73 | * to 32 bit. |
50 | */ | 74 | */ |
51 | 75 | ||
52 | lidtl idt_48 - r_base # load idt with 0, 0 | 76 | lidtl tidt - r_base # load idt with 0, 0 |
53 | lgdtl gdt_48 - r_base # load gdt with whatever is appropriate | 77 | lgdtl tgdt - r_base # load gdt with whatever is appropriate |
54 | 78 | ||
55 | xor %ax, %ax | 79 | xor %ax, %ax |
56 | inc %ax # protected mode (PE) bit | 80 | inc %ax # protected mode (PE) bit |
57 | lmsw %ax # into protected mode | 81 | lmsw %ax # into protected mode |
58 | # flaush prefetch and jump to startup_32 in arch/x86_64/kernel/head.S | 82 | |
59 | ljmpl $__KERNEL32_CS, $(startup_32-__START_KERNEL_map) | 83 | # flush prefetch and jump to startup_32 |
84 | ljmpl *(startup_32_vector - r_base) | ||
85 | |||
86 | .code32 | ||
87 | .balign 4 | ||
88 | startup_32: | ||
89 | movl $__KERNEL_DS, %eax # Initialize the %ds segment register | ||
90 | movl %eax, %ds | ||
91 | |||
92 | xorl %eax, %eax | ||
93 | btsl $5, %eax # Enable PAE mode | ||
94 | movl %eax, %cr4 | ||
95 | |||
96 | # Setup trampoline 4 level pagetables | ||
97 | leal (trampoline_level4_pgt - r_base)(%esi), %eax | ||
98 | movl %eax, %cr3 | ||
99 | |||
100 | movl $MSR_EFER, %ecx | ||
101 | movl $(1 << _EFER_LME), %eax # Enable Long Mode | ||
102 | xorl %edx, %edx | ||
103 | wrmsr | ||
104 | |||
105 | xorl %eax, %eax | ||
106 | btsl $31, %eax # Enable paging and in turn activate Long Mode | ||
107 | btsl $0, %eax # Enable protected mode | ||
108 | movl %eax, %cr0 | ||
109 | |||
110 | /* | ||
111 | * At this point we're in long mode but in 32bit compatibility mode | ||
112 | * with EFER.LME = 1, CS.L = 0, CS.D = 1 (and in turn | ||
113 | * EFER.LMA = 1). Now we want to jump in 64bit mode, to do that we use | ||
114 | * the new gdt/idt that has __KERNEL_CS with CS.L = 1. | ||
115 | */ | ||
116 | ljmp *(startup_64_vector - r_base)(%esi) | ||
117 | |||
118 | .code64 | ||
119 | .balign 4 | ||
120 | startup_64: | ||
121 | # Now jump into the kernel using virtual addresses | ||
122 | movq $secondary_startup_64, %rax | ||
123 | jmp *%rax | ||
124 | |||
125 | .code16 | ||
126 | no_longmode: | ||
127 | hlt | ||
128 | jmp no_longmode | ||
129 | #include "verify_cpu.S" | ||
60 | 130 | ||
61 | # Careful these need to be in the same 64K segment as the above; | 131 | # Careful these need to be in the same 64K segment as the above; |
62 | idt_48: | 132 | tidt: |
63 | .word 0 # idt limit = 0 | 133 | .word 0 # idt limit = 0 |
64 | .word 0, 0 # idt base = 0L | 134 | .word 0, 0 # idt base = 0L |
65 | 135 | ||
66 | gdt_48: | 136 | # Duplicate the global descriptor table |
67 | .short GDT_ENTRIES*8 - 1 # gdt limit | 137 | # so the kernel can live anywhere |
68 | .long cpu_gdt_table-__START_KERNEL_map | 138 | .balign 4 |
139 | tgdt: | ||
140 | .short tgdt_end - tgdt # gdt limit | ||
141 | .long tgdt - r_base | ||
142 | .short 0 | ||
143 | .quad 0x00cf9b000000ffff # __KERNEL32_CS | ||
144 | .quad 0x00af9b000000ffff # __KERNEL_CS | ||
145 | .quad 0x00cf93000000ffff # __KERNEL_DS | ||
146 | tgdt_end: | ||
147 | |||
148 | .balign 4 | ||
149 | startup_32_vector: | ||
150 | .long startup_32 - r_base | ||
151 | .word __KERNEL32_CS, 0 | ||
152 | |||
153 | .balign 4 | ||
154 | startup_64_vector: | ||
155 | .long startup_64 - r_base | ||
156 | .word __KERNEL_CS, 0 | ||
157 | |||
158 | trampoline_stack: | ||
159 | .org 0x1000 | ||
160 | trampoline_stack_end: | ||
161 | ENTRY(trampoline_level4_pgt) | ||
162 | .quad level3_ident_pgt - __START_KERNEL_map + _KERNPG_TABLE | ||
163 | .fill 510,8,0 | ||
164 | .quad level3_kernel_pgt - __START_KERNEL_map + _KERNPG_TABLE | ||
69 | 165 | ||
70 | .globl trampoline_end | 166 | ENTRY(trampoline_end) |
71 | trampoline_end: | ||
diff --git a/arch/x86_64/kernel/traps.c b/arch/x86_64/kernel/traps.c index 09d2e8a10a4..d76fc32d459 100644 --- a/arch/x86_64/kernel/traps.c +++ b/arch/x86_64/kernel/traps.c | |||
@@ -426,8 +426,7 @@ void show_registers(struct pt_regs *regs) | |||
426 | const int cpu = smp_processor_id(); | 426 | const int cpu = smp_processor_id(); |
427 | struct task_struct *cur = cpu_pda(cpu)->pcurrent; | 427 | struct task_struct *cur = cpu_pda(cpu)->pcurrent; |
428 | 428 | ||
429 | rsp = regs->rsp; | 429 | rsp = regs->rsp; |
430 | |||
431 | printk("CPU %d ", cpu); | 430 | printk("CPU %d ", cpu); |
432 | __show_regs(regs); | 431 | __show_regs(regs); |
433 | printk("Process %s (pid: %d, threadinfo %p, task %p)\n", | 432 | printk("Process %s (pid: %d, threadinfo %p, task %p)\n", |
@@ -438,7 +437,6 @@ void show_registers(struct pt_regs *regs) | |||
438 | * time of the fault.. | 437 | * time of the fault.. |
439 | */ | 438 | */ |
440 | if (in_kernel) { | 439 | if (in_kernel) { |
441 | |||
442 | printk("Stack: "); | 440 | printk("Stack: "); |
443 | _show_stack(NULL, regs, (unsigned long*)rsp); | 441 | _show_stack(NULL, regs, (unsigned long*)rsp); |
444 | 442 | ||
@@ -581,10 +579,20 @@ static void __kprobes do_trap(int trapnr, int signr, char *str, | |||
581 | { | 579 | { |
582 | struct task_struct *tsk = current; | 580 | struct task_struct *tsk = current; |
583 | 581 | ||
584 | tsk->thread.error_code = error_code; | ||
585 | tsk->thread.trap_no = trapnr; | ||
586 | |||
587 | if (user_mode(regs)) { | 582 | if (user_mode(regs)) { |
583 | /* | ||
584 | * We want error_code and trap_no set for userspace | ||
585 | * faults and kernelspace faults which result in | ||
586 | * die(), but not kernelspace faults which are fixed | ||
587 | * up. die() gives the process no chance to handle | ||
588 | * the signal and notice the kernel fault information, | ||
589 | * so that won't result in polluting the information | ||
590 | * about previously queued, but not yet delivered, | ||
591 | * faults. See also do_general_protection below. | ||
592 | */ | ||
593 | tsk->thread.error_code = error_code; | ||
594 | tsk->thread.trap_no = trapnr; | ||
595 | |||
588 | if (exception_trace && unhandled_signal(tsk, signr)) | 596 | if (exception_trace && unhandled_signal(tsk, signr)) |
589 | printk(KERN_INFO | 597 | printk(KERN_INFO |
590 | "%s[%d] trap %s rip:%lx rsp:%lx error:%lx\n", | 598 | "%s[%d] trap %s rip:%lx rsp:%lx error:%lx\n", |
@@ -605,8 +613,11 @@ static void __kprobes do_trap(int trapnr, int signr, char *str, | |||
605 | fixup = search_exception_tables(regs->rip); | 613 | fixup = search_exception_tables(regs->rip); |
606 | if (fixup) | 614 | if (fixup) |
607 | regs->rip = fixup->fixup; | 615 | regs->rip = fixup->fixup; |
608 | else | 616 | else { |
617 | tsk->thread.error_code = error_code; | ||
618 | tsk->thread.trap_no = trapnr; | ||
609 | die(str, regs, error_code); | 619 | die(str, regs, error_code); |
620 | } | ||
610 | return; | 621 | return; |
611 | } | 622 | } |
612 | } | 623 | } |
@@ -682,10 +693,10 @@ asmlinkage void __kprobes do_general_protection(struct pt_regs * regs, | |||
682 | 693 | ||
683 | conditional_sti(regs); | 694 | conditional_sti(regs); |
684 | 695 | ||
685 | tsk->thread.error_code = error_code; | ||
686 | tsk->thread.trap_no = 13; | ||
687 | |||
688 | if (user_mode(regs)) { | 696 | if (user_mode(regs)) { |
697 | tsk->thread.error_code = error_code; | ||
698 | tsk->thread.trap_no = 13; | ||
699 | |||
689 | if (exception_trace && unhandled_signal(tsk, SIGSEGV)) | 700 | if (exception_trace && unhandled_signal(tsk, SIGSEGV)) |
690 | printk(KERN_INFO | 701 | printk(KERN_INFO |
691 | "%s[%d] general protection rip:%lx rsp:%lx error:%lx\n", | 702 | "%s[%d] general protection rip:%lx rsp:%lx error:%lx\n", |
@@ -704,6 +715,9 @@ asmlinkage void __kprobes do_general_protection(struct pt_regs * regs, | |||
704 | regs->rip = fixup->fixup; | 715 | regs->rip = fixup->fixup; |
705 | return; | 716 | return; |
706 | } | 717 | } |
718 | |||
719 | tsk->thread.error_code = error_code; | ||
720 | tsk->thread.trap_no = 13; | ||
707 | if (notify_die(DIE_GPF, "general protection fault", regs, | 721 | if (notify_die(DIE_GPF, "general protection fault", regs, |
708 | error_code, 13, SIGSEGV) == NOTIFY_STOP) | 722 | error_code, 13, SIGSEGV) == NOTIFY_STOP) |
709 | return; | 723 | return; |
diff --git a/arch/x86_64/kernel/tsc.c b/arch/x86_64/kernel/tsc.c index 1a0edbbffaa..48f9a8e6aa9 100644 --- a/arch/x86_64/kernel/tsc.c +++ b/arch/x86_64/kernel/tsc.c | |||
@@ -13,6 +13,8 @@ static int notsc __initdata = 0; | |||
13 | 13 | ||
14 | unsigned int cpu_khz; /* TSC clocks / usec, not used here */ | 14 | unsigned int cpu_khz; /* TSC clocks / usec, not used here */ |
15 | EXPORT_SYMBOL(cpu_khz); | 15 | EXPORT_SYMBOL(cpu_khz); |
16 | unsigned int tsc_khz; | ||
17 | EXPORT_SYMBOL(tsc_khz); | ||
16 | 18 | ||
17 | static unsigned int cyc2ns_scale __read_mostly; | 19 | static unsigned int cyc2ns_scale __read_mostly; |
18 | 20 | ||
@@ -77,7 +79,7 @@ static void handle_cpufreq_delayed_get(struct work_struct *v) | |||
77 | static unsigned int ref_freq = 0; | 79 | static unsigned int ref_freq = 0; |
78 | static unsigned long loops_per_jiffy_ref = 0; | 80 | static unsigned long loops_per_jiffy_ref = 0; |
79 | 81 | ||
80 | static unsigned long cpu_khz_ref = 0; | 82 | static unsigned long tsc_khz_ref = 0; |
81 | 83 | ||
82 | static int time_cpufreq_notifier(struct notifier_block *nb, unsigned long val, | 84 | static int time_cpufreq_notifier(struct notifier_block *nb, unsigned long val, |
83 | void *data) | 85 | void *data) |
@@ -99,7 +101,7 @@ static int time_cpufreq_notifier(struct notifier_block *nb, unsigned long val, | |||
99 | if (!ref_freq) { | 101 | if (!ref_freq) { |
100 | ref_freq = freq->old; | 102 | ref_freq = freq->old; |
101 | loops_per_jiffy_ref = *lpj; | 103 | loops_per_jiffy_ref = *lpj; |
102 | cpu_khz_ref = cpu_khz; | 104 | tsc_khz_ref = tsc_khz; |
103 | } | 105 | } |
104 | if ((val == CPUFREQ_PRECHANGE && freq->old < freq->new) || | 106 | if ((val == CPUFREQ_PRECHANGE && freq->old < freq->new) || |
105 | (val == CPUFREQ_POSTCHANGE && freq->old > freq->new) || | 107 | (val == CPUFREQ_POSTCHANGE && freq->old > freq->new) || |
@@ -107,12 +109,12 @@ static int time_cpufreq_notifier(struct notifier_block *nb, unsigned long val, | |||
107 | *lpj = | 109 | *lpj = |
108 | cpufreq_scale(loops_per_jiffy_ref, ref_freq, freq->new); | 110 | cpufreq_scale(loops_per_jiffy_ref, ref_freq, freq->new); |
109 | 111 | ||
110 | cpu_khz = cpufreq_scale(cpu_khz_ref, ref_freq, freq->new); | 112 | tsc_khz = cpufreq_scale(tsc_khz_ref, ref_freq, freq->new); |
111 | if (!(freq->flags & CPUFREQ_CONST_LOOPS)) | 113 | if (!(freq->flags & CPUFREQ_CONST_LOOPS)) |
112 | mark_tsc_unstable(); | 114 | mark_tsc_unstable("cpufreq changes"); |
113 | } | 115 | } |
114 | 116 | ||
115 | set_cyc2ns_scale(cpu_khz_ref); | 117 | set_cyc2ns_scale(tsc_khz_ref); |
116 | 118 | ||
117 | return 0; | 119 | return 0; |
118 | } | 120 | } |
@@ -197,10 +199,11 @@ static struct clocksource clocksource_tsc = { | |||
197 | .vread = vread_tsc, | 199 | .vread = vread_tsc, |
198 | }; | 200 | }; |
199 | 201 | ||
200 | void mark_tsc_unstable(void) | 202 | void mark_tsc_unstable(char *reason) |
201 | { | 203 | { |
202 | if (!tsc_unstable) { | 204 | if (!tsc_unstable) { |
203 | tsc_unstable = 1; | 205 | tsc_unstable = 1; |
206 | printk("Marking TSC unstable due to %s\n", reason); | ||
204 | /* Change only the rating, when not registered */ | 207 | /* Change only the rating, when not registered */ |
205 | if (clocksource_tsc.mult) | 208 | if (clocksource_tsc.mult) |
206 | clocksource_change_rating(&clocksource_tsc, 0); | 209 | clocksource_change_rating(&clocksource_tsc, 0); |
@@ -213,7 +216,7 @@ EXPORT_SYMBOL_GPL(mark_tsc_unstable); | |||
213 | void __init init_tsc_clocksource(void) | 216 | void __init init_tsc_clocksource(void) |
214 | { | 217 | { |
215 | if (!notsc) { | 218 | if (!notsc) { |
216 | clocksource_tsc.mult = clocksource_khz2mult(cpu_khz, | 219 | clocksource_tsc.mult = clocksource_khz2mult(tsc_khz, |
217 | clocksource_tsc.shift); | 220 | clocksource_tsc.shift); |
218 | if (check_tsc_unstable()) | 221 | if (check_tsc_unstable()) |
219 | clocksource_tsc.rating = 0; | 222 | clocksource_tsc.rating = 0; |
diff --git a/arch/x86_64/kernel/tsc_sync.c b/arch/x86_64/kernel/tsc_sync.c index 014f0db45df..355f5f506c8 100644 --- a/arch/x86_64/kernel/tsc_sync.c +++ b/arch/x86_64/kernel/tsc_sync.c | |||
@@ -50,7 +50,7 @@ static __cpuinit void check_tsc_warp(void) | |||
50 | /* | 50 | /* |
51 | * The measurement runs for 20 msecs: | 51 | * The measurement runs for 20 msecs: |
52 | */ | 52 | */ |
53 | end = start + cpu_khz * 20ULL; | 53 | end = start + tsc_khz * 20ULL; |
54 | now = start; | 54 | now = start; |
55 | 55 | ||
56 | for (i = 0; ; i++) { | 56 | for (i = 0; ; i++) { |
@@ -138,7 +138,7 @@ void __cpuinit check_tsc_sync_source(int cpu) | |||
138 | printk("\n"); | 138 | printk("\n"); |
139 | printk(KERN_WARNING "Measured %Ld cycles TSC warp between CPUs," | 139 | printk(KERN_WARNING "Measured %Ld cycles TSC warp between CPUs," |
140 | " turning off TSC clock.\n", max_warp); | 140 | " turning off TSC clock.\n", max_warp); |
141 | mark_tsc_unstable(); | 141 | mark_tsc_unstable("check_tsc_sync_source failed"); |
142 | nr_warps = 0; | 142 | nr_warps = 0; |
143 | max_warp = 0; | 143 | max_warp = 0; |
144 | last_tsc = 0; | 144 | last_tsc = 0; |
diff --git a/arch/x86_64/kernel/verify_cpu.S b/arch/x86_64/kernel/verify_cpu.S new file mode 100644 index 00000000000..e035f594819 --- /dev/null +++ b/arch/x86_64/kernel/verify_cpu.S | |||
@@ -0,0 +1,119 @@ | |||
1 | /* | ||
2 | * | ||
3 | * verify_cpu.S - Code for cpu long mode and SSE verification. This | ||
4 | * code has been borrowed from boot/setup.S and was introduced by | ||
5 | * Andi Kleen. | ||
6 | * | ||
7 | * Copyright (c) 2007 Andi Kleen (ak@suse.de) | ||
8 | * Copyright (c) 2007 Eric Biederman (ebiederm@xmission.com) | ||
9 | * Copyright (c) 2007 Vivek Goyal (vgoyal@in.ibm.com) | ||
10 | * | ||
11 | * This source code is licensed under the GNU General Public License, | ||
12 | * Version 2. See the file COPYING for more details. | ||
13 | * | ||
14 | * This is common code for verifying whether the CPU supports | ||
15 | * long mode and SSE. It is not called directly; instead, this | ||
16 | * file is included at various places and compiled in that context. | ||
17 | * The current users are listed below. | ||
18 | * | ||
19 | * This file is included by both 16bit and 32bit code. | ||
20 | * | ||
21 | * arch/x86_64/boot/setup.S : Boot cpu verification (16bit) | ||
22 | * arch/x86_64/boot/compressed/head.S: Boot cpu verification (32bit) | ||
23 | * arch/x86_64/kernel/trampoline.S: secondary processor verification (16bit) | ||
24 | * arch/x86_64/kernel/acpi/wakeup.S: Verification at resume (16bit) | ||
25 | * | ||
26 | * verify_cpu returns the status of the cpu check in register %eax. | ||
27 | * 0: Success 1: Failure | ||
28 | * | ||
29 | * The caller needs to check for the error code and take the action | ||
30 | * appropriately. Either display a message or halt. | ||
31 | */ | ||
32 | |||
33 | #include <asm/cpufeature.h> | ||
34 | |||
35 | verify_cpu: | ||
36 | pushfl # Save caller passed flags | ||
37 | pushl $0 # Kill any dangerous flags | ||
38 | popfl | ||
39 | |||
40 | /* minimum CPUID flags for x86-64 as defined by AMD */ | ||
41 | #define M(x) (1<<(x)) | ||
42 | #define M2(a,b) M(a)|M(b) | ||
43 | #define M4(a,b,c,d) M(a)|M(b)|M(c)|M(d) | ||
44 | |||
45 | #define SSE_MASK \ | ||
46 | (M2(X86_FEATURE_XMM,X86_FEATURE_XMM2)) | ||
47 | #define REQUIRED_MASK1 \ | ||
48 | (M4(X86_FEATURE_FPU,X86_FEATURE_PSE,X86_FEATURE_TSC,X86_FEATURE_MSR)|\ | ||
49 | M4(X86_FEATURE_PAE,X86_FEATURE_CX8,X86_FEATURE_PGE,X86_FEATURE_CMOV)|\ | ||
50 | M(X86_FEATURE_FXSR)) | ||
51 | #define REQUIRED_MASK2 \ | ||
52 | (M(X86_FEATURE_LM - 32)) | ||
53 | |||
54 | pushfl # standard way to check for cpuid | ||
55 | popl %eax | ||
56 | movl %eax,%ebx | ||
57 | xorl $0x200000,%eax | ||
58 | pushl %eax | ||
59 | popfl | ||
60 | pushfl | ||
61 | popl %eax | ||
62 | cmpl %eax,%ebx | ||
63 | jz verify_cpu_no_longmode # cpu has no cpuid | ||
64 | |||
65 | movl $0x0,%eax # See if cpuid 1 is implemented | ||
66 | cpuid | ||
67 | cmpl $0x1,%eax | ||
68 | jb verify_cpu_no_longmode # no cpuid 1 | ||
69 | |||
70 | xor %di,%di | ||
71 | cmpl $0x68747541,%ebx # AuthenticAMD | ||
72 | jnz verify_cpu_noamd | ||
73 | cmpl $0x69746e65,%edx | ||
74 | jnz verify_cpu_noamd | ||
75 | cmpl $0x444d4163,%ecx | ||
76 | jnz verify_cpu_noamd | ||
77 | mov $1,%di # cpu is from AMD | ||
78 | |||
79 | verify_cpu_noamd: | ||
80 | movl $0x1,%eax # Does the cpu have what it takes | ||
81 | cpuid | ||
82 | andl $REQUIRED_MASK1,%edx | ||
83 | xorl $REQUIRED_MASK1,%edx | ||
84 | jnz verify_cpu_no_longmode | ||
85 | |||
86 | movl $0x80000000,%eax # See if extended cpuid is implemented | ||
87 | cpuid | ||
88 | cmpl $0x80000001,%eax | ||
89 | jb verify_cpu_no_longmode # no extended cpuid | ||
90 | |||
91 | movl $0x80000001,%eax # Does the cpu have what it takes | ||
92 | cpuid | ||
93 | andl $REQUIRED_MASK2,%edx | ||
94 | xorl $REQUIRED_MASK2,%edx | ||
95 | jnz verify_cpu_no_longmode | ||
96 | |||
97 | verify_cpu_sse_test: | ||
98 | movl $1,%eax | ||
99 | cpuid | ||
100 | andl $SSE_MASK,%edx | ||
101 | cmpl $SSE_MASK,%edx | ||
102 | je verify_cpu_sse_ok | ||
103 | test %di,%di | ||
104 | jz verify_cpu_no_longmode # only try to force SSE on AMD | ||
105 | movl $0xc0010015,%ecx # HWCR | ||
106 | rdmsr | ||
107 | btr $15,%eax # enable SSE | ||
108 | wrmsr | ||
109 | xor %di,%di # don't loop | ||
110 | jmp verify_cpu_sse_test # try again | ||
111 | |||
112 | verify_cpu_no_longmode: | ||
113 | popfl # Restore caller passed flags | ||
114 | movl $1,%eax | ||
115 | ret | ||
116 | verify_cpu_sse_ok: | ||
117 | popfl # Restore caller passed flags | ||
118 | xorl %eax, %eax | ||
119 | ret | ||
diff --git a/arch/x86_64/kernel/vmlinux.lds.S b/arch/x86_64/kernel/vmlinux.lds.S index 5176ecf006e..88cfa50b424 100644 --- a/arch/x86_64/kernel/vmlinux.lds.S +++ b/arch/x86_64/kernel/vmlinux.lds.S | |||
@@ -29,9 +29,7 @@ SECTIONS | |||
29 | .text : AT(ADDR(.text) - LOAD_OFFSET) { | 29 | .text : AT(ADDR(.text) - LOAD_OFFSET) { |
30 | /* First the code that has to be first for bootstrapping */ | 30 | /* First the code that has to be first for bootstrapping */ |
31 | *(.bootstrap.text) | 31 | *(.bootstrap.text) |
32 | /* Then all the functions that are "hot" in profiles, to group them | 32 | _stext = .; |
33 | onto the same hugetlb entry */ | ||
34 | #include "functionlist" | ||
35 | /* Then the rest */ | 33 | /* Then the rest */ |
36 | *(.text) | 34 | *(.text) |
37 | SCHED_TEXT | 35 | SCHED_TEXT |
@@ -50,10 +48,10 @@ SECTIONS | |||
50 | __ex_table : AT(ADDR(__ex_table) - LOAD_OFFSET) { *(__ex_table) } | 48 | __ex_table : AT(ADDR(__ex_table) - LOAD_OFFSET) { *(__ex_table) } |
51 | __stop___ex_table = .; | 49 | __stop___ex_table = .; |
52 | 50 | ||
53 | RODATA | ||
54 | |||
55 | BUG_TABLE | 51 | BUG_TABLE |
56 | 52 | ||
53 | RODATA | ||
54 | |||
57 | . = ALIGN(PAGE_SIZE); /* Align data segment to page size boundary */ | 55 | . = ALIGN(PAGE_SIZE); /* Align data segment to page size boundary */ |
58 | /* Data */ | 56 | /* Data */ |
59 | .data : AT(ADDR(.data) - LOAD_OFFSET) { | 57 | .data : AT(ADDR(.data) - LOAD_OFFSET) { |
@@ -94,6 +92,12 @@ SECTIONS | |||
94 | { *(.vsyscall_gtod_data) } | 92 | { *(.vsyscall_gtod_data) } |
95 | vsyscall_gtod_data = VVIRT(.vsyscall_gtod_data); | 93 | vsyscall_gtod_data = VVIRT(.vsyscall_gtod_data); |
96 | 94 | ||
95 | |||
96 | .vsyscall_1 ADDR(.vsyscall_0) + 1024: AT(VLOAD(.vsyscall_1)) | ||
97 | { *(.vsyscall_1) } | ||
98 | .vsyscall_2 ADDR(.vsyscall_0) + 2048: AT(VLOAD(.vsyscall_2)) | ||
99 | { *(.vsyscall_2) } | ||
100 | |||
97 | .vgetcpu_mode : AT(VLOAD(.vgetcpu_mode)) { *(.vgetcpu_mode) } | 101 | .vgetcpu_mode : AT(VLOAD(.vgetcpu_mode)) { *(.vgetcpu_mode) } |
98 | vgetcpu_mode = VVIRT(.vgetcpu_mode); | 102 | vgetcpu_mode = VVIRT(.vgetcpu_mode); |
99 | 103 | ||
@@ -101,10 +105,6 @@ SECTIONS | |||
101 | .jiffies : AT(VLOAD(.jiffies)) { *(.jiffies) } | 105 | .jiffies : AT(VLOAD(.jiffies)) { *(.jiffies) } |
102 | jiffies = VVIRT(.jiffies); | 106 | jiffies = VVIRT(.jiffies); |
103 | 107 | ||
104 | .vsyscall_1 ADDR(.vsyscall_0) + 1024: AT(VLOAD(.vsyscall_1)) | ||
105 | { *(.vsyscall_1) } | ||
106 | .vsyscall_2 ADDR(.vsyscall_0) + 2048: AT(VLOAD(.vsyscall_2)) | ||
107 | { *(.vsyscall_2) } | ||
108 | .vsyscall_3 ADDR(.vsyscall_0) + 3072: AT(VLOAD(.vsyscall_3)) | 108 | .vsyscall_3 ADDR(.vsyscall_0) + 3072: AT(VLOAD(.vsyscall_3)) |
109 | { *(.vsyscall_3) } | 109 | { *(.vsyscall_3) } |
110 | 110 | ||
@@ -194,7 +194,7 @@ SECTIONS | |||
194 | __initramfs_end = .; | 194 | __initramfs_end = .; |
195 | #endif | 195 | #endif |
196 | 196 | ||
197 | . = ALIGN(CONFIG_X86_L1_CACHE_BYTES); | 197 | . = ALIGN(4096); |
198 | __per_cpu_start = .; | 198 | __per_cpu_start = .; |
199 | .data.percpu : AT(ADDR(.data.percpu) - LOAD_OFFSET) { *(.data.percpu) } | 199 | .data.percpu : AT(ADDR(.data.percpu) - LOAD_OFFSET) { *(.data.percpu) } |
200 | __per_cpu_end = .; | 200 | __per_cpu_end = .; |
diff --git a/arch/x86_64/kernel/vsyscall.c b/arch/x86_64/kernel/vsyscall.c index b43c698cf7d..dc32cef9619 100644 --- a/arch/x86_64/kernel/vsyscall.c +++ b/arch/x86_64/kernel/vsyscall.c | |||
@@ -45,14 +45,34 @@ | |||
45 | 45 | ||
46 | #define __vsyscall(nr) __attribute__ ((unused,__section__(".vsyscall_" #nr))) | 46 | #define __vsyscall(nr) __attribute__ ((unused,__section__(".vsyscall_" #nr))) |
47 | #define __syscall_clobber "r11","rcx","memory" | 47 | #define __syscall_clobber "r11","rcx","memory" |
48 | #define __pa_vsymbol(x) \ | ||
49 | ({unsigned long v; \ | ||
50 | extern char __vsyscall_0; \ | ||
51 | asm("" : "=r" (v) : "0" (x)); \ | ||
52 | ((v - VSYSCALL_FIRST_PAGE) + __pa_symbol(&__vsyscall_0)); }) | ||
48 | 53 | ||
54 | /* | ||
55 | * vsyscall_gtod_data contains data that is: | ||
56 | * - readonly from vsyscalls | ||
57 | * - written by timer interrupt or sysctl (/proc/sys/kernel/vsyscall64) | ||
58 | * Try to keep this structure as small as possible to avoid cache line ping pongs | ||
59 | */ | ||
49 | struct vsyscall_gtod_data_t { | 60 | struct vsyscall_gtod_data_t { |
50 | seqlock_t lock; | 61 | seqlock_t lock; |
51 | int sysctl_enabled; | 62 | |
52 | struct timeval wall_time_tv; | 63 | /* open coded 'struct timespec' */ |
64 | time_t wall_time_sec; | ||
65 | u32 wall_time_nsec; | ||
66 | |||
67 | int sysctl_enabled; | ||
53 | struct timezone sys_tz; | 68 | struct timezone sys_tz; |
54 | cycle_t offset_base; | 69 | struct { /* extract of a clocksource struct */ |
55 | struct clocksource clock; | 70 | cycle_t (*vread)(void); |
71 | cycle_t cycle_last; | ||
72 | cycle_t mask; | ||
73 | u32 mult; | ||
74 | u32 shift; | ||
75 | } clock; | ||
56 | }; | 76 | }; |
57 | int __vgetcpu_mode __section_vgetcpu_mode; | 77 | int __vgetcpu_mode __section_vgetcpu_mode; |
58 | 78 | ||
@@ -68,9 +88,13 @@ void update_vsyscall(struct timespec *wall_time, struct clocksource *clock) | |||
68 | 88 | ||
69 | write_seqlock_irqsave(&vsyscall_gtod_data.lock, flags); | 89 | write_seqlock_irqsave(&vsyscall_gtod_data.lock, flags); |
70 | /* copy vsyscall data */ | 90 | /* copy vsyscall data */ |
71 | vsyscall_gtod_data.clock = *clock; | 91 | vsyscall_gtod_data.clock.vread = clock->vread; |
72 | vsyscall_gtod_data.wall_time_tv.tv_sec = wall_time->tv_sec; | 92 | vsyscall_gtod_data.clock.cycle_last = clock->cycle_last; |
73 | vsyscall_gtod_data.wall_time_tv.tv_usec = wall_time->tv_nsec/1000; | 93 | vsyscall_gtod_data.clock.mask = clock->mask; |
94 | vsyscall_gtod_data.clock.mult = clock->mult; | ||
95 | vsyscall_gtod_data.clock.shift = clock->shift; | ||
96 | vsyscall_gtod_data.wall_time_sec = wall_time->tv_sec; | ||
97 | vsyscall_gtod_data.wall_time_nsec = wall_time->tv_nsec; | ||
74 | vsyscall_gtod_data.sys_tz = sys_tz; | 98 | vsyscall_gtod_data.sys_tz = sys_tz; |
75 | write_sequnlock_irqrestore(&vsyscall_gtod_data.lock, flags); | 99 | write_sequnlock_irqrestore(&vsyscall_gtod_data.lock, flags); |
76 | } | 100 | } |
@@ -105,7 +129,8 @@ static __always_inline long time_syscall(long *t) | |||
105 | static __always_inline void do_vgettimeofday(struct timeval * tv) | 129 | static __always_inline void do_vgettimeofday(struct timeval * tv) |
106 | { | 130 | { |
107 | cycle_t now, base, mask, cycle_delta; | 131 | cycle_t now, base, mask, cycle_delta; |
108 | unsigned long seq, mult, shift, nsec_delta; | 132 | unsigned seq; |
133 | unsigned long mult, shift, nsec; | ||
109 | cycle_t (*vread)(void); | 134 | cycle_t (*vread)(void); |
110 | do { | 135 | do { |
111 | seq = read_seqbegin(&__vsyscall_gtod_data.lock); | 136 | seq = read_seqbegin(&__vsyscall_gtod_data.lock); |
@@ -121,21 +146,20 @@ static __always_inline void do_vgettimeofday(struct timeval * tv) | |||
121 | mult = __vsyscall_gtod_data.clock.mult; | 146 | mult = __vsyscall_gtod_data.clock.mult; |
122 | shift = __vsyscall_gtod_data.clock.shift; | 147 | shift = __vsyscall_gtod_data.clock.shift; |
123 | 148 | ||
124 | *tv = __vsyscall_gtod_data.wall_time_tv; | 149 | tv->tv_sec = __vsyscall_gtod_data.wall_time_sec; |
125 | 150 | nsec = __vsyscall_gtod_data.wall_time_nsec; | |
126 | } while (read_seqretry(&__vsyscall_gtod_data.lock, seq)); | 151 | } while (read_seqretry(&__vsyscall_gtod_data.lock, seq)); |
127 | 152 | ||
128 | /* calculate interval: */ | 153 | /* calculate interval: */ |
129 | cycle_delta = (now - base) & mask; | 154 | cycle_delta = (now - base) & mask; |
130 | /* convert to nsecs: */ | 155 | /* convert to nsecs: */ |
131 | nsec_delta = (cycle_delta * mult) >> shift; | 156 | nsec += (cycle_delta * mult) >> shift; |
132 | 157 | ||
133 | /* convert to usecs and add to timespec: */ | 158 | while (nsec >= NSEC_PER_SEC) { |
134 | tv->tv_usec += nsec_delta / NSEC_PER_USEC; | ||
135 | while (tv->tv_usec > USEC_PER_SEC) { | ||
136 | tv->tv_sec += 1; | 159 | tv->tv_sec += 1; |
137 | tv->tv_usec -= USEC_PER_SEC; | 160 | nsec -= NSEC_PER_SEC; |
138 | } | 161 | } |
162 | tv->tv_usec = nsec / NSEC_PER_USEC; | ||
139 | } | 163 | } |
140 | 164 | ||
141 | int __vsyscall(0) vgettimeofday(struct timeval * tv, struct timezone * tz) | 165 | int __vsyscall(0) vgettimeofday(struct timeval * tv, struct timezone * tz) |
@@ -151,11 +175,13 @@ int __vsyscall(0) vgettimeofday(struct timeval * tv, struct timezone * tz) | |||
151 | * unlikely */ | 175 | * unlikely */ |
152 | time_t __vsyscall(1) vtime(time_t *t) | 176 | time_t __vsyscall(1) vtime(time_t *t) |
153 | { | 177 | { |
178 | time_t result; | ||
154 | if (unlikely(!__vsyscall_gtod_data.sysctl_enabled)) | 179 | if (unlikely(!__vsyscall_gtod_data.sysctl_enabled)) |
155 | return time_syscall(t); | 180 | return time_syscall(t); |
156 | else if (t) | 181 | result = __vsyscall_gtod_data.wall_time_sec; |
157 | *t = __vsyscall_gtod_data.wall_time_tv.tv_sec; | 182 | if (t) |
158 | return __vsyscall_gtod_data.wall_time_tv.tv_sec; | 183 | *t = result; |
184 | return result; | ||
159 | } | 185 | } |
160 | 186 | ||
161 | /* Fast way to get current CPU and node. | 187 | /* Fast way to get current CPU and node. |
@@ -224,10 +250,10 @@ static int vsyscall_sysctl_change(ctl_table *ctl, int write, struct file * filp, | |||
224 | return ret; | 250 | return ret; |
225 | /* gcc has some trouble with __va(__pa()), so just do it this | 251 | /* gcc has some trouble with __va(__pa()), so just do it this |
226 | way. */ | 252 | way. */ |
227 | map1 = ioremap(__pa_symbol(&vsysc1), 2); | 253 | map1 = ioremap(__pa_vsymbol(&vsysc1), 2); |
228 | if (!map1) | 254 | if (!map1) |
229 | return -ENOMEM; | 255 | return -ENOMEM; |
230 | map2 = ioremap(__pa_symbol(&vsysc2), 2); | 256 | map2 = ioremap(__pa_vsymbol(&vsysc2), 2); |
231 | if (!map2) { | 257 | if (!map2) { |
232 | ret = -ENOMEM; | 258 | ret = -ENOMEM; |
233 | goto out; | 259 | goto out; |
diff --git a/arch/x86_64/mm/fault.c b/arch/x86_64/mm/fault.c index 6ada7231f3a..de99dba2c51 100644 --- a/arch/x86_64/mm/fault.c +++ b/arch/x86_64/mm/fault.c | |||
@@ -585,7 +585,7 @@ do_sigbus: | |||
585 | } | 585 | } |
586 | 586 | ||
587 | DEFINE_SPINLOCK(pgd_lock); | 587 | DEFINE_SPINLOCK(pgd_lock); |
588 | struct page *pgd_list; | 588 | LIST_HEAD(pgd_list); |
589 | 589 | ||
590 | void vmalloc_sync_all(void) | 590 | void vmalloc_sync_all(void) |
591 | { | 591 | { |
@@ -605,8 +605,7 @@ void vmalloc_sync_all(void) | |||
605 | if (pgd_none(*pgd_ref)) | 605 | if (pgd_none(*pgd_ref)) |
606 | continue; | 606 | continue; |
607 | spin_lock(&pgd_lock); | 607 | spin_lock(&pgd_lock); |
608 | for (page = pgd_list; page; | 608 | list_for_each_entry(page, &pgd_list, lru) { |
609 | page = (struct page *)page->index) { | ||
610 | pgd_t *pgd; | 609 | pgd_t *pgd; |
611 | pgd = (pgd_t *)page_address(page) + pgd_index(address); | 610 | pgd = (pgd_t *)page_address(page) + pgd_index(address); |
612 | if (pgd_none(*pgd)) | 611 | if (pgd_none(*pgd)) |
diff --git a/arch/x86_64/mm/init.c b/arch/x86_64/mm/init.c index ec31534eb10..282b0a8f00a 100644 --- a/arch/x86_64/mm/init.c +++ b/arch/x86_64/mm/init.c | |||
@@ -22,10 +22,12 @@ | |||
22 | #include <linux/bootmem.h> | 22 | #include <linux/bootmem.h> |
23 | #include <linux/proc_fs.h> | 23 | #include <linux/proc_fs.h> |
24 | #include <linux/pci.h> | 24 | #include <linux/pci.h> |
25 | #include <linux/pfn.h> | ||
25 | #include <linux/poison.h> | 26 | #include <linux/poison.h> |
26 | #include <linux/dma-mapping.h> | 27 | #include <linux/dma-mapping.h> |
27 | #include <linux/module.h> | 28 | #include <linux/module.h> |
28 | #include <linux/memory_hotplug.h> | 29 | #include <linux/memory_hotplug.h> |
30 | #include <linux/nmi.h> | ||
29 | 31 | ||
30 | #include <asm/processor.h> | 32 | #include <asm/processor.h> |
31 | #include <asm/system.h> | 33 | #include <asm/system.h> |
@@ -46,7 +48,7 @@ | |||
46 | #define Dprintk(x...) | 48 | #define Dprintk(x...) |
47 | #endif | 49 | #endif |
48 | 50 | ||
49 | struct dma_mapping_ops* dma_ops; | 51 | const struct dma_mapping_ops* dma_ops; |
50 | EXPORT_SYMBOL(dma_ops); | 52 | EXPORT_SYMBOL(dma_ops); |
51 | 53 | ||
52 | static unsigned long dma_reserve __initdata; | 54 | static unsigned long dma_reserve __initdata; |
@@ -72,6 +74,11 @@ void show_mem(void) | |||
72 | 74 | ||
73 | for_each_online_pgdat(pgdat) { | 75 | for_each_online_pgdat(pgdat) { |
74 | for (i = 0; i < pgdat->node_spanned_pages; ++i) { | 76 | for (i = 0; i < pgdat->node_spanned_pages; ++i) { |
77 | /* this loop can take a while with 256 GB and 4k pages | ||
78 | so update the NMI watchdog */ | ||
79 | if (unlikely(i % MAX_ORDER_NR_PAGES == 0)) { | ||
80 | touch_nmi_watchdog(); | ||
81 | } | ||
75 | page = pfn_to_page(pgdat->node_start_pfn + i); | 82 | page = pfn_to_page(pgdat->node_start_pfn + i); |
76 | total++; | 83 | total++; |
77 | if (PageReserved(page)) | 84 | if (PageReserved(page)) |
@@ -167,23 +174,9 @@ __set_fixmap (enum fixed_addresses idx, unsigned long phys, pgprot_t prot) | |||
167 | 174 | ||
168 | unsigned long __initdata table_start, table_end; | 175 | unsigned long __initdata table_start, table_end; |
169 | 176 | ||
170 | extern pmd_t temp_boot_pmds[]; | 177 | static __meminit void *alloc_low_page(unsigned long *phys) |
171 | |||
172 | static struct temp_map { | ||
173 | pmd_t *pmd; | ||
174 | void *address; | ||
175 | int allocated; | ||
176 | } temp_mappings[] __initdata = { | ||
177 | { &temp_boot_pmds[0], (void *)(40UL * 1024 * 1024) }, | ||
178 | { &temp_boot_pmds[1], (void *)(42UL * 1024 * 1024) }, | ||
179 | {} | ||
180 | }; | ||
181 | |||
182 | static __meminit void *alloc_low_page(int *index, unsigned long *phys) | ||
183 | { | 178 | { |
184 | struct temp_map *ti; | 179 | unsigned long pfn = table_end++; |
185 | int i; | ||
186 | unsigned long pfn = table_end++, paddr; | ||
187 | void *adr; | 180 | void *adr; |
188 | 181 | ||
189 | if (after_bootmem) { | 182 | if (after_bootmem) { |
@@ -194,57 +187,63 @@ static __meminit void *alloc_low_page(int *index, unsigned long *phys) | |||
194 | 187 | ||
195 | if (pfn >= end_pfn) | 188 | if (pfn >= end_pfn) |
196 | panic("alloc_low_page: ran out of memory"); | 189 | panic("alloc_low_page: ran out of memory"); |
197 | for (i = 0; temp_mappings[i].allocated; i++) { | 190 | |
198 | if (!temp_mappings[i].pmd) | 191 | adr = early_ioremap(pfn * PAGE_SIZE, PAGE_SIZE); |
199 | panic("alloc_low_page: ran out of temp mappings"); | ||
200 | } | ||
201 | ti = &temp_mappings[i]; | ||
202 | paddr = (pfn << PAGE_SHIFT) & PMD_MASK; | ||
203 | set_pmd(ti->pmd, __pmd(paddr | _KERNPG_TABLE | _PAGE_PSE)); | ||
204 | ti->allocated = 1; | ||
205 | __flush_tlb(); | ||
206 | adr = ti->address + ((pfn << PAGE_SHIFT) & ~PMD_MASK); | ||
207 | memset(adr, 0, PAGE_SIZE); | 192 | memset(adr, 0, PAGE_SIZE); |
208 | *index = i; | 193 | *phys = pfn * PAGE_SIZE; |
209 | *phys = pfn * PAGE_SIZE; | 194 | return adr; |
210 | return adr; | 195 | } |
211 | } | ||
212 | 196 | ||
213 | static __meminit void unmap_low_page(int i) | 197 | static __meminit void unmap_low_page(void *adr) |
214 | { | 198 | { |
215 | struct temp_map *ti; | ||
216 | 199 | ||
217 | if (after_bootmem) | 200 | if (after_bootmem) |
218 | return; | 201 | return; |
219 | 202 | ||
220 | ti = &temp_mappings[i]; | 203 | early_iounmap(adr, PAGE_SIZE); |
221 | set_pmd(ti->pmd, __pmd(0)); | ||
222 | ti->allocated = 0; | ||
223 | } | 204 | } |
224 | 205 | ||
225 | /* Must run before zap_low_mappings */ | 206 | /* Must run before zap_low_mappings */ |
226 | __init void *early_ioremap(unsigned long addr, unsigned long size) | 207 | __init void *early_ioremap(unsigned long addr, unsigned long size) |
227 | { | 208 | { |
228 | unsigned long map = round_down(addr, LARGE_PAGE_SIZE); | 209 | unsigned long vaddr; |
229 | 210 | pmd_t *pmd, *last_pmd; | |
230 | /* actually usually some more */ | 211 | int i, pmds; |
231 | if (size >= LARGE_PAGE_SIZE) { | 212 | |
232 | return NULL; | 213 | pmds = ((addr & ~PMD_MASK) + size + ~PMD_MASK) / PMD_SIZE; |
214 | vaddr = __START_KERNEL_map; | ||
215 | pmd = level2_kernel_pgt; | ||
216 | last_pmd = level2_kernel_pgt + PTRS_PER_PMD - 1; | ||
217 | for (; pmd <= last_pmd; pmd++, vaddr += PMD_SIZE) { | ||
218 | for (i = 0; i < pmds; i++) { | ||
219 | if (pmd_present(pmd[i])) | ||
220 | goto next; | ||
221 | } | ||
222 | vaddr += addr & ~PMD_MASK; | ||
223 | addr &= PMD_MASK; | ||
224 | for (i = 0; i < pmds; i++, addr += PMD_SIZE) | ||
225 | set_pmd(pmd + i,__pmd(addr | _KERNPG_TABLE | _PAGE_PSE)); | ||
226 | __flush_tlb(); | ||
227 | return (void *)vaddr; | ||
228 | next: | ||
229 | ; | ||
233 | } | 230 | } |
234 | set_pmd(temp_mappings[0].pmd, __pmd(map | _KERNPG_TABLE | _PAGE_PSE)); | 231 | printk("early_ioremap(0x%lx, %lu) failed\n", addr, size); |
235 | map += LARGE_PAGE_SIZE; | 232 | return NULL; |
236 | set_pmd(temp_mappings[1].pmd, __pmd(map | _KERNPG_TABLE | _PAGE_PSE)); | ||
237 | __flush_tlb(); | ||
238 | return temp_mappings[0].address + (addr & (LARGE_PAGE_SIZE-1)); | ||
239 | } | 233 | } |
240 | 234 | ||
241 | /* To avoid virtual aliases later */ | 235 | /* To avoid virtual aliases later */ |
242 | __init void early_iounmap(void *addr, unsigned long size) | 236 | __init void early_iounmap(void *addr, unsigned long size) |
243 | { | 237 | { |
244 | if ((void *)round_down((unsigned long)addr, LARGE_PAGE_SIZE) != temp_mappings[0].address) | 238 | unsigned long vaddr; |
245 | printk("early_iounmap: bad address %p\n", addr); | 239 | pmd_t *pmd; |
246 | set_pmd(temp_mappings[0].pmd, __pmd(0)); | 240 | int i, pmds; |
247 | set_pmd(temp_mappings[1].pmd, __pmd(0)); | 241 | |
242 | vaddr = (unsigned long)addr; | ||
243 | pmds = ((vaddr & ~PMD_MASK) + size + ~PMD_MASK) / PMD_SIZE; | ||
244 | pmd = level2_kernel_pgt + pmd_index(vaddr); | ||
245 | for (i = 0; i < pmds; i++) | ||
246 | pmd_clear(pmd + i); | ||
248 | __flush_tlb(); | 247 | __flush_tlb(); |
249 | } | 248 | } |
250 | 249 | ||
@@ -289,7 +288,6 @@ static void __meminit phys_pud_init(pud_t *pud_page, unsigned long addr, unsigne | |||
289 | 288 | ||
290 | 289 | ||
291 | for (; i < PTRS_PER_PUD; i++, addr = (addr & PUD_MASK) + PUD_SIZE ) { | 290 | for (; i < PTRS_PER_PUD; i++, addr = (addr & PUD_MASK) + PUD_SIZE ) { |
292 | int map; | ||
293 | unsigned long pmd_phys; | 291 | unsigned long pmd_phys; |
294 | pud_t *pud = pud_page + pud_index(addr); | 292 | pud_t *pud = pud_page + pud_index(addr); |
295 | pmd_t *pmd; | 293 | pmd_t *pmd; |
@@ -307,12 +305,12 @@ static void __meminit phys_pud_init(pud_t *pud_page, unsigned long addr, unsigne | |||
307 | continue; | 305 | continue; |
308 | } | 306 | } |
309 | 307 | ||
310 | pmd = alloc_low_page(&map, &pmd_phys); | 308 | pmd = alloc_low_page(&pmd_phys); |
311 | spin_lock(&init_mm.page_table_lock); | 309 | spin_lock(&init_mm.page_table_lock); |
312 | set_pud(pud, __pud(pmd_phys | _KERNPG_TABLE)); | 310 | set_pud(pud, __pud(pmd_phys | _KERNPG_TABLE)); |
313 | phys_pmd_init(pmd, addr, end); | 311 | phys_pmd_init(pmd, addr, end); |
314 | spin_unlock(&init_mm.page_table_lock); | 312 | spin_unlock(&init_mm.page_table_lock); |
315 | unmap_low_page(map); | 313 | unmap_low_page(pmd); |
316 | } | 314 | } |
317 | __flush_tlb(); | 315 | __flush_tlb(); |
318 | } | 316 | } |
@@ -364,7 +362,6 @@ void __meminit init_memory_mapping(unsigned long start, unsigned long end) | |||
364 | end = (unsigned long)__va(end); | 362 | end = (unsigned long)__va(end); |
365 | 363 | ||
366 | for (; start < end; start = next) { | 364 | for (; start < end; start = next) { |
367 | int map; | ||
368 | unsigned long pud_phys; | 365 | unsigned long pud_phys; |
369 | pgd_t *pgd = pgd_offset_k(start); | 366 | pgd_t *pgd = pgd_offset_k(start); |
370 | pud_t *pud; | 367 | pud_t *pud; |
@@ -372,7 +369,7 @@ void __meminit init_memory_mapping(unsigned long start, unsigned long end) | |||
372 | if (after_bootmem) | 369 | if (after_bootmem) |
373 | pud = pud_offset(pgd, start & PGDIR_MASK); | 370 | pud = pud_offset(pgd, start & PGDIR_MASK); |
374 | else | 371 | else |
375 | pud = alloc_low_page(&map, &pud_phys); | 372 | pud = alloc_low_page(&pud_phys); |
376 | 373 | ||
377 | next = start + PGDIR_SIZE; | 374 | next = start + PGDIR_SIZE; |
378 | if (next > end) | 375 | if (next > end) |
@@ -380,7 +377,7 @@ void __meminit init_memory_mapping(unsigned long start, unsigned long end) | |||
380 | phys_pud_init(pud, __pa(start), __pa(next)); | 377 | phys_pud_init(pud, __pa(start), __pa(next)); |
381 | if (!after_bootmem) | 378 | if (!after_bootmem) |
382 | set_pgd(pgd_offset_k(start), mk_kernel_pgd(pud_phys)); | 379 | set_pgd(pgd_offset_k(start), mk_kernel_pgd(pud_phys)); |
383 | unmap_low_page(map); | 380 | unmap_low_page(pud); |
384 | } | 381 | } |
385 | 382 | ||
386 | if (!after_bootmem) | 383 | if (!after_bootmem) |
@@ -388,21 +385,6 @@ void __meminit init_memory_mapping(unsigned long start, unsigned long end) | |||
388 | __flush_tlb_all(); | 385 | __flush_tlb_all(); |
389 | } | 386 | } |
390 | 387 | ||
391 | void __cpuinit zap_low_mappings(int cpu) | ||
392 | { | ||
393 | if (cpu == 0) { | ||
394 | pgd_t *pgd = pgd_offset_k(0UL); | ||
395 | pgd_clear(pgd); | ||
396 | } else { | ||
397 | /* | ||
398 | * For AP's, zap the low identity mappings by changing the cr3 | ||
399 | * to init_level4_pgt and doing local flush tlb all | ||
400 | */ | ||
401 | asm volatile("movq %0,%%cr3" :: "r" (__pa_symbol(&init_level4_pgt))); | ||
402 | } | ||
403 | __flush_tlb_all(); | ||
404 | } | ||
405 | |||
406 | #ifndef CONFIG_NUMA | 388 | #ifndef CONFIG_NUMA |
407 | void __init paging_init(void) | 389 | void __init paging_init(void) |
408 | { | 390 | { |
@@ -579,15 +561,6 @@ void __init mem_init(void) | |||
579 | reservedpages << (PAGE_SHIFT-10), | 561 | reservedpages << (PAGE_SHIFT-10), |
580 | datasize >> 10, | 562 | datasize >> 10, |
581 | initsize >> 10); | 563 | initsize >> 10); |
582 | |||
583 | #ifdef CONFIG_SMP | ||
584 | /* | ||
585 | * Sync boot_level4_pgt mappings with the init_level4_pgt | ||
586 | * except for the low identity mappings which are already zapped | ||
587 | * in init_level4_pgt. This sync-up is essential for AP's bringup | ||
588 | */ | ||
589 | memcpy(boot_level4_pgt+1, init_level4_pgt+1, (PTRS_PER_PGD-1)*sizeof(pgd_t)); | ||
590 | #endif | ||
591 | } | 564 | } |
592 | 565 | ||
593 | void free_init_pages(char *what, unsigned long begin, unsigned long end) | 566 | void free_init_pages(char *what, unsigned long begin, unsigned long end) |
@@ -597,37 +570,44 @@ void free_init_pages(char *what, unsigned long begin, unsigned long end) | |||
597 | if (begin >= end) | 570 | if (begin >= end) |
598 | return; | 571 | return; |
599 | 572 | ||
600 | printk(KERN_INFO "Freeing %s: %ldk freed\n", what, (end - begin) >> 10); | 573 | printk(KERN_INFO "Freeing %s: %luk freed\n", what, (end - begin) >> 10); |
601 | for (addr = begin; addr < end; addr += PAGE_SIZE) { | 574 | for (addr = begin; addr < end; addr += PAGE_SIZE) { |
602 | ClearPageReserved(virt_to_page(addr)); | 575 | struct page *page = pfn_to_page(addr >> PAGE_SHIFT); |
603 | init_page_count(virt_to_page(addr)); | 576 | ClearPageReserved(page); |
604 | memset((void *)(addr & ~(PAGE_SIZE-1)), | 577 | init_page_count(page); |
605 | POISON_FREE_INITMEM, PAGE_SIZE); | 578 | memset(page_address(page), POISON_FREE_INITMEM, PAGE_SIZE); |
606 | free_page(addr); | 579 | if (addr >= __START_KERNEL_map) |
580 | change_page_attr_addr(addr, 1, __pgprot(0)); | ||
581 | __free_page(page); | ||
607 | totalram_pages++; | 582 | totalram_pages++; |
608 | } | 583 | } |
584 | if (addr > __START_KERNEL_map) | ||
585 | global_flush_tlb(); | ||
609 | } | 586 | } |
610 | 587 | ||
611 | void free_initmem(void) | 588 | void free_initmem(void) |
612 | { | 589 | { |
613 | memset(__initdata_begin, POISON_FREE_INITDATA, | ||
614 | __initdata_end - __initdata_begin); | ||
615 | free_init_pages("unused kernel memory", | 590 | free_init_pages("unused kernel memory", |
616 | (unsigned long)(&__init_begin), | 591 | __pa_symbol(&__init_begin), |
617 | (unsigned long)(&__init_end)); | 592 | __pa_symbol(&__init_end)); |
618 | } | 593 | } |
619 | 594 | ||
620 | #ifdef CONFIG_DEBUG_RODATA | 595 | #ifdef CONFIG_DEBUG_RODATA |
621 | 596 | ||
622 | void mark_rodata_ro(void) | 597 | void mark_rodata_ro(void) |
623 | { | 598 | { |
624 | unsigned long addr = (unsigned long)__start_rodata; | 599 | unsigned long start = PFN_ALIGN(__va(__pa_symbol(&_stext))), size; |
625 | 600 | ||
626 | for (; addr < (unsigned long)__end_rodata; addr += PAGE_SIZE) | 601 | #ifdef CONFIG_HOTPLUG_CPU |
627 | change_page_attr_addr(addr, 1, PAGE_KERNEL_RO); | 602 | /* It must still be possible to apply SMP alternatives. */ |
603 | if (num_possible_cpus() > 1) | ||
604 | start = PFN_ALIGN(__va(__pa_symbol(&_etext))); | ||
605 | #endif | ||
606 | size = (unsigned long)__va(__pa_symbol(&__end_rodata)) - start; | ||
607 | change_page_attr_addr(start, size >> PAGE_SHIFT, PAGE_KERNEL_RO); | ||
628 | 608 | ||
629 | printk ("Write protecting the kernel read-only data: %luk\n", | 609 | printk(KERN_INFO "Write protecting the kernel read-only data: %luk\n", |
630 | (__end_rodata - __start_rodata) >> 10); | 610 | size >> 10); |
631 | 611 | ||
632 | /* | 612 | /* |
633 | * change_page_attr_addr() requires a global_flush_tlb() call after it. | 613 | * change_page_attr_addr() requires a global_flush_tlb() call after it. |
@@ -642,7 +622,7 @@ void mark_rodata_ro(void) | |||
642 | #ifdef CONFIG_BLK_DEV_INITRD | 622 | #ifdef CONFIG_BLK_DEV_INITRD |
643 | void free_initrd_mem(unsigned long start, unsigned long end) | 623 | void free_initrd_mem(unsigned long start, unsigned long end) |
644 | { | 624 | { |
645 | free_init_pages("initrd memory", start, end); | 625 | free_init_pages("initrd memory", __pa(start), __pa(end)); |
646 | } | 626 | } |
647 | #endif | 627 | #endif |
648 | 628 | ||
diff --git a/arch/x86_64/mm/k8topology.c b/arch/x86_64/mm/k8topology.c index b5b8dba28b4..f983c75825d 100644 --- a/arch/x86_64/mm/k8topology.c +++ b/arch/x86_64/mm/k8topology.c | |||
@@ -49,11 +49,8 @@ int __init k8_scan_nodes(unsigned long start, unsigned long end) | |||
49 | int found = 0; | 49 | int found = 0; |
50 | u32 reg; | 50 | u32 reg; |
51 | unsigned numnodes; | 51 | unsigned numnodes; |
52 | nodemask_t nodes_parsed; | ||
53 | unsigned dualcore = 0; | 52 | unsigned dualcore = 0; |
54 | 53 | ||
55 | nodes_clear(nodes_parsed); | ||
56 | |||
57 | if (!early_pci_allowed()) | 54 | if (!early_pci_allowed()) |
58 | return -1; | 55 | return -1; |
59 | 56 | ||
@@ -65,6 +62,8 @@ int __init k8_scan_nodes(unsigned long start, unsigned long end) | |||
65 | 62 | ||
66 | reg = read_pci_config(0, nb, 0, 0x60); | 63 | reg = read_pci_config(0, nb, 0, 0x60); |
67 | numnodes = ((reg >> 4) & 0xF) + 1; | 64 | numnodes = ((reg >> 4) & 0xF) + 1; |
65 | if (numnodes <= 1) | ||
66 | return -1; | ||
68 | 67 | ||
69 | printk(KERN_INFO "Number of nodes %d\n", numnodes); | 68 | printk(KERN_INFO "Number of nodes %d\n", numnodes); |
70 | 69 | ||
@@ -102,7 +101,7 @@ int __init k8_scan_nodes(unsigned long start, unsigned long end) | |||
102 | nodeid, (base>>8)&3, (limit>>8) & 3); | 101 | nodeid, (base>>8)&3, (limit>>8) & 3); |
103 | return -1; | 102 | return -1; |
104 | } | 103 | } |
105 | if (node_isset(nodeid, nodes_parsed)) { | 104 | if (node_isset(nodeid, node_possible_map)) { |
106 | printk(KERN_INFO "Node %d already present. Skipping\n", | 105 | printk(KERN_INFO "Node %d already present. Skipping\n", |
107 | nodeid); | 106 | nodeid); |
108 | continue; | 107 | continue; |
@@ -155,7 +154,7 @@ int __init k8_scan_nodes(unsigned long start, unsigned long end) | |||
155 | 154 | ||
156 | prevbase = base; | 155 | prevbase = base; |
157 | 156 | ||
158 | node_set(nodeid, nodes_parsed); | 157 | node_set(nodeid, node_possible_map); |
159 | } | 158 | } |
160 | 159 | ||
161 | if (!found) | 160 | if (!found) |
diff --git a/arch/x86_64/mm/numa.c b/arch/x86_64/mm/numa.c index 41b8fb06992..51548947ad3 100644 --- a/arch/x86_64/mm/numa.c +++ b/arch/x86_64/mm/numa.c | |||
@@ -273,125 +273,213 @@ void __init numa_init_array(void) | |||
273 | 273 | ||
274 | #ifdef CONFIG_NUMA_EMU | 274 | #ifdef CONFIG_NUMA_EMU |
275 | /* Numa emulation */ | 275 | /* Numa emulation */ |
276 | int numa_fake __initdata = 0; | 276 | #define E820_ADDR_HOLE_SIZE(start, end) \ |
277 | (e820_hole_size((start) >> PAGE_SHIFT, (end) >> PAGE_SHIFT) << \ | ||
278 | PAGE_SHIFT) | ||
279 | char *cmdline __initdata; | ||
277 | 280 | ||
278 | /* | 281 | /* |
279 | * This function is used to find out if the start and end correspond to | 282 | * Setups up nid to range from addr to addr + size. If the end boundary is |
280 | * different zones. | 283 | * greater than max_addr, then max_addr is used instead. The return value is 0 |
284 | * if there is additional memory left for allocation past addr and -1 otherwise. | ||
285 | * addr is adjusted to be at the end of the node. | ||
281 | */ | 286 | */ |
282 | int zone_cross_over(unsigned long start, unsigned long end) | 287 | static int __init setup_node_range(int nid, struct bootnode *nodes, u64 *addr, |
288 | u64 size, u64 max_addr) | ||
283 | { | 289 | { |
284 | if ((start < (MAX_DMA32_PFN << PAGE_SHIFT)) && | 290 | int ret = 0; |
285 | (end >= (MAX_DMA32_PFN << PAGE_SHIFT))) | 291 | nodes[nid].start = *addr; |
286 | return 1; | 292 | *addr += size; |
287 | return 0; | 293 | if (*addr >= max_addr) { |
294 | *addr = max_addr; | ||
295 | ret = -1; | ||
296 | } | ||
297 | nodes[nid].end = *addr; | ||
298 | node_set(nid, node_possible_map); | ||
299 | printk(KERN_INFO "Faking node %d at %016Lx-%016Lx (%LuMB)\n", nid, | ||
300 | nodes[nid].start, nodes[nid].end, | ||
301 | (nodes[nid].end - nodes[nid].start) >> 20); | ||
302 | return ret; | ||
288 | } | 303 | } |
289 | 304 | ||
290 | static int __init numa_emulation(unsigned long start_pfn, unsigned long end_pfn) | 305 | /* |
306 | * Splits num_nodes nodes up equally starting at node_start. The return value | ||
307 | * is the number of nodes split up and addr is adjusted to be at the end of the | ||
308 | * last node allocated. | ||
309 | */ | ||
310 | static int __init split_nodes_equally(struct bootnode *nodes, u64 *addr, | ||
311 | u64 max_addr, int node_start, | ||
312 | int num_nodes) | ||
291 | { | 313 | { |
292 | int i, big; | 314 | unsigned int big; |
293 | struct bootnode nodes[MAX_NUMNODES]; | 315 | u64 size; |
294 | unsigned long sz, old_sz; | 316 | int i; |
295 | unsigned long hole_size; | ||
296 | unsigned long start, end; | ||
297 | unsigned long max_addr = (end_pfn << PAGE_SHIFT); | ||
298 | |||
299 | start = (start_pfn << PAGE_SHIFT); | ||
300 | hole_size = e820_hole_size(start, max_addr); | ||
301 | sz = (max_addr - start - hole_size) / numa_fake; | ||
302 | |||
303 | /* Kludge needed for the hash function */ | ||
304 | |||
305 | old_sz = sz; | ||
306 | /* | ||
307 | * Round down to the nearest FAKE_NODE_MIN_SIZE. | ||
308 | */ | ||
309 | sz &= FAKE_NODE_MIN_HASH_MASK; | ||
310 | 317 | ||
318 | if (num_nodes <= 0) | ||
319 | return -1; | ||
320 | if (num_nodes > MAX_NUMNODES) | ||
321 | num_nodes = MAX_NUMNODES; | ||
322 | size = (max_addr - *addr - E820_ADDR_HOLE_SIZE(*addr, max_addr)) / | ||
323 | num_nodes; | ||
311 | /* | 324 | /* |
312 | * We ensure that each node is at least 64MB big. Smaller than this | 325 | * Calculate the number of big nodes that can be allocated as a result |
313 | * size can cause VM hiccups. | 326 | * of consolidating the leftovers. |
314 | */ | 327 | */ |
315 | if (sz == 0) { | 328 | big = ((size & ~FAKE_NODE_MIN_HASH_MASK) * num_nodes) / |
316 | printk(KERN_INFO "Not enough memory for %d nodes. Reducing " | 329 | FAKE_NODE_MIN_SIZE; |
317 | "the number of nodes\n", numa_fake); | 330 | |
318 | numa_fake = (max_addr - start - hole_size) / FAKE_NODE_MIN_SIZE; | 331 | /* Round down to nearest FAKE_NODE_MIN_SIZE. */ |
319 | printk(KERN_INFO "Number of fake nodes will be = %d\n", | 332 | size &= FAKE_NODE_MIN_HASH_MASK; |
320 | numa_fake); | 333 | if (!size) { |
321 | sz = FAKE_NODE_MIN_SIZE; | 334 | printk(KERN_ERR "Not enough memory for each node. " |
335 | "NUMA emulation disabled.\n"); | ||
336 | return -1; | ||
322 | } | 337 | } |
323 | /* | 338 | |
324 | * Find out how many nodes can get an extra NODE_MIN_SIZE granule. | 339 | for (i = node_start; i < num_nodes + node_start; i++) { |
325 | * This logic ensures the extra memory gets distributed among as many | 340 | u64 end = *addr + size; |
326 | * nodes as possible (as compared to one single node getting all that | ||
327 | * extra memory. | ||
328 | */ | ||
329 | big = ((old_sz - sz) * numa_fake) / FAKE_NODE_MIN_SIZE; | ||
330 | printk(KERN_INFO "Fake node Size: %luMB hole_size: %luMB big nodes: " | ||
331 | "%d\n", | ||
332 | (sz >> 20), (hole_size >> 20), big); | ||
333 | memset(&nodes,0,sizeof(nodes)); | ||
334 | end = start; | ||
335 | for (i = 0; i < numa_fake; i++) { | ||
336 | /* | ||
337 | * In case we are not able to allocate enough memory for all | ||
338 | * the nodes, we reduce the number of fake nodes. | ||
339 | */ | ||
340 | if (end >= max_addr) { | ||
341 | numa_fake = i - 1; | ||
342 | break; | ||
343 | } | ||
344 | start = nodes[i].start = end; | ||
345 | /* | ||
346 | * Final node can have all the remaining memory. | ||
347 | */ | ||
348 | if (i == numa_fake-1) | ||
349 | sz = max_addr - start; | ||
350 | end = nodes[i].start + sz; | ||
351 | /* | ||
352 | * Fir "big" number of nodes get extra granule. | ||
353 | */ | ||
354 | if (i < big) | 341 | if (i < big) |
355 | end += FAKE_NODE_MIN_SIZE; | 342 | end += FAKE_NODE_MIN_SIZE; |
356 | /* | 343 | /* |
357 | * Iterate over the range to ensure that this node gets at | 344 | * The final node can have the remaining system RAM. Other |
358 | * least sz amount of RAM (excluding holes) | 345 | * nodes receive roughly the same amount of available pages. |
359 | */ | 346 | */ |
360 | while ((end - start - e820_hole_size(start, end)) < sz) { | 347 | if (i == num_nodes + node_start - 1) |
361 | end += FAKE_NODE_MIN_SIZE; | 348 | end = max_addr; |
362 | if (end >= max_addr) | 349 | else |
363 | break; | 350 | while (end - *addr - E820_ADDR_HOLE_SIZE(*addr, end) < |
351 | size) { | ||
352 | end += FAKE_NODE_MIN_SIZE; | ||
353 | if (end > max_addr) { | ||
354 | end = max_addr; | ||
355 | break; | ||
356 | } | ||
357 | } | ||
358 | if (setup_node_range(i, nodes, addr, end - *addr, max_addr) < 0) | ||
359 | break; | ||
360 | } | ||
361 | return i - node_start + 1; | ||
362 | } | ||
363 | |||
364 | /* | ||
365 | * Splits the remaining system RAM into chunks of size. The remaining memory is | ||
366 | * always assigned to a final node and can be asymmetric. Returns the number of | ||
367 | * nodes split. | ||
368 | */ | ||
369 | static int __init split_nodes_by_size(struct bootnode *nodes, u64 *addr, | ||
370 | u64 max_addr, int node_start, u64 size) | ||
371 | { | ||
372 | int i = node_start; | ||
373 | size = (size << 20) & FAKE_NODE_MIN_HASH_MASK; | ||
374 | while (!setup_node_range(i++, nodes, addr, size, max_addr)) | ||
375 | ; | ||
376 | return i - node_start; | ||
377 | } | ||
378 | |||
379 | /* | ||
380 | * Sets up the system RAM area from start_pfn to end_pfn according to the | ||
381 | * numa=fake command-line option. | ||
382 | */ | ||
383 | static int __init numa_emulation(unsigned long start_pfn, unsigned long end_pfn) | ||
384 | { | ||
385 | struct bootnode nodes[MAX_NUMNODES]; | ||
386 | u64 addr = start_pfn << PAGE_SHIFT; | ||
387 | u64 max_addr = end_pfn << PAGE_SHIFT; | ||
388 | int num_nodes = 0; | ||
389 | int coeff_flag; | ||
390 | int coeff = -1; | ||
391 | int num = 0; | ||
392 | u64 size; | ||
393 | int i; | ||
394 | |||
395 | memset(&nodes, 0, sizeof(nodes)); | ||
396 | /* | ||
397 | * If the numa=fake command-line is just a single number N, split the | ||
398 | * system RAM into N fake nodes. | ||
399 | */ | ||
400 | if (!strchr(cmdline, '*') && !strchr(cmdline, ',')) { | ||
401 | num_nodes = split_nodes_equally(nodes, &addr, max_addr, 0, | ||
402 | simple_strtol(cmdline, NULL, 0)); | ||
403 | if (num_nodes < 0) | ||
404 | return num_nodes; | ||
405 | goto out; | ||
406 | } | ||
407 | |||
408 | /* Parse the command line. */ | ||
409 | for (coeff_flag = 0; ; cmdline++) { | ||
410 | if (*cmdline && isdigit(*cmdline)) { | ||
411 | num = num * 10 + *cmdline - '0'; | ||
412 | continue; | ||
364 | } | 413 | } |
365 | /* | 414 | if (*cmdline == '*') { |
366 | * Look at the next node to make sure there is some real memory | 415 | if (num > 0) |
367 | * to map. Bad things happen when the only memory present | 416 | coeff = num; |
368 | * in a zone on a fake node is IO hole. | 417 | coeff_flag = 1; |
369 | */ | 418 | } |
370 | while (e820_hole_size(end, end + FAKE_NODE_MIN_SIZE) > 0) { | 419 | if (!*cmdline || *cmdline == ',') { |
371 | if (zone_cross_over(start, end + sz)) { | 420 | if (!coeff_flag) |
372 | end = (MAX_DMA32_PFN << PAGE_SHIFT); | 421 | coeff = 1; |
422 | /* | ||
423 | * Round down to the nearest FAKE_NODE_MIN_SIZE. | ||
424 | * Command-line coefficients are in megabytes. | ||
425 | */ | ||
426 | size = ((u64)num << 20) & FAKE_NODE_MIN_HASH_MASK; | ||
427 | if (size) | ||
428 | for (i = 0; i < coeff; i++, num_nodes++) | ||
429 | if (setup_node_range(num_nodes, nodes, | ||
430 | &addr, size, max_addr) < 0) | ||
431 | goto done; | ||
432 | if (!*cmdline) | ||
373 | break; | 433 | break; |
374 | } | 434 | coeff_flag = 0; |
375 | if (end >= max_addr) | 435 | coeff = -1; |
436 | } | ||
437 | num = 0; | ||
438 | } | ||
439 | done: | ||
440 | if (!num_nodes) | ||
441 | return -1; | ||
442 | /* Fill remainder of system RAM, if appropriate. */ | ||
443 | if (addr < max_addr) { | ||
444 | if (coeff_flag && coeff < 0) { | ||
445 | /* Split remaining nodes into num-sized chunks */ | ||
446 | num_nodes += split_nodes_by_size(nodes, &addr, max_addr, | ||
447 | num_nodes, num); | ||
448 | goto out; | ||
449 | } | ||
450 | switch (*(cmdline - 1)) { | ||
451 | case '*': | ||
452 | /* Split remaining nodes into coeff chunks */ | ||
453 | if (coeff <= 0) | ||
376 | break; | 454 | break; |
377 | end += FAKE_NODE_MIN_SIZE; | 455 | num_nodes += split_nodes_equally(nodes, &addr, max_addr, |
456 | num_nodes, coeff); | ||
457 | break; | ||
458 | case ',': | ||
459 | /* Do not allocate remaining system RAM */ | ||
460 | break; | ||
461 | default: | ||
462 | /* Give one final node */ | ||
463 | setup_node_range(num_nodes, nodes, &addr, | ||
464 | max_addr - addr, max_addr); | ||
465 | num_nodes++; | ||
378 | } | 466 | } |
379 | if (end > max_addr) | 467 | } |
380 | end = max_addr; | 468 | out: |
381 | nodes[i].end = end; | 469 | memnode_shift = compute_hash_shift(nodes, num_nodes); |
382 | printk(KERN_INFO "Faking node %d at %016Lx-%016Lx (%LuMB)\n", | 470 | if (memnode_shift < 0) { |
383 | i, | 471 | memnode_shift = 0; |
384 | nodes[i].start, nodes[i].end, | 472 | printk(KERN_ERR "No NUMA hash function found. NUMA emulation " |
385 | (nodes[i].end - nodes[i].start) >> 20); | 473 | "disabled.\n"); |
386 | node_set_online(i); | 474 | return -1; |
387 | } | 475 | } |
388 | memnode_shift = compute_hash_shift(nodes, numa_fake); | 476 | |
389 | if (memnode_shift < 0) { | 477 | /* |
390 | memnode_shift = 0; | 478 | * We need to vacate all active ranges that may have been registered by |
391 | printk(KERN_ERR "No NUMA hash function found. Emulation disabled.\n"); | 479 | * SRAT. |
392 | return -1; | 480 | */ |
393 | } | 481 | remove_all_active_ranges(); |
394 | for_each_online_node(i) { | 482 | for_each_node_mask(i, node_possible_map) { |
395 | e820_register_active_regions(i, nodes[i].start >> PAGE_SHIFT, | 483 | e820_register_active_regions(i, nodes[i].start >> PAGE_SHIFT, |
396 | nodes[i].end >> PAGE_SHIFT); | 484 | nodes[i].end >> PAGE_SHIFT); |
397 | setup_node_bootmem(i, nodes[i].start, nodes[i].end); | 485 | setup_node_bootmem(i, nodes[i].start, nodes[i].end); |
@@ -399,26 +487,32 @@ static int __init numa_emulation(unsigned long start_pfn, unsigned long end_pfn) | |||
399 | numa_init_array(); | 487 | numa_init_array(); |
400 | return 0; | 488 | return 0; |
401 | } | 489 | } |
402 | #endif | 490 | #undef E820_ADDR_HOLE_SIZE |
491 | #endif /* CONFIG_NUMA_EMU */ | ||
403 | 492 | ||
404 | void __init numa_initmem_init(unsigned long start_pfn, unsigned long end_pfn) | 493 | void __init numa_initmem_init(unsigned long start_pfn, unsigned long end_pfn) |
405 | { | 494 | { |
406 | int i; | 495 | int i; |
407 | 496 | ||
497 | nodes_clear(node_possible_map); | ||
498 | |||
408 | #ifdef CONFIG_NUMA_EMU | 499 | #ifdef CONFIG_NUMA_EMU |
409 | if (numa_fake && !numa_emulation(start_pfn, end_pfn)) | 500 | if (cmdline && !numa_emulation(start_pfn, end_pfn)) |
410 | return; | 501 | return; |
502 | nodes_clear(node_possible_map); | ||
411 | #endif | 503 | #endif |
412 | 504 | ||
413 | #ifdef CONFIG_ACPI_NUMA | 505 | #ifdef CONFIG_ACPI_NUMA |
414 | if (!numa_off && !acpi_scan_nodes(start_pfn << PAGE_SHIFT, | 506 | if (!numa_off && !acpi_scan_nodes(start_pfn << PAGE_SHIFT, |
415 | end_pfn << PAGE_SHIFT)) | 507 | end_pfn << PAGE_SHIFT)) |
416 | return; | 508 | return; |
509 | nodes_clear(node_possible_map); | ||
417 | #endif | 510 | #endif |
418 | 511 | ||
419 | #ifdef CONFIG_K8_NUMA | 512 | #ifdef CONFIG_K8_NUMA |
420 | if (!numa_off && !k8_scan_nodes(start_pfn<<PAGE_SHIFT, end_pfn<<PAGE_SHIFT)) | 513 | if (!numa_off && !k8_scan_nodes(start_pfn<<PAGE_SHIFT, end_pfn<<PAGE_SHIFT)) |
421 | return; | 514 | return; |
515 | nodes_clear(node_possible_map); | ||
422 | #endif | 516 | #endif |
423 | printk(KERN_INFO "%s\n", | 517 | printk(KERN_INFO "%s\n", |
424 | numa_off ? "NUMA turned off" : "No NUMA configuration found"); | 518 | numa_off ? "NUMA turned off" : "No NUMA configuration found"); |
@@ -432,6 +526,7 @@ void __init numa_initmem_init(unsigned long start_pfn, unsigned long end_pfn) | |||
432 | memnodemap[0] = 0; | 526 | memnodemap[0] = 0; |
433 | nodes_clear(node_online_map); | 527 | nodes_clear(node_online_map); |
434 | node_set_online(0); | 528 | node_set_online(0); |
529 | node_set(0, node_possible_map); | ||
435 | for (i = 0; i < NR_CPUS; i++) | 530 | for (i = 0; i < NR_CPUS; i++) |
436 | numa_set_node(i, 0); | 531 | numa_set_node(i, 0); |
437 | node_to_cpumask[0] = cpumask_of_cpu(0); | 532 | node_to_cpumask[0] = cpumask_of_cpu(0); |
@@ -486,11 +581,8 @@ static __init int numa_setup(char *opt) | |||
486 | if (!strncmp(opt,"off",3)) | 581 | if (!strncmp(opt,"off",3)) |
487 | numa_off = 1; | 582 | numa_off = 1; |
488 | #ifdef CONFIG_NUMA_EMU | 583 | #ifdef CONFIG_NUMA_EMU |
489 | if(!strncmp(opt, "fake=", 5)) { | 584 | if (!strncmp(opt, "fake=", 5)) |
490 | numa_fake = simple_strtoul(opt+5,NULL,0); ; | 585 | cmdline = opt + 5; |
491 | if (numa_fake >= MAX_NUMNODES) | ||
492 | numa_fake = MAX_NUMNODES; | ||
493 | } | ||
494 | #endif | 586 | #endif |
495 | #ifdef CONFIG_ACPI_NUMA | 587 | #ifdef CONFIG_ACPI_NUMA |
496 | if (!strncmp(opt,"noacpi",6)) | 588 | if (!strncmp(opt,"noacpi",6)) |
diff --git a/arch/x86_64/mm/pageattr.c b/arch/x86_64/mm/pageattr.c index 081409aa345..bf4aa8dd425 100644 --- a/arch/x86_64/mm/pageattr.c +++ b/arch/x86_64/mm/pageattr.c | |||
@@ -51,7 +51,6 @@ static struct page *split_large_page(unsigned long address, pgprot_t prot, | |||
51 | SetPagePrivate(base); | 51 | SetPagePrivate(base); |
52 | page_private(base) = 0; | 52 | page_private(base) = 0; |
53 | 53 | ||
54 | address = __pa(address); | ||
55 | addr = address & LARGE_PAGE_MASK; | 54 | addr = address & LARGE_PAGE_MASK; |
56 | pbase = (pte_t *)page_address(base); | 55 | pbase = (pte_t *)page_address(base); |
57 | for (i = 0; i < PTRS_PER_PTE; i++, addr += PAGE_SIZE) { | 56 | for (i = 0; i < PTRS_PER_PTE; i++, addr += PAGE_SIZE) { |
@@ -101,13 +100,12 @@ static inline void save_page(struct page *fpage) | |||
101 | * No more special protections in this 2/4MB area - revert to a | 100 | * No more special protections in this 2/4MB area - revert to a |
102 | * large page again. | 101 | * large page again. |
103 | */ | 102 | */ |
104 | static void revert_page(unsigned long address, pgprot_t ref_prot) | 103 | static void revert_page(unsigned long address, unsigned long pfn, pgprot_t ref_prot) |
105 | { | 104 | { |
106 | pgd_t *pgd; | 105 | pgd_t *pgd; |
107 | pud_t *pud; | 106 | pud_t *pud; |
108 | pmd_t *pmd; | 107 | pmd_t *pmd; |
109 | pte_t large_pte; | 108 | pte_t large_pte; |
110 | unsigned long pfn; | ||
111 | 109 | ||
112 | pgd = pgd_offset_k(address); | 110 | pgd = pgd_offset_k(address); |
113 | BUG_ON(pgd_none(*pgd)); | 111 | BUG_ON(pgd_none(*pgd)); |
@@ -115,7 +113,6 @@ static void revert_page(unsigned long address, pgprot_t ref_prot) | |||
115 | BUG_ON(pud_none(*pud)); | 113 | BUG_ON(pud_none(*pud)); |
116 | pmd = pmd_offset(pud, address); | 114 | pmd = pmd_offset(pud, address); |
117 | BUG_ON(pmd_val(*pmd) & _PAGE_PSE); | 115 | BUG_ON(pmd_val(*pmd) & _PAGE_PSE); |
118 | pfn = (__pa(address) & LARGE_PAGE_MASK) >> PAGE_SHIFT; | ||
119 | large_pte = pfn_pte(pfn, ref_prot); | 116 | large_pte = pfn_pte(pfn, ref_prot); |
120 | large_pte = pte_mkhuge(large_pte); | 117 | large_pte = pte_mkhuge(large_pte); |
121 | set_pte((pte_t *)pmd, large_pte); | 118 | set_pte((pte_t *)pmd, large_pte); |
@@ -141,7 +138,8 @@ __change_page_attr(unsigned long address, unsigned long pfn, pgprot_t prot, | |||
141 | */ | 138 | */ |
142 | struct page *split; | 139 | struct page *split; |
143 | ref_prot2 = pte_pgprot(pte_clrhuge(*kpte)); | 140 | ref_prot2 = pte_pgprot(pte_clrhuge(*kpte)); |
144 | split = split_large_page(address, prot, ref_prot2); | 141 | split = split_large_page(pfn << PAGE_SHIFT, prot, |
142 | ref_prot2); | ||
145 | if (!split) | 143 | if (!split) |
146 | return -ENOMEM; | 144 | return -ENOMEM; |
147 | set_pte(kpte, mk_pte(split, ref_prot2)); | 145 | set_pte(kpte, mk_pte(split, ref_prot2)); |
@@ -160,7 +158,7 @@ __change_page_attr(unsigned long address, unsigned long pfn, pgprot_t prot, | |||
160 | 158 | ||
161 | if (page_private(kpte_page) == 0) { | 159 | if (page_private(kpte_page) == 0) { |
162 | save_page(kpte_page); | 160 | save_page(kpte_page); |
163 | revert_page(address, ref_prot); | 161 | revert_page(address, pfn, ref_prot); |
164 | } | 162 | } |
165 | return 0; | 163 | return 0; |
166 | } | 164 | } |
@@ -180,22 +178,32 @@ __change_page_attr(unsigned long address, unsigned long pfn, pgprot_t prot, | |||
180 | */ | 178 | */ |
181 | int change_page_attr_addr(unsigned long address, int numpages, pgprot_t prot) | 179 | int change_page_attr_addr(unsigned long address, int numpages, pgprot_t prot) |
182 | { | 180 | { |
183 | int err = 0; | 181 | unsigned long phys_base_pfn = __pa_symbol(__START_KERNEL_map) >> PAGE_SHIFT; |
182 | int err = 0, kernel_map = 0; | ||
184 | int i; | 183 | int i; |
185 | 184 | ||
185 | if (address >= __START_KERNEL_map | ||
186 | && address < __START_KERNEL_map + KERNEL_TEXT_SIZE) { | ||
187 | address = (unsigned long)__va(__pa(address)); | ||
188 | kernel_map = 1; | ||
189 | } | ||
190 | |||
186 | down_write(&init_mm.mmap_sem); | 191 | down_write(&init_mm.mmap_sem); |
187 | for (i = 0; i < numpages; i++, address += PAGE_SIZE) { | 192 | for (i = 0; i < numpages; i++, address += PAGE_SIZE) { |
188 | unsigned long pfn = __pa(address) >> PAGE_SHIFT; | 193 | unsigned long pfn = __pa(address) >> PAGE_SHIFT; |
189 | 194 | ||
190 | err = __change_page_attr(address, pfn, prot, PAGE_KERNEL); | 195 | if (!kernel_map || pte_present(pfn_pte(0, prot))) { |
191 | if (err) | 196 | err = __change_page_attr(address, pfn, prot, PAGE_KERNEL); |
192 | break; | 197 | if (err) |
198 | break; | ||
199 | } | ||
193 | /* Handle kernel mapping too which aliases part of the | 200 | /* Handle kernel mapping too which aliases part of the |
194 | * lowmem */ | 201 | * lowmem */ |
195 | if (__pa(address) < KERNEL_TEXT_SIZE) { | 202 | if ((pfn >= phys_base_pfn) && |
203 | ((pfn - phys_base_pfn) < (KERNEL_TEXT_SIZE >> PAGE_SHIFT))) { | ||
196 | unsigned long addr2; | 204 | unsigned long addr2; |
197 | pgprot_t prot2; | 205 | pgprot_t prot2; |
198 | addr2 = __START_KERNEL_map + __pa(address); | 206 | addr2 = __START_KERNEL_map + ((pfn - phys_base_pfn) << PAGE_SHIFT); |
199 | /* Make sure the kernel mappings stay executable */ | 207 | /* Make sure the kernel mappings stay executable */ |
200 | prot2 = pte_pgprot(pte_mkexec(pfn_pte(0, prot))); | 208 | prot2 = pte_pgprot(pte_mkexec(pfn_pte(0, prot))); |
201 | err = __change_page_attr(addr2, pfn, prot2, | 209 | err = __change_page_attr(addr2, pfn, prot2, |
diff --git a/arch/x86_64/mm/srat.c b/arch/x86_64/mm/srat.c index 2efe215fc76..1e76bb0a727 100644 --- a/arch/x86_64/mm/srat.c +++ b/arch/x86_64/mm/srat.c | |||
@@ -419,19 +419,21 @@ int __init acpi_scan_nodes(unsigned long start, unsigned long end) | |||
419 | return -1; | 419 | return -1; |
420 | } | 420 | } |
421 | 421 | ||
422 | node_possible_map = nodes_parsed; | ||
423 | |||
422 | /* Finally register nodes */ | 424 | /* Finally register nodes */ |
423 | for_each_node_mask(i, nodes_parsed) | 425 | for_each_node_mask(i, node_possible_map) |
424 | setup_node_bootmem(i, nodes[i].start, nodes[i].end); | 426 | setup_node_bootmem(i, nodes[i].start, nodes[i].end); |
425 | /* Try again in case setup_node_bootmem missed one due | 427 | /* Try again in case setup_node_bootmem missed one due |
426 | to missing bootmem */ | 428 | to missing bootmem */ |
427 | for_each_node_mask(i, nodes_parsed) | 429 | for_each_node_mask(i, node_possible_map) |
428 | if (!node_online(i)) | 430 | if (!node_online(i)) |
429 | setup_node_bootmem(i, nodes[i].start, nodes[i].end); | 431 | setup_node_bootmem(i, nodes[i].start, nodes[i].end); |
430 | 432 | ||
431 | for (i = 0; i < NR_CPUS; i++) { | 433 | for (i = 0; i < NR_CPUS; i++) { |
432 | if (cpu_to_node[i] == NUMA_NO_NODE) | 434 | if (cpu_to_node[i] == NUMA_NO_NODE) |
433 | continue; | 435 | continue; |
434 | if (!node_isset(cpu_to_node[i], nodes_parsed)) | 436 | if (!node_isset(cpu_to_node[i], node_possible_map)) |
435 | numa_set_node(i, NUMA_NO_NODE); | 437 | numa_set_node(i, NUMA_NO_NODE); |
436 | } | 438 | } |
437 | numa_init_array(); | 439 | numa_init_array(); |
diff --git a/arch/xtensa/kernel/vmlinux.lds.S b/arch/xtensa/kernel/vmlinux.lds.S index ab6370054ce..4fbd66a52a8 100644 --- a/arch/xtensa/kernel/vmlinux.lds.S +++ b/arch/xtensa/kernel/vmlinux.lds.S | |||
@@ -198,7 +198,7 @@ SECTIONS | |||
198 | __ftr_fixup : { *(__ftr_fixup) } | 198 | __ftr_fixup : { *(__ftr_fixup) } |
199 | __stop___ftr_fixup = .; | 199 | __stop___ftr_fixup = .; |
200 | 200 | ||
201 | . = ALIGN(32); | 201 | . = ALIGN(4096); |
202 | __per_cpu_start = .; | 202 | __per_cpu_start = .; |
203 | .data.percpu : { *(.data.percpu) } | 203 | .data.percpu : { *(.data.percpu) } |
204 | __per_cpu_end = .; | 204 | __per_cpu_end = .; |