author    Ingo Molnar <mingo@elte.hu>  2008-07-18 13:31:12 -0400
committer Ingo Molnar <mingo@elte.hu>  2008-07-18 13:31:12 -0400
commit    3e370b29d35fb01bfb92c2814d6f79bf6a2cb970 (patch)
tree      3b8fb467d60bfe6a34686f4abdc3a60050ba40a4 /arch/x86
parent    88d1dce3a74367291f65a757fbdcaf17f042f30c (diff)
parent    5b664cb235e97afbf34db9c4d77f08ebd725335e (diff)
Merge branch 'linus' into x86/pci-ioapic-boot-irq-quirks
Conflicts:
drivers/pci/quirks.c
Signed-off-by: Ingo Molnar <mingo@elte.hu>
Diffstat (limited to 'arch/x86')
136 files changed, 5474 insertions, 3685 deletions
diff --git a/arch/x86/Kconfig b/arch/x86/Kconfig
index bb0c0d0f6db7..96e0c2ebc388 100644
--- a/arch/x86/Kconfig
+++ b/arch/x86/Kconfig
@@ -23,6 +23,8 @@ config X86
 	select HAVE_OPROFILE
 	select HAVE_KPROBES
 	select HAVE_KRETPROBES
+	select HAVE_DYNAMIC_FTRACE
+	select HAVE_FTRACE
 	select HAVE_KVM if ((X86_32 && !X86_VOYAGER && !X86_VISWS && !X86_NUMAQ) || X86_64)
 	select HAVE_ARCH_KGDB if !X86_VOYAGER
 
@@ -168,6 +170,7 @@ config GENERIC_PENDING_IRQ
 config X86_SMP
 	bool
 	depends on SMP && ((X86_32 && !X86_VOYAGER) || X86_64)
+	select USE_GENERIC_SMP_HELPERS
 	default y
 
 config X86_32_SMP
@@ -181,12 +184,12 @@ config X86_64_SMP
 config X86_HT
 	bool
 	depends on SMP
-	depends on (X86_32 && !(X86_VISWS || X86_VOYAGER)) || X86_64
+	depends on (X86_32 && !X86_VOYAGER) || X86_64
 	default y
 
 config X86_BIOS_REBOOT
 	bool
-	depends on !X86_VISWS && !X86_VOYAGER
+	depends on !X86_VOYAGER
 	default y
 
 config X86_TRAMPOLINE
@@ -232,13 +235,13 @@ config SMP
 
 config X86_FIND_SMP_CONFIG
 	def_bool y
-	depends on X86_MPPARSE || X86_VOYAGER || X86_VISWS
+	depends on X86_MPPARSE || X86_VOYAGER
 
 if ACPI
 config X86_MPPARSE
 	def_bool y
 	bool "Enable MPS table"
-	depends on X86_LOCAL_APIC && !X86_VISWS
+	depends on X86_LOCAL_APIC
 	help
 	  For old smp systems that do not have proper acpi support. Newer systems
 	  (esp with 64bit cpus) with acpi support, MADT and DSDT will override it
@@ -247,7 +250,7 @@ endif
 if !ACPI
 config X86_MPPARSE
 	def_bool y
-	depends on X86_LOCAL_APIC && !X86_VISWS
+	depends on X86_LOCAL_APIC
 endif
 
 choice
@@ -281,18 +284,6 @@ config X86_VOYAGER
 	  If you do not specifically know you have a Voyager based machine,
 	  say N here, otherwise the kernel you build will not be bootable.
 
-config X86_VISWS
-	bool "SGI 320/540 (Visual Workstation)"
-	depends on X86_32 && !PCI
-	help
-	  The SGI Visual Workstation series is an IA32-based workstation
-	  based on SGI systems chips with some legacy PC hardware attached.
-
-	  Say Y here to create a kernel to run on the SGI 320 or 540.
-
-	  A kernel compiled for the Visual Workstation will not run on PCs
-	  and vice versa. See <file:Documentation/sgi-visws.txt> for details.
-
 config X86_GENERICARCH
 	bool "Generic architecture"
 	depends on X86_32
@@ -355,7 +346,7 @@ config X86_RDC321X
 config X86_VSMP
 	bool "Support for ScaleMP vSMP"
 	select PARAVIRT
-	depends on X86_64 && !PCI
+	depends on X86_64 && PCI
 	help
 	  Support for ScaleMP vSMP systems.  Say 'Y' here if this kernel is
 	  supposed to run on these EM64T-based machines.  Only choose this option
@@ -363,6 +354,18 @@ config X86_VSMP
 
 endchoice
 
+config X86_VISWS
+	bool "SGI 320/540 (Visual Workstation)"
+	depends on X86_32 && PCI && !X86_VOYAGER && X86_MPPARSE && PCI_GODIRECT
+	help
+	  The SGI Visual Workstation series is an IA32-based workstation
+	  based on SGI systems chips with some legacy PC hardware attached.
+
+	  Say Y here to create a kernel to run on the SGI 320 or 540.
+
+	  A kernel compiled for the Visual Workstation will run on general
+	  PCs as well. See <file:Documentation/sgi-visws.txt> for details.
+
 config SCHED_NO_NO_OMIT_FRAME_POINTER
 	def_bool y
 	prompt "Single-depth WCHAN output"
@@ -391,7 +394,7 @@ config VMI
 	bool "VMI Guest support"
 	select PARAVIRT
 	depends on X86_32
-	depends on !(X86_VISWS || X86_VOYAGER)
+	depends on !X86_VOYAGER
 	help
 	  VMI provides a paravirtualized interface to the VMware ESX server
 	  (it could be used by other hypervisors in theory too, but is not
@@ -402,7 +405,7 @@ config KVM_CLOCK
 	bool "KVM paravirtualized clock"
 	select PARAVIRT
 	select PARAVIRT_CLOCK
-	depends on !(X86_VISWS || X86_VOYAGER)
+	depends on !X86_VOYAGER
 	help
 	  Turning on this option will allow you to run a paravirtualized clock
 	  when running over the KVM hypervisor. Instead of relying on a PIT
@@ -413,7 +416,7 @@ config KVM_CLOCK
 config KVM_GUEST
 	bool "KVM Guest support"
 	select PARAVIRT
-	depends on !(X86_VISWS || X86_VOYAGER)
+	depends on !X86_VOYAGER
 	help
 	  This option enables various optimizations for running under the KVM
 	  hypervisor.
@@ -422,7 +425,7 @@ source "arch/x86/lguest/Kconfig"
 
 config PARAVIRT
 	bool "Enable paravirtualization code"
-	depends on !(X86_VISWS || X86_VOYAGER)
+	depends on !X86_VOYAGER
 	help
 	  This changes the kernel so it can modify itself when it is run
 	  under a hypervisor, potentially improving performance significantly
@@ -445,7 +448,6 @@ config PARAVIRT_DEBUG
 config MEMTEST
 	bool "Memtest"
 	depends on X86_64
-	default y
 	help
 	  This option adds a kernel parameter 'memtest', which allows memtest
 	  to be set.
@@ -453,7 +455,7 @@ config MEMTEST
 	      memtest=1, mean do 1 test pattern;
 	      ...
 	      memtest=4, mean do 4 test patterns.
-	  If you are unsure how to answer this question, answer Y.
+	  If you are unsure how to answer this question, answer N.
 
 config X86_SUMMIT_NUMA
 	def_bool y
@@ -575,7 +577,7 @@ config SWIOTLB
 	  3 GB of memory. If unsure, say Y.
 
 config IOMMU_HELPER
-	def_bool (CALGARY_IOMMU || GART_IOMMU || SWIOTLB)
+	def_bool (CALGARY_IOMMU || GART_IOMMU || SWIOTLB || AMD_IOMMU)
 config MAXSMP
 	bool "Configure Maximum number of SMP Processors and NUMA Nodes"
 	depends on X86_64 && SMP
@@ -628,7 +630,7 @@ source "kernel/Kconfig.preempt"
 
 config X86_UP_APIC
 	bool "Local APIC support on uniprocessors"
-	depends on X86_32 && !SMP && !(X86_VISWS || X86_VOYAGER || X86_GENERICARCH)
+	depends on X86_32 && !SMP && !(X86_VOYAGER || X86_GENERICARCH)
 	help
 	  A local APIC (Advanced Programmable Interrupt Controller) is an
 	  integrated interrupt controller in the CPU. If you have a single-CPU
@@ -653,11 +655,11 @@ config X86_UP_IOAPIC
 
 config X86_LOCAL_APIC
 	def_bool y
-	depends on X86_64 || (X86_32 && (X86_UP_APIC || ((X86_VISWS || SMP) && !X86_VOYAGER) || X86_GENERICARCH))
+	depends on X86_64 || (X86_32 && (X86_UP_APIC || (SMP && !X86_VOYAGER) || X86_GENERICARCH))
 
 config X86_IO_APIC
 	def_bool y
-	depends on X86_64 || (X86_32 && (X86_UP_IOAPIC || (SMP && !(X86_VISWS || X86_VOYAGER)) || X86_GENERICARCH))
+	depends on X86_64 || (X86_32 && (X86_UP_IOAPIC || (SMP && !X86_VOYAGER) || X86_GENERICARCH))
 
 config X86_VISWS_APIC
 	def_bool y
@@ -711,7 +713,7 @@ config X86_MCE_NONFATAL
 
 config X86_MCE_P4THERMAL
 	bool "check for P4 thermal throttling interrupt."
-	depends on X86_32 && X86_MCE && (X86_UP_APIC || SMP) && !X86_VISWS
+	depends on X86_32 && X86_MCE && (X86_UP_APIC || SMP)
 	help
 	  Enabling this feature will cause a message to be printed when the P4
 	  enters thermal throttling.
@@ -1133,21 +1135,18 @@ config MTRR
 	  See <file:Documentation/mtrr.txt> for more information.
 
 config MTRR_SANITIZER
-	def_bool y
+	bool
 	prompt "MTRR cleanup support"
 	depends on MTRR
 	help
-	  Convert MTRR layout from continuous to discrete, so some X driver
-	  could add WB entries.
-
-	  Say N here if you see bootup problems (boot crash, boot hang,
-	  spontaneous reboots).
+	  Convert MTRR layout from continuous to discrete, so X drivers can
+	  add writeback entries.
 
-	  Could be disabled with disable_mtrr_cleanup. Also mtrr_chunk_size
-	  could be used to send largest mtrr entry size for continuous block
-	  to hold holes (aka. UC entries)
+	  Can be disabled with disable_mtrr_cleanup on the kernel command line.
+	  The largest mtrr entry size for a continous block can be set with
+	  mtrr_chunk_size.
 
-	  If unsure, say Y.
+	  If unsure, say N.
 
 config MTRR_SANITIZER_ENABLE_DEFAULT
 	int "MTRR cleanup enable value (0-1)"
@@ -1164,7 +1163,7 @@ config MTRR_SANITIZER_SPARE_REG_NR_DEFAULT
 	depends on MTRR_SANITIZER
 	help
 	  mtrr cleanup spare entries default, it can be changed via
-	  mtrr_spare_reg_nr=
+	  mtrr_spare_reg_nr=N on the kernel command line.
 
 config X86_PAT
 	bool
@@ -1414,7 +1413,7 @@ config X86_APM_BOOT
 
 menuconfig APM
 	tristate "APM (Advanced Power Management) BIOS support"
-	depends on X86_32 && PM_SLEEP && !X86_VISWS
+	depends on X86_32 && PM_SLEEP
 	---help---
 	  APM is a BIOS specification for saving power using several different
 	  techniques. This is mostly useful for battery powered laptops with
@@ -1561,7 +1560,7 @@ config PCI
 
 choice
 	prompt "PCI access mode"
-	depends on X86_32 && PCI && !X86_VISWS
+	depends on X86_32 && PCI
 	default PCI_GOANY
 	---help---
 	  On PCI systems, the BIOS can be used to detect the PCI devices and
@@ -1598,12 +1597,12 @@ endchoice
 
 config PCI_BIOS
 	def_bool y
-	depends on X86_32 && !X86_VISWS && PCI && (PCI_GOBIOS || PCI_GOANY)
+	depends on X86_32 && PCI && (PCI_GOBIOS || PCI_GOANY)
 
 # x86-64 doesn't support PCI BIOS access from long mode so always go direct.
 config PCI_DIRECT
 	def_bool y
-	depends on PCI && (X86_64 || (PCI_GODIRECT || PCI_GOANY || PCI_GOOLPC) || X86_VISWS)
+	depends on PCI && (X86_64 || (PCI_GODIRECT || PCI_GOANY || PCI_GOOLPC))
 
 config PCI_MMCONFIG
 	def_bool y
@@ -1663,7 +1662,7 @@ if X86_32
 
 config ISA
 	bool "ISA support"
-	depends on !(X86_VOYAGER || X86_VISWS)
+	depends on !X86_VOYAGER
 	help
 	  Find out whether you have ISA slots on your motherboard.  ISA is the
 	  name of a bus system, i.e. the way the CPU talks to the other stuff
@@ -1690,7 +1689,7 @@ config EISA
 source "drivers/eisa/Kconfig"
 
 config MCA
-	bool "MCA support" if !(X86_VISWS || X86_VOYAGER)
+	bool "MCA support" if !X86_VOYAGER
 	default y if X86_VOYAGER
 	help
 	  MicroChannel Architecture is found in some IBM PS/2 machines and
diff --git a/arch/x86/Kconfig.cpu b/arch/x86/Kconfig.cpu
index 3d22bb8175b4..abff1b84ed5b 100644
--- a/arch/x86/Kconfig.cpu
+++ b/arch/x86/Kconfig.cpu
@@ -344,7 +344,7 @@ config X86_F00F_BUG
 
 config X86_WP_WORKS_OK
 	def_bool y
-	depends on X86_32 && !M386
+	depends on !M386
 
 config X86_INVLPG
 	def_bool y
diff --git a/arch/x86/Kconfig.debug b/arch/x86/Kconfig.debug
index acc0271920f2..ae36bfa814e5 100644
--- a/arch/x86/Kconfig.debug
+++ b/arch/x86/Kconfig.debug
@@ -171,6 +171,33 @@ config IOMMU_LEAK
 	  Add a simple leak tracer to the IOMMU code. This is useful when you
 	  are debugging a buggy device driver that leaks IOMMU mappings.
 
+config MMIOTRACE_HOOKS
+	bool
+
+config MMIOTRACE
+	bool "Memory mapped IO tracing"
+	depends on DEBUG_KERNEL && PCI
+	select TRACING
+	select MMIOTRACE_HOOKS
+	help
+	  Mmiotrace traces Memory Mapped I/O access and is meant for
+	  debugging and reverse engineering. It is called from the ioremap
+	  implementation and works via page faults. Tracing is disabled by
+	  default and can be enabled at run-time.
+
+	  See Documentation/tracers/mmiotrace.txt.
+	  If you are not helping to develop drivers, say N.
+
+config MMIOTRACE_TEST
+	tristate "Test module for mmiotrace"
+	depends on MMIOTRACE && m
+	help
+	  This is a dumb module for testing mmiotrace. It is very dangerous
+	  as it will write garbage to IO memory starting at a given address.
+	  However, it should be safe to use on e.g. unused portion of VRAM.
+
+	  Say N, unless you absolutely know what you are doing.
+
 #
 # IO delay types:
 #
diff --git a/arch/x86/Makefile b/arch/x86/Makefile
index b03d24b44bf9..919ce21ea654 100644
--- a/arch/x86/Makefile
+++ b/arch/x86/Makefile
@@ -113,10 +113,6 @@ mcore-y := arch/x86/mach-default/
 mflags-$(CONFIG_X86_VOYAGER)	:= -Iinclude/asm-x86/mach-voyager
 mcore-$(CONFIG_X86_VOYAGER)	:= arch/x86/mach-voyager/
 
-# VISWS subarch support
-mflags-$(CONFIG_X86_VISWS)	:= -Iinclude/asm-x86/mach-visws
-mcore-$(CONFIG_X86_VISWS)	:= arch/x86/mach-visws/
-
 # generic subarchitecture
 mflags-$(CONFIG_X86_GENERICARCH):= -Iinclude/asm-x86/mach-generic
 fcore-$(CONFIG_X86_GENERICARCH)	+= arch/x86/mach-generic/
diff --git a/arch/x86/ia32/ia32entry.S b/arch/x86/ia32/ia32entry.S
index 24e4d4928d65..20371d0635e4 100644
--- a/arch/x86/ia32/ia32entry.S
+++ b/arch/x86/ia32/ia32entry.S
@@ -116,7 +116,7 @@ ENTRY(ia32_sysenter_target)
 	pushfq
 	CFI_ADJUST_CFA_OFFSET 8
 	/*CFI_REL_OFFSET rflags,0*/
-	movl	8*3-THREAD_SIZE+threadinfo_sysenter_return(%rsp), %r10d
+	movl	8*3-THREAD_SIZE+TI_sysenter_return(%rsp), %r10d
 	CFI_REGISTER rip,r10
 	pushq	$__USER32_CS
 	CFI_ADJUST_CFA_OFFSET 8
@@ -136,8 +136,9 @@ ENTRY(ia32_sysenter_target)
 	.quad 1b,ia32_badarg
 	.previous
 	GET_THREAD_INFO(%r10)
-	orl	$TS_COMPAT,threadinfo_status(%r10)
-	testl	$(_TIF_SYSCALL_TRACE|_TIF_SYSCALL_AUDIT|_TIF_SECCOMP),threadinfo_flags(%r10)
+	orl	$TS_COMPAT,TI_status(%r10)
+	testl	$(_TIF_SYSCALL_TRACE|_TIF_SYSCALL_AUDIT|_TIF_SECCOMP), \
+		TI_flags(%r10)
 	CFI_REMEMBER_STATE
 	jnz  sysenter_tracesys
 sysenter_do_call:
@@ -149,9 +150,9 @@ sysenter_do_call:
 	GET_THREAD_INFO(%r10)
 	DISABLE_INTERRUPTS(CLBR_NONE)
 	TRACE_IRQS_OFF
-	testl	$_TIF_ALLWORK_MASK,threadinfo_flags(%r10)
+	testl	$_TIF_ALLWORK_MASK,TI_flags(%r10)
 	jnz	int_ret_from_sys_call
-	andl	$~TS_COMPAT,threadinfo_status(%r10)
+	andl	$~TS_COMPAT,TI_status(%r10)
 	/* clear IF, that popfq doesn't enable interrupts early */
 	andl  $~0x200,EFLAGS-R11(%rsp)
 	movl	RIP-R11(%rsp),%edx		/* User %eip */
@@ -240,8 +241,9 @@ ENTRY(ia32_cstar_target)
 	.quad 1b,ia32_badarg
 	.previous
 	GET_THREAD_INFO(%r10)
-	orl	$TS_COMPAT,threadinfo_status(%r10)
-	testl	$(_TIF_SYSCALL_TRACE|_TIF_SYSCALL_AUDIT|_TIF_SECCOMP),threadinfo_flags(%r10)
+	orl	$TS_COMPAT,TI_status(%r10)
+	testl	$(_TIF_SYSCALL_TRACE|_TIF_SYSCALL_AUDIT|_TIF_SECCOMP), \
+		TI_flags(%r10)
 	CFI_REMEMBER_STATE
 	jnz   cstar_tracesys
 cstar_do_call:
@@ -253,9 +255,9 @@ cstar_do_call:
 	GET_THREAD_INFO(%r10)
 	DISABLE_INTERRUPTS(CLBR_NONE)
 	TRACE_IRQS_OFF
-	testl $_TIF_ALLWORK_MASK,threadinfo_flags(%r10)
+	testl $_TIF_ALLWORK_MASK,TI_flags(%r10)
 	jnz  int_ret_from_sys_call
-	andl $~TS_COMPAT,threadinfo_status(%r10)
+	andl $~TS_COMPAT,TI_status(%r10)
 	RESTORE_ARGS 1,-ARG_SKIP,1,1,1
 	movl RIP-ARGOFFSET(%rsp),%ecx
 	CFI_REGISTER rip,rcx
@@ -333,8 +335,9 @@ ENTRY(ia32_syscall)
 	   this could be a problem. */
 	SAVE_ARGS 0,0,1
 	GET_THREAD_INFO(%r10)
-	orl   $TS_COMPAT,threadinfo_status(%r10)
-	testl $(_TIF_SYSCALL_TRACE|_TIF_SYSCALL_AUDIT|_TIF_SECCOMP),threadinfo_flags(%r10)
+	orl	$TS_COMPAT,TI_status(%r10)
+	testl	$(_TIF_SYSCALL_TRACE|_TIF_SYSCALL_AUDIT|_TIF_SECCOMP), \
+		TI_flags(%r10)
 	jnz ia32_tracesys
ia32_do_syscall:
 	cmpl $(IA32_NR_syscalls-1),%eax
diff --git a/arch/x86/kernel/Makefile b/arch/x86/kernel/Makefile
index 54829e2b5160..da140611bb57 100644
--- a/arch/x86/kernel/Makefile
+++ b/arch/x86/kernel/Makefile
@@ -6,6 +6,12 @@ extra-y := head_$(BITS).o head$(BITS).o head.o init_task.o vmlinu
 
 CPPFLAGS_vmlinux.lds += -U$(UTS_MACHINE)
 
+ifdef CONFIG_FTRACE
+# Do not profile debug utilities
+CFLAGS_REMOVE_tsc.o = -pg
+CFLAGS_REMOVE_rtc.o = -pg
+endif
+
 #
 # vsyscalls (which work on the user stack) should have
 # no stack-protector checks:
@@ -13,12 +19,13 @@ CPPFLAGS_vmlinux.lds += -U$(UTS_MACHINE)
 nostackp := $(call cc-option, -fno-stack-protector)
 CFLAGS_vsyscall_64.o	:= $(PROFILING) -g0 $(nostackp)
 CFLAGS_hpet.o		:= $(nostackp)
-CFLAGS_tsc_64.o		:= $(nostackp)
+CFLAGS_tsc.o		:= $(nostackp)
 
 obj-y			:= process_$(BITS).o signal_$(BITS).o entry_$(BITS).o
 obj-y			+= traps_$(BITS).o irq_$(BITS).o
 obj-y			+= time_$(BITS).o ioport.o ldt.o
 obj-y			+= setup.o i8259.o irqinit_$(BITS).o setup_percpu.o
+obj-$(CONFIG_X86_VISWS)	+= visws_quirks.o
 obj-$(CONFIG_X86_32)	+= probe_roms_32.o
 obj-$(CONFIG_X86_32)	+= sys_i386_32.o i386_ksyms_32.o
 obj-$(CONFIG_X86_64)	+= sys_x86_64.o x8664_ksyms_64.o
@@ -26,7 +33,7 @@ obj-$(CONFIG_X86_64) += syscall_64.o vsyscall_64.o
 obj-y			+= bootflag.o e820.o
 obj-y			+= pci-dma.o quirks.o i8237.o topology.o kdebugfs.o
 obj-y			+= alternative.o i8253.o pci-nommu.o
-obj-y			+= tsc_$(BITS).o io_delay.o rtc.o
+obj-y			+= tsc.o io_delay.o rtc.o
 
 obj-$(CONFIG_X86_TRAMPOLINE)	+= trampoline.o
 obj-y				+= process.o
@@ -56,6 +63,7 @@ obj-$(CONFIG_X86_MPPARSE) += mpparse.o
 obj-$(CONFIG_X86_LOCAL_APIC)	+= apic_$(BITS).o nmi.o
 obj-$(CONFIG_X86_IO_APIC)	+= io_apic_$(BITS).o
 obj-$(CONFIG_X86_REBOOTFIXUPS)	+= reboot_fixups_32.o
+obj-$(CONFIG_DYNAMIC_FTRACE)	+= ftrace.o
 obj-$(CONFIG_KEXEC)		+= machine_kexec_$(BITS).o
 obj-$(CONFIG_KEXEC)		+= relocate_kernel_$(BITS).o crash.o
 obj-$(CONFIG_CRASH_DUMP)	+= crash_dump_$(BITS).o
diff --git a/arch/x86/kernel/acpi/boot.c b/arch/x86/kernel/acpi/boot.c
index 5c0107602b62..f489d7a9be92 100644
--- a/arch/x86/kernel/acpi/boot.c
+++ b/arch/x86/kernel/acpi/boot.c
@@ -37,6 +37,7 @@
 #include <asm/pgtable.h>
 #include <asm/io_apic.h>
 #include <asm/apic.h>
+#include <asm/genapic.h>
 #include <asm/io.h>
 #include <asm/mpspec.h>
 #include <asm/smp.h>
@@ -83,8 +84,6 @@ int acpi_lapic;
 int acpi_ioapic;
 int acpi_strict;
 
-static int disable_irq0_through_ioapic __initdata;
-
 u8 acpi_sci_flags __initdata;
 int acpi_sci_override_gsi __initdata;
 int acpi_skip_timer_override __initdata;
@@ -108,21 +107,6 @@ static u64 acpi_lapic_addr __initdata = APIC_DEFAULT_PHYS_BASE;
  */
 enum acpi_irq_model_id acpi_irq_model = ACPI_IRQ_MODEL_PIC;
 
-#ifdef CONFIG_X86_64
-
-/* rely on all ACPI tables being in the direct mapping */
-char *__init __acpi_map_table(unsigned long phys_addr, unsigned long size)
-{
-	if (!phys_addr || !size)
-		return NULL;
-
-	if (phys_addr+size <= (max_pfn_mapped << PAGE_SHIFT) + PAGE_SIZE)
-		return __va(phys_addr);
-
-	return NULL;
-}
-
-#else
 
 /*
  * Temporarily use the virtual area starting from FIX_IO_APIC_BASE_END,
@@ -141,11 +125,15 @@ char *__init __acpi_map_table(unsigned long phys, unsigned long size)
 	unsigned long base, offset, mapped_size;
 	int idx;
 
-	if (phys + size < 8 * 1024 * 1024)
+	if (!phys || !size)
+		return NULL;
+
+	if (phys+size <= (max_low_pfn_mapped << PAGE_SHIFT))
 		return __va(phys);
 
 	offset = phys & (PAGE_SIZE - 1);
 	mapped_size = PAGE_SIZE - offset;
+	clear_fixmap(FIX_ACPI_END);
 	set_fixmap(FIX_ACPI_END, phys);
 	base = fix_to_virt(FIX_ACPI_END);
 
@@ -157,13 +145,13 @@ char *__init __acpi_map_table(unsigned long phys, unsigned long size)
 		if (--idx < FIX_ACPI_BEGIN)
 			return NULL;	/* cannot handle this */
 		phys += PAGE_SIZE;
+		clear_fixmap(idx);
 		set_fixmap(idx, phys);
 		mapped_size += PAGE_SIZE;
 	}
 
 	return ((unsigned char *)base + offset);
 }
-#endif
 
 #ifdef CONFIG_PCI_MMCONFIG
 /* The physical address of the MMCONFIG aperture.  Set from ACPI tables. */
@@ -992,10 +980,6 @@ void __init mp_override_legacy_irq(u8 bus_irq, u8 polarity, u8 trigger, u32 gsi)
 	int pin;
 	struct mp_config_intsrc mp_irq;
 
-	/* Skip the 8254 timer interrupt (IRQ 0) if requested.  */
-	if (bus_irq == 0 && disable_irq0_through_ioapic)
-		return;
-
 	/*
 	 * Convert 'gsi' to 'ioapic.pin'.
 	 */
@@ -1062,10 +1046,6 @@ void __init mp_config_acpi_legacy_irqs(void)
 	for (i = 0; i < 16; i++) {
 		int idx;
 
-		/* Skip the 8254 timer interrupt (IRQ 0) if requested.  */
-		if (i == 0 && disable_irq0_through_ioapic)
-			continue;
-
 		for (idx = 0; idx < mp_irq_entries; idx++) {
 			struct mp_config_intsrc *irq = mp_irqs + idx;
 
@@ -1373,8 +1353,6 @@ static void __init acpi_process_madt(void)
 	return;
 }
 
-#ifdef __i386__
-
 static int __init disable_acpi_irq(const struct dmi_system_id *d)
 {
 	if (!acpi_force) {
@@ -1425,13 +1403,12 @@ static int __init force_acpi_ht(const struct dmi_system_id *d)
 }
 
 /*
- * Don't register any I/O APIC entries for the 8254 timer IRQ.
+ * Force ignoring BIOS IRQ0 pin2 override
  */
-static int __init
-dmi_disable_irq0_through_ioapic(const struct dmi_system_id *d)
+static int __init dmi_ignore_irq0_timer_override(const struct dmi_system_id *d)
 {
-	pr_notice("%s detected: disabling IRQ 0 through I/O APIC\n", d->ident);
-	disable_irq0_through_ioapic = 1;
+	pr_notice("%s detected: Ignoring BIOS IRQ0 pin2 override\n", d->ident);
+	acpi_skip_timer_override = 1;
 	return 0;
 }
 
@@ -1609,11 +1586,11 @@ static struct dmi_system_id __initdata acpi_dmi_table[] = {
 	 * is enabled.  This input is incorrectly designated the
 	 * ISA IRQ 0 via an interrupt source override even though
 	 * it is wired to the output of the master 8259A and INTIN0
-	 * is not connected at all.  Abandon any attempts to route
-	 * IRQ 0 through the I/O APIC therefore.
+	 * is not connected at all.  Force ignoring BIOS IRQ0 pin2
+	 * override in that cases.
 	 */
 	{
-		.callback = dmi_disable_irq0_through_ioapic,
+		.callback = dmi_ignore_irq0_timer_override,
 		.ident = "HP NX6125 laptop",
 		.matches = {
 			DMI_MATCH(DMI_SYS_VENDOR, "Hewlett-Packard"),
@@ -1621,7 +1598,7 @@ static struct dmi_system_id __initdata acpi_dmi_table[] = {
 		},
 	},
 	{
-		.callback = dmi_disable_irq0_through_ioapic,
+		.callback = dmi_ignore_irq0_timer_override,
 		.ident = "HP NX6325 laptop",
 		.matches = {
 			DMI_MATCH(DMI_SYS_VENDOR, "Hewlett-Packard"),
@@ -1631,8 +1608,6 @@ static struct dmi_system_id __initdata acpi_dmi_table[] = {
 	{}
 };
 
-#endif /* __i386__ */
-
 /*
  * acpi_boot_table_init() and acpi_boot_init()
  *  called from setup_arch(), always.
@@ -1660,9 +1635,7 @@ int __init acpi_boot_table_init(void)
 {
 	int error;
 
-#ifdef __i386__
 	dmi_check_system(acpi_dmi_table);
-#endif
 
 	/*
	 * If acpi_disabled, bail out
diff --git a/arch/x86/kernel/acpi/processor.c b/arch/x86/kernel/acpi/processor.c
index de2d2e4ebad9..7c074eec39fb 100644
--- a/arch/x86/kernel/acpi/processor.c
+++ b/arch/x86/kernel/acpi/processor.c
@@ -56,6 +56,12 @@ static void init_intel_pdc(struct acpi_processor *pr, struct cpuinfo_x86 *c)
 	if (cpu_has(c, X86_FEATURE_ACPI))
 		buf[2] |= ACPI_PDC_T_FFH;
 
+	/*
+	 * If mwait/monitor is unsupported, C2/C3_FFH will be disabled
+	 */
+	if (!cpu_has(c, X86_FEATURE_MWAIT))
+		buf[2] &= ~(ACPI_PDC_C_C2C3_FFH);
+
 	obj->type = ACPI_TYPE_BUFFER;
 	obj->buffer.length = 12;
 	obj->buffer.pointer = (u8 *) buf;
diff --git a/arch/x86/kernel/acpi/sleep.c b/arch/x86/kernel/acpi/sleep.c
index e6a4b564ccaa..868de3d5c39d 100644
--- a/arch/x86/kernel/acpi/sleep.c
+++ b/arch/x86/kernel/acpi/sleep.c
@@ -23,6 +23,15 @@ static unsigned long acpi_realmode;
 static char temp_stack[10240];
 #endif
 
+/* XXX: this macro should move to asm-x86/segment.h and be shared with the
+   boot code... */
+#define GDT_ENTRY(flags, base, limit)		\
+	(((u64)(base & 0xff000000) << 32) |	\
+	 ((u64)flags << 40) |			\
+	 ((u64)(limit & 0x00ff0000) << 32) |	\
+	 ((u64)(base & 0x00ffffff) << 16) |	\
+	 ((u64)(limit & 0x0000ffff)))
+
 /**
  * acpi_save_state_mem - save kernel state
  *
@@ -51,18 +60,27 @@ int acpi_save_state_mem(void)
 	header->video_mode = saved_video_mode;
 
 	header->wakeup_jmp_seg = acpi_wakeup_address >> 4;
+
+	/*
+	 * Set up the wakeup GDT.  We set these up as Big Real Mode,
+	 * that is, with limits set to 4 GB.  At least the Lenovo
+	 * Thinkpad X61 is known to need this for the video BIOS
+	 * initialization quirk to work; this is likely to also
+	 * be the case for other laptops or integrated video devices.
+	 */
+
 	/* GDT[0]: GDT self-pointer */
 	header->wakeup_gdt[0] =
 		(u64)(sizeof(header->wakeup_gdt) - 1) +
 		((u64)(acpi_wakeup_address +
 			((char *)&header->wakeup_gdt - (char *)acpi_realmode))
				<< 16);
-	/* GDT[1]: real-mode-like code segment */
-	header->wakeup_gdt[1] = (0x009bULL << 40) +
-		((u64)acpi_wakeup_address << 16) + 0xffff;
-	/* GDT[2]: real-mode-like data segment */
-	header->wakeup_gdt[2] = (0x0093ULL << 40) +
-		((u64)acpi_wakeup_address << 16) + 0xffff;
+	/* GDT[1]: big real mode-like code segment */
+	header->wakeup_gdt[1] =
+		GDT_ENTRY(0x809b, acpi_wakeup_address, 0xfffff);
+	/* GDT[2]: big real mode-like data segment */
+	header->wakeup_gdt[2] =
+		GDT_ENTRY(0x8093, acpi_wakeup_address, 0xfffff);
 
 #ifndef CONFIG_64BIT
 	store_gdt((struct desc_ptr *)&header->pmode_gdt);
@@ -140,6 +158,8 @@ static int __init acpi_sleep_setup(char *str)
 			acpi_realmode_flags |= 2;
 		if (strncmp(str, "s3_beep", 7) == 0)
 			acpi_realmode_flags |= 4;
+		if (strncmp(str, "old_ordering", 12) == 0)
+			acpi_old_suspend_ordering();
 		str = strchr(str, ',');
 		if (str != NULL)
 			str += strspn(str, ", \t");
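The GDT_ENTRY() macro introduced above packs the standard x86 segment-descriptor layout: limit bits 0-15 in the low word, base bits 0-23 in bits 16-39, the 16-bit flags field in bits 40-55 (where limit bits 16-19 are ORed in), and base bits 24-31 in bits 56-63. With flags 0x809b/0x8093 the granularity bit is set, so the 20-bit limit 0xfffff scales to 4 GB, the "Big Real Mode" the new comment describes. A standalone sketch of what the two wakeup descriptors evaluate to (the wakeup address below is a made-up placeholder, not a value the kernel guarantees):

```c
#include <stdio.h>
#include <stdint.h>

typedef uint64_t u64;

/* The macro exactly as added to acpi/sleep.c above. */
#define GDT_ENTRY(flags, base, limit)		\
	(((u64)(base & 0xff000000) << 32) |	\
	 ((u64)flags << 40) |			\
	 ((u64)(limit & 0x00ff0000) << 32) |	\
	 ((u64)(base & 0x00ffffff) << 16) |	\
	 ((u64)(limit & 0x0000ffff)))

int main(void)
{
	unsigned long wakeup = 0x9a000;	/* placeholder acpi_wakeup_address */

	/* GDT[1]/GDT[2] as built in acpi_save_state_mem(): granularity bit
	 * set (the 0x80 in the high flags byte) plus limit 0xfffff gives a
	 * 4 GB segment; access bytes 0x9b (code) and 0x93 (data). */
	printf("code: %016llx\n",
	       (unsigned long long)GDT_ENTRY(0x809b, wakeup, 0xfffff));
	printf("data: %016llx\n",
	       (unsigned long long)GDT_ENTRY(0x8093, wakeup, 0xfffff));
	return 0;
}
```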
diff --git a/arch/x86/kernel/alternative.c b/arch/x86/kernel/alternative.c
index 65c7857a90dd..2763cb37b553 100644
--- a/arch/x86/kernel/alternative.c
+++ b/arch/x86/kernel/alternative.c
@@ -1,6 +1,6 @@
 #include <linux/module.h>
 #include <linux/sched.h>
-#include <linux/spinlock.h>
+#include <linux/mutex.h>
 #include <linux/list.h>
 #include <linux/kprobes.h>
 #include <linux/mm.h>
@@ -143,7 +143,7 @@ static const unsigned char *const p6_nops[ASM_NOP_MAX+1] = {
 #ifdef CONFIG_X86_64
 
 extern char __vsyscall_0;
-static inline const unsigned char*const * find_nop_table(void)
+const unsigned char *const *find_nop_table(void)
 {
 	return boot_cpu_data.x86_vendor != X86_VENDOR_INTEL ||
 	       boot_cpu_data.x86 < 6 ? k8_nops : p6_nops;
@@ -162,7 +162,7 @@ static const struct nop {
 	{ -1, NULL }
 };
 
-static const unsigned char*const * find_nop_table(void)
+const unsigned char *const *find_nop_table(void)
 {
 	const unsigned char *const *noptable = intel_nops;
 	int i;
@@ -279,7 +279,7 @@ struct smp_alt_module {
 	struct list_head next;
 };
 static LIST_HEAD(smp_alt_modules);
-static DEFINE_SPINLOCK(smp_alt);
+static DEFINE_MUTEX(smp_alt);
 static int smp_mode = 1;	/* protected by smp_alt */
 
 void alternatives_smp_module_add(struct module *mod, char *name,
@@ -312,12 +312,12 @@ void alternatives_smp_module_add(struct module *mod, char *name,
 		__func__, smp->locks, smp->locks_end,
 		smp->text, smp->text_end, smp->name);
 
-	spin_lock(&smp_alt);
+	mutex_lock(&smp_alt);
 	list_add_tail(&smp->next, &smp_alt_modules);
 	if (boot_cpu_has(X86_FEATURE_UP))
 		alternatives_smp_unlock(smp->locks, smp->locks_end,
 					smp->text, smp->text_end);
-	spin_unlock(&smp_alt);
+	mutex_unlock(&smp_alt);
 }
 
 void alternatives_smp_module_del(struct module *mod)
@@ -327,17 +327,17 @@ void alternatives_smp_module_del(struct module *mod)
 	if (smp_alt_once || noreplace_smp)
 		return;
 
-	spin_lock(&smp_alt);
+	mutex_lock(&smp_alt);
 	list_for_each_entry(item, &smp_alt_modules, next) {
 		if (mod != item->mod)
 			continue;
 		list_del(&item->next);
-		spin_unlock(&smp_alt);
+		mutex_unlock(&smp_alt);
 		DPRINTK("%s: %s\n", __func__, item->name);
 		kfree(item);
 		return;
 	}
-	spin_unlock(&smp_alt);
+	mutex_unlock(&smp_alt);
 }
 
 void alternatives_smp_switch(int smp)
@@ -359,7 +359,7 @@ void alternatives_smp_switch(int smp)
 		return;
 	BUG_ON(!smp && (num_online_cpus() > 1));
 
-	spin_lock(&smp_alt);
+	mutex_lock(&smp_alt);
 
 	/*
 	 * Avoid unnecessary switches because it forces JIT based VMs to
@@ -383,7 +383,7 @@ void alternatives_smp_switch(int smp)
 				mod->text, mod->text_end);
 	}
 	smp_mode = smp;
-	spin_unlock(&smp_alt);
+	mutex_unlock(&smp_alt);
 }
 
 #endif
diff --git a/arch/x86/kernel/apic_32.c b/arch/x86/kernel/apic_32.c
index 6dea8306d8c0..a437d027f20b 100644
--- a/arch/x86/kernel/apic_32.c
+++ b/arch/x86/kernel/apic_32.c
@@ -82,6 +82,11 @@ int pic_mode;
 /* Have we found an MP table */
 int smp_found_config;
 
+static struct resource lapic_resource = {
+	.name = "Local APIC",
+	.flags = IORESOURCE_MEM | IORESOURCE_BUSY,
+};
+
 static unsigned int calibration_result;
 
 static int lapic_next_event(unsigned long delta,
@@ -969,7 +974,7 @@ void __cpuinit setup_local_APIC(void)
 	 * Double-check whether this APIC is really registered.
 	 */
 	if (!apic_id_registered())
-		BUG();
+		WARN_ON_ONCE(1);
 
 	/*
 	 * Intel recommends to set DFR, LDR and TPR before enabling
@@ -1335,6 +1340,10 @@ void __init smp_intr_init(void)
 
 	/* IPI for generic function call */
 	alloc_intr_gate(CALL_FUNCTION_VECTOR, call_function_interrupt);
+
+	/* IPI for single call function */
+	set_intr_gate(CALL_FUNCTION_SINGLE_VECTOR,
+				call_function_single_interrupt);
 }
 #endif
 
@@ -1720,3 +1729,21 @@ static int __init apic_set_verbosity(char *str)
 }
 __setup("apic=", apic_set_verbosity);
 
+static int __init lapic_insert_resource(void)
+{
+	if (!apic_phys)
+		return -1;
+
+	/* Put local APIC into the resource map. */
+	lapic_resource.start = apic_phys;
+	lapic_resource.end = lapic_resource.start + PAGE_SIZE - 1;
+	insert_resource(&iomem_resource, &lapic_resource);
+
+	return 0;
+}
+
+/*
+ * need call insert after e820_reserve_resources()
+ * that is using request_resource
+ */
+late_initcall(lapic_insert_resource);
diff --git a/arch/x86/kernel/apm_32.c b/arch/x86/kernel/apm_32.c
index 00e6d1370954..bf9b441331e9 100644
--- a/arch/x86/kernel/apm_32.c
+++ b/arch/x86/kernel/apm_32.c
@@ -204,6 +204,7 @@
 #include <linux/module.h>
 
 #include <linux/poll.h>
+#include <linux/smp_lock.h>
 #include <linux/types.h>
 #include <linux/stddef.h>
 #include <linux/timer.h>
@@ -1212,9 +1213,9 @@ static int suspend(int vetoable)
 	if (err != APM_SUCCESS)
 		apm_error("suspend", err);
 	err = (err == APM_SUCCESS) ? 0 : -EIO;
-	device_power_up();
+	device_power_up(PMSG_RESUME);
 	local_irq_enable();
-	device_resume();
+	device_resume(PMSG_RESUME);
 	queue_event(APM_NORMAL_RESUME, NULL);
 	spin_lock(&user_list_lock);
 	for (as = user_list; as != NULL; as = as->next) {
@@ -1239,7 +1240,7 @@ static void standby(void)
 		apm_error("standby", err);
 
 	local_irq_disable();
-	device_power_up();
+	device_power_up(PMSG_RESUME);
 	local_irq_enable();
 }
 
@@ -1325,7 +1326,7 @@ static void check_events(void)
 			ignore_bounce = 1;
 		if ((event != APM_NORMAL_RESUME)
 		    || (ignore_normal_resume == 0)) {
-			device_resume();
+			device_resume(PMSG_RESUME);
 			queue_event(event, NULL);
 		}
 		ignore_normal_resume = 0;
@@ -1549,10 +1550,12 @@ static int do_open(struct inode *inode, struct file *filp)
 {
 	struct apm_user *as;
 
+	lock_kernel();
 	as = kmalloc(sizeof(*as), GFP_KERNEL);
 	if (as == NULL) {
 		printk(KERN_ERR "apm: cannot allocate struct of size %d bytes\n",
 		       sizeof(*as));
+		unlock_kernel();
 		return -ENOMEM;
 	}
 	as->magic = APM_BIOS_MAGIC;
@@ -1574,6 +1577,7 @@ static int do_open(struct inode *inode, struct file *filp)
 	user_list = as;
 	spin_unlock(&user_list_lock);
 	filp->private_data = as;
+	unlock_kernel();
 	return 0;
 }
 
diff --git a/arch/x86/kernel/asm-offsets_64.c b/arch/x86/kernel/asm-offsets_64.c
index 3295e7c08fe7..bacf5deeec2d 100644
--- a/arch/x86/kernel/asm-offsets_64.c
+++ b/arch/x86/kernel/asm-offsets_64.c
@@ -34,7 +34,7 @@ int main(void)
 	ENTRY(pid);
 	BLANK();
 #undef ENTRY
-#define ENTRY(entry) DEFINE(threadinfo_ ## entry, offsetof(struct thread_info, entry))
+#define ENTRY(entry) DEFINE(TI_ ## entry, offsetof(struct thread_info, entry))
 	ENTRY(flags);
 	ENTRY(addr_limit);
 	ENTRY(preempt_count);
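This one-line change is where the threadinfo_* → TI_* symbols used by ia32entry.S above come from: asm-offsets_64.c is compiled and the constants emitted by DEFINE() are scraped into a generated header that assembly code includes. A minimal user-space sketch of the same offsetof() technique (the struct fields and the printf-based DEFINE() are illustrative stand-ins, not the kernel's real thread_info layout or its asm-based DEFINE()):

```c
#include <stddef.h>
#include <stdio.h>

/* Stand-in for the kernel's struct thread_info (fields are illustrative). */
struct thread_info {
	unsigned long flags;
	unsigned int status;
};

/* The kernel's DEFINE() emits a marker into generated assembly that a
 * build script turns into a header; printing a #define line shows the
 * same idea in user space. */
#define DEFINE(sym, val) \
	printf("#define %-12s %lu\n", #sym, (unsigned long)(val))
#define ENTRY(entry) DEFINE(TI_ ## entry, offsetof(struct thread_info, entry))

int main(void)
{
	ENTRY(flags);	/* -> #define TI_flags  0 */
	ENTRY(status);	/* -> #define TI_status 8 (on LP64) */
	return 0;
}
```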
diff --git a/arch/x86/kernel/cpu/amd_64.c b/arch/x86/kernel/cpu/amd_64.c
index 958526d6a74a..7c36fb8a28d4 100644
--- a/arch/x86/kernel/cpu/amd_64.c
+++ b/arch/x86/kernel/cpu/amd_64.c
@@ -199,10 +199,15 @@ static void __cpuinit init_amd(struct cpuinfo_x86 *c)
 		 * Don't do it for gbpages because there seems very little
 		 * benefit in doing so.
 		 */
-		if (!rdmsrl_safe(MSR_K8_TSEG_ADDR, &tseg) &&
-		    (tseg >> PMD_SHIFT) <
-		    (max_pfn_mapped >> (PMD_SHIFT-PAGE_SHIFT)))
+		if (!rdmsrl_safe(MSR_K8_TSEG_ADDR, &tseg)) {
+			printk(KERN_DEBUG "tseg: %010llx\n", tseg);
+			if ((tseg>>PMD_SHIFT) <
+				(max_low_pfn_mapped>>(PMD_SHIFT-PAGE_SHIFT)) ||
+				((tseg>>PMD_SHIFT) <
+				(max_pfn_mapped>>(PMD_SHIFT-PAGE_SHIFT)) &&
+				(tseg>>PMD_SHIFT) >= (1ULL<<(32 - PMD_SHIFT))))
 			set_memory_4k((unsigned long)__va(tseg), 1);
+		}
 	}
 }
 
diff --git a/arch/x86/kernel/cpu/centaur_64.c b/arch/x86/kernel/cpu/centaur_64.c
index 13526fd5cce1..1d181c40e2e1 100644
--- a/arch/x86/kernel/cpu/centaur_64.c
+++ b/arch/x86/kernel/cpu/centaur_64.c
@@ -10,20 +10,12 @@ static void __cpuinit early_init_centaur(struct cpuinfo_x86 *c)
 {
 	if (c->x86 == 0x6 && c->x86_model >= 0xf)
 		set_cpu_cap(c, X86_FEATURE_CONSTANT_TSC);
+
+	set_cpu_cap(c, X86_FEATURE_SYSENTER32);
 }
 
 static void __cpuinit init_centaur(struct cpuinfo_x86 *c)
 {
-	/* Cache sizes */
-	unsigned n;
-
-	n = c->extended_cpuid_level;
-	if (n >= 0x80000008) {
-		unsigned eax = cpuid_eax(0x80000008);
-		c->x86_virt_bits = (eax >> 8) & 0xff;
-		c->x86_phys_bits = eax & 0xff;
-	}
-
 	if (c->x86 == 0x6 && c->x86_model >= 0xf) {
 		c->x86_cache_alignment = c->x86_clflush_size * 2;
 		set_cpu_cap(c, X86_FEATURE_CONSTANT_TSC);
diff --git a/arch/x86/kernel/cpu/common_64.c b/arch/x86/kernel/cpu/common_64.c
index 751850235291..7b8cc72feb40 100644
--- a/arch/x86/kernel/cpu/common_64.c
+++ b/arch/x86/kernel/cpu/common_64.c
@@ -98,7 +98,7 @@ int __cpuinit get_model_name(struct cpuinfo_x86 *c)
 
 void __cpuinit display_cacheinfo(struct cpuinfo_x86 *c)
 {
-	unsigned int n, dummy, eax, ebx, ecx, edx;
+	unsigned int n, dummy, ebx, ecx, edx;
 
 	n = c->extended_cpuid_level;
 
@@ -121,11 +121,6 @@ void __cpuinit display_cacheinfo(struct cpuinfo_x86 *c)
 		printk(KERN_INFO "CPU: L2 Cache: %dK (%d bytes/line)\n",
 			c->x86_cache_size, ecx & 0xFF);
 	}
-	if (n >= 0x80000008) {
-		cpuid(0x80000008, &eax, &dummy, &dummy, &dummy);
-		c->x86_virt_bits = (eax >> 8) & 0xff;
-		c->x86_phys_bits = eax & 0xff;
-	}
 }
 
 void __cpuinit detect_ht(struct cpuinfo_x86 *c)
@@ -314,6 +309,16 @@ static void __cpuinit early_identify_cpu(struct cpuinfo_x86 *c)
 	if (c->extended_cpuid_level >= 0x80000007)
 		c->x86_power = cpuid_edx(0x80000007);
 
+	if (c->extended_cpuid_level >= 0x80000008) {
+		u32 eax = cpuid_eax(0x80000008);
+
+		c->x86_virt_bits = (eax >> 8) & 0xff;
+		c->x86_phys_bits = eax & 0xff;
+	}
+
+	/* Assume all 64-bit CPUs support 32-bit syscall */
+	set_cpu_cap(c, X86_FEATURE_SYSCALL32);
+
 	if (c->x86_vendor != X86_VENDOR_UNKNOWN &&
 	    cpu_devs[c->x86_vendor]->c_early_init)
 		cpu_devs[c->x86_vendor]->c_early_init(c);
diff --git a/arch/x86/kernel/cpu/intel.c b/arch/x86/kernel/cpu/intel.c index fe9224c51d37..70609efdf1da 100644 --- a/arch/x86/kernel/cpu/intel.c +++ b/arch/x86/kernel/cpu/intel.c | |||
@@ -226,6 +226,10 @@ static void __cpuinit init_intel(struct cpuinfo_x86 *c) | |||
226 | 226 | ||
227 | if (cpu_has_bts) | 227 | if (cpu_has_bts) |
228 | ds_init_intel(c); | 228 | ds_init_intel(c); |
229 | |||
230 | #ifdef CONFIG_X86_NUMAQ | ||
231 | numaq_tsc_disable(); | ||
232 | #endif | ||
229 | } | 233 | } |
230 | 234 | ||
231 | static unsigned int __cpuinit intel_size_cache(struct cpuinfo_x86 *c, unsigned int size) | 235 | static unsigned int __cpuinit intel_size_cache(struct cpuinfo_x86 *c, unsigned int size) |
diff --git a/arch/x86/kernel/cpu/intel_64.c b/arch/x86/kernel/cpu/intel_64.c index fcb1cc9d75ca..1019c58d39f0 100644 --- a/arch/x86/kernel/cpu/intel_64.c +++ b/arch/x86/kernel/cpu/intel_64.c | |||
@@ -12,6 +12,8 @@ static void __cpuinit early_init_intel(struct cpuinfo_x86 *c) | |||
12 | if ((c->x86 == 0xf && c->x86_model >= 0x03) || | 12 | if ((c->x86 == 0xf && c->x86_model >= 0x03) || |
13 | (c->x86 == 0x6 && c->x86_model >= 0x0e)) | 13 | (c->x86 == 0x6 && c->x86_model >= 0x0e)) |
14 | set_cpu_cap(c, X86_FEATURE_CONSTANT_TSC); | 14 | set_cpu_cap(c, X86_FEATURE_CONSTANT_TSC); |
15 | |||
16 | set_cpu_cap(c, X86_FEATURE_SYSENTER32); | ||
15 | } | 17 | } |
16 | 18 | ||
17 | /* | 19 | /* |
@@ -52,9 +54,6 @@ static void __cpuinit srat_detect_node(void) | |||
52 | 54 | ||
53 | static void __cpuinit init_intel(struct cpuinfo_x86 *c) | 55 | static void __cpuinit init_intel(struct cpuinfo_x86 *c) |
54 | { | 56 | { |
55 | /* Cache sizes */ | ||
56 | unsigned n; | ||
57 | |||
58 | init_intel_cacheinfo(c); | 57 | init_intel_cacheinfo(c); |
59 | if (c->cpuid_level > 9) { | 58 | if (c->cpuid_level > 9) { |
60 | unsigned eax = cpuid_eax(10); | 59 | unsigned eax = cpuid_eax(10); |
@@ -76,13 +75,6 @@ static void __cpuinit init_intel(struct cpuinfo_x86 *c) | |||
76 | if (cpu_has_bts) | 75 | if (cpu_has_bts) |
77 | ds_init_intel(c); | 76 | ds_init_intel(c); |
78 | 77 | ||
79 | n = c->extended_cpuid_level; | ||
80 | if (n >= 0x80000008) { | ||
81 | unsigned eax = cpuid_eax(0x80000008); | ||
82 | c->x86_virt_bits = (eax >> 8) & 0xff; | ||
83 | c->x86_phys_bits = eax & 0xff; | ||
84 | } | ||
85 | |||
86 | if (c->x86 == 15) | 78 | if (c->x86 == 15) |
87 | c->x86_cache_alignment = c->x86_clflush_size * 2; | 79 | c->x86_cache_alignment = c->x86_clflush_size * 2; |
88 | if (c->x86 == 6) | 80 | if (c->x86 == 6) |
diff --git a/arch/x86/kernel/cpu/mcheck/mce_64.c b/arch/x86/kernel/cpu/mcheck/mce_64.c index 501ca1cea27d..c4a7ec31394c 100644 --- a/arch/x86/kernel/cpu/mcheck/mce_64.c +++ b/arch/x86/kernel/cpu/mcheck/mce_64.c | |||
@@ -9,6 +9,7 @@ | |||
9 | #include <linux/types.h> | 9 | #include <linux/types.h> |
10 | #include <linux/kernel.h> | 10 | #include <linux/kernel.h> |
11 | #include <linux/sched.h> | 11 | #include <linux/sched.h> |
12 | #include <linux/smp_lock.h> | ||
12 | #include <linux/string.h> | 13 | #include <linux/string.h> |
13 | #include <linux/rcupdate.h> | 14 | #include <linux/rcupdate.h> |
14 | #include <linux/kallsyms.h> | 15 | #include <linux/kallsyms.h> |
@@ -363,7 +364,7 @@ static void mcheck_check_cpu(void *info) | |||
363 | 364 | ||
364 | static void mcheck_timer(struct work_struct *work) | 365 | static void mcheck_timer(struct work_struct *work) |
365 | { | 366 | { |
366 | on_each_cpu(mcheck_check_cpu, NULL, 1, 1); | 367 | on_each_cpu(mcheck_check_cpu, NULL, 1); |
367 | 368 | ||
368 | /* | 369 | /* |
369 | * Alert userspace if needed. If we logged an MCE, reduce the | 370 | * Alert userspace if needed. If we logged an MCE, reduce the |
@@ -532,10 +533,12 @@ static int open_exclu; /* already open exclusive? */ | |||
532 | 533 | ||
533 | static int mce_open(struct inode *inode, struct file *file) | 534 | static int mce_open(struct inode *inode, struct file *file) |
534 | { | 535 | { |
536 | lock_kernel(); | ||
535 | spin_lock(&mce_state_lock); | 537 | spin_lock(&mce_state_lock); |
536 | 538 | ||
537 | if (open_exclu || (open_count && (file->f_flags & O_EXCL))) { | 539 | if (open_exclu || (open_count && (file->f_flags & O_EXCL))) { |
538 | spin_unlock(&mce_state_lock); | 540 | spin_unlock(&mce_state_lock); |
541 | unlock_kernel(); | ||
539 | return -EBUSY; | 542 | return -EBUSY; |
540 | } | 543 | } |
541 | 544 | ||
@@ -544,6 +547,7 @@ static int mce_open(struct inode *inode, struct file *file) | |||
544 | open_count++; | 547 | open_count++; |
545 | 548 | ||
546 | spin_unlock(&mce_state_lock); | 549 | spin_unlock(&mce_state_lock); |
550 | unlock_kernel(); | ||
547 | 551 | ||
548 | return nonseekable_open(inode, file); | 552 | return nonseekable_open(inode, file); |
549 | } | 553 | } |
@@ -617,7 +621,7 @@ static ssize_t mce_read(struct file *filp, char __user *ubuf, size_t usize, | |||
617 | * Collect entries that were still getting written before the | 621 | * Collect entries that were still getting written before the |
618 | * synchronize. | 622 | * synchronize. |
619 | */ | 623 | */ |
620 | on_each_cpu(collect_tscs, cpu_tsc, 1, 1); | 624 | on_each_cpu(collect_tscs, cpu_tsc, 1); |
621 | for (i = next; i < MCE_LOG_LEN; i++) { | 625 | for (i = next; i < MCE_LOG_LEN; i++) { |
622 | if (mcelog.entry[i].finished && | 626 | if (mcelog.entry[i].finished && |
623 | mcelog.entry[i].tsc < cpu_tsc[mcelog.entry[i].cpu]) { | 627 | mcelog.entry[i].tsc < cpu_tsc[mcelog.entry[i].cpu]) { |
@@ -742,7 +746,7 @@ static void mce_restart(void) | |||
742 | if (next_interval) | 746 | if (next_interval) |
743 | cancel_delayed_work(&mcheck_work); | 747 | cancel_delayed_work(&mcheck_work); |
744 | /* Timer race is harmless here */ | 748 | /* Timer race is harmless here */ |
745 | on_each_cpu(mce_init, NULL, 1, 1); | 749 | on_each_cpu(mce_init, NULL, 1); |
746 | next_interval = check_interval * HZ; | 750 | next_interval = check_interval * HZ; |
747 | if (next_interval) | 751 | if (next_interval) |
748 | schedule_delayed_work(&mcheck_work, | 752 | schedule_delayed_work(&mcheck_work, |
diff --git a/arch/x86/kernel/cpu/mcheck/non-fatal.c b/arch/x86/kernel/cpu/mcheck/non-fatal.c index 00ccb6c14ec2..cc1fccdd31e0 100644 --- a/arch/x86/kernel/cpu/mcheck/non-fatal.c +++ b/arch/x86/kernel/cpu/mcheck/non-fatal.c | |||
@@ -59,7 +59,7 @@ static DECLARE_DELAYED_WORK(mce_work, mce_work_fn); | |||
59 | 59 | ||
60 | static void mce_work_fn(struct work_struct *work) | 60 | static void mce_work_fn(struct work_struct *work) |
61 | { | 61 | { |
62 | on_each_cpu(mce_checkregs, NULL, 1, 1); | 62 | on_each_cpu(mce_checkregs, NULL, 1); |
63 | schedule_delayed_work(&mce_work, round_jiffies_relative(MCE_RATE)); | 63 | schedule_delayed_work(&mce_work, round_jiffies_relative(MCE_RATE)); |
64 | } | 64 | } |
65 | 65 | ||
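The mce, non-fatal, mtrr and watchdog hunks in this region all drop one argument from the cross-CPU call helpers: with USE_GENERIC_SMP_HELPERS selected in Kconfig, the old retry/nonatomic parameter is gone and only the "wait for completion" flag remains. A hedged sketch of the post-conversion shapes, inferred from the call sites in this diff (the real prototypes live in <linux/smp.h>, which is not shown here; the typedef name below is a local readability aid, not a kernel name):

    /* Sketch of the 3/4-argument helpers after the generic-SMP conversion. */
    typedef void (*smp_call_func_t)(void *info);

    int smp_call_function(smp_call_func_t func, void *info, int wait);
    int smp_call_function_single(int cpu, smp_call_func_t func, void *info, int wait);
    int on_each_cpu(smp_call_func_t func, void *info, int wait);

    /* Old: on_each_cpu(mce_checkregs, NULL, 1, 1);
     * New: on_each_cpu(mce_checkregs, NULL, 1);  -- retry flag removed. */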
diff --git a/arch/x86/kernel/cpu/mtrr/main.c b/arch/x86/kernel/cpu/mtrr/main.c index 105afe12beb0..6f23969c8faf 100644 --- a/arch/x86/kernel/cpu/mtrr/main.c +++ b/arch/x86/kernel/cpu/mtrr/main.c | |||
@@ -223,7 +223,7 @@ static void set_mtrr(unsigned int reg, unsigned long base, | |||
223 | atomic_set(&data.gate,0); | 223 | atomic_set(&data.gate,0); |
224 | 224 | ||
225 | /* Start the ball rolling on other CPUs */ | 225 | /* Start the ball rolling on other CPUs */ |
226 | if (smp_call_function(ipi_handler, &data, 1, 0) != 0) | 226 | if (smp_call_function(ipi_handler, &data, 0) != 0) |
227 | panic("mtrr: timed out waiting for other CPUs\n"); | 227 | panic("mtrr: timed out waiting for other CPUs\n"); |
228 | 228 | ||
229 | local_irq_save(flags); | 229 | local_irq_save(flags); |
@@ -1682,7 +1682,7 @@ void mtrr_ap_init(void) | |||
1682 | */ | 1682 | */ |
1683 | void mtrr_save_state(void) | 1683 | void mtrr_save_state(void) |
1684 | { | 1684 | { |
1685 | smp_call_function_single(0, mtrr_save_fixed_ranges, NULL, 1, 1); | 1685 | smp_call_function_single(0, mtrr_save_fixed_ranges, NULL, 1); |
1686 | } | 1686 | } |
1687 | 1687 | ||
1688 | static int __init mtrr_init_finialize(void) | 1688 | static int __init mtrr_init_finialize(void) |
diff --git a/arch/x86/kernel/cpu/perfctr-watchdog.c b/arch/x86/kernel/cpu/perfctr-watchdog.c index 2e9bef6e3aa3..6d4bdc02388a 100644 --- a/arch/x86/kernel/cpu/perfctr-watchdog.c +++ b/arch/x86/kernel/cpu/perfctr-watchdog.c | |||
@@ -189,7 +189,7 @@ void disable_lapic_nmi_watchdog(void) | |||
189 | if (atomic_read(&nmi_active) <= 0) | 189 | if (atomic_read(&nmi_active) <= 0) |
190 | return; | 190 | return; |
191 | 191 | ||
192 | on_each_cpu(stop_apic_nmi_watchdog, NULL, 0, 1); | 192 | on_each_cpu(stop_apic_nmi_watchdog, NULL, 1); |
193 | 193 | ||
194 | if (wd_ops) | 194 | if (wd_ops) |
195 | wd_ops->unreserve(); | 195 | wd_ops->unreserve(); |
@@ -213,7 +213,7 @@ void enable_lapic_nmi_watchdog(void) | |||
213 | return; | 213 | return; |
214 | } | 214 | } |
215 | 215 | ||
216 | on_each_cpu(setup_apic_nmi_watchdog, NULL, 0, 1); | 216 | on_each_cpu(setup_apic_nmi_watchdog, NULL, 1); |
217 | touch_nmi_watchdog(); | 217 | touch_nmi_watchdog(); |
218 | } | 218 | } |
219 | 219 | ||
diff --git a/arch/x86/kernel/cpuid.c b/arch/x86/kernel/cpuid.c index daff52a62248..2de5fa2bbf77 100644 --- a/arch/x86/kernel/cpuid.c +++ b/arch/x86/kernel/cpuid.c | |||
@@ -33,6 +33,7 @@ | |||
33 | #include <linux/init.h> | 33 | #include <linux/init.h> |
34 | #include <linux/poll.h> | 34 | #include <linux/poll.h> |
35 | #include <linux/smp.h> | 35 | #include <linux/smp.h> |
36 | #include <linux/smp_lock.h> | ||
36 | #include <linux/major.h> | 37 | #include <linux/major.h> |
37 | #include <linux/fs.h> | 38 | #include <linux/fs.h> |
38 | #include <linux/smp_lock.h> | 39 | #include <linux/smp_lock.h> |
@@ -95,7 +96,7 @@ static ssize_t cpuid_read(struct file *file, char __user *buf, | |||
95 | for (; count; count -= 16) { | 96 | for (; count; count -= 16) { |
96 | cmd.eax = pos; | 97 | cmd.eax = pos; |
97 | cmd.ecx = pos >> 32; | 98 | cmd.ecx = pos >> 32; |
98 | smp_call_function_single(cpu, cpuid_smp_cpuid, &cmd, 1, 1); | 99 | smp_call_function_single(cpu, cpuid_smp_cpuid, &cmd, 1); |
99 | if (copy_to_user(tmp, &cmd, 16)) | 100 | if (copy_to_user(tmp, &cmd, 16)) |
100 | return -EFAULT; | 101 | return -EFAULT; |
101 | tmp += 16; | 102 | tmp += 16; |
@@ -107,15 +108,23 @@ static ssize_t cpuid_read(struct file *file, char __user *buf, | |||
107 | 108 | ||
108 | static int cpuid_open(struct inode *inode, struct file *file) | 109 | static int cpuid_open(struct inode *inode, struct file *file) |
109 | { | 110 | { |
110 | unsigned int cpu = iminor(file->f_path.dentry->d_inode); | 111 | unsigned int cpu; |
111 | struct cpuinfo_x86 *c = &cpu_data(cpu); | 112 | struct cpuinfo_x86 *c; |
112 | 113 | int ret = 0; | |
113 | if (cpu >= NR_CPUS || !cpu_online(cpu)) | 114 | |
114 | return -ENXIO; /* No such CPU */ | 115 | lock_kernel(); |
116 | |||
117 | cpu = iminor(file->f_path.dentry->d_inode); | ||
118 | if (cpu >= NR_CPUS || !cpu_online(cpu)) { | ||
119 | ret = -ENXIO; /* No such CPU */ | ||
120 | goto out; | ||
121 | } | ||
122 | c = &cpu_data(cpu); | ||
115 | if (c->cpuid_level < 0) | 123 | if (c->cpuid_level < 0) |
116 | return -EIO; /* CPUID not supported */ | 124 | ret = -EIO; /* CPUID not supported */ |
117 | 125 | out: | |
118 | return 0; | 126 | unlock_kernel(); |
127 | return ret; | ||
119 | } | 128 | } |
120 | 129 | ||
121 | /* | 130 | /* |
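The cpuid_open() hunk above is a BKL pushdown: the big kernel lock is now taken explicitly inside the ->open handler, and every exit funnels through a single unlock. A runnable userspace model of that single-exit pattern, with a pthread mutex standing in for lock_kernel()/unlock_kernel() and a bitmask standing in for the online-CPU check (all names here are illustrative, not kernel API):

    #include <pthread.h>
    #include <stdio.h>
    #include <errno.h>

    static pthread_mutex_t big_lock = PTHREAD_MUTEX_INITIALIZER;
    #define NR_CPUS 8
    static int cpu_online_mask = 0x3;   /* pretend CPUs 0 and 1 are online */

    static int model_open(unsigned int cpu)
    {
        int ret = 0;

        pthread_mutex_lock(&big_lock);              /* lock_kernel() */
        if (cpu >= NR_CPUS || !(cpu_online_mask & (1 << cpu))) {
            ret = -ENXIO;                           /* no such CPU */
            goto out;
        }
        /* further per-CPU validation would go here */
    out:
        pthread_mutex_unlock(&big_lock);            /* unlock_kernel() */
        return ret;
    }

    int main(void)
    {
        printf("open cpu1 -> %d\n", model_open(1)); /* 0 */
        printf("open cpu5 -> %d\n", model_open(5)); /* -ENXIO */
        return 0;
    }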
diff --git a/arch/x86/kernel/e820.c b/arch/x86/kernel/e820.c index d0335853ff52..28c29180b380 100644 --- a/arch/x86/kernel/e820.c +++ b/arch/x86/kernel/e820.c | |||
@@ -19,6 +19,7 @@ | |||
19 | #include <linux/mm.h> | 19 | #include <linux/mm.h> |
20 | #include <linux/pfn.h> | 20 | #include <linux/pfn.h> |
21 | #include <linux/suspend.h> | 21 | #include <linux/suspend.h> |
22 | #include <linux/firmware-map.h> | ||
22 | 23 | ||
23 | #include <asm/pgtable.h> | 24 | #include <asm/pgtable.h> |
24 | #include <asm/page.h> | 25 | #include <asm/page.h> |
@@ -27,7 +28,22 @@ | |||
27 | #include <asm/setup.h> | 28 | #include <asm/setup.h> |
28 | #include <asm/trampoline.h> | 29 | #include <asm/trampoline.h> |
29 | 30 | ||
31 | /* | ||
32 | * The e820 map is the map that gets modified e.g. with command line parameters | ||
33 | * and that is also registered with modifications in the kernel resource tree | ||
34 | * with the iomem_resource as parent. | ||
35 | * | ||
36 | * The e820_saved is directly saved after the BIOS-provided memory map is | ||
37 | * copied. It doesn't get modified afterwards. It's registered for the | ||
38 | * /sys/firmware/memmap interface. | ||
39 | * | ||
40 | * That memory map is not modified and is used as base for kexec. The kexec'd | ||
41 | * kernel should get the same memory map as the firmware provides. Then the | ||
42 | * user can e.g. boot the original kernel with mem=1G while still booting the | ||
43 | * next kernel with full memory. | ||
44 | */ | ||
30 | struct e820map e820; | 45 | struct e820map e820; |
46 | struct e820map e820_saved; | ||
31 | 47 | ||
32 | /* For PCI or other memory-mapped resources */ | 48 | /* For PCI or other memory-mapped resources */ |
33 | unsigned long pci_mem_start = 0xaeedbabe; | 49 | unsigned long pci_mem_start = 0xaeedbabe; |
@@ -398,8 +414,9 @@ static int __init append_e820_map(struct e820entry *biosmap, int nr_map) | |||
398 | return __append_e820_map(biosmap, nr_map); | 414 | return __append_e820_map(biosmap, nr_map); |
399 | } | 415 | } |
400 | 416 | ||
401 | u64 __init e820_update_range(u64 start, u64 size, unsigned old_type, | 417 | static u64 __init e820_update_range_map(struct e820map *e820x, u64 start, |
402 | unsigned new_type) | 418 | u64 size, unsigned old_type, |
419 | unsigned new_type) | ||
403 | { | 420 | { |
404 | int i; | 421 | int i; |
405 | u64 real_updated_size = 0; | 422 | u64 real_updated_size = 0; |
@@ -410,7 +427,7 @@ u64 __init e820_update_range(u64 start, u64 size, unsigned old_type, | |||
410 | size = ULLONG_MAX - start; | 427 | size = ULLONG_MAX - start; |
411 | 428 | ||
412 | for (i = 0; i < e820.nr_map; i++) { | 429 | for (i = 0; i < e820.nr_map; i++) { |
413 | struct e820entry *ei = &e820.map[i]; | 430 | struct e820entry *ei = &e820x->map[i]; |
414 | u64 final_start, final_end; | 431 | u64 final_start, final_end; |
415 | if (ei->type != old_type) | 432 | if (ei->type != old_type) |
416 | continue; | 433 | continue; |
@@ -438,6 +455,19 @@ u64 __init e820_update_range(u64 start, u64 size, unsigned old_type, | |||
438 | return real_updated_size; | 455 | return real_updated_size; |
439 | } | 456 | } |
440 | 457 | ||
458 | u64 __init e820_update_range(u64 start, u64 size, unsigned old_type, | ||
459 | unsigned new_type) | ||
460 | { | ||
461 | return e820_update_range_map(&e820, start, size, old_type, new_type); | ||
462 | } | ||
463 | |||
464 | static u64 __init e820_update_range_saved(u64 start, u64 size, | ||
465 | unsigned old_type, unsigned new_type) | ||
466 | { | ||
467 | return e820_update_range_map(&e820_saved, start, size, old_type, | ||
468 | new_type); | ||
469 | } | ||
470 | |||
441 | /* make e820 not cover the range */ | 471 | /* make e820 not cover the range */ |
442 | u64 __init e820_remove_range(u64 start, u64 size, unsigned old_type, | 472 | u64 __init e820_remove_range(u64 start, u64 size, unsigned old_type, |
443 | int checktype) | 473 | int checktype) |
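With e820_update_range_map() factored out, the same walker can retype ranges in either the live e820 map or the pristine e820_saved snapshot (which setup_memory_map(), later in this diff, copies before anything modifies e820). A minimal runnable model of that factoring; the entry layout and type codes are simplified stand-ins for the real struct e820entry, and whole-entry matching replaces the kernel's partial-overlap splitting:

    #include <stdio.h>
    #include <stdint.h>

    enum { RAM = 1, RESERVED = 2 };

    struct entry { uint64_t addr, size; int type; };
    struct map   { struct entry map[8]; int nr_map; };

    static struct map e820       = { { { 0, 1 << 20, RAM } }, 1 };
    static struct map e820_saved = { { { 0, 1 << 20, RAM } }, 1 };

    /* Shared walker: retype entries of old_type inside [start, start+size). */
    static uint64_t update_range_map(struct map *m, uint64_t start, uint64_t size,
                                     int old_type, int new_type)
    {
        uint64_t updated = 0;
        for (int i = 0; i < m->nr_map; i++) {
            struct entry *ei = &m->map[i];
            if (ei->type != old_type)
                continue;
            if (ei->addr >= start && ei->addr + ei->size <= start + size) {
                ei->type = new_type;
                updated += ei->size;
            }
        }
        return updated;
    }

    int main(void)
    {
        /* The live map changes; the snapshot is only touched when asked to. */
        update_range_map(&e820, 0, 1 << 20, RAM, RESERVED);
        printf("e820[0].type=%d e820_saved[0].type=%d\n",
               e820.map[0].type, e820_saved.map[0].type);  /* 2 1 */
        return 0;
    }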
@@ -487,6 +517,15 @@ void __init update_e820(void) | |||
487 | printk(KERN_INFO "modified physical RAM map:\n"); | 517 | printk(KERN_INFO "modified physical RAM map:\n"); |
488 | e820_print_map("modified"); | 518 | e820_print_map("modified"); |
489 | } | 519 | } |
520 | static void __init update_e820_saved(void) | ||
521 | { | ||
522 | int nr_map; | ||
523 | |||
524 | nr_map = e820_saved.nr_map; | ||
525 | if (sanitize_e820_map(e820_saved.map, ARRAY_SIZE(e820_saved.map), &nr_map)) | ||
526 | return; | ||
527 | e820_saved.nr_map = nr_map; | ||
528 | } | ||
490 | #define MAX_GAP_END 0x100000000ull | 529 | #define MAX_GAP_END 0x100000000ull |
491 | /* | 530 | /* |
492 | * Search for a gap in the e820 memory space from start_addr to end_addr. | 531 | * Search for a gap in the e820 memory space from start_addr to end_addr. |
@@ -991,8 +1030,10 @@ u64 __init early_reserve_e820(u64 startt, u64 sizet, u64 align) | |||
991 | 1030 | ||
992 | addr = round_down(start + size - sizet, align); | 1031 | addr = round_down(start + size - sizet, align); |
993 | e820_update_range(addr, sizet, E820_RAM, E820_RESERVED); | 1032 | e820_update_range(addr, sizet, E820_RAM, E820_RESERVED); |
1033 | e820_update_range_saved(addr, sizet, E820_RAM, E820_RESERVED); | ||
994 | printk(KERN_INFO "update e820 for early_reserve_e820\n"); | 1034 | printk(KERN_INFO "update e820 for early_reserve_e820\n"); |
995 | update_e820(); | 1035 | update_e820(); |
1036 | update_e820_saved(); | ||
996 | 1037 | ||
997 | return addr; | 1038 | return addr; |
998 | } | 1039 | } |
@@ -1008,30 +1049,51 @@ u64 __init early_reserve_e820(u64 startt, u64 sizet, u64 align) | |||
1008 | #endif | 1049 | #endif |
1009 | 1050 | ||
1010 | /* | 1051 | /* |
1011 | * Last pfn which the user wants to use. | ||
1012 | */ | ||
1013 | unsigned long __initdata end_user_pfn = MAX_ARCH_PFN; | ||
1014 | |||
1015 | /* | ||
1016 | * Find the highest page frame number we have available | 1052 | * Find the highest page frame number we have available |
1017 | */ | 1053 | */ |
1018 | unsigned long __init e820_end_of_ram(void) | 1054 | static unsigned long __init e820_end_pfn(unsigned long limit_pfn, unsigned type) |
1019 | { | 1055 | { |
1020 | unsigned long last_pfn; | 1056 | int i; |
1057 | unsigned long last_pfn = 0; | ||
1021 | unsigned long max_arch_pfn = MAX_ARCH_PFN; | 1058 | unsigned long max_arch_pfn = MAX_ARCH_PFN; |
1022 | 1059 | ||
1023 | last_pfn = find_max_pfn_with_active_regions(); | 1060 | for (i = 0; i < e820.nr_map; i++) { |
1061 | struct e820entry *ei = &e820.map[i]; | ||
1062 | unsigned long start_pfn; | ||
1063 | unsigned long end_pfn; | ||
1064 | |||
1065 | if (ei->type != type) | ||
1066 | continue; | ||
1067 | |||
1068 | start_pfn = ei->addr >> PAGE_SHIFT; | ||
1069 | end_pfn = (ei->addr + ei->size) >> PAGE_SHIFT; | ||
1070 | |||
1071 | if (start_pfn >= limit_pfn) | ||
1072 | continue; | ||
1073 | if (end_pfn > limit_pfn) { | ||
1074 | last_pfn = limit_pfn; | ||
1075 | break; | ||
1076 | } | ||
1077 | if (end_pfn > last_pfn) | ||
1078 | last_pfn = end_pfn; | ||
1079 | } | ||
1024 | 1080 | ||
1025 | if (last_pfn > max_arch_pfn) | 1081 | if (last_pfn > max_arch_pfn) |
1026 | last_pfn = max_arch_pfn; | 1082 | last_pfn = max_arch_pfn; |
1027 | if (last_pfn > end_user_pfn) | ||
1028 | last_pfn = end_user_pfn; | ||
1029 | 1083 | ||
1030 | printk(KERN_INFO "last_pfn = %#lx max_arch_pfn = %#lx\n", | 1084 | printk(KERN_INFO "last_pfn = %#lx max_arch_pfn = %#lx\n", |
1031 | last_pfn, max_arch_pfn); | 1085 | last_pfn, max_arch_pfn); |
1032 | return last_pfn; | 1086 | return last_pfn; |
1033 | } | 1087 | } |
1088 | unsigned long __init e820_end_of_ram_pfn(void) | ||
1089 | { | ||
1090 | return e820_end_pfn(MAX_ARCH_PFN, E820_RAM); | ||
1091 | } | ||
1034 | 1092 | ||
1093 | unsigned long __init e820_end_of_low_ram_pfn(void) | ||
1094 | { | ||
1095 | return e820_end_pfn(1UL<<(32 - PAGE_SHIFT), E820_RAM); | ||
1096 | } | ||
1035 | /* | 1097 | /* |
1036 | * Finds an active region in the address range from start_pfn to last_pfn and | 1098 | * Finds an active region in the address range from start_pfn to last_pfn and |
1037 | * returns its range in ei_startpfn and ei_endpfn for the e820 entry. | 1099 | * returns its range in ei_startpfn and ei_endpfn for the e820 entry. |
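e820_end_pfn() replaces the old end_user_pfn clamp: it scans the map for the highest page frame of a given type below limit_pfn, clamping at the limit. A runnable model of that scan, with illustrative map entries (the break on the limit mirrors the kernel code, which assumes a sorted map):

    #include <stdio.h>
    #include <stdint.h>

    #define PAGE_SHIFT 12
    struct entry { uint64_t addr, size; int type; };

    static struct entry map[] = {
        { 0x00000000, 0x0009f000, 1 },   /* low RAM */
        { 0x00100000, 0x3ff00000, 1 },   /* RAM up to 1 GiB */
        { 0x40000000, 0x00010000, 2 },   /* reserved */
    };

    static unsigned long end_pfn(unsigned long limit_pfn, int type)
    {
        unsigned long last_pfn = 0;
        for (unsigned i = 0; i < sizeof(map) / sizeof(map[0]); i++) {
            if (map[i].type != type)
                continue;
            unsigned long start = map[i].addr >> PAGE_SHIFT;
            unsigned long end   = (map[i].addr + map[i].size) >> PAGE_SHIFT;
            if (start >= limit_pfn)
                continue;
            if (end > limit_pfn) {
                last_pfn = limit_pfn;
                break;
            }
            if (end > last_pfn)
                last_pfn = end;
        }
        return last_pfn;
    }

    int main(void)
    {
        /* Mirrors e820_end_of_ram_pfn() and e820_end_of_low_ram_pfn(). */
        printf("end of RAM pfn:     %#lx\n", end_pfn(~0UL, 1));
        printf("end of low-RAM pfn: %#lx\n", end_pfn(1UL << (32 - PAGE_SHIFT), 1));
        return 0;
    }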
@@ -1062,12 +1124,6 @@ int __init e820_find_active_region(const struct e820entry *ei, | |||
1062 | if (*ei_endpfn > last_pfn) | 1124 | if (*ei_endpfn > last_pfn) |
1063 | *ei_endpfn = last_pfn; | 1125 | *ei_endpfn = last_pfn; |
1064 | 1126 | ||
1065 | /* Obey end_user_pfn to save on memmap */ | ||
1066 | if (*ei_startpfn >= end_user_pfn) | ||
1067 | return 0; | ||
1068 | if (*ei_endpfn > end_user_pfn) | ||
1069 | *ei_endpfn = end_user_pfn; | ||
1070 | |||
1071 | return 1; | 1127 | return 1; |
1072 | } | 1128 | } |
1073 | 1129 | ||
@@ -1113,6 +1169,8 @@ static void early_panic(char *msg) | |||
1113 | panic(msg); | 1169 | panic(msg); |
1114 | } | 1170 | } |
1115 | 1171 | ||
1172 | static int userdef __initdata; | ||
1173 | |||
1116 | /* "mem=nopentium" disables the 4MB page tables. */ | 1174 | /* "mem=nopentium" disables the 4MB page tables. */ |
1117 | static int __init parse_memopt(char *p) | 1175 | static int __init parse_memopt(char *p) |
1118 | { | 1176 | { |
@@ -1128,22 +1186,22 @@ static int __init parse_memopt(char *p) | |||
1128 | } | 1186 | } |
1129 | #endif | 1187 | #endif |
1130 | 1188 | ||
1189 | userdef = 1; | ||
1131 | mem_size = memparse(p, &p); | 1190 | mem_size = memparse(p, &p); |
1132 | end_user_pfn = mem_size>>PAGE_SHIFT; | 1191 | e820_remove_range(mem_size, ULLONG_MAX - mem_size, E820_RAM, 1); |
1133 | e820_update_range(mem_size, ULLONG_MAX - mem_size, | ||
1134 | E820_RAM, E820_RESERVED); | ||
1135 | 1192 | ||
1136 | return 0; | 1193 | return 0; |
1137 | } | 1194 | } |
1138 | early_param("mem", parse_memopt); | 1195 | early_param("mem", parse_memopt); |
1139 | 1196 | ||
1140 | static int userdef __initdata; | ||
1141 | |||
1142 | static int __init parse_memmap_opt(char *p) | 1197 | static int __init parse_memmap_opt(char *p) |
1143 | { | 1198 | { |
1144 | char *oldp; | 1199 | char *oldp; |
1145 | u64 start_at, mem_size; | 1200 | u64 start_at, mem_size; |
1146 | 1201 | ||
1202 | if (!p) | ||
1203 | return -EINVAL; | ||
1204 | |||
1147 | if (!strcmp(p, "exactmap")) { | 1205 | if (!strcmp(p, "exactmap")) { |
1148 | #ifdef CONFIG_CRASH_DUMP | 1206 | #ifdef CONFIG_CRASH_DUMP |
1149 | /* | 1207 | /* |
@@ -1151,9 +1209,7 @@ static int __init parse_memmap_opt(char *p) | |||
1151 | * the real mem size before original memory map is | 1209 | * the real mem size before original memory map is |
1152 | * reset. | 1210 | * reset. |
1153 | */ | 1211 | */ |
1154 | e820_register_active_regions(0, 0, -1UL); | 1212 | saved_max_pfn = e820_end_of_ram_pfn(); |
1155 | saved_max_pfn = e820_end_of_ram(); | ||
1156 | remove_all_active_ranges(); | ||
1157 | #endif | 1213 | #endif |
1158 | e820.nr_map = 0; | 1214 | e820.nr_map = 0; |
1159 | userdef = 1; | 1215 | userdef = 1; |
@@ -1175,11 +1231,9 @@ static int __init parse_memmap_opt(char *p) | |||
1175 | } else if (*p == '$') { | 1231 | } else if (*p == '$') { |
1176 | start_at = memparse(p+1, &p); | 1232 | start_at = memparse(p+1, &p); |
1177 | e820_add_region(start_at, mem_size, E820_RESERVED); | 1233 | e820_add_region(start_at, mem_size, E820_RESERVED); |
1178 | } else { | 1234 | } else |
1179 | end_user_pfn = (mem_size >> PAGE_SHIFT); | 1235 | e820_remove_range(mem_size, ULLONG_MAX - mem_size, E820_RAM, 1); |
1180 | e820_update_range(mem_size, ULLONG_MAX - mem_size, | 1236 | |
1181 | E820_RAM, E820_RESERVED); | ||
1182 | } | ||
1183 | return *p == '\0' ? 0 : -EINVAL; | 1237 | return *p == '\0' ? 0 : -EINVAL; |
1184 | } | 1238 | } |
1185 | early_param("memmap", parse_memmap_opt); | 1239 | early_param("memmap", parse_memmap_opt); |
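Note the semantic shift in mem= (and the plain memmap=nn form): RAM above the limit is now removed from the map outright via e820_remove_range() instead of being retyped to reserved, so the truncated memory no longer appears anywhere. A compact runnable model of that removal, including the partial-overlap trim (struct layout and type code are illustrative stand-ins):

    #include <stdio.h>
    #include <stdint.h>

    struct entry { uint64_t addr, size; int type; };
    static struct entry map[4] = { { 0, 1ULL << 31, 1 /* RAM */ } };
    static int nr_map = 1;

    /* Model of e820_remove_range(limit, ULLONG_MAX - limit, E820_RAM, 1):
     * drop or trim RAM entries above 'limit'. */
    static void trim_ram_above(uint64_t limit)
    {
        int dst = 0;
        for (int i = 0; i < nr_map; i++) {
            struct entry e = map[i];
            if (e.type == 1 && e.addr + e.size > limit) {
                if (e.addr >= limit)
                    continue;             /* fully above: drop entirely */
                e.size = limit - e.addr;  /* partially above: trim */
            }
            map[dst++] = e;
        }
        nr_map = dst;
    }

    int main(void)
    {
        trim_ram_above(1ULL << 30);       /* mem=1G against a 2 GiB entry */
        printf("entries=%d size=%#llx\n", nr_map,
               (unsigned long long)map[0].size);  /* entries=1 size=0x40000000 */
        return 0;
    }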
@@ -1198,6 +1252,17 @@ void __init finish_e820_parsing(void) | |||
1198 | } | 1252 | } |
1199 | } | 1253 | } |
1200 | 1254 | ||
1255 | static inline const char *e820_type_to_string(int e820_type) | ||
1256 | { | ||
1257 | switch (e820_type) { | ||
1258 | case E820_RESERVED_KERN: | ||
1259 | case E820_RAM: return "System RAM"; | ||
1260 | case E820_ACPI: return "ACPI Tables"; | ||
1261 | case E820_NVS: return "ACPI Non-volatile Storage"; | ||
1262 | default: return "reserved"; | ||
1263 | } | ||
1264 | } | ||
1265 | |||
1201 | /* | 1266 | /* |
1202 | * Mark e820 reserved areas as busy for the resource manager. | 1267 | * Mark e820 reserved areas as busy for the resource manager. |
1203 | */ | 1268 | */ |
@@ -1209,13 +1274,6 @@ void __init e820_reserve_resources(void) | |||
1209 | 1274 | ||
1210 | res = alloc_bootmem_low(sizeof(struct resource) * e820.nr_map); | 1275 | res = alloc_bootmem_low(sizeof(struct resource) * e820.nr_map); |
1211 | for (i = 0; i < e820.nr_map; i++) { | 1276 | for (i = 0; i < e820.nr_map; i++) { |
1212 | switch (e820.map[i].type) { | ||
1213 | case E820_RESERVED_KERN: | ||
1214 | case E820_RAM: res->name = "System RAM"; break; | ||
1215 | case E820_ACPI: res->name = "ACPI Tables"; break; | ||
1216 | case E820_NVS: res->name = "ACPI Non-volatile Storage"; break; | ||
1217 | default: res->name = "reserved"; | ||
1218 | } | ||
1219 | end = e820.map[i].addr + e820.map[i].size - 1; | 1277 | end = e820.map[i].addr + e820.map[i].size - 1; |
1220 | #ifndef CONFIG_RESOURCES_64BIT | 1278 | #ifndef CONFIG_RESOURCES_64BIT |
1221 | if (end > 0x100000000ULL) { | 1279 | if (end > 0x100000000ULL) { |
@@ -1223,6 +1281,7 @@ void __init e820_reserve_resources(void) | |||
1223 | continue; | 1281 | continue; |
1224 | } | 1282 | } |
1225 | #endif | 1283 | #endif |
1284 | res->name = e820_type_to_string(e820.map[i].type); | ||
1226 | res->start = e820.map[i].addr; | 1285 | res->start = e820.map[i].addr; |
1227 | res->end = end; | 1286 | res->end = end; |
1228 | 1287 | ||
@@ -1230,8 +1289,20 @@ void __init e820_reserve_resources(void) | |||
1230 | insert_resource(&iomem_resource, res); | 1289 | insert_resource(&iomem_resource, res); |
1231 | res++; | 1290 | res++; |
1232 | } | 1291 | } |
1292 | |||
1293 | for (i = 0; i < e820_saved.nr_map; i++) { | ||
1294 | struct e820entry *entry = &e820_saved.map[i]; | ||
1295 | firmware_map_add_early(entry->addr, | ||
1296 | entry->addr + entry->size - 1, | ||
1297 | e820_type_to_string(entry->type)); | ||
1298 | } | ||
1233 | } | 1299 | } |
1234 | 1300 | ||
1301 | /* | ||
1302 | * Non-standard memory setup can be specified via this quirk: | ||
1303 | */ | ||
1304 | char * (*arch_memory_setup_quirk)(void); | ||
1305 | |||
1235 | char *__init default_machine_specific_memory_setup(void) | 1306 | char *__init default_machine_specific_memory_setup(void) |
1236 | { | 1307 | { |
1237 | char *who = "BIOS-e820"; | 1308 | char *who = "BIOS-e820"; |
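The new loop at the end of e820_reserve_resources() hands every e820_saved entry to firmware_map_add_early(), which is what later surfaces as /sys/firmware/memmap. A runnable model of that registration, reusing the type strings from e820_type_to_string() above (entries and numeric type codes are illustrative; E820_RAM=1, E820_ACPI=3, E820_NVS=4 per the kernel headers):

    #include <stdio.h>
    #include <stdint.h>

    struct entry { uint64_t addr, size; int type; };
    static struct entry saved[] = {
        { 0x00000000, 0x0009f000, 1 },
        { 0x000f0000, 0x00010000, 2 },
    };

    static const char *type_to_string(int t)
    {
        switch (t) {
        case 1:  return "System RAM";
        case 3:  return "ACPI Tables";
        case 4:  return "ACPI Non-volatile Storage";
        default: return "reserved";
        }
    }

    int main(void)
    {
        /* Stands in for firmware_map_add_early(addr, addr+size-1, type). */
        for (unsigned i = 0; i < sizeof(saved) / sizeof(saved[0]); i++)
            printf("%#llx-%#llx : %s\n",
                   (unsigned long long)saved[i].addr,
                   (unsigned long long)(saved[i].addr + saved[i].size - 1),
                   type_to_string(saved[i].type));
        return 0;
    }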
@@ -1272,6 +1343,12 @@ char *__init default_machine_specific_memory_setup(void) | |||
1272 | 1343 | ||
1273 | char *__init __attribute__((weak)) machine_specific_memory_setup(void) | 1344 | char *__init __attribute__((weak)) machine_specific_memory_setup(void) |
1274 | { | 1345 | { |
1346 | if (arch_memory_setup_quirk) { | ||
1347 | char *who = arch_memory_setup_quirk(); | ||
1348 | |||
1349 | if (who) | ||
1350 | return who; | ||
1351 | } | ||
1275 | return default_machine_specific_memory_setup(); | 1352 | return default_machine_specific_memory_setup(); |
1276 | } | 1353 | } |
1277 | 1354 | ||
@@ -1283,8 +1360,12 @@ char * __init __attribute__((weak)) memory_setup(void) | |||
1283 | 1360 | ||
1284 | void __init setup_memory_map(void) | 1361 | void __init setup_memory_map(void) |
1285 | { | 1362 | { |
1363 | char *who; | ||
1364 | |||
1365 | who = memory_setup(); | ||
1366 | memcpy(&e820_saved, &e820, sizeof(struct e820map)); | ||
1286 | printk(KERN_INFO "BIOS-provided physical RAM map:\n"); | 1367 | printk(KERN_INFO "BIOS-provided physical RAM map:\n"); |
1287 | e820_print_map(memory_setup()); | 1368 | e820_print_map(who); |
1288 | } | 1369 | } |
1289 | 1370 | ||
1290 | #ifdef CONFIG_X86_64 | 1371 | #ifdef CONFIG_X86_64 |
diff --git a/arch/x86/kernel/early-quirks.c b/arch/x86/kernel/early-quirks.c index a4665f37cfc5..a0e11c0cc872 100644 --- a/arch/x86/kernel/early-quirks.c +++ b/arch/x86/kernel/early-quirks.c | |||
@@ -120,7 +120,18 @@ static struct chipset early_qrk[] __initdata = { | |||
120 | {} | 120 | {} |
121 | }; | 121 | }; |
122 | 122 | ||
123 | static void __init check_dev_quirk(int num, int slot, int func) | 123 | /** |
124 | * check_dev_quirk - apply early quirks to a given PCI device | ||
125 | * @num: bus number | ||
126 | * @slot: slot number | ||
127 | * @func: PCI function | ||
128 | * | ||
129 | * Check the vendor & device ID against the early quirks table. | ||
130 | * | ||
131 | * If the device is single function, let early_quirks() know so we don't | ||
132 | * poke at this device again. | ||
133 | */ | ||
134 | static int __init check_dev_quirk(int num, int slot, int func) | ||
124 | { | 135 | { |
125 | u16 class; | 136 | u16 class; |
126 | u16 vendor; | 137 | u16 vendor; |
@@ -131,7 +142,7 @@ static void __init check_dev_quirk(int num, int slot, int func) | |||
131 | class = read_pci_config_16(num, slot, func, PCI_CLASS_DEVICE); | 142 | class = read_pci_config_16(num, slot, func, PCI_CLASS_DEVICE); |
132 | 143 | ||
133 | if (class == 0xffff) | 144 | if (class == 0xffff) |
134 | return; | 145 | return -1; /* no class, treat as single function */ |
135 | 146 | ||
136 | vendor = read_pci_config_16(num, slot, func, PCI_VENDOR_ID); | 147 | vendor = read_pci_config_16(num, slot, func, PCI_VENDOR_ID); |
137 | 148 | ||
@@ -154,7 +165,9 @@ static void __init check_dev_quirk(int num, int slot, int func) | |||
154 | type = read_pci_config_byte(num, slot, func, | 165 | type = read_pci_config_byte(num, slot, func, |
155 | PCI_HEADER_TYPE); | 166 | PCI_HEADER_TYPE); |
156 | if (!(type & 0x80)) | 167 | if (!(type & 0x80)) |
157 | return; | 168 | return -1; |
169 | |||
170 | return 0; | ||
158 | } | 171 | } |
159 | 172 | ||
160 | void __init early_quirks(void) | 173 | void __init early_quirks(void) |
@@ -167,6 +180,9 @@ void __init early_quirks(void) | |||
167 | /* Poor man's PCI discovery */ | 180 | /* Poor man's PCI discovery */ |
168 | for (num = 0; num < 32; num++) | 181 | for (num = 0; num < 32; num++) |
169 | for (slot = 0; slot < 32; slot++) | 182 | for (slot = 0; slot < 32; slot++) |
170 | for (func = 0; func < 8; func++) | 183 | for (func = 0; func < 8; func++) { |
171 | check_dev_quirk(num, slot, func); | 184 | /* Only probe function 0 on single fn devices */ |
185 | if (check_dev_quirk(num, slot, func)) | ||
186 | break; | ||
187 | } | ||
172 | } | 188 | } |
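check_dev_quirk() now returns nonzero when there is nothing past function 0 (no device, or bit 7 of the PCI header type clear, i.e. a single-function device), and early_quirks() breaks out of the function loop accordingly. A runnable model of that decision (values are illustrative; the real code reads them from PCI config space):

    #include <stdio.h>

    /* Returns 0 to keep probing functions 1-7, -1 to stop after function 0. */
    static int check_dev(unsigned class, unsigned header_type)
    {
        if (class == 0xffff)
            return -1;               /* no device: skip the other functions */
        if (!(header_type & 0x80))
            return -1;               /* single-function device */
        return 0;
    }

    int main(void)
    {
        /* A present, single-function device: function 0 is still fully
         * checked for quirks before -1 stops the scan. */
        for (unsigned func = 0; func < 8; func++) {
            printf("checking function %u\n", func);
            if (check_dev(0x0600, 0x00))
                break;
        }
        return 0;
    }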
diff --git a/arch/x86/kernel/early_printk.c b/arch/x86/kernel/early_printk.c index 643fd861b724..ff9e7350da54 100644 --- a/arch/x86/kernel/early_printk.c +++ b/arch/x86/kernel/early_printk.c | |||
@@ -196,7 +196,7 @@ static struct console simnow_console = { | |||
196 | static struct console *early_console = &early_vga_console; | 196 | static struct console *early_console = &early_vga_console; |
197 | static int early_console_initialized; | 197 | static int early_console_initialized; |
198 | 198 | ||
199 | void early_printk(const char *fmt, ...) | 199 | asmlinkage void early_printk(const char *fmt, ...) |
200 | { | 200 | { |
201 | char buf[512]; | 201 | char buf[512]; |
202 | int n; | 202 | int n; |
diff --git a/arch/x86/kernel/efi.c b/arch/x86/kernel/efi.c index 94382faeadb6..06cc8d4254b1 100644 --- a/arch/x86/kernel/efi.c +++ b/arch/x86/kernel/efi.c | |||
@@ -473,7 +473,7 @@ void __init efi_enter_virtual_mode(void) | |||
473 | size = md->num_pages << EFI_PAGE_SHIFT; | 473 | size = md->num_pages << EFI_PAGE_SHIFT; |
474 | end = md->phys_addr + size; | 474 | end = md->phys_addr + size; |
475 | 475 | ||
476 | if (PFN_UP(end) <= max_pfn_mapped) | 476 | if (PFN_UP(end) <= max_low_pfn_mapped) |
477 | va = __va(md->phys_addr); | 477 | va = __va(md->phys_addr); |
478 | else | 478 | else |
479 | va = efi_ioremap(md->phys_addr, size); | 479 | va = efi_ioremap(md->phys_addr, size); |
diff --git a/arch/x86/kernel/entry_32.S b/arch/x86/kernel/entry_32.S index 53393c306e11..6bc07f0f1202 100644 --- a/arch/x86/kernel/entry_32.S +++ b/arch/x86/kernel/entry_32.S | |||
@@ -51,6 +51,7 @@ | |||
51 | #include <asm/percpu.h> | 51 | #include <asm/percpu.h> |
52 | #include <asm/dwarf2.h> | 52 | #include <asm/dwarf2.h> |
53 | #include <asm/processor-flags.h> | 53 | #include <asm/processor-flags.h> |
54 | #include <asm/ftrace.h> | ||
54 | #include <asm/irq_vectors.h> | 55 | #include <asm/irq_vectors.h> |
55 | 56 | ||
56 | /* | 57 | /* |
@@ -1024,6 +1025,7 @@ ENTRY(xen_sysenter_target) | |||
1024 | RING0_INT_FRAME | 1025 | RING0_INT_FRAME |
1025 | addl $5*4, %esp /* remove xen-provided frame */ | 1026 | addl $5*4, %esp /* remove xen-provided frame */ |
1026 | jmp sysenter_past_esp | 1027 | jmp sysenter_past_esp |
1028 | CFI_ENDPROC | ||
1027 | 1029 | ||
1028 | ENTRY(xen_hypervisor_callback) | 1030 | ENTRY(xen_hypervisor_callback) |
1029 | CFI_STARTPROC | 1031 | CFI_STARTPROC |
@@ -1110,6 +1112,77 @@ ENDPROC(xen_failsafe_callback) | |||
1110 | 1112 | ||
1111 | #endif /* CONFIG_XEN */ | 1113 | #endif /* CONFIG_XEN */ |
1112 | 1114 | ||
1115 | #ifdef CONFIG_FTRACE | ||
1116 | #ifdef CONFIG_DYNAMIC_FTRACE | ||
1117 | |||
1118 | ENTRY(mcount) | ||
1119 | pushl %eax | ||
1120 | pushl %ecx | ||
1121 | pushl %edx | ||
1122 | movl 0xc(%esp), %eax | ||
1123 | subl $MCOUNT_INSN_SIZE, %eax | ||
1124 | |||
1125 | .globl mcount_call | ||
1126 | mcount_call: | ||
1127 | call ftrace_stub | ||
1128 | |||
1129 | popl %edx | ||
1130 | popl %ecx | ||
1131 | popl %eax | ||
1132 | |||
1133 | ret | ||
1134 | END(mcount) | ||
1135 | |||
1136 | ENTRY(ftrace_caller) | ||
1137 | pushl %eax | ||
1138 | pushl %ecx | ||
1139 | pushl %edx | ||
1140 | movl 0xc(%esp), %eax | ||
1141 | movl 0x4(%ebp), %edx | ||
1142 | subl $MCOUNT_INSN_SIZE, %eax | ||
1143 | |||
1144 | .globl ftrace_call | ||
1145 | ftrace_call: | ||
1146 | call ftrace_stub | ||
1147 | |||
1148 | popl %edx | ||
1149 | popl %ecx | ||
1150 | popl %eax | ||
1151 | |||
1152 | .globl ftrace_stub | ||
1153 | ftrace_stub: | ||
1154 | ret | ||
1155 | END(ftrace_caller) | ||
1156 | |||
1157 | #else /* ! CONFIG_DYNAMIC_FTRACE */ | ||
1158 | |||
1159 | ENTRY(mcount) | ||
1160 | cmpl $ftrace_stub, ftrace_trace_function | ||
1161 | jnz trace | ||
1162 | .globl ftrace_stub | ||
1163 | ftrace_stub: | ||
1164 | ret | ||
1165 | |||
1166 | /* taken from glibc */ | ||
1167 | trace: | ||
1168 | pushl %eax | ||
1169 | pushl %ecx | ||
1170 | pushl %edx | ||
1171 | movl 0xc(%esp), %eax | ||
1172 | movl 0x4(%ebp), %edx | ||
1173 | subl $MCOUNT_INSN_SIZE, %eax | ||
1174 | |||
1175 | call *ftrace_trace_function | ||
1176 | |||
1177 | popl %edx | ||
1178 | popl %ecx | ||
1179 | popl %eax | ||
1180 | |||
1181 | jmp ftrace_stub | ||
1182 | END(mcount) | ||
1183 | #endif /* CONFIG_DYNAMIC_FTRACE */ | ||
1184 | #endif /* CONFIG_FTRACE */ | ||
1185 | |||
1113 | .section .rodata,"a" | 1186 | .section .rodata,"a" |
1114 | #include "syscall_table_32.S" | 1187 | #include "syscall_table_32.S" |
1115 | 1188 | ||
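The 32-bit mcount stubs above and the 64-bit ones in the next file implement the same dispatch: mcount returns immediately while ftrace_trace_function still points at ftrace_stub, and otherwise calls the tracer with (call-site ip, parent ip), where the call-site ip is the return address minus MCOUNT_INSN_SIZE. A runnable C model of the non-dynamic path (the real thing must be assembly because mcount has to preserve the caller's argument registers, which is what all the push/pop traffic is for):

    #include <stdio.h>

    typedef void (*trace_fn)(unsigned long ip, unsigned long parent_ip);

    static void ftrace_stub(unsigned long ip, unsigned long parent_ip) { }
    static trace_fn ftrace_trace_function = ftrace_stub;

    #define MCOUNT_INSN_SIZE 5   /* x86 "call rel32" is 5 bytes */

    static void mcount_model(unsigned long ret_addr, unsigned long parent)
    {
        if (ftrace_trace_function == ftrace_stub)
            return;                               /* tracing off: fast path */
        ftrace_trace_function(ret_addr - MCOUNT_INSN_SIZE, parent);
    }

    static void my_tracer(unsigned long ip, unsigned long parent_ip)
    {
        printf("traced ip=%#lx from %#lx\n", ip, parent_ip);
    }

    int main(void)
    {
        mcount_model(0x1005, 0x2000);             /* no output */
        ftrace_trace_function = my_tracer;
        mcount_model(0x1005, 0x2000);             /* traced ip=0x1000 ... */
        return 0;
    }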
diff --git a/arch/x86/kernel/entry_64.S b/arch/x86/kernel/entry_64.S index 07d69f262337..ae63e584c340 100644 --- a/arch/x86/kernel/entry_64.S +++ b/arch/x86/kernel/entry_64.S | |||
@@ -51,9 +51,115 @@ | |||
51 | #include <asm/page.h> | 51 | #include <asm/page.h> |
52 | #include <asm/irqflags.h> | 52 | #include <asm/irqflags.h> |
53 | #include <asm/paravirt.h> | 53 | #include <asm/paravirt.h> |
54 | #include <asm/ftrace.h> | ||
54 | 55 | ||
55 | .code64 | 56 | .code64 |
56 | 57 | ||
58 | #ifdef CONFIG_FTRACE | ||
59 | #ifdef CONFIG_DYNAMIC_FTRACE | ||
60 | ENTRY(mcount) | ||
61 | |||
62 | subq $0x38, %rsp | ||
63 | movq %rax, (%rsp) | ||
64 | movq %rcx, 8(%rsp) | ||
65 | movq %rdx, 16(%rsp) | ||
66 | movq %rsi, 24(%rsp) | ||
67 | movq %rdi, 32(%rsp) | ||
68 | movq %r8, 40(%rsp) | ||
69 | movq %r9, 48(%rsp) | ||
70 | |||
71 | movq 0x38(%rsp), %rdi | ||
72 | subq $MCOUNT_INSN_SIZE, %rdi | ||
73 | |||
74 | .globl mcount_call | ||
75 | mcount_call: | ||
76 | call ftrace_stub | ||
77 | |||
78 | movq 48(%rsp), %r9 | ||
79 | movq 40(%rsp), %r8 | ||
80 | movq 32(%rsp), %rdi | ||
81 | movq 24(%rsp), %rsi | ||
82 | movq 16(%rsp), %rdx | ||
83 | movq 8(%rsp), %rcx | ||
84 | movq (%rsp), %rax | ||
85 | addq $0x38, %rsp | ||
86 | |||
87 | retq | ||
88 | END(mcount) | ||
89 | |||
90 | ENTRY(ftrace_caller) | ||
91 | |||
92 | /* taken from glibc */ | ||
93 | subq $0x38, %rsp | ||
94 | movq %rax, (%rsp) | ||
95 | movq %rcx, 8(%rsp) | ||
96 | movq %rdx, 16(%rsp) | ||
97 | movq %rsi, 24(%rsp) | ||
98 | movq %rdi, 32(%rsp) | ||
99 | movq %r8, 40(%rsp) | ||
100 | movq %r9, 48(%rsp) | ||
101 | |||
102 | movq 0x38(%rsp), %rdi | ||
103 | movq 8(%rbp), %rsi | ||
104 | subq $MCOUNT_INSN_SIZE, %rdi | ||
105 | |||
106 | .globl ftrace_call | ||
107 | ftrace_call: | ||
108 | call ftrace_stub | ||
109 | |||
110 | movq 48(%rsp), %r9 | ||
111 | movq 40(%rsp), %r8 | ||
112 | movq 32(%rsp), %rdi | ||
113 | movq 24(%rsp), %rsi | ||
114 | movq 16(%rsp), %rdx | ||
115 | movq 8(%rsp), %rcx | ||
116 | movq (%rsp), %rax | ||
117 | addq $0x38, %rsp | ||
118 | |||
119 | .globl ftrace_stub | ||
120 | ftrace_stub: | ||
121 | retq | ||
122 | END(ftrace_caller) | ||
123 | |||
124 | #else /* ! CONFIG_DYNAMIC_FTRACE */ | ||
125 | ENTRY(mcount) | ||
126 | cmpq $ftrace_stub, ftrace_trace_function | ||
127 | jnz trace | ||
128 | .globl ftrace_stub | ||
129 | ftrace_stub: | ||
130 | retq | ||
131 | |||
132 | trace: | ||
133 | /* taken from glibc */ | ||
134 | subq $0x38, %rsp | ||
135 | movq %rax, (%rsp) | ||
136 | movq %rcx, 8(%rsp) | ||
137 | movq %rdx, 16(%rsp) | ||
138 | movq %rsi, 24(%rsp) | ||
139 | movq %rdi, 32(%rsp) | ||
140 | movq %r8, 40(%rsp) | ||
141 | movq %r9, 48(%rsp) | ||
142 | |||
143 | movq 0x38(%rsp), %rdi | ||
144 | movq 8(%rbp), %rsi | ||
145 | subq $MCOUNT_INSN_SIZE, %rdi | ||
146 | |||
147 | call *ftrace_trace_function | ||
148 | |||
149 | movq 48(%rsp), %r9 | ||
150 | movq 40(%rsp), %r8 | ||
151 | movq 32(%rsp), %rdi | ||
152 | movq 24(%rsp), %rsi | ||
153 | movq 16(%rsp), %rdx | ||
154 | movq 8(%rsp), %rcx | ||
155 | movq (%rsp), %rax | ||
156 | addq $0x38, %rsp | ||
157 | |||
158 | jmp ftrace_stub | ||
159 | END(mcount) | ||
160 | #endif /* CONFIG_DYNAMIC_FTRACE */ | ||
161 | #endif /* CONFIG_FTRACE */ | ||
162 | |||
57 | #ifndef CONFIG_PREEMPT | 163 | #ifndef CONFIG_PREEMPT |
58 | #define retint_kernel retint_restore_args | 164 | #define retint_kernel retint_restore_args |
59 | #endif | 165 | #endif |
@@ -168,13 +274,13 @@ ENTRY(ret_from_fork) | |||
168 | CFI_ADJUST_CFA_OFFSET -4 | 274 | CFI_ADJUST_CFA_OFFSET -4 |
169 | call schedule_tail | 275 | call schedule_tail |
170 | GET_THREAD_INFO(%rcx) | 276 | GET_THREAD_INFO(%rcx) |
171 | testl $(_TIF_SYSCALL_TRACE|_TIF_SYSCALL_AUDIT),threadinfo_flags(%rcx) | 277 | testl $(_TIF_SYSCALL_TRACE|_TIF_SYSCALL_AUDIT),TI_flags(%rcx) |
172 | jnz rff_trace | 278 | jnz rff_trace |
173 | rff_action: | 279 | rff_action: |
174 | RESTORE_REST | 280 | RESTORE_REST |
175 | testl $3,CS-ARGOFFSET(%rsp) # from kernel_thread? | 281 | testl $3,CS-ARGOFFSET(%rsp) # from kernel_thread? |
176 | je int_ret_from_sys_call | 282 | je int_ret_from_sys_call |
177 | testl $_TIF_IA32,threadinfo_flags(%rcx) | 283 | testl $_TIF_IA32,TI_flags(%rcx) |
178 | jnz int_ret_from_sys_call | 284 | jnz int_ret_from_sys_call |
179 | RESTORE_TOP_OF_STACK %rdi,ARGOFFSET | 285 | RESTORE_TOP_OF_STACK %rdi,ARGOFFSET |
180 | jmp ret_from_sys_call | 286 | jmp ret_from_sys_call |
@@ -243,7 +349,8 @@ ENTRY(system_call_after_swapgs) | |||
243 | movq %rcx,RIP-ARGOFFSET(%rsp) | 349 | movq %rcx,RIP-ARGOFFSET(%rsp) |
244 | CFI_REL_OFFSET rip,RIP-ARGOFFSET | 350 | CFI_REL_OFFSET rip,RIP-ARGOFFSET |
245 | GET_THREAD_INFO(%rcx) | 351 | GET_THREAD_INFO(%rcx) |
246 | testl $(_TIF_SYSCALL_TRACE|_TIF_SYSCALL_AUDIT|_TIF_SECCOMP),threadinfo_flags(%rcx) | 352 | testl $(_TIF_SYSCALL_TRACE|_TIF_SYSCALL_AUDIT|_TIF_SECCOMP), \ |
353 | TI_flags(%rcx) | ||
247 | jnz tracesys | 354 | jnz tracesys |
248 | cmpq $__NR_syscall_max,%rax | 355 | cmpq $__NR_syscall_max,%rax |
249 | ja badsys | 356 | ja badsys |
@@ -262,7 +369,7 @@ sysret_check: | |||
262 | GET_THREAD_INFO(%rcx) | 369 | GET_THREAD_INFO(%rcx) |
263 | DISABLE_INTERRUPTS(CLBR_NONE) | 370 | DISABLE_INTERRUPTS(CLBR_NONE) |
264 | TRACE_IRQS_OFF | 371 | TRACE_IRQS_OFF |
265 | movl threadinfo_flags(%rcx),%edx | 372 | movl TI_flags(%rcx),%edx |
266 | andl %edi,%edx | 373 | andl %edi,%edx |
267 | jnz sysret_careful | 374 | jnz sysret_careful |
268 | CFI_REMEMBER_STATE | 375 | CFI_REMEMBER_STATE |
@@ -305,7 +412,7 @@ sysret_signal: | |||
305 | leaq -ARGOFFSET(%rsp),%rdi # &pt_regs -> arg1 | 412 | leaq -ARGOFFSET(%rsp),%rdi # &pt_regs -> arg1 |
306 | xorl %esi,%esi # oldset -> arg2 | 413 | xorl %esi,%esi # oldset -> arg2 |
307 | call ptregscall_common | 414 | call ptregscall_common |
308 | 1: movl $_TIF_NEED_RESCHED,%edi | 415 | 1: movl $_TIF_WORK_MASK,%edi |
309 | /* Use IRET because user could have changed frame. This | 416 | /* Use IRET because user could have changed frame. This |
310 | works because ptregscall_common has called FIXUP_TOP_OF_STACK. */ | 417 | works because ptregscall_common has called FIXUP_TOP_OF_STACK. */ |
311 | DISABLE_INTERRUPTS(CLBR_NONE) | 418 | DISABLE_INTERRUPTS(CLBR_NONE) |
@@ -347,10 +454,10 @@ int_ret_from_sys_call: | |||
347 | int_with_check: | 454 | int_with_check: |
348 | LOCKDEP_SYS_EXIT_IRQ | 455 | LOCKDEP_SYS_EXIT_IRQ |
349 | GET_THREAD_INFO(%rcx) | 456 | GET_THREAD_INFO(%rcx) |
350 | movl threadinfo_flags(%rcx),%edx | 457 | movl TI_flags(%rcx),%edx |
351 | andl %edi,%edx | 458 | andl %edi,%edx |
352 | jnz int_careful | 459 | jnz int_careful |
353 | andl $~TS_COMPAT,threadinfo_status(%rcx) | 460 | andl $~TS_COMPAT,TI_status(%rcx) |
354 | jmp retint_swapgs | 461 | jmp retint_swapgs |
355 | 462 | ||
356 | /* Either reschedule or signal or syscall exit tracking needed. */ | 463 | /* Either reschedule or signal or syscall exit tracking needed. */ |
@@ -393,7 +500,7 @@ int_signal: | |||
393 | movq %rsp,%rdi # &ptregs -> arg1 | 500 | movq %rsp,%rdi # &ptregs -> arg1 |
394 | xorl %esi,%esi # oldset -> arg2 | 501 | xorl %esi,%esi # oldset -> arg2 |
395 | call do_notify_resume | 502 | call do_notify_resume |
396 | 1: movl $_TIF_NEED_RESCHED,%edi | 503 | 1: movl $_TIF_WORK_MASK,%edi |
397 | int_restore_rest: | 504 | int_restore_rest: |
398 | RESTORE_REST | 505 | RESTORE_REST |
399 | DISABLE_INTERRUPTS(CLBR_NONE) | 506 | DISABLE_INTERRUPTS(CLBR_NONE) |
@@ -558,7 +665,7 @@ retint_with_reschedule: | |||
558 | movl $_TIF_WORK_MASK,%edi | 665 | movl $_TIF_WORK_MASK,%edi |
559 | retint_check: | 666 | retint_check: |
560 | LOCKDEP_SYS_EXIT_IRQ | 667 | LOCKDEP_SYS_EXIT_IRQ |
561 | movl threadinfo_flags(%rcx),%edx | 668 | movl TI_flags(%rcx),%edx |
562 | andl %edi,%edx | 669 | andl %edi,%edx |
563 | CFI_REMEMBER_STATE | 670 | CFI_REMEMBER_STATE |
564 | jnz retint_careful | 671 | jnz retint_careful |
@@ -646,17 +753,16 @@ retint_signal: | |||
646 | RESTORE_REST | 753 | RESTORE_REST |
647 | DISABLE_INTERRUPTS(CLBR_NONE) | 754 | DISABLE_INTERRUPTS(CLBR_NONE) |
648 | TRACE_IRQS_OFF | 755 | TRACE_IRQS_OFF |
649 | movl $_TIF_NEED_RESCHED,%edi | ||
650 | GET_THREAD_INFO(%rcx) | 756 | GET_THREAD_INFO(%rcx) |
651 | jmp retint_check | 757 | jmp retint_with_reschedule |
652 | 758 | ||
653 | #ifdef CONFIG_PREEMPT | 759 | #ifdef CONFIG_PREEMPT |
654 | /* Returning to kernel space. Check if we need preemption */ | 760 | /* Returning to kernel space. Check if we need preemption */ |
655 | /* rcx: threadinfo. interrupts off. */ | 761 | /* rcx: threadinfo. interrupts off. */ |
656 | ENTRY(retint_kernel) | 762 | ENTRY(retint_kernel) |
657 | cmpl $0,threadinfo_preempt_count(%rcx) | 763 | cmpl $0,TI_preempt_count(%rcx) |
658 | jnz retint_restore_args | 764 | jnz retint_restore_args |
659 | bt $TIF_NEED_RESCHED,threadinfo_flags(%rcx) | 765 | bt $TIF_NEED_RESCHED,TI_flags(%rcx) |
660 | jnc retint_restore_args | 766 | jnc retint_restore_args |
661 | bt $9,EFLAGS-ARGOFFSET(%rsp) /* interrupts off? */ | 767 | bt $9,EFLAGS-ARGOFFSET(%rsp) /* interrupts off? */ |
662 | jnc retint_restore_args | 768 | jnc retint_restore_args |
@@ -710,6 +816,9 @@ END(invalidate_interrupt\num) | |||
710 | ENTRY(call_function_interrupt) | 816 | ENTRY(call_function_interrupt) |
711 | apicinterrupt CALL_FUNCTION_VECTOR,smp_call_function_interrupt | 817 | apicinterrupt CALL_FUNCTION_VECTOR,smp_call_function_interrupt |
712 | END(call_function_interrupt) | 818 | END(call_function_interrupt) |
819 | ENTRY(call_function_single_interrupt) | ||
820 | apicinterrupt CALL_FUNCTION_SINGLE_VECTOR,smp_call_function_single_interrupt | ||
821 | END(call_function_single_interrupt) | ||
713 | ENTRY(irq_move_cleanup_interrupt) | 822 | ENTRY(irq_move_cleanup_interrupt) |
714 | apicinterrupt IRQ_MOVE_CLEANUP_VECTOR,smp_irq_move_cleanup_interrupt | 823 | apicinterrupt IRQ_MOVE_CLEANUP_VECTOR,smp_irq_move_cleanup_interrupt |
715 | END(irq_move_cleanup_interrupt) | 824 | END(irq_move_cleanup_interrupt) |
@@ -819,7 +928,7 @@ paranoid_restore\trace: | |||
819 | jmp irq_return | 928 | jmp irq_return |
820 | paranoid_userspace\trace: | 929 | paranoid_userspace\trace: |
821 | GET_THREAD_INFO(%rcx) | 930 | GET_THREAD_INFO(%rcx) |
822 | movl threadinfo_flags(%rcx),%ebx | 931 | movl TI_flags(%rcx),%ebx |
823 | andl $_TIF_WORK_MASK,%ebx | 932 | andl $_TIF_WORK_MASK,%ebx |
824 | jz paranoid_swapgs\trace | 933 | jz paranoid_swapgs\trace |
825 | movq %rsp,%rdi /* &pt_regs */ | 934 | movq %rsp,%rdi /* &pt_regs */ |
@@ -917,7 +1026,7 @@ error_exit: | |||
917 | testl %eax,%eax | 1026 | testl %eax,%eax |
918 | jne retint_kernel | 1027 | jne retint_kernel |
919 | LOCKDEP_SYS_EXIT_IRQ | 1028 | LOCKDEP_SYS_EXIT_IRQ |
920 | movl threadinfo_flags(%rcx),%edx | 1029 | movl TI_flags(%rcx),%edx |
921 | movl $_TIF_WORK_MASK,%edi | 1030 | movl $_TIF_WORK_MASK,%edi |
922 | andl %edi,%edx | 1031 | andl %edi,%edx |
923 | jnz retint_careful | 1032 | jnz retint_careful |
diff --git a/arch/x86/kernel/ftrace.c b/arch/x86/kernel/ftrace.c new file mode 100644 index 000000000000..ab115cd15fdf --- /dev/null +++ b/arch/x86/kernel/ftrace.c | |||
@@ -0,0 +1,141 @@ | |||
1 | /* | ||
2 | * Code for replacing ftrace calls with jumps. | ||
3 | * | ||
4 | * Copyright (C) 2007-2008 Steven Rostedt <srostedt@redhat.com> | ||
5 | * | ||
6 | * Thanks goes to Ingo Molnar, for suggesting the idea. | ||
7 | * Mathieu Desnoyers, for suggesting postponing the modifications. | ||
8 | * Arjan van de Ven, for keeping me straight, and explaining to me | ||
9 | * the dangers of modifying code on the run. | ||
10 | */ | ||
11 | |||
12 | #include <linux/spinlock.h> | ||
13 | #include <linux/hardirq.h> | ||
14 | #include <linux/ftrace.h> | ||
15 | #include <linux/percpu.h> | ||
16 | #include <linux/init.h> | ||
17 | #include <linux/list.h> | ||
18 | |||
19 | #include <asm/alternative.h> | ||
20 | #include <asm/ftrace.h> | ||
21 | |||
22 | |||
23 | /* Long is fine, even if it is only 4 bytes ;-) */ | ||
24 | static long *ftrace_nop; | ||
25 | |||
26 | union ftrace_code_union { | ||
27 | char code[MCOUNT_INSN_SIZE]; | ||
28 | struct { | ||
29 | char e8; | ||
30 | int offset; | ||
31 | } __attribute__((packed)); | ||
32 | }; | ||
33 | |||
34 | |||
35 | static int notrace ftrace_calc_offset(long ip, long addr) | ||
36 | { | ||
37 | return (int)(addr - ip); | ||
38 | } | ||
39 | |||
40 | notrace unsigned char *ftrace_nop_replace(void) | ||
41 | { | ||
42 | return (char *)ftrace_nop; | ||
43 | } | ||
44 | |||
45 | notrace unsigned char *ftrace_call_replace(unsigned long ip, unsigned long addr) | ||
46 | { | ||
47 | static union ftrace_code_union calc; | ||
48 | |||
49 | calc.e8 = 0xe8; | ||
50 | calc.offset = ftrace_calc_offset(ip + MCOUNT_INSN_SIZE, addr); | ||
51 | |||
52 | /* | ||
53 | * No locking needed, this must be called via kstop_machine | ||
54 | * which in essence is like running on a uniprocessor machine. | ||
55 | */ | ||
56 | return calc.code; | ||
57 | } | ||
58 | |||
59 | notrace int | ||
60 | ftrace_modify_code(unsigned long ip, unsigned char *old_code, | ||
61 | unsigned char *new_code) | ||
62 | { | ||
63 | unsigned replaced; | ||
64 | unsigned old = *(unsigned *)old_code; /* 4 bytes */ | ||
65 | unsigned new = *(unsigned *)new_code; /* 4 bytes */ | ||
66 | unsigned char newch = new_code[4]; | ||
67 | int faulted = 0; | ||
68 | |||
69 | /* | ||
70 | * Note: Due to modules and __init, code can | ||
71 | * disappear and change, we need to protect against faulting | ||
72 | * as well as code changing. | ||
73 | * | ||
74 | * No real locking needed, this code is run through | ||
75 | * kstop_machine. | ||
76 | */ | ||
77 | asm volatile ( | ||
78 | "1: lock\n" | ||
79 | " cmpxchg %3, (%2)\n" | ||
80 | " jnz 2f\n" | ||
81 | " movb %b4, 4(%2)\n" | ||
82 | "2:\n" | ||
83 | ".section .fixup, \"ax\"\n" | ||
84 | "3: movl $1, %0\n" | ||
85 | " jmp 2b\n" | ||
86 | ".previous\n" | ||
87 | _ASM_EXTABLE(1b, 3b) | ||
88 | : "=r"(faulted), "=a"(replaced) | ||
89 | : "r"(ip), "r"(new), "c"(newch), | ||
90 | "0"(faulted), "a"(old) | ||
91 | : "memory"); | ||
92 | sync_core(); | ||
93 | |||
94 | if (replaced != old && replaced != new) | ||
95 | faulted = 2; | ||
96 | |||
97 | return faulted; | ||
98 | } | ||
99 | |||
100 | notrace int ftrace_update_ftrace_func(ftrace_func_t func) | ||
101 | { | ||
102 | unsigned long ip = (unsigned long)(&ftrace_call); | ||
103 | unsigned char old[MCOUNT_INSN_SIZE], *new; | ||
104 | int ret; | ||
105 | |||
106 | memcpy(old, &ftrace_call, MCOUNT_INSN_SIZE); | ||
107 | new = ftrace_call_replace(ip, (unsigned long)func); | ||
108 | ret = ftrace_modify_code(ip, old, new); | ||
109 | |||
110 | return ret; | ||
111 | } | ||
112 | |||
113 | notrace int ftrace_mcount_set(unsigned long *data) | ||
114 | { | ||
115 | unsigned long ip = (long)(&mcount_call); | ||
116 | unsigned long *addr = data; | ||
117 | unsigned char old[MCOUNT_INSN_SIZE], *new; | ||
118 | |||
119 | /* | ||
120 | * Replace the mcount stub with a pointer to the | ||
121 | * ip recorder function. | ||
122 | */ | ||
123 | memcpy(old, &mcount_call, MCOUNT_INSN_SIZE); | ||
124 | new = ftrace_call_replace(ip, *addr); | ||
125 | *addr = ftrace_modify_code(ip, old, new); | ||
126 | |||
127 | return 0; | ||
128 | } | ||
129 | |||
130 | int __init ftrace_dyn_arch_init(void *data) | ||
131 | { | ||
132 | const unsigned char *const *noptable = find_nop_table(); | ||
133 | |||
134 | /* This is running in kstop_machine */ | ||
135 | |||
136 | ftrace_mcount_set(data); | ||
137 | |||
138 | ftrace_nop = (unsigned long *)noptable[MCOUNT_INSN_SIZE]; | ||
139 | |||
140 | return 0; | ||
141 | } | ||
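Two pieces of the new ftrace.c are worth unpacking. ftrace_call_replace() assembles the 5-byte "call rel32" (opcode 0xe8 plus a displacement relative to the end of the instruction), and ftrace_modify_code() installs it with a locked cmpxchg on the first 4 bytes so a mismatch at the call site is detected rather than clobbered. A runnable model of both steps; GCC's __sync builtin stands in for the kernel's inline-asm cmpxchg, and the patching happens in a plain buffer, not in live code:

    #include <stdio.h>
    #include <string.h>
    #include <stdint.h>

    #define MCOUNT_INSN_SIZE 5

    union code {
        unsigned char bytes[MCOUNT_INSN_SIZE];
        struct { unsigned char e8; int32_t offset; } __attribute__((packed));
    };

    /* Encode "call addr" as placed at address ip. */
    static union code call_replace(unsigned long ip, unsigned long addr)
    {
        union code c;
        c.e8 = 0xe8;
        c.offset = (int32_t)(addr - (ip + MCOUNT_INSN_SIZE));
        return c;
    }

    /* Model of ftrace_modify_code(): CAS the first 4 bytes, then store the
     * 5th; fail if the site no longer holds the expected old bytes. */
    static int modify_code(unsigned char *site, union code old, union code new)
    {
        uint32_t old4, new4;
        memcpy(&old4, old.bytes, 4);
        memcpy(&new4, new.bytes, 4);
        if (!__sync_bool_compare_and_swap((uint32_t *)site, old4, new4))
            return 2;                  /* unexpected code at the call site */
        site[4] = new.bytes[4];
        return 0;
    }

    int main(void)
    {
        unsigned char site[MCOUNT_INSN_SIZE] __attribute__((aligned(4)));
        union code nop = {{ 0x0f, 0x1f, 0x44, 0x00, 0x00 }};  /* 5-byte nop */
        memcpy(site, nop.bytes, MCOUNT_INSN_SIZE);

        union code call = call_replace(0x1000, 0x2000);
        printf("patch -> %d, first byte %#x, rel32 %d\n",
               modify_code(site, nop, call), site[0],
               (int)(0x2000 - 0x1005));
        return 0;
    }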
diff --git a/arch/x86/kernel/genx2apic_uv_x.c b/arch/x86/kernel/genx2apic_uv_x.c index 45e84acca8a9..711f11c30b06 100644 --- a/arch/x86/kernel/genx2apic_uv_x.c +++ b/arch/x86/kernel/genx2apic_uv_x.c | |||
@@ -8,6 +8,7 @@ | |||
8 | * Copyright (C) 2007-2008 Silicon Graphics, Inc. All rights reserved. | 8 | * Copyright (C) 2007-2008 Silicon Graphics, Inc. All rights reserved. |
9 | */ | 9 | */ |
10 | 10 | ||
11 | #include <linux/kernel.h> | ||
11 | #include <linux/threads.h> | 12 | #include <linux/threads.h> |
12 | #include <linux/cpumask.h> | 13 | #include <linux/cpumask.h> |
13 | #include <linux/string.h> | 14 | #include <linux/string.h> |
@@ -20,6 +21,7 @@ | |||
20 | #include <asm/smp.h> | 21 | #include <asm/smp.h> |
21 | #include <asm/ipi.h> | 22 | #include <asm/ipi.h> |
22 | #include <asm/genapic.h> | 23 | #include <asm/genapic.h> |
24 | #include <asm/pgtable.h> | ||
23 | #include <asm/uv/uv_mmrs.h> | 25 | #include <asm/uv/uv_mmrs.h> |
24 | #include <asm/uv/uv_hub.h> | 26 | #include <asm/uv/uv_hub.h> |
25 | 27 | ||
@@ -208,14 +210,79 @@ static __init void get_lowmem_redirect(unsigned long *base, unsigned long *size) | |||
208 | BUG(); | 210 | BUG(); |
209 | } | 211 | } |
210 | 212 | ||
213 | static __init void map_low_mmrs(void) | ||
214 | { | ||
215 | init_extra_mapping_uc(UV_GLOBAL_MMR32_BASE, UV_GLOBAL_MMR32_SIZE); | ||
216 | init_extra_mapping_uc(UV_LOCAL_MMR_BASE, UV_LOCAL_MMR_SIZE); | ||
217 | } | ||
218 | |||
219 | enum map_type {map_wb, map_uc}; | ||
220 | |||
221 | static void map_high(char *id, unsigned long base, int shift, enum map_type map_type) | ||
222 | { | ||
223 | unsigned long bytes, paddr; | ||
224 | |||
225 | paddr = base << shift; | ||
226 | bytes = (1UL << shift); | ||
227 | printk(KERN_INFO "UV: Map %s_HI 0x%lx - 0x%lx\n", id, paddr, | ||
228 | paddr + bytes); | ||
229 | if (map_type == map_uc) | ||
230 | init_extra_mapping_uc(paddr, bytes); | ||
231 | else | ||
232 | init_extra_mapping_wb(paddr, bytes); | ||
233 | |||
234 | } | ||
235 | static __init void map_gru_high(int max_pnode) | ||
236 | { | ||
237 | union uvh_rh_gam_gru_overlay_config_mmr_u gru; | ||
238 | int shift = UVH_RH_GAM_GRU_OVERLAY_CONFIG_MMR_BASE_SHFT; | ||
239 | |||
240 | gru.v = uv_read_local_mmr(UVH_RH_GAM_GRU_OVERLAY_CONFIG_MMR); | ||
241 | if (gru.s.enable) | ||
242 | map_high("GRU", gru.s.base, shift, map_wb); | ||
243 | } | ||
244 | |||
245 | static __init void map_config_high(int max_pnode) | ||
246 | { | ||
247 | union uvh_rh_gam_cfg_overlay_config_mmr_u cfg; | ||
248 | int shift = UVH_RH_GAM_CFG_OVERLAY_CONFIG_MMR_BASE_SHFT; | ||
249 | |||
250 | cfg.v = uv_read_local_mmr(UVH_RH_GAM_CFG_OVERLAY_CONFIG_MMR); | ||
251 | if (cfg.s.enable) | ||
252 | map_high("CONFIG", cfg.s.base, shift, map_uc); | ||
253 | } | ||
254 | |||
255 | static __init void map_mmr_high(int max_pnode) | ||
256 | { | ||
257 | union uvh_rh_gam_mmr_overlay_config_mmr_u mmr; | ||
258 | int shift = UVH_RH_GAM_MMR_OVERLAY_CONFIG_MMR_BASE_SHFT; | ||
259 | |||
260 | mmr.v = uv_read_local_mmr(UVH_RH_GAM_MMR_OVERLAY_CONFIG_MMR); | ||
261 | if (mmr.s.enable) | ||
262 | map_high("MMR", mmr.s.base, shift, map_uc); | ||
263 | } | ||
264 | |||
265 | static __init void map_mmioh_high(int max_pnode) | ||
266 | { | ||
267 | union uvh_rh_gam_mmioh_overlay_config_mmr_u mmioh; | ||
268 | int shift = UVH_RH_GAM_MMIOH_OVERLAY_CONFIG_MMR_BASE_SHFT; | ||
269 | |||
270 | mmioh.v = uv_read_local_mmr(UVH_RH_GAM_MMIOH_OVERLAY_CONFIG_MMR); | ||
271 | if (mmioh.s.enable) | ||
272 | map_high("MMIOH", mmioh.s.base, shift, map_uc); | ||
273 | } | ||
274 | |||
211 | static __init void uv_system_init(void) | 275 | static __init void uv_system_init(void) |
212 | { | 276 | { |
213 | union uvh_si_addr_map_config_u m_n_config; | 277 | union uvh_si_addr_map_config_u m_n_config; |
214 | union uvh_node_id_u node_id; | 278 | union uvh_node_id_u node_id; |
215 | unsigned long gnode_upper, lowmem_redir_base, lowmem_redir_size; | 279 | unsigned long gnode_upper, lowmem_redir_base, lowmem_redir_size; |
216 | int bytes, nid, cpu, lcpu, pnode, blade, i, j, m_val, n_val; | 280 | int bytes, nid, cpu, lcpu, pnode, blade, i, j, m_val, n_val; |
281 | int max_pnode = 0; | ||
217 | unsigned long mmr_base, present; | 282 | unsigned long mmr_base, present; |
218 | 283 | ||
284 | map_low_mmrs(); | ||
285 | |||
219 | m_n_config.v = uv_read_local_mmr(UVH_SI_ADDR_MAP_CONFIG); | 286 | m_n_config.v = uv_read_local_mmr(UVH_SI_ADDR_MAP_CONFIG); |
220 | m_val = m_n_config.s.m_skt; | 287 | m_val = m_n_config.s.m_skt; |
221 | n_val = m_n_config.s.n_skt; | 288 | n_val = m_n_config.s.n_skt; |
@@ -281,12 +348,18 @@ static __init void uv_system_init(void) | |||
281 | uv_cpu_hub_info(cpu)->coherency_domain_number = 0;/* ZZZ */ | 348 | uv_cpu_hub_info(cpu)->coherency_domain_number = 0;/* ZZZ */ |
282 | uv_node_to_blade[nid] = blade; | 349 | uv_node_to_blade[nid] = blade; |
283 | uv_cpu_to_blade[cpu] = blade; | 350 | uv_cpu_to_blade[cpu] = blade; |
351 | max_pnode = max(pnode, max_pnode); | ||
284 | 352 | ||
285 | printk(KERN_DEBUG "UV cpu %d, apicid 0x%x, pnode %d, nid %d, " | 353 | printk(KERN_DEBUG "UV: cpu %d, apicid 0x%x, pnode %d, nid %d, " |
286 | "lcpu %d, blade %d\n", | 354 | "lcpu %d, blade %d\n", |
287 | cpu, per_cpu(x86_cpu_to_apicid, cpu), pnode, nid, | 355 | cpu, per_cpu(x86_cpu_to_apicid, cpu), pnode, nid, |
288 | lcpu, blade); | 356 | lcpu, blade); |
289 | } | 357 | } |
358 | |||
359 | map_gru_high(max_pnode); | ||
360 | map_mmr_high(max_pnode); | ||
361 | map_config_high(max_pnode); | ||
362 | map_mmioh_high(max_pnode); | ||
290 | } | 363 | } |
291 | 364 | ||
292 | /* | 365 | /* |
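The map_high() helper added above converts a UV overlay-register base field into a physical range: paddr = base << shift and bytes = 1UL << shift, mapped write-back for the GRU and uncached for MMR/MMIOH/CONFIG. A quick runnable check of that arithmetic with an illustrative base and shift (the real *_BASE_SHFT constants come from uv_mmrs.h, which is not shown in this diff):

    #include <stdio.h>

    int main(void)
    {
        unsigned long base  = 0x5;   /* illustrative overlay base field */
        int           shift = 28;    /* illustrative *_BASE_SHFT value  */

        unsigned long paddr = base << shift;   /* 0x50000000 */
        unsigned long bytes = 1UL << shift;    /* 0x10000000 = 256 MiB */

        printf("UV: Map GRU_HI 0x%lx - 0x%lx\n", paddr, paddr + bytes);
        return 0;
    }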
diff --git a/arch/x86/kernel/hpet.c b/arch/x86/kernel/hpet.c index ea230ec69057..0ea6a19bfdfe 100644 --- a/arch/x86/kernel/hpet.c +++ b/arch/x86/kernel/hpet.c | |||
@@ -36,26 +36,15 @@ static inline void hpet_writel(unsigned long d, unsigned long a) | |||
36 | } | 36 | } |
37 | 37 | ||
38 | #ifdef CONFIG_X86_64 | 38 | #ifdef CONFIG_X86_64 |
39 | |||
40 | #include <asm/pgtable.h> | 39 | #include <asm/pgtable.h> |
41 | 40 | #endif | |
42 | static inline void hpet_set_mapping(void) | ||
43 | { | ||
44 | set_fixmap_nocache(FIX_HPET_BASE, hpet_address); | ||
45 | __set_fixmap(VSYSCALL_HPET, hpet_address, PAGE_KERNEL_VSYSCALL_NOCACHE); | ||
46 | hpet_virt_address = (void __iomem *)fix_to_virt(FIX_HPET_BASE); | ||
47 | } | ||
48 | |||
49 | static inline void hpet_clear_mapping(void) | ||
50 | { | ||
51 | hpet_virt_address = NULL; | ||
52 | } | ||
53 | |||
54 | #else | ||
55 | 41 | ||
56 | static inline void hpet_set_mapping(void) | 42 | static inline void hpet_set_mapping(void) |
57 | { | 43 | { |
58 | hpet_virt_address = ioremap_nocache(hpet_address, HPET_MMAP_SIZE); | 44 | hpet_virt_address = ioremap_nocache(hpet_address, HPET_MMAP_SIZE); |
45 | #ifdef CONFIG_X86_64 | ||
46 | __set_fixmap(VSYSCALL_HPET, hpet_address, PAGE_KERNEL_VSYSCALL_NOCACHE); | ||
47 | #endif | ||
59 | } | 48 | } |
60 | 49 | ||
61 | static inline void hpet_clear_mapping(void) | 50 | static inline void hpet_clear_mapping(void) |
@@ -63,7 +52,6 @@ static inline void hpet_clear_mapping(void) | |||
63 | iounmap(hpet_virt_address); | 52 | iounmap(hpet_virt_address); |
64 | hpet_virt_address = NULL; | 53 | hpet_virt_address = NULL; |
65 | } | 54 | } |
66 | #endif | ||
67 | 55 | ||
68 | /* | 56 | /* |
69 | * HPET command line enable / disable | 57 | * HPET command line enable / disable |
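
The 32/64-bit split is gone: both builds now reach the HPET through one ioremap_nocache() mapping, and 64-bit merely adds its vsyscall fixmap alias on top. The register accessors in this file sit directly on that mapping; per the hpet_writel() signature in the hunk header above, they look essentially like this (register offsets come from asm/hpet.h):

        static inline unsigned long hpet_readl(unsigned long a)
        {
                return readl(hpet_virt_address + a);
        }

        static inline void hpet_writel(unsigned long d, unsigned long a)
        {
                writel(d, hpet_virt_address + a);
        }
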
diff --git a/arch/x86/kernel/i386_ksyms_32.c b/arch/x86/kernel/i386_ksyms_32.c index deb43785e923..dd7ebee446af 100644 --- a/arch/x86/kernel/i386_ksyms_32.c +++ b/arch/x86/kernel/i386_ksyms_32.c | |||
@@ -1,7 +1,14 @@ | |||
1 | #include <linux/module.h> | 1 | #include <linux/module.h> |
2 | |||
2 | #include <asm/checksum.h> | 3 | #include <asm/checksum.h> |
3 | #include <asm/desc.h> | ||
4 | #include <asm/pgtable.h> | 4 | #include <asm/pgtable.h> |
5 | #include <asm/desc.h> | ||
6 | #include <asm/ftrace.h> | ||
7 | |||
8 | #ifdef CONFIG_FTRACE | ||
9 | /* mcount is defined in assembly */ | ||
10 | EXPORT_SYMBOL(mcount); | ||
11 | #endif | ||
5 | 12 | ||
6 | /* Networking helper routines. */ | 13 | /* Networking helper routines. */ |
7 | EXPORT_SYMBOL(csum_partial_copy_generic); | 14 | EXPORT_SYMBOL(csum_partial_copy_generic); |
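
Why export a symbol that no C code calls: with CONFIG_FTRACE the kernel and all modules are built with gcc's -pg, which plants a call to mcount() in every function prologue. Conceptually:

        /* roughly what -pg does to every instrumented function: */
        void some_module_function(void)
        {
                /* "call mcount" is inserted here by the compiler; for
                 * module code that reference only resolves because of
                 * the EXPORT_SYMBOL(mcount) above */

                /* ... original function body ... */
        }
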
diff --git a/arch/x86/kernel/io_apic_32.c b/arch/x86/kernel/io_apic_32.c index 337ec3438a8f..558abf4c796a 100644 --- a/arch/x86/kernel/io_apic_32.c +++ b/arch/x86/kernel/io_apic_32.c | |||
@@ -1569,7 +1569,7 @@ void /*__init*/ print_local_APIC(void *dummy) | |||
1569 | 1569 | ||
1570 | void print_all_local_APICs(void) | 1570 | void print_all_local_APICs(void) |
1571 | { | 1571 | { |
1572 | on_each_cpu(print_local_APIC, NULL, 1, 1); | 1572 | on_each_cpu(print_local_APIC, NULL, 1); |
1573 | } | 1573 | } |
1574 | 1574 | ||
1575 | void /*__init*/ print_PIC(void) | 1575 | void /*__init*/ print_PIC(void) |
@@ -2020,7 +2020,7 @@ static inline void init_IO_APIC_traps(void) | |||
2020 | * The local APIC irq-chip implementation: | 2020 | * The local APIC irq-chip implementation: |
2021 | */ | 2021 | */ |
2022 | 2022 | ||
2023 | static void ack_apic(unsigned int irq) | 2023 | static void ack_lapic_irq(unsigned int irq) |
2024 | { | 2024 | { |
2025 | ack_APIC_irq(); | 2025 | ack_APIC_irq(); |
2026 | } | 2026 | } |
@@ -2045,9 +2045,17 @@ static struct irq_chip lapic_chip __read_mostly = { | |||
2045 | .name = "local-APIC", | 2045 | .name = "local-APIC", |
2046 | .mask = mask_lapic_irq, | 2046 | .mask = mask_lapic_irq, |
2047 | .unmask = unmask_lapic_irq, | 2047 | .unmask = unmask_lapic_irq, |
2048 | .eoi = ack_apic, | 2048 | .ack = ack_lapic_irq, |
2049 | }; | 2049 | }; |
2050 | 2050 | ||
2051 | static void lapic_register_intr(int irq, int vector) | ||
2052 | { | ||
2053 | irq_desc[irq].status &= ~IRQ_LEVEL; | ||
2054 | set_irq_chip_and_handler_name(irq, &lapic_chip, handle_edge_irq, | ||
2055 | "edge"); | ||
2056 | set_intr_gate(vector, interrupt[irq]); | ||
2057 | } | ||
2058 | |||
2051 | static void __init setup_nmi(void) | 2059 | static void __init setup_nmi(void) |
2052 | { | 2060 | { |
2053 | /* | 2061 | /* |
@@ -2247,8 +2255,7 @@ static inline void __init check_timer(void) | |||
2247 | 2255 | ||
2248 | printk(KERN_INFO "...trying to set up timer as Virtual Wire IRQ..."); | 2256 | printk(KERN_INFO "...trying to set up timer as Virtual Wire IRQ..."); |
2249 | 2257 | ||
2250 | set_irq_chip_and_handler_name(0, &lapic_chip, handle_fasteoi_irq, | 2258 | lapic_register_intr(0, vector); |
2251 | "fasteoi"); | ||
2252 | apic_write_around(APIC_LVT0, APIC_DM_FIXED | vector); /* Fixed mode */ | 2259 | apic_write_around(APIC_LVT0, APIC_DM_FIXED | vector); /* Fixed mode */ |
2253 | enable_8259A_irq(0); | 2260 | enable_8259A_irq(0); |
2254 | 2261 | ||
@@ -2280,11 +2287,21 @@ out: | |||
2280 | } | 2287 | } |
2281 | 2288 | ||
2282 | /* | 2289 | /* |
2283 | * | 2290 | * Traditionally ISA IRQ2 is the cascade IRQ, and is not available |
2284 | * IRQ's that are handled by the PIC in the MPS IOAPIC case. | 2291 | * to devices. However there may be an I/O APIC pin available for |
2285 | * - IRQ2 is the cascade IRQ, and cannot be a io-apic IRQ. | 2292 | * this interrupt regardless. The pin may be left unconnected, but |
2286 | * Linux doesn't really care, as it's not actually used | 2293 | * typically it will be reused as an ExtINT cascade interrupt for |
2287 | * for any interrupt handling anyway. | 2294 | * the master 8259A. In the MPS case such a pin will normally be |
2295 | * reported as an ExtINT interrupt in the MP table. With ACPI | ||
2296 | * there is no provision for ExtINT interrupts, and in the absence | ||
2297 | * of an override it would be treated as an ordinary ISA I/O APIC | ||
2298 | * interrupt, that is edge-triggered and unmasked by default. We | ||
2299 | * used to do this, but it caused problems on some systems because | ||
2300 | * of the NMI watchdog and sometimes IRQ0 of the 8254 timer using | ||
2301 | * the same ExtINT cascade interrupt to drive the local APIC of the | ||
2302 | * bootstrap processor. Therefore we refrain from routing IRQ2 to | ||
2303 | * the I/O APIC in all cases now. No actual device should request | ||
2304 | * it anyway. --macro | ||
2288 | */ | 2305 | */ |
2289 | #define PIC_IRQS (1 << PIC_CASCADE_IR) | 2306 | #define PIC_IRQS (1 << PIC_CASCADE_IR) |
2290 | 2307 | ||
@@ -2298,10 +2315,7 @@ void __init setup_IO_APIC(void) | |||
2298 | 2315 | ||
2299 | enable_IO_APIC(); | 2316 | enable_IO_APIC(); |
2300 | 2317 | ||
2301 | if (acpi_ioapic) | 2318 | io_apic_irqs = ~PIC_IRQS; |
2302 | io_apic_irqs = ~0; /* all IRQs go through IOAPIC */ | ||
2303 | else | ||
2304 | io_apic_irqs = ~PIC_IRQS; | ||
2305 | 2319 | ||
2306 | printk("ENABLING IO-APIC IRQs\n"); | 2320 | printk("ENABLING IO-APIC IRQs\n"); |
2307 | 2321 | ||
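
io_apic_irqs is the bitmask of legacy IRQs routed through the I/O APIC, and with the acpi_ioapic special case gone it is now always ~PIC_IRQS: every ISA IRQ except the IRQ2 cascade goes through the I/O APIC. The consumer is the IO_APIC_IRQ() test, which in the hw_irq headers of this era reads roughly as follows (quoted from memory, so treat as approximate):

        #define IO_APIC_IRQ(x)  (((x) >= 16) || ((1 << (x)) & io_apic_irqs))

        /* after this change: io_apic_irqs == ~(1 << 2), so only
         * IO_APIC_IRQ(2) is false and IRQ2 stays with the 8259A */
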
diff --git a/arch/x86/kernel/io_apic_64.c b/arch/x86/kernel/io_apic_64.c index 2b4c40bc12c9..6510cde36b35 100644 --- a/arch/x86/kernel/io_apic_64.c +++ b/arch/x86/kernel/io_apic_64.c | |||
@@ -1160,7 +1160,7 @@ void __apicdebuginit print_local_APIC(void * dummy) | |||
1160 | 1160 | ||
1161 | void print_all_local_APICs (void) | 1161 | void print_all_local_APICs (void) |
1162 | { | 1162 | { |
1163 | on_each_cpu(print_local_APIC, NULL, 1, 1); | 1163 | on_each_cpu(print_local_APIC, NULL, 1); |
1164 | } | 1164 | } |
1165 | 1165 | ||
1166 | void __apicdebuginit print_PIC(void) | 1166 | void __apicdebuginit print_PIC(void) |
@@ -1554,7 +1554,7 @@ static inline void init_IO_APIC_traps(void) | |||
1554 | } | 1554 | } |
1555 | } | 1555 | } |
1556 | 1556 | ||
1557 | static void enable_lapic_irq (unsigned int irq) | 1557 | static void unmask_lapic_irq(unsigned int irq) |
1558 | { | 1558 | { |
1559 | unsigned long v; | 1559 | unsigned long v; |
1560 | 1560 | ||
@@ -1562,7 +1562,7 @@ static void enable_lapic_irq (unsigned int irq) | |||
1562 | apic_write(APIC_LVT0, v & ~APIC_LVT_MASKED); | 1562 | apic_write(APIC_LVT0, v & ~APIC_LVT_MASKED); |
1563 | } | 1563 | } |
1564 | 1564 | ||
1565 | static void disable_lapic_irq (unsigned int irq) | 1565 | static void mask_lapic_irq(unsigned int irq) |
1566 | { | 1566 | { |
1567 | unsigned long v; | 1567 | unsigned long v; |
1568 | 1568 | ||
@@ -1575,19 +1575,20 @@ static void ack_lapic_irq (unsigned int irq) | |||
1575 | ack_APIC_irq(); | 1575 | ack_APIC_irq(); |
1576 | } | 1576 | } |
1577 | 1577 | ||
1578 | static void end_lapic_irq (unsigned int i) { /* nothing */ } | 1578 | static struct irq_chip lapic_chip __read_mostly = { |
1579 | 1579 | .name = "local-APIC", | |
1580 | static struct hw_interrupt_type lapic_irq_type __read_mostly = { | 1580 | .mask = mask_lapic_irq, |
1581 | .name = "local-APIC", | 1581 | .unmask = unmask_lapic_irq, |
1582 | .typename = "local-APIC-edge", | 1582 | .ack = ack_lapic_irq, |
1583 | .startup = NULL, /* startup_irq() not used for IRQ0 */ | ||
1584 | .shutdown = NULL, /* shutdown_irq() not used for IRQ0 */ | ||
1585 | .enable = enable_lapic_irq, | ||
1586 | .disable = disable_lapic_irq, | ||
1587 | .ack = ack_lapic_irq, | ||
1588 | .end = end_lapic_irq, | ||
1589 | }; | 1583 | }; |
1590 | 1584 | ||
1585 | static void lapic_register_intr(int irq) | ||
1586 | { | ||
1587 | irq_desc[irq].status &= ~IRQ_LEVEL; | ||
1588 | set_irq_chip_and_handler_name(irq, &lapic_chip, handle_edge_irq, | ||
1589 | "edge"); | ||
1590 | } | ||
1591 | |||
1591 | static void __init setup_nmi(void) | 1592 | static void __init setup_nmi(void) |
1592 | { | 1593 | { |
1593 | /* | 1594 | /* |
@@ -1714,11 +1715,6 @@ static inline void __init check_timer(void) | |||
1714 | apic2 = apic1; | 1715 | apic2 = apic1; |
1715 | } | 1716 | } |
1716 | 1717 | ||
1717 | replace_pin_at_irq(0, 0, 0, apic1, pin1); | ||
1718 | apic1 = 0; | ||
1719 | pin1 = 0; | ||
1720 | setup_timer_IRQ0_pin(apic1, pin1, cfg->vector); | ||
1721 | |||
1722 | if (pin1 != -1) { | 1718 | if (pin1 != -1) { |
1723 | /* | 1719 | /* |
1724 | * Ok, does IRQ0 through the IOAPIC work? | 1720 | * Ok, does IRQ0 through the IOAPIC work? |
@@ -1779,7 +1775,7 @@ static inline void __init check_timer(void) | |||
1779 | 1775 | ||
1780 | apic_printk(APIC_VERBOSE, KERN_INFO "...trying to set up timer as Virtual Wire IRQ..."); | 1776 | apic_printk(APIC_VERBOSE, KERN_INFO "...trying to set up timer as Virtual Wire IRQ..."); |
1781 | 1777 | ||
1782 | irq_desc[0].chip = &lapic_irq_type; | 1778 | lapic_register_intr(0); |
1783 | apic_write(APIC_LVT0, APIC_DM_FIXED | cfg->vector); /* Fixed mode */ | 1779 | apic_write(APIC_LVT0, APIC_DM_FIXED | cfg->vector); /* Fixed mode */ |
1784 | enable_8259A_irq(0); | 1780 | enable_8259A_irq(0); |
1785 | 1781 | ||
@@ -1817,11 +1813,21 @@ static int __init notimercheck(char *s) | |||
1817 | __setup("no_timer_check", notimercheck); | 1813 | __setup("no_timer_check", notimercheck); |
1818 | 1814 | ||
1819 | /* | 1815 | /* |
1820 | * | 1816 | * Traditionally ISA IRQ2 is the cascade IRQ, and is not available |
1821 | * IRQs that are handled by the PIC in the MPS IOAPIC case. | 1817 | * to devices. However there may be an I/O APIC pin available for |
1822 | * - IRQ2 is the cascade IRQ, and cannot be a io-apic IRQ. | 1818 | * this interrupt regardless. The pin may be left unconnected, but |
1823 | * Linux doesn't really care, as it's not actually used | 1819 | * typically it will be reused as an ExtINT cascade interrupt for |
1824 | * for any interrupt handling anyway. | 1820 | * the master 8259A. In the MPS case such a pin will normally be |
1821 | * reported as an ExtINT interrupt in the MP table. With ACPI | ||
1822 | * there is no provision for ExtINT interrupts, and in the absence | ||
1823 | * of an override it would be treated as an ordinary ISA I/O APIC | ||
1824 | * interrupt, that is edge-triggered and unmasked by default. We | ||
1825 | * used to do this, but it caused problems on some systems because | ||
1826 | * of the NMI watchdog and sometimes IRQ0 of the 8254 timer using | ||
1827 | * the same ExtINT cascade interrupt to drive the local APIC of the | ||
1828 | * bootstrap processor. Therefore we refrain from routing IRQ2 to | ||
1829 | * the I/O APIC in all cases now. No actual device should request | ||
1830 | * it anyway. --macro | ||
1825 | */ | 1831 | */ |
1826 | #define PIC_IRQS (1<<2) | 1832 | #define PIC_IRQS (1<<2) |
1827 | 1833 | ||
@@ -1832,10 +1838,7 @@ void __init setup_IO_APIC(void) | |||
1832 | * calling enable_IO_APIC() is moved to setup_local_APIC for BP | 1838 | * calling enable_IO_APIC() is moved to setup_local_APIC for BP |
1833 | */ | 1839 | */ |
1834 | 1840 | ||
1835 | if (acpi_ioapic) | 1841 | io_apic_irqs = ~PIC_IRQS; |
1836 | io_apic_irqs = ~0; /* all IRQs go through IOAPIC */ | ||
1837 | else | ||
1838 | io_apic_irqs = ~PIC_IRQS; | ||
1839 | 1842 | ||
1840 | apic_printk(APIC_VERBOSE, "ENABLING IO-APIC IRQs\n"); | 1843 | apic_printk(APIC_VERBOSE, "ENABLING IO-APIC IRQs\n"); |
1841 | 1844 | ||
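
Both the 32- and 64-bit files make the same conversion: the timer's virtual-wire interrupt becomes a proper irq_chip registered as edge-triggered, replacing the removed hw_interrupt_type (32-bit previously registered it with handle_fasteoi_irq). The behavioural difference, heavily simplified from kernel/irq/chip.c of this era:

        /* edge flow: ack first, then run the handler */
        desc->chip->ack(irq);           /* ack_lapic_irq() -> ack_APIC_irq() */
        handle_IRQ_event(irq, desc->action);

        /* old fasteoi flow: run the handler, then signal completion */
        handle_IRQ_event(irq, desc->action);
        desc->chip->eoi(irq);

For the local APIC's LVT0 line the early ack is what matches the hardware, hence .ack rather than .eoi in lapic_chip.
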
diff --git a/arch/x86/kernel/irqinit_64.c b/arch/x86/kernel/irqinit_64.c index 31f49e8f46a7..0373e88de95a 100644 --- a/arch/x86/kernel/irqinit_64.c +++ b/arch/x86/kernel/irqinit_64.c | |||
@@ -199,6 +199,10 @@ void __init native_init_IRQ(void) | |||
199 | /* IPI for generic function call */ | 199 | /* IPI for generic function call */ |
200 | alloc_intr_gate(CALL_FUNCTION_VECTOR, call_function_interrupt); | 200 | alloc_intr_gate(CALL_FUNCTION_VECTOR, call_function_interrupt); |
201 | 201 | ||
202 | /* IPI for generic single function call */ | ||
203 | alloc_intr_gate(CALL_FUNCTION_SINGLE_VECTOR, | ||
204 | call_function_single_interrupt); | ||
205 | |||
202 | /* Low priority IPI to cleanup after moving an irq */ | 206 | /* Low priority IPI to cleanup after moving an irq */ |
203 | set_intr_gate(IRQ_MOVE_CLEANUP_VECTOR, irq_move_cleanup_interrupt); | 207 | set_intr_gate(IRQ_MOVE_CLEANUP_VECTOR, irq_move_cleanup_interrupt); |
204 | #endif | 208 | #endif |
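
This wires the vector for the generic single-cpu call path (USE_GENERIC_SMP_HELPERS, selected in the Kconfig hunk at the top of this merge). Caller-side the API is the four-argument form, the nonatomic flag having been dropped; a minimal sketch:

        #include <linux/smp.h>

        static void poke(void *info)
        {
                /* runs on the target cpu, in interrupt context */
        }

        static int poke_cpu(int cpu, void *info)
        {
                /* wait == 1: return only once poke() has run remotely */
                return smp_call_function_single(cpu, poke, info, 1);
        }
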
diff --git a/arch/x86/kernel/ldt.c b/arch/x86/kernel/ldt.c index 21f2bae98c15..a8449571858a 100644 --- a/arch/x86/kernel/ldt.c +++ b/arch/x86/kernel/ldt.c | |||
@@ -68,7 +68,7 @@ static int alloc_ldt(mm_context_t *pc, int mincount, int reload) | |||
68 | load_LDT(pc); | 68 | load_LDT(pc); |
69 | mask = cpumask_of_cpu(smp_processor_id()); | 69 | mask = cpumask_of_cpu(smp_processor_id()); |
70 | if (!cpus_equal(current->mm->cpu_vm_mask, mask)) | 70 | if (!cpus_equal(current->mm->cpu_vm_mask, mask)) |
71 | smp_call_function(flush_ldt, current->mm, 1, 1); | 71 | smp_call_function(flush_ldt, current->mm, 1); |
72 | preempt_enable(); | 72 | preempt_enable(); |
73 | #else | 73 | #else |
74 | load_LDT(pc); | 74 | load_LDT(pc); |
diff --git a/arch/x86/kernel/machine_kexec_32.c b/arch/x86/kernel/machine_kexec_32.c index f4960171bc66..8864230d55af 100644 --- a/arch/x86/kernel/machine_kexec_32.c +++ b/arch/x86/kernel/machine_kexec_32.c | |||
@@ -11,6 +11,8 @@ | |||
11 | #include <linux/delay.h> | 11 | #include <linux/delay.h> |
12 | #include <linux/init.h> | 12 | #include <linux/init.h> |
13 | #include <linux/numa.h> | 13 | #include <linux/numa.h> |
14 | #include <linux/ftrace.h> | ||
15 | |||
14 | #include <asm/pgtable.h> | 16 | #include <asm/pgtable.h> |
15 | #include <asm/pgalloc.h> | 17 | #include <asm/pgalloc.h> |
16 | #include <asm/tlbflush.h> | 18 | #include <asm/tlbflush.h> |
@@ -107,6 +109,8 @@ NORET_TYPE void machine_kexec(struct kimage *image) | |||
107 | unsigned long page_list[PAGES_NR]; | 109 | unsigned long page_list[PAGES_NR]; |
108 | void *control_page; | 110 | void *control_page; |
109 | 111 | ||
112 | tracer_disable(); | ||
113 | |||
110 | /* Interrupts aren't acceptable while we reboot */ | 114 | /* Interrupts aren't acceptable while we reboot */ |
111 | local_irq_disable(); | 115 | local_irq_disable(); |
112 | 116 | ||
diff --git a/arch/x86/kernel/machine_kexec_64.c b/arch/x86/kernel/machine_kexec_64.c index 7830dc4a8380..9dd9262693a3 100644 --- a/arch/x86/kernel/machine_kexec_64.c +++ b/arch/x86/kernel/machine_kexec_64.c | |||
@@ -11,6 +11,8 @@ | |||
11 | #include <linux/string.h> | 11 | #include <linux/string.h> |
12 | #include <linux/reboot.h> | 12 | #include <linux/reboot.h> |
13 | #include <linux/numa.h> | 13 | #include <linux/numa.h> |
14 | #include <linux/ftrace.h> | ||
15 | |||
14 | #include <asm/pgtable.h> | 16 | #include <asm/pgtable.h> |
15 | #include <asm/tlbflush.h> | 17 | #include <asm/tlbflush.h> |
16 | #include <asm/mmu_context.h> | 18 | #include <asm/mmu_context.h> |
@@ -184,6 +186,8 @@ NORET_TYPE void machine_kexec(struct kimage *image) | |||
184 | unsigned long page_list[PAGES_NR]; | 186 | unsigned long page_list[PAGES_NR]; |
185 | void *control_page; | 187 | void *control_page; |
186 | 188 | ||
189 | tracer_disable(); | ||
190 | |||
187 | /* Interrupts aren't acceptable while we reboot */ | 191 | /* Interrupts aren't acceptable while we reboot */ |
188 | local_irq_disable(); | 192 | local_irq_disable(); |
189 | 193 | ||
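
Both kexec paths now disable tracing first: with ftrace active, every function entry still funnels through the mcount trampoline, which must not fire while the kernel is tearing down its mappings and jumping into the new image. tracer_disable() is a tiny helper, paraphrased here from this era's linux/ftrace.h:

        static inline void tracer_disable(void)
        {
        #ifdef CONFIG_FTRACE
                ftrace_enabled = 0;
        #endif
        }
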
diff --git a/arch/x86/kernel/microcode.c b/arch/x86/kernel/microcode.c index 9758fea87c5b..56b933119a04 100644 --- a/arch/x86/kernel/microcode.c +++ b/arch/x86/kernel/microcode.c | |||
@@ -76,6 +76,7 @@ | |||
76 | #include <linux/kernel.h> | 76 | #include <linux/kernel.h> |
77 | #include <linux/init.h> | 77 | #include <linux/init.h> |
78 | #include <linux/sched.h> | 78 | #include <linux/sched.h> |
79 | #include <linux/smp_lock.h> | ||
79 | #include <linux/cpumask.h> | 80 | #include <linux/cpumask.h> |
80 | #include <linux/module.h> | 81 | #include <linux/module.h> |
81 | #include <linux/slab.h> | 82 | #include <linux/slab.h> |
@@ -423,6 +424,7 @@ out: | |||
423 | 424 | ||
424 | static int microcode_open (struct inode *unused1, struct file *unused2) | 425 | static int microcode_open (struct inode *unused1, struct file *unused2) |
425 | { | 426 | { |
427 | cycle_kernel_lock(); | ||
426 | return capable(CAP_SYS_RAWIO) ? 0 : -EPERM; | 428 | return capable(CAP_SYS_RAWIO) ? 0 : -EPERM; |
427 | } | 429 | } |
428 | 430 | ||
@@ -489,7 +491,7 @@ MODULE_ALIAS_MISCDEV(MICROCODE_MINOR); | |||
489 | #define microcode_dev_exit() do { } while(0) | 491 | #define microcode_dev_exit() do { } while(0) |
490 | #endif | 492 | #endif |
491 | 493 | ||
492 | static long get_next_ucode_from_buffer(void **mc, void *buf, | 494 | static long get_next_ucode_from_buffer(void **mc, const u8 *buf, |
493 | unsigned long size, long offset) | 495 | unsigned long size, long offset) |
494 | { | 496 | { |
495 | microcode_header_t *mc_header; | 497 | microcode_header_t *mc_header; |
@@ -523,7 +525,7 @@ static int cpu_request_microcode(int cpu) | |||
523 | char name[30]; | 525 | char name[30]; |
524 | struct cpuinfo_x86 *c = &cpu_data(cpu); | 526 | struct cpuinfo_x86 *c = &cpu_data(cpu); |
525 | const struct firmware *firmware; | 527 | const struct firmware *firmware; |
526 | void *buf; | 528 | const u8 *buf; |
527 | unsigned long size; | 529 | unsigned long size; |
528 | long offset = 0; | 530 | long offset = 0; |
529 | int error; | 531 | int error; |
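
The cycle_kernel_lock() added to microcode_open() is BKL-pushdown boilerplate: open() no longer takes the big kernel lock implicitly, so drivers that relied on that serialization take and drop it once to wait out any concurrent BKL holder. The helper, from include/linux/smp_lock.h:

        static inline void cycle_kernel_lock(void)
        {
                lock_kernel();
                unlock_kernel();
        }
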
diff --git a/arch/x86/kernel/mpparse.c b/arch/x86/kernel/mpparse.c index 8b6b1e05c306..3b25e49380c6 100644 --- a/arch/x86/kernel/mpparse.c +++ b/arch/x86/kernel/mpparse.c | |||
@@ -726,12 +726,22 @@ static inline void __init construct_default_ISA_mptable(int mpc_default_type) | |||
726 | static struct intel_mp_floating *mpf_found; | 726 | static struct intel_mp_floating *mpf_found; |
727 | 727 | ||
728 | /* | 728 | /* |
729 | * Machine-specific quirk for getting the SMP config before other setup | ||
730 | * activities destroy the table: | ||
731 | */ | ||
732 | int (*mach_get_smp_config_quirk)(unsigned int early); | ||
733 | |||
734 | /* | ||
729 | * Scan the memory blocks for an SMP configuration block. | 735 | * Scan the memory blocks for an SMP configuration block. |
730 | */ | 736 | */ |
731 | static void __init __get_smp_config(unsigned early) | 737 | static void __init __get_smp_config(unsigned int early) |
732 | { | 738 | { |
733 | struct intel_mp_floating *mpf = mpf_found; | 739 | struct intel_mp_floating *mpf = mpf_found; |
734 | 740 | ||
741 | if (mach_get_smp_config_quirk) { | ||
742 | if (mach_get_smp_config_quirk(early)) | ||
743 | return; | ||
744 | } | ||
735 | if (acpi_lapic && early) | 745 | if (acpi_lapic && early) |
736 | return; | 746 | return; |
737 | /* | 747 | /* |
@@ -889,10 +899,16 @@ static int __init smp_scan_config(unsigned long base, unsigned long length, | |||
889 | return 0; | 899 | return 0; |
890 | } | 900 | } |
891 | 901 | ||
892 | static void __init __find_smp_config(unsigned reserve) | 902 | int (*mach_find_smp_config_quirk)(unsigned int reserve); |
903 | |||
904 | static void __init __find_smp_config(unsigned int reserve) | ||
893 | { | 905 | { |
894 | unsigned int address; | 906 | unsigned int address; |
895 | 907 | ||
908 | if (mach_find_smp_config_quirk) { | ||
909 | if (mach_find_smp_config_quirk(reserve)) | ||
910 | return; | ||
911 | } | ||
896 | /* | 912 | /* |
897 | * FIXME: Linux assumes you have 640K of base ram.. | 913 | * FIXME: Linux assumes you have 640K of base ram.. |
898 | * this continues the error... | 914 | * this continues the error... |
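
The two mach_*_smp_config_quirk pointers let a subarchitecture intercept MP-table discovery; a nonzero return from the quirk skips the generic scan entirely (VisWS, wired up via visws_early_detect() in the setup.c hunk below, is the intended user). A hypothetical install site, names illustrative:

        static int my_find_smp_config(unsigned int reserve)
        {
                /* locate and, if asked, reserve the board's own table */
                return 1;       /* nonzero: skip the generic scan */
        }

        void __init my_early_detect(void)
        {
                mach_find_smp_config_quirk = my_find_smp_config;
        }
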
diff --git a/arch/x86/kernel/msr.c b/arch/x86/kernel/msr.c index 1f3abe048e93..a153b3905f60 100644 --- a/arch/x86/kernel/msr.c +++ b/arch/x86/kernel/msr.c | |||
@@ -117,12 +117,20 @@ static int msr_open(struct inode *inode, struct file *file) | |||
117 | { | 117 | { |
118 | unsigned int cpu = iminor(file->f_path.dentry->d_inode); | 118 | unsigned int cpu = iminor(file->f_path.dentry->d_inode); |
119 | struct cpuinfo_x86 *c = &cpu_data(cpu); | 119 | struct cpuinfo_x86 *c = &cpu_data(cpu); |
120 | int ret = 0; | ||
120 | 121 | ||
121 | if (cpu >= NR_CPUS || !cpu_online(cpu)) | 122 | lock_kernel(); |
122 | return -ENXIO; /* No such CPU */ | 123 | cpu = iminor(file->f_path.dentry->d_inode); |
123 | if (!cpu_has(c, X86_FEATURE_MSR)) | ||
124 | return -EIO; /* MSR not supported */ | ||
125 | 124 | ||
125 | if (cpu >= NR_CPUS || !cpu_online(cpu)) { | ||
126 | ret = -ENXIO; /* No such CPU */ | ||
127 | goto out; | ||
128 | } | ||
129 | c = &cpu_data(cpu); | ||
130 | if (!cpu_has(c, X86_FEATURE_MSR)) | ||
131 | ret = -EIO; /* MSR not supported */ | ||
132 | out: | ||
133 | unlock_kernel(); | ||
126 | return 0; | 134 | return ret; |
127 | } | 135 | } |
128 | 136 | ||
diff --git a/arch/x86/kernel/nmi.c b/arch/x86/kernel/nmi.c index 8dfe9db87a9e..ec024b3baad0 100644 --- a/arch/x86/kernel/nmi.c +++ b/arch/x86/kernel/nmi.c | |||
@@ -130,7 +130,7 @@ int __init check_nmi_watchdog(void) | |||
130 | 130 | ||
131 | #ifdef CONFIG_SMP | 131 | #ifdef CONFIG_SMP |
132 | if (nmi_watchdog == NMI_LOCAL_APIC) | 132 | if (nmi_watchdog == NMI_LOCAL_APIC) |
133 | smp_call_function(nmi_cpu_busy, (void *)&endflag, 0, 0); | 133 | smp_call_function(nmi_cpu_busy, (void *)&endflag, 0); |
134 | #endif | 134 | #endif |
135 | 135 | ||
136 | for_each_possible_cpu(cpu) | 136 | for_each_possible_cpu(cpu) |
@@ -171,6 +171,9 @@ int __init check_nmi_watchdog(void) | |||
171 | error: | 171 | error: |
172 | if (nmi_watchdog == NMI_IO_APIC && !timer_through_8259) | 172 | if (nmi_watchdog == NMI_IO_APIC && !timer_through_8259) |
173 | disable_8259A_irq(0); | 173 | disable_8259A_irq(0); |
174 | #ifdef CONFIG_X86_32 | ||
175 | timer_ack = 0; | ||
176 | #endif | ||
174 | return -1; | 177 | return -1; |
175 | } | 178 | } |
176 | 179 | ||
@@ -269,7 +272,7 @@ static void __acpi_nmi_enable(void *__unused) | |||
269 | void acpi_nmi_enable(void) | 272 | void acpi_nmi_enable(void) |
270 | { | 273 | { |
271 | if (atomic_read(&nmi_active) && nmi_watchdog == NMI_IO_APIC) | 274 | if (atomic_read(&nmi_active) && nmi_watchdog == NMI_IO_APIC) |
272 | on_each_cpu(__acpi_nmi_enable, NULL, 0, 1); | 275 | on_each_cpu(__acpi_nmi_enable, NULL, 1); |
273 | } | 276 | } |
274 | 277 | ||
275 | static void __acpi_nmi_disable(void *__unused) | 278 | static void __acpi_nmi_disable(void *__unused) |
@@ -283,7 +286,7 @@ static void __acpi_nmi_disable(void *__unused) | |||
283 | void acpi_nmi_disable(void) | 286 | void acpi_nmi_disable(void) |
284 | { | 287 | { |
285 | if (atomic_read(&nmi_active) && nmi_watchdog == NMI_IO_APIC) | 288 | if (atomic_read(&nmi_active) && nmi_watchdog == NMI_IO_APIC) |
286 | on_each_cpu(__acpi_nmi_disable, NULL, 0, 1); | 289 | on_each_cpu(__acpi_nmi_disable, NULL, 1); |
287 | } | 290 | } |
288 | 291 | ||
289 | void setup_apic_nmi_watchdog(void *unused) | 292 | void setup_apic_nmi_watchdog(void *unused) |
diff --git a/arch/x86/kernel/numaq_32.c b/arch/x86/kernel/numaq_32.c index f0f1de1c4a1d..a23e8233b9ac 100644 --- a/arch/x86/kernel/numaq_32.c +++ b/arch/x86/kernel/numaq_32.c | |||
@@ -93,12 +93,13 @@ int __init get_memcfg_numaq(void) | |||
93 | return 1; | 93 | return 1; |
94 | } | 94 | } |
95 | 95 | ||
96 | static int __init numaq_tsc_disable(void) | 96 | void __init numaq_tsc_disable(void) |
97 | { | 97 | { |
98 | if (!found_numaq) | ||
99 | return; | ||
100 | |||
98 | if (num_online_nodes() > 1) { | 101 | if (num_online_nodes() > 1) { |
99 | printk(KERN_DEBUG "NUMAQ: disabling TSC\n"); | 102 | printk(KERN_DEBUG "NUMAQ: disabling TSC\n"); |
100 | setup_clear_cpu_cap(X86_FEATURE_TSC); | 103 | setup_clear_cpu_cap(X86_FEATURE_TSC); |
101 | } | 104 | } |
102 | return 0; | ||
103 | } | 105 | } |
104 | arch_initcall(numaq_tsc_disable); | ||
diff --git a/arch/x86/kernel/paravirt.c b/arch/x86/kernel/paravirt.c index e7e5652f65bc..e0f571d58c19 100644 --- a/arch/x86/kernel/paravirt.c +++ b/arch/x86/kernel/paravirt.c | |||
@@ -285,7 +285,7 @@ struct pv_time_ops pv_time_ops = { | |||
285 | .get_wallclock = native_get_wallclock, | 285 | .get_wallclock = native_get_wallclock, |
286 | .set_wallclock = native_set_wallclock, | 286 | .set_wallclock = native_set_wallclock, |
287 | .sched_clock = native_sched_clock, | 287 | .sched_clock = native_sched_clock, |
288 | .get_cpu_khz = native_calculate_cpu_khz, | 288 | .get_tsc_khz = native_calibrate_tsc, |
289 | }; | 289 | }; |
290 | 290 | ||
291 | struct pv_irq_ops pv_irq_ops = { | 291 | struct pv_irq_ops pv_irq_ops = { |
diff --git a/arch/x86/kernel/pci-gart_64.c b/arch/x86/kernel/pci-gart_64.c index d0d18db5d2a4..c3fe78406d18 100644 --- a/arch/x86/kernel/pci-gart_64.c +++ b/arch/x86/kernel/pci-gart_64.c | |||
@@ -630,6 +630,7 @@ static __init int init_k8_gatt(struct agp_kern_info *info) | |||
630 | struct pci_dev *dev; | 630 | struct pci_dev *dev; |
631 | void *gatt; | 631 | void *gatt; |
632 | int i, error; | 632 | int i, error; |
633 | unsigned long start_pfn, end_pfn; | ||
633 | 634 | ||
634 | printk(KERN_INFO "PCI-DMA: Disabling AGP.\n"); | 635 | printk(KERN_INFO "PCI-DMA: Disabling AGP.\n"); |
635 | aper_size = aper_base = info->aper_size = 0; | 636 | aper_size = aper_base = info->aper_size = 0; |
@@ -674,6 +675,13 @@ static __init int init_k8_gatt(struct agp_kern_info *info) | |||
674 | 675 | ||
675 | printk(KERN_INFO "PCI-DMA: aperture base @ %x size %u KB\n", | 676 | printk(KERN_INFO "PCI-DMA: aperture base @ %x size %u KB\n", |
676 | aper_base, aper_size>>10); | 677 | aper_base, aper_size>>10); |
678 | |||
679 | /* need to map that range */ | ||
680 | end_pfn = (aper_base>>PAGE_SHIFT) + (aper_size>>PAGE_SHIFT); | ||
681 | if (end_pfn > max_low_pfn_mapped) { | ||
682 | start_pfn = (aper_base>>PAGE_SHIFT); | ||
683 | init_memory_mapping(start_pfn<<PAGE_SHIFT, end_pfn<<PAGE_SHIFT); | ||
684 | } | ||
677 | return 0; | 685 | return 0; |
678 | 686 | ||
679 | nommu: | 687 | nommu: |
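
This is a consequence of the max_low_pfn_mapped change later in this merge: the direct mapping no longer necessarily covers everything up to end-of-RAM, so the aperture backing the GATT may need mapping explicitly. Worked through with illustrative numbers:

        /* say a 64 MB aperture at 2.5 GB, PAGE_SHIFT == 12:
         *   aper_base = 0xa0000000, aper_size = 0x04000000
         *   start_pfn = 0xa0000000 >> 12             = 0xa0000
         *   end_pfn   = 0xa0000 + (0x04000000 >> 12) = 0xa4000
         * if 0xa4000 > max_low_pfn_mapped, init_memory_mapping()
         * extends the direct mapping over exactly that range
         */
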
diff --git a/arch/x86/kernel/process.c b/arch/x86/kernel/process.c index 4061d63aabe7..4d629c62f4f8 100644 --- a/arch/x86/kernel/process.c +++ b/arch/x86/kernel/process.c | |||
@@ -7,6 +7,12 @@ | |||
7 | #include <linux/module.h> | 7 | #include <linux/module.h> |
8 | #include <linux/pm.h> | 8 | #include <linux/pm.h> |
9 | #include <linux/clockchips.h> | 9 | #include <linux/clockchips.h> |
10 | #include <asm/system.h> | ||
11 | |||
12 | unsigned long idle_halt; | ||
13 | EXPORT_SYMBOL(idle_halt); | ||
14 | unsigned long idle_nomwait; | ||
15 | EXPORT_SYMBOL(idle_nomwait); | ||
10 | 16 | ||
11 | struct kmem_cache *task_xstate_cachep; | 17 | struct kmem_cache *task_xstate_cachep; |
12 | 18 | ||
@@ -132,7 +138,7 @@ void cpu_idle_wait(void) | |||
132 | { | 138 | { |
133 | smp_mb(); | 139 | smp_mb(); |
134 | /* kick all the CPUs so that they exit out of pm_idle */ | 140 | /* kick all the CPUs so that they exit out of pm_idle */ |
135 | smp_call_function(do_nothing, NULL, 0, 1); | 141 | smp_call_function(do_nothing, NULL, 1); |
136 | } | 142 | } |
137 | EXPORT_SYMBOL_GPL(cpu_idle_wait); | 143 | EXPORT_SYMBOL_GPL(cpu_idle_wait); |
138 | 144 | ||
@@ -325,7 +331,27 @@ static int __init idle_setup(char *str) | |||
325 | pm_idle = poll_idle; | 331 | pm_idle = poll_idle; |
326 | } else if (!strcmp(str, "mwait")) | 332 | } else if (!strcmp(str, "mwait")) |
327 | force_mwait = 1; | 333 | force_mwait = 1; |
328 | else | 334 | else if (!strcmp(str, "halt")) { |
335 | /* | ||
336 | * With the idle=halt boot option, halt is forced as the | ||
337 | * CPU idle method, so the C2/C3 states will no longer be | ||
338 | * entered. | ||
339 | * Leave boot_option_idle_override untouched so that the | ||
340 | * CPU idle driver can still be loaded. | ||
341 | */ | ||
342 | pm_idle = default_idle; | ||
343 | idle_halt = 1; | ||
344 | return 0; | ||
345 | } else if (!strcmp(str, "nomwait")) { | ||
346 | /* | ||
347 | * With the idle=nomwait boot option, MWAIT is disabled | ||
348 | * for the CPU C2/C3 states. boot_option_idle_override is | ||
349 | * left untouched so that the CPU idle driver can still | ||
350 | * be loaded. | ||
351 | */ | ||
352 | idle_nomwait = 1; | ||
353 | return 0; | ||
354 | } else | ||
329 | return -1; | 355 | return -1; |
330 | 356 | ||
331 | boot_option_idle_override = 1; | 357 | boot_option_idle_override = 1; |
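
The two new keywords join "poll" and "mwait": idle=halt pins pm_idle to default_idle (so ACPI C2/C3 are never entered), while idle=nomwait only vetoes MWAIT for the C-states; neither sets boot_option_idle_override, so the cpuidle machinery still loads. A sketch of a flag consumer (the flags are the ones exported at the top of this file; the consumer itself is illustrative):

        extern unsigned long idle_halt, idle_nomwait;

        enum entry_method { USE_HLT, USE_MWAIT };       /* illustrative */

        static enum entry_method pick_cstate_entry(void)
        {
                if (idle_halt || idle_nomwait)
                        return USE_HLT;         /* MWAIT is vetoed */
                return USE_MWAIT;
        }
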
diff --git a/arch/x86/kernel/process_32.c b/arch/x86/kernel/process_32.c index 9a139f6c9df3..0c3927accb00 100644 --- a/arch/x86/kernel/process_32.c +++ b/arch/x86/kernel/process_32.c | |||
@@ -142,7 +142,10 @@ void cpu_idle(void) | |||
142 | 142 | ||
143 | local_irq_disable(); | 143 | local_irq_disable(); |
144 | __get_cpu_var(irq_stat).idle_timestamp = jiffies; | 144 | __get_cpu_var(irq_stat).idle_timestamp = jiffies; |
145 | /* Don't trace irqs off for idle */ | ||
146 | stop_critical_timings(); | ||
145 | pm_idle(); | 147 | pm_idle(); |
148 | start_critical_timings(); | ||
146 | } | 149 | } |
147 | tick_nohz_restart_sched_tick(); | 150 | tick_nohz_restart_sched_tick(); |
148 | preempt_enable_no_resched(); | 151 | preempt_enable_no_resched(); |
diff --git a/arch/x86/kernel/process_64.c b/arch/x86/kernel/process_64.c index db5eb963e4df..a8e53626ac9a 100644 --- a/arch/x86/kernel/process_64.c +++ b/arch/x86/kernel/process_64.c | |||
@@ -134,7 +134,10 @@ void cpu_idle(void) | |||
134 | */ | 134 | */ |
135 | local_irq_disable(); | 135 | local_irq_disable(); |
136 | enter_idle(); | 136 | enter_idle(); |
137 | /* Don't trace irqs off for idle */ | ||
138 | stop_critical_timings(); | ||
137 | pm_idle(); | 139 | pm_idle(); |
140 | start_critical_timings(); | ||
138 | /* In many cases the interrupt that ended idle | 141 | /* In many cases the interrupt that ended idle |
139 | has already called exit_idle. But some idle | 142 | has already called exit_idle. But some idle |
140 | loops can be woken up without interrupt. */ | 143 | loops can be woken up without interrupt. */ |
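
Both cpu_idle() loops gain the same bracket. Without it, the irqsoff tracer (part of the ftrace work in this merge) would book the entire idle residency as one giant interrupts-off section and drown every real latency. The pairing is the whole contract:

        local_irq_disable();
        stop_critical_timings();        /* tracer: ignore idle residency */
        pm_idle();                      /* hlt/mwait; may re-enable irqs */
        start_critical_timings();       /* resume irqs-off measurement   */
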
diff --git a/arch/x86/kernel/quirks.c b/arch/x86/kernel/quirks.c index 79bdcd11c66e..d13858818100 100644 --- a/arch/x86/kernel/quirks.c +++ b/arch/x86/kernel/quirks.c | |||
@@ -266,6 +266,8 @@ static void old_ich_force_enable_hpet_user(struct pci_dev *dev) | |||
266 | hpet_print_force_info(); | 266 | hpet_print_force_info(); |
267 | } | 267 | } |
268 | 268 | ||
269 | DECLARE_PCI_FIXUP_HEADER(PCI_VENDOR_ID_INTEL, PCI_DEVICE_ID_INTEL_ESB_1, | ||
270 | old_ich_force_enable_hpet_user); | ||
269 | DECLARE_PCI_FIXUP_HEADER(PCI_VENDOR_ID_INTEL, PCI_DEVICE_ID_INTEL_82801CA_0, | 271 | DECLARE_PCI_FIXUP_HEADER(PCI_VENDOR_ID_INTEL, PCI_DEVICE_ID_INTEL_82801CA_0, |
270 | old_ich_force_enable_hpet_user); | 272 | old_ich_force_enable_hpet_user); |
271 | DECLARE_PCI_FIXUP_HEADER(PCI_VENDOR_ID_INTEL, PCI_DEVICE_ID_INTEL_82801CA_12, | 273 | DECLARE_PCI_FIXUP_HEADER(PCI_VENDOR_ID_INTEL, PCI_DEVICE_ID_INTEL_82801CA_12, |
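
The list gains the ESB-1 south bridge. DECLARE_PCI_FIXUP_HEADER drops the hook into a linker section that the PCI core walks right after reading each device's config header, so the HPET is force-enabled before any driver binds. Registering such a fixup looks like this (hook name illustrative):

        static void my_force_hpet(struct pci_dev *dev)
        {
                /* poke the chipset so the HPET becomes visible ... */
        }
        DECLARE_PCI_FIXUP_HEADER(PCI_VENDOR_ID_INTEL, PCI_DEVICE_ID_INTEL_ESB_1,
                                 my_force_hpet);
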
diff --git a/arch/x86/kernel/setup.c b/arch/x86/kernel/setup.c index cfcfbefee0b9..531b55b8e81a 100644 --- a/arch/x86/kernel/setup.c +++ b/arch/x86/kernel/setup.c | |||
@@ -394,11 +394,10 @@ static void __init parse_setup_data(void) | |||
394 | } | 394 | } |
395 | } | 395 | } |
396 | 396 | ||
397 | static void __init reserve_setup_data(void) | 397 | static void __init e820_reserve_setup_data(void) |
398 | { | 398 | { |
399 | struct setup_data *data; | 399 | struct setup_data *data; |
400 | u64 pa_data; | 400 | u64 pa_data; |
401 | char buf[32]; | ||
402 | int found = 0; | 401 | int found = 0; |
403 | 402 | ||
404 | if (boot_params.hdr.version < 0x0209) | 403 | if (boot_params.hdr.version < 0x0209) |
@@ -406,8 +405,6 @@ static void __init reserve_setup_data(void) | |||
406 | pa_data = boot_params.hdr.setup_data; | 405 | pa_data = boot_params.hdr.setup_data; |
407 | while (pa_data) { | 406 | while (pa_data) { |
408 | data = early_ioremap(pa_data, sizeof(*data)); | 407 | data = early_ioremap(pa_data, sizeof(*data)); |
409 | sprintf(buf, "setup data %x", data->type); | ||
410 | reserve_early(pa_data, pa_data+sizeof(*data)+data->len, buf); | ||
411 | e820_update_range(pa_data, sizeof(*data)+data->len, | 408 | e820_update_range(pa_data, sizeof(*data)+data->len, |
412 | E820_RAM, E820_RESERVED_KERN); | 409 | E820_RAM, E820_RESERVED_KERN); |
413 | found = 1; | 410 | found = 1; |
@@ -418,10 +415,29 @@ static void __init reserve_setup_data(void) | |||
418 | return; | 415 | return; |
419 | 416 | ||
420 | sanitize_e820_map(e820.map, ARRAY_SIZE(e820.map), &e820.nr_map); | 417 | sanitize_e820_map(e820.map, ARRAY_SIZE(e820.map), &e820.nr_map); |
418 | memcpy(&e820_saved, &e820, sizeof(struct e820map)); | ||
421 | printk(KERN_INFO "extended physical RAM map:\n"); | 419 | printk(KERN_INFO "extended physical RAM map:\n"); |
422 | e820_print_map("reserve setup_data"); | 420 | e820_print_map("reserve setup_data"); |
423 | } | 421 | } |
424 | 422 | ||
423 | static void __init reserve_early_setup_data(void) | ||
424 | { | ||
425 | struct setup_data *data; | ||
426 | u64 pa_data; | ||
427 | char buf[32]; | ||
428 | |||
429 | if (boot_params.hdr.version < 0x0209) | ||
430 | return; | ||
431 | pa_data = boot_params.hdr.setup_data; | ||
432 | while (pa_data) { | ||
433 | data = early_ioremap(pa_data, sizeof(*data)); | ||
434 | sprintf(buf, "setup data %x", data->type); | ||
435 | reserve_early(pa_data, pa_data+sizeof(*data)+data->len, buf); | ||
436 | pa_data = data->next; | ||
437 | early_iounmap(data, sizeof(*data)); | ||
438 | } | ||
439 | } | ||
440 | |||
425 | /* | 441 | /* |
426 | * --------- Crashkernel reservation ------------------------------ | 442 | * --------- Crashkernel reservation ------------------------------ |
427 | */ | 443 | */ |
@@ -580,6 +596,7 @@ void __init setup_arch(char **cmdline_p) | |||
580 | { | 596 | { |
581 | #ifdef CONFIG_X86_32 | 597 | #ifdef CONFIG_X86_32 |
582 | memcpy(&boot_cpu_data, &new_cpu_data, sizeof(new_cpu_data)); | 598 | memcpy(&boot_cpu_data, &new_cpu_data, sizeof(new_cpu_data)); |
599 | visws_early_detect(); | ||
583 | pre_setup_arch_hook(); | 600 | pre_setup_arch_hook(); |
584 | early_cpu_init(); | 601 | early_cpu_init(); |
585 | #else | 602 | #else |
@@ -626,6 +643,8 @@ void __init setup_arch(char **cmdline_p) | |||
626 | 643 | ||
627 | setup_memory_map(); | 644 | setup_memory_map(); |
628 | parse_setup_data(); | 645 | parse_setup_data(); |
646 | /* update the e820_saved too */ | ||
647 | e820_reserve_setup_data(); | ||
629 | 648 | ||
630 | copy_edd(); | 649 | copy_edd(); |
631 | 650 | ||
@@ -656,7 +675,7 @@ void __init setup_arch(char **cmdline_p) | |||
656 | parse_early_param(); | 675 | parse_early_param(); |
657 | 676 | ||
658 | /* after early param, so could get panic from serial */ | 677 | /* after early param, so could get panic from serial */ |
659 | reserve_setup_data(); | 678 | reserve_early_setup_data(); |
660 | 679 | ||
661 | if (acpi_mps_check()) { | 680 | if (acpi_mps_check()) { |
662 | #ifdef CONFIG_X86_LOCAL_APIC | 681 | #ifdef CONFIG_X86_LOCAL_APIC |
@@ -665,6 +684,11 @@ void __init setup_arch(char **cmdline_p) | |||
665 | clear_cpu_cap(&boot_cpu_data, X86_FEATURE_APIC); | 684 | clear_cpu_cap(&boot_cpu_data, X86_FEATURE_APIC); |
666 | } | 685 | } |
667 | 686 | ||
687 | #ifdef CONFIG_PCI | ||
688 | if (pci_early_dump_regs) | ||
689 | early_dump_pci_devices(); | ||
690 | #endif | ||
691 | |||
668 | finish_e820_parsing(); | 692 | finish_e820_parsing(); |
669 | 693 | ||
670 | #ifdef CONFIG_X86_32 | 694 | #ifdef CONFIG_X86_32 |
@@ -691,22 +715,18 @@ void __init setup_arch(char **cmdline_p) | |||
691 | early_gart_iommu_check(); | 715 | early_gart_iommu_check(); |
692 | #endif | 716 | #endif |
693 | 717 | ||
694 | e820_register_active_regions(0, 0, -1UL); | ||
695 | /* | 718 | /* |
696 | * partially used pages are not usable - thus | 719 | * partially used pages are not usable - thus |
697 | * we are rounding upwards: | 720 | * we are rounding upwards: |
698 | */ | 721 | */ |
699 | max_pfn = e820_end_of_ram(); | 722 | max_pfn = e820_end_of_ram_pfn(); |
700 | 723 | ||
701 | /* preallocate 4k for mptable mpc */ | 724 | /* preallocate 4k for mptable mpc */ |
702 | early_reserve_e820_mpc_new(); | 725 | early_reserve_e820_mpc_new(); |
703 | /* update e820 for memory not covered by WB MTRRs */ | 726 | /* update e820 for memory not covered by WB MTRRs */ |
704 | mtrr_bp_init(); | 727 | mtrr_bp_init(); |
705 | if (mtrr_trim_uncached_memory(max_pfn)) { | 728 | if (mtrr_trim_uncached_memory(max_pfn)) |
706 | remove_all_active_ranges(); | 729 | max_pfn = e820_end_of_ram_pfn(); |
707 | e820_register_active_regions(0, 0, -1UL); | ||
708 | max_pfn = e820_end_of_ram(); | ||
709 | } | ||
710 | 730 | ||
711 | #ifdef CONFIG_X86_32 | 731 | #ifdef CONFIG_X86_32 |
712 | /* max_low_pfn get updated here */ | 732 | /* max_low_pfn get updated here */ |
@@ -718,12 +738,26 @@ void __init setup_arch(char **cmdline_p) | |||
718 | 738 | ||
719 | /* How many end-of-memory variables you have, grandma! */ | 739 | /* How many end-of-memory variables you have, grandma! */ |
720 | /* need this before calling reserve_initrd */ | 740 | /* need this before calling reserve_initrd */ |
721 | max_low_pfn = max_pfn; | 741 | if (max_pfn > (1UL<<(32 - PAGE_SHIFT))) |
742 | max_low_pfn = e820_end_of_low_ram_pfn(); | ||
743 | else | ||
744 | max_low_pfn = max_pfn; | ||
745 | |||
722 | high_memory = (void *)__va(max_pfn * PAGE_SIZE - 1) + 1; | 746 | high_memory = (void *)__va(max_pfn * PAGE_SIZE - 1) + 1; |
723 | #endif | 747 | #endif |
724 | 748 | ||
725 | /* max_pfn_mapped is updated here */ | 749 | /* max_pfn_mapped is updated here */ |
726 | max_pfn_mapped = init_memory_mapping(0, (max_low_pfn << PAGE_SHIFT)); | 750 | max_low_pfn_mapped = init_memory_mapping(0, max_low_pfn<<PAGE_SHIFT); |
751 | max_pfn_mapped = max_low_pfn_mapped; | ||
752 | |||
753 | #ifdef CONFIG_X86_64 | ||
754 | if (max_pfn > max_low_pfn) { | ||
755 | max_pfn_mapped = init_memory_mapping(1UL<<32, | ||
756 | max_pfn<<PAGE_SHIFT); | ||
757 | /* can we preserve max_low_pfn? */ | ||
758 | max_low_pfn = max_pfn; | ||
759 | } | ||
760 | #endif | ||
727 | 761 | ||
728 | /* | 762 | /* |
729 | * NOTE: On x86-32, only from this point on, fixmaps are ready for use. | 763 | * NOTE: On x86-32, only from this point on, fixmaps are ready for use. |
@@ -749,9 +783,6 @@ void __init setup_arch(char **cmdline_p) | |||
749 | */ | 783 | */ |
750 | acpi_boot_table_init(); | 784 | acpi_boot_table_init(); |
751 | 785 | ||
752 | /* Remove active ranges so rediscovery with NUMA-awareness happens */ | ||
753 | remove_all_active_ranges(); | ||
754 | |||
755 | #ifdef CONFIG_ACPI_NUMA | 786 | #ifdef CONFIG_ACPI_NUMA |
756 | /* | 787 | /* |
757 | * Parse SRAT to discover nodes. | 788 | * Parse SRAT to discover nodes. |
@@ -823,6 +854,14 @@ void __init setup_arch(char **cmdline_p) | |||
823 | init_cpu_to_node(); | 854 | init_cpu_to_node(); |
824 | #endif | 855 | #endif |
825 | 856 | ||
857 | #ifdef CONFIG_X86_NUMAQ | ||
858 | /* | ||
859 | * numaq_tsc_disable() checks the number of online nodes, so | ||
860 | * call it here, before time_init()/tsc_init() | ||
861 | */ | ||
862 | numaq_tsc_disable(); | ||
863 | #endif | ||
864 | |||
826 | init_apic_mappings(); | 865 | init_apic_mappings(); |
827 | ioapic_init_mappings(); | 866 | ioapic_init_mappings(); |
828 | 867 | ||
diff --git a/arch/x86/kernel/setup_percpu.c b/arch/x86/kernel/setup_percpu.c index 5fc310f746fc..cac68430d31f 100644 --- a/arch/x86/kernel/setup_percpu.c +++ b/arch/x86/kernel/setup_percpu.c | |||
@@ -343,23 +343,23 @@ static const cpumask_t cpu_mask_none; | |||
343 | /* | 343 | /* |
344 | * Returns a pointer to the bitmask of CPUs on Node 'node'. | 344 | * Returns a pointer to the bitmask of CPUs on Node 'node'. |
345 | */ | 345 | */ |
346 | cpumask_t *_node_to_cpumask_ptr(int node) | 346 | const cpumask_t *_node_to_cpumask_ptr(int node) |
347 | { | 347 | { |
348 | if (node_to_cpumask_map == NULL) { | 348 | if (node_to_cpumask_map == NULL) { |
349 | printk(KERN_WARNING | 349 | printk(KERN_WARNING |
350 | "_node_to_cpumask_ptr(%d): no node_to_cpumask_map!\n", | 350 | "_node_to_cpumask_ptr(%d): no node_to_cpumask_map!\n", |
351 | node); | 351 | node); |
352 | dump_stack(); | 352 | dump_stack(); |
353 | return &cpu_online_map; | 353 | return (const cpumask_t *)&cpu_online_map; |
354 | } | 354 | } |
355 | if (node >= nr_node_ids) { | 355 | if (node >= nr_node_ids) { |
356 | printk(KERN_WARNING | 356 | printk(KERN_WARNING |
357 | "_node_to_cpumask_ptr(%d): node > nr_node_ids(%d)\n", | 357 | "_node_to_cpumask_ptr(%d): node > nr_node_ids(%d)\n", |
358 | node, nr_node_ids); | 358 | node, nr_node_ids); |
359 | dump_stack(); | 359 | dump_stack(); |
360 | return (cpumask_t *)&cpu_mask_none; | 360 | return &cpu_mask_none; |
361 | } | 361 | } |
362 | return (cpumask_t *)&node_to_cpumask_map[node]; | 362 | return &node_to_cpumask_map[node]; |
363 | } | 363 | } |
364 | EXPORT_SYMBOL(_node_to_cpumask_ptr); | 364 | EXPORT_SYMBOL(_node_to_cpumask_ptr); |
365 | 365 | ||
diff --git a/arch/x86/kernel/smp.c b/arch/x86/kernel/smp.c index 0cb7aadc87cd..361b7a4c640c 100644 --- a/arch/x86/kernel/smp.c +++ b/arch/x86/kernel/smp.c | |||
@@ -121,132 +121,23 @@ static void native_smp_send_reschedule(int cpu) | |||
121 | send_IPI_mask(cpumask_of_cpu(cpu), RESCHEDULE_VECTOR); | 121 | send_IPI_mask(cpumask_of_cpu(cpu), RESCHEDULE_VECTOR); |
122 | } | 122 | } |
123 | 123 | ||
124 | /* | 124 | void native_send_call_func_single_ipi(int cpu) |
125 | * Structure and data for smp_call_function(). This is designed to minimise | ||
126 | * static memory requirements. It also looks cleaner. | ||
127 | */ | ||
128 | static DEFINE_SPINLOCK(call_lock); | ||
129 | |||
130 | struct call_data_struct { | ||
131 | void (*func) (void *info); | ||
132 | void *info; | ||
133 | atomic_t started; | ||
134 | atomic_t finished; | ||
135 | int wait; | ||
136 | }; | ||
137 | |||
138 | void lock_ipi_call_lock(void) | ||
139 | { | 125 | { |
140 | spin_lock_irq(&call_lock); | 126 | send_IPI_mask(cpumask_of_cpu(cpu), CALL_FUNCTION_SINGLE_VECTOR); |
141 | } | ||
142 | |||
143 | void unlock_ipi_call_lock(void) | ||
144 | { | ||
145 | spin_unlock_irq(&call_lock); | ||
146 | } | ||
147 | |||
148 | static struct call_data_struct *call_data; | ||
149 | |||
150 | static void __smp_call_function(void (*func) (void *info), void *info, | ||
151 | int nonatomic, int wait) | ||
152 | { | ||
153 | struct call_data_struct data; | ||
154 | int cpus = num_online_cpus() - 1; | ||
155 | |||
156 | if (!cpus) | ||
157 | return; | ||
158 | |||
159 | data.func = func; | ||
160 | data.info = info; | ||
161 | atomic_set(&data.started, 0); | ||
162 | data.wait = wait; | ||
163 | if (wait) | ||
164 | atomic_set(&data.finished, 0); | ||
165 | |||
166 | call_data = &data; | ||
167 | mb(); | ||
168 | |||
169 | /* Send a message to all other CPUs and wait for them to respond */ | ||
170 | send_IPI_allbutself(CALL_FUNCTION_VECTOR); | ||
171 | |||
172 | /* Wait for response */ | ||
173 | while (atomic_read(&data.started) != cpus) | ||
174 | cpu_relax(); | ||
175 | |||
176 | if (wait) | ||
177 | while (atomic_read(&data.finished) != cpus) | ||
178 | cpu_relax(); | ||
179 | } | 127 | } |
180 | 128 | ||
181 | 129 | void native_send_call_func_ipi(cpumask_t mask) | |
182 | /** | ||
183 | * smp_call_function_mask(): Run a function on a set of other CPUs. | ||
184 | * @mask: The set of cpus to run on. Must not include the current cpu. | ||
185 | * @func: The function to run. This must be fast and non-blocking. | ||
186 | * @info: An arbitrary pointer to pass to the function. | ||
187 | * @wait: If true, wait (atomically) until function has completed on other CPUs. | ||
188 | * | ||
189 | * Returns 0 on success, else a negative status code. | ||
190 | * | ||
191 | * If @wait is true, then returns once @func has returned; otherwise | ||
192 | * it returns just before the target cpu calls @func. | ||
193 | * | ||
194 | * You must not call this function with disabled interrupts or from a | ||
195 | * hardware interrupt handler or from a bottom half handler. | ||
196 | */ | ||
197 | static int | ||
198 | native_smp_call_function_mask(cpumask_t mask, | ||
199 | void (*func)(void *), void *info, | ||
200 | int wait) | ||
201 | { | 130 | { |
202 | struct call_data_struct data; | ||
203 | cpumask_t allbutself; | 131 | cpumask_t allbutself; |
204 | int cpus; | ||
205 | |||
206 | /* Can deadlock when called with interrupts disabled */ | ||
207 | WARN_ON(irqs_disabled()); | ||
208 | |||
209 | /* Holding any lock stops cpus from going down. */ | ||
210 | spin_lock(&call_lock); | ||
211 | 132 | ||
212 | allbutself = cpu_online_map; | 133 | allbutself = cpu_online_map; |
213 | cpu_clear(smp_processor_id(), allbutself); | 134 | cpu_clear(smp_processor_id(), allbutself); |
214 | 135 | ||
215 | cpus_and(mask, mask, allbutself); | ||
216 | cpus = cpus_weight(mask); | ||
217 | |||
218 | if (!cpus) { | ||
219 | spin_unlock(&call_lock); | ||
220 | return 0; | ||
221 | } | ||
222 | |||
223 | data.func = func; | ||
224 | data.info = info; | ||
225 | atomic_set(&data.started, 0); | ||
226 | data.wait = wait; | ||
227 | if (wait) | ||
228 | atomic_set(&data.finished, 0); | ||
229 | |||
230 | call_data = &data; | ||
231 | wmb(); | ||
232 | |||
233 | /* Send a message to other CPUs */ | ||
234 | if (cpus_equal(mask, allbutself) && | 136 | if (cpus_equal(mask, allbutself) && |
235 | cpus_equal(cpu_online_map, cpu_callout_map)) | 137 | cpus_equal(cpu_online_map, cpu_callout_map)) |
236 | send_IPI_allbutself(CALL_FUNCTION_VECTOR); | 138 | send_IPI_allbutself(CALL_FUNCTION_VECTOR); |
237 | else | 139 | else |
238 | send_IPI_mask(mask, CALL_FUNCTION_VECTOR); | 140 | send_IPI_mask(mask, CALL_FUNCTION_VECTOR); |
239 | |||
240 | /* Wait for response */ | ||
241 | while (atomic_read(&data.started) != cpus) | ||
242 | cpu_relax(); | ||
243 | |||
244 | if (wait) | ||
245 | while (atomic_read(&data.finished) != cpus) | ||
246 | cpu_relax(); | ||
247 | spin_unlock(&call_lock); | ||
248 | |||
249 | return 0; | ||
250 | } | 141 | } |
251 | 142 | ||
252 | static void stop_this_cpu(void *dummy) | 143 | static void stop_this_cpu(void *dummy) |
@@ -268,18 +159,13 @@ static void stop_this_cpu(void *dummy) | |||
268 | 159 | ||
269 | static void native_smp_send_stop(void) | 160 | static void native_smp_send_stop(void) |
270 | { | 161 | { |
271 | int nolock; | ||
272 | unsigned long flags; | 162 | unsigned long flags; |
273 | 163 | ||
274 | if (reboot_force) | 164 | if (reboot_force) |
275 | return; | 165 | return; |
276 | 166 | ||
277 | /* Don't deadlock on the call lock in panic */ | 167 | smp_call_function(stop_this_cpu, NULL, 0); |
278 | nolock = !spin_trylock(&call_lock); | ||
279 | local_irq_save(flags); | 168 | local_irq_save(flags); |
280 | __smp_call_function(stop_this_cpu, NULL, 0, 0); | ||
281 | if (!nolock) | ||
282 | spin_unlock(&call_lock); | ||
283 | disable_local_APIC(); | 169 | disable_local_APIC(); |
284 | local_irq_restore(flags); | 170 | local_irq_restore(flags); |
285 | } | 171 | } |
@@ -301,33 +187,28 @@ void smp_reschedule_interrupt(struct pt_regs *regs) | |||
301 | 187 | ||
302 | void smp_call_function_interrupt(struct pt_regs *regs) | 188 | void smp_call_function_interrupt(struct pt_regs *regs) |
303 | { | 189 | { |
304 | void (*func) (void *info) = call_data->func; | ||
305 | void *info = call_data->info; | ||
306 | int wait = call_data->wait; | ||
307 | |||
308 | ack_APIC_irq(); | 190 | ack_APIC_irq(); |
309 | /* | ||
310 | * Notify initiating CPU that I've grabbed the data and am | ||
311 | * about to execute the function | ||
312 | */ | ||
313 | mb(); | ||
314 | atomic_inc(&call_data->started); | ||
315 | /* | ||
316 | * At this point the info structure may be out of scope unless wait==1 | ||
317 | */ | ||
318 | irq_enter(); | 191 | irq_enter(); |
319 | (*func)(info); | 192 | generic_smp_call_function_interrupt(); |
320 | #ifdef CONFIG_X86_32 | 193 | #ifdef CONFIG_X86_32 |
321 | __get_cpu_var(irq_stat).irq_call_count++; | 194 | __get_cpu_var(irq_stat).irq_call_count++; |
322 | #else | 195 | #else |
323 | add_pda(irq_call_count, 1); | 196 | add_pda(irq_call_count, 1); |
324 | #endif | 197 | #endif |
325 | irq_exit(); | 198 | irq_exit(); |
199 | } | ||
326 | 200 | ||
327 | if (wait) { | 201 | void smp_call_function_single_interrupt(struct pt_regs *regs) |
328 | mb(); | 202 | { |
329 | atomic_inc(&call_data->finished); | 203 | ack_APIC_irq(); |
330 | } | 204 | irq_enter(); |
205 | generic_smp_call_function_single_interrupt(); | ||
206 | #ifdef CONFIG_X86_32 | ||
207 | __get_cpu_var(irq_stat).irq_call_count++; | ||
208 | #else | ||
209 | add_pda(irq_call_count, 1); | ||
210 | #endif | ||
211 | irq_exit(); | ||
331 | } | 212 | } |
332 | 213 | ||
333 | struct smp_ops smp_ops = { | 214 | struct smp_ops smp_ops = { |
@@ -338,7 +219,8 @@ struct smp_ops smp_ops = { | |||
338 | 219 | ||
339 | .smp_send_stop = native_smp_send_stop, | 220 | .smp_send_stop = native_smp_send_stop, |
340 | .smp_send_reschedule = native_smp_send_reschedule, | 221 | .smp_send_reschedule = native_smp_send_reschedule, |
341 | .smp_call_function_mask = native_smp_call_function_mask, | 222 | |
223 | .send_call_func_ipi = native_send_call_func_ipi, | ||
224 | .send_call_func_single_ipi = native_send_call_func_single_ipi, | ||
342 | }; | 225 | }; |
343 | EXPORT_SYMBOL_GPL(smp_ops); | 226 | EXPORT_SYMBOL_GPL(smp_ops); |
344 | |||
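
The net effect of this rewrite: kernel/smp.c (via USE_GENERIC_SMP_HELPERS) now owns the call-function queuing, locking and wait logic, the arch only supplies the two IPI senders in smp_ops, and the interrupt handlers just delegate to generic_smp_call_function_*(). Caller-side the unused nonatomic flag is gone, hence the one-fewer-argument calls repeated throughout this diff:

        #include <linux/smp.h>

        static void drain(void *unused)
        {
                /* per-cpu work, runs in interrupt context */
        }

        static void example(void)
        {
                /* old: smp_call_function(drain, NULL, 0, 1); */
                smp_call_function(drain, NULL, 1);      /* wait == 1 */
                on_each_cpu(drain, NULL, 1);            /* same change */
        }
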
diff --git a/arch/x86/kernel/smpboot.c b/arch/x86/kernel/smpboot.c index e1200b202ed7..687376ab07e8 100644 --- a/arch/x86/kernel/smpboot.c +++ b/arch/x86/kernel/smpboot.c | |||
@@ -327,12 +327,12 @@ static void __cpuinit start_secondary(void *unused) | |||
327 | * lock helps us to not include this cpu in a currently in progress | 327 | * lock helps us to not include this cpu in a currently in progress |
328 | * smp_call_function(). | 328 | * smp_call_function(). |
329 | */ | 329 | */ |
330 | lock_ipi_call_lock(); | 330 | ipi_call_lock_irq(); |
331 | #ifdef CONFIG_X86_IO_APIC | 331 | #ifdef CONFIG_X86_IO_APIC |
332 | setup_vector_irq(smp_processor_id()); | 332 | setup_vector_irq(smp_processor_id()); |
333 | #endif | 333 | #endif |
334 | cpu_set(smp_processor_id(), cpu_online_map); | 334 | cpu_set(smp_processor_id(), cpu_online_map); |
335 | unlock_ipi_call_lock(); | 335 | ipi_call_unlock_irq(); |
336 | per_cpu(cpu_state, smp_processor_id()) = CPU_ONLINE; | 336 | per_cpu(cpu_state, smp_processor_id()) = CPU_ONLINE; |
337 | 337 | ||
338 | setup_secondary_clock(); | 338 | setup_secondary_clock(); |
@@ -939,9 +939,9 @@ do_rest: | |||
939 | inquire_remote_apic(apicid); | 939 | inquire_remote_apic(apicid); |
940 | } | 940 | } |
941 | } | 941 | } |
942 | 942 | #ifdef CONFIG_X86_64 | |
943 | restore_state: | 943 | restore_state: |
944 | 944 | #endif | |
945 | if (boot_error) { | 945 | if (boot_error) { |
946 | /* Try to put things back the way they were before ... */ | 946 | /* Try to put things back the way they were before ... */ |
947 | numa_remove_cpu(cpu); /* was set by numa_add_cpu */ | 947 | numa_remove_cpu(cpu); /* was set by numa_add_cpu */ |
diff --git a/arch/x86/kernel/smpcommon.c b/arch/x86/kernel/smpcommon.c index 3449064d141a..99941b37eca0 100644 --- a/arch/x86/kernel/smpcommon.c +++ b/arch/x86/kernel/smpcommon.c | |||
@@ -25,59 +25,3 @@ __cpuinit void init_gdt(int cpu) | |||
25 | per_cpu(cpu_number, cpu) = cpu; | 25 | per_cpu(cpu_number, cpu) = cpu; |
26 | } | 26 | } |
27 | #endif | 27 | #endif |
28 | |||
29 | /** | ||
30 | * smp_call_function(): Run a function on all other CPUs. | ||
31 | * @func: The function to run. This must be fast and non-blocking. | ||
32 | * @info: An arbitrary pointer to pass to the function. | ||
33 | * @nonatomic: Unused. | ||
34 | * @wait: If true, wait (atomically) until function has completed on other CPUs. | ||
35 | * | ||
36 | * Returns 0 on success, else a negative status code. | ||
37 | * | ||
38 | * If @wait is true, then returns once @func has returned; otherwise | ||
39 | * it returns just before the target cpu calls @func. | ||
40 | * | ||
41 | * You must not call this function with disabled interrupts or from a | ||
42 | * hardware interrupt handler or from a bottom half handler. | ||
43 | */ | ||
44 | int smp_call_function(void (*func) (void *info), void *info, int nonatomic, | ||
45 | int wait) | ||
46 | { | ||
47 | return smp_call_function_mask(cpu_online_map, func, info, wait); | ||
48 | } | ||
49 | EXPORT_SYMBOL(smp_call_function); | ||
50 | |||
51 | /** | ||
52 | * smp_call_function_single - Run a function on a specific CPU | ||
53 | * @cpu: The target CPU. Cannot be the calling CPU. | ||
54 | * @func: The function to run. This must be fast and non-blocking. | ||
55 | * @info: An arbitrary pointer to pass to the function. | ||
56 | * @nonatomic: Unused. | ||
57 | * @wait: If true, wait until function has completed on other CPUs. | ||
58 | * | ||
59 | * Returns 0 on success, else a negative status code. | ||
60 | * | ||
61 | * If @wait is true, then returns once @func has returned; otherwise | ||
62 | * it returns just before the target cpu calls @func. | ||
63 | */ | ||
64 | int smp_call_function_single(int cpu, void (*func) (void *info), void *info, | ||
65 | int nonatomic, int wait) | ||
66 | { | ||
67 | /* prevent preemption and reschedule on another processor */ | ||
68 | int ret; | ||
69 | int me = get_cpu(); | ||
70 | if (cpu == me) { | ||
71 | local_irq_disable(); | ||
72 | func(info); | ||
73 | local_irq_enable(); | ||
74 | put_cpu(); | ||
75 | return 0; | ||
76 | } | ||
77 | |||
78 | ret = smp_call_function_mask(cpumask_of_cpu(cpu), func, info, wait); | ||
79 | |||
80 | put_cpu(); | ||
81 | return ret; | ||
82 | } | ||
83 | EXPORT_SYMBOL(smp_call_function_single); | ||
diff --git a/arch/x86/kernel/stacktrace.c b/arch/x86/kernel/stacktrace.c index c28c342c162f..a03e7f6d90c3 100644 --- a/arch/x86/kernel/stacktrace.c +++ b/arch/x86/kernel/stacktrace.c | |||
@@ -74,6 +74,7 @@ void save_stack_trace(struct stack_trace *trace) | |||
74 | if (trace->nr_entries < trace->max_entries) | 74 | if (trace->nr_entries < trace->max_entries) |
75 | trace->entries[trace->nr_entries++] = ULONG_MAX; | 75 | trace->entries[trace->nr_entries++] = ULONG_MAX; |
76 | } | 76 | } |
77 | EXPORT_SYMBOL_GPL(save_stack_trace); | ||
77 | 78 | ||
78 | void save_stack_trace_tsk(struct task_struct *tsk, struct stack_trace *trace) | 79 | void save_stack_trace_tsk(struct task_struct *tsk, struct stack_trace *trace) |
79 | { | 80 | { |
@@ -81,3 +82,4 @@ void save_stack_trace_tsk(struct task_struct *tsk, struct stack_trace *trace) | |||
81 | if (trace->nr_entries < trace->max_entries) | 82 | if (trace->nr_entries < trace->max_entries) |
82 | trace->entries[trace->nr_entries++] = ULONG_MAX; | 83 | trace->entries[trace->nr_entries++] = ULONG_MAX; |
83 | } | 84 | } |
85 | EXPORT_SYMBOL_GPL(save_stack_trace_tsk); | ||
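
Exporting both helpers lets modules (the new tracers are the immediate customer) capture backtraces. Typical usage against the existing struct stack_trace:

        #include <linux/stacktrace.h>

        static void dump_here(void)
        {
                unsigned long entries[16];
                struct stack_trace trace = {
                        .max_entries = ARRAY_SIZE(entries),
                        .entries     = entries,
                        .skip        = 1,  /* drop this frame itself */
                };

                save_stack_trace(&trace);
                /* trace.nr_entries now holds the captured depth */
        }
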
diff --git a/arch/x86/kernel/time_32.c b/arch/x86/kernel/time_32.c index 5f29f12da50c..059ca6ee59b4 100644 --- a/arch/x86/kernel/time_32.c +++ b/arch/x86/kernel/time_32.c | |||
@@ -39,9 +39,6 @@ | |||
39 | 39 | ||
40 | #include "do_timer.h" | 40 | #include "do_timer.h" |
41 | 41 | ||
42 | unsigned int cpu_khz; /* Detected as we calibrate the TSC */ | ||
43 | EXPORT_SYMBOL(cpu_khz); | ||
44 | |||
45 | int timer_ack; | 42 | int timer_ack; |
46 | 43 | ||
47 | unsigned long profile_pc(struct pt_regs *regs) | 44 | unsigned long profile_pc(struct pt_regs *regs) |
diff --git a/arch/x86/kernel/time_64.c b/arch/x86/kernel/time_64.c index 39ae8511a137..e3d49c553af2 100644 --- a/arch/x86/kernel/time_64.c +++ b/arch/x86/kernel/time_64.c | |||
@@ -56,7 +56,7 @@ static irqreturn_t timer_event_interrupt(int irq, void *dev_id) | |||
56 | /* calibrate_cpu is used on systems with fixed rate TSCs to determine | 56 | /* calibrate_cpu is used on systems with fixed rate TSCs to determine |
57 | * processor frequency */ | 57 | * processor frequency */ |
58 | #define TICK_COUNT 100000000 | 58 | #define TICK_COUNT 100000000 |
59 | unsigned long __init native_calculate_cpu_khz(void) | 59 | unsigned long __init calibrate_cpu(void) |
60 | { | 60 | { |
61 | int tsc_start, tsc_now; | 61 | int tsc_start, tsc_now; |
62 | int i, no_ctr_free; | 62 | int i, no_ctr_free; |
@@ -116,25 +116,11 @@ void __init hpet_time_init(void) | |||
116 | 116 | ||
117 | void __init time_init(void) | 117 | void __init time_init(void) |
118 | { | 118 | { |
119 | tsc_calibrate(); | 119 | tsc_init(); |
120 | |||
121 | cpu_khz = tsc_khz; | ||
122 | if (cpu_has(&boot_cpu_data, X86_FEATURE_CONSTANT_TSC) && | ||
123 | (boot_cpu_data.x86_vendor == X86_VENDOR_AMD)) | ||
124 | cpu_khz = calculate_cpu_khz(); | ||
125 | |||
126 | lpj_fine = ((unsigned long)tsc_khz * 1000)/HZ; | ||
127 | |||
128 | if (unsynchronized_tsc()) | ||
129 | mark_tsc_unstable("TSCs unsynchronized"); | ||
130 | |||
131 | if (cpu_has(&boot_cpu_data, X86_FEATURE_RDTSCP)) | 120 | if (cpu_has(&boot_cpu_data, X86_FEATURE_RDTSCP)) |
132 | vgetcpu_mode = VGETCPU_RDTSCP; | 121 | vgetcpu_mode = VGETCPU_RDTSCP; |
133 | else | 122 | else |
134 | vgetcpu_mode = VGETCPU_LSL; | 123 | vgetcpu_mode = VGETCPU_LSL; |
135 | 124 | ||
136 | printk(KERN_INFO "time.c: Detected %d.%03d MHz processor.\n", | ||
137 | cpu_khz / 1000, cpu_khz % 1000); | ||
138 | init_tsc_clocksource(); | ||
139 | late_time_init = choose_time_init(); | 125 | late_time_init = choose_time_init(); |
140 | } | 126 | } |
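Note: everything deleted from time_init() is expected to move behind the shared tsc_init(). A hedged reconstruction from the removed lines only (names follow the old code plus the calibrate_cpu() rename above; the new tsc.c itself is not part of this hunk):

    /* Sketch: the work time_init() used to do inline on 64-bit. */
    static void __init tsc_init_sketch(void)
    {
        tsc_calibrate();                            /* measure the TSC rate */

        cpu_khz = tsc_khz;
        if (cpu_has(&boot_cpu_data, X86_FEATURE_CONSTANT_TSC) &&
            boot_cpu_data.x86_vendor == X86_VENDOR_AMD)
            cpu_khz = calibrate_cpu();              /* fixed-rate TSC path */

        lpj_fine = ((unsigned long)tsc_khz * 1000) / HZ;

        if (unsynchronized_tsc())
            mark_tsc_unstable("TSCs unsynchronized");

        printk(KERN_INFO "Detected %d.%03d MHz processor.\n",
               cpu_khz / 1000, cpu_khz % 1000);
        init_tsc_clocksource();
    }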
diff --git a/arch/x86/kernel/tlb_32.c b/arch/x86/kernel/tlb_32.c index 9bb2363851af..fec1ecedc9b7 100644 --- a/arch/x86/kernel/tlb_32.c +++ b/arch/x86/kernel/tlb_32.c | |||
@@ -238,6 +238,6 @@ static void do_flush_tlb_all(void *info) | |||
238 | 238 | ||
239 | void flush_tlb_all(void) | 239 | void flush_tlb_all(void) |
240 | { | 240 | { |
241 | on_each_cpu(do_flush_tlb_all, NULL, 1, 1); | 241 | on_each_cpu(do_flush_tlb_all, NULL, 1); |
242 | } | 242 | } |
243 | 243 | ||
diff --git a/arch/x86/kernel/tlb_64.c b/arch/x86/kernel/tlb_64.c index 5039d0f097a2..dcbf7a1159ea 100644 --- a/arch/x86/kernel/tlb_64.c +++ b/arch/x86/kernel/tlb_64.c | |||
@@ -275,5 +275,5 @@ static void do_flush_tlb_all(void *info) | |||
275 | 275 | ||
276 | void flush_tlb_all(void) | 276 | void flush_tlb_all(void) |
277 | { | 277 | { |
278 | on_each_cpu(do_flush_tlb_all, NULL, 1, 1); | 278 | on_each_cpu(do_flush_tlb_all, NULL, 1); |
279 | } | 279 | } |
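Note: both TLB-flush call sites lose an argument because the generic SMP helpers drop the old "retry" flag, leaving on_each_cpu(func, info, wait). A usage sketch with a hypothetical callback:

    #include <linux/smp.h>
    #include <asm/tlbflush.h>

    /* Hypothetical callback: runs on every online CPU, IRQs disabled. */
    static void drain_local_state(void *unused)
    {
        __flush_tlb();                     /* example per-CPU work */
    }

    static void drain_all_cpus(void)
    {
        /* wait = 1: return only after every CPU has run the callback;
         * the former retry argument no longer exists */
        on_each_cpu(drain_local_state, NULL, 1);
    }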
diff --git a/arch/x86/kernel/traps_32.c b/arch/x86/kernel/traps_32.c index d7cc292691ff..8a768973c4f0 100644 --- a/arch/x86/kernel/traps_32.c +++ b/arch/x86/kernel/traps_32.c | |||
@@ -1,5 +1,6 @@ | |||
1 | /* | 1 | /* |
2 | * Copyright (C) 1991, 1992 Linus Torvalds | 2 | * Copyright (C) 1991, 1992 Linus Torvalds |
3 | * Copyright (C) 2000, 2001, 2002 Andi Kleen, SuSE Labs | ||
3 | * | 4 | * |
4 | * Pentium III FXSR, SSE support | 5 | * Pentium III FXSR, SSE support |
5 | * Gareth Hughes <gareth@valinux.com>, May 2000 | 6 | * Gareth Hughes <gareth@valinux.com>, May 2000 |
@@ -60,8 +61,6 @@ | |||
60 | 61 | ||
61 | #include "mach_traps.h" | 62 | #include "mach_traps.h" |
62 | 63 | ||
63 | int panic_on_unrecovered_nmi; | ||
64 | |||
65 | DECLARE_BITMAP(used_vectors, NR_VECTORS); | 64 | DECLARE_BITMAP(used_vectors, NR_VECTORS); |
66 | EXPORT_SYMBOL_GPL(used_vectors); | 65 | EXPORT_SYMBOL_GPL(used_vectors); |
67 | 66 | ||
@@ -98,19 +97,22 @@ asmlinkage void alignment_check(void); | |||
98 | asmlinkage void spurious_interrupt_bug(void); | 97 | asmlinkage void spurious_interrupt_bug(void); |
99 | asmlinkage void machine_check(void); | 98 | asmlinkage void machine_check(void); |
100 | 99 | ||
100 | int panic_on_unrecovered_nmi; | ||
101 | int kstack_depth_to_print = 24; | 101 | int kstack_depth_to_print = 24; |
102 | static unsigned int code_bytes = 64; | 102 | static unsigned int code_bytes = 64; |
103 | static int ignore_nmis; | ||
104 | static int die_counter; | ||
103 | 105 | ||
104 | void printk_address(unsigned long address, int reliable) | 106 | void printk_address(unsigned long address, int reliable) |
105 | { | 107 | { |
106 | #ifdef CONFIG_KALLSYMS | 108 | #ifdef CONFIG_KALLSYMS |
107 | char namebuf[KSYM_NAME_LEN]; | ||
108 | unsigned long offset = 0; | 109 | unsigned long offset = 0; |
109 | unsigned long symsize; | 110 | unsigned long symsize; |
110 | const char *symname; | 111 | const char *symname; |
111 | char reliab[4] = ""; | ||
112 | char *delim = ":"; | ||
113 | char *modname; | 112 | char *modname; |
113 | char *delim = ":"; | ||
114 | char namebuf[KSYM_NAME_LEN]; | ||
115 | char reliab[4] = ""; | ||
114 | 116 | ||
115 | symname = kallsyms_lookup(address, &symsize, &offset, | 117 | symname = kallsyms_lookup(address, &symsize, &offset, |
116 | &modname, namebuf); | 118 | &modname, namebuf); |
@@ -130,22 +132,23 @@ void printk_address(unsigned long address, int reliable) | |||
130 | #endif | 132 | #endif |
131 | } | 133 | } |
132 | 134 | ||
133 | static inline int valid_stack_ptr(struct thread_info *tinfo, void *p, unsigned size) | 135 | static inline int valid_stack_ptr(struct thread_info *tinfo, |
136 | void *p, unsigned int size) | ||
134 | { | 137 | { |
135 | return p > (void *)tinfo && | 138 | void *t = tinfo; |
136 | p <= (void *)tinfo + THREAD_SIZE - size; | 139 | return p > t && p <= t + THREAD_SIZE - size; |
137 | } | 140 | } |
138 | 141 | ||
139 | /* The form of the top of the frame on the stack */ | 142 | /* The form of the top of the frame on the stack */ |
140 | struct stack_frame { | 143 | struct stack_frame { |
141 | struct stack_frame *next_frame; | 144 | struct stack_frame *next_frame; |
142 | unsigned long return_address; | 145 | unsigned long return_address; |
143 | }; | 146 | }; |
144 | 147 | ||
145 | static inline unsigned long | 148 | static inline unsigned long |
146 | print_context_stack(struct thread_info *tinfo, | 149 | print_context_stack(struct thread_info *tinfo, |
147 | unsigned long *stack, unsigned long bp, | 150 | unsigned long *stack, unsigned long bp, |
148 | const struct stacktrace_ops *ops, void *data) | 151 | const struct stacktrace_ops *ops, void *data) |
149 | { | 152 | { |
150 | struct stack_frame *frame = (struct stack_frame *)bp; | 153 | struct stack_frame *frame = (struct stack_frame *)bp; |
151 | 154 | ||
@@ -167,8 +170,6 @@ print_context_stack(struct thread_info *tinfo, | |||
167 | return bp; | 170 | return bp; |
168 | } | 171 | } |
169 | 172 | ||
170 | #define MSG(msg) ops->warning(data, msg) | ||
171 | |||
172 | void dump_trace(struct task_struct *task, struct pt_regs *regs, | 173 | void dump_trace(struct task_struct *task, struct pt_regs *regs, |
173 | unsigned long *stack, unsigned long bp, | 174 | unsigned long *stack, unsigned long bp, |
174 | const struct stacktrace_ops *ops, void *data) | 175 | const struct stacktrace_ops *ops, void *data) |
@@ -178,7 +179,6 @@ void dump_trace(struct task_struct *task, struct pt_regs *regs, | |||
178 | 179 | ||
179 | if (!stack) { | 180 | if (!stack) { |
180 | unsigned long dummy; | 181 | unsigned long dummy; |
181 | |||
182 | stack = &dummy; | 182 | stack = &dummy; |
183 | if (task != current) | 183 | if (task != current) |
184 | stack = (unsigned long *)task->thread.sp; | 184 | stack = (unsigned long *)task->thread.sp; |
@@ -196,7 +196,7 @@ void dump_trace(struct task_struct *task, struct pt_regs *regs, | |||
196 | } | 196 | } |
197 | #endif | 197 | #endif |
198 | 198 | ||
199 | while (1) { | 199 | for (;;) { |
200 | struct thread_info *context; | 200 | struct thread_info *context; |
201 | 201 | ||
202 | context = (struct thread_info *) | 202 | context = (struct thread_info *) |
@@ -248,10 +248,10 @@ static void print_trace_address(void *data, unsigned long addr, int reliable) | |||
248 | } | 248 | } |
249 | 249 | ||
250 | static const struct stacktrace_ops print_trace_ops = { | 250 | static const struct stacktrace_ops print_trace_ops = { |
251 | .warning = print_trace_warning, | 251 | .warning = print_trace_warning, |
252 | .warning_symbol = print_trace_warning_symbol, | 252 | .warning_symbol = print_trace_warning_symbol, |
253 | .stack = print_trace_stack, | 253 | .stack = print_trace_stack, |
254 | .address = print_trace_address, | 254 | .address = print_trace_address, |
255 | }; | 255 | }; |
256 | 256 | ||
257 | static void | 257 | static void |
@@ -351,15 +351,14 @@ void show_registers(struct pt_regs *regs) | |||
351 | printk(KERN_EMERG "Code: "); | 351 | printk(KERN_EMERG "Code: "); |
352 | 352 | ||
353 | ip = (u8 *)regs->ip - code_prologue; | 353 | ip = (u8 *)regs->ip - code_prologue; |
354 | if (ip < (u8 *)PAGE_OFFSET || | 354 | if (ip < (u8 *)PAGE_OFFSET || probe_kernel_address(ip, c)) { |
355 | probe_kernel_address(ip, c)) { | ||
356 | /* try starting at EIP */ | 355 | /* try starting at EIP */ |
357 | ip = (u8 *)regs->ip; | 356 | ip = (u8 *)regs->ip; |
358 | code_len = code_len - code_prologue + 1; | 357 | code_len = code_len - code_prologue + 1; |
359 | } | 358 | } |
360 | for (i = 0; i < code_len; i++, ip++) { | 359 | for (i = 0; i < code_len; i++, ip++) { |
361 | if (ip < (u8 *)PAGE_OFFSET || | 360 | if (ip < (u8 *)PAGE_OFFSET || |
362 | probe_kernel_address(ip, c)) { | 361 | probe_kernel_address(ip, c)) { |
363 | printk(" Bad EIP value."); | 362 | printk(" Bad EIP value."); |
364 | break; | 363 | break; |
365 | } | 364 | } |
@@ -384,8 +383,6 @@ int is_valid_bugaddr(unsigned long ip) | |||
384 | return ud2 == 0x0b0f; | 383 | return ud2 == 0x0b0f; |
385 | } | 384 | } |
386 | 385 | ||
387 | static int die_counter; | ||
388 | |||
389 | int __kprobes __die(const char *str, struct pt_regs *regs, long err) | 386 | int __kprobes __die(const char *str, struct pt_regs *regs, long err) |
390 | { | 387 | { |
391 | unsigned short ss; | 388 | unsigned short ss; |
@@ -402,26 +399,22 @@ int __kprobes __die(const char *str, struct pt_regs *regs, long err) | |||
402 | printk("DEBUG_PAGEALLOC"); | 399 | printk("DEBUG_PAGEALLOC"); |
403 | #endif | 400 | #endif |
404 | printk("\n"); | 401 | printk("\n"); |
405 | |||
406 | if (notify_die(DIE_OOPS, str, regs, err, | 402 | if (notify_die(DIE_OOPS, str, regs, err, |
407 | current->thread.trap_no, SIGSEGV) != NOTIFY_STOP) { | 403 | current->thread.trap_no, SIGSEGV) == NOTIFY_STOP) |
408 | 404 | return 1; | |
409 | show_registers(regs); | ||
410 | /* Executive summary in case the oops scrolled away */ | ||
411 | sp = (unsigned long) (&regs->sp); | ||
412 | savesegment(ss, ss); | ||
413 | if (user_mode(regs)) { | ||
414 | sp = regs->sp; | ||
415 | ss = regs->ss & 0xffff; | ||
416 | } | ||
417 | printk(KERN_EMERG "EIP: [<%08lx>] ", regs->ip); | ||
418 | print_symbol("%s", regs->ip); | ||
419 | printk(" SS:ESP %04x:%08lx\n", ss, sp); | ||
420 | 405 | ||
421 | return 0; | 406 | show_registers(regs); |
407 | /* Executive summary in case the oops scrolled away */ | ||
408 | sp = (unsigned long) (&regs->sp); | ||
409 | savesegment(ss, ss); | ||
410 | if (user_mode(regs)) { | ||
411 | sp = regs->sp; | ||
412 | ss = regs->ss & 0xffff; | ||
422 | } | 413 | } |
423 | 414 | printk(KERN_EMERG "EIP: [<%08lx>] ", regs->ip); | |
424 | return 1; | 415 | print_symbol("%s", regs->ip); |
416 | printk(" SS:ESP %04x:%08lx\n", ss, sp); | ||
417 | return 0; | ||
425 | } | 418 | } |
426 | 419 | ||
427 | /* | 420 | /* |
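Note: the __die() rewrite is the early-return pattern this patch applies throughout: bail out as soon as notify_die() reports NOTIFY_STOP so the common oops path sheds one indentation level. Its shape, condensed into a sketch:

    static int die_shape(const char *str, struct pt_regs *regs, long err)
    {
        if (notify_die(DIE_OOPS, str, regs, err,
                       current->thread.trap_no, SIGSEGV) == NOTIFY_STOP)
            return 1;           /* a registered handler (kgdb, ...) took it */

        show_registers(regs);   /* common path, no longer nested in an if */
        return 0;
    }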
@@ -546,7 +539,7 @@ void do_##name(struct pt_regs *regs, long error_code) \ | |||
546 | { \ | 539 | { \ |
547 | trace_hardirqs_fixup(); \ | 540 | trace_hardirqs_fixup(); \ |
548 | if (notify_die(DIE_TRAP, str, regs, error_code, trapnr, signr) \ | 541 | if (notify_die(DIE_TRAP, str, regs, error_code, trapnr, signr) \ |
549 | == NOTIFY_STOP) \ | 542 | == NOTIFY_STOP) \ |
550 | return; \ | 543 | return; \ |
551 | do_trap(trapnr, signr, str, 0, regs, error_code, NULL); \ | 544 | do_trap(trapnr, signr, str, 0, regs, error_code, NULL); \ |
552 | } | 545 | } |
@@ -562,7 +555,7 @@ void do_##name(struct pt_regs *regs, long error_code) \ | |||
562 | info.si_code = sicode; \ | 555 | info.si_code = sicode; \ |
563 | info.si_addr = (void __user *)siaddr; \ | 556 | info.si_addr = (void __user *)siaddr; \ |
564 | if (notify_die(DIE_TRAP, str, regs, error_code, trapnr, signr) \ | 557 | if (notify_die(DIE_TRAP, str, regs, error_code, trapnr, signr) \ |
565 | == NOTIFY_STOP) \ | 558 | == NOTIFY_STOP) \ |
566 | return; \ | 559 | return; \ |
567 | do_trap(trapnr, signr, str, 0, regs, error_code, &info); \ | 560 | do_trap(trapnr, signr, str, 0, regs, error_code, &info); \ |
568 | } | 561 | } |
@@ -571,7 +564,7 @@ void do_##name(struct pt_regs *regs, long error_code) \ | |||
571 | void do_##name(struct pt_regs *regs, long error_code) \ | 564 | void do_##name(struct pt_regs *regs, long error_code) \ |
572 | { \ | 565 | { \ |
573 | if (notify_die(DIE_TRAP, str, regs, error_code, trapnr, signr) \ | 566 | if (notify_die(DIE_TRAP, str, regs, error_code, trapnr, signr) \ |
574 | == NOTIFY_STOP) \ | 567 | == NOTIFY_STOP) \ |
575 | return; \ | 568 | return; \ |
576 | do_trap(trapnr, signr, str, 1, regs, error_code, NULL); \ | 569 | do_trap(trapnr, signr, str, 1, regs, error_code, NULL); \ |
577 | } | 570 | } |
@@ -586,27 +579,29 @@ void do_##name(struct pt_regs *regs, long error_code) \ | |||
586 | info.si_addr = (void __user *)siaddr; \ | 579 | info.si_addr = (void __user *)siaddr; \ |
587 | trace_hardirqs_fixup(); \ | 580 | trace_hardirqs_fixup(); \ |
588 | if (notify_die(DIE_TRAP, str, regs, error_code, trapnr, signr) \ | 581 | if (notify_die(DIE_TRAP, str, regs, error_code, trapnr, signr) \ |
589 | == NOTIFY_STOP) \ | 582 | == NOTIFY_STOP) \ |
590 | return; \ | 583 | return; \ |
591 | do_trap(trapnr, signr, str, 1, regs, error_code, &info); \ | 584 | do_trap(trapnr, signr, str, 1, regs, error_code, &info); \ |
592 | } | 585 | } |
593 | 586 | ||
594 | DO_VM86_ERROR_INFO(0, SIGFPE, "divide error", divide_error, FPE_INTDIV, regs->ip) | 587 | DO_VM86_ERROR_INFO(0, SIGFPE, "divide error", divide_error, FPE_INTDIV, regs->ip) |
595 | #ifndef CONFIG_KPROBES | 588 | #ifndef CONFIG_KPROBES |
596 | DO_VM86_ERROR(3, SIGTRAP, "int3", int3) | 589 | DO_VM86_ERROR(3, SIGTRAP, "int3", int3) |
597 | #endif | 590 | #endif |
598 | DO_VM86_ERROR(4, SIGSEGV, "overflow", overflow) | 591 | DO_VM86_ERROR(4, SIGSEGV, "overflow", overflow) |
599 | DO_VM86_ERROR(5, SIGSEGV, "bounds", bounds) | 592 | DO_VM86_ERROR(5, SIGSEGV, "bounds", bounds) |
600 | DO_ERROR_INFO(6, SIGILL, "invalid opcode", invalid_op, ILL_ILLOPN, regs->ip, 0) | 593 | DO_ERROR_INFO(6, SIGILL, "invalid opcode", invalid_op, ILL_ILLOPN, regs->ip, 0) |
601 | DO_ERROR(9, SIGFPE, "coprocessor segment overrun", coprocessor_segment_overrun) | 594 | DO_ERROR(9, SIGFPE, "coprocessor segment overrun", coprocessor_segment_overrun) |
602 | DO_ERROR(10, SIGSEGV, "invalid TSS", invalid_TSS) | 595 | DO_ERROR(10, SIGSEGV, "invalid TSS", invalid_TSS) |
603 | DO_ERROR(11, SIGBUS, "segment not present", segment_not_present) | 596 | DO_ERROR(11, SIGBUS, "segment not present", segment_not_present) |
604 | DO_ERROR(12, SIGBUS, "stack segment", stack_segment) | 597 | DO_ERROR(12, SIGBUS, "stack segment", stack_segment) |
605 | DO_ERROR_INFO(17, SIGBUS, "alignment check", alignment_check, BUS_ADRALN, 0, 0) | 598 | DO_ERROR_INFO(17, SIGBUS, "alignment check", alignment_check, BUS_ADRALN, 0, 0) |
606 | DO_ERROR_INFO(32, SIGILL, "iret exception", iret_error, ILL_BADSTK, 0, 1) | 599 | DO_ERROR_INFO(32, SIGILL, "iret exception", iret_error, ILL_BADSTK, 0, 1) |
607 | 600 | ||
608 | void __kprobes do_general_protection(struct pt_regs *regs, long error_code) | 601 | void __kprobes |
602 | do_general_protection(struct pt_regs *regs, long error_code) | ||
609 | { | 603 | { |
604 | struct task_struct *tsk; | ||
610 | struct thread_struct *thread; | 605 | struct thread_struct *thread; |
611 | struct tss_struct *tss; | 606 | struct tss_struct *tss; |
612 | int cpu; | 607 | int cpu; |
@@ -647,23 +642,24 @@ void __kprobes do_general_protection(struct pt_regs *regs, long error_code) | |||
647 | if (regs->flags & X86_VM_MASK) | 642 | if (regs->flags & X86_VM_MASK) |
648 | goto gp_in_vm86; | 643 | goto gp_in_vm86; |
649 | 644 | ||
645 | tsk = current; | ||
650 | if (!user_mode(regs)) | 646 | if (!user_mode(regs)) |
651 | goto gp_in_kernel; | 647 | goto gp_in_kernel; |
652 | 648 | ||
653 | current->thread.error_code = error_code; | 649 | tsk->thread.error_code = error_code; |
654 | current->thread.trap_no = 13; | 650 | tsk->thread.trap_no = 13; |
655 | 651 | ||
656 | if (show_unhandled_signals && unhandled_signal(current, SIGSEGV) && | 652 | if (show_unhandled_signals && unhandled_signal(tsk, SIGSEGV) && |
657 | printk_ratelimit()) { | 653 | printk_ratelimit()) { |
658 | printk(KERN_INFO | 654 | printk(KERN_INFO |
659 | "%s[%d] general protection ip:%lx sp:%lx error:%lx", | 655 | "%s[%d] general protection ip:%lx sp:%lx error:%lx", |
660 | current->comm, task_pid_nr(current), | 656 | tsk->comm, task_pid_nr(tsk), |
661 | regs->ip, regs->sp, error_code); | 657 | regs->ip, regs->sp, error_code); |
662 | print_vma_addr(" in ", regs->ip); | 658 | print_vma_addr(" in ", regs->ip); |
663 | printk("\n"); | 659 | printk("\n"); |
664 | } | 660 | } |
665 | 661 | ||
666 | force_sig(SIGSEGV, current); | 662 | force_sig(SIGSEGV, tsk); |
667 | return; | 663 | return; |
668 | 664 | ||
669 | gp_in_vm86: | 665 | gp_in_vm86: |
@@ -672,14 +668,15 @@ gp_in_vm86: | |||
672 | return; | 668 | return; |
673 | 669 | ||
674 | gp_in_kernel: | 670 | gp_in_kernel: |
675 | if (!fixup_exception(regs)) { | 671 | if (fixup_exception(regs)) |
676 | current->thread.error_code = error_code; | 672 | return; |
677 | current->thread.trap_no = 13; | 673 | |
678 | if (notify_die(DIE_GPF, "general protection fault", regs, | 674 | tsk->thread.error_code = error_code; |
675 | tsk->thread.trap_no = 13; | ||
676 | if (notify_die(DIE_GPF, "general protection fault", regs, | ||
679 | error_code, 13, SIGSEGV) == NOTIFY_STOP) | 677 | error_code, 13, SIGSEGV) == NOTIFY_STOP) |
680 | return; | 678 | return; |
681 | die("general protection fault", regs, error_code); | 679 | die("general protection fault", regs, error_code); |
682 | } | ||
683 | } | 680 | } |
684 | 681 | ||
685 | static notrace __kprobes void | 682 | static notrace __kprobes void |
@@ -792,14 +789,17 @@ void notrace __kprobes die_nmi(char *str, struct pt_regs *regs, int do_panic) | |||
792 | static notrace __kprobes void default_do_nmi(struct pt_regs *regs) | 789 | static notrace __kprobes void default_do_nmi(struct pt_regs *regs) |
793 | { | 790 | { |
794 | unsigned char reason = 0; | 791 | unsigned char reason = 0; |
792 | int cpu; | ||
793 | |||
794 | cpu = smp_processor_id(); | ||
795 | 795 | ||
796 | /* Only the BSP gets external NMIs from the system: */ | 796 | /* Only the BSP gets external NMIs from the system. */ |
797 | if (!smp_processor_id()) | 797 | if (!cpu) |
798 | reason = get_nmi_reason(); | 798 | reason = get_nmi_reason(); |
799 | 799 | ||
800 | if (!(reason & 0xc0)) { | 800 | if (!(reason & 0xc0)) { |
801 | if (notify_die(DIE_NMI_IPI, "nmi_ipi", regs, reason, 2, SIGINT) | 801 | if (notify_die(DIE_NMI_IPI, "nmi_ipi", regs, reason, 2, SIGINT) |
802 | == NOTIFY_STOP) | 802 | == NOTIFY_STOP) |
803 | return; | 803 | return; |
804 | #ifdef CONFIG_X86_LOCAL_APIC | 804 | #ifdef CONFIG_X86_LOCAL_APIC |
805 | /* | 805 | /* |
@@ -808,7 +808,7 @@ static notrace __kprobes void default_do_nmi(struct pt_regs *regs) | |||
808 | */ | 808 | */ |
809 | if (nmi_watchdog_tick(regs, reason)) | 809 | if (nmi_watchdog_tick(regs, reason)) |
810 | return; | 810 | return; |
811 | if (!do_nmi_callback(regs, smp_processor_id())) | 811 | if (!do_nmi_callback(regs, cpu)) |
812 | unknown_nmi_error(reason, regs); | 812 | unknown_nmi_error(reason, regs); |
813 | #else | 813 | #else |
814 | unknown_nmi_error(reason, regs); | 814 | unknown_nmi_error(reason, regs); |
@@ -818,6 +818,8 @@ static notrace __kprobes void default_do_nmi(struct pt_regs *regs) | |||
818 | } | 818 | } |
819 | if (notify_die(DIE_NMI, "nmi", regs, reason, 2, SIGINT) == NOTIFY_STOP) | 819 | if (notify_die(DIE_NMI, "nmi", regs, reason, 2, SIGINT) == NOTIFY_STOP) |
820 | return; | 820 | return; |
821 | |||
822 | /* AK: following checks seem to be broken on modern chipsets. FIXME */ | ||
821 | if (reason & 0x80) | 823 | if (reason & 0x80) |
822 | mem_parity_error(reason, regs); | 824 | mem_parity_error(reason, regs); |
823 | if (reason & 0x40) | 825 | if (reason & 0x40) |
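Note on the bits tested here: the reason byte comes from get_nmi_reason(), i.e. system control port B (0x61) on legacy PC hardware, which is why !(reason & 0xc0) earlier means "no documented NMI source":

    /* reason bits from port 0x61:
     *   bit 7 (0x80) - memory parity / SERR#  -> mem_parity_error()
     *   bit 6 (0x40) - I/O channel check      -> io_check_error()
     */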
@@ -829,8 +831,6 @@ static notrace __kprobes void default_do_nmi(struct pt_regs *regs) | |||
829 | reassert_nmi(); | 831 | reassert_nmi(); |
830 | } | 832 | } |
831 | 833 | ||
832 | static int ignore_nmis; | ||
833 | |||
834 | notrace __kprobes void do_nmi(struct pt_regs *regs, long error_code) | 834 | notrace __kprobes void do_nmi(struct pt_regs *regs, long error_code) |
835 | { | 835 | { |
836 | int cpu; | 836 | int cpu; |
@@ -915,7 +915,7 @@ void __kprobes do_debug(struct pt_regs *regs, long error_code) | |||
915 | tsk->thread.debugctlmsr = 0; | 915 | tsk->thread.debugctlmsr = 0; |
916 | 916 | ||
917 | if (notify_die(DIE_DEBUG, "debug", regs, condition, error_code, | 917 | if (notify_die(DIE_DEBUG, "debug", regs, condition, error_code, |
918 | SIGTRAP) == NOTIFY_STOP) | 918 | SIGTRAP) == NOTIFY_STOP) |
919 | return; | 919 | return; |
920 | /* It's safe to allow irq's after DR6 has been saved */ | 920 | /* It's safe to allow irq's after DR6 has been saved */ |
921 | if (regs->flags & X86_EFLAGS_IF) | 921 | if (regs->flags & X86_EFLAGS_IF) |
@@ -976,9 +976,8 @@ clear_TF_reenable: | |||
976 | void math_error(void __user *ip) | 976 | void math_error(void __user *ip) |
977 | { | 977 | { |
978 | struct task_struct *task; | 978 | struct task_struct *task; |
979 | unsigned short cwd; | ||
980 | unsigned short swd; | ||
981 | siginfo_t info; | 979 | siginfo_t info; |
980 | unsigned short cwd, swd; | ||
982 | 981 | ||
983 | /* | 982 | /* |
984 | * Save the info for the exception handler and clear the error. | 983 | * Save the info for the exception handler and clear the error. |
@@ -997,7 +996,7 @@ void math_error(void __user *ip) | |||
997 | * C1 reg you need in case of a stack fault, 0x040 is the stack | 996 | * C1 reg you need in case of a stack fault, 0x040 is the stack |
998 | * fault bit. We should only be taking one exception at a time, | 997 | * fault bit. We should only be taking one exception at a time, |
999 | * so if this combination doesn't produce any single exception, | 998 | * so if this combination doesn't produce any single exception, |
1000 | * then we have a bad program that isn't syncronizing its FPU usage | 999 | * then we have a bad program that isn't synchronizing its FPU usage |
1001 | * and it will suffer the consequences since we won't be able to | 1000 | * and it will suffer the consequences since we won't be able to |
1002 | * fully reproduce the context of the exception | 1001 | * fully reproduce the context of the exception |
1003 | */ | 1002 | */ |
@@ -1006,7 +1005,7 @@ void math_error(void __user *ip) | |||
1006 | switch (swd & ~cwd & 0x3f) { | 1005 | switch (swd & ~cwd & 0x3f) { |
1007 | case 0x000: /* No unmasked exception */ | 1006 | case 0x000: /* No unmasked exception */ |
1008 | return; | 1007 | return; |
1009 | default: /* Multiple exceptions */ | 1008 | default: /* Multiple exceptions */ |
1010 | break; | 1009 | break; |
1011 | case 0x001: /* Invalid Op */ | 1010 | case 0x001: /* Invalid Op */ |
1012 | /* | 1011 | /* |
@@ -1042,8 +1041,8 @@ void do_coprocessor_error(struct pt_regs *regs, long error_code) | |||
1042 | static void simd_math_error(void __user *ip) | 1041 | static void simd_math_error(void __user *ip) |
1043 | { | 1042 | { |
1044 | struct task_struct *task; | 1043 | struct task_struct *task; |
1045 | unsigned short mxcsr; | ||
1046 | siginfo_t info; | 1044 | siginfo_t info; |
1045 | unsigned short mxcsr; | ||
1047 | 1046 | ||
1048 | /* | 1047 | /* |
1049 | * Save the info for the exception handler and clear the error. | 1048 | * Save the info for the exception handler and clear the error. |
@@ -1198,16 +1197,16 @@ void __init trap_init(void) | |||
1198 | early_iounmap(p, 4); | 1197 | early_iounmap(p, 4); |
1199 | #endif | 1198 | #endif |
1200 | 1199 | ||
1201 | set_trap_gate(0, &divide_error); | 1200 | set_trap_gate(0, &divide_error); |
1202 | set_intr_gate(1, &debug); | 1201 | set_intr_gate(1, &debug); |
1203 | set_intr_gate(2, &nmi); | 1202 | set_intr_gate(2, &nmi); |
1204 | set_system_intr_gate(3, &int3); /* int3/4 can be called from all */ | 1203 | set_system_intr_gate(3, &int3); /* int3 can be called from all */ |
1205 | set_system_gate(4, &overflow); | 1204 | set_system_gate(4, &overflow); /* int4 can be called from all */ |
1206 | set_trap_gate(5, &bounds); | 1205 | set_trap_gate(5, &bounds); |
1207 | set_trap_gate(6, &invalid_op); | 1206 | set_trap_gate(6, &invalid_op); |
1208 | set_trap_gate(7, &device_not_available); | 1207 | set_trap_gate(7, &device_not_available); |
1209 | set_task_gate(8, GDT_ENTRY_DOUBLEFAULT_TSS); | 1208 | set_task_gate(8, GDT_ENTRY_DOUBLEFAULT_TSS); |
1210 | set_trap_gate(9, &coprocessor_segment_overrun); | 1209 | set_trap_gate(9, &coprocessor_segment_overrun); |
1211 | set_trap_gate(10, &invalid_TSS); | 1210 | set_trap_gate(10, &invalid_TSS); |
1212 | set_trap_gate(11, &segment_not_present); | 1211 | set_trap_gate(11, &segment_not_present); |
1213 | set_trap_gate(12, &stack_segment); | 1212 | set_trap_gate(12, &stack_segment); |
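Note: the comment fix spells out that vectors 3 and 4 both get user-callable (DPL 3) gates. For orientation, the helpers used in this table differ as follows (a summary of the 32-bit gate setup, not part of the patch):

    /* set_intr_gate()        - DPL 0 interrupt gate (IRQs off on entry)
     * set_trap_gate()        - DPL 0 trap gate (IF left unchanged)
     * set_system_intr_gate() - DPL 3 interrupt gate: "int $3" from user space
     * set_system_gate()      - DPL 3 trap gate: "into"/"int $4" from user space
     * set_task_gate()        - task gate; vector 8 switches to the
     *                          double-fault TSS
     */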
diff --git a/arch/x86/kernel/traps_64.c b/arch/x86/kernel/traps_64.c index 80ba6d37bfe0..2696a6837782 100644 --- a/arch/x86/kernel/traps_64.c +++ b/arch/x86/kernel/traps_64.c | |||
@@ -10,49 +10,49 @@ | |||
10 | * 'Traps.c' handles hardware traps and faults after we have saved some | 10 | * 'Traps.c' handles hardware traps and faults after we have saved some |
11 | * state in 'entry.S'. | 11 | * state in 'entry.S'. |
12 | */ | 12 | */ |
13 | #include <linux/sched.h> | 13 | #include <linux/moduleparam.h> |
14 | #include <linux/interrupt.h> | ||
15 | #include <linux/kallsyms.h> | ||
16 | #include <linux/spinlock.h> | ||
17 | #include <linux/kprobes.h> | ||
18 | #include <linux/uaccess.h> | ||
19 | #include <linux/utsname.h> | ||
20 | #include <linux/kdebug.h> | ||
14 | #include <linux/kernel.h> | 21 | #include <linux/kernel.h> |
22 | #include <linux/module.h> | ||
23 | #include <linux/ptrace.h> | ||
15 | #include <linux/string.h> | 24 | #include <linux/string.h> |
25 | #include <linux/unwind.h> | ||
26 | #include <linux/delay.h> | ||
16 | #include <linux/errno.h> | 27 | #include <linux/errno.h> |
17 | #include <linux/ptrace.h> | 28 | #include <linux/kexec.h> |
29 | #include <linux/sched.h> | ||
18 | #include <linux/timer.h> | 30 | #include <linux/timer.h> |
19 | #include <linux/mm.h> | ||
20 | #include <linux/init.h> | 31 | #include <linux/init.h> |
21 | #include <linux/delay.h> | ||
22 | #include <linux/spinlock.h> | ||
23 | #include <linux/interrupt.h> | ||
24 | #include <linux/kallsyms.h> | ||
25 | #include <linux/module.h> | ||
26 | #include <linux/moduleparam.h> | ||
27 | #include <linux/nmi.h> | ||
28 | #include <linux/kprobes.h> | ||
29 | #include <linux/kexec.h> | ||
30 | #include <linux/unwind.h> | ||
31 | #include <linux/uaccess.h> | ||
32 | #include <linux/bug.h> | 32 | #include <linux/bug.h> |
33 | #include <linux/kdebug.h> | 33 | #include <linux/nmi.h> |
34 | #include <linux/utsname.h> | 34 | #include <linux/mm.h> |
35 | |||
36 | #include <mach_traps.h> | ||
37 | 35 | ||
38 | #if defined(CONFIG_EDAC) | 36 | #if defined(CONFIG_EDAC) |
39 | #include <linux/edac.h> | 37 | #include <linux/edac.h> |
40 | #endif | 38 | #endif |
41 | 39 | ||
42 | #include <asm/system.h> | 40 | #include <asm/stacktrace.h> |
43 | #include <asm/io.h> | 41 | #include <asm/processor.h> |
44 | #include <asm/atomic.h> | ||
45 | #include <asm/debugreg.h> | 42 | #include <asm/debugreg.h> |
43 | #include <asm/atomic.h> | ||
44 | #include <asm/system.h> | ||
45 | #include <asm/unwind.h> | ||
46 | #include <asm/desc.h> | 46 | #include <asm/desc.h> |
47 | #include <asm/i387.h> | 47 | #include <asm/i387.h> |
48 | #include <asm/processor.h> | 48 | #include <asm/nmi.h> |
49 | #include <asm/unwind.h> | ||
50 | #include <asm/smp.h> | 49 | #include <asm/smp.h> |
50 | #include <asm/io.h> | ||
51 | #include <asm/pgalloc.h> | 51 | #include <asm/pgalloc.h> |
52 | #include <asm/pda.h> | ||
53 | #include <asm/proto.h> | 52 | #include <asm/proto.h> |
54 | #include <asm/nmi.h> | 53 | #include <asm/pda.h> |
55 | #include <asm/stacktrace.h> | 54 | |
55 | #include <mach_traps.h> | ||
56 | 56 | ||
57 | asmlinkage void divide_error(void); | 57 | asmlinkage void divide_error(void); |
58 | asmlinkage void debug(void); | 58 | asmlinkage void debug(void); |
@@ -72,12 +72,14 @@ asmlinkage void page_fault(void); | |||
72 | asmlinkage void coprocessor_error(void); | 72 | asmlinkage void coprocessor_error(void); |
73 | asmlinkage void simd_coprocessor_error(void); | 73 | asmlinkage void simd_coprocessor_error(void); |
74 | asmlinkage void alignment_check(void); | 74 | asmlinkage void alignment_check(void); |
75 | asmlinkage void machine_check(void); | ||
76 | asmlinkage void spurious_interrupt_bug(void); | 75 | asmlinkage void spurious_interrupt_bug(void); |
76 | asmlinkage void machine_check(void); | ||
77 | 77 | ||
78 | int panic_on_unrecovered_nmi; | 78 | int panic_on_unrecovered_nmi; |
79 | int kstack_depth_to_print = 12; | ||
79 | static unsigned int code_bytes = 64; | 80 | static unsigned int code_bytes = 64; |
80 | static unsigned ignore_nmis; | 81 | static int ignore_nmis; |
82 | static int die_counter; | ||
81 | 83 | ||
82 | static inline void conditional_sti(struct pt_regs *regs) | 84 | static inline void conditional_sti(struct pt_regs *regs) |
83 | { | 85 | { |
@@ -101,34 +103,9 @@ static inline void preempt_conditional_cli(struct pt_regs *regs) | |||
101 | dec_preempt_count(); | 103 | dec_preempt_count(); |
102 | } | 104 | } |
103 | 105 | ||
104 | int kstack_depth_to_print = 12; | ||
105 | |||
106 | void printk_address(unsigned long address, int reliable) | 106 | void printk_address(unsigned long address, int reliable) |
107 | { | 107 | { |
108 | #ifdef CONFIG_KALLSYMS | 108 | printk(" [<%016lx>] %s%pS\n", address, reliable ? "": "? ", (void *) address); |
109 | unsigned long offset = 0, symsize; | ||
110 | const char *symname; | ||
111 | char *modname; | ||
112 | char *delim = ":"; | ||
113 | char namebuf[KSYM_NAME_LEN]; | ||
114 | char reliab[4] = ""; | ||
115 | |||
116 | symname = kallsyms_lookup(address, &symsize, &offset, | ||
117 | &modname, namebuf); | ||
118 | if (!symname) { | ||
119 | printk(" [<%016lx>]\n", address); | ||
120 | return; | ||
121 | } | ||
122 | if (!reliable) | ||
123 | strcpy(reliab, "? "); | ||
124 | |||
125 | if (!modname) | ||
126 | modname = delim = ""; | ||
127 | printk(" [<%016lx>] %s%s%s%s%s+0x%lx/0x%lx\n", | ||
128 | address, reliab, delim, modname, delim, symname, offset, symsize); | ||
129 | #else | ||
130 | printk(" [<%016lx>]\n", address); | ||
131 | #endif | ||
132 | } | 109 | } |
133 | 110 | ||
134 | static unsigned long *in_exception_stack(unsigned cpu, unsigned long stack, | 111 | static unsigned long *in_exception_stack(unsigned cpu, unsigned long stack, |
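Note: thirty-odd lines of hand-rolled kallsyms lookup collapse into the %pS vsprintf extension, which formats symbol+offset/size from a raw text address (the %s prefix carries "? " for unreliable frames). Generic usage sketch:

    static void who_called_me(void)
    {
        /* %pS resolves a kernel text address to name+offset/size */
        printk(KERN_DEBUG "called from %pS\n",
               __builtin_return_address(0));
    }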
@@ -205,8 +182,6 @@ static unsigned long *in_exception_stack(unsigned cpu, unsigned long stack, | |||
205 | return NULL; | 182 | return NULL; |
206 | } | 183 | } |
207 | 184 | ||
208 | #define MSG(txt) ops->warning(data, txt) | ||
209 | |||
210 | /* | 185 | /* |
211 | * x86-64 can have up to three kernel stacks: | 186 | * x86-64 can have up to three kernel stacks: |
212 | * process stack | 187 | * process stack |
@@ -233,11 +208,11 @@ struct stack_frame { | |||
233 | unsigned long return_address; | 208 | unsigned long return_address; |
234 | }; | 209 | }; |
235 | 210 | ||
236 | 211 | static inline unsigned long | |
237 | static inline unsigned long print_context_stack(struct thread_info *tinfo, | 212 | print_context_stack(struct thread_info *tinfo, |
238 | unsigned long *stack, unsigned long bp, | 213 | unsigned long *stack, unsigned long bp, |
239 | const struct stacktrace_ops *ops, void *data, | 214 | const struct stacktrace_ops *ops, void *data, |
240 | unsigned long *end) | 215 | unsigned long *end) |
241 | { | 216 | { |
242 | struct stack_frame *frame = (struct stack_frame *)bp; | 217 | struct stack_frame *frame = (struct stack_frame *)bp; |
243 | 218 | ||
@@ -259,7 +234,7 @@ static inline unsigned long print_context_stack(struct thread_info *tinfo, | |||
259 | return bp; | 234 | return bp; |
260 | } | 235 | } |
261 | 236 | ||
262 | void dump_trace(struct task_struct *tsk, struct pt_regs *regs, | 237 | void dump_trace(struct task_struct *task, struct pt_regs *regs, |
263 | unsigned long *stack, unsigned long bp, | 238 | unsigned long *stack, unsigned long bp, |
264 | const struct stacktrace_ops *ops, void *data) | 239 | const struct stacktrace_ops *ops, void *data) |
265 | { | 240 | { |
@@ -268,36 +243,34 @@ void dump_trace(struct task_struct *tsk, struct pt_regs *regs, | |||
268 | unsigned used = 0; | 243 | unsigned used = 0; |
269 | struct thread_info *tinfo; | 244 | struct thread_info *tinfo; |
270 | 245 | ||
271 | if (!tsk) | 246 | if (!task) |
272 | tsk = current; | 247 | task = current; |
273 | tinfo = task_thread_info(tsk); | ||
274 | 248 | ||
275 | if (!stack) { | 249 | if (!stack) { |
276 | unsigned long dummy; | 250 | unsigned long dummy; |
277 | stack = &dummy; | 251 | stack = &dummy; |
278 | if (tsk && tsk != current) | 252 | if (task && task != current) |
279 | stack = (unsigned long *)tsk->thread.sp; | 253 | stack = (unsigned long *)task->thread.sp; |
280 | } | 254 | } |
281 | 255 | ||
282 | #ifdef CONFIG_FRAME_POINTER | 256 | #ifdef CONFIG_FRAME_POINTER |
283 | if (!bp) { | 257 | if (!bp) { |
284 | if (tsk == current) { | 258 | if (task == current) { |
285 | /* Grab bp right from our regs */ | 259 | /* Grab bp right from our regs */ |
286 | asm("movq %%rbp, %0" : "=r" (bp):); | 260 | asm("movq %%rbp, %0" : "=r" (bp) :); |
287 | } else { | 261 | } else { |
288 | /* bp is the last reg pushed by switch_to */ | 262 | /* bp is the last reg pushed by switch_to */ |
289 | bp = *(unsigned long *) tsk->thread.sp; | 263 | bp = *(unsigned long *) task->thread.sp; |
290 | } | 264 | } |
291 | } | 265 | } |
292 | #endif | 266 | #endif |
293 | 267 | ||
294 | |||
295 | |||
296 | /* | 268 | /* |
297 | * Print function call entries in all stacks, starting at the | 269 | * Print function call entries in all stacks, starting at the |
298 | * current stack address. If the stacks consist of nested | 270 | * current stack address. If the stacks consist of nested |
299 | * exceptions | 271 | * exceptions |
300 | */ | 272 | */ |
273 | tinfo = task_thread_info(task); | ||
301 | for (;;) { | 274 | for (;;) { |
302 | char *id; | 275 | char *id; |
303 | unsigned long *estack_end; | 276 | unsigned long *estack_end; |
@@ -382,18 +355,17 @@ static const struct stacktrace_ops print_trace_ops = { | |||
382 | .address = print_trace_address, | 355 | .address = print_trace_address, |
383 | }; | 356 | }; |
384 | 357 | ||
385 | void | 358 | void show_trace(struct task_struct *task, struct pt_regs *regs, |
386 | show_trace(struct task_struct *tsk, struct pt_regs *regs, unsigned long *stack, | 359 | unsigned long *stack, unsigned long bp) |
387 | unsigned long bp) | ||
388 | { | 360 | { |
389 | printk("\nCall Trace:\n"); | 361 | printk("\nCall Trace:\n"); |
390 | dump_trace(tsk, regs, stack, bp, &print_trace_ops, NULL); | 362 | dump_trace(task, regs, stack, bp, &print_trace_ops, NULL); |
391 | printk("\n"); | 363 | printk("\n"); |
392 | } | 364 | } |
393 | 365 | ||
394 | static void | 366 | static void |
395 | _show_stack(struct task_struct *tsk, struct pt_regs *regs, unsigned long *sp, | 367 | _show_stack(struct task_struct *task, struct pt_regs *regs, |
396 | unsigned long bp) | 368 | unsigned long *sp, unsigned long bp) |
397 | { | 369 | { |
398 | unsigned long *stack; | 370 | unsigned long *stack; |
399 | int i; | 371 | int i; |
@@ -405,14 +377,14 @@ _show_stack(struct task_struct *tsk, struct pt_regs *regs, unsigned long *sp, | |||
405 | // back trace for this cpu. | 377 | // back trace for this cpu. |
406 | 378 | ||
407 | if (sp == NULL) { | 379 | if (sp == NULL) { |
408 | if (tsk) | 380 | if (task) |
409 | sp = (unsigned long *)tsk->thread.sp; | 381 | sp = (unsigned long *)task->thread.sp; |
410 | else | 382 | else |
411 | sp = (unsigned long *)&sp; | 383 | sp = (unsigned long *)&sp; |
412 | } | 384 | } |
413 | 385 | ||
414 | stack = sp; | 386 | stack = sp; |
415 | for(i=0; i < kstack_depth_to_print; i++) { | 387 | for (i = 0; i < kstack_depth_to_print; i++) { |
416 | if (stack >= irqstack && stack <= irqstack_end) { | 388 | if (stack >= irqstack && stack <= irqstack_end) { |
417 | if (stack == irqstack_end) { | 389 | if (stack == irqstack_end) { |
418 | stack = (unsigned long *) (irqstack_end[-1]); | 390 | stack = (unsigned long *) (irqstack_end[-1]); |
@@ -427,12 +399,12 @@ _show_stack(struct task_struct *tsk, struct pt_regs *regs, unsigned long *sp, | |||
427 | printk(" %016lx", *stack++); | 399 | printk(" %016lx", *stack++); |
428 | touch_nmi_watchdog(); | 400 | touch_nmi_watchdog(); |
429 | } | 401 | } |
430 | show_trace(tsk, regs, sp, bp); | 402 | show_trace(task, regs, sp, bp); |
431 | } | 403 | } |
432 | 404 | ||
433 | void show_stack(struct task_struct *tsk, unsigned long * sp) | 405 | void show_stack(struct task_struct *task, unsigned long *sp) |
434 | { | 406 | { |
435 | _show_stack(tsk, NULL, sp, 0); | 407 | _show_stack(task, NULL, sp, 0); |
436 | } | 408 | } |
437 | 409 | ||
438 | /* | 410 | /* |
@@ -440,8 +412,8 @@ void show_stack(struct task_struct *tsk, unsigned long * sp) | |||
440 | */ | 412 | */ |
441 | void dump_stack(void) | 413 | void dump_stack(void) |
442 | { | 414 | { |
443 | unsigned long dummy; | ||
444 | unsigned long bp = 0; | 415 | unsigned long bp = 0; |
416 | unsigned long stack; | ||
445 | 417 | ||
446 | #ifdef CONFIG_FRAME_POINTER | 418 | #ifdef CONFIG_FRAME_POINTER |
447 | if (!bp) | 419 | if (!bp) |
@@ -453,7 +425,7 @@ void dump_stack(void) | |||
453 | init_utsname()->release, | 425 | init_utsname()->release, |
454 | (int)strcspn(init_utsname()->version, " "), | 426 | (int)strcspn(init_utsname()->version, " "), |
455 | init_utsname()->version); | 427 | init_utsname()->version); |
456 | show_trace(NULL, NULL, &dummy, bp); | 428 | show_trace(NULL, NULL, &stack, bp); |
457 | } | 429 | } |
458 | 430 | ||
459 | EXPORT_SYMBOL(dump_stack); | 431 | EXPORT_SYMBOL(dump_stack); |
@@ -464,12 +436,8 @@ void show_registers(struct pt_regs *regs) | |||
464 | unsigned long sp; | 436 | unsigned long sp; |
465 | const int cpu = smp_processor_id(); | 437 | const int cpu = smp_processor_id(); |
466 | struct task_struct *cur = cpu_pda(cpu)->pcurrent; | 438 | struct task_struct *cur = cpu_pda(cpu)->pcurrent; |
467 | u8 *ip; | ||
468 | unsigned int code_prologue = code_bytes * 43 / 64; | ||
469 | unsigned int code_len = code_bytes; | ||
470 | 439 | ||
471 | sp = regs->sp; | 440 | sp = regs->sp; |
472 | ip = (u8 *) regs->ip - code_prologue; | ||
473 | printk("CPU %d ", cpu); | 441 | printk("CPU %d ", cpu); |
474 | __show_regs(regs); | 442 | __show_regs(regs); |
475 | printk("Process %s (pid: %d, threadinfo %p, task %p)\n", | 443 | printk("Process %s (pid: %d, threadinfo %p, task %p)\n", |
@@ -480,15 +448,21 @@ void show_registers(struct pt_regs *regs) | |||
480 | * time of the fault.. | 448 | * time of the fault.. |
481 | */ | 449 | */ |
482 | if (!user_mode(regs)) { | 450 | if (!user_mode(regs)) { |
451 | unsigned int code_prologue = code_bytes * 43 / 64; | ||
452 | unsigned int code_len = code_bytes; | ||
483 | unsigned char c; | 453 | unsigned char c; |
454 | u8 *ip; | ||
455 | |||
484 | printk("Stack: "); | 456 | printk("Stack: "); |
485 | _show_stack(NULL, regs, (unsigned long *)sp, regs->bp); | 457 | _show_stack(NULL, regs, (unsigned long *)sp, regs->bp); |
486 | printk("\n"); | 458 | printk("\n"); |
487 | 459 | ||
488 | printk(KERN_EMERG "Code: "); | 460 | printk(KERN_EMERG "Code: "); |
461 | |||
462 | ip = (u8 *)regs->ip - code_prologue; | ||
489 | if (ip < (u8 *)PAGE_OFFSET || probe_kernel_address(ip, c)) { | 463 | if (ip < (u8 *)PAGE_OFFSET || probe_kernel_address(ip, c)) { |
490 | /* try starting at RIP */ | 464 | /* try starting at RIP */ |
491 | ip = (u8 *) regs->ip; | 465 | ip = (u8 *)regs->ip; |
492 | code_len = code_len - code_prologue + 1; | 466 | code_len = code_len - code_prologue + 1; |
493 | } | 467 | } |
494 | for (i = 0; i < code_len; i++, ip++) { | 468 | for (i = 0; i < code_len; i++, ip++) { |
@@ -504,7 +478,7 @@ void show_registers(struct pt_regs *regs) | |||
504 | } | 478 | } |
505 | } | 479 | } |
506 | printk("\n"); | 480 | printk("\n"); |
507 | } | 481 | } |
508 | 482 | ||
509 | int is_valid_bugaddr(unsigned long ip) | 483 | int is_valid_bugaddr(unsigned long ip) |
510 | { | 484 | { |
@@ -562,10 +536,9 @@ void __kprobes oops_end(unsigned long flags, struct pt_regs *regs, int signr) | |||
562 | do_exit(signr); | 536 | do_exit(signr); |
563 | } | 537 | } |
564 | 538 | ||
565 | int __kprobes __die(const char * str, struct pt_regs * regs, long err) | 539 | int __kprobes __die(const char *str, struct pt_regs *regs, long err) |
566 | { | 540 | { |
567 | static int die_counter; | 541 | printk(KERN_EMERG "%s: %04lx [%u] ", str, err & 0xffff, ++die_counter); |
568 | printk(KERN_EMERG "%s: %04lx [%u] ", str, err & 0xffff,++die_counter); | ||
569 | #ifdef CONFIG_PREEMPT | 542 | #ifdef CONFIG_PREEMPT |
570 | printk("PREEMPT "); | 543 | printk("PREEMPT "); |
571 | #endif | 544 | #endif |
@@ -576,8 +549,10 @@ int __kprobes __die(const char * str, struct pt_regs * regs, long err) | |||
576 | printk("DEBUG_PAGEALLOC"); | 549 | printk("DEBUG_PAGEALLOC"); |
577 | #endif | 550 | #endif |
578 | printk("\n"); | 551 | printk("\n"); |
579 | if (notify_die(DIE_OOPS, str, regs, err, current->thread.trap_no, SIGSEGV) == NOTIFY_STOP) | 552 | if (notify_die(DIE_OOPS, str, regs, err, |
553 | current->thread.trap_no, SIGSEGV) == NOTIFY_STOP) | ||
580 | return 1; | 554 | return 1; |
555 | |||
581 | show_registers(regs); | 556 | show_registers(regs); |
582 | add_taint(TAINT_DIE); | 557 | add_taint(TAINT_DIE); |
583 | /* Executive summary in case the oops scrolled away */ | 558 | /* Executive summary in case the oops scrolled away */ |
@@ -589,7 +564,7 @@ int __kprobes __die(const char * str, struct pt_regs * regs, long err) | |||
589 | return 0; | 564 | return 0; |
590 | } | 565 | } |
591 | 566 | ||
592 | void die(const char * str, struct pt_regs * regs, long err) | 567 | void die(const char *str, struct pt_regs *regs, long err) |
593 | { | 568 | { |
594 | unsigned long flags = oops_begin(); | 569 | unsigned long flags = oops_begin(); |
595 | 570 | ||
@@ -606,8 +581,7 @@ die_nmi(char *str, struct pt_regs *regs, int do_panic) | |||
606 | { | 581 | { |
607 | unsigned long flags; | 582 | unsigned long flags; |
608 | 583 | ||
609 | if (notify_die(DIE_NMIWATCHDOG, str, regs, 0, 2, SIGINT) == | 584 | if (notify_die(DIE_NMIWATCHDOG, str, regs, 0, 2, SIGINT) == NOTIFY_STOP) |
610 | NOTIFY_STOP) | ||
611 | return; | 585 | return; |
612 | 586 | ||
613 | flags = oops_begin(); | 587 | flags = oops_begin(); |
@@ -629,44 +603,44 @@ die_nmi(char *str, struct pt_regs *regs, int do_panic) | |||
629 | do_exit(SIGBUS); | 603 | do_exit(SIGBUS); |
630 | } | 604 | } |
631 | 605 | ||
632 | static void __kprobes do_trap(int trapnr, int signr, char *str, | 606 | static void __kprobes |
633 | struct pt_regs * regs, long error_code, | 607 | do_trap(int trapnr, int signr, char *str, struct pt_regs *regs, |
634 | siginfo_t *info) | 608 | long error_code, siginfo_t *info) |
635 | { | 609 | { |
636 | struct task_struct *tsk = current; | 610 | struct task_struct *tsk = current; |
637 | 611 | ||
638 | if (user_mode(regs)) { | 612 | if (!user_mode(regs)) |
639 | /* | 613 | goto kernel_trap; |
640 | * We want error_code and trap_no set for userspace | ||
641 | * faults and kernelspace faults which result in | ||
642 | * die(), but not kernelspace faults which are fixed | ||
643 | * up. die() gives the process no chance to handle | ||
644 | * the signal and notice the kernel fault information, | ||
645 | * so that won't result in polluting the information | ||
646 | * about previously queued, but not yet delivered, | ||
647 | * faults. See also do_general_protection below. | ||
648 | */ | ||
649 | tsk->thread.error_code = error_code; | ||
650 | tsk->thread.trap_no = trapnr; | ||
651 | |||
652 | if (show_unhandled_signals && unhandled_signal(tsk, signr) && | ||
653 | printk_ratelimit()) { | ||
654 | printk(KERN_INFO | ||
655 | "%s[%d] trap %s ip:%lx sp:%lx error:%lx", | ||
656 | tsk->comm, tsk->pid, str, | ||
657 | regs->ip, regs->sp, error_code); | ||
658 | print_vma_addr(" in ", regs->ip); | ||
659 | printk("\n"); | ||
660 | } | ||
661 | 614 | ||
662 | if (info) | 615 | /* |
663 | force_sig_info(signr, info, tsk); | 616 | * We want error_code and trap_no set for userspace faults and |
664 | else | 617 | * kernelspace faults which result in die(), but not |
665 | force_sig(signr, tsk); | 618 | * kernelspace faults which are fixed up. die() gives the |
666 | return; | 619 | * process no chance to handle the signal and notice the |
620 | * kernel fault information, so that won't result in polluting | ||
621 | * the information about previously queued, but not yet | ||
622 | * delivered, faults. See also do_general_protection below. | ||
623 | */ | ||
624 | tsk->thread.error_code = error_code; | ||
625 | tsk->thread.trap_no = trapnr; | ||
626 | |||
627 | if (show_unhandled_signals && unhandled_signal(tsk, signr) && | ||
628 | printk_ratelimit()) { | ||
629 | printk(KERN_INFO | ||
630 | "%s[%d] trap %s ip:%lx sp:%lx error:%lx", | ||
631 | tsk->comm, tsk->pid, str, | ||
632 | regs->ip, regs->sp, error_code); | ||
633 | print_vma_addr(" in ", regs->ip); | ||
634 | printk("\n"); | ||
667 | } | 635 | } |
668 | 636 | ||
637 | if (info) | ||
638 | force_sig_info(signr, info, tsk); | ||
639 | else | ||
640 | force_sig(signr, tsk); | ||
641 | return; | ||
669 | 642 | ||
643 | kernel_trap: | ||
670 | if (!fixup_exception(regs)) { | 644 | if (!fixup_exception(regs)) { |
671 | tsk->thread.error_code = error_code; | 645 | tsk->thread.error_code = error_code; |
672 | tsk->thread.trap_no = trapnr; | 646 | tsk->thread.trap_no = trapnr; |
@@ -676,38 +650,38 @@ static void __kprobes do_trap(int trapnr, int signr, char *str, | |||
676 | } | 650 | } |
677 | 651 | ||
678 | #define DO_ERROR(trapnr, signr, str, name) \ | 652 | #define DO_ERROR(trapnr, signr, str, name) \ |
679 | asmlinkage void do_##name(struct pt_regs * regs, long error_code) \ | 653 | asmlinkage void do_##name(struct pt_regs * regs, long error_code) \ |
680 | { \ | 654 | { \ |
681 | if (notify_die(DIE_TRAP, str, regs, error_code, trapnr, signr) \ | 655 | if (notify_die(DIE_TRAP, str, regs, error_code, trapnr, signr) \ |
682 | == NOTIFY_STOP) \ | 656 | == NOTIFY_STOP) \ |
683 | return; \ | 657 | return; \ |
684 | conditional_sti(regs); \ | 658 | conditional_sti(regs); \ |
685 | do_trap(trapnr, signr, str, regs, error_code, NULL); \ | 659 | do_trap(trapnr, signr, str, regs, error_code, NULL); \ |
686 | } | 660 | } |
687 | 661 | ||
688 | #define DO_ERROR_INFO(trapnr, signr, str, name, sicode, siaddr) \ | 662 | #define DO_ERROR_INFO(trapnr, signr, str, name, sicode, siaddr) \ |
689 | asmlinkage void do_##name(struct pt_regs * regs, long error_code) \ | 663 | asmlinkage void do_##name(struct pt_regs * regs, long error_code) \ |
690 | { \ | 664 | { \ |
691 | siginfo_t info; \ | 665 | siginfo_t info; \ |
692 | info.si_signo = signr; \ | 666 | info.si_signo = signr; \ |
693 | info.si_errno = 0; \ | 667 | info.si_errno = 0; \ |
694 | info.si_code = sicode; \ | 668 | info.si_code = sicode; \ |
695 | info.si_addr = (void __user *)siaddr; \ | 669 | info.si_addr = (void __user *)siaddr; \ |
696 | trace_hardirqs_fixup(); \ | 670 | trace_hardirqs_fixup(); \ |
697 | if (notify_die(DIE_TRAP, str, regs, error_code, trapnr, signr) \ | 671 | if (notify_die(DIE_TRAP, str, regs, error_code, trapnr, signr) \ |
698 | == NOTIFY_STOP) \ | 672 | == NOTIFY_STOP) \ |
699 | return; \ | 673 | return; \ |
700 | conditional_sti(regs); \ | 674 | conditional_sti(regs); \ |
701 | do_trap(trapnr, signr, str, regs, error_code, &info); \ | 675 | do_trap(trapnr, signr, str, regs, error_code, &info); \ |
702 | } | 676 | } |
703 | 677 | ||
704 | DO_ERROR_INFO( 0, SIGFPE, "divide error", divide_error, FPE_INTDIV, regs->ip) | 678 | DO_ERROR_INFO(0, SIGFPE, "divide error", divide_error, FPE_INTDIV, regs->ip) |
705 | DO_ERROR( 4, SIGSEGV, "overflow", overflow) | 679 | DO_ERROR(4, SIGSEGV, "overflow", overflow) |
706 | DO_ERROR( 5, SIGSEGV, "bounds", bounds) | 680 | DO_ERROR(5, SIGSEGV, "bounds", bounds) |
707 | DO_ERROR_INFO( 6, SIGILL, "invalid opcode", invalid_op, ILL_ILLOPN, regs->ip) | 681 | DO_ERROR_INFO(6, SIGILL, "invalid opcode", invalid_op, ILL_ILLOPN, regs->ip) |
708 | DO_ERROR( 9, SIGFPE, "coprocessor segment overrun", coprocessor_segment_overrun) | 682 | DO_ERROR(9, SIGFPE, "coprocessor segment overrun", coprocessor_segment_overrun) |
709 | DO_ERROR(10, SIGSEGV, "invalid TSS", invalid_TSS) | 683 | DO_ERROR(10, SIGSEGV, "invalid TSS", invalid_TSS) |
710 | DO_ERROR(11, SIGBUS, "segment not present", segment_not_present) | 684 | DO_ERROR(11, SIGBUS, "segment not present", segment_not_present) |
711 | DO_ERROR_INFO(17, SIGBUS, "alignment check", alignment_check, BUS_ADRALN, 0) | 685 | DO_ERROR_INFO(17, SIGBUS, "alignment check", alignment_check, BUS_ADRALN, 0) |
712 | 686 | ||
713 | /* Runs on IST stack */ | 687 | /* Runs on IST stack */ |
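Note: to make the macro churn concrete, DO_ERROR(10, SIGSEGV, "invalid TSS", invalid_TSS) expands (whitespace aside) to roughly:

    asmlinkage void do_invalid_TSS(struct pt_regs *regs, long error_code)
    {
        if (notify_die(DIE_TRAP, "invalid TSS", regs, error_code,
                       10, SIGSEGV) == NOTIFY_STOP)
            return;
        conditional_sti(regs);   /* re-enable IRQs if they were on at trap time */
        do_trap(10, SIGSEGV, "invalid TSS", regs, error_code, NULL);
    }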
@@ -738,31 +712,34 @@ asmlinkage void do_double_fault(struct pt_regs * regs, long error_code) | |||
738 | die(str, regs, error_code); | 712 | die(str, regs, error_code); |
739 | } | 713 | } |
740 | 714 | ||
741 | asmlinkage void __kprobes do_general_protection(struct pt_regs * regs, | 715 | asmlinkage void __kprobes |
742 | long error_code) | 716 | do_general_protection(struct pt_regs *regs, long error_code) |
743 | { | 717 | { |
744 | struct task_struct *tsk = current; | 718 | struct task_struct *tsk; |
745 | 719 | ||
746 | conditional_sti(regs); | 720 | conditional_sti(regs); |
747 | 721 | ||
748 | if (user_mode(regs)) { | 722 | tsk = current; |
749 | tsk->thread.error_code = error_code; | 723 | if (!user_mode(regs)) |
750 | tsk->thread.trap_no = 13; | 724 | goto gp_in_kernel; |
751 | |||
752 | if (show_unhandled_signals && unhandled_signal(tsk, SIGSEGV) && | ||
753 | printk_ratelimit()) { | ||
754 | printk(KERN_INFO | ||
755 | "%s[%d] general protection ip:%lx sp:%lx error:%lx", | ||
756 | tsk->comm, tsk->pid, | ||
757 | regs->ip, regs->sp, error_code); | ||
758 | print_vma_addr(" in ", regs->ip); | ||
759 | printk("\n"); | ||
760 | } | ||
761 | 725 | ||
762 | force_sig(SIGSEGV, tsk); | 726 | tsk->thread.error_code = error_code; |
763 | return; | 727 | tsk->thread.trap_no = 13; |
764 | } | ||
765 | 728 | ||
729 | if (show_unhandled_signals && unhandled_signal(tsk, SIGSEGV) && | ||
730 | printk_ratelimit()) { | ||
731 | printk(KERN_INFO | ||
732 | "%s[%d] general protection ip:%lx sp:%lx error:%lx", | ||
733 | tsk->comm, tsk->pid, | ||
734 | regs->ip, regs->sp, error_code); | ||
735 | print_vma_addr(" in ", regs->ip); | ||
736 | printk("\n"); | ||
737 | } | ||
738 | |||
739 | force_sig(SIGSEGV, tsk); | ||
740 | return; | ||
741 | |||
742 | gp_in_kernel: | ||
766 | if (fixup_exception(regs)) | 743 | if (fixup_exception(regs)) |
767 | return; | 744 | return; |
768 | 745 | ||
@@ -775,14 +752,14 @@ asmlinkage void __kprobes do_general_protection(struct pt_regs * regs, | |||
775 | } | 752 | } |
776 | 753 | ||
777 | static notrace __kprobes void | 754 | static notrace __kprobes void |
778 | mem_parity_error(unsigned char reason, struct pt_regs * regs) | 755 | mem_parity_error(unsigned char reason, struct pt_regs *regs) |
779 | { | 756 | { |
780 | printk(KERN_EMERG "Uhhuh. NMI received for unknown reason %02x.\n", | 757 | printk(KERN_EMERG "Uhhuh. NMI received for unknown reason %02x.\n", |
781 | reason); | 758 | reason); |
782 | printk(KERN_EMERG "You have some hardware problem, likely on the PCI bus.\n"); | 759 | printk(KERN_EMERG "You have some hardware problem, likely on the PCI bus.\n"); |
783 | 760 | ||
784 | #if defined(CONFIG_EDAC) | 761 | #if defined(CONFIG_EDAC) |
785 | if(edac_handler_set()) { | 762 | if (edac_handler_set()) { |
786 | edac_atomic_assert_error(); | 763 | edac_atomic_assert_error(); |
787 | return; | 764 | return; |
788 | } | 765 | } |
@@ -799,7 +776,7 @@ mem_parity_error(unsigned char reason, struct pt_regs * regs) | |||
799 | } | 776 | } |
800 | 777 | ||
801 | static notrace __kprobes void | 778 | static notrace __kprobes void |
802 | io_check_error(unsigned char reason, struct pt_regs * regs) | 779 | io_check_error(unsigned char reason, struct pt_regs *regs) |
803 | { | 780 | { |
804 | printk("NMI: IOCK error (debug interrupt?)\n"); | 781 | printk("NMI: IOCK error (debug interrupt?)\n"); |
805 | show_registers(regs); | 782 | show_registers(regs); |
@@ -829,14 +806,14 @@ unknown_nmi_error(unsigned char reason, struct pt_regs * regs) | |||
829 | 806 | ||
830 | /* Runs on IST stack. This code must keep interrupts off all the time. | 807 | /* Runs on IST stack. This code must keep interrupts off all the time. |
831 | Nested NMIs are prevented by the CPU. */ | 808 | Nested NMIs are prevented by the CPU. */ |
832 | asmlinkage notrace __kprobes void default_do_nmi(struct pt_regs *regs) | 809 | asmlinkage notrace __kprobes void default_do_nmi(struct pt_regs *regs) |
833 | { | 810 | { |
834 | unsigned char reason = 0; | 811 | unsigned char reason = 0; |
835 | int cpu; | 812 | int cpu; |
836 | 813 | ||
837 | cpu = smp_processor_id(); | 814 | cpu = smp_processor_id(); |
838 | 815 | ||
839 | /* Only the BSP gets external NMIs from the system. */ | 816 | /* Only the BSP gets external NMIs from the system. */ |
840 | if (!cpu) | 817 | if (!cpu) |
841 | reason = get_nmi_reason(); | 818 | reason = get_nmi_reason(); |
842 | 819 | ||
@@ -848,18 +825,17 @@ asmlinkage notrace __kprobes void default_do_nmi(struct pt_regs *regs) | |||
848 | * Ok, so this is none of the documented NMI sources, | 825 | * Ok, so this is none of the documented NMI sources, |
849 | * so it must be the NMI watchdog. | 826 | * so it must be the NMI watchdog. |
850 | */ | 827 | */ |
851 | if (nmi_watchdog_tick(regs,reason)) | 828 | if (nmi_watchdog_tick(regs, reason)) |
852 | return; | 829 | return; |
853 | if (!do_nmi_callback(regs,cpu)) | 830 | if (!do_nmi_callback(regs, cpu)) |
854 | unknown_nmi_error(reason, regs); | 831 | unknown_nmi_error(reason, regs); |
855 | 832 | ||
856 | return; | 833 | return; |
857 | } | 834 | } |
858 | if (notify_die(DIE_NMI, "nmi", regs, reason, 2, SIGINT) == NOTIFY_STOP) | 835 | if (notify_die(DIE_NMI, "nmi", regs, reason, 2, SIGINT) == NOTIFY_STOP) |
859 | return; | 836 | return; |
860 | 837 | ||
861 | /* AK: following checks seem to be broken on modern chipsets. FIXME */ | 838 | /* AK: following checks seem to be broken on modern chipsets. FIXME */ |
862 | |||
863 | if (reason & 0x80) | 839 | if (reason & 0x80) |
864 | mem_parity_error(reason, regs); | 840 | mem_parity_error(reason, regs); |
865 | if (reason & 0x40) | 841 | if (reason & 0x40) |
@@ -870,9 +846,12 @@ asmlinkage notrace __kprobes void | |||
870 | do_nmi(struct pt_regs *regs, long error_code) | 846 | do_nmi(struct pt_regs *regs, long error_code) |
871 | { | 847 | { |
872 | nmi_enter(); | 848 | nmi_enter(); |
849 | |||
873 | add_pda(__nmi_count, 1); | 850 | add_pda(__nmi_count, 1); |
851 | |||
874 | if (!ignore_nmis) | 852 | if (!ignore_nmis) |
875 | default_do_nmi(regs); | 853 | default_do_nmi(regs); |
854 | |||
876 | nmi_exit(); | 855 | nmi_exit(); |
877 | } | 856 | } |
878 | 857 | ||
@@ -889,13 +868,14 @@ void restart_nmi(void) | |||
889 | } | 868 | } |
890 | 869 | ||
891 | /* runs on IST stack. */ | 870 | /* runs on IST stack. */ |
892 | asmlinkage void __kprobes do_int3(struct pt_regs * regs, long error_code) | 871 | asmlinkage void __kprobes do_int3(struct pt_regs *regs, long error_code) |
893 | { | 872 | { |
894 | trace_hardirqs_fixup(); | 873 | trace_hardirqs_fixup(); |
895 | 874 | ||
896 | if (notify_die(DIE_INT3, "int3", regs, error_code, 3, SIGTRAP) == NOTIFY_STOP) { | 875 | if (notify_die(DIE_INT3, "int3", regs, error_code, 3, SIGTRAP) |
876 | == NOTIFY_STOP) | ||
897 | return; | 877 | return; |
898 | } | 878 | |
899 | preempt_conditional_sti(regs); | 879 | preempt_conditional_sti(regs); |
900 | do_trap(3, SIGTRAP, "int3", regs, error_code, NULL); | 880 | do_trap(3, SIGTRAP, "int3", regs, error_code, NULL); |
901 | preempt_conditional_cli(regs); | 881 | preempt_conditional_cli(regs); |
@@ -926,8 +906,8 @@ asmlinkage __kprobes struct pt_regs *sync_regs(struct pt_regs *eregs) | |||
926 | asmlinkage void __kprobes do_debug(struct pt_regs * regs, | 906 | asmlinkage void __kprobes do_debug(struct pt_regs * regs, |
927 | unsigned long error_code) | 907 | unsigned long error_code) |
928 | { | 908 | { |
929 | unsigned long condition; | ||
930 | struct task_struct *tsk = current; | 909 | struct task_struct *tsk = current; |
910 | unsigned long condition; | ||
931 | siginfo_t info; | 911 | siginfo_t info; |
932 | 912 | ||
933 | trace_hardirqs_fixup(); | 913 | trace_hardirqs_fixup(); |
@@ -948,21 +928,19 @@ asmlinkage void __kprobes do_debug(struct pt_regs * regs, | |||
948 | 928 | ||
949 | /* Mask out spurious debug traps due to lazy DR7 setting */ | 929 | /* Mask out spurious debug traps due to lazy DR7 setting */ |
950 | if (condition & (DR_TRAP0|DR_TRAP1|DR_TRAP2|DR_TRAP3)) { | 930 | if (condition & (DR_TRAP0|DR_TRAP1|DR_TRAP2|DR_TRAP3)) { |
951 | if (!tsk->thread.debugreg7) { | 931 | if (!tsk->thread.debugreg7) |
952 | goto clear_dr7; | 932 | goto clear_dr7; |
953 | } | ||
954 | } | 933 | } |
955 | 934 | ||
956 | tsk->thread.debugreg6 = condition; | 935 | tsk->thread.debugreg6 = condition; |
957 | 936 | ||
958 | |||
959 | /* | 937 | /* |
960 | * Single-stepping through TF: make sure we ignore any events in | 938 | * Single-stepping through TF: make sure we ignore any events in |
961 | * kernel space (but re-enable TF when returning to user mode). | 939 | * kernel space (but re-enable TF when returning to user mode). |
962 | */ | 940 | */ |
963 | if (condition & DR_STEP) { | 941 | if (condition & DR_STEP) { |
964 | if (!user_mode(regs)) | 942 | if (!user_mode(regs)) |
965 | goto clear_TF_reenable; | 943 | goto clear_TF_reenable; |
966 | } | 944 | } |
967 | 945 | ||
968 | /* Ok, finally something we can handle */ | 946 | /* Ok, finally something we can handle */ |
@@ -975,7 +953,7 @@ asmlinkage void __kprobes do_debug(struct pt_regs * regs, | |||
975 | force_sig_info(SIGTRAP, &info, tsk); | 953 | force_sig_info(SIGTRAP, &info, tsk); |
976 | 954 | ||
977 | clear_dr7: | 955 | clear_dr7: |
978 | set_debugreg(0UL, 7); | 956 | set_debugreg(0, 7); |
979 | preempt_conditional_cli(regs); | 957 | preempt_conditional_cli(regs); |
980 | return; | 958 | return; |
981 | 959 | ||
@@ -983,6 +961,7 @@ clear_TF_reenable: | |||
983 | set_tsk_thread_flag(tsk, TIF_SINGLESTEP); | 961 | set_tsk_thread_flag(tsk, TIF_SINGLESTEP); |
984 | regs->flags &= ~X86_EFLAGS_TF; | 962 | regs->flags &= ~X86_EFLAGS_TF; |
985 | preempt_conditional_cli(regs); | 963 | preempt_conditional_cli(regs); |
964 | return; | ||
986 | } | 965 | } |
987 | 966 | ||
988 | static int kernel_math_error(struct pt_regs *regs, const char *str, int trapnr) | 967 | static int kernel_math_error(struct pt_regs *regs, const char *str, int trapnr) |
@@ -1005,7 +984,7 @@ static int kernel_math_error(struct pt_regs *regs, const char *str, int trapnr) | |||
1005 | asmlinkage void do_coprocessor_error(struct pt_regs *regs) | 984 | asmlinkage void do_coprocessor_error(struct pt_regs *regs) |
1006 | { | 985 | { |
1007 | void __user *ip = (void __user *)(regs->ip); | 986 | void __user *ip = (void __user *)(regs->ip); |
1008 | struct task_struct * task; | 987 | struct task_struct *task; |
1009 | siginfo_t info; | 988 | siginfo_t info; |
1010 | unsigned short cwd, swd; | 989 | unsigned short cwd, swd; |
1011 | 990 | ||
@@ -1038,30 +1017,30 @@ asmlinkage void do_coprocessor_error(struct pt_regs *regs) | |||
1038 | cwd = get_fpu_cwd(task); | 1017 | cwd = get_fpu_cwd(task); |
1039 | swd = get_fpu_swd(task); | 1018 | swd = get_fpu_swd(task); |
1040 | switch (swd & ~cwd & 0x3f) { | 1019 | switch (swd & ~cwd & 0x3f) { |
1041 | case 0x000: | 1020 | case 0x000: /* No unmasked exception */ |
1042 | default: | 1021 | default: /* Multiple exceptions */ |
1043 | break; | 1022 | break; |
1044 | case 0x001: /* Invalid Op */ | 1023 | case 0x001: /* Invalid Op */ |
1045 | /* | 1024 | /* |
1046 | * swd & 0x240 == 0x040: Stack Underflow | 1025 | * swd & 0x240 == 0x040: Stack Underflow |
1047 | * swd & 0x240 == 0x240: Stack Overflow | 1026 | * swd & 0x240 == 0x240: Stack Overflow |
1048 | * User must clear the SF bit (0x40) if set | 1027 | * User must clear the SF bit (0x40) if set |
1049 | */ | 1028 | */ |
1050 | info.si_code = FPE_FLTINV; | 1029 | info.si_code = FPE_FLTINV; |
1051 | break; | 1030 | break; |
1052 | case 0x002: /* Denormalize */ | 1031 | case 0x002: /* Denormalize */ |
1053 | case 0x010: /* Underflow */ | 1032 | case 0x010: /* Underflow */ |
1054 | info.si_code = FPE_FLTUND; | 1033 | info.si_code = FPE_FLTUND; |
1055 | break; | 1034 | break; |
1056 | case 0x004: /* Zero Divide */ | 1035 | case 0x004: /* Zero Divide */ |
1057 | info.si_code = FPE_FLTDIV; | 1036 | info.si_code = FPE_FLTDIV; |
1058 | break; | 1037 | break; |
1059 | case 0x008: /* Overflow */ | 1038 | case 0x008: /* Overflow */ |
1060 | info.si_code = FPE_FLTOVF; | 1039 | info.si_code = FPE_FLTOVF; |
1061 | break; | 1040 | break; |
1062 | case 0x020: /* Precision */ | 1041 | case 0x020: /* Precision */ |
1063 | info.si_code = FPE_FLTRES; | 1042 | info.si_code = FPE_FLTRES; |
1064 | break; | 1043 | break; |
1065 | } | 1044 | } |
1066 | force_sig_info(SIGFPE, &info, task); | 1045 | force_sig_info(SIGFPE, &info, task); |
1067 | } | 1046 | } |
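
The switch above keys on the set of x87 exceptions that are both flagged in the status word and unmasked in the control word. A minimal user-space sketch of that decode, with hypothetical register values (0x037f is the power-on control word; here the zero-divide mask bit is cleared):

#include <stdio.h>

int main(void)
{
	/* Unmasked x87 exceptions: status-word flag bits (0-5) that are
	 * not masked out by the control word. Hypothetical registers:
	 * divide-by-zero flagged (SWD bit 2) and unmasked (CWD bit 2
	 * clear; 0x037f would be the all-masked default). */
	unsigned short swd = 0x0004, cwd = 0x037b;

	printf("unmasked = %#x\n", swd & ~cwd & 0x3f); /* 0x4 -> FPE_FLTDIV */
	return 0;
}
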
@@ -1074,7 +1053,7 @@ asmlinkage void bad_intr(void) | |||
1074 | asmlinkage void do_simd_coprocessor_error(struct pt_regs *regs) | 1053 | asmlinkage void do_simd_coprocessor_error(struct pt_regs *regs) |
1075 | { | 1054 | { |
1076 | void __user *ip = (void __user *)(regs->ip); | 1055 | void __user *ip = (void __user *)(regs->ip); |
1077 | struct task_struct * task; | 1056 | struct task_struct *task; |
1078 | siginfo_t info; | 1057 | siginfo_t info; |
1079 | unsigned short mxcsr; | 1058 | unsigned short mxcsr; |
1080 | 1059 | ||
@@ -1102,25 +1081,25 @@ asmlinkage void do_simd_coprocessor_error(struct pt_regs *regs) | |||
1102 | */ | 1081 | */ |
1103 | mxcsr = get_fpu_mxcsr(task); | 1082 | mxcsr = get_fpu_mxcsr(task); |
1104 | switch (~((mxcsr & 0x1f80) >> 7) & (mxcsr & 0x3f)) { | 1083 | switch (~((mxcsr & 0x1f80) >> 7) & (mxcsr & 0x3f)) { |
1105 | case 0x000: | 1084 | case 0x000: |
1106 | default: | 1085 | default: |
1107 | break; | 1086 | break; |
1108 | case 0x001: /* Invalid Op */ | 1087 | case 0x001: /* Invalid Op */ |
1109 | info.si_code = FPE_FLTINV; | 1088 | info.si_code = FPE_FLTINV; |
1110 | break; | 1089 | break; |
1111 | case 0x002: /* Denormalize */ | 1090 | case 0x002: /* Denormalize */ |
1112 | case 0x010: /* Underflow */ | 1091 | case 0x010: /* Underflow */ |
1113 | info.si_code = FPE_FLTUND; | 1092 | info.si_code = FPE_FLTUND; |
1114 | break; | 1093 | break; |
1115 | case 0x004: /* Zero Divide */ | 1094 | case 0x004: /* Zero Divide */ |
1116 | info.si_code = FPE_FLTDIV; | 1095 | info.si_code = FPE_FLTDIV; |
1117 | break; | 1096 | break; |
1118 | case 0x008: /* Overflow */ | 1097 | case 0x008: /* Overflow */ |
1119 | info.si_code = FPE_FLTOVF; | 1098 | info.si_code = FPE_FLTOVF; |
1120 | break; | 1099 | break; |
1121 | case 0x020: /* Precision */ | 1100 | case 0x020: /* Precision */ |
1122 | info.si_code = FPE_FLTRES; | 1101 | info.si_code = FPE_FLTRES; |
1123 | break; | 1102 | break; |
1124 | } | 1103 | } |
1125 | force_sig_info(SIGFPE, &info, task); | 1104 | force_sig_info(SIGFPE, &info, task); |
1126 | } | 1105 | } |
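
The SSE variant uses the same idea, except MXCSR carries its mask bits at positions 7-12 and its exception flags at 0-5, hence the extra shift. A sketch with a hypothetical MXCSR value (precision exception flagged and unmasked):

#include <stdio.h>

int main(void)
{
	/* MXCSR: mask bits at 7-12, exception flags at 0-5. Hypothetical
	 * value: precision exception flagged (bit 5) with its mask
	 * (bit 12) cleared from the 0x1f80 all-masked default. */
	unsigned short mxcsr = 0x0fa0;

	printf("unmasked = %#x\n",
	       ~((mxcsr & 0x1f80) >> 7) & (mxcsr & 0x3f)); /* 0x20 -> FPE_FLTRES */
	return 0;
}
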
@@ -1138,7 +1117,7 @@ asmlinkage void __attribute__((weak)) mce_threshold_interrupt(void) | |||
1138 | } | 1117 | } |
1139 | 1118 | ||
1140 | /* | 1119 | /* |
1141 | * 'math_state_restore()' saves the current math information in the | 1120 | * 'math_state_restore()' saves the current math information in the |
1142 | * old math state array, and gets the new ones from the current task | 1121 | * old math state array, and gets the new ones from the current task |
1143 | * | 1122 | * |
1144 | * Careful.. There are problems with IBM-designed IRQ13 behaviour. | 1123 | * Careful.. There are problems with IBM-designed IRQ13 behaviour. |
@@ -1163,7 +1142,7 @@ asmlinkage void math_state_restore(void) | |||
1163 | local_irq_disable(); | 1142 | local_irq_disable(); |
1164 | } | 1143 | } |
1165 | 1144 | ||
1166 | clts(); /* Allow maths ops (or we recurse) */ | 1145 | clts(); /* Allow maths ops (or we recurse) */ |
1167 | restore_fpu_checking(&me->thread.xstate->fxsave); | 1146 | restore_fpu_checking(&me->thread.xstate->fxsave); |
1168 | task_thread_info(me)->status |= TS_USEDFPU; | 1147 | task_thread_info(me)->status |= TS_USEDFPU; |
1169 | me->fpu_counter++; | 1148 | me->fpu_counter++; |
@@ -1172,64 +1151,61 @@ EXPORT_SYMBOL_GPL(math_state_restore); | |||
1172 | 1151 | ||
1173 | void __init trap_init(void) | 1152 | void __init trap_init(void) |
1174 | { | 1153 | { |
1175 | set_intr_gate(0,÷_error); | 1154 | set_intr_gate(0, ÷_error); |
1176 | set_intr_gate_ist(1,&debug,DEBUG_STACK); | 1155 | set_intr_gate_ist(1, &debug, DEBUG_STACK); |
1177 | set_intr_gate_ist(2,&nmi,NMI_STACK); | 1156 | set_intr_gate_ist(2, &nmi, NMI_STACK); |
1178 | set_system_gate_ist(3,&int3,DEBUG_STACK); /* int3 can be called from all */ | 1157 | set_system_gate_ist(3, &int3, DEBUG_STACK); /* int3 can be called from all */ |
1179 | set_system_gate(4,&overflow); /* int4 can be called from all */ | 1158 | set_system_gate(4, &overflow); /* int4 can be called from all */ |
1180 | set_intr_gate(5,&bounds); | 1159 | set_intr_gate(5, &bounds); |
1181 | set_intr_gate(6,&invalid_op); | 1160 | set_intr_gate(6, &invalid_op); |
1182 | set_intr_gate(7,&device_not_available); | 1161 | set_intr_gate(7, &device_not_available); |
1183 | set_intr_gate_ist(8,&double_fault, DOUBLEFAULT_STACK); | 1162 | set_intr_gate_ist(8, &double_fault, DOUBLEFAULT_STACK); |
1184 | set_intr_gate(9,&coprocessor_segment_overrun); | 1163 | set_intr_gate(9, &coprocessor_segment_overrun); |
1185 | set_intr_gate(10,&invalid_TSS); | 1164 | set_intr_gate(10, &invalid_TSS); |
1186 | set_intr_gate(11,&segment_not_present); | 1165 | set_intr_gate(11, &segment_not_present); |
1187 | set_intr_gate_ist(12,&stack_segment,STACKFAULT_STACK); | 1166 | set_intr_gate_ist(12, &stack_segment, STACKFAULT_STACK); |
1188 | set_intr_gate(13,&general_protection); | 1167 | set_intr_gate(13, &general_protection); |
1189 | set_intr_gate(14,&page_fault); | 1168 | set_intr_gate(14, &page_fault); |
1190 | set_intr_gate(15,&spurious_interrupt_bug); | 1169 | set_intr_gate(15, &spurious_interrupt_bug); |
1191 | set_intr_gate(16,&coprocessor_error); | 1170 | set_intr_gate(16, &coprocessor_error); |
1192 | set_intr_gate(17,&alignment_check); | 1171 | set_intr_gate(17, &alignment_check); |
1193 | #ifdef CONFIG_X86_MCE | 1172 | #ifdef CONFIG_X86_MCE |
1194 | set_intr_gate_ist(18,&machine_check, MCE_STACK); | 1173 | set_intr_gate_ist(18, &machine_check, MCE_STACK); |
1195 | #endif | 1174 | #endif |
1196 | set_intr_gate(19,&simd_coprocessor_error); | 1175 | set_intr_gate(19, &simd_coprocessor_error); |
1197 | 1176 | ||
1198 | #ifdef CONFIG_IA32_EMULATION | 1177 | #ifdef CONFIG_IA32_EMULATION |
1199 | set_system_gate(IA32_SYSCALL_VECTOR, ia32_syscall); | 1178 | set_system_gate(IA32_SYSCALL_VECTOR, ia32_syscall); |
1200 | #endif | 1179 | #endif |
1201 | |||
1202 | /* | 1180 | /* |
1203 | * initialize the per thread extended state: | 1181 | * initialize the per thread extended state: |
1204 | */ | 1182 | */ |
1205 | init_thread_xstate(); | 1183 | init_thread_xstate(); |
1206 | /* | 1184 | /* |
1207 | * Should be a barrier for any external CPU state. | 1185 | * Should be a barrier for any external CPU state: |
1208 | */ | 1186 | */ |
1209 | cpu_init(); | 1187 | cpu_init(); |
1210 | } | 1188 | } |
1211 | 1189 | ||
1212 | |||
1213 | static int __init oops_setup(char *s) | 1190 | static int __init oops_setup(char *s) |
1214 | { | 1191 | { |
1215 | if (!s) | 1192 | if (!s) |
1216 | return -EINVAL; | 1193 | return -EINVAL; |
1217 | if (!strcmp(s, "panic")) | 1194 | if (!strcmp(s, "panic")) |
1218 | panic_on_oops = 1; | 1195 | panic_on_oops = 1; |
1219 | return 0; | 1196 | return 0; |
1220 | } | 1197 | } |
1221 | early_param("oops", oops_setup); | 1198 | early_param("oops", oops_setup); |
1222 | 1199 | ||
1223 | static int __init kstack_setup(char *s) | 1200 | static int __init kstack_setup(char *s) |
1224 | { | 1201 | { |
1225 | if (!s) | 1202 | if (!s) |
1226 | return -EINVAL; | 1203 | return -EINVAL; |
1227 | kstack_depth_to_print = simple_strtoul(s,NULL,0); | 1204 | kstack_depth_to_print = simple_strtoul(s, NULL, 0); |
1228 | return 0; | 1205 | return 0; |
1229 | } | 1206 | } |
1230 | early_param("kstack", kstack_setup); | 1207 | early_param("kstack", kstack_setup); |
1231 | 1208 | ||
1232 | |||
1233 | static int __init code_bytes_setup(char *s) | 1209 | static int __init code_bytes_setup(char *s) |
1234 | { | 1210 | { |
1235 | code_bytes = simple_strtoul(s, NULL, 0); | 1211 | code_bytes = simple_strtoul(s, NULL, 0); |
diff --git a/arch/x86/kernel/tsc.c b/arch/x86/kernel/tsc.c new file mode 100644 index 000000000000..7603c0553909 --- /dev/null +++ b/arch/x86/kernel/tsc.c | |||
@@ -0,0 +1,535 @@ | |||
1 | #include <linux/kernel.h> | ||
2 | #include <linux/sched.h> | ||
3 | #include <linux/init.h> | ||
4 | #include <linux/module.h> | ||
5 | #include <linux/timer.h> | ||
6 | #include <linux/acpi_pmtmr.h> | ||
7 | #include <linux/cpufreq.h> | ||
8 | #include <linux/dmi.h> | ||
9 | #include <linux/delay.h> | ||
10 | #include <linux/clocksource.h> | ||
11 | #include <linux/percpu.h> | ||
12 | |||
13 | #include <asm/hpet.h> | ||
14 | #include <asm/timer.h> | ||
15 | #include <asm/vgtod.h> | ||
16 | #include <asm/time.h> | ||
17 | #include <asm/delay.h> | ||
18 | |||
19 | unsigned int cpu_khz; /* TSC clocks / usec, not used here */ | ||
20 | EXPORT_SYMBOL(cpu_khz); | ||
21 | unsigned int tsc_khz; | ||
22 | EXPORT_SYMBOL(tsc_khz); | ||
23 | |||
24 | /* | ||
25 | * TSC can be unstable due to cpufreq or due to unsynced TSCs | ||
26 | */ | ||
27 | static int tsc_unstable; | ||
28 | |||
29 | /* native_sched_clock() is called before tsc_init(), so | ||
30 | we must start with the TSC soft disabled to prevent | ||
31 | erroneous rdtsc usage on !cpu_has_tsc processors */ | ||
32 | static int tsc_disabled = -1; | ||
33 | |||
34 | /* | ||
35 | * Scheduler clock - returns current time in nanosec units. | ||
36 | */ | ||
37 | u64 native_sched_clock(void) | ||
38 | { | ||
39 | u64 this_offset; | ||
40 | |||
41 | /* | ||
42 | * Fall back to jiffies if there's no TSC available: | ||
43 | * ( But note that we still use it if the TSC is marked | ||
44 | * unstable. We do this because unlike Time Of Day, | ||
45 | * the scheduler clock tolerates small errors and it's | ||
46 | * very important for it to be as fast as the platform | ||
47 | * can achieve it. ) | ||
48 | */ | ||
49 | if (unlikely(tsc_disabled)) { | ||
50 | /* No locking but a rare wrong value is not a big deal: */ | ||
51 | return (jiffies_64 - INITIAL_JIFFIES) * (1000000000 / HZ); | ||
52 | } | ||
53 | |||
54 | /* read the Time Stamp Counter: */ | ||
55 | rdtscll(this_offset); | ||
56 | |||
57 | /* return the value in ns */ | ||
58 | return cycles_2_ns(this_offset); | ||
59 | } | ||
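
With the TSC soft-disabled, the fallback above is a single multiply; assuming a hypothetical HZ of 250, every jiffy is worth 10^9 / 250 = 4,000,000 ns:

#include <stdio.h>

int main(void)
{
	/* Jiffies fallback sketch, assuming HZ == 250 (a hypothetical
	 * config value): 10^9 / HZ nanoseconds per tick. */
	unsigned long long jiffies_elapsed = 12345;

	printf("%llu ns\n", jiffies_elapsed * (1000000000ULL / 250));
	return 0;
}
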
60 | |||
61 | /* We need to define a real function for sched_clock, to override the | ||
62 | weak default version */ | ||
63 | #ifdef CONFIG_PARAVIRT | ||
64 | unsigned long long sched_clock(void) | ||
65 | { | ||
66 | return paravirt_sched_clock(); | ||
67 | } | ||
68 | #else | ||
69 | unsigned long long | ||
70 | sched_clock(void) __attribute__((alias("native_sched_clock"))); | ||
71 | #endif | ||
72 | |||
73 | int check_tsc_unstable(void) | ||
74 | { | ||
75 | return tsc_unstable; | ||
76 | } | ||
77 | EXPORT_SYMBOL_GPL(check_tsc_unstable); | ||
78 | |||
79 | #ifdef CONFIG_X86_TSC | ||
80 | int __init notsc_setup(char *str) | ||
81 | { | ||
82 | printk(KERN_WARNING "notsc: Kernel compiled with CONFIG_X86_TSC, " | ||
83 | "cannot disable TSC completely.\n"); | ||
84 | tsc_disabled = 1; | ||
85 | return 1; | ||
86 | } | ||
87 | #else | ||
88 | /* | ||
89 | * disable flag for tsc. Takes effect by clearing the TSC cpu flag | ||
90 | * in cpu/common.c | ||
91 | */ | ||
92 | int __init notsc_setup(char *str) | ||
93 | { | ||
94 | setup_clear_cpu_cap(X86_FEATURE_TSC); | ||
95 | return 1; | ||
96 | } | ||
97 | #endif | ||
98 | |||
99 | __setup("notsc", notsc_setup); | ||
100 | |||
101 | #define MAX_RETRIES 5 | ||
102 | #define SMI_TRESHOLD 50000 | ||
103 | |||
104 | /* | ||
105 | * Read TSC and the reference counters. Take care of SMI disturbance | ||
106 | */ | ||
107 | static u64 __init tsc_read_refs(u64 *pm, u64 *hpet) | ||
108 | { | ||
109 | u64 t1, t2; | ||
110 | int i; | ||
111 | |||
112 | for (i = 0; i < MAX_RETRIES; i++) { | ||
113 | t1 = get_cycles(); | ||
114 | if (hpet) | ||
115 | *hpet = hpet_readl(HPET_COUNTER) & 0xFFFFFFFF; | ||
116 | else | ||
117 | *pm = acpi_pm_read_early(); | ||
118 | t2 = get_cycles(); | ||
119 | if ((t2 - t1) < SMI_TRESHOLD) | ||
120 | return t2; | ||
121 | } | ||
122 | return ULLONG_MAX; | ||
123 | } | ||
124 | |||
125 | /** | ||
126 | * native_calibrate_tsc - calibrate the tsc on boot | ||
127 | */ | ||
128 | unsigned long native_calibrate_tsc(void) | ||
129 | { | ||
130 | unsigned long flags; | ||
131 | u64 tsc1, tsc2, tr1, tr2, delta, pm1, pm2, hpet1, hpet2; | ||
132 | int hpet = is_hpet_enabled(); | ||
133 | unsigned int tsc_khz_val = 0; | ||
134 | |||
135 | local_irq_save(flags); | ||
136 | |||
137 | tsc1 = tsc_read_refs(&pm1, hpet ? &hpet1 : NULL); | ||
138 | |||
139 | outb((inb(0x61) & ~0x02) | 0x01, 0x61); | ||
140 | |||
141 | outb(0xb0, 0x43); | ||
142 | outb((CLOCK_TICK_RATE / (1000 / 50)) & 0xff, 0x42); | ||
143 | outb((CLOCK_TICK_RATE / (1000 / 50)) >> 8, 0x42); | ||
144 | tr1 = get_cycles(); | ||
145 | while ((inb(0x61) & 0x20) == 0); | ||
146 | tr2 = get_cycles(); | ||
147 | |||
148 | tsc2 = tsc_read_refs(&pm2, hpet ? &hpet2 : NULL); | ||
149 | |||
150 | local_irq_restore(flags); | ||
151 | |||
152 | /* | ||
153 | * Preset the result with the raw and inaccurate PIT | ||
154 | * calibration value | ||
155 | */ | ||
156 | delta = (tr2 - tr1); | ||
157 | do_div(delta, 50); | ||
158 | tsc_khz_val = delta; | ||
159 | |||
160 | /* hpet or pmtimer available? */ | ||
161 | if (!hpet && !pm1 && !pm2) { | ||
162 | printk(KERN_INFO "TSC calibrated against PIT\n"); | ||
163 | goto out; | ||
164 | } | ||
165 | |||
166 | /* Check whether the sampling was disturbed by an SMI */ | ||
167 | if (tsc1 == ULLONG_MAX || tsc2 == ULLONG_MAX) { | ||
168 | printk(KERN_WARNING "TSC calibration disturbed by SMI, " | ||
169 | "using PIT calibration result\n"); | ||
170 | goto out; | ||
171 | } | ||
172 | |||
173 | tsc2 = (tsc2 - tsc1) * 1000000LL; | ||
174 | |||
175 | if (hpet) { | ||
176 | printk(KERN_INFO "TSC calibrated against HPET\n"); | ||
177 | if (hpet2 < hpet1) | ||
178 | hpet2 += 0x100000000ULL; | ||
179 | hpet2 -= hpet1; | ||
180 | tsc1 = ((u64)hpet2 * hpet_readl(HPET_PERIOD)); | ||
181 | do_div(tsc1, 1000000); | ||
182 | } else { | ||
183 | printk(KERN_INFO "TSC calibrated against PM_TIMER\n"); | ||
184 | if (pm2 < pm1) | ||
185 | pm2 += (u64)ACPI_PM_OVRRUN; | ||
186 | pm2 -= pm1; | ||
187 | tsc1 = pm2 * 1000000000LL; | ||
188 | do_div(tsc1, PMTMR_TICKS_PER_SEC); | ||
189 | } | ||
190 | |||
191 | do_div(tsc2, tsc1); | ||
192 | tsc_khz_val = tsc2; | ||
193 | |||
194 | out: | ||
195 | return tsc_khz_val; | ||
196 | } | ||
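
The 50 ms PIT gate makes the preset arithmetic convenient: elapsed TSC cycles divided by 50 are cycles per millisecond, which is exactly kHz. A worked sketch with hypothetical counter reads:

#include <stdio.h>
#include <stdint.h>

int main(void)
{
	/* Hypothetical TSC reads bracketing the 50 ms PIT countdown. */
	uint64_t tr1 = 1000000000ULL, tr2 = 1120000000ULL;
	uint64_t delta = tr2 - tr1;		/* 120M cycles in 50 ms */
	unsigned int tsc_khz_val = (unsigned int)(delta / 50);

	printf("tsc_khz = %u (%u.%03u MHz)\n", tsc_khz_val,
	       tsc_khz_val / 1000, tsc_khz_val % 1000); /* 2400.000 MHz */
	return 0;
}
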
197 | |||
198 | |||
199 | #ifdef CONFIG_X86_32 | ||
200 | /* Only called from the Powernow K7 cpu freq driver */ | ||
201 | int recalibrate_cpu_khz(void) | ||
202 | { | ||
203 | #ifndef CONFIG_SMP | ||
204 | unsigned long cpu_khz_old = cpu_khz; | ||
205 | |||
206 | if (cpu_has_tsc) { | ||
207 | tsc_khz = calibrate_tsc(); | ||
208 | cpu_khz = tsc_khz; | ||
209 | cpu_data(0).loops_per_jiffy = | ||
210 | cpufreq_scale(cpu_data(0).loops_per_jiffy, | ||
211 | cpu_khz_old, cpu_khz); | ||
212 | return 0; | ||
213 | } else | ||
214 | return -ENODEV; | ||
215 | #else | ||
216 | return -ENODEV; | ||
217 | #endif | ||
218 | } | ||
219 | |||
220 | EXPORT_SYMBOL(recalibrate_cpu_khz); | ||
221 | |||
222 | #endif /* CONFIG_X86_32 */ | ||
223 | |||
224 | /* Accelerators for sched_clock() | ||
225 | * convert from cycles(64bits) => nanoseconds (64bits) | ||
226 | * basic equation: | ||
227 | * ns = cycles / (freq / ns_per_sec) | ||
228 | * ns = cycles * (ns_per_sec / freq) | ||
229 | * ns = cycles * (10^9 / (cpu_khz * 10^3)) | ||
230 | * ns = cycles * (10^6 / cpu_khz) | ||
231 | * | ||
232 | * Then we use scaling math (suggested by george@mvista.com) to get: | ||
233 | * ns = cycles * (10^6 * SC / cpu_khz) / SC | ||
234 | * ns = cycles * cyc2ns_scale / SC | ||
235 | * | ||
236 | * And since SC is a constant power of two, we can convert the div | ||
237 | * into a shift. | ||
238 | * | ||
239 | * We can use khz divisor instead of mhz to keep a better precision, since | ||
240 | * cyc2ns_scale is limited to 10^6 * 2^10, which fits in 32 bits. | ||
241 | * (mathieu.desnoyers@polymtl.ca) | ||
242 | * | ||
243 | * -johnstul@us.ibm.com "math is hard, lets go shopping!" | ||
244 | */ | ||
245 | |||
246 | DEFINE_PER_CPU(unsigned long, cyc2ns); | ||
247 | |||
248 | static void set_cyc2ns_scale(unsigned long cpu_khz, int cpu) | ||
249 | { | ||
250 | unsigned long long tsc_now, ns_now; | ||
251 | unsigned long flags, *scale; | ||
252 | |||
253 | local_irq_save(flags); | ||
254 | sched_clock_idle_sleep_event(); | ||
255 | |||
256 | scale = &per_cpu(cyc2ns, cpu); | ||
257 | |||
258 | rdtscll(tsc_now); | ||
259 | ns_now = __cycles_2_ns(tsc_now); | ||
260 | |||
261 | if (cpu_khz) | ||
262 | *scale = (NSEC_PER_MSEC << CYC2NS_SCALE_FACTOR)/cpu_khz; | ||
263 | |||
264 | sched_clock_idle_wakeup_event(0); | ||
265 | local_irq_restore(flags); | ||
266 | } | ||
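
A user-space sketch of the scaling math documented above, assuming CYC2NS_SCALE_FACTOR is 10 (the asm/timer.h value of this era; treat it as illustrative). One second of cycles at a hypothetical 2.4 GHz comes out just under 10^9 ns because the integer division that builds the scale truncates:

#include <stdio.h>
#include <stdint.h>

#define CYC2NS_SCALE_FACTOR	10		/* assumed: SC = 2^10 */
#define NSEC_PER_MSEC		1000000UL

int main(void)
{
	unsigned long cpu_khz = 2400000;	/* hypothetical 2.4 GHz */
	unsigned long scale = (NSEC_PER_MSEC << CYC2NS_SCALE_FACTOR) / cpu_khz;
	uint64_t cycles = 2400000000ULL;	/* one second of cycles */

	/* ns = cycles * cyc2ns_scale / SC, with the divide folded into
	 * a shift because SC is a power of two. */
	printf("scale = %lu, ns = %llu\n", scale,
	       (unsigned long long)((cycles * scale) >> CYC2NS_SCALE_FACTOR));
	return 0;
}
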
267 | |||
268 | #ifdef CONFIG_CPU_FREQ | ||
269 | |||
270 | /* Frequency scaling support. Adjust the TSC based timer when the cpu frequency | ||
271 | * changes. | ||
272 | * | ||
273 | * RED-PEN: On SMP we assume all CPUs run with the same frequency. It's | ||
274 | * not that important because current Opteron setups do not support | ||
275 | * scaling on SMP anyway. | ||
276 | * | ||
277 | * Should fix up last_tsc too. Currently gettimeofday in the | ||
278 | * first tick after the change will be slightly wrong. | ||
279 | */ | ||
280 | |||
281 | static unsigned int ref_freq; | ||
282 | static unsigned long loops_per_jiffy_ref; | ||
283 | static unsigned long tsc_khz_ref; | ||
284 | |||
285 | static int time_cpufreq_notifier(struct notifier_block *nb, unsigned long val, | ||
286 | void *data) | ||
287 | { | ||
288 | struct cpufreq_freqs *freq = data; | ||
289 | unsigned long *lpj, dummy; | ||
290 | |||
291 | if (cpu_has(&cpu_data(freq->cpu), X86_FEATURE_CONSTANT_TSC)) | ||
292 | return 0; | ||
293 | |||
294 | lpj = &dummy; | ||
295 | if (!(freq->flags & CPUFREQ_CONST_LOOPS)) | ||
296 | #ifdef CONFIG_SMP | ||
297 | lpj = &cpu_data(freq->cpu).loops_per_jiffy; | ||
298 | #else | ||
299 | lpj = &boot_cpu_data.loops_per_jiffy; | ||
300 | #endif | ||
301 | |||
302 | if (!ref_freq) { | ||
303 | ref_freq = freq->old; | ||
304 | loops_per_jiffy_ref = *lpj; | ||
305 | tsc_khz_ref = tsc_khz; | ||
306 | } | ||
307 | if ((val == CPUFREQ_PRECHANGE && freq->old < freq->new) || | ||
308 | (val == CPUFREQ_POSTCHANGE && freq->old > freq->new) || | ||
309 | (val == CPUFREQ_RESUMECHANGE)) { | ||
310 | *lpj = cpufreq_scale(loops_per_jiffy_ref, ref_freq, freq->new); | ||
311 | |||
312 | tsc_khz = cpufreq_scale(tsc_khz_ref, ref_freq, freq->new); | ||
313 | if (!(freq->flags & CPUFREQ_CONST_LOOPS)) | ||
314 | mark_tsc_unstable("cpufreq changes"); | ||
315 | } | ||
316 | |||
317 | set_cyc2ns_scale(tsc_khz_ref, freq->cpu); | ||
318 | |||
319 | return 0; | ||
320 | } | ||
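
The notifier rescales loops_per_jiffy and tsc_khz in proportion to the frequency change. A sketch of that ratio, assuming cpufreq_scale() reduces to old * mult / div (the kernel helper computes the same thing with overflow-safe fixed-point math):

#include <stdio.h>

/* Assumed behaviour of cpufreq_scale(): old * mult / div, done here
 * with plain 64-bit math. */
static unsigned long scale(unsigned long old, unsigned int div,
			   unsigned int mult)
{
	return (unsigned long)(((unsigned long long)old * mult) / div);
}

int main(void)
{
	unsigned int ref_freq = 2400000, new_freq = 1600000; /* kHz, hypothetical */

	printf("lpj: %lu -> %lu\n", 4800000UL,
	       scale(4800000UL, ref_freq, new_freq));	/* 3200000 */
	printf("tsc_khz: %u -> %lu\n", ref_freq,
	       scale(2400000UL, ref_freq, new_freq));	/* 1600000 */
	return 0;
}
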
321 | |||
322 | static struct notifier_block time_cpufreq_notifier_block = { | ||
323 | .notifier_call = time_cpufreq_notifier | ||
324 | }; | ||
325 | |||
326 | static int __init cpufreq_tsc(void) | ||
327 | { | ||
328 | cpufreq_register_notifier(&time_cpufreq_notifier_block, | ||
329 | CPUFREQ_TRANSITION_NOTIFIER); | ||
330 | return 0; | ||
331 | } | ||
332 | |||
333 | core_initcall(cpufreq_tsc); | ||
334 | |||
335 | #endif /* CONFIG_CPU_FREQ */ | ||
336 | |||
337 | /* clocksource code */ | ||
338 | |||
339 | static struct clocksource clocksource_tsc; | ||
340 | |||
341 | /* | ||
342 | * We compare the TSC to the cycle_last value in the clocksource | ||
343 | * structure to avoid a nasty time-warp. This can be observed in a | ||
344 | * very small window right after one CPU updated cycle_last under | ||
345 | * xtime/vsyscall_gtod lock and the other CPU reads a TSC value which | ||
346 | * is smaller than the cycle_last reference value due to a TSC which | ||
347 | * is slightly behind. This delta is nowhere else observable, but in | ||
348 | * that case it results in a forward time jump in the range of hours | ||
349 | * due to the unsigned delta calculation of the time keeping core | ||
350 | * code, which is necessary to support wrapping clocksources like pm | ||
351 | * timer. | ||
352 | */ | ||
353 | static cycle_t read_tsc(void) | ||
354 | { | ||
355 | cycle_t ret = (cycle_t)get_cycles(); | ||
356 | |||
357 | return ret >= clocksource_tsc.cycle_last ? | ||
358 | ret : clocksource_tsc.cycle_last; | ||
359 | } | ||
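
The guard is just a compare-and-select; a sketch of the clamp with hypothetical cycle values:

#include <stdio.h>
#include <stdint.h>

/* The read_tsc() clamp above: never report a value behind the last one
 * published, so a slightly-lagging TSC on another CPU cannot turn into
 * a huge unsigned forward delta in the timekeeping core. */
static uint64_t clamped(uint64_t tsc, uint64_t cycle_last)
{
	return tsc >= cycle_last ? tsc : cycle_last;
}

int main(void)
{
	printf("%llu\n", (unsigned long long)clamped(999990, 1000000));  /* 1000000 */
	printf("%llu\n", (unsigned long long)clamped(1000042, 1000000)); /* 1000042 */
	return 0;
}
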
360 | |||
361 | #ifdef CONFIG_X86_64 | ||
362 | static cycle_t __vsyscall_fn vread_tsc(void) | ||
363 | { | ||
364 | cycle_t ret = (cycle_t)vget_cycles(); | ||
365 | |||
366 | return ret >= __vsyscall_gtod_data.clock.cycle_last ? | ||
367 | ret : __vsyscall_gtod_data.clock.cycle_last; | ||
368 | } | ||
369 | #endif | ||
370 | |||
371 | static struct clocksource clocksource_tsc = { | ||
372 | .name = "tsc", | ||
373 | .rating = 300, | ||
374 | .read = read_tsc, | ||
375 | .mask = CLOCKSOURCE_MASK(64), | ||
376 | .shift = 22, | ||
377 | .flags = CLOCK_SOURCE_IS_CONTINUOUS | | ||
378 | CLOCK_SOURCE_MUST_VERIFY, | ||
379 | #ifdef CONFIG_X86_64 | ||
380 | .vread = vread_tsc, | ||
381 | #endif | ||
382 | }; | ||
383 | |||
384 | void mark_tsc_unstable(char *reason) | ||
385 | { | ||
386 | if (!tsc_unstable) { | ||
387 | tsc_unstable = 1; | ||
388 | printk("Marking TSC unstable due to %s\n", reason); | ||
389 | /* Change only the rating, when not registered */ | ||
390 | if (clocksource_tsc.mult) | ||
391 | clocksource_change_rating(&clocksource_tsc, 0); | ||
392 | else | ||
393 | clocksource_tsc.rating = 0; | ||
394 | } | ||
395 | } | ||
396 | |||
397 | EXPORT_SYMBOL_GPL(mark_tsc_unstable); | ||
398 | |||
399 | static int __init dmi_mark_tsc_unstable(const struct dmi_system_id *d) | ||
400 | { | ||
401 | printk(KERN_NOTICE "%s detected: marking TSC unstable.\n", | ||
402 | d->ident); | ||
403 | tsc_unstable = 1; | ||
404 | return 0; | ||
405 | } | ||
406 | |||
407 | /* List of systems that have known TSC problems */ | ||
408 | static struct dmi_system_id __initdata bad_tsc_dmi_table[] = { | ||
409 | { | ||
410 | .callback = dmi_mark_tsc_unstable, | ||
411 | .ident = "IBM Thinkpad 380XD", | ||
412 | .matches = { | ||
413 | DMI_MATCH(DMI_BOARD_VENDOR, "IBM"), | ||
414 | DMI_MATCH(DMI_BOARD_NAME, "2635FA0"), | ||
415 | }, | ||
416 | }, | ||
417 | {} | ||
418 | }; | ||
419 | |||
420 | /* | ||
421 | * Geode_LX - the OLPC CPU has a possibly very reliable TSC | ||
422 | */ | ||
423 | #ifdef CONFIG_MGEODE_LX | ||
424 | /* RTSC counts during suspend */ | ||
425 | #define RTSC_SUSP 0x100 | ||
426 | |||
427 | static void __init check_geode_tsc_reliable(void) | ||
428 | { | ||
429 | unsigned long res_low, res_high; | ||
430 | |||
431 | rdmsr_safe(MSR_GEODE_BUSCONT_CONF0, &res_low, &res_high); | ||
432 | if (res_low & RTSC_SUSP) | ||
433 | clocksource_tsc.flags &= ~CLOCK_SOURCE_MUST_VERIFY; | ||
434 | } | ||
435 | #else | ||
436 | static inline void check_geode_tsc_reliable(void) { } | ||
437 | #endif | ||
438 | |||
439 | /* | ||
440 | * Make an educated guess if the TSC is trustworthy and synchronized | ||
441 | * over all CPUs. | ||
442 | */ | ||
443 | __cpuinit int unsynchronized_tsc(void) | ||
444 | { | ||
445 | if (!cpu_has_tsc || tsc_unstable) | ||
446 | return 1; | ||
447 | |||
448 | #ifdef CONFIG_SMP | ||
449 | if (apic_is_clustered_box()) | ||
450 | return 1; | ||
451 | #endif | ||
452 | |||
453 | if (boot_cpu_has(X86_FEATURE_CONSTANT_TSC)) | ||
454 | return 0; | ||
455 | /* | ||
456 | * Intel systems are normally all synchronized. | ||
457 | * Exceptions must mark TSC as unstable: | ||
458 | */ | ||
459 | if (boot_cpu_data.x86_vendor != X86_VENDOR_INTEL) { | ||
460 | /* assume multi socket systems are not synchronized: */ | ||
461 | if (num_possible_cpus() > 1) | ||
462 | tsc_unstable = 1; | ||
463 | } | ||
464 | |||
465 | return tsc_unstable; | ||
466 | } | ||
467 | |||
468 | static void __init init_tsc_clocksource(void) | ||
469 | { | ||
470 | clocksource_tsc.mult = clocksource_khz2mult(tsc_khz, | ||
471 | clocksource_tsc.shift); | ||
472 | /* lower the rating if we already know it's unstable: */ | ||
473 | if (check_tsc_unstable()) { | ||
474 | clocksource_tsc.rating = 0; | ||
475 | clocksource_tsc.flags &= ~CLOCK_SOURCE_IS_CONTINUOUS; | ||
476 | } | ||
477 | clocksource_register(&clocksource_tsc); | ||
478 | } | ||
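
Clocksource reads convert as ns = (cycles * mult) >> shift, so clocksource_khz2mult() amounts to (10^6 << shift) / khz for a kHz-rated clock. A worked sketch using the shift of 22 from the structure above and a hypothetical 2.4 GHz TSC:

#include <stdio.h>
#include <stdint.h>

int main(void)
{
	unsigned int tsc_khz = 2400000, shift = 22;	/* hypothetical rate */
	uint32_t mult = (uint32_t)((1000000ULL << shift) / tsc_khz);
	uint64_t cycles = 2400000;			/* 1 ms worth of cycles */

	printf("mult = %u, ns = %llu\n", mult,
	       (unsigned long long)((cycles * mult) >> shift)); /* ~1000000 */
	return 0;
}
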
479 | |||
480 | void __init tsc_init(void) | ||
481 | { | ||
482 | u64 lpj; | ||
483 | int cpu; | ||
484 | |||
485 | if (!cpu_has_tsc) | ||
486 | return; | ||
487 | |||
488 | tsc_khz = calibrate_tsc(); | ||
489 | cpu_khz = tsc_khz; | ||
490 | |||
491 | if (!tsc_khz) { | ||
492 | mark_tsc_unstable("could not calculate TSC khz"); | ||
493 | return; | ||
494 | } | ||
495 | |||
496 | #ifdef CONFIG_X86_64 | ||
497 | if (cpu_has(&boot_cpu_data, X86_FEATURE_CONSTANT_TSC) && | ||
498 | (boot_cpu_data.x86_vendor == X86_VENDOR_AMD)) | ||
499 | cpu_khz = calibrate_cpu(); | ||
500 | #endif | ||
501 | |||
502 | lpj = ((u64)tsc_khz * 1000); | ||
503 | do_div(lpj, HZ); | ||
504 | lpj_fine = lpj; | ||
505 | |||
506 | printk("Detected %lu.%03lu MHz processor.\n", | ||
507 | (unsigned long)cpu_khz / 1000, | ||
508 | (unsigned long)cpu_khz % 1000); | ||
509 | |||
510 | /* | ||
511 | * Secondary CPUs do not run through tsc_init(), so set up | ||
512 | * all the scale factors for all CPUs, assuming the same | ||
513 | * speed as the bootup CPU. (cpufreq notifiers will fix this | ||
514 | * up if their speed diverges) | ||
515 | */ | ||
516 | for_each_possible_cpu(cpu) | ||
517 | set_cyc2ns_scale(cpu_khz, cpu); | ||
518 | |||
519 | if (tsc_disabled > 0) | ||
520 | return; | ||
521 | |||
522 | /* now allow native_sched_clock() to use rdtsc */ | ||
523 | tsc_disabled = 0; | ||
524 | |||
525 | use_tsc_delay(); | ||
526 | /* Check and install the TSC clocksource */ | ||
527 | dmi_check_system(bad_tsc_dmi_table); | ||
528 | |||
529 | if (unsynchronized_tsc()) | ||
530 | mark_tsc_unstable("TSCs unsynchronized"); | ||
531 | |||
532 | check_geode_tsc_reliable(); | ||
533 | init_tsc_clocksource(); | ||
534 | } | ||
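
lpj_fine is simply TSC cycles per jiffy; with a hypothetical 2.4 GHz TSC and HZ = 250 that is 9,600,000:

#include <stdio.h>
#include <stdint.h>

int main(void)
{
	unsigned int tsc_khz = 2400000, hz = 250;	/* hypothetical values */
	uint64_t lpj = (uint64_t)tsc_khz * 1000 / hz;

	printf("lpj_fine = %llu\n", (unsigned long long)lpj); /* 9600000 */
	return 0;
}
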
535 | |||
diff --git a/arch/x86/kernel/tsc_32.c b/arch/x86/kernel/tsc_32.c deleted file mode 100644 index 6240922e497c..000000000000 --- a/arch/x86/kernel/tsc_32.c +++ /dev/null | |||
@@ -1,455 +0,0 @@ | |||
1 | #include <linux/sched.h> | ||
2 | #include <linux/clocksource.h> | ||
3 | #include <linux/workqueue.h> | ||
4 | #include <linux/delay.h> | ||
5 | #include <linux/cpufreq.h> | ||
6 | #include <linux/jiffies.h> | ||
7 | #include <linux/init.h> | ||
8 | #include <linux/dmi.h> | ||
9 | #include <linux/percpu.h> | ||
10 | |||
11 | #include <asm/delay.h> | ||
12 | #include <asm/tsc.h> | ||
13 | #include <asm/io.h> | ||
14 | #include <asm/timer.h> | ||
15 | |||
16 | #include "mach_timer.h" | ||
17 | |||
18 | /* native_sched_clock() is called before tsc_init(), so | ||
19 | we must start with the TSC soft disabled to prevent | ||
20 | erroneous rdtsc usage on !cpu_has_tsc processors */ | ||
21 | static int tsc_disabled = -1; | ||
22 | |||
23 | /* | ||
24 | * On some systems the TSC frequency does not | ||
25 | * change with the cpu frequency. So we need | ||
26 | * an extra value to store the TSC freq | ||
27 | */ | ||
28 | unsigned int tsc_khz; | ||
29 | EXPORT_SYMBOL_GPL(tsc_khz); | ||
30 | |||
31 | #ifdef CONFIG_X86_TSC | ||
32 | static int __init tsc_setup(char *str) | ||
33 | { | ||
34 | printk(KERN_WARNING "notsc: Kernel compiled with CONFIG_X86_TSC, " | ||
35 | "cannot disable TSC completely.\n"); | ||
36 | tsc_disabled = 1; | ||
37 | return 1; | ||
38 | } | ||
39 | #else | ||
40 | /* | ||
41 | * disable flag for tsc. Takes effect by clearing the TSC cpu flag | ||
42 | * in cpu/common.c | ||
43 | */ | ||
44 | static int __init tsc_setup(char *str) | ||
45 | { | ||
46 | setup_clear_cpu_cap(X86_FEATURE_TSC); | ||
47 | return 1; | ||
48 | } | ||
49 | #endif | ||
50 | |||
51 | __setup("notsc", tsc_setup); | ||
52 | |||
53 | /* | ||
54 | * code to mark and check if the TSC is unstable | ||
55 | * due to cpufreq or due to unsynced TSCs | ||
56 | */ | ||
57 | static int tsc_unstable; | ||
58 | |||
59 | int check_tsc_unstable(void) | ||
60 | { | ||
61 | return tsc_unstable; | ||
62 | } | ||
63 | EXPORT_SYMBOL_GPL(check_tsc_unstable); | ||
64 | |||
65 | /* Accelerators for sched_clock() | ||
66 | * convert from cycles(64bits) => nanoseconds (64bits) | ||
67 | * basic equation: | ||
68 | * ns = cycles / (freq / ns_per_sec) | ||
69 | * ns = cycles * (ns_per_sec / freq) | ||
70 | * ns = cycles * (10^9 / (cpu_khz * 10^3)) | ||
71 | * ns = cycles * (10^6 / cpu_khz) | ||
72 | * | ||
73 | * Then we use scaling math (suggested by george@mvista.com) to get: | ||
74 | * ns = cycles * (10^6 * SC / cpu_khz) / SC | ||
75 | * ns = cycles * cyc2ns_scale / SC | ||
76 | * | ||
77 | * And since SC is a constant power of two, we can convert the div | ||
78 | * into a shift. | ||
79 | * | ||
80 | * We can use khz divisor instead of mhz to keep a better precision, since | ||
81 | * cyc2ns_scale is limited to 10^6 * 2^10, which fits in 32 bits. | ||
82 | * (mathieu.desnoyers@polymtl.ca) | ||
83 | * | ||
84 | * -johnstul@us.ibm.com "math is hard, lets go shopping!" | ||
85 | */ | ||
86 | |||
87 | DEFINE_PER_CPU(unsigned long, cyc2ns); | ||
88 | |||
89 | static void set_cyc2ns_scale(unsigned long cpu_khz, int cpu) | ||
90 | { | ||
91 | unsigned long long tsc_now, ns_now; | ||
92 | unsigned long flags, *scale; | ||
93 | |||
94 | local_irq_save(flags); | ||
95 | sched_clock_idle_sleep_event(); | ||
96 | |||
97 | scale = &per_cpu(cyc2ns, cpu); | ||
98 | |||
99 | rdtscll(tsc_now); | ||
100 | ns_now = __cycles_2_ns(tsc_now); | ||
101 | |||
102 | if (cpu_khz) | ||
103 | *scale = (NSEC_PER_MSEC << CYC2NS_SCALE_FACTOR)/cpu_khz; | ||
104 | |||
105 | /* | ||
106 | * Start smoothly with the new frequency: | ||
107 | */ | ||
108 | sched_clock_idle_wakeup_event(0); | ||
109 | local_irq_restore(flags); | ||
110 | } | ||
111 | |||
112 | /* | ||
113 | * Scheduler clock - returns current time in nanosec units. | ||
114 | */ | ||
115 | unsigned long long native_sched_clock(void) | ||
116 | { | ||
117 | unsigned long long this_offset; | ||
118 | |||
119 | /* | ||
120 | * Fall back to jiffies if there's no TSC available: | ||
121 | * ( But note that we still use it if the TSC is marked | ||
122 | * unstable. We do this because unlike Time Of Day, | ||
123 | * the scheduler clock tolerates small errors and it's | ||
124 | * very important for it to be as fast as the platform | ||
125 | * can achieve it. ) | ||
126 | */ | ||
127 | if (unlikely(tsc_disabled)) | ||
128 | /* No locking but a rare wrong value is not a big deal: */ | ||
129 | return (jiffies_64 - INITIAL_JIFFIES) * (1000000000 / HZ); | ||
130 | |||
131 | /* read the Time Stamp Counter: */ | ||
132 | rdtscll(this_offset); | ||
133 | |||
134 | /* return the value in ns */ | ||
135 | return cycles_2_ns(this_offset); | ||
136 | } | ||
137 | |||
138 | /* We need to define a real function for sched_clock, to override the | ||
139 | weak default version */ | ||
140 | #ifdef CONFIG_PARAVIRT | ||
141 | unsigned long long sched_clock(void) | ||
142 | { | ||
143 | return paravirt_sched_clock(); | ||
144 | } | ||
145 | #else | ||
146 | unsigned long long sched_clock(void) | ||
147 | __attribute__((alias("native_sched_clock"))); | ||
148 | #endif | ||
149 | |||
150 | unsigned long native_calculate_cpu_khz(void) | ||
151 | { | ||
152 | unsigned long long start, end; | ||
153 | unsigned long count; | ||
154 | u64 delta64 = (u64)ULLONG_MAX; | ||
155 | int i; | ||
156 | unsigned long flags; | ||
157 | |||
158 | local_irq_save(flags); | ||
159 | |||
160 | /* run 3 times to ensure the cache is warm and to get an accurate reading */ | ||
161 | for (i = 0; i < 3; i++) { | ||
162 | mach_prepare_counter(); | ||
163 | rdtscll(start); | ||
164 | mach_countup(&count); | ||
165 | rdtscll(end); | ||
166 | |||
167 | /* | ||
168 | * Error: ECTCNEVERSET | ||
169 | * The CTC wasn't reliable: we got a hit on the very first read, | ||
170 | * or the CPU was so fast/slow that the quotient wouldn't fit in | ||
171 | * 32 bits. | ||
172 | */ | ||
173 | if (count <= 1) | ||
174 | continue; | ||
175 | |||
176 | /* cpu freq too slow: */ | ||
177 | if ((end - start) <= CALIBRATE_TIME_MSEC) | ||
178 | continue; | ||
179 | |||
180 | /* | ||
181 | * We want the minimum time of all runs in case one of them | ||
182 | * is inaccurate due to SMI or other delay | ||
183 | */ | ||
184 | delta64 = min(delta64, (end - start)); | ||
185 | } | ||
186 | |||
187 | /* cpu freq too fast (or every run was bad): */ | ||
188 | if (delta64 > (1ULL<<32)) | ||
189 | goto err; | ||
190 | |||
191 | delta64 += CALIBRATE_TIME_MSEC/2; /* round for do_div */ | ||
192 | do_div(delta64,CALIBRATE_TIME_MSEC); | ||
193 | |||
194 | local_irq_restore(flags); | ||
195 | return (unsigned long)delta64; | ||
196 | err: | ||
197 | local_irq_restore(flags); | ||
198 | return 0; | ||
199 | } | ||
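
The removed 32-bit path took the minimum delta of three CTC-gated runs, then rounded before dividing. A sketch of that final step, assuming CALIBRATE_TIME_MSEC is 30 (the mach_timer.h value of this era; treat it as an assumption):

#include <stdio.h>
#include <stdint.h>

int main(void)
{
	uint64_t delta64 = 72000000ULL;	/* hypothetical: 72M cycles measured */
	unsigned int msec = 30;		/* assumed CALIBRATE_TIME_MSEC */

	delta64 += msec / 2;		/* round for the division */
	printf("cpu_khz = %llu\n",
	       (unsigned long long)(delta64 / msec)); /* 2400000 -> 2.4 GHz */
	return 0;
}
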
200 | |||
201 | int recalibrate_cpu_khz(void) | ||
202 | { | ||
203 | #ifndef CONFIG_SMP | ||
204 | unsigned long cpu_khz_old = cpu_khz; | ||
205 | |||
206 | if (cpu_has_tsc) { | ||
207 | cpu_khz = calculate_cpu_khz(); | ||
208 | tsc_khz = cpu_khz; | ||
209 | cpu_data(0).loops_per_jiffy = | ||
210 | cpufreq_scale(cpu_data(0).loops_per_jiffy, | ||
211 | cpu_khz_old, cpu_khz); | ||
212 | return 0; | ||
213 | } else | ||
214 | return -ENODEV; | ||
215 | #else | ||
216 | return -ENODEV; | ||
217 | #endif | ||
218 | } | ||
219 | |||
220 | EXPORT_SYMBOL(recalibrate_cpu_khz); | ||
221 | |||
222 | #ifdef CONFIG_CPU_FREQ | ||
223 | |||
224 | /* | ||
225 | * if the CPU frequency is scaled, TSC-based delays will need a different | ||
226 | * loops_per_jiffy value to function properly. | ||
227 | */ | ||
228 | static unsigned int ref_freq; | ||
229 | static unsigned long loops_per_jiffy_ref; | ||
230 | static unsigned long cpu_khz_ref; | ||
231 | |||
232 | static int | ||
233 | time_cpufreq_notifier(struct notifier_block *nb, unsigned long val, void *data) | ||
234 | { | ||
235 | struct cpufreq_freqs *freq = data; | ||
236 | |||
237 | if (!ref_freq) { | ||
238 | if (!freq->old){ | ||
239 | ref_freq = freq->new; | ||
240 | return 0; | ||
241 | } | ||
242 | ref_freq = freq->old; | ||
243 | loops_per_jiffy_ref = cpu_data(freq->cpu).loops_per_jiffy; | ||
244 | cpu_khz_ref = cpu_khz; | ||
245 | } | ||
246 | |||
247 | if ((val == CPUFREQ_PRECHANGE && freq->old < freq->new) || | ||
248 | (val == CPUFREQ_POSTCHANGE && freq->old > freq->new) || | ||
249 | (val == CPUFREQ_RESUMECHANGE)) { | ||
250 | if (!(freq->flags & CPUFREQ_CONST_LOOPS)) | ||
251 | cpu_data(freq->cpu).loops_per_jiffy = | ||
252 | cpufreq_scale(loops_per_jiffy_ref, | ||
253 | ref_freq, freq->new); | ||
254 | |||
255 | if (cpu_khz) { | ||
256 | |||
257 | if (num_online_cpus() == 1) | ||
258 | cpu_khz = cpufreq_scale(cpu_khz_ref, | ||
259 | ref_freq, freq->new); | ||
260 | if (!(freq->flags & CPUFREQ_CONST_LOOPS)) { | ||
261 | tsc_khz = cpu_khz; | ||
262 | set_cyc2ns_scale(cpu_khz, freq->cpu); | ||
263 | /* | ||
264 | * TSC based sched_clock turns | ||
265 | * to junk w/ cpufreq | ||
266 | */ | ||
267 | mark_tsc_unstable("cpufreq changes"); | ||
268 | } | ||
269 | } | ||
270 | } | ||
271 | |||
272 | return 0; | ||
273 | } | ||
274 | |||
275 | static struct notifier_block time_cpufreq_notifier_block = { | ||
276 | .notifier_call = time_cpufreq_notifier | ||
277 | }; | ||
278 | |||
279 | static int __init cpufreq_tsc(void) | ||
280 | { | ||
281 | return cpufreq_register_notifier(&time_cpufreq_notifier_block, | ||
282 | CPUFREQ_TRANSITION_NOTIFIER); | ||
283 | } | ||
284 | core_initcall(cpufreq_tsc); | ||
285 | |||
286 | #endif | ||
287 | |||
288 | /* clock source code */ | ||
289 | |||
290 | static struct clocksource clocksource_tsc; | ||
291 | |||
292 | /* | ||
293 | * We compare the TSC to the cycle_last value in the clocksource | ||
294 | * structure to avoid a nasty time-warp issue. This can be observed in | ||
295 | * a very small window right after one CPU updated cycle_last under | ||
296 | * xtime lock and the other CPU reads a TSC value which is smaller | ||
297 | * than the cycle_last reference value due to a TSC which is slightly | ||
298 | * behind. This delta is nowhere else observable, but in that case it | ||
299 | * results in a forward time jump in the range of hours due to the | ||
300 | * unsigned delta calculation of the time keeping core code, which is | ||
301 | * necessary to support wrapping clocksources like pm timer. | ||
302 | */ | ||
303 | static cycle_t read_tsc(void) | ||
304 | { | ||
305 | cycle_t ret; | ||
306 | |||
307 | rdtscll(ret); | ||
308 | |||
309 | return ret >= clocksource_tsc.cycle_last ? | ||
310 | ret : clocksource_tsc.cycle_last; | ||
311 | } | ||
312 | |||
313 | static struct clocksource clocksource_tsc = { | ||
314 | .name = "tsc", | ||
315 | .rating = 300, | ||
316 | .read = read_tsc, | ||
317 | .mask = CLOCKSOURCE_MASK(64), | ||
318 | .mult = 0, /* to be set */ | ||
319 | .shift = 22, | ||
320 | .flags = CLOCK_SOURCE_IS_CONTINUOUS | | ||
321 | CLOCK_SOURCE_MUST_VERIFY, | ||
322 | }; | ||
323 | |||
324 | void mark_tsc_unstable(char *reason) | ||
325 | { | ||
326 | if (!tsc_unstable) { | ||
327 | tsc_unstable = 1; | ||
328 | printk("Marking TSC unstable due to: %s.\n", reason); | ||
329 | /* Can be called before registration */ | ||
330 | if (clocksource_tsc.mult) | ||
331 | clocksource_change_rating(&clocksource_tsc, 0); | ||
332 | else | ||
333 | clocksource_tsc.rating = 0; | ||
334 | } | ||
335 | } | ||
336 | EXPORT_SYMBOL_GPL(mark_tsc_unstable); | ||
337 | |||
338 | static int __init dmi_mark_tsc_unstable(const struct dmi_system_id *d) | ||
339 | { | ||
340 | printk(KERN_NOTICE "%s detected: marking TSC unstable.\n", | ||
341 | d->ident); | ||
342 | tsc_unstable = 1; | ||
343 | return 0; | ||
344 | } | ||
345 | |||
346 | /* List of systems that have known TSC problems */ | ||
347 | static struct dmi_system_id __initdata bad_tsc_dmi_table[] = { | ||
348 | { | ||
349 | .callback = dmi_mark_tsc_unstable, | ||
350 | .ident = "IBM Thinkpad 380XD", | ||
351 | .matches = { | ||
352 | DMI_MATCH(DMI_BOARD_VENDOR, "IBM"), | ||
353 | DMI_MATCH(DMI_BOARD_NAME, "2635FA0"), | ||
354 | }, | ||
355 | }, | ||
356 | {} | ||
357 | }; | ||
358 | |||
359 | /* | ||
360 | * Make an educated guess whether the TSC is trustworthy and synchronized | ||
361 | * over all CPUs. | ||
362 | */ | ||
363 | __cpuinit int unsynchronized_tsc(void) | ||
364 | { | ||
365 | if (!cpu_has_tsc || tsc_unstable) | ||
366 | return 1; | ||
367 | |||
368 | /* Anything with constant TSC should be synchronized */ | ||
369 | if (boot_cpu_has(X86_FEATURE_CONSTANT_TSC)) | ||
370 | return 0; | ||
371 | |||
372 | /* | ||
373 | * Intel systems are normally all synchronized. | ||
374 | * Exceptions must mark TSC as unstable: | ||
375 | */ | ||
376 | if (boot_cpu_data.x86_vendor != X86_VENDOR_INTEL) { | ||
377 | /* assume multi socket systems are not synchronized: */ | ||
378 | if (num_possible_cpus() > 1) | ||
379 | tsc_unstable = 1; | ||
380 | } | ||
381 | return tsc_unstable; | ||
382 | } | ||
383 | |||
384 | /* | ||
385 | * Geode_LX - the OLPC CPU has a possibly very reliable TSC | ||
386 | */ | ||
387 | #ifdef CONFIG_MGEODE_LX | ||
388 | /* RTSC counts during suspend */ | ||
389 | #define RTSC_SUSP 0x100 | ||
390 | |||
391 | static void __init check_geode_tsc_reliable(void) | ||
392 | { | ||
393 | unsigned long res_low, res_high; | ||
394 | |||
395 | rdmsr_safe(MSR_GEODE_BUSCONT_CONF0, &res_low, &res_high); | ||
396 | if (res_low & RTSC_SUSP) | ||
397 | clocksource_tsc.flags &= ~CLOCK_SOURCE_MUST_VERIFY; | ||
398 | } | ||
399 | #else | ||
400 | static inline void check_geode_tsc_reliable(void) { } | ||
401 | #endif | ||
402 | |||
403 | |||
404 | void __init tsc_init(void) | ||
405 | { | ||
406 | int cpu; | ||
407 | u64 lpj; | ||
408 | |||
409 | if (!cpu_has_tsc || tsc_disabled > 0) | ||
410 | return; | ||
411 | |||
412 | cpu_khz = calculate_cpu_khz(); | ||
413 | tsc_khz = cpu_khz; | ||
414 | |||
415 | if (!cpu_khz) { | ||
416 | mark_tsc_unstable("could not calculate TSC khz"); | ||
417 | return; | ||
418 | } | ||
419 | |||
420 | lpj = ((u64)tsc_khz * 1000); | ||
421 | do_div(lpj, HZ); | ||
422 | lpj_fine = lpj; | ||
423 | |||
424 | /* now allow native_sched_clock() to use rdtsc */ | ||
425 | tsc_disabled = 0; | ||
426 | |||
427 | printk("Detected %lu.%03lu MHz processor.\n", | ||
428 | (unsigned long)cpu_khz / 1000, | ||
429 | (unsigned long)cpu_khz % 1000); | ||
430 | |||
431 | /* | ||
432 | * Secondary CPUs do not run through tsc_init(), so set up | ||
433 | * all the scale factors for all CPUs, assuming the same | ||
434 | * speed as the bootup CPU. (cpufreq notifiers will fix this | ||
435 | * up if their speed diverges) | ||
436 | */ | ||
437 | for_each_possible_cpu(cpu) | ||
438 | set_cyc2ns_scale(cpu_khz, cpu); | ||
439 | |||
440 | use_tsc_delay(); | ||
441 | |||
442 | /* Check and install the TSC clocksource */ | ||
443 | dmi_check_system(bad_tsc_dmi_table); | ||
444 | |||
445 | unsynchronized_tsc(); | ||
446 | check_geode_tsc_reliable(); | ||
447 | clocksource_tsc.mult = clocksource_khz2mult(tsc_khz, | ||
448 | clocksource_tsc.shift); | ||
449 | /* lower the rating if we already know it's unstable: */ | ||
450 | if (check_tsc_unstable()) { | ||
451 | clocksource_tsc.rating = 0; | ||
452 | clocksource_tsc.flags &= ~CLOCK_SOURCE_IS_CONTINUOUS; | ||
453 | } | ||
454 | clocksource_register(&clocksource_tsc); | ||
455 | } | ||
diff --git a/arch/x86/kernel/tsc_64.c b/arch/x86/kernel/tsc_64.c deleted file mode 100644 index 9898fb01edfd..000000000000 --- a/arch/x86/kernel/tsc_64.c +++ /dev/null | |||
@@ -1,357 +0,0 @@ | |||
1 | #include <linux/kernel.h> | ||
2 | #include <linux/sched.h> | ||
3 | #include <linux/interrupt.h> | ||
4 | #include <linux/init.h> | ||
5 | #include <linux/clocksource.h> | ||
6 | #include <linux/time.h> | ||
7 | #include <linux/acpi.h> | ||
8 | #include <linux/cpufreq.h> | ||
9 | #include <linux/acpi_pmtmr.h> | ||
10 | |||
11 | #include <asm/hpet.h> | ||
12 | #include <asm/timex.h> | ||
13 | #include <asm/timer.h> | ||
14 | #include <asm/vgtod.h> | ||
15 | |||
16 | static int notsc __initdata = 0; | ||
17 | |||
18 | unsigned int cpu_khz; /* TSC clocks / usec, not used here */ | ||
19 | EXPORT_SYMBOL(cpu_khz); | ||
20 | unsigned int tsc_khz; | ||
21 | EXPORT_SYMBOL(tsc_khz); | ||
22 | |||
23 | /* Accelerators for sched_clock() | ||
24 | * convert from cycles(64bits) => nanoseconds (64bits) | ||
25 | * basic equation: | ||
26 | * ns = cycles / (freq / ns_per_sec) | ||
27 | * ns = cycles * (ns_per_sec / freq) | ||
28 | * ns = cycles * (10^9 / (cpu_khz * 10^3)) | ||
29 | * ns = cycles * (10^6 / cpu_khz) | ||
30 | * | ||
31 | * Then we use scaling math (suggested by george@mvista.com) to get: | ||
32 | * ns = cycles * (10^6 * SC / cpu_khz) / SC | ||
33 | * ns = cycles * cyc2ns_scale / SC | ||
34 | * | ||
35 | * And since SC is a constant power of two, we can convert the div | ||
36 | * into a shift. | ||
37 | * | ||
38 | * We can use khz divisor instead of mhz to keep a better precision, since | ||
39 | * cyc2ns_scale is limited to 10^6 * 2^10, which fits in 32 bits. | ||
40 | * (mathieu.desnoyers@polymtl.ca) | ||
41 | * | ||
42 | * -johnstul@us.ibm.com "math is hard, lets go shopping!" | ||
43 | */ | ||
44 | DEFINE_PER_CPU(unsigned long, cyc2ns); | ||
45 | |||
46 | static void set_cyc2ns_scale(unsigned long cpu_khz, int cpu) | ||
47 | { | ||
48 | unsigned long long tsc_now, ns_now; | ||
49 | unsigned long flags, *scale; | ||
50 | |||
51 | local_irq_save(flags); | ||
52 | sched_clock_idle_sleep_event(); | ||
53 | |||
54 | scale = &per_cpu(cyc2ns, cpu); | ||
55 | |||
56 | rdtscll(tsc_now); | ||
57 | ns_now = __cycles_2_ns(tsc_now); | ||
58 | |||
59 | if (cpu_khz) | ||
60 | *scale = (NSEC_PER_MSEC << CYC2NS_SCALE_FACTOR)/cpu_khz; | ||
61 | |||
62 | sched_clock_idle_wakeup_event(0); | ||
63 | local_irq_restore(flags); | ||
64 | } | ||
65 | |||
66 | unsigned long long native_sched_clock(void) | ||
67 | { | ||
68 | unsigned long a = 0; | ||
69 | |||
70 | /* Could do CPU core sync here. Opteron can execute rdtsc speculatively, | ||
71 | * which means it is not completely exact and may not be monotonic | ||
72 | * between CPUs. But the errors should be too small to matter for | ||
73 | * scheduling purposes. | ||
74 | */ | ||
75 | |||
76 | rdtscll(a); | ||
77 | return cycles_2_ns(a); | ||
78 | } | ||
79 | |||
80 | /* We need to define a real function for sched_clock, to override the | ||
81 | weak default version */ | ||
82 | #ifdef CONFIG_PARAVIRT | ||
83 | unsigned long long sched_clock(void) | ||
84 | { | ||
85 | return paravirt_sched_clock(); | ||
86 | } | ||
87 | #else | ||
88 | unsigned long long | ||
89 | sched_clock(void) __attribute__((alias("native_sched_clock"))); | ||
90 | #endif | ||
91 | |||
92 | |||
93 | static int tsc_unstable; | ||
94 | |||
95 | int check_tsc_unstable(void) | ||
96 | { | ||
97 | return tsc_unstable; | ||
98 | } | ||
99 | EXPORT_SYMBOL_GPL(check_tsc_unstable); | ||
100 | |||
101 | #ifdef CONFIG_CPU_FREQ | ||
102 | |||
103 | /* Frequency scaling support. Adjust the TSC based timer when the cpu frequency | ||
104 | * changes. | ||
105 | * | ||
106 | * RED-PEN: On SMP we assume all CPUs run with the same frequency. It's | ||
107 | * not that important because current Opteron setups do not support | ||
108 | * scaling on SMP anyway. | ||
109 | * | ||
110 | * Should fix up last_tsc too. Currently gettimeofday in the | ||
111 | * first tick after the change will be slightly wrong. | ||
112 | */ | ||
113 | |||
114 | static unsigned int ref_freq; | ||
115 | static unsigned long loops_per_jiffy_ref; | ||
116 | static unsigned long tsc_khz_ref; | ||
117 | |||
118 | static int time_cpufreq_notifier(struct notifier_block *nb, unsigned long val, | ||
119 | void *data) | ||
120 | { | ||
121 | struct cpufreq_freqs *freq = data; | ||
122 | unsigned long *lpj, dummy; | ||
123 | |||
124 | if (cpu_has(&cpu_data(freq->cpu), X86_FEATURE_CONSTANT_TSC)) | ||
125 | return 0; | ||
126 | |||
127 | lpj = &dummy; | ||
128 | if (!(freq->flags & CPUFREQ_CONST_LOOPS)) | ||
129 | #ifdef CONFIG_SMP | ||
130 | lpj = &cpu_data(freq->cpu).loops_per_jiffy; | ||
131 | #else | ||
132 | lpj = &boot_cpu_data.loops_per_jiffy; | ||
133 | #endif | ||
134 | |||
135 | if (!ref_freq) { | ||
136 | ref_freq = freq->old; | ||
137 | loops_per_jiffy_ref = *lpj; | ||
138 | tsc_khz_ref = tsc_khz; | ||
139 | } | ||
140 | if ((val == CPUFREQ_PRECHANGE && freq->old < freq->new) || | ||
141 | (val == CPUFREQ_POSTCHANGE && freq->old > freq->new) || | ||
142 | (val == CPUFREQ_RESUMECHANGE)) { | ||
143 | *lpj = | ||
144 | cpufreq_scale(loops_per_jiffy_ref, ref_freq, freq->new); | ||
145 | |||
146 | tsc_khz = cpufreq_scale(tsc_khz_ref, ref_freq, freq->new); | ||
147 | if (!(freq->flags & CPUFREQ_CONST_LOOPS)) | ||
148 | mark_tsc_unstable("cpufreq changes"); | ||
149 | } | ||
150 | |||
151 | set_cyc2ns_scale(tsc_khz_ref, freq->cpu); | ||
152 | |||
153 | return 0; | ||
154 | } | ||
155 | |||
156 | static struct notifier_block time_cpufreq_notifier_block = { | ||
157 | .notifier_call = time_cpufreq_notifier | ||
158 | }; | ||
159 | |||
160 | static int __init cpufreq_tsc(void) | ||
161 | { | ||
162 | cpufreq_register_notifier(&time_cpufreq_notifier_block, | ||
163 | CPUFREQ_TRANSITION_NOTIFIER); | ||
164 | return 0; | ||
165 | } | ||
166 | |||
167 | core_initcall(cpufreq_tsc); | ||
168 | |||
169 | #endif | ||
170 | |||
171 | #define MAX_RETRIES 5 | ||
172 | #define SMI_TRESHOLD 50000 | ||
173 | |||
174 | /* | ||
175 | * Read TSC and the reference counters. Take care of SMI disturbance | ||
176 | */ | ||
177 | static unsigned long __init tsc_read_refs(unsigned long *pm, | ||
178 | unsigned long *hpet) | ||
179 | { | ||
180 | unsigned long t1, t2; | ||
181 | int i; | ||
182 | |||
183 | for (i = 0; i < MAX_RETRIES; i++) { | ||
184 | t1 = get_cycles(); | ||
185 | if (hpet) | ||
186 | *hpet = hpet_readl(HPET_COUNTER) & 0xFFFFFFFF; | ||
187 | else | ||
188 | *pm = acpi_pm_read_early(); | ||
189 | t2 = get_cycles(); | ||
190 | if ((t2 - t1) < SMI_TRESHOLD) | ||
191 | return t2; | ||
192 | } | ||
193 | return ULONG_MAX; | ||
194 | } | ||
195 | |||
196 | /** | ||
197 | * tsc_calibrate - calibrate the tsc on boot | ||
198 | */ | ||
199 | void __init tsc_calibrate(void) | ||
200 | { | ||
201 | unsigned long flags, tsc1, tsc2, tr1, tr2, pm1, pm2, hpet1, hpet2; | ||
202 | int hpet = is_hpet_enabled(), cpu; | ||
203 | |||
204 | local_irq_save(flags); | ||
205 | |||
206 | tsc1 = tsc_read_refs(&pm1, hpet ? &hpet1 : NULL); | ||
207 | |||
208 | outb((inb(0x61) & ~0x02) | 0x01, 0x61); | ||
209 | |||
210 | outb(0xb0, 0x43); | ||
211 | outb((CLOCK_TICK_RATE / (1000 / 50)) & 0xff, 0x42); | ||
212 | outb((CLOCK_TICK_RATE / (1000 / 50)) >> 8, 0x42); | ||
213 | tr1 = get_cycles(); | ||
214 | while ((inb(0x61) & 0x20) == 0); | ||
215 | tr2 = get_cycles(); | ||
216 | |||
217 | tsc2 = tsc_read_refs(&pm2, hpet ? &hpet2 : NULL); | ||
218 | |||
219 | local_irq_restore(flags); | ||
220 | |||
221 | /* | ||
222 | * Preset the result with the raw and inaccurate PIT | ||
223 | * calibration value | ||
224 | */ | ||
225 | tsc_khz = (tr2 - tr1) / 50; | ||
226 | |||
227 | /* hpet or pmtimer available? */ | ||
228 | if (!hpet && !pm1 && !pm2) { | ||
229 | printk(KERN_INFO "TSC calibrated against PIT\n"); | ||
230 | goto out; | ||
231 | } | ||
232 | |||
233 | /* Check whether the sampling was disturbed by an SMI */ | ||
234 | if (tsc1 == ULONG_MAX || tsc2 == ULONG_MAX) { | ||
235 | printk(KERN_WARNING "TSC calibration disturbed by SMI, " | ||
236 | "using PIT calibration result\n"); | ||
237 | goto out; | ||
238 | } | ||
239 | |||
240 | tsc2 = (tsc2 - tsc1) * 1000000L; | ||
241 | |||
242 | if (hpet) { | ||
243 | printk(KERN_INFO "TSC calibrated against HPET\n"); | ||
244 | if (hpet2 < hpet1) | ||
245 | hpet2 += 0x100000000UL; | ||
246 | hpet2 -= hpet1; | ||
247 | tsc1 = (hpet2 * hpet_readl(HPET_PERIOD)) / 1000000; | ||
248 | } else { | ||
249 | printk(KERN_INFO "TSC calibrated against PM_TIMER\n"); | ||
250 | if (pm2 < pm1) | ||
251 | pm2 += ACPI_PM_OVRRUN; | ||
252 | pm2 -= pm1; | ||
253 | tsc1 = (pm2 * 1000000000) / PMTMR_TICKS_PER_SEC; | ||
254 | } | ||
255 | |||
256 | tsc_khz = tsc2 / tsc1; | ||
257 | |||
258 | out: | ||
259 | for_each_possible_cpu(cpu) | ||
260 | set_cyc2ns_scale(tsc_khz, cpu); | ||
261 | } | ||
262 | |||
263 | /* | ||
264 | * Make an educated guess whether the TSC is trustworthy and synchronized | ||
265 | * over all CPUs. | ||
266 | */ | ||
267 | __cpuinit int unsynchronized_tsc(void) | ||
268 | { | ||
269 | if (tsc_unstable) | ||
270 | return 1; | ||
271 | |||
272 | #ifdef CONFIG_SMP | ||
273 | if (apic_is_clustered_box()) | ||
274 | return 1; | ||
275 | #endif | ||
276 | |||
277 | if (boot_cpu_has(X86_FEATURE_CONSTANT_TSC)) | ||
278 | return 0; | ||
279 | |||
280 | /* Assume multi socket systems are not synchronized */ | ||
281 | return num_present_cpus() > 1; | ||
282 | } | ||
283 | |||
284 | int __init notsc_setup(char *s) | ||
285 | { | ||
286 | notsc = 1; | ||
287 | return 1; | ||
288 | } | ||
289 | |||
290 | __setup("notsc", notsc_setup); | ||
291 | |||
292 | static struct clocksource clocksource_tsc; | ||
293 | |||
294 | /* | ||
295 | * We compare the TSC to the cycle_last value in the clocksource | ||
296 | * structure to avoid a nasty time-warp. This can be observed in a | ||
297 | * very small window right after one CPU updated cycle_last under | ||
298 | * xtime/vsyscall_gtod lock and the other CPU reads a TSC value which | ||
299 | * is smaller than the cycle_last reference value due to a TSC which | ||
300 | * is slightly behind. This delta is nowhere else observable, but in | ||
301 | * that case it results in a forward time jump in the range of hours | ||
302 | * due to the unsigned delta calculation of the time keeping core | ||
303 | * code, which is necessary to support wrapping clocksources like pm | ||
304 | * timer. | ||
305 | */ | ||
306 | static cycle_t read_tsc(void) | ||
307 | { | ||
308 | cycle_t ret = (cycle_t)get_cycles(); | ||
309 | |||
310 | return ret >= clocksource_tsc.cycle_last ? | ||
311 | ret : clocksource_tsc.cycle_last; | ||
312 | } | ||
313 | |||
314 | static cycle_t __vsyscall_fn vread_tsc(void) | ||
315 | { | ||
316 | cycle_t ret = (cycle_t)vget_cycles(); | ||
317 | |||
318 | return ret >= __vsyscall_gtod_data.clock.cycle_last ? | ||
319 | ret : __vsyscall_gtod_data.clock.cycle_last; | ||
320 | } | ||
321 | |||
322 | static struct clocksource clocksource_tsc = { | ||
323 | .name = "tsc", | ||
324 | .rating = 300, | ||
325 | .read = read_tsc, | ||
326 | .mask = CLOCKSOURCE_MASK(64), | ||
327 | .shift = 22, | ||
328 | .flags = CLOCK_SOURCE_IS_CONTINUOUS | | ||
329 | CLOCK_SOURCE_MUST_VERIFY, | ||
330 | .vread = vread_tsc, | ||
331 | }; | ||
332 | |||
333 | void mark_tsc_unstable(char *reason) | ||
334 | { | ||
335 | if (!tsc_unstable) { | ||
336 | tsc_unstable = 1; | ||
337 | printk("Marking TSC unstable due to %s\n", reason); | ||
338 | /* Change only the rating, when not registered */ | ||
339 | if (clocksource_tsc.mult) | ||
340 | clocksource_change_rating(&clocksource_tsc, 0); | ||
341 | else | ||
342 | clocksource_tsc.rating = 0; | ||
343 | } | ||
344 | } | ||
345 | EXPORT_SYMBOL_GPL(mark_tsc_unstable); | ||
346 | |||
347 | void __init init_tsc_clocksource(void) | ||
348 | { | ||
349 | if (!notsc) { | ||
350 | clocksource_tsc.mult = clocksource_khz2mult(tsc_khz, | ||
351 | clocksource_tsc.shift); | ||
352 | if (check_tsc_unstable()) | ||
353 | clocksource_tsc.rating = 0; | ||
354 | |||
355 | clocksource_register(&clocksource_tsc); | ||
356 | } | ||
357 | } | ||
diff --git a/arch/x86/kernel/visws_quirks.c b/arch/x86/kernel/visws_quirks.c new file mode 100644 index 000000000000..e94bdb6add1d --- /dev/null +++ b/arch/x86/kernel/visws_quirks.c | |||
@@ -0,0 +1,709 @@ | |||
1 | /* | ||
2 | * SGI Visual Workstation support and quirks, unmaintained. | ||
3 | * | ||
4 | * Split out from setup.c by davej@suse.de | ||
5 | * | ||
6 | * Copyright (C) 1999 Bent Hagemark, Ingo Molnar | ||
7 | * | ||
8 | * SGI Visual Workstation interrupt controller | ||
9 | * | ||
10 | * The Cobalt system ASIC in the Visual Workstation contains a "Cobalt" APIC | ||
11 | * which serves as the main interrupt controller in the system. Non-legacy | ||
12 | * hardware in the system uses this controller directly. Legacy devices | ||
13 | * are connected to the PIIX4 which in turn has its 8259(s) connected to | ||
14 | * one of the Cobalt APIC entries. | ||
15 | * | ||
16 | * 09/02/2000 - Updated for 2.4 by jbarnes@sgi.com | ||
17 | * | ||
18 | * 25/11/2002 - Updated for 2.5 by Andrey Panin <pazke@orbita1.ru> | ||
19 | */ | ||
20 | #include <linux/interrupt.h> | ||
21 | #include <linux/module.h> | ||
22 | #include <linux/init.h> | ||
23 | #include <linux/smp.h> | ||
24 | |||
25 | #include <asm/visws/cobalt.h> | ||
26 | #include <asm/visws/piix4.h> | ||
27 | #include <asm/arch_hooks.h> | ||
28 | #include <asm/fixmap.h> | ||
29 | #include <asm/reboot.h> | ||
30 | #include <asm/setup.h> | ||
31 | #include <asm/e820.h> | ||
32 | #include <asm/smp.h> | ||
33 | #include <asm/io.h> | ||
34 | |||
35 | #include <mach_ipi.h> | ||
36 | |||
37 | #include "mach_apic.h" | ||
38 | |||
39 | #include <linux/init.h> | ||
40 | #include <linux/smp.h> | ||
41 | |||
42 | #include <linux/kernel_stat.h> | ||
43 | #include <linux/interrupt.h> | ||
44 | #include <linux/init.h> | ||
45 | |||
46 | #include <asm/io.h> | ||
47 | #include <asm/apic.h> | ||
48 | #include <asm/i8259.h> | ||
49 | #include <asm/irq_vectors.h> | ||
50 | #include <asm/visws/cobalt.h> | ||
51 | #include <asm/visws/lithium.h> | ||
52 | #include <asm/visws/piix4.h> | ||
53 | |||
54 | #include <linux/sched.h> | ||
55 | #include <linux/kernel.h> | ||
56 | #include <linux/init.h> | ||
57 | #include <linux/pci.h> | ||
58 | #include <linux/pci_ids.h> | ||
59 | |||
60 | extern int no_broadcast; | ||
61 | |||
62 | #include <asm/io.h> | ||
63 | #include <asm/apic.h> | ||
64 | #include <asm/arch_hooks.h> | ||
65 | #include <asm/visws/cobalt.h> | ||
66 | #include <asm/visws/lithium.h> | ||
67 | |||
68 | char visws_board_type = -1; | ||
69 | char visws_board_rev = -1; | ||
70 | |||
71 | int is_visws_box(void) | ||
72 | { | ||
73 | return visws_board_type >= 0; | ||
74 | } | ||
75 | |||
76 | static int __init visws_time_init_quirk(void) | ||
77 | { | ||
78 | printk(KERN_INFO "Starting Cobalt Timer system clock\n"); | ||
79 | |||
80 | /* Set the countdown value */ | ||
81 | co_cpu_write(CO_CPU_TIMEVAL, CO_TIME_HZ/HZ); | ||
82 | |||
83 | /* Start the timer */ | ||
84 | co_cpu_write(CO_CPU_CTRL, co_cpu_read(CO_CPU_CTRL) | CO_CTRL_TIMERUN); | ||
85 | |||
86 | /* Enable (unmask) the timer interrupt */ | ||
87 | co_cpu_write(CO_CPU_CTRL, co_cpu_read(CO_CPU_CTRL) & ~CO_CTRL_TIMEMASK); | ||
88 | |||
89 | /* | ||
90 | * Zero return means the generic timer setup code will set up | ||
91 | * the standard vector: | ||
92 | */ | ||
93 | return 0; | ||
94 | } | ||
95 | |||
96 | static int __init visws_pre_intr_init_quirk(void) | ||
97 | { | ||
98 | init_VISWS_APIC_irqs(); | ||
99 | |||
100 | /* | ||
101 | * We don't want ISA irqs to be set up by the generic code: | ||
102 | */ | ||
103 | return 1; | ||
104 | } | ||
105 | |||
106 | /* Quirk for machine specific memory setup. */ | ||
107 | |||
108 | #define MB (1024 * 1024) | ||
109 | |||
110 | unsigned long sgivwfb_mem_phys; | ||
111 | unsigned long sgivwfb_mem_size; | ||
112 | EXPORT_SYMBOL(sgivwfb_mem_phys); | ||
113 | EXPORT_SYMBOL(sgivwfb_mem_size); | ||
114 | |||
115 | long long mem_size __initdata = 0; | ||
116 | |||
117 | static char * __init visws_memory_setup_quirk(void) | ||
118 | { | ||
119 | long long gfx_mem_size = 8 * MB; | ||
120 | |||
121 | mem_size = boot_params.alt_mem_k; | ||
122 | |||
123 | if (!mem_size) { | ||
124 | printk(KERN_WARNING "Bootloader didn't set memory size, upgrade it !\n"); | ||
125 | mem_size = 128 * MB; | ||
126 | } | ||
127 | |||
128 | /* | ||
129 | * this hardcodes the graphics memory to 8 MB | ||
130 | * it really should be sized dynamically (or at least | ||
131 | * set as a boot param) | ||
132 | */ | ||
133 | if (!sgivwfb_mem_size) { | ||
134 | printk(KERN_WARNING "Defaulting to 8 MB framebuffer size\n"); | ||
135 | sgivwfb_mem_size = 8 * MB; | ||
136 | } | ||
137 | |||
138 | /* | ||
139 | * Trim to nearest MB | ||
140 | */ | ||
141 | sgivwfb_mem_size &= ~((1 << 20) - 1); | ||
142 | sgivwfb_mem_phys = mem_size - gfx_mem_size; | ||
143 | |||
144 | e820_add_region(0, LOWMEMSIZE(), E820_RAM); | ||
145 | e820_add_region(HIGH_MEMORY, mem_size - sgivwfb_mem_size - HIGH_MEMORY, E820_RAM); | ||
146 | e820_add_region(sgivwfb_mem_phys, sgivwfb_mem_size, E820_RESERVED); | ||
147 | |||
148 | return "PROM"; | ||
149 | } | ||
150 | |||
151 | static void visws_machine_emergency_restart(void) | ||
152 | { | ||
153 | /* | ||
154 | * Visual Workstations restart after this | ||
155 | * register is poked on the PIIX4 | ||
156 | */ | ||
157 | outb(PIIX4_RESET_VAL, PIIX4_RESET_PORT); | ||
158 | } | ||
159 | |||
160 | static void visws_machine_power_off(void) | ||
161 | { | ||
162 | unsigned short pm_status; | ||
163 | /* extern unsigned int pci_bus0; */ | ||
164 | |||
165 | while ((pm_status = inw(PMSTS_PORT)) & 0x100) | ||
166 | outw(pm_status, PMSTS_PORT); | ||
167 | |||
168 | outw(PM_SUSPEND_ENABLE, PMCNTRL_PORT); | ||
169 | |||
170 | mdelay(10); | ||
171 | |||
172 | #define PCI_CONF1_ADDRESS(bus, devfn, reg) \ | ||
173 | (0x80000000 | (bus << 16) | (devfn << 8) | (reg & ~3)) | ||
174 | |||
175 | /* outl(PCI_CONF1_ADDRESS(pci_bus0, SPECIAL_DEV, SPECIAL_REG), 0xCF8); */ | ||
176 | outl(PIIX_SPECIAL_STOP, 0xCFC); | ||
177 | } | ||
178 | |||
179 | static int __init visws_get_smp_config_quirk(unsigned int early) | ||
180 | { | ||
181 | /* | ||
182 | * Prevent MP-table parsing by the generic code: | ||
183 | */ | ||
184 | return 1; | ||
185 | } | ||
186 | |||
187 | extern unsigned int __cpuinitdata maxcpus; | ||
188 | |||
189 | /* | ||
190 | * The Visual Workstation is Intel MP compliant in the hardware | ||
191 | * sense, but it doesn't have a BIOS(-configuration table). | ||
192 | * No problem for Linux. | ||
193 | */ | ||
194 | |||
195 | static void __init MP_processor_info (struct mpc_config_processor *m) | ||
196 | { | ||
197 | int ver, logical_apicid; | ||
198 | physid_mask_t apic_cpus; | ||
199 | |||
200 | if (!(m->mpc_cpuflag & CPU_ENABLED)) | ||
201 | return; | ||
202 | |||
203 | logical_apicid = m->mpc_apicid; | ||
204 | printk(KERN_INFO "%sCPU #%d %u:%u APIC version %d\n", | ||
205 | m->mpc_cpuflag & CPU_BOOTPROCESSOR ? "Bootup " : "", | ||
206 | m->mpc_apicid, | ||
207 | (m->mpc_cpufeature & CPU_FAMILY_MASK) >> 8, | ||
208 | (m->mpc_cpufeature & CPU_MODEL_MASK) >> 4, | ||
209 | m->mpc_apicver); | ||
210 | |||
211 | if (m->mpc_cpuflag & CPU_BOOTPROCESSOR) | ||
212 | boot_cpu_physical_apicid = m->mpc_apicid; | ||
213 | |||
214 | ver = m->mpc_apicver; | ||
215 | if ((ver >= 0x14 && m->mpc_apicid >= 0xff) || m->mpc_apicid >= 0xf) { | ||
216 | printk(KERN_ERR "Processor #%d INVALID. (Max ID: %d).\n", | ||
217 | m->mpc_apicid, MAX_APICS); | ||
218 | return; | ||
219 | } | ||
220 | |||
221 | apic_cpus = apicid_to_cpu_present(m->mpc_apicid); | ||
222 | physids_or(phys_cpu_present_map, phys_cpu_present_map, apic_cpus); | ||
223 | /* | ||
224 | * Validate version | ||
225 | */ | ||
226 | if (ver == 0x0) { | ||
227 | printk(KERN_ERR "BIOS bug, APIC version is 0 for CPU#%d! " | ||
228 | "fixing up to 0x10. (tell your hw vendor)\n", | ||
229 | m->mpc_apicid); | ||
230 | ver = 0x10; | ||
231 | } | ||
232 | apic_version[m->mpc_apicid] = ver; | ||
233 | } | ||
234 | |||
235 | int __init visws_find_smp_config_quirk(unsigned int reserve) | ||
236 | { | ||
237 | struct mpc_config_processor *mp = phys_to_virt(CO_CPU_TAB_PHYS); | ||
238 | unsigned short ncpus = readw(phys_to_virt(CO_CPU_NUM_PHYS)); | ||
239 | |||
240 | if (ncpus > CO_CPU_MAX) { | ||
241 | printk(KERN_WARNING "find_visws_smp: got cpu count of %d at %p\n", | ||
242 | ncpus, mp); | ||
243 | |||
244 | ncpus = CO_CPU_MAX; | ||
245 | } | ||
246 | |||
247 | if (ncpus > maxcpus) | ||
248 | ncpus = maxcpus; | ||
249 | |||
250 | #ifdef CONFIG_X86_LOCAL_APIC | ||
251 | smp_found_config = 1; | ||
252 | #endif | ||
253 | while (ncpus--) | ||
254 | MP_processor_info(mp++); | ||
255 | |||
256 | mp_lapic_addr = APIC_DEFAULT_PHYS_BASE; | ||
257 | |||
258 | return 1; | ||
259 | } | ||
260 | |||
261 | extern int visws_trap_init_quirk(void); | ||
262 | |||
263 | void __init visws_early_detect(void) | ||
264 | { | ||
265 | int raw; | ||
266 | |||
267 | visws_board_type = (char)(inb_p(PIIX_GPI_BD_REG) & PIIX_GPI_BD_REG) | ||
268 | >> PIIX_GPI_BD_SHIFT; | ||
269 | |||
270 | if (visws_board_type < 0) | ||
271 | return; | ||
272 | |||
273 | /* | ||
274 | * Install special quirks for timer, interrupt and memory setup: | ||
275 | */ | ||
276 | arch_time_init_quirk = visws_time_init_quirk; | ||
277 | arch_pre_intr_init_quirk = visws_pre_intr_init_quirk; | ||
278 | arch_memory_setup_quirk = visws_memory_setup_quirk; | ||
279 | |||
280 | /* | ||
281 | * Fall back to generic behavior for traps: | ||
282 | */ | ||
283 | arch_intr_init_quirk = NULL; | ||
284 | arch_trap_init_quirk = visws_trap_init_quirk; | ||
285 | |||
286 | /* | ||
287 | * Install reboot quirks: | ||
288 | */ | ||
289 | pm_power_off = visws_machine_power_off; | ||
290 | machine_ops.emergency_restart = visws_machine_emergency_restart; | ||
291 | |||
292 | /* | ||
293 | * Do not use broadcast IPIs: | ||
294 | */ | ||
295 | no_broadcast = 0; | ||
296 | |||
297 | /* | ||
298 | * Override generic MP-table parsing: | ||
299 | */ | ||
300 | mach_get_smp_config_quirk = visws_get_smp_config_quirk; | ||
301 | mach_find_smp_config_quirk = visws_find_smp_config_quirk; | ||
302 | |||
303 | #ifdef CONFIG_X86_IO_APIC | ||
304 | /* | ||
305 | * Turn off IO-APIC detection and initialization: | ||
306 | */ | ||
307 | skip_ioapic_setup = 1; | ||
308 | #endif | ||
309 | |||
310 | /* | ||
311 | * Get Board rev. | ||
312 | * First, we have to initialize the 307 part to allow us access | ||
313 | * to the GPIO registers. Let's map them at 0x0fc0 which is right | ||
314 | * after the PIIX4 PM section. | ||
315 | */ | ||
316 | outb_p(SIO_DEV_SEL, SIO_INDEX); | ||
317 | outb_p(SIO_GP_DEV, SIO_DATA); /* Talk to GPIO regs. */ | ||
318 | |||
319 | outb_p(SIO_DEV_MSB, SIO_INDEX); | ||
320 | outb_p(SIO_GP_MSB, SIO_DATA); /* MSB of GPIO base address */ | ||
321 | |||
322 | outb_p(SIO_DEV_LSB, SIO_INDEX); | ||
323 | outb_p(SIO_GP_LSB, SIO_DATA); /* LSB of GPIO base address */ | ||
324 | |||
325 | outb_p(SIO_DEV_ENB, SIO_INDEX); | ||
326 | outb_p(1, SIO_DATA); /* Enable GPIO registers. */ | ||
327 | |||
328 | /* | ||
329 | * Now, we have to map the power management section to write | ||
330 | * a bit which enables access to the GPIO registers. | ||
331 | * What lunatic came up with this shit? | ||
332 | */ | ||
333 | outb_p(SIO_DEV_SEL, SIO_INDEX); | ||
334 | outb_p(SIO_PM_DEV, SIO_DATA); /* Talk to GPIO regs. */ | ||
335 | |||
336 | outb_p(SIO_DEV_MSB, SIO_INDEX); | ||
337 | outb_p(SIO_PM_MSB, SIO_DATA); /* MSB of PM base address */ | ||
338 | |||
339 | outb_p(SIO_DEV_LSB, SIO_INDEX); | ||
340 | outb_p(SIO_PM_LSB, SIO_DATA); /* LSB of PM base address */ | ||
341 | |||
342 | outb_p(SIO_DEV_ENB, SIO_INDEX); | ||
343 | outb_p(1, SIO_DATA); /* Enable PM registers. */ | ||
344 | |||
345 | /* | ||
346 | * Now, write the PM register which enables the GPIO registers. | ||
347 | */ | ||
348 | outb_p(SIO_PM_FER2, SIO_PM_INDEX); | ||
349 | outb_p(SIO_PM_GP_EN, SIO_PM_DATA); | ||
350 | |||
351 | /* | ||
352 | * Now, initialize the GPIO registers. | ||
353 | * We want them all to be inputs which is the | ||
354 | * power on default, so let's leave them alone. | ||
355 | * So, let's just read the board rev! | ||
356 | */ | ||
357 | raw = inb_p(SIO_GP_DATA1); | ||
358 | raw &= 0x7f; /* 7 bits of valid board revision ID. */ | ||
359 | |||
360 | if (visws_board_type == VISWS_320) { | ||
361 | if (raw < 0x6) { | ||
362 | visws_board_rev = 4; | ||
363 | } else if (raw < 0xc) { | ||
364 | visws_board_rev = 5; | ||
365 | } else { | ||
366 | visws_board_rev = 6; | ||
367 | } | ||
368 | } else if (visws_board_type == VISWS_540) { | ||
369 | visws_board_rev = 2; | ||
370 | } else { | ||
371 | visws_board_rev = raw; | ||
372 | } | ||
373 | |||
374 | printk(KERN_INFO "Silicon Graphics Visual Workstation %s (rev %d) detected\n", | ||
375 | (visws_board_type == VISWS_320 ? "320" : | ||
376 | (visws_board_type == VISWS_540 ? "540" : | ||
377 | "unknown")), visws_board_rev); | ||
378 | } | ||
379 | |||
380 | #define A01234 (LI_INTA_0 | LI_INTA_1 | LI_INTA_2 | LI_INTA_3 | LI_INTA_4) | ||
381 | #define BCD (LI_INTB | LI_INTC | LI_INTD) | ||
382 | #define ALLDEVS (A01234 | BCD) | ||
383 | |||
384 | static __init void lithium_init(void) | ||
385 | { | ||
386 | set_fixmap(FIX_LI_PCIA, LI_PCI_A_PHYS); | ||
387 | set_fixmap(FIX_LI_PCIB, LI_PCI_B_PHYS); | ||
388 | |||
389 | if ((li_pcia_read16(PCI_VENDOR_ID) != PCI_VENDOR_ID_SGI) || | ||
390 | (li_pcia_read16(PCI_DEVICE_ID) != PCI_DEVICE_ID_SGI_LITHIUM)) { | ||
391 | printk(KERN_EMERG "Lithium hostbridge %c not found\n", 'A'); | ||
392 | /* panic("This machine is not SGI Visual Workstation 320/540"); */ | ||
393 | } | ||
394 | |||
395 | if ((li_pcib_read16(PCI_VENDOR_ID) != PCI_VENDOR_ID_SGI) || | ||
396 | (li_pcib_read16(PCI_DEVICE_ID) != PCI_DEVICE_ID_SGI_LITHIUM)) { | ||
397 | printk(KERN_EMERG "Lithium hostbridge %c not found\n", 'B'); | ||
398 | /* panic("This machine is not SGI Visual Workstation 320/540"); */ | ||
399 | } | ||
400 | |||
401 | li_pcia_write16(LI_PCI_INTEN, ALLDEVS); | ||
402 | li_pcib_write16(LI_PCI_INTEN, ALLDEVS); | ||
403 | } | ||
404 | |||
405 | static __init void cobalt_init(void) | ||
406 | { | ||
407 | /* | ||
408 | * On a normal SMP PC this is used only with SMP, but we have to | ||
409 | * use it and set it up here to start the Cobalt clock | ||
410 | */ | ||
411 | set_fixmap(FIX_APIC_BASE, APIC_DEFAULT_PHYS_BASE); | ||
412 | setup_local_APIC(); | ||
413 | printk(KERN_INFO "Local APIC Version %#x, ID %#x\n", | ||
414 | (unsigned int)apic_read(APIC_LVR), | ||
415 | (unsigned int)apic_read(APIC_ID)); | ||
416 | |||
417 | set_fixmap(FIX_CO_CPU, CO_CPU_PHYS); | ||
418 | set_fixmap(FIX_CO_APIC, CO_APIC_PHYS); | ||
419 | printk(KERN_INFO "Cobalt Revision %#lx, APIC ID %#lx\n", | ||
420 | co_cpu_read(CO_CPU_REV), co_apic_read(CO_APIC_ID)); | ||
421 | |||
422 | /* Enable Cobalt APIC being careful to NOT change the ID! */ | ||
423 | co_apic_write(CO_APIC_ID, co_apic_read(CO_APIC_ID) | CO_APIC_ENABLE); | ||
424 | |||
425 | printk(KERN_INFO "Cobalt APIC enabled: ID reg %#lx\n", | ||
426 | co_apic_read(CO_APIC_ID)); | ||
427 | } | ||
428 | |||
429 | int __init visws_trap_init_quirk(void) | ||
430 | { | ||
431 | lithium_init(); | ||
432 | cobalt_init(); | ||
433 | |||
434 | return 1; | ||
435 | } | ||
436 | |||
437 | /* | ||
438 | * IRQ controller / APIC support: | ||
439 | */ | ||
440 | |||
441 | static DEFINE_SPINLOCK(cobalt_lock); | ||
442 | |||
443 | /* | ||
444 | * Set the given Cobalt APIC Redirection Table entry to point | ||
445 | * to the given IDT vector/index. | ||
446 | */ | ||
447 | static inline void co_apic_set(int entry, int irq) | ||
448 | { | ||
449 | co_apic_write(CO_APIC_LO(entry), CO_APIC_LEVEL | (irq + FIRST_EXTERNAL_VECTOR)); | ||
450 | co_apic_write(CO_APIC_HI(entry), 0); | ||
451 | } | ||
452 | |||
453 | /* | ||
454 | * Cobalt (IO)-APIC functions to handle PCI devices. | ||
455 | */ | ||
456 | static inline int co_apic_ide0_hack(void) | ||
457 | { | ||
458 | extern char visws_board_type; | ||
459 | extern char visws_board_rev; | ||
460 | |||
461 | if (visws_board_type == VISWS_320 && visws_board_rev == 5) | ||
462 | return 5; | ||
463 | return CO_APIC_IDE0; | ||
464 | } | ||
465 | |||
466 | static int is_co_apic(unsigned int irq) | ||
467 | { | ||
468 | if (IS_CO_APIC(irq)) | ||
469 | return CO_APIC(irq); | ||
470 | |||
471 | switch (irq) { | ||
472 | case 0: return CO_APIC_CPU; | ||
473 | case CO_IRQ_IDE0: return co_apic_ide0_hack(); | ||
474 | case CO_IRQ_IDE1: return CO_APIC_IDE1; | ||
475 | default: return -1; | ||
476 | } | ||
477 | } | ||
478 | |||
479 | |||
480 | /* | ||
481 | * This is the SGI Cobalt (IO-)APIC: | ||
482 | */ | ||
483 | |||
484 | static void enable_cobalt_irq(unsigned int irq) | ||
485 | { | ||
486 | co_apic_set(is_co_apic(irq), irq); | ||
487 | } | ||
488 | |||
489 | static void disable_cobalt_irq(unsigned int irq) | ||
490 | { | ||
491 | int entry = is_co_apic(irq); | ||
492 | |||
493 | co_apic_write(CO_APIC_LO(entry), CO_APIC_MASK); | ||
494 | co_apic_read(CO_APIC_LO(entry)); | ||
495 | } | ||
496 | |||
497 | /* | ||
498 | * "irq" really just serves to identify the device. Here is where we | ||
499 | * map this to the Cobalt APIC entry where it's physically wired. | ||
500 | * This is called via request_irq -> setup_irq -> irq_desc->startup() | ||
501 | */ | ||
502 | static unsigned int startup_cobalt_irq(unsigned int irq) | ||
503 | { | ||
504 | unsigned long flags; | ||
505 | |||
506 | spin_lock_irqsave(&cobalt_lock, flags); | ||
507 | if ((irq_desc[irq].status & (IRQ_DISABLED | IRQ_INPROGRESS | IRQ_WAITING))) | ||
508 | irq_desc[irq].status &= ~(IRQ_DISABLED | IRQ_INPROGRESS | IRQ_WAITING); | ||
509 | enable_cobalt_irq(irq); | ||
510 | spin_unlock_irqrestore(&cobalt_lock, flags); | ||
511 | return 0; | ||
512 | } | ||
513 | |||
514 | static void ack_cobalt_irq(unsigned int irq) | ||
515 | { | ||
516 | unsigned long flags; | ||
517 | |||
518 | spin_lock_irqsave(&cobalt_lock, flags); | ||
519 | disable_cobalt_irq(irq); | ||
520 | apic_write(APIC_EOI, APIC_EIO_ACK); | ||
521 | spin_unlock_irqrestore(&cobalt_lock, flags); | ||
522 | } | ||
523 | |||
524 | static void end_cobalt_irq(unsigned int irq) | ||
525 | { | ||
526 | unsigned long flags; | ||
527 | |||
528 | spin_lock_irqsave(&cobalt_lock, flags); | ||
529 | if (!(irq_desc[irq].status & (IRQ_DISABLED | IRQ_INPROGRESS))) | ||
530 | enable_cobalt_irq(irq); | ||
531 | spin_unlock_irqrestore(&cobalt_lock, flags); | ||
532 | } | ||
533 | |||
534 | static struct irq_chip cobalt_irq_type = { | ||
535 | .typename = "Cobalt-APIC", | ||
536 | .startup = startup_cobalt_irq, | ||
537 | .shutdown = disable_cobalt_irq, | ||
538 | .enable = enable_cobalt_irq, | ||
539 | .disable = disable_cobalt_irq, | ||
540 | .ack = ack_cobalt_irq, | ||
541 | .end = end_cobalt_irq, | ||
542 | }; | ||
543 | |||
544 | |||
545 | /* | ||
546 | * This is the PIIX4-based 8259 that is wired up indirectly to Cobalt | ||
547 | * -- not the manner expected by the code in i8259.c. | ||
548 | * | ||
549 | * there is a 'master' physical interrupt source that gets sent to | ||
550 | * the CPU. But in the chipset there are various 'virtual' interrupts | ||
551 | * waiting to be handled. We represent this to Linux through a 'master' | ||
552 | * interrupt controller type, and through a special virtual interrupt- | ||
553 | * controller. Device drivers only see the virtual interrupt sources. | ||
554 | */ | ||
555 | static unsigned int startup_piix4_master_irq(unsigned int irq) | ||
556 | { | ||
557 | init_8259A(0); | ||
558 | |||
559 | return startup_cobalt_irq(irq); | ||
560 | } | ||
561 | |||
562 | static void end_piix4_master_irq(unsigned int irq) | ||
563 | { | ||
564 | unsigned long flags; | ||
565 | |||
566 | spin_lock_irqsave(&cobalt_lock, flags); | ||
567 | enable_cobalt_irq(irq); | ||
568 | spin_unlock_irqrestore(&cobalt_lock, flags); | ||
569 | } | ||
570 | |||
571 | static struct irq_chip piix4_master_irq_type = { | ||
572 | .typename = "PIIX4-master", | ||
573 | .startup = startup_piix4_master_irq, | ||
574 | .ack = ack_cobalt_irq, | ||
575 | .end = end_piix4_master_irq, | ||
576 | }; | ||
577 | |||
578 | |||
579 | static struct irq_chip piix4_virtual_irq_type = { | ||
580 | .typename = "PIIX4-virtual", | ||
581 | .shutdown = disable_8259A_irq, | ||
582 | .enable = enable_8259A_irq, | ||
583 | .disable = disable_8259A_irq, | ||
584 | }; | ||
585 | |||
586 | |||
587 | /* | ||
588 | * PIIX4-8259 master/virtual functions to handle interrupt requests | ||
589 | * from legacy devices: floppy, parallel, serial, rtc. | ||
590 | * | ||
591 | * None of these get Cobalt APIC entries, neither do they have IDT | ||
592 | * entries. These interrupts are purely virtual and distributed from | ||
593 | * the 'master' interrupt source: CO_IRQ_8259. | ||
594 | * | ||
595 | * When the 8259 interrupts, its handler figures out which of these | ||
596 | * devices is interrupting and dispatches to its handler. | ||
597 | * | ||
598 | * CAREFUL: devices see the 'virtual' interrupt only. Thus disable/ | ||
599 | * enable_irq gets the right irq. This 'master' irq is never directly | ||
600 | * manipulated by any driver. | ||
601 | */ | ||
602 | static irqreturn_t piix4_master_intr(int irq, void *dev_id) | ||
603 | { | ||
604 | int realirq; | ||
605 | irq_desc_t *desc; | ||
606 | unsigned long flags; | ||
607 | |||
608 | spin_lock_irqsave(&i8259A_lock, flags); | ||
609 | |||
610 | /* Find out what's interrupting in the PIIX4 master 8259 */ | ||
611 | outb(0x0c, 0x20); /* OCW3 Poll command */ | ||
612 | realirq = inb(0x20); | ||
613 | |||
614 | /* | ||
615 | * Bit 7 == 0 means invalid/spurious | ||
616 | */ | ||
617 | if (unlikely(!(realirq & 0x80))) | ||
618 | goto out_unlock; | ||
619 | |||
620 | realirq &= 7; | ||
621 | |||
622 | if (unlikely(realirq == 2)) { | ||
623 | outb(0x0c, 0xa0); | ||
624 | realirq = inb(0xa0); | ||
625 | |||
626 | if (unlikely(!(realirq & 0x80))) | ||
627 | goto out_unlock; | ||
628 | |||
629 | realirq = (realirq & 7) + 8; | ||
630 | } | ||
631 | |||
632 | /* mask and ack interrupt */ | ||
633 | cached_irq_mask |= 1 << realirq; | ||
634 | if (unlikely(realirq > 7)) { | ||
635 | inb(0xa1); | ||
636 | outb(cached_slave_mask, 0xa1); | ||
637 | outb(0x60 + (realirq & 7), 0xa0); | ||
638 | outb(0x60 + 2, 0x20); | ||
639 | } else { | ||
640 | inb(0x21); | ||
641 | outb(cached_master_mask, 0x21); | ||
642 | outb(0x60 + realirq, 0x20); | ||
643 | } | ||
644 | |||
645 | spin_unlock_irqrestore(&i8259A_lock, flags); | ||
646 | |||
647 | desc = irq_desc + realirq; | ||
648 | |||
649 | /* | ||
650 | * handle this 'virtual interrupt' as a Cobalt one now. | ||
651 | */ | ||
652 | kstat_cpu(smp_processor_id()).irqs[realirq]++; | ||
653 | |||
654 | if (likely(desc->action != NULL)) | ||
655 | handle_IRQ_event(realirq, desc->action); | ||
656 | |||
657 | if (!(desc->status & IRQ_DISABLED)) | ||
658 | enable_8259A_irq(realirq); | ||
659 | |||
660 | return IRQ_HANDLED; | ||
661 | |||
662 | out_unlock: | ||
663 | spin_unlock_irqrestore(&i8259A_lock, flags); | ||
664 | return IRQ_NONE; | ||
665 | } | ||
666 | |||
667 | static struct irqaction master_action = { | ||
668 | .handler = piix4_master_intr, | ||
669 | .name = "PIIX4-8259", | ||
670 | }; | ||
671 | |||
672 | static struct irqaction cascade_action = { | ||
673 | .handler = no_action, | ||
674 | .name = "cascade", | ||
675 | }; | ||
676 | |||
677 | |||
678 | void init_VISWS_APIC_irqs(void) | ||
679 | { | ||
680 | int i; | ||
681 | |||
682 | for (i = 0; i < CO_IRQ_APIC0 + CO_APIC_LAST + 1; i++) { | ||
683 | irq_desc[i].status = IRQ_DISABLED; | ||
684 | irq_desc[i].action = 0; | ||
685 | irq_desc[i].depth = 1; | ||
686 | |||
687 | if (i == 0) { | ||
688 | irq_desc[i].chip = &cobalt_irq_type; | ||
689 | } | ||
690 | else if (i == CO_IRQ_IDE0) { | ||
691 | irq_desc[i].chip = &cobalt_irq_type; | ||
692 | } | ||
693 | else if (i == CO_IRQ_IDE1) { | ||
694 | irq_desc[i].chip = &cobalt_irq_type; | ||
695 | } | ||
696 | else if (i == CO_IRQ_8259) { | ||
697 | irq_desc[i].chip = &piix4_master_irq_type; | ||
698 | } | ||
699 | else if (i < CO_IRQ_APIC0) { | ||
700 | irq_desc[i].chip = &piix4_virtual_irq_type; | ||
701 | } | ||
702 | else if (IS_CO_APIC(i)) { | ||
703 | irq_desc[i].chip = &cobalt_irq_type; | ||
704 | } | ||
705 | } | ||
706 | |||
707 | setup_irq(CO_IRQ_8259, &master_action); | ||
708 | setup_irq(2, &cascade_action); | ||
709 | } | ||
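The poll handshake piix4_master_intr() performs above is the standard 8259 poll-mode sequence. A compact sketch of just that protocol, using the same port constants (this assumes kernel-style outb()/inb() helpers and a context with I/O privileges):

	static int poll_piix4_8259(void)
	{
		int poll;

		outb(0x0c, 0x20);	/* OCW3 poll command to the master PIC */
		poll = inb(0x20);	/* poll byte */
		if (!(poll & 0x80))	/* bit 7 clear: spurious/invalid */
			return -1;
		poll &= 7;		/* low three bits: IRQ level 0..7 */
		if (poll == 2) {	/* IRQ2 is the cascade, ask the slave */
			outb(0x0c, 0xa0);
			poll = inb(0xa0);
			if (!(poll & 0x80))
				return -1;
			poll = (poll & 7) + 8;
		}
		return poll;		/* 0..15, specific EOI still pending */
	}

The real handler then masks the line in the cached mask, issues the specific EOI (0x60 + level) to the right PIC, and dispatches the matching 'virtual' irq_desc entry, exactly as shown above.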
diff --git a/arch/x86/kernel/vmi_32.c b/arch/x86/kernel/vmi_32.c index 946bf13b44ab..b15346092b7b 100644 --- a/arch/x86/kernel/vmi_32.c +++ b/arch/x86/kernel/vmi_32.c | |||
@@ -932,7 +932,7 @@ static inline int __init activate_vmi(void) | |||
932 | pv_apic_ops.setup_secondary_clock = vmi_time_ap_init; | 932 | pv_apic_ops.setup_secondary_clock = vmi_time_ap_init; |
933 | #endif | 933 | #endif |
934 | pv_time_ops.sched_clock = vmi_sched_clock; | 934 | pv_time_ops.sched_clock = vmi_sched_clock; |
935 | pv_time_ops.get_cpu_khz = vmi_cpu_khz; | 935 | pv_time_ops.get_tsc_khz = vmi_tsc_khz; |
936 | 936 | ||
937 | /* We have true wallclock functions; disable CMOS clock sync */ | 937 | /* We have true wallclock functions; disable CMOS clock sync */ |
938 | no_sync_cmos_clock = 1; | 938 | no_sync_cmos_clock = 1; |
diff --git a/arch/x86/kernel/vmiclock_32.c b/arch/x86/kernel/vmiclock_32.c index ba7d19e102b1..6953859fe289 100644 --- a/arch/x86/kernel/vmiclock_32.c +++ b/arch/x86/kernel/vmiclock_32.c | |||
@@ -69,8 +69,8 @@ unsigned long long vmi_sched_clock(void) | |||
69 | return cycles_2_ns(vmi_timer_ops.get_cycle_counter(VMI_CYCLES_AVAILABLE)); | 69 | return cycles_2_ns(vmi_timer_ops.get_cycle_counter(VMI_CYCLES_AVAILABLE)); |
70 | } | 70 | } |
71 | 71 | ||
72 | /* paravirt_ops.get_cpu_khz = vmi_cpu_khz */ | 72 | /* paravirt_ops.get_tsc_khz = vmi_tsc_khz */ |
73 | unsigned long vmi_cpu_khz(void) | 73 | unsigned long vmi_tsc_khz(void) |
74 | { | 74 | { |
75 | unsigned long long khz; | 75 | unsigned long long khz; |
76 | khz = vmi_timer_ops.get_cycle_frequency(); | 76 | khz = vmi_timer_ops.get_cycle_frequency(); |
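The rename makes the paravirt hook say what it measures: the backend's cycle-counter frequency in kHz, not a nominal CPU speed. A hedged sketch of how vmi_tsc_khz() completes past the cut-off above (the plain truncating divide stands in for the kernel's (void)do_div(khz, 1000)):

	unsigned long vmi_tsc_khz(void)
	{
		unsigned long long khz;

		khz = vmi_timer_ops.get_cycle_frequency();	/* Hz */
		return (unsigned long)(khz / 1000);		/* kHz, truncated */
	}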
diff --git a/arch/x86/kernel/vmlinux_32.lds.S b/arch/x86/kernel/vmlinux_32.lds.S index 2674f5796275..cdb2363697d2 100644 --- a/arch/x86/kernel/vmlinux_32.lds.S +++ b/arch/x86/kernel/vmlinux_32.lds.S | |||
@@ -49,16 +49,14 @@ SECTIONS | |||
49 | _etext = .; /* End of text section */ | 49 | _etext = .; /* End of text section */ |
50 | } :text = 0x9090 | 50 | } :text = 0x9090 |
51 | 51 | ||
52 | NOTES :text :note | ||
53 | |||
52 | . = ALIGN(16); /* Exception table */ | 54 | . = ALIGN(16); /* Exception table */ |
53 | __ex_table : AT(ADDR(__ex_table) - LOAD_OFFSET) { | 55 | __ex_table : AT(ADDR(__ex_table) - LOAD_OFFSET) { |
54 | __start___ex_table = .; | 56 | __start___ex_table = .; |
55 | *(__ex_table) | 57 | *(__ex_table) |
56 | __stop___ex_table = .; | 58 | __stop___ex_table = .; |
57 | } | 59 | } :text = 0x9090 |
58 | |||
59 | NOTES :text :note | ||
60 | |||
61 | BUG_TABLE :text | ||
62 | 60 | ||
63 | RODATA | 61 | RODATA |
64 | 62 | ||
diff --git a/arch/x86/kernel/vmlinux_64.lds.S b/arch/x86/kernel/vmlinux_64.lds.S index fd246e22fe6b..63e5c1a22e88 100644 --- a/arch/x86/kernel/vmlinux_64.lds.S +++ b/arch/x86/kernel/vmlinux_64.lds.S | |||
@@ -19,7 +19,7 @@ PHDRS { | |||
19 | data PT_LOAD FLAGS(7); /* RWE */ | 19 | data PT_LOAD FLAGS(7); /* RWE */ |
20 | user PT_LOAD FLAGS(7); /* RWE */ | 20 | user PT_LOAD FLAGS(7); /* RWE */ |
21 | data.init PT_LOAD FLAGS(7); /* RWE */ | 21 | data.init PT_LOAD FLAGS(7); /* RWE */ |
22 | note PT_NOTE FLAGS(4); /* R__ */ | 22 | note PT_NOTE FLAGS(0); /* ___ */ |
23 | } | 23 | } |
24 | SECTIONS | 24 | SECTIONS |
25 | { | 25 | { |
@@ -40,16 +40,14 @@ SECTIONS | |||
40 | _etext = .; /* End of text section */ | 40 | _etext = .; /* End of text section */ |
41 | } :text = 0x9090 | 41 | } :text = 0x9090 |
42 | 42 | ||
43 | NOTES :text :note | ||
44 | |||
43 | . = ALIGN(16); /* Exception table */ | 45 | . = ALIGN(16); /* Exception table */ |
44 | __ex_table : AT(ADDR(__ex_table) - LOAD_OFFSET) { | 46 | __ex_table : AT(ADDR(__ex_table) - LOAD_OFFSET) { |
45 | __start___ex_table = .; | 47 | __start___ex_table = .; |
46 | *(__ex_table) | 48 | *(__ex_table) |
47 | __stop___ex_table = .; | 49 | __stop___ex_table = .; |
48 | } | 50 | } :text = 0x9090 |
49 | |||
50 | NOTES :text :note | ||
51 | |||
52 | BUG_TABLE :text | ||
53 | 51 | ||
54 | RODATA | 52 | RODATA |
55 | 53 | ||
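The effect of the move, in both the 32- and 64-bit scripts: the note sections are emitted straight after .text while the :note program header carries no permissions, and __ex_table is pinned back to :text so nothing after the notes lands in PT_NOTE. For reference, a sketch of what the NOTES macro (include/asm-generic/vmlinux.lds.h) expands to at this spot -- approximate, not the exact upstream text:

	.notes : AT(ADDR(.notes) - LOAD_OFFSET) {
		__start_notes = .;
		*(.note.*)
		__stop_notes = .;
	} :text :note

Output sections inherit the most recently named program headers, which is why the following __ex_table section must restate ':text = 0x9090' to fall back out of the note segment.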
diff --git a/arch/x86/kernel/vsyscall_64.c b/arch/x86/kernel/vsyscall_64.c index c87cbd84c3e5..0b8b6690a86d 100644 --- a/arch/x86/kernel/vsyscall_64.c +++ b/arch/x86/kernel/vsyscall_64.c | |||
@@ -42,7 +42,8 @@ | |||
42 | #include <asm/topology.h> | 42 | #include <asm/topology.h> |
43 | #include <asm/vgtod.h> | 43 | #include <asm/vgtod.h> |
44 | 44 | ||
45 | #define __vsyscall(nr) __attribute__ ((unused,__section__(".vsyscall_" #nr))) | 45 | #define __vsyscall(nr) \ |
46 | __attribute__ ((unused, __section__(".vsyscall_" #nr))) notrace | ||
46 | #define __syscall_clobber "r11","cx","memory" | 47 | #define __syscall_clobber "r11","cx","memory" |
47 | 48 | ||
48 | /* | 49 | /* |
@@ -278,7 +279,7 @@ cpu_vsyscall_notifier(struct notifier_block *n, unsigned long action, void *arg) | |||
278 | { | 279 | { |
279 | long cpu = (long)arg; | 280 | long cpu = (long)arg; |
280 | if (action == CPU_ONLINE || action == CPU_ONLINE_FROZEN) | 281 | if (action == CPU_ONLINE || action == CPU_ONLINE_FROZEN) |
281 | smp_call_function_single(cpu, cpu_vsyscall_init, NULL, 0, 1); | 282 | smp_call_function_single(cpu, cpu_vsyscall_init, NULL, 1); |
282 | return NOTIFY_DONE; | 283 | return NOTIFY_DONE; |
283 | } | 284 | } |
284 | 285 | ||
@@ -301,7 +302,7 @@ static int __init vsyscall_init(void) | |||
301 | #ifdef CONFIG_SYSCTL | 302 | #ifdef CONFIG_SYSCTL |
302 | register_sysctl_table(kernel_root_table2); | 303 | register_sysctl_table(kernel_root_table2); |
303 | #endif | 304 | #endif |
304 | on_each_cpu(cpu_vsyscall_init, NULL, 0, 1); | 305 | on_each_cpu(cpu_vsyscall_init, NULL, 1); |
305 | hotcpu_notifier(cpu_vsyscall_notifier, 0); | 306 | hotcpu_notifier(cpu_vsyscall_notifier, 0); |
306 | return 0; | 307 | return 0; |
307 | } | 308 | } |
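Every smp_call_function_single()/on_each_cpu() caller in this merge changes the same way: the generic SMP helper rework drops the long-unused flag (historically called nonatomic or retry), leaving only wait. A hedged before/after sketch (helper name illustrative):

	/* old: smp_call_function_single(cpu, func, info, nonatomic, wait); */
	/* new: smp_call_function_single(cpu, func, info, wait);            */

	static int bring_up_vsyscall(int cpu)
	{
		/* wait = 1: don't return until cpu_vsyscall_init ran on cpu */
		return smp_call_function_single(cpu, cpu_vsyscall_init, NULL, 1);
	}

kvm_vcpu_kick() further down passes wait = 0 instead, since kicking a vcpu out of guest mode is fire-and-forget.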
diff --git a/arch/x86/kernel/x8664_ksyms_64.c b/arch/x86/kernel/x8664_ksyms_64.c index 2f306a826897..b545f371b5f5 100644 --- a/arch/x86/kernel/x8664_ksyms_64.c +++ b/arch/x86/kernel/x8664_ksyms_64.c | |||
@@ -2,13 +2,20 @@ | |||
2 | All C exports should go in the respective C files. */ | 2 | All C exports should go in the respective C files. */ |
3 | 3 | ||
4 | #include <linux/module.h> | 4 | #include <linux/module.h> |
5 | #include <net/checksum.h> | ||
6 | #include <linux/smp.h> | 5 | #include <linux/smp.h> |
7 | 6 | ||
7 | #include <net/checksum.h> | ||
8 | |||
8 | #include <asm/processor.h> | 9 | #include <asm/processor.h> |
9 | #include <asm/uaccess.h> | ||
10 | #include <asm/pgtable.h> | 10 | #include <asm/pgtable.h> |
11 | #include <asm/uaccess.h> | ||
11 | #include <asm/desc.h> | 12 | #include <asm/desc.h> |
13 | #include <asm/ftrace.h> | ||
14 | |||
15 | #ifdef CONFIG_FTRACE | ||
16 | /* mcount is defined in assembly */ | ||
17 | EXPORT_SYMBOL(mcount); | ||
18 | #endif | ||
12 | 19 | ||
13 | EXPORT_SYMBOL(kernel_thread); | 20 | EXPORT_SYMBOL(kernel_thread); |
14 | 21 | ||
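The new export exists because CONFIG_FTRACE builds the kernel and all modules with -pg, which makes the compiler plant a call to mcount in every function prologue; without EXPORT_SYMBOL(mcount), any tracer-enabled module would fail to load with an unresolved symbol. A purely illustrative sketch of what the instrumentation amounts to:

	/* Compiled with -pg, this function's prologue effectively gains
	 * an extra first instruction:
	 *
	 *	call mcount
	 *
	 * and it is the module loader that must resolve that symbol,
	 * hence the export above. */
	static noinline void any_module_function(void)
	{
		/* ... ordinary body ... */
	}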
diff --git a/arch/x86/kvm/vmx.c b/arch/x86/kvm/vmx.c index 540e95179074..10ce6ee4c491 100644 --- a/arch/x86/kvm/vmx.c +++ b/arch/x86/kvm/vmx.c | |||
@@ -335,7 +335,7 @@ static void vcpu_clear(struct vcpu_vmx *vmx) | |||
335 | { | 335 | { |
336 | if (vmx->vcpu.cpu == -1) | 336 | if (vmx->vcpu.cpu == -1) |
337 | return; | 337 | return; |
338 | smp_call_function_single(vmx->vcpu.cpu, __vcpu_clear, vmx, 0, 1); | 338 | smp_call_function_single(vmx->vcpu.cpu, __vcpu_clear, vmx, 1); |
339 | vmx->launched = 0; | 339 | vmx->launched = 0; |
340 | } | 340 | } |
341 | 341 | ||
@@ -2968,7 +2968,7 @@ static void vmx_free_vmcs(struct kvm_vcpu *vcpu) | |||
2968 | struct vcpu_vmx *vmx = to_vmx(vcpu); | 2968 | struct vcpu_vmx *vmx = to_vmx(vcpu); |
2969 | 2969 | ||
2970 | if (vmx->vmcs) { | 2970 | if (vmx->vmcs) { |
2971 | on_each_cpu(__vcpu_clear, vmx, 0, 1); | 2971 | on_each_cpu(__vcpu_clear, vmx, 1); |
2972 | free_vmcs(vmx->vmcs); | 2972 | free_vmcs(vmx->vmcs); |
2973 | vmx->vmcs = NULL; | 2973 | vmx->vmcs = NULL; |
2974 | } | 2974 | } |
diff --git a/arch/x86/kvm/x86.c b/arch/x86/kvm/x86.c index 63a77caa59f1..0faa2546b1cd 100644 --- a/arch/x86/kvm/x86.c +++ b/arch/x86/kvm/x86.c | |||
@@ -4044,6 +4044,6 @@ void kvm_vcpu_kick(struct kvm_vcpu *vcpu) | |||
4044 | * So need not to call smp_call_function_single() in that case. | 4044 | * So need not to call smp_call_function_single() in that case. |
4045 | */ | 4045 | */ |
4046 | if (vcpu->guest_mode && vcpu->cpu != cpu) | 4046 | if (vcpu->guest_mode && vcpu->cpu != cpu) |
4047 | smp_call_function_single(ipi_pcpu, vcpu_kick_intr, vcpu, 0, 0); | 4047 | smp_call_function_single(ipi_pcpu, vcpu_kick_intr, vcpu, 0); |
4048 | put_cpu(); | 4048 | put_cpu(); |
4049 | } | 4049 | } |
diff --git a/arch/x86/lguest/Kconfig b/arch/x86/lguest/Kconfig index 964dfa36d367..c70e12b1a637 100644 --- a/arch/x86/lguest/Kconfig +++ b/arch/x86/lguest/Kconfig | |||
@@ -3,7 +3,7 @@ config LGUEST_GUEST | |||
3 | select PARAVIRT | 3 | select PARAVIRT |
4 | depends on X86_32 | 4 | depends on X86_32 |
5 | depends on !X86_PAE | 5 | depends on !X86_PAE |
6 | depends on !(X86_VISWS || X86_VOYAGER) | 6 | depends on !X86_VOYAGER |
7 | select VIRTIO | 7 | select VIRTIO |
8 | select VIRTIO_RING | 8 | select VIRTIO_RING |
9 | select VIRTIO_CONSOLE | 9 | select VIRTIO_CONSOLE |
diff --git a/arch/x86/lguest/boot.c b/arch/x86/lguest/boot.c index e72cf0793fbe..50dad44fb542 100644 --- a/arch/x86/lguest/boot.c +++ b/arch/x86/lguest/boot.c | |||
@@ -607,7 +607,7 @@ static unsigned long lguest_get_wallclock(void) | |||
607 | * what speed it runs at, or 0 if it's unusable as a reliable clock source. | 607 | * what speed it runs at, or 0 if it's unusable as a reliable clock source. |
608 | * This matches what we want here: if we return 0 from this function, the x86 | 608 | * This matches what we want here: if we return 0 from this function, the x86 |
609 | * TSC clock will give up and not register itself. */ | 609 | * TSC clock will give up and not register itself. */ |
610 | static unsigned long lguest_cpu_khz(void) | 610 | static unsigned long lguest_tsc_khz(void) |
611 | { | 611 | { |
612 | return lguest_data.tsc_khz; | 612 | return lguest_data.tsc_khz; |
613 | } | 613 | } |
@@ -998,7 +998,7 @@ __init void lguest_init(void) | |||
998 | /* time operations */ | 998 | /* time operations */ |
999 | pv_time_ops.get_wallclock = lguest_get_wallclock; | 999 | pv_time_ops.get_wallclock = lguest_get_wallclock; |
1000 | pv_time_ops.time_init = lguest_time_init; | 1000 | pv_time_ops.time_init = lguest_time_init; |
1001 | pv_time_ops.get_cpu_khz = lguest_cpu_khz; | 1001 | pv_time_ops.get_tsc_khz = lguest_tsc_khz; |
1002 | 1002 | ||
1003 | /* Now is a good time to look at the implementations of these functions | 1003 | /* Now is a good time to look at the implementations of these functions |
1004 | * before returning to the rest of lguest_init(). */ | 1004 | * before returning to the rest of lguest_init(). */ |
diff --git a/arch/x86/lib/Makefile b/arch/x86/lib/Makefile index 76f60f52a885..aa3fa4119424 100644 --- a/arch/x86/lib/Makefile +++ b/arch/x86/lib/Makefile | |||
@@ -4,8 +4,9 @@ | |||
4 | 4 | ||
5 | obj-$(CONFIG_SMP) := msr-on-cpu.o | 5 | obj-$(CONFIG_SMP) := msr-on-cpu.o |
6 | 6 | ||
7 | lib-y := delay_$(BITS).o | 7 | lib-y := delay.o |
8 | lib-y += usercopy_$(BITS).o getuser_$(BITS).o putuser_$(BITS).o | 8 | lib-y += thunk_$(BITS).o |
9 | lib-y += usercopy_$(BITS).o getuser.o putuser.o | ||
9 | lib-y += memcpy_$(BITS).o | 10 | lib-y += memcpy_$(BITS).o |
10 | 11 | ||
11 | ifeq ($(CONFIG_X86_32),y) | 12 | ifeq ($(CONFIG_X86_32),y) |
diff --git a/arch/x86/lib/copy_user_64.S b/arch/x86/lib/copy_user_64.S index ee1c3f635157..dfdf428975c0 100644 --- a/arch/x86/lib/copy_user_64.S +++ b/arch/x86/lib/copy_user_64.S | |||
@@ -1,8 +1,10 @@ | |||
1 | /* Copyright 2002 Andi Kleen, SuSE Labs. | 1 | /* |
2 | * Copyright 2008 Vitaly Mayatskikh <vmayatsk@redhat.com> | ||
3 | * Copyright 2002 Andi Kleen, SuSE Labs. | ||
2 | * Subject to the GNU Public License v2. | 4 | * Subject to the GNU Public License v2. |
3 | * | 5 | * |
4 | * Functions to copy from and to user space. | 6 | * Functions to copy from and to user space. |
5 | */ | 7 | */ |
6 | 8 | ||
7 | #include <linux/linkage.h> | 9 | #include <linux/linkage.h> |
8 | #include <asm/dwarf2.h> | 10 | #include <asm/dwarf2.h> |
@@ -20,60 +22,88 @@ | |||
20 | .long \orig-1f /* by default jump to orig */ | 22 | .long \orig-1f /* by default jump to orig */ |
21 | 1: | 23 | 1: |
22 | .section .altinstr_replacement,"ax" | 24 | .section .altinstr_replacement,"ax" |
23 | 2: .byte 0xe9 /* near jump with 32bit immediate */ | 25 | 2: .byte 0xe9 /* near jump with 32bit immediate */ |
24 | .long \alt-1b /* offset */ /* or alternatively to alt */ | 26 | .long \alt-1b /* offset */ /* or alternatively to alt */ |
25 | .previous | 27 | .previous |
26 | .section .altinstructions,"a" | 28 | .section .altinstructions,"a" |
27 | .align 8 | 29 | .align 8 |
28 | .quad 0b | 30 | .quad 0b |
29 | .quad 2b | 31 | .quad 2b |
30 | .byte \feature /* when feature is set */ | 32 | .byte \feature /* when feature is set */ |
31 | .byte 5 | 33 | .byte 5 |
32 | .byte 5 | 34 | .byte 5 |
33 | .previous | 35 | .previous |
34 | .endm | 36 | .endm |
35 | 37 | ||
36 | /* Standard copy_to_user with segment limit checking */ | 38 | .macro ALIGN_DESTINATION |
39 | #ifdef FIX_ALIGNMENT | ||
40 | /* check for bad alignment of destination */ | ||
41 | movl %edi,%ecx | ||
42 | andl $7,%ecx | ||
43 | jz 102f /* already aligned */ | ||
44 | subl $8,%ecx | ||
45 | negl %ecx | ||
46 | subl %ecx,%edx | ||
47 | 100: movb (%rsi),%al | ||
48 | 101: movb %al,(%rdi) | ||
49 | incq %rsi | ||
50 | incq %rdi | ||
51 | decl %ecx | ||
52 | jnz 100b | ||
53 | 102: | ||
54 | .section .fixup,"ax" | ||
55 | 103: addl %r8d,%edx /* ecx is zerorest also */ | ||
56 | jmp copy_user_handle_tail | ||
57 | .previous | ||
58 | |||
59 | .section __ex_table,"a" | ||
60 | .align 8 | ||
61 | .quad 100b,103b | ||
62 | .quad 101b,103b | ||
63 | .previous | ||
64 | #endif | ||
65 | .endm | ||
66 | |||
67 | /* Standard copy_to_user with segment limit checking */ | ||
37 | ENTRY(copy_to_user) | 68 | ENTRY(copy_to_user) |
38 | CFI_STARTPROC | 69 | CFI_STARTPROC |
39 | GET_THREAD_INFO(%rax) | 70 | GET_THREAD_INFO(%rax) |
40 | movq %rdi,%rcx | 71 | movq %rdi,%rcx |
41 | addq %rdx,%rcx | 72 | addq %rdx,%rcx |
42 | jc bad_to_user | 73 | jc bad_to_user |
43 | cmpq threadinfo_addr_limit(%rax),%rcx | 74 | cmpq TI_addr_limit(%rax),%rcx |
44 | jae bad_to_user | 75 | jae bad_to_user |
45 | xorl %eax,%eax /* clear zero flag */ | ||
46 | ALTERNATIVE_JUMP X86_FEATURE_REP_GOOD,copy_user_generic_unrolled,copy_user_generic_string | 76 | ALTERNATIVE_JUMP X86_FEATURE_REP_GOOD,copy_user_generic_unrolled,copy_user_generic_string |
47 | CFI_ENDPROC | 77 | CFI_ENDPROC |
48 | 78 | ||
49 | ENTRY(copy_user_generic) | 79 | /* Standard copy_from_user with segment limit checking */ |
80 | ENTRY(copy_from_user) | ||
50 | CFI_STARTPROC | 81 | CFI_STARTPROC |
51 | movl $1,%ecx /* set zero flag */ | 82 | GET_THREAD_INFO(%rax) |
83 | movq %rsi,%rcx | ||
84 | addq %rdx,%rcx | ||
85 | jc bad_from_user | ||
86 | cmpq TI_addr_limit(%rax),%rcx | ||
87 | jae bad_from_user | ||
52 | ALTERNATIVE_JUMP X86_FEATURE_REP_GOOD,copy_user_generic_unrolled,copy_user_generic_string | 88 | ALTERNATIVE_JUMP X86_FEATURE_REP_GOOD,copy_user_generic_unrolled,copy_user_generic_string |
53 | CFI_ENDPROC | 89 | CFI_ENDPROC |
90 | ENDPROC(copy_from_user) | ||
54 | 91 | ||
55 | ENTRY(__copy_from_user_inatomic) | 92 | ENTRY(copy_user_generic) |
56 | CFI_STARTPROC | 93 | CFI_STARTPROC |
57 | xorl %ecx,%ecx /* clear zero flag */ | ||
58 | ALTERNATIVE_JUMP X86_FEATURE_REP_GOOD,copy_user_generic_unrolled,copy_user_generic_string | 94 | ALTERNATIVE_JUMP X86_FEATURE_REP_GOOD,copy_user_generic_unrolled,copy_user_generic_string |
59 | CFI_ENDPROC | 95 | CFI_ENDPROC |
96 | ENDPROC(copy_user_generic) | ||
60 | 97 | ||
61 | /* Standard copy_from_user with segment limit checking */ | 98 | ENTRY(__copy_from_user_inatomic) |
62 | ENTRY(copy_from_user) | ||
63 | CFI_STARTPROC | 99 | CFI_STARTPROC |
64 | GET_THREAD_INFO(%rax) | ||
65 | movq %rsi,%rcx | ||
66 | addq %rdx,%rcx | ||
67 | jc bad_from_user | ||
68 | cmpq threadinfo_addr_limit(%rax),%rcx | ||
69 | jae bad_from_user | ||
70 | movl $1,%ecx /* set zero flag */ | ||
71 | ALTERNATIVE_JUMP X86_FEATURE_REP_GOOD,copy_user_generic_unrolled,copy_user_generic_string | 100 | ALTERNATIVE_JUMP X86_FEATURE_REP_GOOD,copy_user_generic_unrolled,copy_user_generic_string |
72 | CFI_ENDPROC | 101 | CFI_ENDPROC |
73 | ENDPROC(copy_from_user) | 102 | ENDPROC(__copy_from_user_inatomic) |
74 | 103 | ||
75 | .section .fixup,"ax" | 104 | .section .fixup,"ax" |
76 | /* must zero dest */ | 105 | /* must zero dest */ |
106 | ENTRY(bad_from_user) | ||
77 | bad_from_user: | 107 | bad_from_user: |
78 | CFI_STARTPROC | 108 | CFI_STARTPROC |
79 | movl %edx,%ecx | 109 | movl %edx,%ecx |
@@ -81,271 +111,158 @@ bad_from_user: | |||
81 | rep | 111 | rep |
82 | stosb | 112 | stosb |
83 | bad_to_user: | 113 | bad_to_user: |
84 | movl %edx,%eax | 114 | movl %edx,%eax |
85 | ret | 115 | ret |
86 | CFI_ENDPROC | 116 | CFI_ENDPROC |
87 | END(bad_from_user) | 117 | ENDPROC(bad_from_user) |
88 | .previous | 118 | .previous |
89 | 119 | ||
90 | |||
91 | /* | 120 | /* |
92 | * copy_user_generic_unrolled - memory copy with exception handling. | 121 | * copy_user_generic_unrolled - memory copy with exception handling. |
93 | * This version is for CPUs like P4 that don't have efficient micro code for rep movsq | 122 | * This version is for CPUs like P4 that don't have efficient micro |
94 | * | 123 | * code for rep movsq |
95 | * Input: | 124 | * |
125 | * Input: | ||
96 | * rdi destination | 126 | * rdi destination |
97 | * rsi source | 127 | * rsi source |
98 | * rdx count | 128 | * rdx count |
99 | * ecx zero flag -- if true zero destination on error | ||
100 | * | 129 | * |
101 | * Output: | 130 | * Output: |
102 | * eax uncopied bytes or 0 if successful. | 131 | * eax uncopied bytes or 0 if successful. |
103 | */ | 132 | */ |
104 | ENTRY(copy_user_generic_unrolled) | 133 | ENTRY(copy_user_generic_unrolled) |
105 | CFI_STARTPROC | 134 | CFI_STARTPROC |
106 | pushq %rbx | 135 | cmpl $8,%edx |
107 | CFI_ADJUST_CFA_OFFSET 8 | 136 | jb 20f /* less then 8 bytes, go to byte copy loop */ |
108 | CFI_REL_OFFSET rbx, 0 | 137 | ALIGN_DESTINATION |
109 | pushq %rcx | 138 | movl %edx,%ecx |
110 | CFI_ADJUST_CFA_OFFSET 8 | 139 | andl $63,%edx |
111 | CFI_REL_OFFSET rcx, 0 | 140 | shrl $6,%ecx |
112 | xorl %eax,%eax /*zero for the exception handler */ | 141 | jz 17f |
113 | 142 | 1: movq (%rsi),%r8 | |
114 | #ifdef FIX_ALIGNMENT | 143 | 2: movq 1*8(%rsi),%r9 |
115 | /* check for bad alignment of destination */ | 144 | 3: movq 2*8(%rsi),%r10 |
116 | movl %edi,%ecx | 145 | 4: movq 3*8(%rsi),%r11 |
117 | andl $7,%ecx | 146 | 5: movq %r8,(%rdi) |
118 | jnz .Lbad_alignment | 147 | 6: movq %r9,1*8(%rdi) |
119 | .Lafter_bad_alignment: | 148 | 7: movq %r10,2*8(%rdi) |
120 | #endif | 149 | 8: movq %r11,3*8(%rdi) |
121 | 150 | 9: movq 4*8(%rsi),%r8 | |
122 | movq %rdx,%rcx | 151 | 10: movq 5*8(%rsi),%r9 |
123 | 152 | 11: movq 6*8(%rsi),%r10 | |
124 | movl $64,%ebx | 153 | 12: movq 7*8(%rsi),%r11 |
125 | shrq $6,%rdx | 154 | 13: movq %r8,4*8(%rdi) |
126 | decq %rdx | 155 | 14: movq %r9,5*8(%rdi) |
127 | js .Lhandle_tail | 156 | 15: movq %r10,6*8(%rdi) |
128 | 157 | 16: movq %r11,7*8(%rdi) | |
129 | .p2align 4 | ||
130 | .Lloop: | ||
131 | .Ls1: movq (%rsi),%r11 | ||
132 | .Ls2: movq 1*8(%rsi),%r8 | ||
133 | .Ls3: movq 2*8(%rsi),%r9 | ||
134 | .Ls4: movq 3*8(%rsi),%r10 | ||
135 | .Ld1: movq %r11,(%rdi) | ||
136 | .Ld2: movq %r8,1*8(%rdi) | ||
137 | .Ld3: movq %r9,2*8(%rdi) | ||
138 | .Ld4: movq %r10,3*8(%rdi) | ||
139 | |||
140 | .Ls5: movq 4*8(%rsi),%r11 | ||
141 | .Ls6: movq 5*8(%rsi),%r8 | ||
142 | .Ls7: movq 6*8(%rsi),%r9 | ||
143 | .Ls8: movq 7*8(%rsi),%r10 | ||
144 | .Ld5: movq %r11,4*8(%rdi) | ||
145 | .Ld6: movq %r8,5*8(%rdi) | ||
146 | .Ld7: movq %r9,6*8(%rdi) | ||
147 | .Ld8: movq %r10,7*8(%rdi) | ||
148 | |||
149 | decq %rdx | ||
150 | |||
151 | leaq 64(%rsi),%rsi | 158 | leaq 64(%rsi),%rsi |
152 | leaq 64(%rdi),%rdi | 159 | leaq 64(%rdi),%rdi |
153 | |||
154 | jns .Lloop | ||
155 | |||
156 | .p2align 4 | ||
157 | .Lhandle_tail: | ||
158 | movl %ecx,%edx | ||
159 | andl $63,%ecx | ||
160 | shrl $3,%ecx | ||
161 | jz .Lhandle_7 | ||
162 | movl $8,%ebx | ||
163 | .p2align 4 | ||
164 | .Lloop_8: | ||
165 | .Ls9: movq (%rsi),%r8 | ||
166 | .Ld9: movq %r8,(%rdi) | ||
167 | decl %ecx | 160 | decl %ecx |
168 | leaq 8(%rdi),%rdi | 161 | jnz 1b |
162 | 17: movl %edx,%ecx | ||
163 | andl $7,%edx | ||
164 | shrl $3,%ecx | ||
165 | jz 20f | ||
166 | 18: movq (%rsi),%r8 | ||
167 | 19: movq %r8,(%rdi) | ||
169 | leaq 8(%rsi),%rsi | 168 | leaq 8(%rsi),%rsi |
170 | jnz .Lloop_8 | 169 | leaq 8(%rdi),%rdi |
171 | 170 | decl %ecx | |
172 | .Lhandle_7: | 171 | jnz 18b |
172 | 20: andl %edx,%edx | ||
173 | jz 23f | ||
173 | movl %edx,%ecx | 174 | movl %edx,%ecx |
174 | andl $7,%ecx | 175 | 21: movb (%rsi),%al |
175 | jz .Lende | 176 | 22: movb %al,(%rdi) |
176 | .p2align 4 | ||
177 | .Lloop_1: | ||
178 | .Ls10: movb (%rsi),%bl | ||
179 | .Ld10: movb %bl,(%rdi) | ||
180 | incq %rdi | ||
181 | incq %rsi | 177 | incq %rsi |
178 | incq %rdi | ||
182 | decl %ecx | 179 | decl %ecx |
183 | jnz .Lloop_1 | 180 | jnz 21b |
184 | 181 | 23: xor %eax,%eax | |
185 | CFI_REMEMBER_STATE | ||
186 | .Lende: | ||
187 | popq %rcx | ||
188 | CFI_ADJUST_CFA_OFFSET -8 | ||
189 | CFI_RESTORE rcx | ||
190 | popq %rbx | ||
191 | CFI_ADJUST_CFA_OFFSET -8 | ||
192 | CFI_RESTORE rbx | ||
193 | ret | 182 | ret |
194 | CFI_RESTORE_STATE | ||
195 | 183 | ||
196 | #ifdef FIX_ALIGNMENT | 184 | .section .fixup,"ax" |
197 | /* align destination */ | 185 | 30: shll $6,%ecx |
198 | .p2align 4 | 186 | addl %ecx,%edx |
199 | .Lbad_alignment: | 187 | jmp 60f |
200 | movl $8,%r9d | 188 | 40: lea (%rdx,%rcx,8),%rdx |
201 | subl %ecx,%r9d | 189 | jmp 60f |
202 | movl %r9d,%ecx | 190 | 50: movl %ecx,%edx |
203 | cmpq %r9,%rdx | 191 | 60: jmp copy_user_handle_tail /* ecx is zerorest also */ |
204 | jz .Lhandle_7 | 192 | .previous |
205 | js .Lhandle_7 | ||
206 | .Lalign_1: | ||
207 | .Ls11: movb (%rsi),%bl | ||
208 | .Ld11: movb %bl,(%rdi) | ||
209 | incq %rsi | ||
210 | incq %rdi | ||
211 | decl %ecx | ||
212 | jnz .Lalign_1 | ||
213 | subq %r9,%rdx | ||
214 | jmp .Lafter_bad_alignment | ||
215 | #endif | ||
216 | 193 | ||
217 | /* table sorted by exception address */ | ||
218 | .section __ex_table,"a" | 194 | .section __ex_table,"a" |
219 | .align 8 | 195 | .align 8 |
220 | .quad .Ls1,.Ls1e /* Ls1-Ls4 have copied zero bytes */ | 196 | .quad 1b,30b |
221 | .quad .Ls2,.Ls1e | 197 | .quad 2b,30b |
222 | .quad .Ls3,.Ls1e | 198 | .quad 3b,30b |
223 | .quad .Ls4,.Ls1e | 199 | .quad 4b,30b |
224 | .quad .Ld1,.Ls1e /* Ld1-Ld4 have copied 0-24 bytes */ | 200 | .quad 5b,30b |
225 | .quad .Ld2,.Ls2e | 201 | .quad 6b,30b |
226 | .quad .Ld3,.Ls3e | 202 | .quad 7b,30b |
227 | .quad .Ld4,.Ls4e | 203 | .quad 8b,30b |
228 | .quad .Ls5,.Ls5e /* Ls5-Ls8 have copied 32 bytes */ | 204 | .quad 9b,30b |
229 | .quad .Ls6,.Ls5e | 205 | .quad 10b,30b |
230 | .quad .Ls7,.Ls5e | 206 | .quad 11b,30b |
231 | .quad .Ls8,.Ls5e | 207 | .quad 12b,30b |
232 | .quad .Ld5,.Ls5e /* Ld5-Ld8 have copied 32-56 bytes */ | 208 | .quad 13b,30b |
233 | .quad .Ld6,.Ls6e | 209 | .quad 14b,30b |
234 | .quad .Ld7,.Ls7e | 210 | .quad 15b,30b |
235 | .quad .Ld8,.Ls8e | 211 | .quad 16b,30b |
236 | .quad .Ls9,.Le_quad | 212 | .quad 18b,40b |
237 | .quad .Ld9,.Le_quad | 213 | .quad 19b,40b |
238 | .quad .Ls10,.Le_byte | 214 | .quad 21b,50b |
239 | .quad .Ld10,.Le_byte | 215 | .quad 22b,50b |
240 | #ifdef FIX_ALIGNMENT | ||
241 | .quad .Ls11,.Lzero_rest | ||
242 | .quad .Ld11,.Lzero_rest | ||
243 | #endif | ||
244 | .quad .Le5,.Le_zero | ||
245 | .previous | 216 | .previous |
246 | |||
247 | /* eax: zero, ebx: 64 */ | ||
248 | .Ls1e: addl $8,%eax /* eax is bytes left uncopied within the loop (Ls1e: 64 .. Ls8e: 8) */ | ||
249 | .Ls2e: addl $8,%eax | ||
250 | .Ls3e: addl $8,%eax | ||
251 | .Ls4e: addl $8,%eax | ||
252 | .Ls5e: addl $8,%eax | ||
253 | .Ls6e: addl $8,%eax | ||
254 | .Ls7e: addl $8,%eax | ||
255 | .Ls8e: addl $8,%eax | ||
256 | addq %rbx,%rdi /* +64 */ | ||
257 | subq %rax,%rdi /* correct destination with computed offset */ | ||
258 | |||
259 | shlq $6,%rdx /* loop counter * 64 (stride length) */ | ||
260 | addq %rax,%rdx /* add offset to loopcnt */ | ||
261 | andl $63,%ecx /* remaining bytes */ | ||
262 | addq %rcx,%rdx /* add them */ | ||
263 | jmp .Lzero_rest | ||
264 | |||
265 | /* exception on quad word loop in tail handling */ | ||
266 | /* ecx: loopcnt/8, %edx: length, rdi: correct */ | ||
267 | .Le_quad: | ||
268 | shll $3,%ecx | ||
269 | andl $7,%edx | ||
270 | addl %ecx,%edx | ||
271 | /* edx: bytes to zero, rdi: dest, eax:zero */ | ||
272 | .Lzero_rest: | ||
273 | cmpl $0,(%rsp) | ||
274 | jz .Le_zero | ||
275 | movq %rdx,%rcx | ||
276 | .Le_byte: | ||
277 | xorl %eax,%eax | ||
278 | .Le5: rep | ||
279 | stosb | ||
280 | /* when there is another exception while zeroing the rest just return */ | ||
281 | .Le_zero: | ||
282 | movq %rdx,%rax | ||
283 | jmp .Lende | ||
284 | CFI_ENDPROC | 217 | CFI_ENDPROC |
285 | ENDPROC(copy_user_generic) | 218 | ENDPROC(copy_user_generic_unrolled) |
286 | 219 | ||
287 | 220 | /* Some CPUs run faster using the string copy instructions. | |
288 | /* Some CPUs run faster using the string copy instructions. | 221 | * This is also a lot simpler. Use them when possible. |
289 | This is also a lot simpler. Use them when possible. | 222 | * |
290 | Patch in jmps to this code instead of copying it fully | 223 | * Only 4GB of copy is supported. This shouldn't be a problem |
291 | to avoid unwanted aliasing in the exception tables. */ | 224 | * because the kernel normally only writes from/to page sized chunks |
292 | 225 | * even if user space passed a longer buffer. | |
293 | /* rdi destination | 226 | * And more would be dangerous because both Intel and AMD have |
294 | * rsi source | 227 | * errata with rep movsq > 4GB. If someone feels the need to fix |
295 | * rdx count | 228 | * this please consider this. |
296 | * ecx zero flag | 229 | * |
297 | * | 230 | * Input: |
298 | * Output: | 231 | * rdi destination |
299 | * eax uncopied bytes or 0 if successfull. | 232 | * rsi source |
300 | * | 233 | * rdx count |
301 | * Only 4GB of copy is supported. This shouldn't be a problem | 234 | * |
302 | * because the kernel normally only writes from/to page sized chunks | 235 | * Output: |
303 | * even if user space passed a longer buffer. | 236 | * eax uncopied bytes or 0 if successful. |
304 | * And more would be dangerous because both Intel and AMD have | 237 | */ |
305 | * errata with rep movsq > 4GB. If someone feels the need to fix | ||
306 | * this please consider this. | ||
307 | */ | ||
308 | ENTRY(copy_user_generic_string) | 238 | ENTRY(copy_user_generic_string) |
309 | CFI_STARTPROC | 239 | CFI_STARTPROC |
310 | movl %ecx,%r8d /* save zero flag */ | 240 | andl %edx,%edx |
241 | jz 4f | ||
242 | cmpl $8,%edx | ||
243 | jb 2f /* less than 8 bytes, go to byte copy loop */ | ||
244 | ALIGN_DESTINATION | ||
311 | movl %edx,%ecx | 245 | movl %edx,%ecx |
312 | shrl $3,%ecx | 246 | shrl $3,%ecx |
313 | andl $7,%edx | 247 | andl $7,%edx |
314 | jz 10f | 248 | 1: rep |
315 | 1: rep | ||
316 | movsq | ||
317 | movl %edx,%ecx | ||
318 | 2: rep | ||
319 | movsb | ||
320 | 9: movl %ecx,%eax | ||
321 | ret | ||
322 | |||
323 | /* multiple of 8 byte */ | ||
324 | 10: rep | ||
325 | movsq | 249 | movsq |
326 | xor %eax,%eax | 250 | 2: movl %edx,%ecx |
251 | 3: rep | ||
252 | movsb | ||
253 | 4: xorl %eax,%eax | ||
327 | ret | 254 | ret |
328 | 255 | ||
329 | /* exception handling */ | 256 | .section .fixup,"ax" |
330 | 3: lea (%rdx,%rcx,8),%rax /* exception on quad loop */ | 257 | 11: lea (%rdx,%rcx,8),%rcx |
331 | jmp 6f | 258 | 12: movl %ecx,%edx /* ecx is zerorest also */ |
332 | 5: movl %ecx,%eax /* exception on byte loop */ | 259 | jmp copy_user_handle_tail |
333 | /* eax: left over bytes */ | 260 | .previous |
334 | 6: testl %r8d,%r8d /* zero flag set? */ | ||
335 | jz 7f | ||
336 | movl %eax,%ecx /* initialize x86 loop counter */ | ||
337 | push %rax | ||
338 | xorl %eax,%eax | ||
339 | 8: rep | ||
340 | stosb /* zero the rest */ | ||
341 | 11: pop %rax | ||
342 | 7: ret | ||
343 | CFI_ENDPROC | ||
344 | END(copy_user_generic_c) | ||
345 | 261 | ||
346 | .section __ex_table,"a" | 262 | .section __ex_table,"a" |
347 | .quad 1b,3b | 263 | .align 8 |
348 | .quad 2b,5b | 264 | .quad 1b,11b |
349 | .quad 8b,11b | 265 | .quad 3b,12b |
350 | .quad 10b,3b | ||
351 | .previous | 266 | .previous |
267 | CFI_ENDPROC | ||
268 | ENDPROC(copy_user_generic_string) | ||
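The rewritten copy loops above replace the old per-label exception bookkeeping with numeric labels whose fixups (30:/40:/50: and 11:/12:) reconstruct the outstanding byte count and jump to a common C helper. A hedged sketch of that helper, modeled on the copy_user_handle_tail() this patch series adds in arch/x86/lib/usercopy_64.c; the exact upstream body may differ:

	/*
	 * Retry the remainder one byte at a time; a fault simply ends the
	 * loop.  Returns the number of bytes left uncopied, zero-filling
	 * the destination tail when "zerorest" is set (the value the asm
	 * comments call zerorest).
	 */
	unsigned long copy_user_handle_tail(char *to, char *from,
					    unsigned len, unsigned zerorest)
	{
		char c;
		unsigned zero_len;

		for (; len; --len) {
			if (__get_user(c, from++))
				break;
			if (__put_user(c, to++))
				break;
		}
		for (c = 0, zero_len = len; zerorest && zero_len; --zero_len)
			if (__put_user(c, to++))
				break;
		return len;
	}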
diff --git a/arch/x86/lib/copy_user_nocache_64.S b/arch/x86/lib/copy_user_nocache_64.S index 9d3d1ab83763..40e0e309d27e 100644 --- a/arch/x86/lib/copy_user_nocache_64.S +++ b/arch/x86/lib/copy_user_nocache_64.S | |||
@@ -1,4 +1,6 @@ | |||
1 | /* Copyright 2002 Andi Kleen, SuSE Labs. | 1 | /* |
2 | * Copyright 2008 Vitaly Mayatskikh <vmayatsk@redhat.com> | ||
3 | * Copyright 2002 Andi Kleen, SuSE Labs. | ||
2 | * Subject to the GNU Public License v2. | 4 | * Subject to the GNU Public License v2. |
3 | * | 5 | * |
4 | * Functions to copy from and to user space. | 6 | * Functions to copy from and to user space. |
@@ -12,204 +14,125 @@ | |||
12 | #include <asm/current.h> | 14 | #include <asm/current.h> |
13 | #include <asm/asm-offsets.h> | 15 | #include <asm/asm-offsets.h> |
14 | #include <asm/thread_info.h> | 16 | #include <asm/thread_info.h> |
15 | #include <asm/cpufeature.h> | ||
16 | |||
17 | /* | ||
18 | * copy_user_nocache - Uncached memory copy with exception handling | ||
19 | * This will force destination/source out of cache for more performance. | ||
20 | * | ||
21 | * Input: | ||
22 | * rdi destination | ||
23 | * rsi source | ||
24 | * rdx count | ||
25 | * rcx zero flag when 1 zero on exception | ||
26 | * | ||
27 | * Output: | ||
28 | * eax uncopied bytes or 0 if successful. | ||
29 | */ | ||
30 | ENTRY(__copy_user_nocache) | ||
31 | CFI_STARTPROC | ||
32 | pushq %rbx | ||
33 | CFI_ADJUST_CFA_OFFSET 8 | ||
34 | CFI_REL_OFFSET rbx, 0 | ||
35 | pushq %rcx /* save zero flag */ | ||
36 | CFI_ADJUST_CFA_OFFSET 8 | ||
37 | CFI_REL_OFFSET rcx, 0 | ||
38 | |||
39 | xorl %eax,%eax /* zero for the exception handler */ | ||
40 | 17 | ||
18 | .macro ALIGN_DESTINATION | ||
41 | #ifdef FIX_ALIGNMENT | 19 | #ifdef FIX_ALIGNMENT |
42 | /* check for bad alignment of destination */ | 20 | /* check for bad alignment of destination */ |
43 | movl %edi,%ecx | 21 | movl %edi,%ecx |
44 | andl $7,%ecx | 22 | andl $7,%ecx |
45 | jnz .Lbad_alignment | 23 | jz 102f /* already aligned */ |
46 | .Lafter_bad_alignment: | 24 | subl $8,%ecx |
47 | #endif | 25 | negl %ecx |
48 | 26 | subl %ecx,%edx | |
49 | movq %rdx,%rcx | 27 | 100: movb (%rsi),%al |
50 | 28 | 101: movb %al,(%rdi) | |
51 | movl $64,%ebx | 29 | incq %rsi |
52 | shrq $6,%rdx | 30 | incq %rdi |
53 | decq %rdx | 31 | decl %ecx |
54 | js .Lhandle_tail | 32 | jnz 100b |
55 | 33 | 102: | |
56 | .p2align 4 | 34 | .section .fixup,"ax" |
57 | .Lloop: | 35 | 103: addl %r8d,%edx /* ecx is zerorest also */ |
58 | .Ls1: movq (%rsi),%r11 | 36 | jmp copy_user_handle_tail |
59 | .Ls2: movq 1*8(%rsi),%r8 | 37 | .previous |
60 | .Ls3: movq 2*8(%rsi),%r9 | ||
61 | .Ls4: movq 3*8(%rsi),%r10 | ||
62 | .Ld1: movnti %r11,(%rdi) | ||
63 | .Ld2: movnti %r8,1*8(%rdi) | ||
64 | .Ld3: movnti %r9,2*8(%rdi) | ||
65 | .Ld4: movnti %r10,3*8(%rdi) | ||
66 | |||
67 | .Ls5: movq 4*8(%rsi),%r11 | ||
68 | .Ls6: movq 5*8(%rsi),%r8 | ||
69 | .Ls7: movq 6*8(%rsi),%r9 | ||
70 | .Ls8: movq 7*8(%rsi),%r10 | ||
71 | .Ld5: movnti %r11,4*8(%rdi) | ||
72 | .Ld6: movnti %r8,5*8(%rdi) | ||
73 | .Ld7: movnti %r9,6*8(%rdi) | ||
74 | .Ld8: movnti %r10,7*8(%rdi) | ||
75 | 38 | ||
76 | dec %rdx | 39 | .section __ex_table,"a" |
40 | .align 8 | ||
41 | .quad 100b,103b | ||
42 | .quad 101b,103b | ||
43 | .previous | ||
44 | #endif | ||
45 | .endm | ||
77 | 46 | ||
47 | /* | ||
48 | * copy_user_nocache - Uncached memory copy with exception handling | ||
49 | * This will force destination/source out of cache for more performance. | ||
50 | */ | ||
51 | ENTRY(__copy_user_nocache) | ||
52 | CFI_STARTPROC | ||
53 | cmpl $8,%edx | ||
54 | jb 20f /* less than 8 bytes, go to byte copy loop */ | ||
55 | ALIGN_DESTINATION | ||
56 | movl %edx,%ecx | ||
57 | andl $63,%edx | ||
58 | shrl $6,%ecx | ||
59 | jz 17f | ||
60 | 1: movq (%rsi),%r8 | ||
61 | 2: movq 1*8(%rsi),%r9 | ||
62 | 3: movq 2*8(%rsi),%r10 | ||
63 | 4: movq 3*8(%rsi),%r11 | ||
64 | 5: movnti %r8,(%rdi) | ||
65 | 6: movnti %r9,1*8(%rdi) | ||
66 | 7: movnti %r10,2*8(%rdi) | ||
67 | 8: movnti %r11,3*8(%rdi) | ||
68 | 9: movq 4*8(%rsi),%r8 | ||
69 | 10: movq 5*8(%rsi),%r9 | ||
70 | 11: movq 6*8(%rsi),%r10 | ||
71 | 12: movq 7*8(%rsi),%r11 | ||
72 | 13: movnti %r8,4*8(%rdi) | ||
73 | 14: movnti %r9,5*8(%rdi) | ||
74 | 15: movnti %r10,6*8(%rdi) | ||
75 | 16: movnti %r11,7*8(%rdi) | ||
78 | leaq 64(%rsi),%rsi | 76 | leaq 64(%rsi),%rsi |
79 | leaq 64(%rdi),%rdi | 77 | leaq 64(%rdi),%rdi |
80 | |||
81 | jns .Lloop | ||
82 | |||
83 | .p2align 4 | ||
84 | .Lhandle_tail: | ||
85 | movl %ecx,%edx | ||
86 | andl $63,%ecx | ||
87 | shrl $3,%ecx | ||
88 | jz .Lhandle_7 | ||
89 | movl $8,%ebx | ||
90 | .p2align 4 | ||
91 | .Lloop_8: | ||
92 | .Ls9: movq (%rsi),%r8 | ||
93 | .Ld9: movnti %r8,(%rdi) | ||
94 | decl %ecx | 78 | decl %ecx |
95 | leaq 8(%rdi),%rdi | 79 | jnz 1b |
80 | 17: movl %edx,%ecx | ||
81 | andl $7,%edx | ||
82 | shrl $3,%ecx | ||
83 | jz 20f | ||
84 | 18: movq (%rsi),%r8 | ||
85 | 19: movnti %r8,(%rdi) | ||
96 | leaq 8(%rsi),%rsi | 86 | leaq 8(%rsi),%rsi |
97 | jnz .Lloop_8 | 87 | leaq 8(%rdi),%rdi |
98 | 88 | decl %ecx | |
99 | .Lhandle_7: | 89 | jnz 18b |
90 | 20: andl %edx,%edx | ||
91 | jz 23f | ||
100 | movl %edx,%ecx | 92 | movl %edx,%ecx |
101 | andl $7,%ecx | 93 | 21: movb (%rsi),%al |
102 | jz .Lende | 94 | 22: movb %al,(%rdi) |
103 | .p2align 4 | ||
104 | .Lloop_1: | ||
105 | .Ls10: movb (%rsi),%bl | ||
106 | .Ld10: movb %bl,(%rdi) | ||
107 | incq %rdi | ||
108 | incq %rsi | 95 | incq %rsi |
96 | incq %rdi | ||
109 | decl %ecx | 97 | decl %ecx |
110 | jnz .Lloop_1 | 98 | jnz 21b |
111 | 99 | 23: xorl %eax,%eax | |
112 | CFI_REMEMBER_STATE | ||
113 | .Lende: | ||
114 | popq %rcx | ||
115 | CFI_ADJUST_CFA_OFFSET -8 | ||
116 | CFI_RESTORE %rcx | ||
117 | popq %rbx | ||
118 | CFI_ADJUST_CFA_OFFSET -8 | ||
119 | CFI_RESTORE rbx | ||
120 | sfence | 100 | sfence |
121 | ret | 101 | ret |
122 | CFI_RESTORE_STATE | ||
123 | 102 | ||
124 | #ifdef FIX_ALIGNMENT | 103 | .section .fixup,"ax" |
125 | /* align destination */ | 104 | 30: shll $6,%ecx |
126 | .p2align 4 | 105 | addl %ecx,%edx |
127 | .Lbad_alignment: | 106 | jmp 60f |
128 | movl $8,%r9d | 107 | 40: lea (%rdx,%rcx,8),%rdx |
129 | subl %ecx,%r9d | 108 | jmp 60f |
130 | movl %r9d,%ecx | 109 | 50: movl %ecx,%edx |
131 | cmpq %r9,%rdx | 110 | 60: sfence |
132 | jz .Lhandle_7 | 111 | movl %r8d,%ecx |
133 | js .Lhandle_7 | 112 | jmp copy_user_handle_tail |
134 | .Lalign_1: | 113 | .previous |
135 | .Ls11: movb (%rsi),%bl | ||
136 | .Ld11: movb %bl,(%rdi) | ||
137 | incq %rsi | ||
138 | incq %rdi | ||
139 | decl %ecx | ||
140 | jnz .Lalign_1 | ||
141 | subq %r9,%rdx | ||
142 | jmp .Lafter_bad_alignment | ||
143 | #endif | ||
144 | 114 | ||
145 | /* table sorted by exception address */ | ||
146 | .section __ex_table,"a" | 115 | .section __ex_table,"a" |
147 | .align 8 | 116 | .quad 1b,30b |
148 | .quad .Ls1,.Ls1e /* .Ls[1-4] - 0 bytes copied */ | 117 | .quad 2b,30b |
149 | .quad .Ls2,.Ls1e | 118 | .quad 3b,30b |
150 | .quad .Ls3,.Ls1e | 119 | .quad 4b,30b |
151 | .quad .Ls4,.Ls1e | 120 | .quad 5b,30b |
152 | .quad .Ld1,.Ls1e /* .Ld[1-4] - 0..24 bytes copied */ | 121 | .quad 6b,30b |
153 | .quad .Ld2,.Ls2e | 122 | .quad 7b,30b |
154 | .quad .Ld3,.Ls3e | 123 | .quad 8b,30b |
155 | .quad .Ld4,.Ls4e | 124 | .quad 9b,30b |
156 | .quad .Ls5,.Ls5e /* .Ls[5-8] - 32 bytes copied */ | 125 | .quad 10b,30b |
157 | .quad .Ls6,.Ls5e | 126 | .quad 11b,30b |
158 | .quad .Ls7,.Ls5e | 127 | .quad 12b,30b |
159 | .quad .Ls8,.Ls5e | 128 | .quad 13b,30b |
160 | .quad .Ld5,.Ls5e /* .Ld[5-8] - 32..56 bytes copied */ | 129 | .quad 14b,30b |
161 | .quad .Ld6,.Ls6e | 130 | .quad 15b,30b |
162 | .quad .Ld7,.Ls7e | 131 | .quad 16b,30b |
163 | .quad .Ld8,.Ls8e | 132 | .quad 18b,40b |
164 | .quad .Ls9,.Le_quad | 133 | .quad 19b,40b |
165 | .quad .Ld9,.Le_quad | 134 | .quad 21b,50b |
166 | .quad .Ls10,.Le_byte | 135 | .quad 22b,50b |
167 | .quad .Ld10,.Le_byte | ||
168 | #ifdef FIX_ALIGNMENT | ||
169 | .quad .Ls11,.Lzero_rest | ||
170 | .quad .Ld11,.Lzero_rest | ||
171 | #endif | ||
172 | .quad .Le5,.Le_zero | ||
173 | .previous | 136 | .previous |
174 | |||
175 | /* eax: zero, ebx: 64 */ | ||
176 | .Ls1e: addl $8,%eax /* eax: bytes left uncopied: Ls1e: 64 .. Ls8e: 8 */ | ||
177 | .Ls2e: addl $8,%eax | ||
178 | .Ls3e: addl $8,%eax | ||
179 | .Ls4e: addl $8,%eax | ||
180 | .Ls5e: addl $8,%eax | ||
181 | .Ls6e: addl $8,%eax | ||
182 | .Ls7e: addl $8,%eax | ||
183 | .Ls8e: addl $8,%eax | ||
184 | addq %rbx,%rdi /* +64 */ | ||
185 | subq %rax,%rdi /* correct destination with computed offset */ | ||
186 | |||
187 | shlq $6,%rdx /* loop counter * 64 (stride length) */ | ||
188 | addq %rax,%rdx /* add offset to loopcnt */ | ||
189 | andl $63,%ecx /* remaining bytes */ | ||
190 | addq %rcx,%rdx /* add them */ | ||
191 | jmp .Lzero_rest | ||
192 | |||
193 | /* exception on quad word loop in tail handling */ | ||
194 | /* ecx: loopcnt/8, %edx: length, rdi: correct */ | ||
195 | .Le_quad: | ||
196 | shll $3,%ecx | ||
197 | andl $7,%edx | ||
198 | addl %ecx,%edx | ||
199 | /* edx: bytes to zero, rdi: dest, eax:zero */ | ||
200 | .Lzero_rest: | ||
201 | cmpl $0,(%rsp) /* zero flag set? */ | ||
202 | jz .Le_zero | ||
203 | movq %rdx,%rcx | ||
204 | .Le_byte: | ||
205 | xorl %eax,%eax | ||
206 | .Le5: rep | ||
207 | stosb | ||
208 | /* when there is another exception while zeroing the rest just return */ | ||
209 | .Le_zero: | ||
210 | movq %rdx,%rax | ||
211 | jmp .Lende | ||
212 | CFI_ENDPROC | 137 | CFI_ENDPROC |
213 | ENDPROC(__copy_user_nocache) | 138 | ENDPROC(__copy_user_nocache) |
214 | |||
215 | |||
diff --git a/arch/x86/lib/delay_32.c b/arch/x86/lib/delay.c index ef691316f8b6..f4568605d7d5 100644 --- a/arch/x86/lib/delay_32.c +++ b/arch/x86/lib/delay.c | |||
@@ -29,7 +29,7 @@ | |||
29 | /* simple loop based delay: */ | 29 | /* simple loop based delay: */ |
30 | static void delay_loop(unsigned long loops) | 30 | static void delay_loop(unsigned long loops) |
31 | { | 31 | { |
32 | __asm__ __volatile__( | 32 | asm volatile( |
33 | " test %0,%0 \n" | 33 | " test %0,%0 \n" |
34 | " jz 3f \n" | 34 | " jz 3f \n" |
35 | " jmp 1f \n" | 35 | " jmp 1f \n" |
@@ -38,9 +38,9 @@ static void delay_loop(unsigned long loops) | |||
38 | "1: jmp 2f \n" | 38 | "1: jmp 2f \n" |
39 | 39 | ||
40 | ".align 16 \n" | 40 | ".align 16 \n" |
41 | "2: decl %0 \n" | 41 | "2: dec %0 \n" |
42 | " jnz 2b \n" | 42 | " jnz 2b \n" |
43 | "3: decl %0 \n" | 43 | "3: dec %0 \n" |
44 | 44 | ||
45 | : /* we don't need output */ | 45 | : /* we don't need output */ |
46 | :"a" (loops) | 46 | :"a" (loops) |
@@ -98,7 +98,7 @@ void use_tsc_delay(void) | |||
98 | int __devinit read_current_timer(unsigned long *timer_val) | 98 | int __devinit read_current_timer(unsigned long *timer_val) |
99 | { | 99 | { |
100 | if (delay_fn == delay_tsc) { | 100 | if (delay_fn == delay_tsc) { |
101 | rdtscl(*timer_val); | 101 | rdtscll(*timer_val); |
102 | return 0; | 102 | return 0; |
103 | } | 103 | } |
104 | return -1; | 104 | return -1; |
@@ -108,31 +108,30 @@ void __delay(unsigned long loops) | |||
108 | { | 108 | { |
109 | delay_fn(loops); | 109 | delay_fn(loops); |
110 | } | 110 | } |
111 | EXPORT_SYMBOL(__delay); | ||
111 | 112 | ||
112 | inline void __const_udelay(unsigned long xloops) | 113 | inline void __const_udelay(unsigned long xloops) |
113 | { | 114 | { |
114 | int d0; | 115 | int d0; |
115 | 116 | ||
116 | xloops *= 4; | 117 | xloops *= 4; |
117 | __asm__("mull %0" | 118 | asm("mull %%edx" |
118 | :"=d" (xloops), "=&a" (d0) | 119 | :"=d" (xloops), "=&a" (d0) |
119 | :"1" (xloops), "0" | 120 | :"1" (xloops), "0" |
120 | (cpu_data(raw_smp_processor_id()).loops_per_jiffy * (HZ/4))); | 121 | (cpu_data(raw_smp_processor_id()).loops_per_jiffy * (HZ/4))); |
121 | 122 | ||
122 | __delay(++xloops); | 123 | __delay(++xloops); |
123 | } | 124 | } |
125 | EXPORT_SYMBOL(__const_udelay); | ||
124 | 126 | ||
125 | void __udelay(unsigned long usecs) | 127 | void __udelay(unsigned long usecs) |
126 | { | 128 | { |
127 | __const_udelay(usecs * 0x000010c7); /* 2**32 / 1000000 (rounded up) */ | 129 | __const_udelay(usecs * 0x000010c7); /* 2**32 / 1000000 (rounded up) */ |
128 | } | 130 | } |
131 | EXPORT_SYMBOL(__udelay); | ||
129 | 132 | ||
130 | void __ndelay(unsigned long nsecs) | 133 | void __ndelay(unsigned long nsecs) |
131 | { | 134 | { |
132 | __const_udelay(nsecs * 0x00005); /* 2**32 / 1000000000 (rounded up) */ | 135 | __const_udelay(nsecs * 0x00005); /* 2**32 / 1000000000 (rounded up) */ |
133 | } | 136 | } |
134 | |||
135 | EXPORT_SYMBOL(__delay); | ||
136 | EXPORT_SYMBOL(__const_udelay); | ||
137 | EXPORT_SYMBOL(__udelay); | ||
138 | EXPORT_SYMBOL(__ndelay); | 137 | EXPORT_SYMBOL(__ndelay); |
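The magic constants carry over unchanged: __const_udelay() multiplies by loops_per_jiffy * HZ and keeps only the high 32 bits of the 64-bit product (%edx after the mull), so callers pre-scale by 2^32/10^6 per microsecond, or 2^32/10^9 per nanosecond, rounded up so the delay never comes out short. A quick userspace check of both constants:

    /* Verify the pre-scale constants used by __udelay()/__ndelay(). */
    #include <stdio.h>

    int main(void)
    {
            unsigned long long per_us = (1ULL << 32) / 1000000 + 1;
            unsigned long long per_ns = (1ULL << 32) / 1000000000 + 1;

            printf("%#llx %#llx\n", per_us, per_ns);  /* prints 0x10c7 0x5 */
            return 0;
    }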
diff --git a/arch/x86/lib/delay_64.c b/arch/x86/lib/delay_64.c deleted file mode 100644 index 4c441be92641..000000000000 --- a/arch/x86/lib/delay_64.c +++ /dev/null | |||
@@ -1,85 +0,0 @@ | |||
1 | /* | ||
2 | * Precise Delay Loops for x86-64 | ||
3 | * | ||
4 | * Copyright (C) 1993 Linus Torvalds | ||
5 | * Copyright (C) 1997 Martin Mares <mj@atrey.karlin.mff.cuni.cz> | ||
6 | * | ||
7 | * The __delay function must _NOT_ be inlined as its execution time | ||
8 | * depends wildly on alignment on many x86 processors. | ||
9 | */ | ||
10 | |||
11 | #include <linux/module.h> | ||
12 | #include <linux/sched.h> | ||
13 | #include <linux/timex.h> | ||
14 | #include <linux/preempt.h> | ||
15 | #include <linux/delay.h> | ||
16 | #include <linux/init.h> | ||
17 | |||
18 | #include <asm/delay.h> | ||
19 | #include <asm/msr.h> | ||
20 | |||
21 | #ifdef CONFIG_SMP | ||
22 | #include <asm/smp.h> | ||
23 | #endif | ||
24 | |||
25 | int __devinit read_current_timer(unsigned long *timer_value) | ||
26 | { | ||
27 | rdtscll(*timer_value); | ||
28 | return 0; | ||
29 | } | ||
30 | |||
31 | void __delay(unsigned long loops) | ||
32 | { | ||
33 | unsigned bclock, now; | ||
34 | int cpu; | ||
35 | |||
36 | preempt_disable(); | ||
37 | cpu = smp_processor_id(); | ||
38 | rdtscl(bclock); | ||
39 | for (;;) { | ||
40 | rdtscl(now); | ||
41 | if ((now - bclock) >= loops) | ||
42 | break; | ||
43 | |||
44 | /* Allow RT tasks to run */ | ||
45 | preempt_enable(); | ||
46 | rep_nop(); | ||
47 | preempt_disable(); | ||
48 | |||
49 | /* | ||
50 | * It is possible that we moved to another CPU, and | ||
51 | * since TSC's are per-cpu we need to calculate | ||
52 | * that. The delay must guarantee that we wait "at | ||
53 | * least" the amount of time. Being moved to another | ||
54 | * CPU could make the wait longer but we just need to | ||
55 | * make sure we waited long enough. Rebalance the | ||
56 | * counter for this CPU. | ||
57 | */ | ||
58 | if (unlikely(cpu != smp_processor_id())) { | ||
59 | loops -= (now - bclock); | ||
60 | cpu = smp_processor_id(); | ||
61 | rdtscl(bclock); | ||
62 | } | ||
63 | } | ||
64 | preempt_enable(); | ||
65 | } | ||
66 | EXPORT_SYMBOL(__delay); | ||
67 | |||
68 | inline void __const_udelay(unsigned long xloops) | ||
69 | { | ||
70 | __delay(((xloops * HZ * | ||
71 | cpu_data(raw_smp_processor_id()).loops_per_jiffy) >> 32) + 1); | ||
72 | } | ||
73 | EXPORT_SYMBOL(__const_udelay); | ||
74 | |||
75 | void __udelay(unsigned long usecs) | ||
76 | { | ||
77 | __const_udelay(usecs * 0x000010c7); /* 2**32 / 1000000 (rounded up) */ | ||
78 | } | ||
79 | EXPORT_SYMBOL(__udelay); | ||
80 | |||
81 | void __ndelay(unsigned long nsecs) | ||
82 | { | ||
83 | __const_udelay(nsecs * 0x00005); /* 2**32 / 1000000000 (rounded up) */ | ||
84 | } | ||
85 | EXPORT_SYMBOL(__ndelay); | ||
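One property of the deleted loop worth noting (the unified delay.c keeps the same idea in its delay_tsc variant): bclock and now are 32-bit, and the comparison uses their unsigned difference rather than the raw values, so the loop survives TSC wraparound for any delay shorter than 2^32 cycles. A sketch:

    /* Unsigned 32-bit subtraction makes the elapsed-cycles test
     * immune to counter wraparound. */
    #include <stdio.h>

    int main(void)
    {
            unsigned int bclock = 0xfffffff0u;  /* sampled just before wrap */
            unsigned int now    = 0x00000010u;  /* sampled just after wrap  */

            printf("elapsed = %#x cycles\n", now - bclock);  /* 0x20 */
            return 0;
    }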
diff --git a/arch/x86/lib/getuser_64.S b/arch/x86/lib/getuser.S index 5448876261f8..ad374003742f 100644 --- a/arch/x86/lib/getuser_64.S +++ b/arch/x86/lib/getuser.S | |||
@@ -3,6 +3,7 @@ | |||
3 | * | 3 | * |
4 | * (C) Copyright 1998 Linus Torvalds | 4 | * (C) Copyright 1998 Linus Torvalds |
5 | * (C) Copyright 2005 Andi Kleen | 5 | * (C) Copyright 2005 Andi Kleen |
6 | * (C) Copyright 2008 Glauber Costa | ||
6 | * | 7 | * |
7 | * These functions have a non-standard call interface | 8 | * These functions have a non-standard call interface |
8 | * to make them more efficient, especially as they | 9 | * to make them more efficient, especially as they |
@@ -13,14 +14,13 @@ | |||
13 | /* | 14 | /* |
14 | * __get_user_X | 15 | * __get_user_X |
15 | * | 16 | * |
16 | * Inputs: %rcx contains the address. | 17 | * Inputs: %[r|e]ax contains the address. |
17 | * The register is modified, but all changes are undone | 18 | * The register is modified, but all changes are undone |
18 | * before returning because the C code doesn't know about it. | 19 | * before returning because the C code doesn't know about it. |
19 | * | 20 | * |
20 | * Outputs: %rax is error code (0 or -EFAULT) | 21 | * Outputs: %[r|e]ax is error code (0 or -EFAULT) |
21 | * %rdx contains zero-extended value | 22 | * %[r|e]dx contains zero-extended value |
22 | * | 23 | * |
23 | * %r8 is destroyed. | ||
24 | * | 24 | * |
25 | * These functions should not modify any other registers, | 25 | * These functions should not modify any other registers, |
26 | * as they get called from within inline assembly. | 26 | * as they get called from within inline assembly. |
@@ -32,78 +32,73 @@ | |||
32 | #include <asm/errno.h> | 32 | #include <asm/errno.h> |
33 | #include <asm/asm-offsets.h> | 33 | #include <asm/asm-offsets.h> |
34 | #include <asm/thread_info.h> | 34 | #include <asm/thread_info.h> |
35 | #include <asm/asm.h> | ||
35 | 36 | ||
36 | .text | 37 | .text |
37 | ENTRY(__get_user_1) | 38 | ENTRY(__get_user_1) |
38 | CFI_STARTPROC | 39 | CFI_STARTPROC |
39 | GET_THREAD_INFO(%r8) | 40 | GET_THREAD_INFO(%_ASM_DX) |
40 | cmpq threadinfo_addr_limit(%r8),%rcx | 41 | cmp TI_addr_limit(%_ASM_DX),%_ASM_AX |
41 | jae bad_get_user | 42 | jae bad_get_user |
42 | 1: movzb (%rcx),%edx | 43 | 1: movzb (%_ASM_AX),%edx |
43 | xorl %eax,%eax | 44 | xor %eax,%eax |
44 | ret | 45 | ret |
45 | CFI_ENDPROC | 46 | CFI_ENDPROC |
46 | ENDPROC(__get_user_1) | 47 | ENDPROC(__get_user_1) |
47 | 48 | ||
48 | ENTRY(__get_user_2) | 49 | ENTRY(__get_user_2) |
49 | CFI_STARTPROC | 50 | CFI_STARTPROC |
50 | GET_THREAD_INFO(%r8) | 51 | add $1,%_ASM_AX |
51 | addq $1,%rcx | 52 | jc bad_get_user |
52 | jc 20f | 53 | GET_THREAD_INFO(%_ASM_DX) |
53 | cmpq threadinfo_addr_limit(%r8),%rcx | 54 | cmp TI_addr_limit(%_ASM_DX),%_ASM_AX |
54 | jae 20f | 55 | jae bad_get_user |
55 | decq %rcx | 56 | 2: movzwl -1(%_ASM_AX),%edx |
56 | 2: movzwl (%rcx),%edx | 57 | xor %eax,%eax |
57 | xorl %eax,%eax | ||
58 | ret | 58 | ret |
59 | 20: decq %rcx | ||
60 | jmp bad_get_user | ||
61 | CFI_ENDPROC | 59 | CFI_ENDPROC |
62 | ENDPROC(__get_user_2) | 60 | ENDPROC(__get_user_2) |
63 | 61 | ||
64 | ENTRY(__get_user_4) | 62 | ENTRY(__get_user_4) |
65 | CFI_STARTPROC | 63 | CFI_STARTPROC |
66 | GET_THREAD_INFO(%r8) | 64 | add $3,%_ASM_AX |
67 | addq $3,%rcx | 65 | jc bad_get_user |
68 | jc 30f | 66 | GET_THREAD_INFO(%_ASM_DX) |
69 | cmpq threadinfo_addr_limit(%r8),%rcx | 67 | cmp TI_addr_limit(%_ASM_DX),%_ASM_AX |
70 | jae 30f | 68 | jae bad_get_user |
71 | subq $3,%rcx | 69 | 3: mov -3(%_ASM_AX),%edx |
72 | 3: movl (%rcx),%edx | 70 | xor %eax,%eax |
73 | xorl %eax,%eax | ||
74 | ret | 71 | ret |
75 | 30: subq $3,%rcx | ||
76 | jmp bad_get_user | ||
77 | CFI_ENDPROC | 72 | CFI_ENDPROC |
78 | ENDPROC(__get_user_4) | 73 | ENDPROC(__get_user_4) |
79 | 74 | ||
75 | #ifdef CONFIG_X86_64 | ||
80 | ENTRY(__get_user_8) | 76 | ENTRY(__get_user_8) |
81 | CFI_STARTPROC | 77 | CFI_STARTPROC |
82 | GET_THREAD_INFO(%r8) | 78 | add $7,%_ASM_AX |
83 | addq $7,%rcx | 79 | jc bad_get_user |
84 | jc 40f | 80 | GET_THREAD_INFO(%_ASM_DX) |
85 | cmpq threadinfo_addr_limit(%r8),%rcx | 81 | cmp TI_addr_limit(%_ASM_DX),%_ASM_AX |
86 | jae 40f | 82 | jae bad_get_user |
87 | subq $7,%rcx | 83 | 4: movq -7(%_ASM_AX),%_ASM_DX |
88 | 4: movq (%rcx),%rdx | 84 | xor %eax,%eax |
89 | xorl %eax,%eax | ||
90 | ret | 85 | ret |
91 | 40: subq $7,%rcx | ||
92 | jmp bad_get_user | ||
93 | CFI_ENDPROC | 86 | CFI_ENDPROC |
94 | ENDPROC(__get_user_8) | 87 | ENDPROC(__get_user_8) |
88 | #endif | ||
95 | 89 | ||
96 | bad_get_user: | 90 | bad_get_user: |
97 | CFI_STARTPROC | 91 | CFI_STARTPROC |
98 | xorl %edx,%edx | 92 | xor %edx,%edx |
99 | movq $(-EFAULT),%rax | 93 | mov $(-EFAULT),%_ASM_AX |
100 | ret | 94 | ret |
101 | CFI_ENDPROC | 95 | CFI_ENDPROC |
102 | END(bad_get_user) | 96 | END(bad_get_user) |
103 | 97 | ||
104 | .section __ex_table,"a" | 98 | .section __ex_table,"a" |
105 | .quad 1b,bad_get_user | 99 | _ASM_PTR 1b,bad_get_user |
106 | .quad 2b,bad_get_user | 100 | _ASM_PTR 2b,bad_get_user |
107 | .quad 3b,bad_get_user | 101 | _ASM_PTR 3b,bad_get_user |
108 | .quad 4b,bad_get_user | 102 | #ifdef CONFIG_X86_64 |
109 | .previous | 103 | _ASM_PTR 4b,bad_get_user |
104 | #endif | ||
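The calling convention described in the header comment is what the get_user() macro in <asm/uaccess.h> emits around these stubs. Roughly, as a sketch (the wrapper name and types here are invented, not kernel code):

    /*
     * The address is passed in %[r|e]ax; the error code comes back in
     * the same register and the zero-extended value in %[r|e]dx, as
     * the comment block above describes.
     */
    static inline int get_user_u8(unsigned char *dst, const void *uaddr)
    {
            long err;
            unsigned long val;

            asm volatile("call __get_user_1"
                         : "=a" (err), "=d" (val)
                         : "0" (uaddr));
            *dst = val;
            return err;
    }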
diff --git a/arch/x86/lib/getuser_32.S b/arch/x86/lib/getuser_32.S deleted file mode 100644 index 6d84b53f12a2..000000000000 --- a/arch/x86/lib/getuser_32.S +++ /dev/null | |||
@@ -1,78 +0,0 @@ | |||
1 | /* | ||
2 | * __get_user functions. | ||
3 | * | ||
4 | * (C) Copyright 1998 Linus Torvalds | ||
5 | * | ||
6 | * These functions have a non-standard call interface | ||
7 | * to make them more efficient, especially as they | ||
8 | * return an error value in addition to the "real" | ||
9 | * return value. | ||
10 | */ | ||
11 | #include <linux/linkage.h> | ||
12 | #include <asm/dwarf2.h> | ||
13 | #include <asm/thread_info.h> | ||
14 | |||
15 | |||
16 | /* | ||
17 | * __get_user_X | ||
18 | * | ||
19 | * Inputs: %eax contains the address | ||
20 | * | ||
21 | * Outputs: %eax is error code (0 or -EFAULT) | ||
22 | * %edx contains zero-extended value | ||
23 | * | ||
24 | * These functions should not modify any other registers, | ||
25 | * as they get called from within inline assembly. | ||
26 | */ | ||
27 | |||
28 | .text | ||
29 | ENTRY(__get_user_1) | ||
30 | CFI_STARTPROC | ||
31 | GET_THREAD_INFO(%edx) | ||
32 | cmpl TI_addr_limit(%edx),%eax | ||
33 | jae bad_get_user | ||
34 | 1: movzbl (%eax),%edx | ||
35 | xorl %eax,%eax | ||
36 | ret | ||
37 | CFI_ENDPROC | ||
38 | ENDPROC(__get_user_1) | ||
39 | |||
40 | ENTRY(__get_user_2) | ||
41 | CFI_STARTPROC | ||
42 | addl $1,%eax | ||
43 | jc bad_get_user | ||
44 | GET_THREAD_INFO(%edx) | ||
45 | cmpl TI_addr_limit(%edx),%eax | ||
46 | jae bad_get_user | ||
47 | 2: movzwl -1(%eax),%edx | ||
48 | xorl %eax,%eax | ||
49 | ret | ||
50 | CFI_ENDPROC | ||
51 | ENDPROC(__get_user_2) | ||
52 | |||
53 | ENTRY(__get_user_4) | ||
54 | CFI_STARTPROC | ||
55 | addl $3,%eax | ||
56 | jc bad_get_user | ||
57 | GET_THREAD_INFO(%edx) | ||
58 | cmpl TI_addr_limit(%edx),%eax | ||
59 | jae bad_get_user | ||
60 | 3: movl -3(%eax),%edx | ||
61 | xorl %eax,%eax | ||
62 | ret | ||
63 | CFI_ENDPROC | ||
64 | ENDPROC(__get_user_4) | ||
65 | |||
66 | bad_get_user: | ||
67 | CFI_STARTPROC | ||
68 | xorl %edx,%edx | ||
69 | movl $-14,%eax | ||
70 | ret | ||
71 | CFI_ENDPROC | ||
72 | END(bad_get_user) | ||
73 | |||
74 | .section __ex_table,"a" | ||
75 | .long 1b,bad_get_user | ||
76 | .long 2b,bad_get_user | ||
77 | .long 3b,bad_get_user | ||
78 | .previous | ||
diff --git a/arch/x86/lib/msr-on-cpu.c b/arch/x86/lib/msr-on-cpu.c index 57d043fa893e..d5a2b39f882b 100644 --- a/arch/x86/lib/msr-on-cpu.c +++ b/arch/x86/lib/msr-on-cpu.c | |||
@@ -30,10 +30,10 @@ static int _rdmsr_on_cpu(unsigned int cpu, u32 msr_no, u32 *l, u32 *h, int safe) | |||
30 | 30 | ||
31 | rv.msr_no = msr_no; | 31 | rv.msr_no = msr_no; |
32 | if (safe) { | 32 | if (safe) { |
33 | smp_call_function_single(cpu, __rdmsr_safe_on_cpu, &rv, 0, 1); | 33 | smp_call_function_single(cpu, __rdmsr_safe_on_cpu, &rv, 1); |
34 | err = rv.err; | 34 | err = rv.err; |
35 | } else { | 35 | } else { |
36 | smp_call_function_single(cpu, __rdmsr_on_cpu, &rv, 0, 1); | 36 | smp_call_function_single(cpu, __rdmsr_on_cpu, &rv, 1); |
37 | } | 37 | } |
38 | *l = rv.l; | 38 | *l = rv.l; |
39 | *h = rv.h; | 39 | *h = rv.h; |
@@ -64,10 +64,10 @@ static int _wrmsr_on_cpu(unsigned int cpu, u32 msr_no, u32 l, u32 h, int safe) | |||
64 | rv.l = l; | 64 | rv.l = l; |
65 | rv.h = h; | 65 | rv.h = h; |
66 | if (safe) { | 66 | if (safe) { |
67 | smp_call_function_single(cpu, __wrmsr_safe_on_cpu, &rv, 0, 1); | 67 | smp_call_function_single(cpu, __wrmsr_safe_on_cpu, &rv, 1); |
68 | err = rv.err; | 68 | err = rv.err; |
69 | } else { | 69 | } else { |
70 | smp_call_function_single(cpu, __wrmsr_on_cpu, &rv, 0, 1); | 70 | smp_call_function_single(cpu, __wrmsr_on_cpu, &rv, 1); |
71 | } | 71 | } |
72 | 72 | ||
73 | return err; | 73 | return err; |
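The dropped literal 0 in these calls was the old nonatomic/retry flag, which the generic SMP helpers no longer take. The surviving prototype, for reference:

    /* Post-merge prototype from <linux/smp.h>: (cpu, func, info, wait);
     * wait=1 blocks until the target CPU has run func. */
    int smp_call_function_single(int cpu, void (*func)(void *info),
                                 void *info, int wait);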
diff --git a/arch/x86/lib/putuser_32.S b/arch/x86/lib/putuser.S index f58fba109d18..36b0d15ae6e9 100644 --- a/arch/x86/lib/putuser_32.S +++ b/arch/x86/lib/putuser.S | |||
@@ -2,6 +2,8 @@ | |||
2 | * __put_user functions. | 2 | * __put_user functions. |
3 | * | 3 | * |
4 | * (C) Copyright 2005 Linus Torvalds | 4 | * (C) Copyright 2005 Linus Torvalds |
5 | * (C) Copyright 2005 Andi Kleen | ||
6 | * (C) Copyright 2008 Glauber Costa | ||
5 | * | 7 | * |
6 | * These functions have a non-standard call interface | 8 | * These functions have a non-standard call interface |
7 | * to make them more efficient, especially as they | 9 | * to make them more efficient, especially as they |
@@ -11,6 +13,8 @@ | |||
11 | #include <linux/linkage.h> | 13 | #include <linux/linkage.h> |
12 | #include <asm/dwarf2.h> | 14 | #include <asm/dwarf2.h> |
13 | #include <asm/thread_info.h> | 15 | #include <asm/thread_info.h> |
16 | #include <asm/errno.h> | ||
17 | #include <asm/asm.h> | ||
14 | 18 | ||
15 | 19 | ||
16 | /* | 20 | /* |
@@ -26,73 +30,68 @@ | |||
26 | */ | 30 | */ |
27 | 31 | ||
28 | #define ENTER CFI_STARTPROC ; \ | 32 | #define ENTER CFI_STARTPROC ; \ |
29 | pushl %ebx ; \ | 33 | GET_THREAD_INFO(%_ASM_BX) |
30 | CFI_ADJUST_CFA_OFFSET 4 ; \ | 34 | #define EXIT ret ; \ |
31 | CFI_REL_OFFSET ebx, 0 ; \ | ||
32 | GET_THREAD_INFO(%ebx) | ||
33 | #define EXIT popl %ebx ; \ | ||
34 | CFI_ADJUST_CFA_OFFSET -4 ; \ | ||
35 | CFI_RESTORE ebx ; \ | ||
36 | ret ; \ | ||
37 | CFI_ENDPROC | 35 | CFI_ENDPROC |
38 | 36 | ||
39 | .text | 37 | .text |
40 | ENTRY(__put_user_1) | 38 | ENTRY(__put_user_1) |
41 | ENTER | 39 | ENTER |
42 | cmpl TI_addr_limit(%ebx),%ecx | 40 | cmp TI_addr_limit(%_ASM_BX),%_ASM_CX |
43 | jae bad_put_user | 41 | jae bad_put_user |
44 | 1: movb %al,(%ecx) | 42 | 1: movb %al,(%_ASM_CX) |
45 | xorl %eax,%eax | 43 | xor %eax,%eax |
46 | EXIT | 44 | EXIT |
47 | ENDPROC(__put_user_1) | 45 | ENDPROC(__put_user_1) |
48 | 46 | ||
49 | ENTRY(__put_user_2) | 47 | ENTRY(__put_user_2) |
50 | ENTER | 48 | ENTER |
51 | movl TI_addr_limit(%ebx),%ebx | 49 | mov TI_addr_limit(%_ASM_BX),%_ASM_BX |
52 | subl $1,%ebx | 50 | sub $1,%_ASM_BX |
53 | cmpl %ebx,%ecx | 51 | cmp %_ASM_BX,%_ASM_CX |
54 | jae bad_put_user | 52 | jae bad_put_user |
55 | 2: movw %ax,(%ecx) | 53 | 2: movw %ax,(%_ASM_CX) |
56 | xorl %eax,%eax | 54 | xor %eax,%eax |
57 | EXIT | 55 | EXIT |
58 | ENDPROC(__put_user_2) | 56 | ENDPROC(__put_user_2) |
59 | 57 | ||
60 | ENTRY(__put_user_4) | 58 | ENTRY(__put_user_4) |
61 | ENTER | 59 | ENTER |
62 | movl TI_addr_limit(%ebx),%ebx | 60 | mov TI_addr_limit(%_ASM_BX),%_ASM_BX |
63 | subl $3,%ebx | 61 | sub $3,%_ASM_BX |
64 | cmpl %ebx,%ecx | 62 | cmp %_ASM_BX,%_ASM_CX |
65 | jae bad_put_user | 63 | jae bad_put_user |
66 | 3: movl %eax,(%ecx) | 64 | 3: movl %eax,(%_ASM_CX) |
67 | xorl %eax,%eax | 65 | xor %eax,%eax |
68 | EXIT | 66 | EXIT |
69 | ENDPROC(__put_user_4) | 67 | ENDPROC(__put_user_4) |
70 | 68 | ||
71 | ENTRY(__put_user_8) | 69 | ENTRY(__put_user_8) |
72 | ENTER | 70 | ENTER |
73 | movl TI_addr_limit(%ebx),%ebx | 71 | mov TI_addr_limit(%_ASM_BX),%_ASM_BX |
74 | subl $7,%ebx | 72 | sub $7,%_ASM_BX |
75 | cmpl %ebx,%ecx | 73 | cmp %_ASM_BX,%_ASM_CX |
76 | jae bad_put_user | 74 | jae bad_put_user |
77 | 4: movl %eax,(%ecx) | 75 | 4: mov %_ASM_AX,(%_ASM_CX) |
78 | 5: movl %edx,4(%ecx) | 76 | #ifdef CONFIG_X86_32 |
79 | xorl %eax,%eax | 77 | 5: movl %edx,4(%_ASM_CX) |
78 | #endif | ||
79 | xor %eax,%eax | ||
80 | EXIT | 80 | EXIT |
81 | ENDPROC(__put_user_8) | 81 | ENDPROC(__put_user_8) |
82 | 82 | ||
83 | bad_put_user: | 83 | bad_put_user: |
84 | CFI_STARTPROC simple | 84 | CFI_STARTPROC |
85 | CFI_DEF_CFA esp, 2*4 | 85 | movl $-EFAULT,%eax |
86 | CFI_OFFSET eip, -1*4 | ||
87 | CFI_OFFSET ebx, -2*4 | ||
88 | movl $-14,%eax | ||
89 | EXIT | 86 | EXIT |
90 | END(bad_put_user) | 87 | END(bad_put_user) |
91 | 88 | ||
92 | .section __ex_table,"a" | 89 | .section __ex_table,"a" |
93 | .long 1b,bad_put_user | 90 | _ASM_PTR 1b,bad_put_user |
94 | .long 2b,bad_put_user | 91 | _ASM_PTR 2b,bad_put_user |
95 | .long 3b,bad_put_user | 92 | _ASM_PTR 3b,bad_put_user |
96 | .long 4b,bad_put_user | 93 | _ASM_PTR 4b,bad_put_user |
97 | .long 5b,bad_put_user | 94 | #ifdef CONFIG_X86_32 |
95 | _ASM_PTR 5b,bad_put_user | ||
96 | #endif | ||
98 | .previous | 97 | .previous |
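Note the two mirror-image limit checks in the unified files: __get_user_N adds size-1 to the pointer and lets the carry flag reject addresses that wrap, while __put_user_N subtracts size-1 from addr_limit before the compare. A C model of the putuser variant (hypothetical helper, not kernel code; assumes limit >= size):

    #include <stdio.h>

    static int put_range_ok(unsigned long addr, unsigned long limit,
                            unsigned int size)
    {
            return addr < limit - (size - 1);  /* the sub/cmp/jae above */
    }

    int main(void)
    {
            unsigned long limit = 0xc0000000UL;  /* e.g. the i386 user limit */

            printf("%d %d\n",
                   put_range_ok(0xbffffffcUL, limit, 4),   /* 1: last word fits */
                   put_range_ok(0xbffffffdUL, limit, 4));  /* 0: crosses limit  */
            return 0;
    }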
diff --git a/arch/x86/lib/putuser_64.S b/arch/x86/lib/putuser_64.S deleted file mode 100644 index 4989f5a8fa9b..000000000000 --- a/arch/x86/lib/putuser_64.S +++ /dev/null | |||
@@ -1,106 +0,0 @@ | |||
1 | /* | ||
2 | * __put_user functions. | ||
3 | * | ||
4 | * (C) Copyright 1998 Linus Torvalds | ||
5 | * (C) Copyright 2005 Andi Kleen | ||
6 | * | ||
7 | * These functions have a non-standard call interface | ||
8 | * to make them more efficient, especially as they | ||
9 | * return an error value in addition to the "real" | ||
10 | * return value. | ||
11 | */ | ||
12 | |||
13 | /* | ||
14 | * __put_user_X | ||
15 | * | ||
16 | * Inputs: %rcx contains the address | ||
17 | * %rdx contains new value | ||
18 | * | ||
19 | * Outputs: %rax is error code (0 or -EFAULT) | ||
20 | * | ||
21 | * %r8 is destroyed. | ||
22 | * | ||
23 | * These functions should not modify any other registers, | ||
24 | * as they get called from within inline assembly. | ||
25 | */ | ||
26 | |||
27 | #include <linux/linkage.h> | ||
28 | #include <asm/dwarf2.h> | ||
29 | #include <asm/page.h> | ||
30 | #include <asm/errno.h> | ||
31 | #include <asm/asm-offsets.h> | ||
32 | #include <asm/thread_info.h> | ||
33 | |||
34 | .text | ||
35 | ENTRY(__put_user_1) | ||
36 | CFI_STARTPROC | ||
37 | GET_THREAD_INFO(%r8) | ||
38 | cmpq threadinfo_addr_limit(%r8),%rcx | ||
39 | jae bad_put_user | ||
40 | 1: movb %dl,(%rcx) | ||
41 | xorl %eax,%eax | ||
42 | ret | ||
43 | CFI_ENDPROC | ||
44 | ENDPROC(__put_user_1) | ||
45 | |||
46 | ENTRY(__put_user_2) | ||
47 | CFI_STARTPROC | ||
48 | GET_THREAD_INFO(%r8) | ||
49 | addq $1,%rcx | ||
50 | jc 20f | ||
51 | cmpq threadinfo_addr_limit(%r8),%rcx | ||
52 | jae 20f | ||
53 | decq %rcx | ||
54 | 2: movw %dx,(%rcx) | ||
55 | xorl %eax,%eax | ||
56 | ret | ||
57 | 20: decq %rcx | ||
58 | jmp bad_put_user | ||
59 | CFI_ENDPROC | ||
60 | ENDPROC(__put_user_2) | ||
61 | |||
62 | ENTRY(__put_user_4) | ||
63 | CFI_STARTPROC | ||
64 | GET_THREAD_INFO(%r8) | ||
65 | addq $3,%rcx | ||
66 | jc 30f | ||
67 | cmpq threadinfo_addr_limit(%r8),%rcx | ||
68 | jae 30f | ||
69 | subq $3,%rcx | ||
70 | 3: movl %edx,(%rcx) | ||
71 | xorl %eax,%eax | ||
72 | ret | ||
73 | 30: subq $3,%rcx | ||
74 | jmp bad_put_user | ||
75 | CFI_ENDPROC | ||
76 | ENDPROC(__put_user_4) | ||
77 | |||
78 | ENTRY(__put_user_8) | ||
79 | CFI_STARTPROC | ||
80 | GET_THREAD_INFO(%r8) | ||
81 | addq $7,%rcx | ||
82 | jc 40f | ||
83 | cmpq threadinfo_addr_limit(%r8),%rcx | ||
84 | jae 40f | ||
85 | subq $7,%rcx | ||
86 | 4: movq %rdx,(%rcx) | ||
87 | xorl %eax,%eax | ||
88 | ret | ||
89 | 40: subq $7,%rcx | ||
90 | jmp bad_put_user | ||
91 | CFI_ENDPROC | ||
92 | ENDPROC(__put_user_8) | ||
93 | |||
94 | bad_put_user: | ||
95 | CFI_STARTPROC | ||
96 | movq $(-EFAULT),%rax | ||
97 | ret | ||
98 | CFI_ENDPROC | ||
99 | END(bad_put_user) | ||
100 | |||
101 | .section __ex_table,"a" | ||
102 | .quad 1b,bad_put_user | ||
103 | .quad 2b,bad_put_user | ||
104 | .quad 3b,bad_put_user | ||
105 | .quad 4b,bad_put_user | ||
106 | .previous | ||
diff --git a/arch/x86/lib/thunk_32.S b/arch/x86/lib/thunk_32.S new file mode 100644 index 000000000000..650b11e00ecc --- /dev/null +++ b/arch/x86/lib/thunk_32.S | |||
@@ -0,0 +1,47 @@ | |||
1 | /* | ||
2 | * Trampoline to trace irqs off. (otherwise CALLER_ADDR1 might crash) | ||
3 | * Copyright 2008 by Steven Rostedt, Red Hat, Inc | ||
4 | * (inspired by Andi Kleen's thunk_64.S) | ||
5 | * Subject to the GNU public license, v.2. No warranty of any kind. | ||
6 | */ | ||
7 | |||
8 | #include <linux/linkage.h> | ||
9 | |||
10 | #define ARCH_TRACE_IRQS_ON \ | ||
11 | pushl %eax; \ | ||
12 | pushl %ecx; \ | ||
13 | pushl %edx; \ | ||
14 | call trace_hardirqs_on; \ | ||
15 | popl %edx; \ | ||
16 | popl %ecx; \ | ||
17 | popl %eax; | ||
18 | |||
19 | #define ARCH_TRACE_IRQS_OFF \ | ||
20 | pushl %eax; \ | ||
21 | pushl %ecx; \ | ||
22 | pushl %edx; \ | ||
23 | call trace_hardirqs_off; \ | ||
24 | popl %edx; \ | ||
25 | popl %ecx; \ | ||
26 | popl %eax; | ||
27 | |||
28 | #ifdef CONFIG_TRACE_IRQFLAGS | ||
29 | /* put return address in eax (arg1) */ | ||
30 | .macro thunk_ra name,func | ||
31 | .globl \name | ||
32 | \name: | ||
33 | pushl %eax | ||
34 | pushl %ecx | ||
35 | pushl %edx | ||
36 | /* Place EIP in the arg1 */ | ||
37 | movl 3*4(%esp), %eax | ||
38 | call \func | ||
39 | popl %edx | ||
40 | popl %ecx | ||
41 | popl %eax | ||
42 | ret | ||
43 | .endm | ||
44 | |||
45 | thunk_ra trace_hardirqs_on_thunk,trace_hardirqs_on_caller | ||
46 | thunk_ra trace_hardirqs_off_thunk,trace_hardirqs_off_caller | ||
47 | #endif | ||
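What the thunk_ra macro accomplishes, expressed in C as a sketch (function name invented; the 64-bit variant below does the same, fishing the return address out at 9*8(%rsp) because SAVE_ARGS has pushed nine registers first). The real reason it is assembly is that it must also preserve all call-clobbered registers for the inline-asm call sites, which C cannot guarantee:

    extern void trace_hardirqs_on_caller(unsigned long ip);

    void trace_hardirqs_on_thunk_in_c(void)
    {
            /* hand the tracer the address the irq-flags change came from */
            trace_hardirqs_on_caller(
                    (unsigned long)__builtin_return_address(0));
    }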
diff --git a/arch/x86/lib/thunk_64.S b/arch/x86/lib/thunk_64.S index e009251d4e9f..bf9a7d5a5428 100644 --- a/arch/x86/lib/thunk_64.S +++ b/arch/x86/lib/thunk_64.S | |||
@@ -2,6 +2,7 @@ | |||
2 | * Save registers before calling assembly functions. This avoids | 2 | * Save registers before calling assembly functions. This avoids |
3 | * disturbance of register allocation in some inline assembly constructs. | 3 | * disturbance of register allocation in some inline assembly constructs. |
4 | * Copyright 2001,2002 by Andi Kleen, SuSE Labs. | 4 | * Copyright 2001,2002 by Andi Kleen, SuSE Labs. |
5 | * Added trace_hardirqs callers - Copyright 2007 Steven Rostedt, Red Hat, Inc. | ||
5 | * Subject to the GNU public license, v.2. No warranty of any kind. | 6 | * Subject to the GNU public license, v.2. No warranty of any kind. |
6 | */ | 7 | */ |
7 | 8 | ||
@@ -42,8 +43,22 @@ | |||
42 | #endif | 43 | #endif |
43 | 44 | ||
44 | #ifdef CONFIG_TRACE_IRQFLAGS | 45 | #ifdef CONFIG_TRACE_IRQFLAGS |
45 | thunk trace_hardirqs_on_thunk,trace_hardirqs_on | 46 | /* put return address in rdi (arg1) */ |
46 | thunk trace_hardirqs_off_thunk,trace_hardirqs_off | 47 | .macro thunk_ra name,func |
48 | .globl \name | ||
49 | \name: | ||
50 | CFI_STARTPROC | ||
51 | SAVE_ARGS | ||
52 | /* SAVE_ARGS pushes 9 elements */ | ||
53 | /* the next element would be the rip */ | ||
54 | movq 9*8(%rsp), %rdi | ||
55 | call \func | ||
56 | jmp restore | ||
57 | CFI_ENDPROC | ||
58 | .endm | ||
59 | |||
60 | thunk_ra trace_hardirqs_on_thunk,trace_hardirqs_on_caller | ||
61 | thunk_ra trace_hardirqs_off_thunk,trace_hardirqs_off_caller | ||
47 | #endif | 62 | #endif |
48 | 63 | ||
49 | #ifdef CONFIG_DEBUG_LOCK_ALLOC | 64 | #ifdef CONFIG_DEBUG_LOCK_ALLOC |
diff --git a/arch/x86/lib/usercopy_64.c b/arch/x86/lib/usercopy_64.c index 0c89d1bb0287..f4df6e7c718b 100644 --- a/arch/x86/lib/usercopy_64.c +++ b/arch/x86/lib/usercopy_64.c | |||
@@ -158,3 +158,26 @@ unsigned long copy_in_user(void __user *to, const void __user *from, unsigned le | |||
158 | } | 158 | } |
159 | EXPORT_SYMBOL(copy_in_user); | 159 | EXPORT_SYMBOL(copy_in_user); |
160 | 160 | ||
161 | /* | ||
162 | * Try to copy last bytes and clear the rest if needed. | ||
163 | * Since a protection fault in copy_from/to_user is not a normal situation, | ||
164 | * it is not necessary to optimize tail handling. | ||
165 | */ | ||
166 | unsigned long | ||
167 | copy_user_handle_tail(char *to, char *from, unsigned len, unsigned zerorest) | ||
168 | { | ||
169 | char c; | ||
170 | unsigned zero_len; | ||
171 | |||
172 | for (; len; --len) { | ||
173 | if (__get_user_nocheck(c, from++, sizeof(char))) | ||
174 | break; | ||
175 | if (__put_user_nocheck(c, to++, sizeof(char))) | ||
176 | break; | ||
177 | } | ||
178 | |||
179 | for (c = 0, zero_len = len; zerorest && zero_len; --zero_len) | ||
180 | if (__put_user_nocheck(c, to++, sizeof(char))) | ||
181 | break; | ||
182 | return len; | ||
183 | } | ||
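copy_user_handle_tail() is the common landing point for the .fixup stubs in the copy_user assembly: they arrive with to/from as left by the faulting loop, len recomputed as shown earlier, and zerorest (passed through %ecx) selecting copy_from_user semantics (clear the destination tail) versus copy_to_user/copy_in_user semantics (leave it alone). A hypothetical fixup-side caller, for illustration only:

    static unsigned long from_user_fixup(char *to, char *from, unsigned len)
    {
            /* return value is what copy_*_user() reports as uncopied */
            return copy_user_handle_tail(to, from, len, 1 /* zero tail */);
    }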
diff --git a/arch/x86/mach-default/setup.c b/arch/x86/mach-default/setup.c index 2f5e277686b8..48278fa7d3de 100644 --- a/arch/x86/mach-default/setup.c +++ b/arch/x86/mach-default/setup.c | |||
@@ -10,6 +10,14 @@ | |||
10 | #include <asm/e820.h> | 10 | #include <asm/e820.h> |
11 | #include <asm/setup.h> | 11 | #include <asm/setup.h> |
12 | 12 | ||
13 | /* | ||
14 | * Any quirks to be performed to initialize timers/irqs/etc? | ||
15 | */ | ||
16 | int (*arch_time_init_quirk)(void); | ||
17 | int (*arch_pre_intr_init_quirk)(void); | ||
18 | int (*arch_intr_init_quirk)(void); | ||
19 | int (*arch_trap_init_quirk)(void); | ||
20 | |||
13 | #ifdef CONFIG_HOTPLUG_CPU | 21 | #ifdef CONFIG_HOTPLUG_CPU |
14 | #define DEFAULT_SEND_IPI (1) | 22 | #define DEFAULT_SEND_IPI (1) |
15 | #else | 23 | #else |
@@ -29,6 +37,10 @@ int no_broadcast=DEFAULT_SEND_IPI; | |||
29 | **/ | 37 | **/ |
30 | void __init pre_intr_init_hook(void) | 38 | void __init pre_intr_init_hook(void) |
31 | { | 39 | { |
40 | if (arch_pre_intr_init_quirk) { | ||
41 | if (arch_pre_intr_init_quirk()) | ||
42 | return; | ||
43 | } | ||
32 | init_ISA_irqs(); | 44 | init_ISA_irqs(); |
33 | } | 45 | } |
34 | 46 | ||
@@ -52,6 +64,10 @@ static struct irqaction irq2 = { | |||
52 | **/ | 64 | **/ |
53 | void __init intr_init_hook(void) | 65 | void __init intr_init_hook(void) |
54 | { | 66 | { |
67 | if (arch_intr_init_quirk) { | ||
68 | if (arch_intr_init_quirk()) | ||
69 | return; | ||
70 | } | ||
55 | #ifdef CONFIG_X86_LOCAL_APIC | 71 | #ifdef CONFIG_X86_LOCAL_APIC |
56 | apic_intr_init(); | 72 | apic_intr_init(); |
57 | #endif | 73 | #endif |
@@ -65,7 +81,7 @@ void __init intr_init_hook(void) | |||
65 | * | 81 | * |
66 | * Description: | 82 | * Description: |
67 | * generally used to activate any machine specific identification | 83 | * generally used to activate any machine specific identification |
68 | * routines that may be needed before setup_arch() runs. On VISWS | 84 | * routines that may be needed before setup_arch() runs. On Voyager |
69 | * this is used to get the board revision and type. | 85 | * this is used to get the board revision and type. |
70 | **/ | 86 | **/ |
71 | void __init pre_setup_arch_hook(void) | 87 | void __init pre_setup_arch_hook(void) |
@@ -81,6 +97,10 @@ void __init pre_setup_arch_hook(void) | |||
81 | **/ | 97 | **/ |
82 | void __init trap_init_hook(void) | 98 | void __init trap_init_hook(void) |
83 | { | 99 | { |
100 | if (arch_trap_init_quirk) { | ||
101 | if (arch_trap_init_quirk()) | ||
102 | return; | ||
103 | } | ||
84 | } | 104 | } |
85 | 105 | ||
86 | static struct irqaction irq0 = { | 106 | static struct irqaction irq0 = { |
@@ -99,6 +119,16 @@ static struct irqaction irq0 = { | |||
99 | **/ | 119 | **/ |
100 | void __init time_init_hook(void) | 120 | void __init time_init_hook(void) |
101 | { | 121 | { |
122 | if (arch_time_init_quirk) { | ||
123 | /* | ||
124 | * A nonzero return code does not mean failure, it means | ||
125 | * that the architecture quirk does not want any | ||
126 | * generic (timer) setup to be performed after this: | ||
127 | */ | ||
128 | if (arch_time_init_quirk()) | ||
129 | return; | ||
130 | } | ||
131 | |||
102 | irq0.mask = cpumask_of_cpu(0); | 132 | irq0.mask = cpumask_of_cpu(0); |
103 | setup_irq(0, &irq0); | 133 | setup_irq(0, &irq0); |
104 | } | 134 | } |
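These four function pointers let quirk code override the default hooks at run time rather than at link time: a quirk returning nonzero claims the hook entirely (for time_init, explicitly not a failure), while returning 0 lets the generic path run as well. A hypothetical user, with invented names; the pointer must be assigned before the corresponding *_hook() runs, i.e. from early platform setup:

    static int __init my_platform_time_init(void)
    {
            /* program a nonstandard timer here ... */
            return 1;   /* nonzero: skip the generic irq0/timer setup */
    }

    static void __init my_platform_early_setup(void)
    {
            arch_time_init_quirk = my_platform_time_init;
    }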
diff --git a/arch/x86/mach-visws/Makefile b/arch/x86/mach-visws/Makefile deleted file mode 100644 index 835fd96ad768..000000000000 --- a/arch/x86/mach-visws/Makefile +++ /dev/null | |||
@@ -1,8 +0,0 @@ | |||
1 | # | ||
2 | # Makefile for the linux kernel. | ||
3 | # | ||
4 | |||
5 | obj-y := setup.o traps.o reboot.o | ||
6 | |||
7 | obj-$(CONFIG_X86_VISWS_APIC) += visws_apic.o | ||
8 | obj-$(CONFIG_X86_LOCAL_APIC) += mpparse.o | ||
diff --git a/arch/x86/mach-visws/mpparse.c b/arch/x86/mach-visws/mpparse.c deleted file mode 100644 index a2fb78c0d154..000000000000 --- a/arch/x86/mach-visws/mpparse.c +++ /dev/null | |||
@@ -1,85 +0,0 @@ | |||
1 | |||
2 | #include <linux/init.h> | ||
3 | #include <linux/smp.h> | ||
4 | |||
5 | #include <asm/smp.h> | ||
6 | #include <asm/io.h> | ||
7 | |||
8 | #include "cobalt.h" | ||
9 | #include "mach_apic.h" | ||
10 | |||
11 | extern unsigned int __cpuinitdata maxcpus; | ||
12 | |||
13 | /* | ||
14 | * The Visual Workstation is Intel MP compliant in the hardware | ||
15 | * sense, but it doesn't have a BIOS(-configuration table). | ||
16 | * No problem for Linux. | ||
17 | */ | ||
18 | |||
19 | static void __init MP_processor_info (struct mpc_config_processor *m) | ||
20 | { | ||
21 | int ver, logical_apicid; | ||
22 | physid_mask_t apic_cpus; | ||
23 | |||
24 | if (!(m->mpc_cpuflag & CPU_ENABLED)) | ||
25 | return; | ||
26 | |||
27 | logical_apicid = m->mpc_apicid; | ||
28 | printk(KERN_INFO "%sCPU #%d %u:%u APIC version %d\n", | ||
29 | m->mpc_cpuflag & CPU_BOOTPROCESSOR ? "Bootup " : "", | ||
30 | m->mpc_apicid, | ||
31 | (m->mpc_cpufeature & CPU_FAMILY_MASK) >> 8, | ||
32 | (m->mpc_cpufeature & CPU_MODEL_MASK) >> 4, | ||
33 | m->mpc_apicver); | ||
34 | |||
35 | if (m->mpc_cpuflag & CPU_BOOTPROCESSOR) | ||
36 | boot_cpu_physical_apicid = m->mpc_apicid; | ||
37 | |||
38 | ver = m->mpc_apicver; | ||
39 | if ((ver >= 0x14 && m->mpc_apicid >= 0xff) || m->mpc_apicid >= 0xf) { | ||
40 | printk(KERN_ERR "Processor #%d INVALID. (Max ID: %d).\n", | ||
41 | m->mpc_apicid, MAX_APICS); | ||
42 | return; | ||
43 | } | ||
44 | |||
45 | apic_cpus = apicid_to_cpu_present(m->mpc_apicid); | ||
46 | physids_or(phys_cpu_present_map, phys_cpu_present_map, apic_cpus); | ||
47 | /* | ||
48 | * Validate version | ||
49 | */ | ||
50 | if (ver == 0x0) { | ||
51 | printk(KERN_ERR "BIOS bug, APIC version is 0 for CPU#%d! " | ||
52 | "fixing up to 0x10. (tell your hw vendor)\n", | ||
53 | m->mpc_apicid); | ||
54 | ver = 0x10; | ||
55 | } | ||
56 | apic_version[m->mpc_apicid] = ver; | ||
57 | } | ||
58 | |||
59 | void __init find_smp_config(void) | ||
60 | { | ||
61 | struct mpc_config_processor *mp = phys_to_virt(CO_CPU_TAB_PHYS); | ||
62 | unsigned short ncpus = readw(phys_to_virt(CO_CPU_NUM_PHYS)); | ||
63 | |||
64 | if (ncpus > CO_CPU_MAX) { | ||
65 | printk(KERN_WARNING "find_visws_smp: got cpu count of %d at %p\n", | ||
66 | ncpus, mp); | ||
67 | |||
68 | ncpus = CO_CPU_MAX; | ||
69 | } | ||
70 | |||
71 | if (ncpus > maxcpus) | ||
72 | ncpus = maxcpus; | ||
73 | |||
74 | #ifdef CONFIG_X86_LOCAL_APIC | ||
75 | smp_found_config = 1; | ||
76 | #endif | ||
77 | while (ncpus--) | ||
78 | MP_processor_info(mp++); | ||
79 | |||
80 | mp_lapic_addr = APIC_DEFAULT_PHYS_BASE; | ||
81 | } | ||
82 | |||
83 | void __init get_smp_config (void) | ||
84 | { | ||
85 | } | ||
diff --git a/arch/x86/mach-visws/reboot.c b/arch/x86/mach-visws/reboot.c deleted file mode 100644 index 99332abfad42..000000000000 --- a/arch/x86/mach-visws/reboot.c +++ /dev/null | |||
@@ -1,55 +0,0 @@ | |||
1 | #include <linux/module.h> | ||
2 | #include <linux/smp.h> | ||
3 | #include <linux/delay.h> | ||
4 | |||
5 | #include <asm/io.h> | ||
6 | #include "piix4.h" | ||
7 | |||
8 | void (*pm_power_off)(void); | ||
9 | EXPORT_SYMBOL(pm_power_off); | ||
10 | |||
11 | void machine_shutdown(void) | ||
12 | { | ||
13 | #ifdef CONFIG_SMP | ||
14 | smp_send_stop(); | ||
15 | #endif | ||
16 | } | ||
17 | |||
18 | void machine_emergency_restart(void) | ||
19 | { | ||
20 | /* | ||
21 | * Visual Workstations restart after this | ||
22 | * register is poked on the PIIX4 | ||
23 | */ | ||
24 | outb(PIIX4_RESET_VAL, PIIX4_RESET_PORT); | ||
25 | } | ||
26 | |||
27 | void machine_restart(char * __unused) | ||
28 | { | ||
29 | machine_shutdown(); | ||
30 | machine_emergency_restart(); | ||
31 | } | ||
32 | |||
33 | void machine_power_off(void) | ||
34 | { | ||
35 | unsigned short pm_status; | ||
36 | extern unsigned int pci_bus0; | ||
37 | |||
38 | while ((pm_status = inw(PMSTS_PORT)) & 0x100) | ||
39 | outw(pm_status, PMSTS_PORT); | ||
40 | |||
41 | outw(PM_SUSPEND_ENABLE, PMCNTRL_PORT); | ||
42 | |||
43 | mdelay(10); | ||
44 | |||
45 | #define PCI_CONF1_ADDRESS(bus, devfn, reg) \ | ||
46 | (0x80000000 | (bus << 16) | (devfn << 8) | (reg & ~3)) | ||
47 | |||
48 | outl(PCI_CONF1_ADDRESS(pci_bus0, SPECIAL_DEV, SPECIAL_REG), 0xCF8); | ||
49 | outl(PIIX_SPECIAL_STOP, 0xCFC); | ||
50 | } | ||
51 | |||
52 | void machine_halt(void) | ||
53 | { | ||
54 | } | ||
55 | |||
diff --git a/arch/x86/mach-visws/setup.c b/arch/x86/mach-visws/setup.c deleted file mode 100644 index d67868ec9b7f..000000000000 --- a/arch/x86/mach-visws/setup.c +++ /dev/null | |||
@@ -1,183 +0,0 @@ | |||
1 | /* | ||
2 | * Unmaintained SGI Visual Workstation support. | ||
3 | * Split out from setup.c by davej@suse.de | ||
4 | */ | ||
5 | |||
6 | #include <linux/smp.h> | ||
7 | #include <linux/init.h> | ||
8 | #include <linux/interrupt.h> | ||
9 | #include <linux/module.h> | ||
10 | |||
11 | #include <asm/fixmap.h> | ||
12 | #include <asm/arch_hooks.h> | ||
13 | #include <asm/io.h> | ||
14 | #include <asm/e820.h> | ||
15 | #include <asm/setup.h> | ||
16 | #include "cobalt.h" | ||
17 | #include "piix4.h" | ||
18 | |||
19 | int no_broadcast; | ||
20 | |||
21 | char visws_board_type = -1; | ||
22 | char visws_board_rev = -1; | ||
23 | |||
24 | void __init visws_get_board_type_and_rev(void) | ||
25 | { | ||
26 | int raw; | ||
27 | |||
28 | visws_board_type = (char)(inb_p(PIIX_GPI_BD_REG) & PIIX_GPI_BD_REG) | ||
29 | >> PIIX_GPI_BD_SHIFT; | ||
30 | /* | ||
31 | * Get Board rev. | ||
32 | * First, we have to initialize the 307 part to allow us access | ||
33 | * to the GPIO registers. Let's map them at 0x0fc0 which is right | ||
34 | * after the PIIX4 PM section. | ||
35 | */ | ||
36 | outb_p(SIO_DEV_SEL, SIO_INDEX); | ||
37 | outb_p(SIO_GP_DEV, SIO_DATA); /* Talk to GPIO regs. */ | ||
38 | |||
39 | outb_p(SIO_DEV_MSB, SIO_INDEX); | ||
40 | outb_p(SIO_GP_MSB, SIO_DATA); /* MSB of GPIO base address */ | ||
41 | |||
42 | outb_p(SIO_DEV_LSB, SIO_INDEX); | ||
43 | outb_p(SIO_GP_LSB, SIO_DATA); /* LSB of GPIO base address */ | ||
44 | |||
45 | outb_p(SIO_DEV_ENB, SIO_INDEX); | ||
46 | outb_p(1, SIO_DATA); /* Enable GPIO registers. */ | ||
47 | |||
48 | /* | ||
49 | * Now, we have to map the power management section to write | ||
50 | * a bit which enables access to the GPIO registers. | ||
51 | * What lunatic came up with this shit? | ||
52 | */ | ||
53 | outb_p(SIO_DEV_SEL, SIO_INDEX); | ||
54 | outb_p(SIO_PM_DEV, SIO_DATA); /* Talk to GPIO regs. */ | ||
55 | |||
56 | outb_p(SIO_DEV_MSB, SIO_INDEX); | ||
57 | outb_p(SIO_PM_MSB, SIO_DATA); /* MSB of PM base address */ | ||
58 | |||
59 | outb_p(SIO_DEV_LSB, SIO_INDEX); | ||
60 | outb_p(SIO_PM_LSB, SIO_DATA); /* LSB of PM base address */ | ||
61 | |||
62 | outb_p(SIO_DEV_ENB, SIO_INDEX); | ||
63 | outb_p(1, SIO_DATA); /* Enable PM registers. */ | ||
64 | |||
65 | /* | ||
66 | * Now, write the PM register which enables the GPIO registers. | ||
67 | */ | ||
68 | outb_p(SIO_PM_FER2, SIO_PM_INDEX); | ||
69 | outb_p(SIO_PM_GP_EN, SIO_PM_DATA); | ||
70 | |||
71 | /* | ||
72 | * Now, initialize the GPIO registers. | ||
73 | * We want them all to be inputs which is the | ||
74 | * power on default, so let's leave them alone. | ||
75 | * So, let's just read the board rev! | ||
76 | */ | ||
77 | raw = inb_p(SIO_GP_DATA1); | ||
78 | raw &= 0x7f; /* 7 bits of valid board revision ID. */ | ||
79 | |||
80 | if (visws_board_type == VISWS_320) { | ||
81 | if (raw < 0x6) { | ||
82 | visws_board_rev = 4; | ||
83 | } else if (raw < 0xc) { | ||
84 | visws_board_rev = 5; | ||
85 | } else { | ||
86 | visws_board_rev = 6; | ||
87 | } | ||
88 | } else if (visws_board_type == VISWS_540) { | ||
89 | visws_board_rev = 2; | ||
90 | } else { | ||
91 | visws_board_rev = raw; | ||
92 | } | ||
93 | |||
94 | printk(KERN_INFO "Silicon Graphics Visual Workstation %s (rev %d) detected\n", | ||
95 | (visws_board_type == VISWS_320 ? "320" : | ||
96 | (visws_board_type == VISWS_540 ? "540" : | ||
97 | "unknown")), visws_board_rev); | ||
98 | } | ||
99 | |||
100 | void __init pre_intr_init_hook(void) | ||
101 | { | ||
102 | init_VISWS_APIC_irqs(); | ||
103 | } | ||
104 | |||
105 | void __init intr_init_hook(void) | ||
106 | { | ||
107 | #ifdef CONFIG_X86_LOCAL_APIC | ||
108 | apic_intr_init(); | ||
109 | #endif | ||
110 | } | ||
111 | |||
112 | void __init pre_setup_arch_hook() | ||
113 | { | ||
114 | visws_get_board_type_and_rev(); | ||
115 | } | ||
116 | |||
117 | static struct irqaction irq0 = { | ||
118 | .handler = timer_interrupt, | ||
119 | .flags = IRQF_DISABLED | IRQF_IRQPOLL, | ||
120 | .name = "timer", | ||
121 | }; | ||
122 | |||
123 | void __init time_init_hook(void) | ||
124 | { | ||
125 | printk(KERN_INFO "Starting Cobalt Timer system clock\n"); | ||
126 | |||
127 | /* Set the countdown value */ | ||
128 | co_cpu_write(CO_CPU_TIMEVAL, CO_TIME_HZ/HZ); | ||
129 | |||
130 | /* Start the timer */ | ||
131 | co_cpu_write(CO_CPU_CTRL, co_cpu_read(CO_CPU_CTRL) | CO_CTRL_TIMERUN); | ||
132 | |||
133 | /* Enable (unmask) the timer interrupt */ | ||
134 | co_cpu_write(CO_CPU_CTRL, co_cpu_read(CO_CPU_CTRL) & ~CO_CTRL_TIMEMASK); | ||
135 | |||
136 | /* Wire cpu IDT entry to s/w handler (and Cobalt APIC to IDT) */ | ||
137 | setup_irq(0, &irq0); | ||
138 | } | ||
139 | |||
140 | /* Hook for machine specific memory setup. */ | ||
141 | |||
142 | #define MB (1024 * 1024) | ||
143 | |||
144 | unsigned long sgivwfb_mem_phys; | ||
145 | unsigned long sgivwfb_mem_size; | ||
146 | EXPORT_SYMBOL(sgivwfb_mem_phys); | ||
147 | EXPORT_SYMBOL(sgivwfb_mem_size); | ||
148 | |||
149 | long long mem_size __initdata = 0; | ||
150 | |||
151 | char * __init machine_specific_memory_setup(void) | ||
152 | { | ||
153 | long long gfx_mem_size = 8 * MB; | ||
154 | |||
155 | mem_size = boot_params.alt_mem_k; | ||
156 | |||
157 | if (!mem_size) { | ||
158 | printk(KERN_WARNING "Bootloader didn't set memory size, upgrade it !\n"); | ||
159 | mem_size = 128 * MB; | ||
160 | } | ||
161 | |||
162 | /* | ||
163 | * this hardcodes the graphics memory to 8 MB | ||
164 | * it really should be sized dynamically (or at least | ||
165 | * set as a boot param) | ||
166 | */ | ||
167 | if (!sgivwfb_mem_size) { | ||
168 | printk(KERN_WARNING "Defaulting to 8 MB framebuffer size\n"); | ||
169 | sgivwfb_mem_size = 8 * MB; | ||
170 | } | ||
171 | |||
172 | /* | ||
173 | * Trim to nearest MB | ||
174 | */ | ||
175 | sgivwfb_mem_size &= ~((1 << 20) - 1); | ||
176 | sgivwfb_mem_phys = mem_size - gfx_mem_size; | ||
177 | |||
178 | e820_add_region(0, LOWMEMSIZE(), E820_RAM); | ||
179 | e820_add_region(HIGH_MEMORY, mem_size - sgivwfb_mem_size - HIGH_MEMORY, E820_RAM); | ||
180 | e820_add_region(sgivwfb_mem_phys, sgivwfb_mem_size, E820_RESERVED); | ||
181 | |||
182 | return "PROM"; | ||
183 | } | ||
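For concreteness, the e820 layout machine_specific_memory_setup() produced with the defaults, assuming the 128 MB fallback size, the default 8 MB framebuffer, and HIGH_MEMORY at the 1 MB boundary:

    #include <stdio.h>

    #define MB (1024UL * 1024)

    int main(void)
    {
            unsigned long mem_size = 128 * MB;
            unsigned long fb_size  = 8 * MB;
            unsigned long fb_phys  = mem_size - fb_size;

            printf("RAM      : 1M .. %luM\n", (mem_size - fb_size) / MB);
            printf("RESERVED : %luM .. %luM (sgivwfb)\n",
                   fb_phys / MB, (fb_phys + fb_size) / MB);
            return 0;   /* RAM 1M..120M, framebuffer reserved 120M..128M */
    }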
diff --git a/arch/x86/mach-visws/traps.c b/arch/x86/mach-visws/traps.c deleted file mode 100644 index bfac6ba10f8a..000000000000 --- a/arch/x86/mach-visws/traps.c +++ /dev/null | |||
@@ -1,69 +0,0 @@ | |||
1 | /* VISWS traps */ | ||
2 | |||
3 | #include <linux/sched.h> | ||
4 | #include <linux/kernel.h> | ||
5 | #include <linux/init.h> | ||
6 | #include <linux/pci.h> | ||
7 | #include <linux/pci_ids.h> | ||
8 | |||
9 | #include <asm/io.h> | ||
10 | #include <asm/arch_hooks.h> | ||
11 | #include <asm/apic.h> | ||
12 | #include "cobalt.h" | ||
13 | #include "lithium.h" | ||
14 | |||
15 | |||
16 | #define A01234 (LI_INTA_0 | LI_INTA_1 | LI_INTA_2 | LI_INTA_3 | LI_INTA_4) | ||
17 | #define BCD (LI_INTB | LI_INTC | LI_INTD) | ||
18 | #define ALLDEVS (A01234 | BCD) | ||
19 | |||
20 | static __init void lithium_init(void) | ||
21 | { | ||
22 | set_fixmap(FIX_LI_PCIA, LI_PCI_A_PHYS); | ||
23 | set_fixmap(FIX_LI_PCIB, LI_PCI_B_PHYS); | ||
24 | |||
25 | if ((li_pcia_read16(PCI_VENDOR_ID) != PCI_VENDOR_ID_SGI) || | ||
26 | (li_pcia_read16(PCI_DEVICE_ID) != PCI_DEVICE_ID_SGI_LITHIUM)) { | ||
27 | printk(KERN_EMERG "Lithium hostbridge %c not found\n", 'A'); | ||
28 | panic("This machine is not SGI Visual Workstation 320/540"); | ||
29 | } | ||
30 | |||
31 | if ((li_pcib_read16(PCI_VENDOR_ID) != PCI_VENDOR_ID_SGI) || | ||
32 | (li_pcib_read16(PCI_DEVICE_ID) != PCI_DEVICE_ID_SGI_LITHIUM)) { | ||
33 | printk(KERN_EMERG "Lithium hostbridge %c not found\n", 'B'); | ||
34 | panic("This machine is not SGI Visual Workstation 320/540"); | ||
35 | } | ||
36 | |||
37 | li_pcia_write16(LI_PCI_INTEN, ALLDEVS); | ||
38 | li_pcib_write16(LI_PCI_INTEN, ALLDEVS); | ||
39 | } | ||
40 | |||
41 | static __init void cobalt_init(void) | ||
42 | { | ||
43 | /* | ||
44 | * On normal SMP PC this is used only with SMP, but we have to | ||
45 | * use it and set it up here to start the Cobalt clock | ||
46 | */ | ||
47 | set_fixmap(FIX_APIC_BASE, APIC_DEFAULT_PHYS_BASE); | ||
48 | setup_local_APIC(); | ||
49 | printk(KERN_INFO "Local APIC Version %#x, ID %#x\n", | ||
50 | (unsigned int)apic_read(APIC_LVR), | ||
51 | (unsigned int)apic_read(APIC_ID)); | ||
52 | |||
53 | set_fixmap(FIX_CO_CPU, CO_CPU_PHYS); | ||
54 | set_fixmap(FIX_CO_APIC, CO_APIC_PHYS); | ||
55 | printk(KERN_INFO "Cobalt Revision %#lx, APIC ID %#lx\n", | ||
56 | co_cpu_read(CO_CPU_REV), co_apic_read(CO_APIC_ID)); | ||
57 | |||
58 | /* Enable Cobalt APIC being careful to NOT change the ID! */ | ||
59 | co_apic_write(CO_APIC_ID, co_apic_read(CO_APIC_ID) | CO_APIC_ENABLE); | ||
60 | |||
61 | printk(KERN_INFO "Cobalt APIC enabled: ID reg %#lx\n", | ||
62 | co_apic_read(CO_APIC_ID)); | ||
63 | } | ||
64 | |||
65 | void __init trap_init_hook(void) | ||
66 | { | ||
67 | lithium_init(); | ||
68 | cobalt_init(); | ||
69 | } | ||
diff --git a/arch/x86/mach-visws/visws_apic.c b/arch/x86/mach-visws/visws_apic.c deleted file mode 100644 index d8b2cfd85d92..000000000000 --- a/arch/x86/mach-visws/visws_apic.c +++ /dev/null | |||
@@ -1,296 +0,0 @@ | |||
1 | /* | ||
2 | * Copyright (C) 1999 Bent Hagemark, Ingo Molnar | ||
3 | * | ||
4 | * SGI Visual Workstation interrupt controller | ||
5 | * | ||
6 | * The Cobalt system ASIC in the Visual Workstation contains a "Cobalt" APIC | ||
7 | * which serves as the main interrupt controller in the system. Non-legacy | ||
8 | * hardware in the system uses this controller directly. Legacy devices | ||
9 | * are connected to the PIIX4 which in turn has its 8259(s) connected to | ||
10 | * one of the Cobalt APIC entries. | ||
11 | * | ||
12 | * 09/02/2000 - Updated for 2.4 by jbarnes@sgi.com | ||
13 | * | ||
14 | * 25/11/2002 - Updated for 2.5 by Andrey Panin <pazke@orbita1.ru> | ||
15 | */ | ||
16 | |||
17 | #include <linux/kernel_stat.h> | ||
18 | #include <linux/interrupt.h> | ||
19 | #include <linux/init.h> | ||
20 | |||
21 | #include <asm/io.h> | ||
22 | #include <asm/apic.h> | ||
23 | #include <asm/i8259.h> | ||
24 | #include <asm/irq_vectors.h> | ||
25 | |||
26 | #include "cobalt.h" | ||
27 | |||
28 | static DEFINE_SPINLOCK(cobalt_lock); | ||
29 | |||
30 | /* | ||
31 | * Set the given Cobalt APIC Redirection Table entry to point | ||
32 | * to the given IDT vector/index. | ||
33 | */ | ||
34 | static inline void co_apic_set(int entry, int irq) | ||
35 | { | ||
36 | co_apic_write(CO_APIC_LO(entry), CO_APIC_LEVEL | (irq + FIRST_EXTERNAL_VECTOR)); | ||
37 | co_apic_write(CO_APIC_HI(entry), 0); | ||
38 | } | ||
39 | |||
40 | /* | ||
41 | * Cobalt (IO)-APIC functions to handle PCI devices. | ||
42 | */ | ||
43 | static inline int co_apic_ide0_hack(void) | ||
44 | { | ||
45 | extern char visws_board_type; | ||
46 | extern char visws_board_rev; | ||
47 | |||
48 | if (visws_board_type == VISWS_320 && visws_board_rev == 5) | ||
49 | return 5; | ||
50 | return CO_APIC_IDE0; | ||
51 | } | ||
52 | |||
53 | static int is_co_apic(unsigned int irq) | ||
54 | { | ||
55 | if (IS_CO_APIC(irq)) | ||
56 | return CO_APIC(irq); | ||
57 | |||
58 | switch (irq) { | ||
59 | case 0: return CO_APIC_CPU; | ||
60 | case CO_IRQ_IDE0: return co_apic_ide0_hack(); | ||
61 | case CO_IRQ_IDE1: return CO_APIC_IDE1; | ||
62 | default: return -1; | ||
63 | } | ||
64 | } | ||
65 | |||
66 | |||
67 | /* | ||
68 | * This is the SGI Cobalt (IO-)APIC: | ||
69 | */ | ||
70 | |||
71 | static void enable_cobalt_irq(unsigned int irq) | ||
72 | { | ||
73 | co_apic_set(is_co_apic(irq), irq); | ||
74 | } | ||
75 | |||
76 | static void disable_cobalt_irq(unsigned int irq) | ||
77 | { | ||
78 | int entry = is_co_apic(irq); | ||
79 | |||
80 | co_apic_write(CO_APIC_LO(entry), CO_APIC_MASK); | ||
81 | co_apic_read(CO_APIC_LO(entry)); | ||
82 | } | ||
83 | |||
84 | /* | ||
85 | * "irq" really just serves to identify the device. Here is where we | ||
86 | * map this to the Cobalt APIC entry where it's physically wired. | ||
87 | * This is called via request_irq -> setup_irq -> irq_desc->startup() | ||
88 | */ | ||
89 | static unsigned int startup_cobalt_irq(unsigned int irq) | ||
90 | { | ||
91 | unsigned long flags; | ||
92 | |||
93 | spin_lock_irqsave(&cobalt_lock, flags); | ||
94 | if ((irq_desc[irq].status & (IRQ_DISABLED | IRQ_INPROGRESS | IRQ_WAITING))) | ||
95 | irq_desc[irq].status &= ~(IRQ_DISABLED | IRQ_INPROGRESS | IRQ_WAITING); | ||
96 | enable_cobalt_irq(irq); | ||
97 | spin_unlock_irqrestore(&cobalt_lock, flags); | ||
98 | return 0; | ||
99 | } | ||
100 | |||
101 | static void ack_cobalt_irq(unsigned int irq) | ||
102 | { | ||
103 | unsigned long flags; | ||
104 | |||
105 | spin_lock_irqsave(&cobalt_lock, flags); | ||
106 | disable_cobalt_irq(irq); | ||
107 | apic_write(APIC_EOI, APIC_EIO_ACK); | ||
108 | spin_unlock_irqrestore(&cobalt_lock, flags); | ||
109 | } | ||
110 | |||
111 | static void end_cobalt_irq(unsigned int irq) | ||
112 | { | ||
113 | unsigned long flags; | ||
114 | |||
115 | spin_lock_irqsave(&cobalt_lock, flags); | ||
116 | if (!(irq_desc[irq].status & (IRQ_DISABLED | IRQ_INPROGRESS))) | ||
117 | enable_cobalt_irq(irq); | ||
118 | spin_unlock_irqrestore(&cobalt_lock, flags); | ||
119 | } | ||
120 | |||
121 | static struct irq_chip cobalt_irq_type = { | ||
122 | .typename = "Cobalt-APIC", | ||
123 | .startup = startup_cobalt_irq, | ||
124 | .shutdown = disable_cobalt_irq, | ||
125 | .enable = enable_cobalt_irq, | ||
126 | .disable = disable_cobalt_irq, | ||
127 | .ack = ack_cobalt_irq, | ||
128 | .end = end_cobalt_irq, | ||
129 | }; | ||
130 | |||
131 | |||
132 | /* | ||
133 | * This is the PIIX4-based 8259 that is wired up indirectly to Cobalt | ||
134 | * -- not the manner expected by the code in i8259.c. | ||
135 | * | ||
136 | * there is a 'master' physical interrupt source that gets sent to | ||
137 | * the CPU. But in the chipset there are various 'virtual' interrupts | ||
138 | * waiting to be handled. We represent this to Linux through a 'master' | ||
139 | * interrupt controller type, and through a special virtual interrupt- | ||
140 | * controller. Device drivers only see the virtual interrupt sources. | ||
141 | */ | ||
142 | static unsigned int startup_piix4_master_irq(unsigned int irq) | ||
143 | { | ||
144 | init_8259A(0); | ||
145 | |||
146 | return startup_cobalt_irq(irq); | ||
147 | } | ||
148 | |||
149 | static void end_piix4_master_irq(unsigned int irq) | ||
150 | { | ||
151 | unsigned long flags; | ||
152 | |||
153 | spin_lock_irqsave(&cobalt_lock, flags); | ||
154 | enable_cobalt_irq(irq); | ||
155 | spin_unlock_irqrestore(&cobalt_lock, flags); | ||
156 | } | ||
157 | |||
158 | static struct irq_chip piix4_master_irq_type = { | ||
159 | .typename = "PIIX4-master", | ||
160 | .startup = startup_piix4_master_irq, | ||
161 | .ack = ack_cobalt_irq, | ||
162 | .end = end_piix4_master_irq, | ||
163 | }; | ||
164 | |||
165 | |||
166 | static struct irq_chip piix4_virtual_irq_type = { | ||
167 | .typename = "PIIX4-virtual", | ||
168 | .shutdown = disable_8259A_irq, | ||
169 | .enable = enable_8259A_irq, | ||
170 | .disable = disable_8259A_irq, | ||
171 | }; | ||
172 | |||
173 | |||
174 | /* | ||
175 | * PIIX4-8259 master/virtual functions to handle interrupt requests | ||
176 | * from legacy devices: floppy, parallel, serial, rtc. | ||
177 | * | ||
178 | * None of these get Cobalt APIC entries, neither do they have IDT | ||
179 | * entries. These interrupts are purely virtual and distributed from | ||
180 | * the 'master' interrupt source: CO_IRQ_8259. | ||
181 | * | ||
182 | * When the 8259 interrupts, its handler figures out which of these | ||
183 | * devices is interrupting and dispatches to its handler. | ||
184 | * | ||
185 | * CAREFUL: devices see the 'virtual' interrupt only. Thus disable/ | ||
186 | * enable_irq gets the right irq. This 'master' irq is never directly | ||
187 | * manipulated by any driver. | ||
188 | */ | ||
189 | static irqreturn_t piix4_master_intr(int irq, void *dev_id) | ||
190 | { | ||
191 | int realirq; | ||
192 | irq_desc_t *desc; | ||
193 | unsigned long flags; | ||
194 | |||
195 | spin_lock_irqsave(&i8259A_lock, flags); | ||
196 | |||
197 | /* Find out what's interrupting in the PIIX4 master 8259 */ | ||
198 | outb(0x0c, 0x20); /* OCW3 Poll command */ | ||
199 | realirq = inb(0x20); | ||
200 | |||
201 | /* | ||
202 | * Bit 7 == 0 means invalid/spurious | ||
203 | */ | ||
204 | if (unlikely(!(realirq & 0x80))) | ||
205 | goto out_unlock; | ||
206 | |||
207 | realirq &= 7; | ||
208 | |||
209 | if (unlikely(realirq == 2)) { | ||
210 | outb(0x0c, 0xa0); | ||
211 | realirq = inb(0xa0); | ||
212 | |||
213 | if (unlikely(!(realirq & 0x80))) | ||
214 | goto out_unlock; | ||
215 | |||
216 | realirq = (realirq & 7) + 8; | ||
217 | } | ||
218 | |||
219 | /* mask and ack interrupt */ | ||
220 | cached_irq_mask |= 1 << realirq; | ||
221 | if (unlikely(realirq > 7)) { | ||
222 | inb(0xa1); | ||
223 | outb(cached_slave_mask, 0xa1); | ||
224 | outb(0x60 + (realirq & 7), 0xa0); | ||
225 | outb(0x60 + 2, 0x20); | ||
226 | } else { | ||
227 | inb(0x21); | ||
228 | outb(cached_master_mask, 0x21); | ||
229 | outb(0x60 + realirq, 0x20); | ||
230 | } | ||
231 | |||
232 | spin_unlock_irqrestore(&i8259A_lock, flags); | ||
233 | |||
234 | desc = irq_desc + realirq; | ||
235 | |||
236 | /* | ||
237 | * handle this 'virtual interrupt' as a Cobalt one now. | ||
238 | */ | ||
239 | kstat_cpu(smp_processor_id()).irqs[realirq]++; | ||
240 | |||
241 | if (likely(desc->action != NULL)) | ||
242 | handle_IRQ_event(realirq, desc->action); | ||
243 | |||
244 | if (!(desc->status & IRQ_DISABLED)) | ||
245 | enable_8259A_irq(realirq); | ||
246 | |||
247 | return IRQ_HANDLED; | ||
248 | |||
249 | out_unlock: | ||
250 | spin_unlock_irqrestore(&i8259A_lock, flags); | ||
251 | return IRQ_NONE; | ||
252 | } | ||
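
The poll sequence above is the classic i8259 OCW3 handshake. As a standalone
sketch of the same protocol (the helper name is illustrative, not part of
this code):

	/* Poll an i8259 for its highest-priority pending IRQ.
	 * base is the command port: 0x20 (master) or 0xa0 (slave). */
	static int poll_8259(unsigned int base)
	{
		unsigned char v;

		outb(0x0c, base);	/* OCW3 poll command */
		v = inb(base);		/* the next read returns the poll byte */
		if (!(v & 0x80))	/* bit 7 clear: invalid/spurious */
			return -1;
		return v & 7;		/* bits 0-2: the pending IRQ level */
	}
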
253 | |||
254 | static struct irqaction master_action = { | ||
255 | .handler = piix4_master_intr, | ||
256 | .name = "PIIX4-8259", | ||
257 | }; | ||
258 | |||
259 | static struct irqaction cascade_action = { | ||
260 | .handler = no_action, | ||
261 | .name = "cascade", | ||
262 | }; | ||
263 | |||
264 | |||
265 | void init_VISWS_APIC_irqs(void) | ||
266 | { | ||
267 | int i; | ||
268 | |||
269 | for (i = 0; i < CO_IRQ_APIC0 + CO_APIC_LAST + 1; i++) { | ||
270 | irq_desc[i].status = IRQ_DISABLED; | ||
271 | irq_desc[i].action = 0; | ||
272 | irq_desc[i].depth = 1; | ||
273 | |||
274 | if (i == 0) { | ||
275 | irq_desc[i].chip = &cobalt_irq_type; | ||
276 | } | ||
277 | else if (i == CO_IRQ_IDE0) { | ||
278 | irq_desc[i].chip = &cobalt_irq_type; | ||
279 | } | ||
280 | else if (i == CO_IRQ_IDE1) { | ||
281 | irq_desc[i].chip = &cobalt_irq_type; | ||
282 | } | ||
283 | else if (i == CO_IRQ_8259) { | ||
284 | irq_desc[i].chip = &piix4_master_irq_type; | ||
285 | } | ||
286 | else if (i < CO_IRQ_APIC0) { | ||
287 | irq_desc[i].chip = &piix4_virtual_irq_type; | ||
288 | } | ||
289 | else if (IS_CO_APIC(i)) { | ||
290 | irq_desc[i].chip = &cobalt_irq_type; | ||
291 | } | ||
292 | } | ||
293 | |||
294 | setup_irq(CO_IRQ_8259, &master_action); | ||
295 | setup_irq(2, &cascade_action); | ||
296 | } | ||
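
Drivers never see this wiring; they request the virtual IRQ numbers as on any
other platform and the startup path above does the rest. A hedged usage
sketch (the handler name is hypothetical):

	/* IRQ 6 is below CO_IRQ_APIC0, so it lands on piix4_virtual_irq_type
	 * and is dispatched out of piix4_master_intr(). */
	err = request_irq(6, my_floppy_handler, 0, "floppy", NULL);
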
diff --git a/arch/x86/mach-voyager/voyager_smp.c b/arch/x86/mach-voyager/voyager_smp.c index 8dedd01e909f..ee0fba092157 100644 --- a/arch/x86/mach-voyager/voyager_smp.c +++ b/arch/x86/mach-voyager/voyager_smp.c | |||
@@ -950,94 +950,24 @@ static void smp_stop_cpu_function(void *dummy) | |||
950 | halt(); | 950 | halt(); |
951 | } | 951 | } |
952 | 952 | ||
953 | static DEFINE_SPINLOCK(call_lock); | ||
954 | |||
955 | struct call_data_struct { | ||
956 | void (*func) (void *info); | ||
957 | void *info; | ||
958 | volatile unsigned long started; | ||
959 | volatile unsigned long finished; | ||
960 | int wait; | ||
961 | }; | ||
962 | |||
963 | static struct call_data_struct *call_data; | ||
964 | |||
965 | /* execute a thread on a new CPU. The function to be called must be | 953 | /* execute a thread on a new CPU. The function to be called must be |
966 | * previously set up. This is used to schedule a function for | 954 | * previously set up. This is used to schedule a function for |
967 | * execution on all CPUs - set up the function then broadcast a | 955 | * execution on all CPUs - set up the function then broadcast a |
968 | * function_interrupt CPI to come here on each CPU */ | 956 | * function_interrupt CPI to come here on each CPU */ |
969 | static void smp_call_function_interrupt(void) | 957 | static void smp_call_function_interrupt(void) |
970 | { | 958 | { |
971 | void (*func) (void *info) = call_data->func; | ||
972 | void *info = call_data->info; | ||
973 | /* must take copy of wait because call_data may be replaced | ||
974 | * unless the function is waiting for us to finish */ | ||
975 | int wait = call_data->wait; | ||
976 | __u8 cpu = smp_processor_id(); | ||
977 | |||
978 | /* | ||
979 | * Notify initiating CPU that I've grabbed the data and am | ||
980 | * about to execute the function | ||
981 | */ | ||
982 | mb(); | ||
983 | if (!test_and_clear_bit(cpu, &call_data->started)) { | ||
984 | /* If the bit wasn't set, this could be a replay */ | ||
985 | printk(KERN_WARNING "VOYAGER SMP: CPU %d received call function" | ||
986 | " with no call pending\n", cpu); | ||
987 | return; | ||
988 | } | ||
989 | /* | ||
990 | * At this point the info structure may be out of scope unless wait==1 | ||
991 | */ | ||
992 | irq_enter(); | 959 | irq_enter(); |
993 | (*func) (info); | 960 | generic_smp_call_function_interrupt(); |
994 | __get_cpu_var(irq_stat).irq_call_count++; | 961 | __get_cpu_var(irq_stat).irq_call_count++; |
995 | irq_exit(); | 962 | irq_exit(); |
996 | if (wait) { | ||
997 | mb(); | ||
998 | clear_bit(cpu, &call_data->finished); | ||
999 | } | ||
1000 | } | 963 | } |
1001 | 964 | ||
1002 | static int | 965 | static void smp_call_function_single_interrupt(void) |
1003 | voyager_smp_call_function_mask(cpumask_t cpumask, | ||
1004 | void (*func) (void *info), void *info, int wait) | ||
1005 | { | 966 | { |
1006 | struct call_data_struct data; | 967 | irq_enter(); |
1007 | u32 mask = cpus_addr(cpumask)[0]; | 968 | generic_smp_call_function_single_interrupt(); |
1008 | 969 | __get_cpu_var(irq_stat).irq_call_count++; | |
1009 | mask &= ~(1 << smp_processor_id()); | 970 | irq_exit(); |
1010 | |||
1011 | if (!mask) | ||
1012 | return 0; | ||
1013 | |||
1014 | /* Can deadlock when called with interrupts disabled */ | ||
1015 | WARN_ON(irqs_disabled()); | ||
1016 | |||
1017 | data.func = func; | ||
1018 | data.info = info; | ||
1019 | data.started = mask; | ||
1020 | data.wait = wait; | ||
1021 | if (wait) | ||
1022 | data.finished = mask; | ||
1023 | |||
1024 | spin_lock(&call_lock); | ||
1025 | call_data = &data; | ||
1026 | wmb(); | ||
1027 | /* Send a message to all other CPUs and wait for them to respond */ | ||
1028 | send_CPI(mask, VIC_CALL_FUNCTION_CPI); | ||
1029 | |||
1030 | /* Wait for response */ | ||
1031 | while (data.started) | ||
1032 | barrier(); | ||
1033 | |||
1034 | if (wait) | ||
1035 | while (data.finished) | ||
1036 | barrier(); | ||
1037 | |||
1038 | spin_unlock(&call_lock); | ||
1039 | |||
1040 | return 0; | ||
1041 | } | 971 | } |
1042 | 972 | ||
1043 | /* Sorry about the name. In an APIC based system, the APICs | 973 | /* Sorry about the name. In an APIC based system, the APICs |
@@ -1094,6 +1024,12 @@ void smp_qic_call_function_interrupt(struct pt_regs *regs) | |||
1094 | smp_call_function_interrupt(); | 1024 | smp_call_function_interrupt(); |
1095 | } | 1025 | } |
1096 | 1026 | ||
1027 | void smp_qic_call_function_single_interrupt(struct pt_regs *regs) | ||
1028 | { | ||
1029 | ack_QIC_CPI(QIC_CALL_FUNCTION_SINGLE_CPI); | ||
1030 | smp_call_function_single_interrupt(); | ||
1031 | } | ||
1032 | |||
1097 | void smp_vic_cpi_interrupt(struct pt_regs *regs) | 1033 | void smp_vic_cpi_interrupt(struct pt_regs *regs) |
1098 | { | 1034 | { |
1099 | struct pt_regs *old_regs = set_irq_regs(regs); | 1035 | struct pt_regs *old_regs = set_irq_regs(regs); |
@@ -1114,6 +1050,8 @@ void smp_vic_cpi_interrupt(struct pt_regs *regs) | |||
1114 | smp_enable_irq_interrupt(); | 1050 | smp_enable_irq_interrupt(); |
1115 | if (test_and_clear_bit(VIC_CALL_FUNCTION_CPI, &vic_cpi_mailbox[cpu])) | 1051 | if (test_and_clear_bit(VIC_CALL_FUNCTION_CPI, &vic_cpi_mailbox[cpu])) |
1116 | smp_call_function_interrupt(); | 1052 | smp_call_function_interrupt(); |
1053 | if (test_and_clear_bit(VIC_CALL_FUNCTION_SINGLE_CPI, &vic_cpi_mailbox[cpu])) | ||
1054 | smp_call_function_single_interrupt(); | ||
1117 | set_irq_regs(old_regs); | 1055 | set_irq_regs(old_regs); |
1118 | } | 1056 | } |
1119 | 1057 | ||
@@ -1129,7 +1067,7 @@ static void do_flush_tlb_all(void *info) | |||
1129 | /* flush the TLB of every active CPU in the system */ | 1067 | /* flush the TLB of every active CPU in the system */ |
1130 | void flush_tlb_all(void) | 1068 | void flush_tlb_all(void) |
1131 | { | 1069 | { |
1132 | on_each_cpu(do_flush_tlb_all, 0, 1, 1); | 1070 | on_each_cpu(do_flush_tlb_all, 0, 1); |
1133 | } | 1071 | } |
1134 | 1072 | ||
1135 | /* send a reschedule CPI to one CPU by physical CPU number*/ | 1073 | /* send a reschedule CPI to one CPU by physical CPU number*/ |
@@ -1161,7 +1099,7 @@ int safe_smp_processor_id(void) | |||
1161 | /* broadcast a halt to all other CPUs */ | 1099 | /* broadcast a halt to all other CPUs */ |
1162 | static void voyager_smp_send_stop(void) | 1100 | static void voyager_smp_send_stop(void) |
1163 | { | 1101 | { |
1164 | smp_call_function(smp_stop_cpu_function, NULL, 1, 1); | 1102 | smp_call_function(smp_stop_cpu_function, NULL, 1); |
1165 | } | 1103 | } |
1166 | 1104 | ||
1167 | /* this function is triggered in time.c when a clock tick fires | 1105 | /* this function is triggered in time.c when a clock tick fires |
@@ -1848,5 +1786,7 @@ struct smp_ops smp_ops = { | |||
1848 | 1786 | ||
1849 | .smp_send_stop = voyager_smp_send_stop, | 1787 | .smp_send_stop = voyager_smp_send_stop, |
1850 | .smp_send_reschedule = voyager_smp_send_reschedule, | 1788 | .smp_send_reschedule = voyager_smp_send_reschedule, |
1851 | .smp_call_function_mask = voyager_smp_call_function_mask, | 1789 | |
1790 | .send_call_func_ipi = native_send_call_func_ipi, | ||
1791 | .send_call_func_single_ipi = native_send_call_func_single_ipi, | ||
1852 | }; | 1792 | }; |
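
This conversion also picks up the tree-wide API change visible in the hunks
above: smp_call_function() and on_each_cpu() lost their 'nonatomic' argument
when the generic helpers went in. Side by side:

	/* before: func, info, nonatomic, wait */
	smp_call_function(smp_stop_cpu_function, NULL, 1, 1);
	/* after:  func, info, wait */
	smp_call_function(smp_stop_cpu_function, NULL, 1);
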
diff --git a/arch/x86/mm/Makefile b/arch/x86/mm/Makefile index c107641cd39b..9873716e9f76 100644 --- a/arch/x86/mm/Makefile +++ b/arch/x86/mm/Makefile | |||
@@ -8,6 +8,11 @@ obj-$(CONFIG_X86_PTDUMP) += dump_pagetables.o | |||
8 | 8 | ||
9 | obj-$(CONFIG_HIGHMEM) += highmem_32.o | 9 | obj-$(CONFIG_HIGHMEM) += highmem_32.o |
10 | 10 | ||
11 | obj-$(CONFIG_MMIOTRACE_HOOKS) += kmmio.o | ||
12 | obj-$(CONFIG_MMIOTRACE) += mmiotrace.o | ||
13 | mmiotrace-y := pf_in.o mmio-mod.o | ||
14 | obj-$(CONFIG_MMIOTRACE_TEST) += testmmiotrace.o | ||
15 | |||
11 | ifeq ($(CONFIG_X86_32),y) | 16 | ifeq ($(CONFIG_X86_32),y) |
12 | obj-$(CONFIG_NUMA) += discontig_32.o | 17 | obj-$(CONFIG_NUMA) += discontig_32.o |
13 | else | 18 | else |
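
The new objects build only when the corresponding options are enabled. A
hedged .config fragment (MMIOTRACE_HOOKS is normally selected by MMIOTRACE
rather than set by hand; the Kconfig wiring lives elsewhere in this merge):

	CONFIG_MMIOTRACE=y
	CONFIG_MMIOTRACE_HOOKS=y
	# CONFIG_MMIOTRACE_TEST is not set
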
diff --git a/arch/x86/mm/fault.c b/arch/x86/mm/fault.c index d0f5fce77d95..455f3fe67b42 100644 --- a/arch/x86/mm/fault.c +++ b/arch/x86/mm/fault.c | |||
@@ -10,6 +10,7 @@ | |||
10 | #include <linux/string.h> | 10 | #include <linux/string.h> |
11 | #include <linux/types.h> | 11 | #include <linux/types.h> |
12 | #include <linux/ptrace.h> | 12 | #include <linux/ptrace.h> |
13 | #include <linux/mmiotrace.h> | ||
13 | #include <linux/mman.h> | 14 | #include <linux/mman.h> |
14 | #include <linux/mm.h> | 15 | #include <linux/mm.h> |
15 | #include <linux/smp.h> | 16 | #include <linux/smp.h> |
@@ -49,6 +50,16 @@ | |||
49 | #define PF_RSVD (1<<3) | 50 | #define PF_RSVD (1<<3) |
50 | #define PF_INSTR (1<<4) | 51 | #define PF_INSTR (1<<4) |
51 | 52 | ||
53 | static inline int kmmio_fault(struct pt_regs *regs, unsigned long addr) | ||
54 | { | ||
55 | #ifdef CONFIG_MMIOTRACE_HOOKS | ||
56 | if (unlikely(is_kmmio_active())) | ||
57 | if (kmmio_handler(regs, addr) == 1) | ||
58 | return -1; | ||
59 | #endif | ||
60 | return 0; | ||
61 | } | ||
62 | |||
52 | static inline int notify_page_fault(struct pt_regs *regs) | 63 | static inline int notify_page_fault(struct pt_regs *regs) |
53 | { | 64 | { |
54 | #ifdef CONFIG_KPROBES | 65 | #ifdef CONFIG_KPROBES |
@@ -598,6 +609,8 @@ void __kprobes do_page_fault(struct pt_regs *regs, unsigned long error_code) | |||
598 | 609 | ||
599 | if (notify_page_fault(regs)) | 610 | if (notify_page_fault(regs)) |
600 | return; | 611 | return; |
612 | if (unlikely(kmmio_fault(regs, address))) | ||
613 | return; | ||
601 | 614 | ||
602 | /* | 615 | /* |
603 | * We fault-in kernel-space virtual memory on-demand. The | 616 | * We fault-in kernel-space virtual memory on-demand. The |
diff --git a/arch/x86/mm/init_32.c b/arch/x86/mm/init_32.c index b5a0fd5f4c5f..9689a5138e64 100644 --- a/arch/x86/mm/init_32.c +++ b/arch/x86/mm/init_32.c | |||
@@ -50,6 +50,7 @@ | |||
50 | 50 | ||
51 | unsigned int __VMALLOC_RESERVE = 128 << 20; | 51 | unsigned int __VMALLOC_RESERVE = 128 << 20; |
52 | 52 | ||
53 | unsigned long max_low_pfn_mapped; | ||
53 | unsigned long max_pfn_mapped; | 54 | unsigned long max_pfn_mapped; |
54 | 55 | ||
55 | DEFINE_PER_CPU(struct mmu_gather, mmu_gathers); | 56 | DEFINE_PER_CPU(struct mmu_gather, mmu_gathers); |
@@ -1034,6 +1035,8 @@ void mark_rodata_ro(void) | |||
1034 | unsigned long start = PFN_ALIGN(_text); | 1035 | unsigned long start = PFN_ALIGN(_text); |
1035 | unsigned long size = PFN_ALIGN(_etext) - start; | 1036 | unsigned long size = PFN_ALIGN(_etext) - start; |
1036 | 1037 | ||
1038 | #ifndef CONFIG_DYNAMIC_FTRACE | ||
1039 | /* Dynamic tracing modifies the kernel text section */ | ||
1037 | set_pages_ro(virt_to_page(start), size >> PAGE_SHIFT); | 1040 | set_pages_ro(virt_to_page(start), size >> PAGE_SHIFT); |
1038 | printk(KERN_INFO "Write protecting the kernel text: %luk\n", | 1041 | printk(KERN_INFO "Write protecting the kernel text: %luk\n", |
1039 | size >> 10); | 1042 | size >> 10); |
@@ -1046,6 +1049,8 @@ void mark_rodata_ro(void) | |||
1046 | printk(KERN_INFO "Testing CPA: write protecting again\n"); | 1049 | printk(KERN_INFO "Testing CPA: write protecting again\n"); |
1047 | set_pages_ro(virt_to_page(start), size>>PAGE_SHIFT); | 1050 | set_pages_ro(virt_to_page(start), size>>PAGE_SHIFT); |
1048 | #endif | 1051 | #endif |
1052 | #endif /* CONFIG_DYNAMIC_FTRACE */ | ||
1053 | |||
1049 | start += size; | 1054 | start += size; |
1050 | size = (unsigned long)__end_rodata - start; | 1055 | size = (unsigned long)__end_rodata - start; |
1051 | set_pages_ro(virt_to_page(start), size >> PAGE_SHIFT); | 1056 | set_pages_ro(virt_to_page(start), size >> PAGE_SHIFT); |
diff --git a/arch/x86/mm/init_64.c b/arch/x86/mm/init_64.c index 77d129d62c97..306049edd553 100644 --- a/arch/x86/mm/init_64.c +++ b/arch/x86/mm/init_64.c | |||
@@ -53,6 +53,7 @@ | |||
53 | * The direct mapping extends to max_pfn_mapped, so that we can directly access | 53 | * The direct mapping extends to max_pfn_mapped, so that we can directly access |
54 | * apertures, ACPI and other tables without having to play with fixmaps. | 54 | * apertures, ACPI and other tables without having to play with fixmaps. |
55 | */ | 55 | */ |
56 | unsigned long max_low_pfn_mapped; | ||
56 | unsigned long max_pfn_mapped; | 57 | unsigned long max_pfn_mapped; |
57 | 58 | ||
58 | static unsigned long dma_reserve __initdata; | 59 | static unsigned long dma_reserve __initdata; |
@@ -202,6 +203,46 @@ set_pte_vaddr(unsigned long vaddr, pte_t pteval) | |||
202 | } | 203 | } |
203 | 204 | ||
204 | /* | 205 | /* |
206 | * Create large page table mappings for a range of physical addresses. | ||
207 | */ | ||
208 | static void __init __init_extra_mapping(unsigned long phys, unsigned long size, | ||
209 | pgprot_t prot) | ||
210 | { | ||
211 | pgd_t *pgd; | ||
212 | pud_t *pud; | ||
213 | pmd_t *pmd; | ||
214 | |||
215 | BUG_ON((phys & ~PMD_MASK) || (size & ~PMD_MASK)); | ||
216 | for (; size; phys += PMD_SIZE, size -= PMD_SIZE) { | ||
217 | pgd = pgd_offset_k((unsigned long)__va(phys)); | ||
218 | if (pgd_none(*pgd)) { | ||
219 | pud = (pud_t *) spp_getpage(); | ||
220 | set_pgd(pgd, __pgd(__pa(pud) | _KERNPG_TABLE | | ||
221 | _PAGE_USER)); | ||
222 | } | ||
223 | pud = pud_offset(pgd, (unsigned long)__va(phys)); | ||
224 | if (pud_none(*pud)) { | ||
225 | pmd = (pmd_t *) spp_getpage(); | ||
226 | set_pud(pud, __pud(__pa(pmd) | _KERNPG_TABLE | | ||
227 | _PAGE_USER)); | ||
228 | } | ||
229 | pmd = pmd_offset(pud, phys); | ||
230 | BUG_ON(!pmd_none(*pmd)); | ||
231 | set_pmd(pmd, __pmd(phys | pgprot_val(prot))); | ||
232 | } | ||
233 | } | ||
234 | |||
235 | void __init init_extra_mapping_wb(unsigned long phys, unsigned long size) | ||
236 | { | ||
237 | __init_extra_mapping(phys, size, PAGE_KERNEL_LARGE); | ||
238 | } | ||
239 | |||
240 | void __init init_extra_mapping_uc(unsigned long phys, unsigned long size) | ||
241 | { | ||
242 | __init_extra_mapping(phys, size, PAGE_KERNEL_LARGE_NOCACHE); | ||
243 | } | ||
244 | |||
245 | /* | ||
205 | * The head.S code sets up the kernel high mapping: | 246 | * The head.S code sets up the kernel high mapping: |
206 | * | 247 | * |
207 | * from __START_KERNEL_map to __START_KERNEL_map + size (== _end-_text) | 248 | * from __START_KERNEL_map to __START_KERNEL_map + size (== _end-_text) |
@@ -262,11 +303,13 @@ static __meminit void unmap_low_page(void *adr) | |||
262 | early_iounmap(adr, PAGE_SIZE); | 303 | early_iounmap(adr, PAGE_SIZE); |
263 | } | 304 | } |
264 | 305 | ||
265 | static void __meminit | 306 | static unsigned long __meminit |
266 | phys_pte_init(pte_t *pte_page, unsigned long addr, unsigned long end) | 307 | phys_pte_init(pte_t *pte_page, unsigned long addr, unsigned long end) |
267 | { | 308 | { |
268 | unsigned pages = 0; | 309 | unsigned pages = 0; |
310 | unsigned long last_map_addr = end; | ||
269 | int i; | 311 | int i; |
312 | |||
270 | pte_t *pte = pte_page + pte_index(addr); | 313 | pte_t *pte = pte_page + pte_index(addr); |
271 | 314 | ||
272 | for(i = pte_index(addr); i < PTRS_PER_PTE; i++, addr += PAGE_SIZE, pte++) { | 315 | for(i = pte_index(addr); i < PTRS_PER_PTE; i++, addr += PAGE_SIZE, pte++) { |
@@ -286,23 +329,28 @@ phys_pte_init(pte_t *pte_page, unsigned long addr, unsigned long end) | |||
286 | printk(" pte=%p addr=%lx pte=%016lx\n", | 329 | printk(" pte=%p addr=%lx pte=%016lx\n", |
287 | pte, addr, pfn_pte(addr >> PAGE_SHIFT, PAGE_KERNEL).pte); | 330 | pte, addr, pfn_pte(addr >> PAGE_SHIFT, PAGE_KERNEL).pte); |
288 | set_pte(pte, pfn_pte(addr >> PAGE_SHIFT, PAGE_KERNEL)); | 331 | set_pte(pte, pfn_pte(addr >> PAGE_SHIFT, PAGE_KERNEL)); |
332 | last_map_addr = (addr & PAGE_MASK) + PAGE_SIZE; | ||
289 | pages++; | 333 | pages++; |
290 | } | 334 | } |
291 | update_page_count(PG_LEVEL_4K, pages); | 335 | update_page_count(PG_LEVEL_4K, pages); |
336 | |||
337 | return last_map_addr; | ||
292 | } | 338 | } |
293 | 339 | ||
294 | static void __meminit | 340 | static unsigned long __meminit |
295 | phys_pte_update(pmd_t *pmd, unsigned long address, unsigned long end) | 341 | phys_pte_update(pmd_t *pmd, unsigned long address, unsigned long end) |
296 | { | 342 | { |
297 | pte_t *pte = (pte_t *)pmd_page_vaddr(*pmd); | 343 | pte_t *pte = (pte_t *)pmd_page_vaddr(*pmd); |
298 | 344 | ||
299 | phys_pte_init(pte, address, end); | 345 | return phys_pte_init(pte, address, end); |
300 | } | 346 | } |
301 | 347 | ||
302 | static unsigned long __meminit | 348 | static unsigned long __meminit |
303 | phys_pmd_init(pmd_t *pmd_page, unsigned long address, unsigned long end) | 349 | phys_pmd_init(pmd_t *pmd_page, unsigned long address, unsigned long end, |
350 | unsigned long page_size_mask) | ||
304 | { | 351 | { |
305 | unsigned long pages = 0; | 352 | unsigned long pages = 0; |
353 | unsigned long last_map_addr = end; | ||
306 | 354 | ||
307 | int i = pmd_index(address); | 355 | int i = pmd_index(address); |
308 | 356 | ||
@@ -321,42 +369,46 @@ phys_pmd_init(pmd_t *pmd_page, unsigned long address, unsigned long end) | |||
321 | 369 | ||
322 | if (pmd_val(*pmd)) { | 370 | if (pmd_val(*pmd)) { |
323 | if (!pmd_large(*pmd)) | 371 | if (!pmd_large(*pmd)) |
324 | phys_pte_update(pmd, address, end); | 372 | last_map_addr = phys_pte_update(pmd, address, |
373 | end); | ||
325 | continue; | 374 | continue; |
326 | } | 375 | } |
327 | 376 | ||
328 | if (cpu_has_pse) { | 377 | if (page_size_mask & (1<<PG_LEVEL_2M)) { |
329 | pages++; | 378 | pages++; |
330 | set_pte((pte_t *)pmd, | 379 | set_pte((pte_t *)pmd, |
331 | pfn_pte(address >> PAGE_SHIFT, PAGE_KERNEL_LARGE)); | 380 | pfn_pte(address >> PAGE_SHIFT, PAGE_KERNEL_LARGE)); |
381 | last_map_addr = (address & PMD_MASK) + PMD_SIZE; | ||
332 | continue; | 382 | continue; |
333 | } | 383 | } |
334 | 384 | ||
335 | pte = alloc_low_page(&pte_phys); | 385 | pte = alloc_low_page(&pte_phys); |
336 | phys_pte_init(pte, address, end); | 386 | last_map_addr = phys_pte_init(pte, address, end); |
337 | unmap_low_page(pte); | 387 | unmap_low_page(pte); |
338 | 388 | ||
339 | pmd_populate_kernel(&init_mm, pmd, __va(pte_phys)); | 389 | pmd_populate_kernel(&init_mm, pmd, __va(pte_phys)); |
340 | } | 390 | } |
341 | update_page_count(PG_LEVEL_2M, pages); | 391 | update_page_count(PG_LEVEL_2M, pages); |
342 | return address; | 392 | return last_map_addr; |
343 | } | 393 | } |
344 | 394 | ||
345 | static unsigned long __meminit | 395 | static unsigned long __meminit |
346 | phys_pmd_update(pud_t *pud, unsigned long address, unsigned long end) | 396 | phys_pmd_update(pud_t *pud, unsigned long address, unsigned long end, |
397 | unsigned long page_size_mask) | ||
347 | { | 398 | { |
348 | pmd_t *pmd = pmd_offset(pud, 0); | 399 | pmd_t *pmd = pmd_offset(pud, 0); |
349 | unsigned long last_map_addr; | 400 | unsigned long last_map_addr; |
350 | 401 | ||
351 | spin_lock(&init_mm.page_table_lock); | 402 | spin_lock(&init_mm.page_table_lock); |
352 | last_map_addr = phys_pmd_init(pmd, address, end); | 403 | last_map_addr = phys_pmd_init(pmd, address, end, page_size_mask); |
353 | spin_unlock(&init_mm.page_table_lock); | 404 | spin_unlock(&init_mm.page_table_lock); |
354 | __flush_tlb_all(); | 405 | __flush_tlb_all(); |
355 | return last_map_addr; | 406 | return last_map_addr; |
356 | } | 407 | } |
357 | 408 | ||
358 | static unsigned long __meminit | 409 | static unsigned long __meminit |
359 | phys_pud_init(pud_t *pud_page, unsigned long addr, unsigned long end) | 410 | phys_pud_init(pud_t *pud_page, unsigned long addr, unsigned long end, |
411 | unsigned long page_size_mask) | ||
360 | { | 412 | { |
361 | unsigned long pages = 0; | 413 | unsigned long pages = 0; |
362 | unsigned long last_map_addr = end; | 414 | unsigned long last_map_addr = end; |
@@ -378,11 +430,12 @@ phys_pud_init(pud_t *pud_page, unsigned long addr, unsigned long end) | |||
378 | 430 | ||
379 | if (pud_val(*pud)) { | 431 | if (pud_val(*pud)) { |
380 | if (!pud_large(*pud)) | 432 | if (!pud_large(*pud)) |
381 | last_map_addr = phys_pmd_update(pud, addr, end); | 433 | last_map_addr = phys_pmd_update(pud, addr, end, |
434 | page_size_mask); | ||
382 | continue; | 435 | continue; |
383 | } | 436 | } |
384 | 437 | ||
385 | if (direct_gbpages) { | 438 | if (page_size_mask & (1<<PG_LEVEL_1G)) { |
386 | pages++; | 439 | pages++; |
387 | set_pte((pte_t *)pud, | 440 | set_pte((pte_t *)pud, |
388 | pfn_pte(addr >> PAGE_SHIFT, PAGE_KERNEL_LARGE)); | 441 | pfn_pte(addr >> PAGE_SHIFT, PAGE_KERNEL_LARGE)); |
@@ -393,7 +446,7 @@ phys_pud_init(pud_t *pud_page, unsigned long addr, unsigned long end) | |||
393 | pmd = alloc_low_page(&pmd_phys); | 446 | pmd = alloc_low_page(&pmd_phys); |
394 | 447 | ||
395 | spin_lock(&init_mm.page_table_lock); | 448 | spin_lock(&init_mm.page_table_lock); |
396 | last_map_addr = phys_pmd_init(pmd, addr, end); | 449 | last_map_addr = phys_pmd_init(pmd, addr, end, page_size_mask); |
397 | unmap_low_page(pmd); | 450 | unmap_low_page(pmd); |
398 | pud_populate(&init_mm, pud, __va(pmd_phys)); | 451 | pud_populate(&init_mm, pud, __va(pmd_phys)); |
399 | spin_unlock(&init_mm.page_table_lock); | 452 | spin_unlock(&init_mm.page_table_lock); |
@@ -406,29 +459,37 @@ phys_pud_init(pud_t *pud_page, unsigned long addr, unsigned long end) | |||
406 | } | 459 | } |
407 | 460 | ||
408 | static unsigned long __meminit | 461 | static unsigned long __meminit |
409 | phys_pud_update(pgd_t *pgd, unsigned long addr, unsigned long end) | 462 | phys_pud_update(pgd_t *pgd, unsigned long addr, unsigned long end, |
463 | unsigned long page_size_mask) | ||
410 | { | 464 | { |
411 | pud_t *pud; | 465 | pud_t *pud; |
412 | 466 | ||
413 | pud = (pud_t *)pgd_page_vaddr(*pgd); | 467 | pud = (pud_t *)pgd_page_vaddr(*pgd); |
414 | 468 | ||
415 | return phys_pud_init(pud, addr, end); | 469 | return phys_pud_init(pud, addr, end, page_size_mask); |
416 | } | 470 | } |
417 | 471 | ||
418 | static void __init find_early_table_space(unsigned long end) | 472 | static void __init find_early_table_space(unsigned long end) |
419 | { | 473 | { |
420 | unsigned long puds, tables, start; | 474 | unsigned long puds, pmds, ptes, tables, start; |
421 | 475 | ||
422 | puds = (end + PUD_SIZE - 1) >> PUD_SHIFT; | 476 | puds = (end + PUD_SIZE - 1) >> PUD_SHIFT; |
423 | tables = round_up(puds * sizeof(pud_t), PAGE_SIZE); | 477 | tables = round_up(puds * sizeof(pud_t), PAGE_SIZE); |
424 | if (!direct_gbpages) { | 478 | if (direct_gbpages) { |
425 | unsigned long pmds = (end + PMD_SIZE - 1) >> PMD_SHIFT; | 479 | unsigned long extra; |
426 | tables += round_up(pmds * sizeof(pmd_t), PAGE_SIZE); | 480 | extra = end - ((end>>PUD_SHIFT) << PUD_SHIFT); |
427 | } | 481 | pmds = (extra + PMD_SIZE - 1) >> PMD_SHIFT; |
428 | if (!cpu_has_pse) { | 482 | } else |
429 | unsigned long ptes = (end + PAGE_SIZE - 1) >> PAGE_SHIFT; | 483 | pmds = (end + PMD_SIZE - 1) >> PMD_SHIFT; |
430 | tables += round_up(ptes * sizeof(pte_t), PAGE_SIZE); | 484 | tables += round_up(pmds * sizeof(pmd_t), PAGE_SIZE); |
431 | } | 485 | |
486 | if (cpu_has_pse) { | ||
487 | unsigned long extra; | ||
488 | extra = end - ((end>>PMD_SHIFT) << PMD_SHIFT); | ||
489 | ptes = (extra + PAGE_SIZE - 1) >> PAGE_SHIFT; | ||
490 | } else | ||
491 | ptes = (end + PAGE_SIZE - 1) >> PAGE_SHIFT; | ||
492 | tables += round_up(ptes * sizeof(pte_t), PAGE_SIZE); | ||
432 | 493 | ||
433 | /* | 494 | /* |
434 | * RED-PEN putting page tables only on node 0 could | 495 | * RED-PEN putting page tables only on node 0 could |
@@ -568,29 +629,12 @@ static void __init early_memtest(unsigned long start, unsigned long end) | |||
568 | } | 629 | } |
569 | #endif | 630 | #endif |
570 | 631 | ||
571 | /* | 632 | static unsigned long __init kernel_physical_mapping_init(unsigned long start, |
572 | * Setup the direct mapping of the physical memory at PAGE_OFFSET. | 633 | unsigned long end, |
573 | * This runs before bootmem is initialized and gets pages directly from | 634 | unsigned long page_size_mask) |
574 | * the physical memory. To access them they are temporarily mapped. | ||
575 | */ | ||
576 | unsigned long __init_refok init_memory_mapping(unsigned long start, unsigned long end) | ||
577 | { | 635 | { |
578 | unsigned long next, last_map_addr = end; | ||
579 | unsigned long start_phys = start, end_phys = end; | ||
580 | 636 | ||
581 | printk(KERN_INFO "init_memory_mapping\n"); | 637 | unsigned long next, last_map_addr = end; |
582 | |||
583 | /* | ||
584 | * Find space for the kernel direct mapping tables. | ||
585 | * | ||
586 | * Later we should allocate these tables in the local node of the | ||
587 | * memory mapped. Unfortunately this is done currently before the | ||
588 | * nodes are discovered. | ||
589 | */ | ||
590 | if (!after_bootmem) { | ||
591 | init_gbpages(); | ||
592 | find_early_table_space(end); | ||
593 | } | ||
594 | 638 | ||
595 | start = (unsigned long)__va(start); | 639 | start = (unsigned long)__va(start); |
596 | end = (unsigned long)__va(end); | 640 | end = (unsigned long)__va(end); |
@@ -600,12 +644,13 @@ unsigned long __init_refok init_memory_mapping(unsigned long start, unsigned lon | |||
600 | unsigned long pud_phys; | 644 | unsigned long pud_phys; |
601 | pud_t *pud; | 645 | pud_t *pud; |
602 | 646 | ||
603 | next = start + PGDIR_SIZE; | 647 | next = (start + PGDIR_SIZE) & PGDIR_MASK; |
604 | if (next > end) | 648 | if (next > end) |
605 | next = end; | 649 | next = end; |
606 | 650 | ||
607 | if (pgd_val(*pgd)) { | 651 | if (pgd_val(*pgd)) { |
608 | last_map_addr = phys_pud_update(pgd, __pa(start), __pa(end)); | 652 | last_map_addr = phys_pud_update(pgd, __pa(start), |
653 | __pa(end), page_size_mask); | ||
609 | continue; | 654 | continue; |
610 | } | 655 | } |
611 | 656 | ||
@@ -614,22 +659,151 @@ unsigned long __init_refok init_memory_mapping(unsigned long start, unsigned lon | |||
614 | else | 659 | else |
615 | pud = alloc_low_page(&pud_phys); | 660 | pud = alloc_low_page(&pud_phys); |
616 | 661 | ||
617 | last_map_addr = phys_pud_init(pud, __pa(start), __pa(next)); | 662 | last_map_addr = phys_pud_init(pud, __pa(start), __pa(next), |
663 | page_size_mask); | ||
618 | unmap_low_page(pud); | 664 | unmap_low_page(pud); |
619 | pgd_populate(&init_mm, pgd_offset_k(start), | 665 | pgd_populate(&init_mm, pgd_offset_k(start), |
620 | __va(pud_phys)); | 666 | __va(pud_phys)); |
621 | } | 667 | } |
622 | 668 | ||
669 | return last_map_addr; | ||
670 | } | ||
671 | |||
672 | struct map_range { | ||
673 | unsigned long start; | ||
674 | unsigned long end; | ||
675 | unsigned page_size_mask; | ||
676 | }; | ||
677 | |||
678 | #define NR_RANGE_MR 5 | ||
679 | |||
680 | static int save_mr(struct map_range *mr, int nr_range, | ||
681 | unsigned long start_pfn, unsigned long end_pfn, | ||
682 | unsigned long page_size_mask) | ||
683 | { | ||
684 | |||
685 | if (start_pfn < end_pfn) { | ||
686 | if (nr_range >= NR_RANGE_MR) | ||
687 | panic("run out of range for init_memory_mapping\n"); | ||
688 | mr[nr_range].start = start_pfn<<PAGE_SHIFT; | ||
689 | mr[nr_range].end = end_pfn<<PAGE_SHIFT; | ||
690 | mr[nr_range].page_size_mask = page_size_mask; | ||
691 | nr_range++; | ||
692 | } | ||
693 | |||
694 | return nr_range; | ||
695 | } | ||
696 | |||
697 | /* | ||
698 | * Setup the direct mapping of the physical memory at PAGE_OFFSET. | ||
699 | * This runs before bootmem is initialized and gets pages directly from | ||
700 | * the physical memory. To access them they are temporarily mapped. | ||
701 | */ | ||
702 | unsigned long __init_refok init_memory_mapping(unsigned long start, | ||
703 | unsigned long end) | ||
704 | { | ||
705 | unsigned long last_map_addr = 0; | ||
706 | unsigned long page_size_mask = 0; | ||
707 | unsigned long start_pfn, end_pfn; | ||
708 | |||
709 | struct map_range mr[NR_RANGE_MR]; | ||
710 | int nr_range, i; | ||
711 | |||
712 | printk(KERN_INFO "init_memory_mapping\n"); | ||
713 | |||
714 | /* | ||
715 | * Find space for the kernel direct mapping tables. | ||
716 | * | ||
717 | * Later we should allocate these tables in the local node of the | ||
718 | * memory mapped. Unfortunately this is done currently before the | ||
719 | * nodes are discovered. | ||
720 | */ | ||
721 | if (!after_bootmem) | ||
722 | init_gbpages(); | ||
723 | |||
724 | if (direct_gbpages) | ||
725 | page_size_mask |= 1 << PG_LEVEL_1G; | ||
726 | if (cpu_has_pse) | ||
727 | page_size_mask |= 1 << PG_LEVEL_2M; | ||
728 | |||
729 | memset(mr, 0, sizeof(mr)); | ||
730 | nr_range = 0; | ||
731 | |||
732 | /* head: sub-2M piece before the first big-page boundary, if any */ | ||
733 | start_pfn = start >> PAGE_SHIFT; | ||
734 | end_pfn = ((start + (PMD_SIZE - 1)) >> PMD_SHIFT) | ||
735 | << (PMD_SHIFT - PAGE_SHIFT); | ||
736 | nr_range = save_mr(mr, nr_range, start_pfn, end_pfn, 0); | ||
737 | |||
738 | /* big page (2M) range*/ | ||
739 | start_pfn = ((start + (PMD_SIZE - 1))>>PMD_SHIFT) | ||
740 | << (PMD_SHIFT - PAGE_SHIFT); | ||
741 | end_pfn = ((start + (PUD_SIZE - 1))>>PUD_SHIFT) | ||
742 | << (PUD_SHIFT - PAGE_SHIFT); | ||
743 | if (end_pfn > ((end>>PUD_SHIFT)<<(PUD_SHIFT - PAGE_SHIFT))) | ||
744 | end_pfn = ((end>>PUD_SHIFT)<<(PUD_SHIFT - PAGE_SHIFT)); | ||
745 | nr_range = save_mr(mr, nr_range, start_pfn, end_pfn, | ||
746 | page_size_mask & (1<<PG_LEVEL_2M)); | ||
747 | |||
748 | /* big page (1G) range */ | ||
749 | start_pfn = end_pfn; | ||
750 | end_pfn = (end>>PUD_SHIFT) << (PUD_SHIFT - PAGE_SHIFT); | ||
751 | nr_range = save_mr(mr, nr_range, start_pfn, end_pfn, | ||
752 | page_size_mask & | ||
753 | ((1<<PG_LEVEL_2M)|(1<<PG_LEVEL_1G))); | ||
754 | |||
755 | /* tail is not big page (1G) alignment */ | ||
756 | start_pfn = end_pfn; | ||
757 | end_pfn = (end>>PMD_SHIFT) << (PMD_SHIFT - PAGE_SHIFT); | ||
758 | nr_range = save_mr(mr, nr_range, start_pfn, end_pfn, | ||
759 | page_size_mask & (1<<PG_LEVEL_2M)); | ||
760 | |||
761 | /* tail is not big page (2M) alignment */ | ||
762 | start_pfn = end_pfn; | ||
763 | end_pfn = end>>PAGE_SHIFT; | ||
764 | nr_range = save_mr(mr, nr_range, start_pfn, end_pfn, 0); | ||
765 | |||
766 | /* try to merge adjacent ranges that use the same page size */ | ||
767 | for (i = 0; nr_range > 1 && i < nr_range - 1; i++) { | ||
768 | unsigned long old_start; | ||
769 | if (mr[i].end != mr[i+1].start || | ||
770 | mr[i].page_size_mask != mr[i+1].page_size_mask) | ||
771 | continue; | ||
772 | /* move it */ | ||
773 | old_start = mr[i].start; | ||
774 | memmove(&mr[i], &mr[i+1], | ||
775 | (nr_range - 1 - i) * sizeof (struct map_range)); | ||
776 | mr[i].start = old_start; | ||
777 | nr_range--; | ||
778 | } | ||
779 | |||
780 | for (i = 0; i < nr_range; i++) | ||
781 | printk(KERN_DEBUG " %010lx - %010lx page %s\n", | ||
782 | mr[i].start, mr[i].end, | ||
783 | (mr[i].page_size_mask & (1<<PG_LEVEL_1G))?"1G":( | ||
784 | (mr[i].page_size_mask & (1<<PG_LEVEL_2M))?"2M":"4k")); | ||
785 | |||
786 | if (!after_bootmem) | ||
787 | find_early_table_space(end); | ||
788 | |||
789 | for (i = 0; i < nr_range; i++) | ||
790 | last_map_addr = kernel_physical_mapping_init( | ||
791 | mr[i].start, mr[i].end, | ||
792 | mr[i].page_size_mask); | ||
793 | |||
623 | if (!after_bootmem) | 794 | if (!after_bootmem) |
624 | mmu_cr4_features = read_cr4(); | 795 | mmu_cr4_features = read_cr4(); |
625 | __flush_tlb_all(); | 796 | __flush_tlb_all(); |
626 | 797 | ||
627 | if (!after_bootmem) | 798 | if (!after_bootmem && table_end > table_start) |
628 | reserve_early(table_start << PAGE_SHIFT, | 799 | reserve_early(table_start << PAGE_SHIFT, |
629 | table_end << PAGE_SHIFT, "PGTABLE"); | 800 | table_end << PAGE_SHIFT, "PGTABLE"); |
630 | 801 | ||
802 | printk(KERN_INFO "last_map_addr: %lx end: %lx\n", | ||
803 | last_map_addr, end); | ||
804 | |||
631 | if (!after_bootmem) | 805 | if (!after_bootmem) |
632 | early_memtest(start_phys, end_phys); | 806 | early_memtest(start, end); |
633 | 807 | ||
634 | return last_map_addr >> PAGE_SHIFT; | 808 | return last_map_addr >> PAGE_SHIFT; |
635 | } | 809 | } |
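
As a worked example of the splitting above, assume both PSE and gbpages are
available and init_memory_mapping(0x100000, 0x80a00000) is called (1 MB to
2 GB + 10 MB). The ranges come out as: [1 MB, 2 MB) with 4k pages (the
unaligned head), [2 MB, 1 GB) with 2M pages, [1 GB, 2 GB) with 1G pages, and
[2 GB, 2 GB + 10 MB) with 2M pages (the unaligned tail). The 4k tail is
empty, and the merge loop leaves all four ranges alone because no two
neighbours share a page_size_mask.
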
@@ -817,6 +991,13 @@ EXPORT_SYMBOL_GPL(rodata_test_data); | |||
817 | void mark_rodata_ro(void) | 991 | void mark_rodata_ro(void) |
818 | { | 992 | { |
819 | unsigned long start = PFN_ALIGN(_stext), end = PFN_ALIGN(__end_rodata); | 993 | unsigned long start = PFN_ALIGN(_stext), end = PFN_ALIGN(__end_rodata); |
994 | unsigned long rodata_start = | ||
995 | ((unsigned long)__start_rodata + PAGE_SIZE - 1) & PAGE_MASK; | ||
996 | |||
997 | #ifdef CONFIG_DYNAMIC_FTRACE | ||
998 | /* Dynamic tracing modifies the kernel text section */ | ||
999 | start = rodata_start; | ||
1000 | #endif | ||
820 | 1001 | ||
821 | printk(KERN_INFO "Write protecting the kernel read-only data: %luk\n", | 1002 | printk(KERN_INFO "Write protecting the kernel read-only data: %luk\n", |
822 | (end - start) >> 10); | 1003 | (end - start) >> 10); |
@@ -826,8 +1007,7 @@ void mark_rodata_ro(void) | |||
826 | * The rodata section (but not the kernel text!) should also be | 1007 | * The rodata section (but not the kernel text!) should also be |
827 | * not-executable. | 1008 | * not-executable. |
828 | */ | 1009 | */ |
829 | start = ((unsigned long)__start_rodata + PAGE_SIZE - 1) & PAGE_MASK; | 1010 | set_memory_nx(rodata_start, (end - rodata_start) >> PAGE_SHIFT); |
830 | set_memory_nx(start, (end - start) >> PAGE_SHIFT); | ||
831 | 1011 | ||
832 | rodata_test(); | 1012 | rodata_test(); |
833 | 1013 | ||
@@ -1036,9 +1216,6 @@ vmemmap_populate(struct page *start_page, unsigned long size, int node) | |||
1036 | PAGE_KERNEL_LARGE); | 1216 | PAGE_KERNEL_LARGE); |
1037 | set_pmd(pmd, __pmd(pte_val(entry))); | 1217 | set_pmd(pmd, __pmd(pte_val(entry))); |
1038 | 1218 | ||
1039 | addr_end = addr + PMD_SIZE; | ||
1040 | p_end = p + PMD_SIZE; | ||
1041 | |||
1042 | /* check to see if we have contiguous blocks */ | 1219 | /* check to see if we have contiguous blocks */ |
1043 | if (p_end != p || node_start != node) { | 1220 | if (p_end != p || node_start != node) { |
1044 | if (p_start) | 1221 | if (p_start) |
@@ -1048,6 +1225,9 @@ vmemmap_populate(struct page *start_page, unsigned long size, int node) | |||
1048 | node_start = node; | 1225 | node_start = node; |
1049 | p_start = p; | 1226 | p_start = p; |
1050 | } | 1227 | } |
1228 | |||
1229 | addr_end = addr + PMD_SIZE; | ||
1230 | p_end = p + PMD_SIZE; | ||
1051 | } else | 1231 | } else |
1052 | vmemmap_verify((pte_t *)pmd, node, addr, next); | 1232 | vmemmap_verify((pte_t *)pmd, node, addr, next); |
1053 | } | 1233 | } |
diff --git a/arch/x86/mm/ioremap.c b/arch/x86/mm/ioremap.c index 115f13ee40c9..24c1d3c30186 100644 --- a/arch/x86/mm/ioremap.c +++ b/arch/x86/mm/ioremap.c | |||
@@ -12,6 +12,7 @@ | |||
12 | #include <linux/module.h> | 12 | #include <linux/module.h> |
13 | #include <linux/slab.h> | 13 | #include <linux/slab.h> |
14 | #include <linux/vmalloc.h> | 14 | #include <linux/vmalloc.h> |
15 | #include <linux/mmiotrace.h> | ||
15 | 16 | ||
16 | #include <asm/cacheflush.h> | 17 | #include <asm/cacheflush.h> |
17 | #include <asm/e820.h> | 18 | #include <asm/e820.h> |
@@ -122,10 +123,13 @@ static void __iomem *__ioremap_caller(resource_size_t phys_addr, | |||
122 | { | 123 | { |
123 | unsigned long pfn, offset, vaddr; | 124 | unsigned long pfn, offset, vaddr; |
124 | resource_size_t last_addr; | 125 | resource_size_t last_addr; |
126 | const resource_size_t unaligned_phys_addr = phys_addr; | ||
127 | const unsigned long unaligned_size = size; | ||
125 | struct vm_struct *area; | 128 | struct vm_struct *area; |
126 | unsigned long new_prot_val; | 129 | unsigned long new_prot_val; |
127 | pgprot_t prot; | 130 | pgprot_t prot; |
128 | int retval; | 131 | int retval; |
132 | void __iomem *ret_addr; | ||
129 | 133 | ||
130 | /* Don't allow wraparound or zero size */ | 134 | /* Don't allow wraparound or zero size */ |
131 | last_addr = phys_addr + size - 1; | 135 | last_addr = phys_addr + size - 1; |
@@ -233,7 +237,10 @@ static void __iomem *__ioremap_caller(resource_size_t phys_addr, | |||
233 | return NULL; | 237 | return NULL; |
234 | } | 238 | } |
235 | 239 | ||
236 | return (void __iomem *) (vaddr + offset); | 240 | ret_addr = (void __iomem *) (vaddr + offset); |
241 | mmiotrace_ioremap(unaligned_phys_addr, unaligned_size, ret_addr); | ||
242 | |||
243 | return ret_addr; | ||
237 | } | 244 | } |
238 | 245 | ||
239 | /** | 246 | /** |
@@ -348,6 +355,8 @@ void iounmap(volatile void __iomem *addr) | |||
348 | addr = (volatile void __iomem *) | 355 | addr = (volatile void __iomem *) |
349 | (PAGE_MASK & (unsigned long __force)addr); | 356 | (PAGE_MASK & (unsigned long __force)addr); |
350 | 357 | ||
358 | mmiotrace_iounmap(addr); | ||
359 | |||
351 | /* Use the vm area unlocked, assuming the caller | 360 | /* Use the vm area unlocked, assuming the caller |
352 | ensures there isn't another iounmap for the same address | 361 | ensures there isn't another iounmap for the same address |
353 | in parallel. Reuse of the virtual address is prevented by | 362 | in parallel. Reuse of the virtual address is prevented by |
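
With these two hooks, every ioremap()/iounmap() pair in any driver is
reported to mmiotrace with the caller's original, unaligned address and size;
no driver changes are needed. A hedged sketch of the effect (pdev and the BAR
layout are illustrative):

	void __iomem *regs = ioremap(pci_resource_start(pdev, 0), 0x1000);
	/* mmiotrace_ioremap(phys, 0x1000, regs) fired in __ioremap_caller() */
	iounmap(regs);	/* mmiotrace_iounmap(regs) fires before the unmap */
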
diff --git a/arch/x86/mm/kmmio.c b/arch/x86/mm/kmmio.c new file mode 100644 index 000000000000..93d82038af4b --- /dev/null +++ b/arch/x86/mm/kmmio.c | |||
@@ -0,0 +1,510 @@ | |||
1 | /* Support for MMIO probes. | ||
2 | * Benefits from much of the kprobes code | ||
3 | * (C) 2002 Louis Zhuang <louis.zhuang@intel.com>. | ||
4 | * 2007 Alexander Eichner | ||
5 | * 2008 Pekka Paalanen <pq@iki.fi> | ||
6 | */ | ||
7 | |||
8 | #include <linux/list.h> | ||
9 | #include <linux/rculist.h> | ||
10 | #include <linux/spinlock.h> | ||
11 | #include <linux/hash.h> | ||
12 | #include <linux/init.h> | ||
13 | #include <linux/module.h> | ||
14 | #include <linux/kernel.h> | ||
15 | #include <linux/uaccess.h> | ||
16 | #include <linux/ptrace.h> | ||
17 | #include <linux/preempt.h> | ||
18 | #include <linux/percpu.h> | ||
19 | #include <linux/kdebug.h> | ||
20 | #include <linux/mutex.h> | ||
21 | #include <linux/io.h> | ||
22 | #include <asm/cacheflush.h> | ||
23 | #include <asm/tlbflush.h> | ||
24 | #include <linux/errno.h> | ||
25 | #include <asm/debugreg.h> | ||
26 | #include <linux/mmiotrace.h> | ||
27 | |||
28 | #define KMMIO_PAGE_HASH_BITS 4 | ||
29 | #define KMMIO_PAGE_TABLE_SIZE (1 << KMMIO_PAGE_HASH_BITS) | ||
30 | |||
31 | struct kmmio_fault_page { | ||
32 | struct list_head list; | ||
33 | struct kmmio_fault_page *release_next; | ||
34 | unsigned long page; /* location of the fault page */ | ||
35 | |||
36 | /* | ||
37 | * Number of times this page has been registered as a part | ||
38 | * of a probe. If zero, page is disarmed and this may be freed. | ||
39 | * Used only by writers (RCU). | ||
40 | */ | ||
41 | int count; | ||
42 | }; | ||
43 | |||
44 | struct kmmio_delayed_release { | ||
45 | struct rcu_head rcu; | ||
46 | struct kmmio_fault_page *release_list; | ||
47 | }; | ||
48 | |||
49 | struct kmmio_context { | ||
50 | struct kmmio_fault_page *fpage; | ||
51 | struct kmmio_probe *probe; | ||
52 | unsigned long saved_flags; | ||
53 | unsigned long addr; | ||
54 | int active; | ||
55 | }; | ||
56 | |||
57 | static DEFINE_SPINLOCK(kmmio_lock); | ||
58 | |||
59 | /* Protected by kmmio_lock */ | ||
60 | unsigned int kmmio_count; | ||
61 | |||
62 | /* Read-protected by RCU, write-protected by kmmio_lock. */ | ||
63 | static struct list_head kmmio_page_table[KMMIO_PAGE_TABLE_SIZE]; | ||
64 | static LIST_HEAD(kmmio_probes); | ||
65 | |||
66 | static struct list_head *kmmio_page_list(unsigned long page) | ||
67 | { | ||
68 | return &kmmio_page_table[hash_long(page, KMMIO_PAGE_HASH_BITS)]; | ||
69 | } | ||
70 | |||
71 | /* Accessed per-cpu */ | ||
72 | static DEFINE_PER_CPU(struct kmmio_context, kmmio_ctx); | ||
73 | |||
74 | /* | ||
75 | * this is basically a dynamic stabbing problem: | ||
76 | * Could use the existing prio tree code or | ||
77 | * Possible better implementations: | ||
78 | * The Interval Skip List: A Data Structure for Finding All Intervals That | ||
79 | * Overlap a Point (might be simple) | ||
80 | * Space Efficient Dynamic Stabbing with Fast Queries - Mikkel Thorup | ||
81 | */ | ||
82 | /* Get the kmmio at this addr (if any). You must be holding RCU read lock. */ | ||
83 | static struct kmmio_probe *get_kmmio_probe(unsigned long addr) | ||
84 | { | ||
85 | struct kmmio_probe *p; | ||
86 | list_for_each_entry_rcu(p, &kmmio_probes, list) { | ||
87 | if (addr >= p->addr && addr <= (p->addr + p->len)) | ||
88 | return p; | ||
89 | } | ||
90 | return NULL; | ||
91 | } | ||
92 | |||
93 | /* You must be holding RCU read lock. */ | ||
94 | static struct kmmio_fault_page *get_kmmio_fault_page(unsigned long page) | ||
95 | { | ||
96 | struct list_head *head; | ||
97 | struct kmmio_fault_page *p; | ||
98 | |||
99 | page &= PAGE_MASK; | ||
100 | head = kmmio_page_list(page); | ||
101 | list_for_each_entry_rcu(p, head, list) { | ||
102 | if (p->page == page) | ||
103 | return p; | ||
104 | } | ||
105 | return NULL; | ||
106 | } | ||
107 | |||
108 | static void set_page_present(unsigned long addr, bool present, | ||
109 | unsigned int *pglevel) | ||
110 | { | ||
111 | pteval_t pteval; | ||
112 | pmdval_t pmdval; | ||
113 | unsigned int level; | ||
114 | pmd_t *pmd; | ||
115 | pte_t *pte = lookup_address(addr, &level); | ||
116 | |||
117 | if (!pte) { | ||
118 | pr_err("kmmio: no pte for page 0x%08lx\n", addr); | ||
119 | return; | ||
120 | } | ||
121 | |||
122 | if (pglevel) | ||
123 | *pglevel = level; | ||
124 | |||
125 | switch (level) { | ||
126 | case PG_LEVEL_2M: | ||
127 | pmd = (pmd_t *)pte; | ||
128 | pmdval = pmd_val(*pmd) & ~_PAGE_PRESENT; | ||
129 | if (present) | ||
130 | pmdval |= _PAGE_PRESENT; | ||
131 | set_pmd(pmd, __pmd(pmdval)); | ||
132 | break; | ||
133 | |||
134 | case PG_LEVEL_4K: | ||
135 | pteval = pte_val(*pte) & ~_PAGE_PRESENT; | ||
136 | if (present) | ||
137 | pteval |= _PAGE_PRESENT; | ||
138 | set_pte_atomic(pte, __pte(pteval)); | ||
139 | break; | ||
140 | |||
141 | default: | ||
142 | pr_err("kmmio: unexpected page level 0x%x.\n", level); | ||
143 | return; | ||
144 | } | ||
145 | |||
146 | __flush_tlb_one(addr); | ||
147 | } | ||
148 | |||
149 | /** Mark the given page as not present. Access to it will trigger a fault. */ | ||
150 | static void arm_kmmio_fault_page(unsigned long page, unsigned int *pglevel) | ||
151 | { | ||
152 | set_page_present(page & PAGE_MASK, false, pglevel); | ||
153 | } | ||
154 | |||
155 | /** Mark the given page as present. */ | ||
156 | static void disarm_kmmio_fault_page(unsigned long page, unsigned int *pglevel) | ||
157 | { | ||
158 | set_page_present(page & PAGE_MASK, true, pglevel); | ||
159 | } | ||
160 | |||
161 | /* | ||
162 | * This is being called from do_page_fault(). | ||
163 | * | ||
164 | * We may be in an interrupt or a critical section. Also prefetching may | ||
165 | * trigger a page fault. We may be in the middle of a process switch. | ||
166 | * We cannot take any locks, because we could be executing especially | ||
167 | * within a kmmio critical section. | ||
168 | * | ||
169 | * Local interrupts are disabled, so preemption cannot happen. | ||
170 | * Do not enable interrupts, do not sleep, and watch out for other CPUs. | ||
171 | */ | ||
172 | /* | ||
173 | * Interrupts are disabled on entry, as the page fault (trap 14) uses an | ||
174 | * interrupt gate, and they remain disabled throughout this function. | ||
175 | */ | ||
176 | int kmmio_handler(struct pt_regs *regs, unsigned long addr) | ||
177 | { | ||
178 | struct kmmio_context *ctx; | ||
179 | struct kmmio_fault_page *faultpage; | ||
180 | int ret = 0; /* default to fault not handled */ | ||
181 | |||
182 | /* | ||
183 | * Preemption is now disabled to prevent process switch during | ||
184 | * single stepping. We can only handle one active kmmio trace | ||
185 | * per cpu, so ensure that we finish it before something else | ||
186 | * gets to run. We also hold the RCU read lock over single | ||
187 | * stepping to avoid looking up the probe and kmmio_fault_page | ||
188 | * again. | ||
189 | */ | ||
190 | preempt_disable(); | ||
191 | rcu_read_lock(); | ||
192 | |||
193 | faultpage = get_kmmio_fault_page(addr); | ||
194 | if (!faultpage) { | ||
195 | /* | ||
196 | * Either this page fault is not caused by kmmio, or | ||
197 | * another CPU just pulled the kmmio probe from under | ||
198 | * our feet. The latter case should not be possible. | ||
199 | */ | ||
200 | goto no_kmmio; | ||
201 | } | ||
202 | |||
203 | ctx = &get_cpu_var(kmmio_ctx); | ||
204 | if (ctx->active) { | ||
205 | disarm_kmmio_fault_page(faultpage->page, NULL); | ||
206 | if (addr == ctx->addr) { | ||
207 | /* | ||
208 | * On SMP we sometimes get recursive probe hits on the | ||
209 | * same address. Context is already saved, fall out. | ||
210 | */ | ||
211 | pr_debug("kmmio: duplicate probe hit on CPU %d, for " | ||
212 | "address 0x%08lx.\n", | ||
213 | smp_processor_id(), addr); | ||
214 | ret = 1; | ||
215 | goto no_kmmio_ctx; | ||
216 | } | ||
217 | /* | ||
218 | * Prevent overwriting already in-flight context. | ||
219 | * This should not happen, let's hope disarming at least | ||
220 | * prevents a panic. | ||
221 | */ | ||
222 | pr_emerg("kmmio: recursive probe hit on CPU %d, " | ||
223 | "for address 0x%08lx. Ignoring.\n", | ||
224 | smp_processor_id(), addr); | ||
225 | pr_emerg("kmmio: previous hit was at 0x%08lx.\n", | ||
226 | ctx->addr); | ||
227 | goto no_kmmio_ctx; | ||
228 | } | ||
229 | ctx->active++; | ||
230 | |||
231 | ctx->fpage = faultpage; | ||
232 | ctx->probe = get_kmmio_probe(addr); | ||
233 | ctx->saved_flags = (regs->flags & (X86_EFLAGS_TF | X86_EFLAGS_IF)); | ||
234 | ctx->addr = addr; | ||
235 | |||
236 | if (ctx->probe && ctx->probe->pre_handler) | ||
237 | ctx->probe->pre_handler(ctx->probe, regs, addr); | ||
238 | |||
239 | /* | ||
240 | * Enable single-stepping and disable interrupts for the faulting | ||
241 | * context. Local interrupts must not get enabled during stepping. | ||
242 | */ | ||
243 | regs->flags |= X86_EFLAGS_TF; | ||
244 | regs->flags &= ~X86_EFLAGS_IF; | ||
245 | |||
246 | /* Now we set present bit in PTE and single step. */ | ||
247 | disarm_kmmio_fault_page(ctx->fpage->page, NULL); | ||
248 | |||
249 | /* | ||
250 | * If another cpu accesses the same page while we are stepping, | ||
251 | * the access will not be caught. It will simply succeed and the | ||
252 | * only downside is we lose the event. If this becomes a problem, | ||
253 | * the user should drop to single cpu before tracing. | ||
254 | */ | ||
255 | |||
256 | put_cpu_var(kmmio_ctx); | ||
257 | return 1; /* fault handled */ | ||
258 | |||
259 | no_kmmio_ctx: | ||
260 | put_cpu_var(kmmio_ctx); | ||
261 | no_kmmio: | ||
262 | rcu_read_unlock(); | ||
263 | preempt_enable_no_resched(); | ||
264 | return ret; | ||
265 | } | ||
266 | |||
267 | /* | ||
268 | * Interrupts are disabled on entry as trap1 is an interrupt gate | ||
269 | * and they remain disabled thorough out this function. | ||
270 | * This must always get called as the pair to kmmio_handler(). | ||
271 | */ | ||
272 | static int post_kmmio_handler(unsigned long condition, struct pt_regs *regs) | ||
273 | { | ||
274 | int ret = 0; | ||
275 | struct kmmio_context *ctx = &get_cpu_var(kmmio_ctx); | ||
276 | |||
277 | if (!ctx->active) { | ||
278 | pr_debug("kmmio: spurious debug trap on CPU %d.\n", | ||
279 | smp_processor_id()); | ||
280 | goto out; | ||
281 | } | ||
282 | |||
283 | if (ctx->probe && ctx->probe->post_handler) | ||
284 | ctx->probe->post_handler(ctx->probe, condition, regs); | ||
285 | |||
286 | arm_kmmio_fault_page(ctx->fpage->page, NULL); | ||
287 | |||
288 | regs->flags &= ~X86_EFLAGS_TF; | ||
289 | regs->flags |= ctx->saved_flags; | ||
290 | |||
291 | /* These were acquired in kmmio_handler(). */ | ||
292 | ctx->active--; | ||
293 | BUG_ON(ctx->active); | ||
294 | rcu_read_unlock(); | ||
295 | preempt_enable_no_resched(); | ||
296 | |||
297 | /* | ||
298 | * if somebody else is singlestepping across a probe point, flags | ||
299 | * will have TF set, in which case, continue the remaining processing | ||
300 | * of do_debug, as if this is not a probe hit. | ||
301 | */ | ||
302 | if (!(regs->flags & X86_EFLAGS_TF)) | ||
303 | ret = 1; | ||
304 | out: | ||
305 | put_cpu_var(kmmio_ctx); | ||
306 | return ret; | ||
307 | } | ||
308 | |||
309 | /* You must be holding kmmio_lock. */ | ||
310 | static int add_kmmio_fault_page(unsigned long page) | ||
311 | { | ||
312 | struct kmmio_fault_page *f; | ||
313 | |||
314 | page &= PAGE_MASK; | ||
315 | f = get_kmmio_fault_page(page); | ||
316 | if (f) { | ||
317 | if (!f->count) | ||
318 | arm_kmmio_fault_page(f->page, NULL); | ||
319 | f->count++; | ||
320 | return 0; | ||
321 | } | ||
322 | |||
323 | f = kmalloc(sizeof(*f), GFP_ATOMIC); | ||
324 | if (!f) | ||
325 | return -1; | ||
326 | |||
327 | f->count = 1; | ||
328 | f->page = page; | ||
329 | list_add_rcu(&f->list, kmmio_page_list(f->page)); | ||
330 | |||
331 | arm_kmmio_fault_page(f->page, NULL); | ||
332 | |||
333 | return 0; | ||
334 | } | ||
335 | |||
336 | /* You must be holding kmmio_lock. */ | ||
337 | static void release_kmmio_fault_page(unsigned long page, | ||
338 | struct kmmio_fault_page **release_list) | ||
339 | { | ||
340 | struct kmmio_fault_page *f; | ||
341 | |||
342 | page &= PAGE_MASK; | ||
343 | f = get_kmmio_fault_page(page); | ||
344 | if (!f) | ||
345 | return; | ||
346 | |||
347 | f->count--; | ||
348 | BUG_ON(f->count < 0); | ||
349 | if (!f->count) { | ||
350 | disarm_kmmio_fault_page(f->page, NULL); | ||
351 | f->release_next = *release_list; | ||
352 | *release_list = f; | ||
353 | } | ||
354 | } | ||
355 | |||
356 | /* | ||
357 | * With page-unaligned ioremaps, one or two armed pages may contain | ||
358 | * addresses from outside the intended mapping. Events for these addresses | ||
359 | * are currently silently dropped. The events may result only from programming | ||
360 | * mistakes by accessing addresses before the beginning or past the end of a | ||
361 | * mapping. | ||
362 | */ | ||
363 | int register_kmmio_probe(struct kmmio_probe *p) | ||
364 | { | ||
365 | unsigned long flags; | ||
366 | int ret = 0; | ||
367 | unsigned long size = 0; | ||
368 | const unsigned long size_lim = p->len + (p->addr & ~PAGE_MASK); | ||
369 | |||
370 | spin_lock_irqsave(&kmmio_lock, flags); | ||
371 | if (get_kmmio_probe(p->addr)) { | ||
372 | ret = -EEXIST; | ||
373 | goto out; | ||
374 | } | ||
375 | kmmio_count++; | ||
376 | list_add_rcu(&p->list, &kmmio_probes); | ||
377 | while (size < size_lim) { | ||
378 | if (add_kmmio_fault_page(p->addr + size)) | ||
379 | pr_err("kmmio: Unable to set page fault.\n"); | ||
380 | size += PAGE_SIZE; | ||
381 | } | ||
382 | out: | ||
383 | spin_unlock_irqrestore(&kmmio_lock, flags); | ||
384 | /* | ||
385 | * XXX: What should I do here? | ||
386 | * Here was a call to global_flush_tlb(), but it does not exist | ||
387 | * anymore. It seems it's not needed after all. | ||
388 | */ | ||
389 | return ret; | ||
390 | } | ||
391 | EXPORT_SYMBOL(register_kmmio_probe); | ||
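
A hedged usage sketch: the handler signatures follow the calls made in
kmmio_handler() and post_kmmio_handler() above, and mmio_vaddr stands for a
previously ioremap()ed address (an assumption of this example):

	static void my_pre(struct kmmio_probe *p, struct pt_regs *regs,
			unsigned long addr)
	{
		pr_info("kmmio: hit at 0x%08lx\n", addr);
	}

	static struct kmmio_probe my_probe;

	static int __init my_probe_init(void)
	{
		my_probe.addr = mmio_vaddr;	/* page(s) to watch */
		my_probe.len = PAGE_SIZE;
		my_probe.pre_handler = my_pre;
		return register_kmmio_probe(&my_probe);
	}
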
392 | |||
393 | static void rcu_free_kmmio_fault_pages(struct rcu_head *head) | ||
394 | { | ||
395 | struct kmmio_delayed_release *dr = container_of( | ||
396 | head, | ||
397 | struct kmmio_delayed_release, | ||
398 | rcu); | ||
399 | struct kmmio_fault_page *p = dr->release_list; | ||
400 | while (p) { | ||
401 | struct kmmio_fault_page *next = p->release_next; | ||
402 | BUG_ON(p->count); | ||
403 | kfree(p); | ||
404 | p = next; | ||
405 | } | ||
406 | kfree(dr); | ||
407 | } | ||
408 | |||
409 | static void remove_kmmio_fault_pages(struct rcu_head *head) | ||
410 | { | ||
411 | struct kmmio_delayed_release *dr = container_of( | ||
412 | head, | ||
413 | struct kmmio_delayed_release, | ||
414 | rcu); | ||
415 | struct kmmio_fault_page *p = dr->release_list; | ||
416 | struct kmmio_fault_page **prevp = &dr->release_list; | ||
417 | unsigned long flags; | ||
418 | spin_lock_irqsave(&kmmio_lock, flags); | ||
419 | while (p) { | ||
420 | if (!p->count) | ||
421 | list_del_rcu(&p->list); | ||
422 | else | ||
423 | *prevp = p->release_next; | ||
424 | prevp = &p->release_next; | ||
425 | p = p->release_next; | ||
426 | } | ||
427 | spin_unlock_irqrestore(&kmmio_lock, flags); | ||
428 | /* This is the real RCU destroy call. */ | ||
429 | call_rcu(&dr->rcu, rcu_free_kmmio_fault_pages); | ||
430 | } | ||
431 | |||
432 | /* | ||
433 | * Remove a kmmio probe. You have to synchronize_rcu() before you can be | ||
434 | * sure that the callbacks will not be called anymore. Only after that | ||
435 | * you may actually release your struct kmmio_probe. | ||
436 | * | ||
437 | * Unregistering a kmmio fault page has three steps: | ||
438 | * 1. release_kmmio_fault_page() | ||
439 | * Disarm the page, wait a grace period to let all faults finish. | ||
440 | * 2. remove_kmmio_fault_pages() | ||
441 | * Remove the pages from kmmio_page_table. | ||
442 | * 3. rcu_free_kmmio_fault_pages() | ||
443 | * Actually free the kmmio_fault_page structs, deferred via RCU. | ||
444 | */ | ||
445 | void unregister_kmmio_probe(struct kmmio_probe *p) | ||
446 | { | ||
447 | unsigned long flags; | ||
448 | unsigned long size = 0; | ||
449 | const unsigned long size_lim = p->len + (p->addr & ~PAGE_MASK); | ||
450 | struct kmmio_fault_page *release_list = NULL; | ||
451 | struct kmmio_delayed_release *drelease; | ||
452 | |||
453 | spin_lock_irqsave(&kmmio_lock, flags); | ||
454 | while (size < size_lim) { | ||
455 | release_kmmio_fault_page(p->addr + size, &release_list); | ||
456 | size += PAGE_SIZE; | ||
457 | } | ||
458 | list_del_rcu(&p->list); | ||
459 | kmmio_count--; | ||
460 | spin_unlock_irqrestore(&kmmio_lock, flags); | ||
461 | |||
462 | drelease = kmalloc(sizeof(*drelease), GFP_ATOMIC); | ||
463 | if (!drelease) { | ||
464 | pr_crit("kmmio: leaking kmmio_fault_page objects.\n"); | ||
465 | return; | ||
466 | } | ||
467 | drelease->release_list = release_list; | ||
468 | |||
469 | /* | ||
470 | * This is not really RCU here. We have just disarmed a set of | ||
471 | * pages so that they cannot trigger page faults anymore. However, | ||
472 | * we cannot remove the pages from kmmio_page_table, | ||
473 | * because a probe hit might be in flight on another CPU. The | ||
474 | * pages are collected into a list, and they will be removed from | ||
475 | * kmmio_page_table when it is certain that no probe hit related to | ||
476 | * these pages can be in flight. RCU grace period sounds like a | ||
477 | * good choice. | ||
478 | * | ||
479 | * If we removed the pages too early, kmmio page fault handler might | ||
480 | * not find the respective kmmio_fault_page and determine it's not | ||
481 | * a kmmio fault, when it actually is. This would lead to madness. | ||
482 | */ | ||
483 | call_rcu(&drelease->rcu, remove_kmmio_fault_pages); | ||
484 | } | ||
485 | EXPORT_SYMBOL(unregister_kmmio_probe); | ||
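To make the teardown rules concrete: a user that registers a probe must, on removal, wait a full RCU grace period between unregistering and freeing. A minimal sketch, assuming only the kmmio_probe declarations from linux/mmiotrace.h (the handler and helper names here are hypothetical):

    #include <linux/mmiotrace.h>
    #include <linux/rcupdate.h>
    #include <linux/slab.h>

    /* Hypothetical handlers; real users record the access here. */
    static void my_pre(struct kmmio_probe *p, struct pt_regs *regs,
                       unsigned long addr) { }
    static void my_post(struct kmmio_probe *p, unsigned long cond,
                        struct pt_regs *regs) { }

    static struct kmmio_probe *probe_one_page(unsigned long vaddr)
    {
            struct kmmio_probe *p = kzalloc(sizeof(*p), GFP_KERNEL);
            if (!p)
                    return NULL;
            p->addr = vaddr;
            p->len = PAGE_SIZE;
            p->pre_handler = my_pre;
            p->post_handler = my_post;
            if (register_kmmio_probe(p)) {  /* -EEXIST if already probed */
                    kfree(p);
                    return NULL;
            }
            return p;
    }

    static void remove_probe(struct kmmio_probe *p)
    {
            unregister_kmmio_probe(p);
            synchronize_rcu();      /* no callbacks can still be running */
            kfree(p);               /* only now may the struct be released */
    }

This mirrors what mmio-mod.c below does in iounmap_trace_core(), where synchronize_rcu() sits between unregister_kmmio_probe() and kfree().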
486 | |||
487 | static int kmmio_die_notifier(struct notifier_block *nb, unsigned long val, | ||
488 | void *args) | ||
489 | { | ||
490 | struct die_args *arg = args; | ||
491 | |||
492 | if (val == DIE_DEBUG && (arg->err & DR_STEP)) | ||
493 | if (post_kmmio_handler(arg->err, arg->regs) == 1) | ||
494 | return NOTIFY_STOP; | ||
495 | |||
496 | return NOTIFY_DONE; | ||
497 | } | ||
498 | |||
499 | static struct notifier_block nb_die = { | ||
500 | .notifier_call = kmmio_die_notifier | ||
501 | }; | ||
502 | |||
503 | static int __init init_kmmio(void) | ||
504 | { | ||
505 | int i; | ||
506 | for (i = 0; i < KMMIO_PAGE_TABLE_SIZE; i++) | ||
507 | INIT_LIST_HEAD(&kmmio_page_table[i]); | ||
508 | return register_die_notifier(&nb_die); | ||
509 | } | ||
510 | fs_initcall(init_kmmio); /* should be before device_initcall() */ | ||
diff --git a/arch/x86/mm/mmio-mod.c b/arch/x86/mm/mmio-mod.c new file mode 100644 index 000000000000..e7397e108beb --- /dev/null +++ b/arch/x86/mm/mmio-mod.c | |||
@@ -0,0 +1,515 @@ | |||
1 | /* | ||
2 | * This program is free software; you can redistribute it and/or modify | ||
3 | * it under the terms of the GNU General Public License as published by | ||
4 | * the Free Software Foundation; either version 2 of the License, or | ||
5 | * (at your option) any later version. | ||
6 | * | ||
7 | * This program is distributed in the hope that it will be useful, | ||
8 | * but WITHOUT ANY WARRANTY; without even the implied warranty of | ||
9 | * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the | ||
10 | * GNU General Public License for more details. | ||
11 | * | ||
12 | * You should have received a copy of the GNU General Public License | ||
13 | * along with this program; if not, write to the Free Software | ||
14 | * Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA. | ||
15 | * | ||
16 | * Copyright (C) IBM Corporation, 2005 | ||
17 | * Jeff Muizelaar, 2006, 2007 | ||
18 | * Pekka Paalanen, 2008 <pq@iki.fi> | ||
19 | * | ||
20 | * Derived from the read-mod example from relay-examples by Tom Zanussi. | ||
21 | */ | ||
22 | #define DEBUG 1 | ||
23 | |||
24 | #include <linux/module.h> | ||
25 | #include <linux/debugfs.h> | ||
26 | #include <linux/uaccess.h> | ||
27 | #include <linux/io.h> | ||
28 | #include <linux/version.h> | ||
29 | #include <linux/kallsyms.h> | ||
30 | #include <asm/pgtable.h> | ||
31 | #include <linux/mmiotrace.h> | ||
32 | #include <asm/e820.h> /* for ISA_START_ADDRESS */ | ||
33 | #include <asm/atomic.h> | ||
34 | #include <linux/percpu.h> | ||
35 | #include <linux/cpu.h> | ||
36 | |||
37 | #include "pf_in.h" | ||
38 | |||
39 | #define NAME "mmiotrace: " | ||
40 | |||
41 | struct trap_reason { | ||
42 | unsigned long addr; | ||
43 | unsigned long ip; | ||
44 | enum reason_type type; | ||
45 | int active_traces; | ||
46 | }; | ||
47 | |||
48 | struct remap_trace { | ||
49 | struct list_head list; | ||
50 | struct kmmio_probe probe; | ||
51 | resource_size_t phys; | ||
52 | unsigned long id; | ||
53 | }; | ||
54 | |||
55 | /* Accessed per-cpu. */ | ||
56 | static DEFINE_PER_CPU(struct trap_reason, pf_reason); | ||
57 | static DEFINE_PER_CPU(struct mmiotrace_rw, cpu_trace); | ||
58 | |||
59 | #if 0 /* XXX: no way to gather this info anymore */ | ||
60 | /* Access to this is not per-cpu. */ | ||
61 | static DEFINE_PER_CPU(atomic_t, dropped); | ||
62 | #endif | ||
63 | |||
64 | static struct dentry *marker_file; | ||
65 | |||
66 | static DEFINE_MUTEX(mmiotrace_mutex); | ||
67 | static DEFINE_SPINLOCK(trace_lock); | ||
68 | static atomic_t mmiotrace_enabled; | ||
69 | static LIST_HEAD(trace_list); /* struct remap_trace */ | ||
70 | |||
71 | /* | ||
72 | * Locking in this file: | ||
73 | * - mmiotrace_mutex enforces enable/disable_mmiotrace() critical sections. | ||
74 | * - mmiotrace_enabled may be modified only when holding mmiotrace_mutex | ||
75 | * and trace_lock. | ||
76 | * - Routines depending on is_enabled() must take trace_lock. | ||
77 | * - trace_list users must hold trace_lock. | ||
78 | * - is_enabled() guarantees that mmio_trace_record is allowed. | ||
79 | * - pre/post callbacks assume the effect of is_enabled() being true. | ||
80 | */ | ||
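In practice the rules reduce to one pattern: check is_enabled() and walk trace_list only while holding trace_lock, so the enabled state cannot flip mid-traversal. A hedged sketch of the pattern (the helper name is hypothetical; ioremap_trace_core() and iounmap_trace_core() below follow the same shape):

    /* Hypothetical helper obeying the locking rules above:
     * trace_lock protects both the is_enabled() check and the
     * trace_list walk, so neither can race with enable/disable.
     */
    static void for_each_active_trace(void (*fn)(struct remap_trace *))
    {
            struct remap_trace *trace;

            spin_lock_irq(&trace_lock);
            if (!is_enabled())
                    goto out;
            list_for_each_entry(trace, &trace_list, list)
                    fn(trace);
    out:
            spin_unlock_irq(&trace_lock);
    }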
81 | |||
82 | /* module parameters */ | ||
83 | static unsigned long filter_offset; | ||
84 | static int nommiotrace; | ||
85 | static int trace_pc; | ||
86 | |||
87 | module_param(filter_offset, ulong, 0); | ||
88 | module_param(nommiotrace, bool, 0); | ||
89 | module_param(trace_pc, bool, 0); | ||
90 | |||
91 | MODULE_PARM_DESC(filter_offset, "Start address of traced mappings."); | ||
92 | MODULE_PARM_DESC(nommiotrace, "Disable actual MMIO tracing."); | ||
93 | MODULE_PARM_DESC(trace_pc, "Record address of faulting instructions."); | ||
94 | |||
95 | static bool is_enabled(void) | ||
96 | { | ||
97 | return atomic_read(&mmiotrace_enabled); | ||
98 | } | ||
99 | |||
100 | #if 0 /* XXX: needs rewrite */ | ||
101 | /* | ||
102 | * Write callback for the debugfs entry: | ||
103 | * Read a marker and write it to the mmio trace log | ||
104 | */ | ||
105 | static ssize_t write_marker(struct file *file, const char __user *buffer, | ||
106 | size_t count, loff_t *ppos) | ||
107 | { | ||
108 | char *event = NULL; | ||
109 | struct mm_io_header *headp; | ||
110 | ssize_t len = (count > 65535) ? 65535 : count; | ||
111 | |||
112 | event = kzalloc(sizeof(*headp) + len, GFP_KERNEL); | ||
113 | if (!event) | ||
114 | return -ENOMEM; | ||
115 | |||
116 | headp = (struct mm_io_header *)event; | ||
117 | headp->type = MMIO_MAGIC | (MMIO_MARKER << MMIO_OPCODE_SHIFT); | ||
118 | headp->data_len = len; | ||
119 | |||
120 | if (copy_from_user(event + sizeof(*headp), buffer, len)) { | ||
121 | kfree(event); | ||
122 | return -EFAULT; | ||
123 | } | ||
124 | |||
125 | spin_lock_irq(&trace_lock); | ||
126 | #if 0 /* XXX: convert this to use tracing */ | ||
127 | if (is_enabled()) | ||
128 | relay_write(chan, event, sizeof(*headp) + len); | ||
129 | else | ||
130 | #endif | ||
131 | len = -EINVAL; | ||
132 | spin_unlock_irq(&trace_lock); | ||
133 | kfree(event); | ||
134 | return len; | ||
135 | } | ||
136 | #endif | ||
137 | |||
138 | static void print_pte(unsigned long address) | ||
139 | { | ||
140 | unsigned int level; | ||
141 | pte_t *pte = lookup_address(address, &level); | ||
142 | |||
143 | if (!pte) { | ||
144 | pr_err(NAME "Error in %s: no pte for page 0x%08lx\n", | ||
145 | __func__, address); | ||
146 | return; | ||
147 | } | ||
148 | |||
149 | if (level == PG_LEVEL_2M) { | ||
150 | pr_emerg(NAME "4MB pages are not currently supported: " | ||
151 | "0x%08lx\n", address); | ||
152 | BUG(); | ||
153 | } | ||
154 | pr_info(NAME "pte for 0x%lx: 0x%llx 0x%llx\n", address, | ||
155 | (unsigned long long)pte_val(*pte), | ||
156 | (unsigned long long)pte_val(*pte) & _PAGE_PRESENT); | ||
157 | } | ||
158 | |||
159 | /* | ||
160 | * For some reason the pre/post pairs have been called in an | ||
161 | * unmatched order. Report and die. | ||
162 | */ | ||
163 | static void die_kmmio_nesting_error(struct pt_regs *regs, unsigned long addr) | ||
164 | { | ||
165 | const struct trap_reason *my_reason = &get_cpu_var(pf_reason); | ||
166 | pr_emerg(NAME "unexpected fault for address: 0x%08lx, " | ||
167 | "last fault for address: 0x%08lx\n", | ||
168 | addr, my_reason->addr); | ||
169 | print_pte(addr); | ||
170 | print_symbol(KERN_EMERG "faulting IP is at %s\n", regs->ip); | ||
171 | print_symbol(KERN_EMERG "last faulting IP was at %s\n", my_reason->ip); | ||
172 | #ifdef __i386__ | ||
173 | pr_emerg("eax: %08lx ebx: %08lx ecx: %08lx edx: %08lx\n", | ||
174 | regs->ax, regs->bx, regs->cx, regs->dx); | ||
175 | pr_emerg("esi: %08lx edi: %08lx ebp: %08lx esp: %08lx\n", | ||
176 | regs->si, regs->di, regs->bp, regs->sp); | ||
177 | #else | ||
178 | pr_emerg("rax: %016lx rcx: %016lx rdx: %016lx\n", | ||
179 | regs->ax, regs->cx, regs->dx); | ||
180 | pr_emerg("rsi: %016lx rdi: %016lx rbp: %016lx rsp: %016lx\n", | ||
181 | regs->si, regs->di, regs->bp, regs->sp); | ||
182 | #endif | ||
183 | put_cpu_var(pf_reason); | ||
184 | BUG(); | ||
185 | } | ||
186 | |||
187 | static void pre(struct kmmio_probe *p, struct pt_regs *regs, | ||
188 | unsigned long addr) | ||
189 | { | ||
190 | struct trap_reason *my_reason = &get_cpu_var(pf_reason); | ||
191 | struct mmiotrace_rw *my_trace = &get_cpu_var(cpu_trace); | ||
192 | const unsigned long instptr = instruction_pointer(regs); | ||
193 | const enum reason_type type = get_ins_type(instptr); | ||
194 | struct remap_trace *trace = p->private; | ||
195 | |||
196 | /* it doesn't make sense to have more than one active trace per cpu */ | ||
197 | if (my_reason->active_traces) | ||
198 | die_kmmio_nesting_error(regs, addr); | ||
199 | else | ||
200 | my_reason->active_traces++; | ||
201 | |||
202 | my_reason->type = type; | ||
203 | my_reason->addr = addr; | ||
204 | my_reason->ip = instptr; | ||
205 | |||
206 | my_trace->phys = addr - trace->probe.addr + trace->phys; | ||
207 | my_trace->map_id = trace->id; | ||
208 | |||
209 | /* | ||
210 | * Only record the program counter when requested. | ||
211 | * It may taint clean-room reverse engineering. | ||
212 | */ | ||
213 | if (trace_pc) | ||
214 | my_trace->pc = instptr; | ||
215 | else | ||
216 | my_trace->pc = 0; | ||
217 | |||
218 | /* | ||
219 | * XXX: the timestamp recorded will be *after* the tracing has been | ||
220 | * done, not at the time we hit the instruction. SMP implications | ||
221 | * on event ordering? | ||
222 | */ | ||
223 | |||
224 | switch (type) { | ||
225 | case REG_READ: | ||
226 | my_trace->opcode = MMIO_READ; | ||
227 | my_trace->width = get_ins_mem_width(instptr); | ||
228 | break; | ||
229 | case REG_WRITE: | ||
230 | my_trace->opcode = MMIO_WRITE; | ||
231 | my_trace->width = get_ins_mem_width(instptr); | ||
232 | my_trace->value = get_ins_reg_val(instptr, regs); | ||
233 | break; | ||
234 | case IMM_WRITE: | ||
235 | my_trace->opcode = MMIO_WRITE; | ||
236 | my_trace->width = get_ins_mem_width(instptr); | ||
237 | my_trace->value = get_ins_imm_val(instptr); | ||
238 | break; | ||
239 | default: | ||
240 | { | ||
241 | unsigned char *ip = (unsigned char *)instptr; | ||
242 | my_trace->opcode = MMIO_UNKNOWN_OP; | ||
243 | my_trace->width = 0; | ||
244 | my_trace->value = (*ip) << 16 | *(ip + 1) << 8 | | ||
245 | *(ip + 2); | ||
246 | } | ||
247 | } | ||
248 | put_cpu_var(cpu_trace); | ||
249 | put_cpu_var(pf_reason); | ||
250 | } | ||
251 | |||
252 | static void post(struct kmmio_probe *p, unsigned long condition, | ||
253 | struct pt_regs *regs) | ||
254 | { | ||
255 | struct trap_reason *my_reason = &get_cpu_var(pf_reason); | ||
256 | struct mmiotrace_rw *my_trace = &get_cpu_var(cpu_trace); | ||
257 | |||
258 | /* this should always return the active_trace count to 0 */ | ||
259 | my_reason->active_traces--; | ||
260 | if (my_reason->active_traces) { | ||
261 | pr_emerg(NAME "unexpected post handler\n"); | ||
262 | BUG(); | ||
263 | } | ||
264 | |||
265 | switch (my_reason->type) { | ||
266 | case REG_READ: | ||
267 | my_trace->value = get_ins_reg_val(my_reason->ip, regs); | ||
268 | break; | ||
269 | default: | ||
270 | break; | ||
271 | } | ||
272 | |||
273 | mmio_trace_rw(my_trace); | ||
274 | put_cpu_var(cpu_trace); | ||
275 | put_cpu_var(pf_reason); | ||
276 | } | ||
277 | |||
278 | static void ioremap_trace_core(resource_size_t offset, unsigned long size, | ||
279 | void __iomem *addr) | ||
280 | { | ||
281 | static atomic_t next_id; | ||
282 | struct remap_trace *trace = kmalloc(sizeof(*trace), GFP_KERNEL); | ||
283 | /* These are page-unaligned. */ | ||
284 | struct mmiotrace_map map = { | ||
285 | .phys = offset, | ||
286 | .virt = (unsigned long)addr, | ||
287 | .len = size, | ||
288 | .opcode = MMIO_PROBE | ||
289 | }; | ||
290 | |||
291 | if (!trace) { | ||
292 | pr_err(NAME "kmalloc failed in ioremap\n"); | ||
293 | return; | ||
294 | } | ||
295 | |||
296 | *trace = (struct remap_trace) { | ||
297 | .probe = { | ||
298 | .addr = (unsigned long)addr, | ||
299 | .len = size, | ||
300 | .pre_handler = pre, | ||
301 | .post_handler = post, | ||
302 | .private = trace | ||
303 | }, | ||
304 | .phys = offset, | ||
305 | .id = atomic_inc_return(&next_id) | ||
306 | }; | ||
307 | map.map_id = trace->id; | ||
308 | |||
309 | spin_lock_irq(&trace_lock); | ||
310 | if (!is_enabled()) | ||
311 | goto not_enabled; | ||
312 | |||
313 | mmio_trace_mapping(&map); | ||
314 | list_add_tail(&trace->list, &trace_list); | ||
315 | if (!nommiotrace) | ||
316 | register_kmmio_probe(&trace->probe); | ||
317 | |||
318 | not_enabled: | ||
319 | spin_unlock_irq(&trace_lock); | ||
320 | } | ||
321 | |||
322 | void mmiotrace_ioremap(resource_size_t offset, unsigned long size, | ||
323 | void __iomem *addr) | ||
324 | { | ||
325 | if (!is_enabled()) /* recheck and proper locking in *_core() */ | ||
326 | return; | ||
327 | |||
328 | pr_debug(NAME "ioremap_*(0x%llx, 0x%lx) = %p\n", | ||
329 | (unsigned long long)offset, size, addr); | ||
330 | if ((filter_offset) && (offset != filter_offset)) | ||
331 | return; | ||
332 | ioremap_trace_core(offset, size, addr); | ||
333 | } | ||
334 | |||
335 | static void iounmap_trace_core(volatile void __iomem *addr) | ||
336 | { | ||
337 | struct mmiotrace_map map = { | ||
338 | .phys = 0, | ||
339 | .virt = (unsigned long)addr, | ||
340 | .len = 0, | ||
341 | .opcode = MMIO_UNPROBE | ||
342 | }; | ||
343 | struct remap_trace *trace; | ||
344 | struct remap_trace *tmp; | ||
345 | struct remap_trace *found_trace = NULL; | ||
346 | |||
347 | pr_debug(NAME "Unmapping %p.\n", addr); | ||
348 | |||
349 | spin_lock_irq(&trace_lock); | ||
350 | if (!is_enabled()) | ||
351 | goto not_enabled; | ||
352 | |||
353 | list_for_each_entry_safe(trace, tmp, &trace_list, list) { | ||
354 | if ((unsigned long)addr == trace->probe.addr) { | ||
355 | if (!nommiotrace) | ||
356 | unregister_kmmio_probe(&trace->probe); | ||
357 | list_del(&trace->list); | ||
358 | found_trace = trace; | ||
359 | break; | ||
360 | } | ||
361 | } | ||
362 | map.map_id = (found_trace) ? found_trace->id : -1; | ||
363 | mmio_trace_mapping(&map); | ||
364 | |||
365 | not_enabled: | ||
366 | spin_unlock_irq(&trace_lock); | ||
367 | if (found_trace) { | ||
368 | synchronize_rcu(); /* unregister_kmmio_probe() requirement */ | ||
369 | kfree(found_trace); | ||
370 | } | ||
371 | } | ||
372 | |||
373 | void mmiotrace_iounmap(volatile void __iomem *addr) | ||
374 | { | ||
375 | might_sleep(); | ||
376 | if (is_enabled()) /* recheck and proper locking in *_core() */ | ||
377 | iounmap_trace_core(addr); | ||
378 | } | ||
379 | |||
380 | static void clear_trace_list(void) | ||
381 | { | ||
382 | struct remap_trace *trace; | ||
383 | struct remap_trace *tmp; | ||
384 | |||
385 | /* | ||
386 | * No locking required, because the caller ensures we are in a | ||
387 | * critical section via mutex, and is_enabled() is false, | ||
388 | * i.e. nothing can traverse or modify this list. | ||
389 | * Caller also ensures is_enabled() cannot change. | ||
390 | */ | ||
391 | list_for_each_entry(trace, &trace_list, list) { | ||
392 | pr_notice(NAME "purging non-iounmapped " | ||
393 | "trace @0x%08lx, size 0x%lx.\n", | ||
394 | trace->probe.addr, trace->probe.len); | ||
395 | if (!nommiotrace) | ||
396 | unregister_kmmio_probe(&trace->probe); | ||
397 | } | ||
398 | synchronize_rcu(); /* unregister_kmmio_probe() requirement */ | ||
399 | |||
400 | list_for_each_entry_safe(trace, tmp, &trace_list, list) { | ||
401 | list_del(&trace->list); | ||
402 | kfree(trace); | ||
403 | } | ||
404 | } | ||
405 | |||
406 | #ifdef CONFIG_HOTPLUG_CPU | ||
407 | static cpumask_t downed_cpus; | ||
408 | |||
409 | static void enter_uniprocessor(void) | ||
410 | { | ||
411 | int cpu; | ||
412 | int err; | ||
413 | |||
414 | get_online_cpus(); | ||
415 | downed_cpus = cpu_online_map; | ||
416 | cpu_clear(first_cpu(cpu_online_map), downed_cpus); | ||
417 | if (num_online_cpus() > 1) | ||
418 | pr_notice(NAME "Disabling non-boot CPUs...\n"); | ||
419 | put_online_cpus(); | ||
420 | |||
421 | for_each_cpu_mask(cpu, downed_cpus) { | ||
422 | err = cpu_down(cpu); | ||
423 | if (!err) | ||
424 | pr_info(NAME "CPU%d is down.\n", cpu); | ||
425 | else | ||
426 | pr_err(NAME "Error taking CPU%d down: %d\n", cpu, err); | ||
427 | } | ||
428 | if (num_online_cpus() > 1) | ||
429 | pr_warning(NAME "multiple CPUs still online, " | ||
430 | "may miss events.\n"); | ||
431 | } | ||
432 | |||
433 | static void leave_uniprocessor(void) | ||
434 | { | ||
435 | int cpu; | ||
436 | int err; | ||
437 | |||
438 | if (cpus_weight(downed_cpus) == 0) | ||
439 | return; | ||
440 | pr_notice(NAME "Re-enabling CPUs...\n"); | ||
441 | for_each_cpu_mask(cpu, downed_cpus) { | ||
442 | err = cpu_up(cpu); | ||
443 | if (!err) | ||
444 | pr_info(NAME "enabled CPU%d.\n", cpu); | ||
445 | else | ||
446 | pr_err(NAME "cannot re-enable CPU%d: %d\n", cpu, err); | ||
447 | } | ||
448 | } | ||
449 | |||
450 | #else /* !CONFIG_HOTPLUG_CPU */ | ||
451 | static void enter_uniprocessor(void) | ||
452 | { | ||
453 | if (num_online_cpus() > 1) | ||
454 | pr_warning(NAME "multiple CPUs are online, may miss events. " | ||
455 | "Suggest booting with maxcpus=1 kernel argument.\n"); | ||
456 | } | ||
457 | |||
458 | static void leave_uniprocessor(void) | ||
459 | { | ||
460 | } | ||
461 | #endif | ||
462 | |||
463 | #if 0 /* XXX: out of order */ | ||
464 | static struct file_operations fops_marker = { | ||
465 | .owner = THIS_MODULE, | ||
466 | .write = write_marker | ||
467 | }; | ||
468 | #endif | ||
469 | |||
470 | void enable_mmiotrace(void) | ||
471 | { | ||
472 | mutex_lock(&mmiotrace_mutex); | ||
473 | if (is_enabled()) | ||
474 | goto out; | ||
475 | |||
476 | #if 0 /* XXX: tracing does not support text entries */ | ||
477 | marker_file = debugfs_create_file("marker", 0660, dir, NULL, | ||
478 | &fops_marker); | ||
479 | if (!marker_file) | ||
480 | pr_err(NAME "marker file creation failed.\n"); | ||
481 | #endif | ||
482 | |||
483 | if (nommiotrace) | ||
484 | pr_info(NAME "MMIO tracing disabled.\n"); | ||
485 | enter_uniprocessor(); | ||
486 | spin_lock_irq(&trace_lock); | ||
487 | atomic_inc(&mmiotrace_enabled); | ||
488 | spin_unlock_irq(&trace_lock); | ||
489 | pr_info(NAME "enabled.\n"); | ||
490 | out: | ||
491 | mutex_unlock(&mmiotrace_mutex); | ||
492 | } | ||
493 | |||
494 | void disable_mmiotrace(void) | ||
495 | { | ||
496 | mutex_lock(&mmiotrace_mutex); | ||
497 | if (!is_enabled()) | ||
498 | goto out; | ||
499 | |||
500 | spin_lock_irq(&trace_lock); | ||
501 | atomic_dec(&mmiotrace_enabled); | ||
502 | BUG_ON(is_enabled()); | ||
503 | spin_unlock_irq(&trace_lock); | ||
504 | |||
505 | clear_trace_list(); /* guarantees: no more kmmio callbacks */ | ||
506 | leave_uniprocessor(); | ||
507 | if (marker_file) { | ||
508 | debugfs_remove(marker_file); | ||
509 | marker_file = NULL; | ||
510 | } | ||
511 | |||
512 | pr_info(NAME "disabled.\n"); | ||
513 | out: | ||
514 | mutex_unlock(&mmiotrace_mutex); | ||
515 | } | ||
diff --git a/arch/x86/mm/pageattr.c b/arch/x86/mm/pageattr.c index afd40054d157..65c6e46bf059 100644 --- a/arch/x86/mm/pageattr.c +++ b/arch/x86/mm/pageattr.c | |||
@@ -141,7 +141,7 @@ static void cpa_flush_all(unsigned long cache) | |||
141 | { | 141 | { |
142 | BUG_ON(irqs_disabled()); | 142 | BUG_ON(irqs_disabled()); |
143 | 143 | ||
144 | on_each_cpu(__cpa_flush_all, (void *) cache, 1, 1); | 144 | on_each_cpu(__cpa_flush_all, (void *) cache, 1); |
145 | } | 145 | } |
146 | 146 | ||
147 | static void __cpa_flush_range(void *arg) | 147 | static void __cpa_flush_range(void *arg) |
@@ -162,7 +162,7 @@ static void cpa_flush_range(unsigned long start, int numpages, int cache) | |||
162 | BUG_ON(irqs_disabled()); | 162 | BUG_ON(irqs_disabled()); |
163 | WARN_ON(PAGE_ALIGN(start) != start); | 163 | WARN_ON(PAGE_ALIGN(start) != start); |
164 | 164 | ||
165 | on_each_cpu(__cpa_flush_range, NULL, 1, 1); | 165 | on_each_cpu(__cpa_flush_range, NULL, 1); |
166 | 166 | ||
167 | if (!cache) | 167 | if (!cache) |
168 | return; | 168 | return; |
@@ -262,6 +262,7 @@ pte_t *lookup_address(unsigned long address, unsigned int *level) | |||
262 | 262 | ||
263 | return pte_offset_kernel(pmd, address); | 263 | return pte_offset_kernel(pmd, address); |
264 | } | 264 | } |
265 | EXPORT_SYMBOL_GPL(lookup_address); | ||
265 | 266 | ||
266 | /* | 267 | /* |
267 | * Set the new pmd in all the pgds we know about: | 268 | * Set the new pmd in all the pgds we know about: |
@@ -536,8 +537,14 @@ static int split_large_page(pte_t *kpte, unsigned long address) | |||
536 | set_pte(&pbase[i], pfn_pte(pfn, ref_prot)); | 537 | set_pte(&pbase[i], pfn_pte(pfn, ref_prot)); |
537 | 538 | ||
538 | if (address >= (unsigned long)__va(0) && | 539 | if (address >= (unsigned long)__va(0) && |
540 | address < (unsigned long)__va(max_low_pfn_mapped << PAGE_SHIFT)) | ||
541 | split_page_count(level); | ||
542 | |||
543 | #ifdef CONFIG_X86_64 | ||
544 | if (address >= (unsigned long)__va(1UL<<32) && | ||
539 | address < (unsigned long)__va(max_pfn_mapped << PAGE_SHIFT)) | 545 | address < (unsigned long)__va(max_pfn_mapped << PAGE_SHIFT)) |
540 | split_page_count(level); | 546 | split_page_count(level); |
547 | #endif | ||
541 | 548 | ||
542 | /* | 549 | /* |
543 | * Install the new, split up pagetable. Important details here: | 550 | * Install the new, split up pagetable. Important details here: |
@@ -652,15 +659,24 @@ static int cpa_process_alias(struct cpa_data *cpa) | |||
652 | struct cpa_data alias_cpa; | 659 | struct cpa_data alias_cpa; |
653 | int ret = 0; | 660 | int ret = 0; |
654 | 661 | ||
655 | if (cpa->pfn > max_pfn_mapped) | 662 | if (cpa->pfn >= max_pfn_mapped) |
656 | return 0; | 663 | return 0; |
657 | 664 | ||
665 | #ifdef CONFIG_X86_64 | ||
666 | if (cpa->pfn >= max_low_pfn_mapped && cpa->pfn < (1UL<<(32-PAGE_SHIFT))) | ||
667 | return 0; | ||
668 | #endif | ||
658 | /* | 669 | /* |
659 | * No need to redo, when the primary call touched the direct | 670 | * No need to redo, when the primary call touched the direct |
660 | * mapping already: | 671 | * mapping already: |
661 | */ | 672 | */ |
662 | if (!within(cpa->vaddr, PAGE_OFFSET, | 673 | if (!(within(cpa->vaddr, PAGE_OFFSET, |
663 | PAGE_OFFSET + (max_pfn_mapped << PAGE_SHIFT))) { | 674 | PAGE_OFFSET + (max_low_pfn_mapped << PAGE_SHIFT)) |
675 | #ifdef CONFIG_X86_64 | ||
676 | || within(cpa->vaddr, PAGE_OFFSET + (1UL<<32), | ||
677 | PAGE_OFFSET + (max_pfn_mapped << PAGE_SHIFT)) | ||
678 | #endif | ||
679 | )) { | ||
664 | 680 | ||
665 | alias_cpa = *cpa; | 681 | alias_cpa = *cpa; |
666 | alias_cpa.vaddr = (unsigned long) __va(cpa->pfn << PAGE_SHIFT); | 682 | alias_cpa.vaddr = (unsigned long) __va(cpa->pfn << PAGE_SHIFT); |
diff --git a/arch/x86/mm/pat.c b/arch/x86/mm/pat.c index a885a1019b8a..d4585077977a 100644 --- a/arch/x86/mm/pat.c +++ b/arch/x86/mm/pat.c | |||
@@ -449,7 +449,8 @@ int phys_mem_access_prot_allowed(struct file *file, unsigned long pfn, | |||
449 | if (retval < 0) | 449 | if (retval < 0) |
450 | return 0; | 450 | return 0; |
451 | 451 | ||
452 | if (pfn <= max_pfn_mapped && | 452 | if (((pfn < max_low_pfn_mapped) || |
453 | (pfn >= (1UL<<(32 - PAGE_SHIFT)) && pfn < max_pfn_mapped)) && | ||
453 | ioremap_change_attr((unsigned long)__va(offset), size, flags) < 0) { | 454 | ioremap_change_attr((unsigned long)__va(offset), size, flags) < 0) { |
454 | free_memtype(offset, offset + size); | 455 | free_memtype(offset, offset + size); |
455 | printk(KERN_INFO | 456 | printk(KERN_INFO |
diff --git a/arch/x86/mm/pf_in.c b/arch/x86/mm/pf_in.c new file mode 100644 index 000000000000..efa1911e20ca --- /dev/null +++ b/arch/x86/mm/pf_in.c | |||
@@ -0,0 +1,489 @@ | |||
1 | /* | ||
2 | * Fault Injection Test harness (FI) | ||
3 | * Copyright (C) Intel Corp. | ||
4 | * | ||
5 | * This program is free software; you can redistribute it and/or | ||
6 | * modify it under the terms of the GNU General Public License | ||
7 | * as published by the Free Software Foundation; either version 2 | ||
8 | * of the License, or (at your option) any later version. | ||
9 | * | ||
10 | * This program is distributed in the hope that it will be useful, | ||
11 | * but WITHOUT ANY WARRANTY; without even the implied warranty of | ||
12 | * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the | ||
13 | * GNU General Public License for more details. | ||
14 | * | ||
15 | * You should have received a copy of the GNU General Public License | ||
16 | * along with this program; if not, write to the Free Software | ||
17 | * Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, | ||
18 | * USA. | ||
19 | * | ||
20 | */ | ||
21 | |||
22 | /* Id: pf_in.c,v 1.1.1.1 2002/11/12 05:56:32 brlock Exp | ||
23 | * Copyright by Intel Corp., 2002 | ||
24 | * Louis Zhuang (louis.zhuang@intel.com) | ||
25 | * | ||
26 | * Bjorn Steinbrink (B.Steinbrink@gmx.de), 2007 | ||
27 | */ | ||
28 | |||
29 | #include <linux/module.h> | ||
30 | #include <linux/ptrace.h> /* struct pt_regs */ | ||
31 | #include "pf_in.h" | ||
32 | |||
33 | #ifdef __i386__ | ||
34 | /* IA32 Manual 3, 2-1 */ | ||
35 | static unsigned char prefix_codes[] = { | ||
36 | 0xF0, 0xF2, 0xF3, 0x2E, 0x36, 0x3E, 0x26, 0x64, | ||
37 | 0x65, 0x2E, 0x3E, 0x66, 0x67 | ||
38 | }; | ||
39 | /* IA32 Manual 3, 3-432*/ | ||
40 | static unsigned int reg_rop[] = { | ||
41 | 0x8A, 0x8B, 0xB60F, 0xB70F, 0xBE0F, 0xBF0F | ||
42 | }; | ||
43 | static unsigned int reg_wop[] = { 0x88, 0x89 }; | ||
44 | static unsigned int imm_wop[] = { 0xC6, 0xC7 }; | ||
45 | /* IA32 Manual 3, 3-432*/ | ||
46 | static unsigned int rw8[] = { 0x88, 0x8A, 0xC6 }; | ||
47 | static unsigned int rw32[] = { | ||
48 | 0x89, 0x8B, 0xC7, 0xB60F, 0xB70F, 0xBE0F, 0xBF0F | ||
49 | }; | ||
50 | static unsigned int mw8[] = { 0x88, 0x8A, 0xC6, 0xB60F, 0xBE0F }; | ||
51 | static unsigned int mw16[] = { 0xB70F, 0xBF0F }; | ||
52 | static unsigned int mw32[] = { 0x89, 0x8B, 0xC7 }; | ||
53 | static unsigned int mw64[] = {}; | ||
54 | #else /* not __i386__ */ | ||
55 | static unsigned char prefix_codes[] = { | ||
56 | 0x66, 0x67, 0x2E, 0x3E, 0x26, 0x64, 0x65, 0x36, | ||
57 | 0xF0, 0xF3, 0xF2, | ||
58 | /* REX Prefixes */ | ||
59 | 0x40, 0x41, 0x42, 0x43, 0x44, 0x45, 0x46, 0x47, | ||
60 | 0x48, 0x49, 0x4a, 0x4b, 0x4c, 0x4d, 0x4e, 0x4f | ||
61 | }; | ||
62 | /* AMD64 Manual 3, Appendix A*/ | ||
63 | static unsigned int reg_rop[] = { | ||
64 | 0x8A, 0x8B, 0xB60F, 0xB70F, 0xBE0F, 0xBF0F | ||
65 | }; | ||
66 | static unsigned int reg_wop[] = { 0x88, 0x89 }; | ||
67 | static unsigned int imm_wop[] = { 0xC6, 0xC7 }; | ||
68 | static unsigned int rw8[] = { 0xC6, 0x88, 0x8A }; | ||
69 | static unsigned int rw32[] = { | ||
70 | 0xC7, 0x89, 0x8B, 0xB60F, 0xB70F, 0xBE0F, 0xBF0F | ||
71 | }; | ||
72 | /* 8 bit only */ | ||
73 | static unsigned int mw8[] = { 0xC6, 0x88, 0x8A, 0xB60F, 0xBE0F }; | ||
74 | /* 16 bit only */ | ||
75 | static unsigned int mw16[] = { 0xB70F, 0xBF0F }; | ||
76 | /* 16 or 32 bit */ | ||
77 | static unsigned int mw32[] = { 0xC7 }; | ||
78 | /* 16, 32 or 64 bit */ | ||
79 | static unsigned int mw64[] = { 0x89, 0x8B }; | ||
80 | #endif /* not __i386__ */ | ||
81 | |||
82 | static int skip_prefix(unsigned char *addr, int *shorted, int *enlarged, | ||
83 | int *rexr) | ||
84 | { | ||
85 | int i; | ||
86 | unsigned char *p = addr; | ||
87 | *shorted = 0; | ||
88 | *enlarged = 0; | ||
89 | *rexr = 0; | ||
90 | |||
91 | restart: | ||
92 | for (i = 0; i < ARRAY_SIZE(prefix_codes); i++) { | ||
93 | if (*p == prefix_codes[i]) { | ||
94 | if (*p == 0x66) | ||
95 | *shorted = 1; | ||
96 | #ifdef __amd64__ | ||
97 | if ((*p & 0xf8) == 0x48) | ||
98 | *enlarged = 1; | ||
99 | if ((*p & 0xf4) == 0x44) | ||
100 | *rexr = 1; | ||
101 | #endif | ||
102 | p++; | ||
103 | goto restart; | ||
104 | } | ||
105 | } | ||
106 | |||
107 | return (p - addr); | ||
108 | } | ||
109 | |||
110 | static int get_opcode(unsigned char *addr, unsigned int *opcode) | ||
111 | { | ||
112 | int len; | ||
113 | |||
114 | if (*addr == 0x0F) { | ||
115 | /* 0x0F is a two-byte (extension) opcode */ | ||
116 | *opcode = *(unsigned short *)addr; | ||
117 | len = 2; | ||
118 | } else { | ||
119 | *opcode = *addr; | ||
120 | len = 1; | ||
121 | } | ||
122 | |||
123 | return len; | ||
124 | } | ||
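A worked example may help here. For the i386 instruction bytes 66 89 08 (mov %cx,(%eax)), skip_prefix() consumes the 0x66 operand-size prefix and sets *shorted, get_opcode() returns 0x89 as a one-byte opcode, and the pointer is left on the mod/rm byte 0x08. A small self-contained userspace sketch of the same walk (illustration only, not kernel code):

    #include <stdio.h>

    int main(void)
    {
            unsigned char ins[] = { 0x66, 0x89, 0x08 };
            unsigned char *p = ins;
            int shorted = 0;
            unsigned int opcode;

            if (*p == 0x66) {               /* operand-size prefix */
                    shorted = 1;
                    p++;
            }
            if (*p == 0x0F) {               /* two-byte opcode escape */
                    opcode = *(unsigned short *)p;
                    p += 2;
            } else {
                    opcode = *p++;
            }

            /* prints: opcode 0x89 shorted 1 mod/rm 0x08 */
            printf("opcode 0x%02x shorted %d mod/rm 0x%02x\n",
                   opcode, shorted, *p);
            return 0;
    }

With shorted set, get_ins_reg_width() below reports a 2-byte access for opcode 0x89, as the rw32 table intends.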
125 | |||
126 | #define CHECK_OP_TYPE(opcode, array, type) \ | ||
127 | for (i = 0; i < ARRAY_SIZE(array); i++) { \ | ||
128 | if (array[i] == opcode) { \ | ||
129 | rv = type; \ | ||
130 | goto exit; \ | ||
131 | } \ | ||
132 | } | ||
133 | |||
134 | enum reason_type get_ins_type(unsigned long ins_addr) | ||
135 | { | ||
136 | unsigned int opcode; | ||
137 | unsigned char *p; | ||
138 | int shorted, enlarged, rexr; | ||
139 | int i; | ||
140 | enum reason_type rv = OTHERS; | ||
141 | |||
142 | p = (unsigned char *)ins_addr; | ||
143 | p += skip_prefix(p, &shorted, &enlarged, &rexr); | ||
144 | p += get_opcode(p, &opcode); | ||
145 | |||
146 | CHECK_OP_TYPE(opcode, reg_rop, REG_READ); | ||
147 | CHECK_OP_TYPE(opcode, reg_wop, REG_WRITE); | ||
148 | CHECK_OP_TYPE(opcode, imm_wop, IMM_WRITE); | ||
149 | |||
150 | exit: | ||
151 | return rv; | ||
152 | } | ||
153 | #undef CHECK_OP_TYPE | ||
154 | |||
155 | static unsigned int get_ins_reg_width(unsigned long ins_addr) | ||
156 | { | ||
157 | unsigned int opcode; | ||
158 | unsigned char *p; | ||
159 | int i, shorted, enlarged, rexr; | ||
160 | |||
161 | p = (unsigned char *)ins_addr; | ||
162 | p += skip_prefix(p, &shorted, &enlarged, &rexr); | ||
163 | p += get_opcode(p, &opcode); | ||
164 | |||
165 | for (i = 0; i < ARRAY_SIZE(rw8); i++) | ||
166 | if (rw8[i] == opcode) | ||
167 | return 1; | ||
168 | |||
169 | for (i = 0; i < ARRAY_SIZE(rw32); i++) | ||
170 | if (rw32[i] == opcode) | ||
171 | return (shorted ? 2 : (enlarged ? 8 : 4)); | ||
172 | |||
173 | printk(KERN_ERR "mmiotrace: Unknown opcode 0x%02x\n", opcode); | ||
174 | return 0; | ||
175 | } | ||
176 | |||
177 | unsigned int get_ins_mem_width(unsigned long ins_addr) | ||
178 | { | ||
179 | unsigned int opcode; | ||
180 | unsigned char *p; | ||
181 | int i, shorted, enlarged, rexr; | ||
182 | |||
183 | p = (unsigned char *)ins_addr; | ||
184 | p += skip_prefix(p, &shorted, &enlarged, &rexr); | ||
185 | p += get_opcode(p, &opcode); | ||
186 | |||
187 | for (i = 0; i < ARRAY_SIZE(mw8); i++) | ||
188 | if (mw8[i] == opcode) | ||
189 | return 1; | ||
190 | |||
191 | for (i = 0; i < ARRAY_SIZE(mw16); i++) | ||
192 | if (mw16[i] == opcode) | ||
193 | return 2; | ||
194 | |||
195 | for (i = 0; i < ARRAY_SIZE(mw32); i++) | ||
196 | if (mw32[i] == opcode) | ||
197 | return shorted ? 2 : 4; | ||
198 | |||
199 | for (i = 0; i < ARRAY_SIZE(mw64); i++) | ||
200 | if (mw64[i] == opcode) | ||
201 | return shorted ? 2 : (enlarged ? 8 : 4); | ||
202 | |||
203 | printk(KERN_ERR "mmiotrace: Unknown opcode 0x%02x\n", opcode); | ||
204 | return 0; | ||
205 | } | ||
206 | |||
207 | /* | ||
208 | * Define register identifiers in the mod/rm byte. | ||
209 | * Note: these are NOT the same as in ptrace-abi.h. | ||
210 | */ | ||
211 | enum { | ||
212 | arg_AL = 0, | ||
213 | arg_CL = 1, | ||
214 | arg_DL = 2, | ||
215 | arg_BL = 3, | ||
216 | arg_AH = 4, | ||
217 | arg_CH = 5, | ||
218 | arg_DH = 6, | ||
219 | arg_BH = 7, | ||
220 | |||
221 | arg_AX = 0, | ||
222 | arg_CX = 1, | ||
223 | arg_DX = 2, | ||
224 | arg_BX = 3, | ||
225 | arg_SP = 4, | ||
226 | arg_BP = 5, | ||
227 | arg_SI = 6, | ||
228 | arg_DI = 7, | ||
229 | #ifdef __amd64__ | ||
230 | arg_R8 = 8, | ||
231 | arg_R9 = 9, | ||
232 | arg_R10 = 10, | ||
233 | arg_R11 = 11, | ||
234 | arg_R12 = 12, | ||
235 | arg_R13 = 13, | ||
236 | arg_R14 = 14, | ||
237 | arg_R15 = 15 | ||
238 | #endif | ||
239 | }; | ||
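get_ins_reg_val() below recovers one of these identifiers from the reg field of the mod/rm byte, with the REX.R bit supplying bit 3 on x86-64. Continuing the 66 89 08 example above (plain C, illustration only):

    #include <stdio.h>

    int main(void)
    {
            /* mod/rm layout: mod[7:6] reg[5:3] r/m[2:0] */
            unsigned char mod_rm = 0x08;    /* from mov %cx,(%eax) */
            int rexr = 0;                   /* no REX prefix on i386 */
            int reg = ((mod_rm >> 3) & 0x7) | (rexr << 3);

            /* prints: reg = 1, i.e. arg_CX, so the value is regs->cx */
            printf("reg = %d\n", reg);
            return 0;
    }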
240 | |||
241 | static unsigned char *get_reg_w8(int no, struct pt_regs *regs) | ||
242 | { | ||
243 | unsigned char *rv = NULL; | ||
244 | |||
245 | switch (no) { | ||
246 | case arg_AL: | ||
247 | rv = (unsigned char *)®s->ax; | ||
248 | break; | ||
249 | case arg_BL: | ||
250 | rv = (unsigned char *)®s->bx; | ||
251 | break; | ||
252 | case arg_CL: | ||
253 | rv = (unsigned char *)®s->cx; | ||
254 | break; | ||
255 | case arg_DL: | ||
256 | rv = (unsigned char *)®s->dx; | ||
257 | break; | ||
258 | case arg_AH: | ||
259 | rv = 1 + (unsigned char *)®s->ax; | ||
260 | break; | ||
261 | case arg_BH: | ||
262 | rv = 1 + (unsigned char *)®s->bx; | ||
263 | break; | ||
264 | case arg_CH: | ||
265 | rv = 1 + (unsigned char *)®s->cx; | ||
266 | break; | ||
267 | case arg_DH: | ||
268 | rv = 1 + (unsigned char *)®s->dx; | ||
269 | break; | ||
270 | #ifdef __amd64__ | ||
271 | case arg_R8: | ||
272 | rv = (unsigned char *)®s->r8; | ||
273 | break; | ||
274 | case arg_R9: | ||
275 | rv = (unsigned char *)®s->r9; | ||
276 | break; | ||
277 | case arg_R10: | ||
278 | rv = (unsigned char *)®s->r10; | ||
279 | break; | ||
280 | case arg_R11: | ||
281 | rv = (unsigned char *)®s->r11; | ||
282 | break; | ||
283 | case arg_R12: | ||
284 | rv = (unsigned char *)®s->r12; | ||
285 | break; | ||
286 | case arg_R13: | ||
287 | rv = (unsigned char *)®s->r13; | ||
288 | break; | ||
289 | case arg_R14: | ||
290 | rv = (unsigned char *)®s->r14; | ||
291 | break; | ||
292 | case arg_R15: | ||
293 | rv = (unsigned char *)®s->r15; | ||
294 | break; | ||
295 | #endif | ||
296 | default: | ||
297 | printk(KERN_ERR "mmiotrace: Error reg no# %d\n", no); | ||
298 | break; | ||
299 | } | ||
300 | return rv; | ||
301 | } | ||
302 | |||
303 | static unsigned long *get_reg_w32(int no, struct pt_regs *regs) | ||
304 | { | ||
305 | unsigned long *rv = NULL; | ||
306 | |||
307 | switch (no) { | ||
308 | case arg_AX: | ||
309 | rv = ®s->ax; | ||
310 | break; | ||
311 | case arg_BX: | ||
312 | rv = ®s->bx; | ||
313 | break; | ||
314 | case arg_CX: | ||
315 | rv = ®s->cx; | ||
316 | break; | ||
317 | case arg_DX: | ||
318 | rv = ®s->dx; | ||
319 | break; | ||
320 | case arg_SP: | ||
321 | rv = ®s->sp; | ||
322 | break; | ||
323 | case arg_BP: | ||
324 | rv = ®s->bp; | ||
325 | break; | ||
326 | case arg_SI: | ||
327 | rv = ®s->si; | ||
328 | break; | ||
329 | case arg_DI: | ||
330 | rv = ®s->di; | ||
331 | break; | ||
332 | #ifdef __amd64__ | ||
333 | case arg_R8: | ||
334 | rv = ®s->r8; | ||
335 | break; | ||
336 | case arg_R9: | ||
337 | rv = ®s->r9; | ||
338 | break; | ||
339 | case arg_R10: | ||
340 | rv = ®s->r10; | ||
341 | break; | ||
342 | case arg_R11: | ||
343 | rv = ®s->r11; | ||
344 | break; | ||
345 | case arg_R12: | ||
346 | rv = ®s->r12; | ||
347 | break; | ||
348 | case arg_R13: | ||
349 | rv = ®s->r13; | ||
350 | break; | ||
351 | case arg_R14: | ||
352 | rv = ®s->r14; | ||
353 | break; | ||
354 | case arg_R15: | ||
355 | rv = ®s->r15; | ||
356 | break; | ||
357 | #endif | ||
358 | default: | ||
359 | printk(KERN_ERR "mmiotrace: Error reg no# %d\n", no); | ||
360 | } | ||
361 | |||
362 | return rv; | ||
363 | } | ||
364 | |||
365 | unsigned long get_ins_reg_val(unsigned long ins_addr, struct pt_regs *regs) | ||
366 | { | ||
367 | unsigned int opcode; | ||
368 | unsigned char mod_rm; | ||
369 | int reg; | ||
370 | unsigned char *p; | ||
371 | int i, shorted, enlarged, rexr; | ||
372 | unsigned long rv; | ||
373 | |||
374 | p = (unsigned char *)ins_addr; | ||
375 | p += skip_prefix(p, &shorted, &enlarged, &rexr); | ||
376 | p += get_opcode(p, &opcode); | ||
377 | for (i = 0; i < ARRAY_SIZE(reg_rop); i++) | ||
378 | if (reg_rop[i] == opcode) { | ||
379 | rv = REG_READ; | ||
380 | goto do_work; | ||
381 | } | ||
382 | |||
383 | for (i = 0; i < ARRAY_SIZE(reg_wop); i++) | ||
384 | if (reg_wop[i] == opcode) { | ||
385 | rv = REG_WRITE; | ||
386 | goto do_work; | ||
387 | } | ||
388 | |||
389 | printk(KERN_ERR "mmiotrace: Not a register instruction, opcode " | ||
390 | "0x%02x\n", opcode); | ||
391 | goto err; | ||
392 | |||
393 | do_work: | ||
394 | mod_rm = *p; | ||
395 | reg = ((mod_rm >> 3) & 0x7) | (rexr << 3); | ||
396 | switch (get_ins_reg_width(ins_addr)) { | ||
397 | case 1: | ||
398 | return *get_reg_w8(reg, regs); | ||
399 | |||
400 | case 2: | ||
401 | return *(unsigned short *)get_reg_w32(reg, regs); | ||
402 | |||
403 | case 4: | ||
404 | return *(unsigned int *)get_reg_w32(reg, regs); | ||
405 | |||
406 | #ifdef __amd64__ | ||
407 | case 8: | ||
408 | return *(unsigned long *)get_reg_w32(reg, regs); | ||
409 | #endif | ||
410 | |||
411 | default: | ||
412 | printk(KERN_ERR "mmiotrace: Error: unsupported width for reg# %d\n", reg); | ||
413 | } | ||
414 | |||
415 | err: | ||
416 | return 0; | ||
417 | } | ||
418 | |||
419 | unsigned long get_ins_imm_val(unsigned long ins_addr) | ||
420 | { | ||
421 | unsigned int opcode; | ||
422 | unsigned char mod_rm; | ||
423 | unsigned char mod; | ||
424 | unsigned char *p; | ||
425 | int i, shorted, enlarged, rexr; | ||
426 | unsigned long rv; | ||
427 | |||
428 | p = (unsigned char *)ins_addr; | ||
429 | p += skip_prefix(p, &shorted, &enlarged, &rexr); | ||
430 | p += get_opcode(p, &opcode); | ||
431 | for (i = 0; i < ARRAY_SIZE(imm_wop); i++) | ||
432 | if (imm_wop[i] == opcode) { | ||
433 | rv = IMM_WRITE; | ||
434 | goto do_work; | ||
435 | } | ||
436 | |||
437 | printk(KERN_ERR "mmiotrace: Not an immediate instruction, opcode " | ||
438 | "0x%02x\n", opcode); | ||
439 | goto err; | ||
440 | |||
441 | do_work: | ||
442 | mod_rm = *p; | ||
443 | mod = mod_rm >> 6; | ||
444 | p++; | ||
445 | switch (mod) { | ||
446 | case 0: | ||
447 | /* if r/m is 5 we have a 32-bit disp (IA32 Manual 3, Table 2-2) */ | ||
448 | /* AMD64: XXX Check for address size prefix? */ | ||
449 | if ((mod_rm & 0x7) == 0x5) | ||
450 | p += 4; | ||
451 | break; | ||
452 | |||
453 | case 1: | ||
454 | p += 1; | ||
455 | break; | ||
456 | |||
457 | case 2: | ||
458 | p += 4; | ||
459 | break; | ||
460 | |||
461 | case 3: | ||
462 | default: | ||
463 | printk(KERN_ERR "mmiotrace: not a memory access instruction " | ||
464 | "at 0x%lx, rm_mod=0x%02x\n", | ||
465 | ins_addr, mod_rm); | ||
466 | } | ||
467 | |||
468 | switch (get_ins_reg_width(ins_addr)) { | ||
469 | case 1: | ||
470 | return *(unsigned char *)p; | ||
471 | |||
472 | case 2: | ||
473 | return *(unsigned short *)p; | ||
474 | |||
475 | case 4: | ||
476 | return *(unsigned int *)p; | ||
477 | |||
478 | #ifdef __amd64__ | ||
479 | case 8: | ||
480 | return *(unsigned long *)p; | ||
481 | #endif | ||
482 | |||
483 | default: | ||
484 | printk(KERN_ERR "mmiotrace: Error: unsupported immediate width.\n"); | ||
485 | } | ||
486 | |||
487 | err: | ||
488 | return 0; | ||
489 | } | ||
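As a worked example of the mod dispatch above (hypothetical instruction bytes, x86 little-endian assumed): for C7 40 04 0D 00 00 00, i.e. movl $13, 4(%eax), the opcode is 0xC7, mod is 1, so one displacement byte (0x04) is skipped before the 4-byte immediate is read:

    #include <stdio.h>

    int main(void)
    {
            /* movl $13, 4(%eax): C7 /0, mod=1 -> disp8, then imm32 */
            unsigned char ins[] = { 0xC7, 0x40, 0x04, 0x0D, 0x00, 0x00, 0x00 };
            unsigned char *p = ins + 1;     /* past the opcode */
            unsigned char mod = *p >> 6;

            p++;                            /* past mod/rm */
            if (mod == 1)
                    p += 1;                 /* skip the 8-bit displacement */

            /* prints: immediate = 13 */
            printf("immediate = %u\n", *(unsigned int *)p);
            return 0;
    }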
diff --git a/arch/x86/mm/pf_in.h b/arch/x86/mm/pf_in.h new file mode 100644 index 000000000000..e05341a51a27 --- /dev/null +++ b/arch/x86/mm/pf_in.h | |||
@@ -0,0 +1,39 @@ | |||
1 | /* | ||
2 | * Fault Injection Test harness (FI) | ||
3 | * Copyright (C) Intel Corp. | ||
4 | * | ||
5 | * This program is free software; you can redistribute it and/or | ||
6 | * modify it under the terms of the GNU General Public License | ||
7 | * as published by the Free Software Foundation; either version 2 | ||
8 | * of the License, or (at your option) any later version. | ||
9 | * | ||
10 | * This program is distributed in the hope that it will be useful, | ||
11 | * but WITHOUT ANY WARRANTY; without even the implied warranty of | ||
12 | * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the | ||
13 | * GNU General Public License for more details. | ||
14 | * | ||
15 | * You should have received a copy of the GNU General Public License | ||
16 | * along with this program; if not, write to the Free Software | ||
17 | * Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, | ||
18 | * USA. | ||
19 | * | ||
20 | */ | ||
21 | |||
22 | #ifndef __PF_H_ | ||
23 | #define __PF_H_ | ||
24 | |||
25 | enum reason_type { | ||
26 | NOT_ME, /* page fault is not in our regions */ | ||
27 | NOTHING, /* access to another point within the regions */ | ||
28 | REG_READ, /* read from addr to reg */ | ||
29 | REG_WRITE, /* write from reg to addr */ | ||
30 | IMM_WRITE, /* write from imm to addr */ | ||
31 | OTHERS /* other instructions we cannot intercept */ | ||
32 | }; | ||
33 | |||
34 | enum reason_type get_ins_type(unsigned long ins_addr); | ||
35 | unsigned int get_ins_mem_width(unsigned long ins_addr); | ||
36 | unsigned long get_ins_reg_val(unsigned long ins_addr, struct pt_regs *regs); | ||
37 | unsigned long get_ins_imm_val(unsigned long ins_addr); | ||
38 | |||
39 | #endif /* __PF_H_ */ | ||
diff --git a/arch/x86/mm/pgtable_32.c b/arch/x86/mm/pgtable_32.c index 828907d001e8..b4becbf8c570 100644 --- a/arch/x86/mm/pgtable_32.c +++ b/arch/x86/mm/pgtable_32.c | |||
@@ -141,7 +141,6 @@ void set_pmd_pfn(unsigned long vaddr, unsigned long pfn, pgprot_t flags) | |||
141 | __flush_tlb_one(vaddr); | 141 | __flush_tlb_one(vaddr); |
142 | } | 142 | } |
143 | 143 | ||
144 | static int fixmaps; | ||
145 | unsigned long __FIXADDR_TOP = 0xfffff000; | 144 | unsigned long __FIXADDR_TOP = 0xfffff000; |
146 | EXPORT_SYMBOL(__FIXADDR_TOP); | 145 | EXPORT_SYMBOL(__FIXADDR_TOP); |
147 | 146 | ||
diff --git a/arch/x86/mm/srat_32.c b/arch/x86/mm/srat_32.c index f41d67f8f831..1eb2973a301c 100644 --- a/arch/x86/mm/srat_32.c +++ b/arch/x86/mm/srat_32.c | |||
@@ -156,10 +156,9 @@ acpi_numa_memory_affinity_init(struct acpi_srat_mem_affinity *memory_affinity) | |||
156 | 156 | ||
157 | num_memory_chunks++; | 157 | num_memory_chunks++; |
158 | 158 | ||
159 | printk(KERN_DEBUG "Memory range %08lx to %08lx (type %x)" | 159 | printk(KERN_DEBUG "Memory range %08lx to %08lx" |
160 | " in proximity domain %02x %s\n", | 160 | " in proximity domain %02x %s\n", |
161 | start_pfn, end_pfn, | 161 | start_pfn, end_pfn, |
162 | memory_affinity->memory_type, | ||
163 | pxm, | 162 | pxm, |
164 | ((memory_affinity->flags & ACPI_SRAT_MEM_HOT_PLUGGABLE) ? | 163 | ((memory_affinity->flags & ACPI_SRAT_MEM_HOT_PLUGGABLE) ? |
165 | "enabled and removable" : "enabled" ) ); | 164 | "enabled and removable" : "enabled" ) ); |
diff --git a/arch/x86/mm/srat_64.c b/arch/x86/mm/srat_64.c index 0fd67b81a8b6..1b4763e26ea9 100644 --- a/arch/x86/mm/srat_64.c +++ b/arch/x86/mm/srat_64.c | |||
@@ -100,7 +100,19 @@ static __init inline int srat_disabled(void) | |||
100 | /* Callback for SLIT parsing */ | 100 | /* Callback for SLIT parsing */ |
101 | void __init acpi_numa_slit_init(struct acpi_table_slit *slit) | 101 | void __init acpi_numa_slit_init(struct acpi_table_slit *slit) |
102 | { | 102 | { |
103 | acpi_slit = slit; | 103 | unsigned length; |
104 | unsigned long phys; | ||
105 | |||
106 | length = slit->header.length; | ||
107 | phys = find_e820_area(0, max_pfn_mapped<<PAGE_SHIFT, length, | ||
108 | PAGE_SIZE); | ||
109 | |||
110 | if (phys == -1L) | ||
111 | panic("Cannot save SLIT!\n"); | ||
112 | |||
113 | acpi_slit = __va(phys); | ||
114 | memcpy(acpi_slit, slit, length); | ||
115 | reserve_early(phys, phys + length, "ACPI SLIT"); | ||
104 | } | 116 | } |
105 | 117 | ||
106 | /* Callback for Proximity Domain -> LAPIC mapping */ | 118 | /* Callback for Proximity Domain -> LAPIC mapping */ |
diff --git a/arch/x86/mm/testmmiotrace.c b/arch/x86/mm/testmmiotrace.c new file mode 100644 index 000000000000..d877c5b423ef --- /dev/null +++ b/arch/x86/mm/testmmiotrace.c | |||
@@ -0,0 +1,71 @@ | |||
1 | /* | ||
2 | * Written by Pekka Paalanen, 2008 <pq@iki.fi> | ||
3 | */ | ||
4 | #include <linux/module.h> | ||
5 | #include <linux/io.h> | ||
6 | |||
7 | #define MODULE_NAME "testmmiotrace" | ||
8 | |||
9 | static unsigned long mmio_address; | ||
10 | module_param(mmio_address, ulong, 0); | ||
11 | MODULE_PARM_DESC(mmio_address, "Start address of the mapping of 16 kB."); | ||
12 | |||
13 | static void do_write_test(void __iomem *p) | ||
14 | { | ||
15 | unsigned int i; | ||
16 | for (i = 0; i < 256; i++) | ||
17 | iowrite8(i, p + i); | ||
18 | for (i = 1024; i < (5 * 1024); i += 2) | ||
19 | iowrite16(i * 12 + 7, p + i); | ||
20 | for (i = (5 * 1024); i < (16 * 1024); i += 4) | ||
21 | iowrite32(i * 212371 + 13, p + i); | ||
22 | } | ||
23 | |||
24 | static void do_read_test(void __iomem *p) | ||
25 | { | ||
26 | unsigned int i; | ||
27 | for (i = 0; i < 256; i++) | ||
28 | ioread8(p + i); | ||
29 | for (i = 1024; i < (5 * 1024); i += 2) | ||
30 | ioread16(p + i); | ||
31 | for (i = (5 * 1024); i < (16 * 1024); i += 4) | ||
32 | ioread32(p + i); | ||
33 | } | ||
34 | |||
35 | static void do_test(void) | ||
36 | { | ||
37 | void __iomem *p = ioremap_nocache(mmio_address, 0x4000); | ||
38 | if (!p) { | ||
39 | pr_err(MODULE_NAME ": could not ioremap, aborting.\n"); | ||
40 | return; | ||
41 | } | ||
42 | do_write_test(p); | ||
43 | do_read_test(p); | ||
44 | iounmap(p); | ||
45 | } | ||
46 | |||
47 | static int __init init(void) | ||
48 | { | ||
49 | if (mmio_address == 0) { | ||
50 | pr_err(MODULE_NAME ": you have to use the module argument " | ||
51 | "mmio_address.\n"); | ||
52 | pr_err(MODULE_NAME ": DO NOT LOAD THIS MODULE UNLESS" | ||
53 | " YOU REALLY KNOW WHAT YOU ARE DOING!\n"); | ||
54 | return -ENXIO; | ||
55 | } | ||
56 | |||
57 | pr_warning(MODULE_NAME ": WARNING: mapping 16 kB @ 0x%08lx " | ||
58 | "in PCI address space, and writing " | ||
59 | "rubbish in there.\n", mmio_address); | ||
60 | do_test(); | ||
61 | return 0; | ||
62 | } | ||
63 | |||
64 | static void __exit cleanup(void) | ||
65 | { | ||
66 | pr_debug(MODULE_NAME ": unloaded.\n"); | ||
67 | } | ||
68 | |||
69 | module_init(init); | ||
70 | module_exit(cleanup); | ||
71 | MODULE_LICENSE("GPL"); | ||
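For reference, the module above is loaded by passing the start of an expendable 16 kB MMIO window on the command line, e.g. modprobe testmmiotrace mmio_address=0xfb000000; the address here is only a placeholder and must name real hardware that is safe to scribble on, as the init-time warning makes clear.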
diff --git a/arch/x86/oprofile/nmi_int.c b/arch/x86/oprofile/nmi_int.c index 2b6ad5b9f9d5..7f3329b55d2e 100644 --- a/arch/x86/oprofile/nmi_int.c +++ b/arch/x86/oprofile/nmi_int.c | |||
@@ -218,8 +218,8 @@ static int nmi_setup(void) | |||
218 | } | 218 | } |
219 | 219 | ||
220 | } | 220 | } |
221 | on_each_cpu(nmi_save_registers, NULL, 0, 1); | 221 | on_each_cpu(nmi_save_registers, NULL, 1); |
222 | on_each_cpu(nmi_cpu_setup, NULL, 0, 1); | 222 | on_each_cpu(nmi_cpu_setup, NULL, 1); |
223 | nmi_enabled = 1; | 223 | nmi_enabled = 1; |
224 | return 0; | 224 | return 0; |
225 | } | 225 | } |
@@ -271,7 +271,7 @@ static void nmi_shutdown(void) | |||
271 | { | 271 | { |
272 | struct op_msrs *msrs = &get_cpu_var(cpu_msrs); | 272 | struct op_msrs *msrs = &get_cpu_var(cpu_msrs); |
273 | nmi_enabled = 0; | 273 | nmi_enabled = 0; |
274 | on_each_cpu(nmi_cpu_shutdown, NULL, 0, 1); | 274 | on_each_cpu(nmi_cpu_shutdown, NULL, 1); |
275 | unregister_die_notifier(&profile_exceptions_nb); | 275 | unregister_die_notifier(&profile_exceptions_nb); |
276 | model->shutdown(msrs); | 276 | model->shutdown(msrs); |
277 | free_msrs(); | 277 | free_msrs(); |
@@ -286,7 +286,7 @@ static void nmi_cpu_start(void *dummy) | |||
286 | 286 | ||
287 | static int nmi_start(void) | 287 | static int nmi_start(void) |
288 | { | 288 | { |
289 | on_each_cpu(nmi_cpu_start, NULL, 0, 1); | 289 | on_each_cpu(nmi_cpu_start, NULL, 1); |
290 | return 0; | 290 | return 0; |
291 | } | 291 | } |
292 | 292 | ||
@@ -298,7 +298,7 @@ static void nmi_cpu_stop(void *dummy) | |||
298 | 298 | ||
299 | static void nmi_stop(void) | 299 | static void nmi_stop(void) |
300 | { | 300 | { |
301 | on_each_cpu(nmi_cpu_stop, NULL, 0, 1); | 301 | on_each_cpu(nmi_cpu_stop, NULL, 1); |
302 | } | 302 | } |
303 | 303 | ||
304 | struct op_counter_config counter_config[OP_MAX_COUNTER]; | 304 | struct op_counter_config counter_config[OP_MAX_COUNTER]; |
diff --git a/arch/x86/pci/Makefile b/arch/x86/pci/Makefile index c5c8e485fc44..e515e8db842a 100644 --- a/arch/x86/pci/Makefile +++ b/arch/x86/pci/Makefile | |||
@@ -1,5 +1,17 @@ | |||
1 | ifeq ($(CONFIG_X86_32),y) | 1 | obj-y := i386.o init.o |
2 | include ${srctree}/arch/x86/pci/Makefile_32 | 2 | |
3 | else | 3 | obj-$(CONFIG_PCI_BIOS) += pcbios.o |
4 | include ${srctree}/arch/x86/pci/Makefile_64 | 4 | obj-$(CONFIG_PCI_MMCONFIG) += mmconfig_$(BITS).o direct.o mmconfig-shared.o |
5 | endif | 5 | obj-$(CONFIG_PCI_DIRECT) += direct.o |
6 | obj-$(CONFIG_PCI_OLPC) += olpc.o | ||
7 | |||
8 | pci-y := fixup.o | ||
9 | pci-$(CONFIG_ACPI) += acpi.o | ||
10 | pci-y += legacy.o irq.o | ||
11 | |||
12 | pci-$(CONFIG_X86_VISWS) += visws.o | ||
13 | |||
14 | pci-$(CONFIG_X86_NUMAQ) += numa.o | ||
15 | |||
16 | obj-y += $(pci-y) common.o early.o | ||
17 | obj-y += amd_bus.o | ||
diff --git a/arch/x86/pci/Makefile_32 b/arch/x86/pci/Makefile_32 deleted file mode 100644 index a34fbf557926..000000000000 --- a/arch/x86/pci/Makefile_32 +++ /dev/null | |||
@@ -1,26 +0,0 @@ | |||
1 | obj-y := i386.o init.o | ||
2 | |||
3 | obj-$(CONFIG_PCI_BIOS) += pcbios.o | ||
4 | obj-$(CONFIG_PCI_MMCONFIG) += mmconfig_32.o direct.o mmconfig-shared.o | ||
5 | obj-$(CONFIG_PCI_DIRECT) += direct.o | ||
6 | obj-$(CONFIG_PCI_OLPC) += olpc.o | ||
7 | |||
8 | pci-y := fixup.o | ||
9 | |||
10 | # Do not change the ordering here. There is a nasty init function | ||
11 | # ordering dependency which breaks when you move acpi.o below | ||
12 | # legacy/irq.o | ||
13 | pci-$(CONFIG_ACPI) += acpi.o | ||
14 | pci-y += legacy.o irq.o | ||
15 | |||
16 | # Careful: VISWS overrules the pci-y above. The colons are | ||
17 | # therefore correct. This needs a proper fix by disentangling the code. | ||
18 | pci-$(CONFIG_X86_VISWS) := visws.o fixup.o | ||
19 | |||
20 | pci-$(CONFIG_X86_NUMAQ) += numa.o | ||
21 | |||
22 | # Necessary for NUMAQ as well | ||
23 | pci-$(CONFIG_NUMA) += mp_bus_to_node.o | ||
24 | |||
25 | obj-y += $(pci-y) common.o early.o | ||
26 | obj-y += amd_bus.o | ||
diff --git a/arch/x86/pci/Makefile_64 b/arch/x86/pci/Makefile_64 deleted file mode 100644 index fd47068c95de..000000000000 --- a/arch/x86/pci/Makefile_64 +++ /dev/null | |||
@@ -1,17 +0,0 @@ | |||
1 | # | ||
2 | # Makefile for X86_64 specific PCI routines | ||
3 | # | ||
4 | # Reuse the i386 PCI subsystem | ||
5 | # | ||
6 | EXTRA_CFLAGS += -Iarch/x86/pci | ||
7 | |||
8 | obj-y := i386.o | ||
9 | obj-$(CONFIG_PCI_DIRECT)+= direct.o | ||
10 | obj-y += fixup.o init.o | ||
11 | obj-$(CONFIG_ACPI) += acpi.o | ||
12 | obj-y += legacy.o irq.o common.o early.o | ||
13 | # mmconfig has a 64bit special | ||
14 | obj-$(CONFIG_PCI_MMCONFIG) += mmconfig_64.o direct.o mmconfig-shared.o | ||
15 | |||
16 | obj-y += amd_bus.o | ||
17 | |||
diff --git a/arch/x86/pci/acpi.c b/arch/x86/pci/acpi.c index 4fa52d3dc848..19af06927fbc 100644 --- a/arch/x86/pci/acpi.c +++ b/arch/x86/pci/acpi.c | |||
@@ -223,7 +223,7 @@ struct pci_bus * __devinit pci_acpi_scan_root(struct acpi_device *device, int do | |||
223 | return bus; | 223 | return bus; |
224 | } | 224 | } |
225 | 225 | ||
226 | static int __init pci_acpi_init(void) | 226 | int __init pci_acpi_init(void) |
227 | { | 227 | { |
228 | struct pci_dev *dev = NULL; | 228 | struct pci_dev *dev = NULL; |
229 | 229 | ||
@@ -257,4 +257,3 @@ static int __init pci_acpi_init(void) | |||
257 | 257 | ||
258 | return 0; | 258 | return 0; |
259 | } | 259 | } |
260 | subsys_initcall(pci_acpi_init); | ||
diff --git a/arch/x86/pci/amd_bus.c b/arch/x86/pci/amd_bus.c index d02c598451ec..dbf532369711 100644 --- a/arch/x86/pci/amd_bus.c +++ b/arch/x86/pci/amd_bus.c | |||
@@ -1,44 +1,25 @@ | |||
1 | #include <linux/init.h> | 1 | #include <linux/init.h> |
2 | #include <linux/pci.h> | 2 | #include <linux/pci.h> |
3 | #include <linux/topology.h> | ||
3 | #include "pci.h" | 4 | #include "pci.h" |
4 | 5 | ||
5 | #ifdef CONFIG_X86_64 | 6 | #ifdef CONFIG_X86_64 |
6 | |||
7 | #include <asm/pci-direct.h> | 7 | #include <asm/pci-direct.h> |
8 | #include <asm/mpspec.h> | 8 | #include <asm/mpspec.h> |
9 | #include <linux/cpumask.h> | 9 | #include <linux/cpumask.h> |
10 | #include <linux/topology.h> | 10 | #endif |
11 | 11 | ||
12 | /* | 12 | /* |
13 | * This discovers the pcibus <-> node mapping on AMD K8. | 13 | * This discovers the pcibus <-> node mapping on AMD K8. |
14 | * also get peer root bus resource for io,mmio | 14 | * also get peer root bus resource for io,mmio |
15 | */ | 15 | */ |
16 | 16 | ||
17 | |||
18 | /* | ||
19 | * sub bus (transparent) will use entres from 3 to store extra from root, | ||
20 | * so need to make sure have enought slot there, increase PCI_BUS_NUM_RESOURCES? | ||
21 | */ | ||
22 | #define RES_NUM 16 | ||
23 | struct pci_root_info { | ||
24 | char name[12]; | ||
25 | unsigned int res_num; | ||
26 | struct resource res[RES_NUM]; | ||
27 | int bus_min; | ||
28 | int bus_max; | ||
29 | int node; | ||
30 | int link; | ||
31 | }; | ||
32 | |||
33 | /* 4 at this time, it may grow to 32 */ | ||
34 | #define PCI_ROOT_NR 4 | ||
35 | static int pci_root_num; | ||
36 | static struct pci_root_info pci_root_info[PCI_ROOT_NR]; | ||
37 | |||
38 | #ifdef CONFIG_NUMA | 17 | #ifdef CONFIG_NUMA |
39 | 18 | ||
40 | #define BUS_NR 256 | 19 | #define BUS_NR 256 |
41 | 20 | ||
21 | #ifdef CONFIG_X86_64 | ||
22 | |||
42 | static int mp_bus_to_node[BUS_NR]; | 23 | static int mp_bus_to_node[BUS_NR]; |
43 | 24 | ||
44 | void set_mp_bus_to_node(int busnum, int node) | 25 | void set_mp_bus_to_node(int busnum, int node) |
@@ -65,7 +46,52 @@ int get_mp_bus_to_node(int busnum) | |||
65 | 46 | ||
66 | return node; | 47 | return node; |
67 | } | 48 | } |
68 | #endif | 49 | |
50 | #else /* CONFIG_X86_32 */ | ||
51 | |||
52 | static unsigned char mp_bus_to_node[BUS_NR]; | ||
53 | |||
54 | void set_mp_bus_to_node(int busnum, int node) | ||
55 | { | ||
56 | if (busnum >= 0 && busnum < BUS_NR) | ||
57 | mp_bus_to_node[busnum] = (unsigned char) node; | ||
58 | } | ||
59 | |||
60 | int get_mp_bus_to_node(int busnum) | ||
61 | { | ||
62 | int node; | ||
63 | |||
64 | if (busnum < 0 || busnum > (BUS_NR - 1)) | ||
65 | return 0; | ||
66 | node = mp_bus_to_node[busnum]; | ||
67 | return node; | ||
68 | } | ||
69 | |||
70 | #endif /* CONFIG_X86_32 */ | ||
71 | |||
72 | #endif /* CONFIG_NUMA */ | ||
73 | |||
74 | #ifdef CONFIG_X86_64 | ||
75 | |||
76 | /* | ||
77 | * sub bus (transparent) will use entries from 3 on to store extras from the root, | ||
78 | * so make sure there are enough slots; increase PCI_BUS_NUM_RESOURCES? | ||
79 | */ | ||
80 | #define RES_NUM 16 | ||
81 | struct pci_root_info { | ||
82 | char name[12]; | ||
83 | unsigned int res_num; | ||
84 | struct resource res[RES_NUM]; | ||
85 | int bus_min; | ||
86 | int bus_max; | ||
87 | int node; | ||
88 | int link; | ||
89 | }; | ||
90 | |||
91 | /* 4 at this time, it may grow to 32 */ | ||
92 | #define PCI_ROOT_NR 4 | ||
93 | static int pci_root_num; | ||
94 | static struct pci_root_info pci_root_info[PCI_ROOT_NR]; | ||
69 | 95 | ||
70 | void set_pci_bus_resources_arch_default(struct pci_bus *b) | 96 | void set_pci_bus_resources_arch_default(struct pci_bus *b) |
71 | { | 97 | { |
@@ -552,7 +578,7 @@ static int __init enable_pci_io_ecs(void) | |||
552 | /* assume all cpus from fam10h have IO ECS */ | 578 | /* assume all cpus from fam10h have IO ECS */ |
553 | if (boot_cpu_data.x86 < 0x10) | 579 | if (boot_cpu_data.x86 < 0x10) |
554 | return 0; | 580 | return 0; |
555 | on_each_cpu(enable_pci_io_ecs_per_cpu, NULL, 1, 1); | 581 | on_each_cpu(enable_pci_io_ecs_per_cpu, NULL, 1); |
556 | pci_probe |= PCI_HAS_IO_ECS; | 582 | pci_probe |= PCI_HAS_IO_ECS; |
557 | return 0; | 583 | return 0; |
558 | } | 584 | } |
diff --git a/arch/x86/pci/common.c b/arch/x86/pci/common.c index 00a319cd5be3..1485a26ddcef 100644 --- a/arch/x86/pci/common.c +++ b/arch/x86/pci/common.c | |||
@@ -20,6 +20,7 @@ | |||
20 | unsigned int pci_probe = PCI_PROBE_BIOS | PCI_PROBE_CONF1 | PCI_PROBE_CONF2 | | 20 | unsigned int pci_probe = PCI_PROBE_BIOS | PCI_PROBE_CONF1 | PCI_PROBE_CONF2 | |
21 | PCI_PROBE_MMCONF; | 21 | PCI_PROBE_MMCONF; |
22 | 22 | ||
23 | unsigned int pci_early_dump_regs; | ||
23 | static int pci_bf_sort; | 24 | static int pci_bf_sort; |
24 | int pci_routeirq; | 25 | int pci_routeirq; |
25 | int noioapicquirk; | 26 | int noioapicquirk; |
@@ -33,7 +34,7 @@ struct pci_raw_ops *raw_pci_ext_ops; | |||
33 | int raw_pci_read(unsigned int domain, unsigned int bus, unsigned int devfn, | 34 | int raw_pci_read(unsigned int domain, unsigned int bus, unsigned int devfn, |
34 | int reg, int len, u32 *val) | 35 | int reg, int len, u32 *val) |
35 | { | 36 | { |
36 | if (reg < 256 && raw_pci_ops) | 37 | if (domain == 0 && reg < 256 && raw_pci_ops) |
37 | return raw_pci_ops->read(domain, bus, devfn, reg, len, val); | 38 | return raw_pci_ops->read(domain, bus, devfn, reg, len, val); |
38 | if (raw_pci_ext_ops) | 39 | if (raw_pci_ext_ops) |
39 | return raw_pci_ext_ops->read(domain, bus, devfn, reg, len, val); | 40 | return raw_pci_ext_ops->read(domain, bus, devfn, reg, len, val); |
@@ -43,7 +44,7 @@ int raw_pci_read(unsigned int domain, unsigned int bus, unsigned int devfn, | |||
43 | int raw_pci_write(unsigned int domain, unsigned int bus, unsigned int devfn, | 44 | int raw_pci_write(unsigned int domain, unsigned int bus, unsigned int devfn, |
44 | int reg, int len, u32 val) | 45 | int reg, int len, u32 val) |
45 | { | 46 | { |
46 | if (reg < 256 && raw_pci_ops) | 47 | if (domain == 0 && reg < 256 && raw_pci_ops) |
47 | return raw_pci_ops->write(domain, bus, devfn, reg, len, val); | 48 | return raw_pci_ops->write(domain, bus, devfn, reg, len, val); |
48 | if (raw_pci_ext_ops) | 49 | if (raw_pci_ext_ops) |
49 | return raw_pci_ext_ops->write(domain, bus, devfn, reg, len, val); | 50 | return raw_pci_ext_ops->write(domain, bus, devfn, reg, len, val); |
@@ -123,6 +124,21 @@ void __init dmi_check_skip_isa_align(void) | |||
123 | dmi_check_system(can_skip_pciprobe_dmi_table); | 124 | dmi_check_system(can_skip_pciprobe_dmi_table); |
124 | } | 125 | } |
125 | 126 | ||
127 | static void __devinit pcibios_fixup_device_resources(struct pci_dev *dev) | ||
128 | { | ||
129 | struct resource *rom_r = &dev->resource[PCI_ROM_RESOURCE]; | ||
130 | |||
131 | if (pci_probe & PCI_NOASSIGN_ROMS) { | ||
132 | if (rom_r->parent) | ||
133 | return; | ||
134 | if (rom_r->start) { | ||
135 | /* we deal with the BIOS-assigned ROM later */ | ||
136 | return; | ||
137 | } | ||
138 | rom_r->start = rom_r->end = rom_r->flags = 0; | ||
139 | } | ||
140 | } | ||
141 | |||
126 | /* | 142 | /* |
127 | * Called after each bus is probed, but before its children | 143 | * Called after each bus is probed, but before its children |
128 | * are examined. | 144 | * are examined. |
@@ -130,7 +146,11 @@ void __init dmi_check_skip_isa_align(void) | |||
130 | 146 | ||
131 | void __devinit pcibios_fixup_bus(struct pci_bus *b) | 147 | void __devinit pcibios_fixup_bus(struct pci_bus *b) |
132 | { | 148 | { |
149 | struct pci_dev *dev; | ||
150 | |||
133 | pci_read_bridge_bases(b); | 151 | pci_read_bridge_bases(b); |
152 | list_for_each_entry(dev, &b->devices, bus_list) | ||
153 | pcibios_fixup_device_resources(dev); | ||
134 | } | 154 | } |
135 | 155 | ||
136 | /* | 156 | /* |
@@ -386,7 +406,7 @@ struct pci_bus * __devinit pcibios_scan_root(int busnum) | |||
386 | 406 | ||
387 | extern u8 pci_cache_line_size; | 407 | extern u8 pci_cache_line_size; |
388 | 408 | ||
389 | static int __init pcibios_init(void) | 409 | int __init pcibios_init(void) |
390 | { | 410 | { |
391 | struct cpuinfo_x86 *c = &boot_cpu_data; | 411 | struct cpuinfo_x86 *c = &boot_cpu_data; |
392 | 412 | ||
@@ -413,8 +433,6 @@ static int __init pcibios_init(void) | |||
413 | return 0; | 433 | return 0; |
414 | } | 434 | } |
415 | 435 | ||
416 | subsys_initcall(pcibios_init); | ||
417 | |||
418 | char * __devinit pcibios_setup(char *str) | 436 | char * __devinit pcibios_setup(char *str) |
419 | { | 437 | { |
420 | if (!strcmp(str, "off")) { | 438 | if (!strcmp(str, "off")) { |
@@ -485,12 +503,18 @@ char * __devinit pcibios_setup(char *str) | |||
485 | else if (!strcmp(str, "rom")) { | 503 | else if (!strcmp(str, "rom")) { |
486 | pci_probe |= PCI_ASSIGN_ROMS; | 504 | pci_probe |= PCI_ASSIGN_ROMS; |
487 | return NULL; | 505 | return NULL; |
506 | } else if (!strcmp(str, "norom")) { | ||
507 | pci_probe |= PCI_NOASSIGN_ROMS; | ||
508 | return NULL; | ||
488 | } else if (!strcmp(str, "assign-busses")) { | 509 | } else if (!strcmp(str, "assign-busses")) { |
489 | pci_probe |= PCI_ASSIGN_ALL_BUSSES; | 510 | pci_probe |= PCI_ASSIGN_ALL_BUSSES; |
490 | return NULL; | 511 | return NULL; |
491 | } else if (!strcmp(str, "use_crs")) { | 512 | } else if (!strcmp(str, "use_crs")) { |
492 | pci_probe |= PCI_USE__CRS; | 513 | pci_probe |= PCI_USE__CRS; |
493 | return NULL; | 514 | return NULL; |
515 | } else if (!strcmp(str, "earlydump")) { | ||
516 | pci_early_dump_regs = 1; | ||
517 | return NULL; | ||
494 | } else if (!strcmp(str, "routeirq")) { | 518 | } else if (!strcmp(str, "routeirq")) { |
495 | pci_routeirq = 1; | 519 | pci_routeirq = 1; |
496 | return NULL; | 520 | return NULL; |
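A short sketch of what the new domain check implies for callers (an inference from the code above, not from the patch description): legacy 0xCF8/0xCFC cycles cannot encode a PCI domain, so accesses outside domain 0, or to registers at offset 256 and above, can only be served by raw_pci_ext_ops such as MMCONFIG:

        u32 vendor;

        /* domain 0, reg < 256: the legacy conf1/conf2 ops may serve this */
        raw_pci_read(0, 0, PCI_DEVFN(0, 0), PCI_VENDOR_ID, 2, &vendor);

        /* non-zero domain: only raw_pci_ext_ops can serve this; the call
         * fails if no extended-config method was registered */
        raw_pci_read(1, 0, PCI_DEVFN(0, 0), PCI_VENDOR_ID, 2, &vendor);

The new "norom" and "earlydump" tokens parsed in pcibios_setup() follow the usual convention for this file and are passed on the kernel command line, e.g. pci=norom,earlydump (usage assumption).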
diff --git a/arch/x86/pci/early.c b/arch/x86/pci/early.c index 42df4b6606df..858dbe3399f9 100644 --- a/arch/x86/pci/early.c +++ b/arch/x86/pci/early.c | |||
@@ -49,7 +49,14 @@ void write_pci_config_byte(u8 bus, u8 slot, u8 func, u8 offset, u8 val) | |||
49 | { | 49 | { |
50 | PDprintk("%x writing to %x: %x\n", slot, offset, val); | 50 | PDprintk("%x writing to %x: %x\n", slot, offset, val); |
51 | outl(0x80000000 | (bus<<16) | (slot<<11) | (func<<8) | offset, 0xcf8); | 51 | outl(0x80000000 | (bus<<16) | (slot<<11) | (func<<8) | offset, 0xcf8); |
52 | outb(val, 0xcfc); | 52 | outb(val, 0xcfc + (offset&3)); |
53 | } | ||
54 | |||
55 | void write_pci_config_16(u8 bus, u8 slot, u8 func, u8 offset, u16 val) | ||
56 | { | ||
57 | PDprintk("%x writing to %x: %x\n", slot, offset, val); | ||
58 | outl(0x80000000 | (bus<<16) | (slot<<11) | (func<<8) | offset, 0xcf8); | ||
59 | outw(val, 0xcfc + (offset&2)); | ||
53 | } | 60 | } |
54 | 61 | ||
55 | int early_pci_allowed(void) | 62 | int early_pci_allowed(void) |
@@ -57,3 +64,54 @@ int early_pci_allowed(void) | |||
57 | return (pci_probe & (PCI_PROBE_CONF1|PCI_PROBE_NOEARLY)) == | 64 | return (pci_probe & (PCI_PROBE_CONF1|PCI_PROBE_NOEARLY)) == |
58 | PCI_PROBE_CONF1; | 65 | PCI_PROBE_CONF1; |
59 | } | 66 | } |
67 | |||
68 | void early_dump_pci_device(u8 bus, u8 slot, u8 func) | ||
69 | { | ||
70 | int i; | ||
71 | int j; | ||
72 | u32 val; | ||
73 | |||
74 | printk("PCI: %02x:%02x:%02x", bus, slot, func); | ||
75 | |||
76 | for (i = 0; i < 256; i += 4) { | ||
77 | if (!(i & 0x0f)) | ||
78 | printk("\n%04x:", i); | ||
79 | |||
80 | val = read_pci_config(bus, slot, func, i); | ||
81 | for (j = 0; j < 4; j++) { | ||
82 | printk(" %02x", val & 0xff); | ||
83 | val >>= 8; | ||
84 | } | ||
85 | } | ||
86 | printk("\n"); | ||
87 | } | ||
88 | |||
89 | void early_dump_pci_devices(void) | ||
90 | { | ||
91 | unsigned bus, slot, func; | ||
92 | |||
93 | if (!early_pci_allowed()) | ||
94 | return; | ||
95 | |||
96 | for (bus = 0; bus < 256; bus++) { | ||
97 | for (slot = 0; slot < 32; slot++) { | ||
98 | for (func = 0; func < 8; func++) { | ||
99 | u32 class; | ||
100 | u8 type; | ||
101 | class = read_pci_config(bus, slot, func, | ||
102 | PCI_CLASS_REVISION); | ||
103 | if (class == 0xffffffff) | ||
104 | break; | ||
105 | |||
106 | early_dump_pci_device(bus, slot, func); | ||
107 | |||
108 | /* No multi-function device? */ | ||
109 | type = read_pci_config_byte(bus, slot, func, | ||
110 | PCI_HEADER_TYPE); | ||
111 | if (!(type & 0x80)) | ||
112 | break; | ||
113 | } | ||
114 | } | ||
115 | } | ||
116 | } | ||
117 | |||
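For reference, a sketch of the matching conf1 read path that early_dump_pci_device() relies on, assuming it mirrors the writers above; the real read_pci_config* helpers live earlier in this file and are not shown in the hunk. The example_ prefix marks this as an illustration:

        static u32 example_read_pci_config(u8 bus, u8 slot, u8 func, u8 offset)
        {
                /* dword address to 0xcf8: enable bit, bus, device, function, register */
                outl(0x80000000 | (bus << 16) | (slot << 11) | (func << 8) | offset,
                     0xcf8);
                return inl(0xcfc);      /* data window */
        }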
diff --git a/arch/x86/pci/i386.c b/arch/x86/pci/i386.c index 6ccd7a108cd4..2aafb67dc5f1 100644 --- a/arch/x86/pci/i386.c +++ b/arch/x86/pci/i386.c | |||
@@ -334,7 +334,9 @@ int pci_mmap_page_range(struct pci_dev *dev, struct vm_area_struct *vma, | |||
334 | flags = new_flags; | 334 | flags = new_flags; |
335 | } | 335 | } |
336 | 336 | ||
337 | if (vma->vm_pgoff <= max_pfn_mapped && | 337 | if (((vma->vm_pgoff < max_low_pfn_mapped) || |
338 | (vma->vm_pgoff >= (1UL<<(32 - PAGE_SHIFT)) && | ||
339 | vma->vm_pgoff < max_pfn_mapped)) && | ||
338 | ioremap_change_attr((unsigned long)__va(addr), len, flags)) { | 340 | ioremap_change_attr((unsigned long)__va(addr), len, flags)) { |
339 | free_memtype(addr, addr + len); | 341 | free_memtype(addr, addr + len); |
340 | return -EINVAL; | 342 | return -EINVAL; |
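The reworked condition can be read as a predicate on which pfns the kernel direct-maps; a sketch of that reading (assumption: the second clause covers the 64-bit direct map above 4 GB):

        /* True when __va(pfn << PAGE_SHIFT) is backed by the kernel direct
         * mapping: either below the low mapped limit, or above 4 GB but
         * still below max_pfn_mapped. Hypothetical helper for illustration. */
        static bool pfn_is_direct_mapped(unsigned long pfn)
        {
                return pfn < max_low_pfn_mapped ||
                       (pfn >= (1UL << (32 - PAGE_SHIFT)) && pfn < max_pfn_mapped);
        }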
diff --git a/arch/x86/pci/init.c b/arch/x86/pci/init.c index b821f4462d99..d6c950f81858 100644 --- a/arch/x86/pci/init.c +++ b/arch/x86/pci/init.c | |||
@@ -4,7 +4,7 @@ | |||
4 | 4 | ||
5 | /* arch_initcall ordering is too unpredictable, so call the initializers | 5 | /* arch_initcall ordering is too unpredictable, so call the initializers |
6 | in the right sequence from here. */ | 6 | in the right sequence from here. */ |
7 | static __init int pci_access_init(void) | 7 | static __init int pci_arch_init(void) |
8 | { | 8 | { |
9 | #ifdef CONFIG_PCI_DIRECT | 9 | #ifdef CONFIG_PCI_DIRECT |
10 | int type = 0; | 10 | int type = 0; |
@@ -40,4 +40,4 @@ static __init int pci_access_init(void) | |||
40 | 40 | ||
41 | return 0; | 41 | return 0; |
42 | } | 42 | } |
43 | arch_initcall(pci_access_init); | 43 | arch_initcall(pci_arch_init); |
diff --git a/arch/x86/pci/irq.c b/arch/x86/pci/irq.c index f0859de23e20..6a06a2eb0597 100644 --- a/arch/x86/pci/irq.c +++ b/arch/x86/pci/irq.c | |||
@@ -45,7 +45,8 @@ struct irq_router { | |||
45 | char *name; | 45 | char *name; |
46 | u16 vendor, device; | 46 | u16 vendor, device; |
47 | int (*get)(struct pci_dev *router, struct pci_dev *dev, int pirq); | 47 | int (*get)(struct pci_dev *router, struct pci_dev *dev, int pirq); |
48 | int (*set)(struct pci_dev *router, struct pci_dev *dev, int pirq, int new); | 48 | int (*set)(struct pci_dev *router, struct pci_dev *dev, int pirq, |
49 | int new); | ||
49 | }; | 50 | }; |
50 | 51 | ||
51 | struct irq_router_handler { | 52 | struct irq_router_handler { |
@@ -77,7 +78,8 @@ static inline struct irq_routing_table *pirq_check_routing_table(u8 *addr) | |||
77 | for (i = 0; i < rt->size; i++) | 78 | for (i = 0; i < rt->size; i++) |
78 | sum += addr[i]; | 79 | sum += addr[i]; |
79 | if (!sum) { | 80 | if (!sum) { |
80 | DBG(KERN_DEBUG "PCI: Interrupt Routing Table found at 0x%p\n", rt); | 81 | DBG(KERN_DEBUG "PCI: Interrupt Routing Table found at 0x%p\n", |
82 | rt); | ||
81 | return rt; | 83 | return rt; |
82 | } | 84 | } |
83 | return NULL; | 85 | return NULL; |
@@ -183,7 +185,8 @@ static unsigned int read_config_nybble(struct pci_dev *router, unsigned offset, | |||
183 | return (nr & 1) ? (x >> 4) : (x & 0xf); | 185 | return (nr & 1) ? (x >> 4) : (x & 0xf); |
184 | } | 186 | } |
185 | 187 | ||
186 | static void write_config_nybble(struct pci_dev *router, unsigned offset, unsigned nr, unsigned int val) | 188 | static void write_config_nybble(struct pci_dev *router, unsigned offset, |
189 | unsigned nr, unsigned int val) | ||
187 | { | 190 | { |
188 | u8 x; | 191 | u8 x; |
189 | unsigned reg = offset + (nr >> 1); | 192 | unsigned reg = offset + (nr >> 1); |
@@ -467,7 +470,8 @@ static int pirq_serverworks_get(struct pci_dev *router, struct pci_dev *dev, int | |||
467 | return inb(0xc01) & 0xf; | 470 | return inb(0xc01) & 0xf; |
468 | } | 471 | } |
469 | 472 | ||
470 | static int pirq_serverworks_set(struct pci_dev *router, struct pci_dev *dev, int pirq, int irq) | 473 | static int pirq_serverworks_set(struct pci_dev *router, struct pci_dev *dev, |
474 | int pirq, int irq) | ||
471 | { | 475 | { |
472 | outb(pirq, 0xc00); | 476 | outb(pirq, 0xc00); |
473 | outb(irq, 0xc01); | 477 | outb(irq, 0xc01); |
@@ -660,7 +664,8 @@ static __init int vlsi_router_probe(struct irq_router *r, struct pci_dev *router | |||
660 | } | 664 | } |
661 | 665 | ||
662 | 666 | ||
663 | static __init int serverworks_router_probe(struct irq_router *r, struct pci_dev *router, u16 device) | 667 | static __init int serverworks_router_probe(struct irq_router *r, |
668 | struct pci_dev *router, u16 device) | ||
664 | { | 669 | { |
665 | switch (device) { | 670 | switch (device) { |
666 | case PCI_DEVICE_ID_SERVERWORKS_OSB4: | 671 | case PCI_DEVICE_ID_SERVERWORKS_OSB4: |
@@ -827,10 +832,12 @@ static void __init pirq_find_router(struct irq_router *r) | |||
827 | 832 | ||
828 | for (h = pirq_routers; h->vendor; h++) { | 833 | for (h = pirq_routers; h->vendor; h++) { |
829 | /* First look for a router match */ | 834 | /* First look for a router match */ |
830 | if (rt->rtr_vendor == h->vendor && h->probe(r, pirq_router_dev, rt->rtr_device)) | 835 | if (rt->rtr_vendor == h->vendor && |
836 | h->probe(r, pirq_router_dev, rt->rtr_device)) | ||
831 | break; | 837 | break; |
832 | /* Fall back to a device match */ | 838 | /* Fall back to a device match */ |
833 | if (pirq_router_dev->vendor == h->vendor && h->probe(r, pirq_router_dev, pirq_router_dev->device)) | 839 | if (pirq_router_dev->vendor == h->vendor && |
840 | h->probe(r, pirq_router_dev, pirq_router_dev->device)) | ||
834 | break; | 841 | break; |
835 | } | 842 | } |
836 | printk(KERN_INFO "PCI: Using IRQ router %s [%04x/%04x] at %s\n", | 843 | printk(KERN_INFO "PCI: Using IRQ router %s [%04x/%04x] at %s\n", |
@@ -845,11 +852,13 @@ static void __init pirq_find_router(struct irq_router *r) | |||
845 | static struct irq_info *pirq_get_info(struct pci_dev *dev) | 852 | static struct irq_info *pirq_get_info(struct pci_dev *dev) |
846 | { | 853 | { |
847 | struct irq_routing_table *rt = pirq_table; | 854 | struct irq_routing_table *rt = pirq_table; |
848 | int entries = (rt->size - sizeof(struct irq_routing_table)) / sizeof(struct irq_info); | 855 | int entries = (rt->size - sizeof(struct irq_routing_table)) / |
856 | sizeof(struct irq_info); | ||
849 | struct irq_info *info; | 857 | struct irq_info *info; |
850 | 858 | ||
851 | for (info = rt->slots; entries--; info++) | 859 | for (info = rt->slots; entries--; info++) |
852 | if (info->bus == dev->bus->number && PCI_SLOT(info->devfn) == PCI_SLOT(dev->devfn)) | 860 | if (info->bus == dev->bus->number && |
861 | PCI_SLOT(info->devfn) == PCI_SLOT(dev->devfn)) | ||
853 | return info; | 862 | return info; |
854 | return NULL; | 863 | return NULL; |
855 | } | 864 | } |
@@ -890,7 +899,8 @@ static int pcibios_lookup_irq(struct pci_dev *dev, int assign) | |||
890 | DBG(" -> not routed\n" KERN_DEBUG); | 899 | DBG(" -> not routed\n" KERN_DEBUG); |
891 | return 0; | 900 | return 0; |
892 | } | 901 | } |
893 | DBG(" -> PIRQ %02x, mask %04x, excl %04x", pirq, mask, pirq_table->exclusive_irqs); | 902 | DBG(" -> PIRQ %02x, mask %04x, excl %04x", pirq, mask, |
903 | pirq_table->exclusive_irqs); | ||
894 | mask &= pcibios_irq_mask; | 904 | mask &= pcibios_irq_mask; |
895 | 905 | ||
896 | /* Work around broken HP Pavilion Notebooks which assign USB to | 906 | /* Work around broken HP Pavilion Notebooks which assign USB to |
@@ -903,7 +913,8 @@ static int pcibios_lookup_irq(struct pci_dev *dev, int assign) | |||
903 | } | 913 | } |
904 | 914 | ||
905 | /* same for Acer Travelmate 360, but with CB and irq 11 -> 10 */ | 915 | /* same for Acer Travelmate 360, but with CB and irq 11 -> 10 */ |
906 | if (acer_tm360_irqrouting && dev->irq == 11 && dev->vendor == PCI_VENDOR_ID_O2) { | 916 | if (acer_tm360_irqrouting && dev->irq == 11 && |
917 | dev->vendor == PCI_VENDOR_ID_O2) { | ||
907 | pirq = 0x68; | 918 | pirq = 0x68; |
908 | mask = 0x400; | 919 | mask = 0x400; |
909 | dev->irq = r->get(pirq_router_dev, dev, pirq); | 920 | dev->irq = r->get(pirq_router_dev, dev, pirq); |
@@ -920,15 +931,16 @@ static int pcibios_lookup_irq(struct pci_dev *dev, int assign) | |||
920 | newirq = 0; | 931 | newirq = 0; |
921 | else | 932 | else |
922 | printk("\n" KERN_WARNING | 933 | printk("\n" KERN_WARNING |
923 | "PCI: IRQ %i for device %s doesn't match PIRQ mask " | 934 | "PCI: IRQ %i for device %s doesn't match PIRQ mask - try pci=usepirqmask\n" |
924 | "- try pci=usepirqmask\n" KERN_DEBUG, newirq, | 935 | KERN_DEBUG, newirq, |
925 | pci_name(dev)); | 936 | pci_name(dev)); |
926 | } | 937 | } |
927 | if (!newirq && assign) { | 938 | if (!newirq && assign) { |
928 | for (i = 0; i < 16; i++) { | 939 | for (i = 0; i < 16; i++) { |
929 | if (!(mask & (1 << i))) | 940 | if (!(mask & (1 << i))) |
930 | continue; | 941 | continue; |
931 | if (pirq_penalty[i] < pirq_penalty[newirq] && can_request_irq(i, IRQF_SHARED)) | 942 | if (pirq_penalty[i] < pirq_penalty[newirq] && |
943 | can_request_irq(i, IRQF_SHARED)) | ||
932 | newirq = i; | 944 | newirq = i; |
933 | } | 945 | } |
934 | } | 946 | } |
@@ -944,7 +956,8 @@ static int pcibios_lookup_irq(struct pci_dev *dev, int assign) | |||
944 | DBG(" -> got IRQ %d\n", irq); | 956 | DBG(" -> got IRQ %d\n", irq); |
945 | msg = "Found"; | 957 | msg = "Found"; |
946 | eisa_set_level_irq(irq); | 958 | eisa_set_level_irq(irq); |
947 | } else if (newirq && r->set && (dev->class >> 8) != PCI_CLASS_DISPLAY_VGA) { | 959 | } else if (newirq && r->set && |
960 | (dev->class >> 8) != PCI_CLASS_DISPLAY_VGA) { | ||
948 | DBG(" -> assigning IRQ %d", newirq); | 961 | DBG(" -> assigning IRQ %d", newirq); |
949 | if (r->set(pirq_router_dev, dev, pirq, newirq)) { | 962 | if (r->set(pirq_router_dev, dev, pirq, newirq)) { |
950 | eisa_set_level_irq(newirq); | 963 | eisa_set_level_irq(newirq); |
@@ -962,7 +975,8 @@ static int pcibios_lookup_irq(struct pci_dev *dev, int assign) | |||
962 | } else | 975 | } else |
963 | return 0; | 976 | return 0; |
964 | } | 977 | } |
965 | printk(KERN_INFO "PCI: %s IRQ %d for device %s\n", msg, irq, pci_name(dev)); | 978 | printk(KERN_INFO "PCI: %s IRQ %d for device %s\n", msg, irq, |
979 | pci_name(dev)); | ||
966 | 980 | ||
967 | /* Update IRQ for all devices with the same pirq value */ | 981 | /* Update IRQ for all devices with the same pirq value */ |
968 | while ((dev2 = pci_get_device(PCI_ANY_ID, PCI_ANY_ID, dev2)) != NULL) { | 982 | while ((dev2 = pci_get_device(PCI_ANY_ID, PCI_ANY_ID, dev2)) != NULL) { |
@@ -974,7 +988,10 @@ static int pcibios_lookup_irq(struct pci_dev *dev, int assign) | |||
974 | if (!info) | 988 | if (!info) |
975 | continue; | 989 | continue; |
976 | if (info->irq[pin].link == pirq) { | 990 | if (info->irq[pin].link == pirq) { |
977 | /* We refuse to override the dev->irq information. Give a warning! */ | 991 | /* |
992 | * We refuse to override the dev->irq | ||
993 | * information. Give a warning! | ||
994 | */ | ||
978 | if (dev2->irq && dev2->irq != irq && \ | 995 | if (dev2->irq && dev2->irq != irq && \ |
979 | (!(pci_probe & PCI_USE_PIRQ_MASK) || \ | 996 | (!(pci_probe & PCI_USE_PIRQ_MASK) || \ |
980 | ((1 << dev2->irq) & mask))) { | 997 | ((1 << dev2->irq) & mask))) { |
@@ -987,7 +1004,9 @@ static int pcibios_lookup_irq(struct pci_dev *dev, int assign) | |||
987 | dev2->irq = irq; | 1004 | dev2->irq = irq; |
988 | pirq_penalty[irq]++; | 1005 | pirq_penalty[irq]++; |
989 | if (dev != dev2) | 1006 | if (dev != dev2) |
990 | printk(KERN_INFO "PCI: Sharing IRQ %d with %s\n", irq, pci_name(dev2)); | 1007 | printk(KERN_INFO |
1008 | "PCI: Sharing IRQ %d with %s\n", | ||
1009 | irq, pci_name(dev2)); | ||
991 | } | 1010 | } |
992 | } | 1011 | } |
993 | return 1; | 1012 | return 1; |
@@ -1001,15 +1020,21 @@ static void __init pcibios_fixup_irqs(void) | |||
1001 | DBG(KERN_DEBUG "PCI: IRQ fixup\n"); | 1020 | DBG(KERN_DEBUG "PCI: IRQ fixup\n"); |
1002 | while ((dev = pci_get_device(PCI_ANY_ID, PCI_ANY_ID, dev)) != NULL) { | 1021 | while ((dev = pci_get_device(PCI_ANY_ID, PCI_ANY_ID, dev)) != NULL) { |
1003 | /* | 1022 | /* |
1004 | * If the BIOS has set an out of range IRQ number, just ignore it. | 1023 | * If the BIOS has set an out of range IRQ number, just |
1005 | * Also keep track of which IRQs are already in use. | 1024 | * ignore it. Also keep track of which IRQs are |
1025 | * already in use. | ||
1006 | */ | 1026 | */ |
1007 | if (dev->irq >= 16) { | 1027 | if (dev->irq >= 16) { |
1008 | DBG(KERN_DEBUG "%s: ignoring bogus IRQ %d\n", pci_name(dev), dev->irq); | 1028 | DBG(KERN_DEBUG "%s: ignoring bogus IRQ %d\n", |
1029 | pci_name(dev), dev->irq); | ||
1009 | dev->irq = 0; | 1030 | dev->irq = 0; |
1010 | } | 1031 | } |
1011 | /* If the IRQ is already assigned to a PCI device, ignore its ISA use penalty */ | 1032 | /* |
1012 | if (pirq_penalty[dev->irq] >= 100 && pirq_penalty[dev->irq] < 100000) | 1033 | * If the IRQ is already assigned to a PCI device, |
1034 | * ignore its ISA use penalty | ||
1035 | */ | ||
1036 | if (pirq_penalty[dev->irq] >= 100 && | ||
1037 | pirq_penalty[dev->irq] < 100000) | ||
1013 | pirq_penalty[dev->irq] = 0; | 1038 | pirq_penalty[dev->irq] = 0; |
1014 | pirq_penalty[dev->irq]++; | 1039 | pirq_penalty[dev->irq]++; |
1015 | } | 1040 | } |
@@ -1025,8 +1050,13 @@ static void __init pcibios_fixup_irqs(void) | |||
1025 | int irq; | 1050 | int irq; |
1026 | 1051 | ||
1027 | if (pin) { | 1052 | if (pin) { |
1028 | pin--; /* interrupt pins are numbered starting from 1 */ | 1053 | /* |
1029 | irq = IO_APIC_get_PCI_irq_vector(dev->bus->number, PCI_SLOT(dev->devfn), pin); | 1054 | * interrupt pins are numbered starting |
1055 | * from 1 | ||
1056 | */ | ||
1057 | pin--; | ||
1058 | irq = IO_APIC_get_PCI_irq_vector(dev->bus->number, | ||
1059 | PCI_SLOT(dev->devfn), pin); | ||
1030 | /* | 1060 | /* |
1031 | * Busses behind bridges are typically not listed in the MP-table. | 1061 | * Busses behind bridges are typically not listed in the MP-table. |
1032 | * In this case we have to look up the IRQ based on the parent bus, | 1062 | * In this case we have to look up the IRQ based on the parent bus, |
@@ -1067,7 +1097,8 @@ static int __init fix_broken_hp_bios_irq9(const struct dmi_system_id *d) | |||
1067 | { | 1097 | { |
1068 | if (!broken_hp_bios_irq9) { | 1098 | if (!broken_hp_bios_irq9) { |
1069 | broken_hp_bios_irq9 = 1; | 1099 | broken_hp_bios_irq9 = 1; |
1070 | printk(KERN_INFO "%s detected - fixing broken IRQ routing\n", d->ident); | 1100 | printk(KERN_INFO "%s detected - fixing broken IRQ routing\n", |
1101 | d->ident); | ||
1071 | } | 1102 | } |
1072 | return 0; | 1103 | return 0; |
1073 | } | 1104 | } |
@@ -1080,7 +1111,8 @@ static int __init fix_acer_tm360_irqrouting(const struct dmi_system_id *d) | |||
1080 | { | 1111 | { |
1081 | if (!acer_tm360_irqrouting) { | 1112 | if (!acer_tm360_irqrouting) { |
1082 | acer_tm360_irqrouting = 1; | 1113 | acer_tm360_irqrouting = 1; |
1083 | printk(KERN_INFO "%s detected - fixing broken IRQ routing\n", d->ident); | 1114 | printk(KERN_INFO "%s detected - fixing broken IRQ routing\n", |
1115 | d->ident); | ||
1084 | } | 1116 | } |
1085 | return 0; | 1117 | return 0; |
1086 | } | 1118 | } |
@@ -1092,7 +1124,8 @@ static struct dmi_system_id __initdata pciirq_dmi_table[] = { | |||
1092 | .matches = { | 1124 | .matches = { |
1093 | DMI_MATCH(DMI_SYS_VENDOR, "Hewlett-Packard"), | 1125 | DMI_MATCH(DMI_SYS_VENDOR, "Hewlett-Packard"), |
1094 | DMI_MATCH(DMI_BIOS_VERSION, "GE.M1.03"), | 1126 | DMI_MATCH(DMI_BIOS_VERSION, "GE.M1.03"), |
1095 | DMI_MATCH(DMI_PRODUCT_VERSION, "HP Pavilion Notebook Model GE"), | 1127 | DMI_MATCH(DMI_PRODUCT_VERSION, |
1128 | "HP Pavilion Notebook Model GE"), | ||
1096 | DMI_MATCH(DMI_BOARD_VERSION, "OmniBook N32N-736"), | 1129 | DMI_MATCH(DMI_BOARD_VERSION, "OmniBook N32N-736"), |
1097 | }, | 1130 | }, |
1098 | }, | 1131 | }, |
@@ -1107,7 +1140,7 @@ static struct dmi_system_id __initdata pciirq_dmi_table[] = { | |||
1107 | { } | 1140 | { } |
1108 | }; | 1141 | }; |
1109 | 1142 | ||
1110 | static int __init pcibios_irq_init(void) | 1143 | int __init pcibios_irq_init(void) |
1111 | { | 1144 | { |
1112 | DBG(KERN_DEBUG "PCI: IRQ init\n"); | 1145 | DBG(KERN_DEBUG "PCI: IRQ init\n"); |
1113 | 1146 | ||
@@ -1131,7 +1164,10 @@ static int __init pcibios_irq_init(void) | |||
1131 | if (!(pirq_table->exclusive_irqs & (1 << i))) | 1164 | if (!(pirq_table->exclusive_irqs & (1 << i))) |
1132 | pirq_penalty[i] += 100; | 1165 | pirq_penalty[i] += 100; |
1133 | } | 1166 | } |
1134 | /* If we're using the I/O APIC, avoid using the PCI IRQ routing table */ | 1167 | /* |
1168 | * If we're using the I/O APIC, avoid using the PCI IRQ | ||
1169 | * routing table | ||
1170 | */ | ||
1135 | if (io_apic_assign_pci_irqs) | 1171 | if (io_apic_assign_pci_irqs) |
1136 | pirq_table = NULL; | 1172 | pirq_table = NULL; |
1137 | } | 1173 | } |
@@ -1142,9 +1178,6 @@ static int __init pcibios_irq_init(void) | |||
1142 | return 0; | 1178 | return 0; |
1143 | } | 1179 | } |
1144 | 1180 | ||
1145 | subsys_initcall(pcibios_irq_init); | ||
1146 | |||
1147 | |||
1148 | static void pirq_penalize_isa_irq(int irq, int active) | 1181 | static void pirq_penalize_isa_irq(int irq, int active) |
1149 | { | 1182 | { |
1150 | /* | 1183 | /* |
@@ -1178,7 +1211,7 @@ static int pirq_enable_irq(struct pci_dev *dev) | |||
1178 | if (pin && !pcibios_lookup_irq(dev, 1) && !dev->irq) { | 1211 | if (pin && !pcibios_lookup_irq(dev, 1) && !dev->irq) { |
1179 | char *msg = ""; | 1212 | char *msg = ""; |
1180 | 1213 | ||
1181 | pin--; /* interrupt pins are numbered starting from 1 */ | 1214 | pin--; /* interrupt pins are numbered starting from 1 */ |
1182 | 1215 | ||
1183 | if (io_apic_assign_pci_irqs) { | 1216 | if (io_apic_assign_pci_irqs) { |
1184 | int irq; | 1217 | int irq; |
@@ -1198,13 +1231,16 @@ static int pirq_enable_irq(struct pci_dev *dev) | |||
1198 | irq = IO_APIC_get_PCI_irq_vector(bridge->bus->number, | 1231 | irq = IO_APIC_get_PCI_irq_vector(bridge->bus->number, |
1199 | PCI_SLOT(bridge->devfn), pin); | 1232 | PCI_SLOT(bridge->devfn), pin); |
1200 | if (irq >= 0) | 1233 | if (irq >= 0) |
1201 | printk(KERN_WARNING "PCI: using PPB %s[%c] to get irq %d\n", | 1234 | printk(KERN_WARNING |
1202 | pci_name(bridge), 'A' + pin, irq); | 1235 | "PCI: using PPB %s[%c] to get irq %d\n", |
1236 | pci_name(bridge), | ||
1237 | 'A' + pin, irq); | ||
1203 | dev = bridge; | 1238 | dev = bridge; |
1204 | } | 1239 | } |
1205 | dev = temp_dev; | 1240 | dev = temp_dev; |
1206 | if (irq >= 0) { | 1241 | if (irq >= 0) { |
1207 | printk(KERN_INFO "PCI->APIC IRQ transform: %s[%c] -> IRQ %d\n", | 1242 | printk(KERN_INFO |
1243 | "PCI->APIC IRQ transform: %s[%c] -> IRQ %d\n", | ||
1208 | pci_name(dev), 'A' + pin, irq); | 1244 | pci_name(dev), 'A' + pin, irq); |
1209 | dev->irq = irq; | 1245 | dev->irq = irq; |
1210 | return 0; | 1246 | return 0; |
@@ -1215,12 +1251,17 @@ static int pirq_enable_irq(struct pci_dev *dev) | |||
1215 | else | 1251 | else |
1216 | msg = " Please try using pci=biosirq."; | 1252 | msg = " Please try using pci=biosirq."; |
1217 | 1253 | ||
1218 | /* With IDE legacy devices the IRQ lookup failure is not a problem. */ | 1254 | /* |
1219 | if (dev->class >> 8 == PCI_CLASS_STORAGE_IDE && !(dev->class & 0x5)) | 1255 | * With IDE legacy devices the IRQ lookup failure is not |
1256 | * a problem. | ||
1257 | */ | ||
1258 | if (dev->class >> 8 == PCI_CLASS_STORAGE_IDE && | ||
1259 | !(dev->class & 0x5)) | ||
1220 | return 0; | 1260 | return 0; |
1221 | 1261 | ||
1222 | printk(KERN_WARNING "PCI: No IRQ known for interrupt pin %c of device %s.%s\n", | 1262 | printk(KERN_WARNING |
1223 | 'A' + pin, pci_name(dev), msg); | 1263 | "PCI: No IRQ known for interrupt pin %c of device %s.%s\n", |
1264 | 'A' + pin, pci_name(dev), msg); | ||
1224 | } | 1265 | } |
1225 | return 0; | 1266 | return 0; |
1226 | } | 1267 | } |
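For context on the nybble helpers rewrapped above: two 4-bit PIRQ link values share one config byte. A sketch of the read side, reconstructed from the excerpt shown earlier in this file; the example_ prefix marks it as an illustration:

        static unsigned int example_read_config_nybble(struct pci_dev *router,
                                                       unsigned offset, unsigned nr)
        {
                u8 x;
                unsigned reg = offset + (nr >> 1);      /* two links per byte */

                pci_read_config_byte(router, reg, &x);
                return (nr & 1) ? (x >> 4) : (x & 0xf); /* odd nr: high nybble */
        }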
diff --git a/arch/x86/pci/legacy.c b/arch/x86/pci/legacy.c index a67921ce60af..132876cc6fca 100644 --- a/arch/x86/pci/legacy.c +++ b/arch/x86/pci/legacy.c | |||
@@ -55,4 +55,18 @@ static int __init pci_legacy_init(void) | |||
55 | return 0; | 55 | return 0; |
56 | } | 56 | } |
57 | 57 | ||
58 | subsys_initcall(pci_legacy_init); | 58 | int __init pci_subsys_init(void) |
59 | { | ||
60 | #ifdef CONFIG_ACPI | ||
61 | pci_acpi_init(); | ||
62 | #endif | ||
63 | pci_legacy_init(); | ||
64 | pcibios_irq_init(); | ||
65 | #ifdef CONFIG_X86_NUMAQ | ||
66 | pci_numa_init(); | ||
67 | #endif | ||
68 | pcibios_init(); | ||
69 | |||
70 | return 0; | ||
71 | } | ||
72 | subsys_initcall(pci_subsys_init); | ||
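The resulting initialization order, sketched from the hunks in this series and the standard initcall levels (arch_initcall runs before subsys_initcall):

        /*
         * arch_initcall:   pci_arch_init()    - choose the raw config access method
         * subsys_initcall: pci_subsys_init()  - pci_acpi_init()   (if ACPI)
         *                                     - pci_legacy_init()
         *                                     - pcibios_irq_init()
         *                                     - pci_numa_init()   (if X86_NUMAQ)
         *                                     - pcibios_init()
         */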
diff --git a/arch/x86/pci/mp_bus_to_node.c b/arch/x86/pci/mp_bus_to_node.c deleted file mode 100644 index 022943999b84..000000000000 --- a/arch/x86/pci/mp_bus_to_node.c +++ /dev/null | |||
@@ -1,23 +0,0 @@ | |||
1 | #include <linux/pci.h> | ||
2 | #include <linux/init.h> | ||
3 | #include <linux/topology.h> | ||
4 | |||
5 | #define BUS_NR 256 | ||
6 | |||
7 | static unsigned char mp_bus_to_node[BUS_NR]; | ||
8 | |||
9 | void set_mp_bus_to_node(int busnum, int node) | ||
10 | { | ||
11 | if (busnum >= 0 && busnum < BUS_NR) | ||
12 | mp_bus_to_node[busnum] = (unsigned char) node; | ||
13 | } | ||
14 | |||
15 | int get_mp_bus_to_node(int busnum) | ||
16 | { | ||
17 | int node; | ||
18 | |||
19 | if (busnum < 0 || busnum > (BUS_NR - 1)) | ||
20 | return 0; | ||
21 | node = mp_bus_to_node[busnum]; | ||
22 | return node; | ||
23 | } | ||
diff --git a/arch/x86/pci/numa.c b/arch/x86/pci/numa.c index 99f1ecd485b5..8b5ca1966731 100644 --- a/arch/x86/pci/numa.c +++ b/arch/x86/pci/numa.c | |||
@@ -151,7 +151,7 @@ static void __devinit pci_fixup_i450nx(struct pci_dev *d) | |||
151 | } | 151 | } |
152 | DECLARE_PCI_FIXUP_HEADER(PCI_VENDOR_ID_INTEL, PCI_DEVICE_ID_INTEL_82451NX, pci_fixup_i450nx); | 152 | DECLARE_PCI_FIXUP_HEADER(PCI_VENDOR_ID_INTEL, PCI_DEVICE_ID_INTEL_82451NX, pci_fixup_i450nx); |
153 | 153 | ||
154 | static int __init pci_numa_init(void) | 154 | int __init pci_numa_init(void) |
155 | { | 155 | { |
156 | int quad; | 156 | int quad; |
157 | 157 | ||
@@ -176,5 +176,3 @@ static int __init pci_numa_init(void) | |||
176 | } | 176 | } |
177 | return 0; | 177 | return 0; |
178 | } | 178 | } |
179 | |||
180 | subsys_initcall(pci_numa_init); | ||
diff --git a/arch/x86/pci/pci.h b/arch/x86/pci/pci.h index ba263e626a68..3e25deb821ac 100644 --- a/arch/x86/pci/pci.h +++ b/arch/x86/pci/pci.h | |||
@@ -28,6 +28,7 @@ | |||
28 | #define PCI_USE__CRS 0x10000 | 28 | #define PCI_USE__CRS 0x10000 |
29 | #define PCI_CHECK_ENABLE_AMD_MMCONF 0x20000 | 29 | #define PCI_CHECK_ENABLE_AMD_MMCONF 0x20000 |
30 | #define PCI_HAS_IO_ECS 0x40000 | 30 | #define PCI_HAS_IO_ECS 0x40000 |
31 | #define PCI_NOASSIGN_ROMS 0x80000 | ||
31 | 32 | ||
32 | extern unsigned int pci_probe; | 33 | extern unsigned int pci_probe; |
33 | extern unsigned long pirq_table_addr; | 34 | extern unsigned long pirq_table_addr; |
@@ -39,9 +40,6 @@ enum pci_bf_sort_state { | |||
39 | pci_dmi_bf, | 40 | pci_dmi_bf, |
40 | }; | 41 | }; |
41 | 42 | ||
42 | extern void __init dmi_check_pciprobe(void); | ||
43 | extern void __init dmi_check_skip_isa_align(void); | ||
44 | |||
45 | /* pci-i386.c */ | 43 | /* pci-i386.c */ |
46 | 44 | ||
47 | extern unsigned int pcibios_max_latency; | 45 | extern unsigned int pcibios_max_latency; |
@@ -99,10 +97,19 @@ extern struct pci_raw_ops *raw_pci_ext_ops; | |||
99 | 97 | ||
100 | extern struct pci_raw_ops pci_direct_conf1; | 98 | extern struct pci_raw_ops pci_direct_conf1; |
101 | 99 | ||
100 | /* arch_initcall level */ | ||
102 | extern int pci_direct_probe(void); | 101 | extern int pci_direct_probe(void); |
103 | extern void pci_direct_init(int type); | 102 | extern void pci_direct_init(int type); |
104 | extern void pci_pcbios_init(void); | 103 | extern void pci_pcbios_init(void); |
105 | extern int pci_olpc_init(void); | 104 | extern int pci_olpc_init(void); |
105 | extern void __init dmi_check_pciprobe(void); | ||
106 | extern void __init dmi_check_skip_isa_align(void); | ||
107 | |||
108 | /* some commonly used subsys_initcalls */ | ||
109 | extern int __init pci_acpi_init(void); | ||
110 | extern int __init pcibios_irq_init(void); | ||
111 | extern int __init pci_numa_init(void); | ||
112 | extern int __init pcibios_init(void); | ||
106 | 113 | ||
107 | /* pci-mmconfig.c */ | 114 | /* pci-mmconfig.c */ |
108 | 115 | ||
diff --git a/arch/x86/pci/visws.c b/arch/x86/pci/visws.c index c2df4e97eed6..1a7bed492bb1 100644 --- a/arch/x86/pci/visws.c +++ b/arch/x86/pci/visws.c | |||
@@ -8,18 +8,19 @@ | |||
8 | #include <linux/pci.h> | 8 | #include <linux/pci.h> |
9 | #include <linux/init.h> | 9 | #include <linux/init.h> |
10 | 10 | ||
11 | #include "cobalt.h" | 11 | #include <asm/setup.h> |
12 | #include "lithium.h" | 12 | #include <asm/visws/cobalt.h> |
13 | #include <asm/visws/lithium.h> | ||
13 | 14 | ||
14 | #include "pci.h" | 15 | #include "pci.h" |
15 | 16 | ||
16 | static int pci_visws_enable_irq(struct pci_dev *dev) { return 0; } | 17 | static int pci_visws_enable_irq(struct pci_dev *dev) { return 0; } |
17 | static void pci_visws_disable_irq(struct pci_dev *dev) { } | 18 | static void pci_visws_disable_irq(struct pci_dev *dev) { } |
18 | 19 | ||
19 | int (*pcibios_enable_irq)(struct pci_dev *dev) = &pci_visws_enable_irq; | 20 | /* int (*pcibios_enable_irq)(struct pci_dev *dev) = &pci_visws_enable_irq; */ |
20 | void (*pcibios_disable_irq)(struct pci_dev *dev) = &pci_visws_disable_irq; | 21 | /* void (*pcibios_disable_irq)(struct pci_dev *dev) = &pci_visws_disable_irq; */ |
21 | 22 | ||
22 | void __init pcibios_penalize_isa_irq(int irq, int active) {} | 23 | /* void __init pcibios_penalize_isa_irq(int irq, int active) {} */ |
23 | 24 | ||
24 | 25 | ||
25 | unsigned int pci_bus0, pci_bus1; | 26 | unsigned int pci_bus0, pci_bus1; |
@@ -85,7 +86,7 @@ void __init pcibios_update_irq(struct pci_dev *dev, int irq) | |||
85 | pci_write_config_byte(dev, PCI_INTERRUPT_LINE, irq); | 86 | pci_write_config_byte(dev, PCI_INTERRUPT_LINE, irq); |
86 | } | 87 | } |
87 | 88 | ||
88 | static int __init pcibios_init(void) | 89 | static int __init pci_visws_init(void) |
89 | { | 90 | { |
90 | /* The VISWS supports configuration access type 1 only */ | 91 | /* The VISWS supports configuration access type 1 only */ |
91 | pci_probe = (pci_probe | PCI_PROBE_CONF1) & | 92 | pci_probe = (pci_probe | PCI_PROBE_CONF1) & |
@@ -105,4 +106,17 @@ static int __init pcibios_init(void) | |||
105 | return 0; | 106 | return 0; |
106 | } | 107 | } |
107 | 108 | ||
108 | subsys_initcall(pcibios_init); | 109 | static __init int pci_subsys_init(void) |
110 | { | ||
111 | if (!is_visws_box()) | ||
112 | return -1; | ||
113 | |||
114 | pcibios_enable_irq = &pci_visws_enable_irq; | ||
115 | pcibios_disable_irq = &pci_visws_disable_irq; | ||
116 | |||
117 | pci_visws_init(); | ||
118 | pcibios_init(); | ||
119 | |||
120 | return 0; | ||
121 | } | ||
122 | subsys_initcall(pci_subsys_init); | ||
diff --git a/arch/x86/vdso/vclock_gettime.c b/arch/x86/vdso/vclock_gettime.c index efa2ba7c6005..1ef0f90813d6 100644 --- a/arch/x86/vdso/vclock_gettime.c +++ b/arch/x86/vdso/vclock_gettime.c | |||
@@ -23,7 +23,7 @@ | |||
23 | 23 | ||
24 | #define gtod vdso_vsyscall_gtod_data | 24 | #define gtod vdso_vsyscall_gtod_data |
25 | 25 | ||
26 | static long vdso_fallback_gettime(long clock, struct timespec *ts) | 26 | notrace static long vdso_fallback_gettime(long clock, struct timespec *ts) |
27 | { | 27 | { |
28 | long ret; | 28 | long ret; |
29 | asm("syscall" : "=a" (ret) : | 29 | asm("syscall" : "=a" (ret) : |
@@ -31,7 +31,7 @@ static long vdso_fallback_gettime(long clock, struct timespec *ts) | |||
31 | return ret; | 31 | return ret; |
32 | } | 32 | } |
33 | 33 | ||
34 | static inline long vgetns(void) | 34 | notrace static inline long vgetns(void) |
35 | { | 35 | { |
36 | long v; | 36 | long v; |
37 | cycles_t (*vread)(void); | 37 | cycles_t (*vread)(void); |
@@ -40,7 +40,7 @@ static inline long vgetns(void) | |||
40 | return (v * gtod->clock.mult) >> gtod->clock.shift; | 40 | return (v * gtod->clock.mult) >> gtod->clock.shift; |
41 | } | 41 | } |
42 | 42 | ||
43 | static noinline int do_realtime(struct timespec *ts) | 43 | notrace static noinline int do_realtime(struct timespec *ts) |
44 | { | 44 | { |
45 | unsigned long seq, ns; | 45 | unsigned long seq, ns; |
46 | do { | 46 | do { |
@@ -54,7 +54,8 @@ static noinline int do_realtime(struct timespec *ts) | |||
54 | } | 54 | } |
55 | 55 | ||
56 | /* Copy of the version in kernel/time.c which we cannot directly access */ | 56 | /* Copy of the version in kernel/time.c which we cannot directly access */ |
57 | static void vset_normalized_timespec(struct timespec *ts, long sec, long nsec) | 57 | notrace static void |
58 | vset_normalized_timespec(struct timespec *ts, long sec, long nsec) | ||
58 | { | 59 | { |
59 | while (nsec >= NSEC_PER_SEC) { | 60 | while (nsec >= NSEC_PER_SEC) { |
60 | nsec -= NSEC_PER_SEC; | 61 | nsec -= NSEC_PER_SEC; |
@@ -68,7 +69,7 @@ static void vset_normalized_timespec(struct timespec *ts, long sec, long nsec) | |||
68 | ts->tv_nsec = nsec; | 69 | ts->tv_nsec = nsec; |
69 | } | 70 | } |
70 | 71 | ||
71 | static noinline int do_monotonic(struct timespec *ts) | 72 | notrace static noinline int do_monotonic(struct timespec *ts) |
72 | { | 73 | { |
73 | unsigned long seq, ns, secs; | 74 | unsigned long seq, ns, secs; |
74 | do { | 75 | do { |
@@ -82,7 +83,7 @@ static noinline int do_monotonic(struct timespec *ts) | |||
82 | return 0; | 83 | return 0; |
83 | } | 84 | } |
84 | 85 | ||
85 | int __vdso_clock_gettime(clockid_t clock, struct timespec *ts) | 86 | notrace int __vdso_clock_gettime(clockid_t clock, struct timespec *ts) |
86 | { | 87 | { |
87 | if (likely(gtod->sysctl_enabled && gtod->clock.vread)) | 88 | if (likely(gtod->sysctl_enabled && gtod->clock.vread)) |
88 | switch (clock) { | 89 | switch (clock) { |
@@ -96,7 +97,7 @@ int __vdso_clock_gettime(clockid_t clock, struct timespec *ts) | |||
96 | int clock_gettime(clockid_t, struct timespec *) | 97 | int clock_gettime(clockid_t, struct timespec *) |
97 | __attribute__((weak, alias("__vdso_clock_gettime"))); | 98 | __attribute__((weak, alias("__vdso_clock_gettime"))); |
98 | 99 | ||
99 | int __vdso_gettimeofday(struct timeval *tv, struct timezone *tz) | 100 | notrace int __vdso_gettimeofday(struct timeval *tv, struct timezone *tz) |
100 | { | 101 | { |
101 | long ret; | 102 | long ret; |
102 | if (likely(gtod->sysctl_enabled && gtod->clock.vread)) { | 103 | if (likely(gtod->sysctl_enabled && gtod->clock.vread)) { |
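The notrace annotations keep these vDSO functions out of ftrace's mcount instrumentation, which cannot work in code mapped into userspace. For reference, notrace is expected to expand as below (an assumption based on the generic <linux/compiler.h> definition of this era):

        /* Suppress -pg/mcount instrumentation for the marked function. */
        #define notrace __attribute__((no_instrument_function))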
diff --git a/arch/x86/vdso/vdso32-setup.c b/arch/x86/vdso/vdso32-setup.c index cf058fecfcee..0bce5429a515 100644 --- a/arch/x86/vdso/vdso32-setup.c +++ b/arch/x86/vdso/vdso32-setup.c | |||
@@ -203,20 +203,11 @@ static struct page *vdso32_pages[1]; | |||
203 | 203 | ||
204 | #ifdef CONFIG_X86_64 | 204 | #ifdef CONFIG_X86_64 |
205 | 205 | ||
206 | static int use_sysenter __read_mostly = -1; | 206 | #define vdso32_sysenter() (boot_cpu_has(X86_FEATURE_SYSENTER32)) |
207 | |||
208 | #define vdso32_sysenter() (use_sysenter > 0) | ||
209 | 207 | ||
210 | /* May not be __init: called during resume */ | 208 | /* May not be __init: called during resume */ |
211 | void syscall32_cpu_init(void) | 209 | void syscall32_cpu_init(void) |
212 | { | 210 | { |
213 | if (use_sysenter < 0) { | ||
214 | if (boot_cpu_data.x86_vendor == X86_VENDOR_INTEL) | ||
215 | use_sysenter = 1; | ||
216 | if (boot_cpu_data.x86_vendor == X86_VENDOR_CENTAUR) | ||
217 | use_sysenter = 1; | ||
218 | } | ||
219 | |||
220 | /* Load these always in case some future AMD CPU supports | 211 | /* Load these always in case some future AMD CPU supports |
221 | SYSENTER from compat mode too. */ | 212 | SYSENTER from compat mode too. */ |
222 | checking_wrmsrl(MSR_IA32_SYSENTER_CS, (u64)__KERNEL_CS); | 213 | checking_wrmsrl(MSR_IA32_SYSENTER_CS, (u64)__KERNEL_CS); |
diff --git a/arch/x86/vdso/vgetcpu.c b/arch/x86/vdso/vgetcpu.c index c8097f17f8a9..9fbc6b20026b 100644 --- a/arch/x86/vdso/vgetcpu.c +++ b/arch/x86/vdso/vgetcpu.c | |||
@@ -13,7 +13,8 @@ | |||
13 | #include <asm/vgtod.h> | 13 | #include <asm/vgtod.h> |
14 | #include "vextern.h" | 14 | #include "vextern.h" |
15 | 15 | ||
16 | long __vdso_getcpu(unsigned *cpu, unsigned *node, struct getcpu_cache *unused) | 16 | notrace long |
17 | __vdso_getcpu(unsigned *cpu, unsigned *node, struct getcpu_cache *unused) | ||
17 | { | 18 | { |
18 | unsigned int p; | 19 | unsigned int p; |
19 | 20 | ||
diff --git a/arch/x86/xen/enlighten.c b/arch/x86/xen/enlighten.c index 3b980831602c..bb508456ef52 100644 --- a/arch/x86/xen/enlighten.c +++ b/arch/x86/xen/enlighten.c | |||
@@ -1062,7 +1062,7 @@ static const struct pv_time_ops xen_time_ops __initdata = { | |||
1062 | 1062 | ||
1063 | .set_wallclock = xen_set_wallclock, | 1063 | .set_wallclock = xen_set_wallclock, |
1064 | .get_wallclock = xen_get_wallclock, | 1064 | .get_wallclock = xen_get_wallclock, |
1065 | .get_cpu_khz = xen_cpu_khz, | 1065 | .get_tsc_khz = xen_tsc_khz, |
1066 | .sched_clock = xen_sched_clock, | 1066 | .sched_clock = xen_sched_clock, |
1067 | }; | 1067 | }; |
1068 | 1068 | ||
@@ -1214,7 +1214,9 @@ static const struct smp_ops xen_smp_ops __initdata = { | |||
1214 | 1214 | ||
1215 | .smp_send_stop = xen_smp_send_stop, | 1215 | .smp_send_stop = xen_smp_send_stop, |
1216 | .smp_send_reschedule = xen_smp_send_reschedule, | 1216 | .smp_send_reschedule = xen_smp_send_reschedule, |
1217 | .smp_call_function_mask = xen_smp_call_function_mask, | 1217 | |
1218 | .send_call_func_ipi = xen_smp_send_call_function_ipi, | ||
1219 | .send_call_func_single_ipi = xen_smp_send_call_function_single_ipi, | ||
1218 | }; | 1220 | }; |
1219 | #endif /* CONFIG_SMP */ | 1221 | #endif /* CONFIG_SMP */ |
1220 | 1222 | ||
diff --git a/arch/x86/xen/mmu.c b/arch/x86/xen/mmu.c index 42b3b9ed641d..ff0aa74afaa1 100644 --- a/arch/x86/xen/mmu.c +++ b/arch/x86/xen/mmu.c | |||
@@ -796,7 +796,7 @@ static void drop_mm_ref(struct mm_struct *mm) | |||
796 | } | 796 | } |
797 | 797 | ||
798 | if (!cpus_empty(mask)) | 798 | if (!cpus_empty(mask)) |
799 | xen_smp_call_function_mask(mask, drop_other_mm_ref, mm, 1); | 799 | smp_call_function_mask(mask, drop_other_mm_ref, mm, 1); |
800 | } | 800 | } |
801 | #else | 801 | #else |
802 | static void drop_mm_ref(struct mm_struct *mm) | 802 | static void drop_mm_ref(struct mm_struct *mm) |
diff --git a/arch/x86/xen/smp.c b/arch/x86/xen/smp.c index d2e3c20127d7..233156f39b7f 100644 --- a/arch/x86/xen/smp.c +++ b/arch/x86/xen/smp.c | |||
@@ -36,27 +36,14 @@ | |||
36 | #include "mmu.h" | 36 | #include "mmu.h" |
37 | 37 | ||
38 | cpumask_t xen_cpu_initialized_map; | 38 | cpumask_t xen_cpu_initialized_map; |
39 | static DEFINE_PER_CPU(int, resched_irq) = -1; | ||
40 | static DEFINE_PER_CPU(int, callfunc_irq) = -1; | ||
41 | static DEFINE_PER_CPU(int, debug_irq) = -1; | ||
42 | |||
43 | /* | ||
44 | * Structure and data for smp_call_function(). This is designed to minimise | ||
45 | * static memory requirements. It also looks cleaner. | ||
46 | */ | ||
47 | static DEFINE_SPINLOCK(call_lock); | ||
48 | 39 | ||
49 | struct call_data_struct { | 40 | static DEFINE_PER_CPU(int, resched_irq); |
50 | void (*func) (void *info); | 41 | static DEFINE_PER_CPU(int, callfunc_irq); |
51 | void *info; | 42 | static DEFINE_PER_CPU(int, callfuncsingle_irq); |
52 | atomic_t started; | 43 | static DEFINE_PER_CPU(int, debug_irq) = -1; |
53 | atomic_t finished; | ||
54 | int wait; | ||
55 | }; | ||
56 | 44 | ||
57 | static irqreturn_t xen_call_function_interrupt(int irq, void *dev_id); | 45 | static irqreturn_t xen_call_function_interrupt(int irq, void *dev_id); |
58 | 46 | static irqreturn_t xen_call_function_single_interrupt(int irq, void *dev_id); | |
59 | static struct call_data_struct *call_data; | ||
60 | 47 | ||
61 | /* | 48 | /* |
62 | * Reschedule call back. Nothing to do, | 49 | * Reschedule call back. Nothing to do, |
@@ -128,6 +115,17 @@ static int xen_smp_intr_init(unsigned int cpu) | |||
128 | goto fail; | 115 | goto fail; |
129 | per_cpu(debug_irq, cpu) = rc; | 116 | per_cpu(debug_irq, cpu) = rc; |
130 | 117 | ||
118 | callfunc_name = kasprintf(GFP_KERNEL, "callfuncsingle%d", cpu); | ||
119 | rc = bind_ipi_to_irqhandler(XEN_CALL_FUNCTION_SINGLE_VECTOR, | ||
120 | cpu, | ||
121 | xen_call_function_single_interrupt, | ||
122 | IRQF_DISABLED|IRQF_PERCPU|IRQF_NOBALANCING, | ||
123 | callfunc_name, | ||
124 | NULL); | ||
125 | if (rc < 0) | ||
126 | goto fail; | ||
127 | per_cpu(callfuncsingle_irq, cpu) = rc; | ||
128 | |||
131 | return 0; | 129 | return 0; |
132 | 130 | ||
133 | fail: | 131 | fail: |
@@ -137,6 +135,9 @@ static int xen_smp_intr_init(unsigned int cpu) | |||
137 | unbind_from_irqhandler(per_cpu(callfunc_irq, cpu), NULL); | 135 | unbind_from_irqhandler(per_cpu(callfunc_irq, cpu), NULL); |
138 | if (per_cpu(debug_irq, cpu) >= 0) | 136 | if (per_cpu(debug_irq, cpu) >= 0) |
139 | unbind_from_irqhandler(per_cpu(debug_irq, cpu), NULL); | 137 | unbind_from_irqhandler(per_cpu(debug_irq, cpu), NULL); |
138 | if (per_cpu(callfuncsingle_irq, cpu) >= 0) | ||
139 | unbind_from_irqhandler(per_cpu(callfuncsingle_irq, cpu), NULL); | ||
140 | |||
140 | return rc; | 141 | return rc; |
141 | } | 142 | } |
142 | 143 | ||
@@ -336,7 +337,7 @@ static void stop_self(void *v) | |||
336 | 337 | ||
337 | void xen_smp_send_stop(void) | 338 | void xen_smp_send_stop(void) |
338 | { | 339 | { |
339 | smp_call_function(stop_self, NULL, 0, 0); | 340 | smp_call_function(stop_self, NULL, 0); |
340 | } | 341 | } |
341 | 342 | ||
342 | void xen_smp_send_reschedule(int cpu) | 343 | void xen_smp_send_reschedule(int cpu) |
@@ -344,7 +345,6 @@ void xen_smp_send_reschedule(int cpu) | |||
344 | xen_send_IPI_one(cpu, XEN_RESCHEDULE_VECTOR); | 345 | xen_send_IPI_one(cpu, XEN_RESCHEDULE_VECTOR); |
345 | } | 346 | } |
346 | 347 | ||
347 | |||
348 | static void xen_send_IPI_mask(cpumask_t mask, enum ipi_vector vector) | 348 | static void xen_send_IPI_mask(cpumask_t mask, enum ipi_vector vector) |
349 | { | 349 | { |
350 | unsigned cpu; | 350 | unsigned cpu; |
@@ -355,83 +355,42 @@ static void xen_send_IPI_mask(cpumask_t mask, enum ipi_vector vector) | |||
355 | xen_send_IPI_one(cpu, vector); | 355 | xen_send_IPI_one(cpu, vector); |
356 | } | 356 | } |
357 | 357 | ||
358 | void xen_smp_send_call_function_ipi(cpumask_t mask) | ||
359 | { | ||
360 | int cpu; | ||
361 | |||
362 | xen_send_IPI_mask(mask, XEN_CALL_FUNCTION_VECTOR); | ||
363 | |||
364 | /* Make sure other vcpus get a chance to run if they need to. */ | ||
365 | for_each_cpu_mask(cpu, mask) { | ||
366 | if (xen_vcpu_stolen(cpu)) { | ||
367 | HYPERVISOR_sched_op(SCHEDOP_yield, 0); | ||
368 | break; | ||
369 | } | ||
370 | } | ||
371 | } | ||
372 | |||
373 | void xen_smp_send_call_function_single_ipi(int cpu) | ||
374 | { | ||
375 | xen_send_IPI_mask(cpumask_of_cpu(cpu), XEN_CALL_FUNCTION_SINGLE_VECTOR); | ||
376 | } | ||
377 | |||
358 | static irqreturn_t xen_call_function_interrupt(int irq, void *dev_id) | 378 | static irqreturn_t xen_call_function_interrupt(int irq, void *dev_id) |
359 | { | 379 | { |
360 | void (*func) (void *info) = call_data->func; | ||
361 | void *info = call_data->info; | ||
362 | int wait = call_data->wait; | ||
363 | |||
364 | /* | ||
365 | * Notify initiating CPU that I've grabbed the data and am | ||
366 | * about to execute the function | ||
367 | */ | ||
368 | mb(); | ||
369 | atomic_inc(&call_data->started); | ||
370 | /* | ||
371 | * At this point the info structure may be out of scope unless wait==1 | ||
372 | */ | ||
373 | irq_enter(); | 380 | irq_enter(); |
374 | (*func)(info); | 381 | generic_smp_call_function_interrupt(); |
375 | __get_cpu_var(irq_stat).irq_call_count++; | 382 | __get_cpu_var(irq_stat).irq_call_count++; |
376 | irq_exit(); | 383 | irq_exit(); |
377 | 384 | ||
378 | if (wait) { | ||
379 | mb(); /* commit everything before setting finished */ | ||
380 | atomic_inc(&call_data->finished); | ||
381 | } | ||
382 | |||
383 | return IRQ_HANDLED; | 385 | return IRQ_HANDLED; |
384 | } | 386 | } |
385 | 387 | ||
386 | int xen_smp_call_function_mask(cpumask_t mask, void (*func)(void *), | 388 | static irqreturn_t xen_call_function_single_interrupt(int irq, void *dev_id) |
387 | void *info, int wait) | ||
388 | { | 389 | { |
389 | struct call_data_struct data; | 390 | irq_enter(); |
390 | int cpus, cpu; | 391 | generic_smp_call_function_single_interrupt(); |
391 | bool yield; | 392 | __get_cpu_var(irq_stat).irq_call_count++; |
392 | 393 | irq_exit(); | |
393 | /* Holding any lock stops cpus from going down. */ | ||
394 | spin_lock(&call_lock); | ||
395 | |||
396 | cpu_clear(smp_processor_id(), mask); | ||
397 | |||
398 | cpus = cpus_weight(mask); | ||
399 | if (!cpus) { | ||
400 | spin_unlock(&call_lock); | ||
401 | return 0; | ||
402 | } | ||
403 | |||
404 | /* Can deadlock when called with interrupts disabled */ | ||
405 | WARN_ON(irqs_disabled()); | ||
406 | |||
407 | data.func = func; | ||
408 | data.info = info; | ||
409 | atomic_set(&data.started, 0); | ||
410 | data.wait = wait; | ||
411 | if (wait) | ||
412 | atomic_set(&data.finished, 0); | ||
413 | |||
414 | call_data = &data; | ||
415 | mb(); /* write everything before IPI */ | ||
416 | |||
417 | /* Send a message to other CPUs and wait for them to respond */ | ||
418 | xen_send_IPI_mask(mask, XEN_CALL_FUNCTION_VECTOR); | ||
419 | |||
420 | /* Make sure other vcpus get a chance to run if they need to. */ | ||
421 | yield = false; | ||
422 | for_each_cpu_mask(cpu, mask) | ||
423 | if (xen_vcpu_stolen(cpu)) | ||
424 | yield = true; | ||
425 | |||
426 | if (yield) | ||
427 | HYPERVISOR_sched_op(SCHEDOP_yield, 0); | ||
428 | |||
429 | /* Wait for response */ | ||
430 | while (atomic_read(&data.started) != cpus || | ||
431 | (wait && atomic_read(&data.finished) != cpus)) | ||
432 | cpu_relax(); | ||
433 | |||
434 | spin_unlock(&call_lock); | ||
435 | 394 | ||
436 | return 0; | 395 | return IRQ_HANDLED; |
437 | } | 396 | } |
diff --git a/arch/x86/xen/time.c b/arch/x86/xen/time.c index 64f0038b9558..685b77470fc3 100644 --- a/arch/x86/xen/time.c +++ b/arch/x86/xen/time.c | |||
@@ -197,8 +197,8 @@ unsigned long long xen_sched_clock(void) | |||
197 | } | 197 | } |
198 | 198 | ||
199 | 199 | ||
200 | /* Get the CPU speed from Xen */ | 200 | /* Get the TSC speed from Xen */ |
201 | unsigned long xen_cpu_khz(void) | 201 | unsigned long xen_tsc_khz(void) |
202 | { | 202 | { |
203 | u64 xen_khz = 1000000ULL << 32; | 203 | u64 xen_khz = 1000000ULL << 32; |
204 | const struct pvclock_vcpu_time_info *info = | 204 | const struct pvclock_vcpu_time_info *info = |
diff --git a/arch/x86/xen/xen-ops.h b/arch/x86/xen/xen-ops.h index 9a055592a307..6f4b1045c1c2 100644 --- a/arch/x86/xen/xen-ops.h +++ b/arch/x86/xen/xen-ops.h | |||
@@ -32,7 +32,7 @@ void __init xen_build_dynamic_phys_to_machine(void); | |||
32 | 32 | ||
33 | void xen_setup_timer(int cpu); | 33 | void xen_setup_timer(int cpu); |
34 | void xen_setup_cpu_clockevents(void); | 34 | void xen_setup_cpu_clockevents(void); |
35 | unsigned long xen_cpu_khz(void); | 35 | unsigned long xen_tsc_khz(void); |
36 | void __init xen_time_init(void); | 36 | void __init xen_time_init(void); |
37 | unsigned long xen_get_wallclock(void); | 37 | unsigned long xen_get_wallclock(void); |
38 | int xen_set_wallclock(unsigned long time); | 38 | int xen_set_wallclock(unsigned long time); |
@@ -55,13 +55,8 @@ void xen_smp_cpus_done(unsigned int max_cpus); | |||
55 | 55 | ||
56 | void xen_smp_send_stop(void); | 56 | void xen_smp_send_stop(void); |
57 | void xen_smp_send_reschedule(int cpu); | 57 | void xen_smp_send_reschedule(int cpu); |
58 | int xen_smp_call_function (void (*func) (void *info), void *info, int nonatomic, | 58 | void xen_smp_send_call_function_ipi(cpumask_t mask); |
59 | int wait); | 59 | void xen_smp_send_call_function_single_ipi(int cpu); |
60 | int xen_smp_call_function_single(int cpu, void (*func) (void *info), void *info, | ||
61 | int nonatomic, int wait); | ||
62 | |||
63 | int xen_smp_call_function_mask(cpumask_t mask, void (*func)(void *), | ||
64 | void *info, int wait); | ||
65 | 60 | ||
66 | extern cpumask_t xen_cpu_initialized_map; | 61 | extern cpumask_t xen_cpu_initialized_map; |
67 | 62 | ||