diff options
author | Len Brown <len.brown@intel.com> | 2009-01-09 03:39:43 -0500 |
---|---|---|
committer | Len Brown <len.brown@intel.com> | 2009-01-09 03:39:43 -0500 |
commit | b2576e1d4408e134e2188c967b1f28af39cd79d4 (patch) | |
tree | 004f3c82faab760f304ce031d6d2f572e7746a50 /arch/x86 | |
parent | 3cc8a5f4ba91f67bbdb81a43a99281a26aab8d77 (diff) | |
parent | 2150edc6c5cf00f7adb54538b9ea2a3e9cedca3f (diff) |
Merge branch 'linus' into release
Diffstat (limited to 'arch/x86')
285 files changed, 10151 insertions, 6940 deletions
diff --git a/arch/x86/Kconfig b/arch/x86/Kconfig index ac22bb7719f7..862adb9bf0d4 100644 --- a/arch/x86/Kconfig +++ b/arch/x86/Kconfig | |||
@@ -19,6 +19,8 @@ config X86_64 | |||
19 | config X86 | 19 | config X86 |
20 | def_bool y | 20 | def_bool y |
21 | select HAVE_AOUT if X86_32 | 21 | select HAVE_AOUT if X86_32 |
22 | select HAVE_READQ | ||
23 | select HAVE_WRITEQ | ||
22 | select HAVE_UNSTABLE_SCHED_CLOCK | 24 | select HAVE_UNSTABLE_SCHED_CLOCK |
23 | select HAVE_IDE | 25 | select HAVE_IDE |
24 | select HAVE_OPROFILE | 26 | select HAVE_OPROFILE |
@@ -29,11 +31,14 @@ config X86 | |||
29 | select HAVE_FTRACE_MCOUNT_RECORD | 31 | select HAVE_FTRACE_MCOUNT_RECORD |
30 | select HAVE_DYNAMIC_FTRACE | 32 | select HAVE_DYNAMIC_FTRACE |
31 | select HAVE_FUNCTION_TRACER | 33 | select HAVE_FUNCTION_TRACER |
34 | select HAVE_FUNCTION_GRAPH_TRACER | ||
35 | select HAVE_FUNCTION_TRACE_MCOUNT_TEST | ||
32 | select HAVE_KVM if ((X86_32 && !X86_VOYAGER && !X86_VISWS && !X86_NUMAQ) || X86_64) | 36 | select HAVE_KVM if ((X86_32 && !X86_VOYAGER && !X86_VISWS && !X86_NUMAQ) || X86_64) |
33 | select HAVE_ARCH_KGDB if !X86_VOYAGER | 37 | select HAVE_ARCH_KGDB if !X86_VOYAGER |
34 | select HAVE_ARCH_TRACEHOOK | 38 | select HAVE_ARCH_TRACEHOOK |
35 | select HAVE_GENERIC_DMA_COHERENT if X86_32 | 39 | select HAVE_GENERIC_DMA_COHERENT if X86_32 |
36 | select HAVE_EFFICIENT_UNALIGNED_ACCESS | 40 | select HAVE_EFFICIENT_UNALIGNED_ACCESS |
41 | select USER_STACKTRACE_SUPPORT | ||
37 | 42 | ||
38 | config ARCH_DEFCONFIG | 43 | config ARCH_DEFCONFIG |
39 | string | 44 | string |
@@ -87,6 +92,10 @@ config GENERIC_IOMAP | |||
87 | config GENERIC_BUG | 92 | config GENERIC_BUG |
88 | def_bool y | 93 | def_bool y |
89 | depends on BUG | 94 | depends on BUG |
95 | select GENERIC_BUG_RELATIVE_POINTERS if X86_64 | ||
96 | |||
97 | config GENERIC_BUG_RELATIVE_POINTERS | ||
98 | bool | ||
90 | 99 | ||
91 | config GENERIC_HWEIGHT | 100 | config GENERIC_HWEIGHT |
92 | def_bool y | 101 | def_bool y |
@@ -238,25 +247,39 @@ config X86_HAS_BOOT_CPU_ID | |||
238 | def_bool y | 247 | def_bool y |
239 | depends on X86_VOYAGER | 248 | depends on X86_VOYAGER |
240 | 249 | ||
250 | config SPARSE_IRQ | ||
251 | bool "Support sparse irq numbering" | ||
252 | depends on PCI_MSI || HT_IRQ | ||
253 | help | ||
254 | This enables support for sparse irqs. This is useful for distro | ||
255 | kernels that want to define a high CONFIG_NR_CPUS value but still | ||
256 | want to have low kernel memory footprint on smaller machines. | ||
257 | |||
258 | ( Sparse IRQs can also be beneficial on NUMA boxes, as they spread | ||
259 | out the irq_desc[] array in a more NUMA-friendly way. ) | ||
260 | |||
261 | If you don't know what to do here, say N. | ||
262 | |||
263 | config NUMA_MIGRATE_IRQ_DESC | ||
264 | bool "Move irq desc when changing irq smp_affinity" | ||
265 | depends on SPARSE_IRQ && NUMA | ||
266 | default n | ||
267 | help | ||
268 | This enables moving irq_desc to the cpu/node that will handle the irq. | ||
269 | |||
270 | If you don't know what to do here, say N. | ||
271 | |||
241 | config X86_FIND_SMP_CONFIG | 272 | config X86_FIND_SMP_CONFIG |
242 | def_bool y | 273 | def_bool y |
243 | depends on X86_MPPARSE || X86_VOYAGER | 274 | depends on X86_MPPARSE || X86_VOYAGER |
244 | 275 | ||
245 | if ACPI | ||
246 | config X86_MPPARSE | 276 | config X86_MPPARSE |
247 | def_bool y | 277 | bool "Enable MPS table" if ACPI |
248 | bool "Enable MPS table" | 278 | default y |
249 | depends on X86_LOCAL_APIC | 279 | depends on X86_LOCAL_APIC |
250 | help | 280 | help |
251 | For old smp systems that do not have proper acpi support. Newer systems | 281 | For old smp systems that do not have proper acpi support. Newer systems |
252 | (esp with 64bit cpus) with acpi support, MADT and DSDT will override it | 282 | (esp with 64bit cpus) with acpi support, MADT and DSDT will override it |
253 | endif | ||
254 | |||
255 | if !ACPI | ||
256 | config X86_MPPARSE | ||
257 | def_bool y | ||
258 | depends on X86_LOCAL_APIC | ||
259 | endif | ||
260 | 283 | ||
261 | choice | 284 | choice |
262 | prompt "Subarchitecture Type" | 285 | prompt "Subarchitecture Type" |
@@ -367,10 +390,10 @@ config X86_RDC321X | |||
367 | as R-8610-(G). | 390 | as R-8610-(G). |
368 | If you don't have one of these chips, you should say N here. | 391 | If you don't have one of these chips, you should say N here. |
369 | 392 | ||
370 | config SCHED_NO_NO_OMIT_FRAME_POINTER | 393 | config SCHED_OMIT_FRAME_POINTER |
371 | def_bool y | 394 | def_bool y |
372 | prompt "Single-depth WCHAN output" | 395 | prompt "Single-depth WCHAN output" |
373 | depends on X86_32 | 396 | depends on X86 |
374 | help | 397 | help |
375 | Calculate simpler /proc/<PID>/wchan values. If this option | 398 | Calculate simpler /proc/<PID>/wchan values. If this option |
376 | is disabled then wchan values will recurse back to the | 399 | is disabled then wchan values will recurse back to the |
@@ -465,10 +488,6 @@ config X86_CYCLONE_TIMER | |||
465 | def_bool y | 488 | def_bool y |
466 | depends on X86_GENERICARCH | 489 | depends on X86_GENERICARCH |
467 | 490 | ||
468 | config ES7000_CLUSTERED_APIC | ||
469 | def_bool y | ||
470 | depends on SMP && X86_ES7000 && MPENTIUMIII | ||
471 | |||
472 | source "arch/x86/Kconfig.cpu" | 491 | source "arch/x86/Kconfig.cpu" |
473 | 492 | ||
474 | config HPET_TIMER | 493 | config HPET_TIMER |
@@ -482,7 +501,7 @@ config HPET_TIMER | |||
482 | The HPET provides a stable time base on SMP | 501 | The HPET provides a stable time base on SMP |
483 | systems, unlike the TSC, but it is more expensive to access, | 502 | systems, unlike the TSC, but it is more expensive to access, |
484 | as it is off-chip. You can find the HPET spec at | 503 | as it is off-chip. You can find the HPET spec at |
485 | <http://www.intel.com/hardwaredesign/hpetspec.htm>. | 504 | <http://www.intel.com/hardwaredesign/hpetspec_1.pdf>. |
486 | 505 | ||
487 | You can safely choose Y here. However, HPET will only be | 506 | You can safely choose Y here. However, HPET will only be |
488 | activated if the platform and the BIOS support this feature. | 507 | activated if the platform and the BIOS support this feature. |
@@ -567,9 +586,19 @@ config AMD_IOMMU | |||
567 | your BIOS for an option to enable it or if you have an IVRS ACPI | 586 | your BIOS for an option to enable it or if you have an IVRS ACPI |
568 | table. | 587 | table. |
569 | 588 | ||
589 | config AMD_IOMMU_STATS | ||
590 | bool "Export AMD IOMMU statistics to debugfs" | ||
591 | depends on AMD_IOMMU | ||
592 | select DEBUG_FS | ||
593 | help | ||
594 | This option enables code in the AMD IOMMU driver to collect various | ||
595 | statistics about what's happening in the driver and exports that | ||
596 | information to userspace via debugfs. | ||
597 | If unsure, say N. | ||
598 | |||
570 | # need this always selected by IOMMU for the VIA workaround | 599 | # need this always selected by IOMMU for the VIA workaround |
571 | config SWIOTLB | 600 | config SWIOTLB |
572 | bool | 601 | def_bool y if X86_64 |
573 | help | 602 | help |
574 | Support for software bounce buffers used on x86-64 systems | 603 | Support for software bounce buffers used on x86-64 systems |
575 | which don't have a hardware IOMMU (e.g. the current generation | 604 | which don't have a hardware IOMMU (e.g. the current generation |
@@ -580,21 +609,25 @@ config SWIOTLB | |||
580 | config IOMMU_HELPER | 609 | config IOMMU_HELPER |
581 | def_bool (CALGARY_IOMMU || GART_IOMMU || SWIOTLB || AMD_IOMMU) | 610 | def_bool (CALGARY_IOMMU || GART_IOMMU || SWIOTLB || AMD_IOMMU) |
582 | 611 | ||
612 | config IOMMU_API | ||
613 | def_bool (AMD_IOMMU || DMAR) | ||
614 | |||
583 | config MAXSMP | 615 | config MAXSMP |
584 | bool "Configure Maximum number of SMP Processors and NUMA Nodes" | 616 | bool "Configure Maximum number of SMP Processors and NUMA Nodes" |
585 | depends on X86_64 && SMP && BROKEN | 617 | depends on X86_64 && SMP && DEBUG_KERNEL && EXPERIMENTAL |
618 | select CPUMASK_OFFSTACK | ||
586 | default n | 619 | default n |
587 | help | 620 | help |
588 | Configure maximum number of CPUS and NUMA Nodes for this architecture. | 621 | Configure maximum number of CPUS and NUMA Nodes for this architecture. |
589 | If unsure, say N. | 622 | If unsure, say N. |
590 | 623 | ||
591 | config NR_CPUS | 624 | config NR_CPUS |
592 | int "Maximum number of CPUs (2-512)" if !MAXSMP | 625 | int "Maximum number of CPUs" if SMP && !MAXSMP |
593 | range 2 512 | 626 | range 2 512 if SMP && !MAXSMP |
594 | depends on SMP | 627 | default "1" if !SMP |
595 | default "4096" if MAXSMP | 628 | default "4096" if MAXSMP |
596 | default "32" if X86_NUMAQ || X86_SUMMIT || X86_BIGSMP || X86_ES7000 | 629 | default "32" if SMP && (X86_NUMAQ || X86_SUMMIT || X86_BIGSMP || X86_ES7000) |
597 | default "8" | 630 | default "8" if SMP |
598 | help | 631 | help |
599 | This allows you to specify the maximum number of CPUs which this | 632 | This allows you to specify the maximum number of CPUs which this |
600 | kernel will support. The maximum supported value is 512 and the | 633 | kernel will support. The maximum supported value is 512 and the |
@@ -660,6 +693,30 @@ config X86_VISWS_APIC | |||
660 | def_bool y | 693 | def_bool y |
661 | depends on X86_32 && X86_VISWS | 694 | depends on X86_32 && X86_VISWS |
662 | 695 | ||
696 | config X86_REROUTE_FOR_BROKEN_BOOT_IRQS | ||
697 | bool "Reroute for broken boot IRQs" | ||
698 | default n | ||
699 | depends on X86_IO_APIC | ||
700 | help | ||
701 | This option enables a workaround that fixes a source of | ||
702 | spurious interrupts. This is recommended when threaded | ||
703 | interrupt handling is used on systems where the generation of | ||
704 | superfluous "boot interrupts" cannot be disabled. | ||
705 | |||
706 | Some chipsets generate a legacy INTx "boot IRQ" when the IRQ | ||
707 | entry in the chipset's IO-APIC is masked (as, e.g. the RT | ||
708 | kernel does during interrupt handling). On chipsets where this | ||
709 | boot IRQ generation cannot be disabled, this workaround keeps | ||
710 | the original IRQ line masked so that only the equivalent "boot | ||
711 | IRQ" is delivered to the CPUs. The workaround also tells the | ||
712 | kernel to set up the IRQ handler on the boot IRQ line. In this | ||
713 | way only one interrupt is delivered to the kernel. Otherwise | ||
714 | the spurious second interrupt may cause the kernel to bring | ||
715 | down (vital) interrupt lines. | ||
716 | |||
717 | Only affects "broken" chipsets. Interrupt sharing may be | ||
718 | increased on these systems. | ||
719 | |||
663 | config X86_MCE | 720 | config X86_MCE |
664 | bool "Machine Check Exception" | 721 | bool "Machine Check Exception" |
665 | depends on !X86_VOYAGER | 722 | depends on !X86_VOYAGER |
@@ -956,24 +1013,37 @@ config X86_PAE | |||
956 | config ARCH_PHYS_ADDR_T_64BIT | 1013 | config ARCH_PHYS_ADDR_T_64BIT |
957 | def_bool X86_64 || X86_PAE | 1014 | def_bool X86_64 || X86_PAE |
958 | 1015 | ||
1016 | config DIRECT_GBPAGES | ||
1017 | bool "Enable 1GB pages for kernel pagetables" if EMBEDDED | ||
1018 | default y | ||
1019 | depends on X86_64 | ||
1020 | help | ||
1021 | Allow the kernel linear mapping to use 1GB pages on CPUs that | ||
1022 | support it. This can improve the kernel's performance a tiny bit by | ||
1023 | reducing TLB pressure. If in doubt, say "Y". | ||
1024 | |||
959 | # Common NUMA Features | 1025 | # Common NUMA Features |
960 | config NUMA | 1026 | config NUMA |
961 | bool "Numa Memory Allocation and Scheduler Support (EXPERIMENTAL)" | 1027 | bool "Numa Memory Allocation and Scheduler Support" |
962 | depends on SMP | 1028 | depends on SMP |
963 | depends on X86_64 || (X86_32 && HIGHMEM64G && (X86_NUMAQ || X86_BIGSMP || X86_SUMMIT && ACPI) && EXPERIMENTAL) | 1029 | depends on X86_64 || (X86_32 && HIGHMEM64G && (X86_NUMAQ || X86_BIGSMP || X86_SUMMIT && ACPI) && EXPERIMENTAL) |
964 | default n if X86_PC | 1030 | default n if X86_PC |
965 | default y if (X86_NUMAQ || X86_SUMMIT || X86_BIGSMP) | 1031 | default y if (X86_NUMAQ || X86_SUMMIT || X86_BIGSMP) |
966 | help | 1032 | help |
967 | Enable NUMA (Non Uniform Memory Access) support. | 1033 | Enable NUMA (Non Uniform Memory Access) support. |
1034 | |||
968 | The kernel will try to allocate memory used by a CPU on the | 1035 | The kernel will try to allocate memory used by a CPU on the |
969 | local memory controller of the CPU and add some more | 1036 | local memory controller of the CPU and add some more |
970 | NUMA awareness to the kernel. | 1037 | NUMA awareness to the kernel. |
971 | 1038 | ||
972 | For 32-bit this is currently highly experimental and should be only | 1039 | For 64-bit this is recommended if the system is Intel Core i7 |
973 | used for kernel development. It might also cause boot failures. | 1040 | (or later), AMD Opteron, or EM64T NUMA. |
974 | For 64-bit this is recommended on all multiprocessor Opteron systems. | 1041 | |
975 | If the system is EM64T, you should say N unless your system is | 1042 | For 32-bit this is only needed on (rare) 32-bit-only platforms |
976 | EM64T NUMA. | 1043 | that support NUMA topologies, such as NUMAQ / Summit, or if you |
1044 | boot a 32-bit kernel on a 64-bit NUMA platform. | ||
1045 | |||
1046 | Otherwise, you should say N. | ||
977 | 1047 | ||
978 | comment "NUMA (Summit) requires SMP, 64GB highmem support, ACPI" | 1048 | comment "NUMA (Summit) requires SMP, 64GB highmem support, ACPI" |
979 | depends on X86_32 && X86_SUMMIT && (!HIGHMEM64G || !ACPI) | 1049 | depends on X86_32 && X86_SUMMIT && (!HIGHMEM64G || !ACPI) |
@@ -1493,6 +1563,10 @@ config ARCH_ENABLE_MEMORY_HOTPLUG | |||
1493 | def_bool y | 1563 | def_bool y |
1494 | depends on X86_64 || (X86_32 && HIGHMEM) | 1564 | depends on X86_64 || (X86_32 && HIGHMEM) |
1495 | 1565 | ||
1566 | config ARCH_ENABLE_MEMORY_HOTREMOVE | ||
1567 | def_bool y | ||
1568 | depends on MEMORY_HOTPLUG | ||
1569 | |||
1496 | config HAVE_ARCH_EARLY_PFN_TO_NID | 1570 | config HAVE_ARCH_EARLY_PFN_TO_NID |
1497 | def_bool X86_64 | 1571 | def_bool X86_64 |
1498 | depends on NUMA | 1572 | depends on NUMA |
@@ -1632,13 +1706,6 @@ config APM_ALLOW_INTS | |||
1632 | many of the newer IBM Thinkpads. If you experience hangs when you | 1706 | many of the newer IBM Thinkpads. If you experience hangs when you |
1633 | suspend, try setting this to Y. Otherwise, say N. | 1707 | suspend, try setting this to Y. Otherwise, say N. |
1634 | 1708 | ||
1635 | config APM_REAL_MODE_POWER_OFF | ||
1636 | bool "Use real mode APM BIOS call to power off" | ||
1637 | help | ||
1638 | Use real mode APM BIOS calls to switch off the computer. This is | ||
1639 | a work-around for a number of buggy BIOSes. Switch this option on if | ||
1640 | your computer crashes instead of powering off properly. | ||
1641 | |||
1642 | endif # APM | 1709 | endif # APM |
1643 | 1710 | ||
1644 | source "arch/x86/kernel/cpu/cpufreq/Kconfig" | 1711 | source "arch/x86/kernel/cpu/cpufreq/Kconfig" |
diff --git a/arch/x86/Kconfig.cpu b/arch/x86/Kconfig.cpu index 8e99073b9e0f..8078955845ae 100644 --- a/arch/x86/Kconfig.cpu +++ b/arch/x86/Kconfig.cpu | |||
@@ -408,7 +408,7 @@ config X86_MINIMUM_CPU_FAMILY | |||
408 | 408 | ||
409 | config X86_DEBUGCTLMSR | 409 | config X86_DEBUGCTLMSR |
410 | def_bool y | 410 | def_bool y |
411 | depends on !(MK6 || MWINCHIPC6 || MWINCHIP3D || MCYRIXIII || M586MMX || M586TSC || M586 || M486 || M386) | 411 | depends on !(MK6 || MWINCHIPC6 || MWINCHIP3D || MCYRIXIII || M586MMX || M586TSC || M586 || M486 || M386) && !UML |
412 | 412 | ||
413 | menuconfig PROCESSOR_SELECT | 413 | menuconfig PROCESSOR_SELECT |
414 | bool "Supported processor vendors" if EMBEDDED | 414 | bool "Supported processor vendors" if EMBEDDED |
@@ -515,12 +515,12 @@ config CPU_SUP_UMC_32 | |||
515 | config X86_DS | 515 | config X86_DS |
516 | def_bool X86_PTRACE_BTS | 516 | def_bool X86_PTRACE_BTS |
517 | depends on X86_DEBUGCTLMSR | 517 | depends on X86_DEBUGCTLMSR |
518 | select HAVE_HW_BRANCH_TRACER | ||
518 | 519 | ||
519 | config X86_PTRACE_BTS | 520 | config X86_PTRACE_BTS |
520 | bool "Branch Trace Store" | 521 | bool "Branch Trace Store" |
521 | default y | 522 | default y |
522 | depends on X86_DEBUGCTLMSR | 523 | depends on X86_DEBUGCTLMSR |
523 | depends on BROKEN | ||
524 | help | 524 | help |
525 | This adds a ptrace interface to the hardware's branch trace store. | 525 | This adds a ptrace interface to the hardware's branch trace store. |
526 | 526 | ||
diff --git a/arch/x86/Kconfig.debug b/arch/x86/Kconfig.debug index 2a3dfbd5e677..10d6cc3fd052 100644 --- a/arch/x86/Kconfig.debug +++ b/arch/x86/Kconfig.debug | |||
@@ -114,18 +114,6 @@ config DEBUG_RODATA | |||
114 | data. This is recommended so that we can catch kernel bugs sooner. | 114 | data. This is recommended so that we can catch kernel bugs sooner. |
115 | If in doubt, say "Y". | 115 | If in doubt, say "Y". |
116 | 116 | ||
117 | config DIRECT_GBPAGES | ||
118 | bool "Enable gbpages-mapped kernel pagetables" | ||
119 | depends on DEBUG_KERNEL && EXPERIMENTAL && X86_64 | ||
120 | help | ||
121 | Enable gigabyte pages support (if the CPU supports it). This can | ||
122 | improve the kernel's performance a tiny bit by reducing TLB | ||
123 | pressure. | ||
124 | |||
125 | This is experimental code. | ||
126 | |||
127 | If in doubt, say "N". | ||
128 | |||
129 | config DEBUG_RODATA_TEST | 117 | config DEBUG_RODATA_TEST |
130 | bool "Testcase for the DEBUG_RODATA feature" | 118 | bool "Testcase for the DEBUG_RODATA feature" |
131 | depends on DEBUG_RODATA | 119 | depends on DEBUG_RODATA |
@@ -186,14 +174,10 @@ config IOMMU_LEAK | |||
186 | Add a simple leak tracer to the IOMMU code. This is useful when you | 174 | Add a simple leak tracer to the IOMMU code. This is useful when you |
187 | are debugging a buggy device driver that leaks IOMMU mappings. | 175 | are debugging a buggy device driver that leaks IOMMU mappings. |
188 | 176 | ||
189 | config MMIOTRACE_HOOKS | ||
190 | bool | ||
191 | |||
192 | config MMIOTRACE | 177 | config MMIOTRACE |
193 | bool "Memory mapped IO tracing" | 178 | bool "Memory mapped IO tracing" |
194 | depends on DEBUG_KERNEL && PCI | 179 | depends on DEBUG_KERNEL && PCI |
195 | select TRACING | 180 | select TRACING |
196 | select MMIOTRACE_HOOKS | ||
197 | help | 181 | help |
198 | Mmiotrace traces Memory Mapped I/O access and is meant for | 182 | Mmiotrace traces Memory Mapped I/O access and is meant for |
199 | debugging and reverse engineering. It is called from the ioremap | 183 | debugging and reverse engineering. It is called from the ioremap |
@@ -307,10 +291,10 @@ config OPTIMIZE_INLINING | |||
307 | developers have marked 'inline'. Doing so takes away freedom from gcc to | 291 | developers have marked 'inline'. Doing so takes away freedom from gcc to |
308 | do what it thinks is best, which is desirable for the gcc 3.x series of | 292 | do what it thinks is best, which is desirable for the gcc 3.x series of |
309 | compilers. The gcc 4.x series have a rewritten inlining algorithm and | 293 | compilers. The gcc 4.x series have a rewritten inlining algorithm and |
310 | disabling this option will generate a smaller kernel there. Hopefully | 294 | enabling this option will generate a smaller kernel there. Hopefully |
311 | this algorithm is so good that allowing gcc4 to make the decision can | 295 | this algorithm is so good that allowing gcc 4.x and above to make the |
312 | become the default in the future, until then this option is there to | 296 | decision will become the default in the future. Until then this option |
313 | test gcc for this. | 297 | is there to test gcc for this. |
314 | 298 | ||
315 | If unsure, say N. | 299 | If unsure, say N. |
316 | 300 | ||
diff --git a/arch/x86/boot/video-vga.c b/arch/x86/boot/video-vga.c index b939cb476dec..5d4742ed4aa2 100644 --- a/arch/x86/boot/video-vga.c +++ b/arch/x86/boot/video-vga.c | |||
@@ -34,7 +34,7 @@ static struct mode_info cga_modes[] = { | |||
34 | { VIDEO_80x25, 80, 25, 0 }, | 34 | { VIDEO_80x25, 80, 25, 0 }, |
35 | }; | 35 | }; |
36 | 36 | ||
37 | __videocard video_vga; | 37 | static __videocard video_vga; |
38 | 38 | ||
39 | /* Set basic 80x25 mode */ | 39 | /* Set basic 80x25 mode */ |
40 | static u8 vga_set_basic_mode(void) | 40 | static u8 vga_set_basic_mode(void) |
@@ -259,7 +259,7 @@ static int vga_probe(void) | |||
259 | return mode_count[adapter]; | 259 | return mode_count[adapter]; |
260 | } | 260 | } |
261 | 261 | ||
262 | __videocard video_vga = { | 262 | static __videocard video_vga = { |
263 | .card_name = "VGA", | 263 | .card_name = "VGA", |
264 | .probe = vga_probe, | 264 | .probe = vga_probe, |
265 | .set_mode = vga_set_mode, | 265 | .set_mode = vga_set_mode, |
diff --git a/arch/x86/boot/video.c b/arch/x86/boot/video.c index 83598b23093a..3bef2c1febe9 100644 --- a/arch/x86/boot/video.c +++ b/arch/x86/boot/video.c | |||
@@ -226,7 +226,7 @@ static unsigned int mode_menu(void) | |||
226 | 226 | ||
227 | #ifdef CONFIG_VIDEO_RETAIN | 227 | #ifdef CONFIG_VIDEO_RETAIN |
228 | /* Save screen content to the heap */ | 228 | /* Save screen content to the heap */ |
229 | struct saved_screen { | 229 | static struct saved_screen { |
230 | int x, y; | 230 | int x, y; |
231 | int curx, cury; | 231 | int curx, cury; |
232 | u16 *data; | 232 | u16 *data; |
diff --git a/arch/x86/configs/i386_defconfig b/arch/x86/configs/i386_defconfig index 13b8c86ae985..b30a08ed8eb4 100644 --- a/arch/x86/configs/i386_defconfig +++ b/arch/x86/configs/i386_defconfig | |||
@@ -77,7 +77,7 @@ CONFIG_AUDIT=y | |||
77 | CONFIG_AUDITSYSCALL=y | 77 | CONFIG_AUDITSYSCALL=y |
78 | CONFIG_AUDIT_TREE=y | 78 | CONFIG_AUDIT_TREE=y |
79 | # CONFIG_IKCONFIG is not set | 79 | # CONFIG_IKCONFIG is not set |
80 | CONFIG_LOG_BUF_SHIFT=17 | 80 | CONFIG_LOG_BUF_SHIFT=18 |
81 | CONFIG_CGROUPS=y | 81 | CONFIG_CGROUPS=y |
82 | # CONFIG_CGROUP_DEBUG is not set | 82 | # CONFIG_CGROUP_DEBUG is not set |
83 | CONFIG_CGROUP_NS=y | 83 | CONFIG_CGROUP_NS=y |
@@ -298,7 +298,7 @@ CONFIG_KEXEC=y | |||
298 | CONFIG_CRASH_DUMP=y | 298 | CONFIG_CRASH_DUMP=y |
299 | # CONFIG_KEXEC_JUMP is not set | 299 | # CONFIG_KEXEC_JUMP is not set |
300 | CONFIG_PHYSICAL_START=0x1000000 | 300 | CONFIG_PHYSICAL_START=0x1000000 |
301 | CONFIG_RELOCATABLE=y | 301 | # CONFIG_RELOCATABLE is not set |
302 | CONFIG_PHYSICAL_ALIGN=0x200000 | 302 | CONFIG_PHYSICAL_ALIGN=0x200000 |
303 | CONFIG_HOTPLUG_CPU=y | 303 | CONFIG_HOTPLUG_CPU=y |
304 | # CONFIG_COMPAT_VDSO is not set | 304 | # CONFIG_COMPAT_VDSO is not set |
diff --git a/arch/x86/configs/x86_64_defconfig b/arch/x86/configs/x86_64_defconfig index f0a03d7a7d63..0e7dbc0a3e46 100644 --- a/arch/x86/configs/x86_64_defconfig +++ b/arch/x86/configs/x86_64_defconfig | |||
@@ -77,7 +77,7 @@ CONFIG_AUDIT=y | |||
77 | CONFIG_AUDITSYSCALL=y | 77 | CONFIG_AUDITSYSCALL=y |
78 | CONFIG_AUDIT_TREE=y | 78 | CONFIG_AUDIT_TREE=y |
79 | # CONFIG_IKCONFIG is not set | 79 | # CONFIG_IKCONFIG is not set |
80 | CONFIG_LOG_BUF_SHIFT=17 | 80 | CONFIG_LOG_BUF_SHIFT=18 |
81 | CONFIG_CGROUPS=y | 81 | CONFIG_CGROUPS=y |
82 | # CONFIG_CGROUP_DEBUG is not set | 82 | # CONFIG_CGROUP_DEBUG is not set |
83 | CONFIG_CGROUP_NS=y | 83 | CONFIG_CGROUP_NS=y |
@@ -298,7 +298,7 @@ CONFIG_SCHED_HRTICK=y | |||
298 | CONFIG_KEXEC=y | 298 | CONFIG_KEXEC=y |
299 | CONFIG_CRASH_DUMP=y | 299 | CONFIG_CRASH_DUMP=y |
300 | CONFIG_PHYSICAL_START=0x1000000 | 300 | CONFIG_PHYSICAL_START=0x1000000 |
301 | CONFIG_RELOCATABLE=y | 301 | # CONFIG_RELOCATABLE is not set |
302 | CONFIG_PHYSICAL_ALIGN=0x200000 | 302 | CONFIG_PHYSICAL_ALIGN=0x200000 |
303 | CONFIG_HOTPLUG_CPU=y | 303 | CONFIG_HOTPLUG_CPU=y |
304 | # CONFIG_COMPAT_VDSO is not set | 304 | # CONFIG_COMPAT_VDSO is not set |
diff --git a/arch/x86/crypto/crc32c-intel.c b/arch/x86/crypto/crc32c-intel.c index 070afc5b6c94..b9d00261703c 100644 --- a/arch/x86/crypto/crc32c-intel.c +++ b/arch/x86/crypto/crc32c-intel.c | |||
@@ -6,13 +6,22 @@ | |||
6 | * Intel(R) 64 and IA-32 Architectures Software Developer's Manual | 6 | * Intel(R) 64 and IA-32 Architectures Software Developer's Manual |
7 | * Volume 2A: Instruction Set Reference, A-M | 7 | * Volume 2A: Instruction Set Reference, A-M |
8 | * | 8 | * |
9 | * Copyright (c) 2008 Austin Zhang <austin_zhang@linux.intel.com> | 9 | * Copyright (C) 2008 Intel Corporation |
10 | * Copyright (c) 2008 Kent Liu <kent.liu@intel.com> | 10 | * Authors: Austin Zhang <austin_zhang@linux.intel.com> |
11 | * Kent Liu <kent.liu@intel.com> | ||
11 | * | 12 | * |
12 | * This program is free software; you can redistribute it and/or modify it | 13 | * This program is free software; you can redistribute it and/or modify it |
13 | * under the terms of the GNU General Public License as published by the Free | 14 | * under the terms and conditions of the GNU General Public License, |
14 | * Software Foundation; either version 2 of the License, or (at your option) | 15 | * version 2, as published by the Free Software Foundation. |
15 | * any later version. | 16 | * |
17 | * This program is distributed in the hope it will be useful, but WITHOUT | ||
18 | * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or | ||
19 | * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for | ||
20 | * more details. | ||
21 | * | ||
22 | * You should have received a copy of the GNU General Public License along with | ||
23 | * this program; if not, write to the Free Software Foundation, Inc., | ||
24 | * 51 Franklin St - Fifth Floor, Boston, MA 02110-1301 USA. | ||
16 | * | 25 | * |
17 | */ | 26 | */ |
18 | #include <linux/init.h> | 27 | #include <linux/init.h> |
@@ -75,99 +84,92 @@ static u32 __pure crc32c_intel_le_hw(u32 crc, unsigned char const *p, size_t len | |||
75 | * If your algorithm starts with ~0, then XOR with ~0 before you set | 84 | * If your algorithm starts with ~0, then XOR with ~0 before you set |
76 | * the seed. | 85 | * the seed. |
77 | */ | 86 | */ |
78 | static int crc32c_intel_setkey(struct crypto_ahash *hash, const u8 *key, | 87 | static int crc32c_intel_setkey(struct crypto_shash *hash, const u8 *key, |
79 | unsigned int keylen) | 88 | unsigned int keylen) |
80 | { | 89 | { |
81 | u32 *mctx = crypto_ahash_ctx(hash); | 90 | u32 *mctx = crypto_shash_ctx(hash); |
82 | 91 | ||
83 | if (keylen != sizeof(u32)) { | 92 | if (keylen != sizeof(u32)) { |
84 | crypto_ahash_set_flags(hash, CRYPTO_TFM_RES_BAD_KEY_LEN); | 93 | crypto_shash_set_flags(hash, CRYPTO_TFM_RES_BAD_KEY_LEN); |
85 | return -EINVAL; | 94 | return -EINVAL; |
86 | } | 95 | } |
87 | *mctx = le32_to_cpup((__le32 *)key); | 96 | *mctx = le32_to_cpup((__le32 *)key); |
88 | return 0; | 97 | return 0; |
89 | } | 98 | } |
90 | 99 | ||
91 | static int crc32c_intel_init(struct ahash_request *req) | 100 | static int crc32c_intel_init(struct shash_desc *desc) |
92 | { | 101 | { |
93 | u32 *mctx = crypto_ahash_ctx(crypto_ahash_reqtfm(req)); | 102 | u32 *mctx = crypto_shash_ctx(desc->tfm); |
94 | u32 *crcp = ahash_request_ctx(req); | 103 | u32 *crcp = shash_desc_ctx(desc); |
95 | 104 | ||
96 | *crcp = *mctx; | 105 | *crcp = *mctx; |
97 | 106 | ||
98 | return 0; | 107 | return 0; |
99 | } | 108 | } |
100 | 109 | ||
101 | static int crc32c_intel_update(struct ahash_request *req) | 110 | static int crc32c_intel_update(struct shash_desc *desc, const u8 *data, |
111 | unsigned int len) | ||
102 | { | 112 | { |
103 | struct crypto_hash_walk walk; | 113 | u32 *crcp = shash_desc_ctx(desc); |
104 | u32 *crcp = ahash_request_ctx(req); | ||
105 | u32 crc = *crcp; | ||
106 | int nbytes; | ||
107 | |||
108 | for (nbytes = crypto_hash_walk_first(req, &walk); nbytes; | ||
109 | nbytes = crypto_hash_walk_done(&walk, 0)) | ||
110 | crc = crc32c_intel_le_hw(crc, walk.data, nbytes); | ||
111 | 114 | ||
112 | *crcp = crc; | 115 | *crcp = crc32c_intel_le_hw(*crcp, data, len); |
113 | return 0; | 116 | return 0; |
114 | } | 117 | } |
115 | 118 | ||
116 | static int crc32c_intel_final(struct ahash_request *req) | 119 | static int __crc32c_intel_finup(u32 *crcp, const u8 *data, unsigned int len, |
120 | u8 *out) | ||
117 | { | 121 | { |
118 | u32 *crcp = ahash_request_ctx(req); | 122 | *(__le32 *)out = ~cpu_to_le32(crc32c_intel_le_hw(*crcp, data, len)); |
119 | |||
120 | *(__le32 *)req->result = ~cpu_to_le32p(crcp); | ||
121 | return 0; | 123 | return 0; |
122 | } | 124 | } |
123 | 125 | ||
124 | static int crc32c_intel_digest(struct ahash_request *req) | 126 | static int crc32c_intel_finup(struct shash_desc *desc, const u8 *data, |
127 | unsigned int len, u8 *out) | ||
125 | { | 128 | { |
126 | struct crypto_hash_walk walk; | 129 | return __crc32c_intel_finup(shash_desc_ctx(desc), data, len, out); |
127 | u32 *mctx = crypto_ahash_ctx(crypto_ahash_reqtfm(req)); | 130 | } |
128 | u32 crc = *mctx; | ||
129 | int nbytes; | ||
130 | 131 | ||
131 | for (nbytes = crypto_hash_walk_first(req, &walk); nbytes; | 132 | static int crc32c_intel_final(struct shash_desc *desc, u8 *out) |
132 | nbytes = crypto_hash_walk_done(&walk, 0)) | 133 | { |
133 | crc = crc32c_intel_le_hw(crc, walk.data, nbytes); | 134 | u32 *crcp = shash_desc_ctx(desc); |
134 | 135 | ||
135 | *(__le32 *)req->result = ~cpu_to_le32(crc); | 136 | *(__le32 *)out = ~cpu_to_le32p(crcp); |
136 | return 0; | 137 | return 0; |
137 | } | 138 | } |
138 | 139 | ||
140 | static int crc32c_intel_digest(struct shash_desc *desc, const u8 *data, | ||
141 | unsigned int len, u8 *out) | ||
142 | { | ||
143 | return __crc32c_intel_finup(crypto_shash_ctx(desc->tfm), data, len, | ||
144 | out); | ||
145 | } | ||
146 | |||
139 | static int crc32c_intel_cra_init(struct crypto_tfm *tfm) | 147 | static int crc32c_intel_cra_init(struct crypto_tfm *tfm) |
140 | { | 148 | { |
141 | u32 *key = crypto_tfm_ctx(tfm); | 149 | u32 *key = crypto_tfm_ctx(tfm); |
142 | 150 | ||
143 | *key = ~0; | 151 | *key = ~0; |
144 | 152 | ||
145 | tfm->crt_ahash.reqsize = sizeof(u32); | ||
146 | |||
147 | return 0; | 153 | return 0; |
148 | } | 154 | } |
149 | 155 | ||
150 | static struct crypto_alg alg = { | 156 | static struct shash_alg alg = { |
151 | .cra_name = "crc32c", | 157 | .setkey = crc32c_intel_setkey, |
152 | .cra_driver_name = "crc32c-intel", | 158 | .init = crc32c_intel_init, |
153 | .cra_priority = 200, | 159 | .update = crc32c_intel_update, |
154 | .cra_flags = CRYPTO_ALG_TYPE_AHASH, | 160 | .final = crc32c_intel_final, |
155 | .cra_blocksize = CHKSUM_BLOCK_SIZE, | 161 | .finup = crc32c_intel_finup, |
156 | .cra_alignmask = 3, | 162 | .digest = crc32c_intel_digest, |
157 | .cra_ctxsize = sizeof(u32), | 163 | .descsize = sizeof(u32), |
158 | .cra_module = THIS_MODULE, | 164 | .digestsize = CHKSUM_DIGEST_SIZE, |
159 | .cra_list = LIST_HEAD_INIT(alg.cra_list), | 165 | .base = { |
160 | .cra_init = crc32c_intel_cra_init, | 166 | .cra_name = "crc32c", |
161 | .cra_type = &crypto_ahash_type, | 167 | .cra_driver_name = "crc32c-intel", |
162 | .cra_u = { | 168 | .cra_priority = 200, |
163 | .ahash = { | 169 | .cra_blocksize = CHKSUM_BLOCK_SIZE, |
164 | .digestsize = CHKSUM_DIGEST_SIZE, | 170 | .cra_ctxsize = sizeof(u32), |
165 | .setkey = crc32c_intel_setkey, | 171 | .cra_module = THIS_MODULE, |
166 | .init = crc32c_intel_init, | 172 | .cra_init = crc32c_intel_cra_init, |
167 | .update = crc32c_intel_update, | ||
168 | .final = crc32c_intel_final, | ||
169 | .digest = crc32c_intel_digest, | ||
170 | } | ||
171 | } | 173 | } |
172 | }; | 174 | }; |
173 | 175 | ||
@@ -175,14 +177,14 @@ static struct crypto_alg alg = { | |||
175 | static int __init crc32c_intel_mod_init(void) | 177 | static int __init crc32c_intel_mod_init(void) |
176 | { | 178 | { |
177 | if (cpu_has_xmm4_2) | 179 | if (cpu_has_xmm4_2) |
178 | return crypto_register_alg(&alg); | 180 | return crypto_register_shash(&alg); |
179 | else | 181 | else |
180 | return -ENODEV; | 182 | return -ENODEV; |
181 | } | 183 | } |
182 | 184 | ||
183 | static void __exit crc32c_intel_mod_fini(void) | 185 | static void __exit crc32c_intel_mod_fini(void) |
184 | { | 186 | { |
185 | crypto_unregister_alg(&alg); | 187 | crypto_unregister_shash(&alg); |
186 | } | 188 | } |
187 | 189 | ||
188 | module_init(crc32c_intel_mod_init); | 190 | module_init(crc32c_intel_mod_init); |
@@ -194,4 +196,3 @@ MODULE_LICENSE("GPL"); | |||
194 | 196 | ||
195 | MODULE_ALIAS("crc32c"); | 197 | MODULE_ALIAS("crc32c"); |
196 | MODULE_ALIAS("crc32c-intel"); | 198 | MODULE_ALIAS("crc32c-intel"); |
197 | |||
diff --git a/arch/x86/ia32/ia32_aout.c b/arch/x86/ia32/ia32_aout.c index 127ec3f07214..2a4d073d2cf1 100644 --- a/arch/x86/ia32/ia32_aout.c +++ b/arch/x86/ia32/ia32_aout.c | |||
@@ -327,7 +327,7 @@ static int load_aout_binary(struct linux_binprm *bprm, struct pt_regs *regs) | |||
327 | current->mm->cached_hole_size = 0; | 327 | current->mm->cached_hole_size = 0; |
328 | 328 | ||
329 | current->mm->mmap = NULL; | 329 | current->mm->mmap = NULL; |
330 | compute_creds(bprm); | 330 | install_exec_creds(bprm); |
331 | current->flags &= ~PF_FORKNOEXEC; | 331 | current->flags &= ~PF_FORKNOEXEC; |
332 | 332 | ||
333 | if (N_MAGIC(ex) == OMAGIC) { | 333 | if (N_MAGIC(ex) == OMAGIC) { |
diff --git a/arch/x86/ia32/ia32_signal.c b/arch/x86/ia32/ia32_signal.c index 4bc02b23674b..9dabd00e9805 100644 --- a/arch/x86/ia32/ia32_signal.c +++ b/arch/x86/ia32/ia32_signal.c | |||
@@ -24,13 +24,14 @@ | |||
24 | #include <asm/ucontext.h> | 24 | #include <asm/ucontext.h> |
25 | #include <asm/uaccess.h> | 25 | #include <asm/uaccess.h> |
26 | #include <asm/i387.h> | 26 | #include <asm/i387.h> |
27 | #include <asm/ia32.h> | ||
28 | #include <asm/ptrace.h> | 27 | #include <asm/ptrace.h> |
29 | #include <asm/ia32_unistd.h> | 28 | #include <asm/ia32_unistd.h> |
30 | #include <asm/user32.h> | 29 | #include <asm/user32.h> |
31 | #include <asm/sigcontext32.h> | 30 | #include <asm/sigcontext32.h> |
32 | #include <asm/proto.h> | 31 | #include <asm/proto.h> |
33 | #include <asm/vdso.h> | 32 | #include <asm/vdso.h> |
33 | #include <asm/sigframe.h> | ||
34 | #include <asm/sys_ia32.h> | ||
34 | 35 | ||
35 | #define DEBUG_SIG 0 | 36 | #define DEBUG_SIG 0 |
36 | 37 | ||
@@ -41,7 +42,6 @@ | |||
41 | X86_EFLAGS_ZF | X86_EFLAGS_AF | X86_EFLAGS_PF | \ | 42 | X86_EFLAGS_ZF | X86_EFLAGS_AF | X86_EFLAGS_PF | \ |
42 | X86_EFLAGS_CF) | 43 | X86_EFLAGS_CF) |
43 | 44 | ||
44 | asmlinkage int do_signal(struct pt_regs *regs, sigset_t *oldset); | ||
45 | void signal_fault(struct pt_regs *regs, void __user *frame, char *where); | 45 | void signal_fault(struct pt_regs *regs, void __user *frame, char *where); |
46 | 46 | ||
47 | int copy_siginfo_to_user32(compat_siginfo_t __user *to, siginfo_t *from) | 47 | int copy_siginfo_to_user32(compat_siginfo_t __user *to, siginfo_t *from) |
@@ -173,47 +173,28 @@ asmlinkage long sys32_sigaltstack(const stack_ia32_t __user *uss_ptr, | |||
173 | /* | 173 | /* |
174 | * Do a signal return; undo the signal stack. | 174 | * Do a signal return; undo the signal stack. |
175 | */ | 175 | */ |
176 | #define COPY(x) { \ | ||
177 | err |= __get_user(regs->x, &sc->x); \ | ||
178 | } | ||
176 | 179 | ||
177 | struct sigframe | 180 | #define COPY_SEG_CPL3(seg) { \ |
178 | { | 181 | unsigned short tmp; \ |
179 | u32 pretcode; | 182 | err |= __get_user(tmp, &sc->seg); \ |
180 | int sig; | 183 | regs->seg = tmp | 3; \ |
181 | struct sigcontext_ia32 sc; | ||
182 | struct _fpstate_ia32 fpstate_unused; /* look at kernel/sigframe.h */ | ||
183 | unsigned int extramask[_COMPAT_NSIG_WORDS-1]; | ||
184 | char retcode[8]; | ||
185 | /* fp state follows here */ | ||
186 | }; | ||
187 | |||
188 | struct rt_sigframe | ||
189 | { | ||
190 | u32 pretcode; | ||
191 | int sig; | ||
192 | u32 pinfo; | ||
193 | u32 puc; | ||
194 | compat_siginfo_t info; | ||
195 | struct ucontext_ia32 uc; | ||
196 | char retcode[8]; | ||
197 | /* fp state follows here */ | ||
198 | }; | ||
199 | |||
200 | #define COPY(x) { \ | ||
201 | unsigned int reg; \ | ||
202 | err |= __get_user(reg, &sc->x); \ | ||
203 | regs->x = reg; \ | ||
204 | } | 184 | } |
205 | 185 | ||
206 | #define RELOAD_SEG(seg,mask) \ | 186 | #define RELOAD_SEG(seg) { \ |
207 | { unsigned int cur; \ | 187 | unsigned int cur, pre; \ |
208 | unsigned short pre; \ | 188 | err |= __get_user(pre, &sc->seg); \ |
209 | err |= __get_user(pre, &sc->seg); \ | 189 | savesegment(seg, cur); \ |
210 | savesegment(seg, cur); \ | 190 | pre |= 3; \ |
211 | pre |= mask; \ | 191 | if (pre != cur) \ |
212 | if (pre != cur) loadsegment(seg, pre); } | 192 | loadsegment(seg, pre); \ |
193 | } | ||
213 | 194 | ||
214 | static int ia32_restore_sigcontext(struct pt_regs *regs, | 195 | static int ia32_restore_sigcontext(struct pt_regs *regs, |
215 | struct sigcontext_ia32 __user *sc, | 196 | struct sigcontext_ia32 __user *sc, |
216 | unsigned int *peax) | 197 | unsigned int *pax) |
217 | { | 198 | { |
218 | unsigned int tmpflags, gs, oldgs, err = 0; | 199 | unsigned int tmpflags, gs, oldgs, err = 0; |
219 | void __user *buf; | 200 | void __user *buf; |
@@ -240,18 +221,16 @@ static int ia32_restore_sigcontext(struct pt_regs *regs, | |||
240 | if (gs != oldgs) | 221 | if (gs != oldgs) |
241 | load_gs_index(gs); | 222 | load_gs_index(gs); |
242 | 223 | ||
243 | RELOAD_SEG(fs, 3); | 224 | RELOAD_SEG(fs); |
244 | RELOAD_SEG(ds, 3); | 225 | RELOAD_SEG(ds); |
245 | RELOAD_SEG(es, 3); | 226 | RELOAD_SEG(es); |
246 | 227 | ||
247 | COPY(di); COPY(si); COPY(bp); COPY(sp); COPY(bx); | 228 | COPY(di); COPY(si); COPY(bp); COPY(sp); COPY(bx); |
248 | COPY(dx); COPY(cx); COPY(ip); | 229 | COPY(dx); COPY(cx); COPY(ip); |
249 | /* Don't touch extended registers */ | 230 | /* Don't touch extended registers */ |
250 | 231 | ||
251 | err |= __get_user(regs->cs, &sc->cs); | 232 | COPY_SEG_CPL3(cs); |
252 | regs->cs |= 3; | 233 | COPY_SEG_CPL3(ss); |
253 | err |= __get_user(regs->ss, &sc->ss); | ||
254 | regs->ss |= 3; | ||
255 | 234 | ||
256 | err |= __get_user(tmpflags, &sc->flags); | 235 | err |= __get_user(tmpflags, &sc->flags); |
257 | regs->flags = (regs->flags & ~FIX_EFLAGS) | (tmpflags & FIX_EFLAGS); | 236 | regs->flags = (regs->flags & ~FIX_EFLAGS) | (tmpflags & FIX_EFLAGS); |
@@ -262,15 +241,13 @@ static int ia32_restore_sigcontext(struct pt_regs *regs, | |||
262 | buf = compat_ptr(tmp); | 241 | buf = compat_ptr(tmp); |
263 | err |= restore_i387_xstate_ia32(buf); | 242 | err |= restore_i387_xstate_ia32(buf); |
264 | 243 | ||
265 | err |= __get_user(tmp, &sc->ax); | 244 | err |= __get_user(*pax, &sc->ax); |
266 | *peax = tmp; | ||
267 | |||
268 | return err; | 245 | return err; |
269 | } | 246 | } |
270 | 247 | ||
271 | asmlinkage long sys32_sigreturn(struct pt_regs *regs) | 248 | asmlinkage long sys32_sigreturn(struct pt_regs *regs) |
272 | { | 249 | { |
273 | struct sigframe __user *frame = (struct sigframe __user *)(regs->sp-8); | 250 | struct sigframe_ia32 __user *frame = (struct sigframe_ia32 __user *)(regs->sp-8); |
274 | sigset_t set; | 251 | sigset_t set; |
275 | unsigned int ax; | 252 | unsigned int ax; |
276 | 253 | ||
@@ -300,12 +277,12 @@ badframe: | |||
300 | 277 | ||
301 | asmlinkage long sys32_rt_sigreturn(struct pt_regs *regs) | 278 | asmlinkage long sys32_rt_sigreturn(struct pt_regs *regs) |
302 | { | 279 | { |
303 | struct rt_sigframe __user *frame; | 280 | struct rt_sigframe_ia32 __user *frame; |
304 | sigset_t set; | 281 | sigset_t set; |
305 | unsigned int ax; | 282 | unsigned int ax; |
306 | struct pt_regs tregs; | 283 | struct pt_regs tregs; |
307 | 284 | ||
308 | frame = (struct rt_sigframe __user *)(regs->sp - 4); | 285 | frame = (struct rt_sigframe_ia32 __user *)(regs->sp - 4); |
309 | 286 | ||
310 | if (!access_ok(VERIFY_READ, frame, sizeof(*frame))) | 287 | if (!access_ok(VERIFY_READ, frame, sizeof(*frame))) |
311 | goto badframe; | 288 | goto badframe; |
@@ -359,20 +336,15 @@ static int ia32_setup_sigcontext(struct sigcontext_ia32 __user *sc, | |||
359 | err |= __put_user(regs->dx, &sc->dx); | 336 | err |= __put_user(regs->dx, &sc->dx); |
360 | err |= __put_user(regs->cx, &sc->cx); | 337 | err |= __put_user(regs->cx, &sc->cx); |
361 | err |= __put_user(regs->ax, &sc->ax); | 338 | err |= __put_user(regs->ax, &sc->ax); |
362 | err |= __put_user(regs->cs, &sc->cs); | ||
363 | err |= __put_user(regs->ss, &sc->ss); | ||
364 | err |= __put_user(current->thread.trap_no, &sc->trapno); | 339 | err |= __put_user(current->thread.trap_no, &sc->trapno); |
365 | err |= __put_user(current->thread.error_code, &sc->err); | 340 | err |= __put_user(current->thread.error_code, &sc->err); |
366 | err |= __put_user(regs->ip, &sc->ip); | 341 | err |= __put_user(regs->ip, &sc->ip); |
342 | err |= __put_user(regs->cs, (unsigned int __user *)&sc->cs); | ||
367 | err |= __put_user(regs->flags, &sc->flags); | 343 | err |= __put_user(regs->flags, &sc->flags); |
368 | err |= __put_user(regs->sp, &sc->sp_at_signal); | 344 | err |= __put_user(regs->sp, &sc->sp_at_signal); |
345 | err |= __put_user(regs->ss, (unsigned int __user *)&sc->ss); | ||
369 | 346 | ||
370 | tmp = save_i387_xstate_ia32(fpstate); | 347 | err |= __put_user(ptr_to_compat(fpstate), &sc->fpstate); |
371 | if (tmp < 0) | ||
372 | err = -EFAULT; | ||
373 | else | ||
374 | err |= __put_user(ptr_to_compat(tmp ? fpstate : NULL), | ||
375 | &sc->fpstate); | ||
376 | 348 | ||
377 | /* non-iBCS2 extensions.. */ | 349 | /* non-iBCS2 extensions.. */ |
378 | err |= __put_user(mask, &sc->oldmask); | 350 | err |= __put_user(mask, &sc->oldmask); |
@@ -400,7 +372,7 @@ static void __user *get_sigframe(struct k_sigaction *ka, struct pt_regs *regs, | |||
400 | } | 372 | } |
401 | 373 | ||
402 | /* This is the legacy signal stack switching. */ | 374 | /* This is the legacy signal stack switching. */ |
403 | else if ((regs->ss & 0xffff) != __USER_DS && | 375 | else if ((regs->ss & 0xffff) != __USER32_DS && |
404 | !(ka->sa.sa_flags & SA_RESTORER) && | 376 | !(ka->sa.sa_flags & SA_RESTORER) && |
405 | ka->sa.sa_restorer) | 377 | ka->sa.sa_restorer) |
406 | sp = (unsigned long) ka->sa.sa_restorer; | 378 | sp = (unsigned long) ka->sa.sa_restorer; |
@@ -408,6 +380,8 @@ static void __user *get_sigframe(struct k_sigaction *ka, struct pt_regs *regs, | |||
408 | if (used_math()) { | 380 | if (used_math()) { |
409 | sp = sp - sig_xstate_ia32_size; | 381 | sp = sp - sig_xstate_ia32_size; |
410 | *fpstate = (struct _fpstate_ia32 *) sp; | 382 | *fpstate = (struct _fpstate_ia32 *) sp; |
383 | if (save_i387_xstate_ia32(*fpstate) < 0) | ||
384 | return (void __user *) -1L; | ||
411 | } | 385 | } |
412 | 386 | ||
413 | sp -= frame_size; | 387 | sp -= frame_size; |
@@ -420,7 +394,7 @@ static void __user *get_sigframe(struct k_sigaction *ka, struct pt_regs *regs, | |||
420 | int ia32_setup_frame(int sig, struct k_sigaction *ka, | 394 | int ia32_setup_frame(int sig, struct k_sigaction *ka, |
421 | compat_sigset_t *set, struct pt_regs *regs) | 395 | compat_sigset_t *set, struct pt_regs *regs) |
422 | { | 396 | { |
423 | struct sigframe __user *frame; | 397 | struct sigframe_ia32 __user *frame; |
424 | void __user *restorer; | 398 | void __user *restorer; |
425 | int err = 0; | 399 | int err = 0; |
426 | void __user *fpstate = NULL; | 400 | void __user *fpstate = NULL; |
@@ -430,12 +404,10 @@ int ia32_setup_frame(int sig, struct k_sigaction *ka, | |||
430 | u16 poplmovl; | 404 | u16 poplmovl; |
431 | u32 val; | 405 | u32 val; |
432 | u16 int80; | 406 | u16 int80; |
433 | u16 pad; | ||
434 | } __attribute__((packed)) code = { | 407 | } __attribute__((packed)) code = { |
435 | 0xb858, /* popl %eax ; movl $...,%eax */ | 408 | 0xb858, /* popl %eax ; movl $...,%eax */ |
436 | __NR_ia32_sigreturn, | 409 | __NR_ia32_sigreturn, |
437 | 0x80cd, /* int $0x80 */ | 410 | 0x80cd, /* int $0x80 */ |
438 | 0, | ||
439 | }; | 411 | }; |
440 | 412 | ||
441 | frame = get_sigframe(ka, regs, sizeof(*frame), &fpstate); | 413 | frame = get_sigframe(ka, regs, sizeof(*frame), &fpstate); |
@@ -471,7 +443,7 @@ int ia32_setup_frame(int sig, struct k_sigaction *ka, | |||
471 | * These are actually not used anymore, but left because some | 443 | * These are actually not used anymore, but left because some |
472 | * gdb versions depend on them as a marker. | 444 | * gdb versions depend on them as a marker. |
473 | */ | 445 | */ |
474 | err |= __copy_to_user(frame->retcode, &code, 8); | 446 | err |= __put_user(*((u64 *)&code), (u64 *)frame->retcode); |
475 | if (err) | 447 | if (err) |
476 | return -EFAULT; | 448 | return -EFAULT; |
477 | 449 | ||
@@ -501,7 +473,7 @@ int ia32_setup_frame(int sig, struct k_sigaction *ka, | |||
501 | int ia32_setup_rt_frame(int sig, struct k_sigaction *ka, siginfo_t *info, | 473 | int ia32_setup_rt_frame(int sig, struct k_sigaction *ka, siginfo_t *info, |
502 | compat_sigset_t *set, struct pt_regs *regs) | 474 | compat_sigset_t *set, struct pt_regs *regs) |
503 | { | 475 | { |
504 | struct rt_sigframe __user *frame; | 476 | struct rt_sigframe_ia32 __user *frame; |
505 | void __user *restorer; | 477 | void __user *restorer; |
506 | int err = 0; | 478 | int err = 0; |
507 | void __user *fpstate = NULL; | 479 | void __user *fpstate = NULL; |
@@ -511,8 +483,7 @@ int ia32_setup_rt_frame(int sig, struct k_sigaction *ka, siginfo_t *info, | |||
511 | u8 movl; | 483 | u8 movl; |
512 | u32 val; | 484 | u32 val; |
513 | u16 int80; | 485 | u16 int80; |
514 | u16 pad; | 486 | u8 pad; |
515 | u8 pad2; | ||
516 | } __attribute__((packed)) code = { | 487 | } __attribute__((packed)) code = { |
517 | 0xb8, | 488 | 0xb8, |
518 | __NR_ia32_rt_sigreturn, | 489 | __NR_ia32_rt_sigreturn, |
@@ -559,7 +530,7 @@ int ia32_setup_rt_frame(int sig, struct k_sigaction *ka, siginfo_t *info, | |||
559 | * Not actually used anymore, but left because some gdb | 530 | * Not actually used anymore, but left because some gdb |
560 | * versions need it. | 531 | * versions need it. |
561 | */ | 532 | */ |
562 | err |= __copy_to_user(frame->retcode, &code, 8); | 533 | err |= __put_user(*((u64 *)&code), (u64 *)frame->retcode); |
563 | if (err) | 534 | if (err) |
564 | return -EFAULT; | 535 | return -EFAULT; |
565 | 536 | ||
@@ -572,11 +543,6 @@ int ia32_setup_rt_frame(int sig, struct k_sigaction *ka, siginfo_t *info, | |||
572 | regs->dx = (unsigned long) &frame->info; | 543 | regs->dx = (unsigned long) &frame->info; |
573 | regs->cx = (unsigned long) &frame->uc; | 544 | regs->cx = (unsigned long) &frame->uc; |
574 | 545 | ||
575 | /* Make -mregparm=3 work */ | ||
576 | regs->ax = sig; | ||
577 | regs->dx = (unsigned long) &frame->info; | ||
578 | regs->cx = (unsigned long) &frame->uc; | ||
579 | |||
580 | loadsegment(ds, __USER32_DS); | 546 | loadsegment(ds, __USER32_DS); |
581 | loadsegment(es, __USER32_DS); | 547 | loadsegment(es, __USER32_DS); |
582 | 548 | ||
diff --git a/arch/x86/ia32/ipc32.c b/arch/x86/ia32/ipc32.c index d21991ce606c..29cdcd02ead3 100644 --- a/arch/x86/ia32/ipc32.c +++ b/arch/x86/ia32/ipc32.c | |||
@@ -8,6 +8,7 @@ | |||
8 | #include <linux/shm.h> | 8 | #include <linux/shm.h> |
9 | #include <linux/ipc.h> | 9 | #include <linux/ipc.h> |
10 | #include <linux/compat.h> | 10 | #include <linux/compat.h> |
11 | #include <asm/sys_ia32.h> | ||
11 | 12 | ||
12 | asmlinkage long sys32_ipc(u32 call, int first, int second, int third, | 13 | asmlinkage long sys32_ipc(u32 call, int first, int second, int third, |
13 | compat_uptr_t ptr, u32 fifth) | 14 | compat_uptr_t ptr, u32 fifth) |
diff --git a/arch/x86/ia32/sys_ia32.c b/arch/x86/ia32/sys_ia32.c index 2e09dcd3c0a6..6c0d7f6231af 100644 --- a/arch/x86/ia32/sys_ia32.c +++ b/arch/x86/ia32/sys_ia32.c | |||
@@ -44,8 +44,8 @@ | |||
44 | #include <asm/types.h> | 44 | #include <asm/types.h> |
45 | #include <asm/uaccess.h> | 45 | #include <asm/uaccess.h> |
46 | #include <asm/atomic.h> | 46 | #include <asm/atomic.h> |
47 | #include <asm/ia32.h> | ||
48 | #include <asm/vgtod.h> | 47 | #include <asm/vgtod.h> |
48 | #include <asm/sys_ia32.h> | ||
49 | 49 | ||
50 | #define AA(__x) ((unsigned long)(__x)) | 50 | #define AA(__x) ((unsigned long)(__x)) |
51 | 51 | ||
diff --git a/arch/x86/include/asm/Kbuild b/arch/x86/include/asm/Kbuild index 4a8e80cdcfa5..a9f8a814a1f7 100644 --- a/arch/x86/include/asm/Kbuild +++ b/arch/x86/include/asm/Kbuild | |||
@@ -22,3 +22,4 @@ unifdef-y += unistd_32.h | |||
22 | unifdef-y += unistd_64.h | 22 | unifdef-y += unistd_64.h |
23 | unifdef-y += vm86.h | 23 | unifdef-y += vm86.h |
24 | unifdef-y += vsyscall.h | 24 | unifdef-y += vsyscall.h |
25 | unifdef-y += swab.h | ||
diff --git a/arch/x86/include/asm/amd_iommu_types.h b/arch/x86/include/asm/amd_iommu_types.h index ac302a2fa339..95c8cd9d22b5 100644 --- a/arch/x86/include/asm/amd_iommu_types.h +++ b/arch/x86/include/asm/amd_iommu_types.h | |||
@@ -190,16 +190,23 @@ | |||
190 | /* FIXME: move this macro to <linux/pci.h> */ | 190 | /* FIXME: move this macro to <linux/pci.h> */ |
191 | #define PCI_BUS(x) (((x) >> 8) & 0xff) | 191 | #define PCI_BUS(x) (((x) >> 8) & 0xff) |
192 | 192 | ||
193 | /* Protection domain flags */ | ||
194 | #define PD_DMA_OPS_MASK (1UL << 0) /* domain used for dma_ops */ | ||
195 | #define PD_DEFAULT_MASK (1UL << 1) /* domain is a default dma_ops | ||
196 | domain for an IOMMU */ | ||
197 | |||
193 | /* | 198 | /* |
194 | * This structure contains generic data for IOMMU protection domains | 199 | * This structure contains generic data for IOMMU protection domains |
195 | * independent of their use. | 200 | * independent of their use. |
196 | */ | 201 | */ |
197 | struct protection_domain { | 202 | struct protection_domain { |
198 | spinlock_t lock; /* mostly used to lock the page table*/ | 203 | spinlock_t lock; /* mostly used to lock the page table*/ |
199 | u16 id; /* the domain id written to the device table */ | 204 | u16 id; /* the domain id written to the device table */ |
200 | int mode; /* paging mode (0-6 levels) */ | 205 | int mode; /* paging mode (0-6 levels) */ |
201 | u64 *pt_root; /* page table root pointer */ | 206 | u64 *pt_root; /* page table root pointer */ |
202 | void *priv; /* private data */ | 207 | unsigned long flags; /* flags to find out type of domain */ |
208 | unsigned dev_cnt; /* devices assigned to this domain */ | ||
209 | void *priv; /* private data */ | ||
203 | }; | 210 | }; |
204 | 211 | ||
205 | /* | 212 | /* |
@@ -295,7 +302,7 @@ struct amd_iommu { | |||
295 | bool int_enabled; | 302 | bool int_enabled; |
296 | 303 | ||
297 | /* if one, we need to send a completion wait command */ | 304 | /* if one, we need to send a completion wait command */ |
298 | int need_sync; | 305 | bool need_sync; |
299 | 306 | ||
300 | /* default dma_ops domain for that IOMMU */ | 307 | /* default dma_ops domain for that IOMMU */ |
301 | struct dma_ops_domain *default_dom; | 308 | struct dma_ops_domain *default_dom; |
@@ -374,7 +381,7 @@ extern struct protection_domain **amd_iommu_pd_table; | |||
374 | extern unsigned long *amd_iommu_pd_alloc_bitmap; | 381 | extern unsigned long *amd_iommu_pd_alloc_bitmap; |
375 | 382 | ||
376 | /* will be 1 if device isolation is enabled */ | 383 | /* will be 1 if device isolation is enabled */ |
377 | extern int amd_iommu_isolate; | 384 | extern bool amd_iommu_isolate; |
378 | 385 | ||
379 | /* | 386 | /* |
380 | * If true, the addresses will be flushed on unmap time, not when | 387 | * If true, the addresses will be flushed on unmap time, not when |
@@ -382,18 +389,6 @@ extern int amd_iommu_isolate; | |||
382 | */ | 389 | */ |
383 | extern bool amd_iommu_unmap_flush; | 390 | extern bool amd_iommu_unmap_flush; |
384 | 391 | ||
385 | /* takes a PCI device id and prints it out in a readable form */ | ||
386 | static inline void print_devid(u16 devid, int nl) | ||
387 | { | ||
388 | int bus = devid >> 8; | ||
389 | int dev = devid >> 3 & 0x1f; | ||
390 | int fn = devid & 0x07; | ||
391 | |||
392 | printk("%02x:%02x.%x", bus, dev, fn); | ||
393 | if (nl) | ||
394 | printk("\n"); | ||
395 | } | ||
396 | |||
397 | /* takes bus and device/function and returns the device id | 392 | /* takes bus and device/function and returns the device id |
398 | * FIXME: should that be in generic PCI code? */ | 393 | * FIXME: should that be in generic PCI code? */ |
399 | static inline u16 calc_devid(u8 bus, u8 devfn) | 394 | static inline u16 calc_devid(u8 bus, u8 devfn) |
@@ -401,4 +396,32 @@ static inline u16 calc_devid(u8 bus, u8 devfn) | |||
401 | return (((u16)bus) << 8) | devfn; | 396 | return (((u16)bus) << 8) | devfn; |
402 | } | 397 | } |
403 | 398 | ||
399 | #ifdef CONFIG_AMD_IOMMU_STATS | ||
400 | |||
401 | struct __iommu_counter { | ||
402 | char *name; | ||
403 | struct dentry *dent; | ||
404 | u64 value; | ||
405 | }; | ||
406 | |||
407 | #define DECLARE_STATS_COUNTER(nm) \ | ||
408 | static struct __iommu_counter nm = { \ | ||
409 | .name = #nm, \ | ||
410 | } | ||
411 | |||
412 | #define INC_STATS_COUNTER(name) name.value += 1 | ||
413 | #define ADD_STATS_COUNTER(name, x) name.value += (x) | ||
414 | #define SUB_STATS_COUNTER(name, x) name.value -= (x) | ||
415 | |||
416 | #else /* CONFIG_AMD_IOMMU_STATS */ | ||
417 | |||
418 | #define DECLARE_STATS_COUNTER(name) | ||
419 | #define INC_STATS_COUNTER(name) | ||
420 | #define ADD_STATS_COUNTER(name, x) | ||
421 | #define SUB_STATS_COUNTER(name, x) | ||
422 | |||
423 | static inline void amd_iommu_stats_init(void) { } | ||
424 | |||
425 | #endif /* CONFIG_AMD_IOMMU_STATS */ | ||
426 | |||
404 | #endif /* _ASM_X86_AMD_IOMMU_TYPES_H */ | 427 | #endif /* _ASM_X86_AMD_IOMMU_TYPES_H */ |
diff --git a/arch/x86/include/asm/apic.h b/arch/x86/include/asm/apic.h index 3b1510b4fc57..ab1d51a8855e 100644 --- a/arch/x86/include/asm/apic.h +++ b/arch/x86/include/asm/apic.h | |||
@@ -54,7 +54,6 @@ extern int disable_apic; | |||
54 | extern int is_vsmp_box(void); | 54 | extern int is_vsmp_box(void); |
55 | extern void xapic_wait_icr_idle(void); | 55 | extern void xapic_wait_icr_idle(void); |
56 | extern u32 safe_xapic_wait_icr_idle(void); | 56 | extern u32 safe_xapic_wait_icr_idle(void); |
57 | extern u64 xapic_icr_read(void); | ||
58 | extern void xapic_icr_write(u32, u32); | 57 | extern void xapic_icr_write(u32, u32); |
59 | extern int setup_profiling_timer(unsigned int); | 58 | extern int setup_profiling_timer(unsigned int); |
60 | 59 | ||
@@ -93,7 +92,7 @@ static inline u32 native_apic_msr_read(u32 reg) | |||
93 | } | 92 | } |
94 | 93 | ||
95 | #ifndef CONFIG_X86_32 | 94 | #ifndef CONFIG_X86_32 |
96 | extern int x2apic, x2apic_preenabled; | 95 | extern int x2apic; |
97 | extern void check_x2apic(void); | 96 | extern void check_x2apic(void); |
98 | extern void enable_x2apic(void); | 97 | extern void enable_x2apic(void); |
99 | extern void enable_IR_x2apic(void); | 98 | extern void enable_IR_x2apic(void); |
@@ -193,6 +192,7 @@ extern u8 setup_APIC_eilvt_ibs(u8 vector, u8 msg_type, u8 mask); | |||
193 | static inline void lapic_shutdown(void) { } | 192 | static inline void lapic_shutdown(void) { } |
194 | #define local_apic_timer_c2_ok 1 | 193 | #define local_apic_timer_c2_ok 1 |
195 | static inline void init_apic_mappings(void) { } | 194 | static inline void init_apic_mappings(void) { } |
195 | static inline void disable_local_APIC(void) { } | ||
196 | 196 | ||
197 | #endif /* !CONFIG_X86_LOCAL_APIC */ | 197 | #endif /* !CONFIG_X86_LOCAL_APIC */ |
198 | 198 | ||
diff --git a/arch/x86/include/asm/atomic_32.h b/arch/x86/include/asm/atomic_32.h index ad5b9f6ecddf..85b46fba4229 100644 --- a/arch/x86/include/asm/atomic_32.h +++ b/arch/x86/include/asm/atomic_32.h | |||
@@ -2,6 +2,7 @@ | |||
2 | #define _ASM_X86_ATOMIC_32_H | 2 | #define _ASM_X86_ATOMIC_32_H |
3 | 3 | ||
4 | #include <linux/compiler.h> | 4 | #include <linux/compiler.h> |
5 | #include <linux/types.h> | ||
5 | #include <asm/processor.h> | 6 | #include <asm/processor.h> |
6 | #include <asm/cmpxchg.h> | 7 | #include <asm/cmpxchg.h> |
7 | 8 | ||
@@ -10,15 +11,6 @@ | |||
10 | * resource counting etc.. | 11 | * resource counting etc.. |
11 | */ | 12 | */ |
12 | 13 | ||
13 | /* | ||
14 | * Make sure gcc doesn't try to be clever and move things around | ||
15 | * on us. We need to use _exactly_ the address the user gave us, | ||
16 | * not some alias that contains the same information. | ||
17 | */ | ||
18 | typedef struct { | ||
19 | int counter; | ||
20 | } atomic_t; | ||
21 | |||
22 | #define ATOMIC_INIT(i) { (i) } | 14 | #define ATOMIC_INIT(i) { (i) } |
23 | 15 | ||
24 | /** | 16 | /** |
diff --git a/arch/x86/include/asm/atomic_64.h b/arch/x86/include/asm/atomic_64.h index 279d2a731f3f..8c21731984da 100644 --- a/arch/x86/include/asm/atomic_64.h +++ b/arch/x86/include/asm/atomic_64.h | |||
@@ -1,25 +1,15 @@ | |||
1 | #ifndef _ASM_X86_ATOMIC_64_H | 1 | #ifndef _ASM_X86_ATOMIC_64_H |
2 | #define _ASM_X86_ATOMIC_64_H | 2 | #define _ASM_X86_ATOMIC_64_H |
3 | 3 | ||
4 | #include <linux/types.h> | ||
4 | #include <asm/alternative.h> | 5 | #include <asm/alternative.h> |
5 | #include <asm/cmpxchg.h> | 6 | #include <asm/cmpxchg.h> |
6 | 7 | ||
7 | /* atomic_t should be 32 bit signed type */ | ||
8 | |||
9 | /* | 8 | /* |
10 | * Atomic operations that C can't guarantee us. Useful for | 9 | * Atomic operations that C can't guarantee us. Useful for |
11 | * resource counting etc.. | 10 | * resource counting etc.. |
12 | */ | 11 | */ |
13 | 12 | ||
14 | /* | ||
15 | * Make sure gcc doesn't try to be clever and move things around | ||
16 | * on us. We need to use _exactly_ the address the user gave us, | ||
17 | * not some alias that contains the same information. | ||
18 | */ | ||
19 | typedef struct { | ||
20 | int counter; | ||
21 | } atomic_t; | ||
22 | |||
23 | #define ATOMIC_INIT(i) { (i) } | 13 | #define ATOMIC_INIT(i) { (i) } |
24 | 14 | ||
25 | /** | 15 | /** |
@@ -191,11 +181,7 @@ static inline int atomic_sub_return(int i, atomic_t *v) | |||
191 | #define atomic_inc_return(v) (atomic_add_return(1, v)) | 181 | #define atomic_inc_return(v) (atomic_add_return(1, v)) |
192 | #define atomic_dec_return(v) (atomic_sub_return(1, v)) | 182 | #define atomic_dec_return(v) (atomic_sub_return(1, v)) |
193 | 183 | ||
194 | /* An 64bit atomic type */ | 184 | /* The 64-bit atomic type */ |
195 | |||
196 | typedef struct { | ||
197 | long counter; | ||
198 | } atomic64_t; | ||
199 | 185 | ||
200 | #define ATOMIC64_INIT(i) { (i) } | 186 | #define ATOMIC64_INIT(i) { (i) } |
201 | 187 | ||
diff --git a/arch/x86/include/asm/bigsmp/apic.h b/arch/x86/include/asm/bigsmp/apic.h index 1d9543b9d358..d8dd9f537911 100644 --- a/arch/x86/include/asm/bigsmp/apic.h +++ b/arch/x86/include/asm/bigsmp/apic.h | |||
@@ -9,12 +9,12 @@ static inline int apic_id_registered(void) | |||
9 | return (1); | 9 | return (1); |
10 | } | 10 | } |
11 | 11 | ||
12 | static inline cpumask_t target_cpus(void) | 12 | static inline const cpumask_t *target_cpus(void) |
13 | { | 13 | { |
14 | #ifdef CONFIG_SMP | 14 | #ifdef CONFIG_SMP |
15 | return cpu_online_map; | 15 | return &cpu_online_map; |
16 | #else | 16 | #else |
17 | return cpumask_of_cpu(0); | 17 | return &cpumask_of_cpu(0); |
18 | #endif | 18 | #endif |
19 | } | 19 | } |
20 | 20 | ||
@@ -24,8 +24,6 @@ static inline cpumask_t target_cpus(void) | |||
24 | #define INT_DELIVERY_MODE (dest_Fixed) | 24 | #define INT_DELIVERY_MODE (dest_Fixed) |
25 | #define INT_DEST_MODE (0) /* phys delivery to target proc */ | 25 | #define INT_DEST_MODE (0) /* phys delivery to target proc */ |
26 | #define NO_BALANCE_IRQ (0) | 26 | #define NO_BALANCE_IRQ (0) |
27 | #define WAKE_SECONDARY_VIA_INIT | ||
28 | |||
29 | 27 | ||
30 | static inline unsigned long check_apicid_used(physid_mask_t bitmap, int apicid) | 28 | static inline unsigned long check_apicid_used(physid_mask_t bitmap, int apicid) |
31 | { | 29 | { |
@@ -81,7 +79,7 @@ static inline int apicid_to_node(int logical_apicid) | |||
81 | 79 | ||
82 | static inline int cpu_present_to_apicid(int mps_cpu) | 80 | static inline int cpu_present_to_apicid(int mps_cpu) |
83 | { | 81 | { |
84 | if (mps_cpu < NR_CPUS) | 82 | if (mps_cpu < nr_cpu_ids) |
85 | return (int) per_cpu(x86_bios_cpu_apicid, mps_cpu); | 83 | return (int) per_cpu(x86_bios_cpu_apicid, mps_cpu); |
86 | 84 | ||
87 | return BAD_APICID; | 85 | return BAD_APICID; |
@@ -96,7 +94,7 @@ extern u8 cpu_2_logical_apicid[]; | |||
96 | /* Mapping from cpu number to logical apicid */ | 94 | /* Mapping from cpu number to logical apicid */ |
97 | static inline int cpu_to_logical_apicid(int cpu) | 95 | static inline int cpu_to_logical_apicid(int cpu) |
98 | { | 96 | { |
99 | if (cpu >= NR_CPUS) | 97 | if (cpu >= nr_cpu_ids) |
100 | return BAD_APICID; | 98 | return BAD_APICID; |
101 | return cpu_physical_id(cpu); | 99 | return cpu_physical_id(cpu); |
102 | } | 100 | } |
@@ -121,16 +119,34 @@ static inline int check_phys_apicid_present(int boot_cpu_physical_apicid) | |||
121 | } | 119 | } |
122 | 120 | ||
123 | /* As we are using single CPU as destination, pick only one CPU here */ | 121 | /* As we are using single CPU as destination, pick only one CPU here */ |
124 | static inline unsigned int cpu_mask_to_apicid(cpumask_t cpumask) | 122 | static inline unsigned int cpu_mask_to_apicid(const cpumask_t *cpumask) |
125 | { | 123 | { |
126 | int cpu; | 124 | int cpu; |
127 | int apicid; | 125 | int apicid; |
128 | 126 | ||
129 | cpu = first_cpu(cpumask); | 127 | cpu = first_cpu(*cpumask); |
130 | apicid = cpu_to_logical_apicid(cpu); | 128 | apicid = cpu_to_logical_apicid(cpu); |
131 | return apicid; | 129 | return apicid; |
132 | } | 130 | } |
133 | 131 | ||
132 | static inline unsigned int cpu_mask_to_apicid_and(const struct cpumask *cpumask, | ||
133 | const struct cpumask *andmask) | ||
134 | { | ||
135 | int cpu; | ||
136 | |||
137 | /* | ||
138 | * We're using fixed IRQ delivery, can only return one phys APIC ID. | ||
139 | * May as well be the first. | ||
140 | */ | ||
141 | for_each_cpu_and(cpu, cpumask, andmask) | ||
142 | if (cpumask_test_cpu(cpu, cpu_online_mask)) | ||
143 | break; | ||
144 | if (cpu < nr_cpu_ids) | ||
145 | return cpu_to_logical_apicid(cpu); | ||
146 | |||
147 | return BAD_APICID; | ||
148 | } | ||
149 | |||
134 | static inline u32 phys_pkg_id(u32 cpuid_apic, int index_msb) | 150 | static inline u32 phys_pkg_id(u32 cpuid_apic, int index_msb) |
135 | { | 151 | { |
136 | return cpuid_apic >> index_msb; | 152 | return cpuid_apic >> index_msb; |
diff --git a/arch/x86/include/asm/bigsmp/ipi.h b/arch/x86/include/asm/bigsmp/ipi.h index 9404c535b7ec..27fcd01b3ae6 100644 --- a/arch/x86/include/asm/bigsmp/ipi.h +++ b/arch/x86/include/asm/bigsmp/ipi.h | |||
@@ -1,25 +1,22 @@ | |||
1 | #ifndef __ASM_MACH_IPI_H | 1 | #ifndef __ASM_MACH_IPI_H |
2 | #define __ASM_MACH_IPI_H | 2 | #define __ASM_MACH_IPI_H |
3 | 3 | ||
4 | void send_IPI_mask_sequence(cpumask_t mask, int vector); | 4 | void send_IPI_mask_sequence(const struct cpumask *mask, int vector); |
5 | void send_IPI_mask_allbutself(const struct cpumask *mask, int vector); | ||
5 | 6 | ||
6 | static inline void send_IPI_mask(cpumask_t mask, int vector) | 7 | static inline void send_IPI_mask(const struct cpumask *mask, int vector) |
7 | { | 8 | { |
8 | send_IPI_mask_sequence(mask, vector); | 9 | send_IPI_mask_sequence(mask, vector); |
9 | } | 10 | } |
10 | 11 | ||
11 | static inline void send_IPI_allbutself(int vector) | 12 | static inline void send_IPI_allbutself(int vector) |
12 | { | 13 | { |
13 | cpumask_t mask = cpu_online_map; | 14 | send_IPI_mask_allbutself(cpu_online_mask, vector); |
14 | cpu_clear(smp_processor_id(), mask); | ||
15 | |||
16 | if (!cpus_empty(mask)) | ||
17 | send_IPI_mask(mask, vector); | ||
18 | } | 15 | } |
19 | 16 | ||
20 | static inline void send_IPI_all(int vector) | 17 | static inline void send_IPI_all(int vector) |
21 | { | 18 | { |
22 | send_IPI_mask(cpu_online_map, vector); | 19 | send_IPI_mask(cpu_online_mask, vector); |
23 | } | 20 | } |
24 | 21 | ||
25 | #endif /* __ASM_MACH_IPI_H */ | 22 | #endif /* __ASM_MACH_IPI_H */ |
diff --git a/arch/x86/include/asm/bitops.h b/arch/x86/include/asm/bitops.h index 360010322711..9fa9dcdf344b 100644 --- a/arch/x86/include/asm/bitops.h +++ b/arch/x86/include/asm/bitops.h | |||
@@ -168,7 +168,15 @@ static inline void __change_bit(int nr, volatile unsigned long *addr) | |||
168 | */ | 168 | */ |
169 | static inline void change_bit(int nr, volatile unsigned long *addr) | 169 | static inline void change_bit(int nr, volatile unsigned long *addr) |
170 | { | 170 | { |
171 | asm volatile(LOCK_PREFIX "btc %1,%0" : ADDR : "Ir" (nr)); | 171 | if (IS_IMMEDIATE(nr)) { |
172 | asm volatile(LOCK_PREFIX "xorb %1,%0" | ||
173 | : CONST_MASK_ADDR(nr, addr) | ||
174 | : "iq" ((u8)CONST_MASK(nr))); | ||
175 | } else { | ||
176 | asm volatile(LOCK_PREFIX "btc %1,%0" | ||
177 | : BITOP_ADDR(addr) | ||
178 | : "Ir" (nr)); | ||
179 | } | ||
172 | } | 180 | } |
173 | 181 | ||
174 | /** | 182 | /** |
diff --git a/arch/x86/include/asm/bug.h b/arch/x86/include/asm/bug.h index 3def2065fcea..d9cf1cd156d2 100644 --- a/arch/x86/include/asm/bug.h +++ b/arch/x86/include/asm/bug.h | |||
@@ -9,7 +9,7 @@ | |||
9 | #ifdef CONFIG_X86_32 | 9 | #ifdef CONFIG_X86_32 |
10 | # define __BUG_C0 "2:\t.long 1b, %c0\n" | 10 | # define __BUG_C0 "2:\t.long 1b, %c0\n" |
11 | #else | 11 | #else |
12 | # define __BUG_C0 "2:\t.quad 1b, %c0\n" | 12 | # define __BUG_C0 "2:\t.long 1b - 2b, %c0 - 2b\n" |
13 | #endif | 13 | #endif |
14 | 14 | ||
15 | #define BUG() \ | 15 | #define BUG() \ |
diff --git a/arch/x86/include/asm/byteorder.h b/arch/x86/include/asm/byteorder.h index e02ae2d89acf..7c49917e3d9d 100644 --- a/arch/x86/include/asm/byteorder.h +++ b/arch/x86/include/asm/byteorder.h | |||
@@ -1,81 +1,7 @@ | |||
1 | #ifndef _ASM_X86_BYTEORDER_H | 1 | #ifndef _ASM_X86_BYTEORDER_H |
2 | #define _ASM_X86_BYTEORDER_H | 2 | #define _ASM_X86_BYTEORDER_H |
3 | 3 | ||
4 | #include <asm/types.h> | 4 | #include <asm/swab.h> |
5 | #include <linux/compiler.h> | ||
6 | |||
7 | #ifdef __GNUC__ | ||
8 | |||
9 | #ifdef __i386__ | ||
10 | |||
11 | static inline __attribute_const__ __u32 ___arch__swab32(__u32 x) | ||
12 | { | ||
13 | #ifdef CONFIG_X86_BSWAP | ||
14 | asm("bswap %0" : "=r" (x) : "0" (x)); | ||
15 | #else | ||
16 | asm("xchgb %b0,%h0\n\t" /* swap lower bytes */ | ||
17 | "rorl $16,%0\n\t" /* swap words */ | ||
18 | "xchgb %b0,%h0" /* swap higher bytes */ | ||
19 | : "=q" (x) | ||
20 | : "0" (x)); | ||
21 | #endif | ||
22 | return x; | ||
23 | } | ||
24 | |||
25 | static inline __attribute_const__ __u64 ___arch__swab64(__u64 val) | ||
26 | { | ||
27 | union { | ||
28 | struct { | ||
29 | __u32 a; | ||
30 | __u32 b; | ||
31 | } s; | ||
32 | __u64 u; | ||
33 | } v; | ||
34 | v.u = val; | ||
35 | #ifdef CONFIG_X86_BSWAP | ||
36 | asm("bswapl %0 ; bswapl %1 ; xchgl %0,%1" | ||
37 | : "=r" (v.s.a), "=r" (v.s.b) | ||
38 | : "0" (v.s.a), "1" (v.s.b)); | ||
39 | #else | ||
40 | v.s.a = ___arch__swab32(v.s.a); | ||
41 | v.s.b = ___arch__swab32(v.s.b); | ||
42 | asm("xchgl %0,%1" | ||
43 | : "=r" (v.s.a), "=r" (v.s.b) | ||
44 | : "0" (v.s.a), "1" (v.s.b)); | ||
45 | #endif | ||
46 | return v.u; | ||
47 | } | ||
48 | |||
49 | #else /* __i386__ */ | ||
50 | |||
51 | static inline __attribute_const__ __u64 ___arch__swab64(__u64 x) | ||
52 | { | ||
53 | asm("bswapq %0" | ||
54 | : "=r" (x) | ||
55 | : "0" (x)); | ||
56 | return x; | ||
57 | } | ||
58 | |||
59 | static inline __attribute_const__ __u32 ___arch__swab32(__u32 x) | ||
60 | { | ||
61 | asm("bswapl %0" | ||
62 | : "=r" (x) | ||
63 | : "0" (x)); | ||
64 | return x; | ||
65 | } | ||
66 | |||
67 | #endif | ||
68 | |||
69 | /* Do not define swab16. Gcc is smart enough to recognize "C" version and | ||
70 | convert it into rotation or exhange. */ | ||
71 | |||
72 | #define __arch__swab64(x) ___arch__swab64(x) | ||
73 | #define __arch__swab32(x) ___arch__swab32(x) | ||
74 | |||
75 | #define __BYTEORDER_HAS_U64__ | ||
76 | |||
77 | #endif /* __GNUC__ */ | ||
78 | |||
79 | #include <linux/byteorder/little_endian.h> | 5 | #include <linux/byteorder/little_endian.h> |
80 | 6 | ||
81 | #endif /* _ASM_X86_BYTEORDER_H */ | 7 | #endif /* _ASM_X86_BYTEORDER_H */ |
diff --git a/arch/x86/include/asm/cpufeature.h b/arch/x86/include/asm/cpufeature.h index cfdf8c2c5c31..ea408dcba513 100644 --- a/arch/x86/include/asm/cpufeature.h +++ b/arch/x86/include/asm/cpufeature.h | |||
@@ -80,7 +80,6 @@ | |||
80 | #define X86_FEATURE_UP (3*32+ 9) /* smp kernel running on up */ | 80 | #define X86_FEATURE_UP (3*32+ 9) /* smp kernel running on up */ |
81 | #define X86_FEATURE_FXSAVE_LEAK (3*32+10) /* "" FXSAVE leaks FOP/FIP/FOP */ | 81 | #define X86_FEATURE_FXSAVE_LEAK (3*32+10) /* "" FXSAVE leaks FOP/FIP/FOP */ |
82 | #define X86_FEATURE_ARCH_PERFMON (3*32+11) /* Intel Architectural PerfMon */ | 82 | #define X86_FEATURE_ARCH_PERFMON (3*32+11) /* Intel Architectural PerfMon */ |
83 | #define X86_FEATURE_NOPL (3*32+20) /* The NOPL (0F 1F) instructions */ | ||
84 | #define X86_FEATURE_PEBS (3*32+12) /* Precise-Event Based Sampling */ | 83 | #define X86_FEATURE_PEBS (3*32+12) /* Precise-Event Based Sampling */ |
85 | #define X86_FEATURE_BTS (3*32+13) /* Branch Trace Store */ | 84 | #define X86_FEATURE_BTS (3*32+13) /* Branch Trace Store */ |
86 | #define X86_FEATURE_SYSCALL32 (3*32+14) /* "" syscall in ia32 userspace */ | 85 | #define X86_FEATURE_SYSCALL32 (3*32+14) /* "" syscall in ia32 userspace */ |
@@ -92,6 +91,8 @@ | |||
92 | #define X86_FEATURE_NOPL (3*32+20) /* The NOPL (0F 1F) instructions */ | 91 | #define X86_FEATURE_NOPL (3*32+20) /* The NOPL (0F 1F) instructions */ |
93 | #define X86_FEATURE_AMDC1E (3*32+21) /* AMD C1E detected */ | 92 | #define X86_FEATURE_AMDC1E (3*32+21) /* AMD C1E detected */ |
94 | #define X86_FEATURE_XTOPOLOGY (3*32+22) /* cpu topology enum extensions */ | 93 | #define X86_FEATURE_XTOPOLOGY (3*32+22) /* cpu topology enum extensions */ |
94 | #define X86_FEATURE_TSC_RELIABLE (3*32+23) /* TSC is known to be reliable */ | ||
95 | #define X86_FEATURE_NONSTOP_TSC (3*32+24) /* TSC does not stop in C states */ | ||
95 | 96 | ||
96 | /* Intel-defined CPU features, CPUID level 0x00000001 (ecx), word 4 */ | 97 | /* Intel-defined CPU features, CPUID level 0x00000001 (ecx), word 4 */ |
97 | #define X86_FEATURE_XMM3 (4*32+ 0) /* "pni" SSE-3 */ | 98 | #define X86_FEATURE_XMM3 (4*32+ 0) /* "pni" SSE-3 */ |
@@ -117,6 +118,7 @@ | |||
117 | #define X86_FEATURE_XSAVE (4*32+26) /* XSAVE/XRSTOR/XSETBV/XGETBV */ | 118 | #define X86_FEATURE_XSAVE (4*32+26) /* XSAVE/XRSTOR/XSETBV/XGETBV */ |
118 | #define X86_FEATURE_OSXSAVE (4*32+27) /* "" XSAVE enabled in the OS */ | 119 | #define X86_FEATURE_OSXSAVE (4*32+27) /* "" XSAVE enabled in the OS */ |
119 | #define X86_FEATURE_AVX (4*32+28) /* Advanced Vector Extensions */ | 120 | #define X86_FEATURE_AVX (4*32+28) /* Advanced Vector Extensions */ |
121 | #define X86_FEATURE_HYPERVISOR (4*32+31) /* Running on a hypervisor */ | ||
120 | 122 | ||
121 | /* VIA/Cyrix/Centaur-defined CPU features, CPUID level 0xC0000001, word 5 */ | 123 | /* VIA/Cyrix/Centaur-defined CPU features, CPUID level 0xC0000001, word 5 */ |
122 | #define X86_FEATURE_XSTORE (5*32+ 2) /* "rng" RNG present (xstore) */ | 124 | #define X86_FEATURE_XSTORE (5*32+ 2) /* "rng" RNG present (xstore) */ |
@@ -237,6 +239,7 @@ extern const char * const x86_power_flags[32]; | |||
237 | #define cpu_has_xmm4_2 boot_cpu_has(X86_FEATURE_XMM4_2) | 239 | #define cpu_has_xmm4_2 boot_cpu_has(X86_FEATURE_XMM4_2) |
238 | #define cpu_has_x2apic boot_cpu_has(X86_FEATURE_X2APIC) | 240 | #define cpu_has_x2apic boot_cpu_has(X86_FEATURE_X2APIC) |
239 | #define cpu_has_xsave boot_cpu_has(X86_FEATURE_XSAVE) | 241 | #define cpu_has_xsave boot_cpu_has(X86_FEATURE_XSAVE) |
242 | #define cpu_has_hypervisor boot_cpu_has(X86_FEATURE_HYPERVISOR) | ||
240 | 243 | ||
241 | #if defined(CONFIG_X86_INVLPG) || defined(CONFIG_X86_64) | 244 | #if defined(CONFIG_X86_INVLPG) || defined(CONFIG_X86_64) |
242 | # define cpu_has_invlpg 1 | 245 | # define cpu_has_invlpg 1 |
diff --git a/arch/x86/include/asm/desc.h b/arch/x86/include/asm/desc.h index e6b82b17b072..dc27705f5443 100644 --- a/arch/x86/include/asm/desc.h +++ b/arch/x86/include/asm/desc.h | |||
@@ -320,16 +320,14 @@ static inline void set_intr_gate(unsigned int n, void *addr) | |||
320 | _set_gate(n, GATE_INTERRUPT, addr, 0, 0, __KERNEL_CS); | 320 | _set_gate(n, GATE_INTERRUPT, addr, 0, 0, __KERNEL_CS); |
321 | } | 321 | } |
322 | 322 | ||
323 | #define SYS_VECTOR_FREE 0 | ||
324 | #define SYS_VECTOR_ALLOCED 1 | ||
325 | |||
326 | extern int first_system_vector; | 323 | extern int first_system_vector; |
327 | extern char system_vectors[]; | 324 | /* used_vectors is a bitmap for irqs that are not managed by percpu vector_irq */ |
325 | extern unsigned long used_vectors[]; | ||
328 | 326 | ||
329 | static inline void alloc_system_vector(int vector) | 327 | static inline void alloc_system_vector(int vector) |
330 | { | 328 | { |
331 | if (system_vectors[vector] == SYS_VECTOR_FREE) { | 329 | if (!test_bit(vector, used_vectors)) { |
332 | system_vectors[vector] = SYS_VECTOR_ALLOCED; | 330 | set_bit(vector, used_vectors); |
333 | if (first_system_vector > vector) | 331 | if (first_system_vector > vector) |
334 | first_system_vector = vector; | 332 | first_system_vector = vector; |
335 | } else | 333 | } else |
diff --git a/arch/x86/include/asm/dma-mapping.h b/arch/x86/include/asm/dma-mapping.h index 097794ff6b79..4035357f5b9d 100644 --- a/arch/x86/include/asm/dma-mapping.h +++ b/arch/x86/include/asm/dma-mapping.h | |||
@@ -65,18 +65,16 @@ static inline struct dma_mapping_ops *get_dma_ops(struct device *dev) | |||
65 | return dma_ops; | 65 | return dma_ops; |
66 | else | 66 | else |
67 | return dev->archdata.dma_ops; | 67 | return dev->archdata.dma_ops; |
68 | #endif /* _ASM_X86_DMA_MAPPING_H */ | 68 | #endif |
69 | } | 69 | } |
70 | 70 | ||
71 | /* Make sure we keep the same behaviour */ | 71 | /* Make sure we keep the same behaviour */ |
72 | static inline int dma_mapping_error(struct device *dev, dma_addr_t dma_addr) | 72 | static inline int dma_mapping_error(struct device *dev, dma_addr_t dma_addr) |
73 | { | 73 | { |
74 | #ifdef CONFIG_X86_64 | ||
75 | struct dma_mapping_ops *ops = get_dma_ops(dev); | 74 | struct dma_mapping_ops *ops = get_dma_ops(dev); |
76 | if (ops->mapping_error) | 75 | if (ops->mapping_error) |
77 | return ops->mapping_error(dev, dma_addr); | 76 | return ops->mapping_error(dev, dma_addr); |
78 | 77 | ||
79 | #endif | ||
80 | return (dma_addr == bad_dma_address); | 78 | return (dma_addr == bad_dma_address); |
81 | } | 79 | } |
82 | 80 | ||
diff --git a/arch/x86/include/asm/ds.h b/arch/x86/include/asm/ds.h index a95008457ea4..a8f672ba100c 100644 --- a/arch/x86/include/asm/ds.h +++ b/arch/x86/include/asm/ds.h | |||
@@ -6,14 +6,13 @@ | |||
6 | * precise-event based sampling (PEBS). | 6 | * precise-event based sampling (PEBS). |
7 | * | 7 | * |
8 | * It manages: | 8 | * It manages: |
9 | * - per-thread and per-cpu allocation of BTS and PEBS | 9 | * - DS and BTS hardware configuration |
10 | * - buffer memory allocation (optional) | 10 | * - buffer overflow handling (to be done) |
11 | * - buffer overflow handling | ||
12 | * - buffer access | 11 | * - buffer access |
13 | * | 12 | * |
14 | * It assumes: | 13 | * It does not do: |
15 | * - get_task_struct on all parameter tasks | 14 | * - security checking (is the caller allowed to trace the task) |
16 | * - current is allowed to trace parameter tasks | 15 | * - buffer allocation (memory accounting) |
17 | * | 16 | * |
18 | * | 17 | * |
19 | * Copyright (C) 2007-2008 Intel Corporation. | 18 | * Copyright (C) 2007-2008 Intel Corporation. |
@@ -26,11 +25,51 @@ | |||
26 | 25 | ||
27 | #include <linux/types.h> | 26 | #include <linux/types.h> |
28 | #include <linux/init.h> | 27 | #include <linux/init.h> |
28 | #include <linux/err.h> | ||
29 | 29 | ||
30 | 30 | ||
31 | #ifdef CONFIG_X86_DS | 31 | #ifdef CONFIG_X86_DS |
32 | 32 | ||
33 | struct task_struct; | 33 | struct task_struct; |
34 | struct ds_context; | ||
35 | struct ds_tracer; | ||
36 | struct bts_tracer; | ||
37 | struct pebs_tracer; | ||
38 | |||
39 | typedef void (*bts_ovfl_callback_t)(struct bts_tracer *); | ||
40 | typedef void (*pebs_ovfl_callback_t)(struct pebs_tracer *); | ||
41 | |||
42 | |||
43 | /* | ||
44 | * A list of features plus corresponding macros to talk about them in | ||
45 | * the ds_request function's flags parameter. | ||
46 | * | ||
47 | * We use the enum to index an array of corresponding control bits; | ||
48 | * we use the macro to index a flags bit-vector. | ||
49 | */ | ||
50 | enum ds_feature { | ||
51 | dsf_bts = 0, | ||
52 | dsf_bts_kernel, | ||
53 | #define BTS_KERNEL (1 << dsf_bts_kernel) | ||
54 | /* trace kernel-mode branches */ | ||
55 | |||
56 | dsf_bts_user, | ||
57 | #define BTS_USER (1 << dsf_bts_user) | ||
58 | /* trace user-mode branches */ | ||
59 | |||
60 | dsf_bts_overflow, | ||
61 | dsf_bts_max, | ||
62 | dsf_pebs = dsf_bts_max, | ||
63 | |||
64 | dsf_pebs_max, | ||
65 | dsf_ctl_max = dsf_pebs_max, | ||
66 | dsf_bts_timestamps = dsf_ctl_max, | ||
67 | #define BTS_TIMESTAMPS (1 << dsf_bts_timestamps) | ||
68 | /* add timestamps into BTS trace */ | ||
69 | |||
70 | #define BTS_USER_FLAGS (BTS_KERNEL | BTS_USER | BTS_TIMESTAMPS) | ||
71 | }; | ||
72 | |||
34 | 73 | ||
35 | /* | 74 | /* |
36 | * Request BTS or PEBS | 75 | * Request BTS or PEBS |
@@ -38,163 +77,169 @@ struct task_struct; | |||
38 | * Due to alignment constraints, the actual buffer may be slightly | 77 | * Due to alignment constraints, the actual buffer may be slightly |
39 | * smaller than the requested or provided buffer. | 78 | * smaller than the requested or provided buffer. |
40 | * | 79 | * |
41 | * Returns 0 on success; -Eerrno otherwise | 80 | * Returns a pointer to a tracer structure on success, or |
81 | * ERR_PTR(errcode) on failure. | ||
82 | * | ||
83 | * The interrupt threshold is independent from the overflow callback | ||
84 | * to allow users to use their own overflow interrupt handling mechanism. | ||
42 | * | 85 | * |
43 | * task: the task to request recording for; | 86 | * task: the task to request recording for; |
44 | * NULL for per-cpu recording on the current cpu | 87 | * NULL for per-cpu recording on the current cpu |
45 | * base: the base pointer for the (non-pageable) buffer; | 88 | * base: the base pointer for the (non-pageable) buffer; |
46 | * NULL if buffer allocation requested | 89 | * size: the size of the provided buffer in bytes |
47 | * size: the size of the requested or provided buffer | ||
48 | * ovfl: pointer to a function to be called on buffer overflow; | 90 | * ovfl: pointer to a function to be called on buffer overflow; |
49 | * NULL if cyclic buffer requested | 91 | * NULL if cyclic buffer requested |
92 | * th: the interrupt threshold in records from the end of the buffer; | ||
93 | * -1 if no interrupt threshold is requested. | ||
94 | * flags: a bit-mask of the above flags | ||
50 | */ | 95 | */ |
51 | typedef void (*ds_ovfl_callback_t)(struct task_struct *); | 96 | extern struct bts_tracer *ds_request_bts(struct task_struct *task, |
52 | extern int ds_request_bts(struct task_struct *task, void *base, size_t size, | 97 | void *base, size_t size, |
53 | ds_ovfl_callback_t ovfl); | 98 | bts_ovfl_callback_t ovfl, |
54 | extern int ds_request_pebs(struct task_struct *task, void *base, size_t size, | 99 | size_t th, unsigned int flags); |
55 | ds_ovfl_callback_t ovfl); | 100 | extern struct pebs_tracer *ds_request_pebs(struct task_struct *task, |
101 | void *base, size_t size, | ||
102 | pebs_ovfl_callback_t ovfl, | ||
103 | size_t th, unsigned int flags); | ||
56 | 104 | ||
57 | /* | 105 | /* |
58 | * Release BTS or PEBS resources | 106 | * Release BTS or PEBS resources |
107 | * Suspend and resume BTS or PEBS tracing | ||
59 | * | 108 | * |
60 | * Frees buffers allocated on ds_request. | 109 | * tracer: the tracer handle returned from ds_request_~() |
61 | * | ||
62 | * Returns 0 on success; -Eerrno otherwise | ||
63 | * | ||
64 | * task: the task to release resources for; | ||
65 | * NULL to release resources for the current cpu | ||
66 | */ | 110 | */ |
67 | extern int ds_release_bts(struct task_struct *task); | 111 | extern void ds_release_bts(struct bts_tracer *tracer); |
68 | extern int ds_release_pebs(struct task_struct *task); | 112 | extern void ds_suspend_bts(struct bts_tracer *tracer); |
113 | extern void ds_resume_bts(struct bts_tracer *tracer); | ||
114 | extern void ds_release_pebs(struct pebs_tracer *tracer); | ||
115 | extern void ds_suspend_pebs(struct pebs_tracer *tracer); | ||
116 | extern void ds_resume_pebs(struct pebs_tracer *tracer); | ||
69 | 117 | ||
70 | /* | ||
71 | * Return the (array) index of the write pointer. | ||
72 | * (assuming an array of BTS/PEBS records) | ||
73 | * | ||
74 | * Returns -Eerrno on error | ||
75 | * | ||
76 | * task: the task to access; | ||
77 | * NULL to access the current cpu | ||
78 | * pos (out): if not NULL, will hold the result | ||
79 | */ | ||
80 | extern int ds_get_bts_index(struct task_struct *task, size_t *pos); | ||
81 | extern int ds_get_pebs_index(struct task_struct *task, size_t *pos); | ||
82 | 118 | ||
83 | /* | 119 | /* |
84 | * Return the (array) index one record beyond the end of the array. | 120 | * The raw DS buffer state as it is used for BTS and PEBS recording. |
85 | * (assuming an array of BTS/PEBS records) | ||
86 | * | 121 | * |
87 | * Returns -Eerrno on error | 122 | * This is the low-level, arch-dependent interface for working |
88 | * | 123 | * directly on the raw trace data. |
89 | * task: the task to access; | ||
90 | * NULL to access the current cpu | ||
91 | * pos (out): if not NULL, will hold the result | ||
92 | */ | 124 | */ |
93 | extern int ds_get_bts_end(struct task_struct *task, size_t *pos); | 125 | struct ds_trace { |
94 | extern int ds_get_pebs_end(struct task_struct *task, size_t *pos); | 126 | /* the number of bts/pebs records */ |
127 | size_t n; | ||
128 | /* the size of a bts/pebs record in bytes */ | ||
129 | size_t size; | ||
130 | /* pointers into the raw buffer: | ||
131 | - to the first entry */ | ||
132 | void *begin; | ||
133 | /* - one beyond the last entry */ | ||
134 | void *end; | ||
135 | /* - one beyond the newest entry */ | ||
136 | void *top; | ||
137 | /* - the interrupt threshold */ | ||
138 | void *ith; | ||
139 | /* flags given on ds_request() */ | ||
140 | unsigned int flags; | ||
141 | }; | ||
95 | 142 | ||
96 | /* | 143 | /* |
97 | * Provide a pointer to the BTS/PEBS record at parameter index. | 144 | * An arch-independent view on branch trace data. |
98 | * (assuming an array of BTS/PEBS records) | ||
99 | * | ||
100 | * The pointer points directly into the buffer. The user is | ||
101 | * responsible for copying the record. | ||
102 | * | ||
103 | * Returns the size of a single record on success; -Eerrno on error | ||
104 | * | ||
105 | * task: the task to access; | ||
106 | * NULL to access the current cpu | ||
107 | * index: the index of the requested record | ||
108 | * record (out): pointer to the requested record | ||
109 | */ | 145 | */ |
110 | extern int ds_access_bts(struct task_struct *task, | 146 | enum bts_qualifier { |
111 | size_t index, const void **record); | 147 | bts_invalid, |
112 | extern int ds_access_pebs(struct task_struct *task, | 148 | #define BTS_INVALID bts_invalid |
113 | size_t index, const void **record); | 149 | |
150 | bts_branch, | ||
151 | #define BTS_BRANCH bts_branch | ||
152 | |||
153 | bts_task_arrives, | ||
154 | #define BTS_TASK_ARRIVES bts_task_arrives | ||
155 | |||
156 | bts_task_departs, | ||
157 | #define BTS_TASK_DEPARTS bts_task_departs | ||
158 | |||
159 | bts_qual_bit_size = 4, | ||
160 | bts_qual_max = (1 << bts_qual_bit_size), | ||
161 | }; | ||
162 | |||
163 | struct bts_struct { | ||
164 | __u64 qualifier; | ||
165 | union { | ||
166 | /* BTS_BRANCH */ | ||
167 | struct { | ||
168 | __u64 from; | ||
169 | __u64 to; | ||
170 | } lbr; | ||
171 | /* BTS_TASK_ARRIVES or BTS_TASK_DEPARTS */ | ||
172 | struct { | ||
173 | __u64 jiffies; | ||
174 | pid_t pid; | ||
175 | } timestamp; | ||
176 | } variant; | ||
177 | }; | ||
114 | 178 | ||
115 | /* | ||
116 | * Write one or more BTS/PEBS records at the write pointer index and | ||
117 | * advance the write pointer. | ||
118 | * | ||
119 | * If size is not a multiple of the record size, trailing bytes are | ||
120 | * zeroed out. | ||
121 | * | ||
122 | * May result in one or more overflow notifications. | ||
123 | * | ||
124 | * If called during overflow handling, that is, with index >= | ||
125 | * interrupt threshold, the write will wrap around. | ||
126 | * | ||
127 | * An overflow notification is given if and when the interrupt | ||
128 | * threshold is reached during or after the write. | ||
129 | * | ||
130 | * Returns the number of bytes written or -Eerrno. | ||
131 | * | ||
132 | * task: the task to access; | ||
133 | * NULL to access the current cpu | ||
134 | * buffer: the buffer to write | ||
135 | * size: the size of the buffer | ||
136 | */ | ||
137 | extern int ds_write_bts(struct task_struct *task, | ||
138 | const void *buffer, size_t size); | ||
139 | extern int ds_write_pebs(struct task_struct *task, | ||
140 | const void *buffer, size_t size); | ||
141 | 179 | ||
142 | /* | 180 | /* |
143 | * Same as ds_write_bts/pebs, but omit ownership checks. | 181 | * The BTS state. |
144 | * | 182 | * |
145 | * This is needed to have some other task than the owner of the | 183 | * This gives access to the raw DS state and adds functions to provide |
146 | * BTS/PEBS buffer or the parameter task itself write into the | 184 | * an arch-independent view of the BTS data. |
147 | * respective buffer. | ||
148 | */ | 185 | */ |
149 | extern int ds_unchecked_write_bts(struct task_struct *task, | 186 | struct bts_trace { |
150 | const void *buffer, size_t size); | 187 | struct ds_trace ds; |
151 | extern int ds_unchecked_write_pebs(struct task_struct *task, | 188 | |
152 | const void *buffer, size_t size); | 189 | int (*read)(struct bts_tracer *tracer, const void *at, |
190 | struct bts_struct *out); | ||
191 | int (*write)(struct bts_tracer *tracer, const struct bts_struct *in); | ||
192 | }; | ||
193 | |||
153 | 194 | ||
154 | /* | 195 | /* |
155 | * Reset the write pointer of the BTS/PEBS buffer. | 196 | * The PEBS state. |
156 | * | 197 | * |
157 | * Returns 0 on success; -Eerrno on error | 198 | * This gives access to the raw DS state and the PEBS-specific counter |
158 | * | 199 | * reset value. |
159 | * task: the task to access; | ||
160 | * NULL to access the current cpu | ||
161 | */ | 200 | */ |
162 | extern int ds_reset_bts(struct task_struct *task); | 201 | struct pebs_trace { |
163 | extern int ds_reset_pebs(struct task_struct *task); | 202 | struct ds_trace ds; |
203 | |||
204 | /* the PEBS reset value */ | ||
205 | unsigned long long reset_value; | ||
206 | }; | ||
207 | |||
164 | 208 | ||
165 | /* | 209 | /* |
166 | * Clear the BTS/PEBS buffer and reset the write pointer. | 210 | * Read the BTS or PEBS trace. |
167 | * The entire buffer will be zeroed out. | ||
168 | * | 211 | * |
169 | * Returns 0 on success; -Eerrno on error | 212 | * Returns a view on the trace collected for the parameter tracer. |
213 | * | ||
214 | * The view remains valid as long as the traced task is not running or | ||
215 | * the tracer is suspended. | ||
216 | * Writes into the trace buffer are not reflected. | ||
170 | * | 217 | * |
171 | * task: the task to access; | 218 | * tracer: the tracer handle returned from ds_request_~() |
172 | * NULL to access the current cpu | ||
173 | */ | 219 | */ |
174 | extern int ds_clear_bts(struct task_struct *task); | 220 | extern const struct bts_trace *ds_read_bts(struct bts_tracer *tracer); |
175 | extern int ds_clear_pebs(struct task_struct *task); | 221 | extern const struct pebs_trace *ds_read_pebs(struct pebs_tracer *tracer); |
222 | |||
176 | 223 | ||
177 | /* | 224 | /* |
178 | * Provide the PEBS counter reset value. | 225 | * Reset the write pointer of the BTS/PEBS buffer. |
179 | * | 226 | * |
180 | * Returns 0 on success; -Eerrno on error | 227 | * Returns 0 on success; -Eerrno on error |
181 | * | 228 | * |
182 | * task: the task to access; | 229 | * tracer: the tracer handle returned from ds_request_~() |
183 | * NULL to access the current cpu | ||
184 | * value (out): the counter reset value | ||
185 | */ | 230 | */ |
186 | extern int ds_get_pebs_reset(struct task_struct *task, u64 *value); | 231 | extern int ds_reset_bts(struct bts_tracer *tracer); |
232 | extern int ds_reset_pebs(struct pebs_tracer *tracer); | ||
187 | 233 | ||
188 | /* | 234 | /* |
189 | * Set the PEBS counter reset value. | 235 | * Set the PEBS counter reset value. |
190 | * | 236 | * |
191 | * Returns 0 on success; -Eerrno on error | 237 | * Returns 0 on success; -Eerrno on error |
192 | * | 238 | * |
193 | * task: the task to access; | 239 | * tracer: the tracer handle returned from ds_request_pebs() |
194 | * NULL to access the current cpu | ||
195 | * value: the new counter reset value | 240 | * value: the new counter reset value |
196 | */ | 241 | */ |
197 | extern int ds_set_pebs_reset(struct task_struct *task, u64 value); | 242 | extern int ds_set_pebs_reset(struct pebs_tracer *tracer, u64 value); |
198 | 243 | ||
199 | /* | 244 | /* |
200 | * Initialization | 245 | * Initialization |
@@ -202,39 +247,26 @@ extern int ds_set_pebs_reset(struct task_struct *task, u64 value); | |||
202 | struct cpuinfo_x86; | 247 | struct cpuinfo_x86; |
203 | extern void __cpuinit ds_init_intel(struct cpuinfo_x86 *); | 248 | extern void __cpuinit ds_init_intel(struct cpuinfo_x86 *); |
204 | 249 | ||
205 | |||
206 | |||
207 | /* | 250 | /* |
208 | * The DS context - part of struct thread_struct. | 251 | * Context switch work |
209 | */ | 252 | */ |
210 | struct ds_context { | 253 | extern void ds_switch_to(struct task_struct *prev, struct task_struct *next); |
211 | /* pointer to the DS configuration; goes into MSR_IA32_DS_AREA */ | ||
212 | unsigned char *ds; | ||
213 | /* the owner of the BTS and PEBS configuration, respectively */ | ||
214 | struct task_struct *owner[2]; | ||
215 | /* buffer overflow notification function for BTS and PEBS */ | ||
216 | ds_ovfl_callback_t callback[2]; | ||
217 | /* the original buffer address */ | ||
218 | void *buffer[2]; | ||
219 | /* the number of allocated pages for on-request allocated buffers */ | ||
220 | unsigned int pages[2]; | ||
221 | /* use count */ | ||
222 | unsigned long count; | ||
223 | /* a pointer to the context location inside the thread_struct | ||
224 | * or the per_cpu context array */ | ||
225 | struct ds_context **this; | ||
226 | /* a pointer to the task owning this context, or NULL, if the | ||
227 | * context is owned by a cpu */ | ||
228 | struct task_struct *task; | ||
229 | }; | ||
230 | 254 | ||
231 | /* called by exit_thread() to free leftover contexts */ | 255 | /* |
232 | extern void ds_free(struct ds_context *context); | 256 | * Task clone/init and cleanup work |
257 | */ | ||
258 | extern void ds_copy_thread(struct task_struct *tsk, struct task_struct *father); | ||
259 | extern void ds_exit_thread(struct task_struct *tsk); | ||
233 | 260 | ||
234 | #else /* CONFIG_X86_DS */ | 261 | #else /* CONFIG_X86_DS */ |
235 | 262 | ||
236 | struct cpuinfo_x86; | 263 | struct cpuinfo_x86; |
237 | static inline void __cpuinit ds_init_intel(struct cpuinfo_x86 *ignored) {} | 264 | static inline void __cpuinit ds_init_intel(struct cpuinfo_x86 *ignored) {} |
265 | static inline void ds_switch_to(struct task_struct *prev, | ||
266 | struct task_struct *next) {} | ||
267 | static inline void ds_copy_thread(struct task_struct *tsk, | ||
268 | struct task_struct *father) {} | ||
269 | static inline void ds_exit_thread(struct task_struct *tsk) {} | ||
238 | 270 | ||
239 | #endif /* CONFIG_X86_DS */ | 271 | #endif /* CONFIG_X86_DS */ |
240 | #endif /* _ASM_X86_DS_H */ | 272 | #endif /* _ASM_X86_DS_H */ |
diff --git a/arch/x86/include/asm/dwarf2.h b/arch/x86/include/asm/dwarf2.h index 804b6e6be929..3afc5e87cfdd 100644 --- a/arch/x86/include/asm/dwarf2.h +++ b/arch/x86/include/asm/dwarf2.h | |||
@@ -6,56 +6,91 @@ | |||
6 | #endif | 6 | #endif |
7 | 7 | ||
8 | /* | 8 | /* |
9 | Macros for dwarf2 CFI unwind table entries. | 9 | * Macros for dwarf2 CFI unwind table entries. |
10 | See "as.info" for details on these pseudo ops. Unfortunately | 10 | * See "as.info" for details on these pseudo ops. Unfortunately |
11 | they are only supported in very new binutils, so define them | 11 | * they are only supported in very new binutils, so define them |
12 | away for older version. | 12 | * away for older version. |
13 | */ | 13 | */ |
14 | 14 | ||
15 | #ifdef CONFIG_AS_CFI | 15 | #ifdef CONFIG_AS_CFI |
16 | 16 | ||
17 | #define CFI_STARTPROC .cfi_startproc | 17 | #define CFI_STARTPROC .cfi_startproc |
18 | #define CFI_ENDPROC .cfi_endproc | 18 | #define CFI_ENDPROC .cfi_endproc |
19 | #define CFI_DEF_CFA .cfi_def_cfa | 19 | #define CFI_DEF_CFA .cfi_def_cfa |
20 | #define CFI_DEF_CFA_REGISTER .cfi_def_cfa_register | 20 | #define CFI_DEF_CFA_REGISTER .cfi_def_cfa_register |
21 | #define CFI_DEF_CFA_OFFSET .cfi_def_cfa_offset | 21 | #define CFI_DEF_CFA_OFFSET .cfi_def_cfa_offset |
22 | #define CFI_ADJUST_CFA_OFFSET .cfi_adjust_cfa_offset | 22 | #define CFI_ADJUST_CFA_OFFSET .cfi_adjust_cfa_offset |
23 | #define CFI_OFFSET .cfi_offset | 23 | #define CFI_OFFSET .cfi_offset |
24 | #define CFI_REL_OFFSET .cfi_rel_offset | 24 | #define CFI_REL_OFFSET .cfi_rel_offset |
25 | #define CFI_REGISTER .cfi_register | 25 | #define CFI_REGISTER .cfi_register |
26 | #define CFI_RESTORE .cfi_restore | 26 | #define CFI_RESTORE .cfi_restore |
27 | #define CFI_REMEMBER_STATE .cfi_remember_state | 27 | #define CFI_REMEMBER_STATE .cfi_remember_state |
28 | #define CFI_RESTORE_STATE .cfi_restore_state | 28 | #define CFI_RESTORE_STATE .cfi_restore_state |
29 | #define CFI_UNDEFINED .cfi_undefined | 29 | #define CFI_UNDEFINED .cfi_undefined |
30 | 30 | ||
31 | #ifdef CONFIG_AS_CFI_SIGNAL_FRAME | 31 | #ifdef CONFIG_AS_CFI_SIGNAL_FRAME |
32 | #define CFI_SIGNAL_FRAME .cfi_signal_frame | 32 | #define CFI_SIGNAL_FRAME .cfi_signal_frame |
33 | #else | 33 | #else |
34 | #define CFI_SIGNAL_FRAME | 34 | #define CFI_SIGNAL_FRAME |
35 | #endif | 35 | #endif |
36 | 36 | ||
37 | #else | 37 | #else |
38 | 38 | ||
39 | /* Due to the structure of pre-existing code, don't use assembler line | 39 | /* |
40 | comment character # to ignore the arguments. Instead, use a dummy macro. */ | 40 | * Due to the structure of pre-existing code, don't use assembler line |
41 | * comment character # to ignore the arguments. Instead, use a dummy macro. | ||
42 | */ | ||
41 | .macro cfi_ignore a=0, b=0, c=0, d=0 | 43 | .macro cfi_ignore a=0, b=0, c=0, d=0 |
42 | .endm | 44 | .endm |
43 | 45 | ||
44 | #define CFI_STARTPROC cfi_ignore | 46 | #define CFI_STARTPROC cfi_ignore |
45 | #define CFI_ENDPROC cfi_ignore | 47 | #define CFI_ENDPROC cfi_ignore |
46 | #define CFI_DEF_CFA cfi_ignore | 48 | #define CFI_DEF_CFA cfi_ignore |
47 | #define CFI_DEF_CFA_REGISTER cfi_ignore | 49 | #define CFI_DEF_CFA_REGISTER cfi_ignore |
48 | #define CFI_DEF_CFA_OFFSET cfi_ignore | 50 | #define CFI_DEF_CFA_OFFSET cfi_ignore |
49 | #define CFI_ADJUST_CFA_OFFSET cfi_ignore | 51 | #define CFI_ADJUST_CFA_OFFSET cfi_ignore |
50 | #define CFI_OFFSET cfi_ignore | 52 | #define CFI_OFFSET cfi_ignore |
51 | #define CFI_REL_OFFSET cfi_ignore | 53 | #define CFI_REL_OFFSET cfi_ignore |
52 | #define CFI_REGISTER cfi_ignore | 54 | #define CFI_REGISTER cfi_ignore |
53 | #define CFI_RESTORE cfi_ignore | 55 | #define CFI_RESTORE cfi_ignore |
54 | #define CFI_REMEMBER_STATE cfi_ignore | 56 | #define CFI_REMEMBER_STATE cfi_ignore |
55 | #define CFI_RESTORE_STATE cfi_ignore | 57 | #define CFI_RESTORE_STATE cfi_ignore |
56 | #define CFI_UNDEFINED cfi_ignore | 58 | #define CFI_UNDEFINED cfi_ignore |
57 | #define CFI_SIGNAL_FRAME cfi_ignore | 59 | #define CFI_SIGNAL_FRAME cfi_ignore |
58 | 60 | ||
59 | #endif | 61 | #endif |
60 | 62 | ||
63 | /* | ||
64 | * An attempt to make CFI annotations more or less | ||
65 | * correct and shorter. It is implied that you know | ||
66 | * what you're doing if you use them. | ||
67 | */ | ||
68 | #ifdef __ASSEMBLY__ | ||
69 | #ifdef CONFIG_X86_64 | ||
70 | .macro pushq_cfi reg | ||
71 | pushq \reg | ||
72 | CFI_ADJUST_CFA_OFFSET 8 | ||
73 | .endm | ||
74 | |||
75 | .macro popq_cfi reg | ||
76 | popq \reg | ||
77 | CFI_ADJUST_CFA_OFFSET -8 | ||
78 | .endm | ||
79 | |||
80 | .macro movq_cfi reg offset=0 | ||
81 | movq %\reg, \offset(%rsp) | ||
82 | CFI_REL_OFFSET \reg, \offset | ||
83 | .endm | ||
84 | |||
85 | .macro movq_cfi_restore offset reg | ||
86 | movq \offset(%rsp), %\reg | ||
87 | CFI_RESTORE \reg | ||
88 | .endm | ||
89 | #else /*!CONFIG_X86_64*/ | ||
90 | |||
91 | /* 32-bit definitions are still missing */ | ||
92 | |||
93 | #endif /*!CONFIG_X86_64*/ | ||
94 | #endif /*__ASSEMBLY__*/ | ||
95 | |||
61 | #endif /* _ASM_X86_DWARF2_H */ | 96 | #endif /* _ASM_X86_DWARF2_H */ |
diff --git a/arch/x86/include/asm/efi.h b/arch/x86/include/asm/efi.h index a2e545c91c35..ca5ffb2856b6 100644 --- a/arch/x86/include/asm/efi.h +++ b/arch/x86/include/asm/efi.h | |||
@@ -90,6 +90,7 @@ extern void __iomem *efi_ioremap(unsigned long addr, unsigned long size); | |||
90 | 90 | ||
91 | #endif /* CONFIG_X86_32 */ | 91 | #endif /* CONFIG_X86_32 */ |
92 | 92 | ||
93 | extern int add_efi_memmap; | ||
93 | extern void efi_reserve_early(void); | 94 | extern void efi_reserve_early(void); |
94 | extern void efi_call_phys_prelog(void); | 95 | extern void efi_call_phys_prelog(void); |
95 | extern void efi_call_phys_epilog(void); | 96 | extern void efi_call_phys_epilog(void); |
diff --git a/arch/x86/include/asm/elf.h b/arch/x86/include/asm/elf.h index 40ca1bea7916..f51a3ddde01a 100644 --- a/arch/x86/include/asm/elf.h +++ b/arch/x86/include/asm/elf.h | |||
@@ -325,7 +325,7 @@ struct linux_binprm; | |||
325 | 325 | ||
326 | #define ARCH_HAS_SETUP_ADDITIONAL_PAGES 1 | 326 | #define ARCH_HAS_SETUP_ADDITIONAL_PAGES 1 |
327 | extern int arch_setup_additional_pages(struct linux_binprm *bprm, | 327 | extern int arch_setup_additional_pages(struct linux_binprm *bprm, |
328 | int executable_stack); | 328 | int uses_interp); |
329 | 329 | ||
330 | extern int syscall32_setup_pages(struct linux_binprm *, int exstack); | 330 | extern int syscall32_setup_pages(struct linux_binprm *, int exstack); |
331 | #define compat_arch_setup_additional_pages syscall32_setup_pages | 331 | #define compat_arch_setup_additional_pages syscall32_setup_pages |
diff --git a/arch/x86/include/asm/emergency-restart.h b/arch/x86/include/asm/emergency-restart.h index 94826cf87455..cc70c1c78ca4 100644 --- a/arch/x86/include/asm/emergency-restart.h +++ b/arch/x86/include/asm/emergency-restart.h | |||
@@ -8,7 +8,9 @@ enum reboot_type { | |||
8 | BOOT_BIOS = 'b', | 8 | BOOT_BIOS = 'b', |
9 | #endif | 9 | #endif |
10 | BOOT_ACPI = 'a', | 10 | BOOT_ACPI = 'a', |
11 | BOOT_EFI = 'e' | 11 | BOOT_EFI = 'e', |
12 | BOOT_CF9 = 'p', | ||
13 | BOOT_CF9_COND = 'q', | ||
12 | }; | 14 | }; |
13 | 15 | ||
14 | extern enum reboot_type reboot_type; | 16 | extern enum reboot_type reboot_type; |
diff --git a/arch/x86/include/asm/es7000/apic.h b/arch/x86/include/asm/es7000/apic.h index 380f0b4f17ed..bc53d5ef1386 100644 --- a/arch/x86/include/asm/es7000/apic.h +++ b/arch/x86/include/asm/es7000/apic.h | |||
@@ -9,31 +9,27 @@ static inline int apic_id_registered(void) | |||
9 | return (1); | 9 | return (1); |
10 | } | 10 | } |
11 | 11 | ||
12 | static inline cpumask_t target_cpus(void) | 12 | static inline const cpumask_t *target_cpus_cluster(void) |
13 | { | 13 | { |
14 | #if defined CONFIG_ES7000_CLUSTERED_APIC | 14 | return &CPU_MASK_ALL; |
15 | return CPU_MASK_ALL; | ||
16 | #else | ||
17 | return cpumask_of_cpu(smp_processor_id()); | ||
18 | #endif | ||
19 | } | 15 | } |
20 | 16 | ||
21 | #if defined CONFIG_ES7000_CLUSTERED_APIC | 17 | static inline const cpumask_t *target_cpus(void) |
22 | #define APIC_DFR_VALUE (APIC_DFR_CLUSTER) | 18 | { |
23 | #define INT_DELIVERY_MODE (dest_LowestPrio) | 19 | return &cpumask_of_cpu(smp_processor_id()); |
24 | #define INT_DEST_MODE (1) /* logical delivery broadcast to all procs */ | 20 | } |
25 | #define NO_BALANCE_IRQ (1) | 21 | |
26 | #undef WAKE_SECONDARY_VIA_INIT | 22 | #define APIC_DFR_VALUE_CLUSTER (APIC_DFR_CLUSTER) |
27 | #define WAKE_SECONDARY_VIA_MIP | 23 | #define INT_DELIVERY_MODE_CLUSTER (dest_LowestPrio) |
28 | #else | 24 | #define INT_DEST_MODE_CLUSTER (1) /* logical delivery broadcast to all procs */ |
25 | #define NO_BALANCE_IRQ_CLUSTER (1) | ||
26 | |||
29 | #define APIC_DFR_VALUE (APIC_DFR_FLAT) | 27 | #define APIC_DFR_VALUE (APIC_DFR_FLAT) |
30 | #define INT_DELIVERY_MODE (dest_Fixed) | 28 | #define INT_DELIVERY_MODE (dest_Fixed) |
31 | #define INT_DEST_MODE (0) /* phys delivery to target procs */ | 29 | #define INT_DEST_MODE (0) /* phys delivery to target procs */ |
32 | #define NO_BALANCE_IRQ (0) | 30 | #define NO_BALANCE_IRQ (0) |
33 | #undef APIC_DEST_LOGICAL | 31 | #undef APIC_DEST_LOGICAL |
34 | #define APIC_DEST_LOGICAL 0x0 | 32 | #define APIC_DEST_LOGICAL 0x0 |
35 | #define WAKE_SECONDARY_VIA_INIT | ||
36 | #endif | ||
37 | 33 | ||
38 | static inline unsigned long check_apicid_used(physid_mask_t bitmap, int apicid) | 34 | static inline unsigned long check_apicid_used(physid_mask_t bitmap, int apicid) |
39 | { | 35 | { |
@@ -60,6 +56,16 @@ static inline unsigned long calculate_ldr(int cpu) | |||
60 | * an APIC. See e.g. "AP-388 82489DX User's Manual" (Intel | 56 | * an APIC. See e.g. "AP-388 82489DX User's Manual" (Intel |
61 | * document number 292116). So here it goes... | 57 | * document number 292116). So here it goes... |
62 | */ | 58 | */ |
59 | static inline void init_apic_ldr_cluster(void) | ||
60 | { | ||
61 | unsigned long val; | ||
62 | int cpu = smp_processor_id(); | ||
63 | |||
64 | apic_write(APIC_DFR, APIC_DFR_VALUE_CLUSTER); | ||
65 | val = calculate_ldr(cpu); | ||
66 | apic_write(APIC_LDR, val); | ||
67 | } | ||
68 | |||
63 | static inline void init_apic_ldr(void) | 69 | static inline void init_apic_ldr(void) |
64 | { | 70 | { |
65 | unsigned long val; | 71 | unsigned long val; |
@@ -70,17 +76,14 @@ static inline void init_apic_ldr(void) | |||
70 | apic_write(APIC_LDR, val); | 76 | apic_write(APIC_LDR, val); |
71 | } | 77 | } |
72 | 78 | ||
73 | #ifndef CONFIG_X86_GENERICARCH | ||
74 | extern void enable_apic_mode(void); | ||
75 | #endif | ||
76 | |||
77 | extern int apic_version [MAX_APICS]; | 79 | extern int apic_version [MAX_APICS]; |
78 | static inline void setup_apic_routing(void) | 80 | static inline void setup_apic_routing(void) |
79 | { | 81 | { |
80 | int apic = per_cpu(x86_bios_cpu_apicid, smp_processor_id()); | 82 | int apic = per_cpu(x86_bios_cpu_apicid, smp_processor_id()); |
81 | printk("Enabling APIC mode: %s. Using %d I/O APICs, target cpus %lx\n", | 83 | printk("Enabling APIC mode: %s. Using %d I/O APICs, target cpus %lx\n", |
82 | (apic_version[apic] == 0x14) ? | 84 | (apic_version[apic] == 0x14) ? |
83 | "Physical Cluster" : "Logical Cluster", nr_ioapics, cpus_addr(target_cpus())[0]); | 85 | "Physical Cluster" : "Logical Cluster", |
86 | nr_ioapics, cpus_addr(*target_cpus())[0]); | ||
84 | } | 87 | } |
85 | 88 | ||
86 | static inline int multi_timer_check(int apic, int irq) | 89 | static inline int multi_timer_check(int apic, int irq) |
@@ -98,7 +101,7 @@ static inline int cpu_present_to_apicid(int mps_cpu) | |||
98 | { | 101 | { |
99 | if (!mps_cpu) | 102 | if (!mps_cpu) |
100 | return boot_cpu_physical_apicid; | 103 | return boot_cpu_physical_apicid; |
101 | else if (mps_cpu < NR_CPUS) | 104 | else if (mps_cpu < nr_cpu_ids) |
102 | return (int) per_cpu(x86_bios_cpu_apicid, mps_cpu); | 105 | return (int) per_cpu(x86_bios_cpu_apicid, mps_cpu); |
103 | else | 106 | else |
104 | return BAD_APICID; | 107 | return BAD_APICID; |
@@ -118,9 +121,9 @@ extern u8 cpu_2_logical_apicid[]; | |||
118 | static inline int cpu_to_logical_apicid(int cpu) | 121 | static inline int cpu_to_logical_apicid(int cpu) |
119 | { | 122 | { |
120 | #ifdef CONFIG_SMP | 123 | #ifdef CONFIG_SMP |
121 | if (cpu >= NR_CPUS) | 124 | if (cpu >= nr_cpu_ids) |
122 | return BAD_APICID; | 125 | return BAD_APICID; |
123 | return (int)cpu_2_logical_apicid[cpu]; | 126 | return (int)cpu_2_logical_apicid[cpu]; |
124 | #else | 127 | #else |
125 | return logical_smp_processor_id(); | 128 | return logical_smp_processor_id(); |
126 | #endif | 129 | #endif |
@@ -144,38 +147,64 @@ static inline int check_phys_apicid_present(int cpu_physical_apicid) | |||
144 | return (1); | 147 | return (1); |
145 | } | 148 | } |
146 | 149 | ||
147 | static inline unsigned int cpu_mask_to_apicid(cpumask_t cpumask) | 150 | static inline unsigned int |
151 | cpu_mask_to_apicid_cluster(const struct cpumask *cpumask) | ||
148 | { | 152 | { |
149 | int num_bits_set; | 153 | int num_bits_set; |
150 | int cpus_found = 0; | 154 | int cpus_found = 0; |
151 | int cpu; | 155 | int cpu; |
152 | int apicid; | 156 | int apicid; |
153 | 157 | ||
154 | num_bits_set = cpus_weight(cpumask); | 158 | num_bits_set = cpumask_weight(cpumask); |
155 | /* Return id to all */ | 159 | /* Return id to all */ |
156 | if (num_bits_set == NR_CPUS) | 160 | if (num_bits_set == nr_cpu_ids) |
157 | #if defined CONFIG_ES7000_CLUSTERED_APIC | ||
158 | return 0xFF; | 161 | return 0xFF; |
159 | #else | ||
160 | return cpu_to_logical_apicid(0); | ||
161 | #endif | ||
162 | /* | 162 | /* |
163 | * The cpus in the mask must all be on the apic cluster. If they are not | 163 | * The cpus in the mask must all be on the apic cluster. If they are not |
164 | * on the same apicid cluster, return the default value of TARGET_CPUS. | 164 | * on the same apicid cluster, return the default value of TARGET_CPUS. |
165 | */ | 165 | */ |
166 | cpu = first_cpu(cpumask); | 166 | cpu = cpumask_first(cpumask); |
167 | apicid = cpu_to_logical_apicid(cpu); | 167 | apicid = cpu_to_logical_apicid(cpu); |
168 | while (cpus_found < num_bits_set) { | 168 | while (cpus_found < num_bits_set) { |
169 | if (cpu_isset(cpu, cpumask)) { | 169 | if (cpumask_test_cpu(cpu, cpumask)) { |
170 | int new_apicid = cpu_to_logical_apicid(cpu); | 170 | int new_apicid = cpu_to_logical_apicid(cpu); |
171 | if (apicid_cluster(apicid) != | 171 | if (apicid_cluster(apicid) != |
172 | apicid_cluster(new_apicid)){ | 172 | apicid_cluster(new_apicid)){ |
173 | printk ("%s: Not a valid mask!\n", __func__); | 173 | printk ("%s: Not a valid mask!\n", __func__); |
174 | #if defined CONFIG_ES7000_CLUSTERED_APIC | ||
175 | return 0xFF; | 174 | return 0xFF; |
176 | #else | 175 | } |
176 | apicid = new_apicid; | ||
177 | cpus_found++; | ||
178 | } | ||
179 | cpu++; | ||
180 | } | ||
181 | return apicid; | ||
182 | } | ||
183 | |||
184 | static inline unsigned int cpu_mask_to_apicid(const cpumask_t *cpumask) | ||
185 | { | ||
186 | int num_bits_set; | ||
187 | int cpus_found = 0; | ||
188 | int cpu; | ||
189 | int apicid; | ||
190 | |||
191 | num_bits_set = cpus_weight(*cpumask); | ||
192 | /* Return id to all */ | ||
193 | if (num_bits_set == nr_cpu_ids) | ||
194 | return cpu_to_logical_apicid(0); | ||
195 | /* | ||
196 | * The cpus in the mask must all be on the apic cluster. If they are not | ||
197 | * on the same apicid cluster, return the default value of TARGET_CPUS. | ||
198 | */ | ||
199 | cpu = first_cpu(*cpumask); | ||
200 | apicid = cpu_to_logical_apicid(cpu); | ||
201 | while (cpus_found < num_bits_set) { | ||
202 | if (cpu_isset(cpu, *cpumask)) { | ||
203 | int new_apicid = cpu_to_logical_apicid(cpu); | ||
204 | if (apicid_cluster(apicid) != | ||
205 | apicid_cluster(new_apicid)){ | ||
206 | printk ("%s: Not a valid mask!\n", __func__); | ||
177 | return cpu_to_logical_apicid(0); | 207 | return cpu_to_logical_apicid(0); |
178 | #endif | ||
179 | } | 208 | } |
180 | apicid = new_apicid; | 209 | apicid = new_apicid; |
181 | cpus_found++; | 210 | cpus_found++; |
@@ -185,6 +214,24 @@ static inline unsigned int cpu_mask_to_apicid(cpumask_t cpumask) | |||
185 | return apicid; | 214 | return apicid; |
186 | } | 215 | } |
187 | 216 | ||
217 | |||
218 | static inline unsigned int cpu_mask_to_apicid_and(const struct cpumask *inmask, | ||
219 | const struct cpumask *andmask) | ||
220 | { | ||
221 | int apicid = cpu_to_logical_apicid(0); | ||
222 | cpumask_var_t cpumask; | ||
223 | |||
224 | if (!alloc_cpumask_var(&cpumask, GFP_ATOMIC)) | ||
225 | return apicid; | ||
226 | |||
227 | cpumask_and(cpumask, inmask, andmask); | ||
228 | cpumask_and(cpumask, cpumask, cpu_online_mask); | ||
229 | apicid = cpu_mask_to_apicid(cpumask); | ||
230 | |||
231 | free_cpumask_var(cpumask); | ||
232 | return apicid; | ||
233 | } | ||
234 | |||
188 | static inline u32 phys_pkg_id(u32 cpuid_apic, int index_msb) | 235 | static inline u32 phys_pkg_id(u32 cpuid_apic, int index_msb) |
189 | { | 236 | { |
190 | return cpuid_apic >> index_msb; | 237 | return cpuid_apic >> index_msb; |
diff --git a/arch/x86/include/asm/es7000/ipi.h b/arch/x86/include/asm/es7000/ipi.h index 632a955fcc0a..7e8ed24d4b8a 100644 --- a/arch/x86/include/asm/es7000/ipi.h +++ b/arch/x86/include/asm/es7000/ipi.h | |||
@@ -1,24 +1,22 @@ | |||
1 | #ifndef __ASM_ES7000_IPI_H | 1 | #ifndef __ASM_ES7000_IPI_H |
2 | #define __ASM_ES7000_IPI_H | 2 | #define __ASM_ES7000_IPI_H |
3 | 3 | ||
4 | void send_IPI_mask_sequence(cpumask_t mask, int vector); | 4 | void send_IPI_mask_sequence(const struct cpumask *mask, int vector); |
5 | void send_IPI_mask_allbutself(const struct cpumask *mask, int vector); | ||
5 | 6 | ||
6 | static inline void send_IPI_mask(cpumask_t mask, int vector) | 7 | static inline void send_IPI_mask(const struct cpumask *mask, int vector) |
7 | { | 8 | { |
8 | send_IPI_mask_sequence(mask, vector); | 9 | send_IPI_mask_sequence(mask, vector); |
9 | } | 10 | } |
10 | 11 | ||
11 | static inline void send_IPI_allbutself(int vector) | 12 | static inline void send_IPI_allbutself(int vector) |
12 | { | 13 | { |
13 | cpumask_t mask = cpu_online_map; | 14 | send_IPI_mask_allbutself(cpu_online_mask, vector); |
14 | cpu_clear(smp_processor_id(), mask); | ||
15 | if (!cpus_empty(mask)) | ||
16 | send_IPI_mask(mask, vector); | ||
17 | } | 15 | } |
18 | 16 | ||
19 | static inline void send_IPI_all(int vector) | 17 | static inline void send_IPI_all(int vector) |
20 | { | 18 | { |
21 | send_IPI_mask(cpu_online_map, vector); | 19 | send_IPI_mask(cpu_online_mask, vector); |
22 | } | 20 | } |
23 | 21 | ||
24 | #endif /* __ASM_ES7000_IPI_H */ | 22 | #endif /* __ASM_ES7000_IPI_H */ |
diff --git a/arch/x86/include/asm/es7000/wakecpu.h b/arch/x86/include/asm/es7000/wakecpu.h index 398493461913..78f0daaee436 100644 --- a/arch/x86/include/asm/es7000/wakecpu.h +++ b/arch/x86/include/asm/es7000/wakecpu.h | |||
@@ -1,36 +1,12 @@ | |||
1 | #ifndef __ASM_ES7000_WAKECPU_H | 1 | #ifndef __ASM_ES7000_WAKECPU_H |
2 | #define __ASM_ES7000_WAKECPU_H | 2 | #define __ASM_ES7000_WAKECPU_H |
3 | 3 | ||
4 | /* | 4 | #define TRAMPOLINE_PHYS_LOW 0x467 |
5 | * This file copes with machines that wakeup secondary CPUs by the | 5 | #define TRAMPOLINE_PHYS_HIGH 0x469 |
6 | * INIT, INIT, STARTUP sequence. | ||
7 | */ | ||
8 | |||
9 | #ifdef CONFIG_ES7000_CLUSTERED_APIC | ||
10 | #define WAKE_SECONDARY_VIA_MIP | ||
11 | #else | ||
12 | #define WAKE_SECONDARY_VIA_INIT | ||
13 | #endif | ||
14 | |||
15 | #ifdef WAKE_SECONDARY_VIA_MIP | ||
16 | extern int es7000_start_cpu(int cpu, unsigned long eip); | ||
17 | static inline int | ||
18 | wakeup_secondary_cpu(int phys_apicid, unsigned long start_eip) | ||
19 | { | ||
20 | int boot_error = 0; | ||
21 | boot_error = es7000_start_cpu(phys_apicid, start_eip); | ||
22 | return boot_error; | ||
23 | } | ||
24 | #endif | ||
25 | |||
26 | #define TRAMPOLINE_LOW phys_to_virt(0x467) | ||
27 | #define TRAMPOLINE_HIGH phys_to_virt(0x469) | ||
28 | |||
29 | #define boot_cpu_apicid boot_cpu_physical_apicid | ||
30 | 6 | ||
31 | static inline void wait_for_init_deassert(atomic_t *deassert) | 7 | static inline void wait_for_init_deassert(atomic_t *deassert) |
32 | { | 8 | { |
33 | #ifdef WAKE_SECONDARY_VIA_INIT | 9 | #ifndef CONFIG_ES7000_CLUSTERED_APIC |
34 | while (!atomic_read(deassert)) | 10 | while (!atomic_read(deassert)) |
35 | cpu_relax(); | 11 | cpu_relax(); |
36 | #endif | 12 | #endif |
@@ -50,9 +26,12 @@ static inline void restore_NMI_vector(unsigned short *high, unsigned short *low) | |||
50 | { | 26 | { |
51 | } | 27 | } |
52 | 28 | ||
53 | #define inquire_remote_apic(apicid) do { \ | 29 | extern void __inquire_remote_apic(int apicid); |
54 | if (apic_verbosity >= APIC_DEBUG) \ | 30 | |
55 | __inquire_remote_apic(apicid); \ | 31 | static inline void inquire_remote_apic(int apicid) |
56 | } while (0) | 32 | { |
33 | if (apic_verbosity >= APIC_DEBUG) | ||
34 | __inquire_remote_apic(apicid); | ||
35 | } | ||
57 | 36 | ||
58 | #endif /* __ASM_ES7000_WAKECPU_H */ | 37 | #endif /* __ASM_ES7000_WAKECPU_H */ |
diff --git a/arch/x86/include/asm/ftrace.h b/arch/x86/include/asm/ftrace.h index 9e8bc29b8b17..b55b4a7fbefd 100644 --- a/arch/x86/include/asm/ftrace.h +++ b/arch/x86/include/asm/ftrace.h | |||
@@ -1,6 +1,33 @@ | |||
1 | #ifndef _ASM_X86_FTRACE_H | 1 | #ifndef _ASM_X86_FTRACE_H |
2 | #define _ASM_X86_FTRACE_H | 2 | #define _ASM_X86_FTRACE_H |
3 | 3 | ||
4 | #ifdef __ASSEMBLY__ | ||
5 | |||
6 | .macro MCOUNT_SAVE_FRAME | ||
7 | /* taken from glibc */ | ||
8 | subq $0x38, %rsp | ||
9 | movq %rax, (%rsp) | ||
10 | movq %rcx, 8(%rsp) | ||
11 | movq %rdx, 16(%rsp) | ||
12 | movq %rsi, 24(%rsp) | ||
13 | movq %rdi, 32(%rsp) | ||
14 | movq %r8, 40(%rsp) | ||
15 | movq %r9, 48(%rsp) | ||
16 | .endm | ||
17 | |||
18 | .macro MCOUNT_RESTORE_FRAME | ||
19 | movq 48(%rsp), %r9 | ||
20 | movq 40(%rsp), %r8 | ||
21 | movq 32(%rsp), %rdi | ||
22 | movq 24(%rsp), %rsi | ||
23 | movq 16(%rsp), %rdx | ||
24 | movq 8(%rsp), %rcx | ||
25 | movq (%rsp), %rax | ||
26 | addq $0x38, %rsp | ||
27 | .endm | ||
28 | |||
29 | #endif | ||
30 | |||
4 | #ifdef CONFIG_FUNCTION_TRACER | 31 | #ifdef CONFIG_FUNCTION_TRACER |
5 | #define MCOUNT_ADDR ((long)(mcount)) | 32 | #define MCOUNT_ADDR ((long)(mcount)) |
6 | #define MCOUNT_INSN_SIZE 5 /* sizeof mcount call */ | 33 | #define MCOUNT_INSN_SIZE 5 /* sizeof mcount call */ |
@@ -17,8 +44,40 @@ static inline unsigned long ftrace_call_adjust(unsigned long addr) | |||
17 | */ | 44 | */ |
18 | return addr - 1; | 45 | return addr - 1; |
19 | } | 46 | } |
20 | #endif | ||
21 | 47 | ||
48 | #ifdef CONFIG_DYNAMIC_FTRACE | ||
49 | |||
50 | struct dyn_arch_ftrace { | ||
51 | /* No extra data needed for x86 */ | ||
52 | }; | ||
53 | |||
54 | #endif /* CONFIG_DYNAMIC_FTRACE */ | ||
55 | #endif /* __ASSEMBLY__ */ | ||
22 | #endif /* CONFIG_FUNCTION_TRACER */ | 56 | #endif /* CONFIG_FUNCTION_TRACER */ |
23 | 57 | ||
58 | #ifdef CONFIG_FUNCTION_GRAPH_TRACER | ||
59 | |||
60 | #ifndef __ASSEMBLY__ | ||
61 | |||
62 | /* | ||
63 | * Stack of return addresses for functions | ||
64 | * of a thread. | ||
65 | * Used in struct thread_info | ||
66 | */ | ||
67 | struct ftrace_ret_stack { | ||
68 | unsigned long ret; | ||
69 | unsigned long func; | ||
70 | unsigned long long calltime; | ||
71 | }; | ||
72 | |||
73 | /* | ||
74 | * Primary handler of a function return. | ||
76 | * It relies on ftrace_return_to_handler. | ||
76 | * Defined in entry_32/64.S | ||
77 | */ | ||
78 | extern void return_to_handler(void); | ||
79 | |||
80 | #endif /* __ASSEMBLY__ */ | ||
81 | #endif /* CONFIG_FUNCTION_GRAPH_TRACER */ | ||
82 | |||
24 | #endif /* _ASM_X86_FTRACE_H */ | 83 | #endif /* _ASM_X86_FTRACE_H */ |
diff --git a/arch/x86/include/asm/gart.h b/arch/x86/include/asm/gart.h index 74252264433d..6cfdafa409d8 100644 --- a/arch/x86/include/asm/gart.h +++ b/arch/x86/include/asm/gart.h | |||
@@ -29,6 +29,39 @@ extern int fix_aperture; | |||
29 | #define AMD64_GARTCACHECTL 0x9c | 29 | #define AMD64_GARTCACHECTL 0x9c |
30 | #define AMD64_GARTEN (1<<0) | 30 | #define AMD64_GARTEN (1<<0) |
31 | 31 | ||
32 | #ifdef CONFIG_GART_IOMMU | ||
33 | extern int gart_iommu_aperture; | ||
34 | extern int gart_iommu_aperture_allowed; | ||
35 | extern int gart_iommu_aperture_disabled; | ||
36 | |||
37 | extern void early_gart_iommu_check(void); | ||
38 | extern void gart_iommu_init(void); | ||
39 | extern void gart_iommu_shutdown(void); | ||
40 | extern void __init gart_parse_options(char *); | ||
41 | extern void gart_iommu_hole_init(void); | ||
42 | |||
43 | #else | ||
44 | #define gart_iommu_aperture 0 | ||
45 | #define gart_iommu_aperture_allowed 0 | ||
46 | #define gart_iommu_aperture_disabled 1 | ||
47 | |||
48 | static inline void early_gart_iommu_check(void) | ||
49 | { | ||
50 | } | ||
51 | static inline void gart_iommu_init(void) | ||
52 | { | ||
53 | } | ||
54 | static inline void gart_iommu_shutdown(void) | ||
55 | { | ||
56 | } | ||
57 | static inline void gart_parse_options(char *options) | ||
58 | { | ||
59 | } | ||
60 | static inline void gart_iommu_hole_init(void) | ||
61 | { | ||
62 | } | ||
63 | #endif | ||
64 | |||
32 | extern int agp_amd64_init(void); | 65 | extern int agp_amd64_init(void); |
33 | 66 | ||
34 | static inline void enable_gart_translation(struct pci_dev *dev, u64 addr) | 67 | static inline void enable_gart_translation(struct pci_dev *dev, u64 addr) |
diff --git a/arch/x86/include/asm/genapic_32.h b/arch/x86/include/asm/genapic_32.h index 5cbd4fcc06fd..746f37a7963a 100644 --- a/arch/x86/include/asm/genapic_32.h +++ b/arch/x86/include/asm/genapic_32.h | |||
@@ -2,6 +2,7 @@ | |||
2 | #define _ASM_X86_GENAPIC_32_H | 2 | #define _ASM_X86_GENAPIC_32_H |
3 | 3 | ||
4 | #include <asm/mpspec.h> | 4 | #include <asm/mpspec.h> |
5 | #include <asm/atomic.h> | ||
5 | 6 | ||
6 | /* | 7 | /* |
7 | * Generic APIC driver interface. | 8 | * Generic APIC driver interface. |
@@ -23,7 +24,7 @@ struct genapic { | |||
23 | int (*probe)(void); | 24 | int (*probe)(void); |
24 | 25 | ||
25 | int (*apic_id_registered)(void); | 26 | int (*apic_id_registered)(void); |
26 | cpumask_t (*target_cpus)(void); | 27 | const struct cpumask *(*target_cpus)(void); |
27 | int int_delivery_mode; | 28 | int int_delivery_mode; |
28 | int int_dest_mode; | 29 | int int_dest_mode; |
29 | int ESR_DISABLE; | 30 | int ESR_DISABLE; |
@@ -56,15 +57,27 @@ struct genapic { | |||
56 | 57 | ||
57 | unsigned (*get_apic_id)(unsigned long x); | 58 | unsigned (*get_apic_id)(unsigned long x); |
58 | unsigned long apic_id_mask; | 59 | unsigned long apic_id_mask; |
59 | unsigned int (*cpu_mask_to_apicid)(cpumask_t cpumask); | 60 | unsigned int (*cpu_mask_to_apicid)(const struct cpumask *cpumask); |
60 | cpumask_t (*vector_allocation_domain)(int cpu); | 61 | unsigned int (*cpu_mask_to_apicid_and)(const struct cpumask *cpumask, |
62 | const struct cpumask *andmask); | ||
63 | void (*vector_allocation_domain)(int cpu, struct cpumask *retmask); | ||
61 | 64 | ||
62 | #ifdef CONFIG_SMP | 65 | #ifdef CONFIG_SMP |
63 | /* ipi */ | 66 | /* ipi */ |
64 | void (*send_IPI_mask)(cpumask_t mask, int vector); | 67 | void (*send_IPI_mask)(const struct cpumask *mask, int vector); |
68 | void (*send_IPI_mask_allbutself)(const struct cpumask *mask, | ||
69 | int vector); | ||
65 | void (*send_IPI_allbutself)(int vector); | 70 | void (*send_IPI_allbutself)(int vector); |
66 | void (*send_IPI_all)(int vector); | 71 | void (*send_IPI_all)(int vector); |
67 | #endif | 72 | #endif |
73 | int (*wakeup_cpu)(int apicid, unsigned long start_eip); | ||
74 | int trampoline_phys_low; | ||
75 | int trampoline_phys_high; | ||
76 | void (*wait_for_init_deassert)(atomic_t *deassert); | ||
77 | void (*smp_callin_clear_local_apic)(void); | ||
78 | void (*store_NMI_vector)(unsigned short *high, unsigned short *low); | ||
79 | void (*restore_NMI_vector)(unsigned short *high, unsigned short *low); | ||
80 | void (*inquire_remote_apic)(int apicid); | ||
68 | }; | 81 | }; |
69 | 82 | ||
70 | #define APICFUNC(x) .x = x, | 83 | #define APICFUNC(x) .x = x, |
@@ -105,16 +118,25 @@ struct genapic { | |||
105 | APICFUNC(get_apic_id) \ | 118 | APICFUNC(get_apic_id) \ |
106 | .apic_id_mask = APIC_ID_MASK, \ | 119 | .apic_id_mask = APIC_ID_MASK, \ |
107 | APICFUNC(cpu_mask_to_apicid) \ | 120 | APICFUNC(cpu_mask_to_apicid) \ |
108 | APICFUNC(vector_allocation_domain) \ | 121 | APICFUNC(cpu_mask_to_apicid_and) \ |
122 | APICFUNC(vector_allocation_domain) \ | ||
109 | APICFUNC(acpi_madt_oem_check) \ | 123 | APICFUNC(acpi_madt_oem_check) \ |
110 | IPIFUNC(send_IPI_mask) \ | 124 | IPIFUNC(send_IPI_mask) \ |
111 | IPIFUNC(send_IPI_allbutself) \ | 125 | IPIFUNC(send_IPI_allbutself) \ |
112 | IPIFUNC(send_IPI_all) \ | 126 | IPIFUNC(send_IPI_all) \ |
113 | APICFUNC(enable_apic_mode) \ | 127 | APICFUNC(enable_apic_mode) \ |
114 | APICFUNC(phys_pkg_id) \ | 128 | APICFUNC(phys_pkg_id) \ |
129 | .trampoline_phys_low = TRAMPOLINE_PHYS_LOW, \ | ||
130 | .trampoline_phys_high = TRAMPOLINE_PHYS_HIGH, \ | ||
131 | APICFUNC(wait_for_init_deassert) \ | ||
132 | APICFUNC(smp_callin_clear_local_apic) \ | ||
133 | APICFUNC(store_NMI_vector) \ | ||
134 | APICFUNC(restore_NMI_vector) \ | ||
135 | APICFUNC(inquire_remote_apic) \ | ||
115 | } | 136 | } |
116 | 137 | ||
117 | extern struct genapic *genapic; | 138 | extern struct genapic *genapic; |
139 | extern void es7000_update_genapic_to_cluster(void); | ||
118 | 140 | ||
119 | enum uv_system_type {UV_NONE, UV_LEGACY_APIC, UV_X2APIC, UV_NON_UNIQUE_APIC}; | 141 | enum uv_system_type {UV_NONE, UV_LEGACY_APIC, UV_X2APIC, UV_NON_UNIQUE_APIC}; |
120 | #define get_uv_system_type() UV_NONE | 142 | #define get_uv_system_type() UV_NONE |
diff --git a/arch/x86/include/asm/genapic_64.h b/arch/x86/include/asm/genapic_64.h index 13c4e96199ea..adf32fb56aa6 100644 --- a/arch/x86/include/asm/genapic_64.h +++ b/arch/x86/include/asm/genapic_64.h | |||
@@ -1,6 +1,8 @@ | |||
1 | #ifndef _ASM_X86_GENAPIC_64_H | 1 | #ifndef _ASM_X86_GENAPIC_64_H |
2 | #define _ASM_X86_GENAPIC_64_H | 2 | #define _ASM_X86_GENAPIC_64_H |
3 | 3 | ||
4 | #include <linux/cpumask.h> | ||
5 | |||
4 | /* | 6 | /* |
5 | * Copyright 2004 James Cleverdon, IBM. | 7 | * Copyright 2004 James Cleverdon, IBM. |
6 | * Subject to the GNU Public License, v.2 | 8 | * Subject to the GNU Public License, v.2 |
@@ -18,20 +20,26 @@ struct genapic { | |||
18 | u32 int_delivery_mode; | 20 | u32 int_delivery_mode; |
19 | u32 int_dest_mode; | 21 | u32 int_dest_mode; |
20 | int (*apic_id_registered)(void); | 22 | int (*apic_id_registered)(void); |
21 | cpumask_t (*target_cpus)(void); | 23 | const struct cpumask *(*target_cpus)(void); |
22 | cpumask_t (*vector_allocation_domain)(int cpu); | 24 | void (*vector_allocation_domain)(int cpu, struct cpumask *retmask); |
23 | void (*init_apic_ldr)(void); | 25 | void (*init_apic_ldr)(void); |
24 | /* ipi */ | 26 | /* ipi */ |
25 | void (*send_IPI_mask)(cpumask_t mask, int vector); | 27 | void (*send_IPI_mask)(const struct cpumask *mask, int vector); |
28 | void (*send_IPI_mask_allbutself)(const struct cpumask *mask, | ||
29 | int vector); | ||
26 | void (*send_IPI_allbutself)(int vector); | 30 | void (*send_IPI_allbutself)(int vector); |
27 | void (*send_IPI_all)(int vector); | 31 | void (*send_IPI_all)(int vector); |
28 | void (*send_IPI_self)(int vector); | 32 | void (*send_IPI_self)(int vector); |
29 | /* */ | 33 | /* */ |
30 | unsigned int (*cpu_mask_to_apicid)(cpumask_t cpumask); | 34 | unsigned int (*cpu_mask_to_apicid)(const struct cpumask *cpumask); |
35 | unsigned int (*cpu_mask_to_apicid_and)(const struct cpumask *cpumask, | ||
36 | const struct cpumask *andmask); | ||
31 | unsigned int (*phys_pkg_id)(int index_msb); | 37 | unsigned int (*phys_pkg_id)(int index_msb); |
32 | unsigned int (*get_apic_id)(unsigned long x); | 38 | unsigned int (*get_apic_id)(unsigned long x); |
33 | unsigned long (*set_apic_id)(unsigned int id); | 39 | unsigned long (*set_apic_id)(unsigned int id); |
34 | unsigned long apic_id_mask; | 40 | unsigned long apic_id_mask; |
41 | /* wakeup_secondary_cpu */ | ||
42 | int (*wakeup_cpu)(int apicid, unsigned long start_eip); | ||
35 | }; | 43 | }; |
36 | 44 | ||
37 | extern struct genapic *genapic; | 45 | extern struct genapic *genapic; |
diff --git a/arch/x86/include/asm/hardirq_32.h b/arch/x86/include/asm/hardirq_32.h index 5ca135e72f2b..cf7954d1405f 100644 --- a/arch/x86/include/asm/hardirq_32.h +++ b/arch/x86/include/asm/hardirq_32.h | |||
@@ -22,6 +22,8 @@ DECLARE_PER_CPU(irq_cpustat_t, irq_stat); | |||
22 | #define __ARCH_IRQ_STAT | 22 | #define __ARCH_IRQ_STAT |
23 | #define __IRQ_STAT(cpu, member) (per_cpu(irq_stat, cpu).member) | 23 | #define __IRQ_STAT(cpu, member) (per_cpu(irq_stat, cpu).member) |
24 | 24 | ||
25 | #define inc_irq_stat(member) (__get_cpu_var(irq_stat).member++) | ||
26 | |||
25 | void ack_bad_irq(unsigned int irq); | 27 | void ack_bad_irq(unsigned int irq); |
26 | #include <linux/irq_cpustat.h> | 28 | #include <linux/irq_cpustat.h> |
27 | 29 | ||
diff --git a/arch/x86/include/asm/hardirq_64.h b/arch/x86/include/asm/hardirq_64.h index 1ba381fc51d3..b5a6b5d56704 100644 --- a/arch/x86/include/asm/hardirq_64.h +++ b/arch/x86/include/asm/hardirq_64.h | |||
@@ -11,6 +11,8 @@ | |||
11 | 11 | ||
12 | #define __ARCH_IRQ_STAT 1 | 12 | #define __ARCH_IRQ_STAT 1 |
13 | 13 | ||
14 | #define inc_irq_stat(member) add_pda(member, 1) | ||
15 | |||
14 | #define local_softirq_pending() read_pda(__softirq_pending) | 16 | #define local_softirq_pending() read_pda(__softirq_pending) |
15 | 17 | ||
16 | #define __ARCH_SET_SOFTIRQ_PENDING 1 | 18 | #define __ARCH_SET_SOFTIRQ_PENDING 1 |
diff --git a/arch/x86/include/asm/hw_irq.h b/arch/x86/include/asm/hw_irq.h index b97aecb0b61d..8de644b6b959 100644 --- a/arch/x86/include/asm/hw_irq.h +++ b/arch/x86/include/asm/hw_irq.h | |||
@@ -109,9 +109,7 @@ extern asmlinkage void smp_invalidate_interrupt(struct pt_regs *); | |||
109 | #endif | 109 | #endif |
110 | #endif | 110 | #endif |
111 | 111 | ||
112 | #ifdef CONFIG_X86_32 | 112 | extern void (*__initconst interrupt[NR_VECTORS-FIRST_EXTERNAL_VECTOR])(void); |
113 | extern void (*const interrupt[NR_VECTORS])(void); | ||
114 | #endif | ||
115 | 113 | ||
116 | typedef int vector_irq_t[NR_VECTORS]; | 114 | typedef int vector_irq_t[NR_VECTORS]; |
117 | DECLARE_PER_CPU(vector_irq_t, vector_irq); | 115 | DECLARE_PER_CPU(vector_irq_t, vector_irq); |
diff --git a/arch/x86/include/asm/hypervisor.h b/arch/x86/include/asm/hypervisor.h new file mode 100644 index 000000000000..369f5c5d09a1 --- /dev/null +++ b/arch/x86/include/asm/hypervisor.h | |||
@@ -0,0 +1,26 @@ | |||
1 | /* | ||
2 | * Copyright (C) 2008, VMware, Inc. | ||
3 | * | ||
4 | * This program is free software; you can redistribute it and/or modify | ||
5 | * it under the terms of the GNU General Public License as published by | ||
6 | * the Free Software Foundation; either version 2 of the License, or | ||
7 | * (at your option) any later version. | ||
8 | * | ||
9 | * This program is distributed in the hope that it will be useful, but | ||
10 | * WITHOUT ANY WARRANTY; without even the implied warranty of | ||
11 | * MERCHANTABILITY OR FITNESS FOR A PARTICULAR PURPOSE, GOOD TITLE or | ||
12 | * NON INFRINGEMENT. See the GNU General Public License for more | ||
13 | * details. | ||
14 | * | ||
15 | * You should have received a copy of the GNU General Public License | ||
16 | * along with this program; if not, write to the Free Software | ||
17 | * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. | ||
18 | * | ||
19 | */ | ||
20 | #ifndef ASM_X86__HYPERVISOR_H | ||
21 | #define ASM_X86__HYPERVISOR_H | ||
22 | |||
23 | extern unsigned long get_hypervisor_tsc_freq(void); | ||
24 | extern void init_hypervisor(struct cpuinfo_x86 *c); | ||
25 | |||
26 | #endif | ||
diff --git a/arch/x86/include/asm/ia32.h b/arch/x86/include/asm/ia32.h index 97989c0e534c..50ca486fd88c 100644 --- a/arch/x86/include/asm/ia32.h +++ b/arch/x86/include/asm/ia32.h | |||
@@ -129,24 +129,6 @@ typedef struct compat_siginfo { | |||
129 | } _sifields; | 129 | } _sifields; |
130 | } compat_siginfo_t; | 130 | } compat_siginfo_t; |
131 | 131 | ||
132 | struct sigframe32 { | ||
133 | u32 pretcode; | ||
134 | int sig; | ||
135 | struct sigcontext_ia32 sc; | ||
136 | struct _fpstate_ia32 fpstate; | ||
137 | unsigned int extramask[_COMPAT_NSIG_WORDS-1]; | ||
138 | }; | ||
139 | |||
140 | struct rt_sigframe32 { | ||
141 | u32 pretcode; | ||
142 | int sig; | ||
143 | u32 pinfo; | ||
144 | u32 puc; | ||
145 | compat_siginfo_t info; | ||
146 | struct ucontext_ia32 uc; | ||
147 | struct _fpstate_ia32 fpstate; | ||
148 | }; | ||
149 | |||
150 | struct ustat32 { | 132 | struct ustat32 { |
151 | __u32 f_tfree; | 133 | __u32 f_tfree; |
152 | compat_ino_t f_tinode; | 134 | compat_ino_t f_tinode; |
diff --git a/arch/x86/include/asm/idle.h b/arch/x86/include/asm/idle.h index 44c89c3a23e9..38d87379e270 100644 --- a/arch/x86/include/asm/idle.h +++ b/arch/x86/include/asm/idle.h | |||
@@ -8,8 +8,13 @@ struct notifier_block; | |||
8 | void idle_notifier_register(struct notifier_block *n); | 8 | void idle_notifier_register(struct notifier_block *n); |
9 | void idle_notifier_unregister(struct notifier_block *n); | 9 | void idle_notifier_unregister(struct notifier_block *n); |
10 | 10 | ||
11 | #ifdef CONFIG_X86_64 | ||
11 | void enter_idle(void); | 12 | void enter_idle(void); |
12 | void exit_idle(void); | 13 | void exit_idle(void); |
14 | #else /* !CONFIG_X86_64 */ | ||
15 | static inline void enter_idle(void) { } | ||
16 | static inline void exit_idle(void) { } | ||
17 | #endif /* CONFIG_X86_64 */ | ||
13 | 18 | ||
14 | void c1e_remove_cpu(int cpu); | 19 | void c1e_remove_cpu(int cpu); |
15 | 20 | ||
diff --git a/arch/x86/include/asm/io.h b/arch/x86/include/asm/io.h index ac2abc88cd95..05cfed4485fa 100644 --- a/arch/x86/include/asm/io.h +++ b/arch/x86/include/asm/io.h | |||
@@ -4,6 +4,7 @@ | |||
4 | #define ARCH_HAS_IOREMAP_WC | 4 | #define ARCH_HAS_IOREMAP_WC |
5 | 5 | ||
6 | #include <linux/compiler.h> | 6 | #include <linux/compiler.h> |
7 | #include <asm-generic/int-ll64.h> | ||
7 | 8 | ||
8 | #define build_mmio_read(name, size, type, reg, barrier) \ | 9 | #define build_mmio_read(name, size, type, reg, barrier) \ |
9 | static inline type name(const volatile void __iomem *addr) \ | 10 | static inline type name(const volatile void __iomem *addr) \ |
@@ -45,21 +46,39 @@ build_mmio_write(__writel, "l", unsigned int, "r", ) | |||
45 | #define mmiowb() barrier() | 46 | #define mmiowb() barrier() |
46 | 47 | ||
47 | #ifdef CONFIG_X86_64 | 48 | #ifdef CONFIG_X86_64 |
49 | |||
48 | build_mmio_read(readq, "q", unsigned long, "=r", :"memory") | 50 | build_mmio_read(readq, "q", unsigned long, "=r", :"memory") |
49 | build_mmio_read(__readq, "q", unsigned long, "=r", ) | ||
50 | build_mmio_write(writeq, "q", unsigned long, "r", :"memory") | 51 | build_mmio_write(writeq, "q", unsigned long, "r", :"memory") |
51 | build_mmio_write(__writeq, "q", unsigned long, "r", ) | ||
52 | 52 | ||
53 | #define readq_relaxed(a) __readq(a) | 53 | #else |
54 | #define __raw_readq __readq | 54 | |
55 | #define __raw_writeq writeq | 55 | static inline __u64 readq(const volatile void __iomem *addr) |
56 | { | ||
57 | const volatile u32 __iomem *p = addr; | ||
58 | u32 low, high; | ||
59 | |||
60 | low = readl(p); | ||
61 | high = readl(p + 1); | ||
62 | |||
63 | return low + ((u64)high << 32); | ||
64 | } | ||
65 | |||
66 | static inline void writeq(__u64 val, volatile void __iomem *addr) | ||
67 | { | ||
68 | writel(val, addr); | ||
69 | writel(val >> 32, addr+4); | ||
70 | } | ||
56 | 71 | ||
57 | /* Let people know we have them */ | ||
58 | #define readq readq | ||
59 | #define writeq writeq | ||
60 | #endif | 72 | #endif |
61 | 73 | ||
62 | extern int iommu_bio_merge; | 74 | #define readq_relaxed(a) readq(a) |
75 | |||
76 | #define __raw_readq(a) readq(a) | ||
77 | #define __raw_writeq(val, addr) writeq(val, addr) | ||
78 | |||
79 | /* Let people know that we have them */ | ||
80 | #define readq readq | ||
81 | #define writeq writeq | ||
63 | 82 | ||
64 | #ifdef CONFIG_X86_32 | 83 | #ifdef CONFIG_X86_32 |
65 | # include "io_32.h" | 84 | # include "io_32.h" |
diff --git a/arch/x86/include/asm/io_64.h b/arch/x86/include/asm/io_64.h index fea325a1122f..563c16270ba6 100644 --- a/arch/x86/include/asm/io_64.h +++ b/arch/x86/include/asm/io_64.h | |||
@@ -232,8 +232,6 @@ void memset_io(volatile void __iomem *a, int b, size_t c); | |||
232 | 232 | ||
233 | #define flush_write_buffers() | 233 | #define flush_write_buffers() |
234 | 234 | ||
235 | #define BIO_VMERGE_BOUNDARY iommu_bio_merge | ||
236 | |||
237 | /* | 235 | /* |
238 | * Convert a virtual cached pointer to an uncached pointer | 236 | * Convert a virtual cached pointer to an uncached pointer |
239 | */ | 237 | */ |
diff --git a/arch/x86/include/asm/io_apic.h b/arch/x86/include/asm/io_apic.h index 6afd9933a7dd..7a1f44ac1f17 100644 --- a/arch/x86/include/asm/io_apic.h +++ b/arch/x86/include/asm/io_apic.h | |||
@@ -156,11 +156,21 @@ extern int sis_apic_bug; | |||
156 | /* 1 if "noapic" boot option passed */ | 156 | /* 1 if "noapic" boot option passed */ |
157 | extern int skip_ioapic_setup; | 157 | extern int skip_ioapic_setup; |
158 | 158 | ||
159 | /* 1 if "noapic" boot option passed */ | ||
160 | extern int noioapicquirk; | ||
161 | |||
162 | /* -1 if "noapic" boot option passed */ | ||
163 | extern int noioapicreroute; | ||
164 | |||
159 | /* 1 if the timer IRQ uses the '8259A Virtual Wire' mode */ | 165 | /* 1 if the timer IRQ uses the '8259A Virtual Wire' mode */ |
160 | extern int timer_through_8259; | 166 | extern int timer_through_8259; |
161 | 167 | ||
162 | static inline void disable_ioapic_setup(void) | 168 | static inline void disable_ioapic_setup(void) |
163 | { | 169 | { |
170 | #ifdef CONFIG_PCI | ||
171 | noioapicquirk = 1; | ||
172 | noioapicreroute = -1; | ||
173 | #endif | ||
164 | skip_ioapic_setup = 1; | 174 | skip_ioapic_setup = 1; |
165 | } | 175 | } |
166 | 176 | ||
@@ -188,17 +198,14 @@ extern void restore_IO_APIC_setup(void); | |||
188 | extern void reinit_intr_remapped_IO_APIC(int); | 198 | extern void reinit_intr_remapped_IO_APIC(int); |
189 | #endif | 199 | #endif |
190 | 200 | ||
191 | extern int probe_nr_irqs(void); | 201 | extern void probe_nr_irqs_gsi(void); |
192 | 202 | ||
193 | #else /* !CONFIG_X86_IO_APIC */ | 203 | #else /* !CONFIG_X86_IO_APIC */ |
194 | #define io_apic_assign_pci_irqs 0 | 204 | #define io_apic_assign_pci_irqs 0 |
195 | static const int timer_through_8259 = 0; | 205 | static const int timer_through_8259 = 0; |
196 | static inline void ioapic_init_mappings(void) { } | 206 | static inline void ioapic_init_mappings(void) { } |
197 | 207 | ||
198 | static inline int probe_nr_irqs(void) | 208 | static inline void probe_nr_irqs_gsi(void) { } |
199 | { | ||
200 | return NR_IRQS; | ||
201 | } | ||
202 | #endif | 209 | #endif |
203 | 210 | ||
204 | #endif /* _ASM_X86_IO_APIC_H */ | 211 | #endif /* _ASM_X86_IO_APIC_H */ |
diff --git a/arch/x86/include/asm/iommu.h b/arch/x86/include/asm/iommu.h index 0b500c5b6446..a6ee9e6f530f 100644 --- a/arch/x86/include/asm/iommu.h +++ b/arch/x86/include/asm/iommu.h | |||
@@ -7,42 +7,7 @@ extern struct dma_mapping_ops nommu_dma_ops; | |||
7 | extern int force_iommu, no_iommu; | 7 | extern int force_iommu, no_iommu; |
8 | extern int iommu_detected; | 8 | extern int iommu_detected; |
9 | 9 | ||
10 | extern unsigned long iommu_nr_pages(unsigned long addr, unsigned long len); | ||
11 | |||
12 | /* 10 seconds */ | 10 | /* 10 seconds */ |
13 | #define DMAR_OPERATION_TIMEOUT ((cycles_t) tsc_khz*10*1000) | 11 | #define DMAR_OPERATION_TIMEOUT ((cycles_t) tsc_khz*10*1000) |
14 | 12 | ||
15 | #ifdef CONFIG_GART_IOMMU | ||
16 | extern int gart_iommu_aperture; | ||
17 | extern int gart_iommu_aperture_allowed; | ||
18 | extern int gart_iommu_aperture_disabled; | ||
19 | |||
20 | extern void early_gart_iommu_check(void); | ||
21 | extern void gart_iommu_init(void); | ||
22 | extern void gart_iommu_shutdown(void); | ||
23 | extern void __init gart_parse_options(char *); | ||
24 | extern void gart_iommu_hole_init(void); | ||
25 | |||
26 | #else | ||
27 | #define gart_iommu_aperture 0 | ||
28 | #define gart_iommu_aperture_allowed 0 | ||
29 | #define gart_iommu_aperture_disabled 1 | ||
30 | |||
31 | static inline void early_gart_iommu_check(void) | ||
32 | { | ||
33 | } | ||
34 | static inline void gart_iommu_init(void) | ||
35 | { | ||
36 | } | ||
37 | static inline void gart_iommu_shutdown(void) | ||
38 | { | ||
39 | } | ||
40 | static inline void gart_parse_options(char *options) | ||
41 | { | ||
42 | } | ||
43 | static inline void gart_iommu_hole_init(void) | ||
44 | { | ||
45 | } | ||
46 | #endif | ||
47 | |||
48 | #endif /* _ASM_X86_IOMMU_H */ | 13 | #endif /* _ASM_X86_IOMMU_H */ |
diff --git a/arch/x86/include/asm/ipi.h b/arch/x86/include/asm/ipi.h index f89dffb28aa9..c745a306f7d3 100644 --- a/arch/x86/include/asm/ipi.h +++ b/arch/x86/include/asm/ipi.h | |||
@@ -117,7 +117,8 @@ static inline void __send_IPI_dest_field(unsigned int mask, int vector, | |||
117 | native_apic_mem_write(APIC_ICR, cfg); | 117 | native_apic_mem_write(APIC_ICR, cfg); |
118 | } | 118 | } |
119 | 119 | ||
120 | static inline void send_IPI_mask_sequence(cpumask_t mask, int vector) | 120 | static inline void send_IPI_mask_sequence(const struct cpumask *mask, |
121 | int vector) | ||
121 | { | 122 | { |
122 | unsigned long flags; | 123 | unsigned long flags; |
123 | unsigned long query_cpu; | 124 | unsigned long query_cpu; |
@@ -128,11 +129,29 @@ static inline void send_IPI_mask_sequence(cpumask_t mask, int vector) | |||
128 | * - mbligh | 129 | * - mbligh |
129 | */ | 130 | */ |
130 | local_irq_save(flags); | 131 | local_irq_save(flags); |
131 | for_each_cpu_mask_nr(query_cpu, mask) { | 132 | for_each_cpu(query_cpu, mask) { |
132 | __send_IPI_dest_field(per_cpu(x86_cpu_to_apicid, query_cpu), | 133 | __send_IPI_dest_field(per_cpu(x86_cpu_to_apicid, query_cpu), |
133 | vector, APIC_DEST_PHYSICAL); | 134 | vector, APIC_DEST_PHYSICAL); |
134 | } | 135 | } |
135 | local_irq_restore(flags); | 136 | local_irq_restore(flags); |
136 | } | 137 | } |
137 | 138 | ||
139 | static inline void send_IPI_mask_allbutself(const struct cpumask *mask, | ||
140 | int vector) | ||
141 | { | ||
142 | unsigned long flags; | ||
143 | unsigned int query_cpu; | ||
144 | unsigned int this_cpu = smp_processor_id(); | ||
145 | |||
146 | /* See Hack comment above */ | ||
147 | |||
148 | local_irq_save(flags); | ||
149 | for_each_cpu(query_cpu, mask) | ||
150 | if (query_cpu != this_cpu) | ||
151 | __send_IPI_dest_field( | ||
152 | per_cpu(x86_cpu_to_apicid, query_cpu), | ||
153 | vector, APIC_DEST_PHYSICAL); | ||
154 | local_irq_restore(flags); | ||
155 | } | ||
156 | |||
138 | #endif /* _ASM_X86_IPI_H */ | 157 | #endif /* _ASM_X86_IPI_H */ |
diff --git a/arch/x86/include/asm/irq.h b/arch/x86/include/asm/irq.h index bae0eda95486..592688ed04d3 100644 --- a/arch/x86/include/asm/irq.h +++ b/arch/x86/include/asm/irq.h | |||
@@ -31,13 +31,9 @@ static inline int irq_canonicalize(int irq) | |||
31 | # endif | 31 | # endif |
32 | #endif | 32 | #endif |
33 | 33 | ||
34 | #ifdef CONFIG_IRQBALANCE | ||
35 | extern int irqbalance_disable(char *str); | ||
36 | #endif | ||
37 | |||
38 | #ifdef CONFIG_HOTPLUG_CPU | 34 | #ifdef CONFIG_HOTPLUG_CPU |
39 | #include <linux/cpumask.h> | 35 | #include <linux/cpumask.h> |
40 | extern void fixup_irqs(cpumask_t map); | 36 | extern void fixup_irqs(void); |
41 | #endif | 37 | #endif |
42 | 38 | ||
43 | extern unsigned int do_IRQ(struct pt_regs *regs); | 39 | extern unsigned int do_IRQ(struct pt_regs *regs); |
@@ -46,5 +42,6 @@ extern void native_init_IRQ(void); | |||
46 | 42 | ||
47 | /* Interrupt vector management */ | 43 | /* Interrupt vector management */ |
48 | extern DECLARE_BITMAP(used_vectors, NR_VECTORS); | 44 | extern DECLARE_BITMAP(used_vectors, NR_VECTORS); |
45 | extern int vector_used_by_percpu_irq(unsigned int vector); | ||
49 | 46 | ||
50 | #endif /* _ASM_X86_IRQ_H */ | 47 | #endif /* _ASM_X86_IRQ_H */ |
diff --git a/arch/x86/include/asm/irq_regs_32.h b/arch/x86/include/asm/irq_regs_32.h index af2f02d27fc7..86afd7473457 100644 --- a/arch/x86/include/asm/irq_regs_32.h +++ b/arch/x86/include/asm/irq_regs_32.h | |||
@@ -9,6 +9,8 @@ | |||
9 | 9 | ||
10 | #include <asm/percpu.h> | 10 | #include <asm/percpu.h> |
11 | 11 | ||
12 | #define ARCH_HAS_OWN_IRQ_REGS | ||
13 | |||
12 | DECLARE_PER_CPU(struct pt_regs *, irq_regs); | 14 | DECLARE_PER_CPU(struct pt_regs *, irq_regs); |
13 | 15 | ||
14 | static inline struct pt_regs *get_irq_regs(void) | 16 | static inline struct pt_regs *get_irq_regs(void) |
diff --git a/arch/x86/include/asm/irq_vectors.h b/arch/x86/include/asm/irq_vectors.h index 0005adb0f941..f7ff65032b9d 100644 --- a/arch/x86/include/asm/irq_vectors.h +++ b/arch/x86/include/asm/irq_vectors.h | |||
@@ -101,12 +101,23 @@ | |||
101 | #define LAST_VM86_IRQ 15 | 101 | #define LAST_VM86_IRQ 15 |
102 | #define invalid_vm86_irq(irq) ((irq) < 3 || (irq) > 15) | 102 | #define invalid_vm86_irq(irq) ((irq) < 3 || (irq) > 15) |
103 | 103 | ||
104 | #define NR_IRQS_LEGACY 16 | ||
105 | |||
104 | #if defined(CONFIG_X86_IO_APIC) && !defined(CONFIG_X86_VOYAGER) | 106 | #if defined(CONFIG_X86_IO_APIC) && !defined(CONFIG_X86_VOYAGER) |
107 | |||
108 | #ifndef CONFIG_SPARSE_IRQ | ||
105 | # if NR_CPUS < MAX_IO_APICS | 109 | # if NR_CPUS < MAX_IO_APICS |
106 | # define NR_IRQS (NR_VECTORS + (32 * NR_CPUS)) | 110 | # define NR_IRQS (NR_VECTORS + (32 * NR_CPUS)) |
107 | # else | 111 | # else |
108 | # define NR_IRQS (NR_VECTORS + (32 * MAX_IO_APICS)) | 112 | # define NR_IRQS (NR_VECTORS + (32 * MAX_IO_APICS)) |
109 | # endif | 113 | # endif |
114 | #else | ||
115 | # if (8 * NR_CPUS) > (32 * MAX_IO_APICS) | ||
116 | # define NR_IRQS (NR_VECTORS + (8 * NR_CPUS)) | ||
117 | # else | ||
118 | # define NR_IRQS (NR_VECTORS + (32 * MAX_IO_APICS)) | ||
119 | # endif | ||
120 | #endif | ||
110 | 121 | ||
111 | #elif defined(CONFIG_X86_VOYAGER) | 122 | #elif defined(CONFIG_X86_VOYAGER) |
112 | 123 | ||
diff --git a/arch/x86/include/asm/kexec.h b/arch/x86/include/asm/kexec.h index a1f22771a15a..c61d8b2ab8b9 100644 --- a/arch/x86/include/asm/kexec.h +++ b/arch/x86/include/asm/kexec.h | |||
@@ -5,21 +5,8 @@ | |||
5 | # define PA_CONTROL_PAGE 0 | 5 | # define PA_CONTROL_PAGE 0 |
6 | # define VA_CONTROL_PAGE 1 | 6 | # define VA_CONTROL_PAGE 1 |
7 | # define PA_PGD 2 | 7 | # define PA_PGD 2 |
8 | # define VA_PGD 3 | 8 | # define PA_SWAP_PAGE 3 |
9 | # define PA_PTE_0 4 | 9 | # define PAGES_NR 4 |
10 | # define VA_PTE_0 5 | ||
11 | # define PA_PTE_1 6 | ||
12 | # define VA_PTE_1 7 | ||
13 | # define PA_SWAP_PAGE 8 | ||
14 | # ifdef CONFIG_X86_PAE | ||
15 | # define PA_PMD_0 9 | ||
16 | # define VA_PMD_0 10 | ||
17 | # define PA_PMD_1 11 | ||
18 | # define VA_PMD_1 12 | ||
19 | # define PAGES_NR 13 | ||
20 | # else | ||
21 | # define PAGES_NR 9 | ||
22 | # endif | ||
23 | #else | 10 | #else |
24 | # define PA_CONTROL_PAGE 0 | 11 | # define PA_CONTROL_PAGE 0 |
25 | # define VA_CONTROL_PAGE 1 | 12 | # define VA_CONTROL_PAGE 1 |
@@ -170,6 +157,20 @@ relocate_kernel(unsigned long indirection_page, | |||
170 | unsigned long start_address) ATTRIB_NORET; | 157 | unsigned long start_address) ATTRIB_NORET; |
171 | #endif | 158 | #endif |
172 | 159 | ||
160 | #ifdef CONFIG_X86_32 | ||
161 | #define ARCH_HAS_KIMAGE_ARCH | ||
162 | |||
163 | struct kimage_arch { | ||
164 | pgd_t *pgd; | ||
165 | #ifdef CONFIG_X86_PAE | ||
166 | pmd_t *pmd0; | ||
167 | pmd_t *pmd1; | ||
168 | #endif | ||
169 | pte_t *pte0; | ||
170 | pte_t *pte1; | ||
171 | }; | ||
172 | #endif | ||
173 | |||
173 | #endif /* __ASSEMBLY__ */ | 174 | #endif /* __ASSEMBLY__ */ |
174 | 175 | ||
175 | #endif /* _ASM_X86_KEXEC_H */ | 176 | #endif /* _ASM_X86_KEXEC_H */ |
diff --git a/arch/x86/include/asm/kvm_host.h b/arch/x86/include/asm/kvm_host.h index 8346be87cfa1..730843d1d2fb 100644 --- a/arch/x86/include/asm/kvm_host.h +++ b/arch/x86/include/asm/kvm_host.h | |||
@@ -21,6 +21,7 @@ | |||
21 | 21 | ||
22 | #include <asm/pvclock-abi.h> | 22 | #include <asm/pvclock-abi.h> |
23 | #include <asm/desc.h> | 23 | #include <asm/desc.h> |
24 | #include <asm/mtrr.h> | ||
24 | 25 | ||
25 | #define KVM_MAX_VCPUS 16 | 26 | #define KVM_MAX_VCPUS 16 |
26 | #define KVM_MEMORY_SLOTS 32 | 27 | #define KVM_MEMORY_SLOTS 32 |
@@ -86,6 +87,7 @@ | |||
86 | #define KVM_MIN_FREE_MMU_PAGES 5 | 87 | #define KVM_MIN_FREE_MMU_PAGES 5 |
87 | #define KVM_REFILL_PAGES 25 | 88 | #define KVM_REFILL_PAGES 25 |
88 | #define KVM_MAX_CPUID_ENTRIES 40 | 89 | #define KVM_MAX_CPUID_ENTRIES 40 |
90 | #define KVM_NR_FIXED_MTRR_REGION 88 | ||
89 | #define KVM_NR_VAR_MTRR 8 | 91 | #define KVM_NR_VAR_MTRR 8 |
90 | 92 | ||
91 | extern spinlock_t kvm_lock; | 93 | extern spinlock_t kvm_lock; |
@@ -180,6 +182,8 @@ struct kvm_mmu_page { | |||
180 | struct list_head link; | 182 | struct list_head link; |
181 | struct hlist_node hash_link; | 183 | struct hlist_node hash_link; |
182 | 184 | ||
185 | struct list_head oos_link; | ||
186 | |||
183 | /* | 187 | /* |
184 | * The following two entries are used to key the shadow page in the | 188 | * The following two entries are used to key the shadow page in the |
185 | * hash table. | 189 | * hash table. |
@@ -190,13 +194,16 @@ struct kvm_mmu_page { | |||
190 | u64 *spt; | 194 | u64 *spt; |
191 | /* hold the gfn of each spte inside spt */ | 195 | /* hold the gfn of each spte inside spt */ |
192 | gfn_t *gfns; | 196 | gfn_t *gfns; |
193 | unsigned long slot_bitmap; /* One bit set per slot which has memory | 197 | /* |
194 | * in this shadow page. | 198 | * One bit set per slot which has memory |
195 | */ | 199 | * in this shadow page. |
200 | */ | ||
201 | DECLARE_BITMAP(slot_bitmap, KVM_MEMORY_SLOTS + KVM_PRIVATE_MEM_SLOTS); | ||
196 | int multimapped; /* More than one parent_pte? */ | 202 | int multimapped; /* More than one parent_pte? */ |
197 | int root_count; /* Currently serving as active root */ | 203 | int root_count; /* Currently serving as active root */ |
198 | bool unsync; | 204 | bool unsync; |
199 | bool unsync_children; | 205 | bool global; |
206 | unsigned int unsync_children; | ||
200 | union { | 207 | union { |
201 | u64 *parent_pte; /* !multimapped */ | 208 | u64 *parent_pte; /* !multimapped */ |
202 | struct hlist_head parent_ptes; /* multimapped, kvm_pte_chain */ | 209 | struct hlist_head parent_ptes; /* multimapped, kvm_pte_chain */ |
@@ -327,8 +334,10 @@ struct kvm_vcpu_arch { | |||
327 | 334 | ||
328 | bool nmi_pending; | 335 | bool nmi_pending; |
329 | bool nmi_injected; | 336 | bool nmi_injected; |
337 | bool nmi_window_open; | ||
330 | 338 | ||
331 | u64 mtrr[0x100]; | 339 | struct mtrr_state_type mtrr_state; |
340 | u32 pat; | ||
332 | }; | 341 | }; |
333 | 342 | ||
334 | struct kvm_mem_alias { | 343 | struct kvm_mem_alias { |
@@ -350,11 +359,13 @@ struct kvm_arch{ | |||
350 | */ | 359 | */ |
351 | struct list_head active_mmu_pages; | 360 | struct list_head active_mmu_pages; |
352 | struct list_head assigned_dev_head; | 361 | struct list_head assigned_dev_head; |
353 | struct dmar_domain *intel_iommu_domain; | 362 | struct list_head oos_global_pages; |
363 | struct iommu_domain *iommu_domain; | ||
354 | struct kvm_pic *vpic; | 364 | struct kvm_pic *vpic; |
355 | struct kvm_ioapic *vioapic; | 365 | struct kvm_ioapic *vioapic; |
356 | struct kvm_pit *vpit; | 366 | struct kvm_pit *vpit; |
357 | struct hlist_head irq_ack_notifier_list; | 367 | struct hlist_head irq_ack_notifier_list; |
368 | int vapics_in_nmi_mode; | ||
358 | 369 | ||
359 | int round_robin_prev_vcpu; | 370 | int round_robin_prev_vcpu; |
360 | unsigned int tss_addr; | 371 | unsigned int tss_addr; |
@@ -378,6 +389,7 @@ struct kvm_vm_stat { | |||
378 | u32 mmu_recycled; | 389 | u32 mmu_recycled; |
379 | u32 mmu_cache_miss; | 390 | u32 mmu_cache_miss; |
380 | u32 mmu_unsync; | 391 | u32 mmu_unsync; |
392 | u32 mmu_unsync_global; | ||
381 | u32 remote_tlb_flush; | 393 | u32 remote_tlb_flush; |
382 | u32 lpages; | 394 | u32 lpages; |
383 | }; | 395 | }; |
@@ -397,6 +409,7 @@ struct kvm_vcpu_stat { | |||
397 | u32 halt_exits; | 409 | u32 halt_exits; |
398 | u32 halt_wakeup; | 410 | u32 halt_wakeup; |
399 | u32 request_irq_exits; | 411 | u32 request_irq_exits; |
412 | u32 request_nmi_exits; | ||
400 | u32 irq_exits; | 413 | u32 irq_exits; |
401 | u32 host_state_reload; | 414 | u32 host_state_reload; |
402 | u32 efer_reload; | 415 | u32 efer_reload; |
@@ -405,6 +418,7 @@ struct kvm_vcpu_stat { | |||
405 | u32 insn_emulation_fail; | 418 | u32 insn_emulation_fail; |
406 | u32 hypercalls; | 419 | u32 hypercalls; |
407 | u32 irq_injections; | 420 | u32 irq_injections; |
421 | u32 nmi_injections; | ||
408 | }; | 422 | }; |
409 | 423 | ||
410 | struct descriptor_table { | 424 | struct descriptor_table { |
@@ -477,6 +491,7 @@ struct kvm_x86_ops { | |||
477 | 491 | ||
478 | int (*set_tss_addr)(struct kvm *kvm, unsigned int addr); | 492 | int (*set_tss_addr)(struct kvm *kvm, unsigned int addr); |
479 | int (*get_tdp_level)(void); | 493 | int (*get_tdp_level)(void); |
494 | int (*get_mt_mask_shift)(void); | ||
480 | }; | 495 | }; |
481 | 496 | ||
482 | extern struct kvm_x86_ops *kvm_x86_ops; | 497 | extern struct kvm_x86_ops *kvm_x86_ops; |
@@ -490,7 +505,7 @@ int kvm_mmu_setup(struct kvm_vcpu *vcpu); | |||
490 | void kvm_mmu_set_nonpresent_ptes(u64 trap_pte, u64 notrap_pte); | 505 | void kvm_mmu_set_nonpresent_ptes(u64 trap_pte, u64 notrap_pte); |
491 | void kvm_mmu_set_base_ptes(u64 base_pte); | 506 | void kvm_mmu_set_base_ptes(u64 base_pte); |
492 | void kvm_mmu_set_mask_ptes(u64 user_mask, u64 accessed_mask, | 507 | void kvm_mmu_set_mask_ptes(u64 user_mask, u64 accessed_mask, |
493 | u64 dirty_mask, u64 nx_mask, u64 x_mask); | 508 | u64 dirty_mask, u64 nx_mask, u64 x_mask, u64 mt_mask); |
494 | 509 | ||
495 | int kvm_mmu_reset_context(struct kvm_vcpu *vcpu); | 510 | int kvm_mmu_reset_context(struct kvm_vcpu *vcpu); |
496 | void kvm_mmu_slot_remove_write_access(struct kvm *kvm, int slot); | 511 | void kvm_mmu_slot_remove_write_access(struct kvm *kvm, int slot); |
@@ -587,12 +602,14 @@ unsigned long segment_base(u16 selector); | |||
587 | 602 | ||
588 | void kvm_mmu_flush_tlb(struct kvm_vcpu *vcpu); | 603 | void kvm_mmu_flush_tlb(struct kvm_vcpu *vcpu); |
589 | void kvm_mmu_pte_write(struct kvm_vcpu *vcpu, gpa_t gpa, | 604 | void kvm_mmu_pte_write(struct kvm_vcpu *vcpu, gpa_t gpa, |
590 | const u8 *new, int bytes); | 605 | const u8 *new, int bytes, |
606 | bool guest_initiated); | ||
591 | int kvm_mmu_unprotect_page_virt(struct kvm_vcpu *vcpu, gva_t gva); | 607 | int kvm_mmu_unprotect_page_virt(struct kvm_vcpu *vcpu, gva_t gva); |
592 | void __kvm_mmu_free_some_pages(struct kvm_vcpu *vcpu); | 608 | void __kvm_mmu_free_some_pages(struct kvm_vcpu *vcpu); |
593 | int kvm_mmu_load(struct kvm_vcpu *vcpu); | 609 | int kvm_mmu_load(struct kvm_vcpu *vcpu); |
594 | void kvm_mmu_unload(struct kvm_vcpu *vcpu); | 610 | void kvm_mmu_unload(struct kvm_vcpu *vcpu); |
595 | void kvm_mmu_sync_roots(struct kvm_vcpu *vcpu); | 611 | void kvm_mmu_sync_roots(struct kvm_vcpu *vcpu); |
612 | void kvm_mmu_sync_global(struct kvm_vcpu *vcpu); | ||
596 | 613 | ||
597 | int kvm_emulate_hypercall(struct kvm_vcpu *vcpu); | 614 | int kvm_emulate_hypercall(struct kvm_vcpu *vcpu); |
598 | 615 | ||
@@ -607,6 +624,8 @@ void kvm_disable_tdp(void); | |||
607 | int load_pdptrs(struct kvm_vcpu *vcpu, unsigned long cr3); | 624 | int load_pdptrs(struct kvm_vcpu *vcpu, unsigned long cr3); |
608 | int complete_pio(struct kvm_vcpu *vcpu); | 625 | int complete_pio(struct kvm_vcpu *vcpu); |
609 | 626 | ||
627 | struct kvm_memory_slot *gfn_to_memslot_unaliased(struct kvm *kvm, gfn_t gfn); | ||
628 | |||
610 | static inline struct kvm_mmu_page *page_header(hpa_t shadow_page) | 629 | static inline struct kvm_mmu_page *page_header(hpa_t shadow_page) |
611 | { | 630 | { |
612 | struct page *page = pfn_to_page(shadow_page >> PAGE_SHIFT); | 631 | struct page *page = pfn_to_page(shadow_page >> PAGE_SHIFT); |
@@ -702,18 +721,6 @@ static inline void kvm_inject_gp(struct kvm_vcpu *vcpu, u32 error_code) | |||
702 | kvm_queue_exception_e(vcpu, GP_VECTOR, error_code); | 721 | kvm_queue_exception_e(vcpu, GP_VECTOR, error_code); |
703 | } | 722 | } |
704 | 723 | ||
705 | #define ASM_VMX_VMCLEAR_RAX ".byte 0x66, 0x0f, 0xc7, 0x30" | ||
706 | #define ASM_VMX_VMLAUNCH ".byte 0x0f, 0x01, 0xc2" | ||
707 | #define ASM_VMX_VMRESUME ".byte 0x0f, 0x01, 0xc3" | ||
708 | #define ASM_VMX_VMPTRLD_RAX ".byte 0x0f, 0xc7, 0x30" | ||
709 | #define ASM_VMX_VMREAD_RDX_RAX ".byte 0x0f, 0x78, 0xd0" | ||
710 | #define ASM_VMX_VMWRITE_RAX_RDX ".byte 0x0f, 0x79, 0xd0" | ||
711 | #define ASM_VMX_VMWRITE_RSP_RDX ".byte 0x0f, 0x79, 0xd4" | ||
712 | #define ASM_VMX_VMXOFF ".byte 0x0f, 0x01, 0xc4" | ||
713 | #define ASM_VMX_VMXON_RAX ".byte 0xf3, 0x0f, 0xc7, 0x30" | ||
714 | #define ASM_VMX_INVEPT ".byte 0x66, 0x0f, 0x38, 0x80, 0x08" | ||
715 | #define ASM_VMX_INVVPID ".byte 0x66, 0x0f, 0x38, 0x81, 0x08" | ||
716 | |||
717 | #define MSR_IA32_TIME_STAMP_COUNTER 0x010 | 724 | #define MSR_IA32_TIME_STAMP_COUNTER 0x010 |
718 | 725 | ||
719 | #define TSS_IOPB_BASE_OFFSET 0x66 | 726 | #define TSS_IOPB_BASE_OFFSET 0x66 |
diff --git a/arch/x86/include/asm/kvm_x86_emulate.h b/arch/x86/include/asm/kvm_x86_emulate.h index 25179a29f208..6a159732881a 100644 --- a/arch/x86/include/asm/kvm_x86_emulate.h +++ b/arch/x86/include/asm/kvm_x86_emulate.h | |||
@@ -123,6 +123,7 @@ struct decode_cache { | |||
123 | u8 ad_bytes; | 123 | u8 ad_bytes; |
124 | u8 rex_prefix; | 124 | u8 rex_prefix; |
125 | struct operand src; | 125 | struct operand src; |
126 | struct operand src2; | ||
126 | struct operand dst; | 127 | struct operand dst; |
127 | bool has_seg_override; | 128 | bool has_seg_override; |
128 | u8 seg_override; | 129 | u8 seg_override; |
@@ -146,22 +147,18 @@ struct x86_emulate_ctxt { | |||
146 | /* Register state before/after emulation. */ | 147 | /* Register state before/after emulation. */ |
147 | struct kvm_vcpu *vcpu; | 148 | struct kvm_vcpu *vcpu; |
148 | 149 | ||
149 | /* Linear faulting address (if emulating a page-faulting instruction) */ | ||
150 | unsigned long eflags; | 150 | unsigned long eflags; |
151 | |||
152 | /* Emulated execution mode, represented by an X86EMUL_MODE value. */ | 151 | /* Emulated execution mode, represented by an X86EMUL_MODE value. */ |
153 | int mode; | 152 | int mode; |
154 | |||
155 | u32 cs_base; | 153 | u32 cs_base; |
156 | 154 | ||
157 | /* decode cache */ | 155 | /* decode cache */ |
158 | |||
159 | struct decode_cache decode; | 156 | struct decode_cache decode; |
160 | }; | 157 | }; |
161 | 158 | ||
162 | /* Repeat String Operation Prefix */ | 159 | /* Repeat String Operation Prefix */ |
163 | #define REPE_PREFIX 1 | 160 | #define REPE_PREFIX 1 |
164 | #define REPNE_PREFIX 2 | 161 | #define REPNE_PREFIX 2 |
165 | 162 | ||
166 | /* Execution mode, passed to the emulator. */ | 163 | /* Execution mode, passed to the emulator. */ |
167 | #define X86EMUL_MODE_REAL 0 /* Real mode. */ | 164 | #define X86EMUL_MODE_REAL 0 /* Real mode. */ |
@@ -170,7 +167,7 @@ struct x86_emulate_ctxt { | |||
170 | #define X86EMUL_MODE_PROT64 8 /* 64-bit (long) mode. */ | 167 | #define X86EMUL_MODE_PROT64 8 /* 64-bit (long) mode. */ |
171 | 168 | ||
172 | /* Host execution mode. */ | 169 | /* Host execution mode. */ |
173 | #if defined(__i386__) | 170 | #if defined(CONFIG_X86_32) |
174 | #define X86EMUL_MODE_HOST X86EMUL_MODE_PROT32 | 171 | #define X86EMUL_MODE_HOST X86EMUL_MODE_PROT32 |
175 | #elif defined(CONFIG_X86_64) | 172 | #elif defined(CONFIG_X86_64) |
176 | #define X86EMUL_MODE_HOST X86EMUL_MODE_PROT64 | 173 | #define X86EMUL_MODE_HOST X86EMUL_MODE_PROT64 |
diff --git a/arch/x86/include/asm/lguest.h b/arch/x86/include/asm/lguest.h index d28a507cef39..1caf57628b9c 100644 --- a/arch/x86/include/asm/lguest.h +++ b/arch/x86/include/asm/lguest.h | |||
@@ -15,7 +15,7 @@ | |||
15 | #define SHARED_SWITCHER_PAGES \ | 15 | #define SHARED_SWITCHER_PAGES \ |
16 | DIV_ROUND_UP(end_switcher_text - start_switcher_text, PAGE_SIZE) | 16 | DIV_ROUND_UP(end_switcher_text - start_switcher_text, PAGE_SIZE) |
17 | /* Pages for switcher itself, then two pages per cpu */ | 17 | /* Pages for switcher itself, then two pages per cpu */ |
18 | #define TOTAL_SWITCHER_PAGES (SHARED_SWITCHER_PAGES + 2 * NR_CPUS) | 18 | #define TOTAL_SWITCHER_PAGES (SHARED_SWITCHER_PAGES + 2 * nr_cpu_ids) |
19 | 19 | ||
20 | /* We map at -4M for ease of mapping into the guest (one PTE page). */ | 20 | /* We map at -4M for ease of mapping into the guest (one PTE page). */ |
21 | #define SWITCHER_ADDR 0xFFC00000 | 21 | #define SWITCHER_ADDR 0xFFC00000 |
diff --git a/arch/x86/include/asm/linkage.h b/arch/x86/include/asm/linkage.h index f61ee8f937e4..5d98d0b68ffc 100644 --- a/arch/x86/include/asm/linkage.h +++ b/arch/x86/include/asm/linkage.h | |||
@@ -57,5 +57,65 @@ | |||
57 | #define __ALIGN_STR ".align 16,0x90" | 57 | #define __ALIGN_STR ".align 16,0x90" |
58 | #endif | 58 | #endif |
59 | 59 | ||
60 | /* | ||
61 | * Detect unbalanced, missing, or mixed use of the | ||
62 | * ENTRY_X86/END_X86 and | ||
63 | * KPROBE_ENTRY_X86/KPROBE_END_X86 macro pairs. | ||
64 | */ | ||
65 | #define __set_entry_x86 .set ENTRY_X86_IN, 0 | ||
66 | #define __unset_entry_x86 .set ENTRY_X86_IN, 1 | ||
67 | #define __set_kprobe_x86 .set KPROBE_X86_IN, 0 | ||
68 | #define __unset_kprobe_x86 .set KPROBE_X86_IN, 1 | ||
69 | |||
70 | #define __macro_err_x86 .error "ENTRY_X86/KPROBE_X86 unbalanced,missed,mixed" | ||
71 | |||
72 | #define __check_entry_x86 \ | ||
73 | .ifdef ENTRY_X86_IN; \ | ||
74 | .ifeq ENTRY_X86_IN; \ | ||
75 | __macro_err_x86; \ | ||
76 | .abort; \ | ||
77 | .endif; \ | ||
78 | .endif | ||
79 | |||
80 | #define __check_kprobe_x86 \ | ||
81 | .ifdef KPROBE_X86_IN; \ | ||
82 | .ifeq KPROBE_X86_IN; \ | ||
83 | __macro_err_x86; \ | ||
84 | .abort; \ | ||
85 | .endif; \ | ||
86 | .endif | ||
87 | |||
88 | #define __check_entry_kprobe_x86 \ | ||
89 | __check_entry_x86; \ | ||
90 | __check_kprobe_x86 | ||
91 | |||
92 | #define ENTRY_KPROBE_FINAL_X86 __check_entry_kprobe_x86 | ||
93 | |||
94 | #define ENTRY_X86(name) \ | ||
95 | __check_entry_kprobe_x86; \ | ||
96 | __set_entry_x86; \ | ||
97 | .globl name; \ | ||
98 | __ALIGN; \ | ||
99 | name: | ||
100 | |||
101 | #define END_X86(name) \ | ||
102 | __unset_entry_x86; \ | ||
103 | __check_entry_kprobe_x86; \ | ||
104 | .size name, .-name | ||
105 | |||
106 | #define KPROBE_ENTRY_X86(name) \ | ||
107 | __check_entry_kprobe_x86; \ | ||
108 | __set_kprobe_x86; \ | ||
109 | .pushsection .kprobes.text, "ax"; \ | ||
110 | .globl name; \ | ||
111 | __ALIGN; \ | ||
112 | name: | ||
113 | |||
114 | #define KPROBE_END_X86(name) \ | ||
115 | __unset_kprobe_x86; \ | ||
116 | __check_entry_kprobe_x86; \ | ||
117 | .size name, .-name; \ | ||
118 | .popsection | ||
119 | |||
60 | #endif /* _ASM_X86_LINKAGE_H */ | 120 | #endif /* _ASM_X86_LINKAGE_H */ |
61 | 121 | ||
diff --git a/arch/x86/include/asm/mach-default/mach_apic.h b/arch/x86/include/asm/mach-default/mach_apic.h index ff3a6c236c00..cc09cbbee27e 100644 --- a/arch/x86/include/asm/mach-default/mach_apic.h +++ b/arch/x86/include/asm/mach-default/mach_apic.h | |||
@@ -8,12 +8,12 @@ | |||
8 | 8 | ||
9 | #define APIC_DFR_VALUE (APIC_DFR_FLAT) | 9 | #define APIC_DFR_VALUE (APIC_DFR_FLAT) |
10 | 10 | ||
11 | static inline cpumask_t target_cpus(void) | 11 | static inline const struct cpumask *target_cpus(void) |
12 | { | 12 | { |
13 | #ifdef CONFIG_SMP | 13 | #ifdef CONFIG_SMP |
14 | return cpu_online_map; | 14 | return cpu_online_mask; |
15 | #else | 15 | #else |
16 | return cpumask_of_cpu(0); | 16 | return cpumask_of(0); |
17 | #endif | 17 | #endif |
18 | } | 18 | } |
19 | 19 | ||
@@ -28,15 +28,18 @@ static inline cpumask_t target_cpus(void) | |||
28 | #define apic_id_registered (genapic->apic_id_registered) | 28 | #define apic_id_registered (genapic->apic_id_registered) |
29 | #define init_apic_ldr (genapic->init_apic_ldr) | 29 | #define init_apic_ldr (genapic->init_apic_ldr) |
30 | #define cpu_mask_to_apicid (genapic->cpu_mask_to_apicid) | 30 | #define cpu_mask_to_apicid (genapic->cpu_mask_to_apicid) |
31 | #define cpu_mask_to_apicid_and (genapic->cpu_mask_to_apicid_and) | ||
31 | #define phys_pkg_id (genapic->phys_pkg_id) | 32 | #define phys_pkg_id (genapic->phys_pkg_id) |
32 | #define vector_allocation_domain (genapic->vector_allocation_domain) | 33 | #define vector_allocation_domain (genapic->vector_allocation_domain) |
33 | #define read_apic_id() (GET_APIC_ID(apic_read(APIC_ID))) | 34 | #define read_apic_id() (GET_APIC_ID(apic_read(APIC_ID))) |
34 | #define send_IPI_self (genapic->send_IPI_self) | 35 | #define send_IPI_self (genapic->send_IPI_self) |
36 | #define wakeup_secondary_cpu (genapic->wakeup_cpu) | ||
35 | extern void setup_apic_routing(void); | 37 | extern void setup_apic_routing(void); |
36 | #else | 38 | #else |
37 | #define INT_DELIVERY_MODE dest_LowestPrio | 39 | #define INT_DELIVERY_MODE dest_LowestPrio |
38 | #define INT_DEST_MODE 1 /* logical delivery broadcast to all procs */ | 40 | #define INT_DEST_MODE 1 /* logical delivery broadcast to all procs */ |
39 | #define TARGET_CPUS (target_cpus()) | 41 | #define TARGET_CPUS (target_cpus()) |
42 | #define wakeup_secondary_cpu wakeup_secondary_cpu_via_init | ||
40 | /* | 43 | /* |
41 | * Set up the logical destination ID. | 44 | * Set up the logical destination ID. |
42 | * | 45 | * |
@@ -59,9 +62,19 @@ static inline int apic_id_registered(void) | |||
59 | return physid_isset(read_apic_id(), phys_cpu_present_map); | 62 | return physid_isset(read_apic_id(), phys_cpu_present_map); |
60 | } | 63 | } |
61 | 64 | ||
62 | static inline unsigned int cpu_mask_to_apicid(cpumask_t cpumask) | 65 | static inline unsigned int cpu_mask_to_apicid(const struct cpumask *cpumask) |
63 | { | 66 | { |
64 | return cpus_addr(cpumask)[0]; | 67 | return cpumask_bits(cpumask)[0]; |
68 | } | ||
69 | |||
70 | static inline unsigned int cpu_mask_to_apicid_and(const struct cpumask *cpumask, | ||
71 | const struct cpumask *andmask) | ||
72 | { | ||
73 | unsigned long mask1 = cpumask_bits(cpumask)[0]; | ||
74 | unsigned long mask2 = cpumask_bits(andmask)[0]; | ||
75 | unsigned long mask3 = cpumask_bits(cpu_online_mask)[0]; | ||
76 | |||
77 | return (unsigned int)(mask1 & mask2 & mask3); | ||
65 | } | 78 | } |
66 | 79 | ||
67 | static inline u32 phys_pkg_id(u32 cpuid_apic, int index_msb) | 80 | static inline u32 phys_pkg_id(u32 cpuid_apic, int index_msb) |
@@ -86,7 +99,7 @@ static inline int apicid_to_node(int logical_apicid) | |||
86 | #endif | 99 | #endif |
87 | } | 100 | } |
88 | 101 | ||
89 | static inline cpumask_t vector_allocation_domain(int cpu) | 102 | static inline void vector_allocation_domain(int cpu, struct cpumask *retmask) |
90 | { | 103 | { |
91 | /* Careful. Some cpus do not strictly honor the set of cpus | 104 | /* Careful. Some cpus do not strictly honor the set of cpus |
92 | * specified in the interrupt destination when using lowest | 105 | * specified in the interrupt destination when using lowest |
@@ -96,8 +109,7 @@ static inline cpumask_t vector_allocation_domain(int cpu) | |||
96 | * deliver interrupts to the wrong hyperthread when only one | 109 | * deliver interrupts to the wrong hyperthread when only one |
97 | * hyperthread was specified in the interrupt desitination. | 110 | * hyperthread was specified in the interrupt desitination. |
98 | */ | 111 | */ |
99 | cpumask_t domain = { { [0] = APIC_ALL_CPUS, } }; | 112 | *retmask = (cpumask_t) { { [0] = APIC_ALL_CPUS } }; |
100 | return domain; | ||
101 | } | 113 | } |
102 | #endif | 114 | #endif |
103 | 115 | ||
@@ -129,7 +141,7 @@ static inline int cpu_to_logical_apicid(int cpu) | |||
129 | 141 | ||
130 | static inline int cpu_present_to_apicid(int mps_cpu) | 142 | static inline int cpu_present_to_apicid(int mps_cpu) |
131 | { | 143 | { |
132 | if (mps_cpu < NR_CPUS && cpu_present(mps_cpu)) | 144 | if (mps_cpu < nr_cpu_ids && cpu_present(mps_cpu)) |
133 | return (int)per_cpu(x86_bios_cpu_apicid, mps_cpu); | 145 | return (int)per_cpu(x86_bios_cpu_apicid, mps_cpu); |
134 | else | 146 | else |
135 | return BAD_APICID; | 147 | return BAD_APICID; |
diff --git a/arch/x86/include/asm/mach-default/mach_ipi.h b/arch/x86/include/asm/mach-default/mach_ipi.h index fabca01ebacf..191312d155da 100644 --- a/arch/x86/include/asm/mach-default/mach_ipi.h +++ b/arch/x86/include/asm/mach-default/mach_ipi.h | |||
@@ -4,7 +4,8 @@ | |||
4 | /* Avoid include hell */ | 4 | /* Avoid include hell */ |
5 | #define NMI_VECTOR 0x02 | 5 | #define NMI_VECTOR 0x02 |
6 | 6 | ||
7 | void send_IPI_mask_bitmask(cpumask_t mask, int vector); | 7 | void send_IPI_mask_bitmask(const struct cpumask *mask, int vector); |
8 | void send_IPI_mask_allbutself(const struct cpumask *mask, int vector); | ||
8 | void __send_IPI_shortcut(unsigned int shortcut, int vector); | 9 | void __send_IPI_shortcut(unsigned int shortcut, int vector); |
9 | 10 | ||
10 | extern int no_broadcast; | 11 | extern int no_broadcast; |
@@ -12,28 +13,27 @@ extern int no_broadcast; | |||
12 | #ifdef CONFIG_X86_64 | 13 | #ifdef CONFIG_X86_64 |
13 | #include <asm/genapic.h> | 14 | #include <asm/genapic.h> |
14 | #define send_IPI_mask (genapic->send_IPI_mask) | 15 | #define send_IPI_mask (genapic->send_IPI_mask) |
16 | #define send_IPI_mask_allbutself (genapic->send_IPI_mask_allbutself) | ||
15 | #else | 17 | #else |
16 | static inline void send_IPI_mask(cpumask_t mask, int vector) | 18 | static inline void send_IPI_mask(const struct cpumask *mask, int vector) |
17 | { | 19 | { |
18 | send_IPI_mask_bitmask(mask, vector); | 20 | send_IPI_mask_bitmask(mask, vector); |
19 | } | 21 | } |
22 | void send_IPI_mask_allbutself(const struct cpumask *mask, int vector); | ||
20 | #endif | 23 | #endif |
21 | 24 | ||
22 | static inline void __local_send_IPI_allbutself(int vector) | 25 | static inline void __local_send_IPI_allbutself(int vector) |
23 | { | 26 | { |
24 | if (no_broadcast || vector == NMI_VECTOR) { | 27 | if (no_broadcast || vector == NMI_VECTOR) |
25 | cpumask_t mask = cpu_online_map; | 28 | send_IPI_mask_allbutself(cpu_online_mask, vector); |
26 | 29 | else | |
27 | cpu_clear(smp_processor_id(), mask); | ||
28 | send_IPI_mask(mask, vector); | ||
29 | } else | ||
30 | __send_IPI_shortcut(APIC_DEST_ALLBUT, vector); | 30 | __send_IPI_shortcut(APIC_DEST_ALLBUT, vector); |
31 | } | 31 | } |
32 | 32 | ||
33 | static inline void __local_send_IPI_all(int vector) | 33 | static inline void __local_send_IPI_all(int vector) |
34 | { | 34 | { |
35 | if (no_broadcast || vector == NMI_VECTOR) | 35 | if (no_broadcast || vector == NMI_VECTOR) |
36 | send_IPI_mask(cpu_online_map, vector); | 36 | send_IPI_mask(cpu_online_mask, vector); |
37 | else | 37 | else |
38 | __send_IPI_shortcut(APIC_DEST_ALLINC, vector); | 38 | __send_IPI_shortcut(APIC_DEST_ALLINC, vector); |
39 | } | 39 | } |
diff --git a/arch/x86/include/asm/mach-default/mach_wakecpu.h b/arch/x86/include/asm/mach-default/mach_wakecpu.h index 9d80db91e992..ceb013660146 100644 --- a/arch/x86/include/asm/mach-default/mach_wakecpu.h +++ b/arch/x86/include/asm/mach-default/mach_wakecpu.h | |||
@@ -1,17 +1,8 @@ | |||
1 | #ifndef _ASM_X86_MACH_DEFAULT_MACH_WAKECPU_H | 1 | #ifndef _ASM_X86_MACH_DEFAULT_MACH_WAKECPU_H |
2 | #define _ASM_X86_MACH_DEFAULT_MACH_WAKECPU_H | 2 | #define _ASM_X86_MACH_DEFAULT_MACH_WAKECPU_H |
3 | 3 | ||
4 | /* | 4 | #define TRAMPOLINE_PHYS_LOW (0x467) |
5 | * This file copes with machines that wakeup secondary CPUs by the | 5 | #define TRAMPOLINE_PHYS_HIGH (0x469) |
6 | * INIT, INIT, STARTUP sequence. | ||
7 | */ | ||
8 | |||
9 | #define WAKE_SECONDARY_VIA_INIT | ||
10 | |||
11 | #define TRAMPOLINE_LOW phys_to_virt(0x467) | ||
12 | #define TRAMPOLINE_HIGH phys_to_virt(0x469) | ||
13 | |||
14 | #define boot_cpu_apicid boot_cpu_physical_apicid | ||
15 | 6 | ||
16 | static inline void wait_for_init_deassert(atomic_t *deassert) | 7 | static inline void wait_for_init_deassert(atomic_t *deassert) |
17 | { | 8 | { |
@@ -33,9 +24,12 @@ static inline void restore_NMI_vector(unsigned short *high, unsigned short *low) | |||
33 | { | 24 | { |
34 | } | 25 | } |
35 | 26 | ||
36 | #define inquire_remote_apic(apicid) do { \ | 27 | extern void __inquire_remote_apic(int apicid); |
37 | if (apic_verbosity >= APIC_DEBUG) \ | 28 | |
38 | __inquire_remote_apic(apicid); \ | 29 | static inline void inquire_remote_apic(int apicid) |
39 | } while (0) | 30 | { |
31 | if (apic_verbosity >= APIC_DEBUG) | ||
32 | __inquire_remote_apic(apicid); | ||
33 | } | ||
40 | 34 | ||
41 | #endif /* _ASM_X86_MACH_DEFAULT_MACH_WAKECPU_H */ | 35 | #endif /* _ASM_X86_MACH_DEFAULT_MACH_WAKECPU_H */ |
diff --git a/arch/x86/include/asm/mach-default/smpboot_hooks.h b/arch/x86/include/asm/mach-default/smpboot_hooks.h index dbab36d64d48..23bf52103b89 100644 --- a/arch/x86/include/asm/mach-default/smpboot_hooks.h +++ b/arch/x86/include/asm/mach-default/smpboot_hooks.h | |||
@@ -13,9 +13,11 @@ static inline void smpboot_setup_warm_reset_vector(unsigned long start_eip) | |||
13 | CMOS_WRITE(0xa, 0xf); | 13 | CMOS_WRITE(0xa, 0xf); |
14 | local_flush_tlb(); | 14 | local_flush_tlb(); |
15 | pr_debug("1.\n"); | 15 | pr_debug("1.\n"); |
16 | *((volatile unsigned short *) TRAMPOLINE_HIGH) = start_eip >> 4; | 16 | *((volatile unsigned short *)phys_to_virt(TRAMPOLINE_PHYS_HIGH)) = |
17 | start_eip >> 4; | ||
17 | pr_debug("2.\n"); | 18 | pr_debug("2.\n"); |
18 | *((volatile unsigned short *) TRAMPOLINE_LOW) = start_eip & 0xf; | 19 | *((volatile unsigned short *)phys_to_virt(TRAMPOLINE_PHYS_LOW)) = |
20 | start_eip & 0xf; | ||
19 | pr_debug("3.\n"); | 21 | pr_debug("3.\n"); |
20 | } | 22 | } |
21 | 23 | ||
@@ -32,7 +34,7 @@ static inline void smpboot_restore_warm_reset_vector(void) | |||
32 | */ | 34 | */ |
33 | CMOS_WRITE(0, 0xf); | 35 | CMOS_WRITE(0, 0xf); |
34 | 36 | ||
35 | *((volatile long *) phys_to_virt(0x467)) = 0; | 37 | *((volatile long *)phys_to_virt(TRAMPOLINE_PHYS_LOW)) = 0; |
36 | } | 38 | } |
37 | 39 | ||
38 | static inline void __init smpboot_setup_io_apic(void) | 40 | static inline void __init smpboot_setup_io_apic(void) |
diff --git a/arch/x86/include/asm/mach-generic/mach_apic.h b/arch/x86/include/asm/mach-generic/mach_apic.h index 5180bd7478fb..48553e958ad5 100644 --- a/arch/x86/include/asm/mach-generic/mach_apic.h +++ b/arch/x86/include/asm/mach-generic/mach_apic.h | |||
@@ -24,9 +24,11 @@ | |||
24 | #define check_phys_apicid_present (genapic->check_phys_apicid_present) | 24 | #define check_phys_apicid_present (genapic->check_phys_apicid_present) |
25 | #define check_apicid_used (genapic->check_apicid_used) | 25 | #define check_apicid_used (genapic->check_apicid_used) |
26 | #define cpu_mask_to_apicid (genapic->cpu_mask_to_apicid) | 26 | #define cpu_mask_to_apicid (genapic->cpu_mask_to_apicid) |
27 | #define cpu_mask_to_apicid_and (genapic->cpu_mask_to_apicid_and) | ||
27 | #define vector_allocation_domain (genapic->vector_allocation_domain) | 28 | #define vector_allocation_domain (genapic->vector_allocation_domain) |
28 | #define enable_apic_mode (genapic->enable_apic_mode) | 29 | #define enable_apic_mode (genapic->enable_apic_mode) |
29 | #define phys_pkg_id (genapic->phys_pkg_id) | 30 | #define phys_pkg_id (genapic->phys_pkg_id) |
31 | #define wakeup_secondary_cpu (genapic->wakeup_cpu) | ||
30 | 32 | ||
31 | extern void generic_bigsmp_probe(void); | 33 | extern void generic_bigsmp_probe(void); |
32 | 34 | ||
diff --git a/arch/x86/include/asm/mach-generic/mach_wakecpu.h b/arch/x86/include/asm/mach-generic/mach_wakecpu.h new file mode 100644 index 000000000000..1ab16b168c8a --- /dev/null +++ b/arch/x86/include/asm/mach-generic/mach_wakecpu.h | |||
@@ -0,0 +1,12 @@ | |||
1 | #ifndef _ASM_X86_MACH_GENERIC_MACH_WAKECPU_H | ||
2 | #define _ASM_X86_MACH_GENERIC_MACH_WAKECPU_H | ||
3 | |||
4 | #define TRAMPOLINE_PHYS_LOW (genapic->trampoline_phys_low) | ||
5 | #define TRAMPOLINE_PHYS_HIGH (genapic->trampoline_phys_high) | ||
6 | #define wait_for_init_deassert (genapic->wait_for_init_deassert) | ||
7 | #define smp_callin_clear_local_apic (genapic->smp_callin_clear_local_apic) | ||
8 | #define store_NMI_vector (genapic->store_NMI_vector) | ||
9 | #define restore_NMI_vector (genapic->restore_NMI_vector) | ||
10 | #define inquire_remote_apic (genapic->inquire_remote_apic) | ||
11 | |||
12 | #endif /* _ASM_X86_MACH_GENERIC_MACH_APIC_H */ | ||
diff --git a/arch/x86/include/asm/mmu_context_32.h b/arch/x86/include/asm/mmu_context_32.h index 8e10015781fb..7e98ce1d2c0e 100644 --- a/arch/x86/include/asm/mmu_context_32.h +++ b/arch/x86/include/asm/mmu_context_32.h | |||
@@ -4,9 +4,8 @@ | |||
4 | static inline void enter_lazy_tlb(struct mm_struct *mm, struct task_struct *tsk) | 4 | static inline void enter_lazy_tlb(struct mm_struct *mm, struct task_struct *tsk) |
5 | { | 5 | { |
6 | #ifdef CONFIG_SMP | 6 | #ifdef CONFIG_SMP |
7 | unsigned cpu = smp_processor_id(); | 7 | if (x86_read_percpu(cpu_tlbstate.state) == TLBSTATE_OK) |
8 | if (per_cpu(cpu_tlbstate, cpu).state == TLBSTATE_OK) | 8 | x86_write_percpu(cpu_tlbstate.state, TLBSTATE_LAZY); |
9 | per_cpu(cpu_tlbstate, cpu).state = TLBSTATE_LAZY; | ||
10 | #endif | 9 | #endif |
11 | } | 10 | } |
12 | 11 | ||
@@ -20,8 +19,8 @@ static inline void switch_mm(struct mm_struct *prev, | |||
20 | /* stop flush ipis for the previous mm */ | 19 | /* stop flush ipis for the previous mm */ |
21 | cpu_clear(cpu, prev->cpu_vm_mask); | 20 | cpu_clear(cpu, prev->cpu_vm_mask); |
22 | #ifdef CONFIG_SMP | 21 | #ifdef CONFIG_SMP |
23 | per_cpu(cpu_tlbstate, cpu).state = TLBSTATE_OK; | 22 | x86_write_percpu(cpu_tlbstate.state, TLBSTATE_OK); |
24 | per_cpu(cpu_tlbstate, cpu).active_mm = next; | 23 | x86_write_percpu(cpu_tlbstate.active_mm, next); |
25 | #endif | 24 | #endif |
26 | cpu_set(cpu, next->cpu_vm_mask); | 25 | cpu_set(cpu, next->cpu_vm_mask); |
27 | 26 | ||
@@ -36,8 +35,8 @@ static inline void switch_mm(struct mm_struct *prev, | |||
36 | } | 35 | } |
37 | #ifdef CONFIG_SMP | 36 | #ifdef CONFIG_SMP |
38 | else { | 37 | else { |
39 | per_cpu(cpu_tlbstate, cpu).state = TLBSTATE_OK; | 38 | x86_write_percpu(cpu_tlbstate.state, TLBSTATE_OK); |
40 | BUG_ON(per_cpu(cpu_tlbstate, cpu).active_mm != next); | 39 | BUG_ON(x86_read_percpu(cpu_tlbstate.active_mm) != next); |
41 | 40 | ||
42 | if (!cpu_test_and_set(cpu, next->cpu_vm_mask)) { | 41 | if (!cpu_test_and_set(cpu, next->cpu_vm_mask)) { |
43 | /* We were in lazy tlb mode and leave_mm disabled | 42 | /* We were in lazy tlb mode and leave_mm disabled |
diff --git a/arch/x86/include/asm/mpspec.h b/arch/x86/include/asm/mpspec.h index 91885c28f66b..62d14ce3cd00 100644 --- a/arch/x86/include/asm/mpspec.h +++ b/arch/x86/include/asm/mpspec.h | |||
@@ -6,13 +6,13 @@ | |||
6 | #include <asm/mpspec_def.h> | 6 | #include <asm/mpspec_def.h> |
7 | 7 | ||
8 | extern int apic_version[MAX_APICS]; | 8 | extern int apic_version[MAX_APICS]; |
9 | extern int pic_mode; | ||
9 | 10 | ||
10 | #ifdef CONFIG_X86_32 | 11 | #ifdef CONFIG_X86_32 |
11 | #include <mach_mpspec.h> | 12 | #include <mach_mpspec.h> |
12 | 13 | ||
13 | extern unsigned int def_to_bigsmp; | 14 | extern unsigned int def_to_bigsmp; |
14 | extern u8 apicid_2_node[]; | 15 | extern u8 apicid_2_node[]; |
15 | extern int pic_mode; | ||
16 | 16 | ||
17 | #ifdef CONFIG_X86_NUMAQ | 17 | #ifdef CONFIG_X86_NUMAQ |
18 | extern int mp_bus_id_to_node[MAX_MP_BUSSES]; | 18 | extern int mp_bus_id_to_node[MAX_MP_BUSSES]; |
diff --git a/arch/x86/include/asm/msr-index.h b/arch/x86/include/asm/msr-index.h index e38859d577a1..cb58643947b9 100644 --- a/arch/x86/include/asm/msr-index.h +++ b/arch/x86/include/asm/msr-index.h | |||
@@ -85,7 +85,9 @@ | |||
85 | /* AMD64 MSRs. Not complete. See the architecture manual for a more | 85 | /* AMD64 MSRs. Not complete. See the architecture manual for a more |
86 | complete list. */ | 86 | complete list. */ |
87 | 87 | ||
88 | #define MSR_AMD64_PATCH_LEVEL 0x0000008b | ||
88 | #define MSR_AMD64_NB_CFG 0xc001001f | 89 | #define MSR_AMD64_NB_CFG 0xc001001f |
90 | #define MSR_AMD64_PATCH_LOADER 0xc0010020 | ||
89 | #define MSR_AMD64_IBSFETCHCTL 0xc0011030 | 91 | #define MSR_AMD64_IBSFETCHCTL 0xc0011030 |
90 | #define MSR_AMD64_IBSFETCHLINAD 0xc0011031 | 92 | #define MSR_AMD64_IBSFETCHLINAD 0xc0011031 |
91 | #define MSR_AMD64_IBSFETCHPHYSAD 0xc0011032 | 93 | #define MSR_AMD64_IBSFETCHPHYSAD 0xc0011032 |
diff --git a/arch/x86/include/asm/msr.h b/arch/x86/include/asm/msr.h index c2a812ebde89..638bf6241807 100644 --- a/arch/x86/include/asm/msr.h +++ b/arch/x86/include/asm/msr.h | |||
@@ -22,10 +22,10 @@ static inline unsigned long long native_read_tscp(unsigned int *aux) | |||
22 | } | 22 | } |
23 | 23 | ||
24 | /* | 24 | /* |
25 | * i386 calling convention returns 64-bit value in edx:eax, while | 25 | * both i386 and x86_64 returns 64-bit value in edx:eax, but gcc's "A" |
26 | * x86_64 returns at rax. Also, the "A" constraint does not really | 26 | * constraint has different meanings. For i386, "A" means exactly |
27 | * mean rdx:rax in x86_64, so we need specialized behaviour for each | 27 | * edx:eax, while for x86_64 it doesn't mean rdx:rax or edx:eax. Instead, |
28 | * architecture | 28 | * it means rax *or* rdx. |
29 | */ | 29 | */ |
30 | #ifdef CONFIG_X86_64 | 30 | #ifdef CONFIG_X86_64 |
31 | #define DECLARE_ARGS(val, low, high) unsigned low, high | 31 | #define DECLARE_ARGS(val, low, high) unsigned low, high |
@@ -85,7 +85,8 @@ static inline void native_write_msr(unsigned int msr, | |||
85 | asm volatile("wrmsr" : : "c" (msr), "a"(low), "d" (high) : "memory"); | 85 | asm volatile("wrmsr" : : "c" (msr), "a"(low), "d" (high) : "memory"); |
86 | } | 86 | } |
87 | 87 | ||
88 | static inline int native_write_msr_safe(unsigned int msr, | 88 | /* Can be uninlined because referenced by paravirt */ |
89 | notrace static inline int native_write_msr_safe(unsigned int msr, | ||
89 | unsigned low, unsigned high) | 90 | unsigned low, unsigned high) |
90 | { | 91 | { |
91 | int err; | 92 | int err; |
@@ -181,10 +182,10 @@ static inline int rdmsrl_amd_safe(unsigned msr, unsigned long long *p) | |||
181 | } | 182 | } |
182 | 183 | ||
183 | #define rdtscl(low) \ | 184 | #define rdtscl(low) \ |
184 | ((low) = (u32)native_read_tsc()) | 185 | ((low) = (u32)__native_read_tsc()) |
185 | 186 | ||
186 | #define rdtscll(val) \ | 187 | #define rdtscll(val) \ |
187 | ((val) = native_read_tsc()) | 188 | ((val) = __native_read_tsc()) |
188 | 189 | ||
189 | #define rdpmc(counter, low, high) \ | 190 | #define rdpmc(counter, low, high) \ |
190 | do { \ | 191 | do { \ |
diff --git a/arch/x86/include/asm/mtrr.h b/arch/x86/include/asm/mtrr.h index 7c1e4258b31e..cb988aab716d 100644 --- a/arch/x86/include/asm/mtrr.h +++ b/arch/x86/include/asm/mtrr.h | |||
@@ -57,6 +57,31 @@ struct mtrr_gentry { | |||
57 | }; | 57 | }; |
58 | #endif /* !__i386__ */ | 58 | #endif /* !__i386__ */ |
59 | 59 | ||
60 | struct mtrr_var_range { | ||
61 | u32 base_lo; | ||
62 | u32 base_hi; | ||
63 | u32 mask_lo; | ||
64 | u32 mask_hi; | ||
65 | }; | ||
66 | |||
67 | /* In the Intel processor's MTRR interface, the MTRR type is always held in | ||
68 | an 8 bit field: */ | ||
69 | typedef u8 mtrr_type; | ||
70 | |||
71 | #define MTRR_NUM_FIXED_RANGES 88 | ||
72 | #define MTRR_MAX_VAR_RANGES 256 | ||
73 | |||
74 | struct mtrr_state_type { | ||
75 | struct mtrr_var_range var_ranges[MTRR_MAX_VAR_RANGES]; | ||
76 | mtrr_type fixed_ranges[MTRR_NUM_FIXED_RANGES]; | ||
77 | unsigned char enabled; | ||
78 | unsigned char have_fixed; | ||
79 | mtrr_type def_type; | ||
80 | }; | ||
81 | |||
82 | #define MTRRphysBase_MSR(reg) (0x200 + 2 * (reg)) | ||
83 | #define MTRRphysMask_MSR(reg) (0x200 + 2 * (reg) + 1) | ||
84 | |||
60 | /* These are the various ioctls */ | 85 | /* These are the various ioctls */ |
61 | #define MTRRIOC_ADD_ENTRY _IOW(MTRR_IOCTL_BASE, 0, struct mtrr_sentry) | 86 | #define MTRRIOC_ADD_ENTRY _IOW(MTRR_IOCTL_BASE, 0, struct mtrr_sentry) |
62 | #define MTRRIOC_SET_ENTRY _IOW(MTRR_IOCTL_BASE, 1, struct mtrr_sentry) | 87 | #define MTRRIOC_SET_ENTRY _IOW(MTRR_IOCTL_BASE, 1, struct mtrr_sentry) |
diff --git a/arch/x86/include/asm/numaq/apic.h b/arch/x86/include/asm/numaq/apic.h index 0bf2a06b7a4e..bf37bc49bd8e 100644 --- a/arch/x86/include/asm/numaq/apic.h +++ b/arch/x86/include/asm/numaq/apic.h | |||
@@ -7,9 +7,9 @@ | |||
7 | 7 | ||
8 | #define APIC_DFR_VALUE (APIC_DFR_CLUSTER) | 8 | #define APIC_DFR_VALUE (APIC_DFR_CLUSTER) |
9 | 9 | ||
10 | static inline cpumask_t target_cpus(void) | 10 | static inline const cpumask_t *target_cpus(void) |
11 | { | 11 | { |
12 | return CPU_MASK_ALL; | 12 | return &CPU_MASK_ALL; |
13 | } | 13 | } |
14 | 14 | ||
15 | #define NO_BALANCE_IRQ (1) | 15 | #define NO_BALANCE_IRQ (1) |
@@ -63,8 +63,8 @@ static inline physid_mask_t ioapic_phys_id_map(physid_mask_t phys_map) | |||
63 | extern u8 cpu_2_logical_apicid[]; | 63 | extern u8 cpu_2_logical_apicid[]; |
64 | static inline int cpu_to_logical_apicid(int cpu) | 64 | static inline int cpu_to_logical_apicid(int cpu) |
65 | { | 65 | { |
66 | if (cpu >= NR_CPUS) | 66 | if (cpu >= nr_cpu_ids) |
67 | return BAD_APICID; | 67 | return BAD_APICID; |
68 | return (int)cpu_2_logical_apicid[cpu]; | 68 | return (int)cpu_2_logical_apicid[cpu]; |
69 | } | 69 | } |
70 | 70 | ||
@@ -122,7 +122,13 @@ static inline void enable_apic_mode(void) | |||
122 | * We use physical apicids here, not logical, so just return the default | 122 | * We use physical apicids here, not logical, so just return the default |
123 | * physical broadcast to stop people from breaking us | 123 | * physical broadcast to stop people from breaking us |
124 | */ | 124 | */ |
125 | static inline unsigned int cpu_mask_to_apicid(cpumask_t cpumask) | 125 | static inline unsigned int cpu_mask_to_apicid(const cpumask_t *cpumask) |
126 | { | ||
127 | return (int) 0xF; | ||
128 | } | ||
129 | |||
130 | static inline unsigned int cpu_mask_to_apicid_and(const struct cpumask *cpumask, | ||
131 | const struct cpumask *andmask) | ||
126 | { | 132 | { |
127 | return (int) 0xF; | 133 | return (int) 0xF; |
128 | } | 134 | } |
diff --git a/arch/x86/include/asm/numaq/ipi.h b/arch/x86/include/asm/numaq/ipi.h index 935588d286cf..a8374c652778 100644 --- a/arch/x86/include/asm/numaq/ipi.h +++ b/arch/x86/include/asm/numaq/ipi.h | |||
@@ -1,25 +1,22 @@ | |||
1 | #ifndef __ASM_NUMAQ_IPI_H | 1 | #ifndef __ASM_NUMAQ_IPI_H |
2 | #define __ASM_NUMAQ_IPI_H | 2 | #define __ASM_NUMAQ_IPI_H |
3 | 3 | ||
4 | void send_IPI_mask_sequence(cpumask_t, int vector); | 4 | void send_IPI_mask_sequence(const struct cpumask *mask, int vector); |
5 | void send_IPI_mask_allbutself(const struct cpumask *mask, int vector); | ||
5 | 6 | ||
6 | static inline void send_IPI_mask(cpumask_t mask, int vector) | 7 | static inline void send_IPI_mask(const struct cpumask *mask, int vector) |
7 | { | 8 | { |
8 | send_IPI_mask_sequence(mask, vector); | 9 | send_IPI_mask_sequence(mask, vector); |
9 | } | 10 | } |
10 | 11 | ||
11 | static inline void send_IPI_allbutself(int vector) | 12 | static inline void send_IPI_allbutself(int vector) |
12 | { | 13 | { |
13 | cpumask_t mask = cpu_online_map; | 14 | send_IPI_mask_allbutself(cpu_online_mask, vector); |
14 | cpu_clear(smp_processor_id(), mask); | ||
15 | |||
16 | if (!cpus_empty(mask)) | ||
17 | send_IPI_mask(mask, vector); | ||
18 | } | 15 | } |
19 | 16 | ||
20 | static inline void send_IPI_all(int vector) | 17 | static inline void send_IPI_all(int vector) |
21 | { | 18 | { |
22 | send_IPI_mask(cpu_online_map, vector); | 19 | send_IPI_mask(cpu_online_mask, vector); |
23 | } | 20 | } |
24 | 21 | ||
25 | #endif /* __ASM_NUMAQ_IPI_H */ | 22 | #endif /* __ASM_NUMAQ_IPI_H */ |
diff --git a/arch/x86/include/asm/numaq/wakecpu.h b/arch/x86/include/asm/numaq/wakecpu.h index c577bda5b1c5..6f499df8eddb 100644 --- a/arch/x86/include/asm/numaq/wakecpu.h +++ b/arch/x86/include/asm/numaq/wakecpu.h | |||
@@ -3,12 +3,8 @@ | |||
3 | 3 | ||
4 | /* This file copes with machines that wakeup secondary CPUs by NMIs */ | 4 | /* This file copes with machines that wakeup secondary CPUs by NMIs */ |
5 | 5 | ||
6 | #define WAKE_SECONDARY_VIA_NMI | 6 | #define TRAMPOLINE_PHYS_LOW (0x8) |
7 | 7 | #define TRAMPOLINE_PHYS_HIGH (0xa) | |
8 | #define TRAMPOLINE_LOW phys_to_virt(0x8) | ||
9 | #define TRAMPOLINE_HIGH phys_to_virt(0xa) | ||
10 | |||
11 | #define boot_cpu_apicid boot_cpu_logical_apicid | ||
12 | 8 | ||
13 | /* We don't do anything here because we use NMI's to boot instead */ | 9 | /* We don't do anything here because we use NMI's to boot instead */ |
14 | static inline void wait_for_init_deassert(atomic_t *deassert) | 10 | static inline void wait_for_init_deassert(atomic_t *deassert) |
@@ -27,17 +23,23 @@ static inline void smp_callin_clear_local_apic(void) | |||
27 | static inline void store_NMI_vector(unsigned short *high, unsigned short *low) | 23 | static inline void store_NMI_vector(unsigned short *high, unsigned short *low) |
28 | { | 24 | { |
29 | printk("Storing NMI vector\n"); | 25 | printk("Storing NMI vector\n"); |
30 | *high = *((volatile unsigned short *) TRAMPOLINE_HIGH); | 26 | *high = |
31 | *low = *((volatile unsigned short *) TRAMPOLINE_LOW); | 27 | *((volatile unsigned short *)phys_to_virt(TRAMPOLINE_PHYS_HIGH)); |
28 | *low = | ||
29 | *((volatile unsigned short *)phys_to_virt(TRAMPOLINE_PHYS_LOW)); | ||
32 | } | 30 | } |
33 | 31 | ||
34 | static inline void restore_NMI_vector(unsigned short *high, unsigned short *low) | 32 | static inline void restore_NMI_vector(unsigned short *high, unsigned short *low) |
35 | { | 33 | { |
36 | printk("Restoring NMI vector\n"); | 34 | printk("Restoring NMI vector\n"); |
37 | *((volatile unsigned short *) TRAMPOLINE_HIGH) = *high; | 35 | *((volatile unsigned short *)phys_to_virt(TRAMPOLINE_PHYS_HIGH)) = |
38 | *((volatile unsigned short *) TRAMPOLINE_LOW) = *low; | 36 | *high; |
37 | *((volatile unsigned short *)phys_to_virt(TRAMPOLINE_PHYS_LOW)) = | ||
38 | *low; | ||
39 | } | 39 | } |
40 | 40 | ||
41 | #define inquire_remote_apic(apicid) {} | 41 | static inline void inquire_remote_apic(int apicid) |
42 | { | ||
43 | } | ||
42 | 44 | ||
43 | #endif /* __ASM_NUMAQ_WAKECPU_H */ | 45 | #endif /* __ASM_NUMAQ_WAKECPU_H */ |
diff --git a/arch/x86/include/asm/pci.h b/arch/x86/include/asm/pci.h index 875b38edf193..a977de23cb4d 100644 --- a/arch/x86/include/asm/pci.h +++ b/arch/x86/include/asm/pci.h | |||
@@ -19,6 +19,8 @@ struct pci_sysdata { | |||
19 | }; | 19 | }; |
20 | 20 | ||
21 | extern int pci_routeirq; | 21 | extern int pci_routeirq; |
22 | extern int noioapicquirk; | ||
23 | extern int noioapicreroute; | ||
22 | 24 | ||
23 | /* scan a bus after allocating a pci_sysdata for it */ | 25 | /* scan a bus after allocating a pci_sysdata for it */ |
24 | extern struct pci_bus *pci_scan_bus_on_node(int busno, struct pci_ops *ops, | 26 | extern struct pci_bus *pci_scan_bus_on_node(int busno, struct pci_ops *ops, |
@@ -82,6 +84,8 @@ static inline void pci_dma_burst_advice(struct pci_dev *pdev, | |||
82 | static inline void early_quirks(void) { } | 84 | static inline void early_quirks(void) { } |
83 | #endif | 85 | #endif |
84 | 86 | ||
87 | extern void pci_iommu_alloc(void); | ||
88 | |||
85 | #endif /* __KERNEL__ */ | 89 | #endif /* __KERNEL__ */ |
86 | 90 | ||
87 | #ifdef CONFIG_X86_32 | 91 | #ifdef CONFIG_X86_32 |
@@ -98,9 +102,9 @@ static inline void early_quirks(void) { } | |||
98 | 102 | ||
99 | #ifdef CONFIG_NUMA | 103 | #ifdef CONFIG_NUMA |
100 | /* Returns the node based on pci bus */ | 104 | /* Returns the node based on pci bus */ |
101 | static inline int __pcibus_to_node(struct pci_bus *bus) | 105 | static inline int __pcibus_to_node(const struct pci_bus *bus) |
102 | { | 106 | { |
103 | struct pci_sysdata *sd = bus->sysdata; | 107 | const struct pci_sysdata *sd = bus->sysdata; |
104 | 108 | ||
105 | return sd->node; | 109 | return sd->node; |
106 | } | 110 | } |
@@ -109,6 +113,12 @@ static inline cpumask_t __pcibus_to_cpumask(struct pci_bus *bus) | |||
109 | { | 113 | { |
110 | return node_to_cpumask(__pcibus_to_node(bus)); | 114 | return node_to_cpumask(__pcibus_to_node(bus)); |
111 | } | 115 | } |
116 | |||
117 | static inline const struct cpumask * | ||
118 | cpumask_of_pcibus(const struct pci_bus *bus) | ||
119 | { | ||
120 | return cpumask_of_node(__pcibus_to_node(bus)); | ||
121 | } | ||
112 | #endif | 122 | #endif |
113 | 123 | ||
114 | #endif /* _ASM_X86_PCI_H */ | 124 | #endif /* _ASM_X86_PCI_H */ |
diff --git a/arch/x86/include/asm/pci_64.h b/arch/x86/include/asm/pci_64.h index d02d936840a3..4da207982777 100644 --- a/arch/x86/include/asm/pci_64.h +++ b/arch/x86/include/asm/pci_64.h | |||
@@ -23,7 +23,6 @@ extern int (*pci_config_write)(int seg, int bus, int dev, int fn, | |||
23 | int reg, int len, u32 value); | 23 | int reg, int len, u32 value); |
24 | 24 | ||
25 | extern void dma32_reserve_bootmem(void); | 25 | extern void dma32_reserve_bootmem(void); |
26 | extern void pci_iommu_alloc(void); | ||
27 | 26 | ||
28 | /* The PCI address space does equal the physical memory | 27 | /* The PCI address space does equal the physical memory |
29 | * address space. The networking and block device layers use | 28 | * address space. The networking and block device layers use |
diff --git a/arch/x86/pci/pci.h b/arch/x86/include/asm/pci_x86.h index 15b9cf6be729..e60fd3e14bdf 100644 --- a/arch/x86/pci/pci.h +++ b/arch/x86/include/asm/pci_x86.h | |||
@@ -57,7 +57,8 @@ extern struct pci_ops pci_root_ops; | |||
57 | struct irq_info { | 57 | struct irq_info { |
58 | u8 bus, devfn; /* Bus, device and function */ | 58 | u8 bus, devfn; /* Bus, device and function */ |
59 | struct { | 59 | struct { |
60 | u8 link; /* IRQ line ID, chipset dependent, 0=not routed */ | 60 | u8 link; /* IRQ line ID, chipset dependent, |
61 | 0 = not routed */ | ||
61 | u16 bitmap; /* Available IRQs */ | 62 | u16 bitmap; /* Available IRQs */ |
62 | } __attribute__((packed)) irq[4]; | 63 | } __attribute__((packed)) irq[4]; |
63 | u8 slot; /* Slot number, 0=onboard */ | 64 | u8 slot; /* Slot number, 0=onboard */ |
@@ -69,11 +70,13 @@ struct irq_routing_table { | |||
69 | u16 version; /* PIRQ_VERSION */ | 70 | u16 version; /* PIRQ_VERSION */ |
70 | u16 size; /* Table size in bytes */ | 71 | u16 size; /* Table size in bytes */ |
71 | u8 rtr_bus, rtr_devfn; /* Where the interrupt router lies */ | 72 | u8 rtr_bus, rtr_devfn; /* Where the interrupt router lies */ |
72 | u16 exclusive_irqs; /* IRQs devoted exclusively to PCI usage */ | 73 | u16 exclusive_irqs; /* IRQs devoted exclusively to |
73 | u16 rtr_vendor, rtr_device; /* Vendor and device ID of interrupt router */ | 74 | PCI usage */ |
75 | u16 rtr_vendor, rtr_device; /* Vendor and device ID of | ||
76 | interrupt router */ | ||
74 | u32 miniport_data; /* Crap */ | 77 | u32 miniport_data; /* Crap */ |
75 | u8 rfu[11]; | 78 | u8 rfu[11]; |
76 | u8 checksum; /* Modulo 256 checksum must give zero */ | 79 | u8 checksum; /* Modulo 256 checksum must give 0 */ |
77 | struct irq_info slots[0]; | 80 | struct irq_info slots[0]; |
78 | } __attribute__((packed)); | 81 | } __attribute__((packed)); |
79 | 82 | ||
@@ -96,6 +99,7 @@ extern struct pci_raw_ops *raw_pci_ops; | |||
96 | extern struct pci_raw_ops *raw_pci_ext_ops; | 99 | extern struct pci_raw_ops *raw_pci_ext_ops; |
97 | 100 | ||
98 | extern struct pci_raw_ops pci_direct_conf1; | 101 | extern struct pci_raw_ops pci_direct_conf1; |
102 | extern bool port_cf9_safe; | ||
99 | 103 | ||
100 | /* arch_initcall level */ | 104 | /* arch_initcall level */ |
101 | extern int pci_direct_probe(void); | 105 | extern int pci_direct_probe(void); |
@@ -147,15 +151,15 @@ static inline unsigned int mmio_config_readl(void __iomem *pos) | |||
147 | 151 | ||
148 | static inline void mmio_config_writeb(void __iomem *pos, u8 val) | 152 | static inline void mmio_config_writeb(void __iomem *pos, u8 val) |
149 | { | 153 | { |
150 | asm volatile("movb %%al,(%1)" :: "a" (val), "r" (pos) : "memory"); | 154 | asm volatile("movb %%al,(%1)" : : "a" (val), "r" (pos) : "memory"); |
151 | } | 155 | } |
152 | 156 | ||
153 | static inline void mmio_config_writew(void __iomem *pos, u16 val) | 157 | static inline void mmio_config_writew(void __iomem *pos, u16 val) |
154 | { | 158 | { |
155 | asm volatile("movw %%ax,(%1)" :: "a" (val), "r" (pos) : "memory"); | 159 | asm volatile("movw %%ax,(%1)" : : "a" (val), "r" (pos) : "memory"); |
156 | } | 160 | } |
157 | 161 | ||
158 | static inline void mmio_config_writel(void __iomem *pos, u32 val) | 162 | static inline void mmio_config_writel(void __iomem *pos, u32 val) |
159 | { | 163 | { |
160 | asm volatile("movl %%eax,(%1)" :: "a" (val), "r" (pos) : "memory"); | 164 | asm volatile("movl %%eax,(%1)" : : "a" (val), "r" (pos) : "memory"); |
161 | } | 165 | } |
diff --git a/arch/x86/include/asm/pgtable-2level.h b/arch/x86/include/asm/pgtable-2level.h index b17edfd23628..e0d199fe1d83 100644 --- a/arch/x86/include/asm/pgtable-2level.h +++ b/arch/x86/include/asm/pgtable-2level.h | |||
@@ -56,23 +56,55 @@ static inline pte_t native_ptep_get_and_clear(pte_t *xp) | |||
56 | #define pte_none(x) (!(x).pte_low) | 56 | #define pte_none(x) (!(x).pte_low) |
57 | 57 | ||
58 | /* | 58 | /* |
59 | * Bits 0, 6 and 7 are taken, split up the 29 bits of offset | 59 | * Bits _PAGE_BIT_PRESENT, _PAGE_BIT_FILE and _PAGE_BIT_PROTNONE are taken, |
60 | * into this range: | 60 | * split up the 29 bits of offset into this range: |
61 | */ | 61 | */ |
62 | #define PTE_FILE_MAX_BITS 29 | 62 | #define PTE_FILE_MAX_BITS 29 |
63 | #define PTE_FILE_SHIFT1 (_PAGE_BIT_PRESENT + 1) | ||
64 | #if _PAGE_BIT_FILE < _PAGE_BIT_PROTNONE | ||
65 | #define PTE_FILE_SHIFT2 (_PAGE_BIT_FILE + 1) | ||
66 | #define PTE_FILE_SHIFT3 (_PAGE_BIT_PROTNONE + 1) | ||
67 | #else | ||
68 | #define PTE_FILE_SHIFT2 (_PAGE_BIT_PROTNONE + 1) | ||
69 | #define PTE_FILE_SHIFT3 (_PAGE_BIT_FILE + 1) | ||
70 | #endif | ||
71 | #define PTE_FILE_BITS1 (PTE_FILE_SHIFT2 - PTE_FILE_SHIFT1 - 1) | ||
72 | #define PTE_FILE_BITS2 (PTE_FILE_SHIFT3 - PTE_FILE_SHIFT2 - 1) | ||
63 | 73 | ||
64 | #define pte_to_pgoff(pte) \ | 74 | #define pte_to_pgoff(pte) \ |
65 | ((((pte).pte_low >> 1) & 0x1f) + (((pte).pte_low >> 8) << 5)) | 75 | ((((pte).pte_low >> PTE_FILE_SHIFT1) \ |
76 | & ((1U << PTE_FILE_BITS1) - 1)) \ | ||
77 | + ((((pte).pte_low >> PTE_FILE_SHIFT2) \ | ||
78 | & ((1U << PTE_FILE_BITS2) - 1)) << PTE_FILE_BITS1) \ | ||
79 | + (((pte).pte_low >> PTE_FILE_SHIFT3) \ | ||
80 | << (PTE_FILE_BITS1 + PTE_FILE_BITS2))) | ||
66 | 81 | ||
67 | #define pgoff_to_pte(off) \ | 82 | #define pgoff_to_pte(off) \ |
68 | ((pte_t) { .pte_low = (((off) & 0x1f) << 1) + \ | 83 | ((pte_t) { .pte_low = \ |
69 | (((off) >> 5) << 8) + _PAGE_FILE }) | 84 | (((off) & ((1U << PTE_FILE_BITS1) - 1)) << PTE_FILE_SHIFT1) \ |
85 | + ((((off) >> PTE_FILE_BITS1) & ((1U << PTE_FILE_BITS2) - 1)) \ | ||
86 | << PTE_FILE_SHIFT2) \ | ||
87 | + (((off) >> (PTE_FILE_BITS1 + PTE_FILE_BITS2)) \ | ||
88 | << PTE_FILE_SHIFT3) \ | ||
89 | + _PAGE_FILE }) | ||
70 | 90 | ||
71 | /* Encode and de-code a swap entry */ | 91 | /* Encode and de-code a swap entry */ |
72 | #define __swp_type(x) (((x).val >> 1) & 0x1f) | 92 | #if _PAGE_BIT_FILE < _PAGE_BIT_PROTNONE |
73 | #define __swp_offset(x) ((x).val >> 8) | 93 | #define SWP_TYPE_BITS (_PAGE_BIT_FILE - _PAGE_BIT_PRESENT - 1) |
74 | #define __swp_entry(type, offset) \ | 94 | #define SWP_OFFSET_SHIFT (_PAGE_BIT_PROTNONE + 1) |
75 | ((swp_entry_t) { ((type) << 1) | ((offset) << 8) }) | 95 | #else |
96 | #define SWP_TYPE_BITS (_PAGE_BIT_PROTNONE - _PAGE_BIT_PRESENT - 1) | ||
97 | #define SWP_OFFSET_SHIFT (_PAGE_BIT_FILE + 1) | ||
98 | #endif | ||
99 | |||
100 | #define MAX_SWAPFILES_CHECK() BUILD_BUG_ON(MAX_SWAPFILES_SHIFT > SWP_TYPE_BITS) | ||
101 | |||
102 | #define __swp_type(x) (((x).val >> (_PAGE_BIT_PRESENT + 1)) \ | ||
103 | & ((1U << SWP_TYPE_BITS) - 1)) | ||
104 | #define __swp_offset(x) ((x).val >> SWP_OFFSET_SHIFT) | ||
105 | #define __swp_entry(type, offset) ((swp_entry_t) { \ | ||
106 | ((type) << (_PAGE_BIT_PRESENT + 1)) \ | ||
107 | | ((offset) << SWP_OFFSET_SHIFT) }) | ||
76 | #define __pte_to_swp_entry(pte) ((swp_entry_t) { (pte).pte_low }) | 108 | #define __pte_to_swp_entry(pte) ((swp_entry_t) { (pte).pte_low }) |
77 | #define __swp_entry_to_pte(x) ((pte_t) { .pte = (x).val }) | 109 | #define __swp_entry_to_pte(x) ((pte_t) { .pte = (x).val }) |
78 | 110 | ||
diff --git a/arch/x86/include/asm/pgtable-3level.h b/arch/x86/include/asm/pgtable-3level.h index 52597aeadfff..447da43cddb3 100644 --- a/arch/x86/include/asm/pgtable-3level.h +++ b/arch/x86/include/asm/pgtable-3level.h | |||
@@ -166,6 +166,7 @@ static inline int pte_none(pte_t pte) | |||
166 | #define PTE_FILE_MAX_BITS 32 | 166 | #define PTE_FILE_MAX_BITS 32 |
167 | 167 | ||
168 | /* Encode and de-code a swap entry */ | 168 | /* Encode and de-code a swap entry */ |
169 | #define MAX_SWAPFILES_CHECK() BUILD_BUG_ON(MAX_SWAPFILES_SHIFT > 5) | ||
169 | #define __swp_type(x) (((x).val) & 0x1f) | 170 | #define __swp_type(x) (((x).val) & 0x1f) |
170 | #define __swp_offset(x) ((x).val >> 5) | 171 | #define __swp_offset(x) ((x).val >> 5) |
171 | #define __swp_entry(type, offset) ((swp_entry_t){(type) | (offset) << 5}) | 172 | #define __swp_entry(type, offset) ((swp_entry_t){(type) | (offset) << 5}) |
diff --git a/arch/x86/include/asm/pgtable.h b/arch/x86/include/asm/pgtable.h index c012f3b11671..83e69f4a37f0 100644 --- a/arch/x86/include/asm/pgtable.h +++ b/arch/x86/include/asm/pgtable.h | |||
@@ -10,7 +10,6 @@ | |||
10 | #define _PAGE_BIT_PCD 4 /* page cache disabled */ | 10 | #define _PAGE_BIT_PCD 4 /* page cache disabled */ |
11 | #define _PAGE_BIT_ACCESSED 5 /* was accessed (raised by CPU) */ | 11 | #define _PAGE_BIT_ACCESSED 5 /* was accessed (raised by CPU) */ |
12 | #define _PAGE_BIT_DIRTY 6 /* was written to (raised by CPU) */ | 12 | #define _PAGE_BIT_DIRTY 6 /* was written to (raised by CPU) */ |
13 | #define _PAGE_BIT_FILE 6 | ||
14 | #define _PAGE_BIT_PSE 7 /* 4 MB (or 2MB) page */ | 13 | #define _PAGE_BIT_PSE 7 /* 4 MB (or 2MB) page */ |
15 | #define _PAGE_BIT_PAT 7 /* on 4KB pages */ | 14 | #define _PAGE_BIT_PAT 7 /* on 4KB pages */ |
16 | #define _PAGE_BIT_GLOBAL 8 /* Global TLB entry PPro+ */ | 15 | #define _PAGE_BIT_GLOBAL 8 /* Global TLB entry PPro+ */ |
@@ -22,6 +21,12 @@ | |||
22 | #define _PAGE_BIT_CPA_TEST _PAGE_BIT_UNUSED1 | 21 | #define _PAGE_BIT_CPA_TEST _PAGE_BIT_UNUSED1 |
23 | #define _PAGE_BIT_NX 63 /* No execute: only valid after cpuid check */ | 22 | #define _PAGE_BIT_NX 63 /* No execute: only valid after cpuid check */ |
24 | 23 | ||
24 | /* If _PAGE_BIT_PRESENT is clear, we use these: */ | ||
25 | /* - if the user mapped it with PROT_NONE; pte_present gives true */ | ||
26 | #define _PAGE_BIT_PROTNONE _PAGE_BIT_GLOBAL | ||
27 | /* - set: nonlinear file mapping, saved PTE; unset:swap */ | ||
28 | #define _PAGE_BIT_FILE _PAGE_BIT_DIRTY | ||
29 | |||
25 | #define _PAGE_PRESENT (_AT(pteval_t, 1) << _PAGE_BIT_PRESENT) | 30 | #define _PAGE_PRESENT (_AT(pteval_t, 1) << _PAGE_BIT_PRESENT) |
26 | #define _PAGE_RW (_AT(pteval_t, 1) << _PAGE_BIT_RW) | 31 | #define _PAGE_RW (_AT(pteval_t, 1) << _PAGE_BIT_RW) |
27 | #define _PAGE_USER (_AT(pteval_t, 1) << _PAGE_BIT_USER) | 32 | #define _PAGE_USER (_AT(pteval_t, 1) << _PAGE_BIT_USER) |
@@ -46,11 +51,8 @@ | |||
46 | #define _PAGE_NX (_AT(pteval_t, 0)) | 51 | #define _PAGE_NX (_AT(pteval_t, 0)) |
47 | #endif | 52 | #endif |
48 | 53 | ||
49 | /* If _PAGE_PRESENT is clear, we use these: */ | 54 | #define _PAGE_FILE (_AT(pteval_t, 1) << _PAGE_BIT_FILE) |
50 | #define _PAGE_FILE _PAGE_DIRTY /* nonlinear file mapping, | 55 | #define _PAGE_PROTNONE (_AT(pteval_t, 1) << _PAGE_BIT_PROTNONE) |
51 | * saved PTE; unset:swap */ | ||
52 | #define _PAGE_PROTNONE _PAGE_PSE /* if the user mapped it with PROT_NONE; | ||
53 | pte_present gives true */ | ||
54 | 56 | ||
55 | #define _PAGE_TABLE (_PAGE_PRESENT | _PAGE_RW | _PAGE_USER | \ | 57 | #define _PAGE_TABLE (_PAGE_PRESENT | _PAGE_RW | _PAGE_USER | \ |
56 | _PAGE_ACCESSED | _PAGE_DIRTY) | 58 | _PAGE_ACCESSED | _PAGE_DIRTY) |
@@ -158,8 +160,19 @@ | |||
158 | #define PGD_IDENT_ATTR 0x001 /* PRESENT (no other attributes) */ | 160 | #define PGD_IDENT_ATTR 0x001 /* PRESENT (no other attributes) */ |
159 | #endif | 161 | #endif |
160 | 162 | ||
163 | /* | ||
164 | * Macro to mark a page protection value as UC- | ||
165 | */ | ||
166 | #define pgprot_noncached(prot) \ | ||
167 | ((boot_cpu_data.x86 > 3) \ | ||
168 | ? (__pgprot(pgprot_val(prot) | _PAGE_CACHE_UC_MINUS)) \ | ||
169 | : (prot)) | ||
170 | |||
161 | #ifndef __ASSEMBLY__ | 171 | #ifndef __ASSEMBLY__ |
162 | 172 | ||
173 | #define pgprot_writecombine pgprot_writecombine | ||
174 | extern pgprot_t pgprot_writecombine(pgprot_t prot); | ||
175 | |||
163 | /* | 176 | /* |
164 | * ZERO_PAGE is a global shared page that is always zero: used | 177 | * ZERO_PAGE is a global shared page that is always zero: used |
165 | * for zero-mapped memory areas etc.. | 178 | * for zero-mapped memory areas etc.. |
@@ -329,6 +342,9 @@ static inline pgprot_t pgprot_modify(pgprot_t oldprot, pgprot_t newprot) | |||
329 | #define canon_pgprot(p) __pgprot(pgprot_val(p) & __supported_pte_mask) | 342 | #define canon_pgprot(p) __pgprot(pgprot_val(p) & __supported_pte_mask) |
330 | 343 | ||
331 | #ifndef __ASSEMBLY__ | 344 | #ifndef __ASSEMBLY__ |
345 | /* Indicate that x86 has its own track and untrack pfn vma functions */ | ||
346 | #define __HAVE_PFNMAP_TRACKING | ||
347 | |||
332 | #define __HAVE_PHYS_MEM_ACCESS_PROT | 348 | #define __HAVE_PHYS_MEM_ACCESS_PROT |
333 | struct file; | 349 | struct file; |
334 | pgprot_t phys_mem_access_prot(struct file *file, unsigned long pfn, | 350 | pgprot_t phys_mem_access_prot(struct file *file, unsigned long pfn, |
diff --git a/arch/x86/include/asm/pgtable_32.h b/arch/x86/include/asm/pgtable_32.h index f9d5889b336b..72b020deb46b 100644 --- a/arch/x86/include/asm/pgtable_32.h +++ b/arch/x86/include/asm/pgtable_32.h | |||
@@ -101,15 +101,6 @@ extern unsigned long pg0[]; | |||
101 | #endif | 101 | #endif |
102 | 102 | ||
103 | /* | 103 | /* |
104 | * Macro to mark a page protection value as "uncacheable". | ||
105 | * On processors which do not support it, this is a no-op. | ||
106 | */ | ||
107 | #define pgprot_noncached(prot) \ | ||
108 | ((boot_cpu_data.x86 > 3) \ | ||
109 | ? (__pgprot(pgprot_val(prot) | _PAGE_PCD | _PAGE_PWT)) \ | ||
110 | : (prot)) | ||
111 | |||
112 | /* | ||
113 | * Conversion functions: convert a page and protection to a page entry, | 104 | * Conversion functions: convert a page and protection to a page entry, |
114 | * and a page entry and page directory to the page they refer to. | 105 | * and a page entry and page directory to the page they refer to. |
115 | */ | 106 | */ |
diff --git a/arch/x86/include/asm/pgtable_64.h b/arch/x86/include/asm/pgtable_64.h index 545a0e042bb2..ba09289accaa 100644 --- a/arch/x86/include/asm/pgtable_64.h +++ b/arch/x86/include/asm/pgtable_64.h | |||
@@ -146,7 +146,7 @@ static inline void native_pgd_clear(pgd_t *pgd) | |||
146 | #define PGDIR_MASK (~(PGDIR_SIZE - 1)) | 146 | #define PGDIR_MASK (~(PGDIR_SIZE - 1)) |
147 | 147 | ||
148 | 148 | ||
149 | #define MAXMEM _AC(0x00003fffffffffff, UL) | 149 | #define MAXMEM _AC(__AC(1, UL) << MAX_PHYSMEM_BITS, UL) |
150 | #define VMALLOC_START _AC(0xffffc20000000000, UL) | 150 | #define VMALLOC_START _AC(0xffffc20000000000, UL) |
151 | #define VMALLOC_END _AC(0xffffe1ffffffffff, UL) | 151 | #define VMALLOC_END _AC(0xffffe1ffffffffff, UL) |
152 | #define VMEMMAP_START _AC(0xffffe20000000000, UL) | 152 | #define VMEMMAP_START _AC(0xffffe20000000000, UL) |
@@ -177,12 +177,6 @@ static inline int pmd_bad(pmd_t pmd) | |||
177 | #define pages_to_mb(x) ((x) >> (20 - PAGE_SHIFT)) /* FIXME: is this right? */ | 177 | #define pages_to_mb(x) ((x) >> (20 - PAGE_SHIFT)) /* FIXME: is this right? */ |
178 | 178 | ||
179 | /* | 179 | /* |
180 | * Macro to mark a page protection value as "uncacheable". | ||
181 | */ | ||
182 | #define pgprot_noncached(prot) \ | ||
183 | (__pgprot(pgprot_val((prot)) | _PAGE_PCD | _PAGE_PWT)) | ||
184 | |||
185 | /* | ||
186 | * Conversion functions: convert a page and protection to a page entry, | 180 | * Conversion functions: convert a page and protection to a page entry, |
187 | * and a page entry and page directory to the page they refer to. | 181 | * and a page entry and page directory to the page they refer to. |
188 | */ | 182 | */ |
@@ -250,10 +244,22 @@ static inline int pud_large(pud_t pte) | |||
250 | extern int direct_gbpages; | 244 | extern int direct_gbpages; |
251 | 245 | ||
252 | /* Encode and de-code a swap entry */ | 246 | /* Encode and de-code a swap entry */ |
253 | #define __swp_type(x) (((x).val >> 1) & 0x3f) | 247 | #if _PAGE_BIT_FILE < _PAGE_BIT_PROTNONE |
254 | #define __swp_offset(x) ((x).val >> 8) | 248 | #define SWP_TYPE_BITS (_PAGE_BIT_FILE - _PAGE_BIT_PRESENT - 1) |
255 | #define __swp_entry(type, offset) ((swp_entry_t) { ((type) << 1) | \ | 249 | #define SWP_OFFSET_SHIFT (_PAGE_BIT_PROTNONE + 1) |
256 | ((offset) << 8) }) | 250 | #else |
251 | #define SWP_TYPE_BITS (_PAGE_BIT_PROTNONE - _PAGE_BIT_PRESENT - 1) | ||
252 | #define SWP_OFFSET_SHIFT (_PAGE_BIT_FILE + 1) | ||
253 | #endif | ||
254 | |||
255 | #define MAX_SWAPFILES_CHECK() BUILD_BUG_ON(MAX_SWAPFILES_SHIFT > SWP_TYPE_BITS) | ||
256 | |||
257 | #define __swp_type(x) (((x).val >> (_PAGE_BIT_PRESENT + 1)) \ | ||
258 | & ((1U << SWP_TYPE_BITS) - 1)) | ||
259 | #define __swp_offset(x) ((x).val >> SWP_OFFSET_SHIFT) | ||
260 | #define __swp_entry(type, offset) ((swp_entry_t) { \ | ||
261 | ((type) << (_PAGE_BIT_PRESENT + 1)) \ | ||
262 | | ((offset) << SWP_OFFSET_SHIFT) }) | ||
257 | #define __pte_to_swp_entry(pte) ((swp_entry_t) { pte_val((pte)) }) | 263 | #define __pte_to_swp_entry(pte) ((swp_entry_t) { pte_val((pte)) }) |
258 | #define __swp_entry_to_pte(x) ((pte_t) { .pte = (x).val }) | 264 | #define __swp_entry_to_pte(x) ((pte_t) { .pte = (x).val }) |
259 | 265 | ||
diff --git a/arch/x86/include/asm/prctl.h b/arch/x86/include/asm/prctl.h index fe681147a4f7..a8894647dd9a 100644 --- a/arch/x86/include/asm/prctl.h +++ b/arch/x86/include/asm/prctl.h | |||
@@ -6,5 +6,8 @@ | |||
6 | #define ARCH_GET_FS 0x1003 | 6 | #define ARCH_GET_FS 0x1003 |
7 | #define ARCH_GET_GS 0x1004 | 7 | #define ARCH_GET_GS 0x1004 |
8 | 8 | ||
9 | #ifdef CONFIG_X86_64 | ||
10 | extern long sys_arch_prctl(int, unsigned long); | ||
11 | #endif /* CONFIG_X86_64 */ | ||
9 | 12 | ||
10 | #endif /* _ASM_X86_PRCTL_H */ | 13 | #endif /* _ASM_X86_PRCTL_H */ |
diff --git a/arch/x86/include/asm/processor.h b/arch/x86/include/asm/processor.h index 5ca01e383269..091cd8855f2e 100644 --- a/arch/x86/include/asm/processor.h +++ b/arch/x86/include/asm/processor.h | |||
@@ -110,6 +110,7 @@ struct cpuinfo_x86 { | |||
110 | /* Index into per_cpu list: */ | 110 | /* Index into per_cpu list: */ |
111 | u16 cpu_index; | 111 | u16 cpu_index; |
112 | #endif | 112 | #endif |
113 | unsigned int x86_hyper_vendor; | ||
113 | } __attribute__((__aligned__(SMP_CACHE_BYTES))); | 114 | } __attribute__((__aligned__(SMP_CACHE_BYTES))); |
114 | 115 | ||
115 | #define X86_VENDOR_INTEL 0 | 116 | #define X86_VENDOR_INTEL 0 |
@@ -123,6 +124,9 @@ struct cpuinfo_x86 { | |||
123 | 124 | ||
124 | #define X86_VENDOR_UNKNOWN 0xff | 125 | #define X86_VENDOR_UNKNOWN 0xff |
125 | 126 | ||
127 | #define X86_HYPER_VENDOR_NONE 0 | ||
128 | #define X86_HYPER_VENDOR_VMWARE 1 | ||
129 | |||
126 | /* | 130 | /* |
127 | * capabilities of CPUs | 131 | * capabilities of CPUs |
128 | */ | 132 | */ |
@@ -752,6 +756,19 @@ extern void switch_to_new_gdt(void); | |||
752 | extern void cpu_init(void); | 756 | extern void cpu_init(void); |
753 | extern void init_gdt(int cpu); | 757 | extern void init_gdt(int cpu); |
754 | 758 | ||
759 | static inline unsigned long get_debugctlmsr(void) | ||
760 | { | ||
761 | unsigned long debugctlmsr = 0; | ||
762 | |||
763 | #ifndef CONFIG_X86_DEBUGCTLMSR | ||
764 | if (boot_cpu_data.x86 < 6) | ||
765 | return 0; | ||
766 | #endif | ||
767 | rdmsrl(MSR_IA32_DEBUGCTLMSR, debugctlmsr); | ||
768 | |||
769 | return debugctlmsr; | ||
770 | } | ||
771 | |||
755 | static inline void update_debugctlmsr(unsigned long debugctlmsr) | 772 | static inline void update_debugctlmsr(unsigned long debugctlmsr) |
756 | { | 773 | { |
757 | #ifndef CONFIG_X86_DEBUGCTLMSR | 774 | #ifndef CONFIG_X86_DEBUGCTLMSR |
diff --git a/arch/x86/include/asm/ptrace.h b/arch/x86/include/asm/ptrace.h index eefb0594b058..6d34d954c228 100644 --- a/arch/x86/include/asm/ptrace.h +++ b/arch/x86/include/asm/ptrace.h | |||
@@ -6,7 +6,6 @@ | |||
6 | #include <asm/processor-flags.h> | 6 | #include <asm/processor-flags.h> |
7 | 7 | ||
8 | #ifdef __KERNEL__ | 8 | #ifdef __KERNEL__ |
9 | #include <asm/ds.h> /* the DS BTS struct is used for ptrace too */ | ||
10 | #include <asm/segment.h> | 9 | #include <asm/segment.h> |
11 | #endif | 10 | #endif |
12 | 11 | ||
@@ -128,34 +127,6 @@ struct pt_regs { | |||
128 | #endif /* !__i386__ */ | 127 | #endif /* !__i386__ */ |
129 | 128 | ||
130 | 129 | ||
131 | #ifdef CONFIG_X86_PTRACE_BTS | ||
132 | /* a branch trace record entry | ||
133 | * | ||
134 | * In order to unify the interface between various processor versions, | ||
135 | * we use the below data structure for all processors. | ||
136 | */ | ||
137 | enum bts_qualifier { | ||
138 | BTS_INVALID = 0, | ||
139 | BTS_BRANCH, | ||
140 | BTS_TASK_ARRIVES, | ||
141 | BTS_TASK_DEPARTS | ||
142 | }; | ||
143 | |||
144 | struct bts_struct { | ||
145 | __u64 qualifier; | ||
146 | union { | ||
147 | /* BTS_BRANCH */ | ||
148 | struct { | ||
149 | __u64 from_ip; | ||
150 | __u64 to_ip; | ||
151 | } lbr; | ||
152 | /* BTS_TASK_ARRIVES or | ||
153 | BTS_TASK_DEPARTS */ | ||
154 | __u64 jiffies; | ||
155 | } variant; | ||
156 | }; | ||
157 | #endif /* CONFIG_X86_PTRACE_BTS */ | ||
158 | |||
159 | #ifdef __KERNEL__ | 130 | #ifdef __KERNEL__ |
160 | 131 | ||
161 | #include <linux/init.h> | 132 | #include <linux/init.h> |
@@ -163,13 +134,6 @@ struct bts_struct { | |||
163 | struct cpuinfo_x86; | 134 | struct cpuinfo_x86; |
164 | struct task_struct; | 135 | struct task_struct; |
165 | 136 | ||
166 | #ifdef CONFIG_X86_PTRACE_BTS | ||
167 | extern void __cpuinit ptrace_bts_init_intel(struct cpuinfo_x86 *); | ||
168 | extern void ptrace_bts_take_timestamp(struct task_struct *, enum bts_qualifier); | ||
169 | #else | ||
170 | #define ptrace_bts_init_intel(config) do {} while (0) | ||
171 | #endif /* CONFIG_X86_PTRACE_BTS */ | ||
172 | |||
173 | extern unsigned long profile_pc(struct pt_regs *regs); | 137 | extern unsigned long profile_pc(struct pt_regs *regs); |
174 | 138 | ||
175 | extern unsigned long | 139 | extern unsigned long |
@@ -271,6 +235,13 @@ extern int do_get_thread_area(struct task_struct *p, int idx, | |||
271 | extern int do_set_thread_area(struct task_struct *p, int idx, | 235 | extern int do_set_thread_area(struct task_struct *p, int idx, |
272 | struct user_desc __user *info, int can_allocate); | 236 | struct user_desc __user *info, int can_allocate); |
273 | 237 | ||
238 | extern void x86_ptrace_untrace(struct task_struct *); | ||
239 | extern void x86_ptrace_fork(struct task_struct *child, | ||
240 | unsigned long clone_flags); | ||
241 | |||
242 | #define arch_ptrace_untrace(tsk) x86_ptrace_untrace(tsk) | ||
243 | #define arch_ptrace_fork(child, flags) x86_ptrace_fork(child, flags) | ||
244 | |||
274 | #endif /* __KERNEL__ */ | 245 | #endif /* __KERNEL__ */ |
275 | 246 | ||
276 | #endif /* !__ASSEMBLY__ */ | 247 | #endif /* !__ASSEMBLY__ */ |
diff --git a/arch/x86/include/asm/reboot.h b/arch/x86/include/asm/reboot.h index df7710354f85..562d4fd31ba8 100644 --- a/arch/x86/include/asm/reboot.h +++ b/arch/x86/include/asm/reboot.h | |||
@@ -1,6 +1,8 @@ | |||
1 | #ifndef _ASM_X86_REBOOT_H | 1 | #ifndef _ASM_X86_REBOOT_H |
2 | #define _ASM_X86_REBOOT_H | 2 | #define _ASM_X86_REBOOT_H |
3 | 3 | ||
4 | #include <linux/kdebug.h> | ||
5 | |||
4 | struct pt_regs; | 6 | struct pt_regs; |
5 | 7 | ||
6 | struct machine_ops { | 8 | struct machine_ops { |
@@ -18,4 +20,7 @@ void native_machine_crash_shutdown(struct pt_regs *regs); | |||
18 | void native_machine_shutdown(void); | 20 | void native_machine_shutdown(void); |
19 | void machine_real_restart(const unsigned char *code, int length); | 21 | void machine_real_restart(const unsigned char *code, int length); |
20 | 22 | ||
23 | typedef void (*nmi_shootdown_cb)(int, struct die_args*); | ||
24 | void nmi_shootdown_cpus(nmi_shootdown_cb callback); | ||
25 | |||
21 | #endif /* _ASM_X86_REBOOT_H */ | 26 | #endif /* _ASM_X86_REBOOT_H */ |
diff --git a/arch/x86/include/asm/setup.h b/arch/x86/include/asm/setup.h index f12d37237465..4fcd53fd5f43 100644 --- a/arch/x86/include/asm/setup.h +++ b/arch/x86/include/asm/setup.h | |||
@@ -8,6 +8,10 @@ | |||
8 | /* Interrupt control for vSMPowered x86_64 systems */ | 8 | /* Interrupt control for vSMPowered x86_64 systems */ |
9 | void vsmp_init(void); | 9 | void vsmp_init(void); |
10 | 10 | ||
11 | |||
12 | void setup_bios_corruption_check(void); | ||
13 | |||
14 | |||
11 | #ifdef CONFIG_X86_VISWS | 15 | #ifdef CONFIG_X86_VISWS |
12 | extern void visws_early_detect(void); | 16 | extern void visws_early_detect(void); |
13 | extern int is_visws_box(void); | 17 | extern int is_visws_box(void); |
@@ -16,6 +20,8 @@ static inline void visws_early_detect(void) { } | |||
16 | static inline int is_visws_box(void) { return 0; } | 20 | static inline int is_visws_box(void) { return 0; } |
17 | #endif | 21 | #endif |
18 | 22 | ||
23 | extern int wakeup_secondary_cpu_via_nmi(int apicid, unsigned long start_eip); | ||
24 | extern int wakeup_secondary_cpu_via_init(int apicid, unsigned long start_eip); | ||
19 | /* | 25 | /* |
20 | * Any setup quirks to be performed? | 26 | * Any setup quirks to be performed? |
21 | */ | 27 | */ |
@@ -39,6 +45,7 @@ struct x86_quirks { | |||
39 | void (*smp_read_mpc_oem)(struct mp_config_oemtable *oemtable, | 45 | void (*smp_read_mpc_oem)(struct mp_config_oemtable *oemtable, |
40 | unsigned short oemsize); | 46 | unsigned short oemsize); |
41 | int (*setup_ioapic_ids)(void); | 47 | int (*setup_ioapic_ids)(void); |
48 | int (*update_genapic)(void); | ||
42 | }; | 49 | }; |
43 | 50 | ||
44 | extern struct x86_quirks *x86_quirks; | 51 | extern struct x86_quirks *x86_quirks; |
diff --git a/arch/x86/include/asm/sigframe.h b/arch/x86/include/asm/sigframe.h new file mode 100644 index 000000000000..4e0fe26d27d3 --- /dev/null +++ b/arch/x86/include/asm/sigframe.h | |||
@@ -0,0 +1,70 @@ | |||
1 | #ifndef _ASM_X86_SIGFRAME_H | ||
2 | #define _ASM_X86_SIGFRAME_H | ||
3 | |||
4 | #include <asm/sigcontext.h> | ||
5 | #include <asm/siginfo.h> | ||
6 | #include <asm/ucontext.h> | ||
7 | |||
8 | #ifdef CONFIG_X86_32 | ||
9 | #define sigframe_ia32 sigframe | ||
10 | #define rt_sigframe_ia32 rt_sigframe | ||
11 | #define sigcontext_ia32 sigcontext | ||
12 | #define _fpstate_ia32 _fpstate | ||
13 | #define ucontext_ia32 ucontext | ||
14 | #else /* !CONFIG_X86_32 */ | ||
15 | |||
16 | #ifdef CONFIG_IA32_EMULATION | ||
17 | #include <asm/ia32.h> | ||
18 | #endif /* CONFIG_IA32_EMULATION */ | ||
19 | |||
20 | #endif /* CONFIG_X86_32 */ | ||
21 | |||
22 | #if defined(CONFIG_X86_32) || defined(CONFIG_IA32_EMULATION) | ||
23 | struct sigframe_ia32 { | ||
24 | u32 pretcode; | ||
25 | int sig; | ||
26 | struct sigcontext_ia32 sc; | ||
27 | /* | ||
28 | * fpstate is unused. fpstate is moved/allocated after | ||
29 | * retcode[] below. This movement allows to have the FP state and the | ||
30 | * future state extensions (xsave) stay together. | ||
31 | * And at the same time retaining the unused fpstate, prevents changing | ||
32 | * the offset of extramask[] in the sigframe and thus prevent any | ||
33 | * legacy application accessing/modifying it. | ||
34 | */ | ||
35 | struct _fpstate_ia32 fpstate_unused; | ||
36 | #ifdef CONFIG_IA32_EMULATION | ||
37 | unsigned int extramask[_COMPAT_NSIG_WORDS-1]; | ||
38 | #else /* !CONFIG_IA32_EMULATION */ | ||
39 | unsigned long extramask[_NSIG_WORDS-1]; | ||
40 | #endif /* CONFIG_IA32_EMULATION */ | ||
41 | char retcode[8]; | ||
42 | /* fp state follows here */ | ||
43 | }; | ||
44 | |||
45 | struct rt_sigframe_ia32 { | ||
46 | u32 pretcode; | ||
47 | int sig; | ||
48 | u32 pinfo; | ||
49 | u32 puc; | ||
50 | #ifdef CONFIG_IA32_EMULATION | ||
51 | compat_siginfo_t info; | ||
52 | #else /* !CONFIG_IA32_EMULATION */ | ||
53 | struct siginfo info; | ||
54 | #endif /* CONFIG_IA32_EMULATION */ | ||
55 | struct ucontext_ia32 uc; | ||
56 | char retcode[8]; | ||
57 | /* fp state follows here */ | ||
58 | }; | ||
59 | #endif /* defined(CONFIG_X86_32) || defined(CONFIG_IA32_EMULATION) */ | ||
60 | |||
61 | #ifdef CONFIG_X86_64 | ||
62 | struct rt_sigframe { | ||
63 | char __user *pretcode; | ||
64 | struct ucontext uc; | ||
65 | struct siginfo info; | ||
66 | /* fp state follows here */ | ||
67 | }; | ||
68 | #endif /* CONFIG_X86_64 */ | ||
69 | |||
70 | #endif /* _ASM_X86_SIGFRAME_H */ | ||
diff --git a/arch/x86/include/asm/signal.h b/arch/x86/include/asm/signal.h index 96ac44f275da..7761a5d554bb 100644 --- a/arch/x86/include/asm/signal.h +++ b/arch/x86/include/asm/signal.h | |||
@@ -121,6 +121,10 @@ typedef unsigned long sigset_t; | |||
121 | 121 | ||
122 | #ifndef __ASSEMBLY__ | 122 | #ifndef __ASSEMBLY__ |
123 | 123 | ||
124 | # ifdef __KERNEL__ | ||
125 | extern void do_notify_resume(struct pt_regs *, void *, __u32); | ||
126 | # endif /* __KERNEL__ */ | ||
127 | |||
124 | #ifdef __i386__ | 128 | #ifdef __i386__ |
125 | # ifdef __KERNEL__ | 129 | # ifdef __KERNEL__ |
126 | struct old_sigaction { | 130 | struct old_sigaction { |
@@ -141,8 +145,6 @@ struct k_sigaction { | |||
141 | struct sigaction sa; | 145 | struct sigaction sa; |
142 | }; | 146 | }; |
143 | 147 | ||
144 | extern void do_notify_resume(struct pt_regs *, void *, __u32); | ||
145 | |||
146 | # else /* __KERNEL__ */ | 148 | # else /* __KERNEL__ */ |
147 | /* Here we must cater to libcs that poke about in kernel headers. */ | 149 | /* Here we must cater to libcs that poke about in kernel headers. */ |
148 | 150 | ||
diff --git a/arch/x86/include/asm/smp.h b/arch/x86/include/asm/smp.h index d12811ce51d9..830b9fcb6427 100644 --- a/arch/x86/include/asm/smp.h +++ b/arch/x86/include/asm/smp.h | |||
@@ -60,7 +60,7 @@ struct smp_ops { | |||
60 | void (*cpu_die)(unsigned int cpu); | 60 | void (*cpu_die)(unsigned int cpu); |
61 | void (*play_dead)(void); | 61 | void (*play_dead)(void); |
62 | 62 | ||
63 | void (*send_call_func_ipi)(cpumask_t mask); | 63 | void (*send_call_func_ipi)(const struct cpumask *mask); |
64 | void (*send_call_func_single_ipi)(int cpu); | 64 | void (*send_call_func_single_ipi)(int cpu); |
65 | }; | 65 | }; |
66 | 66 | ||
@@ -125,7 +125,7 @@ static inline void arch_send_call_function_single_ipi(int cpu) | |||
125 | 125 | ||
126 | static inline void arch_send_call_function_ipi(cpumask_t mask) | 126 | static inline void arch_send_call_function_ipi(cpumask_t mask) |
127 | { | 127 | { |
128 | smp_ops.send_call_func_ipi(mask); | 128 | smp_ops.send_call_func_ipi(&mask); |
129 | } | 129 | } |
130 | 130 | ||
131 | void cpu_disable_common(void); | 131 | void cpu_disable_common(void); |
@@ -138,7 +138,7 @@ void native_cpu_die(unsigned int cpu); | |||
138 | void native_play_dead(void); | 138 | void native_play_dead(void); |
139 | void play_dead_common(void); | 139 | void play_dead_common(void); |
140 | 140 | ||
141 | void native_send_call_func_ipi(cpumask_t mask); | 141 | void native_send_call_func_ipi(const struct cpumask *mask); |
142 | void native_send_call_func_single_ipi(int cpu); | 142 | void native_send_call_func_single_ipi(int cpu); |
143 | 143 | ||
144 | extern void prefill_possible_map(void); | 144 | extern void prefill_possible_map(void); |
diff --git a/arch/x86/include/asm/sparsemem.h b/arch/x86/include/asm/sparsemem.h index be44f7dab395..e3cc3c063ec5 100644 --- a/arch/x86/include/asm/sparsemem.h +++ b/arch/x86/include/asm/sparsemem.h | |||
@@ -27,7 +27,7 @@ | |||
27 | #else /* CONFIG_X86_32 */ | 27 | #else /* CONFIG_X86_32 */ |
28 | # define SECTION_SIZE_BITS 27 /* matt - 128 is convenient right now */ | 28 | # define SECTION_SIZE_BITS 27 /* matt - 128 is convenient right now */ |
29 | # define MAX_PHYSADDR_BITS 44 | 29 | # define MAX_PHYSADDR_BITS 44 |
30 | # define MAX_PHYSMEM_BITS 44 | 30 | # define MAX_PHYSMEM_BITS 44 /* Can be max 45 bits */ |
31 | #endif | 31 | #endif |
32 | 32 | ||
33 | #endif /* CONFIG_SPARSEMEM */ | 33 | #endif /* CONFIG_SPARSEMEM */ |
diff --git a/arch/x86/include/asm/summit/apic.h b/arch/x86/include/asm/summit/apic.h index 9b3070f1c2ac..4bb5fb34f030 100644 --- a/arch/x86/include/asm/summit/apic.h +++ b/arch/x86/include/asm/summit/apic.h | |||
@@ -14,13 +14,13 @@ | |||
14 | 14 | ||
15 | #define APIC_DFR_VALUE (APIC_DFR_CLUSTER) | 15 | #define APIC_DFR_VALUE (APIC_DFR_CLUSTER) |
16 | 16 | ||
17 | static inline cpumask_t target_cpus(void) | 17 | static inline const cpumask_t *target_cpus(void) |
18 | { | 18 | { |
19 | /* CPU_MASK_ALL (0xff) has undefined behaviour with | 19 | /* CPU_MASK_ALL (0xff) has undefined behaviour with |
20 | * dest_LowestPrio mode logical clustered apic interrupt routing | 20 | * dest_LowestPrio mode logical clustered apic interrupt routing |
21 | * Just start on cpu 0. IRQ balancing will spread load | 21 | * Just start on cpu 0. IRQ balancing will spread load |
22 | */ | 22 | */ |
23 | return cpumask_of_cpu(0); | 23 | return &cpumask_of_cpu(0); |
24 | } | 24 | } |
25 | 25 | ||
26 | #define INT_DELIVERY_MODE (dest_LowestPrio) | 26 | #define INT_DELIVERY_MODE (dest_LowestPrio) |
@@ -52,7 +52,7 @@ static inline void init_apic_ldr(void) | |||
52 | int i; | 52 | int i; |
53 | 53 | ||
54 | /* Create logical APIC IDs by counting CPUs already in cluster. */ | 54 | /* Create logical APIC IDs by counting CPUs already in cluster. */ |
55 | for (count = 0, i = NR_CPUS; --i >= 0; ) { | 55 | for (count = 0, i = nr_cpu_ids; --i >= 0; ) { |
56 | lid = cpu_2_logical_apicid[i]; | 56 | lid = cpu_2_logical_apicid[i]; |
57 | if (lid != BAD_APICID && apicid_cluster(lid) == my_cluster) | 57 | if (lid != BAD_APICID && apicid_cluster(lid) == my_cluster) |
58 | ++count; | 58 | ++count; |
@@ -97,8 +97,8 @@ static inline int apicid_to_node(int logical_apicid) | |||
97 | static inline int cpu_to_logical_apicid(int cpu) | 97 | static inline int cpu_to_logical_apicid(int cpu) |
98 | { | 98 | { |
99 | #ifdef CONFIG_SMP | 99 | #ifdef CONFIG_SMP |
100 | if (cpu >= NR_CPUS) | 100 | if (cpu >= nr_cpu_ids) |
101 | return BAD_APICID; | 101 | return BAD_APICID; |
102 | return (int)cpu_2_logical_apicid[cpu]; | 102 | return (int)cpu_2_logical_apicid[cpu]; |
103 | #else | 103 | #else |
104 | return logical_smp_processor_id(); | 104 | return logical_smp_processor_id(); |
@@ -107,7 +107,7 @@ static inline int cpu_to_logical_apicid(int cpu) | |||
107 | 107 | ||
108 | static inline int cpu_present_to_apicid(int mps_cpu) | 108 | static inline int cpu_present_to_apicid(int mps_cpu) |
109 | { | 109 | { |
110 | if (mps_cpu < NR_CPUS) | 110 | if (mps_cpu < nr_cpu_ids) |
111 | return (int)per_cpu(x86_bios_cpu_apicid, mps_cpu); | 111 | return (int)per_cpu(x86_bios_cpu_apicid, mps_cpu); |
112 | else | 112 | else |
113 | return BAD_APICID; | 113 | return BAD_APICID; |
@@ -137,25 +137,25 @@ static inline void enable_apic_mode(void) | |||
137 | { | 137 | { |
138 | } | 138 | } |
139 | 139 | ||
140 | static inline unsigned int cpu_mask_to_apicid(cpumask_t cpumask) | 140 | static inline unsigned int cpu_mask_to_apicid(const cpumask_t *cpumask) |
141 | { | 141 | { |
142 | int num_bits_set; | 142 | int num_bits_set; |
143 | int cpus_found = 0; | 143 | int cpus_found = 0; |
144 | int cpu; | 144 | int cpu; |
145 | int apicid; | 145 | int apicid; |
146 | 146 | ||
147 | num_bits_set = cpus_weight(cpumask); | 147 | num_bits_set = cpus_weight(*cpumask); |
148 | /* Return id to all */ | 148 | /* Return id to all */ |
149 | if (num_bits_set == NR_CPUS) | 149 | if (num_bits_set >= nr_cpu_ids) |
150 | return (int) 0xFF; | 150 | return (int) 0xFF; |
151 | /* | 151 | /* |
152 | * The cpus in the mask must all be on the apic cluster. If are not | 152 | * The cpus in the mask must all be on the apic cluster. If are not |
153 | * on the same apicid cluster return default value of TARGET_CPUS. | 153 | * on the same apicid cluster return default value of TARGET_CPUS. |
154 | */ | 154 | */ |
155 | cpu = first_cpu(cpumask); | 155 | cpu = first_cpu(*cpumask); |
156 | apicid = cpu_to_logical_apicid(cpu); | 156 | apicid = cpu_to_logical_apicid(cpu); |
157 | while (cpus_found < num_bits_set) { | 157 | while (cpus_found < num_bits_set) { |
158 | if (cpu_isset(cpu, cpumask)) { | 158 | if (cpu_isset(cpu, *cpumask)) { |
159 | int new_apicid = cpu_to_logical_apicid(cpu); | 159 | int new_apicid = cpu_to_logical_apicid(cpu); |
160 | if (apicid_cluster(apicid) != | 160 | if (apicid_cluster(apicid) != |
161 | apicid_cluster(new_apicid)){ | 161 | apicid_cluster(new_apicid)){ |
@@ -170,6 +170,23 @@ static inline unsigned int cpu_mask_to_apicid(cpumask_t cpumask) | |||
170 | return apicid; | 170 | return apicid; |
171 | } | 171 | } |
172 | 172 | ||
173 | static inline unsigned int cpu_mask_to_apicid_and(const struct cpumask *inmask, | ||
174 | const struct cpumask *andmask) | ||
175 | { | ||
176 | int apicid = cpu_to_logical_apicid(0); | ||
177 | cpumask_var_t cpumask; | ||
178 | |||
179 | if (!alloc_cpumask_var(&cpumask, GFP_ATOMIC)) | ||
180 | return apicid; | ||
181 | |||
182 | cpumask_and(cpumask, inmask, andmask); | ||
183 | cpumask_and(cpumask, cpumask, cpu_online_mask); | ||
184 | apicid = cpu_mask_to_apicid(cpumask); | ||
185 | |||
186 | free_cpumask_var(cpumask); | ||
187 | return apicid; | ||
188 | } | ||
189 | |||
173 | /* cpuid returns the value latched in the HW at reset, not the APIC ID | 190 | /* cpuid returns the value latched in the HW at reset, not the APIC ID |
174 | * register's value. For any box whose BIOS changes APIC IDs, like | 191 | * register's value. For any box whose BIOS changes APIC IDs, like |
175 | * clustered APIC systems, we must use hard_smp_processor_id. | 192 | * clustered APIC systems, we must use hard_smp_processor_id. |
diff --git a/arch/x86/include/asm/summit/ipi.h b/arch/x86/include/asm/summit/ipi.h index 53bd1e7bd7b4..a8a2c24f50cc 100644 --- a/arch/x86/include/asm/summit/ipi.h +++ b/arch/x86/include/asm/summit/ipi.h | |||
@@ -1,9 +1,10 @@ | |||
1 | #ifndef __ASM_SUMMIT_IPI_H | 1 | #ifndef __ASM_SUMMIT_IPI_H |
2 | #define __ASM_SUMMIT_IPI_H | 2 | #define __ASM_SUMMIT_IPI_H |
3 | 3 | ||
4 | void send_IPI_mask_sequence(cpumask_t mask, int vector); | 4 | void send_IPI_mask_sequence(const cpumask_t *mask, int vector); |
5 | void send_IPI_mask_allbutself(const cpumask_t *mask, int vector); | ||
5 | 6 | ||
6 | static inline void send_IPI_mask(cpumask_t mask, int vector) | 7 | static inline void send_IPI_mask(const cpumask_t *mask, int vector) |
7 | { | 8 | { |
8 | send_IPI_mask_sequence(mask, vector); | 9 | send_IPI_mask_sequence(mask, vector); |
9 | } | 10 | } |
@@ -14,12 +15,12 @@ static inline void send_IPI_allbutself(int vector) | |||
14 | cpu_clear(smp_processor_id(), mask); | 15 | cpu_clear(smp_processor_id(), mask); |
15 | 16 | ||
16 | if (!cpus_empty(mask)) | 17 | if (!cpus_empty(mask)) |
17 | send_IPI_mask(mask, vector); | 18 | send_IPI_mask(&mask, vector); |
18 | } | 19 | } |
19 | 20 | ||
20 | static inline void send_IPI_all(int vector) | 21 | static inline void send_IPI_all(int vector) |
21 | { | 22 | { |
22 | send_IPI_mask(cpu_online_map, vector); | 23 | send_IPI_mask(&cpu_online_map, vector); |
23 | } | 24 | } |
24 | 25 | ||
25 | #endif /* __ASM_SUMMIT_IPI_H */ | 26 | #endif /* __ASM_SUMMIT_IPI_H */ |
diff --git a/arch/x86/kvm/svm.h b/arch/x86/include/asm/svm.h index 1b8afa78e869..1b8afa78e869 100644 --- a/arch/x86/kvm/svm.h +++ b/arch/x86/include/asm/svm.h | |||
diff --git a/arch/x86/include/asm/swab.h b/arch/x86/include/asm/swab.h new file mode 100644 index 000000000000..306d4178ffc9 --- /dev/null +++ b/arch/x86/include/asm/swab.h | |||
@@ -0,0 +1,61 @@ | |||
1 | #ifndef _ASM_X86_SWAB_H | ||
2 | #define _ASM_X86_SWAB_H | ||
3 | |||
4 | #include <asm/types.h> | ||
5 | #include <linux/compiler.h> | ||
6 | |||
7 | static inline __attribute_const__ __u32 __arch_swab32(__u32 val) | ||
8 | { | ||
9 | #ifdef __i386__ | ||
10 | # ifdef CONFIG_X86_BSWAP | ||
11 | asm("bswap %0" : "=r" (val) : "0" (val)); | ||
12 | # else | ||
13 | asm("xchgb %b0,%h0\n\t" /* swap lower bytes */ | ||
14 | "rorl $16,%0\n\t" /* swap words */ | ||
15 | "xchgb %b0,%h0" /* swap higher bytes */ | ||
16 | : "=q" (val) | ||
17 | : "0" (val)); | ||
18 | # endif | ||
19 | |||
20 | #else /* __i386__ */ | ||
21 | asm("bswapl %0" | ||
22 | : "=r" (val) | ||
23 | : "0" (val)); | ||
24 | #endif | ||
25 | return val; | ||
26 | } | ||
27 | #define __arch_swab32 __arch_swab32 | ||
28 | |||
29 | static inline __attribute_const__ __u64 __arch_swab64(__u64 val) | ||
30 | { | ||
31 | #ifdef __i386__ | ||
32 | union { | ||
33 | struct { | ||
34 | __u32 a; | ||
35 | __u32 b; | ||
36 | } s; | ||
37 | __u64 u; | ||
38 | } v; | ||
39 | v.u = val; | ||
40 | # ifdef CONFIG_X86_BSWAP | ||
41 | asm("bswapl %0 ; bswapl %1 ; xchgl %0,%1" | ||
42 | : "=r" (v.s.a), "=r" (v.s.b) | ||
43 | : "0" (v.s.a), "1" (v.s.b)); | ||
44 | # else | ||
45 | v.s.a = __arch_swab32(v.s.a); | ||
46 | v.s.b = __arch_swab32(v.s.b); | ||
47 | asm("xchgl %0,%1" | ||
48 | : "=r" (v.s.a), "=r" (v.s.b) | ||
49 | : "0" (v.s.a), "1" (v.s.b)); | ||
50 | # endif | ||
51 | return v.u; | ||
52 | #else /* __i386__ */ | ||
53 | asm("bswapq %0" | ||
54 | : "=r" (val) | ||
55 | : "0" (val)); | ||
56 | return val; | ||
57 | #endif | ||
58 | } | ||
59 | #define __arch_swab64 __arch_swab64 | ||
60 | |||
61 | #endif /* _ASM_X86_SWAB_H */ | ||
diff --git a/arch/x86/include/asm/swiotlb.h b/arch/x86/include/asm/swiotlb.h index 51fb2c76ad74..b9e4e20174fb 100644 --- a/arch/x86/include/asm/swiotlb.h +++ b/arch/x86/include/asm/swiotlb.h | |||
@@ -1,46 +1,10 @@ | |||
1 | #ifndef _ASM_X86_SWIOTLB_H | 1 | #ifndef _ASM_X86_SWIOTLB_H |
2 | #define _ASM_X86_SWIOTLB_H | 2 | #define _ASM_X86_SWIOTLB_H |
3 | 3 | ||
4 | #include <asm/dma-mapping.h> | 4 | #include <linux/swiotlb.h> |
5 | 5 | ||
6 | /* SWIOTLB interface */ | 6 | /* SWIOTLB interface */ |
7 | 7 | ||
8 | extern dma_addr_t swiotlb_map_single(struct device *hwdev, void *ptr, | ||
9 | size_t size, int dir); | ||
10 | extern void *swiotlb_alloc_coherent(struct device *hwdev, size_t size, | ||
11 | dma_addr_t *dma_handle, gfp_t flags); | ||
12 | extern void swiotlb_unmap_single(struct device *hwdev, dma_addr_t dev_addr, | ||
13 | size_t size, int dir); | ||
14 | extern void swiotlb_sync_single_for_cpu(struct device *hwdev, | ||
15 | dma_addr_t dev_addr, | ||
16 | size_t size, int dir); | ||
17 | extern void swiotlb_sync_single_for_device(struct device *hwdev, | ||
18 | dma_addr_t dev_addr, | ||
19 | size_t size, int dir); | ||
20 | extern void swiotlb_sync_single_range_for_cpu(struct device *hwdev, | ||
21 | dma_addr_t dev_addr, | ||
22 | unsigned long offset, | ||
23 | size_t size, int dir); | ||
24 | extern void swiotlb_sync_single_range_for_device(struct device *hwdev, | ||
25 | dma_addr_t dev_addr, | ||
26 | unsigned long offset, | ||
27 | size_t size, int dir); | ||
28 | extern void swiotlb_sync_sg_for_cpu(struct device *hwdev, | ||
29 | struct scatterlist *sg, int nelems, | ||
30 | int dir); | ||
31 | extern void swiotlb_sync_sg_for_device(struct device *hwdev, | ||
32 | struct scatterlist *sg, int nelems, | ||
33 | int dir); | ||
34 | extern int swiotlb_map_sg(struct device *hwdev, struct scatterlist *sg, | ||
35 | int nents, int direction); | ||
36 | extern void swiotlb_unmap_sg(struct device *hwdev, struct scatterlist *sg, | ||
37 | int nents, int direction); | ||
38 | extern int swiotlb_dma_mapping_error(struct device *hwdev, dma_addr_t dma_addr); | ||
39 | extern void swiotlb_free_coherent(struct device *hwdev, size_t size, | ||
40 | void *vaddr, dma_addr_t dma_handle); | ||
41 | extern int swiotlb_dma_supported(struct device *hwdev, u64 mask); | ||
42 | extern void swiotlb_init(void); | ||
43 | |||
44 | extern int swiotlb_force; | 8 | extern int swiotlb_force; |
45 | 9 | ||
46 | #ifdef CONFIG_SWIOTLB | 10 | #ifdef CONFIG_SWIOTLB |
diff --git a/arch/x86/include/asm/sys_ia32.h b/arch/x86/include/asm/sys_ia32.h new file mode 100644 index 000000000000..ffb08be2a530 --- /dev/null +++ b/arch/x86/include/asm/sys_ia32.h | |||
@@ -0,0 +1,101 @@ | |||
1 | /* | ||
2 | * sys_ia32.h - Linux ia32 syscall interfaces | ||
3 | * | ||
4 | * Copyright (c) 2008 Jaswinder Singh Rajput | ||
5 | * | ||
6 | * This file is released under the GPLv2. | ||
7 | * See the file COPYING for more details. | ||
8 | */ | ||
9 | |||
10 | #ifndef _ASM_X86_SYS_IA32_H | ||
11 | #define _ASM_X86_SYS_IA32_H | ||
12 | |||
13 | #include <linux/compiler.h> | ||
14 | #include <linux/linkage.h> | ||
15 | #include <linux/types.h> | ||
16 | #include <linux/signal.h> | ||
17 | #include <asm/compat.h> | ||
18 | #include <asm/ia32.h> | ||
19 | |||
20 | /* ia32/sys_ia32.c */ | ||
21 | asmlinkage long sys32_truncate64(char __user *, unsigned long, unsigned long); | ||
22 | asmlinkage long sys32_ftruncate64(unsigned int, unsigned long, unsigned long); | ||
23 | |||
24 | asmlinkage long sys32_stat64(char __user *, struct stat64 __user *); | ||
25 | asmlinkage long sys32_lstat64(char __user *, struct stat64 __user *); | ||
26 | asmlinkage long sys32_fstat64(unsigned int, struct stat64 __user *); | ||
27 | asmlinkage long sys32_fstatat(unsigned int, char __user *, | ||
28 | struct stat64 __user *, int); | ||
29 | struct mmap_arg_struct; | ||
30 | asmlinkage long sys32_mmap(struct mmap_arg_struct __user *); | ||
31 | asmlinkage long sys32_mprotect(unsigned long, size_t, unsigned long); | ||
32 | |||
33 | asmlinkage long sys32_pipe(int __user *); | ||
34 | struct sigaction32; | ||
35 | struct old_sigaction32; | ||
36 | asmlinkage long sys32_rt_sigaction(int, struct sigaction32 __user *, | ||
37 | struct sigaction32 __user *, unsigned int); | ||
38 | asmlinkage long sys32_sigaction(int, struct old_sigaction32 __user *, | ||
39 | struct old_sigaction32 __user *); | ||
40 | asmlinkage long sys32_rt_sigprocmask(int, compat_sigset_t __user *, | ||
41 | compat_sigset_t __user *, unsigned int); | ||
42 | asmlinkage long sys32_alarm(unsigned int); | ||
43 | |||
44 | struct sel_arg_struct; | ||
45 | asmlinkage long sys32_old_select(struct sel_arg_struct __user *); | ||
46 | asmlinkage long sys32_waitpid(compat_pid_t, unsigned int *, int); | ||
47 | asmlinkage long sys32_sysfs(int, u32, u32); | ||
48 | |||
49 | asmlinkage long sys32_sched_rr_get_interval(compat_pid_t, | ||
50 | struct compat_timespec __user *); | ||
51 | asmlinkage long sys32_rt_sigpending(compat_sigset_t __user *, compat_size_t); | ||
52 | asmlinkage long sys32_rt_sigqueueinfo(int, int, compat_siginfo_t __user *); | ||
53 | |||
54 | #ifdef CONFIG_SYSCTL_SYSCALL | ||
55 | struct sysctl_ia32; | ||
56 | asmlinkage long sys32_sysctl(struct sysctl_ia32 __user *); | ||
57 | #endif | ||
58 | |||
59 | asmlinkage long sys32_pread(unsigned int, char __user *, u32, u32, u32); | ||
60 | asmlinkage long sys32_pwrite(unsigned int, char __user *, u32, u32, u32); | ||
61 | |||
62 | asmlinkage long sys32_personality(unsigned long); | ||
63 | asmlinkage long sys32_sendfile(int, int, compat_off_t __user *, s32); | ||
64 | |||
65 | asmlinkage long sys32_mmap2(unsigned long, unsigned long, unsigned long, | ||
66 | unsigned long, unsigned long, unsigned long); | ||
67 | |||
68 | struct oldold_utsname; | ||
69 | struct old_utsname; | ||
70 | asmlinkage long sys32_olduname(struct oldold_utsname __user *); | ||
71 | long sys32_uname(struct old_utsname __user *); | ||
72 | |||
73 | long sys32_ustat(unsigned, struct ustat32 __user *); | ||
74 | |||
75 | asmlinkage long sys32_execve(char __user *, compat_uptr_t __user *, | ||
76 | compat_uptr_t __user *, struct pt_regs *); | ||
77 | asmlinkage long sys32_clone(unsigned int, unsigned int, struct pt_regs *); | ||
78 | |||
79 | long sys32_lseek(unsigned int, int, unsigned int); | ||
80 | long sys32_kill(int, int); | ||
81 | long sys32_fadvise64_64(int, __u32, __u32, __u32, __u32, int); | ||
82 | long sys32_vm86_warning(void); | ||
83 | long sys32_lookup_dcookie(u32, u32, char __user *, size_t); | ||
84 | |||
85 | asmlinkage ssize_t sys32_readahead(int, unsigned, unsigned, size_t); | ||
86 | asmlinkage long sys32_sync_file_range(int, unsigned, unsigned, | ||
87 | unsigned, unsigned, int); | ||
88 | asmlinkage long sys32_fadvise64(int, unsigned, unsigned, size_t, int); | ||
89 | asmlinkage long sys32_fallocate(int, int, unsigned, | ||
90 | unsigned, unsigned, unsigned); | ||
91 | |||
92 | /* ia32/ia32_signal.c */ | ||
93 | asmlinkage long sys32_sigsuspend(int, int, old_sigset_t); | ||
94 | asmlinkage long sys32_sigaltstack(const stack_ia32_t __user *, | ||
95 | stack_ia32_t __user *, struct pt_regs *); | ||
96 | asmlinkage long sys32_sigreturn(struct pt_regs *); | ||
97 | asmlinkage long sys32_rt_sigreturn(struct pt_regs *); | ||
98 | |||
99 | /* ia32/ipc32.c */ | ||
100 | asmlinkage long sys32_ipc(u32, int, int, int, compat_uptr_t, u32); | ||
101 | #endif /* _ASM_X86_SYS_IA32_H */ | ||
diff --git a/arch/x86/include/asm/syscalls.h b/arch/x86/include/asm/syscalls.h index 87803da44010..9c6797c3e56c 100644 --- a/arch/x86/include/asm/syscalls.h +++ b/arch/x86/include/asm/syscalls.h | |||
@@ -19,6 +19,13 @@ | |||
19 | /* kernel/ioport.c */ | 19 | /* kernel/ioport.c */ |
20 | asmlinkage long sys_ioperm(unsigned long, unsigned long, int); | 20 | asmlinkage long sys_ioperm(unsigned long, unsigned long, int); |
21 | 21 | ||
22 | /* kernel/ldt.c */ | ||
23 | asmlinkage int sys_modify_ldt(int, void __user *, unsigned long); | ||
24 | |||
25 | /* kernel/tls.c */ | ||
26 | asmlinkage int sys_set_thread_area(struct user_desc __user *); | ||
27 | asmlinkage int sys_get_thread_area(struct user_desc __user *); | ||
28 | |||
22 | /* X86_32 only */ | 29 | /* X86_32 only */ |
23 | #ifdef CONFIG_X86_32 | 30 | #ifdef CONFIG_X86_32 |
24 | /* kernel/process_32.c */ | 31 | /* kernel/process_32.c */ |
@@ -33,14 +40,11 @@ asmlinkage int sys_sigaction(int, const struct old_sigaction __user *, | |||
33 | struct old_sigaction __user *); | 40 | struct old_sigaction __user *); |
34 | asmlinkage int sys_sigaltstack(unsigned long); | 41 | asmlinkage int sys_sigaltstack(unsigned long); |
35 | asmlinkage unsigned long sys_sigreturn(unsigned long); | 42 | asmlinkage unsigned long sys_sigreturn(unsigned long); |
36 | asmlinkage int sys_rt_sigreturn(unsigned long); | 43 | asmlinkage int sys_rt_sigreturn(struct pt_regs); |
37 | 44 | ||
38 | /* kernel/ioport.c */ | 45 | /* kernel/ioport.c */ |
39 | asmlinkage long sys_iopl(unsigned long); | 46 | asmlinkage long sys_iopl(unsigned long); |
40 | 47 | ||
41 | /* kernel/ldt.c */ | ||
42 | asmlinkage int sys_modify_ldt(int, void __user *, unsigned long); | ||
43 | |||
44 | /* kernel/sys_i386_32.c */ | 48 | /* kernel/sys_i386_32.c */ |
45 | asmlinkage long sys_mmap2(unsigned long, unsigned long, unsigned long, | 49 | asmlinkage long sys_mmap2(unsigned long, unsigned long, unsigned long, |
46 | unsigned long, unsigned long, unsigned long); | 50 | unsigned long, unsigned long, unsigned long); |
@@ -54,10 +58,6 @@ asmlinkage int sys_uname(struct old_utsname __user *); | |||
54 | struct oldold_utsname; | 58 | struct oldold_utsname; |
55 | asmlinkage int sys_olduname(struct oldold_utsname __user *); | 59 | asmlinkage int sys_olduname(struct oldold_utsname __user *); |
56 | 60 | ||
57 | /* kernel/tls.c */ | ||
58 | asmlinkage int sys_set_thread_area(struct user_desc __user *); | ||
59 | asmlinkage int sys_get_thread_area(struct user_desc __user *); | ||
60 | |||
61 | /* kernel/vm86_32.c */ | 61 | /* kernel/vm86_32.c */ |
62 | asmlinkage int sys_vm86old(struct pt_regs); | 62 | asmlinkage int sys_vm86old(struct pt_regs); |
63 | asmlinkage int sys_vm86(struct pt_regs); | 63 | asmlinkage int sys_vm86(struct pt_regs); |
diff --git a/arch/x86/include/asm/system.h b/arch/x86/include/asm/system.h index 2ed3f0f44ff7..8e626ea33a1a 100644 --- a/arch/x86/include/asm/system.h +++ b/arch/x86/include/asm/system.h | |||
@@ -17,12 +17,12 @@ | |||
17 | # define AT_VECTOR_SIZE_ARCH 1 | 17 | # define AT_VECTOR_SIZE_ARCH 1 |
18 | #endif | 18 | #endif |
19 | 19 | ||
20 | #ifdef CONFIG_X86_32 | ||
21 | |||
22 | struct task_struct; /* one of the stranger aspects of C forward declarations */ | 20 | struct task_struct; /* one of the stranger aspects of C forward declarations */ |
23 | struct task_struct *__switch_to(struct task_struct *prev, | 21 | struct task_struct *__switch_to(struct task_struct *prev, |
24 | struct task_struct *next); | 22 | struct task_struct *next); |
25 | 23 | ||
24 | #ifdef CONFIG_X86_32 | ||
25 | |||
26 | /* | 26 | /* |
27 | * Saving eflags is important. It switches not only IOPL between tasks, | 27 | * Saving eflags is important. It switches not only IOPL between tasks, |
28 | * it also protects other tasks from NT leaking through sysenter etc. | 28 | * it also protects other tasks from NT leaking through sysenter etc. |
@@ -314,6 +314,8 @@ extern void free_init_pages(char *what, unsigned long begin, unsigned long end); | |||
314 | 314 | ||
315 | void default_idle(void); | 315 | void default_idle(void); |
316 | 316 | ||
317 | void stop_this_cpu(void *dummy); | ||
318 | |||
317 | /* | 319 | /* |
318 | * Force strict CPU ordering. | 320 | * Force strict CPU ordering. |
319 | * And yes, this is required on UP too when we're talking | 321 | * And yes, this is required on UP too when we're talking |
diff --git a/arch/x86/include/asm/thread_info.h b/arch/x86/include/asm/thread_info.h index e44d379faad2..98789647baa9 100644 --- a/arch/x86/include/asm/thread_info.h +++ b/arch/x86/include/asm/thread_info.h | |||
@@ -20,11 +20,13 @@ | |||
20 | struct task_struct; | 20 | struct task_struct; |
21 | struct exec_domain; | 21 | struct exec_domain; |
22 | #include <asm/processor.h> | 22 | #include <asm/processor.h> |
23 | #include <asm/ftrace.h> | ||
24 | #include <asm/atomic.h> | ||
23 | 25 | ||
24 | struct thread_info { | 26 | struct thread_info { |
25 | struct task_struct *task; /* main task structure */ | 27 | struct task_struct *task; /* main task structure */ |
26 | struct exec_domain *exec_domain; /* execution domain */ | 28 | struct exec_domain *exec_domain; /* execution domain */ |
27 | unsigned long flags; /* low level flags */ | 29 | __u32 flags; /* low level flags */ |
28 | __u32 status; /* thread synchronous flags */ | 30 | __u32 status; /* thread synchronous flags */ |
29 | __u32 cpu; /* current CPU */ | 31 | __u32 cpu; /* current CPU */ |
30 | int preempt_count; /* 0 => preemptable, | 32 | int preempt_count; /* 0 => preemptable, |
@@ -91,7 +93,6 @@ struct thread_info { | |||
91 | #define TIF_FORCED_TF 24 /* true if TF in eflags artificially */ | 93 | #define TIF_FORCED_TF 24 /* true if TF in eflags artificially */ |
92 | #define TIF_DEBUGCTLMSR 25 /* uses thread_struct.debugctlmsr */ | 94 | #define TIF_DEBUGCTLMSR 25 /* uses thread_struct.debugctlmsr */ |
93 | #define TIF_DS_AREA_MSR 26 /* uses thread_struct.ds_area_msr */ | 95 | #define TIF_DS_AREA_MSR 26 /* uses thread_struct.ds_area_msr */ |
94 | #define TIF_BTS_TRACE_TS 27 /* record scheduling event timestamps */ | ||
95 | 96 | ||
96 | #define _TIF_SYSCALL_TRACE (1 << TIF_SYSCALL_TRACE) | 97 | #define _TIF_SYSCALL_TRACE (1 << TIF_SYSCALL_TRACE) |
97 | #define _TIF_NOTIFY_RESUME (1 << TIF_NOTIFY_RESUME) | 98 | #define _TIF_NOTIFY_RESUME (1 << TIF_NOTIFY_RESUME) |
@@ -113,7 +114,6 @@ struct thread_info { | |||
113 | #define _TIF_FORCED_TF (1 << TIF_FORCED_TF) | 114 | #define _TIF_FORCED_TF (1 << TIF_FORCED_TF) |
114 | #define _TIF_DEBUGCTLMSR (1 << TIF_DEBUGCTLMSR) | 115 | #define _TIF_DEBUGCTLMSR (1 << TIF_DEBUGCTLMSR) |
115 | #define _TIF_DS_AREA_MSR (1 << TIF_DS_AREA_MSR) | 116 | #define _TIF_DS_AREA_MSR (1 << TIF_DS_AREA_MSR) |
116 | #define _TIF_BTS_TRACE_TS (1 << TIF_BTS_TRACE_TS) | ||
117 | 117 | ||
118 | /* work to do in syscall_trace_enter() */ | 118 | /* work to do in syscall_trace_enter() */ |
119 | #define _TIF_WORK_SYSCALL_ENTRY \ | 119 | #define _TIF_WORK_SYSCALL_ENTRY \ |
@@ -139,8 +139,7 @@ struct thread_info { | |||
139 | 139 | ||
140 | /* flags to check in __switch_to() */ | 140 | /* flags to check in __switch_to() */ |
141 | #define _TIF_WORK_CTXSW \ | 141 | #define _TIF_WORK_CTXSW \ |
142 | (_TIF_IO_BITMAP|_TIF_DEBUGCTLMSR|_TIF_DS_AREA_MSR|_TIF_BTS_TRACE_TS| \ | 142 | (_TIF_IO_BITMAP|_TIF_DEBUGCTLMSR|_TIF_DS_AREA_MSR|_TIF_NOTSC) |
143 | _TIF_NOTSC) | ||
144 | 143 | ||
145 | #define _TIF_WORK_CTXSW_PREV _TIF_WORK_CTXSW | 144 | #define _TIF_WORK_CTXSW_PREV _TIF_WORK_CTXSW |
146 | #define _TIF_WORK_CTXSW_NEXT (_TIF_WORK_CTXSW|_TIF_DEBUG) | 145 | #define _TIF_WORK_CTXSW_NEXT (_TIF_WORK_CTXSW|_TIF_DEBUG) |
diff --git a/arch/x86/include/asm/topology.h b/arch/x86/include/asm/topology.h index ff386ff50ed7..4e2f2e0aab27 100644 --- a/arch/x86/include/asm/topology.h +++ b/arch/x86/include/asm/topology.h | |||
@@ -61,13 +61,19 @@ static inline int cpu_to_node(int cpu) | |||
61 | * | 61 | * |
62 | * Side note: this function creates the returned cpumask on the stack | 62 | * Side note: this function creates the returned cpumask on the stack |
63 | * so with a high NR_CPUS count, excessive stack space is used. The | 63 | * so with a high NR_CPUS count, excessive stack space is used. The |
64 | * node_to_cpumask_ptr function should be used whenever possible. | 64 | * cpumask_of_node function should be used whenever possible. |
65 | */ | 65 | */ |
66 | static inline cpumask_t node_to_cpumask(int node) | 66 | static inline cpumask_t node_to_cpumask(int node) |
67 | { | 67 | { |
68 | return node_to_cpumask_map[node]; | 68 | return node_to_cpumask_map[node]; |
69 | } | 69 | } |
70 | 70 | ||
71 | /* Returns a bitmask of CPUs on Node 'node'. */ | ||
72 | static inline const struct cpumask *cpumask_of_node(int node) | ||
73 | { | ||
74 | return &node_to_cpumask_map[node]; | ||
75 | } | ||
76 | |||
71 | #else /* CONFIG_X86_64 */ | 77 | #else /* CONFIG_X86_64 */ |
72 | 78 | ||
73 | /* Mappings between node number and cpus on that node. */ | 79 | /* Mappings between node number and cpus on that node. */ |
@@ -82,7 +88,7 @@ DECLARE_EARLY_PER_CPU(int, x86_cpu_to_node_map); | |||
82 | #ifdef CONFIG_DEBUG_PER_CPU_MAPS | 88 | #ifdef CONFIG_DEBUG_PER_CPU_MAPS |
83 | extern int cpu_to_node(int cpu); | 89 | extern int cpu_to_node(int cpu); |
84 | extern int early_cpu_to_node(int cpu); | 90 | extern int early_cpu_to_node(int cpu); |
85 | extern const cpumask_t *_node_to_cpumask_ptr(int node); | 91 | extern const cpumask_t *cpumask_of_node(int node); |
86 | extern cpumask_t node_to_cpumask(int node); | 92 | extern cpumask_t node_to_cpumask(int node); |
87 | 93 | ||
88 | #else /* !CONFIG_DEBUG_PER_CPU_MAPS */ | 94 | #else /* !CONFIG_DEBUG_PER_CPU_MAPS */ |
@@ -103,7 +109,7 @@ static inline int early_cpu_to_node(int cpu) | |||
103 | } | 109 | } |
104 | 110 | ||
105 | /* Returns a pointer to the cpumask of CPUs on Node 'node'. */ | 111 | /* Returns a pointer to the cpumask of CPUs on Node 'node'. */ |
106 | static inline const cpumask_t *_node_to_cpumask_ptr(int node) | 112 | static inline const cpumask_t *cpumask_of_node(int node) |
107 | { | 113 | { |
108 | return &node_to_cpumask_map[node]; | 114 | return &node_to_cpumask_map[node]; |
109 | } | 115 | } |
@@ -116,12 +122,15 @@ static inline cpumask_t node_to_cpumask(int node) | |||
116 | 122 | ||
117 | #endif /* !CONFIG_DEBUG_PER_CPU_MAPS */ | 123 | #endif /* !CONFIG_DEBUG_PER_CPU_MAPS */ |
118 | 124 | ||
119 | /* Replace default node_to_cpumask_ptr with optimized version */ | 125 | /* |
126 | * Replace default node_to_cpumask_ptr with optimized version | ||
127 | * Deprecated: use "const struct cpumask *mask = cpumask_of_node(node)" | ||
128 | */ | ||
120 | #define node_to_cpumask_ptr(v, node) \ | 129 | #define node_to_cpumask_ptr(v, node) \ |
121 | const cpumask_t *v = _node_to_cpumask_ptr(node) | 130 | const cpumask_t *v = cpumask_of_node(node) |
122 | 131 | ||
123 | #define node_to_cpumask_ptr_next(v, node) \ | 132 | #define node_to_cpumask_ptr_next(v, node) \ |
124 | v = _node_to_cpumask_ptr(node) | 133 | v = cpumask_of_node(node) |
125 | 134 | ||
126 | #endif /* CONFIG_X86_64 */ | 135 | #endif /* CONFIG_X86_64 */ |
127 | 136 | ||
@@ -187,7 +196,7 @@ extern int __node_distance(int, int); | |||
187 | #define cpu_to_node(cpu) 0 | 196 | #define cpu_to_node(cpu) 0 |
188 | #define early_cpu_to_node(cpu) 0 | 197 | #define early_cpu_to_node(cpu) 0 |
189 | 198 | ||
190 | static inline const cpumask_t *_node_to_cpumask_ptr(int node) | 199 | static inline const cpumask_t *cpumask_of_node(int node) |
191 | { | 200 | { |
192 | return &cpu_online_map; | 201 | return &cpu_online_map; |
193 | } | 202 | } |
@@ -200,12 +209,15 @@ static inline int node_to_first_cpu(int node) | |||
200 | return first_cpu(cpu_online_map); | 209 | return first_cpu(cpu_online_map); |
201 | } | 210 | } |
202 | 211 | ||
203 | /* Replace default node_to_cpumask_ptr with optimized version */ | 212 | /* |
213 | * Replace default node_to_cpumask_ptr with optimized version | ||
214 | * Deprecated: use "const struct cpumask *mask = cpumask_of_node(node)" | ||
215 | */ | ||
204 | #define node_to_cpumask_ptr(v, node) \ | 216 | #define node_to_cpumask_ptr(v, node) \ |
205 | const cpumask_t *v = _node_to_cpumask_ptr(node) | 217 | const cpumask_t *v = cpumask_of_node(node) |
206 | 218 | ||
207 | #define node_to_cpumask_ptr_next(v, node) \ | 219 | #define node_to_cpumask_ptr_next(v, node) \ |
208 | v = _node_to_cpumask_ptr(node) | 220 | v = cpumask_of_node(node) |
209 | #endif | 221 | #endif |
210 | 222 | ||
211 | #include <asm-generic/topology.h> | 223 | #include <asm-generic/topology.h> |
@@ -214,18 +226,20 @@ static inline int node_to_first_cpu(int node) | |||
214 | /* Returns the number of the first CPU on Node 'node'. */ | 226 | /* Returns the number of the first CPU on Node 'node'. */ |
215 | static inline int node_to_first_cpu(int node) | 227 | static inline int node_to_first_cpu(int node) |
216 | { | 228 | { |
217 | node_to_cpumask_ptr(mask, node); | 229 | return cpumask_first(cpumask_of_node(node)); |
218 | return first_cpu(*mask); | ||
219 | } | 230 | } |
220 | #endif | 231 | #endif |
221 | 232 | ||
222 | extern cpumask_t cpu_coregroup_map(int cpu); | 233 | extern cpumask_t cpu_coregroup_map(int cpu); |
234 | extern const struct cpumask *cpu_coregroup_mask(int cpu); | ||
223 | 235 | ||
224 | #ifdef ENABLE_TOPO_DEFINES | 236 | #ifdef ENABLE_TOPO_DEFINES |
225 | #define topology_physical_package_id(cpu) (cpu_data(cpu).phys_proc_id) | 237 | #define topology_physical_package_id(cpu) (cpu_data(cpu).phys_proc_id) |
226 | #define topology_core_id(cpu) (cpu_data(cpu).cpu_core_id) | 238 | #define topology_core_id(cpu) (cpu_data(cpu).cpu_core_id) |
227 | #define topology_core_siblings(cpu) (per_cpu(cpu_core_map, cpu)) | 239 | #define topology_core_siblings(cpu) (per_cpu(cpu_core_map, cpu)) |
228 | #define topology_thread_siblings(cpu) (per_cpu(cpu_sibling_map, cpu)) | 240 | #define topology_thread_siblings(cpu) (per_cpu(cpu_sibling_map, cpu)) |
241 | #define topology_core_cpumask(cpu) (&per_cpu(cpu_core_map, cpu)) | ||
242 | #define topology_thread_cpumask(cpu) (&per_cpu(cpu_sibling_map, cpu)) | ||
229 | 243 | ||
230 | /* indicates that pointers to the topology cpumask_t maps are valid */ | 244 | /* indicates that pointers to the topology cpumask_t maps are valid */ |
231 | #define arch_provides_topology_pointers yes | 245 | #define arch_provides_topology_pointers yes |
diff --git a/arch/x86/include/asm/trampoline.h b/arch/x86/include/asm/trampoline.h index fa0d79facdbc..780ba0ab94f9 100644 --- a/arch/x86/include/asm/trampoline.h +++ b/arch/x86/include/asm/trampoline.h | |||
@@ -3,6 +3,7 @@ | |||
3 | 3 | ||
4 | #ifndef __ASSEMBLY__ | 4 | #ifndef __ASSEMBLY__ |
5 | 5 | ||
6 | #ifdef CONFIG_X86_TRAMPOLINE | ||
6 | /* | 7 | /* |
7 | * Trampoline 80x86 program as an array. | 8 | * Trampoline 80x86 program as an array. |
8 | */ | 9 | */ |
@@ -13,8 +14,14 @@ extern unsigned char *trampoline_base; | |||
13 | extern unsigned long init_rsp; | 14 | extern unsigned long init_rsp; |
14 | extern unsigned long initial_code; | 15 | extern unsigned long initial_code; |
15 | 16 | ||
17 | #define TRAMPOLINE_SIZE roundup(trampoline_end - trampoline_data, PAGE_SIZE) | ||
16 | #define TRAMPOLINE_BASE 0x6000 | 18 | #define TRAMPOLINE_BASE 0x6000 |
19 | |||
17 | extern unsigned long setup_trampoline(void); | 20 | extern unsigned long setup_trampoline(void); |
21 | extern void __init reserve_trampoline_memory(void); | ||
22 | #else | ||
23 | static inline void reserve_trampoline_memory(void) {}; | ||
24 | #endif /* CONFIG_X86_TRAMPOLINE */ | ||
18 | 25 | ||
19 | #endif /* __ASSEMBLY__ */ | 26 | #endif /* __ASSEMBLY__ */ |
20 | 27 | ||
diff --git a/arch/x86/include/asm/traps.h b/arch/x86/include/asm/traps.h index 45dee286e45c..2ee0a3bceedf 100644 --- a/arch/x86/include/asm/traps.h +++ b/arch/x86/include/asm/traps.h | |||
@@ -46,6 +46,10 @@ dotraplinkage void do_coprocessor_segment_overrun(struct pt_regs *, long); | |||
46 | dotraplinkage void do_invalid_TSS(struct pt_regs *, long); | 46 | dotraplinkage void do_invalid_TSS(struct pt_regs *, long); |
47 | dotraplinkage void do_segment_not_present(struct pt_regs *, long); | 47 | dotraplinkage void do_segment_not_present(struct pt_regs *, long); |
48 | dotraplinkage void do_stack_segment(struct pt_regs *, long); | 48 | dotraplinkage void do_stack_segment(struct pt_regs *, long); |
49 | #ifdef CONFIG_X86_64 | ||
50 | dotraplinkage void do_double_fault(struct pt_regs *, long); | ||
51 | asmlinkage __kprobes struct pt_regs *sync_regs(struct pt_regs *); | ||
52 | #endif | ||
49 | dotraplinkage void do_general_protection(struct pt_regs *, long); | 53 | dotraplinkage void do_general_protection(struct pt_regs *, long); |
50 | dotraplinkage void do_page_fault(struct pt_regs *, unsigned long); | 54 | dotraplinkage void do_page_fault(struct pt_regs *, unsigned long); |
51 | dotraplinkage void do_spurious_interrupt_bug(struct pt_regs *, long); | 55 | dotraplinkage void do_spurious_interrupt_bug(struct pt_regs *, long); |
@@ -72,10 +76,13 @@ static inline int get_si_code(unsigned long condition) | |||
72 | extern int panic_on_unrecovered_nmi; | 76 | extern int panic_on_unrecovered_nmi; |
73 | extern int kstack_depth_to_print; | 77 | extern int kstack_depth_to_print; |
74 | 78 | ||
75 | #ifdef CONFIG_X86_32 | ||
76 | void math_error(void __user *); | 79 | void math_error(void __user *); |
77 | unsigned long patch_espfix_desc(unsigned long, unsigned long); | ||
78 | asmlinkage void math_emulate(long); | 80 | asmlinkage void math_emulate(long); |
81 | #ifdef CONFIG_X86_32 | ||
82 | unsigned long patch_espfix_desc(unsigned long, unsigned long); | ||
83 | #else | ||
84 | asmlinkage void smp_thermal_interrupt(void); | ||
85 | asmlinkage void mce_threshold_interrupt(void); | ||
79 | #endif | 86 | #endif |
80 | 87 | ||
81 | #endif /* _ASM_X86_TRAPS_H */ | 88 | #endif /* _ASM_X86_TRAPS_H */ |
diff --git a/arch/x86/include/asm/tsc.h b/arch/x86/include/asm/tsc.h index 9cd83a8e40d5..38ae163cc91b 100644 --- a/arch/x86/include/asm/tsc.h +++ b/arch/x86/include/asm/tsc.h | |||
@@ -34,8 +34,6 @@ static inline cycles_t get_cycles(void) | |||
34 | 34 | ||
35 | static __always_inline cycles_t vget_cycles(void) | 35 | static __always_inline cycles_t vget_cycles(void) |
36 | { | 36 | { |
37 | cycles_t cycles; | ||
38 | |||
39 | /* | 37 | /* |
40 | * We only do VDSOs on TSC capable CPUs, so this shouldnt | 38 | * We only do VDSOs on TSC capable CPUs, so this shouldnt |
41 | * access boot_cpu_data (which is not VDSO-safe): | 39 | * access boot_cpu_data (which is not VDSO-safe): |
@@ -44,11 +42,7 @@ static __always_inline cycles_t vget_cycles(void) | |||
44 | if (!cpu_has_tsc) | 42 | if (!cpu_has_tsc) |
45 | return 0; | 43 | return 0; |
46 | #endif | 44 | #endif |
47 | rdtsc_barrier(); | 45 | return (cycles_t)__native_read_tsc(); |
48 | cycles = (cycles_t)__native_read_tsc(); | ||
49 | rdtsc_barrier(); | ||
50 | |||
51 | return cycles; | ||
52 | } | 46 | } |
53 | 47 | ||
54 | extern void tsc_init(void); | 48 | extern void tsc_init(void); |
diff --git a/arch/x86/include/asm/uaccess.h b/arch/x86/include/asm/uaccess.h index 35c54921b2e4..4340055b7559 100644 --- a/arch/x86/include/asm/uaccess.h +++ b/arch/x86/include/asm/uaccess.h | |||
@@ -157,6 +157,7 @@ extern int __get_user_bad(void); | |||
157 | int __ret_gu; \ | 157 | int __ret_gu; \ |
158 | unsigned long __val_gu; \ | 158 | unsigned long __val_gu; \ |
159 | __chk_user_ptr(ptr); \ | 159 | __chk_user_ptr(ptr); \ |
160 | might_fault(); \ | ||
160 | switch (sizeof(*(ptr))) { \ | 161 | switch (sizeof(*(ptr))) { \ |
161 | case 1: \ | 162 | case 1: \ |
162 | __get_user_x(1, __ret_gu, __val_gu, ptr); \ | 163 | __get_user_x(1, __ret_gu, __val_gu, ptr); \ |
@@ -241,6 +242,7 @@ extern void __put_user_8(void); | |||
241 | int __ret_pu; \ | 242 | int __ret_pu; \ |
242 | __typeof__(*(ptr)) __pu_val; \ | 243 | __typeof__(*(ptr)) __pu_val; \ |
243 | __chk_user_ptr(ptr); \ | 244 | __chk_user_ptr(ptr); \ |
245 | might_fault(); \ | ||
244 | __pu_val = x; \ | 246 | __pu_val = x; \ |
245 | switch (sizeof(*(ptr))) { \ | 247 | switch (sizeof(*(ptr))) { \ |
246 | case 1: \ | 248 | case 1: \ |
@@ -350,14 +352,14 @@ do { \ | |||
350 | 352 | ||
351 | #define __put_user_nocheck(x, ptr, size) \ | 353 | #define __put_user_nocheck(x, ptr, size) \ |
352 | ({ \ | 354 | ({ \ |
353 | long __pu_err; \ | 355 | int __pu_err; \ |
354 | __put_user_size((x), (ptr), (size), __pu_err, -EFAULT); \ | 356 | __put_user_size((x), (ptr), (size), __pu_err, -EFAULT); \ |
355 | __pu_err; \ | 357 | __pu_err; \ |
356 | }) | 358 | }) |
357 | 359 | ||
358 | #define __get_user_nocheck(x, ptr, size) \ | 360 | #define __get_user_nocheck(x, ptr, size) \ |
359 | ({ \ | 361 | ({ \ |
360 | long __gu_err; \ | 362 | int __gu_err; \ |
361 | unsigned long __gu_val; \ | 363 | unsigned long __gu_val; \ |
362 | __get_user_size(__gu_val, (ptr), (size), __gu_err, -EFAULT); \ | 364 | __get_user_size(__gu_val, (ptr), (size), __gu_err, -EFAULT); \ |
363 | (x) = (__force __typeof__(*(ptr)))__gu_val; \ | 365 | (x) = (__force __typeof__(*(ptr)))__gu_val; \ |
diff --git a/arch/x86/include/asm/uaccess_32.h b/arch/x86/include/asm/uaccess_32.h index d095a3aeea1b..5e06259e90e5 100644 --- a/arch/x86/include/asm/uaccess_32.h +++ b/arch/x86/include/asm/uaccess_32.h | |||
@@ -82,8 +82,8 @@ __copy_to_user_inatomic(void __user *to, const void *from, unsigned long n) | |||
82 | static __always_inline unsigned long __must_check | 82 | static __always_inline unsigned long __must_check |
83 | __copy_to_user(void __user *to, const void *from, unsigned long n) | 83 | __copy_to_user(void __user *to, const void *from, unsigned long n) |
84 | { | 84 | { |
85 | might_sleep(); | 85 | might_fault(); |
86 | return __copy_to_user_inatomic(to, from, n); | 86 | return __copy_to_user_inatomic(to, from, n); |
87 | } | 87 | } |
88 | 88 | ||
89 | static __always_inline unsigned long | 89 | static __always_inline unsigned long |
@@ -137,7 +137,7 @@ __copy_from_user_inatomic(void *to, const void __user *from, unsigned long n) | |||
137 | static __always_inline unsigned long | 137 | static __always_inline unsigned long |
138 | __copy_from_user(void *to, const void __user *from, unsigned long n) | 138 | __copy_from_user(void *to, const void __user *from, unsigned long n) |
139 | { | 139 | { |
140 | might_sleep(); | 140 | might_fault(); |
141 | if (__builtin_constant_p(n)) { | 141 | if (__builtin_constant_p(n)) { |
142 | unsigned long ret; | 142 | unsigned long ret; |
143 | 143 | ||
@@ -159,7 +159,7 @@ __copy_from_user(void *to, const void __user *from, unsigned long n) | |||
159 | static __always_inline unsigned long __copy_from_user_nocache(void *to, | 159 | static __always_inline unsigned long __copy_from_user_nocache(void *to, |
160 | const void __user *from, unsigned long n) | 160 | const void __user *from, unsigned long n) |
161 | { | 161 | { |
162 | might_sleep(); | 162 | might_fault(); |
163 | if (__builtin_constant_p(n)) { | 163 | if (__builtin_constant_p(n)) { |
164 | unsigned long ret; | 164 | unsigned long ret; |
165 | 165 | ||
diff --git a/arch/x86/include/asm/uaccess_64.h b/arch/x86/include/asm/uaccess_64.h index f8cfd00db450..84210c479fca 100644 --- a/arch/x86/include/asm/uaccess_64.h +++ b/arch/x86/include/asm/uaccess_64.h | |||
@@ -29,6 +29,8 @@ static __always_inline __must_check | |||
29 | int __copy_from_user(void *dst, const void __user *src, unsigned size) | 29 | int __copy_from_user(void *dst, const void __user *src, unsigned size) |
30 | { | 30 | { |
31 | int ret = 0; | 31 | int ret = 0; |
32 | |||
33 | might_fault(); | ||
32 | if (!__builtin_constant_p(size)) | 34 | if (!__builtin_constant_p(size)) |
33 | return copy_user_generic(dst, (__force void *)src, size); | 35 | return copy_user_generic(dst, (__force void *)src, size); |
34 | switch (size) { | 36 | switch (size) { |
@@ -71,6 +73,8 @@ static __always_inline __must_check | |||
71 | int __copy_to_user(void __user *dst, const void *src, unsigned size) | 73 | int __copy_to_user(void __user *dst, const void *src, unsigned size) |
72 | { | 74 | { |
73 | int ret = 0; | 75 | int ret = 0; |
76 | |||
77 | might_fault(); | ||
74 | if (!__builtin_constant_p(size)) | 78 | if (!__builtin_constant_p(size)) |
75 | return copy_user_generic((__force void *)dst, src, size); | 79 | return copy_user_generic((__force void *)dst, src, size); |
76 | switch (size) { | 80 | switch (size) { |
@@ -113,6 +117,8 @@ static __always_inline __must_check | |||
113 | int __copy_in_user(void __user *dst, const void __user *src, unsigned size) | 117 | int __copy_in_user(void __user *dst, const void __user *src, unsigned size) |
114 | { | 118 | { |
115 | int ret = 0; | 119 | int ret = 0; |
120 | |||
121 | might_fault(); | ||
116 | if (!__builtin_constant_p(size)) | 122 | if (!__builtin_constant_p(size)) |
117 | return copy_user_generic((__force void *)dst, | 123 | return copy_user_generic((__force void *)dst, |
118 | (__force void *)src, size); | 124 | (__force void *)src, size); |
diff --git a/arch/x86/include/asm/unwind.h b/arch/x86/include/asm/unwind.h deleted file mode 100644 index 8b064bd9c553..000000000000 --- a/arch/x86/include/asm/unwind.h +++ /dev/null | |||
@@ -1,13 +0,0 @@ | |||
1 | #ifndef _ASM_X86_UNWIND_H | ||
2 | #define _ASM_X86_UNWIND_H | ||
3 | |||
4 | #define UNW_PC(frame) ((void)(frame), 0UL) | ||
5 | #define UNW_SP(frame) ((void)(frame), 0UL) | ||
6 | #define UNW_FP(frame) ((void)(frame), 0UL) | ||
7 | |||
8 | static inline int arch_unw_user_mode(const void *info) | ||
9 | { | ||
10 | return 0; | ||
11 | } | ||
12 | |||
13 | #endif /* _ASM_X86_UNWIND_H */ | ||
diff --git a/arch/x86/include/asm/uv/bios.h b/arch/x86/include/asm/uv/bios.h index d931d3b7e6f7..7ed17ff502b9 100644 --- a/arch/x86/include/asm/uv/bios.h +++ b/arch/x86/include/asm/uv/bios.h | |||
@@ -32,13 +32,18 @@ | |||
32 | enum uv_bios_cmd { | 32 | enum uv_bios_cmd { |
33 | UV_BIOS_COMMON, | 33 | UV_BIOS_COMMON, |
34 | UV_BIOS_GET_SN_INFO, | 34 | UV_BIOS_GET_SN_INFO, |
35 | UV_BIOS_FREQ_BASE | 35 | UV_BIOS_FREQ_BASE, |
36 | UV_BIOS_WATCHLIST_ALLOC, | ||
37 | UV_BIOS_WATCHLIST_FREE, | ||
38 | UV_BIOS_MEMPROTECT, | ||
39 | UV_BIOS_GET_PARTITION_ADDR | ||
36 | }; | 40 | }; |
37 | 41 | ||
38 | /* | 42 | /* |
39 | * Status values returned from a BIOS call. | 43 | * Status values returned from a BIOS call. |
40 | */ | 44 | */ |
41 | enum { | 45 | enum { |
46 | BIOS_STATUS_MORE_PASSES = 1, | ||
42 | BIOS_STATUS_SUCCESS = 0, | 47 | BIOS_STATUS_SUCCESS = 0, |
43 | BIOS_STATUS_UNIMPLEMENTED = -ENOSYS, | 48 | BIOS_STATUS_UNIMPLEMENTED = -ENOSYS, |
44 | BIOS_STATUS_EINVAL = -EINVAL, | 49 | BIOS_STATUS_EINVAL = -EINVAL, |
@@ -71,6 +76,21 @@ union partition_info_u { | |||
71 | }; | 76 | }; |
72 | }; | 77 | }; |
73 | 78 | ||
79 | union uv_watchlist_u { | ||
80 | u64 val; | ||
81 | struct { | ||
82 | u64 blade : 16, | ||
83 | size : 32, | ||
84 | filler : 16; | ||
85 | }; | ||
86 | }; | ||
87 | |||
88 | enum uv_memprotect { | ||
89 | UV_MEMPROT_RESTRICT_ACCESS, | ||
90 | UV_MEMPROT_ALLOW_AMO, | ||
91 | UV_MEMPROT_ALLOW_RW | ||
92 | }; | ||
93 | |||
74 | /* | 94 | /* |
75 | * bios calls have 6 parameters | 95 | * bios calls have 6 parameters |
76 | */ | 96 | */ |
@@ -80,14 +100,20 @@ extern s64 uv_bios_call_reentrant(enum uv_bios_cmd, u64, u64, u64, u64, u64); | |||
80 | 100 | ||
81 | extern s64 uv_bios_get_sn_info(int, int *, long *, long *, long *); | 101 | extern s64 uv_bios_get_sn_info(int, int *, long *, long *, long *); |
82 | extern s64 uv_bios_freq_base(u64, u64 *); | 102 | extern s64 uv_bios_freq_base(u64, u64 *); |
103 | extern int uv_bios_mq_watchlist_alloc(int, unsigned long, unsigned int, | ||
104 | unsigned long *); | ||
105 | extern int uv_bios_mq_watchlist_free(int, int); | ||
106 | extern s64 uv_bios_change_memprotect(u64, u64, enum uv_memprotect); | ||
107 | extern s64 uv_bios_reserved_page_pa(u64, u64 *, u64 *, u64 *); | ||
83 | 108 | ||
84 | extern void uv_bios_init(void); | 109 | extern void uv_bios_init(void); |
85 | 110 | ||
111 | extern unsigned long sn_rtc_cycles_per_second; | ||
86 | extern int uv_type; | 112 | extern int uv_type; |
87 | extern long sn_partition_id; | 113 | extern long sn_partition_id; |
88 | extern long uv_coherency_id; | 114 | extern long sn_coherency_id; |
89 | extern long uv_region_size; | 115 | extern long sn_region_size; |
90 | #define partition_coherence_id() (uv_coherency_id) | 116 | #define partition_coherence_id() (sn_coherency_id) |
91 | 117 | ||
92 | extern struct kobject *sgi_uv_kobj; /* /sys/firmware/sgi_uv */ | 118 | extern struct kobject *sgi_uv_kobj; /* /sys/firmware/sgi_uv */ |
93 | 119 | ||
diff --git a/arch/x86/include/asm/uv/uv_bau.h b/arch/x86/include/asm/uv/uv_bau.h index e2363253bbbf..50423c7b56b2 100644 --- a/arch/x86/include/asm/uv/uv_bau.h +++ b/arch/x86/include/asm/uv/uv_bau.h | |||
@@ -133,61 +133,61 @@ struct bau_msg_payload { | |||
133 | * see table 4.2.3.0.1 in broacast_assist spec. | 133 | * see table 4.2.3.0.1 in broacast_assist spec. |
134 | */ | 134 | */ |
135 | struct bau_msg_header { | 135 | struct bau_msg_header { |
136 | int dest_subnodeid:6; /* must be zero */ | 136 | unsigned int dest_subnodeid:6; /* must be zero */ |
137 | /* bits 5:0 */ | 137 | /* bits 5:0 */ |
138 | int base_dest_nodeid:15; /* nasid>>1 (pnode) of first bit in node_map */ | 138 | unsigned int base_dest_nodeid:15; /* nasid>>1 (pnode) of */ |
139 | /* bits 20:6 */ | 139 | /* bits 20:6 */ /* first bit in node_map */ |
140 | int command:8; /* message type */ | 140 | unsigned int command:8; /* message type */ |
141 | /* bits 28:21 */ | 141 | /* bits 28:21 */ |
142 | /* 0x38: SN3net EndPoint Message */ | 142 | /* 0x38: SN3net EndPoint Message */ |
143 | int rsvd_1:3; /* must be zero */ | 143 | unsigned int rsvd_1:3; /* must be zero */ |
144 | /* bits 31:29 */ | 144 | /* bits 31:29 */ |
145 | /* int will align on 32 bits */ | 145 | /* int will align on 32 bits */ |
146 | int rsvd_2:9; /* must be zero */ | 146 | unsigned int rsvd_2:9; /* must be zero */ |
147 | /* bits 40:32 */ | 147 | /* bits 40:32 */ |
148 | /* Suppl_A is 56-41 */ | 148 | /* Suppl_A is 56-41 */ |
149 | int payload_2a:8; /* becomes byte 16 of msg */ | 149 | unsigned int payload_2a:8;/* becomes byte 16 of msg */ |
150 | /* bits 48:41 */ /* not currently using */ | 150 | /* bits 48:41 */ /* not currently using */ |
151 | int payload_2b:8; /* becomes byte 17 of msg */ | 151 | unsigned int payload_2b:8;/* becomes byte 17 of msg */ |
152 | /* bits 56:49 */ /* not currently using */ | 152 | /* bits 56:49 */ /* not currently using */ |
153 | /* Address field (96:57) is never used as an | 153 | /* Address field (96:57) is never used as an |
154 | address (these are address bits 42:3) */ | 154 | address (these are address bits 42:3) */ |
155 | int rsvd_3:1; /* must be zero */ | 155 | unsigned int rsvd_3:1; /* must be zero */ |
156 | /* bit 57 */ | 156 | /* bit 57 */ |
157 | /* address bits 27:4 are payload */ | 157 | /* address bits 27:4 are payload */ |
158 | /* these 24 bits become bytes 12-14 of msg */ | 158 | /* these 24 bits become bytes 12-14 of msg */ |
159 | int replied_to:1; /* sent as 0 by the source to byte 12 */ | 159 | unsigned int replied_to:1;/* sent as 0 by the source to byte 12 */ |
160 | /* bit 58 */ | 160 | /* bit 58 */ |
161 | 161 | ||
162 | int payload_1a:5; /* not currently used */ | 162 | unsigned int payload_1a:5;/* not currently used */ |
163 | /* bits 63:59 */ | 163 | /* bits 63:59 */ |
164 | int payload_1b:8; /* not currently used */ | 164 | unsigned int payload_1b:8;/* not currently used */ |
165 | /* bits 71:64 */ | 165 | /* bits 71:64 */ |
166 | int payload_1c:8; /* not currently used */ | 166 | unsigned int payload_1c:8;/* not currently used */ |
167 | /* bits 79:72 */ | 167 | /* bits 79:72 */ |
168 | int payload_1d:2; /* not currently used */ | 168 | unsigned int payload_1d:2;/* not currently used */ |
169 | /* bits 81:80 */ | 169 | /* bits 81:80 */ |
170 | 170 | ||
171 | int rsvd_4:7; /* must be zero */ | 171 | unsigned int rsvd_4:7; /* must be zero */ |
172 | /* bits 88:82 */ | 172 | /* bits 88:82 */ |
173 | int sw_ack_flag:1; /* software acknowledge flag */ | 173 | unsigned int sw_ack_flag:1;/* software acknowledge flag */ |
174 | /* bit 89 */ | 174 | /* bit 89 */ |
175 | /* INTD trasactions at destination are to | 175 | /* INTD trasactions at destination are to |
176 | wait for software acknowledge */ | 176 | wait for software acknowledge */ |
177 | int rsvd_5:6; /* must be zero */ | 177 | unsigned int rsvd_5:6; /* must be zero */ |
178 | /* bits 95:90 */ | 178 | /* bits 95:90 */ |
179 | int rsvd_6:5; /* must be zero */ | 179 | unsigned int rsvd_6:5; /* must be zero */ |
180 | /* bits 100:96 */ | 180 | /* bits 100:96 */ |
181 | int int_both:1; /* if 1, interrupt both sockets on the blade */ | 181 | unsigned int int_both:1;/* if 1, interrupt both sockets on the blade */ |
182 | /* bit 101*/ | 182 | /* bit 101*/ |
183 | int fairness:3; /* usually zero */ | 183 | unsigned int fairness:3;/* usually zero */ |
184 | /* bits 104:102 */ | 184 | /* bits 104:102 */ |
185 | int multilevel:1; /* multi-level multicast format */ | 185 | unsigned int multilevel:1; /* multi-level multicast format */ |
186 | /* bit 105 */ | 186 | /* bit 105 */ |
187 | /* 0 for TLB: endpoint multi-unicast messages */ | 187 | /* 0 for TLB: endpoint multi-unicast messages */ |
188 | int chaining:1; /* next descriptor is part of this activation*/ | 188 | unsigned int chaining:1;/* next descriptor is part of this activation*/ |
189 | /* bit 106 */ | 189 | /* bit 106 */ |
190 | int rsvd_7:21; /* must be zero */ | 190 | unsigned int rsvd_7:21; /* must be zero */ |
191 | /* bits 127:107 */ | 191 | /* bits 127:107 */ |
192 | }; | 192 | }; |
193 | 193 | ||
diff --git a/arch/x86/include/asm/uv/uv_hub.h b/arch/x86/include/asm/uv/uv_hub.h index 7a5782610b2b..777327ef05c1 100644 --- a/arch/x86/include/asm/uv/uv_hub.h +++ b/arch/x86/include/asm/uv/uv_hub.h | |||
@@ -113,25 +113,37 @@ | |||
113 | */ | 113 | */ |
114 | #define UV_MAX_NASID_VALUE (UV_MAX_NUMALINK_NODES * 2) | 114 | #define UV_MAX_NASID_VALUE (UV_MAX_NUMALINK_NODES * 2) |
115 | 115 | ||
116 | struct uv_scir_s { | ||
117 | struct timer_list timer; | ||
118 | unsigned long offset; | ||
119 | unsigned long last; | ||
120 | unsigned long idle_on; | ||
121 | unsigned long idle_off; | ||
122 | unsigned char state; | ||
123 | unsigned char enabled; | ||
124 | }; | ||
125 | |||
116 | /* | 126 | /* |
117 | * The following defines attributes of the HUB chip. These attributes are | 127 | * The following defines attributes of the HUB chip. These attributes are |
118 | * frequently referenced and are kept in the per-cpu data areas of each cpu. | 128 | * frequently referenced and are kept in the per-cpu data areas of each cpu. |
119 | * They are kept together in a struct to minimize cache misses. | 129 | * They are kept together in a struct to minimize cache misses. |
120 | */ | 130 | */ |
121 | struct uv_hub_info_s { | 131 | struct uv_hub_info_s { |
122 | unsigned long global_mmr_base; | 132 | unsigned long global_mmr_base; |
123 | unsigned long gpa_mask; | 133 | unsigned long gpa_mask; |
124 | unsigned long gnode_upper; | 134 | unsigned long gnode_upper; |
125 | unsigned long lowmem_remap_top; | 135 | unsigned long lowmem_remap_top; |
126 | unsigned long lowmem_remap_base; | 136 | unsigned long lowmem_remap_base; |
127 | unsigned short pnode; | 137 | unsigned short pnode; |
128 | unsigned short pnode_mask; | 138 | unsigned short pnode_mask; |
129 | unsigned short coherency_domain_number; | 139 | unsigned short coherency_domain_number; |
130 | unsigned short numa_blade_id; | 140 | unsigned short numa_blade_id; |
131 | unsigned char blade_processor_id; | 141 | unsigned char blade_processor_id; |
132 | unsigned char m_val; | 142 | unsigned char m_val; |
133 | unsigned char n_val; | 143 | unsigned char n_val; |
144 | struct uv_scir_s scir; | ||
134 | }; | 145 | }; |
146 | |||
135 | DECLARE_PER_CPU(struct uv_hub_info_s, __uv_hub_info); | 147 | DECLARE_PER_CPU(struct uv_hub_info_s, __uv_hub_info); |
136 | #define uv_hub_info (&__get_cpu_var(__uv_hub_info)) | 148 | #define uv_hub_info (&__get_cpu_var(__uv_hub_info)) |
137 | #define uv_cpu_hub_info(cpu) (&per_cpu(__uv_hub_info, cpu)) | 149 | #define uv_cpu_hub_info(cpu) (&per_cpu(__uv_hub_info, cpu)) |
@@ -163,6 +175,30 @@ DECLARE_PER_CPU(struct uv_hub_info_s, __uv_hub_info); | |||
163 | 175 | ||
164 | #define UV_APIC_PNODE_SHIFT 6 | 176 | #define UV_APIC_PNODE_SHIFT 6 |
165 | 177 | ||
178 | /* Local Bus from cpu's perspective */ | ||
179 | #define LOCAL_BUS_BASE 0x1c00000 | ||
180 | #define LOCAL_BUS_SIZE (4 * 1024 * 1024) | ||
181 | |||
182 | /* | ||
183 | * System Controller Interface Reg | ||
184 | * | ||
185 | * Note there are NO leds on a UV system. This register is only | ||
186 | * used by the system controller to monitor system-wide operation. | ||
187 | * There are 64 regs per node. With Nahelem cpus (2 cores per node, | ||
188 | * 8 cpus per core, 2 threads per cpu) there are 32 cpu threads on | ||
189 | * a node. | ||
190 | * | ||
191 | * The window is located at top of ACPI MMR space | ||
192 | */ | ||
193 | #define SCIR_WINDOW_COUNT 64 | ||
194 | #define SCIR_LOCAL_MMR_BASE (LOCAL_BUS_BASE + \ | ||
195 | LOCAL_BUS_SIZE - \ | ||
196 | SCIR_WINDOW_COUNT) | ||
197 | |||
198 | #define SCIR_CPU_HEARTBEAT 0x01 /* timer interrupt */ | ||
199 | #define SCIR_CPU_ACTIVITY 0x02 /* not idle */ | ||
200 | #define SCIR_CPU_HB_INTERVAL (HZ) /* once per second */ | ||
201 | |||
166 | /* | 202 | /* |
167 | * Macros for converting between kernel virtual addresses, socket local physical | 203 | * Macros for converting between kernel virtual addresses, socket local physical |
168 | * addresses, and UV global physical addresses. | 204 | * addresses, and UV global physical addresses. |
@@ -174,7 +210,7 @@ DECLARE_PER_CPU(struct uv_hub_info_s, __uv_hub_info); | |||
174 | static inline unsigned long uv_soc_phys_ram_to_gpa(unsigned long paddr) | 210 | static inline unsigned long uv_soc_phys_ram_to_gpa(unsigned long paddr) |
175 | { | 211 | { |
176 | if (paddr < uv_hub_info->lowmem_remap_top) | 212 | if (paddr < uv_hub_info->lowmem_remap_top) |
177 | paddr += uv_hub_info->lowmem_remap_base; | 213 | paddr |= uv_hub_info->lowmem_remap_base; |
178 | return paddr | uv_hub_info->gnode_upper; | 214 | return paddr | uv_hub_info->gnode_upper; |
179 | } | 215 | } |
180 | 216 | ||
@@ -182,19 +218,7 @@ static inline unsigned long uv_soc_phys_ram_to_gpa(unsigned long paddr) | |||
182 | /* socket virtual --> UV global physical address */ | 218 | /* socket virtual --> UV global physical address */ |
183 | static inline unsigned long uv_gpa(void *v) | 219 | static inline unsigned long uv_gpa(void *v) |
184 | { | 220 | { |
185 | return __pa(v) | uv_hub_info->gnode_upper; | 221 | return uv_soc_phys_ram_to_gpa(__pa(v)); |
186 | } | ||
187 | |||
188 | /* socket virtual --> UV global physical address */ | ||
189 | static inline void *uv_vgpa(void *v) | ||
190 | { | ||
191 | return (void *)uv_gpa(v); | ||
192 | } | ||
193 | |||
194 | /* UV global physical address --> socket virtual */ | ||
195 | static inline void *uv_va(unsigned long gpa) | ||
196 | { | ||
197 | return __va(gpa & uv_hub_info->gpa_mask); | ||
198 | } | 222 | } |
199 | 223 | ||
200 | /* pnode, offset --> socket virtual */ | 224 | /* pnode, offset --> socket virtual */ |
@@ -277,6 +301,16 @@ static inline void uv_write_local_mmr(unsigned long offset, unsigned long val) | |||
277 | *uv_local_mmr_address(offset) = val; | 301 | *uv_local_mmr_address(offset) = val; |
278 | } | 302 | } |
279 | 303 | ||
304 | static inline unsigned char uv_read_local_mmr8(unsigned long offset) | ||
305 | { | ||
306 | return *((unsigned char *)uv_local_mmr_address(offset)); | ||
307 | } | ||
308 | |||
309 | static inline void uv_write_local_mmr8(unsigned long offset, unsigned char val) | ||
310 | { | ||
311 | *((unsigned char *)uv_local_mmr_address(offset)) = val; | ||
312 | } | ||
313 | |||
280 | /* | 314 | /* |
281 | * Structures and definitions for converting between cpu, node, pnode, and blade | 315 | * Structures and definitions for converting between cpu, node, pnode, and blade |
282 | * numbers. | 316 | * numbers. |
@@ -351,5 +385,20 @@ static inline int uv_num_possible_blades(void) | |||
351 | return uv_possible_blades; | 385 | return uv_possible_blades; |
352 | } | 386 | } |
353 | 387 | ||
354 | #endif /* _ASM_X86_UV_UV_HUB_H */ | 388 | /* Update SCIR state */ |
389 | static inline void uv_set_scir_bits(unsigned char value) | ||
390 | { | ||
391 | if (uv_hub_info->scir.state != value) { | ||
392 | uv_hub_info->scir.state = value; | ||
393 | uv_write_local_mmr8(uv_hub_info->scir.offset, value); | ||
394 | } | ||
395 | } | ||
396 | static inline void uv_set_cpu_scir_bits(int cpu, unsigned char value) | ||
397 | { | ||
398 | if (uv_cpu_hub_info(cpu)->scir.state != value) { | ||
399 | uv_cpu_hub_info(cpu)->scir.state = value; | ||
400 | uv_write_local_mmr8(uv_cpu_hub_info(cpu)->scir.offset, value); | ||
401 | } | ||
402 | } | ||
355 | 403 | ||
404 | #endif /* _ASM_X86_UV_UV_HUB_H */ | ||
diff --git a/arch/x86/include/asm/virtext.h b/arch/x86/include/asm/virtext.h new file mode 100644 index 000000000000..593636275238 --- /dev/null +++ b/arch/x86/include/asm/virtext.h | |||
@@ -0,0 +1,132 @@ | |||
1 | /* CPU virtualization extensions handling | ||
2 | * | ||
3 | * This should carry the code for handling CPU virtualization extensions | ||
4 | * that needs to live in the kernel core. | ||
5 | * | ||
6 | * Author: Eduardo Habkost <ehabkost@redhat.com> | ||
7 | * | ||
8 | * Copyright (C) 2008, Red Hat Inc. | ||
9 | * | ||
10 | * Contains code from KVM, Copyright (C) 2006 Qumranet, Inc. | ||
11 | * | ||
12 | * This work is licensed under the terms of the GNU GPL, version 2. See | ||
13 | * the COPYING file in the top-level directory. | ||
14 | */ | ||
15 | #ifndef _ASM_X86_VIRTEX_H | ||
16 | #define _ASM_X86_VIRTEX_H | ||
17 | |||
18 | #include <asm/processor.h> | ||
19 | #include <asm/system.h> | ||
20 | |||
21 | #include <asm/vmx.h> | ||
22 | #include <asm/svm.h> | ||
23 | |||
24 | /* | ||
25 | * VMX functions: | ||
26 | */ | ||
27 | |||
28 | static inline int cpu_has_vmx(void) | ||
29 | { | ||
30 | unsigned long ecx = cpuid_ecx(1); | ||
31 | return test_bit(5, &ecx); /* CPUID.1:ECX.VMX[bit 5] -> VT */ | ||
32 | } | ||
33 | |||
34 | |||
35 | /** Disable VMX on the current CPU | ||
36 | * | ||
37 | * vmxoff causes a undefined-opcode exception if vmxon was not run | ||
38 | * on the CPU previously. Only call this function if you know VMX | ||
39 | * is enabled. | ||
40 | */ | ||
41 | static inline void cpu_vmxoff(void) | ||
42 | { | ||
43 | asm volatile (ASM_VMX_VMXOFF : : : "cc"); | ||
44 | write_cr4(read_cr4() & ~X86_CR4_VMXE); | ||
45 | } | ||
46 | |||
47 | static inline int cpu_vmx_enabled(void) | ||
48 | { | ||
49 | return read_cr4() & X86_CR4_VMXE; | ||
50 | } | ||
51 | |||
52 | /** Disable VMX if it is enabled on the current CPU | ||
53 | * | ||
54 | * You shouldn't call this if cpu_has_vmx() returns 0. | ||
55 | */ | ||
56 | static inline void __cpu_emergency_vmxoff(void) | ||
57 | { | ||
58 | if (cpu_vmx_enabled()) | ||
59 | cpu_vmxoff(); | ||
60 | } | ||
61 | |||
62 | /** Disable VMX if it is supported and enabled on the current CPU | ||
63 | */ | ||
64 | static inline void cpu_emergency_vmxoff(void) | ||
65 | { | ||
66 | if (cpu_has_vmx()) | ||
67 | __cpu_emergency_vmxoff(); | ||
68 | } | ||
69 | |||
70 | |||
71 | |||
72 | |||
73 | /* | ||
74 | * SVM functions: | ||
75 | */ | ||
76 | |||
77 | /** Check if the CPU has SVM support | ||
78 | * | ||
79 | * You can use the 'msg' arg to get a message describing the problem, | ||
80 | * if the function returns zero. Simply pass NULL if you are not interested | ||
81 | * on the messages; gcc should take care of not generating code for | ||
82 | * the messages on this case. | ||
83 | */ | ||
84 | static inline int cpu_has_svm(const char **msg) | ||
85 | { | ||
86 | uint32_t eax, ebx, ecx, edx; | ||
87 | |||
88 | if (boot_cpu_data.x86_vendor != X86_VENDOR_AMD) { | ||
89 | if (msg) | ||
90 | *msg = "not amd"; | ||
91 | return 0; | ||
92 | } | ||
93 | |||
94 | cpuid(0x80000000, &eax, &ebx, &ecx, &edx); | ||
95 | if (eax < SVM_CPUID_FUNC) { | ||
96 | if (msg) | ||
97 | *msg = "can't execute cpuid_8000000a"; | ||
98 | return 0; | ||
99 | } | ||
100 | |||
101 | cpuid(0x80000001, &eax, &ebx, &ecx, &edx); | ||
102 | if (!(ecx & (1 << SVM_CPUID_FEATURE_SHIFT))) { | ||
103 | if (msg) | ||
104 | *msg = "svm not available"; | ||
105 | return 0; | ||
106 | } | ||
107 | return 1; | ||
108 | } | ||
109 | |||
110 | |||
111 | /** Disable SVM on the current CPU | ||
112 | * | ||
113 | * You should call this only if cpu_has_svm() returned true. | ||
114 | */ | ||
115 | static inline void cpu_svm_disable(void) | ||
116 | { | ||
117 | uint64_t efer; | ||
118 | |||
119 | wrmsrl(MSR_VM_HSAVE_PA, 0); | ||
120 | rdmsrl(MSR_EFER, efer); | ||
121 | wrmsrl(MSR_EFER, efer & ~MSR_EFER_SVME_MASK); | ||
122 | } | ||
123 | |||
124 | /** Makes sure SVM is disabled, if it is supported on the CPU | ||
125 | */ | ||
126 | static inline void cpu_emergency_svm_disable(void) | ||
127 | { | ||
128 | if (cpu_has_svm(NULL)) | ||
129 | cpu_svm_disable(); | ||
130 | } | ||
131 | |||
132 | #endif /* _ASM_X86_VIRTEX_H */ | ||
diff --git a/arch/x86/include/asm/vmware.h b/arch/x86/include/asm/vmware.h new file mode 100644 index 000000000000..c11b7e100d83 --- /dev/null +++ b/arch/x86/include/asm/vmware.h | |||
@@ -0,0 +1,27 @@ | |||
1 | /* | ||
2 | * Copyright (C) 2008, VMware, Inc. | ||
3 | * | ||
4 | * This program is free software; you can redistribute it and/or modify | ||
5 | * it under the terms of the GNU General Public License as published by | ||
6 | * the Free Software Foundation; either version 2 of the License, or | ||
7 | * (at your option) any later version. | ||
8 | * | ||
9 | * This program is distributed in the hope that it will be useful, but | ||
10 | * WITHOUT ANY WARRANTY; without even the implied warranty of | ||
11 | * MERCHANTABILITY OR FITNESS FOR A PARTICULAR PURPOSE, GOOD TITLE or | ||
12 | * NON INFRINGEMENT. See the GNU General Public License for more | ||
13 | * details. | ||
14 | * | ||
15 | * You should have received a copy of the GNU General Public License | ||
16 | * along with this program; if not, write to the Free Software | ||
17 | * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. | ||
18 | * | ||
19 | */ | ||
20 | #ifndef ASM_X86__VMWARE_H | ||
21 | #define ASM_X86__VMWARE_H | ||
22 | |||
23 | extern unsigned long vmware_get_tsc_khz(void); | ||
24 | extern int vmware_platform(void); | ||
25 | extern void vmware_set_feature_bits(struct cpuinfo_x86 *c); | ||
26 | |||
27 | #endif | ||
diff --git a/arch/x86/kvm/vmx.h b/arch/x86/include/asm/vmx.h index ec5edc339da6..d0238e6151d8 100644 --- a/arch/x86/kvm/vmx.h +++ b/arch/x86/include/asm/vmx.h | |||
@@ -63,10 +63,13 @@ | |||
63 | 63 | ||
64 | #define VM_EXIT_HOST_ADDR_SPACE_SIZE 0x00000200 | 64 | #define VM_EXIT_HOST_ADDR_SPACE_SIZE 0x00000200 |
65 | #define VM_EXIT_ACK_INTR_ON_EXIT 0x00008000 | 65 | #define VM_EXIT_ACK_INTR_ON_EXIT 0x00008000 |
66 | #define VM_EXIT_SAVE_IA32_PAT 0x00040000 | ||
67 | #define VM_EXIT_LOAD_IA32_PAT 0x00080000 | ||
66 | 68 | ||
67 | #define VM_ENTRY_IA32E_MODE 0x00000200 | 69 | #define VM_ENTRY_IA32E_MODE 0x00000200 |
68 | #define VM_ENTRY_SMM 0x00000400 | 70 | #define VM_ENTRY_SMM 0x00000400 |
69 | #define VM_ENTRY_DEACT_DUAL_MONITOR 0x00000800 | 71 | #define VM_ENTRY_DEACT_DUAL_MONITOR 0x00000800 |
72 | #define VM_ENTRY_LOAD_IA32_PAT 0x00004000 | ||
70 | 73 | ||
71 | /* VMCS Encodings */ | 74 | /* VMCS Encodings */ |
72 | enum vmcs_field { | 75 | enum vmcs_field { |
@@ -112,6 +115,8 @@ enum vmcs_field { | |||
112 | VMCS_LINK_POINTER_HIGH = 0x00002801, | 115 | VMCS_LINK_POINTER_HIGH = 0x00002801, |
113 | GUEST_IA32_DEBUGCTL = 0x00002802, | 116 | GUEST_IA32_DEBUGCTL = 0x00002802, |
114 | GUEST_IA32_DEBUGCTL_HIGH = 0x00002803, | 117 | GUEST_IA32_DEBUGCTL_HIGH = 0x00002803, |
118 | GUEST_IA32_PAT = 0x00002804, | ||
119 | GUEST_IA32_PAT_HIGH = 0x00002805, | ||
115 | GUEST_PDPTR0 = 0x0000280a, | 120 | GUEST_PDPTR0 = 0x0000280a, |
116 | GUEST_PDPTR0_HIGH = 0x0000280b, | 121 | GUEST_PDPTR0_HIGH = 0x0000280b, |
117 | GUEST_PDPTR1 = 0x0000280c, | 122 | GUEST_PDPTR1 = 0x0000280c, |
@@ -120,6 +125,8 @@ enum vmcs_field { | |||
120 | GUEST_PDPTR2_HIGH = 0x0000280f, | 125 | GUEST_PDPTR2_HIGH = 0x0000280f, |
121 | GUEST_PDPTR3 = 0x00002810, | 126 | GUEST_PDPTR3 = 0x00002810, |
122 | GUEST_PDPTR3_HIGH = 0x00002811, | 127 | GUEST_PDPTR3_HIGH = 0x00002811, |
128 | HOST_IA32_PAT = 0x00002c00, | ||
129 | HOST_IA32_PAT_HIGH = 0x00002c01, | ||
123 | PIN_BASED_VM_EXEC_CONTROL = 0x00004000, | 130 | PIN_BASED_VM_EXEC_CONTROL = 0x00004000, |
124 | CPU_BASED_VM_EXEC_CONTROL = 0x00004002, | 131 | CPU_BASED_VM_EXEC_CONTROL = 0x00004002, |
125 | EXCEPTION_BITMAP = 0x00004004, | 132 | EXCEPTION_BITMAP = 0x00004004, |
@@ -331,8 +338,9 @@ enum vmcs_field { | |||
331 | 338 | ||
332 | #define AR_RESERVD_MASK 0xfffe0f00 | 339 | #define AR_RESERVD_MASK 0xfffe0f00 |
333 | 340 | ||
334 | #define APIC_ACCESS_PAGE_PRIVATE_MEMSLOT 9 | 341 | #define TSS_PRIVATE_MEMSLOT (KVM_MEMORY_SLOTS + 0) |
335 | #define IDENTITY_PAGETABLE_PRIVATE_MEMSLOT 10 | 342 | #define APIC_ACCESS_PAGE_PRIVATE_MEMSLOT (KVM_MEMORY_SLOTS + 1) |
343 | #define IDENTITY_PAGETABLE_PRIVATE_MEMSLOT (KVM_MEMORY_SLOTS + 2) | ||
336 | 344 | ||
337 | #define VMX_NR_VPIDS (1 << 16) | 345 | #define VMX_NR_VPIDS (1 << 16) |
338 | #define VMX_VPID_EXTENT_SINGLE_CONTEXT 1 | 346 | #define VMX_VPID_EXTENT_SINGLE_CONTEXT 1 |
@@ -356,4 +364,19 @@ enum vmcs_field { | |||
356 | 364 | ||
357 | #define VMX_EPT_IDENTITY_PAGETABLE_ADDR 0xfffbc000ul | 365 | #define VMX_EPT_IDENTITY_PAGETABLE_ADDR 0xfffbc000ul |
358 | 366 | ||
367 | |||
368 | #define ASM_VMX_VMCLEAR_RAX ".byte 0x66, 0x0f, 0xc7, 0x30" | ||
369 | #define ASM_VMX_VMLAUNCH ".byte 0x0f, 0x01, 0xc2" | ||
370 | #define ASM_VMX_VMRESUME ".byte 0x0f, 0x01, 0xc3" | ||
371 | #define ASM_VMX_VMPTRLD_RAX ".byte 0x0f, 0xc7, 0x30" | ||
372 | #define ASM_VMX_VMREAD_RDX_RAX ".byte 0x0f, 0x78, 0xd0" | ||
373 | #define ASM_VMX_VMWRITE_RAX_RDX ".byte 0x0f, 0x79, 0xd0" | ||
374 | #define ASM_VMX_VMWRITE_RSP_RDX ".byte 0x0f, 0x79, 0xd4" | ||
375 | #define ASM_VMX_VMXOFF ".byte 0x0f, 0x01, 0xc4" | ||
376 | #define ASM_VMX_VMXON_RAX ".byte 0xf3, 0x0f, 0xc7, 0x30" | ||
377 | #define ASM_VMX_INVEPT ".byte 0x66, 0x0f, 0x38, 0x80, 0x08" | ||
378 | #define ASM_VMX_INVVPID ".byte 0x66, 0x0f, 0x38, 0x81, 0x08" | ||
379 | |||
380 | |||
381 | |||
359 | #endif | 382 | #endif |
diff --git a/arch/x86/include/asm/xen/hypercall.h b/arch/x86/include/asm/xen/hypercall.h index 3f6000d95fe2..5e79ca694326 100644 --- a/arch/x86/include/asm/xen/hypercall.h +++ b/arch/x86/include/asm/xen/hypercall.h | |||
@@ -33,8 +33,14 @@ | |||
33 | #ifndef _ASM_X86_XEN_HYPERCALL_H | 33 | #ifndef _ASM_X86_XEN_HYPERCALL_H |
34 | #define _ASM_X86_XEN_HYPERCALL_H | 34 | #define _ASM_X86_XEN_HYPERCALL_H |
35 | 35 | ||
36 | #include <linux/kernel.h> | ||
37 | #include <linux/spinlock.h> | ||
36 | #include <linux/errno.h> | 38 | #include <linux/errno.h> |
37 | #include <linux/string.h> | 39 | #include <linux/string.h> |
40 | #include <linux/types.h> | ||
41 | |||
42 | #include <asm/page.h> | ||
43 | #include <asm/pgtable.h> | ||
38 | 44 | ||
39 | #include <xen/interface/xen.h> | 45 | #include <xen/interface/xen.h> |
40 | #include <xen/interface/sched.h> | 46 | #include <xen/interface/sched.h> |
diff --git a/arch/x86/include/asm/xen/hypervisor.h b/arch/x86/include/asm/xen/hypervisor.h index a38d25ac87d2..81fbd735aec4 100644 --- a/arch/x86/include/asm/xen/hypervisor.h +++ b/arch/x86/include/asm/xen/hypervisor.h | |||
@@ -33,39 +33,10 @@ | |||
33 | #ifndef _ASM_X86_XEN_HYPERVISOR_H | 33 | #ifndef _ASM_X86_XEN_HYPERVISOR_H |
34 | #define _ASM_X86_XEN_HYPERVISOR_H | 34 | #define _ASM_X86_XEN_HYPERVISOR_H |
35 | 35 | ||
36 | #include <linux/types.h> | ||
37 | #include <linux/kernel.h> | ||
38 | |||
39 | #include <xen/interface/xen.h> | ||
40 | #include <xen/interface/version.h> | ||
41 | |||
42 | #include <asm/ptrace.h> | ||
43 | #include <asm/page.h> | ||
44 | #include <asm/desc.h> | ||
45 | #if defined(__i386__) | ||
46 | # ifdef CONFIG_X86_PAE | ||
47 | # include <asm-generic/pgtable-nopud.h> | ||
48 | # else | ||
49 | # include <asm-generic/pgtable-nopmd.h> | ||
50 | # endif | ||
51 | #endif | ||
52 | #include <asm/xen/hypercall.h> | ||
53 | |||
54 | /* arch/i386/kernel/setup.c */ | 36 | /* arch/i386/kernel/setup.c */ |
55 | extern struct shared_info *HYPERVISOR_shared_info; | 37 | extern struct shared_info *HYPERVISOR_shared_info; |
56 | extern struct start_info *xen_start_info; | 38 | extern struct start_info *xen_start_info; |
57 | 39 | ||
58 | /* arch/i386/mach-xen/evtchn.c */ | ||
59 | /* Force a proper event-channel callback from Xen. */ | ||
60 | extern void force_evtchn_callback(void); | ||
61 | |||
62 | /* Turn jiffies into Xen system time. */ | ||
63 | u64 jiffies_to_st(unsigned long jiffies); | ||
64 | |||
65 | |||
66 | #define MULTI_UVMFLAGS_INDEX 3 | ||
67 | #define MULTI_UVMDOMID_INDEX 4 | ||
68 | |||
69 | enum xen_domain_type { | 40 | enum xen_domain_type { |
70 | XEN_NATIVE, | 41 | XEN_NATIVE, |
71 | XEN_PV_DOMAIN, | 42 | XEN_PV_DOMAIN, |
@@ -74,9 +45,15 @@ enum xen_domain_type { | |||
74 | 45 | ||
75 | extern enum xen_domain_type xen_domain_type; | 46 | extern enum xen_domain_type xen_domain_type; |
76 | 47 | ||
48 | #ifdef CONFIG_XEN | ||
77 | #define xen_domain() (xen_domain_type != XEN_NATIVE) | 49 | #define xen_domain() (xen_domain_type != XEN_NATIVE) |
78 | #define xen_pv_domain() (xen_domain_type == XEN_PV_DOMAIN) | 50 | #else |
51 | #define xen_domain() (0) | ||
52 | #endif | ||
53 | |||
54 | #define xen_pv_domain() (xen_domain() && xen_domain_type == XEN_PV_DOMAIN) | ||
55 | #define xen_hvm_domain() (xen_domain() && xen_domain_type == XEN_HVM_DOMAIN) | ||
56 | |||
79 | #define xen_initial_domain() (xen_pv_domain() && xen_start_info->flags & SIF_INITDOMAIN) | 57 | #define xen_initial_domain() (xen_pv_domain() && xen_start_info->flags & SIF_INITDOMAIN) |
80 | #define xen_hvm_domain() (xen_domain_type == XEN_HVM_DOMAIN) | ||
81 | 58 | ||
82 | #endif /* _ASM_X86_XEN_HYPERVISOR_H */ | 59 | #endif /* _ASM_X86_XEN_HYPERVISOR_H */ |
diff --git a/arch/x86/include/asm/xen/page.h b/arch/x86/include/asm/xen/page.h index bc628998a1b9..7ef617ef1df3 100644 --- a/arch/x86/include/asm/xen/page.h +++ b/arch/x86/include/asm/xen/page.h | |||
@@ -1,11 +1,16 @@ | |||
1 | #ifndef _ASM_X86_XEN_PAGE_H | 1 | #ifndef _ASM_X86_XEN_PAGE_H |
2 | #define _ASM_X86_XEN_PAGE_H | 2 | #define _ASM_X86_XEN_PAGE_H |
3 | 3 | ||
4 | #include <linux/kernel.h> | ||
5 | #include <linux/types.h> | ||
6 | #include <linux/spinlock.h> | ||
4 | #include <linux/pfn.h> | 7 | #include <linux/pfn.h> |
5 | 8 | ||
6 | #include <asm/uaccess.h> | 9 | #include <asm/uaccess.h> |
10 | #include <asm/page.h> | ||
7 | #include <asm/pgtable.h> | 11 | #include <asm/pgtable.h> |
8 | 12 | ||
13 | #include <xen/interface/xen.h> | ||
9 | #include <xen/features.h> | 14 | #include <xen/features.h> |
10 | 15 | ||
11 | /* Xen machine address */ | 16 | /* Xen machine address */ |
diff --git a/arch/x86/kernel/Makefile b/arch/x86/kernel/Makefile index b62a7667828e..d364df03c1d6 100644 --- a/arch/x86/kernel/Makefile +++ b/arch/x86/kernel/Makefile | |||
@@ -12,6 +12,7 @@ CFLAGS_REMOVE_tsc.o = -pg | |||
12 | CFLAGS_REMOVE_rtc.o = -pg | 12 | CFLAGS_REMOVE_rtc.o = -pg |
13 | CFLAGS_REMOVE_paravirt-spinlocks.o = -pg | 13 | CFLAGS_REMOVE_paravirt-spinlocks.o = -pg |
14 | CFLAGS_REMOVE_ftrace.o = -pg | 14 | CFLAGS_REMOVE_ftrace.o = -pg |
15 | CFLAGS_REMOVE_early_printk.o = -pg | ||
15 | endif | 16 | endif |
16 | 17 | ||
17 | # | 18 | # |
@@ -23,9 +24,9 @@ CFLAGS_vsyscall_64.o := $(PROFILING) -g0 $(nostackp) | |||
23 | CFLAGS_hpet.o := $(nostackp) | 24 | CFLAGS_hpet.o := $(nostackp) |
24 | CFLAGS_tsc.o := $(nostackp) | 25 | CFLAGS_tsc.o := $(nostackp) |
25 | 26 | ||
26 | obj-y := process_$(BITS).o signal_$(BITS).o entry_$(BITS).o | 27 | obj-y := process_$(BITS).o signal.o entry_$(BITS).o |
27 | obj-y += traps.o irq.o irq_$(BITS).o dumpstack_$(BITS).o | 28 | obj-y += traps.o irq.o irq_$(BITS).o dumpstack_$(BITS).o |
28 | obj-y += time_$(BITS).o ioport.o ldt.o | 29 | obj-y += time_$(BITS).o ioport.o ldt.o dumpstack.o |
29 | obj-y += setup.o i8259.o irqinit_$(BITS).o setup_percpu.o | 30 | obj-y += setup.o i8259.o irqinit_$(BITS).o setup_percpu.o |
30 | obj-$(CONFIG_X86_VISWS) += visws_quirks.o | 31 | obj-$(CONFIG_X86_VISWS) += visws_quirks.o |
31 | obj-$(CONFIG_X86_32) += probe_roms_32.o | 32 | obj-$(CONFIG_X86_32) += probe_roms_32.o |
@@ -65,6 +66,7 @@ obj-$(CONFIG_X86_LOCAL_APIC) += apic.o nmi.o | |||
65 | obj-$(CONFIG_X86_IO_APIC) += io_apic.o | 66 | obj-$(CONFIG_X86_IO_APIC) += io_apic.o |
66 | obj-$(CONFIG_X86_REBOOTFIXUPS) += reboot_fixups_32.o | 67 | obj-$(CONFIG_X86_REBOOTFIXUPS) += reboot_fixups_32.o |
67 | obj-$(CONFIG_DYNAMIC_FTRACE) += ftrace.o | 68 | obj-$(CONFIG_DYNAMIC_FTRACE) += ftrace.o |
69 | obj-$(CONFIG_FUNCTION_GRAPH_TRACER) += ftrace.o | ||
68 | obj-$(CONFIG_KEXEC) += machine_kexec_$(BITS).o | 70 | obj-$(CONFIG_KEXEC) += machine_kexec_$(BITS).o |
69 | obj-$(CONFIG_KEXEC) += relocate_kernel_$(BITS).o crash.o | 71 | obj-$(CONFIG_KEXEC) += relocate_kernel_$(BITS).o crash.o |
70 | obj-$(CONFIG_CRASH_DUMP) += crash_dump_$(BITS).o | 72 | obj-$(CONFIG_CRASH_DUMP) += crash_dump_$(BITS).o |
@@ -105,6 +107,10 @@ microcode-$(CONFIG_MICROCODE_INTEL) += microcode_intel.o | |||
105 | microcode-$(CONFIG_MICROCODE_AMD) += microcode_amd.o | 107 | microcode-$(CONFIG_MICROCODE_AMD) += microcode_amd.o |
106 | obj-$(CONFIG_MICROCODE) += microcode.o | 108 | obj-$(CONFIG_MICROCODE) += microcode.o |
107 | 109 | ||
110 | obj-$(CONFIG_X86_CHECK_BIOS_CORRUPTION) += check.o | ||
111 | |||
112 | obj-$(CONFIG_SWIOTLB) += pci-swiotlb_64.o # NB rename without _64 | ||
113 | |||
108 | ### | 114 | ### |
109 | # 64 bit specific files | 115 | # 64 bit specific files |
110 | ifeq ($(CONFIG_X86_64),y) | 116 | ifeq ($(CONFIG_X86_64),y) |
@@ -118,7 +124,6 @@ ifeq ($(CONFIG_X86_64),y) | |||
118 | obj-$(CONFIG_GART_IOMMU) += pci-gart_64.o aperture_64.o | 124 | obj-$(CONFIG_GART_IOMMU) += pci-gart_64.o aperture_64.o |
119 | obj-$(CONFIG_CALGARY_IOMMU) += pci-calgary_64.o tce_64.o | 125 | obj-$(CONFIG_CALGARY_IOMMU) += pci-calgary_64.o tce_64.o |
120 | obj-$(CONFIG_AMD_IOMMU) += amd_iommu_init.o amd_iommu.o | 126 | obj-$(CONFIG_AMD_IOMMU) += amd_iommu_init.o amd_iommu.o |
121 | obj-$(CONFIG_SWIOTLB) += pci-swiotlb_64.o | ||
122 | 127 | ||
123 | obj-$(CONFIG_PCI_MMCONFIG) += mmconf-fam10h_64.o | 128 | obj-$(CONFIG_PCI_MMCONFIG) += mmconf-fam10h_64.o |
124 | endif | 129 | endif |
diff --git a/arch/x86/kernel/acpi/boot.c b/arch/x86/kernel/acpi/boot.c index 39ae3d0e3a4a..d37593c2f438 100644 --- a/arch/x86/kernel/acpi/boot.c +++ b/arch/x86/kernel/acpi/boot.c | |||
@@ -538,9 +538,10 @@ static int __cpuinit _acpi_map_lsapic(acpi_handle handle, int *pcpu) | |||
538 | struct acpi_buffer buffer = { ACPI_ALLOCATE_BUFFER, NULL }; | 538 | struct acpi_buffer buffer = { ACPI_ALLOCATE_BUFFER, NULL }; |
539 | union acpi_object *obj; | 539 | union acpi_object *obj; |
540 | struct acpi_madt_local_apic *lapic; | 540 | struct acpi_madt_local_apic *lapic; |
541 | cpumask_t tmp_map, new_map; | 541 | cpumask_var_t tmp_map, new_map; |
542 | u8 physid; | 542 | u8 physid; |
543 | int cpu; | 543 | int cpu; |
544 | int retval = -ENOMEM; | ||
544 | 545 | ||
545 | if (ACPI_FAILURE(acpi_evaluate_object(handle, "_MAT", NULL, &buffer))) | 546 | if (ACPI_FAILURE(acpi_evaluate_object(handle, "_MAT", NULL, &buffer))) |
546 | return -EINVAL; | 547 | return -EINVAL; |
@@ -569,23 +570,37 @@ static int __cpuinit _acpi_map_lsapic(acpi_handle handle, int *pcpu) | |||
569 | buffer.length = ACPI_ALLOCATE_BUFFER; | 570 | buffer.length = ACPI_ALLOCATE_BUFFER; |
570 | buffer.pointer = NULL; | 571 | buffer.pointer = NULL; |
571 | 572 | ||
572 | tmp_map = cpu_present_map; | 573 | if (!alloc_cpumask_var(&tmp_map, GFP_KERNEL)) |
574 | goto out; | ||
575 | |||
576 | if (!alloc_cpumask_var(&new_map, GFP_KERNEL)) | ||
577 | goto free_tmp_map; | ||
578 | |||
579 | cpumask_copy(tmp_map, cpu_present_mask); | ||
573 | acpi_register_lapic(physid, lapic->lapic_flags & ACPI_MADT_ENABLED); | 580 | acpi_register_lapic(physid, lapic->lapic_flags & ACPI_MADT_ENABLED); |
574 | 581 | ||
575 | /* | 582 | /* |
576 | * If mp_register_lapic successfully generates a new logical cpu | 583 | * If mp_register_lapic successfully generates a new logical cpu |
577 | * number, then the following will get us exactly what was mapped | 584 | * number, then the following will get us exactly what was mapped |
578 | */ | 585 | */ |
579 | cpus_andnot(new_map, cpu_present_map, tmp_map); | 586 | cpumask_andnot(new_map, cpu_present_mask, tmp_map); |
580 | if (cpus_empty(new_map)) { | 587 | if (cpumask_empty(new_map)) { |
581 | printk ("Unable to map lapic to logical cpu number\n"); | 588 | printk ("Unable to map lapic to logical cpu number\n"); |
582 | return -EINVAL; | 589 | retval = -EINVAL; |
590 | goto free_new_map; | ||
583 | } | 591 | } |
584 | 592 | ||
585 | cpu = first_cpu(new_map); | 593 | cpu = cpumask_first(new_map); |
586 | 594 | ||
587 | *pcpu = cpu; | 595 | *pcpu = cpu; |
588 | return 0; | 596 | retval = 0; |
597 | |||
598 | free_new_map: | ||
599 | free_cpumask_var(new_map); | ||
600 | free_tmp_map: | ||
601 | free_cpumask_var(tmp_map); | ||
602 | out: | ||
603 | return retval; | ||
589 | } | 604 | } |
590 | 605 | ||
591 | /* wrapper to silence section mismatch warning */ | 606 | /* wrapper to silence section mismatch warning */ |
@@ -598,7 +613,7 @@ EXPORT_SYMBOL(acpi_map_lsapic); | |||
598 | int acpi_unmap_lsapic(int cpu) | 613 | int acpi_unmap_lsapic(int cpu) |
599 | { | 614 | { |
600 | per_cpu(x86_cpu_to_apicid, cpu) = -1; | 615 | per_cpu(x86_cpu_to_apicid, cpu) = -1; |
601 | cpu_clear(cpu, cpu_present_map); | 616 | set_cpu_present(cpu, false); |
602 | num_processors--; | 617 | num_processors--; |
603 | 618 | ||
604 | return (0); | 619 | return (0); |
@@ -1371,6 +1386,17 @@ static void __init acpi_process_madt(void) | |||
1371 | smp_found_config = 0; | 1386 | smp_found_config = 0; |
1372 | } | 1387 | } |
1373 | } | 1388 | } |
1389 | |||
1390 | /* | ||
1391 | * ACPI supports both logical (e.g. Hyper-Threading) and physical | ||
1392 | * processors, where MPS only supports physical. | ||
1393 | */ | ||
1394 | if (acpi_lapic && acpi_ioapic) | ||
1395 | printk(KERN_INFO "Using ACPI (MADT) for SMP configuration " | ||
1396 | "information\n"); | ||
1397 | else if (acpi_lapic) | ||
1398 | printk(KERN_INFO "Using ACPI for processor (LAPIC) " | ||
1399 | "configuration information\n"); | ||
1374 | #endif | 1400 | #endif |
1375 | return; | 1401 | return; |
1376 | } | 1402 | } |
diff --git a/arch/x86/kernel/amd_iommu.c b/arch/x86/kernel/amd_iommu.c index 0a60d60ed036..5113c080f0c4 100644 --- a/arch/x86/kernel/amd_iommu.c +++ b/arch/x86/kernel/amd_iommu.c | |||
@@ -20,10 +20,15 @@ | |||
20 | #include <linux/pci.h> | 20 | #include <linux/pci.h> |
21 | #include <linux/gfp.h> | 21 | #include <linux/gfp.h> |
22 | #include <linux/bitops.h> | 22 | #include <linux/bitops.h> |
23 | #include <linux/debugfs.h> | ||
23 | #include <linux/scatterlist.h> | 24 | #include <linux/scatterlist.h> |
24 | #include <linux/iommu-helper.h> | 25 | #include <linux/iommu-helper.h> |
26 | #ifdef CONFIG_IOMMU_API | ||
27 | #include <linux/iommu.h> | ||
28 | #endif | ||
25 | #include <asm/proto.h> | 29 | #include <asm/proto.h> |
26 | #include <asm/iommu.h> | 30 | #include <asm/iommu.h> |
31 | #include <asm/gart.h> | ||
27 | #include <asm/amd_iommu_types.h> | 32 | #include <asm/amd_iommu_types.h> |
28 | #include <asm/amd_iommu.h> | 33 | #include <asm/amd_iommu.h> |
29 | 34 | ||
@@ -37,6 +42,10 @@ static DEFINE_RWLOCK(amd_iommu_devtable_lock); | |||
37 | static LIST_HEAD(iommu_pd_list); | 42 | static LIST_HEAD(iommu_pd_list); |
38 | static DEFINE_SPINLOCK(iommu_pd_list_lock); | 43 | static DEFINE_SPINLOCK(iommu_pd_list_lock); |
39 | 44 | ||
45 | #ifdef CONFIG_IOMMU_API | ||
46 | static struct iommu_ops amd_iommu_ops; | ||
47 | #endif | ||
48 | |||
40 | /* | 49 | /* |
41 | * general struct to manage commands send to an IOMMU | 50 | * general struct to manage commands send to an IOMMU |
42 | */ | 51 | */ |
@@ -46,6 +55,68 @@ struct iommu_cmd { | |||
46 | 55 | ||
47 | static int dma_ops_unity_map(struct dma_ops_domain *dma_dom, | 56 | static int dma_ops_unity_map(struct dma_ops_domain *dma_dom, |
48 | struct unity_map_entry *e); | 57 | struct unity_map_entry *e); |
58 | static struct dma_ops_domain *find_protection_domain(u16 devid); | ||
59 | |||
60 | |||
61 | #ifdef CONFIG_AMD_IOMMU_STATS | ||
62 | |||
63 | /* | ||
64 | * Initialization code for statistics collection | ||
65 | */ | ||
66 | |||
67 | DECLARE_STATS_COUNTER(compl_wait); | ||
68 | DECLARE_STATS_COUNTER(cnt_map_single); | ||
69 | DECLARE_STATS_COUNTER(cnt_unmap_single); | ||
70 | DECLARE_STATS_COUNTER(cnt_map_sg); | ||
71 | DECLARE_STATS_COUNTER(cnt_unmap_sg); | ||
72 | DECLARE_STATS_COUNTER(cnt_alloc_coherent); | ||
73 | DECLARE_STATS_COUNTER(cnt_free_coherent); | ||
74 | DECLARE_STATS_COUNTER(cross_page); | ||
75 | DECLARE_STATS_COUNTER(domain_flush_single); | ||
76 | DECLARE_STATS_COUNTER(domain_flush_all); | ||
77 | DECLARE_STATS_COUNTER(alloced_io_mem); | ||
78 | DECLARE_STATS_COUNTER(total_map_requests); | ||
79 | |||
80 | static struct dentry *stats_dir; | ||
81 | static struct dentry *de_isolate; | ||
82 | static struct dentry *de_fflush; | ||
83 | |||
84 | static void amd_iommu_stats_add(struct __iommu_counter *cnt) | ||
85 | { | ||
86 | if (stats_dir == NULL) | ||
87 | return; | ||
88 | |||
89 | cnt->dent = debugfs_create_u64(cnt->name, 0444, stats_dir, | ||
90 | &cnt->value); | ||
91 | } | ||
92 | |||
93 | static void amd_iommu_stats_init(void) | ||
94 | { | ||
95 | stats_dir = debugfs_create_dir("amd-iommu", NULL); | ||
96 | if (stats_dir == NULL) | ||
97 | return; | ||
98 | |||
99 | de_isolate = debugfs_create_bool("isolation", 0444, stats_dir, | ||
100 | (u32 *)&amd_iommu_isolate); | ||
101 | |||
102 | de_fflush = debugfs_create_bool("fullflush", 0444, stats_dir, | ||
103 | (u32 *)&amd_iommu_unmap_flush); | ||
104 | |||
105 | amd_iommu_stats_add(&compl_wait); | ||
106 | amd_iommu_stats_add(&cnt_map_single); | ||
107 | amd_iommu_stats_add(&cnt_unmap_single); | ||
108 | amd_iommu_stats_add(&cnt_map_sg); | ||
109 | amd_iommu_stats_add(&cnt_unmap_sg); | ||
110 | amd_iommu_stats_add(&cnt_alloc_coherent); | ||
111 | amd_iommu_stats_add(&cnt_free_coherent); | ||
112 | amd_iommu_stats_add(&cross_page); | ||
113 | amd_iommu_stats_add(&domain_flush_single); | ||
114 | amd_iommu_stats_add(&domain_flush_all); | ||
115 | amd_iommu_stats_add(&alloced_io_mem); | ||
116 | amd_iommu_stats_add(&total_map_requests); | ||
117 | } | ||
118 | |||
119 | #endif | ||
49 | 120 | ||
50 | /* returns !0 if the IOMMU is caching non-present entries in its TLB */ | 121 | /* returns !0 if the IOMMU is caching non-present entries in its TLB */ |
51 | static int iommu_has_npcache(struct amd_iommu *iommu) | 122 | static int iommu_has_npcache(struct amd_iommu *iommu) |
@@ -188,13 +259,55 @@ static int iommu_queue_command(struct amd_iommu *iommu, struct iommu_cmd *cmd) | |||
188 | spin_lock_irqsave(&iommu->lock, flags); | 259 | spin_lock_irqsave(&iommu->lock, flags); |
189 | ret = __iommu_queue_command(iommu, cmd); | 260 | ret = __iommu_queue_command(iommu, cmd); |
190 | if (!ret) | 261 | if (!ret) |
191 | iommu->need_sync = 1; | 262 | iommu->need_sync = true; |
192 | spin_unlock_irqrestore(&iommu->lock, flags); | 263 | spin_unlock_irqrestore(&iommu->lock, flags); |
193 | 264 | ||
194 | return ret; | 265 | return ret; |
195 | } | 266 | } |
196 | 267 | ||
197 | /* | 268 | /* |
269 | * This function waits until an IOMMU has completed a completion | ||
270 | * wait command | ||
271 | */ | ||
272 | static void __iommu_wait_for_completion(struct amd_iommu *iommu) | ||
273 | { | ||
274 | int ready = 0; | ||
275 | unsigned status = 0; | ||
276 | unsigned long i = 0; | ||
277 | |||
278 | INC_STATS_COUNTER(compl_wait); | ||
279 | |||
280 | while (!ready && (i < EXIT_LOOP_COUNT)) { | ||
281 | ++i; | ||
282 | /* wait for the bit to become one */ | ||
283 | status = readl(iommu->mmio_base + MMIO_STATUS_OFFSET); | ||
284 | ready = status & MMIO_STATUS_COM_WAIT_INT_MASK; | ||
285 | } | ||
286 | |||
287 | /* set bit back to zero */ | ||
288 | status &= ~MMIO_STATUS_COM_WAIT_INT_MASK; | ||
289 | writel(status, iommu->mmio_base + MMIO_STATUS_OFFSET); | ||
290 | |||
291 | if (unlikely(i == EXIT_LOOP_COUNT)) | ||
292 | panic("AMD IOMMU: Completion wait loop failed\n"); | ||
293 | } | ||
294 | |||
295 | /* | ||
296 | * This function queues a completion wait command into the command | ||
297 | * buffer of an IOMMU | ||
298 | */ | ||
299 | static int __iommu_completion_wait(struct amd_iommu *iommu) | ||
300 | { | ||
301 | struct iommu_cmd cmd; | ||
302 | |||
303 | memset(&cmd, 0, sizeof(cmd)); | ||
304 | cmd.data[0] = CMD_COMPL_WAIT_INT_MASK; | ||
305 | CMD_SET_TYPE(&cmd, CMD_COMPL_WAIT); | ||
306 | |||
307 | return __iommu_queue_command(iommu, &cmd); | ||
308 | } | ||
309 | |||
310 | /* | ||
198 | * This function is called whenever we need to ensure that the IOMMU has | 311 | * This function is called whenever we need to ensure that the IOMMU has |
199 | * completed execution of all commands we sent. It sends a | 312 | * completed execution of all commands we sent. It sends a |
200 | * COMPLETION_WAIT command and waits for it to finish. The IOMMU informs | 313 | * COMPLETION_WAIT command and waits for it to finish. The IOMMU informs |
@@ -203,40 +316,22 @@ static int iommu_queue_command(struct amd_iommu *iommu, struct iommu_cmd *cmd) | |||
203 | */ | 316 | */ |
204 | static int iommu_completion_wait(struct amd_iommu *iommu) | 317 | static int iommu_completion_wait(struct amd_iommu *iommu) |
205 | { | 318 | { |
206 | int ret = 0, ready = 0; | 319 | int ret = 0; |
207 | unsigned status = 0; | 320 | unsigned long flags; |
208 | struct iommu_cmd cmd; | ||
209 | unsigned long flags, i = 0; | ||
210 | |||
211 | memset(&cmd, 0, sizeof(cmd)); | ||
212 | cmd.data[0] = CMD_COMPL_WAIT_INT_MASK; | ||
213 | CMD_SET_TYPE(&cmd, CMD_COMPL_WAIT); | ||
214 | 321 | ||
215 | spin_lock_irqsave(&iommu->lock, flags); | 322 | spin_lock_irqsave(&iommu->lock, flags); |
216 | 323 | ||
217 | if (!iommu->need_sync) | 324 | if (!iommu->need_sync) |
218 | goto out; | 325 | goto out; |
219 | 326 | ||
220 | iommu->need_sync = 0; | 327 | ret = __iommu_completion_wait(iommu); |
221 | 328 | ||
222 | ret = __iommu_queue_command(iommu, &cmd); | 329 | iommu->need_sync = false; |
223 | 330 | ||
224 | if (ret) | 331 | if (ret) |
225 | goto out; | 332 | goto out; |
226 | 333 | ||
227 | while (!ready && (i < EXIT_LOOP_COUNT)) { | 334 | __iommu_wait_for_completion(iommu); |
228 | ++i; | ||
229 | /* wait for the bit to become one */ | ||
230 | status = readl(iommu->mmio_base + MMIO_STATUS_OFFSET); | ||
231 | ready = status & MMIO_STATUS_COM_WAIT_INT_MASK; | ||
232 | } | ||
233 | |||
234 | /* set bit back to zero */ | ||
235 | status &= ~MMIO_STATUS_COM_WAIT_INT_MASK; | ||
236 | writel(status, iommu->mmio_base + MMIO_STATUS_OFFSET); | ||
237 | |||
238 | if (unlikely(i == EXIT_LOOP_COUNT)) | ||
239 | panic("AMD IOMMU: Completion wait loop failed\n"); | ||
240 | 335 | ||
241 | out: | 336 | out: |
242 | spin_unlock_irqrestore(&iommu->lock, flags); | 337 | spin_unlock_irqrestore(&iommu->lock, flags); |
@@ -263,6 +358,21 @@ static int iommu_queue_inv_dev_entry(struct amd_iommu *iommu, u16 devid) | |||
263 | return ret; | 358 | return ret; |
264 | } | 359 | } |
265 | 360 | ||
361 | static void __iommu_build_inv_iommu_pages(struct iommu_cmd *cmd, u64 address, | ||
362 | u16 domid, int pde, int s) | ||
363 | { | ||
364 | memset(cmd, 0, sizeof(*cmd)); | ||
365 | address &= PAGE_MASK; | ||
366 | CMD_SET_TYPE(cmd, CMD_INV_IOMMU_PAGES); | ||
367 | cmd->data[1] |= domid; | ||
368 | cmd->data[2] = lower_32_bits(address); | ||
369 | cmd->data[3] = upper_32_bits(address); | ||
370 | if (s) /* size bit - we flush more than one 4kb page */ | ||
371 | cmd->data[2] |= CMD_INV_IOMMU_PAGES_SIZE_MASK; | ||
372 | if (pde) /* PDE bit - we wan't flush everything not only the PTEs */ | ||
373 | cmd->data[2] |= CMD_INV_IOMMU_PAGES_PDE_MASK; | ||
374 | } | ||
375 | |||
266 | /* | 376 | /* |
267 | * Generic command send function for invalidaing TLB entries | 377 | * Generic command send function for invalidaing TLB entries |
268 | */ | 378 | */ |
@@ -272,16 +382,7 @@ static int iommu_queue_inv_iommu_pages(struct amd_iommu *iommu, | |||
272 | struct iommu_cmd cmd; | 382 | struct iommu_cmd cmd; |
273 | int ret; | 383 | int ret; |
274 | 384 | ||
275 | memset(&cmd, 0, sizeof(cmd)); | 385 | __iommu_build_inv_iommu_pages(&cmd, address, domid, pde, s); |
276 | address &= PAGE_MASK; | ||
277 | CMD_SET_TYPE(&cmd, CMD_INV_IOMMU_PAGES); | ||
278 | cmd.data[1] |= domid; | ||
279 | cmd.data[2] = lower_32_bits(address); | ||
280 | cmd.data[3] = upper_32_bits(address); | ||
281 | if (s) /* size bit - we flush more than one 4kb page */ | ||
282 | cmd.data[2] |= CMD_INV_IOMMU_PAGES_SIZE_MASK; | ||
283 | if (pde) /* PDE bit - we wan't flush everything not only the PTEs */ | ||
284 | cmd.data[2] |= CMD_INV_IOMMU_PAGES_PDE_MASK; | ||
285 | 386 | ||
286 | ret = iommu_queue_command(iommu, &cmd); | 387 | ret = iommu_queue_command(iommu, &cmd); |
287 | 388 | ||
@@ -320,9 +421,35 @@ static void iommu_flush_tlb(struct amd_iommu *iommu, u16 domid) | |||
320 | { | 421 | { |
321 | u64 address = CMD_INV_IOMMU_ALL_PAGES_ADDRESS; | 422 | u64 address = CMD_INV_IOMMU_ALL_PAGES_ADDRESS; |
322 | 423 | ||
424 | INC_STATS_COUNTER(domain_flush_single); | ||
425 | |||
323 | iommu_queue_inv_iommu_pages(iommu, address, domid, 0, 1); | 426 | iommu_queue_inv_iommu_pages(iommu, address, domid, 0, 1); |
324 | } | 427 | } |
325 | 428 | ||
429 | /* | ||
430 | * This function is used to flush the IO/TLB for a given protection domain | ||
431 | * on every IOMMU in the system | ||
432 | */ | ||
433 | static void iommu_flush_domain(u16 domid) | ||
434 | { | ||
435 | unsigned long flags; | ||
436 | struct amd_iommu *iommu; | ||
437 | struct iommu_cmd cmd; | ||
438 | |||
439 | INC_STATS_COUNTER(domain_flush_all); | ||
440 | |||
441 | __iommu_build_inv_iommu_pages(&cmd, CMD_INV_IOMMU_ALL_PAGES_ADDRESS, | ||
442 | domid, 1, 1); | ||
443 | |||
444 | list_for_each_entry(iommu, &amd_iommu_list, list) { | ||
445 | spin_lock_irqsave(&iommu->lock, flags); | ||
446 | __iommu_queue_command(iommu, &cmd); | ||
447 | __iommu_completion_wait(iommu); | ||
448 | __iommu_wait_for_completion(iommu); | ||
449 | spin_unlock_irqrestore(&iommu->lock, flags); | ||
450 | } | ||
451 | } | ||
452 | |||
326 | /**************************************************************************** | 453 | /**************************************************************************** |
327 | * | 454 | * |
328 | * The functions below are used the create the page table mappings for | 455 | * The functions below are used the create the page table mappings for |
@@ -337,10 +464,10 @@ static void iommu_flush_tlb(struct amd_iommu *iommu, u16 domid) | |||
337 | * supporting all features of AMD IOMMU page tables like level skipping | 464 | * supporting all features of AMD IOMMU page tables like level skipping |
338 | * and full 64 bit address spaces. | 465 | * and full 64 bit address spaces. |
339 | */ | 466 | */ |
340 | static int iommu_map(struct protection_domain *dom, | 467 | static int iommu_map_page(struct protection_domain *dom, |
341 | unsigned long bus_addr, | 468 | unsigned long bus_addr, |
342 | unsigned long phys_addr, | 469 | unsigned long phys_addr, |
343 | int prot) | 470 | int prot) |
344 | { | 471 | { |
345 | u64 __pte, *pte, *page; | 472 | u64 __pte, *pte, *page; |
346 | 473 | ||
@@ -387,6 +514,28 @@ static int iommu_map(struct protection_domain *dom, | |||
387 | return 0; | 514 | return 0; |
388 | } | 515 | } |
389 | 516 | ||
517 | static void iommu_unmap_page(struct protection_domain *dom, | ||
518 | unsigned long bus_addr) | ||
519 | { | ||
520 | u64 *pte; | ||
521 | |||
522 | pte = &dom->pt_root[IOMMU_PTE_L2_INDEX(bus_addr)]; | ||
523 | |||
524 | if (!IOMMU_PTE_PRESENT(*pte)) | ||
525 | return; | ||
526 | |||
527 | pte = IOMMU_PTE_PAGE(*pte); | ||
528 | pte = &pte[IOMMU_PTE_L1_INDEX(bus_addr)]; | ||
529 | |||
530 | if (!IOMMU_PTE_PRESENT(*pte)) | ||
531 | return; | ||
532 | |||
533 | pte = IOMMU_PTE_PAGE(*pte); | ||
534 | pte = &pte[IOMMU_PTE_L1_INDEX(bus_addr)]; | ||
535 | |||
536 | *pte = 0; | ||
537 | } | ||
538 | |||
390 | /* | 539 | /* |
391 | * This function checks if a specific unity mapping entry is needed for | 540 | * This function checks if a specific unity mapping entry is needed for |
392 | * this specific IOMMU. | 541 | * this specific IOMMU. |
@@ -439,7 +588,7 @@ static int dma_ops_unity_map(struct dma_ops_domain *dma_dom, | |||
439 | 588 | ||
440 | for (addr = e->address_start; addr < e->address_end; | 589 | for (addr = e->address_start; addr < e->address_end; |
441 | addr += PAGE_SIZE) { | 590 | addr += PAGE_SIZE) { |
442 | ret = iommu_map(&dma_dom->domain, addr, addr, e->prot); | 591 | ret = iommu_map_page(&dma_dom->domain, addr, addr, e->prot); |
443 | if (ret) | 592 | if (ret) |
444 | return ret; | 593 | return ret; |
445 | /* | 594 | /* |
@@ -570,6 +719,16 @@ static u16 domain_id_alloc(void) | |||
570 | return id; | 719 | return id; |
571 | } | 720 | } |
572 | 721 | ||
722 | static void domain_id_free(int id) | ||
723 | { | ||
724 | unsigned long flags; | ||
725 | |||
726 | write_lock_irqsave(&amd_iommu_devtable_lock, flags); | ||
727 | if (id > 0 && id < MAX_DOMAIN_ID) | ||
728 | __clear_bit(id, amd_iommu_pd_alloc_bitmap); | ||
729 | write_unlock_irqrestore(&amd_iommu_devtable_lock, flags); | ||
730 | } | ||
731 | |||
573 | /* | 732 | /* |
574 | * Used to reserve address ranges in the aperture (e.g. for exclusion | 733 | * Used to reserve address ranges in the aperture (e.g. for exclusion |
575 | * ranges. | 734 | * ranges. |
@@ -586,12 +745,12 @@ static void dma_ops_reserve_addresses(struct dma_ops_domain *dom, | |||
586 | iommu_area_reserve(dom->bitmap, start_page, pages); | 745 | iommu_area_reserve(dom->bitmap, start_page, pages); |
587 | } | 746 | } |
588 | 747 | ||
589 | static void dma_ops_free_pagetable(struct dma_ops_domain *dma_dom) | 748 | static void free_pagetable(struct protection_domain *domain) |
590 | { | 749 | { |
591 | int i, j; | 750 | int i, j; |
592 | u64 *p1, *p2, *p3; | 751 | u64 *p1, *p2, *p3; |
593 | 752 | ||
594 | p1 = dma_dom->domain.pt_root; | 753 | p1 = domain->pt_root; |
595 | 754 | ||
596 | if (!p1) | 755 | if (!p1) |
597 | return; | 756 | return; |
@@ -612,6 +771,8 @@ static void dma_ops_free_pagetable(struct dma_ops_domain *dma_dom) | |||
612 | } | 771 | } |
613 | 772 | ||
614 | free_page((unsigned long)p1); | 773 | free_page((unsigned long)p1); |
774 | |||
775 | domain->pt_root = NULL; | ||
615 | } | 776 | } |
616 | 777 | ||
617 | /* | 778 | /* |
@@ -623,7 +784,7 @@ static void dma_ops_domain_free(struct dma_ops_domain *dom) | |||
623 | if (!dom) | 784 | if (!dom) |
624 | return; | 785 | return; |
625 | 786 | ||
626 | dma_ops_free_pagetable(dom); | 787 | free_pagetable(&dom->domain); |
627 | 788 | ||
628 | kfree(dom->pte_pages); | 789 | kfree(dom->pte_pages); |
629 | 790 | ||
@@ -662,6 +823,7 @@ static struct dma_ops_domain *dma_ops_domain_alloc(struct amd_iommu *iommu, | |||
662 | goto free_dma_dom; | 823 | goto free_dma_dom; |
663 | dma_dom->domain.mode = PAGE_MODE_3_LEVEL; | 824 | dma_dom->domain.mode = PAGE_MODE_3_LEVEL; |
664 | dma_dom->domain.pt_root = (void *)get_zeroed_page(GFP_KERNEL); | 825 | dma_dom->domain.pt_root = (void *)get_zeroed_page(GFP_KERNEL); |
826 | dma_dom->domain.flags = PD_DMA_OPS_MASK; | ||
665 | dma_dom->domain.priv = dma_dom; | 827 | dma_dom->domain.priv = dma_dom; |
666 | if (!dma_dom->domain.pt_root) | 828 | if (!dma_dom->domain.pt_root) |
667 | goto free_dma_dom; | 829 | goto free_dma_dom; |
@@ -724,6 +886,15 @@ free_dma_dom: | |||
724 | } | 886 | } |
725 | 887 | ||
726 | /* | 888 | /* |
889 | * little helper function to check whether a given protection domain is a | ||
890 | * dma_ops domain | ||
891 | */ | ||
892 | static bool dma_ops_domain(struct protection_domain *domain) | ||
893 | { | ||
894 | return domain->flags & PD_DMA_OPS_MASK; | ||
895 | } | ||
896 | |||
897 | /* | ||
727 | * Find out the protection domain structure for a given PCI device. This | 898 | * Find out the protection domain structure for a given PCI device. This |
728 | * will give us the pointer to the page table root for example. | 899 | * will give us the pointer to the page table root for example. |
729 | */ | 900 | */ |
@@ -743,14 +914,15 @@ static struct protection_domain *domain_for_device(u16 devid) | |||
743 | * If a device is not yet associated with a domain, this function does | 914 | * If a device is not yet associated with a domain, this function does |
744 | * assigns it visible for the hardware | 915 | * assigns it visible for the hardware |
745 | */ | 916 | */ |
746 | static void set_device_domain(struct amd_iommu *iommu, | 917 | static void attach_device(struct amd_iommu *iommu, |
747 | struct protection_domain *domain, | 918 | struct protection_domain *domain, |
748 | u16 devid) | 919 | u16 devid) |
749 | { | 920 | { |
750 | unsigned long flags; | 921 | unsigned long flags; |
751 | |||
752 | u64 pte_root = virt_to_phys(domain->pt_root); | 922 | u64 pte_root = virt_to_phys(domain->pt_root); |
753 | 923 | ||
924 | domain->dev_cnt += 1; | ||
925 | |||
754 | pte_root |= (domain->mode & DEV_ENTRY_MODE_MASK) | 926 | pte_root |= (domain->mode & DEV_ENTRY_MODE_MASK) |
755 | << DEV_ENTRY_MODE_SHIFT; | 927 | << DEV_ENTRY_MODE_SHIFT; |
756 | pte_root |= IOMMU_PTE_IR | IOMMU_PTE_IW | IOMMU_PTE_P | IOMMU_PTE_TV; | 928 | pte_root |= IOMMU_PTE_IR | IOMMU_PTE_IW | IOMMU_PTE_P | IOMMU_PTE_TV; |
@@ -766,6 +938,116 @@ static void set_device_domain(struct amd_iommu *iommu, | |||
766 | iommu_queue_inv_dev_entry(iommu, devid); | 938 | iommu_queue_inv_dev_entry(iommu, devid); |
767 | } | 939 | } |
768 | 940 | ||
941 | /* | ||
942 | * Removes a device from a protection domain (unlocked) | ||
943 | */ | ||
944 | static void __detach_device(struct protection_domain *domain, u16 devid) | ||
945 | { | ||
946 | |||
947 | /* lock domain */ | ||
948 | spin_lock(&domain->lock); | ||
949 | |||
950 | /* remove domain from the lookup table */ | ||
951 | amd_iommu_pd_table[devid] = NULL; | ||
952 | |||
953 | /* remove entry from the device table seen by the hardware */ | ||
954 | amd_iommu_dev_table[devid].data[0] = IOMMU_PTE_P | IOMMU_PTE_TV; | ||
955 | amd_iommu_dev_table[devid].data[1] = 0; | ||
956 | amd_iommu_dev_table[devid].data[2] = 0; | ||
957 | |||
958 | /* decrease reference counter */ | ||
959 | domain->dev_cnt -= 1; | ||
960 | |||
961 | /* ready */ | ||
962 | spin_unlock(&domain->lock); | ||
963 | } | ||
964 | |||
965 | /* | ||
966 | * Removes a device from a protection domain (with devtable_lock held) | ||
967 | */ | ||
968 | static void detach_device(struct protection_domain *domain, u16 devid) | ||
969 | { | ||
970 | unsigned long flags; | ||
971 | |||
972 | /* lock device table */ | ||
973 | write_lock_irqsave(&amd_iommu_devtable_lock, flags); | ||
974 | __detach_device(domain, devid); | ||
975 | write_unlock_irqrestore(&amd_iommu_devtable_lock, flags); | ||
976 | } | ||
977 | |||
978 | static int device_change_notifier(struct notifier_block *nb, | ||
979 | unsigned long action, void *data) | ||
980 | { | ||
981 | struct device *dev = data; | ||
982 | struct pci_dev *pdev = to_pci_dev(dev); | ||
983 | u16 devid = calc_devid(pdev->bus->number, pdev->devfn); | ||
984 | struct protection_domain *domain; | ||
985 | struct dma_ops_domain *dma_domain; | ||
986 | struct amd_iommu *iommu; | ||
987 | int order = amd_iommu_aperture_order; | ||
988 | unsigned long flags; | ||
989 | |||
990 | if (devid > amd_iommu_last_bdf) | ||
991 | goto out; | ||
992 | |||
993 | devid = amd_iommu_alias_table[devid]; | ||
994 | |||
995 | iommu = amd_iommu_rlookup_table[devid]; | ||
996 | if (iommu == NULL) | ||
997 | goto out; | ||
998 | |||
999 | domain = domain_for_device(devid); | ||
1000 | |||
1001 | if (domain && !dma_ops_domain(domain)) | ||
1002 | WARN_ONCE(1, "AMD IOMMU WARNING: device %s already bound " | ||
1003 | "to a non-dma-ops domain\n", dev_name(dev)); | ||
1004 | |||
1005 | switch (action) { | ||
1006 | case BUS_NOTIFY_BOUND_DRIVER: | ||
1007 | if (domain) | ||
1008 | goto out; | ||
1009 | dma_domain = find_protection_domain(devid); | ||
1010 | if (!dma_domain) | ||
1011 | dma_domain = iommu->default_dom; | ||
1012 | attach_device(iommu, &dma_domain->domain, devid); | ||
1013 | printk(KERN_INFO "AMD IOMMU: Using protection domain %d for " | ||
1014 | "device %s\n", dma_domain->domain.id, dev_name(dev)); | ||
1015 | break; | ||
1016 | case BUS_NOTIFY_UNBIND_DRIVER: | ||
1017 | if (!domain) | ||
1018 | goto out; | ||
1019 | detach_device(domain, devid); | ||
1020 | break; | ||
1021 | case BUS_NOTIFY_ADD_DEVICE: | ||
1022 | /* allocate a protection domain if a device is added */ | ||
1023 | dma_domain = find_protection_domain(devid); | ||
1024 | if (dma_domain) | ||
1025 | goto out; | ||
1026 | dma_domain = dma_ops_domain_alloc(iommu, order); | ||
1027 | if (!dma_domain) | ||
1028 | goto out; | ||
1029 | dma_domain->target_dev = devid; | ||
1030 | |||
1031 | spin_lock_irqsave(&iommu_pd_list_lock, flags); | ||
1032 | list_add_tail(&dma_domain->list, &iommu_pd_list); | ||
1033 | spin_unlock_irqrestore(&iommu_pd_list_lock, flags); | ||
1034 | |||
1035 | break; | ||
1036 | default: | ||
1037 | goto out; | ||
1038 | } | ||
1039 | |||
1040 | iommu_queue_inv_dev_entry(iommu, devid); | ||
1041 | iommu_completion_wait(iommu); | ||
1042 | |||
1043 | out: | ||
1044 | return 0; | ||
1045 | } | ||
1046 | |||
/*
 * Notifier block that routes bus notifications to device_change_notifier().
 * It is registered on pci_bus_type in amd_iommu_init_dma_ops().
 */
struct notifier_block device_nb = {
	.notifier_call = device_change_notifier,
};
1050 | |||
769 | /***************************************************************************** | 1051 | /***************************************************************************** |
770 | * | 1052 | * |
771 | * The next functions belong to the dma_ops mapping/unmapping code. | 1053 | * The next functions belong to the dma_ops mapping/unmapping code. |
@@ -801,7 +1083,6 @@ static struct dma_ops_domain *find_protection_domain(u16 devid) | |||
801 | list_for_each_entry(entry, &iommu_pd_list, list) { | 1083 | list_for_each_entry(entry, &iommu_pd_list, list) { |
802 | if (entry->target_dev == devid) { | 1084 | if (entry->target_dev == devid) { |
803 | ret = entry; | 1085 | ret = entry; |
804 | list_del(&ret->list); | ||
805 | break; | 1086 | break; |
806 | } | 1087 | } |
807 | } | 1088 | } |
@@ -852,14 +1133,13 @@ static int get_device_resources(struct device *dev, | |||
852 | if (!dma_dom) | 1133 | if (!dma_dom) |
853 | dma_dom = (*iommu)->default_dom; | 1134 | dma_dom = (*iommu)->default_dom; |
854 | *domain = &dma_dom->domain; | 1135 | *domain = &dma_dom->domain; |
855 | set_device_domain(*iommu, *domain, *bdf); | 1136 | attach_device(*iommu, *domain, *bdf); |
856 | printk(KERN_INFO "AMD IOMMU: Using protection domain %d for " | 1137 | printk(KERN_INFO "AMD IOMMU: Using protection domain %d for " |
857 | "device ", (*domain)->id); | 1138 | "device %s\n", (*domain)->id, dev_name(dev)); |
858 | print_devid(_bdf, 1); | ||
859 | } | 1139 | } |
860 | 1140 | ||
861 | if (domain_for_device(_bdf) == NULL) | 1141 | if (domain_for_device(_bdf) == NULL) |
862 | set_device_domain(*iommu, *domain, _bdf); | 1142 | attach_device(*iommu, *domain, _bdf); |
863 | 1143 | ||
864 | return 1; | 1144 | return 1; |
865 | } | 1145 | } |
@@ -945,6 +1225,11 @@ static dma_addr_t __map_single(struct device *dev, | |||
945 | pages = iommu_num_pages(paddr, size, PAGE_SIZE); | 1225 | pages = iommu_num_pages(paddr, size, PAGE_SIZE); |
946 | paddr &= PAGE_MASK; | 1226 | paddr &= PAGE_MASK; |
947 | 1227 | ||
1228 | INC_STATS_COUNTER(total_map_requests); | ||
1229 | |||
1230 | if (pages > 1) | ||
1231 | INC_STATS_COUNTER(cross_page); | ||
1232 | |||
948 | if (align) | 1233 | if (align) |
949 | align_mask = (1UL << get_order(size)) - 1; | 1234 | align_mask = (1UL << get_order(size)) - 1; |
950 | 1235 | ||
@@ -961,6 +1246,8 @@ static dma_addr_t __map_single(struct device *dev, | |||
961 | } | 1246 | } |
962 | address += offset; | 1247 | address += offset; |
963 | 1248 | ||
1249 | ADD_STATS_COUNTER(alloced_io_mem, size); | ||
1250 | |||
964 | if (unlikely(dma_dom->need_flush && !amd_iommu_unmap_flush)) { | 1251 | if (unlikely(dma_dom->need_flush && !amd_iommu_unmap_flush)) { |
965 | iommu_flush_tlb(iommu, dma_dom->domain.id); | 1252 | iommu_flush_tlb(iommu, dma_dom->domain.id); |
966 | dma_dom->need_flush = false; | 1253 | dma_dom->need_flush = false; |
@@ -997,6 +1284,8 @@ static void __unmap_single(struct amd_iommu *iommu, | |||
997 | start += PAGE_SIZE; | 1284 | start += PAGE_SIZE; |
998 | } | 1285 | } |
999 | 1286 | ||
1287 | SUB_STATS_COUNTER(alloced_io_mem, size); | ||
1288 | |||
1000 | dma_ops_free_addresses(dma_dom, dma_addr, pages); | 1289 | dma_ops_free_addresses(dma_dom, dma_addr, pages); |
1001 | 1290 | ||
1002 | if (amd_iommu_unmap_flush || dma_dom->need_flush) { | 1291 | if (amd_iommu_unmap_flush || dma_dom->need_flush) { |
@@ -1018,6 +1307,8 @@ static dma_addr_t map_single(struct device *dev, phys_addr_t paddr, | |||
1018 | dma_addr_t addr; | 1307 | dma_addr_t addr; |
1019 | u64 dma_mask; | 1308 | u64 dma_mask; |
1020 | 1309 | ||
1310 | INC_STATS_COUNTER(cnt_map_single); | ||
1311 | |||
1021 | if (!check_device(dev)) | 1312 | if (!check_device(dev)) |
1022 | return bad_dma_address; | 1313 | return bad_dma_address; |
1023 | 1314 | ||
@@ -1029,6 +1320,9 @@ static dma_addr_t map_single(struct device *dev, phys_addr_t paddr, | |||
1029 | /* device not handled by any AMD IOMMU */ | 1320 | /* device not handled by any AMD IOMMU */ |
1030 | return (dma_addr_t)paddr; | 1321 | return (dma_addr_t)paddr; |
1031 | 1322 | ||
1323 | if (!dma_ops_domain(domain)) | ||
1324 | return bad_dma_address; | ||
1325 | |||
1032 | spin_lock_irqsave(&domain->lock, flags); | 1326 | spin_lock_irqsave(&domain->lock, flags); |
1033 | addr = __map_single(dev, iommu, domain->priv, paddr, size, dir, false, | 1327 | addr = __map_single(dev, iommu, domain->priv, paddr, size, dir, false, |
1034 | dma_mask); | 1328 | dma_mask); |
@@ -1054,11 +1348,16 @@ static void unmap_single(struct device *dev, dma_addr_t dma_addr, | |||
1054 | struct protection_domain *domain; | 1348 | struct protection_domain *domain; |
1055 | u16 devid; | 1349 | u16 devid; |
1056 | 1350 | ||
1351 | INC_STATS_COUNTER(cnt_unmap_single); | ||
1352 | |||
1057 | if (!check_device(dev) || | 1353 | if (!check_device(dev) || |
1058 | !get_device_resources(dev, &iommu, &domain, &devid)) | 1354 | !get_device_resources(dev, &iommu, &domain, &devid)) |
1059 | /* device not handled by any AMD IOMMU */ | 1355 | /* device not handled by any AMD IOMMU */ |
1060 | return; | 1356 | return; |
1061 | 1357 | ||
1358 | if (!dma_ops_domain(domain)) | ||
1359 | return; | ||
1360 | |||
1062 | spin_lock_irqsave(&domain->lock, flags); | 1361 | spin_lock_irqsave(&domain->lock, flags); |
1063 | 1362 | ||
1064 | __unmap_single(iommu, domain->priv, dma_addr, size, dir); | 1363 | __unmap_single(iommu, domain->priv, dma_addr, size, dir); |
@@ -1103,6 +1402,8 @@ static int map_sg(struct device *dev, struct scatterlist *sglist, | |||
1103 | int mapped_elems = 0; | 1402 | int mapped_elems = 0; |
1104 | u64 dma_mask; | 1403 | u64 dma_mask; |
1105 | 1404 | ||
1405 | INC_STATS_COUNTER(cnt_map_sg); | ||
1406 | |||
1106 | if (!check_device(dev)) | 1407 | if (!check_device(dev)) |
1107 | return 0; | 1408 | return 0; |
1108 | 1409 | ||
@@ -1113,6 +1414,9 @@ static int map_sg(struct device *dev, struct scatterlist *sglist, | |||
1113 | if (!iommu || !domain) | 1414 | if (!iommu || !domain) |
1114 | return map_sg_no_iommu(dev, sglist, nelems, dir); | 1415 | return map_sg_no_iommu(dev, sglist, nelems, dir); |
1115 | 1416 | ||
1417 | if (!dma_ops_domain(domain)) | ||
1418 | return 0; | ||
1419 | |||
1116 | spin_lock_irqsave(&domain->lock, flags); | 1420 | spin_lock_irqsave(&domain->lock, flags); |
1117 | 1421 | ||
1118 | for_each_sg(sglist, s, nelems, i) { | 1422 | for_each_sg(sglist, s, nelems, i) { |
@@ -1162,10 +1466,15 @@ static void unmap_sg(struct device *dev, struct scatterlist *sglist, | |||
1162 | u16 devid; | 1466 | u16 devid; |
1163 | int i; | 1467 | int i; |
1164 | 1468 | ||
1469 | INC_STATS_COUNTER(cnt_unmap_sg); | ||
1470 | |||
1165 | if (!check_device(dev) || | 1471 | if (!check_device(dev) || |
1166 | !get_device_resources(dev, &iommu, &domain, &devid)) | 1472 | !get_device_resources(dev, &iommu, &domain, &devid)) |
1167 | return; | 1473 | return; |
1168 | 1474 | ||
1475 | if (!dma_ops_domain(domain)) | ||
1476 | return; | ||
1477 | |||
1169 | spin_lock_irqsave(&domain->lock, flags); | 1478 | spin_lock_irqsave(&domain->lock, flags); |
1170 | 1479 | ||
1171 | for_each_sg(sglist, s, nelems, i) { | 1480 | for_each_sg(sglist, s, nelems, i) { |
@@ -1193,6 +1502,8 @@ static void *alloc_coherent(struct device *dev, size_t size, | |||
1193 | phys_addr_t paddr; | 1502 | phys_addr_t paddr; |
1194 | u64 dma_mask = dev->coherent_dma_mask; | 1503 | u64 dma_mask = dev->coherent_dma_mask; |
1195 | 1504 | ||
1505 | INC_STATS_COUNTER(cnt_alloc_coherent); | ||
1506 | |||
1196 | if (!check_device(dev)) | 1507 | if (!check_device(dev)) |
1197 | return NULL; | 1508 | return NULL; |
1198 | 1509 | ||
@@ -1211,6 +1522,9 @@ static void *alloc_coherent(struct device *dev, size_t size, | |||
1211 | return virt_addr; | 1522 | return virt_addr; |
1212 | } | 1523 | } |
1213 | 1524 | ||
1525 | if (!dma_ops_domain(domain)) | ||
1526 | goto out_free; | ||
1527 | |||
1214 | if (!dma_mask) | 1528 | if (!dma_mask) |
1215 | dma_mask = *dev->dma_mask; | 1529 | dma_mask = *dev->dma_mask; |
1216 | 1530 | ||
@@ -1219,18 +1533,20 @@ static void *alloc_coherent(struct device *dev, size_t size, | |||
1219 | *dma_addr = __map_single(dev, iommu, domain->priv, paddr, | 1533 | *dma_addr = __map_single(dev, iommu, domain->priv, paddr, |
1220 | size, DMA_BIDIRECTIONAL, true, dma_mask); | 1534 | size, DMA_BIDIRECTIONAL, true, dma_mask); |
1221 | 1535 | ||
1222 | if (*dma_addr == bad_dma_address) { | 1536 | if (*dma_addr == bad_dma_address) |
1223 | free_pages((unsigned long)virt_addr, get_order(size)); | 1537 | goto out_free; |
1224 | virt_addr = NULL; | ||
1225 | goto out; | ||
1226 | } | ||
1227 | 1538 | ||
1228 | iommu_completion_wait(iommu); | 1539 | iommu_completion_wait(iommu); |
1229 | 1540 | ||
1230 | out: | ||
1231 | spin_unlock_irqrestore(&domain->lock, flags); | 1541 | spin_unlock_irqrestore(&domain->lock, flags); |
1232 | 1542 | ||
1233 | return virt_addr; | 1543 | return virt_addr; |
1544 | |||
1545 | out_free: | ||
1546 | |||
1547 | free_pages((unsigned long)virt_addr, get_order(size)); | ||
1548 | |||
1549 | return NULL; | ||
1234 | } | 1550 | } |
1235 | 1551 | ||
1236 | /* | 1552 | /* |
@@ -1244,6 +1560,8 @@ static void free_coherent(struct device *dev, size_t size, | |||
1244 | struct protection_domain *domain; | 1560 | struct protection_domain *domain; |
1245 | u16 devid; | 1561 | u16 devid; |
1246 | 1562 | ||
1563 | INC_STATS_COUNTER(cnt_free_coherent); | ||
1564 | |||
1247 | if (!check_device(dev)) | 1565 | if (!check_device(dev)) |
1248 | return; | 1566 | return; |
1249 | 1567 | ||
@@ -1252,6 +1570,9 @@ static void free_coherent(struct device *dev, size_t size, | |||
1252 | if (!iommu || !domain) | 1570 | if (!iommu || !domain) |
1253 | goto free_mem; | 1571 | goto free_mem; |
1254 | 1572 | ||
1573 | if (!dma_ops_domain(domain)) | ||
1574 | goto free_mem; | ||
1575 | |||
1255 | spin_lock_irqsave(&domain->lock, flags); | 1576 | spin_lock_irqsave(&domain->lock, flags); |
1256 | 1577 | ||
1257 | __unmap_single(iommu, domain->priv, dma_addr, size, DMA_BIDIRECTIONAL); | 1578 | __unmap_single(iommu, domain->priv, dma_addr, size, DMA_BIDIRECTIONAL); |
@@ -1295,7 +1616,7 @@ static int amd_iommu_dma_supported(struct device *dev, u64 mask) | |||
1295 | * we don't need to preallocate the protection domains anymore. | 1616 | * we don't need to preallocate the protection domains anymore. |
1296 | * For now we have to. | 1617 | * For now we have to. |
1297 | */ | 1618 | */ |
1298 | void prealloc_protection_domains(void) | 1619 | static void prealloc_protection_domains(void) |
1299 | { | 1620 | { |
1300 | struct pci_dev *dev = NULL; | 1621 | struct pci_dev *dev = NULL; |
1301 | struct dma_ops_domain *dma_dom; | 1622 | struct dma_ops_domain *dma_dom; |
@@ -1304,7 +1625,7 @@ void prealloc_protection_domains(void) | |||
1304 | u16 devid; | 1625 | u16 devid; |
1305 | 1626 | ||
1306 | while ((dev = pci_get_device(PCI_ANY_ID, PCI_ANY_ID, dev)) != NULL) { | 1627 | while ((dev = pci_get_device(PCI_ANY_ID, PCI_ANY_ID, dev)) != NULL) { |
1307 | devid = (dev->bus->number << 8) | dev->devfn; | 1628 | devid = calc_devid(dev->bus->number, dev->devfn); |
1308 | if (devid > amd_iommu_last_bdf) | 1629 | if (devid > amd_iommu_last_bdf) |
1309 | continue; | 1630 | continue; |
1310 | devid = amd_iommu_alias_table[devid]; | 1631 | devid = amd_iommu_alias_table[devid]; |
@@ -1351,6 +1672,7 @@ int __init amd_iommu_init_dma_ops(void) | |||
1351 | iommu->default_dom = dma_ops_domain_alloc(iommu, order); | 1672 | iommu->default_dom = dma_ops_domain_alloc(iommu, order); |
1352 | if (iommu->default_dom == NULL) | 1673 | if (iommu->default_dom == NULL) |
1353 | return -ENOMEM; | 1674 | return -ENOMEM; |
1675 | iommu->default_dom->domain.flags |= PD_DEFAULT_MASK; | ||
1354 | ret = iommu_init_unity_mappings(iommu); | 1676 | ret = iommu_init_unity_mappings(iommu); |
1355 | if (ret) | 1677 | if (ret) |
1356 | goto free_domains; | 1678 | goto free_domains; |
@@ -1374,6 +1696,12 @@ int __init amd_iommu_init_dma_ops(void) | |||
1374 | /* Make the driver finally visible to the drivers */ | 1696 | /* Make the driver finally visible to the drivers */ |
1375 | dma_ops = &amd_iommu_dma_ops; | 1697 | dma_ops = &amd_iommu_dma_ops; |
1376 | 1698 | ||
1699 | register_iommu(&amd_iommu_ops); | ||
1700 | |||
1701 | bus_register_notifier(&pci_bus_type, &device_nb); | ||
1702 | |||
1703 | amd_iommu_stats_init(); | ||
1704 | |||
1377 | return 0; | 1705 | return 0; |
1378 | 1706 | ||
1379 | free_domains: | 1707 | free_domains: |
@@ -1385,3 +1713,224 @@ free_domains: | |||
1385 | 1713 | ||
1386 | return ret; | 1714 | return ret; |
1387 | } | 1715 | } |
1716 | |||
1717 | /***************************************************************************** | ||
1718 | * | ||
1719 | * The following functions belong to the exported interface of AMD IOMMU | ||
1720 | * | ||
1721 | * This interface allows access to lower level functions of the IOMMU | ||
1722 | * like protection domain handling and assignment of devices to domains | ||
1723 | * which is not possible with the dma_ops interface. | ||
1724 | * | ||
1725 | *****************************************************************************/ | ||
1726 | |||
1727 | static void cleanup_domain(struct protection_domain *domain) | ||
1728 | { | ||
1729 | unsigned long flags; | ||
1730 | u16 devid; | ||
1731 | |||
1732 | write_lock_irqsave(&amd_iommu_devtable_lock, flags); | ||
1733 | |||
1734 | for (devid = 0; devid <= amd_iommu_last_bdf; ++devid) | ||
1735 | if (amd_iommu_pd_table[devid] == domain) | ||
1736 | __detach_device(domain, devid); | ||
1737 | |||
1738 | write_unlock_irqrestore(&amd_iommu_devtable_lock, flags); | ||
1739 | } | ||
1740 | |||
1741 | static int amd_iommu_domain_init(struct iommu_domain *dom) | ||
1742 | { | ||
1743 | struct protection_domain *domain; | ||
1744 | |||
1745 | domain = kzalloc(sizeof(*domain), GFP_KERNEL); | ||
1746 | if (!domain) | ||
1747 | return -ENOMEM; | ||
1748 | |||
1749 | spin_lock_init(&domain->lock); | ||
1750 | domain->mode = PAGE_MODE_3_LEVEL; | ||
1751 | domain->id = domain_id_alloc(); | ||
1752 | if (!domain->id) | ||
1753 | goto out_free; | ||
1754 | domain->pt_root = (void *)get_zeroed_page(GFP_KERNEL); | ||
1755 | if (!domain->pt_root) | ||
1756 | goto out_free; | ||
1757 | |||
1758 | dom->priv = domain; | ||
1759 | |||
1760 | return 0; | ||
1761 | |||
1762 | out_free: | ||
1763 | kfree(domain); | ||
1764 | |||
1765 | return -ENOMEM; | ||
1766 | } | ||
1767 | |||
1768 | static void amd_iommu_domain_destroy(struct iommu_domain *dom) | ||
1769 | { | ||
1770 | struct protection_domain *domain = dom->priv; | ||
1771 | |||
1772 | if (!domain) | ||
1773 | return; | ||
1774 | |||
1775 | if (domain->dev_cnt > 0) | ||
1776 | cleanup_domain(domain); | ||
1777 | |||
1778 | BUG_ON(domain->dev_cnt != 0); | ||
1779 | |||
1780 | free_pagetable(domain); | ||
1781 | |||
1782 | domain_id_free(domain->id); | ||
1783 | |||
1784 | kfree(domain); | ||
1785 | |||
1786 | dom->priv = NULL; | ||
1787 | } | ||
1788 | |||
1789 | static void amd_iommu_detach_device(struct iommu_domain *dom, | ||
1790 | struct device *dev) | ||
1791 | { | ||
1792 | struct protection_domain *domain = dom->priv; | ||
1793 | struct amd_iommu *iommu; | ||
1794 | struct pci_dev *pdev; | ||
1795 | u16 devid; | ||
1796 | |||
1797 | if (dev->bus != &pci_bus_type) | ||
1798 | return; | ||
1799 | |||
1800 | pdev = to_pci_dev(dev); | ||
1801 | |||
1802 | devid = calc_devid(pdev->bus->number, pdev->devfn); | ||
1803 | |||
1804 | if (devid > 0) | ||
1805 | detach_device(domain, devid); | ||
1806 | |||
1807 | iommu = amd_iommu_rlookup_table[devid]; | ||
1808 | if (!iommu) | ||
1809 | return; | ||
1810 | |||
1811 | iommu_queue_inv_dev_entry(iommu, devid); | ||
1812 | iommu_completion_wait(iommu); | ||
1813 | } | ||
1814 | |||
1815 | static int amd_iommu_attach_device(struct iommu_domain *dom, | ||
1816 | struct device *dev) | ||
1817 | { | ||
1818 | struct protection_domain *domain = dom->priv; | ||
1819 | struct protection_domain *old_domain; | ||
1820 | struct amd_iommu *iommu; | ||
1821 | struct pci_dev *pdev; | ||
1822 | u16 devid; | ||
1823 | |||
1824 | if (dev->bus != &pci_bus_type) | ||
1825 | return -EINVAL; | ||
1826 | |||
1827 | pdev = to_pci_dev(dev); | ||
1828 | |||
1829 | devid = calc_devid(pdev->bus->number, pdev->devfn); | ||
1830 | |||
1831 | if (devid >= amd_iommu_last_bdf || | ||
1832 | devid != amd_iommu_alias_table[devid]) | ||
1833 | return -EINVAL; | ||
1834 | |||
1835 | iommu = amd_iommu_rlookup_table[devid]; | ||
1836 | if (!iommu) | ||
1837 | return -EINVAL; | ||
1838 | |||
1839 | old_domain = domain_for_device(devid); | ||
1840 | if (old_domain) | ||
1841 | return -EBUSY; | ||
1842 | |||
1843 | attach_device(iommu, domain, devid); | ||
1844 | |||
1845 | iommu_completion_wait(iommu); | ||
1846 | |||
1847 | return 0; | ||
1848 | } | ||
1849 | |||
1850 | static int amd_iommu_map_range(struct iommu_domain *dom, | ||
1851 | unsigned long iova, phys_addr_t paddr, | ||
1852 | size_t size, int iommu_prot) | ||
1853 | { | ||
1854 | struct protection_domain *domain = dom->priv; | ||
1855 | unsigned long i, npages = iommu_num_pages(paddr, size, PAGE_SIZE); | ||
1856 | int prot = 0; | ||
1857 | int ret; | ||
1858 | |||
1859 | if (iommu_prot & IOMMU_READ) | ||
1860 | prot |= IOMMU_PROT_IR; | ||
1861 | if (iommu_prot & IOMMU_WRITE) | ||
1862 | prot |= IOMMU_PROT_IW; | ||
1863 | |||
1864 | iova &= PAGE_MASK; | ||
1865 | paddr &= PAGE_MASK; | ||
1866 | |||
1867 | for (i = 0; i < npages; ++i) { | ||
1868 | ret = iommu_map_page(domain, iova, paddr, prot); | ||
1869 | if (ret) | ||
1870 | return ret; | ||
1871 | |||
1872 | iova += PAGE_SIZE; | ||
1873 | paddr += PAGE_SIZE; | ||
1874 | } | ||
1875 | |||
1876 | return 0; | ||
1877 | } | ||
1878 | |||
1879 | static void amd_iommu_unmap_range(struct iommu_domain *dom, | ||
1880 | unsigned long iova, size_t size) | ||
1881 | { | ||
1882 | |||
1883 | struct protection_domain *domain = dom->priv; | ||
1884 | unsigned long i, npages = iommu_num_pages(iova, size, PAGE_SIZE); | ||
1885 | |||
1886 | iova &= PAGE_MASK; | ||
1887 | |||
1888 | for (i = 0; i < npages; ++i) { | ||
1889 | iommu_unmap_page(domain, iova); | ||
1890 | iova += PAGE_SIZE; | ||
1891 | } | ||
1892 | |||
1893 | iommu_flush_domain(domain->id); | ||
1894 | } | ||
1895 | |||
1896 | static phys_addr_t amd_iommu_iova_to_phys(struct iommu_domain *dom, | ||
1897 | unsigned long iova) | ||
1898 | { | ||
1899 | struct protection_domain *domain = dom->priv; | ||
1900 | unsigned long offset = iova & ~PAGE_MASK; | ||
1901 | phys_addr_t paddr; | ||
1902 | u64 *pte; | ||
1903 | |||
1904 | pte = &domain->pt_root[IOMMU_PTE_L2_INDEX(iova)]; | ||
1905 | |||
1906 | if (!IOMMU_PTE_PRESENT(*pte)) | ||
1907 | return 0; | ||
1908 | |||
1909 | pte = IOMMU_PTE_PAGE(*pte); | ||
1910 | pte = &pte[IOMMU_PTE_L1_INDEX(iova)]; | ||
1911 | |||
1912 | if (!IOMMU_PTE_PRESENT(*pte)) | ||
1913 | return 0; | ||
1914 | |||
1915 | pte = IOMMU_PTE_PAGE(*pte); | ||
1916 | pte = &pte[IOMMU_PTE_L0_INDEX(iova)]; | ||
1917 | |||
1918 | if (!IOMMU_PTE_PRESENT(*pte)) | ||
1919 | return 0; | ||
1920 | |||
1921 | paddr = *pte & IOMMU_PAGE_MASK; | ||
1922 | paddr |= offset; | ||
1923 | |||
1924 | return paddr; | ||
1925 | } | ||
1926 | |||
/*
 * Callback table that hooks the functions of the exported interface above
 * into the generic IOMMU layer; it is handed to register_iommu() in
 * amd_iommu_init_dma_ops().
 */
static struct iommu_ops amd_iommu_ops = {
	.domain_init = amd_iommu_domain_init,
	.domain_destroy = amd_iommu_domain_destroy,
	.attach_dev = amd_iommu_attach_device,
	.detach_dev = amd_iommu_detach_device,
	.map = amd_iommu_map_range,
	.unmap = amd_iommu_unmap_range,
	.iova_to_phys = amd_iommu_iova_to_phys,
};
1936 | |||
diff --git a/arch/x86/kernel/amd_iommu_init.c b/arch/x86/kernel/amd_iommu_init.c index c6cc22815d35..42c33cebf00f 100644 --- a/arch/x86/kernel/amd_iommu_init.c +++ b/arch/x86/kernel/amd_iommu_init.c | |||
@@ -28,6 +28,7 @@ | |||
28 | #include <asm/amd_iommu_types.h> | 28 | #include <asm/amd_iommu_types.h> |
29 | #include <asm/amd_iommu.h> | 29 | #include <asm/amd_iommu.h> |
30 | #include <asm/iommu.h> | 30 | #include <asm/iommu.h> |
31 | #include <asm/gart.h> | ||
31 | 32 | ||
32 | /* | 33 | /* |
33 | * definitions for the ACPI scanning code | 34 | * definitions for the ACPI scanning code |
@@ -121,7 +122,8 @@ u16 amd_iommu_last_bdf; /* largest PCI device id we have | |||
121 | LIST_HEAD(amd_iommu_unity_map); /* a list of required unity mappings | 122 | LIST_HEAD(amd_iommu_unity_map); /* a list of required unity mappings |
122 | we find in ACPI */ | 123 | we find in ACPI */ |
123 | unsigned amd_iommu_aperture_order = 26; /* size of aperture in power of 2 */ | 124 | unsigned amd_iommu_aperture_order = 26; /* size of aperture in power of 2 */ |
124 | int amd_iommu_isolate = 1; /* if 1, device isolation is enabled */ | 125 | bool amd_iommu_isolate = true; /* if true, device isolation is |
126 | enabled */ | ||
125 | bool amd_iommu_unmap_flush; /* if true, flush on every unmap */ | 127 | bool amd_iommu_unmap_flush; /* if true, flush on every unmap */ |
126 | 128 | ||
127 | LIST_HEAD(amd_iommu_list); /* list of all AMD IOMMUs in the | 129 | LIST_HEAD(amd_iommu_list); /* list of all AMD IOMMUs in the |
@@ -242,20 +244,16 @@ static void __init iommu_feature_disable(struct amd_iommu *iommu, u8 bit) | |||
242 | } | 244 | } |
243 | 245 | ||
244 | /* Function to enable the hardware */ | 246 | /* Function to enable the hardware */ |
245 | void __init iommu_enable(struct amd_iommu *iommu) | 247 | static void __init iommu_enable(struct amd_iommu *iommu) |
246 | { | 248 | { |
247 | printk(KERN_INFO "AMD IOMMU: Enabling IOMMU " | 249 | printk(KERN_INFO "AMD IOMMU: Enabling IOMMU at %s cap 0x%hx\n", |
248 | "at %02x:%02x.%x cap 0x%hx\n", | 250 | dev_name(&iommu->dev->dev), iommu->cap_ptr); |
249 | iommu->dev->bus->number, | ||
250 | PCI_SLOT(iommu->dev->devfn), | ||
251 | PCI_FUNC(iommu->dev->devfn), | ||
252 | iommu->cap_ptr); | ||
253 | 251 | ||
254 | iommu_feature_enable(iommu, CONTROL_IOMMU_EN); | 252 | iommu_feature_enable(iommu, CONTROL_IOMMU_EN); |
255 | } | 253 | } |
256 | 254 | ||
257 | /* Function to enable IOMMU event logging and event interrupts */ | 255 | /* Function to enable IOMMU event logging and event interrupts */ |
258 | void __init iommu_enable_event_logging(struct amd_iommu *iommu) | 256 | static void __init iommu_enable_event_logging(struct amd_iommu *iommu) |
259 | { | 257 | { |
260 | iommu_feature_enable(iommu, CONTROL_EVT_LOG_EN); | 258 | iommu_feature_enable(iommu, CONTROL_EVT_LOG_EN); |
261 | iommu_feature_enable(iommu, CONTROL_EVT_INT_EN); | 259 | iommu_feature_enable(iommu, CONTROL_EVT_INT_EN); |
@@ -1217,9 +1215,9 @@ static int __init parse_amd_iommu_options(char *str) | |||
1217 | { | 1215 | { |
1218 | for (; *str; ++str) { | 1216 | for (; *str; ++str) { |
1219 | if (strncmp(str, "isolate", 7) == 0) | 1217 | if (strncmp(str, "isolate", 7) == 0) |
1220 | amd_iommu_isolate = 1; | 1218 | amd_iommu_isolate = true; |
1221 | if (strncmp(str, "share", 5) == 0) | 1219 | if (strncmp(str, "share", 5) == 0) |
1222 | amd_iommu_isolate = 0; | 1220 | amd_iommu_isolate = false; |
1223 | if (strncmp(str, "fullflush", 9) == 0) | 1221 | if (strncmp(str, "fullflush", 9) == 0) |
1224 | amd_iommu_unmap_flush = true; | 1222 | amd_iommu_unmap_flush = true; |
1225 | } | 1223 | } |
diff --git a/arch/x86/kernel/aperture_64.c b/arch/x86/kernel/aperture_64.c index 9a32b37ee2ee..676debfc1702 100644 --- a/arch/x86/kernel/aperture_64.c +++ b/arch/x86/kernel/aperture_64.c | |||
@@ -1,8 +1,9 @@ | |||
1 | /* | 1 | /* |
2 | * Firmware replacement code. | 2 | * Firmware replacement code. |
3 | * | 3 | * |
4 | * Work around broken BIOSes that don't set an aperture or only set the | 4 | * Work around broken BIOSes that don't set an aperture, only set the |
5 | * aperture in the AGP bridge. | 5 | * aperture in the AGP bridge, or set too small aperture. |
6 | * | ||
6 | * If all fails map the aperture over some low memory. This is cheaper than | 7 | * If all fails map the aperture over some low memory. This is cheaper than |
7 | * doing bounce buffering. The memory is lost. This is done at early boot | 8 | * doing bounce buffering. The memory is lost. This is done at early boot |
8 | * because only the bootmem allocator can allocate 32+MB. | 9 | * because only the bootmem allocator can allocate 32+MB. |
diff --git a/arch/x86/kernel/apic.c b/arch/x86/kernel/apic.c index 16f94879b525..b13d3c4dbd42 100644 --- a/arch/x86/kernel/apic.c +++ b/arch/x86/kernel/apic.c | |||
@@ -30,6 +30,7 @@ | |||
30 | #include <linux/module.h> | 30 | #include <linux/module.h> |
31 | #include <linux/dmi.h> | 31 | #include <linux/dmi.h> |
32 | #include <linux/dmar.h> | 32 | #include <linux/dmar.h> |
33 | #include <linux/ftrace.h> | ||
33 | 34 | ||
34 | #include <asm/atomic.h> | 35 | #include <asm/atomic.h> |
35 | #include <asm/smp.h> | 36 | #include <asm/smp.h> |
@@ -97,8 +98,8 @@ __setup("apicpmtimer", setup_apicpmtimer); | |||
97 | #ifdef HAVE_X2APIC | 98 | #ifdef HAVE_X2APIC |
98 | int x2apic; | 99 | int x2apic; |
99 | /* x2apic enabled before OS handover */ | 100 | /* x2apic enabled before OS handover */ |
100 | int x2apic_preenabled; | 101 | static int x2apic_preenabled; |
101 | int disable_x2apic; | 102 | static int disable_x2apic; |
102 | static __init int setup_nox2apic(char *str) | 103 | static __init int setup_nox2apic(char *str) |
103 | { | 104 | { |
104 | disable_x2apic = 1; | 105 | disable_x2apic = 1; |
@@ -118,8 +119,6 @@ EXPORT_SYMBOL_GPL(local_apic_timer_c2_ok); | |||
118 | 119 | ||
119 | int first_system_vector = 0xfe; | 120 | int first_system_vector = 0xfe; |
120 | 121 | ||
121 | char system_vectors[NR_VECTORS] = { [0 ... NR_VECTORS-1] = SYS_VECTOR_FREE}; | ||
122 | |||
123 | /* | 122 | /* |
124 | * Debug level, exported for io_apic.c | 123 | * Debug level, exported for io_apic.c |
125 | */ | 124 | */ |
@@ -141,7 +140,7 @@ static int lapic_next_event(unsigned long delta, | |||
141 | struct clock_event_device *evt); | 140 | struct clock_event_device *evt); |
142 | static void lapic_timer_setup(enum clock_event_mode mode, | 141 | static void lapic_timer_setup(enum clock_event_mode mode, |
143 | struct clock_event_device *evt); | 142 | struct clock_event_device *evt); |
144 | static void lapic_timer_broadcast(cpumask_t mask); | 143 | static void lapic_timer_broadcast(const struct cpumask *mask); |
145 | static void apic_pm_activate(void); | 144 | static void apic_pm_activate(void); |
146 | 145 | ||
147 | /* | 146 | /* |
@@ -227,7 +226,7 @@ void xapic_icr_write(u32 low, u32 id) | |||
227 | apic_write(APIC_ICR, low); | 226 | apic_write(APIC_ICR, low); |
228 | } | 227 | } |
229 | 228 | ||
230 | u64 xapic_icr_read(void) | 229 | static u64 xapic_icr_read(void) |
231 | { | 230 | { |
232 | u32 icr1, icr2; | 231 | u32 icr1, icr2; |
233 | 232 | ||
@@ -267,7 +266,7 @@ void x2apic_icr_write(u32 low, u32 id) | |||
267 | wrmsrl(APIC_BASE_MSR + (APIC_ICR >> 4), ((__u64) id) << 32 | low); | 266 | wrmsrl(APIC_BASE_MSR + (APIC_ICR >> 4), ((__u64) id) << 32 | low); |
268 | } | 267 | } |
269 | 268 | ||
270 | u64 x2apic_icr_read(void) | 269 | static u64 x2apic_icr_read(void) |
271 | { | 270 | { |
272 | unsigned long val; | 271 | unsigned long val; |
273 | 272 | ||
@@ -441,6 +440,7 @@ static void lapic_timer_setup(enum clock_event_mode mode, | |||
441 | v = apic_read(APIC_LVTT); | 440 | v = apic_read(APIC_LVTT); |
442 | v |= (APIC_LVT_MASKED | LOCAL_TIMER_VECTOR); | 441 | v |= (APIC_LVT_MASKED | LOCAL_TIMER_VECTOR); |
443 | apic_write(APIC_LVTT, v); | 442 | apic_write(APIC_LVTT, v); |
443 | apic_write(APIC_TMICT, 0xffffffff); | ||
444 | break; | 444 | break; |
445 | case CLOCK_EVT_MODE_RESUME: | 445 | case CLOCK_EVT_MODE_RESUME: |
446 | /* Nothing to do here */ | 446 | /* Nothing to do here */ |
@@ -453,7 +453,7 @@ static void lapic_timer_setup(enum clock_event_mode mode, | |||
453 | /* | 453 | /* |
454 | * Local APIC timer broadcast function | 454 | * Local APIC timer broadcast function |
455 | */ | 455 | */ |
456 | static void lapic_timer_broadcast(cpumask_t mask) | 456 | static void lapic_timer_broadcast(const struct cpumask *mask) |
457 | { | 457 | { |
458 | #ifdef CONFIG_SMP | 458 | #ifdef CONFIG_SMP |
459 | send_IPI_mask(mask, LOCAL_TIMER_VECTOR); | 459 | send_IPI_mask(mask, LOCAL_TIMER_VECTOR); |
@@ -469,7 +469,7 @@ static void __cpuinit setup_APIC_timer(void) | |||
469 | struct clock_event_device *levt = &__get_cpu_var(lapic_events); | 469 | struct clock_event_device *levt = &__get_cpu_var(lapic_events); |
470 | 470 | ||
471 | memcpy(levt, &lapic_clockevent, sizeof(*levt)); | 471 | memcpy(levt, &lapic_clockevent, sizeof(*levt)); |
472 | levt->cpumask = cpumask_of_cpu(smp_processor_id()); | 472 | levt->cpumask = cpumask_of(smp_processor_id()); |
473 | 473 | ||
474 | clockevents_register_device(levt); | 474 | clockevents_register_device(levt); |
475 | } | 475 | } |
@@ -559,13 +559,13 @@ static int __init calibrate_by_pmtimer(long deltapm, long *delta) | |||
559 | } else { | 559 | } else { |
560 | res = (((u64)deltapm) * mult) >> 22; | 560 | res = (((u64)deltapm) * mult) >> 22; |
561 | do_div(res, 1000000); | 561 | do_div(res, 1000000); |
562 | printk(KERN_WARNING "APIC calibration not consistent " | 562 | pr_warning("APIC calibration not consistent " |
563 | "with PM Timer: %ldms instead of 100ms\n", | 563 | "with PM Timer: %ldms instead of 100ms\n", |
564 | (long)res); | 564 | (long)res); |
565 | /* Correct the lapic counter value */ | 565 | /* Correct the lapic counter value */ |
566 | res = (((u64)(*delta)) * pm_100ms); | 566 | res = (((u64)(*delta)) * pm_100ms); |
567 | do_div(res, deltapm); | 567 | do_div(res, deltapm); |
568 | printk(KERN_INFO "APIC delta adjusted to PM-Timer: " | 568 | pr_info("APIC delta adjusted to PM-Timer: " |
569 | "%lu (%ld)\n", (unsigned long)res, *delta); | 569 | "%lu (%ld)\n", (unsigned long)res, *delta); |
570 | *delta = (long)res; | 570 | *delta = (long)res; |
571 | } | 571 | } |
@@ -645,8 +645,7 @@ static int __init calibrate_APIC_clock(void) | |||
645 | */ | 645 | */ |
646 | if (calibration_result < (1000000 / HZ)) { | 646 | if (calibration_result < (1000000 / HZ)) { |
647 | local_irq_enable(); | 647 | local_irq_enable(); |
648 | printk(KERN_WARNING | 648 | pr_warning("APIC frequency too slow, disabling apic timer\n"); |
649 | "APIC frequency too slow, disabling apic timer\n"); | ||
650 | return -1; | 649 | return -1; |
651 | } | 650 | } |
652 | 651 | ||
@@ -672,13 +671,9 @@ static int __init calibrate_APIC_clock(void) | |||
672 | while (lapic_cal_loops <= LAPIC_CAL_LOOPS) | 671 | while (lapic_cal_loops <= LAPIC_CAL_LOOPS) |
673 | cpu_relax(); | 672 | cpu_relax(); |
674 | 673 | ||
675 | local_irq_disable(); | ||
676 | |||
677 | /* Stop the lapic timer */ | 674 | /* Stop the lapic timer */ |
678 | lapic_timer_setup(CLOCK_EVT_MODE_SHUTDOWN, levt); | 675 | lapic_timer_setup(CLOCK_EVT_MODE_SHUTDOWN, levt); |
679 | 676 | ||
680 | local_irq_enable(); | ||
681 | |||
682 | /* Jiffies delta */ | 677 | /* Jiffies delta */ |
683 | deltaj = lapic_cal_j2 - lapic_cal_j1; | 678 | deltaj = lapic_cal_j2 - lapic_cal_j1; |
684 | apic_printk(APIC_VERBOSE, "... jiffies delta = %lu\n", deltaj); | 679 | apic_printk(APIC_VERBOSE, "... jiffies delta = %lu\n", deltaj); |
@@ -692,8 +687,7 @@ static int __init calibrate_APIC_clock(void) | |||
692 | local_irq_enable(); | 687 | local_irq_enable(); |
693 | 688 | ||
694 | if (levt->features & CLOCK_EVT_FEAT_DUMMY) { | 689 | if (levt->features & CLOCK_EVT_FEAT_DUMMY) { |
695 | printk(KERN_WARNING | 690 | pr_warning("APIC timer disabled due to verification failure.\n"); |
696 | "APIC timer disabled due to verification failure.\n"); | ||
697 | return -1; | 691 | return -1; |
698 | } | 692 | } |
699 | 693 | ||
@@ -714,7 +708,7 @@ void __init setup_boot_APIC_clock(void) | |||
714 | * broadcast mechanism is used. On UP systems simply ignore it. | 708 | * broadcast mechanism is used. On UP systems simply ignore it. |
715 | */ | 709 | */ |
716 | if (disable_apic_timer) { | 710 | if (disable_apic_timer) { |
717 | printk(KERN_INFO "Disabling APIC timer\n"); | 711 | pr_info("Disabling APIC timer\n"); |
718 | /* No broadcast on UP ! */ | 712 | /* No broadcast on UP ! */ |
719 | if (num_possible_cpus() > 1) { | 713 | if (num_possible_cpus() > 1) { |
720 | lapic_clockevent.mult = 1; | 714 | lapic_clockevent.mult = 1; |
@@ -741,7 +735,7 @@ void __init setup_boot_APIC_clock(void) | |||
741 | if (nmi_watchdog != NMI_IO_APIC) | 735 | if (nmi_watchdog != NMI_IO_APIC) |
742 | lapic_clockevent.features &= ~CLOCK_EVT_FEAT_DUMMY; | 736 | lapic_clockevent.features &= ~CLOCK_EVT_FEAT_DUMMY; |
743 | else | 737 | else |
744 | printk(KERN_WARNING "APIC timer registered as dummy," | 738 | pr_warning("APIC timer registered as dummy," |
745 | " due to nmi_watchdog=%d!\n", nmi_watchdog); | 739 | " due to nmi_watchdog=%d!\n", nmi_watchdog); |
746 | 740 | ||
747 | /* Setup the lapic or request the broadcast */ | 741 | /* Setup the lapic or request the broadcast */ |
@@ -773,8 +767,7 @@ static void local_apic_timer_interrupt(void) | |||
773 | * spurious. | 767 | * spurious. |
774 | */ | 768 | */ |
775 | if (!evt->event_handler) { | 769 | if (!evt->event_handler) { |
776 | printk(KERN_WARNING | 770 | pr_warning("Spurious LAPIC timer interrupt on cpu %d\n", cpu); |
777 | "Spurious LAPIC timer interrupt on cpu %d\n", cpu); | ||
778 | /* Switch it off */ | 771 | /* Switch it off */ |
779 | lapic_timer_setup(CLOCK_EVT_MODE_SHUTDOWN, evt); | 772 | lapic_timer_setup(CLOCK_EVT_MODE_SHUTDOWN, evt); |
780 | return; | 773 | return; |
@@ -783,11 +776,7 @@ static void local_apic_timer_interrupt(void) | |||
783 | /* | 776 | /* |
784 | * the NMI deadlock-detector uses this. | 777 | * the NMI deadlock-detector uses this. |
785 | */ | 778 | */ |
786 | #ifdef CONFIG_X86_64 | 779 | inc_irq_stat(apic_timer_irqs); |
787 | add_pda(apic_timer_irqs, 1); | ||
788 | #else | ||
789 | per_cpu(irq_stat, cpu).apic_timer_irqs++; | ||
790 | #endif | ||
791 | 780 | ||
792 | evt->event_handler(evt); | 781 | evt->event_handler(evt); |
793 | } | 782 | } |
@@ -800,7 +789,7 @@ static void local_apic_timer_interrupt(void) | |||
800 | * [ if a single-CPU system runs an SMP kernel then we call the local | 789 | * [ if a single-CPU system runs an SMP kernel then we call the local |
801 | * interrupt as well. Thus we cannot inline the local irq ... ] | 790 | * interrupt as well. Thus we cannot inline the local irq ... ] |
802 | */ | 791 | */ |
803 | void smp_apic_timer_interrupt(struct pt_regs *regs) | 792 | void __irq_entry smp_apic_timer_interrupt(struct pt_regs *regs) |
804 | { | 793 | { |
805 | struct pt_regs *old_regs = set_irq_regs(regs); | 794 | struct pt_regs *old_regs = set_irq_regs(regs); |
806 | 795 | ||
@@ -814,9 +803,7 @@ void smp_apic_timer_interrupt(struct pt_regs *regs) | |||
814 | * Besides, if we don't timer interrupts ignore the global | 803 | * Besides, if we don't timer interrupts ignore the global |
815 | * interrupt lock, which is the WrongThing (tm) to do. | 804 | * interrupt lock, which is the WrongThing (tm) to do. |
816 | */ | 805 | */ |
817 | #ifdef CONFIG_X86_64 | ||
818 | exit_idle(); | 806 | exit_idle(); |
819 | #endif | ||
820 | irq_enter(); | 807 | irq_enter(); |
821 | local_apic_timer_interrupt(); | 808 | local_apic_timer_interrupt(); |
822 | irq_exit(); | 809 | irq_exit(); |
@@ -1093,7 +1080,7 @@ static void __cpuinit lapic_setup_esr(void) | |||
1093 | unsigned int oldvalue, value, maxlvt; | 1080 | unsigned int oldvalue, value, maxlvt; |
1094 | 1081 | ||
1095 | if (!lapic_is_integrated()) { | 1082 | if (!lapic_is_integrated()) { |
1096 | printk(KERN_INFO "No ESR for 82489DX.\n"); | 1083 | pr_info("No ESR for 82489DX.\n"); |
1097 | return; | 1084 | return; |
1098 | } | 1085 | } |
1099 | 1086 | ||
@@ -1104,7 +1091,7 @@ static void __cpuinit lapic_setup_esr(void) | |||
1104 | * ESR disabled - we can't do anything useful with the | 1091 | * ESR disabled - we can't do anything useful with the |
1105 | * errors anyway - mbligh | 1092 | * errors anyway - mbligh |
1106 | */ | 1093 | */ |
1107 | printk(KERN_INFO "Leaving ESR disabled.\n"); | 1094 | pr_info("Leaving ESR disabled.\n"); |
1108 | return; | 1095 | return; |
1109 | } | 1096 | } |
1110 | 1097 | ||
@@ -1298,7 +1285,7 @@ void check_x2apic(void) | |||
1298 | rdmsr(MSR_IA32_APICBASE, msr, msr2); | 1285 | rdmsr(MSR_IA32_APICBASE, msr, msr2); |
1299 | 1286 | ||
1300 | if (msr & X2APIC_ENABLE) { | 1287 | if (msr & X2APIC_ENABLE) { |
1301 | printk("x2apic enabled by BIOS, switching to x2apic ops\n"); | 1288 | pr_info("x2apic enabled by BIOS, switching to x2apic ops\n"); |
1302 | x2apic_preenabled = x2apic = 1; | 1289 | x2apic_preenabled = x2apic = 1; |
1303 | apic_ops = &x2apic_ops; | 1290 | apic_ops = &x2apic_ops; |
1304 | } | 1291 | } |
@@ -1310,7 +1297,7 @@ void enable_x2apic(void) | |||
1310 | 1297 | ||
1311 | rdmsr(MSR_IA32_APICBASE, msr, msr2); | 1298 | rdmsr(MSR_IA32_APICBASE, msr, msr2); |
1312 | if (!(msr & X2APIC_ENABLE)) { | 1299 | if (!(msr & X2APIC_ENABLE)) { |
1313 | printk("Enabling x2apic\n"); | 1300 | pr_info("Enabling x2apic\n"); |
1314 | wrmsr(MSR_IA32_APICBASE, msr | X2APIC_ENABLE, 0); | 1301 | wrmsr(MSR_IA32_APICBASE, msr | X2APIC_ENABLE, 0); |
1315 | } | 1302 | } |
1316 | } | 1303 | } |
@@ -1325,9 +1312,8 @@ void __init enable_IR_x2apic(void) | |||
1325 | return; | 1312 | return; |
1326 | 1313 | ||
1327 | if (!x2apic_preenabled && disable_x2apic) { | 1314 | if (!x2apic_preenabled && disable_x2apic) { |
1328 | printk(KERN_INFO | 1315 | pr_info("Skipped enabling x2apic and Interrupt-remapping " |
1329 | "Skipped enabling x2apic and Interrupt-remapping " | 1316 | "because of nox2apic\n"); |
1330 | "because of nox2apic\n"); | ||
1331 | return; | 1317 | return; |
1332 | } | 1318 | } |
1333 | 1319 | ||
@@ -1335,22 +1321,19 @@ void __init enable_IR_x2apic(void) | |||
1335 | panic("Bios already enabled x2apic, can't enforce nox2apic"); | 1321 | panic("Bios already enabled x2apic, can't enforce nox2apic"); |
1336 | 1322 | ||
1337 | if (!x2apic_preenabled && skip_ioapic_setup) { | 1323 | if (!x2apic_preenabled && skip_ioapic_setup) { |
1338 | printk(KERN_INFO | 1324 | pr_info("Skipped enabling x2apic and Interrupt-remapping " |
1339 | "Skipped enabling x2apic and Interrupt-remapping " | 1325 | "because of skipping io-apic setup\n"); |
1340 | "because of skipping io-apic setup\n"); | ||
1341 | return; | 1326 | return; |
1342 | } | 1327 | } |
1343 | 1328 | ||
1344 | ret = dmar_table_init(); | 1329 | ret = dmar_table_init(); |
1345 | if (ret) { | 1330 | if (ret) { |
1346 | printk(KERN_INFO | 1331 | pr_info("dmar_table_init() failed with %d:\n", ret); |
1347 | "dmar_table_init() failed with %d:\n", ret); | ||
1348 | 1332 | ||
1349 | if (x2apic_preenabled) | 1333 | if (x2apic_preenabled) |
1350 | panic("x2apic enabled by bios. But IR enabling failed"); | 1334 | panic("x2apic enabled by bios. But IR enabling failed"); |
1351 | else | 1335 | else |
1352 | printk(KERN_INFO | 1336 | pr_info("Not enabling x2apic,Intr-remapping\n"); |
1353 | "Not enabling x2apic,Intr-remapping\n"); | ||
1354 | return; | 1337 | return; |
1355 | } | 1338 | } |
1356 | 1339 | ||
@@ -1359,7 +1342,7 @@ void __init enable_IR_x2apic(void) | |||
1359 | 1342 | ||
1360 | ret = save_mask_IO_APIC_setup(); | 1343 | ret = save_mask_IO_APIC_setup(); |
1361 | if (ret) { | 1344 | if (ret) { |
1362 | printk(KERN_INFO "Saving IO-APIC state failed: %d\n", ret); | 1345 | pr_info("Saving IO-APIC state failed: %d\n", ret); |
1363 | goto end; | 1346 | goto end; |
1364 | } | 1347 | } |
1365 | 1348 | ||
@@ -1394,14 +1377,11 @@ end: | |||
1394 | 1377 | ||
1395 | if (!ret) { | 1378 | if (!ret) { |
1396 | if (!x2apic_preenabled) | 1379 | if (!x2apic_preenabled) |
1397 | printk(KERN_INFO | 1380 | pr_info("Enabled x2apic and interrupt-remapping\n"); |
1398 | "Enabled x2apic and interrupt-remapping\n"); | ||
1399 | else | 1381 | else |
1400 | printk(KERN_INFO | 1382 | pr_info("Enabled Interrupt-remapping\n"); |
1401 | "Enabled Interrupt-remapping\n"); | ||
1402 | } else | 1383 | } else |
1403 | printk(KERN_ERR | 1384 | pr_err("Failed to enable Interrupt-remapping and x2apic\n"); |
1404 | "Failed to enable Interrupt-remapping and x2apic\n"); | ||
1405 | #else | 1385 | #else |
1406 | if (!cpu_has_x2apic) | 1386 | if (!cpu_has_x2apic) |
1407 | return; | 1387 | return; |
@@ -1410,8 +1390,8 @@ end: | |||
1410 | panic("x2apic enabled prior OS handover," | 1390 | panic("x2apic enabled prior OS handover," |
1411 | " enable CONFIG_INTR_REMAP"); | 1391 | " enable CONFIG_INTR_REMAP"); |
1412 | 1392 | ||
1413 | printk(KERN_INFO "Enable CONFIG_INTR_REMAP for enabling intr-remapping " | 1393 | pr_info("Enable CONFIG_INTR_REMAP for enabling intr-remapping " |
1414 | " and x2apic\n"); | 1394 | " and x2apic\n"); |
1415 | #endif | 1395 | #endif |
1416 | 1396 | ||
1417 | return; | 1397 | return; |
@@ -1428,7 +1408,7 @@ end: | |||
1428 | static int __init detect_init_APIC(void) | 1408 | static int __init detect_init_APIC(void) |
1429 | { | 1409 | { |
1430 | if (!cpu_has_apic) { | 1410 | if (!cpu_has_apic) { |
1431 | printk(KERN_INFO "No local APIC present\n"); | 1411 | pr_info("No local APIC present\n"); |
1432 | return -1; | 1412 | return -1; |
1433 | } | 1413 | } |
1434 | 1414 | ||
@@ -1469,8 +1449,8 @@ static int __init detect_init_APIC(void) | |||
1469 | * "lapic" specified. | 1449 | * "lapic" specified. |
1470 | */ | 1450 | */ |
1471 | if (!force_enable_local_apic) { | 1451 | if (!force_enable_local_apic) { |
1472 | printk(KERN_INFO "Local APIC disabled by BIOS -- " | 1452 | pr_info("Local APIC disabled by BIOS -- " |
1473 | "you can enable it with \"lapic\"\n"); | 1453 | "you can enable it with \"lapic\"\n"); |
1474 | return -1; | 1454 | return -1; |
1475 | } | 1455 | } |
1476 | /* | 1456 | /* |
@@ -1480,8 +1460,7 @@ static int __init detect_init_APIC(void) | |||
1480 | */ | 1460 | */ |
1481 | rdmsr(MSR_IA32_APICBASE, l, h); | 1461 | rdmsr(MSR_IA32_APICBASE, l, h); |
1482 | if (!(l & MSR_IA32_APICBASE_ENABLE)) { | 1462 | if (!(l & MSR_IA32_APICBASE_ENABLE)) { |
1483 | printk(KERN_INFO | 1463 | pr_info("Local APIC disabled by BIOS -- reenabling.\n"); |
1484 | "Local APIC disabled by BIOS -- reenabling.\n"); | ||
1485 | l &= ~MSR_IA32_APICBASE_BASE; | 1464 | l &= ~MSR_IA32_APICBASE_BASE; |
1486 | l |= MSR_IA32_APICBASE_ENABLE | APIC_DEFAULT_PHYS_BASE; | 1465 | l |= MSR_IA32_APICBASE_ENABLE | APIC_DEFAULT_PHYS_BASE; |
1487 | wrmsr(MSR_IA32_APICBASE, l, h); | 1466 | wrmsr(MSR_IA32_APICBASE, l, h); |
@@ -1494,7 +1473,7 @@ static int __init detect_init_APIC(void) | |||
1494 | */ | 1473 | */ |
1495 | features = cpuid_edx(1); | 1474 | features = cpuid_edx(1); |
1496 | if (!(features & (1 << X86_FEATURE_APIC))) { | 1475 | if (!(features & (1 << X86_FEATURE_APIC))) { |
1497 | printk(KERN_WARNING "Could not enable APIC!\n"); | 1476 | pr_warning("Could not enable APIC!\n"); |
1498 | return -1; | 1477 | return -1; |
1499 | } | 1478 | } |
1500 | set_cpu_cap(&boot_cpu_data, X86_FEATURE_APIC); | 1479 | set_cpu_cap(&boot_cpu_data, X86_FEATURE_APIC); |
@@ -1505,14 +1484,14 @@ static int __init detect_init_APIC(void) | |||
1505 | if (l & MSR_IA32_APICBASE_ENABLE) | 1484 | if (l & MSR_IA32_APICBASE_ENABLE) |
1506 | mp_lapic_addr = l & MSR_IA32_APICBASE_BASE; | 1485 | mp_lapic_addr = l & MSR_IA32_APICBASE_BASE; |
1507 | 1486 | ||
1508 | printk(KERN_INFO "Found and enabled local APIC!\n"); | 1487 | pr_info("Found and enabled local APIC!\n"); |
1509 | 1488 | ||
1510 | apic_pm_activate(); | 1489 | apic_pm_activate(); |
1511 | 1490 | ||
1512 | return 0; | 1491 | return 0; |
1513 | 1492 | ||
1514 | no_apic: | 1493 | no_apic: |
1515 | printk(KERN_INFO "No local APIC present or hardware disabled\n"); | 1494 | pr_info("No local APIC present or hardware disabled\n"); |
1516 | return -1; | 1495 | return -1; |
1517 | } | 1496 | } |
1518 | #endif | 1497 | #endif |
@@ -1588,12 +1567,12 @@ int __init APIC_init_uniprocessor(void) | |||
1588 | { | 1567 | { |
1589 | #ifdef CONFIG_X86_64 | 1568 | #ifdef CONFIG_X86_64 |
1590 | if (disable_apic) { | 1569 | if (disable_apic) { |
1591 | printk(KERN_INFO "Apic disabled\n"); | 1570 | pr_info("Apic disabled\n"); |
1592 | return -1; | 1571 | return -1; |
1593 | } | 1572 | } |
1594 | if (!cpu_has_apic) { | 1573 | if (!cpu_has_apic) { |
1595 | disable_apic = 1; | 1574 | disable_apic = 1; |
1596 | printk(KERN_INFO "Apic disabled by BIOS\n"); | 1575 | pr_info("Apic disabled by BIOS\n"); |
1597 | return -1; | 1576 | return -1; |
1598 | } | 1577 | } |
1599 | #else | 1578 | #else |
@@ -1605,8 +1584,8 @@ int __init APIC_init_uniprocessor(void) | |||
1605 | */ | 1584 | */ |
1606 | if (!cpu_has_apic && | 1585 | if (!cpu_has_apic && |
1607 | APIC_INTEGRATED(apic_version[boot_cpu_physical_apicid])) { | 1586 | APIC_INTEGRATED(apic_version[boot_cpu_physical_apicid])) { |
1608 | printk(KERN_ERR "BIOS bug, local APIC 0x%x not detected!...\n", | 1587 | pr_err("BIOS bug, local APIC 0x%x not detected!...\n", |
1609 | boot_cpu_physical_apicid); | 1588 | boot_cpu_physical_apicid); |
1610 | clear_cpu_cap(&boot_cpu_data, X86_FEATURE_APIC); | 1589 | clear_cpu_cap(&boot_cpu_data, X86_FEATURE_APIC); |
1611 | return -1; | 1590 | return -1; |
1612 | } | 1591 | } |
@@ -1682,9 +1661,7 @@ void smp_spurious_interrupt(struct pt_regs *regs) | |||
1682 | { | 1661 | { |
1683 | u32 v; | 1662 | u32 v; |
1684 | 1663 | ||
1685 | #ifdef CONFIG_X86_64 | ||
1686 | exit_idle(); | 1664 | exit_idle(); |
1687 | #endif | ||
1688 | irq_enter(); | 1665 | irq_enter(); |
1689 | /* | 1666 | /* |
1690 | * Check if this really is a spurious interrupt and ACK it | 1667 | * Check if this really is a spurious interrupt and ACK it |
@@ -1695,14 +1672,11 @@ void smp_spurious_interrupt(struct pt_regs *regs) | |||
1695 | if (v & (1 << (SPURIOUS_APIC_VECTOR & 0x1f))) | 1672 | if (v & (1 << (SPURIOUS_APIC_VECTOR & 0x1f))) |
1696 | ack_APIC_irq(); | 1673 | ack_APIC_irq(); |
1697 | 1674 | ||
1698 | #ifdef CONFIG_X86_64 | 1675 | inc_irq_stat(irq_spurious_count); |
1699 | add_pda(irq_spurious_count, 1); | 1676 | |
1700 | #else | ||
1701 | /* see sw-dev-man vol 3, chapter 7.4.13.5 */ | 1677 | /* see sw-dev-man vol 3, chapter 7.4.13.5 */ |
1702 | printk(KERN_INFO "spurious APIC interrupt on CPU#%d, " | 1678 | pr_info("spurious APIC interrupt on CPU#%d, " |
1703 | "should never happen.\n", smp_processor_id()); | 1679 | "should never happen.\n", smp_processor_id()); |
1704 | __get_cpu_var(irq_stat).irq_spurious_count++; | ||
1705 | #endif | ||
1706 | irq_exit(); | 1680 | irq_exit(); |
1707 | } | 1681 | } |
1708 | 1682 | ||
@@ -1713,9 +1687,7 @@ void smp_error_interrupt(struct pt_regs *regs) | |||
1713 | { | 1687 | { |
1714 | u32 v, v1; | 1688 | u32 v, v1; |
1715 | 1689 | ||
1716 | #ifdef CONFIG_X86_64 | ||
1717 | exit_idle(); | 1690 | exit_idle(); |
1718 | #endif | ||
1719 | irq_enter(); | 1691 | irq_enter(); |
1720 | /* First tickle the hardware, only then report what went on. -- REW */ | 1692 | /* First tickle the hardware, only then report what went on. -- REW */ |
1721 | v = apic_read(APIC_ESR); | 1693 | v = apic_read(APIC_ESR); |
@@ -1724,17 +1696,18 @@ void smp_error_interrupt(struct pt_regs *regs) | |||
1724 | ack_APIC_irq(); | 1696 | ack_APIC_irq(); |
1725 | atomic_inc(&irq_err_count); | 1697 | atomic_inc(&irq_err_count); |
1726 | 1698 | ||
1727 | /* Here is what the APIC error bits mean: | 1699 | /* |
1728 | 0: Send CS error | 1700 | * Here is what the APIC error bits mean: |
1729 | 1: Receive CS error | 1701 | * 0: Send CS error |
1730 | 2: Send accept error | 1702 | * 1: Receive CS error |
1731 | 3: Receive accept error | 1703 | * 2: Send accept error |
1732 | 4: Reserved | 1704 | * 3: Receive accept error |
1733 | 5: Send illegal vector | 1705 | * 4: Reserved |
1734 | 6: Received illegal vector | 1706 | * 5: Send illegal vector |
1735 | 7: Illegal register address | 1707 | * 6: Received illegal vector |
1736 | */ | 1708 | * 7: Illegal register address |
1737 | printk(KERN_DEBUG "APIC error on CPU%d: %02x(%02x)\n", | 1709 | */ |
1710 | pr_debug("APIC error on CPU%d: %02x(%02x)\n", | ||
1738 | smp_processor_id(), v , v1); | 1711 | smp_processor_id(), v , v1); |
1739 | irq_exit(); | 1712 | irq_exit(); |
1740 | } | 1713 | } |
@@ -1832,28 +1805,32 @@ void disconnect_bsp_APIC(int virt_wire_setup) | |||
1832 | void __cpuinit generic_processor_info(int apicid, int version) | 1805 | void __cpuinit generic_processor_info(int apicid, int version) |
1833 | { | 1806 | { |
1834 | int cpu; | 1807 | int cpu; |
1835 | cpumask_t tmp_map; | ||
1836 | 1808 | ||
1837 | /* | 1809 | /* |
1838 | * Validate version | 1810 | * Validate version |
1839 | */ | 1811 | */ |
1840 | if (version == 0x0) { | 1812 | if (version == 0x0) { |
1841 | printk(KERN_WARNING "BIOS bug, APIC version is 0 for CPU#%d! " | 1813 | pr_warning("BIOS bug, APIC version is 0 for CPU#%d! " |
1842 | "fixing up to 0x10. (tell your hw vendor)\n", | 1814 | "fixing up to 0x10. (tell your hw vendor)\n", |
1843 | version); | 1815 | version); |
1844 | version = 0x10; | 1816 | version = 0x10; |
1845 | } | 1817 | } |
1846 | apic_version[apicid] = version; | 1818 | apic_version[apicid] = version; |
1847 | 1819 | ||
1848 | if (num_processors >= NR_CPUS) { | 1820 | if (num_processors >= nr_cpu_ids) { |
1849 | printk(KERN_WARNING "WARNING: NR_CPUS limit of %i reached." | 1821 | int max = nr_cpu_ids; |
1850 | " Processor ignored.\n", NR_CPUS); | 1822 | int thiscpu = max + disabled_cpus; |
1823 | |||
1824 | pr_warning( | ||
1825 | "ACPI: NR_CPUS/possible_cpus limit of %i reached." | ||
1826 | " Processor %d/0x%x ignored.\n", max, thiscpu, apicid); | ||
1827 | |||
1828 | disabled_cpus++; | ||
1851 | return; | 1829 | return; |
1852 | } | 1830 | } |
1853 | 1831 | ||
1854 | num_processors++; | 1832 | num_processors++; |
1855 | cpus_complement(tmp_map, cpu_present_map); | 1833 | cpu = cpumask_next_zero(-1, cpu_present_mask); |
1856 | cpu = first_cpu(tmp_map); | ||
1857 | 1834 | ||
1858 | physid_set(apicid, phys_cpu_present_map); | 1835 | physid_set(apicid, phys_cpu_present_map); |
1859 | if (apicid == boot_cpu_physical_apicid) { | 1836 | if (apicid == boot_cpu_physical_apicid) { |
@@ -1903,8 +1880,8 @@ void __cpuinit generic_processor_info(int apicid, int version) | |||
1903 | } | 1880 | } |
1904 | #endif | 1881 | #endif |
1905 | 1882 | ||
1906 | cpu_set(cpu, cpu_possible_map); | 1883 | set_cpu_possible(cpu, true); |
1907 | cpu_set(cpu, cpu_present_map); | 1884 | set_cpu_present(cpu, true); |
1908 | } | 1885 | } |
1909 | 1886 | ||
1910 | #ifdef CONFIG_X86_64 | 1887 | #ifdef CONFIG_X86_64 |
@@ -2106,7 +2083,7 @@ __cpuinit int apic_is_clustered_box(void) | |||
2106 | bios_cpu_apicid = early_per_cpu_ptr(x86_bios_cpu_apicid); | 2083 | bios_cpu_apicid = early_per_cpu_ptr(x86_bios_cpu_apicid); |
2107 | bitmap_zero(clustermap, NUM_APIC_CLUSTERS); | 2084 | bitmap_zero(clustermap, NUM_APIC_CLUSTERS); |
2108 | 2085 | ||
2109 | for (i = 0; i < NR_CPUS; i++) { | 2086 | for (i = 0; i < nr_cpu_ids; i++) { |
2110 | /* are we being called early in kernel startup? */ | 2087 | /* are we being called early in kernel startup? */ |
2111 | if (bios_cpu_apicid) { | 2088 | if (bios_cpu_apicid) { |
2112 | id = bios_cpu_apicid[i]; | 2089 | id = bios_cpu_apicid[i]; |
@@ -2209,7 +2186,7 @@ static int __init apic_set_verbosity(char *arg) | |||
2209 | else if (strcmp("verbose", arg) == 0) | 2186 | else if (strcmp("verbose", arg) == 0) |
2210 | apic_verbosity = APIC_VERBOSE; | 2187 | apic_verbosity = APIC_VERBOSE; |
2211 | else { | 2188 | else { |
2212 | printk(KERN_WARNING "APIC Verbosity level %s not recognised" | 2189 | pr_warning("APIC Verbosity level %s not recognised" |
2213 | " use apic=verbose or apic=debug\n", arg); | 2190 | " use apic=verbose or apic=debug\n", arg); |
2214 | return -EINVAL; | 2191 | return -EINVAL; |
2215 | } | 2192 | } |
diff --git a/arch/x86/kernel/apm_32.c b/arch/x86/kernel/apm_32.c index 5145a6e72bbb..3a26525a3f31 100644 --- a/arch/x86/kernel/apm_32.c +++ b/arch/x86/kernel/apm_32.c | |||
@@ -391,11 +391,7 @@ static int power_off; | |||
391 | #else | 391 | #else |
392 | static int power_off = 1; | 392 | static int power_off = 1; |
393 | #endif | 393 | #endif |
394 | #ifdef CONFIG_APM_REAL_MODE_POWER_OFF | ||
395 | static int realmode_power_off = 1; | ||
396 | #else | ||
397 | static int realmode_power_off; | 394 | static int realmode_power_off; |
398 | #endif | ||
399 | #ifdef CONFIG_APM_ALLOW_INTS | 395 | #ifdef CONFIG_APM_ALLOW_INTS |
400 | static int allow_ints = 1; | 396 | static int allow_ints = 1; |
401 | #else | 397 | #else |
diff --git a/arch/x86/kernel/asm-offsets_32.c b/arch/x86/kernel/asm-offsets_32.c index 6649d09ad88f..ee4df08feee6 100644 --- a/arch/x86/kernel/asm-offsets_32.c +++ b/arch/x86/kernel/asm-offsets_32.c | |||
@@ -11,7 +11,7 @@ | |||
11 | #include <linux/suspend.h> | 11 | #include <linux/suspend.h> |
12 | #include <linux/kbuild.h> | 12 | #include <linux/kbuild.h> |
13 | #include <asm/ucontext.h> | 13 | #include <asm/ucontext.h> |
14 | #include "sigframe.h" | 14 | #include <asm/sigframe.h> |
15 | #include <asm/pgtable.h> | 15 | #include <asm/pgtable.h> |
16 | #include <asm/fixmap.h> | 16 | #include <asm/fixmap.h> |
17 | #include <asm/processor.h> | 17 | #include <asm/processor.h> |
diff --git a/arch/x86/kernel/asm-offsets_64.c b/arch/x86/kernel/asm-offsets_64.c index 7fcf63d22f8b..1d41d3f1edbc 100644 --- a/arch/x86/kernel/asm-offsets_64.c +++ b/arch/x86/kernel/asm-offsets_64.c | |||
@@ -20,6 +20,8 @@ | |||
20 | 20 | ||
21 | #include <xen/interface/xen.h> | 21 | #include <xen/interface/xen.h> |
22 | 22 | ||
23 | #include <asm/sigframe.h> | ||
24 | |||
23 | #define __NO_STUBS 1 | 25 | #define __NO_STUBS 1 |
24 | #undef __SYSCALL | 26 | #undef __SYSCALL |
25 | #undef _ASM_X86_UNISTD_64_H | 27 | #undef _ASM_X86_UNISTD_64_H |
@@ -87,7 +89,7 @@ int main(void) | |||
87 | BLANK(); | 89 | BLANK(); |
88 | #undef ENTRY | 90 | #undef ENTRY |
89 | DEFINE(IA32_RT_SIGFRAME_sigcontext, | 91 | DEFINE(IA32_RT_SIGFRAME_sigcontext, |
90 | offsetof (struct rt_sigframe32, uc.uc_mcontext)); | 92 | offsetof (struct rt_sigframe_ia32, uc.uc_mcontext)); |
91 | BLANK(); | 93 | BLANK(); |
92 | #endif | 94 | #endif |
93 | DEFINE(pbe_address, offsetof(struct pbe, address)); | 95 | DEFINE(pbe_address, offsetof(struct pbe, address)); |
diff --git a/arch/x86/kernel/bios_uv.c b/arch/x86/kernel/bios_uv.c index f0dfe6f17e7e..f63882728d91 100644 --- a/arch/x86/kernel/bios_uv.c +++ b/arch/x86/kernel/bios_uv.c | |||
@@ -25,7 +25,7 @@ | |||
25 | #include <asm/uv/bios.h> | 25 | #include <asm/uv/bios.h> |
26 | #include <asm/uv/uv_hub.h> | 26 | #include <asm/uv/uv_hub.h> |
27 | 27 | ||
28 | struct uv_systab uv_systab; | 28 | static struct uv_systab uv_systab; |
29 | 29 | ||
30 | s64 uv_bios_call(enum uv_bios_cmd which, u64 a1, u64 a2, u64 a3, u64 a4, u64 a5) | 30 | s64 uv_bios_call(enum uv_bios_cmd which, u64 a1, u64 a2, u64 a3, u64 a4, u64 a5) |
31 | { | 31 | { |
@@ -69,10 +69,10 @@ s64 uv_bios_call_reentrant(enum uv_bios_cmd which, u64 a1, u64 a2, u64 a3, | |||
69 | 69 | ||
70 | long sn_partition_id; | 70 | long sn_partition_id; |
71 | EXPORT_SYMBOL_GPL(sn_partition_id); | 71 | EXPORT_SYMBOL_GPL(sn_partition_id); |
72 | long uv_coherency_id; | 72 | long sn_coherency_id; |
73 | EXPORT_SYMBOL_GPL(uv_coherency_id); | 73 | EXPORT_SYMBOL_GPL(sn_coherency_id); |
74 | long uv_region_size; | 74 | long sn_region_size; |
75 | EXPORT_SYMBOL_GPL(uv_region_size); | 75 | EXPORT_SYMBOL_GPL(sn_region_size); |
76 | int uv_type; | 76 | int uv_type; |
77 | 77 | ||
78 | 78 | ||
@@ -100,6 +100,56 @@ s64 uv_bios_get_sn_info(int fc, int *uvtype, long *partid, long *coher, | |||
100 | return ret; | 100 | return ret; |
101 | } | 101 | } |
102 | 102 | ||
103 | int | ||
104 | uv_bios_mq_watchlist_alloc(int blade, unsigned long addr, unsigned int mq_size, | ||
105 | unsigned long *intr_mmr_offset) | ||
106 | { | ||
107 | union uv_watchlist_u size_blade; | ||
108 | u64 watchlist; | ||
109 | s64 ret; | ||
110 | |||
111 | size_blade.size = mq_size; | ||
112 | size_blade.blade = blade; | ||
113 | |||
114 | /* | ||
115 | * bios returns watchlist number or negative error number. | ||
116 | */ | ||
117 | ret = (int)uv_bios_call_irqsave(UV_BIOS_WATCHLIST_ALLOC, addr, | ||
118 | size_blade.val, (u64)intr_mmr_offset, | ||
119 | (u64)&watchlist, 0); | ||
120 | if (ret < BIOS_STATUS_SUCCESS) | ||
121 | return ret; | ||
122 | |||
123 | return watchlist; | ||
124 | } | ||
125 | EXPORT_SYMBOL_GPL(uv_bios_mq_watchlist_alloc); | ||
126 | |||
127 | int | ||
128 | uv_bios_mq_watchlist_free(int blade, int watchlist_num) | ||
129 | { | ||
130 | return (int)uv_bios_call_irqsave(UV_BIOS_WATCHLIST_FREE, | ||
131 | blade, watchlist_num, 0, 0, 0); | ||
132 | } | ||
133 | EXPORT_SYMBOL_GPL(uv_bios_mq_watchlist_free); | ||
134 | |||
135 | s64 | ||
136 | uv_bios_change_memprotect(u64 paddr, u64 len, enum uv_memprotect perms) | ||
137 | { | ||
138 | return uv_bios_call_irqsave(UV_BIOS_MEMPROTECT, paddr, len, | ||
139 | perms, 0, 0); | ||
140 | } | ||
141 | EXPORT_SYMBOL_GPL(uv_bios_change_memprotect); | ||
142 | |||
143 | s64 | ||
144 | uv_bios_reserved_page_pa(u64 buf, u64 *cookie, u64 *addr, u64 *len) | ||
145 | { | ||
146 | s64 ret; | ||
147 | |||
148 | ret = uv_bios_call_irqsave(UV_BIOS_GET_PARTITION_ADDR, (u64)cookie, | ||
149 | (u64)addr, buf, (u64)len, 0); | ||
150 | return ret; | ||
151 | } | ||
152 | EXPORT_SYMBOL_GPL(uv_bios_reserved_page_pa); | ||
103 | 153 | ||
104 | s64 uv_bios_freq_base(u64 clock_type, u64 *ticks_per_second) | 154 | s64 uv_bios_freq_base(u64 clock_type, u64 *ticks_per_second) |
105 | { | 155 | { |
diff --git a/arch/x86/kernel/check.c b/arch/x86/kernel/check.c new file mode 100644 index 000000000000..2ac0ab71412a --- /dev/null +++ b/arch/x86/kernel/check.c | |||
@@ -0,0 +1,161 @@ | |||
1 | #include <linux/module.h> | ||
2 | #include <linux/sched.h> | ||
3 | #include <linux/kthread.h> | ||
4 | #include <linux/workqueue.h> | ||
5 | #include <asm/e820.h> | ||
6 | #include <asm/proto.h> | ||
7 | |||
8 | /* | ||
9 | * Some BIOSes seem to corrupt the low 64k of memory during events | ||
10 | * like suspend/resume and unplugging an HDMI cable. Reserve all | ||
11 | * remaining free memory in that area and fill it with a distinct | ||
12 | * pattern. | ||
13 | */ | ||
14 | #define MAX_SCAN_AREAS 8 | ||
15 | |||
16 | static int __read_mostly memory_corruption_check = -1; | ||
17 | |||
18 | static unsigned __read_mostly corruption_check_size = 64*1024; | ||
19 | static unsigned __read_mostly corruption_check_period = 60; /* seconds */ | ||
20 | |||
21 | static struct e820entry scan_areas[MAX_SCAN_AREAS]; | ||
22 | static int num_scan_areas; | ||
23 | |||
24 | |||
25 | static __init int set_corruption_check(char *arg) | ||
26 | { | ||
27 | char *end; | ||
28 | |||
29 | memory_corruption_check = simple_strtol(arg, &end, 10); | ||
30 | |||
31 | return (*end == 0) ? 0 : -EINVAL; | ||
32 | } | ||
33 | early_param("memory_corruption_check", set_corruption_check); | ||
34 | |||
35 | static __init int set_corruption_check_period(char *arg) | ||
36 | { | ||
37 | char *end; | ||
38 | |||
39 | corruption_check_period = simple_strtoul(arg, &end, 10); | ||
40 | |||
41 | return (*end == 0) ? 0 : -EINVAL; | ||
42 | } | ||
43 | early_param("memory_corruption_check_period", set_corruption_check_period); | ||
44 | |||
45 | static __init int set_corruption_check_size(char *arg) | ||
46 | { | ||
47 | char *end; | ||
48 | unsigned size; | ||
49 | |||
50 | size = memparse(arg, &end); | ||
51 | |||
52 | if (*end == '\0') | ||
53 | corruption_check_size = size; | ||
54 | |||
55 | return (size == corruption_check_size) ? 0 : -EINVAL; | ||
56 | } | ||
57 | early_param("memory_corruption_check_size", set_corruption_check_size); | ||
58 | |||
59 | |||
60 | void __init setup_bios_corruption_check(void) | ||
61 | { | ||
62 | u64 addr = PAGE_SIZE; /* assume first page is reserved anyway */ | ||
63 | |||
64 | if (memory_corruption_check == -1) { | ||
65 | memory_corruption_check = | ||
66 | #ifdef CONFIG_X86_BOOTPARAM_MEMORY_CORRUPTION_CHECK | ||
67 | 1 | ||
68 | #else | ||
69 | 0 | ||
70 | #endif | ||
71 | ; | ||
72 | } | ||
73 | |||
74 | if (corruption_check_size == 0) | ||
75 | memory_corruption_check = 0; | ||
76 | |||
77 | if (!memory_corruption_check) | ||
78 | return; | ||
79 | |||
80 | corruption_check_size = round_up(corruption_check_size, PAGE_SIZE); | ||
81 | |||
82 | while (addr < corruption_check_size && num_scan_areas < MAX_SCAN_AREAS) { | ||
83 | u64 size; | ||
84 | addr = find_e820_area_size(addr, &size, PAGE_SIZE); | ||
85 | |||
86 | if (addr == 0) | ||
87 | break; | ||
88 | |||
89 | if ((addr + size) > corruption_check_size) | ||
90 | size = corruption_check_size - addr; | ||
91 | |||
92 | if (size == 0) | ||
93 | break; | ||
94 | |||
95 | e820_update_range(addr, size, E820_RAM, E820_RESERVED); | ||
96 | scan_areas[num_scan_areas].addr = addr; | ||
97 | scan_areas[num_scan_areas].size = size; | ||
98 | num_scan_areas++; | ||
99 | |||
100 | /* Assume we've already mapped this early memory */ | ||
101 | memset(__va(addr), 0, size); | ||
102 | |||
103 | addr += size; | ||
104 | } | ||
105 | |||
106 | printk(KERN_INFO "Scanning %d areas for low memory corruption\n", | ||
107 | num_scan_areas); | ||
108 | update_e820(); | ||
109 | } | ||
110 | |||
111 | |||
112 | void check_for_bios_corruption(void) | ||
113 | { | ||
114 | int i; | ||
115 | int corruption = 0; | ||
116 | |||
117 | if (!memory_corruption_check) | ||
118 | return; | ||
119 | |||
120 | for (i = 0; i < num_scan_areas; i++) { | ||
121 | unsigned long *addr = __va(scan_areas[i].addr); | ||
122 | unsigned long size = scan_areas[i].size; | ||
123 | |||
124 | for (; size; addr++, size -= sizeof(unsigned long)) { | ||
125 | if (!*addr) | ||
126 | continue; | ||
127 | printk(KERN_ERR "Corrupted low memory at %p (%lx phys) = %08lx\n", | ||
128 | addr, __pa(addr), *addr); | ||
129 | corruption = 1; | ||
130 | *addr = 0; | ||
131 | } | ||
132 | } | ||
133 | |||
134 | WARN_ONCE(corruption, KERN_ERR "Memory corruption detected in low memory\n"); | ||
135 | } | ||
136 | |||
137 | static void check_corruption(struct work_struct *dummy); | ||
138 | static DECLARE_DELAYED_WORK(bios_check_work, check_corruption); | ||
139 | |||
140 | static void check_corruption(struct work_struct *dummy) | ||
141 | { | ||
142 | check_for_bios_corruption(); | ||
143 | schedule_delayed_work(&bios_check_work, | ||
144 | round_jiffies_relative(corruption_check_period*HZ)); | ||
145 | } | ||
146 | |||
147 | static int start_periodic_check_for_corruption(void) | ||
148 | { | ||
149 | if (!memory_corruption_check || corruption_check_period == 0) | ||
150 | return 0; | ||
151 | |||
152 | printk(KERN_INFO "Scanning for low memory corruption every %d seconds\n", | ||
153 | corruption_check_period); | ||
154 | |||
155 | /* First time we run the checks right away */ | ||
156 | schedule_delayed_work(&bios_check_work, 0); | ||
157 | return 0; | ||
158 | } | ||
159 | |||
160 | module_init(start_periodic_check_for_corruption); | ||
161 | |||
diff --git a/arch/x86/kernel/cpu/Makefile b/arch/x86/kernel/cpu/Makefile index 82ec6075c057..82db7f45e2de 100644 --- a/arch/x86/kernel/cpu/Makefile +++ b/arch/x86/kernel/cpu/Makefile | |||
@@ -2,8 +2,14 @@ | |||
2 | # Makefile for x86-compatible CPU details and quirks | 2 | # Makefile for x86-compatible CPU details and quirks |
3 | # | 3 | # |
4 | 4 | ||
5 | # Don't trace early stages of a secondary CPU boot | ||
6 | ifdef CONFIG_FUNCTION_TRACER | ||
7 | CFLAGS_REMOVE_common.o = -pg | ||
8 | endif | ||
9 | |||
5 | obj-y := intel_cacheinfo.o addon_cpuid_features.o | 10 | obj-y := intel_cacheinfo.o addon_cpuid_features.o |
6 | obj-y += proc.o capflags.o powerflags.o common.o | 11 | obj-y += proc.o capflags.o powerflags.o common.o |
12 | obj-y += vmware.o hypervisor.o | ||
7 | 13 | ||
8 | obj-$(CONFIG_X86_32) += bugs.o cmpxchg.o | 14 | obj-$(CONFIG_X86_32) += bugs.o cmpxchg.o |
9 | obj-$(CONFIG_X86_64) += bugs_64.o | 15 | obj-$(CONFIG_X86_64) += bugs_64.o |
diff --git a/arch/x86/kernel/cpu/addon_cpuid_features.c b/arch/x86/kernel/cpu/addon_cpuid_features.c index ef8f831af823..2cf23634b6d9 100644 --- a/arch/x86/kernel/cpu/addon_cpuid_features.c +++ b/arch/x86/kernel/cpu/addon_cpuid_features.c | |||
@@ -120,9 +120,17 @@ void __cpuinit detect_extended_topology(struct cpuinfo_x86 *c) | |||
120 | c->cpu_core_id = phys_pkg_id(c->initial_apicid, ht_mask_width) | 120 | c->cpu_core_id = phys_pkg_id(c->initial_apicid, ht_mask_width) |
121 | & core_select_mask; | 121 | & core_select_mask; |
122 | c->phys_proc_id = phys_pkg_id(c->initial_apicid, core_plus_mask_width); | 122 | c->phys_proc_id = phys_pkg_id(c->initial_apicid, core_plus_mask_width); |
123 | /* | ||
124 | * Reinit the apicid, now that we have extended initial_apicid. | ||
125 | */ | ||
126 | c->apicid = phys_pkg_id(c->initial_apicid, 0); | ||
123 | #else | 127 | #else |
124 | c->cpu_core_id = phys_pkg_id(ht_mask_width) & core_select_mask; | 128 | c->cpu_core_id = phys_pkg_id(ht_mask_width) & core_select_mask; |
125 | c->phys_proc_id = phys_pkg_id(core_plus_mask_width); | 129 | c->phys_proc_id = phys_pkg_id(core_plus_mask_width); |
130 | /* | ||
131 | * Reinit the apicid, now that we have extended initial_apicid. | ||
132 | */ | ||
133 | c->apicid = phys_pkg_id(0); | ||
126 | #endif | 134 | #endif |
127 | c->x86_max_cores = (core_level_siblings / smp_num_siblings); | 135 | c->x86_max_cores = (core_level_siblings / smp_num_siblings); |
128 | 136 | ||
diff --git a/arch/x86/kernel/cpu/amd.c b/arch/x86/kernel/cpu/amd.c index 8f1e31db2ad5..7c878f6aa919 100644 --- a/arch/x86/kernel/cpu/amd.c +++ b/arch/x86/kernel/cpu/amd.c | |||
@@ -283,9 +283,14 @@ static void __cpuinit early_init_amd(struct cpuinfo_x86 *c) | |||
283 | { | 283 | { |
284 | early_init_amd_mc(c); | 284 | early_init_amd_mc(c); |
285 | 285 | ||
286 | /* c->x86_power is 8000_0007 edx. Bit 8 is constant TSC */ | 286 | /* |
287 | if (c->x86_power & (1<<8)) | 287 | * c->x86_power is 8000_0007 edx. Bit 8 is TSC runs at constant rate |
288 | * with P/T states and does not stop in deep C-states | ||
289 | */ | ||
290 | if (c->x86_power & (1 << 8)) { | ||
288 | set_cpu_cap(c, X86_FEATURE_CONSTANT_TSC); | 291 | set_cpu_cap(c, X86_FEATURE_CONSTANT_TSC); |
292 | set_cpu_cap(c, X86_FEATURE_NONSTOP_TSC); | ||
293 | } | ||
289 | 294 | ||
290 | #ifdef CONFIG_X86_64 | 295 | #ifdef CONFIG_X86_64 |
291 | set_cpu_cap(c, X86_FEATURE_SYSCALL32); | 296 | set_cpu_cap(c, X86_FEATURE_SYSCALL32); |
diff --git a/arch/x86/kernel/cpu/common.c b/arch/x86/kernel/cpu/common.c index b9c9ea0217a9..3f95a40f718a 100644 --- a/arch/x86/kernel/cpu/common.c +++ b/arch/x86/kernel/cpu/common.c | |||
@@ -36,6 +36,7 @@ | |||
36 | #include <asm/proto.h> | 36 | #include <asm/proto.h> |
37 | #include <asm/sections.h> | 37 | #include <asm/sections.h> |
38 | #include <asm/setup.h> | 38 | #include <asm/setup.h> |
39 | #include <asm/hypervisor.h> | ||
39 | 40 | ||
40 | #include "cpu.h" | 41 | #include "cpu.h" |
41 | 42 | ||
@@ -354,7 +355,7 @@ void __cpuinit detect_ht(struct cpuinfo_x86 *c) | |||
354 | printk(KERN_INFO "CPU: Hyper-Threading is disabled\n"); | 355 | printk(KERN_INFO "CPU: Hyper-Threading is disabled\n"); |
355 | } else if (smp_num_siblings > 1) { | 356 | } else if (smp_num_siblings > 1) { |
356 | 357 | ||
357 | if (smp_num_siblings > NR_CPUS) { | 358 | if (smp_num_siblings > nr_cpu_ids) { |
358 | printk(KERN_WARNING "CPU: Unsupported number of siblings %d", | 359 | printk(KERN_WARNING "CPU: Unsupported number of siblings %d", |
359 | smp_num_siblings); | 360 | smp_num_siblings); |
360 | smp_num_siblings = 1; | 361 | smp_num_siblings = 1; |
@@ -703,6 +704,7 @@ static void __cpuinit identify_cpu(struct cpuinfo_x86 *c) | |||
703 | detect_ht(c); | 704 | detect_ht(c); |
704 | #endif | 705 | #endif |
705 | 706 | ||
707 | init_hypervisor(c); | ||
706 | /* | 708 | /* |
707 | * On SMP, boot_cpu_data holds the common feature set between | 709 | * On SMP, boot_cpu_data holds the common feature set between |
708 | * all CPUs; so make sure that we indicate which features are | 710 | * all CPUs; so make sure that we indicate which features are |
@@ -862,7 +864,7 @@ EXPORT_SYMBOL(_cpu_pda); | |||
862 | 864 | ||
863 | struct desc_ptr idt_descr = { 256 * 16 - 1, (unsigned long) idt_table }; | 865 | struct desc_ptr idt_descr = { 256 * 16 - 1, (unsigned long) idt_table }; |
864 | 866 | ||
865 | char boot_cpu_stack[IRQSTACKSIZE] __page_aligned_bss; | 867 | static char boot_cpu_stack[IRQSTACKSIZE] __page_aligned_bss; |
866 | 868 | ||
867 | void __cpuinit pda_init(int cpu) | 869 | void __cpuinit pda_init(int cpu) |
868 | { | 870 | { |
@@ -903,8 +905,8 @@ void __cpuinit pda_init(int cpu) | |||
903 | } | 905 | } |
904 | } | 906 | } |
905 | 907 | ||
906 | char boot_exception_stacks[(N_EXCEPTION_STACKS - 1) * EXCEPTION_STKSZ + | 908 | static char boot_exception_stacks[(N_EXCEPTION_STACKS - 1) * EXCEPTION_STKSZ + |
907 | DEBUG_STKSZ] __page_aligned_bss; | 909 | DEBUG_STKSZ] __page_aligned_bss; |
908 | 910 | ||
909 | extern asmlinkage void ignore_sysret(void); | 911 | extern asmlinkage void ignore_sysret(void); |
910 | 912 | ||
diff --git a/arch/x86/kernel/cpu/cpufreq/acpi-cpufreq.c b/arch/x86/kernel/cpu/cpufreq/acpi-cpufreq.c index 8e48c5d4467d..28102ad1a363 100644 --- a/arch/x86/kernel/cpu/cpufreq/acpi-cpufreq.c +++ b/arch/x86/kernel/cpu/cpufreq/acpi-cpufreq.c | |||
@@ -33,6 +33,7 @@ | |||
33 | #include <linux/cpufreq.h> | 33 | #include <linux/cpufreq.h> |
34 | #include <linux/compiler.h> | 34 | #include <linux/compiler.h> |
35 | #include <linux/dmi.h> | 35 | #include <linux/dmi.h> |
36 | #include <linux/ftrace.h> | ||
36 | 37 | ||
37 | #include <linux/acpi.h> | 38 | #include <linux/acpi.h> |
38 | #include <acpi/processor.h> | 39 | #include <acpi/processor.h> |
@@ -391,6 +392,7 @@ static int acpi_cpufreq_target(struct cpufreq_policy *policy, | |||
391 | unsigned int next_perf_state = 0; /* Index into perf table */ | 392 | unsigned int next_perf_state = 0; /* Index into perf table */ |
392 | unsigned int i; | 393 | unsigned int i; |
393 | int result = 0; | 394 | int result = 0; |
395 | struct power_trace it; | ||
394 | 396 | ||
395 | dprintk("acpi_cpufreq_target %d (%d)\n", target_freq, policy->cpu); | 397 | dprintk("acpi_cpufreq_target %d (%d)\n", target_freq, policy->cpu); |
396 | 398 | ||
@@ -427,6 +429,8 @@ static int acpi_cpufreq_target(struct cpufreq_policy *policy, | |||
427 | } | 429 | } |
428 | } | 430 | } |
429 | 431 | ||
432 | trace_power_mark(&it, POWER_PSTATE, next_perf_state); | ||
433 | |||
430 | switch (data->cpu_feature) { | 434 | switch (data->cpu_feature) { |
431 | case SYSTEM_INTEL_MSR_CAPABLE: | 435 | case SYSTEM_INTEL_MSR_CAPABLE: |
432 | cmd.type = SYSTEM_INTEL_MSR_CAPABLE; | 436 | cmd.type = SYSTEM_INTEL_MSR_CAPABLE; |
@@ -513,6 +517,17 @@ acpi_cpufreq_guess_freq(struct acpi_cpufreq_data *data, unsigned int cpu) | |||
513 | } | 517 | } |
514 | } | 518 | } |
515 | 519 | ||
520 | static void free_acpi_perf_data(void) | ||
521 | { | ||
522 | unsigned int i; | ||
523 | |||
524 | /* Freeing a NULL pointer is OK, and alloc_percpu zeroes. */ | ||
525 | for_each_possible_cpu(i) | ||
526 | free_cpumask_var(per_cpu_ptr(acpi_perf_data, i) | ||
527 | ->shared_cpu_map); | ||
528 | free_percpu(acpi_perf_data); | ||
529 | } | ||
530 | |||
516 | /* | 531 | /* |
517 | * acpi_cpufreq_early_init - initialize ACPI P-States library | 532 | * acpi_cpufreq_early_init - initialize ACPI P-States library |
518 | * | 533 | * |
@@ -523,6 +538,7 @@ acpi_cpufreq_guess_freq(struct acpi_cpufreq_data *data, unsigned int cpu) | |||
523 | */ | 538 | */ |
524 | static int __init acpi_cpufreq_early_init(void) | 539 | static int __init acpi_cpufreq_early_init(void) |
525 | { | 540 | { |
541 | unsigned int i; | ||
526 | dprintk("acpi_cpufreq_early_init\n"); | 542 | dprintk("acpi_cpufreq_early_init\n"); |
527 | 543 | ||
528 | acpi_perf_data = alloc_percpu(struct acpi_processor_performance); | 544 | acpi_perf_data = alloc_percpu(struct acpi_processor_performance); |
@@ -530,6 +546,16 @@ static int __init acpi_cpufreq_early_init(void) | |||
530 | dprintk("Memory allocation error for acpi_perf_data.\n"); | 546 | dprintk("Memory allocation error for acpi_perf_data.\n"); |
531 | return -ENOMEM; | 547 | return -ENOMEM; |
532 | } | 548 | } |
549 | for_each_possible_cpu(i) { | ||
550 | if (!alloc_cpumask_var_node( | ||
551 | &per_cpu_ptr(acpi_perf_data, i)->shared_cpu_map, | ||
552 | GFP_KERNEL, cpu_to_node(i))) { | ||
553 | |||
554 | /* Freeing a NULL pointer is OK: alloc_percpu zeroes. */ | ||
555 | free_acpi_perf_data(); | ||
556 | return -ENOMEM; | ||
557 | } | ||
558 | } | ||
533 | 559 | ||
534 | /* Do initialization in ACPI core */ | 560 | /* Do initialization in ACPI core */ |
535 | acpi_processor_preregister_performance(acpi_perf_data); | 561 | acpi_processor_preregister_performance(acpi_perf_data); |
@@ -600,9 +626,9 @@ static int acpi_cpufreq_cpu_init(struct cpufreq_policy *policy) | |||
600 | */ | 626 | */ |
601 | if (policy->shared_type == CPUFREQ_SHARED_TYPE_ALL || | 627 | if (policy->shared_type == CPUFREQ_SHARED_TYPE_ALL || |
602 | policy->shared_type == CPUFREQ_SHARED_TYPE_ANY) { | 628 | policy->shared_type == CPUFREQ_SHARED_TYPE_ANY) { |
603 | policy->cpus = perf->shared_cpu_map; | 629 | cpumask_copy(&policy->cpus, perf->shared_cpu_map); |
604 | } | 630 | } |
605 | policy->related_cpus = perf->shared_cpu_map; | 631 | cpumask_copy(&policy->related_cpus, perf->shared_cpu_map); |
606 | 632 | ||
607 | #ifdef CONFIG_SMP | 633 | #ifdef CONFIG_SMP |
608 | dmi_check_system(sw_any_bug_dmi_table); | 634 | dmi_check_system(sw_any_bug_dmi_table); |
@@ -791,7 +817,7 @@ static int __init acpi_cpufreq_init(void) | |||
791 | 817 | ||
792 | ret = cpufreq_register_driver(&acpi_cpufreq_driver); | 818 | ret = cpufreq_register_driver(&acpi_cpufreq_driver); |
793 | if (ret) | 819 | if (ret) |
794 | free_percpu(acpi_perf_data); | 820 | free_acpi_perf_data(); |
795 | 821 | ||
796 | return ret; | 822 | return ret; |
797 | } | 823 | } |
diff --git a/arch/x86/kernel/cpu/cpufreq/longhaul.c b/arch/x86/kernel/cpu/cpufreq/longhaul.c index b0461856acfb..a4cff5d6e380 100644 --- a/arch/x86/kernel/cpu/cpufreq/longhaul.c +++ b/arch/x86/kernel/cpu/cpufreq/longhaul.c | |||
@@ -982,7 +982,7 @@ static int __init longhaul_init(void) | |||
982 | case 10: | 982 | case 10: |
983 | printk(KERN_ERR PFX "Use acpi-cpufreq driver for VIA C7\n"); | 983 | printk(KERN_ERR PFX "Use acpi-cpufreq driver for VIA C7\n"); |
984 | default: | 984 | default: |
985 | ;; | 985 | ; |
986 | } | 986 | } |
987 | 987 | ||
988 | return -ENODEV; | 988 | return -ENODEV; |
diff --git a/arch/x86/kernel/cpu/cpufreq/p4-clockmod.c b/arch/x86/kernel/cpu/cpufreq/p4-clockmod.c index b8e05ee4f736..beea4466b063 100644 --- a/arch/x86/kernel/cpu/cpufreq/p4-clockmod.c +++ b/arch/x86/kernel/cpu/cpufreq/p4-clockmod.c | |||
@@ -160,6 +160,7 @@ static unsigned int cpufreq_p4_get_frequency(struct cpuinfo_x86 *c) | |||
160 | switch (c->x86_model) { | 160 | switch (c->x86_model) { |
161 | case 0x0E: /* Core */ | 161 | case 0x0E: /* Core */ |
162 | case 0x0F: /* Core Duo */ | 162 | case 0x0F: /* Core Duo */ |
163 | case 0x16: /* Celeron Core */ | ||
163 | p4clockmod_driver.flags |= CPUFREQ_CONST_LOOPS; | 164 | p4clockmod_driver.flags |= CPUFREQ_CONST_LOOPS; |
164 | return speedstep_get_processor_frequency(SPEEDSTEP_PROCESSOR_PCORE); | 165 | return speedstep_get_processor_frequency(SPEEDSTEP_PROCESSOR_PCORE); |
165 | case 0x0D: /* Pentium M (Dothan) */ | 166 | case 0x0D: /* Pentium M (Dothan) */ |
@@ -171,7 +172,9 @@ static unsigned int cpufreq_p4_get_frequency(struct cpuinfo_x86 *c) | |||
171 | } | 172 | } |
172 | 173 | ||
173 | if (c->x86 != 0xF) { | 174 | if (c->x86 != 0xF) { |
174 | printk(KERN_WARNING PFX "Unknown p4-clockmod-capable CPU. Please send an e-mail to <cpufreq@vger.kernel.org>\n"); | 175 | if (!cpu_has(c, X86_FEATURE_EST)) |
176 | printk(KERN_WARNING PFX "Unknown p4-clockmod-capable CPU. " | ||
177 | "Please send an e-mail to <cpufreq@vger.kernel.org>\n"); | ||
175 | return 0; | 178 | return 0; |
176 | } | 179 | } |
177 | 180 | ||
@@ -274,6 +277,7 @@ static struct cpufreq_driver p4clockmod_driver = { | |||
274 | .name = "p4-clockmod", | 277 | .name = "p4-clockmod", |
275 | .owner = THIS_MODULE, | 278 | .owner = THIS_MODULE, |
276 | .attr = p4clockmod_attr, | 279 | .attr = p4clockmod_attr, |
280 | .hide_interface = 1, | ||
277 | }; | 281 | }; |
278 | 282 | ||
279 | 283 | ||
diff --git a/arch/x86/kernel/cpu/cpufreq/powernow-k7.c b/arch/x86/kernel/cpu/cpufreq/powernow-k7.c index 7c7d56b43136..1b446d79a8fd 100644 --- a/arch/x86/kernel/cpu/cpufreq/powernow-k7.c +++ b/arch/x86/kernel/cpu/cpufreq/powernow-k7.c | |||
@@ -310,6 +310,12 @@ static int powernow_acpi_init(void) | |||
310 | goto err0; | 310 | goto err0; |
311 | } | 311 | } |
312 | 312 | ||
313 | if (!alloc_cpumask_var(&acpi_processor_perf->shared_cpu_map, | ||
314 | GFP_KERNEL)) { | ||
315 | retval = -ENOMEM; | ||
316 | goto err05; | ||
317 | } | ||
318 | |||
313 | if (acpi_processor_register_performance(acpi_processor_perf, 0)) { | 319 | if (acpi_processor_register_performance(acpi_processor_perf, 0)) { |
314 | retval = -EIO; | 320 | retval = -EIO; |
315 | goto err1; | 321 | goto err1; |
@@ -412,6 +418,8 @@ static int powernow_acpi_init(void) | |||
412 | err2: | 418 | err2: |
413 | acpi_processor_unregister_performance(acpi_processor_perf, 0); | 419 | acpi_processor_unregister_performance(acpi_processor_perf, 0); |
414 | err1: | 420 | err1: |
421 | free_cpumask_var(acpi_processor_perf->shared_cpu_map); | ||
422 | err05: | ||
415 | kfree(acpi_processor_perf); | 423 | kfree(acpi_processor_perf); |
416 | err0: | 424 | err0: |
417 | printk(KERN_WARNING PFX "ACPI perflib can not be used in this platform\n"); | 425 | printk(KERN_WARNING PFX "ACPI perflib can not be used in this platform\n"); |
@@ -652,6 +660,7 @@ static int powernow_cpu_exit (struct cpufreq_policy *policy) { | |||
652 | #ifdef CONFIG_X86_POWERNOW_K7_ACPI | 660 | #ifdef CONFIG_X86_POWERNOW_K7_ACPI |
653 | if (acpi_processor_perf) { | 661 | if (acpi_processor_perf) { |
654 | acpi_processor_unregister_performance(acpi_processor_perf, 0); | 662 | acpi_processor_unregister_performance(acpi_processor_perf, 0); |
663 | free_cpumask_var(acpi_processor_perf->shared_cpu_map); | ||
655 | kfree(acpi_processor_perf); | 664 | kfree(acpi_processor_perf); |
656 | } | 665 | } |
657 | #endif | 666 | #endif |
diff --git a/arch/x86/kernel/cpu/cpufreq/powernow-k8.c b/arch/x86/kernel/cpu/cpufreq/powernow-k8.c index 7f05f44b97e9..c3c9adbaa26f 100644 --- a/arch/x86/kernel/cpu/cpufreq/powernow-k8.c +++ b/arch/x86/kernel/cpu/cpufreq/powernow-k8.c | |||
@@ -766,7 +766,7 @@ static void powernow_k8_acpi_pst_values(struct powernow_k8_data *data, unsigned | |||
766 | static int powernow_k8_cpu_init_acpi(struct powernow_k8_data *data) | 766 | static int powernow_k8_cpu_init_acpi(struct powernow_k8_data *data) |
767 | { | 767 | { |
768 | struct cpufreq_frequency_table *powernow_table; | 768 | struct cpufreq_frequency_table *powernow_table; |
769 | int ret_val; | 769 | int ret_val = -ENODEV; |
770 | 770 | ||
771 | if (acpi_processor_register_performance(&data->acpi_data, data->cpu)) { | 771 | if (acpi_processor_register_performance(&data->acpi_data, data->cpu)) { |
772 | dprintk("register performance failed: bad ACPI data\n"); | 772 | dprintk("register performance failed: bad ACPI data\n"); |
@@ -815,6 +815,13 @@ static int powernow_k8_cpu_init_acpi(struct powernow_k8_data *data) | |||
815 | /* notify BIOS that we exist */ | 815 | /* notify BIOS that we exist */ |
816 | acpi_processor_notify_smm(THIS_MODULE); | 816 | acpi_processor_notify_smm(THIS_MODULE); |
817 | 817 | ||
818 | if (!alloc_cpumask_var(&data->acpi_data.shared_cpu_map, GFP_KERNEL)) { | ||
819 | printk(KERN_ERR PFX | ||
820 | "unable to alloc powernow_k8_data cpumask\n"); | ||
821 | ret_val = -ENOMEM; | ||
822 | goto err_out_mem; | ||
823 | } | ||
824 | |||
818 | return 0; | 825 | return 0; |
819 | 826 | ||
820 | err_out_mem: | 827 | err_out_mem: |
@@ -826,7 +833,7 @@ err_out: | |||
826 | /* data->acpi_data.state_count informs us at ->exit() whether ACPI was used */ | 833 | /* data->acpi_data.state_count informs us at ->exit() whether ACPI was used */ |
827 | data->acpi_data.state_count = 0; | 834 | data->acpi_data.state_count = 0; |
828 | 835 | ||
829 | return -ENODEV; | 836 | return ret_val; |
830 | } | 837 | } |
831 | 838 | ||
832 | static int fill_powernow_table_pstate(struct powernow_k8_data *data, struct cpufreq_frequency_table *powernow_table) | 839 | static int fill_powernow_table_pstate(struct powernow_k8_data *data, struct cpufreq_frequency_table *powernow_table) |
@@ -929,6 +936,7 @@ static void powernow_k8_cpu_exit_acpi(struct powernow_k8_data *data) | |||
929 | { | 936 | { |
930 | if (data->acpi_data.state_count) | 937 | if (data->acpi_data.state_count) |
931 | acpi_processor_unregister_performance(&data->acpi_data, data->cpu); | 938 | acpi_processor_unregister_performance(&data->acpi_data, data->cpu); |
939 | free_cpumask_var(data->acpi_data.shared_cpu_map); | ||
932 | } | 940 | } |
933 | 941 | ||
934 | #else | 942 | #else |
@@ -1134,7 +1142,8 @@ static int __cpuinit powernowk8_cpu_init(struct cpufreq_policy *pol) | |||
1134 | data->cpu = pol->cpu; | 1142 | data->cpu = pol->cpu; |
1135 | data->currpstate = HW_PSTATE_INVALID; | 1143 | data->currpstate = HW_PSTATE_INVALID; |
1136 | 1144 | ||
1137 | if (powernow_k8_cpu_init_acpi(data)) { | 1145 | rc = powernow_k8_cpu_init_acpi(data); |
1146 | if (rc) { | ||
1138 | /* | 1147 | /* |
1139 | * Use the PSB BIOS structure. This is only availabe on | 1148 | * Use the PSB BIOS structure. This is only availabe on |
1140 | * an UP version, and is deprecated by AMD. | 1149 | * an UP version, and is deprecated by AMD. |
@@ -1152,20 +1161,17 @@ static int __cpuinit powernowk8_cpu_init(struct cpufreq_policy *pol) | |||
1152 | "ACPI maintainers and complain to your BIOS " | 1161 | "ACPI maintainers and complain to your BIOS " |
1153 | "vendor.\n"); | 1162 | "vendor.\n"); |
1154 | #endif | 1163 | #endif |
1155 | kfree(data); | 1164 | goto err_out; |
1156 | return -ENODEV; | ||
1157 | } | 1165 | } |
1158 | if (pol->cpu != 0) { | 1166 | if (pol->cpu != 0) { |
1159 | printk(KERN_ERR FW_BUG PFX "No ACPI _PSS objects for " | 1167 | printk(KERN_ERR FW_BUG PFX "No ACPI _PSS objects for " |
1160 | "CPU other than CPU0. Complain to your BIOS " | 1168 | "CPU other than CPU0. Complain to your BIOS " |
1161 | "vendor.\n"); | 1169 | "vendor.\n"); |
1162 | kfree(data); | 1170 | goto err_out; |
1163 | return -ENODEV; | ||
1164 | } | 1171 | } |
1165 | rc = find_psb_table(data); | 1172 | rc = find_psb_table(data); |
1166 | if (rc) { | 1173 | if (rc) { |
1167 | kfree(data); | 1174 | goto err_out; |
1168 | return -ENODEV; | ||
1169 | } | 1175 | } |
1170 | } | 1176 | } |
1171 | 1177 | ||
diff --git a/arch/x86/kernel/cpu/cpufreq/speedstep-centrino.c b/arch/x86/kernel/cpu/cpufreq/speedstep-centrino.c index 3b5f06423e77..f0ea6fa2f53c 100644 --- a/arch/x86/kernel/cpu/cpufreq/speedstep-centrino.c +++ b/arch/x86/kernel/cpu/cpufreq/speedstep-centrino.c | |||
@@ -459,9 +459,7 @@ static int centrino_verify (struct cpufreq_policy *policy) | |||
459 | * Sets a new CPUFreq policy. | 459 | * Sets a new CPUFreq policy. |
460 | */ | 460 | */ |
461 | struct allmasks { | 461 | struct allmasks { |
462 | cpumask_t online_policy_cpus; | ||
463 | cpumask_t saved_mask; | 462 | cpumask_t saved_mask; |
464 | cpumask_t set_mask; | ||
465 | cpumask_t covered_cpus; | 463 | cpumask_t covered_cpus; |
466 | }; | 464 | }; |
467 | 465 | ||
@@ -475,9 +473,7 @@ static int centrino_target (struct cpufreq_policy *policy, | |||
475 | int retval = 0; | 473 | int retval = 0; |
476 | unsigned int j, k, first_cpu, tmp; | 474 | unsigned int j, k, first_cpu, tmp; |
477 | CPUMASK_ALLOC(allmasks); | 475 | CPUMASK_ALLOC(allmasks); |
478 | CPUMASK_PTR(online_policy_cpus, allmasks); | ||
479 | CPUMASK_PTR(saved_mask, allmasks); | 476 | CPUMASK_PTR(saved_mask, allmasks); |
480 | CPUMASK_PTR(set_mask, allmasks); | ||
481 | CPUMASK_PTR(covered_cpus, allmasks); | 477 | CPUMASK_PTR(covered_cpus, allmasks); |
482 | 478 | ||
483 | if (unlikely(allmasks == NULL)) | 479 | if (unlikely(allmasks == NULL)) |
@@ -497,30 +493,28 @@ static int centrino_target (struct cpufreq_policy *policy, | |||
497 | goto out; | 493 | goto out; |
498 | } | 494 | } |
499 | 495 | ||
500 | #ifdef CONFIG_HOTPLUG_CPU | ||
501 | /* cpufreq holds the hotplug lock, so we are safe from here on */ | ||
502 | cpus_and(*online_policy_cpus, cpu_online_map, policy->cpus); | ||
503 | #else | ||
504 | *online_policy_cpus = policy->cpus; | ||
505 | #endif | ||
506 | |||
507 | *saved_mask = current->cpus_allowed; | 496 | *saved_mask = current->cpus_allowed; |
508 | first_cpu = 1; | 497 | first_cpu = 1; |
509 | cpus_clear(*covered_cpus); | 498 | cpus_clear(*covered_cpus); |
510 | for_each_cpu_mask_nr(j, *online_policy_cpus) { | 499 | for_each_cpu_mask_nr(j, policy->cpus) { |
500 | const cpumask_t *mask; | ||
501 | |||
502 | /* cpufreq holds the hotplug lock, so we are safe here */ | ||
503 | if (!cpu_online(j)) | ||
504 | continue; | ||
505 | |||
511 | /* | 506 | /* |
512 | * Support for SMP systems. | 507 | * Support for SMP systems. |
513 | * Make sure we are running on CPU that wants to change freq | 508 | * Make sure we are running on CPU that wants to change freq |
514 | */ | 509 | */ |
515 | cpus_clear(*set_mask); | ||
516 | if (policy->shared_type == CPUFREQ_SHARED_TYPE_ANY) | 510 | if (policy->shared_type == CPUFREQ_SHARED_TYPE_ANY) |
517 | cpus_or(*set_mask, *set_mask, *online_policy_cpus); | 511 | mask = &policy->cpus; |
518 | else | 512 | else |
519 | cpu_set(j, *set_mask); | 513 | mask = &cpumask_of_cpu(j); |
520 | 514 | ||
521 | set_cpus_allowed_ptr(current, set_mask); | 515 | set_cpus_allowed_ptr(current, mask); |
522 | preempt_disable(); | 516 | preempt_disable(); |
523 | if (unlikely(!cpu_isset(smp_processor_id(), *set_mask))) { | 517 | if (unlikely(!cpu_isset(smp_processor_id(), *mask))) { |
524 | dprintk("couldn't limit to CPUs in this domain\n"); | 518 | dprintk("couldn't limit to CPUs in this domain\n"); |
525 | retval = -EAGAIN; | 519 | retval = -EAGAIN; |
526 | if (first_cpu) { | 520 | if (first_cpu) { |
@@ -548,7 +542,9 @@ static int centrino_target (struct cpufreq_policy *policy, | |||
548 | dprintk("target=%dkHz old=%d new=%d msr=%04x\n", | 542 | dprintk("target=%dkHz old=%d new=%d msr=%04x\n", |
549 | target_freq, freqs.old, freqs.new, msr); | 543 | target_freq, freqs.old, freqs.new, msr); |
550 | 544 | ||
551 | for_each_cpu_mask_nr(k, *online_policy_cpus) { | 545 | for_each_cpu_mask_nr(k, policy->cpus) { |
546 | if (!cpu_online(k)) | ||
547 | continue; | ||
552 | freqs.cpu = k; | 548 | freqs.cpu = k; |
553 | cpufreq_notify_transition(&freqs, | 549 | cpufreq_notify_transition(&freqs, |
554 | CPUFREQ_PRECHANGE); | 550 | CPUFREQ_PRECHANGE); |
@@ -571,7 +567,9 @@ static int centrino_target (struct cpufreq_policy *policy, | |||
571 | preempt_enable(); | 567 | preempt_enable(); |
572 | } | 568 | } |
573 | 569 | ||
574 | for_each_cpu_mask_nr(k, *online_policy_cpus) { | 570 | for_each_cpu_mask_nr(k, policy->cpus) { |
571 | if (!cpu_online(k)) | ||
572 | continue; | ||
575 | freqs.cpu = k; | 573 | freqs.cpu = k; |
576 | cpufreq_notify_transition(&freqs, CPUFREQ_POSTCHANGE); | 574 | cpufreq_notify_transition(&freqs, CPUFREQ_POSTCHANGE); |
577 | } | 575 | } |
@@ -584,18 +582,17 @@ static int centrino_target (struct cpufreq_policy *policy, | |||
584 | * Best effort undo.. | 582 | * Best effort undo.. |
585 | */ | 583 | */ |
586 | 584 | ||
587 | if (!cpus_empty(*covered_cpus)) | 585 | for_each_cpu_mask_nr(j, *covered_cpus) { |
588 | for_each_cpu_mask_nr(j, *covered_cpus) { | 586 | set_cpus_allowed_ptr(current, &cpumask_of_cpu(j)); |
589 | set_cpus_allowed_ptr(current, | 587 | wrmsr(MSR_IA32_PERF_CTL, oldmsr, h); |
590 | &cpumask_of_cpu(j)); | 588 | } |
591 | wrmsr(MSR_IA32_PERF_CTL, oldmsr, h); | ||
592 | } | ||
593 | 589 | ||
594 | tmp = freqs.new; | 590 | tmp = freqs.new; |
595 | freqs.new = freqs.old; | 591 | freqs.new = freqs.old; |
596 | freqs.old = tmp; | 592 | freqs.old = tmp; |
597 | for_each_cpu_mask_nr(j, *online_policy_cpus) { | 593 | for_each_cpu_mask_nr(j, policy->cpus) { |
598 | freqs.cpu = j; | 594 | if (!cpu_online(j)) |
595 | continue; | ||
599 | cpufreq_notify_transition(&freqs, CPUFREQ_PRECHANGE); | 596 | cpufreq_notify_transition(&freqs, CPUFREQ_PRECHANGE); |
600 | cpufreq_notify_transition(&freqs, CPUFREQ_POSTCHANGE); | 597 | cpufreq_notify_transition(&freqs, CPUFREQ_POSTCHANGE); |
601 | } | 598 | } |
diff --git a/arch/x86/kernel/cpu/cpufreq/speedstep-lib.c b/arch/x86/kernel/cpu/cpufreq/speedstep-lib.c index 98d4fdb7dc04..cdac7d62369b 100644 --- a/arch/x86/kernel/cpu/cpufreq/speedstep-lib.c +++ b/arch/x86/kernel/cpu/cpufreq/speedstep-lib.c | |||
@@ -139,6 +139,15 @@ static unsigned int pentium_core_get_frequency(void) | |||
139 | case 3: | 139 | case 3: |
140 | fsb = 166667; | 140 | fsb = 166667; |
141 | break; | 141 | break; |
142 | case 2: | ||
143 | fsb = 200000; | ||
144 | break; | ||
145 | case 0: | ||
146 | fsb = 266667; | ||
147 | break; | ||
148 | case 4: | ||
149 | fsb = 333333; | ||
150 | break; | ||
142 | default: | 151 | default: |
143 | printk(KERN_ERR "PCORE - MSR_FSB_FREQ undefined value"); | 152 | printk(KERN_ERR "PCORE - MSR_FSB_FREQ undefined value"); |
144 | } | 153 | } |
diff --git a/arch/x86/kernel/cpu/hypervisor.c b/arch/x86/kernel/cpu/hypervisor.c new file mode 100644 index 000000000000..fb5b86af0b01 --- /dev/null +++ b/arch/x86/kernel/cpu/hypervisor.c | |||
@@ -0,0 +1,58 @@ | |||
1 | /* | ||
2 | * Common hypervisor code | ||
3 | * | ||
4 | * Copyright (C) 2008, VMware, Inc. | ||
5 | * Author : Alok N Kataria <akataria@vmware.com> | ||
6 | * | ||
7 | * This program is free software; you can redistribute it and/or modify | ||
8 | * it under the terms of the GNU General Public License as published by | ||
9 | * the Free Software Foundation; either version 2 of the License, or | ||
10 | * (at your option) any later version. | ||
11 | * | ||
12 | * This program is distributed in the hope that it will be useful, but | ||
13 | * WITHOUT ANY WARRANTY; without even the implied warranty of | ||
14 | * MERCHANTABILITY OR FITNESS FOR A PARTICULAR PURPOSE, GOOD TITLE or | ||
15 | * NON INFRINGEMENT. See the GNU General Public License for more | ||
16 | * details. | ||
17 | * | ||
18 | * You should have received a copy of the GNU General Public License | ||
19 | * along with this program; if not, write to the Free Software | ||
20 | * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. | ||
21 | * | ||
22 | */ | ||
23 | |||
24 | #include <asm/processor.h> | ||
25 | #include <asm/vmware.h> | ||
26 | #include <asm/hypervisor.h> | ||
27 | |||
28 | static inline void __cpuinit | ||
29 | detect_hypervisor_vendor(struct cpuinfo_x86 *c) | ||
30 | { | ||
31 | if (vmware_platform()) { | ||
32 | c->x86_hyper_vendor = X86_HYPER_VENDOR_VMWARE; | ||
33 | } else { | ||
34 | c->x86_hyper_vendor = X86_HYPER_VENDOR_NONE; | ||
35 | } | ||
36 | } | ||
37 | |||
38 | unsigned long get_hypervisor_tsc_freq(void) | ||
39 | { | ||
40 | if (boot_cpu_data.x86_hyper_vendor == X86_HYPER_VENDOR_VMWARE) | ||
41 | return vmware_get_tsc_khz(); | ||
42 | return 0; | ||
43 | } | ||
44 | |||
45 | static inline void __cpuinit | ||
46 | hypervisor_set_feature_bits(struct cpuinfo_x86 *c) | ||
47 | { | ||
48 | if (boot_cpu_data.x86_hyper_vendor == X86_HYPER_VENDOR_VMWARE) { | ||
49 | vmware_set_feature_bits(c); | ||
50 | return; | ||
51 | } | ||
52 | } | ||
53 | |||
54 | void __cpuinit init_hypervisor(struct cpuinfo_x86 *c) | ||
55 | { | ||
56 | detect_hypervisor_vendor(c); | ||
57 | hypervisor_set_feature_bits(c); | ||
58 | } | ||
diff --git a/arch/x86/kernel/cpu/intel.c b/arch/x86/kernel/cpu/intel.c index cce0b6118d55..8ea6929e974c 100644 --- a/arch/x86/kernel/cpu/intel.c +++ b/arch/x86/kernel/cpu/intel.c | |||
@@ -11,7 +11,6 @@ | |||
11 | #include <asm/pgtable.h> | 11 | #include <asm/pgtable.h> |
12 | #include <asm/msr.h> | 12 | #include <asm/msr.h> |
13 | #include <asm/uaccess.h> | 13 | #include <asm/uaccess.h> |
14 | #include <asm/ptrace.h> | ||
15 | #include <asm/ds.h> | 14 | #include <asm/ds.h> |
16 | #include <asm/bugs.h> | 15 | #include <asm/bugs.h> |
17 | 16 | ||
@@ -41,6 +40,16 @@ static void __cpuinit early_init_intel(struct cpuinfo_x86 *c) | |||
41 | if (c->x86 == 15 && c->x86_cache_alignment == 64) | 40 | if (c->x86 == 15 && c->x86_cache_alignment == 64) |
42 | c->x86_cache_alignment = 128; | 41 | c->x86_cache_alignment = 128; |
43 | #endif | 42 | #endif |
43 | |||
44 | /* | ||
45 | * c->x86_power is 8000_0007 edx. Bit 8 is TSC runs at constant rate | ||
46 | * with P/T states and does not stop in deep C-states | ||
47 | */ | ||
48 | if (c->x86_power & (1 << 8)) { | ||
49 | set_cpu_cap(c, X86_FEATURE_CONSTANT_TSC); | ||
50 | set_cpu_cap(c, X86_FEATURE_NONSTOP_TSC); | ||
51 | } | ||
52 | |||
44 | } | 53 | } |
45 | 54 | ||
46 | #ifdef CONFIG_X86_32 | 55 | #ifdef CONFIG_X86_32 |
@@ -242,6 +251,13 @@ static void __cpuinit init_intel(struct cpuinfo_x86 *c) | |||
242 | 251 | ||
243 | intel_workarounds(c); | 252 | intel_workarounds(c); |
244 | 253 | ||
254 | /* | ||
255 | * Detect the extended topology information if available. This | ||
256 | * will reinitialise the initial_apicid which will be used | ||
257 | * in init_intel_cacheinfo() | ||
258 | */ | ||
259 | detect_extended_topology(c); | ||
260 | |||
245 | l2 = init_intel_cacheinfo(c); | 261 | l2 = init_intel_cacheinfo(c); |
246 | if (c->cpuid_level > 9) { | 262 | if (c->cpuid_level > 9) { |
247 | unsigned eax = cpuid_eax(10); | 263 | unsigned eax = cpuid_eax(10); |
@@ -307,13 +323,8 @@ static void __cpuinit init_intel(struct cpuinfo_x86 *c) | |||
307 | set_cpu_cap(c, X86_FEATURE_P4); | 323 | set_cpu_cap(c, X86_FEATURE_P4); |
308 | if (c->x86 == 6) | 324 | if (c->x86 == 6) |
309 | set_cpu_cap(c, X86_FEATURE_P3); | 325 | set_cpu_cap(c, X86_FEATURE_P3); |
310 | |||
311 | if (cpu_has_bts) | ||
312 | ptrace_bts_init_intel(c); | ||
313 | |||
314 | #endif | 326 | #endif |
315 | 327 | ||
316 | detect_extended_topology(c); | ||
317 | if (!cpu_has(c, X86_FEATURE_XTOPOLOGY)) { | 328 | if (!cpu_has(c, X86_FEATURE_XTOPOLOGY)) { |
318 | /* | 329 | /* |
319 | * let's use the legacy cpuid vector 0x1 and 0x4 for topology | 330 | * let's use the legacy cpuid vector 0x1 and 0x4 for topology |
diff --git a/arch/x86/kernel/cpu/intel_cacheinfo.c b/arch/x86/kernel/cpu/intel_cacheinfo.c index 3f46afbb1cf1..48533d77be78 100644 --- a/arch/x86/kernel/cpu/intel_cacheinfo.c +++ b/arch/x86/kernel/cpu/intel_cacheinfo.c | |||
@@ -534,31 +534,16 @@ static void __cpuinit free_cache_attributes(unsigned int cpu) | |||
534 | per_cpu(cpuid4_info, cpu) = NULL; | 534 | per_cpu(cpuid4_info, cpu) = NULL; |
535 | } | 535 | } |
536 | 536 | ||
537 | static int __cpuinit detect_cache_attributes(unsigned int cpu) | 537 | static void __cpuinit get_cpu_leaves(void *_retval) |
538 | { | 538 | { |
539 | struct _cpuid4_info *this_leaf; | 539 | int j, *retval = _retval, cpu = smp_processor_id(); |
540 | unsigned long j; | ||
541 | int retval; | ||
542 | cpumask_t oldmask; | ||
543 | |||
544 | if (num_cache_leaves == 0) | ||
545 | return -ENOENT; | ||
546 | |||
547 | per_cpu(cpuid4_info, cpu) = kzalloc( | ||
548 | sizeof(struct _cpuid4_info) * num_cache_leaves, GFP_KERNEL); | ||
549 | if (per_cpu(cpuid4_info, cpu) == NULL) | ||
550 | return -ENOMEM; | ||
551 | |||
552 | oldmask = current->cpus_allowed; | ||
553 | retval = set_cpus_allowed_ptr(current, &cpumask_of_cpu(cpu)); | ||
554 | if (retval) | ||
555 | goto out; | ||
556 | 540 | ||
557 | /* Do cpuid and store the results */ | 541 | /* Do cpuid and store the results */ |
558 | for (j = 0; j < num_cache_leaves; j++) { | 542 | for (j = 0; j < num_cache_leaves; j++) { |
543 | struct _cpuid4_info *this_leaf; | ||
559 | this_leaf = CPUID4_INFO_IDX(cpu, j); | 544 | this_leaf = CPUID4_INFO_IDX(cpu, j); |
560 | retval = cpuid4_cache_lookup(j, this_leaf); | 545 | *retval = cpuid4_cache_lookup(j, this_leaf); |
561 | if (unlikely(retval < 0)) { | 546 | if (unlikely(*retval < 0)) { |
562 | int i; | 547 | int i; |
563 | 548 | ||
564 | for (i = 0; i < j; i++) | 549 | for (i = 0; i < j; i++) |
@@ -567,9 +552,21 @@ static int __cpuinit detect_cache_attributes(unsigned int cpu) | |||
567 | } | 552 | } |
568 | cache_shared_cpu_map_setup(cpu, j); | 553 | cache_shared_cpu_map_setup(cpu, j); |
569 | } | 554 | } |
570 | set_cpus_allowed_ptr(current, &oldmask); | 555 | } |
556 | |||
557 | static int __cpuinit detect_cache_attributes(unsigned int cpu) | ||
558 | { | ||
559 | int retval; | ||
560 | |||
561 | if (num_cache_leaves == 0) | ||
562 | return -ENOENT; | ||
571 | 563 | ||
572 | out: | 564 | per_cpu(cpuid4_info, cpu) = kzalloc( |
565 | sizeof(struct _cpuid4_info) * num_cache_leaves, GFP_KERNEL); | ||
566 | if (per_cpu(cpuid4_info, cpu) == NULL) | ||
567 | return -ENOMEM; | ||
568 | |||
569 | smp_call_function_single(cpu, get_cpu_leaves, &retval, true); | ||
573 | if (retval) { | 570 | if (retval) { |
574 | kfree(per_cpu(cpuid4_info, cpu)); | 571 | kfree(per_cpu(cpuid4_info, cpu)); |
575 | per_cpu(cpuid4_info, cpu) = NULL; | 572 | per_cpu(cpuid4_info, cpu) = NULL; |
@@ -626,8 +623,8 @@ static ssize_t show_shared_cpu_map_func(struct _cpuid4_info *this_leaf, | |||
626 | cpumask_t *mask = &this_leaf->shared_cpu_map; | 623 | cpumask_t *mask = &this_leaf->shared_cpu_map; |
627 | 624 | ||
628 | n = type? | 625 | n = type? |
629 | cpulist_scnprintf(buf, len-2, *mask): | 626 | cpulist_scnprintf(buf, len-2, mask) : |
630 | cpumask_scnprintf(buf, len-2, *mask); | 627 | cpumask_scnprintf(buf, len-2, mask); |
631 | buf[n++] = '\n'; | 628 | buf[n++] = '\n'; |
632 | buf[n] = '\0'; | 629 | buf[n] = '\0'; |
633 | } | 630 | } |
@@ -644,20 +641,17 @@ static inline ssize_t show_shared_cpu_list(struct _cpuid4_info *leaf, char *buf) | |||
644 | return show_shared_cpu_map_func(leaf, 1, buf); | 641 | return show_shared_cpu_map_func(leaf, 1, buf); |
645 | } | 642 | } |
646 | 643 | ||
647 | static ssize_t show_type(struct _cpuid4_info *this_leaf, char *buf) { | 644 | static ssize_t show_type(struct _cpuid4_info *this_leaf, char *buf) |
648 | switch(this_leaf->eax.split.type) { | 645 | { |
649 | case CACHE_TYPE_DATA: | 646 | switch (this_leaf->eax.split.type) { |
647 | case CACHE_TYPE_DATA: | ||
650 | return sprintf(buf, "Data\n"); | 648 | return sprintf(buf, "Data\n"); |
651 | break; | 649 | case CACHE_TYPE_INST: |
652 | case CACHE_TYPE_INST: | ||
653 | return sprintf(buf, "Instruction\n"); | 650 | return sprintf(buf, "Instruction\n"); |
654 | break; | 651 | case CACHE_TYPE_UNIFIED: |
655 | case CACHE_TYPE_UNIFIED: | ||
656 | return sprintf(buf, "Unified\n"); | 652 | return sprintf(buf, "Unified\n"); |
657 | break; | 653 | default: |
658 | default: | ||
659 | return sprintf(buf, "Unknown\n"); | 654 | return sprintf(buf, "Unknown\n"); |
660 | break; | ||
661 | } | 655 | } |
662 | } | 656 | } |
663 | 657 | ||
diff --git a/arch/x86/kernel/cpu/mcheck/mce_amd_64.c b/arch/x86/kernel/cpu/mcheck/mce_amd_64.c index 5eb390a4b2e9..a5a5e0530370 100644 --- a/arch/x86/kernel/cpu/mcheck/mce_amd_64.c +++ b/arch/x86/kernel/cpu/mcheck/mce_amd_64.c | |||
@@ -83,34 +83,41 @@ static DEFINE_PER_CPU(unsigned char, bank_map); /* see which banks are on */ | |||
83 | * CPU Initialization | 83 | * CPU Initialization |
84 | */ | 84 | */ |
85 | 85 | ||
86 | struct thresh_restart { | ||
87 | struct threshold_block *b; | ||
88 | int reset; | ||
89 | u16 old_limit; | ||
90 | }; | ||
91 | |||
86 | /* must be called with correct cpu affinity */ | 92 | /* must be called with correct cpu affinity */ |
87 | static void threshold_restart_bank(struct threshold_block *b, | 93 | static long threshold_restart_bank(void *_tr) |
88 | int reset, u16 old_limit) | ||
89 | { | 94 | { |
95 | struct thresh_restart *tr = _tr; | ||
90 | u32 mci_misc_hi, mci_misc_lo; | 96 | u32 mci_misc_hi, mci_misc_lo; |
91 | 97 | ||
92 | rdmsr(b->address, mci_misc_lo, mci_misc_hi); | 98 | rdmsr(tr->b->address, mci_misc_lo, mci_misc_hi); |
93 | 99 | ||
94 | if (b->threshold_limit < (mci_misc_hi & THRESHOLD_MAX)) | 100 | if (tr->b->threshold_limit < (mci_misc_hi & THRESHOLD_MAX)) |
95 | reset = 1; /* limit cannot be lower than err count */ | 101 | tr->reset = 1; /* limit cannot be lower than err count */ |
96 | 102 | ||
97 | if (reset) { /* reset err count and overflow bit */ | 103 | if (tr->reset) { /* reset err count and overflow bit */ |
98 | mci_misc_hi = | 104 | mci_misc_hi = |
99 | (mci_misc_hi & ~(MASK_ERR_COUNT_HI | MASK_OVERFLOW_HI)) | | 105 | (mci_misc_hi & ~(MASK_ERR_COUNT_HI | MASK_OVERFLOW_HI)) | |
100 | (THRESHOLD_MAX - b->threshold_limit); | 106 | (THRESHOLD_MAX - tr->b->threshold_limit); |
101 | } else if (old_limit) { /* change limit w/o reset */ | 107 | } else if (tr->old_limit) { /* change limit w/o reset */ |
102 | int new_count = (mci_misc_hi & THRESHOLD_MAX) + | 108 | int new_count = (mci_misc_hi & THRESHOLD_MAX) + |
103 | (old_limit - b->threshold_limit); | 109 | (tr->old_limit - tr->b->threshold_limit); |
104 | mci_misc_hi = (mci_misc_hi & ~MASK_ERR_COUNT_HI) | | 110 | mci_misc_hi = (mci_misc_hi & ~MASK_ERR_COUNT_HI) | |
105 | (new_count & THRESHOLD_MAX); | 111 | (new_count & THRESHOLD_MAX); |
106 | } | 112 | } |
107 | 113 | ||
108 | b->interrupt_enable ? | 114 | tr->b->interrupt_enable ? |
109 | (mci_misc_hi = (mci_misc_hi & ~MASK_INT_TYPE_HI) | INT_TYPE_APIC) : | 115 | (mci_misc_hi = (mci_misc_hi & ~MASK_INT_TYPE_HI) | INT_TYPE_APIC) : |
110 | (mci_misc_hi &= ~MASK_INT_TYPE_HI); | 116 | (mci_misc_hi &= ~MASK_INT_TYPE_HI); |
111 | 117 | ||
112 | mci_misc_hi |= MASK_COUNT_EN_HI; | 118 | mci_misc_hi |= MASK_COUNT_EN_HI; |
113 | wrmsr(b->address, mci_misc_lo, mci_misc_hi); | 119 | wrmsr(tr->b->address, mci_misc_lo, mci_misc_hi); |
120 | return 0; | ||
114 | } | 121 | } |
115 | 122 | ||
116 | /* cpu init entry point, called from mce.c with preempt off */ | 123 | /* cpu init entry point, called from mce.c with preempt off */ |
@@ -120,6 +127,7 @@ void __cpuinit mce_amd_feature_init(struct cpuinfo_x86 *c) | |||
120 | unsigned int cpu = smp_processor_id(); | 127 | unsigned int cpu = smp_processor_id(); |
121 | u8 lvt_off; | 128 | u8 lvt_off; |
122 | u32 low = 0, high = 0, address = 0; | 129 | u32 low = 0, high = 0, address = 0; |
130 | struct thresh_restart tr; | ||
123 | 131 | ||
124 | for (bank = 0; bank < NR_BANKS; ++bank) { | 132 | for (bank = 0; bank < NR_BANKS; ++bank) { |
125 | for (block = 0; block < NR_BLOCKS; ++block) { | 133 | for (block = 0; block < NR_BLOCKS; ++block) { |
@@ -162,7 +170,10 @@ void __cpuinit mce_amd_feature_init(struct cpuinfo_x86 *c) | |||
162 | wrmsr(address, low, high); | 170 | wrmsr(address, low, high); |
163 | 171 | ||
164 | threshold_defaults.address = address; | 172 | threshold_defaults.address = address; |
165 | threshold_restart_bank(&threshold_defaults, 0, 0); | 173 | tr.b = &threshold_defaults; |
174 | tr.reset = 0; | ||
175 | tr.old_limit = 0; | ||
176 | threshold_restart_bank(&tr); | ||
166 | } | 177 | } |
167 | } | 178 | } |
168 | } | 179 | } |
@@ -237,7 +248,7 @@ asmlinkage void mce_threshold_interrupt(void) | |||
237 | } | 248 | } |
238 | } | 249 | } |
239 | out: | 250 | out: |
240 | add_pda(irq_threshold_count, 1); | 251 | inc_irq_stat(irq_threshold_count); |
241 | irq_exit(); | 252 | irq_exit(); |
242 | } | 253 | } |
243 | 254 | ||
@@ -251,20 +262,6 @@ struct threshold_attr { | |||
251 | ssize_t(*store) (struct threshold_block *, const char *, size_t count); | 262 | ssize_t(*store) (struct threshold_block *, const char *, size_t count); |
252 | }; | 263 | }; |
253 | 264 | ||
254 | static void affinity_set(unsigned int cpu, cpumask_t *oldmask, | ||
255 | cpumask_t *newmask) | ||
256 | { | ||
257 | *oldmask = current->cpus_allowed; | ||
258 | cpus_clear(*newmask); | ||
259 | cpu_set(cpu, *newmask); | ||
260 | set_cpus_allowed_ptr(current, newmask); | ||
261 | } | ||
262 | |||
263 | static void affinity_restore(const cpumask_t *oldmask) | ||
264 | { | ||
265 | set_cpus_allowed_ptr(current, oldmask); | ||
266 | } | ||
267 | |||
268 | #define SHOW_FIELDS(name) \ | 265 | #define SHOW_FIELDS(name) \ |
269 | static ssize_t show_ ## name(struct threshold_block * b, char *buf) \ | 266 | static ssize_t show_ ## name(struct threshold_block * b, char *buf) \ |
270 | { \ | 267 | { \ |
@@ -277,15 +274,16 @@ static ssize_t store_interrupt_enable(struct threshold_block *b, | |||
277 | const char *buf, size_t count) | 274 | const char *buf, size_t count) |
278 | { | 275 | { |
279 | char *end; | 276 | char *end; |
280 | cpumask_t oldmask, newmask; | 277 | struct thresh_restart tr; |
281 | unsigned long new = simple_strtoul(buf, &end, 0); | 278 | unsigned long new = simple_strtoul(buf, &end, 0); |
282 | if (end == buf) | 279 | if (end == buf) |
283 | return -EINVAL; | 280 | return -EINVAL; |
284 | b->interrupt_enable = !!new; | 281 | b->interrupt_enable = !!new; |
285 | 282 | ||
286 | affinity_set(b->cpu, &oldmask, &newmask); | 283 | tr.b = b; |
287 | threshold_restart_bank(b, 0, 0); | 284 | tr.reset = 0; |
288 | affinity_restore(&oldmask); | 285 | tr.old_limit = 0; |
286 | work_on_cpu(b->cpu, threshold_restart_bank, &tr); | ||
289 | 287 | ||
290 | return end - buf; | 288 | return end - buf; |
291 | } | 289 | } |
@@ -294,8 +292,7 @@ static ssize_t store_threshold_limit(struct threshold_block *b, | |||
294 | const char *buf, size_t count) | 292 | const char *buf, size_t count) |
295 | { | 293 | { |
296 | char *end; | 294 | char *end; |
297 | cpumask_t oldmask, newmask; | 295 | struct thresh_restart tr; |
298 | u16 old; | ||
299 | unsigned long new = simple_strtoul(buf, &end, 0); | 296 | unsigned long new = simple_strtoul(buf, &end, 0); |
300 | if (end == buf) | 297 | if (end == buf) |
301 | return -EINVAL; | 298 | return -EINVAL; |
@@ -303,34 +300,36 @@ static ssize_t store_threshold_limit(struct threshold_block *b, | |||
303 | new = THRESHOLD_MAX; | 300 | new = THRESHOLD_MAX; |
304 | if (new < 1) | 301 | if (new < 1) |
305 | new = 1; | 302 | new = 1; |
306 | old = b->threshold_limit; | 303 | tr.old_limit = b->threshold_limit; |
307 | b->threshold_limit = new; | 304 | b->threshold_limit = new; |
305 | tr.b = b; | ||
306 | tr.reset = 0; | ||
308 | 307 | ||
309 | affinity_set(b->cpu, &oldmask, &newmask); | 308 | work_on_cpu(b->cpu, threshold_restart_bank, &tr); |
310 | threshold_restart_bank(b, 0, old); | ||
311 | affinity_restore(&oldmask); | ||
312 | 309 | ||
313 | return end - buf; | 310 | return end - buf; |
314 | } | 311 | } |
315 | 312 | ||
316 | static ssize_t show_error_count(struct threshold_block *b, char *buf) | 313 | static long local_error_count(void *_b) |
317 | { | 314 | { |
318 | u32 high, low; | 315 | struct threshold_block *b = _b; |
319 | cpumask_t oldmask, newmask; | 316 | u32 low, high; |
320 | affinity_set(b->cpu, &oldmask, &newmask); | 317 | |
321 | rdmsr(b->address, low, high); | 318 | rdmsr(b->address, low, high); |
322 | affinity_restore(&oldmask); | 319 | return (high & 0xFFF) - (THRESHOLD_MAX - b->threshold_limit); |
323 | return sprintf(buf, "%x\n", | 320 | } |
324 | (high & 0xFFF) - (THRESHOLD_MAX - b->threshold_limit)); | 321 | |
322 | static ssize_t show_error_count(struct threshold_block *b, char *buf) | ||
323 | { | ||
324 | return sprintf(buf, "%lx\n", work_on_cpu(b->cpu, local_error_count, b)); | ||
325 | } | 325 | } |
326 | 326 | ||
327 | static ssize_t store_error_count(struct threshold_block *b, | 327 | static ssize_t store_error_count(struct threshold_block *b, |
328 | const char *buf, size_t count) | 328 | const char *buf, size_t count) |
329 | { | 329 | { |
330 | cpumask_t oldmask, newmask; | 330 | struct thresh_restart tr = { .b = b, .reset = 1, .old_limit = 0 }; |
331 | affinity_set(b->cpu, &oldmask, &newmask); | 331 | |
332 | threshold_restart_bank(b, 1, 0); | 332 | work_on_cpu(b->cpu, threshold_restart_bank, &tr); |
333 | affinity_restore(&oldmask); | ||
334 | return 1; | 333 | return 1; |
335 | } | 334 | } |
336 | 335 | ||
@@ -463,12 +462,19 @@ out_free: | |||
463 | return err; | 462 | return err; |
464 | } | 463 | } |
465 | 464 | ||
465 | static long local_allocate_threshold_blocks(void *_bank) | ||
466 | { | ||
467 | unsigned int *bank = _bank; | ||
468 | |||
469 | return allocate_threshold_blocks(smp_processor_id(), *bank, 0, | ||
470 | MSR_IA32_MC0_MISC + *bank * 4); | ||
471 | } | ||
472 | |||
466 | /* symlinks sibling shared banks to first core. first core owns dir/files. */ | 473 | /* symlinks sibling shared banks to first core. first core owns dir/files. */ |
467 | static __cpuinit int threshold_create_bank(unsigned int cpu, unsigned int bank) | 474 | static __cpuinit int threshold_create_bank(unsigned int cpu, unsigned int bank) |
468 | { | 475 | { |
469 | int i, err = 0; | 476 | int i, err = 0; |
470 | struct threshold_bank *b = NULL; | 477 | struct threshold_bank *b = NULL; |
471 | cpumask_t oldmask, newmask; | ||
472 | char name[32]; | 478 | char name[32]; |
473 | 479 | ||
474 | sprintf(name, "threshold_bank%i", bank); | 480 | sprintf(name, "threshold_bank%i", bank); |
@@ -519,11 +525,7 @@ static __cpuinit int threshold_create_bank(unsigned int cpu, unsigned int bank) | |||
519 | 525 | ||
520 | per_cpu(threshold_banks, cpu)[bank] = b; | 526 | per_cpu(threshold_banks, cpu)[bank] = b; |
521 | 527 | ||
522 | affinity_set(cpu, &oldmask, &newmask); | 528 | err = work_on_cpu(cpu, local_allocate_threshold_blocks, &bank); |
523 | err = allocate_threshold_blocks(cpu, bank, 0, | ||
524 | MSR_IA32_MC0_MISC + bank * 4); | ||
525 | affinity_restore(&oldmask); | ||
526 | |||
527 | if (err) | 529 | if (err) |
528 | goto out_free; | 530 | goto out_free; |
529 | 531 | ||
diff --git a/arch/x86/kernel/cpu/mcheck/mce_intel_64.c b/arch/x86/kernel/cpu/mcheck/mce_intel_64.c index c17eaf5dd6dd..4b48f251fd39 100644 --- a/arch/x86/kernel/cpu/mcheck/mce_intel_64.c +++ b/arch/x86/kernel/cpu/mcheck/mce_intel_64.c | |||
@@ -26,7 +26,7 @@ asmlinkage void smp_thermal_interrupt(void) | |||
26 | if (therm_throt_process(msr_val & 1)) | 26 | if (therm_throt_process(msr_val & 1)) |
27 | mce_log_therm_throt_event(smp_processor_id(), msr_val); | 27 | mce_log_therm_throt_event(smp_processor_id(), msr_val); |
28 | 28 | ||
29 | add_pda(irq_thermal_count, 1); | 29 | inc_irq_stat(irq_thermal_count); |
30 | irq_exit(); | 30 | irq_exit(); |
31 | } | 31 | } |
32 | 32 | ||
diff --git a/arch/x86/kernel/cpu/mtrr/generic.c b/arch/x86/kernel/cpu/mtrr/generic.c index 4e8d77f01eeb..b59ddcc88cd8 100644 --- a/arch/x86/kernel/cpu/mtrr/generic.c +++ b/arch/x86/kernel/cpu/mtrr/generic.c | |||
@@ -14,14 +14,6 @@ | |||
14 | #include <asm/pat.h> | 14 | #include <asm/pat.h> |
15 | #include "mtrr.h" | 15 | #include "mtrr.h" |
16 | 16 | ||
17 | struct mtrr_state { | ||
18 | struct mtrr_var_range var_ranges[MAX_VAR_RANGES]; | ||
19 | mtrr_type fixed_ranges[NUM_FIXED_RANGES]; | ||
20 | unsigned char enabled; | ||
21 | unsigned char have_fixed; | ||
22 | mtrr_type def_type; | ||
23 | }; | ||
24 | |||
25 | struct fixed_range_block { | 17 | struct fixed_range_block { |
26 | int base_msr; /* start address of an MTRR block */ | 18 | int base_msr; /* start address of an MTRR block */ |
27 | int ranges; /* number of MTRRs in this block */ | 19 | int ranges; /* number of MTRRs in this block */ |
@@ -35,10 +27,12 @@ static struct fixed_range_block fixed_range_blocks[] = { | |||
35 | }; | 27 | }; |
36 | 28 | ||
37 | static unsigned long smp_changes_mask; | 29 | static unsigned long smp_changes_mask; |
38 | static struct mtrr_state mtrr_state = {}; | ||
39 | static int mtrr_state_set; | 30 | static int mtrr_state_set; |
40 | u64 mtrr_tom2; | 31 | u64 mtrr_tom2; |
41 | 32 | ||
33 | struct mtrr_state_type mtrr_state = {}; | ||
34 | EXPORT_SYMBOL_GPL(mtrr_state); | ||
35 | |||
42 | #undef MODULE_PARAM_PREFIX | 36 | #undef MODULE_PARAM_PREFIX |
43 | #define MODULE_PARAM_PREFIX "mtrr." | 37 | #define MODULE_PARAM_PREFIX "mtrr." |
44 | 38 | ||
diff --git a/arch/x86/kernel/cpu/mtrr/main.c b/arch/x86/kernel/cpu/mtrr/main.c index c78c04821ea1..d259e5d2e054 100644 --- a/arch/x86/kernel/cpu/mtrr/main.c +++ b/arch/x86/kernel/cpu/mtrr/main.c | |||
@@ -49,7 +49,7 @@ | |||
49 | 49 | ||
50 | u32 num_var_ranges = 0; | 50 | u32 num_var_ranges = 0; |
51 | 51 | ||
52 | unsigned int mtrr_usage_table[MAX_VAR_RANGES]; | 52 | unsigned int mtrr_usage_table[MTRR_MAX_VAR_RANGES]; |
53 | static DEFINE_MUTEX(mtrr_mutex); | 53 | static DEFINE_MUTEX(mtrr_mutex); |
54 | 54 | ||
55 | u64 size_or_mask, size_and_mask; | 55 | u64 size_or_mask, size_and_mask; |
@@ -574,7 +574,7 @@ struct mtrr_value { | |||
574 | unsigned long lsize; | 574 | unsigned long lsize; |
575 | }; | 575 | }; |
576 | 576 | ||
577 | static struct mtrr_value mtrr_state[MAX_VAR_RANGES]; | 577 | static struct mtrr_value mtrr_state[MTRR_MAX_VAR_RANGES]; |
578 | 578 | ||
579 | static int mtrr_save(struct sys_device * sysdev, pm_message_t state) | 579 | static int mtrr_save(struct sys_device * sysdev, pm_message_t state) |
580 | { | 580 | { |
@@ -803,6 +803,7 @@ x86_get_mtrr_mem_range(struct res_range *range, int nr_range, | |||
803 | } | 803 | } |
804 | 804 | ||
805 | static struct res_range __initdata range[RANGE_NUM]; | 805 | static struct res_range __initdata range[RANGE_NUM]; |
806 | static int __initdata nr_range; | ||
806 | 807 | ||
807 | #ifdef CONFIG_MTRR_SANITIZER | 808 | #ifdef CONFIG_MTRR_SANITIZER |
808 | 809 | ||
@@ -823,16 +824,14 @@ static int enable_mtrr_cleanup __initdata = | |||
823 | 824 | ||
824 | static int __init disable_mtrr_cleanup_setup(char *str) | 825 | static int __init disable_mtrr_cleanup_setup(char *str) |
825 | { | 826 | { |
826 | if (enable_mtrr_cleanup != -1) | 827 | enable_mtrr_cleanup = 0; |
827 | enable_mtrr_cleanup = 0; | ||
828 | return 0; | 828 | return 0; |
829 | } | 829 | } |
830 | early_param("disable_mtrr_cleanup", disable_mtrr_cleanup_setup); | 830 | early_param("disable_mtrr_cleanup", disable_mtrr_cleanup_setup); |
831 | 831 | ||
832 | static int __init enable_mtrr_cleanup_setup(char *str) | 832 | static int __init enable_mtrr_cleanup_setup(char *str) |
833 | { | 833 | { |
834 | if (enable_mtrr_cleanup != -1) | 834 | enable_mtrr_cleanup = 1; |
835 | enable_mtrr_cleanup = 1; | ||
836 | return 0; | 835 | return 0; |
837 | } | 836 | } |
838 | early_param("enable_mtrr_cleanup", enable_mtrr_cleanup_setup); | 837 | early_param("enable_mtrr_cleanup", enable_mtrr_cleanup_setup); |
@@ -1206,39 +1205,43 @@ struct mtrr_cleanup_result { | |||
1206 | #define PSHIFT (PAGE_SHIFT - 10) | 1205 | #define PSHIFT (PAGE_SHIFT - 10) |
1207 | 1206 | ||
1208 | static struct mtrr_cleanup_result __initdata result[NUM_RESULT]; | 1207 | static struct mtrr_cleanup_result __initdata result[NUM_RESULT]; |
1209 | static struct res_range __initdata range_new[RANGE_NUM]; | ||
1210 | static unsigned long __initdata min_loss_pfn[RANGE_NUM]; | 1208 | static unsigned long __initdata min_loss_pfn[RANGE_NUM]; |
1211 | 1209 | ||
1212 | static int __init mtrr_cleanup(unsigned address_bits) | 1210 | static void __init print_out_mtrr_range_state(void) |
1213 | { | 1211 | { |
1214 | unsigned long extra_remove_base, extra_remove_size; | ||
1215 | unsigned long base, size, def, dummy; | ||
1216 | mtrr_type type; | ||
1217 | int nr_range, nr_range_new; | ||
1218 | u64 chunk_size, gran_size; | ||
1219 | unsigned long range_sums, range_sums_new; | ||
1220 | int index_good; | ||
1221 | int num_reg_good; | ||
1222 | int i; | 1212 | int i; |
1213 | char start_factor = 'K', size_factor = 'K'; | ||
1214 | unsigned long start_base, size_base; | ||
1215 | mtrr_type type; | ||
1223 | 1216 | ||
1224 | /* extra one for all 0 */ | 1217 | for (i = 0; i < num_var_ranges; i++) { |
1225 | int num[MTRR_NUM_TYPES + 1]; | ||
1226 | 1218 | ||
1227 | if (!is_cpu(INTEL) || enable_mtrr_cleanup < 1) | 1219 | size_base = range_state[i].size_pfn << (PAGE_SHIFT - 10); |
1228 | return 0; | 1220 | if (!size_base) |
1229 | rdmsr(MTRRdefType_MSR, def, dummy); | 1221 | continue; |
1230 | def &= 0xff; | ||
1231 | if (def != MTRR_TYPE_UNCACHABLE) | ||
1232 | return 0; | ||
1233 | 1222 | ||
1234 | /* get it and store it aside */ | 1223 | size_base = to_size_factor(size_base, &size_factor), |
1235 | memset(range_state, 0, sizeof(range_state)); | 1224 | start_base = range_state[i].base_pfn << (PAGE_SHIFT - 10); |
1236 | for (i = 0; i < num_var_ranges; i++) { | 1225 | start_base = to_size_factor(start_base, &start_factor), |
1237 | mtrr_if->get(i, &base, &size, &type); | 1226 | type = range_state[i].type; |
1238 | range_state[i].base_pfn = base; | 1227 | |
1239 | range_state[i].size_pfn = size; | 1228 | printk(KERN_DEBUG "reg %d, base: %ld%cB, range: %ld%cB, type %s\n", |
1240 | range_state[i].type = type; | 1229 | i, start_base, start_factor, |
1230 | size_base, size_factor, | ||
1231 | (type == MTRR_TYPE_UNCACHABLE) ? "UC" : | ||
1232 | ((type == MTRR_TYPE_WRPROT) ? "WP" : | ||
1233 | ((type == MTRR_TYPE_WRBACK) ? "WB" : "Other")) | ||
1234 | ); | ||
1241 | } | 1235 | } |
1236 | } | ||
1237 | |||
1238 | static int __init mtrr_need_cleanup(void) | ||
1239 | { | ||
1240 | int i; | ||
1241 | mtrr_type type; | ||
1242 | unsigned long size; | ||
1243 | /* extra one for all 0 */ | ||
1244 | int num[MTRR_NUM_TYPES + 1]; | ||
1242 | 1245 | ||
1243 | /* check entries number */ | 1246 | /* check entries number */ |
1244 | memset(num, 0, sizeof(num)); | 1247 | memset(num, 0, sizeof(num)); |
@@ -1263,29 +1266,133 @@ static int __init mtrr_cleanup(unsigned address_bits) | |||
1263 | num_var_ranges - num[MTRR_NUM_TYPES]) | 1266 | num_var_ranges - num[MTRR_NUM_TYPES]) |
1264 | return 0; | 1267 | return 0; |
1265 | 1268 | ||
1266 | /* print original var MTRRs at first, for debugging: */ | 1269 | return 1; |
1267 | printk(KERN_DEBUG "original variable MTRRs\n"); | 1270 | } |
1268 | for (i = 0; i < num_var_ranges; i++) { | ||
1269 | char start_factor = 'K', size_factor = 'K'; | ||
1270 | unsigned long start_base, size_base; | ||
1271 | 1271 | ||
1272 | size_base = range_state[i].size_pfn << (PAGE_SHIFT - 10); | 1272 | static unsigned long __initdata range_sums; |
1273 | if (!size_base) | 1273 | static void __init mtrr_calc_range_state(u64 chunk_size, u64 gran_size, |
1274 | continue; | 1274 | unsigned long extra_remove_base, |
1275 | unsigned long extra_remove_size, | ||
1276 | int i) | ||
1277 | { | ||
1278 | int num_reg; | ||
1279 | static struct res_range range_new[RANGE_NUM]; | ||
1280 | static int nr_range_new; | ||
1281 | unsigned long range_sums_new; | ||
1282 | |||
1283 | /* convert ranges to var ranges state */ | ||
1284 | num_reg = x86_setup_var_mtrrs(range, nr_range, | ||
1285 | chunk_size, gran_size); | ||
1286 | |||
1287 | /* we got new setting in range_state, check it */ | ||
1288 | memset(range_new, 0, sizeof(range_new)); | ||
1289 | nr_range_new = x86_get_mtrr_mem_range(range_new, 0, | ||
1290 | extra_remove_base, extra_remove_size); | ||
1291 | range_sums_new = sum_ranges(range_new, nr_range_new); | ||
1292 | |||
1293 | result[i].chunk_sizek = chunk_size >> 10; | ||
1294 | result[i].gran_sizek = gran_size >> 10; | ||
1295 | result[i].num_reg = num_reg; | ||
1296 | if (range_sums < range_sums_new) { | ||
1297 | result[i].lose_cover_sizek = | ||
1298 | (range_sums_new - range_sums) << PSHIFT; | ||
1299 | result[i].bad = 1; | ||
1300 | } else | ||
1301 | result[i].lose_cover_sizek = | ||
1302 | (range_sums - range_sums_new) << PSHIFT; | ||
1275 | 1303 | ||
1276 | size_base = to_size_factor(size_base, &size_factor), | 1304 | /* double check it */ |
1277 | start_base = range_state[i].base_pfn << (PAGE_SHIFT - 10); | 1305 | if (!result[i].bad && !result[i].lose_cover_sizek) { |
1278 | start_base = to_size_factor(start_base, &start_factor), | 1306 | if (nr_range_new != nr_range || |
1279 | type = range_state[i].type; | 1307 | memcmp(range, range_new, sizeof(range))) |
1308 | result[i].bad = 1; | ||
1309 | } | ||
1280 | 1310 | ||
1281 | printk(KERN_DEBUG "reg %d, base: %ld%cB, range: %ld%cB, type %s\n", | 1311 | if (!result[i].bad && (range_sums - range_sums_new < |
1282 | i, start_base, start_factor, | 1312 | min_loss_pfn[num_reg])) { |
1283 | size_base, size_factor, | 1313 | min_loss_pfn[num_reg] = |
1284 | (type == MTRR_TYPE_UNCACHABLE) ? "UC" : | 1314 | range_sums - range_sums_new; |
1285 | ((type == MTRR_TYPE_WRPROT) ? "WP" : | ||
1286 | ((type == MTRR_TYPE_WRBACK) ? "WB" : "Other")) | ||
1287 | ); | ||
1288 | } | 1315 | } |
1316 | } | ||
1317 | |||
1318 | static void __init mtrr_print_out_one_result(int i) | ||
1319 | { | ||
1320 | char gran_factor, chunk_factor, lose_factor; | ||
1321 | unsigned long gran_base, chunk_base, lose_base; | ||
1322 | |||
1323 | gran_base = to_size_factor(result[i].gran_sizek, &gran_factor), | ||
1324 | chunk_base = to_size_factor(result[i].chunk_sizek, &chunk_factor), | ||
1325 | lose_base = to_size_factor(result[i].lose_cover_sizek, &lose_factor), | ||
1326 | printk(KERN_INFO "%sgran_size: %ld%c \tchunk_size: %ld%c \t", | ||
1327 | result[i].bad ? "*BAD*" : " ", | ||
1328 | gran_base, gran_factor, chunk_base, chunk_factor); | ||
1329 | printk(KERN_CONT "num_reg: %d \tlose cover RAM: %s%ld%c\n", | ||
1330 | result[i].num_reg, result[i].bad ? "-" : "", | ||
1331 | lose_base, lose_factor); | ||
1332 | } | ||
1333 | |||
1334 | static int __init mtrr_search_optimal_index(void) | ||
1335 | { | ||
1336 | int i; | ||
1337 | int num_reg_good; | ||
1338 | int index_good; | ||
1339 | |||
1340 | if (nr_mtrr_spare_reg >= num_var_ranges) | ||
1341 | nr_mtrr_spare_reg = num_var_ranges - 1; | ||
1342 | num_reg_good = -1; | ||
1343 | for (i = num_var_ranges - nr_mtrr_spare_reg; i > 0; i--) { | ||
1344 | if (!min_loss_pfn[i]) | ||
1345 | num_reg_good = i; | ||
1346 | } | ||
1347 | |||
1348 | index_good = -1; | ||
1349 | if (num_reg_good != -1) { | ||
1350 | for (i = 0; i < NUM_RESULT; i++) { | ||
1351 | if (!result[i].bad && | ||
1352 | result[i].num_reg == num_reg_good && | ||
1353 | !result[i].lose_cover_sizek) { | ||
1354 | index_good = i; | ||
1355 | break; | ||
1356 | } | ||
1357 | } | ||
1358 | } | ||
1359 | |||
1360 | return index_good; | ||
1361 | } | ||
1362 | |||
1363 | |||
1364 | static int __init mtrr_cleanup(unsigned address_bits) | ||
1365 | { | ||
1366 | unsigned long extra_remove_base, extra_remove_size; | ||
1367 | unsigned long base, size, def, dummy; | ||
1368 | mtrr_type type; | ||
1369 | u64 chunk_size, gran_size; | ||
1370 | int index_good; | ||
1371 | int i; | ||
1372 | |||
1373 | if (!is_cpu(INTEL) || enable_mtrr_cleanup < 1) | ||
1374 | return 0; | ||
1375 | rdmsr(MTRRdefType_MSR, def, dummy); | ||
1376 | def &= 0xff; | ||
1377 | if (def != MTRR_TYPE_UNCACHABLE) | ||
1378 | return 0; | ||
1379 | |||
1380 | /* get it and store it aside */ | ||
1381 | memset(range_state, 0, sizeof(range_state)); | ||
1382 | for (i = 0; i < num_var_ranges; i++) { | ||
1383 | mtrr_if->get(i, &base, &size, &type); | ||
1384 | range_state[i].base_pfn = base; | ||
1385 | range_state[i].size_pfn = size; | ||
1386 | range_state[i].type = type; | ||
1387 | } | ||
1388 | |||
1389 | /* check if we need handle it and can handle it */ | ||
1390 | if (!mtrr_need_cleanup()) | ||
1391 | return 0; | ||
1392 | |||
1393 | /* print original var MTRRs at first, for debugging: */ | ||
1394 | printk(KERN_DEBUG "original variable MTRRs\n"); | ||
1395 | print_out_mtrr_range_state(); | ||
1289 | 1396 | ||
1290 | memset(range, 0, sizeof(range)); | 1397 | memset(range, 0, sizeof(range)); |
1291 | extra_remove_size = 0; | 1398 | extra_remove_size = 0; |
@@ -1309,176 +1416,64 @@ static int __init mtrr_cleanup(unsigned address_bits) | |||
1309 | range_sums >> (20 - PAGE_SHIFT)); | 1416 | range_sums >> (20 - PAGE_SHIFT)); |
1310 | 1417 | ||
1311 | if (mtrr_chunk_size && mtrr_gran_size) { | 1418 | if (mtrr_chunk_size && mtrr_gran_size) { |
1312 | int num_reg; | 1419 | i = 0; |
1313 | char gran_factor, chunk_factor, lose_factor; | 1420 | mtrr_calc_range_state(mtrr_chunk_size, mtrr_gran_size, |
1314 | unsigned long gran_base, chunk_base, lose_base; | 1421 | extra_remove_base, extra_remove_size, i); |
1315 | |||
1316 | debug_print++; | ||
1317 | /* convert ranges to var ranges state */ | ||
1318 | num_reg = x86_setup_var_mtrrs(range, nr_range, mtrr_chunk_size, | ||
1319 | mtrr_gran_size); | ||
1320 | 1422 | ||
1321 | /* we got new setting in range_state, check it */ | 1423 | mtrr_print_out_one_result(i); |
1322 | memset(range_new, 0, sizeof(range_new)); | ||
1323 | nr_range_new = x86_get_mtrr_mem_range(range_new, 0, | ||
1324 | extra_remove_base, | ||
1325 | extra_remove_size); | ||
1326 | range_sums_new = sum_ranges(range_new, nr_range_new); | ||
1327 | 1424 | ||
1328 | i = 0; | ||
1329 | result[i].chunk_sizek = mtrr_chunk_size >> 10; | ||
1330 | result[i].gran_sizek = mtrr_gran_size >> 10; | ||
1331 | result[i].num_reg = num_reg; | ||
1332 | if (range_sums < range_sums_new) { | ||
1333 | result[i].lose_cover_sizek = | ||
1334 | (range_sums_new - range_sums) << PSHIFT; | ||
1335 | result[i].bad = 1; | ||
1336 | } else | ||
1337 | result[i].lose_cover_sizek = | ||
1338 | (range_sums - range_sums_new) << PSHIFT; | ||
1339 | |||
1340 | gran_base = to_size_factor(result[i].gran_sizek, &gran_factor), | ||
1341 | chunk_base = to_size_factor(result[i].chunk_sizek, &chunk_factor), | ||
1342 | lose_base = to_size_factor(result[i].lose_cover_sizek, &lose_factor), | ||
1343 | printk(KERN_INFO "%sgran_size: %ld%c \tchunk_size: %ld%c \t", | ||
1344 | result[i].bad?"*BAD*":" ", | ||
1345 | gran_base, gran_factor, chunk_base, chunk_factor); | ||
1346 | printk(KERN_CONT "num_reg: %d \tlose cover RAM: %s%ld%c\n", | ||
1347 | result[i].num_reg, result[i].bad?"-":"", | ||
1348 | lose_base, lose_factor); | ||
1349 | if (!result[i].bad) { | 1425 | if (!result[i].bad) { |
1350 | set_var_mtrr_all(address_bits); | 1426 | set_var_mtrr_all(address_bits); |
1351 | return 1; | 1427 | return 1; |
1352 | } | 1428 | } |
1353 | printk(KERN_INFO "invalid mtrr_gran_size or mtrr_chunk_size, " | 1429 | printk(KERN_INFO "invalid mtrr_gran_size or mtrr_chunk_size, " |
1354 | "will find optimal one\n"); | 1430 | "will find optimal one\n"); |
1355 | debug_print--; | ||
1356 | memset(result, 0, sizeof(result[0])); | ||
1357 | } | 1431 | } |
1358 | 1432 | ||
1359 | i = 0; | 1433 | i = 0; |
1360 | memset(min_loss_pfn, 0xff, sizeof(min_loss_pfn)); | 1434 | memset(min_loss_pfn, 0xff, sizeof(min_loss_pfn)); |
1361 | memset(result, 0, sizeof(result)); | 1435 | memset(result, 0, sizeof(result)); |
1362 | for (gran_size = (1ULL<<16); gran_size < (1ULL<<32); gran_size <<= 1) { | 1436 | for (gran_size = (1ULL<<16); gran_size < (1ULL<<32); gran_size <<= 1) { |
1363 | char gran_factor; | ||
1364 | unsigned long gran_base; | ||
1365 | |||
1366 | if (debug_print) | ||
1367 | gran_base = to_size_factor(gran_size >> 10, &gran_factor); | ||
1368 | 1437 | ||
1369 | for (chunk_size = gran_size; chunk_size < (1ULL<<32); | 1438 | for (chunk_size = gran_size; chunk_size < (1ULL<<32); |
1370 | chunk_size <<= 1) { | 1439 | chunk_size <<= 1) { |
1371 | int num_reg; | ||
1372 | 1440 | ||
1373 | if (debug_print) { | ||
1374 | char chunk_factor; | ||
1375 | unsigned long chunk_base; | ||
1376 | |||
1377 | chunk_base = to_size_factor(chunk_size>>10, &chunk_factor), | ||
1378 | printk(KERN_INFO "\n"); | ||
1379 | printk(KERN_INFO "gran_size: %ld%c chunk_size: %ld%c \n", | ||
1380 | gran_base, gran_factor, chunk_base, chunk_factor); | ||
1381 | } | ||
1382 | if (i >= NUM_RESULT) | 1441 | if (i >= NUM_RESULT) |
1383 | continue; | 1442 | continue; |
1384 | 1443 | ||
1385 | /* convert ranges to var ranges state */ | 1444 | mtrr_calc_range_state(chunk_size, gran_size, |
1386 | num_reg = x86_setup_var_mtrrs(range, nr_range, | 1445 | extra_remove_base, extra_remove_size, i); |
1387 | chunk_size, gran_size); | 1446 | if (debug_print) { |
1388 | 1447 | mtrr_print_out_one_result(i); | |
1389 | /* we got new setting in range_state, check it */ | 1448 | printk(KERN_INFO "\n"); |
1390 | memset(range_new, 0, sizeof(range_new)); | ||
1391 | nr_range_new = x86_get_mtrr_mem_range(range_new, 0, | ||
1392 | extra_remove_base, extra_remove_size); | ||
1393 | range_sums_new = sum_ranges(range_new, nr_range_new); | ||
1394 | |||
1395 | result[i].chunk_sizek = chunk_size >> 10; | ||
1396 | result[i].gran_sizek = gran_size >> 10; | ||
1397 | result[i].num_reg = num_reg; | ||
1398 | if (range_sums < range_sums_new) { | ||
1399 | result[i].lose_cover_sizek = | ||
1400 | (range_sums_new - range_sums) << PSHIFT; | ||
1401 | result[i].bad = 1; | ||
1402 | } else | ||
1403 | result[i].lose_cover_sizek = | ||
1404 | (range_sums - range_sums_new) << PSHIFT; | ||
1405 | |||
1406 | /* double check it */ | ||
1407 | if (!result[i].bad && !result[i].lose_cover_sizek) { | ||
1408 | if (nr_range_new != nr_range || | ||
1409 | memcmp(range, range_new, sizeof(range))) | ||
1410 | result[i].bad = 1; | ||
1411 | } | 1449 | } |
1412 | 1450 | ||
1413 | if (!result[i].bad && (range_sums - range_sums_new < | ||
1414 | min_loss_pfn[num_reg])) { | ||
1415 | min_loss_pfn[num_reg] = | ||
1416 | range_sums - range_sums_new; | ||
1417 | } | ||
1418 | i++; | 1451 | i++; |
1419 | } | 1452 | } |
1420 | } | 1453 | } |
1421 | 1454 | ||
1422 | /* print out all */ | ||
1423 | for (i = 0; i < NUM_RESULT; i++) { | ||
1424 | char gran_factor, chunk_factor, lose_factor; | ||
1425 | unsigned long gran_base, chunk_base, lose_base; | ||
1426 | |||
1427 | gran_base = to_size_factor(result[i].gran_sizek, &gran_factor), | ||
1428 | chunk_base = to_size_factor(result[i].chunk_sizek, &chunk_factor), | ||
1429 | lose_base = to_size_factor(result[i].lose_cover_sizek, &lose_factor), | ||
1430 | printk(KERN_INFO "%sgran_size: %ld%c \tchunk_size: %ld%c \t", | ||
1431 | result[i].bad?"*BAD*":" ", | ||
1432 | gran_base, gran_factor, chunk_base, chunk_factor); | ||
1433 | printk(KERN_CONT "num_reg: %d \tlose cover RAM: %s%ld%c\n", | ||
1434 | result[i].num_reg, result[i].bad?"-":"", | ||
1435 | lose_base, lose_factor); | ||
1436 | } | ||
1437 | |||
1438 | /* try to find the optimal index */ | 1455 | /* try to find the optimal index */ |
1439 | if (nr_mtrr_spare_reg >= num_var_ranges) | 1456 | index_good = mtrr_search_optimal_index(); |
1440 | nr_mtrr_spare_reg = num_var_ranges - 1; | ||
1441 | num_reg_good = -1; | ||
1442 | for (i = num_var_ranges - nr_mtrr_spare_reg; i > 0; i--) { | ||
1443 | if (!min_loss_pfn[i]) | ||
1444 | num_reg_good = i; | ||
1445 | } | ||
1446 | |||
1447 | index_good = -1; | ||
1448 | if (num_reg_good != -1) { | ||
1449 | for (i = 0; i < NUM_RESULT; i++) { | ||
1450 | if (!result[i].bad && | ||
1451 | result[i].num_reg == num_reg_good && | ||
1452 | !result[i].lose_cover_sizek) { | ||
1453 | index_good = i; | ||
1454 | break; | ||
1455 | } | ||
1456 | } | ||
1457 | } | ||
1458 | 1457 | ||
1459 | if (index_good != -1) { | 1458 | if (index_good != -1) { |
1460 | char gran_factor, chunk_factor, lose_factor; | ||
1461 | unsigned long gran_base, chunk_base, lose_base; | ||
1462 | |||
1463 | printk(KERN_INFO "Found optimal setting for mtrr clean up\n"); | 1459 | printk(KERN_INFO "Found optimal setting for mtrr clean up\n"); |
1464 | i = index_good; | 1460 | i = index_good; |
1465 | gran_base = to_size_factor(result[i].gran_sizek, &gran_factor), | 1461 | mtrr_print_out_one_result(i); |
1466 | chunk_base = to_size_factor(result[i].chunk_sizek, &chunk_factor), | 1462 | |
1467 | lose_base = to_size_factor(result[i].lose_cover_sizek, &lose_factor), | ||
1468 | printk(KERN_INFO "gran_size: %ld%c \tchunk_size: %ld%c \t", | ||
1469 | gran_base, gran_factor, chunk_base, chunk_factor); | ||
1470 | printk(KERN_CONT "num_reg: %d \tlose RAM: %ld%c\n", | ||
1471 | result[i].num_reg, lose_base, lose_factor); | ||
1472 | /* convert ranges to var ranges state */ | 1463 | /* convert ranges to var ranges state */ |
1473 | chunk_size = result[i].chunk_sizek; | 1464 | chunk_size = result[i].chunk_sizek; |
1474 | chunk_size <<= 10; | 1465 | chunk_size <<= 10; |
1475 | gran_size = result[i].gran_sizek; | 1466 | gran_size = result[i].gran_sizek; |
1476 | gran_size <<= 10; | 1467 | gran_size <<= 10; |
1477 | debug_print++; | ||
1478 | x86_setup_var_mtrrs(range, nr_range, chunk_size, gran_size); | 1468 | x86_setup_var_mtrrs(range, nr_range, chunk_size, gran_size); |
1479 | debug_print--; | ||
1480 | set_var_mtrr_all(address_bits); | 1469 | set_var_mtrr_all(address_bits); |
1470 | printk(KERN_DEBUG "New variable MTRRs\n"); | ||
1471 | print_out_mtrr_range_state(); | ||
1481 | return 1; | 1472 | return 1; |
1473 | } else { | ||
1474 | /* print out all */ | ||
1475 | for (i = 0; i < NUM_RESULT; i++) | ||
1476 | mtrr_print_out_one_result(i); | ||
1482 | } | 1477 | } |
1483 | 1478 | ||
1484 | printk(KERN_INFO "mtrr_cleanup: can not find optimal value\n"); | 1479 | printk(KERN_INFO "mtrr_cleanup: can not find optimal value\n"); |
@@ -1562,7 +1557,6 @@ int __init mtrr_trim_uncached_memory(unsigned long end_pfn) | |||
1562 | { | 1557 | { |
1563 | unsigned long i, base, size, highest_pfn = 0, def, dummy; | 1558 | unsigned long i, base, size, highest_pfn = 0, def, dummy; |
1564 | mtrr_type type; | 1559 | mtrr_type type; |
1565 | int nr_range; | ||
1566 | u64 total_trim_size; | 1560 | u64 total_trim_size; |
1567 | 1561 | ||
1568 | /* extra one for all 0 */ | 1562 | /* extra one for all 0 */ |
diff --git a/arch/x86/kernel/cpu/mtrr/mtrr.h b/arch/x86/kernel/cpu/mtrr/mtrr.h index 2dc4ec656b23..ffd60409cc6d 100644 --- a/arch/x86/kernel/cpu/mtrr/mtrr.h +++ b/arch/x86/kernel/cpu/mtrr/mtrr.h | |||
@@ -8,11 +8,6 @@ | |||
8 | #define MTRRcap_MSR 0x0fe | 8 | #define MTRRcap_MSR 0x0fe |
9 | #define MTRRdefType_MSR 0x2ff | 9 | #define MTRRdefType_MSR 0x2ff |
10 | 10 | ||
11 | #define MTRRphysBase_MSR(reg) (0x200 + 2 * (reg)) | ||
12 | #define MTRRphysMask_MSR(reg) (0x200 + 2 * (reg) + 1) | ||
13 | |||
14 | #define NUM_FIXED_RANGES 88 | ||
15 | #define MAX_VAR_RANGES 256 | ||
16 | #define MTRRfix64K_00000_MSR 0x250 | 11 | #define MTRRfix64K_00000_MSR 0x250 |
17 | #define MTRRfix16K_80000_MSR 0x258 | 12 | #define MTRRfix16K_80000_MSR 0x258 |
18 | #define MTRRfix16K_A0000_MSR 0x259 | 13 | #define MTRRfix16K_A0000_MSR 0x259 |
@@ -29,11 +24,7 @@ | |||
29 | #define MTRR_CHANGE_MASK_VARIABLE 0x02 | 24 | #define MTRR_CHANGE_MASK_VARIABLE 0x02 |
30 | #define MTRR_CHANGE_MASK_DEFTYPE 0x04 | 25 | #define MTRR_CHANGE_MASK_DEFTYPE 0x04 |
31 | 26 | ||
32 | /* In the Intel processor's MTRR interface, the MTRR type is always held in | 27 | extern unsigned int mtrr_usage_table[MTRR_MAX_VAR_RANGES]; |
33 | an 8 bit field: */ | ||
34 | typedef u8 mtrr_type; | ||
35 | |||
36 | extern unsigned int mtrr_usage_table[MAX_VAR_RANGES]; | ||
37 | 28 | ||
38 | struct mtrr_ops { | 29 | struct mtrr_ops { |
39 | u32 vendor; | 30 | u32 vendor; |
@@ -70,13 +61,6 @@ struct set_mtrr_context { | |||
70 | u32 ccr3; | 61 | u32 ccr3; |
71 | }; | 62 | }; |
72 | 63 | ||
73 | struct mtrr_var_range { | ||
74 | u32 base_lo; | ||
75 | u32 base_hi; | ||
76 | u32 mask_lo; | ||
77 | u32 mask_hi; | ||
78 | }; | ||
79 | |||
80 | void set_mtrr_done(struct set_mtrr_context *ctxt); | 64 | void set_mtrr_done(struct set_mtrr_context *ctxt); |
81 | void set_mtrr_cache_disable(struct set_mtrr_context *ctxt); | 65 | void set_mtrr_cache_disable(struct set_mtrr_context *ctxt); |
82 | void set_mtrr_prepare_save(struct set_mtrr_context *ctxt); | 66 | void set_mtrr_prepare_save(struct set_mtrr_context *ctxt); |
diff --git a/arch/x86/kernel/cpu/vmware.c b/arch/x86/kernel/cpu/vmware.c new file mode 100644 index 000000000000..284c399e3234 --- /dev/null +++ b/arch/x86/kernel/cpu/vmware.c | |||
@@ -0,0 +1,112 @@ | |||
1 | /* | ||
2 | * VMware Detection code. | ||
3 | * | ||
4 | * Copyright (C) 2008, VMware, Inc. | ||
5 | * Author : Alok N Kataria <akataria@vmware.com> | ||
6 | * | ||
7 | * This program is free software; you can redistribute it and/or modify | ||
8 | * it under the terms of the GNU General Public License as published by | ||
9 | * the Free Software Foundation; either version 2 of the License, or | ||
10 | * (at your option) any later version. | ||
11 | * | ||
12 | * This program is distributed in the hope that it will be useful, but | ||
13 | * WITHOUT ANY WARRANTY; without even the implied warranty of | ||
14 | * MERCHANTABILITY OR FITNESS FOR A PARTICULAR PURPOSE, GOOD TITLE or | ||
15 | * NON INFRINGEMENT. See the GNU General Public License for more | ||
16 | * details. | ||
17 | * | ||
18 | * You should have received a copy of the GNU General Public License | ||
19 | * along with this program; if not, write to the Free Software | ||
20 | * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. | ||
21 | * | ||
22 | */ | ||
23 | |||
24 | #include <linux/dmi.h> | ||
25 | #include <asm/div64.h> | ||
26 | #include <asm/vmware.h> | ||
27 | |||
28 | #define CPUID_VMWARE_INFO_LEAF 0x40000000 | ||
29 | #define VMWARE_HYPERVISOR_MAGIC 0x564D5868 | ||
30 | #define VMWARE_HYPERVISOR_PORT 0x5658 | ||
31 | |||
32 | #define VMWARE_PORT_CMD_GETVERSION 10 | ||
33 | #define VMWARE_PORT_CMD_GETHZ 45 | ||
34 | |||
35 | #define VMWARE_PORT(cmd, eax, ebx, ecx, edx) \ | ||
36 | __asm__("inl (%%dx)" : \ | ||
37 | "=a"(eax), "=c"(ecx), "=d"(edx), "=b"(ebx) : \ | ||
38 | "0"(VMWARE_HYPERVISOR_MAGIC), \ | ||
39 | "1"(VMWARE_PORT_CMD_##cmd), \ | ||
40 | "2"(VMWARE_HYPERVISOR_PORT), "3"(UINT_MAX) : \ | ||
41 | "memory"); | ||
42 | |||
43 | static inline int __vmware_platform(void) | ||
44 | { | ||
45 | uint32_t eax, ebx, ecx, edx; | ||
46 | VMWARE_PORT(GETVERSION, eax, ebx, ecx, edx); | ||
47 | return eax != (uint32_t)-1 && ebx == VMWARE_HYPERVISOR_MAGIC; | ||
48 | } | ||
49 | |||
50 | static unsigned long __vmware_get_tsc_khz(void) | ||
51 | { | ||
52 | uint64_t tsc_hz; | ||
53 | uint32_t eax, ebx, ecx, edx; | ||
54 | |||
55 | VMWARE_PORT(GETHZ, eax, ebx, ecx, edx); | ||
56 | |||
57 | if (ebx == UINT_MAX) | ||
58 | return 0; | ||
59 | tsc_hz = eax | (((uint64_t)ebx) << 32); | ||
60 | do_div(tsc_hz, 1000); | ||
61 | BUG_ON(tsc_hz >> 32); | ||
62 | return tsc_hz; | ||
63 | } | ||
64 | |||
65 | /* | ||
66 | * While checking the dmi string information, just checking the product | ||
67 | * serial key should be enough, as this will always have a VMware | ||
68 | * specific string when running under VMware hypervisor. | ||
69 | */ | ||
70 | int vmware_platform(void) | ||
71 | { | ||
72 | if (cpu_has_hypervisor) { | ||
73 | unsigned int eax, ebx, ecx, edx; | ||
74 | char hyper_vendor_id[13]; | ||
75 | |||
76 | cpuid(CPUID_VMWARE_INFO_LEAF, &eax, &ebx, &ecx, &edx); | ||
77 | memcpy(hyper_vendor_id + 0, &ebx, 4); | ||
78 | memcpy(hyper_vendor_id + 4, &ecx, 4); | ||
79 | memcpy(hyper_vendor_id + 8, &edx, 4); | ||
80 | hyper_vendor_id[12] = '\0'; | ||
81 | if (!strcmp(hyper_vendor_id, "VMwareVMware")) | ||
82 | return 1; | ||
83 | } else if (dmi_available && dmi_name_in_serial("VMware") && | ||
84 | __vmware_platform()) | ||
85 | return 1; | ||
86 | |||
87 | return 0; | ||
88 | } | ||
89 | |||
90 | unsigned long vmware_get_tsc_khz(void) | ||
91 | { | ||
92 | BUG_ON(!vmware_platform()); | ||
93 | return __vmware_get_tsc_khz(); | ||
94 | } | ||
95 | |||
96 | /* | ||
97 | * VMware hypervisor takes care of exporting a reliable TSC to the guest. | ||
98 | * Still, due to timing difference when running on virtual cpus, the TSC can | ||
99 | * be marked as unstable in some cases. For example, the TSC sync check at | ||
100 | * bootup can fail due to a marginal offset between vcpus' TSCs (though the | ||
101 | * TSCs do not drift from each other). Also, the ACPI PM timer clocksource | ||
102 | * is not suitable as a watchdog when running on a hypervisor because the | ||
103 | * kernel may miss a wrap of the counter if the vcpu is descheduled for a | ||
104 | * long time. To skip these checks at runtime we set these capability bits, | ||
105 | * so that the kernel could just trust the hypervisor with providing a | ||
106 | * reliable virtual TSC that is suitable for timekeeping. | ||
107 | */ | ||
108 | void __cpuinit vmware_set_feature_bits(struct cpuinfo_x86 *c) | ||
109 | { | ||
110 | set_cpu_cap(c, X86_FEATURE_CONSTANT_TSC); | ||
111 | set_cpu_cap(c, X86_FEATURE_TSC_RELIABLE); | ||
112 | } | ||
diff --git a/arch/x86/kernel/cpuid.c b/arch/x86/kernel/cpuid.c index 72cefd1e649b..2ac1f0c2beb3 100644 --- a/arch/x86/kernel/cpuid.c +++ b/arch/x86/kernel/cpuid.c | |||
@@ -39,10 +39,10 @@ | |||
39 | #include <linux/device.h> | 39 | #include <linux/device.h> |
40 | #include <linux/cpu.h> | 40 | #include <linux/cpu.h> |
41 | #include <linux/notifier.h> | 41 | #include <linux/notifier.h> |
42 | #include <linux/uaccess.h> | ||
42 | 43 | ||
43 | #include <asm/processor.h> | 44 | #include <asm/processor.h> |
44 | #include <asm/msr.h> | 45 | #include <asm/msr.h> |
45 | #include <asm/uaccess.h> | ||
46 | #include <asm/system.h> | 46 | #include <asm/system.h> |
47 | 47 | ||
48 | static struct class *cpuid_class; | 48 | static struct class *cpuid_class; |
@@ -82,7 +82,7 @@ static loff_t cpuid_seek(struct file *file, loff_t offset, int orig) | |||
82 | } | 82 | } |
83 | 83 | ||
84 | static ssize_t cpuid_read(struct file *file, char __user *buf, | 84 | static ssize_t cpuid_read(struct file *file, char __user *buf, |
85 | size_t count, loff_t * ppos) | 85 | size_t count, loff_t *ppos) |
86 | { | 86 | { |
87 | char __user *tmp = buf; | 87 | char __user *tmp = buf; |
88 | struct cpuid_regs cmd; | 88 | struct cpuid_regs cmd; |
@@ -117,11 +117,11 @@ static int cpuid_open(struct inode *inode, struct file *file) | |||
117 | unsigned int cpu; | 117 | unsigned int cpu; |
118 | struct cpuinfo_x86 *c; | 118 | struct cpuinfo_x86 *c; |
119 | int ret = 0; | 119 | int ret = 0; |
120 | 120 | ||
121 | lock_kernel(); | 121 | lock_kernel(); |
122 | 122 | ||
123 | cpu = iminor(file->f_path.dentry->d_inode); | 123 | cpu = iminor(file->f_path.dentry->d_inode); |
124 | if (cpu >= NR_CPUS || !cpu_online(cpu)) { | 124 | if (cpu >= nr_cpu_ids || !cpu_online(cpu)) { |
125 | ret = -ENXIO; /* No such CPU */ | 125 | ret = -ENXIO; /* No such CPU */ |
126 | goto out; | 126 | goto out; |
127 | } | 127 | } |
diff --git a/arch/x86/kernel/crash.c b/arch/x86/kernel/crash.c index 268553817909..c689d19e35ab 100644 --- a/arch/x86/kernel/crash.c +++ b/arch/x86/kernel/crash.c | |||
@@ -26,37 +26,21 @@ | |||
26 | #include <linux/kdebug.h> | 26 | #include <linux/kdebug.h> |
27 | #include <asm/smp.h> | 27 | #include <asm/smp.h> |
28 | #include <asm/reboot.h> | 28 | #include <asm/reboot.h> |
29 | #include <asm/virtext.h> | ||
29 | 30 | ||
30 | #include <mach_ipi.h> | 31 | #include <mach_ipi.h> |
31 | 32 | ||
32 | /* This keeps a track of which one is crashing cpu. */ | ||
33 | static int crashing_cpu; | ||
34 | 33 | ||
35 | #if defined(CONFIG_SMP) && defined(CONFIG_X86_LOCAL_APIC) | 34 | #if defined(CONFIG_SMP) && defined(CONFIG_X86_LOCAL_APIC) |
36 | static atomic_t waiting_for_crash_ipi; | ||
37 | 35 | ||
38 | static int crash_nmi_callback(struct notifier_block *self, | 36 | static void kdump_nmi_callback(int cpu, struct die_args *args) |
39 | unsigned long val, void *data) | ||
40 | { | 37 | { |
41 | struct pt_regs *regs; | 38 | struct pt_regs *regs; |
42 | #ifdef CONFIG_X86_32 | 39 | #ifdef CONFIG_X86_32 |
43 | struct pt_regs fixed_regs; | 40 | struct pt_regs fixed_regs; |
44 | #endif | 41 | #endif |
45 | int cpu; | ||
46 | 42 | ||
47 | if (val != DIE_NMI_IPI) | 43 | regs = args->regs; |
48 | return NOTIFY_OK; | ||
49 | |||
50 | regs = ((struct die_args *)data)->regs; | ||
51 | cpu = raw_smp_processor_id(); | ||
52 | |||
53 | /* Don't do anything if this handler is invoked on crashing cpu. | ||
54 | * Otherwise, system will completely hang. Crashing cpu can get | ||
55 | * an NMI if system was initially booted with nmi_watchdog parameter. | ||
56 | */ | ||
57 | if (cpu == crashing_cpu) | ||
58 | return NOTIFY_STOP; | ||
59 | local_irq_disable(); | ||
60 | 44 | ||
61 | #ifdef CONFIG_X86_32 | 45 | #ifdef CONFIG_X86_32 |
62 | if (!user_mode_vm(regs)) { | 46 | if (!user_mode_vm(regs)) { |
@@ -65,54 +49,28 @@ static int crash_nmi_callback(struct notifier_block *self, | |||
65 | } | 49 | } |
66 | #endif | 50 | #endif |
67 | crash_save_cpu(regs, cpu); | 51 | crash_save_cpu(regs, cpu); |
68 | disable_local_APIC(); | ||
69 | atomic_dec(&waiting_for_crash_ipi); | ||
70 | /* Assume hlt works */ | ||
71 | halt(); | ||
72 | for (;;) | ||
73 | cpu_relax(); | ||
74 | 52 | ||
75 | return 1; | 53 | /* Disable VMX or SVM if needed. |
76 | } | 54 | * |
55 | * We need to disable virtualization on all CPUs. | ||
56 | * Having VMX or SVM enabled on any CPU may break rebooting | ||
57 | * after the kdump kernel has finished its task. | ||
58 | */ | ||
59 | cpu_emergency_vmxoff(); | ||
60 | cpu_emergency_svm_disable(); | ||
77 | 61 | ||
78 | static void smp_send_nmi_allbutself(void) | 62 | disable_local_APIC(); |
79 | { | ||
80 | cpumask_t mask = cpu_online_map; | ||
81 | cpu_clear(safe_smp_processor_id(), mask); | ||
82 | if (!cpus_empty(mask)) | ||
83 | send_IPI_mask(mask, NMI_VECTOR); | ||
84 | } | 63 | } |
85 | 64 | ||
86 | static struct notifier_block crash_nmi_nb = { | 65 | static void kdump_nmi_shootdown_cpus(void) |
87 | .notifier_call = crash_nmi_callback, | ||
88 | }; | ||
89 | |||
90 | static void nmi_shootdown_cpus(void) | ||
91 | { | 66 | { |
92 | unsigned long msecs; | 67 | nmi_shootdown_cpus(kdump_nmi_callback); |
93 | |||
94 | atomic_set(&waiting_for_crash_ipi, num_online_cpus() - 1); | ||
95 | /* Would it be better to replace the trap vector here? */ | ||
96 | if (register_die_notifier(&crash_nmi_nb)) | ||
97 | return; /* return what? */ | ||
98 | /* Ensure the new callback function is set before sending | ||
99 | * out the NMI | ||
100 | */ | ||
101 | wmb(); | ||
102 | |||
103 | smp_send_nmi_allbutself(); | ||
104 | |||
105 | msecs = 1000; /* Wait at most a second for the other cpus to stop */ | ||
106 | while ((atomic_read(&waiting_for_crash_ipi) > 0) && msecs) { | ||
107 | mdelay(1); | ||
108 | msecs--; | ||
109 | } | ||
110 | 68 | ||
111 | /* Leave the nmi callback set */ | ||
112 | disable_local_APIC(); | 69 | disable_local_APIC(); |
113 | } | 70 | } |
71 | |||
114 | #else | 72 | #else |
115 | static void nmi_shootdown_cpus(void) | 73 | static void kdump_nmi_shootdown_cpus(void) |
116 | { | 74 | { |
117 | /* There are no cpus to shootdown */ | 75 | /* There are no cpus to shootdown */ |
118 | } | 76 | } |
@@ -131,9 +89,15 @@ void native_machine_crash_shutdown(struct pt_regs *regs) | |||
131 | /* The kernel is broken so disable interrupts */ | 89 | /* The kernel is broken so disable interrupts */ |
132 | local_irq_disable(); | 90 | local_irq_disable(); |
133 | 91 | ||
134 | /* Make a note of crashing cpu. Will be used in NMI callback.*/ | 92 | kdump_nmi_shootdown_cpus(); |
135 | crashing_cpu = safe_smp_processor_id(); | 93 | |
136 | nmi_shootdown_cpus(); | 94 | /* Booting kdump kernel with VMX or SVM enabled won't work, |
95 | * because (among other limitations) we can't disable paging | ||
96 | * with the virt flags. | ||
97 | */ | ||
98 | cpu_emergency_vmxoff(); | ||
99 | cpu_emergency_svm_disable(); | ||
100 | |||
137 | lapic_shutdown(); | 101 | lapic_shutdown(); |
138 | #if defined(CONFIG_X86_IO_APIC) | 102 | #if defined(CONFIG_X86_IO_APIC) |
139 | disable_IO_APIC(); | 103 | disable_IO_APIC(); |
diff --git a/arch/x86/kernel/ds.c b/arch/x86/kernel/ds.c index a2d1176c38ee..da91701a2348 100644 --- a/arch/x86/kernel/ds.c +++ b/arch/x86/kernel/ds.c | |||
@@ -6,14 +6,13 @@ | |||
6 | * precise-event based sampling (PEBS). | 6 | * precise-event based sampling (PEBS). |
7 | * | 7 | * |
8 | * It manages: | 8 | * It manages: |
9 | * - per-thread and per-cpu allocation of BTS and PEBS | 9 | * - DS and BTS hardware configuration |
10 | * - buffer memory allocation (optional) | 10 | * - buffer overflow handling (to be done) |
11 | * - buffer overflow handling | ||
12 | * - buffer access | 11 | * - buffer access |
13 | * | 12 | * |
14 | * It assumes: | 13 | * It does not do: |
15 | * - get_task_struct on all parameter tasks | 14 | * - security checking (is the caller allowed to trace the task) |
16 | * - current is allowed to trace parameter tasks | 15 | * - buffer allocation (memory accounting) |
17 | * | 16 | * |
18 | * | 17 | * |
19 | * Copyright (C) 2007-2008 Intel Corporation. | 18 | * Copyright (C) 2007-2008 Intel Corporation. |
@@ -28,22 +27,69 @@ | |||
28 | #include <linux/slab.h> | 27 | #include <linux/slab.h> |
29 | #include <linux/sched.h> | 28 | #include <linux/sched.h> |
30 | #include <linux/mm.h> | 29 | #include <linux/mm.h> |
30 | #include <linux/kernel.h> | ||
31 | 31 | ||
32 | 32 | ||
33 | /* | 33 | /* |
34 | * The configuration for a particular DS hardware implementation. | 34 | * The configuration for a particular DS hardware implementation. |
35 | */ | 35 | */ |
36 | struct ds_configuration { | 36 | struct ds_configuration { |
37 | /* the size of the DS structure in bytes */ | 37 | /* the name of the configuration */ |
38 | unsigned char sizeof_ds; | 38 | const char *name; |
39 | /* the size of one pointer-typed field in the DS structure in bytes; | 39 | /* the size of one pointer-typed field in the DS structure and |
40 | this covers the first 8 fields related to buffer management. */ | 40 | in the BTS and PEBS buffers in bytes; |
41 | this covers the first 8 DS fields related to buffer management. */ | ||
41 | unsigned char sizeof_field; | 42 | unsigned char sizeof_field; |
42 | /* the size of a BTS/PEBS record in bytes */ | 43 | /* the size of a BTS/PEBS record in bytes */ |
43 | unsigned char sizeof_rec[2]; | 44 | unsigned char sizeof_rec[2]; |
45 | /* a series of bit-masks to control various features indexed | ||
46 | * by enum ds_feature */ | ||
47 | unsigned long ctl[dsf_ctl_max]; | ||
44 | }; | 48 | }; |
45 | static struct ds_configuration ds_cfg; | 49 | static DEFINE_PER_CPU(struct ds_configuration, ds_cfg_array); |
46 | 50 | ||
51 | #define ds_cfg per_cpu(ds_cfg_array, smp_processor_id()) | ||
52 | |||
53 | #define MAX_SIZEOF_DS (12 * 8) /* maximal size of a DS configuration */ | ||
54 | #define MAX_SIZEOF_BTS (3 * 8) /* maximal size of a BTS record */ | ||
55 | #define DS_ALIGNMENT (1 << 3) /* BTS and PEBS buffer alignment */ | ||
56 | |||
57 | #define BTS_CONTROL \ | ||
58 | (ds_cfg.ctl[dsf_bts] | ds_cfg.ctl[dsf_bts_kernel] | ds_cfg.ctl[dsf_bts_user] |\ | ||
59 | ds_cfg.ctl[dsf_bts_overflow]) | ||
60 | |||
61 | |||
62 | /* | ||
63 | * A BTS or PEBS tracer. | ||
64 | * | ||
65 | * This holds the configuration of the tracer and serves as a handle | ||
66 | * to identify tracers. | ||
67 | */ | ||
68 | struct ds_tracer { | ||
69 | /* the DS context (partially) owned by this tracer */ | ||
70 | struct ds_context *context; | ||
71 | /* the buffer provided on ds_request() and its size in bytes */ | ||
72 | void *buffer; | ||
73 | size_t size; | ||
74 | }; | ||
75 | |||
76 | struct bts_tracer { | ||
77 | /* the common DS part */ | ||
78 | struct ds_tracer ds; | ||
79 | /* the trace including the DS configuration */ | ||
80 | struct bts_trace trace; | ||
81 | /* buffer overflow notification function */ | ||
82 | bts_ovfl_callback_t ovfl; | ||
83 | }; | ||
84 | |||
85 | struct pebs_tracer { | ||
86 | /* the common DS part */ | ||
87 | struct ds_tracer ds; | ||
88 | /* the trace including the DS configuration */ | ||
89 | struct pebs_trace trace; | ||
90 | /* buffer overflow notification function */ | ||
91 | pebs_ovfl_callback_t ovfl; | ||
92 | }; | ||
47 | 93 | ||
48 | /* | 94 | /* |
49 | * Debug Store (DS) save area configuration (see Intel64 and IA32 | 95 | * Debug Store (DS) save area configuration (see Intel64 and IA32 |
@@ -109,32 +155,9 @@ static inline void ds_set(unsigned char *base, enum ds_qualifier qual, | |||
109 | 155 | ||
110 | 156 | ||
111 | /* | 157 | /* |
112 | * Locking is done only for allocating BTS or PEBS resources and for | 158 | * Locking is done only for allocating BTS or PEBS resources. |
113 | * guarding context and buffer memory allocation. | ||
114 | * | ||
115 | * Most functions require the current task to own the ds context part | ||
116 | * they are going to access. All the locking is done when validating | ||
117 | * access to the context. | ||
118 | */ | 159 | */ |
119 | static spinlock_t ds_lock = __SPIN_LOCK_UNLOCKED(ds_lock); | 160 | static DEFINE_SPINLOCK(ds_lock); |
120 | |||
121 | /* | ||
122 | * Validate that the current task is allowed to access the BTS/PEBS | ||
123 | * buffer of the parameter task. | ||
124 | * | ||
125 | * Returns 0, if access is granted; -Eerrno, otherwise. | ||
126 | */ | ||
127 | static inline int ds_validate_access(struct ds_context *context, | ||
128 | enum ds_qualifier qual) | ||
129 | { | ||
130 | if (!context) | ||
131 | return -EPERM; | ||
132 | |||
133 | if (context->owner[qual] == current) | ||
134 | return 0; | ||
135 | |||
136 | return -EPERM; | ||
137 | } | ||
138 | 161 | ||
139 | 162 | ||
140 | /* | 163 | /* |
@@ -150,27 +173,32 @@ static inline int ds_validate_access(struct ds_context *context, | |||
150 | * >0 number of per-thread tracers | 173 | * >0 number of per-thread tracers |
151 | * <0 number of per-cpu tracers | 174 | * <0 number of per-cpu tracers |
152 | * | 175 | * |
153 | * The below functions to get and put tracers and to check the | ||
154 | * allocation type require the ds_lock to be held by the caller. | ||
155 | * | ||
156 | * Tracers essentially gives the number of ds contexts for a certain | 176 | * Tracers essentially gives the number of ds contexts for a certain |
157 | * type of allocation. | 177 | * type of allocation. |
158 | */ | 178 | */ |
159 | static long tracers; | 179 | static atomic_t tracers = ATOMIC_INIT(0); |
160 | 180 | ||
161 | static inline void get_tracer(struct task_struct *task) | 181 | static inline void get_tracer(struct task_struct *task) |
162 | { | 182 | { |
163 | tracers += (task ? 1 : -1); | 183 | if (task) |
184 | atomic_inc(&tracers); | ||
185 | else | ||
186 | atomic_dec(&tracers); | ||
164 | } | 187 | } |
165 | 188 | ||
166 | static inline void put_tracer(struct task_struct *task) | 189 | static inline void put_tracer(struct task_struct *task) |
167 | { | 190 | { |
168 | tracers -= (task ? 1 : -1); | 191 | if (task) |
192 | atomic_dec(&tracers); | ||
193 | else | ||
194 | atomic_inc(&tracers); | ||
169 | } | 195 | } |
170 | 196 | ||
171 | static inline int check_tracer(struct task_struct *task) | 197 | static inline int check_tracer(struct task_struct *task) |
172 | { | 198 | { |
173 | return (task ? (tracers >= 0) : (tracers <= 0)); | 199 | return task ? |
200 | (atomic_read(&tracers) >= 0) : | ||
201 | (atomic_read(&tracers) <= 0); | ||
174 | } | 202 | } |
175 | 203 | ||
176 | 204 | ||
@@ -183,99 +211,70 @@ static inline int check_tracer(struct task_struct *task) | |||
183 | * | 211 | * |
184 | * Contexts are use-counted. They are allocated on first access and | 212 | * Contexts are use-counted. They are allocated on first access and |
185 | * deallocated when the last user puts the context. | 213 | * deallocated when the last user puts the context. |
186 | * | ||
187 | * We distinguish between an allocating and a non-allocating get of a | ||
188 | * context: | ||
189 | * - the allocating get is used for requesting BTS/PEBS resources. It | ||
190 | * requires the caller to hold the global ds_lock. | ||
191 | * - the non-allocating get is used for all other cases. A | ||
192 | * non-existing context indicates an error. It acquires and releases | ||
193 | * the ds_lock itself for obtaining the context. | ||
194 | * | ||
195 | * A context and its DS configuration are allocated and deallocated | ||
196 | * together. A context always has a DS configuration of the | ||
197 | * appropriate size. | ||
198 | */ | ||
199 | static DEFINE_PER_CPU(struct ds_context *, system_context); | ||
200 | |||
201 | #define this_system_context per_cpu(system_context, smp_processor_id()) | ||
202 | |||
203 | /* | ||
204 | * Returns the pointer to the parameter task's context or to the | ||
205 | * system-wide context, if task is NULL. | ||
206 | * | ||
207 | * Increases the use count of the returned context, if not NULL. | ||
208 | */ | 214 | */ |
209 | static inline struct ds_context *ds_get_context(struct task_struct *task) | 215 | struct ds_context { |
210 | { | 216 | /* pointer to the DS configuration; goes into MSR_IA32_DS_AREA */ |
211 | struct ds_context *context; | 217 | unsigned char ds[MAX_SIZEOF_DS]; |
212 | unsigned long irq; | 218 | /* the owner of the BTS and PEBS configuration, respectively */ |
219 | struct bts_tracer *bts_master; | ||
220 | struct pebs_tracer *pebs_master; | ||
221 | /* use count */ | ||
222 | unsigned long count; | ||
223 | /* a pointer to the context location inside the thread_struct | ||
224 | * or the per_cpu context array */ | ||
225 | struct ds_context **this; | ||
226 | /* a pointer to the task owning this context, or NULL, if the | ||
227 | * context is owned by a cpu */ | ||
228 | struct task_struct *task; | ||
229 | }; | ||
213 | 230 | ||
214 | spin_lock_irqsave(&ds_lock, irq); | 231 | static DEFINE_PER_CPU(struct ds_context *, system_context_array); |
215 | 232 | ||
216 | context = (task ? task->thread.ds_ctx : this_system_context); | 233 | #define system_context per_cpu(system_context_array, smp_processor_id()) |
217 | if (context) | ||
218 | context->count++; | ||
219 | 234 | ||
220 | spin_unlock_irqrestore(&ds_lock, irq); | ||
221 | |||
222 | return context; | ||
223 | } | ||
224 | 235 | ||
225 | /* | 236 | static inline struct ds_context *ds_get_context(struct task_struct *task) |
226 | * Same as ds_get_context, but allocates the context and it's DS | ||
227 | * structure, if necessary; returns NULL; if out of memory. | ||
228 | */ | ||
229 | static inline struct ds_context *ds_alloc_context(struct task_struct *task) | ||
230 | { | 237 | { |
231 | struct ds_context **p_context = | 238 | struct ds_context **p_context = |
232 | (task ? &task->thread.ds_ctx : &this_system_context); | 239 | (task ? &task->thread.ds_ctx : &system_context); |
233 | struct ds_context *context = *p_context; | 240 | struct ds_context *context = NULL; |
241 | struct ds_context *new_context = NULL; | ||
234 | unsigned long irq; | 242 | unsigned long irq; |
235 | 243 | ||
236 | if (!context) { | 244 | /* Chances are small that we already have a context. */ |
237 | context = kzalloc(sizeof(*context), GFP_KERNEL); | 245 | new_context = kzalloc(sizeof(*new_context), GFP_KERNEL); |
238 | if (!context) | 246 | if (!new_context) |
239 | return NULL; | 247 | return NULL; |
240 | |||
241 | context->ds = kzalloc(ds_cfg.sizeof_ds, GFP_KERNEL); | ||
242 | if (!context->ds) { | ||
243 | kfree(context); | ||
244 | return NULL; | ||
245 | } | ||
246 | 248 | ||
247 | spin_lock_irqsave(&ds_lock, irq); | 249 | spin_lock_irqsave(&ds_lock, irq); |
248 | 250 | ||
249 | if (*p_context) { | 251 | context = *p_context; |
250 | kfree(context->ds); | 252 | if (!context) { |
251 | kfree(context); | 253 | context = new_context; |
252 | 254 | ||
253 | context = *p_context; | 255 | context->this = p_context; |
254 | } else { | 256 | context->task = task; |
255 | *p_context = context; | 257 | context->count = 0; |
256 | 258 | ||
257 | context->this = p_context; | 259 | if (task) |
258 | context->task = task; | 260 | set_tsk_thread_flag(task, TIF_DS_AREA_MSR); |
259 | 261 | ||
260 | if (task) | 262 | if (!task || (task == current)) |
261 | set_tsk_thread_flag(task, TIF_DS_AREA_MSR); | 263 | wrmsrl(MSR_IA32_DS_AREA, (unsigned long)context->ds); |
262 | 264 | ||
263 | if (!task || (task == current)) | 265 | *p_context = context; |
264 | wrmsrl(MSR_IA32_DS_AREA, | ||
265 | (unsigned long)context->ds); | ||
266 | } | ||
267 | spin_unlock_irqrestore(&ds_lock, irq); | ||
268 | } | 266 | } |
269 | 267 | ||
270 | context->count++; | 268 | context->count++; |
271 | 269 | ||
270 | spin_unlock_irqrestore(&ds_lock, irq); | ||
271 | |||
272 | if (context != new_context) | ||
273 | kfree(new_context); | ||
274 | |||
272 | return context; | 275 | return context; |
273 | } | 276 | } |
274 | 277 | ||
275 | /* | ||
276 | * Decreases the use count of the parameter context, if not NULL. | ||
277 | * Deallocates the context, if the use count reaches zero. | ||
278 | */ | ||
279 | static inline void ds_put_context(struct ds_context *context) | 278 | static inline void ds_put_context(struct ds_context *context) |
280 | { | 279 | { |
281 | unsigned long irq; | 280 | unsigned long irq; |
@@ -285,8 +284,10 @@ static inline void ds_put_context(struct ds_context *context) | |||
285 | 284 | ||
286 | spin_lock_irqsave(&ds_lock, irq); | 285 | spin_lock_irqsave(&ds_lock, irq); |
287 | 286 | ||
288 | if (--context->count) | 287 | if (--context->count) { |
289 | goto out; | 288 | spin_unlock_irqrestore(&ds_lock, irq); |
289 | return; | ||
290 | } | ||
290 | 291 | ||
291 | *(context->this) = NULL; | 292 | *(context->this) = NULL; |
292 | 293 | ||
@@ -296,135 +297,263 @@ static inline void ds_put_context(struct ds_context *context) | |||
296 | if (!context->task || (context->task == current)) | 297 | if (!context->task || (context->task == current)) |
297 | wrmsrl(MSR_IA32_DS_AREA, 0); | 298 | wrmsrl(MSR_IA32_DS_AREA, 0); |
298 | 299 | ||
299 | put_tracer(context->task); | 300 | spin_unlock_irqrestore(&ds_lock, irq); |
300 | 301 | ||
301 | /* free any leftover buffers from tracers that did not | ||
302 | * deallocate them properly. */ | ||
303 | kfree(context->buffer[ds_bts]); | ||
304 | kfree(context->buffer[ds_pebs]); | ||
305 | kfree(context->ds); | ||
306 | kfree(context); | 302 | kfree(context); |
307 | out: | ||
308 | spin_unlock_irqrestore(&ds_lock, irq); | ||
309 | } | 303 | } |
310 | 304 | ||
311 | 305 | ||
312 | /* | 306 | /* |
313 | * Handle a buffer overflow | 307 | * Call the tracer's callback on a buffer overflow. |
314 | * | 308 | * |
315 | * task: the task whose buffers are overflowing; | ||
316 | * NULL for a buffer overflow on the current cpu | ||
317 | * context: the ds context | 309 | * context: the ds context |
318 | * qual: the buffer type | 310 | * qual: the buffer type |
319 | */ | 311 | */ |
320 | static void ds_overflow(struct task_struct *task, struct ds_context *context, | 312 | static void ds_overflow(struct ds_context *context, enum ds_qualifier qual) |
321 | enum ds_qualifier qual) | ||
322 | { | 313 | { |
323 | if (!context) | 314 | switch (qual) { |
324 | return; | 315 | case ds_bts: |
325 | 316 | if (context->bts_master && | |
326 | if (context->callback[qual]) | 317 | context->bts_master->ovfl) |
327 | (*context->callback[qual])(task); | 318 | context->bts_master->ovfl(context->bts_master); |
328 | 319 | break; | |
329 | /* todo: do some more overflow handling */ | 320 | case ds_pebs: |
321 | if (context->pebs_master && | ||
322 | context->pebs_master->ovfl) | ||
323 | context->pebs_master->ovfl(context->pebs_master); | ||
324 | break; | ||
325 | } | ||
330 | } | 326 | } |
331 | 327 | ||
332 | 328 | ||
333 | /* | 329 | /* |
334 | * Allocate a non-pageable buffer of the parameter size. | 330 | * Write raw data into the BTS or PEBS buffer. |
335 | * Checks the memory and the locked memory rlimit. | ||
336 | * | 331 | * |
337 | * Returns the buffer, if successful; | 332 | * The remainder of any partially written record is zeroed out. |
338 | * NULL, if out of memory or rlimit exceeded. | ||
339 | * | 333 | * |
340 | * size: the requested buffer size in bytes | 334 | * context: the DS context |
341 | * pages (out): if not NULL, contains the number of pages reserved | 335 | * qual: the buffer type |
336 | * record: the data to write | ||
337 | * size: the size of the data | ||
342 | */ | 338 | */ |
343 | static inline void *ds_allocate_buffer(size_t size, unsigned int *pages) | 339 | static int ds_write(struct ds_context *context, enum ds_qualifier qual, |
340 | const void *record, size_t size) | ||
344 | { | 341 | { |
345 | unsigned long rlim, vm, pgsz; | 342 | int bytes_written = 0; |
346 | void *buffer; | ||
347 | 343 | ||
348 | pgsz = PAGE_ALIGN(size) >> PAGE_SHIFT; | 344 | if (!record) |
345 | return -EINVAL; | ||
349 | 346 | ||
350 | rlim = current->signal->rlim[RLIMIT_AS].rlim_cur >> PAGE_SHIFT; | 347 | while (size) { |
351 | vm = current->mm->total_vm + pgsz; | 348 | unsigned long base, index, end, write_end, int_th; |
352 | if (rlim < vm) | 349 | unsigned long write_size, adj_write_size; |
353 | return NULL; | ||
354 | 350 | ||
355 | rlim = current->signal->rlim[RLIMIT_MEMLOCK].rlim_cur >> PAGE_SHIFT; | 351 | /* |
356 | vm = current->mm->locked_vm + pgsz; | 352 | * write as much as possible without producing an |
357 | if (rlim < vm) | 353 | * overflow interrupt. |
358 | return NULL; | 354 | * |
355 | * interrupt_threshold must either be | ||
356 | * - bigger than absolute_maximum or | ||
357 | * - point to a record between buffer_base and absolute_maximum | ||
358 | * | ||
359 | * index points to a valid record. | ||
360 | */ | ||
361 | base = ds_get(context->ds, qual, ds_buffer_base); | ||
362 | index = ds_get(context->ds, qual, ds_index); | ||
363 | end = ds_get(context->ds, qual, ds_absolute_maximum); | ||
364 | int_th = ds_get(context->ds, qual, ds_interrupt_threshold); | ||
359 | 365 | ||
360 | buffer = kzalloc(size, GFP_KERNEL); | 366 | write_end = min(end, int_th); |
361 | if (!buffer) | ||
362 | return NULL; | ||
363 | 367 | ||
364 | current->mm->total_vm += pgsz; | 368 | /* if we are already beyond the interrupt threshold, |
365 | current->mm->locked_vm += pgsz; | 369 | * we fill the entire buffer */ |
370 | if (write_end <= index) | ||
371 | write_end = end; | ||
366 | 372 | ||
367 | if (pages) | 373 | if (write_end <= index) |
368 | *pages = pgsz; | 374 | break; |
375 | |||
376 | write_size = min((unsigned long) size, write_end - index); | ||
377 | memcpy((void *)index, record, write_size); | ||
369 | 378 | ||
370 | return buffer; | 379 | record = (const char *)record + write_size; |
380 | size -= write_size; | ||
381 | bytes_written += write_size; | ||
382 | |||
383 | adj_write_size = write_size / ds_cfg.sizeof_rec[qual]; | ||
384 | adj_write_size *= ds_cfg.sizeof_rec[qual]; | ||
385 | |||
386 | /* zero out trailing bytes */ | ||
387 | memset((char *)index + write_size, 0, | ||
388 | adj_write_size - write_size); | ||
389 | index += adj_write_size; | ||
390 | |||
391 | if (index >= end) | ||
392 | index = base; | ||
393 | ds_set(context->ds, qual, ds_index, index); | ||
394 | |||
395 | if (index >= int_th) | ||
396 | ds_overflow(context, qual); | ||
397 | } | ||
398 | |||
399 | return bytes_written; | ||
371 | } | 400 | } |
372 | 401 | ||
373 | static int ds_request(struct task_struct *task, void *base, size_t size, | 402 | |
374 | ds_ovfl_callback_t ovfl, enum ds_qualifier qual) | 403 | /* |
404 | * Branch Trace Store (BTS) uses the following format. Different | ||
405 | * architectures vary in the size of those fields. | ||
406 | * - source linear address | ||
407 | * - destination linear address | ||
408 | * - flags | ||
409 | * | ||
410 | * Later architectures use 64bit pointers throughout, whereas earlier | ||
411 | * architectures use 32bit pointers in 32bit mode. | ||
412 | * | ||
413 | * We compute the base address for the first 8 fields based on: | ||
414 | * - the field size stored in the DS configuration | ||
415 | * - the relative field position | ||
416 | * | ||
417 | * In order to store additional information in the BTS buffer, we use | ||
418 | * a special source address to indicate that the record requires | ||
419 | * special interpretation. | ||
420 | * | ||
421 | * Netburst indicated via a bit in the flags field whether the branch | ||
422 | * was predicted; this is ignored. | ||
423 | * | ||
424 | * We use two levels of abstraction: | ||
425 | * - the raw data level defined here | ||
426 | * - an arch-independent level defined in ds.h | ||
427 | */ | ||
428 | |||
429 | enum bts_field { | ||
430 | bts_from, | ||
431 | bts_to, | ||
432 | bts_flags, | ||
433 | |||
434 | bts_qual = bts_from, | ||
435 | bts_jiffies = bts_to, | ||
436 | bts_pid = bts_flags, | ||
437 | |||
438 | bts_qual_mask = (bts_qual_max - 1), | ||
439 | bts_escape = ((unsigned long)-1 & ~bts_qual_mask) | ||
440 | }; | ||
441 | |||
442 | static inline unsigned long bts_get(const char *base, enum bts_field field) | ||
375 | { | 443 | { |
376 | struct ds_context *context; | 444 | base += (ds_cfg.sizeof_field * field); |
377 | unsigned long buffer, adj; | 445 | return *(unsigned long *)base; |
378 | const unsigned long alignment = (1 << 3); | 446 | } |
379 | unsigned long irq; | 447 | |
380 | int error = 0; | 448 | static inline void bts_set(char *base, enum bts_field field, unsigned long val) |
449 | { | ||
450 | base += (ds_cfg.sizeof_field * field);; | ||
451 | (*(unsigned long *)base) = val; | ||
452 | } | ||
381 | 453 | ||
382 | if (!ds_cfg.sizeof_ds) | ||
383 | return -EOPNOTSUPP; | ||
384 | 454 | ||
385 | /* we require some space to do alignment adjustments below */ | 455 | /* |
386 | if (size < (alignment + ds_cfg.sizeof_rec[qual])) | 456 | * The raw BTS data is architecture dependent. |
457 | * | ||
458 | * For higher-level users, we give an arch-independent view. | ||
459 | * - ds.h defines struct bts_struct | ||
460 | * - bts_read translates one raw bts record into a bts_struct | ||
461 | * - bts_write translates one bts_struct into the raw format and | ||
462 | * writes it into the top of the parameter tracer's buffer. | ||
463 | * | ||
464 | * return: bytes read/written on success; -Eerrno, otherwise | ||
465 | */ | ||
466 | static int bts_read(struct bts_tracer *tracer, const void *at, | ||
467 | struct bts_struct *out) | ||
468 | { | ||
469 | if (!tracer) | ||
387 | return -EINVAL; | 470 | return -EINVAL; |
388 | 471 | ||
389 | /* buffer overflow notification is not yet implemented */ | 472 | if (at < tracer->trace.ds.begin) |
390 | if (ovfl) | 473 | return -EINVAL; |
391 | return -EOPNOTSUPP; | ||
392 | 474 | ||
475 | if (tracer->trace.ds.end < (at + tracer->trace.ds.size)) | ||
476 | return -EINVAL; | ||
393 | 477 | ||
394 | context = ds_alloc_context(task); | 478 | memset(out, 0, sizeof(*out)); |
395 | if (!context) | 479 | if ((bts_get(at, bts_qual) & ~bts_qual_mask) == bts_escape) { |
396 | return -ENOMEM; | 480 | out->qualifier = (bts_get(at, bts_qual) & bts_qual_mask); |
481 | out->variant.timestamp.jiffies = bts_get(at, bts_jiffies); | ||
482 | out->variant.timestamp.pid = bts_get(at, bts_pid); | ||
483 | } else { | ||
484 | out->qualifier = bts_branch; | ||
485 | out->variant.lbr.from = bts_get(at, bts_from); | ||
486 | out->variant.lbr.to = bts_get(at, bts_to); | ||
487 | |||
488 | if (!out->variant.lbr.from && !out->variant.lbr.to) | ||
489 | out->qualifier = bts_invalid; | ||
490 | } | ||
397 | 491 | ||
398 | spin_lock_irqsave(&ds_lock, irq); | 492 | return ds_cfg.sizeof_rec[ds_bts]; |
493 | } | ||
399 | 494 | ||
400 | error = -EPERM; | 495 | static int bts_write(struct bts_tracer *tracer, const struct bts_struct *in) |
401 | if (!check_tracer(task)) | 496 | { |
402 | goto out_unlock; | 497 | unsigned char raw[MAX_SIZEOF_BTS]; |
403 | 498 | ||
404 | get_tracer(task); | 499 | if (!tracer) |
500 | return -EINVAL; | ||
405 | 501 | ||
406 | error = -EALREADY; | 502 | if (MAX_SIZEOF_BTS < ds_cfg.sizeof_rec[ds_bts]) |
407 | if (context->owner[qual] == current) | 503 | return -EOVERFLOW; |
408 | goto out_put_tracer; | ||
409 | error = -EPERM; | ||
410 | if (context->owner[qual] != NULL) | ||
411 | goto out_put_tracer; | ||
412 | context->owner[qual] = current; | ||
413 | 504 | ||
414 | spin_unlock_irqrestore(&ds_lock, irq); | 505 | switch (in->qualifier) { |
506 | case bts_invalid: | ||
507 | bts_set(raw, bts_from, 0); | ||
508 | bts_set(raw, bts_to, 0); | ||
509 | bts_set(raw, bts_flags, 0); | ||
510 | break; | ||
511 | case bts_branch: | ||
512 | bts_set(raw, bts_from, in->variant.lbr.from); | ||
513 | bts_set(raw, bts_to, in->variant.lbr.to); | ||
514 | bts_set(raw, bts_flags, 0); | ||
515 | break; | ||
516 | case bts_task_arrives: | ||
517 | case bts_task_departs: | ||
518 | bts_set(raw, bts_qual, (bts_escape | in->qualifier)); | ||
519 | bts_set(raw, bts_jiffies, in->variant.timestamp.jiffies); | ||
520 | bts_set(raw, bts_pid, in->variant.timestamp.pid); | ||
521 | break; | ||
522 | default: | ||
523 | return -EINVAL; | ||
524 | } | ||
415 | 525 | ||
526 | return ds_write(tracer->ds.context, ds_bts, raw, | ||
527 | ds_cfg.sizeof_rec[ds_bts]); | ||
528 | } | ||
416 | 529 | ||
417 | error = -ENOMEM; | ||
418 | if (!base) { | ||
419 | base = ds_allocate_buffer(size, &context->pages[qual]); | ||
420 | if (!base) | ||
421 | goto out_release; | ||
422 | 530 | ||
423 | context->buffer[qual] = base; | 531 | static void ds_write_config(struct ds_context *context, |
424 | } | 532 | struct ds_trace *cfg, enum ds_qualifier qual) |
425 | error = 0; | 533 | { |
534 | unsigned char *ds = context->ds; | ||
535 | |||
536 | ds_set(ds, qual, ds_buffer_base, (unsigned long)cfg->begin); | ||
537 | ds_set(ds, qual, ds_index, (unsigned long)cfg->top); | ||
538 | ds_set(ds, qual, ds_absolute_maximum, (unsigned long)cfg->end); | ||
539 | ds_set(ds, qual, ds_interrupt_threshold, (unsigned long)cfg->ith); | ||
540 | } | ||
541 | |||
542 | static void ds_read_config(struct ds_context *context, | ||
543 | struct ds_trace *cfg, enum ds_qualifier qual) | ||
544 | { | ||
545 | unsigned char *ds = context->ds; | ||
426 | 546 | ||
427 | context->callback[qual] = ovfl; | 547 | cfg->begin = (void *)ds_get(ds, qual, ds_buffer_base); |
548 | cfg->top = (void *)ds_get(ds, qual, ds_index); | ||
549 | cfg->end = (void *)ds_get(ds, qual, ds_absolute_maximum); | ||
550 | cfg->ith = (void *)ds_get(ds, qual, ds_interrupt_threshold); | ||
551 | } | ||
552 | |||
553 | static void ds_init_ds_trace(struct ds_trace *trace, enum ds_qualifier qual, | ||
554 | void *base, size_t size, size_t ith, | ||
555 | unsigned int flags) { | ||
556 | unsigned long buffer, adj; | ||
428 | 557 | ||
429 | /* adjust the buffer address and size to meet alignment | 558 | /* adjust the buffer address and size to meet alignment |
430 | * constraints: | 559 | * constraints: |
@@ -436,410 +565,383 @@ static int ds_request(struct task_struct *task, void *base, size_t size, | |||
436 | */ | 565 | */ |
437 | buffer = (unsigned long)base; | 566 | buffer = (unsigned long)base; |
438 | 567 | ||
439 | adj = ALIGN(buffer, alignment) - buffer; | 568 | adj = ALIGN(buffer, DS_ALIGNMENT) - buffer; |
440 | buffer += adj; | 569 | buffer += adj; |
441 | size -= adj; | 570 | size -= adj; |
442 | 571 | ||
443 | size /= ds_cfg.sizeof_rec[qual]; | 572 | trace->n = size / ds_cfg.sizeof_rec[qual]; |
444 | size *= ds_cfg.sizeof_rec[qual]; | 573 | trace->size = ds_cfg.sizeof_rec[qual]; |
445 | |||
446 | ds_set(context->ds, qual, ds_buffer_base, buffer); | ||
447 | ds_set(context->ds, qual, ds_index, buffer); | ||
448 | ds_set(context->ds, qual, ds_absolute_maximum, buffer + size); | ||
449 | 574 | ||
450 | if (ovfl) { | 575 | size = (trace->n * trace->size); |
451 | /* todo: select a suitable interrupt threshold */ | ||
452 | } else | ||
453 | ds_set(context->ds, qual, | ||
454 | ds_interrupt_threshold, buffer + size + 1); | ||
455 | 576 | ||
456 | /* we keep the context until ds_release */ | 577 | trace->begin = (void *)buffer; |
457 | return error; | 578 | trace->top = trace->begin; |
458 | 579 | trace->end = (void *)(buffer + size); | |
459 | out_release: | 580 | /* The value for 'no threshold' is -1, which will set the |
460 | context->owner[qual] = NULL; | 581 | * threshold outside of the buffer, just like we want it. |
461 | ds_put_context(context); | 582 | */ |
462 | put_tracer(task); | 583 | trace->ith = (void *)(buffer + size - ith); |
463 | return error; | ||
464 | |||
465 | out_put_tracer: | ||
466 | spin_unlock_irqrestore(&ds_lock, irq); | ||
467 | ds_put_context(context); | ||
468 | put_tracer(task); | ||
469 | return error; | ||
470 | 584 | ||
471 | out_unlock: | 585 | trace->flags = flags; |
472 | spin_unlock_irqrestore(&ds_lock, irq); | ||
473 | ds_put_context(context); | ||
474 | return error; | ||
475 | } | 586 | } |
476 | 587 | ||
477 | int ds_request_bts(struct task_struct *task, void *base, size_t size, | ||
478 | ds_ovfl_callback_t ovfl) | ||
479 | { | ||
480 | return ds_request(task, base, size, ovfl, ds_bts); | ||
481 | } | ||
482 | 588 | ||
483 | int ds_request_pebs(struct task_struct *task, void *base, size_t size, | 589 | static int ds_request(struct ds_tracer *tracer, struct ds_trace *trace, |
484 | ds_ovfl_callback_t ovfl) | 590 | enum ds_qualifier qual, struct task_struct *task, |
485 | { | 591 | void *base, size_t size, size_t th, unsigned int flags) |
486 | return ds_request(task, base, size, ovfl, ds_pebs); | ||
487 | } | ||
488 | |||
489 | static int ds_release(struct task_struct *task, enum ds_qualifier qual) | ||
490 | { | 592 | { |
491 | struct ds_context *context; | 593 | struct ds_context *context; |
492 | int error; | 594 | int error; |
493 | 595 | ||
494 | context = ds_get_context(task); | 596 | error = -EINVAL; |
495 | error = ds_validate_access(context, qual); | 597 | if (!base) |
496 | if (error < 0) | ||
497 | goto out; | 598 | goto out; |
498 | 599 | ||
499 | kfree(context->buffer[qual]); | 600 | /* we require some space to do alignment adjustments below */ |
500 | context->buffer[qual] = NULL; | 601 | error = -EINVAL; |
501 | 602 | if (size < (DS_ALIGNMENT + ds_cfg.sizeof_rec[qual])) | |
502 | current->mm->total_vm -= context->pages[qual]; | 603 | goto out; |
503 | current->mm->locked_vm -= context->pages[qual]; | ||
504 | context->pages[qual] = 0; | ||
505 | context->owner[qual] = NULL; | ||
506 | |||
507 | /* | ||
508 | * we put the context twice: | ||
509 | * once for the ds_get_context | ||
510 | * once for the corresponding ds_request | ||
511 | */ | ||
512 | ds_put_context(context); | ||
513 | out: | ||
514 | ds_put_context(context); | ||
515 | return error; | ||
516 | } | ||
517 | 604 | ||
518 | int ds_release_bts(struct task_struct *task) | 605 | if (th != (size_t)-1) { |
519 | { | 606 | th *= ds_cfg.sizeof_rec[qual]; |
520 | return ds_release(task, ds_bts); | ||
521 | } | ||
522 | 607 | ||
523 | int ds_release_pebs(struct task_struct *task) | 608 | error = -EINVAL; |
524 | { | 609 | if (size <= th) |
525 | return ds_release(task, ds_pebs); | 610 | goto out; |
526 | } | 611 | } |
527 | 612 | ||
528 | static int ds_get_index(struct task_struct *task, size_t *pos, | 613 | tracer->buffer = base; |
529 | enum ds_qualifier qual) | 614 | tracer->size = size; |
530 | { | ||
531 | struct ds_context *context; | ||
532 | unsigned long base, index; | ||
533 | int error; | ||
534 | 615 | ||
616 | error = -ENOMEM; | ||
535 | context = ds_get_context(task); | 617 | context = ds_get_context(task); |
536 | error = ds_validate_access(context, qual); | 618 | if (!context) |
537 | if (error < 0) | ||
538 | goto out; | 619 | goto out; |
620 | tracer->context = context; | ||
539 | 621 | ||
540 | base = ds_get(context->ds, qual, ds_buffer_base); | 622 | ds_init_ds_trace(trace, qual, base, size, th, flags); |
541 | index = ds_get(context->ds, qual, ds_index); | ||
542 | 623 | ||
543 | error = ((index - base) / ds_cfg.sizeof_rec[qual]); | 624 | error = 0; |
544 | if (pos) | ||
545 | *pos = error; | ||
546 | out: | 625 | out: |
547 | ds_put_context(context); | ||
548 | return error; | 626 | return error; |
549 | } | 627 | } |
550 | 628 | ||
551 | int ds_get_bts_index(struct task_struct *task, size_t *pos) | 629 | struct bts_tracer *ds_request_bts(struct task_struct *task, |
552 | { | 630 | void *base, size_t size, |
553 | return ds_get_index(task, pos, ds_bts); | 631 | bts_ovfl_callback_t ovfl, size_t th, |
554 | } | 632 | unsigned int flags) |
555 | |||
556 | int ds_get_pebs_index(struct task_struct *task, size_t *pos) | ||
557 | { | 633 | { |
558 | return ds_get_index(task, pos, ds_pebs); | 634 | struct bts_tracer *tracer; |
559 | } | 635 | unsigned long irq; |
560 | |||
561 | static int ds_get_end(struct task_struct *task, size_t *pos, | ||
562 | enum ds_qualifier qual) | ||
563 | { | ||
564 | struct ds_context *context; | ||
565 | unsigned long base, end; | ||
566 | int error; | 636 | int error; |
567 | 637 | ||
568 | context = ds_get_context(task); | 638 | error = -EOPNOTSUPP; |
569 | error = ds_validate_access(context, qual); | 639 | if (!ds_cfg.ctl[dsf_bts]) |
570 | if (error < 0) | ||
571 | goto out; | 640 | goto out; |
572 | 641 | ||
573 | base = ds_get(context->ds, qual, ds_buffer_base); | 642 | /* buffer overflow notification is not yet implemented */ |
574 | end = ds_get(context->ds, qual, ds_absolute_maximum); | 643 | error = -EOPNOTSUPP; |
644 | if (ovfl) | ||
645 | goto out; | ||
575 | 646 | ||
576 | error = ((end - base) / ds_cfg.sizeof_rec[qual]); | 647 | error = -ENOMEM; |
577 | if (pos) | 648 | tracer = kzalloc(sizeof(*tracer), GFP_KERNEL); |
578 | *pos = error; | 649 | if (!tracer) |
579 | out: | 650 | goto out; |
580 | ds_put_context(context); | 651 | tracer->ovfl = ovfl; |
581 | return error; | ||
582 | } | ||
583 | 652 | ||
584 | int ds_get_bts_end(struct task_struct *task, size_t *pos) | 653 | error = ds_request(&tracer->ds, &tracer->trace.ds, |
585 | { | 654 | ds_bts, task, base, size, th, flags); |
586 | return ds_get_end(task, pos, ds_bts); | 655 | if (error < 0) |
587 | } | 656 | goto out_tracer; |
588 | 657 | ||
589 | int ds_get_pebs_end(struct task_struct *task, size_t *pos) | ||
590 | { | ||
591 | return ds_get_end(task, pos, ds_pebs); | ||
592 | } | ||
593 | 658 | ||
594 | static int ds_access(struct task_struct *task, size_t index, | 659 | spin_lock_irqsave(&ds_lock, irq); |
595 | const void **record, enum ds_qualifier qual) | ||
596 | { | ||
597 | struct ds_context *context; | ||
598 | unsigned long base, idx; | ||
599 | int error; | ||
600 | 660 | ||
601 | if (!record) | 661 | error = -EPERM; |
602 | return -EINVAL; | 662 | if (!check_tracer(task)) |
663 | goto out_unlock; | ||
664 | get_tracer(task); | ||
603 | 665 | ||
604 | context = ds_get_context(task); | 666 | error = -EPERM; |
605 | error = ds_validate_access(context, qual); | 667 | if (tracer->ds.context->bts_master) |
606 | if (error < 0) | 668 | goto out_put_tracer; |
607 | goto out; | 669 | tracer->ds.context->bts_master = tracer; |
608 | 670 | ||
609 | base = ds_get(context->ds, qual, ds_buffer_base); | 671 | spin_unlock_irqrestore(&ds_lock, irq); |
610 | idx = base + (index * ds_cfg.sizeof_rec[qual]); | ||
611 | 672 | ||
612 | error = -EINVAL; | ||
613 | if (idx > ds_get(context->ds, qual, ds_absolute_maximum)) | ||
614 | goto out; | ||
615 | 673 | ||
616 | *record = (const void *)idx; | 674 | tracer->trace.read = bts_read; |
617 | error = ds_cfg.sizeof_rec[qual]; | 675 | tracer->trace.write = bts_write; |
618 | out: | ||
619 | ds_put_context(context); | ||
620 | return error; | ||
621 | } | ||
622 | 676 | ||
623 | int ds_access_bts(struct task_struct *task, size_t index, const void **record) | 677 | ds_write_config(tracer->ds.context, &tracer->trace.ds, ds_bts); |
624 | { | 678 | ds_resume_bts(tracer); |
625 | return ds_access(task, index, record, ds_bts); | ||
626 | } | ||
627 | 679 | ||
628 | int ds_access_pebs(struct task_struct *task, size_t index, const void **record) | 680 | return tracer; |
629 | { | 681 | |
630 | return ds_access(task, index, record, ds_pebs); | 682 | out_put_tracer: |
683 | put_tracer(task); | ||
684 | out_unlock: | ||
685 | spin_unlock_irqrestore(&ds_lock, irq); | ||
686 | ds_put_context(tracer->ds.context); | ||
687 | out_tracer: | ||
688 | kfree(tracer); | ||
689 | out: | ||
690 | return ERR_PTR(error); | ||
631 | } | 691 | } |
632 | 692 | ||
633 | static int ds_write(struct task_struct *task, const void *record, size_t size, | 693 | struct pebs_tracer *ds_request_pebs(struct task_struct *task, |
634 | enum ds_qualifier qual, int force) | 694 | void *base, size_t size, |
695 | pebs_ovfl_callback_t ovfl, size_t th, | ||
696 | unsigned int flags) | ||
635 | { | 697 | { |
636 | struct ds_context *context; | 698 | struct pebs_tracer *tracer; |
699 | unsigned long irq; | ||
637 | int error; | 700 | int error; |
638 | 701 | ||
639 | if (!record) | 702 | /* buffer overflow notification is not yet implemented */ |
640 | return -EINVAL; | 703 | error = -EOPNOTSUPP; |
704 | if (ovfl) | ||
705 | goto out; | ||
641 | 706 | ||
642 | error = -EPERM; | 707 | error = -ENOMEM; |
643 | context = ds_get_context(task); | 708 | tracer = kzalloc(sizeof(*tracer), GFP_KERNEL); |
644 | if (!context) | 709 | if (!tracer) |
645 | goto out; | 710 | goto out; |
711 | tracer->ovfl = ovfl; | ||
646 | 712 | ||
647 | if (!force) { | 713 | error = ds_request(&tracer->ds, &tracer->trace.ds, |
648 | error = ds_validate_access(context, qual); | 714 | ds_pebs, task, base, size, th, flags); |
649 | if (error < 0) | 715 | if (error < 0) |
650 | goto out; | 716 | goto out_tracer; |
651 | } | ||
652 | 717 | ||
653 | error = 0; | 718 | spin_lock_irqsave(&ds_lock, irq); |
654 | while (size) { | ||
655 | unsigned long base, index, end, write_end, int_th; | ||
656 | unsigned long write_size, adj_write_size; | ||
657 | 719 | ||
658 | /* | 720 | error = -EPERM; |
659 | * write as much as possible without producing an | 721 | if (!check_tracer(task)) |
660 | * overflow interrupt. | 722 | goto out_unlock; |
661 | * | 723 | get_tracer(task); |
662 | * interrupt_threshold must either be | ||
663 | * - bigger than absolute_maximum or | ||
664 | * - point to a record between buffer_base and absolute_maximum | ||
665 | * | ||
666 | * index points to a valid record. | ||
667 | */ | ||
668 | base = ds_get(context->ds, qual, ds_buffer_base); | ||
669 | index = ds_get(context->ds, qual, ds_index); | ||
670 | end = ds_get(context->ds, qual, ds_absolute_maximum); | ||
671 | int_th = ds_get(context->ds, qual, ds_interrupt_threshold); | ||
672 | 724 | ||
673 | write_end = min(end, int_th); | 725 | error = -EPERM; |
726 | if (tracer->ds.context->pebs_master) | ||
727 | goto out_put_tracer; | ||
728 | tracer->ds.context->pebs_master = tracer; | ||
674 | 729 | ||
675 | /* if we are already beyond the interrupt threshold, | 730 | spin_unlock_irqrestore(&ds_lock, irq); |
676 | * we fill the entire buffer */ | ||
677 | if (write_end <= index) | ||
678 | write_end = end; | ||
679 | 731 | ||
680 | if (write_end <= index) | 732 | ds_write_config(tracer->ds.context, &tracer->trace.ds, ds_bts); |
681 | goto out; | 733 | ds_resume_pebs(tracer); |
682 | 734 | ||
683 | write_size = min((unsigned long) size, write_end - index); | 735 | return tracer; |
684 | memcpy((void *)index, record, write_size); | ||
685 | 736 | ||
686 | record = (const char *)record + write_size; | 737 | out_put_tracer: |
687 | size -= write_size; | 738 | put_tracer(task); |
688 | error += write_size; | 739 | out_unlock: |
740 | spin_unlock_irqrestore(&ds_lock, irq); | ||
741 | ds_put_context(tracer->ds.context); | ||
742 | out_tracer: | ||
743 | kfree(tracer); | ||
744 | out: | ||
745 | return ERR_PTR(error); | ||
746 | } | ||
689 | 747 | ||
690 | adj_write_size = write_size / ds_cfg.sizeof_rec[qual]; | 748 | void ds_release_bts(struct bts_tracer *tracer) |
691 | adj_write_size *= ds_cfg.sizeof_rec[qual]; | 749 | { |
750 | if (!tracer) | ||
751 | return; | ||
692 | 752 | ||
693 | /* zero out trailing bytes */ | 753 | ds_suspend_bts(tracer); |
694 | memset((char *)index + write_size, 0, | ||
695 | adj_write_size - write_size); | ||
696 | index += adj_write_size; | ||
697 | 754 | ||
698 | if (index >= end) | 755 | WARN_ON_ONCE(tracer->ds.context->bts_master != tracer); |
699 | index = base; | 756 | tracer->ds.context->bts_master = NULL; |
700 | ds_set(context->ds, qual, ds_index, index); | ||
701 | 757 | ||
702 | if (index >= int_th) | 758 | put_tracer(tracer->ds.context->task); |
703 | ds_overflow(task, context, qual); | 759 | ds_put_context(tracer->ds.context); |
704 | } | ||
705 | 760 | ||
706 | out: | 761 | kfree(tracer); |
707 | ds_put_context(context); | ||
708 | return error; | ||
709 | } | 762 | } |
710 | 763 | ||
711 | int ds_write_bts(struct task_struct *task, const void *record, size_t size) | 764 | void ds_suspend_bts(struct bts_tracer *tracer) |
712 | { | 765 | { |
713 | return ds_write(task, record, size, ds_bts, /* force = */ 0); | 766 | struct task_struct *task; |
714 | } | ||
715 | 767 | ||
716 | int ds_write_pebs(struct task_struct *task, const void *record, size_t size) | 768 | if (!tracer) |
717 | { | 769 | return; |
718 | return ds_write(task, record, size, ds_pebs, /* force = */ 0); | ||
719 | } | ||
720 | 770 | ||
721 | int ds_unchecked_write_bts(struct task_struct *task, | 771 | task = tracer->ds.context->task; |
722 | const void *record, size_t size) | ||
723 | { | ||
724 | return ds_write(task, record, size, ds_bts, /* force = */ 1); | ||
725 | } | ||
726 | 772 | ||
727 | int ds_unchecked_write_pebs(struct task_struct *task, | 773 | if (!task || (task == current)) |
728 | const void *record, size_t size) | 774 | update_debugctlmsr(get_debugctlmsr() & ~BTS_CONTROL); |
729 | { | 775 | |
730 | return ds_write(task, record, size, ds_pebs, /* force = */ 1); | 776 | if (task) { |
777 | task->thread.debugctlmsr &= ~BTS_CONTROL; | ||
778 | |||
779 | if (!task->thread.debugctlmsr) | ||
780 | clear_tsk_thread_flag(task, TIF_DEBUGCTLMSR); | ||
781 | } | ||
731 | } | 782 | } |
732 | 783 | ||
733 | static int ds_reset_or_clear(struct task_struct *task, | 784 | void ds_resume_bts(struct bts_tracer *tracer) |
734 | enum ds_qualifier qual, int clear) | ||
735 | { | 785 | { |
736 | struct ds_context *context; | 786 | struct task_struct *task; |
737 | unsigned long base, end; | 787 | unsigned long control; |
738 | int error; | ||
739 | 788 | ||
740 | context = ds_get_context(task); | 789 | if (!tracer) |
741 | error = ds_validate_access(context, qual); | 790 | return; |
742 | if (error < 0) | ||
743 | goto out; | ||
744 | 791 | ||
745 | base = ds_get(context->ds, qual, ds_buffer_base); | 792 | task = tracer->ds.context->task; |
746 | end = ds_get(context->ds, qual, ds_absolute_maximum); | ||
747 | 793 | ||
748 | if (clear) | 794 | control = ds_cfg.ctl[dsf_bts]; |
749 | memset((void *)base, 0, end - base); | 795 | if (!(tracer->trace.ds.flags & BTS_KERNEL)) |
796 | control |= ds_cfg.ctl[dsf_bts_kernel]; | ||
797 | if (!(tracer->trace.ds.flags & BTS_USER)) | ||
798 | control |= ds_cfg.ctl[dsf_bts_user]; | ||
750 | 799 | ||
751 | ds_set(context->ds, qual, ds_index, base); | 800 | if (task) { |
801 | task->thread.debugctlmsr |= control; | ||
802 | set_tsk_thread_flag(task, TIF_DEBUGCTLMSR); | ||
803 | } | ||
752 | 804 | ||
753 | error = 0; | 805 | if (!task || (task == current)) |
754 | out: | 806 | update_debugctlmsr(get_debugctlmsr() | control); |
755 | ds_put_context(context); | ||
756 | return error; | ||
757 | } | 807 | } |
758 | 808 | ||
759 | int ds_reset_bts(struct task_struct *task) | 809 | void ds_release_pebs(struct pebs_tracer *tracer) |
760 | { | 810 | { |
761 | return ds_reset_or_clear(task, ds_bts, /* clear = */ 0); | 811 | if (!tracer) |
812 | return; | ||
813 | |||
814 | ds_suspend_pebs(tracer); | ||
815 | |||
816 | WARN_ON_ONCE(tracer->ds.context->pebs_master != tracer); | ||
817 | tracer->ds.context->pebs_master = NULL; | ||
818 | |||
819 | put_tracer(tracer->ds.context->task); | ||
820 | ds_put_context(tracer->ds.context); | ||
821 | |||
822 | kfree(tracer); | ||
762 | } | 823 | } |
763 | 824 | ||
764 | int ds_reset_pebs(struct task_struct *task) | 825 | void ds_suspend_pebs(struct pebs_tracer *tracer) |
765 | { | 826 | { |
766 | return ds_reset_or_clear(task, ds_pebs, /* clear = */ 0); | 827 | |
767 | } | 828 | } |
768 | 829 | ||
769 | int ds_clear_bts(struct task_struct *task) | 830 | void ds_resume_pebs(struct pebs_tracer *tracer) |
770 | { | 831 | { |
771 | return ds_reset_or_clear(task, ds_bts, /* clear = */ 1); | 832 | |
772 | } | 833 | } |
773 | 834 | ||
774 | int ds_clear_pebs(struct task_struct *task) | 835 | const struct bts_trace *ds_read_bts(struct bts_tracer *tracer) |
775 | { | 836 | { |
776 | return ds_reset_or_clear(task, ds_pebs, /* clear = */ 1); | 837 | if (!tracer) |
838 | return NULL; | ||
839 | |||
840 | ds_read_config(tracer->ds.context, &tracer->trace.ds, ds_bts); | ||
841 | return &tracer->trace; | ||
777 | } | 842 | } |
778 | 843 | ||
779 | int ds_get_pebs_reset(struct task_struct *task, u64 *value) | 844 | const struct pebs_trace *ds_read_pebs(struct pebs_tracer *tracer) |
780 | { | 845 | { |
781 | struct ds_context *context; | 846 | if (!tracer) |
782 | int error; | 847 | return NULL; |
848 | |||
849 | ds_read_config(tracer->ds.context, &tracer->trace.ds, ds_pebs); | ||
850 | tracer->trace.reset_value = | ||
851 | *(u64 *)(tracer->ds.context->ds + (ds_cfg.sizeof_field * 8)); | ||
783 | 852 | ||
784 | if (!value) | 853 | return &tracer->trace; |
854 | } | ||
855 | |||
856 | int ds_reset_bts(struct bts_tracer *tracer) | ||
857 | { | ||
858 | if (!tracer) | ||
785 | return -EINVAL; | 859 | return -EINVAL; |
786 | 860 | ||
787 | context = ds_get_context(task); | 861 | tracer->trace.ds.top = tracer->trace.ds.begin; |
788 | error = ds_validate_access(context, ds_pebs); | ||
789 | if (error < 0) | ||
790 | goto out; | ||
791 | 862 | ||
792 | *value = *(u64 *)(context->ds + (ds_cfg.sizeof_field * 8)); | 863 | ds_set(tracer->ds.context->ds, ds_bts, ds_index, |
864 | (unsigned long)tracer->trace.ds.top); | ||
793 | 865 | ||
794 | error = 0; | 866 | return 0; |
795 | out: | ||
796 | ds_put_context(context); | ||
797 | return error; | ||
798 | } | 867 | } |
799 | 868 | ||
800 | int ds_set_pebs_reset(struct task_struct *task, u64 value) | 869 | int ds_reset_pebs(struct pebs_tracer *tracer) |
801 | { | 870 | { |
802 | struct ds_context *context; | 871 | if (!tracer) |
803 | int error; | 872 | return -EINVAL; |
804 | 873 | ||
805 | context = ds_get_context(task); | 874 | tracer->trace.ds.top = tracer->trace.ds.begin; |
806 | error = ds_validate_access(context, ds_pebs); | ||
807 | if (error < 0) | ||
808 | goto out; | ||
809 | 875 | ||
810 | *(u64 *)(context->ds + (ds_cfg.sizeof_field * 8)) = value; | 876 | ds_set(tracer->ds.context->ds, ds_bts, ds_index, |
877 | (unsigned long)tracer->trace.ds.top); | ||
811 | 878 | ||
812 | error = 0; | 879 | return 0; |
813 | out: | 880 | } |
814 | ds_put_context(context); | 881 | |
815 | return error; | 882 | int ds_set_pebs_reset(struct pebs_tracer *tracer, u64 value) |
883 | { | ||
884 | if (!tracer) | ||
885 | return -EINVAL; | ||
886 | |||
887 | *(u64 *)(tracer->ds.context->ds + (ds_cfg.sizeof_field * 8)) = value; | ||
888 | |||
889 | return 0; | ||
816 | } | 890 | } |
817 | 891 | ||
818 | static const struct ds_configuration ds_cfg_var = { | 892 | static const struct ds_configuration ds_cfg_netburst = { |
819 | .sizeof_ds = sizeof(long) * 12, | 893 | .name = "netburst", |
820 | .sizeof_field = sizeof(long), | 894 | .ctl[dsf_bts] = (1 << 2) | (1 << 3), |
821 | .sizeof_rec[ds_bts] = sizeof(long) * 3, | 895 | .ctl[dsf_bts_kernel] = (1 << 5), |
896 | .ctl[dsf_bts_user] = (1 << 6), | ||
897 | |||
898 | .sizeof_field = sizeof(long), | ||
899 | .sizeof_rec[ds_bts] = sizeof(long) * 3, | ||
822 | #ifdef __i386__ | 900 | #ifdef __i386__ |
823 | .sizeof_rec[ds_pebs] = sizeof(long) * 10 | 901 | .sizeof_rec[ds_pebs] = sizeof(long) * 10, |
824 | #else | 902 | #else |
825 | .sizeof_rec[ds_pebs] = sizeof(long) * 18 | 903 | .sizeof_rec[ds_pebs] = sizeof(long) * 18, |
826 | #endif | 904 | #endif |
827 | }; | 905 | }; |
828 | static const struct ds_configuration ds_cfg_64 = { | 906 | static const struct ds_configuration ds_cfg_pentium_m = { |
829 | .sizeof_ds = 8 * 12, | 907 | .name = "pentium m", |
830 | .sizeof_field = 8, | 908 | .ctl[dsf_bts] = (1 << 6) | (1 << 7), |
831 | .sizeof_rec[ds_bts] = 8 * 3, | 909 | |
910 | .sizeof_field = sizeof(long), | ||
911 | .sizeof_rec[ds_bts] = sizeof(long) * 3, | ||
832 | #ifdef __i386__ | 912 | #ifdef __i386__ |
833 | .sizeof_rec[ds_pebs] = 8 * 10 | 913 | .sizeof_rec[ds_pebs] = sizeof(long) * 10, |
834 | #else | 914 | #else |
835 | .sizeof_rec[ds_pebs] = 8 * 18 | 915 | .sizeof_rec[ds_pebs] = sizeof(long) * 18, |
836 | #endif | 916 | #endif |
837 | }; | 917 | }; |
918 | static const struct ds_configuration ds_cfg_core2 = { | ||
919 | .name = "core 2", | ||
920 | .ctl[dsf_bts] = (1 << 6) | (1 << 7), | ||
921 | .ctl[dsf_bts_kernel] = (1 << 9), | ||
922 | .ctl[dsf_bts_user] = (1 << 10), | ||
923 | |||
924 | .sizeof_field = 8, | ||
925 | .sizeof_rec[ds_bts] = 8 * 3, | ||
926 | .sizeof_rec[ds_pebs] = 8 * 18, | ||
927 | }; | ||
838 | 928 | ||
839 | static inline void | 929 | static void |
840 | ds_configure(const struct ds_configuration *cfg) | 930 | ds_configure(const struct ds_configuration *cfg) |
841 | { | 931 | { |
932 | memset(&ds_cfg, 0, sizeof(ds_cfg)); | ||
842 | ds_cfg = *cfg; | 933 | ds_cfg = *cfg; |
934 | |||
935 | printk(KERN_INFO "[ds] using %s configuration\n", ds_cfg.name); | ||
936 | |||
937 | if (!cpu_has_bts) { | ||
938 | ds_cfg.ctl[dsf_bts] = 0; | ||
939 | printk(KERN_INFO "[ds] bts not available\n"); | ||
940 | } | ||
941 | if (!cpu_has_pebs) | ||
942 | printk(KERN_INFO "[ds] pebs not available\n"); | ||
943 | |||
944 | WARN_ON_ONCE(MAX_SIZEOF_DS < (12 * ds_cfg.sizeof_field)); | ||
843 | } | 945 | } |
844 | 946 | ||
845 | void __cpuinit ds_init_intel(struct cpuinfo_x86 *c) | 947 | void __cpuinit ds_init_intel(struct cpuinfo_x86 *c) |
@@ -847,16 +949,15 @@ void __cpuinit ds_init_intel(struct cpuinfo_x86 *c) | |||
847 | switch (c->x86) { | 949 | switch (c->x86) { |
848 | case 0x6: | 950 | case 0x6: |
849 | switch (c->x86_model) { | 951 | switch (c->x86_model) { |
952 | case 0 ... 0xC: | ||
953 | /* sorry, don't know about them */ | ||
954 | break; | ||
850 | case 0xD: | 955 | case 0xD: |
851 | case 0xE: /* Pentium M */ | 956 | case 0xE: /* Pentium M */ |
852 | ds_configure(&ds_cfg_var); | 957 | ds_configure(&ds_cfg_pentium_m); |
853 | break; | 958 | break; |
854 | case 0xF: /* Core2 */ | 959 | default: /* Core2, Atom, ... */ |
855 | case 0x1C: /* Atom */ | 960 | ds_configure(&ds_cfg_core2); |
856 | ds_configure(&ds_cfg_64); | ||
857 | break; | ||
858 | default: | ||
859 | /* sorry, don't know about them */ | ||
860 | break; | 961 | break; |
861 | } | 962 | } |
862 | break; | 963 | break; |
@@ -865,7 +966,7 @@ void __cpuinit ds_init_intel(struct cpuinfo_x86 *c) | |||
865 | case 0x0: | 966 | case 0x0: |
866 | case 0x1: | 967 | case 0x1: |
867 | case 0x2: /* Netburst */ | 968 | case 0x2: /* Netburst */ |
868 | ds_configure(&ds_cfg_var); | 969 | ds_configure(&ds_cfg_netburst); |
869 | break; | 970 | break; |
870 | default: | 971 | default: |
871 | /* sorry, don't know about them */ | 972 | /* sorry, don't know about them */ |
@@ -878,12 +979,52 @@ void __cpuinit ds_init_intel(struct cpuinfo_x86 *c) | |||
878 | } | 979 | } |
879 | } | 980 | } |
880 | 981 | ||
881 | void ds_free(struct ds_context *context) | 982 | /* |
983 | * Change the DS configuration from tracing prev to tracing next. | ||
984 | */ | ||
985 | void ds_switch_to(struct task_struct *prev, struct task_struct *next) | ||
986 | { | ||
987 | struct ds_context *prev_ctx = prev->thread.ds_ctx; | ||
988 | struct ds_context *next_ctx = next->thread.ds_ctx; | ||
989 | |||
990 | if (prev_ctx) { | ||
991 | update_debugctlmsr(0); | ||
992 | |||
993 | if (prev_ctx->bts_master && | ||
994 | (prev_ctx->bts_master->trace.ds.flags & BTS_TIMESTAMPS)) { | ||
995 | struct bts_struct ts = { | ||
996 | .qualifier = bts_task_departs, | ||
997 | .variant.timestamp.jiffies = jiffies_64, | ||
998 | .variant.timestamp.pid = prev->pid | ||
999 | }; | ||
1000 | bts_write(prev_ctx->bts_master, &ts); | ||
1001 | } | ||
1002 | } | ||
1003 | |||
1004 | if (next_ctx) { | ||
1005 | if (next_ctx->bts_master && | ||
1006 | (next_ctx->bts_master->trace.ds.flags & BTS_TIMESTAMPS)) { | ||
1007 | struct bts_struct ts = { | ||
1008 | .qualifier = bts_task_arrives, | ||
1009 | .variant.timestamp.jiffies = jiffies_64, | ||
1010 | .variant.timestamp.pid = next->pid | ||
1011 | }; | ||
1012 | bts_write(next_ctx->bts_master, &ts); | ||
1013 | } | ||
1014 | |||
1015 | wrmsrl(MSR_IA32_DS_AREA, (unsigned long)next_ctx->ds); | ||
1016 | } | ||
1017 | |||
1018 | update_debugctlmsr(next->thread.debugctlmsr); | ||
1019 | } | ||
1020 | |||
1021 | void ds_copy_thread(struct task_struct *tsk, struct task_struct *father) | ||
1022 | { | ||
1023 | clear_tsk_thread_flag(tsk, TIF_DS_AREA_MSR); | ||
1024 | tsk->thread.ds_ctx = NULL; | ||
1025 | } | ||
1026 | |||
1027 | void ds_exit_thread(struct task_struct *tsk) | ||
882 | { | 1028 | { |
883 | /* This is called when the task owning the parameter context | 1029 | WARN_ON(tsk->thread.ds_ctx); |
884 | * is dying. There should not be any user of that context left | ||
885 | * to disturb us, anymore. */ | ||
886 | unsigned long leftovers = context->count; | ||
887 | while (leftovers--) | ||
888 | ds_put_context(context); | ||
889 | } | 1030 | } |
diff --git a/arch/x86/kernel/dumpstack.c b/arch/x86/kernel/dumpstack.c new file mode 100644 index 000000000000..6b1f6f6f8661 --- /dev/null +++ b/arch/x86/kernel/dumpstack.c | |||
@@ -0,0 +1,351 @@ | |||
1 | /* | ||
2 | * Copyright (C) 1991, 1992 Linus Torvalds | ||
3 | * Copyright (C) 2000, 2001, 2002 Andi Kleen, SuSE Labs | ||
4 | */ | ||
5 | #include <linux/kallsyms.h> | ||
6 | #include <linux/kprobes.h> | ||
7 | #include <linux/uaccess.h> | ||
8 | #include <linux/utsname.h> | ||
9 | #include <linux/hardirq.h> | ||
10 | #include <linux/kdebug.h> | ||
11 | #include <linux/module.h> | ||
12 | #include <linux/ptrace.h> | ||
13 | #include <linux/kexec.h> | ||
14 | #include <linux/bug.h> | ||
15 | #include <linux/nmi.h> | ||
16 | #include <linux/sysfs.h> | ||
17 | |||
18 | #include <asm/stacktrace.h> | ||
19 | |||
20 | #include "dumpstack.h" | ||
21 | |||
22 | int panic_on_unrecovered_nmi; | ||
23 | unsigned int code_bytes = 64; | ||
24 | int kstack_depth_to_print = 3 * STACKSLOTS_PER_LINE; | ||
25 | static int die_counter; | ||
26 | |||
27 | void printk_address(unsigned long address, int reliable) | ||
28 | { | ||
29 | printk(" [<%p>] %s%pS\n", (void *) address, | ||
30 | reliable ? "" : "? ", (void *) address); | ||
31 | } | ||
32 | |||
33 | #ifdef CONFIG_FUNCTION_GRAPH_TRACER | ||
34 | static void | ||
35 | print_ftrace_graph_addr(unsigned long addr, void *data, | ||
36 | const struct stacktrace_ops *ops, | ||
37 | struct thread_info *tinfo, int *graph) | ||
38 | { | ||
39 | struct task_struct *task = tinfo->task; | ||
40 | unsigned long ret_addr; | ||
41 | int index = task->curr_ret_stack; | ||
42 | |||
43 | if (addr != (unsigned long)return_to_handler) | ||
44 | return; | ||
45 | |||
46 | if (!task->ret_stack || index < *graph) | ||
47 | return; | ||
48 | |||
49 | index -= *graph; | ||
50 | ret_addr = task->ret_stack[index].ret; | ||
51 | |||
52 | ops->address(data, ret_addr, 1); | ||
53 | |||
54 | (*graph)++; | ||
55 | } | ||
56 | #else | ||
57 | static inline void | ||
58 | print_ftrace_graph_addr(unsigned long addr, void *data, | ||
59 | const struct stacktrace_ops *ops, | ||
60 | struct thread_info *tinfo, int *graph) | ||
61 | { } | ||
62 | #endif | ||
63 | |||
64 | /* | ||
65 | * x86-64 can have up to three kernel stacks: | ||
66 | * process stack | ||
67 | * interrupt stack | ||
68 | * severe exception (double fault, nmi, stack fault, debug, mce) hardware stack | ||
69 | */ | ||
70 | |||
71 | static inline int valid_stack_ptr(struct thread_info *tinfo, | ||
72 | void *p, unsigned int size, void *end) | ||
73 | { | ||
74 | void *t = tinfo; | ||
75 | if (end) { | ||
76 | if (p < end && p >= (end-THREAD_SIZE)) | ||
77 | return 1; | ||
78 | else | ||
79 | return 0; | ||
80 | } | ||
81 | return p > t && p < t + THREAD_SIZE - size; | ||
82 | } | ||
83 | |||
84 | unsigned long | ||
85 | print_context_stack(struct thread_info *tinfo, | ||
86 | unsigned long *stack, unsigned long bp, | ||
87 | const struct stacktrace_ops *ops, void *data, | ||
88 | unsigned long *end, int *graph) | ||
89 | { | ||
90 | struct stack_frame *frame = (struct stack_frame *)bp; | ||
91 | |||
92 | while (valid_stack_ptr(tinfo, stack, sizeof(*stack), end)) { | ||
93 | unsigned long addr; | ||
94 | |||
95 | addr = *stack; | ||
96 | if (__kernel_text_address(addr)) { | ||
97 | if ((unsigned long) stack == bp + sizeof(long)) { | ||
98 | ops->address(data, addr, 1); | ||
99 | frame = frame->next_frame; | ||
100 | bp = (unsigned long) frame; | ||
101 | } else { | ||
102 | ops->address(data, addr, bp == 0); | ||
103 | } | ||
104 | print_ftrace_graph_addr(addr, data, ops, tinfo, graph); | ||
105 | } | ||
106 | stack++; | ||
107 | } | ||
108 | return bp; | ||
109 | } | ||
110 | |||
111 | |||
112 | static void | ||
113 | print_trace_warning_symbol(void *data, char *msg, unsigned long symbol) | ||
114 | { | ||
115 | printk(data); | ||
116 | print_symbol(msg, symbol); | ||
117 | printk("\n"); | ||
118 | } | ||
119 | |||
120 | static void print_trace_warning(void *data, char *msg) | ||
121 | { | ||
122 | printk("%s%s\n", (char *)data, msg); | ||
123 | } | ||
124 | |||
125 | static int print_trace_stack(void *data, char *name) | ||
126 | { | ||
127 | printk("%s <%s> ", (char *)data, name); | ||
128 | return 0; | ||
129 | } | ||
130 | |||
131 | /* | ||
132 | * Print one address/symbol entries per line. | ||
133 | */ | ||
134 | static void print_trace_address(void *data, unsigned long addr, int reliable) | ||
135 | { | ||
136 | touch_nmi_watchdog(); | ||
137 | printk(data); | ||
138 | printk_address(addr, reliable); | ||
139 | } | ||
140 | |||
141 | static const struct stacktrace_ops print_trace_ops = { | ||
142 | .warning = print_trace_warning, | ||
143 | .warning_symbol = print_trace_warning_symbol, | ||
144 | .stack = print_trace_stack, | ||
145 | .address = print_trace_address, | ||
146 | }; | ||
147 | |||
148 | void | ||
149 | show_trace_log_lvl(struct task_struct *task, struct pt_regs *regs, | ||
150 | unsigned long *stack, unsigned long bp, char *log_lvl) | ||
151 | { | ||
152 | printk("%sCall Trace:\n", log_lvl); | ||
153 | dump_trace(task, regs, stack, bp, &print_trace_ops, log_lvl); | ||
154 | } | ||
155 | |||
156 | void show_trace(struct task_struct *task, struct pt_regs *regs, | ||
157 | unsigned long *stack, unsigned long bp) | ||
158 | { | ||
159 | show_trace_log_lvl(task, regs, stack, bp, ""); | ||
160 | } | ||
161 | |||
162 | void show_stack(struct task_struct *task, unsigned long *sp) | ||
163 | { | ||
164 | show_stack_log_lvl(task, NULL, sp, 0, ""); | ||
165 | } | ||
166 | |||
167 | /* | ||
168 | * The architecture-independent dump_stack generator | ||
169 | */ | ||
170 | void dump_stack(void) | ||
171 | { | ||
172 | unsigned long bp = 0; | ||
173 | unsigned long stack; | ||
174 | |||
175 | #ifdef CONFIG_FRAME_POINTER | ||
176 | if (!bp) | ||
177 | get_bp(bp); | ||
178 | #endif | ||
179 | |||
180 | printk("Pid: %d, comm: %.20s %s %s %.*s\n", | ||
181 | current->pid, current->comm, print_tainted(), | ||
182 | init_utsname()->release, | ||
183 | (int)strcspn(init_utsname()->version, " "), | ||
184 | init_utsname()->version); | ||
185 | show_trace(NULL, NULL, &stack, bp); | ||
186 | } | ||
187 | EXPORT_SYMBOL(dump_stack); | ||
188 | |||
189 | static raw_spinlock_t die_lock = __RAW_SPIN_LOCK_UNLOCKED; | ||
190 | static int die_owner = -1; | ||
191 | static unsigned int die_nest_count; | ||
192 | |||
193 | unsigned __kprobes long oops_begin(void) | ||
194 | { | ||
195 | int cpu; | ||
196 | unsigned long flags; | ||
197 | |||
198 | oops_enter(); | ||
199 | |||
200 | /* racy, but better than risking deadlock. */ | ||
201 | raw_local_irq_save(flags); | ||
202 | cpu = smp_processor_id(); | ||
203 | if (!__raw_spin_trylock(&die_lock)) { | ||
204 | if (cpu == die_owner) | ||
205 | /* nested oops. should stop eventually */; | ||
206 | else | ||
207 | __raw_spin_lock(&die_lock); | ||
208 | } | ||
209 | die_nest_count++; | ||
210 | die_owner = cpu; | ||
211 | console_verbose(); | ||
212 | bust_spinlocks(1); | ||
213 | return flags; | ||
214 | } | ||
215 | |||
216 | void __kprobes oops_end(unsigned long flags, struct pt_regs *regs, int signr) | ||
217 | { | ||
218 | if (regs && kexec_should_crash(current)) | ||
219 | crash_kexec(regs); | ||
220 | |||
221 | bust_spinlocks(0); | ||
222 | die_owner = -1; | ||
223 | add_taint(TAINT_DIE); | ||
224 | die_nest_count--; | ||
225 | if (!die_nest_count) | ||
226 | /* Nest count reaches zero, release the lock. */ | ||
227 | __raw_spin_unlock(&die_lock); | ||
228 | raw_local_irq_restore(flags); | ||
229 | oops_exit(); | ||
230 | |||
231 | if (!signr) | ||
232 | return; | ||
233 | if (in_interrupt()) | ||
234 | panic("Fatal exception in interrupt"); | ||
235 | if (panic_on_oops) | ||
236 | panic("Fatal exception"); | ||
237 | do_exit(signr); | ||
238 | } | ||
239 | |||
240 | int __kprobes __die(const char *str, struct pt_regs *regs, long err) | ||
241 | { | ||
242 | #ifdef CONFIG_X86_32 | ||
243 | unsigned short ss; | ||
244 | unsigned long sp; | ||
245 | #endif | ||
246 | printk(KERN_EMERG "%s: %04lx [#%d] ", str, err & 0xffff, ++die_counter); | ||
247 | #ifdef CONFIG_PREEMPT | ||
248 | printk("PREEMPT "); | ||
249 | #endif | ||
250 | #ifdef CONFIG_SMP | ||
251 | printk("SMP "); | ||
252 | #endif | ||
253 | #ifdef CONFIG_DEBUG_PAGEALLOC | ||
254 | printk("DEBUG_PAGEALLOC"); | ||
255 | #endif | ||
256 | printk("\n"); | ||
257 | sysfs_printk_last_file(); | ||
258 | if (notify_die(DIE_OOPS, str, regs, err, | ||
259 | current->thread.trap_no, SIGSEGV) == NOTIFY_STOP) | ||
260 | return 1; | ||
261 | |||
262 | show_registers(regs); | ||
263 | #ifdef CONFIG_X86_32 | ||
264 | sp = (unsigned long) (&regs->sp); | ||
265 | savesegment(ss, ss); | ||
266 | if (user_mode(regs)) { | ||
267 | sp = regs->sp; | ||
268 | ss = regs->ss & 0xffff; | ||
269 | } | ||
270 | printk(KERN_EMERG "EIP: [<%08lx>] ", regs->ip); | ||
271 | print_symbol("%s", regs->ip); | ||
272 | printk(" SS:ESP %04x:%08lx\n", ss, sp); | ||
273 | #else | ||
274 | /* Executive summary in case the oops scrolled away */ | ||
275 | printk(KERN_ALERT "RIP "); | ||
276 | printk_address(regs->ip, 1); | ||
277 | printk(" RSP <%016lx>\n", regs->sp); | ||
278 | #endif | ||
279 | return 0; | ||
280 | } | ||
281 | |||
282 | /* | ||
283 | * This is gone through when something in the kernel has done something bad | ||
284 | * and is about to be terminated: | ||
285 | */ | ||
286 | void die(const char *str, struct pt_regs *regs, long err) | ||
287 | { | ||
288 | unsigned long flags = oops_begin(); | ||
289 | int sig = SIGSEGV; | ||
290 | |||
291 | if (!user_mode_vm(regs)) | ||
292 | report_bug(regs->ip, regs); | ||
293 | |||
294 | if (__die(str, regs, err)) | ||
295 | sig = 0; | ||
296 | oops_end(flags, regs, sig); | ||
297 | } | ||
298 | |||
299 | void notrace __kprobes | ||
300 | die_nmi(char *str, struct pt_regs *regs, int do_panic) | ||
301 | { | ||
302 | unsigned long flags; | ||
303 | |||
304 | if (notify_die(DIE_NMIWATCHDOG, str, regs, 0, 2, SIGINT) == NOTIFY_STOP) | ||
305 | return; | ||
306 | |||
307 | /* | ||
308 | * We are in trouble anyway, lets at least try | ||
309 | * to get a message out. | ||
310 | */ | ||
311 | flags = oops_begin(); | ||
312 | printk(KERN_EMERG "%s", str); | ||
313 | printk(" on CPU%d, ip %08lx, registers:\n", | ||
314 | smp_processor_id(), regs->ip); | ||
315 | show_registers(regs); | ||
316 | oops_end(flags, regs, 0); | ||
317 | if (do_panic || panic_on_oops) | ||
318 | panic("Non maskable interrupt"); | ||
319 | nmi_exit(); | ||
320 | local_irq_enable(); | ||
321 | do_exit(SIGBUS); | ||
322 | } | ||
323 | |||
324 | static int __init oops_setup(char *s) | ||
325 | { | ||
326 | if (!s) | ||
327 | return -EINVAL; | ||
328 | if (!strcmp(s, "panic")) | ||
329 | panic_on_oops = 1; | ||
330 | return 0; | ||
331 | } | ||
332 | early_param("oops", oops_setup); | ||
333 | |||
334 | static int __init kstack_setup(char *s) | ||
335 | { | ||
336 | if (!s) | ||
337 | return -EINVAL; | ||
338 | kstack_depth_to_print = simple_strtoul(s, NULL, 0); | ||
339 | return 0; | ||
340 | } | ||
341 | early_param("kstack", kstack_setup); | ||
342 | |||
343 | static int __init code_bytes_setup(char *s) | ||
344 | { | ||
345 | code_bytes = simple_strtoul(s, NULL, 0); | ||
346 | if (code_bytes > 8192) | ||
347 | code_bytes = 8192; | ||
348 | |||
349 | return 1; | ||
350 | } | ||
351 | __setup("code_bytes=", code_bytes_setup); | ||
diff --git a/arch/x86/kernel/dumpstack.h b/arch/x86/kernel/dumpstack.h new file mode 100644 index 000000000000..da87590b8698 --- /dev/null +++ b/arch/x86/kernel/dumpstack.h | |||
@@ -0,0 +1,39 @@ | |||
1 | /* | ||
2 | * Copyright (C) 1991, 1992 Linus Torvalds | ||
3 | * Copyright (C) 2000, 2001, 2002 Andi Kleen, SuSE Labs | ||
4 | */ | ||
5 | |||
6 | #ifndef DUMPSTACK_H | ||
7 | #define DUMPSTACK_H | ||
8 | |||
9 | #ifdef CONFIG_X86_32 | ||
10 | #define STACKSLOTS_PER_LINE 8 | ||
11 | #define get_bp(bp) asm("movl %%ebp, %0" : "=r" (bp) :) | ||
12 | #else | ||
13 | #define STACKSLOTS_PER_LINE 4 | ||
14 | #define get_bp(bp) asm("movq %%rbp, %0" : "=r" (bp) :) | ||
15 | #endif | ||
16 | |||
17 | extern unsigned long | ||
18 | print_context_stack(struct thread_info *tinfo, | ||
19 | unsigned long *stack, unsigned long bp, | ||
20 | const struct stacktrace_ops *ops, void *data, | ||
21 | unsigned long *end, int *graph); | ||
22 | |||
23 | extern void | ||
24 | show_trace_log_lvl(struct task_struct *task, struct pt_regs *regs, | ||
25 | unsigned long *stack, unsigned long bp, char *log_lvl); | ||
26 | |||
27 | extern void | ||
28 | show_stack_log_lvl(struct task_struct *task, struct pt_regs *regs, | ||
29 | unsigned long *sp, unsigned long bp, char *log_lvl); | ||
30 | |||
31 | extern unsigned int code_bytes; | ||
32 | extern int kstack_depth_to_print; | ||
33 | |||
34 | /* The form of the top of the frame on the stack */ | ||
35 | struct stack_frame { | ||
36 | struct stack_frame *next_frame; | ||
37 | unsigned long return_address; | ||
38 | }; | ||
39 | #endif | ||
diff --git a/arch/x86/kernel/dumpstack_32.c b/arch/x86/kernel/dumpstack_32.c index b3614752197b..d593cd1f58dc 100644 --- a/arch/x86/kernel/dumpstack_32.c +++ b/arch/x86/kernel/dumpstack_32.c | |||
@@ -17,69 +17,14 @@ | |||
17 | 17 | ||
18 | #include <asm/stacktrace.h> | 18 | #include <asm/stacktrace.h> |
19 | 19 | ||
20 | #define STACKSLOTS_PER_LINE 8 | 20 | #include "dumpstack.h" |
21 | #define get_bp(bp) asm("movl %%ebp, %0" : "=r" (bp) :) | ||
22 | |||
23 | int panic_on_unrecovered_nmi; | ||
24 | int kstack_depth_to_print = 3 * STACKSLOTS_PER_LINE; | ||
25 | static unsigned int code_bytes = 64; | ||
26 | static int die_counter; | ||
27 | |||
28 | void printk_address(unsigned long address, int reliable) | ||
29 | { | ||
30 | printk(" [<%p>] %s%pS\n", (void *) address, | ||
31 | reliable ? "" : "? ", (void *) address); | ||
32 | } | ||
33 | |||
34 | static inline int valid_stack_ptr(struct thread_info *tinfo, | ||
35 | void *p, unsigned int size, void *end) | ||
36 | { | ||
37 | void *t = tinfo; | ||
38 | if (end) { | ||
39 | if (p < end && p >= (end-THREAD_SIZE)) | ||
40 | return 1; | ||
41 | else | ||
42 | return 0; | ||
43 | } | ||
44 | return p > t && p < t + THREAD_SIZE - size; | ||
45 | } | ||
46 | |||
47 | /* The form of the top of the frame on the stack */ | ||
48 | struct stack_frame { | ||
49 | struct stack_frame *next_frame; | ||
50 | unsigned long return_address; | ||
51 | }; | ||
52 | |||
53 | static inline unsigned long | ||
54 | print_context_stack(struct thread_info *tinfo, | ||
55 | unsigned long *stack, unsigned long bp, | ||
56 | const struct stacktrace_ops *ops, void *data, | ||
57 | unsigned long *end) | ||
58 | { | ||
59 | struct stack_frame *frame = (struct stack_frame *)bp; | ||
60 | |||
61 | while (valid_stack_ptr(tinfo, stack, sizeof(*stack), end)) { | ||
62 | unsigned long addr; | ||
63 | |||
64 | addr = *stack; | ||
65 | if (__kernel_text_address(addr)) { | ||
66 | if ((unsigned long) stack == bp + sizeof(long)) { | ||
67 | ops->address(data, addr, 1); | ||
68 | frame = frame->next_frame; | ||
69 | bp = (unsigned long) frame; | ||
70 | } else { | ||
71 | ops->address(data, addr, bp == 0); | ||
72 | } | ||
73 | } | ||
74 | stack++; | ||
75 | } | ||
76 | return bp; | ||
77 | } | ||
78 | 21 | ||
79 | void dump_trace(struct task_struct *task, struct pt_regs *regs, | 22 | void dump_trace(struct task_struct *task, struct pt_regs *regs, |
80 | unsigned long *stack, unsigned long bp, | 23 | unsigned long *stack, unsigned long bp, |
81 | const struct stacktrace_ops *ops, void *data) | 24 | const struct stacktrace_ops *ops, void *data) |
82 | { | 25 | { |
26 | int graph = 0; | ||
27 | |||
83 | if (!task) | 28 | if (!task) |
84 | task = current; | 29 | task = current; |
85 | 30 | ||
@@ -107,7 +52,8 @@ void dump_trace(struct task_struct *task, struct pt_regs *regs, | |||
107 | 52 | ||
108 | context = (struct thread_info *) | 53 | context = (struct thread_info *) |
109 | ((unsigned long)stack & (~(THREAD_SIZE - 1))); | 54 | ((unsigned long)stack & (~(THREAD_SIZE - 1))); |
110 | bp = print_context_stack(context, stack, bp, ops, data, NULL); | 55 | bp = print_context_stack(context, stack, bp, ops, |
56 | data, NULL, &graph); | ||
111 | 57 | ||
112 | stack = (unsigned long *)context->previous_esp; | 58 | stack = (unsigned long *)context->previous_esp; |
113 | if (!stack) | 59 | if (!stack) |
@@ -119,57 +65,7 @@ void dump_trace(struct task_struct *task, struct pt_regs *regs, | |||
119 | } | 65 | } |
120 | EXPORT_SYMBOL(dump_trace); | 66 | EXPORT_SYMBOL(dump_trace); |
121 | 67 | ||
122 | static void | 68 | void |
123 | print_trace_warning_symbol(void *data, char *msg, unsigned long symbol) | ||
124 | { | ||
125 | printk(data); | ||
126 | print_symbol(msg, symbol); | ||
127 | printk("\n"); | ||
128 | } | ||
129 | |||
130 | static void print_trace_warning(void *data, char *msg) | ||
131 | { | ||
132 | printk("%s%s\n", (char *)data, msg); | ||
133 | } | ||
134 | |||
135 | static int print_trace_stack(void *data, char *name) | ||
136 | { | ||
137 | printk("%s <%s> ", (char *)data, name); | ||
138 | return 0; | ||
139 | } | ||
140 | |||
141 | /* | ||
142 | * Print one address/symbol entries per line. | ||
143 | */ | ||
144 | static void print_trace_address(void *data, unsigned long addr, int reliable) | ||
145 | { | ||
146 | touch_nmi_watchdog(); | ||
147 | printk(data); | ||
148 | printk_address(addr, reliable); | ||
149 | } | ||
150 | |||
151 | static const struct stacktrace_ops print_trace_ops = { | ||
152 | .warning = print_trace_warning, | ||
153 | .warning_symbol = print_trace_warning_symbol, | ||
154 | .stack = print_trace_stack, | ||
155 | .address = print_trace_address, | ||
156 | }; | ||
157 | |||
158 | static void | ||
159 | show_trace_log_lvl(struct task_struct *task, struct pt_regs *regs, | ||
160 | unsigned long *stack, unsigned long bp, char *log_lvl) | ||
161 | { | ||
162 | printk("%sCall Trace:\n", log_lvl); | ||
163 | dump_trace(task, regs, stack, bp, &print_trace_ops, log_lvl); | ||
164 | } | ||
165 | |||
166 | void show_trace(struct task_struct *task, struct pt_regs *regs, | ||
167 | unsigned long *stack, unsigned long bp) | ||
168 | { | ||
169 | show_trace_log_lvl(task, regs, stack, bp, ""); | ||
170 | } | ||
171 | |||
172 | static void | ||
173 | show_stack_log_lvl(struct task_struct *task, struct pt_regs *regs, | 69 | show_stack_log_lvl(struct task_struct *task, struct pt_regs *regs, |
174 | unsigned long *sp, unsigned long bp, char *log_lvl) | 70 | unsigned long *sp, unsigned long bp, char *log_lvl) |
175 | { | 71 | { |
@@ -196,33 +92,6 @@ show_stack_log_lvl(struct task_struct *task, struct pt_regs *regs, | |||
196 | show_trace_log_lvl(task, regs, sp, bp, log_lvl); | 92 | show_trace_log_lvl(task, regs, sp, bp, log_lvl); |
197 | } | 93 | } |
198 | 94 | ||
199 | void show_stack(struct task_struct *task, unsigned long *sp) | ||
200 | { | ||
201 | show_stack_log_lvl(task, NULL, sp, 0, ""); | ||
202 | } | ||
203 | |||
204 | /* | ||
205 | * The architecture-independent dump_stack generator | ||
206 | */ | ||
207 | void dump_stack(void) | ||
208 | { | ||
209 | unsigned long bp = 0; | ||
210 | unsigned long stack; | ||
211 | |||
212 | #ifdef CONFIG_FRAME_POINTER | ||
213 | if (!bp) | ||
214 | get_bp(bp); | ||
215 | #endif | ||
216 | |||
217 | printk("Pid: %d, comm: %.20s %s %s %.*s\n", | ||
218 | current->pid, current->comm, print_tainted(), | ||
219 | init_utsname()->release, | ||
220 | (int)strcspn(init_utsname()->version, " "), | ||
221 | init_utsname()->version); | ||
222 | show_trace(NULL, NULL, &stack, bp); | ||
223 | } | ||
224 | |||
225 | EXPORT_SYMBOL(dump_stack); | ||
226 | 95 | ||
227 | void show_registers(struct pt_regs *regs) | 96 | void show_registers(struct pt_regs *regs) |
228 | { | 97 | { |
@@ -283,167 +152,3 @@ int is_valid_bugaddr(unsigned long ip) | |||
283 | return ud2 == 0x0b0f; | 152 | return ud2 == 0x0b0f; |
284 | } | 153 | } |
285 | 154 | ||
286 | static raw_spinlock_t die_lock = __RAW_SPIN_LOCK_UNLOCKED; | ||
287 | static int die_owner = -1; | ||
288 | static unsigned int die_nest_count; | ||
289 | |||
290 | unsigned __kprobes long oops_begin(void) | ||
291 | { | ||
292 | unsigned long flags; | ||
293 | |||
294 | oops_enter(); | ||
295 | |||
296 | if (die_owner != raw_smp_processor_id()) { | ||
297 | console_verbose(); | ||
298 | raw_local_irq_save(flags); | ||
299 | __raw_spin_lock(&die_lock); | ||
300 | die_owner = smp_processor_id(); | ||
301 | die_nest_count = 0; | ||
302 | bust_spinlocks(1); | ||
303 | } else { | ||
304 | raw_local_irq_save(flags); | ||
305 | } | ||
306 | die_nest_count++; | ||
307 | return flags; | ||
308 | } | ||
309 | |||
310 | void __kprobes oops_end(unsigned long flags, struct pt_regs *regs, int signr) | ||
311 | { | ||
312 | bust_spinlocks(0); | ||
313 | die_owner = -1; | ||
314 | add_taint(TAINT_DIE); | ||
315 | __raw_spin_unlock(&die_lock); | ||
316 | raw_local_irq_restore(flags); | ||
317 | |||
318 | if (!regs) | ||
319 | return; | ||
320 | |||
321 | if (kexec_should_crash(current)) | ||
322 | crash_kexec(regs); | ||
323 | if (in_interrupt()) | ||
324 | panic("Fatal exception in interrupt"); | ||
325 | if (panic_on_oops) | ||
326 | panic("Fatal exception"); | ||
327 | oops_exit(); | ||
328 | do_exit(signr); | ||
329 | } | ||
330 | |||
331 | int __kprobes __die(const char *str, struct pt_regs *regs, long err) | ||
332 | { | ||
333 | unsigned short ss; | ||
334 | unsigned long sp; | ||
335 | |||
336 | printk(KERN_EMERG "%s: %04lx [#%d] ", str, err & 0xffff, ++die_counter); | ||
337 | #ifdef CONFIG_PREEMPT | ||
338 | printk("PREEMPT "); | ||
339 | #endif | ||
340 | #ifdef CONFIG_SMP | ||
341 | printk("SMP "); | ||
342 | #endif | ||
343 | #ifdef CONFIG_DEBUG_PAGEALLOC | ||
344 | printk("DEBUG_PAGEALLOC"); | ||
345 | #endif | ||
346 | printk("\n"); | ||
347 | sysfs_printk_last_file(); | ||
348 | if (notify_die(DIE_OOPS, str, regs, err, | ||
349 | current->thread.trap_no, SIGSEGV) == NOTIFY_STOP) | ||
350 | return 1; | ||
351 | |||
352 | show_registers(regs); | ||
353 | /* Executive summary in case the oops scrolled away */ | ||
354 | sp = (unsigned long) (&regs->sp); | ||
355 | savesegment(ss, ss); | ||
356 | if (user_mode(regs)) { | ||
357 | sp = regs->sp; | ||
358 | ss = regs->ss & 0xffff; | ||
359 | } | ||
360 | printk(KERN_EMERG "EIP: [<%08lx>] ", regs->ip); | ||
361 | print_symbol("%s", regs->ip); | ||
362 | printk(" SS:ESP %04x:%08lx\n", ss, sp); | ||
363 | return 0; | ||
364 | } | ||
365 | |||
366 | /* | ||
367 | * This is gone through when something in the kernel has done something bad | ||
368 | * and is about to be terminated: | ||
369 | */ | ||
370 | void die(const char *str, struct pt_regs *regs, long err) | ||
371 | { | ||
372 | unsigned long flags = oops_begin(); | ||
373 | |||
374 | if (die_nest_count < 3) { | ||
375 | report_bug(regs->ip, regs); | ||
376 | |||
377 | if (__die(str, regs, err)) | ||
378 | regs = NULL; | ||
379 | } else { | ||
380 | printk(KERN_EMERG "Recursive die() failure, output suppressed\n"); | ||
381 | } | ||
382 | |||
383 | oops_end(flags, regs, SIGSEGV); | ||
384 | } | ||
385 | |||
386 | static DEFINE_SPINLOCK(nmi_print_lock); | ||
387 | |||
388 | void notrace __kprobes | ||
389 | die_nmi(char *str, struct pt_regs *regs, int do_panic) | ||
390 | { | ||
391 | if (notify_die(DIE_NMIWATCHDOG, str, regs, 0, 2, SIGINT) == NOTIFY_STOP) | ||
392 | return; | ||
393 | |||
394 | spin_lock(&nmi_print_lock); | ||
395 | /* | ||
396 | * We are in trouble anyway, lets at least try | ||
397 | * to get a message out: | ||
398 | */ | ||
399 | bust_spinlocks(1); | ||
400 | printk(KERN_EMERG "%s", str); | ||
401 | printk(" on CPU%d, ip %08lx, registers:\n", | ||
402 | smp_processor_id(), regs->ip); | ||
403 | show_registers(regs); | ||
404 | if (do_panic) | ||
405 | panic("Non maskable interrupt"); | ||
406 | console_silent(); | ||
407 | spin_unlock(&nmi_print_lock); | ||
408 | |||
409 | /* | ||
410 | * If we are in kernel we are probably nested up pretty bad | ||
411 | * and might aswell get out now while we still can: | ||
412 | */ | ||
413 | if (!user_mode_vm(regs)) { | ||
414 | current->thread.trap_no = 2; | ||
415 | crash_kexec(regs); | ||
416 | } | ||
417 | |||
418 | bust_spinlocks(0); | ||
419 | do_exit(SIGSEGV); | ||
420 | } | ||
421 | |||
422 | static int __init oops_setup(char *s) | ||
423 | { | ||
424 | if (!s) | ||
425 | return -EINVAL; | ||
426 | if (!strcmp(s, "panic")) | ||
427 | panic_on_oops = 1; | ||
428 | return 0; | ||
429 | } | ||
430 | early_param("oops", oops_setup); | ||
431 | |||
432 | static int __init kstack_setup(char *s) | ||
433 | { | ||
434 | if (!s) | ||
435 | return -EINVAL; | ||
436 | kstack_depth_to_print = simple_strtoul(s, NULL, 0); | ||
437 | return 0; | ||
438 | } | ||
439 | early_param("kstack", kstack_setup); | ||
440 | |||
441 | static int __init code_bytes_setup(char *s) | ||
442 | { | ||
443 | code_bytes = simple_strtoul(s, NULL, 0); | ||
444 | if (code_bytes > 8192) | ||
445 | code_bytes = 8192; | ||
446 | |||
447 | return 1; | ||
448 | } | ||
449 | __setup("code_bytes=", code_bytes_setup); | ||
diff --git a/arch/x86/kernel/dumpstack_64.c b/arch/x86/kernel/dumpstack_64.c index 96a5db7da8a7..c302d0707048 100644 --- a/arch/x86/kernel/dumpstack_64.c +++ b/arch/x86/kernel/dumpstack_64.c | |||
@@ -17,19 +17,7 @@ | |||
17 | 17 | ||
18 | #include <asm/stacktrace.h> | 18 | #include <asm/stacktrace.h> |
19 | 19 | ||
20 | #define STACKSLOTS_PER_LINE 4 | 20 | #include "dumpstack.h" |
21 | #define get_bp(bp) asm("movq %%rbp, %0" : "=r" (bp) :) | ||
22 | |||
23 | int panic_on_unrecovered_nmi; | ||
24 | int kstack_depth_to_print = 3 * STACKSLOTS_PER_LINE; | ||
25 | static unsigned int code_bytes = 64; | ||
26 | static int die_counter; | ||
27 | |||
28 | void printk_address(unsigned long address, int reliable) | ||
29 | { | ||
30 | printk(" [<%p>] %s%pS\n", (void *) address, | ||
31 | reliable ? "" : "? ", (void *) address); | ||
32 | } | ||
33 | 21 | ||
34 | static unsigned long *in_exception_stack(unsigned cpu, unsigned long stack, | 22 | static unsigned long *in_exception_stack(unsigned cpu, unsigned long stack, |
35 | unsigned *usedp, char **idp) | 23 | unsigned *usedp, char **idp) |
@@ -113,51 +101,6 @@ static unsigned long *in_exception_stack(unsigned cpu, unsigned long stack, | |||
113 | * severe exception (double fault, nmi, stack fault, debug, mce) hardware stack | 101 | * severe exception (double fault, nmi, stack fault, debug, mce) hardware stack |
114 | */ | 102 | */ |
115 | 103 | ||
116 | static inline int valid_stack_ptr(struct thread_info *tinfo, | ||
117 | void *p, unsigned int size, void *end) | ||
118 | { | ||
119 | void *t = tinfo; | ||
120 | if (end) { | ||
121 | if (p < end && p >= (end-THREAD_SIZE)) | ||
122 | return 1; | ||
123 | else | ||
124 | return 0; | ||
125 | } | ||
126 | return p > t && p < t + THREAD_SIZE - size; | ||
127 | } | ||
128 | |||
129 | /* The form of the top of the frame on the stack */ | ||
130 | struct stack_frame { | ||
131 | struct stack_frame *next_frame; | ||
132 | unsigned long return_address; | ||
133 | }; | ||
134 | |||
135 | static inline unsigned long | ||
136 | print_context_stack(struct thread_info *tinfo, | ||
137 | unsigned long *stack, unsigned long bp, | ||
138 | const struct stacktrace_ops *ops, void *data, | ||
139 | unsigned long *end) | ||
140 | { | ||
141 | struct stack_frame *frame = (struct stack_frame *)bp; | ||
142 | |||
143 | while (valid_stack_ptr(tinfo, stack, sizeof(*stack), end)) { | ||
144 | unsigned long addr; | ||
145 | |||
146 | addr = *stack; | ||
147 | if (__kernel_text_address(addr)) { | ||
148 | if ((unsigned long) stack == bp + sizeof(long)) { | ||
149 | ops->address(data, addr, 1); | ||
150 | frame = frame->next_frame; | ||
151 | bp = (unsigned long) frame; | ||
152 | } else { | ||
153 | ops->address(data, addr, bp == 0); | ||
154 | } | ||
155 | } | ||
156 | stack++; | ||
157 | } | ||
158 | return bp; | ||
159 | } | ||
160 | |||
161 | void dump_trace(struct task_struct *task, struct pt_regs *regs, | 104 | void dump_trace(struct task_struct *task, struct pt_regs *regs, |
162 | unsigned long *stack, unsigned long bp, | 105 | unsigned long *stack, unsigned long bp, |
163 | const struct stacktrace_ops *ops, void *data) | 106 | const struct stacktrace_ops *ops, void *data) |
@@ -166,6 +109,7 @@ void dump_trace(struct task_struct *task, struct pt_regs *regs, | |||
166 | unsigned long *irqstack_end = (unsigned long *)cpu_pda(cpu)->irqstackptr; | 109 | unsigned long *irqstack_end = (unsigned long *)cpu_pda(cpu)->irqstackptr; |
167 | unsigned used = 0; | 110 | unsigned used = 0; |
168 | struct thread_info *tinfo; | 111 | struct thread_info *tinfo; |
112 | int graph = 0; | ||
169 | 113 | ||
170 | if (!task) | 114 | if (!task) |
171 | task = current; | 115 | task = current; |
@@ -206,7 +150,7 @@ void dump_trace(struct task_struct *task, struct pt_regs *regs, | |||
206 | break; | 150 | break; |
207 | 151 | ||
208 | bp = print_context_stack(tinfo, stack, bp, ops, | 152 | bp = print_context_stack(tinfo, stack, bp, ops, |
209 | data, estack_end); | 153 | data, estack_end, &graph); |
210 | ops->stack(data, "<EOE>"); | 154 | ops->stack(data, "<EOE>"); |
211 | /* | 155 | /* |
212 | * We link to the next stack via the | 156 | * We link to the next stack via the |
@@ -225,7 +169,7 @@ void dump_trace(struct task_struct *task, struct pt_regs *regs, | |||
225 | if (ops->stack(data, "IRQ") < 0) | 169 | if (ops->stack(data, "IRQ") < 0) |
226 | break; | 170 | break; |
227 | bp = print_context_stack(tinfo, stack, bp, | 171 | bp = print_context_stack(tinfo, stack, bp, |
228 | ops, data, irqstack_end); | 172 | ops, data, irqstack_end, &graph); |
229 | /* | 173 | /* |
230 | * We link to the next stack (which would be | 174 | * We link to the next stack (which would be |
231 | * the process stack normally) the last | 175 | * the process stack normally) the last |
@@ -243,62 +187,12 @@ void dump_trace(struct task_struct *task, struct pt_regs *regs, | |||
243 | /* | 187 | /* |
244 | * This handles the process stack: | 188 | * This handles the process stack: |
245 | */ | 189 | */ |
246 | bp = print_context_stack(tinfo, stack, bp, ops, data, NULL); | 190 | bp = print_context_stack(tinfo, stack, bp, ops, data, NULL, &graph); |
247 | put_cpu(); | 191 | put_cpu(); |
248 | } | 192 | } |
249 | EXPORT_SYMBOL(dump_trace); | 193 | EXPORT_SYMBOL(dump_trace); |
250 | 194 | ||
251 | static void | 195 | void |
252 | print_trace_warning_symbol(void *data, char *msg, unsigned long symbol) | ||
253 | { | ||
254 | printk(data); | ||
255 | print_symbol(msg, symbol); | ||
256 | printk("\n"); | ||
257 | } | ||
258 | |||
259 | static void print_trace_warning(void *data, char *msg) | ||
260 | { | ||
261 | printk("%s%s\n", (char *)data, msg); | ||
262 | } | ||
263 | |||
264 | static int print_trace_stack(void *data, char *name) | ||
265 | { | ||
266 | printk("%s <%s> ", (char *)data, name); | ||
267 | return 0; | ||
268 | } | ||
269 | |||
270 | /* | ||
271 | * Print one address/symbol entries per line. | ||
272 | */ | ||
273 | static void print_trace_address(void *data, unsigned long addr, int reliable) | ||
274 | { | ||
275 | touch_nmi_watchdog(); | ||
276 | printk(data); | ||
277 | printk_address(addr, reliable); | ||
278 | } | ||
279 | |||
280 | static const struct stacktrace_ops print_trace_ops = { | ||
281 | .warning = print_trace_warning, | ||
282 | .warning_symbol = print_trace_warning_symbol, | ||
283 | .stack = print_trace_stack, | ||
284 | .address = print_trace_address, | ||
285 | }; | ||
286 | |||
287 | static void | ||
288 | show_trace_log_lvl(struct task_struct *task, struct pt_regs *regs, | ||
289 | unsigned long *stack, unsigned long bp, char *log_lvl) | ||
290 | { | ||
291 | printk("%sCall Trace:\n", log_lvl); | ||
292 | dump_trace(task, regs, stack, bp, &print_trace_ops, log_lvl); | ||
293 | } | ||
294 | |||
295 | void show_trace(struct task_struct *task, struct pt_regs *regs, | ||
296 | unsigned long *stack, unsigned long bp) | ||
297 | { | ||
298 | show_trace_log_lvl(task, regs, stack, bp, ""); | ||
299 | } | ||
300 | |||
301 | static void | ||
302 | show_stack_log_lvl(struct task_struct *task, struct pt_regs *regs, | 196 | show_stack_log_lvl(struct task_struct *task, struct pt_regs *regs, |
303 | unsigned long *sp, unsigned long bp, char *log_lvl) | 197 | unsigned long *sp, unsigned long bp, char *log_lvl) |
304 | { | 198 | { |
@@ -342,33 +236,6 @@ show_stack_log_lvl(struct task_struct *task, struct pt_regs *regs, | |||
342 | show_trace_log_lvl(task, regs, sp, bp, log_lvl); | 236 | show_trace_log_lvl(task, regs, sp, bp, log_lvl); |
343 | } | 237 | } |
344 | 238 | ||
345 | void show_stack(struct task_struct *task, unsigned long *sp) | ||
346 | { | ||
347 | show_stack_log_lvl(task, NULL, sp, 0, ""); | ||
348 | } | ||
349 | |||
350 | /* | ||
351 | * The architecture-independent dump_stack generator | ||
352 | */ | ||
353 | void dump_stack(void) | ||
354 | { | ||
355 | unsigned long bp = 0; | ||
356 | unsigned long stack; | ||
357 | |||
358 | #ifdef CONFIG_FRAME_POINTER | ||
359 | if (!bp) | ||
360 | get_bp(bp); | ||
361 | #endif | ||
362 | |||
363 | printk("Pid: %d, comm: %.20s %s %s %.*s\n", | ||
364 | current->pid, current->comm, print_tainted(), | ||
365 | init_utsname()->release, | ||
366 | (int)strcspn(init_utsname()->version, " "), | ||
367 | init_utsname()->version); | ||
368 | show_trace(NULL, NULL, &stack, bp); | ||
369 | } | ||
370 | EXPORT_SYMBOL(dump_stack); | ||
371 | |||
372 | void show_registers(struct pt_regs *regs) | 239 | void show_registers(struct pt_regs *regs) |
373 | { | 240 | { |
374 | int i; | 241 | int i; |
@@ -429,147 +296,3 @@ int is_valid_bugaddr(unsigned long ip) | |||
429 | return ud2 == 0x0b0f; | 296 | return ud2 == 0x0b0f; |
430 | } | 297 | } |
431 | 298 | ||
432 | static raw_spinlock_t die_lock = __RAW_SPIN_LOCK_UNLOCKED; | ||
433 | static int die_owner = -1; | ||
434 | static unsigned int die_nest_count; | ||
435 | |||
436 | unsigned __kprobes long oops_begin(void) | ||
437 | { | ||
438 | int cpu; | ||
439 | unsigned long flags; | ||
440 | |||
441 | oops_enter(); | ||
442 | |||
443 | /* racy, but better than risking deadlock. */ | ||
444 | raw_local_irq_save(flags); | ||
445 | cpu = smp_processor_id(); | ||
446 | if (!__raw_spin_trylock(&die_lock)) { | ||
447 | if (cpu == die_owner) | ||
448 | /* nested oops. should stop eventually */; | ||
449 | else | ||
450 | __raw_spin_lock(&die_lock); | ||
451 | } | ||
452 | die_nest_count++; | ||
453 | die_owner = cpu; | ||
454 | console_verbose(); | ||
455 | bust_spinlocks(1); | ||
456 | return flags; | ||
457 | } | ||
458 | |||
459 | void __kprobes oops_end(unsigned long flags, struct pt_regs *regs, int signr) | ||
460 | { | ||
461 | die_owner = -1; | ||
462 | bust_spinlocks(0); | ||
463 | die_nest_count--; | ||
464 | if (!die_nest_count) | ||
465 | /* Nest count reaches zero, release the lock. */ | ||
466 | __raw_spin_unlock(&die_lock); | ||
467 | raw_local_irq_restore(flags); | ||
468 | if (!regs) { | ||
469 | oops_exit(); | ||
470 | return; | ||
471 | } | ||
472 | if (in_interrupt()) | ||
473 | panic("Fatal exception in interrupt"); | ||
474 | if (panic_on_oops) | ||
475 | panic("Fatal exception"); | ||
476 | oops_exit(); | ||
477 | do_exit(signr); | ||
478 | } | ||
479 | |||
480 | int __kprobes __die(const char *str, struct pt_regs *regs, long err) | ||
481 | { | ||
482 | printk(KERN_EMERG "%s: %04lx [#%d] ", str, err & 0xffff, ++die_counter); | ||
483 | #ifdef CONFIG_PREEMPT | ||
484 | printk("PREEMPT "); | ||
485 | #endif | ||
486 | #ifdef CONFIG_SMP | ||
487 | printk("SMP "); | ||
488 | #endif | ||
489 | #ifdef CONFIG_DEBUG_PAGEALLOC | ||
490 | printk("DEBUG_PAGEALLOC"); | ||
491 | #endif | ||
492 | printk("\n"); | ||
493 | sysfs_printk_last_file(); | ||
494 | if (notify_die(DIE_OOPS, str, regs, err, | ||
495 | current->thread.trap_no, SIGSEGV) == NOTIFY_STOP) | ||
496 | return 1; | ||
497 | |||
498 | show_registers(regs); | ||
499 | add_taint(TAINT_DIE); | ||
500 | /* Executive summary in case the oops scrolled away */ | ||
501 | printk(KERN_ALERT "RIP "); | ||
502 | printk_address(regs->ip, 1); | ||
503 | printk(" RSP <%016lx>\n", regs->sp); | ||
504 | if (kexec_should_crash(current)) | ||
505 | crash_kexec(regs); | ||
506 | return 0; | ||
507 | } | ||
508 | |||
509 | void die(const char *str, struct pt_regs *regs, long err) | ||
510 | { | ||
511 | unsigned long flags = oops_begin(); | ||
512 | |||
513 | if (!user_mode(regs)) | ||
514 | report_bug(regs->ip, regs); | ||
515 | |||
516 | if (__die(str, regs, err)) | ||
517 | regs = NULL; | ||
518 | oops_end(flags, regs, SIGSEGV); | ||
519 | } | ||
520 | |||
521 | notrace __kprobes void | ||
522 | die_nmi(char *str, struct pt_regs *regs, int do_panic) | ||
523 | { | ||
524 | unsigned long flags; | ||
525 | |||
526 | if (notify_die(DIE_NMIWATCHDOG, str, regs, 0, 2, SIGINT) == NOTIFY_STOP) | ||
527 | return; | ||
528 | |||
529 | flags = oops_begin(); | ||
530 | /* | ||
531 | * We are in trouble anyway, lets at least try | ||
532 | * to get a message out. | ||
533 | */ | ||
534 | printk(KERN_EMERG "%s", str); | ||
535 | printk(" on CPU%d, ip %08lx, registers:\n", | ||
536 | smp_processor_id(), regs->ip); | ||
537 | show_registers(regs); | ||
538 | if (kexec_should_crash(current)) | ||
539 | crash_kexec(regs); | ||
540 | if (do_panic || panic_on_oops) | ||
541 | panic("Non maskable interrupt"); | ||
542 | oops_end(flags, NULL, SIGBUS); | ||
543 | nmi_exit(); | ||
544 | local_irq_enable(); | ||
545 | do_exit(SIGBUS); | ||
546 | } | ||
547 | |||
548 | static int __init oops_setup(char *s) | ||
549 | { | ||
550 | if (!s) | ||
551 | return -EINVAL; | ||
552 | if (!strcmp(s, "panic")) | ||
553 | panic_on_oops = 1; | ||
554 | return 0; | ||
555 | } | ||
556 | early_param("oops", oops_setup); | ||
557 | |||
558 | static int __init kstack_setup(char *s) | ||
559 | { | ||
560 | if (!s) | ||
561 | return -EINVAL; | ||
562 | kstack_depth_to_print = simple_strtoul(s, NULL, 0); | ||
563 | return 0; | ||
564 | } | ||
565 | early_param("kstack", kstack_setup); | ||
566 | |||
567 | static int __init code_bytes_setup(char *s) | ||
568 | { | ||
569 | code_bytes = simple_strtoul(s, NULL, 0); | ||
570 | if (code_bytes > 8192) | ||
571 | code_bytes = 8192; | ||
572 | |||
573 | return 1; | ||
574 | } | ||
575 | __setup("code_bytes=", code_bytes_setup); | ||
diff --git a/arch/x86/kernel/e820.c b/arch/x86/kernel/e820.c index 74c6a21fdc8c..e85826829cf2 100644 --- a/arch/x86/kernel/e820.c +++ b/arch/x86/kernel/e820.c | |||
@@ -698,22 +698,6 @@ struct early_res { | |||
698 | }; | 698 | }; |
699 | static struct early_res early_res[MAX_EARLY_RES] __initdata = { | 699 | static struct early_res early_res[MAX_EARLY_RES] __initdata = { |
700 | { 0, PAGE_SIZE, "BIOS data page" }, /* BIOS data page */ | 700 | { 0, PAGE_SIZE, "BIOS data page" }, /* BIOS data page */ |
701 | #if defined(CONFIG_X86_64) && defined(CONFIG_X86_TRAMPOLINE) | ||
702 | { TRAMPOLINE_BASE, TRAMPOLINE_BASE + 2 * PAGE_SIZE, "TRAMPOLINE" }, | ||
703 | #endif | ||
704 | #if defined(CONFIG_X86_32) && defined(CONFIG_SMP) | ||
705 | /* | ||
706 | * But first pinch a few for the stack/trampoline stuff | ||
707 | * FIXME: Don't need the extra page at 4K, but need to fix | ||
708 | * trampoline before removing it. (see the GDT stuff) | ||
709 | */ | ||
710 | { PAGE_SIZE, PAGE_SIZE + PAGE_SIZE, "EX TRAMPOLINE" }, | ||
711 | /* | ||
712 | * Has to be in very low memory so we can execute | ||
713 | * real-mode AP code. | ||
714 | */ | ||
715 | { TRAMPOLINE_BASE, TRAMPOLINE_BASE + PAGE_SIZE, "TRAMPOLINE" }, | ||
716 | #endif | ||
717 | {} | 701 | {} |
718 | }; | 702 | }; |
719 | 703 | ||
diff --git a/arch/x86/kernel/early-quirks.c b/arch/x86/kernel/early-quirks.c index 1b894b72c0f5..744aa7fc49d5 100644 --- a/arch/x86/kernel/early-quirks.c +++ b/arch/x86/kernel/early-quirks.c | |||
@@ -17,6 +17,7 @@ | |||
17 | #include <asm/io_apic.h> | 17 | #include <asm/io_apic.h> |
18 | #include <asm/apic.h> | 18 | #include <asm/apic.h> |
19 | #include <asm/iommu.h> | 19 | #include <asm/iommu.h> |
20 | #include <asm/gart.h> | ||
20 | 21 | ||
21 | static void __init fix_hypertransport_config(int num, int slot, int func) | 22 | static void __init fix_hypertransport_config(int num, int slot, int func) |
22 | { | 23 | { |
diff --git a/arch/x86/kernel/early_printk.c b/arch/x86/kernel/early_printk.c index 34ad997d3834..504ad198e4ad 100644 --- a/arch/x86/kernel/early_printk.c +++ b/arch/x86/kernel/early_printk.c | |||
@@ -875,49 +875,6 @@ static struct console early_dbgp_console = { | |||
875 | }; | 875 | }; |
876 | #endif | 876 | #endif |
877 | 877 | ||
878 | /* Console interface to a host file on AMD's SimNow! */ | ||
879 | |||
880 | static int simnow_fd; | ||
881 | |||
882 | enum { | ||
883 | MAGIC1 = 0xBACCD00A, | ||
884 | MAGIC2 = 0xCA110000, | ||
885 | XOPEN = 5, | ||
886 | XWRITE = 4, | ||
887 | }; | ||
888 | |||
889 | static noinline long simnow(long cmd, long a, long b, long c) | ||
890 | { | ||
891 | long ret; | ||
892 | |||
893 | asm volatile("cpuid" : | ||
894 | "=a" (ret) : | ||
895 | "b" (a), "c" (b), "d" (c), "0" (MAGIC1), "D" (cmd + MAGIC2)); | ||
896 | return ret; | ||
897 | } | ||
898 | |||
899 | static void __init simnow_init(char *str) | ||
900 | { | ||
901 | char *fn = "klog"; | ||
902 | |||
903 | if (*str == '=') | ||
904 | fn = ++str; | ||
905 | /* error ignored */ | ||
906 | simnow_fd = simnow(XOPEN, (unsigned long)fn, O_WRONLY|O_APPEND|O_CREAT, 0644); | ||
907 | } | ||
908 | |||
909 | static void simnow_write(struct console *con, const char *s, unsigned n) | ||
910 | { | ||
911 | simnow(XWRITE, simnow_fd, (unsigned long)s, n); | ||
912 | } | ||
913 | |||
914 | static struct console simnow_console = { | ||
915 | .name = "simnow", | ||
916 | .write = simnow_write, | ||
917 | .flags = CON_PRINTBUFFER, | ||
918 | .index = -1, | ||
919 | }; | ||
920 | |||
921 | /* Direct interface for emergencies */ | 878 | /* Direct interface for emergencies */ |
922 | static struct console *early_console = &early_vga_console; | 879 | static struct console *early_console = &early_vga_console; |
923 | static int __initdata early_console_initialized; | 880 | static int __initdata early_console_initialized; |
@@ -929,7 +886,7 @@ asmlinkage void early_printk(const char *fmt, ...) | |||
929 | va_list ap; | 886 | va_list ap; |
930 | 887 | ||
931 | va_start(ap, fmt); | 888 | va_start(ap, fmt); |
932 | n = vscnprintf(buf, 512, fmt, ap); | 889 | n = vscnprintf(buf, sizeof(buf), fmt, ap); |
933 | early_console->write(early_console, buf, n); | 890 | early_console->write(early_console, buf, n); |
934 | va_end(ap); | 891 | va_end(ap); |
935 | } | 892 | } |
@@ -960,10 +917,6 @@ static int __init setup_early_printk(char *buf) | |||
960 | max_ypos = boot_params.screen_info.orig_video_lines; | 917 | max_ypos = boot_params.screen_info.orig_video_lines; |
961 | current_ypos = boot_params.screen_info.orig_y; | 918 | current_ypos = boot_params.screen_info.orig_y; |
962 | early_console = &early_vga_console; | 919 | early_console = &early_vga_console; |
963 | } else if (!strncmp(buf, "simnow", 6)) { | ||
964 | simnow_init(buf + 6); | ||
965 | early_console = &simnow_console; | ||
966 | keep_early = 1; | ||
967 | #ifdef CONFIG_EARLY_PRINTK_DBGP | 920 | #ifdef CONFIG_EARLY_PRINTK_DBGP |
968 | } else if (!strncmp(buf, "dbgp", 4)) { | 921 | } else if (!strncmp(buf, "dbgp", 4)) { |
969 | if (early_dbgp_init(buf+4) < 0) | 922 | if (early_dbgp_init(buf+4) < 0) |
diff --git a/arch/x86/kernel/entry_32.S b/arch/x86/kernel/entry_32.S index 28b597ef9ca1..d6f0490a7391 100644 --- a/arch/x86/kernel/entry_32.S +++ b/arch/x86/kernel/entry_32.S | |||
@@ -619,28 +619,37 @@ END(syscall_badsys) | |||
619 | 27:; | 619 | 27:; |
620 | 620 | ||
621 | /* | 621 | /* |
622 | * Build the entry stubs and pointer table with | 622 | * Build the entry stubs and pointer table with some assembler magic. |
623 | * some assembler magic. | 623 | * We pack 7 stubs into a single 32-byte chunk, which will fit in a |
624 | * single cache line on all modern x86 implementations. | ||
624 | */ | 625 | */ |
625 | .section .rodata,"a" | 626 | .section .init.rodata,"a" |
626 | ENTRY(interrupt) | 627 | ENTRY(interrupt) |
627 | .text | 628 | .text |
628 | 629 | .p2align 5 | |
630 | .p2align CONFIG_X86_L1_CACHE_SHIFT | ||
629 | ENTRY(irq_entries_start) | 631 | ENTRY(irq_entries_start) |
630 | RING0_INT_FRAME | 632 | RING0_INT_FRAME |
631 | vector=0 | 633 | vector=FIRST_EXTERNAL_VECTOR |
632 | .rept NR_VECTORS | 634 | .rept (NR_VECTORS-FIRST_EXTERNAL_VECTOR+6)/7 |
633 | ALIGN | 635 | .balign 32 |
634 | .if vector | 636 | .rept 7 |
637 | .if vector < NR_VECTORS | ||
638 | .if vector <> FIRST_EXTERNAL_VECTOR | ||
635 | CFI_ADJUST_CFA_OFFSET -4 | 639 | CFI_ADJUST_CFA_OFFSET -4 |
636 | .endif | 640 | .endif |
637 | 1: pushl $~(vector) | 641 | 1: pushl $(~vector+0x80) /* Note: always in signed byte range */ |
638 | CFI_ADJUST_CFA_OFFSET 4 | 642 | CFI_ADJUST_CFA_OFFSET 4 |
639 | jmp common_interrupt | 643 | .if ((vector-FIRST_EXTERNAL_VECTOR)%7) <> 6 |
640 | .previous | 644 | jmp 2f |
645 | .endif | ||
646 | .previous | ||
641 | .long 1b | 647 | .long 1b |
642 | .text | 648 | .text |
643 | vector=vector+1 | 649 | vector=vector+1 |
650 | .endif | ||
651 | .endr | ||
652 | 2: jmp common_interrupt | ||
644 | .endr | 653 | .endr |
645 | END(irq_entries_start) | 654 | END(irq_entries_start) |
646 | 655 | ||
@@ -652,8 +661,9 @@ END(interrupt) | |||
652 | * the CPU automatically disables interrupts when executing an IRQ vector, | 661 | * the CPU automatically disables interrupts when executing an IRQ vector, |
653 | * so IRQ-flags tracing has to follow that: | 662 | * so IRQ-flags tracing has to follow that: |
654 | */ | 663 | */ |
655 | ALIGN | 664 | .p2align CONFIG_X86_L1_CACHE_SHIFT |
656 | common_interrupt: | 665 | common_interrupt: |
666 | addl $-0x80,(%esp) /* Adjust vector into the [-256,-1] range */ | ||
657 | SAVE_ALL | 667 | SAVE_ALL |
658 | TRACE_IRQS_OFF | 668 | TRACE_IRQS_OFF |
659 | movl %esp,%eax | 669 | movl %esp,%eax |
@@ -678,65 +688,6 @@ ENDPROC(name) | |||
678 | /* The include is where all of the SMP etc. interrupts come from */ | 688 | /* The include is where all of the SMP etc. interrupts come from */ |
679 | #include "entry_arch.h" | 689 | #include "entry_arch.h" |
680 | 690 | ||
681 | KPROBE_ENTRY(page_fault) | ||
682 | RING0_EC_FRAME | ||
683 | pushl $do_page_fault | ||
684 | CFI_ADJUST_CFA_OFFSET 4 | ||
685 | ALIGN | ||
686 | error_code: | ||
687 | /* the function address is in %fs's slot on the stack */ | ||
688 | pushl %es | ||
689 | CFI_ADJUST_CFA_OFFSET 4 | ||
690 | /*CFI_REL_OFFSET es, 0*/ | ||
691 | pushl %ds | ||
692 | CFI_ADJUST_CFA_OFFSET 4 | ||
693 | /*CFI_REL_OFFSET ds, 0*/ | ||
694 | pushl %eax | ||
695 | CFI_ADJUST_CFA_OFFSET 4 | ||
696 | CFI_REL_OFFSET eax, 0 | ||
697 | pushl %ebp | ||
698 | CFI_ADJUST_CFA_OFFSET 4 | ||
699 | CFI_REL_OFFSET ebp, 0 | ||
700 | pushl %edi | ||
701 | CFI_ADJUST_CFA_OFFSET 4 | ||
702 | CFI_REL_OFFSET edi, 0 | ||
703 | pushl %esi | ||
704 | CFI_ADJUST_CFA_OFFSET 4 | ||
705 | CFI_REL_OFFSET esi, 0 | ||
706 | pushl %edx | ||
707 | CFI_ADJUST_CFA_OFFSET 4 | ||
708 | CFI_REL_OFFSET edx, 0 | ||
709 | pushl %ecx | ||
710 | CFI_ADJUST_CFA_OFFSET 4 | ||
711 | CFI_REL_OFFSET ecx, 0 | ||
712 | pushl %ebx | ||
713 | CFI_ADJUST_CFA_OFFSET 4 | ||
714 | CFI_REL_OFFSET ebx, 0 | ||
715 | cld | ||
716 | pushl %fs | ||
717 | CFI_ADJUST_CFA_OFFSET 4 | ||
718 | /*CFI_REL_OFFSET fs, 0*/ | ||
719 | movl $(__KERNEL_PERCPU), %ecx | ||
720 | movl %ecx, %fs | ||
721 | UNWIND_ESPFIX_STACK | ||
722 | popl %ecx | ||
723 | CFI_ADJUST_CFA_OFFSET -4 | ||
724 | /*CFI_REGISTER es, ecx*/ | ||
725 | movl PT_FS(%esp), %edi # get the function address | ||
726 | movl PT_ORIG_EAX(%esp), %edx # get the error code | ||
727 | movl $-1, PT_ORIG_EAX(%esp) # no syscall to restart | ||
728 | mov %ecx, PT_FS(%esp) | ||
729 | /*CFI_REL_OFFSET fs, ES*/ | ||
730 | movl $(__USER_DS), %ecx | ||
731 | movl %ecx, %ds | ||
732 | movl %ecx, %es | ||
733 | TRACE_IRQS_OFF | ||
734 | movl %esp,%eax # pt_regs pointer | ||
735 | call *%edi | ||
736 | jmp ret_from_exception | ||
737 | CFI_ENDPROC | ||
738 | KPROBE_END(page_fault) | ||
739 | |||
740 | ENTRY(coprocessor_error) | 691 | ENTRY(coprocessor_error) |
741 | RING0_INT_FRAME | 692 | RING0_INT_FRAME |
742 | pushl $0 | 693 | pushl $0 |
@@ -767,140 +718,6 @@ ENTRY(device_not_available) | |||
767 | CFI_ENDPROC | 718 | CFI_ENDPROC |
768 | END(device_not_available) | 719 | END(device_not_available) |
769 | 720 | ||
770 | /* | ||
771 | * Debug traps and NMI can happen at the one SYSENTER instruction | ||
772 | * that sets up the real kernel stack. Check here, since we can't | ||
773 | * allow the wrong stack to be used. | ||
774 | * | ||
775 | * "TSS_sysenter_sp0+12" is because the NMI/debug handler will have | ||
776 | * already pushed 3 words if it hits on the sysenter instruction: | ||
777 | * eflags, cs and eip. | ||
778 | * | ||
779 | * We just load the right stack, and push the three (known) values | ||
780 | * by hand onto the new stack - while updating the return eip past | ||
781 | * the instruction that would have done it for sysenter. | ||
782 | */ | ||
783 | #define FIX_STACK(offset, ok, label) \ | ||
784 | cmpw $__KERNEL_CS,4(%esp); \ | ||
785 | jne ok; \ | ||
786 | label: \ | ||
787 | movl TSS_sysenter_sp0+offset(%esp),%esp; \ | ||
788 | CFI_DEF_CFA esp, 0; \ | ||
789 | CFI_UNDEFINED eip; \ | ||
790 | pushfl; \ | ||
791 | CFI_ADJUST_CFA_OFFSET 4; \ | ||
792 | pushl $__KERNEL_CS; \ | ||
793 | CFI_ADJUST_CFA_OFFSET 4; \ | ||
794 | pushl $sysenter_past_esp; \ | ||
795 | CFI_ADJUST_CFA_OFFSET 4; \ | ||
796 | CFI_REL_OFFSET eip, 0 | ||
797 | |||
798 | KPROBE_ENTRY(debug) | ||
799 | RING0_INT_FRAME | ||
800 | cmpl $ia32_sysenter_target,(%esp) | ||
801 | jne debug_stack_correct | ||
802 | FIX_STACK(12, debug_stack_correct, debug_esp_fix_insn) | ||
803 | debug_stack_correct: | ||
804 | pushl $-1 # mark this as an int | ||
805 | CFI_ADJUST_CFA_OFFSET 4 | ||
806 | SAVE_ALL | ||
807 | TRACE_IRQS_OFF | ||
808 | xorl %edx,%edx # error code 0 | ||
809 | movl %esp,%eax # pt_regs pointer | ||
810 | call do_debug | ||
811 | jmp ret_from_exception | ||
812 | CFI_ENDPROC | ||
813 | KPROBE_END(debug) | ||
814 | |||
815 | /* | ||
816 | * NMI is doubly nasty. It can happen _while_ we're handling | ||
817 | * a debug fault, and the debug fault hasn't yet been able to | ||
818 | * clear up the stack. So we first check whether we got an | ||
819 | * NMI on the sysenter entry path, but after that we need to | ||
820 | * check whether we got an NMI on the debug path where the debug | ||
821 | * fault happened on the sysenter path. | ||
822 | */ | ||
823 | KPROBE_ENTRY(nmi) | ||
824 | RING0_INT_FRAME | ||
825 | pushl %eax | ||
826 | CFI_ADJUST_CFA_OFFSET 4 | ||
827 | movl %ss, %eax | ||
828 | cmpw $__ESPFIX_SS, %ax | ||
829 | popl %eax | ||
830 | CFI_ADJUST_CFA_OFFSET -4 | ||
831 | je nmi_espfix_stack | ||
832 | cmpl $ia32_sysenter_target,(%esp) | ||
833 | je nmi_stack_fixup | ||
834 | pushl %eax | ||
835 | CFI_ADJUST_CFA_OFFSET 4 | ||
836 | movl %esp,%eax | ||
837 | /* Do not access memory above the end of our stack page, | ||
838 | * it might not exist. | ||
839 | */ | ||
840 | andl $(THREAD_SIZE-1),%eax | ||
841 | cmpl $(THREAD_SIZE-20),%eax | ||
842 | popl %eax | ||
843 | CFI_ADJUST_CFA_OFFSET -4 | ||
844 | jae nmi_stack_correct | ||
845 | cmpl $ia32_sysenter_target,12(%esp) | ||
846 | je nmi_debug_stack_check | ||
847 | nmi_stack_correct: | ||
848 | /* We have a RING0_INT_FRAME here */ | ||
849 | pushl %eax | ||
850 | CFI_ADJUST_CFA_OFFSET 4 | ||
851 | SAVE_ALL | ||
852 | TRACE_IRQS_OFF | ||
853 | xorl %edx,%edx # zero error code | ||
854 | movl %esp,%eax # pt_regs pointer | ||
855 | call do_nmi | ||
856 | jmp restore_nocheck_notrace | ||
857 | CFI_ENDPROC | ||
858 | |||
859 | nmi_stack_fixup: | ||
860 | RING0_INT_FRAME | ||
861 | FIX_STACK(12,nmi_stack_correct, 1) | ||
862 | jmp nmi_stack_correct | ||
863 | |||
864 | nmi_debug_stack_check: | ||
865 | /* We have a RING0_INT_FRAME here */ | ||
866 | cmpw $__KERNEL_CS,16(%esp) | ||
867 | jne nmi_stack_correct | ||
868 | cmpl $debug,(%esp) | ||
869 | jb nmi_stack_correct | ||
870 | cmpl $debug_esp_fix_insn,(%esp) | ||
871 | ja nmi_stack_correct | ||
872 | FIX_STACK(24,nmi_stack_correct, 1) | ||
873 | jmp nmi_stack_correct | ||
874 | |||
875 | nmi_espfix_stack: | ||
876 | /* We have a RING0_INT_FRAME here. | ||
877 | * | ||
878 | * create the pointer to lss back | ||
879 | */ | ||
880 | pushl %ss | ||
881 | CFI_ADJUST_CFA_OFFSET 4 | ||
882 | pushl %esp | ||
883 | CFI_ADJUST_CFA_OFFSET 4 | ||
884 | addw $4, (%esp) | ||
885 | /* copy the iret frame of 12 bytes */ | ||
886 | .rept 3 | ||
887 | pushl 16(%esp) | ||
888 | CFI_ADJUST_CFA_OFFSET 4 | ||
889 | .endr | ||
890 | pushl %eax | ||
891 | CFI_ADJUST_CFA_OFFSET 4 | ||
892 | SAVE_ALL | ||
893 | TRACE_IRQS_OFF | ||
894 | FIXUP_ESPFIX_STACK # %eax == %esp | ||
895 | xorl %edx,%edx # zero error code | ||
896 | call do_nmi | ||
897 | RESTORE_REGS | ||
898 | lss 12+4(%esp), %esp # back to espfix stack | ||
899 | CFI_ADJUST_CFA_OFFSET -24 | ||
900 | jmp irq_return | ||
901 | CFI_ENDPROC | ||
902 | KPROBE_END(nmi) | ||
903 | |||
904 | #ifdef CONFIG_PARAVIRT | 721 | #ifdef CONFIG_PARAVIRT |
905 | ENTRY(native_iret) | 722 | ENTRY(native_iret) |
906 | iret | 723 | iret |
@@ -916,19 +733,6 @@ ENTRY(native_irq_enable_sysexit) | |||
916 | END(native_irq_enable_sysexit) | 733 | END(native_irq_enable_sysexit) |
917 | #endif | 734 | #endif |
918 | 735 | ||
919 | KPROBE_ENTRY(int3) | ||
920 | RING0_INT_FRAME | ||
921 | pushl $-1 # mark this as an int | ||
922 | CFI_ADJUST_CFA_OFFSET 4 | ||
923 | SAVE_ALL | ||
924 | TRACE_IRQS_OFF | ||
925 | xorl %edx,%edx # zero error code | ||
926 | movl %esp,%eax # pt_regs pointer | ||
927 | call do_int3 | ||
928 | jmp ret_from_exception | ||
929 | CFI_ENDPROC | ||
930 | KPROBE_END(int3) | ||
931 | |||
932 | ENTRY(overflow) | 736 | ENTRY(overflow) |
933 | RING0_INT_FRAME | 737 | RING0_INT_FRAME |
934 | pushl $0 | 738 | pushl $0 |
@@ -993,14 +797,6 @@ ENTRY(stack_segment) | |||
993 | CFI_ENDPROC | 797 | CFI_ENDPROC |
994 | END(stack_segment) | 798 | END(stack_segment) |
995 | 799 | ||
996 | KPROBE_ENTRY(general_protection) | ||
997 | RING0_EC_FRAME | ||
998 | pushl $do_general_protection | ||
999 | CFI_ADJUST_CFA_OFFSET 4 | ||
1000 | jmp error_code | ||
1001 | CFI_ENDPROC | ||
1002 | KPROBE_END(general_protection) | ||
1003 | |||
1004 | ENTRY(alignment_check) | 800 | ENTRY(alignment_check) |
1005 | RING0_EC_FRAME | 801 | RING0_EC_FRAME |
1006 | pushl $do_alignment_check | 802 | pushl $do_alignment_check |
@@ -1051,6 +847,7 @@ ENTRY(kernel_thread_helper) | |||
1051 | push %eax | 847 | push %eax |
1052 | CFI_ADJUST_CFA_OFFSET 4 | 848 | CFI_ADJUST_CFA_OFFSET 4 |
1053 | call do_exit | 849 | call do_exit |
850 | ud2 # padding for call trace | ||
1054 | CFI_ENDPROC | 851 | CFI_ENDPROC |
1055 | ENDPROC(kernel_thread_helper) | 852 | ENDPROC(kernel_thread_helper) |
1056 | 853 | ||
@@ -1157,6 +954,9 @@ ENTRY(mcount) | |||
1157 | END(mcount) | 954 | END(mcount) |
1158 | 955 | ||
1159 | ENTRY(ftrace_caller) | 956 | ENTRY(ftrace_caller) |
957 | cmpl $0, function_trace_stop | ||
958 | jne ftrace_stub | ||
959 | |||
1160 | pushl %eax | 960 | pushl %eax |
1161 | pushl %ecx | 961 | pushl %ecx |
1162 | pushl %edx | 962 | pushl %edx |
@@ -1171,6 +971,11 @@ ftrace_call: | |||
1171 | popl %edx | 971 | popl %edx |
1172 | popl %ecx | 972 | popl %ecx |
1173 | popl %eax | 973 | popl %eax |
974 | #ifdef CONFIG_FUNCTION_GRAPH_TRACER | ||
975 | .globl ftrace_graph_call | ||
976 | ftrace_graph_call: | ||
977 | jmp ftrace_stub | ||
978 | #endif | ||
1174 | 979 | ||
1175 | .globl ftrace_stub | 980 | .globl ftrace_stub |
1176 | ftrace_stub: | 981 | ftrace_stub: |
@@ -1180,8 +985,18 @@ END(ftrace_caller) | |||
1180 | #else /* ! CONFIG_DYNAMIC_FTRACE */ | 985 | #else /* ! CONFIG_DYNAMIC_FTRACE */ |
1181 | 986 | ||
1182 | ENTRY(mcount) | 987 | ENTRY(mcount) |
988 | cmpl $0, function_trace_stop | ||
989 | jne ftrace_stub | ||
990 | |||
1183 | cmpl $ftrace_stub, ftrace_trace_function | 991 | cmpl $ftrace_stub, ftrace_trace_function |
1184 | jnz trace | 992 | jnz trace |
993 | #ifdef CONFIG_FUNCTION_GRAPH_TRACER | ||
994 | cmpl $ftrace_stub, ftrace_graph_return | ||
995 | jnz ftrace_graph_caller | ||
996 | |||
997 | cmpl $ftrace_graph_entry_stub, ftrace_graph_entry | ||
998 | jnz ftrace_graph_caller | ||
999 | #endif | ||
1185 | .globl ftrace_stub | 1000 | .globl ftrace_stub |
1186 | ftrace_stub: | 1001 | ftrace_stub: |
1187 | ret | 1002 | ret |
@@ -1200,13 +1015,268 @@ trace: | |||
1200 | popl %edx | 1015 | popl %edx |
1201 | popl %ecx | 1016 | popl %ecx |
1202 | popl %eax | 1017 | popl %eax |
1203 | |||
1204 | jmp ftrace_stub | 1018 | jmp ftrace_stub |
1205 | END(mcount) | 1019 | END(mcount) |
1206 | #endif /* CONFIG_DYNAMIC_FTRACE */ | 1020 | #endif /* CONFIG_DYNAMIC_FTRACE */ |
1207 | #endif /* CONFIG_FUNCTION_TRACER */ | 1021 | #endif /* CONFIG_FUNCTION_TRACER */ |
1208 | 1022 | ||
1023 | #ifdef CONFIG_FUNCTION_GRAPH_TRACER | ||
1024 | ENTRY(ftrace_graph_caller) | ||
1025 | cmpl $0, function_trace_stop | ||
1026 | jne ftrace_stub | ||
1027 | |||
1028 | pushl %eax | ||
1029 | pushl %ecx | ||
1030 | pushl %edx | ||
1031 | movl 0xc(%esp), %edx | ||
1032 | lea 0x4(%ebp), %eax | ||
1033 | subl $MCOUNT_INSN_SIZE, %edx | ||
1034 | call prepare_ftrace_return | ||
1035 | popl %edx | ||
1036 | popl %ecx | ||
1037 | popl %eax | ||
1038 | ret | ||
1039 | END(ftrace_graph_caller) | ||
1040 | |||
1041 | .globl return_to_handler | ||
1042 | return_to_handler: | ||
1043 | pushl $0 | ||
1044 | pushl %eax | ||
1045 | pushl %ecx | ||
1046 | pushl %edx | ||
1047 | call ftrace_return_to_handler | ||
1048 | movl %eax, 0xc(%esp) | ||
1049 | popl %edx | ||
1050 | popl %ecx | ||
1051 | popl %eax | ||
1052 | ret | ||
1053 | #endif | ||
1054 | |||
1209 | .section .rodata,"a" | 1055 | .section .rodata,"a" |
1210 | #include "syscall_table_32.S" | 1056 | #include "syscall_table_32.S" |
1211 | 1057 | ||
1212 | syscall_table_size=(.-sys_call_table) | 1058 | syscall_table_size=(.-sys_call_table) |
1059 | |||
1060 | /* | ||
1061 | * Some functions should be protected against kprobes | ||
1062 | */ | ||
1063 | .pushsection .kprobes.text, "ax" | ||
1064 | |||
1065 | ENTRY(page_fault) | ||
1066 | RING0_EC_FRAME | ||
1067 | pushl $do_page_fault | ||
1068 | CFI_ADJUST_CFA_OFFSET 4 | ||
1069 | ALIGN | ||
1070 | error_code: | ||
1071 | /* the function address is in %fs's slot on the stack */ | ||
1072 | pushl %es | ||
1073 | CFI_ADJUST_CFA_OFFSET 4 | ||
1074 | /*CFI_REL_OFFSET es, 0*/ | ||
1075 | pushl %ds | ||
1076 | CFI_ADJUST_CFA_OFFSET 4 | ||
1077 | /*CFI_REL_OFFSET ds, 0*/ | ||
1078 | pushl %eax | ||
1079 | CFI_ADJUST_CFA_OFFSET 4 | ||
1080 | CFI_REL_OFFSET eax, 0 | ||
1081 | pushl %ebp | ||
1082 | CFI_ADJUST_CFA_OFFSET 4 | ||
1083 | CFI_REL_OFFSET ebp, 0 | ||
1084 | pushl %edi | ||
1085 | CFI_ADJUST_CFA_OFFSET 4 | ||
1086 | CFI_REL_OFFSET edi, 0 | ||
1087 | pushl %esi | ||
1088 | CFI_ADJUST_CFA_OFFSET 4 | ||
1089 | CFI_REL_OFFSET esi, 0 | ||
1090 | pushl %edx | ||
1091 | CFI_ADJUST_CFA_OFFSET 4 | ||
1092 | CFI_REL_OFFSET edx, 0 | ||
1093 | pushl %ecx | ||
1094 | CFI_ADJUST_CFA_OFFSET 4 | ||
1095 | CFI_REL_OFFSET ecx, 0 | ||
1096 | pushl %ebx | ||
1097 | CFI_ADJUST_CFA_OFFSET 4 | ||
1098 | CFI_REL_OFFSET ebx, 0 | ||
1099 | cld | ||
1100 | pushl %fs | ||
1101 | CFI_ADJUST_CFA_OFFSET 4 | ||
1102 | /*CFI_REL_OFFSET fs, 0*/ | ||
1103 | movl $(__KERNEL_PERCPU), %ecx | ||
1104 | movl %ecx, %fs | ||
1105 | UNWIND_ESPFIX_STACK | ||
1106 | popl %ecx | ||
1107 | CFI_ADJUST_CFA_OFFSET -4 | ||
1108 | /*CFI_REGISTER es, ecx*/ | ||
1109 | movl PT_FS(%esp), %edi # get the function address | ||
1110 | movl PT_ORIG_EAX(%esp), %edx # get the error code | ||
1111 | movl $-1, PT_ORIG_EAX(%esp) # no syscall to restart | ||
1112 | mov %ecx, PT_FS(%esp) | ||
1113 | /*CFI_REL_OFFSET fs, ES*/ | ||
1114 | movl $(__USER_DS), %ecx | ||
1115 | movl %ecx, %ds | ||
1116 | movl %ecx, %es | ||
1117 | TRACE_IRQS_OFF | ||
1118 | movl %esp,%eax # pt_regs pointer | ||
1119 | call *%edi | ||
1120 | jmp ret_from_exception | ||
1121 | CFI_ENDPROC | ||
1122 | END(page_fault) | ||
1123 | |||
1124 | /* | ||
1125 | * Debug traps and NMI can happen at the one SYSENTER instruction | ||
1126 | * that sets up the real kernel stack. Check here, since we can't | ||
1127 | * allow the wrong stack to be used. | ||
1128 | * | ||
1129 | * "TSS_sysenter_sp0+12" is because the NMI/debug handler will have | ||
1130 | * already pushed 3 words if it hits on the sysenter instruction: | ||
1131 | * eflags, cs and eip. | ||
1132 | * | ||
1133 | * We just load the right stack, and push the three (known) values | ||
1134 | * by hand onto the new stack - while updating the return eip past | ||
1135 | * the instruction that would have done it for sysenter. | ||
1136 | */ | ||
1137 | #define FIX_STACK(offset, ok, label) \ | ||
1138 | cmpw $__KERNEL_CS,4(%esp); \ | ||
1139 | jne ok; \ | ||
1140 | label: \ | ||
1141 | movl TSS_sysenter_sp0+offset(%esp),%esp; \ | ||
1142 | CFI_DEF_CFA esp, 0; \ | ||
1143 | CFI_UNDEFINED eip; \ | ||
1144 | pushfl; \ | ||
1145 | CFI_ADJUST_CFA_OFFSET 4; \ | ||
1146 | pushl $__KERNEL_CS; \ | ||
1147 | CFI_ADJUST_CFA_OFFSET 4; \ | ||
1148 | pushl $sysenter_past_esp; \ | ||
1149 | CFI_ADJUST_CFA_OFFSET 4; \ | ||
1150 | CFI_REL_OFFSET eip, 0 | ||
1151 | |||
1152 | ENTRY(debug) | ||
1153 | RING0_INT_FRAME | ||
1154 | cmpl $ia32_sysenter_target,(%esp) | ||
1155 | jne debug_stack_correct | ||
1156 | FIX_STACK(12, debug_stack_correct, debug_esp_fix_insn) | ||
1157 | debug_stack_correct: | ||
1158 | pushl $-1 # mark this as an int | ||
1159 | CFI_ADJUST_CFA_OFFSET 4 | ||
1160 | SAVE_ALL | ||
1161 | TRACE_IRQS_OFF | ||
1162 | xorl %edx,%edx # error code 0 | ||
1163 | movl %esp,%eax # pt_regs pointer | ||
1164 | call do_debug | ||
1165 | jmp ret_from_exception | ||
1166 | CFI_ENDPROC | ||
1167 | END(debug) | ||
1168 | |||
1169 | /* | ||
1170 | * NMI is doubly nasty. It can happen _while_ we're handling | ||
1171 | * a debug fault, and the debug fault hasn't yet been able to | ||
1172 | * clear up the stack. So we first check whether we got an | ||
1173 | * NMI on the sysenter entry path, but after that we need to | ||
1174 | * check whether we got an NMI on the debug path where the debug | ||
1175 | * fault happened on the sysenter path. | ||
1176 | */ | ||
1177 | ENTRY(nmi) | ||
1178 | RING0_INT_FRAME | ||
1179 | pushl %eax | ||
1180 | CFI_ADJUST_CFA_OFFSET 4 | ||
1181 | movl %ss, %eax | ||
1182 | cmpw $__ESPFIX_SS, %ax | ||
1183 | popl %eax | ||
1184 | CFI_ADJUST_CFA_OFFSET -4 | ||
1185 | je nmi_espfix_stack | ||
1186 | cmpl $ia32_sysenter_target,(%esp) | ||
1187 | je nmi_stack_fixup | ||
1188 | pushl %eax | ||
1189 | CFI_ADJUST_CFA_OFFSET 4 | ||
1190 | movl %esp,%eax | ||
1191 | /* Do not access memory above the end of our stack page, | ||
1192 | * it might not exist. | ||
1193 | */ | ||
1194 | andl $(THREAD_SIZE-1),%eax | ||
1195 | cmpl $(THREAD_SIZE-20),%eax | ||
1196 | popl %eax | ||
1197 | CFI_ADJUST_CFA_OFFSET -4 | ||
1198 | jae nmi_stack_correct | ||
1199 | cmpl $ia32_sysenter_target,12(%esp) | ||
1200 | je nmi_debug_stack_check | ||
1201 | nmi_stack_correct: | ||
1202 | /* We have a RING0_INT_FRAME here */ | ||
1203 | pushl %eax | ||
1204 | CFI_ADJUST_CFA_OFFSET 4 | ||
1205 | SAVE_ALL | ||
1206 | TRACE_IRQS_OFF | ||
1207 | xorl %edx,%edx # zero error code | ||
1208 | movl %esp,%eax # pt_regs pointer | ||
1209 | call do_nmi | ||
1210 | jmp restore_nocheck_notrace | ||
1211 | CFI_ENDPROC | ||
1212 | |||
1213 | nmi_stack_fixup: | ||
1214 | RING0_INT_FRAME | ||
1215 | FIX_STACK(12,nmi_stack_correct, 1) | ||
1216 | jmp nmi_stack_correct | ||
1217 | |||
1218 | nmi_debug_stack_check: | ||
1219 | /* We have a RING0_INT_FRAME here */ | ||
1220 | cmpw $__KERNEL_CS,16(%esp) | ||
1221 | jne nmi_stack_correct | ||
1222 | cmpl $debug,(%esp) | ||
1223 | jb nmi_stack_correct | ||
1224 | cmpl $debug_esp_fix_insn,(%esp) | ||
1225 | ja nmi_stack_correct | ||
1226 | FIX_STACK(24,nmi_stack_correct, 1) | ||
1227 | jmp nmi_stack_correct | ||
1228 | |||
1229 | nmi_espfix_stack: | ||
1230 | /* We have a RING0_INT_FRAME here. | ||
1231 | * | ||
1232 | * create the pointer to lss back | ||
1233 | */ | ||
1234 | pushl %ss | ||
1235 | CFI_ADJUST_CFA_OFFSET 4 | ||
1236 | pushl %esp | ||
1237 | CFI_ADJUST_CFA_OFFSET 4 | ||
1238 | addw $4, (%esp) | ||
1239 | /* copy the iret frame of 12 bytes */ | ||
1240 | .rept 3 | ||
1241 | pushl 16(%esp) | ||
1242 | CFI_ADJUST_CFA_OFFSET 4 | ||
1243 | .endr | ||
1244 | pushl %eax | ||
1245 | CFI_ADJUST_CFA_OFFSET 4 | ||
1246 | SAVE_ALL | ||
1247 | TRACE_IRQS_OFF | ||
1248 | FIXUP_ESPFIX_STACK # %eax == %esp | ||
1249 | xorl %edx,%edx # zero error code | ||
1250 | call do_nmi | ||
1251 | RESTORE_REGS | ||
1252 | lss 12+4(%esp), %esp # back to espfix stack | ||
1253 | CFI_ADJUST_CFA_OFFSET -24 | ||
1254 | jmp irq_return | ||
1255 | CFI_ENDPROC | ||
1256 | END(nmi) | ||
1257 | |||
1258 | ENTRY(int3) | ||
1259 | RING0_INT_FRAME | ||
1260 | pushl $-1 # mark this as an int | ||
1261 | CFI_ADJUST_CFA_OFFSET 4 | ||
1262 | SAVE_ALL | ||
1263 | TRACE_IRQS_OFF | ||
1264 | xorl %edx,%edx # zero error code | ||
1265 | movl %esp,%eax # pt_regs pointer | ||
1266 | call do_int3 | ||
1267 | jmp ret_from_exception | ||
1268 | CFI_ENDPROC | ||
1269 | END(int3) | ||
1270 | |||
1271 | ENTRY(general_protection) | ||
1272 | RING0_EC_FRAME | ||
1273 | pushl $do_general_protection | ||
1274 | CFI_ADJUST_CFA_OFFSET 4 | ||
1275 | jmp error_code | ||
1276 | CFI_ENDPROC | ||
1277 | END(general_protection) | ||
1278 | |||
1279 | /* | ||
1280 | * End of kprobes section | ||
1281 | */ | ||
1282 | .popsection | ||
diff --git a/arch/x86/kernel/entry_64.S b/arch/x86/kernel/entry_64.S index b86f332c96a6..e28c7a987793 100644 --- a/arch/x86/kernel/entry_64.S +++ b/arch/x86/kernel/entry_64.S | |||
@@ -11,15 +11,15 @@ | |||
11 | * | 11 | * |
12 | * NOTE: This code handles signal-recognition, which happens every time | 12 | * NOTE: This code handles signal-recognition, which happens every time |
13 | * after an interrupt and after each system call. | 13 | * after an interrupt and after each system call. |
14 | * | 14 | * |
15 | * Normal syscalls and interrupts don't save a full stack frame, this is | 15 | * Normal syscalls and interrupts don't save a full stack frame, this is |
16 | * only done for syscall tracing, signals or fork/exec et al. | 16 | * only done for syscall tracing, signals or fork/exec et al. |
17 | * | 17 | * |
18 | * A note on terminology: | 18 | * A note on terminology: |
19 | * - top of stack: Architecture defined interrupt frame from SS to RIP | 19 | * - top of stack: Architecture defined interrupt frame from SS to RIP |
20 | * at the top of the kernel process stack. | 20 | * at the top of the kernel process stack. |
21 | * - partial stack frame: partially saved registers up to R11. | 21 | * - partial stack frame: partially saved registers up to R11. |
22 | * - full stack frame: Like partial stack frame, but all registers saved. | 22 | * - full stack frame: Like partial stack frame, but all registers saved. |
23 | * | 23 | * |
24 | * Some macro usage: | 24 | * Some macro usage: |
25 | * - CFI macros are used to generate dwarf2 unwind information for better | 25 | * - CFI macros are used to generate dwarf2 unwind information for better |
@@ -60,7 +60,6 @@ | |||
60 | #define __AUDIT_ARCH_LE 0x40000000 | 60 | #define __AUDIT_ARCH_LE 0x40000000 |
61 | 61 | ||
62 | .code64 | 62 | .code64 |
63 | |||
64 | #ifdef CONFIG_FUNCTION_TRACER | 63 | #ifdef CONFIG_FUNCTION_TRACER |
65 | #ifdef CONFIG_DYNAMIC_FTRACE | 64 | #ifdef CONFIG_DYNAMIC_FTRACE |
66 | ENTRY(mcount) | 65 | ENTRY(mcount) |
@@ -68,16 +67,10 @@ ENTRY(mcount) | |||
68 | END(mcount) | 67 | END(mcount) |
69 | 68 | ||
70 | ENTRY(ftrace_caller) | 69 | ENTRY(ftrace_caller) |
70 | cmpl $0, function_trace_stop | ||
71 | jne ftrace_stub | ||
71 | 72 | ||
72 | /* taken from glibc */ | 73 | MCOUNT_SAVE_FRAME |
73 | subq $0x38, %rsp | ||
74 | movq %rax, (%rsp) | ||
75 | movq %rcx, 8(%rsp) | ||
76 | movq %rdx, 16(%rsp) | ||
77 | movq %rsi, 24(%rsp) | ||
78 | movq %rdi, 32(%rsp) | ||
79 | movq %r8, 40(%rsp) | ||
80 | movq %r9, 48(%rsp) | ||
81 | 74 | ||
82 | movq 0x38(%rsp), %rdi | 75 | movq 0x38(%rsp), %rdi |
83 | movq 8(%rbp), %rsi | 76 | movq 8(%rbp), %rsi |
@@ -87,14 +80,13 @@ ENTRY(ftrace_caller) | |||
87 | ftrace_call: | 80 | ftrace_call: |
88 | call ftrace_stub | 81 | call ftrace_stub |
89 | 82 | ||
90 | movq 48(%rsp), %r9 | 83 | MCOUNT_RESTORE_FRAME |
91 | movq 40(%rsp), %r8 | 84 | |
92 | movq 32(%rsp), %rdi | 85 | #ifdef CONFIG_FUNCTION_GRAPH_TRACER |
93 | movq 24(%rsp), %rsi | 86 | .globl ftrace_graph_call |
94 | movq 16(%rsp), %rdx | 87 | ftrace_graph_call: |
95 | movq 8(%rsp), %rcx | 88 | jmp ftrace_stub |
96 | movq (%rsp), %rax | 89 | #endif |
97 | addq $0x38, %rsp | ||
98 | 90 | ||
99 | .globl ftrace_stub | 91 | .globl ftrace_stub |
100 | ftrace_stub: | 92 | ftrace_stub: |
@@ -103,15 +95,63 @@ END(ftrace_caller) | |||
103 | 95 | ||
104 | #else /* ! CONFIG_DYNAMIC_FTRACE */ | 96 | #else /* ! CONFIG_DYNAMIC_FTRACE */ |
105 | ENTRY(mcount) | 97 | ENTRY(mcount) |
98 | cmpl $0, function_trace_stop | ||
99 | jne ftrace_stub | ||
100 | |||
106 | cmpq $ftrace_stub, ftrace_trace_function | 101 | cmpq $ftrace_stub, ftrace_trace_function |
107 | jnz trace | 102 | jnz trace |
103 | |||
104 | #ifdef CONFIG_FUNCTION_GRAPH_TRACER | ||
105 | cmpq $ftrace_stub, ftrace_graph_return | ||
106 | jnz ftrace_graph_caller | ||
107 | |||
108 | cmpq $ftrace_graph_entry_stub, ftrace_graph_entry | ||
109 | jnz ftrace_graph_caller | ||
110 | #endif | ||
111 | |||
108 | .globl ftrace_stub | 112 | .globl ftrace_stub |
109 | ftrace_stub: | 113 | ftrace_stub: |
110 | retq | 114 | retq |
111 | 115 | ||
112 | trace: | 116 | trace: |
113 | /* taken from glibc */ | 117 | MCOUNT_SAVE_FRAME |
114 | subq $0x38, %rsp | 118 | |
119 | movq 0x38(%rsp), %rdi | ||
120 | movq 8(%rbp), %rsi | ||
121 | subq $MCOUNT_INSN_SIZE, %rdi | ||
122 | |||
123 | call *ftrace_trace_function | ||
124 | |||
125 | MCOUNT_RESTORE_FRAME | ||
126 | |||
127 | jmp ftrace_stub | ||
128 | END(mcount) | ||
129 | #endif /* CONFIG_DYNAMIC_FTRACE */ | ||
130 | #endif /* CONFIG_FUNCTION_TRACER */ | ||
131 | |||
132 | #ifdef CONFIG_FUNCTION_GRAPH_TRACER | ||
133 | ENTRY(ftrace_graph_caller) | ||
134 | cmpl $0, function_trace_stop | ||
135 | jne ftrace_stub | ||
136 | |||
137 | MCOUNT_SAVE_FRAME | ||
138 | |||
139 | leaq 8(%rbp), %rdi | ||
140 | movq 0x38(%rsp), %rsi | ||
141 | subq $MCOUNT_INSN_SIZE, %rsi | ||
142 | |||
143 | call prepare_ftrace_return | ||
144 | |||
145 | MCOUNT_RESTORE_FRAME | ||
146 | |||
147 | retq | ||
148 | END(ftrace_graph_caller) | ||
149 | |||
150 | |||
151 | .globl return_to_handler | ||
152 | return_to_handler: | ||
153 | subq $80, %rsp | ||
154 | |||
115 | movq %rax, (%rsp) | 155 | movq %rax, (%rsp) |
116 | movq %rcx, 8(%rsp) | 156 | movq %rcx, 8(%rsp) |
117 | movq %rdx, 16(%rsp) | 157 | movq %rdx, 16(%rsp) |
@@ -119,13 +159,14 @@ trace: | |||
119 | movq %rdi, 32(%rsp) | 159 | movq %rdi, 32(%rsp) |
120 | movq %r8, 40(%rsp) | 160 | movq %r8, 40(%rsp) |
121 | movq %r9, 48(%rsp) | 161 | movq %r9, 48(%rsp) |
162 | movq %r10, 56(%rsp) | ||
163 | movq %r11, 64(%rsp) | ||
122 | 164 | ||
123 | movq 0x38(%rsp), %rdi | 165 | call ftrace_return_to_handler |
124 | movq 8(%rbp), %rsi | ||
125 | subq $MCOUNT_INSN_SIZE, %rdi | ||
126 | |||
127 | call *ftrace_trace_function | ||
128 | 166 | ||
167 | movq %rax, 72(%rsp) | ||
168 | movq 64(%rsp), %r11 | ||
169 | movq 56(%rsp), %r10 | ||
129 | movq 48(%rsp), %r9 | 170 | movq 48(%rsp), %r9 |
130 | movq 40(%rsp), %r8 | 171 | movq 40(%rsp), %r8 |
131 | movq 32(%rsp), %rdi | 172 | movq 32(%rsp), %rdi |
@@ -133,16 +174,14 @@ trace: | |||
133 | movq 16(%rsp), %rdx | 174 | movq 16(%rsp), %rdx |
134 | movq 8(%rsp), %rcx | 175 | movq 8(%rsp), %rcx |
135 | movq (%rsp), %rax | 176 | movq (%rsp), %rax |
136 | addq $0x38, %rsp | 177 | addq $72, %rsp |
178 | retq | ||
179 | #endif | ||
137 | 180 | ||
138 | jmp ftrace_stub | ||
139 | END(mcount) | ||
140 | #endif /* CONFIG_DYNAMIC_FTRACE */ | ||
141 | #endif /* CONFIG_FUNCTION_TRACER */ | ||
142 | 181 | ||
143 | #ifndef CONFIG_PREEMPT | 182 | #ifndef CONFIG_PREEMPT |
144 | #define retint_kernel retint_restore_args | 183 | #define retint_kernel retint_restore_args |
145 | #endif | 184 | #endif |
146 | 185 | ||
147 | #ifdef CONFIG_PARAVIRT | 186 | #ifdef CONFIG_PARAVIRT |
148 | ENTRY(native_usergs_sysret64) | 187 | ENTRY(native_usergs_sysret64) |
@@ -161,29 +200,29 @@ ENTRY(native_usergs_sysret64) | |||
161 | .endm | 200 | .endm |
162 | 201 | ||
163 | /* | 202 | /* |
164 | * C code is not supposed to know about undefined top of stack. Every time | 203 | * C code is not supposed to know about undefined top of stack. Every time |
165 | * a C function with a pt_regs argument is called from the SYSCALL based | 204 | * a C function with a pt_regs argument is called from the SYSCALL based |
166 | * fast path FIXUP_TOP_OF_STACK is needed. | 205 | * fast path FIXUP_TOP_OF_STACK is needed. |
167 | * RESTORE_TOP_OF_STACK syncs the syscall state after any possible ptregs | 206 | * RESTORE_TOP_OF_STACK syncs the syscall state after any possible ptregs |
168 | * manipulation. | 207 | * manipulation. |
169 | */ | 208 | */ |
170 | 209 | ||
171 | /* %rsp:at FRAMEEND */ | 210 | /* %rsp:at FRAMEEND */ |
172 | .macro FIXUP_TOP_OF_STACK tmp | 211 | .macro FIXUP_TOP_OF_STACK tmp offset=0 |
173 | movq %gs:pda_oldrsp,\tmp | 212 | movq %gs:pda_oldrsp,\tmp |
174 | movq \tmp,RSP(%rsp) | 213 | movq \tmp,RSP+\offset(%rsp) |
175 | movq $__USER_DS,SS(%rsp) | 214 | movq $__USER_DS,SS+\offset(%rsp) |
176 | movq $__USER_CS,CS(%rsp) | 215 | movq $__USER_CS,CS+\offset(%rsp) |
177 | movq $-1,RCX(%rsp) | 216 | movq $-1,RCX+\offset(%rsp) |
178 | movq R11(%rsp),\tmp /* get eflags */ | 217 | movq R11+\offset(%rsp),\tmp /* get eflags */ |
179 | movq \tmp,EFLAGS(%rsp) | 218 | movq \tmp,EFLAGS+\offset(%rsp) |
180 | .endm | 219 | .endm |
181 | 220 | ||
182 | .macro RESTORE_TOP_OF_STACK tmp,offset=0 | 221 | .macro RESTORE_TOP_OF_STACK tmp offset=0 |
183 | movq RSP-\offset(%rsp),\tmp | 222 | movq RSP+\offset(%rsp),\tmp |
184 | movq \tmp,%gs:pda_oldrsp | 223 | movq \tmp,%gs:pda_oldrsp |
185 | movq EFLAGS-\offset(%rsp),\tmp | 224 | movq EFLAGS+\offset(%rsp),\tmp |
186 | movq \tmp,R11-\offset(%rsp) | 225 | movq \tmp,R11+\offset(%rsp) |
187 | .endm | 226 | .endm |
188 | 227 | ||
189 | .macro FAKE_STACK_FRAME child_rip | 228 | .macro FAKE_STACK_FRAME child_rip |
@@ -195,7 +234,7 @@ ENTRY(native_usergs_sysret64) | |||
195 | pushq %rax /* rsp */ | 234 | pushq %rax /* rsp */ |
196 | CFI_ADJUST_CFA_OFFSET 8 | 235 | CFI_ADJUST_CFA_OFFSET 8 |
197 | CFI_REL_OFFSET rsp,0 | 236 | CFI_REL_OFFSET rsp,0 |
198 | pushq $(1<<9) /* eflags - interrupts on */ | 237 | pushq $X86_EFLAGS_IF /* eflags - interrupts on */ |
199 | CFI_ADJUST_CFA_OFFSET 8 | 238 | CFI_ADJUST_CFA_OFFSET 8 |
200 | /*CFI_REL_OFFSET rflags,0*/ | 239 | /*CFI_REL_OFFSET rflags,0*/ |
201 | pushq $__KERNEL_CS /* cs */ | 240 | pushq $__KERNEL_CS /* cs */ |
@@ -213,62 +252,184 @@ ENTRY(native_usergs_sysret64) | |||
213 | CFI_ADJUST_CFA_OFFSET -(6*8) | 252 | CFI_ADJUST_CFA_OFFSET -(6*8) |
214 | .endm | 253 | .endm |
215 | 254 | ||
216 | .macro CFI_DEFAULT_STACK start=1 | 255 | /* |
256 | * empty frame state: defines only the CFA, no register save locations | ||
257 | */ | ||
258 | .macro EMPTY_FRAME start=1 offset=0 | ||
217 | .if \start | 259 | .if \start |
218 | CFI_STARTPROC simple | 260 | CFI_STARTPROC simple |
219 | CFI_SIGNAL_FRAME | 261 | CFI_SIGNAL_FRAME |
220 | CFI_DEF_CFA rsp,SS+8 | 262 | CFI_DEF_CFA rsp,8+\offset |
221 | .else | 263 | .else |
222 | CFI_DEF_CFA_OFFSET SS+8 | 264 | CFI_DEF_CFA_OFFSET 8+\offset |
223 | .endif | 265 | .endif |
224 | CFI_REL_OFFSET r15,R15 | ||
225 | CFI_REL_OFFSET r14,R14 | ||
226 | CFI_REL_OFFSET r13,R13 | ||
227 | CFI_REL_OFFSET r12,R12 | ||
228 | CFI_REL_OFFSET rbp,RBP | ||
229 | CFI_REL_OFFSET rbx,RBX | ||
230 | CFI_REL_OFFSET r11,R11 | ||
231 | CFI_REL_OFFSET r10,R10 | ||
232 | CFI_REL_OFFSET r9,R9 | ||
233 | CFI_REL_OFFSET r8,R8 | ||
234 | CFI_REL_OFFSET rax,RAX | ||
235 | CFI_REL_OFFSET rcx,RCX | ||
236 | CFI_REL_OFFSET rdx,RDX | ||
237 | CFI_REL_OFFSET rsi,RSI | ||
238 | CFI_REL_OFFSET rdi,RDI | ||
239 | CFI_REL_OFFSET rip,RIP | ||
240 | /*CFI_REL_OFFSET cs,CS*/ | ||
241 | /*CFI_REL_OFFSET rflags,EFLAGS*/ | ||
242 | CFI_REL_OFFSET rsp,RSP | ||
243 | /*CFI_REL_OFFSET ss,SS*/ | ||
244 | .endm | 266 | .endm |
267 | |||
268 | /* | ||
269 | * initial frame state for interrupts (and exceptions without error code) | ||
270 | */ | ||
271 | .macro INTR_FRAME start=1 offset=0 | ||
272 | EMPTY_FRAME \start, SS+8+\offset-RIP | ||
273 | /*CFI_REL_OFFSET ss, SS+\offset-RIP*/ | ||
274 | CFI_REL_OFFSET rsp, RSP+\offset-RIP | ||
275 | /*CFI_REL_OFFSET rflags, EFLAGS+\offset-RIP*/ | ||
276 | /*CFI_REL_OFFSET cs, CS+\offset-RIP*/ | ||
277 | CFI_REL_OFFSET rip, RIP+\offset-RIP | ||
278 | .endm | ||
279 | |||
280 | /* | ||
281 | * initial frame state for exceptions with error code (and interrupts | ||
282 | * with vector already pushed) | ||
283 | */ | ||
284 | .macro XCPT_FRAME start=1 offset=0 | ||
285 | INTR_FRAME \start, RIP+\offset-ORIG_RAX | ||
286 | /*CFI_REL_OFFSET orig_rax, ORIG_RAX-ORIG_RAX*/ | ||
287 | .endm | ||
288 | |||
245 | /* | 289 | /* |
246 | * A newly forked process directly context switches into this. | 290 | * frame that enables calling into C. |
247 | */ | 291 | */ |
248 | /* rdi: prev */ | 292 | .macro PARTIAL_FRAME start=1 offset=0 |
293 | XCPT_FRAME \start, ORIG_RAX+\offset-ARGOFFSET | ||
294 | CFI_REL_OFFSET rdi, RDI+\offset-ARGOFFSET | ||
295 | CFI_REL_OFFSET rsi, RSI+\offset-ARGOFFSET | ||
296 | CFI_REL_OFFSET rdx, RDX+\offset-ARGOFFSET | ||
297 | CFI_REL_OFFSET rcx, RCX+\offset-ARGOFFSET | ||
298 | CFI_REL_OFFSET rax, RAX+\offset-ARGOFFSET | ||
299 | CFI_REL_OFFSET r8, R8+\offset-ARGOFFSET | ||
300 | CFI_REL_OFFSET r9, R9+\offset-ARGOFFSET | ||
301 | CFI_REL_OFFSET r10, R10+\offset-ARGOFFSET | ||
302 | CFI_REL_OFFSET r11, R11+\offset-ARGOFFSET | ||
303 | .endm | ||
304 | |||
305 | /* | ||
306 | * frame that enables passing a complete pt_regs to a C function. | ||
307 | */ | ||
308 | .macro DEFAULT_FRAME start=1 offset=0 | ||
309 | PARTIAL_FRAME \start, R11+\offset-R15 | ||
310 | CFI_REL_OFFSET rbx, RBX+\offset | ||
311 | CFI_REL_OFFSET rbp, RBP+\offset | ||
312 | CFI_REL_OFFSET r12, R12+\offset | ||
313 | CFI_REL_OFFSET r13, R13+\offset | ||
314 | CFI_REL_OFFSET r14, R14+\offset | ||
315 | CFI_REL_OFFSET r15, R15+\offset | ||
316 | .endm | ||
317 | |||
318 | /* save partial stack frame */ | ||
319 | ENTRY(save_args) | ||
320 | XCPT_FRAME | ||
321 | cld | ||
322 | movq_cfi rdi, RDI+16-ARGOFFSET | ||
323 | movq_cfi rsi, RSI+16-ARGOFFSET | ||
324 | movq_cfi rdx, RDX+16-ARGOFFSET | ||
325 | movq_cfi rcx, RCX+16-ARGOFFSET | ||
326 | movq_cfi rax, RAX+16-ARGOFFSET | ||
327 | movq_cfi r8, R8+16-ARGOFFSET | ||
328 | movq_cfi r9, R9+16-ARGOFFSET | ||
329 | movq_cfi r10, R10+16-ARGOFFSET | ||
330 | movq_cfi r11, R11+16-ARGOFFSET | ||
331 | |||
332 | leaq -ARGOFFSET+16(%rsp),%rdi /* arg1 for handler */ | ||
333 | movq_cfi rbp, 8 /* push %rbp */ | ||
334 | leaq 8(%rsp), %rbp /* mov %rsp, %rbp */ | ||
335 | testl $3, CS(%rdi) | ||
336 | je 1f | ||
337 | SWAPGS | ||
338 | /* | ||
339 | * irqcount is used to check if a CPU is already on an interrupt stack | ||
340 | * or not. While this is essentially redundant with preempt_count it is | ||
341 | * a little cheaper to use a separate counter in the PDA (short of | ||
342 | * moving irq_enter into assembly, which would be too much work) | ||
343 | */ | ||
344 | 1: incl %gs:pda_irqcount | ||
345 | jne 2f | ||
346 | popq_cfi %rax /* move return address... */ | ||
347 | mov %gs:pda_irqstackptr,%rsp | ||
348 | EMPTY_FRAME 0 | ||
349 | pushq_cfi %rax /* ... to the new stack */ | ||
350 | /* | ||
351 | * We entered an interrupt context - irqs are off: | ||
352 | */ | ||
353 | 2: TRACE_IRQS_OFF | ||
354 | ret | ||
355 | CFI_ENDPROC | ||
356 | END(save_args) | ||
357 | |||
358 | ENTRY(save_rest) | ||
359 | PARTIAL_FRAME 1 REST_SKIP+8 | ||
360 | movq 5*8+16(%rsp), %r11 /* save return address */ | ||
361 | movq_cfi rbx, RBX+16 | ||
362 | movq_cfi rbp, RBP+16 | ||
363 | movq_cfi r12, R12+16 | ||
364 | movq_cfi r13, R13+16 | ||
365 | movq_cfi r14, R14+16 | ||
366 | movq_cfi r15, R15+16 | ||
367 | movq %r11, 8(%rsp) /* return address */ | ||
368 | FIXUP_TOP_OF_STACK %r11, 16 | ||
369 | ret | ||
370 | CFI_ENDPROC | ||
371 | END(save_rest) | ||
372 | |||
373 | /* save complete stack frame */ | ||
374 | ENTRY(save_paranoid) | ||
375 | XCPT_FRAME 1 RDI+8 | ||
376 | cld | ||
377 | movq_cfi rdi, RDI+8 | ||
378 | movq_cfi rsi, RSI+8 | ||
379 | movq_cfi rdx, RDX+8 | ||
380 | movq_cfi rcx, RCX+8 | ||
381 | movq_cfi rax, RAX+8 | ||
382 | movq_cfi r8, R8+8 | ||
383 | movq_cfi r9, R9+8 | ||
384 | movq_cfi r10, R10+8 | ||
385 | movq_cfi r11, R11+8 | ||
386 | movq_cfi rbx, RBX+8 | ||
387 | movq_cfi rbp, RBP+8 | ||
388 | movq_cfi r12, R12+8 | ||
389 | movq_cfi r13, R13+8 | ||
390 | movq_cfi r14, R14+8 | ||
391 | movq_cfi r15, R15+8 | ||
392 | movl $1,%ebx | ||
393 | movl $MSR_GS_BASE,%ecx | ||
394 | rdmsr | ||
395 | testl %edx,%edx | ||
396 | js 1f /* negative -> in kernel */ | ||
397 | SWAPGS | ||
398 | xorl %ebx,%ebx | ||
399 | 1: ret | ||
400 | CFI_ENDPROC | ||
401 | END(save_paranoid) | ||
402 | |||
403 | /* | ||
404 | * A newly forked process directly context switches into this address. | ||
405 | * | ||
406 | * rdi: prev task we switched from | ||
407 | */ | ||
249 | ENTRY(ret_from_fork) | 408 | ENTRY(ret_from_fork) |
250 | CFI_DEFAULT_STACK | 409 | DEFAULT_FRAME |
410 | |||
251 | push kernel_eflags(%rip) | 411 | push kernel_eflags(%rip) |
252 | CFI_ADJUST_CFA_OFFSET 8 | 412 | CFI_ADJUST_CFA_OFFSET 8 |
253 | popf # reset kernel eflags | 413 | popf # reset kernel eflags |
254 | CFI_ADJUST_CFA_OFFSET -8 | 414 | CFI_ADJUST_CFA_OFFSET -8 |
255 | call schedule_tail | 415 | |
416 | call schedule_tail # rdi: 'prev' task parameter | ||
417 | |||
256 | GET_THREAD_INFO(%rcx) | 418 | GET_THREAD_INFO(%rcx) |
257 | testl $(_TIF_SYSCALL_TRACE|_TIF_SYSCALL_AUDIT),TI_flags(%rcx) | 419 | |
258 | jnz rff_trace | 420 | CFI_REMEMBER_STATE |
259 | rff_action: | ||
260 | RESTORE_REST | 421 | RESTORE_REST |
261 | testl $3,CS-ARGOFFSET(%rsp) # from kernel_thread? | 422 | |
423 | testl $3, CS-ARGOFFSET(%rsp) # from kernel_thread? | ||
262 | je int_ret_from_sys_call | 424 | je int_ret_from_sys_call |
263 | testl $_TIF_IA32,TI_flags(%rcx) | 425 | |
426 | testl $_TIF_IA32, TI_flags(%rcx) # 32-bit compat task needs IRET | ||
264 | jnz int_ret_from_sys_call | 427 | jnz int_ret_from_sys_call |
265 | RESTORE_TOP_OF_STACK %rdi,ARGOFFSET | 428 | |
266 | jmp ret_from_sys_call | 429 | RESTORE_TOP_OF_STACK %rdi, -ARGOFFSET |
267 | rff_trace: | 430 | jmp ret_from_sys_call # go to the SYSRET fastpath |
268 | movq %rsp,%rdi | 431 | |
269 | call syscall_trace_leave | 432 | CFI_RESTORE_STATE |
270 | GET_THREAD_INFO(%rcx) | ||
271 | jmp rff_action | ||
272 | CFI_ENDPROC | 433 | CFI_ENDPROC |
273 | END(ret_from_fork) | 434 | END(ret_from_fork) |
274 | 435 | ||
@@ -278,20 +439,20 @@ END(ret_from_fork) | |||
278 | * SYSCALL does not save anything on the stack and does not change the | 439 | * SYSCALL does not save anything on the stack and does not change the |
279 | * stack pointer. | 440 | * stack pointer. |
280 | */ | 441 | */ |
281 | 442 | ||
282 | /* | 443 | /* |
283 | * Register setup: | 444 | * Register setup: |
284 | * rax system call number | 445 | * rax system call number |
285 | * rdi arg0 | 446 | * rdi arg0 |
286 | * rcx return address for syscall/sysret, C arg3 | 447 | * rcx return address for syscall/sysret, C arg3 |
287 | * rsi arg1 | 448 | * rsi arg1 |
288 | * rdx arg2 | 449 | * rdx arg2 |
289 | * r10 arg3 (--> moved to rcx for C) | 450 | * r10 arg3 (--> moved to rcx for C) |
290 | * r8 arg4 | 451 | * r8 arg4 |
291 | * r9 arg5 | 452 | * r9 arg5 |
292 | * r11 eflags for syscall/sysret, temporary for C | 453 | * r11 eflags for syscall/sysret, temporary for C |
293 | * r12-r15,rbp,rbx saved by C code, not touched. | 454 | * r12-r15,rbp,rbx saved by C code, not touched. |
294 | * | 455 | * |
295 | * Interrupts are off on entry. | 456 | * Interrupts are off on entry. |
296 | * Only called from user space. | 457 | * Only called from user space. |
297 | * | 458 | * |
@@ -301,7 +462,7 @@ END(ret_from_fork) | |||
301 | * When user can change the frames always force IRET. That is because | 462 | * When user can change the frames always force IRET. That is because |
302 | * it deals with uncanonical addresses better. SYSRET has trouble | 463 | * it deals with uncanonical addresses better. SYSRET has trouble |
303 | * with them due to bugs in both AMD and Intel CPUs. | 464 | * with them due to bugs in both AMD and Intel CPUs. |
304 | */ | 465 | */ |
305 | 466 | ||
306 | ENTRY(system_call) | 467 | ENTRY(system_call) |
307 | CFI_STARTPROC simple | 468 | CFI_STARTPROC simple |
@@ -317,7 +478,7 @@ ENTRY(system_call) | |||
317 | */ | 478 | */ |
318 | ENTRY(system_call_after_swapgs) | 479 | ENTRY(system_call_after_swapgs) |
319 | 480 | ||
320 | movq %rsp,%gs:pda_oldrsp | 481 | movq %rsp,%gs:pda_oldrsp |
321 | movq %gs:pda_kernelstack,%rsp | 482 | movq %gs:pda_kernelstack,%rsp |
322 | /* | 483 | /* |
323 | * No need to follow this irqs off/on section - it's straight | 484 | * No need to follow this irqs off/on section - it's straight |
@@ -325,7 +486,7 @@ ENTRY(system_call_after_swapgs) | |||
325 | */ | 486 | */ |
326 | ENABLE_INTERRUPTS(CLBR_NONE) | 487 | ENABLE_INTERRUPTS(CLBR_NONE) |
327 | SAVE_ARGS 8,1 | 488 | SAVE_ARGS 8,1 |
328 | movq %rax,ORIG_RAX-ARGOFFSET(%rsp) | 489 | movq %rax,ORIG_RAX-ARGOFFSET(%rsp) |
329 | movq %rcx,RIP-ARGOFFSET(%rsp) | 490 | movq %rcx,RIP-ARGOFFSET(%rsp) |
330 | CFI_REL_OFFSET rip,RIP-ARGOFFSET | 491 | CFI_REL_OFFSET rip,RIP-ARGOFFSET |
331 | GET_THREAD_INFO(%rcx) | 492 | GET_THREAD_INFO(%rcx) |
@@ -339,19 +500,19 @@ system_call_fastpath: | |||
339 | movq %rax,RAX-ARGOFFSET(%rsp) | 500 | movq %rax,RAX-ARGOFFSET(%rsp) |
340 | /* | 501 | /* |
341 | * Syscall return path ending with SYSRET (fast path) | 502 | * Syscall return path ending with SYSRET (fast path) |
342 | * Has incomplete stack frame and undefined top of stack. | 503 | * Has incomplete stack frame and undefined top of stack. |
343 | */ | 504 | */ |
344 | ret_from_sys_call: | 505 | ret_from_sys_call: |
345 | movl $_TIF_ALLWORK_MASK,%edi | 506 | movl $_TIF_ALLWORK_MASK,%edi |
346 | /* edi: flagmask */ | 507 | /* edi: flagmask */ |
347 | sysret_check: | 508 | sysret_check: |
348 | LOCKDEP_SYS_EXIT | 509 | LOCKDEP_SYS_EXIT |
349 | GET_THREAD_INFO(%rcx) | 510 | GET_THREAD_INFO(%rcx) |
350 | DISABLE_INTERRUPTS(CLBR_NONE) | 511 | DISABLE_INTERRUPTS(CLBR_NONE) |
351 | TRACE_IRQS_OFF | 512 | TRACE_IRQS_OFF |
352 | movl TI_flags(%rcx),%edx | 513 | movl TI_flags(%rcx),%edx |
353 | andl %edi,%edx | 514 | andl %edi,%edx |
354 | jnz sysret_careful | 515 | jnz sysret_careful |
355 | CFI_REMEMBER_STATE | 516 | CFI_REMEMBER_STATE |
356 | /* | 517 | /* |
357 | * sysretq will re-enable interrupts: | 518 | * sysretq will re-enable interrupts: |
@@ -366,7 +527,7 @@ sysret_check: | |||
366 | 527 | ||
367 | CFI_RESTORE_STATE | 528 | CFI_RESTORE_STATE |
368 | /* Handle reschedules */ | 529 | /* Handle reschedules */ |
369 | /* edx: work, edi: workmask */ | 530 | /* edx: work, edi: workmask */ |
370 | sysret_careful: | 531 | sysret_careful: |
371 | bt $TIF_NEED_RESCHED,%edx | 532 | bt $TIF_NEED_RESCHED,%edx |
372 | jnc sysret_signal | 533 | jnc sysret_signal |
@@ -379,7 +540,7 @@ sysret_careful: | |||
379 | CFI_ADJUST_CFA_OFFSET -8 | 540 | CFI_ADJUST_CFA_OFFSET -8 |
380 | jmp sysret_check | 541 | jmp sysret_check |
381 | 542 | ||
382 | /* Handle a signal */ | 543 | /* Handle a signal */ |
383 | sysret_signal: | 544 | sysret_signal: |
384 | TRACE_IRQS_ON | 545 | TRACE_IRQS_ON |
385 | ENABLE_INTERRUPTS(CLBR_NONE) | 546 | ENABLE_INTERRUPTS(CLBR_NONE) |
@@ -388,17 +549,20 @@ sysret_signal: | |||
388 | jc sysret_audit | 549 | jc sysret_audit |
389 | #endif | 550 | #endif |
390 | /* edx: work flags (arg3) */ | 551 | /* edx: work flags (arg3) */ |
391 | leaq do_notify_resume(%rip),%rax | ||
392 | leaq -ARGOFFSET(%rsp),%rdi # &pt_regs -> arg1 | 552 | leaq -ARGOFFSET(%rsp),%rdi # &pt_regs -> arg1 |
393 | xorl %esi,%esi # oldset -> arg2 | 553 | xorl %esi,%esi # oldset -> arg2 |
394 | call ptregscall_common | 554 | SAVE_REST |
555 | FIXUP_TOP_OF_STACK %r11 | ||
556 | call do_notify_resume | ||
557 | RESTORE_TOP_OF_STACK %r11 | ||
558 | RESTORE_REST | ||
395 | movl $_TIF_WORK_MASK,%edi | 559 | movl $_TIF_WORK_MASK,%edi |
396 | /* Use IRET because user could have changed frame. This | 560 | /* Use IRET because user could have changed frame. This |
397 | works because ptregscall_common has called FIXUP_TOP_OF_STACK. */ | 561 | works because ptregscall_common has called FIXUP_TOP_OF_STACK. */ |
398 | DISABLE_INTERRUPTS(CLBR_NONE) | 562 | DISABLE_INTERRUPTS(CLBR_NONE) |
399 | TRACE_IRQS_OFF | 563 | TRACE_IRQS_OFF |
400 | jmp int_with_check | 564 | jmp int_with_check |
401 | 565 | ||
402 | badsys: | 566 | badsys: |
403 | movq $-ENOSYS,RAX-ARGOFFSET(%rsp) | 567 | movq $-ENOSYS,RAX-ARGOFFSET(%rsp) |
404 | jmp ret_from_sys_call | 568 | jmp ret_from_sys_call |
@@ -437,7 +601,7 @@ sysret_audit: | |||
437 | #endif /* CONFIG_AUDITSYSCALL */ | 601 | #endif /* CONFIG_AUDITSYSCALL */ |
438 | 602 | ||
439 | /* Do syscall tracing */ | 603 | /* Do syscall tracing */ |
440 | tracesys: | 604 | tracesys: |
441 | #ifdef CONFIG_AUDITSYSCALL | 605 | #ifdef CONFIG_AUDITSYSCALL |
442 | testl $(_TIF_WORK_SYSCALL_ENTRY & ~_TIF_SYSCALL_AUDIT),TI_flags(%rcx) | 606 | testl $(_TIF_WORK_SYSCALL_ENTRY & ~_TIF_SYSCALL_AUDIT),TI_flags(%rcx) |
443 | jz auditsys | 607 | jz auditsys |
@@ -460,8 +624,8 @@ tracesys: | |||
460 | call *sys_call_table(,%rax,8) | 624 | call *sys_call_table(,%rax,8) |
461 | movq %rax,RAX-ARGOFFSET(%rsp) | 625 | movq %rax,RAX-ARGOFFSET(%rsp) |
462 | /* Use IRET because user could have changed frame */ | 626 | /* Use IRET because user could have changed frame */ |
463 | 627 | ||
464 | /* | 628 | /* |
465 | * Syscall return path ending with IRET. | 629 | * Syscall return path ending with IRET. |
466 | * Has correct top of stack, but partial stack frame. | 630 | * Has correct top of stack, but partial stack frame. |
467 | */ | 631 | */ |
@@ -505,18 +669,18 @@ int_very_careful: | |||
505 | TRACE_IRQS_ON | 669 | TRACE_IRQS_ON |
506 | ENABLE_INTERRUPTS(CLBR_NONE) | 670 | ENABLE_INTERRUPTS(CLBR_NONE) |
507 | SAVE_REST | 671 | SAVE_REST |
508 | /* Check for syscall exit trace */ | 672 | /* Check for syscall exit trace */ |
509 | testl $_TIF_WORK_SYSCALL_EXIT,%edx | 673 | testl $_TIF_WORK_SYSCALL_EXIT,%edx |
510 | jz int_signal | 674 | jz int_signal |
511 | pushq %rdi | 675 | pushq %rdi |
512 | CFI_ADJUST_CFA_OFFSET 8 | 676 | CFI_ADJUST_CFA_OFFSET 8 |
513 | leaq 8(%rsp),%rdi # &ptregs -> arg1 | 677 | leaq 8(%rsp),%rdi # &ptregs -> arg1 |
514 | call syscall_trace_leave | 678 | call syscall_trace_leave |
515 | popq %rdi | 679 | popq %rdi |
516 | CFI_ADJUST_CFA_OFFSET -8 | 680 | CFI_ADJUST_CFA_OFFSET -8 |
517 | andl $~(_TIF_WORK_SYSCALL_EXIT|_TIF_SYSCALL_EMU),%edi | 681 | andl $~(_TIF_WORK_SYSCALL_EXIT|_TIF_SYSCALL_EMU),%edi |
518 | jmp int_restore_rest | 682 | jmp int_restore_rest |
519 | 683 | ||
520 | int_signal: | 684 | int_signal: |
521 | testl $_TIF_DO_NOTIFY_MASK,%edx | 685 | testl $_TIF_DO_NOTIFY_MASK,%edx |
522 | jz 1f | 686 | jz 1f |
@@ -531,22 +695,24 @@ int_restore_rest: | |||
531 | jmp int_with_check | 695 | jmp int_with_check |
532 | CFI_ENDPROC | 696 | CFI_ENDPROC |
533 | END(system_call) | 697 | END(system_call) |
534 | 698 | ||
535 | /* | 699 | /* |
536 | * Certain special system calls that need to save a complete full stack frame. | 700 | * Certain special system calls that need to save a complete full stack frame. |
537 | */ | 701 | */ |
538 | |||
539 | .macro PTREGSCALL label,func,arg | 702 | .macro PTREGSCALL label,func,arg |
540 | .globl \label | 703 | ENTRY(\label) |
541 | \label: | 704 | PARTIAL_FRAME 1 8 /* offset 8: return address */ |
542 | leaq \func(%rip),%rax | 705 | subq $REST_SKIP, %rsp |
543 | leaq -ARGOFFSET+8(%rsp),\arg /* 8 for return address */ | 706 | CFI_ADJUST_CFA_OFFSET REST_SKIP |
544 | jmp ptregscall_common | 707 | call save_rest |
708 | DEFAULT_FRAME 0 8 /* offset 8: return address */ | ||
709 | leaq 8(%rsp), \arg /* pt_regs pointer */ | ||
710 | call \func | ||
711 | jmp ptregscall_common | ||
712 | CFI_ENDPROC | ||
545 | END(\label) | 713 | END(\label) |
546 | .endm | 714 | .endm |
547 | 715 | ||
548 | CFI_STARTPROC | ||
549 | |||
550 | PTREGSCALL stub_clone, sys_clone, %r8 | 716 | PTREGSCALL stub_clone, sys_clone, %r8 |
551 | PTREGSCALL stub_fork, sys_fork, %rdi | 717 | PTREGSCALL stub_fork, sys_fork, %rdi |
552 | PTREGSCALL stub_vfork, sys_vfork, %rdi | 718 | PTREGSCALL stub_vfork, sys_vfork, %rdi |
@@ -554,25 +720,18 @@ END(\label) | |||
554 | PTREGSCALL stub_iopl, sys_iopl, %rsi | 720 | PTREGSCALL stub_iopl, sys_iopl, %rsi |
555 | 721 | ||
556 | ENTRY(ptregscall_common) | 722 | ENTRY(ptregscall_common) |
557 | popq %r11 | 723 | DEFAULT_FRAME 1 8 /* offset 8: return address */ |
558 | CFI_ADJUST_CFA_OFFSET -8 | 724 | RESTORE_TOP_OF_STACK %r11, 8 |
559 | CFI_REGISTER rip, r11 | 725 | movq_cfi_restore R15+8, r15 |
560 | SAVE_REST | 726 | movq_cfi_restore R14+8, r14 |
561 | movq %r11, %r15 | 727 | movq_cfi_restore R13+8, r13 |
562 | CFI_REGISTER rip, r15 | 728 | movq_cfi_restore R12+8, r12 |
563 | FIXUP_TOP_OF_STACK %r11 | 729 | movq_cfi_restore RBP+8, rbp |
564 | call *%rax | 730 | movq_cfi_restore RBX+8, rbx |
565 | RESTORE_TOP_OF_STACK %r11 | 731 | ret $REST_SKIP /* pop extended registers */ |
566 | movq %r15, %r11 | ||
567 | CFI_REGISTER rip, r11 | ||
568 | RESTORE_REST | ||
569 | pushq %r11 | ||
570 | CFI_ADJUST_CFA_OFFSET 8 | ||
571 | CFI_REL_OFFSET rip, 0 | ||
572 | ret | ||
573 | CFI_ENDPROC | 732 | CFI_ENDPROC |
574 | END(ptregscall_common) | 733 | END(ptregscall_common) |
575 | 734 | ||
576 | ENTRY(stub_execve) | 735 | ENTRY(stub_execve) |
577 | CFI_STARTPROC | 736 | CFI_STARTPROC |
578 | popq %r11 | 737 | popq %r11 |
@@ -588,11 +747,11 @@ ENTRY(stub_execve) | |||
588 | jmp int_ret_from_sys_call | 747 | jmp int_ret_from_sys_call |
589 | CFI_ENDPROC | 748 | CFI_ENDPROC |
590 | END(stub_execve) | 749 | END(stub_execve) |
591 | 750 | ||
592 | /* | 751 | /* |
593 | * sigreturn is special because it needs to restore all registers on return. | 752 | * sigreturn is special because it needs to restore all registers on return. |
594 | * This cannot be done with SYSRET, so use the IRET return path instead. | 753 | * This cannot be done with SYSRET, so use the IRET return path instead. |
595 | */ | 754 | */ |
596 | ENTRY(stub_rt_sigreturn) | 755 | ENTRY(stub_rt_sigreturn) |
597 | CFI_STARTPROC | 756 | CFI_STARTPROC |
598 | addq $8, %rsp | 757 | addq $8, %rsp |
@@ -608,70 +767,70 @@ ENTRY(stub_rt_sigreturn) | |||
608 | END(stub_rt_sigreturn) | 767 | END(stub_rt_sigreturn) |
609 | 768 | ||
610 | /* | 769 | /* |
611 | * initial frame state for interrupts and exceptions | 770 | * Build the entry stubs and pointer table with some assembler magic. |
771 | * We pack 7 stubs into a single 32-byte chunk, which will fit in a | ||
772 | * single cache line on all modern x86 implementations. | ||
612 | */ | 773 | */ |
613 | .macro _frame ref | 774 | .section .init.rodata,"a" |
614 | CFI_STARTPROC simple | 775 | ENTRY(interrupt) |
615 | CFI_SIGNAL_FRAME | 776 | .text |
616 | CFI_DEF_CFA rsp,SS+8-\ref | 777 | .p2align 5 |
617 | /*CFI_REL_OFFSET ss,SS-\ref*/ | 778 | .p2align CONFIG_X86_L1_CACHE_SHIFT |
618 | CFI_REL_OFFSET rsp,RSP-\ref | 779 | ENTRY(irq_entries_start) |
619 | /*CFI_REL_OFFSET rflags,EFLAGS-\ref*/ | 780 | INTR_FRAME |
620 | /*CFI_REL_OFFSET cs,CS-\ref*/ | 781 | vector=FIRST_EXTERNAL_VECTOR |
621 | CFI_REL_OFFSET rip,RIP-\ref | 782 | .rept (NR_VECTORS-FIRST_EXTERNAL_VECTOR+6)/7 |
622 | .endm | 783 | .balign 32 |
784 | .rept 7 | ||
785 | .if vector < NR_VECTORS | ||
786 | .if vector <> FIRST_EXTERNAL_VECTOR | ||
787 | CFI_ADJUST_CFA_OFFSET -8 | ||
788 | .endif | ||
789 | 1: pushq $(~vector+0x80) /* Note: always in signed byte range */ | ||
790 | CFI_ADJUST_CFA_OFFSET 8 | ||
791 | .if ((vector-FIRST_EXTERNAL_VECTOR)%7) <> 6 | ||
792 | jmp 2f | ||
793 | .endif | ||
794 | .previous | ||
795 | .quad 1b | ||
796 | .text | ||
797 | vector=vector+1 | ||
798 | .endif | ||
799 | .endr | ||
800 | 2: jmp common_interrupt | ||
801 | .endr | ||
802 | CFI_ENDPROC | ||
803 | END(irq_entries_start) | ||
623 | 804 | ||
624 | /* initial frame state for interrupts (and exceptions without error code) */ | 805 | .previous |
625 | #define INTR_FRAME _frame RIP | 806 | END(interrupt) |
626 | /* initial frame state for exceptions with error code (and interrupts with | 807 | .previous |
627 | vector already pushed) */ | ||
628 | #define XCPT_FRAME _frame ORIG_RAX | ||
629 | 808 | ||
630 | /* | 809 | /* |
631 | * Interrupt entry/exit. | 810 | * Interrupt entry/exit. |
632 | * | 811 | * |
633 | * Interrupt entry points save only callee clobbered registers in fast path. | 812 | * Interrupt entry points save only callee clobbered registers in fast path. |
634 | * | 813 | * |
635 | * Entry runs with interrupts off. | 814 | * Entry runs with interrupts off. |
636 | */ | 815 | */ |
637 | 816 | ||
638 | /* 0(%rsp): interrupt number */ | 817 | /* 0(%rsp): ~(interrupt number) */ |
639 | .macro interrupt func | 818 | .macro interrupt func |
640 | cld | 819 | subq $10*8, %rsp |
641 | SAVE_ARGS | 820 | CFI_ADJUST_CFA_OFFSET 10*8 |
642 | leaq -ARGOFFSET(%rsp),%rdi # arg1 for handler | 821 | call save_args |
643 | pushq %rbp | 822 | PARTIAL_FRAME 0 |
644 | /* | ||
645 | * Save rbp twice: One is for marking the stack frame, as usual, and the | ||
646 | * other, to fill pt_regs properly. This is because bx comes right | ||
647 | * before the last saved register in that structure, and not bp. If the | ||
648 | * base pointer were in the place bx is today, this would not be needed. | ||
649 | */ | ||
650 | movq %rbp, -8(%rsp) | ||
651 | CFI_ADJUST_CFA_OFFSET 8 | ||
652 | CFI_REL_OFFSET rbp, 0 | ||
653 | movq %rsp,%rbp | ||
654 | CFI_DEF_CFA_REGISTER rbp | ||
655 | testl $3,CS(%rdi) | ||
656 | je 1f | ||
657 | SWAPGS | ||
658 | /* irqcount is used to check if a CPU is already on an interrupt | ||
659 | stack or not. While this is essentially redundant with preempt_count | ||
660 | it is a little cheaper to use a separate counter in the PDA | ||
661 | (short of moving irq_enter into assembly, which would be too | ||
662 | much work) */ | ||
663 | 1: incl %gs:pda_irqcount | ||
664 | cmoveq %gs:pda_irqstackptr,%rsp | ||
665 | push %rbp # backlink for old unwinder | ||
666 | /* | ||
667 | * We entered an interrupt context - irqs are off: | ||
668 | */ | ||
669 | TRACE_IRQS_OFF | ||
670 | call \func | 823 | call \func |
671 | .endm | 824 | .endm |
672 | 825 | ||
673 | ENTRY(common_interrupt) | 826 | /* |
827 | * The interrupt stubs push (~vector+0x80) onto the stack and | ||
828 | * then jump to common_interrupt. | ||
829 | */ | ||
830 | .p2align CONFIG_X86_L1_CACHE_SHIFT | ||
831 | common_interrupt: | ||
674 | XCPT_FRAME | 832 | XCPT_FRAME |
833 | addq $-0x80,(%rsp) /* Adjust vector to [-256,-1] range */ | ||
675 | interrupt do_IRQ | 834 | interrupt do_IRQ |
676 | /* 0(%rsp): oldrsp-ARGOFFSET */ | 835 | /* 0(%rsp): oldrsp-ARGOFFSET */ |
677 | ret_from_intr: | 836 | ret_from_intr: |
@@ -685,12 +844,12 @@ exit_intr: | |||
685 | GET_THREAD_INFO(%rcx) | 844 | GET_THREAD_INFO(%rcx) |
686 | testl $3,CS-ARGOFFSET(%rsp) | 845 | testl $3,CS-ARGOFFSET(%rsp) |
687 | je retint_kernel | 846 | je retint_kernel |
688 | 847 | ||
689 | /* Interrupt came from user space */ | 848 | /* Interrupt came from user space */ |
690 | /* | 849 | /* |
691 | * Has a correct top of stack, but a partial stack frame | 850 | * Has a correct top of stack, but a partial stack frame |
692 | * %rcx: thread info. Interrupts off. | 851 | * %rcx: thread info. Interrupts off. |
693 | */ | 852 | */ |
694 | retint_with_reschedule: | 853 | retint_with_reschedule: |
695 | movl $_TIF_WORK_MASK,%edi | 854 | movl $_TIF_WORK_MASK,%edi |
696 | retint_check: | 855 | retint_check: |
@@ -763,20 +922,20 @@ retint_careful: | |||
763 | pushq %rdi | 922 | pushq %rdi |
764 | CFI_ADJUST_CFA_OFFSET 8 | 923 | CFI_ADJUST_CFA_OFFSET 8 |
765 | call schedule | 924 | call schedule |
766 | popq %rdi | 925 | popq %rdi |
767 | CFI_ADJUST_CFA_OFFSET -8 | 926 | CFI_ADJUST_CFA_OFFSET -8 |
768 | GET_THREAD_INFO(%rcx) | 927 | GET_THREAD_INFO(%rcx) |
769 | DISABLE_INTERRUPTS(CLBR_NONE) | 928 | DISABLE_INTERRUPTS(CLBR_NONE) |
770 | TRACE_IRQS_OFF | 929 | TRACE_IRQS_OFF |
771 | jmp retint_check | 930 | jmp retint_check |
772 | 931 | ||
773 | retint_signal: | 932 | retint_signal: |
774 | testl $_TIF_DO_NOTIFY_MASK,%edx | 933 | testl $_TIF_DO_NOTIFY_MASK,%edx |
775 | jz retint_swapgs | 934 | jz retint_swapgs |
776 | TRACE_IRQS_ON | 935 | TRACE_IRQS_ON |
777 | ENABLE_INTERRUPTS(CLBR_NONE) | 936 | ENABLE_INTERRUPTS(CLBR_NONE) |
778 | SAVE_REST | 937 | SAVE_REST |
779 | movq $-1,ORIG_RAX(%rsp) | 938 | movq $-1,ORIG_RAX(%rsp) |
780 | xorl %esi,%esi # oldset | 939 | xorl %esi,%esi # oldset |
781 | movq %rsp,%rdi # &pt_regs | 940 | movq %rsp,%rdi # &pt_regs |
782 | call do_notify_resume | 941 | call do_notify_resume |
@@ -798,324 +957,211 @@ ENTRY(retint_kernel) | |||
798 | jnc retint_restore_args | 957 | jnc retint_restore_args |
799 | call preempt_schedule_irq | 958 | call preempt_schedule_irq |
800 | jmp exit_intr | 959 | jmp exit_intr |
801 | #endif | 960 | #endif |
802 | 961 | ||
803 | CFI_ENDPROC | 962 | CFI_ENDPROC |
804 | END(common_interrupt) | 963 | END(common_interrupt) |
805 | 964 | ||
806 | /* | 965 | /* |
807 | * APIC interrupts. | 966 | * APIC interrupts. |
808 | */ | 967 | */ |
809 | .macro apicinterrupt num,func | 968 | .macro apicinterrupt num sym do_sym |
969 | ENTRY(\sym) | ||
810 | INTR_FRAME | 970 | INTR_FRAME |
811 | pushq $~(\num) | 971 | pushq $~(\num) |
812 | CFI_ADJUST_CFA_OFFSET 8 | 972 | CFI_ADJUST_CFA_OFFSET 8 |
813 | interrupt \func | 973 | interrupt \do_sym |
814 | jmp ret_from_intr | 974 | jmp ret_from_intr |
815 | CFI_ENDPROC | 975 | CFI_ENDPROC |
816 | .endm | 976 | END(\sym) |
817 | 977 | .endm | |
818 | ENTRY(thermal_interrupt) | ||
819 | apicinterrupt THERMAL_APIC_VECTOR,smp_thermal_interrupt | ||
820 | END(thermal_interrupt) | ||
821 | |||
822 | ENTRY(threshold_interrupt) | ||
823 | apicinterrupt THRESHOLD_APIC_VECTOR,mce_threshold_interrupt | ||
824 | END(threshold_interrupt) | ||
825 | |||
826 | #ifdef CONFIG_SMP | ||
827 | ENTRY(reschedule_interrupt) | ||
828 | apicinterrupt RESCHEDULE_VECTOR,smp_reschedule_interrupt | ||
829 | END(reschedule_interrupt) | ||
830 | |||
831 | .macro INVALIDATE_ENTRY num | ||
832 | ENTRY(invalidate_interrupt\num) | ||
833 | apicinterrupt INVALIDATE_TLB_VECTOR_START+\num,smp_invalidate_interrupt | ||
834 | END(invalidate_interrupt\num) | ||
835 | .endm | ||
836 | 978 | ||
837 | INVALIDATE_ENTRY 0 | 979 | #ifdef CONFIG_SMP |
838 | INVALIDATE_ENTRY 1 | 980 | apicinterrupt IRQ_MOVE_CLEANUP_VECTOR \ |
839 | INVALIDATE_ENTRY 2 | 981 | irq_move_cleanup_interrupt smp_irq_move_cleanup_interrupt |
840 | INVALIDATE_ENTRY 3 | ||
841 | INVALIDATE_ENTRY 4 | ||
842 | INVALIDATE_ENTRY 5 | ||
843 | INVALIDATE_ENTRY 6 | ||
844 | INVALIDATE_ENTRY 7 | ||
845 | |||
846 | ENTRY(call_function_interrupt) | ||
847 | apicinterrupt CALL_FUNCTION_VECTOR,smp_call_function_interrupt | ||
848 | END(call_function_interrupt) | ||
849 | ENTRY(call_function_single_interrupt) | ||
850 | apicinterrupt CALL_FUNCTION_SINGLE_VECTOR,smp_call_function_single_interrupt | ||
851 | END(call_function_single_interrupt) | ||
852 | ENTRY(irq_move_cleanup_interrupt) | ||
853 | apicinterrupt IRQ_MOVE_CLEANUP_VECTOR,smp_irq_move_cleanup_interrupt | ||
854 | END(irq_move_cleanup_interrupt) | ||
855 | #endif | 982 | #endif |
856 | 983 | ||
857 | ENTRY(apic_timer_interrupt) | 984 | apicinterrupt UV_BAU_MESSAGE \ |
858 | apicinterrupt LOCAL_TIMER_VECTOR,smp_apic_timer_interrupt | 985 | uv_bau_message_intr1 uv_bau_message_interrupt |
859 | END(apic_timer_interrupt) | 986 | apicinterrupt LOCAL_TIMER_VECTOR \ |
987 | apic_timer_interrupt smp_apic_timer_interrupt | ||
988 | |||
989 | #ifdef CONFIG_SMP | ||
990 | apicinterrupt INVALIDATE_TLB_VECTOR_START+0 \ | ||
991 | invalidate_interrupt0 smp_invalidate_interrupt | ||
992 | apicinterrupt INVALIDATE_TLB_VECTOR_START+1 \ | ||
993 | invalidate_interrupt1 smp_invalidate_interrupt | ||
994 | apicinterrupt INVALIDATE_TLB_VECTOR_START+2 \ | ||
995 | invalidate_interrupt2 smp_invalidate_interrupt | ||
996 | apicinterrupt INVALIDATE_TLB_VECTOR_START+3 \ | ||
997 | invalidate_interrupt3 smp_invalidate_interrupt | ||
998 | apicinterrupt INVALIDATE_TLB_VECTOR_START+4 \ | ||
999 | invalidate_interrupt4 smp_invalidate_interrupt | ||
1000 | apicinterrupt INVALIDATE_TLB_VECTOR_START+5 \ | ||
1001 | invalidate_interrupt5 smp_invalidate_interrupt | ||
1002 | apicinterrupt INVALIDATE_TLB_VECTOR_START+6 \ | ||
1003 | invalidate_interrupt6 smp_invalidate_interrupt | ||
1004 | apicinterrupt INVALIDATE_TLB_VECTOR_START+7 \ | ||
1005 | invalidate_interrupt7 smp_invalidate_interrupt | ||
1006 | #endif | ||
860 | 1007 | ||
861 | ENTRY(uv_bau_message_intr1) | 1008 | apicinterrupt THRESHOLD_APIC_VECTOR \ |
862 | apicinterrupt 220,uv_bau_message_interrupt | 1009 | threshold_interrupt mce_threshold_interrupt |
863 | END(uv_bau_message_intr1) | 1010 | apicinterrupt THERMAL_APIC_VECTOR \ |
1011 | thermal_interrupt smp_thermal_interrupt | ||
1012 | |||
1013 | #ifdef CONFIG_SMP | ||
1014 | apicinterrupt CALL_FUNCTION_SINGLE_VECTOR \ | ||
1015 | call_function_single_interrupt smp_call_function_single_interrupt | ||
1016 | apicinterrupt CALL_FUNCTION_VECTOR \ | ||
1017 | call_function_interrupt smp_call_function_interrupt | ||
1018 | apicinterrupt RESCHEDULE_VECTOR \ | ||
1019 | reschedule_interrupt smp_reschedule_interrupt | ||
1020 | #endif | ||
864 | 1021 | ||
865 | ENTRY(error_interrupt) | 1022 | apicinterrupt ERROR_APIC_VECTOR \ |
866 | apicinterrupt ERROR_APIC_VECTOR,smp_error_interrupt | 1023 | error_interrupt smp_error_interrupt |
867 | END(error_interrupt) | 1024 | apicinterrupt SPURIOUS_APIC_VECTOR \ |
1025 | spurious_interrupt smp_spurious_interrupt | ||
868 | 1026 | ||
869 | ENTRY(spurious_interrupt) | ||
870 | apicinterrupt SPURIOUS_APIC_VECTOR,smp_spurious_interrupt | ||
871 | END(spurious_interrupt) | ||
872 | |||
873 | /* | 1027 | /* |
874 | * Exception entry points. | 1028 | * Exception entry points. |
875 | */ | 1029 | */ |
876 | .macro zeroentry sym | 1030 | .macro zeroentry sym do_sym |
1031 | ENTRY(\sym) | ||
877 | INTR_FRAME | 1032 | INTR_FRAME |
878 | PARAVIRT_ADJUST_EXCEPTION_FRAME | 1033 | PARAVIRT_ADJUST_EXCEPTION_FRAME |
879 | pushq $0 /* push error code/oldrax */ | 1034 | pushq_cfi $-1 /* ORIG_RAX: no syscall to restart */ |
880 | CFI_ADJUST_CFA_OFFSET 8 | 1035 | subq $15*8,%rsp |
881 | pushq %rax /* push real oldrax to the rdi slot */ | 1036 | CFI_ADJUST_CFA_OFFSET 15*8 |
882 | CFI_ADJUST_CFA_OFFSET 8 | 1037 | call error_entry |
883 | CFI_REL_OFFSET rax,0 | 1038 | DEFAULT_FRAME 0 |
884 | leaq \sym(%rip),%rax | 1039 | movq %rsp,%rdi /* pt_regs pointer */ |
885 | jmp error_entry | 1040 | xorl %esi,%esi /* no error code */ |
1041 | call \do_sym | ||
1042 | jmp error_exit /* %ebx: no swapgs flag */ | ||
886 | CFI_ENDPROC | 1043 | CFI_ENDPROC |
887 | .endm | 1044 | END(\sym) |
1045 | .endm | ||
888 | 1046 | ||
889 | .macro errorentry sym | 1047 | .macro paranoidzeroentry sym do_sym |
890 | XCPT_FRAME | 1048 | ENTRY(\sym) |
1049 | INTR_FRAME | ||
891 | PARAVIRT_ADJUST_EXCEPTION_FRAME | 1050 | PARAVIRT_ADJUST_EXCEPTION_FRAME |
892 | pushq %rax | 1051 | pushq $-1 /* ORIG_RAX: no syscall to restart */ |
893 | CFI_ADJUST_CFA_OFFSET 8 | 1052 | CFI_ADJUST_CFA_OFFSET 8 |
894 | CFI_REL_OFFSET rax,0 | 1053 | subq $15*8, %rsp |
895 | leaq \sym(%rip),%rax | 1054 | call save_paranoid |
896 | jmp error_entry | 1055 | TRACE_IRQS_OFF |
1056 | movq %rsp,%rdi /* pt_regs pointer */ | ||
1057 | xorl %esi,%esi /* no error code */ | ||
1058 | call \do_sym | ||
1059 | jmp paranoid_exit /* %ebx: no swapgs flag */ | ||
897 | CFI_ENDPROC | 1060 | CFI_ENDPROC |
898 | .endm | 1061 | END(\sym) |
1062 | .endm | ||
899 | 1063 | ||
900 | /* error code is on the stack already */ | 1064 | .macro paranoidzeroentry_ist sym do_sym ist |
901 | /* handle NMI like exceptions that can happen everywhere */ | 1065 | ENTRY(\sym) |
902 | .macro paranoidentry sym, ist=0, irqtrace=1 | 1066 | INTR_FRAME |
903 | SAVE_ALL | 1067 | PARAVIRT_ADJUST_EXCEPTION_FRAME |
904 | cld | 1068 | pushq $-1 /* ORIG_RAX: no syscall to restart */ |
905 | movl $1,%ebx | 1069 | CFI_ADJUST_CFA_OFFSET 8 |
906 | movl $MSR_GS_BASE,%ecx | 1070 | subq $15*8, %rsp |
907 | rdmsr | 1071 | call save_paranoid |
908 | testl %edx,%edx | ||
909 | js 1f | ||
910 | SWAPGS | ||
911 | xorl %ebx,%ebx | ||
912 | 1: | ||
913 | .if \ist | ||
914 | movq %gs:pda_data_offset, %rbp | ||
915 | .endif | ||
916 | .if \irqtrace | ||
917 | TRACE_IRQS_OFF | ||
918 | .endif | ||
919 | movq %rsp,%rdi | ||
920 | movq ORIG_RAX(%rsp),%rsi | ||
921 | movq $-1,ORIG_RAX(%rsp) | ||
922 | .if \ist | ||
923 | subq $EXCEPTION_STKSZ, per_cpu__init_tss + TSS_ist + (\ist - 1) * 8(%rbp) | ||
924 | .endif | ||
925 | call \sym | ||
926 | .if \ist | ||
927 | addq $EXCEPTION_STKSZ, per_cpu__init_tss + TSS_ist + (\ist - 1) * 8(%rbp) | ||
928 | .endif | ||
929 | DISABLE_INTERRUPTS(CLBR_NONE) | ||
930 | .if \irqtrace | ||
931 | TRACE_IRQS_OFF | 1072 | TRACE_IRQS_OFF |
932 | .endif | 1073 | movq %rsp,%rdi /* pt_regs pointer */ |
933 | .endm | 1074 | xorl %esi,%esi /* no error code */ |
1075 | movq %gs:pda_data_offset, %rbp | ||
1076 | subq $EXCEPTION_STKSZ, per_cpu__init_tss + TSS_ist + (\ist - 1) * 8(%rbp) | ||
1077 | call \do_sym | ||
1078 | addq $EXCEPTION_STKSZ, per_cpu__init_tss + TSS_ist + (\ist - 1) * 8(%rbp) | ||
1079 | jmp paranoid_exit /* %ebx: no swapgs flag */ | ||
1080 | CFI_ENDPROC | ||
1081 | END(\sym) | ||
1082 | .endm | ||
934 | 1083 | ||
935 | /* | 1084 | .macro errorentry sym do_sym |
936 | * "Paranoid" exit path from exception stack. | 1085 | ENTRY(\sym) |
937 | * Paranoid because this is used by NMIs and cannot take | 1086 | XCPT_FRAME |
938 | * any kernel state for granted. | 1087 | PARAVIRT_ADJUST_EXCEPTION_FRAME |
939 | * We don't do kernel preemption checks here, because only | 1088 | subq $15*8,%rsp |
940 | * NMI should be common and it does not enable IRQs and | 1089 | CFI_ADJUST_CFA_OFFSET 15*8 |
941 | * cannot get reschedule ticks. | 1090 | call error_entry |
942 | * | 1091 | DEFAULT_FRAME 0 |
943 | * "trace" is 0 for the NMI handler only, because irq-tracing | 1092 | movq %rsp,%rdi /* pt_regs pointer */ |
944 | * is fundamentally NMI-unsafe. (we cannot change the soft and | 1093 | movq ORIG_RAX(%rsp),%rsi /* get error code */ |
945 | * hard flags at once, atomically) | 1094 | movq $-1,ORIG_RAX(%rsp) /* no syscall to restart */ |
946 | */ | 1095 | call \do_sym |
947 | .macro paranoidexit trace=1 | 1096 | jmp error_exit /* %ebx: no swapgs flag */ |
948 | /* ebx: no swapgs flag */ | ||
949 | paranoid_exit\trace: | ||
950 | testl %ebx,%ebx /* swapgs needed? */ | ||
951 | jnz paranoid_restore\trace | ||
952 | testl $3,CS(%rsp) | ||
953 | jnz paranoid_userspace\trace | ||
954 | paranoid_swapgs\trace: | ||
955 | .if \trace | ||
956 | TRACE_IRQS_IRETQ 0 | ||
957 | .endif | ||
958 | SWAPGS_UNSAFE_STACK | ||
959 | paranoid_restore\trace: | ||
960 | RESTORE_ALL 8 | ||
961 | jmp irq_return | ||
962 | paranoid_userspace\trace: | ||
963 | GET_THREAD_INFO(%rcx) | ||
964 | movl TI_flags(%rcx),%ebx | ||
965 | andl $_TIF_WORK_MASK,%ebx | ||
966 | jz paranoid_swapgs\trace | ||
967 | movq %rsp,%rdi /* &pt_regs */ | ||
968 | call sync_regs | ||
969 | movq %rax,%rsp /* switch stack for scheduling */ | ||
970 | testl $_TIF_NEED_RESCHED,%ebx | ||
971 | jnz paranoid_schedule\trace | ||
972 | movl %ebx,%edx /* arg3: thread flags */ | ||
973 | .if \trace | ||
974 | TRACE_IRQS_ON | ||
975 | .endif | ||
976 | ENABLE_INTERRUPTS(CLBR_NONE) | ||
977 | xorl %esi,%esi /* arg2: oldset */ | ||
978 | movq %rsp,%rdi /* arg1: &pt_regs */ | ||
979 | call do_notify_resume | ||
980 | DISABLE_INTERRUPTS(CLBR_NONE) | ||
981 | .if \trace | ||
982 | TRACE_IRQS_OFF | ||
983 | .endif | ||
984 | jmp paranoid_userspace\trace | ||
985 | paranoid_schedule\trace: | ||
986 | .if \trace | ||
987 | TRACE_IRQS_ON | ||
988 | .endif | ||
989 | ENABLE_INTERRUPTS(CLBR_ANY) | ||
990 | call schedule | ||
991 | DISABLE_INTERRUPTS(CLBR_ANY) | ||
992 | .if \trace | ||
993 | TRACE_IRQS_OFF | ||
994 | .endif | ||
995 | jmp paranoid_userspace\trace | ||
996 | CFI_ENDPROC | 1097 | CFI_ENDPROC |
997 | .endm | 1098 | END(\sym) |
1099 | .endm | ||
998 | 1100 | ||
999 | /* | 1101 | /* error code is on the stack already */ |
1000 | * Exception entry point. This expects an error code/orig_rax on the stack | 1102 | .macro paranoiderrorentry sym do_sym |
1001 | * and the exception handler in %rax. | 1103 | ENTRY(\sym) |
1002 | */ | 1104 | XCPT_FRAME |
1003 | KPROBE_ENTRY(error_entry) | 1105 | PARAVIRT_ADJUST_EXCEPTION_FRAME |
1004 | _frame RDI | 1106 | subq $15*8,%rsp |
1005 | CFI_REL_OFFSET rax,0 | 1107 | CFI_ADJUST_CFA_OFFSET 15*8 |
1006 | /* rdi slot contains rax, oldrax contains error code */ | 1108 | call save_paranoid |
1007 | cld | 1109 | DEFAULT_FRAME 0 |
1008 | subq $14*8,%rsp | ||
1009 | CFI_ADJUST_CFA_OFFSET (14*8) | ||
1010 | movq %rsi,13*8(%rsp) | ||
1011 | CFI_REL_OFFSET rsi,RSI | ||
1012 | movq 14*8(%rsp),%rsi /* load rax from rdi slot */ | ||
1013 | CFI_REGISTER rax,rsi | ||
1014 | movq %rdx,12*8(%rsp) | ||
1015 | CFI_REL_OFFSET rdx,RDX | ||
1016 | movq %rcx,11*8(%rsp) | ||
1017 | CFI_REL_OFFSET rcx,RCX | ||
1018 | movq %rsi,10*8(%rsp) /* store rax */ | ||
1019 | CFI_REL_OFFSET rax,RAX | ||
1020 | movq %r8, 9*8(%rsp) | ||
1021 | CFI_REL_OFFSET r8,R8 | ||
1022 | movq %r9, 8*8(%rsp) | ||
1023 | CFI_REL_OFFSET r9,R9 | ||
1024 | movq %r10,7*8(%rsp) | ||
1025 | CFI_REL_OFFSET r10,R10 | ||
1026 | movq %r11,6*8(%rsp) | ||
1027 | CFI_REL_OFFSET r11,R11 | ||
1028 | movq %rbx,5*8(%rsp) | ||
1029 | CFI_REL_OFFSET rbx,RBX | ||
1030 | movq %rbp,4*8(%rsp) | ||
1031 | CFI_REL_OFFSET rbp,RBP | ||
1032 | movq %r12,3*8(%rsp) | ||
1033 | CFI_REL_OFFSET r12,R12 | ||
1034 | movq %r13,2*8(%rsp) | ||
1035 | CFI_REL_OFFSET r13,R13 | ||
1036 | movq %r14,1*8(%rsp) | ||
1037 | CFI_REL_OFFSET r14,R14 | ||
1038 | movq %r15,(%rsp) | ||
1039 | CFI_REL_OFFSET r15,R15 | ||
1040 | xorl %ebx,%ebx | ||
1041 | testl $3,CS(%rsp) | ||
1042 | je error_kernelspace | ||
1043 | error_swapgs: | ||
1044 | SWAPGS | ||
1045 | error_sti: | ||
1046 | TRACE_IRQS_OFF | ||
1047 | movq %rdi,RDI(%rsp) | ||
1048 | CFI_REL_OFFSET rdi,RDI | ||
1049 | movq %rsp,%rdi | ||
1050 | movq ORIG_RAX(%rsp),%rsi /* get error code */ | ||
1051 | movq $-1,ORIG_RAX(%rsp) | ||
1052 | call *%rax | ||
1053 | /* ebx: no swapgs flag (1: don't need swapgs, 0: need it) */ | ||
1054 | error_exit: | ||
1055 | movl %ebx,%eax | ||
1056 | RESTORE_REST | ||
1057 | DISABLE_INTERRUPTS(CLBR_NONE) | ||
1058 | TRACE_IRQS_OFF | 1110 | TRACE_IRQS_OFF |
1059 | GET_THREAD_INFO(%rcx) | 1111 | movq %rsp,%rdi /* pt_regs pointer */ |
1060 | testl %eax,%eax | 1112 | movq ORIG_RAX(%rsp),%rsi /* get error code */ |
1061 | jne retint_kernel | 1113 | movq $-1,ORIG_RAX(%rsp) /* no syscall to restart */ |
1062 | LOCKDEP_SYS_EXIT_IRQ | 1114 | call \do_sym |
1063 | movl TI_flags(%rcx),%edx | 1115 | jmp paranoid_exit /* %ebx: no swapgs flag */ |
1064 | movl $_TIF_WORK_MASK,%edi | ||
1065 | andl %edi,%edx | ||
1066 | jnz retint_careful | ||
1067 | jmp retint_swapgs | ||
1068 | CFI_ENDPROC | 1116 | CFI_ENDPROC |
1117 | END(\sym) | ||
1118 | .endm | ||
1069 | 1119 | ||
1070 | error_kernelspace: | 1120 | zeroentry divide_error do_divide_error |
1071 | incl %ebx | 1121 | zeroentry overflow do_overflow |
1072 | /* There are two places in the kernel that can potentially fault with | 1122 | zeroentry bounds do_bounds |
1073 | usergs. Handle them here. The exception handlers after | 1123 | zeroentry invalid_op do_invalid_op |
1074 | iret run with kernel gs again, so don't set the user space flag. | 1124 | zeroentry device_not_available do_device_not_available |
1075 | B stepping K8s sometimes report an truncated RIP for IRET | 1125 | paranoiderrorentry double_fault do_double_fault |
1076 | exceptions returning to compat mode. Check for these here too. */ | 1126 | zeroentry coprocessor_segment_overrun do_coprocessor_segment_overrun |
1077 | leaq irq_return(%rip),%rcx | 1127 | errorentry invalid_TSS do_invalid_TSS |
1078 | cmpq %rcx,RIP(%rsp) | 1128 | errorentry segment_not_present do_segment_not_present |
1079 | je error_swapgs | 1129 | zeroentry spurious_interrupt_bug do_spurious_interrupt_bug |
1080 | movl %ecx,%ecx /* zero extend */ | 1130 | zeroentry coprocessor_error do_coprocessor_error |
1081 | cmpq %rcx,RIP(%rsp) | 1131 | errorentry alignment_check do_alignment_check |
1082 | je error_swapgs | 1132 | zeroentry simd_coprocessor_error do_simd_coprocessor_error |
1083 | cmpq $gs_change,RIP(%rsp) | 1133 | |
1084 | je error_swapgs | 1134 | /* Reload gs selector with exception handling */ |
1085 | jmp error_sti | 1135 | /* edi: new selector */ |
1086 | KPROBE_END(error_entry) | ||
1087 | |||
1088 | /* Reload gs selector with exception handling */ | ||
1089 | /* edi: new selector */ | ||
1090 | ENTRY(native_load_gs_index) | 1136 | ENTRY(native_load_gs_index) |
1091 | CFI_STARTPROC | 1137 | CFI_STARTPROC |
1092 | pushf | 1138 | pushf |
1093 | CFI_ADJUST_CFA_OFFSET 8 | 1139 | CFI_ADJUST_CFA_OFFSET 8 |
1094 | DISABLE_INTERRUPTS(CLBR_ANY | ~(CLBR_RDI)) | 1140 | DISABLE_INTERRUPTS(CLBR_ANY | ~(CLBR_RDI)) |
1095 | SWAPGS | 1141 | SWAPGS |
1096 | gs_change: | 1142 | gs_change: |
1097 | movl %edi,%gs | 1143 | movl %edi,%gs |
1098 | 2: mfence /* workaround */ | 1144 | 2: mfence /* workaround */ |
1099 | SWAPGS | 1145 | SWAPGS |
1100 | popf | 1146 | popf |
1101 | CFI_ADJUST_CFA_OFFSET -8 | 1147 | CFI_ADJUST_CFA_OFFSET -8 |
1102 | ret | 1148 | ret |
1103 | CFI_ENDPROC | 1149 | CFI_ENDPROC |
1104 | ENDPROC(native_load_gs_index) | 1150 | END(native_load_gs_index) |
1105 | 1151 | ||
1106 | .section __ex_table,"a" | 1152 | .section __ex_table,"a" |
1107 | .align 8 | 1153 | .align 8 |
1108 | .quad gs_change,bad_gs | 1154 | .quad gs_change,bad_gs |
1109 | .previous | 1155 | .previous |
1110 | .section .fixup,"ax" | 1156 | .section .fixup,"ax" |
1111 | /* running with kernelgs */ | 1157 | /* running with kernelgs */ |
1112 | bad_gs: | 1158 | bad_gs: |
1113 | SWAPGS /* switch back to user gs */ | 1159 | SWAPGS /* switch back to user gs */ |
1114 | xorl %eax,%eax | 1160 | xorl %eax,%eax |
1115 | movl %eax,%gs | 1161 | movl %eax,%gs |
1116 | jmp 2b | 1162 | jmp 2b |
1117 | .previous | 1163 | .previous |
1118 | 1164 | ||
1119 | /* | 1165 | /* |
1120 | * Create a kernel thread. | 1166 | * Create a kernel thread. |
1121 | * | 1167 | * |
@@ -1138,7 +1184,7 @@ ENTRY(kernel_thread) | |||
1138 | 1184 | ||
1139 | xorl %r8d,%r8d | 1185 | xorl %r8d,%r8d |
1140 | xorl %r9d,%r9d | 1186 | xorl %r9d,%r9d |
1141 | 1187 | ||
1142 | # clone now | 1188 | # clone now |
1143 | call do_fork | 1189 | call do_fork |
1144 | movq %rax,RAX(%rsp) | 1190 | movq %rax,RAX(%rsp) |
@@ -1149,15 +1195,15 @@ ENTRY(kernel_thread) | |||
1149 | * so internally to the x86_64 port you can rely on kernel_thread() | 1195 | * so internally to the x86_64 port you can rely on kernel_thread() |
1150 | * not to reschedule the child before returning, this avoids the need | 1196 | * not to reschedule the child before returning, this avoids the need |
1151 | * of hacks for example to fork off the per-CPU idle tasks. | 1197 | * of hacks for example to fork off the per-CPU idle tasks. |
1152 | * [Hopefully no generic code relies on the reschedule -AK] | 1198 | * [Hopefully no generic code relies on the reschedule -AK] |
1153 | */ | 1199 | */ |
1154 | RESTORE_ALL | 1200 | RESTORE_ALL |
1155 | UNFAKE_STACK_FRAME | 1201 | UNFAKE_STACK_FRAME |
1156 | ret | 1202 | ret |
1157 | CFI_ENDPROC | 1203 | CFI_ENDPROC |
1158 | ENDPROC(kernel_thread) | 1204 | END(kernel_thread) |
1159 | 1205 | ||
1160 | child_rip: | 1206 | ENTRY(child_rip) |
1161 | pushq $0 # fake return address | 1207 | pushq $0 # fake return address |
1162 | CFI_STARTPROC | 1208 | CFI_STARTPROC |
1163 | /* | 1209 | /* |
@@ -1170,8 +1216,9 @@ child_rip: | |||
1170 | # exit | 1216 | # exit |
1171 | mov %eax, %edi | 1217 | mov %eax, %edi |
1172 | call do_exit | 1218 | call do_exit |
1219 | ud2 # padding for call trace | ||
1173 | CFI_ENDPROC | 1220 | CFI_ENDPROC |
1174 | ENDPROC(child_rip) | 1221 | END(child_rip) |
1175 | 1222 | ||
1176 | /* | 1223 | /* |
1177 | * execve(). This function needs to use IRET, not SYSRET, to set up all state properly. | 1224 | * execve(). This function needs to use IRET, not SYSRET, to set up all state properly. |
@@ -1191,10 +1238,10 @@ ENDPROC(child_rip) | |||
1191 | ENTRY(kernel_execve) | 1238 | ENTRY(kernel_execve) |
1192 | CFI_STARTPROC | 1239 | CFI_STARTPROC |
1193 | FAKE_STACK_FRAME $0 | 1240 | FAKE_STACK_FRAME $0 |
1194 | SAVE_ALL | 1241 | SAVE_ALL |
1195 | movq %rsp,%rcx | 1242 | movq %rsp,%rcx |
1196 | call sys_execve | 1243 | call sys_execve |
1197 | movq %rax, RAX(%rsp) | 1244 | movq %rax, RAX(%rsp) |
1198 | RESTORE_REST | 1245 | RESTORE_REST |
1199 | testq %rax,%rax | 1246 | testq %rax,%rax |
1200 | je int_ret_from_sys_call | 1247 | je int_ret_from_sys_call |
@@ -1202,129 +1249,7 @@ ENTRY(kernel_execve) | |||
1202 | UNFAKE_STACK_FRAME | 1249 | UNFAKE_STACK_FRAME |
1203 | ret | 1250 | ret |
1204 | CFI_ENDPROC | 1251 | CFI_ENDPROC |
1205 | ENDPROC(kernel_execve) | 1252 | END(kernel_execve) |
1206 | |||
1207 | KPROBE_ENTRY(page_fault) | ||
1208 | errorentry do_page_fault | ||
1209 | KPROBE_END(page_fault) | ||
1210 | |||
1211 | ENTRY(coprocessor_error) | ||
1212 | zeroentry do_coprocessor_error | ||
1213 | END(coprocessor_error) | ||
1214 | |||
1215 | ENTRY(simd_coprocessor_error) | ||
1216 | zeroentry do_simd_coprocessor_error | ||
1217 | END(simd_coprocessor_error) | ||
1218 | |||
1219 | ENTRY(device_not_available) | ||
1220 | zeroentry do_device_not_available | ||
1221 | END(device_not_available) | ||
1222 | |||
1223 | /* runs on exception stack */ | ||
1224 | KPROBE_ENTRY(debug) | ||
1225 | INTR_FRAME | ||
1226 | PARAVIRT_ADJUST_EXCEPTION_FRAME | ||
1227 | pushq $0 | ||
1228 | CFI_ADJUST_CFA_OFFSET 8 | ||
1229 | paranoidentry do_debug, DEBUG_STACK | ||
1230 | paranoidexit | ||
1231 | KPROBE_END(debug) | ||
1232 | |||
1233 | /* runs on exception stack */ | ||
1234 | KPROBE_ENTRY(nmi) | ||
1235 | INTR_FRAME | ||
1236 | PARAVIRT_ADJUST_EXCEPTION_FRAME | ||
1237 | pushq $-1 | ||
1238 | CFI_ADJUST_CFA_OFFSET 8 | ||
1239 | paranoidentry do_nmi, 0, 0 | ||
1240 | #ifdef CONFIG_TRACE_IRQFLAGS | ||
1241 | paranoidexit 0 | ||
1242 | #else | ||
1243 | jmp paranoid_exit1 | ||
1244 | CFI_ENDPROC | ||
1245 | #endif | ||
1246 | KPROBE_END(nmi) | ||
1247 | |||
1248 | KPROBE_ENTRY(int3) | ||
1249 | INTR_FRAME | ||
1250 | PARAVIRT_ADJUST_EXCEPTION_FRAME | ||
1251 | pushq $0 | ||
1252 | CFI_ADJUST_CFA_OFFSET 8 | ||
1253 | paranoidentry do_int3, DEBUG_STACK | ||
1254 | jmp paranoid_exit1 | ||
1255 | CFI_ENDPROC | ||
1256 | KPROBE_END(int3) | ||
1257 | |||
1258 | ENTRY(overflow) | ||
1259 | zeroentry do_overflow | ||
1260 | END(overflow) | ||
1261 | |||
1262 | ENTRY(bounds) | ||
1263 | zeroentry do_bounds | ||
1264 | END(bounds) | ||
1265 | |||
1266 | ENTRY(invalid_op) | ||
1267 | zeroentry do_invalid_op | ||
1268 | END(invalid_op) | ||
1269 | |||
1270 | ENTRY(coprocessor_segment_overrun) | ||
1271 | zeroentry do_coprocessor_segment_overrun | ||
1272 | END(coprocessor_segment_overrun) | ||
1273 | |||
1274 | /* runs on exception stack */ | ||
1275 | ENTRY(double_fault) | ||
1276 | XCPT_FRAME | ||
1277 | PARAVIRT_ADJUST_EXCEPTION_FRAME | ||
1278 | paranoidentry do_double_fault | ||
1279 | jmp paranoid_exit1 | ||
1280 | CFI_ENDPROC | ||
1281 | END(double_fault) | ||
1282 | |||
1283 | ENTRY(invalid_TSS) | ||
1284 | errorentry do_invalid_TSS | ||
1285 | END(invalid_TSS) | ||
1286 | |||
1287 | ENTRY(segment_not_present) | ||
1288 | errorentry do_segment_not_present | ||
1289 | END(segment_not_present) | ||
1290 | |||
1291 | /* runs on exception stack */ | ||
1292 | ENTRY(stack_segment) | ||
1293 | XCPT_FRAME | ||
1294 | PARAVIRT_ADJUST_EXCEPTION_FRAME | ||
1295 | paranoidentry do_stack_segment | ||
1296 | jmp paranoid_exit1 | ||
1297 | CFI_ENDPROC | ||
1298 | END(stack_segment) | ||
1299 | |||
1300 | KPROBE_ENTRY(general_protection) | ||
1301 | errorentry do_general_protection | ||
1302 | KPROBE_END(general_protection) | ||
1303 | |||
1304 | ENTRY(alignment_check) | ||
1305 | errorentry do_alignment_check | ||
1306 | END(alignment_check) | ||
1307 | |||
1308 | ENTRY(divide_error) | ||
1309 | zeroentry do_divide_error | ||
1310 | END(divide_error) | ||
1311 | |||
1312 | ENTRY(spurious_interrupt_bug) | ||
1313 | zeroentry do_spurious_interrupt_bug | ||
1314 | END(spurious_interrupt_bug) | ||
1315 | |||
1316 | #ifdef CONFIG_X86_MCE | ||
1317 | /* runs on exception stack */ | ||
1318 | ENTRY(machine_check) | ||
1319 | INTR_FRAME | ||
1320 | PARAVIRT_ADJUST_EXCEPTION_FRAME | ||
1321 | pushq $0 | ||
1322 | CFI_ADJUST_CFA_OFFSET 8 | ||
1323 | paranoidentry do_machine_check | ||
1324 | jmp paranoid_exit1 | ||
1325 | CFI_ENDPROC | ||
1326 | END(machine_check) | ||
1327 | #endif | ||
1328 | 1253 | ||
1329 | /* Call softirq on interrupt stack. Interrupts are off. */ | 1254 | /* Call softirq on interrupt stack. Interrupts are off. */ |
1330 | ENTRY(call_softirq) | 1255 | ENTRY(call_softirq) |
@@ -1344,40 +1269,33 @@ ENTRY(call_softirq) | |||
1344 | decl %gs:pda_irqcount | 1269 | decl %gs:pda_irqcount |
1345 | ret | 1270 | ret |
1346 | CFI_ENDPROC | 1271 | CFI_ENDPROC |
1347 | ENDPROC(call_softirq) | 1272 | END(call_softirq) |
1348 | |||
1349 | KPROBE_ENTRY(ignore_sysret) | ||
1350 | CFI_STARTPROC | ||
1351 | mov $-ENOSYS,%eax | ||
1352 | sysret | ||
1353 | CFI_ENDPROC | ||
1354 | ENDPROC(ignore_sysret) | ||
1355 | 1273 | ||
1356 | #ifdef CONFIG_XEN | 1274 | #ifdef CONFIG_XEN |
1357 | ENTRY(xen_hypervisor_callback) | 1275 | zeroentry xen_hypervisor_callback xen_do_hypervisor_callback |
1358 | zeroentry xen_do_hypervisor_callback | ||
1359 | END(xen_hypervisor_callback) | ||
1360 | 1276 | ||
1361 | /* | 1277 | /* |
1362 | # A note on the "critical region" in our callback handler. | 1278 | * A note on the "critical region" in our callback handler. |
1363 | # We want to avoid stacking callback handlers due to events occurring | 1279 | * We want to avoid stacking callback handlers due to events occurring |
1364 | # during handling of the last event. To do this, we keep events disabled | 1280 | * during handling of the last event. To do this, we keep events disabled |
1365 | # until we've done all processing. HOWEVER, we must enable events before | 1281 | * until we've done all processing. HOWEVER, we must enable events before |
1366 | # popping the stack frame (can't be done atomically) and so it would still | 1282 | * popping the stack frame (can't be done atomically) and so it would still |
1367 | # be possible to get enough handler activations to overflow the stack. | 1283 | * be possible to get enough handler activations to overflow the stack. |
1368 | # Although unlikely, bugs of that kind are hard to track down, so we'd | 1284 | * Although unlikely, bugs of that kind are hard to track down, so we'd |
1369 | # like to avoid the possibility. | 1285 | * like to avoid the possibility. |
1370 | # So, on entry to the handler we detect whether we interrupted an | 1286 | * So, on entry to the handler we detect whether we interrupted an |
1371 | # existing activation in its critical region -- if so, we pop the current | 1287 | * existing activation in its critical region -- if so, we pop the current |
1372 | # activation and restart the handler using the previous one. | 1288 | * activation and restart the handler using the previous one. |
1373 | */ | 1289 | */ |
1374 | ENTRY(xen_do_hypervisor_callback) # do_hypervisor_callback(struct *pt_regs) | 1290 | ENTRY(xen_do_hypervisor_callback) # do_hypervisor_callback(struct *pt_regs) |
1375 | CFI_STARTPROC | 1291 | CFI_STARTPROC |
1376 | /* Since we don't modify %rdi, evtchn_do_upall(struct *pt_regs) will | 1292 | /* |
1377 | see the correct pointer to the pt_regs */ | 1293 | * Since we don't modify %rdi, evtchn_do_upall(struct *pt_regs) will |
1294 | * see the correct pointer to the pt_regs | ||
1295 | */ | ||
1378 | movq %rdi, %rsp # we don't return, adjust the stack frame | 1296 | movq %rdi, %rsp # we don't return, adjust the stack frame |
1379 | CFI_ENDPROC | 1297 | CFI_ENDPROC |
1380 | CFI_DEFAULT_STACK | 1298 | DEFAULT_FRAME |
1381 | 11: incl %gs:pda_irqcount | 1299 | 11: incl %gs:pda_irqcount |
1382 | movq %rsp,%rbp | 1300 | movq %rsp,%rbp |
1383 | CFI_DEF_CFA_REGISTER rbp | 1301 | CFI_DEF_CFA_REGISTER rbp |
@@ -1392,23 +1310,26 @@ ENTRY(xen_do_hypervisor_callback) # do_hypervisor_callback(struct *pt_regs) | |||
1392 | END(do_hypervisor_callback) | 1310 | END(do_hypervisor_callback) |
1393 | 1311 | ||
1394 | /* | 1312 | /* |
1395 | # Hypervisor uses this for application faults while it executes. | 1313 | * Hypervisor uses this for application faults while it executes. |
1396 | # We get here for two reasons: | 1314 | * We get here for two reasons: |
1397 | # 1. Fault while reloading DS, ES, FS or GS | 1315 | * 1. Fault while reloading DS, ES, FS or GS |
1398 | # 2. Fault while executing IRET | 1316 | * 2. Fault while executing IRET |
1399 | # Category 1 we do not need to fix up as Xen has already reloaded all segment | 1317 | * Category 1 we do not need to fix up as Xen has already reloaded all segment |
1400 | # registers that could be reloaded and zeroed the others. | 1318 | * registers that could be reloaded and zeroed the others. |
1401 | # Category 2 we fix up by killing the current process. We cannot use the | 1319 | * Category 2 we fix up by killing the current process. We cannot use the |
1402 | # normal Linux return path in this case because if we use the IRET hypercall | 1320 | * normal Linux return path in this case because if we use the IRET hypercall |
1403 | # to pop the stack frame we end up in an infinite loop of failsafe callbacks. | 1321 | * to pop the stack frame we end up in an infinite loop of failsafe callbacks. |
1404 | # We distinguish between categories by comparing each saved segment register | 1322 | * We distinguish between categories by comparing each saved segment register |
1405 | # with its current contents: any discrepancy means we are in category 1. | 1323 | * with its current contents: any discrepancy means we are in category 1. |
1406 | */ | 1324 | */ |
1407 | ENTRY(xen_failsafe_callback) | 1325 | ENTRY(xen_failsafe_callback) |
1408 | framesz = (RIP-0x30) /* workaround buggy gas */ | 1326 | INTR_FRAME 1 (6*8) |
1409 | _frame framesz | 1327 | /*CFI_REL_OFFSET gs,GS*/ |
1410 | CFI_REL_OFFSET rcx, 0 | 1328 | /*CFI_REL_OFFSET fs,FS*/ |
1411 | CFI_REL_OFFSET r11, 8 | 1329 | /*CFI_REL_OFFSET es,ES*/ |
1330 | /*CFI_REL_OFFSET ds,DS*/ | ||
1331 | CFI_REL_OFFSET r11,8 | ||
1332 | CFI_REL_OFFSET rcx,0 | ||
1412 | movw %ds,%cx | 1333 | movw %ds,%cx |
1413 | cmpw %cx,0x10(%rsp) | 1334 | cmpw %cx,0x10(%rsp) |
1414 | CFI_REMEMBER_STATE | 1335 | CFI_REMEMBER_STATE |
@@ -1429,12 +1350,9 @@ ENTRY(xen_failsafe_callback) | |||
1429 | CFI_RESTORE r11 | 1350 | CFI_RESTORE r11 |
1430 | addq $0x30,%rsp | 1351 | addq $0x30,%rsp |
1431 | CFI_ADJUST_CFA_OFFSET -0x30 | 1352 | CFI_ADJUST_CFA_OFFSET -0x30 |
1432 | pushq $0 | 1353 | pushq_cfi $0 /* RIP */ |
1433 | CFI_ADJUST_CFA_OFFSET 8 | 1354 | pushq_cfi %r11 |
1434 | pushq %r11 | 1355 | pushq_cfi %rcx |
1435 | CFI_ADJUST_CFA_OFFSET 8 | ||
1436 | pushq %rcx | ||
1437 | CFI_ADJUST_CFA_OFFSET 8 | ||
1438 | jmp general_protection | 1356 | jmp general_protection |
1439 | CFI_RESTORE_STATE | 1357 | CFI_RESTORE_STATE |
1440 | 1: /* Segment mismatch => Category 1 (Bad segment). Retry the IRET. */ | 1358 | 1: /* Segment mismatch => Category 1 (Bad segment). Retry the IRET. */ |
@@ -1444,11 +1362,223 @@ ENTRY(xen_failsafe_callback) | |||
1444 | CFI_RESTORE r11 | 1362 | CFI_RESTORE r11 |
1445 | addq $0x30,%rsp | 1363 | addq $0x30,%rsp |
1446 | CFI_ADJUST_CFA_OFFSET -0x30 | 1364 | CFI_ADJUST_CFA_OFFSET -0x30 |
1447 | pushq $0 | 1365 | pushq_cfi $0 |
1448 | CFI_ADJUST_CFA_OFFSET 8 | ||
1449 | SAVE_ALL | 1366 | SAVE_ALL |
1450 | jmp error_exit | 1367 | jmp error_exit |
1451 | CFI_ENDPROC | 1368 | CFI_ENDPROC |
1452 | END(xen_failsafe_callback) | 1369 | END(xen_failsafe_callback) |
1453 | 1370 | ||
1454 | #endif /* CONFIG_XEN */ | 1371 | #endif /* CONFIG_XEN */ |
1372 | |||
1373 | /* | ||
1374 | * Some functions should be protected against kprobes | ||
1375 | */ | ||
1376 | .pushsection .kprobes.text, "ax" | ||
1377 | |||
1378 | paranoidzeroentry_ist debug do_debug DEBUG_STACK | ||
1379 | paranoidzeroentry_ist int3 do_int3 DEBUG_STACK | ||
1380 | paranoiderrorentry stack_segment do_stack_segment | ||
1381 | errorentry general_protection do_general_protection | ||
1382 | errorentry page_fault do_page_fault | ||
1383 | #ifdef CONFIG_X86_MCE | ||
1384 | paranoidzeroentry machine_check do_machine_check | ||
1385 | #endif | ||
1386 | |||
1387 | /* | ||
1388 | * "Paranoid" exit path from exception stack. | ||
1389 | * Paranoid because this is used by NMIs and cannot take | ||
1390 | * any kernel state for granted. | ||
1391 | * We don't do kernel preemption checks here, because only | ||
1392 | * NMI should be common and it does not enable IRQs and | ||
1393 | * cannot get reschedule ticks. | ||
1394 | * | ||
1395 | * "trace" is 0 for the NMI handler only, because irq-tracing | ||
1396 | * is fundamentally NMI-unsafe. (we cannot change the soft and | ||
1397 | * hard flags at once, atomically) | ||
1398 | */ | ||
1399 | |||
1400 | /* ebx: no swapgs flag */ | ||
1401 | ENTRY(paranoid_exit) | ||
1402 | INTR_FRAME | ||
1403 | DISABLE_INTERRUPTS(CLBR_NONE) | ||
1404 | TRACE_IRQS_OFF | ||
1405 | testl %ebx,%ebx /* swapgs needed? */ | ||
1406 | jnz paranoid_restore | ||
1407 | testl $3,CS(%rsp) | ||
1408 | jnz paranoid_userspace | ||
1409 | paranoid_swapgs: | ||
1410 | TRACE_IRQS_IRETQ 0 | ||
1411 | SWAPGS_UNSAFE_STACK | ||
1412 | paranoid_restore: | ||
1413 | RESTORE_ALL 8 | ||
1414 | jmp irq_return | ||
1415 | paranoid_userspace: | ||
1416 | GET_THREAD_INFO(%rcx) | ||
1417 | movl TI_flags(%rcx),%ebx | ||
1418 | andl $_TIF_WORK_MASK,%ebx | ||
1419 | jz paranoid_swapgs | ||
1420 | movq %rsp,%rdi /* &pt_regs */ | ||
1421 | call sync_regs | ||
1422 | movq %rax,%rsp /* switch stack for scheduling */ | ||
1423 | testl $_TIF_NEED_RESCHED,%ebx | ||
1424 | jnz paranoid_schedule | ||
1425 | movl %ebx,%edx /* arg3: thread flags */ | ||
1426 | TRACE_IRQS_ON | ||
1427 | ENABLE_INTERRUPTS(CLBR_NONE) | ||
1428 | xorl %esi,%esi /* arg2: oldset */ | ||
1429 | movq %rsp,%rdi /* arg1: &pt_regs */ | ||
1430 | call do_notify_resume | ||
1431 | DISABLE_INTERRUPTS(CLBR_NONE) | ||
1432 | TRACE_IRQS_OFF | ||
1433 | jmp paranoid_userspace | ||
1434 | paranoid_schedule: | ||
1435 | TRACE_IRQS_ON | ||
1436 | ENABLE_INTERRUPTS(CLBR_ANY) | ||
1437 | call schedule | ||
1438 | DISABLE_INTERRUPTS(CLBR_ANY) | ||
1439 | TRACE_IRQS_OFF | ||
1440 | jmp paranoid_userspace | ||
1441 | CFI_ENDPROC | ||
1442 | END(paranoid_exit) | ||
1443 | |||
1444 | /* | ||
1445 | * Exception entry point. This expects an error code/orig_rax on the stack. | ||
1446 | * Returns the "no swapgs flag" in %ebx. | ||
1447 | */ | ||
1448 | ENTRY(error_entry) | ||
1449 | XCPT_FRAME | ||
1450 | CFI_ADJUST_CFA_OFFSET 15*8 | ||
1451 | /* oldrax contains error code */ | ||
1452 | cld | ||
1453 | movq_cfi rdi, RDI+8 | ||
1454 | movq_cfi rsi, RSI+8 | ||
1455 | movq_cfi rdx, RDX+8 | ||
1456 | movq_cfi rcx, RCX+8 | ||
1457 | movq_cfi rax, RAX+8 | ||
1458 | movq_cfi r8, R8+8 | ||
1459 | movq_cfi r9, R9+8 | ||
1460 | movq_cfi r10, R10+8 | ||
1461 | movq_cfi r11, R11+8 | ||
1462 | movq_cfi rbx, RBX+8 | ||
1463 | movq_cfi rbp, RBP+8 | ||
1464 | movq_cfi r12, R12+8 | ||
1465 | movq_cfi r13, R13+8 | ||
1466 | movq_cfi r14, R14+8 | ||
1467 | movq_cfi r15, R15+8 | ||
1468 | xorl %ebx,%ebx | ||
1469 | testl $3,CS+8(%rsp) | ||
1470 | je error_kernelspace | ||
1471 | error_swapgs: | ||
1472 | SWAPGS | ||
1473 | error_sti: | ||
1474 | TRACE_IRQS_OFF | ||
1475 | ret | ||
1476 | CFI_ENDPROC | ||
1477 | |||
1478 | /* | ||
1479 | * There are two places in the kernel that can potentially fault with | ||
1480 | * usergs. Handle them here. The exception handlers after iret run with | ||
1481 | * kernel gs again, so don't set the user space flag. B stepping K8s | ||
1482 | * sometimes report a truncated RIP for IRET exceptions returning to | ||
1483 | * compat mode. Check for these here too. | ||
1484 | */ | ||
1485 | error_kernelspace: | ||
1486 | incl %ebx | ||
1487 | leaq irq_return(%rip),%rcx | ||
1488 | cmpq %rcx,RIP+8(%rsp) | ||
1489 | je error_swapgs | ||
1490 | movl %ecx,%ecx /* zero extend */ | ||
1491 | cmpq %rcx,RIP+8(%rsp) | ||
1492 | je error_swapgs | ||
1493 | cmpq $gs_change,RIP+8(%rsp) | ||
1494 | je error_swapgs | ||
1495 | jmp error_sti | ||
1496 | END(error_entry) | ||
1497 | |||
1498 | |||
1499 | /* ebx: no swapgs flag (1: don't need swapgs, 0: need it) */ | ||
1500 | ENTRY(error_exit) | ||
1501 | DEFAULT_FRAME | ||
1502 | movl %ebx,%eax | ||
1503 | RESTORE_REST | ||
1504 | DISABLE_INTERRUPTS(CLBR_NONE) | ||
1505 | TRACE_IRQS_OFF | ||
1506 | GET_THREAD_INFO(%rcx) | ||
1507 | testl %eax,%eax | ||
1508 | jne retint_kernel | ||
1509 | LOCKDEP_SYS_EXIT_IRQ | ||
1510 | movl TI_flags(%rcx),%edx | ||
1511 | movl $_TIF_WORK_MASK,%edi | ||
1512 | andl %edi,%edx | ||
1513 | jnz retint_careful | ||
1514 | jmp retint_swapgs | ||
1515 | CFI_ENDPROC | ||
1516 | END(error_exit) | ||
1517 | |||
1518 | |||
1519 | /* runs on exception stack */ | ||
1520 | ENTRY(nmi) | ||
1521 | INTR_FRAME | ||
1522 | PARAVIRT_ADJUST_EXCEPTION_FRAME | ||
1523 | pushq_cfi $-1 | ||
1524 | subq $15*8, %rsp | ||
1525 | CFI_ADJUST_CFA_OFFSET 15*8 | ||
1526 | call save_paranoid | ||
1527 | DEFAULT_FRAME 0 | ||
1528 | /* paranoidentry do_nmi, 0; without TRACE_IRQS_OFF */ | ||
1529 | movq %rsp,%rdi | ||
1530 | movq $-1,%rsi | ||
1531 | call do_nmi | ||
1532 | #ifdef CONFIG_TRACE_IRQFLAGS | ||
1533 | /* paranoidexit; without TRACE_IRQS_OFF */ | ||
1534 | /* ebx: no swapgs flag */ | ||
1535 | DISABLE_INTERRUPTS(CLBR_NONE) | ||
1536 | testl %ebx,%ebx /* swapgs needed? */ | ||
1537 | jnz nmi_restore | ||
1538 | testl $3,CS(%rsp) | ||
1539 | jnz nmi_userspace | ||
1540 | nmi_swapgs: | ||
1541 | SWAPGS_UNSAFE_STACK | ||
1542 | nmi_restore: | ||
1543 | RESTORE_ALL 8 | ||
1544 | jmp irq_return | ||
1545 | nmi_userspace: | ||
1546 | GET_THREAD_INFO(%rcx) | ||
1547 | movl TI_flags(%rcx),%ebx | ||
1548 | andl $_TIF_WORK_MASK,%ebx | ||
1549 | jz nmi_swapgs | ||
1550 | movq %rsp,%rdi /* &pt_regs */ | ||
1551 | call sync_regs | ||
1552 | movq %rax,%rsp /* switch stack for scheduling */ | ||
1553 | testl $_TIF_NEED_RESCHED,%ebx | ||
1554 | jnz nmi_schedule | ||
1555 | movl %ebx,%edx /* arg3: thread flags */ | ||
1556 | ENABLE_INTERRUPTS(CLBR_NONE) | ||
1557 | xorl %esi,%esi /* arg2: oldset */ | ||
1558 | movq %rsp,%rdi /* arg1: &pt_regs */ | ||
1559 | call do_notify_resume | ||
1560 | DISABLE_INTERRUPTS(CLBR_NONE) | ||
1561 | jmp nmi_userspace | ||
1562 | nmi_schedule: | ||
1563 | ENABLE_INTERRUPTS(CLBR_ANY) | ||
1564 | call schedule | ||
1565 | DISABLE_INTERRUPTS(CLBR_ANY) | ||
1566 | jmp nmi_userspace | ||
1567 | CFI_ENDPROC | ||
1568 | #else | ||
1569 | jmp paranoid_exit | ||
1570 | CFI_ENDPROC | ||
1571 | #endif | ||
1572 | END(nmi) | ||
1573 | |||
1574 | ENTRY(ignore_sysret) | ||
1575 | CFI_STARTPROC | ||
1576 | mov $-ENOSYS,%eax | ||
1577 | sysret | ||
1578 | CFI_ENDPROC | ||
1579 | END(ignore_sysret) | ||
1580 | |||
1581 | /* | ||
1582 | * End of kprobes section | ||
1583 | */ | ||
1584 | .popsection | ||
diff --git a/arch/x86/kernel/es7000_32.c b/arch/x86/kernel/es7000_32.c index 0aa2c443d600..53699c931ad4 100644 --- a/arch/x86/kernel/es7000_32.c +++ b/arch/x86/kernel/es7000_32.c | |||
@@ -38,8 +38,11 @@ | |||
38 | #include <asm/io.h> | 38 | #include <asm/io.h> |
39 | #include <asm/nmi.h> | 39 | #include <asm/nmi.h> |
40 | #include <asm/smp.h> | 40 | #include <asm/smp.h> |
41 | #include <asm/atomic.h> | ||
41 | #include <asm/apicdef.h> | 42 | #include <asm/apicdef.h> |
42 | #include <mach_mpparse.h> | 43 | #include <mach_mpparse.h> |
44 | #include <asm/genapic.h> | ||
45 | #include <asm/setup.h> | ||
43 | 46 | ||
44 | /* | 47 | /* |
45 | * ES7000 chipsets | 48 | * ES7000 chipsets |
@@ -161,6 +164,43 @@ es7000_rename_gsi(int ioapic, int gsi) | |||
161 | return gsi; | 164 | return gsi; |
162 | } | 165 | } |
163 | 166 | ||
167 | static int wakeup_secondary_cpu_via_mip(int cpu, unsigned long eip) | ||
168 | { | ||
169 | unsigned long vect = 0, psaival = 0; | ||
170 | |||
171 | if (psai == NULL) | ||
172 | return -1; | ||
173 | |||
174 | vect = ((unsigned long)__pa(eip)/0x1000) << 16; | ||
175 | psaival = (0x1000000 | vect | cpu); | ||
176 | |||
177 | while (*psai & 0x1000000) | ||
178 | ; | ||
179 | |||
180 | *psai = psaival; | ||
181 | |||
182 | return 0; | ||
183 | } | ||
184 | |||
185 | static void noop_wait_for_deassert(atomic_t *deassert_not_used) | ||
186 | { | ||
187 | } | ||
188 | |||
189 | static int __init es7000_update_genapic(void) | ||
190 | { | ||
191 | genapic->wakeup_cpu = wakeup_secondary_cpu_via_mip; | ||
192 | |||
193 | /* MPENTIUMIII: family 6, models 7 through 11 inclusive (both bounds must hold, hence &&) */ | ||
194 | if (boot_cpu_data.x86 == 6 && | ||
195 | (boot_cpu_data.x86_model >= 7 && boot_cpu_data.x86_model <= 11)) { | ||
196 | es7000_update_genapic_to_cluster(); | ||
197 | genapic->wait_for_init_deassert = noop_wait_for_deassert; | ||
198 | genapic->wakeup_cpu = wakeup_secondary_cpu_via_mip; | ||
199 | } | ||
200 | |||
201 | return 0; | ||
202 | } | ||
203 | |||
164 | void __init | 204 | void __init |
165 | setup_unisys(void) | 205 | setup_unisys(void) |
166 | { | 206 | { |
@@ -176,6 +216,8 @@ setup_unisys(void) | |||
176 | else | 216 | else |
177 | es7000_plat = ES7000_CLASSIC; | 217 | es7000_plat = ES7000_CLASSIC; |
178 | ioapic_renumber_irq = es7000_rename_gsi; | 218 | ioapic_renumber_irq = es7000_rename_gsi; |
219 | |||
220 | x86_quirks->update_genapic = es7000_update_genapic; | ||
179 | } | 221 | } |
180 | 222 | ||
181 | /* | 223 | /* |
@@ -317,26 +359,6 @@ es7000_mip_write(struct mip_reg *mip_reg) | |||
317 | return status; | 359 | return status; |
318 | } | 360 | } |
319 | 361 | ||
320 | int | ||
321 | es7000_start_cpu(int cpu, unsigned long eip) | ||
322 | { | ||
323 | unsigned long vect = 0, psaival = 0; | ||
324 | |||
325 | if (psai == NULL) | ||
326 | return -1; | ||
327 | |||
328 | vect = ((unsigned long)__pa(eip)/0x1000) << 16; | ||
329 | psaival = (0x1000000 | vect | cpu); | ||
330 | |||
331 | while (*psai & 0x1000000) | ||
332 | ; | ||
333 | |||
334 | *psai = psaival; | ||
335 | |||
336 | return 0; | ||
337 | |||
338 | } | ||
339 | |||
340 | void __init | 362 | void __init |
341 | es7000_sw_apic(void) | 363 | es7000_sw_apic(void) |
342 | { | 364 | { |
diff --git a/arch/x86/kernel/ftrace.c b/arch/x86/kernel/ftrace.c index 50ea0ac8c9bf..1b43086b097a 100644 --- a/arch/x86/kernel/ftrace.c +++ b/arch/x86/kernel/ftrace.c | |||
@@ -14,14 +14,17 @@ | |||
14 | #include <linux/uaccess.h> | 14 | #include <linux/uaccess.h> |
15 | #include <linux/ftrace.h> | 15 | #include <linux/ftrace.h> |
16 | #include <linux/percpu.h> | 16 | #include <linux/percpu.h> |
17 | #include <linux/sched.h> | ||
17 | #include <linux/init.h> | 18 | #include <linux/init.h> |
18 | #include <linux/list.h> | 19 | #include <linux/list.h> |
19 | 20 | ||
20 | #include <asm/ftrace.h> | 21 | #include <asm/ftrace.h> |
22 | #include <linux/ftrace.h> | ||
21 | #include <asm/nops.h> | 23 | #include <asm/nops.h> |
24 | #include <asm/nmi.h> | ||
22 | 25 | ||
23 | 26 | ||
24 | static unsigned char ftrace_nop[MCOUNT_INSN_SIZE]; | 27 | #ifdef CONFIG_DYNAMIC_FTRACE |
25 | 28 | ||
26 | union ftrace_code_union { | 29 | union ftrace_code_union { |
27 | char code[MCOUNT_INSN_SIZE]; | 30 | char code[MCOUNT_INSN_SIZE]; |
@@ -31,18 +34,12 @@ union ftrace_code_union { | |||
31 | } __attribute__((packed)); | 34 | } __attribute__((packed)); |
32 | }; | 35 | }; |
33 | 36 | ||
34 | |||
35 | static int ftrace_calc_offset(long ip, long addr) | 37 | static int ftrace_calc_offset(long ip, long addr) |
36 | { | 38 | { |
37 | return (int)(addr - ip); | 39 | return (int)(addr - ip); |
38 | } | 40 | } |
39 | 41 | ||
40 | unsigned char *ftrace_nop_replace(void) | 42 | static unsigned char *ftrace_call_replace(unsigned long ip, unsigned long addr) |
41 | { | ||
42 | return ftrace_nop; | ||
43 | } | ||
44 | |||
45 | unsigned char *ftrace_call_replace(unsigned long ip, unsigned long addr) | ||
46 | { | 43 | { |
47 | static union ftrace_code_union calc; | 44 | static union ftrace_code_union calc; |
48 | 45 | ||
@@ -56,7 +53,142 @@ unsigned char *ftrace_call_replace(unsigned long ip, unsigned long addr) | |||
56 | return calc.code; | 53 | return calc.code; |
57 | } | 54 | } |
58 | 55 | ||
59 | int | 56 | /* |
57 | * Modifying code must take extra care. On an SMP machine, if | ||
58 | * the code being modified is also being executed on another CPU | ||
59 | * that CPU will have undefined results and possibly take a GPF. | ||
60 | * We use kstop_machine to stop other CPUS from executing code. | ||
61 | * But this does not stop NMIs from happening. We still need | ||
62 | * to protect against that. We separate out the modification of | ||
63 | * the code to take care of this. | ||
64 | * | ||
65 | * Two buffers are added: An IP buffer and a "code" buffer. | ||
66 | * | ||
67 | * 1) Put the instruction pointer into the IP buffer | ||
68 | * and the new code into the "code" buffer. | ||
69 | * 2) Set a flag that says we are modifying code | ||
70 | * 3) Wait for any running NMIs to finish. | ||
71 | * 4) Write the code | ||
72 | * 5) clear the flag. | ||
73 | * 6) Wait for any running NMIs to finish. | ||
74 | * | ||
75 | * If an NMI is executed, the first thing it does is to call | ||
76 | * "ftrace_nmi_enter". This will check if the flag is set to write | ||
77 | * and if it is, it will write what is in the IP and "code" buffers. | ||
78 | * | ||
79 | * The trick is, it does not matter if everyone is writing the same | ||
80 | * content to the code location. Also, if a CPU is executing code | ||
81 | * it is OK to write to that code location if the contents being written | ||
82 | * are the same as what exists. | ||
83 | */ | ||
84 | |||
85 | static atomic_t in_nmi = ATOMIC_INIT(0); | ||
86 | static int mod_code_status; /* holds return value of text write */ | ||
87 | static int mod_code_write; /* set when NMI should do the write */ | ||
88 | static void *mod_code_ip; /* holds the IP to write to */ | ||
89 | static void *mod_code_newcode; /* holds the text to write to the IP */ | ||
90 | |||
91 | static unsigned nmi_wait_count; | ||
92 | static atomic_t nmi_update_count = ATOMIC_INIT(0); | ||
93 | |||
94 | int ftrace_arch_read_dyn_info(char *buf, int size) | ||
95 | { | ||
96 | int r; | ||
97 | |||
98 | r = snprintf(buf, size, "%u %u", | ||
99 | nmi_wait_count, | ||
100 | atomic_read(&nmi_update_count)); | ||
101 | return r; | ||
102 | } | ||
103 | |||
104 | static void ftrace_mod_code(void) | ||
105 | { | ||
106 | /* | ||
107 | * Yes, more than one CPU process can be writing to mod_code_status. | ||
108 | * (and the code itself) | ||
109 | * But if one were to fail, then they all should, and if one were | ||
110 | * to succeed, then they all should. | ||
111 | */ | ||
112 | mod_code_status = probe_kernel_write(mod_code_ip, mod_code_newcode, | ||
113 | MCOUNT_INSN_SIZE); | ||
114 | } | ||
115 | |||
116 | void ftrace_nmi_enter(void) | ||
117 | { | ||
118 | atomic_inc(&in_nmi); | ||
119 | /* Must have in_nmi seen before reading write flag */ | ||
120 | smp_mb(); | ||
121 | if (mod_code_write) { | ||
122 | ftrace_mod_code(); | ||
123 | atomic_inc(&nmi_update_count); | ||
124 | } | ||
125 | } | ||
126 | |||
127 | void ftrace_nmi_exit(void) | ||
128 | { | ||
129 | /* Finish all executions before clearing in_nmi */ | ||
130 | smp_wmb(); | ||
131 | atomic_dec(&in_nmi); | ||
132 | } | ||
133 | |||
134 | static void wait_for_nmi(void) | ||
135 | { | ||
136 | int waited = 0; | ||
137 | |||
138 | while (atomic_read(&in_nmi)) { | ||
139 | waited = 1; | ||
140 | cpu_relax(); | ||
141 | } | ||
142 | |||
143 | if (waited) | ||
144 | nmi_wait_count++; | ||
145 | } | ||
146 | |||
147 | static int | ||
148 | do_ftrace_mod_code(unsigned long ip, void *new_code) | ||
149 | { | ||
150 | mod_code_ip = (void *)ip; | ||
151 | mod_code_newcode = new_code; | ||
152 | |||
153 | /* The buffers need to be visible before we let NMIs write them */ | ||
154 | smp_wmb(); | ||
155 | |||
156 | mod_code_write = 1; | ||
157 | |||
158 | /* Make sure write bit is visible before we wait on NMIs */ | ||
159 | smp_mb(); | ||
160 | |||
161 | wait_for_nmi(); | ||
162 | |||
163 | /* Make sure all running NMIs have finished before we write the code */ | ||
164 | smp_mb(); | ||
165 | |||
166 | ftrace_mod_code(); | ||
167 | |||
168 | /* Make sure the write happens before clearing the bit */ | ||
169 | smp_wmb(); | ||
170 | |||
171 | mod_code_write = 0; | ||
172 | |||
173 | /* make sure NMIs see the cleared bit */ | ||
174 | smp_mb(); | ||
175 | |||
176 | wait_for_nmi(); | ||
177 | |||
178 | return mod_code_status; | ||
179 | } | ||
180 | |||
181 | |||
182 | |||
183 | |||
184 | static unsigned char ftrace_nop[MCOUNT_INSN_SIZE]; | ||
185 | |||
186 | static unsigned char *ftrace_nop_replace(void) | ||
187 | { | ||
188 | return ftrace_nop; | ||
189 | } | ||
190 | |||
191 | static int | ||
60 | ftrace_modify_code(unsigned long ip, unsigned char *old_code, | 192 | ftrace_modify_code(unsigned long ip, unsigned char *old_code, |
61 | unsigned char *new_code) | 193 | unsigned char *new_code) |
62 | { | 194 | { |
@@ -81,7 +213,7 @@ ftrace_modify_code(unsigned long ip, unsigned char *old_code, | |||
81 | return -EINVAL; | 213 | return -EINVAL; |
82 | 214 | ||
83 | /* replace the text with the new text */ | 215 | /* replace the text with the new text */ |
84 | if (probe_kernel_write((void *)ip, new_code, MCOUNT_INSN_SIZE)) | 216 | if (do_ftrace_mod_code(ip, new_code)) |
85 | return -EPERM; | 217 | return -EPERM; |
86 | 218 | ||
87 | sync_core(); | 219 | sync_core(); |
@@ -89,6 +221,29 @@ ftrace_modify_code(unsigned long ip, unsigned char *old_code, | |||
89 | return 0; | 221 | return 0; |
90 | } | 222 | } |
91 | 223 | ||
224 | int ftrace_make_nop(struct module *mod, | ||
225 | struct dyn_ftrace *rec, unsigned long addr) | ||
226 | { | ||
227 | unsigned char *new, *old; | ||
228 | unsigned long ip = rec->ip; | ||
229 | |||
230 | old = ftrace_call_replace(ip, addr); | ||
231 | new = ftrace_nop_replace(); | ||
232 | |||
233 | return ftrace_modify_code(rec->ip, old, new); | ||
234 | } | ||
235 | |||
236 | int ftrace_make_call(struct dyn_ftrace *rec, unsigned long addr) | ||
237 | { | ||
238 | unsigned char *new, *old; | ||
239 | unsigned long ip = rec->ip; | ||
240 | |||
241 | old = ftrace_nop_replace(); | ||
242 | new = ftrace_call_replace(ip, addr); | ||
243 | |||
244 | return ftrace_modify_code(rec->ip, old, new); | ||
245 | } | ||
246 | |||
92 | int ftrace_update_ftrace_func(ftrace_func_t func) | 247 | int ftrace_update_ftrace_func(ftrace_func_t func) |
93 | { | 248 | { |
94 | unsigned long ip = (unsigned long)(&ftrace_call); | 249 | unsigned long ip = (unsigned long)(&ftrace_call); |
@@ -165,3 +320,218 @@ int __init ftrace_dyn_arch_init(void *data) | |||
165 | 320 | ||
166 | return 0; | 321 | return 0; |
167 | } | 322 | } |
323 | #endif | ||
324 | |||
325 | #ifdef CONFIG_FUNCTION_GRAPH_TRACER | ||
326 | |||
327 | #ifdef CONFIG_DYNAMIC_FTRACE | ||
328 | extern void ftrace_graph_call(void); | ||
329 | |||
330 | static int ftrace_mod_jmp(unsigned long ip, | ||
331 | int old_offset, int new_offset) | ||
332 | { | ||
333 | unsigned char code[MCOUNT_INSN_SIZE]; | ||
334 | |||
335 | if (probe_kernel_read(code, (void *)ip, MCOUNT_INSN_SIZE)) | ||
336 | return -EFAULT; | ||
337 | |||
338 | if (code[0] != 0xe9 || old_offset != *(int *)(&code[1])) | ||
339 | return -EINVAL; | ||
340 | |||
341 | *(int *)(&code[1]) = new_offset; | ||
342 | |||
343 | if (do_ftrace_mod_code(ip, &code)) | ||
344 | return -EPERM; | ||
345 | |||
346 | return 0; | ||
347 | } | ||
348 | |||
349 | int ftrace_enable_ftrace_graph_caller(void) | ||
350 | { | ||
351 | unsigned long ip = (unsigned long)(&ftrace_graph_call); | ||
352 | int old_offset, new_offset; | ||
353 | |||
354 | old_offset = (unsigned long)(&ftrace_stub) - (ip + MCOUNT_INSN_SIZE); | ||
355 | new_offset = (unsigned long)(&ftrace_graph_caller) - (ip + MCOUNT_INSN_SIZE); | ||
356 | |||
357 | return ftrace_mod_jmp(ip, old_offset, new_offset); | ||
358 | } | ||
359 | |||
360 | int ftrace_disable_ftrace_graph_caller(void) | ||
361 | { | ||
362 | unsigned long ip = (unsigned long)(&ftrace_graph_call); | ||
363 | int old_offset, new_offset; | ||
364 | |||
365 | old_offset = (unsigned long)(&ftrace_graph_caller) - (ip + MCOUNT_INSN_SIZE); | ||
366 | new_offset = (unsigned long)(&ftrace_stub) - (ip + MCOUNT_INSN_SIZE); | ||
367 | |||
368 | return ftrace_mod_jmp(ip, old_offset, new_offset); | ||
369 | } | ||
370 | |||
371 | #else /* CONFIG_DYNAMIC_FTRACE */ | ||
372 | |||
373 | /* | ||
374 | * These functions are picked from those used on | ||
375 | * this page for dynamic ftrace. They have been | ||
376 | * simplified to ignore all traces in NMI context. | ||
377 | */ | ||
378 | static atomic_t in_nmi; | ||
379 | |||
380 | void ftrace_nmi_enter(void) | ||
381 | { | ||
382 | atomic_inc(&in_nmi); | ||
383 | } | ||
384 | |||
385 | void ftrace_nmi_exit(void) | ||
386 | { | ||
387 | atomic_dec(&in_nmi); | ||
388 | } | ||
389 | |||
390 | #endif /* !CONFIG_DYNAMIC_FTRACE */ | ||
391 | |||
392 | /* Add a function return address to the trace stack on thread info.*/ | ||
393 | static int push_return_trace(unsigned long ret, unsigned long long time, | ||
394 | unsigned long func, int *depth) | ||
395 | { | ||
396 | int index; | ||
397 | |||
398 | if (!current->ret_stack) | ||
399 | return -EBUSY; | ||
400 | |||
401 | /* The return trace stack is full */ | ||
402 | if (current->curr_ret_stack == FTRACE_RETFUNC_DEPTH - 1) { | ||
403 | atomic_inc(&current->trace_overrun); | ||
404 | return -EBUSY; | ||
405 | } | ||
406 | |||
407 | index = ++current->curr_ret_stack; | ||
408 | barrier(); | ||
409 | current->ret_stack[index].ret = ret; | ||
410 | current->ret_stack[index].func = func; | ||
411 | current->ret_stack[index].calltime = time; | ||
412 | *depth = index; | ||
413 | |||
414 | return 0; | ||
415 | } | ||
416 | |||
417 | /* Retrieve a function return address to the trace stack on thread info.*/ | ||
418 | static void pop_return_trace(struct ftrace_graph_ret *trace, unsigned long *ret) | ||
419 | { | ||
420 | int index; | ||
421 | |||
422 | index = current->curr_ret_stack; | ||
423 | |||
424 | if (unlikely(index < 0)) { | ||
425 | ftrace_graph_stop(); | ||
426 | WARN_ON(1); | ||
427 | /* Might as well panic, otherwise we have no where to go */ | ||
428 | *ret = (unsigned long)panic; | ||
429 | return; | ||
430 | } | ||
431 | |||
432 | *ret = current->ret_stack[index].ret; | ||
433 | trace->func = current->ret_stack[index].func; | ||
434 | trace->calltime = current->ret_stack[index].calltime; | ||
435 | trace->overrun = atomic_read(&current->trace_overrun); | ||
436 | trace->depth = index; | ||
437 | barrier(); | ||
438 | current->curr_ret_stack--; | ||
439 | |||
440 | } | ||
441 | |||
442 | /* | ||
443 | * Send the trace to the ring-buffer. | ||
444 | * @return the original return address. | ||
445 | */ | ||
446 | unsigned long ftrace_return_to_handler(void) | ||
447 | { | ||
448 | struct ftrace_graph_ret trace; | ||
449 | unsigned long ret; | ||
450 | |||
451 | pop_return_trace(&trace, &ret); | ||
452 | trace.rettime = cpu_clock(raw_smp_processor_id()); | ||
453 | ftrace_graph_return(&trace); | ||
454 | |||
455 | if (unlikely(!ret)) { | ||
456 | ftrace_graph_stop(); | ||
457 | WARN_ON(1); | ||
458 | /* Might as well panic. What else to do? */ | ||
459 | ret = (unsigned long)panic; | ||
460 | } | ||
461 | |||
462 | return ret; | ||
463 | } | ||
464 | |||
465 | /* | ||
466 | * Hook the return address and push it in the stack of return addrs | ||
467 | * in current thread info. | ||
468 | */ | ||
469 | void prepare_ftrace_return(unsigned long *parent, unsigned long self_addr) | ||
470 | { | ||
471 | unsigned long old; | ||
472 | unsigned long long calltime; | ||
473 | int faulted; | ||
474 | struct ftrace_graph_ent trace; | ||
475 | unsigned long return_hooker = (unsigned long) | ||
476 | &return_to_handler; | ||
477 | |||
478 | /* Nmi's are currently unsupported */ | ||
479 | if (unlikely(atomic_read(&in_nmi))) | ||
480 | return; | ||
481 | |||
482 | if (unlikely(atomic_read(&current->tracing_graph_pause))) | ||
483 | return; | ||
484 | |||
485 | /* | ||
486 | * Protect against fault, even if it shouldn't | ||
487 | * happen. This tool is too much intrusive to | ||
488 | * ignore such a protection. | ||
489 | */ | ||
490 | asm volatile( | ||
491 | "1: " _ASM_MOV " (%[parent_old]), %[old]\n" | ||
492 | "2: " _ASM_MOV " %[return_hooker], (%[parent_replaced])\n" | ||
493 | " movl $0, %[faulted]\n" | ||
494 | |||
495 | ".section .fixup, \"ax\"\n" | ||
496 | "3: movl $1, %[faulted]\n" | ||
497 | ".previous\n" | ||
498 | |||
499 | _ASM_EXTABLE(1b, 3b) | ||
500 | _ASM_EXTABLE(2b, 3b) | ||
501 | |||
502 | : [parent_replaced] "=r" (parent), [old] "=r" (old), | ||
503 | [faulted] "=r" (faulted) | ||
504 | : [parent_old] "0" (parent), [return_hooker] "r" (return_hooker) | ||
505 | : "memory" | ||
506 | ); | ||
507 | |||
508 | if (unlikely(faulted)) { | ||
509 | ftrace_graph_stop(); | ||
510 | WARN_ON(1); | ||
511 | return; | ||
512 | } | ||
513 | |||
514 | if (unlikely(!__kernel_text_address(old))) { | ||
515 | ftrace_graph_stop(); | ||
516 | *parent = old; | ||
517 | WARN_ON(1); | ||
518 | return; | ||
519 | } | ||
520 | |||
521 | calltime = cpu_clock(raw_smp_processor_id()); | ||
522 | |||
523 | if (push_return_trace(old, calltime, | ||
524 | self_addr, &trace.depth) == -EBUSY) { | ||
525 | *parent = old; | ||
526 | return; | ||
527 | } | ||
528 | |||
529 | trace.func = self_addr; | ||
530 | |||
531 | /* Only trace if the calling function expects to */ | ||
532 | if (!ftrace_graph_entry(&trace)) { | ||
533 | current->curr_ret_stack--; | ||
534 | *parent = old; | ||
535 | } | ||
536 | } | ||
537 | #endif /* CONFIG_FUNCTION_GRAPH_TRACER */ | ||
diff --git a/arch/x86/kernel/genapic_64.c b/arch/x86/kernel/genapic_64.c index 6c9bfc9e1e95..2bced78b0b8e 100644 --- a/arch/x86/kernel/genapic_64.c +++ b/arch/x86/kernel/genapic_64.c | |||
@@ -21,6 +21,7 @@ | |||
21 | #include <asm/smp.h> | 21 | #include <asm/smp.h> |
22 | #include <asm/ipi.h> | 22 | #include <asm/ipi.h> |
23 | #include <asm/genapic.h> | 23 | #include <asm/genapic.h> |
24 | #include <asm/setup.h> | ||
24 | 25 | ||
25 | extern struct genapic apic_flat; | 26 | extern struct genapic apic_flat; |
26 | extern struct genapic apic_physflat; | 27 | extern struct genapic apic_physflat; |
@@ -53,6 +54,9 @@ void __init setup_apic_routing(void) | |||
53 | genapic = &apic_physflat; | 54 | genapic = &apic_physflat; |
54 | printk(KERN_INFO "Setting APIC routing to %s\n", genapic->name); | 55 | printk(KERN_INFO "Setting APIC routing to %s\n", genapic->name); |
55 | } | 56 | } |
57 | |||
58 | if (x86_quirks->update_genapic) | ||
59 | x86_quirks->update_genapic(); | ||
56 | } | 60 | } |
57 | 61 | ||
58 | /* Same for both flat and physical. */ | 62 | /* Same for both flat and physical. */ |
diff --git a/arch/x86/kernel/genapic_flat_64.c b/arch/x86/kernel/genapic_flat_64.c index c0262791bda4..34185488e4fb 100644 --- a/arch/x86/kernel/genapic_flat_64.c +++ b/arch/x86/kernel/genapic_flat_64.c | |||
@@ -30,12 +30,12 @@ static int flat_acpi_madt_oem_check(char *oem_id, char *oem_table_id) | |||
30 | return 1; | 30 | return 1; |
31 | } | 31 | } |
32 | 32 | ||
33 | static cpumask_t flat_target_cpus(void) | 33 | static const struct cpumask *flat_target_cpus(void) |
34 | { | 34 | { |
35 | return cpu_online_map; | 35 | return cpu_online_mask; |
36 | } | 36 | } |
37 | 37 | ||
38 | static cpumask_t flat_vector_allocation_domain(int cpu) | 38 | static void flat_vector_allocation_domain(int cpu, struct cpumask *retmask) |
39 | { | 39 | { |
40 | /* Careful. Some cpus do not strictly honor the set of cpus | 40 | /* Careful. Some cpus do not strictly honor the set of cpus |
41 | * specified in the interrupt destination when using lowest | 41 | * specified in the interrupt destination when using lowest |
@@ -45,8 +45,8 @@ static cpumask_t flat_vector_allocation_domain(int cpu) | |||
45 | * deliver interrupts to the wrong hyperthread when only one | 45 | * deliver interrupts to the wrong hyperthread when only one |
46 | * hyperthread was specified in the interrupt desitination. | 46 | * hyperthread was specified in the interrupt desitination. |
47 | */ | 47 | */ |
48 | cpumask_t domain = { { [0] = APIC_ALL_CPUS, } }; | 48 | cpumask_clear(retmask); |
49 | return domain; | 49 | cpumask_bits(retmask)[0] = APIC_ALL_CPUS; |
50 | } | 50 | } |
51 | 51 | ||
52 | /* | 52 | /* |
@@ -69,9 +69,8 @@ static void flat_init_apic_ldr(void) | |||
69 | apic_write(APIC_LDR, val); | 69 | apic_write(APIC_LDR, val); |
70 | } | 70 | } |
71 | 71 | ||
72 | static void flat_send_IPI_mask(cpumask_t cpumask, int vector) | 72 | static inline void _flat_send_IPI_mask(unsigned long mask, int vector) |
73 | { | 73 | { |
74 | unsigned long mask = cpus_addr(cpumask)[0]; | ||
75 | unsigned long flags; | 74 | unsigned long flags; |
76 | 75 | ||
77 | local_irq_save(flags); | 76 | local_irq_save(flags); |
@@ -79,20 +78,41 @@ static void flat_send_IPI_mask(cpumask_t cpumask, int vector) | |||
79 | local_irq_restore(flags); | 78 | local_irq_restore(flags); |
80 | } | 79 | } |
81 | 80 | ||
81 | static void flat_send_IPI_mask(const struct cpumask *cpumask, int vector) | ||
82 | { | ||
83 | unsigned long mask = cpumask_bits(cpumask)[0]; | ||
84 | |||
85 | _flat_send_IPI_mask(mask, vector); | ||
86 | } | ||
87 | |||
88 | static void flat_send_IPI_mask_allbutself(const struct cpumask *cpumask, | ||
89 | int vector) | ||
90 | { | ||
91 | unsigned long mask = cpumask_bits(cpumask)[0]; | ||
92 | int cpu = smp_processor_id(); | ||
93 | |||
94 | if (cpu < BITS_PER_LONG) | ||
95 | clear_bit(cpu, &mask); | ||
96 | _flat_send_IPI_mask(mask, vector); | ||
97 | } | ||
98 | |||
82 | static void flat_send_IPI_allbutself(int vector) | 99 | static void flat_send_IPI_allbutself(int vector) |
83 | { | 100 | { |
101 | int cpu = smp_processor_id(); | ||
84 | #ifdef CONFIG_HOTPLUG_CPU | 102 | #ifdef CONFIG_HOTPLUG_CPU |
85 | int hotplug = 1; | 103 | int hotplug = 1; |
86 | #else | 104 | #else |
87 | int hotplug = 0; | 105 | int hotplug = 0; |
88 | #endif | 106 | #endif |
89 | if (hotplug || vector == NMI_VECTOR) { | 107 | if (hotplug || vector == NMI_VECTOR) { |
90 | cpumask_t allbutme = cpu_online_map; | 108 | if (!cpumask_equal(cpu_online_mask, cpumask_of(cpu))) { |
109 | unsigned long mask = cpumask_bits(cpu_online_mask)[0]; | ||
91 | 110 | ||
92 | cpu_clear(smp_processor_id(), allbutme); | 111 | if (cpu < BITS_PER_LONG) |
112 | clear_bit(cpu, &mask); | ||
93 | 113 | ||
94 | if (!cpus_empty(allbutme)) | 114 | _flat_send_IPI_mask(mask, vector); |
95 | flat_send_IPI_mask(allbutme, vector); | 115 | } |
96 | } else if (num_online_cpus() > 1) { | 116 | } else if (num_online_cpus() > 1) { |
97 | __send_IPI_shortcut(APIC_DEST_ALLBUT, vector,APIC_DEST_LOGICAL); | 117 | __send_IPI_shortcut(APIC_DEST_ALLBUT, vector,APIC_DEST_LOGICAL); |
98 | } | 118 | } |
@@ -101,7 +121,7 @@ static void flat_send_IPI_allbutself(int vector) | |||
101 | static void flat_send_IPI_all(int vector) | 121 | static void flat_send_IPI_all(int vector) |
102 | { | 122 | { |
103 | if (vector == NMI_VECTOR) | 123 | if (vector == NMI_VECTOR) |
104 | flat_send_IPI_mask(cpu_online_map, vector); | 124 | flat_send_IPI_mask(cpu_online_mask, vector); |
105 | else | 125 | else |
106 | __send_IPI_shortcut(APIC_DEST_ALLINC, vector, APIC_DEST_LOGICAL); | 126 | __send_IPI_shortcut(APIC_DEST_ALLINC, vector, APIC_DEST_LOGICAL); |
107 | } | 127 | } |
@@ -135,9 +155,18 @@ static int flat_apic_id_registered(void) | |||
135 | return physid_isset(read_xapic_id(), phys_cpu_present_map); | 155 | return physid_isset(read_xapic_id(), phys_cpu_present_map); |
136 | } | 156 | } |
137 | 157 | ||
138 | static unsigned int flat_cpu_mask_to_apicid(cpumask_t cpumask) | 158 | static unsigned int flat_cpu_mask_to_apicid(const struct cpumask *cpumask) |
159 | { | ||
160 | return cpumask_bits(cpumask)[0] & APIC_ALL_CPUS; | ||
161 | } | ||
162 | |||
163 | static unsigned int flat_cpu_mask_to_apicid_and(const struct cpumask *cpumask, | ||
164 | const struct cpumask *andmask) | ||
139 | { | 165 | { |
140 | return cpus_addr(cpumask)[0] & APIC_ALL_CPUS; | 166 | unsigned long mask1 = cpumask_bits(cpumask)[0] & APIC_ALL_CPUS; |
167 | unsigned long mask2 = cpumask_bits(andmask)[0] & APIC_ALL_CPUS; | ||
168 | |||
169 | return mask1 & mask2; | ||
141 | } | 170 | } |
142 | 171 | ||
143 | static unsigned int phys_pkg_id(int index_msb) | 172 | static unsigned int phys_pkg_id(int index_msb) |
@@ -157,8 +186,10 @@ struct genapic apic_flat = { | |||
157 | .send_IPI_all = flat_send_IPI_all, | 186 | .send_IPI_all = flat_send_IPI_all, |
158 | .send_IPI_allbutself = flat_send_IPI_allbutself, | 187 | .send_IPI_allbutself = flat_send_IPI_allbutself, |
159 | .send_IPI_mask = flat_send_IPI_mask, | 188 | .send_IPI_mask = flat_send_IPI_mask, |
189 | .send_IPI_mask_allbutself = flat_send_IPI_mask_allbutself, | ||
160 | .send_IPI_self = apic_send_IPI_self, | 190 | .send_IPI_self = apic_send_IPI_self, |
161 | .cpu_mask_to_apicid = flat_cpu_mask_to_apicid, | 191 | .cpu_mask_to_apicid = flat_cpu_mask_to_apicid, |
192 | .cpu_mask_to_apicid_and = flat_cpu_mask_to_apicid_and, | ||
162 | .phys_pkg_id = phys_pkg_id, | 193 | .phys_pkg_id = phys_pkg_id, |
163 | .get_apic_id = get_apic_id, | 194 | .get_apic_id = get_apic_id, |
164 | .set_apic_id = set_apic_id, | 195 | .set_apic_id = set_apic_id, |
@@ -188,35 +219,39 @@ static int physflat_acpi_madt_oem_check(char *oem_id, char *oem_table_id) | |||
188 | return 0; | 219 | return 0; |
189 | } | 220 | } |
190 | 221 | ||
191 | static cpumask_t physflat_target_cpus(void) | 222 | static const struct cpumask *physflat_target_cpus(void) |
192 | { | 223 | { |
193 | return cpu_online_map; | 224 | return cpu_online_mask; |
194 | } | 225 | } |
195 | 226 | ||
196 | static cpumask_t physflat_vector_allocation_domain(int cpu) | 227 | static void physflat_vector_allocation_domain(int cpu, struct cpumask *retmask) |
197 | { | 228 | { |
198 | return cpumask_of_cpu(cpu); | 229 | cpumask_clear(retmask); |
230 | cpumask_set_cpu(cpu, retmask); | ||
199 | } | 231 | } |
200 | 232 | ||
201 | static void physflat_send_IPI_mask(cpumask_t cpumask, int vector) | 233 | static void physflat_send_IPI_mask(const struct cpumask *cpumask, int vector) |
202 | { | 234 | { |
203 | send_IPI_mask_sequence(cpumask, vector); | 235 | send_IPI_mask_sequence(cpumask, vector); |
204 | } | 236 | } |
205 | 237 | ||
206 | static void physflat_send_IPI_allbutself(int vector) | 238 | static void physflat_send_IPI_mask_allbutself(const struct cpumask *cpumask, |
239 | int vector) | ||
207 | { | 240 | { |
208 | cpumask_t allbutme = cpu_online_map; | 241 | send_IPI_mask_allbutself(cpumask, vector); |
242 | } | ||
209 | 243 | ||
210 | cpu_clear(smp_processor_id(), allbutme); | 244 | static void physflat_send_IPI_allbutself(int vector) |
211 | physflat_send_IPI_mask(allbutme, vector); | 245 | { |
246 | send_IPI_mask_allbutself(cpu_online_mask, vector); | ||
212 | } | 247 | } |
213 | 248 | ||
214 | static void physflat_send_IPI_all(int vector) | 249 | static void physflat_send_IPI_all(int vector) |
215 | { | 250 | { |
216 | physflat_send_IPI_mask(cpu_online_map, vector); | 251 | physflat_send_IPI_mask(cpu_online_mask, vector); |
217 | } | 252 | } |
218 | 253 | ||
219 | static unsigned int physflat_cpu_mask_to_apicid(cpumask_t cpumask) | 254 | static unsigned int physflat_cpu_mask_to_apicid(const struct cpumask *cpumask) |
220 | { | 255 | { |
221 | int cpu; | 256 | int cpu; |
222 | 257 | ||
@@ -224,13 +259,31 @@ static unsigned int physflat_cpu_mask_to_apicid(cpumask_t cpumask) | |||
224 | * We're using fixed IRQ delivery, can only return one phys APIC ID. | 259 | * We're using fixed IRQ delivery, can only return one phys APIC ID. |
225 | * May as well be the first. | 260 | * May as well be the first. |
226 | */ | 261 | */ |
227 | cpu = first_cpu(cpumask); | 262 | cpu = cpumask_first(cpumask); |
228 | if ((unsigned)cpu < nr_cpu_ids) | 263 | if ((unsigned)cpu < nr_cpu_ids) |
229 | return per_cpu(x86_cpu_to_apicid, cpu); | 264 | return per_cpu(x86_cpu_to_apicid, cpu); |
230 | else | 265 | else |
231 | return BAD_APICID; | 266 | return BAD_APICID; |
232 | } | 267 | } |
233 | 268 | ||
269 | static unsigned int | ||
270 | physflat_cpu_mask_to_apicid_and(const struct cpumask *cpumask, | ||
271 | const struct cpumask *andmask) | ||
272 | { | ||
273 | int cpu; | ||
274 | |||
275 | /* | ||
276 | * We're using fixed IRQ delivery, can only return one phys APIC ID. | ||
277 | * May as well be the first. | ||
278 | */ | ||
279 | for_each_cpu_and(cpu, cpumask, andmask) | ||
280 | if (cpumask_test_cpu(cpu, cpu_online_mask)) | ||
281 | break; | ||
282 | if (cpu < nr_cpu_ids) | ||
283 | return per_cpu(x86_cpu_to_apicid, cpu); | ||
284 | return BAD_APICID; | ||
285 | } | ||
286 | |||
234 | struct genapic apic_physflat = { | 287 | struct genapic apic_physflat = { |
235 | .name = "physical flat", | 288 | .name = "physical flat", |
236 | .acpi_madt_oem_check = physflat_acpi_madt_oem_check, | 289 | .acpi_madt_oem_check = physflat_acpi_madt_oem_check, |
@@ -243,8 +296,10 @@ struct genapic apic_physflat = { | |||
243 | .send_IPI_all = physflat_send_IPI_all, | 296 | .send_IPI_all = physflat_send_IPI_all, |
244 | .send_IPI_allbutself = physflat_send_IPI_allbutself, | 297 | .send_IPI_allbutself = physflat_send_IPI_allbutself, |
245 | .send_IPI_mask = physflat_send_IPI_mask, | 298 | .send_IPI_mask = physflat_send_IPI_mask, |
299 | .send_IPI_mask_allbutself = physflat_send_IPI_mask_allbutself, | ||
246 | .send_IPI_self = apic_send_IPI_self, | 300 | .send_IPI_self = apic_send_IPI_self, |
247 | .cpu_mask_to_apicid = physflat_cpu_mask_to_apicid, | 301 | .cpu_mask_to_apicid = physflat_cpu_mask_to_apicid, |
302 | .cpu_mask_to_apicid_and = physflat_cpu_mask_to_apicid_and, | ||
248 | .phys_pkg_id = phys_pkg_id, | 303 | .phys_pkg_id = phys_pkg_id, |
249 | .get_apic_id = get_apic_id, | 304 | .get_apic_id = get_apic_id, |
250 | .set_apic_id = set_apic_id, | 305 | .set_apic_id = set_apic_id, |
diff --git a/arch/x86/kernel/genx2apic_cluster.c b/arch/x86/kernel/genx2apic_cluster.c index f6a2c8eb48a6..6ce497cc372d 100644 --- a/arch/x86/kernel/genx2apic_cluster.c +++ b/arch/x86/kernel/genx2apic_cluster.c | |||
@@ -22,19 +22,18 @@ static int x2apic_acpi_madt_oem_check(char *oem_id, char *oem_table_id) | |||
22 | 22 | ||
23 | /* Start with all IRQs pointing to boot CPU. IRQ balancing will shift them. */ | 23 | /* Start with all IRQs pointing to boot CPU. IRQ balancing will shift them. */ |
24 | 24 | ||
25 | static cpumask_t x2apic_target_cpus(void) | 25 | static const struct cpumask *x2apic_target_cpus(void) |
26 | { | 26 | { |
27 | return cpumask_of_cpu(0); | 27 | return cpumask_of(0); |
28 | } | 28 | } |
29 | 29 | ||
30 | /* | 30 | /* |
31 | * for now each logical cpu is in its own vector allocation domain. | 31 | * for now each logical cpu is in its own vector allocation domain. |
32 | */ | 32 | */ |
33 | static cpumask_t x2apic_vector_allocation_domain(int cpu) | 33 | static void x2apic_vector_allocation_domain(int cpu, struct cpumask *retmask) |
34 | { | 34 | { |
35 | cpumask_t domain = CPU_MASK_NONE; | 35 | cpumask_clear(retmask); |
36 | cpu_set(cpu, domain); | 36 | cpumask_set_cpu(cpu, retmask); |
37 | return domain; | ||
38 | } | 37 | } |
39 | 38 | ||
40 | static void __x2apic_send_IPI_dest(unsigned int apicid, int vector, | 39 | static void __x2apic_send_IPI_dest(unsigned int apicid, int vector, |
@@ -56,32 +55,53 @@ static void __x2apic_send_IPI_dest(unsigned int apicid, int vector, | |||
56 | * at once. We have 16 cpu's in a cluster. This will minimize IPI register | 55 | * at once. We have 16 cpu's in a cluster. This will minimize IPI register |
57 | * writes. | 56 | * writes. |
58 | */ | 57 | */ |
59 | static void x2apic_send_IPI_mask(cpumask_t mask, int vector) | 58 | static void x2apic_send_IPI_mask(const struct cpumask *mask, int vector) |
60 | { | 59 | { |
61 | unsigned long flags; | 60 | unsigned long flags; |
62 | unsigned long query_cpu; | 61 | unsigned long query_cpu; |
63 | 62 | ||
64 | local_irq_save(flags); | 63 | local_irq_save(flags); |
65 | for_each_cpu_mask(query_cpu, mask) { | 64 | for_each_cpu(query_cpu, mask) |
66 | __x2apic_send_IPI_dest(per_cpu(x86_cpu_to_logical_apicid, query_cpu), | 65 | __x2apic_send_IPI_dest( |
67 | vector, APIC_DEST_LOGICAL); | 66 | per_cpu(x86_cpu_to_logical_apicid, query_cpu), |
68 | } | 67 | vector, APIC_DEST_LOGICAL); |
69 | local_irq_restore(flags); | 68 | local_irq_restore(flags); |
70 | } | 69 | } |
71 | 70 | ||
72 | static void x2apic_send_IPI_allbutself(int vector) | 71 | static void x2apic_send_IPI_mask_allbutself(const struct cpumask *mask, |
72 | int vector) | ||
73 | { | 73 | { |
74 | cpumask_t mask = cpu_online_map; | 74 | unsigned long flags; |
75 | unsigned long query_cpu; | ||
76 | unsigned long this_cpu = smp_processor_id(); | ||
75 | 77 | ||
76 | cpu_clear(smp_processor_id(), mask); | 78 | local_irq_save(flags); |
79 | for_each_cpu(query_cpu, mask) | ||
80 | if (query_cpu != this_cpu) | ||
81 | __x2apic_send_IPI_dest( | ||
82 | per_cpu(x86_cpu_to_logical_apicid, query_cpu), | ||
83 | vector, APIC_DEST_LOGICAL); | ||
84 | local_irq_restore(flags); | ||
85 | } | ||
86 | |||
87 | static void x2apic_send_IPI_allbutself(int vector) | ||
88 | { | ||
89 | unsigned long flags; | ||
90 | unsigned long query_cpu; | ||
91 | unsigned long this_cpu = smp_processor_id(); | ||
77 | 92 | ||
78 | if (!cpus_empty(mask)) | 93 | local_irq_save(flags); |
79 | x2apic_send_IPI_mask(mask, vector); | 94 | for_each_online_cpu(query_cpu) |
95 | if (query_cpu != this_cpu) | ||
96 | __x2apic_send_IPI_dest( | ||
97 | per_cpu(x86_cpu_to_logical_apicid, query_cpu), | ||
98 | vector, APIC_DEST_LOGICAL); | ||
99 | local_irq_restore(flags); | ||
80 | } | 100 | } |
81 | 101 | ||
82 | static void x2apic_send_IPI_all(int vector) | 102 | static void x2apic_send_IPI_all(int vector) |
83 | { | 103 | { |
84 | x2apic_send_IPI_mask(cpu_online_map, vector); | 104 | x2apic_send_IPI_mask(cpu_online_mask, vector); |
85 | } | 105 | } |
86 | 106 | ||
87 | static int x2apic_apic_id_registered(void) | 107 | static int x2apic_apic_id_registered(void) |
@@ -89,21 +109,38 @@ static int x2apic_apic_id_registered(void) | |||
89 | return 1; | 109 | return 1; |
90 | } | 110 | } |
91 | 111 | ||
92 | static unsigned int x2apic_cpu_mask_to_apicid(cpumask_t cpumask) | 112 | static unsigned int x2apic_cpu_mask_to_apicid(const struct cpumask *cpumask) |
93 | { | 113 | { |
94 | int cpu; | 114 | int cpu; |
95 | 115 | ||
96 | /* | 116 | /* |
97 | * We're using fixed IRQ delivery, can only return one phys APIC ID. | 117 | * We're using fixed IRQ delivery, can only return one logical APIC ID. |
98 | * May as well be the first. | 118 | * May as well be the first. |
99 | */ | 119 | */ |
100 | cpu = first_cpu(cpumask); | 120 | cpu = cpumask_first(cpumask); |
101 | if ((unsigned)cpu < NR_CPUS) | 121 | if ((unsigned)cpu < nr_cpu_ids) |
102 | return per_cpu(x86_cpu_to_logical_apicid, cpu); | 122 | return per_cpu(x86_cpu_to_logical_apicid, cpu); |
103 | else | 123 | else |
104 | return BAD_APICID; | 124 | return BAD_APICID; |
105 | } | 125 | } |
106 | 126 | ||
127 | static unsigned int x2apic_cpu_mask_to_apicid_and(const struct cpumask *cpumask, | ||
128 | const struct cpumask *andmask) | ||
129 | { | ||
130 | int cpu; | ||
131 | |||
132 | /* | ||
133 | * We're using fixed IRQ delivery, can only return one logical APIC ID. | ||
134 | * May as well be the first. | ||
135 | */ | ||
136 | for_each_cpu_and(cpu, cpumask, andmask) | ||
137 | if (cpumask_test_cpu(cpu, cpu_online_mask)) | ||
138 | break; | ||
139 | if (cpu < nr_cpu_ids) | ||
140 | return per_cpu(x86_cpu_to_logical_apicid, cpu); | ||
141 | return BAD_APICID; | ||
142 | } | ||
143 | |||
107 | static unsigned int get_apic_id(unsigned long x) | 144 | static unsigned int get_apic_id(unsigned long x) |
108 | { | 145 | { |
109 | unsigned int id; | 146 | unsigned int id; |
@@ -150,8 +187,10 @@ struct genapic apic_x2apic_cluster = { | |||
150 | .send_IPI_all = x2apic_send_IPI_all, | 187 | .send_IPI_all = x2apic_send_IPI_all, |
151 | .send_IPI_allbutself = x2apic_send_IPI_allbutself, | 188 | .send_IPI_allbutself = x2apic_send_IPI_allbutself, |
152 | .send_IPI_mask = x2apic_send_IPI_mask, | 189 | .send_IPI_mask = x2apic_send_IPI_mask, |
190 | .send_IPI_mask_allbutself = x2apic_send_IPI_mask_allbutself, | ||
153 | .send_IPI_self = x2apic_send_IPI_self, | 191 | .send_IPI_self = x2apic_send_IPI_self, |
154 | .cpu_mask_to_apicid = x2apic_cpu_mask_to_apicid, | 192 | .cpu_mask_to_apicid = x2apic_cpu_mask_to_apicid, |
193 | .cpu_mask_to_apicid_and = x2apic_cpu_mask_to_apicid_and, | ||
155 | .phys_pkg_id = phys_pkg_id, | 194 | .phys_pkg_id = phys_pkg_id, |
156 | .get_apic_id = get_apic_id, | 195 | .get_apic_id = get_apic_id, |
157 | .set_apic_id = set_apic_id, | 196 | .set_apic_id = set_apic_id, |
diff --git a/arch/x86/kernel/genx2apic_phys.c b/arch/x86/kernel/genx2apic_phys.c index d042211768b7..21bcc0e098ba 100644 --- a/arch/x86/kernel/genx2apic_phys.c +++ b/arch/x86/kernel/genx2apic_phys.c | |||
@@ -29,16 +29,15 @@ static int x2apic_acpi_madt_oem_check(char *oem_id, char *oem_table_id) | |||
29 | 29 | ||
30 | /* Start with all IRQs pointing to boot CPU. IRQ balancing will shift them. */ | 30 | /* Start with all IRQs pointing to boot CPU. IRQ balancing will shift them. */ |
31 | 31 | ||
32 | static cpumask_t x2apic_target_cpus(void) | 32 | static const struct cpumask *x2apic_target_cpus(void) |
33 | { | 33 | { |
34 | return cpumask_of_cpu(0); | 34 | return cpumask_of(0); |
35 | } | 35 | } |
36 | 36 | ||
37 | static cpumask_t x2apic_vector_allocation_domain(int cpu) | 37 | static void x2apic_vector_allocation_domain(int cpu, struct cpumask *retmask) |
38 | { | 38 | { |
39 | cpumask_t domain = CPU_MASK_NONE; | 39 | cpumask_clear(retmask); |
40 | cpu_set(cpu, domain); | 40 | cpumask_set_cpu(cpu, retmask); |
41 | return domain; | ||
42 | } | 41 | } |
43 | 42 | ||
44 | static void __x2apic_send_IPI_dest(unsigned int apicid, int vector, | 43 | static void __x2apic_send_IPI_dest(unsigned int apicid, int vector, |
@@ -54,32 +53,54 @@ static void __x2apic_send_IPI_dest(unsigned int apicid, int vector, | |||
54 | x2apic_icr_write(cfg, apicid); | 53 | x2apic_icr_write(cfg, apicid); |
55 | } | 54 | } |
56 | 55 | ||
57 | static void x2apic_send_IPI_mask(cpumask_t mask, int vector) | 56 | static void x2apic_send_IPI_mask(const struct cpumask *mask, int vector) |
58 | { | 57 | { |
59 | unsigned long flags; | 58 | unsigned long flags; |
60 | unsigned long query_cpu; | 59 | unsigned long query_cpu; |
61 | 60 | ||
62 | local_irq_save(flags); | 61 | local_irq_save(flags); |
63 | for_each_cpu_mask(query_cpu, mask) { | 62 | for_each_cpu(query_cpu, mask) { |
64 | __x2apic_send_IPI_dest(per_cpu(x86_cpu_to_apicid, query_cpu), | 63 | __x2apic_send_IPI_dest(per_cpu(x86_cpu_to_apicid, query_cpu), |
65 | vector, APIC_DEST_PHYSICAL); | 64 | vector, APIC_DEST_PHYSICAL); |
66 | } | 65 | } |
67 | local_irq_restore(flags); | 66 | local_irq_restore(flags); |
68 | } | 67 | } |
69 | 68 | ||
70 | static void x2apic_send_IPI_allbutself(int vector) | 69 | static void x2apic_send_IPI_mask_allbutself(const struct cpumask *mask, |
70 | int vector) | ||
71 | { | 71 | { |
72 | cpumask_t mask = cpu_online_map; | 72 | unsigned long flags; |
73 | unsigned long query_cpu; | ||
74 | unsigned long this_cpu = smp_processor_id(); | ||
75 | |||
76 | local_irq_save(flags); | ||
77 | for_each_cpu(query_cpu, mask) { | ||
78 | if (query_cpu != this_cpu) | ||
79 | __x2apic_send_IPI_dest( | ||
80 | per_cpu(x86_cpu_to_apicid, query_cpu), | ||
81 | vector, APIC_DEST_PHYSICAL); | ||
82 | } | ||
83 | local_irq_restore(flags); | ||
84 | } | ||
73 | 85 | ||
74 | cpu_clear(smp_processor_id(), mask); | 86 | static void x2apic_send_IPI_allbutself(int vector) |
87 | { | ||
88 | unsigned long flags; | ||
89 | unsigned long query_cpu; | ||
90 | unsigned long this_cpu = smp_processor_id(); | ||
75 | 91 | ||
76 | if (!cpus_empty(mask)) | 92 | local_irq_save(flags); |
77 | x2apic_send_IPI_mask(mask, vector); | 93 | for_each_online_cpu(query_cpu) |
94 | if (query_cpu != this_cpu) | ||
95 | __x2apic_send_IPI_dest( | ||
96 | per_cpu(x86_cpu_to_apicid, query_cpu), | ||
97 | vector, APIC_DEST_PHYSICAL); | ||
98 | local_irq_restore(flags); | ||
78 | } | 99 | } |
79 | 100 | ||
80 | static void x2apic_send_IPI_all(int vector) | 101 | static void x2apic_send_IPI_all(int vector) |
81 | { | 102 | { |
82 | x2apic_send_IPI_mask(cpu_online_map, vector); | 103 | x2apic_send_IPI_mask(cpu_online_mask, vector); |
83 | } | 104 | } |
84 | 105 | ||
85 | static int x2apic_apic_id_registered(void) | 106 | static int x2apic_apic_id_registered(void) |
@@ -87,7 +108,7 @@ static int x2apic_apic_id_registered(void) | |||
87 | return 1; | 108 | return 1; |
88 | } | 109 | } |
89 | 110 | ||
90 | static unsigned int x2apic_cpu_mask_to_apicid(cpumask_t cpumask) | 111 | static unsigned int x2apic_cpu_mask_to_apicid(const struct cpumask *cpumask) |
91 | { | 112 | { |
92 | int cpu; | 113 | int cpu; |
93 | 114 | ||
@@ -95,13 +116,30 @@ static unsigned int x2apic_cpu_mask_to_apicid(cpumask_t cpumask) | |||
95 | * We're using fixed IRQ delivery, can only return one phys APIC ID. | 116 | * We're using fixed IRQ delivery, can only return one phys APIC ID. |
96 | * May as well be the first. | 117 | * May as well be the first. |
97 | */ | 118 | */ |
98 | cpu = first_cpu(cpumask); | 119 | cpu = cpumask_first(cpumask); |
99 | if ((unsigned)cpu < NR_CPUS) | 120 | if ((unsigned)cpu < nr_cpu_ids) |
100 | return per_cpu(x86_cpu_to_apicid, cpu); | 121 | return per_cpu(x86_cpu_to_apicid, cpu); |
101 | else | 122 | else |
102 | return BAD_APICID; | 123 | return BAD_APICID; |
103 | } | 124 | } |
104 | 125 | ||
126 | static unsigned int x2apic_cpu_mask_to_apicid_and(const struct cpumask *cpumask, | ||
127 | const struct cpumask *andmask) | ||
128 | { | ||
129 | int cpu; | ||
130 | |||
131 | /* | ||
132 | * We're using fixed IRQ delivery, can only return one phys APIC ID. | ||
133 | * May as well be the first. | ||
134 | */ | ||
135 | for_each_cpu_and(cpu, cpumask, andmask) | ||
136 | if (cpumask_test_cpu(cpu, cpu_online_mask)) | ||
137 | break; | ||
138 | if (cpu < nr_cpu_ids) | ||
139 | return per_cpu(x86_cpu_to_apicid, cpu); | ||
140 | return BAD_APICID; | ||
141 | } | ||
142 | |||
105 | static unsigned int get_apic_id(unsigned long x) | 143 | static unsigned int get_apic_id(unsigned long x) |
106 | { | 144 | { |
107 | unsigned int id; | 145 | unsigned int id; |
@@ -123,12 +161,12 @@ static unsigned int phys_pkg_id(int index_msb) | |||
123 | return current_cpu_data.initial_apicid >> index_msb; | 161 | return current_cpu_data.initial_apicid >> index_msb; |
124 | } | 162 | } |
125 | 163 | ||
126 | void x2apic_send_IPI_self(int vector) | 164 | static void x2apic_send_IPI_self(int vector) |
127 | { | 165 | { |
128 | apic_write(APIC_SELF_IPI, vector); | 166 | apic_write(APIC_SELF_IPI, vector); |
129 | } | 167 | } |
130 | 168 | ||
131 | void init_x2apic_ldr(void) | 169 | static void init_x2apic_ldr(void) |
132 | { | 170 | { |
133 | return; | 171 | return; |
134 | } | 172 | } |
@@ -145,8 +183,10 @@ struct genapic apic_x2apic_phys = { | |||
145 | .send_IPI_all = x2apic_send_IPI_all, | 183 | .send_IPI_all = x2apic_send_IPI_all, |
146 | .send_IPI_allbutself = x2apic_send_IPI_allbutself, | 184 | .send_IPI_allbutself = x2apic_send_IPI_allbutself, |
147 | .send_IPI_mask = x2apic_send_IPI_mask, | 185 | .send_IPI_mask = x2apic_send_IPI_mask, |
186 | .send_IPI_mask_allbutself = x2apic_send_IPI_mask_allbutself, | ||
148 | .send_IPI_self = x2apic_send_IPI_self, | 187 | .send_IPI_self = x2apic_send_IPI_self, |
149 | .cpu_mask_to_apicid = x2apic_cpu_mask_to_apicid, | 188 | .cpu_mask_to_apicid = x2apic_cpu_mask_to_apicid, |
189 | .cpu_mask_to_apicid_and = x2apic_cpu_mask_to_apicid_and, | ||
150 | .phys_pkg_id = phys_pkg_id, | 190 | .phys_pkg_id = phys_pkg_id, |
151 | .get_apic_id = get_apic_id, | 191 | .get_apic_id = get_apic_id, |
152 | .set_apic_id = set_apic_id, | 192 | .set_apic_id = set_apic_id, |
diff --git a/arch/x86/kernel/genx2apic_uv_x.c b/arch/x86/kernel/genx2apic_uv_x.c index 2c7dbdb98278..b193e082f6ce 100644 --- a/arch/x86/kernel/genx2apic_uv_x.c +++ b/arch/x86/kernel/genx2apic_uv_x.c | |||
@@ -10,6 +10,7 @@ | |||
10 | 10 | ||
11 | #include <linux/kernel.h> | 11 | #include <linux/kernel.h> |
12 | #include <linux/threads.h> | 12 | #include <linux/threads.h> |
13 | #include <linux/cpu.h> | ||
13 | #include <linux/cpumask.h> | 14 | #include <linux/cpumask.h> |
14 | #include <linux/string.h> | 15 | #include <linux/string.h> |
15 | #include <linux/ctype.h> | 16 | #include <linux/ctype.h> |
@@ -17,6 +18,9 @@ | |||
17 | #include <linux/sched.h> | 18 | #include <linux/sched.h> |
18 | #include <linux/module.h> | 19 | #include <linux/module.h> |
19 | #include <linux/hardirq.h> | 20 | #include <linux/hardirq.h> |
21 | #include <linux/timer.h> | ||
22 | #include <linux/proc_fs.h> | ||
23 | #include <asm/current.h> | ||
20 | #include <asm/smp.h> | 24 | #include <asm/smp.h> |
21 | #include <asm/ipi.h> | 25 | #include <asm/ipi.h> |
22 | #include <asm/genapic.h> | 26 | #include <asm/genapic.h> |
@@ -75,16 +79,15 @@ EXPORT_SYMBOL(sn_rtc_cycles_per_second); | |||
75 | 79 | ||
76 | /* Start with all IRQs pointing to boot CPU. IRQ balancing will shift them. */ | 80 | /* Start with all IRQs pointing to boot CPU. IRQ balancing will shift them. */ |
77 | 81 | ||
78 | static cpumask_t uv_target_cpus(void) | 82 | static const struct cpumask *uv_target_cpus(void) |
79 | { | 83 | { |
80 | return cpumask_of_cpu(0); | 84 | return cpumask_of(0); |
81 | } | 85 | } |
82 | 86 | ||
83 | static cpumask_t uv_vector_allocation_domain(int cpu) | 87 | static void uv_vector_allocation_domain(int cpu, struct cpumask *retmask) |
84 | { | 88 | { |
85 | cpumask_t domain = CPU_MASK_NONE; | 89 | cpumask_clear(retmask); |
86 | cpu_set(cpu, domain); | 90 | cpumask_set_cpu(cpu, retmask); |
87 | return domain; | ||
88 | } | 91 | } |
89 | 92 | ||
90 | int uv_wakeup_secondary(int phys_apicid, unsigned int start_rip) | 93 | int uv_wakeup_secondary(int phys_apicid, unsigned int start_rip) |
@@ -123,28 +126,37 @@ static void uv_send_IPI_one(int cpu, int vector) | |||
123 | uv_write_global_mmr64(pnode, UVH_IPI_INT, val); | 126 | uv_write_global_mmr64(pnode, UVH_IPI_INT, val); |
124 | } | 127 | } |
125 | 128 | ||
126 | static void uv_send_IPI_mask(cpumask_t mask, int vector) | 129 | static void uv_send_IPI_mask(const struct cpumask *mask, int vector) |
127 | { | 130 | { |
128 | unsigned int cpu; | 131 | unsigned int cpu; |
129 | 132 | ||
130 | for_each_possible_cpu(cpu) | 133 | for_each_cpu(cpu, mask) |
131 | if (cpu_isset(cpu, mask)) | 134 | uv_send_IPI_one(cpu, vector); |
135 | } | ||
136 | |||
137 | static void uv_send_IPI_mask_allbutself(const struct cpumask *mask, int vector) | ||
138 | { | ||
139 | unsigned int cpu; | ||
140 | unsigned int this_cpu = smp_processor_id(); | ||
141 | |||
142 | for_each_cpu(cpu, mask) | ||
143 | if (cpu != this_cpu) | ||
132 | uv_send_IPI_one(cpu, vector); | 144 | uv_send_IPI_one(cpu, vector); |
133 | } | 145 | } |
134 | 146 | ||
135 | static void uv_send_IPI_allbutself(int vector) | 147 | static void uv_send_IPI_allbutself(int vector) |
136 | { | 148 | { |
137 | cpumask_t mask = cpu_online_map; | 149 | unsigned int cpu; |
138 | 150 | unsigned int this_cpu = smp_processor_id(); | |
139 | cpu_clear(smp_processor_id(), mask); | ||
140 | 151 | ||
141 | if (!cpus_empty(mask)) | 152 | for_each_online_cpu(cpu) |
142 | uv_send_IPI_mask(mask, vector); | 153 | if (cpu != this_cpu) |
154 | uv_send_IPI_one(cpu, vector); | ||
143 | } | 155 | } |
144 | 156 | ||
145 | static void uv_send_IPI_all(int vector) | 157 | static void uv_send_IPI_all(int vector) |
146 | { | 158 | { |
147 | uv_send_IPI_mask(cpu_online_map, vector); | 159 | uv_send_IPI_mask(cpu_online_mask, vector); |
148 | } | 160 | } |
149 | 161 | ||
150 | static int uv_apic_id_registered(void) | 162 | static int uv_apic_id_registered(void) |
@@ -156,7 +168,7 @@ static void uv_init_apic_ldr(void) | |||
156 | { | 168 | { |
157 | } | 169 | } |
158 | 170 | ||
159 | static unsigned int uv_cpu_mask_to_apicid(cpumask_t cpumask) | 171 | static unsigned int uv_cpu_mask_to_apicid(const struct cpumask *cpumask) |
160 | { | 172 | { |
161 | int cpu; | 173 | int cpu; |
162 | 174 | ||
@@ -164,13 +176,30 @@ static unsigned int uv_cpu_mask_to_apicid(cpumask_t cpumask) | |||
164 | * We're using fixed IRQ delivery, can only return one phys APIC ID. | 176 | * We're using fixed IRQ delivery, can only return one phys APIC ID. |
165 | * May as well be the first. | 177 | * May as well be the first. |
166 | */ | 178 | */ |
167 | cpu = first_cpu(cpumask); | 179 | cpu = cpumask_first(cpumask); |
168 | if ((unsigned)cpu < nr_cpu_ids) | 180 | if ((unsigned)cpu < nr_cpu_ids) |
169 | return per_cpu(x86_cpu_to_apicid, cpu); | 181 | return per_cpu(x86_cpu_to_apicid, cpu); |
170 | else | 182 | else |
171 | return BAD_APICID; | 183 | return BAD_APICID; |
172 | } | 184 | } |
173 | 185 | ||
186 | static unsigned int uv_cpu_mask_to_apicid_and(const struct cpumask *cpumask, | ||
187 | const struct cpumask *andmask) | ||
188 | { | ||
189 | int cpu; | ||
190 | |||
191 | /* | ||
192 | * We're using fixed IRQ delivery, can only return one phys APIC ID. | ||
193 | * May as well be the first. | ||
194 | */ | ||
195 | for_each_cpu_and(cpu, cpumask, andmask) | ||
196 | if (cpumask_test_cpu(cpu, cpu_online_mask)) | ||
197 | break; | ||
198 | if (cpu < nr_cpu_ids) | ||
199 | return per_cpu(x86_cpu_to_apicid, cpu); | ||
200 | return BAD_APICID; | ||
201 | } | ||
202 | |||
174 | static unsigned int get_apic_id(unsigned long x) | 203 | static unsigned int get_apic_id(unsigned long x) |
175 | { | 204 | { |
176 | unsigned int id; | 205 | unsigned int id; |
@@ -218,8 +247,10 @@ struct genapic apic_x2apic_uv_x = { | |||
218 | .send_IPI_all = uv_send_IPI_all, | 247 | .send_IPI_all = uv_send_IPI_all, |
219 | .send_IPI_allbutself = uv_send_IPI_allbutself, | 248 | .send_IPI_allbutself = uv_send_IPI_allbutself, |
220 | .send_IPI_mask = uv_send_IPI_mask, | 249 | .send_IPI_mask = uv_send_IPI_mask, |
250 | .send_IPI_mask_allbutself = uv_send_IPI_mask_allbutself, | ||
221 | .send_IPI_self = uv_send_IPI_self, | 251 | .send_IPI_self = uv_send_IPI_self, |
222 | .cpu_mask_to_apicid = uv_cpu_mask_to_apicid, | 252 | .cpu_mask_to_apicid = uv_cpu_mask_to_apicid, |
253 | .cpu_mask_to_apicid_and = uv_cpu_mask_to_apicid_and, | ||
223 | .phys_pkg_id = phys_pkg_id, | 254 | .phys_pkg_id = phys_pkg_id, |
224 | .get_apic_id = get_apic_id, | 255 | .get_apic_id = get_apic_id, |
225 | .set_apic_id = set_apic_id, | 256 | .set_apic_id = set_apic_id, |
@@ -356,6 +387,103 @@ static __init void uv_rtc_init(void) | |||
356 | } | 387 | } |
357 | 388 | ||
358 | /* | 389 | /* |
390 | * percpu heartbeat timer | ||
391 | */ | ||
392 | static void uv_heartbeat(unsigned long ignored) | ||
393 | { | ||
394 | struct timer_list *timer = &uv_hub_info->scir.timer; | ||
395 | unsigned char bits = uv_hub_info->scir.state; | ||
396 | |||
397 | /* flip heartbeat bit */ | ||
398 | bits ^= SCIR_CPU_HEARTBEAT; | ||
399 | |||
400 | /* is this cpu idle? */ | ||
401 | if (idle_cpu(raw_smp_processor_id())) | ||
402 | bits &= ~SCIR_CPU_ACTIVITY; | ||
403 | else | ||
404 | bits |= SCIR_CPU_ACTIVITY; | ||
405 | |||
406 | /* update system controller interface reg */ | ||
407 | uv_set_scir_bits(bits); | ||
408 | |||
409 | /* enable next timer period */ | ||
410 | mod_timer(timer, jiffies + SCIR_CPU_HB_INTERVAL); | ||
411 | } | ||
412 | |||
413 | static void __cpuinit uv_heartbeat_enable(int cpu) | ||
414 | { | ||
415 | if (!uv_cpu_hub_info(cpu)->scir.enabled) { | ||
416 | struct timer_list *timer = &uv_cpu_hub_info(cpu)->scir.timer; | ||
417 | |||
418 | uv_set_cpu_scir_bits(cpu, SCIR_CPU_HEARTBEAT|SCIR_CPU_ACTIVITY); | ||
419 | setup_timer(timer, uv_heartbeat, cpu); | ||
420 | timer->expires = jiffies + SCIR_CPU_HB_INTERVAL; | ||
421 | add_timer_on(timer, cpu); | ||
422 | uv_cpu_hub_info(cpu)->scir.enabled = 1; | ||
423 | } | ||
424 | |||
425 | /* check boot cpu */ | ||
426 | if (!uv_cpu_hub_info(0)->scir.enabled) | ||
427 | uv_heartbeat_enable(0); | ||
428 | } | ||
429 | |||
430 | #ifdef CONFIG_HOTPLUG_CPU | ||
431 | static void __cpuinit uv_heartbeat_disable(int cpu) | ||
432 | { | ||
433 | if (uv_cpu_hub_info(cpu)->scir.enabled) { | ||
434 | uv_cpu_hub_info(cpu)->scir.enabled = 0; | ||
435 | del_timer(&uv_cpu_hub_info(cpu)->scir.timer); | ||
436 | } | ||
437 | uv_set_cpu_scir_bits(cpu, 0xff); | ||
438 | } | ||
439 | |||
440 | /* | ||
441 | * cpu hotplug notifier | ||
442 | */ | ||
443 | static __cpuinit int uv_scir_cpu_notify(struct notifier_block *self, | ||
444 | unsigned long action, void *hcpu) | ||
445 | { | ||
446 | long cpu = (long)hcpu; | ||
447 | |||
448 | switch (action) { | ||
449 | case CPU_ONLINE: | ||
450 | uv_heartbeat_enable(cpu); | ||
451 | break; | ||
452 | case CPU_DOWN_PREPARE: | ||
453 | uv_heartbeat_disable(cpu); | ||
454 | break; | ||
455 | default: | ||
456 | break; | ||
457 | } | ||
458 | return NOTIFY_OK; | ||
459 | } | ||
460 | |||
461 | static __init void uv_scir_register_cpu_notifier(void) | ||
462 | { | ||
463 | hotcpu_notifier(uv_scir_cpu_notify, 0); | ||
464 | } | ||
465 | |||
466 | #else /* !CONFIG_HOTPLUG_CPU */ | ||
467 | |||
468 | static __init void uv_scir_register_cpu_notifier(void) | ||
469 | { | ||
470 | } | ||
471 | |||
472 | static __init int uv_init_heartbeat(void) | ||
473 | { | ||
474 | int cpu; | ||
475 | |||
476 | if (is_uv_system()) | ||
477 | for_each_online_cpu(cpu) | ||
478 | uv_heartbeat_enable(cpu); | ||
479 | return 0; | ||
480 | } | ||
481 | |||
482 | late_initcall(uv_init_heartbeat); | ||
483 | |||
484 | #endif /* !CONFIG_HOTPLUG_CPU */ | ||
485 | |||
486 | /* | ||
359 | * Called on each cpu to initialize the per_cpu UV data area. | 487 | * Called on each cpu to initialize the per_cpu UV data area. |
360 | * ZZZ hotplug not supported yet | 488 | * ZZZ hotplug not supported yet |
361 | */ | 489 | */ |
@@ -428,7 +556,7 @@ void __init uv_system_init(void) | |||
428 | 556 | ||
429 | uv_bios_init(); | 557 | uv_bios_init(); |
430 | uv_bios_get_sn_info(0, &uv_type, &sn_partition_id, | 558 | uv_bios_get_sn_info(0, &uv_type, &sn_partition_id, |
431 | &uv_coherency_id, &uv_region_size); | 559 | &sn_coherency_id, &sn_region_size); |
432 | uv_rtc_init(); | 560 | uv_rtc_init(); |
433 | 561 | ||
434 | for_each_present_cpu(cpu) { | 562 | for_each_present_cpu(cpu) { |
@@ -439,8 +567,7 @@ void __init uv_system_init(void) | |||
439 | uv_blade_info[blade].nr_possible_cpus++; | 567 | uv_blade_info[blade].nr_possible_cpus++; |
440 | 568 | ||
441 | uv_cpu_hub_info(cpu)->lowmem_remap_base = lowmem_redir_base; | 569 | uv_cpu_hub_info(cpu)->lowmem_remap_base = lowmem_redir_base; |
442 | uv_cpu_hub_info(cpu)->lowmem_remap_top = | 570 | uv_cpu_hub_info(cpu)->lowmem_remap_top = lowmem_redir_size; |
443 | lowmem_redir_base + lowmem_redir_size; | ||
444 | uv_cpu_hub_info(cpu)->m_val = m_val; | 571 | uv_cpu_hub_info(cpu)->m_val = m_val; |
445 | uv_cpu_hub_info(cpu)->n_val = m_val; | 572 | uv_cpu_hub_info(cpu)->n_val = m_val; |
446 | uv_cpu_hub_info(cpu)->numa_blade_id = blade; | 573 | uv_cpu_hub_info(cpu)->numa_blade_id = blade; |
@@ -450,7 +577,8 @@ void __init uv_system_init(void) | |||
450 | uv_cpu_hub_info(cpu)->gpa_mask = (1 << (m_val + n_val)) - 1; | 577 | uv_cpu_hub_info(cpu)->gpa_mask = (1 << (m_val + n_val)) - 1; |
451 | uv_cpu_hub_info(cpu)->gnode_upper = gnode_upper; | 578 | uv_cpu_hub_info(cpu)->gnode_upper = gnode_upper; |
452 | uv_cpu_hub_info(cpu)->global_mmr_base = mmr_base; | 579 | uv_cpu_hub_info(cpu)->global_mmr_base = mmr_base; |
453 | uv_cpu_hub_info(cpu)->coherency_domain_number = uv_coherency_id; | 580 | uv_cpu_hub_info(cpu)->coherency_domain_number = sn_coherency_id; |
581 | uv_cpu_hub_info(cpu)->scir.offset = SCIR_LOCAL_MMR_BASE + lcpu; | ||
454 | uv_node_to_blade[nid] = blade; | 582 | uv_node_to_blade[nid] = blade; |
455 | uv_cpu_to_blade[cpu] = blade; | 583 | uv_cpu_to_blade[cpu] = blade; |
456 | max_pnode = max(pnode, max_pnode); | 584 | max_pnode = max(pnode, max_pnode); |
@@ -467,4 +595,6 @@ void __init uv_system_init(void) | |||
467 | map_mmioh_high(max_pnode); | 595 | map_mmioh_high(max_pnode); |
468 | 596 | ||
469 | uv_cpu_init(); | 597 | uv_cpu_init(); |
598 | uv_scir_register_cpu_notifier(); | ||
599 | proc_mkdir("sgi_uv", NULL); | ||
470 | } | 600 | } |
diff --git a/arch/x86/kernel/head.c b/arch/x86/kernel/head.c index 1dcb0f13897e..3e66bd364a9d 100644 --- a/arch/x86/kernel/head.c +++ b/arch/x86/kernel/head.c | |||
@@ -35,7 +35,6 @@ void __init reserve_ebda_region(void) | |||
35 | 35 | ||
36 | /* start of EBDA area */ | 36 | /* start of EBDA area */ |
37 | ebda_addr = get_bios_ebda(); | 37 | ebda_addr = get_bios_ebda(); |
38 | printk(KERN_INFO "BIOS EBDA/lowmem at: %08x/%08x\n", ebda_addr, lowmem); | ||
39 | 38 | ||
40 | /* Fixup: bios puts an EBDA in the top 64K segment */ | 39 | /* Fixup: bios puts an EBDA in the top 64K segment */ |
41 | /* of conventional memory, but does not adjust lowmem. */ | 40 | /* of conventional memory, but does not adjust lowmem. */ |
diff --git a/arch/x86/kernel/head32.c b/arch/x86/kernel/head32.c index fa1d25dd83e3..ac108d1fe182 100644 --- a/arch/x86/kernel/head32.c +++ b/arch/x86/kernel/head32.c | |||
@@ -12,9 +12,12 @@ | |||
12 | #include <asm/sections.h> | 12 | #include <asm/sections.h> |
13 | #include <asm/e820.h> | 13 | #include <asm/e820.h> |
14 | #include <asm/bios_ebda.h> | 14 | #include <asm/bios_ebda.h> |
15 | #include <asm/trampoline.h> | ||
15 | 16 | ||
16 | void __init i386_start_kernel(void) | 17 | void __init i386_start_kernel(void) |
17 | { | 18 | { |
19 | reserve_trampoline_memory(); | ||
20 | |||
18 | reserve_early(__pa_symbol(&_text), __pa_symbol(&_end), "TEXT DATA BSS"); | 21 | reserve_early(__pa_symbol(&_text), __pa_symbol(&_end), "TEXT DATA BSS"); |
19 | 22 | ||
20 | #ifdef CONFIG_BLK_DEV_INITRD | 23 | #ifdef CONFIG_BLK_DEV_INITRD |
diff --git a/arch/x86/kernel/head64.c b/arch/x86/kernel/head64.c index d16084f90649..b9a4d8c4b935 100644 --- a/arch/x86/kernel/head64.c +++ b/arch/x86/kernel/head64.c | |||
@@ -24,9 +24,10 @@ | |||
24 | #include <asm/kdebug.h> | 24 | #include <asm/kdebug.h> |
25 | #include <asm/e820.h> | 25 | #include <asm/e820.h> |
26 | #include <asm/bios_ebda.h> | 26 | #include <asm/bios_ebda.h> |
27 | #include <asm/trampoline.h> | ||
27 | 28 | ||
28 | /* boot cpu pda */ | 29 | /* boot cpu pda */ |
29 | static struct x8664_pda _boot_cpu_pda __read_mostly; | 30 | static struct x8664_pda _boot_cpu_pda; |
30 | 31 | ||
31 | #ifdef CONFIG_SMP | 32 | #ifdef CONFIG_SMP |
32 | /* | 33 | /* |
@@ -120,6 +121,8 @@ void __init x86_64_start_reservations(char *real_mode_data) | |||
120 | { | 121 | { |
121 | copy_bootdata(__va(real_mode_data)); | 122 | copy_bootdata(__va(real_mode_data)); |
122 | 123 | ||
124 | reserve_trampoline_memory(); | ||
125 | |||
123 | reserve_early(__pa_symbol(&_text), __pa_symbol(&_end), "TEXT DATA BSS"); | 126 | reserve_early(__pa_symbol(&_text), __pa_symbol(&_end), "TEXT DATA BSS"); |
124 | 127 | ||
125 | #ifdef CONFIG_BLK_DEV_INITRD | 128 | #ifdef CONFIG_BLK_DEV_INITRD |
diff --git a/arch/x86/kernel/hpet.c b/arch/x86/kernel/hpet.c index 067d8de913f6..cd759ad90690 100644 --- a/arch/x86/kernel/hpet.c +++ b/arch/x86/kernel/hpet.c | |||
@@ -33,7 +33,9 @@ | |||
33 | * HPET address is set in acpi/boot.c, when an ACPI entry exists | 33 | * HPET address is set in acpi/boot.c, when an ACPI entry exists |
34 | */ | 34 | */ |
35 | unsigned long hpet_address; | 35 | unsigned long hpet_address; |
36 | unsigned long hpet_num_timers; | 36 | #ifdef CONFIG_PCI_MSI |
37 | static unsigned long hpet_num_timers; | ||
38 | #endif | ||
37 | static void __iomem *hpet_virt_address; | 39 | static void __iomem *hpet_virt_address; |
38 | 40 | ||
39 | struct hpet_dev { | 41 | struct hpet_dev { |
@@ -246,7 +248,7 @@ static void hpet_legacy_clockevent_register(void) | |||
246 | * Start hpet with the boot cpu mask and make it | 248 | * Start hpet with the boot cpu mask and make it |
247 | * global after the IO_APIC has been initialized. | 249 | * global after the IO_APIC has been initialized. |
248 | */ | 250 | */ |
249 | hpet_clockevent.cpumask = cpumask_of_cpu(smp_processor_id()); | 251 | hpet_clockevent.cpumask = cpumask_of(smp_processor_id()); |
250 | clockevents_register_device(&hpet_clockevent); | 252 | clockevents_register_device(&hpet_clockevent); |
251 | global_clock_event = &hpet_clockevent; | 253 | global_clock_event = &hpet_clockevent; |
252 | printk(KERN_DEBUG "hpet clockevent registered\n"); | 254 | printk(KERN_DEBUG "hpet clockevent registered\n"); |
@@ -301,7 +303,7 @@ static void hpet_set_mode(enum clock_event_mode mode, | |||
301 | struct hpet_dev *hdev = EVT_TO_HPET_DEV(evt); | 303 | struct hpet_dev *hdev = EVT_TO_HPET_DEV(evt); |
302 | hpet_setup_msi_irq(hdev->irq); | 304 | hpet_setup_msi_irq(hdev->irq); |
303 | disable_irq(hdev->irq); | 305 | disable_irq(hdev->irq); |
304 | irq_set_affinity(hdev->irq, cpumask_of_cpu(hdev->cpu)); | 306 | irq_set_affinity(hdev->irq, cpumask_of(hdev->cpu)); |
305 | enable_irq(hdev->irq); | 307 | enable_irq(hdev->irq); |
306 | } | 308 | } |
307 | break; | 309 | break; |
@@ -449,7 +451,7 @@ static int hpet_setup_irq(struct hpet_dev *dev) | |||
449 | return -1; | 451 | return -1; |
450 | 452 | ||
451 | disable_irq(dev->irq); | 453 | disable_irq(dev->irq); |
452 | irq_set_affinity(dev->irq, cpumask_of_cpu(dev->cpu)); | 454 | irq_set_affinity(dev->irq, cpumask_of(dev->cpu)); |
453 | enable_irq(dev->irq); | 455 | enable_irq(dev->irq); |
454 | 456 | ||
455 | printk(KERN_DEBUG "hpet: %s irq %d for MSI\n", | 457 | printk(KERN_DEBUG "hpet: %s irq %d for MSI\n", |
@@ -500,7 +502,7 @@ static void init_one_hpet_msi_clockevent(struct hpet_dev *hdev, int cpu) | |||
500 | /* 5 usec minimum reprogramming delta. */ | 502 | /* 5 usec minimum reprogramming delta. */ |
501 | evt->min_delta_ns = 5000; | 503 | evt->min_delta_ns = 5000; |
502 | 504 | ||
503 | evt->cpumask = cpumask_of_cpu(hdev->cpu); | 505 | evt->cpumask = cpumask_of(hdev->cpu); |
504 | clockevents_register_device(evt); | 506 | clockevents_register_device(evt); |
505 | } | 507 | } |
506 | 508 | ||
@@ -811,7 +813,7 @@ int __init hpet_enable(void) | |||
811 | 813 | ||
812 | out_nohpet: | 814 | out_nohpet: |
813 | hpet_clear_mapping(); | 815 | hpet_clear_mapping(); |
814 | boot_hpet_disable = 1; | 816 | hpet_address = 0; |
815 | return 0; | 817 | return 0; |
816 | } | 818 | } |
817 | 819 | ||
@@ -834,10 +836,11 @@ static __init int hpet_late_init(void) | |||
834 | 836 | ||
835 | hpet_address = force_hpet_address; | 837 | hpet_address = force_hpet_address; |
836 | hpet_enable(); | 838 | hpet_enable(); |
837 | if (!hpet_virt_address) | ||
838 | return -ENODEV; | ||
839 | } | 839 | } |
840 | 840 | ||
841 | if (!hpet_virt_address) | ||
842 | return -ENODEV; | ||
843 | |||
841 | hpet_reserve_platform_timers(hpet_readl(HPET_ID)); | 844 | hpet_reserve_platform_timers(hpet_readl(HPET_ID)); |
842 | 845 | ||
843 | for_each_online_cpu(cpu) { | 846 | for_each_online_cpu(cpu) { |
diff --git a/arch/x86/kernel/i8253.c b/arch/x86/kernel/i8253.c index c1b5e3ece1f2..10f92fb532f3 100644 --- a/arch/x86/kernel/i8253.c +++ b/arch/x86/kernel/i8253.c | |||
@@ -114,7 +114,7 @@ void __init setup_pit_timer(void) | |||
114 | * Start pit with the boot cpu mask and make it global after the | 114 | * Start pit with the boot cpu mask and make it global after the |
115 | * IO_APIC has been initialized. | 115 | * IO_APIC has been initialized. |
116 | */ | 116 | */ |
117 | pit_clockevent.cpumask = cpumask_of_cpu(smp_processor_id()); | 117 | pit_clockevent.cpumask = cpumask_of(smp_processor_id()); |
118 | pit_clockevent.mult = div_sc(CLOCK_TICK_RATE, NSEC_PER_SEC, | 118 | pit_clockevent.mult = div_sc(CLOCK_TICK_RATE, NSEC_PER_SEC, |
119 | pit_clockevent.shift); | 119 | pit_clockevent.shift); |
120 | pit_clockevent.max_delta_ns = | 120 | pit_clockevent.max_delta_ns = |
diff --git a/arch/x86/kernel/init_task.c b/arch/x86/kernel/init_task.c index a4f93b4120c1..df3bf269beab 100644 --- a/arch/x86/kernel/init_task.c +++ b/arch/x86/kernel/init_task.c | |||
@@ -10,11 +10,9 @@ | |||
10 | #include <asm/pgtable.h> | 10 | #include <asm/pgtable.h> |
11 | #include <asm/desc.h> | 11 | #include <asm/desc.h> |
12 | 12 | ||
13 | static struct fs_struct init_fs = INIT_FS; | ||
14 | static struct signal_struct init_signals = INIT_SIGNALS(init_signals); | 13 | static struct signal_struct init_signals = INIT_SIGNALS(init_signals); |
15 | static struct sighand_struct init_sighand = INIT_SIGHAND(init_sighand); | 14 | static struct sighand_struct init_sighand = INIT_SIGHAND(init_sighand); |
16 | struct mm_struct init_mm = INIT_MM(init_mm); | 15 | struct mm_struct init_mm = INIT_MM(init_mm); |
17 | EXPORT_UNUSED_SYMBOL(init_mm); /* will be removed in 2.6.26 */ | ||
18 | 16 | ||
19 | /* | 17 | /* |
20 | * Initial thread structure. | 18 | * Initial thread structure. |
diff --git a/arch/x86/kernel/io_apic.c b/arch/x86/kernel/io_apic.c index 9043251210fb..3639442aa7a4 100644 --- a/arch/x86/kernel/io_apic.c +++ b/arch/x86/kernel/io_apic.c | |||
@@ -108,93 +108,276 @@ static int __init parse_noapic(char *str) | |||
108 | early_param("noapic", parse_noapic); | 108 | early_param("noapic", parse_noapic); |
109 | 109 | ||
110 | struct irq_pin_list; | 110 | struct irq_pin_list; |
111 | |||
112 | /* | ||
113 | * This is performance-critical, we want to do it O(1) | ||
114 | * | ||
115 | * the indexing order of this array favors 1:1 mappings | ||
116 | * between pins and IRQs. | ||
117 | */ | ||
118 | |||
119 | struct irq_pin_list { | ||
120 | int apic, pin; | ||
121 | struct irq_pin_list *next; | ||
122 | }; | ||
123 | |||
124 | static struct irq_pin_list *get_one_free_irq_2_pin(int cpu) | ||
125 | { | ||
126 | struct irq_pin_list *pin; | ||
127 | int node; | ||
128 | |||
129 | node = cpu_to_node(cpu); | ||
130 | |||
131 | pin = kzalloc_node(sizeof(*pin), GFP_ATOMIC, node); | ||
132 | printk(KERN_DEBUG " alloc irq_2_pin on cpu %d node %d\n", cpu, node); | ||
133 | |||
134 | return pin; | ||
135 | } | ||
136 | |||
111 | struct irq_cfg { | 137 | struct irq_cfg { |
112 | unsigned int irq; | ||
113 | struct irq_pin_list *irq_2_pin; | 138 | struct irq_pin_list *irq_2_pin; |
114 | cpumask_t domain; | 139 | cpumask_var_t domain; |
115 | cpumask_t old_domain; | 140 | cpumask_var_t old_domain; |
116 | unsigned move_cleanup_count; | 141 | unsigned move_cleanup_count; |
117 | u8 vector; | 142 | u8 vector; |
118 | u8 move_in_progress : 1; | 143 | u8 move_in_progress : 1; |
144 | #ifdef CONFIG_NUMA_MIGRATE_IRQ_DESC | ||
145 | u8 move_desc_pending : 1; | ||
146 | #endif | ||
119 | }; | 147 | }; |
120 | 148 | ||
121 | /* irq_cfg is indexed by the sum of all RTEs in all I/O APICs. */ | 149 | /* irq_cfg is indexed by the sum of all RTEs in all I/O APICs. */ |
150 | #ifdef CONFIG_SPARSE_IRQ | ||
151 | static struct irq_cfg irq_cfgx[] = { | ||
152 | #else | ||
122 | static struct irq_cfg irq_cfgx[NR_IRQS] = { | 153 | static struct irq_cfg irq_cfgx[NR_IRQS] = { |
123 | [0] = { .irq = 0, .domain = CPU_MASK_ALL, .vector = IRQ0_VECTOR, }, | 154 | #endif |
124 | [1] = { .irq = 1, .domain = CPU_MASK_ALL, .vector = IRQ1_VECTOR, }, | 155 | [0] = { .vector = IRQ0_VECTOR, }, |
125 | [2] = { .irq = 2, .domain = CPU_MASK_ALL, .vector = IRQ2_VECTOR, }, | 156 | [1] = { .vector = IRQ1_VECTOR, }, |
126 | [3] = { .irq = 3, .domain = CPU_MASK_ALL, .vector = IRQ3_VECTOR, }, | 157 | [2] = { .vector = IRQ2_VECTOR, }, |
127 | [4] = { .irq = 4, .domain = CPU_MASK_ALL, .vector = IRQ4_VECTOR, }, | 158 | [3] = { .vector = IRQ3_VECTOR, }, |
128 | [5] = { .irq = 5, .domain = CPU_MASK_ALL, .vector = IRQ5_VECTOR, }, | 159 | [4] = { .vector = IRQ4_VECTOR, }, |
129 | [6] = { .irq = 6, .domain = CPU_MASK_ALL, .vector = IRQ6_VECTOR, }, | 160 | [5] = { .vector = IRQ5_VECTOR, }, |
130 | [7] = { .irq = 7, .domain = CPU_MASK_ALL, .vector = IRQ7_VECTOR, }, | 161 | [6] = { .vector = IRQ6_VECTOR, }, |
131 | [8] = { .irq = 8, .domain = CPU_MASK_ALL, .vector = IRQ8_VECTOR, }, | 162 | [7] = { .vector = IRQ7_VECTOR, }, |
132 | [9] = { .irq = 9, .domain = CPU_MASK_ALL, .vector = IRQ9_VECTOR, }, | 163 | [8] = { .vector = IRQ8_VECTOR, }, |
133 | [10] = { .irq = 10, .domain = CPU_MASK_ALL, .vector = IRQ10_VECTOR, }, | 164 | [9] = { .vector = IRQ9_VECTOR, }, |
134 | [11] = { .irq = 11, .domain = CPU_MASK_ALL, .vector = IRQ11_VECTOR, }, | 165 | [10] = { .vector = IRQ10_VECTOR, }, |
135 | [12] = { .irq = 12, .domain = CPU_MASK_ALL, .vector = IRQ12_VECTOR, }, | 166 | [11] = { .vector = IRQ11_VECTOR, }, |
136 | [13] = { .irq = 13, .domain = CPU_MASK_ALL, .vector = IRQ13_VECTOR, }, | 167 | [12] = { .vector = IRQ12_VECTOR, }, |
137 | [14] = { .irq = 14, .domain = CPU_MASK_ALL, .vector = IRQ14_VECTOR, }, | 168 | [13] = { .vector = IRQ13_VECTOR, }, |
138 | [15] = { .irq = 15, .domain = CPU_MASK_ALL, .vector = IRQ15_VECTOR, }, | 169 | [14] = { .vector = IRQ14_VECTOR, }, |
170 | [15] = { .vector = IRQ15_VECTOR, }, | ||
139 | }; | 171 | }; |
140 | 172 | ||
141 | #define for_each_irq_cfg(irq, cfg) \ | 173 | int __init arch_early_irq_init(void) |
142 | for (irq = 0, cfg = irq_cfgx; irq < nr_irqs; irq++, cfg++) | 174 | { |
175 | struct irq_cfg *cfg; | ||
176 | struct irq_desc *desc; | ||
177 | int count; | ||
178 | int i; | ||
179 | |||
180 | cfg = irq_cfgx; | ||
181 | count = ARRAY_SIZE(irq_cfgx); | ||
143 | 182 | ||
183 | for (i = 0; i < count; i++) { | ||
184 | desc = irq_to_desc(i); | ||
185 | desc->chip_data = &cfg[i]; | ||
186 | alloc_bootmem_cpumask_var(&cfg[i].domain); | ||
187 | alloc_bootmem_cpumask_var(&cfg[i].old_domain); | ||
188 | if (i < NR_IRQS_LEGACY) | ||
189 | cpumask_setall(cfg[i].domain); | ||
190 | } | ||
191 | |||
192 | return 0; | ||
193 | } | ||
194 | |||
195 | #ifdef CONFIG_SPARSE_IRQ | ||
144 | static struct irq_cfg *irq_cfg(unsigned int irq) | 196 | static struct irq_cfg *irq_cfg(unsigned int irq) |
145 | { | 197 | { |
146 | return irq < nr_irqs ? irq_cfgx + irq : NULL; | 198 | struct irq_cfg *cfg = NULL; |
199 | struct irq_desc *desc; | ||
200 | |||
201 | desc = irq_to_desc(irq); | ||
202 | if (desc) | ||
203 | cfg = desc->chip_data; | ||
204 | |||
205 | return cfg; | ||
147 | } | 206 | } |
148 | 207 | ||
149 | static struct irq_cfg *irq_cfg_alloc(unsigned int irq) | 208 | static struct irq_cfg *get_one_free_irq_cfg(int cpu) |
150 | { | 209 | { |
151 | return irq_cfg(irq); | 210 | struct irq_cfg *cfg; |
211 | int node; | ||
212 | |||
213 | node = cpu_to_node(cpu); | ||
214 | |||
215 | cfg = kzalloc_node(sizeof(*cfg), GFP_ATOMIC, node); | ||
216 | if (cfg) { | ||
217 | if (!alloc_cpumask_var_node(&cfg->domain, GFP_ATOMIC, node)) { | ||
218 | kfree(cfg); | ||
219 | cfg = NULL; | ||
220 | } else if (!alloc_cpumask_var_node(&cfg->old_domain, | ||
221 | GFP_ATOMIC, node)) { | ||
222 | free_cpumask_var(cfg->domain); | ||
223 | kfree(cfg); | ||
224 | cfg = NULL; | ||
225 | } else { | ||
226 | cpumask_clear(cfg->domain); | ||
227 | cpumask_clear(cfg->old_domain); | ||
228 | } | ||
229 | } | ||
230 | printk(KERN_DEBUG " alloc irq_cfg on cpu %d node %d\n", cpu, node); | ||
231 | |||
232 | return cfg; | ||
152 | } | 233 | } |
153 | 234 | ||
154 | /* | 235 | int arch_init_chip_data(struct irq_desc *desc, int cpu) |
155 | * Rough estimation of how many shared IRQs there are, can be changed | 236 | { |
156 | * anytime. | 237 | struct irq_cfg *cfg; |
157 | */ | ||
158 | #define MAX_PLUS_SHARED_IRQS NR_IRQS | ||
159 | #define PIN_MAP_SIZE (MAX_PLUS_SHARED_IRQS + NR_IRQS) | ||
160 | 238 | ||
161 | /* | 239 | cfg = desc->chip_data; |
162 | * This is performance-critical, we want to do it O(1) | 240 | if (!cfg) { |
163 | * | 241 | desc->chip_data = get_one_free_irq_cfg(cpu); |
164 | * the indexing order of this array favors 1:1 mappings | 242 | if (!desc->chip_data) { |
165 | * between pins and IRQs. | 243 | printk(KERN_ERR "can not alloc irq_cfg\n"); |
166 | */ | 244 | BUG_ON(1); |
245 | } | ||
246 | } | ||
167 | 247 | ||
168 | struct irq_pin_list { | 248 | return 0; |
169 | int apic, pin; | 249 | } |
170 | struct irq_pin_list *next; | ||
171 | }; | ||
172 | 250 | ||
173 | static struct irq_pin_list irq_2_pin_head[PIN_MAP_SIZE]; | 251 | #ifdef CONFIG_NUMA_MIGRATE_IRQ_DESC |
174 | static struct irq_pin_list *irq_2_pin_ptr; | ||
175 | 252 | ||
176 | static void __init irq_2_pin_init(void) | 253 | static void |
254 | init_copy_irq_2_pin(struct irq_cfg *old_cfg, struct irq_cfg *cfg, int cpu) | ||
177 | { | 255 | { |
178 | struct irq_pin_list *pin = irq_2_pin_head; | 256 | struct irq_pin_list *old_entry, *head, *tail, *entry; |
179 | int i; | 257 | |
258 | cfg->irq_2_pin = NULL; | ||
259 | old_entry = old_cfg->irq_2_pin; | ||
260 | if (!old_entry) | ||
261 | return; | ||
262 | |||
263 | entry = get_one_free_irq_2_pin(cpu); | ||
264 | if (!entry) | ||
265 | return; | ||
266 | |||
267 | entry->apic = old_entry->apic; | ||
268 | entry->pin = old_entry->pin; | ||
269 | head = entry; | ||
270 | tail = entry; | ||
271 | old_entry = old_entry->next; | ||
272 | while (old_entry) { | ||
273 | entry = get_one_free_irq_2_pin(cpu); | ||
274 | if (!entry) { | ||
275 | entry = head; | ||
276 | while (entry) { | ||
277 | head = entry->next; | ||
278 | kfree(entry); | ||
279 | entry = head; | ||
280 | } | ||
281 | /* still use the old one */ | ||
282 | return; | ||
283 | } | ||
284 | entry->apic = old_entry->apic; | ||
285 | entry->pin = old_entry->pin; | ||
286 | tail->next = entry; | ||
287 | tail = entry; | ||
288 | old_entry = old_entry->next; | ||
289 | } | ||
180 | 290 | ||
181 | for (i = 1; i < PIN_MAP_SIZE; i++) | 291 | tail->next = NULL; |
182 | pin[i-1].next = &pin[i]; | 292 | cfg->irq_2_pin = head; |
293 | } | ||
294 | |||
295 | static void free_irq_2_pin(struct irq_cfg *old_cfg, struct irq_cfg *cfg) | ||
296 | { | ||
297 | struct irq_pin_list *entry, *next; | ||
298 | |||
299 | if (old_cfg->irq_2_pin == cfg->irq_2_pin) | ||
300 | return; | ||
183 | 301 | ||
184 | irq_2_pin_ptr = &pin[0]; | 302 | entry = old_cfg->irq_2_pin; |
303 | |||
304 | while (entry) { | ||
305 | next = entry->next; | ||
306 | kfree(entry); | ||
307 | entry = next; | ||
308 | } | ||
309 | old_cfg->irq_2_pin = NULL; | ||
185 | } | 310 | } |
186 | 311 | ||
187 | static struct irq_pin_list *get_one_free_irq_2_pin(void) | 312 | void arch_init_copy_chip_data(struct irq_desc *old_desc, |
313 | struct irq_desc *desc, int cpu) | ||
188 | { | 314 | { |
189 | struct irq_pin_list *pin = irq_2_pin_ptr; | 315 | struct irq_cfg *cfg; |
316 | struct irq_cfg *old_cfg; | ||
190 | 317 | ||
191 | if (!pin) | 318 | cfg = get_one_free_irq_cfg(cpu); |
192 | panic("can not get more irq_2_pin\n"); | ||
193 | 319 | ||
194 | irq_2_pin_ptr = pin->next; | 320 | if (!cfg) |
195 | pin->next = NULL; | 321 | return; |
196 | return pin; | 322 | |
323 | desc->chip_data = cfg; | ||
324 | |||
325 | old_cfg = old_desc->chip_data; | ||
326 | |||
327 | memcpy(cfg, old_cfg, sizeof(struct irq_cfg)); | ||
328 | |||
329 | init_copy_irq_2_pin(old_cfg, cfg, cpu); | ||
330 | } | ||
331 | |||
332 | static void free_irq_cfg(struct irq_cfg *old_cfg) | ||
333 | { | ||
334 | kfree(old_cfg); | ||
335 | } | ||
336 | |||
337 | void arch_free_chip_data(struct irq_desc *old_desc, struct irq_desc *desc) | ||
338 | { | ||
339 | struct irq_cfg *old_cfg, *cfg; | ||
340 | |||
341 | old_cfg = old_desc->chip_data; | ||
342 | cfg = desc->chip_data; | ||
343 | |||
344 | if (old_cfg == cfg) | ||
345 | return; | ||
346 | |||
347 | if (old_cfg) { | ||
348 | free_irq_2_pin(old_cfg, cfg); | ||
349 | free_irq_cfg(old_cfg); | ||
350 | old_desc->chip_data = NULL; | ||
351 | } | ||
352 | } | ||
353 | |||
354 | static void | ||
355 | set_extra_move_desc(struct irq_desc *desc, const struct cpumask *mask) | ||
356 | { | ||
357 | struct irq_cfg *cfg = desc->chip_data; | ||
358 | |||
359 | if (!cfg->move_in_progress) { | ||
360 | /* it means that domain is not changed */ | ||
361 | if (!cpumask_intersects(&desc->affinity, mask)) | ||
362 | cfg->move_desc_pending = 1; | ||
363 | } | ||
197 | } | 364 | } |
365 | #endif | ||
366 | |||
367 | #else | ||
368 | static struct irq_cfg *irq_cfg(unsigned int irq) | ||
369 | { | ||
370 | return irq < nr_irqs ? irq_cfgx + irq : NULL; | ||
371 | } | ||
372 | |||
373 | #endif | ||
374 | |||
375 | #ifndef CONFIG_NUMA_MIGRATE_IRQ_DESC | ||
376 | static inline void | ||
377 | set_extra_move_desc(struct irq_desc *desc, const struct cpumask *mask) | ||
378 | { | ||
379 | } | ||
380 | #endif | ||
198 | 381 | ||
199 | struct io_apic { | 382 | struct io_apic { |
200 | unsigned int index; | 383 | unsigned int index; |
@@ -237,11 +420,10 @@ static inline void io_apic_modify(unsigned int apic, unsigned int reg, unsigned | |||
237 | writel(value, &io_apic->data); | 420 | writel(value, &io_apic->data); |
238 | } | 421 | } |
239 | 422 | ||
240 | static bool io_apic_level_ack_pending(unsigned int irq) | 423 | static bool io_apic_level_ack_pending(struct irq_cfg *cfg) |
241 | { | 424 | { |
242 | struct irq_pin_list *entry; | 425 | struct irq_pin_list *entry; |
243 | unsigned long flags; | 426 | unsigned long flags; |
244 | struct irq_cfg *cfg = irq_cfg(irq); | ||
245 | 427 | ||
246 | spin_lock_irqsave(&ioapic_lock, flags); | 428 | spin_lock_irqsave(&ioapic_lock, flags); |
247 | entry = cfg->irq_2_pin; | 429 | entry = cfg->irq_2_pin; |
@@ -323,13 +505,32 @@ static void ioapic_mask_entry(int apic, int pin) | |||
323 | } | 505 | } |
324 | 506 | ||
325 | #ifdef CONFIG_SMP | 507 | #ifdef CONFIG_SMP |
326 | static void __target_IO_APIC_irq(unsigned int irq, unsigned int dest, u8 vector) | 508 | static void send_cleanup_vector(struct irq_cfg *cfg) |
509 | { | ||
510 | cpumask_var_t cleanup_mask; | ||
511 | |||
512 | if (unlikely(!alloc_cpumask_var(&cleanup_mask, GFP_ATOMIC))) { | ||
513 | unsigned int i; | ||
514 | cfg->move_cleanup_count = 0; | ||
515 | for_each_cpu_and(i, cfg->old_domain, cpu_online_mask) | ||
516 | cfg->move_cleanup_count++; | ||
517 | for_each_cpu_and(i, cfg->old_domain, cpu_online_mask) | ||
518 | send_IPI_mask(cpumask_of(i), IRQ_MOVE_CLEANUP_VECTOR); | ||
519 | } else { | ||
520 | cpumask_and(cleanup_mask, cfg->old_domain, cpu_online_mask); | ||
521 | cfg->move_cleanup_count = cpumask_weight(cleanup_mask); | ||
522 | send_IPI_mask(cleanup_mask, IRQ_MOVE_CLEANUP_VECTOR); | ||
523 | free_cpumask_var(cleanup_mask); | ||
524 | } | ||
525 | cfg->move_in_progress = 0; | ||
526 | } | ||
527 | |||
528 | static void __target_IO_APIC_irq(unsigned int irq, unsigned int dest, struct irq_cfg *cfg) | ||
327 | { | 529 | { |
328 | int apic, pin; | 530 | int apic, pin; |
329 | struct irq_cfg *cfg; | ||
330 | struct irq_pin_list *entry; | 531 | struct irq_pin_list *entry; |
532 | u8 vector = cfg->vector; | ||
331 | 533 | ||
332 | cfg = irq_cfg(irq); | ||
333 | entry = cfg->irq_2_pin; | 534 | entry = cfg->irq_2_pin; |
334 | for (;;) { | 535 | for (;;) { |
335 | unsigned int reg; | 536 | unsigned int reg; |
@@ -359,36 +560,61 @@ static void __target_IO_APIC_irq(unsigned int irq, unsigned int dest, u8 vector) | |||
359 | } | 560 | } |
360 | } | 561 | } |
361 | 562 | ||
362 | static int assign_irq_vector(int irq, cpumask_t mask); | 563 | static int |
564 | assign_irq_vector(int irq, struct irq_cfg *cfg, const struct cpumask *mask); | ||
363 | 565 | ||
364 | static void set_ioapic_affinity_irq(unsigned int irq, cpumask_t mask) | 566 | /* |
567 | * Either sets desc->affinity to a valid value, and returns cpu_mask_to_apicid | ||
568 | * of that, or returns BAD_APICID and leaves desc->affinity untouched. | ||
569 | */ | ||
570 | static unsigned int | ||
571 | set_desc_affinity(struct irq_desc *desc, const struct cpumask *mask) | ||
572 | { | ||
573 | struct irq_cfg *cfg; | ||
574 | unsigned int irq; | ||
575 | |||
576 | if (!cpumask_intersects(mask, cpu_online_mask)) | ||
577 | return BAD_APICID; | ||
578 | |||
579 | irq = desc->irq; | ||
580 | cfg = desc->chip_data; | ||
581 | if (assign_irq_vector(irq, cfg, mask)) | ||
582 | return BAD_APICID; | ||
583 | |||
584 | cpumask_and(&desc->affinity, cfg->domain, mask); | ||
585 | set_extra_move_desc(desc, mask); | ||
586 | return cpu_mask_to_apicid_and(&desc->affinity, cpu_online_mask); | ||
587 | } | ||
588 | |||
589 | static void | ||
590 | set_ioapic_affinity_irq_desc(struct irq_desc *desc, const struct cpumask *mask) | ||
365 | { | 591 | { |
366 | struct irq_cfg *cfg; | 592 | struct irq_cfg *cfg; |
367 | unsigned long flags; | 593 | unsigned long flags; |
368 | unsigned int dest; | 594 | unsigned int dest; |
369 | cpumask_t tmp; | 595 | unsigned int irq; |
370 | struct irq_desc *desc; | ||
371 | 596 | ||
372 | cpus_and(tmp, mask, cpu_online_map); | 597 | irq = desc->irq; |
373 | if (cpus_empty(tmp)) | 598 | cfg = desc->chip_data; |
374 | return; | ||
375 | 599 | ||
376 | cfg = irq_cfg(irq); | 600 | spin_lock_irqsave(&ioapic_lock, flags); |
377 | if (assign_irq_vector(irq, mask)) | 601 | dest = set_desc_affinity(desc, mask); |
378 | return; | 602 | if (dest != BAD_APICID) { |
603 | /* Only the high 8 bits are valid. */ | ||
604 | dest = SET_APIC_LOGICAL_ID(dest); | ||
605 | __target_IO_APIC_irq(irq, dest, cfg); | ||
606 | } | ||
607 | spin_unlock_irqrestore(&ioapic_lock, flags); | ||
608 | } | ||
379 | 609 | ||
380 | cpus_and(tmp, cfg->domain, mask); | 610 | static void |
381 | dest = cpu_mask_to_apicid(tmp); | 611 | set_ioapic_affinity_irq(unsigned int irq, const struct cpumask *mask) |
382 | /* | 612 | { |
383 | * Only the high 8 bits are valid. | 613 | struct irq_desc *desc; |
384 | */ | ||
385 | dest = SET_APIC_LOGICAL_ID(dest); | ||
386 | 614 | ||
387 | desc = irq_to_desc(irq); | 615 | desc = irq_to_desc(irq); |
388 | spin_lock_irqsave(&ioapic_lock, flags); | 616 | |
389 | __target_IO_APIC_irq(irq, dest, cfg->vector); | 617 | set_ioapic_affinity_irq_desc(desc, mask); |
390 | desc->affinity = mask; | ||
391 | spin_unlock_irqrestore(&ioapic_lock, flags); | ||
392 | } | 618 | } |
393 | #endif /* CONFIG_SMP */ | 619 | #endif /* CONFIG_SMP */ |
394 | 620 | ||
@@ -397,16 +623,18 @@ static void set_ioapic_affinity_irq(unsigned int irq, cpumask_t mask) | |||
397 | * shared ISA-space IRQs, so we have to support them. We are super | 623 | * shared ISA-space IRQs, so we have to support them. We are super |
398 | * fast in the common case, and fast for shared ISA-space IRQs. | 624 | * fast in the common case, and fast for shared ISA-space IRQs. |
399 | */ | 625 | */ |
400 | static void add_pin_to_irq(unsigned int irq, int apic, int pin) | 626 | static void add_pin_to_irq_cpu(struct irq_cfg *cfg, int cpu, int apic, int pin) |
401 | { | 627 | { |
402 | struct irq_cfg *cfg; | ||
403 | struct irq_pin_list *entry; | 628 | struct irq_pin_list *entry; |
404 | 629 | ||
405 | /* first time to refer irq_cfg, so with new */ | ||
406 | cfg = irq_cfg_alloc(irq); | ||
407 | entry = cfg->irq_2_pin; | 630 | entry = cfg->irq_2_pin; |
408 | if (!entry) { | 631 | if (!entry) { |
409 | entry = get_one_free_irq_2_pin(); | 632 | entry = get_one_free_irq_2_pin(cpu); |
633 | if (!entry) { | ||
634 | printk(KERN_ERR "can not alloc irq_2_pin to add %d - %d\n", | ||
635 | apic, pin); | ||
636 | return; | ||
637 | } | ||
410 | cfg->irq_2_pin = entry; | 638 | cfg->irq_2_pin = entry; |
411 | entry->apic = apic; | 639 | entry->apic = apic; |
412 | entry->pin = pin; | 640 | entry->pin = pin; |
@@ -421,7 +649,7 @@ static void add_pin_to_irq(unsigned int irq, int apic, int pin) | |||
421 | entry = entry->next; | 649 | entry = entry->next; |
422 | } | 650 | } |
423 | 651 | ||
424 | entry->next = get_one_free_irq_2_pin(); | 652 | entry->next = get_one_free_irq_2_pin(cpu); |
425 | entry = entry->next; | 653 | entry = entry->next; |
426 | entry->apic = apic; | 654 | entry->apic = apic; |
427 | entry->pin = pin; | 655 | entry->pin = pin; |
@@ -430,11 +658,10 @@ static void add_pin_to_irq(unsigned int irq, int apic, int pin) | |||
430 | /* | 658 | /* |
431 | * Reroute an IRQ to a different pin. | 659 | * Reroute an IRQ to a different pin. |
432 | */ | 660 | */ |
433 | static void __init replace_pin_at_irq(unsigned int irq, | 661 | static void __init replace_pin_at_irq_cpu(struct irq_cfg *cfg, int cpu, |
434 | int oldapic, int oldpin, | 662 | int oldapic, int oldpin, |
435 | int newapic, int newpin) | 663 | int newapic, int newpin) |
436 | { | 664 | { |
437 | struct irq_cfg *cfg = irq_cfg(irq); | ||
438 | struct irq_pin_list *entry = cfg->irq_2_pin; | 665 | struct irq_pin_list *entry = cfg->irq_2_pin; |
439 | int replaced = 0; | 666 | int replaced = 0; |
440 | 667 | ||
@@ -451,18 +678,16 @@ static void __init replace_pin_at_irq(unsigned int irq, | |||
451 | 678 | ||
452 | /* why? call replace before add? */ | 679 | /* why? call replace before add? */ |
453 | if (!replaced) | 680 | if (!replaced) |
454 | add_pin_to_irq(irq, newapic, newpin); | 681 | add_pin_to_irq_cpu(cfg, cpu, newapic, newpin); |
455 | } | 682 | } |
456 | 683 | ||
457 | static inline void io_apic_modify_irq(unsigned int irq, | 684 | static inline void io_apic_modify_irq(struct irq_cfg *cfg, |
458 | int mask_and, int mask_or, | 685 | int mask_and, int mask_or, |
459 | void (*final)(struct irq_pin_list *entry)) | 686 | void (*final)(struct irq_pin_list *entry)) |
460 | { | 687 | { |
461 | int pin; | 688 | int pin; |
462 | struct irq_cfg *cfg; | ||
463 | struct irq_pin_list *entry; | 689 | struct irq_pin_list *entry; |
464 | 690 | ||
465 | cfg = irq_cfg(irq); | ||
466 | for (entry = cfg->irq_2_pin; entry != NULL; entry = entry->next) { | 691 | for (entry = cfg->irq_2_pin; entry != NULL; entry = entry->next) { |
467 | unsigned int reg; | 692 | unsigned int reg; |
468 | pin = entry->pin; | 693 | pin = entry->pin; |
@@ -475,13 +700,13 @@ static inline void io_apic_modify_irq(unsigned int irq, | |||
475 | } | 700 | } |
476 | } | 701 | } |
477 | 702 | ||
478 | static void __unmask_IO_APIC_irq(unsigned int irq) | 703 | static void __unmask_IO_APIC_irq(struct irq_cfg *cfg) |
479 | { | 704 | { |
480 | io_apic_modify_irq(irq, ~IO_APIC_REDIR_MASKED, 0, NULL); | 705 | io_apic_modify_irq(cfg, ~IO_APIC_REDIR_MASKED, 0, NULL); |
481 | } | 706 | } |
482 | 707 | ||
483 | #ifdef CONFIG_X86_64 | 708 | #ifdef CONFIG_X86_64 |
484 | void io_apic_sync(struct irq_pin_list *entry) | 709 | static void io_apic_sync(struct irq_pin_list *entry) |
485 | { | 710 | { |
486 | /* | 711 | /* |
487 | * Synchronize the IO-APIC and the CPU by doing | 712 | * Synchronize the IO-APIC and the CPU by doing |
@@ -492,47 +717,64 @@ void io_apic_sync(struct irq_pin_list *entry) | |||
492 | readl(&io_apic->data); | 717 | readl(&io_apic->data); |
493 | } | 718 | } |
494 | 719 | ||
495 | static void __mask_IO_APIC_irq(unsigned int irq) | 720 | static void __mask_IO_APIC_irq(struct irq_cfg *cfg) |
496 | { | 721 | { |
497 | io_apic_modify_irq(irq, ~0, IO_APIC_REDIR_MASKED, &io_apic_sync); | 722 | io_apic_modify_irq(cfg, ~0, IO_APIC_REDIR_MASKED, &io_apic_sync); |
498 | } | 723 | } |
499 | #else /* CONFIG_X86_32 */ | 724 | #else /* CONFIG_X86_32 */ |
500 | static void __mask_IO_APIC_irq(unsigned int irq) | 725 | static void __mask_IO_APIC_irq(struct irq_cfg *cfg) |
501 | { | 726 | { |
502 | io_apic_modify_irq(irq, ~0, IO_APIC_REDIR_MASKED, NULL); | 727 | io_apic_modify_irq(cfg, ~0, IO_APIC_REDIR_MASKED, NULL); |
503 | } | 728 | } |
504 | 729 | ||
505 | static void __mask_and_edge_IO_APIC_irq(unsigned int irq) | 730 | static void __mask_and_edge_IO_APIC_irq(struct irq_cfg *cfg) |
506 | { | 731 | { |
507 | io_apic_modify_irq(irq, ~IO_APIC_REDIR_LEVEL_TRIGGER, | 732 | io_apic_modify_irq(cfg, ~IO_APIC_REDIR_LEVEL_TRIGGER, |
508 | IO_APIC_REDIR_MASKED, NULL); | 733 | IO_APIC_REDIR_MASKED, NULL); |
509 | } | 734 | } |
510 | 735 | ||
511 | static void __unmask_and_level_IO_APIC_irq(unsigned int irq) | 736 | static void __unmask_and_level_IO_APIC_irq(struct irq_cfg *cfg) |
512 | { | 737 | { |
513 | io_apic_modify_irq(irq, ~IO_APIC_REDIR_MASKED, | 738 | io_apic_modify_irq(cfg, ~IO_APIC_REDIR_MASKED, |
514 | IO_APIC_REDIR_LEVEL_TRIGGER, NULL); | 739 | IO_APIC_REDIR_LEVEL_TRIGGER, NULL); |
515 | } | 740 | } |
516 | #endif /* CONFIG_X86_32 */ | 741 | #endif /* CONFIG_X86_32 */ |
517 | 742 | ||
518 | static void mask_IO_APIC_irq (unsigned int irq) | 743 | static void mask_IO_APIC_irq_desc(struct irq_desc *desc) |
519 | { | 744 | { |
745 | struct irq_cfg *cfg = desc->chip_data; | ||
520 | unsigned long flags; | 746 | unsigned long flags; |
521 | 747 | ||
748 | BUG_ON(!cfg); | ||
749 | |||
522 | spin_lock_irqsave(&ioapic_lock, flags); | 750 | spin_lock_irqsave(&ioapic_lock, flags); |
523 | __mask_IO_APIC_irq(irq); | 751 | __mask_IO_APIC_irq(cfg); |
524 | spin_unlock_irqrestore(&ioapic_lock, flags); | 752 | spin_unlock_irqrestore(&ioapic_lock, flags); |
525 | } | 753 | } |
526 | 754 | ||
527 | static void unmask_IO_APIC_irq (unsigned int irq) | 755 | static void unmask_IO_APIC_irq_desc(struct irq_desc *desc) |
528 | { | 756 | { |
757 | struct irq_cfg *cfg = desc->chip_data; | ||
529 | unsigned long flags; | 758 | unsigned long flags; |
530 | 759 | ||
531 | spin_lock_irqsave(&ioapic_lock, flags); | 760 | spin_lock_irqsave(&ioapic_lock, flags); |
532 | __unmask_IO_APIC_irq(irq); | 761 | __unmask_IO_APIC_irq(cfg); |
533 | spin_unlock_irqrestore(&ioapic_lock, flags); | 762 | spin_unlock_irqrestore(&ioapic_lock, flags); |
534 | } | 763 | } |
535 | 764 | ||
765 | static void mask_IO_APIC_irq(unsigned int irq) | ||
766 | { | ||
767 | struct irq_desc *desc = irq_to_desc(irq); | ||
768 | |||
769 | mask_IO_APIC_irq_desc(desc); | ||
770 | } | ||
771 | static void unmask_IO_APIC_irq(unsigned int irq) | ||
772 | { | ||
773 | struct irq_desc *desc = irq_to_desc(irq); | ||
774 | |||
775 | unmask_IO_APIC_irq_desc(desc); | ||
776 | } | ||
777 | |||
536 | static void clear_IO_APIC_pin(unsigned int apic, unsigned int pin) | 778 | static void clear_IO_APIC_pin(unsigned int apic, unsigned int pin) |
537 | { | 779 | { |
538 | struct IO_APIC_route_entry entry; | 780 | struct IO_APIC_route_entry entry; |
@@ -809,7 +1051,7 @@ EXPORT_SYMBOL(IO_APIC_get_PCI_irq_vector); | |||
809 | */ | 1051 | */ |
810 | static int EISA_ELCR(unsigned int irq) | 1052 | static int EISA_ELCR(unsigned int irq) |
811 | { | 1053 | { |
812 | if (irq < 16) { | 1054 | if (irq < NR_IRQS_LEGACY) { |
813 | unsigned int port = 0x4d0 + (irq >> 3); | 1055 | unsigned int port = 0x4d0 + (irq >> 3); |
814 | return (inb(port) >> (irq & 7)) & 1; | 1056 | return (inb(port) >> (irq & 7)) & 1; |
815 | } | 1057 | } |
@@ -1034,7 +1276,8 @@ void unlock_vector_lock(void) | |||
1034 | spin_unlock(&vector_lock); | 1276 | spin_unlock(&vector_lock); |
1035 | } | 1277 | } |
1036 | 1278 | ||
1037 | static int __assign_irq_vector(int irq, cpumask_t mask) | 1279 | static int |
1280 | __assign_irq_vector(int irq, struct irq_cfg *cfg, const struct cpumask *mask) | ||
1038 | { | 1281 | { |
1039 | /* | 1282 | /* |
1040 | * NOTE! The local APIC isn't very good at handling | 1283 | * NOTE! The local APIC isn't very good at handling |
@@ -1049,52 +1292,49 @@ static int __assign_irq_vector(int irq, cpumask_t mask) | |||
1049 | */ | 1292 | */ |
1050 | static int current_vector = FIRST_DEVICE_VECTOR, current_offset = 0; | 1293 | static int current_vector = FIRST_DEVICE_VECTOR, current_offset = 0; |
1051 | unsigned int old_vector; | 1294 | unsigned int old_vector; |
1052 | int cpu; | 1295 | int cpu, err; |
1053 | struct irq_cfg *cfg; | 1296 | cpumask_var_t tmp_mask; |
1054 | |||
1055 | cfg = irq_cfg(irq); | ||
1056 | |||
1057 | /* Only try and allocate irqs on cpus that are present */ | ||
1058 | cpus_and(mask, mask, cpu_online_map); | ||
1059 | 1297 | ||
1060 | if ((cfg->move_in_progress) || cfg->move_cleanup_count) | 1298 | if ((cfg->move_in_progress) || cfg->move_cleanup_count) |
1061 | return -EBUSY; | 1299 | return -EBUSY; |
1062 | 1300 | ||
1301 | if (!alloc_cpumask_var(&tmp_mask, GFP_ATOMIC)) | ||
1302 | return -ENOMEM; | ||
1303 | |||
1063 | old_vector = cfg->vector; | 1304 | old_vector = cfg->vector; |
1064 | if (old_vector) { | 1305 | if (old_vector) { |
1065 | cpumask_t tmp; | 1306 | cpumask_and(tmp_mask, mask, cpu_online_mask); |
1066 | cpus_and(tmp, cfg->domain, mask); | 1307 | cpumask_and(tmp_mask, cfg->domain, tmp_mask); |
1067 | if (!cpus_empty(tmp)) | 1308 | if (!cpumask_empty(tmp_mask)) { |
1309 | free_cpumask_var(tmp_mask); | ||
1068 | return 0; | 1310 | return 0; |
1311 | } | ||
1069 | } | 1312 | } |
1070 | 1313 | ||
1071 | for_each_cpu_mask_nr(cpu, mask) { | 1314 | /* Only try and allocate irqs on cpus that are present */ |
1072 | cpumask_t domain, new_mask; | 1315 | err = -ENOSPC; |
1316 | for_each_cpu_and(cpu, mask, cpu_online_mask) { | ||
1073 | int new_cpu; | 1317 | int new_cpu; |
1074 | int vector, offset; | 1318 | int vector, offset; |
1075 | 1319 | ||
1076 | domain = vector_allocation_domain(cpu); | 1320 | vector_allocation_domain(cpu, tmp_mask); |
1077 | cpus_and(new_mask, domain, cpu_online_map); | ||
1078 | 1321 | ||
1079 | vector = current_vector; | 1322 | vector = current_vector; |
1080 | offset = current_offset; | 1323 | offset = current_offset; |
1081 | next: | 1324 | next: |
1082 | vector += 8; | 1325 | vector += 8; |
1083 | if (vector >= first_system_vector) { | 1326 | if (vector >= first_system_vector) { |
1084 | /* If we run out of vectors on large boxen, must share them. */ | 1327 | /* If out of vectors on large boxen, must share them. */ |
1085 | offset = (offset + 1) % 8; | 1328 | offset = (offset + 1) % 8; |
1086 | vector = FIRST_DEVICE_VECTOR + offset; | 1329 | vector = FIRST_DEVICE_VECTOR + offset; |
1087 | } | 1330 | } |
1088 | if (unlikely(current_vector == vector)) | 1331 | if (unlikely(current_vector == vector)) |
1089 | continue; | 1332 | continue; |
1090 | #ifdef CONFIG_X86_64 | 1333 | |
1091 | if (vector == IA32_SYSCALL_VECTOR) | 1334 | if (test_bit(vector, used_vectors)) |
1092 | goto next; | ||
1093 | #else | ||
1094 | if (vector == SYSCALL_VECTOR) | ||
1095 | goto next; | 1335 | goto next; |
1096 | #endif | 1336 | |
1097 | for_each_cpu_mask_nr(new_cpu, new_mask) | 1337 | for_each_cpu_and(new_cpu, tmp_mask, cpu_online_mask) |
1098 | if (per_cpu(vector_irq, new_cpu)[vector] != -1) | 1338 | if (per_cpu(vector_irq, new_cpu)[vector] != -1) |
1099 | goto next; | 1339 | goto next; |
1100 | /* Found one! */ | 1340 | /* Found one! */ |
@@ -1102,49 +1342,47 @@ next: | |||
1102 | current_offset = offset; | 1342 | current_offset = offset; |
1103 | if (old_vector) { | 1343 | if (old_vector) { |
1104 | cfg->move_in_progress = 1; | 1344 | cfg->move_in_progress = 1; |
1105 | cfg->old_domain = cfg->domain; | 1345 | cpumask_copy(cfg->old_domain, cfg->domain); |
1106 | } | 1346 | } |
1107 | for_each_cpu_mask_nr(new_cpu, new_mask) | 1347 | for_each_cpu_and(new_cpu, tmp_mask, cpu_online_mask) |
1108 | per_cpu(vector_irq, new_cpu)[vector] = irq; | 1348 | per_cpu(vector_irq, new_cpu)[vector] = irq; |
1109 | cfg->vector = vector; | 1349 | cfg->vector = vector; |
1110 | cfg->domain = domain; | 1350 | cpumask_copy(cfg->domain, tmp_mask); |
1111 | return 0; | 1351 | err = 0; |
1352 | break; | ||
1112 | } | 1353 | } |
1113 | return -ENOSPC; | 1354 | free_cpumask_var(tmp_mask); |
1355 | return err; | ||
1114 | } | 1356 | } |
1115 | 1357 | ||
1116 | static int assign_irq_vector(int irq, cpumask_t mask) | 1358 | static int |
1359 | assign_irq_vector(int irq, struct irq_cfg *cfg, const struct cpumask *mask) | ||
1117 | { | 1360 | { |
1118 | int err; | 1361 | int err; |
1119 | unsigned long flags; | 1362 | unsigned long flags; |
1120 | 1363 | ||
1121 | spin_lock_irqsave(&vector_lock, flags); | 1364 | spin_lock_irqsave(&vector_lock, flags); |
1122 | err = __assign_irq_vector(irq, mask); | 1365 | err = __assign_irq_vector(irq, cfg, mask); |
1123 | spin_unlock_irqrestore(&vector_lock, flags); | 1366 | spin_unlock_irqrestore(&vector_lock, flags); |
1124 | return err; | 1367 | return err; |
1125 | } | 1368 | } |
1126 | 1369 | ||
1127 | static void __clear_irq_vector(int irq) | 1370 | static void __clear_irq_vector(int irq, struct irq_cfg *cfg) |
1128 | { | 1371 | { |
1129 | struct irq_cfg *cfg; | ||
1130 | cpumask_t mask; | ||
1131 | int cpu, vector; | 1372 | int cpu, vector; |
1132 | 1373 | ||
1133 | cfg = irq_cfg(irq); | ||
1134 | BUG_ON(!cfg->vector); | 1374 | BUG_ON(!cfg->vector); |
1135 | 1375 | ||
1136 | vector = cfg->vector; | 1376 | vector = cfg->vector; |
1137 | cpus_and(mask, cfg->domain, cpu_online_map); | 1377 | for_each_cpu_and(cpu, cfg->domain, cpu_online_mask) |
1138 | for_each_cpu_mask_nr(cpu, mask) | ||
1139 | per_cpu(vector_irq, cpu)[vector] = -1; | 1378 | per_cpu(vector_irq, cpu)[vector] = -1; |
1140 | 1379 | ||
1141 | cfg->vector = 0; | 1380 | cfg->vector = 0; |
1142 | cpus_clear(cfg->domain); | 1381 | cpumask_clear(cfg->domain); |
1143 | 1382 | ||
1144 | if (likely(!cfg->move_in_progress)) | 1383 | if (likely(!cfg->move_in_progress)) |
1145 | return; | 1384 | return; |
1146 | cpus_and(mask, cfg->old_domain, cpu_online_map); | 1385 | for_each_cpu_and(cpu, cfg->old_domain, cpu_online_mask) { |
1147 | for_each_cpu_mask_nr(cpu, mask) { | ||
1148 | for (vector = FIRST_EXTERNAL_VECTOR; vector < NR_VECTORS; | 1386 | for (vector = FIRST_EXTERNAL_VECTOR; vector < NR_VECTORS; |
1149 | vector++) { | 1387 | vector++) { |
1150 | if (per_cpu(vector_irq, cpu)[vector] != irq) | 1388 | if (per_cpu(vector_irq, cpu)[vector] != irq) |
@@ -1162,10 +1400,12 @@ void __setup_vector_irq(int cpu) | |||
1162 | /* This function must be called with vector_lock held */ | 1400 | /* This function must be called with vector_lock held */ |
1163 | int irq, vector; | 1401 | int irq, vector; |
1164 | struct irq_cfg *cfg; | 1402 | struct irq_cfg *cfg; |
1403 | struct irq_desc *desc; | ||
1165 | 1404 | ||
1166 | /* Mark the inuse vectors */ | 1405 | /* Mark the inuse vectors */ |
1167 | for_each_irq_cfg(irq, cfg) { | 1406 | for_each_irq_desc(irq, desc) { |
1168 | if (!cpu_isset(cpu, cfg->domain)) | 1407 | cfg = desc->chip_data; |
1408 | if (!cpumask_test_cpu(cpu, cfg->domain)) | ||
1169 | continue; | 1409 | continue; |
1170 | vector = cfg->vector; | 1410 | vector = cfg->vector; |
1171 | per_cpu(vector_irq, cpu)[vector] = irq; | 1411 | per_cpu(vector_irq, cpu)[vector] = irq; |
@@ -1177,7 +1417,7 @@ void __setup_vector_irq(int cpu) | |||
1177 | continue; | 1417 | continue; |
1178 | 1418 | ||
1179 | cfg = irq_cfg(irq); | 1419 | cfg = irq_cfg(irq); |
1180 | if (!cpu_isset(cpu, cfg->domain)) | 1420 | if (!cpumask_test_cpu(cpu, cfg->domain)) |
1181 | per_cpu(vector_irq, cpu)[vector] = -1; | 1421 | per_cpu(vector_irq, cpu)[vector] = -1; |
1182 | } | 1422 | } |
1183 | } | 1423 | } |
@@ -1215,11 +1455,8 @@ static inline int IO_APIC_irq_trigger(int irq) | |||
1215 | } | 1455 | } |
1216 | #endif | 1456 | #endif |
1217 | 1457 | ||
1218 | static void ioapic_register_intr(int irq, unsigned long trigger) | 1458 | static void ioapic_register_intr(int irq, struct irq_desc *desc, unsigned long trigger) |
1219 | { | 1459 | { |
1220 | struct irq_desc *desc; | ||
1221 | |||
1222 | desc = irq_to_desc(irq); | ||
1223 | 1460 | ||
1224 | if ((trigger == IOAPIC_AUTO && IO_APIC_irq_trigger(irq)) || | 1461 | if ((trigger == IOAPIC_AUTO && IO_APIC_irq_trigger(irq)) || |
1225 | trigger == IOAPIC_LEVEL) | 1462 | trigger == IOAPIC_LEVEL) |
@@ -1311,23 +1548,22 @@ static int setup_ioapic_entry(int apic, int irq, | |||
1311 | return 0; | 1548 | return 0; |
1312 | } | 1549 | } |
1313 | 1550 | ||
1314 | static void setup_IO_APIC_irq(int apic, int pin, unsigned int irq, | 1551 | static void setup_IO_APIC_irq(int apic, int pin, unsigned int irq, struct irq_desc *desc, |
1315 | int trigger, int polarity) | 1552 | int trigger, int polarity) |
1316 | { | 1553 | { |
1317 | struct irq_cfg *cfg; | 1554 | struct irq_cfg *cfg; |
1318 | struct IO_APIC_route_entry entry; | 1555 | struct IO_APIC_route_entry entry; |
1319 | cpumask_t mask; | 1556 | unsigned int dest; |
1320 | 1557 | ||
1321 | if (!IO_APIC_IRQ(irq)) | 1558 | if (!IO_APIC_IRQ(irq)) |
1322 | return; | 1559 | return; |
1323 | 1560 | ||
1324 | cfg = irq_cfg(irq); | 1561 | cfg = desc->chip_data; |
1325 | 1562 | ||
1326 | mask = TARGET_CPUS; | 1563 | if (assign_irq_vector(irq, cfg, TARGET_CPUS)) |
1327 | if (assign_irq_vector(irq, mask)) | ||
1328 | return; | 1564 | return; |
1329 | 1565 | ||
1330 | cpus_and(mask, cfg->domain, mask); | 1566 | dest = cpu_mask_to_apicid_and(cfg->domain, TARGET_CPUS); |
1331 | 1567 | ||
1332 | apic_printk(APIC_VERBOSE,KERN_DEBUG | 1568 | apic_printk(APIC_VERBOSE,KERN_DEBUG |
1333 | "IOAPIC[%d]: Set routing entry (%d-%d -> 0x%x -> " | 1569 | "IOAPIC[%d]: Set routing entry (%d-%d -> 0x%x -> " |
@@ -1337,16 +1573,15 @@ static void setup_IO_APIC_irq(int apic, int pin, unsigned int irq, | |||
1337 | 1573 | ||
1338 | 1574 | ||
1339 | if (setup_ioapic_entry(mp_ioapics[apic].mp_apicid, irq, &entry, | 1575 | if (setup_ioapic_entry(mp_ioapics[apic].mp_apicid, irq, &entry, |
1340 | cpu_mask_to_apicid(mask), trigger, polarity, | 1576 | dest, trigger, polarity, cfg->vector)) { |
1341 | cfg->vector)) { | ||
1342 | printk("Failed to setup ioapic entry for ioapic %d, pin %d\n", | 1577 | printk("Failed to setup ioapic entry for ioapic %d, pin %d\n", |
1343 | mp_ioapics[apic].mp_apicid, pin); | 1578 | mp_ioapics[apic].mp_apicid, pin); |
1344 | __clear_irq_vector(irq); | 1579 | __clear_irq_vector(irq, cfg); |
1345 | return; | 1580 | return; |
1346 | } | 1581 | } |
1347 | 1582 | ||
1348 | ioapic_register_intr(irq, trigger); | 1583 | ioapic_register_intr(irq, desc, trigger); |
1349 | if (irq < 16) | 1584 | if (irq < NR_IRQS_LEGACY) |
1350 | disable_8259A_irq(irq); | 1585 | disable_8259A_irq(irq); |
1351 | 1586 | ||
1352 | ioapic_write_entry(apic, pin, entry); | 1587 | ioapic_write_entry(apic, pin, entry); |
@@ -1356,6 +1591,9 @@ static void __init setup_IO_APIC_irqs(void) | |||
1356 | { | 1591 | { |
1357 | int apic, pin, idx, irq; | 1592 | int apic, pin, idx, irq; |
1358 | int notcon = 0; | 1593 | int notcon = 0; |
1594 | struct irq_desc *desc; | ||
1595 | struct irq_cfg *cfg; | ||
1596 | int cpu = boot_cpu_id; | ||
1359 | 1597 | ||
1360 | apic_printk(APIC_VERBOSE, KERN_DEBUG "init IO_APIC IRQs\n"); | 1598 | apic_printk(APIC_VERBOSE, KERN_DEBUG "init IO_APIC IRQs\n"); |
1361 | 1599 | ||
@@ -1387,9 +1625,15 @@ static void __init setup_IO_APIC_irqs(void) | |||
1387 | if (multi_timer_check(apic, irq)) | 1625 | if (multi_timer_check(apic, irq)) |
1388 | continue; | 1626 | continue; |
1389 | #endif | 1627 | #endif |
1390 | add_pin_to_irq(irq, apic, pin); | 1628 | desc = irq_to_desc_alloc_cpu(irq, cpu); |
1629 | if (!desc) { | ||
1630 | printk(KERN_INFO "can not get irq_desc for %d\n", irq); | ||
1631 | continue; | ||
1632 | } | ||
1633 | cfg = desc->chip_data; | ||
1634 | add_pin_to_irq_cpu(cfg, cpu, apic, pin); | ||
1391 | 1635 | ||
1392 | setup_IO_APIC_irq(apic, pin, irq, | 1636 | setup_IO_APIC_irq(apic, pin, irq, desc, |
1393 | irq_trigger(idx), irq_polarity(idx)); | 1637 | irq_trigger(idx), irq_polarity(idx)); |
1394 | } | 1638 | } |
1395 | } | 1639 | } |
@@ -1448,6 +1692,7 @@ __apicdebuginit(void) print_IO_APIC(void) | |||
1448 | union IO_APIC_reg_03 reg_03; | 1692 | union IO_APIC_reg_03 reg_03; |
1449 | unsigned long flags; | 1693 | unsigned long flags; |
1450 | struct irq_cfg *cfg; | 1694 | struct irq_cfg *cfg; |
1695 | struct irq_desc *desc; | ||
1451 | unsigned int irq; | 1696 | unsigned int irq; |
1452 | 1697 | ||
1453 | if (apic_verbosity == APIC_QUIET) | 1698 | if (apic_verbosity == APIC_QUIET) |
@@ -1537,8 +1782,11 @@ __apicdebuginit(void) print_IO_APIC(void) | |||
1537 | } | 1782 | } |
1538 | } | 1783 | } |
1539 | printk(KERN_DEBUG "IRQ to pin mappings:\n"); | 1784 | printk(KERN_DEBUG "IRQ to pin mappings:\n"); |
1540 | for_each_irq_cfg(irq, cfg) { | 1785 | for_each_irq_desc(irq, desc) { |
1541 | struct irq_pin_list *entry = cfg->irq_2_pin; | 1786 | struct irq_pin_list *entry; |
1787 | |||
1788 | cfg = desc->chip_data; | ||
1789 | entry = cfg->irq_2_pin; | ||
1542 | if (!entry) | 1790 | if (!entry) |
1543 | continue; | 1791 | continue; |
1544 | printk(KERN_DEBUG "IRQ%d ", irq); | 1792 | printk(KERN_DEBUG "IRQ%d ", irq); |
@@ -2022,14 +2270,16 @@ static unsigned int startup_ioapic_irq(unsigned int irq) | |||
2022 | { | 2270 | { |
2023 | int was_pending = 0; | 2271 | int was_pending = 0; |
2024 | unsigned long flags; | 2272 | unsigned long flags; |
2273 | struct irq_cfg *cfg; | ||
2025 | 2274 | ||
2026 | spin_lock_irqsave(&ioapic_lock, flags); | 2275 | spin_lock_irqsave(&ioapic_lock, flags); |
2027 | if (irq < 16) { | 2276 | if (irq < NR_IRQS_LEGACY) { |
2028 | disable_8259A_irq(irq); | 2277 | disable_8259A_irq(irq); |
2029 | if (i8259A_irq_pending(irq)) | 2278 | if (i8259A_irq_pending(irq)) |
2030 | was_pending = 1; | 2279 | was_pending = 1; |
2031 | } | 2280 | } |
2032 | __unmask_IO_APIC_irq(irq); | 2281 | cfg = irq_cfg(irq); |
2282 | __unmask_IO_APIC_irq(cfg); | ||
2033 | spin_unlock_irqrestore(&ioapic_lock, flags); | 2283 | spin_unlock_irqrestore(&ioapic_lock, flags); |
2034 | 2284 | ||
2035 | return was_pending; | 2285 | return was_pending; |
@@ -2043,7 +2293,7 @@ static int ioapic_retrigger_irq(unsigned int irq) | |||
2043 | unsigned long flags; | 2293 | unsigned long flags; |
2044 | 2294 | ||
2045 | spin_lock_irqsave(&vector_lock, flags); | 2295 | spin_lock_irqsave(&vector_lock, flags); |
2046 | send_IPI_mask(cpumask_of_cpu(first_cpu(cfg->domain)), cfg->vector); | 2296 | send_IPI_mask(cpumask_of(cpumask_first(cfg->domain)), cfg->vector); |
2047 | spin_unlock_irqrestore(&vector_lock, flags); | 2297 | spin_unlock_irqrestore(&vector_lock, flags); |
2048 | 2298 | ||
2049 | return 1; | 2299 | return 1; |
@@ -2092,35 +2342,35 @@ static DECLARE_DELAYED_WORK(ir_migration_work, ir_irq_migration); | |||
2092 | * as simple as edge triggered migration and we can do the irq migration | 2342 | * as simple as edge triggered migration and we can do the irq migration |
2093 | * with a simple atomic update to IO-APIC RTE. | 2343 | * with a simple atomic update to IO-APIC RTE. |
2094 | */ | 2344 | */ |
2095 | static void migrate_ioapic_irq(int irq, cpumask_t mask) | 2345 | static void |
2346 | migrate_ioapic_irq_desc(struct irq_desc *desc, const struct cpumask *mask) | ||
2096 | { | 2347 | { |
2097 | struct irq_cfg *cfg; | 2348 | struct irq_cfg *cfg; |
2098 | struct irq_desc *desc; | ||
2099 | cpumask_t tmp, cleanup_mask; | ||
2100 | struct irte irte; | 2349 | struct irte irte; |
2101 | int modify_ioapic_rte; | 2350 | int modify_ioapic_rte; |
2102 | unsigned int dest; | 2351 | unsigned int dest; |
2103 | unsigned long flags; | 2352 | unsigned long flags; |
2353 | unsigned int irq; | ||
2104 | 2354 | ||
2105 | cpus_and(tmp, mask, cpu_online_map); | 2355 | if (!cpumask_intersects(mask, cpu_online_mask)) |
2106 | if (cpus_empty(tmp)) | ||
2107 | return; | 2356 | return; |
2108 | 2357 | ||
2358 | irq = desc->irq; | ||
2109 | if (get_irte(irq, &irte)) | 2359 | if (get_irte(irq, &irte)) |
2110 | return; | 2360 | return; |
2111 | 2361 | ||
2112 | if (assign_irq_vector(irq, mask)) | 2362 | cfg = desc->chip_data; |
2363 | if (assign_irq_vector(irq, cfg, mask)) | ||
2113 | return; | 2364 | return; |
2114 | 2365 | ||
2115 | cfg = irq_cfg(irq); | 2366 | set_extra_move_desc(desc, mask); |
2116 | cpus_and(tmp, cfg->domain, mask); | 2367 | |
2117 | dest = cpu_mask_to_apicid(tmp); | 2368 | dest = cpu_mask_to_apicid_and(cfg->domain, mask); |
2118 | 2369 | ||
2119 | desc = irq_to_desc(irq); | ||
2120 | modify_ioapic_rte = desc->status & IRQ_LEVEL; | 2370 | modify_ioapic_rte = desc->status & IRQ_LEVEL; |
2121 | if (modify_ioapic_rte) { | 2371 | if (modify_ioapic_rte) { |
2122 | spin_lock_irqsave(&ioapic_lock, flags); | 2372 | spin_lock_irqsave(&ioapic_lock, flags); |
2123 | __target_IO_APIC_irq(irq, dest, cfg->vector); | 2373 | __target_IO_APIC_irq(irq, dest, cfg); |
2124 | spin_unlock_irqrestore(&ioapic_lock, flags); | 2374 | spin_unlock_irqrestore(&ioapic_lock, flags); |
2125 | } | 2375 | } |
2126 | 2376 | ||
@@ -2132,24 +2382,20 @@ static void migrate_ioapic_irq(int irq, cpumask_t mask) | |||
2132 | */ | 2382 | */ |
2133 | modify_irte(irq, &irte); | 2383 | modify_irte(irq, &irte); |
2134 | 2384 | ||
2135 | if (cfg->move_in_progress) { | 2385 | if (cfg->move_in_progress) |
2136 | cpus_and(cleanup_mask, cfg->old_domain, cpu_online_map); | 2386 | send_cleanup_vector(cfg); |
2137 | cfg->move_cleanup_count = cpus_weight(cleanup_mask); | ||
2138 | send_IPI_mask(cleanup_mask, IRQ_MOVE_CLEANUP_VECTOR); | ||
2139 | cfg->move_in_progress = 0; | ||
2140 | } | ||
2141 | 2387 | ||
2142 | desc->affinity = mask; | 2388 | cpumask_copy(&desc->affinity, mask); |
2143 | } | 2389 | } |
2144 | 2390 | ||
2145 | static int migrate_irq_remapped_level(int irq) | 2391 | static int migrate_irq_remapped_level_desc(struct irq_desc *desc) |
2146 | { | 2392 | { |
2147 | int ret = -1; | 2393 | int ret = -1; |
2148 | struct irq_desc *desc = irq_to_desc(irq); | 2394 | struct irq_cfg *cfg = desc->chip_data; |
2149 | 2395 | ||
2150 | mask_IO_APIC_irq(irq); | 2396 | mask_IO_APIC_irq_desc(desc); |
2151 | 2397 | ||
2152 | if (io_apic_level_ack_pending(irq)) { | 2398 | if (io_apic_level_ack_pending(cfg)) { |
2153 | /* | 2399 | /* |
2154 | * Interrupt in progress. Migrating irq now will change the | 2400 | * Interrupt in progress. Migrating irq now will change the |
2155 | * vector information in the IO-APIC RTE and that will confuse | 2401 | * vector information in the IO-APIC RTE and that will confuse |
@@ -2161,14 +2407,15 @@ static int migrate_irq_remapped_level(int irq) | |||
2161 | } | 2407 | } |
2162 | 2408 | ||
2163 | /* everthing is clear. we have right of way */ | 2409 | /* everthing is clear. we have right of way */ |
2164 | migrate_ioapic_irq(irq, desc->pending_mask); | 2410 | migrate_ioapic_irq_desc(desc, &desc->pending_mask); |
2165 | 2411 | ||
2166 | ret = 0; | 2412 | ret = 0; |
2167 | desc->status &= ~IRQ_MOVE_PENDING; | 2413 | desc->status &= ~IRQ_MOVE_PENDING; |
2168 | cpus_clear(desc->pending_mask); | 2414 | cpumask_clear(&desc->pending_mask); |
2169 | 2415 | ||
2170 | unmask: | 2416 | unmask: |
2171 | unmask_IO_APIC_irq(irq); | 2417 | unmask_IO_APIC_irq_desc(desc); |
2418 | |||
2172 | return ret; | 2419 | return ret; |
2173 | } | 2420 | } |
2174 | 2421 | ||
@@ -2189,7 +2436,7 @@ static void ir_irq_migration(struct work_struct *work) | |||
2189 | continue; | 2436 | continue; |
2190 | } | 2437 | } |
2191 | 2438 | ||
2192 | desc->chip->set_affinity(irq, desc->pending_mask); | 2439 | desc->chip->set_affinity(irq, &desc->pending_mask); |
2193 | spin_unlock_irqrestore(&desc->lock, flags); | 2440 | spin_unlock_irqrestore(&desc->lock, flags); |
2194 | } | 2441 | } |
2195 | } | 2442 | } |
@@ -2198,28 +2445,33 @@ static void ir_irq_migration(struct work_struct *work) | |||
2198 | /* | 2445 | /* |
2199 | * Migrates the IRQ destination in the process context. | 2446 | * Migrates the IRQ destination in the process context. |
2200 | */ | 2447 | */ |
2201 | static void set_ir_ioapic_affinity_irq(unsigned int irq, cpumask_t mask) | 2448 | static void set_ir_ioapic_affinity_irq_desc(struct irq_desc *desc, |
2449 | const struct cpumask *mask) | ||
2202 | { | 2450 | { |
2203 | struct irq_desc *desc = irq_to_desc(irq); | ||
2204 | |||
2205 | if (desc->status & IRQ_LEVEL) { | 2451 | if (desc->status & IRQ_LEVEL) { |
2206 | desc->status |= IRQ_MOVE_PENDING; | 2452 | desc->status |= IRQ_MOVE_PENDING; |
2207 | desc->pending_mask = mask; | 2453 | cpumask_copy(&desc->pending_mask, mask); |
2208 | migrate_irq_remapped_level(irq); | 2454 | migrate_irq_remapped_level_desc(desc); |
2209 | return; | 2455 | return; |
2210 | } | 2456 | } |
2211 | 2457 | ||
2212 | migrate_ioapic_irq(irq, mask); | 2458 | migrate_ioapic_irq_desc(desc, mask); |
2459 | } | ||
2460 | static void set_ir_ioapic_affinity_irq(unsigned int irq, | ||
2461 | const struct cpumask *mask) | ||
2462 | { | ||
2463 | struct irq_desc *desc = irq_to_desc(irq); | ||
2464 | |||
2465 | set_ir_ioapic_affinity_irq_desc(desc, mask); | ||
2213 | } | 2466 | } |
2214 | #endif | 2467 | #endif |
2215 | 2468 | ||
2216 | asmlinkage void smp_irq_move_cleanup_interrupt(void) | 2469 | asmlinkage void smp_irq_move_cleanup_interrupt(void) |
2217 | { | 2470 | { |
2218 | unsigned vector, me; | 2471 | unsigned vector, me; |
2472 | |||
2219 | ack_APIC_irq(); | 2473 | ack_APIC_irq(); |
2220 | #ifdef CONFIG_X86_64 | ||
2221 | exit_idle(); | 2474 | exit_idle(); |
2222 | #endif | ||
2223 | irq_enter(); | 2475 | irq_enter(); |
2224 | 2476 | ||
2225 | me = smp_processor_id(); | 2477 | me = smp_processor_id(); |
@@ -2229,6 +2481,9 @@ asmlinkage void smp_irq_move_cleanup_interrupt(void) | |||
2229 | struct irq_cfg *cfg; | 2481 | struct irq_cfg *cfg; |
2230 | irq = __get_cpu_var(vector_irq)[vector]; | 2482 | irq = __get_cpu_var(vector_irq)[vector]; |
2231 | 2483 | ||
2484 | if (irq == -1) | ||
2485 | continue; | ||
2486 | |||
2232 | desc = irq_to_desc(irq); | 2487 | desc = irq_to_desc(irq); |
2233 | if (!desc) | 2488 | if (!desc) |
2234 | continue; | 2489 | continue; |
@@ -2238,7 +2493,7 @@ asmlinkage void smp_irq_move_cleanup_interrupt(void) | |||
2238 | if (!cfg->move_cleanup_count) | 2493 | if (!cfg->move_cleanup_count) |
2239 | goto unlock; | 2494 | goto unlock; |
2240 | 2495 | ||
2241 | if ((vector == cfg->vector) && cpu_isset(me, cfg->domain)) | 2496 | if (vector == cfg->vector && cpumask_test_cpu(me, cfg->domain)) |
2242 | goto unlock; | 2497 | goto unlock; |
2243 | 2498 | ||
2244 | __get_cpu_var(vector_irq)[vector] = -1; | 2499 | __get_cpu_var(vector_irq)[vector] = -1; |
@@ -2250,28 +2505,44 @@ unlock: | |||
2250 | irq_exit(); | 2505 | irq_exit(); |
2251 | } | 2506 | } |
2252 | 2507 | ||
2253 | static void irq_complete_move(unsigned int irq) | 2508 | static void irq_complete_move(struct irq_desc **descp) |
2254 | { | 2509 | { |
2255 | struct irq_cfg *cfg = irq_cfg(irq); | 2510 | struct irq_desc *desc = *descp; |
2511 | struct irq_cfg *cfg = desc->chip_data; | ||
2256 | unsigned vector, me; | 2512 | unsigned vector, me; |
2257 | 2513 | ||
2258 | if (likely(!cfg->move_in_progress)) | 2514 | if (likely(!cfg->move_in_progress)) { |
2515 | #ifdef CONFIG_NUMA_MIGRATE_IRQ_DESC | ||
2516 | if (likely(!cfg->move_desc_pending)) | ||
2517 | return; | ||
2518 | |||
2519 | /* domain has not changed, but affinity did */ | ||
2520 | me = smp_processor_id(); | ||
2521 | if (cpu_isset(me, desc->affinity)) { | ||
2522 | *descp = desc = move_irq_desc(desc, me); | ||
2523 | /* get the new one */ | ||
2524 | cfg = desc->chip_data; | ||
2525 | cfg->move_desc_pending = 0; | ||
2526 | } | ||
2527 | #endif | ||
2259 | return; | 2528 | return; |
2529 | } | ||
2260 | 2530 | ||
2261 | vector = ~get_irq_regs()->orig_ax; | 2531 | vector = ~get_irq_regs()->orig_ax; |
2262 | me = smp_processor_id(); | 2532 | me = smp_processor_id(); |
2263 | if ((vector == cfg->vector) && cpu_isset(me, cfg->domain)) { | 2533 | #ifdef CONFIG_NUMA_MIGRATE_IRQ_DESC |
2264 | cpumask_t cleanup_mask; | 2534 | *descp = desc = move_irq_desc(desc, me); |
2535 | /* get the new one */ | ||
2536 | cfg = desc->chip_data; | ||
2537 | #endif | ||
2265 | 2538 | ||
2266 | cpus_and(cleanup_mask, cfg->old_domain, cpu_online_map); | 2539 | if (vector == cfg->vector && cpumask_test_cpu(me, cfg->domain)) |
2267 | cfg->move_cleanup_count = cpus_weight(cleanup_mask); | 2540 | send_cleanup_vector(cfg); |
2268 | send_IPI_mask(cleanup_mask, IRQ_MOVE_CLEANUP_VECTOR); | ||
2269 | cfg->move_in_progress = 0; | ||
2270 | } | ||
2271 | } | 2541 | } |
2272 | #else | 2542 | #else |
2273 | static inline void irq_complete_move(unsigned int irq) {} | 2543 | static inline void irq_complete_move(struct irq_desc **descp) {} |
2274 | #endif | 2544 | #endif |
2545 | |||
2275 | #ifdef CONFIG_INTR_REMAP | 2546 | #ifdef CONFIG_INTR_REMAP |
2276 | static void ack_x2apic_level(unsigned int irq) | 2547 | static void ack_x2apic_level(unsigned int irq) |
2277 | { | 2548 | { |
@@ -2282,11 +2553,14 @@ static void ack_x2apic_edge(unsigned int irq) | |||
2282 | { | 2553 | { |
2283 | ack_x2APIC_irq(); | 2554 | ack_x2APIC_irq(); |
2284 | } | 2555 | } |
2556 | |||
2285 | #endif | 2557 | #endif |
2286 | 2558 | ||
2287 | static void ack_apic_edge(unsigned int irq) | 2559 | static void ack_apic_edge(unsigned int irq) |
2288 | { | 2560 | { |
2289 | irq_complete_move(irq); | 2561 | struct irq_desc *desc = irq_to_desc(irq); |
2562 | |||
2563 | irq_complete_move(&desc); | ||
2290 | move_native_irq(irq); | 2564 | move_native_irq(irq); |
2291 | ack_APIC_irq(); | 2565 | ack_APIC_irq(); |
2292 | } | 2566 | } |
@@ -2295,18 +2569,21 @@ atomic_t irq_mis_count; | |||
2295 | 2569 | ||
2296 | static void ack_apic_level(unsigned int irq) | 2570 | static void ack_apic_level(unsigned int irq) |
2297 | { | 2571 | { |
2572 | struct irq_desc *desc = irq_to_desc(irq); | ||
2573 | |||
2298 | #ifdef CONFIG_X86_32 | 2574 | #ifdef CONFIG_X86_32 |
2299 | unsigned long v; | 2575 | unsigned long v; |
2300 | int i; | 2576 | int i; |
2301 | #endif | 2577 | #endif |
2578 | struct irq_cfg *cfg; | ||
2302 | int do_unmask_irq = 0; | 2579 | int do_unmask_irq = 0; |
2303 | 2580 | ||
2304 | irq_complete_move(irq); | 2581 | irq_complete_move(&desc); |
2305 | #ifdef CONFIG_GENERIC_PENDING_IRQ | 2582 | #ifdef CONFIG_GENERIC_PENDING_IRQ |
2306 | /* If we are moving the irq we need to mask it */ | 2583 | /* If we are moving the irq we need to mask it */ |
2307 | if (unlikely(irq_to_desc(irq)->status & IRQ_MOVE_PENDING)) { | 2584 | if (unlikely(desc->status & IRQ_MOVE_PENDING)) { |
2308 | do_unmask_irq = 1; | 2585 | do_unmask_irq = 1; |
2309 | mask_IO_APIC_irq(irq); | 2586 | mask_IO_APIC_irq_desc(desc); |
2310 | } | 2587 | } |
2311 | #endif | 2588 | #endif |
2312 | 2589 | ||
@@ -2330,7 +2607,8 @@ static void ack_apic_level(unsigned int irq) | |||
2330 | * operation to prevent an edge-triggered interrupt escaping meanwhile. | 2607 | * operation to prevent an edge-triggered interrupt escaping meanwhile. |
2331 | * The idea is from Manfred Spraul. --macro | 2608 | * The idea is from Manfred Spraul. --macro |
2332 | */ | 2609 | */ |
2333 | i = irq_cfg(irq)->vector; | 2610 | cfg = desc->chip_data; |
2611 | i = cfg->vector; | ||
2334 | 2612 | ||
2335 | v = apic_read(APIC_TMR + ((i & ~0x1f) >> 1)); | 2613 | v = apic_read(APIC_TMR + ((i & ~0x1f) >> 1)); |
2336 | #endif | 2614 | #endif |
@@ -2369,17 +2647,18 @@ static void ack_apic_level(unsigned int irq) | |||
2369 | * accurate and is causing problems then it is a hardware bug | 2647 | * accurate and is causing problems then it is a hardware bug |
2370 | * and you can go talk to the chipset vendor about it. | 2648 | * and you can go talk to the chipset vendor about it. |
2371 | */ | 2649 | */ |
2372 | if (!io_apic_level_ack_pending(irq)) | 2650 | cfg = desc->chip_data; |
2651 | if (!io_apic_level_ack_pending(cfg)) | ||
2373 | move_masked_irq(irq); | 2652 | move_masked_irq(irq); |
2374 | unmask_IO_APIC_irq(irq); | 2653 | unmask_IO_APIC_irq_desc(desc); |
2375 | } | 2654 | } |
2376 | 2655 | ||
2377 | #ifdef CONFIG_X86_32 | 2656 | #ifdef CONFIG_X86_32 |
2378 | if (!(v & (1 << (i & 0x1f)))) { | 2657 | if (!(v & (1 << (i & 0x1f)))) { |
2379 | atomic_inc(&irq_mis_count); | 2658 | atomic_inc(&irq_mis_count); |
2380 | spin_lock(&ioapic_lock); | 2659 | spin_lock(&ioapic_lock); |
2381 | __mask_and_edge_IO_APIC_irq(irq); | 2660 | __mask_and_edge_IO_APIC_irq(cfg); |
2382 | __unmask_and_level_IO_APIC_irq(irq); | 2661 | __unmask_and_level_IO_APIC_irq(cfg); |
2383 | spin_unlock(&ioapic_lock); | 2662 | spin_unlock(&ioapic_lock); |
2384 | } | 2663 | } |
2385 | #endif | 2664 | #endif |
@@ -2430,20 +2709,19 @@ static inline void init_IO_APIC_traps(void) | |||
2430 | * Also, we've got to be careful not to trash gate | 2709 | * Also, we've got to be careful not to trash gate |
2431 | * 0x80, because int 0x80 is hm, kind of importantish. ;) | 2710 | * 0x80, because int 0x80 is hm, kind of importantish. ;) |
2432 | */ | 2711 | */ |
2433 | for_each_irq_cfg(irq, cfg) { | 2712 | for_each_irq_desc(irq, desc) { |
2434 | if (IO_APIC_IRQ(irq) && !cfg->vector) { | 2713 | cfg = desc->chip_data; |
2714 | if (IO_APIC_IRQ(irq) && cfg && !cfg->vector) { | ||
2435 | /* | 2715 | /* |
2436 | * Hmm.. We don't have an entry for this, | 2716 | * Hmm.. We don't have an entry for this, |
2437 | * so default to an old-fashioned 8259 | 2717 | * so default to an old-fashioned 8259 |
2438 | * interrupt if we can.. | 2718 | * interrupt if we can.. |
2439 | */ | 2719 | */ |
2440 | if (irq < 16) | 2720 | if (irq < NR_IRQS_LEGACY) |
2441 | make_8259A_irq(irq); | 2721 | make_8259A_irq(irq); |
2442 | else { | 2722 | else |
2443 | desc = irq_to_desc(irq); | ||
2444 | /* Strange. Oh, well.. */ | 2723 | /* Strange. Oh, well.. */ |
2445 | desc->chip = &no_irq_chip; | 2724 | desc->chip = &no_irq_chip; |
2446 | } | ||
2447 | } | 2725 | } |
2448 | } | 2726 | } |
2449 | } | 2727 | } |
@@ -2468,7 +2746,7 @@ static void unmask_lapic_irq(unsigned int irq) | |||
2468 | apic_write(APIC_LVT0, v & ~APIC_LVT_MASKED); | 2746 | apic_write(APIC_LVT0, v & ~APIC_LVT_MASKED); |
2469 | } | 2747 | } |
2470 | 2748 | ||
2471 | static void ack_lapic_irq (unsigned int irq) | 2749 | static void ack_lapic_irq(unsigned int irq) |
2472 | { | 2750 | { |
2473 | ack_APIC_irq(); | 2751 | ack_APIC_irq(); |
2474 | } | 2752 | } |
@@ -2480,11 +2758,8 @@ static struct irq_chip lapic_chip __read_mostly = { | |||
2480 | .ack = ack_lapic_irq, | 2758 | .ack = ack_lapic_irq, |
2481 | }; | 2759 | }; |
2482 | 2760 | ||
2483 | static void lapic_register_intr(int irq) | 2761 | static void lapic_register_intr(int irq, struct irq_desc *desc) |
2484 | { | 2762 | { |
2485 | struct irq_desc *desc; | ||
2486 | |||
2487 | desc = irq_to_desc(irq); | ||
2488 | desc->status &= ~IRQ_LEVEL; | 2763 | desc->status &= ~IRQ_LEVEL; |
2489 | set_irq_chip_and_handler_name(irq, &lapic_chip, handle_edge_irq, | 2764 | set_irq_chip_and_handler_name(irq, &lapic_chip, handle_edge_irq, |
2490 | "edge"); | 2765 | "edge"); |
@@ -2588,7 +2863,9 @@ int timer_through_8259 __initdata; | |||
2588 | */ | 2863 | */ |
2589 | static inline void __init check_timer(void) | 2864 | static inline void __init check_timer(void) |
2590 | { | 2865 | { |
2591 | struct irq_cfg *cfg = irq_cfg(0); | 2866 | struct irq_desc *desc = irq_to_desc(0); |
2867 | struct irq_cfg *cfg = desc->chip_data; | ||
2868 | int cpu = boot_cpu_id; | ||
2592 | int apic1, pin1, apic2, pin2; | 2869 | int apic1, pin1, apic2, pin2; |
2593 | unsigned long flags; | 2870 | unsigned long flags; |
2594 | unsigned int ver; | 2871 | unsigned int ver; |
@@ -2603,7 +2880,7 @@ static inline void __init check_timer(void) | |||
2603 | * get/set the timer IRQ vector: | 2880 | * get/set the timer IRQ vector: |
2604 | */ | 2881 | */ |
2605 | disable_8259A_irq(0); | 2882 | disable_8259A_irq(0); |
2606 | assign_irq_vector(0, TARGET_CPUS); | 2883 | assign_irq_vector(0, cfg, TARGET_CPUS); |
2607 | 2884 | ||
2608 | /* | 2885 | /* |
2609 | * As IRQ0 is to be enabled in the 8259A, the virtual | 2886 | * As IRQ0 is to be enabled in the 8259A, the virtual |
@@ -2654,10 +2931,10 @@ static inline void __init check_timer(void) | |||
2654 | * Ok, does IRQ0 through the IOAPIC work? | 2931 | * Ok, does IRQ0 through the IOAPIC work? |
2655 | */ | 2932 | */ |
2656 | if (no_pin1) { | 2933 | if (no_pin1) { |
2657 | add_pin_to_irq(0, apic1, pin1); | 2934 | add_pin_to_irq_cpu(cfg, cpu, apic1, pin1); |
2658 | setup_timer_IRQ0_pin(apic1, pin1, cfg->vector); | 2935 | setup_timer_IRQ0_pin(apic1, pin1, cfg->vector); |
2659 | } | 2936 | } |
2660 | unmask_IO_APIC_irq(0); | 2937 | unmask_IO_APIC_irq_desc(desc); |
2661 | if (timer_irq_works()) { | 2938 | if (timer_irq_works()) { |
2662 | if (nmi_watchdog == NMI_IO_APIC) { | 2939 | if (nmi_watchdog == NMI_IO_APIC) { |
2663 | setup_nmi(); | 2940 | setup_nmi(); |
@@ -2683,9 +2960,9 @@ static inline void __init check_timer(void) | |||
2683 | /* | 2960 | /* |
2684 | * legacy devices should be connected to IO APIC #0 | 2961 | * legacy devices should be connected to IO APIC #0 |
2685 | */ | 2962 | */ |
2686 | replace_pin_at_irq(0, apic1, pin1, apic2, pin2); | 2963 | replace_pin_at_irq_cpu(cfg, cpu, apic1, pin1, apic2, pin2); |
2687 | setup_timer_IRQ0_pin(apic2, pin2, cfg->vector); | 2964 | setup_timer_IRQ0_pin(apic2, pin2, cfg->vector); |
2688 | unmask_IO_APIC_irq(0); | 2965 | unmask_IO_APIC_irq_desc(desc); |
2689 | enable_8259A_irq(0); | 2966 | enable_8259A_irq(0); |
2690 | if (timer_irq_works()) { | 2967 | if (timer_irq_works()) { |
2691 | apic_printk(APIC_QUIET, KERN_INFO "....... works.\n"); | 2968 | apic_printk(APIC_QUIET, KERN_INFO "....... works.\n"); |
@@ -2717,7 +2994,7 @@ static inline void __init check_timer(void) | |||
2717 | apic_printk(APIC_QUIET, KERN_INFO | 2994 | apic_printk(APIC_QUIET, KERN_INFO |
2718 | "...trying to set up timer as Virtual Wire IRQ...\n"); | 2995 | "...trying to set up timer as Virtual Wire IRQ...\n"); |
2719 | 2996 | ||
2720 | lapic_register_intr(0); | 2997 | lapic_register_intr(0, desc); |
2721 | apic_write(APIC_LVT0, APIC_DM_FIXED | cfg->vector); /* Fixed mode */ | 2998 | apic_write(APIC_LVT0, APIC_DM_FIXED | cfg->vector); /* Fixed mode */ |
2722 | enable_8259A_irq(0); | 2999 | enable_8259A_irq(0); |
2723 | 3000 | ||
@@ -2902,22 +3179,26 @@ unsigned int create_irq_nr(unsigned int irq_want) | |||
2902 | unsigned int irq; | 3179 | unsigned int irq; |
2903 | unsigned int new; | 3180 | unsigned int new; |
2904 | unsigned long flags; | 3181 | unsigned long flags; |
2905 | struct irq_cfg *cfg_new; | 3182 | struct irq_cfg *cfg_new = NULL; |
2906 | 3183 | int cpu = boot_cpu_id; | |
2907 | irq_want = nr_irqs - 1; | 3184 | struct irq_desc *desc_new = NULL; |
2908 | 3185 | ||
2909 | irq = 0; | 3186 | irq = 0; |
2910 | spin_lock_irqsave(&vector_lock, flags); | 3187 | spin_lock_irqsave(&vector_lock, flags); |
2911 | for (new = irq_want; new > 0; new--) { | 3188 | for (new = irq_want; new < NR_IRQS; new++) { |
2912 | if (platform_legacy_irq(new)) | 3189 | if (platform_legacy_irq(new)) |
2913 | continue; | 3190 | continue; |
2914 | cfg_new = irq_cfg(new); | 3191 | |
2915 | if (cfg_new && cfg_new->vector != 0) | 3192 | desc_new = irq_to_desc_alloc_cpu(new, cpu); |
3193 | if (!desc_new) { | ||
3194 | printk(KERN_INFO "can not get irq_desc for %d\n", new); | ||
2916 | continue; | 3195 | continue; |
2917 | /* check if need to create one */ | 3196 | } |
2918 | if (!cfg_new) | 3197 | cfg_new = desc_new->chip_data; |
2919 | cfg_new = irq_cfg_alloc(new); | 3198 | |
2920 | if (__assign_irq_vector(new, TARGET_CPUS) == 0) | 3199 | if (cfg_new->vector != 0) |
3200 | continue; | ||
3201 | if (__assign_irq_vector(new, cfg_new, TARGET_CPUS) == 0) | ||
2921 | irq = new; | 3202 | irq = new; |
2922 | break; | 3203 | break; |
2923 | } | 3204 | } |
@@ -2925,15 +3206,21 @@ unsigned int create_irq_nr(unsigned int irq_want) | |||
2925 | 3206 | ||
2926 | if (irq > 0) { | 3207 | if (irq > 0) { |
2927 | dynamic_irq_init(irq); | 3208 | dynamic_irq_init(irq); |
3209 | /* restore it, in case dynamic_irq_init clear it */ | ||
3210 | if (desc_new) | ||
3211 | desc_new->chip_data = cfg_new; | ||
2928 | } | 3212 | } |
2929 | return irq; | 3213 | return irq; |
2930 | } | 3214 | } |
2931 | 3215 | ||
3216 | static int nr_irqs_gsi = NR_IRQS_LEGACY; | ||
2932 | int create_irq(void) | 3217 | int create_irq(void) |
2933 | { | 3218 | { |
3219 | unsigned int irq_want; | ||
2934 | int irq; | 3220 | int irq; |
2935 | 3221 | ||
2936 | irq = create_irq_nr(nr_irqs - 1); | 3222 | irq_want = nr_irqs_gsi; |
3223 | irq = create_irq_nr(irq_want); | ||
2937 | 3224 | ||
2938 | if (irq == 0) | 3225 | if (irq == 0) |
2939 | irq = -1; | 3226 | irq = -1; |
@@ -2944,14 +3231,22 @@ int create_irq(void) | |||
2944 | void destroy_irq(unsigned int irq) | 3231 | void destroy_irq(unsigned int irq) |
2945 | { | 3232 | { |
2946 | unsigned long flags; | 3233 | unsigned long flags; |
3234 | struct irq_cfg *cfg; | ||
3235 | struct irq_desc *desc; | ||
2947 | 3236 | ||
3237 | /* store it, in case dynamic_irq_cleanup clear it */ | ||
3238 | desc = irq_to_desc(irq); | ||
3239 | cfg = desc->chip_data; | ||
2948 | dynamic_irq_cleanup(irq); | 3240 | dynamic_irq_cleanup(irq); |
3241 | /* connect back irq_cfg */ | ||
3242 | if (desc) | ||
3243 | desc->chip_data = cfg; | ||
2949 | 3244 | ||
2950 | #ifdef CONFIG_INTR_REMAP | 3245 | #ifdef CONFIG_INTR_REMAP |
2951 | free_irte(irq); | 3246 | free_irte(irq); |
2952 | #endif | 3247 | #endif |
2953 | spin_lock_irqsave(&vector_lock, flags); | 3248 | spin_lock_irqsave(&vector_lock, flags); |
2954 | __clear_irq_vector(irq); | 3249 | __clear_irq_vector(irq, cfg); |
2955 | spin_unlock_irqrestore(&vector_lock, flags); | 3250 | spin_unlock_irqrestore(&vector_lock, flags); |
2956 | } | 3251 | } |
2957 | 3252 | ||
@@ -2964,16 +3259,13 @@ static int msi_compose_msg(struct pci_dev *pdev, unsigned int irq, struct msi_ms | |||
2964 | struct irq_cfg *cfg; | 3259 | struct irq_cfg *cfg; |
2965 | int err; | 3260 | int err; |
2966 | unsigned dest; | 3261 | unsigned dest; |
2967 | cpumask_t tmp; | ||
2968 | 3262 | ||
2969 | tmp = TARGET_CPUS; | 3263 | cfg = irq_cfg(irq); |
2970 | err = assign_irq_vector(irq, tmp); | 3264 | err = assign_irq_vector(irq, cfg, TARGET_CPUS); |
2971 | if (err) | 3265 | if (err) |
2972 | return err; | 3266 | return err; |
2973 | 3267 | ||
2974 | cfg = irq_cfg(irq); | 3268 | dest = cpu_mask_to_apicid_and(cfg->domain, TARGET_CPUS); |
2975 | cpus_and(tmp, cfg->domain, tmp); | ||
2976 | dest = cpu_mask_to_apicid(tmp); | ||
2977 | 3269 | ||
2978 | #ifdef CONFIG_INTR_REMAP | 3270 | #ifdef CONFIG_INTR_REMAP |
2979 | if (irq_remapped(irq)) { | 3271 | if (irq_remapped(irq)) { |
@@ -3027,64 +3319,48 @@ static int msi_compose_msg(struct pci_dev *pdev, unsigned int irq, struct msi_ms | |||
3027 | } | 3319 | } |
3028 | 3320 | ||
3029 | #ifdef CONFIG_SMP | 3321 | #ifdef CONFIG_SMP |
3030 | static void set_msi_irq_affinity(unsigned int irq, cpumask_t mask) | 3322 | static void set_msi_irq_affinity(unsigned int irq, const struct cpumask *mask) |
3031 | { | 3323 | { |
3324 | struct irq_desc *desc = irq_to_desc(irq); | ||
3032 | struct irq_cfg *cfg; | 3325 | struct irq_cfg *cfg; |
3033 | struct msi_msg msg; | 3326 | struct msi_msg msg; |
3034 | unsigned int dest; | 3327 | unsigned int dest; |
3035 | cpumask_t tmp; | ||
3036 | struct irq_desc *desc; | ||
3037 | 3328 | ||
3038 | cpus_and(tmp, mask, cpu_online_map); | 3329 | dest = set_desc_affinity(desc, mask); |
3039 | if (cpus_empty(tmp)) | 3330 | if (dest == BAD_APICID) |
3040 | return; | 3331 | return; |
3041 | 3332 | ||
3042 | if (assign_irq_vector(irq, mask)) | 3333 | cfg = desc->chip_data; |
3043 | return; | ||
3044 | 3334 | ||
3045 | cfg = irq_cfg(irq); | 3335 | read_msi_msg_desc(desc, &msg); |
3046 | cpus_and(tmp, cfg->domain, mask); | ||
3047 | dest = cpu_mask_to_apicid(tmp); | ||
3048 | |||
3049 | read_msi_msg(irq, &msg); | ||
3050 | 3336 | ||
3051 | msg.data &= ~MSI_DATA_VECTOR_MASK; | 3337 | msg.data &= ~MSI_DATA_VECTOR_MASK; |
3052 | msg.data |= MSI_DATA_VECTOR(cfg->vector); | 3338 | msg.data |= MSI_DATA_VECTOR(cfg->vector); |
3053 | msg.address_lo &= ~MSI_ADDR_DEST_ID_MASK; | 3339 | msg.address_lo &= ~MSI_ADDR_DEST_ID_MASK; |
3054 | msg.address_lo |= MSI_ADDR_DEST_ID(dest); | 3340 | msg.address_lo |= MSI_ADDR_DEST_ID(dest); |
3055 | 3341 | ||
3056 | write_msi_msg(irq, &msg); | 3342 | write_msi_msg_desc(desc, &msg); |
3057 | desc = irq_to_desc(irq); | ||
3058 | desc->affinity = mask; | ||
3059 | } | 3343 | } |
3060 | |||
3061 | #ifdef CONFIG_INTR_REMAP | 3344 | #ifdef CONFIG_INTR_REMAP |
3062 | /* | 3345 | /* |
3063 | * Migrate the MSI irq to another cpumask. This migration is | 3346 | * Migrate the MSI irq to another cpumask. This migration is |
3064 | * done in the process context using interrupt-remapping hardware. | 3347 | * done in the process context using interrupt-remapping hardware. |
3065 | */ | 3348 | */ |
3066 | static void ir_set_msi_irq_affinity(unsigned int irq, cpumask_t mask) | 3349 | static void |
3350 | ir_set_msi_irq_affinity(unsigned int irq, const struct cpumask *mask) | ||
3067 | { | 3351 | { |
3068 | struct irq_cfg *cfg; | 3352 | struct irq_desc *desc = irq_to_desc(irq); |
3353 | struct irq_cfg *cfg = desc->chip_data; | ||
3069 | unsigned int dest; | 3354 | unsigned int dest; |
3070 | cpumask_t tmp, cleanup_mask; | ||
3071 | struct irte irte; | 3355 | struct irte irte; |
3072 | struct irq_desc *desc; | ||
3073 | |||
3074 | cpus_and(tmp, mask, cpu_online_map); | ||
3075 | if (cpus_empty(tmp)) | ||
3076 | return; | ||
3077 | 3356 | ||
3078 | if (get_irte(irq, &irte)) | 3357 | if (get_irte(irq, &irte)) |
3079 | return; | 3358 | return; |
3080 | 3359 | ||
3081 | if (assign_irq_vector(irq, mask)) | 3360 | dest = set_desc_affinity(desc, mask); |
3361 | if (dest == BAD_APICID) | ||
3082 | return; | 3362 | return; |
3083 | 3363 | ||
3084 | cfg = irq_cfg(irq); | ||
3085 | cpus_and(tmp, cfg->domain, mask); | ||
3086 | dest = cpu_mask_to_apicid(tmp); | ||
3087 | |||
3088 | irte.vector = cfg->vector; | 3364 | irte.vector = cfg->vector; |
3089 | irte.dest_id = IRTE_DEST(dest); | 3365 | irte.dest_id = IRTE_DEST(dest); |
3090 | 3366 | ||
@@ -3098,16 +3374,10 @@ static void ir_set_msi_irq_affinity(unsigned int irq, cpumask_t mask) | |||
3098 | * at the new destination. So, time to cleanup the previous | 3374 | * at the new destination. So, time to cleanup the previous |
3099 | * vector allocation. | 3375 | * vector allocation. |
3100 | */ | 3376 | */ |
3101 | if (cfg->move_in_progress) { | 3377 | if (cfg->move_in_progress) |
3102 | cpus_and(cleanup_mask, cfg->old_domain, cpu_online_map); | 3378 | send_cleanup_vector(cfg); |
3103 | cfg->move_cleanup_count = cpus_weight(cleanup_mask); | ||
3104 | send_IPI_mask(cleanup_mask, IRQ_MOVE_CLEANUP_VECTOR); | ||
3105 | cfg->move_in_progress = 0; | ||
3106 | } | ||
3107 | |||
3108 | desc = irq_to_desc(irq); | ||
3109 | desc->affinity = mask; | ||
3110 | } | 3379 | } |
3380 | |||
3111 | #endif | 3381 | #endif |
3112 | #endif /* CONFIG_SMP */ | 3382 | #endif /* CONFIG_SMP */ |
3113 | 3383 | ||
@@ -3166,7 +3436,7 @@ static int msi_alloc_irte(struct pci_dev *dev, int irq, int nvec) | |||
3166 | } | 3436 | } |
3167 | #endif | 3437 | #endif |
3168 | 3438 | ||
3169 | static int setup_msi_irq(struct pci_dev *dev, struct msi_desc *desc, int irq) | 3439 | static int setup_msi_irq(struct pci_dev *dev, struct msi_desc *msidesc, int irq) |
3170 | { | 3440 | { |
3171 | int ret; | 3441 | int ret; |
3172 | struct msi_msg msg; | 3442 | struct msi_msg msg; |
@@ -3175,7 +3445,7 @@ static int setup_msi_irq(struct pci_dev *dev, struct msi_desc *desc, int irq) | |||
3175 | if (ret < 0) | 3445 | if (ret < 0) |
3176 | return ret; | 3446 | return ret; |
3177 | 3447 | ||
3178 | set_irq_msi(irq, desc); | 3448 | set_irq_msi(irq, msidesc); |
3179 | write_msi_msg(irq, &msg); | 3449 | write_msi_msg(irq, &msg); |
3180 | 3450 | ||
3181 | #ifdef CONFIG_INTR_REMAP | 3451 | #ifdef CONFIG_INTR_REMAP |
@@ -3195,26 +3465,13 @@ static int setup_msi_irq(struct pci_dev *dev, struct msi_desc *desc, int irq) | |||
3195 | return 0; | 3465 | return 0; |
3196 | } | 3466 | } |
3197 | 3467 | ||
3198 | static unsigned int build_irq_for_pci_dev(struct pci_dev *dev) | 3468 | int arch_setup_msi_irq(struct pci_dev *dev, struct msi_desc *msidesc) |
3199 | { | ||
3200 | unsigned int irq; | ||
3201 | |||
3202 | irq = dev->bus->number; | ||
3203 | irq <<= 8; | ||
3204 | irq |= dev->devfn; | ||
3205 | irq <<= 12; | ||
3206 | |||
3207 | return irq; | ||
3208 | } | ||
3209 | |||
3210 | int arch_setup_msi_irq(struct pci_dev *dev, struct msi_desc *desc) | ||
3211 | { | 3469 | { |
3212 | unsigned int irq; | 3470 | unsigned int irq; |
3213 | int ret; | 3471 | int ret; |
3214 | unsigned int irq_want; | 3472 | unsigned int irq_want; |
3215 | 3473 | ||
3216 | irq_want = build_irq_for_pci_dev(dev) + 0x100; | 3474 | irq_want = nr_irqs_gsi; |
3217 | |||
3218 | irq = create_irq_nr(irq_want); | 3475 | irq = create_irq_nr(irq_want); |
3219 | if (irq == 0) | 3476 | if (irq == 0) |
3220 | return -1; | 3477 | return -1; |
@@ -3228,7 +3485,7 @@ int arch_setup_msi_irq(struct pci_dev *dev, struct msi_desc *desc) | |||
3228 | goto error; | 3485 | goto error; |
3229 | no_ir: | 3486 | no_ir: |
3230 | #endif | 3487 | #endif |
3231 | ret = setup_msi_irq(dev, desc, irq); | 3488 | ret = setup_msi_irq(dev, msidesc, irq); |
3232 | if (ret < 0) { | 3489 | if (ret < 0) { |
3233 | destroy_irq(irq); | 3490 | destroy_irq(irq); |
3234 | return ret; | 3491 | return ret; |
@@ -3246,7 +3503,7 @@ int arch_setup_msi_irqs(struct pci_dev *dev, int nvec, int type) | |||
3246 | { | 3503 | { |
3247 | unsigned int irq; | 3504 | unsigned int irq; |
3248 | int ret, sub_handle; | 3505 | int ret, sub_handle; |
3249 | struct msi_desc *desc; | 3506 | struct msi_desc *msidesc; |
3250 | unsigned int irq_want; | 3507 | unsigned int irq_want; |
3251 | 3508 | ||
3252 | #ifdef CONFIG_INTR_REMAP | 3509 | #ifdef CONFIG_INTR_REMAP |
@@ -3254,10 +3511,11 @@ int arch_setup_msi_irqs(struct pci_dev *dev, int nvec, int type) | |||
3254 | int index = 0; | 3511 | int index = 0; |
3255 | #endif | 3512 | #endif |
3256 | 3513 | ||
3257 | irq_want = build_irq_for_pci_dev(dev) + 0x100; | 3514 | irq_want = nr_irqs_gsi; |
3258 | sub_handle = 0; | 3515 | sub_handle = 0; |
3259 | list_for_each_entry(desc, &dev->msi_list, list) { | 3516 | list_for_each_entry(msidesc, &dev->msi_list, list) { |
3260 | irq = create_irq_nr(irq_want--); | 3517 | irq = create_irq_nr(irq_want); |
3518 | irq_want++; | ||
3261 | if (irq == 0) | 3519 | if (irq == 0) |
3262 | return -1; | 3520 | return -1; |
3263 | #ifdef CONFIG_INTR_REMAP | 3521 | #ifdef CONFIG_INTR_REMAP |
@@ -3289,7 +3547,7 @@ int arch_setup_msi_irqs(struct pci_dev *dev, int nvec, int type) | |||
3289 | } | 3547 | } |
3290 | no_ir: | 3548 | no_ir: |
3291 | #endif | 3549 | #endif |
3292 | ret = setup_msi_irq(dev, desc, irq); | 3550 | ret = setup_msi_irq(dev, msidesc, irq); |
3293 | if (ret < 0) | 3551 | if (ret < 0) |
3294 | goto error; | 3552 | goto error; |
3295 | sub_handle++; | 3553 | sub_handle++; |
@@ -3308,24 +3566,18 @@ void arch_teardown_msi_irq(unsigned int irq) | |||
3308 | 3566 | ||
3309 | #ifdef CONFIG_DMAR | 3567 | #ifdef CONFIG_DMAR |
3310 | #ifdef CONFIG_SMP | 3568 | #ifdef CONFIG_SMP |
3311 | static void dmar_msi_set_affinity(unsigned int irq, cpumask_t mask) | 3569 | static void dmar_msi_set_affinity(unsigned int irq, const struct cpumask *mask) |
3312 | { | 3570 | { |
3571 | struct irq_desc *desc = irq_to_desc(irq); | ||
3313 | struct irq_cfg *cfg; | 3572 | struct irq_cfg *cfg; |
3314 | struct msi_msg msg; | 3573 | struct msi_msg msg; |
3315 | unsigned int dest; | 3574 | unsigned int dest; |
3316 | cpumask_t tmp; | ||
3317 | struct irq_desc *desc; | ||
3318 | 3575 | ||
3319 | cpus_and(tmp, mask, cpu_online_map); | 3576 | dest = set_desc_affinity(desc, mask); |
3320 | if (cpus_empty(tmp)) | 3577 | if (dest == BAD_APICID) |
3321 | return; | 3578 | return; |
3322 | 3579 | ||
3323 | if (assign_irq_vector(irq, mask)) | 3580 | cfg = desc->chip_data; |
3324 | return; | ||
3325 | |||
3326 | cfg = irq_cfg(irq); | ||
3327 | cpus_and(tmp, cfg->domain, mask); | ||
3328 | dest = cpu_mask_to_apicid(tmp); | ||
3329 | 3581 | ||
3330 | dmar_msi_read(irq, &msg); | 3582 | dmar_msi_read(irq, &msg); |
3331 | 3583 | ||
@@ -3335,9 +3587,8 @@ static void dmar_msi_set_affinity(unsigned int irq, cpumask_t mask) | |||
3335 | msg.address_lo |= MSI_ADDR_DEST_ID(dest); | 3587 | msg.address_lo |= MSI_ADDR_DEST_ID(dest); |
3336 | 3588 | ||
3337 | dmar_msi_write(irq, &msg); | 3589 | dmar_msi_write(irq, &msg); |
3338 | desc = irq_to_desc(irq); | ||
3339 | desc->affinity = mask; | ||
3340 | } | 3590 | } |
3591 | |||
3341 | #endif /* CONFIG_SMP */ | 3592 | #endif /* CONFIG_SMP */ |
3342 | 3593 | ||
3343 | struct irq_chip dmar_msi_type = { | 3594 | struct irq_chip dmar_msi_type = { |
@@ -3369,24 +3620,18 @@ int arch_setup_dmar_msi(unsigned int irq) | |||
3369 | #ifdef CONFIG_HPET_TIMER | 3620 | #ifdef CONFIG_HPET_TIMER |
3370 | 3621 | ||
3371 | #ifdef CONFIG_SMP | 3622 | #ifdef CONFIG_SMP |
3372 | static void hpet_msi_set_affinity(unsigned int irq, cpumask_t mask) | 3623 | static void hpet_msi_set_affinity(unsigned int irq, const struct cpumask *mask) |
3373 | { | 3624 | { |
3625 | struct irq_desc *desc = irq_to_desc(irq); | ||
3374 | struct irq_cfg *cfg; | 3626 | struct irq_cfg *cfg; |
3375 | struct irq_desc *desc; | ||
3376 | struct msi_msg msg; | 3627 | struct msi_msg msg; |
3377 | unsigned int dest; | 3628 | unsigned int dest; |
3378 | cpumask_t tmp; | ||
3379 | 3629 | ||
3380 | cpus_and(tmp, mask, cpu_online_map); | 3630 | dest = set_desc_affinity(desc, mask); |
3381 | if (cpus_empty(tmp)) | 3631 | if (dest == BAD_APICID) |
3382 | return; | 3632 | return; |
3383 | 3633 | ||
3384 | if (assign_irq_vector(irq, mask)) | 3634 | cfg = desc->chip_data; |
3385 | return; | ||
3386 | |||
3387 | cfg = irq_cfg(irq); | ||
3388 | cpus_and(tmp, cfg->domain, mask); | ||
3389 | dest = cpu_mask_to_apicid(tmp); | ||
3390 | 3635 | ||
3391 | hpet_msi_read(irq, &msg); | 3636 | hpet_msi_read(irq, &msg); |
3392 | 3637 | ||
@@ -3396,9 +3641,8 @@ static void hpet_msi_set_affinity(unsigned int irq, cpumask_t mask) | |||
3396 | msg.address_lo |= MSI_ADDR_DEST_ID(dest); | 3641 | msg.address_lo |= MSI_ADDR_DEST_ID(dest); |
3397 | 3642 | ||
3398 | hpet_msi_write(irq, &msg); | 3643 | hpet_msi_write(irq, &msg); |
3399 | desc = irq_to_desc(irq); | ||
3400 | desc->affinity = mask; | ||
3401 | } | 3644 | } |
3645 | |||
3402 | #endif /* CONFIG_SMP */ | 3646 | #endif /* CONFIG_SMP */ |
3403 | 3647 | ||
3404 | struct irq_chip hpet_msi_type = { | 3648 | struct irq_chip hpet_msi_type = { |
@@ -3451,28 +3695,21 @@ static void target_ht_irq(unsigned int irq, unsigned int dest, u8 vector) | |||
3451 | write_ht_irq_msg(irq, &msg); | 3695 | write_ht_irq_msg(irq, &msg); |
3452 | } | 3696 | } |
3453 | 3697 | ||
3454 | static void set_ht_irq_affinity(unsigned int irq, cpumask_t mask) | 3698 | static void set_ht_irq_affinity(unsigned int irq, const struct cpumask *mask) |
3455 | { | 3699 | { |
3700 | struct irq_desc *desc = irq_to_desc(irq); | ||
3456 | struct irq_cfg *cfg; | 3701 | struct irq_cfg *cfg; |
3457 | unsigned int dest; | 3702 | unsigned int dest; |
3458 | cpumask_t tmp; | ||
3459 | struct irq_desc *desc; | ||
3460 | 3703 | ||
3461 | cpus_and(tmp, mask, cpu_online_map); | 3704 | dest = set_desc_affinity(desc, mask); |
3462 | if (cpus_empty(tmp)) | 3705 | if (dest == BAD_APICID) |
3463 | return; | 3706 | return; |
3464 | 3707 | ||
3465 | if (assign_irq_vector(irq, mask)) | 3708 | cfg = desc->chip_data; |
3466 | return; | ||
3467 | |||
3468 | cfg = irq_cfg(irq); | ||
3469 | cpus_and(tmp, cfg->domain, mask); | ||
3470 | dest = cpu_mask_to_apicid(tmp); | ||
3471 | 3709 | ||
3472 | target_ht_irq(irq, dest, cfg->vector); | 3710 | target_ht_irq(irq, dest, cfg->vector); |
3473 | desc = irq_to_desc(irq); | ||
3474 | desc->affinity = mask; | ||
3475 | } | 3711 | } |
3712 | |||
3476 | #endif | 3713 | #endif |
3477 | 3714 | ||
3478 | static struct irq_chip ht_irq_chip = { | 3715 | static struct irq_chip ht_irq_chip = { |
@@ -3490,17 +3727,14 @@ int arch_setup_ht_irq(unsigned int irq, struct pci_dev *dev) | |||
3490 | { | 3727 | { |
3491 | struct irq_cfg *cfg; | 3728 | struct irq_cfg *cfg; |
3492 | int err; | 3729 | int err; |
3493 | cpumask_t tmp; | ||
3494 | 3730 | ||
3495 | tmp = TARGET_CPUS; | 3731 | cfg = irq_cfg(irq); |
3496 | err = assign_irq_vector(irq, tmp); | 3732 | err = assign_irq_vector(irq, cfg, TARGET_CPUS); |
3497 | if (!err) { | 3733 | if (!err) { |
3498 | struct ht_irq_msg msg; | 3734 | struct ht_irq_msg msg; |
3499 | unsigned dest; | 3735 | unsigned dest; |
3500 | 3736 | ||
3501 | cfg = irq_cfg(irq); | 3737 | dest = cpu_mask_to_apicid_and(cfg->domain, TARGET_CPUS); |
3502 | cpus_and(tmp, cfg->domain, tmp); | ||
3503 | dest = cpu_mask_to_apicid(tmp); | ||
3504 | 3738 | ||
3505 | msg.address_hi = HT_IRQ_HIGH_DEST_ID(dest); | 3739 | msg.address_hi = HT_IRQ_HIGH_DEST_ID(dest); |
3506 | 3740 | ||
@@ -3536,7 +3770,7 @@ int arch_setup_ht_irq(unsigned int irq, struct pci_dev *dev) | |||
3536 | int arch_enable_uv_irq(char *irq_name, unsigned int irq, int cpu, int mmr_blade, | 3770 | int arch_enable_uv_irq(char *irq_name, unsigned int irq, int cpu, int mmr_blade, |
3537 | unsigned long mmr_offset) | 3771 | unsigned long mmr_offset) |
3538 | { | 3772 | { |
3539 | const cpumask_t *eligible_cpu = get_cpu_mask(cpu); | 3773 | const struct cpumask *eligible_cpu = cpumask_of(cpu); |
3540 | struct irq_cfg *cfg; | 3774 | struct irq_cfg *cfg; |
3541 | int mmr_pnode; | 3775 | int mmr_pnode; |
3542 | unsigned long mmr_value; | 3776 | unsigned long mmr_value; |
@@ -3544,7 +3778,9 @@ int arch_enable_uv_irq(char *irq_name, unsigned int irq, int cpu, int mmr_blade, | |||
3544 | unsigned long flags; | 3778 | unsigned long flags; |
3545 | int err; | 3779 | int err; |
3546 | 3780 | ||
3547 | err = assign_irq_vector(irq, *eligible_cpu); | 3781 | cfg = irq_cfg(irq); |
3782 | |||
3783 | err = assign_irq_vector(irq, cfg, eligible_cpu); | ||
3548 | if (err != 0) | 3784 | if (err != 0) |
3549 | return err; | 3785 | return err; |
3550 | 3786 | ||
@@ -3553,8 +3789,6 @@ int arch_enable_uv_irq(char *irq_name, unsigned int irq, int cpu, int mmr_blade, | |||
3553 | irq_name); | 3789 | irq_name); |
3554 | spin_unlock_irqrestore(&vector_lock, flags); | 3790 | spin_unlock_irqrestore(&vector_lock, flags); |
3555 | 3791 | ||
3556 | cfg = irq_cfg(irq); | ||
3557 | |||
3558 | mmr_value = 0; | 3792 | mmr_value = 0; |
3559 | entry = (struct uv_IO_APIC_route_entry *)&mmr_value; | 3793 | entry = (struct uv_IO_APIC_route_entry *)&mmr_value; |
3560 | BUG_ON(sizeof(struct uv_IO_APIC_route_entry) != sizeof(unsigned long)); | 3794 | BUG_ON(sizeof(struct uv_IO_APIC_route_entry) != sizeof(unsigned long)); |
@@ -3565,7 +3799,7 @@ int arch_enable_uv_irq(char *irq_name, unsigned int irq, int cpu, int mmr_blade, | |||
3565 | entry->polarity = 0; | 3799 | entry->polarity = 0; |
3566 | entry->trigger = 0; | 3800 | entry->trigger = 0; |
3567 | entry->mask = 0; | 3801 | entry->mask = 0; |
3568 | entry->dest = cpu_mask_to_apicid(*eligible_cpu); | 3802 | entry->dest = cpu_mask_to_apicid(eligible_cpu); |
3569 | 3803 | ||
3570 | mmr_pnode = uv_blade_to_pnode(mmr_blade); | 3804 | mmr_pnode = uv_blade_to_pnode(mmr_blade); |
3571 | uv_write_global_mmr64(mmr_pnode, mmr_offset, mmr_value); | 3805 | uv_write_global_mmr64(mmr_pnode, mmr_offset, mmr_value); |
@@ -3606,9 +3840,16 @@ int __init io_apic_get_redir_entries (int ioapic) | |||
3606 | return reg_01.bits.entries; | 3840 | return reg_01.bits.entries; |
3607 | } | 3841 | } |
3608 | 3842 | ||
3609 | int __init probe_nr_irqs(void) | 3843 | void __init probe_nr_irqs_gsi(void) |
3610 | { | 3844 | { |
3611 | return NR_IRQS; | 3845 | int idx; |
3846 | int nr = 0; | ||
3847 | |||
3848 | for (idx = 0; idx < nr_ioapics; idx++) | ||
3849 | nr += io_apic_get_redir_entries(idx) + 1; | ||
3850 | |||
3851 | if (nr > nr_irqs_gsi) | ||
3852 | nr_irqs_gsi = nr; | ||
3612 | } | 3853 | } |
3613 | 3854 | ||
3614 | /* -------------------------------------------------------------------------- | 3855 | /* -------------------------------------------------------------------------- |
@@ -3707,19 +3948,31 @@ int __init io_apic_get_version(int ioapic) | |||
3707 | 3948 | ||
3708 | int io_apic_set_pci_routing (int ioapic, int pin, int irq, int triggering, int polarity) | 3949 | int io_apic_set_pci_routing (int ioapic, int pin, int irq, int triggering, int polarity) |
3709 | { | 3950 | { |
3951 | struct irq_desc *desc; | ||
3952 | struct irq_cfg *cfg; | ||
3953 | int cpu = boot_cpu_id; | ||
3954 | |||
3710 | if (!IO_APIC_IRQ(irq)) { | 3955 | if (!IO_APIC_IRQ(irq)) { |
3711 | apic_printk(APIC_QUIET,KERN_ERR "IOAPIC[%d]: Invalid reference to IRQ 0\n", | 3956 | apic_printk(APIC_QUIET,KERN_ERR "IOAPIC[%d]: Invalid reference to IRQ 0\n", |
3712 | ioapic); | 3957 | ioapic); |
3713 | return -EINVAL; | 3958 | return -EINVAL; |
3714 | } | 3959 | } |
3715 | 3960 | ||
3961 | desc = irq_to_desc_alloc_cpu(irq, cpu); | ||
3962 | if (!desc) { | ||
3963 | printk(KERN_INFO "can not get irq_desc %d\n", irq); | ||
3964 | return 0; | ||
3965 | } | ||
3966 | |||
3716 | /* | 3967 | /* |
3717 | * IRQs < 16 are already in the irq_2_pin[] map | 3968 | * IRQs < 16 are already in the irq_2_pin[] map |
3718 | */ | 3969 | */ |
3719 | if (irq >= 16) | 3970 | if (irq >= NR_IRQS_LEGACY) { |
3720 | add_pin_to_irq(irq, ioapic, pin); | 3971 | cfg = desc->chip_data; |
3972 | add_pin_to_irq_cpu(cfg, cpu, ioapic, pin); | ||
3973 | } | ||
3721 | 3974 | ||
3722 | setup_IO_APIC_irq(ioapic, pin, irq, triggering, polarity); | 3975 | setup_IO_APIC_irq(ioapic, pin, irq, desc, triggering, polarity); |
3723 | 3976 | ||
3724 | return 0; | 3977 | return 0; |
3725 | } | 3978 | } |
@@ -3757,7 +4010,7 @@ void __init setup_ioapic_dest(void) | |||
3757 | int pin, ioapic, irq, irq_entry; | 4010 | int pin, ioapic, irq, irq_entry; |
3758 | struct irq_desc *desc; | 4011 | struct irq_desc *desc; |
3759 | struct irq_cfg *cfg; | 4012 | struct irq_cfg *cfg; |
3760 | cpumask_t mask; | 4013 | const struct cpumask *mask; |
3761 | 4014 | ||
3762 | if (skip_ioapic_setup == 1) | 4015 | if (skip_ioapic_setup == 1) |
3763 | return; | 4016 | return; |
@@ -3773,9 +4026,10 @@ void __init setup_ioapic_dest(void) | |||
3773 | * when you have too many devices, because at that time only boot | 4026 | * when you have too many devices, because at that time only boot |
3774 | * cpu is online. | 4027 | * cpu is online. |
3775 | */ | 4028 | */ |
3776 | cfg = irq_cfg(irq); | 4029 | desc = irq_to_desc(irq); |
4030 | cfg = desc->chip_data; | ||
3777 | if (!cfg->vector) { | 4031 | if (!cfg->vector) { |
3778 | setup_IO_APIC_irq(ioapic, pin, irq, | 4032 | setup_IO_APIC_irq(ioapic, pin, irq, desc, |
3779 | irq_trigger(irq_entry), | 4033 | irq_trigger(irq_entry), |
3780 | irq_polarity(irq_entry)); | 4034 | irq_polarity(irq_entry)); |
3781 | continue; | 4035 | continue; |
@@ -3785,19 +4039,18 @@ void __init setup_ioapic_dest(void) | |||
3785 | /* | 4039 | /* |
3786 | * Honour affinities which have been set in early boot | 4040 | * Honour affinities which have been set in early boot |
3787 | */ | 4041 | */ |
3788 | desc = irq_to_desc(irq); | ||
3789 | if (desc->status & | 4042 | if (desc->status & |
3790 | (IRQ_NO_BALANCING | IRQ_AFFINITY_SET)) | 4043 | (IRQ_NO_BALANCING | IRQ_AFFINITY_SET)) |
3791 | mask = desc->affinity; | 4044 | mask = &desc->affinity; |
3792 | else | 4045 | else |
3793 | mask = TARGET_CPUS; | 4046 | mask = TARGET_CPUS; |
3794 | 4047 | ||
3795 | #ifdef CONFIG_INTR_REMAP | 4048 | #ifdef CONFIG_INTR_REMAP |
3796 | if (intr_remapping_enabled) | 4049 | if (intr_remapping_enabled) |
3797 | set_ir_ioapic_affinity_irq(irq, mask); | 4050 | set_ir_ioapic_affinity_irq_desc(desc, mask); |
3798 | else | 4051 | else |
3799 | #endif | 4052 | #endif |
3800 | set_ioapic_affinity_irq(irq, mask); | 4053 | set_ioapic_affinity_irq_desc(desc, mask); |
3801 | } | 4054 | } |
3802 | 4055 | ||
3803 | } | 4056 | } |
@@ -3846,7 +4099,6 @@ void __init ioapic_init_mappings(void) | |||
3846 | struct resource *ioapic_res; | 4099 | struct resource *ioapic_res; |
3847 | int i; | 4100 | int i; |
3848 | 4101 | ||
3849 | irq_2_pin_init(); | ||
3850 | ioapic_res = ioapic_setup_resources(); | 4102 | ioapic_res = ioapic_setup_resources(); |
3851 | for (i = 0; i < nr_ioapics; i++) { | 4103 | for (i = 0; i < nr_ioapics; i++) { |
3852 | if (smp_found_config) { | 4104 | if (smp_found_config) { |
diff --git a/arch/x86/kernel/ipi.c b/arch/x86/kernel/ipi.c index f1c688e46f35..285bbf8831fa 100644 --- a/arch/x86/kernel/ipi.c +++ b/arch/x86/kernel/ipi.c | |||
@@ -116,18 +116,18 @@ static inline void __send_IPI_dest_field(unsigned long mask, int vector) | |||
116 | /* | 116 | /* |
117 | * This is only used on smaller machines. | 117 | * This is only used on smaller machines. |
118 | */ | 118 | */ |
119 | void send_IPI_mask_bitmask(cpumask_t cpumask, int vector) | 119 | void send_IPI_mask_bitmask(const struct cpumask *cpumask, int vector) |
120 | { | 120 | { |
121 | unsigned long mask = cpus_addr(cpumask)[0]; | 121 | unsigned long mask = cpumask_bits(cpumask)[0]; |
122 | unsigned long flags; | 122 | unsigned long flags; |
123 | 123 | ||
124 | local_irq_save(flags); | 124 | local_irq_save(flags); |
125 | WARN_ON(mask & ~cpus_addr(cpu_online_map)[0]); | 125 | WARN_ON(mask & ~cpumask_bits(cpu_online_mask)[0]); |
126 | __send_IPI_dest_field(mask, vector); | 126 | __send_IPI_dest_field(mask, vector); |
127 | local_irq_restore(flags); | 127 | local_irq_restore(flags); |
128 | } | 128 | } |
129 | 129 | ||
130 | void send_IPI_mask_sequence(cpumask_t mask, int vector) | 130 | void send_IPI_mask_sequence(const struct cpumask *mask, int vector) |
131 | { | 131 | { |
132 | unsigned long flags; | 132 | unsigned long flags; |
133 | unsigned int query_cpu; | 133 | unsigned int query_cpu; |
@@ -139,12 +139,24 @@ void send_IPI_mask_sequence(cpumask_t mask, int vector) | |||
139 | */ | 139 | */ |
140 | 140 | ||
141 | local_irq_save(flags); | 141 | local_irq_save(flags); |
142 | for_each_possible_cpu(query_cpu) { | 142 | for_each_cpu(query_cpu, mask) |
143 | if (cpu_isset(query_cpu, mask)) { | 143 | __send_IPI_dest_field(cpu_to_logical_apicid(query_cpu), vector); |
144 | local_irq_restore(flags); | ||
145 | } | ||
146 | |||
147 | void send_IPI_mask_allbutself(const struct cpumask *mask, int vector) | ||
148 | { | ||
149 | unsigned long flags; | ||
150 | unsigned int query_cpu; | ||
151 | unsigned int this_cpu = smp_processor_id(); | ||
152 | |||
153 | /* See Hack comment above */ | ||
154 | |||
155 | local_irq_save(flags); | ||
156 | for_each_cpu(query_cpu, mask) | ||
157 | if (query_cpu != this_cpu) | ||
144 | __send_IPI_dest_field(cpu_to_logical_apicid(query_cpu), | 158 | __send_IPI_dest_field(cpu_to_logical_apicid(query_cpu), |
145 | vector); | 159 | vector); |
146 | } | ||
147 | } | ||
148 | local_irq_restore(flags); | 160 | local_irq_restore(flags); |
149 | } | 161 | } |
150 | 162 | ||
diff --git a/arch/x86/kernel/irq.c b/arch/x86/kernel/irq.c index d1d4dc52f649..bce53e1352a0 100644 --- a/arch/x86/kernel/irq.c +++ b/arch/x86/kernel/irq.c | |||
@@ -9,6 +9,7 @@ | |||
9 | #include <asm/apic.h> | 9 | #include <asm/apic.h> |
10 | #include <asm/io_apic.h> | 10 | #include <asm/io_apic.h> |
11 | #include <asm/smp.h> | 11 | #include <asm/smp.h> |
12 | #include <asm/irq.h> | ||
12 | 13 | ||
13 | atomic_t irq_err_count; | 14 | atomic_t irq_err_count; |
14 | 15 | ||
@@ -118,6 +119,9 @@ int show_interrupts(struct seq_file *p, void *v) | |||
118 | } | 119 | } |
119 | 120 | ||
120 | desc = irq_to_desc(i); | 121 | desc = irq_to_desc(i); |
122 | if (!desc) | ||
123 | return 0; | ||
124 | |||
121 | spin_lock_irqsave(&desc->lock, flags); | 125 | spin_lock_irqsave(&desc->lock, flags); |
122 | #ifndef CONFIG_SMP | 126 | #ifndef CONFIG_SMP |
123 | any_count = kstat_irqs(i); | 127 | any_count = kstat_irqs(i); |
@@ -187,3 +191,5 @@ u64 arch_irq_stat(void) | |||
187 | #endif | 191 | #endif |
188 | return sum; | 192 | return sum; |
189 | } | 193 | } |
194 | |||
195 | EXPORT_SYMBOL_GPL(vector_used_by_percpu_irq); | ||
diff --git a/arch/x86/kernel/irq_32.c b/arch/x86/kernel/irq_32.c index a51382672de0..9dc5588f336a 100644 --- a/arch/x86/kernel/irq_32.c +++ b/arch/x86/kernel/irq_32.c | |||
@@ -233,25 +233,28 @@ unsigned int do_IRQ(struct pt_regs *regs) | |||
233 | #ifdef CONFIG_HOTPLUG_CPU | 233 | #ifdef CONFIG_HOTPLUG_CPU |
234 | #include <mach_apic.h> | 234 | #include <mach_apic.h> |
235 | 235 | ||
236 | void fixup_irqs(cpumask_t map) | 236 | /* A cpu has been removed from cpu_online_mask. Reset irq affinities. */ |
237 | void fixup_irqs(void) | ||
237 | { | 238 | { |
238 | unsigned int irq; | 239 | unsigned int irq; |
239 | static int warned; | 240 | static int warned; |
240 | struct irq_desc *desc; | 241 | struct irq_desc *desc; |
241 | 242 | ||
242 | for_each_irq_desc(irq, desc) { | 243 | for_each_irq_desc(irq, desc) { |
243 | cpumask_t mask; | 244 | const struct cpumask *affinity; |
244 | 245 | ||
246 | if (!desc) | ||
247 | continue; | ||
245 | if (irq == 2) | 248 | if (irq == 2) |
246 | continue; | 249 | continue; |
247 | 250 | ||
248 | cpus_and(mask, desc->affinity, map); | 251 | affinity = &desc->affinity; |
249 | if (any_online_cpu(mask) == NR_CPUS) { | 252 | if (cpumask_any_and(affinity, cpu_online_mask) >= nr_cpu_ids) { |
250 | printk("Breaking affinity for irq %i\n", irq); | 253 | printk("Breaking affinity for irq %i\n", irq); |
251 | mask = map; | 254 | affinity = cpu_all_mask; |
252 | } | 255 | } |
253 | if (desc->chip->set_affinity) | 256 | if (desc->chip->set_affinity) |
254 | desc->chip->set_affinity(irq, mask); | 257 | desc->chip->set_affinity(irq, affinity); |
255 | else if (desc->action && !(warned++)) | 258 | else if (desc->action && !(warned++)) |
256 | printk("Cannot set affinity for irq %i\n", irq); | 259 | printk("Cannot set affinity for irq %i\n", irq); |
257 | } | 260 | } |
diff --git a/arch/x86/kernel/irq_64.c b/arch/x86/kernel/irq_64.c index 60eb84eb77a0..6383d50f82ea 100644 --- a/arch/x86/kernel/irq_64.c +++ b/arch/x86/kernel/irq_64.c | |||
@@ -13,12 +13,12 @@ | |||
13 | #include <linux/seq_file.h> | 13 | #include <linux/seq_file.h> |
14 | #include <linux/module.h> | 14 | #include <linux/module.h> |
15 | #include <linux/delay.h> | 15 | #include <linux/delay.h> |
16 | #include <linux/ftrace.h> | ||
16 | #include <asm/uaccess.h> | 17 | #include <asm/uaccess.h> |
17 | #include <asm/io_apic.h> | 18 | #include <asm/io_apic.h> |
18 | #include <asm/idle.h> | 19 | #include <asm/idle.h> |
19 | #include <asm/smp.h> | 20 | #include <asm/smp.h> |
20 | 21 | ||
21 | #ifdef CONFIG_DEBUG_STACKOVERFLOW | ||
22 | /* | 22 | /* |
23 | * Probabilistic stack overflow check: | 23 | * Probabilistic stack overflow check: |
24 | * | 24 | * |
@@ -28,26 +28,25 @@ | |||
28 | */ | 28 | */ |
29 | static inline void stack_overflow_check(struct pt_regs *regs) | 29 | static inline void stack_overflow_check(struct pt_regs *regs) |
30 | { | 30 | { |
31 | #ifdef CONFIG_DEBUG_STACKOVERFLOW | ||
31 | u64 curbase = (u64)task_stack_page(current); | 32 | u64 curbase = (u64)task_stack_page(current); |
32 | static unsigned long warned = -60*HZ; | 33 | |
33 | 34 | WARN_ONCE(regs->sp >= curbase && | |
34 | if (regs->sp >= curbase && regs->sp <= curbase + THREAD_SIZE && | 35 | regs->sp <= curbase + THREAD_SIZE && |
35 | regs->sp < curbase + sizeof(struct thread_info) + 128 && | 36 | regs->sp < curbase + sizeof(struct thread_info) + |
36 | time_after(jiffies, warned + 60*HZ)) { | 37 | sizeof(struct pt_regs) + 128, |
37 | printk("do_IRQ: %s near stack overflow (cur:%Lx,sp:%lx)\n", | 38 | |
38 | current->comm, curbase, regs->sp); | 39 | "do_IRQ: %s near stack overflow (cur:%Lx,sp:%lx)\n", |
39 | show_stack(NULL,NULL); | 40 | current->comm, curbase, regs->sp); |
40 | warned = jiffies; | ||
41 | } | ||
42 | } | ||
43 | #endif | 41 | #endif |
42 | } | ||
44 | 43 | ||
45 | /* | 44 | /* |
46 | * do_IRQ handles all normal device IRQ's (the special | 45 | * do_IRQ handles all normal device IRQ's (the special |
47 | * SMP cross-CPU interrupts have their own specific | 46 | * SMP cross-CPU interrupts have their own specific |
48 | * handlers). | 47 | * handlers). |
49 | */ | 48 | */ |
50 | asmlinkage unsigned int do_IRQ(struct pt_regs *regs) | 49 | asmlinkage unsigned int __irq_entry do_IRQ(struct pt_regs *regs) |
51 | { | 50 | { |
52 | struct pt_regs *old_regs = set_irq_regs(regs); | 51 | struct pt_regs *old_regs = set_irq_regs(regs); |
53 | struct irq_desc *desc; | 52 | struct irq_desc *desc; |
@@ -60,9 +59,7 @@ asmlinkage unsigned int do_IRQ(struct pt_regs *regs) | |||
60 | irq_enter(); | 59 | irq_enter(); |
61 | irq = __get_cpu_var(vector_irq)[vector]; | 60 | irq = __get_cpu_var(vector_irq)[vector]; |
62 | 61 | ||
63 | #ifdef CONFIG_DEBUG_STACKOVERFLOW | ||
64 | stack_overflow_check(regs); | 62 | stack_overflow_check(regs); |
65 | #endif | ||
66 | 63 | ||
67 | desc = irq_to_desc(irq); | 64 | desc = irq_to_desc(irq); |
68 | if (likely(desc)) | 65 | if (likely(desc)) |
@@ -83,40 +80,43 @@ asmlinkage unsigned int do_IRQ(struct pt_regs *regs) | |||
83 | } | 80 | } |
84 | 81 | ||
85 | #ifdef CONFIG_HOTPLUG_CPU | 82 | #ifdef CONFIG_HOTPLUG_CPU |
86 | void fixup_irqs(cpumask_t map) | 83 | /* A cpu has been removed from cpu_online_mask. Reset irq affinities. */ |
84 | void fixup_irqs(void) | ||
87 | { | 85 | { |
88 | unsigned int irq; | 86 | unsigned int irq; |
89 | static int warned; | 87 | static int warned; |
90 | struct irq_desc *desc; | 88 | struct irq_desc *desc; |
91 | 89 | ||
92 | for_each_irq_desc(irq, desc) { | 90 | for_each_irq_desc(irq, desc) { |
93 | cpumask_t mask; | ||
94 | int break_affinity = 0; | 91 | int break_affinity = 0; |
95 | int set_affinity = 1; | 92 | int set_affinity = 1; |
93 | const struct cpumask *affinity; | ||
96 | 94 | ||
95 | if (!desc) | ||
96 | continue; | ||
97 | if (irq == 2) | 97 | if (irq == 2) |
98 | continue; | 98 | continue; |
99 | 99 | ||
100 | /* interrupt's are disabled at this point */ | 100 | /* interrupt's are disabled at this point */ |
101 | spin_lock(&desc->lock); | 101 | spin_lock(&desc->lock); |
102 | 102 | ||
103 | affinity = &desc->affinity; | ||
103 | if (!irq_has_action(irq) || | 104 | if (!irq_has_action(irq) || |
104 | cpus_equal(desc->affinity, map)) { | 105 | cpumask_equal(affinity, cpu_online_mask)) { |
105 | spin_unlock(&desc->lock); | 106 | spin_unlock(&desc->lock); |
106 | continue; | 107 | continue; |
107 | } | 108 | } |
108 | 109 | ||
109 | cpus_and(mask, desc->affinity, map); | 110 | if (cpumask_any_and(affinity, cpu_online_mask) >= nr_cpu_ids) { |
110 | if (cpus_empty(mask)) { | ||
111 | break_affinity = 1; | 111 | break_affinity = 1; |
112 | mask = map; | 112 | affinity = cpu_all_mask; |
113 | } | 113 | } |
114 | 114 | ||
115 | if (desc->chip->mask) | 115 | if (desc->chip->mask) |
116 | desc->chip->mask(irq); | 116 | desc->chip->mask(irq); |
117 | 117 | ||
118 | if (desc->chip->set_affinity) | 118 | if (desc->chip->set_affinity) |
119 | desc->chip->set_affinity(irq, mask); | 119 | desc->chip->set_affinity(irq, affinity); |
120 | else if (!(warned++)) | 120 | else if (!(warned++)) |
121 | set_affinity = 0; | 121 | set_affinity = 0; |
122 | 122 | ||
diff --git a/arch/x86/kernel/irqinit_32.c b/arch/x86/kernel/irqinit_32.c index 845aa9803e80..84723295f88a 100644 --- a/arch/x86/kernel/irqinit_32.c +++ b/arch/x86/kernel/irqinit_32.c | |||
@@ -68,8 +68,7 @@ void __init init_ISA_irqs (void) | |||
68 | /* | 68 | /* |
69 | * 16 old-style INTA-cycle interrupts: | 69 | * 16 old-style INTA-cycle interrupts: |
70 | */ | 70 | */ |
71 | for (i = 0; i < 16; i++) { | 71 | for (i = 0; i < NR_IRQS_LEGACY; i++) { |
72 | /* first time call this irq_desc */ | ||
73 | struct irq_desc *desc = irq_to_desc(i); | 72 | struct irq_desc *desc = irq_to_desc(i); |
74 | 73 | ||
75 | desc->status = IRQ_DISABLED; | 74 | desc->status = IRQ_DISABLED; |
@@ -111,6 +110,18 @@ DEFINE_PER_CPU(vector_irq_t, vector_irq) = { | |||
111 | [IRQ15_VECTOR + 1 ... NR_VECTORS - 1] = -1 | 110 | [IRQ15_VECTOR + 1 ... NR_VECTORS - 1] = -1 |
112 | }; | 111 | }; |
113 | 112 | ||
113 | int vector_used_by_percpu_irq(unsigned int vector) | ||
114 | { | ||
115 | int cpu; | ||
116 | |||
117 | for_each_online_cpu(cpu) { | ||
118 | if (per_cpu(vector_irq, cpu)[vector] != -1) | ||
119 | return 1; | ||
120 | } | ||
121 | |||
122 | return 0; | ||
123 | } | ||
124 | |||
114 | /* Overridden in paravirt.c */ | 125 | /* Overridden in paravirt.c */ |
115 | void init_IRQ(void) __attribute__((weak, alias("native_init_IRQ"))); | 126 | void init_IRQ(void) __attribute__((weak, alias("native_init_IRQ"))); |
116 | 127 | ||
@@ -129,7 +140,7 @@ void __init native_init_IRQ(void) | |||
129 | for (i = FIRST_EXTERNAL_VECTOR; i < NR_VECTORS; i++) { | 140 | for (i = FIRST_EXTERNAL_VECTOR; i < NR_VECTORS; i++) { |
130 | /* SYSCALL_VECTOR was reserved in trap_init. */ | 141 | /* SYSCALL_VECTOR was reserved in trap_init. */ |
131 | if (i != SYSCALL_VECTOR) | 142 | if (i != SYSCALL_VECTOR) |
132 | set_intr_gate(i, interrupt[i]); | 143 | set_intr_gate(i, interrupt[i-FIRST_EXTERNAL_VECTOR]); |
133 | } | 144 | } |
134 | 145 | ||
135 | 146 | ||
@@ -147,10 +158,12 @@ void __init native_init_IRQ(void) | |||
147 | alloc_intr_gate(CALL_FUNCTION_VECTOR, call_function_interrupt); | 158 | alloc_intr_gate(CALL_FUNCTION_VECTOR, call_function_interrupt); |
148 | 159 | ||
149 | /* IPI for single call function */ | 160 | /* IPI for single call function */ |
150 | set_intr_gate(CALL_FUNCTION_SINGLE_VECTOR, call_function_single_interrupt); | 161 | alloc_intr_gate(CALL_FUNCTION_SINGLE_VECTOR, |
162 | call_function_single_interrupt); | ||
151 | 163 | ||
152 | /* Low priority IPI to cleanup after moving an irq */ | 164 | /* Low priority IPI to cleanup after moving an irq */ |
153 | set_intr_gate(IRQ_MOVE_CLEANUP_VECTOR, irq_move_cleanup_interrupt); | 165 | set_intr_gate(IRQ_MOVE_CLEANUP_VECTOR, irq_move_cleanup_interrupt); |
166 | set_bit(IRQ_MOVE_CLEANUP_VECTOR, used_vectors); | ||
154 | #endif | 167 | #endif |
155 | 168 | ||
156 | #ifdef CONFIG_X86_LOCAL_APIC | 169 | #ifdef CONFIG_X86_LOCAL_APIC |
diff --git a/arch/x86/kernel/irqinit_64.c b/arch/x86/kernel/irqinit_64.c index ff0235391285..31ebfe38e96c 100644 --- a/arch/x86/kernel/irqinit_64.c +++ b/arch/x86/kernel/irqinit_64.c | |||
@@ -24,41 +24,6 @@ | |||
24 | #include <asm/i8259.h> | 24 | #include <asm/i8259.h> |
25 | 25 | ||
26 | /* | 26 | /* |
27 | * Common place to define all x86 IRQ vectors | ||
28 | * | ||
29 | * This builds up the IRQ handler stubs using some ugly macros in irq.h | ||
30 | * | ||
31 | * These macros create the low-level assembly IRQ routines that save | ||
32 | * register context and call do_IRQ(). do_IRQ() then does all the | ||
33 | * operations that are needed to keep the AT (or SMP IOAPIC) | ||
34 | * interrupt-controller happy. | ||
35 | */ | ||
36 | |||
37 | #define IRQ_NAME2(nr) nr##_interrupt(void) | ||
38 | #define IRQ_NAME(nr) IRQ_NAME2(IRQ##nr) | ||
39 | |||
40 | /* | ||
41 | * SMP has a few special interrupts for IPI messages | ||
42 | */ | ||
43 | |||
44 | #define BUILD_IRQ(nr) \ | ||
45 | asmlinkage void IRQ_NAME(nr); \ | ||
46 | asm("\n.text\n.p2align\n" \ | ||
47 | "IRQ" #nr "_interrupt:\n\t" \ | ||
48 | "push $~(" #nr ") ; " \ | ||
49 | "jmp common_interrupt\n" \ | ||
50 | ".previous"); | ||
51 | |||
52 | #define BI(x,y) \ | ||
53 | BUILD_IRQ(x##y) | ||
54 | |||
55 | #define BUILD_16_IRQS(x) \ | ||
56 | BI(x,0) BI(x,1) BI(x,2) BI(x,3) \ | ||
57 | BI(x,4) BI(x,5) BI(x,6) BI(x,7) \ | ||
58 | BI(x,8) BI(x,9) BI(x,a) BI(x,b) \ | ||
59 | BI(x,c) BI(x,d) BI(x,e) BI(x,f) | ||
60 | |||
61 | /* | ||
62 | * ISA PIC or low IO-APIC triggered (INTA-cycle or APIC) interrupts: | 27 | * ISA PIC or low IO-APIC triggered (INTA-cycle or APIC) interrupts: |
63 | * (these are usually mapped to vectors 0x30-0x3f) | 28 | * (these are usually mapped to vectors 0x30-0x3f) |
64 | */ | 29 | */ |
@@ -73,37 +38,6 @@ | |||
73 | * | 38 | * |
74 | * (these are usually mapped into the 0x30-0xff vector range) | 39 | * (these are usually mapped into the 0x30-0xff vector range) |
75 | */ | 40 | */ |
76 | BUILD_16_IRQS(0x2) BUILD_16_IRQS(0x3) | ||
77 | BUILD_16_IRQS(0x4) BUILD_16_IRQS(0x5) BUILD_16_IRQS(0x6) BUILD_16_IRQS(0x7) | ||
78 | BUILD_16_IRQS(0x8) BUILD_16_IRQS(0x9) BUILD_16_IRQS(0xa) BUILD_16_IRQS(0xb) | ||
79 | BUILD_16_IRQS(0xc) BUILD_16_IRQS(0xd) BUILD_16_IRQS(0xe) BUILD_16_IRQS(0xf) | ||
80 | |||
81 | #undef BUILD_16_IRQS | ||
82 | #undef BI | ||
83 | |||
84 | |||
85 | #define IRQ(x,y) \ | ||
86 | IRQ##x##y##_interrupt | ||
87 | |||
88 | #define IRQLIST_16(x) \ | ||
89 | IRQ(x,0), IRQ(x,1), IRQ(x,2), IRQ(x,3), \ | ||
90 | IRQ(x,4), IRQ(x,5), IRQ(x,6), IRQ(x,7), \ | ||
91 | IRQ(x,8), IRQ(x,9), IRQ(x,a), IRQ(x,b), \ | ||
92 | IRQ(x,c), IRQ(x,d), IRQ(x,e), IRQ(x,f) | ||
93 | |||
94 | /* for the irq vectors */ | ||
95 | static void (*__initdata interrupt[NR_VECTORS - FIRST_EXTERNAL_VECTOR])(void) = { | ||
96 | IRQLIST_16(0x2), IRQLIST_16(0x3), | ||
97 | IRQLIST_16(0x4), IRQLIST_16(0x5), IRQLIST_16(0x6), IRQLIST_16(0x7), | ||
98 | IRQLIST_16(0x8), IRQLIST_16(0x9), IRQLIST_16(0xa), IRQLIST_16(0xb), | ||
99 | IRQLIST_16(0xc), IRQLIST_16(0xd), IRQLIST_16(0xe), IRQLIST_16(0xf) | ||
100 | }; | ||
101 | |||
102 | #undef IRQ | ||
103 | #undef IRQLIST_16 | ||
104 | |||
105 | |||
106 | |||
107 | 41 | ||
108 | /* | 42 | /* |
109 | * IRQ2 is cascade interrupt to second interrupt controller | 43 | * IRQ2 is cascade interrupt to second interrupt controller |
@@ -135,6 +69,18 @@ DEFINE_PER_CPU(vector_irq_t, vector_irq) = { | |||
135 | [IRQ15_VECTOR + 1 ... NR_VECTORS - 1] = -1 | 69 | [IRQ15_VECTOR + 1 ... NR_VECTORS - 1] = -1 |
136 | }; | 70 | }; |
137 | 71 | ||
72 | int vector_used_by_percpu_irq(unsigned int vector) | ||
73 | { | ||
74 | int cpu; | ||
75 | |||
76 | for_each_online_cpu(cpu) { | ||
77 | if (per_cpu(vector_irq, cpu)[vector] != -1) | ||
78 | return 1; | ||
79 | } | ||
80 | |||
81 | return 0; | ||
82 | } | ||
83 | |||
138 | void __init init_ISA_irqs(void) | 84 | void __init init_ISA_irqs(void) |
139 | { | 85 | { |
140 | int i; | 86 | int i; |
@@ -142,8 +88,7 @@ void __init init_ISA_irqs(void) | |||
142 | init_bsp_APIC(); | 88 | init_bsp_APIC(); |
143 | init_8259A(0); | 89 | init_8259A(0); |
144 | 90 | ||
145 | for (i = 0; i < 16; i++) { | 91 | for (i = 0; i < NR_IRQS_LEGACY; i++) { |
146 | /* first time call this irq_desc */ | ||
147 | struct irq_desc *desc = irq_to_desc(i); | 92 | struct irq_desc *desc = irq_to_desc(i); |
148 | 93 | ||
149 | desc->status = IRQ_DISABLED; | 94 | desc->status = IRQ_DISABLED; |
@@ -188,6 +133,7 @@ static void __init smp_intr_init(void) | |||
188 | 133 | ||
189 | /* Low priority IPI to cleanup after moving an irq */ | 134 | /* Low priority IPI to cleanup after moving an irq */ |
190 | set_intr_gate(IRQ_MOVE_CLEANUP_VECTOR, irq_move_cleanup_interrupt); | 135 | set_intr_gate(IRQ_MOVE_CLEANUP_VECTOR, irq_move_cleanup_interrupt); |
136 | set_bit(IRQ_MOVE_CLEANUP_VECTOR, used_vectors); | ||
191 | #endif | 137 | #endif |
192 | } | 138 | } |
193 | 139 | ||
diff --git a/arch/x86/kernel/kprobes.c b/arch/x86/kernel/kprobes.c index 6c27679ec6aa..884d985b8b82 100644 --- a/arch/x86/kernel/kprobes.c +++ b/arch/x86/kernel/kprobes.c | |||
@@ -376,9 +376,10 @@ void __kprobes arch_disarm_kprobe(struct kprobe *p) | |||
376 | 376 | ||
377 | void __kprobes arch_remove_kprobe(struct kprobe *p) | 377 | void __kprobes arch_remove_kprobe(struct kprobe *p) |
378 | { | 378 | { |
379 | mutex_lock(&kprobe_mutex); | 379 | if (p->ainsn.insn) { |
380 | free_insn_slot(p->ainsn.insn, (p->ainsn.boostable == 1)); | 380 | free_insn_slot(p->ainsn.insn, (p->ainsn.boostable == 1)); |
381 | mutex_unlock(&kprobe_mutex); | 381 | p->ainsn.insn = NULL; |
382 | } | ||
382 | } | 383 | } |
383 | 384 | ||
384 | static void __kprobes save_previous_kprobe(struct kprobe_ctlblk *kcb) | 385 | static void __kprobes save_previous_kprobe(struct kprobe_ctlblk *kcb) |
@@ -694,7 +695,7 @@ static __used __kprobes void *trampoline_handler(struct pt_regs *regs) | |||
694 | /* | 695 | /* |
695 | * It is possible to have multiple instances associated with a given | 696 | * It is possible to have multiple instances associated with a given |
696 | * task either because multiple functions in the call path have | 697 | * task either because multiple functions in the call path have |
697 | * return probes installed on them, and/or more then one | 698 | * return probes installed on them, and/or more than one |
698 | * return probe was registered for a target function. | 699 | * return probe was registered for a target function. |
699 | * | 700 | * |
700 | * We can handle this because: | 701 | * We can handle this because: |
diff --git a/arch/x86/kernel/kvmclock.c b/arch/x86/kernel/kvmclock.c index e169ae9b6a62..652fce6d2cce 100644 --- a/arch/x86/kernel/kvmclock.c +++ b/arch/x86/kernel/kvmclock.c | |||
@@ -89,17 +89,17 @@ static cycle_t kvm_clock_read(void) | |||
89 | */ | 89 | */ |
90 | static unsigned long kvm_get_tsc_khz(void) | 90 | static unsigned long kvm_get_tsc_khz(void) |
91 | { | 91 | { |
92 | return preset_lpj; | 92 | struct pvclock_vcpu_time_info *src; |
93 | src = &per_cpu(hv_clock, 0); | ||
94 | return pvclock_tsc_khz(src); | ||
93 | } | 95 | } |
94 | 96 | ||
95 | static void kvm_get_preset_lpj(void) | 97 | static void kvm_get_preset_lpj(void) |
96 | { | 98 | { |
97 | struct pvclock_vcpu_time_info *src; | ||
98 | unsigned long khz; | 99 | unsigned long khz; |
99 | u64 lpj; | 100 | u64 lpj; |
100 | 101 | ||
101 | src = &per_cpu(hv_clock, 0); | 102 | khz = kvm_get_tsc_khz(); |
102 | khz = pvclock_tsc_khz(src); | ||
103 | 103 | ||
104 | lpj = ((u64)khz * 1000); | 104 | lpj = ((u64)khz * 1000); |
105 | do_div(lpj, HZ); | 105 | do_div(lpj, HZ); |
@@ -194,5 +194,7 @@ void __init kvmclock_init(void) | |||
194 | #endif | 194 | #endif |
195 | kvm_get_preset_lpj(); | 195 | kvm_get_preset_lpj(); |
196 | clocksource_register(&kvm_clock); | 196 | clocksource_register(&kvm_clock); |
197 | pv_info.paravirt_enabled = 1; | ||
198 | pv_info.name = "KVM"; | ||
197 | } | 199 | } |
198 | } | 200 | } |
diff --git a/arch/x86/kernel/ldt.c b/arch/x86/kernel/ldt.c index eee32b43fee3..71f1d99a635d 100644 --- a/arch/x86/kernel/ldt.c +++ b/arch/x86/kernel/ldt.c | |||
@@ -12,8 +12,8 @@ | |||
12 | #include <linux/mm.h> | 12 | #include <linux/mm.h> |
13 | #include <linux/smp.h> | 13 | #include <linux/smp.h> |
14 | #include <linux/vmalloc.h> | 14 | #include <linux/vmalloc.h> |
15 | #include <linux/uaccess.h> | ||
15 | 16 | ||
16 | #include <asm/uaccess.h> | ||
17 | #include <asm/system.h> | 17 | #include <asm/system.h> |
18 | #include <asm/ldt.h> | 18 | #include <asm/ldt.h> |
19 | #include <asm/desc.h> | 19 | #include <asm/desc.h> |
@@ -93,7 +93,7 @@ static inline int copy_ldt(mm_context_t *new, mm_context_t *old) | |||
93 | if (err < 0) | 93 | if (err < 0) |
94 | return err; | 94 | return err; |
95 | 95 | ||
96 | for(i = 0; i < old->size; i++) | 96 | for (i = 0; i < old->size; i++) |
97 | write_ldt_entry(new->ldt, i, old->ldt + i * LDT_ENTRY_SIZE); | 97 | write_ldt_entry(new->ldt, i, old->ldt + i * LDT_ENTRY_SIZE); |
98 | return 0; | 98 | return 0; |
99 | } | 99 | } |
diff --git a/arch/x86/kernel/machine_kexec_32.c b/arch/x86/kernel/machine_kexec_32.c index 7a385746509a..37f420018a41 100644 --- a/arch/x86/kernel/machine_kexec_32.c +++ b/arch/x86/kernel/machine_kexec_32.c | |||
@@ -13,6 +13,7 @@ | |||
13 | #include <linux/numa.h> | 13 | #include <linux/numa.h> |
14 | #include <linux/ftrace.h> | 14 | #include <linux/ftrace.h> |
15 | #include <linux/suspend.h> | 15 | #include <linux/suspend.h> |
16 | #include <linux/gfp.h> | ||
16 | 17 | ||
17 | #include <asm/pgtable.h> | 18 | #include <asm/pgtable.h> |
18 | #include <asm/pgalloc.h> | 19 | #include <asm/pgalloc.h> |
@@ -25,15 +26,6 @@ | |||
25 | #include <asm/system.h> | 26 | #include <asm/system.h> |
26 | #include <asm/cacheflush.h> | 27 | #include <asm/cacheflush.h> |
27 | 28 | ||
28 | #define PAGE_ALIGNED __attribute__ ((__aligned__(PAGE_SIZE))) | ||
29 | static u32 kexec_pgd[1024] PAGE_ALIGNED; | ||
30 | #ifdef CONFIG_X86_PAE | ||
31 | static u32 kexec_pmd0[1024] PAGE_ALIGNED; | ||
32 | static u32 kexec_pmd1[1024] PAGE_ALIGNED; | ||
33 | #endif | ||
34 | static u32 kexec_pte0[1024] PAGE_ALIGNED; | ||
35 | static u32 kexec_pte1[1024] PAGE_ALIGNED; | ||
36 | |||
37 | static void set_idt(void *newidt, __u16 limit) | 29 | static void set_idt(void *newidt, __u16 limit) |
38 | { | 30 | { |
39 | struct desc_ptr curidt; | 31 | struct desc_ptr curidt; |
@@ -76,6 +68,76 @@ static void load_segments(void) | |||
76 | #undef __STR | 68 | #undef __STR |
77 | } | 69 | } |
78 | 70 | ||
71 | static void machine_kexec_free_page_tables(struct kimage *image) | ||
72 | { | ||
73 | free_page((unsigned long)image->arch.pgd); | ||
74 | #ifdef CONFIG_X86_PAE | ||
75 | free_page((unsigned long)image->arch.pmd0); | ||
76 | free_page((unsigned long)image->arch.pmd1); | ||
77 | #endif | ||
78 | free_page((unsigned long)image->arch.pte0); | ||
79 | free_page((unsigned long)image->arch.pte1); | ||
80 | } | ||
81 | |||
82 | static int machine_kexec_alloc_page_tables(struct kimage *image) | ||
83 | { | ||
84 | image->arch.pgd = (pgd_t *)get_zeroed_page(GFP_KERNEL); | ||
85 | #ifdef CONFIG_X86_PAE | ||
86 | image->arch.pmd0 = (pmd_t *)get_zeroed_page(GFP_KERNEL); | ||
87 | image->arch.pmd1 = (pmd_t *)get_zeroed_page(GFP_KERNEL); | ||
88 | #endif | ||
89 | image->arch.pte0 = (pte_t *)get_zeroed_page(GFP_KERNEL); | ||
90 | image->arch.pte1 = (pte_t *)get_zeroed_page(GFP_KERNEL); | ||
91 | if (!image->arch.pgd || | ||
92 | #ifdef CONFIG_X86_PAE | ||
93 | !image->arch.pmd0 || !image->arch.pmd1 || | ||
94 | #endif | ||
95 | !image->arch.pte0 || !image->arch.pte1) { | ||
96 | machine_kexec_free_page_tables(image); | ||
97 | return -ENOMEM; | ||
98 | } | ||
99 | return 0; | ||
100 | } | ||
101 | |||
102 | static void machine_kexec_page_table_set_one( | ||
103 | pgd_t *pgd, pmd_t *pmd, pte_t *pte, | ||
104 | unsigned long vaddr, unsigned long paddr) | ||
105 | { | ||
106 | pud_t *pud; | ||
107 | |||
108 | pgd += pgd_index(vaddr); | ||
109 | #ifdef CONFIG_X86_PAE | ||
110 | if (!(pgd_val(*pgd) & _PAGE_PRESENT)) | ||
111 | set_pgd(pgd, __pgd(__pa(pmd) | _PAGE_PRESENT)); | ||
112 | #endif | ||
113 | pud = pud_offset(pgd, vaddr); | ||
114 | pmd = pmd_offset(pud, vaddr); | ||
115 | if (!(pmd_val(*pmd) & _PAGE_PRESENT)) | ||
116 | set_pmd(pmd, __pmd(__pa(pte) | _PAGE_TABLE)); | ||
117 | pte = pte_offset_kernel(pmd, vaddr); | ||
118 | set_pte(pte, pfn_pte(paddr >> PAGE_SHIFT, PAGE_KERNEL_EXEC)); | ||
119 | } | ||
120 | |||
121 | static void machine_kexec_prepare_page_tables(struct kimage *image) | ||
122 | { | ||
123 | void *control_page; | ||
124 | pmd_t *pmd = 0; | ||
125 | |||
126 | control_page = page_address(image->control_code_page); | ||
127 | #ifdef CONFIG_X86_PAE | ||
128 | pmd = image->arch.pmd0; | ||
129 | #endif | ||
130 | machine_kexec_page_table_set_one( | ||
131 | image->arch.pgd, pmd, image->arch.pte0, | ||
132 | (unsigned long)control_page, __pa(control_page)); | ||
133 | #ifdef CONFIG_X86_PAE | ||
134 | pmd = image->arch.pmd1; | ||
135 | #endif | ||
136 | machine_kexec_page_table_set_one( | ||
137 | image->arch.pgd, pmd, image->arch.pte1, | ||
138 | __pa(control_page), __pa(control_page)); | ||
139 | } | ||
140 | |||
79 | /* | 141 | /* |
80 | * A architecture hook called to validate the | 142 | * A architecture hook called to validate the |
81 | * proposed image and prepare the control pages | 143 | * proposed image and prepare the control pages |
@@ -87,12 +149,20 @@ static void load_segments(void) | |||
87 | * reboot code buffer to allow us to avoid allocations | 149 | * reboot code buffer to allow us to avoid allocations |
88 | * later. | 150 | * later. |
89 | * | 151 | * |
90 | * Make control page executable. | 152 | * - Make control page executable. |
153 | * - Allocate page tables | ||
154 | * - Setup page tables | ||
91 | */ | 155 | */ |
92 | int machine_kexec_prepare(struct kimage *image) | 156 | int machine_kexec_prepare(struct kimage *image) |
93 | { | 157 | { |
158 | int error; | ||
159 | |||
94 | if (nx_enabled) | 160 | if (nx_enabled) |
95 | set_pages_x(image->control_code_page, 1); | 161 | set_pages_x(image->control_code_page, 1); |
162 | error = machine_kexec_alloc_page_tables(image); | ||
163 | if (error) | ||
164 | return error; | ||
165 | machine_kexec_prepare_page_tables(image); | ||
96 | return 0; | 166 | return 0; |
97 | } | 167 | } |
98 | 168 | ||
@@ -104,6 +174,7 @@ void machine_kexec_cleanup(struct kimage *image) | |||
104 | { | 174 | { |
105 | if (nx_enabled) | 175 | if (nx_enabled) |
106 | set_pages_nx(image->control_code_page, 1); | 176 | set_pages_nx(image->control_code_page, 1); |
177 | machine_kexec_free_page_tables(image); | ||
107 | } | 178 | } |
108 | 179 | ||
109 | /* | 180 | /* |
@@ -150,18 +221,7 @@ void machine_kexec(struct kimage *image) | |||
150 | relocate_kernel_ptr = control_page; | 221 | relocate_kernel_ptr = control_page; |
151 | page_list[PA_CONTROL_PAGE] = __pa(control_page); | 222 | page_list[PA_CONTROL_PAGE] = __pa(control_page); |
152 | page_list[VA_CONTROL_PAGE] = (unsigned long)control_page; | 223 | page_list[VA_CONTROL_PAGE] = (unsigned long)control_page; |
153 | page_list[PA_PGD] = __pa(kexec_pgd); | 224 | page_list[PA_PGD] = __pa(image->arch.pgd); |
154 | page_list[VA_PGD] = (unsigned long)kexec_pgd; | ||
155 | #ifdef CONFIG_X86_PAE | ||
156 | page_list[PA_PMD_0] = __pa(kexec_pmd0); | ||
157 | page_list[VA_PMD_0] = (unsigned long)kexec_pmd0; | ||
158 | page_list[PA_PMD_1] = __pa(kexec_pmd1); | ||
159 | page_list[VA_PMD_1] = (unsigned long)kexec_pmd1; | ||
160 | #endif | ||
161 | page_list[PA_PTE_0] = __pa(kexec_pte0); | ||
162 | page_list[VA_PTE_0] = (unsigned long)kexec_pte0; | ||
163 | page_list[PA_PTE_1] = __pa(kexec_pte1); | ||
164 | page_list[VA_PTE_1] = (unsigned long)kexec_pte1; | ||
165 | 225 | ||
166 | if (image->type == KEXEC_TYPE_DEFAULT) | 226 | if (image->type == KEXEC_TYPE_DEFAULT) |
167 | page_list[PA_SWAP_PAGE] = (page_to_pfn(image->swap_page) | 227 | page_list[PA_SWAP_PAGE] = (page_to_pfn(image->swap_page) |
diff --git a/arch/x86/kernel/mfgpt_32.c b/arch/x86/kernel/mfgpt_32.c index 3b599518c322..8815f3c7fec7 100644 --- a/arch/x86/kernel/mfgpt_32.c +++ b/arch/x86/kernel/mfgpt_32.c | |||
@@ -252,7 +252,7 @@ EXPORT_SYMBOL_GPL(geode_mfgpt_alloc_timer); | |||
252 | /* | 252 | /* |
253 | * The MFPGT timers on the CS5536 provide us with suitable timers to use | 253 | * The MFPGT timers on the CS5536 provide us with suitable timers to use |
254 | * as clock event sources - not as good as a HPET or APIC, but certainly | 254 | * as clock event sources - not as good as a HPET or APIC, but certainly |
255 | * better then the PIT. This isn't a general purpose MFGPT driver, but | 255 | * better than the PIT. This isn't a general purpose MFGPT driver, but |
256 | * a simplified one designed specifically to act as a clock event source. | 256 | * a simplified one designed specifically to act as a clock event source. |
257 | * For full details about the MFGPT, please consult the CS5536 data sheet. | 257 | * For full details about the MFGPT, please consult the CS5536 data sheet. |
258 | */ | 258 | */ |
@@ -287,7 +287,7 @@ static struct clock_event_device mfgpt_clockevent = { | |||
287 | .set_mode = mfgpt_set_mode, | 287 | .set_mode = mfgpt_set_mode, |
288 | .set_next_event = mfgpt_next_event, | 288 | .set_next_event = mfgpt_next_event, |
289 | .rating = 250, | 289 | .rating = 250, |
290 | .cpumask = CPU_MASK_ALL, | 290 | .cpumask = cpu_all_mask, |
291 | .shift = 32 | 291 | .shift = 32 |
292 | }; | 292 | }; |
293 | 293 | ||
diff --git a/arch/x86/kernel/microcode_amd.c b/arch/x86/kernel/microcode_amd.c index 5f8e5d75a254..c25fdb382292 100644 --- a/arch/x86/kernel/microcode_amd.c +++ b/arch/x86/kernel/microcode_amd.c | |||
@@ -10,7 +10,7 @@ | |||
10 | * This driver allows to upgrade microcode on AMD | 10 | * This driver allows to upgrade microcode on AMD |
11 | * family 0x10 and 0x11 processors. | 11 | * family 0x10 and 0x11 processors. |
12 | * | 12 | * |
13 | * Licensed unter the terms of the GNU General Public | 13 | * Licensed under the terms of the GNU General Public |
14 | * License version 2. See file COPYING for details. | 14 | * License version 2. See file COPYING for details. |
15 | */ | 15 | */ |
16 | 16 | ||
@@ -32,9 +32,9 @@ | |||
32 | #include <linux/platform_device.h> | 32 | #include <linux/platform_device.h> |
33 | #include <linux/pci.h> | 33 | #include <linux/pci.h> |
34 | #include <linux/pci_ids.h> | 34 | #include <linux/pci_ids.h> |
35 | #include <linux/uaccess.h> | ||
35 | 36 | ||
36 | #include <asm/msr.h> | 37 | #include <asm/msr.h> |
37 | #include <asm/uaccess.h> | ||
38 | #include <asm/processor.h> | 38 | #include <asm/processor.h> |
39 | #include <asm/microcode.h> | 39 | #include <asm/microcode.h> |
40 | 40 | ||
@@ -47,43 +47,38 @@ MODULE_LICENSE("GPL v2"); | |||
47 | #define UCODE_UCODE_TYPE 0x00000001 | 47 | #define UCODE_UCODE_TYPE 0x00000001 |
48 | 48 | ||
49 | struct equiv_cpu_entry { | 49 | struct equiv_cpu_entry { |
50 | unsigned int installed_cpu; | 50 | u32 installed_cpu; |
51 | unsigned int fixed_errata_mask; | 51 | u32 fixed_errata_mask; |
52 | unsigned int fixed_errata_compare; | 52 | u32 fixed_errata_compare; |
53 | unsigned int equiv_cpu; | 53 | u16 equiv_cpu; |
54 | }; | 54 | u16 res; |
55 | } __attribute__((packed)); | ||
55 | 56 | ||
56 | struct microcode_header_amd { | 57 | struct microcode_header_amd { |
57 | unsigned int data_code; | 58 | u32 data_code; |
58 | unsigned int patch_id; | 59 | u32 patch_id; |
59 | unsigned char mc_patch_data_id[2]; | 60 | u16 mc_patch_data_id; |
60 | unsigned char mc_patch_data_len; | 61 | u8 mc_patch_data_len; |
61 | unsigned char init_flag; | 62 | u8 init_flag; |
62 | unsigned int mc_patch_data_checksum; | 63 | u32 mc_patch_data_checksum; |
63 | unsigned int nb_dev_id; | 64 | u32 nb_dev_id; |
64 | unsigned int sb_dev_id; | 65 | u32 sb_dev_id; |
65 | unsigned char processor_rev_id[2]; | 66 | u16 processor_rev_id; |
66 | unsigned char nb_rev_id; | 67 | u8 nb_rev_id; |
67 | unsigned char sb_rev_id; | 68 | u8 sb_rev_id; |
68 | unsigned char bios_api_rev; | 69 | u8 bios_api_rev; |
69 | unsigned char reserved1[3]; | 70 | u8 reserved1[3]; |
70 | unsigned int match_reg[8]; | 71 | u32 match_reg[8]; |
71 | }; | 72 | } __attribute__((packed)); |
72 | 73 | ||
73 | struct microcode_amd { | 74 | struct microcode_amd { |
74 | struct microcode_header_amd hdr; | 75 | struct microcode_header_amd hdr; |
75 | unsigned int mpb[0]; | 76 | unsigned int mpb[0]; |
76 | }; | 77 | }; |
77 | 78 | ||
78 | #define UCODE_MAX_SIZE (2048) | 79 | #define UCODE_MAX_SIZE 2048 |
79 | #define DEFAULT_UCODE_DATASIZE (896) | 80 | #define UCODE_CONTAINER_SECTION_HDR 8 |
80 | #define MC_HEADER_SIZE (sizeof(struct microcode_header_amd)) | 81 | #define UCODE_CONTAINER_HEADER_SIZE 12 |
81 | #define DEFAULT_UCODE_TOTALSIZE (DEFAULT_UCODE_DATASIZE + MC_HEADER_SIZE) | ||
82 | #define DWSIZE (sizeof(u32)) | ||
83 | /* For now we support a fixed ucode total size only */ | ||
84 | #define get_totalsize(mc) \ | ||
85 | ((((struct microcode_amd *)mc)->hdr.mc_patch_data_len * 28) \ | ||
86 | + MC_HEADER_SIZE) | ||
87 | 82 | ||
88 | /* serialize access to the physical write */ | 83 | /* serialize access to the physical write */ |
89 | static DEFINE_SPINLOCK(microcode_update_lock); | 84 | static DEFINE_SPINLOCK(microcode_update_lock); |
@@ -93,31 +88,24 @@ static struct equiv_cpu_entry *equiv_cpu_table; | |||
93 | static int collect_cpu_info_amd(int cpu, struct cpu_signature *csig) | 88 | static int collect_cpu_info_amd(int cpu, struct cpu_signature *csig) |
94 | { | 89 | { |
95 | struct cpuinfo_x86 *c = &cpu_data(cpu); | 90 | struct cpuinfo_x86 *c = &cpu_data(cpu); |
91 | u32 dummy; | ||
96 | 92 | ||
97 | memset(csig, 0, sizeof(*csig)); | 93 | memset(csig, 0, sizeof(*csig)); |
98 | |||
99 | if (c->x86_vendor != X86_VENDOR_AMD || c->x86 < 0x10) { | 94 | if (c->x86_vendor != X86_VENDOR_AMD || c->x86 < 0x10) { |
100 | printk(KERN_ERR "microcode: CPU%d not a capable AMD processor\n", | 95 | printk(KERN_WARNING "microcode: CPU%d: AMD CPU family 0x%x not " |
101 | cpu); | 96 | "supported\n", cpu, c->x86); |
102 | return -1; | 97 | return -1; |
103 | } | 98 | } |
104 | 99 | rdmsr(MSR_AMD64_PATCH_LEVEL, csig->rev, dummy); | |
105 | asm volatile("movl %1, %%ecx; rdmsr" | 100 | printk(KERN_INFO "microcode: CPU%d: patch_level=0x%x\n", cpu, csig->rev); |
106 | : "=a" (csig->rev) | ||
107 | : "i" (0x0000008B) : "ecx"); | ||
108 | |||
109 | printk(KERN_INFO "microcode: collect_cpu_info_amd : patch_id=0x%x\n", | ||
110 | csig->rev); | ||
111 | |||
112 | return 0; | 101 | return 0; |
113 | } | 102 | } |
114 | 103 | ||
115 | static int get_matching_microcode(int cpu, void *mc, int rev) | 104 | static int get_matching_microcode(int cpu, void *mc, int rev) |
116 | { | 105 | { |
117 | struct microcode_header_amd *mc_header = mc; | 106 | struct microcode_header_amd *mc_header = mc; |
118 | struct pci_dev *nb_pci_dev, *sb_pci_dev; | ||
119 | unsigned int current_cpu_id; | 107 | unsigned int current_cpu_id; |
120 | unsigned int equiv_cpu_id = 0x00; | 108 | u16 equiv_cpu_id = 0; |
121 | unsigned int i = 0; | 109 | unsigned int i = 0; |
122 | 110 | ||
123 | BUG_ON(equiv_cpu_table == NULL); | 111 | BUG_ON(equiv_cpu_table == NULL); |
@@ -132,57 +120,25 @@ static int get_matching_microcode(int cpu, void *mc, int rev) | |||
132 | } | 120 | } |
133 | 121 | ||
134 | if (!equiv_cpu_id) { | 122 | if (!equiv_cpu_id) { |
135 | printk(KERN_ERR "microcode: CPU%d cpu_id " | 123 | printk(KERN_WARNING "microcode: CPU%d: cpu revision " |
136 | "not found in equivalent cpu table \n", cpu); | 124 | "not listed in equivalent cpu table\n", cpu); |
137 | return 0; | 125 | return 0; |
138 | } | 126 | } |
139 | 127 | ||
140 | if ((mc_header->processor_rev_id[0]) != (equiv_cpu_id & 0xff)) { | 128 | if (mc_header->processor_rev_id != equiv_cpu_id) { |
141 | printk(KERN_ERR | 129 | printk(KERN_ERR "microcode: CPU%d: patch mismatch " |
142 | "microcode: CPU%d patch does not match " | 130 | "(processor_rev_id: %x, equiv_cpu_id: %x)\n", |
143 | "(patch is %x, cpu extended is %x) \n", | 131 | cpu, mc_header->processor_rev_id, equiv_cpu_id); |
144 | cpu, mc_header->processor_rev_id[0], | ||
145 | (equiv_cpu_id & 0xff)); | ||
146 | return 0; | 132 | return 0; |
147 | } | 133 | } |
148 | 134 | ||
149 | if ((mc_header->processor_rev_id[1]) != ((equiv_cpu_id >> 16) & 0xff)) { | 135 | /* ucode might be chipset specific -- currently we don't support this */ |
150 | printk(KERN_ERR "microcode: CPU%d patch does not match " | 136 | if (mc_header->nb_dev_id || mc_header->sb_dev_id) { |
151 | "(patch is %x, cpu base id is %x) \n", | 137 | printk(KERN_ERR "microcode: CPU%d: loading of chipset " |
152 | cpu, mc_header->processor_rev_id[1], | 138 | "specific code not yet supported\n", cpu); |
153 | ((equiv_cpu_id >> 16) & 0xff)); | ||
154 | |||
155 | return 0; | 139 | return 0; |
156 | } | 140 | } |
157 | 141 | ||
158 | /* ucode may be northbridge specific */ | ||
159 | if (mc_header->nb_dev_id) { | ||
160 | nb_pci_dev = pci_get_device(PCI_VENDOR_ID_AMD, | ||
161 | (mc_header->nb_dev_id & 0xff), | ||
162 | NULL); | ||
163 | if ((!nb_pci_dev) || | ||
164 | (mc_header->nb_rev_id != nb_pci_dev->revision)) { | ||
165 | printk(KERN_ERR "microcode: CPU%d NB mismatch \n", cpu); | ||
166 | pci_dev_put(nb_pci_dev); | ||
167 | return 0; | ||
168 | } | ||
169 | pci_dev_put(nb_pci_dev); | ||
170 | } | ||
171 | |||
172 | /* ucode may be southbridge specific */ | ||
173 | if (mc_header->sb_dev_id) { | ||
174 | sb_pci_dev = pci_get_device(PCI_VENDOR_ID_AMD, | ||
175 | (mc_header->sb_dev_id & 0xff), | ||
176 | NULL); | ||
177 | if ((!sb_pci_dev) || | ||
178 | (mc_header->sb_rev_id != sb_pci_dev->revision)) { | ||
179 | printk(KERN_ERR "microcode: CPU%d SB mismatch \n", cpu); | ||
180 | pci_dev_put(sb_pci_dev); | ||
181 | return 0; | ||
182 | } | ||
183 | pci_dev_put(sb_pci_dev); | ||
184 | } | ||
185 | |||
186 | if (mc_header->patch_id <= rev) | 142 | if (mc_header->patch_id <= rev) |
187 | return 0; | 143 | return 0; |
188 | 144 | ||
@@ -192,12 +148,10 @@ static int get_matching_microcode(int cpu, void *mc, int rev) | |||
192 | static void apply_microcode_amd(int cpu) | 148 | static void apply_microcode_amd(int cpu) |
193 | { | 149 | { |
194 | unsigned long flags; | 150 | unsigned long flags; |
195 | unsigned int eax, edx; | 151 | u32 rev, dummy; |
196 | unsigned int rev; | ||
197 | int cpu_num = raw_smp_processor_id(); | 152 | int cpu_num = raw_smp_processor_id(); |
198 | struct ucode_cpu_info *uci = ucode_cpu_info + cpu_num; | 153 | struct ucode_cpu_info *uci = ucode_cpu_info + cpu_num; |
199 | struct microcode_amd *mc_amd = uci->mc; | 154 | struct microcode_amd *mc_amd = uci->mc; |
200 | unsigned long addr; | ||
201 | 155 | ||
202 | /* We should bind the task to the CPU */ | 156 | /* We should bind the task to the CPU */ |
203 | BUG_ON(cpu_num != cpu); | 157 | BUG_ON(cpu_num != cpu); |
@@ -206,42 +160,34 @@ static void apply_microcode_amd(int cpu) | |||
206 | return; | 160 | return; |
207 | 161 | ||
208 | spin_lock_irqsave(&microcode_update_lock, flags); | 162 | spin_lock_irqsave(&microcode_update_lock, flags); |
209 | 163 | wrmsrl(MSR_AMD64_PATCH_LOADER, (u64)(long)&mc_amd->hdr.data_code); | |
210 | addr = (unsigned long)&mc_amd->hdr.data_code; | ||
211 | edx = (unsigned int)(((unsigned long)upper_32_bits(addr))); | ||
212 | eax = (unsigned int)(((unsigned long)lower_32_bits(addr))); | ||
213 | |||
214 | asm volatile("movl %0, %%ecx; wrmsr" : | ||
215 | : "i" (0xc0010020), "a" (eax), "d" (edx) : "ecx"); | ||
216 | |||
217 | /* get patch id after patching */ | 164 | /* get patch id after patching */ |
218 | asm volatile("movl %1, %%ecx; rdmsr" | 165 | rdmsr(MSR_AMD64_PATCH_LEVEL, rev, dummy); |
219 | : "=a" (rev) | ||
220 | : "i" (0x0000008B) : "ecx"); | ||
221 | |||
222 | spin_unlock_irqrestore(&microcode_update_lock, flags); | 166 | spin_unlock_irqrestore(&microcode_update_lock, flags); |
223 | 167 | ||
224 | /* check current patch id and patch's id for match */ | 168 | /* check current patch id and patch's id for match */ |
225 | if (rev != mc_amd->hdr.patch_id) { | 169 | if (rev != mc_amd->hdr.patch_id) { |
226 | printk(KERN_ERR "microcode: CPU%d update from revision " | 170 | printk(KERN_ERR "microcode: CPU%d: update failed " |
227 | "0x%x to 0x%x failed\n", cpu_num, | 171 | "(for patch_level=0x%x)\n", cpu, mc_amd->hdr.patch_id); |
228 | mc_amd->hdr.patch_id, rev); | ||
229 | return; | 172 | return; |
230 | } | 173 | } |
231 | 174 | ||
232 | printk(KERN_INFO "microcode: CPU%d updated from revision " | 175 | printk(KERN_INFO "microcode: CPU%d: updated (new patch_level=0x%x)\n", |
233 | "0x%x to 0x%x \n", | 176 | cpu, rev); |
234 | cpu_num, uci->cpu_sig.rev, mc_amd->hdr.patch_id); | ||
235 | 177 | ||
236 | uci->cpu_sig.rev = rev; | 178 | uci->cpu_sig.rev = rev; |
237 | } | 179 | } |
238 | 180 | ||
239 | static void * get_next_ucode(u8 *buf, unsigned int size, | 181 | static int get_ucode_data(void *to, const u8 *from, size_t n) |
240 | int (*get_ucode_data)(void *, const void *, size_t), | 182 | { |
241 | unsigned int *mc_size) | 183 | memcpy(to, from, n); |
184 | return 0; | ||
185 | } | ||
186 | |||
187 | static void *get_next_ucode(const u8 *buf, unsigned int size, | ||
188 | unsigned int *mc_size) | ||
242 | { | 189 | { |
243 | unsigned int total_size; | 190 | unsigned int total_size; |
244 | #define UCODE_CONTAINER_SECTION_HDR 8 | ||
245 | u8 section_hdr[UCODE_CONTAINER_SECTION_HDR]; | 191 | u8 section_hdr[UCODE_CONTAINER_SECTION_HDR]; |
246 | void *mc; | 192 | void *mc; |
247 | 193 | ||
@@ -249,39 +195,37 @@ static void * get_next_ucode(u8 *buf, unsigned int size, | |||
249 | return NULL; | 195 | return NULL; |
250 | 196 | ||
251 | if (section_hdr[0] != UCODE_UCODE_TYPE) { | 197 | if (section_hdr[0] != UCODE_UCODE_TYPE) { |
252 | printk(KERN_ERR "microcode: error! " | 198 | printk(KERN_ERR "microcode: error: invalid type field in " |
253 | "Wrong microcode payload type field\n"); | 199 | "container file section header\n"); |
254 | return NULL; | 200 | return NULL; |
255 | } | 201 | } |
256 | 202 | ||
257 | total_size = (unsigned long) (section_hdr[4] + (section_hdr[5] << 8)); | 203 | total_size = (unsigned long) (section_hdr[4] + (section_hdr[5] << 8)); |
258 | 204 | ||
259 | printk(KERN_INFO "microcode: size %u, total_size %u\n", | 205 | printk(KERN_DEBUG "microcode: size %u, total_size %u\n", |
260 | size, total_size); | 206 | size, total_size); |
261 | 207 | ||
262 | if (total_size > size || total_size > UCODE_MAX_SIZE) { | 208 | if (total_size > size || total_size > UCODE_MAX_SIZE) { |
263 | printk(KERN_ERR "microcode: error! Bad data in microcode data file\n"); | 209 | printk(KERN_ERR "microcode: error: size mismatch\n"); |
264 | return NULL; | 210 | return NULL; |
265 | } | 211 | } |
266 | 212 | ||
267 | mc = vmalloc(UCODE_MAX_SIZE); | 213 | mc = vmalloc(UCODE_MAX_SIZE); |
268 | if (mc) { | 214 | if (mc) { |
269 | memset(mc, 0, UCODE_MAX_SIZE); | 215 | memset(mc, 0, UCODE_MAX_SIZE); |
270 | if (get_ucode_data(mc, buf + UCODE_CONTAINER_SECTION_HDR, total_size)) { | 216 | if (get_ucode_data(mc, buf + UCODE_CONTAINER_SECTION_HDR, |
217 | total_size)) { | ||
271 | vfree(mc); | 218 | vfree(mc); |
272 | mc = NULL; | 219 | mc = NULL; |
273 | } else | 220 | } else |
274 | *mc_size = total_size + UCODE_CONTAINER_SECTION_HDR; | 221 | *mc_size = total_size + UCODE_CONTAINER_SECTION_HDR; |
275 | } | 222 | } |
276 | #undef UCODE_CONTAINER_SECTION_HDR | ||
277 | return mc; | 223 | return mc; |
278 | } | 224 | } |
279 | 225 | ||
280 | 226 | ||
281 | static int install_equiv_cpu_table(u8 *buf, | 227 | static int install_equiv_cpu_table(const u8 *buf) |
282 | int (*get_ucode_data)(void *, const void *, size_t)) | ||
283 | { | 228 | { |
284 | #define UCODE_CONTAINER_HEADER_SIZE 12 | ||
285 | u8 *container_hdr[UCODE_CONTAINER_HEADER_SIZE]; | 229 | u8 *container_hdr[UCODE_CONTAINER_HEADER_SIZE]; |
286 | unsigned int *buf_pos = (unsigned int *)container_hdr; | 230 | unsigned int *buf_pos = (unsigned int *)container_hdr; |
287 | unsigned long size; | 231 | unsigned long size; |
@@ -292,14 +236,15 @@ static int install_equiv_cpu_table(u8 *buf, | |||
292 | size = buf_pos[2]; | 236 | size = buf_pos[2]; |
293 | 237 | ||
294 | if (buf_pos[1] != UCODE_EQUIV_CPU_TABLE_TYPE || !size) { | 238 | if (buf_pos[1] != UCODE_EQUIV_CPU_TABLE_TYPE || !size) { |
295 | printk(KERN_ERR "microcode: error! " | 239 | printk(KERN_ERR "microcode: error: invalid type field in " |
296 | "Wrong microcode equivalnet cpu table\n"); | 240 | "container file section header\n"); |
297 | return 0; | 241 | return 0; |
298 | } | 242 | } |
299 | 243 | ||
300 | equiv_cpu_table = (struct equiv_cpu_entry *) vmalloc(size); | 244 | equiv_cpu_table = (struct equiv_cpu_entry *) vmalloc(size); |
301 | if (!equiv_cpu_table) { | 245 | if (!equiv_cpu_table) { |
302 | printk(KERN_ERR "microcode: error, can't allocate memory for equiv CPU table\n"); | 246 | printk(KERN_ERR "microcode: failed to allocate " |
247 | "equivalent CPU table\n"); | ||
303 | return 0; | 248 | return 0; |
304 | } | 249 | } |
305 | 250 | ||
@@ -310,7 +255,6 @@ static int install_equiv_cpu_table(u8 *buf, | |||
310 | } | 255 | } |
311 | 256 | ||
312 | return size + UCODE_CONTAINER_HEADER_SIZE; /* add header length */ | 257 | return size + UCODE_CONTAINER_HEADER_SIZE; /* add header length */ |
313 | #undef UCODE_CONTAINER_HEADER_SIZE | ||
314 | } | 258 | } |
315 | 259 | ||
316 | static void free_equiv_cpu_table(void) | 260 | static void free_equiv_cpu_table(void) |
@@ -321,18 +265,20 @@ static void free_equiv_cpu_table(void) | |||
321 | } | 265 | } |
322 | } | 266 | } |
323 | 267 | ||
324 | static int generic_load_microcode(int cpu, void *data, size_t size, | 268 | static int generic_load_microcode(int cpu, const u8 *data, size_t size) |
325 | int (*get_ucode_data)(void *, const void *, size_t)) | ||
326 | { | 269 | { |
327 | struct ucode_cpu_info *uci = ucode_cpu_info + cpu; | 270 | struct ucode_cpu_info *uci = ucode_cpu_info + cpu; |
328 | u8 *ucode_ptr = data, *new_mc = NULL, *mc; | 271 | const u8 *ucode_ptr = data; |
272 | void *new_mc = NULL; | ||
273 | void *mc; | ||
329 | int new_rev = uci->cpu_sig.rev; | 274 | int new_rev = uci->cpu_sig.rev; |
330 | unsigned int leftover; | 275 | unsigned int leftover; |
331 | unsigned long offset; | 276 | unsigned long offset; |
332 | 277 | ||
333 | offset = install_equiv_cpu_table(ucode_ptr, get_ucode_data); | 278 | offset = install_equiv_cpu_table(ucode_ptr); |
334 | if (!offset) { | 279 | if (!offset) { |
335 | printk(KERN_ERR "microcode: installing equivalent cpu table failed\n"); | 280 | printk(KERN_ERR "microcode: failed to create " |
281 | "equivalent cpu table\n"); | ||
336 | return -EINVAL; | 282 | return -EINVAL; |
337 | } | 283 | } |
338 | 284 | ||
@@ -343,7 +289,7 @@ static int generic_load_microcode(int cpu, void *data, size_t size, | |||
343 | unsigned int uninitialized_var(mc_size); | 289 | unsigned int uninitialized_var(mc_size); |
344 | struct microcode_header_amd *mc_header; | 290 | struct microcode_header_amd *mc_header; |
345 | 291 | ||
346 | mc = get_next_ucode(ucode_ptr, leftover, get_ucode_data, &mc_size); | 292 | mc = get_next_ucode(ucode_ptr, leftover, &mc_size); |
347 | if (!mc) | 293 | if (!mc) |
348 | break; | 294 | break; |
349 | 295 | ||
@@ -353,7 +299,7 @@ static int generic_load_microcode(int cpu, void *data, size_t size, | |||
353 | vfree(new_mc); | 299 | vfree(new_mc); |
354 | new_rev = mc_header->patch_id; | 300 | new_rev = mc_header->patch_id; |
355 | new_mc = mc; | 301 | new_mc = mc; |
356 | } else | 302 | } else |
357 | vfree(mc); | 303 | vfree(mc); |
358 | 304 | ||
359 | ucode_ptr += mc_size; | 305 | ucode_ptr += mc_size; |
@@ -365,9 +311,9 @@ static int generic_load_microcode(int cpu, void *data, size_t size, | |||
365 | if (uci->mc) | 311 | if (uci->mc) |
366 | vfree(uci->mc); | 312 | vfree(uci->mc); |
367 | uci->mc = new_mc; | 313 | uci->mc = new_mc; |
368 | pr_debug("microcode: CPU%d found a matching microcode update with" | 314 | pr_debug("microcode: CPU%d found a matching microcode " |
369 | " version 0x%x (current=0x%x)\n", | 315 | "update with version 0x%x (current=0x%x)\n", |
370 | cpu, new_rev, uci->cpu_sig.rev); | 316 | cpu, new_rev, uci->cpu_sig.rev); |
371 | } else | 317 | } else |
372 | vfree(new_mc); | 318 | vfree(new_mc); |
373 | } | 319 | } |
@@ -377,12 +323,6 @@ static int generic_load_microcode(int cpu, void *data, size_t size, | |||
377 | return (int)leftover; | 323 | return (int)leftover; |
378 | } | 324 | } |
379 | 325 | ||
380 | static int get_ucode_fw(void *to, const void *from, size_t n) | ||
381 | { | ||
382 | memcpy(to, from, n); | ||
383 | return 0; | ||
384 | } | ||
385 | |||
386 | static int request_microcode_fw(int cpu, struct device *device) | 326 | static int request_microcode_fw(int cpu, struct device *device) |
387 | { | 327 | { |
388 | const char *fw_name = "amd-ucode/microcode_amd.bin"; | 328 | const char *fw_name = "amd-ucode/microcode_amd.bin"; |
@@ -394,12 +334,11 @@ static int request_microcode_fw(int cpu, struct device *device) | |||
394 | 334 | ||
395 | ret = request_firmware(&firmware, fw_name, device); | 335 | ret = request_firmware(&firmware, fw_name, device); |
396 | if (ret) { | 336 | if (ret) { |
397 | printk(KERN_ERR "microcode: ucode data file %s load failed\n", fw_name); | 337 | printk(KERN_ERR "microcode: failed to load file %s\n", fw_name); |
398 | return ret; | 338 | return ret; |
399 | } | 339 | } |
400 | 340 | ||
401 | ret = generic_load_microcode(cpu, (void*)firmware->data, firmware->size, | 341 | ret = generic_load_microcode(cpu, firmware->data, firmware->size); |
402 | &get_ucode_fw); | ||
403 | 342 | ||
404 | release_firmware(firmware); | 343 | release_firmware(firmware); |
405 | 344 | ||
@@ -408,8 +347,8 @@ static int request_microcode_fw(int cpu, struct device *device) | |||
408 | 347 | ||
409 | static int request_microcode_user(int cpu, const void __user *buf, size_t size) | 348 | static int request_microcode_user(int cpu, const void __user *buf, size_t size) |
410 | { | 349 | { |
411 | printk(KERN_WARNING "microcode: AMD microcode update via /dev/cpu/microcode" | 350 | printk(KERN_INFO "microcode: AMD microcode update via " |
412 | "is not supported\n"); | 351 | "/dev/cpu/microcode not supported\n"); |
413 | return -1; | 352 | return -1; |
414 | } | 353 | } |
415 | 354 | ||
@@ -433,3 +372,4 @@ struct microcode_ops * __init init_amd_microcode(void) | |||
433 | { | 372 | { |
434 | return µcode_amd_ops; | 373 | return µcode_amd_ops; |
435 | } | 374 | } |
375 | |||
diff --git a/arch/x86/kernel/microcode_core.c b/arch/x86/kernel/microcode_core.c index c4b5b24e0217..c9b721ba968c 100644 --- a/arch/x86/kernel/microcode_core.c +++ b/arch/x86/kernel/microcode_core.c | |||
@@ -99,7 +99,7 @@ MODULE_LICENSE("GPL"); | |||
99 | 99 | ||
100 | #define MICROCODE_VERSION "2.00" | 100 | #define MICROCODE_VERSION "2.00" |
101 | 101 | ||
102 | struct microcode_ops *microcode_ops; | 102 | static struct microcode_ops *microcode_ops; |
103 | 103 | ||
104 | /* no concurrent ->write()s are allowed on /dev/cpu/microcode */ | 104 | /* no concurrent ->write()s are allowed on /dev/cpu/microcode */ |
105 | static DEFINE_MUTEX(microcode_mutex); | 105 | static DEFINE_MUTEX(microcode_mutex); |
@@ -203,7 +203,7 @@ MODULE_ALIAS_MISCDEV(MICROCODE_MINOR); | |||
203 | #endif | 203 | #endif |
204 | 204 | ||
205 | /* fake device for request_firmware */ | 205 | /* fake device for request_firmware */ |
206 | struct platform_device *microcode_pdev; | 206 | static struct platform_device *microcode_pdev; |
207 | 207 | ||
208 | static ssize_t reload_store(struct sys_device *dev, | 208 | static ssize_t reload_store(struct sys_device *dev, |
209 | struct sysdev_attribute *attr, | 209 | struct sysdev_attribute *attr, |
@@ -328,7 +328,7 @@ static int microcode_resume_cpu(int cpu) | |||
328 | return 0; | 328 | return 0; |
329 | } | 329 | } |
330 | 330 | ||
331 | void microcode_update_cpu(int cpu) | 331 | static void microcode_update_cpu(int cpu) |
332 | { | 332 | { |
333 | struct ucode_cpu_info *uci = ucode_cpu_info + cpu; | 333 | struct ucode_cpu_info *uci = ucode_cpu_info + cpu; |
334 | int err = 0; | 334 | int err = 0; |
diff --git a/arch/x86/kernel/microcode_intel.c b/arch/x86/kernel/microcode_intel.c index a8e62792d171..b7f4c929e615 100644 --- a/arch/x86/kernel/microcode_intel.c +++ b/arch/x86/kernel/microcode_intel.c | |||
@@ -471,7 +471,7 @@ static void microcode_fini_cpu(int cpu) | |||
471 | uci->mc = NULL; | 471 | uci->mc = NULL; |
472 | } | 472 | } |
473 | 473 | ||
474 | struct microcode_ops microcode_intel_ops = { | 474 | static struct microcode_ops microcode_intel_ops = { |
475 | .request_microcode_user = request_microcode_user, | 475 | .request_microcode_user = request_microcode_user, |
476 | .request_microcode_fw = request_microcode_fw, | 476 | .request_microcode_fw = request_microcode_fw, |
477 | .collect_cpu_info = collect_cpu_info, | 477 | .collect_cpu_info = collect_cpu_info, |
diff --git a/arch/x86/kernel/mmconf-fam10h_64.c b/arch/x86/kernel/mmconf-fam10h_64.c index efc2f361fe85..666e43df51f9 100644 --- a/arch/x86/kernel/mmconf-fam10h_64.c +++ b/arch/x86/kernel/mmconf-fam10h_64.c | |||
@@ -13,8 +13,7 @@ | |||
13 | #include <asm/msr.h> | 13 | #include <asm/msr.h> |
14 | #include <asm/acpi.h> | 14 | #include <asm/acpi.h> |
15 | #include <asm/mmconfig.h> | 15 | #include <asm/mmconfig.h> |
16 | 16 | #include <asm/pci_x86.h> | |
17 | #include "../pci/pci.h" | ||
18 | 17 | ||
19 | struct pci_hostbridge_probe { | 18 | struct pci_hostbridge_probe { |
20 | u32 bus; | 19 | u32 bus; |
diff --git a/arch/x86/kernel/mpparse.c b/arch/x86/kernel/mpparse.c index 0f4c1fd5a1f4..c5c5b8df1dbc 100644 --- a/arch/x86/kernel/mpparse.c +++ b/arch/x86/kernel/mpparse.c | |||
@@ -16,14 +16,14 @@ | |||
16 | #include <linux/bitops.h> | 16 | #include <linux/bitops.h> |
17 | #include <linux/acpi.h> | 17 | #include <linux/acpi.h> |
18 | #include <linux/module.h> | 18 | #include <linux/module.h> |
19 | #include <linux/smp.h> | ||
20 | #include <linux/acpi.h> | ||
19 | 21 | ||
20 | #include <asm/smp.h> | ||
21 | #include <asm/mtrr.h> | 22 | #include <asm/mtrr.h> |
22 | #include <asm/mpspec.h> | 23 | #include <asm/mpspec.h> |
23 | #include <asm/pgalloc.h> | 24 | #include <asm/pgalloc.h> |
24 | #include <asm/io_apic.h> | 25 | #include <asm/io_apic.h> |
25 | #include <asm/proto.h> | 26 | #include <asm/proto.h> |
26 | #include <asm/acpi.h> | ||
27 | #include <asm/bios_ebda.h> | 27 | #include <asm/bios_ebda.h> |
28 | #include <asm/e820.h> | 28 | #include <asm/e820.h> |
29 | #include <asm/trampoline.h> | 29 | #include <asm/trampoline.h> |
@@ -95,8 +95,8 @@ static void __init MP_bus_info(struct mpc_config_bus *m) | |||
95 | #endif | 95 | #endif |
96 | 96 | ||
97 | if (strncmp(str, BUSTYPE_ISA, sizeof(BUSTYPE_ISA) - 1) == 0) { | 97 | if (strncmp(str, BUSTYPE_ISA, sizeof(BUSTYPE_ISA) - 1) == 0) { |
98 | set_bit(m->mpc_busid, mp_bus_not_pci); | 98 | set_bit(m->mpc_busid, mp_bus_not_pci); |
99 | #if defined(CONFIG_EISA) || defined (CONFIG_MCA) | 99 | #if defined(CONFIG_EISA) || defined(CONFIG_MCA) |
100 | mp_bus_id_to_type[m->mpc_busid] = MP_BUS_ISA; | 100 | mp_bus_id_to_type[m->mpc_busid] = MP_BUS_ISA; |
101 | #endif | 101 | #endif |
102 | } else if (strncmp(str, BUSTYPE_PCI, sizeof(BUSTYPE_PCI) - 1) == 0) { | 102 | } else if (strncmp(str, BUSTYPE_PCI, sizeof(BUSTYPE_PCI) - 1) == 0) { |
@@ -104,7 +104,7 @@ static void __init MP_bus_info(struct mpc_config_bus *m) | |||
104 | x86_quirks->mpc_oem_pci_bus(m); | 104 | x86_quirks->mpc_oem_pci_bus(m); |
105 | 105 | ||
106 | clear_bit(m->mpc_busid, mp_bus_not_pci); | 106 | clear_bit(m->mpc_busid, mp_bus_not_pci); |
107 | #if defined(CONFIG_EISA) || defined (CONFIG_MCA) | 107 | #if defined(CONFIG_EISA) || defined(CONFIG_MCA) |
108 | mp_bus_id_to_type[m->mpc_busid] = MP_BUS_PCI; | 108 | mp_bus_id_to_type[m->mpc_busid] = MP_BUS_PCI; |
109 | } else if (strncmp(str, BUSTYPE_EISA, sizeof(BUSTYPE_EISA) - 1) == 0) { | 109 | } else if (strncmp(str, BUSTYPE_EISA, sizeof(BUSTYPE_EISA) - 1) == 0) { |
110 | mp_bus_id_to_type[m->mpc_busid] = MP_BUS_EISA; | 110 | mp_bus_id_to_type[m->mpc_busid] = MP_BUS_EISA; |
@@ -586,26 +586,23 @@ static void __init __get_smp_config(unsigned int early) | |||
586 | { | 586 | { |
587 | struct intel_mp_floating *mpf = mpf_found; | 587 | struct intel_mp_floating *mpf = mpf_found; |
588 | 588 | ||
589 | if (x86_quirks->mach_get_smp_config) { | 589 | if (!mpf) |
590 | if (x86_quirks->mach_get_smp_config(early)) | 590 | return; |
591 | return; | 591 | |
592 | } | ||
593 | if (acpi_lapic && early) | 592 | if (acpi_lapic && early) |
594 | return; | 593 | return; |
594 | |||
595 | /* | 595 | /* |
596 | * ACPI supports both logical (e.g. Hyper-Threading) and physical | 596 | * MPS doesn't support hyperthreading, aka only have |
597 | * processors, where MPS only supports physical. | 597 | * thread 0 apic id in MPS table |
598 | */ | 598 | */ |
599 | if (acpi_lapic && acpi_ioapic) { | 599 | if (acpi_lapic && acpi_ioapic) |
600 | printk(KERN_INFO "Using ACPI (MADT) for SMP configuration " | ||
601 | "information\n"); | ||
602 | return; | 600 | return; |
603 | } else if (acpi_lapic) | ||
604 | printk(KERN_INFO "Using ACPI for processor (LAPIC) " | ||
605 | "configuration information\n"); | ||
606 | 601 | ||
607 | if (!mpf) | 602 | if (x86_quirks->mach_get_smp_config) { |
608 | return; | 603 | if (x86_quirks->mach_get_smp_config(early)) |
604 | return; | ||
605 | } | ||
609 | 606 | ||
610 | printk(KERN_INFO "Intel MultiProcessor Specification v1.%d\n", | 607 | printk(KERN_INFO "Intel MultiProcessor Specification v1.%d\n", |
611 | mpf->mpf_specification); | 608 | mpf->mpf_specification); |
diff --git a/arch/x86/kernel/msr.c b/arch/x86/kernel/msr.c index 82a7c7ed6d45..726266695b2c 100644 --- a/arch/x86/kernel/msr.c +++ b/arch/x86/kernel/msr.c | |||
@@ -136,7 +136,7 @@ static int msr_open(struct inode *inode, struct file *file) | |||
136 | lock_kernel(); | 136 | lock_kernel(); |
137 | cpu = iminor(file->f_path.dentry->d_inode); | 137 | cpu = iminor(file->f_path.dentry->d_inode); |
138 | 138 | ||
139 | if (cpu >= NR_CPUS || !cpu_online(cpu)) { | 139 | if (cpu >= nr_cpu_ids || !cpu_online(cpu)) { |
140 | ret = -ENXIO; /* No such CPU */ | 140 | ret = -ENXIO; /* No such CPU */ |
141 | goto out; | 141 | goto out; |
142 | } | 142 | } |
diff --git a/arch/x86/kernel/nmi.c b/arch/x86/kernel/nmi.c index 2c97f07f1c2c..45a09ccdc214 100644 --- a/arch/x86/kernel/nmi.c +++ b/arch/x86/kernel/nmi.c | |||
@@ -26,11 +26,10 @@ | |||
26 | #include <linux/kernel_stat.h> | 26 | #include <linux/kernel_stat.h> |
27 | #include <linux/kdebug.h> | 27 | #include <linux/kdebug.h> |
28 | #include <linux/smp.h> | 28 | #include <linux/smp.h> |
29 | #include <linux/nmi.h> | ||
29 | 30 | ||
30 | #include <asm/i8259.h> | 31 | #include <asm/i8259.h> |
31 | #include <asm/io_apic.h> | 32 | #include <asm/io_apic.h> |
32 | #include <asm/smp.h> | ||
33 | #include <asm/nmi.h> | ||
34 | #include <asm/proto.h> | 33 | #include <asm/proto.h> |
35 | #include <asm/timer.h> | 34 | #include <asm/timer.h> |
36 | 35 | ||
@@ -131,6 +130,11 @@ static void report_broken_nmi(int cpu, int *prev_nmi_count) | |||
131 | atomic_dec(&nmi_active); | 130 | atomic_dec(&nmi_active); |
132 | } | 131 | } |
133 | 132 | ||
133 | static void __acpi_nmi_disable(void *__unused) | ||
134 | { | ||
135 | apic_write(APIC_LVT0, APIC_DM_NMI | APIC_LVT_MASKED); | ||
136 | } | ||
137 | |||
134 | int __init check_nmi_watchdog(void) | 138 | int __init check_nmi_watchdog(void) |
135 | { | 139 | { |
136 | unsigned int *prev_nmi_count; | 140 | unsigned int *prev_nmi_count; |
@@ -179,8 +183,12 @@ int __init check_nmi_watchdog(void) | |||
179 | kfree(prev_nmi_count); | 183 | kfree(prev_nmi_count); |
180 | return 0; | 184 | return 0; |
181 | error: | 185 | error: |
182 | if (nmi_watchdog == NMI_IO_APIC && !timer_through_8259) | 186 | if (nmi_watchdog == NMI_IO_APIC) { |
183 | disable_8259A_irq(0); | 187 | if (!timer_through_8259) |
188 | disable_8259A_irq(0); | ||
189 | on_each_cpu(__acpi_nmi_disable, NULL, 1); | ||
190 | } | ||
191 | |||
184 | #ifdef CONFIG_X86_32 | 192 | #ifdef CONFIG_X86_32 |
185 | timer_ack = 0; | 193 | timer_ack = 0; |
186 | #endif | 194 | #endif |
@@ -199,12 +207,17 @@ static int __init setup_nmi_watchdog(char *str) | |||
199 | ++str; | 207 | ++str; |
200 | } | 208 | } |
201 | 209 | ||
202 | get_option(&str, &nmi); | 210 | if (!strncmp(str, "lapic", 5)) |
203 | 211 | nmi_watchdog = NMI_LOCAL_APIC; | |
204 | if (nmi >= NMI_INVALID) | 212 | else if (!strncmp(str, "ioapic", 6)) |
205 | return 0; | 213 | nmi_watchdog = NMI_IO_APIC; |
214 | else { | ||
215 | get_option(&str, &nmi); | ||
216 | if (nmi >= NMI_INVALID) | ||
217 | return 0; | ||
218 | nmi_watchdog = nmi; | ||
219 | } | ||
206 | 220 | ||
207 | nmi_watchdog = nmi; | ||
208 | return 1; | 221 | return 1; |
209 | } | 222 | } |
210 | __setup("nmi_watchdog=", setup_nmi_watchdog); | 223 | __setup("nmi_watchdog=", setup_nmi_watchdog); |
@@ -285,11 +298,6 @@ void acpi_nmi_enable(void) | |||
285 | on_each_cpu(__acpi_nmi_enable, NULL, 1); | 298 | on_each_cpu(__acpi_nmi_enable, NULL, 1); |
286 | } | 299 | } |
287 | 300 | ||
288 | static void __acpi_nmi_disable(void *__unused) | ||
289 | { | ||
290 | apic_write(APIC_LVT0, APIC_DM_NMI | APIC_LVT_MASKED); | ||
291 | } | ||
292 | |||
293 | /* | 301 | /* |
294 | * Disable timer based NMIs on all CPUs: | 302 | * Disable timer based NMIs on all CPUs: |
295 | */ | 303 | */ |
@@ -340,6 +348,8 @@ void stop_apic_nmi_watchdog(void *unused) | |||
340 | return; | 348 | return; |
341 | if (nmi_watchdog == NMI_LOCAL_APIC) | 349 | if (nmi_watchdog == NMI_LOCAL_APIC) |
342 | lapic_watchdog_stop(); | 350 | lapic_watchdog_stop(); |
351 | else | ||
352 | __acpi_nmi_disable(NULL); | ||
343 | __get_cpu_var(wd_enabled) = 0; | 353 | __get_cpu_var(wd_enabled) = 0; |
344 | atomic_dec(&nmi_active); | 354 | atomic_dec(&nmi_active); |
345 | } | 355 | } |
@@ -465,6 +475,24 @@ nmi_watchdog_tick(struct pt_regs *regs, unsigned reason) | |||
465 | 475 | ||
466 | #ifdef CONFIG_SYSCTL | 476 | #ifdef CONFIG_SYSCTL |
467 | 477 | ||
478 | static void enable_ioapic_nmi_watchdog_single(void *unused) | ||
479 | { | ||
480 | __get_cpu_var(wd_enabled) = 1; | ||
481 | atomic_inc(&nmi_active); | ||
482 | __acpi_nmi_enable(NULL); | ||
483 | } | ||
484 | |||
485 | static void enable_ioapic_nmi_watchdog(void) | ||
486 | { | ||
487 | on_each_cpu(enable_ioapic_nmi_watchdog_single, NULL, 1); | ||
488 | touch_nmi_watchdog(); | ||
489 | } | ||
490 | |||
491 | static void disable_ioapic_nmi_watchdog(void) | ||
492 | { | ||
493 | on_each_cpu(stop_apic_nmi_watchdog, NULL, 1); | ||
494 | } | ||
495 | |||
468 | static int __init setup_unknown_nmi_panic(char *str) | 496 | static int __init setup_unknown_nmi_panic(char *str) |
469 | { | 497 | { |
470 | unknown_nmi_panic = 1; | 498 | unknown_nmi_panic = 1; |
@@ -507,6 +535,11 @@ int proc_nmi_enabled(struct ctl_table *table, int write, struct file *file, | |||
507 | enable_lapic_nmi_watchdog(); | 535 | enable_lapic_nmi_watchdog(); |
508 | else | 536 | else |
509 | disable_lapic_nmi_watchdog(); | 537 | disable_lapic_nmi_watchdog(); |
538 | } else if (nmi_watchdog == NMI_IO_APIC) { | ||
539 | if (nmi_watchdog_enabled) | ||
540 | enable_ioapic_nmi_watchdog(); | ||
541 | else | ||
542 | disable_ioapic_nmi_watchdog(); | ||
510 | } else { | 543 | } else { |
511 | printk(KERN_WARNING | 544 | printk(KERN_WARNING |
512 | "NMI watchdog doesn't know what hardware to touch\n"); | 545 | "NMI watchdog doesn't know what hardware to touch\n"); |
diff --git a/arch/x86/kernel/numaq_32.c b/arch/x86/kernel/numaq_32.c index 4caff39078e0..0deea37a53cf 100644 --- a/arch/x86/kernel/numaq_32.c +++ b/arch/x86/kernel/numaq_32.c | |||
@@ -31,7 +31,7 @@ | |||
31 | #include <asm/numaq.h> | 31 | #include <asm/numaq.h> |
32 | #include <asm/topology.h> | 32 | #include <asm/topology.h> |
33 | #include <asm/processor.h> | 33 | #include <asm/processor.h> |
34 | #include <asm/mpspec.h> | 34 | #include <asm/genapic.h> |
35 | #include <asm/e820.h> | 35 | #include <asm/e820.h> |
36 | #include <asm/setup.h> | 36 | #include <asm/setup.h> |
37 | 37 | ||
@@ -235,6 +235,13 @@ static int __init numaq_setup_ioapic_ids(void) | |||
235 | return 1; | 235 | return 1; |
236 | } | 236 | } |
237 | 237 | ||
238 | static int __init numaq_update_genapic(void) | ||
239 | { | ||
240 | genapic->wakeup_cpu = wakeup_secondary_cpu_via_nmi; | ||
241 | |||
242 | return 0; | ||
243 | } | ||
244 | |||
238 | static struct x86_quirks numaq_x86_quirks __initdata = { | 245 | static struct x86_quirks numaq_x86_quirks __initdata = { |
239 | .arch_pre_time_init = numaq_pre_time_init, | 246 | .arch_pre_time_init = numaq_pre_time_init, |
240 | .arch_time_init = NULL, | 247 | .arch_time_init = NULL, |
@@ -250,6 +257,7 @@ static struct x86_quirks numaq_x86_quirks __initdata = { | |||
250 | .mpc_oem_pci_bus = mpc_oem_pci_bus, | 257 | .mpc_oem_pci_bus = mpc_oem_pci_bus, |
251 | .smp_read_mpc_oem = smp_read_mpc_oem, | 258 | .smp_read_mpc_oem = smp_read_mpc_oem, |
252 | .setup_ioapic_ids = numaq_setup_ioapic_ids, | 259 | .setup_ioapic_ids = numaq_setup_ioapic_ids, |
260 | .update_genapic = numaq_update_genapic, | ||
253 | }; | 261 | }; |
254 | 262 | ||
255 | void numaq_mps_oem_check(struct mp_config_table *mpc, char *oem, | 263 | void numaq_mps_oem_check(struct mp_config_table *mpc, char *oem, |
diff --git a/arch/x86/kernel/pci-dma.c b/arch/x86/kernel/pci-dma.c index 192624820217..b25428533141 100644 --- a/arch/x86/kernel/pci-dma.c +++ b/arch/x86/kernel/pci-dma.c | |||
@@ -6,6 +6,7 @@ | |||
6 | #include <asm/proto.h> | 6 | #include <asm/proto.h> |
7 | #include <asm/dma.h> | 7 | #include <asm/dma.h> |
8 | #include <asm/iommu.h> | 8 | #include <asm/iommu.h> |
9 | #include <asm/gart.h> | ||
9 | #include <asm/calgary.h> | 10 | #include <asm/calgary.h> |
10 | #include <asm/amd_iommu.h> | 11 | #include <asm/amd_iommu.h> |
11 | 12 | ||
@@ -30,11 +31,6 @@ int no_iommu __read_mostly; | |||
30 | /* Set this to 1 if there is a HW IOMMU in the system */ | 31 | /* Set this to 1 if there is a HW IOMMU in the system */ |
31 | int iommu_detected __read_mostly = 0; | 32 | int iommu_detected __read_mostly = 0; |
32 | 33 | ||
33 | /* This tells the BIO block layer to assume merging. Default to off | ||
34 | because we cannot guarantee merging later. */ | ||
35 | int iommu_bio_merge __read_mostly = 0; | ||
36 | EXPORT_SYMBOL(iommu_bio_merge); | ||
37 | |||
38 | dma_addr_t bad_dma_address __read_mostly = 0; | 34 | dma_addr_t bad_dma_address __read_mostly = 0; |
39 | EXPORT_SYMBOL(bad_dma_address); | 35 | EXPORT_SYMBOL(bad_dma_address); |
40 | 36 | ||
@@ -42,7 +38,7 @@ EXPORT_SYMBOL(bad_dma_address); | |||
42 | be probably a smaller DMA mask, but this is bug-to-bug compatible | 38 | be probably a smaller DMA mask, but this is bug-to-bug compatible |
43 | to older i386. */ | 39 | to older i386. */ |
44 | struct device x86_dma_fallback_dev = { | 40 | struct device x86_dma_fallback_dev = { |
45 | .bus_id = "fallback device", | 41 | .init_name = "fallback device", |
46 | .coherent_dma_mask = DMA_32BIT_MASK, | 42 | .coherent_dma_mask = DMA_32BIT_MASK, |
47 | .dma_mask = &x86_dma_fallback_dev.coherent_dma_mask, | 43 | .dma_mask = &x86_dma_fallback_dev.coherent_dma_mask, |
48 | }; | 44 | }; |
@@ -105,11 +101,15 @@ static void __init dma32_free_bootmem(void) | |||
105 | dma32_bootmem_ptr = NULL; | 101 | dma32_bootmem_ptr = NULL; |
106 | dma32_bootmem_size = 0; | 102 | dma32_bootmem_size = 0; |
107 | } | 103 | } |
104 | #endif | ||
108 | 105 | ||
109 | void __init pci_iommu_alloc(void) | 106 | void __init pci_iommu_alloc(void) |
110 | { | 107 | { |
108 | #ifdef CONFIG_X86_64 | ||
111 | /* free the range so iommu could get some range less than 4G */ | 109 | /* free the range so iommu could get some range less than 4G */ |
112 | dma32_free_bootmem(); | 110 | dma32_free_bootmem(); |
111 | #endif | ||
112 | |||
113 | /* | 113 | /* |
114 | * The order of these functions is important for | 114 | * The order of these functions is important for |
115 | * fall-back/fail-over reasons | 115 | * fall-back/fail-over reasons |
@@ -125,15 +125,6 @@ void __init pci_iommu_alloc(void) | |||
125 | pci_swiotlb_init(); | 125 | pci_swiotlb_init(); |
126 | } | 126 | } |
127 | 127 | ||
128 | unsigned long iommu_nr_pages(unsigned long addr, unsigned long len) | ||
129 | { | ||
130 | unsigned long size = roundup((addr & ~PAGE_MASK) + len, PAGE_SIZE); | ||
131 | |||
132 | return size >> PAGE_SHIFT; | ||
133 | } | ||
134 | EXPORT_SYMBOL(iommu_nr_pages); | ||
135 | #endif | ||
136 | |||
137 | void *dma_generic_alloc_coherent(struct device *dev, size_t size, | 128 | void *dma_generic_alloc_coherent(struct device *dev, size_t size, |
138 | dma_addr_t *dma_addr, gfp_t flag) | 129 | dma_addr_t *dma_addr, gfp_t flag) |
139 | { | 130 | { |
@@ -188,7 +179,6 @@ static __init int iommu_setup(char *p) | |||
188 | } | 179 | } |
189 | 180 | ||
190 | if (!strncmp(p, "biomerge", 8)) { | 181 | if (!strncmp(p, "biomerge", 8)) { |
191 | iommu_bio_merge = 4096; | ||
192 | iommu_merge = 1; | 182 | iommu_merge = 1; |
193 | force_iommu = 1; | 183 | force_iommu = 1; |
194 | } | 184 | } |
@@ -300,8 +290,8 @@ fs_initcall(pci_iommu_init); | |||
300 | static __devinit void via_no_dac(struct pci_dev *dev) | 290 | static __devinit void via_no_dac(struct pci_dev *dev) |
301 | { | 291 | { |
302 | if ((dev->class >> 8) == PCI_CLASS_BRIDGE_PCI && forbid_dac == 0) { | 292 | if ((dev->class >> 8) == PCI_CLASS_BRIDGE_PCI && forbid_dac == 0) { |
303 | printk(KERN_INFO "PCI: VIA PCI bridge detected." | 293 | printk(KERN_INFO |
304 | "Disabling DAC.\n"); | 294 | "PCI: VIA PCI bridge detected. Disabling DAC.\n"); |
305 | forbid_dac = 1; | 295 | forbid_dac = 1; |
306 | } | 296 | } |
307 | } | 297 | } |
diff --git a/arch/x86/kernel/pci-gart_64.c b/arch/x86/kernel/pci-gart_64.c index a35eaa379ff6..00c2bcd41463 100644 --- a/arch/x86/kernel/pci-gart_64.c +++ b/arch/x86/kernel/pci-gart_64.c | |||
@@ -52,7 +52,7 @@ static u32 *iommu_gatt_base; /* Remapping table */ | |||
52 | * to trigger bugs with some popular PCI cards, in particular 3ware (but | 52 | * to trigger bugs with some popular PCI cards, in particular 3ware (but |
53 | * has been also also seen with Qlogic at least). | 53 | * has been also also seen with Qlogic at least). |
54 | */ | 54 | */ |
55 | int iommu_fullflush = 1; | 55 | static int iommu_fullflush = 1; |
56 | 56 | ||
57 | /* Allocation bitmap for the remapping area: */ | 57 | /* Allocation bitmap for the remapping area: */ |
58 | static DEFINE_SPINLOCK(iommu_bitmap_lock); | 58 | static DEFINE_SPINLOCK(iommu_bitmap_lock); |
diff --git a/arch/x86/kernel/pci-swiotlb_64.c b/arch/x86/kernel/pci-swiotlb_64.c index 3c539d111abb..d59c91747665 100644 --- a/arch/x86/kernel/pci-swiotlb_64.c +++ b/arch/x86/kernel/pci-swiotlb_64.c | |||
@@ -3,6 +3,8 @@ | |||
3 | #include <linux/pci.h> | 3 | #include <linux/pci.h> |
4 | #include <linux/cache.h> | 4 | #include <linux/cache.h> |
5 | #include <linux/module.h> | 5 | #include <linux/module.h> |
6 | #include <linux/swiotlb.h> | ||
7 | #include <linux/bootmem.h> | ||
6 | #include <linux/dma-mapping.h> | 8 | #include <linux/dma-mapping.h> |
7 | 9 | ||
8 | #include <asm/iommu.h> | 10 | #include <asm/iommu.h> |
@@ -11,6 +13,31 @@ | |||
11 | 13 | ||
12 | int swiotlb __read_mostly; | 14 | int swiotlb __read_mostly; |
13 | 15 | ||
16 | void * __init swiotlb_alloc_boot(size_t size, unsigned long nslabs) | ||
17 | { | ||
18 | return alloc_bootmem_low_pages(size); | ||
19 | } | ||
20 | |||
21 | void *swiotlb_alloc(unsigned order, unsigned long nslabs) | ||
22 | { | ||
23 | return (void *)__get_free_pages(GFP_DMA | __GFP_NOWARN, order); | ||
24 | } | ||
25 | |||
26 | dma_addr_t swiotlb_phys_to_bus(struct device *hwdev, phys_addr_t paddr) | ||
27 | { | ||
28 | return paddr; | ||
29 | } | ||
30 | |||
31 | phys_addr_t swiotlb_bus_to_phys(dma_addr_t baddr) | ||
32 | { | ||
33 | return baddr; | ||
34 | } | ||
35 | |||
36 | int __weak swiotlb_arch_range_needs_mapping(void *ptr, size_t size) | ||
37 | { | ||
38 | return 0; | ||
39 | } | ||
40 | |||
14 | static dma_addr_t | 41 | static dma_addr_t |
15 | swiotlb_map_single_phys(struct device *hwdev, phys_addr_t paddr, size_t size, | 42 | swiotlb_map_single_phys(struct device *hwdev, phys_addr_t paddr, size_t size, |
16 | int direction) | 43 | int direction) |
@@ -50,8 +77,10 @@ struct dma_mapping_ops swiotlb_dma_ops = { | |||
50 | void __init pci_swiotlb_init(void) | 77 | void __init pci_swiotlb_init(void) |
51 | { | 78 | { |
52 | /* don't initialize swiotlb if iommu=off (no_iommu=1) */ | 79 | /* don't initialize swiotlb if iommu=off (no_iommu=1) */ |
80 | #ifdef CONFIG_X86_64 | ||
53 | if (!iommu_detected && !no_iommu && max_pfn > MAX_DMA32_PFN) | 81 | if (!iommu_detected && !no_iommu && max_pfn > MAX_DMA32_PFN) |
54 | swiotlb = 1; | 82 | swiotlb = 1; |
83 | #endif | ||
55 | if (swiotlb_force) | 84 | if (swiotlb_force) |
56 | swiotlb = 1; | 85 | swiotlb = 1; |
57 | if (swiotlb) { | 86 | if (swiotlb) { |
diff --git a/arch/x86/kernel/process.c b/arch/x86/kernel/process.c index c622772744d8..e68bb9e30864 100644 --- a/arch/x86/kernel/process.c +++ b/arch/x86/kernel/process.c | |||
@@ -1,13 +1,16 @@ | |||
1 | #include <linux/errno.h> | 1 | #include <linux/errno.h> |
2 | #include <linux/kernel.h> | 2 | #include <linux/kernel.h> |
3 | #include <linux/mm.h> | 3 | #include <linux/mm.h> |
4 | #include <asm/idle.h> | ||
4 | #include <linux/smp.h> | 5 | #include <linux/smp.h> |
5 | #include <linux/slab.h> | 6 | #include <linux/slab.h> |
6 | #include <linux/sched.h> | 7 | #include <linux/sched.h> |
7 | #include <linux/module.h> | 8 | #include <linux/module.h> |
8 | #include <linux/pm.h> | 9 | #include <linux/pm.h> |
9 | #include <linux/clockchips.h> | 10 | #include <linux/clockchips.h> |
11 | #include <linux/ftrace.h> | ||
10 | #include <asm/system.h> | 12 | #include <asm/system.h> |
13 | #include <asm/apic.h> | ||
11 | 14 | ||
12 | unsigned long idle_halt; | 15 | unsigned long idle_halt; |
13 | EXPORT_SYMBOL(idle_halt); | 16 | EXPORT_SYMBOL(idle_halt); |
@@ -100,6 +103,9 @@ static inline int hlt_use_halt(void) | |||
100 | void default_idle(void) | 103 | void default_idle(void) |
101 | { | 104 | { |
102 | if (hlt_use_halt()) { | 105 | if (hlt_use_halt()) { |
106 | struct power_trace it; | ||
107 | |||
108 | trace_power_start(&it, POWER_CSTATE, 1); | ||
103 | current_thread_info()->status &= ~TS_POLLING; | 109 | current_thread_info()->status &= ~TS_POLLING; |
104 | /* | 110 | /* |
105 | * TS_POLLING-cleared state must be visible before we | 111 | * TS_POLLING-cleared state must be visible before we |
@@ -112,6 +118,7 @@ void default_idle(void) | |||
112 | else | 118 | else |
113 | local_irq_enable(); | 119 | local_irq_enable(); |
114 | current_thread_info()->status |= TS_POLLING; | 120 | current_thread_info()->status |= TS_POLLING; |
121 | trace_power_end(&it); | ||
115 | } else { | 122 | } else { |
116 | local_irq_enable(); | 123 | local_irq_enable(); |
117 | /* loop is done by the caller */ | 124 | /* loop is done by the caller */ |
@@ -122,6 +129,21 @@ void default_idle(void) | |||
122 | EXPORT_SYMBOL(default_idle); | 129 | EXPORT_SYMBOL(default_idle); |
123 | #endif | 130 | #endif |
124 | 131 | ||
132 | void stop_this_cpu(void *dummy) | ||
133 | { | ||
134 | local_irq_disable(); | ||
135 | /* | ||
136 | * Remove this CPU: | ||
137 | */ | ||
138 | cpu_clear(smp_processor_id(), cpu_online_map); | ||
139 | disable_local_APIC(); | ||
140 | |||
141 | for (;;) { | ||
142 | if (hlt_works(smp_processor_id())) | ||
143 | halt(); | ||
144 | } | ||
145 | } | ||
146 | |||
125 | static void do_nothing(void *unused) | 147 | static void do_nothing(void *unused) |
126 | { | 148 | { |
127 | } | 149 | } |
@@ -154,24 +176,31 @@ EXPORT_SYMBOL_GPL(cpu_idle_wait); | |||
154 | */ | 176 | */ |
155 | void mwait_idle_with_hints(unsigned long ax, unsigned long cx) | 177 | void mwait_idle_with_hints(unsigned long ax, unsigned long cx) |
156 | { | 178 | { |
179 | struct power_trace it; | ||
180 | |||
181 | trace_power_start(&it, POWER_CSTATE, (ax>>4)+1); | ||
157 | if (!need_resched()) { | 182 | if (!need_resched()) { |
158 | __monitor((void *)&current_thread_info()->flags, 0, 0); | 183 | __monitor((void *)&current_thread_info()->flags, 0, 0); |
159 | smp_mb(); | 184 | smp_mb(); |
160 | if (!need_resched()) | 185 | if (!need_resched()) |
161 | __mwait(ax, cx); | 186 | __mwait(ax, cx); |
162 | } | 187 | } |
188 | trace_power_end(&it); | ||
163 | } | 189 | } |
164 | 190 | ||
165 | /* Default MONITOR/MWAIT with no hints, used for default C1 state */ | 191 | /* Default MONITOR/MWAIT with no hints, used for default C1 state */ |
166 | static void mwait_idle(void) | 192 | static void mwait_idle(void) |
167 | { | 193 | { |
194 | struct power_trace it; | ||
168 | if (!need_resched()) { | 195 | if (!need_resched()) { |
196 | trace_power_start(&it, POWER_CSTATE, 1); | ||
169 | __monitor((void *)&current_thread_info()->flags, 0, 0); | 197 | __monitor((void *)&current_thread_info()->flags, 0, 0); |
170 | smp_mb(); | 198 | smp_mb(); |
171 | if (!need_resched()) | 199 | if (!need_resched()) |
172 | __sti_mwait(0, 0); | 200 | __sti_mwait(0, 0); |
173 | else | 201 | else |
174 | local_irq_enable(); | 202 | local_irq_enable(); |
203 | trace_power_end(&it); | ||
175 | } else | 204 | } else |
176 | local_irq_enable(); | 205 | local_irq_enable(); |
177 | } | 206 | } |
@@ -183,9 +212,13 @@ static void mwait_idle(void) | |||
183 | */ | 212 | */ |
184 | static void poll_idle(void) | 213 | static void poll_idle(void) |
185 | { | 214 | { |
215 | struct power_trace it; | ||
216 | |||
217 | trace_power_start(&it, POWER_CSTATE, 0); | ||
186 | local_irq_enable(); | 218 | local_irq_enable(); |
187 | while (!need_resched()) | 219 | while (!need_resched()) |
188 | cpu_relax(); | 220 | cpu_relax(); |
221 | trace_power_end(&it); | ||
189 | } | 222 | } |
190 | 223 | ||
191 | /* | 224 | /* |
@@ -270,7 +303,7 @@ static void c1e_idle(void) | |||
270 | rdmsr(MSR_K8_INT_PENDING_MSG, lo, hi); | 303 | rdmsr(MSR_K8_INT_PENDING_MSG, lo, hi); |
271 | if (lo & K8_INTP_C1E_ACTIVE_MASK) { | 304 | if (lo & K8_INTP_C1E_ACTIVE_MASK) { |
272 | c1e_detected = 1; | 305 | c1e_detected = 1; |
273 | if (!boot_cpu_has(X86_FEATURE_CONSTANT_TSC)) | 306 | if (!boot_cpu_has(X86_FEATURE_NONSTOP_TSC)) |
274 | mark_tsc_unstable("TSC halt in AMD C1E"); | 307 | mark_tsc_unstable("TSC halt in AMD C1E"); |
275 | printk(KERN_INFO "System has AMD C1E enabled\n"); | 308 | printk(KERN_INFO "System has AMD C1E enabled\n"); |
276 | set_cpu_cap(&boot_cpu_data, X86_FEATURE_AMDC1E); | 309 | set_cpu_cap(&boot_cpu_data, X86_FEATURE_AMDC1E); |
diff --git a/arch/x86/kernel/process_32.c b/arch/x86/kernel/process_32.c index 0a1302fe6d45..3ba155d24884 100644 --- a/arch/x86/kernel/process_32.c +++ b/arch/x86/kernel/process_32.c | |||
@@ -38,6 +38,7 @@ | |||
38 | #include <linux/percpu.h> | 38 | #include <linux/percpu.h> |
39 | #include <linux/prctl.h> | 39 | #include <linux/prctl.h> |
40 | #include <linux/dmi.h> | 40 | #include <linux/dmi.h> |
41 | #include <linux/ftrace.h> | ||
41 | 42 | ||
42 | #include <asm/uaccess.h> | 43 | #include <asm/uaccess.h> |
43 | #include <asm/pgtable.h> | 44 | #include <asm/pgtable.h> |
@@ -59,6 +60,7 @@ | |||
59 | #include <asm/idle.h> | 60 | #include <asm/idle.h> |
60 | #include <asm/syscalls.h> | 61 | #include <asm/syscalls.h> |
61 | #include <asm/smp.h> | 62 | #include <asm/smp.h> |
63 | #include <asm/ds.h> | ||
62 | 64 | ||
63 | asmlinkage void ret_from_fork(void) __asm__("ret_from_fork"); | 65 | asmlinkage void ret_from_fork(void) __asm__("ret_from_fork"); |
64 | 66 | ||
@@ -250,14 +252,8 @@ void exit_thread(void) | |||
250 | tss->x86_tss.io_bitmap_base = INVALID_IO_BITMAP_OFFSET; | 252 | tss->x86_tss.io_bitmap_base = INVALID_IO_BITMAP_OFFSET; |
251 | put_cpu(); | 253 | put_cpu(); |
252 | } | 254 | } |
253 | #ifdef CONFIG_X86_DS | 255 | |
254 | /* Free any DS contexts that have not been properly released. */ | 256 | ds_exit_thread(current); |
255 | if (unlikely(current->thread.ds_ctx)) { | ||
256 | /* we clear debugctl to make sure DS is not used. */ | ||
257 | update_debugctlmsr(0); | ||
258 | ds_free(current->thread.ds_ctx); | ||
259 | } | ||
260 | #endif /* CONFIG_X86_DS */ | ||
261 | } | 257 | } |
262 | 258 | ||
263 | void flush_thread(void) | 259 | void flush_thread(void) |
@@ -339,6 +335,12 @@ int copy_thread(int nr, unsigned long clone_flags, unsigned long sp, | |||
339 | kfree(p->thread.io_bitmap_ptr); | 335 | kfree(p->thread.io_bitmap_ptr); |
340 | p->thread.io_bitmap_max = 0; | 336 | p->thread.io_bitmap_max = 0; |
341 | } | 337 | } |
338 | |||
339 | ds_copy_thread(p, current); | ||
340 | |||
341 | clear_tsk_thread_flag(p, TIF_DEBUGCTLMSR); | ||
342 | p->thread.debugctlmsr = 0; | ||
343 | |||
342 | return err; | 344 | return err; |
343 | } | 345 | } |
344 | 346 | ||
@@ -419,48 +421,19 @@ int set_tsc_mode(unsigned int val) | |||
419 | return 0; | 421 | return 0; |
420 | } | 422 | } |
421 | 423 | ||
422 | #ifdef CONFIG_X86_DS | ||
423 | static int update_debugctl(struct thread_struct *prev, | ||
424 | struct thread_struct *next, unsigned long debugctl) | ||
425 | { | ||
426 | unsigned long ds_prev = 0; | ||
427 | unsigned long ds_next = 0; | ||
428 | |||
429 | if (prev->ds_ctx) | ||
430 | ds_prev = (unsigned long)prev->ds_ctx->ds; | ||
431 | if (next->ds_ctx) | ||
432 | ds_next = (unsigned long)next->ds_ctx->ds; | ||
433 | |||
434 | if (ds_next != ds_prev) { | ||
435 | /* we clear debugctl to make sure DS | ||
436 | * is not in use when we change it */ | ||
437 | debugctl = 0; | ||
438 | update_debugctlmsr(0); | ||
439 | wrmsr(MSR_IA32_DS_AREA, ds_next, 0); | ||
440 | } | ||
441 | return debugctl; | ||
442 | } | ||
443 | #else | ||
444 | static int update_debugctl(struct thread_struct *prev, | ||
445 | struct thread_struct *next, unsigned long debugctl) | ||
446 | { | ||
447 | return debugctl; | ||
448 | } | ||
449 | #endif /* CONFIG_X86_DS */ | ||
450 | |||
451 | static noinline void | 424 | static noinline void |
452 | __switch_to_xtra(struct task_struct *prev_p, struct task_struct *next_p, | 425 | __switch_to_xtra(struct task_struct *prev_p, struct task_struct *next_p, |
453 | struct tss_struct *tss) | 426 | struct tss_struct *tss) |
454 | { | 427 | { |
455 | struct thread_struct *prev, *next; | 428 | struct thread_struct *prev, *next; |
456 | unsigned long debugctl; | ||
457 | 429 | ||
458 | prev = &prev_p->thread; | 430 | prev = &prev_p->thread; |
459 | next = &next_p->thread; | 431 | next = &next_p->thread; |
460 | 432 | ||
461 | debugctl = update_debugctl(prev, next, prev->debugctlmsr); | 433 | if (test_tsk_thread_flag(next_p, TIF_DS_AREA_MSR) || |
462 | 434 | test_tsk_thread_flag(prev_p, TIF_DS_AREA_MSR)) | |
463 | if (next->debugctlmsr != debugctl) | 435 | ds_switch_to(prev_p, next_p); |
436 | else if (next->debugctlmsr != prev->debugctlmsr) | ||
464 | update_debugctlmsr(next->debugctlmsr); | 437 | update_debugctlmsr(next->debugctlmsr); |
465 | 438 | ||
466 | if (test_tsk_thread_flag(next_p, TIF_DEBUG)) { | 439 | if (test_tsk_thread_flag(next_p, TIF_DEBUG)) { |
@@ -482,15 +455,6 @@ __switch_to_xtra(struct task_struct *prev_p, struct task_struct *next_p, | |||
482 | hard_enable_TSC(); | 455 | hard_enable_TSC(); |
483 | } | 456 | } |
484 | 457 | ||
485 | #ifdef CONFIG_X86_PTRACE_BTS | ||
486 | if (test_tsk_thread_flag(prev_p, TIF_BTS_TRACE_TS)) | ||
487 | ptrace_bts_take_timestamp(prev_p, BTS_TASK_DEPARTS); | ||
488 | |||
489 | if (test_tsk_thread_flag(next_p, TIF_BTS_TRACE_TS)) | ||
490 | ptrace_bts_take_timestamp(next_p, BTS_TASK_ARRIVES); | ||
491 | #endif /* CONFIG_X86_PTRACE_BTS */ | ||
492 | |||
493 | |||
494 | if (!test_tsk_thread_flag(next_p, TIF_IO_BITMAP)) { | 458 | if (!test_tsk_thread_flag(next_p, TIF_IO_BITMAP)) { |
495 | /* | 459 | /* |
496 | * Disable the bitmap via an invalid offset. We still cache | 460 | * Disable the bitmap via an invalid offset. We still cache |
@@ -548,7 +512,8 @@ __switch_to_xtra(struct task_struct *prev_p, struct task_struct *next_p, | |||
548 | * the task-switch, and shows up in ret_from_fork in entry.S, | 512 | * the task-switch, and shows up in ret_from_fork in entry.S, |
549 | * for example. | 513 | * for example. |
550 | */ | 514 | */ |
551 | struct task_struct * __switch_to(struct task_struct *prev_p, struct task_struct *next_p) | 515 | __notrace_funcgraph struct task_struct * |
516 | __switch_to(struct task_struct *prev_p, struct task_struct *next_p) | ||
552 | { | 517 | { |
553 | struct thread_struct *prev = &prev_p->thread, | 518 | struct thread_struct *prev = &prev_p->thread, |
554 | *next = &next_p->thread; | 519 | *next = &next_p->thread; |
diff --git a/arch/x86/kernel/process_64.c b/arch/x86/kernel/process_64.c index c958120fb1b6..416fb9282f4f 100644 --- a/arch/x86/kernel/process_64.c +++ b/arch/x86/kernel/process_64.c | |||
@@ -39,6 +39,7 @@ | |||
39 | #include <linux/prctl.h> | 39 | #include <linux/prctl.h> |
40 | #include <linux/uaccess.h> | 40 | #include <linux/uaccess.h> |
41 | #include <linux/io.h> | 41 | #include <linux/io.h> |
42 | #include <linux/ftrace.h> | ||
42 | 43 | ||
43 | #include <asm/pgtable.h> | 44 | #include <asm/pgtable.h> |
44 | #include <asm/system.h> | 45 | #include <asm/system.h> |
@@ -52,6 +53,7 @@ | |||
52 | #include <asm/ia32.h> | 53 | #include <asm/ia32.h> |
53 | #include <asm/idle.h> | 54 | #include <asm/idle.h> |
54 | #include <asm/syscalls.h> | 55 | #include <asm/syscalls.h> |
56 | #include <asm/ds.h> | ||
55 | 57 | ||
56 | asmlinkage extern void ret_from_fork(void); | 58 | asmlinkage extern void ret_from_fork(void); |
57 | 59 | ||
@@ -235,14 +237,8 @@ void exit_thread(void) | |||
235 | t->io_bitmap_max = 0; | 237 | t->io_bitmap_max = 0; |
236 | put_cpu(); | 238 | put_cpu(); |
237 | } | 239 | } |
238 | #ifdef CONFIG_X86_DS | 240 | |
239 | /* Free any DS contexts that have not been properly released. */ | 241 | ds_exit_thread(current); |
240 | if (unlikely(t->ds_ctx)) { | ||
241 | /* we clear debugctl to make sure DS is not used. */ | ||
242 | update_debugctlmsr(0); | ||
243 | ds_free(t->ds_ctx); | ||
244 | } | ||
245 | #endif /* CONFIG_X86_DS */ | ||
246 | } | 242 | } |
247 | 243 | ||
248 | void flush_thread(void) | 244 | void flush_thread(void) |
@@ -372,6 +368,12 @@ int copy_thread(int nr, unsigned long clone_flags, unsigned long sp, | |||
372 | if (err) | 368 | if (err) |
373 | goto out; | 369 | goto out; |
374 | } | 370 | } |
371 | |||
372 | ds_copy_thread(p, me); | ||
373 | |||
374 | clear_tsk_thread_flag(p, TIF_DEBUGCTLMSR); | ||
375 | p->thread.debugctlmsr = 0; | ||
376 | |||
375 | err = 0; | 377 | err = 0; |
376 | out: | 378 | out: |
377 | if (err && p->thread.io_bitmap_ptr) { | 379 | if (err && p->thread.io_bitmap_ptr) { |
@@ -470,35 +472,14 @@ static inline void __switch_to_xtra(struct task_struct *prev_p, | |||
470 | struct tss_struct *tss) | 472 | struct tss_struct *tss) |
471 | { | 473 | { |
472 | struct thread_struct *prev, *next; | 474 | struct thread_struct *prev, *next; |
473 | unsigned long debugctl; | ||
474 | 475 | ||
475 | prev = &prev_p->thread, | 476 | prev = &prev_p->thread, |
476 | next = &next_p->thread; | 477 | next = &next_p->thread; |
477 | 478 | ||
478 | debugctl = prev->debugctlmsr; | 479 | if (test_tsk_thread_flag(next_p, TIF_DS_AREA_MSR) || |
479 | 480 | test_tsk_thread_flag(prev_p, TIF_DS_AREA_MSR)) | |
480 | #ifdef CONFIG_X86_DS | 481 | ds_switch_to(prev_p, next_p); |
481 | { | 482 | else if (next->debugctlmsr != prev->debugctlmsr) |
482 | unsigned long ds_prev = 0, ds_next = 0; | ||
483 | |||
484 | if (prev->ds_ctx) | ||
485 | ds_prev = (unsigned long)prev->ds_ctx->ds; | ||
486 | if (next->ds_ctx) | ||
487 | ds_next = (unsigned long)next->ds_ctx->ds; | ||
488 | |||
489 | if (ds_next != ds_prev) { | ||
490 | /* | ||
491 | * We clear debugctl to make sure DS | ||
492 | * is not in use when we change it: | ||
493 | */ | ||
494 | debugctl = 0; | ||
495 | update_debugctlmsr(0); | ||
496 | wrmsrl(MSR_IA32_DS_AREA, ds_next); | ||
497 | } | ||
498 | } | ||
499 | #endif /* CONFIG_X86_DS */ | ||
500 | |||
501 | if (next->debugctlmsr != debugctl) | ||
502 | update_debugctlmsr(next->debugctlmsr); | 483 | update_debugctlmsr(next->debugctlmsr); |
503 | 484 | ||
504 | if (test_tsk_thread_flag(next_p, TIF_DEBUG)) { | 485 | if (test_tsk_thread_flag(next_p, TIF_DEBUG)) { |
@@ -533,14 +514,6 @@ static inline void __switch_to_xtra(struct task_struct *prev_p, | |||
533 | */ | 514 | */ |
534 | memset(tss->io_bitmap, 0xff, prev->io_bitmap_max); | 515 | memset(tss->io_bitmap, 0xff, prev->io_bitmap_max); |
535 | } | 516 | } |
536 | |||
537 | #ifdef CONFIG_X86_PTRACE_BTS | ||
538 | if (test_tsk_thread_flag(prev_p, TIF_BTS_TRACE_TS)) | ||
539 | ptrace_bts_take_timestamp(prev_p, BTS_TASK_DEPARTS); | ||
540 | |||
541 | if (test_tsk_thread_flag(next_p, TIF_BTS_TRACE_TS)) | ||
542 | ptrace_bts_take_timestamp(next_p, BTS_TASK_ARRIVES); | ||
543 | #endif /* CONFIG_X86_PTRACE_BTS */ | ||
544 | } | 517 | } |
545 | 518 | ||
546 | /* | 519 | /* |
@@ -551,8 +524,9 @@ static inline void __switch_to_xtra(struct task_struct *prev_p, | |||
551 | * - could test fs/gs bitsliced | 524 | * - could test fs/gs bitsliced |
552 | * | 525 | * |
553 | * Kprobes not supported here. Set the probe on schedule instead. | 526 | * Kprobes not supported here. Set the probe on schedule instead. |
527 | * Function graph tracer not supported too. | ||
554 | */ | 528 | */ |
555 | struct task_struct * | 529 | __notrace_funcgraph struct task_struct * |
556 | __switch_to(struct task_struct *prev_p, struct task_struct *next_p) | 530 | __switch_to(struct task_struct *prev_p, struct task_struct *next_p) |
557 | { | 531 | { |
558 | struct thread_struct *prev = &prev_p->thread; | 532 | struct thread_struct *prev = &prev_p->thread; |
diff --git a/arch/x86/kernel/ptrace.c b/arch/x86/kernel/ptrace.c index 0a6d8c12e10d..0a5df5f82fb9 100644 --- a/arch/x86/kernel/ptrace.c +++ b/arch/x86/kernel/ptrace.c | |||
@@ -581,158 +581,91 @@ static int ioperm_get(struct task_struct *target, | |||
581 | } | 581 | } |
582 | 582 | ||
583 | #ifdef CONFIG_X86_PTRACE_BTS | 583 | #ifdef CONFIG_X86_PTRACE_BTS |
584 | /* | ||
585 | * The configuration for a particular BTS hardware implementation. | ||
586 | */ | ||
587 | struct bts_configuration { | ||
588 | /* the size of a BTS record in bytes; at most BTS_MAX_RECORD_SIZE */ | ||
589 | unsigned char sizeof_bts; | ||
590 | /* the size of a field in the BTS record in bytes */ | ||
591 | unsigned char sizeof_field; | ||
592 | /* a bitmask to enable/disable BTS in DEBUGCTL MSR */ | ||
593 | unsigned long debugctl_mask; | ||
594 | }; | ||
595 | static struct bts_configuration bts_cfg; | ||
596 | |||
597 | #define BTS_MAX_RECORD_SIZE (8 * 3) | ||
598 | |||
599 | |||
600 | /* | ||
601 | * Branch Trace Store (BTS) uses the following format. Different | ||
602 | * architectures vary in the size of those fields. | ||
603 | * - source linear address | ||
604 | * - destination linear address | ||
605 | * - flags | ||
606 | * | ||
607 | * Later architectures use 64bit pointers throughout, whereas earlier | ||
608 | * architectures use 32bit pointers in 32bit mode. | ||
609 | * | ||
610 | * We compute the base address for the first 8 fields based on: | ||
611 | * - the field size stored in the DS configuration | ||
612 | * - the relative field position | ||
613 | * | ||
614 | * In order to store additional information in the BTS buffer, we use | ||
615 | * a special source address to indicate that the record requires | ||
616 | * special interpretation. | ||
617 | * | ||
618 | * Netburst indicated via a bit in the flags field whether the branch | ||
619 | * was predicted; this is ignored. | ||
620 | */ | ||
621 | |||
622 | enum bts_field { | ||
623 | bts_from = 0, | ||
624 | bts_to, | ||
625 | bts_flags, | ||
626 | |||
627 | bts_escape = (unsigned long)-1, | ||
628 | bts_qual = bts_to, | ||
629 | bts_jiffies = bts_flags | ||
630 | }; | ||
631 | |||
632 | static inline unsigned long bts_get(const char *base, enum bts_field field) | ||
633 | { | ||
634 | base += (bts_cfg.sizeof_field * field); | ||
635 | return *(unsigned long *)base; | ||
636 | } | ||
637 | |||
638 | static inline void bts_set(char *base, enum bts_field field, unsigned long val) | ||
639 | { | ||
640 | base += (bts_cfg.sizeof_field * field);; | ||
641 | (*(unsigned long *)base) = val; | ||
642 | } | ||
643 | |||
644 | /* | ||
645 | * Translate a BTS record from the raw format into the bts_struct format | ||
646 | * | ||
647 | * out (out): bts_struct interpretation | ||
648 | * raw: raw BTS record | ||
649 | */ | ||
650 | static void ptrace_bts_translate_record(struct bts_struct *out, const void *raw) | ||
651 | { | ||
652 | memset(out, 0, sizeof(*out)); | ||
653 | if (bts_get(raw, bts_from) == bts_escape) { | ||
654 | out->qualifier = bts_get(raw, bts_qual); | ||
655 | out->variant.jiffies = bts_get(raw, bts_jiffies); | ||
656 | } else { | ||
657 | out->qualifier = BTS_BRANCH; | ||
658 | out->variant.lbr.from_ip = bts_get(raw, bts_from); | ||
659 | out->variant.lbr.to_ip = bts_get(raw, bts_to); | ||
660 | } | ||
661 | } | ||
662 | |||
663 | static int ptrace_bts_read_record(struct task_struct *child, size_t index, | 584 | static int ptrace_bts_read_record(struct task_struct *child, size_t index, |
664 | struct bts_struct __user *out) | 585 | struct bts_struct __user *out) |
665 | { | 586 | { |
666 | struct bts_struct ret; | 587 | const struct bts_trace *trace; |
667 | const void *bts_record; | 588 | struct bts_struct bts; |
668 | size_t bts_index, bts_end; | 589 | const unsigned char *at; |
669 | int error; | 590 | int error; |
670 | 591 | ||
671 | error = ds_get_bts_end(child, &bts_end); | 592 | trace = ds_read_bts(child->bts); |
672 | if (error < 0) | 593 | if (!trace) |
673 | return error; | 594 | return -EPERM; |
674 | |||
675 | if (bts_end <= index) | ||
676 | return -EINVAL; | ||
677 | 595 | ||
678 | error = ds_get_bts_index(child, &bts_index); | 596 | at = trace->ds.top - ((index + 1) * trace->ds.size); |
679 | if (error < 0) | 597 | if ((void *)at < trace->ds.begin) |
680 | return error; | 598 | at += (trace->ds.n * trace->ds.size); |
681 | 599 | ||
682 | /* translate the ptrace bts index into the ds bts index */ | 600 | if (!trace->read) |
683 | bts_index += bts_end - (index + 1); | 601 | return -EOPNOTSUPP; |
684 | if (bts_end <= bts_index) | ||
685 | bts_index -= bts_end; | ||
686 | 602 | ||
687 | error = ds_access_bts(child, bts_index, &bts_record); | 603 | error = trace->read(child->bts, at, &bts); |
688 | if (error < 0) | 604 | if (error < 0) |
689 | return error; | 605 | return error; |
690 | 606 | ||
691 | ptrace_bts_translate_record(&ret, bts_record); | 607 | if (copy_to_user(out, &bts, sizeof(bts))) |
692 | |||
693 | if (copy_to_user(out, &ret, sizeof(ret))) | ||
694 | return -EFAULT; | 608 | return -EFAULT; |
695 | 609 | ||
696 | return sizeof(ret); | 610 | return sizeof(bts); |
697 | } | 611 | } |
698 | 612 | ||
699 | static int ptrace_bts_drain(struct task_struct *child, | 613 | static int ptrace_bts_drain(struct task_struct *child, |
700 | long size, | 614 | long size, |
701 | struct bts_struct __user *out) | 615 | struct bts_struct __user *out) |
702 | { | 616 | { |
703 | struct bts_struct ret; | 617 | const struct bts_trace *trace; |
704 | const unsigned char *raw; | 618 | const unsigned char *at; |
705 | size_t end, i; | 619 | int error, drained = 0; |
706 | int error; | ||
707 | 620 | ||
708 | error = ds_get_bts_index(child, &end); | 621 | trace = ds_read_bts(child->bts); |
709 | if (error < 0) | 622 | if (!trace) |
710 | return error; | 623 | return -EPERM; |
711 | 624 | ||
712 | if (size < (end * sizeof(struct bts_struct))) | 625 | if (!trace->read) |
626 | return -EOPNOTSUPP; | ||
627 | |||
628 | if (size < (trace->ds.top - trace->ds.begin)) | ||
713 | return -EIO; | 629 | return -EIO; |
714 | 630 | ||
715 | error = ds_access_bts(child, 0, (const void **)&raw); | 631 | for (at = trace->ds.begin; (void *)at < trace->ds.top; |
716 | if (error < 0) | 632 | out++, drained++, at += trace->ds.size) { |
717 | return error; | 633 | struct bts_struct bts; |
634 | int error; | ||
718 | 635 | ||
719 | for (i = 0; i < end; i++, out++, raw += bts_cfg.sizeof_bts) { | 636 | error = trace->read(child->bts, at, &bts); |
720 | ptrace_bts_translate_record(&ret, raw); | 637 | if (error < 0) |
638 | return error; | ||
721 | 639 | ||
722 | if (copy_to_user(out, &ret, sizeof(ret))) | 640 | if (copy_to_user(out, &bts, sizeof(bts))) |
723 | return -EFAULT; | 641 | return -EFAULT; |
724 | } | 642 | } |
725 | 643 | ||
726 | error = ds_clear_bts(child); | 644 | memset(trace->ds.begin, 0, trace->ds.n * trace->ds.size); |
645 | |||
646 | error = ds_reset_bts(child->bts); | ||
727 | if (error < 0) | 647 | if (error < 0) |
728 | return error; | 648 | return error; |
729 | 649 | ||
730 | return end; | 650 | return drained; |
731 | } | 651 | } |
732 | 652 | ||
733 | static void ptrace_bts_ovfl(struct task_struct *child) | 653 | static int ptrace_bts_allocate_buffer(struct task_struct *child, size_t size) |
734 | { | 654 | { |
735 | send_sig(child->thread.bts_ovfl_signal, child, 0); | 655 | child->bts_buffer = alloc_locked_buffer(size); |
656 | if (!child->bts_buffer) | ||
657 | return -ENOMEM; | ||
658 | |||
659 | child->bts_size = size; | ||
660 | |||
661 | return 0; | ||
662 | } | ||
663 | |||
664 | static void ptrace_bts_free_buffer(struct task_struct *child) | ||
665 | { | ||
666 | free_locked_buffer(child->bts_buffer, child->bts_size); | ||
667 | child->bts_buffer = NULL; | ||
668 | child->bts_size = 0; | ||
736 | } | 669 | } |
737 | 670 | ||
738 | static int ptrace_bts_config(struct task_struct *child, | 671 | static int ptrace_bts_config(struct task_struct *child, |
@@ -740,114 +673,86 @@ static int ptrace_bts_config(struct task_struct *child, | |||
740 | const struct ptrace_bts_config __user *ucfg) | 673 | const struct ptrace_bts_config __user *ucfg) |
741 | { | 674 | { |
742 | struct ptrace_bts_config cfg; | 675 | struct ptrace_bts_config cfg; |
743 | int error = 0; | 676 | unsigned int flags = 0; |
744 | |||
745 | error = -EOPNOTSUPP; | ||
746 | if (!bts_cfg.sizeof_bts) | ||
747 | goto errout; | ||
748 | 677 | ||
749 | error = -EIO; | ||
750 | if (cfg_size < sizeof(cfg)) | 678 | if (cfg_size < sizeof(cfg)) |
751 | goto errout; | 679 | return -EIO; |
752 | 680 | ||
753 | error = -EFAULT; | ||
754 | if (copy_from_user(&cfg, ucfg, sizeof(cfg))) | 681 | if (copy_from_user(&cfg, ucfg, sizeof(cfg))) |
755 | goto errout; | 682 | return -EFAULT; |
756 | 683 | ||
757 | error = -EINVAL; | 684 | if (child->bts) { |
758 | if ((cfg.flags & PTRACE_BTS_O_SIGNAL) && | 685 | ds_release_bts(child->bts); |
759 | !(cfg.flags & PTRACE_BTS_O_ALLOC)) | 686 | child->bts = NULL; |
760 | goto errout; | 687 | } |
761 | 688 | ||
762 | if (cfg.flags & PTRACE_BTS_O_ALLOC) { | 689 | if (cfg.flags & PTRACE_BTS_O_SIGNAL) { |
763 | ds_ovfl_callback_t ovfl = NULL; | 690 | if (!cfg.signal) |
764 | unsigned int sig = 0; | 691 | return -EINVAL; |
765 | 692 | ||
766 | /* we ignore the error in case we were not tracing child */ | 693 | return -EOPNOTSUPP; |
767 | (void)ds_release_bts(child); | ||
768 | 694 | ||
769 | if (cfg.flags & PTRACE_BTS_O_SIGNAL) { | 695 | child->thread.bts_ovfl_signal = cfg.signal; |
770 | if (!cfg.signal) | 696 | } |
771 | goto errout; | ||
772 | 697 | ||
773 | sig = cfg.signal; | 698 | if ((cfg.flags & PTRACE_BTS_O_ALLOC) && |
774 | ovfl = ptrace_bts_ovfl; | 699 | (cfg.size != child->bts_size)) { |
775 | } | 700 | int error; |
776 | 701 | ||
777 | error = ds_request_bts(child, /* base = */ NULL, cfg.size, ovfl); | 702 | ptrace_bts_free_buffer(child); |
778 | if (error < 0) | ||
779 | goto errout; | ||
780 | 703 | ||
781 | child->thread.bts_ovfl_signal = sig; | 704 | error = ptrace_bts_allocate_buffer(child, cfg.size); |
705 | if (error < 0) | ||
706 | return error; | ||
782 | } | 707 | } |
783 | 708 | ||
784 | error = -EINVAL; | ||
785 | if (!child->thread.ds_ctx && cfg.flags) | ||
786 | goto errout; | ||
787 | |||
788 | if (cfg.flags & PTRACE_BTS_O_TRACE) | 709 | if (cfg.flags & PTRACE_BTS_O_TRACE) |
789 | child->thread.debugctlmsr |= bts_cfg.debugctl_mask; | 710 | flags |= BTS_USER; |
790 | else | ||
791 | child->thread.debugctlmsr &= ~bts_cfg.debugctl_mask; | ||
792 | 711 | ||
793 | if (cfg.flags & PTRACE_BTS_O_SCHED) | 712 | if (cfg.flags & PTRACE_BTS_O_SCHED) |
794 | set_tsk_thread_flag(child, TIF_BTS_TRACE_TS); | 713 | flags |= BTS_TIMESTAMPS; |
795 | else | ||
796 | clear_tsk_thread_flag(child, TIF_BTS_TRACE_TS); | ||
797 | 714 | ||
798 | error = sizeof(cfg); | 715 | child->bts = ds_request_bts(child, child->bts_buffer, child->bts_size, |
716 | /* ovfl = */ NULL, /* th = */ (size_t)-1, | ||
717 | flags); | ||
718 | if (IS_ERR(child->bts)) { | ||
719 | int error = PTR_ERR(child->bts); | ||
799 | 720 | ||
800 | out: | 721 | ptrace_bts_free_buffer(child); |
801 | if (child->thread.debugctlmsr) | 722 | child->bts = NULL; |
802 | set_tsk_thread_flag(child, TIF_DEBUGCTLMSR); | ||
803 | else | ||
804 | clear_tsk_thread_flag(child, TIF_DEBUGCTLMSR); | ||
805 | 723 | ||
806 | return error; | 724 | return error; |
725 | } | ||
807 | 726 | ||
808 | errout: | 727 | return sizeof(cfg); |
809 | child->thread.debugctlmsr &= ~bts_cfg.debugctl_mask; | ||
810 | clear_tsk_thread_flag(child, TIF_BTS_TRACE_TS); | ||
811 | goto out; | ||
812 | } | 728 | } |
813 | 729 | ||
814 | static int ptrace_bts_status(struct task_struct *child, | 730 | static int ptrace_bts_status(struct task_struct *child, |
815 | long cfg_size, | 731 | long cfg_size, |
816 | struct ptrace_bts_config __user *ucfg) | 732 | struct ptrace_bts_config __user *ucfg) |
817 | { | 733 | { |
734 | const struct bts_trace *trace; | ||
818 | struct ptrace_bts_config cfg; | 735 | struct ptrace_bts_config cfg; |
819 | size_t end; | ||
820 | const void *base, *max; | ||
821 | int error; | ||
822 | 736 | ||
823 | if (cfg_size < sizeof(cfg)) | 737 | if (cfg_size < sizeof(cfg)) |
824 | return -EIO; | 738 | return -EIO; |
825 | 739 | ||
826 | error = ds_get_bts_end(child, &end); | 740 | trace = ds_read_bts(child->bts); |
827 | if (error < 0) | 741 | if (!trace) |
828 | return error; | 742 | return -EPERM; |
829 | |||
830 | error = ds_access_bts(child, /* index = */ 0, &base); | ||
831 | if (error < 0) | ||
832 | return error; | ||
833 | |||
834 | error = ds_access_bts(child, /* index = */ end, &max); | ||
835 | if (error < 0) | ||
836 | return error; | ||
837 | 743 | ||
838 | memset(&cfg, 0, sizeof(cfg)); | 744 | memset(&cfg, 0, sizeof(cfg)); |
839 | cfg.size = (max - base); | 745 | cfg.size = trace->ds.end - trace->ds.begin; |
840 | cfg.signal = child->thread.bts_ovfl_signal; | 746 | cfg.signal = child->thread.bts_ovfl_signal; |
841 | cfg.bts_size = sizeof(struct bts_struct); | 747 | cfg.bts_size = sizeof(struct bts_struct); |
842 | 748 | ||
843 | if (cfg.signal) | 749 | if (cfg.signal) |
844 | cfg.flags |= PTRACE_BTS_O_SIGNAL; | 750 | cfg.flags |= PTRACE_BTS_O_SIGNAL; |
845 | 751 | ||
846 | if (test_tsk_thread_flag(child, TIF_DEBUGCTLMSR) && | 752 | if (trace->ds.flags & BTS_USER) |
847 | child->thread.debugctlmsr & bts_cfg.debugctl_mask) | ||
848 | cfg.flags |= PTRACE_BTS_O_TRACE; | 753 | cfg.flags |= PTRACE_BTS_O_TRACE; |
849 | 754 | ||
850 | if (test_tsk_thread_flag(child, TIF_BTS_TRACE_TS)) | 755 | if (trace->ds.flags & BTS_TIMESTAMPS) |
851 | cfg.flags |= PTRACE_BTS_O_SCHED; | 756 | cfg.flags |= PTRACE_BTS_O_SCHED; |
852 | 757 | ||
853 | if (copy_to_user(ucfg, &cfg, sizeof(cfg))) | 758 | if (copy_to_user(ucfg, &cfg, sizeof(cfg))) |
@@ -856,110 +761,77 @@ static int ptrace_bts_status(struct task_struct *child, | |||
856 | return sizeof(cfg); | 761 | return sizeof(cfg); |
857 | } | 762 | } |
858 | 763 | ||
859 | static int ptrace_bts_write_record(struct task_struct *child, | 764 | static int ptrace_bts_clear(struct task_struct *child) |
860 | const struct bts_struct *in) | ||
861 | { | 765 | { |
862 | unsigned char bts_record[BTS_MAX_RECORD_SIZE]; | 766 | const struct bts_trace *trace; |
863 | 767 | ||
864 | BUG_ON(BTS_MAX_RECORD_SIZE < bts_cfg.sizeof_bts); | 768 | trace = ds_read_bts(child->bts); |
769 | if (!trace) | ||
770 | return -EPERM; | ||
865 | 771 | ||
866 | memset(bts_record, 0, bts_cfg.sizeof_bts); | 772 | memset(trace->ds.begin, 0, trace->ds.n * trace->ds.size); |
867 | switch (in->qualifier) { | ||
868 | case BTS_INVALID: | ||
869 | break; | ||
870 | 773 | ||
871 | case BTS_BRANCH: | 774 | return ds_reset_bts(child->bts); |
872 | bts_set(bts_record, bts_from, in->variant.lbr.from_ip); | 775 | } |
873 | bts_set(bts_record, bts_to, in->variant.lbr.to_ip); | ||
874 | break; | ||
875 | 776 | ||
876 | case BTS_TASK_ARRIVES: | 777 | static int ptrace_bts_size(struct task_struct *child) |
877 | case BTS_TASK_DEPARTS: | 778 | { |
878 | bts_set(bts_record, bts_from, bts_escape); | 779 | const struct bts_trace *trace; |
879 | bts_set(bts_record, bts_qual, in->qualifier); | ||
880 | bts_set(bts_record, bts_jiffies, in->variant.jiffies); | ||
881 | break; | ||
882 | 780 | ||
883 | default: | 781 | trace = ds_read_bts(child->bts); |
884 | return -EINVAL; | 782 | if (!trace) |
885 | } | 783 | return -EPERM; |
886 | 784 | ||
887 | /* The writing task will be the switched-to task on a context | 785 | return (trace->ds.top - trace->ds.begin) / trace->ds.size; |
888 | * switch. It needs to write into the switched-from task's BTS | ||
889 | * buffer. */ | ||
890 | return ds_unchecked_write_bts(child, bts_record, bts_cfg.sizeof_bts); | ||
891 | } | 786 | } |
892 | 787 | ||
893 | void ptrace_bts_take_timestamp(struct task_struct *tsk, | 788 | static void ptrace_bts_fork(struct task_struct *tsk) |
894 | enum bts_qualifier qualifier) | ||
895 | { | 789 | { |
896 | struct bts_struct rec = { | 790 | tsk->bts = NULL; |
897 | .qualifier = qualifier, | 791 | tsk->bts_buffer = NULL; |
898 | .variant.jiffies = jiffies_64 | 792 | tsk->bts_size = 0; |
899 | }; | 793 | tsk->thread.bts_ovfl_signal = 0; |
900 | |||
901 | ptrace_bts_write_record(tsk, &rec); | ||
902 | } | 794 | } |
903 | 795 | ||
904 | static const struct bts_configuration bts_cfg_netburst = { | 796 | static void ptrace_bts_untrace(struct task_struct *child) |
905 | .sizeof_bts = sizeof(long) * 3, | 797 | { |
906 | .sizeof_field = sizeof(long), | 798 | if (unlikely(child->bts)) { |
907 | .debugctl_mask = (1<<2)|(1<<3)|(1<<5) | 799 | ds_release_bts(child->bts); |
908 | }; | 800 | child->bts = NULL; |
801 | |||
802 | /* We cannot update total_vm and locked_vm since | ||
803 | child's mm is already gone. But we can reclaim the | ||
804 | memory. */ | ||
805 | kfree(child->bts_buffer); | ||
806 | child->bts_buffer = NULL; | ||
807 | child->bts_size = 0; | ||
808 | } | ||
809 | } | ||
909 | 810 | ||
910 | static const struct bts_configuration bts_cfg_pentium_m = { | 811 | static void ptrace_bts_detach(struct task_struct *child) |
911 | .sizeof_bts = sizeof(long) * 3, | 812 | { |
912 | .sizeof_field = sizeof(long), | 813 | if (unlikely(child->bts)) { |
913 | .debugctl_mask = (1<<6)|(1<<7) | 814 | ds_release_bts(child->bts); |
914 | }; | 815 | child->bts = NULL; |
915 | 816 | ||
916 | static const struct bts_configuration bts_cfg_core2 = { | 817 | ptrace_bts_free_buffer(child); |
917 | .sizeof_bts = 8 * 3, | 818 | } |
918 | .sizeof_field = 8, | 819 | } |
919 | .debugctl_mask = (1<<6)|(1<<7)|(1<<9) | 820 | #else |
920 | }; | 821 | static inline void ptrace_bts_fork(struct task_struct *tsk) {} |
822 | static inline void ptrace_bts_detach(struct task_struct *child) {} | ||
823 | static inline void ptrace_bts_untrace(struct task_struct *child) {} | ||
824 | #endif /* CONFIG_X86_PTRACE_BTS */ | ||
921 | 825 | ||
922 | static inline void bts_configure(const struct bts_configuration *cfg) | 826 | void x86_ptrace_fork(struct task_struct *child, unsigned long clone_flags) |
923 | { | 827 | { |
924 | bts_cfg = *cfg; | 828 | ptrace_bts_fork(child); |
925 | } | 829 | } |
926 | 830 | ||
927 | void __cpuinit ptrace_bts_init_intel(struct cpuinfo_x86 *c) | 831 | void x86_ptrace_untrace(struct task_struct *child) |
928 | { | 832 | { |
929 | switch (c->x86) { | 833 | ptrace_bts_untrace(child); |
930 | case 0x6: | ||
931 | switch (c->x86_model) { | ||
932 | case 0xD: | ||
933 | case 0xE: /* Pentium M */ | ||
934 | bts_configure(&bts_cfg_pentium_m); | ||
935 | break; | ||
936 | case 0xF: /* Core2 */ | ||
937 | case 0x1C: /* Atom */ | ||
938 | bts_configure(&bts_cfg_core2); | ||
939 | break; | ||
940 | default: | ||
941 | /* sorry, don't know about them */ | ||
942 | break; | ||
943 | } | ||
944 | break; | ||
945 | case 0xF: | ||
946 | switch (c->x86_model) { | ||
947 | case 0x0: | ||
948 | case 0x1: | ||
949 | case 0x2: /* Netburst */ | ||
950 | bts_configure(&bts_cfg_netburst); | ||
951 | break; | ||
952 | default: | ||
953 | /* sorry, don't know about them */ | ||
954 | break; | ||
955 | } | ||
956 | break; | ||
957 | default: | ||
958 | /* sorry, don't know about them */ | ||
959 | break; | ||
960 | } | ||
961 | } | 834 | } |
962 | #endif /* CONFIG_X86_PTRACE_BTS */ | ||
963 | 835 | ||
964 | /* | 836 | /* |
965 | * Called by kernel/ptrace.c when detaching.. | 837 | * Called by kernel/ptrace.c when detaching.. |
@@ -972,15 +844,7 @@ void ptrace_disable(struct task_struct *child) | |||
972 | #ifdef TIF_SYSCALL_EMU | 844 | #ifdef TIF_SYSCALL_EMU |
973 | clear_tsk_thread_flag(child, TIF_SYSCALL_EMU); | 845 | clear_tsk_thread_flag(child, TIF_SYSCALL_EMU); |
974 | #endif | 846 | #endif |
975 | #ifdef CONFIG_X86_PTRACE_BTS | 847 | ptrace_bts_detach(child); |
976 | (void)ds_release_bts(child); | ||
977 | |||
978 | child->thread.debugctlmsr &= ~bts_cfg.debugctl_mask; | ||
979 | if (!child->thread.debugctlmsr) | ||
980 | clear_tsk_thread_flag(child, TIF_DEBUGCTLMSR); | ||
981 | |||
982 | clear_tsk_thread_flag(child, TIF_BTS_TRACE_TS); | ||
983 | #endif /* CONFIG_X86_PTRACE_BTS */ | ||
984 | } | 848 | } |
985 | 849 | ||
986 | #if defined CONFIG_X86_32 || defined CONFIG_IA32_EMULATION | 850 | #if defined CONFIG_X86_32 || defined CONFIG_IA32_EMULATION |
@@ -1112,7 +976,7 @@ long arch_ptrace(struct task_struct *child, long request, long addr, long data) | |||
1112 | break; | 976 | break; |
1113 | 977 | ||
1114 | case PTRACE_BTS_SIZE: | 978 | case PTRACE_BTS_SIZE: |
1115 | ret = ds_get_bts_index(child, /* pos = */ NULL); | 979 | ret = ptrace_bts_size(child); |
1116 | break; | 980 | break; |
1117 | 981 | ||
1118 | case PTRACE_BTS_GET: | 982 | case PTRACE_BTS_GET: |
@@ -1121,7 +985,7 @@ long arch_ptrace(struct task_struct *child, long request, long addr, long data) | |||
1121 | break; | 985 | break; |
1122 | 986 | ||
1123 | case PTRACE_BTS_CLEAR: | 987 | case PTRACE_BTS_CLEAR: |
1124 | ret = ds_clear_bts(child); | 988 | ret = ptrace_bts_clear(child); |
1125 | break; | 989 | break; |
1126 | 990 | ||
1127 | case PTRACE_BTS_DRAIN: | 991 | case PTRACE_BTS_DRAIN: |
@@ -1384,6 +1248,14 @@ long compat_arch_ptrace(struct task_struct *child, compat_long_t request, | |||
1384 | 1248 | ||
1385 | case PTRACE_GET_THREAD_AREA: | 1249 | case PTRACE_GET_THREAD_AREA: |
1386 | case PTRACE_SET_THREAD_AREA: | 1250 | case PTRACE_SET_THREAD_AREA: |
1251 | #ifdef CONFIG_X86_PTRACE_BTS | ||
1252 | case PTRACE_BTS_CONFIG: | ||
1253 | case PTRACE_BTS_STATUS: | ||
1254 | case PTRACE_BTS_SIZE: | ||
1255 | case PTRACE_BTS_GET: | ||
1256 | case PTRACE_BTS_CLEAR: | ||
1257 | case PTRACE_BTS_DRAIN: | ||
1258 | #endif /* CONFIG_X86_PTRACE_BTS */ | ||
1387 | return arch_ptrace(child, request, addr, data); | 1259 | return arch_ptrace(child, request, addr, data); |
1388 | 1260 | ||
1389 | default: | 1261 | default: |
diff --git a/arch/x86/kernel/quirks.c b/arch/x86/kernel/quirks.c index 67465ed89310..309949e9e1c1 100644 --- a/arch/x86/kernel/quirks.c +++ b/arch/x86/kernel/quirks.c | |||
@@ -168,6 +168,8 @@ DECLARE_PCI_FIXUP_HEADER(PCI_VENDOR_ID_INTEL, PCI_DEVICE_ID_INTEL_ICH7_31, | |||
168 | ich_force_enable_hpet); | 168 | ich_force_enable_hpet); |
169 | DECLARE_PCI_FIXUP_HEADER(PCI_VENDOR_ID_INTEL, PCI_DEVICE_ID_INTEL_ICH8_1, | 169 | DECLARE_PCI_FIXUP_HEADER(PCI_VENDOR_ID_INTEL, PCI_DEVICE_ID_INTEL_ICH8_1, |
170 | ich_force_enable_hpet); | 170 | ich_force_enable_hpet); |
171 | DECLARE_PCI_FIXUP_HEADER(PCI_VENDOR_ID_INTEL, PCI_DEVICE_ID_INTEL_ICH8_4, | ||
172 | ich_force_enable_hpet); | ||
171 | DECLARE_PCI_FIXUP_HEADER(PCI_VENDOR_ID_INTEL, PCI_DEVICE_ID_INTEL_ICH9_7, | 173 | DECLARE_PCI_FIXUP_HEADER(PCI_VENDOR_ID_INTEL, PCI_DEVICE_ID_INTEL_ICH9_7, |
172 | ich_force_enable_hpet); | 174 | ich_force_enable_hpet); |
173 | 175 | ||
diff --git a/arch/x86/kernel/reboot.c b/arch/x86/kernel/reboot.c index cc5a2545dd41..2b46eb41643b 100644 --- a/arch/x86/kernel/reboot.c +++ b/arch/x86/kernel/reboot.c | |||
@@ -12,6 +12,8 @@ | |||
12 | #include <asm/proto.h> | 12 | #include <asm/proto.h> |
13 | #include <asm/reboot_fixups.h> | 13 | #include <asm/reboot_fixups.h> |
14 | #include <asm/reboot.h> | 14 | #include <asm/reboot.h> |
15 | #include <asm/pci_x86.h> | ||
16 | #include <asm/virtext.h> | ||
15 | 17 | ||
16 | #ifdef CONFIG_X86_32 | 18 | #ifdef CONFIG_X86_32 |
17 | # include <linux/dmi.h> | 19 | # include <linux/dmi.h> |
@@ -21,6 +23,8 @@ | |||
21 | # include <asm/iommu.h> | 23 | # include <asm/iommu.h> |
22 | #endif | 24 | #endif |
23 | 25 | ||
26 | #include <mach_ipi.h> | ||
27 | |||
24 | /* | 28 | /* |
25 | * Power off function, if any | 29 | * Power off function, if any |
26 | */ | 30 | */ |
@@ -36,7 +40,16 @@ int reboot_force; | |||
36 | static int reboot_cpu = -1; | 40 | static int reboot_cpu = -1; |
37 | #endif | 41 | #endif |
38 | 42 | ||
39 | /* reboot=b[ios] | s[mp] | t[riple] | k[bd] | e[fi] [, [w]arm | [c]old] | 43 | /* This is set if we need to go through the 'emergency' path. |
44 | * When machine_emergency_restart() is called, we may be on | ||
45 | * an inconsistent state and won't be able to do a clean cleanup | ||
46 | */ | ||
47 | static int reboot_emergency; | ||
48 | |||
49 | /* This is set by the PCI code if either type 1 or type 2 PCI is detected */ | ||
50 | bool port_cf9_safe = false; | ||
51 | |||
52 | /* reboot=b[ios] | s[mp] | t[riple] | k[bd] | e[fi] [, [w]arm | [c]old] | p[ci] | ||
40 | warm Don't set the cold reboot flag | 53 | warm Don't set the cold reboot flag |
41 | cold Set the cold reboot flag | 54 | cold Set the cold reboot flag |
42 | bios Reboot by jumping through the BIOS (only for X86_32) | 55 | bios Reboot by jumping through the BIOS (only for X86_32) |
@@ -45,6 +58,7 @@ static int reboot_cpu = -1; | |||
45 | kbd Use the keyboard controller. cold reset (default) | 58 | kbd Use the keyboard controller. cold reset (default) |
46 | acpi Use the RESET_REG in the FADT | 59 | acpi Use the RESET_REG in the FADT |
47 | efi Use efi reset_system runtime service | 60 | efi Use efi reset_system runtime service |
61 | pci Use the so-called "PCI reset register", CF9 | ||
48 | force Avoid anything that could hang. | 62 | force Avoid anything that could hang. |
49 | */ | 63 | */ |
50 | static int __init reboot_setup(char *str) | 64 | static int __init reboot_setup(char *str) |
@@ -79,6 +93,7 @@ static int __init reboot_setup(char *str) | |||
79 | case 'k': | 93 | case 'k': |
80 | case 't': | 94 | case 't': |
81 | case 'e': | 95 | case 'e': |
96 | case 'p': | ||
82 | reboot_type = *str; | 97 | reboot_type = *str; |
83 | break; | 98 | break; |
84 | 99 | ||
@@ -360,6 +375,48 @@ static inline void kb_wait(void) | |||
360 | } | 375 | } |
361 | } | 376 | } |
362 | 377 | ||
378 | static void vmxoff_nmi(int cpu, struct die_args *args) | ||
379 | { | ||
380 | cpu_emergency_vmxoff(); | ||
381 | } | ||
382 | |||
383 | /* Use NMIs as IPIs to tell all CPUs to disable virtualization | ||
384 | */ | ||
385 | static void emergency_vmx_disable_all(void) | ||
386 | { | ||
387 | /* Just make sure we won't change CPUs while doing this */ | ||
388 | local_irq_disable(); | ||
389 | |||
390 | /* We need to disable VMX on all CPUs before rebooting, otherwise | ||
391 | * we risk hanging up the machine, because the CPU ignore INIT | ||
392 | * signals when VMX is enabled. | ||
393 | * | ||
394 | * We can't take any locks and we may be on an inconsistent | ||
395 | * state, so we use NMIs as IPIs to tell the other CPUs to disable | ||
396 | * VMX and halt. | ||
397 | * | ||
398 | * For safety, we will avoid running the nmi_shootdown_cpus() | ||
399 | * stuff unnecessarily, but we don't have a way to check | ||
400 | * if other CPUs have VMX enabled. So we will call it only if the | ||
401 | * CPU we are running on has VMX enabled. | ||
402 | * | ||
403 | * We will miss cases where VMX is not enabled on all CPUs. This | ||
404 | * shouldn't do much harm because KVM always enable VMX on all | ||
405 | * CPUs anyway. But we can miss it on the small window where KVM | ||
406 | * is still enabling VMX. | ||
407 | */ | ||
408 | if (cpu_has_vmx() && cpu_vmx_enabled()) { | ||
409 | /* Disable VMX on this CPU. | ||
410 | */ | ||
411 | cpu_vmxoff(); | ||
412 | |||
413 | /* Halt and disable VMX on the other CPUs */ | ||
414 | nmi_shootdown_cpus(vmxoff_nmi); | ||
415 | |||
416 | } | ||
417 | } | ||
418 | |||
419 | |||
363 | void __attribute__((weak)) mach_reboot_fixups(void) | 420 | void __attribute__((weak)) mach_reboot_fixups(void) |
364 | { | 421 | { |
365 | } | 422 | } |
@@ -368,6 +425,9 @@ static void native_machine_emergency_restart(void) | |||
368 | { | 425 | { |
369 | int i; | 426 | int i; |
370 | 427 | ||
428 | if (reboot_emergency) | ||
429 | emergency_vmx_disable_all(); | ||
430 | |||
371 | /* Tell the BIOS if we want cold or warm reboot */ | 431 | /* Tell the BIOS if we want cold or warm reboot */ |
372 | *((unsigned short *)__va(0x472)) = reboot_mode; | 432 | *((unsigned short *)__va(0x472)) = reboot_mode; |
373 | 433 | ||
@@ -404,12 +464,27 @@ static void native_machine_emergency_restart(void) | |||
404 | reboot_type = BOOT_KBD; | 464 | reboot_type = BOOT_KBD; |
405 | break; | 465 | break; |
406 | 466 | ||
407 | |||
408 | case BOOT_EFI: | 467 | case BOOT_EFI: |
409 | if (efi_enabled) | 468 | if (efi_enabled) |
410 | efi.reset_system(reboot_mode ? EFI_RESET_WARM : EFI_RESET_COLD, | 469 | efi.reset_system(reboot_mode ? |
470 | EFI_RESET_WARM : | ||
471 | EFI_RESET_COLD, | ||
411 | EFI_SUCCESS, 0, NULL); | 472 | EFI_SUCCESS, 0, NULL); |
473 | reboot_type = BOOT_KBD; | ||
474 | break; | ||
475 | |||
476 | case BOOT_CF9: | ||
477 | port_cf9_safe = true; | ||
478 | /* fall through */ | ||
412 | 479 | ||
480 | case BOOT_CF9_COND: | ||
481 | if (port_cf9_safe) { | ||
482 | u8 cf9 = inb(0xcf9) & ~6; | ||
483 | outb(cf9|2, 0xcf9); /* Request hard reset */ | ||
484 | udelay(50); | ||
485 | outb(cf9|6, 0xcf9); /* Actually do the reset */ | ||
486 | udelay(50); | ||
487 | } | ||
413 | reboot_type = BOOT_KBD; | 488 | reboot_type = BOOT_KBD; |
414 | break; | 489 | break; |
415 | } | 490 | } |
@@ -426,7 +501,7 @@ void native_machine_shutdown(void) | |||
426 | 501 | ||
427 | #ifdef CONFIG_X86_32 | 502 | #ifdef CONFIG_X86_32 |
428 | /* See if there has been given a command line override */ | 503 | /* See if there has been given a command line override */ |
429 | if ((reboot_cpu != -1) && (reboot_cpu < NR_CPUS) && | 504 | if ((reboot_cpu != -1) && (reboot_cpu < nr_cpu_ids) && |
430 | cpu_online(reboot_cpu)) | 505 | cpu_online(reboot_cpu)) |
431 | reboot_cpu_id = reboot_cpu; | 506 | reboot_cpu_id = reboot_cpu; |
432 | #endif | 507 | #endif |
@@ -436,7 +511,7 @@ void native_machine_shutdown(void) | |||
436 | reboot_cpu_id = smp_processor_id(); | 511 | reboot_cpu_id = smp_processor_id(); |
437 | 512 | ||
438 | /* Make certain I only run on the appropriate processor */ | 513 | /* Make certain I only run on the appropriate processor */ |
439 | set_cpus_allowed_ptr(current, &cpumask_of_cpu(reboot_cpu_id)); | 514 | set_cpus_allowed_ptr(current, cpumask_of(reboot_cpu_id)); |
440 | 515 | ||
441 | /* O.K Now that I'm on the appropriate processor, | 516 | /* O.K Now that I'm on the appropriate processor, |
442 | * stop all of the others. | 517 | * stop all of the others. |
@@ -459,17 +534,28 @@ void native_machine_shutdown(void) | |||
459 | #endif | 534 | #endif |
460 | } | 535 | } |
461 | 536 | ||
537 | static void __machine_emergency_restart(int emergency) | ||
538 | { | ||
539 | reboot_emergency = emergency; | ||
540 | machine_ops.emergency_restart(); | ||
541 | } | ||
542 | |||
462 | static void native_machine_restart(char *__unused) | 543 | static void native_machine_restart(char *__unused) |
463 | { | 544 | { |
464 | printk("machine restart\n"); | 545 | printk("machine restart\n"); |
465 | 546 | ||
466 | if (!reboot_force) | 547 | if (!reboot_force) |
467 | machine_shutdown(); | 548 | machine_shutdown(); |
468 | machine_emergency_restart(); | 549 | __machine_emergency_restart(0); |
469 | } | 550 | } |
470 | 551 | ||
471 | static void native_machine_halt(void) | 552 | static void native_machine_halt(void) |
472 | { | 553 | { |
554 | /* stop other cpus and apics */ | ||
555 | machine_shutdown(); | ||
556 | |||
557 | /* stop this cpu */ | ||
558 | stop_this_cpu(NULL); | ||
473 | } | 559 | } |
474 | 560 | ||
475 | static void native_machine_power_off(void) | 561 | static void native_machine_power_off(void) |
@@ -504,7 +590,7 @@ void machine_shutdown(void) | |||
504 | 590 | ||
505 | void machine_emergency_restart(void) | 591 | void machine_emergency_restart(void) |
506 | { | 592 | { |
507 | machine_ops.emergency_restart(); | 593 | __machine_emergency_restart(1); |
508 | } | 594 | } |
509 | 595 | ||
510 | void machine_restart(char *cmd) | 596 | void machine_restart(char *cmd) |
@@ -523,3 +609,92 @@ void machine_crash_shutdown(struct pt_regs *regs) | |||
523 | machine_ops.crash_shutdown(regs); | 609 | machine_ops.crash_shutdown(regs); |
524 | } | 610 | } |
525 | #endif | 611 | #endif |
612 | |||
613 | |||
614 | #if defined(CONFIG_SMP) | ||
615 | |||
616 | /* This keeps track of which cpu is the crashing one. */ | ||
617 | static int crashing_cpu; | ||
618 | static nmi_shootdown_cb shootdown_callback; | ||
619 | |||
620 | static atomic_t waiting_for_crash_ipi; | ||
621 | |||
622 | static int crash_nmi_callback(struct notifier_block *self, | ||
623 | unsigned long val, void *data) | ||
624 | { | ||
625 | int cpu; | ||
626 | |||
627 | if (val != DIE_NMI_IPI) | ||
628 | return NOTIFY_OK; | ||
629 | |||
630 | cpu = raw_smp_processor_id(); | ||
631 | |||
632 | /* Don't do anything if this handler is invoked on crashing cpu. | ||
633 | * Otherwise, system will completely hang. Crashing cpu can get | ||
634 | * an NMI if system was initially booted with nmi_watchdog parameter. | ||
635 | */ | ||
636 | if (cpu == crashing_cpu) | ||
637 | return NOTIFY_STOP; | ||
638 | local_irq_disable(); | ||
639 | |||
640 | shootdown_callback(cpu, (struct die_args *)data); | ||
641 | |||
642 | atomic_dec(&waiting_for_crash_ipi); | ||
643 | /* Assume hlt works */ | ||
644 | halt(); | ||
645 | for (;;) | ||
646 | cpu_relax(); | ||
647 | |||
648 | return 1; | ||
649 | } | ||
650 | |||
651 | static void smp_send_nmi_allbutself(void) | ||
652 | { | ||
653 | send_IPI_allbutself(NMI_VECTOR); | ||
654 | } | ||
655 | |||
656 | static struct notifier_block crash_nmi_nb = { | ||
657 | .notifier_call = crash_nmi_callback, | ||
658 | }; | ||
659 | |||
660 | /* Halt all other CPUs, calling the specified function on each of them | ||
661 | * | ||
662 | * This function can be used to halt all other CPUs on crash | ||
663 | * or emergency reboot time. The function passed as parameter | ||
664 | * will be called inside a NMI handler on all CPUs. | ||
665 | */ | ||
666 | void nmi_shootdown_cpus(nmi_shootdown_cb callback) | ||
667 | { | ||
668 | unsigned long msecs; | ||
669 | local_irq_disable(); | ||
670 | |||
671 | /* Make a note of crashing cpu. Will be used in NMI callback.*/ | ||
672 | crashing_cpu = safe_smp_processor_id(); | ||
673 | |||
674 | shootdown_callback = callback; | ||
675 | |||
676 | atomic_set(&waiting_for_crash_ipi, num_online_cpus() - 1); | ||
677 | /* Would it be better to replace the trap vector here? */ | ||
678 | if (register_die_notifier(&crash_nmi_nb)) | ||
679 | return; /* return what? */ | ||
680 | /* Ensure the new callback function is set before sending | ||
681 | * out the NMI | ||
682 | */ | ||
683 | wmb(); | ||
684 | |||
685 | smp_send_nmi_allbutself(); | ||
686 | |||
687 | msecs = 1000; /* Wait at most a second for the other cpus to stop */ | ||
688 | while ((atomic_read(&waiting_for_crash_ipi) > 0) && msecs) { | ||
689 | mdelay(1); | ||
690 | msecs--; | ||
691 | } | ||
692 | |||
693 | /* Leave the nmi callback set */ | ||
694 | } | ||
695 | #else /* !CONFIG_SMP */ | ||
696 | void nmi_shootdown_cpus(nmi_shootdown_cb callback) | ||
697 | { | ||
698 | /* No other CPUs to shoot down */ | ||
699 | } | ||
700 | #endif | ||
diff --git a/arch/x86/kernel/relocate_kernel_32.S b/arch/x86/kernel/relocate_kernel_32.S index 6f50664b2ba5..a160f3119725 100644 --- a/arch/x86/kernel/relocate_kernel_32.S +++ b/arch/x86/kernel/relocate_kernel_32.S | |||
@@ -10,15 +10,12 @@ | |||
10 | #include <asm/page.h> | 10 | #include <asm/page.h> |
11 | #include <asm/kexec.h> | 11 | #include <asm/kexec.h> |
12 | #include <asm/processor-flags.h> | 12 | #include <asm/processor-flags.h> |
13 | #include <asm/pgtable.h> | ||
14 | 13 | ||
15 | /* | 14 | /* |
16 | * Must be relocatable PIC code callable as a C function | 15 | * Must be relocatable PIC code callable as a C function |
17 | */ | 16 | */ |
18 | 17 | ||
19 | #define PTR(x) (x << 2) | 18 | #define PTR(x) (x << 2) |
20 | #define PAGE_ATTR (_PAGE_PRESENT | _PAGE_RW | _PAGE_ACCESSED | _PAGE_DIRTY) | ||
21 | #define PAE_PGD_ATTR (_PAGE_PRESENT) | ||
22 | 19 | ||
23 | /* control_page + KEXEC_CONTROL_CODE_MAX_SIZE | 20 | /* control_page + KEXEC_CONTROL_CODE_MAX_SIZE |
24 | * ~ control_page + PAGE_SIZE are used as data storage and stack for | 21 | * ~ control_page + PAGE_SIZE are used as data storage and stack for |
@@ -39,7 +36,6 @@ | |||
39 | #define CP_PA_BACKUP_PAGES_MAP DATA(0x1c) | 36 | #define CP_PA_BACKUP_PAGES_MAP DATA(0x1c) |
40 | 37 | ||
41 | .text | 38 | .text |
42 | .align PAGE_SIZE | ||
43 | .globl relocate_kernel | 39 | .globl relocate_kernel |
44 | relocate_kernel: | 40 | relocate_kernel: |
45 | /* Save the CPU context, used for jumping back */ | 41 | /* Save the CPU context, used for jumping back */ |
@@ -60,117 +56,6 @@ relocate_kernel: | |||
60 | movl %cr4, %eax | 56 | movl %cr4, %eax |
61 | movl %eax, CR4(%edi) | 57 | movl %eax, CR4(%edi) |
62 | 58 | ||
63 | #ifdef CONFIG_X86_PAE | ||
64 | /* map the control page at its virtual address */ | ||
65 | |||
66 | movl PTR(VA_PGD)(%ebp), %edi | ||
67 | movl PTR(VA_CONTROL_PAGE)(%ebp), %eax | ||
68 | andl $0xc0000000, %eax | ||
69 | shrl $27, %eax | ||
70 | addl %edi, %eax | ||
71 | |||
72 | movl PTR(PA_PMD_0)(%ebp), %edx | ||
73 | orl $PAE_PGD_ATTR, %edx | ||
74 | movl %edx, (%eax) | ||
75 | |||
76 | movl PTR(VA_PMD_0)(%ebp), %edi | ||
77 | movl PTR(VA_CONTROL_PAGE)(%ebp), %eax | ||
78 | andl $0x3fe00000, %eax | ||
79 | shrl $18, %eax | ||
80 | addl %edi, %eax | ||
81 | |||
82 | movl PTR(PA_PTE_0)(%ebp), %edx | ||
83 | orl $PAGE_ATTR, %edx | ||
84 | movl %edx, (%eax) | ||
85 | |||
86 | movl PTR(VA_PTE_0)(%ebp), %edi | ||
87 | movl PTR(VA_CONTROL_PAGE)(%ebp), %eax | ||
88 | andl $0x001ff000, %eax | ||
89 | shrl $9, %eax | ||
90 | addl %edi, %eax | ||
91 | |||
92 | movl PTR(PA_CONTROL_PAGE)(%ebp), %edx | ||
93 | orl $PAGE_ATTR, %edx | ||
94 | movl %edx, (%eax) | ||
95 | |||
96 | /* identity map the control page at its physical address */ | ||
97 | |||
98 | movl PTR(VA_PGD)(%ebp), %edi | ||
99 | movl PTR(PA_CONTROL_PAGE)(%ebp), %eax | ||
100 | andl $0xc0000000, %eax | ||
101 | shrl $27, %eax | ||
102 | addl %edi, %eax | ||
103 | |||
104 | movl PTR(PA_PMD_1)(%ebp), %edx | ||
105 | orl $PAE_PGD_ATTR, %edx | ||
106 | movl %edx, (%eax) | ||
107 | |||
108 | movl PTR(VA_PMD_1)(%ebp), %edi | ||
109 | movl PTR(PA_CONTROL_PAGE)(%ebp), %eax | ||
110 | andl $0x3fe00000, %eax | ||
111 | shrl $18, %eax | ||
112 | addl %edi, %eax | ||
113 | |||
114 | movl PTR(PA_PTE_1)(%ebp), %edx | ||
115 | orl $PAGE_ATTR, %edx | ||
116 | movl %edx, (%eax) | ||
117 | |||
118 | movl PTR(VA_PTE_1)(%ebp), %edi | ||
119 | movl PTR(PA_CONTROL_PAGE)(%ebp), %eax | ||
120 | andl $0x001ff000, %eax | ||
121 | shrl $9, %eax | ||
122 | addl %edi, %eax | ||
123 | |||
124 | movl PTR(PA_CONTROL_PAGE)(%ebp), %edx | ||
125 | orl $PAGE_ATTR, %edx | ||
126 | movl %edx, (%eax) | ||
127 | #else | ||
128 | /* map the control page at its virtual address */ | ||
129 | |||
130 | movl PTR(VA_PGD)(%ebp), %edi | ||
131 | movl PTR(VA_CONTROL_PAGE)(%ebp), %eax | ||
132 | andl $0xffc00000, %eax | ||
133 | shrl $20, %eax | ||
134 | addl %edi, %eax | ||
135 | |||
136 | movl PTR(PA_PTE_0)(%ebp), %edx | ||
137 | orl $PAGE_ATTR, %edx | ||
138 | movl %edx, (%eax) | ||
139 | |||
140 | movl PTR(VA_PTE_0)(%ebp), %edi | ||
141 | movl PTR(VA_CONTROL_PAGE)(%ebp), %eax | ||
142 | andl $0x003ff000, %eax | ||
143 | shrl $10, %eax | ||
144 | addl %edi, %eax | ||
145 | |||
146 | movl PTR(PA_CONTROL_PAGE)(%ebp), %edx | ||
147 | orl $PAGE_ATTR, %edx | ||
148 | movl %edx, (%eax) | ||
149 | |||
150 | /* identity map the control page at its physical address */ | ||
151 | |||
152 | movl PTR(VA_PGD)(%ebp), %edi | ||
153 | movl PTR(PA_CONTROL_PAGE)(%ebp), %eax | ||
154 | andl $0xffc00000, %eax | ||
155 | shrl $20, %eax | ||
156 | addl %edi, %eax | ||
157 | |||
158 | movl PTR(PA_PTE_1)(%ebp), %edx | ||
159 | orl $PAGE_ATTR, %edx | ||
160 | movl %edx, (%eax) | ||
161 | |||
162 | movl PTR(VA_PTE_1)(%ebp), %edi | ||
163 | movl PTR(PA_CONTROL_PAGE)(%ebp), %eax | ||
164 | andl $0x003ff000, %eax | ||
165 | shrl $10, %eax | ||
166 | addl %edi, %eax | ||
167 | |||
168 | movl PTR(PA_CONTROL_PAGE)(%ebp), %edx | ||
169 | orl $PAGE_ATTR, %edx | ||
170 | movl %edx, (%eax) | ||
171 | #endif | ||
172 | |||
173 | relocate_new_kernel: | ||
174 | /* read the arguments and say goodbye to the stack */ | 59 | /* read the arguments and say goodbye to the stack */ |
175 | movl 20+4(%esp), %ebx /* page_list */ | 60 | movl 20+4(%esp), %ebx /* page_list */ |
176 | movl 20+8(%esp), %ebp /* list of pages */ | 61 | movl 20+8(%esp), %ebp /* list of pages */ |
diff --git a/arch/x86/kernel/setup.c b/arch/x86/kernel/setup.c index bdec76e55594..ae0d8042cf69 100644 --- a/arch/x86/kernel/setup.c +++ b/arch/x86/kernel/setup.c | |||
@@ -93,11 +93,13 @@ | |||
93 | #include <asm/desc.h> | 93 | #include <asm/desc.h> |
94 | #include <asm/dma.h> | 94 | #include <asm/dma.h> |
95 | #include <asm/iommu.h> | 95 | #include <asm/iommu.h> |
96 | #include <asm/gart.h> | ||
96 | #include <asm/mmu_context.h> | 97 | #include <asm/mmu_context.h> |
97 | #include <asm/proto.h> | 98 | #include <asm/proto.h> |
98 | 99 | ||
99 | #include <mach_apic.h> | 100 | #include <mach_apic.h> |
100 | #include <asm/paravirt.h> | 101 | #include <asm/paravirt.h> |
102 | #include <asm/hypervisor.h> | ||
101 | 103 | ||
102 | #include <asm/percpu.h> | 104 | #include <asm/percpu.h> |
103 | #include <asm/topology.h> | 105 | #include <asm/topology.h> |
@@ -448,6 +450,7 @@ static void __init reserve_early_setup_data(void) | |||
448 | * @size: Size of the crashkernel memory to reserve. | 450 | * @size: Size of the crashkernel memory to reserve. |
449 | * Returns the base address on success, and -1ULL on failure. | 451 | * Returns the base address on success, and -1ULL on failure. |
450 | */ | 452 | */ |
453 | static | ||
451 | unsigned long long __init find_and_reserve_crashkernel(unsigned long long size) | 454 | unsigned long long __init find_and_reserve_crashkernel(unsigned long long size) |
452 | { | 455 | { |
453 | const unsigned long long alignment = 16<<20; /* 16M */ | 456 | const unsigned long long alignment = 16<<20; /* 16M */ |
@@ -583,161 +586,24 @@ static int __init setup_elfcorehdr(char *arg) | |||
583 | early_param("elfcorehdr", setup_elfcorehdr); | 586 | early_param("elfcorehdr", setup_elfcorehdr); |
584 | #endif | 587 | #endif |
585 | 588 | ||
586 | static struct x86_quirks default_x86_quirks __initdata; | 589 | static int __init default_update_genapic(void) |
587 | |||
588 | struct x86_quirks *x86_quirks __initdata = &default_x86_quirks; | ||
589 | |||
590 | /* | ||
591 | * Some BIOSes seem to corrupt the low 64k of memory during events | ||
592 | * like suspend/resume and unplugging an HDMI cable. Reserve all | ||
593 | * remaining free memory in that area and fill it with a distinct | ||
594 | * pattern. | ||
595 | */ | ||
596 | #ifdef CONFIG_X86_CHECK_BIOS_CORRUPTION | ||
597 | #define MAX_SCAN_AREAS 8 | ||
598 | |||
599 | static int __read_mostly memory_corruption_check = -1; | ||
600 | |||
601 | static unsigned __read_mostly corruption_check_size = 64*1024; | ||
602 | static unsigned __read_mostly corruption_check_period = 60; /* seconds */ | ||
603 | |||
604 | static struct e820entry scan_areas[MAX_SCAN_AREAS]; | ||
605 | static int num_scan_areas; | ||
606 | |||
607 | |||
608 | static int set_corruption_check(char *arg) | ||
609 | { | ||
610 | char *end; | ||
611 | |||
612 | memory_corruption_check = simple_strtol(arg, &end, 10); | ||
613 | |||
614 | return (*end == 0) ? 0 : -EINVAL; | ||
615 | } | ||
616 | early_param("memory_corruption_check", set_corruption_check); | ||
617 | |||
618 | static int set_corruption_check_period(char *arg) | ||
619 | { | ||
620 | char *end; | ||
621 | |||
622 | corruption_check_period = simple_strtoul(arg, &end, 10); | ||
623 | |||
624 | return (*end == 0) ? 0 : -EINVAL; | ||
625 | } | ||
626 | early_param("memory_corruption_check_period", set_corruption_check_period); | ||
627 | |||
628 | static int set_corruption_check_size(char *arg) | ||
629 | { | 590 | { |
630 | char *end; | 591 | #ifdef CONFIG_X86_SMP |
631 | unsigned size; | 592 | # if defined(CONFIG_X86_GENERICARCH) || defined(CONFIG_X86_64) |
632 | 593 | genapic->wakeup_cpu = wakeup_secondary_cpu_via_init; | |
633 | size = memparse(arg, &end); | 594 | # endif |
634 | |||
635 | if (*end == '\0') | ||
636 | corruption_check_size = size; | ||
637 | |||
638 | return (size == corruption_check_size) ? 0 : -EINVAL; | ||
639 | } | ||
640 | early_param("memory_corruption_check_size", set_corruption_check_size); | ||
641 | |||
642 | |||
643 | static void __init setup_bios_corruption_check(void) | ||
644 | { | ||
645 | u64 addr = PAGE_SIZE; /* assume first page is reserved anyway */ | ||
646 | |||
647 | if (memory_corruption_check == -1) { | ||
648 | memory_corruption_check = | ||
649 | #ifdef CONFIG_X86_BOOTPARAM_MEMORY_CORRUPTION_CHECK | ||
650 | 1 | ||
651 | #else | ||
652 | 0 | ||
653 | #endif | 595 | #endif |
654 | ; | ||
655 | } | ||
656 | |||
657 | if (corruption_check_size == 0) | ||
658 | memory_corruption_check = 0; | ||
659 | |||
660 | if (!memory_corruption_check) | ||
661 | return; | ||
662 | |||
663 | corruption_check_size = round_up(corruption_check_size, PAGE_SIZE); | ||
664 | 596 | ||
665 | while(addr < corruption_check_size && num_scan_areas < MAX_SCAN_AREAS) { | 597 | return 0; |
666 | u64 size; | ||
667 | addr = find_e820_area_size(addr, &size, PAGE_SIZE); | ||
668 | |||
669 | if (addr == 0) | ||
670 | break; | ||
671 | |||
672 | if ((addr + size) > corruption_check_size) | ||
673 | size = corruption_check_size - addr; | ||
674 | |||
675 | if (size == 0) | ||
676 | break; | ||
677 | |||
678 | e820_update_range(addr, size, E820_RAM, E820_RESERVED); | ||
679 | scan_areas[num_scan_areas].addr = addr; | ||
680 | scan_areas[num_scan_areas].size = size; | ||
681 | num_scan_areas++; | ||
682 | |||
683 | /* Assume we've already mapped this early memory */ | ||
684 | memset(__va(addr), 0, size); | ||
685 | |||
686 | addr += size; | ||
687 | } | ||
688 | |||
689 | printk(KERN_INFO "Scanning %d areas for low memory corruption\n", | ||
690 | num_scan_areas); | ||
691 | update_e820(); | ||
692 | } | ||
693 | |||
694 | static struct timer_list periodic_check_timer; | ||
695 | |||
696 | void check_for_bios_corruption(void) | ||
697 | { | ||
698 | int i; | ||
699 | int corruption = 0; | ||
700 | |||
701 | if (!memory_corruption_check) | ||
702 | return; | ||
703 | |||
704 | for(i = 0; i < num_scan_areas; i++) { | ||
705 | unsigned long *addr = __va(scan_areas[i].addr); | ||
706 | unsigned long size = scan_areas[i].size; | ||
707 | |||
708 | for(; size; addr++, size -= sizeof(unsigned long)) { | ||
709 | if (!*addr) | ||
710 | continue; | ||
711 | printk(KERN_ERR "Corrupted low memory at %p (%lx phys) = %08lx\n", | ||
712 | addr, __pa(addr), *addr); | ||
713 | corruption = 1; | ||
714 | *addr = 0; | ||
715 | } | ||
716 | } | ||
717 | |||
718 | WARN(corruption, KERN_ERR "Memory corruption detected in low memory\n"); | ||
719 | } | ||
720 | |||
721 | static void periodic_check_for_corruption(unsigned long data) | ||
722 | { | ||
723 | check_for_bios_corruption(); | ||
724 | mod_timer(&periodic_check_timer, round_jiffies(jiffies + corruption_check_period*HZ)); | ||
725 | } | 598 | } |
726 | 599 | ||
727 | void start_periodic_check_for_corruption(void) | 600 | static struct x86_quirks default_x86_quirks __initdata = { |
728 | { | 601 | .update_genapic = default_update_genapic, |
729 | if (!memory_corruption_check || corruption_check_period == 0) | 602 | }; |
730 | return; | ||
731 | |||
732 | printk(KERN_INFO "Scanning for low memory corruption every %d seconds\n", | ||
733 | corruption_check_period); | ||
734 | 603 | ||
735 | init_timer(&periodic_check_timer); | 604 | struct x86_quirks *x86_quirks __initdata = &default_x86_quirks; |
736 | periodic_check_timer.function = &periodic_check_for_corruption; | ||
737 | periodic_check_for_corruption(0); | ||
738 | } | ||
739 | #endif | ||
740 | 605 | ||
606 | #ifdef CONFIG_X86_RESERVE_LOW_64K | ||
741 | static int __init dmi_low_memory_corruption(const struct dmi_system_id *d) | 607 | static int __init dmi_low_memory_corruption(const struct dmi_system_id *d) |
742 | { | 608 | { |
743 | printk(KERN_NOTICE | 609 | printk(KERN_NOTICE |
@@ -749,6 +615,7 @@ static int __init dmi_low_memory_corruption(const struct dmi_system_id *d) | |||
749 | 615 | ||
750 | return 0; | 616 | return 0; |
751 | } | 617 | } |
618 | #endif | ||
752 | 619 | ||
753 | /* List of systems that have known low memory corruption BIOS problems */ | 620 | /* List of systems that have known low memory corruption BIOS problems */ |
754 | static struct dmi_system_id __initdata bad_bios_dmi_table[] = { | 621 | static struct dmi_system_id __initdata bad_bios_dmi_table[] = { |
@@ -907,6 +774,12 @@ void __init setup_arch(char **cmdline_p) | |||
907 | 774 | ||
908 | dmi_check_system(bad_bios_dmi_table); | 775 | dmi_check_system(bad_bios_dmi_table); |
909 | 776 | ||
777 | /* | ||
778 | * VMware detection requires dmi to be available, so this | ||
779 | * needs to be done after dmi_scan_machine, for the BP. | ||
780 | */ | ||
781 | init_hypervisor(&boot_cpu_data); | ||
782 | |||
910 | #ifdef CONFIG_X86_32 | 783 | #ifdef CONFIG_X86_32 |
911 | probe_roms(); | 784 | probe_roms(); |
912 | #endif | 785 | #endif |
@@ -1080,7 +953,7 @@ void __init setup_arch(char **cmdline_p) | |||
1080 | ioapic_init_mappings(); | 953 | ioapic_init_mappings(); |
1081 | 954 | ||
1082 | /* need to wait for io_apic is mapped */ | 955 | /* need to wait for io_apic is mapped */ |
1083 | nr_irqs = probe_nr_irqs(); | 956 | probe_nr_irqs_gsi(); |
1084 | 957 | ||
1085 | kvm_guest_init(); | 958 | kvm_guest_init(); |
1086 | 959 | ||
diff --git a/arch/x86/kernel/setup_percpu.c b/arch/x86/kernel/setup_percpu.c index ae0c0d3bb770..a4b619c33106 100644 --- a/arch/x86/kernel/setup_percpu.c +++ b/arch/x86/kernel/setup_percpu.c | |||
@@ -152,8 +152,11 @@ void __init setup_per_cpu_areas(void) | |||
152 | old_size = PERCPU_ENOUGH_ROOM; | 152 | old_size = PERCPU_ENOUGH_ROOM; |
153 | align = max_t(unsigned long, PAGE_SIZE, align); | 153 | align = max_t(unsigned long, PAGE_SIZE, align); |
154 | size = roundup(old_size, align); | 154 | size = roundup(old_size, align); |
155 | printk(KERN_INFO "PERCPU: Allocating %zd bytes of per cpu data\n", | 155 | |
156 | size); | 156 | pr_info("NR_CPUS:%d nr_cpumask_bits:%d nr_cpu_ids:%d nr_node_ids:%d\n", |
157 | NR_CPUS, nr_cpumask_bits, nr_cpu_ids, nr_node_ids); | ||
158 | |||
159 | pr_info("PERCPU: Allocating %zd bytes of per cpu data\n", size); | ||
157 | 160 | ||
158 | for_each_possible_cpu(cpu) { | 161 | for_each_possible_cpu(cpu) { |
159 | #ifndef CONFIG_NEED_MULTIPLE_NODES | 162 | #ifndef CONFIG_NEED_MULTIPLE_NODES |
@@ -164,28 +167,21 @@ void __init setup_per_cpu_areas(void) | |||
164 | if (!node_online(node) || !NODE_DATA(node)) { | 167 | if (!node_online(node) || !NODE_DATA(node)) { |
165 | ptr = __alloc_bootmem(size, align, | 168 | ptr = __alloc_bootmem(size, align, |
166 | __pa(MAX_DMA_ADDRESS)); | 169 | __pa(MAX_DMA_ADDRESS)); |
167 | printk(KERN_INFO | 170 | pr_info("cpu %d has no node %d or node-local memory\n", |
168 | "cpu %d has no node %d or node-local memory\n", | ||
169 | cpu, node); | 171 | cpu, node); |
170 | if (ptr) | 172 | pr_debug("per cpu data for cpu%d at %016lx\n", |
171 | printk(KERN_DEBUG "per cpu data for cpu%d at %016lx\n", | 173 | cpu, __pa(ptr)); |
172 | cpu, __pa(ptr)); | 174 | } else { |
173 | } | ||
174 | else { | ||
175 | ptr = __alloc_bootmem_node(NODE_DATA(node), size, align, | 175 | ptr = __alloc_bootmem_node(NODE_DATA(node), size, align, |
176 | __pa(MAX_DMA_ADDRESS)); | 176 | __pa(MAX_DMA_ADDRESS)); |
177 | if (ptr) | 177 | pr_debug("per cpu data for cpu%d on node%d at %016lx\n", |
178 | printk(KERN_DEBUG "per cpu data for cpu%d on node%d at %016lx\n", | 178 | cpu, node, __pa(ptr)); |
179 | cpu, node, __pa(ptr)); | ||
180 | } | 179 | } |
181 | #endif | 180 | #endif |
182 | per_cpu_offset(cpu) = ptr - __per_cpu_start; | 181 | per_cpu_offset(cpu) = ptr - __per_cpu_start; |
183 | memcpy(ptr, __per_cpu_start, __per_cpu_end - __per_cpu_start); | 182 | memcpy(ptr, __per_cpu_start, __per_cpu_end - __per_cpu_start); |
184 | } | 183 | } |
185 | 184 | ||
186 | printk(KERN_DEBUG "NR_CPUS: %d, nr_cpu_ids: %d, nr_node_ids %d\n", | ||
187 | NR_CPUS, nr_cpu_ids, nr_node_ids); | ||
188 | |||
189 | /* Setup percpu data maps */ | 185 | /* Setup percpu data maps */ |
190 | setup_per_cpu_maps(); | 186 | setup_per_cpu_maps(); |
191 | 187 | ||
@@ -282,7 +278,7 @@ static void __cpuinit numa_set_cpumask(int cpu, int enable) | |||
282 | else | 278 | else |
283 | cpu_clear(cpu, *mask); | 279 | cpu_clear(cpu, *mask); |
284 | 280 | ||
285 | cpulist_scnprintf(buf, sizeof(buf), *mask); | 281 | cpulist_scnprintf(buf, sizeof(buf), mask); |
286 | printk(KERN_DEBUG "%s cpu %d node %d: mask now %s\n", | 282 | printk(KERN_DEBUG "%s cpu %d node %d: mask now %s\n", |
287 | enable? "numa_add_cpu":"numa_remove_cpu", cpu, node, buf); | 283 | enable? "numa_add_cpu":"numa_remove_cpu", cpu, node, buf); |
288 | } | 284 | } |
@@ -334,25 +330,25 @@ static const cpumask_t cpu_mask_none; | |||
334 | /* | 330 | /* |
335 | * Returns a pointer to the bitmask of CPUs on Node 'node'. | 331 | * Returns a pointer to the bitmask of CPUs on Node 'node'. |
336 | */ | 332 | */ |
337 | const cpumask_t *_node_to_cpumask_ptr(int node) | 333 | const cpumask_t *cpumask_of_node(int node) |
338 | { | 334 | { |
339 | if (node_to_cpumask_map == NULL) { | 335 | if (node_to_cpumask_map == NULL) { |
340 | printk(KERN_WARNING | 336 | printk(KERN_WARNING |
341 | "_node_to_cpumask_ptr(%d): no node_to_cpumask_map!\n", | 337 | "cpumask_of_node(%d): no node_to_cpumask_map!\n", |
342 | node); | 338 | node); |
343 | dump_stack(); | 339 | dump_stack(); |
344 | return (const cpumask_t *)&cpu_online_map; | 340 | return (const cpumask_t *)&cpu_online_map; |
345 | } | 341 | } |
346 | if (node >= nr_node_ids) { | 342 | if (node >= nr_node_ids) { |
347 | printk(KERN_WARNING | 343 | printk(KERN_WARNING |
348 | "_node_to_cpumask_ptr(%d): node > nr_node_ids(%d)\n", | 344 | "cpumask_of_node(%d): node > nr_node_ids(%d)\n", |
349 | node, nr_node_ids); | 345 | node, nr_node_ids); |
350 | dump_stack(); | 346 | dump_stack(); |
351 | return &cpu_mask_none; | 347 | return &cpu_mask_none; |
352 | } | 348 | } |
353 | return &node_to_cpumask_map[node]; | 349 | return &node_to_cpumask_map[node]; |
354 | } | 350 | } |
355 | EXPORT_SYMBOL(_node_to_cpumask_ptr); | 351 | EXPORT_SYMBOL(cpumask_of_node); |
356 | 352 | ||
357 | /* | 353 | /* |
358 | * Returns a bitmask of CPUs on Node 'node'. | 354 | * Returns a bitmask of CPUs on Node 'node'. |
diff --git a/arch/x86/kernel/sigframe.h b/arch/x86/kernel/sigframe.h deleted file mode 100644 index cc673aa55ce4..000000000000 --- a/arch/x86/kernel/sigframe.h +++ /dev/null | |||
@@ -1,42 +0,0 @@ | |||
1 | #ifdef CONFIG_X86_32 | ||
2 | struct sigframe { | ||
3 | char __user *pretcode; | ||
4 | int sig; | ||
5 | struct sigcontext sc; | ||
6 | /* | ||
7 | * fpstate is unused. fpstate is moved/allocated after | ||
8 | * retcode[] below. This movement allows to have the FP state and the | ||
9 | * future state extensions (xsave) stay together. | ||
10 | * And at the same time retaining the unused fpstate, prevents changing | ||
11 | * the offset of extramask[] in the sigframe and thus prevent any | ||
12 | * legacy application accessing/modifying it. | ||
13 | */ | ||
14 | struct _fpstate fpstate_unused; | ||
15 | unsigned long extramask[_NSIG_WORDS-1]; | ||
16 | char retcode[8]; | ||
17 | /* fp state follows here */ | ||
18 | }; | ||
19 | |||
20 | struct rt_sigframe { | ||
21 | char __user *pretcode; | ||
22 | int sig; | ||
23 | struct siginfo __user *pinfo; | ||
24 | void __user *puc; | ||
25 | struct siginfo info; | ||
26 | struct ucontext uc; | ||
27 | char retcode[8]; | ||
28 | /* fp state follows here */ | ||
29 | }; | ||
30 | #else | ||
31 | struct rt_sigframe { | ||
32 | char __user *pretcode; | ||
33 | struct ucontext uc; | ||
34 | struct siginfo info; | ||
35 | /* fp state follows here */ | ||
36 | }; | ||
37 | |||
38 | int ia32_setup_rt_frame(int sig, struct k_sigaction *ka, siginfo_t *info, | ||
39 | sigset_t *set, struct pt_regs *regs); | ||
40 | int ia32_setup_frame(int sig, struct k_sigaction *ka, | ||
41 | sigset_t *set, struct pt_regs *regs); | ||
42 | #endif | ||
diff --git a/arch/x86/kernel/signal_32.c b/arch/x86/kernel/signal.c index d6dd057d0f22..89bb7668041d 100644 --- a/arch/x86/kernel/signal_32.c +++ b/arch/x86/kernel/signal.c | |||
@@ -1,36 +1,41 @@ | |||
1 | /* | 1 | /* |
2 | * Copyright (C) 1991, 1992 Linus Torvalds | 2 | * Copyright (C) 1991, 1992 Linus Torvalds |
3 | * Copyright (C) 2000, 2001, 2002 Andi Kleen SuSE Labs | ||
3 | * | 4 | * |
4 | * 1997-11-28 Modified for POSIX.1b signals by Richard Henderson | 5 | * 1997-11-28 Modified for POSIX.1b signals by Richard Henderson |
5 | * 2000-06-20 Pentium III FXSR, SSE support by Gareth Hughes | 6 | * 2000-06-20 Pentium III FXSR, SSE support by Gareth Hughes |
7 | * 2000-2002 x86-64 support by Andi Kleen | ||
6 | */ | 8 | */ |
7 | #include <linux/list.h> | ||
8 | 9 | ||
9 | #include <linux/personality.h> | 10 | #include <linux/sched.h> |
10 | #include <linux/binfmts.h> | 11 | #include <linux/mm.h> |
11 | #include <linux/suspend.h> | 12 | #include <linux/smp.h> |
12 | #include <linux/kernel.h> | 13 | #include <linux/kernel.h> |
13 | #include <linux/ptrace.h> | ||
14 | #include <linux/signal.h> | 14 | #include <linux/signal.h> |
15 | #include <linux/stddef.h> | ||
16 | #include <linux/unistd.h> | ||
17 | #include <linux/errno.h> | 15 | #include <linux/errno.h> |
18 | #include <linux/sched.h> | ||
19 | #include <linux/wait.h> | 16 | #include <linux/wait.h> |
17 | #include <linux/ptrace.h> | ||
20 | #include <linux/tracehook.h> | 18 | #include <linux/tracehook.h> |
21 | #include <linux/elf.h> | 19 | #include <linux/unistd.h> |
22 | #include <linux/smp.h> | 20 | #include <linux/stddef.h> |
23 | #include <linux/mm.h> | 21 | #include <linux/personality.h> |
22 | #include <linux/uaccess.h> | ||
24 | 23 | ||
25 | #include <asm/processor.h> | 24 | #include <asm/processor.h> |
26 | #include <asm/ucontext.h> | 25 | #include <asm/ucontext.h> |
27 | #include <asm/uaccess.h> | ||
28 | #include <asm/i387.h> | 26 | #include <asm/i387.h> |
29 | #include <asm/vdso.h> | 27 | #include <asm/vdso.h> |
28 | |||
29 | #ifdef CONFIG_X86_64 | ||
30 | #include <asm/proto.h> | ||
31 | #include <asm/ia32_unistd.h> | ||
32 | #include <asm/mce.h> | ||
33 | #endif /* CONFIG_X86_64 */ | ||
34 | |||
30 | #include <asm/syscall.h> | 35 | #include <asm/syscall.h> |
31 | #include <asm/syscalls.h> | 36 | #include <asm/syscalls.h> |
32 | 37 | ||
33 | #include "sigframe.h" | 38 | #include <asm/sigframe.h> |
34 | 39 | ||
35 | #define _BLOCKABLE (~(sigmask(SIGKILL) | sigmask(SIGSTOP))) | 40 | #define _BLOCKABLE (~(sigmask(SIGKILL) | sigmask(SIGSTOP))) |
36 | 41 | ||
@@ -45,74 +50,6 @@ | |||
45 | # define FIX_EFLAGS __FIX_EFLAGS | 50 | # define FIX_EFLAGS __FIX_EFLAGS |
46 | #endif | 51 | #endif |
47 | 52 | ||
48 | /* | ||
49 | * Atomically swap in the new signal mask, and wait for a signal. | ||
50 | */ | ||
51 | asmlinkage int | ||
52 | sys_sigsuspend(int history0, int history1, old_sigset_t mask) | ||
53 | { | ||
54 | mask &= _BLOCKABLE; | ||
55 | spin_lock_irq(&current->sighand->siglock); | ||
56 | current->saved_sigmask = current->blocked; | ||
57 | siginitset(&current->blocked, mask); | ||
58 | recalc_sigpending(); | ||
59 | spin_unlock_irq(&current->sighand->siglock); | ||
60 | |||
61 | current->state = TASK_INTERRUPTIBLE; | ||
62 | schedule(); | ||
63 | set_restore_sigmask(); | ||
64 | |||
65 | return -ERESTARTNOHAND; | ||
66 | } | ||
67 | |||
68 | asmlinkage int | ||
69 | sys_sigaction(int sig, const struct old_sigaction __user *act, | ||
70 | struct old_sigaction __user *oact) | ||
71 | { | ||
72 | struct k_sigaction new_ka, old_ka; | ||
73 | int ret; | ||
74 | |||
75 | if (act) { | ||
76 | old_sigset_t mask; | ||
77 | |||
78 | if (!access_ok(VERIFY_READ, act, sizeof(*act)) || | ||
79 | __get_user(new_ka.sa.sa_handler, &act->sa_handler) || | ||
80 | __get_user(new_ka.sa.sa_restorer, &act->sa_restorer)) | ||
81 | return -EFAULT; | ||
82 | |||
83 | __get_user(new_ka.sa.sa_flags, &act->sa_flags); | ||
84 | __get_user(mask, &act->sa_mask); | ||
85 | siginitset(&new_ka.sa.sa_mask, mask); | ||
86 | } | ||
87 | |||
88 | ret = do_sigaction(sig, act ? &new_ka : NULL, oact ? &old_ka : NULL); | ||
89 | |||
90 | if (!ret && oact) { | ||
91 | if (!access_ok(VERIFY_WRITE, oact, sizeof(*oact)) || | ||
92 | __put_user(old_ka.sa.sa_handler, &oact->sa_handler) || | ||
93 | __put_user(old_ka.sa.sa_restorer, &oact->sa_restorer)) | ||
94 | return -EFAULT; | ||
95 | |||
96 | __put_user(old_ka.sa.sa_flags, &oact->sa_flags); | ||
97 | __put_user(old_ka.sa.sa_mask.sig[0], &oact->sa_mask); | ||
98 | } | ||
99 | |||
100 | return ret; | ||
101 | } | ||
102 | |||
103 | asmlinkage int sys_sigaltstack(unsigned long bx) | ||
104 | { | ||
105 | /* | ||
106 | * This is needed to make gcc realize it doesn't own the | ||
107 | * "struct pt_regs" | ||
108 | */ | ||
109 | struct pt_regs *regs = (struct pt_regs *)&bx; | ||
110 | const stack_t __user *uss = (const stack_t __user *)bx; | ||
111 | stack_t __user *uoss = (stack_t __user *)regs->cx; | ||
112 | |||
113 | return do_sigaltstack(uss, uoss, regs->sp); | ||
114 | } | ||
115 | |||
116 | #define COPY(x) { \ | 53 | #define COPY(x) { \ |
117 | err |= __get_user(regs->x, &sc->x); \ | 54 | err |= __get_user(regs->x, &sc->x); \ |
118 | } | 55 | } |
@@ -123,7 +60,7 @@ asmlinkage int sys_sigaltstack(unsigned long bx) | |||
123 | regs->seg = tmp; \ | 60 | regs->seg = tmp; \ |
124 | } | 61 | } |
125 | 62 | ||
126 | #define COPY_SEG_STRICT(seg) { \ | 63 | #define COPY_SEG_CPL3(seg) { \ |
127 | unsigned short tmp; \ | 64 | unsigned short tmp; \ |
128 | err |= __get_user(tmp, &sc->seg); \ | 65 | err |= __get_user(tmp, &sc->seg); \ |
129 | regs->seg = tmp | 3; \ | 66 | regs->seg = tmp | 3; \ |
@@ -135,9 +72,6 @@ asmlinkage int sys_sigaltstack(unsigned long bx) | |||
135 | loadsegment(seg, tmp); \ | 72 | loadsegment(seg, tmp); \ |
136 | } | 73 | } |
137 | 74 | ||
138 | /* | ||
139 | * Do a signal return; undo the signal stack. | ||
140 | */ | ||
141 | static int | 75 | static int |
142 | restore_sigcontext(struct pt_regs *regs, struct sigcontext __user *sc, | 76 | restore_sigcontext(struct pt_regs *regs, struct sigcontext __user *sc, |
143 | unsigned long *pax) | 77 | unsigned long *pax) |
@@ -149,14 +83,36 @@ restore_sigcontext(struct pt_regs *regs, struct sigcontext __user *sc, | |||
149 | /* Always make any pending restarted system calls return -EINTR */ | 83 | /* Always make any pending restarted system calls return -EINTR */ |
150 | current_thread_info()->restart_block.fn = do_no_restart_syscall; | 84 | current_thread_info()->restart_block.fn = do_no_restart_syscall; |
151 | 85 | ||
86 | #ifdef CONFIG_X86_32 | ||
152 | GET_SEG(gs); | 87 | GET_SEG(gs); |
153 | COPY_SEG(fs); | 88 | COPY_SEG(fs); |
154 | COPY_SEG(es); | 89 | COPY_SEG(es); |
155 | COPY_SEG(ds); | 90 | COPY_SEG(ds); |
91 | #endif /* CONFIG_X86_32 */ | ||
92 | |||
156 | COPY(di); COPY(si); COPY(bp); COPY(sp); COPY(bx); | 93 | COPY(di); COPY(si); COPY(bp); COPY(sp); COPY(bx); |
157 | COPY(dx); COPY(cx); COPY(ip); | 94 | COPY(dx); COPY(cx); COPY(ip); |
158 | COPY_SEG_STRICT(cs); | 95 | |
159 | COPY_SEG_STRICT(ss); | 96 | #ifdef CONFIG_X86_64 |
97 | COPY(r8); | ||
98 | COPY(r9); | ||
99 | COPY(r10); | ||
100 | COPY(r11); | ||
101 | COPY(r12); | ||
102 | COPY(r13); | ||
103 | COPY(r14); | ||
104 | COPY(r15); | ||
105 | #endif /* CONFIG_X86_64 */ | ||
106 | |||
107 | #ifdef CONFIG_X86_32 | ||
108 | COPY_SEG_CPL3(cs); | ||
109 | COPY_SEG_CPL3(ss); | ||
110 | #else /* !CONFIG_X86_32 */ | ||
111 | /* Kernel saves and restores only the CS segment register on signals, | ||
112 | * which is the bare minimum needed to allow mixed 32/64-bit code. | ||
113 | * App's signal handler can save/restore other segments if needed. */ | ||
114 | COPY_SEG_CPL3(cs); | ||
115 | #endif /* CONFIG_X86_32 */ | ||
160 | 116 | ||
161 | err |= __get_user(tmpflags, &sc->flags); | 117 | err |= __get_user(tmpflags, &sc->flags); |
162 | regs->flags = (regs->flags & ~FIX_EFLAGS) | (tmpflags & FIX_EFLAGS); | 118 | regs->flags = (regs->flags & ~FIX_EFLAGS) | (tmpflags & FIX_EFLAGS); |
@@ -169,102 +125,24 @@ restore_sigcontext(struct pt_regs *regs, struct sigcontext __user *sc, | |||
169 | return err; | 125 | return err; |
170 | } | 126 | } |
171 | 127 | ||
172 | asmlinkage unsigned long sys_sigreturn(unsigned long __unused) | ||
173 | { | ||
174 | struct sigframe __user *frame; | ||
175 | struct pt_regs *regs; | ||
176 | unsigned long ax; | ||
177 | sigset_t set; | ||
178 | |||
179 | regs = (struct pt_regs *) &__unused; | ||
180 | frame = (struct sigframe __user *)(regs->sp - 8); | ||
181 | |||
182 | if (!access_ok(VERIFY_READ, frame, sizeof(*frame))) | ||
183 | goto badframe; | ||
184 | if (__get_user(set.sig[0], &frame->sc.oldmask) || (_NSIG_WORDS > 1 | ||
185 | && __copy_from_user(&set.sig[1], &frame->extramask, | ||
186 | sizeof(frame->extramask)))) | ||
187 | goto badframe; | ||
188 | |||
189 | sigdelsetmask(&set, ~_BLOCKABLE); | ||
190 | spin_lock_irq(&current->sighand->siglock); | ||
191 | current->blocked = set; | ||
192 | recalc_sigpending(); | ||
193 | spin_unlock_irq(&current->sighand->siglock); | ||
194 | |||
195 | if (restore_sigcontext(regs, &frame->sc, &ax)) | ||
196 | goto badframe; | ||
197 | return ax; | ||
198 | |||
199 | badframe: | ||
200 | if (show_unhandled_signals && printk_ratelimit()) { | ||
201 | printk("%s%s[%d] bad frame in sigreturn frame:" | ||
202 | "%p ip:%lx sp:%lx oeax:%lx", | ||
203 | task_pid_nr(current) > 1 ? KERN_INFO : KERN_EMERG, | ||
204 | current->comm, task_pid_nr(current), frame, regs->ip, | ||
205 | regs->sp, regs->orig_ax); | ||
206 | print_vma_addr(" in ", regs->ip); | ||
207 | printk(KERN_CONT "\n"); | ||
208 | } | ||
209 | |||
210 | force_sig(SIGSEGV, current); | ||
211 | |||
212 | return 0; | ||
213 | } | ||
214 | |||
215 | static long do_rt_sigreturn(struct pt_regs *regs) | ||
216 | { | ||
217 | struct rt_sigframe __user *frame; | ||
218 | unsigned long ax; | ||
219 | sigset_t set; | ||
220 | |||
221 | frame = (struct rt_sigframe __user *)(regs->sp - sizeof(long)); | ||
222 | if (!access_ok(VERIFY_READ, frame, sizeof(*frame))) | ||
223 | goto badframe; | ||
224 | if (__copy_from_user(&set, &frame->uc.uc_sigmask, sizeof(set))) | ||
225 | goto badframe; | ||
226 | |||
227 | sigdelsetmask(&set, ~_BLOCKABLE); | ||
228 | spin_lock_irq(&current->sighand->siglock); | ||
229 | current->blocked = set; | ||
230 | recalc_sigpending(); | ||
231 | spin_unlock_irq(&current->sighand->siglock); | ||
232 | |||
233 | if (restore_sigcontext(regs, &frame->uc.uc_mcontext, &ax)) | ||
234 | goto badframe; | ||
235 | |||
236 | if (do_sigaltstack(&frame->uc.uc_stack, NULL, regs->sp) == -EFAULT) | ||
237 | goto badframe; | ||
238 | |||
239 | return ax; | ||
240 | |||
241 | badframe: | ||
242 | signal_fault(regs, frame, "rt_sigreturn"); | ||
243 | return 0; | ||
244 | } | ||
245 | |||
246 | asmlinkage int sys_rt_sigreturn(unsigned long __unused) | ||
247 | { | ||
248 | struct pt_regs *regs = (struct pt_regs *)&__unused; | ||
249 | |||
250 | return do_rt_sigreturn(regs); | ||
251 | } | ||
252 | |||
253 | /* | ||
254 | * Set up a signal frame. | ||
255 | */ | ||
256 | static int | 128 | static int |
257 | setup_sigcontext(struct sigcontext __user *sc, void __user *fpstate, | 129 | setup_sigcontext(struct sigcontext __user *sc, void __user *fpstate, |
258 | struct pt_regs *regs, unsigned long mask) | 130 | struct pt_regs *regs, unsigned long mask) |
259 | { | 131 | { |
260 | int tmp, err = 0; | 132 | int err = 0; |
261 | 133 | ||
262 | err |= __put_user(regs->fs, (unsigned int __user *)&sc->fs); | 134 | #ifdef CONFIG_X86_32 |
263 | savesegment(gs, tmp); | 135 | { |
264 | err |= __put_user(tmp, (unsigned int __user *)&sc->gs); | 136 | unsigned int tmp; |
265 | 137 | ||
138 | savesegment(gs, tmp); | ||
139 | err |= __put_user(tmp, (unsigned int __user *)&sc->gs); | ||
140 | } | ||
141 | err |= __put_user(regs->fs, (unsigned int __user *)&sc->fs); | ||
266 | err |= __put_user(regs->es, (unsigned int __user *)&sc->es); | 142 | err |= __put_user(regs->es, (unsigned int __user *)&sc->es); |
267 | err |= __put_user(regs->ds, (unsigned int __user *)&sc->ds); | 143 | err |= __put_user(regs->ds, (unsigned int __user *)&sc->ds); |
144 | #endif /* CONFIG_X86_32 */ | ||
145 | |||
268 | err |= __put_user(regs->di, &sc->di); | 146 | err |= __put_user(regs->di, &sc->di); |
269 | err |= __put_user(regs->si, &sc->si); | 147 | err |= __put_user(regs->si, &sc->si); |
270 | err |= __put_user(regs->bp, &sc->bp); | 148 | err |= __put_user(regs->bp, &sc->bp); |
@@ -273,19 +151,33 @@ setup_sigcontext(struct sigcontext __user *sc, void __user *fpstate, | |||
273 | err |= __put_user(regs->dx, &sc->dx); | 151 | err |= __put_user(regs->dx, &sc->dx); |
274 | err |= __put_user(regs->cx, &sc->cx); | 152 | err |= __put_user(regs->cx, &sc->cx); |
275 | err |= __put_user(regs->ax, &sc->ax); | 153 | err |= __put_user(regs->ax, &sc->ax); |
154 | #ifdef CONFIG_X86_64 | ||
155 | err |= __put_user(regs->r8, &sc->r8); | ||
156 | err |= __put_user(regs->r9, &sc->r9); | ||
157 | err |= __put_user(regs->r10, &sc->r10); | ||
158 | err |= __put_user(regs->r11, &sc->r11); | ||
159 | err |= __put_user(regs->r12, &sc->r12); | ||
160 | err |= __put_user(regs->r13, &sc->r13); | ||
161 | err |= __put_user(regs->r14, &sc->r14); | ||
162 | err |= __put_user(regs->r15, &sc->r15); | ||
163 | #endif /* CONFIG_X86_64 */ | ||
164 | |||
276 | err |= __put_user(current->thread.trap_no, &sc->trapno); | 165 | err |= __put_user(current->thread.trap_no, &sc->trapno); |
277 | err |= __put_user(current->thread.error_code, &sc->err); | 166 | err |= __put_user(current->thread.error_code, &sc->err); |
278 | err |= __put_user(regs->ip, &sc->ip); | 167 | err |= __put_user(regs->ip, &sc->ip); |
168 | #ifdef CONFIG_X86_32 | ||
279 | err |= __put_user(regs->cs, (unsigned int __user *)&sc->cs); | 169 | err |= __put_user(regs->cs, (unsigned int __user *)&sc->cs); |
280 | err |= __put_user(regs->flags, &sc->flags); | 170 | err |= __put_user(regs->flags, &sc->flags); |
281 | err |= __put_user(regs->sp, &sc->sp_at_signal); | 171 | err |= __put_user(regs->sp, &sc->sp_at_signal); |
282 | err |= __put_user(regs->ss, (unsigned int __user *)&sc->ss); | 172 | err |= __put_user(regs->ss, (unsigned int __user *)&sc->ss); |
173 | #else /* !CONFIG_X86_32 */ | ||
174 | err |= __put_user(regs->flags, &sc->flags); | ||
175 | err |= __put_user(regs->cs, &sc->cs); | ||
176 | err |= __put_user(0, &sc->gs); | ||
177 | err |= __put_user(0, &sc->fs); | ||
178 | #endif /* CONFIG_X86_32 */ | ||
283 | 179 | ||
284 | tmp = save_i387_xstate(fpstate); | 180 | err |= __put_user(fpstate, &sc->fpstate); |
285 | if (tmp < 0) | ||
286 | err = 1; | ||
287 | else | ||
288 | err |= __put_user(tmp ? fpstate : NULL, &sc->fpstate); | ||
289 | 181 | ||
290 | /* non-iBCS2 extensions.. */ | 182 | /* non-iBCS2 extensions.. */ |
291 | err |= __put_user(mask, &sc->oldmask); | 183 | err |= __put_user(mask, &sc->oldmask); |
@@ -295,6 +187,32 @@ setup_sigcontext(struct sigcontext __user *sc, void __user *fpstate, | |||
295 | } | 187 | } |
296 | 188 | ||
297 | /* | 189 | /* |
190 | * Set up a signal frame. | ||
191 | */ | ||
192 | #ifdef CONFIG_X86_32 | ||
193 | static const struct { | ||
194 | u16 poplmovl; | ||
195 | u32 val; | ||
196 | u16 int80; | ||
197 | } __attribute__((packed)) retcode = { | ||
198 | 0xb858, /* popl %eax; movl $..., %eax */ | ||
199 | __NR_sigreturn, | ||
200 | 0x80cd, /* int $0x80 */ | ||
201 | }; | ||
202 | |||
203 | static const struct { | ||
204 | u8 movl; | ||
205 | u32 val; | ||
206 | u16 int80; | ||
207 | u8 pad; | ||
208 | } __attribute__((packed)) rt_retcode = { | ||
209 | 0xb8, /* movl $..., %eax */ | ||
210 | __NR_rt_sigreturn, | ||
211 | 0x80cd, /* int $0x80 */ | ||
212 | 0 | ||
213 | }; | ||
214 | |||
215 | /* | ||
298 | * Determine which stack to use.. | 216 | * Determine which stack to use.. |
299 | */ | 217 | */ |
300 | static inline void __user * | 218 | static inline void __user * |
@@ -328,6 +246,8 @@ get_sigframe(struct k_sigaction *ka, struct pt_regs *regs, size_t frame_size, | |||
328 | if (used_math()) { | 246 | if (used_math()) { |
329 | sp = sp - sig_xstate_size; | 247 | sp = sp - sig_xstate_size; |
330 | *fpstate = (struct _fpstate *) sp; | 248 | *fpstate = (struct _fpstate *) sp; |
249 | if (save_i387_xstate(*fpstate) < 0) | ||
250 | return (void __user *)-1L; | ||
331 | } | 251 | } |
332 | 252 | ||
333 | sp -= frame_size; | 253 | sp -= frame_size; |
@@ -383,9 +303,7 @@ __setup_frame(int sig, struct k_sigaction *ka, sigset_t *set, | |||
383 | * reasons and because gdb uses it as a signature to notice | 303 | * reasons and because gdb uses it as a signature to notice |
384 | * signal handler stack frames. | 304 | * signal handler stack frames. |
385 | */ | 305 | */ |
386 | err |= __put_user(0xb858, (short __user *)(frame->retcode+0)); | 306 | err |= __put_user(*((u64 *)&retcode), (u64 *)frame->retcode); |
387 | err |= __put_user(__NR_sigreturn, (int __user *)(frame->retcode+2)); | ||
388 | err |= __put_user(0x80cd, (short __user *)(frame->retcode+6)); | ||
389 | 307 | ||
390 | if (err) | 308 | if (err) |
391 | return -EFAULT; | 309 | return -EFAULT; |
@@ -454,9 +372,7 @@ static int __setup_rt_frame(int sig, struct k_sigaction *ka, siginfo_t *info, | |||
454 | * reasons and because gdb uses it as a signature to notice | 372 | * reasons and because gdb uses it as a signature to notice |
455 | * signal handler stack frames. | 373 | * signal handler stack frames. |
456 | */ | 374 | */ |
457 | err |= __put_user(0xb8, (char __user *)(frame->retcode+0)); | 375 | err |= __put_user(*((u64 *)&rt_retcode), (u64 *)frame->retcode); |
458 | err |= __put_user(__NR_rt_sigreturn, (int __user *)(frame->retcode+1)); | ||
459 | err |= __put_user(0x80cd, (short __user *)(frame->retcode+5)); | ||
460 | 376 | ||
461 | if (err) | 377 | if (err) |
462 | return -EFAULT; | 378 | return -EFAULT; |
@@ -475,23 +391,293 @@ static int __setup_rt_frame(int sig, struct k_sigaction *ka, siginfo_t *info, | |||
475 | 391 | ||
476 | return 0; | 392 | return 0; |
477 | } | 393 | } |
394 | #else /* !CONFIG_X86_32 */ | ||
395 | /* | ||
396 | * Determine which stack to use.. | ||
397 | */ | ||
398 | static void __user * | ||
399 | get_stack(struct k_sigaction *ka, unsigned long sp, unsigned long size) | ||
400 | { | ||
401 | /* Default to using normal stack - redzone*/ | ||
402 | sp -= 128; | ||
403 | |||
404 | /* This is the X/Open sanctioned signal stack switching. */ | ||
405 | if (ka->sa.sa_flags & SA_ONSTACK) { | ||
406 | if (sas_ss_flags(sp) == 0) | ||
407 | sp = current->sas_ss_sp + current->sas_ss_size; | ||
408 | } | ||
409 | |||
410 | return (void __user *)round_down(sp - size, 64); | ||
411 | } | ||
412 | |||
413 | static int __setup_rt_frame(int sig, struct k_sigaction *ka, siginfo_t *info, | ||
414 | sigset_t *set, struct pt_regs *regs) | ||
415 | { | ||
416 | struct rt_sigframe __user *frame; | ||
417 | void __user *fp = NULL; | ||
418 | int err = 0; | ||
419 | struct task_struct *me = current; | ||
420 | |||
421 | if (used_math()) { | ||
422 | fp = get_stack(ka, regs->sp, sig_xstate_size); | ||
423 | frame = (void __user *)round_down( | ||
424 | (unsigned long)fp - sizeof(struct rt_sigframe), 16) - 8; | ||
425 | |||
426 | if (save_i387_xstate(fp) < 0) | ||
427 | return -EFAULT; | ||
428 | } else | ||
429 | frame = get_stack(ka, regs->sp, sizeof(struct rt_sigframe)) - 8; | ||
430 | |||
431 | if (!access_ok(VERIFY_WRITE, frame, sizeof(*frame))) | ||
432 | return -EFAULT; | ||
433 | |||
434 | if (ka->sa.sa_flags & SA_SIGINFO) { | ||
435 | if (copy_siginfo_to_user(&frame->info, info)) | ||
436 | return -EFAULT; | ||
437 | } | ||
438 | |||
439 | /* Create the ucontext. */ | ||
440 | if (cpu_has_xsave) | ||
441 | err |= __put_user(UC_FP_XSTATE, &frame->uc.uc_flags); | ||
442 | else | ||
443 | err |= __put_user(0, &frame->uc.uc_flags); | ||
444 | err |= __put_user(0, &frame->uc.uc_link); | ||
445 | err |= __put_user(me->sas_ss_sp, &frame->uc.uc_stack.ss_sp); | ||
446 | err |= __put_user(sas_ss_flags(regs->sp), | ||
447 | &frame->uc.uc_stack.ss_flags); | ||
448 | err |= __put_user(me->sas_ss_size, &frame->uc.uc_stack.ss_size); | ||
449 | err |= setup_sigcontext(&frame->uc.uc_mcontext, fp, regs, set->sig[0]); | ||
450 | err |= __copy_to_user(&frame->uc.uc_sigmask, set, sizeof(*set)); | ||
451 | |||
452 | /* Set up to return from userspace. If provided, use a stub | ||
453 | already in userspace. */ | ||
454 | /* x86-64 should always use SA_RESTORER. */ | ||
455 | if (ka->sa.sa_flags & SA_RESTORER) { | ||
456 | err |= __put_user(ka->sa.sa_restorer, &frame->pretcode); | ||
457 | } else { | ||
458 | /* could use a vstub here */ | ||
459 | return -EFAULT; | ||
460 | } | ||
461 | |||
462 | if (err) | ||
463 | return -EFAULT; | ||
464 | |||
465 | /* Set up registers for signal handler */ | ||
466 | regs->di = sig; | ||
467 | /* In case the signal handler was declared without prototypes */ | ||
468 | regs->ax = 0; | ||
469 | |||
470 | /* This also works for non SA_SIGINFO handlers because they expect the | ||
471 | next argument after the signal number on the stack. */ | ||
472 | regs->si = (unsigned long)&frame->info; | ||
473 | regs->dx = (unsigned long)&frame->uc; | ||
474 | regs->ip = (unsigned long) ka->sa.sa_handler; | ||
475 | |||
476 | regs->sp = (unsigned long)frame; | ||
477 | |||
478 | /* Set up the CS register to run signal handlers in 64-bit mode, | ||
479 | even if the handler happens to be interrupting 32-bit code. */ | ||
480 | regs->cs = __USER_CS; | ||
481 | |||
482 | return 0; | ||
483 | } | ||
484 | #endif /* CONFIG_X86_32 */ | ||
485 | |||
486 | #ifdef CONFIG_X86_32 | ||
487 | /* | ||
488 | * Atomically swap in the new signal mask, and wait for a signal. | ||
489 | */ | ||
490 | asmlinkage int | ||
491 | sys_sigsuspend(int history0, int history1, old_sigset_t mask) | ||
492 | { | ||
493 | mask &= _BLOCKABLE; | ||
494 | spin_lock_irq(&current->sighand->siglock); | ||
495 | current->saved_sigmask = current->blocked; | ||
496 | siginitset(&current->blocked, mask); | ||
497 | recalc_sigpending(); | ||
498 | spin_unlock_irq(&current->sighand->siglock); | ||
499 | |||
500 | current->state = TASK_INTERRUPTIBLE; | ||
501 | schedule(); | ||
502 | set_restore_sigmask(); | ||
503 | |||
504 | return -ERESTARTNOHAND; | ||
505 | } | ||
506 | |||
507 | asmlinkage int | ||
508 | sys_sigaction(int sig, const struct old_sigaction __user *act, | ||
509 | struct old_sigaction __user *oact) | ||
510 | { | ||
511 | struct k_sigaction new_ka, old_ka; | ||
512 | int ret; | ||
513 | |||
514 | if (act) { | ||
515 | old_sigset_t mask; | ||
516 | |||
517 | if (!access_ok(VERIFY_READ, act, sizeof(*act)) || | ||
518 | __get_user(new_ka.sa.sa_handler, &act->sa_handler) || | ||
519 | __get_user(new_ka.sa.sa_restorer, &act->sa_restorer)) | ||
520 | return -EFAULT; | ||
521 | |||
522 | __get_user(new_ka.sa.sa_flags, &act->sa_flags); | ||
523 | __get_user(mask, &act->sa_mask); | ||
524 | siginitset(&new_ka.sa.sa_mask, mask); | ||
525 | } | ||
526 | |||
527 | ret = do_sigaction(sig, act ? &new_ka : NULL, oact ? &old_ka : NULL); | ||
528 | |||
529 | if (!ret && oact) { | ||
530 | if (!access_ok(VERIFY_WRITE, oact, sizeof(*oact)) || | ||
531 | __put_user(old_ka.sa.sa_handler, &oact->sa_handler) || | ||
532 | __put_user(old_ka.sa.sa_restorer, &oact->sa_restorer)) | ||
533 | return -EFAULT; | ||
534 | |||
535 | __put_user(old_ka.sa.sa_flags, &oact->sa_flags); | ||
536 | __put_user(old_ka.sa.sa_mask.sig[0], &oact->sa_mask); | ||
537 | } | ||
538 | |||
539 | return ret; | ||
540 | } | ||
541 | #endif /* CONFIG_X86_32 */ | ||
542 | |||
543 | #ifdef CONFIG_X86_32 | ||
544 | asmlinkage int sys_sigaltstack(unsigned long bx) | ||
545 | { | ||
546 | /* | ||
547 | * This is needed to make gcc realize it doesn't own the | ||
548 | * "struct pt_regs" | ||
549 | */ | ||
550 | struct pt_regs *regs = (struct pt_regs *)&bx; | ||
551 | const stack_t __user *uss = (const stack_t __user *)bx; | ||
552 | stack_t __user *uoss = (stack_t __user *)regs->cx; | ||
553 | |||
554 | return do_sigaltstack(uss, uoss, regs->sp); | ||
555 | } | ||
556 | #else /* !CONFIG_X86_32 */ | ||
557 | asmlinkage long | ||
558 | sys_sigaltstack(const stack_t __user *uss, stack_t __user *uoss, | ||
559 | struct pt_regs *regs) | ||
560 | { | ||
561 | return do_sigaltstack(uss, uoss, regs->sp); | ||
562 | } | ||
563 | #endif /* CONFIG_X86_32 */ | ||
564 | |||
565 | /* | ||
566 | * Do a signal return; undo the signal stack. | ||
567 | */ | ||
568 | #ifdef CONFIG_X86_32 | ||
569 | asmlinkage unsigned long sys_sigreturn(unsigned long __unused) | ||
570 | { | ||
571 | struct sigframe __user *frame; | ||
572 | struct pt_regs *regs; | ||
573 | unsigned long ax; | ||
574 | sigset_t set; | ||
575 | |||
576 | regs = (struct pt_regs *) &__unused; | ||
577 | frame = (struct sigframe __user *)(regs->sp - 8); | ||
578 | |||
579 | if (!access_ok(VERIFY_READ, frame, sizeof(*frame))) | ||
580 | goto badframe; | ||
581 | if (__get_user(set.sig[0], &frame->sc.oldmask) || (_NSIG_WORDS > 1 | ||
582 | && __copy_from_user(&set.sig[1], &frame->extramask, | ||
583 | sizeof(frame->extramask)))) | ||
584 | goto badframe; | ||
585 | |||
586 | sigdelsetmask(&set, ~_BLOCKABLE); | ||
587 | spin_lock_irq(&current->sighand->siglock); | ||
588 | current->blocked = set; | ||
589 | recalc_sigpending(); | ||
590 | spin_unlock_irq(&current->sighand->siglock); | ||
591 | |||
592 | if (restore_sigcontext(regs, &frame->sc, &ax)) | ||
593 | goto badframe; | ||
594 | return ax; | ||
595 | |||
596 | badframe: | ||
597 | signal_fault(regs, frame, "sigreturn"); | ||
598 | |||
599 | return 0; | ||
600 | } | ||
601 | #endif /* CONFIG_X86_32 */ | ||
602 | |||
603 | static long do_rt_sigreturn(struct pt_regs *regs) | ||
604 | { | ||
605 | struct rt_sigframe __user *frame; | ||
606 | unsigned long ax; | ||
607 | sigset_t set; | ||
608 | |||
609 | frame = (struct rt_sigframe __user *)(regs->sp - sizeof(long)); | ||
610 | if (!access_ok(VERIFY_READ, frame, sizeof(*frame))) | ||
611 | goto badframe; | ||
612 | if (__copy_from_user(&set, &frame->uc.uc_sigmask, sizeof(set))) | ||
613 | goto badframe; | ||
614 | |||
615 | sigdelsetmask(&set, ~_BLOCKABLE); | ||
616 | spin_lock_irq(&current->sighand->siglock); | ||
617 | current->blocked = set; | ||
618 | recalc_sigpending(); | ||
619 | spin_unlock_irq(&current->sighand->siglock); | ||
620 | |||
621 | if (restore_sigcontext(regs, &frame->uc.uc_mcontext, &ax)) | ||
622 | goto badframe; | ||
623 | |||
624 | if (do_sigaltstack(&frame->uc.uc_stack, NULL, regs->sp) == -EFAULT) | ||
625 | goto badframe; | ||
626 | |||
627 | return ax; | ||
628 | |||
629 | badframe: | ||
630 | signal_fault(regs, frame, "rt_sigreturn"); | ||
631 | return 0; | ||
632 | } | ||
633 | |||
634 | #ifdef CONFIG_X86_32 | ||
635 | asmlinkage int sys_rt_sigreturn(struct pt_regs regs) | ||
636 | { | ||
637 | return do_rt_sigreturn(&regs); | ||
638 | } | ||
639 | #else /* !CONFIG_X86_32 */ | ||
640 | asmlinkage long sys_rt_sigreturn(struct pt_regs *regs) | ||
641 | { | ||
642 | return do_rt_sigreturn(regs); | ||
643 | } | ||
644 | #endif /* CONFIG_X86_32 */ | ||
478 | 645 | ||
479 | /* | 646 | /* |
480 | * OK, we're invoking a handler: | 647 | * OK, we're invoking a handler: |
481 | */ | 648 | */ |
482 | static int signr_convert(int sig) | 649 | static int signr_convert(int sig) |
483 | { | 650 | { |
651 | #ifdef CONFIG_X86_32 | ||
484 | struct thread_info *info = current_thread_info(); | 652 | struct thread_info *info = current_thread_info(); |
485 | 653 | ||
486 | if (info->exec_domain && info->exec_domain->signal_invmap && sig < 32) | 654 | if (info->exec_domain && info->exec_domain->signal_invmap && sig < 32) |
487 | return info->exec_domain->signal_invmap[sig]; | 655 | return info->exec_domain->signal_invmap[sig]; |
656 | #endif /* CONFIG_X86_32 */ | ||
488 | return sig; | 657 | return sig; |
489 | } | 658 | } |
490 | 659 | ||
660 | #ifdef CONFIG_X86_32 | ||
661 | |||
491 | #define is_ia32 1 | 662 | #define is_ia32 1 |
492 | #define ia32_setup_frame __setup_frame | 663 | #define ia32_setup_frame __setup_frame |
493 | #define ia32_setup_rt_frame __setup_rt_frame | 664 | #define ia32_setup_rt_frame __setup_rt_frame |
494 | 665 | ||
666 | #else /* !CONFIG_X86_32 */ | ||
667 | |||
668 | #ifdef CONFIG_IA32_EMULATION | ||
669 | #define is_ia32 test_thread_flag(TIF_IA32) | ||
670 | #else /* !CONFIG_IA32_EMULATION */ | ||
671 | #define is_ia32 0 | ||
672 | #endif /* CONFIG_IA32_EMULATION */ | ||
673 | |||
674 | int ia32_setup_rt_frame(int sig, struct k_sigaction *ka, siginfo_t *info, | ||
675 | sigset_t *set, struct pt_regs *regs); | ||
676 | int ia32_setup_frame(int sig, struct k_sigaction *ka, | ||
677 | sigset_t *set, struct pt_regs *regs); | ||
678 | |||
679 | #endif /* CONFIG_X86_32 */ | ||
680 | |||
495 | static int | 681 | static int |
496 | setup_rt_frame(int sig, struct k_sigaction *ka, siginfo_t *info, | 682 | setup_rt_frame(int sig, struct k_sigaction *ka, siginfo_t *info, |
497 | sigset_t *set, struct pt_regs *regs) | 683 | sigset_t *set, struct pt_regs *regs) |
@@ -592,7 +778,13 @@ handle_signal(unsigned long sig, siginfo_t *info, struct k_sigaction *ka, | |||
592 | return 0; | 778 | return 0; |
593 | } | 779 | } |
594 | 780 | ||
781 | #ifdef CONFIG_X86_32 | ||
595 | #define NR_restart_syscall __NR_restart_syscall | 782 | #define NR_restart_syscall __NR_restart_syscall |
783 | #else /* !CONFIG_X86_32 */ | ||
784 | #define NR_restart_syscall \ | ||
785 | test_thread_flag(TIF_IA32) ? __NR_ia32_restart_syscall : __NR_restart_syscall | ||
786 | #endif /* CONFIG_X86_32 */ | ||
787 | |||
596 | /* | 788 | /* |
597 | * Note that 'init' is a special process: it doesn't get signals it doesn't | 789 | * Note that 'init' is a special process: it doesn't get signals it doesn't |
598 | * want to handle. Thus you cannot kill init even with a SIGKILL even by | 790 | * want to handle. Thus you cannot kill init even with a SIGKILL even by |
@@ -704,8 +896,9 @@ void signal_fault(struct pt_regs *regs, void __user *frame, char *where) | |||
704 | struct task_struct *me = current; | 896 | struct task_struct *me = current; |
705 | 897 | ||
706 | if (show_unhandled_signals && printk_ratelimit()) { | 898 | if (show_unhandled_signals && printk_ratelimit()) { |
707 | printk(KERN_INFO | 899 | printk("%s" |
708 | "%s[%d] bad frame in %s frame:%p ip:%lx sp:%lx orax:%lx", | 900 | "%s[%d] bad frame in %s frame:%p ip:%lx sp:%lx orax:%lx", |
901 | task_pid_nr(current) > 1 ? KERN_INFO : KERN_EMERG, | ||
709 | me->comm, me->pid, where, frame, | 902 | me->comm, me->pid, where, frame, |
710 | regs->ip, regs->sp, regs->orig_ax); | 903 | regs->ip, regs->sp, regs->orig_ax); |
711 | print_vma_addr(" in ", regs->ip); | 904 | print_vma_addr(" in ", regs->ip); |
diff --git a/arch/x86/kernel/signal_64.c b/arch/x86/kernel/signal_64.c deleted file mode 100644 index a5c9627f4db9..000000000000 --- a/arch/x86/kernel/signal_64.c +++ /dev/null | |||
@@ -1,516 +0,0 @@ | |||
1 | /* | ||
2 | * Copyright (C) 1991, 1992 Linus Torvalds | ||
3 | * Copyright (C) 2000, 2001, 2002 Andi Kleen SuSE Labs | ||
4 | * | ||
5 | * 1997-11-28 Modified for POSIX.1b signals by Richard Henderson | ||
6 | * 2000-06-20 Pentium III FXSR, SSE support by Gareth Hughes | ||
7 | * 2000-2002 x86-64 support by Andi Kleen | ||
8 | */ | ||
9 | |||
10 | #include <linux/sched.h> | ||
11 | #include <linux/mm.h> | ||
12 | #include <linux/smp.h> | ||
13 | #include <linux/kernel.h> | ||
14 | #include <linux/signal.h> | ||
15 | #include <linux/errno.h> | ||
16 | #include <linux/wait.h> | ||
17 | #include <linux/ptrace.h> | ||
18 | #include <linux/tracehook.h> | ||
19 | #include <linux/unistd.h> | ||
20 | #include <linux/stddef.h> | ||
21 | #include <linux/personality.h> | ||
22 | #include <linux/compiler.h> | ||
23 | #include <linux/uaccess.h> | ||
24 | |||
25 | #include <asm/processor.h> | ||
26 | #include <asm/ucontext.h> | ||
27 | #include <asm/i387.h> | ||
28 | #include <asm/proto.h> | ||
29 | #include <asm/ia32_unistd.h> | ||
30 | #include <asm/mce.h> | ||
31 | #include <asm/syscall.h> | ||
32 | #include <asm/syscalls.h> | ||
33 | #include "sigframe.h" | ||
34 | |||
35 | #define _BLOCKABLE (~(sigmask(SIGKILL) | sigmask(SIGSTOP))) | ||
36 | |||
37 | #define __FIX_EFLAGS (X86_EFLAGS_AC | X86_EFLAGS_OF | \ | ||
38 | X86_EFLAGS_DF | X86_EFLAGS_TF | X86_EFLAGS_SF | \ | ||
39 | X86_EFLAGS_ZF | X86_EFLAGS_AF | X86_EFLAGS_PF | \ | ||
40 | X86_EFLAGS_CF) | ||
41 | |||
42 | #ifdef CONFIG_X86_32 | ||
43 | # define FIX_EFLAGS (__FIX_EFLAGS | X86_EFLAGS_RF) | ||
44 | #else | ||
45 | # define FIX_EFLAGS __FIX_EFLAGS | ||
46 | #endif | ||
47 | |||
48 | asmlinkage long | ||
49 | sys_sigaltstack(const stack_t __user *uss, stack_t __user *uoss, | ||
50 | struct pt_regs *regs) | ||
51 | { | ||
52 | return do_sigaltstack(uss, uoss, regs->sp); | ||
53 | } | ||
54 | |||
55 | #define COPY(x) { \ | ||
56 | err |= __get_user(regs->x, &sc->x); \ | ||
57 | } | ||
58 | |||
59 | #define COPY_SEG_STRICT(seg) { \ | ||
60 | unsigned short tmp; \ | ||
61 | err |= __get_user(tmp, &sc->seg); \ | ||
62 | regs->seg = tmp | 3; \ | ||
63 | } | ||
64 | |||
65 | /* | ||
66 | * Do a signal return; undo the signal stack. | ||
67 | */ | ||
68 | static int | ||
69 | restore_sigcontext(struct pt_regs *regs, struct sigcontext __user *sc, | ||
70 | unsigned long *pax) | ||
71 | { | ||
72 | void __user *buf; | ||
73 | unsigned int tmpflags; | ||
74 | unsigned int err = 0; | ||
75 | |||
76 | /* Always make any pending restarted system calls return -EINTR */ | ||
77 | current_thread_info()->restart_block.fn = do_no_restart_syscall; | ||
78 | |||
79 | COPY(di); COPY(si); COPY(bp); COPY(sp); COPY(bx); | ||
80 | COPY(dx); COPY(cx); COPY(ip); | ||
81 | COPY(r8); | ||
82 | COPY(r9); | ||
83 | COPY(r10); | ||
84 | COPY(r11); | ||
85 | COPY(r12); | ||
86 | COPY(r13); | ||
87 | COPY(r14); | ||
88 | COPY(r15); | ||
89 | |||
90 | /* Kernel saves and restores only the CS segment register on signals, | ||
91 | * which is the bare minimum needed to allow mixed 32/64-bit code. | ||
92 | * App's signal handler can save/restore other segments if needed. */ | ||
93 | COPY_SEG_STRICT(cs); | ||
94 | |||
95 | err |= __get_user(tmpflags, &sc->flags); | ||
96 | regs->flags = (regs->flags & ~FIX_EFLAGS) | (tmpflags & FIX_EFLAGS); | ||
97 | regs->orig_ax = -1; /* disable syscall checks */ | ||
98 | |||
99 | err |= __get_user(buf, &sc->fpstate); | ||
100 | err |= restore_i387_xstate(buf); | ||
101 | |||
102 | err |= __get_user(*pax, &sc->ax); | ||
103 | return err; | ||
104 | } | ||
105 | |||
106 | static long do_rt_sigreturn(struct pt_regs *regs) | ||
107 | { | ||
108 | struct rt_sigframe __user *frame; | ||
109 | unsigned long ax; | ||
110 | sigset_t set; | ||
111 | |||
112 | frame = (struct rt_sigframe __user *)(regs->sp - sizeof(long)); | ||
113 | if (!access_ok(VERIFY_READ, frame, sizeof(*frame))) | ||
114 | goto badframe; | ||
115 | if (__copy_from_user(&set, &frame->uc.uc_sigmask, sizeof(set))) | ||
116 | goto badframe; | ||
117 | |||
118 | sigdelsetmask(&set, ~_BLOCKABLE); | ||
119 | spin_lock_irq(&current->sighand->siglock); | ||
120 | current->blocked = set; | ||
121 | recalc_sigpending(); | ||
122 | spin_unlock_irq(&current->sighand->siglock); | ||
123 | |||
124 | if (restore_sigcontext(regs, &frame->uc.uc_mcontext, &ax)) | ||
125 | goto badframe; | ||
126 | |||
127 | if (do_sigaltstack(&frame->uc.uc_stack, NULL, regs->sp) == -EFAULT) | ||
128 | goto badframe; | ||
129 | |||
130 | return ax; | ||
131 | |||
132 | badframe: | ||
133 | signal_fault(regs, frame, "rt_sigreturn"); | ||
134 | return 0; | ||
135 | } | ||
136 | |||
137 | asmlinkage long sys_rt_sigreturn(struct pt_regs *regs) | ||
138 | { | ||
139 | return do_rt_sigreturn(regs); | ||
140 | } | ||
141 | |||
142 | /* | ||
143 | * Set up a signal frame. | ||
144 | */ | ||
145 | |||
146 | static inline int | ||
147 | setup_sigcontext(struct sigcontext __user *sc, struct pt_regs *regs, | ||
148 | unsigned long mask, struct task_struct *me) | ||
149 | { | ||
150 | int err = 0; | ||
151 | |||
152 | err |= __put_user(regs->cs, &sc->cs); | ||
153 | err |= __put_user(0, &sc->gs); | ||
154 | err |= __put_user(0, &sc->fs); | ||
155 | |||
156 | err |= __put_user(regs->di, &sc->di); | ||
157 | err |= __put_user(regs->si, &sc->si); | ||
158 | err |= __put_user(regs->bp, &sc->bp); | ||
159 | err |= __put_user(regs->sp, &sc->sp); | ||
160 | err |= __put_user(regs->bx, &sc->bx); | ||
161 | err |= __put_user(regs->dx, &sc->dx); | ||
162 | err |= __put_user(regs->cx, &sc->cx); | ||
163 | err |= __put_user(regs->ax, &sc->ax); | ||
164 | err |= __put_user(regs->r8, &sc->r8); | ||
165 | err |= __put_user(regs->r9, &sc->r9); | ||
166 | err |= __put_user(regs->r10, &sc->r10); | ||
167 | err |= __put_user(regs->r11, &sc->r11); | ||
168 | err |= __put_user(regs->r12, &sc->r12); | ||
169 | err |= __put_user(regs->r13, &sc->r13); | ||
170 | err |= __put_user(regs->r14, &sc->r14); | ||
171 | err |= __put_user(regs->r15, &sc->r15); | ||
172 | err |= __put_user(me->thread.trap_no, &sc->trapno); | ||
173 | err |= __put_user(me->thread.error_code, &sc->err); | ||
174 | err |= __put_user(regs->ip, &sc->ip); | ||
175 | err |= __put_user(regs->flags, &sc->flags); | ||
176 | err |= __put_user(mask, &sc->oldmask); | ||
177 | err |= __put_user(me->thread.cr2, &sc->cr2); | ||
178 | |||
179 | return err; | ||
180 | } | ||
181 | |||
182 | /* | ||
183 | * Determine which stack to use.. | ||
184 | */ | ||
185 | |||
186 | static void __user * | ||
187 | get_stack(struct k_sigaction *ka, struct pt_regs *regs, unsigned long size) | ||
188 | { | ||
189 | unsigned long sp; | ||
190 | |||
191 | /* Default to using normal stack - redzone*/ | ||
192 | sp = regs->sp - 128; | ||
193 | |||
194 | /* This is the X/Open sanctioned signal stack switching. */ | ||
195 | if (ka->sa.sa_flags & SA_ONSTACK) { | ||
196 | if (sas_ss_flags(sp) == 0) | ||
197 | sp = current->sas_ss_sp + current->sas_ss_size; | ||
198 | } | ||
199 | |||
200 | return (void __user *)round_down(sp - size, 64); | ||
201 | } | ||
202 | |||
203 | static int __setup_rt_frame(int sig, struct k_sigaction *ka, siginfo_t *info, | ||
204 | sigset_t *set, struct pt_regs *regs) | ||
205 | { | ||
206 | struct rt_sigframe __user *frame; | ||
207 | void __user *fp = NULL; | ||
208 | int err = 0; | ||
209 | struct task_struct *me = current; | ||
210 | |||
211 | if (used_math()) { | ||
212 | fp = get_stack(ka, regs, sig_xstate_size); | ||
213 | frame = (void __user *)round_down( | ||
214 | (unsigned long)fp - sizeof(struct rt_sigframe), 16) - 8; | ||
215 | |||
216 | if (save_i387_xstate(fp) < 0) | ||
217 | return -EFAULT; | ||
218 | } else | ||
219 | frame = get_stack(ka, regs, sizeof(struct rt_sigframe)) - 8; | ||
220 | |||
221 | if (!access_ok(VERIFY_WRITE, frame, sizeof(*frame))) | ||
222 | return -EFAULT; | ||
223 | |||
224 | if (ka->sa.sa_flags & SA_SIGINFO) { | ||
225 | if (copy_siginfo_to_user(&frame->info, info)) | ||
226 | return -EFAULT; | ||
227 | } | ||
228 | |||
229 | /* Create the ucontext. */ | ||
230 | if (cpu_has_xsave) | ||
231 | err |= __put_user(UC_FP_XSTATE, &frame->uc.uc_flags); | ||
232 | else | ||
233 | err |= __put_user(0, &frame->uc.uc_flags); | ||
234 | err |= __put_user(0, &frame->uc.uc_link); | ||
235 | err |= __put_user(me->sas_ss_sp, &frame->uc.uc_stack.ss_sp); | ||
236 | err |= __put_user(sas_ss_flags(regs->sp), | ||
237 | &frame->uc.uc_stack.ss_flags); | ||
238 | err |= __put_user(me->sas_ss_size, &frame->uc.uc_stack.ss_size); | ||
239 | err |= setup_sigcontext(&frame->uc.uc_mcontext, regs, set->sig[0], me); | ||
240 | err |= __put_user(fp, &frame->uc.uc_mcontext.fpstate); | ||
241 | if (sizeof(*set) == 16) { | ||
242 | __put_user(set->sig[0], &frame->uc.uc_sigmask.sig[0]); | ||
243 | __put_user(set->sig[1], &frame->uc.uc_sigmask.sig[1]); | ||
244 | } else | ||
245 | err |= __copy_to_user(&frame->uc.uc_sigmask, set, sizeof(*set)); | ||
246 | |||
247 | /* Set up to return from userspace. If provided, use a stub | ||
248 | already in userspace. */ | ||
249 | /* x86-64 should always use SA_RESTORER. */ | ||
250 | if (ka->sa.sa_flags & SA_RESTORER) { | ||
251 | err |= __put_user(ka->sa.sa_restorer, &frame->pretcode); | ||
252 | } else { | ||
253 | /* could use a vstub here */ | ||
254 | return -EFAULT; | ||
255 | } | ||
256 | |||
257 | if (err) | ||
258 | return -EFAULT; | ||
259 | |||
260 | /* Set up registers for signal handler */ | ||
261 | regs->di = sig; | ||
262 | /* In case the signal handler was declared without prototypes */ | ||
263 | regs->ax = 0; | ||
264 | |||
265 | /* This also works for non SA_SIGINFO handlers because they expect the | ||
266 | next argument after the signal number on the stack. */ | ||
267 | regs->si = (unsigned long)&frame->info; | ||
268 | regs->dx = (unsigned long)&frame->uc; | ||
269 | regs->ip = (unsigned long) ka->sa.sa_handler; | ||
270 | |||
271 | regs->sp = (unsigned long)frame; | ||
272 | |||
273 | /* Set up the CS register to run signal handlers in 64-bit mode, | ||
274 | even if the handler happens to be interrupting 32-bit code. */ | ||
275 | regs->cs = __USER_CS; | ||
276 | |||
277 | return 0; | ||
278 | } | ||
279 | |||
280 | /* | ||
281 | * OK, we're invoking a handler | ||
282 | */ | ||
283 | static int signr_convert(int sig) | ||
284 | { | ||
285 | return sig; | ||
286 | } | ||
287 | |||
288 | #ifdef CONFIG_IA32_EMULATION | ||
289 | #define is_ia32 test_thread_flag(TIF_IA32) | ||
290 | #else | ||
291 | #define is_ia32 0 | ||
292 | #endif | ||
293 | |||
294 | static int | ||
295 | setup_rt_frame(int sig, struct k_sigaction *ka, siginfo_t *info, | ||
296 | sigset_t *set, struct pt_regs *regs) | ||
297 | { | ||
298 | int usig = signr_convert(sig); | ||
299 | int ret; | ||
300 | |||
301 | /* Set up the stack frame */ | ||
302 | if (is_ia32) { | ||
303 | if (ka->sa.sa_flags & SA_SIGINFO) | ||
304 | ret = ia32_setup_rt_frame(usig, ka, info, set, regs); | ||
305 | else | ||
306 | ret = ia32_setup_frame(usig, ka, set, regs); | ||
307 | } else | ||
308 | ret = __setup_rt_frame(sig, ka, info, set, regs); | ||
309 | |||
310 | if (ret) { | ||
311 | force_sigsegv(sig, current); | ||
312 | return -EFAULT; | ||
313 | } | ||
314 | |||
315 | return ret; | ||
316 | } | ||
317 | |||
318 | static int | ||
319 | handle_signal(unsigned long sig, siginfo_t *info, struct k_sigaction *ka, | ||
320 | sigset_t *oldset, struct pt_regs *regs) | ||
321 | { | ||
322 | int ret; | ||
323 | |||
324 | /* Are we from a system call? */ | ||
325 | if (syscall_get_nr(current, regs) >= 0) { | ||
326 | /* If so, check system call restarting.. */ | ||
327 | switch (syscall_get_error(current, regs)) { | ||
328 | case -ERESTART_RESTARTBLOCK: | ||
329 | case -ERESTARTNOHAND: | ||
330 | regs->ax = -EINTR; | ||
331 | break; | ||
332 | |||
333 | case -ERESTARTSYS: | ||
334 | if (!(ka->sa.sa_flags & SA_RESTART)) { | ||
335 | regs->ax = -EINTR; | ||
336 | break; | ||
337 | } | ||
338 | /* fallthrough */ | ||
339 | case -ERESTARTNOINTR: | ||
340 | regs->ax = regs->orig_ax; | ||
341 | regs->ip -= 2; | ||
342 | break; | ||
343 | } | ||
344 | } | ||
345 | |||
346 | /* | ||
347 | * If TF is set due to a debugger (TIF_FORCED_TF), clear the TF | ||
348 | * flag so that register information in the sigcontext is correct. | ||
349 | */ | ||
350 | if (unlikely(regs->flags & X86_EFLAGS_TF) && | ||
351 | likely(test_and_clear_thread_flag(TIF_FORCED_TF))) | ||
352 | regs->flags &= ~X86_EFLAGS_TF; | ||
353 | |||
354 | ret = setup_rt_frame(sig, ka, info, oldset, regs); | ||
355 | |||
356 | if (ret) | ||
357 | return ret; | ||
358 | |||
359 | #ifdef CONFIG_X86_64 | ||
360 | /* | ||
361 | * This has nothing to do with segment registers, | ||
362 | * despite the name. This magic affects uaccess.h | ||
363 | * macros' behavior. Reset it to the normal setting. | ||
364 | */ | ||
365 | set_fs(USER_DS); | ||
366 | #endif | ||
367 | |||
368 | /* | ||
369 | * Clear the direction flag as per the ABI for function entry. | ||
370 | */ | ||
371 | regs->flags &= ~X86_EFLAGS_DF; | ||
372 | |||
373 | /* | ||
374 | * Clear TF when entering the signal handler, but | ||
375 | * notify any tracer that was single-stepping it. | ||
376 | * The tracer may want to single-step inside the | ||
377 | * handler too. | ||
378 | */ | ||
379 | regs->flags &= ~X86_EFLAGS_TF; | ||
380 | |||
381 | spin_lock_irq(&current->sighand->siglock); | ||
382 | sigorsets(&current->blocked, &current->blocked, &ka->sa.sa_mask); | ||
383 | if (!(ka->sa.sa_flags & SA_NODEFER)) | ||
384 | sigaddset(&current->blocked, sig); | ||
385 | recalc_sigpending(); | ||
386 | spin_unlock_irq(&current->sighand->siglock); | ||
387 | |||
388 | tracehook_signal_handler(sig, info, ka, regs, | ||
389 | test_thread_flag(TIF_SINGLESTEP)); | ||
390 | |||
391 | return 0; | ||
392 | } | ||
393 | |||
394 | #define NR_restart_syscall \ | ||
395 | test_thread_flag(TIF_IA32) ? __NR_ia32_restart_syscall : __NR_restart_syscall | ||
396 | /* | ||
397 | * Note that 'init' is a special process: it doesn't get signals it doesn't | ||
398 | * want to handle. Thus you cannot kill init even with a SIGKILL even by | ||
399 | * mistake. | ||
400 | */ | ||
401 | static void do_signal(struct pt_regs *regs) | ||
402 | { | ||
403 | struct k_sigaction ka; | ||
404 | siginfo_t info; | ||
405 | int signr; | ||
406 | sigset_t *oldset; | ||
407 | |||
408 | /* | ||
409 | * We want the common case to go fast, which is why we may in certain | ||
410 | * cases get here from kernel mode. Just return without doing anything | ||
411 | * if so. | ||
412 | * X86_32: vm86 regs switched out by assembly code before reaching | ||
413 | * here, so testing against kernel CS suffices. | ||
414 | */ | ||
415 | if (!user_mode(regs)) | ||
416 | return; | ||
417 | |||
418 | if (current_thread_info()->status & TS_RESTORE_SIGMASK) | ||
419 | oldset = &current->saved_sigmask; | ||
420 | else | ||
421 | oldset = &current->blocked; | ||
422 | |||
423 | signr = get_signal_to_deliver(&info, &ka, regs, NULL); | ||
424 | if (signr > 0) { | ||
425 | /* | ||
426 | * Re-enable any watchpoints before delivering the | ||
427 | * signal to user space. The processor register will | ||
428 | * have been cleared if the watchpoint triggered | ||
429 | * inside the kernel. | ||
430 | */ | ||
431 | if (current->thread.debugreg7) | ||
432 | set_debugreg(current->thread.debugreg7, 7); | ||
433 | |||
434 | /* Whee! Actually deliver the signal. */ | ||
435 | if (handle_signal(signr, &info, &ka, oldset, regs) == 0) { | ||
436 | /* | ||
437 | * A signal was successfully delivered; the saved | ||
438 | * sigmask will have been stored in the signal frame, | ||
439 | * and will be restored by sigreturn, so we can simply | ||
440 | * clear the TS_RESTORE_SIGMASK flag. | ||
441 | */ | ||
442 | current_thread_info()->status &= ~TS_RESTORE_SIGMASK; | ||
443 | } | ||
444 | return; | ||
445 | } | ||
446 | |||
447 | /* Did we come from a system call? */ | ||
448 | if (syscall_get_nr(current, regs) >= 0) { | ||
449 | /* Restart the system call - no handlers present */ | ||
450 | switch (syscall_get_error(current, regs)) { | ||
451 | case -ERESTARTNOHAND: | ||
452 | case -ERESTARTSYS: | ||
453 | case -ERESTARTNOINTR: | ||
454 | regs->ax = regs->orig_ax; | ||
455 | regs->ip -= 2; | ||
456 | break; | ||
457 | |||
458 | case -ERESTART_RESTARTBLOCK: | ||
459 | regs->ax = NR_restart_syscall; | ||
460 | regs->ip -= 2; | ||
461 | break; | ||
462 | } | ||
463 | } | ||
464 | |||
465 | /* | ||
466 | * If there's no signal to deliver, we just put the saved sigmask | ||
467 | * back. | ||
468 | */ | ||
469 | if (current_thread_info()->status & TS_RESTORE_SIGMASK) { | ||
470 | current_thread_info()->status &= ~TS_RESTORE_SIGMASK; | ||
471 | sigprocmask(SIG_SETMASK, &current->saved_sigmask, NULL); | ||
472 | } | ||
473 | } | ||
474 | |||
475 | /* | ||
476 | * notification of userspace execution resumption | ||
477 | * - triggered by the TIF_WORK_MASK flags | ||
478 | */ | ||
479 | void | ||
480 | do_notify_resume(struct pt_regs *regs, void *unused, __u32 thread_info_flags) | ||
481 | { | ||
482 | #if defined(CONFIG_X86_64) && defined(CONFIG_X86_MCE) | ||
483 | /* notify userspace of pending MCEs */ | ||
484 | if (thread_info_flags & _TIF_MCE_NOTIFY) | ||
485 | mce_notify_user(); | ||
486 | #endif /* CONFIG_X86_64 && CONFIG_X86_MCE */ | ||
487 | |||
488 | /* deal with pending signal delivery */ | ||
489 | if (thread_info_flags & _TIF_SIGPENDING) | ||
490 | do_signal(regs); | ||
491 | |||
492 | if (thread_info_flags & _TIF_NOTIFY_RESUME) { | ||
493 | clear_thread_flag(TIF_NOTIFY_RESUME); | ||
494 | tracehook_notify_resume(regs); | ||
495 | } | ||
496 | |||
497 | #ifdef CONFIG_X86_32 | ||
498 | clear_thread_flag(TIF_IRET); | ||
499 | #endif /* CONFIG_X86_32 */ | ||
500 | } | ||
501 | |||
502 | void signal_fault(struct pt_regs *regs, void __user *frame, char *where) | ||
503 | { | ||
504 | struct task_struct *me = current; | ||
505 | |||
506 | if (show_unhandled_signals && printk_ratelimit()) { | ||
507 | printk(KERN_INFO | ||
508 | "%s[%d] bad frame in %s frame:%p ip:%lx sp:%lx orax:%lx", | ||
509 | me->comm, me->pid, where, frame, | ||
510 | regs->ip, regs->sp, regs->orig_ax); | ||
511 | print_vma_addr(" in ", regs->ip); | ||
512 | printk(KERN_CONT "\n"); | ||
513 | } | ||
514 | |||
515 | force_sig(SIGSEGV, me); | ||
516 | } | ||
diff --git a/arch/x86/kernel/smp.c b/arch/x86/kernel/smp.c index 18f9b19f5f8f..beea2649a240 100644 --- a/arch/x86/kernel/smp.c +++ b/arch/x86/kernel/smp.c | |||
@@ -118,41 +118,28 @@ static void native_smp_send_reschedule(int cpu) | |||
118 | WARN_ON(1); | 118 | WARN_ON(1); |
119 | return; | 119 | return; |
120 | } | 120 | } |
121 | send_IPI_mask(cpumask_of_cpu(cpu), RESCHEDULE_VECTOR); | 121 | send_IPI_mask(cpumask_of(cpu), RESCHEDULE_VECTOR); |
122 | } | 122 | } |
123 | 123 | ||
124 | void native_send_call_func_single_ipi(int cpu) | 124 | void native_send_call_func_single_ipi(int cpu) |
125 | { | 125 | { |
126 | send_IPI_mask(cpumask_of_cpu(cpu), CALL_FUNCTION_SINGLE_VECTOR); | 126 | send_IPI_mask(cpumask_of(cpu), CALL_FUNCTION_SINGLE_VECTOR); |
127 | } | 127 | } |
128 | 128 | ||
129 | void native_send_call_func_ipi(cpumask_t mask) | 129 | void native_send_call_func_ipi(const struct cpumask *mask) |
130 | { | 130 | { |
131 | cpumask_t allbutself; | 131 | cpumask_t allbutself; |
132 | 132 | ||
133 | allbutself = cpu_online_map; | 133 | allbutself = cpu_online_map; |
134 | cpu_clear(smp_processor_id(), allbutself); | 134 | cpu_clear(smp_processor_id(), allbutself); |
135 | 135 | ||
136 | if (cpus_equal(mask, allbutself) && | 136 | if (cpus_equal(*mask, allbutself) && |
137 | cpus_equal(cpu_online_map, cpu_callout_map)) | 137 | cpus_equal(cpu_online_map, cpu_callout_map)) |
138 | send_IPI_allbutself(CALL_FUNCTION_VECTOR); | 138 | send_IPI_allbutself(CALL_FUNCTION_VECTOR); |
139 | else | 139 | else |
140 | send_IPI_mask(mask, CALL_FUNCTION_VECTOR); | 140 | send_IPI_mask(mask, CALL_FUNCTION_VECTOR); |
141 | } | 141 | } |
142 | 142 | ||
143 | static void stop_this_cpu(void *dummy) | ||
144 | { | ||
145 | local_irq_disable(); | ||
146 | /* | ||
147 | * Remove this CPU: | ||
148 | */ | ||
149 | cpu_clear(smp_processor_id(), cpu_online_map); | ||
150 | disable_local_APIC(); | ||
151 | if (hlt_works(smp_processor_id())) | ||
152 | for (;;) halt(); | ||
153 | for (;;); | ||
154 | } | ||
155 | |||
156 | /* | 143 | /* |
157 | * this function calls the 'stop' function on all other CPUs in the system. | 144 | * this function calls the 'stop' function on all other CPUs in the system. |
158 | */ | 145 | */ |
@@ -178,11 +165,7 @@ static void native_smp_send_stop(void) | |||
178 | void smp_reschedule_interrupt(struct pt_regs *regs) | 165 | void smp_reschedule_interrupt(struct pt_regs *regs) |
179 | { | 166 | { |
180 | ack_APIC_irq(); | 167 | ack_APIC_irq(); |
181 | #ifdef CONFIG_X86_32 | 168 | inc_irq_stat(irq_resched_count); |
182 | __get_cpu_var(irq_stat).irq_resched_count++; | ||
183 | #else | ||
184 | add_pda(irq_resched_count, 1); | ||
185 | #endif | ||
186 | } | 169 | } |
187 | 170 | ||
188 | void smp_call_function_interrupt(struct pt_regs *regs) | 171 | void smp_call_function_interrupt(struct pt_regs *regs) |
@@ -190,11 +173,7 @@ void smp_call_function_interrupt(struct pt_regs *regs) | |||
190 | ack_APIC_irq(); | 173 | ack_APIC_irq(); |
191 | irq_enter(); | 174 | irq_enter(); |
192 | generic_smp_call_function_interrupt(); | 175 | generic_smp_call_function_interrupt(); |
193 | #ifdef CONFIG_X86_32 | 176 | inc_irq_stat(irq_call_count); |
194 | __get_cpu_var(irq_stat).irq_call_count++; | ||
195 | #else | ||
196 | add_pda(irq_call_count, 1); | ||
197 | #endif | ||
198 | irq_exit(); | 177 | irq_exit(); |
199 | } | 178 | } |
200 | 179 | ||
@@ -203,11 +182,7 @@ void smp_call_function_single_interrupt(struct pt_regs *regs) | |||
203 | ack_APIC_irq(); | 182 | ack_APIC_irq(); |
204 | irq_enter(); | 183 | irq_enter(); |
205 | generic_smp_call_function_single_interrupt(); | 184 | generic_smp_call_function_single_interrupt(); |
206 | #ifdef CONFIG_X86_32 | 185 | inc_irq_stat(irq_call_count); |
207 | __get_cpu_var(irq_stat).irq_call_count++; | ||
208 | #else | ||
209 | add_pda(irq_call_count, 1); | ||
210 | #endif | ||
211 | irq_exit(); | 186 | irq_exit(); |
212 | } | 187 | } |
213 | 188 | ||
diff --git a/arch/x86/kernel/smpboot.c b/arch/x86/kernel/smpboot.c index f71f96fc9e62..6bd4d9b73870 100644 --- a/arch/x86/kernel/smpboot.c +++ b/arch/x86/kernel/smpboot.c | |||
@@ -62,6 +62,7 @@ | |||
62 | #include <asm/mtrr.h> | 62 | #include <asm/mtrr.h> |
63 | #include <asm/vmi.h> | 63 | #include <asm/vmi.h> |
64 | #include <asm/genapic.h> | 64 | #include <asm/genapic.h> |
65 | #include <asm/setup.h> | ||
65 | #include <linux/mc146818rtc.h> | 66 | #include <linux/mc146818rtc.h> |
66 | 67 | ||
67 | #include <mach_apic.h> | 68 | #include <mach_apic.h> |
@@ -101,14 +102,8 @@ EXPORT_SYMBOL(smp_num_siblings); | |||
101 | /* Last level cache ID of each logical CPU */ | 102 | /* Last level cache ID of each logical CPU */ |
102 | DEFINE_PER_CPU(u16, cpu_llc_id) = BAD_APICID; | 103 | DEFINE_PER_CPU(u16, cpu_llc_id) = BAD_APICID; |
103 | 104 | ||
104 | /* bitmap of online cpus */ | ||
105 | cpumask_t cpu_online_map __read_mostly; | ||
106 | EXPORT_SYMBOL(cpu_online_map); | ||
107 | |||
108 | cpumask_t cpu_callin_map; | 105 | cpumask_t cpu_callin_map; |
109 | cpumask_t cpu_callout_map; | 106 | cpumask_t cpu_callout_map; |
110 | cpumask_t cpu_possible_map; | ||
111 | EXPORT_SYMBOL(cpu_possible_map); | ||
112 | 107 | ||
113 | /* representing HT siblings of each logical CPU */ | 108 | /* representing HT siblings of each logical CPU */ |
114 | DEFINE_PER_CPU(cpumask_t, cpu_sibling_map); | 109 | DEFINE_PER_CPU(cpumask_t, cpu_sibling_map); |
@@ -287,7 +282,7 @@ static int __cpuinitdata unsafe_smp; | |||
287 | /* | 282 | /* |
288 | * Activate a secondary processor. | 283 | * Activate a secondary processor. |
289 | */ | 284 | */ |
290 | static void __cpuinit start_secondary(void *unused) | 285 | notrace static void __cpuinit start_secondary(void *unused) |
291 | { | 286 | { |
292 | /* | 287 | /* |
293 | * Don't put *anything* before cpu_init(), SMP booting is too | 288 | * Don't put *anything* before cpu_init(), SMP booting is too |
@@ -501,7 +496,7 @@ void __cpuinit set_cpu_sibling_map(int cpu) | |||
501 | } | 496 | } |
502 | 497 | ||
503 | /* maps the cpu to the sched domain representing multi-core */ | 498 | /* maps the cpu to the sched domain representing multi-core */ |
504 | cpumask_t cpu_coregroup_map(int cpu) | 499 | const struct cpumask *cpu_coregroup_mask(int cpu) |
505 | { | 500 | { |
506 | struct cpuinfo_x86 *c = &cpu_data(cpu); | 501 | struct cpuinfo_x86 *c = &cpu_data(cpu); |
507 | /* | 502 | /* |
@@ -509,9 +504,14 @@ cpumask_t cpu_coregroup_map(int cpu) | |||
509 | * And for power savings, we return cpu_core_map | 504 | * And for power savings, we return cpu_core_map |
510 | */ | 505 | */ |
511 | if (sched_mc_power_savings || sched_smt_power_savings) | 506 | if (sched_mc_power_savings || sched_smt_power_savings) |
512 | return per_cpu(cpu_core_map, cpu); | 507 | return &per_cpu(cpu_core_map, cpu); |
513 | else | 508 | else |
514 | return c->llc_shared_map; | 509 | return &c->llc_shared_map; |
510 | } | ||
511 | |||
512 | cpumask_t cpu_coregroup_map(int cpu) | ||
513 | { | ||
514 | return *cpu_coregroup_mask(cpu); | ||
515 | } | 515 | } |
516 | 516 | ||
517 | static void impress_friends(void) | 517 | static void impress_friends(void) |
@@ -534,7 +534,7 @@ static void impress_friends(void) | |||
534 | pr_debug("Before bogocount - setting activated=1.\n"); | 534 | pr_debug("Before bogocount - setting activated=1.\n"); |
535 | } | 535 | } |
536 | 536 | ||
537 | static inline void __inquire_remote_apic(int apicid) | 537 | void __inquire_remote_apic(int apicid) |
538 | { | 538 | { |
539 | unsigned i, regs[] = { APIC_ID >> 4, APIC_LVR >> 4, APIC_SPIV >> 4 }; | 539 | unsigned i, regs[] = { APIC_ID >> 4, APIC_LVR >> 4, APIC_SPIV >> 4 }; |
540 | char *names[] = { "ID", "VERSION", "SPIV" }; | 540 | char *names[] = { "ID", "VERSION", "SPIV" }; |
@@ -573,14 +573,13 @@ static inline void __inquire_remote_apic(int apicid) | |||
573 | } | 573 | } |
574 | } | 574 | } |
575 | 575 | ||
576 | #ifdef WAKE_SECONDARY_VIA_NMI | ||
577 | /* | 576 | /* |
578 | * Poke the other CPU in the eye via NMI to wake it up. Remember that the normal | 577 | * Poke the other CPU in the eye via NMI to wake it up. Remember that the normal |
579 | * INIT, INIT, STARTUP sequence will reset the chip hard for us, and this | 578 | * INIT, INIT, STARTUP sequence will reset the chip hard for us, and this |
580 | * won't ... remember to clear down the APIC, etc later. | 579 | * won't ... remember to clear down the APIC, etc later. |
581 | */ | 580 | */ |
582 | static int __devinit | 581 | int __devinit |
583 | wakeup_secondary_cpu(int logical_apicid, unsigned long start_eip) | 582 | wakeup_secondary_cpu_via_nmi(int logical_apicid, unsigned long start_eip) |
584 | { | 583 | { |
585 | unsigned long send_status, accept_status = 0; | 584 | unsigned long send_status, accept_status = 0; |
586 | int maxlvt; | 585 | int maxlvt; |
@@ -597,7 +596,7 @@ wakeup_secondary_cpu(int logical_apicid, unsigned long start_eip) | |||
597 | * Give the other CPU some time to accept the IPI. | 596 | * Give the other CPU some time to accept the IPI. |
598 | */ | 597 | */ |
599 | udelay(200); | 598 | udelay(200); |
600 | if (APIC_INTEGRATED(apic_version[phys_apicid])) { | 599 | if (APIC_INTEGRATED(apic_version[boot_cpu_physical_apicid])) { |
601 | maxlvt = lapic_get_maxlvt(); | 600 | maxlvt = lapic_get_maxlvt(); |
602 | if (maxlvt > 3) /* Due to the Pentium erratum 3AP. */ | 601 | if (maxlvt > 3) /* Due to the Pentium erratum 3AP. */ |
603 | apic_write(APIC_ESR, 0); | 602 | apic_write(APIC_ESR, 0); |
@@ -612,11 +611,9 @@ wakeup_secondary_cpu(int logical_apicid, unsigned long start_eip) | |||
612 | 611 | ||
613 | return (send_status | accept_status); | 612 | return (send_status | accept_status); |
614 | } | 613 | } |
615 | #endif /* WAKE_SECONDARY_VIA_NMI */ | ||
616 | 614 | ||
617 | #ifdef WAKE_SECONDARY_VIA_INIT | 615 | int __devinit |
618 | static int __devinit | 616 | wakeup_secondary_cpu_via_init(int phys_apicid, unsigned long start_eip) |
619 | wakeup_secondary_cpu(int phys_apicid, unsigned long start_eip) | ||
620 | { | 617 | { |
621 | unsigned long send_status, accept_status = 0; | 618 | unsigned long send_status, accept_status = 0; |
622 | int maxlvt, num_starts, j; | 619 | int maxlvt, num_starts, j; |
@@ -735,7 +732,6 @@ wakeup_secondary_cpu(int phys_apicid, unsigned long start_eip) | |||
735 | 732 | ||
736 | return (send_status | accept_status); | 733 | return (send_status | accept_status); |
737 | } | 734 | } |
738 | #endif /* WAKE_SECONDARY_VIA_INIT */ | ||
739 | 735 | ||
740 | struct create_idle { | 736 | struct create_idle { |
741 | struct work_struct work; | 737 | struct work_struct work; |
@@ -1084,8 +1080,10 @@ static int __init smp_sanity_check(unsigned max_cpus) | |||
1084 | #endif | 1080 | #endif |
1085 | 1081 | ||
1086 | if (!physid_isset(hard_smp_processor_id(), phys_cpu_present_map)) { | 1082 | if (!physid_isset(hard_smp_processor_id(), phys_cpu_present_map)) { |
1087 | printk(KERN_WARNING "weird, boot CPU (#%d) not listed" | 1083 | printk(KERN_WARNING |
1088 | "by the BIOS.\n", hard_smp_processor_id()); | 1084 | "weird, boot CPU (#%d) not listed by the BIOS.\n", |
1085 | hard_smp_processor_id()); | ||
1086 | |||
1089 | physid_set(hard_smp_processor_id(), phys_cpu_present_map); | 1087 | physid_set(hard_smp_processor_id(), phys_cpu_present_map); |
1090 | } | 1088 | } |
1091 | 1089 | ||
@@ -1156,7 +1154,7 @@ static void __init smp_cpu_index_default(void) | |||
1156 | for_each_possible_cpu(i) { | 1154 | for_each_possible_cpu(i) { |
1157 | c = &cpu_data(i); | 1155 | c = &cpu_data(i); |
1158 | /* mark all to hotplug */ | 1156 | /* mark all to hotplug */ |
1159 | c->cpu_index = NR_CPUS; | 1157 | c->cpu_index = nr_cpu_ids; |
1160 | } | 1158 | } |
1161 | } | 1159 | } |
1162 | 1160 | ||
@@ -1261,6 +1259,15 @@ void __init native_smp_cpus_done(unsigned int max_cpus) | |||
1261 | check_nmi_watchdog(); | 1259 | check_nmi_watchdog(); |
1262 | } | 1260 | } |
1263 | 1261 | ||
1262 | static int __initdata setup_possible_cpus = -1; | ||
1263 | static int __init _setup_possible_cpus(char *str) | ||
1264 | { | ||
1265 | get_option(&str, &setup_possible_cpus); | ||
1266 | return 0; | ||
1267 | } | ||
1268 | early_param("possible_cpus", _setup_possible_cpus); | ||
1269 | |||
1270 | |||
1264 | /* | 1271 | /* |
1265 | * cpu_possible_map should be static, it cannot change as cpu's | 1272 | * cpu_possible_map should be static, it cannot change as cpu's |
1266 | * are onlined, or offlined. The reason is per-cpu data-structures | 1273 | * are onlined, or offlined. The reason is per-cpu data-structures |
@@ -1273,7 +1280,7 @@ void __init native_smp_cpus_done(unsigned int max_cpus) | |||
1273 | * | 1280 | * |
1274 | * Three ways to find out the number of additional hotplug CPUs: | 1281 | * Three ways to find out the number of additional hotplug CPUs: |
1275 | * - If the BIOS specified disabled CPUs in ACPI/mptables use that. | 1282 | * - If the BIOS specified disabled CPUs in ACPI/mptables use that. |
1276 | * - The user can overwrite it with additional_cpus=NUM | 1283 | * - The user can overwrite it with possible_cpus=NUM |
1277 | * - Otherwise don't reserve additional CPUs. | 1284 | * - Otherwise don't reserve additional CPUs. |
1278 | * We do this because additional CPUs waste a lot of memory. | 1285 | * We do this because additional CPUs waste a lot of memory. |
1279 | * -AK | 1286 | * -AK |
@@ -1286,9 +1293,19 @@ __init void prefill_possible_map(void) | |||
1286 | if (!num_processors) | 1293 | if (!num_processors) |
1287 | num_processors = 1; | 1294 | num_processors = 1; |
1288 | 1295 | ||
1289 | possible = num_processors + disabled_cpus; | 1296 | if (setup_possible_cpus == -1) |
1290 | if (possible > NR_CPUS) | 1297 | possible = num_processors + disabled_cpus; |
1291 | possible = NR_CPUS; | 1298 | else |
1299 | possible = setup_possible_cpus; | ||
1300 | |||
1301 | total_cpus = max_t(int, possible, num_processors + disabled_cpus); | ||
1302 | |||
1303 | if (possible > CONFIG_NR_CPUS) { | ||
1304 | printk(KERN_WARNING | ||
1305 | "%d Processors exceeds NR_CPUS limit of %d\n", | ||
1306 | possible, CONFIG_NR_CPUS); | ||
1307 | possible = CONFIG_NR_CPUS; | ||
1308 | } | ||
1292 | 1309 | ||
1293 | printk(KERN_INFO "SMP: Allowing %d CPUs, %d hotplug CPUs\n", | 1310 | printk(KERN_INFO "SMP: Allowing %d CPUs, %d hotplug CPUs\n", |
1294 | possible, max_t(int, possible - num_processors, 0)); | 1311 | possible, max_t(int, possible - num_processors, 0)); |
@@ -1353,7 +1370,7 @@ void cpu_disable_common(void) | |||
1353 | lock_vector_lock(); | 1370 | lock_vector_lock(); |
1354 | remove_cpu_from_maps(cpu); | 1371 | remove_cpu_from_maps(cpu); |
1355 | unlock_vector_lock(); | 1372 | unlock_vector_lock(); |
1356 | fixup_irqs(cpu_online_map); | 1373 | fixup_irqs(); |
1357 | } | 1374 | } |
1358 | 1375 | ||
1359 | int native_cpu_disable(void) | 1376 | int native_cpu_disable(void) |
diff --git a/arch/x86/kernel/stacktrace.c b/arch/x86/kernel/stacktrace.c index a03e7f6d90c3..10786af95545 100644 --- a/arch/x86/kernel/stacktrace.c +++ b/arch/x86/kernel/stacktrace.c | |||
@@ -6,6 +6,7 @@ | |||
6 | #include <linux/sched.h> | 6 | #include <linux/sched.h> |
7 | #include <linux/stacktrace.h> | 7 | #include <linux/stacktrace.h> |
8 | #include <linux/module.h> | 8 | #include <linux/module.h> |
9 | #include <linux/uaccess.h> | ||
9 | #include <asm/stacktrace.h> | 10 | #include <asm/stacktrace.h> |
10 | 11 | ||
11 | static void save_stack_warning(void *data, char *msg) | 12 | static void save_stack_warning(void *data, char *msg) |
@@ -83,3 +84,66 @@ void save_stack_trace_tsk(struct task_struct *tsk, struct stack_trace *trace) | |||
83 | trace->entries[trace->nr_entries++] = ULONG_MAX; | 84 | trace->entries[trace->nr_entries++] = ULONG_MAX; |
84 | } | 85 | } |
85 | EXPORT_SYMBOL_GPL(save_stack_trace_tsk); | 86 | EXPORT_SYMBOL_GPL(save_stack_trace_tsk); |
87 | |||
88 | /* Userspace stacktrace - based on kernel/trace/trace_sysprof.c */ | ||
89 | |||
90 | struct stack_frame { | ||
91 | const void __user *next_fp; | ||
92 | unsigned long ret_addr; | ||
93 | }; | ||
94 | |||
95 | static int copy_stack_frame(const void __user *fp, struct stack_frame *frame) | ||
96 | { | ||
97 | int ret; | ||
98 | |||
99 | if (!access_ok(VERIFY_READ, fp, sizeof(*frame))) | ||
100 | return 0; | ||
101 | |||
102 | ret = 1; | ||
103 | pagefault_disable(); | ||
104 | if (__copy_from_user_inatomic(frame, fp, sizeof(*frame))) | ||
105 | ret = 0; | ||
106 | pagefault_enable(); | ||
107 | |||
108 | return ret; | ||
109 | } | ||
110 | |||
111 | static inline void __save_stack_trace_user(struct stack_trace *trace) | ||
112 | { | ||
113 | const struct pt_regs *regs = task_pt_regs(current); | ||
114 | const void __user *fp = (const void __user *)regs->bp; | ||
115 | |||
116 | if (trace->nr_entries < trace->max_entries) | ||
117 | trace->entries[trace->nr_entries++] = regs->ip; | ||
118 | |||
119 | while (trace->nr_entries < trace->max_entries) { | ||
120 | struct stack_frame frame; | ||
121 | |||
122 | frame.next_fp = NULL; | ||
123 | frame.ret_addr = 0; | ||
124 | if (!copy_stack_frame(fp, &frame)) | ||
125 | break; | ||
126 | if ((unsigned long)fp < regs->sp) | ||
127 | break; | ||
128 | if (frame.ret_addr) { | ||
129 | trace->entries[trace->nr_entries++] = | ||
130 | frame.ret_addr; | ||
131 | } | ||
132 | if (fp == frame.next_fp) | ||
133 | break; | ||
134 | fp = frame.next_fp; | ||
135 | } | ||
136 | } | ||
137 | |||
138 | void save_stack_trace_user(struct stack_trace *trace) | ||
139 | { | ||
140 | /* | ||
141 | * Trace user stack if we are not a kernel thread | ||
142 | */ | ||
143 | if (current->mm) { | ||
144 | __save_stack_trace_user(trace); | ||
145 | } | ||
146 | if (trace->nr_entries < trace->max_entries) | ||
147 | trace->entries[trace->nr_entries++] = ULONG_MAX; | ||
148 | } | ||
149 | |||
diff --git a/arch/x86/kernel/time_32.c b/arch/x86/kernel/time_32.c index 77b400f06ea2..65309e4cb1c0 100644 --- a/arch/x86/kernel/time_32.c +++ b/arch/x86/kernel/time_32.c | |||
@@ -75,7 +75,7 @@ EXPORT_SYMBOL(profile_pc); | |||
75 | irqreturn_t timer_interrupt(int irq, void *dev_id) | 75 | irqreturn_t timer_interrupt(int irq, void *dev_id) |
76 | { | 76 | { |
77 | /* Keep nmi watchdog up to date */ | 77 | /* Keep nmi watchdog up to date */ |
78 | per_cpu(irq_stat, smp_processor_id()).irq0_irqs++; | 78 | inc_irq_stat(irq0_irqs); |
79 | 79 | ||
80 | #ifdef CONFIG_X86_IO_APIC | 80 | #ifdef CONFIG_X86_IO_APIC |
81 | if (timer_ack) { | 81 | if (timer_ack) { |
diff --git a/arch/x86/kernel/time_64.c b/arch/x86/kernel/time_64.c index cb19d650c216..891e7a7c4334 100644 --- a/arch/x86/kernel/time_64.c +++ b/arch/x86/kernel/time_64.c | |||
@@ -49,9 +49,9 @@ unsigned long profile_pc(struct pt_regs *regs) | |||
49 | } | 49 | } |
50 | EXPORT_SYMBOL(profile_pc); | 50 | EXPORT_SYMBOL(profile_pc); |
51 | 51 | ||
52 | irqreturn_t timer_interrupt(int irq, void *dev_id) | 52 | static irqreturn_t timer_interrupt(int irq, void *dev_id) |
53 | { | 53 | { |
54 | add_pda(irq0_irqs, 1); | 54 | inc_irq_stat(irq0_irqs); |
55 | 55 | ||
56 | global_clock_event->event_handler(global_clock_event); | 56 | global_clock_event->event_handler(global_clock_event); |
57 | 57 | ||
@@ -80,6 +80,8 @@ unsigned long __init calibrate_cpu(void) | |||
80 | break; | 80 | break; |
81 | no_ctr_free = (i == 4); | 81 | no_ctr_free = (i == 4); |
82 | if (no_ctr_free) { | 82 | if (no_ctr_free) { |
83 | WARN(1, KERN_WARNING "Warning: AMD perfctrs busy ... " | ||
84 | "cpu_khz value may be incorrect.\n"); | ||
83 | i = 3; | 85 | i = 3; |
84 | rdmsrl(MSR_K7_EVNTSEL3, evntsel3); | 86 | rdmsrl(MSR_K7_EVNTSEL3, evntsel3); |
85 | wrmsrl(MSR_K7_EVNTSEL3, 0); | 87 | wrmsrl(MSR_K7_EVNTSEL3, 0); |
diff --git a/arch/x86/kernel/tlb_32.c b/arch/x86/kernel/tlb_32.c index f4049f3513b6..ce5054642247 100644 --- a/arch/x86/kernel/tlb_32.c +++ b/arch/x86/kernel/tlb_32.c | |||
@@ -34,9 +34,8 @@ static DEFINE_SPINLOCK(tlbstate_lock); | |||
34 | */ | 34 | */ |
35 | void leave_mm(int cpu) | 35 | void leave_mm(int cpu) |
36 | { | 36 | { |
37 | if (per_cpu(cpu_tlbstate, cpu).state == TLBSTATE_OK) | 37 | BUG_ON(x86_read_percpu(cpu_tlbstate.state) == TLBSTATE_OK); |
38 | BUG(); | 38 | cpu_clear(cpu, x86_read_percpu(cpu_tlbstate.active_mm)->cpu_vm_mask); |
39 | cpu_clear(cpu, per_cpu(cpu_tlbstate, cpu).active_mm->cpu_vm_mask); | ||
40 | load_cr3(swapper_pg_dir); | 39 | load_cr3(swapper_pg_dir); |
41 | } | 40 | } |
42 | EXPORT_SYMBOL_GPL(leave_mm); | 41 | EXPORT_SYMBOL_GPL(leave_mm); |
@@ -104,8 +103,8 @@ void smp_invalidate_interrupt(struct pt_regs *regs) | |||
104 | * BUG(); | 103 | * BUG(); |
105 | */ | 104 | */ |
106 | 105 | ||
107 | if (flush_mm == per_cpu(cpu_tlbstate, cpu).active_mm) { | 106 | if (flush_mm == x86_read_percpu(cpu_tlbstate.active_mm)) { |
108 | if (per_cpu(cpu_tlbstate, cpu).state == TLBSTATE_OK) { | 107 | if (x86_read_percpu(cpu_tlbstate.state) == TLBSTATE_OK) { |
109 | if (flush_va == TLB_FLUSH_ALL) | 108 | if (flush_va == TLB_FLUSH_ALL) |
110 | local_flush_tlb(); | 109 | local_flush_tlb(); |
111 | else | 110 | else |
@@ -119,7 +118,7 @@ void smp_invalidate_interrupt(struct pt_regs *regs) | |||
119 | smp_mb__after_clear_bit(); | 118 | smp_mb__after_clear_bit(); |
120 | out: | 119 | out: |
121 | put_cpu_no_resched(); | 120 | put_cpu_no_resched(); |
122 | __get_cpu_var(irq_stat).irq_tlb_count++; | 121 | inc_irq_stat(irq_tlb_count); |
123 | } | 122 | } |
124 | 123 | ||
125 | void native_flush_tlb_others(const cpumask_t *cpumaskp, struct mm_struct *mm, | 124 | void native_flush_tlb_others(const cpumask_t *cpumaskp, struct mm_struct *mm, |
@@ -164,7 +163,7 @@ void native_flush_tlb_others(const cpumask_t *cpumaskp, struct mm_struct *mm, | |||
164 | * We have to send the IPI only to | 163 | * We have to send the IPI only to |
165 | * CPUs affected. | 164 | * CPUs affected. |
166 | */ | 165 | */ |
167 | send_IPI_mask(cpumask, INVALIDATE_TLB_VECTOR); | 166 | send_IPI_mask(&cpumask, INVALIDATE_TLB_VECTOR); |
168 | 167 | ||
169 | while (!cpus_empty(flush_cpumask)) | 168 | while (!cpus_empty(flush_cpumask)) |
170 | /* nothing. lockup detection does not belong here */ | 169 | /* nothing. lockup detection does not belong here */ |
@@ -238,7 +237,7 @@ static void do_flush_tlb_all(void *info) | |||
238 | unsigned long cpu = smp_processor_id(); | 237 | unsigned long cpu = smp_processor_id(); |
239 | 238 | ||
240 | __flush_tlb_all(); | 239 | __flush_tlb_all(); |
241 | if (per_cpu(cpu_tlbstate, cpu).state == TLBSTATE_LAZY) | 240 | if (x86_read_percpu(cpu_tlbstate.state) == TLBSTATE_LAZY) |
242 | leave_mm(cpu); | 241 | leave_mm(cpu); |
243 | } | 242 | } |
244 | 243 | ||
diff --git a/arch/x86/kernel/tlb_64.c b/arch/x86/kernel/tlb_64.c index 8f919ca69494..f8be6f1d2e48 100644 --- a/arch/x86/kernel/tlb_64.c +++ b/arch/x86/kernel/tlb_64.c | |||
@@ -154,7 +154,7 @@ asmlinkage void smp_invalidate_interrupt(struct pt_regs *regs) | |||
154 | out: | 154 | out: |
155 | ack_APIC_irq(); | 155 | ack_APIC_irq(); |
156 | cpu_clear(cpu, f->flush_cpumask); | 156 | cpu_clear(cpu, f->flush_cpumask); |
157 | add_pda(irq_tlb_count, 1); | 157 | inc_irq_stat(irq_tlb_count); |
158 | } | 158 | } |
159 | 159 | ||
160 | void native_flush_tlb_others(const cpumask_t *cpumaskp, struct mm_struct *mm, | 160 | void native_flush_tlb_others(const cpumask_t *cpumaskp, struct mm_struct *mm, |
@@ -191,7 +191,7 @@ void native_flush_tlb_others(const cpumask_t *cpumaskp, struct mm_struct *mm, | |||
191 | * We have to send the IPI only to | 191 | * We have to send the IPI only to |
192 | * CPUs affected. | 192 | * CPUs affected. |
193 | */ | 193 | */ |
194 | send_IPI_mask(cpumask, INVALIDATE_TLB_VECTOR_START + sender); | 194 | send_IPI_mask(&cpumask, INVALIDATE_TLB_VECTOR_START + sender); |
195 | 195 | ||
196 | while (!cpus_empty(f->flush_cpumask)) | 196 | while (!cpus_empty(f->flush_cpumask)) |
197 | cpu_relax(); | 197 | cpu_relax(); |
diff --git a/arch/x86/kernel/tlb_uv.c b/arch/x86/kernel/tlb_uv.c index 04431f34fd16..f885023167e0 100644 --- a/arch/x86/kernel/tlb_uv.c +++ b/arch/x86/kernel/tlb_uv.c | |||
@@ -566,14 +566,10 @@ static int __init uv_ptc_init(void) | |||
566 | if (!is_uv_system()) | 566 | if (!is_uv_system()) |
567 | return 0; | 567 | return 0; |
568 | 568 | ||
569 | if (!proc_mkdir("sgi_uv", NULL)) | ||
570 | return -EINVAL; | ||
571 | |||
572 | proc_uv_ptc = create_proc_entry(UV_PTC_BASENAME, 0444, NULL); | 569 | proc_uv_ptc = create_proc_entry(UV_PTC_BASENAME, 0444, NULL); |
573 | if (!proc_uv_ptc) { | 570 | if (!proc_uv_ptc) { |
574 | printk(KERN_ERR "unable to create %s proc entry\n", | 571 | printk(KERN_ERR "unable to create %s proc entry\n", |
575 | UV_PTC_BASENAME); | 572 | UV_PTC_BASENAME); |
576 | remove_proc_entry("sgi_uv", NULL); | ||
577 | return -EINVAL; | 573 | return -EINVAL; |
578 | } | 574 | } |
579 | proc_uv_ptc->proc_fops = &proc_uv_ptc_operations; | 575 | proc_uv_ptc->proc_fops = &proc_uv_ptc_operations; |
@@ -586,7 +582,6 @@ static int __init uv_ptc_init(void) | |||
586 | static struct bau_control * __init uv_table_bases_init(int blade, int node) | 582 | static struct bau_control * __init uv_table_bases_init(int blade, int node) |
587 | { | 583 | { |
588 | int i; | 584 | int i; |
589 | int *ip; | ||
590 | struct bau_msg_status *msp; | 585 | struct bau_msg_status *msp; |
591 | struct bau_control *bau_tabp; | 586 | struct bau_control *bau_tabp; |
592 | 587 | ||
@@ -603,13 +598,6 @@ static struct bau_control * __init uv_table_bases_init(int blade, int node) | |||
603 | bau_cpubits_clear(&msp->seen_by, (int) | 598 | bau_cpubits_clear(&msp->seen_by, (int) |
604 | uv_blade_nr_possible_cpus(blade)); | 599 | uv_blade_nr_possible_cpus(blade)); |
605 | 600 | ||
606 | bau_tabp->watching = | ||
607 | kmalloc_node(sizeof(int) * DEST_NUM_RESOURCES, GFP_KERNEL, node); | ||
608 | BUG_ON(!bau_tabp->watching); | ||
609 | |||
610 | for (i = 0, ip = bau_tabp->watching; i < DEST_Q_SIZE; i++, ip++) | ||
611 | *ip = 0; | ||
612 | |||
613 | uv_bau_table_bases[blade] = bau_tabp; | 601 | uv_bau_table_bases[blade] = bau_tabp; |
614 | 602 | ||
615 | return bau_tabp; | 603 | return bau_tabp; |
@@ -632,7 +620,6 @@ uv_table_bases_finish(int blade, int node, int cur_cpu, | |||
632 | bcp->bau_msg_head = bau_tablesp->va_queue_first; | 620 | bcp->bau_msg_head = bau_tablesp->va_queue_first; |
633 | bcp->va_queue_first = bau_tablesp->va_queue_first; | 621 | bcp->va_queue_first = bau_tablesp->va_queue_first; |
634 | bcp->va_queue_last = bau_tablesp->va_queue_last; | 622 | bcp->va_queue_last = bau_tablesp->va_queue_last; |
635 | bcp->watching = bau_tablesp->watching; | ||
636 | bcp->msg_statuses = bau_tablesp->msg_statuses; | 623 | bcp->msg_statuses = bau_tablesp->msg_statuses; |
637 | bcp->descriptor_base = adp; | 624 | bcp->descriptor_base = adp; |
638 | } | 625 | } |
diff --git a/arch/x86/kernel/trampoline.c b/arch/x86/kernel/trampoline.c index 1106fac6024d..808031a5ba19 100644 --- a/arch/x86/kernel/trampoline.c +++ b/arch/x86/kernel/trampoline.c | |||
@@ -1,10 +1,26 @@ | |||
1 | #include <linux/io.h> | 1 | #include <linux/io.h> |
2 | 2 | ||
3 | #include <asm/trampoline.h> | 3 | #include <asm/trampoline.h> |
4 | #include <asm/e820.h> | ||
4 | 5 | ||
5 | /* ready for x86_64 and x86 */ | 6 | /* ready for x86_64 and x86 */ |
6 | unsigned char *trampoline_base = __va(TRAMPOLINE_BASE); | 7 | unsigned char *trampoline_base = __va(TRAMPOLINE_BASE); |
7 | 8 | ||
9 | void __init reserve_trampoline_memory(void) | ||
10 | { | ||
11 | #ifdef CONFIG_X86_32 | ||
12 | /* | ||
13 | * But first pinch a few for the stack/trampoline stuff | ||
14 | * FIXME: Don't need the extra page at 4K, but need to fix | ||
15 | * trampoline before removing it. (see the GDT stuff) | ||
16 | */ | ||
17 | reserve_early(PAGE_SIZE, PAGE_SIZE + PAGE_SIZE, "EX TRAMPOLINE"); | ||
18 | #endif | ||
19 | /* Has to be in very low memory so we can execute real-mode AP code. */ | ||
20 | reserve_early(TRAMPOLINE_BASE, TRAMPOLINE_BASE + TRAMPOLINE_SIZE, | ||
21 | "TRAMPOLINE"); | ||
22 | } | ||
23 | |||
8 | /* | 24 | /* |
9 | * Currently trivial. Write the real->protected mode | 25 | * Currently trivial. Write the real->protected mode |
10 | * bootstrap into the page concerned. The caller | 26 | * bootstrap into the page concerned. The caller |
@@ -12,7 +28,6 @@ unsigned char *trampoline_base = __va(TRAMPOLINE_BASE); | |||
12 | */ | 28 | */ |
13 | unsigned long setup_trampoline(void) | 29 | unsigned long setup_trampoline(void) |
14 | { | 30 | { |
15 | memcpy(trampoline_base, trampoline_data, | 31 | memcpy(trampoline_base, trampoline_data, TRAMPOLINE_SIZE); |
16 | trampoline_end - trampoline_data); | ||
17 | return virt_to_phys(trampoline_base); | 32 | return virt_to_phys(trampoline_base); |
18 | } | 33 | } |
diff --git a/arch/x86/kernel/traps.c b/arch/x86/kernel/traps.c index 04d242ab0161..c9a666cdd3db 100644 --- a/arch/x86/kernel/traps.c +++ b/arch/x86/kernel/traps.c | |||
@@ -20,7 +20,6 @@ | |||
20 | #include <linux/module.h> | 20 | #include <linux/module.h> |
21 | #include <linux/ptrace.h> | 21 | #include <linux/ptrace.h> |
22 | #include <linux/string.h> | 22 | #include <linux/string.h> |
23 | #include <linux/unwind.h> | ||
24 | #include <linux/delay.h> | 23 | #include <linux/delay.h> |
25 | #include <linux/errno.h> | 24 | #include <linux/errno.h> |
26 | #include <linux/kexec.h> | 25 | #include <linux/kexec.h> |
@@ -51,7 +50,6 @@ | |||
51 | #include <asm/debugreg.h> | 50 | #include <asm/debugreg.h> |
52 | #include <asm/atomic.h> | 51 | #include <asm/atomic.h> |
53 | #include <asm/system.h> | 52 | #include <asm/system.h> |
54 | #include <asm/unwind.h> | ||
55 | #include <asm/traps.h> | 53 | #include <asm/traps.h> |
56 | #include <asm/desc.h> | 54 | #include <asm/desc.h> |
57 | #include <asm/i387.h> | 55 | #include <asm/i387.h> |
@@ -72,9 +70,6 @@ | |||
72 | 70 | ||
73 | #include "cpu/mcheck/mce.h" | 71 | #include "cpu/mcheck/mce.h" |
74 | 72 | ||
75 | DECLARE_BITMAP(used_vectors, NR_VECTORS); | ||
76 | EXPORT_SYMBOL_GPL(used_vectors); | ||
77 | |||
78 | asmlinkage int system_call(void); | 73 | asmlinkage int system_call(void); |
79 | 74 | ||
80 | /* Do we ignore FPU interrupts ? */ | 75 | /* Do we ignore FPU interrupts ? */ |
@@ -89,6 +84,9 @@ gate_desc idt_table[256] | |||
89 | __attribute__((__section__(".data.idt"))) = { { { { 0, 0 } } }, }; | 84 | __attribute__((__section__(".data.idt"))) = { { { { 0, 0 } } }, }; |
90 | #endif | 85 | #endif |
91 | 86 | ||
87 | DECLARE_BITMAP(used_vectors, NR_VECTORS); | ||
88 | EXPORT_SYMBOL_GPL(used_vectors); | ||
89 | |||
92 | static int ignore_nmis; | 90 | static int ignore_nmis; |
93 | 91 | ||
94 | static inline void conditional_sti(struct pt_regs *regs) | 92 | static inline void conditional_sti(struct pt_regs *regs) |
@@ -292,8 +290,10 @@ dotraplinkage void do_double_fault(struct pt_regs *regs, long error_code) | |||
292 | tsk->thread.error_code = error_code; | 290 | tsk->thread.error_code = error_code; |
293 | tsk->thread.trap_no = 8; | 291 | tsk->thread.trap_no = 8; |
294 | 292 | ||
295 | /* This is always a kernel trap and never fixable (and thus must | 293 | /* |
296 | never return). */ | 294 | * This is always a kernel trap and never fixable (and thus must |
295 | * never return). | ||
296 | */ | ||
297 | for (;;) | 297 | for (;;) |
298 | die(str, regs, error_code); | 298 | die(str, regs, error_code); |
299 | } | 299 | } |
@@ -481,11 +481,7 @@ do_nmi(struct pt_regs *regs, long error_code) | |||
481 | { | 481 | { |
482 | nmi_enter(); | 482 | nmi_enter(); |
483 | 483 | ||
484 | #ifdef CONFIG_X86_32 | 484 | inc_irq_stat(__nmi_count); |
485 | { int cpu; cpu = smp_processor_id(); ++nmi_count(cpu); } | ||
486 | #else | ||
487 | add_pda(__nmi_count, 1); | ||
488 | #endif | ||
489 | 485 | ||
490 | if (!ignore_nmis) | 486 | if (!ignore_nmis) |
491 | default_do_nmi(regs); | 487 | default_do_nmi(regs); |
@@ -524,9 +520,11 @@ dotraplinkage void __kprobes do_int3(struct pt_regs *regs, long error_code) | |||
524 | } | 520 | } |
525 | 521 | ||
526 | #ifdef CONFIG_X86_64 | 522 | #ifdef CONFIG_X86_64 |
527 | /* Help handler running on IST stack to switch back to user stack | 523 | /* |
528 | for scheduling or signal handling. The actual stack switch is done in | 524 | * Help handler running on IST stack to switch back to user stack |
529 | entry.S */ | 525 | * for scheduling or signal handling. The actual stack switch is done in |
526 | * entry.S | ||
527 | */ | ||
530 | asmlinkage __kprobes struct pt_regs *sync_regs(struct pt_regs *eregs) | 528 | asmlinkage __kprobes struct pt_regs *sync_regs(struct pt_regs *eregs) |
531 | { | 529 | { |
532 | struct pt_regs *regs = eregs; | 530 | struct pt_regs *regs = eregs; |
@@ -536,8 +534,10 @@ asmlinkage __kprobes struct pt_regs *sync_regs(struct pt_regs *eregs) | |||
536 | /* Exception from user space */ | 534 | /* Exception from user space */ |
537 | else if (user_mode(eregs)) | 535 | else if (user_mode(eregs)) |
538 | regs = task_pt_regs(current); | 536 | regs = task_pt_regs(current); |
539 | /* Exception from kernel and interrupts are enabled. Move to | 537 | /* |
540 | kernel process stack. */ | 538 | * Exception from kernel and interrupts are enabled. Move to |
539 | * kernel process stack. | ||
540 | */ | ||
541 | else if (eregs->flags & X86_EFLAGS_IF) | 541 | else if (eregs->flags & X86_EFLAGS_IF) |
542 | regs = (struct pt_regs *)(eregs->sp -= sizeof(struct pt_regs)); | 542 | regs = (struct pt_regs *)(eregs->sp -= sizeof(struct pt_regs)); |
543 | if (eregs != regs) | 543 | if (eregs != regs) |
@@ -664,7 +664,7 @@ void math_error(void __user *ip) | |||
664 | { | 664 | { |
665 | struct task_struct *task; | 665 | struct task_struct *task; |
666 | siginfo_t info; | 666 | siginfo_t info; |
667 | unsigned short cwd, swd; | 667 | unsigned short cwd, swd, err; |
668 | 668 | ||
669 | /* | 669 | /* |
670 | * Save the info for the exception handler and clear the error. | 670 | * Save the info for the exception handler and clear the error. |
@@ -675,7 +675,6 @@ void math_error(void __user *ip) | |||
675 | task->thread.error_code = 0; | 675 | task->thread.error_code = 0; |
676 | info.si_signo = SIGFPE; | 676 | info.si_signo = SIGFPE; |
677 | info.si_errno = 0; | 677 | info.si_errno = 0; |
678 | info.si_code = __SI_FAULT; | ||
679 | info.si_addr = ip; | 678 | info.si_addr = ip; |
680 | /* | 679 | /* |
681 | * (~cwd & swd) will mask out exceptions that are not set to unmasked | 680 | * (~cwd & swd) will mask out exceptions that are not set to unmasked |
@@ -689,34 +688,30 @@ void math_error(void __user *ip) | |||
689 | */ | 688 | */ |
690 | cwd = get_fpu_cwd(task); | 689 | cwd = get_fpu_cwd(task); |
691 | swd = get_fpu_swd(task); | 690 | swd = get_fpu_swd(task); |
692 | switch (swd & ~cwd & 0x3f) { | 691 | |
693 | case 0x000: /* No unmasked exception */ | 692 | err = swd & ~cwd; |
694 | #ifdef CONFIG_X86_32 | 693 | |
695 | return; | 694 | if (err & 0x001) { /* Invalid op */ |
696 | #endif | ||
697 | default: /* Multiple exceptions */ | ||
698 | break; | ||
699 | case 0x001: /* Invalid Op */ | ||
700 | /* | 695 | /* |
701 | * swd & 0x240 == 0x040: Stack Underflow | 696 | * swd & 0x240 == 0x040: Stack Underflow |
702 | * swd & 0x240 == 0x240: Stack Overflow | 697 | * swd & 0x240 == 0x240: Stack Overflow |
703 | * User must clear the SF bit (0x40) if set | 698 | * User must clear the SF bit (0x40) if set |
704 | */ | 699 | */ |
705 | info.si_code = FPE_FLTINV; | 700 | info.si_code = FPE_FLTINV; |
706 | break; | 701 | } else if (err & 0x004) { /* Divide by Zero */ |
707 | case 0x002: /* Denormalize */ | ||
708 | case 0x010: /* Underflow */ | ||
709 | info.si_code = FPE_FLTUND; | ||
710 | break; | ||
711 | case 0x004: /* Zero Divide */ | ||
712 | info.si_code = FPE_FLTDIV; | 702 | info.si_code = FPE_FLTDIV; |
713 | break; | 703 | } else if (err & 0x008) { /* Overflow */ |
714 | case 0x008: /* Overflow */ | ||
715 | info.si_code = FPE_FLTOVF; | 704 | info.si_code = FPE_FLTOVF; |
716 | break; | 705 | } else if (err & 0x012) { /* Denormal, Underflow */ |
717 | case 0x020: /* Precision */ | 706 | info.si_code = FPE_FLTUND; |
707 | } else if (err & 0x020) { /* Precision */ | ||
718 | info.si_code = FPE_FLTRES; | 708 | info.si_code = FPE_FLTRES; |
719 | break; | 709 | } else { |
710 | /* | ||
711 | * If we're using IRQ 13, or supposedly even some trap 16 | ||
712 | * implementations, it's possible we get a spurious trap... | ||
713 | */ | ||
714 | return; /* Spurious trap, no error */ | ||
720 | } | 715 | } |
721 | force_sig_info(SIGFPE, &info, task); | 716 | force_sig_info(SIGFPE, &info, task); |
722 | } | 717 | } |
@@ -949,9 +944,7 @@ dotraplinkage void do_iret_error(struct pt_regs *regs, long error_code) | |||
949 | 944 | ||
950 | void __init trap_init(void) | 945 | void __init trap_init(void) |
951 | { | 946 | { |
952 | #ifdef CONFIG_X86_32 | ||
953 | int i; | 947 | int i; |
954 | #endif | ||
955 | 948 | ||
956 | #ifdef CONFIG_EISA | 949 | #ifdef CONFIG_EISA |
957 | void __iomem *p = early_ioremap(0x0FFFD9, 4); | 950 | void __iomem *p = early_ioremap(0x0FFFD9, 4); |
@@ -1008,11 +1001,15 @@ void __init trap_init(void) | |||
1008 | } | 1001 | } |
1009 | 1002 | ||
1010 | set_system_trap_gate(SYSCALL_VECTOR, &system_call); | 1003 | set_system_trap_gate(SYSCALL_VECTOR, &system_call); |
1004 | #endif | ||
1011 | 1005 | ||
1012 | /* Reserve all the builtin and the syscall vector: */ | 1006 | /* Reserve all the builtin and the syscall vector: */ |
1013 | for (i = 0; i < FIRST_EXTERNAL_VECTOR; i++) | 1007 | for (i = 0; i < FIRST_EXTERNAL_VECTOR; i++) |
1014 | set_bit(i, used_vectors); | 1008 | set_bit(i, used_vectors); |
1015 | 1009 | ||
1010 | #ifdef CONFIG_X86_64 | ||
1011 | set_bit(IA32_SYSCALL_VECTOR, used_vectors); | ||
1012 | #else | ||
1016 | set_bit(SYSCALL_VECTOR, used_vectors); | 1013 | set_bit(SYSCALL_VECTOR, used_vectors); |
1017 | #endif | 1014 | #endif |
1018 | /* | 1015 | /* |
diff --git a/arch/x86/kernel/tsc.c b/arch/x86/kernel/tsc.c index 424093b157d3..599e58168631 100644 --- a/arch/x86/kernel/tsc.c +++ b/arch/x86/kernel/tsc.c | |||
@@ -15,6 +15,7 @@ | |||
15 | #include <asm/vgtod.h> | 15 | #include <asm/vgtod.h> |
16 | #include <asm/time.h> | 16 | #include <asm/time.h> |
17 | #include <asm/delay.h> | 17 | #include <asm/delay.h> |
18 | #include <asm/hypervisor.h> | ||
18 | 19 | ||
19 | unsigned int cpu_khz; /* TSC clocks / usec, not used here */ | 20 | unsigned int cpu_khz; /* TSC clocks / usec, not used here */ |
20 | EXPORT_SYMBOL(cpu_khz); | 21 | EXPORT_SYMBOL(cpu_khz); |
@@ -31,6 +32,7 @@ static int tsc_unstable; | |||
31 | erroneous rdtsc usage on !cpu_has_tsc processors */ | 32 | erroneous rdtsc usage on !cpu_has_tsc processors */ |
32 | static int tsc_disabled = -1; | 33 | static int tsc_disabled = -1; |
33 | 34 | ||
35 | static int tsc_clocksource_reliable; | ||
34 | /* | 36 | /* |
35 | * Scheduler clock - returns current time in nanosec units. | 37 | * Scheduler clock - returns current time in nanosec units. |
36 | */ | 38 | */ |
@@ -98,6 +100,15 @@ int __init notsc_setup(char *str) | |||
98 | 100 | ||
99 | __setup("notsc", notsc_setup); | 101 | __setup("notsc", notsc_setup); |
100 | 102 | ||
103 | static int __init tsc_setup(char *str) | ||
104 | { | ||
105 | if (!strcmp(str, "reliable")) | ||
106 | tsc_clocksource_reliable = 1; | ||
107 | return 1; | ||
108 | } | ||
109 | |||
110 | __setup("tsc=", tsc_setup); | ||
111 | |||
101 | #define MAX_RETRIES 5 | 112 | #define MAX_RETRIES 5 |
102 | #define SMI_TRESHOLD 50000 | 113 | #define SMI_TRESHOLD 50000 |
103 | 114 | ||
@@ -352,9 +363,15 @@ unsigned long native_calibrate_tsc(void) | |||
352 | { | 363 | { |
353 | u64 tsc1, tsc2, delta, ref1, ref2; | 364 | u64 tsc1, tsc2, delta, ref1, ref2; |
354 | unsigned long tsc_pit_min = ULONG_MAX, tsc_ref_min = ULONG_MAX; | 365 | unsigned long tsc_pit_min = ULONG_MAX, tsc_ref_min = ULONG_MAX; |
355 | unsigned long flags, latch, ms, fast_calibrate; | 366 | unsigned long flags, latch, ms, fast_calibrate, tsc_khz; |
356 | int hpet = is_hpet_enabled(), i, loopmin; | 367 | int hpet = is_hpet_enabled(), i, loopmin; |
357 | 368 | ||
369 | tsc_khz = get_hypervisor_tsc_freq(); | ||
370 | if (tsc_khz) { | ||
371 | printk(KERN_INFO "TSC: Frequency read from the hypervisor\n"); | ||
372 | return tsc_khz; | ||
373 | } | ||
374 | |||
358 | local_irq_save(flags); | 375 | local_irq_save(flags); |
359 | fast_calibrate = quick_pit_calibrate(); | 376 | fast_calibrate = quick_pit_calibrate(); |
360 | local_irq_restore(flags); | 377 | local_irq_restore(flags); |
@@ -731,24 +748,21 @@ static struct dmi_system_id __initdata bad_tsc_dmi_table[] = { | |||
731 | {} | 748 | {} |
732 | }; | 749 | }; |
733 | 750 | ||
734 | /* | 751 | static void __init check_system_tsc_reliable(void) |
735 | * Geode_LX - the OLPC CPU has a possibly a very reliable TSC | 752 | { |
736 | */ | ||
737 | #ifdef CONFIG_MGEODE_LX | 753 | #ifdef CONFIG_MGEODE_LX |
738 | /* RTSC counts during suspend */ | 754 | /* RTSC counts during suspend */ |
739 | #define RTSC_SUSP 0x100 | 755 | #define RTSC_SUSP 0x100 |
740 | |||
741 | static void __init check_geode_tsc_reliable(void) | ||
742 | { | ||
743 | unsigned long res_low, res_high; | 756 | unsigned long res_low, res_high; |
744 | 757 | ||
745 | rdmsr_safe(MSR_GEODE_BUSCONT_CONF0, &res_low, &res_high); | 758 | rdmsr_safe(MSR_GEODE_BUSCONT_CONF0, &res_low, &res_high); |
759 | /* Geode_LX - the OLPC CPU has a possibly a very reliable TSC */ | ||
746 | if (res_low & RTSC_SUSP) | 760 | if (res_low & RTSC_SUSP) |
747 | clocksource_tsc.flags &= ~CLOCK_SOURCE_MUST_VERIFY; | 761 | tsc_clocksource_reliable = 1; |
748 | } | ||
749 | #else | ||
750 | static inline void check_geode_tsc_reliable(void) { } | ||
751 | #endif | 762 | #endif |
763 | if (boot_cpu_has(X86_FEATURE_TSC_RELIABLE)) | ||
764 | tsc_clocksource_reliable = 1; | ||
765 | } | ||
752 | 766 | ||
753 | /* | 767 | /* |
754 | * Make an educated guess if the TSC is trustworthy and synchronized | 768 | * Make an educated guess if the TSC is trustworthy and synchronized |
@@ -783,6 +797,8 @@ static void __init init_tsc_clocksource(void) | |||
783 | { | 797 | { |
784 | clocksource_tsc.mult = clocksource_khz2mult(tsc_khz, | 798 | clocksource_tsc.mult = clocksource_khz2mult(tsc_khz, |
785 | clocksource_tsc.shift); | 799 | clocksource_tsc.shift); |
800 | if (tsc_clocksource_reliable) | ||
801 | clocksource_tsc.flags &= ~CLOCK_SOURCE_MUST_VERIFY; | ||
786 | /* lower the rating if we already know its unstable: */ | 802 | /* lower the rating if we already know its unstable: */ |
787 | if (check_tsc_unstable()) { | 803 | if (check_tsc_unstable()) { |
788 | clocksource_tsc.rating = 0; | 804 | clocksource_tsc.rating = 0; |
@@ -843,7 +859,7 @@ void __init tsc_init(void) | |||
843 | if (unsynchronized_tsc()) | 859 | if (unsynchronized_tsc()) |
844 | mark_tsc_unstable("TSCs unsynchronized"); | 860 | mark_tsc_unstable("TSCs unsynchronized"); |
845 | 861 | ||
846 | check_geode_tsc_reliable(); | 862 | check_system_tsc_reliable(); |
847 | init_tsc_clocksource(); | 863 | init_tsc_clocksource(); |
848 | } | 864 | } |
849 | 865 | ||
diff --git a/arch/x86/kernel/tsc_sync.c b/arch/x86/kernel/tsc_sync.c index 1c0dfbca87c1..bf36328f6ef9 100644 --- a/arch/x86/kernel/tsc_sync.c +++ b/arch/x86/kernel/tsc_sync.c | |||
@@ -112,6 +112,12 @@ void __cpuinit check_tsc_sync_source(int cpu) | |||
112 | if (unsynchronized_tsc()) | 112 | if (unsynchronized_tsc()) |
113 | return; | 113 | return; |
114 | 114 | ||
115 | if (boot_cpu_has(X86_FEATURE_TSC_RELIABLE)) { | ||
116 | printk(KERN_INFO | ||
117 | "Skipping synchronization checks as TSC is reliable.\n"); | ||
118 | return; | ||
119 | } | ||
120 | |||
115 | printk(KERN_INFO "checking TSC synchronization [CPU#%d -> CPU#%d]:", | 121 | printk(KERN_INFO "checking TSC synchronization [CPU#%d -> CPU#%d]:", |
116 | smp_processor_id(), cpu); | 122 | smp_processor_id(), cpu); |
117 | 123 | ||
@@ -165,7 +171,7 @@ void __cpuinit check_tsc_sync_target(void) | |||
165 | { | 171 | { |
166 | int cpus = 2; | 172 | int cpus = 2; |
167 | 173 | ||
168 | if (unsynchronized_tsc()) | 174 | if (unsynchronized_tsc() || boot_cpu_has(X86_FEATURE_TSC_RELIABLE)) |
169 | return; | 175 | return; |
170 | 176 | ||
171 | /* | 177 | /* |
diff --git a/arch/x86/kernel/vmi_32.c b/arch/x86/kernel/vmi_32.c index 22fd6577156a..23206ba16874 100644 --- a/arch/x86/kernel/vmi_32.c +++ b/arch/x86/kernel/vmi_32.c | |||
@@ -266,109 +266,6 @@ static void vmi_nop(void) | |||
266 | { | 266 | { |
267 | } | 267 | } |
268 | 268 | ||
269 | #ifdef CONFIG_DEBUG_PAGE_TYPE | ||
270 | |||
271 | #ifdef CONFIG_X86_PAE | ||
272 | #define MAX_BOOT_PTS (2048+4+1) | ||
273 | #else | ||
274 | #define MAX_BOOT_PTS (1024+1) | ||
275 | #endif | ||
276 | |||
277 | /* | ||
278 | * During boot, mem_map is not yet available in paging_init, so stash | ||
279 | * all the boot page allocations here. | ||
280 | */ | ||
281 | static struct { | ||
282 | u32 pfn; | ||
283 | int type; | ||
284 | } boot_page_allocations[MAX_BOOT_PTS]; | ||
285 | static int num_boot_page_allocations; | ||
286 | static int boot_allocations_applied; | ||
287 | |||
288 | void vmi_apply_boot_page_allocations(void) | ||
289 | { | ||
290 | int i; | ||
291 | BUG_ON(!mem_map); | ||
292 | for (i = 0; i < num_boot_page_allocations; i++) { | ||
293 | struct page *page = pfn_to_page(boot_page_allocations[i].pfn); | ||
294 | page->type = boot_page_allocations[i].type; | ||
295 | page->type = boot_page_allocations[i].type & | ||
296 | ~(VMI_PAGE_ZEROED | VMI_PAGE_CLONE); | ||
297 | } | ||
298 | boot_allocations_applied = 1; | ||
299 | } | ||
300 | |||
301 | static void record_page_type(u32 pfn, int type) | ||
302 | { | ||
303 | BUG_ON(num_boot_page_allocations >= MAX_BOOT_PTS); | ||
304 | boot_page_allocations[num_boot_page_allocations].pfn = pfn; | ||
305 | boot_page_allocations[num_boot_page_allocations].type = type; | ||
306 | num_boot_page_allocations++; | ||
307 | } | ||
308 | |||
309 | static void check_zeroed_page(u32 pfn, int type, struct page *page) | ||
310 | { | ||
311 | u32 *ptr; | ||
312 | int i; | ||
313 | int limit = PAGE_SIZE / sizeof(int); | ||
314 | |||
315 | if (page_address(page)) | ||
316 | ptr = (u32 *)page_address(page); | ||
317 | else | ||
318 | ptr = (u32 *)__va(pfn << PAGE_SHIFT); | ||
319 | /* | ||
320 | * When cloning the root in non-PAE mode, only the userspace | ||
321 | * pdes need to be zeroed. | ||
322 | */ | ||
323 | if (type & VMI_PAGE_CLONE) | ||
324 | limit = KERNEL_PGD_BOUNDARY; | ||
325 | for (i = 0; i < limit; i++) | ||
326 | BUG_ON(ptr[i]); | ||
327 | } | ||
328 | |||
329 | /* | ||
330 | * We stash the page type into struct page so we can verify the page | ||
331 | * types are used properly. | ||
332 | */ | ||
333 | static void vmi_set_page_type(u32 pfn, int type) | ||
334 | { | ||
335 | /* PAE can have multiple roots per page - don't track */ | ||
336 | if (PTRS_PER_PMD > 1 && (type & VMI_PAGE_PDP)) | ||
337 | return; | ||
338 | |||
339 | if (boot_allocations_applied) { | ||
340 | struct page *page = pfn_to_page(pfn); | ||
341 | if (type != VMI_PAGE_NORMAL) | ||
342 | BUG_ON(page->type); | ||
343 | else | ||
344 | BUG_ON(page->type == VMI_PAGE_NORMAL); | ||
345 | page->type = type & ~(VMI_PAGE_ZEROED | VMI_PAGE_CLONE); | ||
346 | if (type & VMI_PAGE_ZEROED) | ||
347 | check_zeroed_page(pfn, type, page); | ||
348 | } else { | ||
349 | record_page_type(pfn, type); | ||
350 | } | ||
351 | } | ||
352 | |||
353 | static void vmi_check_page_type(u32 pfn, int type) | ||
354 | { | ||
355 | /* PAE can have multiple roots per page - skip checks */ | ||
356 | if (PTRS_PER_PMD > 1 && (type & VMI_PAGE_PDP)) | ||
357 | return; | ||
358 | |||
359 | type &= ~(VMI_PAGE_ZEROED | VMI_PAGE_CLONE); | ||
360 | if (boot_allocations_applied) { | ||
361 | struct page *page = pfn_to_page(pfn); | ||
362 | BUG_ON((page->type ^ type) & VMI_PAGE_PAE); | ||
363 | BUG_ON(type == VMI_PAGE_NORMAL && page->type); | ||
364 | BUG_ON((type & page->type) == 0); | ||
365 | } | ||
366 | } | ||
367 | #else | ||
368 | #define vmi_set_page_type(p,t) do { } while (0) | ||
369 | #define vmi_check_page_type(p,t) do { } while (0) | ||
370 | #endif | ||
371 | |||
372 | #ifdef CONFIG_HIGHPTE | 269 | #ifdef CONFIG_HIGHPTE |
373 | static void *vmi_kmap_atomic_pte(struct page *page, enum km_type type) | 270 | static void *vmi_kmap_atomic_pte(struct page *page, enum km_type type) |
374 | { | 271 | { |
@@ -395,7 +292,6 @@ static void *vmi_kmap_atomic_pte(struct page *page, enum km_type type) | |||
395 | 292 | ||
396 | static void vmi_allocate_pte(struct mm_struct *mm, unsigned long pfn) | 293 | static void vmi_allocate_pte(struct mm_struct *mm, unsigned long pfn) |
397 | { | 294 | { |
398 | vmi_set_page_type(pfn, VMI_PAGE_L1); | ||
399 | vmi_ops.allocate_page(pfn, VMI_PAGE_L1, 0, 0, 0); | 295 | vmi_ops.allocate_page(pfn, VMI_PAGE_L1, 0, 0, 0); |
400 | } | 296 | } |
401 | 297 | ||
@@ -406,27 +302,22 @@ static void vmi_allocate_pmd(struct mm_struct *mm, unsigned long pfn) | |||
406 | * It is called only for swapper_pg_dir, which already has | 302 | * It is called only for swapper_pg_dir, which already has |
407 | * data on it. | 303 | * data on it. |
408 | */ | 304 | */ |
409 | vmi_set_page_type(pfn, VMI_PAGE_L2); | ||
410 | vmi_ops.allocate_page(pfn, VMI_PAGE_L2, 0, 0, 0); | 305 | vmi_ops.allocate_page(pfn, VMI_PAGE_L2, 0, 0, 0); |
411 | } | 306 | } |
412 | 307 | ||
413 | static void vmi_allocate_pmd_clone(unsigned long pfn, unsigned long clonepfn, unsigned long start, unsigned long count) | 308 | static void vmi_allocate_pmd_clone(unsigned long pfn, unsigned long clonepfn, unsigned long start, unsigned long count) |
414 | { | 309 | { |
415 | vmi_set_page_type(pfn, VMI_PAGE_L2 | VMI_PAGE_CLONE); | ||
416 | vmi_check_page_type(clonepfn, VMI_PAGE_L2); | ||
417 | vmi_ops.allocate_page(pfn, VMI_PAGE_L2 | VMI_PAGE_CLONE, clonepfn, start, count); | 310 | vmi_ops.allocate_page(pfn, VMI_PAGE_L2 | VMI_PAGE_CLONE, clonepfn, start, count); |
418 | } | 311 | } |
419 | 312 | ||
420 | static void vmi_release_pte(unsigned long pfn) | 313 | static void vmi_release_pte(unsigned long pfn) |
421 | { | 314 | { |
422 | vmi_ops.release_page(pfn, VMI_PAGE_L1); | 315 | vmi_ops.release_page(pfn, VMI_PAGE_L1); |
423 | vmi_set_page_type(pfn, VMI_PAGE_NORMAL); | ||
424 | } | 316 | } |
425 | 317 | ||
426 | static void vmi_release_pmd(unsigned long pfn) | 318 | static void vmi_release_pmd(unsigned long pfn) |
427 | { | 319 | { |
428 | vmi_ops.release_page(pfn, VMI_PAGE_L2); | 320 | vmi_ops.release_page(pfn, VMI_PAGE_L2); |
429 | vmi_set_page_type(pfn, VMI_PAGE_NORMAL); | ||
430 | } | 321 | } |
431 | 322 | ||
432 | /* | 323 | /* |
@@ -450,26 +341,22 @@ static void vmi_release_pmd(unsigned long pfn) | |||
450 | 341 | ||
451 | static void vmi_update_pte(struct mm_struct *mm, unsigned long addr, pte_t *ptep) | 342 | static void vmi_update_pte(struct mm_struct *mm, unsigned long addr, pte_t *ptep) |
452 | { | 343 | { |
453 | vmi_check_page_type(__pa(ptep) >> PAGE_SHIFT, VMI_PAGE_PTE); | ||
454 | vmi_ops.update_pte(ptep, vmi_flags_addr(mm, addr, VMI_PAGE_PT, 0)); | 344 | vmi_ops.update_pte(ptep, vmi_flags_addr(mm, addr, VMI_PAGE_PT, 0)); |
455 | } | 345 | } |
456 | 346 | ||
457 | static void vmi_update_pte_defer(struct mm_struct *mm, unsigned long addr, pte_t *ptep) | 347 | static void vmi_update_pte_defer(struct mm_struct *mm, unsigned long addr, pte_t *ptep) |
458 | { | 348 | { |
459 | vmi_check_page_type(__pa(ptep) >> PAGE_SHIFT, VMI_PAGE_PTE); | ||
460 | vmi_ops.update_pte(ptep, vmi_flags_addr_defer(mm, addr, VMI_PAGE_PT, 0)); | 349 | vmi_ops.update_pte(ptep, vmi_flags_addr_defer(mm, addr, VMI_PAGE_PT, 0)); |
461 | } | 350 | } |
462 | 351 | ||
463 | static void vmi_set_pte(pte_t *ptep, pte_t pte) | 352 | static void vmi_set_pte(pte_t *ptep, pte_t pte) |
464 | { | 353 | { |
465 | /* XXX because of set_pmd_pte, this can be called on PT or PD layers */ | 354 | /* XXX because of set_pmd_pte, this can be called on PT or PD layers */ |
466 | vmi_check_page_type(__pa(ptep) >> PAGE_SHIFT, VMI_PAGE_PTE | VMI_PAGE_PD); | ||
467 | vmi_ops.set_pte(pte, ptep, VMI_PAGE_PT); | 355 | vmi_ops.set_pte(pte, ptep, VMI_PAGE_PT); |
468 | } | 356 | } |
469 | 357 | ||
470 | static void vmi_set_pte_at(struct mm_struct *mm, unsigned long addr, pte_t *ptep, pte_t pte) | 358 | static void vmi_set_pte_at(struct mm_struct *mm, unsigned long addr, pte_t *ptep, pte_t pte) |
471 | { | 359 | { |
472 | vmi_check_page_type(__pa(ptep) >> PAGE_SHIFT, VMI_PAGE_PTE); | ||
473 | vmi_ops.set_pte(pte, ptep, vmi_flags_addr(mm, addr, VMI_PAGE_PT, 0)); | 360 | vmi_ops.set_pte(pte, ptep, vmi_flags_addr(mm, addr, VMI_PAGE_PT, 0)); |
474 | } | 361 | } |
475 | 362 | ||
@@ -477,10 +364,8 @@ static void vmi_set_pmd(pmd_t *pmdp, pmd_t pmdval) | |||
477 | { | 364 | { |
478 | #ifdef CONFIG_X86_PAE | 365 | #ifdef CONFIG_X86_PAE |
479 | const pte_t pte = { .pte = pmdval.pmd }; | 366 | const pte_t pte = { .pte = pmdval.pmd }; |
480 | vmi_check_page_type(__pa(pmdp) >> PAGE_SHIFT, VMI_PAGE_PMD); | ||
481 | #else | 367 | #else |
482 | const pte_t pte = { pmdval.pud.pgd.pgd }; | 368 | const pte_t pte = { pmdval.pud.pgd.pgd }; |
483 | vmi_check_page_type(__pa(pmdp) >> PAGE_SHIFT, VMI_PAGE_PGD); | ||
484 | #endif | 369 | #endif |
485 | vmi_ops.set_pte(pte, (pte_t *)pmdp, VMI_PAGE_PD); | 370 | vmi_ops.set_pte(pte, (pte_t *)pmdp, VMI_PAGE_PD); |
486 | } | 371 | } |
@@ -502,7 +387,6 @@ static void vmi_set_pte_atomic(pte_t *ptep, pte_t pteval) | |||
502 | 387 | ||
503 | static void vmi_set_pte_present(struct mm_struct *mm, unsigned long addr, pte_t *ptep, pte_t pte) | 388 | static void vmi_set_pte_present(struct mm_struct *mm, unsigned long addr, pte_t *ptep, pte_t pte) |
504 | { | 389 | { |
505 | vmi_check_page_type(__pa(ptep) >> PAGE_SHIFT, VMI_PAGE_PTE); | ||
506 | vmi_ops.set_pte(pte, ptep, vmi_flags_addr_defer(mm, addr, VMI_PAGE_PT, 1)); | 390 | vmi_ops.set_pte(pte, ptep, vmi_flags_addr_defer(mm, addr, VMI_PAGE_PT, 1)); |
507 | } | 391 | } |
508 | 392 | ||
@@ -510,21 +394,18 @@ static void vmi_set_pud(pud_t *pudp, pud_t pudval) | |||
510 | { | 394 | { |
511 | /* Um, eww */ | 395 | /* Um, eww */ |
512 | const pte_t pte = { .pte = pudval.pgd.pgd }; | 396 | const pte_t pte = { .pte = pudval.pgd.pgd }; |
513 | vmi_check_page_type(__pa(pudp) >> PAGE_SHIFT, VMI_PAGE_PGD); | ||
514 | vmi_ops.set_pte(pte, (pte_t *)pudp, VMI_PAGE_PDP); | 397 | vmi_ops.set_pte(pte, (pte_t *)pudp, VMI_PAGE_PDP); |
515 | } | 398 | } |
516 | 399 | ||
517 | static void vmi_pte_clear(struct mm_struct *mm, unsigned long addr, pte_t *ptep) | 400 | static void vmi_pte_clear(struct mm_struct *mm, unsigned long addr, pte_t *ptep) |
518 | { | 401 | { |
519 | const pte_t pte = { .pte = 0 }; | 402 | const pte_t pte = { .pte = 0 }; |
520 | vmi_check_page_type(__pa(ptep) >> PAGE_SHIFT, VMI_PAGE_PTE); | ||
521 | vmi_ops.set_pte(pte, ptep, vmi_flags_addr(mm, addr, VMI_PAGE_PT, 0)); | 403 | vmi_ops.set_pte(pte, ptep, vmi_flags_addr(mm, addr, VMI_PAGE_PT, 0)); |
522 | } | 404 | } |
523 | 405 | ||
524 | static void vmi_pmd_clear(pmd_t *pmd) | 406 | static void vmi_pmd_clear(pmd_t *pmd) |
525 | { | 407 | { |
526 | const pte_t pte = { .pte = 0 }; | 408 | const pte_t pte = { .pte = 0 }; |
527 | vmi_check_page_type(__pa(pmd) >> PAGE_SHIFT, VMI_PAGE_PMD); | ||
528 | vmi_ops.set_pte(pte, (pte_t *)pmd, VMI_PAGE_PD); | 409 | vmi_ops.set_pte(pte, (pte_t *)pmd, VMI_PAGE_PD); |
529 | } | 410 | } |
530 | #endif | 411 | #endif |
diff --git a/arch/x86/kernel/vmiclock_32.c b/arch/x86/kernel/vmiclock_32.c index 254ee07f8635..c4c1f9e09402 100644 --- a/arch/x86/kernel/vmiclock_32.c +++ b/arch/x86/kernel/vmiclock_32.c | |||
@@ -226,7 +226,7 @@ static void __devinit vmi_time_init_clockevent(void) | |||
226 | /* Upper bound is clockevent's use of ulong for cycle deltas. */ | 226 | /* Upper bound is clockevent's use of ulong for cycle deltas. */ |
227 | evt->max_delta_ns = clockevent_delta2ns(ULONG_MAX, evt); | 227 | evt->max_delta_ns = clockevent_delta2ns(ULONG_MAX, evt); |
228 | evt->min_delta_ns = clockevent_delta2ns(1, evt); | 228 | evt->min_delta_ns = clockevent_delta2ns(1, evt); |
229 | evt->cpumask = cpumask_of_cpu(cpu); | 229 | evt->cpumask = cpumask_of(cpu); |
230 | 230 | ||
231 | printk(KERN_WARNING "vmi: registering clock event %s. mult=%lu shift=%u\n", | 231 | printk(KERN_WARNING "vmi: registering clock event %s. mult=%lu shift=%u\n", |
232 | evt->name, evt->mult, evt->shift); | 232 | evt->name, evt->mult, evt->shift); |
diff --git a/arch/x86/kernel/vmlinux_32.lds.S b/arch/x86/kernel/vmlinux_32.lds.S index a9b8560adbc2..82c67559dde7 100644 --- a/arch/x86/kernel/vmlinux_32.lds.S +++ b/arch/x86/kernel/vmlinux_32.lds.S | |||
@@ -44,6 +44,7 @@ SECTIONS | |||
44 | SCHED_TEXT | 44 | SCHED_TEXT |
45 | LOCK_TEXT | 45 | LOCK_TEXT |
46 | KPROBES_TEXT | 46 | KPROBES_TEXT |
47 | IRQENTRY_TEXT | ||
47 | *(.fixup) | 48 | *(.fixup) |
48 | *(.gnu.warning) | 49 | *(.gnu.warning) |
49 | _etext = .; /* End of text section */ | 50 | _etext = .; /* End of text section */ |
diff --git a/arch/x86/kernel/vmlinux_64.lds.S b/arch/x86/kernel/vmlinux_64.lds.S index 46e05447405b..1a614c0e6bef 100644 --- a/arch/x86/kernel/vmlinux_64.lds.S +++ b/arch/x86/kernel/vmlinux_64.lds.S | |||
@@ -35,6 +35,7 @@ SECTIONS | |||
35 | SCHED_TEXT | 35 | SCHED_TEXT |
36 | LOCK_TEXT | 36 | LOCK_TEXT |
37 | KPROBES_TEXT | 37 | KPROBES_TEXT |
38 | IRQENTRY_TEXT | ||
38 | *(.fixup) | 39 | *(.fixup) |
39 | *(.gnu.warning) | 40 | *(.gnu.warning) |
40 | _etext = .; /* End of text section */ | 41 | _etext = .; /* End of text section */ |
diff --git a/arch/x86/kernel/vsyscall_64.c b/arch/x86/kernel/vsyscall_64.c index 0b8b6690a86d..44153afc9067 100644 --- a/arch/x86/kernel/vsyscall_64.c +++ b/arch/x86/kernel/vsyscall_64.c | |||
@@ -17,6 +17,9 @@ | |||
17 | * want per guest time just set the kernel.vsyscall64 sysctl to 0. | 17 | * want per guest time just set the kernel.vsyscall64 sysctl to 0. |
18 | */ | 18 | */ |
19 | 19 | ||
20 | /* Disable profiling for userspace code: */ | ||
21 | #define DISABLE_BRANCH_PROFILING | ||
22 | |||
20 | #include <linux/time.h> | 23 | #include <linux/time.h> |
21 | #include <linux/init.h> | 24 | #include <linux/init.h> |
22 | #include <linux/kernel.h> | 25 | #include <linux/kernel.h> |
@@ -128,7 +131,16 @@ static __always_inline void do_vgettimeofday(struct timeval * tv) | |||
128 | gettimeofday(tv,NULL); | 131 | gettimeofday(tv,NULL); |
129 | return; | 132 | return; |
130 | } | 133 | } |
134 | |||
135 | /* | ||
136 | * Surround the RDTSC by barriers, to make sure it's not | ||
137 | * speculated to outside the seqlock critical section and | ||
138 | * does not cause time warps: | ||
139 | */ | ||
140 | rdtsc_barrier(); | ||
131 | now = vread(); | 141 | now = vread(); |
142 | rdtsc_barrier(); | ||
143 | |||
132 | base = __vsyscall_gtod_data.clock.cycle_last; | 144 | base = __vsyscall_gtod_data.clock.cycle_last; |
133 | mask = __vsyscall_gtod_data.clock.mask; | 145 | mask = __vsyscall_gtod_data.clock.mask; |
134 | mult = __vsyscall_gtod_data.clock.mult; | 146 | mult = __vsyscall_gtod_data.clock.mult; |
diff --git a/arch/x86/kernel/xsave.c b/arch/x86/kernel/xsave.c index 15c3e6999182..2b54fe002e94 100644 --- a/arch/x86/kernel/xsave.c +++ b/arch/x86/kernel/xsave.c | |||
@@ -159,7 +159,7 @@ int save_i387_xstate(void __user *buf) | |||
159 | * Restore the extended state if present. Otherwise, restore the FP/SSE | 159 | * Restore the extended state if present. Otherwise, restore the FP/SSE |
160 | * state. | 160 | * state. |
161 | */ | 161 | */ |
162 | int restore_user_xstate(void __user *buf) | 162 | static int restore_user_xstate(void __user *buf) |
163 | { | 163 | { |
164 | struct _fpx_sw_bytes fx_sw_user; | 164 | struct _fpx_sw_bytes fx_sw_user; |
165 | u64 mask; | 165 | u64 mask; |
diff --git a/arch/x86/kvm/Makefile b/arch/x86/kvm/Makefile index c02343594b4d..d3ec292f00f2 100644 --- a/arch/x86/kvm/Makefile +++ b/arch/x86/kvm/Makefile | |||
@@ -7,8 +7,8 @@ common-objs = $(addprefix ../../../virt/kvm/, kvm_main.o ioapic.o \ | |||
7 | ifeq ($(CONFIG_KVM_TRACE),y) | 7 | ifeq ($(CONFIG_KVM_TRACE),y) |
8 | common-objs += $(addprefix ../../../virt/kvm/, kvm_trace.o) | 8 | common-objs += $(addprefix ../../../virt/kvm/, kvm_trace.o) |
9 | endif | 9 | endif |
10 | ifeq ($(CONFIG_DMAR),y) | 10 | ifeq ($(CONFIG_IOMMU_API),y) |
11 | common-objs += $(addprefix ../../../virt/kvm/, vtd.o) | 11 | common-objs += $(addprefix ../../../virt/kvm/, iommu.o) |
12 | endif | 12 | endif |
13 | 13 | ||
14 | EXTRA_CFLAGS += -Ivirt/kvm -Iarch/x86/kvm | 14 | EXTRA_CFLAGS += -Ivirt/kvm -Iarch/x86/kvm |
diff --git a/arch/x86/kvm/i8254.c b/arch/x86/kvm/i8254.c index 59ebd37ad79e..e665d1c623ca 100644 --- a/arch/x86/kvm/i8254.c +++ b/arch/x86/kvm/i8254.c | |||
@@ -603,10 +603,29 @@ void kvm_free_pit(struct kvm *kvm) | |||
603 | 603 | ||
604 | static void __inject_pit_timer_intr(struct kvm *kvm) | 604 | static void __inject_pit_timer_intr(struct kvm *kvm) |
605 | { | 605 | { |
606 | struct kvm_vcpu *vcpu; | ||
607 | int i; | ||
608 | |||
606 | mutex_lock(&kvm->lock); | 609 | mutex_lock(&kvm->lock); |
607 | kvm_set_irq(kvm, kvm->arch.vpit->irq_source_id, 0, 1); | 610 | kvm_set_irq(kvm, kvm->arch.vpit->irq_source_id, 0, 1); |
608 | kvm_set_irq(kvm, kvm->arch.vpit->irq_source_id, 0, 0); | 611 | kvm_set_irq(kvm, kvm->arch.vpit->irq_source_id, 0, 0); |
609 | mutex_unlock(&kvm->lock); | 612 | mutex_unlock(&kvm->lock); |
613 | |||
614 | /* | ||
615 | * Provides NMI watchdog support via Virtual Wire mode. | ||
616 | * The route is: PIT -> PIC -> LVT0 in NMI mode. | ||
617 | * | ||
618 | * Note: Our Virtual Wire implementation is simplified, only | ||
619 | * propagating PIT interrupts to all VCPUs when they have set | ||
620 | * LVT0 to NMI delivery. Other PIC interrupts are just sent to | ||
621 | * VCPU0, and only if its LVT0 is in EXTINT mode. | ||
622 | */ | ||
623 | if (kvm->arch.vapics_in_nmi_mode > 0) | ||
624 | for (i = 0; i < KVM_MAX_VCPUS; ++i) { | ||
625 | vcpu = kvm->vcpus[i]; | ||
626 | if (vcpu) | ||
627 | kvm_apic_nmi_wd_deliver(vcpu); | ||
628 | } | ||
610 | } | 629 | } |
611 | 630 | ||
612 | void kvm_inject_pit_timer_irqs(struct kvm_vcpu *vcpu) | 631 | void kvm_inject_pit_timer_irqs(struct kvm_vcpu *vcpu) |
diff --git a/arch/x86/kvm/i8259.c b/arch/x86/kvm/i8259.c index 17e41e165f1a..179dcb0103fd 100644 --- a/arch/x86/kvm/i8259.c +++ b/arch/x86/kvm/i8259.c | |||
@@ -26,10 +26,40 @@ | |||
26 | * Port from Qemu. | 26 | * Port from Qemu. |
27 | */ | 27 | */ |
28 | #include <linux/mm.h> | 28 | #include <linux/mm.h> |
29 | #include <linux/bitops.h> | ||
29 | #include "irq.h" | 30 | #include "irq.h" |
30 | 31 | ||
31 | #include <linux/kvm_host.h> | 32 | #include <linux/kvm_host.h> |
32 | 33 | ||
34 | static void pic_lock(struct kvm_pic *s) | ||
35 | { | ||
36 | spin_lock(&s->lock); | ||
37 | } | ||
38 | |||
39 | static void pic_unlock(struct kvm_pic *s) | ||
40 | { | ||
41 | struct kvm *kvm = s->kvm; | ||
42 | unsigned acks = s->pending_acks; | ||
43 | bool wakeup = s->wakeup_needed; | ||
44 | struct kvm_vcpu *vcpu; | ||
45 | |||
46 | s->pending_acks = 0; | ||
47 | s->wakeup_needed = false; | ||
48 | |||
49 | spin_unlock(&s->lock); | ||
50 | |||
51 | while (acks) { | ||
52 | kvm_notify_acked_irq(kvm, __ffs(acks)); | ||
53 | acks &= acks - 1; | ||
54 | } | ||
55 | |||
56 | if (wakeup) { | ||
57 | vcpu = s->kvm->vcpus[0]; | ||
58 | if (vcpu) | ||
59 | kvm_vcpu_kick(vcpu); | ||
60 | } | ||
61 | } | ||
62 | |||
33 | static void pic_clear_isr(struct kvm_kpic_state *s, int irq) | 63 | static void pic_clear_isr(struct kvm_kpic_state *s, int irq) |
34 | { | 64 | { |
35 | s->isr &= ~(1 << irq); | 65 | s->isr &= ~(1 << irq); |
@@ -136,17 +166,21 @@ static void pic_update_irq(struct kvm_pic *s) | |||
136 | 166 | ||
137 | void kvm_pic_update_irq(struct kvm_pic *s) | 167 | void kvm_pic_update_irq(struct kvm_pic *s) |
138 | { | 168 | { |
169 | pic_lock(s); | ||
139 | pic_update_irq(s); | 170 | pic_update_irq(s); |
171 | pic_unlock(s); | ||
140 | } | 172 | } |
141 | 173 | ||
142 | void kvm_pic_set_irq(void *opaque, int irq, int level) | 174 | void kvm_pic_set_irq(void *opaque, int irq, int level) |
143 | { | 175 | { |
144 | struct kvm_pic *s = opaque; | 176 | struct kvm_pic *s = opaque; |
145 | 177 | ||
178 | pic_lock(s); | ||
146 | if (irq >= 0 && irq < PIC_NUM_PINS) { | 179 | if (irq >= 0 && irq < PIC_NUM_PINS) { |
147 | pic_set_irq1(&s->pics[irq >> 3], irq & 7, level); | 180 | pic_set_irq1(&s->pics[irq >> 3], irq & 7, level); |
148 | pic_update_irq(s); | 181 | pic_update_irq(s); |
149 | } | 182 | } |
183 | pic_unlock(s); | ||
150 | } | 184 | } |
151 | 185 | ||
152 | /* | 186 | /* |
@@ -172,6 +206,7 @@ int kvm_pic_read_irq(struct kvm *kvm) | |||
172 | int irq, irq2, intno; | 206 | int irq, irq2, intno; |
173 | struct kvm_pic *s = pic_irqchip(kvm); | 207 | struct kvm_pic *s = pic_irqchip(kvm); |
174 | 208 | ||
209 | pic_lock(s); | ||
175 | irq = pic_get_irq(&s->pics[0]); | 210 | irq = pic_get_irq(&s->pics[0]); |
176 | if (irq >= 0) { | 211 | if (irq >= 0) { |
177 | pic_intack(&s->pics[0], irq); | 212 | pic_intack(&s->pics[0], irq); |
@@ -196,6 +231,7 @@ int kvm_pic_read_irq(struct kvm *kvm) | |||
196 | intno = s->pics[0].irq_base + irq; | 231 | intno = s->pics[0].irq_base + irq; |
197 | } | 232 | } |
198 | pic_update_irq(s); | 233 | pic_update_irq(s); |
234 | pic_unlock(s); | ||
199 | kvm_notify_acked_irq(kvm, irq); | 235 | kvm_notify_acked_irq(kvm, irq); |
200 | 236 | ||
201 | return intno; | 237 | return intno; |
@@ -203,7 +239,7 @@ int kvm_pic_read_irq(struct kvm *kvm) | |||
203 | 239 | ||
204 | void kvm_pic_reset(struct kvm_kpic_state *s) | 240 | void kvm_pic_reset(struct kvm_kpic_state *s) |
205 | { | 241 | { |
206 | int irq, irqbase; | 242 | int irq, irqbase, n; |
207 | struct kvm *kvm = s->pics_state->irq_request_opaque; | 243 | struct kvm *kvm = s->pics_state->irq_request_opaque; |
208 | struct kvm_vcpu *vcpu0 = kvm->vcpus[0]; | 244 | struct kvm_vcpu *vcpu0 = kvm->vcpus[0]; |
209 | 245 | ||
@@ -214,8 +250,10 @@ void kvm_pic_reset(struct kvm_kpic_state *s) | |||
214 | 250 | ||
215 | for (irq = 0; irq < PIC_NUM_PINS/2; irq++) { | 251 | for (irq = 0; irq < PIC_NUM_PINS/2; irq++) { |
216 | if (vcpu0 && kvm_apic_accept_pic_intr(vcpu0)) | 252 | if (vcpu0 && kvm_apic_accept_pic_intr(vcpu0)) |
217 | if (s->irr & (1 << irq) || s->isr & (1 << irq)) | 253 | if (s->irr & (1 << irq) || s->isr & (1 << irq)) { |
218 | kvm_notify_acked_irq(kvm, irq+irqbase); | 254 | n = irq + irqbase; |
255 | s->pics_state->pending_acks |= 1 << n; | ||
256 | } | ||
219 | } | 257 | } |
220 | s->last_irr = 0; | 258 | s->last_irr = 0; |
221 | s->irr = 0; | 259 | s->irr = 0; |
@@ -406,6 +444,7 @@ static void picdev_write(struct kvm_io_device *this, | |||
406 | printk(KERN_ERR "PIC: non byte write\n"); | 444 | printk(KERN_ERR "PIC: non byte write\n"); |
407 | return; | 445 | return; |
408 | } | 446 | } |
447 | pic_lock(s); | ||
409 | switch (addr) { | 448 | switch (addr) { |
410 | case 0x20: | 449 | case 0x20: |
411 | case 0x21: | 450 | case 0x21: |
@@ -418,6 +457,7 @@ static void picdev_write(struct kvm_io_device *this, | |||
418 | elcr_ioport_write(&s->pics[addr & 1], addr, data); | 457 | elcr_ioport_write(&s->pics[addr & 1], addr, data); |
419 | break; | 458 | break; |
420 | } | 459 | } |
460 | pic_unlock(s); | ||
421 | } | 461 | } |
422 | 462 | ||
423 | static void picdev_read(struct kvm_io_device *this, | 463 | static void picdev_read(struct kvm_io_device *this, |
@@ -431,6 +471,7 @@ static void picdev_read(struct kvm_io_device *this, | |||
431 | printk(KERN_ERR "PIC: non byte read\n"); | 471 | printk(KERN_ERR "PIC: non byte read\n"); |
432 | return; | 472 | return; |
433 | } | 473 | } |
474 | pic_lock(s); | ||
434 | switch (addr) { | 475 | switch (addr) { |
435 | case 0x20: | 476 | case 0x20: |
436 | case 0x21: | 477 | case 0x21: |
@@ -444,6 +485,7 @@ static void picdev_read(struct kvm_io_device *this, | |||
444 | break; | 485 | break; |
445 | } | 486 | } |
446 | *(unsigned char *)val = data; | 487 | *(unsigned char *)val = data; |
488 | pic_unlock(s); | ||
447 | } | 489 | } |
448 | 490 | ||
449 | /* | 491 | /* |
@@ -459,7 +501,7 @@ static void pic_irq_request(void *opaque, int level) | |||
459 | s->output = level; | 501 | s->output = level; |
460 | if (vcpu && level && (s->pics[0].isr_ack & (1 << irq))) { | 502 | if (vcpu && level && (s->pics[0].isr_ack & (1 << irq))) { |
461 | s->pics[0].isr_ack &= ~(1 << irq); | 503 | s->pics[0].isr_ack &= ~(1 << irq); |
462 | kvm_vcpu_kick(vcpu); | 504 | s->wakeup_needed = true; |
463 | } | 505 | } |
464 | } | 506 | } |
465 | 507 | ||
@@ -469,6 +511,8 @@ struct kvm_pic *kvm_create_pic(struct kvm *kvm) | |||
469 | s = kzalloc(sizeof(struct kvm_pic), GFP_KERNEL); | 511 | s = kzalloc(sizeof(struct kvm_pic), GFP_KERNEL); |
470 | if (!s) | 512 | if (!s) |
471 | return NULL; | 513 | return NULL; |
514 | spin_lock_init(&s->lock); | ||
515 | s->kvm = kvm; | ||
472 | s->pics[0].elcr_mask = 0xf8; | 516 | s->pics[0].elcr_mask = 0xf8; |
473 | s->pics[1].elcr_mask = 0xde; | 517 | s->pics[1].elcr_mask = 0xde; |
474 | s->irq_request = pic_irq_request; | 518 | s->irq_request = pic_irq_request; |
diff --git a/arch/x86/kvm/irq.h b/arch/x86/kvm/irq.h index f17c8f5bbf31..2bf32a03ceec 100644 --- a/arch/x86/kvm/irq.h +++ b/arch/x86/kvm/irq.h | |||
@@ -25,6 +25,7 @@ | |||
25 | #include <linux/mm_types.h> | 25 | #include <linux/mm_types.h> |
26 | #include <linux/hrtimer.h> | 26 | #include <linux/hrtimer.h> |
27 | #include <linux/kvm_host.h> | 27 | #include <linux/kvm_host.h> |
28 | #include <linux/spinlock.h> | ||
28 | 29 | ||
29 | #include "iodev.h" | 30 | #include "iodev.h" |
30 | #include "ioapic.h" | 31 | #include "ioapic.h" |
@@ -59,6 +60,10 @@ struct kvm_kpic_state { | |||
59 | }; | 60 | }; |
60 | 61 | ||
61 | struct kvm_pic { | 62 | struct kvm_pic { |
63 | spinlock_t lock; | ||
64 | bool wakeup_needed; | ||
65 | unsigned pending_acks; | ||
66 | struct kvm *kvm; | ||
62 | struct kvm_kpic_state pics[2]; /* 0 is master pic, 1 is slave pic */ | 67 | struct kvm_kpic_state pics[2]; /* 0 is master pic, 1 is slave pic */ |
63 | irq_request_func *irq_request; | 68 | irq_request_func *irq_request; |
64 | void *irq_request_opaque; | 69 | void *irq_request_opaque; |
@@ -87,6 +92,7 @@ void kvm_pic_reset(struct kvm_kpic_state *s); | |||
87 | void kvm_timer_intr_post(struct kvm_vcpu *vcpu, int vec); | 92 | void kvm_timer_intr_post(struct kvm_vcpu *vcpu, int vec); |
88 | void kvm_inject_pending_timer_irqs(struct kvm_vcpu *vcpu); | 93 | void kvm_inject_pending_timer_irqs(struct kvm_vcpu *vcpu); |
89 | void kvm_inject_apic_timer_irqs(struct kvm_vcpu *vcpu); | 94 | void kvm_inject_apic_timer_irqs(struct kvm_vcpu *vcpu); |
95 | void kvm_apic_nmi_wd_deliver(struct kvm_vcpu *vcpu); | ||
90 | void __kvm_migrate_apic_timer(struct kvm_vcpu *vcpu); | 96 | void __kvm_migrate_apic_timer(struct kvm_vcpu *vcpu); |
91 | void __kvm_migrate_pit_timer(struct kvm_vcpu *vcpu); | 97 | void __kvm_migrate_pit_timer(struct kvm_vcpu *vcpu); |
92 | void __kvm_migrate_timers(struct kvm_vcpu *vcpu); | 98 | void __kvm_migrate_timers(struct kvm_vcpu *vcpu); |
diff --git a/arch/x86/kvm/kvm_svm.h b/arch/x86/kvm/kvm_svm.h index 65ef0fc2c036..8e5ee99551f6 100644 --- a/arch/x86/kvm/kvm_svm.h +++ b/arch/x86/kvm/kvm_svm.h | |||
@@ -7,7 +7,7 @@ | |||
7 | #include <linux/kvm_host.h> | 7 | #include <linux/kvm_host.h> |
8 | #include <asm/msr.h> | 8 | #include <asm/msr.h> |
9 | 9 | ||
10 | #include "svm.h" | 10 | #include <asm/svm.h> |
11 | 11 | ||
12 | static const u32 host_save_user_msrs[] = { | 12 | static const u32 host_save_user_msrs[] = { |
13 | #ifdef CONFIG_X86_64 | 13 | #ifdef CONFIG_X86_64 |
diff --git a/arch/x86/kvm/lapic.c b/arch/x86/kvm/lapic.c index 0fc3cab48943..afac68c0815c 100644 --- a/arch/x86/kvm/lapic.c +++ b/arch/x86/kvm/lapic.c | |||
@@ -130,6 +130,11 @@ static inline int apic_lvtt_period(struct kvm_lapic *apic) | |||
130 | return apic_get_reg(apic, APIC_LVTT) & APIC_LVT_TIMER_PERIODIC; | 130 | return apic_get_reg(apic, APIC_LVTT) & APIC_LVT_TIMER_PERIODIC; |
131 | } | 131 | } |
132 | 132 | ||
133 | static inline int apic_lvt_nmi_mode(u32 lvt_val) | ||
134 | { | ||
135 | return (lvt_val & (APIC_MODE_MASK | APIC_LVT_MASKED)) == APIC_DM_NMI; | ||
136 | } | ||
137 | |||
133 | static unsigned int apic_lvt_mask[APIC_LVT_NUM] = { | 138 | static unsigned int apic_lvt_mask[APIC_LVT_NUM] = { |
134 | LVT_MASK | APIC_LVT_TIMER_PERIODIC, /* LVTT */ | 139 | LVT_MASK | APIC_LVT_TIMER_PERIODIC, /* LVTT */ |
135 | LVT_MASK | APIC_MODE_MASK, /* LVTTHMR */ | 140 | LVT_MASK | APIC_MODE_MASK, /* LVTTHMR */ |
@@ -354,6 +359,7 @@ static int __apic_accept_irq(struct kvm_lapic *apic, int delivery_mode, | |||
354 | 359 | ||
355 | case APIC_DM_NMI: | 360 | case APIC_DM_NMI: |
356 | kvm_inject_nmi(vcpu); | 361 | kvm_inject_nmi(vcpu); |
362 | kvm_vcpu_kick(vcpu); | ||
357 | break; | 363 | break; |
358 | 364 | ||
359 | case APIC_DM_INIT: | 365 | case APIC_DM_INIT: |
@@ -380,6 +386,14 @@ static int __apic_accept_irq(struct kvm_lapic *apic, int delivery_mode, | |||
380 | } | 386 | } |
381 | break; | 387 | break; |
382 | 388 | ||
389 | case APIC_DM_EXTINT: | ||
390 | /* | ||
391 | * Should only be called by kvm_apic_local_deliver() with LVT0, | ||
392 | * before NMI watchdog was enabled. Already handled by | ||
393 | * kvm_apic_accept_pic_intr(). | ||
394 | */ | ||
395 | break; | ||
396 | |||
383 | default: | 397 | default: |
384 | printk(KERN_ERR "TODO: unsupported delivery mode %x\n", | 398 | printk(KERN_ERR "TODO: unsupported delivery mode %x\n", |
385 | delivery_mode); | 399 | delivery_mode); |
@@ -663,6 +677,20 @@ static void start_apic_timer(struct kvm_lapic *apic) | |||
663 | apic->timer.period))); | 677 | apic->timer.period))); |
664 | } | 678 | } |
665 | 679 | ||
680 | static void apic_manage_nmi_watchdog(struct kvm_lapic *apic, u32 lvt0_val) | ||
681 | { | ||
682 | int nmi_wd_enabled = apic_lvt_nmi_mode(apic_get_reg(apic, APIC_LVT0)); | ||
683 | |||
684 | if (apic_lvt_nmi_mode(lvt0_val)) { | ||
685 | if (!nmi_wd_enabled) { | ||
686 | apic_debug("Receive NMI setting on APIC_LVT0 " | ||
687 | "for cpu %d\n", apic->vcpu->vcpu_id); | ||
688 | apic->vcpu->kvm->arch.vapics_in_nmi_mode++; | ||
689 | } | ||
690 | } else if (nmi_wd_enabled) | ||
691 | apic->vcpu->kvm->arch.vapics_in_nmi_mode--; | ||
692 | } | ||
693 | |||
666 | static void apic_mmio_write(struct kvm_io_device *this, | 694 | static void apic_mmio_write(struct kvm_io_device *this, |
667 | gpa_t address, int len, const void *data) | 695 | gpa_t address, int len, const void *data) |
668 | { | 696 | { |
@@ -743,10 +771,11 @@ static void apic_mmio_write(struct kvm_io_device *this, | |||
743 | apic_set_reg(apic, APIC_ICR2, val & 0xff000000); | 771 | apic_set_reg(apic, APIC_ICR2, val & 0xff000000); |
744 | break; | 772 | break; |
745 | 773 | ||
774 | case APIC_LVT0: | ||
775 | apic_manage_nmi_watchdog(apic, val); | ||
746 | case APIC_LVTT: | 776 | case APIC_LVTT: |
747 | case APIC_LVTTHMR: | 777 | case APIC_LVTTHMR: |
748 | case APIC_LVTPC: | 778 | case APIC_LVTPC: |
749 | case APIC_LVT0: | ||
750 | case APIC_LVT1: | 779 | case APIC_LVT1: |
751 | case APIC_LVTERR: | 780 | case APIC_LVTERR: |
752 | /* TODO: Check vector */ | 781 | /* TODO: Check vector */ |
@@ -961,12 +990,26 @@ int apic_has_pending_timer(struct kvm_vcpu *vcpu) | |||
961 | return 0; | 990 | return 0; |
962 | } | 991 | } |
963 | 992 | ||
964 | static int __inject_apic_timer_irq(struct kvm_lapic *apic) | 993 | static int kvm_apic_local_deliver(struct kvm_lapic *apic, int lvt_type) |
994 | { | ||
995 | u32 reg = apic_get_reg(apic, lvt_type); | ||
996 | int vector, mode, trig_mode; | ||
997 | |||
998 | if (apic_hw_enabled(apic) && !(reg & APIC_LVT_MASKED)) { | ||
999 | vector = reg & APIC_VECTOR_MASK; | ||
1000 | mode = reg & APIC_MODE_MASK; | ||
1001 | trig_mode = reg & APIC_LVT_LEVEL_TRIGGER; | ||
1002 | return __apic_accept_irq(apic, mode, vector, 1, trig_mode); | ||
1003 | } | ||
1004 | return 0; | ||
1005 | } | ||
1006 | |||
1007 | void kvm_apic_nmi_wd_deliver(struct kvm_vcpu *vcpu) | ||
965 | { | 1008 | { |
966 | int vector; | 1009 | struct kvm_lapic *apic = vcpu->arch.apic; |
967 | 1010 | ||
968 | vector = apic_lvt_vector(apic, APIC_LVTT); | 1011 | if (apic) |
969 | return __apic_accept_irq(apic, APIC_DM_FIXED, vector, 1, 0); | 1012 | kvm_apic_local_deliver(apic, APIC_LVT0); |
970 | } | 1013 | } |
971 | 1014 | ||
972 | static enum hrtimer_restart apic_timer_fn(struct hrtimer *data) | 1015 | static enum hrtimer_restart apic_timer_fn(struct hrtimer *data) |
@@ -1061,9 +1104,8 @@ void kvm_inject_apic_timer_irqs(struct kvm_vcpu *vcpu) | |||
1061 | { | 1104 | { |
1062 | struct kvm_lapic *apic = vcpu->arch.apic; | 1105 | struct kvm_lapic *apic = vcpu->arch.apic; |
1063 | 1106 | ||
1064 | if (apic && apic_lvt_enabled(apic, APIC_LVTT) && | 1107 | if (apic && atomic_read(&apic->timer.pending) > 0) { |
1065 | atomic_read(&apic->timer.pending) > 0) { | 1108 | if (kvm_apic_local_deliver(apic, APIC_LVTT)) |
1066 | if (__inject_apic_timer_irq(apic)) | ||
1067 | atomic_dec(&apic->timer.pending); | 1109 | atomic_dec(&apic->timer.pending); |
1068 | } | 1110 | } |
1069 | } | 1111 | } |
diff --git a/arch/x86/kvm/mmu.c b/arch/x86/kvm/mmu.c index 410ddbc1aa2e..83f11c7474a1 100644 --- a/arch/x86/kvm/mmu.c +++ b/arch/x86/kvm/mmu.c | |||
@@ -17,7 +17,6 @@ | |||
17 | * | 17 | * |
18 | */ | 18 | */ |
19 | 19 | ||
20 | #include "vmx.h" | ||
21 | #include "mmu.h" | 20 | #include "mmu.h" |
22 | 21 | ||
23 | #include <linux/kvm_host.h> | 22 | #include <linux/kvm_host.h> |
@@ -33,6 +32,7 @@ | |||
33 | #include <asm/page.h> | 32 | #include <asm/page.h> |
34 | #include <asm/cmpxchg.h> | 33 | #include <asm/cmpxchg.h> |
35 | #include <asm/io.h> | 34 | #include <asm/io.h> |
35 | #include <asm/vmx.h> | ||
36 | 36 | ||
37 | /* | 37 | /* |
38 | * When setting this variable to true it enables Two-Dimensional-Paging | 38 | * When setting this variable to true it enables Two-Dimensional-Paging |
@@ -168,6 +168,7 @@ static u64 __read_mostly shadow_x_mask; /* mutual exclusive with nx_mask */ | |||
168 | static u64 __read_mostly shadow_user_mask; | 168 | static u64 __read_mostly shadow_user_mask; |
169 | static u64 __read_mostly shadow_accessed_mask; | 169 | static u64 __read_mostly shadow_accessed_mask; |
170 | static u64 __read_mostly shadow_dirty_mask; | 170 | static u64 __read_mostly shadow_dirty_mask; |
171 | static u64 __read_mostly shadow_mt_mask; | ||
171 | 172 | ||
172 | void kvm_mmu_set_nonpresent_ptes(u64 trap_pte, u64 notrap_pte) | 173 | void kvm_mmu_set_nonpresent_ptes(u64 trap_pte, u64 notrap_pte) |
173 | { | 174 | { |
@@ -183,13 +184,14 @@ void kvm_mmu_set_base_ptes(u64 base_pte) | |||
183 | EXPORT_SYMBOL_GPL(kvm_mmu_set_base_ptes); | 184 | EXPORT_SYMBOL_GPL(kvm_mmu_set_base_ptes); |
184 | 185 | ||
185 | void kvm_mmu_set_mask_ptes(u64 user_mask, u64 accessed_mask, | 186 | void kvm_mmu_set_mask_ptes(u64 user_mask, u64 accessed_mask, |
186 | u64 dirty_mask, u64 nx_mask, u64 x_mask) | 187 | u64 dirty_mask, u64 nx_mask, u64 x_mask, u64 mt_mask) |
187 | { | 188 | { |
188 | shadow_user_mask = user_mask; | 189 | shadow_user_mask = user_mask; |
189 | shadow_accessed_mask = accessed_mask; | 190 | shadow_accessed_mask = accessed_mask; |
190 | shadow_dirty_mask = dirty_mask; | 191 | shadow_dirty_mask = dirty_mask; |
191 | shadow_nx_mask = nx_mask; | 192 | shadow_nx_mask = nx_mask; |
192 | shadow_x_mask = x_mask; | 193 | shadow_x_mask = x_mask; |
194 | shadow_mt_mask = mt_mask; | ||
193 | } | 195 | } |
194 | EXPORT_SYMBOL_GPL(kvm_mmu_set_mask_ptes); | 196 | EXPORT_SYMBOL_GPL(kvm_mmu_set_mask_ptes); |
195 | 197 | ||
@@ -384,7 +386,9 @@ static void account_shadowed(struct kvm *kvm, gfn_t gfn) | |||
384 | { | 386 | { |
385 | int *write_count; | 387 | int *write_count; |
386 | 388 | ||
387 | write_count = slot_largepage_idx(gfn, gfn_to_memslot(kvm, gfn)); | 389 | gfn = unalias_gfn(kvm, gfn); |
390 | write_count = slot_largepage_idx(gfn, | ||
391 | gfn_to_memslot_unaliased(kvm, gfn)); | ||
388 | *write_count += 1; | 392 | *write_count += 1; |
389 | } | 393 | } |
390 | 394 | ||
@@ -392,16 +396,20 @@ static void unaccount_shadowed(struct kvm *kvm, gfn_t gfn) | |||
392 | { | 396 | { |
393 | int *write_count; | 397 | int *write_count; |
394 | 398 | ||
395 | write_count = slot_largepage_idx(gfn, gfn_to_memslot(kvm, gfn)); | 399 | gfn = unalias_gfn(kvm, gfn); |
400 | write_count = slot_largepage_idx(gfn, | ||
401 | gfn_to_memslot_unaliased(kvm, gfn)); | ||
396 | *write_count -= 1; | 402 | *write_count -= 1; |
397 | WARN_ON(*write_count < 0); | 403 | WARN_ON(*write_count < 0); |
398 | } | 404 | } |
399 | 405 | ||
400 | static int has_wrprotected_page(struct kvm *kvm, gfn_t gfn) | 406 | static int has_wrprotected_page(struct kvm *kvm, gfn_t gfn) |
401 | { | 407 | { |
402 | struct kvm_memory_slot *slot = gfn_to_memslot(kvm, gfn); | 408 | struct kvm_memory_slot *slot; |
403 | int *largepage_idx; | 409 | int *largepage_idx; |
404 | 410 | ||
411 | gfn = unalias_gfn(kvm, gfn); | ||
412 | slot = gfn_to_memslot_unaliased(kvm, gfn); | ||
405 | if (slot) { | 413 | if (slot) { |
406 | largepage_idx = slot_largepage_idx(gfn, slot); | 414 | largepage_idx = slot_largepage_idx(gfn, slot); |
407 | return *largepage_idx; | 415 | return *largepage_idx; |
@@ -613,7 +621,7 @@ static u64 *rmap_next(struct kvm *kvm, unsigned long *rmapp, u64 *spte) | |||
613 | return NULL; | 621 | return NULL; |
614 | } | 622 | } |
615 | 623 | ||
616 | static void rmap_write_protect(struct kvm *kvm, u64 gfn) | 624 | static int rmap_write_protect(struct kvm *kvm, u64 gfn) |
617 | { | 625 | { |
618 | unsigned long *rmapp; | 626 | unsigned long *rmapp; |
619 | u64 *spte; | 627 | u64 *spte; |
@@ -659,8 +667,7 @@ static void rmap_write_protect(struct kvm *kvm, u64 gfn) | |||
659 | spte = rmap_next(kvm, rmapp, spte); | 667 | spte = rmap_next(kvm, rmapp, spte); |
660 | } | 668 | } |
661 | 669 | ||
662 | if (write_protected) | 670 | return write_protected; |
663 | kvm_flush_remote_tlbs(kvm); | ||
664 | } | 671 | } |
665 | 672 | ||
666 | static int kvm_unmap_rmapp(struct kvm *kvm, unsigned long *rmapp) | 673 | static int kvm_unmap_rmapp(struct kvm *kvm, unsigned long *rmapp) |
@@ -786,9 +793,11 @@ static struct kvm_mmu_page *kvm_mmu_alloc_page(struct kvm_vcpu *vcpu, | |||
786 | sp->gfns = mmu_memory_cache_alloc(&vcpu->arch.mmu_page_cache, PAGE_SIZE); | 793 | sp->gfns = mmu_memory_cache_alloc(&vcpu->arch.mmu_page_cache, PAGE_SIZE); |
787 | set_page_private(virt_to_page(sp->spt), (unsigned long)sp); | 794 | set_page_private(virt_to_page(sp->spt), (unsigned long)sp); |
788 | list_add(&sp->link, &vcpu->kvm->arch.active_mmu_pages); | 795 | list_add(&sp->link, &vcpu->kvm->arch.active_mmu_pages); |
796 | INIT_LIST_HEAD(&sp->oos_link); | ||
789 | ASSERT(is_empty_shadow_page(sp->spt)); | 797 | ASSERT(is_empty_shadow_page(sp->spt)); |
790 | sp->slot_bitmap = 0; | 798 | bitmap_zero(sp->slot_bitmap, KVM_MEMORY_SLOTS + KVM_PRIVATE_MEM_SLOTS); |
791 | sp->multimapped = 0; | 799 | sp->multimapped = 0; |
800 | sp->global = 1; | ||
792 | sp->parent_pte = parent_pte; | 801 | sp->parent_pte = parent_pte; |
793 | --vcpu->kvm->arch.n_free_mmu_pages; | 802 | --vcpu->kvm->arch.n_free_mmu_pages; |
794 | return sp; | 803 | return sp; |
@@ -900,8 +909,9 @@ static void kvm_mmu_update_unsync_bitmap(u64 *spte) | |||
900 | struct kvm_mmu_page *sp = page_header(__pa(spte)); | 909 | struct kvm_mmu_page *sp = page_header(__pa(spte)); |
901 | 910 | ||
902 | index = spte - sp->spt; | 911 | index = spte - sp->spt; |
903 | __set_bit(index, sp->unsync_child_bitmap); | 912 | if (!__test_and_set_bit(index, sp->unsync_child_bitmap)) |
904 | sp->unsync_children = 1; | 913 | sp->unsync_children++; |
914 | WARN_ON(!sp->unsync_children); | ||
905 | } | 915 | } |
906 | 916 | ||
907 | static void kvm_mmu_update_parents_unsync(struct kvm_mmu_page *sp) | 917 | static void kvm_mmu_update_parents_unsync(struct kvm_mmu_page *sp) |
@@ -928,7 +938,6 @@ static void kvm_mmu_update_parents_unsync(struct kvm_mmu_page *sp) | |||
928 | 938 | ||
929 | static int unsync_walk_fn(struct kvm_vcpu *vcpu, struct kvm_mmu_page *sp) | 939 | static int unsync_walk_fn(struct kvm_vcpu *vcpu, struct kvm_mmu_page *sp) |
930 | { | 940 | { |
931 | sp->unsync_children = 1; | ||
932 | kvm_mmu_update_parents_unsync(sp); | 941 | kvm_mmu_update_parents_unsync(sp); |
933 | return 1; | 942 | return 1; |
934 | } | 943 | } |
@@ -959,38 +968,66 @@ static void nonpaging_invlpg(struct kvm_vcpu *vcpu, gva_t gva) | |||
959 | { | 968 | { |
960 | } | 969 | } |
961 | 970 | ||
971 | #define KVM_PAGE_ARRAY_NR 16 | ||
972 | |||
973 | struct kvm_mmu_pages { | ||
974 | struct mmu_page_and_offset { | ||
975 | struct kvm_mmu_page *sp; | ||
976 | unsigned int idx; | ||
977 | } page[KVM_PAGE_ARRAY_NR]; | ||
978 | unsigned int nr; | ||
979 | }; | ||
980 | |||
962 | #define for_each_unsync_children(bitmap, idx) \ | 981 | #define for_each_unsync_children(bitmap, idx) \ |
963 | for (idx = find_first_bit(bitmap, 512); \ | 982 | for (idx = find_first_bit(bitmap, 512); \ |
964 | idx < 512; \ | 983 | idx < 512; \ |
965 | idx = find_next_bit(bitmap, 512, idx+1)) | 984 | idx = find_next_bit(bitmap, 512, idx+1)) |
966 | 985 | ||
967 | static int mmu_unsync_walk(struct kvm_mmu_page *sp, | 986 | int mmu_pages_add(struct kvm_mmu_pages *pvec, struct kvm_mmu_page *sp, |
968 | struct kvm_unsync_walk *walker) | 987 | int idx) |
969 | { | 988 | { |
970 | int i, ret; | 989 | int i; |
971 | 990 | ||
972 | if (!sp->unsync_children) | 991 | if (sp->unsync) |
973 | return 0; | 992 | for (i=0; i < pvec->nr; i++) |
993 | if (pvec->page[i].sp == sp) | ||
994 | return 0; | ||
995 | |||
996 | pvec->page[pvec->nr].sp = sp; | ||
997 | pvec->page[pvec->nr].idx = idx; | ||
998 | pvec->nr++; | ||
999 | return (pvec->nr == KVM_PAGE_ARRAY_NR); | ||
1000 | } | ||
1001 | |||
1002 | static int __mmu_unsync_walk(struct kvm_mmu_page *sp, | ||
1003 | struct kvm_mmu_pages *pvec) | ||
1004 | { | ||
1005 | int i, ret, nr_unsync_leaf = 0; | ||
974 | 1006 | ||
975 | for_each_unsync_children(sp->unsync_child_bitmap, i) { | 1007 | for_each_unsync_children(sp->unsync_child_bitmap, i) { |
976 | u64 ent = sp->spt[i]; | 1008 | u64 ent = sp->spt[i]; |
977 | 1009 | ||
978 | if (is_shadow_present_pte(ent)) { | 1010 | if (is_shadow_present_pte(ent) && !is_large_pte(ent)) { |
979 | struct kvm_mmu_page *child; | 1011 | struct kvm_mmu_page *child; |
980 | child = page_header(ent & PT64_BASE_ADDR_MASK); | 1012 | child = page_header(ent & PT64_BASE_ADDR_MASK); |
981 | 1013 | ||
982 | if (child->unsync_children) { | 1014 | if (child->unsync_children) { |
983 | ret = mmu_unsync_walk(child, walker); | 1015 | if (mmu_pages_add(pvec, child, i)) |
984 | if (ret) | 1016 | return -ENOSPC; |
1017 | |||
1018 | ret = __mmu_unsync_walk(child, pvec); | ||
1019 | if (!ret) | ||
1020 | __clear_bit(i, sp->unsync_child_bitmap); | ||
1021 | else if (ret > 0) | ||
1022 | nr_unsync_leaf += ret; | ||
1023 | else | ||
985 | return ret; | 1024 | return ret; |
986 | __clear_bit(i, sp->unsync_child_bitmap); | ||
987 | } | 1025 | } |
988 | 1026 | ||
989 | if (child->unsync) { | 1027 | if (child->unsync) { |
990 | ret = walker->entry(child, walker); | 1028 | nr_unsync_leaf++; |
991 | __clear_bit(i, sp->unsync_child_bitmap); | 1029 | if (mmu_pages_add(pvec, child, i)) |
992 | if (ret) | 1030 | return -ENOSPC; |
993 | return ret; | ||
994 | } | 1031 | } |
995 | } | 1032 | } |
996 | } | 1033 | } |
@@ -998,7 +1035,17 @@ static int mmu_unsync_walk(struct kvm_mmu_page *sp, | |||
998 | if (find_first_bit(sp->unsync_child_bitmap, 512) == 512) | 1035 | if (find_first_bit(sp->unsync_child_bitmap, 512) == 512) |
999 | sp->unsync_children = 0; | 1036 | sp->unsync_children = 0; |
1000 | 1037 | ||
1001 | return 0; | 1038 | return nr_unsync_leaf; |
1039 | } | ||
1040 | |||
1041 | static int mmu_unsync_walk(struct kvm_mmu_page *sp, | ||
1042 | struct kvm_mmu_pages *pvec) | ||
1043 | { | ||
1044 | if (!sp->unsync_children) | ||
1045 | return 0; | ||
1046 | |||
1047 | mmu_pages_add(pvec, sp, 0); | ||
1048 | return __mmu_unsync_walk(sp, pvec); | ||
1002 | } | 1049 | } |
1003 | 1050 | ||
1004 | static struct kvm_mmu_page *kvm_mmu_lookup_page(struct kvm *kvm, gfn_t gfn) | 1051 | static struct kvm_mmu_page *kvm_mmu_lookup_page(struct kvm *kvm, gfn_t gfn) |
@@ -1021,10 +1068,18 @@ static struct kvm_mmu_page *kvm_mmu_lookup_page(struct kvm *kvm, gfn_t gfn) | |||
1021 | return NULL; | 1068 | return NULL; |
1022 | } | 1069 | } |
1023 | 1070 | ||
1071 | static void kvm_unlink_unsync_global(struct kvm *kvm, struct kvm_mmu_page *sp) | ||
1072 | { | ||
1073 | list_del(&sp->oos_link); | ||
1074 | --kvm->stat.mmu_unsync_global; | ||
1075 | } | ||
1076 | |||
1024 | static void kvm_unlink_unsync_page(struct kvm *kvm, struct kvm_mmu_page *sp) | 1077 | static void kvm_unlink_unsync_page(struct kvm *kvm, struct kvm_mmu_page *sp) |
1025 | { | 1078 | { |
1026 | WARN_ON(!sp->unsync); | 1079 | WARN_ON(!sp->unsync); |
1027 | sp->unsync = 0; | 1080 | sp->unsync = 0; |
1081 | if (sp->global) | ||
1082 | kvm_unlink_unsync_global(kvm, sp); | ||
1028 | --kvm->stat.mmu_unsync; | 1083 | --kvm->stat.mmu_unsync; |
1029 | } | 1084 | } |
1030 | 1085 | ||
@@ -1037,7 +1092,8 @@ static int kvm_sync_page(struct kvm_vcpu *vcpu, struct kvm_mmu_page *sp) | |||
1037 | return 1; | 1092 | return 1; |
1038 | } | 1093 | } |
1039 | 1094 | ||
1040 | rmap_write_protect(vcpu->kvm, sp->gfn); | 1095 | if (rmap_write_protect(vcpu->kvm, sp->gfn)) |
1096 | kvm_flush_remote_tlbs(vcpu->kvm); | ||
1041 | kvm_unlink_unsync_page(vcpu->kvm, sp); | 1097 | kvm_unlink_unsync_page(vcpu->kvm, sp); |
1042 | if (vcpu->arch.mmu.sync_page(vcpu, sp)) { | 1098 | if (vcpu->arch.mmu.sync_page(vcpu, sp)) { |
1043 | kvm_mmu_zap_page(vcpu->kvm, sp); | 1099 | kvm_mmu_zap_page(vcpu->kvm, sp); |
@@ -1048,30 +1104,89 @@ static int kvm_sync_page(struct kvm_vcpu *vcpu, struct kvm_mmu_page *sp) | |||
1048 | return 0; | 1104 | return 0; |
1049 | } | 1105 | } |
1050 | 1106 | ||
1051 | struct sync_walker { | 1107 | struct mmu_page_path { |
1052 | struct kvm_vcpu *vcpu; | 1108 | struct kvm_mmu_page *parent[PT64_ROOT_LEVEL-1]; |
1053 | struct kvm_unsync_walk walker; | 1109 | unsigned int idx[PT64_ROOT_LEVEL-1]; |
1054 | }; | 1110 | }; |
1055 | 1111 | ||
1056 | static int mmu_sync_fn(struct kvm_mmu_page *sp, struct kvm_unsync_walk *walk) | 1112 | #define for_each_sp(pvec, sp, parents, i) \ |
1113 | for (i = mmu_pages_next(&pvec, &parents, -1), \ | ||
1114 | sp = pvec.page[i].sp; \ | ||
1115 | i < pvec.nr && ({ sp = pvec.page[i].sp; 1;}); \ | ||
1116 | i = mmu_pages_next(&pvec, &parents, i)) | ||
1117 | |||
1118 | int mmu_pages_next(struct kvm_mmu_pages *pvec, struct mmu_page_path *parents, | ||
1119 | int i) | ||
1057 | { | 1120 | { |
1058 | struct sync_walker *sync_walk = container_of(walk, struct sync_walker, | 1121 | int n; |
1059 | walker); | ||
1060 | struct kvm_vcpu *vcpu = sync_walk->vcpu; | ||
1061 | 1122 | ||
1062 | kvm_sync_page(vcpu, sp); | 1123 | for (n = i+1; n < pvec->nr; n++) { |
1063 | return (need_resched() || spin_needbreak(&vcpu->kvm->mmu_lock)); | 1124 | struct kvm_mmu_page *sp = pvec->page[n].sp; |
1125 | |||
1126 | if (sp->role.level == PT_PAGE_TABLE_LEVEL) { | ||
1127 | parents->idx[0] = pvec->page[n].idx; | ||
1128 | return n; | ||
1129 | } | ||
1130 | |||
1131 | parents->parent[sp->role.level-2] = sp; | ||
1132 | parents->idx[sp->role.level-1] = pvec->page[n].idx; | ||
1133 | } | ||
1134 | |||
1135 | return n; | ||
1064 | } | 1136 | } |
1065 | 1137 | ||
1066 | static void mmu_sync_children(struct kvm_vcpu *vcpu, struct kvm_mmu_page *sp) | 1138 | void mmu_pages_clear_parents(struct mmu_page_path *parents) |
1067 | { | 1139 | { |
1068 | struct sync_walker walker = { | 1140 | struct kvm_mmu_page *sp; |
1069 | .walker = { .entry = mmu_sync_fn, }, | 1141 | unsigned int level = 0; |
1070 | .vcpu = vcpu, | 1142 | |
1071 | }; | 1143 | do { |
1144 | unsigned int idx = parents->idx[level]; | ||
1145 | |||
1146 | sp = parents->parent[level]; | ||
1147 | if (!sp) | ||
1148 | return; | ||
1149 | |||
1150 | --sp->unsync_children; | ||
1151 | WARN_ON((int)sp->unsync_children < 0); | ||
1152 | __clear_bit(idx, sp->unsync_child_bitmap); | ||
1153 | level++; | ||
1154 | } while (level < PT64_ROOT_LEVEL-1 && !sp->unsync_children); | ||
1155 | } | ||
1156 | |||
1157 | static void kvm_mmu_pages_init(struct kvm_mmu_page *parent, | ||
1158 | struct mmu_page_path *parents, | ||
1159 | struct kvm_mmu_pages *pvec) | ||
1160 | { | ||
1161 | parents->parent[parent->role.level-1] = NULL; | ||
1162 | pvec->nr = 0; | ||
1163 | } | ||
1164 | |||
1165 | static void mmu_sync_children(struct kvm_vcpu *vcpu, | ||
1166 | struct kvm_mmu_page *parent) | ||
1167 | { | ||
1168 | int i; | ||
1169 | struct kvm_mmu_page *sp; | ||
1170 | struct mmu_page_path parents; | ||
1171 | struct kvm_mmu_pages pages; | ||
1172 | |||
1173 | kvm_mmu_pages_init(parent, &parents, &pages); | ||
1174 | while (mmu_unsync_walk(parent, &pages)) { | ||
1175 | int protected = 0; | ||
1072 | 1176 | ||
1073 | while (mmu_unsync_walk(sp, &walker.walker)) | 1177 | for_each_sp(pages, sp, parents, i) |
1178 | protected |= rmap_write_protect(vcpu->kvm, sp->gfn); | ||
1179 | |||
1180 | if (protected) | ||
1181 | kvm_flush_remote_tlbs(vcpu->kvm); | ||
1182 | |||
1183 | for_each_sp(pages, sp, parents, i) { | ||
1184 | kvm_sync_page(vcpu, sp); | ||
1185 | mmu_pages_clear_parents(&parents); | ||
1186 | } | ||
1074 | cond_resched_lock(&vcpu->kvm->mmu_lock); | 1187 | cond_resched_lock(&vcpu->kvm->mmu_lock); |
1188 | kvm_mmu_pages_init(parent, &parents, &pages); | ||
1189 | } | ||
1075 | } | 1190 | } |
1076 | 1191 | ||
1077 | static struct kvm_mmu_page *kvm_mmu_get_page(struct kvm_vcpu *vcpu, | 1192 | static struct kvm_mmu_page *kvm_mmu_get_page(struct kvm_vcpu *vcpu, |
@@ -1129,7 +1244,8 @@ static struct kvm_mmu_page *kvm_mmu_get_page(struct kvm_vcpu *vcpu, | |||
1129 | sp->role = role; | 1244 | sp->role = role; |
1130 | hlist_add_head(&sp->hash_link, bucket); | 1245 | hlist_add_head(&sp->hash_link, bucket); |
1131 | if (!metaphysical) { | 1246 | if (!metaphysical) { |
1132 | rmap_write_protect(vcpu->kvm, gfn); | 1247 | if (rmap_write_protect(vcpu->kvm, gfn)) |
1248 | kvm_flush_remote_tlbs(vcpu->kvm); | ||
1133 | account_shadowed(vcpu->kvm, gfn); | 1249 | account_shadowed(vcpu->kvm, gfn); |
1134 | } | 1250 | } |
1135 | if (shadow_trap_nonpresent_pte != shadow_notrap_nonpresent_pte) | 1251 | if (shadow_trap_nonpresent_pte != shadow_notrap_nonpresent_pte) |
@@ -1153,6 +1269,8 @@ static int walk_shadow(struct kvm_shadow_walk *walker, | |||
1153 | if (level == PT32E_ROOT_LEVEL) { | 1269 | if (level == PT32E_ROOT_LEVEL) { |
1154 | shadow_addr = vcpu->arch.mmu.pae_root[(addr >> 30) & 3]; | 1270 | shadow_addr = vcpu->arch.mmu.pae_root[(addr >> 30) & 3]; |
1155 | shadow_addr &= PT64_BASE_ADDR_MASK; | 1271 | shadow_addr &= PT64_BASE_ADDR_MASK; |
1272 | if (!shadow_addr) | ||
1273 | return 1; | ||
1156 | --level; | 1274 | --level; |
1157 | } | 1275 | } |
1158 | 1276 | ||
@@ -1237,33 +1355,29 @@ static void kvm_mmu_unlink_parents(struct kvm *kvm, struct kvm_mmu_page *sp) | |||
1237 | } | 1355 | } |
1238 | } | 1356 | } |
1239 | 1357 | ||
1240 | struct zap_walker { | 1358 | static int mmu_zap_unsync_children(struct kvm *kvm, |
1241 | struct kvm_unsync_walk walker; | 1359 | struct kvm_mmu_page *parent) |
1242 | struct kvm *kvm; | ||
1243 | int zapped; | ||
1244 | }; | ||
1245 | |||
1246 | static int mmu_zap_fn(struct kvm_mmu_page *sp, struct kvm_unsync_walk *walk) | ||
1247 | { | 1360 | { |
1248 | struct zap_walker *zap_walk = container_of(walk, struct zap_walker, | 1361 | int i, zapped = 0; |
1249 | walker); | 1362 | struct mmu_page_path parents; |
1250 | kvm_mmu_zap_page(zap_walk->kvm, sp); | 1363 | struct kvm_mmu_pages pages; |
1251 | zap_walk->zapped = 1; | ||
1252 | return 0; | ||
1253 | } | ||
1254 | 1364 | ||
1255 | static int mmu_zap_unsync_children(struct kvm *kvm, struct kvm_mmu_page *sp) | 1365 | if (parent->role.level == PT_PAGE_TABLE_LEVEL) |
1256 | { | ||
1257 | struct zap_walker walker = { | ||
1258 | .walker = { .entry = mmu_zap_fn, }, | ||
1259 | .kvm = kvm, | ||
1260 | .zapped = 0, | ||
1261 | }; | ||
1262 | |||
1263 | if (sp->role.level == PT_PAGE_TABLE_LEVEL) | ||
1264 | return 0; | 1366 | return 0; |
1265 | mmu_unsync_walk(sp, &walker.walker); | 1367 | |
1266 | return walker.zapped; | 1368 | kvm_mmu_pages_init(parent, &parents, &pages); |
1369 | while (mmu_unsync_walk(parent, &pages)) { | ||
1370 | struct kvm_mmu_page *sp; | ||
1371 | |||
1372 | for_each_sp(pages, sp, parents, i) { | ||
1373 | kvm_mmu_zap_page(kvm, sp); | ||
1374 | mmu_pages_clear_parents(&parents); | ||
1375 | } | ||
1376 | zapped += pages.nr; | ||
1377 | kvm_mmu_pages_init(parent, &parents, &pages); | ||
1378 | } | ||
1379 | |||
1380 | return zapped; | ||
1267 | } | 1381 | } |
1268 | 1382 | ||
1269 | static int kvm_mmu_zap_page(struct kvm *kvm, struct kvm_mmu_page *sp) | 1383 | static int kvm_mmu_zap_page(struct kvm *kvm, struct kvm_mmu_page *sp) |
@@ -1362,7 +1476,7 @@ static void page_header_update_slot(struct kvm *kvm, void *pte, gfn_t gfn) | |||
1362 | int slot = memslot_id(kvm, gfn_to_memslot(kvm, gfn)); | 1476 | int slot = memslot_id(kvm, gfn_to_memslot(kvm, gfn)); |
1363 | struct kvm_mmu_page *sp = page_header(__pa(pte)); | 1477 | struct kvm_mmu_page *sp = page_header(__pa(pte)); |
1364 | 1478 | ||
1365 | __set_bit(slot, &sp->slot_bitmap); | 1479 | __set_bit(slot, sp->slot_bitmap); |
1366 | } | 1480 | } |
1367 | 1481 | ||
1368 | static void mmu_convert_notrap(struct kvm_mmu_page *sp) | 1482 | static void mmu_convert_notrap(struct kvm_mmu_page *sp) |
@@ -1393,6 +1507,110 @@ struct page *gva_to_page(struct kvm_vcpu *vcpu, gva_t gva) | |||
1393 | return page; | 1507 | return page; |
1394 | } | 1508 | } |
1395 | 1509 | ||
1510 | /* | ||
1511 | * The function is based on mtrr_type_lookup() in | ||
1512 | * arch/x86/kernel/cpu/mtrr/generic.c | ||
1513 | */ | ||
1514 | static int get_mtrr_type(struct mtrr_state_type *mtrr_state, | ||
1515 | u64 start, u64 end) | ||
1516 | { | ||
1517 | int i; | ||
1518 | u64 base, mask; | ||
1519 | u8 prev_match, curr_match; | ||
1520 | int num_var_ranges = KVM_NR_VAR_MTRR; | ||
1521 | |||
1522 | if (!mtrr_state->enabled) | ||
1523 | return 0xFF; | ||
1524 | |||
1525 | /* Make end inclusive end, instead of exclusive */ | ||
1526 | end--; | ||
1527 | |||
1528 | /* Look in fixed ranges. Just return the type as per start */ | ||
1529 | if (mtrr_state->have_fixed && (start < 0x100000)) { | ||
1530 | int idx; | ||
1531 | |||
1532 | if (start < 0x80000) { | ||
1533 | idx = 0; | ||
1534 | idx += (start >> 16); | ||
1535 | return mtrr_state->fixed_ranges[idx]; | ||
1536 | } else if (start < 0xC0000) { | ||
1537 | idx = 1 * 8; | ||
1538 | idx += ((start - 0x80000) >> 14); | ||
1539 | return mtrr_state->fixed_ranges[idx]; | ||
1540 | } else if (start < 0x1000000) { | ||
1541 | idx = 3 * 8; | ||
1542 | idx += ((start - 0xC0000) >> 12); | ||
1543 | return mtrr_state->fixed_ranges[idx]; | ||
1544 | } | ||
1545 | } | ||
1546 | |||
1547 | /* | ||
1548 | * Look in variable ranges | ||
1549 | * Look of multiple ranges matching this address and pick type | ||
1550 | * as per MTRR precedence | ||
1551 | */ | ||
1552 | if (!(mtrr_state->enabled & 2)) | ||
1553 | return mtrr_state->def_type; | ||
1554 | |||
1555 | prev_match = 0xFF; | ||
1556 | for (i = 0; i < num_var_ranges; ++i) { | ||
1557 | unsigned short start_state, end_state; | ||
1558 | |||
1559 | if (!(mtrr_state->var_ranges[i].mask_lo & (1 << 11))) | ||
1560 | continue; | ||
1561 | |||
1562 | base = (((u64)mtrr_state->var_ranges[i].base_hi) << 32) + | ||
1563 | (mtrr_state->var_ranges[i].base_lo & PAGE_MASK); | ||
1564 | mask = (((u64)mtrr_state->var_ranges[i].mask_hi) << 32) + | ||
1565 | (mtrr_state->var_ranges[i].mask_lo & PAGE_MASK); | ||
1566 | |||
1567 | start_state = ((start & mask) == (base & mask)); | ||
1568 | end_state = ((end & mask) == (base & mask)); | ||
1569 | if (start_state != end_state) | ||
1570 | return 0xFE; | ||
1571 | |||
1572 | if ((start & mask) != (base & mask)) | ||
1573 | continue; | ||
1574 | |||
1575 | curr_match = mtrr_state->var_ranges[i].base_lo & 0xff; | ||
1576 | if (prev_match == 0xFF) { | ||
1577 | prev_match = curr_match; | ||
1578 | continue; | ||
1579 | } | ||
1580 | |||
1581 | if (prev_match == MTRR_TYPE_UNCACHABLE || | ||
1582 | curr_match == MTRR_TYPE_UNCACHABLE) | ||
1583 | return MTRR_TYPE_UNCACHABLE; | ||
1584 | |||
1585 | if ((prev_match == MTRR_TYPE_WRBACK && | ||
1586 | curr_match == MTRR_TYPE_WRTHROUGH) || | ||
1587 | (prev_match == MTRR_TYPE_WRTHROUGH && | ||
1588 | curr_match == MTRR_TYPE_WRBACK)) { | ||
1589 | prev_match = MTRR_TYPE_WRTHROUGH; | ||
1590 | curr_match = MTRR_TYPE_WRTHROUGH; | ||
1591 | } | ||
1592 | |||
1593 | if (prev_match != curr_match) | ||
1594 | return MTRR_TYPE_UNCACHABLE; | ||
1595 | } | ||
1596 | |||
1597 | if (prev_match != 0xFF) | ||
1598 | return prev_match; | ||
1599 | |||
1600 | return mtrr_state->def_type; | ||
1601 | } | ||
1602 | |||
1603 | static u8 get_memory_type(struct kvm_vcpu *vcpu, gfn_t gfn) | ||
1604 | { | ||
1605 | u8 mtrr; | ||
1606 | |||
1607 | mtrr = get_mtrr_type(&vcpu->arch.mtrr_state, gfn << PAGE_SHIFT, | ||
1608 | (gfn << PAGE_SHIFT) + PAGE_SIZE); | ||
1609 | if (mtrr == 0xfe || mtrr == 0xff) | ||
1610 | mtrr = MTRR_TYPE_WRBACK; | ||
1611 | return mtrr; | ||
1612 | } | ||
1613 | |||
1396 | static int kvm_unsync_page(struct kvm_vcpu *vcpu, struct kvm_mmu_page *sp) | 1614 | static int kvm_unsync_page(struct kvm_vcpu *vcpu, struct kvm_mmu_page *sp) |
1397 | { | 1615 | { |
1398 | unsigned index; | 1616 | unsigned index; |
@@ -1409,9 +1627,15 @@ static int kvm_unsync_page(struct kvm_vcpu *vcpu, struct kvm_mmu_page *sp) | |||
1409 | if (s->role.word != sp->role.word) | 1627 | if (s->role.word != sp->role.word) |
1410 | return 1; | 1628 | return 1; |
1411 | } | 1629 | } |
1412 | kvm_mmu_mark_parents_unsync(vcpu, sp); | ||
1413 | ++vcpu->kvm->stat.mmu_unsync; | 1630 | ++vcpu->kvm->stat.mmu_unsync; |
1414 | sp->unsync = 1; | 1631 | sp->unsync = 1; |
1632 | |||
1633 | if (sp->global) { | ||
1634 | list_add(&sp->oos_link, &vcpu->kvm->arch.oos_global_pages); | ||
1635 | ++vcpu->kvm->stat.mmu_unsync_global; | ||
1636 | } else | ||
1637 | kvm_mmu_mark_parents_unsync(vcpu, sp); | ||
1638 | |||
1415 | mmu_convert_notrap(sp); | 1639 | mmu_convert_notrap(sp); |
1416 | return 0; | 1640 | return 0; |
1417 | } | 1641 | } |
@@ -1437,11 +1661,24 @@ static int mmu_need_write_protect(struct kvm_vcpu *vcpu, gfn_t gfn, | |||
1437 | static int set_spte(struct kvm_vcpu *vcpu, u64 *shadow_pte, | 1661 | static int set_spte(struct kvm_vcpu *vcpu, u64 *shadow_pte, |
1438 | unsigned pte_access, int user_fault, | 1662 | unsigned pte_access, int user_fault, |
1439 | int write_fault, int dirty, int largepage, | 1663 | int write_fault, int dirty, int largepage, |
1440 | gfn_t gfn, pfn_t pfn, bool speculative, | 1664 | int global, gfn_t gfn, pfn_t pfn, bool speculative, |
1441 | bool can_unsync) | 1665 | bool can_unsync) |
1442 | { | 1666 | { |
1443 | u64 spte; | 1667 | u64 spte; |
1444 | int ret = 0; | 1668 | int ret = 0; |
1669 | u64 mt_mask = shadow_mt_mask; | ||
1670 | struct kvm_mmu_page *sp = page_header(__pa(shadow_pte)); | ||
1671 | |||
1672 | if (!(vcpu->arch.cr4 & X86_CR4_PGE)) | ||
1673 | global = 0; | ||
1674 | if (!global && sp->global) { | ||
1675 | sp->global = 0; | ||
1676 | if (sp->unsync) { | ||
1677 | kvm_unlink_unsync_global(vcpu->kvm, sp); | ||
1678 | kvm_mmu_mark_parents_unsync(vcpu, sp); | ||
1679 | } | ||
1680 | } | ||
1681 | |||
1445 | /* | 1682 | /* |
1446 | * We don't set the accessed bit, since we sometimes want to see | 1683 | * We don't set the accessed bit, since we sometimes want to see |
1447 | * whether the guest actually used the pte (in order to detect | 1684 | * whether the guest actually used the pte (in order to detect |
@@ -1460,6 +1697,11 @@ static int set_spte(struct kvm_vcpu *vcpu, u64 *shadow_pte, | |||
1460 | spte |= shadow_user_mask; | 1697 | spte |= shadow_user_mask; |
1461 | if (largepage) | 1698 | if (largepage) |
1462 | spte |= PT_PAGE_SIZE_MASK; | 1699 | spte |= PT_PAGE_SIZE_MASK; |
1700 | if (mt_mask) { | ||
1701 | mt_mask = get_memory_type(vcpu, gfn) << | ||
1702 | kvm_x86_ops->get_mt_mask_shift(); | ||
1703 | spte |= mt_mask; | ||
1704 | } | ||
1463 | 1705 | ||
1464 | spte |= (u64)pfn << PAGE_SHIFT; | 1706 | spte |= (u64)pfn << PAGE_SHIFT; |
1465 | 1707 | ||
@@ -1474,6 +1716,15 @@ static int set_spte(struct kvm_vcpu *vcpu, u64 *shadow_pte, | |||
1474 | 1716 | ||
1475 | spte |= PT_WRITABLE_MASK; | 1717 | spte |= PT_WRITABLE_MASK; |
1476 | 1718 | ||
1719 | /* | ||
1720 | * Optimization: for pte sync, if spte was writable the hash | ||
1721 | * lookup is unnecessary (and expensive). Write protection | ||
1722 | * is responsibility of mmu_get_page / kvm_sync_page. | ||
1723 | * Same reasoning can be applied to dirty page accounting. | ||
1724 | */ | ||
1725 | if (!can_unsync && is_writeble_pte(*shadow_pte)) | ||
1726 | goto set_pte; | ||
1727 | |||
1477 | if (mmu_need_write_protect(vcpu, gfn, can_unsync)) { | 1728 | if (mmu_need_write_protect(vcpu, gfn, can_unsync)) { |
1478 | pgprintk("%s: found shadow page for %lx, marking ro\n", | 1729 | pgprintk("%s: found shadow page for %lx, marking ro\n", |
1479 | __func__, gfn); | 1730 | __func__, gfn); |
@@ -1495,8 +1746,8 @@ set_pte: | |||
1495 | static void mmu_set_spte(struct kvm_vcpu *vcpu, u64 *shadow_pte, | 1746 | static void mmu_set_spte(struct kvm_vcpu *vcpu, u64 *shadow_pte, |
1496 | unsigned pt_access, unsigned pte_access, | 1747 | unsigned pt_access, unsigned pte_access, |
1497 | int user_fault, int write_fault, int dirty, | 1748 | int user_fault, int write_fault, int dirty, |
1498 | int *ptwrite, int largepage, gfn_t gfn, | 1749 | int *ptwrite, int largepage, int global, |
1499 | pfn_t pfn, bool speculative) | 1750 | gfn_t gfn, pfn_t pfn, bool speculative) |
1500 | { | 1751 | { |
1501 | int was_rmapped = 0; | 1752 | int was_rmapped = 0; |
1502 | int was_writeble = is_writeble_pte(*shadow_pte); | 1753 | int was_writeble = is_writeble_pte(*shadow_pte); |
@@ -1529,7 +1780,7 @@ static void mmu_set_spte(struct kvm_vcpu *vcpu, u64 *shadow_pte, | |||
1529 | } | 1780 | } |
1530 | } | 1781 | } |
1531 | if (set_spte(vcpu, shadow_pte, pte_access, user_fault, write_fault, | 1782 | if (set_spte(vcpu, shadow_pte, pte_access, user_fault, write_fault, |
1532 | dirty, largepage, gfn, pfn, speculative, true)) { | 1783 | dirty, largepage, global, gfn, pfn, speculative, true)) { |
1533 | if (write_fault) | 1784 | if (write_fault) |
1534 | *ptwrite = 1; | 1785 | *ptwrite = 1; |
1535 | kvm_x86_ops->tlb_flush(vcpu); | 1786 | kvm_x86_ops->tlb_flush(vcpu); |
@@ -1586,7 +1837,7 @@ static int direct_map_entry(struct kvm_shadow_walk *_walk, | |||
1586 | || (walk->largepage && level == PT_DIRECTORY_LEVEL)) { | 1837 | || (walk->largepage && level == PT_DIRECTORY_LEVEL)) { |
1587 | mmu_set_spte(vcpu, sptep, ACC_ALL, ACC_ALL, | 1838 | mmu_set_spte(vcpu, sptep, ACC_ALL, ACC_ALL, |
1588 | 0, walk->write, 1, &walk->pt_write, | 1839 | 0, walk->write, 1, &walk->pt_write, |
1589 | walk->largepage, gfn, walk->pfn, false); | 1840 | walk->largepage, 0, gfn, walk->pfn, false); |
1590 | ++vcpu->stat.pf_fixed; | 1841 | ++vcpu->stat.pf_fixed; |
1591 | return 1; | 1842 | return 1; |
1592 | } | 1843 | } |
@@ -1773,6 +2024,15 @@ static void mmu_sync_roots(struct kvm_vcpu *vcpu) | |||
1773 | } | 2024 | } |
1774 | } | 2025 | } |
1775 | 2026 | ||
2027 | static void mmu_sync_global(struct kvm_vcpu *vcpu) | ||
2028 | { | ||
2029 | struct kvm *kvm = vcpu->kvm; | ||
2030 | struct kvm_mmu_page *sp, *n; | ||
2031 | |||
2032 | list_for_each_entry_safe(sp, n, &kvm->arch.oos_global_pages, oos_link) | ||
2033 | kvm_sync_page(vcpu, sp); | ||
2034 | } | ||
2035 | |||
1776 | void kvm_mmu_sync_roots(struct kvm_vcpu *vcpu) | 2036 | void kvm_mmu_sync_roots(struct kvm_vcpu *vcpu) |
1777 | { | 2037 | { |
1778 | spin_lock(&vcpu->kvm->mmu_lock); | 2038 | spin_lock(&vcpu->kvm->mmu_lock); |
@@ -1780,6 +2040,13 @@ void kvm_mmu_sync_roots(struct kvm_vcpu *vcpu) | |||
1780 | spin_unlock(&vcpu->kvm->mmu_lock); | 2040 | spin_unlock(&vcpu->kvm->mmu_lock); |
1781 | } | 2041 | } |
1782 | 2042 | ||
2043 | void kvm_mmu_sync_global(struct kvm_vcpu *vcpu) | ||
2044 | { | ||
2045 | spin_lock(&vcpu->kvm->mmu_lock); | ||
2046 | mmu_sync_global(vcpu); | ||
2047 | spin_unlock(&vcpu->kvm->mmu_lock); | ||
2048 | } | ||
2049 | |||
1783 | static gpa_t nonpaging_gva_to_gpa(struct kvm_vcpu *vcpu, gva_t vaddr) | 2050 | static gpa_t nonpaging_gva_to_gpa(struct kvm_vcpu *vcpu, gva_t vaddr) |
1784 | { | 2051 | { |
1785 | return vaddr; | 2052 | return vaddr; |
@@ -2178,7 +2445,8 @@ static void kvm_mmu_access_page(struct kvm_vcpu *vcpu, gfn_t gfn) | |||
2178 | } | 2445 | } |
2179 | 2446 | ||
2180 | void kvm_mmu_pte_write(struct kvm_vcpu *vcpu, gpa_t gpa, | 2447 | void kvm_mmu_pte_write(struct kvm_vcpu *vcpu, gpa_t gpa, |
2181 | const u8 *new, int bytes) | 2448 | const u8 *new, int bytes, |
2449 | bool guest_initiated) | ||
2182 | { | 2450 | { |
2183 | gfn_t gfn = gpa >> PAGE_SHIFT; | 2451 | gfn_t gfn = gpa >> PAGE_SHIFT; |
2184 | struct kvm_mmu_page *sp; | 2452 | struct kvm_mmu_page *sp; |
@@ -2204,15 +2472,17 @@ void kvm_mmu_pte_write(struct kvm_vcpu *vcpu, gpa_t gpa, | |||
2204 | kvm_mmu_free_some_pages(vcpu); | 2472 | kvm_mmu_free_some_pages(vcpu); |
2205 | ++vcpu->kvm->stat.mmu_pte_write; | 2473 | ++vcpu->kvm->stat.mmu_pte_write; |
2206 | kvm_mmu_audit(vcpu, "pre pte write"); | 2474 | kvm_mmu_audit(vcpu, "pre pte write"); |
2207 | if (gfn == vcpu->arch.last_pt_write_gfn | 2475 | if (guest_initiated) { |
2208 | && !last_updated_pte_accessed(vcpu)) { | 2476 | if (gfn == vcpu->arch.last_pt_write_gfn |
2209 | ++vcpu->arch.last_pt_write_count; | 2477 | && !last_updated_pte_accessed(vcpu)) { |
2210 | if (vcpu->arch.last_pt_write_count >= 3) | 2478 | ++vcpu->arch.last_pt_write_count; |
2211 | flooded = 1; | 2479 | if (vcpu->arch.last_pt_write_count >= 3) |
2212 | } else { | 2480 | flooded = 1; |
2213 | vcpu->arch.last_pt_write_gfn = gfn; | 2481 | } else { |
2214 | vcpu->arch.last_pt_write_count = 1; | 2482 | vcpu->arch.last_pt_write_gfn = gfn; |
2215 | vcpu->arch.last_pte_updated = NULL; | 2483 | vcpu->arch.last_pt_write_count = 1; |
2484 | vcpu->arch.last_pte_updated = NULL; | ||
2485 | } | ||
2216 | } | 2486 | } |
2217 | index = kvm_page_table_hashfn(gfn); | 2487 | index = kvm_page_table_hashfn(gfn); |
2218 | bucket = &vcpu->kvm->arch.mmu_page_hash[index]; | 2488 | bucket = &vcpu->kvm->arch.mmu_page_hash[index]; |
@@ -2352,9 +2622,7 @@ EXPORT_SYMBOL_GPL(kvm_mmu_page_fault); | |||
2352 | 2622 | ||
2353 | void kvm_mmu_invlpg(struct kvm_vcpu *vcpu, gva_t gva) | 2623 | void kvm_mmu_invlpg(struct kvm_vcpu *vcpu, gva_t gva) |
2354 | { | 2624 | { |
2355 | spin_lock(&vcpu->kvm->mmu_lock); | ||
2356 | vcpu->arch.mmu.invlpg(vcpu, gva); | 2625 | vcpu->arch.mmu.invlpg(vcpu, gva); |
2357 | spin_unlock(&vcpu->kvm->mmu_lock); | ||
2358 | kvm_mmu_flush_tlb(vcpu); | 2626 | kvm_mmu_flush_tlb(vcpu); |
2359 | ++vcpu->stat.invlpg; | 2627 | ++vcpu->stat.invlpg; |
2360 | } | 2628 | } |
@@ -2451,7 +2719,7 @@ void kvm_mmu_slot_remove_write_access(struct kvm *kvm, int slot) | |||
2451 | int i; | 2719 | int i; |
2452 | u64 *pt; | 2720 | u64 *pt; |
2453 | 2721 | ||
2454 | if (!test_bit(slot, &sp->slot_bitmap)) | 2722 | if (!test_bit(slot, sp->slot_bitmap)) |
2455 | continue; | 2723 | continue; |
2456 | 2724 | ||
2457 | pt = sp->spt; | 2725 | pt = sp->spt; |
@@ -2860,8 +3128,8 @@ static void audit_write_protection(struct kvm_vcpu *vcpu) | |||
2860 | if (sp->role.metaphysical) | 3128 | if (sp->role.metaphysical) |
2861 | continue; | 3129 | continue; |
2862 | 3130 | ||
2863 | slot = gfn_to_memslot(vcpu->kvm, sp->gfn); | ||
2864 | gfn = unalias_gfn(vcpu->kvm, sp->gfn); | 3131 | gfn = unalias_gfn(vcpu->kvm, sp->gfn); |
3132 | slot = gfn_to_memslot_unaliased(vcpu->kvm, sp->gfn); | ||
2865 | rmapp = &slot->rmap[gfn - slot->base_gfn]; | 3133 | rmapp = &slot->rmap[gfn - slot->base_gfn]; |
2866 | if (*rmapp) | 3134 | if (*rmapp) |
2867 | printk(KERN_ERR "%s: (%s) shadow page has writable" | 3135 | printk(KERN_ERR "%s: (%s) shadow page has writable" |
diff --git a/arch/x86/kvm/paging_tmpl.h b/arch/x86/kvm/paging_tmpl.h index 84eee43bbe74..9fd78b6e17ad 100644 --- a/arch/x86/kvm/paging_tmpl.h +++ b/arch/x86/kvm/paging_tmpl.h | |||
@@ -82,6 +82,7 @@ struct shadow_walker { | |||
82 | int *ptwrite; | 82 | int *ptwrite; |
83 | pfn_t pfn; | 83 | pfn_t pfn; |
84 | u64 *sptep; | 84 | u64 *sptep; |
85 | gpa_t pte_gpa; | ||
85 | }; | 86 | }; |
86 | 87 | ||
87 | static gfn_t gpte_to_gfn(pt_element_t gpte) | 88 | static gfn_t gpte_to_gfn(pt_element_t gpte) |
@@ -222,7 +223,7 @@ walk: | |||
222 | if (ret) | 223 | if (ret) |
223 | goto walk; | 224 | goto walk; |
224 | pte |= PT_DIRTY_MASK; | 225 | pte |= PT_DIRTY_MASK; |
225 | kvm_mmu_pte_write(vcpu, pte_gpa, (u8 *)&pte, sizeof(pte)); | 226 | kvm_mmu_pte_write(vcpu, pte_gpa, (u8 *)&pte, sizeof(pte), 0); |
226 | walker->ptes[walker->level - 1] = pte; | 227 | walker->ptes[walker->level - 1] = pte; |
227 | } | 228 | } |
228 | 229 | ||
@@ -274,7 +275,8 @@ static void FNAME(update_pte)(struct kvm_vcpu *vcpu, struct kvm_mmu_page *page, | |||
274 | return; | 275 | return; |
275 | kvm_get_pfn(pfn); | 276 | kvm_get_pfn(pfn); |
276 | mmu_set_spte(vcpu, spte, page->role.access, pte_access, 0, 0, | 277 | mmu_set_spte(vcpu, spte, page->role.access, pte_access, 0, 0, |
277 | gpte & PT_DIRTY_MASK, NULL, largepage, gpte_to_gfn(gpte), | 278 | gpte & PT_DIRTY_MASK, NULL, largepage, |
279 | gpte & PT_GLOBAL_MASK, gpte_to_gfn(gpte), | ||
278 | pfn, true); | 280 | pfn, true); |
279 | } | 281 | } |
280 | 282 | ||
@@ -301,8 +303,9 @@ static int FNAME(shadow_walk_entry)(struct kvm_shadow_walk *_sw, | |||
301 | mmu_set_spte(vcpu, sptep, access, gw->pte_access & access, | 303 | mmu_set_spte(vcpu, sptep, access, gw->pte_access & access, |
302 | sw->user_fault, sw->write_fault, | 304 | sw->user_fault, sw->write_fault, |
303 | gw->ptes[gw->level-1] & PT_DIRTY_MASK, | 305 | gw->ptes[gw->level-1] & PT_DIRTY_MASK, |
304 | sw->ptwrite, sw->largepage, gw->gfn, sw->pfn, | 306 | sw->ptwrite, sw->largepage, |
305 | false); | 307 | gw->ptes[gw->level-1] & PT_GLOBAL_MASK, |
308 | gw->gfn, sw->pfn, false); | ||
306 | sw->sptep = sptep; | 309 | sw->sptep = sptep; |
307 | return 1; | 310 | return 1; |
308 | } | 311 | } |
@@ -466,10 +469,22 @@ static int FNAME(shadow_invlpg_entry)(struct kvm_shadow_walk *_sw, | |||
466 | struct kvm_vcpu *vcpu, u64 addr, | 469 | struct kvm_vcpu *vcpu, u64 addr, |
467 | u64 *sptep, int level) | 470 | u64 *sptep, int level) |
468 | { | 471 | { |
472 | struct shadow_walker *sw = | ||
473 | container_of(_sw, struct shadow_walker, walker); | ||
469 | 474 | ||
470 | if (level == PT_PAGE_TABLE_LEVEL) { | 475 | /* FIXME: properly handle invlpg on large guest pages */ |
471 | if (is_shadow_present_pte(*sptep)) | 476 | if (level == PT_PAGE_TABLE_LEVEL || |
477 | ((level == PT_DIRECTORY_LEVEL) && is_large_pte(*sptep))) { | ||
478 | struct kvm_mmu_page *sp = page_header(__pa(sptep)); | ||
479 | |||
480 | sw->pte_gpa = (sp->gfn << PAGE_SHIFT); | ||
481 | sw->pte_gpa += (sptep - sp->spt) * sizeof(pt_element_t); | ||
482 | |||
483 | if (is_shadow_present_pte(*sptep)) { | ||
472 | rmap_remove(vcpu->kvm, sptep); | 484 | rmap_remove(vcpu->kvm, sptep); |
485 | if (is_large_pte(*sptep)) | ||
486 | --vcpu->kvm->stat.lpages; | ||
487 | } | ||
473 | set_shadow_pte(sptep, shadow_trap_nonpresent_pte); | 488 | set_shadow_pte(sptep, shadow_trap_nonpresent_pte); |
474 | return 1; | 489 | return 1; |
475 | } | 490 | } |
@@ -480,11 +495,26 @@ static int FNAME(shadow_invlpg_entry)(struct kvm_shadow_walk *_sw, | |||
480 | 495 | ||
481 | static void FNAME(invlpg)(struct kvm_vcpu *vcpu, gva_t gva) | 496 | static void FNAME(invlpg)(struct kvm_vcpu *vcpu, gva_t gva) |
482 | { | 497 | { |
498 | pt_element_t gpte; | ||
483 | struct shadow_walker walker = { | 499 | struct shadow_walker walker = { |
484 | .walker = { .entry = FNAME(shadow_invlpg_entry), }, | 500 | .walker = { .entry = FNAME(shadow_invlpg_entry), }, |
501 | .pte_gpa = -1, | ||
485 | }; | 502 | }; |
486 | 503 | ||
504 | spin_lock(&vcpu->kvm->mmu_lock); | ||
487 | walk_shadow(&walker.walker, vcpu, gva); | 505 | walk_shadow(&walker.walker, vcpu, gva); |
506 | spin_unlock(&vcpu->kvm->mmu_lock); | ||
507 | if (walker.pte_gpa == -1) | ||
508 | return; | ||
509 | if (kvm_read_guest_atomic(vcpu->kvm, walker.pte_gpa, &gpte, | ||
510 | sizeof(pt_element_t))) | ||
511 | return; | ||
512 | if (is_present_pte(gpte) && (gpte & PT_ACCESSED_MASK)) { | ||
513 | if (mmu_topup_memory_caches(vcpu)) | ||
514 | return; | ||
515 | kvm_mmu_pte_write(vcpu, walker.pte_gpa, (const u8 *)&gpte, | ||
516 | sizeof(pt_element_t), 0); | ||
517 | } | ||
488 | } | 518 | } |
489 | 519 | ||
490 | static gpa_t FNAME(gva_to_gpa)(struct kvm_vcpu *vcpu, gva_t vaddr) | 520 | static gpa_t FNAME(gva_to_gpa)(struct kvm_vcpu *vcpu, gva_t vaddr) |
@@ -580,7 +610,7 @@ static int FNAME(sync_page)(struct kvm_vcpu *vcpu, struct kvm_mmu_page *sp) | |||
580 | nr_present++; | 610 | nr_present++; |
581 | pte_access = sp->role.access & FNAME(gpte_access)(vcpu, gpte); | 611 | pte_access = sp->role.access & FNAME(gpte_access)(vcpu, gpte); |
582 | set_spte(vcpu, &sp->spt[i], pte_access, 0, 0, | 612 | set_spte(vcpu, &sp->spt[i], pte_access, 0, 0, |
583 | is_dirty_pte(gpte), 0, gfn, | 613 | is_dirty_pte(gpte), 0, gpte & PT_GLOBAL_MASK, gfn, |
584 | spte_to_pfn(sp->spt[i]), true, false); | 614 | spte_to_pfn(sp->spt[i]), true, false); |
585 | } | 615 | } |
586 | 616 | ||
diff --git a/arch/x86/kvm/svm.c b/arch/x86/kvm/svm.c index 9c4ce657d963..1452851ae258 100644 --- a/arch/x86/kvm/svm.c +++ b/arch/x86/kvm/svm.c | |||
@@ -28,6 +28,8 @@ | |||
28 | 28 | ||
29 | #include <asm/desc.h> | 29 | #include <asm/desc.h> |
30 | 30 | ||
31 | #include <asm/virtext.h> | ||
32 | |||
31 | #define __ex(x) __kvm_handle_fault_on_reboot(x) | 33 | #define __ex(x) __kvm_handle_fault_on_reboot(x) |
32 | 34 | ||
33 | MODULE_AUTHOR("Qumranet"); | 35 | MODULE_AUTHOR("Qumranet"); |
@@ -245,34 +247,19 @@ static void skip_emulated_instruction(struct kvm_vcpu *vcpu) | |||
245 | 247 | ||
246 | static int has_svm(void) | 248 | static int has_svm(void) |
247 | { | 249 | { |
248 | uint32_t eax, ebx, ecx, edx; | 250 | const char *msg; |
249 | |||
250 | if (boot_cpu_data.x86_vendor != X86_VENDOR_AMD) { | ||
251 | printk(KERN_INFO "has_svm: not amd\n"); | ||
252 | return 0; | ||
253 | } | ||
254 | 251 | ||
255 | cpuid(0x80000000, &eax, &ebx, &ecx, &edx); | 252 | if (!cpu_has_svm(&msg)) { |
256 | if (eax < SVM_CPUID_FUNC) { | 253 | printk(KERN_INFO "has_svn: %s\n", msg); |
257 | printk(KERN_INFO "has_svm: can't execute cpuid_8000000a\n"); | ||
258 | return 0; | 254 | return 0; |
259 | } | 255 | } |
260 | 256 | ||
261 | cpuid(0x80000001, &eax, &ebx, &ecx, &edx); | ||
262 | if (!(ecx & (1 << SVM_CPUID_FEATURE_SHIFT))) { | ||
263 | printk(KERN_DEBUG "has_svm: svm not available\n"); | ||
264 | return 0; | ||
265 | } | ||
266 | return 1; | 257 | return 1; |
267 | } | 258 | } |
268 | 259 | ||
269 | static void svm_hardware_disable(void *garbage) | 260 | static void svm_hardware_disable(void *garbage) |
270 | { | 261 | { |
271 | uint64_t efer; | 262 | cpu_svm_disable(); |
272 | |||
273 | wrmsrl(MSR_VM_HSAVE_PA, 0); | ||
274 | rdmsrl(MSR_EFER, efer); | ||
275 | wrmsrl(MSR_EFER, efer & ~MSR_EFER_SVME_MASK); | ||
276 | } | 263 | } |
277 | 264 | ||
278 | static void svm_hardware_enable(void *garbage) | 265 | static void svm_hardware_enable(void *garbage) |
@@ -772,6 +759,22 @@ static void svm_get_segment(struct kvm_vcpu *vcpu, | |||
772 | var->l = (s->attrib >> SVM_SELECTOR_L_SHIFT) & 1; | 759 | var->l = (s->attrib >> SVM_SELECTOR_L_SHIFT) & 1; |
773 | var->db = (s->attrib >> SVM_SELECTOR_DB_SHIFT) & 1; | 760 | var->db = (s->attrib >> SVM_SELECTOR_DB_SHIFT) & 1; |
774 | var->g = (s->attrib >> SVM_SELECTOR_G_SHIFT) & 1; | 761 | var->g = (s->attrib >> SVM_SELECTOR_G_SHIFT) & 1; |
762 | |||
763 | /* | ||
764 | * SVM always stores 0 for the 'G' bit in the CS selector in | ||
765 | * the VMCB on a VMEXIT. This hurts cross-vendor migration: | ||
766 | * Intel's VMENTRY has a check on the 'G' bit. | ||
767 | */ | ||
768 | if (seg == VCPU_SREG_CS) | ||
769 | var->g = s->limit > 0xfffff; | ||
770 | |||
771 | /* | ||
772 | * Work around a bug where the busy flag in the tr selector | ||
773 | * isn't exposed | ||
774 | */ | ||
775 | if (seg == VCPU_SREG_TR) | ||
776 | var->type |= 0x2; | ||
777 | |||
775 | var->unusable = !var->present; | 778 | var->unusable = !var->present; |
776 | } | 779 | } |
777 | 780 | ||
@@ -1099,6 +1102,7 @@ static int io_interception(struct vcpu_svm *svm, struct kvm_run *kvm_run) | |||
1099 | rep = (io_info & SVM_IOIO_REP_MASK) != 0; | 1102 | rep = (io_info & SVM_IOIO_REP_MASK) != 0; |
1100 | down = (svm->vmcb->save.rflags & X86_EFLAGS_DF) != 0; | 1103 | down = (svm->vmcb->save.rflags & X86_EFLAGS_DF) != 0; |
1101 | 1104 | ||
1105 | skip_emulated_instruction(&svm->vcpu); | ||
1102 | return kvm_emulate_pio(&svm->vcpu, kvm_run, in, size, port); | 1106 | return kvm_emulate_pio(&svm->vcpu, kvm_run, in, size, port); |
1103 | } | 1107 | } |
1104 | 1108 | ||
@@ -1912,6 +1916,11 @@ static int get_npt_level(void) | |||
1912 | #endif | 1916 | #endif |
1913 | } | 1917 | } |
1914 | 1918 | ||
1919 | static int svm_get_mt_mask_shift(void) | ||
1920 | { | ||
1921 | return 0; | ||
1922 | } | ||
1923 | |||
1915 | static struct kvm_x86_ops svm_x86_ops = { | 1924 | static struct kvm_x86_ops svm_x86_ops = { |
1916 | .cpu_has_kvm_support = has_svm, | 1925 | .cpu_has_kvm_support = has_svm, |
1917 | .disabled_by_bios = is_disabled, | 1926 | .disabled_by_bios = is_disabled, |
@@ -1967,6 +1976,7 @@ static struct kvm_x86_ops svm_x86_ops = { | |||
1967 | 1976 | ||
1968 | .set_tss_addr = svm_set_tss_addr, | 1977 | .set_tss_addr = svm_set_tss_addr, |
1969 | .get_tdp_level = get_npt_level, | 1978 | .get_tdp_level = get_npt_level, |
1979 | .get_mt_mask_shift = svm_get_mt_mask_shift, | ||
1970 | }; | 1980 | }; |
1971 | 1981 | ||
1972 | static int __init svm_init(void) | 1982 | static int __init svm_init(void) |
diff --git a/arch/x86/kvm/vmx.c b/arch/x86/kvm/vmx.c index a4018b01e1f9..6259d7467648 100644 --- a/arch/x86/kvm/vmx.c +++ b/arch/x86/kvm/vmx.c | |||
@@ -16,7 +16,6 @@ | |||
16 | */ | 16 | */ |
17 | 17 | ||
18 | #include "irq.h" | 18 | #include "irq.h" |
19 | #include "vmx.h" | ||
20 | #include "mmu.h" | 19 | #include "mmu.h" |
21 | 20 | ||
22 | #include <linux/kvm_host.h> | 21 | #include <linux/kvm_host.h> |
@@ -31,6 +30,8 @@ | |||
31 | 30 | ||
32 | #include <asm/io.h> | 31 | #include <asm/io.h> |
33 | #include <asm/desc.h> | 32 | #include <asm/desc.h> |
33 | #include <asm/vmx.h> | ||
34 | #include <asm/virtext.h> | ||
34 | 35 | ||
35 | #define __ex(x) __kvm_handle_fault_on_reboot(x) | 36 | #define __ex(x) __kvm_handle_fault_on_reboot(x) |
36 | 37 | ||
@@ -90,6 +91,11 @@ struct vcpu_vmx { | |||
90 | } rmode; | 91 | } rmode; |
91 | int vpid; | 92 | int vpid; |
92 | bool emulation_required; | 93 | bool emulation_required; |
94 | |||
95 | /* Support for vnmi-less CPUs */ | ||
96 | int soft_vnmi_blocked; | ||
97 | ktime_t entry_time; | ||
98 | s64 vnmi_blocked_time; | ||
93 | }; | 99 | }; |
94 | 100 | ||
95 | static inline struct vcpu_vmx *to_vmx(struct kvm_vcpu *vcpu) | 101 | static inline struct vcpu_vmx *to_vmx(struct kvm_vcpu *vcpu) |
@@ -122,7 +128,7 @@ static struct vmcs_config { | |||
122 | u32 vmentry_ctrl; | 128 | u32 vmentry_ctrl; |
123 | } vmcs_config; | 129 | } vmcs_config; |
124 | 130 | ||
125 | struct vmx_capability { | 131 | static struct vmx_capability { |
126 | u32 ept; | 132 | u32 ept; |
127 | u32 vpid; | 133 | u32 vpid; |
128 | } vmx_capability; | 134 | } vmx_capability; |
@@ -957,6 +963,13 @@ static int vmx_set_msr(struct kvm_vcpu *vcpu, u32 msr_index, u64 data) | |||
957 | pr_unimpl(vcpu, "unimplemented perfctr wrmsr: 0x%x data 0x%llx\n", msr_index, data); | 963 | pr_unimpl(vcpu, "unimplemented perfctr wrmsr: 0x%x data 0x%llx\n", msr_index, data); |
958 | 964 | ||
959 | break; | 965 | break; |
966 | case MSR_IA32_CR_PAT: | ||
967 | if (vmcs_config.vmentry_ctrl & VM_ENTRY_LOAD_IA32_PAT) { | ||
968 | vmcs_write64(GUEST_IA32_PAT, data); | ||
969 | vcpu->arch.pat = data; | ||
970 | break; | ||
971 | } | ||
972 | /* Otherwise falls through to kvm_set_msr_common */ | ||
960 | default: | 973 | default: |
961 | vmx_load_host_state(vmx); | 974 | vmx_load_host_state(vmx); |
962 | msr = find_msr_entry(vmx, msr_index); | 975 | msr = find_msr_entry(vmx, msr_index); |
@@ -1032,8 +1045,7 @@ static int vmx_get_irq(struct kvm_vcpu *vcpu) | |||
1032 | 1045 | ||
1033 | static __init int cpu_has_kvm_support(void) | 1046 | static __init int cpu_has_kvm_support(void) |
1034 | { | 1047 | { |
1035 | unsigned long ecx = cpuid_ecx(1); | 1048 | return cpu_has_vmx(); |
1036 | return test_bit(5, &ecx); /* CPUID.1:ECX.VMX[bit 5] -> VT */ | ||
1037 | } | 1049 | } |
1038 | 1050 | ||
1039 | static __init int vmx_disabled_by_bios(void) | 1051 | static __init int vmx_disabled_by_bios(void) |
@@ -1079,13 +1091,22 @@ static void vmclear_local_vcpus(void) | |||
1079 | __vcpu_clear(vmx); | 1091 | __vcpu_clear(vmx); |
1080 | } | 1092 | } |
1081 | 1093 | ||
1082 | static void hardware_disable(void *garbage) | 1094 | |
1095 | /* Just like cpu_vmxoff(), but with the __kvm_handle_fault_on_reboot() | ||
1096 | * tricks. | ||
1097 | */ | ||
1098 | static void kvm_cpu_vmxoff(void) | ||
1083 | { | 1099 | { |
1084 | vmclear_local_vcpus(); | ||
1085 | asm volatile (__ex(ASM_VMX_VMXOFF) : : : "cc"); | 1100 | asm volatile (__ex(ASM_VMX_VMXOFF) : : : "cc"); |
1086 | write_cr4(read_cr4() & ~X86_CR4_VMXE); | 1101 | write_cr4(read_cr4() & ~X86_CR4_VMXE); |
1087 | } | 1102 | } |
1088 | 1103 | ||
1104 | static void hardware_disable(void *garbage) | ||
1105 | { | ||
1106 | vmclear_local_vcpus(); | ||
1107 | kvm_cpu_vmxoff(); | ||
1108 | } | ||
1109 | |||
1089 | static __init int adjust_vmx_controls(u32 ctl_min, u32 ctl_opt, | 1110 | static __init int adjust_vmx_controls(u32 ctl_min, u32 ctl_opt, |
1090 | u32 msr, u32 *result) | 1111 | u32 msr, u32 *result) |
1091 | { | 1112 | { |
@@ -1176,12 +1197,13 @@ static __init int setup_vmcs_config(struct vmcs_config *vmcs_conf) | |||
1176 | #ifdef CONFIG_X86_64 | 1197 | #ifdef CONFIG_X86_64 |
1177 | min |= VM_EXIT_HOST_ADDR_SPACE_SIZE; | 1198 | min |= VM_EXIT_HOST_ADDR_SPACE_SIZE; |
1178 | #endif | 1199 | #endif |
1179 | opt = 0; | 1200 | opt = VM_EXIT_SAVE_IA32_PAT | VM_EXIT_LOAD_IA32_PAT; |
1180 | if (adjust_vmx_controls(min, opt, MSR_IA32_VMX_EXIT_CTLS, | 1201 | if (adjust_vmx_controls(min, opt, MSR_IA32_VMX_EXIT_CTLS, |
1181 | &_vmexit_control) < 0) | 1202 | &_vmexit_control) < 0) |
1182 | return -EIO; | 1203 | return -EIO; |
1183 | 1204 | ||
1184 | min = opt = 0; | 1205 | min = 0; |
1206 | opt = VM_ENTRY_LOAD_IA32_PAT; | ||
1185 | if (adjust_vmx_controls(min, opt, MSR_IA32_VMX_ENTRY_CTLS, | 1207 | if (adjust_vmx_controls(min, opt, MSR_IA32_VMX_ENTRY_CTLS, |
1186 | &_vmentry_control) < 0) | 1208 | &_vmentry_control) < 0) |
1187 | return -EIO; | 1209 | return -EIO; |
@@ -2087,8 +2109,9 @@ static void vmx_disable_intercept_for_msr(struct page *msr_bitmap, u32 msr) | |||
2087 | */ | 2109 | */ |
2088 | static int vmx_vcpu_setup(struct vcpu_vmx *vmx) | 2110 | static int vmx_vcpu_setup(struct vcpu_vmx *vmx) |
2089 | { | 2111 | { |
2090 | u32 host_sysenter_cs; | 2112 | u32 host_sysenter_cs, msr_low, msr_high; |
2091 | u32 junk; | 2113 | u32 junk; |
2114 | u64 host_pat; | ||
2092 | unsigned long a; | 2115 | unsigned long a; |
2093 | struct descriptor_table dt; | 2116 | struct descriptor_table dt; |
2094 | int i; | 2117 | int i; |
@@ -2176,6 +2199,20 @@ static int vmx_vcpu_setup(struct vcpu_vmx *vmx) | |||
2176 | rdmsrl(MSR_IA32_SYSENTER_EIP, a); | 2199 | rdmsrl(MSR_IA32_SYSENTER_EIP, a); |
2177 | vmcs_writel(HOST_IA32_SYSENTER_EIP, a); /* 22.2.3 */ | 2200 | vmcs_writel(HOST_IA32_SYSENTER_EIP, a); /* 22.2.3 */ |
2178 | 2201 | ||
2202 | if (vmcs_config.vmexit_ctrl & VM_EXIT_LOAD_IA32_PAT) { | ||
2203 | rdmsr(MSR_IA32_CR_PAT, msr_low, msr_high); | ||
2204 | host_pat = msr_low | ((u64) msr_high << 32); | ||
2205 | vmcs_write64(HOST_IA32_PAT, host_pat); | ||
2206 | } | ||
2207 | if (vmcs_config.vmentry_ctrl & VM_ENTRY_LOAD_IA32_PAT) { | ||
2208 | rdmsr(MSR_IA32_CR_PAT, msr_low, msr_high); | ||
2209 | host_pat = msr_low | ((u64) msr_high << 32); | ||
2210 | /* Write the default value follow host pat */ | ||
2211 | vmcs_write64(GUEST_IA32_PAT, host_pat); | ||
2212 | /* Keep arch.pat sync with GUEST_IA32_PAT */ | ||
2213 | vmx->vcpu.arch.pat = host_pat; | ||
2214 | } | ||
2215 | |||
2179 | for (i = 0; i < NR_VMX_MSR; ++i) { | 2216 | for (i = 0; i < NR_VMX_MSR; ++i) { |
2180 | u32 index = vmx_msr_index[i]; | 2217 | u32 index = vmx_msr_index[i]; |
2181 | u32 data_low, data_high; | 2218 | u32 data_low, data_high; |
@@ -2230,6 +2267,8 @@ static int vmx_vcpu_reset(struct kvm_vcpu *vcpu) | |||
2230 | 2267 | ||
2231 | vmx->vcpu.arch.rmode.active = 0; | 2268 | vmx->vcpu.arch.rmode.active = 0; |
2232 | 2269 | ||
2270 | vmx->soft_vnmi_blocked = 0; | ||
2271 | |||
2233 | vmx->vcpu.arch.regs[VCPU_REGS_RDX] = get_rdx_init_val(); | 2272 | vmx->vcpu.arch.regs[VCPU_REGS_RDX] = get_rdx_init_val(); |
2234 | kvm_set_cr8(&vmx->vcpu, 0); | 2273 | kvm_set_cr8(&vmx->vcpu, 0); |
2235 | msr = 0xfee00000 | MSR_IA32_APICBASE_ENABLE; | 2274 | msr = 0xfee00000 | MSR_IA32_APICBASE_ENABLE; |
@@ -2335,6 +2374,29 @@ out: | |||
2335 | return ret; | 2374 | return ret; |
2336 | } | 2375 | } |
2337 | 2376 | ||
2377 | static void enable_irq_window(struct kvm_vcpu *vcpu) | ||
2378 | { | ||
2379 | u32 cpu_based_vm_exec_control; | ||
2380 | |||
2381 | cpu_based_vm_exec_control = vmcs_read32(CPU_BASED_VM_EXEC_CONTROL); | ||
2382 | cpu_based_vm_exec_control |= CPU_BASED_VIRTUAL_INTR_PENDING; | ||
2383 | vmcs_write32(CPU_BASED_VM_EXEC_CONTROL, cpu_based_vm_exec_control); | ||
2384 | } | ||
2385 | |||
2386 | static void enable_nmi_window(struct kvm_vcpu *vcpu) | ||
2387 | { | ||
2388 | u32 cpu_based_vm_exec_control; | ||
2389 | |||
2390 | if (!cpu_has_virtual_nmis()) { | ||
2391 | enable_irq_window(vcpu); | ||
2392 | return; | ||
2393 | } | ||
2394 | |||
2395 | cpu_based_vm_exec_control = vmcs_read32(CPU_BASED_VM_EXEC_CONTROL); | ||
2396 | cpu_based_vm_exec_control |= CPU_BASED_VIRTUAL_NMI_PENDING; | ||
2397 | vmcs_write32(CPU_BASED_VM_EXEC_CONTROL, cpu_based_vm_exec_control); | ||
2398 | } | ||
2399 | |||
2338 | static void vmx_inject_irq(struct kvm_vcpu *vcpu, int irq) | 2400 | static void vmx_inject_irq(struct kvm_vcpu *vcpu, int irq) |
2339 | { | 2401 | { |
2340 | struct vcpu_vmx *vmx = to_vmx(vcpu); | 2402 | struct vcpu_vmx *vmx = to_vmx(vcpu); |
@@ -2358,10 +2420,54 @@ static void vmx_inject_irq(struct kvm_vcpu *vcpu, int irq) | |||
2358 | 2420 | ||
2359 | static void vmx_inject_nmi(struct kvm_vcpu *vcpu) | 2421 | static void vmx_inject_nmi(struct kvm_vcpu *vcpu) |
2360 | { | 2422 | { |
2423 | struct vcpu_vmx *vmx = to_vmx(vcpu); | ||
2424 | |||
2425 | if (!cpu_has_virtual_nmis()) { | ||
2426 | /* | ||
2427 | * Tracking the NMI-blocked state in software is built upon | ||
2428 | * finding the next open IRQ window. This, in turn, depends on | ||
2429 | * well-behaving guests: They have to keep IRQs disabled at | ||
2430 | * least as long as the NMI handler runs. Otherwise we may | ||
2431 | * cause NMI nesting, maybe breaking the guest. But as this is | ||
2432 | * highly unlikely, we can live with the residual risk. | ||
2433 | */ | ||
2434 | vmx->soft_vnmi_blocked = 1; | ||
2435 | vmx->vnmi_blocked_time = 0; | ||
2436 | } | ||
2437 | |||
2438 | ++vcpu->stat.nmi_injections; | ||
2439 | if (vcpu->arch.rmode.active) { | ||
2440 | vmx->rmode.irq.pending = true; | ||
2441 | vmx->rmode.irq.vector = NMI_VECTOR; | ||
2442 | vmx->rmode.irq.rip = kvm_rip_read(vcpu); | ||
2443 | vmcs_write32(VM_ENTRY_INTR_INFO_FIELD, | ||
2444 | NMI_VECTOR | INTR_TYPE_SOFT_INTR | | ||
2445 | INTR_INFO_VALID_MASK); | ||
2446 | vmcs_write32(VM_ENTRY_INSTRUCTION_LEN, 1); | ||
2447 | kvm_rip_write(vcpu, vmx->rmode.irq.rip - 1); | ||
2448 | return; | ||
2449 | } | ||
2361 | vmcs_write32(VM_ENTRY_INTR_INFO_FIELD, | 2450 | vmcs_write32(VM_ENTRY_INTR_INFO_FIELD, |
2362 | INTR_TYPE_NMI_INTR | INTR_INFO_VALID_MASK | NMI_VECTOR); | 2451 | INTR_TYPE_NMI_INTR | INTR_INFO_VALID_MASK | NMI_VECTOR); |
2363 | } | 2452 | } |
2364 | 2453 | ||
2454 | static void vmx_update_window_states(struct kvm_vcpu *vcpu) | ||
2455 | { | ||
2456 | u32 guest_intr = vmcs_read32(GUEST_INTERRUPTIBILITY_INFO); | ||
2457 | |||
2458 | vcpu->arch.nmi_window_open = | ||
2459 | !(guest_intr & (GUEST_INTR_STATE_STI | | ||
2460 | GUEST_INTR_STATE_MOV_SS | | ||
2461 | GUEST_INTR_STATE_NMI)); | ||
2462 | if (!cpu_has_virtual_nmis() && to_vmx(vcpu)->soft_vnmi_blocked) | ||
2463 | vcpu->arch.nmi_window_open = 0; | ||
2464 | |||
2465 | vcpu->arch.interrupt_window_open = | ||
2466 | ((vmcs_readl(GUEST_RFLAGS) & X86_EFLAGS_IF) && | ||
2467 | !(guest_intr & (GUEST_INTR_STATE_STI | | ||
2468 | GUEST_INTR_STATE_MOV_SS))); | ||
2469 | } | ||
2470 | |||
2365 | static void kvm_do_inject_irq(struct kvm_vcpu *vcpu) | 2471 | static void kvm_do_inject_irq(struct kvm_vcpu *vcpu) |
2366 | { | 2472 | { |
2367 | int word_index = __ffs(vcpu->arch.irq_summary); | 2473 | int word_index = __ffs(vcpu->arch.irq_summary); |
@@ -2374,40 +2480,49 @@ static void kvm_do_inject_irq(struct kvm_vcpu *vcpu) | |||
2374 | kvm_queue_interrupt(vcpu, irq); | 2480 | kvm_queue_interrupt(vcpu, irq); |
2375 | } | 2481 | } |
2376 | 2482 | ||
2377 | |||
2378 | static void do_interrupt_requests(struct kvm_vcpu *vcpu, | 2483 | static void do_interrupt_requests(struct kvm_vcpu *vcpu, |
2379 | struct kvm_run *kvm_run) | 2484 | struct kvm_run *kvm_run) |
2380 | { | 2485 | { |
2381 | u32 cpu_based_vm_exec_control; | 2486 | vmx_update_window_states(vcpu); |
2382 | |||
2383 | vcpu->arch.interrupt_window_open = | ||
2384 | ((vmcs_readl(GUEST_RFLAGS) & X86_EFLAGS_IF) && | ||
2385 | (vmcs_read32(GUEST_INTERRUPTIBILITY_INFO) & 3) == 0); | ||
2386 | 2487 | ||
2387 | if (vcpu->arch.interrupt_window_open && | 2488 | if (vcpu->arch.nmi_pending && !vcpu->arch.nmi_injected) { |
2388 | vcpu->arch.irq_summary && !vcpu->arch.interrupt.pending) | 2489 | if (vcpu->arch.interrupt.pending) { |
2389 | kvm_do_inject_irq(vcpu); | 2490 | enable_nmi_window(vcpu); |
2491 | } else if (vcpu->arch.nmi_window_open) { | ||
2492 | vcpu->arch.nmi_pending = false; | ||
2493 | vcpu->arch.nmi_injected = true; | ||
2494 | } else { | ||
2495 | enable_nmi_window(vcpu); | ||
2496 | return; | ||
2497 | } | ||
2498 | } | ||
2499 | if (vcpu->arch.nmi_injected) { | ||
2500 | vmx_inject_nmi(vcpu); | ||
2501 | if (vcpu->arch.nmi_pending) | ||
2502 | enable_nmi_window(vcpu); | ||
2503 | else if (vcpu->arch.irq_summary | ||
2504 | || kvm_run->request_interrupt_window) | ||
2505 | enable_irq_window(vcpu); | ||
2506 | return; | ||
2507 | } | ||
2390 | 2508 | ||
2391 | if (vcpu->arch.interrupt_window_open && vcpu->arch.interrupt.pending) | 2509 | if (vcpu->arch.interrupt_window_open) { |
2392 | vmx_inject_irq(vcpu, vcpu->arch.interrupt.nr); | 2510 | if (vcpu->arch.irq_summary && !vcpu->arch.interrupt.pending) |
2511 | kvm_do_inject_irq(vcpu); | ||
2393 | 2512 | ||
2394 | cpu_based_vm_exec_control = vmcs_read32(CPU_BASED_VM_EXEC_CONTROL); | 2513 | if (vcpu->arch.interrupt.pending) |
2514 | vmx_inject_irq(vcpu, vcpu->arch.interrupt.nr); | ||
2515 | } | ||
2395 | if (!vcpu->arch.interrupt_window_open && | 2516 | if (!vcpu->arch.interrupt_window_open && |
2396 | (vcpu->arch.irq_summary || kvm_run->request_interrupt_window)) | 2517 | (vcpu->arch.irq_summary || kvm_run->request_interrupt_window)) |
2397 | /* | 2518 | enable_irq_window(vcpu); |
2398 | * Interrupts blocked. Wait for unblock. | ||
2399 | */ | ||
2400 | cpu_based_vm_exec_control |= CPU_BASED_VIRTUAL_INTR_PENDING; | ||
2401 | else | ||
2402 | cpu_based_vm_exec_control &= ~CPU_BASED_VIRTUAL_INTR_PENDING; | ||
2403 | vmcs_write32(CPU_BASED_VM_EXEC_CONTROL, cpu_based_vm_exec_control); | ||
2404 | } | 2519 | } |
2405 | 2520 | ||
2406 | static int vmx_set_tss_addr(struct kvm *kvm, unsigned int addr) | 2521 | static int vmx_set_tss_addr(struct kvm *kvm, unsigned int addr) |
2407 | { | 2522 | { |
2408 | int ret; | 2523 | int ret; |
2409 | struct kvm_userspace_memory_region tss_mem = { | 2524 | struct kvm_userspace_memory_region tss_mem = { |
2410 | .slot = 8, | 2525 | .slot = TSS_PRIVATE_MEMSLOT, |
2411 | .guest_phys_addr = addr, | 2526 | .guest_phys_addr = addr, |
2412 | .memory_size = PAGE_SIZE * 3, | 2527 | .memory_size = PAGE_SIZE * 3, |
2413 | .flags = 0, | 2528 | .flags = 0, |
@@ -2492,7 +2607,7 @@ static int handle_exception(struct kvm_vcpu *vcpu, struct kvm_run *kvm_run) | |||
2492 | set_bit(irq / BITS_PER_LONG, &vcpu->arch.irq_summary); | 2607 | set_bit(irq / BITS_PER_LONG, &vcpu->arch.irq_summary); |
2493 | } | 2608 | } |
2494 | 2609 | ||
2495 | if ((intr_info & INTR_INFO_INTR_TYPE_MASK) == 0x200) /* nmi */ | 2610 | if ((intr_info & INTR_INFO_INTR_TYPE_MASK) == INTR_TYPE_NMI_INTR) |
2496 | return 1; /* already handled by vmx_vcpu_run() */ | 2611 | return 1; /* already handled by vmx_vcpu_run() */ |
2497 | 2612 | ||
2498 | if (is_no_device(intr_info)) { | 2613 | if (is_no_device(intr_info)) { |
@@ -2581,6 +2696,7 @@ static int handle_io(struct kvm_vcpu *vcpu, struct kvm_run *kvm_run) | |||
2581 | rep = (exit_qualification & 32) != 0; | 2696 | rep = (exit_qualification & 32) != 0; |
2582 | port = exit_qualification >> 16; | 2697 | port = exit_qualification >> 16; |
2583 | 2698 | ||
2699 | skip_emulated_instruction(vcpu); | ||
2584 | return kvm_emulate_pio(vcpu, kvm_run, in, size, port); | 2700 | return kvm_emulate_pio(vcpu, kvm_run, in, size, port); |
2585 | } | 2701 | } |
2586 | 2702 | ||
@@ -2767,6 +2883,7 @@ static int handle_interrupt_window(struct kvm_vcpu *vcpu, | |||
2767 | vmcs_write32(CPU_BASED_VM_EXEC_CONTROL, cpu_based_vm_exec_control); | 2883 | vmcs_write32(CPU_BASED_VM_EXEC_CONTROL, cpu_based_vm_exec_control); |
2768 | 2884 | ||
2769 | KVMTRACE_0D(PEND_INTR, vcpu, handler); | 2885 | KVMTRACE_0D(PEND_INTR, vcpu, handler); |
2886 | ++vcpu->stat.irq_window_exits; | ||
2770 | 2887 | ||
2771 | /* | 2888 | /* |
2772 | * If the user space waits to inject interrupts, exit as soon as | 2889 | * If the user space waits to inject interrupts, exit as soon as |
@@ -2775,7 +2892,6 @@ static int handle_interrupt_window(struct kvm_vcpu *vcpu, | |||
2775 | if (kvm_run->request_interrupt_window && | 2892 | if (kvm_run->request_interrupt_window && |
2776 | !vcpu->arch.irq_summary) { | 2893 | !vcpu->arch.irq_summary) { |
2777 | kvm_run->exit_reason = KVM_EXIT_IRQ_WINDOW_OPEN; | 2894 | kvm_run->exit_reason = KVM_EXIT_IRQ_WINDOW_OPEN; |
2778 | ++vcpu->stat.irq_window_exits; | ||
2779 | return 0; | 2895 | return 0; |
2780 | } | 2896 | } |
2781 | return 1; | 2897 | return 1; |
@@ -2832,6 +2948,7 @@ static int handle_apic_access(struct kvm_vcpu *vcpu, struct kvm_run *kvm_run) | |||
2832 | 2948 | ||
2833 | static int handle_task_switch(struct kvm_vcpu *vcpu, struct kvm_run *kvm_run) | 2949 | static int handle_task_switch(struct kvm_vcpu *vcpu, struct kvm_run *kvm_run) |
2834 | { | 2950 | { |
2951 | struct vcpu_vmx *vmx = to_vmx(vcpu); | ||
2835 | unsigned long exit_qualification; | 2952 | unsigned long exit_qualification; |
2836 | u16 tss_selector; | 2953 | u16 tss_selector; |
2837 | int reason; | 2954 | int reason; |
@@ -2839,6 +2956,15 @@ static int handle_task_switch(struct kvm_vcpu *vcpu, struct kvm_run *kvm_run) | |||
2839 | exit_qualification = vmcs_readl(EXIT_QUALIFICATION); | 2956 | exit_qualification = vmcs_readl(EXIT_QUALIFICATION); |
2840 | 2957 | ||
2841 | reason = (u32)exit_qualification >> 30; | 2958 | reason = (u32)exit_qualification >> 30; |
2959 | if (reason == TASK_SWITCH_GATE && vmx->vcpu.arch.nmi_injected && | ||
2960 | (vmx->idt_vectoring_info & VECTORING_INFO_VALID_MASK) && | ||
2961 | (vmx->idt_vectoring_info & VECTORING_INFO_TYPE_MASK) | ||
2962 | == INTR_TYPE_NMI_INTR) { | ||
2963 | vcpu->arch.nmi_injected = false; | ||
2964 | if (cpu_has_virtual_nmis()) | ||
2965 | vmcs_set_bits(GUEST_INTERRUPTIBILITY_INFO, | ||
2966 | GUEST_INTR_STATE_NMI); | ||
2967 | } | ||
2842 | tss_selector = exit_qualification; | 2968 | tss_selector = exit_qualification; |
2843 | 2969 | ||
2844 | return kvm_task_switch(vcpu, tss_selector, reason); | 2970 | return kvm_task_switch(vcpu, tss_selector, reason); |
@@ -2927,16 +3053,12 @@ static void handle_invalid_guest_state(struct kvm_vcpu *vcpu, | |||
2927 | while (!guest_state_valid(vcpu)) { | 3053 | while (!guest_state_valid(vcpu)) { |
2928 | err = emulate_instruction(vcpu, kvm_run, 0, 0, 0); | 3054 | err = emulate_instruction(vcpu, kvm_run, 0, 0, 0); |
2929 | 3055 | ||
2930 | switch (err) { | 3056 | if (err == EMULATE_DO_MMIO) |
2931 | case EMULATE_DONE: | 3057 | break; |
2932 | break; | 3058 | |
2933 | case EMULATE_DO_MMIO: | 3059 | if (err != EMULATE_DONE) { |
2934 | kvm_report_emulation_failure(vcpu, "mmio"); | 3060 | kvm_report_emulation_failure(vcpu, "emulation failure"); |
2935 | /* TODO: Handle MMIO */ | 3061 | return; |
2936 | return; | ||
2937 | default: | ||
2938 | kvm_report_emulation_failure(vcpu, "emulation failure"); | ||
2939 | return; | ||
2940 | } | 3062 | } |
2941 | 3063 | ||
2942 | if (signal_pending(current)) | 3064 | if (signal_pending(current)) |
@@ -2948,8 +3070,10 @@ static void handle_invalid_guest_state(struct kvm_vcpu *vcpu, | |||
2948 | local_irq_disable(); | 3070 | local_irq_disable(); |
2949 | preempt_disable(); | 3071 | preempt_disable(); |
2950 | 3072 | ||
2951 | /* Guest state should be valid now, no more emulation should be needed */ | 3073 | /* Guest state should be valid now except if we need to |
2952 | vmx->emulation_required = 0; | 3074 | * emulate an MMIO */ |
3075 | if (guest_state_valid(vcpu)) | ||
3076 | vmx->emulation_required = 0; | ||
2953 | } | 3077 | } |
2954 | 3078 | ||
2955 | /* | 3079 | /* |
@@ -2996,6 +3120,11 @@ static int kvm_handle_exit(struct kvm_run *kvm_run, struct kvm_vcpu *vcpu) | |||
2996 | KVMTRACE_3D(VMEXIT, vcpu, exit_reason, (u32)kvm_rip_read(vcpu), | 3120 | KVMTRACE_3D(VMEXIT, vcpu, exit_reason, (u32)kvm_rip_read(vcpu), |
2997 | (u32)((u64)kvm_rip_read(vcpu) >> 32), entryexit); | 3121 | (u32)((u64)kvm_rip_read(vcpu) >> 32), entryexit); |
2998 | 3122 | ||
3123 | /* If we need to emulate an MMIO from handle_invalid_guest_state | ||
3124 | * we just return 0 */ | ||
3125 | if (vmx->emulation_required && emulate_invalid_guest_state) | ||
3126 | return 0; | ||
3127 | |||
2999 | /* Access CR3 don't cause VMExit in paging mode, so we need | 3128 | /* Access CR3 don't cause VMExit in paging mode, so we need |
3000 | * to sync with guest real CR3. */ | 3129 | * to sync with guest real CR3. */ |
3001 | if (vm_need_ept() && is_paging(vcpu)) { | 3130 | if (vm_need_ept() && is_paging(vcpu)) { |
@@ -3012,9 +3141,32 @@ static int kvm_handle_exit(struct kvm_run *kvm_run, struct kvm_vcpu *vcpu) | |||
3012 | 3141 | ||
3013 | if ((vectoring_info & VECTORING_INFO_VALID_MASK) && | 3142 | if ((vectoring_info & VECTORING_INFO_VALID_MASK) && |
3014 | (exit_reason != EXIT_REASON_EXCEPTION_NMI && | 3143 | (exit_reason != EXIT_REASON_EXCEPTION_NMI && |
3015 | exit_reason != EXIT_REASON_EPT_VIOLATION)) | 3144 | exit_reason != EXIT_REASON_EPT_VIOLATION && |
3016 | printk(KERN_WARNING "%s: unexpected, valid vectoring info and " | 3145 | exit_reason != EXIT_REASON_TASK_SWITCH)) |
3017 | "exit reason is 0x%x\n", __func__, exit_reason); | 3146 | printk(KERN_WARNING "%s: unexpected, valid vectoring info " |
3147 | "(0x%x) and exit reason is 0x%x\n", | ||
3148 | __func__, vectoring_info, exit_reason); | ||
3149 | |||
3150 | if (unlikely(!cpu_has_virtual_nmis() && vmx->soft_vnmi_blocked)) { | ||
3151 | if (vcpu->arch.interrupt_window_open) { | ||
3152 | vmx->soft_vnmi_blocked = 0; | ||
3153 | vcpu->arch.nmi_window_open = 1; | ||
3154 | } else if (vmx->vnmi_blocked_time > 1000000000LL && | ||
3155 | vcpu->arch.nmi_pending) { | ||
3156 | /* | ||
3157 | * This CPU don't support us in finding the end of an | ||
3158 | * NMI-blocked window if the guest runs with IRQs | ||
3159 | * disabled. So we pull the trigger after 1 s of | ||
3160 | * futile waiting, but inform the user about this. | ||
3161 | */ | ||
3162 | printk(KERN_WARNING "%s: Breaking out of NMI-blocked " | ||
3163 | "state on VCPU %d after 1 s timeout\n", | ||
3164 | __func__, vcpu->vcpu_id); | ||
3165 | vmx->soft_vnmi_blocked = 0; | ||
3166 | vmx->vcpu.arch.nmi_window_open = 1; | ||
3167 | } | ||
3168 | } | ||
3169 | |||
3018 | if (exit_reason < kvm_vmx_max_exit_handlers | 3170 | if (exit_reason < kvm_vmx_max_exit_handlers |
3019 | && kvm_vmx_exit_handlers[exit_reason]) | 3171 | && kvm_vmx_exit_handlers[exit_reason]) |
3020 | return kvm_vmx_exit_handlers[exit_reason](vcpu, kvm_run); | 3172 | return kvm_vmx_exit_handlers[exit_reason](vcpu, kvm_run); |
@@ -3042,51 +3194,6 @@ static void update_tpr_threshold(struct kvm_vcpu *vcpu) | |||
3042 | vmcs_write32(TPR_THRESHOLD, (max_irr > tpr) ? tpr >> 4 : max_irr >> 4); | 3194 | vmcs_write32(TPR_THRESHOLD, (max_irr > tpr) ? tpr >> 4 : max_irr >> 4); |
3043 | } | 3195 | } |
3044 | 3196 | ||
3045 | static void enable_irq_window(struct kvm_vcpu *vcpu) | ||
3046 | { | ||
3047 | u32 cpu_based_vm_exec_control; | ||
3048 | |||
3049 | cpu_based_vm_exec_control = vmcs_read32(CPU_BASED_VM_EXEC_CONTROL); | ||
3050 | cpu_based_vm_exec_control |= CPU_BASED_VIRTUAL_INTR_PENDING; | ||
3051 | vmcs_write32(CPU_BASED_VM_EXEC_CONTROL, cpu_based_vm_exec_control); | ||
3052 | } | ||
3053 | |||
3054 | static void enable_nmi_window(struct kvm_vcpu *vcpu) | ||
3055 | { | ||
3056 | u32 cpu_based_vm_exec_control; | ||
3057 | |||
3058 | if (!cpu_has_virtual_nmis()) | ||
3059 | return; | ||
3060 | |||
3061 | cpu_based_vm_exec_control = vmcs_read32(CPU_BASED_VM_EXEC_CONTROL); | ||
3062 | cpu_based_vm_exec_control |= CPU_BASED_VIRTUAL_NMI_PENDING; | ||
3063 | vmcs_write32(CPU_BASED_VM_EXEC_CONTROL, cpu_based_vm_exec_control); | ||
3064 | } | ||
3065 | |||
3066 | static int vmx_nmi_enabled(struct kvm_vcpu *vcpu) | ||
3067 | { | ||
3068 | u32 guest_intr = vmcs_read32(GUEST_INTERRUPTIBILITY_INFO); | ||
3069 | return !(guest_intr & (GUEST_INTR_STATE_NMI | | ||
3070 | GUEST_INTR_STATE_MOV_SS | | ||
3071 | GUEST_INTR_STATE_STI)); | ||
3072 | } | ||
3073 | |||
3074 | static int vmx_irq_enabled(struct kvm_vcpu *vcpu) | ||
3075 | { | ||
3076 | u32 guest_intr = vmcs_read32(GUEST_INTERRUPTIBILITY_INFO); | ||
3077 | return (!(guest_intr & (GUEST_INTR_STATE_MOV_SS | | ||
3078 | GUEST_INTR_STATE_STI)) && | ||
3079 | (vmcs_readl(GUEST_RFLAGS) & X86_EFLAGS_IF)); | ||
3080 | } | ||
3081 | |||
3082 | static void enable_intr_window(struct kvm_vcpu *vcpu) | ||
3083 | { | ||
3084 | if (vcpu->arch.nmi_pending) | ||
3085 | enable_nmi_window(vcpu); | ||
3086 | else if (kvm_cpu_has_interrupt(vcpu)) | ||
3087 | enable_irq_window(vcpu); | ||
3088 | } | ||
3089 | |||
3090 | static void vmx_complete_interrupts(struct vcpu_vmx *vmx) | 3197 | static void vmx_complete_interrupts(struct vcpu_vmx *vmx) |
3091 | { | 3198 | { |
3092 | u32 exit_intr_info; | 3199 | u32 exit_intr_info; |
@@ -3109,7 +3216,9 @@ static void vmx_complete_interrupts(struct vcpu_vmx *vmx) | |||
3109 | if (unblock_nmi && vector != DF_VECTOR) | 3216 | if (unblock_nmi && vector != DF_VECTOR) |
3110 | vmcs_set_bits(GUEST_INTERRUPTIBILITY_INFO, | 3217 | vmcs_set_bits(GUEST_INTERRUPTIBILITY_INFO, |
3111 | GUEST_INTR_STATE_NMI); | 3218 | GUEST_INTR_STATE_NMI); |
3112 | } | 3219 | } else if (unlikely(vmx->soft_vnmi_blocked)) |
3220 | vmx->vnmi_blocked_time += | ||
3221 | ktime_to_ns(ktime_sub(ktime_get(), vmx->entry_time)); | ||
3113 | 3222 | ||
3114 | idt_vectoring_info = vmx->idt_vectoring_info; | 3223 | idt_vectoring_info = vmx->idt_vectoring_info; |
3115 | idtv_info_valid = idt_vectoring_info & VECTORING_INFO_VALID_MASK; | 3224 | idtv_info_valid = idt_vectoring_info & VECTORING_INFO_VALID_MASK; |
@@ -3147,26 +3256,29 @@ static void vmx_intr_assist(struct kvm_vcpu *vcpu) | |||
3147 | { | 3256 | { |
3148 | update_tpr_threshold(vcpu); | 3257 | update_tpr_threshold(vcpu); |
3149 | 3258 | ||
3150 | if (cpu_has_virtual_nmis()) { | 3259 | vmx_update_window_states(vcpu); |
3151 | if (vcpu->arch.nmi_pending && !vcpu->arch.nmi_injected) { | 3260 | |
3152 | if (vcpu->arch.interrupt.pending) { | 3261 | if (vcpu->arch.nmi_pending && !vcpu->arch.nmi_injected) { |
3153 | enable_nmi_window(vcpu); | 3262 | if (vcpu->arch.interrupt.pending) { |
3154 | } else if (vmx_nmi_enabled(vcpu)) { | 3263 | enable_nmi_window(vcpu); |
3155 | vcpu->arch.nmi_pending = false; | 3264 | } else if (vcpu->arch.nmi_window_open) { |
3156 | vcpu->arch.nmi_injected = true; | 3265 | vcpu->arch.nmi_pending = false; |
3157 | } else { | 3266 | vcpu->arch.nmi_injected = true; |
3158 | enable_intr_window(vcpu); | 3267 | } else { |
3159 | return; | 3268 | enable_nmi_window(vcpu); |
3160 | } | ||
3161 | } | ||
3162 | if (vcpu->arch.nmi_injected) { | ||
3163 | vmx_inject_nmi(vcpu); | ||
3164 | enable_intr_window(vcpu); | ||
3165 | return; | 3269 | return; |
3166 | } | 3270 | } |
3167 | } | 3271 | } |
3272 | if (vcpu->arch.nmi_injected) { | ||
3273 | vmx_inject_nmi(vcpu); | ||
3274 | if (vcpu->arch.nmi_pending) | ||
3275 | enable_nmi_window(vcpu); | ||
3276 | else if (kvm_cpu_has_interrupt(vcpu)) | ||
3277 | enable_irq_window(vcpu); | ||
3278 | return; | ||
3279 | } | ||
3168 | if (!vcpu->arch.interrupt.pending && kvm_cpu_has_interrupt(vcpu)) { | 3280 | if (!vcpu->arch.interrupt.pending && kvm_cpu_has_interrupt(vcpu)) { |
3169 | if (vmx_irq_enabled(vcpu)) | 3281 | if (vcpu->arch.interrupt_window_open) |
3170 | kvm_queue_interrupt(vcpu, kvm_cpu_get_interrupt(vcpu)); | 3282 | kvm_queue_interrupt(vcpu, kvm_cpu_get_interrupt(vcpu)); |
3171 | else | 3283 | else |
3172 | enable_irq_window(vcpu); | 3284 | enable_irq_window(vcpu); |
@@ -3174,6 +3286,8 @@ static void vmx_intr_assist(struct kvm_vcpu *vcpu) | |||
3174 | if (vcpu->arch.interrupt.pending) { | 3286 | if (vcpu->arch.interrupt.pending) { |
3175 | vmx_inject_irq(vcpu, vcpu->arch.interrupt.nr); | 3287 | vmx_inject_irq(vcpu, vcpu->arch.interrupt.nr); |
3176 | kvm_timer_intr_post(vcpu, vcpu->arch.interrupt.nr); | 3288 | kvm_timer_intr_post(vcpu, vcpu->arch.interrupt.nr); |
3289 | if (kvm_cpu_has_interrupt(vcpu)) | ||
3290 | enable_irq_window(vcpu); | ||
3177 | } | 3291 | } |
3178 | } | 3292 | } |
3179 | 3293 | ||
@@ -3213,6 +3327,10 @@ static void vmx_vcpu_run(struct kvm_vcpu *vcpu, struct kvm_run *kvm_run) | |||
3213 | struct vcpu_vmx *vmx = to_vmx(vcpu); | 3327 | struct vcpu_vmx *vmx = to_vmx(vcpu); |
3214 | u32 intr_info; | 3328 | u32 intr_info; |
3215 | 3329 | ||
3330 | /* Record the guest's net vcpu time for enforced NMI injections. */ | ||
3331 | if (unlikely(!cpu_has_virtual_nmis() && vmx->soft_vnmi_blocked)) | ||
3332 | vmx->entry_time = ktime_get(); | ||
3333 | |||
3216 | /* Handle invalid guest state instead of entering VMX */ | 3334 | /* Handle invalid guest state instead of entering VMX */ |
3217 | if (vmx->emulation_required && emulate_invalid_guest_state) { | 3335 | if (vmx->emulation_required && emulate_invalid_guest_state) { |
3218 | handle_invalid_guest_state(vcpu, kvm_run); | 3336 | handle_invalid_guest_state(vcpu, kvm_run); |
@@ -3327,9 +3445,7 @@ static void vmx_vcpu_run(struct kvm_vcpu *vcpu, struct kvm_run *kvm_run) | |||
3327 | if (vmx->rmode.irq.pending) | 3445 | if (vmx->rmode.irq.pending) |
3328 | fixup_rmode_irq(vmx); | 3446 | fixup_rmode_irq(vmx); |
3329 | 3447 | ||
3330 | vcpu->arch.interrupt_window_open = | 3448 | vmx_update_window_states(vcpu); |
3331 | (vmcs_read32(GUEST_INTERRUPTIBILITY_INFO) & | ||
3332 | (GUEST_INTR_STATE_STI | GUEST_INTR_STATE_MOV_SS)) == 0; | ||
3333 | 3449 | ||
3334 | asm("mov %0, %%ds; mov %0, %%es" : : "r"(__USER_DS)); | 3450 | asm("mov %0, %%ds; mov %0, %%es" : : "r"(__USER_DS)); |
3335 | vmx->launched = 1; | 3451 | vmx->launched = 1; |
@@ -3337,7 +3453,7 @@ static void vmx_vcpu_run(struct kvm_vcpu *vcpu, struct kvm_run *kvm_run) | |||
3337 | intr_info = vmcs_read32(VM_EXIT_INTR_INFO); | 3453 | intr_info = vmcs_read32(VM_EXIT_INTR_INFO); |
3338 | 3454 | ||
3339 | /* We need to handle NMIs before interrupts are enabled */ | 3455 | /* We need to handle NMIs before interrupts are enabled */ |
3340 | if ((intr_info & INTR_INFO_INTR_TYPE_MASK) == 0x200 && | 3456 | if ((intr_info & INTR_INFO_INTR_TYPE_MASK) == INTR_TYPE_NMI_INTR && |
3341 | (intr_info & INTR_INFO_VALID_MASK)) { | 3457 | (intr_info & INTR_INFO_VALID_MASK)) { |
3342 | KVMTRACE_0D(NMI, vcpu, handler); | 3458 | KVMTRACE_0D(NMI, vcpu, handler); |
3343 | asm("int $2"); | 3459 | asm("int $2"); |
@@ -3455,6 +3571,11 @@ static int get_ept_level(void) | |||
3455 | return VMX_EPT_DEFAULT_GAW + 1; | 3571 | return VMX_EPT_DEFAULT_GAW + 1; |
3456 | } | 3572 | } |
3457 | 3573 | ||
3574 | static int vmx_get_mt_mask_shift(void) | ||
3575 | { | ||
3576 | return VMX_EPT_MT_EPTE_SHIFT; | ||
3577 | } | ||
3578 | |||
3458 | static struct kvm_x86_ops vmx_x86_ops = { | 3579 | static struct kvm_x86_ops vmx_x86_ops = { |
3459 | .cpu_has_kvm_support = cpu_has_kvm_support, | 3580 | .cpu_has_kvm_support = cpu_has_kvm_support, |
3460 | .disabled_by_bios = vmx_disabled_by_bios, | 3581 | .disabled_by_bios = vmx_disabled_by_bios, |
@@ -3510,6 +3631,7 @@ static struct kvm_x86_ops vmx_x86_ops = { | |||
3510 | 3631 | ||
3511 | .set_tss_addr = vmx_set_tss_addr, | 3632 | .set_tss_addr = vmx_set_tss_addr, |
3512 | .get_tdp_level = get_ept_level, | 3633 | .get_tdp_level = get_ept_level, |
3634 | .get_mt_mask_shift = vmx_get_mt_mask_shift, | ||
3513 | }; | 3635 | }; |
3514 | 3636 | ||
3515 | static int __init vmx_init(void) | 3637 | static int __init vmx_init(void) |
@@ -3566,10 +3688,10 @@ static int __init vmx_init(void) | |||
3566 | bypass_guest_pf = 0; | 3688 | bypass_guest_pf = 0; |
3567 | kvm_mmu_set_base_ptes(VMX_EPT_READABLE_MASK | | 3689 | kvm_mmu_set_base_ptes(VMX_EPT_READABLE_MASK | |
3568 | VMX_EPT_WRITABLE_MASK | | 3690 | VMX_EPT_WRITABLE_MASK | |
3569 | VMX_EPT_DEFAULT_MT << VMX_EPT_MT_EPTE_SHIFT | | ||
3570 | VMX_EPT_IGMT_BIT); | 3691 | VMX_EPT_IGMT_BIT); |
3571 | kvm_mmu_set_mask_ptes(0ull, 0ull, 0ull, 0ull, | 3692 | kvm_mmu_set_mask_ptes(0ull, 0ull, 0ull, 0ull, |
3572 | VMX_EPT_EXECUTABLE_MASK); | 3693 | VMX_EPT_EXECUTABLE_MASK, |
3694 | VMX_EPT_DEFAULT_MT << VMX_EPT_MT_EPTE_SHIFT); | ||
3573 | kvm_enable_tdp(); | 3695 | kvm_enable_tdp(); |
3574 | } else | 3696 | } else |
3575 | kvm_disable_tdp(); | 3697 | kvm_disable_tdp(); |
diff --git a/arch/x86/kvm/x86.c b/arch/x86/kvm/x86.c index f1f8ff2f1fa2..cc17546a2406 100644 --- a/arch/x86/kvm/x86.c +++ b/arch/x86/kvm/x86.c | |||
@@ -34,11 +34,13 @@ | |||
34 | #include <linux/module.h> | 34 | #include <linux/module.h> |
35 | #include <linux/mman.h> | 35 | #include <linux/mman.h> |
36 | #include <linux/highmem.h> | 36 | #include <linux/highmem.h> |
37 | #include <linux/iommu.h> | ||
37 | #include <linux/intel-iommu.h> | 38 | #include <linux/intel-iommu.h> |
38 | 39 | ||
39 | #include <asm/uaccess.h> | 40 | #include <asm/uaccess.h> |
40 | #include <asm/msr.h> | 41 | #include <asm/msr.h> |
41 | #include <asm/desc.h> | 42 | #include <asm/desc.h> |
43 | #include <asm/mtrr.h> | ||
42 | 44 | ||
43 | #define MAX_IO_MSRS 256 | 45 | #define MAX_IO_MSRS 256 |
44 | #define CR0_RESERVED_BITS \ | 46 | #define CR0_RESERVED_BITS \ |
@@ -86,6 +88,7 @@ struct kvm_stats_debugfs_item debugfs_entries[] = { | |||
86 | { "halt_wakeup", VCPU_STAT(halt_wakeup) }, | 88 | { "halt_wakeup", VCPU_STAT(halt_wakeup) }, |
87 | { "hypercalls", VCPU_STAT(hypercalls) }, | 89 | { "hypercalls", VCPU_STAT(hypercalls) }, |
88 | { "request_irq", VCPU_STAT(request_irq_exits) }, | 90 | { "request_irq", VCPU_STAT(request_irq_exits) }, |
91 | { "request_nmi", VCPU_STAT(request_nmi_exits) }, | ||
89 | { "irq_exits", VCPU_STAT(irq_exits) }, | 92 | { "irq_exits", VCPU_STAT(irq_exits) }, |
90 | { "host_state_reload", VCPU_STAT(host_state_reload) }, | 93 | { "host_state_reload", VCPU_STAT(host_state_reload) }, |
91 | { "efer_reload", VCPU_STAT(efer_reload) }, | 94 | { "efer_reload", VCPU_STAT(efer_reload) }, |
@@ -93,6 +96,7 @@ struct kvm_stats_debugfs_item debugfs_entries[] = { | |||
93 | { "insn_emulation", VCPU_STAT(insn_emulation) }, | 96 | { "insn_emulation", VCPU_STAT(insn_emulation) }, |
94 | { "insn_emulation_fail", VCPU_STAT(insn_emulation_fail) }, | 97 | { "insn_emulation_fail", VCPU_STAT(insn_emulation_fail) }, |
95 | { "irq_injections", VCPU_STAT(irq_injections) }, | 98 | { "irq_injections", VCPU_STAT(irq_injections) }, |
99 | { "nmi_injections", VCPU_STAT(nmi_injections) }, | ||
96 | { "mmu_shadow_zapped", VM_STAT(mmu_shadow_zapped) }, | 100 | { "mmu_shadow_zapped", VM_STAT(mmu_shadow_zapped) }, |
97 | { "mmu_pte_write", VM_STAT(mmu_pte_write) }, | 101 | { "mmu_pte_write", VM_STAT(mmu_pte_write) }, |
98 | { "mmu_pte_updated", VM_STAT(mmu_pte_updated) }, | 102 | { "mmu_pte_updated", VM_STAT(mmu_pte_updated) }, |
@@ -101,6 +105,7 @@ struct kvm_stats_debugfs_item debugfs_entries[] = { | |||
101 | { "mmu_recycled", VM_STAT(mmu_recycled) }, | 105 | { "mmu_recycled", VM_STAT(mmu_recycled) }, |
102 | { "mmu_cache_miss", VM_STAT(mmu_cache_miss) }, | 106 | { "mmu_cache_miss", VM_STAT(mmu_cache_miss) }, |
103 | { "mmu_unsync", VM_STAT(mmu_unsync) }, | 107 | { "mmu_unsync", VM_STAT(mmu_unsync) }, |
108 | { "mmu_unsync_global", VM_STAT(mmu_unsync_global) }, | ||
104 | { "remote_tlb_flush", VM_STAT(remote_tlb_flush) }, | 109 | { "remote_tlb_flush", VM_STAT(remote_tlb_flush) }, |
105 | { "largepages", VM_STAT(lpages) }, | 110 | { "largepages", VM_STAT(lpages) }, |
106 | { NULL } | 111 | { NULL } |
@@ -312,6 +317,7 @@ void kvm_set_cr0(struct kvm_vcpu *vcpu, unsigned long cr0) | |||
312 | kvm_x86_ops->set_cr0(vcpu, cr0); | 317 | kvm_x86_ops->set_cr0(vcpu, cr0); |
313 | vcpu->arch.cr0 = cr0; | 318 | vcpu->arch.cr0 = cr0; |
314 | 319 | ||
320 | kvm_mmu_sync_global(vcpu); | ||
315 | kvm_mmu_reset_context(vcpu); | 321 | kvm_mmu_reset_context(vcpu); |
316 | return; | 322 | return; |
317 | } | 323 | } |
@@ -355,6 +361,7 @@ void kvm_set_cr4(struct kvm_vcpu *vcpu, unsigned long cr4) | |||
355 | } | 361 | } |
356 | kvm_x86_ops->set_cr4(vcpu, cr4); | 362 | kvm_x86_ops->set_cr4(vcpu, cr4); |
357 | vcpu->arch.cr4 = cr4; | 363 | vcpu->arch.cr4 = cr4; |
364 | kvm_mmu_sync_global(vcpu); | ||
358 | kvm_mmu_reset_context(vcpu); | 365 | kvm_mmu_reset_context(vcpu); |
359 | } | 366 | } |
360 | EXPORT_SYMBOL_GPL(kvm_set_cr4); | 367 | EXPORT_SYMBOL_GPL(kvm_set_cr4); |
@@ -449,7 +456,7 @@ static u32 msrs_to_save[] = { | |||
449 | MSR_CSTAR, MSR_KERNEL_GS_BASE, MSR_SYSCALL_MASK, MSR_LSTAR, | 456 | MSR_CSTAR, MSR_KERNEL_GS_BASE, MSR_SYSCALL_MASK, MSR_LSTAR, |
450 | #endif | 457 | #endif |
451 | MSR_IA32_TIME_STAMP_COUNTER, MSR_KVM_SYSTEM_TIME, MSR_KVM_WALL_CLOCK, | 458 | MSR_IA32_TIME_STAMP_COUNTER, MSR_KVM_SYSTEM_TIME, MSR_KVM_WALL_CLOCK, |
452 | MSR_IA32_PERF_STATUS, | 459 | MSR_IA32_PERF_STATUS, MSR_IA32_CR_PAT |
453 | }; | 460 | }; |
454 | 461 | ||
455 | static unsigned num_msrs_to_save; | 462 | static unsigned num_msrs_to_save; |
@@ -648,10 +655,38 @@ static bool msr_mtrr_valid(unsigned msr) | |||
648 | 655 | ||
649 | static int set_msr_mtrr(struct kvm_vcpu *vcpu, u32 msr, u64 data) | 656 | static int set_msr_mtrr(struct kvm_vcpu *vcpu, u32 msr, u64 data) |
650 | { | 657 | { |
658 | u64 *p = (u64 *)&vcpu->arch.mtrr_state.fixed_ranges; | ||
659 | |||
651 | if (!msr_mtrr_valid(msr)) | 660 | if (!msr_mtrr_valid(msr)) |
652 | return 1; | 661 | return 1; |
653 | 662 | ||
654 | vcpu->arch.mtrr[msr - 0x200] = data; | 663 | if (msr == MSR_MTRRdefType) { |
664 | vcpu->arch.mtrr_state.def_type = data; | ||
665 | vcpu->arch.mtrr_state.enabled = (data & 0xc00) >> 10; | ||
666 | } else if (msr == MSR_MTRRfix64K_00000) | ||
667 | p[0] = data; | ||
668 | else if (msr == MSR_MTRRfix16K_80000 || msr == MSR_MTRRfix16K_A0000) | ||
669 | p[1 + msr - MSR_MTRRfix16K_80000] = data; | ||
670 | else if (msr >= MSR_MTRRfix4K_C0000 && msr <= MSR_MTRRfix4K_F8000) | ||
671 | p[3 + msr - MSR_MTRRfix4K_C0000] = data; | ||
672 | else if (msr == MSR_IA32_CR_PAT) | ||
673 | vcpu->arch.pat = data; | ||
674 | else { /* Variable MTRRs */ | ||
675 | int idx, is_mtrr_mask; | ||
676 | u64 *pt; | ||
677 | |||
678 | idx = (msr - 0x200) / 2; | ||
679 | is_mtrr_mask = msr - 0x200 - 2 * idx; | ||
680 | if (!is_mtrr_mask) | ||
681 | pt = | ||
682 | (u64 *)&vcpu->arch.mtrr_state.var_ranges[idx].base_lo; | ||
683 | else | ||
684 | pt = | ||
685 | (u64 *)&vcpu->arch.mtrr_state.var_ranges[idx].mask_lo; | ||
686 | *pt = data; | ||
687 | } | ||
688 | |||
689 | kvm_mmu_reset_context(vcpu); | ||
655 | return 0; | 690 | return 0; |
656 | } | 691 | } |
657 | 692 | ||
@@ -747,10 +782,37 @@ int kvm_get_msr(struct kvm_vcpu *vcpu, u32 msr_index, u64 *pdata) | |||
747 | 782 | ||
748 | static int get_msr_mtrr(struct kvm_vcpu *vcpu, u32 msr, u64 *pdata) | 783 | static int get_msr_mtrr(struct kvm_vcpu *vcpu, u32 msr, u64 *pdata) |
749 | { | 784 | { |
785 | u64 *p = (u64 *)&vcpu->arch.mtrr_state.fixed_ranges; | ||
786 | |||
750 | if (!msr_mtrr_valid(msr)) | 787 | if (!msr_mtrr_valid(msr)) |
751 | return 1; | 788 | return 1; |
752 | 789 | ||
753 | *pdata = vcpu->arch.mtrr[msr - 0x200]; | 790 | if (msr == MSR_MTRRdefType) |
791 | *pdata = vcpu->arch.mtrr_state.def_type + | ||
792 | (vcpu->arch.mtrr_state.enabled << 10); | ||
793 | else if (msr == MSR_MTRRfix64K_00000) | ||
794 | *pdata = p[0]; | ||
795 | else if (msr == MSR_MTRRfix16K_80000 || msr == MSR_MTRRfix16K_A0000) | ||
796 | *pdata = p[1 + msr - MSR_MTRRfix16K_80000]; | ||
797 | else if (msr >= MSR_MTRRfix4K_C0000 && msr <= MSR_MTRRfix4K_F8000) | ||
798 | *pdata = p[3 + msr - MSR_MTRRfix4K_C0000]; | ||
799 | else if (msr == MSR_IA32_CR_PAT) | ||
800 | *pdata = vcpu->arch.pat; | ||
801 | else { /* Variable MTRRs */ | ||
802 | int idx, is_mtrr_mask; | ||
803 | u64 *pt; | ||
804 | |||
805 | idx = (msr - 0x200) / 2; | ||
806 | is_mtrr_mask = msr - 0x200 - 2 * idx; | ||
807 | if (!is_mtrr_mask) | ||
808 | pt = | ||
809 | (u64 *)&vcpu->arch.mtrr_state.var_ranges[idx].base_lo; | ||
810 | else | ||
811 | pt = | ||
812 | (u64 *)&vcpu->arch.mtrr_state.var_ranges[idx].mask_lo; | ||
813 | *pdata = *pt; | ||
814 | } | ||
815 | |||
754 | return 0; | 816 | return 0; |
755 | } | 817 | } |
756 | 818 | ||
@@ -903,7 +965,6 @@ int kvm_dev_ioctl_check_extension(long ext) | |||
903 | case KVM_CAP_IRQCHIP: | 965 | case KVM_CAP_IRQCHIP: |
904 | case KVM_CAP_HLT: | 966 | case KVM_CAP_HLT: |
905 | case KVM_CAP_MMU_SHADOW_CACHE_CONTROL: | 967 | case KVM_CAP_MMU_SHADOW_CACHE_CONTROL: |
906 | case KVM_CAP_USER_MEMORY: | ||
907 | case KVM_CAP_SET_TSS_ADDR: | 968 | case KVM_CAP_SET_TSS_ADDR: |
908 | case KVM_CAP_EXT_CPUID: | 969 | case KVM_CAP_EXT_CPUID: |
909 | case KVM_CAP_CLOCKSOURCE: | 970 | case KVM_CAP_CLOCKSOURCE: |
@@ -929,7 +990,7 @@ int kvm_dev_ioctl_check_extension(long ext) | |||
929 | r = !tdp_enabled; | 990 | r = !tdp_enabled; |
930 | break; | 991 | break; |
931 | case KVM_CAP_IOMMU: | 992 | case KVM_CAP_IOMMU: |
932 | r = intel_iommu_found(); | 993 | r = iommu_found(); |
933 | break; | 994 | break; |
934 | default: | 995 | default: |
935 | r = 0; | 996 | r = 0; |
@@ -1188,6 +1249,7 @@ static void do_cpuid_ent(struct kvm_cpuid_entry2 *entry, u32 function, | |||
1188 | int t, times = entry->eax & 0xff; | 1249 | int t, times = entry->eax & 0xff; |
1189 | 1250 | ||
1190 | entry->flags |= KVM_CPUID_FLAG_STATEFUL_FUNC; | 1251 | entry->flags |= KVM_CPUID_FLAG_STATEFUL_FUNC; |
1252 | entry->flags |= KVM_CPUID_FLAG_STATE_READ_NEXT; | ||
1191 | for (t = 1; t < times && *nent < maxnent; ++t) { | 1253 | for (t = 1; t < times && *nent < maxnent; ++t) { |
1192 | do_cpuid_1_ent(&entry[t], function, 0); | 1254 | do_cpuid_1_ent(&entry[t], function, 0); |
1193 | entry[t].flags |= KVM_CPUID_FLAG_STATEFUL_FUNC; | 1255 | entry[t].flags |= KVM_CPUID_FLAG_STATEFUL_FUNC; |
@@ -1218,7 +1280,7 @@ static void do_cpuid_ent(struct kvm_cpuid_entry2 *entry, u32 function, | |||
1218 | entry->flags |= KVM_CPUID_FLAG_SIGNIFCANT_INDEX; | 1280 | entry->flags |= KVM_CPUID_FLAG_SIGNIFCANT_INDEX; |
1219 | /* read more entries until level_type is zero */ | 1281 | /* read more entries until level_type is zero */ |
1220 | for (i = 1; *nent < maxnent; ++i) { | 1282 | for (i = 1; *nent < maxnent; ++i) { |
1221 | level_type = entry[i - 1].ecx & 0xff; | 1283 | level_type = entry[i - 1].ecx & 0xff00; |
1222 | if (!level_type) | 1284 | if (!level_type) |
1223 | break; | 1285 | break; |
1224 | do_cpuid_1_ent(&entry[i], function, i); | 1286 | do_cpuid_1_ent(&entry[i], function, i); |
@@ -1318,6 +1380,15 @@ static int kvm_vcpu_ioctl_interrupt(struct kvm_vcpu *vcpu, | |||
1318 | return 0; | 1380 | return 0; |
1319 | } | 1381 | } |
1320 | 1382 | ||
1383 | static int kvm_vcpu_ioctl_nmi(struct kvm_vcpu *vcpu) | ||
1384 | { | ||
1385 | vcpu_load(vcpu); | ||
1386 | kvm_inject_nmi(vcpu); | ||
1387 | vcpu_put(vcpu); | ||
1388 | |||
1389 | return 0; | ||
1390 | } | ||
1391 | |||
1321 | static int vcpu_ioctl_tpr_access_reporting(struct kvm_vcpu *vcpu, | 1392 | static int vcpu_ioctl_tpr_access_reporting(struct kvm_vcpu *vcpu, |
1322 | struct kvm_tpr_access_ctl *tac) | 1393 | struct kvm_tpr_access_ctl *tac) |
1323 | { | 1394 | { |
@@ -1377,6 +1448,13 @@ long kvm_arch_vcpu_ioctl(struct file *filp, | |||
1377 | r = 0; | 1448 | r = 0; |
1378 | break; | 1449 | break; |
1379 | } | 1450 | } |
1451 | case KVM_NMI: { | ||
1452 | r = kvm_vcpu_ioctl_nmi(vcpu); | ||
1453 | if (r) | ||
1454 | goto out; | ||
1455 | r = 0; | ||
1456 | break; | ||
1457 | } | ||
1380 | case KVM_SET_CPUID: { | 1458 | case KVM_SET_CPUID: { |
1381 | struct kvm_cpuid __user *cpuid_arg = argp; | 1459 | struct kvm_cpuid __user *cpuid_arg = argp; |
1382 | struct kvm_cpuid cpuid; | 1460 | struct kvm_cpuid cpuid; |
@@ -1968,7 +2046,7 @@ int emulator_write_phys(struct kvm_vcpu *vcpu, gpa_t gpa, | |||
1968 | ret = kvm_write_guest(vcpu->kvm, gpa, val, bytes); | 2046 | ret = kvm_write_guest(vcpu->kvm, gpa, val, bytes); |
1969 | if (ret < 0) | 2047 | if (ret < 0) |
1970 | return 0; | 2048 | return 0; |
1971 | kvm_mmu_pte_write(vcpu, gpa, val, bytes); | 2049 | kvm_mmu_pte_write(vcpu, gpa, val, bytes, 1); |
1972 | return 1; | 2050 | return 1; |
1973 | } | 2051 | } |
1974 | 2052 | ||
@@ -2404,8 +2482,6 @@ int kvm_emulate_pio(struct kvm_vcpu *vcpu, struct kvm_run *run, int in, | |||
2404 | val = kvm_register_read(vcpu, VCPU_REGS_RAX); | 2482 | val = kvm_register_read(vcpu, VCPU_REGS_RAX); |
2405 | memcpy(vcpu->arch.pio_data, &val, 4); | 2483 | memcpy(vcpu->arch.pio_data, &val, 4); |
2406 | 2484 | ||
2407 | kvm_x86_ops->skip_emulated_instruction(vcpu); | ||
2408 | |||
2409 | pio_dev = vcpu_find_pio_dev(vcpu, port, size, !in); | 2485 | pio_dev = vcpu_find_pio_dev(vcpu, port, size, !in); |
2410 | if (pio_dev) { | 2486 | if (pio_dev) { |
2411 | kernel_pio(pio_dev, vcpu, vcpu->arch.pio_data); | 2487 | kernel_pio(pio_dev, vcpu, vcpu->arch.pio_data); |
@@ -2541,7 +2617,7 @@ int kvm_arch_init(void *opaque) | |||
2541 | kvm_mmu_set_nonpresent_ptes(0ull, 0ull); | 2617 | kvm_mmu_set_nonpresent_ptes(0ull, 0ull); |
2542 | kvm_mmu_set_base_ptes(PT_PRESENT_MASK); | 2618 | kvm_mmu_set_base_ptes(PT_PRESENT_MASK); |
2543 | kvm_mmu_set_mask_ptes(PT_USER_MASK, PT_ACCESSED_MASK, | 2619 | kvm_mmu_set_mask_ptes(PT_USER_MASK, PT_ACCESSED_MASK, |
2544 | PT_DIRTY_MASK, PT64_NX_MASK, 0); | 2620 | PT_DIRTY_MASK, PT64_NX_MASK, 0, 0); |
2545 | return 0; | 2621 | return 0; |
2546 | 2622 | ||
2547 | out: | 2623 | out: |
@@ -2729,7 +2805,7 @@ static int move_to_next_stateful_cpuid_entry(struct kvm_vcpu *vcpu, int i) | |||
2729 | 2805 | ||
2730 | e->flags &= ~KVM_CPUID_FLAG_STATE_READ_NEXT; | 2806 | e->flags &= ~KVM_CPUID_FLAG_STATE_READ_NEXT; |
2731 | /* when no next entry is found, the current entry[i] is reselected */ | 2807 | /* when no next entry is found, the current entry[i] is reselected */ |
2732 | for (j = i + 1; j == i; j = (j + 1) % nent) { | 2808 | for (j = i + 1; ; j = (j + 1) % nent) { |
2733 | struct kvm_cpuid_entry2 *ej = &vcpu->arch.cpuid_entries[j]; | 2809 | struct kvm_cpuid_entry2 *ej = &vcpu->arch.cpuid_entries[j]; |
2734 | if (ej->function == e->function) { | 2810 | if (ej->function == e->function) { |
2735 | ej->flags |= KVM_CPUID_FLAG_STATE_READ_NEXT; | 2811 | ej->flags |= KVM_CPUID_FLAG_STATE_READ_NEXT; |
@@ -2973,7 +3049,7 @@ static int __vcpu_run(struct kvm_vcpu *vcpu, struct kvm_run *kvm_run) | |||
2973 | pr_debug("vcpu %d received sipi with vector # %x\n", | 3049 | pr_debug("vcpu %d received sipi with vector # %x\n", |
2974 | vcpu->vcpu_id, vcpu->arch.sipi_vector); | 3050 | vcpu->vcpu_id, vcpu->arch.sipi_vector); |
2975 | kvm_lapic_reset(vcpu); | 3051 | kvm_lapic_reset(vcpu); |
2976 | r = kvm_x86_ops->vcpu_reset(vcpu); | 3052 | r = kvm_arch_vcpu_reset(vcpu); |
2977 | if (r) | 3053 | if (r) |
2978 | return r; | 3054 | return r; |
2979 | vcpu->arch.mp_state = KVM_MP_STATE_RUNNABLE; | 3055 | vcpu->arch.mp_state = KVM_MP_STATE_RUNNABLE; |
@@ -3275,9 +3351,9 @@ static void seg_desct_to_kvm_desct(struct desc_struct *seg_desc, u16 selector, | |||
3275 | kvm_desct->padding = 0; | 3351 | kvm_desct->padding = 0; |
3276 | } | 3352 | } |
3277 | 3353 | ||
3278 | static void get_segment_descritptor_dtable(struct kvm_vcpu *vcpu, | 3354 | static void get_segment_descriptor_dtable(struct kvm_vcpu *vcpu, |
3279 | u16 selector, | 3355 | u16 selector, |
3280 | struct descriptor_table *dtable) | 3356 | struct descriptor_table *dtable) |
3281 | { | 3357 | { |
3282 | if (selector & 1 << 2) { | 3358 | if (selector & 1 << 2) { |
3283 | struct kvm_segment kvm_seg; | 3359 | struct kvm_segment kvm_seg; |
@@ -3302,7 +3378,7 @@ static int load_guest_segment_descriptor(struct kvm_vcpu *vcpu, u16 selector, | |||
3302 | struct descriptor_table dtable; | 3378 | struct descriptor_table dtable; |
3303 | u16 index = selector >> 3; | 3379 | u16 index = selector >> 3; |
3304 | 3380 | ||
3305 | get_segment_descritptor_dtable(vcpu, selector, &dtable); | 3381 | get_segment_descriptor_dtable(vcpu, selector, &dtable); |
3306 | 3382 | ||
3307 | if (dtable.limit < index * 8 + 7) { | 3383 | if (dtable.limit < index * 8 + 7) { |
3308 | kvm_queue_exception_e(vcpu, GP_VECTOR, selector & 0xfffc); | 3384 | kvm_queue_exception_e(vcpu, GP_VECTOR, selector & 0xfffc); |
@@ -3321,7 +3397,7 @@ static int save_guest_segment_descriptor(struct kvm_vcpu *vcpu, u16 selector, | |||
3321 | struct descriptor_table dtable; | 3397 | struct descriptor_table dtable; |
3322 | u16 index = selector >> 3; | 3398 | u16 index = selector >> 3; |
3323 | 3399 | ||
3324 | get_segment_descritptor_dtable(vcpu, selector, &dtable); | 3400 | get_segment_descriptor_dtable(vcpu, selector, &dtable); |
3325 | 3401 | ||
3326 | if (dtable.limit < index * 8 + 7) | 3402 | if (dtable.limit < index * 8 + 7) |
3327 | return 1; | 3403 | return 1; |
@@ -3900,6 +3976,7 @@ int kvm_arch_vcpu_setup(struct kvm_vcpu *vcpu) | |||
3900 | /* We do fxsave: this must be aligned. */ | 3976 | /* We do fxsave: this must be aligned. */ |
3901 | BUG_ON((unsigned long)&vcpu->arch.host_fx_image & 0xF); | 3977 | BUG_ON((unsigned long)&vcpu->arch.host_fx_image & 0xF); |
3902 | 3978 | ||
3979 | vcpu->arch.mtrr_state.have_fixed = 1; | ||
3903 | vcpu_load(vcpu); | 3980 | vcpu_load(vcpu); |
3904 | r = kvm_arch_vcpu_reset(vcpu); | 3981 | r = kvm_arch_vcpu_reset(vcpu); |
3905 | if (r == 0) | 3982 | if (r == 0) |
@@ -3925,6 +4002,9 @@ void kvm_arch_vcpu_destroy(struct kvm_vcpu *vcpu) | |||
3925 | 4002 | ||
3926 | int kvm_arch_vcpu_reset(struct kvm_vcpu *vcpu) | 4003 | int kvm_arch_vcpu_reset(struct kvm_vcpu *vcpu) |
3927 | { | 4004 | { |
4005 | vcpu->arch.nmi_pending = false; | ||
4006 | vcpu->arch.nmi_injected = false; | ||
4007 | |||
3928 | return kvm_x86_ops->vcpu_reset(vcpu); | 4008 | return kvm_x86_ops->vcpu_reset(vcpu); |
3929 | } | 4009 | } |
3930 | 4010 | ||
@@ -4012,6 +4092,7 @@ struct kvm *kvm_arch_create_vm(void) | |||
4012 | return ERR_PTR(-ENOMEM); | 4092 | return ERR_PTR(-ENOMEM); |
4013 | 4093 | ||
4014 | INIT_LIST_HEAD(&kvm->arch.active_mmu_pages); | 4094 | INIT_LIST_HEAD(&kvm->arch.active_mmu_pages); |
4095 | INIT_LIST_HEAD(&kvm->arch.oos_global_pages); | ||
4015 | INIT_LIST_HEAD(&kvm->arch.assigned_dev_head); | 4096 | INIT_LIST_HEAD(&kvm->arch.assigned_dev_head); |
4016 | 4097 | ||
4017 | /* Reserve bit 0 of irq_sources_bitmap for userspace irq source */ | 4098 | /* Reserve bit 0 of irq_sources_bitmap for userspace irq source */ |
@@ -4048,8 +4129,8 @@ static void kvm_free_vcpus(struct kvm *kvm) | |||
4048 | 4129 | ||
4049 | void kvm_arch_destroy_vm(struct kvm *kvm) | 4130 | void kvm_arch_destroy_vm(struct kvm *kvm) |
4050 | { | 4131 | { |
4051 | kvm_iommu_unmap_guest(kvm); | ||
4052 | kvm_free_all_assigned_devices(kvm); | 4132 | kvm_free_all_assigned_devices(kvm); |
4133 | kvm_iommu_unmap_guest(kvm); | ||
4053 | kvm_free_pit(kvm); | 4134 | kvm_free_pit(kvm); |
4054 | kfree(kvm->arch.vpic); | 4135 | kfree(kvm->arch.vpic); |
4055 | kfree(kvm->arch.vioapic); | 4136 | kfree(kvm->arch.vioapic); |
@@ -4127,7 +4208,8 @@ void kvm_arch_flush_shadow(struct kvm *kvm) | |||
4127 | int kvm_arch_vcpu_runnable(struct kvm_vcpu *vcpu) | 4208 | int kvm_arch_vcpu_runnable(struct kvm_vcpu *vcpu) |
4128 | { | 4209 | { |
4129 | return vcpu->arch.mp_state == KVM_MP_STATE_RUNNABLE | 4210 | return vcpu->arch.mp_state == KVM_MP_STATE_RUNNABLE |
4130 | || vcpu->arch.mp_state == KVM_MP_STATE_SIPI_RECEIVED; | 4211 | || vcpu->arch.mp_state == KVM_MP_STATE_SIPI_RECEIVED |
4212 | || vcpu->arch.nmi_pending; | ||
4131 | } | 4213 | } |
4132 | 4214 | ||
4133 | static void vcpu_kick_intr(void *info) | 4215 | static void vcpu_kick_intr(void *info) |
diff --git a/arch/x86/kvm/x86_emulate.c b/arch/x86/kvm/x86_emulate.c index ea051173b0da..d174db7a3370 100644 --- a/arch/x86/kvm/x86_emulate.c +++ b/arch/x86/kvm/x86_emulate.c | |||
@@ -58,6 +58,7 @@ | |||
58 | #define SrcMem32 (4<<4) /* Memory operand (32-bit). */ | 58 | #define SrcMem32 (4<<4) /* Memory operand (32-bit). */ |
59 | #define SrcImm (5<<4) /* Immediate operand. */ | 59 | #define SrcImm (5<<4) /* Immediate operand. */ |
60 | #define SrcImmByte (6<<4) /* 8-bit sign-extended immediate operand. */ | 60 | #define SrcImmByte (6<<4) /* 8-bit sign-extended immediate operand. */ |
61 | #define SrcOne (7<<4) /* Implied '1' */ | ||
61 | #define SrcMask (7<<4) | 62 | #define SrcMask (7<<4) |
62 | /* Generic ModRM decode. */ | 63 | /* Generic ModRM decode. */ |
63 | #define ModRM (1<<7) | 64 | #define ModRM (1<<7) |
@@ -70,17 +71,23 @@ | |||
70 | #define Group (1<<14) /* Bits 3:5 of modrm byte extend opcode */ | 71 | #define Group (1<<14) /* Bits 3:5 of modrm byte extend opcode */ |
71 | #define GroupDual (1<<15) /* Alternate decoding of mod == 3 */ | 72 | #define GroupDual (1<<15) /* Alternate decoding of mod == 3 */ |
72 | #define GroupMask 0xff /* Group number stored in bits 0:7 */ | 73 | #define GroupMask 0xff /* Group number stored in bits 0:7 */ |
74 | /* Source 2 operand type */ | ||
75 | #define Src2None (0<<29) | ||
76 | #define Src2CL (1<<29) | ||
77 | #define Src2ImmByte (2<<29) | ||
78 | #define Src2One (3<<29) | ||
79 | #define Src2Mask (7<<29) | ||
73 | 80 | ||
74 | enum { | 81 | enum { |
75 | Group1_80, Group1_81, Group1_82, Group1_83, | 82 | Group1_80, Group1_81, Group1_82, Group1_83, |
76 | Group1A, Group3_Byte, Group3, Group4, Group5, Group7, | 83 | Group1A, Group3_Byte, Group3, Group4, Group5, Group7, |
77 | }; | 84 | }; |
78 | 85 | ||
79 | static u16 opcode_table[256] = { | 86 | static u32 opcode_table[256] = { |
80 | /* 0x00 - 0x07 */ | 87 | /* 0x00 - 0x07 */ |
81 | ByteOp | DstMem | SrcReg | ModRM, DstMem | SrcReg | ModRM, | 88 | ByteOp | DstMem | SrcReg | ModRM, DstMem | SrcReg | ModRM, |
82 | ByteOp | DstReg | SrcMem | ModRM, DstReg | SrcMem | ModRM, | 89 | ByteOp | DstReg | SrcMem | ModRM, DstReg | SrcMem | ModRM, |
83 | 0, 0, 0, 0, | 90 | ByteOp | DstAcc | SrcImm, DstAcc | SrcImm, 0, 0, |
84 | /* 0x08 - 0x0F */ | 91 | /* 0x08 - 0x0F */ |
85 | ByteOp | DstMem | SrcReg | ModRM, DstMem | SrcReg | ModRM, | 92 | ByteOp | DstMem | SrcReg | ModRM, DstMem | SrcReg | ModRM, |
86 | ByteOp | DstReg | SrcMem | ModRM, DstReg | SrcMem | ModRM, | 93 | ByteOp | DstReg | SrcMem | ModRM, DstReg | SrcMem | ModRM, |
@@ -195,7 +202,7 @@ static u16 opcode_table[256] = { | |||
195 | ImplicitOps, ImplicitOps, Group | Group4, Group | Group5, | 202 | ImplicitOps, ImplicitOps, Group | Group4, Group | Group5, |
196 | }; | 203 | }; |
197 | 204 | ||
198 | static u16 twobyte_table[256] = { | 205 | static u32 twobyte_table[256] = { |
199 | /* 0x00 - 0x0F */ | 206 | /* 0x00 - 0x0F */ |
200 | 0, Group | GroupDual | Group7, 0, 0, 0, 0, ImplicitOps, 0, | 207 | 0, Group | GroupDual | Group7, 0, 0, 0, 0, ImplicitOps, 0, |
201 | ImplicitOps, ImplicitOps, 0, 0, 0, ImplicitOps | ModRM, 0, 0, | 208 | ImplicitOps, ImplicitOps, 0, 0, 0, ImplicitOps | ModRM, 0, 0, |
@@ -230,9 +237,14 @@ static u16 twobyte_table[256] = { | |||
230 | /* 0x90 - 0x9F */ | 237 | /* 0x90 - 0x9F */ |
231 | 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, | 238 | 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, |
232 | /* 0xA0 - 0xA7 */ | 239 | /* 0xA0 - 0xA7 */ |
233 | 0, 0, 0, DstMem | SrcReg | ModRM | BitOp, 0, 0, 0, 0, | 240 | 0, 0, 0, DstMem | SrcReg | ModRM | BitOp, |
241 | DstMem | SrcReg | Src2ImmByte | ModRM, | ||
242 | DstMem | SrcReg | Src2CL | ModRM, 0, 0, | ||
234 | /* 0xA8 - 0xAF */ | 243 | /* 0xA8 - 0xAF */ |
235 | 0, 0, 0, DstMem | SrcReg | ModRM | BitOp, 0, 0, ModRM, 0, | 244 | 0, 0, 0, DstMem | SrcReg | ModRM | BitOp, |
245 | DstMem | SrcReg | Src2ImmByte | ModRM, | ||
246 | DstMem | SrcReg | Src2CL | ModRM, | ||
247 | ModRM, 0, | ||
236 | /* 0xB0 - 0xB7 */ | 248 | /* 0xB0 - 0xB7 */ |
237 | ByteOp | DstMem | SrcReg | ModRM, DstMem | SrcReg | ModRM, 0, | 249 | ByteOp | DstMem | SrcReg | ModRM, DstMem | SrcReg | ModRM, 0, |
238 | DstMem | SrcReg | ModRM | BitOp, | 250 | DstMem | SrcReg | ModRM | BitOp, |
@@ -253,7 +265,7 @@ static u16 twobyte_table[256] = { | |||
253 | 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 | 265 | 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 |
254 | }; | 266 | }; |
255 | 267 | ||
256 | static u16 group_table[] = { | 268 | static u32 group_table[] = { |
257 | [Group1_80*8] = | 269 | [Group1_80*8] = |
258 | ByteOp | DstMem | SrcImm | ModRM, ByteOp | DstMem | SrcImm | ModRM, | 270 | ByteOp | DstMem | SrcImm | ModRM, ByteOp | DstMem | SrcImm | ModRM, |
259 | ByteOp | DstMem | SrcImm | ModRM, ByteOp | DstMem | SrcImm | ModRM, | 271 | ByteOp | DstMem | SrcImm | ModRM, ByteOp | DstMem | SrcImm | ModRM, |
@@ -297,9 +309,9 @@ static u16 group_table[] = { | |||
297 | SrcMem16 | ModRM | Mov, SrcMem | ModRM | ByteOp, | 309 | SrcMem16 | ModRM | Mov, SrcMem | ModRM | ByteOp, |
298 | }; | 310 | }; |
299 | 311 | ||
300 | static u16 group2_table[] = { | 312 | static u32 group2_table[] = { |
301 | [Group7*8] = | 313 | [Group7*8] = |
302 | SrcNone | ModRM, 0, 0, 0, | 314 | SrcNone | ModRM, 0, 0, SrcNone | ModRM, |
303 | SrcNone | ModRM | DstMem | Mov, 0, | 315 | SrcNone | ModRM | DstMem | Mov, 0, |
304 | SrcMem16 | ModRM | Mov, 0, | 316 | SrcMem16 | ModRM | Mov, 0, |
305 | }; | 317 | }; |
@@ -359,49 +371,48 @@ static u16 group2_table[] = { | |||
359 | "andl %"_msk",%"_LO32 _tmp"; " \ | 371 | "andl %"_msk",%"_LO32 _tmp"; " \ |
360 | "orl %"_LO32 _tmp",%"_sav"; " | 372 | "orl %"_LO32 _tmp",%"_sav"; " |
361 | 373 | ||
374 | #ifdef CONFIG_X86_64 | ||
375 | #define ON64(x) x | ||
376 | #else | ||
377 | #define ON64(x) | ||
378 | #endif | ||
379 | |||
380 | #define ____emulate_2op(_op, _src, _dst, _eflags, _x, _y, _suffix) \ | ||
381 | do { \ | ||
382 | __asm__ __volatile__ ( \ | ||
383 | _PRE_EFLAGS("0", "4", "2") \ | ||
384 | _op _suffix " %"_x"3,%1; " \ | ||
385 | _POST_EFLAGS("0", "4", "2") \ | ||
386 | : "=m" (_eflags), "=m" ((_dst).val), \ | ||
387 | "=&r" (_tmp) \ | ||
388 | : _y ((_src).val), "i" (EFLAGS_MASK)); \ | ||
389 | } while (0) | ||
390 | |||
391 | |||
362 | /* Raw emulation: instruction has two explicit operands. */ | 392 | /* Raw emulation: instruction has two explicit operands. */ |
363 | #define __emulate_2op_nobyte(_op,_src,_dst,_eflags,_wx,_wy,_lx,_ly,_qx,_qy) \ | 393 | #define __emulate_2op_nobyte(_op,_src,_dst,_eflags,_wx,_wy,_lx,_ly,_qx,_qy) \ |
364 | do { \ | 394 | do { \ |
365 | unsigned long _tmp; \ | 395 | unsigned long _tmp; \ |
366 | \ | 396 | \ |
367 | switch ((_dst).bytes) { \ | 397 | switch ((_dst).bytes) { \ |
368 | case 2: \ | 398 | case 2: \ |
369 | __asm__ __volatile__ ( \ | 399 | ____emulate_2op(_op,_src,_dst,_eflags,_wx,_wy,"w"); \ |
370 | _PRE_EFLAGS("0", "4", "2") \ | 400 | break; \ |
371 | _op"w %"_wx"3,%1; " \ | 401 | case 4: \ |
372 | _POST_EFLAGS("0", "4", "2") \ | 402 | ____emulate_2op(_op,_src,_dst,_eflags,_lx,_ly,"l"); \ |
373 | : "=m" (_eflags), "=m" ((_dst).val), \ | 403 | break; \ |
374 | "=&r" (_tmp) \ | 404 | case 8: \ |
375 | : _wy ((_src).val), "i" (EFLAGS_MASK)); \ | 405 | ON64(____emulate_2op(_op,_src,_dst,_eflags,_qx,_qy,"q")); \ |
376 | break; \ | 406 | break; \ |
377 | case 4: \ | 407 | } \ |
378 | __asm__ __volatile__ ( \ | ||
379 | _PRE_EFLAGS("0", "4", "2") \ | ||
380 | _op"l %"_lx"3,%1; " \ | ||
381 | _POST_EFLAGS("0", "4", "2") \ | ||
382 | : "=m" (_eflags), "=m" ((_dst).val), \ | ||
383 | "=&r" (_tmp) \ | ||
384 | : _ly ((_src).val), "i" (EFLAGS_MASK)); \ | ||
385 | break; \ | ||
386 | case 8: \ | ||
387 | __emulate_2op_8byte(_op, _src, _dst, \ | ||
388 | _eflags, _qx, _qy); \ | ||
389 | break; \ | ||
390 | } \ | ||
391 | } while (0) | 408 | } while (0) |
392 | 409 | ||
393 | #define __emulate_2op(_op,_src,_dst,_eflags,_bx,_by,_wx,_wy,_lx,_ly,_qx,_qy) \ | 410 | #define __emulate_2op(_op,_src,_dst,_eflags,_bx,_by,_wx,_wy,_lx,_ly,_qx,_qy) \ |
394 | do { \ | 411 | do { \ |
395 | unsigned long __tmp; \ | 412 | unsigned long _tmp; \ |
396 | switch ((_dst).bytes) { \ | 413 | switch ((_dst).bytes) { \ |
397 | case 1: \ | 414 | case 1: \ |
398 | __asm__ __volatile__ ( \ | 415 | ____emulate_2op(_op,_src,_dst,_eflags,_bx,_by,"b"); \ |
399 | _PRE_EFLAGS("0", "4", "2") \ | ||
400 | _op"b %"_bx"3,%1; " \ | ||
401 | _POST_EFLAGS("0", "4", "2") \ | ||
402 | : "=m" (_eflags), "=m" ((_dst).val), \ | ||
403 | "=&r" (__tmp) \ | ||
404 | : _by ((_src).val), "i" (EFLAGS_MASK)); \ | ||
405 | break; \ | 416 | break; \ |
406 | default: \ | 417 | default: \ |
407 | __emulate_2op_nobyte(_op, _src, _dst, _eflags, \ | 418 | __emulate_2op_nobyte(_op, _src, _dst, _eflags, \ |
@@ -425,71 +436,68 @@ static u16 group2_table[] = { | |||
425 | __emulate_2op_nobyte(_op, _src, _dst, _eflags, \ | 436 | __emulate_2op_nobyte(_op, _src, _dst, _eflags, \ |
426 | "w", "r", _LO32, "r", "", "r") | 437 | "w", "r", _LO32, "r", "", "r") |
427 | 438 | ||
428 | /* Instruction has only one explicit operand (no source operand). */ | 439 | /* Instruction has three operands and one operand is stored in ECX register */ |
429 | #define emulate_1op(_op, _dst, _eflags) \ | 440 | #define __emulate_2op_cl(_op, _cl, _src, _dst, _eflags, _suffix, _type) \ |
430 | do { \ | 441 | do { \ |
431 | unsigned long _tmp; \ | 442 | unsigned long _tmp; \ |
432 | \ | 443 | _type _clv = (_cl).val; \ |
433 | switch ((_dst).bytes) { \ | 444 | _type _srcv = (_src).val; \ |
434 | case 1: \ | 445 | _type _dstv = (_dst).val; \ |
435 | __asm__ __volatile__ ( \ | 446 | \ |
436 | _PRE_EFLAGS("0", "3", "2") \ | 447 | __asm__ __volatile__ ( \ |
437 | _op"b %1; " \ | 448 | _PRE_EFLAGS("0", "5", "2") \ |
438 | _POST_EFLAGS("0", "3", "2") \ | 449 | _op _suffix " %4,%1 \n" \ |
439 | : "=m" (_eflags), "=m" ((_dst).val), \ | 450 | _POST_EFLAGS("0", "5", "2") \ |
440 | "=&r" (_tmp) \ | 451 | : "=m" (_eflags), "+r" (_dstv), "=&r" (_tmp) \ |
441 | : "i" (EFLAGS_MASK)); \ | 452 | : "c" (_clv) , "r" (_srcv), "i" (EFLAGS_MASK) \ |
442 | break; \ | 453 | ); \ |
443 | case 2: \ | 454 | \ |
444 | __asm__ __volatile__ ( \ | 455 | (_cl).val = (unsigned long) _clv; \ |
445 | _PRE_EFLAGS("0", "3", "2") \ | 456 | (_src).val = (unsigned long) _srcv; \ |
446 | _op"w %1; " \ | 457 | (_dst).val = (unsigned long) _dstv; \ |
447 | _POST_EFLAGS("0", "3", "2") \ | ||
448 | : "=m" (_eflags), "=m" ((_dst).val), \ | ||
449 | "=&r" (_tmp) \ | ||
450 | : "i" (EFLAGS_MASK)); \ | ||
451 | break; \ | ||
452 | case 4: \ | ||
453 | __asm__ __volatile__ ( \ | ||
454 | _PRE_EFLAGS("0", "3", "2") \ | ||
455 | _op"l %1; " \ | ||
456 | _POST_EFLAGS("0", "3", "2") \ | ||
457 | : "=m" (_eflags), "=m" ((_dst).val), \ | ||
458 | "=&r" (_tmp) \ | ||
459 | : "i" (EFLAGS_MASK)); \ | ||
460 | break; \ | ||
461 | case 8: \ | ||
462 | __emulate_1op_8byte(_op, _dst, _eflags); \ | ||
463 | break; \ | ||
464 | } \ | ||
465 | } while (0) | 458 | } while (0) |
466 | 459 | ||
467 | /* Emulate an instruction with quadword operands (x86/64 only). */ | 460 | #define emulate_2op_cl(_op, _cl, _src, _dst, _eflags) \ |
468 | #if defined(CONFIG_X86_64) | 461 | do { \ |
469 | #define __emulate_2op_8byte(_op, _src, _dst, _eflags, _qx, _qy) \ | 462 | switch ((_dst).bytes) { \ |
470 | do { \ | 463 | case 2: \ |
471 | __asm__ __volatile__ ( \ | 464 | __emulate_2op_cl(_op, _cl, _src, _dst, _eflags, \ |
472 | _PRE_EFLAGS("0", "4", "2") \ | 465 | "w", unsigned short); \ |
473 | _op"q %"_qx"3,%1; " \ | 466 | break; \ |
474 | _POST_EFLAGS("0", "4", "2") \ | 467 | case 4: \ |
475 | : "=m" (_eflags), "=m" ((_dst).val), "=&r" (_tmp) \ | 468 | __emulate_2op_cl(_op, _cl, _src, _dst, _eflags, \ |
476 | : _qy ((_src).val), "i" (EFLAGS_MASK)); \ | 469 | "l", unsigned int); \ |
470 | break; \ | ||
471 | case 8: \ | ||
472 | ON64(__emulate_2op_cl(_op, _cl, _src, _dst, _eflags, \ | ||
473 | "q", unsigned long)); \ | ||
474 | break; \ | ||
475 | } \ | ||
477 | } while (0) | 476 | } while (0) |
478 | 477 | ||
479 | #define __emulate_1op_8byte(_op, _dst, _eflags) \ | 478 | #define __emulate_1op(_op, _dst, _eflags, _suffix) \ |
480 | do { \ | 479 | do { \ |
481 | __asm__ __volatile__ ( \ | 480 | unsigned long _tmp; \ |
482 | _PRE_EFLAGS("0", "3", "2") \ | 481 | \ |
483 | _op"q %1; " \ | 482 | __asm__ __volatile__ ( \ |
484 | _POST_EFLAGS("0", "3", "2") \ | 483 | _PRE_EFLAGS("0", "3", "2") \ |
485 | : "=m" (_eflags), "=m" ((_dst).val), "=&r" (_tmp) \ | 484 | _op _suffix " %1; " \ |
486 | : "i" (EFLAGS_MASK)); \ | 485 | _POST_EFLAGS("0", "3", "2") \ |
486 | : "=m" (_eflags), "+m" ((_dst).val), \ | ||
487 | "=&r" (_tmp) \ | ||
488 | : "i" (EFLAGS_MASK)); \ | ||
487 | } while (0) | 489 | } while (0) |
488 | 490 | ||
489 | #elif defined(__i386__) | 491 | /* Instruction has only one explicit operand (no source operand). */ |
490 | #define __emulate_2op_8byte(_op, _src, _dst, _eflags, _qx, _qy) | 492 | #define emulate_1op(_op, _dst, _eflags) \ |
491 | #define __emulate_1op_8byte(_op, _dst, _eflags) | 493 | do { \ |
492 | #endif /* __i386__ */ | 494 | switch ((_dst).bytes) { \ |
495 | case 1: __emulate_1op(_op, _dst, _eflags, "b"); break; \ | ||
496 | case 2: __emulate_1op(_op, _dst, _eflags, "w"); break; \ | ||
497 | case 4: __emulate_1op(_op, _dst, _eflags, "l"); break; \ | ||
498 | case 8: ON64(__emulate_1op(_op, _dst, _eflags, "q")); break; \ | ||
499 | } \ | ||
500 | } while (0) | ||
493 | 501 | ||
494 | /* Fetch next part of the instruction being emulated. */ | 502 | /* Fetch next part of the instruction being emulated. */ |
495 | #define insn_fetch(_type, _size, _eip) \ | 503 | #define insn_fetch(_type, _size, _eip) \ |
@@ -1041,6 +1049,33 @@ done_prefixes: | |||
1041 | c->src.bytes = 1; | 1049 | c->src.bytes = 1; |
1042 | c->src.val = insn_fetch(s8, 1, c->eip); | 1050 | c->src.val = insn_fetch(s8, 1, c->eip); |
1043 | break; | 1051 | break; |
1052 | case SrcOne: | ||
1053 | c->src.bytes = 1; | ||
1054 | c->src.val = 1; | ||
1055 | break; | ||
1056 | } | ||
1057 | |||
1058 | /* | ||
1059 | * Decode and fetch the second source operand: register, memory | ||
1060 | * or immediate. | ||
1061 | */ | ||
1062 | switch (c->d & Src2Mask) { | ||
1063 | case Src2None: | ||
1064 | break; | ||
1065 | case Src2CL: | ||
1066 | c->src2.bytes = 1; | ||
1067 | c->src2.val = c->regs[VCPU_REGS_RCX] & 0x8; | ||
1068 | break; | ||
1069 | case Src2ImmByte: | ||
1070 | c->src2.type = OP_IMM; | ||
1071 | c->src2.ptr = (unsigned long *)c->eip; | ||
1072 | c->src2.bytes = 1; | ||
1073 | c->src2.val = insn_fetch(u8, 1, c->eip); | ||
1074 | break; | ||
1075 | case Src2One: | ||
1076 | c->src2.bytes = 1; | ||
1077 | c->src2.val = 1; | ||
1078 | break; | ||
1044 | } | 1079 | } |
1045 | 1080 | ||
1046 | /* Decode and fetch the destination operand: register or memory. */ | 1081 | /* Decode and fetch the destination operand: register or memory. */ |
@@ -1100,20 +1135,33 @@ static inline void emulate_push(struct x86_emulate_ctxt *ctxt) | |||
1100 | c->regs[VCPU_REGS_RSP]); | 1135 | c->regs[VCPU_REGS_RSP]); |
1101 | } | 1136 | } |
1102 | 1137 | ||
1103 | static inline int emulate_grp1a(struct x86_emulate_ctxt *ctxt, | 1138 | static int emulate_pop(struct x86_emulate_ctxt *ctxt, |
1104 | struct x86_emulate_ops *ops) | 1139 | struct x86_emulate_ops *ops) |
1105 | { | 1140 | { |
1106 | struct decode_cache *c = &ctxt->decode; | 1141 | struct decode_cache *c = &ctxt->decode; |
1107 | int rc; | 1142 | int rc; |
1108 | 1143 | ||
1109 | rc = ops->read_std(register_address(c, ss_base(ctxt), | 1144 | rc = ops->read_emulated(register_address(c, ss_base(ctxt), |
1110 | c->regs[VCPU_REGS_RSP]), | 1145 | c->regs[VCPU_REGS_RSP]), |
1111 | &c->dst.val, c->dst.bytes, ctxt->vcpu); | 1146 | &c->src.val, c->src.bytes, ctxt->vcpu); |
1112 | if (rc != 0) | 1147 | if (rc != 0) |
1113 | return rc; | 1148 | return rc; |
1114 | 1149 | ||
1115 | register_address_increment(c, &c->regs[VCPU_REGS_RSP], c->dst.bytes); | 1150 | register_address_increment(c, &c->regs[VCPU_REGS_RSP], c->src.bytes); |
1151 | return rc; | ||
1152 | } | ||
1153 | |||
1154 | static inline int emulate_grp1a(struct x86_emulate_ctxt *ctxt, | ||
1155 | struct x86_emulate_ops *ops) | ||
1156 | { | ||
1157 | struct decode_cache *c = &ctxt->decode; | ||
1158 | int rc; | ||
1116 | 1159 | ||
1160 | c->src.bytes = c->dst.bytes; | ||
1161 | rc = emulate_pop(ctxt, ops); | ||
1162 | if (rc != 0) | ||
1163 | return rc; | ||
1164 | c->dst.val = c->src.val; | ||
1117 | return 0; | 1165 | return 0; |
1118 | } | 1166 | } |
1119 | 1167 | ||
@@ -1415,24 +1463,15 @@ special_insn: | |||
1415 | emulate_1op("dec", c->dst, ctxt->eflags); | 1463 | emulate_1op("dec", c->dst, ctxt->eflags); |
1416 | break; | 1464 | break; |
1417 | case 0x50 ... 0x57: /* push reg */ | 1465 | case 0x50 ... 0x57: /* push reg */ |
1418 | c->dst.type = OP_MEM; | 1466 | emulate_push(ctxt); |
1419 | c->dst.bytes = c->op_bytes; | ||
1420 | c->dst.val = c->src.val; | ||
1421 | register_address_increment(c, &c->regs[VCPU_REGS_RSP], | ||
1422 | -c->op_bytes); | ||
1423 | c->dst.ptr = (void *) register_address( | ||
1424 | c, ss_base(ctxt), c->regs[VCPU_REGS_RSP]); | ||
1425 | break; | 1467 | break; |
1426 | case 0x58 ... 0x5f: /* pop reg */ | 1468 | case 0x58 ... 0x5f: /* pop reg */ |
1427 | pop_instruction: | 1469 | pop_instruction: |
1428 | if ((rc = ops->read_std(register_address(c, ss_base(ctxt), | 1470 | c->src.bytes = c->op_bytes; |
1429 | c->regs[VCPU_REGS_RSP]), c->dst.ptr, | 1471 | rc = emulate_pop(ctxt, ops); |
1430 | c->op_bytes, ctxt->vcpu)) != 0) | 1472 | if (rc != 0) |
1431 | goto done; | 1473 | goto done; |
1432 | 1474 | c->dst.val = c->src.val; | |
1433 | register_address_increment(c, &c->regs[VCPU_REGS_RSP], | ||
1434 | c->op_bytes); | ||
1435 | c->dst.type = OP_NONE; /* Disable writeback. */ | ||
1436 | break; | 1475 | break; |
1437 | case 0x63: /* movsxd */ | 1476 | case 0x63: /* movsxd */ |
1438 | if (ctxt->mode != X86EMUL_MODE_PROT64) | 1477 | if (ctxt->mode != X86EMUL_MODE_PROT64) |
@@ -1591,7 +1630,9 @@ special_insn: | |||
1591 | emulate_push(ctxt); | 1630 | emulate_push(ctxt); |
1592 | break; | 1631 | break; |
1593 | case 0x9d: /* popf */ | 1632 | case 0x9d: /* popf */ |
1633 | c->dst.type = OP_REG; | ||
1594 | c->dst.ptr = (unsigned long *) &ctxt->eflags; | 1634 | c->dst.ptr = (unsigned long *) &ctxt->eflags; |
1635 | c->dst.bytes = c->op_bytes; | ||
1595 | goto pop_instruction; | 1636 | goto pop_instruction; |
1596 | case 0xa0 ... 0xa1: /* mov */ | 1637 | case 0xa0 ... 0xa1: /* mov */ |
1597 | c->dst.ptr = (unsigned long *)&c->regs[VCPU_REGS_RAX]; | 1638 | c->dst.ptr = (unsigned long *)&c->regs[VCPU_REGS_RAX]; |
@@ -1689,7 +1730,9 @@ special_insn: | |||
1689 | emulate_grp2(ctxt); | 1730 | emulate_grp2(ctxt); |
1690 | break; | 1731 | break; |
1691 | case 0xc3: /* ret */ | 1732 | case 0xc3: /* ret */ |
1733 | c->dst.type = OP_REG; | ||
1692 | c->dst.ptr = &c->eip; | 1734 | c->dst.ptr = &c->eip; |
1735 | c->dst.bytes = c->op_bytes; | ||
1693 | goto pop_instruction; | 1736 | goto pop_instruction; |
1694 | case 0xc6 ... 0xc7: /* mov (sole member of Grp11) */ | 1737 | case 0xc6 ... 0xc7: /* mov (sole member of Grp11) */ |
1695 | mov: | 1738 | mov: |
@@ -1778,7 +1821,7 @@ special_insn: | |||
1778 | c->eip = saved_eip; | 1821 | c->eip = saved_eip; |
1779 | goto cannot_emulate; | 1822 | goto cannot_emulate; |
1780 | } | 1823 | } |
1781 | return 0; | 1824 | break; |
1782 | case 0xf4: /* hlt */ | 1825 | case 0xf4: /* hlt */ |
1783 | ctxt->vcpu->arch.halt_request = 1; | 1826 | ctxt->vcpu->arch.halt_request = 1; |
1784 | break; | 1827 | break; |
@@ -1999,12 +2042,20 @@ twobyte_insn: | |||
1999 | c->src.val &= (c->dst.bytes << 3) - 1; | 2042 | c->src.val &= (c->dst.bytes << 3) - 1; |
2000 | emulate_2op_SrcV_nobyte("bt", c->src, c->dst, ctxt->eflags); | 2043 | emulate_2op_SrcV_nobyte("bt", c->src, c->dst, ctxt->eflags); |
2001 | break; | 2044 | break; |
2045 | case 0xa4: /* shld imm8, r, r/m */ | ||
2046 | case 0xa5: /* shld cl, r, r/m */ | ||
2047 | emulate_2op_cl("shld", c->src2, c->src, c->dst, ctxt->eflags); | ||
2048 | break; | ||
2002 | case 0xab: | 2049 | case 0xab: |
2003 | bts: /* bts */ | 2050 | bts: /* bts */ |
2004 | /* only subword offset */ | 2051 | /* only subword offset */ |
2005 | c->src.val &= (c->dst.bytes << 3) - 1; | 2052 | c->src.val &= (c->dst.bytes << 3) - 1; |
2006 | emulate_2op_SrcV_nobyte("bts", c->src, c->dst, ctxt->eflags); | 2053 | emulate_2op_SrcV_nobyte("bts", c->src, c->dst, ctxt->eflags); |
2007 | break; | 2054 | break; |
2055 | case 0xac: /* shrd imm8, r, r/m */ | ||
2056 | case 0xad: /* shrd cl, r, r/m */ | ||
2057 | emulate_2op_cl("shrd", c->src2, c->src, c->dst, ctxt->eflags); | ||
2058 | break; | ||
2008 | case 0xae: /* clflush */ | 2059 | case 0xae: /* clflush */ |
2009 | break; | 2060 | break; |
2010 | case 0xb0 ... 0xb1: /* cmpxchg */ | 2061 | case 0xb0 ... 0xb1: /* cmpxchg */ |
diff --git a/arch/x86/lguest/boot.c b/arch/x86/lguest/boot.c index a5d8e1ace1cf..a7ed208f81e3 100644 --- a/arch/x86/lguest/boot.c +++ b/arch/x86/lguest/boot.c | |||
@@ -590,7 +590,8 @@ static void __init lguest_init_IRQ(void) | |||
590 | * a straightforward 1 to 1 mapping, so force that here. */ | 590 | * a straightforward 1 to 1 mapping, so force that here. */ |
591 | __get_cpu_var(vector_irq)[vector] = i; | 591 | __get_cpu_var(vector_irq)[vector] = i; |
592 | if (vector != SYSCALL_VECTOR) { | 592 | if (vector != SYSCALL_VECTOR) { |
593 | set_intr_gate(vector, interrupt[vector]); | 593 | set_intr_gate(vector, |
594 | interrupt[vector-FIRST_EXTERNAL_VECTOR]); | ||
594 | set_irq_chip_and_handler_name(i, &lguest_irq_controller, | 595 | set_irq_chip_and_handler_name(i, &lguest_irq_controller, |
595 | handle_level_irq, | 596 | handle_level_irq, |
596 | "level"); | 597 | "level"); |
@@ -737,7 +738,7 @@ static void lguest_time_init(void) | |||
737 | 738 | ||
738 | /* We can't set cpumask in the initializer: damn C limitations! Set it | 739 | /* We can't set cpumask in the initializer: damn C limitations! Set it |
739 | * here and register our timer device. */ | 740 | * here and register our timer device. */ |
740 | lguest_clockevent.cpumask = cpumask_of_cpu(0); | 741 | lguest_clockevent.cpumask = cpumask_of(0); |
741 | clockevents_register_device(&lguest_clockevent); | 742 | clockevents_register_device(&lguest_clockevent); |
742 | 743 | ||
743 | /* Finally, we unblock the timer interrupt. */ | 744 | /* Finally, we unblock the timer interrupt. */ |
diff --git a/arch/x86/lguest/i386_head.S b/arch/x86/lguest/i386_head.S index 5c7cef34c9e7..10b9bd35a8ff 100644 --- a/arch/x86/lguest/i386_head.S +++ b/arch/x86/lguest/i386_head.S | |||
@@ -30,21 +30,6 @@ ENTRY(lguest_entry) | |||
30 | movl $lguest_data - __PAGE_OFFSET, %edx | 30 | movl $lguest_data - __PAGE_OFFSET, %edx |
31 | int $LGUEST_TRAP_ENTRY | 31 | int $LGUEST_TRAP_ENTRY |
32 | 32 | ||
33 | /* The Host put the toplevel pagetable in lguest_data.pgdir. The movsl | ||
34 | * instruction uses %esi implicitly as the source for the copy we're | ||
35 | * about to do. */ | ||
36 | movl lguest_data - __PAGE_OFFSET + LGUEST_DATA_pgdir, %esi | ||
37 | |||
38 | /* Copy first 32 entries of page directory to __PAGE_OFFSET entries. | ||
39 | * This means the first 128M of kernel memory will be mapped at | ||
40 | * PAGE_OFFSET where the kernel expects to run. This will get it far | ||
41 | * enough through boot to switch to its own pagetables. */ | ||
42 | movl $32, %ecx | ||
43 | movl %esi, %edi | ||
44 | addl $((__PAGE_OFFSET >> 22) * 4), %edi | ||
45 | rep | ||
46 | movsl | ||
47 | |||
48 | /* Set up the initial stack so we can run C code. */ | 33 | /* Set up the initial stack so we can run C code. */ |
49 | movl $(init_thread_union+THREAD_SIZE),%esp | 34 | movl $(init_thread_union+THREAD_SIZE),%esp |
50 | 35 | ||
diff --git a/arch/x86/lib/usercopy_32.c b/arch/x86/lib/usercopy_32.c index 9e68075544f6..4a20b2f9a381 100644 --- a/arch/x86/lib/usercopy_32.c +++ b/arch/x86/lib/usercopy_32.c | |||
@@ -39,7 +39,7 @@ static inline int __movsl_is_ok(unsigned long a1, unsigned long a2, unsigned lon | |||
39 | #define __do_strncpy_from_user(dst, src, count, res) \ | 39 | #define __do_strncpy_from_user(dst, src, count, res) \ |
40 | do { \ | 40 | do { \ |
41 | int __d0, __d1, __d2; \ | 41 | int __d0, __d1, __d2; \ |
42 | might_sleep(); \ | 42 | might_fault(); \ |
43 | __asm__ __volatile__( \ | 43 | __asm__ __volatile__( \ |
44 | " testl %1,%1\n" \ | 44 | " testl %1,%1\n" \ |
45 | " jz 2f\n" \ | 45 | " jz 2f\n" \ |
@@ -126,7 +126,7 @@ EXPORT_SYMBOL(strncpy_from_user); | |||
126 | #define __do_clear_user(addr,size) \ | 126 | #define __do_clear_user(addr,size) \ |
127 | do { \ | 127 | do { \ |
128 | int __d0; \ | 128 | int __d0; \ |
129 | might_sleep(); \ | 129 | might_fault(); \ |
130 | __asm__ __volatile__( \ | 130 | __asm__ __volatile__( \ |
131 | "0: rep; stosl\n" \ | 131 | "0: rep; stosl\n" \ |
132 | " movl %2,%0\n" \ | 132 | " movl %2,%0\n" \ |
@@ -155,7 +155,7 @@ do { \ | |||
155 | unsigned long | 155 | unsigned long |
156 | clear_user(void __user *to, unsigned long n) | 156 | clear_user(void __user *to, unsigned long n) |
157 | { | 157 | { |
158 | might_sleep(); | 158 | might_fault(); |
159 | if (access_ok(VERIFY_WRITE, to, n)) | 159 | if (access_ok(VERIFY_WRITE, to, n)) |
160 | __do_clear_user(to, n); | 160 | __do_clear_user(to, n); |
161 | return n; | 161 | return n; |
@@ -197,7 +197,7 @@ long strnlen_user(const char __user *s, long n) | |||
197 | unsigned long mask = -__addr_ok(s); | 197 | unsigned long mask = -__addr_ok(s); |
198 | unsigned long res, tmp; | 198 | unsigned long res, tmp; |
199 | 199 | ||
200 | might_sleep(); | 200 | might_fault(); |
201 | 201 | ||
202 | __asm__ __volatile__( | 202 | __asm__ __volatile__( |
203 | " testl %0, %0\n" | 203 | " testl %0, %0\n" |
diff --git a/arch/x86/lib/usercopy_64.c b/arch/x86/lib/usercopy_64.c index f4df6e7c718b..64d6c84e6353 100644 --- a/arch/x86/lib/usercopy_64.c +++ b/arch/x86/lib/usercopy_64.c | |||
@@ -15,7 +15,7 @@ | |||
15 | #define __do_strncpy_from_user(dst,src,count,res) \ | 15 | #define __do_strncpy_from_user(dst,src,count,res) \ |
16 | do { \ | 16 | do { \ |
17 | long __d0, __d1, __d2; \ | 17 | long __d0, __d1, __d2; \ |
18 | might_sleep(); \ | 18 | might_fault(); \ |
19 | __asm__ __volatile__( \ | 19 | __asm__ __volatile__( \ |
20 | " testq %1,%1\n" \ | 20 | " testq %1,%1\n" \ |
21 | " jz 2f\n" \ | 21 | " jz 2f\n" \ |
@@ -64,7 +64,7 @@ EXPORT_SYMBOL(strncpy_from_user); | |||
64 | unsigned long __clear_user(void __user *addr, unsigned long size) | 64 | unsigned long __clear_user(void __user *addr, unsigned long size) |
65 | { | 65 | { |
66 | long __d0; | 66 | long __d0; |
67 | might_sleep(); | 67 | might_fault(); |
68 | /* no memory constraint because it doesn't change any memory gcc knows | 68 | /* no memory constraint because it doesn't change any memory gcc knows |
69 | about */ | 69 | about */ |
70 | asm volatile( | 70 | asm volatile( |
diff --git a/arch/x86/mach-default/setup.c b/arch/x86/mach-default/setup.c index 37b9ae4d44c5..df167f265622 100644 --- a/arch/x86/mach-default/setup.c +++ b/arch/x86/mach-default/setup.c | |||
@@ -133,29 +133,28 @@ void __init time_init_hook(void) | |||
133 | **/ | 133 | **/ |
134 | void mca_nmi_hook(void) | 134 | void mca_nmi_hook(void) |
135 | { | 135 | { |
136 | /* If I recall correctly, there's a whole bunch of other things that | 136 | /* |
137 | * If I recall correctly, there's a whole bunch of other things that | ||
137 | * we can do to check for NMI problems, but that's all I know about | 138 | * we can do to check for NMI problems, but that's all I know about |
138 | * at the moment. | 139 | * at the moment. |
139 | */ | 140 | */ |
140 | 141 | pr_warning("NMI generated from unknown source!\n"); | |
141 | printk("NMI generated from unknown source!\n"); | ||
142 | } | 142 | } |
143 | #endif | 143 | #endif |
144 | 144 | ||
145 | static __init int no_ipi_broadcast(char *str) | 145 | static __init int no_ipi_broadcast(char *str) |
146 | { | 146 | { |
147 | get_option(&str, &no_broadcast); | 147 | get_option(&str, &no_broadcast); |
148 | printk ("Using %s mode\n", no_broadcast ? "No IPI Broadcast" : | 148 | pr_info("Using %s mode\n", |
149 | "IPI Broadcast"); | 149 | no_broadcast ? "No IPI Broadcast" : "IPI Broadcast"); |
150 | return 1; | 150 | return 1; |
151 | } | 151 | } |
152 | |||
153 | __setup("no_ipi_broadcast=", no_ipi_broadcast); | 152 | __setup("no_ipi_broadcast=", no_ipi_broadcast); |
154 | 153 | ||
155 | static int __init print_ipi_mode(void) | 154 | static int __init print_ipi_mode(void) |
156 | { | 155 | { |
157 | printk ("Using IPI %s mode\n", no_broadcast ? "No-Shortcut" : | 156 | pr_info("Using IPI %s mode\n", |
158 | "Shortcut"); | 157 | no_broadcast ? "No-Shortcut" : "Shortcut"); |
159 | return 0; | 158 | return 0; |
160 | } | 159 | } |
161 | 160 | ||
diff --git a/arch/x86/mach-generic/bigsmp.c b/arch/x86/mach-generic/bigsmp.c index 3c3b471ea496..bc4c7840b2a8 100644 --- a/arch/x86/mach-generic/bigsmp.c +++ b/arch/x86/mach-generic/bigsmp.c | |||
@@ -17,6 +17,7 @@ | |||
17 | #include <asm/bigsmp/apic.h> | 17 | #include <asm/bigsmp/apic.h> |
18 | #include <asm/bigsmp/ipi.h> | 18 | #include <asm/bigsmp/ipi.h> |
19 | #include <asm/mach-default/mach_mpparse.h> | 19 | #include <asm/mach-default/mach_mpparse.h> |
20 | #include <asm/mach-default/mach_wakecpu.h> | ||
20 | 21 | ||
21 | static int dmi_bigsmp; /* can be set by dmi scanners */ | 22 | static int dmi_bigsmp; /* can be set by dmi scanners */ |
22 | 23 | ||
@@ -41,9 +42,10 @@ static const struct dmi_system_id bigsmp_dmi_table[] = { | |||
41 | { } | 42 | { } |
42 | }; | 43 | }; |
43 | 44 | ||
44 | static cpumask_t vector_allocation_domain(int cpu) | 45 | static void vector_allocation_domain(int cpu, cpumask_t *retmask) |
45 | { | 46 | { |
46 | return cpumask_of_cpu(cpu); | 47 | cpus_clear(*retmask); |
48 | cpu_set(cpu, *retmask); | ||
47 | } | 49 | } |
48 | 50 | ||
49 | static int probe_bigsmp(void) | 51 | static int probe_bigsmp(void) |
diff --git a/arch/x86/mach-generic/default.c b/arch/x86/mach-generic/default.c index 9e835a11a13a..e63a4a76d8cd 100644 --- a/arch/x86/mach-generic/default.c +++ b/arch/x86/mach-generic/default.c | |||
@@ -16,6 +16,7 @@ | |||
16 | #include <asm/mach-default/mach_apic.h> | 16 | #include <asm/mach-default/mach_apic.h> |
17 | #include <asm/mach-default/mach_ipi.h> | 17 | #include <asm/mach-default/mach_ipi.h> |
18 | #include <asm/mach-default/mach_mpparse.h> | 18 | #include <asm/mach-default/mach_mpparse.h> |
19 | #include <asm/mach-default/mach_wakecpu.h> | ||
19 | 20 | ||
20 | /* should be called last. */ | 21 | /* should be called last. */ |
21 | static int probe_default(void) | 22 | static int probe_default(void) |
diff --git a/arch/x86/mach-generic/es7000.c b/arch/x86/mach-generic/es7000.c index 28459cab3ddb..4ba5ccaa1584 100644 --- a/arch/x86/mach-generic/es7000.c +++ b/arch/x86/mach-generic/es7000.c | |||
@@ -16,7 +16,19 @@ | |||
16 | #include <asm/es7000/apic.h> | 16 | #include <asm/es7000/apic.h> |
17 | #include <asm/es7000/ipi.h> | 17 | #include <asm/es7000/ipi.h> |
18 | #include <asm/es7000/mpparse.h> | 18 | #include <asm/es7000/mpparse.h> |
19 | #include <asm/es7000/wakecpu.h> | 19 | #include <asm/mach-default/mach_wakecpu.h> |
20 | |||
21 | void __init es7000_update_genapic_to_cluster(void) | ||
22 | { | ||
23 | genapic->target_cpus = target_cpus_cluster; | ||
24 | genapic->int_delivery_mode = INT_DELIVERY_MODE_CLUSTER; | ||
25 | genapic->int_dest_mode = INT_DEST_MODE_CLUSTER; | ||
26 | genapic->no_balance_irq = NO_BALANCE_IRQ_CLUSTER; | ||
27 | |||
28 | genapic->init_apic_ldr = init_apic_ldr_cluster; | ||
29 | |||
30 | genapic->cpu_mask_to_apicid = cpu_mask_to_apicid_cluster; | ||
31 | } | ||
20 | 32 | ||
21 | static int probe_es7000(void) | 33 | static int probe_es7000(void) |
22 | { | 34 | { |
@@ -75,7 +87,7 @@ static int __init acpi_madt_oem_check(char *oem_id, char *oem_table_id) | |||
75 | } | 87 | } |
76 | #endif | 88 | #endif |
77 | 89 | ||
78 | static cpumask_t vector_allocation_domain(int cpu) | 90 | static void vector_allocation_domain(int cpu, cpumask_t *retmask) |
79 | { | 91 | { |
80 | /* Careful. Some cpus do not strictly honor the set of cpus | 92 | /* Careful. Some cpus do not strictly honor the set of cpus |
81 | * specified in the interrupt destination when using lowest | 93 | * specified in the interrupt destination when using lowest |
@@ -85,8 +97,7 @@ static cpumask_t vector_allocation_domain(int cpu) | |||
85 | * deliver interrupts to the wrong hyperthread when only one | 97 | * deliver interrupts to the wrong hyperthread when only one |
86 | * hyperthread was specified in the interrupt desitination. | 98 | * hyperthread was specified in the interrupt desitination. |
87 | */ | 99 | */ |
88 | cpumask_t domain = { { [0] = APIC_ALL_CPUS, } }; | 100 | *retmask = (cpumask_t){ { [0] = APIC_ALL_CPUS, } }; |
89 | return domain; | ||
90 | } | 101 | } |
91 | 102 | ||
92 | struct genapic __initdata_refok apic_es7000 = APIC_INIT("es7000", probe_es7000); | 103 | struct genapic __initdata_refok apic_es7000 = APIC_INIT("es7000", probe_es7000); |
diff --git a/arch/x86/mach-generic/numaq.c b/arch/x86/mach-generic/numaq.c index 71a309b122e6..511d7941364f 100644 --- a/arch/x86/mach-generic/numaq.c +++ b/arch/x86/mach-generic/numaq.c | |||
@@ -38,7 +38,7 @@ static int acpi_madt_oem_check(char *oem_id, char *oem_table_id) | |||
38 | return 0; | 38 | return 0; |
39 | } | 39 | } |
40 | 40 | ||
41 | static cpumask_t vector_allocation_domain(int cpu) | 41 | static void vector_allocation_domain(int cpu, cpumask_t *retmask) |
42 | { | 42 | { |
43 | /* Careful. Some cpus do not strictly honor the set of cpus | 43 | /* Careful. Some cpus do not strictly honor the set of cpus |
44 | * specified in the interrupt destination when using lowest | 44 | * specified in the interrupt destination when using lowest |
@@ -48,8 +48,7 @@ static cpumask_t vector_allocation_domain(int cpu) | |||
48 | * deliver interrupts to the wrong hyperthread when only one | 48 | * deliver interrupts to the wrong hyperthread when only one |
49 | * hyperthread was specified in the interrupt desitination. | 49 | * hyperthread was specified in the interrupt desitination. |
50 | */ | 50 | */ |
51 | cpumask_t domain = { { [0] = APIC_ALL_CPUS, } }; | 51 | *retmask = (cpumask_t){ { [0] = APIC_ALL_CPUS, } }; |
52 | return domain; | ||
53 | } | 52 | } |
54 | 53 | ||
55 | struct genapic apic_numaq = APIC_INIT("NUMAQ", probe_numaq); | 54 | struct genapic apic_numaq = APIC_INIT("NUMAQ", probe_numaq); |
diff --git a/arch/x86/mach-generic/probe.c b/arch/x86/mach-generic/probe.c index 5a7e4619e1c4..c346d9d0226f 100644 --- a/arch/x86/mach-generic/probe.c +++ b/arch/x86/mach-generic/probe.c | |||
@@ -15,6 +15,7 @@ | |||
15 | #include <asm/mpspec.h> | 15 | #include <asm/mpspec.h> |
16 | #include <asm/apicdef.h> | 16 | #include <asm/apicdef.h> |
17 | #include <asm/genapic.h> | 17 | #include <asm/genapic.h> |
18 | #include <asm/setup.h> | ||
18 | 19 | ||
19 | extern struct genapic apic_numaq; | 20 | extern struct genapic apic_numaq; |
20 | extern struct genapic apic_summit; | 21 | extern struct genapic apic_summit; |
@@ -57,6 +58,9 @@ static int __init parse_apic(char *arg) | |||
57 | } | 58 | } |
58 | } | 59 | } |
59 | 60 | ||
61 | if (x86_quirks->update_genapic) | ||
62 | x86_quirks->update_genapic(); | ||
63 | |||
60 | /* Parsed again by __setup for debug/verbose */ | 64 | /* Parsed again by __setup for debug/verbose */ |
61 | return 0; | 65 | return 0; |
62 | } | 66 | } |
@@ -72,12 +76,15 @@ void __init generic_bigsmp_probe(void) | |||
72 | * - we find more than 8 CPUs in acpi LAPIC listing with xAPIC support | 76 | * - we find more than 8 CPUs in acpi LAPIC listing with xAPIC support |
73 | */ | 77 | */ |
74 | 78 | ||
75 | if (!cmdline_apic && genapic == &apic_default) | 79 | if (!cmdline_apic && genapic == &apic_default) { |
76 | if (apic_bigsmp.probe()) { | 80 | if (apic_bigsmp.probe()) { |
77 | genapic = &apic_bigsmp; | 81 | genapic = &apic_bigsmp; |
82 | if (x86_quirks->update_genapic) | ||
83 | x86_quirks->update_genapic(); | ||
78 | printk(KERN_INFO "Overriding APIC driver with %s\n", | 84 | printk(KERN_INFO "Overriding APIC driver with %s\n", |
79 | genapic->name); | 85 | genapic->name); |
80 | } | 86 | } |
87 | } | ||
81 | #endif | 88 | #endif |
82 | } | 89 | } |
83 | 90 | ||
@@ -94,6 +101,9 @@ void __init generic_apic_probe(void) | |||
94 | /* Not visible without early console */ | 101 | /* Not visible without early console */ |
95 | if (!apic_probe[i]) | 102 | if (!apic_probe[i]) |
96 | panic("Didn't find an APIC driver"); | 103 | panic("Didn't find an APIC driver"); |
104 | |||
105 | if (x86_quirks->update_genapic) | ||
106 | x86_quirks->update_genapic(); | ||
97 | } | 107 | } |
98 | printk(KERN_INFO "Using APIC driver %s\n", genapic->name); | 108 | printk(KERN_INFO "Using APIC driver %s\n", genapic->name); |
99 | } | 109 | } |
@@ -108,6 +118,8 @@ int __init mps_oem_check(struct mp_config_table *mpc, char *oem, | |||
108 | if (apic_probe[i]->mps_oem_check(mpc, oem, productid)) { | 118 | if (apic_probe[i]->mps_oem_check(mpc, oem, productid)) { |
109 | if (!cmdline_apic) { | 119 | if (!cmdline_apic) { |
110 | genapic = apic_probe[i]; | 120 | genapic = apic_probe[i]; |
121 | if (x86_quirks->update_genapic) | ||
122 | x86_quirks->update_genapic(); | ||
111 | printk(KERN_INFO "Switched to APIC driver `%s'.\n", | 123 | printk(KERN_INFO "Switched to APIC driver `%s'.\n", |
112 | genapic->name); | 124 | genapic->name); |
113 | } | 125 | } |
@@ -124,6 +136,8 @@ int __init acpi_madt_oem_check(char *oem_id, char *oem_table_id) | |||
124 | if (apic_probe[i]->acpi_madt_oem_check(oem_id, oem_table_id)) { | 136 | if (apic_probe[i]->acpi_madt_oem_check(oem_id, oem_table_id)) { |
125 | if (!cmdline_apic) { | 137 | if (!cmdline_apic) { |
126 | genapic = apic_probe[i]; | 138 | genapic = apic_probe[i]; |
139 | if (x86_quirks->update_genapic) | ||
140 | x86_quirks->update_genapic(); | ||
127 | printk(KERN_INFO "Switched to APIC driver `%s'.\n", | 141 | printk(KERN_INFO "Switched to APIC driver `%s'.\n", |
128 | genapic->name); | 142 | genapic->name); |
129 | } | 143 | } |
diff --git a/arch/x86/mach-generic/summit.c b/arch/x86/mach-generic/summit.c index 6272b5e69da6..2821ffc188b5 100644 --- a/arch/x86/mach-generic/summit.c +++ b/arch/x86/mach-generic/summit.c | |||
@@ -16,6 +16,7 @@ | |||
16 | #include <asm/summit/apic.h> | 16 | #include <asm/summit/apic.h> |
17 | #include <asm/summit/ipi.h> | 17 | #include <asm/summit/ipi.h> |
18 | #include <asm/summit/mpparse.h> | 18 | #include <asm/summit/mpparse.h> |
19 | #include <asm/mach-default/mach_wakecpu.h> | ||
19 | 20 | ||
20 | static int probe_summit(void) | 21 | static int probe_summit(void) |
21 | { | 22 | { |
@@ -23,7 +24,7 @@ static int probe_summit(void) | |||
23 | return 0; | 24 | return 0; |
24 | } | 25 | } |
25 | 26 | ||
26 | static cpumask_t vector_allocation_domain(int cpu) | 27 | static void vector_allocation_domain(int cpu, cpumask_t *retmask) |
27 | { | 28 | { |
28 | /* Careful. Some cpus do not strictly honor the set of cpus | 29 | /* Careful. Some cpus do not strictly honor the set of cpus |
29 | * specified in the interrupt destination when using lowest | 30 | * specified in the interrupt destination when using lowest |
@@ -33,8 +34,7 @@ static cpumask_t vector_allocation_domain(int cpu) | |||
33 | * deliver interrupts to the wrong hyperthread when only one | 34 | * deliver interrupts to the wrong hyperthread when only one |
34 | * hyperthread was specified in the interrupt desitination. | 35 | * hyperthread was specified in the interrupt desitination. |
35 | */ | 36 | */ |
36 | cpumask_t domain = { { [0] = APIC_ALL_CPUS, } }; | 37 | *retmask = (cpumask_t){ { [0] = APIC_ALL_CPUS, } }; |
37 | return domain; | ||
38 | } | 38 | } |
39 | 39 | ||
40 | struct genapic apic_summit = APIC_INIT("summit", probe_summit); | 40 | struct genapic apic_summit = APIC_INIT("summit", probe_summit); |
diff --git a/arch/x86/mach-voyager/voyager_smp.c b/arch/x86/mach-voyager/voyager_smp.c index 52145007bd7e..9840b7ec749a 100644 --- a/arch/x86/mach-voyager/voyager_smp.c +++ b/arch/x86/mach-voyager/voyager_smp.c | |||
@@ -63,11 +63,6 @@ static int voyager_extended_cpus = 1; | |||
63 | /* Used for the invalidate map that's also checked in the spinlock */ | 63 | /* Used for the invalidate map that's also checked in the spinlock */ |
64 | static volatile unsigned long smp_invalidate_needed; | 64 | static volatile unsigned long smp_invalidate_needed; |
65 | 65 | ||
66 | /* Bitmask of currently online CPUs - used by setup.c for | ||
67 | /proc/cpuinfo, visible externally but still physical */ | ||
68 | cpumask_t cpu_online_map = CPU_MASK_NONE; | ||
69 | EXPORT_SYMBOL(cpu_online_map); | ||
70 | |||
71 | /* Bitmask of CPUs present in the system - exported by i386_syms.c, used | 66 | /* Bitmask of CPUs present in the system - exported by i386_syms.c, used |
72 | * by scheduler but indexed physically */ | 67 | * by scheduler but indexed physically */ |
73 | cpumask_t phys_cpu_present_map = CPU_MASK_NONE; | 68 | cpumask_t phys_cpu_present_map = CPU_MASK_NONE; |
@@ -218,8 +213,6 @@ static cpumask_t smp_commenced_mask = CPU_MASK_NONE; | |||
218 | /* This is for the new dynamic CPU boot code */ | 213 | /* This is for the new dynamic CPU boot code */ |
219 | cpumask_t cpu_callin_map = CPU_MASK_NONE; | 214 | cpumask_t cpu_callin_map = CPU_MASK_NONE; |
220 | cpumask_t cpu_callout_map = CPU_MASK_NONE; | 215 | cpumask_t cpu_callout_map = CPU_MASK_NONE; |
221 | cpumask_t cpu_possible_map = CPU_MASK_NONE; | ||
222 | EXPORT_SYMBOL(cpu_possible_map); | ||
223 | 216 | ||
224 | /* The per processor IRQ masks (these are usually kept in sync) */ | 217 | /* The per processor IRQ masks (these are usually kept in sync) */ |
225 | static __u16 vic_irq_mask[NR_CPUS] __cacheline_aligned; | 218 | static __u16 vic_irq_mask[NR_CPUS] __cacheline_aligned; |
@@ -364,9 +357,8 @@ void __init find_smp_config(void) | |||
364 | printk("VOYAGER SMP: Boot cpu is %d\n", boot_cpu_id); | 357 | printk("VOYAGER SMP: Boot cpu is %d\n", boot_cpu_id); |
365 | 358 | ||
366 | /* initialize the CPU structures (moved from smp_boot_cpus) */ | 359 | /* initialize the CPU structures (moved from smp_boot_cpus) */ |
367 | for (i = 0; i < NR_CPUS; i++) { | 360 | for (i = 0; i < nr_cpu_ids; i++) |
368 | cpu_irq_affinity[i] = ~0; | 361 | cpu_irq_affinity[i] = ~0; |
369 | } | ||
370 | cpu_online_map = cpumask_of_cpu(boot_cpu_id); | 362 | cpu_online_map = cpumask_of_cpu(boot_cpu_id); |
371 | 363 | ||
372 | /* The boot CPU must be extended */ | 364 | /* The boot CPU must be extended */ |
@@ -679,7 +671,7 @@ void __init smp_boot_cpus(void) | |||
679 | 671 | ||
680 | /* loop over all the extended VIC CPUs and boot them. The | 672 | /* loop over all the extended VIC CPUs and boot them. The |
681 | * Quad CPUs must be bootstrapped by their extended VIC cpu */ | 673 | * Quad CPUs must be bootstrapped by their extended VIC cpu */ |
682 | for (i = 0; i < NR_CPUS; i++) { | 674 | for (i = 0; i < nr_cpu_ids; i++) { |
683 | if (i == boot_cpu_id || !cpu_isset(i, phys_cpu_present_map)) | 675 | if (i == boot_cpu_id || !cpu_isset(i, phys_cpu_present_map)) |
684 | continue; | 676 | continue; |
685 | do_boot_cpu(i); | 677 | do_boot_cpu(i); |
@@ -1234,7 +1226,7 @@ int setup_profiling_timer(unsigned int multiplier) | |||
1234 | * new values until the next timer interrupt in which they do process | 1226 | * new values until the next timer interrupt in which they do process |
1235 | * accounting. | 1227 | * accounting. |
1236 | */ | 1228 | */ |
1237 | for (i = 0; i < NR_CPUS; ++i) | 1229 | for (i = 0; i < nr_cpu_ids; ++i) |
1238 | per_cpu(prof_multiplier, i) = multiplier; | 1230 | per_cpu(prof_multiplier, i) = multiplier; |
1239 | 1231 | ||
1240 | return 0; | 1232 | return 0; |
@@ -1264,7 +1256,7 @@ void __init voyager_smp_intr_init(void) | |||
1264 | int i; | 1256 | int i; |
1265 | 1257 | ||
1266 | /* initialize the per cpu irq mask to all disabled */ | 1258 | /* initialize the per cpu irq mask to all disabled */ |
1267 | for (i = 0; i < NR_CPUS; i++) | 1259 | for (i = 0; i < nr_cpu_ids; i++) |
1268 | vic_irq_mask[i] = 0xFFFF; | 1260 | vic_irq_mask[i] = 0xFFFF; |
1269 | 1261 | ||
1270 | VIC_SET_GATE(VIC_CPI_LEVEL0, vic_cpi_interrupt); | 1262 | VIC_SET_GATE(VIC_CPI_LEVEL0, vic_cpi_interrupt); |
diff --git a/arch/x86/mm/Makefile b/arch/x86/mm/Makefile index fea4565ff576..d8cc96a2738f 100644 --- a/arch/x86/mm/Makefile +++ b/arch/x86/mm/Makefile | |||
@@ -8,9 +8,8 @@ obj-$(CONFIG_X86_PTDUMP) += dump_pagetables.o | |||
8 | 8 | ||
9 | obj-$(CONFIG_HIGHMEM) += highmem_32.o | 9 | obj-$(CONFIG_HIGHMEM) += highmem_32.o |
10 | 10 | ||
11 | obj-$(CONFIG_MMIOTRACE_HOOKS) += kmmio.o | ||
12 | obj-$(CONFIG_MMIOTRACE) += mmiotrace.o | 11 | obj-$(CONFIG_MMIOTRACE) += mmiotrace.o |
13 | mmiotrace-y := pf_in.o mmio-mod.o | 12 | mmiotrace-y := kmmio.o pf_in.o mmio-mod.o |
14 | obj-$(CONFIG_MMIOTRACE_TEST) += testmmiotrace.o | 13 | obj-$(CONFIG_MMIOTRACE_TEST) += testmmiotrace.o |
15 | 14 | ||
16 | obj-$(CONFIG_NUMA) += numa_$(BITS).o | 15 | obj-$(CONFIG_NUMA) += numa_$(BITS).o |
diff --git a/arch/x86/mm/fault.c b/arch/x86/mm/fault.c index 31e8730fa246..9e268b6b204e 100644 --- a/arch/x86/mm/fault.c +++ b/arch/x86/mm/fault.c | |||
@@ -53,7 +53,7 @@ | |||
53 | 53 | ||
54 | static inline int kmmio_fault(struct pt_regs *regs, unsigned long addr) | 54 | static inline int kmmio_fault(struct pt_regs *regs, unsigned long addr) |
55 | { | 55 | { |
56 | #ifdef CONFIG_MMIOTRACE_HOOKS | 56 | #ifdef CONFIG_MMIOTRACE |
57 | if (unlikely(is_kmmio_active())) | 57 | if (unlikely(is_kmmio_active())) |
58 | if (kmmio_handler(regs, addr) == 1) | 58 | if (kmmio_handler(regs, addr) == 1) |
59 | return -1; | 59 | return -1; |
@@ -393,7 +393,7 @@ static void show_fault_oops(struct pt_regs *regs, unsigned long error_code, | |||
393 | if (pte && pte_present(*pte) && !pte_exec(*pte)) | 393 | if (pte && pte_present(*pte) && !pte_exec(*pte)) |
394 | printk(KERN_CRIT "kernel tried to execute " | 394 | printk(KERN_CRIT "kernel tried to execute " |
395 | "NX-protected page - exploit attempt? " | 395 | "NX-protected page - exploit attempt? " |
396 | "(uid: %d)\n", current->uid); | 396 | "(uid: %d)\n", current_uid()); |
397 | } | 397 | } |
398 | #endif | 398 | #endif |
399 | 399 | ||
@@ -413,6 +413,7 @@ static noinline void pgtable_bad(unsigned long address, struct pt_regs *regs, | |||
413 | unsigned long error_code) | 413 | unsigned long error_code) |
414 | { | 414 | { |
415 | unsigned long flags = oops_begin(); | 415 | unsigned long flags = oops_begin(); |
416 | int sig = SIGKILL; | ||
416 | struct task_struct *tsk; | 417 | struct task_struct *tsk; |
417 | 418 | ||
418 | printk(KERN_ALERT "%s: Corrupted page table at address %lx\n", | 419 | printk(KERN_ALERT "%s: Corrupted page table at address %lx\n", |
@@ -423,8 +424,8 @@ static noinline void pgtable_bad(unsigned long address, struct pt_regs *regs, | |||
423 | tsk->thread.trap_no = 14; | 424 | tsk->thread.trap_no = 14; |
424 | tsk->thread.error_code = error_code; | 425 | tsk->thread.error_code = error_code; |
425 | if (__die("Bad pagetable", regs, error_code)) | 426 | if (__die("Bad pagetable", regs, error_code)) |
426 | regs = NULL; | 427 | sig = 0; |
427 | oops_end(flags, regs, SIGKILL); | 428 | oops_end(flags, regs, sig); |
428 | } | 429 | } |
429 | #endif | 430 | #endif |
430 | 431 | ||
@@ -590,6 +591,7 @@ void __kprobes do_page_fault(struct pt_regs *regs, unsigned long error_code) | |||
590 | int fault; | 591 | int fault; |
591 | #ifdef CONFIG_X86_64 | 592 | #ifdef CONFIG_X86_64 |
592 | unsigned long flags; | 593 | unsigned long flags; |
594 | int sig; | ||
593 | #endif | 595 | #endif |
594 | 596 | ||
595 | tsk = current; | 597 | tsk = current; |
@@ -665,7 +667,6 @@ void __kprobes do_page_fault(struct pt_regs *regs, unsigned long error_code) | |||
665 | if (unlikely(in_atomic() || !mm)) | 667 | if (unlikely(in_atomic() || !mm)) |
666 | goto bad_area_nosemaphore; | 668 | goto bad_area_nosemaphore; |
667 | 669 | ||
668 | again: | ||
669 | /* | 670 | /* |
670 | * When running in the kernel we expect faults to occur only to | 671 | * When running in the kernel we expect faults to occur only to |
671 | * addresses in user space. All other faults represent errors in the | 672 | * addresses in user space. All other faults represent errors in the |
@@ -849,32 +850,22 @@ no_context: | |||
849 | bust_spinlocks(0); | 850 | bust_spinlocks(0); |
850 | do_exit(SIGKILL); | 851 | do_exit(SIGKILL); |
851 | #else | 852 | #else |
853 | sig = SIGKILL; | ||
852 | if (__die("Oops", regs, error_code)) | 854 | if (__die("Oops", regs, error_code)) |
853 | regs = NULL; | 855 | sig = 0; |
854 | /* Executive summary in case the body of the oops scrolled away */ | 856 | /* Executive summary in case the body of the oops scrolled away */ |
855 | printk(KERN_EMERG "CR2: %016lx\n", address); | 857 | printk(KERN_EMERG "CR2: %016lx\n", address); |
856 | oops_end(flags, regs, SIGKILL); | 858 | oops_end(flags, regs, sig); |
857 | #endif | 859 | #endif |
858 | 860 | ||
859 | /* | ||
860 | * We ran out of memory, or some other thing happened to us that made | ||
861 | * us unable to handle the page fault gracefully. | ||
862 | */ | ||
863 | out_of_memory: | 861 | out_of_memory: |
862 | /* | ||
863 | * We ran out of memory, call the OOM killer, and return the userspace | ||
864 | * (which will retry the fault, or kill us if we got oom-killed). | ||
865 | */ | ||
864 | up_read(&mm->mmap_sem); | 866 | up_read(&mm->mmap_sem); |
865 | if (is_global_init(tsk)) { | 867 | pagefault_out_of_memory(); |
866 | yield(); | 868 | return; |
867 | /* | ||
868 | * Re-lookup the vma - in theory the vma tree might | ||
869 | * have changed: | ||
870 | */ | ||
871 | goto again; | ||
872 | } | ||
873 | |||
874 | printk("VM: killing process %s\n", tsk->comm); | ||
875 | if (error_code & PF_USER) | ||
876 | do_group_exit(SIGKILL); | ||
877 | goto no_context; | ||
878 | 869 | ||
879 | do_sigbus: | 870 | do_sigbus: |
880 | up_read(&mm->mmap_sem); | 871 | up_read(&mm->mmap_sem); |
diff --git a/arch/x86/mm/init_32.c b/arch/x86/mm/init_32.c index c483f4242079..88f1b10de3be 100644 --- a/arch/x86/mm/init_32.c +++ b/arch/x86/mm/init_32.c | |||
@@ -21,6 +21,7 @@ | |||
21 | #include <linux/init.h> | 21 | #include <linux/init.h> |
22 | #include <linux/highmem.h> | 22 | #include <linux/highmem.h> |
23 | #include <linux/pagemap.h> | 23 | #include <linux/pagemap.h> |
24 | #include <linux/pci.h> | ||
24 | #include <linux/pfn.h> | 25 | #include <linux/pfn.h> |
25 | #include <linux/poison.h> | 26 | #include <linux/poison.h> |
26 | #include <linux/bootmem.h> | 27 | #include <linux/bootmem.h> |
@@ -67,7 +68,7 @@ static unsigned long __meminitdata table_top; | |||
67 | 68 | ||
68 | static int __initdata after_init_bootmem; | 69 | static int __initdata after_init_bootmem; |
69 | 70 | ||
70 | static __init void *alloc_low_page(unsigned long *phys) | 71 | static __init void *alloc_low_page(void) |
71 | { | 72 | { |
72 | unsigned long pfn = table_end++; | 73 | unsigned long pfn = table_end++; |
73 | void *adr; | 74 | void *adr; |
@@ -77,7 +78,6 @@ static __init void *alloc_low_page(unsigned long *phys) | |||
77 | 78 | ||
78 | adr = __va(pfn * PAGE_SIZE); | 79 | adr = __va(pfn * PAGE_SIZE); |
79 | memset(adr, 0, PAGE_SIZE); | 80 | memset(adr, 0, PAGE_SIZE); |
80 | *phys = pfn * PAGE_SIZE; | ||
81 | return adr; | 81 | return adr; |
82 | } | 82 | } |
83 | 83 | ||
@@ -92,16 +92,17 @@ static pmd_t * __init one_md_table_init(pgd_t *pgd) | |||
92 | pmd_t *pmd_table; | 92 | pmd_t *pmd_table; |
93 | 93 | ||
94 | #ifdef CONFIG_X86_PAE | 94 | #ifdef CONFIG_X86_PAE |
95 | unsigned long phys; | ||
96 | if (!(pgd_val(*pgd) & _PAGE_PRESENT)) { | 95 | if (!(pgd_val(*pgd) & _PAGE_PRESENT)) { |
97 | if (after_init_bootmem) | 96 | if (after_init_bootmem) |
98 | pmd_table = (pmd_t *)alloc_bootmem_low_pages(PAGE_SIZE); | 97 | pmd_table = (pmd_t *)alloc_bootmem_low_pages(PAGE_SIZE); |
99 | else | 98 | else |
100 | pmd_table = (pmd_t *)alloc_low_page(&phys); | 99 | pmd_table = (pmd_t *)alloc_low_page(); |
101 | paravirt_alloc_pmd(&init_mm, __pa(pmd_table) >> PAGE_SHIFT); | 100 | paravirt_alloc_pmd(&init_mm, __pa(pmd_table) >> PAGE_SHIFT); |
102 | set_pgd(pgd, __pgd(__pa(pmd_table) | _PAGE_PRESENT)); | 101 | set_pgd(pgd, __pgd(__pa(pmd_table) | _PAGE_PRESENT)); |
103 | pud = pud_offset(pgd, 0); | 102 | pud = pud_offset(pgd, 0); |
104 | BUG_ON(pmd_table != pmd_offset(pud, 0)); | 103 | BUG_ON(pmd_table != pmd_offset(pud, 0)); |
104 | |||
105 | return pmd_table; | ||
105 | } | 106 | } |
106 | #endif | 107 | #endif |
107 | pud = pud_offset(pgd, 0); | 108 | pud = pud_offset(pgd, 0); |
@@ -126,10 +127,8 @@ static pte_t * __init one_page_table_init(pmd_t *pmd) | |||
126 | if (!page_table) | 127 | if (!page_table) |
127 | page_table = | 128 | page_table = |
128 | (pte_t *)alloc_bootmem_low_pages(PAGE_SIZE); | 129 | (pte_t *)alloc_bootmem_low_pages(PAGE_SIZE); |
129 | } else { | 130 | } else |
130 | unsigned long phys; | 131 | page_table = (pte_t *)alloc_low_page(); |
131 | page_table = (pte_t *)alloc_low_page(&phys); | ||
132 | } | ||
133 | 132 | ||
134 | paravirt_alloc_pte(&init_mm, __pa(page_table) >> PAGE_SHIFT); | 133 | paravirt_alloc_pte(&init_mm, __pa(page_table) >> PAGE_SHIFT); |
135 | set_pmd(pmd, __pmd(__pa(page_table) | _PAGE_TABLE)); | 134 | set_pmd(pmd, __pmd(__pa(page_table) | _PAGE_TABLE)); |
@@ -329,6 +328,8 @@ int devmem_is_allowed(unsigned long pagenr) | |||
329 | { | 328 | { |
330 | if (pagenr <= 256) | 329 | if (pagenr <= 256) |
331 | return 1; | 330 | return 1; |
331 | if (iomem_is_exclusive(pagenr << PAGE_SHIFT)) | ||
332 | return 0; | ||
332 | if (!page_is_ram(pagenr)) | 333 | if (!page_is_ram(pagenr)) |
333 | return 1; | 334 | return 1; |
334 | return 0; | 335 | return 0; |
@@ -436,8 +437,12 @@ static void __init set_highmem_pages_init(void) | |||
436 | #endif /* !CONFIG_NUMA */ | 437 | #endif /* !CONFIG_NUMA */ |
437 | 438 | ||
438 | #else | 439 | #else |
439 | # define permanent_kmaps_init(pgd_base) do { } while (0) | 440 | static inline void permanent_kmaps_init(pgd_t *pgd_base) |
440 | # define set_highmem_pages_init() do { } while (0) | 441 | { |
442 | } | ||
443 | static inline void set_highmem_pages_init(void) | ||
444 | { | ||
445 | } | ||
441 | #endif /* CONFIG_HIGHMEM */ | 446 | #endif /* CONFIG_HIGHMEM */ |
442 | 447 | ||
443 | void __init native_pagetable_setup_start(pgd_t *base) | 448 | void __init native_pagetable_setup_start(pgd_t *base) |
@@ -969,7 +974,7 @@ void __init mem_init(void) | |||
969 | int codesize, reservedpages, datasize, initsize; | 974 | int codesize, reservedpages, datasize, initsize; |
970 | int tmp; | 975 | int tmp; |
971 | 976 | ||
972 | start_periodic_check_for_corruption(); | 977 | pci_iommu_alloc(); |
973 | 978 | ||
974 | #ifdef CONFIG_FLATMEM | 979 | #ifdef CONFIG_FLATMEM |
975 | BUG_ON(!mem_map); | 980 | BUG_ON(!mem_map); |
@@ -1040,11 +1045,25 @@ void __init mem_init(void) | |||
1040 | (unsigned long)&_text, (unsigned long)&_etext, | 1045 | (unsigned long)&_text, (unsigned long)&_etext, |
1041 | ((unsigned long)&_etext - (unsigned long)&_text) >> 10); | 1046 | ((unsigned long)&_etext - (unsigned long)&_text) >> 10); |
1042 | 1047 | ||
1048 | /* | ||
1049 | * Check boundaries twice: Some fundamental inconsistencies can | ||
1050 | * be detected at build time already. | ||
1051 | */ | ||
1052 | #define __FIXADDR_TOP (-PAGE_SIZE) | ||
1053 | #ifdef CONFIG_HIGHMEM | ||
1054 | BUILD_BUG_ON(PKMAP_BASE + LAST_PKMAP*PAGE_SIZE > FIXADDR_START); | ||
1055 | BUILD_BUG_ON(VMALLOC_END > PKMAP_BASE); | ||
1056 | #endif | ||
1057 | #define high_memory (-128UL << 20) | ||
1058 | BUILD_BUG_ON(VMALLOC_START >= VMALLOC_END); | ||
1059 | #undef high_memory | ||
1060 | #undef __FIXADDR_TOP | ||
1061 | |||
1043 | #ifdef CONFIG_HIGHMEM | 1062 | #ifdef CONFIG_HIGHMEM |
1044 | BUG_ON(PKMAP_BASE + LAST_PKMAP*PAGE_SIZE > FIXADDR_START); | 1063 | BUG_ON(PKMAP_BASE + LAST_PKMAP*PAGE_SIZE > FIXADDR_START); |
1045 | BUG_ON(VMALLOC_END > PKMAP_BASE); | 1064 | BUG_ON(VMALLOC_END > PKMAP_BASE); |
1046 | #endif | 1065 | #endif |
1047 | BUG_ON(VMALLOC_START > VMALLOC_END); | 1066 | BUG_ON(VMALLOC_START >= VMALLOC_END); |
1048 | BUG_ON((unsigned long)high_memory > VMALLOC_START); | 1067 | BUG_ON((unsigned long)high_memory > VMALLOC_START); |
1049 | 1068 | ||
1050 | if (boot_cpu_data.wp_works_ok < 0) | 1069 | if (boot_cpu_data.wp_works_ok < 0) |
@@ -1062,7 +1081,7 @@ int arch_add_memory(int nid, u64 start, u64 size) | |||
1062 | unsigned long start_pfn = start >> PAGE_SHIFT; | 1081 | unsigned long start_pfn = start >> PAGE_SHIFT; |
1063 | unsigned long nr_pages = size >> PAGE_SHIFT; | 1082 | unsigned long nr_pages = size >> PAGE_SHIFT; |
1064 | 1083 | ||
1065 | return __add_pages(zone, start_pfn, nr_pages); | 1084 | return __add_pages(nid, zone, start_pfn, nr_pages); |
1066 | } | 1085 | } |
1067 | #endif | 1086 | #endif |
1068 | 1087 | ||
diff --git a/arch/x86/mm/init_64.c b/arch/x86/mm/init_64.c index 9db01db6e3cd..23f68e77ad1f 100644 --- a/arch/x86/mm/init_64.c +++ b/arch/x86/mm/init_64.c | |||
@@ -857,7 +857,7 @@ int arch_add_memory(int nid, u64 start, u64 size) | |||
857 | if (last_mapped_pfn > max_pfn_mapped) | 857 | if (last_mapped_pfn > max_pfn_mapped) |
858 | max_pfn_mapped = last_mapped_pfn; | 858 | max_pfn_mapped = last_mapped_pfn; |
859 | 859 | ||
860 | ret = __add_pages(zone, start_pfn, nr_pages); | 860 | ret = __add_pages(nid, zone, start_pfn, nr_pages); |
861 | WARN_ON_ONCE(ret); | 861 | WARN_ON_ONCE(ret); |
862 | 862 | ||
863 | return ret; | 863 | return ret; |
@@ -888,6 +888,8 @@ int devmem_is_allowed(unsigned long pagenr) | |||
888 | { | 888 | { |
889 | if (pagenr <= 256) | 889 | if (pagenr <= 256) |
890 | return 1; | 890 | return 1; |
891 | if (iomem_is_exclusive(pagenr << PAGE_SHIFT)) | ||
892 | return 0; | ||
891 | if (!page_is_ram(pagenr)) | 893 | if (!page_is_ram(pagenr)) |
892 | return 1; | 894 | return 1; |
893 | return 0; | 895 | return 0; |
@@ -902,8 +904,6 @@ void __init mem_init(void) | |||
902 | long codesize, reservedpages, datasize, initsize; | 904 | long codesize, reservedpages, datasize, initsize; |
903 | unsigned long absent_pages; | 905 | unsigned long absent_pages; |
904 | 906 | ||
905 | start_periodic_check_for_corruption(); | ||
906 | |||
907 | pci_iommu_alloc(); | 907 | pci_iommu_alloc(); |
908 | 908 | ||
909 | /* clear_bss() already clear the empty_zero_page */ | 909 | /* clear_bss() already clear the empty_zero_page */ |
diff --git a/arch/x86/mm/ioremap.c b/arch/x86/mm/ioremap.c index d4c4307ff3e0..bd85d42819e1 100644 --- a/arch/x86/mm/ioremap.c +++ b/arch/x86/mm/ioremap.c | |||
@@ -223,7 +223,8 @@ static void __iomem *__ioremap_caller(resource_size_t phys_addr, | |||
223 | * Check if the request spans more than any BAR in the iomem resource | 223 | * Check if the request spans more than any BAR in the iomem resource |
224 | * tree. | 224 | * tree. |
225 | */ | 225 | */ |
226 | WARN_ON(iomem_map_sanity_check(phys_addr, size)); | 226 | WARN_ONCE(iomem_map_sanity_check(phys_addr, size), |
227 | KERN_INFO "Info: mapping multiple BARs. Your kernel is fine."); | ||
227 | 228 | ||
228 | /* | 229 | /* |
229 | * Don't allow anybody to remap normal RAM that we're using.. | 230 | * Don't allow anybody to remap normal RAM that we're using.. |
diff --git a/arch/x86/mm/numa_32.c b/arch/x86/mm/numa_32.c index 8518c678d83f..d1f7439d173c 100644 --- a/arch/x86/mm/numa_32.c +++ b/arch/x86/mm/numa_32.c | |||
@@ -239,7 +239,7 @@ void resume_map_numa_kva(pgd_t *pgd_base) | |||
239 | start_pfn = node_remap_start_pfn[node]; | 239 | start_pfn = node_remap_start_pfn[node]; |
240 | size = node_remap_size[node]; | 240 | size = node_remap_size[node]; |
241 | 241 | ||
242 | printk(KERN_DEBUG "%s: node %d\n", __FUNCTION__, node); | 242 | printk(KERN_DEBUG "%s: node %d\n", __func__, node); |
243 | 243 | ||
244 | for (pfn = 0; pfn < size; pfn += PTRS_PER_PTE) { | 244 | for (pfn = 0; pfn < size; pfn += PTRS_PER_PTE) { |
245 | unsigned long vaddr = start_va + (pfn << PAGE_SHIFT); | 245 | unsigned long vaddr = start_va + (pfn << PAGE_SHIFT); |
@@ -251,7 +251,7 @@ void resume_map_numa_kva(pgd_t *pgd_base) | |||
251 | PAGE_KERNEL_LARGE_EXEC)); | 251 | PAGE_KERNEL_LARGE_EXEC)); |
252 | 252 | ||
253 | printk(KERN_DEBUG "%s: %08lx -> pfn %08lx\n", | 253 | printk(KERN_DEBUG "%s: %08lx -> pfn %08lx\n", |
254 | __FUNCTION__, vaddr, start_pfn + pfn); | 254 | __func__, vaddr, start_pfn + pfn); |
255 | } | 255 | } |
256 | } | 256 | } |
257 | } | 257 | } |
diff --git a/arch/x86/mm/numa_64.c b/arch/x86/mm/numa_64.c index cebcbf152d46..71a14f89f89e 100644 --- a/arch/x86/mm/numa_64.c +++ b/arch/x86/mm/numa_64.c | |||
@@ -278,7 +278,7 @@ void __init numa_init_array(void) | |||
278 | int rr, i; | 278 | int rr, i; |
279 | 279 | ||
280 | rr = first_node(node_online_map); | 280 | rr = first_node(node_online_map); |
281 | for (i = 0; i < NR_CPUS; i++) { | 281 | for (i = 0; i < nr_cpu_ids; i++) { |
282 | if (early_cpu_to_node(i) != NUMA_NO_NODE) | 282 | if (early_cpu_to_node(i) != NUMA_NO_NODE) |
283 | continue; | 283 | continue; |
284 | numa_set_node(i, rr); | 284 | numa_set_node(i, rr); |
@@ -549,7 +549,7 @@ void __init initmem_init(unsigned long start_pfn, unsigned long last_pfn) | |||
549 | memnodemap[0] = 0; | 549 | memnodemap[0] = 0; |
550 | node_set_online(0); | 550 | node_set_online(0); |
551 | node_set(0, node_possible_map); | 551 | node_set(0, node_possible_map); |
552 | for (i = 0; i < NR_CPUS; i++) | 552 | for (i = 0; i < nr_cpu_ids; i++) |
553 | numa_set_node(i, 0); | 553 | numa_set_node(i, 0); |
554 | e820_register_active_regions(0, start_pfn, last_pfn); | 554 | e820_register_active_regions(0, start_pfn, last_pfn); |
555 | setup_node_bootmem(0, start_pfn << PAGE_SHIFT, last_pfn << PAGE_SHIFT); | 555 | setup_node_bootmem(0, start_pfn << PAGE_SHIFT, last_pfn << PAGE_SHIFT); |
diff --git a/arch/x86/mm/pat.c b/arch/x86/mm/pat.c index eb1bf000d12e..85cbd3cd3723 100644 --- a/arch/x86/mm/pat.c +++ b/arch/x86/mm/pat.c | |||
@@ -596,6 +596,242 @@ void unmap_devmem(unsigned long pfn, unsigned long size, pgprot_t vma_prot) | |||
596 | free_memtype(addr, addr + size); | 596 | free_memtype(addr, addr + size); |
597 | } | 597 | } |
598 | 598 | ||
599 | /* | ||
600 | * Internal interface to reserve a range of physical memory with prot. | ||
601 | * Reserved non RAM regions only and after successful reserve_memtype, | ||
602 | * this func also keeps identity mapping (if any) in sync with this new prot. | ||
603 | */ | ||
604 | static int reserve_pfn_range(u64 paddr, unsigned long size, pgprot_t vma_prot) | ||
605 | { | ||
606 | int is_ram = 0; | ||
607 | int id_sz, ret; | ||
608 | unsigned long flags; | ||
609 | unsigned long want_flags = (pgprot_val(vma_prot) & _PAGE_CACHE_MASK); | ||
610 | |||
611 | is_ram = pagerange_is_ram(paddr, paddr + size); | ||
612 | |||
613 | if (is_ram != 0) { | ||
614 | /* | ||
615 | * For mapping RAM pages, drivers need to call | ||
616 | * set_memory_[uc|wc|wb] directly, for reserve and free, before | ||
617 | * setting up the PTE. | ||
618 | */ | ||
619 | WARN_ON_ONCE(1); | ||
620 | return 0; | ||
621 | } | ||
622 | |||
623 | ret = reserve_memtype(paddr, paddr + size, want_flags, &flags); | ||
624 | if (ret) | ||
625 | return ret; | ||
626 | |||
627 | if (flags != want_flags) { | ||
628 | free_memtype(paddr, paddr + size); | ||
629 | printk(KERN_ERR | ||
630 | "%s:%d map pfn expected mapping type %s for %Lx-%Lx, got %s\n", | ||
631 | current->comm, current->pid, | ||
632 | cattr_name(want_flags), | ||
633 | (unsigned long long)paddr, | ||
634 | (unsigned long long)(paddr + size), | ||
635 | cattr_name(flags)); | ||
636 | return -EINVAL; | ||
637 | } | ||
638 | |||
639 | /* Need to keep identity mapping in sync */ | ||
640 | if (paddr >= __pa(high_memory)) | ||
641 | return 0; | ||
642 | |||
643 | id_sz = (__pa(high_memory) < paddr + size) ? | ||
644 | __pa(high_memory) - paddr : | ||
645 | size; | ||
646 | |||
647 | if (ioremap_change_attr((unsigned long)__va(paddr), id_sz, flags) < 0) { | ||
648 | free_memtype(paddr, paddr + size); | ||
649 | printk(KERN_ERR | ||
650 | "%s:%d reserve_pfn_range ioremap_change_attr failed %s " | ||
651 | "for %Lx-%Lx\n", | ||
652 | current->comm, current->pid, | ||
653 | cattr_name(flags), | ||
654 | (unsigned long long)paddr, | ||
655 | (unsigned long long)(paddr + size)); | ||
656 | return -EINVAL; | ||
657 | } | ||
658 | return 0; | ||
659 | } | ||
660 | |||
661 | /* | ||
662 | * Internal interface to free a range of physical memory. | ||
663 | * Frees non RAM regions only. | ||
664 | */ | ||
665 | static void free_pfn_range(u64 paddr, unsigned long size) | ||
666 | { | ||
667 | int is_ram; | ||
668 | |||
669 | is_ram = pagerange_is_ram(paddr, paddr + size); | ||
670 | if (is_ram == 0) | ||
671 | free_memtype(paddr, paddr + size); | ||
672 | } | ||
673 | |||
674 | /* | ||
675 | * track_pfn_vma_copy is called when vma that is covering the pfnmap gets | ||
676 | * copied through copy_page_range(). | ||
677 | * | ||
678 | * If the vma has a linear pfn mapping for the entire range, we get the prot | ||
679 | * from pte and reserve the entire vma range with single reserve_pfn_range call. | ||
680 | * Otherwise, we reserve the entire vma range, my ging through the PTEs page | ||
681 | * by page to get physical address and protection. | ||
682 | */ | ||
683 | int track_pfn_vma_copy(struct vm_area_struct *vma) | ||
684 | { | ||
685 | int retval = 0; | ||
686 | unsigned long i, j; | ||
687 | resource_size_t paddr; | ||
688 | unsigned long prot; | ||
689 | unsigned long vma_start = vma->vm_start; | ||
690 | unsigned long vma_end = vma->vm_end; | ||
691 | unsigned long vma_size = vma_end - vma_start; | ||
692 | |||
693 | if (!pat_enabled) | ||
694 | return 0; | ||
695 | |||
696 | if (is_linear_pfn_mapping(vma)) { | ||
697 | /* | ||
698 | * reserve the whole chunk covered by vma. We need the | ||
699 | * starting address and protection from pte. | ||
700 | */ | ||
701 | if (follow_phys(vma, vma_start, 0, &prot, &paddr)) { | ||
702 | WARN_ON_ONCE(1); | ||
703 | return -EINVAL; | ||
704 | } | ||
705 | return reserve_pfn_range(paddr, vma_size, __pgprot(prot)); | ||
706 | } | ||
707 | |||
708 | /* reserve entire vma page by page, using pfn and prot from pte */ | ||
709 | for (i = 0; i < vma_size; i += PAGE_SIZE) { | ||
710 | if (follow_phys(vma, vma_start + i, 0, &prot, &paddr)) | ||
711 | continue; | ||
712 | |||
713 | retval = reserve_pfn_range(paddr, PAGE_SIZE, __pgprot(prot)); | ||
714 | if (retval) | ||
715 | goto cleanup_ret; | ||
716 | } | ||
717 | return 0; | ||
718 | |||
719 | cleanup_ret: | ||
720 | /* Reserve error: Cleanup partial reservation and return error */ | ||
721 | for (j = 0; j < i; j += PAGE_SIZE) { | ||
722 | if (follow_phys(vma, vma_start + j, 0, &prot, &paddr)) | ||
723 | continue; | ||
724 | |||
725 | free_pfn_range(paddr, PAGE_SIZE); | ||
726 | } | ||
727 | |||
728 | return retval; | ||
729 | } | ||
730 | |||
731 | /* | ||
732 | * track_pfn_vma_new is called when a _new_ pfn mapping is being established | ||
733 | * for physical range indicated by pfn and size. | ||
734 | * | ||
735 | * prot is passed in as a parameter for the new mapping. If the vma has a | ||
736 | * linear pfn mapping for the entire range reserve the entire vma range with | ||
737 | * single reserve_pfn_range call. | ||
738 | * Otherwise, we look t the pfn and size and reserve only the specified range | ||
739 | * page by page. | ||
740 | * | ||
741 | * Note that this function can be called with caller trying to map only a | ||
742 | * subrange/page inside the vma. | ||
743 | */ | ||
744 | int track_pfn_vma_new(struct vm_area_struct *vma, pgprot_t prot, | ||
745 | unsigned long pfn, unsigned long size) | ||
746 | { | ||
747 | int retval = 0; | ||
748 | unsigned long i, j; | ||
749 | resource_size_t base_paddr; | ||
750 | resource_size_t paddr; | ||
751 | unsigned long vma_start = vma->vm_start; | ||
752 | unsigned long vma_end = vma->vm_end; | ||
753 | unsigned long vma_size = vma_end - vma_start; | ||
754 | |||
755 | if (!pat_enabled) | ||
756 | return 0; | ||
757 | |||
758 | if (is_linear_pfn_mapping(vma)) { | ||
759 | /* reserve the whole chunk starting from vm_pgoff */ | ||
760 | paddr = (resource_size_t)vma->vm_pgoff << PAGE_SHIFT; | ||
761 | return reserve_pfn_range(paddr, vma_size, prot); | ||
762 | } | ||
763 | |||
764 | /* reserve page by page using pfn and size */ | ||
765 | base_paddr = (resource_size_t)pfn << PAGE_SHIFT; | ||
766 | for (i = 0; i < size; i += PAGE_SIZE) { | ||
767 | paddr = base_paddr + i; | ||
768 | retval = reserve_pfn_range(paddr, PAGE_SIZE, prot); | ||
769 | if (retval) | ||
770 | goto cleanup_ret; | ||
771 | } | ||
772 | return 0; | ||
773 | |||
774 | cleanup_ret: | ||
775 | /* Reserve error: Cleanup partial reservation and return error */ | ||
776 | for (j = 0; j < i; j += PAGE_SIZE) { | ||
777 | paddr = base_paddr + j; | ||
778 | free_pfn_range(paddr, PAGE_SIZE); | ||
779 | } | ||
780 | |||
781 | return retval; | ||
782 | } | ||
783 | |||
784 | /* | ||
785 | * untrack_pfn_vma is called while unmapping a pfnmap for a region. | ||
786 | * untrack can be called for a specific region indicated by pfn and size or | ||
787 | * can be for the entire vma (in which case size can be zero). | ||
788 | */ | ||
789 | void untrack_pfn_vma(struct vm_area_struct *vma, unsigned long pfn, | ||
790 | unsigned long size) | ||
791 | { | ||
792 | unsigned long i; | ||
793 | resource_size_t paddr; | ||
794 | unsigned long prot; | ||
795 | unsigned long vma_start = vma->vm_start; | ||
796 | unsigned long vma_end = vma->vm_end; | ||
797 | unsigned long vma_size = vma_end - vma_start; | ||
798 | |||
799 | if (!pat_enabled) | ||
800 | return; | ||
801 | |||
802 | if (is_linear_pfn_mapping(vma)) { | ||
803 | /* free the whole chunk starting from vm_pgoff */ | ||
804 | paddr = (resource_size_t)vma->vm_pgoff << PAGE_SHIFT; | ||
805 | free_pfn_range(paddr, vma_size); | ||
806 | return; | ||
807 | } | ||
808 | |||
809 | if (size != 0 && size != vma_size) { | ||
810 | /* free page by page, using pfn and size */ | ||
811 | paddr = (resource_size_t)pfn << PAGE_SHIFT; | ||
812 | for (i = 0; i < size; i += PAGE_SIZE) { | ||
813 | paddr = paddr + i; | ||
814 | free_pfn_range(paddr, PAGE_SIZE); | ||
815 | } | ||
816 | } else { | ||
817 | /* free entire vma, page by page, using the pfn from pte */ | ||
818 | for (i = 0; i < vma_size; i += PAGE_SIZE) { | ||
819 | if (follow_phys(vma, vma_start + i, 0, &prot, &paddr)) | ||
820 | continue; | ||
821 | |||
822 | free_pfn_range(paddr, PAGE_SIZE); | ||
823 | } | ||
824 | } | ||
825 | } | ||
826 | |||
827 | pgprot_t pgprot_writecombine(pgprot_t prot) | ||
828 | { | ||
829 | if (pat_enabled) | ||
830 | return __pgprot(pgprot_val(prot) | _PAGE_CACHE_WC); | ||
831 | else | ||
832 | return pgprot_noncached(prot); | ||
833 | } | ||
834 | |||
599 | #if defined(CONFIG_DEBUG_FS) && defined(CONFIG_X86_PAT) | 835 | #if defined(CONFIG_DEBUG_FS) && defined(CONFIG_X86_PAT) |
600 | 836 | ||
601 | /* get Nth element of the linked list */ | 837 | /* get Nth element of the linked list */ |
diff --git a/arch/x86/mm/srat_64.c b/arch/x86/mm/srat_64.c index 51c0a2fc14fe..09737c8af074 100644 --- a/arch/x86/mm/srat_64.c +++ b/arch/x86/mm/srat_64.c | |||
@@ -382,7 +382,7 @@ int __init acpi_scan_nodes(unsigned long start, unsigned long end) | |||
382 | if (!node_online(i)) | 382 | if (!node_online(i)) |
383 | setup_node_bootmem(i, nodes[i].start, nodes[i].end); | 383 | setup_node_bootmem(i, nodes[i].start, nodes[i].end); |
384 | 384 | ||
385 | for (i = 0; i < NR_CPUS; i++) { | 385 | for (i = 0; i < nr_cpu_ids; i++) { |
386 | int node = early_cpu_to_node(i); | 386 | int node = early_cpu_to_node(i); |
387 | 387 | ||
388 | if (node == NUMA_NO_NODE) | 388 | if (node == NUMA_NO_NODE) |
diff --git a/arch/x86/oprofile/op_model_amd.c b/arch/x86/oprofile/op_model_amd.c index 509513760a6e..98658f25f542 100644 --- a/arch/x86/oprofile/op_model_amd.c +++ b/arch/x86/oprofile/op_model_amd.c | |||
@@ -65,11 +65,13 @@ static unsigned long reset_value[NUM_COUNTERS]; | |||
65 | #define IBS_FETCH_BEGIN 3 | 65 | #define IBS_FETCH_BEGIN 3 |
66 | #define IBS_OP_BEGIN 4 | 66 | #define IBS_OP_BEGIN 4 |
67 | 67 | ||
68 | /* The function interface needs to be fixed, something like add | 68 | /* |
69 | data. Should then be added to linux/oprofile.h. */ | 69 | * The function interface needs to be fixed, something like add |
70 | * data. Should then be added to linux/oprofile.h. | ||
71 | */ | ||
70 | extern void | 72 | extern void |
71 | oprofile_add_ibs_sample(struct pt_regs *const regs, | 73 | oprofile_add_ibs_sample(struct pt_regs * const regs, |
72 | unsigned int *const ibs_sample, int ibs_code); | 74 | unsigned int * const ibs_sample, int ibs_code); |
73 | 75 | ||
74 | struct ibs_fetch_sample { | 76 | struct ibs_fetch_sample { |
75 | /* MSRC001_1031 IBS Fetch Linear Address Register */ | 77 | /* MSRC001_1031 IBS Fetch Linear Address Register */ |
@@ -104,11 +106,6 @@ struct ibs_op_sample { | |||
104 | unsigned int ibs_dc_phys_high; | 106 | unsigned int ibs_dc_phys_high; |
105 | }; | 107 | }; |
106 | 108 | ||
107 | /* | ||
108 | * unitialize the APIC for the IBS interrupts if needed on AMD Family10h+ | ||
109 | */ | ||
110 | static void clear_ibs_nmi(void); | ||
111 | |||
112 | static int ibs_allowed; /* AMD Family10h and later */ | 109 | static int ibs_allowed; /* AMD Family10h and later */ |
113 | 110 | ||
114 | struct op_ibs_config { | 111 | struct op_ibs_config { |
@@ -223,7 +220,7 @@ op_amd_handle_ibs(struct pt_regs * const regs, | |||
223 | (unsigned int *)&ibs_fetch, | 220 | (unsigned int *)&ibs_fetch, |
224 | IBS_FETCH_BEGIN); | 221 | IBS_FETCH_BEGIN); |
225 | 222 | ||
226 | /*reenable the IRQ */ | 223 | /* reenable the IRQ */ |
227 | rdmsr(MSR_AMD64_IBSFETCHCTL, low, high); | 224 | rdmsr(MSR_AMD64_IBSFETCHCTL, low, high); |
228 | high &= ~IBS_FETCH_HIGH_VALID_BIT; | 225 | high &= ~IBS_FETCH_HIGH_VALID_BIT; |
229 | high |= IBS_FETCH_HIGH_ENABLE; | 226 | high |= IBS_FETCH_HIGH_ENABLE; |
@@ -331,8 +328,10 @@ static void op_amd_stop(struct op_msrs const * const msrs) | |||
331 | unsigned int low, high; | 328 | unsigned int low, high; |
332 | int i; | 329 | int i; |
333 | 330 | ||
334 | /* Subtle: stop on all counters to avoid race with | 331 | /* |
335 | * setting our pm callback */ | 332 | * Subtle: stop on all counters to avoid race with setting our |
333 | * pm callback | ||
334 | */ | ||
336 | for (i = 0 ; i < NUM_COUNTERS ; ++i) { | 335 | for (i = 0 ; i < NUM_COUNTERS ; ++i) { |
337 | if (!reset_value[i]) | 336 | if (!reset_value[i]) |
338 | continue; | 337 | continue; |
@@ -343,13 +342,15 @@ static void op_amd_stop(struct op_msrs const * const msrs) | |||
343 | 342 | ||
344 | #ifdef CONFIG_OPROFILE_IBS | 343 | #ifdef CONFIG_OPROFILE_IBS |
345 | if (ibs_allowed && ibs_config.fetch_enabled) { | 344 | if (ibs_allowed && ibs_config.fetch_enabled) { |
346 | low = 0; /* clear max count and enable */ | 345 | /* clear max count and enable */ |
346 | low = 0; | ||
347 | high = 0; | 347 | high = 0; |
348 | wrmsr(MSR_AMD64_IBSFETCHCTL, low, high); | 348 | wrmsr(MSR_AMD64_IBSFETCHCTL, low, high); |
349 | } | 349 | } |
350 | 350 | ||
351 | if (ibs_allowed && ibs_config.op_enabled) { | 351 | if (ibs_allowed && ibs_config.op_enabled) { |
352 | low = 0; /* clear max count and enable */ | 352 | /* clear max count and enable */ |
353 | low = 0; | ||
353 | high = 0; | 354 | high = 0; |
354 | wrmsr(MSR_AMD64_IBSOPCTL, low, high); | 355 | wrmsr(MSR_AMD64_IBSOPCTL, low, high); |
355 | } | 356 | } |
@@ -370,18 +371,7 @@ static void op_amd_shutdown(struct op_msrs const * const msrs) | |||
370 | } | 371 | } |
371 | } | 372 | } |
372 | 373 | ||
373 | #ifndef CONFIG_OPROFILE_IBS | 374 | #ifdef CONFIG_OPROFILE_IBS |
374 | |||
375 | /* no IBS support */ | ||
376 | |||
377 | static int op_amd_init(struct oprofile_operations *ops) | ||
378 | { | ||
379 | return 0; | ||
380 | } | ||
381 | |||
382 | static void op_amd_exit(void) {} | ||
383 | |||
384 | #else | ||
385 | 375 | ||
386 | static u8 ibs_eilvt_off; | 376 | static u8 ibs_eilvt_off; |
387 | 377 | ||
@@ -395,7 +385,7 @@ static inline void apic_clear_ibs_nmi_per_cpu(void *arg) | |||
395 | setup_APIC_eilvt_ibs(0, APIC_EILVT_MSG_FIX, 1); | 385 | setup_APIC_eilvt_ibs(0, APIC_EILVT_MSG_FIX, 1); |
396 | } | 386 | } |
397 | 387 | ||
398 | static int pfm_amd64_setup_eilvt(void) | 388 | static int init_ibs_nmi(void) |
399 | { | 389 | { |
400 | #define IBSCTL_LVTOFFSETVAL (1 << 8) | 390 | #define IBSCTL_LVTOFFSETVAL (1 << 8) |
401 | #define IBSCTL 0x1cc | 391 | #define IBSCTL 0x1cc |
@@ -443,18 +433,22 @@ static int pfm_amd64_setup_eilvt(void) | |||
443 | return 0; | 433 | return 0; |
444 | } | 434 | } |
445 | 435 | ||
446 | /* | 436 | /* uninitialize the APIC for the IBS interrupts if needed */ |
447 | * initialize the APIC for the IBS interrupts | 437 | static void clear_ibs_nmi(void) |
448 | * if available (AMD Family10h rev B0 and later) | 438 | { |
449 | */ | 439 | if (ibs_allowed) |
450 | static void setup_ibs(void) | 440 | on_each_cpu(apic_clear_ibs_nmi_per_cpu, NULL, 1); |
441 | } | ||
442 | |||
443 | /* initialize the APIC for the IBS interrupts if available */ | ||
444 | static void ibs_init(void) | ||
451 | { | 445 | { |
452 | ibs_allowed = boot_cpu_has(X86_FEATURE_IBS); | 446 | ibs_allowed = boot_cpu_has(X86_FEATURE_IBS); |
453 | 447 | ||
454 | if (!ibs_allowed) | 448 | if (!ibs_allowed) |
455 | return; | 449 | return; |
456 | 450 | ||
457 | if (pfm_amd64_setup_eilvt()) { | 451 | if (init_ibs_nmi()) { |
458 | ibs_allowed = 0; | 452 | ibs_allowed = 0; |
459 | return; | 453 | return; |
460 | } | 454 | } |
@@ -462,14 +456,12 @@ static void setup_ibs(void) | |||
462 | printk(KERN_INFO "oprofile: AMD IBS detected\n"); | 456 | printk(KERN_INFO "oprofile: AMD IBS detected\n"); |
463 | } | 457 | } |
464 | 458 | ||
465 | 459 | static void ibs_exit(void) | |
466 | /* | ||
467 | * unitialize the APIC for the IBS interrupts if needed on AMD Family10h | ||
468 | * rev B0 and later */ | ||
469 | static void clear_ibs_nmi(void) | ||
470 | { | 460 | { |
471 | if (ibs_allowed) | 461 | if (!ibs_allowed) |
472 | on_each_cpu(apic_clear_ibs_nmi_per_cpu, NULL, 1); | 462 | return; |
463 | |||
464 | clear_ibs_nmi(); | ||
473 | } | 465 | } |
474 | 466 | ||
475 | static int (*create_arch_files)(struct super_block *sb, struct dentry *root); | 467 | static int (*create_arch_files)(struct super_block *sb, struct dentry *root); |
@@ -519,7 +511,7 @@ static int setup_ibs_files(struct super_block *sb, struct dentry *root) | |||
519 | 511 | ||
520 | static int op_amd_init(struct oprofile_operations *ops) | 512 | static int op_amd_init(struct oprofile_operations *ops) |
521 | { | 513 | { |
522 | setup_ibs(); | 514 | ibs_init(); |
523 | create_arch_files = ops->create_files; | 515 | create_arch_files = ops->create_files; |
524 | ops->create_files = setup_ibs_files; | 516 | ops->create_files = setup_ibs_files; |
525 | return 0; | 517 | return 0; |
@@ -527,10 +519,21 @@ static int op_amd_init(struct oprofile_operations *ops) | |||
527 | 519 | ||
528 | static void op_amd_exit(void) | 520 | static void op_amd_exit(void) |
529 | { | 521 | { |
530 | clear_ibs_nmi(); | 522 | ibs_exit(); |
531 | } | 523 | } |
532 | 524 | ||
533 | #endif | 525 | #else |
526 | |||
527 | /* no IBS support */ | ||
528 | |||
529 | static int op_amd_init(struct oprofile_operations *ops) | ||
530 | { | ||
531 | return 0; | ||
532 | } | ||
533 | |||
534 | static void op_amd_exit(void) {} | ||
535 | |||
536 | #endif /* CONFIG_OPROFILE_IBS */ | ||
534 | 537 | ||
535 | struct op_x86_model_spec const op_amd_spec = { | 538 | struct op_x86_model_spec const op_amd_spec = { |
536 | .init = op_amd_init, | 539 | .init = op_amd_init, |
diff --git a/arch/x86/pci/acpi.c b/arch/x86/pci/acpi.c index 1d88d2b39771..c0ecf250fe51 100644 --- a/arch/x86/pci/acpi.c +++ b/arch/x86/pci/acpi.c | |||
@@ -4,7 +4,7 @@ | |||
4 | #include <linux/irq.h> | 4 | #include <linux/irq.h> |
5 | #include <linux/dmi.h> | 5 | #include <linux/dmi.h> |
6 | #include <asm/numa.h> | 6 | #include <asm/numa.h> |
7 | #include "pci.h" | 7 | #include <asm/pci_x86.h> |
8 | 8 | ||
9 | struct pci_root_info { | 9 | struct pci_root_info { |
10 | char *name; | 10 | char *name; |
@@ -210,11 +210,10 @@ struct pci_bus * __devinit pci_acpi_scan_root(struct acpi_device *device, int do | |||
210 | if (bus && node != -1) { | 210 | if (bus && node != -1) { |
211 | #ifdef CONFIG_ACPI_NUMA | 211 | #ifdef CONFIG_ACPI_NUMA |
212 | if (pxm >= 0) | 212 | if (pxm >= 0) |
213 | printk(KERN_DEBUG "bus %02x -> pxm %d -> node %d\n", | 213 | dev_printk(KERN_DEBUG, &bus->dev, |
214 | busnum, pxm, node); | 214 | "on NUMA node %d (pxm %d)\n", node, pxm); |
215 | #else | 215 | #else |
216 | printk(KERN_DEBUG "bus %02x -> node %d\n", | 216 | dev_printk(KERN_DEBUG, &bus->dev, "on NUMA node %d\n", node); |
217 | busnum, node); | ||
218 | #endif | 217 | #endif |
219 | } | 218 | } |
220 | 219 | ||
diff --git a/arch/x86/pci/amd_bus.c b/arch/x86/pci/amd_bus.c index 22e057665e55..9bb09823b362 100644 --- a/arch/x86/pci/amd_bus.c +++ b/arch/x86/pci/amd_bus.c | |||
@@ -2,7 +2,7 @@ | |||
2 | #include <linux/pci.h> | 2 | #include <linux/pci.h> |
3 | #include <linux/topology.h> | 3 | #include <linux/topology.h> |
4 | #include <linux/cpu.h> | 4 | #include <linux/cpu.h> |
5 | #include "pci.h" | 5 | #include <asm/pci_x86.h> |
6 | 6 | ||
7 | #ifdef CONFIG_X86_64 | 7 | #ifdef CONFIG_X86_64 |
8 | #include <asm/pci-direct.h> | 8 | #include <asm/pci-direct.h> |
diff --git a/arch/x86/pci/common.c b/arch/x86/pci/common.c index b67732bbb85a..82d22fc601ae 100644 --- a/arch/x86/pci/common.c +++ b/arch/x86/pci/common.c | |||
@@ -14,8 +14,7 @@ | |||
14 | #include <asm/segment.h> | 14 | #include <asm/segment.h> |
15 | #include <asm/io.h> | 15 | #include <asm/io.h> |
16 | #include <asm/smp.h> | 16 | #include <asm/smp.h> |
17 | 17 | #include <asm/pci_x86.h> | |
18 | #include "pci.h" | ||
19 | 18 | ||
20 | unsigned int pci_probe = PCI_PROBE_BIOS | PCI_PROBE_CONF1 | PCI_PROBE_CONF2 | | 19 | unsigned int pci_probe = PCI_PROBE_BIOS | PCI_PROBE_CONF1 | PCI_PROBE_CONF2 | |
21 | PCI_PROBE_MMCONF; | 20 | PCI_PROBE_MMCONF; |
@@ -23,6 +22,12 @@ unsigned int pci_probe = PCI_PROBE_BIOS | PCI_PROBE_CONF1 | PCI_PROBE_CONF2 | | |||
23 | unsigned int pci_early_dump_regs; | 22 | unsigned int pci_early_dump_regs; |
24 | static int pci_bf_sort; | 23 | static int pci_bf_sort; |
25 | int pci_routeirq; | 24 | int pci_routeirq; |
25 | int noioapicquirk; | ||
26 | #ifdef CONFIG_X86_REROUTE_FOR_BROKEN_BOOT_IRQS | ||
27 | int noioapicreroute = 0; | ||
28 | #else | ||
29 | int noioapicreroute = 1; | ||
30 | #endif | ||
26 | int pcibios_last_bus = -1; | 31 | int pcibios_last_bus = -1; |
27 | unsigned long pirq_table_addr; | 32 | unsigned long pirq_table_addr; |
28 | struct pci_bus *pci_root_bus; | 33 | struct pci_bus *pci_root_bus; |
@@ -519,6 +524,17 @@ char * __devinit pcibios_setup(char *str) | |||
519 | } else if (!strcmp(str, "skip_isa_align")) { | 524 | } else if (!strcmp(str, "skip_isa_align")) { |
520 | pci_probe |= PCI_CAN_SKIP_ISA_ALIGN; | 525 | pci_probe |= PCI_CAN_SKIP_ISA_ALIGN; |
521 | return NULL; | 526 | return NULL; |
527 | } else if (!strcmp(str, "noioapicquirk")) { | ||
528 | noioapicquirk = 1; | ||
529 | return NULL; | ||
530 | } else if (!strcmp(str, "ioapicreroute")) { | ||
531 | if (noioapicreroute != -1) | ||
532 | noioapicreroute = 0; | ||
533 | return NULL; | ||
534 | } else if (!strcmp(str, "noioapicreroute")) { | ||
535 | if (noioapicreroute != -1) | ||
536 | noioapicreroute = 1; | ||
537 | return NULL; | ||
522 | } | 538 | } |
523 | return str; | 539 | return str; |
524 | } | 540 | } |
@@ -535,17 +551,25 @@ int pcibios_enable_device(struct pci_dev *dev, int mask) | |||
535 | if ((err = pci_enable_resources(dev, mask)) < 0) | 551 | if ((err = pci_enable_resources(dev, mask)) < 0) |
536 | return err; | 552 | return err; |
537 | 553 | ||
538 | if (!dev->msi_enabled) | 554 | if (!pci_dev_msi_enabled(dev)) |
539 | return pcibios_enable_irq(dev); | 555 | return pcibios_enable_irq(dev); |
540 | return 0; | 556 | return 0; |
541 | } | 557 | } |
542 | 558 | ||
543 | void pcibios_disable_device (struct pci_dev *dev) | 559 | void pcibios_disable_device (struct pci_dev *dev) |
544 | { | 560 | { |
545 | if (!dev->msi_enabled && pcibios_disable_irq) | 561 | if (!pci_dev_msi_enabled(dev) && pcibios_disable_irq) |
546 | pcibios_disable_irq(dev); | 562 | pcibios_disable_irq(dev); |
547 | } | 563 | } |
548 | 564 | ||
565 | int pci_ext_cfg_avail(struct pci_dev *dev) | ||
566 | { | ||
567 | if (raw_pci_ext_ops) | ||
568 | return 1; | ||
569 | else | ||
570 | return 0; | ||
571 | } | ||
572 | |||
549 | struct pci_bus * __devinit pci_scan_bus_on_node(int busno, struct pci_ops *ops, int node) | 573 | struct pci_bus * __devinit pci_scan_bus_on_node(int busno, struct pci_ops *ops, int node) |
550 | { | 574 | { |
551 | struct pci_bus *bus = NULL; | 575 | struct pci_bus *bus = NULL; |
diff --git a/arch/x86/pci/direct.c b/arch/x86/pci/direct.c index 9915293500fb..bd13c3e4c6db 100644 --- a/arch/x86/pci/direct.c +++ b/arch/x86/pci/direct.c | |||
@@ -5,7 +5,7 @@ | |||
5 | #include <linux/pci.h> | 5 | #include <linux/pci.h> |
6 | #include <linux/init.h> | 6 | #include <linux/init.h> |
7 | #include <linux/dmi.h> | 7 | #include <linux/dmi.h> |
8 | #include "pci.h" | 8 | #include <asm/pci_x86.h> |
9 | 9 | ||
10 | /* | 10 | /* |
11 | * Functions for accessing PCI base (first 256 bytes) and extended | 11 | * Functions for accessing PCI base (first 256 bytes) and extended |
@@ -173,7 +173,7 @@ static int pci_conf2_write(unsigned int seg, unsigned int bus, | |||
173 | 173 | ||
174 | #undef PCI_CONF2_ADDRESS | 174 | #undef PCI_CONF2_ADDRESS |
175 | 175 | ||
176 | static struct pci_raw_ops pci_direct_conf2 = { | 176 | struct pci_raw_ops pci_direct_conf2 = { |
177 | .read = pci_conf2_read, | 177 | .read = pci_conf2_read, |
178 | .write = pci_conf2_write, | 178 | .write = pci_conf2_write, |
179 | }; | 179 | }; |
@@ -289,6 +289,7 @@ int __init pci_direct_probe(void) | |||
289 | 289 | ||
290 | if (pci_check_type1()) { | 290 | if (pci_check_type1()) { |
291 | raw_pci_ops = &pci_direct_conf1; | 291 | raw_pci_ops = &pci_direct_conf1; |
292 | port_cf9_safe = true; | ||
292 | return 1; | 293 | return 1; |
293 | } | 294 | } |
294 | release_resource(region); | 295 | release_resource(region); |
@@ -305,6 +306,7 @@ int __init pci_direct_probe(void) | |||
305 | 306 | ||
306 | if (pci_check_type2()) { | 307 | if (pci_check_type2()) { |
307 | raw_pci_ops = &pci_direct_conf2; | 308 | raw_pci_ops = &pci_direct_conf2; |
309 | port_cf9_safe = true; | ||
308 | return 2; | 310 | return 2; |
309 | } | 311 | } |
310 | 312 | ||
diff --git a/arch/x86/pci/early.c b/arch/x86/pci/early.c index 86631ccbc25a..f6adf2c6d751 100644 --- a/arch/x86/pci/early.c +++ b/arch/x86/pci/early.c | |||
@@ -2,7 +2,7 @@ | |||
2 | #include <linux/pci.h> | 2 | #include <linux/pci.h> |
3 | #include <asm/pci-direct.h> | 3 | #include <asm/pci-direct.h> |
4 | #include <asm/io.h> | 4 | #include <asm/io.h> |
5 | #include "pci.h" | 5 | #include <asm/pci_x86.h> |
6 | 6 | ||
7 | /* Direct PCI access. This is used for PCI accesses in early boot before | 7 | /* Direct PCI access. This is used for PCI accesses in early boot before |
8 | the PCI subsystem works. */ | 8 | the PCI subsystem works. */ |
diff --git a/arch/x86/pci/fixup.c b/arch/x86/pci/fixup.c index 2051dc96b8e9..7d388d5cf548 100644 --- a/arch/x86/pci/fixup.c +++ b/arch/x86/pci/fixup.c | |||
@@ -6,8 +6,7 @@ | |||
6 | #include <linux/dmi.h> | 6 | #include <linux/dmi.h> |
7 | #include <linux/pci.h> | 7 | #include <linux/pci.h> |
8 | #include <linux/init.h> | 8 | #include <linux/init.h> |
9 | #include "pci.h" | 9 | #include <asm/pci_x86.h> |
10 | |||
11 | 10 | ||
12 | static void __devinit pci_fixup_i450nx(struct pci_dev *d) | 11 | static void __devinit pci_fixup_i450nx(struct pci_dev *d) |
13 | { | 12 | { |
diff --git a/arch/x86/pci/i386.c b/arch/x86/pci/i386.c index 844df0cbbd3e..f884740da318 100644 --- a/arch/x86/pci/i386.c +++ b/arch/x86/pci/i386.c | |||
@@ -34,8 +34,8 @@ | |||
34 | 34 | ||
35 | #include <asm/pat.h> | 35 | #include <asm/pat.h> |
36 | #include <asm/e820.h> | 36 | #include <asm/e820.h> |
37 | #include <asm/pci_x86.h> | ||
37 | 38 | ||
38 | #include "pci.h" | ||
39 | 39 | ||
40 | static int | 40 | static int |
41 | skip_isa_ioresource_align(struct pci_dev *dev) { | 41 | skip_isa_ioresource_align(struct pci_dev *dev) { |
@@ -129,7 +129,7 @@ static void __init pcibios_allocate_bus_resources(struct list_head *bus_list) | |||
129 | pr = pci_find_parent_resource(dev, r); | 129 | pr = pci_find_parent_resource(dev, r); |
130 | if (!r->start || !pr || | 130 | if (!r->start || !pr || |
131 | request_resource(pr, r) < 0) { | 131 | request_resource(pr, r) < 0) { |
132 | dev_err(&dev->dev, "BAR %d: can't allocate resource\n", idx); | 132 | dev_info(&dev->dev, "BAR %d: can't allocate resource\n", idx); |
133 | /* | 133 | /* |
134 | * Something is wrong with the region. | 134 | * Something is wrong with the region. |
135 | * Invalidate the resource to prevent | 135 | * Invalidate the resource to prevent |
@@ -170,7 +170,7 @@ static void __init pcibios_allocate_resources(int pass) | |||
170 | r->flags, disabled, pass); | 170 | r->flags, disabled, pass); |
171 | pr = pci_find_parent_resource(dev, r); | 171 | pr = pci_find_parent_resource(dev, r); |
172 | if (!pr || request_resource(pr, r) < 0) { | 172 | if (!pr || request_resource(pr, r) < 0) { |
173 | dev_err(&dev->dev, "BAR %d: can't allocate resource\n", idx); | 173 | dev_info(&dev->dev, "BAR %d: can't allocate resource\n", idx); |
174 | /* We'll assign a new address later */ | 174 | /* We'll assign a new address later */ |
175 | r->end -= r->start; | 175 | r->end -= r->start; |
176 | r->start = 0; | 176 | r->start = 0; |
diff --git a/arch/x86/pci/init.c b/arch/x86/pci/init.c index d6c950f81858..25a1f8efed4a 100644 --- a/arch/x86/pci/init.c +++ b/arch/x86/pci/init.c | |||
@@ -1,6 +1,6 @@ | |||
1 | #include <linux/pci.h> | 1 | #include <linux/pci.h> |
2 | #include <linux/init.h> | 2 | #include <linux/init.h> |
3 | #include "pci.h" | 3 | #include <asm/pci_x86.h> |
4 | 4 | ||
5 | /* arch_initcall has too random ordering, so call the initializers | 5 | /* arch_initcall has too random ordering, so call the initializers |
6 | in the right sequence from here. */ | 6 | in the right sequence from here. */ |
@@ -12,7 +12,8 @@ static __init int pci_arch_init(void) | |||
12 | type = pci_direct_probe(); | 12 | type = pci_direct_probe(); |
13 | #endif | 13 | #endif |
14 | 14 | ||
15 | pci_mmcfg_early_init(); | 15 | if (!(pci_probe & PCI_PROBE_NOEARLY)) |
16 | pci_mmcfg_early_init(); | ||
16 | 17 | ||
17 | #ifdef CONFIG_PCI_OLPC | 18 | #ifdef CONFIG_PCI_OLPC |
18 | if (!pci_olpc_init()) | 19 | if (!pci_olpc_init()) |
diff --git a/arch/x86/pci/irq.c b/arch/x86/pci/irq.c index bf69dbe08bff..4064345cf144 100644 --- a/arch/x86/pci/irq.c +++ b/arch/x86/pci/irq.c | |||
@@ -16,8 +16,7 @@ | |||
16 | #include <asm/io_apic.h> | 16 | #include <asm/io_apic.h> |
17 | #include <linux/irq.h> | 17 | #include <linux/irq.h> |
18 | #include <linux/acpi.h> | 18 | #include <linux/acpi.h> |
19 | 19 | #include <asm/pci_x86.h> | |
20 | #include "pci.h" | ||
21 | 20 | ||
22 | #define PIRQ_SIGNATURE (('$' << 0) + ('P' << 8) + ('I' << 16) + ('R' << 24)) | 21 | #define PIRQ_SIGNATURE (('$' << 0) + ('P' << 8) + ('I' << 16) + ('R' << 24)) |
23 | #define PIRQ_VERSION 0x0100 | 22 | #define PIRQ_VERSION 0x0100 |
@@ -534,7 +533,7 @@ static int pirq_bios_set(struct pci_dev *router, struct pci_dev *dev, int pirq, | |||
534 | { | 533 | { |
535 | struct pci_dev *bridge; | 534 | struct pci_dev *bridge; |
536 | int pin = pci_get_interrupt_pin(dev, &bridge); | 535 | int pin = pci_get_interrupt_pin(dev, &bridge); |
537 | return pcibios_set_irq_routing(bridge, pin, irq); | 536 | return pcibios_set_irq_routing(bridge, pin - 1, irq); |
538 | } | 537 | } |
539 | 538 | ||
540 | #endif | 539 | #endif |
@@ -888,7 +887,6 @@ static int pcibios_lookup_irq(struct pci_dev *dev, int assign) | |||
888 | dev_dbg(&dev->dev, "no interrupt pin\n"); | 887 | dev_dbg(&dev->dev, "no interrupt pin\n"); |
889 | return 0; | 888 | return 0; |
890 | } | 889 | } |
891 | pin = pin - 1; | ||
892 | 890 | ||
893 | /* Find IRQ routing entry */ | 891 | /* Find IRQ routing entry */ |
894 | 892 | ||
@@ -898,17 +896,17 @@ static int pcibios_lookup_irq(struct pci_dev *dev, int assign) | |||
898 | info = pirq_get_info(dev); | 896 | info = pirq_get_info(dev); |
899 | if (!info) { | 897 | if (!info) { |
900 | dev_dbg(&dev->dev, "PCI INT %c not found in routing table\n", | 898 | dev_dbg(&dev->dev, "PCI INT %c not found in routing table\n", |
901 | 'A' + pin); | 899 | 'A' + pin - 1); |
902 | return 0; | 900 | return 0; |
903 | } | 901 | } |
904 | pirq = info->irq[pin].link; | 902 | pirq = info->irq[pin - 1].link; |
905 | mask = info->irq[pin].bitmap; | 903 | mask = info->irq[pin - 1].bitmap; |
906 | if (!pirq) { | 904 | if (!pirq) { |
907 | dev_dbg(&dev->dev, "PCI INT %c not routed\n", 'A' + pin); | 905 | dev_dbg(&dev->dev, "PCI INT %c not routed\n", 'A' + pin - 1); |
908 | return 0; | 906 | return 0; |
909 | } | 907 | } |
910 | dev_dbg(&dev->dev, "PCI INT %c -> PIRQ %02x, mask %04x, excl %04x", | 908 | dev_dbg(&dev->dev, "PCI INT %c -> PIRQ %02x, mask %04x, excl %04x", |
911 | 'A' + pin, pirq, mask, pirq_table->exclusive_irqs); | 909 | 'A' + pin - 1, pirq, mask, pirq_table->exclusive_irqs); |
912 | mask &= pcibios_irq_mask; | 910 | mask &= pcibios_irq_mask; |
913 | 911 | ||
914 | /* Work around broken HP Pavilion Notebooks which assign USB to | 912 | /* Work around broken HP Pavilion Notebooks which assign USB to |
@@ -950,7 +948,7 @@ static int pcibios_lookup_irq(struct pci_dev *dev, int assign) | |||
950 | newirq = i; | 948 | newirq = i; |
951 | } | 949 | } |
952 | } | 950 | } |
953 | dev_dbg(&dev->dev, "PCI INT %c -> newirq %d", 'A' + pin, newirq); | 951 | dev_dbg(&dev->dev, "PCI INT %c -> newirq %d", 'A' + pin - 1, newirq); |
954 | 952 | ||
955 | /* Check if it is hardcoded */ | 953 | /* Check if it is hardcoded */ |
956 | if ((pirq & 0xf0) == 0xf0) { | 954 | if ((pirq & 0xf0) == 0xf0) { |
@@ -978,18 +976,18 @@ static int pcibios_lookup_irq(struct pci_dev *dev, int assign) | |||
978 | return 0; | 976 | return 0; |
979 | } | 977 | } |
980 | } | 978 | } |
981 | dev_info(&dev->dev, "%s PCI INT %c -> IRQ %d\n", msg, 'A' + pin, irq); | 979 | dev_info(&dev->dev, "%s PCI INT %c -> IRQ %d\n", msg, 'A' + pin - 1, irq); |
982 | 980 | ||
983 | /* Update IRQ for all devices with the same pirq value */ | 981 | /* Update IRQ for all devices with the same pirq value */ |
984 | while ((dev2 = pci_get_device(PCI_ANY_ID, PCI_ANY_ID, dev2)) != NULL) { | 982 | while ((dev2 = pci_get_device(PCI_ANY_ID, PCI_ANY_ID, dev2)) != NULL) { |
985 | pci_read_config_byte(dev2, PCI_INTERRUPT_PIN, &pin); | 983 | pci_read_config_byte(dev2, PCI_INTERRUPT_PIN, &pin); |
986 | if (!pin) | 984 | if (!pin) |
987 | continue; | 985 | continue; |
988 | pin--; | 986 | |
989 | info = pirq_get_info(dev2); | 987 | info = pirq_get_info(dev2); |
990 | if (!info) | 988 | if (!info) |
991 | continue; | 989 | continue; |
992 | if (info->irq[pin].link == pirq) { | 990 | if (info->irq[pin - 1].link == pirq) { |
993 | /* | 991 | /* |
994 | * We refuse to override the dev->irq | 992 | * We refuse to override the dev->irq |
995 | * information. Give a warning! | 993 | * information. Give a warning! |
@@ -1043,6 +1041,9 @@ static void __init pcibios_fixup_irqs(void) | |||
1043 | dev = NULL; | 1041 | dev = NULL; |
1044 | while ((dev = pci_get_device(PCI_ANY_ID, PCI_ANY_ID, dev)) != NULL) { | 1042 | while ((dev = pci_get_device(PCI_ANY_ID, PCI_ANY_ID, dev)) != NULL) { |
1045 | pci_read_config_byte(dev, PCI_INTERRUPT_PIN, &pin); | 1043 | pci_read_config_byte(dev, PCI_INTERRUPT_PIN, &pin); |
1044 | if (!pin) | ||
1045 | continue; | ||
1046 | |||
1046 | #ifdef CONFIG_X86_IO_APIC | 1047 | #ifdef CONFIG_X86_IO_APIC |
1047 | /* | 1048 | /* |
1048 | * Recalculate IRQ numbers if we use the I/O APIC. | 1049 | * Recalculate IRQ numbers if we use the I/O APIC. |
@@ -1050,15 +1051,11 @@ static void __init pcibios_fixup_irqs(void) | |||
1050 | if (io_apic_assign_pci_irqs) { | 1051 | if (io_apic_assign_pci_irqs) { |
1051 | int irq; | 1052 | int irq; |
1052 | 1053 | ||
1053 | if (!pin) | ||
1054 | continue; | ||
1055 | |||
1056 | /* | 1054 | /* |
1057 | * interrupt pins are numbered starting from 1 | 1055 | * interrupt pins are numbered starting from 1 |
1058 | */ | 1056 | */ |
1059 | pin--; | ||
1060 | irq = IO_APIC_get_PCI_irq_vector(dev->bus->number, | 1057 | irq = IO_APIC_get_PCI_irq_vector(dev->bus->number, |
1061 | PCI_SLOT(dev->devfn), pin); | 1058 | PCI_SLOT(dev->devfn), pin - 1); |
1062 | /* | 1059 | /* |
1063 | * Busses behind bridges are typically not listed in the | 1060 | * Busses behind bridges are typically not listed in the |
1064 | * MP-table. In this case we have to look up the IRQ | 1061 | * MP-table. In this case we have to look up the IRQ |
@@ -1071,22 +1068,22 @@ static void __init pcibios_fixup_irqs(void) | |||
1071 | struct pci_dev *bridge = dev->bus->self; | 1068 | struct pci_dev *bridge = dev->bus->self; |
1072 | int bus; | 1069 | int bus; |
1073 | 1070 | ||
1074 | pin = (pin + PCI_SLOT(dev->devfn)) % 4; | 1071 | pin = pci_swizzle_interrupt_pin(dev, pin); |
1075 | bus = bridge->bus->number; | 1072 | bus = bridge->bus->number; |
1076 | irq = IO_APIC_get_PCI_irq_vector(bus, | 1073 | irq = IO_APIC_get_PCI_irq_vector(bus, |
1077 | PCI_SLOT(bridge->devfn), pin); | 1074 | PCI_SLOT(bridge->devfn), pin - 1); |
1078 | if (irq >= 0) | 1075 | if (irq >= 0) |
1079 | dev_warn(&dev->dev, | 1076 | dev_warn(&dev->dev, |
1080 | "using bridge %s INT %c to " | 1077 | "using bridge %s INT %c to " |
1081 | "get IRQ %d\n", | 1078 | "get IRQ %d\n", |
1082 | pci_name(bridge), | 1079 | pci_name(bridge), |
1083 | 'A' + pin, irq); | 1080 | 'A' + pin - 1, irq); |
1084 | } | 1081 | } |
1085 | if (irq >= 0) { | 1082 | if (irq >= 0) { |
1086 | dev_info(&dev->dev, | 1083 | dev_info(&dev->dev, |
1087 | "PCI->APIC IRQ transform: INT %c " | 1084 | "PCI->APIC IRQ transform: INT %c " |
1088 | "-> IRQ %d\n", | 1085 | "-> IRQ %d\n", |
1089 | 'A' + pin, irq); | 1086 | 'A' + pin - 1, irq); |
1090 | dev->irq = irq; | 1087 | dev->irq = irq; |
1091 | } | 1088 | } |
1092 | } | 1089 | } |
@@ -1094,7 +1091,7 @@ static void __init pcibios_fixup_irqs(void) | |||
1094 | /* | 1091 | /* |
1095 | * Still no IRQ? Try to lookup one... | 1092 | * Still no IRQ? Try to lookup one... |
1096 | */ | 1093 | */ |
1097 | if (pin && !dev->irq) | 1094 | if (!dev->irq) |
1098 | pcibios_lookup_irq(dev, 0); | 1095 | pcibios_lookup_irq(dev, 0); |
1099 | } | 1096 | } |
1100 | } | 1097 | } |
@@ -1221,12 +1218,10 @@ static int pirq_enable_irq(struct pci_dev *dev) | |||
1221 | if (pin && !pcibios_lookup_irq(dev, 1) && !dev->irq) { | 1218 | if (pin && !pcibios_lookup_irq(dev, 1) && !dev->irq) { |
1222 | char *msg = ""; | 1219 | char *msg = ""; |
1223 | 1220 | ||
1224 | pin--; /* interrupt pins are numbered starting from 1 */ | ||
1225 | |||
1226 | if (io_apic_assign_pci_irqs) { | 1221 | if (io_apic_assign_pci_irqs) { |
1227 | int irq; | 1222 | int irq; |
1228 | 1223 | ||
1229 | irq = IO_APIC_get_PCI_irq_vector(dev->bus->number, PCI_SLOT(dev->devfn), pin); | 1224 | irq = IO_APIC_get_PCI_irq_vector(dev->bus->number, PCI_SLOT(dev->devfn), pin - 1); |
1230 | /* | 1225 | /* |
1231 | * Busses behind bridges are typically not listed in the MP-table. | 1226 | * Busses behind bridges are typically not listed in the MP-table. |
1232 | * In this case we have to look up the IRQ based on the parent bus, | 1227 | * In this case we have to look up the IRQ based on the parent bus, |
@@ -1237,20 +1232,20 @@ static int pirq_enable_irq(struct pci_dev *dev) | |||
1237 | while (irq < 0 && dev->bus->parent) { /* go back to the bridge */ | 1232 | while (irq < 0 && dev->bus->parent) { /* go back to the bridge */ |
1238 | struct pci_dev *bridge = dev->bus->self; | 1233 | struct pci_dev *bridge = dev->bus->self; |
1239 | 1234 | ||
1240 | pin = (pin + PCI_SLOT(dev->devfn)) % 4; | 1235 | pin = pci_swizzle_interrupt_pin(dev, pin); |
1241 | irq = IO_APIC_get_PCI_irq_vector(bridge->bus->number, | 1236 | irq = IO_APIC_get_PCI_irq_vector(bridge->bus->number, |
1242 | PCI_SLOT(bridge->devfn), pin); | 1237 | PCI_SLOT(bridge->devfn), pin - 1); |
1243 | if (irq >= 0) | 1238 | if (irq >= 0) |
1244 | dev_warn(&dev->dev, "using bridge %s " | 1239 | dev_warn(&dev->dev, "using bridge %s " |
1245 | "INT %c to get IRQ %d\n", | 1240 | "INT %c to get IRQ %d\n", |
1246 | pci_name(bridge), 'A' + pin, | 1241 | pci_name(bridge), 'A' + pin - 1, |
1247 | irq); | 1242 | irq); |
1248 | dev = bridge; | 1243 | dev = bridge; |
1249 | } | 1244 | } |
1250 | dev = temp_dev; | 1245 | dev = temp_dev; |
1251 | if (irq >= 0) { | 1246 | if (irq >= 0) { |
1252 | dev_info(&dev->dev, "PCI->APIC IRQ transform: " | 1247 | dev_info(&dev->dev, "PCI->APIC IRQ transform: " |
1253 | "INT %c -> IRQ %d\n", 'A' + pin, irq); | 1248 | "INT %c -> IRQ %d\n", 'A' + pin - 1, irq); |
1254 | dev->irq = irq; | 1249 | dev->irq = irq; |
1255 | return 0; | 1250 | return 0; |
1256 | } else | 1251 | } else |
@@ -1269,7 +1264,7 @@ static int pirq_enable_irq(struct pci_dev *dev) | |||
1269 | return 0; | 1264 | return 0; |
1270 | 1265 | ||
1271 | dev_warn(&dev->dev, "can't find IRQ for PCI INT %c%s\n", | 1266 | dev_warn(&dev->dev, "can't find IRQ for PCI INT %c%s\n", |
1272 | 'A' + pin, msg); | 1267 | 'A' + pin - 1, msg); |
1273 | } | 1268 | } |
1274 | return 0; | 1269 | return 0; |
1275 | } | 1270 | } |
diff --git a/arch/x86/pci/legacy.c b/arch/x86/pci/legacy.c index b722dd481b39..f1065b129e9c 100644 --- a/arch/x86/pci/legacy.c +++ b/arch/x86/pci/legacy.c | |||
@@ -3,7 +3,7 @@ | |||
3 | */ | 3 | */ |
4 | #include <linux/init.h> | 4 | #include <linux/init.h> |
5 | #include <linux/pci.h> | 5 | #include <linux/pci.h> |
6 | #include "pci.h" | 6 | #include <asm/pci_x86.h> |
7 | 7 | ||
8 | /* | 8 | /* |
9 | * Discover remaining PCI buses in case there are peer host bridges. | 9 | * Discover remaining PCI buses in case there are peer host bridges. |
diff --git a/arch/x86/pci/mmconfig-shared.c b/arch/x86/pci/mmconfig-shared.c index 654a2234f8f3..89bf9242c80a 100644 --- a/arch/x86/pci/mmconfig-shared.c +++ b/arch/x86/pci/mmconfig-shared.c | |||
@@ -15,8 +15,7 @@ | |||
15 | #include <linux/acpi.h> | 15 | #include <linux/acpi.h> |
16 | #include <linux/bitmap.h> | 16 | #include <linux/bitmap.h> |
17 | #include <asm/e820.h> | 17 | #include <asm/e820.h> |
18 | 18 | #include <asm/pci_x86.h> | |
19 | #include "pci.h" | ||
20 | 19 | ||
21 | /* aperture is up to 256MB but BIOS may reserve less */ | 20 | /* aperture is up to 256MB but BIOS may reserve less */ |
22 | #define MMCONFIG_APER_MIN (2 * 1024*1024) | 21 | #define MMCONFIG_APER_MIN (2 * 1024*1024) |
diff --git a/arch/x86/pci/mmconfig_32.c b/arch/x86/pci/mmconfig_32.c index f3c761dce695..8b2d561046a3 100644 --- a/arch/x86/pci/mmconfig_32.c +++ b/arch/x86/pci/mmconfig_32.c | |||
@@ -13,7 +13,7 @@ | |||
13 | #include <linux/init.h> | 13 | #include <linux/init.h> |
14 | #include <linux/acpi.h> | 14 | #include <linux/acpi.h> |
15 | #include <asm/e820.h> | 15 | #include <asm/e820.h> |
16 | #include "pci.h" | 16 | #include <asm/pci_x86.h> |
17 | 17 | ||
18 | /* Assume systems with more busses have correct MCFG */ | 18 | /* Assume systems with more busses have correct MCFG */ |
19 | #define mmcfg_virt_addr ((void __iomem *) fix_to_virt(FIX_PCIE_MCFG)) | 19 | #define mmcfg_virt_addr ((void __iomem *) fix_to_virt(FIX_PCIE_MCFG)) |
diff --git a/arch/x86/pci/mmconfig_64.c b/arch/x86/pci/mmconfig_64.c index a1994163c99d..30007ffc8e11 100644 --- a/arch/x86/pci/mmconfig_64.c +++ b/arch/x86/pci/mmconfig_64.c | |||
@@ -10,8 +10,7 @@ | |||
10 | #include <linux/acpi.h> | 10 | #include <linux/acpi.h> |
11 | #include <linux/bitmap.h> | 11 | #include <linux/bitmap.h> |
12 | #include <asm/e820.h> | 12 | #include <asm/e820.h> |
13 | 13 | #include <asm/pci_x86.h> | |
14 | #include "pci.h" | ||
15 | 14 | ||
16 | /* Static virtual mapping of the MMCONFIG aperture */ | 15 | /* Static virtual mapping of the MMCONFIG aperture */ |
17 | struct mmcfg_virt { | 16 | struct mmcfg_virt { |
diff --git a/arch/x86/pci/numaq_32.c b/arch/x86/pci/numaq_32.c index 1177845d3186..2089354968a2 100644 --- a/arch/x86/pci/numaq_32.c +++ b/arch/x86/pci/numaq_32.c | |||
@@ -7,7 +7,7 @@ | |||
7 | #include <linux/nodemask.h> | 7 | #include <linux/nodemask.h> |
8 | #include <mach_apic.h> | 8 | #include <mach_apic.h> |
9 | #include <asm/mpspec.h> | 9 | #include <asm/mpspec.h> |
10 | #include "pci.h" | 10 | #include <asm/pci_x86.h> |
11 | 11 | ||
12 | #define XQUAD_PORTIO_BASE 0xfe400000 | 12 | #define XQUAD_PORTIO_BASE 0xfe400000 |
13 | #define XQUAD_PORTIO_QUAD 0x40000 /* 256k per quad. */ | 13 | #define XQUAD_PORTIO_QUAD 0x40000 /* 256k per quad. */ |
diff --git a/arch/x86/pci/olpc.c b/arch/x86/pci/olpc.c index e11e9e803d5f..b889d824f7c6 100644 --- a/arch/x86/pci/olpc.c +++ b/arch/x86/pci/olpc.c | |||
@@ -29,7 +29,7 @@ | |||
29 | #include <linux/init.h> | 29 | #include <linux/init.h> |
30 | #include <asm/olpc.h> | 30 | #include <asm/olpc.h> |
31 | #include <asm/geode.h> | 31 | #include <asm/geode.h> |
32 | #include "pci.h" | 32 | #include <asm/pci_x86.h> |
33 | 33 | ||
34 | /* | 34 | /* |
35 | * In the tables below, the first two line (8 longwords) are the | 35 | * In the tables below, the first two line (8 longwords) are the |
diff --git a/arch/x86/pci/pcbios.c b/arch/x86/pci/pcbios.c index 37472fc6f729..b82cae970dfd 100644 --- a/arch/x86/pci/pcbios.c +++ b/arch/x86/pci/pcbios.c | |||
@@ -6,9 +6,8 @@ | |||
6 | #include <linux/init.h> | 6 | #include <linux/init.h> |
7 | #include <linux/module.h> | 7 | #include <linux/module.h> |
8 | #include <linux/uaccess.h> | 8 | #include <linux/uaccess.h> |
9 | #include "pci.h" | 9 | #include <asm/pci_x86.h> |
10 | #include "pci-functions.h" | 10 | #include <asm/mach-default/pci-functions.h> |
11 | |||
12 | 11 | ||
13 | /* BIOS32 signature: "_32_" */ | 12 | /* BIOS32 signature: "_32_" */ |
14 | #define BIOS32_SIGNATURE (('_' << 0) + ('3' << 8) + ('2' << 16) + ('_' << 24)) | 13 | #define BIOS32_SIGNATURE (('_' << 0) + ('3' << 8) + ('2' << 16) + ('_' << 24)) |
diff --git a/arch/x86/pci/visws.c b/arch/x86/pci/visws.c index 42f4cb19faca..bcead7a46871 100644 --- a/arch/x86/pci/visws.c +++ b/arch/x86/pci/visws.c | |||
@@ -9,11 +9,10 @@ | |||
9 | #include <linux/init.h> | 9 | #include <linux/init.h> |
10 | 10 | ||
11 | #include <asm/setup.h> | 11 | #include <asm/setup.h> |
12 | #include <asm/pci_x86.h> | ||
12 | #include <asm/visws/cobalt.h> | 13 | #include <asm/visws/cobalt.h> |
13 | #include <asm/visws/lithium.h> | 14 | #include <asm/visws/lithium.h> |
14 | 15 | ||
15 | #include "pci.h" | ||
16 | |||
17 | static int pci_visws_enable_irq(struct pci_dev *dev) { return 0; } | 16 | static int pci_visws_enable_irq(struct pci_dev *dev) { return 0; } |
18 | static void pci_visws_disable_irq(struct pci_dev *dev) { } | 17 | static void pci_visws_disable_irq(struct pci_dev *dev) { } |
19 | 18 | ||
@@ -25,24 +24,6 @@ static void pci_visws_disable_irq(struct pci_dev *dev) { } | |||
25 | 24 | ||
26 | unsigned int pci_bus0, pci_bus1; | 25 | unsigned int pci_bus0, pci_bus1; |
27 | 26 | ||
28 | static inline u8 bridge_swizzle(u8 pin, u8 slot) | ||
29 | { | ||
30 | return (((pin - 1) + slot) % 4) + 1; | ||
31 | } | ||
32 | |||
33 | static u8 __init visws_swizzle(struct pci_dev *dev, u8 *pinp) | ||
34 | { | ||
35 | u8 pin = *pinp; | ||
36 | |||
37 | while (dev->bus->self) { /* Move up the chain of bridges. */ | ||
38 | pin = bridge_swizzle(pin, PCI_SLOT(dev->devfn)); | ||
39 | dev = dev->bus->self; | ||
40 | } | ||
41 | *pinp = pin; | ||
42 | |||
43 | return PCI_SLOT(dev->devfn); | ||
44 | } | ||
45 | |||
46 | static int __init visws_map_irq(struct pci_dev *dev, u8 slot, u8 pin) | 27 | static int __init visws_map_irq(struct pci_dev *dev, u8 slot, u8 pin) |
47 | { | 28 | { |
48 | int irq, bus = dev->bus->number; | 29 | int irq, bus = dev->bus->number; |
@@ -107,7 +88,7 @@ int __init pci_visws_init(void) | |||
107 | raw_pci_ops = &pci_direct_conf1; | 88 | raw_pci_ops = &pci_direct_conf1; |
108 | pci_scan_bus_with_sysdata(pci_bus0); | 89 | pci_scan_bus_with_sysdata(pci_bus0); |
109 | pci_scan_bus_with_sysdata(pci_bus1); | 90 | pci_scan_bus_with_sysdata(pci_bus1); |
110 | pci_fixup_irqs(visws_swizzle, visws_map_irq); | 91 | pci_fixup_irqs(pci_common_swizzle, visws_map_irq); |
111 | pcibios_resource_survey(); | 92 | pcibios_resource_survey(); |
112 | return 0; | 93 | return 0; |
113 | } | 94 | } |
diff --git a/arch/x86/scripts/strip-symbols b/arch/x86/scripts/strip-symbols new file mode 100644 index 000000000000..a2f1ccb827c7 --- /dev/null +++ b/arch/x86/scripts/strip-symbols | |||
@@ -0,0 +1 @@ | |||
__cpu_vendor_dev_X86_VENDOR_* | |||
diff --git a/arch/x86/vdso/vclock_gettime.c b/arch/x86/vdso/vclock_gettime.c index 1ef0f90813d6..d9d35824c56f 100644 --- a/arch/x86/vdso/vclock_gettime.c +++ b/arch/x86/vdso/vclock_gettime.c | |||
@@ -9,6 +9,9 @@ | |||
9 | * Also alternative() doesn't work. | 9 | * Also alternative() doesn't work. |
10 | */ | 10 | */ |
11 | 11 | ||
12 | /* Disable profiling for userspace code: */ | ||
13 | #define DISABLE_BRANCH_PROFILING | ||
14 | |||
12 | #include <linux/kernel.h> | 15 | #include <linux/kernel.h> |
13 | #include <linux/posix-timers.h> | 16 | #include <linux/posix-timers.h> |
14 | #include <linux/time.h> | 17 | #include <linux/time.h> |
diff --git a/arch/x86/vdso/vdso32-setup.c b/arch/x86/vdso/vdso32-setup.c index 513f330c5832..1241f118ab56 100644 --- a/arch/x86/vdso/vdso32-setup.c +++ b/arch/x86/vdso/vdso32-setup.c | |||
@@ -310,7 +310,7 @@ int __init sysenter_setup(void) | |||
310 | } | 310 | } |
311 | 311 | ||
312 | /* Setup a VMA at program startup for the vsyscall page */ | 312 | /* Setup a VMA at program startup for the vsyscall page */ |
313 | int arch_setup_additional_pages(struct linux_binprm *bprm, int exstack) | 313 | int arch_setup_additional_pages(struct linux_binprm *bprm, int uses_interp) |
314 | { | 314 | { |
315 | struct mm_struct *mm = current->mm; | 315 | struct mm_struct *mm = current->mm; |
316 | unsigned long addr; | 316 | unsigned long addr; |
diff --git a/arch/x86/vdso/vma.c b/arch/x86/vdso/vma.c index 257ba4a10abf..9c98cc6ba978 100644 --- a/arch/x86/vdso/vma.c +++ b/arch/x86/vdso/vma.c | |||
@@ -98,7 +98,7 @@ static unsigned long vdso_addr(unsigned long start, unsigned len) | |||
98 | 98 | ||
99 | /* Setup a VMA at program startup for the vsyscall page. | 99 | /* Setup a VMA at program startup for the vsyscall page. |
100 | Not called for compat tasks */ | 100 | Not called for compat tasks */ |
101 | int arch_setup_additional_pages(struct linux_binprm *bprm, int exstack) | 101 | int arch_setup_additional_pages(struct linux_binprm *bprm, int uses_interp) |
102 | { | 102 | { |
103 | struct mm_struct *mm = current->mm; | 103 | struct mm_struct *mm = current->mm; |
104 | unsigned long addr; | 104 | unsigned long addr; |
diff --git a/arch/x86/xen/enlighten.c b/arch/x86/xen/enlighten.c index 5e4686d70f62..bea215230b20 100644 --- a/arch/x86/xen/enlighten.c +++ b/arch/x86/xen/enlighten.c | |||
@@ -28,6 +28,7 @@ | |||
28 | #include <linux/console.h> | 28 | #include <linux/console.h> |
29 | 29 | ||
30 | #include <xen/interface/xen.h> | 30 | #include <xen/interface/xen.h> |
31 | #include <xen/interface/version.h> | ||
31 | #include <xen/interface/physdev.h> | 32 | #include <xen/interface/physdev.h> |
32 | #include <xen/interface/vcpu.h> | 33 | #include <xen/interface/vcpu.h> |
33 | #include <xen/features.h> | 34 | #include <xen/features.h> |
@@ -793,7 +794,7 @@ static int xen_write_msr_safe(unsigned int msr, unsigned low, unsigned high) | |||
793 | 794 | ||
794 | ret = 0; | 795 | ret = 0; |
795 | 796 | ||
796 | switch(msr) { | 797 | switch (msr) { |
797 | #ifdef CONFIG_X86_64 | 798 | #ifdef CONFIG_X86_64 |
798 | unsigned which; | 799 | unsigned which; |
799 | u64 base; | 800 | u64 base; |
@@ -1453,7 +1454,7 @@ static __init void xen_map_identity_early(pmd_t *pmd, unsigned long max_pfn) | |||
1453 | 1454 | ||
1454 | ident_pte = 0; | 1455 | ident_pte = 0; |
1455 | pfn = 0; | 1456 | pfn = 0; |
1456 | for(pmdidx = 0; pmdidx < PTRS_PER_PMD && pfn < max_pfn; pmdidx++) { | 1457 | for (pmdidx = 0; pmdidx < PTRS_PER_PMD && pfn < max_pfn; pmdidx++) { |
1457 | pte_t *pte_page; | 1458 | pte_t *pte_page; |
1458 | 1459 | ||
1459 | /* Reuse or allocate a page of ptes */ | 1460 | /* Reuse or allocate a page of ptes */ |
@@ -1471,7 +1472,7 @@ static __init void xen_map_identity_early(pmd_t *pmd, unsigned long max_pfn) | |||
1471 | } | 1472 | } |
1472 | 1473 | ||
1473 | /* Install mappings */ | 1474 | /* Install mappings */ |
1474 | for(pteidx = 0; pteidx < PTRS_PER_PTE; pteidx++, pfn++) { | 1475 | for (pteidx = 0; pteidx < PTRS_PER_PTE; pteidx++, pfn++) { |
1475 | pte_t pte; | 1476 | pte_t pte; |
1476 | 1477 | ||
1477 | if (pfn > max_pfn_mapped) | 1478 | if (pfn > max_pfn_mapped) |
@@ -1485,7 +1486,7 @@ static __init void xen_map_identity_early(pmd_t *pmd, unsigned long max_pfn) | |||
1485 | } | 1486 | } |
1486 | } | 1487 | } |
1487 | 1488 | ||
1488 | for(pteidx = 0; pteidx < ident_pte; pteidx += PTRS_PER_PTE) | 1489 | for (pteidx = 0; pteidx < ident_pte; pteidx += PTRS_PER_PTE) |
1489 | set_page_prot(&level1_ident_pgt[pteidx], PAGE_KERNEL_RO); | 1490 | set_page_prot(&level1_ident_pgt[pteidx], PAGE_KERNEL_RO); |
1490 | 1491 | ||
1491 | set_page_prot(pmd, PAGE_KERNEL_RO); | 1492 | set_page_prot(pmd, PAGE_KERNEL_RO); |
@@ -1499,7 +1500,7 @@ static void convert_pfn_mfn(void *v) | |||
1499 | 1500 | ||
1500 | /* All levels are converted the same way, so just treat them | 1501 | /* All levels are converted the same way, so just treat them |
1501 | as ptes. */ | 1502 | as ptes. */ |
1502 | for(i = 0; i < PTRS_PER_PTE; i++) | 1503 | for (i = 0; i < PTRS_PER_PTE; i++) |
1503 | pte[i] = xen_make_pte(pte[i].pte); | 1504 | pte[i] = xen_make_pte(pte[i].pte); |
1504 | } | 1505 | } |
1505 | 1506 | ||
@@ -1514,7 +1515,8 @@ static void convert_pfn_mfn(void *v) | |||
1514 | * of the physical mapping once some sort of allocator has been set | 1515 | * of the physical mapping once some sort of allocator has been set |
1515 | * up. | 1516 | * up. |
1516 | */ | 1517 | */ |
1517 | static __init pgd_t *xen_setup_kernel_pagetable(pgd_t *pgd, unsigned long max_pfn) | 1518 | static __init pgd_t *xen_setup_kernel_pagetable(pgd_t *pgd, |
1519 | unsigned long max_pfn) | ||
1518 | { | 1520 | { |
1519 | pud_t *l3; | 1521 | pud_t *l3; |
1520 | pmd_t *l2; | 1522 | pmd_t *l2; |
@@ -1577,7 +1579,8 @@ static __init pgd_t *xen_setup_kernel_pagetable(pgd_t *pgd, unsigned long max_pf | |||
1577 | #else /* !CONFIG_X86_64 */ | 1579 | #else /* !CONFIG_X86_64 */ |
1578 | static pmd_t level2_kernel_pgt[PTRS_PER_PMD] __page_aligned_bss; | 1580 | static pmd_t level2_kernel_pgt[PTRS_PER_PMD] __page_aligned_bss; |
1579 | 1581 | ||
1580 | static __init pgd_t *xen_setup_kernel_pagetable(pgd_t *pgd, unsigned long max_pfn) | 1582 | static __init pgd_t *xen_setup_kernel_pagetable(pgd_t *pgd, |
1583 | unsigned long max_pfn) | ||
1581 | { | 1584 | { |
1582 | pmd_t *kernel_pmd; | 1585 | pmd_t *kernel_pmd; |
1583 | 1586 | ||
diff --git a/arch/x86/xen/mmu.c b/arch/x86/xen/mmu.c index 636ef4caa52d..503c240e26c7 100644 --- a/arch/x86/xen/mmu.c +++ b/arch/x86/xen/mmu.c | |||
@@ -154,13 +154,13 @@ void xen_setup_mfn_list_list(void) | |||
154 | { | 154 | { |
155 | unsigned pfn, idx; | 155 | unsigned pfn, idx; |
156 | 156 | ||
157 | for(pfn = 0; pfn < MAX_DOMAIN_PAGES; pfn += P2M_ENTRIES_PER_PAGE) { | 157 | for (pfn = 0; pfn < MAX_DOMAIN_PAGES; pfn += P2M_ENTRIES_PER_PAGE) { |
158 | unsigned topidx = p2m_top_index(pfn); | 158 | unsigned topidx = p2m_top_index(pfn); |
159 | 159 | ||
160 | p2m_top_mfn[topidx] = virt_to_mfn(p2m_top[topidx]); | 160 | p2m_top_mfn[topidx] = virt_to_mfn(p2m_top[topidx]); |
161 | } | 161 | } |
162 | 162 | ||
163 | for(idx = 0; idx < ARRAY_SIZE(p2m_top_mfn_list); idx++) { | 163 | for (idx = 0; idx < ARRAY_SIZE(p2m_top_mfn_list); idx++) { |
164 | unsigned topidx = idx * P2M_ENTRIES_PER_PAGE; | 164 | unsigned topidx = idx * P2M_ENTRIES_PER_PAGE; |
165 | p2m_top_mfn_list[idx] = virt_to_mfn(&p2m_top_mfn[topidx]); | 165 | p2m_top_mfn_list[idx] = virt_to_mfn(&p2m_top_mfn[topidx]); |
166 | } | 166 | } |
@@ -179,7 +179,7 @@ void __init xen_build_dynamic_phys_to_machine(void) | |||
179 | unsigned long max_pfn = min(MAX_DOMAIN_PAGES, xen_start_info->nr_pages); | 179 | unsigned long max_pfn = min(MAX_DOMAIN_PAGES, xen_start_info->nr_pages); |
180 | unsigned pfn; | 180 | unsigned pfn; |
181 | 181 | ||
182 | for(pfn = 0; pfn < max_pfn; pfn += P2M_ENTRIES_PER_PAGE) { | 182 | for (pfn = 0; pfn < max_pfn; pfn += P2M_ENTRIES_PER_PAGE) { |
183 | unsigned topidx = p2m_top_index(pfn); | 183 | unsigned topidx = p2m_top_index(pfn); |
184 | 184 | ||
185 | p2m_top[topidx] = &mfn_list[pfn]; | 185 | p2m_top[topidx] = &mfn_list[pfn]; |
@@ -207,7 +207,7 @@ static void alloc_p2m(unsigned long **pp, unsigned long *mfnp) | |||
207 | p = (void *)__get_free_page(GFP_KERNEL | __GFP_NOFAIL); | 207 | p = (void *)__get_free_page(GFP_KERNEL | __GFP_NOFAIL); |
208 | BUG_ON(p == NULL); | 208 | BUG_ON(p == NULL); |
209 | 209 | ||
210 | for(i = 0; i < P2M_ENTRIES_PER_PAGE; i++) | 210 | for (i = 0; i < P2M_ENTRIES_PER_PAGE; i++) |
211 | p[i] = INVALID_P2M_ENTRY; | 211 | p[i] = INVALID_P2M_ENTRY; |
212 | 212 | ||
213 | if (cmpxchg(pp, p2m_missing, p) != p2m_missing) | 213 | if (cmpxchg(pp, p2m_missing, p) != p2m_missing) |
@@ -407,7 +407,8 @@ out: | |||
407 | preempt_enable(); | 407 | preempt_enable(); |
408 | } | 408 | } |
409 | 409 | ||
410 | pte_t xen_ptep_modify_prot_start(struct mm_struct *mm, unsigned long addr, pte_t *ptep) | 410 | pte_t xen_ptep_modify_prot_start(struct mm_struct *mm, |
411 | unsigned long addr, pte_t *ptep) | ||
411 | { | 412 | { |
412 | /* Just return the pte as-is. We preserve the bits on commit */ | 413 | /* Just return the pte as-is. We preserve the bits on commit */ |
413 | return *ptep; | 414 | return *ptep; |
@@ -878,7 +879,8 @@ static void __xen_pgd_pin(struct mm_struct *mm, pgd_t *pgd) | |||
878 | 879 | ||
879 | if (user_pgd) { | 880 | if (user_pgd) { |
880 | xen_pin_page(mm, virt_to_page(user_pgd), PT_PGD); | 881 | xen_pin_page(mm, virt_to_page(user_pgd), PT_PGD); |
881 | xen_do_pin(MMUEXT_PIN_L4_TABLE, PFN_DOWN(__pa(user_pgd))); | 882 | xen_do_pin(MMUEXT_PIN_L4_TABLE, |
883 | PFN_DOWN(__pa(user_pgd))); | ||
882 | } | 884 | } |
883 | } | 885 | } |
884 | #else /* CONFIG_X86_32 */ | 886 | #else /* CONFIG_X86_32 */ |
@@ -993,7 +995,8 @@ static void __xen_pgd_unpin(struct mm_struct *mm, pgd_t *pgd) | |||
993 | pgd_t *user_pgd = xen_get_user_pgd(pgd); | 995 | pgd_t *user_pgd = xen_get_user_pgd(pgd); |
994 | 996 | ||
995 | if (user_pgd) { | 997 | if (user_pgd) { |
996 | xen_do_pin(MMUEXT_UNPIN_TABLE, PFN_DOWN(__pa(user_pgd))); | 998 | xen_do_pin(MMUEXT_UNPIN_TABLE, |
999 | PFN_DOWN(__pa(user_pgd))); | ||
997 | xen_unpin_page(mm, virt_to_page(user_pgd), PT_PGD); | 1000 | xen_unpin_page(mm, virt_to_page(user_pgd), PT_PGD); |
998 | } | 1001 | } |
999 | } | 1002 | } |
@@ -1079,7 +1082,7 @@ static void drop_other_mm_ref(void *info) | |||
1079 | 1082 | ||
1080 | static void xen_drop_mm_ref(struct mm_struct *mm) | 1083 | static void xen_drop_mm_ref(struct mm_struct *mm) |
1081 | { | 1084 | { |
1082 | cpumask_t mask; | 1085 | cpumask_var_t mask; |
1083 | unsigned cpu; | 1086 | unsigned cpu; |
1084 | 1087 | ||
1085 | if (current->active_mm == mm) { | 1088 | if (current->active_mm == mm) { |
@@ -1091,7 +1094,16 @@ static void xen_drop_mm_ref(struct mm_struct *mm) | |||
1091 | } | 1094 | } |
1092 | 1095 | ||
1093 | /* Get the "official" set of cpus referring to our pagetable. */ | 1096 | /* Get the "official" set of cpus referring to our pagetable. */ |
1094 | mask = mm->cpu_vm_mask; | 1097 | if (!alloc_cpumask_var(&mask, GFP_ATOMIC)) { |
1098 | for_each_online_cpu(cpu) { | ||
1099 | if (!cpumask_test_cpu(cpu, &mm->cpu_vm_mask) | ||
1100 | && per_cpu(xen_current_cr3, cpu) != __pa(mm->pgd)) | ||
1101 | continue; | ||
1102 | smp_call_function_single(cpu, drop_other_mm_ref, mm, 1); | ||
1103 | } | ||
1104 | return; | ||
1105 | } | ||
1106 | cpumask_copy(mask, &mm->cpu_vm_mask); | ||
1095 | 1107 | ||
1096 | /* It's possible that a vcpu may have a stale reference to our | 1108 | /* It's possible that a vcpu may have a stale reference to our |
1097 | cr3, because its in lazy mode, and it hasn't yet flushed | 1109 | cr3, because its in lazy mode, and it hasn't yet flushed |
@@ -1100,11 +1112,12 @@ static void xen_drop_mm_ref(struct mm_struct *mm) | |||
1100 | if needed. */ | 1112 | if needed. */ |
1101 | for_each_online_cpu(cpu) { | 1113 | for_each_online_cpu(cpu) { |
1102 | if (per_cpu(xen_current_cr3, cpu) == __pa(mm->pgd)) | 1114 | if (per_cpu(xen_current_cr3, cpu) == __pa(mm->pgd)) |
1103 | cpu_set(cpu, mask); | 1115 | cpumask_set_cpu(cpu, mask); |
1104 | } | 1116 | } |
1105 | 1117 | ||
1106 | if (!cpus_empty(mask)) | 1118 | if (!cpumask_empty(mask)) |
1107 | smp_call_function_mask(mask, drop_other_mm_ref, mm, 1); | 1119 | smp_call_function_many(mask, drop_other_mm_ref, mm, 1); |
1120 | free_cpumask_var(mask); | ||
1108 | } | 1121 | } |
1109 | #else | 1122 | #else |
1110 | static void xen_drop_mm_ref(struct mm_struct *mm) | 1123 | static void xen_drop_mm_ref(struct mm_struct *mm) |
diff --git a/arch/x86/xen/multicalls.c b/arch/x86/xen/multicalls.c index 8ea8a0d0b0de..c738644b5435 100644 --- a/arch/x86/xen/multicalls.c +++ b/arch/x86/xen/multicalls.c | |||
@@ -154,7 +154,7 @@ void xen_mc_flush(void) | |||
154 | ret, smp_processor_id()); | 154 | ret, smp_processor_id()); |
155 | dump_stack(); | 155 | dump_stack(); |
156 | for (i = 0; i < b->mcidx; i++) { | 156 | for (i = 0; i < b->mcidx; i++) { |
157 | printk(" call %2d/%d: op=%lu arg=[%lx] result=%ld\n", | 157 | printk(KERN_DEBUG " call %2d/%d: op=%lu arg=[%lx] result=%ld\n", |
158 | i+1, b->mcidx, | 158 | i+1, b->mcidx, |
159 | b->debug[i].op, | 159 | b->debug[i].op, |
160 | b->debug[i].args[0], | 160 | b->debug[i].args[0], |
diff --git a/arch/x86/xen/setup.c b/arch/x86/xen/setup.c index d67901083888..15c6c68db6a2 100644 --- a/arch/x86/xen/setup.c +++ b/arch/x86/xen/setup.c | |||
@@ -28,6 +28,9 @@ | |||
28 | /* These are code, but not functions. Defined in entry.S */ | 28 | /* These are code, but not functions. Defined in entry.S */ |
29 | extern const char xen_hypervisor_callback[]; | 29 | extern const char xen_hypervisor_callback[]; |
30 | extern const char xen_failsafe_callback[]; | 30 | extern const char xen_failsafe_callback[]; |
31 | extern void xen_sysenter_target(void); | ||
32 | extern void xen_syscall_target(void); | ||
33 | extern void xen_syscall32_target(void); | ||
31 | 34 | ||
32 | 35 | ||
33 | /** | 36 | /** |
@@ -110,7 +113,6 @@ static __cpuinit int register_callback(unsigned type, const void *func) | |||
110 | 113 | ||
111 | void __cpuinit xen_enable_sysenter(void) | 114 | void __cpuinit xen_enable_sysenter(void) |
112 | { | 115 | { |
113 | extern void xen_sysenter_target(void); | ||
114 | int ret; | 116 | int ret; |
115 | unsigned sysenter_feature; | 117 | unsigned sysenter_feature; |
116 | 118 | ||
@@ -132,8 +134,6 @@ void __cpuinit xen_enable_syscall(void) | |||
132 | { | 134 | { |
133 | #ifdef CONFIG_X86_64 | 135 | #ifdef CONFIG_X86_64 |
134 | int ret; | 136 | int ret; |
135 | extern void xen_syscall_target(void); | ||
136 | extern void xen_syscall32_target(void); | ||
137 | 137 | ||
138 | ret = register_callback(CALLBACKTYPE_syscall, xen_syscall_target); | 138 | ret = register_callback(CALLBACKTYPE_syscall, xen_syscall_target); |
139 | if (ret != 0) { | 139 | if (ret != 0) { |
@@ -160,7 +160,8 @@ void __init xen_arch_setup(void) | |||
160 | HYPERVISOR_vm_assist(VMASST_CMD_enable, VMASST_TYPE_writable_pagetables); | 160 | HYPERVISOR_vm_assist(VMASST_CMD_enable, VMASST_TYPE_writable_pagetables); |
161 | 161 | ||
162 | if (!xen_feature(XENFEAT_auto_translated_physmap)) | 162 | if (!xen_feature(XENFEAT_auto_translated_physmap)) |
163 | HYPERVISOR_vm_assist(VMASST_CMD_enable, VMASST_TYPE_pae_extended_cr3); | 163 | HYPERVISOR_vm_assist(VMASST_CMD_enable, |
164 | VMASST_TYPE_pae_extended_cr3); | ||
164 | 165 | ||
165 | if (register_callback(CALLBACKTYPE_event, xen_hypervisor_callback) || | 166 | if (register_callback(CALLBACKTYPE_event, xen_hypervisor_callback) || |
166 | register_callback(CALLBACKTYPE_failsafe, xen_failsafe_callback)) | 167 | register_callback(CALLBACKTYPE_failsafe, xen_failsafe_callback)) |
diff --git a/arch/x86/xen/smp.c b/arch/x86/xen/smp.c index acd9b6705e02..c44e2069c7c7 100644 --- a/arch/x86/xen/smp.c +++ b/arch/x86/xen/smp.c | |||
@@ -33,7 +33,7 @@ | |||
33 | #include "xen-ops.h" | 33 | #include "xen-ops.h" |
34 | #include "mmu.h" | 34 | #include "mmu.h" |
35 | 35 | ||
36 | cpumask_t xen_cpu_initialized_map; | 36 | cpumask_var_t xen_cpu_initialized_map; |
37 | 37 | ||
38 | static DEFINE_PER_CPU(int, resched_irq); | 38 | static DEFINE_PER_CPU(int, resched_irq); |
39 | static DEFINE_PER_CPU(int, callfunc_irq); | 39 | static DEFINE_PER_CPU(int, callfunc_irq); |
@@ -158,7 +158,7 @@ static void __init xen_fill_possible_map(void) | |||
158 | { | 158 | { |
159 | int i, rc; | 159 | int i, rc; |
160 | 160 | ||
161 | for (i = 0; i < NR_CPUS; i++) { | 161 | for (i = 0; i < nr_cpu_ids; i++) { |
162 | rc = HYPERVISOR_vcpu_op(VCPUOP_is_up, i, NULL); | 162 | rc = HYPERVISOR_vcpu_op(VCPUOP_is_up, i, NULL); |
163 | if (rc >= 0) { | 163 | if (rc >= 0) { |
164 | num_processors++; | 164 | num_processors++; |
@@ -192,11 +192,14 @@ static void __init xen_smp_prepare_cpus(unsigned int max_cpus) | |||
192 | if (xen_smp_intr_init(0)) | 192 | if (xen_smp_intr_init(0)) |
193 | BUG(); | 193 | BUG(); |
194 | 194 | ||
195 | xen_cpu_initialized_map = cpumask_of_cpu(0); | 195 | if (!alloc_cpumask_var(&xen_cpu_initialized_map, GFP_KERNEL)) |
196 | panic("could not allocate xen_cpu_initialized_map\n"); | ||
197 | |||
198 | cpumask_copy(xen_cpu_initialized_map, cpumask_of(0)); | ||
196 | 199 | ||
197 | /* Restrict the possible_map according to max_cpus. */ | 200 | /* Restrict the possible_map according to max_cpus. */ |
198 | while ((num_possible_cpus() > 1) && (num_possible_cpus() > max_cpus)) { | 201 | while ((num_possible_cpus() > 1) && (num_possible_cpus() > max_cpus)) { |
199 | for (cpu = NR_CPUS - 1; !cpu_possible(cpu); cpu--) | 202 | for (cpu = nr_cpu_ids - 1; !cpu_possible(cpu); cpu--) |
200 | continue; | 203 | continue; |
201 | cpu_clear(cpu, cpu_possible_map); | 204 | cpu_clear(cpu, cpu_possible_map); |
202 | } | 205 | } |
@@ -221,7 +224,7 @@ cpu_initialize_context(unsigned int cpu, struct task_struct *idle) | |||
221 | struct vcpu_guest_context *ctxt; | 224 | struct vcpu_guest_context *ctxt; |
222 | struct desc_struct *gdt; | 225 | struct desc_struct *gdt; |
223 | 226 | ||
224 | if (cpu_test_and_set(cpu, xen_cpu_initialized_map)) | 227 | if (cpumask_test_and_set_cpu(cpu, xen_cpu_initialized_map)) |
225 | return 0; | 228 | return 0; |
226 | 229 | ||
227 | ctxt = kzalloc(sizeof(*ctxt), GFP_KERNEL); | 230 | ctxt = kzalloc(sizeof(*ctxt), GFP_KERNEL); |
@@ -408,24 +411,23 @@ static void xen_smp_send_reschedule(int cpu) | |||
408 | xen_send_IPI_one(cpu, XEN_RESCHEDULE_VECTOR); | 411 | xen_send_IPI_one(cpu, XEN_RESCHEDULE_VECTOR); |
409 | } | 412 | } |
410 | 413 | ||
411 | static void xen_send_IPI_mask(cpumask_t mask, enum ipi_vector vector) | 414 | static void xen_send_IPI_mask(const struct cpumask *mask, |
415 | enum ipi_vector vector) | ||
412 | { | 416 | { |
413 | unsigned cpu; | 417 | unsigned cpu; |
414 | 418 | ||
415 | cpus_and(mask, mask, cpu_online_map); | 419 | for_each_cpu_and(cpu, mask, cpu_online_mask) |
416 | |||
417 | for_each_cpu_mask_nr(cpu, mask) | ||
418 | xen_send_IPI_one(cpu, vector); | 420 | xen_send_IPI_one(cpu, vector); |
419 | } | 421 | } |
420 | 422 | ||
421 | static void xen_smp_send_call_function_ipi(cpumask_t mask) | 423 | static void xen_smp_send_call_function_ipi(const struct cpumask *mask) |
422 | { | 424 | { |
423 | int cpu; | 425 | int cpu; |
424 | 426 | ||
425 | xen_send_IPI_mask(mask, XEN_CALL_FUNCTION_VECTOR); | 427 | xen_send_IPI_mask(mask, XEN_CALL_FUNCTION_VECTOR); |
426 | 428 | ||
427 | /* Make sure other vcpus get a chance to run if they need to. */ | 429 | /* Make sure other vcpus get a chance to run if they need to. */ |
428 | for_each_cpu_mask_nr(cpu, mask) { | 430 | for_each_cpu(cpu, mask) { |
429 | if (xen_vcpu_stolen(cpu)) { | 431 | if (xen_vcpu_stolen(cpu)) { |
430 | HYPERVISOR_sched_op(SCHEDOP_yield, 0); | 432 | HYPERVISOR_sched_op(SCHEDOP_yield, 0); |
431 | break; | 433 | break; |
@@ -435,7 +437,8 @@ static void xen_smp_send_call_function_ipi(cpumask_t mask) | |||
435 | 437 | ||
436 | static void xen_smp_send_call_function_single_ipi(int cpu) | 438 | static void xen_smp_send_call_function_single_ipi(int cpu) |
437 | { | 439 | { |
438 | xen_send_IPI_mask(cpumask_of_cpu(cpu), XEN_CALL_FUNCTION_SINGLE_VECTOR); | 440 | xen_send_IPI_mask(cpumask_of(cpu), |
441 | XEN_CALL_FUNCTION_SINGLE_VECTOR); | ||
439 | } | 442 | } |
440 | 443 | ||
441 | static irqreturn_t xen_call_function_interrupt(int irq, void *dev_id) | 444 | static irqreturn_t xen_call_function_interrupt(int irq, void *dev_id) |
diff --git a/arch/x86/xen/suspend.c b/arch/x86/xen/suspend.c index 2a234db5949b..212ffe012b76 100644 --- a/arch/x86/xen/suspend.c +++ b/arch/x86/xen/suspend.c | |||
@@ -35,7 +35,8 @@ void xen_post_suspend(int suspend_cancelled) | |||
35 | pfn_to_mfn(xen_start_info->console.domU.mfn); | 35 | pfn_to_mfn(xen_start_info->console.domU.mfn); |
36 | } else { | 36 | } else { |
37 | #ifdef CONFIG_SMP | 37 | #ifdef CONFIG_SMP |
38 | xen_cpu_initialized_map = cpu_online_map; | 38 | BUG_ON(xen_cpu_initialized_map == NULL); |
39 | cpumask_copy(xen_cpu_initialized_map, cpu_online_mask); | ||
39 | #endif | 40 | #endif |
40 | xen_vcpu_restore(); | 41 | xen_vcpu_restore(); |
41 | } | 42 | } |
diff --git a/arch/x86/xen/time.c b/arch/x86/xen/time.c index c9f7cda48ed7..14f240623497 100644 --- a/arch/x86/xen/time.c +++ b/arch/x86/xen/time.c | |||
@@ -132,8 +132,7 @@ static void do_stolen_accounting(void) | |||
132 | *snap = state; | 132 | *snap = state; |
133 | 133 | ||
134 | /* Add the appropriate number of ticks of stolen time, | 134 | /* Add the appropriate number of ticks of stolen time, |
135 | including any left-overs from last time. Passing NULL to | 135 | including any left-overs from last time. */ |
136 | account_steal_time accounts the time as stolen. */ | ||
137 | stolen = runnable + offline + __get_cpu_var(residual_stolen); | 136 | stolen = runnable + offline + __get_cpu_var(residual_stolen); |
138 | 137 | ||
139 | if (stolen < 0) | 138 | if (stolen < 0) |
@@ -141,11 +140,10 @@ static void do_stolen_accounting(void) | |||
141 | 140 | ||
142 | ticks = iter_div_u64_rem(stolen, NS_PER_TICK, &stolen); | 141 | ticks = iter_div_u64_rem(stolen, NS_PER_TICK, &stolen); |
143 | __get_cpu_var(residual_stolen) = stolen; | 142 | __get_cpu_var(residual_stolen) = stolen; |
144 | account_steal_time(NULL, ticks); | 143 | account_steal_ticks(ticks); |
145 | 144 | ||
146 | /* Add the appropriate number of ticks of blocked time, | 145 | /* Add the appropriate number of ticks of blocked time, |
147 | including any left-overs from last time. Passing idle to | 146 | including any left-overs from last time. */ |
148 | account_steal_time accounts the time as idle/wait. */ | ||
149 | blocked += __get_cpu_var(residual_blocked); | 147 | blocked += __get_cpu_var(residual_blocked); |
150 | 148 | ||
151 | if (blocked < 0) | 149 | if (blocked < 0) |
@@ -153,7 +151,7 @@ static void do_stolen_accounting(void) | |||
153 | 151 | ||
154 | ticks = iter_div_u64_rem(blocked, NS_PER_TICK, &blocked); | 152 | ticks = iter_div_u64_rem(blocked, NS_PER_TICK, &blocked); |
155 | __get_cpu_var(residual_blocked) = blocked; | 153 | __get_cpu_var(residual_blocked) = blocked; |
156 | account_steal_time(idle_task(smp_processor_id()), ticks); | 154 | account_idle_ticks(ticks); |
157 | } | 155 | } |
158 | 156 | ||
159 | /* | 157 | /* |
@@ -437,7 +435,7 @@ void xen_setup_timer(int cpu) | |||
437 | evt = &per_cpu(xen_clock_events, cpu); | 435 | evt = &per_cpu(xen_clock_events, cpu); |
438 | memcpy(evt, xen_clockevent, sizeof(*evt)); | 436 | memcpy(evt, xen_clockevent, sizeof(*evt)); |
439 | 437 | ||
440 | evt->cpumask = cpumask_of_cpu(cpu); | 438 | evt->cpumask = cpumask_of(cpu); |
441 | evt->irq = irq; | 439 | evt->irq = irq; |
442 | 440 | ||
443 | setup_runstate_info(cpu); | 441 | setup_runstate_info(cpu); |
diff --git a/arch/x86/xen/xen-ops.h b/arch/x86/xen/xen-ops.h index 9e1afae8461f..c1f8faf0a2c5 100644 --- a/arch/x86/xen/xen-ops.h +++ b/arch/x86/xen/xen-ops.h | |||
@@ -58,7 +58,7 @@ void __init xen_init_spinlocks(void); | |||
58 | __cpuinit void xen_init_lock_cpu(int cpu); | 58 | __cpuinit void xen_init_lock_cpu(int cpu); |
59 | void xen_uninit_lock_cpu(int cpu); | 59 | void xen_uninit_lock_cpu(int cpu); |
60 | 60 | ||
61 | extern cpumask_t xen_cpu_initialized_map; | 61 | extern cpumask_var_t xen_cpu_initialized_map; |
62 | #else | 62 | #else |
63 | static inline void xen_smp_init(void) {} | 63 | static inline void xen_smp_init(void) {} |
64 | #endif | 64 | #endif |