Diffstat (limited to 'arch/x86')
309 files changed, 15340 insertions, 8624 deletions
diff --git a/arch/x86/Kbuild b/arch/x86/Kbuild
new file mode 100644
index 000000000000..ad8ec356fb36
--- /dev/null
+++ b/arch/x86/Kbuild
@@ -0,0 +1,16 @@
1 | |||
2 | obj-$(CONFIG_KVM) += kvm/ | ||
3 | |||
4 | # Xen paravirtualization support | ||
5 | obj-$(CONFIG_XEN) += xen/ | ||
6 | |||
7 | # lguest paravirtualization support | ||
8 | obj-$(CONFIG_LGUEST_GUEST) += lguest/ | ||
9 | |||
10 | obj-y += kernel/ | ||
11 | obj-y += mm/ | ||
12 | |||
13 | obj-y += crypto/ | ||
14 | obj-y += vdso/ | ||
15 | obj-$(CONFIG_IA32_EMULATION) += ia32/ | ||
16 | |||
diff --git a/arch/x86/Kconfig b/arch/x86/Kconfig
index a6efe0a2e9ae..d1430ef6b4f9 100644
--- a/arch/x86/Kconfig
+++ b/arch/x86/Kconfig
@@ -28,11 +28,13 @@ config X86
28 | select HAVE_KPROBES | 28 | select HAVE_KPROBES |
29 | select ARCH_WANT_OPTIONAL_GPIOLIB | 29 | select ARCH_WANT_OPTIONAL_GPIOLIB |
30 | select ARCH_WANT_FRAME_POINTERS | 30 | select ARCH_WANT_FRAME_POINTERS |
31 | select HAVE_DMA_ATTRS | ||
31 | select HAVE_KRETPROBES | 32 | select HAVE_KRETPROBES |
32 | select HAVE_FTRACE_MCOUNT_RECORD | 33 | select HAVE_FTRACE_MCOUNT_RECORD |
33 | select HAVE_DYNAMIC_FTRACE | 34 | select HAVE_DYNAMIC_FTRACE |
34 | select HAVE_FUNCTION_TRACER | 35 | select HAVE_FUNCTION_TRACER |
35 | select HAVE_FUNCTION_GRAPH_TRACER | 36 | select HAVE_FUNCTION_GRAPH_TRACER |
37 | select HAVE_FUNCTION_GRAPH_FP_TEST | ||
36 | select HAVE_FUNCTION_TRACE_MCOUNT_TEST | 38 | select HAVE_FUNCTION_TRACE_MCOUNT_TEST |
37 | select HAVE_FTRACE_NMI_ENTER if DYNAMIC_FTRACE | 39 | select HAVE_FTRACE_NMI_ENTER if DYNAMIC_FTRACE |
38 | select HAVE_FTRACE_SYSCALLS | 40 | select HAVE_FTRACE_SYSCALLS |
@@ -46,6 +48,12 @@ config X86
46 | select HAVE_KERNEL_GZIP | 48 | select HAVE_KERNEL_GZIP |
47 | select HAVE_KERNEL_BZIP2 | 49 | select HAVE_KERNEL_BZIP2 |
48 | select HAVE_KERNEL_LZMA | 50 | select HAVE_KERNEL_LZMA |
51 | select HAVE_ARCH_KMEMCHECK | ||
52 | |||
53 | config OUTPUT_FORMAT | ||
54 | string | ||
55 | default "elf32-i386" if X86_32 | ||
56 | default "elf64-x86-64" if X86_64 | ||
49 | 57 | ||
50 | config ARCH_DEFCONFIG | 58 | config ARCH_DEFCONFIG |
51 | string | 59 | string |
@@ -274,15 +282,9 @@ config SPARSE_IRQ
274 | 282 | ||
275 | If you don't know what to do here, say N. | 283 | If you don't know what to do here, say N. |
276 | 284 | ||
277 | config NUMA_MIGRATE_IRQ_DESC | 285 | config NUMA_IRQ_DESC |
278 | bool "Move irq desc when changing irq smp_affinity" | 286 | def_bool y |
279 | depends on SPARSE_IRQ && NUMA | 287 | depends on SPARSE_IRQ && NUMA |
280 | depends on BROKEN | ||
281 | default n | ||
282 | ---help--- | ||
283 | This enables moving irq_desc to cpu/node that irq will use handled. | ||
284 | |||
285 | If you don't know what to do here, say N. | ||
286 | 288 | ||
287 | config X86_MPPARSE | 289 | config X86_MPPARSE |
288 | bool "Enable MPS table" if ACPI | 290 | bool "Enable MPS table" if ACPI |
@@ -355,7 +357,7 @@ config X86_UV
355 | depends on X86_64 | 357 | depends on X86_64 |
356 | depends on X86_EXTENDED_PLATFORM | 358 | depends on X86_EXTENDED_PLATFORM |
357 | depends on NUMA | 359 | depends on NUMA |
358 | select X86_X2APIC | 360 | depends on X86_X2APIC |
359 | ---help--- | 361 | ---help--- |
360 | This option is needed in order to support SGI Ultraviolet systems. | 362 | This option is needed in order to support SGI Ultraviolet systems. |
361 | If you don't have one of these, you should say N here. | 363 | If you don't have one of these, you should say N here. |
@@ -740,6 +742,7 @@ config X86_UP_IOAPIC
740 | config X86_LOCAL_APIC | 742 | config X86_LOCAL_APIC |
741 | def_bool y | 743 | def_bool y |
742 | depends on X86_64 || SMP || X86_32_NON_STANDARD || X86_UP_APIC | 744 | depends on X86_64 || SMP || X86_32_NON_STANDARD || X86_UP_APIC |
745 | select HAVE_PERF_COUNTERS if (!M386 && !M486) | ||
743 | 746 | ||
744 | config X86_IO_APIC | 747 | config X86_IO_APIC |
745 | def_bool y | 748 | def_bool y |
@@ -789,10 +792,26 @@ config X86_MCE
789 | to disable it. MCE support simply ignores non-MCE processors like | 792 | to disable it. MCE support simply ignores non-MCE processors like |
790 | the 386 and 486, so nearly everyone can say Y here. | 793 | the 386 and 486, so nearly everyone can say Y here. |
791 | 794 | ||
795 | config X86_OLD_MCE | ||
796 | depends on X86_32 && X86_MCE | ||
797 | bool "Use legacy machine check code (will go away)" | ||
798 | default n | ||
799 | select X86_ANCIENT_MCE | ||
800 | ---help--- | ||
801 | Use the old i386 machine check code. This is merely intended for | ||
802 | testing in a transition period. Try this if you run into any machine | ||
803 | check related software problems, but report the problem to | ||
804 | linux-kernel. When in doubt say no. | ||
805 | |||
806 | config X86_NEW_MCE | ||
807 | depends on X86_MCE | ||
808 | bool | ||
809 | default y if (!X86_OLD_MCE && X86_32) || X86_64 | ||
810 | |||
792 | config X86_MCE_INTEL | 811 | config X86_MCE_INTEL |
793 | def_bool y | 812 | def_bool y |
794 | prompt "Intel MCE features" | 813 | prompt "Intel MCE features" |
795 | depends on X86_64 && X86_MCE && X86_LOCAL_APIC | 814 | depends on X86_NEW_MCE && X86_LOCAL_APIC |
796 | ---help--- | 815 | ---help--- |
797 | Additional support for intel specific MCE features such as | 816 | Additional support for intel specific MCE features such as |
798 | the thermal monitor. | 817 | the thermal monitor. |
@@ -800,19 +819,36 @@ config X86_MCE_INTEL
800 | config X86_MCE_AMD | 819 | config X86_MCE_AMD |
801 | def_bool y | 820 | def_bool y |
802 | prompt "AMD MCE features" | 821 | prompt "AMD MCE features" |
803 | depends on X86_64 && X86_MCE && X86_LOCAL_APIC | 822 | depends on X86_NEW_MCE && X86_LOCAL_APIC |
804 | ---help--- | 823 | ---help--- |
805 | Additional support for AMD specific MCE features such as | 824 | Additional support for AMD specific MCE features such as |
806 | the DRAM Error Threshold. | 825 | the DRAM Error Threshold. |
807 | 826 | ||
827 | config X86_ANCIENT_MCE | ||
828 | def_bool n | ||
829 | depends on X86_32 | ||
830 | prompt "Support for old Pentium 5 / WinChip machine checks" | ||
831 | ---help--- | ||
832 | Include support for machine check handling on old Pentium 5 or WinChip | ||
833 | systems. These typically need to be enabled explicitely on the command | ||
834 | line. | ||
835 | |||
808 | config X86_MCE_THRESHOLD | 836 | config X86_MCE_THRESHOLD |
809 | depends on X86_MCE_AMD || X86_MCE_INTEL | 837 | depends on X86_MCE_AMD || X86_MCE_INTEL |
810 | bool | 838 | bool |
811 | default y | 839 | default y |
812 | 840 | ||
841 | config X86_MCE_INJECT | ||
842 | depends on X86_NEW_MCE | ||
843 | tristate "Machine check injector support" | ||
844 | ---help--- | ||
845 | Provide support for injecting machine checks for testing purposes. | ||
846 | If you don't know what a machine check is and you don't do kernel | ||
847 | QA it is safe to say n. | ||
848 | |||
813 | config X86_MCE_NONFATAL | 849 | config X86_MCE_NONFATAL |
814 | tristate "Check for non-fatal errors on AMD Athlon/Duron / Intel Pentium 4" | 850 | tristate "Check for non-fatal errors on AMD Athlon/Duron / Intel Pentium 4" |
815 | depends on X86_32 && X86_MCE | 851 | depends on X86_OLD_MCE |
816 | ---help--- | 852 | ---help--- |
817 | Enabling this feature starts a timer that triggers every 5 seconds which | 853 | Enabling this feature starts a timer that triggers every 5 seconds which |
818 | will look at the machine check registers to see if anything happened. | 854 | will look at the machine check registers to see if anything happened. |
@@ -825,11 +861,15 @@ config X86_MCE_NONFATAL
825 | 861 | ||
826 | config X86_MCE_P4THERMAL | 862 | config X86_MCE_P4THERMAL |
827 | bool "check for P4 thermal throttling interrupt." | 863 | bool "check for P4 thermal throttling interrupt." |
828 | depends on X86_32 && X86_MCE && (X86_UP_APIC || SMP) | 864 | depends on X86_OLD_MCE && X86_MCE && (X86_UP_APIC || SMP) |
829 | ---help--- | 865 | ---help--- |
830 | Enabling this feature will cause a message to be printed when the P4 | 866 | Enabling this feature will cause a message to be printed when the P4 |
831 | enters thermal throttling. | 867 | enters thermal throttling. |
832 | 868 | ||
869 | config X86_THERMAL_VECTOR | ||
870 | def_bool y | ||
871 | depends on X86_MCE_P4THERMAL || X86_MCE_INTEL | ||
872 | |||
833 | config VM86 | 873 | config VM86 |
834 | bool "Enable VM86 support" if EMBEDDED | 874 | bool "Enable VM86 support" if EMBEDDED |
835 | default y | 875 | default y |
@@ -1466,9 +1506,7 @@ config KEXEC_JUMP
1466 | 1506 | ||
1467 | config PHYSICAL_START | 1507 | config PHYSICAL_START |
1468 | hex "Physical address where the kernel is loaded" if (EMBEDDED || CRASH_DUMP) | 1508 | hex "Physical address where the kernel is loaded" if (EMBEDDED || CRASH_DUMP) |
1469 | default "0x1000000" if X86_NUMAQ | 1509 | default "0x1000000" |
1470 | default "0x200000" if X86_64 | ||
1471 | default "0x100000" | ||
1472 | ---help--- | 1510 | ---help--- |
1473 | This gives the physical address where the kernel is loaded. | 1511 | This gives the physical address where the kernel is loaded. |
1474 | 1512 | ||
@@ -1487,15 +1525,15 @@ config PHYSICAL_START
1487 | to be specifically compiled to run from a specific memory area | 1525 | to be specifically compiled to run from a specific memory area |
1488 | (normally a reserved region) and this option comes handy. | 1526 | (normally a reserved region) and this option comes handy. |
1489 | 1527 | ||
1490 | So if you are using bzImage for capturing the crash dump, leave | 1528 | So if you are using bzImage for capturing the crash dump, |
1491 | the value here unchanged to 0x100000 and set CONFIG_RELOCATABLE=y. | 1529 | leave the value here unchanged to 0x1000000 and set |
1492 | Otherwise if you plan to use vmlinux for capturing the crash dump | 1530 | CONFIG_RELOCATABLE=y. Otherwise if you plan to use vmlinux |
1493 | change this value to start of the reserved region (Typically 16MB | 1531 | for capturing the crash dump change this value to start of |
1494 | 0x1000000). In other words, it can be set based on the "X" value as | 1532 | the reserved region. In other words, it can be set based on |
1495 | specified in the "crashkernel=YM@XM" command line boot parameter | 1533 | the "X" value as specified in the "crashkernel=YM@XM" |
1496 | passed to the panic-ed kernel. Typically this parameter is set as | 1534 | command line boot parameter passed to the panic-ed |
1497 | crashkernel=64M@16M. Please take a look at | 1535 | kernel. Please take a look at Documentation/kdump/kdump.txt |
1498 | Documentation/kdump/kdump.txt for more details about crash dumps. | 1536 | for more details about crash dumps. |
1499 | 1537 | ||
1500 | Usage of bzImage for capturing the crash dump is recommended as | 1538 | Usage of bzImage for capturing the crash dump is recommended as |
1501 | one does not have to build two kernels. Same kernel can be used | 1539 | one does not have to build two kernels. Same kernel can be used |
@@ -1508,8 +1546,8 @@ config PHYSICAL_START
1508 | Don't change this unless you know what you are doing. | 1546 | Don't change this unless you know what you are doing. |
1509 | 1547 | ||
1510 | config RELOCATABLE | 1548 | config RELOCATABLE |
1511 | bool "Build a relocatable kernel (EXPERIMENTAL)" | 1549 | bool "Build a relocatable kernel" |
1512 | depends on EXPERIMENTAL | 1550 | default y |
1513 | ---help--- | 1551 | ---help--- |
1514 | This builds a kernel image that retains relocation information | 1552 | This builds a kernel image that retains relocation information |
1515 | so it can be loaded someplace besides the default 1MB. | 1553 | so it can be loaded someplace besides the default 1MB. |
@@ -1524,12 +1562,16 @@ config RELOCATABLE
1524 | it has been loaded at and the compile time physical address | 1562 | it has been loaded at and the compile time physical address |
1525 | (CONFIG_PHYSICAL_START) is ignored. | 1563 | (CONFIG_PHYSICAL_START) is ignored. |
1526 | 1564 | ||
1565 | # Relocation on x86-32 needs some additional build support | ||
1566 | config X86_NEED_RELOCS | ||
1567 | def_bool y | ||
1568 | depends on X86_32 && RELOCATABLE | ||
1569 | |||
1527 | config PHYSICAL_ALIGN | 1570 | config PHYSICAL_ALIGN |
1528 | hex | 1571 | hex |
1529 | prompt "Alignment value to which kernel should be aligned" if X86_32 | 1572 | prompt "Alignment value to which kernel should be aligned" if X86_32 |
1530 | default "0x100000" if X86_32 | 1573 | default "0x1000000" |
1531 | default "0x200000" if X86_64 | 1574 | range 0x2000 0x1000000 |
1532 | range 0x2000 0x400000 | ||
1533 | ---help--- | 1575 | ---help--- |
1534 | This value puts the alignment restrictions on physical address | 1576 | This value puts the alignment restrictions on physical address |
1535 | where kernel is loaded and run from. Kernel is compiled for an | 1577 | where kernel is loaded and run from. Kernel is compiled for an |
diff --git a/arch/x86/Kconfig.debug b/arch/x86/Kconfig.debug
index d8359e73317f..d105f29bb6bb 100644
--- a/arch/x86/Kconfig.debug
+++ b/arch/x86/Kconfig.debug
@@ -159,14 +159,30 @@ config IOMMU_DEBUG
159 | options. See Documentation/x86_64/boot-options.txt for more | 159 | options. See Documentation/x86_64/boot-options.txt for more |
160 | details. | 160 | details. |
161 | 161 | ||
162 | config IOMMU_STRESS | ||
163 | bool "Enable IOMMU stress-test mode" | ||
164 | ---help--- | ||
165 | This option disables various optimizations in IOMMU related | ||
166 | code to do real stress testing of the IOMMU code. This option | ||
167 | will cause a performance drop and should only be enabled for | ||
168 | testing. | ||
169 | |||
162 | config IOMMU_LEAK | 170 | config IOMMU_LEAK |
163 | bool "IOMMU leak tracing" | 171 | bool "IOMMU leak tracing" |
164 | depends on DEBUG_KERNEL | 172 | depends on IOMMU_DEBUG && DMA_API_DEBUG |
165 | depends on IOMMU_DEBUG | ||
166 | ---help--- | 173 | ---help--- |
167 | Add a simple leak tracer to the IOMMU code. This is useful when you | 174 | Add a simple leak tracer to the IOMMU code. This is useful when you |
168 | are debugging a buggy device driver that leaks IOMMU mappings. | 175 | are debugging a buggy device driver that leaks IOMMU mappings. |
169 | 176 | ||
177 | config X86_DS_SELFTEST | ||
178 | bool "DS selftest" | ||
179 | default y | ||
180 | depends on DEBUG_KERNEL | ||
181 | depends on X86_DS | ||
182 | ---help--- | ||
183 | Perform Debug Store selftests at boot time. | ||
184 | If in doubt, say "N". | ||
185 | |||
170 | config HAVE_MMIOTRACE_SUPPORT | 186 | config HAVE_MMIOTRACE_SUPPORT |
171 | def_bool y | 187 | def_bool y |
172 | 188 | ||
diff --git a/arch/x86/Makefile b/arch/x86/Makefile
index 8c86b72afdc2..1b68659c41b4 100644
--- a/arch/x86/Makefile
+++ b/arch/x86/Makefile
@@ -7,8 +7,6 @@ else
7 | KBUILD_DEFCONFIG := $(ARCH)_defconfig | 7 | KBUILD_DEFCONFIG := $(ARCH)_defconfig |
8 | endif | 8 | endif |
9 | 9 | ||
10 | core-$(CONFIG_KVM) += arch/x86/kvm/ | ||
11 | |||
12 | # BITS is used as extension for files which are available in a 32 bit | 10 | # BITS is used as extension for files which are available in a 32 bit |
13 | # and a 64 bit version to simplify shared Makefiles. | 11 | # and a 64 bit version to simplify shared Makefiles. |
14 | # e.g.: obj-y += foo_$(BITS).o | 12 | # e.g.: obj-y += foo_$(BITS).o |
@@ -83,6 +81,11 @@ ifdef CONFIG_CC_STACKPROTECTOR
83 | endif | 81 | endif |
84 | endif | 82 | endif |
85 | 83 | ||
84 | # Don't unroll struct assignments with kmemcheck enabled | ||
85 | ifeq ($(CONFIG_KMEMCHECK),y) | ||
86 | KBUILD_CFLAGS += $(call cc-option,-fno-builtin-memcpy) | ||
87 | endif | ||
88 | |||
86 | # Stackpointer is addressed different for 32 bit and 64 bit x86 | 89 | # Stackpointer is addressed different for 32 bit and 64 bit x86 |
87 | sp-$(CONFIG_X86_32) := esp | 90 | sp-$(CONFIG_X86_32) := esp |
88 | sp-$(CONFIG_X86_64) := rsp | 91 | sp-$(CONFIG_X86_64) := rsp |
@@ -118,21 +121,8 @@ head-y += arch/x86/kernel/init_task.o
118 | 121 | ||
119 | libs-y += arch/x86/lib/ | 122 | libs-y += arch/x86/lib/ |
120 | 123 | ||
121 | # Sub architecture files that needs linking first | 124 | # See arch/x86/Kbuild for content of core part of the kernel |
122 | core-y += $(fcore-y) | 125 | core-y += arch/x86/ |
123 | |||
124 | # Xen paravirtualization support | ||
125 | core-$(CONFIG_XEN) += arch/x86/xen/ | ||
126 | |||
127 | # lguest paravirtualization support | ||
128 | core-$(CONFIG_LGUEST_GUEST) += arch/x86/lguest/ | ||
129 | |||
130 | core-y += arch/x86/kernel/ | ||
131 | core-y += arch/x86/mm/ | ||
132 | |||
133 | core-y += arch/x86/crypto/ | ||
134 | core-y += arch/x86/vdso/ | ||
135 | core-$(CONFIG_IA32_EMULATION) += arch/x86/ia32/ | ||
136 | 126 | ||
137 | # drivers-y are linked after core-y | 127 | # drivers-y are linked after core-y |
138 | drivers-$(CONFIG_MATH_EMULATION) += arch/x86/math-emu/ | 128 | drivers-$(CONFIG_MATH_EMULATION) += arch/x86/math-emu/ |
diff --git a/arch/x86/boot/.gitignore b/arch/x86/boot/.gitignore
index 172cf8a98bdd..851fe936d242 100644
--- a/arch/x86/boot/.gitignore
+++ b/arch/x86/boot/.gitignore
@@ -3,6 +3,8 @@ bzImage
3 | cpustr.h | 3 | cpustr.h |
4 | mkcpustr | 4 | mkcpustr |
5 | offsets.h | 5 | offsets.h |
6 | voffset.h | ||
7 | zoffset.h | ||
6 | setup | 8 | setup |
7 | setup.bin | 9 | setup.bin |
8 | setup.elf | 10 | setup.elf |
diff --git a/arch/x86/boot/Makefile b/arch/x86/boot/Makefile
index 6633b6e7505a..ec749c2bfdd7 100644
--- a/arch/x86/boot/Makefile
+++ b/arch/x86/boot/Makefile
@@ -26,9 +26,10 @@ targets := vmlinux.bin setup.bin setup.elf bzImage
26 | targets += fdimage fdimage144 fdimage288 image.iso mtools.conf | 26 | targets += fdimage fdimage144 fdimage288 image.iso mtools.conf |
27 | subdir- := compressed | 27 | subdir- := compressed |
28 | 28 | ||
29 | setup-y += a20.o cmdline.o copy.o cpu.o cpucheck.o edd.o | 29 | setup-y += a20.o bioscall.o cmdline.o copy.o cpu.o cpucheck.o edd.o |
30 | setup-y += header.o main.o mca.o memory.o pm.o pmjump.o | 30 | setup-y += header.o main.o mca.o memory.o pm.o pmjump.o |
31 | setup-y += printf.o string.o tty.o video.o video-mode.o version.o | 31 | setup-y += printf.o regs.o string.o tty.o video.o video-mode.o |
32 | setup-y += version.o | ||
32 | setup-$(CONFIG_X86_APM_BOOT) += apm.o | 33 | setup-$(CONFIG_X86_APM_BOOT) += apm.o |
33 | 34 | ||
34 | # The link order of the video-*.o modules can matter. In particular, | 35 | # The link order of the video-*.o modules can matter. In particular, |
@@ -69,6 +70,7 @@ KBUILD_CFLAGS := $(LINUXINCLUDE) -g -Os -D_SETUP -D__KERNEL__ \
69 | $(call cc-option, -mpreferred-stack-boundary=2) | 70 | $(call cc-option, -mpreferred-stack-boundary=2) |
70 | KBUILD_CFLAGS += $(call cc-option, -m32) | 71 | KBUILD_CFLAGS += $(call cc-option, -m32) |
71 | KBUILD_AFLAGS := $(KBUILD_CFLAGS) -D__ASSEMBLY__ | 72 | KBUILD_AFLAGS := $(KBUILD_CFLAGS) -D__ASSEMBLY__ |
73 | GCOV_PROFILE := n | ||
72 | 74 | ||
73 | $(obj)/bzImage: asflags-y := $(SVGA_MODE) | 75 | $(obj)/bzImage: asflags-y := $(SVGA_MODE) |
74 | 76 | ||
@@ -86,19 +88,27 @@ $(obj)/vmlinux.bin: $(obj)/compressed/vmlinux FORCE
86 | 88 | ||
87 | SETUP_OBJS = $(addprefix $(obj)/,$(setup-y)) | 89 | SETUP_OBJS = $(addprefix $(obj)/,$(setup-y)) |
88 | 90 | ||
89 | sed-offsets := -e 's/^00*/0/' \ | 91 | sed-voffset := -e 's/^\([0-9a-fA-F]*\) . \(_text\|_end\)$$/\#define VO_\2 0x\1/p' |
90 | -e 's/^\([0-9a-fA-F]*\) . \(input_data\|input_data_end\)$$/\#define \2 0x\1/p' | ||
91 | 92 | ||
92 | quiet_cmd_offsets = OFFSETS $@ | 93 | quiet_cmd_voffset = VOFFSET $@ |
93 | cmd_offsets = $(NM) $< | sed -n $(sed-offsets) > $@ | 94 | cmd_voffset = $(NM) $< | sed -n $(sed-voffset) > $@ |
94 | 95 | ||
95 | $(obj)/offsets.h: $(obj)/compressed/vmlinux FORCE | 96 | targets += voffset.h |
96 | $(call if_changed,offsets) | 97 | $(obj)/voffset.h: vmlinux FORCE |
98 | $(call if_changed,voffset) | ||
99 | |||
100 | sed-zoffset := -e 's/^\([0-9a-fA-F]*\) . \(startup_32\|input_data\|_end\|z_.*\)$$/\#define ZO_\2 0x\1/p' | ||
101 | |||
102 | quiet_cmd_zoffset = ZOFFSET $@ | ||
103 | cmd_zoffset = $(NM) $< | sed -n $(sed-zoffset) > $@ | ||
104 | |||
105 | targets += zoffset.h | ||
106 | $(obj)/zoffset.h: $(obj)/compressed/vmlinux FORCE | ||
107 | $(call if_changed,zoffset) | ||
97 | 108 | ||
98 | targets += offsets.h | ||
99 | 109 | ||
100 | AFLAGS_header.o += -I$(obj) | 110 | AFLAGS_header.o += -I$(obj) |
101 | $(obj)/header.o: $(obj)/offsets.h | 111 | $(obj)/header.o: $(obj)/voffset.h $(obj)/zoffset.h |
102 | 112 | ||
103 | LDFLAGS_setup.elf := -T | 113 | LDFLAGS_setup.elf := -T |
104 | $(obj)/setup.elf: $(src)/setup.ld $(SETUP_OBJS) FORCE | 114 | $(obj)/setup.elf: $(src)/setup.ld $(SETUP_OBJS) FORCE |
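The voffset.h and zoffset.h rules above simply run $(NM) over the respective image and filter the symbols matched by sed-voffset and sed-zoffset into VO_*/ZO_* macros, which header.o (built with -I$(obj)) then pulls in. A rough sketch of what the two generated headers look like; the addresses below are made-up placeholders, the real values come from the build's nm output:

    /* voffset.h: symbols taken from the uncompressed vmlinux */
    #define VO__end  0xc15e9000
    #define VO__text 0xc1000000

    /* zoffset.h: symbols taken from arch/x86/boot/compressed/vmlinux */
    #define ZO__end             0x00580000
    #define ZO_input_data       0x00000258
    #define ZO_startup_32       0x00000000
    #define ZO_z_extract_offset 0x00450000
    #define ZO_z_input_len      0x0043d000
    #define ZO_z_output_len     0x00b60000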
diff --git a/arch/x86/boot/a20.c b/arch/x86/boot/a20.c
index 7c19ce8c2442..64a31a6d751a 100644
--- a/arch/x86/boot/a20.c
+++ b/arch/x86/boot/a20.c
@@ -2,7 +2,7 @@
2 | * | 2 | * |
3 | * Copyright (C) 1991, 1992 Linus Torvalds | 3 | * Copyright (C) 1991, 1992 Linus Torvalds |
4 | * Copyright 2007-2008 rPath, Inc. - All Rights Reserved | 4 | * Copyright 2007-2008 rPath, Inc. - All Rights Reserved |
5 | * Copyright 2009 Intel Corporation | 5 | * Copyright 2009 Intel Corporation; author H. Peter Anvin |
6 | * | 6 | * |
7 | * This file is part of the Linux kernel, and is made available under | 7 | * This file is part of the Linux kernel, and is made available under |
8 | * the terms of the GNU General Public License version 2. | 8 | * the terms of the GNU General Public License version 2. |
@@ -90,8 +90,11 @@ static int a20_test_long(void)
90 | 90 | ||
91 | static void enable_a20_bios(void) | 91 | static void enable_a20_bios(void) |
92 | { | 92 | { |
93 | asm volatile("pushfl; int $0x15; popfl" | 93 | struct biosregs ireg; |
94 | : : "a" ((u16)0x2401)); | 94 | |
95 | initregs(&ireg); | ||
96 | ireg.ax = 0x2401; | ||
97 | intcall(0x15, &ireg, NULL); | ||
95 | } | 98 | } |
96 | 99 | ||
97 | static void enable_a20_kbc(void) | 100 | static void enable_a20_kbc(void) |
diff --git a/arch/x86/boot/apm.c b/arch/x86/boot/apm.c
index 7aa6033001f9..ee274834ea8b 100644
--- a/arch/x86/boot/apm.c
+++ b/arch/x86/boot/apm.c
@@ -2,6 +2,7 @@
2 | * | 2 | * |
3 | * Copyright (C) 1991, 1992 Linus Torvalds | 3 | * Copyright (C) 1991, 1992 Linus Torvalds |
4 | * Copyright 2007 rPath, Inc. - All Rights Reserved | 4 | * Copyright 2007 rPath, Inc. - All Rights Reserved |
5 | * Copyright 2009 Intel Corporation; author H. Peter Anvin | ||
5 | * | 6 | * |
6 | * Original APM BIOS checking by Stephen Rothwell, May 1994 | 7 | * Original APM BIOS checking by Stephen Rothwell, May 1994 |
7 | * (sfr@canb.auug.org.au) | 8 | * (sfr@canb.auug.org.au) |
@@ -19,75 +20,56 @@
19 | 20 | ||
20 | int query_apm_bios(void) | 21 | int query_apm_bios(void) |
21 | { | 22 | { |
22 | u16 ax, bx, cx, dx, di; | 23 | struct biosregs ireg, oreg; |
23 | u32 ebx, esi; | ||
24 | u8 err; | ||
25 | 24 | ||
26 | /* APM BIOS installation check */ | 25 | /* APM BIOS installation check */ |
27 | ax = 0x5300; | 26 | initregs(&ireg); |
28 | bx = cx = 0; | 27 | ireg.ah = 0x53; |
29 | asm volatile("pushl %%ebp ; int $0x15 ; popl %%ebp ; setc %0" | 28 | intcall(0x15, &ireg, &oreg); |
30 | : "=d" (err), "+a" (ax), "+b" (bx), "+c" (cx) | ||
31 | : : "esi", "edi"); | ||
32 | 29 | ||
33 | if (err) | 30 | if (oreg.flags & X86_EFLAGS_CF) |
34 | return -1; /* No APM BIOS */ | 31 | return -1; /* No APM BIOS */ |
35 | 32 | ||
36 | if (bx != 0x504d) /* "PM" signature */ | 33 | if (oreg.bx != 0x504d) /* "PM" signature */ |
37 | return -1; | 34 | return -1; |
38 | 35 | ||
39 | if (!(cx & 0x02)) /* 32 bits supported? */ | 36 | if (!(oreg.cx & 0x02)) /* 32 bits supported? */ |
40 | return -1; | 37 | return -1; |
41 | 38 | ||
42 | /* Disconnect first, just in case */ | 39 | /* Disconnect first, just in case */ |
43 | ax = 0x5304; | 40 | ireg.al = 0x04; |
44 | bx = 0; | 41 | intcall(0x15, &ireg, NULL); |
45 | asm volatile("pushl %%ebp ; int $0x15 ; popl %%ebp" | ||
46 | : "+a" (ax), "+b" (bx) | ||
47 | : : "ecx", "edx", "esi", "edi"); | ||
48 | |||
49 | /* Paranoia */ | ||
50 | ebx = esi = 0; | ||
51 | cx = dx = di = 0; | ||
52 | 42 | ||
53 | /* 32-bit connect */ | 43 | /* 32-bit connect */ |
54 | asm volatile("pushl %%ebp ; int $0x15 ; popl %%ebp ; setc %6" | 44 | ireg.al = 0x03; |
55 | : "=a" (ax), "+b" (ebx), "+c" (cx), "+d" (dx), | 45 | intcall(0x15, &ireg, &oreg); |
56 | "+S" (esi), "+D" (di), "=m" (err) | 46 | |
57 | : "a" (0x5303)); | 47 | boot_params.apm_bios_info.cseg = oreg.ax; |
58 | 48 | boot_params.apm_bios_info.offset = oreg.ebx; | |
59 | boot_params.apm_bios_info.cseg = ax; | 49 | boot_params.apm_bios_info.cseg_16 = oreg.cx; |
60 | boot_params.apm_bios_info.offset = ebx; | 50 | boot_params.apm_bios_info.dseg = oreg.dx; |
61 | boot_params.apm_bios_info.cseg_16 = cx; | 51 | boot_params.apm_bios_info.cseg_len = oreg.si; |
62 | boot_params.apm_bios_info.dseg = dx; | 52 | boot_params.apm_bios_info.cseg_16_len = oreg.hsi; |
63 | boot_params.apm_bios_info.cseg_len = (u16)esi; | 53 | boot_params.apm_bios_info.dseg_len = oreg.di; |
64 | boot_params.apm_bios_info.cseg_16_len = esi >> 16; | 54 | |
65 | boot_params.apm_bios_info.dseg_len = di; | 55 | if (oreg.flags & X86_EFLAGS_CF) |
66 | |||
67 | if (err) | ||
68 | return -1; | 56 | return -1; |
69 | 57 | ||
70 | /* Redo the installation check as the 32-bit connect; | 58 | /* Redo the installation check as the 32-bit connect; |
71 | some BIOSes return different flags this way... */ | 59 | some BIOSes return different flags this way... */ |
72 | 60 | ||
73 | ax = 0x5300; | 61 | ireg.al = 0x00; |
74 | bx = cx = 0; | 62 | intcall(0x15, &ireg, &oreg); |
75 | asm volatile("pushl %%ebp ; int $0x15 ; popl %%ebp ; setc %0" | ||
76 | : "=d" (err), "+a" (ax), "+b" (bx), "+c" (cx) | ||
77 | : : "esi", "edi"); | ||
78 | 63 | ||
79 | if (err || bx != 0x504d) { | 64 | if ((oreg.eflags & X86_EFLAGS_CF) || oreg.bx != 0x504d) { |
80 | /* Failure with 32-bit connect, try to disconect and ignore */ | 65 | /* Failure with 32-bit connect, try to disconect and ignore */ |
81 | ax = 0x5304; | 66 | ireg.al = 0x04; |
82 | bx = 0; | 67 | intcall(0x15, &ireg, NULL); |
83 | asm volatile("pushl %%ebp ; int $0x15 ; popl %%ebp" | ||
84 | : "+a" (ax), "+b" (bx) | ||
85 | : : "ecx", "edx", "esi", "edi"); | ||
86 | return -1; | 68 | return -1; |
87 | } | 69 | } |
88 | 70 | ||
89 | boot_params.apm_bios_info.version = ax; | 71 | boot_params.apm_bios_info.version = oreg.ax; |
90 | boot_params.apm_bios_info.flags = cx; | 72 | boot_params.apm_bios_info.flags = oreg.cx; |
91 | return 0; | 73 | return 0; |
92 | } | 74 | } |
93 | 75 | ||
diff --git a/arch/x86/boot/bioscall.S b/arch/x86/boot/bioscall.S
new file mode 100644
index 000000000000..1dfbf64e52a2
--- /dev/null
+++ b/arch/x86/boot/bioscall.S
@@ -0,0 +1,82 @@
1 | /* ----------------------------------------------------------------------- | ||
2 | * | ||
3 | * Copyright 2009 Intel Corporation; author H. Peter Anvin | ||
4 | * | ||
5 | * This file is part of the Linux kernel, and is made available under | ||
6 | * the terms of the GNU General Public License version 2 or (at your | ||
7 | * option) any later version; incorporated herein by reference. | ||
8 | * | ||
9 | * ----------------------------------------------------------------------- */ | ||
10 | |||
11 | /* | ||
12 | * "Glove box" for BIOS calls. Avoids the constant problems with BIOSes | ||
13 | * touching registers they shouldn't be. | ||
14 | */ | ||
15 | |||
16 | .code16gcc | ||
17 | .text | ||
18 | .globl intcall | ||
19 | .type intcall, @function | ||
20 | intcall: | ||
21 | /* Self-modify the INT instruction. Ugly, but works. */ | ||
22 | cmpb %al, 3f | ||
23 | je 1f | ||
24 | movb %al, 3f | ||
25 | jmp 1f /* Synchronize pipeline */ | ||
26 | 1: | ||
27 | /* Save state */ | ||
28 | pushfl | ||
29 | pushw %fs | ||
30 | pushw %gs | ||
31 | pushal | ||
32 | |||
33 | /* Copy input state to stack frame */ | ||
34 | subw $44, %sp | ||
35 | movw %dx, %si | ||
36 | movw %sp, %di | ||
37 | movw $11, %cx | ||
38 | rep; movsd | ||
39 | |||
40 | /* Pop full state from the stack */ | ||
41 | popal | ||
42 | popw %gs | ||
43 | popw %fs | ||
44 | popw %es | ||
45 | popw %ds | ||
46 | popfl | ||
47 | |||
48 | /* Actual INT */ | ||
49 | .byte 0xcd /* INT opcode */ | ||
50 | 3: .byte 0 | ||
51 | |||
52 | /* Push full state to the stack */ | ||
53 | pushfl | ||
54 | pushw %ds | ||
55 | pushw %es | ||
56 | pushw %fs | ||
57 | pushw %gs | ||
58 | pushal | ||
59 | |||
60 | /* Re-establish C environment invariants */ | ||
61 | cld | ||
62 | movzwl %sp, %esp | ||
63 | movw %cs, %ax | ||
64 | movw %ax, %ds | ||
65 | movw %ax, %es | ||
66 | |||
67 | /* Copy output state from stack frame */ | ||
68 | movw 68(%esp), %di /* Original %cx == 3rd argument */ | ||
69 | andw %di, %di | ||
70 | jz 4f | ||
71 | movw %sp, %si | ||
72 | movw $11, %cx | ||
73 | rep; movsd | ||
74 | 4: addw $44, %sp | ||
75 | |||
76 | /* Restore state and return */ | ||
77 | popal | ||
78 | popw %gs | ||
79 | popw %fs | ||
80 | popfl | ||
81 | retl | ||
82 | .size intcall, .-intcall | ||
diff --git a/arch/x86/boot/boot.h b/arch/x86/boot/boot.h
index 7b2692e897e5..98239d2658f2 100644
--- a/arch/x86/boot/boot.h
+++ b/arch/x86/boot/boot.h
@@ -2,6 +2,7 @@
2 | * | 2 | * |
3 | * Copyright (C) 1991, 1992 Linus Torvalds | 3 | * Copyright (C) 1991, 1992 Linus Torvalds |
4 | * Copyright 2007 rPath, Inc. - All Rights Reserved | 4 | * Copyright 2007 rPath, Inc. - All Rights Reserved |
5 | * Copyright 2009 Intel Corporation; author H. Peter Anvin | ||
5 | * | 6 | * |
6 | * This file is part of the Linux kernel, and is made available under | 7 | * This file is part of the Linux kernel, and is made available under |
7 | * the terms of the GNU General Public License version 2. | 8 | * the terms of the GNU General Public License version 2. |
@@ -26,6 +27,7 @@
26 | #include <asm/setup.h> | 27 | #include <asm/setup.h> |
27 | #include "bitops.h" | 28 | #include "bitops.h" |
28 | #include <asm/cpufeature.h> | 29 | #include <asm/cpufeature.h> |
30 | #include <asm/processor-flags.h> | ||
29 | 31 | ||
30 | /* Useful macros */ | 32 | /* Useful macros */ |
31 | #define BUILD_BUG_ON(condition) ((void)sizeof(char[1 - 2*!!(condition)])) | 33 | #define BUILD_BUG_ON(condition) ((void)sizeof(char[1 - 2*!!(condition)])) |
@@ -241,6 +243,49 @@ int enable_a20(void);
241 | /* apm.c */ | 243 | /* apm.c */ |
242 | int query_apm_bios(void); | 244 | int query_apm_bios(void); |
243 | 245 | ||
246 | /* bioscall.c */ | ||
247 | struct biosregs { | ||
248 | union { | ||
249 | struct { | ||
250 | u32 edi; | ||
251 | u32 esi; | ||
252 | u32 ebp; | ||
253 | u32 _esp; | ||
254 | u32 ebx; | ||
255 | u32 edx; | ||
256 | u32 ecx; | ||
257 | u32 eax; | ||
258 | u32 _fsgs; | ||
259 | u32 _dses; | ||
260 | u32 eflags; | ||
261 | }; | ||
262 | struct { | ||
263 | u16 di, hdi; | ||
264 | u16 si, hsi; | ||
265 | u16 bp, hbp; | ||
266 | u16 _sp, _hsp; | ||
267 | u16 bx, hbx; | ||
268 | u16 dx, hdx; | ||
269 | u16 cx, hcx; | ||
270 | u16 ax, hax; | ||
271 | u16 gs, fs; | ||
272 | u16 es, ds; | ||
273 | u16 flags, hflags; | ||
274 | }; | ||
275 | struct { | ||
276 | u8 dil, dih, edi2, edi3; | ||
277 | u8 sil, sih, esi2, esi3; | ||
278 | u8 bpl, bph, ebp2, ebp3; | ||
279 | u8 _spl, _sph, _esp2, _esp3; | ||
280 | u8 bl, bh, ebx2, ebx3; | ||
281 | u8 dl, dh, edx2, edx3; | ||
282 | u8 cl, ch, ecx2, ecx3; | ||
283 | u8 al, ah, eax2, eax3; | ||
284 | }; | ||
285 | }; | ||
286 | }; | ||
287 | void intcall(u8 int_no, const struct biosregs *ireg, struct biosregs *oreg); | ||
288 | |||
244 | /* cmdline.c */ | 289 | /* cmdline.c */ |
245 | int cmdline_find_option(const char *option, char *buffer, int bufsize); | 290 | int cmdline_find_option(const char *option, char *buffer, int bufsize); |
246 | int cmdline_find_option_bool(const char *option); | 291 | int cmdline_find_option_bool(const char *option); |
@@ -279,6 +324,9 @@ int sprintf(char *buf, const char *fmt, ...);
279 | int vsprintf(char *buf, const char *fmt, va_list args); | 324 | int vsprintf(char *buf, const char *fmt, va_list args); |
280 | int printf(const char *fmt, ...); | 325 | int printf(const char *fmt, ...); |
281 | 326 | ||
327 | /* regs.c */ | ||
328 | void initregs(struct biosregs *regs); | ||
329 | |||
282 | /* string.c */ | 330 | /* string.c */ |
283 | int strcmp(const char *str1, const char *str2); | 331 | int strcmp(const char *str1, const char *str2); |
284 | size_t strnlen(const char *s, size_t maxlen); | 332 | size_t strnlen(const char *s, size_t maxlen); |
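The struct biosregs / intcall() interface declared above is what the converted call sites (a20.c and apm.c earlier in this diff) use instead of open-coded int $0x15 inline assembly. The union lets a caller address the same register image as 32-bit, 16-bit or 8-bit pieces, which is why the APM code can read oreg.hsi for the high half of %esi. A minimal sketch of the calling pattern, mirroring those conversions (apm_check() is just an illustrative name, not a function in this series):

    /* Sketch only: same pattern as the query_apm_bios() conversion above. */
    static int apm_check(void)
    {
            struct biosregs ireg, oreg;

            initregs(&ireg);                /* set up a sane default register image */
            ireg.ah = 0x53;                 /* INT 0x15, AH=0x53: APM installation check */
            intcall(0x15, &ireg, &oreg);    /* oreg receives the post-INT register state */

            if (oreg.flags & X86_EFLAGS_CF) /* BIOS signals failure via the carry flag */
                    return -1;
            return oreg.bx == 0x504d ? 0 : -1;      /* "PM" signature expected in %bx */
    }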
diff --git a/arch/x86/boot/compressed/.gitignore b/arch/x86/boot/compressed/.gitignore
index 63eff3b04d01..4a46fab7162e 100644
--- a/arch/x86/boot/compressed/.gitignore
+++ b/arch/x86/boot/compressed/.gitignore
@@ -1,3 +1,6 @@
1 | relocs | 1 | relocs |
2 | vmlinux.bin.all | 2 | vmlinux.bin.all |
3 | vmlinux.relocs | 3 | vmlinux.relocs |
4 | vmlinux.lds | ||
5 | mkpiggy | ||
6 | piggy.S | ||
diff --git a/arch/x86/boot/compressed/Makefile b/arch/x86/boot/compressed/Makefile
index 65551c9f8571..e2ff504b4ddc 100644
--- a/arch/x86/boot/compressed/Makefile
+++ b/arch/x86/boot/compressed/Makefile
@@ -15,11 +15,14 @@ KBUILD_CFLAGS += $(call cc-option,-ffreestanding)
15 | KBUILD_CFLAGS += $(call cc-option,-fno-stack-protector) | 15 | KBUILD_CFLAGS += $(call cc-option,-fno-stack-protector) |
16 | 16 | ||
17 | KBUILD_AFLAGS := $(KBUILD_CFLAGS) -D__ASSEMBLY__ | 17 | KBUILD_AFLAGS := $(KBUILD_CFLAGS) -D__ASSEMBLY__ |
18 | GCOV_PROFILE := n | ||
18 | 19 | ||
19 | LDFLAGS := -m elf_$(UTS_MACHINE) | 20 | LDFLAGS := -m elf_$(UTS_MACHINE) |
20 | LDFLAGS_vmlinux := -T | 21 | LDFLAGS_vmlinux := -T |
21 | 22 | ||
22 | $(obj)/vmlinux: $(src)/vmlinux_$(BITS).lds $(obj)/head_$(BITS).o $(obj)/misc.o $(obj)/piggy.o FORCE | 23 | hostprogs-y := mkpiggy |
24 | |||
25 | $(obj)/vmlinux: $(obj)/vmlinux.lds $(obj)/head_$(BITS).o $(obj)/misc.o $(obj)/piggy.o FORCE | ||
23 | $(call if_changed,ld) | 26 | $(call if_changed,ld) |
24 | @: | 27 | @: |
25 | 28 | ||
@@ -29,7 +32,7 @@ $(obj)/vmlinux.bin: vmlinux FORCE
29 | 32 | ||
30 | 33 | ||
31 | targets += vmlinux.bin.all vmlinux.relocs relocs | 34 | targets += vmlinux.bin.all vmlinux.relocs relocs |
32 | hostprogs-$(CONFIG_X86_32) += relocs | 35 | hostprogs-$(CONFIG_X86_NEED_RELOCS) += relocs |
33 | 36 | ||
34 | quiet_cmd_relocs = RELOCS $@ | 37 | quiet_cmd_relocs = RELOCS $@ |
35 | cmd_relocs = $(obj)/relocs $< > $@;$(obj)/relocs --abs-relocs $< | 38 | cmd_relocs = $(obj)/relocs $< > $@;$(obj)/relocs --abs-relocs $< |
@@ -37,46 +40,22 @@ $(obj)/vmlinux.relocs: vmlinux $(obj)/relocs FORCE
37 | $(call if_changed,relocs) | 40 | $(call if_changed,relocs) |
38 | 41 | ||
39 | vmlinux.bin.all-y := $(obj)/vmlinux.bin | 42 | vmlinux.bin.all-y := $(obj)/vmlinux.bin |
40 | vmlinux.bin.all-$(CONFIG_RELOCATABLE) += $(obj)/vmlinux.relocs | 43 | vmlinux.bin.all-$(CONFIG_X86_NEED_RELOCS) += $(obj)/vmlinux.relocs |
41 | quiet_cmd_relocbin = BUILD $@ | ||
42 | cmd_relocbin = cat $(filter-out FORCE,$^) > $@ | ||
43 | $(obj)/vmlinux.bin.all: $(vmlinux.bin.all-y) FORCE | ||
44 | $(call if_changed,relocbin) | ||
45 | |||
46 | ifeq ($(CONFIG_X86_32),y) | ||
47 | 44 | ||
48 | ifdef CONFIG_RELOCATABLE | 45 | $(obj)/vmlinux.bin.gz: $(vmlinux.bin.all-y) FORCE |
49 | $(obj)/vmlinux.bin.gz: $(obj)/vmlinux.bin.all FORCE | ||
50 | $(call if_changed,gzip) | ||
51 | $(obj)/vmlinux.bin.bz2: $(obj)/vmlinux.bin.all FORCE | ||
52 | $(call if_changed,bzip2) | ||
53 | $(obj)/vmlinux.bin.lzma: $(obj)/vmlinux.bin.all FORCE | ||
54 | $(call if_changed,lzma) | ||
55 | else | ||
56 | $(obj)/vmlinux.bin.gz: $(obj)/vmlinux.bin FORCE | ||
57 | $(call if_changed,gzip) | 46 | $(call if_changed,gzip) |
58 | $(obj)/vmlinux.bin.bz2: $(obj)/vmlinux.bin FORCE | 47 | $(obj)/vmlinux.bin.bz2: $(vmlinux.bin.all-y) FORCE |
59 | $(call if_changed,bzip2) | 48 | $(call if_changed,bzip2) |
60 | $(obj)/vmlinux.bin.lzma: $(obj)/vmlinux.bin FORCE | 49 | $(obj)/vmlinux.bin.lzma: $(vmlinux.bin.all-y) FORCE |
61 | $(call if_changed,lzma) | 50 | $(call if_changed,lzma) |
62 | endif | ||
63 | LDFLAGS_piggy.o := -r --format binary --oformat elf32-i386 -T | ||
64 | 51 | ||
65 | else | 52 | suffix-$(CONFIG_KERNEL_GZIP) := gz |
53 | suffix-$(CONFIG_KERNEL_BZIP2) := bz2 | ||
54 | suffix-$(CONFIG_KERNEL_LZMA) := lzma | ||
66 | 55 | ||
67 | $(obj)/vmlinux.bin.gz: $(obj)/vmlinux.bin FORCE | 56 | quiet_cmd_mkpiggy = MKPIGGY $@ |
68 | $(call if_changed,gzip) | 57 | cmd_mkpiggy = $(obj)/mkpiggy $< > $@ || ( rm -f $@ ; false ) |
69 | $(obj)/vmlinux.bin.bz2: $(obj)/vmlinux.bin FORCE | ||
70 | $(call if_changed,bzip2) | ||
71 | $(obj)/vmlinux.bin.lzma: $(obj)/vmlinux.bin FORCE | ||
72 | $(call if_changed,lzma) | ||
73 | |||
74 | LDFLAGS_piggy.o := -r --format binary --oformat elf64-x86-64 -T | ||
75 | endif | ||
76 | 58 | ||
77 | suffix_$(CONFIG_KERNEL_GZIP) = gz | 59 | targets += piggy.S |
78 | suffix_$(CONFIG_KERNEL_BZIP2) = bz2 | 60 | $(obj)/piggy.S: $(obj)/vmlinux.bin.$(suffix-y) $(obj)/mkpiggy FORCE |
79 | suffix_$(CONFIG_KERNEL_LZMA) = lzma | 61 | $(call if_changed,mkpiggy) |
80 | |||
81 | $(obj)/piggy.o: $(obj)/vmlinux.scr $(obj)/vmlinux.bin.$(suffix_y) FORCE | ||
82 | $(call if_changed,ld) | ||
diff --git a/arch/x86/boot/compressed/head_32.S b/arch/x86/boot/compressed/head_32.S
index 3a8a866fb2e2..75e4f001e706 100644
--- a/arch/x86/boot/compressed/head_32.S
+++ b/arch/x86/boot/compressed/head_32.S
@@ -12,16 +12,16 @@
12 | * the page directory. [According to comments etc elsewhere on a compressed | 12 | * the page directory. [According to comments etc elsewhere on a compressed |
13 | * kernel it will end up at 0x1000 + 1Mb I hope so as I assume this. - AC] | 13 | * kernel it will end up at 0x1000 + 1Mb I hope so as I assume this. - AC] |
14 | * | 14 | * |
15 | * Page 0 is deliberately kept safe, since System Management Mode code in | 15 | * Page 0 is deliberately kept safe, since System Management Mode code in |
16 | * laptops may need to access the BIOS data stored there. This is also | 16 | * laptops may need to access the BIOS data stored there. This is also |
17 | * useful for future device drivers that either access the BIOS via VM86 | 17 | * useful for future device drivers that either access the BIOS via VM86 |
18 | * mode. | 18 | * mode. |
19 | */ | 19 | */ |
20 | 20 | ||
21 | /* | 21 | /* |
22 | * High loaded stuff by Hans Lermen & Werner Almesberger, Feb. 1996 | 22 | * High loaded stuff by Hans Lermen & Werner Almesberger, Feb. 1996 |
23 | */ | 23 | */ |
24 | .text | 24 | .text |
25 | 25 | ||
26 | #include <linux/linkage.h> | 26 | #include <linux/linkage.h> |
27 | #include <asm/segment.h> | 27 | #include <asm/segment.h> |
@@ -29,161 +29,151 @@
29 | #include <asm/boot.h> | 29 | #include <asm/boot.h> |
30 | #include <asm/asm-offsets.h> | 30 | #include <asm/asm-offsets.h> |
31 | 31 | ||
32 | .section ".text.head","ax",@progbits | 32 | .section ".text.head","ax",@progbits |
33 | ENTRY(startup_32) | 33 | ENTRY(startup_32) |
34 | cld | 34 | cld |
35 | /* test KEEP_SEGMENTS flag to see if the bootloader is asking | 35 | /* |
36 | * us to not reload segments */ | 36 | * Test KEEP_SEGMENTS flag to see if the bootloader is asking |
37 | testb $(1<<6), BP_loadflags(%esi) | 37 | * us to not reload segments |
38 | jnz 1f | 38 | */ |
39 | testb $(1<<6), BP_loadflags(%esi) | ||
40 | jnz 1f | ||
39 | 41 | ||
40 | cli | 42 | cli |
41 | movl $(__BOOT_DS),%eax | 43 | movl $__BOOT_DS, %eax |
42 | movl %eax,%ds | 44 | movl %eax, %ds |
43 | movl %eax,%es | 45 | movl %eax, %es |
44 | movl %eax,%fs | 46 | movl %eax, %fs |
45 | movl %eax,%gs | 47 | movl %eax, %gs |
46 | movl %eax,%ss | 48 | movl %eax, %ss |
47 | 1: | 49 | 1: |
48 | 50 | ||
49 | /* Calculate the delta between where we were compiled to run | 51 | /* |
52 | * Calculate the delta between where we were compiled to run | ||
50 | * at and where we were actually loaded at. This can only be done | 53 | * at and where we were actually loaded at. This can only be done |
51 | * with a short local call on x86. Nothing else will tell us what | 54 | * with a short local call on x86. Nothing else will tell us what |
52 | * address we are running at. The reserved chunk of the real-mode | 55 | * address we are running at. The reserved chunk of the real-mode |
53 | * data at 0x1e4 (defined as a scratch field) are used as the stack | 56 | * data at 0x1e4 (defined as a scratch field) are used as the stack |
54 | * for this calculation. Only 4 bytes are needed. | 57 | * for this calculation. Only 4 bytes are needed. |
55 | */ | 58 | */ |
56 | leal (0x1e4+4)(%esi), %esp | 59 | leal (BP_scratch+4)(%esi), %esp |
57 | call 1f | 60 | call 1f |
58 | 1: popl %ebp | 61 | 1: popl %ebp |
59 | subl $1b, %ebp | 62 | subl $1b, %ebp |
60 | 63 | ||
61 | /* %ebp contains the address we are loaded at by the boot loader and %ebx | 64 | /* |
65 | * %ebp contains the address we are loaded at by the boot loader and %ebx | ||
62 | * contains the address where we should move the kernel image temporarily | 66 | * contains the address where we should move the kernel image temporarily |
63 | * for safe in-place decompression. | 67 | * for safe in-place decompression. |
64 | */ | 68 | */ |
65 | 69 | ||
66 | #ifdef CONFIG_RELOCATABLE | 70 | #ifdef CONFIG_RELOCATABLE |
67 | movl %ebp, %ebx | 71 | movl %ebp, %ebx |
68 | addl $(CONFIG_PHYSICAL_ALIGN - 1), %ebx | 72 | movl BP_kernel_alignment(%esi), %eax |
69 | andl $(~(CONFIG_PHYSICAL_ALIGN - 1)), %ebx | 73 | decl %eax |
74 | addl %eax, %ebx | ||
75 | notl %eax | ||
76 | andl %eax, %ebx | ||
70 | #else | 77 | #else |
71 | movl $LOAD_PHYSICAL_ADDR, %ebx | 78 | movl $LOAD_PHYSICAL_ADDR, %ebx |
72 | #endif | 79 | #endif |
73 | 80 | ||
74 | /* Replace the compressed data size with the uncompressed size */ | 81 | /* Target address to relocate to for decompression */ |
75 | subl input_len(%ebp), %ebx | 82 | addl $z_extract_offset, %ebx |
76 | movl output_len(%ebp), %eax | 83 | |
77 | addl %eax, %ebx | 84 | /* Set up the stack */ |
78 | /* Add 8 bytes for every 32K input block */ | 85 | leal boot_stack_end(%ebx), %esp |
79 | shrl $12, %eax | 86 | |
80 | addl %eax, %ebx | 87 | /* Zero EFLAGS */ |
81 | /* Add 32K + 18 bytes of extra slack */ | 88 | pushl $0 |
82 | addl $(32768 + 18), %ebx | 89 | popfl |
83 | /* Align on a 4K boundary */ | 90 | |
84 | addl $4095, %ebx | 91 | /* |
85 | andl $~4095, %ebx | 92 | * Copy the compressed kernel to the end of our buffer |
86 | |||
87 | /* Copy the compressed kernel to the end of our buffer | ||
88 | * where decompression in place becomes safe. | 93 | * where decompression in place becomes safe. |
89 | */ | 94 | */ |
90 | pushl %esi | 95 | pushl %esi |
91 | leal _end(%ebp), %esi | 96 | leal (_bss-4)(%ebp), %esi |
92 | leal _end(%ebx), %edi | 97 | leal (_bss-4)(%ebx), %edi |
93 | movl $(_end - startup_32), %ecx | 98 | movl $(_bss - startup_32), %ecx |
99 | shrl $2, %ecx | ||
94 | std | 100 | std |
95 | rep | 101 | rep movsl |
96 | movsb | ||
97 | cld | 102 | cld |
98 | popl %esi | 103 | popl %esi |
99 | |||
100 | /* Compute the kernel start address. | ||
101 | */ | ||
102 | #ifdef CONFIG_RELOCATABLE | ||
103 | addl $(CONFIG_PHYSICAL_ALIGN - 1), %ebp | ||
104 | andl $(~(CONFIG_PHYSICAL_ALIGN - 1)), %ebp | ||
105 | #else | ||
106 | movl $LOAD_PHYSICAL_ADDR, %ebp | ||
107 | #endif | ||
108 | 104 | ||
109 | /* | 105 | /* |
110 | * Jump to the relocated address. | 106 | * Jump to the relocated address. |
111 | */ | 107 | */ |
112 | leal relocated(%ebx), %eax | 108 | leal relocated(%ebx), %eax |
113 | jmp *%eax | 109 | jmp *%eax |
114 | ENDPROC(startup_32) | 110 | ENDPROC(startup_32) |
115 | 111 | ||
116 | .section ".text" | 112 | .text |
117 | relocated: | 113 | relocated: |
118 | 114 | ||
119 | /* | 115 | /* |
120 | * Clear BSS | 116 | * Clear BSS (stack is currently empty) |
121 | */ | ||
122 | xorl %eax,%eax | ||
123 | leal _edata(%ebx),%edi | ||
124 | leal _end(%ebx), %ecx | ||
125 | subl %edi,%ecx | ||
126 | cld | ||
127 | rep | ||
128 | stosb | ||
129 | |||
130 | /* | ||
131 | * Setup the stack for the decompressor | ||
132 | */ | 117 | */ |
133 | leal boot_stack_end(%ebx), %esp | 118 | xorl %eax, %eax |
119 | leal _bss(%ebx), %edi | ||
120 | leal _ebss(%ebx), %ecx | ||
121 | subl %edi, %ecx | ||
122 | shrl $2, %ecx | ||
123 | rep stosl | ||
134 | 124 | ||
135 | /* | 125 | /* |
136 | * Do the decompression, and jump to the new kernel.. | 126 | * Do the decompression, and jump to the new kernel.. |
137 | */ | 127 | */ |
138 | movl output_len(%ebx), %eax | 128 | leal z_extract_offset_negative(%ebx), %ebp |
139 | pushl %eax | 129 | /* push arguments for decompress_kernel: */ |
140 | # push arguments for decompress_kernel: | 130 | pushl %ebp /* output address */ |
141 | pushl %ebp # output address | 131 | pushl $z_input_len /* input_len */ |
142 | movl input_len(%ebx), %eax | 132 | leal input_data(%ebx), %eax |
143 | pushl %eax # input_len | 133 | pushl %eax /* input_data */ |
144 | leal input_data(%ebx), %eax | 134 | leal boot_heap(%ebx), %eax |
145 | pushl %eax # input_data | 135 | pushl %eax /* heap area */ |
146 | leal boot_heap(%ebx), %eax | 136 | pushl %esi /* real mode pointer */ |
147 | pushl %eax # heap area | 137 | call decompress_kernel |
148 | pushl %esi # real mode pointer | 138 | addl $20, %esp |
149 | call decompress_kernel | ||
150 | addl $20, %esp | ||
151 | popl %ecx | ||
152 | 139 | ||
153 | #if CONFIG_RELOCATABLE | 140 | #if CONFIG_RELOCATABLE |
154 | /* Find the address of the relocations. | 141 | /* |
142 | * Find the address of the relocations. | ||
155 | */ | 143 | */ |
156 | movl %ebp, %edi | 144 | leal z_output_len(%ebp), %edi |
157 | addl %ecx, %edi | ||
158 | 145 | ||
159 | /* Calculate the delta between where vmlinux was compiled to run | 146 | /* |
147 | * Calculate the delta between where vmlinux was compiled to run | ||
160 | * and where it was actually loaded. | 148 | * and where it was actually loaded. |
161 | */ | 149 | */ |
162 | movl %ebp, %ebx | 150 | movl %ebp, %ebx |
163 | subl $LOAD_PHYSICAL_ADDR, %ebx | 151 | subl $LOAD_PHYSICAL_ADDR, %ebx |
164 | jz 2f /* Nothing to be done if loaded at compiled addr. */ | 152 | jz 2f /* Nothing to be done if loaded at compiled addr. */ |
165 | /* | 153 | /* |
166 | * Process relocations. | 154 | * Process relocations. |
167 | */ | 155 | */ |
168 | 156 | ||
169 | 1: subl $4, %edi | 157 | 1: subl $4, %edi |
170 | movl 0(%edi), %ecx | 158 | movl (%edi), %ecx |
171 | testl %ecx, %ecx | 159 | testl %ecx, %ecx |
172 | jz 2f | 160 | jz 2f |
173 | addl %ebx, -__PAGE_OFFSET(%ebx, %ecx) | 161 | addl %ebx, -__PAGE_OFFSET(%ebx, %ecx) |
174 | jmp 1b | 162 | jmp 1b |
175 | 2: | 163 | 2: |
176 | #endif | 164 | #endif |
177 | 165 | ||
178 | /* | 166 | /* |
179 | * Jump to the decompressed kernel. | 167 | * Jump to the decompressed kernel. |
180 | */ | 168 | */ |
181 | xorl %ebx,%ebx | 169 | xorl %ebx, %ebx |
182 | jmp *%ebp | 170 | jmp *%ebp |
183 | 171 | ||
184 | .bss | 172 | /* |
185 | /* Stack and heap for uncompression */ | 173 | * Stack and heap for uncompression |
186 | .balign 4 | 174 | */ |
175 | .bss | ||
176 | .balign 4 | ||
187 | boot_heap: | 177 | boot_heap: |
188 | .fill BOOT_HEAP_SIZE, 1, 0 | 178 | .fill BOOT_HEAP_SIZE, 1, 0 |
189 | boot_stack: | 179 | boot_stack: |
diff --git a/arch/x86/boot/compressed/head_64.S b/arch/x86/boot/compressed/head_64.S
index ed4a82948002..f62c284db9eb 100644
--- a/arch/x86/boot/compressed/head_64.S
+++ b/arch/x86/boot/compressed/head_64.S
@@ -21,8 +21,8 @@
21 | /* | 21 | /* |
22 | * High loaded stuff by Hans Lermen & Werner Almesberger, Feb. 1996 | 22 | * High loaded stuff by Hans Lermen & Werner Almesberger, Feb. 1996 |
23 | */ | 23 | */ |
24 | .code32 | 24 | .code32 |
25 | .text | 25 | .text |
26 | 26 | ||
27 | #include <linux/linkage.h> | 27 | #include <linux/linkage.h> |
28 | #include <asm/segment.h> | 28 | #include <asm/segment.h> |
@@ -33,12 +33,14 @@
33 | #include <asm/processor-flags.h> | 33 | #include <asm/processor-flags.h> |
34 | #include <asm/asm-offsets.h> | 34 | #include <asm/asm-offsets.h> |
35 | 35 | ||
36 | .section ".text.head" | 36 | .section ".text.head" |
37 | .code32 | 37 | .code32 |
38 | ENTRY(startup_32) | 38 | ENTRY(startup_32) |
39 | cld | 39 | cld |
40 | /* test KEEP_SEGMENTS flag to see if the bootloader is asking | 40 | /* |
41 | * us to not reload segments */ | 41 | * Test KEEP_SEGMENTS flag to see if the bootloader is asking |
42 | * us to not reload segments | ||
43 | */ | ||
42 | testb $(1<<6), BP_loadflags(%esi) | 44 | testb $(1<<6), BP_loadflags(%esi) |
43 | jnz 1f | 45 | jnz 1f |
44 | 46 | ||
@@ -49,14 +51,15 @@ ENTRY(startup_32)
49 | movl %eax, %ss | 51 | movl %eax, %ss |
50 | 1: | 52 | 1: |
51 | 53 | ||
52 | /* Calculate the delta between where we were compiled to run | 54 | /* |
55 | * Calculate the delta between where we were compiled to run | ||
53 | * at and where we were actually loaded at. This can only be done | 56 | * at and where we were actually loaded at. This can only be done |
54 | * with a short local call on x86. Nothing else will tell us what | 57 | * with a short local call on x86. Nothing else will tell us what |
55 | * address we are running at. The reserved chunk of the real-mode | 58 | * address we are running at. The reserved chunk of the real-mode |
56 | * data at 0x1e4 (defined as a scratch field) are used as the stack | 59 | * data at 0x1e4 (defined as a scratch field) are used as the stack |
57 | * for this calculation. Only 4 bytes are needed. | 60 | * for this calculation. Only 4 bytes are needed. |
58 | */ | 61 | */ |
59 | leal (0x1e4+4)(%esi), %esp | 62 | leal (BP_scratch+4)(%esi), %esp |
60 | call 1f | 63 | call 1f |
61 | 1: popl %ebp | 64 | 1: popl %ebp |
62 | subl $1b, %ebp | 65 | subl $1b, %ebp |
@@ -70,32 +73,28 @@ ENTRY(startup_32)
70 | testl %eax, %eax | 73 | testl %eax, %eax |
71 | jnz no_longmode | 74 | jnz no_longmode |
72 | 75 | ||
73 | /* Compute the delta between where we were compiled to run at | 76 | /* |
77 | * Compute the delta between where we were compiled to run at | ||
74 | * and where the code will actually run at. | 78 | * and where the code will actually run at. |
75 | */ | 79 | * |
76 | /* %ebp contains the address we are loaded at by the boot loader and %ebx | 80 | * %ebp contains the address we are loaded at by the boot loader and %ebx |
77 | * contains the address where we should move the kernel image temporarily | 81 | * contains the address where we should move the kernel image temporarily |
78 | * for safe in-place decompression. | 82 | * for safe in-place decompression. |
79 | */ | 83 | */ |
80 | 84 | ||
81 | #ifdef CONFIG_RELOCATABLE | 85 | #ifdef CONFIG_RELOCATABLE |
82 | movl %ebp, %ebx | 86 | movl %ebp, %ebx |
83 | addl $(PMD_PAGE_SIZE -1), %ebx | 87 | movl BP_kernel_alignment(%esi), %eax |
84 | andl $PMD_PAGE_MASK, %ebx | 88 | decl %eax |
89 | addl %eax, %ebx | ||
90 | notl %eax | ||
91 | andl %eax, %ebx | ||
85 | #else | 92 | #else |
86 | movl $CONFIG_PHYSICAL_START, %ebx | 93 | movl $LOAD_PHYSICAL_ADDR, %ebx |
87 | #endif | 94 | #endif |
88 | 95 | ||
89 | /* Replace the compressed data size with the uncompressed size */ | 96 | /* Target address to relocate to for decompression */ |
90 | subl input_len(%ebp), %ebx | 97 | addl $z_extract_offset, %ebx |
91 | movl output_len(%ebp), %eax | ||
92 | addl %eax, %ebx | ||
93 | /* Add 8 bytes for every 32K input block */ | ||
94 | shrl $12, %eax | ||
95 | addl %eax, %ebx | ||
96 | /* Add 32K + 18 bytes of extra slack and align on a 4K boundary */ | ||
97 | addl $(32768 + 18 + 4095), %ebx | ||
98 | andl $~4095, %ebx | ||
99 | 98 | ||
100 | /* | 99 | /* |
101 | * Prepare for entering 64 bit mode | 100 | * Prepare for entering 64 bit mode |
@@ -114,7 +113,7 @@ ENTRY(startup_32)
114 | /* | 113 | /* |
115 | * Build early 4G boot pagetable | 114 | * Build early 4G boot pagetable |
116 | */ | 115 | */ |
117 | /* Initialize Page tables to 0*/ | 116 | /* Initialize Page tables to 0 */ |
118 | leal pgtable(%ebx), %edi | 117 | leal pgtable(%ebx), %edi |
119 | xorl %eax, %eax | 118 | xorl %eax, %eax |
120 | movl $((4096*6)/4), %ecx | 119 | movl $((4096*6)/4), %ecx |
@@ -155,7 +154,8 @@ ENTRY(startup_32)
155 | btsl $_EFER_LME, %eax | 154 | btsl $_EFER_LME, %eax |
156 | wrmsr | 155 | wrmsr |
157 | 156 | ||
158 | /* Setup for the jump to 64bit mode | 157 | /* |
158 | * Setup for the jump to 64bit mode | ||
159 | * | 159 | * |
160 | * When the jump is performend we will be in long mode but | 160 | * When the jump is performend we will be in long mode but |
161 | * in 32bit compatibility mode with EFER.LME = 1, CS.L = 0, CS.D = 1 | 161 | * in 32bit compatibility mode with EFER.LME = 1, CS.L = 0, CS.D = 1 |
@@ -184,7 +184,8 @@ no_longmode:
184 | 184 | ||
185 | #include "../../kernel/verify_cpu_64.S" | 185 | #include "../../kernel/verify_cpu_64.S" |
186 | 186 | ||
187 | /* Be careful here startup_64 needs to be at a predictable | 187 | /* |
188 | * Be careful here startup_64 needs to be at a predictable | ||
188 | * address so I can export it in an ELF header. Bootloaders | 189 | * address so I can export it in an ELF header. Bootloaders |
189 | * should look at the ELF header to find this address, as | 190 | * should look at the ELF header to find this address, as |
190 | * it may change in the future. | 191 | * it may change in the future. |
@@ -192,7 +193,8 @@ no_longmode:
192 | .code64 | 193 | .code64 |
193 | .org 0x200 | 194 | .org 0x200 |
194 | ENTRY(startup_64) | 195 | ENTRY(startup_64) |
195 | /* We come here either from startup_32 or directly from a | 196 | /* |
197 | * We come here either from startup_32 or directly from a | ||
196 | * 64bit bootloader. If we come here from a bootloader we depend on | 198 | * 64bit bootloader. If we come here from a bootloader we depend on |
197 | * an identity mapped page table being provied that maps our | 199 | * an identity mapped page table being provied that maps our |
198 | * entire text+data+bss and hopefully all of memory. | 200 | * entire text+data+bss and hopefully all of memory. |
@@ -209,50 +211,54 @@ ENTRY(startup_64)
209 | movl $0x20, %eax | 211 | movl $0x20, %eax |
210 | ltr %ax | 212 | ltr %ax |
211 | 213 | ||
212 | /* Compute the decompressed kernel start address. It is where | 214 | /* |
215 | * Compute the decompressed kernel start address. It is where | ||
213 | * we were loaded at aligned to a 2M boundary. %rbp contains the | 216 | * we were loaded at aligned to a 2M boundary. %rbp contains the |
214 | * decompressed kernel start address. | 217 | * decompressed kernel start address. |
215 | * | 218 | * |
216 | * If it is a relocatable kernel then decompress and run the kernel | 219 | * If it is a relocatable kernel then decompress and run the kernel |
217 | * from load address aligned to 2MB addr, otherwise decompress and | 220 | * from load address aligned to 2MB addr, otherwise decompress and |
218 | * run the kernel from CONFIG_PHYSICAL_START | 221 | * run the kernel from LOAD_PHYSICAL_ADDR |
222 | * | ||
223 | * We cannot rely on the calculation done in 32-bit mode, since we | ||
224 | * may have been invoked via the 64-bit entry point. | ||
219 | */ | 225 | */ |
220 | 226 | ||
221 | /* Start with the delta to where the kernel will run at. */ | 227 | /* Start with the delta to where the kernel will run at. */ |
222 | #ifdef CONFIG_RELOCATABLE | 228 | #ifdef CONFIG_RELOCATABLE |
223 | leaq startup_32(%rip) /* - $startup_32 */, %rbp | 229 | leaq startup_32(%rip) /* - $startup_32 */, %rbp |
224 | addq $(PMD_PAGE_SIZE - 1), %rbp | 230 | movl BP_kernel_alignment(%rsi), %eax |
225 | andq $PMD_PAGE_MASK, %rbp | 231 | decl %eax |
226 | movq %rbp, %rbx | 232 | addq %rax, %rbp |
233 | notq %rax | ||
234 | andq %rax, %rbp | ||
227 | #else | 235 | #else |
228 | movq $CONFIG_PHYSICAL_START, %rbp | 236 | movq $LOAD_PHYSICAL_ADDR, %rbp |
229 | movq %rbp, %rbx | ||
230 | #endif | 237 | #endif |
231 | 238 | ||
232 | /* Replace the compressed data size with the uncompressed size */ | 239 | /* Target address to relocate to for decompression */ |
233 | movl input_len(%rip), %eax | 240 | leaq z_extract_offset(%rbp), %rbx |
234 | subq %rax, %rbx | 241 | |
235 | movl output_len(%rip), %eax | 242 | /* Set up the stack */ |
236 | addq %rax, %rbx | 243 | leaq boot_stack_end(%rbx), %rsp |
237 | /* Add 8 bytes for every 32K input block */ | 244 | |
238 | shrq $12, %rax | 245 | /* Zero EFLAGS */ |
239 | addq %rax, %rbx | 246 | pushq $0 |
240 | /* Add 32K + 18 bytes of extra slack and align on a 4K boundary */ | 247 | popfq |
241 | addq $(32768 + 18 + 4095), %rbx | 248 | |
242 | andq $~4095, %rbx | 249 | /* |
243 | 250 | * Copy the compressed kernel to the end of our buffer | |
244 | /* Copy the compressed kernel to the end of our buffer | ||
245 | * where decompression in place becomes safe. | 251 | * where decompression in place becomes safe. |
246 | */ | 252 | */ |
247 | leaq _end_before_pgt(%rip), %r8 | 253 | pushq %rsi |
248 | leaq _end_before_pgt(%rbx), %r9 | 254 | leaq (_bss-8)(%rip), %rsi |
249 | movq $_end_before_pgt /* - $startup_32 */, %rcx | 255 | leaq (_bss-8)(%rbx), %rdi |
250 | 1: subq $8, %r8 | 256 | movq $_bss /* - $startup_32 */, %rcx |
251 | subq $8, %r9 | 257 | shrq $3, %rcx |
252 | movq 0(%r8), %rax | 258 | std |
253 | movq %rax, 0(%r9) | 259 | rep movsq |
254 | subq $8, %rcx | 260 | cld |
255 | jnz 1b | 261 | popq %rsi |
256 | 262 | ||
257 | /* | 263 | /* |
258 | * Jump to the relocated address. | 264 | * Jump to the relocated address. |
@@ -260,37 +266,28 @@ ENTRY(startup_64) | |||
260 | leaq relocated(%rbx), %rax | 266 | leaq relocated(%rbx), %rax |
261 | jmp *%rax | 267 | jmp *%rax |
262 | 268 | ||
263 | .section ".text" | 269 | .text |
264 | relocated: | 270 | relocated: |
265 | 271 | ||
266 | /* | 272 | /* |
267 | * Clear BSS | 273 | * Clear BSS (stack is currently empty) |
268 | */ | 274 | */ |
269 | xorq %rax, %rax | 275 | xorl %eax, %eax |
270 | leaq _edata(%rbx), %rdi | 276 | leaq _bss(%rip), %rdi |
271 | leaq _end_before_pgt(%rbx), %rcx | 277 | leaq _ebss(%rip), %rcx |
272 | subq %rdi, %rcx | 278 | subq %rdi, %rcx |
273 | cld | 279 | shrq $3, %rcx |
274 | rep | 280 | rep stosq |
275 | stosb | ||
276 | |||
277 | /* Setup the stack */ | ||
278 | leaq boot_stack_end(%rip), %rsp | ||
279 | |||
280 | /* zero EFLAGS after setting rsp */ | ||
281 | pushq $0 | ||
282 | popfq | ||
283 | 281 | ||
284 | /* | 282 | /* |
285 | * Do the decompression, and jump to the new kernel.. | 283 | * Do the decompression, and jump to the new kernel.. |
286 | */ | 284 | */ |
287 | pushq %rsi # Save the real mode argument | 285 | pushq %rsi /* Save the real mode argument */ |
288 | movq %rsi, %rdi # real mode address | 286 | movq %rsi, %rdi /* real mode address */ |
289 | leaq boot_heap(%rip), %rsi # malloc area for uncompression | 287 | leaq boot_heap(%rip), %rsi /* malloc area for uncompression */ |
290 | leaq input_data(%rip), %rdx # input_data | 288 | leaq input_data(%rip), %rdx /* input_data */ |
291 | movl input_len(%rip), %eax | 289 | movl $z_input_len, %ecx /* input_len */ |
292 | movq %rax, %rcx # input_len | 290 | movq %rbp, %r8 /* output target address */ |
293 | movq %rbp, %r8 # output | ||
294 | call decompress_kernel | 291 | call decompress_kernel |
295 | popq %rsi | 292 | popq %rsi |
296 | 293 | ||
@@ -311,11 +308,21 @@ gdt: | |||
311 | .quad 0x0000000000000000 /* TS continued */ | 308 | .quad 0x0000000000000000 /* TS continued */ |
312 | gdt_end: | 309 | gdt_end: |
313 | 310 | ||
314 | .bss | 311 | /* |
315 | /* Stack and heap for uncompression */ | 312 | * Stack and heap for uncompression |
316 | .balign 4 | 313 | */ |
314 | .bss | ||
315 | .balign 4 | ||
317 | boot_heap: | 316 | boot_heap: |
318 | .fill BOOT_HEAP_SIZE, 1, 0 | 317 | .fill BOOT_HEAP_SIZE, 1, 0 |
319 | boot_stack: | 318 | boot_stack: |
320 | .fill BOOT_STACK_SIZE, 1, 0 | 319 | .fill BOOT_STACK_SIZE, 1, 0 |
321 | boot_stack_end: | 320 | boot_stack_end: |
321 | |||
322 | /* | ||
323 | * Space for page tables (not in .bss so not zeroed) | ||
324 | */ | ||
325 | .section ".pgtable","a",@nobits | ||
326 | .balign 4096 | ||
327 | pgtable: | ||
328 | .fill 6*4096, 1, 0 | ||
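Note on the startup_64 changes above: the decl/notq/andq sequence replaces the fixed 2 MB (PMD_PAGE_SIZE) rounding with the kernel_alignment value taken from the boot_params header, and the old runtime size arithmetic is replaced by the build-time z_extract_offset symbol. A rough C sketch of what the sequence computes (names are illustrative, not actual kernel symbols):

    #include <stdint.h>

    /* Illustrative only: mirrors the %rbp/%rbx computation in startup_64. */
    uint64_t decompress_target(uint64_t load_addr,      /* runtime address of startup_32 */
                               uint64_t align,          /* boot_params.hdr.kernel_alignment */
                               uint64_t extract_offset) /* z_extract_offset from mkpiggy */
    {
            /* %rbp: load address rounded up to the requested alignment */
            uint64_t rbp = (load_addr + align - 1) & ~(align - 1);

            /* %rbx: where the compressed image is copied so that
             * in-place decompression cannot overwrite its own input */
            return rbp + extract_offset;
    }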
diff --git a/arch/x86/boot/compressed/misc.c b/arch/x86/boot/compressed/misc.c index e45be73684ff..842b2a36174a 100644 --- a/arch/x86/boot/compressed/misc.c +++ b/arch/x86/boot/compressed/misc.c | |||
@@ -325,21 +325,19 @@ asmlinkage void decompress_kernel(void *rmode, memptr heap, | |||
325 | free_mem_ptr = heap; /* Heap */ | 325 | free_mem_ptr = heap; /* Heap */ |
326 | free_mem_end_ptr = heap + BOOT_HEAP_SIZE; | 326 | free_mem_end_ptr = heap + BOOT_HEAP_SIZE; |
327 | 327 | ||
328 | if ((unsigned long)output & (MIN_KERNEL_ALIGN - 1)) | ||
329 | error("Destination address inappropriately aligned"); | ||
328 | #ifdef CONFIG_X86_64 | 330 | #ifdef CONFIG_X86_64 |
329 | if ((unsigned long)output & (__KERNEL_ALIGN - 1)) | 331 | if (heap > 0x3fffffffffffUL) |
330 | error("Destination address not 2M aligned"); | ||
331 | if ((unsigned long)output >= 0xffffffffffUL) | ||
332 | error("Destination address too large"); | 332 | error("Destination address too large"); |
333 | #else | 333 | #else |
334 | if ((u32)output & (CONFIG_PHYSICAL_ALIGN - 1)) | ||
335 | error("Destination address not CONFIG_PHYSICAL_ALIGN aligned"); | ||
336 | if (heap > ((-__PAGE_OFFSET-(512<<20)-1) & 0x7fffffff)) | 334 | if (heap > ((-__PAGE_OFFSET-(512<<20)-1) & 0x7fffffff)) |
337 | error("Destination address too large"); | 335 | error("Destination address too large"); |
336 | #endif | ||
338 | #ifndef CONFIG_RELOCATABLE | 337 | #ifndef CONFIG_RELOCATABLE |
339 | if ((u32)output != LOAD_PHYSICAL_ADDR) | 338 | if ((unsigned long)output != LOAD_PHYSICAL_ADDR) |
340 | error("Wrong destination address"); | 339 | error("Wrong destination address"); |
341 | #endif | 340 | #endif |
342 | #endif | ||
343 | 341 | ||
344 | if (!quiet) | 342 | if (!quiet) |
345 | putstr("\nDecompressing Linux... "); | 343 | putstr("\nDecompressing Linux... "); |
diff --git a/arch/x86/boot/compressed/mkpiggy.c b/arch/x86/boot/compressed/mkpiggy.c new file mode 100644 index 000000000000..bcbd36c41432 --- /dev/null +++ b/arch/x86/boot/compressed/mkpiggy.c | |||
@@ -0,0 +1,97 @@ | |||
1 | /* ----------------------------------------------------------------------- * | ||
2 | * | ||
3 | * Copyright (C) 2009 Intel Corporation. All rights reserved. | ||
4 | * | ||
5 | * This program is free software; you can redistribute it and/or | ||
6 | * modify it under the terms of the GNU General Public License version | ||
7 | * 2 as published by the Free Software Foundation. | ||
8 | * | ||
9 | * This program is distributed in the hope that it will be useful, | ||
10 | * but WITHOUT ANY WARRANTY; without even the implied warranty of | ||
11 | * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the | ||
12 | * GNU General Public License for more details. | ||
13 | * | ||
14 | * You should have received a copy of the GNU General Public License | ||
15 | * along with this program; if not, write to the Free Software | ||
16 | * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA | ||
17 | * 02110-1301, USA. | ||
18 | * | ||
19 | * H. Peter Anvin <hpa@linux.intel.com> | ||
20 | * | ||
21 | * ----------------------------------------------------------------------- */ | ||
22 | |||
23 | /* | ||
24 | * Compute the desired load offset from a compressed program; outputs | ||
25 | * a small assembly wrapper with the appropriate symbols defined. | ||
26 | */ | ||
27 | |||
28 | #include <stdlib.h> | ||
29 | #include <stdio.h> | ||
30 | #include <string.h> | ||
31 | #include <inttypes.h> | ||
32 | |||
33 | static uint32_t getle32(const void *p) | ||
34 | { | ||
35 | const uint8_t *cp = p; | ||
36 | |||
37 | return (uint32_t)cp[0] + ((uint32_t)cp[1] << 8) + | ||
38 | ((uint32_t)cp[2] << 16) + ((uint32_t)cp[3] << 24); | ||
39 | } | ||
40 | |||
41 | int main(int argc, char *argv[]) | ||
42 | { | ||
43 | uint32_t olen; | ||
44 | long ilen; | ||
45 | unsigned long offs; | ||
46 | FILE *f; | ||
47 | |||
48 | if (argc < 2) { | ||
49 | fprintf(stderr, "Usage: %s compressed_file\n", argv[0]); | ||
50 | return 1; | ||
51 | } | ||
52 | |||
53 | /* Get the information for the compressed kernel image first */ | ||
54 | |||
55 | f = fopen(argv[1], "r"); | ||
56 | if (!f) { | ||
57 | perror(argv[1]); | ||
58 | return 1; | ||
59 | } | ||
60 | |||
61 | |||
62 | if (fseek(f, -4L, SEEK_END)) { | ||
63 | perror(argv[1]); | ||
64 | } | ||
65 | fread(&olen, sizeof olen, 1, f); | ||
66 | ilen = ftell(f); | ||
67 | olen = getle32(&olen); | ||
68 | fclose(f); | ||
69 | |||
70 | /* | ||
71 | * Now we have the input (compressed) and output (uncompressed) | ||
72 | * sizes, compute the necessary decompression offset... | ||
73 | */ | ||
74 | |||
75 | offs = (olen > ilen) ? olen - ilen : 0; | ||
76 | offs += olen >> 12; /* Add 8 bytes for each 32K block */ | ||
77 | offs += 32*1024 + 18; /* Add 32K + 18 bytes slack */ | ||
78 | offs = (offs+4095) & ~4095; /* Round to a 4K boundary */ | ||
79 | |||
80 | printf(".section \".rodata.compressed\",\"a\",@progbits\n"); | ||
81 | printf(".globl z_input_len\n"); | ||
82 | printf("z_input_len = %lu\n", ilen); | ||
83 | printf(".globl z_output_len\n"); | ||
84 | printf("z_output_len = %lu\n", (unsigned long)olen); | ||
85 | printf(".globl z_extract_offset\n"); | ||
86 | printf("z_extract_offset = 0x%lx\n", offs); | ||
87 | /* z_extract_offset_negative allows simplification of head_32.S */ | ||
88 | printf(".globl z_extract_offset_negative\n"); | ||
89 | printf("z_extract_offset_negative = -0x%lx\n", offs); | ||
90 | |||
91 | printf(".globl input_data, input_data_end\n"); | ||
92 | printf("input_data:\n"); | ||
93 | printf(".incbin \"%s\"\n", argv[1]); | ||
94 | printf("input_data_end:\n"); | ||
95 | |||
96 | return 0; | ||
97 | } | ||
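The offset formula in mkpiggy.c reproduces at build time the worst-case-expansion arithmetic that head_64.S used to do at run time. A worked example with hypothetical sizes (4 MiB compressed, 16 MiB uncompressed; the numbers are for illustration only):

    #include <stdio.h>

    int main(void)
    {
            unsigned long ilen = 4UL << 20;           /* hypothetical compressed size   */
            unsigned long olen = 16UL << 20;          /* hypothetical uncompressed size */
            unsigned long offs;

            offs  = (olen > ilen) ? olen - ilen : 0;  /* 12582912: net growth            */
            offs += olen >> 12;                       /* +4096: 8 bytes per 32 KiB block */
            offs += 32*1024 + 18;                     /* +32786: decompressor slack      */
            offs  = (offs + 4095) & ~4095UL;          /* round up to a 4 KiB boundary    */

            printf("z_extract_offset = 0x%lx\n", offs);  /* prints 0xc0a000 (12623872)  */
            return 0;
    }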
diff --git a/arch/x86/boot/compressed/vmlinux_64.lds b/arch/x86/boot/compressed/vmlinux.lds.S index bef1ac891bce..cc353e1b3ffd 100644 --- a/arch/x86/boot/compressed/vmlinux_64.lds +++ b/arch/x86/boot/compressed/vmlinux.lds.S | |||
@@ -1,6 +1,17 @@ | |||
1 | OUTPUT_FORMAT("elf64-x86-64", "elf64-x86-64", "elf64-x86-64") | 1 | OUTPUT_FORMAT(CONFIG_OUTPUT_FORMAT, CONFIG_OUTPUT_FORMAT, CONFIG_OUTPUT_FORMAT) |
2 | |||
3 | #undef i386 | ||
4 | |||
5 | #include <asm/page_types.h> | ||
6 | |||
7 | #ifdef CONFIG_X86_64 | ||
2 | OUTPUT_ARCH(i386:x86-64) | 8 | OUTPUT_ARCH(i386:x86-64) |
3 | ENTRY(startup_64) | 9 | ENTRY(startup_64) |
10 | #else | ||
11 | OUTPUT_ARCH(i386) | ||
12 | ENTRY(startup_32) | ||
13 | #endif | ||
14 | |||
4 | SECTIONS | 15 | SECTIONS |
5 | { | 16 | { |
6 | /* Be careful parts of head_64.S assume startup_32 is at | 17 | /* Be careful parts of head_64.S assume startup_32 is at |
@@ -33,16 +44,22 @@ SECTIONS | |||
33 | *(.data.*) | 44 | *(.data.*) |
34 | _edata = . ; | 45 | _edata = . ; |
35 | } | 46 | } |
47 | . = ALIGN(CONFIG_X86_L1_CACHE_BYTES); | ||
36 | .bss : { | 48 | .bss : { |
37 | _bss = . ; | 49 | _bss = . ; |
38 | *(.bss) | 50 | *(.bss) |
39 | *(.bss.*) | 51 | *(.bss.*) |
40 | *(COMMON) | 52 | *(COMMON) |
41 | . = ALIGN(8); | 53 | . = ALIGN(8); /* For convenience during zeroing */ |
42 | _end_before_pgt = . ; | ||
43 | . = ALIGN(4096); | ||
44 | pgtable = . ; | ||
45 | . = . + 4096 * 6; | ||
46 | _ebss = .; | 54 | _ebss = .; |
47 | } | 55 | } |
56 | #ifdef CONFIG_X86_64 | ||
57 | . = ALIGN(PAGE_SIZE); | ||
58 | .pgtable : { | ||
59 | _pgtable = . ; | ||
60 | *(.pgtable) | ||
61 | _epgtable = . ; | ||
62 | } | ||
63 | #endif | ||
64 | _end = .; | ||
48 | } | 65 | } |
diff --git a/arch/x86/boot/compressed/vmlinux.scr b/arch/x86/boot/compressed/vmlinux.scr deleted file mode 100644 index f02382ae5c48..000000000000 --- a/arch/x86/boot/compressed/vmlinux.scr +++ /dev/null | |||
@@ -1,10 +0,0 @@ | |||
1 | SECTIONS | ||
2 | { | ||
3 | .rodata.compressed : { | ||
4 | input_len = .; | ||
5 | LONG(input_data_end - input_data) input_data = .; | ||
6 | *(.data) | ||
7 | output_len = . - 4; | ||
8 | input_data_end = .; | ||
9 | } | ||
10 | } | ||
diff --git a/arch/x86/boot/compressed/vmlinux_32.lds b/arch/x86/boot/compressed/vmlinux_32.lds deleted file mode 100644 index bb3c48379c40..000000000000 --- a/arch/x86/boot/compressed/vmlinux_32.lds +++ /dev/null | |||
@@ -1,43 +0,0 @@ | |||
1 | OUTPUT_FORMAT("elf32-i386", "elf32-i386", "elf32-i386") | ||
2 | OUTPUT_ARCH(i386) | ||
3 | ENTRY(startup_32) | ||
4 | SECTIONS | ||
5 | { | ||
6 | /* Be careful parts of head_32.S assume startup_32 is at | ||
7 | * address 0. | ||
8 | */ | ||
9 | . = 0; | ||
10 | .text.head : { | ||
11 | _head = . ; | ||
12 | *(.text.head) | ||
13 | _ehead = . ; | ||
14 | } | ||
15 | .rodata.compressed : { | ||
16 | *(.rodata.compressed) | ||
17 | } | ||
18 | .text : { | ||
19 | _text = .; /* Text */ | ||
20 | *(.text) | ||
21 | *(.text.*) | ||
22 | _etext = . ; | ||
23 | } | ||
24 | .rodata : { | ||
25 | _rodata = . ; | ||
26 | *(.rodata) /* read-only data */ | ||
27 | *(.rodata.*) | ||
28 | _erodata = . ; | ||
29 | } | ||
30 | .data : { | ||
31 | _data = . ; | ||
32 | *(.data) | ||
33 | *(.data.*) | ||
34 | _edata = . ; | ||
35 | } | ||
36 | .bss : { | ||
37 | _bss = . ; | ||
38 | *(.bss) | ||
39 | *(.bss.*) | ||
40 | *(COMMON) | ||
41 | _end = . ; | ||
42 | } | ||
43 | } | ||
diff --git a/arch/x86/boot/edd.c b/arch/x86/boot/edd.c index 1aae8f3e5ca1..c501a5b466f8 100644 --- a/arch/x86/boot/edd.c +++ b/arch/x86/boot/edd.c | |||
@@ -2,6 +2,7 @@ | |||
2 | * | 2 | * |
3 | * Copyright (C) 1991, 1992 Linus Torvalds | 3 | * Copyright (C) 1991, 1992 Linus Torvalds |
4 | * Copyright 2007 rPath, Inc. - All Rights Reserved | 4 | * Copyright 2007 rPath, Inc. - All Rights Reserved |
5 | * Copyright 2009 Intel Corporation; author H. Peter Anvin | ||
5 | * | 6 | * |
6 | * This file is part of the Linux kernel, and is made available under | 7 | * This file is part of the Linux kernel, and is made available under |
7 | * the terms of the GNU General Public License version 2. | 8 | * the terms of the GNU General Public License version 2. |
@@ -22,17 +23,17 @@ | |||
22 | */ | 23 | */ |
23 | static int read_mbr(u8 devno, void *buf) | 24 | static int read_mbr(u8 devno, void *buf) |
24 | { | 25 | { |
25 | u16 ax, bx, cx, dx; | 26 | struct biosregs ireg, oreg; |
26 | 27 | ||
27 | ax = 0x0201; /* Legacy Read, one sector */ | 28 | initregs(&ireg); |
28 | cx = 0x0001; /* Sector 0-0-1 */ | 29 | ireg.ax = 0x0201; /* Legacy Read, one sector */ |
29 | dx = devno; | 30 | ireg.cx = 0x0001; /* Sector 0-0-1 */ |
30 | bx = (size_t)buf; | 31 | ireg.dl = devno; |
31 | asm volatile("pushfl; stc; int $0x13; setc %%al; popfl" | 32 | ireg.bx = (size_t)buf; |
32 | : "+a" (ax), "+c" (cx), "+d" (dx), "+b" (bx) | ||
33 | : : "esi", "edi", "memory"); | ||
34 | 33 | ||
35 | return -(u8)ax; /* 0 or -1 */ | 34 | intcall(0x13, &ireg, &oreg); |
35 | |||
36 | return -(oreg.eflags & X86_EFLAGS_CF); /* 0 or -1 */ | ||
36 | } | 37 | } |
37 | 38 | ||
38 | static u32 read_mbr_sig(u8 devno, struct edd_info *ei, u32 *mbrsig) | 39 | static u32 read_mbr_sig(u8 devno, struct edd_info *ei, u32 *mbrsig) |
@@ -72,56 +73,46 @@ static u32 read_mbr_sig(u8 devno, struct edd_info *ei, u32 *mbrsig) | |||
72 | 73 | ||
73 | static int get_edd_info(u8 devno, struct edd_info *ei) | 74 | static int get_edd_info(u8 devno, struct edd_info *ei) |
74 | { | 75 | { |
75 | u16 ax, bx, cx, dx, di; | 76 | struct biosregs ireg, oreg; |
76 | 77 | ||
77 | memset(ei, 0, sizeof *ei); | 78 | memset(ei, 0, sizeof *ei); |
78 | 79 | ||
79 | /* Check Extensions Present */ | 80 | /* Check Extensions Present */ |
80 | 81 | ||
81 | ax = 0x4100; | 82 | initregs(&ireg); |
82 | bx = EDDMAGIC1; | 83 | ireg.ah = 0x41; |
83 | dx = devno; | 84 | ireg.bx = EDDMAGIC1; |
84 | asm("pushfl; stc; int $0x13; setc %%al; popfl" | 85 | ireg.dl = devno; |
85 | : "+a" (ax), "+b" (bx), "=c" (cx), "+d" (dx) | 86 | intcall(0x13, &ireg, &oreg); |
86 | : : "esi", "edi"); | ||
87 | 87 | ||
88 | if ((u8)ax) | 88 | if (oreg.eflags & X86_EFLAGS_CF) |
89 | return -1; /* No extended information */ | 89 | return -1; /* No extended information */ |
90 | 90 | ||
91 | if (bx != EDDMAGIC2) | 91 | if (oreg.bx != EDDMAGIC2) |
92 | return -1; | 92 | return -1; |
93 | 93 | ||
94 | ei->device = devno; | 94 | ei->device = devno; |
95 | ei->version = ax >> 8; /* EDD version number */ | 95 | ei->version = oreg.ah; /* EDD version number */ |
96 | ei->interface_support = cx; /* EDD functionality subsets */ | 96 | ei->interface_support = oreg.cx; /* EDD functionality subsets */ |
97 | 97 | ||
98 | /* Extended Get Device Parameters */ | 98 | /* Extended Get Device Parameters */ |
99 | 99 | ||
100 | ei->params.length = sizeof(ei->params); | 100 | ei->params.length = sizeof(ei->params); |
101 | ax = 0x4800; | 101 | ireg.ah = 0x48; |
102 | dx = devno; | 102 | ireg.si = (size_t)&ei->params; |
103 | asm("pushfl; int $0x13; popfl" | 103 | intcall(0x13, &ireg, &oreg); |
104 | : "+a" (ax), "+d" (dx), "=m" (ei->params) | ||
105 | : "S" (&ei->params) | ||
106 | : "ebx", "ecx", "edi"); | ||
107 | 104 | ||
108 | /* Get legacy CHS parameters */ | 105 | /* Get legacy CHS parameters */ |
109 | 106 | ||
110 | /* Ralf Brown recommends setting ES:DI to 0:0 */ | 107 | /* Ralf Brown recommends setting ES:DI to 0:0 */ |
111 | ax = 0x0800; | 108 | ireg.ah = 0x08; |
112 | dx = devno; | 109 | ireg.es = 0; |
113 | di = 0; | 110 | intcall(0x13, &ireg, &oreg); |
114 | asm("pushw %%es; " | 111 | |
115 | "movw %%di,%%es; " | 112 | if (!(oreg.eflags & X86_EFLAGS_CF)) { |
116 | "pushfl; stc; int $0x13; setc %%al; popfl; " | 113 | ei->legacy_max_cylinder = oreg.ch + ((oreg.cl & 0xc0) << 2); |
117 | "popw %%es" | 114 | ei->legacy_max_head = oreg.dh; |
118 | : "+a" (ax), "=b" (bx), "=c" (cx), "+d" (dx), "+D" (di) | 115 | ei->legacy_sectors_per_track = oreg.cl & 0x3f; |
119 | : : "esi"); | ||
120 | |||
121 | if ((u8)ax == 0) { | ||
122 | ei->legacy_max_cylinder = (cx >> 8) + ((cx & 0xc0) << 2); | ||
123 | ei->legacy_max_head = dx >> 8; | ||
124 | ei->legacy_sectors_per_track = cx & 0x3f; | ||
125 | } | 116 | } |
126 | 117 | ||
127 | return 0; | 118 | return 0; |
diff --git a/arch/x86/boot/header.S b/arch/x86/boot/header.S index 5d84d1c74e4c..b31cc54b4641 100644 --- a/arch/x86/boot/header.S +++ b/arch/x86/boot/header.S | |||
@@ -22,7 +22,8 @@ | |||
22 | #include <asm/page_types.h> | 22 | #include <asm/page_types.h> |
23 | #include <asm/setup.h> | 23 | #include <asm/setup.h> |
24 | #include "boot.h" | 24 | #include "boot.h" |
25 | #include "offsets.h" | 25 | #include "voffset.h" |
26 | #include "zoffset.h" | ||
26 | 27 | ||
27 | BOOTSEG = 0x07C0 /* original address of boot-sector */ | 28 | BOOTSEG = 0x07C0 /* original address of boot-sector */ |
28 | SYSSEG = 0x1000 /* historical load address >> 4 */ | 29 | SYSSEG = 0x1000 /* historical load address >> 4 */ |
@@ -115,7 +116,7 @@ _start: | |||
115 | # Part 2 of the header, from the old setup.S | 116 | # Part 2 of the header, from the old setup.S |
116 | 117 | ||
117 | .ascii "HdrS" # header signature | 118 | .ascii "HdrS" # header signature |
118 | .word 0x0209 # header version number (>= 0x0105) | 119 | .word 0x020a # header version number (>= 0x0105) |
119 | # or else old loadlin-1.5 will fail) | 120 | # or else old loadlin-1.5 will fail) |
120 | .globl realmode_swtch | 121 | .globl realmode_swtch |
121 | realmode_swtch: .word 0, 0 # default_switch, SETUPSEG | 122 | realmode_swtch: .word 0, 0 # default_switch, SETUPSEG |
@@ -168,7 +169,11 @@ heap_end_ptr: .word _end+STACK_SIZE-512 | |||
168 | # end of setup code can be used by setup | 169 | # end of setup code can be used by setup |
169 | # for local heap purposes. | 170 | # for local heap purposes. |
170 | 171 | ||
171 | pad1: .word 0 | 172 | ext_loader_ver: |
173 | .byte 0 # Extended boot loader version | ||
174 | ext_loader_type: | ||
175 | .byte 0 # Extended boot loader type | ||
176 | |||
172 | cmd_line_ptr: .long 0 # (Header version 0x0202 or later) | 177 | cmd_line_ptr: .long 0 # (Header version 0x0202 or later) |
173 | # If nonzero, a 32-bit pointer | 178 | # If nonzero, a 32-bit pointer |
174 | # to the kernel command line. | 179 | # to the kernel command line. |
@@ -200,7 +205,7 @@ relocatable_kernel: .byte 1 | |||
200 | #else | 205 | #else |
201 | relocatable_kernel: .byte 0 | 206 | relocatable_kernel: .byte 0 |
202 | #endif | 207 | #endif |
203 | pad2: .byte 0 | 208 | min_alignment: .byte MIN_KERNEL_ALIGN_LG2 # minimum alignment |
204 | pad3: .word 0 | 209 | pad3: .word 0 |
205 | 210 | ||
206 | cmdline_size: .long COMMAND_LINE_SIZE-1 #length of the command line, | 211 | cmdline_size: .long COMMAND_LINE_SIZE-1 #length of the command line, |
@@ -212,16 +217,27 @@ hardware_subarch: .long 0 # subarchitecture, added with 2.07 | |||
212 | 217 | ||
213 | hardware_subarch_data: .quad 0 | 218 | hardware_subarch_data: .quad 0 |
214 | 219 | ||
215 | payload_offset: .long input_data | 220 | payload_offset: .long ZO_input_data |
216 | payload_length: .long input_data_end-input_data | 221 | payload_length: .long ZO_z_input_len |
217 | 222 | ||
218 | setup_data: .quad 0 # 64-bit physical pointer to | 223 | setup_data: .quad 0 # 64-bit physical pointer to |
219 | # single linked list of | 224 | # single linked list of |
220 | # struct setup_data | 225 | # struct setup_data |
221 | 226 | ||
227 | pref_address: .quad LOAD_PHYSICAL_ADDR # preferred load addr | ||
228 | |||
229 | #define ZO_INIT_SIZE (ZO__end - ZO_startup_32 + ZO_z_extract_offset) | ||
230 | #define VO_INIT_SIZE (VO__end - VO__text) | ||
231 | #if ZO_INIT_SIZE > VO_INIT_SIZE | ||
232 | #define INIT_SIZE ZO_INIT_SIZE | ||
233 | #else | ||
234 | #define INIT_SIZE VO_INIT_SIZE | ||
235 | #endif | ||
236 | init_size: .long INIT_SIZE # kernel initialization size | ||
237 | |||
222 | # End of setup header ##################################################### | 238 | # End of setup header ##################################################### |
223 | 239 | ||
224 | .section ".inittext", "ax" | 240 | .section ".entrytext", "ax" |
225 | start_of_setup: | 241 | start_of_setup: |
226 | #ifdef SAFE_RESET_DISK_CONTROLLER | 242 | #ifdef SAFE_RESET_DISK_CONTROLLER |
227 | # Reset the disk controller. | 243 | # Reset the disk controller. |
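Note on the new init_size field a few lines up: the ZO_ symbols come from the generated zoffset.h (compressed image) and the VO_ symbols from voffset.h (uncompressed vmlinux), so INIT_SIZE is simply the larger of the two memory footprints the loader must reserve starting at pref_address. Expressed as plain C (illustrative sketch of the preprocessor logic, not kernel code):

    /* zo_size = ZO__end - ZO_startup_32, vo_size = VO__end - VO__text */
    unsigned long init_size(unsigned long zo_size, unsigned long z_extract_offset,
                            unsigned long vo_size)
    {
            unsigned long zo_init_size = zo_size + z_extract_offset;

            return (zo_init_size > vo_size) ? zo_init_size : vo_size;
    }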
diff --git a/arch/x86/boot/main.c b/arch/x86/boot/main.c index 58f0415d3ae0..140172b895bd 100644 --- a/arch/x86/boot/main.c +++ b/arch/x86/boot/main.c | |||
@@ -2,6 +2,7 @@ | |||
2 | * | 2 | * |
3 | * Copyright (C) 1991, 1992 Linus Torvalds | 3 | * Copyright (C) 1991, 1992 Linus Torvalds |
4 | * Copyright 2007 rPath, Inc. - All Rights Reserved | 4 | * Copyright 2007 rPath, Inc. - All Rights Reserved |
5 | * Copyright 2009 Intel Corporation; author H. Peter Anvin | ||
5 | * | 6 | * |
6 | * This file is part of the Linux kernel, and is made available under | 7 | * This file is part of the Linux kernel, and is made available under |
7 | * the terms of the GNU General Public License version 2. | 8 | * the terms of the GNU General Public License version 2. |
@@ -61,11 +62,10 @@ static void copy_boot_params(void) | |||
61 | */ | 62 | */ |
62 | static void keyboard_set_repeat(void) | 63 | static void keyboard_set_repeat(void) |
63 | { | 64 | { |
64 | u16 ax = 0x0305; | 65 | struct biosregs ireg; |
65 | u16 bx = 0; | 66 | initregs(&ireg); |
66 | asm volatile("int $0x16" | 67 | ireg.ax = 0x0305; |
67 | : "+a" (ax), "+b" (bx) | 68 | intcall(0x16, &ireg, NULL); |
68 | : : "ecx", "edx", "esi", "edi"); | ||
69 | } | 69 | } |
70 | 70 | ||
71 | /* | 71 | /* |
@@ -73,18 +73,22 @@ static void keyboard_set_repeat(void) | |||
73 | */ | 73 | */ |
74 | static void query_ist(void) | 74 | static void query_ist(void) |
75 | { | 75 | { |
76 | struct biosregs ireg, oreg; | ||
77 | |||
76 | /* Some older BIOSes apparently crash on this call, so filter | 78 | /* Some older BIOSes apparently crash on this call, so filter |
77 | it from machines too old to have SpeedStep at all. */ | 79 | it from machines too old to have SpeedStep at all. */ |
78 | if (cpu.level < 6) | 80 | if (cpu.level < 6) |
79 | return; | 81 | return; |
80 | 82 | ||
81 | asm("int $0x15" | 83 | initregs(&ireg); |
82 | : "=a" (boot_params.ist_info.signature), | 84 | ireg.ax = 0xe980; /* IST Support */ |
83 | "=b" (boot_params.ist_info.command), | 85 | ireg.edx = 0x47534943; /* Request value */ |
84 | "=c" (boot_params.ist_info.event), | 86 | intcall(0x15, &ireg, &oreg); |
85 | "=d" (boot_params.ist_info.perf_level) | 87 | |
86 | : "a" (0x0000e980), /* IST Support */ | 88 | boot_params.ist_info.signature = oreg.eax; |
87 | "d" (0x47534943)); /* Request value */ | 89 | boot_params.ist_info.command = oreg.ebx; |
90 | boot_params.ist_info.event = oreg.ecx; | ||
91 | boot_params.ist_info.perf_level = oreg.edx; | ||
88 | } | 92 | } |
89 | 93 | ||
90 | /* | 94 | /* |
@@ -93,13 +97,12 @@ static void query_ist(void) | |||
93 | static void set_bios_mode(void) | 97 | static void set_bios_mode(void) |
94 | { | 98 | { |
95 | #ifdef CONFIG_X86_64 | 99 | #ifdef CONFIG_X86_64 |
96 | u32 eax, ebx; | 100 | struct biosregs ireg; |
97 | 101 | ||
98 | eax = 0xec00; | 102 | initregs(&ireg); |
99 | ebx = 2; | 103 | ireg.ax = 0xec00; |
100 | asm volatile("int $0x15" | 104 | ireg.bx = 2; |
101 | : "+a" (eax), "+b" (ebx) | 105 | intcall(0x15, &ireg, NULL); |
102 | : : "ecx", "edx", "esi", "edi"); | ||
103 | #endif | 106 | #endif |
104 | } | 107 | } |
105 | 108 | ||
diff --git a/arch/x86/boot/mca.c b/arch/x86/boot/mca.c index 911eaae5d696..a95a531148ef 100644 --- a/arch/x86/boot/mca.c +++ b/arch/x86/boot/mca.c | |||
@@ -2,6 +2,7 @@ | |||
2 | * | 2 | * |
3 | * Copyright (C) 1991, 1992 Linus Torvalds | 3 | * Copyright (C) 1991, 1992 Linus Torvalds |
4 | * Copyright 2007 rPath, Inc. - All Rights Reserved | 4 | * Copyright 2007 rPath, Inc. - All Rights Reserved |
5 | * Copyright 2009 Intel Corporation; author H. Peter Anvin | ||
5 | * | 6 | * |
6 | * This file is part of the Linux kernel, and is made available under | 7 | * This file is part of the Linux kernel, and is made available under |
7 | * the terms of the GNU General Public License version 2. | 8 | * the terms of the GNU General Public License version 2. |
@@ -16,26 +17,22 @@ | |||
16 | 17 | ||
17 | int query_mca(void) | 18 | int query_mca(void) |
18 | { | 19 | { |
19 | u8 err; | 20 | struct biosregs ireg, oreg; |
20 | u16 es, bx, len; | 21 | u16 len; |
21 | 22 | ||
22 | asm("pushw %%es ; " | 23 | initregs(&ireg); |
23 | "int $0x15 ; " | 24 | ireg.ah = 0xc0; |
24 | "setc %0 ; " | 25 | intcall(0x15, &ireg, &oreg); |
25 | "movw %%es, %1 ; " | 26 | |
26 | "popw %%es" | 27 | if (oreg.eflags & X86_EFLAGS_CF) |
27 | : "=acd" (err), "=acdSD" (es), "=b" (bx) | ||
28 | : "a" (0xc000)); | ||
29 | |||
30 | if (err) | ||
31 | return -1; /* No MCA present */ | 28 | return -1; /* No MCA present */ |
32 | 29 | ||
33 | set_fs(es); | 30 | set_fs(oreg.es); |
34 | len = rdfs16(bx); | 31 | len = rdfs16(oreg.bx); |
35 | 32 | ||
36 | if (len > sizeof(boot_params.sys_desc_table)) | 33 | if (len > sizeof(boot_params.sys_desc_table)) |
37 | len = sizeof(boot_params.sys_desc_table); | 34 | len = sizeof(boot_params.sys_desc_table); |
38 | 35 | ||
39 | copy_from_fs(&boot_params.sys_desc_table, bx, len); | 36 | copy_from_fs(&boot_params.sys_desc_table, oreg.bx, len); |
40 | return 0; | 37 | return 0; |
41 | } | 38 | } |
diff --git a/arch/x86/boot/memory.c b/arch/x86/boot/memory.c index 74b3d2ba84e9..cae3feb1035e 100644 --- a/arch/x86/boot/memory.c +++ b/arch/x86/boot/memory.c | |||
@@ -20,12 +20,16 @@ | |||
20 | static int detect_memory_e820(void) | 20 | static int detect_memory_e820(void) |
21 | { | 21 | { |
22 | int count = 0; | 22 | int count = 0; |
23 | u32 next = 0; | 23 | struct biosregs ireg, oreg; |
24 | u32 size, id, edi; | ||
25 | u8 err; | ||
26 | struct e820entry *desc = boot_params.e820_map; | 24 | struct e820entry *desc = boot_params.e820_map; |
27 | static struct e820entry buf; /* static so it is zeroed */ | 25 | static struct e820entry buf; /* static so it is zeroed */ |
28 | 26 | ||
27 | initregs(&ireg); | ||
28 | ireg.ax = 0xe820; | ||
29 | ireg.cx = sizeof buf; | ||
30 | ireg.edx = SMAP; | ||
31 | ireg.di = (size_t)&buf; | ||
32 | |||
29 | /* | 33 | /* |
30 | * Note: at least one BIOS is known which assumes that the | 34 | * Note: at least one BIOS is known which assumes that the |
31 | * buffer pointed to by one e820 call is the same one as | 35 | * buffer pointed to by one e820 call is the same one as |
@@ -41,22 +45,13 @@ static int detect_memory_e820(void) | |||
41 | */ | 45 | */ |
42 | 46 | ||
43 | do { | 47 | do { |
44 | size = sizeof buf; | 48 | intcall(0x15, &ireg, &oreg); |
45 | 49 | ireg.ebx = oreg.ebx; /* for next iteration... */ | |
46 | /* Important: %edx and %esi are clobbered by some BIOSes, | ||
47 | so they must be either used for the error output | ||
48 | or explicitly marked clobbered. Given that, assume there | ||
49 | is something out there clobbering %ebp and %edi, too. */ | ||
50 | asm("pushl %%ebp; int $0x15; popl %%ebp; setc %0" | ||
51 | : "=d" (err), "+b" (next), "=a" (id), "+c" (size), | ||
52 | "=D" (edi), "+m" (buf) | ||
53 | : "D" (&buf), "d" (SMAP), "a" (0xe820) | ||
54 | : "esi"); | ||
55 | 50 | ||
56 | /* BIOSes which terminate the chain with CF = 1 as opposed | 51 | /* BIOSes which terminate the chain with CF = 1 as opposed |
57 | to %ebx = 0 don't always report the SMAP signature on | 52 | to %ebx = 0 don't always report the SMAP signature on |
58 | the final, failing, probe. */ | 53 | the final, failing, probe. */ |
59 | if (err) | 54 | if (oreg.eflags & X86_EFLAGS_CF) |
60 | break; | 55 | break; |
61 | 56 | ||
62 | /* Some BIOSes stop returning SMAP in the middle of | 57 | /* Some BIOSes stop returning SMAP in the middle of |
@@ -64,60 +59,64 @@ static int detect_memory_e820(void) | |||
64 | screwed up the map at that point, we might have a | 59 | screwed up the map at that point, we might have a |
65 | partial map, the full map, or complete garbage, so | 60 | partial map, the full map, or complete garbage, so |
66 | just return failure. */ | 61 | just return failure. */ |
67 | if (id != SMAP) { | 62 | if (oreg.eax != SMAP) { |
68 | count = 0; | 63 | count = 0; |
69 | break; | 64 | break; |
70 | } | 65 | } |
71 | 66 | ||
72 | *desc++ = buf; | 67 | *desc++ = buf; |
73 | count++; | 68 | count++; |
74 | } while (next && count < ARRAY_SIZE(boot_params.e820_map)); | 69 | } while (ireg.ebx && count < ARRAY_SIZE(boot_params.e820_map)); |
75 | 70 | ||
76 | return boot_params.e820_entries = count; | 71 | return boot_params.e820_entries = count; |
77 | } | 72 | } |
78 | 73 | ||
79 | static int detect_memory_e801(void) | 74 | static int detect_memory_e801(void) |
80 | { | 75 | { |
81 | u16 ax, bx, cx, dx; | 76 | struct biosregs ireg, oreg; |
82 | u8 err; | ||
83 | 77 | ||
84 | bx = cx = dx = 0; | 78 | initregs(&ireg); |
85 | ax = 0xe801; | 79 | ireg.ax = 0xe801; |
86 | asm("stc; int $0x15; setc %0" | 80 | intcall(0x15, &ireg, &oreg); |
87 | : "=m" (err), "+a" (ax), "+b" (bx), "+c" (cx), "+d" (dx)); | ||
88 | 81 | ||
89 | if (err) | 82 | if (oreg.eflags & X86_EFLAGS_CF) |
90 | return -1; | 83 | return -1; |
91 | 84 | ||
92 | /* Do we really need to do this? */ | 85 | /* Do we really need to do this? */ |
93 | if (cx || dx) { | 86 | if (oreg.cx || oreg.dx) { |
94 | ax = cx; | 87 | oreg.ax = oreg.cx; |
95 | bx = dx; | 88 | oreg.bx = oreg.dx; |
96 | } | 89 | } |
97 | 90 | ||
98 | if (ax > 15*1024) | 91 | if (oreg.ax > 15*1024) { |
99 | return -1; /* Bogus! */ | 92 | return -1; /* Bogus! */ |
100 | 93 | } else if (oreg.ax == 15*1024) { | |
101 | /* This ignores memory above 16MB if we have a memory hole | 94 | boot_params.alt_mem_k = (oreg.dx << 6) + oreg.ax; |
102 | there. If someone actually finds a machine with a memory | 95 | } else { |
103 | hole at 16MB and no support for 0E820h they should probably | 96 | /* |
104 | generate a fake e820 map. */ | 97 | * This ignores memory above 16MB if we have a memory |
105 | boot_params.alt_mem_k = (ax == 15*1024) ? (dx << 6)+ax : ax; | 98 | * hole there. If someone actually finds a machine |
99 | * with a memory hole at 16MB and no support for | ||
100 | * 0E820h they should probably generate a fake e820 | ||
101 | * map. | ||
102 | */ | ||
103 | boot_params.alt_mem_k = oreg.ax; | ||
104 | } | ||
106 | 105 | ||
107 | return 0; | 106 | return 0; |
108 | } | 107 | } |
109 | 108 | ||
110 | static int detect_memory_88(void) | 109 | static int detect_memory_88(void) |
111 | { | 110 | { |
112 | u16 ax; | 111 | struct biosregs ireg, oreg; |
113 | u8 err; | ||
114 | 112 | ||
115 | ax = 0x8800; | 113 | initregs(&ireg); |
116 | asm("stc; int $0x15; setc %0" : "=bcdm" (err), "+a" (ax)); | 114 | ireg.ah = 0x88; |
115 | intcall(0x15, &ireg, &oreg); | ||
117 | 116 | ||
118 | boot_params.screen_info.ext_mem_k = ax; | 117 | boot_params.screen_info.ext_mem_k = oreg.ax; |
119 | 118 | ||
120 | return -err; | 119 | return -(oreg.eflags & X86_EFLAGS_CF); /* 0 or -1 */ |
121 | } | 120 | } |
122 | 121 | ||
123 | int detect_memory(void) | 122 | int detect_memory(void) |
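For the restructured detect_memory_e801(): AX/CX report extended memory between 1 MB and 16 MB in KiB (capped at 15*1024), and BX/DX report memory above 16 MB in 64 KiB blocks. The alt_mem_k logic therefore reduces to something like the following (illustrative helper with a hypothetical name):

    /* Returns alt_mem_k in KiB; 0 stands in here for a bogus BIOS answer. */
    unsigned long e801_to_alt_mem_k(unsigned int ax, unsigned int dx)
    {
            if (ax > 15*1024)
                    return 0;                /* more than 15 MB below 16 MB: bogus  */
            if (ax == 15*1024)
                    return (dx << 6) + ax;   /* no hole: add the above-16 MB part   */
            return ax;                       /* memory hole below 16 MB: stop there */
    }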
diff --git a/arch/x86/boot/regs.c b/arch/x86/boot/regs.c new file mode 100644 index 000000000000..958019b1cfa5 --- /dev/null +++ b/arch/x86/boot/regs.c | |||
@@ -0,0 +1,29 @@ | |||
1 | /* ----------------------------------------------------------------------- | ||
2 | * | ||
3 | * Copyright 2009 Intel Corporation; author H. Peter Anvin | ||
4 | * | ||
5 | * This file is part of the Linux kernel, and is made available under | ||
6 | * the terms of the GNU General Public License version 2 or (at your | ||
7 | * option) any later version; incorporated herein by reference. | ||
8 | * | ||
9 | * ----------------------------------------------------------------------- */ | ||
10 | |||
11 | /* | ||
12 | * Simple helper function for initializing a register set. | ||
13 | * | ||
14 | * Note that this sets EFLAGS_CF in the input register set; this | ||
15 | * makes it easier to catch functions which do nothing but don't | ||
16 | * explicitly set CF. | ||
17 | */ | ||
18 | |||
19 | #include "boot.h" | ||
20 | |||
21 | void initregs(struct biosregs *reg) | ||
22 | { | ||
23 | memset(reg, 0, sizeof *reg); | ||
24 | reg->eflags |= X86_EFLAGS_CF; | ||
25 | reg->ds = ds(); | ||
26 | reg->es = ds(); | ||
27 | reg->fs = fs(); | ||
28 | reg->gs = gs(); | ||
29 | } | ||
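initregs() and intcall() rely on struct biosregs (declared in boot.h, not shown in this excerpt), which overlays 32-, 16- and 8-bit views of each register so callers can write ireg.ah or ireg.eax as convenient. A reduced sketch of the overlay idea for a single register (illustrative only, not the real layout):

    #include <stdint.h>

    struct regview {
            union {
                    uint32_t eax;
                    uint16_t ax;
                    struct { uint8_t al, ah; };  /* x86 is little-endian */
            };
    };
    /* Setting .ah = 0x41 writes the same storage as bits 8..15 of .eax. */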
diff --git a/arch/x86/boot/setup.ld b/arch/x86/boot/setup.ld index bb8dc2de7969..0f6ec455a2b1 100644 --- a/arch/x86/boot/setup.ld +++ b/arch/x86/boot/setup.ld | |||
@@ -15,8 +15,11 @@ SECTIONS | |||
15 | 15 | ||
16 | . = 497; | 16 | . = 497; |
17 | .header : { *(.header) } | 17 | .header : { *(.header) } |
18 | .entrytext : { *(.entrytext) } | ||
18 | .inittext : { *(.inittext) } | 19 | .inittext : { *(.inittext) } |
19 | .initdata : { *(.initdata) } | 20 | .initdata : { *(.initdata) } |
21 | __end_init = .; | ||
22 | |||
20 | .text : { *(.text) } | 23 | .text : { *(.text) } |
21 | .text32 : { *(.text32) } | 24 | .text32 : { *(.text32) } |
22 | 25 | ||
@@ -52,4 +55,7 @@ SECTIONS | |||
52 | 55 | ||
53 | . = ASSERT(_end <= 0x8000, "Setup too big!"); | 56 | . = ASSERT(_end <= 0x8000, "Setup too big!"); |
54 | . = ASSERT(hdr == 0x1f1, "The setup header has the wrong offset!"); | 57 | . = ASSERT(hdr == 0x1f1, "The setup header has the wrong offset!"); |
58 | /* Necessary for the very-old-loader check to work... */ | ||
59 | . = ASSERT(__end_init <= 5*512, "init sections too big!"); | ||
60 | |||
55 | } | 61 | } |
diff --git a/arch/x86/boot/tty.c b/arch/x86/boot/tty.c index 7e8e8b25f5f6..01ec69c901c7 100644 --- a/arch/x86/boot/tty.c +++ b/arch/x86/boot/tty.c | |||
@@ -2,6 +2,7 @@ | |||
2 | * | 2 | * |
3 | * Copyright (C) 1991, 1992 Linus Torvalds | 3 | * Copyright (C) 1991, 1992 Linus Torvalds |
4 | * Copyright 2007 rPath, Inc. - All Rights Reserved | 4 | * Copyright 2007 rPath, Inc. - All Rights Reserved |
5 | * Copyright 2009 Intel Corporation; author H. Peter Anvin | ||
5 | * | 6 | * |
6 | * This file is part of the Linux kernel, and is made available under | 7 | * This file is part of the Linux kernel, and is made available under |
7 | * the terms of the GNU General Public License version 2. | 8 | * the terms of the GNU General Public License version 2. |
@@ -22,24 +23,23 @@ | |||
22 | 23 | ||
23 | void __attribute__((section(".inittext"))) putchar(int ch) | 24 | void __attribute__((section(".inittext"))) putchar(int ch) |
24 | { | 25 | { |
25 | unsigned char c = ch; | 26 | struct biosregs ireg; |
26 | 27 | ||
27 | if (c == '\n') | 28 | if (ch == '\n') |
28 | putchar('\r'); /* \n -> \r\n */ | 29 | putchar('\r'); /* \n -> \r\n */ |
29 | 30 | ||
30 | /* int $0x10 is known to have bugs involving touching registers | 31 | initregs(&ireg); |
31 | it shouldn't. Be extra conservative... */ | 32 | ireg.bx = 0x0007; |
32 | asm volatile("pushal; pushw %%ds; int $0x10; popw %%ds; popal" | 33 | ireg.cx = 0x0001; |
33 | : : "b" (0x0007), "c" (0x0001), "a" (0x0e00|ch)); | 34 | ireg.ah = 0x0e; |
35 | ireg.al = ch; | ||
36 | intcall(0x10, &ireg, NULL); | ||
34 | } | 37 | } |
35 | 38 | ||
36 | void __attribute__((section(".inittext"))) puts(const char *str) | 39 | void __attribute__((section(".inittext"))) puts(const char *str) |
37 | { | 40 | { |
38 | int n = 0; | 41 | while (*str) |
39 | while (*str) { | ||
40 | putchar(*str++); | 42 | putchar(*str++); |
41 | n++; | ||
42 | } | ||
43 | } | 43 | } |
44 | 44 | ||
45 | /* | 45 | /* |
@@ -49,14 +49,13 @@ void __attribute__((section(".inittext"))) puts(const char *str) | |||
49 | 49 | ||
50 | static u8 gettime(void) | 50 | static u8 gettime(void) |
51 | { | 51 | { |
52 | u16 ax = 0x0200; | 52 | struct biosregs ireg, oreg; |
53 | u16 cx, dx; | ||
54 | 53 | ||
55 | asm volatile("int $0x1a" | 54 | initregs(&ireg); |
56 | : "+a" (ax), "=c" (cx), "=d" (dx) | 55 | ireg.ah = 0x02; |
57 | : : "ebx", "esi", "edi"); | 56 | intcall(0x1a, &ireg, &oreg); |
58 | 57 | ||
59 | return dx >> 8; | 58 | return oreg.dh; |
60 | } | 59 | } |
61 | 60 | ||
62 | /* | 61 | /* |
@@ -64,19 +63,24 @@ static u8 gettime(void) | |||
64 | */ | 63 | */ |
65 | int getchar(void) | 64 | int getchar(void) |
66 | { | 65 | { |
67 | u16 ax = 0; | 66 | struct biosregs ireg, oreg; |
68 | asm volatile("int $0x16" : "+a" (ax)); | 67 | |
68 | initregs(&ireg); | ||
69 | /* ireg.ah = 0x00; */ | ||
70 | intcall(0x16, &ireg, &oreg); | ||
69 | 71 | ||
70 | return ax & 0xff; | 72 | return oreg.al; |
71 | } | 73 | } |
72 | 74 | ||
73 | static int kbd_pending(void) | 75 | static int kbd_pending(void) |
74 | { | 76 | { |
75 | u8 pending; | 77 | struct biosregs ireg, oreg; |
76 | asm volatile("int $0x16; setnz %0" | 78 | |
77 | : "=qm" (pending) | 79 | initregs(&ireg); |
78 | : "a" (0x0100)); | 80 | ireg.ah = 0x01; |
79 | return pending; | 81 | intcall(0x16, &ireg, &oreg); |
82 | |||
83 | return !(oreg.eflags & X86_EFLAGS_ZF); | ||
80 | } | 84 | } |
81 | 85 | ||
82 | void kbd_flush(void) | 86 | void kbd_flush(void) |
diff --git a/arch/x86/boot/video-bios.c b/arch/x86/boot/video-bios.c index 3fa979c9c363..d660be492363 100644 --- a/arch/x86/boot/video-bios.c +++ b/arch/x86/boot/video-bios.c | |||
@@ -2,6 +2,7 @@ | |||
2 | * | 2 | * |
3 | * Copyright (C) 1991, 1992 Linus Torvalds | 3 | * Copyright (C) 1991, 1992 Linus Torvalds |
4 | * Copyright 2007 rPath, Inc. - All Rights Reserved | 4 | * Copyright 2007 rPath, Inc. - All Rights Reserved |
5 | * Copyright 2009 Intel Corporation; author H. Peter Anvin | ||
5 | * | 6 | * |
6 | * This file is part of the Linux kernel, and is made available under | 7 | * This file is part of the Linux kernel, and is made available under |
7 | * the terms of the GNU General Public License version 2. | 8 | * the terms of the GNU General Public License version 2. |
@@ -29,21 +30,21 @@ static int bios_set_mode(struct mode_info *mi) | |||
29 | 30 | ||
30 | static int set_bios_mode(u8 mode) | 31 | static int set_bios_mode(u8 mode) |
31 | { | 32 | { |
32 | u16 ax; | 33 | struct biosregs ireg, oreg; |
33 | u8 new_mode; | 34 | u8 new_mode; |
34 | 35 | ||
35 | ax = mode; /* AH=0x00 Set Video Mode */ | 36 | initregs(&ireg); |
36 | asm volatile(INT10 | 37 | ireg.al = mode; /* AH=0x00 Set Video Mode */ |
37 | : "+a" (ax) | 38 | intcall(0x10, &ireg, NULL); |
38 | : : "ebx", "ecx", "edx", "esi", "edi"); | ||
39 | 39 | ||
40 | ax = 0x0f00; /* Get Current Video Mode */ | 40 | |
41 | asm volatile(INT10 | 41 | ireg.ah = 0x0f; /* Get Current Video Mode */ |
42 | : "+a" (ax) | 42 | intcall(0x10, &ireg, &oreg); |
43 | : : "ebx", "ecx", "edx", "esi", "edi"); | ||
44 | 43 | ||
45 | do_restore = 1; /* Assume video contents were lost */ | 44 | do_restore = 1; /* Assume video contents were lost */ |
46 | new_mode = ax & 0x7f; /* Not all BIOSes are clean with the top bit */ | 45 | |
46 | /* Not all BIOSes are clean with the top bit */ | ||
47 | new_mode = ireg.al & 0x7f; | ||
47 | 48 | ||
48 | if (new_mode == mode) | 49 | if (new_mode == mode) |
49 | return 0; /* Mode change OK */ | 50 | return 0; /* Mode change OK */ |
@@ -53,10 +54,8 @@ static int set_bios_mode(u8 mode) | |||
53 | /* Mode setting failed, but we didn't end up where we | 54 | /* Mode setting failed, but we didn't end up where we |
54 | started. That's bad. Try to revert to the original | 55 | started. That's bad. Try to revert to the original |
55 | video mode. */ | 56 | video mode. */ |
56 | ax = boot_params.screen_info.orig_video_mode; | 57 | ireg.ax = boot_params.screen_info.orig_video_mode; |
57 | asm volatile(INT10 | 58 | intcall(0x10, &ireg, NULL); |
58 | : "+a" (ax) | ||
59 | : : "ebx", "ecx", "edx", "esi", "edi"); | ||
60 | } | 59 | } |
61 | #endif | 60 | #endif |
62 | return -1; | 61 | return -1; |
diff --git a/arch/x86/boot/video-vesa.c b/arch/x86/boot/video-vesa.c index 4a58c8ce3f69..c700147d6ffb 100644 --- a/arch/x86/boot/video-vesa.c +++ b/arch/x86/boot/video-vesa.c | |||
@@ -2,6 +2,7 @@ | |||
2 | * | 2 | * |
3 | * Copyright (C) 1991, 1992 Linus Torvalds | 3 | * Copyright (C) 1991, 1992 Linus Torvalds |
4 | * Copyright 2007 rPath, Inc. - All Rights Reserved | 4 | * Copyright 2007 rPath, Inc. - All Rights Reserved |
5 | * Copyright 2009 Intel Corporation; author H. Peter Anvin | ||
5 | * | 6 | * |
6 | * This file is part of the Linux kernel, and is made available under | 7 | * This file is part of the Linux kernel, and is made available under |
7 | * the terms of the GNU General Public License version 2. | 8 | * the terms of the GNU General Public License version 2. |
@@ -31,7 +32,7 @@ static inline void vesa_store_mode_params_graphics(void) {} | |||
31 | static int vesa_probe(void) | 32 | static int vesa_probe(void) |
32 | { | 33 | { |
33 | #if defined(CONFIG_VIDEO_VESA) || defined(CONFIG_FIRMWARE_EDID) | 34 | #if defined(CONFIG_VIDEO_VESA) || defined(CONFIG_FIRMWARE_EDID) |
34 | u16 ax, cx, di; | 35 | struct biosregs ireg, oreg; |
35 | u16 mode; | 36 | u16 mode; |
36 | addr_t mode_ptr; | 37 | addr_t mode_ptr; |
37 | struct mode_info *mi; | 38 | struct mode_info *mi; |
@@ -39,13 +40,12 @@ static int vesa_probe(void) | |||
39 | 40 | ||
40 | video_vesa.modes = GET_HEAP(struct mode_info, 0); | 41 | video_vesa.modes = GET_HEAP(struct mode_info, 0); |
41 | 42 | ||
42 | ax = 0x4f00; | 43 | initregs(&ireg); |
43 | di = (size_t)&vginfo; | 44 | ireg.ax = 0x4f00; |
44 | asm(INT10 | 45 | ireg.di = (size_t)&vginfo; |
45 | : "+a" (ax), "+D" (di), "=m" (vginfo) | 46 | intcall(0x10, &ireg, &oreg); |
46 | : : "ebx", "ecx", "edx", "esi"); | ||
47 | 47 | ||
48 | if (ax != 0x004f || | 48 | if (ireg.ax != 0x004f || |
49 | vginfo.signature != VESA_MAGIC || | 49 | vginfo.signature != VESA_MAGIC || |
50 | vginfo.version < 0x0102) | 50 | vginfo.version < 0x0102) |
51 | return 0; /* Not present */ | 51 | return 0; /* Not present */ |
@@ -65,14 +65,12 @@ static int vesa_probe(void) | |||
65 | 65 | ||
66 | memset(&vminfo, 0, sizeof vminfo); /* Just in case... */ | 66 | memset(&vminfo, 0, sizeof vminfo); /* Just in case... */ |
67 | 67 | ||
68 | ax = 0x4f01; | 68 | ireg.ax = 0x4f01; |
69 | cx = mode; | 69 | ireg.cx = mode; |
70 | di = (size_t)&vminfo; | 70 | ireg.di = (size_t)&vminfo; |
71 | asm(INT10 | 71 | intcall(0x10, &ireg, &oreg); |
72 | : "+a" (ax), "+c" (cx), "+D" (di), "=m" (vminfo) | ||
73 | : : "ebx", "edx", "esi"); | ||
74 | 72 | ||
75 | if (ax != 0x004f) | 73 | if (ireg.ax != 0x004f) |
76 | continue; | 74 | continue; |
77 | 75 | ||
78 | if ((vminfo.mode_attr & 0x15) == 0x05) { | 76 | if ((vminfo.mode_attr & 0x15) == 0x05) { |
@@ -111,20 +109,19 @@ static int vesa_probe(void) | |||
111 | 109 | ||
112 | static int vesa_set_mode(struct mode_info *mode) | 110 | static int vesa_set_mode(struct mode_info *mode) |
113 | { | 111 | { |
114 | u16 ax, bx, cx, di; | 112 | struct biosregs ireg, oreg; |
115 | int is_graphic; | 113 | int is_graphic; |
116 | u16 vesa_mode = mode->mode - VIDEO_FIRST_VESA; | 114 | u16 vesa_mode = mode->mode - VIDEO_FIRST_VESA; |
117 | 115 | ||
118 | memset(&vminfo, 0, sizeof vminfo); /* Just in case... */ | 116 | memset(&vminfo, 0, sizeof vminfo); /* Just in case... */ |
119 | 117 | ||
120 | ax = 0x4f01; | 118 | initregs(&ireg); |
121 | cx = vesa_mode; | 119 | ireg.ax = 0x4f01; |
122 | di = (size_t)&vminfo; | 120 | ireg.cx = vesa_mode; |
123 | asm(INT10 | 121 | ireg.di = (size_t)&vminfo; |
124 | : "+a" (ax), "+c" (cx), "+D" (di), "=m" (vminfo) | 122 | intcall(0x10, &ireg, &oreg); |
125 | : : "ebx", "edx", "esi"); | ||
126 | 123 | ||
127 | if (ax != 0x004f) | 124 | if (oreg.ax != 0x004f) |
128 | return -1; | 125 | return -1; |
129 | 126 | ||
130 | if ((vminfo.mode_attr & 0x15) == 0x05) { | 127 | if ((vminfo.mode_attr & 0x15) == 0x05) { |
@@ -141,14 +138,12 @@ static int vesa_set_mode(struct mode_info *mode) | |||
141 | } | 138 | } |
142 | 139 | ||
143 | 140 | ||
144 | ax = 0x4f02; | 141 | initregs(&ireg); |
145 | bx = vesa_mode; | 142 | ireg.ax = 0x4f02; |
146 | di = 0; | 143 | ireg.bx = vesa_mode; |
147 | asm volatile(INT10 | 144 | intcall(0x10, &ireg, &oreg); |
148 | : "+a" (ax), "+b" (bx), "+D" (di) | ||
149 | : : "ecx", "edx", "esi"); | ||
150 | 145 | ||
151 | if (ax != 0x004f) | 146 | if (oreg.ax != 0x004f) |
152 | return -1; | 147 | return -1; |
153 | 148 | ||
154 | graphic_mode = is_graphic; | 149 | graphic_mode = is_graphic; |
@@ -171,50 +166,45 @@ static int vesa_set_mode(struct mode_info *mode) | |||
171 | /* Switch DAC to 8-bit mode */ | 166 | /* Switch DAC to 8-bit mode */ |
172 | static void vesa_dac_set_8bits(void) | 167 | static void vesa_dac_set_8bits(void) |
173 | { | 168 | { |
169 | struct biosregs ireg, oreg; | ||
174 | u8 dac_size = 6; | 170 | u8 dac_size = 6; |
175 | 171 | ||
176 | /* If possible, switch the DAC to 8-bit mode */ | 172 | /* If possible, switch the DAC to 8-bit mode */ |
177 | if (vginfo.capabilities & 1) { | 173 | if (vginfo.capabilities & 1) { |
178 | u16 ax, bx; | 174 | initregs(&ireg); |
179 | 175 | ireg.ax = 0x4f08; | |
180 | ax = 0x4f08; | 176 | ireg.bh = 0x08; |
181 | bx = 0x0800; | 177 | intcall(0x10, &ireg, &oreg); |
182 | asm volatile(INT10 | 178 | if (oreg.ax == 0x004f) |
183 | : "+a" (ax), "+b" (bx) | 179 | dac_size = oreg.bh; |
184 | : : "ecx", "edx", "esi", "edi"); | ||
185 | |||
186 | if (ax == 0x004f) | ||
187 | dac_size = bx >> 8; | ||
188 | } | 180 | } |
189 | 181 | ||
190 | /* Set the color sizes to the DAC size, and offsets to 0 */ | 182 | /* Set the color sizes to the DAC size, and offsets to 0 */ |
191 | boot_params.screen_info.red_size = dac_size; | 183 | boot_params.screen_info.red_size = dac_size; |
192 | boot_params.screen_info.green_size = dac_size; | 184 | boot_params.screen_info.green_size = dac_size; |
193 | boot_params.screen_info.blue_size = dac_size; | 185 | boot_params.screen_info.blue_size = dac_size; |
194 | boot_params.screen_info.rsvd_size = dac_size; | 186 | boot_params.screen_info.rsvd_size = dac_size; |
195 | 187 | ||
196 | boot_params.screen_info.red_pos = 0; | 188 | boot_params.screen_info.red_pos = 0; |
197 | boot_params.screen_info.green_pos = 0; | 189 | boot_params.screen_info.green_pos = 0; |
198 | boot_params.screen_info.blue_pos = 0; | 190 | boot_params.screen_info.blue_pos = 0; |
199 | boot_params.screen_info.rsvd_pos = 0; | 191 | boot_params.screen_info.rsvd_pos = 0; |
200 | } | 192 | } |
201 | 193 | ||
202 | /* Save the VESA protected mode info */ | 194 | /* Save the VESA protected mode info */ |
203 | static void vesa_store_pm_info(void) | 195 | static void vesa_store_pm_info(void) |
204 | { | 196 | { |
205 | u16 ax, bx, di, es; | 197 | struct biosregs ireg, oreg; |
206 | 198 | ||
207 | ax = 0x4f0a; | 199 | initregs(&ireg); |
208 | bx = di = 0; | 200 | ireg.ax = 0x4f0a; |
209 | asm("pushw %%es; "INT10"; movw %%es,%0; popw %%es" | 201 | intcall(0x10, &ireg, &oreg); |
210 | : "=d" (es), "+a" (ax), "+b" (bx), "+D" (di) | ||
211 | : : "ecx", "esi"); | ||
212 | 202 | ||
213 | if (ax != 0x004f) | 203 | if (oreg.ax != 0x004f) |
214 | return; | 204 | return; |
215 | 205 | ||
216 | boot_params.screen_info.vesapm_seg = es; | 206 | boot_params.screen_info.vesapm_seg = oreg.es; |
217 | boot_params.screen_info.vesapm_off = di; | 207 | boot_params.screen_info.vesapm_off = oreg.di; |
218 | } | 208 | } |
219 | 209 | ||
220 | /* | 210 | /* |
@@ -252,7 +242,7 @@ static void vesa_store_mode_params_graphics(void) | |||
252 | void vesa_store_edid(void) | 242 | void vesa_store_edid(void) |
253 | { | 243 | { |
254 | #ifdef CONFIG_FIRMWARE_EDID | 244 | #ifdef CONFIG_FIRMWARE_EDID |
255 | u16 ax, bx, cx, dx, di; | 245 | struct biosregs ireg, oreg; |
256 | 246 | ||
257 | /* Apparently used as a nonsense token... */ | 247 | /* Apparently used as a nonsense token... */ |
258 | memset(&boot_params.edid_info, 0x13, sizeof boot_params.edid_info); | 248 | memset(&boot_params.edid_info, 0x13, sizeof boot_params.edid_info); |
@@ -260,33 +250,26 @@ void vesa_store_edid(void) | |||
260 | if (vginfo.version < 0x0200) | 250 | if (vginfo.version < 0x0200) |
261 | return; /* EDID requires VBE 2.0+ */ | 251 | return; /* EDID requires VBE 2.0+ */ |
262 | 252 | ||
263 | ax = 0x4f15; /* VBE DDC */ | 253 | initregs(&ireg); |
264 | bx = 0x0000; /* Report DDC capabilities */ | 254 | ireg.ax = 0x4f15; /* VBE DDC */ |
265 | cx = 0; /* Controller 0 */ | 255 | /* ireg.bx = 0x0000; */ /* Report DDC capabilities */ |
266 | di = 0; /* ES:DI must be 0 by spec */ | 256 | /* ireg.cx = 0; */ /* Controller 0 */ |
267 | 257 | ireg.es = 0; /* ES:DI must be 0 by spec */ | |
268 | /* Note: The VBE DDC spec is different from the main VESA spec; | 258 | intcall(0x10, &ireg, &oreg); |
269 | we genuinely have to assume all registers are destroyed here. */ | ||
270 | |||
271 | asm("pushw %%es; movw %2,%%es; "INT10"; popw %%es" | ||
272 | : "+a" (ax), "+b" (bx), "+c" (cx), "+D" (di) | ||
273 | : : "esi", "edx"); | ||
274 | 259 | ||
275 | if (ax != 0x004f) | 260 | if (oreg.ax != 0x004f) |
276 | return; /* No EDID */ | 261 | return; /* No EDID */ |
277 | 262 | ||
278 | /* BH = time in seconds to transfer EDD information */ | 263 | /* BH = time in seconds to transfer EDD information */ |
279 | /* BL = DDC level supported */ | 264 | /* BL = DDC level supported */ |
280 | 265 | ||
281 | ax = 0x4f15; /* VBE DDC */ | 266 | ireg.ax = 0x4f15; /* VBE DDC */ |
282 | bx = 0x0001; /* Read EDID */ | 267 | ireg.bx = 0x0001; /* Read EDID */ |
283 | cx = 0; /* Controller 0 */ | 268 | /* ireg.cx = 0; */ /* Controller 0 */ |
284 | dx = 0; /* EDID block number */ | 269 | /* ireg.dx = 0; */ /* EDID block number */ |
285 | di =(size_t) &boot_params.edid_info; /* (ES:)Pointer to block */ | 270 | ireg.es = ds(); |
286 | asm(INT10 | 271 | ireg.di =(size_t)&boot_params.edid_info; /* (ES:)Pointer to block */ |
287 | : "+a" (ax), "+b" (bx), "+d" (dx), "=m" (boot_params.edid_info), | 272 | intcall(0x10, &ireg, &oreg); |
288 | "+c" (cx), "+D" (di) | ||
289 | : : "esi"); | ||
290 | #endif /* CONFIG_FIRMWARE_EDID */ | 273 | #endif /* CONFIG_FIRMWARE_EDID */ |
291 | } | 274 | } |
292 | 275 | ||
diff --git a/arch/x86/boot/video-vga.c b/arch/x86/boot/video-vga.c index 9e0587a37768..8f8d827e254d 100644 --- a/arch/x86/boot/video-vga.c +++ b/arch/x86/boot/video-vga.c | |||
@@ -2,6 +2,7 @@ | |||
2 | * | 2 | * |
3 | * Copyright (C) 1991, 1992 Linus Torvalds | 3 | * Copyright (C) 1991, 1992 Linus Torvalds |
4 | * Copyright 2007 rPath, Inc. - All Rights Reserved | 4 | * Copyright 2007 rPath, Inc. - All Rights Reserved |
5 | * Copyright 2009 Intel Corporation; author H. Peter Anvin | ||
5 | * | 6 | * |
6 | * This file is part of the Linux kernel, and is made available under | 7 | * This file is part of the Linux kernel, and is made available under |
7 | * the terms of the GNU General Public License version 2. | 8 | * the terms of the GNU General Public License version 2. |
@@ -39,30 +40,30 @@ static __videocard video_vga; | |||
39 | /* Set basic 80x25 mode */ | 40 | /* Set basic 80x25 mode */ |
40 | static u8 vga_set_basic_mode(void) | 41 | static u8 vga_set_basic_mode(void) |
41 | { | 42 | { |
43 | struct biosregs ireg, oreg; | ||
42 | u16 ax; | 44 | u16 ax; |
43 | u8 rows; | 45 | u8 rows; |
44 | u8 mode; | 46 | u8 mode; |
45 | 47 | ||
48 | initregs(&ireg); | ||
49 | |||
46 | #ifdef CONFIG_VIDEO_400_HACK | 50 | #ifdef CONFIG_VIDEO_400_HACK |
47 | if (adapter >= ADAPTER_VGA) { | 51 | if (adapter >= ADAPTER_VGA) { |
48 | asm volatile(INT10 | 52 | ireg.ax = 0x1202; |
49 | : : "a" (0x1202), "b" (0x0030) | 53 | ireg.bx = 0x0030; |
50 | : "ecx", "edx", "esi", "edi"); | 54 | intcall(0x10, &ireg, NULL); |
51 | } | 55 | } |
52 | #endif | 56 | #endif |
53 | 57 | ||
54 | ax = 0x0f00; | 58 | ax = 0x0f00; |
55 | asm volatile(INT10 | 59 | intcall(0x10, &ireg, &oreg); |
56 | : "+a" (ax) | 60 | mode = oreg.al; |
57 | : : "ebx", "ecx", "edx", "esi", "edi"); | ||
58 | |||
59 | mode = (u8)ax; | ||
60 | 61 | ||
61 | set_fs(0); | 62 | set_fs(0); |
62 | rows = rdfs8(0x484); /* rows minus one */ | 63 | rows = rdfs8(0x484); /* rows minus one */ |
63 | 64 | ||
64 | #ifndef CONFIG_VIDEO_400_HACK | 65 | #ifndef CONFIG_VIDEO_400_HACK |
65 | if ((ax == 0x5003 || ax == 0x5007) && | 66 | if ((oreg.ax == 0x5003 || oreg.ax == 0x5007) && |
66 | (rows == 0 || rows == 24)) | 67 | (rows == 0 || rows == 24)) |
67 | return mode; | 68 | return mode; |
68 | #endif | 69 | #endif |
@@ -71,10 +72,8 @@ static u8 vga_set_basic_mode(void) | |||
71 | mode = 3; | 72 | mode = 3; |
72 | 73 | ||
73 | /* Set the mode */ | 74 | /* Set the mode */ |
74 | ax = mode; | 75 | ireg.ax = mode; /* AH=0: set mode */ |
75 | asm volatile(INT10 | 76 | intcall(0x10, &ireg, NULL); |
76 | : "+a" (ax) | ||
77 | : : "ebx", "ecx", "edx", "esi", "edi"); | ||
78 | do_restore = 1; | 77 | do_restore = 1; |
79 | return mode; | 78 | return mode; |
80 | } | 79 | } |
@@ -82,43 +81,69 @@ static u8 vga_set_basic_mode(void) | |||
82 | static void vga_set_8font(void) | 81 | static void vga_set_8font(void) |
83 | { | 82 | { |
84 | /* Set 8x8 font - 80x43 on EGA, 80x50 on VGA */ | 83 | /* Set 8x8 font - 80x43 on EGA, 80x50 on VGA */ |
84 | struct biosregs ireg; | ||
85 | |||
86 | initregs(&ireg); | ||
85 | 87 | ||
86 | /* Set 8x8 font */ | 88 | /* Set 8x8 font */ |
87 | asm volatile(INT10 : : "a" (0x1112), "b" (0)); | 89 | ireg.ax = 0x1112; |
90 | /* ireg.bl = 0; */ | ||
91 | intcall(0x10, &ireg, NULL); | ||
88 | 92 | ||
89 | /* Use alternate print screen */ | 93 | /* Use alternate print screen */ |
90 | asm volatile(INT10 : : "a" (0x1200), "b" (0x20)); | 94 | ireg.ax = 0x1200; |
95 | ireg.bl = 0x20; | ||
96 | intcall(0x10, &ireg, NULL); | ||
91 | 97 | ||
92 | /* Turn off cursor emulation */ | 98 | /* Turn off cursor emulation */ |
93 | asm volatile(INT10 : : "a" (0x1201), "b" (0x34)); | 99 | ireg.ax = 0x1201; |
100 | ireg.bl = 0x34; | ||
101 | intcall(0x10, &ireg, NULL); | ||
94 | 102 | ||
95 | /* Cursor is scan lines 6-7 */ | 103 | /* Cursor is scan lines 6-7 */ |
96 | asm volatile(INT10 : : "a" (0x0100), "c" (0x0607)); | 104 | ireg.ax = 0x0100; |
105 | ireg.cx = 0x0607; | ||
106 | intcall(0x10, &ireg, NULL); | ||
97 | } | 107 | } |
98 | 108 | ||
99 | static void vga_set_14font(void) | 109 | static void vga_set_14font(void) |
100 | { | 110 | { |
101 | /* Set 9x14 font - 80x28 on VGA */ | 111 | /* Set 9x14 font - 80x28 on VGA */ |
112 | struct biosregs ireg; | ||
113 | |||
114 | initregs(&ireg); | ||
102 | 115 | ||
103 | /* Set 9x14 font */ | 116 | /* Set 9x14 font */ |
104 | asm volatile(INT10 : : "a" (0x1111), "b" (0)); | 117 | ireg.ax = 0x1111; |
118 | /* ireg.bl = 0; */ | ||
119 | intcall(0x10, &ireg, NULL); | ||
105 | 120 | ||
106 | /* Turn off cursor emulation */ | 121 | /* Turn off cursor emulation */ |
107 | asm volatile(INT10 : : "a" (0x1201), "b" (0x34)); | 122 | ireg.ax = 0x1201; |
123 | ireg.bl = 0x34; | ||
124 | intcall(0x10, &ireg, NULL); | ||
108 | 125 | ||
109 | /* Cursor is scan lines 11-12 */ | 126 | /* Cursor is scan lines 11-12 */ |
110 | asm volatile(INT10 : : "a" (0x0100), "c" (0x0b0c)); | 127 | ireg.ax = 0x0100; |
128 | ireg.cx = 0x0b0c; | ||
129 | intcall(0x10, &ireg, NULL); | ||
111 | } | 130 | } |
112 | 131 | ||
113 | static void vga_set_80x43(void) | 132 | static void vga_set_80x43(void) |
114 | { | 133 | { |
115 | /* Set 80x43 mode on VGA (not EGA) */ | 134 | /* Set 80x43 mode on VGA (not EGA) */ |
135 | struct biosregs ireg; | ||
136 | |||
137 | initregs(&ireg); | ||
116 | 138 | ||
117 | /* Set 350 scans */ | 139 | /* Set 350 scans */ |
118 | asm volatile(INT10 : : "a" (0x1201), "b" (0x30)); | 140 | ireg.ax = 0x1201; |
141 | ireg.bl = 0x30; | ||
142 | intcall(0x10, &ireg, NULL); | ||
119 | 143 | ||
120 | /* Reset video mode */ | 144 | /* Reset video mode */ |
121 | asm volatile(INT10 : : "a" (0x0003)); | 145 | ireg.ax = 0x0003; |
146 | intcall(0x10, &ireg, NULL); | ||
122 | 147 | ||
123 | vga_set_8font(); | 148 | vga_set_8font(); |
124 | } | 149 | } |
@@ -225,8 +250,6 @@ static int vga_set_mode(struct mode_info *mode) | |||
225 | */ | 250 | */ |
226 | static int vga_probe(void) | 251 | static int vga_probe(void) |
227 | { | 252 | { |
228 | u16 ega_bx; | ||
229 | |||
230 | static const char *card_name[] = { | 253 | static const char *card_name[] = { |
231 | "CGA/MDA/HGC", "EGA", "VGA" | 254 | "CGA/MDA/HGC", "EGA", "VGA" |
232 | }; | 255 | }; |
@@ -240,26 +263,26 @@ static int vga_probe(void) | |||
240 | sizeof(ega_modes)/sizeof(struct mode_info), | 263 | sizeof(ega_modes)/sizeof(struct mode_info), |
241 | sizeof(vga_modes)/sizeof(struct mode_info), | 264 | sizeof(vga_modes)/sizeof(struct mode_info), |
242 | }; | 265 | }; |
243 | u8 vga_flag; | ||
244 | 266 | ||
245 | asm(INT10 | 267 | struct biosregs ireg, oreg; |
246 | : "=b" (ega_bx) | 268 | |
247 | : "a" (0x1200), "b" (0x10) /* Check EGA/VGA */ | 269 | initregs(&ireg); |
248 | : "ecx", "edx", "esi", "edi"); | 270 | |
271 | ireg.ax = 0x1200; | ||
272 | ireg.bl = 0x10; /* Check EGA/VGA */ | ||
273 | intcall(0x10, &ireg, &oreg); | ||
249 | 274 | ||
250 | #ifndef _WAKEUP | 275 | #ifndef _WAKEUP |
251 | boot_params.screen_info.orig_video_ega_bx = ega_bx; | 276 | boot_params.screen_info.orig_video_ega_bx = oreg.bx; |
252 | #endif | 277 | #endif |
253 | 278 | ||
254 | /* If we have MDA/CGA/HGC then BL will be unchanged at 0x10 */ | 279 | /* If we have MDA/CGA/HGC then BL will be unchanged at 0x10 */ |
255 | if ((u8)ega_bx != 0x10) { | 280 | if (oreg.bl != 0x10) { |
256 | /* EGA/VGA */ | 281 | /* EGA/VGA */ |
257 | asm(INT10 | 282 | ireg.ax = 0x1a00; |
258 | : "=a" (vga_flag) | 283 | intcall(0x10, &ireg, &oreg); |
259 | : "a" (0x1a00) | ||
260 | : "ebx", "ecx", "edx", "esi", "edi"); | ||
261 | 284 | ||
262 | if (vga_flag == 0x1a) { | 285 | if (oreg.al == 0x1a) { |
263 | adapter = ADAPTER_VGA; | 286 | adapter = ADAPTER_VGA; |
264 | #ifndef _WAKEUP | 287 | #ifndef _WAKEUP |
265 | boot_params.screen_info.orig_video_isVGA = 1; | 288 | boot_params.screen_info.orig_video_isVGA = 1; |
diff --git a/arch/x86/boot/video.c b/arch/x86/boot/video.c index 3bef2c1febe9..bad728b76fc2 100644 --- a/arch/x86/boot/video.c +++ b/arch/x86/boot/video.c | |||
@@ -2,6 +2,7 @@ | |||
2 | * | 2 | * |
3 | * Copyright (C) 1991, 1992 Linus Torvalds | 3 | * Copyright (C) 1991, 1992 Linus Torvalds |
4 | * Copyright 2007 rPath, Inc. - All Rights Reserved | 4 | * Copyright 2007 rPath, Inc. - All Rights Reserved |
5 | * Copyright 2009 Intel Corporation; author H. Peter Anvin | ||
5 | * | 6 | * |
6 | * This file is part of the Linux kernel, and is made available under | 7 | * This file is part of the Linux kernel, and is made available under |
7 | * the terms of the GNU General Public License version 2. | 8 | * the terms of the GNU General Public License version 2. |
@@ -18,33 +19,29 @@ | |||
18 | 19 | ||
19 | static void store_cursor_position(void) | 20 | static void store_cursor_position(void) |
20 | { | 21 | { |
21 | u16 curpos; | 22 | struct biosregs ireg, oreg; |
22 | u16 ax, bx; | ||
23 | 23 | ||
24 | ax = 0x0300; | 24 | initregs(&ireg); |
25 | bx = 0; | 25 | ireg.ah = 0x03; |
26 | asm(INT10 | 26 | intcall(0x10, &ireg, &oreg); |
27 | : "=d" (curpos), "+a" (ax), "+b" (bx) | ||
28 | : : "ecx", "esi", "edi"); | ||
29 | 27 | ||
30 | boot_params.screen_info.orig_x = curpos; | 28 | boot_params.screen_info.orig_x = oreg.dl; |
31 | boot_params.screen_info.orig_y = curpos >> 8; | 29 | boot_params.screen_info.orig_y = oreg.dh; |
32 | } | 30 | } |
33 | 31 | ||
34 | static void store_video_mode(void) | 32 | static void store_video_mode(void) |
35 | { | 33 | { |
36 | u16 ax, page; | 34 | struct biosregs ireg, oreg; |
37 | 35 | ||
38 | /* N.B.: the saving of the video page here is a bit silly, | 36 | /* N.B.: the saving of the video page here is a bit silly, |
39 | since we pretty much assume page 0 everywhere. */ | 37 | since we pretty much assume page 0 everywhere. */ |
40 | ax = 0x0f00; | 38 | initregs(&ireg); |
41 | asm(INT10 | 39 | ireg.ah = 0x0f; |
42 | : "+a" (ax), "=b" (page) | 40 | intcall(0x10, &ireg, &oreg); |
43 | : : "ecx", "edx", "esi", "edi"); | ||
44 | 41 | ||
45 | /* Not all BIOSes are clean with respect to the top bit */ | 42 | /* Not all BIOSes are clean with respect to the top bit */ |
46 | boot_params.screen_info.orig_video_mode = ax & 0x7f; | 43 | boot_params.screen_info.orig_video_mode = oreg.al & 0x7f; |
47 | boot_params.screen_info.orig_video_page = page >> 8; | 44 | boot_params.screen_info.orig_video_page = oreg.bh; |
48 | } | 45 | } |
49 | 46 | ||
50 | /* | 47 | /* |
@@ -257,7 +254,7 @@ static void restore_screen(void) | |||
257 | int y; | 254 | int y; |
258 | addr_t dst = 0; | 255 | addr_t dst = 0; |
259 | u16 *src = saved.data; | 256 | u16 *src = saved.data; |
260 | u16 ax, bx, dx; | 257 | struct biosregs ireg; |
261 | 258 | ||
262 | if (graphic_mode) | 259 | if (graphic_mode) |
263 | return; /* Can't restore onto a graphic mode */ | 260 | return; /* Can't restore onto a graphic mode */ |
@@ -296,12 +293,11 @@ static void restore_screen(void) | |||
296 | } | 293 | } |
297 | 294 | ||
298 | /* Restore cursor position */ | 295 | /* Restore cursor position */ |
299 | ax = 0x0200; /* Set cursor position */ | 296 | initregs(&ireg); |
300 | bx = 0; /* Page number (<< 8) */ | 297 | ireg.ah = 0x02; /* Set cursor position */ |
301 | dx = (saved.cury << 8)+saved.curx; | 298 | ireg.dh = saved.cury; |
302 | asm volatile(INT10 | 299 | ireg.dl = saved.curx; |
303 | : "+a" (ax), "+b" (bx), "+d" (dx) | 300 | intcall(0x10, &ireg, NULL); |
304 | : : "ecx", "esi", "edi"); | ||
305 | } | 301 | } |
306 | #else | 302 | #else |
307 | #define save_screen() ((void)0) | 303 | #define save_screen() ((void)0) |
diff --git a/arch/x86/boot/video.h b/arch/x86/boot/video.h index ee63f5d14461..5bb174a997fc 100644 --- a/arch/x86/boot/video.h +++ b/arch/x86/boot/video.h | |||
@@ -112,20 +112,6 @@ extern int force_x, force_y; /* Don't query the BIOS for cols/rows */ | |||
112 | extern int do_restore; /* Restore screen contents */ | 112 | extern int do_restore; /* Restore screen contents */ |
113 | extern int graphic_mode; /* Graphics mode with linear frame buffer */ | 113 | extern int graphic_mode; /* Graphics mode with linear frame buffer */ |
114 | 114 | ||
115 | /* | ||
116 | * int $0x10 is notorious for touching registers it shouldn't. | ||
117 | * gcc doesn't like %ebp being clobbered, so define it as a push/pop | ||
118 | * sequence here. | ||
119 | * | ||
120 | * A number of systems, including the original PC can clobber %bp in | ||
121 | * certain circumstances, like when scrolling. There exists at least | ||
122 | * one Trident video card which could clobber DS under a set of | ||
123 | * circumstances that we are unlikely to encounter (scrolling when | ||
124 | * using an extended graphics mode of more than 800x600 pixels), but | ||
125 | * it's cheap insurance to deal with that here. | ||
126 | */ | ||
127 | #define INT10 "pushl %%ebp; pushw %%ds; int $0x10; popw %%ds; popl %%ebp" | ||
128 | |||
129 | /* Accessing VGA indexed registers */ | 115 | /* Accessing VGA indexed registers */ |
130 | static inline u8 in_idx(u16 port, u8 index) | 116 | static inline u8 in_idx(u16 port, u8 index) |
131 | { | 117 | { |
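The boot/video hunks above all follow the same conversion: the open-coded INT10 asm statements (and the INT10 push/pop macro removed from video.h) are replaced by a struct biosregs that is filled in C and handed to intcall(), which issues the real-mode interrupt and optionally copies the output registers back. A minimal sketch of that calling pattern, mirroring store_cursor_position() in the video.c hunk and assuming the struct biosregs, initregs() and intcall() declarations introduced alongside these changes (arch/x86/boot/boot.h) are in scope:

/*
 * Illustrative sketch only: read the cursor position with INT 10h/AH=03h
 * using the biosregs/intcall helpers shown in the hunks above.
 */
static void get_cursor(u8 *row, u8 *col)
{
	struct biosregs ireg, oreg;

	initregs(&ireg);		/* zero the register block */
	ireg.ah = 0x03;			/* BIOS: read cursor position and shape */
	/* ireg.bh = 0; */		/* page 0, already cleared by initregs() */
	intcall(0x10, &ireg, &oreg);	/* int $0x10 with ireg in, oreg out */

	*row = oreg.dh;			/* DH = cursor row */
	*col = oreg.dl;			/* DL = cursor column */
}

Calls that do not need any output registers simply pass NULL as the third argument, as the vga_set_*font() conversions above do.
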
diff --git a/arch/x86/configs/i386_defconfig b/arch/x86/configs/i386_defconfig index 235b81d0f6f2..edb992ebef92 100644 --- a/arch/x86/configs/i386_defconfig +++ b/arch/x86/configs/i386_defconfig | |||
@@ -1,12 +1,13 @@ | |||
1 | # | 1 | # |
2 | # Automatically generated make config: don't edit | 2 | # Automatically generated make config: don't edit |
3 | # Linux kernel version: 2.6.29-rc4 | 3 | # Linux kernel version: 2.6.30-rc2 |
4 | # Tue Feb 24 15:50:58 2009 | 4 | # Mon May 11 16:21:55 2009 |
5 | # | 5 | # |
6 | # CONFIG_64BIT is not set | 6 | # CONFIG_64BIT is not set |
7 | CONFIG_X86_32=y | 7 | CONFIG_X86_32=y |
8 | # CONFIG_X86_64 is not set | 8 | # CONFIG_X86_64 is not set |
9 | CONFIG_X86=y | 9 | CONFIG_X86=y |
10 | CONFIG_OUTPUT_FORMAT="elf32-i386" | ||
10 | CONFIG_ARCH_DEFCONFIG="arch/x86/configs/i386_defconfig" | 11 | CONFIG_ARCH_DEFCONFIG="arch/x86/configs/i386_defconfig" |
11 | CONFIG_GENERIC_TIME=y | 12 | CONFIG_GENERIC_TIME=y |
12 | CONFIG_GENERIC_CMOS_UPDATE=y | 13 | CONFIG_GENERIC_CMOS_UPDATE=y |
@@ -33,6 +34,7 @@ CONFIG_ARCH_HAS_CPU_RELAX=y | |||
33 | CONFIG_ARCH_HAS_DEFAULT_IDLE=y | 34 | CONFIG_ARCH_HAS_DEFAULT_IDLE=y |
34 | CONFIG_ARCH_HAS_CACHE_LINE_SIZE=y | 35 | CONFIG_ARCH_HAS_CACHE_LINE_SIZE=y |
35 | CONFIG_HAVE_SETUP_PER_CPU_AREA=y | 36 | CONFIG_HAVE_SETUP_PER_CPU_AREA=y |
37 | CONFIG_HAVE_DYNAMIC_PER_CPU_AREA=y | ||
36 | # CONFIG_HAVE_CPUMASK_OF_CPU_MAP is not set | 38 | # CONFIG_HAVE_CPUMASK_OF_CPU_MAP is not set |
37 | CONFIG_ARCH_HIBERNATION_POSSIBLE=y | 39 | CONFIG_ARCH_HIBERNATION_POSSIBLE=y |
38 | CONFIG_ARCH_SUSPEND_POSSIBLE=y | 40 | CONFIG_ARCH_SUSPEND_POSSIBLE=y |
@@ -40,15 +42,16 @@ CONFIG_ARCH_SUSPEND_POSSIBLE=y | |||
40 | CONFIG_ARCH_POPULATES_NODE_MAP=y | 42 | CONFIG_ARCH_POPULATES_NODE_MAP=y |
41 | # CONFIG_AUDIT_ARCH is not set | 43 | # CONFIG_AUDIT_ARCH is not set |
42 | CONFIG_ARCH_SUPPORTS_OPTIMIZED_INLINING=y | 44 | CONFIG_ARCH_SUPPORTS_OPTIMIZED_INLINING=y |
45 | CONFIG_ARCH_SUPPORTS_DEBUG_PAGEALLOC=y | ||
43 | CONFIG_GENERIC_HARDIRQS=y | 46 | CONFIG_GENERIC_HARDIRQS=y |
47 | CONFIG_GENERIC_HARDIRQS_NO__DO_IRQ=y | ||
44 | CONFIG_GENERIC_IRQ_PROBE=y | 48 | CONFIG_GENERIC_IRQ_PROBE=y |
45 | CONFIG_GENERIC_PENDING_IRQ=y | 49 | CONFIG_GENERIC_PENDING_IRQ=y |
46 | CONFIG_X86_SMP=y | ||
47 | CONFIG_USE_GENERIC_SMP_HELPERS=y | 50 | CONFIG_USE_GENERIC_SMP_HELPERS=y |
48 | CONFIG_X86_32_SMP=y | 51 | CONFIG_X86_32_SMP=y |
49 | CONFIG_X86_HT=y | 52 | CONFIG_X86_HT=y |
50 | CONFIG_X86_BIOS_REBOOT=y | ||
51 | CONFIG_X86_TRAMPOLINE=y | 53 | CONFIG_X86_TRAMPOLINE=y |
54 | CONFIG_X86_32_LAZY_GS=y | ||
52 | CONFIG_KTIME_SCALAR=y | 55 | CONFIG_KTIME_SCALAR=y |
53 | CONFIG_DEFCONFIG_LIST="/lib/modules/$UNAME_RELEASE/.config" | 56 | CONFIG_DEFCONFIG_LIST="/lib/modules/$UNAME_RELEASE/.config" |
54 | 57 | ||
@@ -60,10 +63,17 @@ CONFIG_LOCK_KERNEL=y | |||
60 | CONFIG_INIT_ENV_ARG_LIMIT=32 | 63 | CONFIG_INIT_ENV_ARG_LIMIT=32 |
61 | CONFIG_LOCALVERSION="" | 64 | CONFIG_LOCALVERSION="" |
62 | # CONFIG_LOCALVERSION_AUTO is not set | 65 | # CONFIG_LOCALVERSION_AUTO is not set |
66 | CONFIG_HAVE_KERNEL_GZIP=y | ||
67 | CONFIG_HAVE_KERNEL_BZIP2=y | ||
68 | CONFIG_HAVE_KERNEL_LZMA=y | ||
69 | CONFIG_KERNEL_GZIP=y | ||
70 | # CONFIG_KERNEL_BZIP2 is not set | ||
71 | # CONFIG_KERNEL_LZMA is not set | ||
63 | CONFIG_SWAP=y | 72 | CONFIG_SWAP=y |
64 | CONFIG_SYSVIPC=y | 73 | CONFIG_SYSVIPC=y |
65 | CONFIG_SYSVIPC_SYSCTL=y | 74 | CONFIG_SYSVIPC_SYSCTL=y |
66 | CONFIG_POSIX_MQUEUE=y | 75 | CONFIG_POSIX_MQUEUE=y |
76 | CONFIG_POSIX_MQUEUE_SYSCTL=y | ||
67 | CONFIG_BSD_PROCESS_ACCT=y | 77 | CONFIG_BSD_PROCESS_ACCT=y |
68 | # CONFIG_BSD_PROCESS_ACCT_V3 is not set | 78 | # CONFIG_BSD_PROCESS_ACCT_V3 is not set |
69 | CONFIG_TASKSTATS=y | 79 | CONFIG_TASKSTATS=y |
@@ -113,23 +123,26 @@ CONFIG_PID_NS=y | |||
113 | CONFIG_NET_NS=y | 123 | CONFIG_NET_NS=y |
114 | CONFIG_BLK_DEV_INITRD=y | 124 | CONFIG_BLK_DEV_INITRD=y |
115 | CONFIG_INITRAMFS_SOURCE="" | 125 | CONFIG_INITRAMFS_SOURCE="" |
126 | CONFIG_RD_GZIP=y | ||
127 | CONFIG_RD_BZIP2=y | ||
128 | CONFIG_RD_LZMA=y | ||
116 | CONFIG_CC_OPTIMIZE_FOR_SIZE=y | 129 | CONFIG_CC_OPTIMIZE_FOR_SIZE=y |
117 | CONFIG_SYSCTL=y | 130 | CONFIG_SYSCTL=y |
131 | CONFIG_ANON_INODES=y | ||
118 | # CONFIG_EMBEDDED is not set | 132 | # CONFIG_EMBEDDED is not set |
119 | CONFIG_UID16=y | 133 | CONFIG_UID16=y |
120 | CONFIG_SYSCTL_SYSCALL=y | 134 | CONFIG_SYSCTL_SYSCALL=y |
121 | CONFIG_KALLSYMS=y | 135 | CONFIG_KALLSYMS=y |
122 | CONFIG_KALLSYMS_ALL=y | 136 | CONFIG_KALLSYMS_ALL=y |
123 | CONFIG_KALLSYMS_EXTRA_PASS=y | 137 | CONFIG_KALLSYMS_EXTRA_PASS=y |
138 | # CONFIG_STRIP_ASM_SYMS is not set | ||
124 | CONFIG_HOTPLUG=y | 139 | CONFIG_HOTPLUG=y |
125 | CONFIG_PRINTK=y | 140 | CONFIG_PRINTK=y |
126 | CONFIG_BUG=y | 141 | CONFIG_BUG=y |
127 | CONFIG_ELF_CORE=y | 142 | CONFIG_ELF_CORE=y |
128 | CONFIG_PCSPKR_PLATFORM=y | 143 | CONFIG_PCSPKR_PLATFORM=y |
129 | # CONFIG_COMPAT_BRK is not set | ||
130 | CONFIG_BASE_FULL=y | 144 | CONFIG_BASE_FULL=y |
131 | CONFIG_FUTEX=y | 145 | CONFIG_FUTEX=y |
132 | CONFIG_ANON_INODES=y | ||
133 | CONFIG_EPOLL=y | 146 | CONFIG_EPOLL=y |
134 | CONFIG_SIGNALFD=y | 147 | CONFIG_SIGNALFD=y |
135 | CONFIG_TIMERFD=y | 148 | CONFIG_TIMERFD=y |
@@ -139,6 +152,7 @@ CONFIG_AIO=y | |||
139 | CONFIG_VM_EVENT_COUNTERS=y | 152 | CONFIG_VM_EVENT_COUNTERS=y |
140 | CONFIG_PCI_QUIRKS=y | 153 | CONFIG_PCI_QUIRKS=y |
141 | CONFIG_SLUB_DEBUG=y | 154 | CONFIG_SLUB_DEBUG=y |
155 | # CONFIG_COMPAT_BRK is not set | ||
142 | # CONFIG_SLAB is not set | 156 | # CONFIG_SLAB is not set |
143 | CONFIG_SLUB=y | 157 | CONFIG_SLUB=y |
144 | # CONFIG_SLOB is not set | 158 | # CONFIG_SLOB is not set |
@@ -154,6 +168,8 @@ CONFIG_HAVE_IOREMAP_PROT=y | |||
154 | CONFIG_HAVE_KPROBES=y | 168 | CONFIG_HAVE_KPROBES=y |
155 | CONFIG_HAVE_KRETPROBES=y | 169 | CONFIG_HAVE_KRETPROBES=y |
156 | CONFIG_HAVE_ARCH_TRACEHOOK=y | 170 | CONFIG_HAVE_ARCH_TRACEHOOK=y |
171 | CONFIG_HAVE_DMA_API_DEBUG=y | ||
172 | # CONFIG_SLOW_WORK is not set | ||
157 | CONFIG_HAVE_GENERIC_DMA_COHERENT=y | 173 | CONFIG_HAVE_GENERIC_DMA_COHERENT=y |
158 | CONFIG_SLABINFO=y | 174 | CONFIG_SLABINFO=y |
159 | CONFIG_RT_MUTEXES=y | 175 | CONFIG_RT_MUTEXES=y |
@@ -167,7 +183,6 @@ CONFIG_MODULE_FORCE_UNLOAD=y | |||
167 | CONFIG_STOP_MACHINE=y | 183 | CONFIG_STOP_MACHINE=y |
168 | CONFIG_BLOCK=y | 184 | CONFIG_BLOCK=y |
169 | # CONFIG_LBD is not set | 185 | # CONFIG_LBD is not set |
170 | CONFIG_BLK_DEV_IO_TRACE=y | ||
171 | CONFIG_BLK_DEV_BSG=y | 186 | CONFIG_BLK_DEV_BSG=y |
172 | # CONFIG_BLK_DEV_INTEGRITY is not set | 187 | # CONFIG_BLK_DEV_INTEGRITY is not set |
173 | 188 | ||
@@ -194,12 +209,12 @@ CONFIG_HIGH_RES_TIMERS=y | |||
194 | CONFIG_GENERIC_CLOCKEVENTS_BUILD=y | 209 | CONFIG_GENERIC_CLOCKEVENTS_BUILD=y |
195 | CONFIG_SMP=y | 210 | CONFIG_SMP=y |
196 | CONFIG_SPARSE_IRQ=y | 211 | CONFIG_SPARSE_IRQ=y |
197 | CONFIG_X86_FIND_SMP_CONFIG=y | ||
198 | CONFIG_X86_MPPARSE=y | 212 | CONFIG_X86_MPPARSE=y |
213 | # CONFIG_X86_BIGSMP is not set | ||
214 | CONFIG_X86_EXTENDED_PLATFORM=y | ||
199 | # CONFIG_X86_ELAN is not set | 215 | # CONFIG_X86_ELAN is not set |
200 | # CONFIG_X86_GENERICARCH is not set | ||
201 | # CONFIG_X86_VSMP is not set | ||
202 | # CONFIG_X86_RDC321X is not set | 216 | # CONFIG_X86_RDC321X is not set |
217 | # CONFIG_X86_32_NON_STANDARD is not set | ||
203 | CONFIG_SCHED_OMIT_FRAME_POINTER=y | 218 | CONFIG_SCHED_OMIT_FRAME_POINTER=y |
204 | # CONFIG_PARAVIRT_GUEST is not set | 219 | # CONFIG_PARAVIRT_GUEST is not set |
205 | # CONFIG_MEMTEST is not set | 220 | # CONFIG_MEMTEST is not set |
@@ -230,8 +245,10 @@ CONFIG_M686=y | |||
230 | # CONFIG_GENERIC_CPU is not set | 245 | # CONFIG_GENERIC_CPU is not set |
231 | CONFIG_X86_GENERIC=y | 246 | CONFIG_X86_GENERIC=y |
232 | CONFIG_X86_CPU=y | 247 | CONFIG_X86_CPU=y |
248 | CONFIG_X86_L1_CACHE_BYTES=64 | ||
249 | CONFIG_X86_INTERNODE_CACHE_BYTES=64 | ||
233 | CONFIG_X86_CMPXCHG=y | 250 | CONFIG_X86_CMPXCHG=y |
234 | CONFIG_X86_L1_CACHE_SHIFT=7 | 251 | CONFIG_X86_L1_CACHE_SHIFT=5 |
235 | CONFIG_X86_XADD=y | 252 | CONFIG_X86_XADD=y |
236 | # CONFIG_X86_PPRO_FENCE is not set | 253 | # CONFIG_X86_PPRO_FENCE is not set |
237 | CONFIG_X86_WP_WORKS_OK=y | 254 | CONFIG_X86_WP_WORKS_OK=y |
@@ -247,7 +264,7 @@ CONFIG_X86_DEBUGCTLMSR=y | |||
247 | CONFIG_CPU_SUP_INTEL=y | 264 | CONFIG_CPU_SUP_INTEL=y |
248 | CONFIG_CPU_SUP_CYRIX_32=y | 265 | CONFIG_CPU_SUP_CYRIX_32=y |
249 | CONFIG_CPU_SUP_AMD=y | 266 | CONFIG_CPU_SUP_AMD=y |
250 | CONFIG_CPU_SUP_CENTAUR_32=y | 267 | CONFIG_CPU_SUP_CENTAUR=y |
251 | CONFIG_CPU_SUP_TRANSMETA_32=y | 268 | CONFIG_CPU_SUP_TRANSMETA_32=y |
252 | CONFIG_CPU_SUP_UMC_32=y | 269 | CONFIG_CPU_SUP_UMC_32=y |
253 | CONFIG_X86_DS=y | 270 | CONFIG_X86_DS=y |
@@ -279,6 +296,7 @@ CONFIG_MICROCODE_AMD=y | |||
279 | CONFIG_MICROCODE_OLD_INTERFACE=y | 296 | CONFIG_MICROCODE_OLD_INTERFACE=y |
280 | CONFIG_X86_MSR=y | 297 | CONFIG_X86_MSR=y |
281 | CONFIG_X86_CPUID=y | 298 | CONFIG_X86_CPUID=y |
299 | # CONFIG_X86_CPU_DEBUG is not set | ||
282 | # CONFIG_NOHIGHMEM is not set | 300 | # CONFIG_NOHIGHMEM is not set |
283 | CONFIG_HIGHMEM4G=y | 301 | CONFIG_HIGHMEM4G=y |
284 | # CONFIG_HIGHMEM64G is not set | 302 | # CONFIG_HIGHMEM64G is not set |
@@ -302,6 +320,8 @@ CONFIG_ZONE_DMA_FLAG=1 | |||
302 | CONFIG_BOUNCE=y | 320 | CONFIG_BOUNCE=y |
303 | CONFIG_VIRT_TO_BUS=y | 321 | CONFIG_VIRT_TO_BUS=y |
304 | CONFIG_UNEVICTABLE_LRU=y | 322 | CONFIG_UNEVICTABLE_LRU=y |
323 | CONFIG_HAVE_MLOCK=y | ||
324 | CONFIG_HAVE_MLOCKED_PAGE_BIT=y | ||
305 | CONFIG_HIGHPTE=y | 325 | CONFIG_HIGHPTE=y |
306 | CONFIG_X86_CHECK_BIOS_CORRUPTION=y | 326 | CONFIG_X86_CHECK_BIOS_CORRUPTION=y |
307 | CONFIG_X86_BOOTPARAM_MEMORY_CORRUPTION_CHECK=y | 327 | CONFIG_X86_BOOTPARAM_MEMORY_CORRUPTION_CHECK=y |
@@ -312,6 +332,7 @@ CONFIG_MTRR=y | |||
312 | CONFIG_X86_PAT=y | 332 | CONFIG_X86_PAT=y |
313 | CONFIG_EFI=y | 333 | CONFIG_EFI=y |
314 | CONFIG_SECCOMP=y | 334 | CONFIG_SECCOMP=y |
335 | # CONFIG_CC_STACKPROTECTOR is not set | ||
315 | # CONFIG_HZ_100 is not set | 336 | # CONFIG_HZ_100 is not set |
316 | # CONFIG_HZ_250 is not set | 337 | # CONFIG_HZ_250 is not set |
317 | # CONFIG_HZ_300 is not set | 338 | # CONFIG_HZ_300 is not set |
@@ -322,8 +343,9 @@ CONFIG_KEXEC=y | |||
322 | CONFIG_CRASH_DUMP=y | 343 | CONFIG_CRASH_DUMP=y |
323 | # CONFIG_KEXEC_JUMP is not set | 344 | # CONFIG_KEXEC_JUMP is not set |
324 | CONFIG_PHYSICAL_START=0x1000000 | 345 | CONFIG_PHYSICAL_START=0x1000000 |
325 | # CONFIG_RELOCATABLE is not set | 346 | CONFIG_RELOCATABLE=y |
326 | CONFIG_PHYSICAL_ALIGN=0x200000 | 347 | CONFIG_X86_NEED_RELOCS=y |
348 | CONFIG_PHYSICAL_ALIGN=0x1000000 | ||
327 | CONFIG_HOTPLUG_CPU=y | 349 | CONFIG_HOTPLUG_CPU=y |
328 | # CONFIG_COMPAT_VDSO is not set | 350 | # CONFIG_COMPAT_VDSO is not set |
329 | # CONFIG_CMDLINE_BOOL is not set | 351 | # CONFIG_CMDLINE_BOOL is not set |
@@ -363,7 +385,6 @@ CONFIG_ACPI_THERMAL=y | |||
363 | CONFIG_ACPI_BLACKLIST_YEAR=0 | 385 | CONFIG_ACPI_BLACKLIST_YEAR=0 |
364 | # CONFIG_ACPI_DEBUG is not set | 386 | # CONFIG_ACPI_DEBUG is not set |
365 | # CONFIG_ACPI_PCI_SLOT is not set | 387 | # CONFIG_ACPI_PCI_SLOT is not set |
366 | CONFIG_ACPI_SYSTEM=y | ||
367 | CONFIG_X86_PM_TIMER=y | 388 | CONFIG_X86_PM_TIMER=y |
368 | CONFIG_ACPI_CONTAINER=y | 389 | CONFIG_ACPI_CONTAINER=y |
369 | # CONFIG_ACPI_SBS is not set | 390 | # CONFIG_ACPI_SBS is not set |
@@ -425,6 +446,7 @@ CONFIG_PCI_BIOS=y | |||
425 | CONFIG_PCI_DIRECT=y | 446 | CONFIG_PCI_DIRECT=y |
426 | CONFIG_PCI_MMCONFIG=y | 447 | CONFIG_PCI_MMCONFIG=y |
427 | CONFIG_PCI_DOMAINS=y | 448 | CONFIG_PCI_DOMAINS=y |
449 | # CONFIG_DMAR is not set | ||
428 | CONFIG_PCIEPORTBUS=y | 450 | CONFIG_PCIEPORTBUS=y |
429 | # CONFIG_HOTPLUG_PCI_PCIE is not set | 451 | # CONFIG_HOTPLUG_PCI_PCIE is not set |
430 | CONFIG_PCIEAER=y | 452 | CONFIG_PCIEAER=y |
@@ -435,6 +457,7 @@ CONFIG_PCI_MSI=y | |||
435 | # CONFIG_PCI_DEBUG is not set | 457 | # CONFIG_PCI_DEBUG is not set |
436 | # CONFIG_PCI_STUB is not set | 458 | # CONFIG_PCI_STUB is not set |
437 | CONFIG_HT_IRQ=y | 459 | CONFIG_HT_IRQ=y |
460 | # CONFIG_PCI_IOV is not set | ||
438 | CONFIG_ISA_DMA_API=y | 461 | CONFIG_ISA_DMA_API=y |
439 | # CONFIG_ISA is not set | 462 | # CONFIG_ISA is not set |
440 | # CONFIG_MCA is not set | 463 | # CONFIG_MCA is not set |
@@ -481,7 +504,6 @@ CONFIG_NET=y | |||
481 | # | 504 | # |
482 | # Networking options | 505 | # Networking options |
483 | # | 506 | # |
484 | CONFIG_COMPAT_NET_DEV_OPS=y | ||
485 | CONFIG_PACKET=y | 507 | CONFIG_PACKET=y |
486 | CONFIG_PACKET_MMAP=y | 508 | CONFIG_PACKET_MMAP=y |
487 | CONFIG_UNIX=y | 509 | CONFIG_UNIX=y |
@@ -639,6 +661,7 @@ CONFIG_LLC=y | |||
639 | # CONFIG_LAPB is not set | 661 | # CONFIG_LAPB is not set |
640 | # CONFIG_ECONET is not set | 662 | # CONFIG_ECONET is not set |
641 | # CONFIG_WAN_ROUTER is not set | 663 | # CONFIG_WAN_ROUTER is not set |
664 | # CONFIG_PHONET is not set | ||
642 | CONFIG_NET_SCHED=y | 665 | CONFIG_NET_SCHED=y |
643 | 666 | ||
644 | # | 667 | # |
@@ -696,6 +719,7 @@ CONFIG_NET_SCH_FIFO=y | |||
696 | # | 719 | # |
697 | # CONFIG_NET_PKTGEN is not set | 720 | # CONFIG_NET_PKTGEN is not set |
698 | # CONFIG_NET_TCPPROBE is not set | 721 | # CONFIG_NET_TCPPROBE is not set |
722 | # CONFIG_NET_DROP_MONITOR is not set | ||
699 | CONFIG_HAMRADIO=y | 723 | CONFIG_HAMRADIO=y |
700 | 724 | ||
701 | # | 725 | # |
@@ -706,12 +730,10 @@ CONFIG_HAMRADIO=y | |||
706 | # CONFIG_IRDA is not set | 730 | # CONFIG_IRDA is not set |
707 | # CONFIG_BT is not set | 731 | # CONFIG_BT is not set |
708 | # CONFIG_AF_RXRPC is not set | 732 | # CONFIG_AF_RXRPC is not set |
709 | # CONFIG_PHONET is not set | ||
710 | CONFIG_FIB_RULES=y | 733 | CONFIG_FIB_RULES=y |
711 | CONFIG_WIRELESS=y | 734 | CONFIG_WIRELESS=y |
712 | CONFIG_CFG80211=y | 735 | CONFIG_CFG80211=y |
713 | # CONFIG_CFG80211_REG_DEBUG is not set | 736 | # CONFIG_CFG80211_REG_DEBUG is not set |
714 | CONFIG_NL80211=y | ||
715 | CONFIG_WIRELESS_OLD_REGULATORY=y | 737 | CONFIG_WIRELESS_OLD_REGULATORY=y |
716 | CONFIG_WIRELESS_EXT=y | 738 | CONFIG_WIRELESS_EXT=y |
717 | CONFIG_WIRELESS_EXT_SYSFS=y | 739 | CONFIG_WIRELESS_EXT_SYSFS=y |
@@ -789,6 +811,7 @@ CONFIG_MISC_DEVICES=y | |||
789 | # CONFIG_ICS932S401 is not set | 811 | # CONFIG_ICS932S401 is not set |
790 | # CONFIG_ENCLOSURE_SERVICES is not set | 812 | # CONFIG_ENCLOSURE_SERVICES is not set |
791 | # CONFIG_HP_ILO is not set | 813 | # CONFIG_HP_ILO is not set |
814 | # CONFIG_ISL29003 is not set | ||
792 | # CONFIG_C2PORT is not set | 815 | # CONFIG_C2PORT is not set |
793 | 816 | ||
794 | # | 817 | # |
@@ -842,6 +865,7 @@ CONFIG_SCSI_SPI_ATTRS=y | |||
842 | # CONFIG_SCSI_LOWLEVEL is not set | 865 | # CONFIG_SCSI_LOWLEVEL is not set |
843 | # CONFIG_SCSI_LOWLEVEL_PCMCIA is not set | 866 | # CONFIG_SCSI_LOWLEVEL_PCMCIA is not set |
844 | # CONFIG_SCSI_DH is not set | 867 | # CONFIG_SCSI_DH is not set |
868 | # CONFIG_SCSI_OSD_INITIATOR is not set | ||
845 | CONFIG_ATA=y | 869 | CONFIG_ATA=y |
846 | # CONFIG_ATA_NONSTANDARD is not set | 870 | # CONFIG_ATA_NONSTANDARD is not set |
847 | CONFIG_ATA_ACPI=y | 871 | CONFIG_ATA_ACPI=y |
@@ -940,6 +964,7 @@ CONFIG_DM_ZERO=y | |||
940 | CONFIG_MACINTOSH_DRIVERS=y | 964 | CONFIG_MACINTOSH_DRIVERS=y |
941 | CONFIG_MAC_EMUMOUSEBTN=y | 965 | CONFIG_MAC_EMUMOUSEBTN=y |
942 | CONFIG_NETDEVICES=y | 966 | CONFIG_NETDEVICES=y |
967 | CONFIG_COMPAT_NET_DEV_OPS=y | ||
943 | # CONFIG_IFB is not set | 968 | # CONFIG_IFB is not set |
944 | # CONFIG_DUMMY is not set | 969 | # CONFIG_DUMMY is not set |
945 | # CONFIG_BONDING is not set | 970 | # CONFIG_BONDING is not set |
@@ -977,6 +1002,8 @@ CONFIG_MII=y | |||
977 | CONFIG_NET_VENDOR_3COM=y | 1002 | CONFIG_NET_VENDOR_3COM=y |
978 | # CONFIG_VORTEX is not set | 1003 | # CONFIG_VORTEX is not set |
979 | # CONFIG_TYPHOON is not set | 1004 | # CONFIG_TYPHOON is not set |
1005 | # CONFIG_ETHOC is not set | ||
1006 | # CONFIG_DNET is not set | ||
980 | CONFIG_NET_TULIP=y | 1007 | CONFIG_NET_TULIP=y |
981 | # CONFIG_DE2104X is not set | 1008 | # CONFIG_DE2104X is not set |
982 | # CONFIG_TULIP is not set | 1009 | # CONFIG_TULIP is not set |
@@ -1026,6 +1053,7 @@ CONFIG_E1000=y | |||
1026 | CONFIG_E1000E=y | 1053 | CONFIG_E1000E=y |
1027 | # CONFIG_IP1000 is not set | 1054 | # CONFIG_IP1000 is not set |
1028 | # CONFIG_IGB is not set | 1055 | # CONFIG_IGB is not set |
1056 | # CONFIG_IGBVF is not set | ||
1029 | # CONFIG_NS83820 is not set | 1057 | # CONFIG_NS83820 is not set |
1030 | # CONFIG_HAMACHI is not set | 1058 | # CONFIG_HAMACHI is not set |
1031 | # CONFIG_YELLOWFIN is not set | 1059 | # CONFIG_YELLOWFIN is not set |
@@ -1040,6 +1068,7 @@ CONFIG_BNX2=y | |||
1040 | # CONFIG_QLA3XXX is not set | 1068 | # CONFIG_QLA3XXX is not set |
1041 | # CONFIG_ATL1 is not set | 1069 | # CONFIG_ATL1 is not set |
1042 | # CONFIG_ATL1E is not set | 1070 | # CONFIG_ATL1E is not set |
1071 | # CONFIG_ATL1C is not set | ||
1043 | # CONFIG_JME is not set | 1072 | # CONFIG_JME is not set |
1044 | CONFIG_NETDEV_10000=y | 1073 | CONFIG_NETDEV_10000=y |
1045 | # CONFIG_CHELSIO_T1 is not set | 1074 | # CONFIG_CHELSIO_T1 is not set |
@@ -1049,6 +1078,7 @@ CONFIG_CHELSIO_T3_DEPENDS=y | |||
1049 | # CONFIG_IXGBE is not set | 1078 | # CONFIG_IXGBE is not set |
1050 | # CONFIG_IXGB is not set | 1079 | # CONFIG_IXGB is not set |
1051 | # CONFIG_S2IO is not set | 1080 | # CONFIG_S2IO is not set |
1081 | # CONFIG_VXGE is not set | ||
1052 | # CONFIG_MYRI10GE is not set | 1082 | # CONFIG_MYRI10GE is not set |
1053 | # CONFIG_NETXEN_NIC is not set | 1083 | # CONFIG_NETXEN_NIC is not set |
1054 | # CONFIG_NIU is not set | 1084 | # CONFIG_NIU is not set |
@@ -1058,6 +1088,7 @@ CONFIG_CHELSIO_T3_DEPENDS=y | |||
1058 | # CONFIG_BNX2X is not set | 1088 | # CONFIG_BNX2X is not set |
1059 | # CONFIG_QLGE is not set | 1089 | # CONFIG_QLGE is not set |
1060 | # CONFIG_SFC is not set | 1090 | # CONFIG_SFC is not set |
1091 | # CONFIG_BE2NET is not set | ||
1061 | CONFIG_TR=y | 1092 | CONFIG_TR=y |
1062 | # CONFIG_IBMOL is not set | 1093 | # CONFIG_IBMOL is not set |
1063 | # CONFIG_IBMLS is not set | 1094 | # CONFIG_IBMLS is not set |
@@ -1073,8 +1104,8 @@ CONFIG_WLAN_80211=y | |||
1073 | # CONFIG_LIBERTAS is not set | 1104 | # CONFIG_LIBERTAS is not set |
1074 | # CONFIG_LIBERTAS_THINFIRM is not set | 1105 | # CONFIG_LIBERTAS_THINFIRM is not set |
1075 | # CONFIG_AIRO is not set | 1106 | # CONFIG_AIRO is not set |
1076 | # CONFIG_HERMES is not set | ||
1077 | # CONFIG_ATMEL is not set | 1107 | # CONFIG_ATMEL is not set |
1108 | # CONFIG_AT76C50X_USB is not set | ||
1078 | # CONFIG_AIRO_CS is not set | 1109 | # CONFIG_AIRO_CS is not set |
1079 | # CONFIG_PCMCIA_WL3501 is not set | 1110 | # CONFIG_PCMCIA_WL3501 is not set |
1080 | # CONFIG_PRISM54 is not set | 1111 | # CONFIG_PRISM54 is not set |
@@ -1084,21 +1115,21 @@ CONFIG_WLAN_80211=y | |||
1084 | # CONFIG_RTL8187 is not set | 1115 | # CONFIG_RTL8187 is not set |
1085 | # CONFIG_ADM8211 is not set | 1116 | # CONFIG_ADM8211 is not set |
1086 | # CONFIG_MAC80211_HWSIM is not set | 1117 | # CONFIG_MAC80211_HWSIM is not set |
1118 | # CONFIG_MWL8K is not set | ||
1087 | # CONFIG_P54_COMMON is not set | 1119 | # CONFIG_P54_COMMON is not set |
1088 | CONFIG_ATH5K=y | 1120 | CONFIG_ATH5K=y |
1089 | # CONFIG_ATH5K_DEBUG is not set | 1121 | # CONFIG_ATH5K_DEBUG is not set |
1090 | # CONFIG_ATH9K is not set | 1122 | # CONFIG_ATH9K is not set |
1123 | # CONFIG_AR9170_USB is not set | ||
1091 | # CONFIG_IPW2100 is not set | 1124 | # CONFIG_IPW2100 is not set |
1092 | # CONFIG_IPW2200 is not set | 1125 | # CONFIG_IPW2200 is not set |
1093 | # CONFIG_IWLCORE is not set | 1126 | # CONFIG_IWLWIFI is not set |
1094 | # CONFIG_IWLWIFI_LEDS is not set | ||
1095 | # CONFIG_IWLAGN is not set | ||
1096 | # CONFIG_IWL3945 is not set | ||
1097 | # CONFIG_HOSTAP is not set | 1127 | # CONFIG_HOSTAP is not set |
1098 | # CONFIG_B43 is not set | 1128 | # CONFIG_B43 is not set |
1099 | # CONFIG_B43LEGACY is not set | 1129 | # CONFIG_B43LEGACY is not set |
1100 | # CONFIG_ZD1211RW is not set | 1130 | # CONFIG_ZD1211RW is not set |
1101 | # CONFIG_RT2X00 is not set | 1131 | # CONFIG_RT2X00 is not set |
1132 | # CONFIG_HERMES is not set | ||
1102 | 1133 | ||
1103 | # | 1134 | # |
1104 | # Enable WiMAX (Networking options) to see the WiMAX drivers | 1135 | # Enable WiMAX (Networking options) to see the WiMAX drivers |
@@ -1209,6 +1240,8 @@ CONFIG_INPUT_TABLET=y | |||
1209 | # CONFIG_TABLET_USB_KBTAB is not set | 1240 | # CONFIG_TABLET_USB_KBTAB is not set |
1210 | # CONFIG_TABLET_USB_WACOM is not set | 1241 | # CONFIG_TABLET_USB_WACOM is not set |
1211 | CONFIG_INPUT_TOUCHSCREEN=y | 1242 | CONFIG_INPUT_TOUCHSCREEN=y |
1243 | # CONFIG_TOUCHSCREEN_AD7879_I2C is not set | ||
1244 | # CONFIG_TOUCHSCREEN_AD7879 is not set | ||
1212 | # CONFIG_TOUCHSCREEN_FUJITSU is not set | 1245 | # CONFIG_TOUCHSCREEN_FUJITSU is not set |
1213 | # CONFIG_TOUCHSCREEN_GUNZE is not set | 1246 | # CONFIG_TOUCHSCREEN_GUNZE is not set |
1214 | # CONFIG_TOUCHSCREEN_ELO is not set | 1247 | # CONFIG_TOUCHSCREEN_ELO is not set |
@@ -1303,6 +1336,7 @@ CONFIG_UNIX98_PTYS=y | |||
1303 | # CONFIG_LEGACY_PTYS is not set | 1336 | # CONFIG_LEGACY_PTYS is not set |
1304 | # CONFIG_IPMI_HANDLER is not set | 1337 | # CONFIG_IPMI_HANDLER is not set |
1305 | CONFIG_HW_RANDOM=y | 1338 | CONFIG_HW_RANDOM=y |
1339 | # CONFIG_HW_RANDOM_TIMERIOMEM is not set | ||
1306 | CONFIG_HW_RANDOM_INTEL=y | 1340 | CONFIG_HW_RANDOM_INTEL=y |
1307 | CONFIG_HW_RANDOM_AMD=y | 1341 | CONFIG_HW_RANDOM_AMD=y |
1308 | CONFIG_HW_RANDOM_GEODE=y | 1342 | CONFIG_HW_RANDOM_GEODE=y |
@@ -1390,7 +1424,6 @@ CONFIG_I2C_I801=y | |||
1390 | # CONFIG_SENSORS_PCF8574 is not set | 1424 | # CONFIG_SENSORS_PCF8574 is not set |
1391 | # CONFIG_PCF8575 is not set | 1425 | # CONFIG_PCF8575 is not set |
1392 | # CONFIG_SENSORS_PCA9539 is not set | 1426 | # CONFIG_SENSORS_PCA9539 is not set |
1393 | # CONFIG_SENSORS_PCF8591 is not set | ||
1394 | # CONFIG_SENSORS_MAX6875 is not set | 1427 | # CONFIG_SENSORS_MAX6875 is not set |
1395 | # CONFIG_SENSORS_TSL2550 is not set | 1428 | # CONFIG_SENSORS_TSL2550 is not set |
1396 | # CONFIG_I2C_DEBUG_CORE is not set | 1429 | # CONFIG_I2C_DEBUG_CORE is not set |
@@ -1424,6 +1457,7 @@ CONFIG_HWMON=y | |||
1424 | # CONFIG_SENSORS_ADT7475 is not set | 1457 | # CONFIG_SENSORS_ADT7475 is not set |
1425 | # CONFIG_SENSORS_K8TEMP is not set | 1458 | # CONFIG_SENSORS_K8TEMP is not set |
1426 | # CONFIG_SENSORS_ASB100 is not set | 1459 | # CONFIG_SENSORS_ASB100 is not set |
1460 | # CONFIG_SENSORS_ATK0110 is not set | ||
1427 | # CONFIG_SENSORS_ATXP1 is not set | 1461 | # CONFIG_SENSORS_ATXP1 is not set |
1428 | # CONFIG_SENSORS_DS1621 is not set | 1462 | # CONFIG_SENSORS_DS1621 is not set |
1429 | # CONFIG_SENSORS_I5K_AMB is not set | 1463 | # CONFIG_SENSORS_I5K_AMB is not set |
@@ -1433,6 +1467,7 @@ CONFIG_HWMON=y | |||
1433 | # CONFIG_SENSORS_FSCHER is not set | 1467 | # CONFIG_SENSORS_FSCHER is not set |
1434 | # CONFIG_SENSORS_FSCPOS is not set | 1468 | # CONFIG_SENSORS_FSCPOS is not set |
1435 | # CONFIG_SENSORS_FSCHMD is not set | 1469 | # CONFIG_SENSORS_FSCHMD is not set |
1470 | # CONFIG_SENSORS_G760A is not set | ||
1436 | # CONFIG_SENSORS_GL518SM is not set | 1471 | # CONFIG_SENSORS_GL518SM is not set |
1437 | # CONFIG_SENSORS_GL520SM is not set | 1472 | # CONFIG_SENSORS_GL520SM is not set |
1438 | # CONFIG_SENSORS_CORETEMP is not set | 1473 | # CONFIG_SENSORS_CORETEMP is not set |
@@ -1448,11 +1483,14 @@ CONFIG_HWMON=y | |||
1448 | # CONFIG_SENSORS_LM90 is not set | 1483 | # CONFIG_SENSORS_LM90 is not set |
1449 | # CONFIG_SENSORS_LM92 is not set | 1484 | # CONFIG_SENSORS_LM92 is not set |
1450 | # CONFIG_SENSORS_LM93 is not set | 1485 | # CONFIG_SENSORS_LM93 is not set |
1486 | # CONFIG_SENSORS_LTC4215 is not set | ||
1451 | # CONFIG_SENSORS_LTC4245 is not set | 1487 | # CONFIG_SENSORS_LTC4245 is not set |
1488 | # CONFIG_SENSORS_LM95241 is not set | ||
1452 | # CONFIG_SENSORS_MAX1619 is not set | 1489 | # CONFIG_SENSORS_MAX1619 is not set |
1453 | # CONFIG_SENSORS_MAX6650 is not set | 1490 | # CONFIG_SENSORS_MAX6650 is not set |
1454 | # CONFIG_SENSORS_PC87360 is not set | 1491 | # CONFIG_SENSORS_PC87360 is not set |
1455 | # CONFIG_SENSORS_PC87427 is not set | 1492 | # CONFIG_SENSORS_PC87427 is not set |
1493 | # CONFIG_SENSORS_PCF8591 is not set | ||
1456 | # CONFIG_SENSORS_SIS5595 is not set | 1494 | # CONFIG_SENSORS_SIS5595 is not set |
1457 | # CONFIG_SENSORS_DME1737 is not set | 1495 | # CONFIG_SENSORS_DME1737 is not set |
1458 | # CONFIG_SENSORS_SMSC47M1 is not set | 1496 | # CONFIG_SENSORS_SMSC47M1 is not set |
@@ -1643,7 +1681,6 @@ CONFIG_FB_EFI=y | |||
1643 | # CONFIG_FB_3DFX is not set | 1681 | # CONFIG_FB_3DFX is not set |
1644 | # CONFIG_FB_VOODOO1 is not set | 1682 | # CONFIG_FB_VOODOO1 is not set |
1645 | # CONFIG_FB_VT8623 is not set | 1683 | # CONFIG_FB_VT8623 is not set |
1646 | # CONFIG_FB_CYBLA is not set | ||
1647 | # CONFIG_FB_TRIDENT is not set | 1684 | # CONFIG_FB_TRIDENT is not set |
1648 | # CONFIG_FB_ARK is not set | 1685 | # CONFIG_FB_ARK is not set |
1649 | # CONFIG_FB_PM3 is not set | 1686 | # CONFIG_FB_PM3 is not set |
@@ -1652,6 +1689,7 @@ CONFIG_FB_EFI=y | |||
1652 | # CONFIG_FB_VIRTUAL is not set | 1689 | # CONFIG_FB_VIRTUAL is not set |
1653 | # CONFIG_FB_METRONOME is not set | 1690 | # CONFIG_FB_METRONOME is not set |
1654 | # CONFIG_FB_MB862XX is not set | 1691 | # CONFIG_FB_MB862XX is not set |
1692 | # CONFIG_FB_BROADSHEET is not set | ||
1655 | CONFIG_BACKLIGHT_LCD_SUPPORT=y | 1693 | CONFIG_BACKLIGHT_LCD_SUPPORT=y |
1656 | # CONFIG_LCD_CLASS_DEVICE is not set | 1694 | # CONFIG_LCD_CLASS_DEVICE is not set |
1657 | CONFIG_BACKLIGHT_CLASS_DEVICE=y | 1695 | CONFIG_BACKLIGHT_CLASS_DEVICE=y |
@@ -1738,6 +1776,8 @@ CONFIG_SND_PCI=y | |||
1738 | # CONFIG_SND_INDIGO is not set | 1776 | # CONFIG_SND_INDIGO is not set |
1739 | # CONFIG_SND_INDIGOIO is not set | 1777 | # CONFIG_SND_INDIGOIO is not set |
1740 | # CONFIG_SND_INDIGODJ is not set | 1778 | # CONFIG_SND_INDIGODJ is not set |
1779 | # CONFIG_SND_INDIGOIOX is not set | ||
1780 | # CONFIG_SND_INDIGODJX is not set | ||
1741 | # CONFIG_SND_EMU10K1 is not set | 1781 | # CONFIG_SND_EMU10K1 is not set |
1742 | # CONFIG_SND_EMU10K1X is not set | 1782 | # CONFIG_SND_EMU10K1X is not set |
1743 | # CONFIG_SND_ENS1370 is not set | 1783 | # CONFIG_SND_ENS1370 is not set |
@@ -1811,15 +1851,17 @@ CONFIG_USB_HIDDEV=y | |||
1811 | # | 1851 | # |
1812 | # Special HID drivers | 1852 | # Special HID drivers |
1813 | # | 1853 | # |
1814 | CONFIG_HID_COMPAT=y | ||
1815 | CONFIG_HID_A4TECH=y | 1854 | CONFIG_HID_A4TECH=y |
1816 | CONFIG_HID_APPLE=y | 1855 | CONFIG_HID_APPLE=y |
1817 | CONFIG_HID_BELKIN=y | 1856 | CONFIG_HID_BELKIN=y |
1818 | CONFIG_HID_CHERRY=y | 1857 | CONFIG_HID_CHERRY=y |
1819 | CONFIG_HID_CHICONY=y | 1858 | CONFIG_HID_CHICONY=y |
1820 | CONFIG_HID_CYPRESS=y | 1859 | CONFIG_HID_CYPRESS=y |
1860 | # CONFIG_DRAGONRISE_FF is not set | ||
1821 | CONFIG_HID_EZKEY=y | 1861 | CONFIG_HID_EZKEY=y |
1862 | CONFIG_HID_KYE=y | ||
1822 | CONFIG_HID_GYRATION=y | 1863 | CONFIG_HID_GYRATION=y |
1864 | CONFIG_HID_KENSINGTON=y | ||
1823 | CONFIG_HID_LOGITECH=y | 1865 | CONFIG_HID_LOGITECH=y |
1824 | CONFIG_LOGITECH_FF=y | 1866 | CONFIG_LOGITECH_FF=y |
1825 | # CONFIG_LOGIRUMBLEPAD2_FF is not set | 1867 | # CONFIG_LOGIRUMBLEPAD2_FF is not set |
@@ -1885,11 +1927,11 @@ CONFIG_USB_PRINTER=y | |||
1885 | # CONFIG_USB_TMC is not set | 1927 | # CONFIG_USB_TMC is not set |
1886 | 1928 | ||
1887 | # | 1929 | # |
1888 | # NOTE: USB_STORAGE depends on SCSI but BLK_DEV_SD may also be needed; | 1930 | # NOTE: USB_STORAGE depends on SCSI but BLK_DEV_SD may |
1889 | # | 1931 | # |
1890 | 1932 | ||
1891 | # | 1933 | # |
1892 | # see USB_STORAGE Help for more information | 1934 | # also be needed; see USB_STORAGE Help for more info |
1893 | # | 1935 | # |
1894 | CONFIG_USB_STORAGE=y | 1936 | CONFIG_USB_STORAGE=y |
1895 | # CONFIG_USB_STORAGE_DEBUG is not set | 1937 | # CONFIG_USB_STORAGE_DEBUG is not set |
@@ -1931,7 +1973,6 @@ CONFIG_USB_LIBUSUAL=y | |||
1931 | # CONFIG_USB_LED is not set | 1973 | # CONFIG_USB_LED is not set |
1932 | # CONFIG_USB_CYPRESS_CY7C63 is not set | 1974 | # CONFIG_USB_CYPRESS_CY7C63 is not set |
1933 | # CONFIG_USB_CYTHERM is not set | 1975 | # CONFIG_USB_CYTHERM is not set |
1934 | # CONFIG_USB_PHIDGET is not set | ||
1935 | # CONFIG_USB_IDMOUSE is not set | 1976 | # CONFIG_USB_IDMOUSE is not set |
1936 | # CONFIG_USB_FTDI_ELAN is not set | 1977 | # CONFIG_USB_FTDI_ELAN is not set |
1937 | # CONFIG_USB_APPLEDISPLAY is not set | 1978 | # CONFIG_USB_APPLEDISPLAY is not set |
@@ -1947,6 +1988,7 @@ CONFIG_USB_LIBUSUAL=y | |||
1947 | # | 1988 | # |
1948 | # OTG and related infrastructure | 1989 | # OTG and related infrastructure |
1949 | # | 1990 | # |
1991 | # CONFIG_NOP_USB_XCEIV is not set | ||
1950 | # CONFIG_UWB is not set | 1992 | # CONFIG_UWB is not set |
1951 | # CONFIG_MMC is not set | 1993 | # CONFIG_MMC is not set |
1952 | # CONFIG_MEMSTICK is not set | 1994 | # CONFIG_MEMSTICK is not set |
@@ -1958,8 +2000,10 @@ CONFIG_LEDS_CLASS=y | |||
1958 | # | 2000 | # |
1959 | # CONFIG_LEDS_ALIX2 is not set | 2001 | # CONFIG_LEDS_ALIX2 is not set |
1960 | # CONFIG_LEDS_PCA9532 is not set | 2002 | # CONFIG_LEDS_PCA9532 is not set |
2003 | # CONFIG_LEDS_LP5521 is not set | ||
1961 | # CONFIG_LEDS_CLEVO_MAIL is not set | 2004 | # CONFIG_LEDS_CLEVO_MAIL is not set |
1962 | # CONFIG_LEDS_PCA955X is not set | 2005 | # CONFIG_LEDS_PCA955X is not set |
2006 | # CONFIG_LEDS_BD2802 is not set | ||
1963 | 2007 | ||
1964 | # | 2008 | # |
1965 | # LED Triggers | 2009 | # LED Triggers |
@@ -1969,6 +2013,10 @@ CONFIG_LEDS_TRIGGERS=y | |||
1969 | # CONFIG_LEDS_TRIGGER_HEARTBEAT is not set | 2013 | # CONFIG_LEDS_TRIGGER_HEARTBEAT is not set |
1970 | # CONFIG_LEDS_TRIGGER_BACKLIGHT is not set | 2014 | # CONFIG_LEDS_TRIGGER_BACKLIGHT is not set |
1971 | # CONFIG_LEDS_TRIGGER_DEFAULT_ON is not set | 2015 | # CONFIG_LEDS_TRIGGER_DEFAULT_ON is not set |
2016 | |||
2017 | # | ||
2018 | # iptables trigger is under Netfilter config (LED target) | ||
2019 | # | ||
1972 | # CONFIG_ACCESSIBILITY is not set | 2020 | # CONFIG_ACCESSIBILITY is not set |
1973 | # CONFIG_INFINIBAND is not set | 2021 | # CONFIG_INFINIBAND is not set |
1974 | CONFIG_EDAC=y | 2022 | CONFIG_EDAC=y |
@@ -2037,6 +2085,7 @@ CONFIG_DMADEVICES=y | |||
2037 | # DMA Devices | 2085 | # DMA Devices |
2038 | # | 2086 | # |
2039 | # CONFIG_INTEL_IOATDMA is not set | 2087 | # CONFIG_INTEL_IOATDMA is not set |
2088 | # CONFIG_AUXDISPLAY is not set | ||
2040 | # CONFIG_UIO is not set | 2089 | # CONFIG_UIO is not set |
2041 | # CONFIG_STAGING is not set | 2090 | # CONFIG_STAGING is not set |
2042 | CONFIG_X86_PLATFORM_DEVICES=y | 2091 | CONFIG_X86_PLATFORM_DEVICES=y |
@@ -2071,6 +2120,7 @@ CONFIG_DMIID=y | |||
2071 | # | 2120 | # |
2072 | # CONFIG_EXT2_FS is not set | 2121 | # CONFIG_EXT2_FS is not set |
2073 | CONFIG_EXT3_FS=y | 2122 | CONFIG_EXT3_FS=y |
2123 | # CONFIG_EXT3_DEFAULTS_TO_ORDERED is not set | ||
2074 | CONFIG_EXT3_FS_XATTR=y | 2124 | CONFIG_EXT3_FS_XATTR=y |
2075 | CONFIG_EXT3_FS_POSIX_ACL=y | 2125 | CONFIG_EXT3_FS_POSIX_ACL=y |
2076 | CONFIG_EXT3_FS_SECURITY=y | 2126 | CONFIG_EXT3_FS_SECURITY=y |
@@ -2101,6 +2151,11 @@ CONFIG_AUTOFS4_FS=y | |||
2101 | CONFIG_GENERIC_ACL=y | 2151 | CONFIG_GENERIC_ACL=y |
2102 | 2152 | ||
2103 | # | 2153 | # |
2154 | # Caches | ||
2155 | # | ||
2156 | # CONFIG_FSCACHE is not set | ||
2157 | |||
2158 | # | ||
2104 | # CD-ROM/DVD Filesystems | 2159 | # CD-ROM/DVD Filesystems |
2105 | # | 2160 | # |
2106 | CONFIG_ISO9660_FS=y | 2161 | CONFIG_ISO9660_FS=y |
@@ -2151,6 +2206,7 @@ CONFIG_MISC_FILESYSTEMS=y | |||
2151 | # CONFIG_ROMFS_FS is not set | 2206 | # CONFIG_ROMFS_FS is not set |
2152 | # CONFIG_SYSV_FS is not set | 2207 | # CONFIG_SYSV_FS is not set |
2153 | # CONFIG_UFS_FS is not set | 2208 | # CONFIG_UFS_FS is not set |
2209 | # CONFIG_NILFS2_FS is not set | ||
2154 | CONFIG_NETWORK_FILESYSTEMS=y | 2210 | CONFIG_NETWORK_FILESYSTEMS=y |
2155 | CONFIG_NFS_FS=y | 2211 | CONFIG_NFS_FS=y |
2156 | CONFIG_NFS_V3=y | 2212 | CONFIG_NFS_V3=y |
@@ -2164,7 +2220,6 @@ CONFIG_NFS_ACL_SUPPORT=y | |||
2164 | CONFIG_NFS_COMMON=y | 2220 | CONFIG_NFS_COMMON=y |
2165 | CONFIG_SUNRPC=y | 2221 | CONFIG_SUNRPC=y |
2166 | CONFIG_SUNRPC_GSS=y | 2222 | CONFIG_SUNRPC_GSS=y |
2167 | # CONFIG_SUNRPC_REGISTER_V4 is not set | ||
2168 | CONFIG_RPCSEC_GSS_KRB5=y | 2223 | CONFIG_RPCSEC_GSS_KRB5=y |
2169 | # CONFIG_RPCSEC_GSS_SPKM3 is not set | 2224 | # CONFIG_RPCSEC_GSS_SPKM3 is not set |
2170 | # CONFIG_SMB_FS is not set | 2225 | # CONFIG_SMB_FS is not set |
@@ -2251,6 +2306,7 @@ CONFIG_DEBUG_FS=y | |||
2251 | CONFIG_DEBUG_KERNEL=y | 2306 | CONFIG_DEBUG_KERNEL=y |
2252 | # CONFIG_DEBUG_SHIRQ is not set | 2307 | # CONFIG_DEBUG_SHIRQ is not set |
2253 | # CONFIG_DETECT_SOFTLOCKUP is not set | 2308 | # CONFIG_DETECT_SOFTLOCKUP is not set |
2309 | # CONFIG_DETECT_HUNG_TASK is not set | ||
2254 | # CONFIG_SCHED_DEBUG is not set | 2310 | # CONFIG_SCHED_DEBUG is not set |
2255 | CONFIG_SCHEDSTATS=y | 2311 | CONFIG_SCHEDSTATS=y |
2256 | CONFIG_TIMER_STATS=y | 2312 | CONFIG_TIMER_STATS=y |
@@ -2266,6 +2322,7 @@ CONFIG_TIMER_STATS=y | |||
2266 | # CONFIG_LOCK_STAT is not set | 2322 | # CONFIG_LOCK_STAT is not set |
2267 | # CONFIG_DEBUG_SPINLOCK_SLEEP is not set | 2323 | # CONFIG_DEBUG_SPINLOCK_SLEEP is not set |
2268 | # CONFIG_DEBUG_LOCKING_API_SELFTESTS is not set | 2324 | # CONFIG_DEBUG_LOCKING_API_SELFTESTS is not set |
2325 | CONFIG_STACKTRACE=y | ||
2269 | # CONFIG_DEBUG_KOBJECT is not set | 2326 | # CONFIG_DEBUG_KOBJECT is not set |
2270 | # CONFIG_DEBUG_HIGHMEM is not set | 2327 | # CONFIG_DEBUG_HIGHMEM is not set |
2271 | CONFIG_DEBUG_BUGVERBOSE=y | 2328 | CONFIG_DEBUG_BUGVERBOSE=y |
@@ -2289,13 +2346,19 @@ CONFIG_FRAME_POINTER=y | |||
2289 | # CONFIG_FAULT_INJECTION is not set | 2346 | # CONFIG_FAULT_INJECTION is not set |
2290 | # CONFIG_LATENCYTOP is not set | 2347 | # CONFIG_LATENCYTOP is not set |
2291 | CONFIG_SYSCTL_SYSCALL_CHECK=y | 2348 | CONFIG_SYSCTL_SYSCALL_CHECK=y |
2349 | # CONFIG_DEBUG_PAGEALLOC is not set | ||
2292 | CONFIG_USER_STACKTRACE_SUPPORT=y | 2350 | CONFIG_USER_STACKTRACE_SUPPORT=y |
2351 | CONFIG_NOP_TRACER=y | ||
2293 | CONFIG_HAVE_FUNCTION_TRACER=y | 2352 | CONFIG_HAVE_FUNCTION_TRACER=y |
2294 | CONFIG_HAVE_FUNCTION_GRAPH_TRACER=y | 2353 | CONFIG_HAVE_FUNCTION_GRAPH_TRACER=y |
2295 | CONFIG_HAVE_FUNCTION_TRACE_MCOUNT_TEST=y | 2354 | CONFIG_HAVE_FUNCTION_TRACE_MCOUNT_TEST=y |
2296 | CONFIG_HAVE_DYNAMIC_FTRACE=y | 2355 | CONFIG_HAVE_DYNAMIC_FTRACE=y |
2297 | CONFIG_HAVE_FTRACE_MCOUNT_RECORD=y | 2356 | CONFIG_HAVE_FTRACE_MCOUNT_RECORD=y |
2298 | CONFIG_HAVE_HW_BRANCH_TRACER=y | 2357 | CONFIG_HAVE_HW_BRANCH_TRACER=y |
2358 | CONFIG_HAVE_FTRACE_SYSCALLS=y | ||
2359 | CONFIG_RING_BUFFER=y | ||
2360 | CONFIG_TRACING=y | ||
2361 | CONFIG_TRACING_SUPPORT=y | ||
2299 | 2362 | ||
2300 | # | 2363 | # |
2301 | # Tracers | 2364 | # Tracers |
@@ -2305,13 +2368,21 @@ CONFIG_HAVE_HW_BRANCH_TRACER=y | |||
2305 | # CONFIG_SYSPROF_TRACER is not set | 2368 | # CONFIG_SYSPROF_TRACER is not set |
2306 | # CONFIG_SCHED_TRACER is not set | 2369 | # CONFIG_SCHED_TRACER is not set |
2307 | # CONFIG_CONTEXT_SWITCH_TRACER is not set | 2370 | # CONFIG_CONTEXT_SWITCH_TRACER is not set |
2371 | # CONFIG_EVENT_TRACER is not set | ||
2372 | # CONFIG_FTRACE_SYSCALLS is not set | ||
2308 | # CONFIG_BOOT_TRACER is not set | 2373 | # CONFIG_BOOT_TRACER is not set |
2309 | # CONFIG_TRACE_BRANCH_PROFILING is not set | 2374 | # CONFIG_TRACE_BRANCH_PROFILING is not set |
2310 | # CONFIG_POWER_TRACER is not set | 2375 | # CONFIG_POWER_TRACER is not set |
2311 | # CONFIG_STACK_TRACER is not set | 2376 | # CONFIG_STACK_TRACER is not set |
2312 | # CONFIG_HW_BRANCH_TRACER is not set | 2377 | # CONFIG_HW_BRANCH_TRACER is not set |
2378 | # CONFIG_KMEMTRACE is not set | ||
2379 | # CONFIG_WORKQUEUE_TRACER is not set | ||
2380 | CONFIG_BLK_DEV_IO_TRACE=y | ||
2381 | # CONFIG_FTRACE_STARTUP_TEST is not set | ||
2382 | # CONFIG_MMIOTRACE is not set | ||
2313 | CONFIG_PROVIDE_OHCI1394_DMA_INIT=y | 2383 | CONFIG_PROVIDE_OHCI1394_DMA_INIT=y |
2314 | # CONFIG_DYNAMIC_PRINTK_DEBUG is not set | 2384 | # CONFIG_DYNAMIC_DEBUG is not set |
2385 | # CONFIG_DMA_API_DEBUG is not set | ||
2315 | # CONFIG_SAMPLES is not set | 2386 | # CONFIG_SAMPLES is not set |
2316 | CONFIG_HAVE_ARCH_KGDB=y | 2387 | CONFIG_HAVE_ARCH_KGDB=y |
2317 | # CONFIG_KGDB is not set | 2388 | # CONFIG_KGDB is not set |
@@ -2321,7 +2392,6 @@ CONFIG_EARLY_PRINTK=y | |||
2321 | CONFIG_EARLY_PRINTK_DBGP=y | 2392 | CONFIG_EARLY_PRINTK_DBGP=y |
2322 | CONFIG_DEBUG_STACKOVERFLOW=y | 2393 | CONFIG_DEBUG_STACKOVERFLOW=y |
2323 | CONFIG_DEBUG_STACK_USAGE=y | 2394 | CONFIG_DEBUG_STACK_USAGE=y |
2324 | # CONFIG_DEBUG_PAGEALLOC is not set | ||
2325 | # CONFIG_DEBUG_PER_CPU_MAPS is not set | 2395 | # CONFIG_DEBUG_PER_CPU_MAPS is not set |
2326 | # CONFIG_X86_PTDUMP is not set | 2396 | # CONFIG_X86_PTDUMP is not set |
2327 | CONFIG_DEBUG_RODATA=y | 2397 | CONFIG_DEBUG_RODATA=y |
@@ -2329,7 +2399,7 @@ CONFIG_DEBUG_RODATA=y | |||
2329 | CONFIG_DEBUG_NX_TEST=m | 2399 | CONFIG_DEBUG_NX_TEST=m |
2330 | # CONFIG_4KSTACKS is not set | 2400 | # CONFIG_4KSTACKS is not set |
2331 | CONFIG_DOUBLEFAULT=y | 2401 | CONFIG_DOUBLEFAULT=y |
2332 | # CONFIG_MMIOTRACE is not set | 2402 | CONFIG_HAVE_MMIOTRACE_SUPPORT=y |
2333 | CONFIG_IO_DELAY_TYPE_0X80=0 | 2403 | CONFIG_IO_DELAY_TYPE_0X80=0 |
2334 | CONFIG_IO_DELAY_TYPE_0XED=1 | 2404 | CONFIG_IO_DELAY_TYPE_0XED=1 |
2335 | CONFIG_IO_DELAY_TYPE_UDELAY=2 | 2405 | CONFIG_IO_DELAY_TYPE_UDELAY=2 |
@@ -2365,6 +2435,8 @@ CONFIG_SECURITY_SELINUX_AVC_STATS=y | |||
2365 | CONFIG_SECURITY_SELINUX_CHECKREQPROT_VALUE=1 | 2435 | CONFIG_SECURITY_SELINUX_CHECKREQPROT_VALUE=1 |
2366 | # CONFIG_SECURITY_SELINUX_POLICYDB_VERSION_MAX is not set | 2436 | # CONFIG_SECURITY_SELINUX_POLICYDB_VERSION_MAX is not set |
2367 | # CONFIG_SECURITY_SMACK is not set | 2437 | # CONFIG_SECURITY_SMACK is not set |
2438 | # CONFIG_SECURITY_TOMOYO is not set | ||
2439 | # CONFIG_IMA is not set | ||
2368 | CONFIG_CRYPTO=y | 2440 | CONFIG_CRYPTO=y |
2369 | 2441 | ||
2370 | # | 2442 | # |
@@ -2380,10 +2452,12 @@ CONFIG_CRYPTO_BLKCIPHER2=y | |||
2380 | CONFIG_CRYPTO_HASH=y | 2452 | CONFIG_CRYPTO_HASH=y |
2381 | CONFIG_CRYPTO_HASH2=y | 2453 | CONFIG_CRYPTO_HASH2=y |
2382 | CONFIG_CRYPTO_RNG2=y | 2454 | CONFIG_CRYPTO_RNG2=y |
2455 | CONFIG_CRYPTO_PCOMP=y | ||
2383 | CONFIG_CRYPTO_MANAGER=y | 2456 | CONFIG_CRYPTO_MANAGER=y |
2384 | CONFIG_CRYPTO_MANAGER2=y | 2457 | CONFIG_CRYPTO_MANAGER2=y |
2385 | # CONFIG_CRYPTO_GF128MUL is not set | 2458 | # CONFIG_CRYPTO_GF128MUL is not set |
2386 | # CONFIG_CRYPTO_NULL is not set | 2459 | # CONFIG_CRYPTO_NULL is not set |
2460 | CONFIG_CRYPTO_WORKQUEUE=y | ||
2387 | # CONFIG_CRYPTO_CRYPTD is not set | 2461 | # CONFIG_CRYPTO_CRYPTD is not set |
2388 | CONFIG_CRYPTO_AUTHENC=y | 2462 | CONFIG_CRYPTO_AUTHENC=y |
2389 | # CONFIG_CRYPTO_TEST is not set | 2463 | # CONFIG_CRYPTO_TEST is not set |
@@ -2456,6 +2530,7 @@ CONFIG_CRYPTO_DES=y | |||
2456 | # Compression | 2530 | # Compression |
2457 | # | 2531 | # |
2458 | # CONFIG_CRYPTO_DEFLATE is not set | 2532 | # CONFIG_CRYPTO_DEFLATE is not set |
2533 | # CONFIG_CRYPTO_ZLIB is not set | ||
2459 | # CONFIG_CRYPTO_LZO is not set | 2534 | # CONFIG_CRYPTO_LZO is not set |
2460 | 2535 | ||
2461 | # | 2536 | # |
@@ -2467,11 +2542,13 @@ CONFIG_CRYPTO_HW=y | |||
2467 | # CONFIG_CRYPTO_DEV_GEODE is not set | 2542 | # CONFIG_CRYPTO_DEV_GEODE is not set |
2468 | # CONFIG_CRYPTO_DEV_HIFN_795X is not set | 2543 | # CONFIG_CRYPTO_DEV_HIFN_795X is not set |
2469 | CONFIG_HAVE_KVM=y | 2544 | CONFIG_HAVE_KVM=y |
2545 | CONFIG_HAVE_KVM_IRQCHIP=y | ||
2470 | CONFIG_VIRTUALIZATION=y | 2546 | CONFIG_VIRTUALIZATION=y |
2471 | # CONFIG_KVM is not set | 2547 | # CONFIG_KVM is not set |
2472 | # CONFIG_LGUEST is not set | 2548 | # CONFIG_LGUEST is not set |
2473 | # CONFIG_VIRTIO_PCI is not set | 2549 | # CONFIG_VIRTIO_PCI is not set |
2474 | # CONFIG_VIRTIO_BALLOON is not set | 2550 | # CONFIG_VIRTIO_BALLOON is not set |
2551 | CONFIG_BINARY_PRINTF=y | ||
2475 | 2552 | ||
2476 | # | 2553 | # |
2477 | # Library routines | 2554 | # Library routines |
@@ -2489,7 +2566,10 @@ CONFIG_CRC32=y | |||
2489 | # CONFIG_LIBCRC32C is not set | 2566 | # CONFIG_LIBCRC32C is not set |
2490 | CONFIG_AUDIT_GENERIC=y | 2567 | CONFIG_AUDIT_GENERIC=y |
2491 | CONFIG_ZLIB_INFLATE=y | 2568 | CONFIG_ZLIB_INFLATE=y |
2492 | CONFIG_PLIST=y | 2569 | CONFIG_DECOMPRESS_GZIP=y |
2570 | CONFIG_DECOMPRESS_BZIP2=y | ||
2571 | CONFIG_DECOMPRESS_LZMA=y | ||
2493 | CONFIG_HAS_IOMEM=y | 2572 | CONFIG_HAS_IOMEM=y |
2494 | CONFIG_HAS_IOPORT=y | 2573 | CONFIG_HAS_IOPORT=y |
2495 | CONFIG_HAS_DMA=y | 2574 | CONFIG_HAS_DMA=y |
2575 | CONFIG_NLATTR=y | ||
diff --git a/arch/x86/configs/x86_64_defconfig b/arch/x86/configs/x86_64_defconfig index 9fe5d212ab4c..cee1dd2e69b2 100644 --- a/arch/x86/configs/x86_64_defconfig +++ b/arch/x86/configs/x86_64_defconfig | |||
@@ -1,12 +1,13 @@ | |||
1 | # | 1 | # |
2 | # Automatically generated make config: don't edit | 2 | # Automatically generated make config: don't edit |
3 | # Linux kernel version: 2.6.29-rc4 | 3 | # Linux kernel version: 2.6.30-rc2 |
4 | # Tue Feb 24 15:44:16 2009 | 4 | # Mon May 11 16:22:00 2009 |
5 | # | 5 | # |
6 | CONFIG_64BIT=y | 6 | CONFIG_64BIT=y |
7 | # CONFIG_X86_32 is not set | 7 | # CONFIG_X86_32 is not set |
8 | CONFIG_X86_64=y | 8 | CONFIG_X86_64=y |
9 | CONFIG_X86=y | 9 | CONFIG_X86=y |
10 | CONFIG_OUTPUT_FORMAT="elf64-x86-64" | ||
10 | CONFIG_ARCH_DEFCONFIG="arch/x86/configs/x86_64_defconfig" | 11 | CONFIG_ARCH_DEFCONFIG="arch/x86/configs/x86_64_defconfig" |
11 | CONFIG_GENERIC_TIME=y | 12 | CONFIG_GENERIC_TIME=y |
12 | CONFIG_GENERIC_CMOS_UPDATE=y | 13 | CONFIG_GENERIC_CMOS_UPDATE=y |
@@ -34,6 +35,7 @@ CONFIG_ARCH_HAS_CPU_RELAX=y | |||
34 | CONFIG_ARCH_HAS_DEFAULT_IDLE=y | 35 | CONFIG_ARCH_HAS_DEFAULT_IDLE=y |
35 | CONFIG_ARCH_HAS_CACHE_LINE_SIZE=y | 36 | CONFIG_ARCH_HAS_CACHE_LINE_SIZE=y |
36 | CONFIG_HAVE_SETUP_PER_CPU_AREA=y | 37 | CONFIG_HAVE_SETUP_PER_CPU_AREA=y |
38 | CONFIG_HAVE_DYNAMIC_PER_CPU_AREA=y | ||
37 | CONFIG_HAVE_CPUMASK_OF_CPU_MAP=y | 39 | CONFIG_HAVE_CPUMASK_OF_CPU_MAP=y |
38 | CONFIG_ARCH_HIBERNATION_POSSIBLE=y | 40 | CONFIG_ARCH_HIBERNATION_POSSIBLE=y |
39 | CONFIG_ARCH_SUSPEND_POSSIBLE=y | 41 | CONFIG_ARCH_SUSPEND_POSSIBLE=y |
@@ -41,14 +43,14 @@ CONFIG_ZONE_DMA32=y | |||
41 | CONFIG_ARCH_POPULATES_NODE_MAP=y | 43 | CONFIG_ARCH_POPULATES_NODE_MAP=y |
42 | CONFIG_AUDIT_ARCH=y | 44 | CONFIG_AUDIT_ARCH=y |
43 | CONFIG_ARCH_SUPPORTS_OPTIMIZED_INLINING=y | 45 | CONFIG_ARCH_SUPPORTS_OPTIMIZED_INLINING=y |
46 | CONFIG_ARCH_SUPPORTS_DEBUG_PAGEALLOC=y | ||
44 | CONFIG_GENERIC_HARDIRQS=y | 47 | CONFIG_GENERIC_HARDIRQS=y |
48 | CONFIG_GENERIC_HARDIRQS_NO__DO_IRQ=y | ||
45 | CONFIG_GENERIC_IRQ_PROBE=y | 49 | CONFIG_GENERIC_IRQ_PROBE=y |
46 | CONFIG_GENERIC_PENDING_IRQ=y | 50 | CONFIG_GENERIC_PENDING_IRQ=y |
47 | CONFIG_X86_SMP=y | ||
48 | CONFIG_USE_GENERIC_SMP_HELPERS=y | 51 | CONFIG_USE_GENERIC_SMP_HELPERS=y |
49 | CONFIG_X86_64_SMP=y | 52 | CONFIG_X86_64_SMP=y |
50 | CONFIG_X86_HT=y | 53 | CONFIG_X86_HT=y |
51 | CONFIG_X86_BIOS_REBOOT=y | ||
52 | CONFIG_X86_TRAMPOLINE=y | 54 | CONFIG_X86_TRAMPOLINE=y |
53 | # CONFIG_KTIME_SCALAR is not set | 55 | # CONFIG_KTIME_SCALAR is not set |
54 | CONFIG_DEFCONFIG_LIST="/lib/modules/$UNAME_RELEASE/.config" | 56 | CONFIG_DEFCONFIG_LIST="/lib/modules/$UNAME_RELEASE/.config" |
@@ -61,10 +63,17 @@ CONFIG_LOCK_KERNEL=y | |||
61 | CONFIG_INIT_ENV_ARG_LIMIT=32 | 63 | CONFIG_INIT_ENV_ARG_LIMIT=32 |
62 | CONFIG_LOCALVERSION="" | 64 | CONFIG_LOCALVERSION="" |
63 | # CONFIG_LOCALVERSION_AUTO is not set | 65 | # CONFIG_LOCALVERSION_AUTO is not set |
66 | CONFIG_HAVE_KERNEL_GZIP=y | ||
67 | CONFIG_HAVE_KERNEL_BZIP2=y | ||
68 | CONFIG_HAVE_KERNEL_LZMA=y | ||
69 | CONFIG_KERNEL_GZIP=y | ||
70 | # CONFIG_KERNEL_BZIP2 is not set | ||
71 | # CONFIG_KERNEL_LZMA is not set | ||
64 | CONFIG_SWAP=y | 72 | CONFIG_SWAP=y |
65 | CONFIG_SYSVIPC=y | 73 | CONFIG_SYSVIPC=y |
66 | CONFIG_SYSVIPC_SYSCTL=y | 74 | CONFIG_SYSVIPC_SYSCTL=y |
67 | CONFIG_POSIX_MQUEUE=y | 75 | CONFIG_POSIX_MQUEUE=y |
76 | CONFIG_POSIX_MQUEUE_SYSCTL=y | ||
68 | CONFIG_BSD_PROCESS_ACCT=y | 77 | CONFIG_BSD_PROCESS_ACCT=y |
69 | # CONFIG_BSD_PROCESS_ACCT_V3 is not set | 78 | # CONFIG_BSD_PROCESS_ACCT_V3 is not set |
70 | CONFIG_TASKSTATS=y | 79 | CONFIG_TASKSTATS=y |
@@ -114,23 +123,26 @@ CONFIG_PID_NS=y | |||
114 | CONFIG_NET_NS=y | 123 | CONFIG_NET_NS=y |
115 | CONFIG_BLK_DEV_INITRD=y | 124 | CONFIG_BLK_DEV_INITRD=y |
116 | CONFIG_INITRAMFS_SOURCE="" | 125 | CONFIG_INITRAMFS_SOURCE="" |
126 | CONFIG_RD_GZIP=y | ||
127 | CONFIG_RD_BZIP2=y | ||
128 | CONFIG_RD_LZMA=y | ||
117 | CONFIG_CC_OPTIMIZE_FOR_SIZE=y | 129 | CONFIG_CC_OPTIMIZE_FOR_SIZE=y |
118 | CONFIG_SYSCTL=y | 130 | CONFIG_SYSCTL=y |
131 | CONFIG_ANON_INODES=y | ||
119 | # CONFIG_EMBEDDED is not set | 132 | # CONFIG_EMBEDDED is not set |
120 | CONFIG_UID16=y | 133 | CONFIG_UID16=y |
121 | CONFIG_SYSCTL_SYSCALL=y | 134 | CONFIG_SYSCTL_SYSCALL=y |
122 | CONFIG_KALLSYMS=y | 135 | CONFIG_KALLSYMS=y |
123 | CONFIG_KALLSYMS_ALL=y | 136 | CONFIG_KALLSYMS_ALL=y |
124 | CONFIG_KALLSYMS_EXTRA_PASS=y | 137 | CONFIG_KALLSYMS_EXTRA_PASS=y |
138 | # CONFIG_STRIP_ASM_SYMS is not set | ||
125 | CONFIG_HOTPLUG=y | 139 | CONFIG_HOTPLUG=y |
126 | CONFIG_PRINTK=y | 140 | CONFIG_PRINTK=y |
127 | CONFIG_BUG=y | 141 | CONFIG_BUG=y |
128 | CONFIG_ELF_CORE=y | 142 | CONFIG_ELF_CORE=y |
129 | CONFIG_PCSPKR_PLATFORM=y | 143 | CONFIG_PCSPKR_PLATFORM=y |
130 | # CONFIG_COMPAT_BRK is not set | ||
131 | CONFIG_BASE_FULL=y | 144 | CONFIG_BASE_FULL=y |
132 | CONFIG_FUTEX=y | 145 | CONFIG_FUTEX=y |
133 | CONFIG_ANON_INODES=y | ||
134 | CONFIG_EPOLL=y | 146 | CONFIG_EPOLL=y |
135 | CONFIG_SIGNALFD=y | 147 | CONFIG_SIGNALFD=y |
136 | CONFIG_TIMERFD=y | 148 | CONFIG_TIMERFD=y |
@@ -140,6 +152,7 @@ CONFIG_AIO=y | |||
140 | CONFIG_VM_EVENT_COUNTERS=y | 152 | CONFIG_VM_EVENT_COUNTERS=y |
141 | CONFIG_PCI_QUIRKS=y | 153 | CONFIG_PCI_QUIRKS=y |
142 | CONFIG_SLUB_DEBUG=y | 154 | CONFIG_SLUB_DEBUG=y |
155 | # CONFIG_COMPAT_BRK is not set | ||
143 | # CONFIG_SLAB is not set | 156 | # CONFIG_SLAB is not set |
144 | CONFIG_SLUB=y | 157 | CONFIG_SLUB=y |
145 | # CONFIG_SLOB is not set | 158 | # CONFIG_SLOB is not set |
@@ -155,6 +168,8 @@ CONFIG_HAVE_IOREMAP_PROT=y | |||
155 | CONFIG_HAVE_KPROBES=y | 168 | CONFIG_HAVE_KPROBES=y |
156 | CONFIG_HAVE_KRETPROBES=y | 169 | CONFIG_HAVE_KRETPROBES=y |
157 | CONFIG_HAVE_ARCH_TRACEHOOK=y | 170 | CONFIG_HAVE_ARCH_TRACEHOOK=y |
171 | CONFIG_HAVE_DMA_API_DEBUG=y | ||
172 | # CONFIG_SLOW_WORK is not set | ||
158 | # CONFIG_HAVE_GENERIC_DMA_COHERENT is not set | 173 | # CONFIG_HAVE_GENERIC_DMA_COHERENT is not set |
159 | CONFIG_SLABINFO=y | 174 | CONFIG_SLABINFO=y |
160 | CONFIG_RT_MUTEXES=y | 175 | CONFIG_RT_MUTEXES=y |
@@ -167,7 +182,6 @@ CONFIG_MODULE_FORCE_UNLOAD=y | |||
167 | # CONFIG_MODULE_SRCVERSION_ALL is not set | 182 | # CONFIG_MODULE_SRCVERSION_ALL is not set |
168 | CONFIG_STOP_MACHINE=y | 183 | CONFIG_STOP_MACHINE=y |
169 | CONFIG_BLOCK=y | 184 | CONFIG_BLOCK=y |
170 | CONFIG_BLK_DEV_IO_TRACE=y | ||
171 | CONFIG_BLK_DEV_BSG=y | 185 | CONFIG_BLK_DEV_BSG=y |
172 | # CONFIG_BLK_DEV_INTEGRITY is not set | 186 | # CONFIG_BLK_DEV_INTEGRITY is not set |
173 | CONFIG_BLOCK_COMPAT=y | 187 | CONFIG_BLOCK_COMPAT=y |
@@ -195,12 +209,10 @@ CONFIG_HIGH_RES_TIMERS=y | |||
195 | CONFIG_GENERIC_CLOCKEVENTS_BUILD=y | 209 | CONFIG_GENERIC_CLOCKEVENTS_BUILD=y |
196 | CONFIG_SMP=y | 210 | CONFIG_SMP=y |
197 | CONFIG_SPARSE_IRQ=y | 211 | CONFIG_SPARSE_IRQ=y |
198 | # CONFIG_NUMA_MIGRATE_IRQ_DESC is not set | ||
199 | CONFIG_X86_FIND_SMP_CONFIG=y | ||
200 | CONFIG_X86_MPPARSE=y | 212 | CONFIG_X86_MPPARSE=y |
201 | # CONFIG_X86_ELAN is not set | 213 | CONFIG_X86_EXTENDED_PLATFORM=y |
202 | # CONFIG_X86_GENERICARCH is not set | ||
203 | # CONFIG_X86_VSMP is not set | 214 | # CONFIG_X86_VSMP is not set |
215 | # CONFIG_X86_UV is not set | ||
204 | CONFIG_SCHED_OMIT_FRAME_POINTER=y | 216 | CONFIG_SCHED_OMIT_FRAME_POINTER=y |
205 | # CONFIG_PARAVIRT_GUEST is not set | 217 | # CONFIG_PARAVIRT_GUEST is not set |
206 | # CONFIG_MEMTEST is not set | 218 | # CONFIG_MEMTEST is not set |
@@ -230,10 +242,10 @@ CONFIG_SCHED_OMIT_FRAME_POINTER=y | |||
230 | # CONFIG_MCORE2 is not set | 242 | # CONFIG_MCORE2 is not set |
231 | CONFIG_GENERIC_CPU=y | 243 | CONFIG_GENERIC_CPU=y |
232 | CONFIG_X86_CPU=y | 244 | CONFIG_X86_CPU=y |
233 | CONFIG_X86_L1_CACHE_BYTES=128 | 245 | CONFIG_X86_L1_CACHE_BYTES=64 |
234 | CONFIG_X86_INTERNODE_CACHE_BYTES=128 | 246 | CONFIG_X86_INTERNODE_CACHE_BYTES=64 |
235 | CONFIG_X86_CMPXCHG=y | 247 | CONFIG_X86_CMPXCHG=y |
236 | CONFIG_X86_L1_CACHE_SHIFT=7 | 248 | CONFIG_X86_L1_CACHE_SHIFT=6 |
237 | CONFIG_X86_WP_WORKS_OK=y | 249 | CONFIG_X86_WP_WORKS_OK=y |
238 | CONFIG_X86_TSC=y | 250 | CONFIG_X86_TSC=y |
239 | CONFIG_X86_CMPXCHG64=y | 251 | CONFIG_X86_CMPXCHG64=y |
@@ -242,7 +254,7 @@ CONFIG_X86_MINIMUM_CPU_FAMILY=64 | |||
242 | CONFIG_X86_DEBUGCTLMSR=y | 254 | CONFIG_X86_DEBUGCTLMSR=y |
243 | CONFIG_CPU_SUP_INTEL=y | 255 | CONFIG_CPU_SUP_INTEL=y |
244 | CONFIG_CPU_SUP_AMD=y | 256 | CONFIG_CPU_SUP_AMD=y |
245 | CONFIG_CPU_SUP_CENTAUR_64=y | 257 | CONFIG_CPU_SUP_CENTAUR=y |
246 | CONFIG_X86_DS=y | 258 | CONFIG_X86_DS=y |
247 | CONFIG_X86_PTRACE_BTS=y | 259 | CONFIG_X86_PTRACE_BTS=y |
248 | CONFIG_HPET_TIMER=y | 260 | CONFIG_HPET_TIMER=y |
@@ -269,6 +281,7 @@ CONFIG_X86_REROUTE_FOR_BROKEN_BOOT_IRQS=y | |||
269 | CONFIG_X86_MCE=y | 281 | CONFIG_X86_MCE=y |
270 | CONFIG_X86_MCE_INTEL=y | 282 | CONFIG_X86_MCE_INTEL=y |
271 | CONFIG_X86_MCE_AMD=y | 283 | CONFIG_X86_MCE_AMD=y |
284 | CONFIG_X86_MCE_THRESHOLD=y | ||
272 | # CONFIG_I8K is not set | 285 | # CONFIG_I8K is not set |
273 | CONFIG_MICROCODE=y | 286 | CONFIG_MICROCODE=y |
274 | CONFIG_MICROCODE_INTEL=y | 287 | CONFIG_MICROCODE_INTEL=y |
@@ -276,6 +289,7 @@ CONFIG_MICROCODE_AMD=y | |||
276 | CONFIG_MICROCODE_OLD_INTERFACE=y | 289 | CONFIG_MICROCODE_OLD_INTERFACE=y |
277 | CONFIG_X86_MSR=y | 290 | CONFIG_X86_MSR=y |
278 | CONFIG_X86_CPUID=y | 291 | CONFIG_X86_CPUID=y |
292 | # CONFIG_X86_CPU_DEBUG is not set | ||
279 | CONFIG_ARCH_PHYS_ADDR_T_64BIT=y | 293 | CONFIG_ARCH_PHYS_ADDR_T_64BIT=y |
280 | CONFIG_DIRECT_GBPAGES=y | 294 | CONFIG_DIRECT_GBPAGES=y |
281 | CONFIG_NUMA=y | 295 | CONFIG_NUMA=y |
@@ -309,6 +323,8 @@ CONFIG_ZONE_DMA_FLAG=1 | |||
309 | CONFIG_BOUNCE=y | 323 | CONFIG_BOUNCE=y |
310 | CONFIG_VIRT_TO_BUS=y | 324 | CONFIG_VIRT_TO_BUS=y |
311 | CONFIG_UNEVICTABLE_LRU=y | 325 | CONFIG_UNEVICTABLE_LRU=y |
326 | CONFIG_HAVE_MLOCK=y | ||
327 | CONFIG_HAVE_MLOCKED_PAGE_BIT=y | ||
312 | CONFIG_X86_CHECK_BIOS_CORRUPTION=y | 328 | CONFIG_X86_CHECK_BIOS_CORRUPTION=y |
313 | CONFIG_X86_BOOTPARAM_MEMORY_CORRUPTION_CHECK=y | 329 | CONFIG_X86_BOOTPARAM_MEMORY_CORRUPTION_CHECK=y |
314 | CONFIG_X86_RESERVE_LOW_64K=y | 330 | CONFIG_X86_RESERVE_LOW_64K=y |
@@ -317,6 +333,7 @@ CONFIG_MTRR=y | |||
317 | CONFIG_X86_PAT=y | 333 | CONFIG_X86_PAT=y |
318 | CONFIG_EFI=y | 334 | CONFIG_EFI=y |
319 | CONFIG_SECCOMP=y | 335 | CONFIG_SECCOMP=y |
336 | # CONFIG_CC_STACKPROTECTOR is not set | ||
320 | # CONFIG_HZ_100 is not set | 337 | # CONFIG_HZ_100 is not set |
321 | # CONFIG_HZ_250 is not set | 338 | # CONFIG_HZ_250 is not set |
322 | # CONFIG_HZ_300 is not set | 339 | # CONFIG_HZ_300 is not set |
@@ -325,9 +342,10 @@ CONFIG_HZ=1000 | |||
325 | CONFIG_SCHED_HRTICK=y | 342 | CONFIG_SCHED_HRTICK=y |
326 | CONFIG_KEXEC=y | 343 | CONFIG_KEXEC=y |
327 | CONFIG_CRASH_DUMP=y | 344 | CONFIG_CRASH_DUMP=y |
345 | # CONFIG_KEXEC_JUMP is not set | ||
328 | CONFIG_PHYSICAL_START=0x1000000 | 346 | CONFIG_PHYSICAL_START=0x1000000 |
329 | # CONFIG_RELOCATABLE is not set | 347 | CONFIG_RELOCATABLE=y |
330 | CONFIG_PHYSICAL_ALIGN=0x200000 | 348 | CONFIG_PHYSICAL_ALIGN=0x1000000 |
331 | CONFIG_HOTPLUG_CPU=y | 349 | CONFIG_HOTPLUG_CPU=y |
332 | # CONFIG_COMPAT_VDSO is not set | 350 | # CONFIG_COMPAT_VDSO is not set |
333 | # CONFIG_CMDLINE_BOOL is not set | 351 | # CONFIG_CMDLINE_BOOL is not set |
@@ -370,7 +388,6 @@ CONFIG_ACPI_NUMA=y | |||
370 | CONFIG_ACPI_BLACKLIST_YEAR=0 | 388 | CONFIG_ACPI_BLACKLIST_YEAR=0 |
371 | # CONFIG_ACPI_DEBUG is not set | 389 | # CONFIG_ACPI_DEBUG is not set |
372 | # CONFIG_ACPI_PCI_SLOT is not set | 390 | # CONFIG_ACPI_PCI_SLOT is not set |
373 | CONFIG_ACPI_SYSTEM=y | ||
374 | CONFIG_X86_PM_TIMER=y | 391 | CONFIG_X86_PM_TIMER=y |
375 | CONFIG_ACPI_CONTAINER=y | 392 | CONFIG_ACPI_CONTAINER=y |
376 | # CONFIG_ACPI_SBS is not set | 393 | # CONFIG_ACPI_SBS is not set |
@@ -436,6 +453,7 @@ CONFIG_PCI_MSI=y | |||
436 | # CONFIG_PCI_DEBUG is not set | 453 | # CONFIG_PCI_DEBUG is not set |
437 | # CONFIG_PCI_STUB is not set | 454 | # CONFIG_PCI_STUB is not set |
438 | CONFIG_HT_IRQ=y | 455 | CONFIG_HT_IRQ=y |
456 | # CONFIG_PCI_IOV is not set | ||
439 | CONFIG_ISA_DMA_API=y | 457 | CONFIG_ISA_DMA_API=y |
440 | CONFIG_K8_NB=y | 458 | CONFIG_K8_NB=y |
441 | CONFIG_PCCARD=y | 459 | CONFIG_PCCARD=y |
@@ -481,7 +499,6 @@ CONFIG_NET=y | |||
481 | # | 499 | # |
482 | # Networking options | 500 | # Networking options |
483 | # | 501 | # |
484 | CONFIG_COMPAT_NET_DEV_OPS=y | ||
485 | CONFIG_PACKET=y | 502 | CONFIG_PACKET=y |
486 | CONFIG_PACKET_MMAP=y | 503 | CONFIG_PACKET_MMAP=y |
487 | CONFIG_UNIX=y | 504 | CONFIG_UNIX=y |
@@ -639,6 +656,7 @@ CONFIG_LLC=y | |||
639 | # CONFIG_LAPB is not set | 656 | # CONFIG_LAPB is not set |
640 | # CONFIG_ECONET is not set | 657 | # CONFIG_ECONET is not set |
641 | # CONFIG_WAN_ROUTER is not set | 658 | # CONFIG_WAN_ROUTER is not set |
659 | # CONFIG_PHONET is not set | ||
642 | CONFIG_NET_SCHED=y | 660 | CONFIG_NET_SCHED=y |
643 | 661 | ||
644 | # | 662 | # |
@@ -696,6 +714,7 @@ CONFIG_NET_SCH_FIFO=y | |||
696 | # | 714 | # |
697 | # CONFIG_NET_PKTGEN is not set | 715 | # CONFIG_NET_PKTGEN is not set |
698 | # CONFIG_NET_TCPPROBE is not set | 716 | # CONFIG_NET_TCPPROBE is not set |
717 | # CONFIG_NET_DROP_MONITOR is not set | ||
699 | CONFIG_HAMRADIO=y | 718 | CONFIG_HAMRADIO=y |
700 | 719 | ||
701 | # | 720 | # |
@@ -706,12 +725,10 @@ CONFIG_HAMRADIO=y | |||
706 | # CONFIG_IRDA is not set | 725 | # CONFIG_IRDA is not set |
707 | # CONFIG_BT is not set | 726 | # CONFIG_BT is not set |
708 | # CONFIG_AF_RXRPC is not set | 727 | # CONFIG_AF_RXRPC is not set |
709 | # CONFIG_PHONET is not set | ||
710 | CONFIG_FIB_RULES=y | 728 | CONFIG_FIB_RULES=y |
711 | CONFIG_WIRELESS=y | 729 | CONFIG_WIRELESS=y |
712 | CONFIG_CFG80211=y | 730 | CONFIG_CFG80211=y |
713 | # CONFIG_CFG80211_REG_DEBUG is not set | 731 | # CONFIG_CFG80211_REG_DEBUG is not set |
714 | CONFIG_NL80211=y | ||
715 | CONFIG_WIRELESS_OLD_REGULATORY=y | 732 | CONFIG_WIRELESS_OLD_REGULATORY=y |
716 | CONFIG_WIRELESS_EXT=y | 733 | CONFIG_WIRELESS_EXT=y |
717 | CONFIG_WIRELESS_EXT_SYSFS=y | 734 | CONFIG_WIRELESS_EXT_SYSFS=y |
@@ -788,9 +805,8 @@ CONFIG_MISC_DEVICES=y | |||
788 | # CONFIG_TIFM_CORE is not set | 805 | # CONFIG_TIFM_CORE is not set |
789 | # CONFIG_ICS932S401 is not set | 806 | # CONFIG_ICS932S401 is not set |
790 | # CONFIG_ENCLOSURE_SERVICES is not set | 807 | # CONFIG_ENCLOSURE_SERVICES is not set |
791 | # CONFIG_SGI_XP is not set | ||
792 | # CONFIG_HP_ILO is not set | 808 | # CONFIG_HP_ILO is not set |
793 | # CONFIG_SGI_GRU is not set | 809 | # CONFIG_ISL29003 is not set |
794 | # CONFIG_C2PORT is not set | 810 | # CONFIG_C2PORT is not set |
795 | 811 | ||
796 | # | 812 | # |
@@ -844,6 +860,7 @@ CONFIG_SCSI_SPI_ATTRS=y | |||
844 | # CONFIG_SCSI_LOWLEVEL is not set | 860 | # CONFIG_SCSI_LOWLEVEL is not set |
845 | # CONFIG_SCSI_LOWLEVEL_PCMCIA is not set | 861 | # CONFIG_SCSI_LOWLEVEL_PCMCIA is not set |
846 | # CONFIG_SCSI_DH is not set | 862 | # CONFIG_SCSI_DH is not set |
863 | # CONFIG_SCSI_OSD_INITIATOR is not set | ||
847 | CONFIG_ATA=y | 864 | CONFIG_ATA=y |
848 | # CONFIG_ATA_NONSTANDARD is not set | 865 | # CONFIG_ATA_NONSTANDARD is not set |
849 | CONFIG_ATA_ACPI=y | 866 | CONFIG_ATA_ACPI=y |
@@ -940,6 +957,7 @@ CONFIG_DM_ZERO=y | |||
940 | CONFIG_MACINTOSH_DRIVERS=y | 957 | CONFIG_MACINTOSH_DRIVERS=y |
941 | CONFIG_MAC_EMUMOUSEBTN=y | 958 | CONFIG_MAC_EMUMOUSEBTN=y |
942 | CONFIG_NETDEVICES=y | 959 | CONFIG_NETDEVICES=y |
960 | CONFIG_COMPAT_NET_DEV_OPS=y | ||
943 | # CONFIG_IFB is not set | 961 | # CONFIG_IFB is not set |
944 | # CONFIG_DUMMY is not set | 962 | # CONFIG_DUMMY is not set |
945 | # CONFIG_BONDING is not set | 963 | # CONFIG_BONDING is not set |
@@ -977,6 +995,8 @@ CONFIG_MII=y | |||
977 | CONFIG_NET_VENDOR_3COM=y | 995 | CONFIG_NET_VENDOR_3COM=y |
978 | # CONFIG_VORTEX is not set | 996 | # CONFIG_VORTEX is not set |
979 | # CONFIG_TYPHOON is not set | 997 | # CONFIG_TYPHOON is not set |
998 | # CONFIG_ETHOC is not set | ||
999 | # CONFIG_DNET is not set | ||
980 | CONFIG_NET_TULIP=y | 1000 | CONFIG_NET_TULIP=y |
981 | # CONFIG_DE2104X is not set | 1001 | # CONFIG_DE2104X is not set |
982 | # CONFIG_TULIP is not set | 1002 | # CONFIG_TULIP is not set |
@@ -1026,6 +1046,7 @@ CONFIG_E1000=y | |||
1026 | # CONFIG_E1000E is not set | 1046 | # CONFIG_E1000E is not set |
1027 | # CONFIG_IP1000 is not set | 1047 | # CONFIG_IP1000 is not set |
1028 | # CONFIG_IGB is not set | 1048 | # CONFIG_IGB is not set |
1049 | # CONFIG_IGBVF is not set | ||
1029 | # CONFIG_NS83820 is not set | 1050 | # CONFIG_NS83820 is not set |
1030 | # CONFIG_HAMACHI is not set | 1051 | # CONFIG_HAMACHI is not set |
1031 | # CONFIG_YELLOWFIN is not set | 1052 | # CONFIG_YELLOWFIN is not set |
@@ -1040,6 +1061,7 @@ CONFIG_TIGON3=y | |||
1040 | # CONFIG_QLA3XXX is not set | 1061 | # CONFIG_QLA3XXX is not set |
1041 | # CONFIG_ATL1 is not set | 1062 | # CONFIG_ATL1 is not set |
1042 | # CONFIG_ATL1E is not set | 1063 | # CONFIG_ATL1E is not set |
1064 | # CONFIG_ATL1C is not set | ||
1043 | # CONFIG_JME is not set | 1065 | # CONFIG_JME is not set |
1044 | CONFIG_NETDEV_10000=y | 1066 | CONFIG_NETDEV_10000=y |
1045 | # CONFIG_CHELSIO_T1 is not set | 1067 | # CONFIG_CHELSIO_T1 is not set |
@@ -1049,6 +1071,7 @@ CONFIG_CHELSIO_T3_DEPENDS=y | |||
1049 | # CONFIG_IXGBE is not set | 1071 | # CONFIG_IXGBE is not set |
1050 | # CONFIG_IXGB is not set | 1072 | # CONFIG_IXGB is not set |
1051 | # CONFIG_S2IO is not set | 1073 | # CONFIG_S2IO is not set |
1074 | # CONFIG_VXGE is not set | ||
1052 | # CONFIG_MYRI10GE is not set | 1075 | # CONFIG_MYRI10GE is not set |
1053 | # CONFIG_NETXEN_NIC is not set | 1076 | # CONFIG_NETXEN_NIC is not set |
1054 | # CONFIG_NIU is not set | 1077 | # CONFIG_NIU is not set |
@@ -1058,6 +1081,7 @@ CONFIG_CHELSIO_T3_DEPENDS=y | |||
1058 | # CONFIG_BNX2X is not set | 1081 | # CONFIG_BNX2X is not set |
1059 | # CONFIG_QLGE is not set | 1082 | # CONFIG_QLGE is not set |
1060 | # CONFIG_SFC is not set | 1083 | # CONFIG_SFC is not set |
1084 | # CONFIG_BE2NET is not set | ||
1061 | CONFIG_TR=y | 1085 | CONFIG_TR=y |
1062 | # CONFIG_IBMOL is not set | 1086 | # CONFIG_IBMOL is not set |
1063 | # CONFIG_3C359 is not set | 1087 | # CONFIG_3C359 is not set |
@@ -1072,8 +1096,8 @@ CONFIG_WLAN_80211=y | |||
1072 | # CONFIG_LIBERTAS is not set | 1096 | # CONFIG_LIBERTAS is not set |
1073 | # CONFIG_LIBERTAS_THINFIRM is not set | 1097 | # CONFIG_LIBERTAS_THINFIRM is not set |
1074 | # CONFIG_AIRO is not set | 1098 | # CONFIG_AIRO is not set |
1075 | # CONFIG_HERMES is not set | ||
1076 | # CONFIG_ATMEL is not set | 1099 | # CONFIG_ATMEL is not set |
1100 | # CONFIG_AT76C50X_USB is not set | ||
1077 | # CONFIG_AIRO_CS is not set | 1101 | # CONFIG_AIRO_CS is not set |
1078 | # CONFIG_PCMCIA_WL3501 is not set | 1102 | # CONFIG_PCMCIA_WL3501 is not set |
1079 | # CONFIG_PRISM54 is not set | 1103 | # CONFIG_PRISM54 is not set |
@@ -1083,21 +1107,21 @@ CONFIG_WLAN_80211=y | |||
1083 | # CONFIG_RTL8187 is not set | 1107 | # CONFIG_RTL8187 is not set |
1084 | # CONFIG_ADM8211 is not set | 1108 | # CONFIG_ADM8211 is not set |
1085 | # CONFIG_MAC80211_HWSIM is not set | 1109 | # CONFIG_MAC80211_HWSIM is not set |
1110 | # CONFIG_MWL8K is not set | ||
1086 | # CONFIG_P54_COMMON is not set | 1111 | # CONFIG_P54_COMMON is not set |
1087 | CONFIG_ATH5K=y | 1112 | CONFIG_ATH5K=y |
1088 | # CONFIG_ATH5K_DEBUG is not set | 1113 | # CONFIG_ATH5K_DEBUG is not set |
1089 | # CONFIG_ATH9K is not set | 1114 | # CONFIG_ATH9K is not set |
1115 | # CONFIG_AR9170_USB is not set | ||
1090 | # CONFIG_IPW2100 is not set | 1116 | # CONFIG_IPW2100 is not set |
1091 | # CONFIG_IPW2200 is not set | 1117 | # CONFIG_IPW2200 is not set |
1092 | # CONFIG_IWLCORE is not set | 1118 | # CONFIG_IWLWIFI is not set |
1093 | # CONFIG_IWLWIFI_LEDS is not set | ||
1094 | # CONFIG_IWLAGN is not set | ||
1095 | # CONFIG_IWL3945 is not set | ||
1096 | # CONFIG_HOSTAP is not set | 1119 | # CONFIG_HOSTAP is not set |
1097 | # CONFIG_B43 is not set | 1120 | # CONFIG_B43 is not set |
1098 | # CONFIG_B43LEGACY is not set | 1121 | # CONFIG_B43LEGACY is not set |
1099 | # CONFIG_ZD1211RW is not set | 1122 | # CONFIG_ZD1211RW is not set |
1100 | # CONFIG_RT2X00 is not set | 1123 | # CONFIG_RT2X00 is not set |
1124 | # CONFIG_HERMES is not set | ||
1101 | 1125 | ||
1102 | # | 1126 | # |
1103 | # Enable WiMAX (Networking options) to see the WiMAX drivers | 1127 | # Enable WiMAX (Networking options) to see the WiMAX drivers |
@@ -1208,6 +1232,8 @@ CONFIG_INPUT_TABLET=y | |||
1208 | # CONFIG_TABLET_USB_KBTAB is not set | 1232 | # CONFIG_TABLET_USB_KBTAB is not set |
1209 | # CONFIG_TABLET_USB_WACOM is not set | 1233 | # CONFIG_TABLET_USB_WACOM is not set |
1210 | CONFIG_INPUT_TOUCHSCREEN=y | 1234 | CONFIG_INPUT_TOUCHSCREEN=y |
1235 | # CONFIG_TOUCHSCREEN_AD7879_I2C is not set | ||
1236 | # CONFIG_TOUCHSCREEN_AD7879 is not set | ||
1211 | # CONFIG_TOUCHSCREEN_FUJITSU is not set | 1237 | # CONFIG_TOUCHSCREEN_FUJITSU is not set |
1212 | # CONFIG_TOUCHSCREEN_GUNZE is not set | 1238 | # CONFIG_TOUCHSCREEN_GUNZE is not set |
1213 | # CONFIG_TOUCHSCREEN_ELO is not set | 1239 | # CONFIG_TOUCHSCREEN_ELO is not set |
@@ -1301,6 +1327,7 @@ CONFIG_UNIX98_PTYS=y | |||
1301 | # CONFIG_LEGACY_PTYS is not set | 1327 | # CONFIG_LEGACY_PTYS is not set |
1302 | # CONFIG_IPMI_HANDLER is not set | 1328 | # CONFIG_IPMI_HANDLER is not set |
1303 | CONFIG_HW_RANDOM=y | 1329 | CONFIG_HW_RANDOM=y |
1330 | # CONFIG_HW_RANDOM_TIMERIOMEM is not set | ||
1304 | # CONFIG_HW_RANDOM_INTEL is not set | 1331 | # CONFIG_HW_RANDOM_INTEL is not set |
1305 | # CONFIG_HW_RANDOM_AMD is not set | 1332 | # CONFIG_HW_RANDOM_AMD is not set |
1306 | CONFIG_NVRAM=y | 1333 | CONFIG_NVRAM=y |
@@ -1382,7 +1409,6 @@ CONFIG_I2C_I801=y | |||
1382 | # CONFIG_SENSORS_PCF8574 is not set | 1409 | # CONFIG_SENSORS_PCF8574 is not set |
1383 | # CONFIG_PCF8575 is not set | 1410 | # CONFIG_PCF8575 is not set |
1384 | # CONFIG_SENSORS_PCA9539 is not set | 1411 | # CONFIG_SENSORS_PCA9539 is not set |
1385 | # CONFIG_SENSORS_PCF8591 is not set | ||
1386 | # CONFIG_SENSORS_MAX6875 is not set | 1412 | # CONFIG_SENSORS_MAX6875 is not set |
1387 | # CONFIG_SENSORS_TSL2550 is not set | 1413 | # CONFIG_SENSORS_TSL2550 is not set |
1388 | # CONFIG_I2C_DEBUG_CORE is not set | 1414 | # CONFIG_I2C_DEBUG_CORE is not set |
@@ -1416,6 +1442,7 @@ CONFIG_HWMON=y | |||
1416 | # CONFIG_SENSORS_ADT7475 is not set | 1442 | # CONFIG_SENSORS_ADT7475 is not set |
1417 | # CONFIG_SENSORS_K8TEMP is not set | 1443 | # CONFIG_SENSORS_K8TEMP is not set |
1418 | # CONFIG_SENSORS_ASB100 is not set | 1444 | # CONFIG_SENSORS_ASB100 is not set |
1445 | # CONFIG_SENSORS_ATK0110 is not set | ||
1419 | # CONFIG_SENSORS_ATXP1 is not set | 1446 | # CONFIG_SENSORS_ATXP1 is not set |
1420 | # CONFIG_SENSORS_DS1621 is not set | 1447 | # CONFIG_SENSORS_DS1621 is not set |
1421 | # CONFIG_SENSORS_I5K_AMB is not set | 1448 | # CONFIG_SENSORS_I5K_AMB is not set |
@@ -1425,6 +1452,7 @@ CONFIG_HWMON=y | |||
1425 | # CONFIG_SENSORS_FSCHER is not set | 1452 | # CONFIG_SENSORS_FSCHER is not set |
1426 | # CONFIG_SENSORS_FSCPOS is not set | 1453 | # CONFIG_SENSORS_FSCPOS is not set |
1427 | # CONFIG_SENSORS_FSCHMD is not set | 1454 | # CONFIG_SENSORS_FSCHMD is not set |
1455 | # CONFIG_SENSORS_G760A is not set | ||
1428 | # CONFIG_SENSORS_GL518SM is not set | 1456 | # CONFIG_SENSORS_GL518SM is not set |
1429 | # CONFIG_SENSORS_GL520SM is not set | 1457 | # CONFIG_SENSORS_GL520SM is not set |
1430 | # CONFIG_SENSORS_CORETEMP is not set | 1458 | # CONFIG_SENSORS_CORETEMP is not set |
@@ -1440,11 +1468,14 @@ CONFIG_HWMON=y | |||
1440 | # CONFIG_SENSORS_LM90 is not set | 1468 | # CONFIG_SENSORS_LM90 is not set |
1441 | # CONFIG_SENSORS_LM92 is not set | 1469 | # CONFIG_SENSORS_LM92 is not set |
1442 | # CONFIG_SENSORS_LM93 is not set | 1470 | # CONFIG_SENSORS_LM93 is not set |
1471 | # CONFIG_SENSORS_LTC4215 is not set | ||
1443 | # CONFIG_SENSORS_LTC4245 is not set | 1472 | # CONFIG_SENSORS_LTC4245 is not set |
1473 | # CONFIG_SENSORS_LM95241 is not set | ||
1444 | # CONFIG_SENSORS_MAX1619 is not set | 1474 | # CONFIG_SENSORS_MAX1619 is not set |
1445 | # CONFIG_SENSORS_MAX6650 is not set | 1475 | # CONFIG_SENSORS_MAX6650 is not set |
1446 | # CONFIG_SENSORS_PC87360 is not set | 1476 | # CONFIG_SENSORS_PC87360 is not set |
1447 | # CONFIG_SENSORS_PC87427 is not set | 1477 | # CONFIG_SENSORS_PC87427 is not set |
1478 | # CONFIG_SENSORS_PCF8591 is not set | ||
1448 | # CONFIG_SENSORS_SIS5595 is not set | 1479 | # CONFIG_SENSORS_SIS5595 is not set |
1449 | # CONFIG_SENSORS_DME1737 is not set | 1480 | # CONFIG_SENSORS_DME1737 is not set |
1450 | # CONFIG_SENSORS_SMSC47M1 is not set | 1481 | # CONFIG_SENSORS_SMSC47M1 is not set |
@@ -1635,6 +1666,7 @@ CONFIG_FB_EFI=y | |||
1635 | # CONFIG_FB_VIRTUAL is not set | 1666 | # CONFIG_FB_VIRTUAL is not set |
1636 | # CONFIG_FB_METRONOME is not set | 1667 | # CONFIG_FB_METRONOME is not set |
1637 | # CONFIG_FB_MB862XX is not set | 1668 | # CONFIG_FB_MB862XX is not set |
1669 | # CONFIG_FB_BROADSHEET is not set | ||
1638 | CONFIG_BACKLIGHT_LCD_SUPPORT=y | 1670 | CONFIG_BACKLIGHT_LCD_SUPPORT=y |
1639 | # CONFIG_LCD_CLASS_DEVICE is not set | 1671 | # CONFIG_LCD_CLASS_DEVICE is not set |
1640 | CONFIG_BACKLIGHT_CLASS_DEVICE=y | 1672 | CONFIG_BACKLIGHT_CLASS_DEVICE=y |
@@ -1720,6 +1752,8 @@ CONFIG_SND_PCI=y | |||
1720 | # CONFIG_SND_INDIGO is not set | 1752 | # CONFIG_SND_INDIGO is not set |
1721 | # CONFIG_SND_INDIGOIO is not set | 1753 | # CONFIG_SND_INDIGOIO is not set |
1722 | # CONFIG_SND_INDIGODJ is not set | 1754 | # CONFIG_SND_INDIGODJ is not set |
1755 | # CONFIG_SND_INDIGOIOX is not set | ||
1756 | # CONFIG_SND_INDIGODJX is not set | ||
1723 | # CONFIG_SND_EMU10K1 is not set | 1757 | # CONFIG_SND_EMU10K1 is not set |
1724 | # CONFIG_SND_EMU10K1X is not set | 1758 | # CONFIG_SND_EMU10K1X is not set |
1725 | # CONFIG_SND_ENS1370 is not set | 1759 | # CONFIG_SND_ENS1370 is not set |
@@ -1792,15 +1826,17 @@ CONFIG_USB_HIDDEV=y | |||
1792 | # | 1826 | # |
1793 | # Special HID drivers | 1827 | # Special HID drivers |
1794 | # | 1828 | # |
1795 | CONFIG_HID_COMPAT=y | ||
1796 | CONFIG_HID_A4TECH=y | 1829 | CONFIG_HID_A4TECH=y |
1797 | CONFIG_HID_APPLE=y | 1830 | CONFIG_HID_APPLE=y |
1798 | CONFIG_HID_BELKIN=y | 1831 | CONFIG_HID_BELKIN=y |
1799 | CONFIG_HID_CHERRY=y | 1832 | CONFIG_HID_CHERRY=y |
1800 | CONFIG_HID_CHICONY=y | 1833 | CONFIG_HID_CHICONY=y |
1801 | CONFIG_HID_CYPRESS=y | 1834 | CONFIG_HID_CYPRESS=y |
1835 | # CONFIG_DRAGONRISE_FF is not set | ||
1802 | CONFIG_HID_EZKEY=y | 1836 | CONFIG_HID_EZKEY=y |
1837 | CONFIG_HID_KYE=y | ||
1803 | CONFIG_HID_GYRATION=y | 1838 | CONFIG_HID_GYRATION=y |
1839 | CONFIG_HID_KENSINGTON=y | ||
1804 | CONFIG_HID_LOGITECH=y | 1840 | CONFIG_HID_LOGITECH=y |
1805 | CONFIG_LOGITECH_FF=y | 1841 | CONFIG_LOGITECH_FF=y |
1806 | # CONFIG_LOGIRUMBLEPAD2_FF is not set | 1842 | # CONFIG_LOGIRUMBLEPAD2_FF is not set |
@@ -1866,11 +1902,11 @@ CONFIG_USB_PRINTER=y | |||
1866 | # CONFIG_USB_TMC is not set | 1902 | # CONFIG_USB_TMC is not set |
1867 | 1903 | ||
1868 | # | 1904 | # |
1869 | # NOTE: USB_STORAGE depends on SCSI but BLK_DEV_SD may also be needed; | 1905 | # NOTE: USB_STORAGE depends on SCSI but BLK_DEV_SD may |
1870 | # | 1906 | # |
1871 | 1907 | ||
1872 | # | 1908 | # |
1873 | # see USB_STORAGE Help for more information | 1909 | # also be needed; see USB_STORAGE Help for more info |
1874 | # | 1910 | # |
1875 | CONFIG_USB_STORAGE=y | 1911 | CONFIG_USB_STORAGE=y |
1876 | # CONFIG_USB_STORAGE_DEBUG is not set | 1912 | # CONFIG_USB_STORAGE_DEBUG is not set |
@@ -1912,7 +1948,6 @@ CONFIG_USB_LIBUSUAL=y | |||
1912 | # CONFIG_USB_LED is not set | 1948 | # CONFIG_USB_LED is not set |
1913 | # CONFIG_USB_CYPRESS_CY7C63 is not set | 1949 | # CONFIG_USB_CYPRESS_CY7C63 is not set |
1914 | # CONFIG_USB_CYTHERM is not set | 1950 | # CONFIG_USB_CYTHERM is not set |
1915 | # CONFIG_USB_PHIDGET is not set | ||
1916 | # CONFIG_USB_IDMOUSE is not set | 1951 | # CONFIG_USB_IDMOUSE is not set |
1917 | # CONFIG_USB_FTDI_ELAN is not set | 1952 | # CONFIG_USB_FTDI_ELAN is not set |
1918 | # CONFIG_USB_APPLEDISPLAY is not set | 1953 | # CONFIG_USB_APPLEDISPLAY is not set |
@@ -1928,6 +1963,7 @@ CONFIG_USB_LIBUSUAL=y | |||
1928 | # | 1963 | # |
1929 | # OTG and related infrastructure | 1964 | # OTG and related infrastructure |
1930 | # | 1965 | # |
1966 | # CONFIG_NOP_USB_XCEIV is not set | ||
1931 | # CONFIG_UWB is not set | 1967 | # CONFIG_UWB is not set |
1932 | # CONFIG_MMC is not set | 1968 | # CONFIG_MMC is not set |
1933 | # CONFIG_MEMSTICK is not set | 1969 | # CONFIG_MEMSTICK is not set |
@@ -1939,8 +1975,10 @@ CONFIG_LEDS_CLASS=y | |||
1939 | # | 1975 | # |
1940 | # CONFIG_LEDS_ALIX2 is not set | 1976 | # CONFIG_LEDS_ALIX2 is not set |
1941 | # CONFIG_LEDS_PCA9532 is not set | 1977 | # CONFIG_LEDS_PCA9532 is not set |
1978 | # CONFIG_LEDS_LP5521 is not set | ||
1942 | # CONFIG_LEDS_CLEVO_MAIL is not set | 1979 | # CONFIG_LEDS_CLEVO_MAIL is not set |
1943 | # CONFIG_LEDS_PCA955X is not set | 1980 | # CONFIG_LEDS_PCA955X is not set |
1981 | # CONFIG_LEDS_BD2802 is not set | ||
1944 | 1982 | ||
1945 | # | 1983 | # |
1946 | # LED Triggers | 1984 | # LED Triggers |
@@ -1950,6 +1988,10 @@ CONFIG_LEDS_TRIGGERS=y | |||
1950 | # CONFIG_LEDS_TRIGGER_HEARTBEAT is not set | 1988 | # CONFIG_LEDS_TRIGGER_HEARTBEAT is not set |
1951 | # CONFIG_LEDS_TRIGGER_BACKLIGHT is not set | 1989 | # CONFIG_LEDS_TRIGGER_BACKLIGHT is not set |
1952 | # CONFIG_LEDS_TRIGGER_DEFAULT_ON is not set | 1990 | # CONFIG_LEDS_TRIGGER_DEFAULT_ON is not set |
1991 | |||
1992 | # | ||
1993 | # iptables trigger is under Netfilter config (LED target) | ||
1994 | # | ||
1953 | # CONFIG_ACCESSIBILITY is not set | 1995 | # CONFIG_ACCESSIBILITY is not set |
1954 | # CONFIG_INFINIBAND is not set | 1996 | # CONFIG_INFINIBAND is not set |
1955 | CONFIG_EDAC=y | 1997 | CONFIG_EDAC=y |
@@ -2018,6 +2060,7 @@ CONFIG_DMADEVICES=y | |||
2018 | # DMA Devices | 2060 | # DMA Devices |
2019 | # | 2061 | # |
2020 | # CONFIG_INTEL_IOATDMA is not set | 2062 | # CONFIG_INTEL_IOATDMA is not set |
2063 | # CONFIG_AUXDISPLAY is not set | ||
2021 | # CONFIG_UIO is not set | 2064 | # CONFIG_UIO is not set |
2022 | # CONFIG_STAGING is not set | 2065 | # CONFIG_STAGING is not set |
2023 | CONFIG_X86_PLATFORM_DEVICES=y | 2066 | CONFIG_X86_PLATFORM_DEVICES=y |
@@ -2051,6 +2094,7 @@ CONFIG_DMIID=y | |||
2051 | # | 2094 | # |
2052 | # CONFIG_EXT2_FS is not set | 2095 | # CONFIG_EXT2_FS is not set |
2053 | CONFIG_EXT3_FS=y | 2096 | CONFIG_EXT3_FS=y |
2097 | # CONFIG_EXT3_DEFAULTS_TO_ORDERED is not set | ||
2054 | CONFIG_EXT3_FS_XATTR=y | 2098 | CONFIG_EXT3_FS_XATTR=y |
2055 | CONFIG_EXT3_FS_POSIX_ACL=y | 2099 | CONFIG_EXT3_FS_POSIX_ACL=y |
2056 | CONFIG_EXT3_FS_SECURITY=y | 2100 | CONFIG_EXT3_FS_SECURITY=y |
@@ -2082,6 +2126,11 @@ CONFIG_AUTOFS4_FS=y | |||
2082 | CONFIG_GENERIC_ACL=y | 2126 | CONFIG_GENERIC_ACL=y |
2083 | 2127 | ||
2084 | # | 2128 | # |
2129 | # Caches | ||
2130 | # | ||
2131 | # CONFIG_FSCACHE is not set | ||
2132 | |||
2133 | # | ||
2085 | # CD-ROM/DVD Filesystems | 2134 | # CD-ROM/DVD Filesystems |
2086 | # | 2135 | # |
2087 | CONFIG_ISO9660_FS=y | 2136 | CONFIG_ISO9660_FS=y |
@@ -2132,6 +2181,7 @@ CONFIG_MISC_FILESYSTEMS=y | |||
2132 | # CONFIG_ROMFS_FS is not set | 2181 | # CONFIG_ROMFS_FS is not set |
2133 | # CONFIG_SYSV_FS is not set | 2182 | # CONFIG_SYSV_FS is not set |
2134 | # CONFIG_UFS_FS is not set | 2183 | # CONFIG_UFS_FS is not set |
2184 | # CONFIG_NILFS2_FS is not set | ||
2135 | CONFIG_NETWORK_FILESYSTEMS=y | 2185 | CONFIG_NETWORK_FILESYSTEMS=y |
2136 | CONFIG_NFS_FS=y | 2186 | CONFIG_NFS_FS=y |
2137 | CONFIG_NFS_V3=y | 2187 | CONFIG_NFS_V3=y |
@@ -2145,7 +2195,6 @@ CONFIG_NFS_ACL_SUPPORT=y | |||
2145 | CONFIG_NFS_COMMON=y | 2195 | CONFIG_NFS_COMMON=y |
2146 | CONFIG_SUNRPC=y | 2196 | CONFIG_SUNRPC=y |
2147 | CONFIG_SUNRPC_GSS=y | 2197 | CONFIG_SUNRPC_GSS=y |
2148 | # CONFIG_SUNRPC_REGISTER_V4 is not set | ||
2149 | CONFIG_RPCSEC_GSS_KRB5=y | 2198 | CONFIG_RPCSEC_GSS_KRB5=y |
2150 | # CONFIG_RPCSEC_GSS_SPKM3 is not set | 2199 | # CONFIG_RPCSEC_GSS_SPKM3 is not set |
2151 | # CONFIG_SMB_FS is not set | 2200 | # CONFIG_SMB_FS is not set |
@@ -2232,6 +2281,7 @@ CONFIG_DEBUG_FS=y | |||
2232 | CONFIG_DEBUG_KERNEL=y | 2281 | CONFIG_DEBUG_KERNEL=y |
2233 | # CONFIG_DEBUG_SHIRQ is not set | 2282 | # CONFIG_DEBUG_SHIRQ is not set |
2234 | # CONFIG_DETECT_SOFTLOCKUP is not set | 2283 | # CONFIG_DETECT_SOFTLOCKUP is not set |
2284 | # CONFIG_DETECT_HUNG_TASK is not set | ||
2235 | # CONFIG_SCHED_DEBUG is not set | 2285 | # CONFIG_SCHED_DEBUG is not set |
2236 | CONFIG_SCHEDSTATS=y | 2286 | CONFIG_SCHEDSTATS=y |
2237 | CONFIG_TIMER_STATS=y | 2287 | CONFIG_TIMER_STATS=y |
@@ -2247,6 +2297,7 @@ CONFIG_TIMER_STATS=y | |||
2247 | # CONFIG_LOCK_STAT is not set | 2297 | # CONFIG_LOCK_STAT is not set |
2248 | # CONFIG_DEBUG_SPINLOCK_SLEEP is not set | 2298 | # CONFIG_DEBUG_SPINLOCK_SLEEP is not set |
2249 | # CONFIG_DEBUG_LOCKING_API_SELFTESTS is not set | 2299 | # CONFIG_DEBUG_LOCKING_API_SELFTESTS is not set |
2300 | CONFIG_STACKTRACE=y | ||
2250 | # CONFIG_DEBUG_KOBJECT is not set | 2301 | # CONFIG_DEBUG_KOBJECT is not set |
2251 | CONFIG_DEBUG_BUGVERBOSE=y | 2302 | CONFIG_DEBUG_BUGVERBOSE=y |
2252 | # CONFIG_DEBUG_INFO is not set | 2303 | # CONFIG_DEBUG_INFO is not set |
@@ -2269,13 +2320,19 @@ CONFIG_FRAME_POINTER=y | |||
2269 | # CONFIG_FAULT_INJECTION is not set | 2320 | # CONFIG_FAULT_INJECTION is not set |
2270 | # CONFIG_LATENCYTOP is not set | 2321 | # CONFIG_LATENCYTOP is not set |
2271 | CONFIG_SYSCTL_SYSCALL_CHECK=y | 2322 | CONFIG_SYSCTL_SYSCALL_CHECK=y |
2323 | # CONFIG_DEBUG_PAGEALLOC is not set | ||
2272 | CONFIG_USER_STACKTRACE_SUPPORT=y | 2324 | CONFIG_USER_STACKTRACE_SUPPORT=y |
2325 | CONFIG_NOP_TRACER=y | ||
2273 | CONFIG_HAVE_FUNCTION_TRACER=y | 2326 | CONFIG_HAVE_FUNCTION_TRACER=y |
2274 | CONFIG_HAVE_FUNCTION_GRAPH_TRACER=y | 2327 | CONFIG_HAVE_FUNCTION_GRAPH_TRACER=y |
2275 | CONFIG_HAVE_FUNCTION_TRACE_MCOUNT_TEST=y | 2328 | CONFIG_HAVE_FUNCTION_TRACE_MCOUNT_TEST=y |
2276 | CONFIG_HAVE_DYNAMIC_FTRACE=y | 2329 | CONFIG_HAVE_DYNAMIC_FTRACE=y |
2277 | CONFIG_HAVE_FTRACE_MCOUNT_RECORD=y | 2330 | CONFIG_HAVE_FTRACE_MCOUNT_RECORD=y |
2278 | CONFIG_HAVE_HW_BRANCH_TRACER=y | 2331 | CONFIG_HAVE_HW_BRANCH_TRACER=y |
2332 | CONFIG_HAVE_FTRACE_SYSCALLS=y | ||
2333 | CONFIG_RING_BUFFER=y | ||
2334 | CONFIG_TRACING=y | ||
2335 | CONFIG_TRACING_SUPPORT=y | ||
2279 | 2336 | ||
2280 | # | 2337 | # |
2281 | # Tracers | 2338 | # Tracers |
@@ -2285,13 +2342,21 @@ CONFIG_HAVE_HW_BRANCH_TRACER=y | |||
2285 | # CONFIG_SYSPROF_TRACER is not set | 2342 | # CONFIG_SYSPROF_TRACER is not set |
2286 | # CONFIG_SCHED_TRACER is not set | 2343 | # CONFIG_SCHED_TRACER is not set |
2287 | # CONFIG_CONTEXT_SWITCH_TRACER is not set | 2344 | # CONFIG_CONTEXT_SWITCH_TRACER is not set |
2345 | # CONFIG_EVENT_TRACER is not set | ||
2346 | # CONFIG_FTRACE_SYSCALLS is not set | ||
2288 | # CONFIG_BOOT_TRACER is not set | 2347 | # CONFIG_BOOT_TRACER is not set |
2289 | # CONFIG_TRACE_BRANCH_PROFILING is not set | 2348 | # CONFIG_TRACE_BRANCH_PROFILING is not set |
2290 | # CONFIG_POWER_TRACER is not set | 2349 | # CONFIG_POWER_TRACER is not set |
2291 | # CONFIG_STACK_TRACER is not set | 2350 | # CONFIG_STACK_TRACER is not set |
2292 | # CONFIG_HW_BRANCH_TRACER is not set | 2351 | # CONFIG_HW_BRANCH_TRACER is not set |
2352 | # CONFIG_KMEMTRACE is not set | ||
2353 | # CONFIG_WORKQUEUE_TRACER is not set | ||
2354 | CONFIG_BLK_DEV_IO_TRACE=y | ||
2355 | # CONFIG_FTRACE_STARTUP_TEST is not set | ||
2356 | # CONFIG_MMIOTRACE is not set | ||
2293 | CONFIG_PROVIDE_OHCI1394_DMA_INIT=y | 2357 | CONFIG_PROVIDE_OHCI1394_DMA_INIT=y |
2294 | # CONFIG_DYNAMIC_PRINTK_DEBUG is not set | 2358 | # CONFIG_DYNAMIC_DEBUG is not set |
2359 | # CONFIG_DMA_API_DEBUG is not set | ||
2295 | # CONFIG_SAMPLES is not set | 2360 | # CONFIG_SAMPLES is not set |
2296 | CONFIG_HAVE_ARCH_KGDB=y | 2361 | CONFIG_HAVE_ARCH_KGDB=y |
2297 | # CONFIG_KGDB is not set | 2362 | # CONFIG_KGDB is not set |
@@ -2301,14 +2366,13 @@ CONFIG_EARLY_PRINTK=y | |||
2301 | CONFIG_EARLY_PRINTK_DBGP=y | 2366 | CONFIG_EARLY_PRINTK_DBGP=y |
2302 | CONFIG_DEBUG_STACKOVERFLOW=y | 2367 | CONFIG_DEBUG_STACKOVERFLOW=y |
2303 | CONFIG_DEBUG_STACK_USAGE=y | 2368 | CONFIG_DEBUG_STACK_USAGE=y |
2304 | # CONFIG_DEBUG_PAGEALLOC is not set | ||
2305 | # CONFIG_DEBUG_PER_CPU_MAPS is not set | 2369 | # CONFIG_DEBUG_PER_CPU_MAPS is not set |
2306 | # CONFIG_X86_PTDUMP is not set | 2370 | # CONFIG_X86_PTDUMP is not set |
2307 | CONFIG_DEBUG_RODATA=y | 2371 | CONFIG_DEBUG_RODATA=y |
2308 | # CONFIG_DEBUG_RODATA_TEST is not set | 2372 | # CONFIG_DEBUG_RODATA_TEST is not set |
2309 | CONFIG_DEBUG_NX_TEST=m | 2373 | CONFIG_DEBUG_NX_TEST=m |
2310 | # CONFIG_IOMMU_DEBUG is not set | 2374 | # CONFIG_IOMMU_DEBUG is not set |
2311 | # CONFIG_MMIOTRACE is not set | 2375 | CONFIG_HAVE_MMIOTRACE_SUPPORT=y |
2312 | CONFIG_IO_DELAY_TYPE_0X80=0 | 2376 | CONFIG_IO_DELAY_TYPE_0X80=0 |
2313 | CONFIG_IO_DELAY_TYPE_0XED=1 | 2377 | CONFIG_IO_DELAY_TYPE_0XED=1 |
2314 | CONFIG_IO_DELAY_TYPE_UDELAY=2 | 2378 | CONFIG_IO_DELAY_TYPE_UDELAY=2 |
@@ -2344,6 +2408,8 @@ CONFIG_SECURITY_SELINUX_AVC_STATS=y | |||
2344 | CONFIG_SECURITY_SELINUX_CHECKREQPROT_VALUE=1 | 2408 | CONFIG_SECURITY_SELINUX_CHECKREQPROT_VALUE=1 |
2345 | # CONFIG_SECURITY_SELINUX_POLICYDB_VERSION_MAX is not set | 2409 | # CONFIG_SECURITY_SELINUX_POLICYDB_VERSION_MAX is not set |
2346 | # CONFIG_SECURITY_SMACK is not set | 2410 | # CONFIG_SECURITY_SMACK is not set |
2411 | # CONFIG_SECURITY_TOMOYO is not set | ||
2412 | # CONFIG_IMA is not set | ||
2347 | CONFIG_CRYPTO=y | 2413 | CONFIG_CRYPTO=y |
2348 | 2414 | ||
2349 | # | 2415 | # |
@@ -2359,10 +2425,12 @@ CONFIG_CRYPTO_BLKCIPHER2=y | |||
2359 | CONFIG_CRYPTO_HASH=y | 2425 | CONFIG_CRYPTO_HASH=y |
2360 | CONFIG_CRYPTO_HASH2=y | 2426 | CONFIG_CRYPTO_HASH2=y |
2361 | CONFIG_CRYPTO_RNG2=y | 2427 | CONFIG_CRYPTO_RNG2=y |
2428 | CONFIG_CRYPTO_PCOMP=y | ||
2362 | CONFIG_CRYPTO_MANAGER=y | 2429 | CONFIG_CRYPTO_MANAGER=y |
2363 | CONFIG_CRYPTO_MANAGER2=y | 2430 | CONFIG_CRYPTO_MANAGER2=y |
2364 | # CONFIG_CRYPTO_GF128MUL is not set | 2431 | # CONFIG_CRYPTO_GF128MUL is not set |
2365 | # CONFIG_CRYPTO_NULL is not set | 2432 | # CONFIG_CRYPTO_NULL is not set |
2433 | CONFIG_CRYPTO_WORKQUEUE=y | ||
2366 | # CONFIG_CRYPTO_CRYPTD is not set | 2434 | # CONFIG_CRYPTO_CRYPTD is not set |
2367 | CONFIG_CRYPTO_AUTHENC=y | 2435 | CONFIG_CRYPTO_AUTHENC=y |
2368 | # CONFIG_CRYPTO_TEST is not set | 2436 | # CONFIG_CRYPTO_TEST is not set |
@@ -2414,6 +2482,7 @@ CONFIG_CRYPTO_SHA1=y | |||
2414 | # | 2482 | # |
2415 | CONFIG_CRYPTO_AES=y | 2483 | CONFIG_CRYPTO_AES=y |
2416 | # CONFIG_CRYPTO_AES_X86_64 is not set | 2484 | # CONFIG_CRYPTO_AES_X86_64 is not set |
2485 | # CONFIG_CRYPTO_AES_NI_INTEL is not set | ||
2417 | # CONFIG_CRYPTO_ANUBIS is not set | 2486 | # CONFIG_CRYPTO_ANUBIS is not set |
2418 | CONFIG_CRYPTO_ARC4=y | 2487 | CONFIG_CRYPTO_ARC4=y |
2419 | # CONFIG_CRYPTO_BLOWFISH is not set | 2488 | # CONFIG_CRYPTO_BLOWFISH is not set |
@@ -2435,6 +2504,7 @@ CONFIG_CRYPTO_DES=y | |||
2435 | # Compression | 2504 | # Compression |
2436 | # | 2505 | # |
2437 | # CONFIG_CRYPTO_DEFLATE is not set | 2506 | # CONFIG_CRYPTO_DEFLATE is not set |
2507 | # CONFIG_CRYPTO_ZLIB is not set | ||
2438 | # CONFIG_CRYPTO_LZO is not set | 2508 | # CONFIG_CRYPTO_LZO is not set |
2439 | 2509 | ||
2440 | # | 2510 | # |
@@ -2444,10 +2514,12 @@ CONFIG_CRYPTO_DES=y | |||
2444 | CONFIG_CRYPTO_HW=y | 2514 | CONFIG_CRYPTO_HW=y |
2445 | # CONFIG_CRYPTO_DEV_HIFN_795X is not set | 2515 | # CONFIG_CRYPTO_DEV_HIFN_795X is not set |
2446 | CONFIG_HAVE_KVM=y | 2516 | CONFIG_HAVE_KVM=y |
2517 | CONFIG_HAVE_KVM_IRQCHIP=y | ||
2447 | CONFIG_VIRTUALIZATION=y | 2518 | CONFIG_VIRTUALIZATION=y |
2448 | # CONFIG_KVM is not set | 2519 | # CONFIG_KVM is not set |
2449 | # CONFIG_VIRTIO_PCI is not set | 2520 | # CONFIG_VIRTIO_PCI is not set |
2450 | # CONFIG_VIRTIO_BALLOON is not set | 2521 | # CONFIG_VIRTIO_BALLOON is not set |
2522 | CONFIG_BINARY_PRINTF=y | ||
2451 | 2523 | ||
2452 | # | 2524 | # |
2453 | # Library routines | 2525 | # Library routines |
@@ -2464,7 +2536,10 @@ CONFIG_CRC32=y | |||
2464 | # CONFIG_CRC7 is not set | 2536 | # CONFIG_CRC7 is not set |
2465 | # CONFIG_LIBCRC32C is not set | 2537 | # CONFIG_LIBCRC32C is not set |
2466 | CONFIG_ZLIB_INFLATE=y | 2538 | CONFIG_ZLIB_INFLATE=y |
2467 | CONFIG_PLIST=y | 2539 | CONFIG_DECOMPRESS_GZIP=y |
2540 | CONFIG_DECOMPRESS_BZIP2=y | ||
2541 | CONFIG_DECOMPRESS_LZMA=y | ||
2468 | CONFIG_HAS_IOMEM=y | 2542 | CONFIG_HAS_IOMEM=y |
2469 | CONFIG_HAS_IOPORT=y | 2543 | CONFIG_HAS_IOPORT=y |
2470 | CONFIG_HAS_DMA=y | 2544 | CONFIG_HAS_DMA=y |
2545 | CONFIG_NLATTR=y | ||
diff --git a/arch/x86/crypto/Makefile b/arch/x86/crypto/Makefile index ebe7deedd5b4..cfb0010fa940 100644 --- a/arch/x86/crypto/Makefile +++ b/arch/x86/crypto/Makefile | |||
@@ -2,6 +2,8 @@ | |||
2 | # Arch-specific CryptoAPI modules. | 2 | # Arch-specific CryptoAPI modules. |
3 | # | 3 | # |
4 | 4 | ||
5 | obj-$(CONFIG_CRYPTO_FPU) += fpu.o | ||
6 | |||
5 | obj-$(CONFIG_CRYPTO_AES_586) += aes-i586.o | 7 | obj-$(CONFIG_CRYPTO_AES_586) += aes-i586.o |
6 | obj-$(CONFIG_CRYPTO_TWOFISH_586) += twofish-i586.o | 8 | obj-$(CONFIG_CRYPTO_TWOFISH_586) += twofish-i586.o |
7 | obj-$(CONFIG_CRYPTO_SALSA20_586) += salsa20-i586.o | 9 | obj-$(CONFIG_CRYPTO_SALSA20_586) += salsa20-i586.o |
diff --git a/arch/x86/crypto/aesni-intel_asm.S b/arch/x86/crypto/aesni-intel_asm.S index caba99601703..eb0566e83319 100644 --- a/arch/x86/crypto/aesni-intel_asm.S +++ b/arch/x86/crypto/aesni-intel_asm.S | |||
@@ -845,7 +845,7 @@ ENTRY(aesni_cbc_enc) | |||
845 | */ | 845 | */ |
846 | ENTRY(aesni_cbc_dec) | 846 | ENTRY(aesni_cbc_dec) |
847 | cmp $16, LEN | 847 | cmp $16, LEN |
848 | jb .Lcbc_dec_ret | 848 | jb .Lcbc_dec_just_ret |
849 | mov 480(KEYP), KLEN | 849 | mov 480(KEYP), KLEN |
850 | add $240, KEYP | 850 | add $240, KEYP |
851 | movups (IVP), IV | 851 | movups (IVP), IV |
@@ -891,6 +891,7 @@ ENTRY(aesni_cbc_dec) | |||
891 | add $16, OUTP | 891 | add $16, OUTP |
892 | cmp $16, LEN | 892 | cmp $16, LEN |
893 | jge .Lcbc_dec_loop1 | 893 | jge .Lcbc_dec_loop1 |
894 | movups IV, (IVP) | ||
895 | .Lcbc_dec_ret: | 894 | .Lcbc_dec_ret: |
895 | movups IV, (IVP) | ||
896 | .Lcbc_dec_just_ret: | ||
896 | ret | 897 | ret |
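
The hunk above reorders the aesni_cbc_dec epilogue: the IV write-back to (IVP) now happens whenever at least one block was processed, while the new .Lcbc_dec_just_ret label lets inputs shorter than one block return without touching the IV. The C fragment below only illustrates why that write-back matters when CBC is processed in chunks; aes_decrypt_block() is a hypothetical helper and this is not a literal translation of the assembly.

#include <stdint.h>
#include <string.h>

/* Hypothetical single-block decrypt; stands in for the AES-NI code. */
void aes_decrypt_block(const void *key, uint8_t *dst, const uint8_t *src);

/*
 * Illustration: CBC decryption done chunk by chunk. The last ciphertext
 * block of a chunk must be stored back as the IV so the next call can
 * continue the chain; with zero blocks the IV must be left untouched.
 */
static void cbc_decrypt_chunk(const void *key, uint8_t iv[16],
                              uint8_t *dst, const uint8_t *src, size_t nblocks)
{
	uint8_t saved[16];
	size_t i, j;

	for (i = 0; i < nblocks; i++) {
		memcpy(saved, src, 16);            /* remember ciphertext block */
		aes_decrypt_block(key, dst, src);  /* hypothetical block decrypt */
		for (j = 0; j < 16; j++)
			dst[j] ^= iv[j];           /* CBC: XOR with previous block */
		memcpy(iv, saved, 16);             /* write-back: next call's IV */
		src += 16;
		dst += 16;
	}
}
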
diff --git a/arch/x86/crypto/aesni-intel_glue.c b/arch/x86/crypto/aesni-intel_glue.c index 02af0af65497..c580c5ec1cad 100644 --- a/arch/x86/crypto/aesni-intel_glue.c +++ b/arch/x86/crypto/aesni-intel_glue.c | |||
@@ -21,6 +21,22 @@ | |||
21 | #include <asm/i387.h> | 21 | #include <asm/i387.h> |
22 | #include <asm/aes.h> | 22 | #include <asm/aes.h> |
23 | 23 | ||
24 | #if defined(CONFIG_CRYPTO_CTR) || defined(CONFIG_CRYPTO_CTR_MODULE) | ||
25 | #define HAS_CTR | ||
26 | #endif | ||
27 | |||
28 | #if defined(CONFIG_CRYPTO_LRW) || defined(CONFIG_CRYPTO_LRW_MODULE) | ||
29 | #define HAS_LRW | ||
30 | #endif | ||
31 | |||
32 | #if defined(CONFIG_CRYPTO_PCBC) || defined(CONFIG_CRYPTO_PCBC_MODULE) | ||
33 | #define HAS_PCBC | ||
34 | #endif | ||
35 | |||
36 | #if defined(CONFIG_CRYPTO_XTS) || defined(CONFIG_CRYPTO_XTS_MODULE) | ||
37 | #define HAS_XTS | ||
38 | #endif | ||
39 | |||
24 | struct async_aes_ctx { | 40 | struct async_aes_ctx { |
25 | struct cryptd_ablkcipher *cryptd_tfm; | 41 | struct cryptd_ablkcipher *cryptd_tfm; |
26 | }; | 42 | }; |
@@ -137,6 +153,41 @@ static struct crypto_alg aesni_alg = { | |||
137 | } | 153 | } |
138 | }; | 154 | }; |
139 | 155 | ||
156 | static void __aes_encrypt(struct crypto_tfm *tfm, u8 *dst, const u8 *src) | ||
157 | { | ||
158 | struct crypto_aes_ctx *ctx = aes_ctx(crypto_tfm_ctx(tfm)); | ||
159 | |||
160 | aesni_enc(ctx, dst, src); | ||
161 | } | ||
162 | |||
163 | static void __aes_decrypt(struct crypto_tfm *tfm, u8 *dst, const u8 *src) | ||
164 | { | ||
165 | struct crypto_aes_ctx *ctx = aes_ctx(crypto_tfm_ctx(tfm)); | ||
166 | |||
167 | aesni_dec(ctx, dst, src); | ||
168 | } | ||
169 | |||
170 | static struct crypto_alg __aesni_alg = { | ||
171 | .cra_name = "__aes-aesni", | ||
172 | .cra_driver_name = "__driver-aes-aesni", | ||
173 | .cra_priority = 0, | ||
174 | .cra_flags = CRYPTO_ALG_TYPE_CIPHER, | ||
175 | .cra_blocksize = AES_BLOCK_SIZE, | ||
176 | .cra_ctxsize = sizeof(struct crypto_aes_ctx)+AESNI_ALIGN-1, | ||
177 | .cra_alignmask = 0, | ||
178 | .cra_module = THIS_MODULE, | ||
179 | .cra_list = LIST_HEAD_INIT(__aesni_alg.cra_list), | ||
180 | .cra_u = { | ||
181 | .cipher = { | ||
182 | .cia_min_keysize = AES_MIN_KEY_SIZE, | ||
183 | .cia_max_keysize = AES_MAX_KEY_SIZE, | ||
184 | .cia_setkey = aes_set_key, | ||
185 | .cia_encrypt = __aes_encrypt, | ||
186 | .cia_decrypt = __aes_decrypt | ||
187 | } | ||
188 | } | ||
189 | }; | ||
190 | |||
140 | static int ecb_encrypt(struct blkcipher_desc *desc, | 191 | static int ecb_encrypt(struct blkcipher_desc *desc, |
141 | struct scatterlist *dst, struct scatterlist *src, | 192 | struct scatterlist *dst, struct scatterlist *src, |
142 | unsigned int nbytes) | 193 | unsigned int nbytes) |
@@ -147,6 +198,7 @@ static int ecb_encrypt(struct blkcipher_desc *desc, | |||
147 | 198 | ||
148 | blkcipher_walk_init(&walk, dst, src, nbytes); | 199 | blkcipher_walk_init(&walk, dst, src, nbytes); |
149 | err = blkcipher_walk_virt(desc, &walk); | 200 | err = blkcipher_walk_virt(desc, &walk); |
201 | desc->flags &= ~CRYPTO_TFM_REQ_MAY_SLEEP; | ||
150 | 202 | ||
151 | kernel_fpu_begin(); | 203 | kernel_fpu_begin(); |
152 | while ((nbytes = walk.nbytes)) { | 204 | while ((nbytes = walk.nbytes)) { |
@@ -170,6 +222,7 @@ static int ecb_decrypt(struct blkcipher_desc *desc, | |||
170 | 222 | ||
171 | blkcipher_walk_init(&walk, dst, src, nbytes); | 223 | blkcipher_walk_init(&walk, dst, src, nbytes); |
172 | err = blkcipher_walk_virt(desc, &walk); | 224 | err = blkcipher_walk_virt(desc, &walk); |
225 | desc->flags &= ~CRYPTO_TFM_REQ_MAY_SLEEP; | ||
173 | 226 | ||
174 | kernel_fpu_begin(); | 227 | kernel_fpu_begin(); |
175 | while ((nbytes = walk.nbytes)) { | 228 | while ((nbytes = walk.nbytes)) { |
@@ -215,6 +268,7 @@ static int cbc_encrypt(struct blkcipher_desc *desc, | |||
215 | 268 | ||
216 | blkcipher_walk_init(&walk, dst, src, nbytes); | 269 | blkcipher_walk_init(&walk, dst, src, nbytes); |
217 | err = blkcipher_walk_virt(desc, &walk); | 270 | err = blkcipher_walk_virt(desc, &walk); |
271 | desc->flags &= ~CRYPTO_TFM_REQ_MAY_SLEEP; | ||
218 | 272 | ||
219 | kernel_fpu_begin(); | 273 | kernel_fpu_begin(); |
220 | while ((nbytes = walk.nbytes)) { | 274 | while ((nbytes = walk.nbytes)) { |
@@ -238,6 +292,7 @@ static int cbc_decrypt(struct blkcipher_desc *desc, | |||
238 | 292 | ||
239 | blkcipher_walk_init(&walk, dst, src, nbytes); | 293 | blkcipher_walk_init(&walk, dst, src, nbytes); |
240 | err = blkcipher_walk_virt(desc, &walk); | 294 | err = blkcipher_walk_virt(desc, &walk); |
295 | desc->flags &= ~CRYPTO_TFM_REQ_MAY_SLEEP; | ||
241 | 296 | ||
242 | kernel_fpu_begin(); | 297 | kernel_fpu_begin(); |
243 | while ((nbytes = walk.nbytes)) { | 298 | while ((nbytes = walk.nbytes)) { |
@@ -277,8 +332,16 @@ static int ablk_set_key(struct crypto_ablkcipher *tfm, const u8 *key, | |||
277 | unsigned int key_len) | 332 | unsigned int key_len) |
278 | { | 333 | { |
279 | struct async_aes_ctx *ctx = crypto_ablkcipher_ctx(tfm); | 334 | struct async_aes_ctx *ctx = crypto_ablkcipher_ctx(tfm); |
335 | struct crypto_ablkcipher *child = &ctx->cryptd_tfm->base; | ||
336 | int err; | ||
280 | 337 | ||
281 | return crypto_ablkcipher_setkey(&ctx->cryptd_tfm->base, key, key_len); | 338 | crypto_ablkcipher_clear_flags(child, CRYPTO_TFM_REQ_MASK); |
339 | crypto_ablkcipher_set_flags(child, crypto_ablkcipher_get_flags(tfm) | ||
340 | & CRYPTO_TFM_REQ_MASK); | ||
341 | err = crypto_ablkcipher_setkey(child, key, key_len); | ||
342 | crypto_ablkcipher_set_flags(tfm, crypto_ablkcipher_get_flags(child) | ||
343 | & CRYPTO_TFM_RES_MASK); | ||
344 | return err; | ||
282 | } | 345 | } |
283 | 346 | ||
284 | static int ablk_encrypt(struct ablkcipher_request *req) | 347 | static int ablk_encrypt(struct ablkcipher_request *req) |
@@ -411,6 +474,163 @@ static struct crypto_alg ablk_cbc_alg = { | |||
411 | }, | 474 | }, |
412 | }; | 475 | }; |
413 | 476 | ||
477 | #ifdef HAS_CTR | ||
478 | static int ablk_ctr_init(struct crypto_tfm *tfm) | ||
479 | { | ||
480 | struct cryptd_ablkcipher *cryptd_tfm; | ||
481 | |||
482 | cryptd_tfm = cryptd_alloc_ablkcipher("fpu(ctr(__driver-aes-aesni))", | ||
483 | 0, 0); | ||
484 | if (IS_ERR(cryptd_tfm)) | ||
485 | return PTR_ERR(cryptd_tfm); | ||
486 | ablk_init_common(tfm, cryptd_tfm); | ||
487 | return 0; | ||
488 | } | ||
489 | |||
490 | static struct crypto_alg ablk_ctr_alg = { | ||
491 | .cra_name = "ctr(aes)", | ||
492 | .cra_driver_name = "ctr-aes-aesni", | ||
493 | .cra_priority = 400, | ||
494 | .cra_flags = CRYPTO_ALG_TYPE_ABLKCIPHER|CRYPTO_ALG_ASYNC, | ||
495 | .cra_blocksize = 1, | ||
496 | .cra_ctxsize = sizeof(struct async_aes_ctx), | ||
497 | .cra_alignmask = 0, | ||
498 | .cra_type = &crypto_ablkcipher_type, | ||
499 | .cra_module = THIS_MODULE, | ||
500 | .cra_list = LIST_HEAD_INIT(ablk_ctr_alg.cra_list), | ||
501 | .cra_init = ablk_ctr_init, | ||
502 | .cra_exit = ablk_exit, | ||
503 | .cra_u = { | ||
504 | .ablkcipher = { | ||
505 | .min_keysize = AES_MIN_KEY_SIZE, | ||
506 | .max_keysize = AES_MAX_KEY_SIZE, | ||
507 | .ivsize = AES_BLOCK_SIZE, | ||
508 | .setkey = ablk_set_key, | ||
509 | .encrypt = ablk_encrypt, | ||
510 | .decrypt = ablk_decrypt, | ||
511 | .geniv = "chainiv", | ||
512 | }, | ||
513 | }, | ||
514 | }; | ||
515 | #endif | ||
516 | |||
517 | #ifdef HAS_LRW | ||
518 | static int ablk_lrw_init(struct crypto_tfm *tfm) | ||
519 | { | ||
520 | struct cryptd_ablkcipher *cryptd_tfm; | ||
521 | |||
522 | cryptd_tfm = cryptd_alloc_ablkcipher("fpu(lrw(__driver-aes-aesni))", | ||
523 | 0, 0); | ||
524 | if (IS_ERR(cryptd_tfm)) | ||
525 | return PTR_ERR(cryptd_tfm); | ||
526 | ablk_init_common(tfm, cryptd_tfm); | ||
527 | return 0; | ||
528 | } | ||
529 | |||
530 | static struct crypto_alg ablk_lrw_alg = { | ||
531 | .cra_name = "lrw(aes)", | ||
532 | .cra_driver_name = "lrw-aes-aesni", | ||
533 | .cra_priority = 400, | ||
534 | .cra_flags = CRYPTO_ALG_TYPE_ABLKCIPHER|CRYPTO_ALG_ASYNC, | ||
535 | .cra_blocksize = AES_BLOCK_SIZE, | ||
536 | .cra_ctxsize = sizeof(struct async_aes_ctx), | ||
537 | .cra_alignmask = 0, | ||
538 | .cra_type = &crypto_ablkcipher_type, | ||
539 | .cra_module = THIS_MODULE, | ||
540 | .cra_list = LIST_HEAD_INIT(ablk_lrw_alg.cra_list), | ||
541 | .cra_init = ablk_lrw_init, | ||
542 | .cra_exit = ablk_exit, | ||
543 | .cra_u = { | ||
544 | .ablkcipher = { | ||
545 | .min_keysize = AES_MIN_KEY_SIZE + AES_BLOCK_SIZE, | ||
546 | .max_keysize = AES_MAX_KEY_SIZE + AES_BLOCK_SIZE, | ||
547 | .ivsize = AES_BLOCK_SIZE, | ||
548 | .setkey = ablk_set_key, | ||
549 | .encrypt = ablk_encrypt, | ||
550 | .decrypt = ablk_decrypt, | ||
551 | }, | ||
552 | }, | ||
553 | }; | ||
554 | #endif | ||
555 | |||
556 | #ifdef HAS_PCBC | ||
557 | static int ablk_pcbc_init(struct crypto_tfm *tfm) | ||
558 | { | ||
559 | struct cryptd_ablkcipher *cryptd_tfm; | ||
560 | |||
561 | cryptd_tfm = cryptd_alloc_ablkcipher("fpu(pcbc(__driver-aes-aesni))", | ||
562 | 0, 0); | ||
563 | if (IS_ERR(cryptd_tfm)) | ||
564 | return PTR_ERR(cryptd_tfm); | ||
565 | ablk_init_common(tfm, cryptd_tfm); | ||
566 | return 0; | ||
567 | } | ||
568 | |||
569 | static struct crypto_alg ablk_pcbc_alg = { | ||
570 | .cra_name = "pcbc(aes)", | ||
571 | .cra_driver_name = "pcbc-aes-aesni", | ||
572 | .cra_priority = 400, | ||
573 | .cra_flags = CRYPTO_ALG_TYPE_ABLKCIPHER|CRYPTO_ALG_ASYNC, | ||
574 | .cra_blocksize = AES_BLOCK_SIZE, | ||
575 | .cra_ctxsize = sizeof(struct async_aes_ctx), | ||
576 | .cra_alignmask = 0, | ||
577 | .cra_type = &crypto_ablkcipher_type, | ||
578 | .cra_module = THIS_MODULE, | ||
579 | .cra_list = LIST_HEAD_INIT(ablk_pcbc_alg.cra_list), | ||
580 | .cra_init = ablk_pcbc_init, | ||
581 | .cra_exit = ablk_exit, | ||
582 | .cra_u = { | ||
583 | .ablkcipher = { | ||
584 | .min_keysize = AES_MIN_KEY_SIZE, | ||
585 | .max_keysize = AES_MAX_KEY_SIZE, | ||
586 | .ivsize = AES_BLOCK_SIZE, | ||
587 | .setkey = ablk_set_key, | ||
588 | .encrypt = ablk_encrypt, | ||
589 | .decrypt = ablk_decrypt, | ||
590 | }, | ||
591 | }, | ||
592 | }; | ||
593 | #endif | ||
594 | |||
595 | #ifdef HAS_XTS | ||
596 | static int ablk_xts_init(struct crypto_tfm *tfm) | ||
597 | { | ||
598 | struct cryptd_ablkcipher *cryptd_tfm; | ||
599 | |||
600 | cryptd_tfm = cryptd_alloc_ablkcipher("fpu(xts(__driver-aes-aesni))", | ||
601 | 0, 0); | ||
602 | if (IS_ERR(cryptd_tfm)) | ||
603 | return PTR_ERR(cryptd_tfm); | ||
604 | ablk_init_common(tfm, cryptd_tfm); | ||
605 | return 0; | ||
606 | } | ||
607 | |||
608 | static struct crypto_alg ablk_xts_alg = { | ||
609 | .cra_name = "xts(aes)", | ||
610 | .cra_driver_name = "xts-aes-aesni", | ||
611 | .cra_priority = 400, | ||
612 | .cra_flags = CRYPTO_ALG_TYPE_ABLKCIPHER|CRYPTO_ALG_ASYNC, | ||
613 | .cra_blocksize = AES_BLOCK_SIZE, | ||
614 | .cra_ctxsize = sizeof(struct async_aes_ctx), | ||
615 | .cra_alignmask = 0, | ||
616 | .cra_type = &crypto_ablkcipher_type, | ||
617 | .cra_module = THIS_MODULE, | ||
618 | .cra_list = LIST_HEAD_INIT(ablk_xts_alg.cra_list), | ||
619 | .cra_init = ablk_xts_init, | ||
620 | .cra_exit = ablk_exit, | ||
621 | .cra_u = { | ||
622 | .ablkcipher = { | ||
623 | .min_keysize = 2 * AES_MIN_KEY_SIZE, | ||
624 | .max_keysize = 2 * AES_MAX_KEY_SIZE, | ||
625 | .ivsize = AES_BLOCK_SIZE, | ||
626 | .setkey = ablk_set_key, | ||
627 | .encrypt = ablk_encrypt, | ||
628 | .decrypt = ablk_decrypt, | ||
629 | }, | ||
630 | }, | ||
631 | }; | ||
632 | #endif | ||
633 | |||
414 | static int __init aesni_init(void) | 634 | static int __init aesni_init(void) |
415 | { | 635 | { |
416 | int err; | 636 | int err; |
@@ -421,6 +641,8 @@ static int __init aesni_init(void) | |||
421 | } | 641 | } |
422 | if ((err = crypto_register_alg(&aesni_alg))) | 642 | if ((err = crypto_register_alg(&aesni_alg))) |
423 | goto aes_err; | 643 | goto aes_err; |
644 | if ((err = crypto_register_alg(&__aesni_alg))) | ||
645 | goto __aes_err; | ||
424 | if ((err = crypto_register_alg(&blk_ecb_alg))) | 646 | if ((err = crypto_register_alg(&blk_ecb_alg))) |
425 | goto blk_ecb_err; | 647 | goto blk_ecb_err; |
426 | if ((err = crypto_register_alg(&blk_cbc_alg))) | 648 | if ((err = crypto_register_alg(&blk_cbc_alg))) |
@@ -429,9 +651,41 @@ static int __init aesni_init(void) | |||
429 | goto ablk_ecb_err; | 651 | goto ablk_ecb_err; |
430 | if ((err = crypto_register_alg(&ablk_cbc_alg))) | 652 | if ((err = crypto_register_alg(&ablk_cbc_alg))) |
431 | goto ablk_cbc_err; | 653 | goto ablk_cbc_err; |
654 | #ifdef HAS_CTR | ||
655 | if ((err = crypto_register_alg(&ablk_ctr_alg))) | ||
656 | goto ablk_ctr_err; | ||
657 | #endif | ||
658 | #ifdef HAS_LRW | ||
659 | if ((err = crypto_register_alg(&ablk_lrw_alg))) | ||
660 | goto ablk_lrw_err; | ||
661 | #endif | ||
662 | #ifdef HAS_PCBC | ||
663 | if ((err = crypto_register_alg(&ablk_pcbc_alg))) | ||
664 | goto ablk_pcbc_err; | ||
665 | #endif | ||
666 | #ifdef HAS_XTS | ||
667 | if ((err = crypto_register_alg(&ablk_xts_alg))) | ||
668 | goto ablk_xts_err; | ||
669 | #endif | ||
432 | 670 | ||
433 | return err; | 671 | return err; |
434 | 672 | ||
673 | #ifdef HAS_XTS | ||
674 | ablk_xts_err: | ||
675 | #endif | ||
676 | #ifdef HAS_PCBC | ||
677 | crypto_unregister_alg(&ablk_pcbc_alg); | ||
678 | ablk_pcbc_err: | ||
679 | #endif | ||
680 | #ifdef HAS_LRW | ||
681 | crypto_unregister_alg(&ablk_lrw_alg); | ||
682 | ablk_lrw_err: | ||
683 | #endif | ||
684 | #ifdef HAS_CTR | ||
685 | crypto_unregister_alg(&ablk_ctr_alg); | ||
686 | ablk_ctr_err: | ||
687 | #endif | ||
688 | crypto_unregister_alg(&ablk_cbc_alg); | ||
435 | ablk_cbc_err: | 689 | ablk_cbc_err: |
436 | crypto_unregister_alg(&ablk_ecb_alg); | 690 | crypto_unregister_alg(&ablk_ecb_alg); |
437 | ablk_ecb_err: | 691 | ablk_ecb_err: |
@@ -439,6 +693,8 @@ ablk_ecb_err: | |||
439 | blk_cbc_err: | 693 | blk_cbc_err: |
440 | crypto_unregister_alg(&blk_ecb_alg); | 694 | crypto_unregister_alg(&blk_ecb_alg); |
441 | blk_ecb_err: | 695 | blk_ecb_err: |
696 | crypto_unregister_alg(&__aesni_alg); | ||
697 | __aes_err: | ||
442 | crypto_unregister_alg(&aesni_alg); | 698 | crypto_unregister_alg(&aesni_alg); |
443 | aes_err: | 699 | aes_err: |
444 | return err; | 700 | return err; |
@@ -446,10 +702,23 @@ aes_err: | |||
446 | 702 | ||
447 | static void __exit aesni_exit(void) | 703 | static void __exit aesni_exit(void) |
448 | { | 704 | { |
705 | #ifdef HAS_XTS | ||
706 | crypto_unregister_alg(&ablk_xts_alg); | ||
707 | #endif | ||
708 | #ifdef HAS_PCBC | ||
709 | crypto_unregister_alg(&ablk_pcbc_alg); | ||
710 | #endif | ||
711 | #ifdef HAS_LRW | ||
712 | crypto_unregister_alg(&ablk_lrw_alg); | ||
713 | #endif | ||
714 | #ifdef HAS_CTR | ||
715 | crypto_unregister_alg(&ablk_ctr_alg); | ||
716 | #endif | ||
449 | crypto_unregister_alg(&ablk_cbc_alg); | 717 | crypto_unregister_alg(&ablk_cbc_alg); |
450 | crypto_unregister_alg(&ablk_ecb_alg); | 718 | crypto_unregister_alg(&ablk_ecb_alg); |
451 | crypto_unregister_alg(&blk_cbc_alg); | 719 | crypto_unregister_alg(&blk_cbc_alg); |
452 | crypto_unregister_alg(&blk_ecb_alg); | 720 | crypto_unregister_alg(&blk_ecb_alg); |
721 | crypto_unregister_alg(&__aesni_alg); | ||
453 | crypto_unregister_alg(&aesni_alg); | 722 | crypto_unregister_alg(&aesni_alg); |
454 | } | 723 | } |
455 | 724 | ||
diff --git a/arch/x86/crypto/fpu.c b/arch/x86/crypto/fpu.c new file mode 100644 index 000000000000..daef6cd2b45d --- /dev/null +++ b/arch/x86/crypto/fpu.c | |||
@@ -0,0 +1,166 @@ | |||
1 | /* | ||
2 | * FPU: Wrapper for blkcipher touching fpu | ||
3 | * | ||
4 | * Copyright (c) Intel Corp. | ||
5 | * Author: Huang Ying <ying.huang@intel.com> | ||
6 | * | ||
7 | * This program is free software; you can redistribute it and/or modify it | ||
8 | * under the terms of the GNU General Public License as published by the Free | ||
9 | * Software Foundation; either version 2 of the License, or (at your option) | ||
10 | * any later version. | ||
11 | * | ||
12 | */ | ||
13 | |||
14 | #include <crypto/algapi.h> | ||
15 | #include <linux/err.h> | ||
16 | #include <linux/init.h> | ||
17 | #include <linux/kernel.h> | ||
18 | #include <linux/module.h> | ||
19 | #include <asm/i387.h> | ||
20 | |||
21 | struct crypto_fpu_ctx { | ||
22 | struct crypto_blkcipher *child; | ||
23 | }; | ||
24 | |||
25 | static int crypto_fpu_setkey(struct crypto_tfm *parent, const u8 *key, | ||
26 | unsigned int keylen) | ||
27 | { | ||
28 | struct crypto_fpu_ctx *ctx = crypto_tfm_ctx(parent); | ||
29 | struct crypto_blkcipher *child = ctx->child; | ||
30 | int err; | ||
31 | |||
32 | crypto_blkcipher_clear_flags(child, CRYPTO_TFM_REQ_MASK); | ||
33 | crypto_blkcipher_set_flags(child, crypto_tfm_get_flags(parent) & | ||
34 | CRYPTO_TFM_REQ_MASK); | ||
35 | err = crypto_blkcipher_setkey(child, key, keylen); | ||
36 | crypto_tfm_set_flags(parent, crypto_blkcipher_get_flags(child) & | ||
37 | CRYPTO_TFM_RES_MASK); | ||
38 | return err; | ||
39 | } | ||
40 | |||
41 | static int crypto_fpu_encrypt(struct blkcipher_desc *desc_in, | ||
42 | struct scatterlist *dst, struct scatterlist *src, | ||
43 | unsigned int nbytes) | ||
44 | { | ||
45 | int err; | ||
46 | struct crypto_fpu_ctx *ctx = crypto_blkcipher_ctx(desc_in->tfm); | ||
47 | struct crypto_blkcipher *child = ctx->child; | ||
48 | struct blkcipher_desc desc = { | ||
49 | .tfm = child, | ||
50 | .info = desc_in->info, | ||
51 | .flags = desc_in->flags & ~CRYPTO_TFM_REQ_MAY_SLEEP, | ||
52 | }; | ||
53 | |||
54 | kernel_fpu_begin(); | ||
55 | err = crypto_blkcipher_crt(desc.tfm)->encrypt(&desc, dst, src, nbytes); | ||
56 | kernel_fpu_end(); | ||
57 | return err; | ||
58 | } | ||
59 | |||
60 | static int crypto_fpu_decrypt(struct blkcipher_desc *desc_in, | ||
61 | struct scatterlist *dst, struct scatterlist *src, | ||
62 | unsigned int nbytes) | ||
63 | { | ||
64 | int err; | ||
65 | struct crypto_fpu_ctx *ctx = crypto_blkcipher_ctx(desc_in->tfm); | ||
66 | struct crypto_blkcipher *child = ctx->child; | ||
67 | struct blkcipher_desc desc = { | ||
68 | .tfm = child, | ||
69 | .info = desc_in->info, | ||
70 | .flags = desc_in->flags & ~CRYPTO_TFM_REQ_MAY_SLEEP, | ||
71 | }; | ||
72 | |||
73 | kernel_fpu_begin(); | ||
74 | err = crypto_blkcipher_crt(desc.tfm)->decrypt(&desc, dst, src, nbytes); | ||
75 | kernel_fpu_end(); | ||
76 | return err; | ||
77 | } | ||
78 | |||
79 | static int crypto_fpu_init_tfm(struct crypto_tfm *tfm) | ||
80 | { | ||
81 | struct crypto_instance *inst = crypto_tfm_alg_instance(tfm); | ||
82 | struct crypto_spawn *spawn = crypto_instance_ctx(inst); | ||
83 | struct crypto_fpu_ctx *ctx = crypto_tfm_ctx(tfm); | ||
84 | struct crypto_blkcipher *cipher; | ||
85 | |||
86 | cipher = crypto_spawn_blkcipher(spawn); | ||
87 | if (IS_ERR(cipher)) | ||
88 | return PTR_ERR(cipher); | ||
89 | |||
90 | ctx->child = cipher; | ||
91 | return 0; | ||
92 | } | ||
93 | |||
94 | static void crypto_fpu_exit_tfm(struct crypto_tfm *tfm) | ||
95 | { | ||
96 | struct crypto_fpu_ctx *ctx = crypto_tfm_ctx(tfm); | ||
97 | crypto_free_blkcipher(ctx->child); | ||
98 | } | ||
99 | |||
100 | static struct crypto_instance *crypto_fpu_alloc(struct rtattr **tb) | ||
101 | { | ||
102 | struct crypto_instance *inst; | ||
103 | struct crypto_alg *alg; | ||
104 | int err; | ||
105 | |||
106 | err = crypto_check_attr_type(tb, CRYPTO_ALG_TYPE_BLKCIPHER); | ||
107 | if (err) | ||
108 | return ERR_PTR(err); | ||
109 | |||
110 | alg = crypto_get_attr_alg(tb, CRYPTO_ALG_TYPE_BLKCIPHER, | ||
111 | CRYPTO_ALG_TYPE_MASK); | ||
112 | if (IS_ERR(alg)) | ||
113 | return ERR_CAST(alg); | ||
114 | |||
115 | inst = crypto_alloc_instance("fpu", alg); | ||
116 | if (IS_ERR(inst)) | ||
117 | goto out_put_alg; | ||
118 | |||
119 | inst->alg.cra_flags = alg->cra_flags; | ||
120 | inst->alg.cra_priority = alg->cra_priority; | ||
121 | inst->alg.cra_blocksize = alg->cra_blocksize; | ||
122 | inst->alg.cra_alignmask = alg->cra_alignmask; | ||
123 | inst->alg.cra_type = alg->cra_type; | ||
124 | inst->alg.cra_blkcipher.ivsize = alg->cra_blkcipher.ivsize; | ||
125 | inst->alg.cra_blkcipher.min_keysize = alg->cra_blkcipher.min_keysize; | ||
126 | inst->alg.cra_blkcipher.max_keysize = alg->cra_blkcipher.max_keysize; | ||
127 | inst->alg.cra_ctxsize = sizeof(struct crypto_fpu_ctx); | ||
128 | inst->alg.cra_init = crypto_fpu_init_tfm; | ||
129 | inst->alg.cra_exit = crypto_fpu_exit_tfm; | ||
130 | inst->alg.cra_blkcipher.setkey = crypto_fpu_setkey; | ||
131 | inst->alg.cra_blkcipher.encrypt = crypto_fpu_encrypt; | ||
132 | inst->alg.cra_blkcipher.decrypt = crypto_fpu_decrypt; | ||
133 | |||
134 | out_put_alg: | ||
135 | crypto_mod_put(alg); | ||
136 | return inst; | ||
137 | } | ||
138 | |||
139 | static void crypto_fpu_free(struct crypto_instance *inst) | ||
140 | { | ||
141 | crypto_drop_spawn(crypto_instance_ctx(inst)); | ||
142 | kfree(inst); | ||
143 | } | ||
144 | |||
145 | static struct crypto_template crypto_fpu_tmpl = { | ||
146 | .name = "fpu", | ||
147 | .alloc = crypto_fpu_alloc, | ||
148 | .free = crypto_fpu_free, | ||
149 | .module = THIS_MODULE, | ||
150 | }; | ||
151 | |||
152 | static int __init crypto_fpu_module_init(void) | ||
153 | { | ||
154 | return crypto_register_template(&crypto_fpu_tmpl); | ||
155 | } | ||
156 | |||
157 | static void __exit crypto_fpu_module_exit(void) | ||
158 | { | ||
159 | crypto_unregister_template(&crypto_fpu_tmpl); | ||
160 | } | ||
161 | |||
162 | module_init(crypto_fpu_module_init); | ||
163 | module_exit(crypto_fpu_module_exit); | ||
164 | |||
165 | MODULE_LICENSE("GPL"); | ||
166 | MODULE_DESCRIPTION("FPU block cipher wrapper"); | ||
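
The "fpu" template defined above runs the wrapped blkcipher's whole encrypt or decrypt walk inside a single kernel_fpu_begin()/kernel_fpu_end() pair and masks CRYPTO_TFM_REQ_MAY_SLEEP so the inner cipher cannot schedule while the FPU is claimed. A stripped-down sketch of that bracketing pattern, with a placeholder worker in place of the wrapped cipher:

#include <asm/i387.h>

/*
 * Sketch of the pattern fpu.c generalizes: SIMD/FPU-touching work in kernel
 * context is bracketed by kernel_fpu_begin()/kernel_fpu_end() and must not
 * sleep in between. simd_worker() stands in for the child cipher callback.
 */
static int run_with_fpu(int (*simd_worker)(void *arg), void *arg)
{
	int ret;

	kernel_fpu_begin();
	ret = simd_worker(arg);		/* e.g. AES-NI instructions on XMM registers */
	kernel_fpu_end();
	return ret;
}
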
diff --git a/arch/x86/ia32/ia32entry.S b/arch/x86/ia32/ia32entry.S index a505202086e8..e590261ba059 100644 --- a/arch/x86/ia32/ia32entry.S +++ b/arch/x86/ia32/ia32entry.S | |||
@@ -825,9 +825,11 @@ ia32_sys_call_table: | |||
825 | .quad compat_sys_signalfd4 | 825 | .quad compat_sys_signalfd4 |
826 | .quad sys_eventfd2 | 826 | .quad sys_eventfd2 |
827 | .quad sys_epoll_create1 | 827 | .quad sys_epoll_create1 |
828 | .quad sys_dup3 /* 330 */ | 828 | .quad sys_dup3 /* 330 */ |
829 | .quad sys_pipe2 | 829 | .quad sys_pipe2 |
830 | .quad sys_inotify_init1 | 830 | .quad sys_inotify_init1 |
831 | .quad compat_sys_preadv | 831 | .quad compat_sys_preadv |
832 | .quad compat_sys_pwritev | 832 | .quad compat_sys_pwritev |
833 | .quad compat_sys_rt_tgsigqueueinfo /* 335 */ | ||
834 | .quad sys_perf_counter_open | ||
833 | ia32_syscall_end: | 835 | ia32_syscall_end: |
diff --git a/arch/x86/include/asm/acpi.h b/arch/x86/include/asm/acpi.h index 4518dc500903..20d1465a2ab0 100644 --- a/arch/x86/include/asm/acpi.h +++ b/arch/x86/include/asm/acpi.h | |||
@@ -144,6 +144,7 @@ static inline unsigned int acpi_processor_cstate_check(unsigned int max_cstate) | |||
144 | 144 | ||
145 | #else /* !CONFIG_ACPI */ | 145 | #else /* !CONFIG_ACPI */ |
146 | 146 | ||
147 | #define acpi_disabled 1 | ||
147 | #define acpi_lapic 0 | 148 | #define acpi_lapic 0 |
148 | #define acpi_ioapic 0 | 149 | #define acpi_ioapic 0 |
149 | static inline void acpi_noirq_set(void) { } | 150 | static inline void acpi_noirq_set(void) { } |
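
Defining acpi_disabled as the constant 1 when ACPI is compiled out lets callers test it unconditionally instead of wrapping every use in #ifdef CONFIG_ACPI; the compiler then discards the ACPI-only branch as dead code. A hypothetical caller, shown only to illustrate the point:

#include <asm/acpi.h>
#include <linux/errno.h>

/* Illustrative caller, not from the patch: the same source builds with and
 * without CONFIG_ACPI because acpi_disabled is always defined. */
static int maybe_use_acpi_timer(void)
{
	if (acpi_disabled)
		return -ENODEV;	/* ACPI absent or disabled: use a fallback path */
	return 0;		/* proceed with an ACPI-backed path */
}
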
diff --git a/arch/x86/include/asm/alternative.h b/arch/x86/include/asm/alternative.h index f6aa18eadf71..1a37bcdc8606 100644 --- a/arch/x86/include/asm/alternative.h +++ b/arch/x86/include/asm/alternative.h | |||
@@ -3,6 +3,7 @@ | |||
3 | 3 | ||
4 | #include <linux/types.h> | 4 | #include <linux/types.h> |
5 | #include <linux/stddef.h> | 5 | #include <linux/stddef.h> |
6 | #include <linux/stringify.h> | ||
6 | #include <asm/asm.h> | 7 | #include <asm/asm.h> |
7 | 8 | ||
8 | /* | 9 | /* |
@@ -74,6 +75,22 @@ static inline void alternatives_smp_switch(int smp) {} | |||
74 | 75 | ||
75 | const unsigned char *const *find_nop_table(void); | 76 | const unsigned char *const *find_nop_table(void); |
76 | 77 | ||
78 | /* alternative assembly primitive: */ | ||
79 | #define ALTERNATIVE(oldinstr, newinstr, feature) \ | ||
80 | \ | ||
81 | "661:\n\t" oldinstr "\n662:\n" \ | ||
82 | ".section .altinstructions,\"a\"\n" \ | ||
83 | _ASM_ALIGN "\n" \ | ||
84 | _ASM_PTR "661b\n" /* label */ \ | ||
85 | _ASM_PTR "663f\n" /* new instruction */ \ | ||
86 | " .byte " __stringify(feature) "\n" /* feature bit */ \ | ||
87 | " .byte 662b-661b\n" /* sourcelen */ \ | ||
88 | " .byte 664f-663f\n" /* replacementlen */ \ | ||
89 | ".previous\n" \ | ||
90 | ".section .altinstr_replacement, \"ax\"\n" \ | ||
91 | "663:\n\t" newinstr "\n664:\n" /* replacement */ \ | ||
92 | ".previous" | ||
93 | |||
77 | /* | 94 | /* |
78 | * Alternative instructions for different CPU types or capabilities. | 95 | * Alternative instructions for different CPU types or capabilities. |
79 | * | 96 | * |
@@ -87,18 +104,7 @@ const unsigned char *const *find_nop_table(void); | |||
87 | * without volatile and memory clobber. | 104 | * without volatile and memory clobber. |
88 | */ | 105 | */ |
89 | #define alternative(oldinstr, newinstr, feature) \ | 106 | #define alternative(oldinstr, newinstr, feature) \ |
90 | asm volatile ("661:\n\t" oldinstr "\n662:\n" \ | 107 | asm volatile (ALTERNATIVE(oldinstr, newinstr, feature) : : : "memory") |
91 | ".section .altinstructions,\"a\"\n" \ | ||
92 | _ASM_ALIGN "\n" \ | ||
93 | _ASM_PTR "661b\n" /* label */ \ | ||
94 | _ASM_PTR "663f\n" /* new instruction */ \ | ||
95 | " .byte %c0\n" /* feature bit */ \ | ||
96 | " .byte 662b-661b\n" /* sourcelen */ \ | ||
97 | " .byte 664f-663f\n" /* replacementlen */ \ | ||
98 | ".previous\n" \ | ||
99 | ".section .altinstr_replacement,\"ax\"\n" \ | ||
100 | "663:\n\t" newinstr "\n664:\n" /* replacement */ \ | ||
101 | ".previous" :: "i" (feature) : "memory") | ||
102 | 108 | ||
103 | /* | 109 | /* |
104 | * Alternative inline assembly with input. | 110 | * Alternative inline assembly with input. |
@@ -109,35 +115,16 @@ const unsigned char *const *find_nop_table(void); | |||
109 | * Best is to use constraints that are fixed size (like (%1) ... "r") | 115 | * Best is to use constraints that are fixed size (like (%1) ... "r") |
110 | * If you use variable sized constraints like "m" or "g" in the | 116 | * If you use variable sized constraints like "m" or "g" in the |
111 | * replacement make sure to pad to the worst case length. | 117 | * replacement make sure to pad to the worst case length. |
118 | * Leaving an unused argument 0 to keep API compatibility. | ||
112 | */ | 119 | */ |
113 | #define alternative_input(oldinstr, newinstr, feature, input...) \ | 120 | #define alternative_input(oldinstr, newinstr, feature, input...) \ |
114 | asm volatile ("661:\n\t" oldinstr "\n662:\n" \ | 121 | asm volatile (ALTERNATIVE(oldinstr, newinstr, feature) \ |
115 | ".section .altinstructions,\"a\"\n" \ | 122 | : : "i" (0), ## input) |
116 | _ASM_ALIGN "\n" \ | ||
117 | _ASM_PTR "661b\n" /* label */ \ | ||
118 | _ASM_PTR "663f\n" /* new instruction */ \ | ||
119 | " .byte %c0\n" /* feature bit */ \ | ||
120 | " .byte 662b-661b\n" /* sourcelen */ \ | ||
121 | " .byte 664f-663f\n" /* replacementlen */ \ | ||
122 | ".previous\n" \ | ||
123 | ".section .altinstr_replacement,\"ax\"\n" \ | ||
124 | "663:\n\t" newinstr "\n664:\n" /* replacement */ \ | ||
125 | ".previous" :: "i" (feature), ##input) | ||
126 | 123 | ||
127 | /* Like alternative_input, but with a single output argument */ | 124 | /* Like alternative_input, but with a single output argument */ |
128 | #define alternative_io(oldinstr, newinstr, feature, output, input...) \ | 125 | #define alternative_io(oldinstr, newinstr, feature, output, input...) \ |
129 | asm volatile ("661:\n\t" oldinstr "\n662:\n" \ | 126 | asm volatile (ALTERNATIVE(oldinstr, newinstr, feature) \ |
130 | ".section .altinstructions,\"a\"\n" \ | 127 | : output : "i" (0), ## input) |
131 | _ASM_ALIGN "\n" \ | ||
132 | _ASM_PTR "661b\n" /* label */ \ | ||
133 | _ASM_PTR "663f\n" /* new instruction */ \ | ||
134 | " .byte %c[feat]\n" /* feature bit */ \ | ||
135 | " .byte 662b-661b\n" /* sourcelen */ \ | ||
136 | " .byte 664f-663f\n" /* replacementlen */ \ | ||
137 | ".previous\n" \ | ||
138 | ".section .altinstr_replacement,\"ax\"\n" \ | ||
139 | "663:\n\t" newinstr "\n664:\n" /* replacement */ \ | ||
140 | ".previous" : output : [feat] "i" (feature), ##input) | ||
141 | 128 | ||
142 | /* | 129 | /* |
143 | * use this macro(s) if you need more than one output parameter | 130 | * use this macro(s) if you need more than one output parameter |
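With the instruction/section boilerplate factored into ALTERNATIVE(), the C wrappers above shrink to one-liners. A hedged sketch of typical use, borrowing the familiar SSE2 fence replacement (illustrative, not introduced by this patch):

        /* Patch a locked add out in favour of MFENCE when SSE2 is present. */
        static inline void example_mb(void)
        {
                alternative("lock; addl $0,0(%%esp)", "mfence", X86_FEATURE_XMM2);
        }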
diff --git a/arch/x86/include/asm/amd_iommu.h b/arch/x86/include/asm/amd_iommu.h index f712344329bc..bdf96f119f06 100644 --- a/arch/x86/include/asm/amd_iommu.h +++ b/arch/x86/include/asm/amd_iommu.h | |||
@@ -27,9 +27,13 @@ extern int amd_iommu_init(void); | |||
27 | extern int amd_iommu_init_dma_ops(void); | 27 | extern int amd_iommu_init_dma_ops(void); |
28 | extern void amd_iommu_detect(void); | 28 | extern void amd_iommu_detect(void); |
29 | extern irqreturn_t amd_iommu_int_handler(int irq, void *data); | 29 | extern irqreturn_t amd_iommu_int_handler(int irq, void *data); |
30 | extern void amd_iommu_flush_all_domains(void); | ||
31 | extern void amd_iommu_flush_all_devices(void); | ||
32 | extern void amd_iommu_shutdown(void); | ||
30 | #else | 33 | #else |
31 | static inline int amd_iommu_init(void) { return -ENODEV; } | 34 | static inline int amd_iommu_init(void) { return -ENODEV; } |
32 | static inline void amd_iommu_detect(void) { } | 35 | static inline void amd_iommu_detect(void) { } |
36 | static inline void amd_iommu_shutdown(void) { } | ||
33 | #endif | 37 | #endif |
34 | 38 | ||
35 | #endif /* _ASM_X86_AMD_IOMMU_H */ | 39 | #endif /* _ASM_X86_AMD_IOMMU_H */ |
diff --git a/arch/x86/include/asm/amd_iommu_types.h b/arch/x86/include/asm/amd_iommu_types.h index 95c8cd9d22b5..0c878caaa0a2 100644 --- a/arch/x86/include/asm/amd_iommu_types.h +++ b/arch/x86/include/asm/amd_iommu_types.h | |||
@@ -194,6 +194,27 @@ | |||
194 | #define PD_DMA_OPS_MASK (1UL << 0) /* domain used for dma_ops */ | 194 | #define PD_DMA_OPS_MASK (1UL << 0) /* domain used for dma_ops */ |
195 | #define PD_DEFAULT_MASK (1UL << 1) /* domain is a default dma_ops | 195 | #define PD_DEFAULT_MASK (1UL << 1) /* domain is a default dma_ops |
196 | domain for an IOMMU */ | 196 | domain for an IOMMU */ |
197 | extern bool amd_iommu_dump; | ||
198 | #define DUMP_printk(format, arg...) \ | ||
199 | do { \ | ||
200 | if (amd_iommu_dump) \ | ||
201 | printk(KERN_INFO "AMD IOMMU: " format, ## arg); \ | ||
202 | } while (0) | ||
203 | |||
204 | /* | ||
205 | * Make iterating over all IOMMUs easier | ||
206 | */ | ||
207 | #define for_each_iommu(iommu) \ | ||
208 | list_for_each_entry((iommu), &amd_iommu_list, list) | ||
209 | #define for_each_iommu_safe(iommu, next) \ | ||
210 | list_for_each_entry_safe((iommu), (next), &amd_iommu_list, list) | ||
211 | |||
212 | #define APERTURE_RANGE_SHIFT 27 /* 128 MB */ | ||
213 | #define APERTURE_RANGE_SIZE (1ULL << APERTURE_RANGE_SHIFT) | ||
214 | #define APERTURE_RANGE_PAGES (APERTURE_RANGE_SIZE >> PAGE_SHIFT) | ||
215 | #define APERTURE_MAX_RANGES 32 /* allows 4GB of DMA address space */ | ||
216 | #define APERTURE_RANGE_INDEX(a) ((a) >> APERTURE_RANGE_SHIFT) | ||
217 | #define APERTURE_PAGE_INDEX(a) (((a) >> 21) & 0x3fULL) | ||
197 | 218 | ||
198 | /* | 219 | /* |
199 | * This structure contains generic data for IOMMU protection domains | 220 | * This structure contains generic data for IOMMU protection domains |
@@ -210,6 +231,26 @@ struct protection_domain { | |||
210 | }; | 231 | }; |
211 | 232 | ||
212 | /* | 233 | /* |
234 | * For dynamic growth the aperture size is split into ranges of 128MB of | ||
235 | * DMA address space each. This struct represents one such range. | ||
236 | */ | ||
237 | struct aperture_range { | ||
238 | |||
239 | /* address allocation bitmap */ | ||
240 | unsigned long *bitmap; | ||
241 | |||
242 | /* | ||
243 | * Array of PTE pages for the aperture. In this array we save all the | ||
244 | * leaf pages of the domain page table used for the aperture. This way | ||
245 | * we don't need to walk the page table to find a specific PTE. We can | ||
246 | * just calculate its address in constant time. | ||
247 | */ | ||
248 | u64 *pte_pages[64]; | ||
249 | |||
250 | unsigned long offset; | ||
251 | }; | ||
252 | |||
253 | /* | ||
213 | * Data container for a dma_ops specific protection domain | 254 | * Data container for a dma_ops specific protection domain |
214 | */ | 255 | */ |
215 | struct dma_ops_domain { | 256 | struct dma_ops_domain { |
@@ -222,18 +263,10 @@ struct dma_ops_domain { | |||
222 | unsigned long aperture_size; | 263 | unsigned long aperture_size; |
223 | 264 | ||
224 | /* address we start to search for free addresses */ | 265 | /* address we start to search for free addresses */ |
225 | unsigned long next_bit; | 266 | unsigned long next_address; |
226 | |||
227 | /* address allocation bitmap */ | ||
228 | unsigned long *bitmap; | ||
229 | 267 | ||
230 | /* | 268 | /* address space relevant data */ |
231 | * Array of PTE pages for the aperture. In this array we save all the | 269 | struct aperture_range *aperture[APERTURE_MAX_RANGES]; |
232 | * leaf pages of the domain page table used for the aperture. This way | ||
233 | * we don't need to walk the page table to find a specific PTE. We can | ||
234 | * just calculate its address in constant time. | ||
235 | */ | ||
236 | u64 **pte_pages; | ||
237 | 270 | ||
238 | /* This will be set to true when TLB needs to be flushed */ | 271 | /* This will be set to true when TLB needs to be flushed */ |
239 | bool need_flush; | 272 | bool need_flush; |
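For orientation, the APERTURE_* helpers added above split a DMA address into a 128 MB range index and the index of the leaf PTE page inside that range. A small worked sketch with a made-up address:

        /* Illustrative arithmetic only; the address value is invented. */
        static void aperture_index_example(void)
        {
                unsigned long addr  = 0x0830f000UL;               /* example DMA address  */
                unsigned long range = APERTURE_RANGE_INDEX(addr); /* addr >> 27        = 1 */
                unsigned long page  = APERTURE_PAGE_INDEX(addr);  /* (addr >> 21) & 63 = 1 */

                /* aperture[range]->pte_pages[page] holds the leaf PTEs for this
                 * address; the low 21 bits select the PTE and the in-page offset. */
                (void)range; (void)page;
        }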
diff --git a/arch/x86/include/asm/apic.h b/arch/x86/include/asm/apic.h index 42f2f8377422..bb7d47925847 100644 --- a/arch/x86/include/asm/apic.h +++ b/arch/x86/include/asm/apic.h | |||
@@ -107,8 +107,7 @@ extern u32 native_safe_apic_wait_icr_idle(void); | |||
107 | extern void native_apic_icr_write(u32 low, u32 id); | 107 | extern void native_apic_icr_write(u32 low, u32 id); |
108 | extern u64 native_apic_icr_read(void); | 108 | extern u64 native_apic_icr_read(void); |
109 | 109 | ||
110 | #define EIM_8BIT_APIC_ID 0 | 110 | extern int x2apic_mode; |
111 | #define EIM_32BIT_APIC_ID 1 | ||
112 | 111 | ||
113 | #ifdef CONFIG_X86_X2APIC | 112 | #ifdef CONFIG_X86_X2APIC |
114 | /* | 113 | /* |
@@ -166,10 +165,9 @@ static inline u64 native_x2apic_icr_read(void) | |||
166 | return val; | 165 | return val; |
167 | } | 166 | } |
168 | 167 | ||
169 | extern int x2apic, x2apic_phys; | 168 | extern int x2apic_phys; |
170 | extern void check_x2apic(void); | 169 | extern void check_x2apic(void); |
171 | extern void enable_x2apic(void); | 170 | extern void enable_x2apic(void); |
172 | extern void enable_IR_x2apic(void); | ||
173 | extern void x2apic_icr_write(u32 low, u32 id); | 171 | extern void x2apic_icr_write(u32 low, u32 id); |
174 | static inline int x2apic_enabled(void) | 172 | static inline int x2apic_enabled(void) |
175 | { | 173 | { |
@@ -183,6 +181,8 @@ static inline int x2apic_enabled(void) | |||
183 | return 1; | 181 | return 1; |
184 | return 0; | 182 | return 0; |
185 | } | 183 | } |
184 | |||
185 | #define x2apic_supported() (cpu_has_x2apic) | ||
186 | #else | 186 | #else |
187 | static inline void check_x2apic(void) | 187 | static inline void check_x2apic(void) |
188 | { | 188 | { |
@@ -190,28 +190,20 @@ static inline void check_x2apic(void) | |||
190 | static inline void enable_x2apic(void) | 190 | static inline void enable_x2apic(void) |
191 | { | 191 | { |
192 | } | 192 | } |
193 | static inline void enable_IR_x2apic(void) | ||
194 | { | ||
195 | } | ||
196 | static inline int x2apic_enabled(void) | 193 | static inline int x2apic_enabled(void) |
197 | { | 194 | { |
198 | return 0; | 195 | return 0; |
199 | } | 196 | } |
200 | 197 | ||
201 | #define x2apic 0 | 198 | #define x2apic_preenabled 0 |
202 | 199 | #define x2apic_supported() 0 | |
203 | #endif | 200 | #endif |
204 | 201 | ||
205 | extern int get_physical_broadcast(void); | 202 | extern void enable_IR_x2apic(void); |
206 | 203 | ||
207 | #ifdef CONFIG_X86_X2APIC | 204 | extern int get_physical_broadcast(void); |
208 | static inline void ack_x2APIC_irq(void) | ||
209 | { | ||
210 | /* Docs say use 0 for future compatibility */ | ||
211 | native_apic_msr_write(APIC_EOI, 0); | ||
212 | } | ||
213 | #endif | ||
214 | 205 | ||
206 | extern void apic_disable(void); | ||
215 | extern int lapic_get_maxlvt(void); | 207 | extern int lapic_get_maxlvt(void); |
216 | extern void clear_local_APIC(void); | 208 | extern void clear_local_APIC(void); |
217 | extern void connect_bsp_APIC(void); | 209 | extern void connect_bsp_APIC(void); |
@@ -252,7 +244,7 @@ static inline void lapic_shutdown(void) { } | |||
252 | #define local_apic_timer_c2_ok 1 | 244 | #define local_apic_timer_c2_ok 1 |
253 | static inline void init_apic_mappings(void) { } | 245 | static inline void init_apic_mappings(void) { } |
254 | static inline void disable_local_APIC(void) { } | 246 | static inline void disable_local_APIC(void) { } |
255 | 247 | static inline void apic_disable(void) { } | |
256 | #endif /* !CONFIG_X86_LOCAL_APIC */ | 248 | #endif /* !CONFIG_X86_LOCAL_APIC */ |
257 | 249 | ||
258 | #ifdef CONFIG_X86_64 | 250 | #ifdef CONFIG_X86_64 |
@@ -410,7 +402,7 @@ static inline unsigned default_get_apic_id(unsigned long x) | |||
410 | { | 402 | { |
411 | unsigned int ver = GET_APIC_VERSION(apic_read(APIC_LVR)); | 403 | unsigned int ver = GET_APIC_VERSION(apic_read(APIC_LVR)); |
412 | 404 | ||
413 | if (APIC_XAPIC(ver)) | 405 | if (APIC_XAPIC(ver) || boot_cpu_has(X86_FEATURE_EXTD_APICID)) |
414 | return (x >> 24) & 0xFF; | 406 | return (x >> 24) & 0xFF; |
415 | else | 407 | else |
416 | return (x >> 24) & 0x0F; | 408 | return (x >> 24) & 0x0F; |
@@ -478,6 +470,9 @@ static inline unsigned int read_apic_id(void) | |||
478 | extern void default_setup_apic_routing(void); | 470 | extern void default_setup_apic_routing(void); |
479 | 471 | ||
480 | #ifdef CONFIG_X86_32 | 472 | #ifdef CONFIG_X86_32 |
473 | |||
474 | extern struct apic apic_default; | ||
475 | |||
481 | /* | 476 | /* |
482 | * Set up the logical destination ID. | 477 | * Set up the logical destination ID. |
483 | * | 478 | * |
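The old x2apic/EIM constants give way to a single x2apic_mode flag plus an x2apic_supported() capability test. A hedged sketch of how a caller might now gate x2APIC-only work (the function is made up):

        /* Illustrative caller only. */
        static void example_apic_report(void)
        {
                if (!x2apic_supported())        /* CPUID capability (cpu_has_x2apic) */
                        return;

                if (x2apic_mode)                /* x2APIC register interface enabled */
                        pr_info("APIC: running in x2APIC mode\n");
        }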
diff --git a/arch/x86/include/asm/apicdef.h b/arch/x86/include/asm/apicdef.h index bc9514fb3b13..7ddb36ab933b 100644 --- a/arch/x86/include/asm/apicdef.h +++ b/arch/x86/include/asm/apicdef.h | |||
@@ -22,6 +22,7 @@ | |||
22 | # define APIC_INTEGRATED(x) (1) | 22 | # define APIC_INTEGRATED(x) (1) |
23 | #endif | 23 | #endif |
24 | #define APIC_XAPIC(x) ((x) >= 0x14) | 24 | #define APIC_XAPIC(x) ((x) >= 0x14) |
25 | #define APIC_EXT_SPACE(x) ((x) & 0x80000000) | ||
25 | #define APIC_TASKPRI 0x80 | 26 | #define APIC_TASKPRI 0x80 |
26 | #define APIC_TPRI_MASK 0xFFu | 27 | #define APIC_TPRI_MASK 0xFFu |
27 | #define APIC_ARBPRI 0x90 | 28 | #define APIC_ARBPRI 0x90 |
@@ -116,7 +117,9 @@ | |||
116 | #define APIC_TDR_DIV_32 0x8 | 117 | #define APIC_TDR_DIV_32 0x8 |
117 | #define APIC_TDR_DIV_64 0x9 | 118 | #define APIC_TDR_DIV_64 0x9 |
118 | #define APIC_TDR_DIV_128 0xA | 119 | #define APIC_TDR_DIV_128 0xA |
119 | #define APIC_EILVT0 0x500 | 120 | #define APIC_EFEAT 0x400 |
121 | #define APIC_ECTRL 0x410 | ||
122 | #define APIC_EILVTn(n) (0x500 + 0x10 * n) | ||
120 | #define APIC_EILVT_NR_AMD_K8 1 /* # of extended interrupts */ | 123 | #define APIC_EILVT_NR_AMD_K8 1 /* # of extended interrupts */ |
121 | #define APIC_EILVT_NR_AMD_10H 4 | 124 | #define APIC_EILVT_NR_AMD_10H 4 |
122 | #define APIC_EILVT_LVTOFF(x) (((x) >> 4) & 0xF) | 125 | #define APIC_EILVT_LVTOFF(x) (((x) >> 4) & 0xF) |
@@ -125,9 +128,6 @@ | |||
125 | #define APIC_EILVT_MSG_NMI 0x4 | 128 | #define APIC_EILVT_MSG_NMI 0x4 |
126 | #define APIC_EILVT_MSG_EXT 0x7 | 129 | #define APIC_EILVT_MSG_EXT 0x7 |
127 | #define APIC_EILVT_MASKED (1 << 16) | 130 | #define APIC_EILVT_MASKED (1 << 16) |
128 | #define APIC_EILVT1 0x510 | ||
129 | #define APIC_EILVT2 0x520 | ||
130 | #define APIC_EILVT3 0x530 | ||
131 | 131 | ||
132 | #define APIC_BASE (fix_to_virt(FIX_APIC_BASE)) | 132 | #define APIC_BASE (fix_to_virt(FIX_APIC_BASE)) |
133 | #define APIC_BASE_MSR 0x800 | 133 | #define APIC_BASE_MSR 0x800 |
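The four hard-coded extended-LVT offsets collapse into the indexed APIC_EILVTn() macro; the old constants fall out of it directly (0x500, 0x510, 0x520, 0x530 for n = 0..3). A hedged fragment of what a former APIC_EILVT1 user might now write; the value layout follows the usual LVT register format and is illustrative only.

        /* Mask extended LVT entry 1; message type sits in bits 8-10, mask in bit 16. */
        apic_write(APIC_EILVTn(1), (APIC_EILVT_MSG_EXT << 8) | APIC_EILVT_MASKED);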
diff --git a/arch/x86/include/asm/atomic_32.h b/arch/x86/include/asm/atomic_32.h index 85b46fba4229..2503d4e64c2a 100644 --- a/arch/x86/include/asm/atomic_32.h +++ b/arch/x86/include/asm/atomic_32.h | |||
@@ -247,5 +247,240 @@ static inline int atomic_add_unless(atomic_t *v, int a, int u) | |||
247 | #define smp_mb__before_atomic_inc() barrier() | 247 | #define smp_mb__before_atomic_inc() barrier() |
248 | #define smp_mb__after_atomic_inc() barrier() | 248 | #define smp_mb__after_atomic_inc() barrier() |
249 | 249 | ||
250 | #include <asm-generic/atomic.h> | 250 | /* A 64-bit atomic type */ |
251 | |||
252 | typedef struct { | ||
253 | unsigned long long counter; | ||
254 | } atomic64_t; | ||
255 | |||
256 | #define ATOMIC64_INIT(val) { (val) } | ||
257 | |||
258 | /** | ||
259 | * atomic64_read - read atomic64 variable | ||
260 | * @ptr: pointer of type atomic64_t | ||
261 | * | ||
262 | * Atomically reads the value of @ptr. | ||
263 | * Doesn't imply a read memory barrier. | ||
264 | */ | ||
265 | #define __atomic64_read(ptr) ((ptr)->counter) | ||
266 | |||
267 | static inline unsigned long long | ||
268 | cmpxchg8b(unsigned long long *ptr, unsigned long long old, unsigned long long new) | ||
269 | { | ||
270 | asm volatile( | ||
271 | |||
272 | LOCK_PREFIX "cmpxchg8b (%[ptr])\n" | ||
273 | |||
274 | : "=A" (old) | ||
275 | |||
276 | : [ptr] "D" (ptr), | ||
277 | "A" (old), | ||
278 | "b" (ll_low(new)), | ||
279 | "c" (ll_high(new)) | ||
280 | |||
281 | : "memory"); | ||
282 | |||
283 | return old; | ||
284 | } | ||
285 | |||
286 | static inline unsigned long long | ||
287 | atomic64_cmpxchg(atomic64_t *ptr, unsigned long long old_val, | ||
288 | unsigned long long new_val) | ||
289 | { | ||
290 | return cmpxchg8b(&ptr->counter, old_val, new_val); | ||
291 | } | ||
292 | |||
293 | /** | ||
294 | * atomic64_xchg - xchg atomic64 variable | ||
295 | * @ptr: pointer to type atomic64_t | ||
296 | * @new_val: value to assign | ||
297 | * | ||
298 | * Atomically xchgs the value of @ptr to @new_val and returns | ||
299 | * the old value. | ||
300 | */ | ||
301 | |||
302 | static inline unsigned long long | ||
303 | atomic64_xchg(atomic64_t *ptr, unsigned long long new_val) | ||
304 | { | ||
305 | unsigned long long old_val; | ||
306 | |||
307 | do { | ||
308 | old_val = __atomic64_read(ptr); | ||
309 | } while (atomic64_cmpxchg(ptr, old_val, new_val) != old_val); | ||
310 | |||
311 | return old_val; | ||
312 | } | ||
313 | |||
314 | /** | ||
315 | * atomic64_set - set atomic64 variable | ||
316 | * @ptr: pointer to type atomic64_t | ||
317 | * @new_val: value to assign | ||
318 | * | ||
319 | * Atomically sets the value of @ptr to @new_val. | ||
320 | */ | ||
321 | static inline void atomic64_set(atomic64_t *ptr, unsigned long long new_val) | ||
322 | { | ||
323 | atomic64_xchg(ptr, new_val); | ||
324 | } | ||
325 | |||
326 | /** | ||
327 | * atomic64_read - read atomic64 variable | ||
328 | * @ptr: pointer to type atomic64_t | ||
329 | * | ||
330 | * Atomically reads the value of @ptr and returns it. | ||
331 | */ | ||
332 | static inline unsigned long long atomic64_read(atomic64_t *ptr) | ||
333 | { | ||
334 | unsigned long long curr_val; | ||
335 | |||
336 | do { | ||
337 | curr_val = __atomic64_read(ptr); | ||
338 | } while (atomic64_cmpxchg(ptr, curr_val, curr_val) != curr_val); | ||
339 | |||
340 | return curr_val; | ||
341 | } | ||
342 | |||
343 | /** | ||
344 | * atomic64_add_return - add and return | ||
345 | * @delta: integer value to add | ||
346 | * @ptr: pointer to type atomic64_t | ||
347 | * | ||
348 | * Atomically adds @delta to @ptr and returns @delta + *@ptr | ||
349 | */ | ||
350 | static inline unsigned long long | ||
351 | atomic64_add_return(unsigned long long delta, atomic64_t *ptr) | ||
352 | { | ||
353 | unsigned long long old_val, new_val; | ||
354 | |||
355 | do { | ||
356 | old_val = __atomic64_read(ptr); | ||
357 | new_val = old_val + delta; | ||
358 | |||
359 | } while (atomic64_cmpxchg(ptr, old_val, new_val) != old_val); | ||
360 | |||
361 | return new_val; | ||
362 | } | ||
363 | |||
364 | static inline long atomic64_sub_return(unsigned long long delta, atomic64_t *ptr) | ||
365 | { | ||
366 | return atomic64_add_return(-delta, ptr); | ||
367 | } | ||
368 | |||
369 | static inline long atomic64_inc_return(atomic64_t *ptr) | ||
370 | { | ||
371 | return atomic64_add_return(1, ptr); | ||
372 | } | ||
373 | |||
374 | static inline long atomic64_dec_return(atomic64_t *ptr) | ||
375 | { | ||
376 | return atomic64_sub_return(1, ptr); | ||
377 | } | ||
378 | |||
379 | /** | ||
380 | * atomic64_add - add integer to atomic64 variable | ||
381 | * @delta: integer value to add | ||
382 | * @ptr: pointer to type atomic64_t | ||
383 | * | ||
384 | * Atomically adds @delta to @ptr. | ||
385 | */ | ||
386 | static inline void atomic64_add(unsigned long long delta, atomic64_t *ptr) | ||
387 | { | ||
388 | atomic64_add_return(delta, ptr); | ||
389 | } | ||
390 | |||
391 | /** | ||
392 | * atomic64_sub - subtract the atomic64 variable | ||
393 | * @delta: integer value to subtract | ||
394 | * @ptr: pointer to type atomic64_t | ||
395 | * | ||
396 | * Atomically subtracts @delta from @ptr. | ||
397 | */ | ||
398 | static inline void atomic64_sub(unsigned long long delta, atomic64_t *ptr) | ||
399 | { | ||
400 | atomic64_add(-delta, ptr); | ||
401 | } | ||
402 | |||
403 | /** | ||
404 | * atomic64_sub_and_test - subtract value from variable and test result | ||
405 | * @delta: integer value to subtract | ||
406 | * @ptr: pointer to type atomic64_t | ||
407 | * | ||
408 | * Atomically subtracts @delta from @ptr and returns | ||
409 | * true if the result is zero, or false for all | ||
410 | * other cases. | ||
411 | */ | ||
412 | static inline int | ||
413 | atomic64_sub_and_test(unsigned long long delta, atomic64_t *ptr) | ||
414 | { | ||
415 | unsigned long long old_val = atomic64_sub_return(delta, ptr); | ||
416 | |||
417 | return old_val == 0; | ||
418 | } | ||
419 | |||
420 | /** | ||
421 | * atomic64_inc - increment atomic64 variable | ||
422 | * @ptr: pointer to type atomic64_t | ||
423 | * | ||
424 | * Atomically increments @ptr by 1. | ||
425 | */ | ||
426 | static inline void atomic64_inc(atomic64_t *ptr) | ||
427 | { | ||
428 | atomic64_add(1, ptr); | ||
429 | } | ||
430 | |||
431 | /** | ||
432 | * atomic64_dec - decrement atomic64 variable | ||
433 | * @ptr: pointer to type atomic64_t | ||
434 | * | ||
435 | * Atomically decrements @ptr by 1. | ||
436 | */ | ||
437 | static inline void atomic64_dec(atomic64_t *ptr) | ||
438 | { | ||
439 | atomic64_sub(1, ptr); | ||
440 | } | ||
441 | |||
442 | /** | ||
443 | * atomic64_dec_and_test - decrement and test | ||
444 | * @ptr: pointer to type atomic64_t | ||
445 | * | ||
446 | * Atomically decrements @ptr by 1 and | ||
447 | * returns true if the result is 0, or false for all other | ||
448 | * cases. | ||
449 | */ | ||
450 | static inline int atomic64_dec_and_test(atomic64_t *ptr) | ||
451 | { | ||
452 | return atomic64_sub_and_test(1, ptr); | ||
453 | } | ||
454 | |||
455 | /** | ||
456 | * atomic64_inc_and_test - increment and test | ||
457 | * @ptr: pointer to type atomic64_t | ||
458 | * | ||
459 | * Atomically increments @ptr by 1 | ||
460 | * and returns true if the result is zero, or false for all | ||
461 | * other cases. | ||
462 | */ | ||
463 | static inline int atomic64_inc_and_test(atomic64_t *ptr) | ||
464 | { | ||
465 | return atomic64_sub_and_test(-1, ptr); | ||
466 | } | ||
467 | |||
468 | /** | ||
469 | * atomic64_add_negative - add and test if negative | ||
470 | * @delta: integer value to add | ||
471 | * @ptr: pointer to type atomic64_t | ||
472 | * | ||
473 | * Atomically adds @delta to @ptr and returns true | ||
474 | * if the result is negative, or false when | ||
475 | * result is greater than or equal to zero. | ||
476 | */ | ||
477 | static inline int | ||
478 | atomic64_add_negative(unsigned long long delta, atomic64_t *ptr) | ||
479 | { | ||
480 | long long old_val = atomic64_add_return(delta, ptr); | ||
481 | |||
482 | return old_val < 0; | ||
483 | } | ||
484 | |||
485 | #include <asm-generic/atomic-long.h> | ||
251 | #endif /* _ASM_X86_ATOMIC_32_H */ | 486 | #endif /* _ASM_X86_ATOMIC_32_H */ |
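Everything in the new 32-bit atomic64_t implementation funnels through the CMPXCHG8B compare-and-swap loop above, so coherent 64-bit counters need no spinlock on 32-bit SMP. A hedged usage sketch (the counter and function names are made up):

        /* Illustrative only: a wrap-checked 64-bit statistics counter. */
        static atomic64_t example_bytes = ATOMIC64_INIT(0);

        static void example_account(unsigned long long len)
        {
                atomic64_add(len, &example_bytes);      /* lock-free 64-bit add */

                /* atomic64_read() returns an untorn 64-bit snapshot even on
                 * 32-bit CPUs; the reset below is illustration only and is not
                 * atomic with respect to the read that precedes it. */
                if (atomic64_read(&example_bytes) >= (1ULL << 32))
                        atomic64_set(&example_bytes, 0);
        }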
diff --git a/arch/x86/include/asm/atomic_64.h b/arch/x86/include/asm/atomic_64.h index 8c21731984da..0d6360220007 100644 --- a/arch/x86/include/asm/atomic_64.h +++ b/arch/x86/include/asm/atomic_64.h | |||
@@ -455,5 +455,5 @@ static inline void atomic_or_long(unsigned long *v1, unsigned long v2) | |||
455 | #define smp_mb__before_atomic_inc() barrier() | 455 | #define smp_mb__before_atomic_inc() barrier() |
456 | #define smp_mb__after_atomic_inc() barrier() | 456 | #define smp_mb__after_atomic_inc() barrier() |
457 | 457 | ||
458 | #include <asm-generic/atomic.h> | 458 | #include <asm-generic/atomic-long.h> |
459 | #endif /* _ASM_X86_ATOMIC_64_H */ | 459 | #endif /* _ASM_X86_ATOMIC_64_H */ |
diff --git a/arch/x86/include/asm/bitsperlong.h b/arch/x86/include/asm/bitsperlong.h new file mode 100644 index 000000000000..b0ae1c4dc791 --- /dev/null +++ b/arch/x86/include/asm/bitsperlong.h | |||
@@ -0,0 +1,13 @@ | |||
1 | #ifndef __ASM_X86_BITSPERLONG_H | ||
2 | #define __ASM_X86_BITSPERLONG_H | ||
3 | |||
4 | #ifdef __x86_64__ | ||
5 | # define __BITS_PER_LONG 64 | ||
6 | #else | ||
7 | # define __BITS_PER_LONG 32 | ||
8 | #endif | ||
9 | |||
10 | #include <asm-generic/bitsperlong.h> | ||
11 | |||
12 | #endif /* __ASM_X86_BITSPERLONG_H */ | ||
13 | |||
diff --git a/arch/x86/include/asm/boot.h b/arch/x86/include/asm/boot.h index 6ba23dd9fc92..418e632d4a80 100644 --- a/arch/x86/include/asm/boot.h +++ b/arch/x86/include/asm/boot.h | |||
@@ -8,11 +8,26 @@ | |||
8 | 8 | ||
9 | #ifdef __KERNEL__ | 9 | #ifdef __KERNEL__ |
10 | 10 | ||
11 | #include <asm/page_types.h> | ||
12 | |||
11 | /* Physical address where kernel should be loaded. */ | 13 | /* Physical address where kernel should be loaded. */ |
12 | #define LOAD_PHYSICAL_ADDR ((CONFIG_PHYSICAL_START \ | 14 | #define LOAD_PHYSICAL_ADDR ((CONFIG_PHYSICAL_START \ |
13 | + (CONFIG_PHYSICAL_ALIGN - 1)) \ | 15 | + (CONFIG_PHYSICAL_ALIGN - 1)) \ |
14 | & ~(CONFIG_PHYSICAL_ALIGN - 1)) | 16 | & ~(CONFIG_PHYSICAL_ALIGN - 1)) |
15 | 17 | ||
18 | /* Minimum kernel alignment, as a power of two */ | ||
19 | #ifdef CONFIG_X86_64 | ||
20 | #define MIN_KERNEL_ALIGN_LG2 PMD_SHIFT | ||
21 | #else | ||
22 | #define MIN_KERNEL_ALIGN_LG2 (PAGE_SHIFT+1) | ||
23 | #endif | ||
24 | #define MIN_KERNEL_ALIGN (_AC(1, UL) << MIN_KERNEL_ALIGN_LG2) | ||
25 | |||
26 | #if (CONFIG_PHYSICAL_ALIGN & (CONFIG_PHYSICAL_ALIGN-1)) || \ | ||
27 | (CONFIG_PHYSICAL_ALIGN < (_AC(1, UL) << MIN_KERNEL_ALIGN_LG2)) | ||
28 | #error "Invalid value for CONFIG_PHYSICAL_ALIGN" | ||
29 | #endif | ||
30 | |||
16 | #ifdef CONFIG_KERNEL_BZIP2 | 31 | #ifdef CONFIG_KERNEL_BZIP2 |
17 | #define BOOT_HEAP_SIZE 0x400000 | 32 | #define BOOT_HEAP_SIZE 0x400000 |
18 | #else /* !CONFIG_KERNEL_BZIP2 */ | 33 | #else /* !CONFIG_KERNEL_BZIP2 */ |
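Working the new constants through: on 64-bit, MIN_KERNEL_ALIGN_LG2 is PMD_SHIFT (21), so MIN_KERNEL_ALIGN is 2 MB; on 32-bit it is PAGE_SHIFT + 1 (13), i.e. 8 KB. The #error then rejects any CONFIG_PHYSICAL_ALIGN that is not a power of two at least that large; the sample values below are illustrative only.

        /* Worked examples for the build-time check above (comments only):
         *   CONFIG_PHYSICAL_ALIGN = 0x100000 (1 MB)   -> fails on 64-bit (< 2 MB)
         *   CONFIG_PHYSICAL_ALIGN = 0x200000 (2 MB)   -> passes on both
         *   CONFIG_PHYSICAL_ALIGN = 0x180000 (1.5 MB) -> fails everywhere (not a
         *                                                power of two)             */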
diff --git a/arch/x86/include/asm/bootparam.h b/arch/x86/include/asm/bootparam.h index 433adaebf9b6..1724e8de317c 100644 --- a/arch/x86/include/asm/bootparam.h +++ b/arch/x86/include/asm/bootparam.h | |||
@@ -50,7 +50,8 @@ struct setup_header { | |||
50 | __u32 ramdisk_size; | 50 | __u32 ramdisk_size; |
51 | __u32 bootsect_kludge; | 51 | __u32 bootsect_kludge; |
52 | __u16 heap_end_ptr; | 52 | __u16 heap_end_ptr; |
53 | __u16 _pad1; | 53 | __u8 ext_loader_ver; |
54 | __u8 ext_loader_type; | ||
54 | __u32 cmd_line_ptr; | 55 | __u32 cmd_line_ptr; |
55 | __u32 initrd_addr_max; | 56 | __u32 initrd_addr_max; |
56 | __u32 kernel_alignment; | 57 | __u32 kernel_alignment; |
diff --git a/arch/x86/include/asm/cpu_debug.h b/arch/x86/include/asm/cpu_debug.h index 222802029fa6..d96c1ee3a95c 100644 --- a/arch/x86/include/asm/cpu_debug.h +++ b/arch/x86/include/asm/cpu_debug.h | |||
@@ -86,105 +86,7 @@ enum cpu_file_bit { | |||
86 | CPU_VALUE_BIT, /* value */ | 86 | CPU_VALUE_BIT, /* value */ |
87 | }; | 87 | }; |
88 | 88 | ||
89 | #define CPU_FILE_VALUE (1 << CPU_VALUE_BIT) | 89 | #define CPU_FILE_VALUE (1 << CPU_VALUE_BIT) |
90 | |||
91 | /* | ||
92 | * DisplayFamily_DisplayModel Processor Families/Processor Number Series | ||
93 | * -------------------------- ------------------------------------------ | ||
94 | * 05_01, 05_02, 05_04 Pentium, Pentium with MMX | ||
95 | * | ||
96 | * 06_01 Pentium Pro | ||
97 | * 06_03, 06_05 Pentium II Xeon, Pentium II | ||
98 | * 06_07, 06_08, 06_0A, 06_0B Pentium III Xeon, Pentum III | ||
99 | * | ||
100 | * 06_09, 060D Pentium M | ||
101 | * | ||
102 | * 06_0E Core Duo, Core Solo | ||
103 | * | ||
104 | * 06_0F Xeon 3000, 3200, 5100, 5300, 7300 series, | ||
105 | * Core 2 Quad, Core 2 Extreme, Core 2 Duo, | ||
106 | * Pentium dual-core | ||
107 | * 06_17 Xeon 5200, 5400 series, Core 2 Quad Q9650 | ||
108 | * | ||
109 | * 06_1C Atom | ||
110 | * | ||
111 | * 0F_00, 0F_01, 0F_02 Xeon, Xeon MP, Pentium 4 | ||
112 | * 0F_03, 0F_04 Xeon, Xeon MP, Pentium 4, Pentium D | ||
113 | * | ||
114 | * 0F_06 Xeon 7100, 5000 Series, Xeon MP, | ||
115 | * Pentium 4, Pentium D | ||
116 | */ | ||
117 | |||
118 | /* Register processors bits */ | ||
119 | enum cpu_processor_bit { | ||
120 | CPU_NONE, | ||
121 | /* Intel */ | ||
122 | CPU_INTEL_PENTIUM_BIT, | ||
123 | CPU_INTEL_P6_BIT, | ||
124 | CPU_INTEL_PENTIUM_M_BIT, | ||
125 | CPU_INTEL_CORE_BIT, | ||
126 | CPU_INTEL_CORE2_BIT, | ||
127 | CPU_INTEL_ATOM_BIT, | ||
128 | CPU_INTEL_XEON_P4_BIT, | ||
129 | CPU_INTEL_XEON_MP_BIT, | ||
130 | /* AMD */ | ||
131 | CPU_AMD_K6_BIT, | ||
132 | CPU_AMD_K7_BIT, | ||
133 | CPU_AMD_K8_BIT, | ||
134 | CPU_AMD_0F_BIT, | ||
135 | CPU_AMD_10_BIT, | ||
136 | CPU_AMD_11_BIT, | ||
137 | }; | ||
138 | |||
139 | #define CPU_INTEL_PENTIUM (1 << CPU_INTEL_PENTIUM_BIT) | ||
140 | #define CPU_INTEL_P6 (1 << CPU_INTEL_P6_BIT) | ||
141 | #define CPU_INTEL_PENTIUM_M (1 << CPU_INTEL_PENTIUM_M_BIT) | ||
142 | #define CPU_INTEL_CORE (1 << CPU_INTEL_CORE_BIT) | ||
143 | #define CPU_INTEL_CORE2 (1 << CPU_INTEL_CORE2_BIT) | ||
144 | #define CPU_INTEL_ATOM (1 << CPU_INTEL_ATOM_BIT) | ||
145 | #define CPU_INTEL_XEON_P4 (1 << CPU_INTEL_XEON_P4_BIT) | ||
146 | #define CPU_INTEL_XEON_MP (1 << CPU_INTEL_XEON_MP_BIT) | ||
147 | |||
148 | #define CPU_INTEL_PX (CPU_INTEL_P6 | CPU_INTEL_PENTIUM_M) | ||
149 | #define CPU_INTEL_COREX (CPU_INTEL_CORE | CPU_INTEL_CORE2) | ||
150 | #define CPU_INTEL_XEON (CPU_INTEL_XEON_P4 | CPU_INTEL_XEON_MP) | ||
151 | #define CPU_CO_AT (CPU_INTEL_CORE | CPU_INTEL_ATOM) | ||
152 | #define CPU_C2_AT (CPU_INTEL_CORE2 | CPU_INTEL_ATOM) | ||
153 | #define CPU_CX_AT (CPU_INTEL_COREX | CPU_INTEL_ATOM) | ||
154 | #define CPU_CX_XE (CPU_INTEL_COREX | CPU_INTEL_XEON) | ||
155 | #define CPU_P6_XE (CPU_INTEL_P6 | CPU_INTEL_XEON) | ||
156 | #define CPU_PM_CO_AT (CPU_INTEL_PENTIUM_M | CPU_CO_AT) | ||
157 | #define CPU_C2_AT_XE (CPU_C2_AT | CPU_INTEL_XEON) | ||
158 | #define CPU_CX_AT_XE (CPU_CX_AT | CPU_INTEL_XEON) | ||
159 | #define CPU_P6_CX_AT (CPU_INTEL_P6 | CPU_CX_AT) | ||
160 | #define CPU_P6_CX_XE (CPU_P6_XE | CPU_INTEL_COREX) | ||
161 | #define CPU_P6_CX_AT_XE (CPU_INTEL_P6 | CPU_CX_AT_XE) | ||
162 | #define CPU_PM_CX_AT_XE (CPU_INTEL_PENTIUM_M | CPU_CX_AT_XE) | ||
163 | #define CPU_PM_CX_AT (CPU_INTEL_PENTIUM_M | CPU_CX_AT) | ||
164 | #define CPU_PM_CX_XE (CPU_INTEL_PENTIUM_M | CPU_CX_XE) | ||
165 | #define CPU_PX_CX_AT (CPU_INTEL_PX | CPU_CX_AT) | ||
166 | #define CPU_PX_CX_AT_XE (CPU_INTEL_PX | CPU_CX_AT_XE) | ||
167 | |||
168 | /* Select all supported Intel CPUs */ | ||
169 | #define CPU_INTEL_ALL (CPU_INTEL_PENTIUM | CPU_PX_CX_AT_XE) | ||
170 | |||
171 | #define CPU_AMD_K6 (1 << CPU_AMD_K6_BIT) | ||
172 | #define CPU_AMD_K7 (1 << CPU_AMD_K7_BIT) | ||
173 | #define CPU_AMD_K8 (1 << CPU_AMD_K8_BIT) | ||
174 | #define CPU_AMD_0F (1 << CPU_AMD_0F_BIT) | ||
175 | #define CPU_AMD_10 (1 << CPU_AMD_10_BIT) | ||
176 | #define CPU_AMD_11 (1 << CPU_AMD_11_BIT) | ||
177 | |||
178 | #define CPU_K10_PLUS (CPU_AMD_10 | CPU_AMD_11) | ||
179 | #define CPU_K0F_PLUS (CPU_AMD_0F | CPU_K10_PLUS) | ||
180 | #define CPU_K8_PLUS (CPU_AMD_K8 | CPU_K0F_PLUS) | ||
181 | #define CPU_K7_PLUS (CPU_AMD_K7 | CPU_K8_PLUS) | ||
182 | |||
183 | /* Select all supported AMD CPUs */ | ||
184 | #define CPU_AMD_ALL (CPU_AMD_K6 | CPU_K7_PLUS) | ||
185 | |||
186 | /* Select all supported CPUs */ | ||
187 | #define CPU_ALL (CPU_INTEL_ALL | CPU_AMD_ALL) | ||
188 | 90 | ||
189 | #define MAX_CPU_FILES 512 | 91 | #define MAX_CPU_FILES 512 |
190 | 92 | ||
@@ -220,7 +122,6 @@ struct cpu_debug_range { | |||
220 | unsigned min; /* Register range min */ | 122 | unsigned min; /* Register range min */ |
221 | unsigned max; /* Register range max */ | 123 | unsigned max; /* Register range max */ |
222 | unsigned flag; /* Supported flags */ | 124 | unsigned flag; /* Supported flags */ |
223 | unsigned model; /* Supported models */ | ||
224 | }; | 125 | }; |
225 | 126 | ||
226 | #endif /* _ASM_X86_CPU_DEBUG_H */ | 127 | #endif /* _ASM_X86_CPU_DEBUG_H */ |
diff --git a/arch/x86/include/asm/cpufeature.h b/arch/x86/include/asm/cpufeature.h index bb83b1c397aa..4a28d22d4793 100644 --- a/arch/x86/include/asm/cpufeature.h +++ b/arch/x86/include/asm/cpufeature.h | |||
@@ -22,7 +22,7 @@ | |||
22 | #define X86_FEATURE_TSC (0*32+ 4) /* Time Stamp Counter */ | 22 | #define X86_FEATURE_TSC (0*32+ 4) /* Time Stamp Counter */ |
23 | #define X86_FEATURE_MSR (0*32+ 5) /* Model-Specific Registers */ | 23 | #define X86_FEATURE_MSR (0*32+ 5) /* Model-Specific Registers */ |
24 | #define X86_FEATURE_PAE (0*32+ 6) /* Physical Address Extensions */ | 24 | #define X86_FEATURE_PAE (0*32+ 6) /* Physical Address Extensions */ |
25 | #define X86_FEATURE_MCE (0*32+ 7) /* Machine Check Architecture */ | 25 | #define X86_FEATURE_MCE (0*32+ 7) /* Machine Check Exception */ |
26 | #define X86_FEATURE_CX8 (0*32+ 8) /* CMPXCHG8 instruction */ | 26 | #define X86_FEATURE_CX8 (0*32+ 8) /* CMPXCHG8 instruction */ |
27 | #define X86_FEATURE_APIC (0*32+ 9) /* Onboard APIC */ | 27 | #define X86_FEATURE_APIC (0*32+ 9) /* Onboard APIC */ |
28 | #define X86_FEATURE_SEP (0*32+11) /* SYSENTER/SYSEXIT */ | 28 | #define X86_FEATURE_SEP (0*32+11) /* SYSENTER/SYSEXIT */ |
@@ -94,6 +94,7 @@ | |||
94 | #define X86_FEATURE_TSC_RELIABLE (3*32+23) /* TSC is known to be reliable */ | 94 | #define X86_FEATURE_TSC_RELIABLE (3*32+23) /* TSC is known to be reliable */ |
95 | #define X86_FEATURE_NONSTOP_TSC (3*32+24) /* TSC does not stop in C states */ | 95 | #define X86_FEATURE_NONSTOP_TSC (3*32+24) /* TSC does not stop in C states */ |
96 | #define X86_FEATURE_CLFLUSH_MONITOR (3*32+25) /* "" clflush reqd with monitor */ | 96 | #define X86_FEATURE_CLFLUSH_MONITOR (3*32+25) /* "" clflush reqd with monitor */ |
97 | #define X86_FEATURE_EXTD_APICID (3*32+26) /* has extended APICID (8 bits) */ | ||
97 | 98 | ||
98 | /* Intel-defined CPU features, CPUID level 0x00000001 (ecx), word 4 */ | 99 | /* Intel-defined CPU features, CPUID level 0x00000001 (ecx), word 4 */ |
99 | #define X86_FEATURE_XMM3 (4*32+ 0) /* "pni" SSE-3 */ | 100 | #define X86_FEATURE_XMM3 (4*32+ 0) /* "pni" SSE-3 */ |
@@ -115,6 +116,8 @@ | |||
115 | #define X86_FEATURE_XMM4_1 (4*32+19) /* "sse4_1" SSE-4.1 */ | 116 | #define X86_FEATURE_XMM4_1 (4*32+19) /* "sse4_1" SSE-4.1 */ |
116 | #define X86_FEATURE_XMM4_2 (4*32+20) /* "sse4_2" SSE-4.2 */ | 117 | #define X86_FEATURE_XMM4_2 (4*32+20) /* "sse4_2" SSE-4.2 */ |
117 | #define X86_FEATURE_X2APIC (4*32+21) /* x2APIC */ | 118 | #define X86_FEATURE_X2APIC (4*32+21) /* x2APIC */ |
119 | #define X86_FEATURE_MOVBE (4*32+22) /* MOVBE instruction */ | ||
120 | #define X86_FEATURE_POPCNT (4*32+23) /* POPCNT instruction */ | ||
118 | #define X86_FEATURE_AES (4*32+25) /* AES instructions */ | 121 | #define X86_FEATURE_AES (4*32+25) /* AES instructions */ |
119 | #define X86_FEATURE_XSAVE (4*32+26) /* XSAVE/XRSTOR/XSETBV/XGETBV */ | 122 | #define X86_FEATURE_XSAVE (4*32+26) /* XSAVE/XRSTOR/XSETBV/XGETBV */ |
120 | #define X86_FEATURE_OSXSAVE (4*32+27) /* "" XSAVE enabled in the OS */ | 123 | #define X86_FEATURE_OSXSAVE (4*32+27) /* "" XSAVE enabled in the OS */ |
@@ -192,11 +195,11 @@ extern const char * const x86_power_flags[32]; | |||
192 | #define clear_cpu_cap(c, bit) clear_bit(bit, (unsigned long *)((c)->x86_capability)) | 195 | #define clear_cpu_cap(c, bit) clear_bit(bit, (unsigned long *)((c)->x86_capability)) |
193 | #define setup_clear_cpu_cap(bit) do { \ | 196 | #define setup_clear_cpu_cap(bit) do { \ |
194 | clear_cpu_cap(&boot_cpu_data, bit); \ | 197 | clear_cpu_cap(&boot_cpu_data, bit); \ |
195 | set_bit(bit, (unsigned long *)cleared_cpu_caps); \ | 198 | set_bit(bit, (unsigned long *)cpu_caps_cleared); \ |
196 | } while (0) | 199 | } while (0) |
197 | #define setup_force_cpu_cap(bit) do { \ | 200 | #define setup_force_cpu_cap(bit) do { \ |
198 | set_cpu_cap(&boot_cpu_data, bit); \ | 201 | set_cpu_cap(&boot_cpu_data, bit); \ |
199 | clear_bit(bit, (unsigned long *)cleared_cpu_caps); \ | 202 | set_bit(bit, (unsigned long *)cpu_caps_set); \ |
200 | } while (0) | 203 | } while (0) |
201 | 204 | ||
202 | #define cpu_has_fpu boot_cpu_has(X86_FEATURE_FPU) | 205 | #define cpu_has_fpu boot_cpu_has(X86_FEATURE_FPU) |
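The new synthetic word-3 bit ties in with the default_get_apic_id() change earlier in this diff, which honours 8-bit extended APIC IDs when the flag is set. A hedged illustration of the usual test idiom against the new bits (the surrounding function is made up):

        /* Illustrative feature-bit tests only. */
        static void example_report_features(void)
        {
                if (boot_cpu_has(X86_FEATURE_EXTD_APICID))
                        pr_info("CPU reports 8-bit extended APIC IDs\n");

                if (cpu_has(&boot_cpu_data, X86_FEATURE_MOVBE))
                        pr_info("MOVBE instruction available\n");
        }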
diff --git a/arch/x86/include/asm/desc.h b/arch/x86/include/asm/desc.h index c45f415ce315..c993e9e0fed4 100644 --- a/arch/x86/include/asm/desc.h +++ b/arch/x86/include/asm/desc.h | |||
@@ -1,7 +1,6 @@ | |||
1 | #ifndef _ASM_X86_DESC_H | 1 | #ifndef _ASM_X86_DESC_H |
2 | #define _ASM_X86_DESC_H | 2 | #define _ASM_X86_DESC_H |
3 | 3 | ||
4 | #ifndef __ASSEMBLY__ | ||
5 | #include <asm/desc_defs.h> | 4 | #include <asm/desc_defs.h> |
6 | #include <asm/ldt.h> | 5 | #include <asm/ldt.h> |
7 | #include <asm/mmu.h> | 6 | #include <asm/mmu.h> |
@@ -380,29 +379,4 @@ static inline void set_system_intr_gate_ist(int n, void *addr, unsigned ist) | |||
380 | _set_gate(n, GATE_INTERRUPT, addr, 0x3, ist, __KERNEL_CS); | 379 | _set_gate(n, GATE_INTERRUPT, addr, 0x3, ist, __KERNEL_CS); |
381 | } | 380 | } |
382 | 381 | ||
383 | #else | ||
384 | /* | ||
385 | * GET_DESC_BASE reads the descriptor base of the specified segment. | ||
386 | * | ||
387 | * Args: | ||
388 | * idx - descriptor index | ||
389 | * gdt - GDT pointer | ||
390 | * base - 32bit register to which the base will be written | ||
391 | * lo_w - lo word of the "base" register | ||
392 | * lo_b - lo byte of the "base" register | ||
393 | * hi_b - hi byte of the low word of the "base" register | ||
394 | * | ||
395 | * Example: | ||
396 | * GET_DESC_BASE(GDT_ENTRY_ESPFIX_SS, %ebx, %eax, %ax, %al, %ah) | ||
397 | * Will read the base address of GDT_ENTRY_ESPFIX_SS and put it into %eax. | ||
398 | */ | ||
399 | #define GET_DESC_BASE(idx, gdt, base, lo_w, lo_b, hi_b) \ | ||
400 | movb idx * 8 + 4(gdt), lo_b; \ | ||
401 | movb idx * 8 + 7(gdt), hi_b; \ | ||
402 | shll $16, base; \ | ||
403 | movw idx * 8 + 2(gdt), lo_w; | ||
404 | |||
405 | |||
406 | #endif /* __ASSEMBLY__ */ | ||
407 | |||
408 | #endif /* _ASM_X86_DESC_H */ | 382 | #endif /* _ASM_X86_DESC_H */ |
diff --git a/arch/x86/include/asm/dma-mapping.h b/arch/x86/include/asm/dma-mapping.h index f82fdc412c64..1c3f9435f1c9 100644 --- a/arch/x86/include/asm/dma-mapping.h +++ b/arch/x86/include/asm/dma-mapping.h | |||
@@ -6,6 +6,7 @@ | |||
6 | * Documentation/DMA-API.txt for documentation. | 6 | * Documentation/DMA-API.txt for documentation. |
7 | */ | 7 | */ |
8 | 8 | ||
9 | #include <linux/kmemcheck.h> | ||
9 | #include <linux/scatterlist.h> | 10 | #include <linux/scatterlist.h> |
10 | #include <linux/dma-debug.h> | 11 | #include <linux/dma-debug.h> |
11 | #include <linux/dma-attrs.h> | 12 | #include <linux/dma-attrs.h> |
@@ -32,6 +33,8 @@ static inline struct dma_map_ops *get_dma_ops(struct device *dev) | |||
32 | #endif | 33 | #endif |
33 | } | 34 | } |
34 | 35 | ||
36 | #include <asm-generic/dma-mapping-common.h> | ||
37 | |||
35 | /* Make sure we keep the same behaviour */ | 38 | /* Make sure we keep the same behaviour */ |
36 | static inline int dma_mapping_error(struct device *dev, dma_addr_t dma_addr) | 39 | static inline int dma_mapping_error(struct device *dev, dma_addr_t dma_addr) |
37 | { | 40 | { |
@@ -52,171 +55,6 @@ extern int dma_set_mask(struct device *dev, u64 mask); | |||
52 | extern void *dma_generic_alloc_coherent(struct device *dev, size_t size, | 55 | extern void *dma_generic_alloc_coherent(struct device *dev, size_t size, |
53 | dma_addr_t *dma_addr, gfp_t flag); | 56 | dma_addr_t *dma_addr, gfp_t flag); |
54 | 57 | ||
55 | static inline dma_addr_t | ||
56 | dma_map_single(struct device *hwdev, void *ptr, size_t size, | ||
57 | enum dma_data_direction dir) | ||
58 | { | ||
59 | struct dma_map_ops *ops = get_dma_ops(hwdev); | ||
60 | dma_addr_t addr; | ||
61 | |||
62 | BUG_ON(!valid_dma_direction(dir)); | ||
63 | addr = ops->map_page(hwdev, virt_to_page(ptr), | ||
64 | (unsigned long)ptr & ~PAGE_MASK, size, | ||
65 | dir, NULL); | ||
66 | debug_dma_map_page(hwdev, virt_to_page(ptr), | ||
67 | (unsigned long)ptr & ~PAGE_MASK, size, | ||
68 | dir, addr, true); | ||
69 | return addr; | ||
70 | } | ||
71 | |||
72 | static inline void | ||
73 | dma_unmap_single(struct device *dev, dma_addr_t addr, size_t size, | ||
74 | enum dma_data_direction dir) | ||
75 | { | ||
76 | struct dma_map_ops *ops = get_dma_ops(dev); | ||
77 | |||
78 | BUG_ON(!valid_dma_direction(dir)); | ||
79 | if (ops->unmap_page) | ||
80 | ops->unmap_page(dev, addr, size, dir, NULL); | ||
81 | debug_dma_unmap_page(dev, addr, size, dir, true); | ||
82 | } | ||
83 | |||
84 | static inline int | ||
85 | dma_map_sg(struct device *hwdev, struct scatterlist *sg, | ||
86 | int nents, enum dma_data_direction dir) | ||
87 | { | ||
88 | struct dma_map_ops *ops = get_dma_ops(hwdev); | ||
89 | int ents; | ||
90 | |||
91 | BUG_ON(!valid_dma_direction(dir)); | ||
92 | ents = ops->map_sg(hwdev, sg, nents, dir, NULL); | ||
93 | debug_dma_map_sg(hwdev, sg, nents, ents, dir); | ||
94 | |||
95 | return ents; | ||
96 | } | ||
97 | |||
98 | static inline void | ||
99 | dma_unmap_sg(struct device *hwdev, struct scatterlist *sg, int nents, | ||
100 | enum dma_data_direction dir) | ||
101 | { | ||
102 | struct dma_map_ops *ops = get_dma_ops(hwdev); | ||
103 | |||
104 | BUG_ON(!valid_dma_direction(dir)); | ||
105 | debug_dma_unmap_sg(hwdev, sg, nents, dir); | ||
106 | if (ops->unmap_sg) | ||
107 | ops->unmap_sg(hwdev, sg, nents, dir, NULL); | ||
108 | } | ||
109 | |||
110 | static inline void | ||
111 | dma_sync_single_for_cpu(struct device *hwdev, dma_addr_t dma_handle, | ||
112 | size_t size, enum dma_data_direction dir) | ||
113 | { | ||
114 | struct dma_map_ops *ops = get_dma_ops(hwdev); | ||
115 | |||
116 | BUG_ON(!valid_dma_direction(dir)); | ||
117 | if (ops->sync_single_for_cpu) | ||
118 | ops->sync_single_for_cpu(hwdev, dma_handle, size, dir); | ||
119 | debug_dma_sync_single_for_cpu(hwdev, dma_handle, size, dir); | ||
120 | flush_write_buffers(); | ||
121 | } | ||
122 | |||
123 | static inline void | ||
124 | dma_sync_single_for_device(struct device *hwdev, dma_addr_t dma_handle, | ||
125 | size_t size, enum dma_data_direction dir) | ||
126 | { | ||
127 | struct dma_map_ops *ops = get_dma_ops(hwdev); | ||
128 | |||
129 | BUG_ON(!valid_dma_direction(dir)); | ||
130 | if (ops->sync_single_for_device) | ||
131 | ops->sync_single_for_device(hwdev, dma_handle, size, dir); | ||
132 | debug_dma_sync_single_for_device(hwdev, dma_handle, size, dir); | ||
133 | flush_write_buffers(); | ||
134 | } | ||
135 | |||
136 | static inline void | ||
137 | dma_sync_single_range_for_cpu(struct device *hwdev, dma_addr_t dma_handle, | ||
138 | unsigned long offset, size_t size, | ||
139 | enum dma_data_direction dir) | ||
140 | { | ||
141 | struct dma_map_ops *ops = get_dma_ops(hwdev); | ||
142 | |||
143 | BUG_ON(!valid_dma_direction(dir)); | ||
144 | if (ops->sync_single_range_for_cpu) | ||
145 | ops->sync_single_range_for_cpu(hwdev, dma_handle, offset, | ||
146 | size, dir); | ||
147 | debug_dma_sync_single_range_for_cpu(hwdev, dma_handle, | ||
148 | offset, size, dir); | ||
149 | flush_write_buffers(); | ||
150 | } | ||
151 | |||
152 | static inline void | ||
153 | dma_sync_single_range_for_device(struct device *hwdev, dma_addr_t dma_handle, | ||
154 | unsigned long offset, size_t size, | ||
155 | enum dma_data_direction dir) | ||
156 | { | ||
157 | struct dma_map_ops *ops = get_dma_ops(hwdev); | ||
158 | |||
159 | BUG_ON(!valid_dma_direction(dir)); | ||
160 | if (ops->sync_single_range_for_device) | ||
161 | ops->sync_single_range_for_device(hwdev, dma_handle, | ||
162 | offset, size, dir); | ||
163 | debug_dma_sync_single_range_for_device(hwdev, dma_handle, | ||
164 | offset, size, dir); | ||
165 | flush_write_buffers(); | ||
166 | } | ||
167 | |||
168 | static inline void | ||
169 | dma_sync_sg_for_cpu(struct device *hwdev, struct scatterlist *sg, | ||
170 | int nelems, enum dma_data_direction dir) | ||
171 | { | ||
172 | struct dma_map_ops *ops = get_dma_ops(hwdev); | ||
173 | |||
174 | BUG_ON(!valid_dma_direction(dir)); | ||
175 | if (ops->sync_sg_for_cpu) | ||
176 | ops->sync_sg_for_cpu(hwdev, sg, nelems, dir); | ||
177 | debug_dma_sync_sg_for_cpu(hwdev, sg, nelems, dir); | ||
178 | flush_write_buffers(); | ||
179 | } | ||
180 | |||
181 | static inline void | ||
182 | dma_sync_sg_for_device(struct device *hwdev, struct scatterlist *sg, | ||
183 | int nelems, enum dma_data_direction dir) | ||
184 | { | ||
185 | struct dma_map_ops *ops = get_dma_ops(hwdev); | ||
186 | |||
187 | BUG_ON(!valid_dma_direction(dir)); | ||
188 | if (ops->sync_sg_for_device) | ||
189 | ops->sync_sg_for_device(hwdev, sg, nelems, dir); | ||
190 | debug_dma_sync_sg_for_device(hwdev, sg, nelems, dir); | ||
191 | |||
192 | flush_write_buffers(); | ||
193 | } | ||
194 | |||
195 | static inline dma_addr_t dma_map_page(struct device *dev, struct page *page, | ||
196 | size_t offset, size_t size, | ||
197 | enum dma_data_direction dir) | ||
198 | { | ||
199 | struct dma_map_ops *ops = get_dma_ops(dev); | ||
200 | dma_addr_t addr; | ||
201 | |||
202 | BUG_ON(!valid_dma_direction(dir)); | ||
203 | addr = ops->map_page(dev, page, offset, size, dir, NULL); | ||
204 | debug_dma_map_page(dev, page, offset, size, dir, addr, false); | ||
205 | |||
206 | return addr; | ||
207 | } | ||
208 | |||
209 | static inline void dma_unmap_page(struct device *dev, dma_addr_t addr, | ||
210 | size_t size, enum dma_data_direction dir) | ||
211 | { | ||
212 | struct dma_map_ops *ops = get_dma_ops(dev); | ||
213 | |||
214 | BUG_ON(!valid_dma_direction(dir)); | ||
215 | if (ops->unmap_page) | ||
216 | ops->unmap_page(dev, addr, size, dir, NULL); | ||
217 | debug_dma_unmap_page(dev, addr, size, dir, false); | ||
218 | } | ||
219 | |||
220 | static inline void | 58 | static inline void |
221 | dma_cache_sync(struct device *dev, void *vaddr, size_t size, | 59 | dma_cache_sync(struct device *dev, void *vaddr, size_t size, |
222 | enum dma_data_direction dir) | 60 | enum dma_data_direction dir) |
diff --git a/arch/x86/include/asm/ds.h b/arch/x86/include/asm/ds.h index a8f672ba100c..70dac199b093 100644 --- a/arch/x86/include/asm/ds.h +++ b/arch/x86/include/asm/ds.h | |||
@@ -15,8 +15,8 @@ | |||
15 | * - buffer allocation (memory accounting) | 15 | * - buffer allocation (memory accounting) |
16 | * | 16 | * |
17 | * | 17 | * |
18 | * Copyright (C) 2007-2008 Intel Corporation. | 18 | * Copyright (C) 2007-2009 Intel Corporation. |
19 | * Markus Metzger <markus.t.metzger@intel.com>, 2007-2008 | 19 | * Markus Metzger <markus.t.metzger@intel.com>, 2007-2009 |
20 | */ | 20 | */ |
21 | 21 | ||
22 | #ifndef _ASM_X86_DS_H | 22 | #ifndef _ASM_X86_DS_H |
@@ -83,8 +83,10 @@ enum ds_feature { | |||
83 | * The interrupt threshold is independent from the overflow callback | 83 | * The interrupt threshold is independent from the overflow callback |
84 | * to allow users to use their own overflow interrupt handling mechanism. | 84 | * to allow users to use their own overflow interrupt handling mechanism. |
85 | * | 85 | * |
86 | * task: the task to request recording for; | 86 | * The function might sleep. |
87 | * NULL for per-cpu recording on the current cpu | 87 | * |
88 | * task: the task to request recording for | ||
89 | * cpu: the cpu to request recording for | ||
88 | * base: the base pointer for the (non-pageable) buffer; | 90 | * base: the base pointer for the (non-pageable) buffer; |
89 | * size: the size of the provided buffer in bytes | 91 | * size: the size of the provided buffer in bytes |
90 | * ovfl: pointer to a function to be called on buffer overflow; | 92 | * ovfl: pointer to a function to be called on buffer overflow; |
@@ -93,19 +95,28 @@ enum ds_feature { | |||
93 | * -1 if no interrupt threshold is requested. | 95 | * -1 if no interrupt threshold is requested. |
94 | * flags: a bit-mask of the above flags | 96 | * flags: a bit-mask of the above flags |
95 | */ | 97 | */ |
96 | extern struct bts_tracer *ds_request_bts(struct task_struct *task, | 98 | extern struct bts_tracer *ds_request_bts_task(struct task_struct *task, |
97 | void *base, size_t size, | 99 | void *base, size_t size, |
98 | bts_ovfl_callback_t ovfl, | 100 | bts_ovfl_callback_t ovfl, |
99 | size_t th, unsigned int flags); | 101 | size_t th, unsigned int flags); |
100 | extern struct pebs_tracer *ds_request_pebs(struct task_struct *task, | 102 | extern struct bts_tracer *ds_request_bts_cpu(int cpu, void *base, size_t size, |
101 | void *base, size_t size, | 103 | bts_ovfl_callback_t ovfl, |
102 | pebs_ovfl_callback_t ovfl, | 104 | size_t th, unsigned int flags); |
103 | size_t th, unsigned int flags); | 105 | extern struct pebs_tracer *ds_request_pebs_task(struct task_struct *task, |
106 | void *base, size_t size, | ||
107 | pebs_ovfl_callback_t ovfl, | ||
108 | size_t th, unsigned int flags); | ||
109 | extern struct pebs_tracer *ds_request_pebs_cpu(int cpu, | ||
110 | void *base, size_t size, | ||
111 | pebs_ovfl_callback_t ovfl, | ||
112 | size_t th, unsigned int flags); | ||
104 | 113 | ||
105 | /* | 114 | /* |
106 | * Release BTS or PEBS resources | 115 | * Release BTS or PEBS resources |
107 | * Suspend and resume BTS or PEBS tracing | 116 | * Suspend and resume BTS or PEBS tracing |
108 | * | 117 | * |
118 | * Must be called with irq's enabled. | ||
119 | * | ||
109 | * tracer: the tracer handle returned from ds_request_~() | 120 | * tracer: the tracer handle returned from ds_request_~() |
110 | */ | 121 | */ |
111 | extern void ds_release_bts(struct bts_tracer *tracer); | 122 | extern void ds_release_bts(struct bts_tracer *tracer); |
@@ -115,6 +126,28 @@ extern void ds_release_pebs(struct pebs_tracer *tracer); | |||
115 | extern void ds_suspend_pebs(struct pebs_tracer *tracer); | 126 | extern void ds_suspend_pebs(struct pebs_tracer *tracer); |
116 | extern void ds_resume_pebs(struct pebs_tracer *tracer); | 127 | extern void ds_resume_pebs(struct pebs_tracer *tracer); |
117 | 128 | ||
129 | /* | ||
130 | * Release BTS or PEBS resources | ||
131 | * Suspend and resume BTS or PEBS tracing | ||
132 | * | ||
133 | * Cpu tracers must call this on the traced cpu. | ||
134 | * Task tracers must call ds_release_~_noirq() for themselves. | ||
135 | * | ||
136 | * May be called with irq's disabled. | ||
137 | * | ||
138 | * Returns 0 if successful; | ||
139 | * -EPERM if the cpu tracer does not trace the current cpu. | ||
140 | * -EPERM if the task tracer does not trace itself. | ||
141 | * | ||
142 | * tracer: the tracer handle returned from ds_request_~() | ||
143 | */ | ||
144 | extern int ds_release_bts_noirq(struct bts_tracer *tracer); | ||
145 | extern int ds_suspend_bts_noirq(struct bts_tracer *tracer); | ||
146 | extern int ds_resume_bts_noirq(struct bts_tracer *tracer); | ||
147 | extern int ds_release_pebs_noirq(struct pebs_tracer *tracer); | ||
148 | extern int ds_suspend_pebs_noirq(struct pebs_tracer *tracer); | ||
149 | extern int ds_resume_pebs_noirq(struct pebs_tracer *tracer); | ||
150 | |||
118 | 151 | ||
119 | /* | 152 | /* |
120 | * The raw DS buffer state as it is used for BTS and PEBS recording. | 153 | * The raw DS buffer state as it is used for BTS and PEBS recording. |
@@ -170,9 +203,9 @@ struct bts_struct { | |||
170 | } lbr; | 203 | } lbr; |
171 | /* BTS_TASK_ARRIVES or BTS_TASK_DEPARTS */ | 204 | /* BTS_TASK_ARRIVES or BTS_TASK_DEPARTS */ |
172 | struct { | 205 | struct { |
173 | __u64 jiffies; | 206 | __u64 clock; |
174 | pid_t pid; | 207 | pid_t pid; |
175 | } timestamp; | 208 | } event; |
176 | } variant; | 209 | } variant; |
177 | }; | 210 | }; |
178 | 211 | ||
@@ -201,8 +234,12 @@ struct bts_trace { | |||
201 | struct pebs_trace { | 234 | struct pebs_trace { |
202 | struct ds_trace ds; | 235 | struct ds_trace ds; |
203 | 236 | ||
204 | /* the PEBS reset value */ | 237 | /* the number of valid counters in the below array */ |
205 | unsigned long long reset_value; | 238 | unsigned int counters; |
239 | |||
240 | #define MAX_PEBS_COUNTERS 4 | ||
241 | /* the counter reset value */ | ||
242 | unsigned long long counter_reset[MAX_PEBS_COUNTERS]; | ||
206 | }; | 243 | }; |
207 | 244 | ||
208 | 245 | ||
@@ -237,9 +274,11 @@ extern int ds_reset_pebs(struct pebs_tracer *tracer); | |||
237 | * Returns 0 on success; -Eerrno on error | 274 | * Returns 0 on success; -Eerrno on error |
238 | * | 275 | * |
239 | * tracer: the tracer handle returned from ds_request_pebs() | 276 | * tracer: the tracer handle returned from ds_request_pebs() |
277 | * counter: the index of the counter | ||
240 | * value: the new counter reset value | 278 | * value: the new counter reset value |
241 | */ | 279 | */ |
242 | extern int ds_set_pebs_reset(struct pebs_tracer *tracer, u64 value); | 280 | extern int ds_set_pebs_reset(struct pebs_tracer *tracer, |
281 | unsigned int counter, u64 value); | ||
243 | 282 | ||
244 | /* | 283 | /* |
245 | * Initialization | 284 | * Initialization |
@@ -252,21 +291,12 @@ extern void __cpuinit ds_init_intel(struct cpuinfo_x86 *); | |||
252 | */ | 291 | */ |
253 | extern void ds_switch_to(struct task_struct *prev, struct task_struct *next); | 292 | extern void ds_switch_to(struct task_struct *prev, struct task_struct *next); |
254 | 293 | ||
255 | /* | ||
256 | * Task clone/init and cleanup work | ||
257 | */ | ||
258 | extern void ds_copy_thread(struct task_struct *tsk, struct task_struct *father); | ||
259 | extern void ds_exit_thread(struct task_struct *tsk); | ||
260 | |||
261 | #else /* CONFIG_X86_DS */ | 294 | #else /* CONFIG_X86_DS */ |
262 | 295 | ||
263 | struct cpuinfo_x86; | 296 | struct cpuinfo_x86; |
264 | static inline void __cpuinit ds_init_intel(struct cpuinfo_x86 *ignored) {} | 297 | static inline void __cpuinit ds_init_intel(struct cpuinfo_x86 *ignored) {} |
265 | static inline void ds_switch_to(struct task_struct *prev, | 298 | static inline void ds_switch_to(struct task_struct *prev, |
266 | struct task_struct *next) {} | 299 | struct task_struct *next) {} |
267 | static inline void ds_copy_thread(struct task_struct *tsk, | ||
268 | struct task_struct *father) {} | ||
269 | static inline void ds_exit_thread(struct task_struct *tsk) {} | ||
270 | 300 | ||
271 | #endif /* CONFIG_X86_DS */ | 301 | #endif /* CONFIG_X86_DS */ |
272 | #endif /* _ASM_X86_DS_H */ | 302 | #endif /* _ASM_X86_DS_H */ |
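The request interface now has explicit _task/_cpu variants and the PEBS reset value becomes per-counter. A hedged sketch of the per-cpu BTS path, using only the prototypes and rules quoted above (buffer handling, the BTS_KERNEL flag and the ERR_PTR return convention are assumptions):

        /* Illustrative only: request, then release, a BTS tracer for CPU 0. */
        static void *example_buffer;            /* non-pageable buffer, allocation elided */
        static struct bts_tracer *example_tracer;

        static int example_start_bts(void)
        {
                example_tracer = ds_request_bts_cpu(0, example_buffer, PAGE_SIZE,
                                                    NULL /* no overflow callback */,
                                                    (size_t)-1 /* no irq threshold */,
                                                    BTS_KERNEL);
                if (IS_ERR(example_tracer))
                        return PTR_ERR(example_tracer);
                return 0;
        }

        static void example_stop_bts(void)
        {
                ds_release_bts(example_tracer); /* irqs must be enabled here */
        }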
diff --git a/arch/x86/include/asm/entry_arch.h b/arch/x86/include/asm/entry_arch.h index c2e6bedaf258..ff8cbfa07851 100644 --- a/arch/x86/include/asm/entry_arch.h +++ b/arch/x86/include/asm/entry_arch.h | |||
@@ -14,6 +14,7 @@ BUILD_INTERRUPT(reschedule_interrupt,RESCHEDULE_VECTOR) | |||
14 | BUILD_INTERRUPT(call_function_interrupt,CALL_FUNCTION_VECTOR) | 14 | BUILD_INTERRUPT(call_function_interrupt,CALL_FUNCTION_VECTOR) |
15 | BUILD_INTERRUPT(call_function_single_interrupt,CALL_FUNCTION_SINGLE_VECTOR) | 15 | BUILD_INTERRUPT(call_function_single_interrupt,CALL_FUNCTION_SINGLE_VECTOR) |
16 | BUILD_INTERRUPT(irq_move_cleanup_interrupt,IRQ_MOVE_CLEANUP_VECTOR) | 16 | BUILD_INTERRUPT(irq_move_cleanup_interrupt,IRQ_MOVE_CLEANUP_VECTOR) |
17 | BUILD_INTERRUPT(reboot_interrupt,REBOOT_VECTOR) | ||
17 | 18 | ||
18 | BUILD_INTERRUPT3(invalidate_interrupt0,INVALIDATE_TLB_VECTOR_START+0, | 19 | BUILD_INTERRUPT3(invalidate_interrupt0,INVALIDATE_TLB_VECTOR_START+0, |
19 | smp_invalidate_interrupt) | 20 | smp_invalidate_interrupt) |
@@ -49,11 +50,19 @@ BUILD_INTERRUPT(error_interrupt,ERROR_APIC_VECTOR) | |||
49 | BUILD_INTERRUPT(spurious_interrupt,SPURIOUS_APIC_VECTOR) | 50 | BUILD_INTERRUPT(spurious_interrupt,SPURIOUS_APIC_VECTOR) |
50 | 51 | ||
51 | #ifdef CONFIG_PERF_COUNTERS | 52 | #ifdef CONFIG_PERF_COUNTERS |
52 | BUILD_INTERRUPT(perf_counter_interrupt, LOCAL_PERF_VECTOR) | 53 | BUILD_INTERRUPT(perf_pending_interrupt, LOCAL_PENDING_VECTOR) |
53 | #endif | 54 | #endif |
54 | 55 | ||
55 | #ifdef CONFIG_X86_MCE_P4THERMAL | 56 | #ifdef CONFIG_X86_THERMAL_VECTOR |
56 | BUILD_INTERRUPT(thermal_interrupt,THERMAL_APIC_VECTOR) | 57 | BUILD_INTERRUPT(thermal_interrupt,THERMAL_APIC_VECTOR) |
57 | #endif | 58 | #endif |
58 | 59 | ||
60 | #ifdef CONFIG_X86_MCE_THRESHOLD | ||
61 | BUILD_INTERRUPT(threshold_interrupt,THRESHOLD_APIC_VECTOR) | ||
62 | #endif | ||
63 | |||
64 | #ifdef CONFIG_X86_NEW_MCE | ||
65 | BUILD_INTERRUPT(mce_self_interrupt,MCE_SELF_VECTOR) | ||
66 | #endif | ||
67 | |||
59 | #endif | 68 | #endif |
diff --git a/arch/x86/include/asm/hardirq.h b/arch/x86/include/asm/hardirq.h index 37555e52f980..82e3e8f01043 100644 --- a/arch/x86/include/asm/hardirq.h +++ b/arch/x86/include/asm/hardirq.h | |||
@@ -13,6 +13,8 @@ typedef struct { | |||
13 | unsigned int irq_spurious_count; | 13 | unsigned int irq_spurious_count; |
14 | #endif | 14 | #endif |
15 | unsigned int generic_irqs; /* arch dependent */ | 15 | unsigned int generic_irqs; /* arch dependent */ |
16 | unsigned int apic_perf_irqs; | ||
17 | unsigned int apic_pending_irqs; | ||
16 | #ifdef CONFIG_SMP | 18 | #ifdef CONFIG_SMP |
17 | unsigned int irq_resched_count; | 19 | unsigned int irq_resched_count; |
18 | unsigned int irq_call_count; | 20 | unsigned int irq_call_count; |
@@ -20,7 +22,7 @@ typedef struct { | |||
20 | #endif | 22 | #endif |
21 | #ifdef CONFIG_X86_MCE | 23 | #ifdef CONFIG_X86_MCE |
22 | unsigned int irq_thermal_count; | 24 | unsigned int irq_thermal_count; |
23 | # ifdef CONFIG_X86_64 | 25 | # ifdef CONFIG_X86_MCE_THRESHOLD |
24 | unsigned int irq_threshold_count; | 26 | unsigned int irq_threshold_count; |
25 | # endif | 27 | # endif |
26 | #endif | 28 | #endif |
diff --git a/arch/x86/include/asm/hw_irq.h b/arch/x86/include/asm/hw_irq.h index b762ea49bd70..ba180d93b08c 100644 --- a/arch/x86/include/asm/hw_irq.h +++ b/arch/x86/include/asm/hw_irq.h | |||
@@ -29,9 +29,12 @@ | |||
29 | extern void apic_timer_interrupt(void); | 29 | extern void apic_timer_interrupt(void); |
30 | extern void generic_interrupt(void); | 30 | extern void generic_interrupt(void); |
31 | extern void error_interrupt(void); | 31 | extern void error_interrupt(void); |
32 | extern void perf_pending_interrupt(void); | ||
33 | |||
32 | extern void spurious_interrupt(void); | 34 | extern void spurious_interrupt(void); |
33 | extern void thermal_interrupt(void); | 35 | extern void thermal_interrupt(void); |
34 | extern void reschedule_interrupt(void); | 36 | extern void reschedule_interrupt(void); |
37 | extern void mce_self_interrupt(void); | ||
35 | 38 | ||
36 | extern void invalidate_interrupt(void); | 39 | extern void invalidate_interrupt(void); |
37 | extern void invalidate_interrupt0(void); | 40 | extern void invalidate_interrupt0(void); |
@@ -44,6 +47,7 @@ extern void invalidate_interrupt6(void); | |||
44 | extern void invalidate_interrupt7(void); | 47 | extern void invalidate_interrupt7(void); |
45 | 48 | ||
46 | extern void irq_move_cleanup_interrupt(void); | 49 | extern void irq_move_cleanup_interrupt(void); |
50 | extern void reboot_interrupt(void); | ||
47 | extern void threshold_interrupt(void); | 51 | extern void threshold_interrupt(void); |
48 | 52 | ||
49 | extern void call_function_interrupt(void); | 53 | extern void call_function_interrupt(void); |
@@ -63,7 +67,26 @@ extern unsigned long io_apic_irqs; | |||
63 | extern void init_VISWS_APIC_irqs(void); | 67 | extern void init_VISWS_APIC_irqs(void); |
64 | extern void setup_IO_APIC(void); | 68 | extern void setup_IO_APIC(void); |
65 | extern void disable_IO_APIC(void); | 69 | extern void disable_IO_APIC(void); |
66 | extern int IO_APIC_get_PCI_irq_vector(int bus, int slot, int fn); | 70 | |
71 | struct io_apic_irq_attr { | ||
72 | int ioapic; | ||
73 | int ioapic_pin; | ||
74 | int trigger; | ||
75 | int polarity; | ||
76 | }; | ||
77 | |||
78 | static inline void set_io_apic_irq_attr(struct io_apic_irq_attr *irq_attr, | ||
79 | int ioapic, int ioapic_pin, | ||
80 | int trigger, int polarity) | ||
81 | { | ||
82 | irq_attr->ioapic = ioapic; | ||
83 | irq_attr->ioapic_pin = ioapic_pin; | ||
84 | irq_attr->trigger = trigger; | ||
85 | irq_attr->polarity = polarity; | ||
86 | } | ||
87 | |||
88 | extern int IO_APIC_get_PCI_irq_vector(int bus, int devfn, int pin, | ||
89 | struct io_apic_irq_attr *irq_attr); | ||
67 | extern void setup_ioapic_dest(void); | 90 | extern void setup_ioapic_dest(void); |
68 | 91 | ||
69 | extern void enable_IO_APIC(void); | 92 | extern void enable_IO_APIC(void); |
@@ -78,7 +101,11 @@ extern void eisa_set_level_irq(unsigned int irq); | |||
78 | /* SMP */ | 101 | /* SMP */ |
79 | extern void smp_apic_timer_interrupt(struct pt_regs *); | 102 | extern void smp_apic_timer_interrupt(struct pt_regs *); |
80 | extern void smp_spurious_interrupt(struct pt_regs *); | 103 | extern void smp_spurious_interrupt(struct pt_regs *); |
104 | extern void smp_generic_interrupt(struct pt_regs *); | ||
81 | extern void smp_error_interrupt(struct pt_regs *); | 105 | extern void smp_error_interrupt(struct pt_regs *); |
106 | #ifdef CONFIG_X86_IO_APIC | ||
107 | extern asmlinkage void smp_irq_move_cleanup_interrupt(void); | ||
108 | #endif | ||
82 | #ifdef CONFIG_SMP | 109 | #ifdef CONFIG_SMP |
83 | extern void smp_reschedule_interrupt(struct pt_regs *); | 110 | extern void smp_reschedule_interrupt(struct pt_regs *); |
84 | extern void smp_call_function_interrupt(struct pt_regs *); | 111 | extern void smp_call_function_interrupt(struct pt_regs *); |
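The new io_apic_irq_attr bundle replaces the old bus/slot/fn lookup in IO_APIC_get_PCI_irq_vector(). A sketch of the intended call pattern; the bus, devfn and pin values are made up for illustration:

	struct io_apic_irq_attr attr;
	int irq;

	/* Look up the IO-APIC routing for bus 0, devfn 0x10, pin INTA. */
	irq = IO_APIC_get_PCI_irq_vector(0, 0x10, 0, &attr);
	if (irq >= 0) {
		/*
		 * attr.ioapic, attr.ioapic_pin, attr.trigger and attr.polarity
		 * now describe the pin and can be passed on to
		 * io_apic_set_pci_routing() (see the io_apic.h change below).
		 */
	}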
diff --git a/arch/x86/include/asm/i387.h b/arch/x86/include/asm/i387.h index 71c9e5183982..175adf58dd4f 100644 --- a/arch/x86/include/asm/i387.h +++ b/arch/x86/include/asm/i387.h | |||
@@ -67,7 +67,7 @@ static inline int fxrstor_checking(struct i387_fxsave_struct *fx) | |||
67 | ".previous\n" | 67 | ".previous\n" |
68 | _ASM_EXTABLE(1b, 3b) | 68 | _ASM_EXTABLE(1b, 3b) |
69 | : [err] "=r" (err) | 69 | : [err] "=r" (err) |
70 | #if 0 /* See comment in __save_init_fpu() below. */ | 70 | #if 0 /* See comment in fxsave() below. */ |
71 | : [fx] "r" (fx), "m" (*fx), "0" (0)); | 71 | : [fx] "r" (fx), "m" (*fx), "0" (0)); |
72 | #else | 72 | #else |
73 | : [fx] "cdaSDb" (fx), "m" (*fx), "0" (0)); | 73 | : [fx] "cdaSDb" (fx), "m" (*fx), "0" (0)); |
@@ -75,14 +75,6 @@ static inline int fxrstor_checking(struct i387_fxsave_struct *fx) | |||
75 | return err; | 75 | return err; |
76 | } | 76 | } |
77 | 77 | ||
78 | static inline int restore_fpu_checking(struct task_struct *tsk) | ||
79 | { | ||
80 | if (task_thread_info(tsk)->status & TS_XSAVE) | ||
81 | return xrstor_checking(&tsk->thread.xstate->xsave); | ||
82 | else | ||
83 | return fxrstor_checking(&tsk->thread.xstate->fxsave); | ||
84 | } | ||
85 | |||
86 | /* AMD CPUs don't save/restore FDP/FIP/FOP unless an exception | 78 | /* AMD CPUs don't save/restore FDP/FIP/FOP unless an exception |
87 | is pending. Clear the x87 state here by setting it to fixed | 79 | is pending. Clear the x87 state here by setting it to fixed |
88 | values. The kernel data segment can be sometimes 0 and sometimes | 80 | values. The kernel data segment can be sometimes 0 and sometimes |
@@ -120,7 +112,7 @@ static inline int fxsave_user(struct i387_fxsave_struct __user *fx) | |||
120 | ".previous\n" | 112 | ".previous\n" |
121 | _ASM_EXTABLE(1b, 3b) | 113 | _ASM_EXTABLE(1b, 3b) |
122 | : [err] "=r" (err), "=m" (*fx) | 114 | : [err] "=r" (err), "=m" (*fx) |
123 | #if 0 /* See comment in __fxsave_clear() below. */ | 115 | #if 0 /* See comment in fxsave() below. */ |
124 | : [fx] "r" (fx), "0" (0)); | 116 | : [fx] "r" (fx), "0" (0)); |
125 | #else | 117 | #else |
126 | : [fx] "cdaSDb" (fx), "0" (0)); | 118 | : [fx] "cdaSDb" (fx), "0" (0)); |
@@ -185,12 +177,9 @@ static inline void tolerant_fwait(void) | |||
185 | asm volatile("fnclex ; fwait"); | 177 | asm volatile("fnclex ; fwait"); |
186 | } | 178 | } |
187 | 179 | ||
188 | static inline void restore_fpu(struct task_struct *tsk) | 180 | /* perform fxrstor iff the processor has extended states, otherwise frstor */ |
181 | static inline int fxrstor_checking(struct i387_fxsave_struct *fx) | ||
189 | { | 182 | { |
190 | if (task_thread_info(tsk)->status & TS_XSAVE) { | ||
191 | xrstor_checking(&tsk->thread.xstate->xsave); | ||
192 | return; | ||
193 | } | ||
194 | /* | 183 | /* |
195 | * The "nop" is needed to make the instructions the same | 184 | * The "nop" is needed to make the instructions the same |
196 | * length. | 185 | * length. |
@@ -199,7 +188,9 @@ static inline void restore_fpu(struct task_struct *tsk) | |||
199 | "nop ; frstor %1", | 188 | "nop ; frstor %1", |
200 | "fxrstor %1", | 189 | "fxrstor %1", |
201 | X86_FEATURE_FXSR, | 190 | X86_FEATURE_FXSR, |
202 | "m" (tsk->thread.xstate->fxsave)); | 191 | "m" (*fx)); |
192 | |||
193 | return 0; | ||
203 | } | 194 | } |
204 | 195 | ||
205 | /* We need a safe address that is cheap to find and that is already | 196 | /* We need a safe address that is cheap to find and that is already |
@@ -262,6 +253,14 @@ end: | |||
262 | 253 | ||
263 | #endif /* CONFIG_X86_64 */ | 254 | #endif /* CONFIG_X86_64 */ |
264 | 255 | ||
256 | static inline int restore_fpu_checking(struct task_struct *tsk) | ||
257 | { | ||
258 | if (task_thread_info(tsk)->status & TS_XSAVE) | ||
259 | return xrstor_checking(&tsk->thread.xstate->xsave); | ||
260 | else | ||
261 | return fxrstor_checking(&tsk->thread.xstate->fxsave); | ||
262 | } | ||
263 | |||
265 | /* | 264 | /* |
266 | * Signal frame handlers... | 265 | * Signal frame handlers... |
267 | */ | 266 | */ |
@@ -305,18 +304,18 @@ static inline void kernel_fpu_end(void) | |||
305 | /* | 304 | /* |
306 | * Some instructions like VIA's padlock instructions generate a spurious | 305 | * Some instructions like VIA's padlock instructions generate a spurious |
307 | * DNA fault but don't modify SSE registers. And these instructions | 306 | * DNA fault but don't modify SSE registers. And these instructions |
308 | * get used from interrupt context aswell. To prevent these kernel instructions | 307 | * get used from interrupt context as well. To prevent these kernel instructions |
309 | * in interrupt context interact wrongly with other user/kernel fpu usage, we | 308 | * in interrupt context interacting wrongly with other user/kernel fpu usage, we |
310 | * should use them only in the context of irq_ts_save/restore() | 309 | * should use them only in the context of irq_ts_save/restore() |
311 | */ | 310 | */ |
312 | static inline int irq_ts_save(void) | 311 | static inline int irq_ts_save(void) |
313 | { | 312 | { |
314 | /* | 313 | /* |
315 | * If we are in process context, we are ok to take a spurious DNA fault. | 314 | * If in process context and not atomic, we can take a spurious DNA fault. |
316 | * Otherwise, doing clts() in process context require pre-emption to | 315 | * Otherwise, doing clts() in process context requires disabling preemption |
317 | * be disabled or some heavy lifting like kernel_fpu_begin() | 316 | * or some heavy lifting like kernel_fpu_begin() |
318 | */ | 317 | */ |
319 | if (!in_interrupt()) | 318 | if (!in_atomic()) |
320 | return 0; | 319 | return 0; |
321 | 320 | ||
322 | if (read_cr0() & X86_CR0_TS) { | 321 | if (read_cr0() & X86_CR0_TS) { |
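The hunk above narrows irq_ts_save() to atomic context, and the surrounding comment asks callers to bracket such instructions with irq_ts_save()/irq_ts_restore(). A short sketch, assuming irq_ts_restore(int) keeps its existing signature (it is not shown in this hunk):

	/* Guard a kernel-mode instruction that can raise a spurious DNA fault. */
	int ts_state = irq_ts_save();

	/* ... issue the padlock/FPU-touching instruction here ... */

	irq_ts_restore(ts_state);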
diff --git a/arch/x86/include/asm/i8259.h b/arch/x86/include/asm/i8259.h index 1a99e6c092af..58d7091eeb1f 100644 --- a/arch/x86/include/asm/i8259.h +++ b/arch/x86/include/asm/i8259.h | |||
@@ -60,8 +60,4 @@ extern struct irq_chip i8259A_chip; | |||
60 | extern void mask_8259A(void); | 60 | extern void mask_8259A(void); |
61 | extern void unmask_8259A(void); | 61 | extern void unmask_8259A(void); |
62 | 62 | ||
63 | #ifdef CONFIG_X86_32 | ||
64 | extern void init_ISA_irqs(void); | ||
65 | #endif | ||
66 | |||
67 | #endif /* _ASM_X86_I8259_H */ | 63 | #endif /* _ASM_X86_I8259_H */ |
diff --git a/arch/x86/include/asm/intel_arch_perfmon.h b/arch/x86/include/asm/intel_arch_perfmon.h deleted file mode 100644 index fa0fd068bc2e..000000000000 --- a/arch/x86/include/asm/intel_arch_perfmon.h +++ /dev/null | |||
@@ -1,31 +0,0 @@ | |||
1 | #ifndef _ASM_X86_INTEL_ARCH_PERFMON_H | ||
2 | #define _ASM_X86_INTEL_ARCH_PERFMON_H | ||
3 | |||
4 | #define MSR_ARCH_PERFMON_PERFCTR0 0xc1 | ||
5 | #define MSR_ARCH_PERFMON_PERFCTR1 0xc2 | ||
6 | |||
7 | #define MSR_ARCH_PERFMON_EVENTSEL0 0x186 | ||
8 | #define MSR_ARCH_PERFMON_EVENTSEL1 0x187 | ||
9 | |||
10 | #define ARCH_PERFMON_EVENTSEL0_ENABLE (1 << 22) | ||
11 | #define ARCH_PERFMON_EVENTSEL_INT (1 << 20) | ||
12 | #define ARCH_PERFMON_EVENTSEL_OS (1 << 17) | ||
13 | #define ARCH_PERFMON_EVENTSEL_USR (1 << 16) | ||
14 | |||
15 | #define ARCH_PERFMON_UNHALTED_CORE_CYCLES_SEL (0x3c) | ||
16 | #define ARCH_PERFMON_UNHALTED_CORE_CYCLES_UMASK (0x00 << 8) | ||
17 | #define ARCH_PERFMON_UNHALTED_CORE_CYCLES_INDEX (0) | ||
18 | #define ARCH_PERFMON_UNHALTED_CORE_CYCLES_PRESENT \ | ||
19 | (1 << (ARCH_PERFMON_UNHALTED_CORE_CYCLES_INDEX)) | ||
20 | |||
21 | union cpuid10_eax { | ||
22 | struct { | ||
23 | unsigned int version_id:8; | ||
24 | unsigned int num_counters:8; | ||
25 | unsigned int bit_width:8; | ||
26 | unsigned int mask_length:8; | ||
27 | } split; | ||
28 | unsigned int full; | ||
29 | }; | ||
30 | |||
31 | #endif /* _ASM_X86_INTEL_ARCH_PERFMON_H */ | ||
diff --git a/arch/x86/include/asm/io_apic.h b/arch/x86/include/asm/io_apic.h index 9d826e436010..daf866ed0612 100644 --- a/arch/x86/include/asm/io_apic.h +++ b/arch/x86/include/asm/io_apic.h | |||
@@ -154,22 +154,19 @@ extern int timer_through_8259; | |||
154 | extern int io_apic_get_unique_id(int ioapic, int apic_id); | 154 | extern int io_apic_get_unique_id(int ioapic, int apic_id); |
155 | extern int io_apic_get_version(int ioapic); | 155 | extern int io_apic_get_version(int ioapic); |
156 | extern int io_apic_get_redir_entries(int ioapic); | 156 | extern int io_apic_get_redir_entries(int ioapic); |
157 | extern int io_apic_set_pci_routing(int ioapic, int pin, int irq, | ||
158 | int edge_level, int active_high_low); | ||
159 | #endif /* CONFIG_ACPI */ | 157 | #endif /* CONFIG_ACPI */ |
160 | 158 | ||
159 | struct io_apic_irq_attr; | ||
160 | extern int io_apic_set_pci_routing(struct device *dev, int irq, | ||
161 | struct io_apic_irq_attr *irq_attr); | ||
161 | extern int (*ioapic_renumber_irq)(int ioapic, int irq); | 162 | extern int (*ioapic_renumber_irq)(int ioapic, int irq); |
162 | extern void ioapic_init_mappings(void); | 163 | extern void ioapic_init_mappings(void); |
163 | 164 | ||
164 | #ifdef CONFIG_X86_64 | ||
165 | extern struct IO_APIC_route_entry **alloc_ioapic_entries(void); | 165 | extern struct IO_APIC_route_entry **alloc_ioapic_entries(void); |
166 | extern void free_ioapic_entries(struct IO_APIC_route_entry **ioapic_entries); | 166 | extern void free_ioapic_entries(struct IO_APIC_route_entry **ioapic_entries); |
167 | extern int save_IO_APIC_setup(struct IO_APIC_route_entry **ioapic_entries); | 167 | extern int save_IO_APIC_setup(struct IO_APIC_route_entry **ioapic_entries); |
168 | extern void mask_IO_APIC_setup(struct IO_APIC_route_entry **ioapic_entries); | 168 | extern void mask_IO_APIC_setup(struct IO_APIC_route_entry **ioapic_entries); |
169 | extern int restore_IO_APIC_setup(struct IO_APIC_route_entry **ioapic_entries); | 169 | extern int restore_IO_APIC_setup(struct IO_APIC_route_entry **ioapic_entries); |
170 | extern void reinit_intr_remapped_IO_APIC(int intr_remapping, | ||
171 | struct IO_APIC_route_entry **ioapic_entries); | ||
172 | #endif | ||
173 | 170 | ||
174 | extern void probe_nr_irqs_gsi(void); | 171 | extern void probe_nr_irqs_gsi(void); |
175 | 172 | ||
diff --git a/arch/x86/include/asm/iomap.h b/arch/x86/include/asm/iomap.h index 86af26091d6c..0e9fe1d9d971 100644 --- a/arch/x86/include/asm/iomap.h +++ b/arch/x86/include/asm/iomap.h | |||
@@ -1,3 +1,6 @@ | |||
1 | #ifndef _ASM_X86_IOMAP_H | ||
2 | #define _ASM_X86_IOMAP_H | ||
3 | |||
1 | /* | 4 | /* |
2 | * Copyright © 2008 Ingo Molnar | 5 | * Copyright © 2008 Ingo Molnar |
3 | * | 6 | * |
@@ -31,3 +34,5 @@ iomap_atomic_prot_pfn(unsigned long pfn, enum km_type type, pgprot_t prot); | |||
31 | 34 | ||
32 | void | 35 | void |
33 | iounmap_atomic(void *kvaddr, enum km_type type); | 36 | iounmap_atomic(void *kvaddr, enum km_type type); |
37 | |||
38 | #endif /* _ASM_X86_IOMAP_H */ | ||
diff --git a/arch/x86/include/asm/iommu.h b/arch/x86/include/asm/iommu.h index af326a2975b5..fd6d21bbee6c 100644 --- a/arch/x86/include/asm/iommu.h +++ b/arch/x86/include/asm/iommu.h | |||
@@ -6,6 +6,7 @@ extern void no_iommu_init(void); | |||
6 | extern struct dma_map_ops nommu_dma_ops; | 6 | extern struct dma_map_ops nommu_dma_ops; |
7 | extern int force_iommu, no_iommu; | 7 | extern int force_iommu, no_iommu; |
8 | extern int iommu_detected; | 8 | extern int iommu_detected; |
9 | extern int iommu_pass_through; | ||
9 | 10 | ||
10 | /* 10 seconds */ | 11 | /* 10 seconds */ |
11 | #define DMAR_OPERATION_TIMEOUT ((cycles_t) tsc_khz*10*1000) | 12 | #define DMAR_OPERATION_TIMEOUT ((cycles_t) tsc_khz*10*1000) |
diff --git a/arch/x86/include/asm/irq_remapping.h b/arch/x86/include/asm/irq_remapping.h index 0396760fccb8..f275e2244505 100644 --- a/arch/x86/include/asm/irq_remapping.h +++ b/arch/x86/include/asm/irq_remapping.h | |||
@@ -1,6 +1,6 @@ | |||
1 | #ifndef _ASM_X86_IRQ_REMAPPING_H | 1 | #ifndef _ASM_X86_IRQ_REMAPPING_H |
2 | #define _ASM_X86_IRQ_REMAPPING_H | 2 | #define _ASM_X86_IRQ_REMAPPING_H |
3 | 3 | ||
4 | #define IRTE_DEST(dest) ((x2apic) ? dest : dest << 8) | 4 | #define IRTE_DEST(dest) ((x2apic_mode) ? dest : dest << 8) |
5 | 5 | ||
6 | #endif /* _ASM_X86_IRQ_REMAPPING_H */ | 6 | #endif /* _ASM_X86_IRQ_REMAPPING_H */ |
diff --git a/arch/x86/include/asm/irq_vectors.h b/arch/x86/include/asm/irq_vectors.h index 3cbd79bbb47c..5b21f0ec3df2 100644 --- a/arch/x86/include/asm/irq_vectors.h +++ b/arch/x86/include/asm/irq_vectors.h | |||
@@ -25,6 +25,7 @@ | |||
25 | */ | 25 | */ |
26 | 26 | ||
27 | #define NMI_VECTOR 0x02 | 27 | #define NMI_VECTOR 0x02 |
28 | #define MCE_VECTOR 0x12 | ||
28 | 29 | ||
29 | /* | 30 | /* |
30 | * IDT vectors usable for external interrupt sources start | 31 | * IDT vectors usable for external interrupt sources start |
@@ -34,6 +35,7 @@ | |||
34 | 35 | ||
35 | #ifdef CONFIG_X86_32 | 36 | #ifdef CONFIG_X86_32 |
36 | # define SYSCALL_VECTOR 0x80 | 37 | # define SYSCALL_VECTOR 0x80 |
38 | # define IA32_SYSCALL_VECTOR 0x80 | ||
37 | #else | 39 | #else |
38 | # define IA32_SYSCALL_VECTOR 0x80 | 40 | # define IA32_SYSCALL_VECTOR 0x80 |
39 | #endif | 41 | #endif |
@@ -86,13 +88,8 @@ | |||
86 | #define CALL_FUNCTION_VECTOR 0xfc | 88 | #define CALL_FUNCTION_VECTOR 0xfc |
87 | #define CALL_FUNCTION_SINGLE_VECTOR 0xfb | 89 | #define CALL_FUNCTION_SINGLE_VECTOR 0xfb |
88 | #define THERMAL_APIC_VECTOR 0xfa | 90 | #define THERMAL_APIC_VECTOR 0xfa |
89 | 91 | #define THRESHOLD_APIC_VECTOR 0xf9 | |
90 | #ifdef CONFIG_X86_32 | 92 | #define REBOOT_VECTOR 0xf8 |
91 | /* 0xf8 - 0xf9 : free */ | ||
92 | #else | ||
93 | # define THRESHOLD_APIC_VECTOR 0xf9 | ||
94 | # define UV_BAU_MESSAGE 0xf8 | ||
95 | #endif | ||
96 | 93 | ||
97 | /* f0-f7 used for spreading out TLB flushes: */ | 94 | /* f0-f7 used for spreading out TLB flushes: */ |
98 | #define INVALIDATE_TLB_VECTOR_END 0xf7 | 95 | #define INVALIDATE_TLB_VECTOR_END 0xf7 |
@@ -107,14 +104,21 @@ | |||
107 | #define LOCAL_TIMER_VECTOR 0xef | 104 | #define LOCAL_TIMER_VECTOR 0xef |
108 | 105 | ||
109 | /* | 106 | /* |
110 | * Performance monitoring interrupt vector: | 107 | * Generic system vector for platform specific use |
111 | */ | 108 | */ |
112 | #define LOCAL_PERF_VECTOR 0xee | 109 | #define GENERIC_INTERRUPT_VECTOR 0xed |
113 | 110 | ||
114 | /* | 111 | /* |
115 | * Generic system vector for platform specific use | 112 | * Performance monitoring pending work vector: |
116 | */ | 113 | */ |
117 | #define GENERIC_INTERRUPT_VECTOR 0xed | 114 | #define LOCAL_PENDING_VECTOR 0xec |
115 | |||
116 | #define UV_BAU_MESSAGE 0xec | ||
117 | |||
118 | /* | ||
119 | * Self IPI vector for machine checks | ||
120 | */ | ||
121 | #define MCE_SELF_VECTOR 0xeb | ||
118 | 122 | ||
119 | /* | 123 | /* |
120 | * First APIC vector available to drivers: (vectors 0x30-0xee) we | 124 | * First APIC vector available to drivers: (vectors 0x30-0xee) we |
diff --git a/arch/x86/include/asm/k8.h b/arch/x86/include/asm/k8.h index 54c8cc53b24d..c2d1f3b58e5f 100644 --- a/arch/x86/include/asm/k8.h +++ b/arch/x86/include/asm/k8.h | |||
@@ -12,4 +12,17 @@ extern int cache_k8_northbridges(void); | |||
12 | extern void k8_flush_garts(void); | 12 | extern void k8_flush_garts(void); |
13 | extern int k8_scan_nodes(unsigned long start, unsigned long end); | 13 | extern int k8_scan_nodes(unsigned long start, unsigned long end); |
14 | 14 | ||
15 | #ifdef CONFIG_K8_NB | ||
16 | static inline struct pci_dev *node_to_k8_nb_misc(int node) | ||
17 | { | ||
18 | return (node < num_k8_northbridges) ? k8_northbridges[node] : NULL; | ||
19 | } | ||
20 | #else | ||
21 | static inline struct pci_dev *node_to_k8_nb_misc(int node) | ||
22 | { | ||
23 | return NULL; | ||
24 | } | ||
25 | #endif | ||
26 | |||
27 | |||
15 | #endif /* _ASM_X86_K8_H */ | 28 | #endif /* _ASM_X86_K8_H */ |
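node_to_k8_nb_misc() maps a NUMA node to the misc PCI function of its K8 northbridge, or returns NULL when CONFIG_K8_NB is off or the node index is out of range. A brief usage sketch; the config-space offset is illustrative only:

	struct pci_dev *misc = node_to_k8_nb_misc(0);	/* node 0 */
	u32 val;

	if (misc)
		pci_read_config_dword(misc, 0x1d4, &val);	/* example offset */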
diff --git a/arch/x86/include/asm/kmap_types.h b/arch/x86/include/asm/kmap_types.h index 5759c165a5cf..9e00a731a7fb 100644 --- a/arch/x86/include/asm/kmap_types.h +++ b/arch/x86/include/asm/kmap_types.h | |||
@@ -2,28 +2,11 @@ | |||
2 | #define _ASM_X86_KMAP_TYPES_H | 2 | #define _ASM_X86_KMAP_TYPES_H |
3 | 3 | ||
4 | #if defined(CONFIG_X86_32) && defined(CONFIG_DEBUG_HIGHMEM) | 4 | #if defined(CONFIG_X86_32) && defined(CONFIG_DEBUG_HIGHMEM) |
5 | # define D(n) __KM_FENCE_##n , | 5 | #define __WITH_KM_FENCE |
6 | #else | ||
7 | # define D(n) | ||
8 | #endif | 6 | #endif |
9 | 7 | ||
10 | enum km_type { | 8 | #include <asm-generic/kmap_types.h> |
11 | D(0) KM_BOUNCE_READ, | ||
12 | D(1) KM_SKB_SUNRPC_DATA, | ||
13 | D(2) KM_SKB_DATA_SOFTIRQ, | ||
14 | D(3) KM_USER0, | ||
15 | D(4) KM_USER1, | ||
16 | D(5) KM_BIO_SRC_IRQ, | ||
17 | D(6) KM_BIO_DST_IRQ, | ||
18 | D(7) KM_PTE0, | ||
19 | D(8) KM_PTE1, | ||
20 | D(9) KM_IRQ0, | ||
21 | D(10) KM_IRQ1, | ||
22 | D(11) KM_SOFTIRQ0, | ||
23 | D(12) KM_SOFTIRQ1, | ||
24 | D(13) KM_TYPE_NR | ||
25 | }; | ||
26 | 9 | ||
27 | #undef D | 10 | #undef __WITH_KM_FENCE |
28 | 11 | ||
29 | #endif /* _ASM_X86_KMAP_TYPES_H */ | 12 | #endif /* _ASM_X86_KMAP_TYPES_H */ |
diff --git a/arch/x86/include/asm/kmemcheck.h b/arch/x86/include/asm/kmemcheck.h new file mode 100644 index 000000000000..ed01518f297e --- /dev/null +++ b/arch/x86/include/asm/kmemcheck.h | |||
@@ -0,0 +1,42 @@ | |||
1 | #ifndef ASM_X86_KMEMCHECK_H | ||
2 | #define ASM_X86_KMEMCHECK_H | ||
3 | |||
4 | #include <linux/types.h> | ||
5 | #include <asm/ptrace.h> | ||
6 | |||
7 | #ifdef CONFIG_KMEMCHECK | ||
8 | bool kmemcheck_active(struct pt_regs *regs); | ||
9 | |||
10 | void kmemcheck_show(struct pt_regs *regs); | ||
11 | void kmemcheck_hide(struct pt_regs *regs); | ||
12 | |||
13 | bool kmemcheck_fault(struct pt_regs *regs, | ||
14 | unsigned long address, unsigned long error_code); | ||
15 | bool kmemcheck_trap(struct pt_regs *regs); | ||
16 | #else | ||
17 | static inline bool kmemcheck_active(struct pt_regs *regs) | ||
18 | { | ||
19 | return false; | ||
20 | } | ||
21 | |||
22 | static inline void kmemcheck_show(struct pt_regs *regs) | ||
23 | { | ||
24 | } | ||
25 | |||
26 | static inline void kmemcheck_hide(struct pt_regs *regs) | ||
27 | { | ||
28 | } | ||
29 | |||
30 | static inline bool kmemcheck_fault(struct pt_regs *regs, | ||
31 | unsigned long address, unsigned long error_code) | ||
32 | { | ||
33 | return false; | ||
34 | } | ||
35 | |||
36 | static inline bool kmemcheck_trap(struct pt_regs *regs) | ||
37 | { | ||
38 | return false; | ||
39 | } | ||
40 | #endif /* CONFIG_KMEMCHECK */ | ||
41 | |||
42 | #endif | ||
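The #else stubs let fault-handling code call the kmemcheck hooks unconditionally. A purely illustrative sketch of how a page-fault path might give kmemcheck first refusal; the real wiring lives in the fault handler, not in this header:

	static bool maybe_kmemcheck_fault(struct pt_regs *regs,
					  unsigned long address,
					  unsigned long error_code)
	{
		/* Finish a pending kmemcheck single-step before re-entering. */
		if (kmemcheck_active(regs))
			kmemcheck_hide(regs);

		/* True means kmemcheck handled the fault; do not oops. */
		return kmemcheck_fault(regs, address, error_code);
	}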
diff --git a/arch/x86/include/asm/kvm.h b/arch/x86/include/asm/kvm.h index dc3f6cf11704..125be8b19568 100644 --- a/arch/x86/include/asm/kvm.h +++ b/arch/x86/include/asm/kvm.h | |||
@@ -16,6 +16,7 @@ | |||
16 | #define __KVM_HAVE_MSI | 16 | #define __KVM_HAVE_MSI |
17 | #define __KVM_HAVE_USER_NMI | 17 | #define __KVM_HAVE_USER_NMI |
18 | #define __KVM_HAVE_GUEST_DEBUG | 18 | #define __KVM_HAVE_GUEST_DEBUG |
19 | #define __KVM_HAVE_MSIX | ||
19 | 20 | ||
20 | /* Architectural interrupt line count. */ | 21 | /* Architectural interrupt line count. */ |
21 | #define KVM_NR_INTERRUPTS 256 | 22 | #define KVM_NR_INTERRUPTS 256 |
diff --git a/arch/x86/include/asm/kvm_host.h b/arch/x86/include/asm/kvm_host.h index f0faf58044ff..eabdc1cfab5c 100644 --- a/arch/x86/include/asm/kvm_host.h +++ b/arch/x86/include/asm/kvm_host.h | |||
@@ -185,6 +185,7 @@ union kvm_mmu_page_role { | |||
185 | unsigned access:3; | 185 | unsigned access:3; |
186 | unsigned invalid:1; | 186 | unsigned invalid:1; |
187 | unsigned cr4_pge:1; | 187 | unsigned cr4_pge:1; |
188 | unsigned nxe:1; | ||
188 | }; | 189 | }; |
189 | }; | 190 | }; |
190 | 191 | ||
@@ -212,7 +213,6 @@ struct kvm_mmu_page { | |||
212 | int multimapped; /* More than one parent_pte? */ | 213 | int multimapped; /* More than one parent_pte? */ |
213 | int root_count; /* Currently serving as active root */ | 214 | int root_count; /* Currently serving as active root */ |
214 | bool unsync; | 215 | bool unsync; |
215 | bool global; | ||
216 | unsigned int unsync_children; | 216 | unsigned int unsync_children; |
217 | union { | 217 | union { |
218 | u64 *parent_pte; /* !multimapped */ | 218 | u64 *parent_pte; /* !multimapped */ |
@@ -261,13 +261,11 @@ struct kvm_mmu { | |||
261 | union kvm_mmu_page_role base_role; | 261 | union kvm_mmu_page_role base_role; |
262 | 262 | ||
263 | u64 *pae_root; | 263 | u64 *pae_root; |
264 | u64 rsvd_bits_mask[2][4]; | ||
264 | }; | 265 | }; |
265 | 266 | ||
266 | struct kvm_vcpu_arch { | 267 | struct kvm_vcpu_arch { |
267 | u64 host_tsc; | 268 | u64 host_tsc; |
268 | int interrupt_window_open; | ||
269 | unsigned long irq_summary; /* bit vector: 1 per word in irq_pending */ | ||
270 | DECLARE_BITMAP(irq_pending, KVM_NR_INTERRUPTS); | ||
271 | /* | 269 | /* |
272 | * rip and regs accesses must go through | 270 | * rip and regs accesses must go through |
273 | * kvm_{register,rip}_{read,write} functions. | 271 | * kvm_{register,rip}_{read,write} functions. |
@@ -286,6 +284,7 @@ struct kvm_vcpu_arch { | |||
286 | u64 shadow_efer; | 284 | u64 shadow_efer; |
287 | u64 apic_base; | 285 | u64 apic_base; |
288 | struct kvm_lapic *apic; /* kernel irqchip context */ | 286 | struct kvm_lapic *apic; /* kernel irqchip context */ |
287 | int32_t apic_arb_prio; | ||
289 | int mp_state; | 288 | int mp_state; |
290 | int sipi_vector; | 289 | int sipi_vector; |
291 | u64 ia32_misc_enable_msr; | 290 | u64 ia32_misc_enable_msr; |
@@ -320,6 +319,8 @@ struct kvm_vcpu_arch { | |||
320 | struct kvm_pio_request pio; | 319 | struct kvm_pio_request pio; |
321 | void *pio_data; | 320 | void *pio_data; |
322 | 321 | ||
322 | u8 event_exit_inst_len; | ||
323 | |||
323 | struct kvm_queued_exception { | 324 | struct kvm_queued_exception { |
324 | bool pending; | 325 | bool pending; |
325 | bool has_error_code; | 326 | bool has_error_code; |
@@ -329,11 +330,12 @@ struct kvm_vcpu_arch { | |||
329 | 330 | ||
330 | struct kvm_queued_interrupt { | 331 | struct kvm_queued_interrupt { |
331 | bool pending; | 332 | bool pending; |
333 | bool soft; | ||
332 | u8 nr; | 334 | u8 nr; |
333 | } interrupt; | 335 | } interrupt; |
334 | 336 | ||
335 | struct { | 337 | struct { |
336 | int active; | 338 | int vm86_active; |
337 | u8 save_iopl; | 339 | u8 save_iopl; |
338 | struct kvm_save_segment { | 340 | struct kvm_save_segment { |
339 | u16 selector; | 341 | u16 selector; |
@@ -356,9 +358,9 @@ struct kvm_vcpu_arch { | |||
356 | unsigned int time_offset; | 358 | unsigned int time_offset; |
357 | struct page *time_page; | 359 | struct page *time_page; |
358 | 360 | ||
361 | bool singlestep; /* guest is single stepped by KVM */ | ||
359 | bool nmi_pending; | 362 | bool nmi_pending; |
360 | bool nmi_injected; | 363 | bool nmi_injected; |
361 | bool nmi_window_open; | ||
362 | 364 | ||
363 | struct mtrr_state_type mtrr_state; | 365 | struct mtrr_state_type mtrr_state; |
364 | u32 pat; | 366 | u32 pat; |
@@ -392,15 +394,14 @@ struct kvm_arch{ | |||
392 | */ | 394 | */ |
393 | struct list_head active_mmu_pages; | 395 | struct list_head active_mmu_pages; |
394 | struct list_head assigned_dev_head; | 396 | struct list_head assigned_dev_head; |
395 | struct list_head oos_global_pages; | ||
396 | struct iommu_domain *iommu_domain; | 397 | struct iommu_domain *iommu_domain; |
398 | int iommu_flags; | ||
397 | struct kvm_pic *vpic; | 399 | struct kvm_pic *vpic; |
398 | struct kvm_ioapic *vioapic; | 400 | struct kvm_ioapic *vioapic; |
399 | struct kvm_pit *vpit; | 401 | struct kvm_pit *vpit; |
400 | struct hlist_head irq_ack_notifier_list; | 402 | struct hlist_head irq_ack_notifier_list; |
401 | int vapics_in_nmi_mode; | 403 | int vapics_in_nmi_mode; |
402 | 404 | ||
403 | int round_robin_prev_vcpu; | ||
404 | unsigned int tss_addr; | 405 | unsigned int tss_addr; |
405 | struct page *apic_access_page; | 406 | struct page *apic_access_page; |
406 | 407 | ||
@@ -423,7 +424,6 @@ struct kvm_vm_stat { | |||
423 | u32 mmu_recycled; | 424 | u32 mmu_recycled; |
424 | u32 mmu_cache_miss; | 425 | u32 mmu_cache_miss; |
425 | u32 mmu_unsync; | 426 | u32 mmu_unsync; |
426 | u32 mmu_unsync_global; | ||
427 | u32 remote_tlb_flush; | 427 | u32 remote_tlb_flush; |
428 | u32 lpages; | 428 | u32 lpages; |
429 | }; | 429 | }; |
@@ -443,7 +443,6 @@ struct kvm_vcpu_stat { | |||
443 | u32 halt_exits; | 443 | u32 halt_exits; |
444 | u32 halt_wakeup; | 444 | u32 halt_wakeup; |
445 | u32 request_irq_exits; | 445 | u32 request_irq_exits; |
446 | u32 request_nmi_exits; | ||
447 | u32 irq_exits; | 446 | u32 irq_exits; |
448 | u32 host_state_reload; | 447 | u32 host_state_reload; |
449 | u32 efer_reload; | 448 | u32 efer_reload; |
@@ -511,20 +510,22 @@ struct kvm_x86_ops { | |||
511 | void (*run)(struct kvm_vcpu *vcpu, struct kvm_run *run); | 510 | void (*run)(struct kvm_vcpu *vcpu, struct kvm_run *run); |
512 | int (*handle_exit)(struct kvm_run *run, struct kvm_vcpu *vcpu); | 511 | int (*handle_exit)(struct kvm_run *run, struct kvm_vcpu *vcpu); |
513 | void (*skip_emulated_instruction)(struct kvm_vcpu *vcpu); | 512 | void (*skip_emulated_instruction)(struct kvm_vcpu *vcpu); |
513 | void (*set_interrupt_shadow)(struct kvm_vcpu *vcpu, int mask); | ||
514 | u32 (*get_interrupt_shadow)(struct kvm_vcpu *vcpu, int mask); | ||
514 | void (*patch_hypercall)(struct kvm_vcpu *vcpu, | 515 | void (*patch_hypercall)(struct kvm_vcpu *vcpu, |
515 | unsigned char *hypercall_addr); | 516 | unsigned char *hypercall_addr); |
516 | int (*get_irq)(struct kvm_vcpu *vcpu); | 517 | void (*set_irq)(struct kvm_vcpu *vcpu); |
517 | void (*set_irq)(struct kvm_vcpu *vcpu, int vec); | 518 | void (*set_nmi)(struct kvm_vcpu *vcpu); |
518 | void (*queue_exception)(struct kvm_vcpu *vcpu, unsigned nr, | 519 | void (*queue_exception)(struct kvm_vcpu *vcpu, unsigned nr, |
519 | bool has_error_code, u32 error_code); | 520 | bool has_error_code, u32 error_code); |
520 | bool (*exception_injected)(struct kvm_vcpu *vcpu); | 521 | int (*interrupt_allowed)(struct kvm_vcpu *vcpu); |
521 | void (*inject_pending_irq)(struct kvm_vcpu *vcpu); | 522 | int (*nmi_allowed)(struct kvm_vcpu *vcpu); |
522 | void (*inject_pending_vectors)(struct kvm_vcpu *vcpu, | 523 | void (*enable_nmi_window)(struct kvm_vcpu *vcpu); |
523 | struct kvm_run *run); | 524 | void (*enable_irq_window)(struct kvm_vcpu *vcpu); |
524 | 525 | void (*update_cr8_intercept)(struct kvm_vcpu *vcpu, int tpr, int irr); | |
525 | int (*set_tss_addr)(struct kvm *kvm, unsigned int addr); | 526 | int (*set_tss_addr)(struct kvm *kvm, unsigned int addr); |
526 | int (*get_tdp_level)(void); | 527 | int (*get_tdp_level)(void); |
527 | int (*get_mt_mask_shift)(void); | 528 | u64 (*get_mt_mask)(struct kvm_vcpu *vcpu, gfn_t gfn, bool is_mmio); |
528 | }; | 529 | }; |
529 | 530 | ||
530 | extern struct kvm_x86_ops *kvm_x86_ops; | 531 | extern struct kvm_x86_ops *kvm_x86_ops; |
@@ -538,7 +539,7 @@ int kvm_mmu_setup(struct kvm_vcpu *vcpu); | |||
538 | void kvm_mmu_set_nonpresent_ptes(u64 trap_pte, u64 notrap_pte); | 539 | void kvm_mmu_set_nonpresent_ptes(u64 trap_pte, u64 notrap_pte); |
539 | void kvm_mmu_set_base_ptes(u64 base_pte); | 540 | void kvm_mmu_set_base_ptes(u64 base_pte); |
540 | void kvm_mmu_set_mask_ptes(u64 user_mask, u64 accessed_mask, | 541 | void kvm_mmu_set_mask_ptes(u64 user_mask, u64 accessed_mask, |
541 | u64 dirty_mask, u64 nx_mask, u64 x_mask, u64 mt_mask); | 542 | u64 dirty_mask, u64 nx_mask, u64 x_mask); |
542 | 543 | ||
543 | int kvm_mmu_reset_context(struct kvm_vcpu *vcpu); | 544 | int kvm_mmu_reset_context(struct kvm_vcpu *vcpu); |
544 | void kvm_mmu_slot_remove_write_access(struct kvm *kvm, int slot); | 545 | void kvm_mmu_slot_remove_write_access(struct kvm *kvm, int slot); |
@@ -552,6 +553,7 @@ int emulator_write_phys(struct kvm_vcpu *vcpu, gpa_t gpa, | |||
552 | const void *val, int bytes); | 553 | const void *val, int bytes); |
553 | int kvm_pv_mmu_op(struct kvm_vcpu *vcpu, unsigned long bytes, | 554 | int kvm_pv_mmu_op(struct kvm_vcpu *vcpu, unsigned long bytes, |
554 | gpa_t addr, unsigned long *ret); | 555 | gpa_t addr, unsigned long *ret); |
556 | u8 kvm_get_guest_memory_type(struct kvm_vcpu *vcpu, gfn_t gfn); | ||
555 | 557 | ||
556 | extern bool tdp_enabled; | 558 | extern bool tdp_enabled; |
557 | 559 | ||
@@ -563,6 +565,7 @@ enum emulation_result { | |||
563 | 565 | ||
564 | #define EMULTYPE_NO_DECODE (1 << 0) | 566 | #define EMULTYPE_NO_DECODE (1 << 0) |
565 | #define EMULTYPE_TRAP_UD (1 << 1) | 567 | #define EMULTYPE_TRAP_UD (1 << 1) |
568 | #define EMULTYPE_SKIP (1 << 2) | ||
566 | int emulate_instruction(struct kvm_vcpu *vcpu, struct kvm_run *run, | 569 | int emulate_instruction(struct kvm_vcpu *vcpu, struct kvm_run *run, |
567 | unsigned long cr2, u16 error_code, int emulation_type); | 570 | unsigned long cr2, u16 error_code, int emulation_type); |
568 | void kvm_report_emulation_failure(struct kvm_vcpu *cvpu, const char *context); | 571 | void kvm_report_emulation_failure(struct kvm_vcpu *cvpu, const char *context); |
@@ -638,7 +641,6 @@ void __kvm_mmu_free_some_pages(struct kvm_vcpu *vcpu); | |||
638 | int kvm_mmu_load(struct kvm_vcpu *vcpu); | 641 | int kvm_mmu_load(struct kvm_vcpu *vcpu); |
639 | void kvm_mmu_unload(struct kvm_vcpu *vcpu); | 642 | void kvm_mmu_unload(struct kvm_vcpu *vcpu); |
640 | void kvm_mmu_sync_roots(struct kvm_vcpu *vcpu); | 643 | void kvm_mmu_sync_roots(struct kvm_vcpu *vcpu); |
641 | void kvm_mmu_sync_global(struct kvm_vcpu *vcpu); | ||
642 | 644 | ||
643 | int kvm_emulate_hypercall(struct kvm_vcpu *vcpu); | 645 | int kvm_emulate_hypercall(struct kvm_vcpu *vcpu); |
644 | 646 | ||
@@ -769,6 +771,8 @@ enum { | |||
769 | #define HF_GIF_MASK (1 << 0) | 771 | #define HF_GIF_MASK (1 << 0) |
770 | #define HF_HIF_MASK (1 << 1) | 772 | #define HF_HIF_MASK (1 << 1) |
771 | #define HF_VINTR_MASK (1 << 2) | 773 | #define HF_VINTR_MASK (1 << 2) |
774 | #define HF_NMI_MASK (1 << 3) | ||
775 | #define HF_IRET_MASK (1 << 4) | ||
772 | 776 | ||
773 | /* | 777 | /* |
774 | * Hardware virtualization extension instructions may fault if a | 778 | * Hardware virtualization extension instructions may fault if a |
@@ -791,5 +795,6 @@ asmlinkage void kvm_handle_fault_on_reboot(void); | |||
791 | #define KVM_ARCH_WANT_MMU_NOTIFIER | 795 | #define KVM_ARCH_WANT_MMU_NOTIFIER |
792 | int kvm_unmap_hva(struct kvm *kvm, unsigned long hva); | 796 | int kvm_unmap_hva(struct kvm *kvm, unsigned long hva); |
793 | int kvm_age_hva(struct kvm *kvm, unsigned long hva); | 797 | int kvm_age_hva(struct kvm *kvm, unsigned long hva); |
798 | int cpuid_maxphyaddr(struct kvm_vcpu *vcpu); | ||
794 | 799 | ||
795 | #endif /* _ASM_X86_KVM_HOST_H */ | 800 | #endif /* _ASM_X86_KVM_HOST_H */ |
diff --git a/arch/x86/include/asm/kvm_x86_emulate.h b/arch/x86/include/asm/kvm_x86_emulate.h index 6a159732881a..b7ed2c423116 100644 --- a/arch/x86/include/asm/kvm_x86_emulate.h +++ b/arch/x86/include/asm/kvm_x86_emulate.h | |||
@@ -143,6 +143,9 @@ struct decode_cache { | |||
143 | struct fetch_cache fetch; | 143 | struct fetch_cache fetch; |
144 | }; | 144 | }; |
145 | 145 | ||
146 | #define X86_SHADOW_INT_MOV_SS 1 | ||
147 | #define X86_SHADOW_INT_STI 2 | ||
148 | |||
146 | struct x86_emulate_ctxt { | 149 | struct x86_emulate_ctxt { |
147 | /* Register state before/after emulation. */ | 150 | /* Register state before/after emulation. */ |
148 | struct kvm_vcpu *vcpu; | 151 | struct kvm_vcpu *vcpu; |
@@ -152,6 +155,9 @@ struct x86_emulate_ctxt { | |||
152 | int mode; | 155 | int mode; |
153 | u32 cs_base; | 156 | u32 cs_base; |
154 | 157 | ||
158 | /* interruptibility state, as a result of execution of STI or MOV SS */ | ||
159 | int interruptibility; | ||
160 | |||
155 | /* decode cache */ | 161 | /* decode cache */ |
156 | struct decode_cache decode; | 162 | struct decode_cache decode; |
157 | }; | 163 | }; |
diff --git a/arch/x86/include/asm/lguest.h b/arch/x86/include/asm/lguest.h index 1caf57628b9c..313389cd50d2 100644 --- a/arch/x86/include/asm/lguest.h +++ b/arch/x86/include/asm/lguest.h | |||
@@ -17,8 +17,13 @@ | |||
17 | /* Pages for switcher itself, then two pages per cpu */ | 17 | /* Pages for switcher itself, then two pages per cpu */ |
18 | #define TOTAL_SWITCHER_PAGES (SHARED_SWITCHER_PAGES + 2 * nr_cpu_ids) | 18 | #define TOTAL_SWITCHER_PAGES (SHARED_SWITCHER_PAGES + 2 * nr_cpu_ids) |
19 | 19 | ||
20 | /* We map at -4M for ease of mapping into the guest (one PTE page). */ | 20 | /* We map at -4M (-2M when PAE is activated) for ease of mapping |
21 | * into the guest (one PTE page). */ | ||
22 | #ifdef CONFIG_X86_PAE | ||
23 | #define SWITCHER_ADDR 0xFFE00000 | ||
24 | #else | ||
21 | #define SWITCHER_ADDR 0xFFC00000 | 25 | #define SWITCHER_ADDR 0xFFC00000 |
26 | #endif | ||
22 | 27 | ||
23 | /* Found in switcher.S */ | 28 | /* Found in switcher.S */ |
24 | extern unsigned long default_idt_entries[]; | 29 | extern unsigned long default_idt_entries[]; |
diff --git a/arch/x86/include/asm/lguest_hcall.h b/arch/x86/include/asm/lguest_hcall.h index faae1996487b..d31c4a684078 100644 --- a/arch/x86/include/asm/lguest_hcall.h +++ b/arch/x86/include/asm/lguest_hcall.h | |||
@@ -12,11 +12,13 @@ | |||
12 | #define LHCALL_TS 8 | 12 | #define LHCALL_TS 8 |
13 | #define LHCALL_SET_CLOCKEVENT 9 | 13 | #define LHCALL_SET_CLOCKEVENT 9 |
14 | #define LHCALL_HALT 10 | 14 | #define LHCALL_HALT 10 |
15 | #define LHCALL_SET_PMD 13 | ||
15 | #define LHCALL_SET_PTE 14 | 16 | #define LHCALL_SET_PTE 14 |
16 | #define LHCALL_SET_PMD 15 | 17 | #define LHCALL_SET_PGD 15 |
17 | #define LHCALL_LOAD_TLS 16 | 18 | #define LHCALL_LOAD_TLS 16 |
18 | #define LHCALL_NOTIFY 17 | 19 | #define LHCALL_NOTIFY 17 |
19 | #define LHCALL_LOAD_GDT_ENTRY 18 | 20 | #define LHCALL_LOAD_GDT_ENTRY 18 |
21 | #define LHCALL_SEND_INTERRUPTS 19 | ||
20 | 22 | ||
21 | #define LGUEST_TRAP_ENTRY 0x1F | 23 | #define LGUEST_TRAP_ENTRY 0x1F |
22 | 24 | ||
@@ -32,10 +34,10 @@ | |||
32 | * operations? There are two ways: the direct way is to make a "hypercall", | 34 | * operations? There are two ways: the direct way is to make a "hypercall", |
33 | * to make requests of the Host Itself. | 35 | * to make requests of the Host Itself. |
34 | * | 36 | * |
35 | * We use the KVM hypercall mechanism. Eighteen hypercalls are | 37 | * We use the KVM hypercall mechanism. Seventeen hypercalls are |
36 | * available: the hypercall number is put in the %eax register, and the | 38 | * available: the hypercall number is put in the %eax register, and the |
37 | * arguments (when required) are placed in %ebx, %ecx and %edx. If a return | 39 | * arguments (when required) are placed in %ebx, %ecx, %edx and %esi. |
38 | * value makes sense, it's returned in %eax. | 40 | * If a return value makes sense, it's returned in %eax. |
39 | * | 41 | * |
40 | * Grossly invalid calls result in Sudden Death at the hands of the vengeful | 42 | * Grossly invalid calls result in Sudden Death at the hands of the vengeful |
41 | * Host, rather than returning failure. This reflects Winston Churchill's | 43 | * Host, rather than returning failure. This reflects Winston Churchill's |
@@ -47,8 +49,9 @@ | |||
47 | 49 | ||
48 | #define LHCALL_RING_SIZE 64 | 50 | #define LHCALL_RING_SIZE 64 |
49 | struct hcall_args { | 51 | struct hcall_args { |
50 | /* These map directly onto eax, ebx, ecx, edx in struct lguest_regs */ | 52 | /* These map directly onto eax, ebx, ecx, edx and esi |
51 | unsigned long arg0, arg1, arg2, arg3; | 53 | * in struct lguest_regs */ |
54 | unsigned long arg0, arg1, arg2, arg3, arg4; | ||
52 | }; | 55 | }; |
53 | 56 | ||
54 | #endif /* !__ASSEMBLY__ */ | 57 | #endif /* !__ASSEMBLY__ */ |
diff --git a/arch/x86/include/asm/mce.h b/arch/x86/include/asm/mce.h index 4f8c199584e7..5cdd8d100ec9 100644 --- a/arch/x86/include/asm/mce.h +++ b/arch/x86/include/asm/mce.h | |||
@@ -1,8 +1,6 @@ | |||
1 | #ifndef _ASM_X86_MCE_H | 1 | #ifndef _ASM_X86_MCE_H |
2 | #define _ASM_X86_MCE_H | 2 | #define _ASM_X86_MCE_H |
3 | 3 | ||
4 | #ifdef __x86_64__ | ||
5 | |||
6 | #include <linux/types.h> | 4 | #include <linux/types.h> |
7 | #include <asm/ioctls.h> | 5 | #include <asm/ioctls.h> |
8 | 6 | ||
@@ -10,21 +8,35 @@ | |||
10 | * Machine Check support for x86 | 8 | * Machine Check support for x86 |
11 | */ | 9 | */ |
12 | 10 | ||
13 | #define MCG_CTL_P (1UL<<8) /* MCG_CAP register available */ | 11 | #define MCG_BANKCNT_MASK 0xff /* Number of Banks */ |
14 | #define MCG_EXT_P (1ULL<<9) /* Extended registers available */ | 12 | #define MCG_CTL_P (1ULL<<8) /* MCG_CAP register available */ |
15 | #define MCG_CMCI_P (1ULL<<10) /* CMCI supported */ | 13 | #define MCG_EXT_P (1ULL<<9) /* Extended registers available */ |
16 | 14 | #define MCG_CMCI_P (1ULL<<10) /* CMCI supported */ | |
17 | #define MCG_STATUS_RIPV (1UL<<0) /* restart ip valid */ | 15 | #define MCG_EXT_CNT_MASK 0xff0000 /* Number of Extended registers */ |
18 | #define MCG_STATUS_EIPV (1UL<<1) /* ip points to correct instruction */ | 16 | #define MCG_EXT_CNT_SHIFT 16 |
19 | #define MCG_STATUS_MCIP (1UL<<2) /* machine check in progress */ | 17 | #define MCG_EXT_CNT(c) (((c) & MCG_EXT_CNT_MASK) >> MCG_EXT_CNT_SHIFT) |
20 | 18 | #define MCG_SER_P (1ULL<<24) /* MCA recovery/new status bits */ | |
21 | #define MCI_STATUS_VAL (1UL<<63) /* valid error */ | 19 | |
22 | #define MCI_STATUS_OVER (1UL<<62) /* previous errors lost */ | 20 | #define MCG_STATUS_RIPV (1ULL<<0) /* restart ip valid */ |
23 | #define MCI_STATUS_UC (1UL<<61) /* uncorrected error */ | 21 | #define MCG_STATUS_EIPV (1ULL<<1) /* ip points to correct instruction */ |
24 | #define MCI_STATUS_EN (1UL<<60) /* error enabled */ | 22 | #define MCG_STATUS_MCIP (1ULL<<2) /* machine check in progress */ |
25 | #define MCI_STATUS_MISCV (1UL<<59) /* misc error reg. valid */ | 23 | |
26 | #define MCI_STATUS_ADDRV (1UL<<58) /* addr reg. valid */ | 24 | #define MCI_STATUS_VAL (1ULL<<63) /* valid error */ |
27 | #define MCI_STATUS_PCC (1UL<<57) /* processor context corrupt */ | 25 | #define MCI_STATUS_OVER (1ULL<<62) /* previous errors lost */ |
26 | #define MCI_STATUS_UC (1ULL<<61) /* uncorrected error */ | ||
27 | #define MCI_STATUS_EN (1ULL<<60) /* error enabled */ | ||
28 | #define MCI_STATUS_MISCV (1ULL<<59) /* misc error reg. valid */ | ||
29 | #define MCI_STATUS_ADDRV (1ULL<<58) /* addr reg. valid */ | ||
30 | #define MCI_STATUS_PCC (1ULL<<57) /* processor context corrupt */ | ||
31 | #define MCI_STATUS_S (1ULL<<56) /* Signaled machine check */ | ||
32 | #define MCI_STATUS_AR (1ULL<<55) /* Action required */ | ||
33 | |||
34 | /* MISC register defines */ | ||
35 | #define MCM_ADDR_SEGOFF 0 /* segment offset */ | ||
36 | #define MCM_ADDR_LINEAR 1 /* linear address */ | ||
37 | #define MCM_ADDR_PHYS 2 /* physical address */ | ||
38 | #define MCM_ADDR_MEM 3 /* memory address */ | ||
39 | #define MCM_ADDR_GENERIC 7 /* generic */ | ||
28 | 40 | ||
29 | /* Fields are zero when not available */ | 41 | /* Fields are zero when not available */ |
30 | struct mce { | 42 | struct mce { |
@@ -34,13 +46,19 @@ struct mce { | |||
34 | __u64 mcgstatus; | 46 | __u64 mcgstatus; |
35 | __u64 ip; | 47 | __u64 ip; |
36 | __u64 tsc; /* cpu time stamp counter */ | 48 | __u64 tsc; /* cpu time stamp counter */ |
37 | __u64 res1; /* for future extension */ | 49 | __u64 time; /* wall time_t when error was detected */ |
38 | __u64 res2; /* dito. */ | 50 | __u8 cpuvendor; /* cpu vendor as encoded in system.h */ |
51 | __u8 pad1; | ||
52 | __u16 pad2; | ||
53 | __u32 cpuid; /* CPUID 1 EAX */ | ||
39 | __u8 cs; /* code segment */ | 54 | __u8 cs; /* code segment */ |
40 | __u8 bank; /* machine check bank */ | 55 | __u8 bank; /* machine check bank */ |
41 | __u8 cpu; /* cpu that raised the error */ | 56 | __u8 cpu; /* cpu number; obsolete; use extcpu now */ |
42 | __u8 finished; /* entry is valid */ | 57 | __u8 finished; /* entry is valid */ |
43 | __u32 pad; | 58 | __u32 extcpu; /* linux cpu number that detected the error */ |
59 | __u32 socketid; /* CPU socket ID */ | ||
60 | __u32 apicid; /* CPU initial apic ID */ | ||
61 | __u64 mcgcap; /* MCGCAP MSR: machine check capabilities of CPU */ | ||
44 | }; | 62 | }; |
45 | 63 | ||
46 | /* | 64 | /* |
@@ -57,7 +75,7 @@ struct mce_log { | |||
57 | unsigned len; /* = MCE_LOG_LEN */ | 75 | unsigned len; /* = MCE_LOG_LEN */ |
58 | unsigned next; | 76 | unsigned next; |
59 | unsigned flags; | 77 | unsigned flags; |
60 | unsigned pad0; | 78 | unsigned recordlen; /* length of struct mce */ |
61 | struct mce entry[MCE_LOG_LEN]; | 79 | struct mce entry[MCE_LOG_LEN]; |
62 | }; | 80 | }; |
63 | 81 | ||
@@ -82,20 +100,41 @@ struct mce_log { | |||
82 | #define K8_MCE_THRESHOLD_BANK_5 (MCE_THRESHOLD_BASE + 5 * 9) | 100 | #define K8_MCE_THRESHOLD_BANK_5 (MCE_THRESHOLD_BASE + 5 * 9) |
83 | #define K8_MCE_THRESHOLD_DRAM_ECC (MCE_THRESHOLD_BANK_4 + 0) | 101 | #define K8_MCE_THRESHOLD_DRAM_ECC (MCE_THRESHOLD_BANK_4 + 0) |
84 | 102 | ||
85 | #endif /* __x86_64__ */ | ||
86 | |||
87 | #ifdef __KERNEL__ | 103 | #ifdef __KERNEL__ |
88 | 104 | ||
89 | #ifdef CONFIG_X86_32 | 105 | #include <linux/percpu.h> |
106 | #include <linux/init.h> | ||
107 | #include <asm/atomic.h> | ||
108 | |||
90 | extern int mce_disabled; | 109 | extern int mce_disabled; |
91 | #else /* CONFIG_X86_32 */ | 110 | extern int mce_p5_enabled; |
92 | 111 | ||
93 | #include <asm/atomic.h> | 112 | #ifdef CONFIG_X86_MCE |
113 | void mcheck_init(struct cpuinfo_x86 *c); | ||
114 | #else | ||
115 | static inline void mcheck_init(struct cpuinfo_x86 *c) {} | ||
116 | #endif | ||
117 | |||
118 | #ifdef CONFIG_X86_OLD_MCE | ||
119 | extern int nr_mce_banks; | ||
120 | void amd_mcheck_init(struct cpuinfo_x86 *c); | ||
121 | void intel_p4_mcheck_init(struct cpuinfo_x86 *c); | ||
122 | void intel_p6_mcheck_init(struct cpuinfo_x86 *c); | ||
123 | #endif | ||
124 | |||
125 | #ifdef CONFIG_X86_ANCIENT_MCE | ||
126 | void intel_p5_mcheck_init(struct cpuinfo_x86 *c); | ||
127 | void winchip_mcheck_init(struct cpuinfo_x86 *c); | ||
128 | static inline void enable_p5_mce(void) { mce_p5_enabled = 1; } | ||
129 | #else | ||
130 | static inline void intel_p5_mcheck_init(struct cpuinfo_x86 *c) {} | ||
131 | static inline void winchip_mcheck_init(struct cpuinfo_x86 *c) {} | ||
132 | static inline void enable_p5_mce(void) {} | ||
133 | #endif | ||
94 | 134 | ||
95 | void mce_setup(struct mce *m); | 135 | void mce_setup(struct mce *m); |
96 | void mce_log(struct mce *m); | 136 | void mce_log(struct mce *m); |
97 | DECLARE_PER_CPU(struct sys_device, device_mce); | 137 | DECLARE_PER_CPU(struct sys_device, mce_dev); |
98 | extern void (*threshold_cpu_callback)(unsigned long action, unsigned int cpu); | ||
99 | 138 | ||
100 | /* | 139 | /* |
101 | * To support more than 128 would need to escape the predefined | 140 | * To support more than 128 would need to escape the predefined |
@@ -104,6 +143,8 @@ extern void (*threshold_cpu_callback)(unsigned long action, unsigned int cpu); | |||
104 | #define MAX_NR_BANKS (MCE_EXTENDED_BANK - 1) | 143 | #define MAX_NR_BANKS (MCE_EXTENDED_BANK - 1) |
105 | 144 | ||
106 | #ifdef CONFIG_X86_MCE_INTEL | 145 | #ifdef CONFIG_X86_MCE_INTEL |
146 | extern int mce_cmci_disabled; | ||
147 | extern int mce_ignore_ce; | ||
107 | void mce_intel_feature_init(struct cpuinfo_x86 *c); | 148 | void mce_intel_feature_init(struct cpuinfo_x86 *c); |
108 | void cmci_clear(void); | 149 | void cmci_clear(void); |
109 | void cmci_reenable(void); | 150 | void cmci_reenable(void); |
@@ -123,14 +164,13 @@ void mce_amd_feature_init(struct cpuinfo_x86 *c); | |||
123 | static inline void mce_amd_feature_init(struct cpuinfo_x86 *c) { } | 164 | static inline void mce_amd_feature_init(struct cpuinfo_x86 *c) { } |
124 | #endif | 165 | #endif |
125 | 166 | ||
126 | extern int mce_available(struct cpuinfo_x86 *c); | 167 | int mce_available(struct cpuinfo_x86 *c); |
127 | 168 | ||
128 | void mce_log_therm_throt_event(__u64 status); | 169 | DECLARE_PER_CPU(unsigned, mce_exception_count); |
170 | DECLARE_PER_CPU(unsigned, mce_poll_count); | ||
129 | 171 | ||
130 | extern atomic_t mce_entry; | 172 | extern atomic_t mce_entry; |
131 | 173 | ||
132 | extern void do_machine_check(struct pt_regs *, long); | ||
133 | |||
134 | typedef DECLARE_BITMAP(mce_banks_t, MAX_NR_BANKS); | 174 | typedef DECLARE_BITMAP(mce_banks_t, MAX_NR_BANKS); |
135 | DECLARE_PER_CPU(mce_banks_t, mce_poll_banks); | 175 | DECLARE_PER_CPU(mce_banks_t, mce_poll_banks); |
136 | 176 | ||
@@ -139,19 +179,40 @@ enum mcp_flags { | |||
139 | MCP_UC = (1 << 1), /* log uncorrected errors */ | 179 | MCP_UC = (1 << 1), /* log uncorrected errors */ |
140 | MCP_DONTLOG = (1 << 2), /* only clear, don't log */ | 180 | MCP_DONTLOG = (1 << 2), /* only clear, don't log */ |
141 | }; | 181 | }; |
142 | extern void machine_check_poll(enum mcp_flags flags, mce_banks_t *b); | 182 | void machine_check_poll(enum mcp_flags flags, mce_banks_t *b); |
143 | 183 | ||
144 | extern int mce_notify_user(void); | 184 | int mce_notify_irq(void); |
185 | void mce_notify_process(void); | ||
145 | 186 | ||
146 | #endif /* !CONFIG_X86_32 */ | 187 | DECLARE_PER_CPU(struct mce, injectm); |
188 | extern struct file_operations mce_chrdev_ops; | ||
147 | 189 | ||
148 | #ifdef CONFIG_X86_MCE | 190 | /* |
149 | extern void mcheck_init(struct cpuinfo_x86 *c); | 191 | * Exception handler |
150 | #else | 192 | */ |
151 | #define mcheck_init(c) do { } while (0) | 193 | |
152 | #endif | 194 | /* Call the installed machine check handler for this CPU setup. */ |
195 | extern void (*machine_check_vector)(struct pt_regs *, long error_code); | ||
196 | void do_machine_check(struct pt_regs *, long); | ||
197 | |||
198 | /* | ||
199 | * Threshold handler | ||
200 | */ | ||
153 | 201 | ||
154 | extern void (*mce_threshold_vector)(void); | 202 | extern void (*mce_threshold_vector)(void); |
203 | extern void (*threshold_cpu_callback)(unsigned long action, unsigned int cpu); | ||
204 | |||
205 | /* | ||
206 | * Thermal handler | ||
207 | */ | ||
208 | |||
209 | void intel_init_thermal(struct cpuinfo_x86 *c); | ||
210 | |||
211 | #ifdef CONFIG_X86_NEW_MCE | ||
212 | void mce_log_therm_throt_event(__u64 status); | ||
213 | #else | ||
214 | static inline void mce_log_therm_throt_event(__u64 status) {} | ||
215 | #endif | ||
155 | 216 | ||
156 | #endif /* __KERNEL__ */ | 217 | #endif /* __KERNEL__ */ |
157 | #endif /* _ASM_X86_MCE_H */ | 218 | #endif /* _ASM_X86_MCE_H */ |
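The widened MCG_/MCI_ definitions make MCG_CAP straightforward to decode. A short sketch, assuming rdmsrl() and MSR_IA32_MCG_CAP from msr.h/msr-index.h (neither is part of this hunk):

	u64 cap;
	unsigned int banks, ext_cnt;
	bool ser;

	rdmsrl(MSR_IA32_MCG_CAP, cap);
	banks   = cap & MCG_BANKCNT_MASK;	/* number of MC banks */
	ext_cnt = MCG_EXT_CNT(cap);		/* extended registers, if MCG_EXT_P */
	ser     = !!(cap & MCG_SER_P);		/* new MCI_STATUS_S/AR bits supported */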
diff --git a/arch/x86/include/asm/microcode.h b/arch/x86/include/asm/microcode.h index c882664716c1..ef51b501e22a 100644 --- a/arch/x86/include/asm/microcode.h +++ b/arch/x86/include/asm/microcode.h | |||
@@ -9,20 +9,31 @@ struct cpu_signature { | |||
9 | 9 | ||
10 | struct device; | 10 | struct device; |
11 | 11 | ||
12 | enum ucode_state { UCODE_ERROR, UCODE_OK, UCODE_NFOUND }; | ||
13 | |||
12 | struct microcode_ops { | 14 | struct microcode_ops { |
13 | int (*request_microcode_user) (int cpu, const void __user *buf, size_t size); | 15 | enum ucode_state (*request_microcode_user) (int cpu, |
14 | int (*request_microcode_fw) (int cpu, struct device *device); | 16 | const void __user *buf, size_t size); |
15 | 17 | ||
16 | void (*apply_microcode) (int cpu); | 18 | enum ucode_state (*request_microcode_fw) (int cpu, |
19 | struct device *device); | ||
17 | 20 | ||
18 | int (*collect_cpu_info) (int cpu, struct cpu_signature *csig); | ||
19 | void (*microcode_fini_cpu) (int cpu); | 21 | void (*microcode_fini_cpu) (int cpu); |
22 | |||
23 | /* | ||
24 | * The generic 'microcode_core' part guarantees that | ||
25 | * the callbacks below run on a target cpu when they | ||
26 | * are being called. | ||
27 | * See also the "Synchronization" section in microcode_core.c. | ||
28 | */ | ||
29 | int (*apply_microcode) (int cpu); | ||
30 | int (*collect_cpu_info) (int cpu, struct cpu_signature *csig); | ||
20 | }; | 31 | }; |
21 | 32 | ||
22 | struct ucode_cpu_info { | 33 | struct ucode_cpu_info { |
23 | struct cpu_signature cpu_sig; | 34 | struct cpu_signature cpu_sig; |
24 | int valid; | 35 | int valid; |
25 | void *mc; | 36 | void *mc; |
26 | }; | 37 | }; |
27 | extern struct ucode_cpu_info ucode_cpu_info[]; | 38 | extern struct ucode_cpu_info ucode_cpu_info[]; |
28 | 39 | ||
diff --git a/arch/x86/include/asm/mman.h b/arch/x86/include/asm/mman.h index 90bc4108a4fd..751af2550ed9 100644 --- a/arch/x86/include/asm/mman.h +++ b/arch/x86/include/asm/mman.h | |||
@@ -1,7 +1,7 @@ | |||
1 | #ifndef _ASM_X86_MMAN_H | 1 | #ifndef _ASM_X86_MMAN_H |
2 | #define _ASM_X86_MMAN_H | 2 | #define _ASM_X86_MMAN_H |
3 | 3 | ||
4 | #include <asm-generic/mman.h> | 4 | #include <asm-generic/mman-common.h> |
5 | 5 | ||
6 | #define MAP_32BIT 0x40 /* only give out 32bit addresses */ | 6 | #define MAP_32BIT 0x40 /* only give out 32bit addresses */ |
7 | 7 | ||
diff --git a/arch/x86/include/asm/mpspec.h b/arch/x86/include/asm/mpspec.h index 642fc7fc8cdc..e2a1bb6d71ea 100644 --- a/arch/x86/include/asm/mpspec.h +++ b/arch/x86/include/asm/mpspec.h | |||
@@ -61,9 +61,11 @@ extern void get_smp_config(void); | |||
61 | #ifdef CONFIG_X86_MPPARSE | 61 | #ifdef CONFIG_X86_MPPARSE |
62 | extern void find_smp_config(void); | 62 | extern void find_smp_config(void); |
63 | extern void early_reserve_e820_mpc_new(void); | 63 | extern void early_reserve_e820_mpc_new(void); |
64 | extern int enable_update_mptable; | ||
64 | #else | 65 | #else |
65 | static inline void find_smp_config(void) { } | 66 | static inline void find_smp_config(void) { } |
66 | static inline void early_reserve_e820_mpc_new(void) { } | 67 | static inline void early_reserve_e820_mpc_new(void) { } |
68 | #define enable_update_mptable 0 | ||
67 | #endif | 69 | #endif |
68 | 70 | ||
69 | void __cpuinit generic_processor_info(int apicid, int version); | 71 | void __cpuinit generic_processor_info(int apicid, int version); |
@@ -72,20 +74,13 @@ extern void mp_register_ioapic(int id, u32 address, u32 gsi_base); | |||
72 | extern void mp_override_legacy_irq(u8 bus_irq, u8 polarity, u8 trigger, | 74 | extern void mp_override_legacy_irq(u8 bus_irq, u8 polarity, u8 trigger, |
73 | u32 gsi); | 75 | u32 gsi); |
74 | extern void mp_config_acpi_legacy_irqs(void); | 76 | extern void mp_config_acpi_legacy_irqs(void); |
75 | extern int mp_register_gsi(u32 gsi, int edge_level, int active_high_low); | 77 | struct device; |
78 | extern int mp_register_gsi(struct device *dev, u32 gsi, int edge_level, | ||
79 | int active_high_low); | ||
76 | extern int acpi_probe_gsi(void); | 80 | extern int acpi_probe_gsi(void); |
77 | #ifdef CONFIG_X86_IO_APIC | 81 | #ifdef CONFIG_X86_IO_APIC |
78 | extern int mp_config_acpi_gsi(unsigned char number, unsigned int devfn, u8 pin, | ||
79 | u32 gsi, int triggering, int polarity); | ||
80 | extern int mp_find_ioapic(int gsi); | 82 | extern int mp_find_ioapic(int gsi); |
81 | extern int mp_find_ioapic_pin(int ioapic, int gsi); | 83 | extern int mp_find_ioapic_pin(int ioapic, int gsi); |
82 | #else | ||
83 | static inline int | ||
84 | mp_config_acpi_gsi(unsigned char number, unsigned int devfn, u8 pin, | ||
85 | u32 gsi, int triggering, int polarity) | ||
86 | { | ||
87 | return 0; | ||
88 | } | ||
89 | #endif | 84 | #endif |
90 | #else /* !CONFIG_ACPI: */ | 85 | #else /* !CONFIG_ACPI: */ |
91 | static inline int acpi_probe_gsi(void) | 86 | static inline int acpi_probe_gsi(void) |
diff --git a/arch/x86/include/asm/msr-index.h b/arch/x86/include/asm/msr-index.h index ec41fc16c167..1692fb5050e3 100644 --- a/arch/x86/include/asm/msr-index.h +++ b/arch/x86/include/asm/msr-index.h | |||
@@ -121,7 +121,6 @@ | |||
121 | #define MSR_K8_TOP_MEM1 0xc001001a | 121 | #define MSR_K8_TOP_MEM1 0xc001001a |
122 | #define MSR_K8_TOP_MEM2 0xc001001d | 122 | #define MSR_K8_TOP_MEM2 0xc001001d |
123 | #define MSR_K8_SYSCFG 0xc0010010 | 123 | #define MSR_K8_SYSCFG 0xc0010010 |
124 | #define MSR_K8_HWCR 0xc0010015 | ||
125 | #define MSR_K8_INT_PENDING_MSG 0xc0010055 | 124 | #define MSR_K8_INT_PENDING_MSG 0xc0010055 |
126 | /* C1E active bits in int pending message */ | 125 | /* C1E active bits in int pending message */ |
127 | #define K8_INTP_C1E_ACTIVE_MASK 0x18000000 | 126 | #define K8_INTP_C1E_ACTIVE_MASK 0x18000000 |
@@ -208,7 +207,14 @@ | |||
208 | 207 | ||
209 | #define MSR_IA32_THERM_CONTROL 0x0000019a | 208 | #define MSR_IA32_THERM_CONTROL 0x0000019a |
210 | #define MSR_IA32_THERM_INTERRUPT 0x0000019b | 209 | #define MSR_IA32_THERM_INTERRUPT 0x0000019b |
210 | |||
211 | #define THERM_INT_LOW_ENABLE (1 << 0) | ||
212 | #define THERM_INT_HIGH_ENABLE (1 << 1) | ||
213 | |||
211 | #define MSR_IA32_THERM_STATUS 0x0000019c | 214 | #define MSR_IA32_THERM_STATUS 0x0000019c |
215 | |||
216 | #define THERM_STATUS_PROCHOT (1 << 0) | ||
217 | |||
212 | #define MSR_IA32_MISC_ENABLE 0x000001a0 | 218 | #define MSR_IA32_MISC_ENABLE 0x000001a0 |
213 | 219 | ||
214 | /* MISC_ENABLE bits: architectural */ | 220 | /* MISC_ENABLE bits: architectural */ |
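The new THERM_INT_LOW_ENABLE/THERM_INT_HIGH_ENABLE and THERM_STATUS_PROCHOT definitions give thermal code named bits to test instead of magic numbers. A minimal sketch of the kind of check they allow; the helper name below is illustrative and not part of this patch (it needs <asm/msr.h> plus the definitions above):

	/* Illustrative only: report whether the current CPU asserts PROCHOT. */
	static int cpu_is_throttling(void)
	{
		u32 lo, hi;

		rdmsr(MSR_IA32_THERM_STATUS, lo, hi);
		return !!(lo & THERM_STATUS_PROCHOT);
	}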
diff --git a/arch/x86/include/asm/msr.h b/arch/x86/include/asm/msr.h index 638bf6241807..48ad9d29484a 100644 --- a/arch/x86/include/asm/msr.h +++ b/arch/x86/include/asm/msr.h | |||
@@ -3,15 +3,23 @@ | |||
3 | 3 | ||
4 | #include <asm/msr-index.h> | 4 | #include <asm/msr-index.h> |
5 | 5 | ||
6 | #ifndef __ASSEMBLY__ | ||
7 | # include <linux/types.h> | ||
8 | #endif | ||
9 | |||
10 | #ifdef __KERNEL__ | 6 | #ifdef __KERNEL__ |
11 | #ifndef __ASSEMBLY__ | 7 | #ifndef __ASSEMBLY__ |
12 | 8 | ||
9 | #include <linux/types.h> | ||
13 | #include <asm/asm.h> | 10 | #include <asm/asm.h> |
14 | #include <asm/errno.h> | 11 | #include <asm/errno.h> |
12 | #include <asm/cpumask.h> | ||
13 | |||
14 | struct msr { | ||
15 | union { | ||
16 | struct { | ||
17 | u32 l; | ||
18 | u32 h; | ||
19 | }; | ||
20 | u64 q; | ||
21 | }; | ||
22 | }; | ||
15 | 23 | ||
16 | static inline unsigned long long native_read_tscp(unsigned int *aux) | 24 | static inline unsigned long long native_read_tscp(unsigned int *aux) |
17 | { | 25 | { |
@@ -216,6 +224,8 @@ do { \ | |||
216 | #ifdef CONFIG_SMP | 224 | #ifdef CONFIG_SMP |
217 | int rdmsr_on_cpu(unsigned int cpu, u32 msr_no, u32 *l, u32 *h); | 225 | int rdmsr_on_cpu(unsigned int cpu, u32 msr_no, u32 *l, u32 *h); |
218 | int wrmsr_on_cpu(unsigned int cpu, u32 msr_no, u32 l, u32 h); | 226 | int wrmsr_on_cpu(unsigned int cpu, u32 msr_no, u32 l, u32 h); |
227 | void rdmsr_on_cpus(const cpumask_t *mask, u32 msr_no, struct msr *msrs); | ||
228 | void wrmsr_on_cpus(const cpumask_t *mask, u32 msr_no, struct msr *msrs); | ||
219 | int rdmsr_safe_on_cpu(unsigned int cpu, u32 msr_no, u32 *l, u32 *h); | 229 | int rdmsr_safe_on_cpu(unsigned int cpu, u32 msr_no, u32 *l, u32 *h); |
220 | int wrmsr_safe_on_cpu(unsigned int cpu, u32 msr_no, u32 l, u32 h); | 230 | int wrmsr_safe_on_cpu(unsigned int cpu, u32 msr_no, u32 l, u32 h); |
221 | #else /* CONFIG_SMP */ | 231 | #else /* CONFIG_SMP */ |
@@ -229,6 +239,16 @@ static inline int wrmsr_on_cpu(unsigned int cpu, u32 msr_no, u32 l, u32 h) | |||
229 | wrmsr(msr_no, l, h); | 239 | wrmsr(msr_no, l, h); |
230 | return 0; | 240 | return 0; |
231 | } | 241 | } |
242 | static inline void rdmsr_on_cpus(const cpumask_t *m, u32 msr_no, | ||
243 | struct msr *msrs) | ||
244 | { | ||
245 | rdmsr_on_cpu(0, msr_no, &(msrs[0].l), &(msrs[0].h)); | ||
246 | } | ||
247 | static inline void wrmsr_on_cpus(const cpumask_t *m, u32 msr_no, | ||
248 | struct msr *msrs) | ||
249 | { | ||
250 | wrmsr_on_cpu(0, msr_no, msrs[0].l, msrs[0].h); | ||
251 | } | ||
232 | static inline int rdmsr_safe_on_cpu(unsigned int cpu, u32 msr_no, | 252 | static inline int rdmsr_safe_on_cpu(unsigned int cpu, u32 msr_no, |
233 | u32 *l, u32 *h) | 253 | u32 *l, u32 *h) |
234 | { | 254 | { |
@@ -241,6 +261,4 @@ static inline int wrmsr_safe_on_cpu(unsigned int cpu, u32 msr_no, u32 l, u32 h) | |||
241 | #endif /* CONFIG_SMP */ | 261 | #endif /* CONFIG_SMP */ |
242 | #endif /* __ASSEMBLY__ */ | 262 | #endif /* __ASSEMBLY__ */ |
243 | #endif /* __KERNEL__ */ | 263 | #endif /* __KERNEL__ */ |
244 | |||
245 | |||
246 | #endif /* _ASM_X86_MSR_H */ | 264 | #endif /* _ASM_X86_MSR_H */ |
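The new struct msr union and the rdmsr_on_cpus()/wrmsr_on_cpus() helpers let callers operate on whole CPU masks and view an MSR either as the 64-bit value .q or as the .l/.h halves. A hedged sketch of a single-CPU read through the union; the function name and the choice of MSR_IA32_APICBASE are illustrative, not part of this patch:

	#include <linux/kernel.h>
	#include <asm/msr.h>

	/* Illustrative sketch: read one MSR on a given CPU into a struct msr. */
	static int read_apic_base(unsigned int cpu, struct msr *val)
	{
		int err;

		err = rdmsr_safe_on_cpu(cpu, MSR_IA32_APICBASE, &val->l, &val->h);
		if (err)
			return err;

		pr_info("cpu%u: IA32_APICBASE = 0x%llx\n",
			cpu, (unsigned long long)val->q);
		return 0;
	}

For mask-wide reads, rdmsr_on_cpus() fills one struct msr per CPU in a caller-supplied array, as the UP fallbacks above (which use slot 0 for CPU 0) suggest.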
diff --git a/arch/x86/include/asm/nmi.h b/arch/x86/include/asm/nmi.h index c45a0a568dff..c97264409934 100644 --- a/arch/x86/include/asm/nmi.h +++ b/arch/x86/include/asm/nmi.h | |||
@@ -64,7 +64,7 @@ static inline int nmi_watchdog_active(void) | |||
64 | * but since they are power of two we could use a | 64 | * but since they are power of two we could use a |
65 | * cheaper way --cvg | 65 | * cheaper way --cvg |
66 | */ | 66 | */ |
67 | return nmi_watchdog & 0x3; | 67 | return nmi_watchdog & (NMI_LOCAL_APIC | NMI_IO_APIC); |
68 | } | 68 | } |
69 | #endif | 69 | #endif |
70 | 70 | ||
diff --git a/arch/x86/include/asm/numa_64.h b/arch/x86/include/asm/numa_64.h index 064ed6df4cbe..c4ae822e415f 100644 --- a/arch/x86/include/asm/numa_64.h +++ b/arch/x86/include/asm/numa_64.h | |||
@@ -17,9 +17,6 @@ extern int compute_hash_shift(struct bootnode *nodes, int numblks, | |||
17 | extern void numa_init_array(void); | 17 | extern void numa_init_array(void); |
18 | extern int numa_off; | 18 | extern int numa_off; |
19 | 19 | ||
20 | extern void srat_reserve_add_area(int nodeid); | ||
21 | extern int hotadd_percent; | ||
22 | |||
23 | extern s16 apicid_to_node[MAX_LOCAL_APIC]; | 20 | extern s16 apicid_to_node[MAX_LOCAL_APIC]; |
24 | 21 | ||
25 | extern unsigned long numa_free_all_bootmem(void); | 22 | extern unsigned long numa_free_all_bootmem(void); |
@@ -27,6 +24,13 @@ extern void setup_node_bootmem(int nodeid, unsigned long start, | |||
27 | unsigned long end); | 24 | unsigned long end); |
28 | 25 | ||
29 | #ifdef CONFIG_NUMA | 26 | #ifdef CONFIG_NUMA |
27 | /* | ||
28 | * Too small node sizes may confuse the VM badly. Usually they | ||
29 | * result from BIOS bugs. So don't recognize nodes as standalone | ||
30 | * NUMA entities that have less than this amount of RAM listed: | ||
31 | */ | ||
32 | #define NODE_MIN_SIZE (4*1024*1024) | ||
33 | |||
30 | extern void __init init_cpu_to_node(void); | 34 | extern void __init init_cpu_to_node(void); |
31 | extern void __cpuinit numa_set_node(int cpu, int node); | 35 | extern void __cpuinit numa_set_node(int cpu, int node); |
32 | extern void __cpuinit numa_clear_node(int cpu); | 36 | extern void __cpuinit numa_clear_node(int cpu); |
diff --git a/arch/x86/include/asm/page.h b/arch/x86/include/asm/page.h index 89ed9d70b0aa..625c3f0e741a 100644 --- a/arch/x86/include/asm/page.h +++ b/arch/x86/include/asm/page.h | |||
@@ -56,7 +56,7 @@ extern bool __virt_addr_valid(unsigned long kaddr); | |||
56 | #endif /* __ASSEMBLY__ */ | 56 | #endif /* __ASSEMBLY__ */ |
57 | 57 | ||
58 | #include <asm-generic/memory_model.h> | 58 | #include <asm-generic/memory_model.h> |
59 | #include <asm-generic/page.h> | 59 | #include <asm-generic/getorder.h> |
60 | 60 | ||
61 | #define __HAVE_ARCH_GATE_AREA 1 | 61 | #define __HAVE_ARCH_GATE_AREA 1 |
62 | 62 | ||
diff --git a/arch/x86/include/asm/page_32_types.h b/arch/x86/include/asm/page_32_types.h index 0f915ae649a7..6f1b7331313f 100644 --- a/arch/x86/include/asm/page_32_types.h +++ b/arch/x86/include/asm/page_32_types.h | |||
@@ -54,10 +54,6 @@ extern unsigned int __VMALLOC_RESERVE; | |||
54 | extern int sysctl_legacy_va_layout; | 54 | extern int sysctl_legacy_va_layout; |
55 | 55 | ||
56 | extern void find_low_pfn_range(void); | 56 | extern void find_low_pfn_range(void); |
57 | extern unsigned long init_memory_mapping(unsigned long start, | ||
58 | unsigned long end); | ||
59 | extern void initmem_init(unsigned long, unsigned long); | ||
60 | extern void free_initmem(void); | ||
61 | extern void setup_bootmem_allocator(void); | 57 | extern void setup_bootmem_allocator(void); |
62 | 58 | ||
63 | #endif /* !__ASSEMBLY__ */ | 59 | #endif /* !__ASSEMBLY__ */ |
diff --git a/arch/x86/include/asm/page_64_types.h b/arch/x86/include/asm/page_64_types.h index d38c91b70248..7639dbf5d223 100644 --- a/arch/x86/include/asm/page_64_types.h +++ b/arch/x86/include/asm/page_64_types.h | |||
@@ -32,24 +32,16 @@ | |||
32 | */ | 32 | */ |
33 | #define __PAGE_OFFSET _AC(0xffff880000000000, UL) | 33 | #define __PAGE_OFFSET _AC(0xffff880000000000, UL) |
34 | 34 | ||
35 | #define __PHYSICAL_START CONFIG_PHYSICAL_START | 35 | #define __PHYSICAL_START ((CONFIG_PHYSICAL_START + \ |
36 | #define __KERNEL_ALIGN 0x200000 | 36 | (CONFIG_PHYSICAL_ALIGN - 1)) & \ |
37 | 37 | ~(CONFIG_PHYSICAL_ALIGN - 1)) | |
38 | /* | ||
39 | * Make sure kernel is aligned to 2MB address. Catching it at compile | ||
40 | * time is better. Change your config file and compile the kernel | ||
41 | * for a 2MB aligned address (CONFIG_PHYSICAL_START) | ||
42 | */ | ||
43 | #if (CONFIG_PHYSICAL_START % __KERNEL_ALIGN) != 0 | ||
44 | #error "CONFIG_PHYSICAL_START must be a multiple of 2MB" | ||
45 | #endif | ||
46 | 38 | ||
47 | #define __START_KERNEL (__START_KERNEL_map + __PHYSICAL_START) | 39 | #define __START_KERNEL (__START_KERNEL_map + __PHYSICAL_START) |
48 | #define __START_KERNEL_map _AC(0xffffffff80000000, UL) | 40 | #define __START_KERNEL_map _AC(0xffffffff80000000, UL) |
49 | 41 | ||
50 | /* See Documentation/x86_64/mm.txt for a description of the memory map. */ | 42 | /* See Documentation/x86/x86_64/mm.txt for a description of the memory map. */ |
51 | #define __PHYSICAL_MASK_SHIFT 46 | 43 | #define __PHYSICAL_MASK_SHIFT 46 |
52 | #define __VIRTUAL_MASK_SHIFT 48 | 44 | #define __VIRTUAL_MASK_SHIFT 47 |
53 | 45 | ||
54 | /* | 46 | /* |
55 | * Kernel image size is limited to 512 MB (see level2_kernel_pgt in | 47 | * Kernel image size is limited to 512 MB (see level2_kernel_pgt in |
@@ -71,12 +63,6 @@ extern unsigned long __phys_addr(unsigned long); | |||
71 | 63 | ||
72 | #define vmemmap ((struct page *)VMEMMAP_START) | 64 | #define vmemmap ((struct page *)VMEMMAP_START) |
73 | 65 | ||
74 | extern unsigned long init_memory_mapping(unsigned long start, | ||
75 | unsigned long end); | ||
76 | |||
77 | extern void initmem_init(unsigned long start_pfn, unsigned long end_pfn); | ||
78 | extern void free_initmem(void); | ||
79 | |||
80 | extern void init_extra_mapping_uc(unsigned long phys, unsigned long size); | 66 | extern void init_extra_mapping_uc(unsigned long phys, unsigned long size); |
81 | extern void init_extra_mapping_wb(unsigned long phys, unsigned long size); | 67 | extern void init_extra_mapping_wb(unsigned long phys, unsigned long size); |
82 | 68 | ||
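Instead of rejecting a misaligned CONFIG_PHYSICAL_START at compile time, the kernel start address is now rounded up to CONFIG_PHYSICAL_ALIGN. A worked example of the new expression, with illustrative config values:

	/*
	 * __PHYSICAL_START = (CONFIG_PHYSICAL_START + (ALIGN - 1)) & ~(ALIGN - 1)
	 *
	 *   CONFIG_PHYSICAL_ALIGN = 0x200000 (2 MB)
	 *   CONFIG_PHYSICAL_START = 0x1000000 -> (0x11fffff & ~0x1fffff) = 0x1000000
	 *   CONFIG_PHYSICAL_START = 0x1080000 -> (0x127ffff & ~0x1fffff) = 0x1200000
	 *
	 * An already aligned start is left alone; a misaligned one is rounded up
	 * to the next 2 MB boundary, replacing the old compile-time #error.
	 */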
diff --git a/arch/x86/include/asm/page_types.h b/arch/x86/include/asm/page_types.h index 826ad37006ab..6473f5ccff85 100644 --- a/arch/x86/include/asm/page_types.h +++ b/arch/x86/include/asm/page_types.h | |||
@@ -46,6 +46,12 @@ extern int devmem_is_allowed(unsigned long pagenr); | |||
46 | extern unsigned long max_low_pfn_mapped; | 46 | extern unsigned long max_low_pfn_mapped; |
47 | extern unsigned long max_pfn_mapped; | 47 | extern unsigned long max_pfn_mapped; |
48 | 48 | ||
49 | extern unsigned long init_memory_mapping(unsigned long start, | ||
50 | unsigned long end); | ||
51 | |||
52 | extern void initmem_init(unsigned long start_pfn, unsigned long end_pfn); | ||
53 | extern void free_initmem(void); | ||
54 | |||
49 | #endif /* !__ASSEMBLY__ */ | 55 | #endif /* !__ASSEMBLY__ */ |
50 | 56 | ||
51 | #endif /* _ASM_X86_PAGE_DEFS_H */ | 57 | #endif /* _ASM_X86_PAGE_DEFS_H */ |
diff --git a/arch/x86/include/asm/paravirt.h b/arch/x86/include/asm/paravirt.h index a53da004e08e..4fb37c8a0832 100644 --- a/arch/x86/include/asm/paravirt.h +++ b/arch/x86/include/asm/paravirt.h | |||
@@ -56,6 +56,7 @@ struct desc_ptr; | |||
56 | struct tss_struct; | 56 | struct tss_struct; |
57 | struct mm_struct; | 57 | struct mm_struct; |
58 | struct desc_struct; | 58 | struct desc_struct; |
59 | struct task_struct; | ||
59 | 60 | ||
60 | /* | 61 | /* |
61 | * Wrapper type for pointers to code which uses the non-standard | 62 | * Wrapper type for pointers to code which uses the non-standard |
@@ -203,7 +204,8 @@ struct pv_cpu_ops { | |||
203 | 204 | ||
204 | void (*swapgs)(void); | 205 | void (*swapgs)(void); |
205 | 206 | ||
206 | struct pv_lazy_ops lazy_mode; | 207 | void (*start_context_switch)(struct task_struct *prev); |
208 | void (*end_context_switch)(struct task_struct *next); | ||
207 | }; | 209 | }; |
208 | 210 | ||
209 | struct pv_irq_ops { | 211 | struct pv_irq_ops { |
@@ -1399,25 +1401,23 @@ enum paravirt_lazy_mode { | |||
1399 | }; | 1401 | }; |
1400 | 1402 | ||
1401 | enum paravirt_lazy_mode paravirt_get_lazy_mode(void); | 1403 | enum paravirt_lazy_mode paravirt_get_lazy_mode(void); |
1402 | void paravirt_enter_lazy_cpu(void); | 1404 | void paravirt_start_context_switch(struct task_struct *prev); |
1403 | void paravirt_leave_lazy_cpu(void); | 1405 | void paravirt_end_context_switch(struct task_struct *next); |
1406 | |||
1404 | void paravirt_enter_lazy_mmu(void); | 1407 | void paravirt_enter_lazy_mmu(void); |
1405 | void paravirt_leave_lazy_mmu(void); | 1408 | void paravirt_leave_lazy_mmu(void); |
1406 | void paravirt_leave_lazy(enum paravirt_lazy_mode mode); | ||
1407 | 1409 | ||
1408 | #define __HAVE_ARCH_ENTER_LAZY_CPU_MODE | 1410 | #define __HAVE_ARCH_START_CONTEXT_SWITCH |
1409 | static inline void arch_enter_lazy_cpu_mode(void) | 1411 | static inline void arch_start_context_switch(struct task_struct *prev) |
1410 | { | 1412 | { |
1411 | PVOP_VCALL0(pv_cpu_ops.lazy_mode.enter); | 1413 | PVOP_VCALL1(pv_cpu_ops.start_context_switch, prev); |
1412 | } | 1414 | } |
1413 | 1415 | ||
1414 | static inline void arch_leave_lazy_cpu_mode(void) | 1416 | static inline void arch_end_context_switch(struct task_struct *next) |
1415 | { | 1417 | { |
1416 | PVOP_VCALL0(pv_cpu_ops.lazy_mode.leave); | 1418 | PVOP_VCALL1(pv_cpu_ops.end_context_switch, next); |
1417 | } | 1419 | } |
1418 | 1420 | ||
1419 | void arch_flush_lazy_cpu_mode(void); | ||
1420 | |||
1421 | #define __HAVE_ARCH_ENTER_LAZY_MMU_MODE | 1421 | #define __HAVE_ARCH_ENTER_LAZY_MMU_MODE |
1422 | static inline void arch_enter_lazy_mmu_mode(void) | 1422 | static inline void arch_enter_lazy_mmu_mode(void) |
1423 | { | 1423 | { |
diff --git a/arch/x86/include/asm/pci.h b/arch/x86/include/asm/pci.h index b51a1e8b0baf..927958d13c19 100644 --- a/arch/x86/include/asm/pci.h +++ b/arch/x86/include/asm/pci.h | |||
@@ -130,6 +130,7 @@ extern void pci_iommu_alloc(void); | |||
130 | 130 | ||
131 | /* generic pci stuff */ | 131 | /* generic pci stuff */ |
132 | #include <asm-generic/pci.h> | 132 | #include <asm-generic/pci.h> |
133 | #define PCIBIOS_MAX_MEM_32 0xffffffff | ||
133 | 134 | ||
134 | #ifdef CONFIG_NUMA | 135 | #ifdef CONFIG_NUMA |
135 | /* Returns the node based on pci bus */ | 136 | /* Returns the node based on pci bus */ |
diff --git a/arch/x86/include/asm/pci_x86.h b/arch/x86/include/asm/pci_x86.h index e60fd3e14bdf..b399988eee3a 100644 --- a/arch/x86/include/asm/pci_x86.h +++ b/arch/x86/include/asm/pci_x86.h | |||
@@ -121,6 +121,9 @@ extern int __init pcibios_init(void); | |||
121 | extern int __init pci_mmcfg_arch_init(void); | 121 | extern int __init pci_mmcfg_arch_init(void); |
122 | extern void __init pci_mmcfg_arch_free(void); | 122 | extern void __init pci_mmcfg_arch_free(void); |
123 | 123 | ||
124 | extern struct acpi_mcfg_allocation *pci_mmcfg_config; | ||
125 | extern int pci_mmcfg_config_num; | ||
126 | |||
124 | /* | 127 | /* |
125 | * AMD Fam10h CPUs are buggy, and cannot access MMIO config space | 128 | * AMD Fam10h CPUs are buggy, and cannot access MMIO config space |
126 | * on their northbrige except through the * %eax register. As such, you MUST | 129 | * on their northbrige except through the * %eax register. As such, you MUST |
diff --git a/arch/x86/include/asm/perf_counter.h b/arch/x86/include/asm/perf_counter.h new file mode 100644 index 000000000000..5fb33e160ea0 --- /dev/null +++ b/arch/x86/include/asm/perf_counter.h | |||
@@ -0,0 +1,95 @@ | |||
1 | #ifndef _ASM_X86_PERF_COUNTER_H | ||
2 | #define _ASM_X86_PERF_COUNTER_H | ||
3 | |||
4 | /* | ||
5 | * Performance counter hw details: | ||
6 | */ | ||
7 | |||
8 | #define X86_PMC_MAX_GENERIC 8 | ||
9 | #define X86_PMC_MAX_FIXED 3 | ||
10 | |||
11 | #define X86_PMC_IDX_GENERIC 0 | ||
12 | #define X86_PMC_IDX_FIXED 32 | ||
13 | #define X86_PMC_IDX_MAX 64 | ||
14 | |||
15 | #define MSR_ARCH_PERFMON_PERFCTR0 0xc1 | ||
16 | #define MSR_ARCH_PERFMON_PERFCTR1 0xc2 | ||
17 | |||
18 | #define MSR_ARCH_PERFMON_EVENTSEL0 0x186 | ||
19 | #define MSR_ARCH_PERFMON_EVENTSEL1 0x187 | ||
20 | |||
21 | #define ARCH_PERFMON_EVENTSEL0_ENABLE (1 << 22) | ||
22 | #define ARCH_PERFMON_EVENTSEL_INT (1 << 20) | ||
23 | #define ARCH_PERFMON_EVENTSEL_OS (1 << 17) | ||
24 | #define ARCH_PERFMON_EVENTSEL_USR (1 << 16) | ||
25 | |||
26 | /* | ||
27 | * Includes eventsel and unit mask as well: | ||
28 | */ | ||
29 | #define ARCH_PERFMON_EVENT_MASK 0xffff | ||
30 | |||
31 | #define ARCH_PERFMON_UNHALTED_CORE_CYCLES_SEL 0x3c | ||
32 | #define ARCH_PERFMON_UNHALTED_CORE_CYCLES_UMASK (0x00 << 8) | ||
33 | #define ARCH_PERFMON_UNHALTED_CORE_CYCLES_INDEX 0 | ||
34 | #define ARCH_PERFMON_UNHALTED_CORE_CYCLES_PRESENT \ | ||
35 | (1 << (ARCH_PERFMON_UNHALTED_CORE_CYCLES_INDEX)) | ||
36 | |||
37 | #define ARCH_PERFMON_BRANCH_MISSES_RETIRED 6 | ||
38 | |||
39 | /* | ||
40 | * Intel "Architectural Performance Monitoring" CPUID | ||
41 | * detection/enumeration details: | ||
42 | */ | ||
43 | union cpuid10_eax { | ||
44 | struct { | ||
45 | unsigned int version_id:8; | ||
46 | unsigned int num_counters:8; | ||
47 | unsigned int bit_width:8; | ||
48 | unsigned int mask_length:8; | ||
49 | } split; | ||
50 | unsigned int full; | ||
51 | }; | ||
52 | |||
53 | union cpuid10_edx { | ||
54 | struct { | ||
55 | unsigned int num_counters_fixed:4; | ||
56 | unsigned int reserved:28; | ||
57 | } split; | ||
58 | unsigned int full; | ||
59 | }; | ||
60 | |||
61 | |||
62 | /* | ||
63 | * Fixed-purpose performance counters: | ||
64 | */ | ||
65 | |||
66 | /* | ||
67 | * All 3 fixed-mode PMCs are configured via this single MSR: | ||
68 | */ | ||
69 | #define MSR_ARCH_PERFMON_FIXED_CTR_CTRL 0x38d | ||
70 | |||
71 | /* | ||
72 | * The counts are available in three separate MSRs: | ||
73 | */ | ||
74 | |||
75 | /* Instr_Retired.Any: */ | ||
76 | #define MSR_ARCH_PERFMON_FIXED_CTR0 0x309 | ||
77 | #define X86_PMC_IDX_FIXED_INSTRUCTIONS (X86_PMC_IDX_FIXED + 0) | ||
78 | |||
79 | /* CPU_CLK_Unhalted.Core: */ | ||
80 | #define MSR_ARCH_PERFMON_FIXED_CTR1 0x30a | ||
81 | #define X86_PMC_IDX_FIXED_CPU_CYCLES (X86_PMC_IDX_FIXED + 1) | ||
82 | |||
83 | /* CPU_CLK_Unhalted.Ref: */ | ||
84 | #define MSR_ARCH_PERFMON_FIXED_CTR2 0x30b | ||
85 | #define X86_PMC_IDX_FIXED_BUS_CYCLES (X86_PMC_IDX_FIXED + 2) | ||
86 | |||
87 | #ifdef CONFIG_PERF_COUNTERS | ||
88 | extern void init_hw_perf_counters(void); | ||
89 | extern void perf_counters_lapic_init(void); | ||
90 | #else | ||
91 | static inline void init_hw_perf_counters(void) { } | ||
92 | static inline void perf_counters_lapic_init(void) { } | ||
93 | #endif | ||
94 | |||
95 | #endif /* _ASM_X86_PERF_COUNTER_H */ | ||
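The cpuid10_eax/cpuid10_edx unions map directly onto CPUID leaf 0xA ("Architectural Performance Monitoring"). A hedged sketch of how the enumeration might be read; the function name is illustrative, and cpuid() is the existing helper from <asm/processor.h>:

	#include <linux/kernel.h>
	#include <asm/processor.h>
	#include <asm/perf_counter.h>

	/* Illustrative sketch: report what CPUID leaf 0xA advertises. */
	static void report_arch_perfmon(void)
	{
		union cpuid10_eax eax;
		union cpuid10_edx edx;
		unsigned int ebx, ecx;

		cpuid(0xa, &eax.full, &ebx, &ecx, &edx.full);

		if (!eax.split.version_id)
			return;		/* no architectural perfmon */

		pr_info("perfmon v%u: %u generic counters (%u bits), %u fixed\n",
			eax.split.version_id, eax.split.num_counters,
			eax.split.bit_width, edx.split.num_counters_fixed);
	}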
diff --git a/arch/x86/include/asm/pgtable.h b/arch/x86/include/asm/pgtable.h index 29d96d168bc0..3cc06e3fceb8 100644 --- a/arch/x86/include/asm/pgtable.h +++ b/arch/x86/include/asm/pgtable.h | |||
@@ -81,6 +81,8 @@ static inline void __init paravirt_pagetable_setup_done(pgd_t *base) | |||
81 | #define pte_val(x) native_pte_val(x) | 81 | #define pte_val(x) native_pte_val(x) |
82 | #define __pte(x) native_make_pte(x) | 82 | #define __pte(x) native_make_pte(x) |
83 | 83 | ||
84 | #define arch_end_context_switch(prev) do {} while(0) | ||
85 | |||
84 | #endif /* CONFIG_PARAVIRT */ | 86 | #endif /* CONFIG_PARAVIRT */ |
85 | 87 | ||
86 | /* | 88 | /* |
@@ -315,6 +317,11 @@ static inline int pte_present(pte_t a) | |||
315 | return pte_flags(a) & (_PAGE_PRESENT | _PAGE_PROTNONE); | 317 | return pte_flags(a) & (_PAGE_PRESENT | _PAGE_PROTNONE); |
316 | } | 318 | } |
317 | 319 | ||
320 | static inline int pte_hidden(pte_t pte) | ||
321 | { | ||
322 | return pte_flags(pte) & _PAGE_HIDDEN; | ||
323 | } | ||
324 | |||
318 | static inline int pmd_present(pmd_t pmd) | 325 | static inline int pmd_present(pmd_t pmd) |
319 | { | 326 | { |
320 | return pmd_flags(pmd) & _PAGE_PRESENT; | 327 | return pmd_flags(pmd) & _PAGE_PRESENT; |
@@ -503,6 +510,8 @@ static inline int pgd_none(pgd_t pgd) | |||
503 | 510 | ||
504 | #ifndef __ASSEMBLY__ | 511 | #ifndef __ASSEMBLY__ |
505 | 512 | ||
513 | extern int direct_gbpages; | ||
514 | |||
506 | /* local pte updates need not use xchg for locking */ | 515 | /* local pte updates need not use xchg for locking */ |
507 | static inline pte_t native_local_ptep_get_and_clear(pte_t *ptep) | 516 | static inline pte_t native_local_ptep_get_and_clear(pte_t *ptep) |
508 | { | 517 | { |
diff --git a/arch/x86/include/asm/pgtable_32.h b/arch/x86/include/asm/pgtable_32.h index 31bd120cf2a2..01fd9461d323 100644 --- a/arch/x86/include/asm/pgtable_32.h +++ b/arch/x86/include/asm/pgtable_32.h | |||
@@ -49,13 +49,17 @@ extern void set_pmd_pfn(unsigned long, unsigned long, pgprot_t); | |||
49 | #endif | 49 | #endif |
50 | 50 | ||
51 | #if defined(CONFIG_HIGHPTE) | 51 | #if defined(CONFIG_HIGHPTE) |
52 | #define __KM_PTE \ | ||
53 | (in_nmi() ? KM_NMI_PTE : \ | ||
54 | in_irq() ? KM_IRQ_PTE : \ | ||
55 | KM_PTE0) | ||
52 | #define pte_offset_map(dir, address) \ | 56 | #define pte_offset_map(dir, address) \ |
53 | ((pte_t *)kmap_atomic_pte(pmd_page(*(dir)), KM_PTE0) + \ | 57 | ((pte_t *)kmap_atomic_pte(pmd_page(*(dir)), __KM_PTE) + \ |
54 | pte_index((address))) | 58 | pte_index((address))) |
55 | #define pte_offset_map_nested(dir, address) \ | 59 | #define pte_offset_map_nested(dir, address) \ |
56 | ((pte_t *)kmap_atomic_pte(pmd_page(*(dir)), KM_PTE1) + \ | 60 | ((pte_t *)kmap_atomic_pte(pmd_page(*(dir)), KM_PTE1) + \ |
57 | pte_index((address))) | 61 | pte_index((address))) |
58 | #define pte_unmap(pte) kunmap_atomic((pte), KM_PTE0) | 62 | #define pte_unmap(pte) kunmap_atomic((pte), __KM_PTE) |
59 | #define pte_unmap_nested(pte) kunmap_atomic((pte), KM_PTE1) | 63 | #define pte_unmap_nested(pte) kunmap_atomic((pte), KM_PTE1) |
60 | #else | 64 | #else |
61 | #define pte_offset_map(dir, address) \ | 65 | #define pte_offset_map(dir, address) \ |
diff --git a/arch/x86/include/asm/pgtable_32_types.h b/arch/x86/include/asm/pgtable_32_types.h index 2733fad45f98..5e67c1532314 100644 --- a/arch/x86/include/asm/pgtable_32_types.h +++ b/arch/x86/include/asm/pgtable_32_types.h | |||
@@ -46,6 +46,10 @@ extern bool __vmalloc_start_set; /* set once high_memory is set */ | |||
46 | # define VMALLOC_END (FIXADDR_START - 2 * PAGE_SIZE) | 46 | # define VMALLOC_END (FIXADDR_START - 2 * PAGE_SIZE) |
47 | #endif | 47 | #endif |
48 | 48 | ||
49 | #define MODULES_VADDR VMALLOC_START | ||
50 | #define MODULES_END VMALLOC_END | ||
51 | #define MODULES_LEN (MODULES_VADDR - MODULES_END) | ||
52 | |||
49 | #define MAXMEM (VMALLOC_END - PAGE_OFFSET - __VMALLOC_RESERVE) | 53 | #define MAXMEM (VMALLOC_END - PAGE_OFFSET - __VMALLOC_RESERVE) |
50 | 54 | ||
51 | #endif /* _ASM_X86_PGTABLE_32_DEFS_H */ | 55 | #endif /* _ASM_X86_PGTABLE_32_DEFS_H */ |
diff --git a/arch/x86/include/asm/pgtable_64.h b/arch/x86/include/asm/pgtable_64.h index 6b87bc6d5018..c57a30117149 100644 --- a/arch/x86/include/asm/pgtable_64.h +++ b/arch/x86/include/asm/pgtable_64.h | |||
@@ -25,10 +25,6 @@ extern pgd_t init_level4_pgt[]; | |||
25 | 25 | ||
26 | extern void paging_init(void); | 26 | extern void paging_init(void); |
27 | 27 | ||
28 | #endif /* !__ASSEMBLY__ */ | ||
29 | |||
30 | #ifndef __ASSEMBLY__ | ||
31 | |||
32 | #define pte_ERROR(e) \ | 28 | #define pte_ERROR(e) \ |
33 | printk("%s:%d: bad pte %p(%016lx).\n", \ | 29 | printk("%s:%d: bad pte %p(%016lx).\n", \ |
34 | __FILE__, __LINE__, &(e), pte_val(e)) | 30 | __FILE__, __LINE__, &(e), pte_val(e)) |
@@ -135,8 +131,6 @@ static inline int pgd_large(pgd_t pgd) { return 0; } | |||
135 | 131 | ||
136 | #define update_mmu_cache(vma, address, pte) do { } while (0) | 132 | #define update_mmu_cache(vma, address, pte) do { } while (0) |
137 | 133 | ||
138 | extern int direct_gbpages; | ||
139 | |||
140 | /* Encode and de-code a swap entry */ | 134 | /* Encode and de-code a swap entry */ |
141 | #if _PAGE_BIT_FILE < _PAGE_BIT_PROTNONE | 135 | #if _PAGE_BIT_FILE < _PAGE_BIT_PROTNONE |
142 | #define SWP_TYPE_BITS (_PAGE_BIT_FILE - _PAGE_BIT_PRESENT - 1) | 136 | #define SWP_TYPE_BITS (_PAGE_BIT_FILE - _PAGE_BIT_PRESENT - 1) |
@@ -171,10 +165,7 @@ extern void cleanup_highmap(void); | |||
171 | 165 | ||
172 | /* fs/proc/kcore.c */ | 166 | /* fs/proc/kcore.c */ |
173 | #define kc_vaddr_to_offset(v) ((v) & __VIRTUAL_MASK) | 167 | #define kc_vaddr_to_offset(v) ((v) & __VIRTUAL_MASK) |
174 | #define kc_offset_to_vaddr(o) \ | 168 | #define kc_offset_to_vaddr(o) ((o) | ~__VIRTUAL_MASK) |
175 | (((o) & (1UL << (__VIRTUAL_MASK_SHIFT - 1))) \ | ||
176 | ? ((o) | ~__VIRTUAL_MASK) \ | ||
177 | : (o)) | ||
178 | 169 | ||
179 | #define __HAVE_ARCH_PTE_SAME | 170 | #define __HAVE_ARCH_PTE_SAME |
180 | #endif /* !__ASSEMBLY__ */ | 171 | #endif /* !__ASSEMBLY__ */ |
diff --git a/arch/x86/include/asm/pgtable_64_types.h b/arch/x86/include/asm/pgtable_64_types.h index fbf42b8e0383..766ea16fbbbd 100644 --- a/arch/x86/include/asm/pgtable_64_types.h +++ b/arch/x86/include/asm/pgtable_64_types.h | |||
@@ -51,11 +51,11 @@ typedef struct { pteval_t pte; } pte_t; | |||
51 | #define PGDIR_SIZE (_AC(1, UL) << PGDIR_SHIFT) | 51 | #define PGDIR_SIZE (_AC(1, UL) << PGDIR_SHIFT) |
52 | #define PGDIR_MASK (~(PGDIR_SIZE - 1)) | 52 | #define PGDIR_MASK (~(PGDIR_SIZE - 1)) |
53 | 53 | ||
54 | 54 | /* See Documentation/x86/x86_64/mm.txt for a description of the memory map. */ | |
55 | #define MAXMEM _AC(__AC(1, UL) << MAX_PHYSMEM_BITS, UL) | 55 | #define MAXMEM _AC(__AC(1, UL) << MAX_PHYSMEM_BITS, UL) |
56 | #define VMALLOC_START _AC(0xffffc20000000000, UL) | 56 | #define VMALLOC_START _AC(0xffffc90000000000, UL) |
57 | #define VMALLOC_END _AC(0xffffe1ffffffffff, UL) | 57 | #define VMALLOC_END _AC(0xffffe8ffffffffff, UL) |
58 | #define VMEMMAP_START _AC(0xffffe20000000000, UL) | 58 | #define VMEMMAP_START _AC(0xffffea0000000000, UL) |
59 | #define MODULES_VADDR _AC(0xffffffffa0000000, UL) | 59 | #define MODULES_VADDR _AC(0xffffffffa0000000, UL) |
60 | #define MODULES_END _AC(0xffffffffff000000, UL) | 60 | #define MODULES_END _AC(0xffffffffff000000, UL) |
61 | #define MODULES_LEN (MODULES_END - MODULES_VADDR) | 61 | #define MODULES_LEN (MODULES_END - MODULES_VADDR) |
diff --git a/arch/x86/include/asm/pgtable_types.h b/arch/x86/include/asm/pgtable_types.h index b8238dc8786d..54cb697f4900 100644 --- a/arch/x86/include/asm/pgtable_types.h +++ b/arch/x86/include/asm/pgtable_types.h | |||
@@ -18,7 +18,7 @@ | |||
18 | #define _PAGE_BIT_GLOBAL 8 /* Global TLB entry PPro+ */ | 18 | #define _PAGE_BIT_GLOBAL 8 /* Global TLB entry PPro+ */ |
19 | #define _PAGE_BIT_UNUSED1 9 /* available for programmer */ | 19 | #define _PAGE_BIT_UNUSED1 9 /* available for programmer */ |
20 | #define _PAGE_BIT_IOMAP 10 /* flag used to indicate IO mapping */ | 20 | #define _PAGE_BIT_IOMAP 10 /* flag used to indicate IO mapping */ |
21 | #define _PAGE_BIT_UNUSED3 11 | 21 | #define _PAGE_BIT_HIDDEN 11 /* hidden by kmemcheck */ |
22 | #define _PAGE_BIT_PAT_LARGE 12 /* On 2MB or 1GB pages */ | 22 | #define _PAGE_BIT_PAT_LARGE 12 /* On 2MB or 1GB pages */ |
23 | #define _PAGE_BIT_SPECIAL _PAGE_BIT_UNUSED1 | 23 | #define _PAGE_BIT_SPECIAL _PAGE_BIT_UNUSED1 |
24 | #define _PAGE_BIT_CPA_TEST _PAGE_BIT_UNUSED1 | 24 | #define _PAGE_BIT_CPA_TEST _PAGE_BIT_UNUSED1 |
@@ -41,13 +41,18 @@ | |||
41 | #define _PAGE_GLOBAL (_AT(pteval_t, 1) << _PAGE_BIT_GLOBAL) | 41 | #define _PAGE_GLOBAL (_AT(pteval_t, 1) << _PAGE_BIT_GLOBAL) |
42 | #define _PAGE_UNUSED1 (_AT(pteval_t, 1) << _PAGE_BIT_UNUSED1) | 42 | #define _PAGE_UNUSED1 (_AT(pteval_t, 1) << _PAGE_BIT_UNUSED1) |
43 | #define _PAGE_IOMAP (_AT(pteval_t, 1) << _PAGE_BIT_IOMAP) | 43 | #define _PAGE_IOMAP (_AT(pteval_t, 1) << _PAGE_BIT_IOMAP) |
44 | #define _PAGE_UNUSED3 (_AT(pteval_t, 1) << _PAGE_BIT_UNUSED3) | ||
45 | #define _PAGE_PAT (_AT(pteval_t, 1) << _PAGE_BIT_PAT) | 44 | #define _PAGE_PAT (_AT(pteval_t, 1) << _PAGE_BIT_PAT) |
46 | #define _PAGE_PAT_LARGE (_AT(pteval_t, 1) << _PAGE_BIT_PAT_LARGE) | 45 | #define _PAGE_PAT_LARGE (_AT(pteval_t, 1) << _PAGE_BIT_PAT_LARGE) |
47 | #define _PAGE_SPECIAL (_AT(pteval_t, 1) << _PAGE_BIT_SPECIAL) | 46 | #define _PAGE_SPECIAL (_AT(pteval_t, 1) << _PAGE_BIT_SPECIAL) |
48 | #define _PAGE_CPA_TEST (_AT(pteval_t, 1) << _PAGE_BIT_CPA_TEST) | 47 | #define _PAGE_CPA_TEST (_AT(pteval_t, 1) << _PAGE_BIT_CPA_TEST) |
49 | #define __HAVE_ARCH_PTE_SPECIAL | 48 | #define __HAVE_ARCH_PTE_SPECIAL |
50 | 49 | ||
50 | #ifdef CONFIG_KMEMCHECK | ||
51 | #define _PAGE_HIDDEN (_AT(pteval_t, 1) << _PAGE_BIT_HIDDEN) | ||
52 | #else | ||
53 | #define _PAGE_HIDDEN (_AT(pteval_t, 0)) | ||
54 | #endif | ||
55 | |||
51 | #if defined(CONFIG_X86_64) || defined(CONFIG_X86_PAE) | 56 | #if defined(CONFIG_X86_64) || defined(CONFIG_X86_PAE) |
52 | #define _PAGE_NX (_AT(pteval_t, 1) << _PAGE_BIT_NX) | 57 | #define _PAGE_NX (_AT(pteval_t, 1) << _PAGE_BIT_NX) |
53 | #else | 58 | #else |
@@ -273,7 +278,6 @@ typedef struct page *pgtable_t; | |||
273 | 278 | ||
274 | extern pteval_t __supported_pte_mask; | 279 | extern pteval_t __supported_pte_mask; |
275 | extern int nx_enabled; | 280 | extern int nx_enabled; |
276 | extern void set_nx(void); | ||
277 | 281 | ||
278 | #define pgprot_writecombine pgprot_writecombine | 282 | #define pgprot_writecombine pgprot_writecombine |
279 | extern pgprot_t pgprot_writecombine(pgprot_t prot); | 283 | extern pgprot_t pgprot_writecombine(pgprot_t prot); |
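Under CONFIG_KMEMCHECK the previously unused PTE bit 11 becomes _PAGE_HIDDEN, which the pte_hidden() helper added to pgtable.h above tests; without kmemcheck the mask is defined as 0, so such tests compile away. A hedged sketch of that relationship (the wrapper name is illustrative):

	/* Illustrative only: always false when CONFIG_KMEMCHECK is disabled. */
	static inline bool page_tracked_by_kmemcheck(pte_t pte)
	{
		return pte_hidden(pte) != 0;
	}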
diff --git a/arch/x86/include/asm/processor.h b/arch/x86/include/asm/processor.h index c2cceae709c8..c7768269b1cf 100644 --- a/arch/x86/include/asm/processor.h +++ b/arch/x86/include/asm/processor.h | |||
@@ -135,7 +135,8 @@ extern struct cpuinfo_x86 boot_cpu_data; | |||
135 | extern struct cpuinfo_x86 new_cpu_data; | 135 | extern struct cpuinfo_x86 new_cpu_data; |
136 | 136 | ||
137 | extern struct tss_struct doublefault_tss; | 137 | extern struct tss_struct doublefault_tss; |
138 | extern __u32 cleared_cpu_caps[NCAPINTS]; | 138 | extern __u32 cpu_caps_cleared[NCAPINTS]; |
139 | extern __u32 cpu_caps_set[NCAPINTS]; | ||
139 | 140 | ||
140 | #ifdef CONFIG_SMP | 141 | #ifdef CONFIG_SMP |
141 | DECLARE_PER_CPU_SHARED_ALIGNED(struct cpuinfo_x86, cpu_info); | 142 | DECLARE_PER_CPU_SHARED_ALIGNED(struct cpuinfo_x86, cpu_info); |
@@ -409,9 +410,6 @@ DECLARE_PER_CPU(unsigned long, stack_canary); | |||
409 | extern unsigned int xstate_size; | 410 | extern unsigned int xstate_size; |
410 | extern void free_thread_xstate(struct task_struct *); | 411 | extern void free_thread_xstate(struct task_struct *); |
411 | extern struct kmem_cache *task_xstate_cachep; | 412 | extern struct kmem_cache *task_xstate_cachep; |
412 | extern void init_scattered_cpuid_features(struct cpuinfo_x86 *c); | ||
413 | extern unsigned int init_intel_cacheinfo(struct cpuinfo_x86 *c); | ||
414 | extern unsigned short num_cache_leaves; | ||
415 | 413 | ||
416 | struct thread_struct { | 414 | struct thread_struct { |
417 | /* Cached TLS descriptors: */ | 415 | /* Cached TLS descriptors: */ |
@@ -427,8 +425,12 @@ struct thread_struct { | |||
427 | unsigned short fsindex; | 425 | unsigned short fsindex; |
428 | unsigned short gsindex; | 426 | unsigned short gsindex; |
429 | #endif | 427 | #endif |
428 | #ifdef CONFIG_X86_32 | ||
430 | unsigned long ip; | 429 | unsigned long ip; |
430 | #endif | ||
431 | #ifdef CONFIG_X86_64 | ||
431 | unsigned long fs; | 432 | unsigned long fs; |
433 | #endif | ||
432 | unsigned long gs; | 434 | unsigned long gs; |
433 | /* Hardware debugging registers: */ | 435 | /* Hardware debugging registers: */ |
434 | unsigned long debugreg0; | 436 | unsigned long debugreg0; |
@@ -460,14 +462,8 @@ struct thread_struct { | |||
460 | unsigned io_bitmap_max; | 462 | unsigned io_bitmap_max; |
461 | /* MSR_IA32_DEBUGCTLMSR value to switch in if TIF_DEBUGCTLMSR is set. */ | 463 | /* MSR_IA32_DEBUGCTLMSR value to switch in if TIF_DEBUGCTLMSR is set. */ |
462 | unsigned long debugctlmsr; | 464 | unsigned long debugctlmsr; |
463 | #ifdef CONFIG_X86_DS | 465 | /* Debug Store context; see asm/ds.h */ |
464 | /* Debug Store context; see include/asm-x86/ds.h; goes into MSR_IA32_DS_AREA */ | ||
465 | struct ds_context *ds_ctx; | 466 | struct ds_context *ds_ctx; |
466 | #endif /* CONFIG_X86_DS */ | ||
467 | #ifdef CONFIG_X86_PTRACE_BTS | ||
468 | /* the signal to send on a bts buffer overflow */ | ||
469 | unsigned int bts_ovfl_signal; | ||
470 | #endif /* CONFIG_X86_PTRACE_BTS */ | ||
471 | }; | 467 | }; |
472 | 468 | ||
473 | static inline unsigned long native_get_debugreg(int regno) | 469 | static inline unsigned long native_get_debugreg(int regno) |
@@ -795,6 +791,21 @@ static inline unsigned long get_debugctlmsr(void) | |||
795 | return debugctlmsr; | 791 | return debugctlmsr; |
796 | } | 792 | } |
797 | 793 | ||
794 | static inline unsigned long get_debugctlmsr_on_cpu(int cpu) | ||
795 | { | ||
796 | u64 debugctlmsr = 0; | ||
797 | u32 val1, val2; | ||
798 | |||
799 | #ifndef CONFIG_X86_DEBUGCTLMSR | ||
800 | if (boot_cpu_data.x86 < 6) | ||
801 | return 0; | ||
802 | #endif | ||
803 | rdmsr_on_cpu(cpu, MSR_IA32_DEBUGCTLMSR, &val1, &val2); | ||
804 | debugctlmsr = val1 | ((u64)val2 << 32); | ||
805 | |||
806 | return debugctlmsr; | ||
807 | } | ||
808 | |||
798 | static inline void update_debugctlmsr(unsigned long debugctlmsr) | 809 | static inline void update_debugctlmsr(unsigned long debugctlmsr) |
799 | { | 810 | { |
800 | #ifndef CONFIG_X86_DEBUGCTLMSR | 811 | #ifndef CONFIG_X86_DEBUGCTLMSR |
@@ -804,6 +815,18 @@ static inline void update_debugctlmsr(unsigned long debugctlmsr) | |||
804 | wrmsrl(MSR_IA32_DEBUGCTLMSR, debugctlmsr); | 815 | wrmsrl(MSR_IA32_DEBUGCTLMSR, debugctlmsr); |
805 | } | 816 | } |
806 | 817 | ||
818 | static inline void update_debugctlmsr_on_cpu(int cpu, | ||
819 | unsigned long debugctlmsr) | ||
820 | { | ||
821 | #ifndef CONFIG_X86_DEBUGCTLMSR | ||
822 | if (boot_cpu_data.x86 < 6) | ||
823 | return; | ||
824 | #endif | ||
825 | wrmsr_on_cpu(cpu, MSR_IA32_DEBUGCTLMSR, | ||
826 | (u32)((u64)debugctlmsr), | ||
827 | (u32)((u64)debugctlmsr >> 32)); | ||
828 | } | ||
829 | |||
807 | /* | 830 | /* |
808 | * from system description table in BIOS. Mostly for MCA use, but | 831 | * from system description table in BIOS. Mostly for MCA use, but |
809 | * others may find it useful: | 832 | * others may find it useful: |
@@ -814,6 +837,7 @@ extern unsigned int BIOS_revision; | |||
814 | 837 | ||
815 | /* Boot loader type from the setup header: */ | 838 | /* Boot loader type from the setup header: */ |
816 | extern int bootloader_type; | 839 | extern int bootloader_type; |
840 | extern int bootloader_version; | ||
817 | 841 | ||
818 | extern char ignore_fpu_irq; | 842 | extern char ignore_fpu_irq; |
819 | 843 | ||
@@ -874,7 +898,6 @@ static inline void spin_lock_prefetch(const void *x) | |||
874 | .vm86_info = NULL, \ | 898 | .vm86_info = NULL, \ |
875 | .sysenter_cs = __KERNEL_CS, \ | 899 | .sysenter_cs = __KERNEL_CS, \ |
876 | .io_bitmap_ptr = NULL, \ | 900 | .io_bitmap_ptr = NULL, \ |
877 | .fs = __KERNEL_PERCPU, \ | ||
878 | } | 901 | } |
879 | 902 | ||
880 | /* | 903 | /* |
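get_debugctlmsr_on_cpu() and update_debugctlmsr_on_cpu() extend the existing local DEBUGCTL helpers to a remote CPU via rdmsr_on_cpu()/wrmsr_on_cpu(). A minimal usage sketch; the helper name is illustrative and DEBUGCTLMSR_LBR is assumed to come from msr-index.h:

	/* Illustrative sketch: turn on last-branch recording on another CPU. */
	static void enable_lbr_on_cpu(int cpu)
	{
		unsigned long debugctl = get_debugctlmsr_on_cpu(cpu);

		update_debugctlmsr_on_cpu(cpu, debugctl | DEBUGCTLMSR_LBR);
	}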
diff --git a/arch/x86/include/asm/ptrace.h b/arch/x86/include/asm/ptrace.h index 624f133943ed..0f0d908349aa 100644 --- a/arch/x86/include/asm/ptrace.h +++ b/arch/x86/include/asm/ptrace.h | |||
@@ -236,12 +236,11 @@ extern int do_get_thread_area(struct task_struct *p, int idx, | |||
236 | extern int do_set_thread_area(struct task_struct *p, int idx, | 236 | extern int do_set_thread_area(struct task_struct *p, int idx, |
237 | struct user_desc __user *info, int can_allocate); | 237 | struct user_desc __user *info, int can_allocate); |
238 | 238 | ||
239 | extern void x86_ptrace_untrace(struct task_struct *); | 239 | #ifdef CONFIG_X86_PTRACE_BTS |
240 | extern void x86_ptrace_fork(struct task_struct *child, | 240 | extern void ptrace_bts_untrace(struct task_struct *tsk); |
241 | unsigned long clone_flags); | ||
242 | 241 | ||
243 | #define arch_ptrace_untrace(tsk) x86_ptrace_untrace(tsk) | 242 | #define arch_ptrace_untrace(tsk) ptrace_bts_untrace(tsk) |
244 | #define arch_ptrace_fork(child, flags) x86_ptrace_fork(child, flags) | 243 | #endif /* CONFIG_X86_PTRACE_BTS */ |
245 | 244 | ||
246 | #endif /* __KERNEL__ */ | 245 | #endif /* __KERNEL__ */ |
247 | 246 | ||
diff --git a/arch/x86/include/asm/required-features.h b/arch/x86/include/asm/required-features.h index a4737dddfd58..64cf2d24fad1 100644 --- a/arch/x86/include/asm/required-features.h +++ b/arch/x86/include/asm/required-features.h | |||
@@ -48,9 +48,15 @@ | |||
48 | #endif | 48 | #endif |
49 | 49 | ||
50 | #ifdef CONFIG_X86_64 | 50 | #ifdef CONFIG_X86_64 |
51 | #ifdef CONFIG_PARAVIRT | ||
52 | /* Paravirtualized systems may not have PSE or PGE available */ | ||
51 | #define NEED_PSE 0 | 53 | #define NEED_PSE 0 |
52 | #define NEED_MSR (1<<(X86_FEATURE_MSR & 31)) | ||
53 | #define NEED_PGE 0 | 54 | #define NEED_PGE 0 |
55 | #else | ||
56 | #define NEED_PSE (1<<(X86_FEATURE_PSE & 31)) | ||
57 | #define NEED_PGE (1<<(X86_FEATURE_PGE & 31)) | ||
58 | #endif | ||
59 | #define NEED_MSR (1<<(X86_FEATURE_MSR & 31)) | ||
54 | #define NEED_FXSR (1<<(X86_FEATURE_FXSR & 31)) | 60 | #define NEED_FXSR (1<<(X86_FEATURE_FXSR & 31)) |
55 | #define NEED_XMM (1<<(X86_FEATURE_XMM & 31)) | 61 | #define NEED_XMM (1<<(X86_FEATURE_XMM & 31)) |
56 | #define NEED_XMM2 (1<<(X86_FEATURE_XMM2 & 31)) | 62 | #define NEED_XMM2 (1<<(X86_FEATURE_XMM2 & 31)) |
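For reference, the & 31 keeps only the bit position within one 32-bit capability word. Assuming the usual cpufeature.h numbering (PSE is bit 3 and PGE is bit 13 of word 0), the non-paravirt values work out as:

	/*
	 *   NEED_PSE = 1 << (3 & 31)  = 0x00000008
	 *   NEED_PGE = 1 << (13 & 31) = 0x00002000
	 *
	 * Both then land in REQUIRED_MASK0 alongside NEED_MSR, NEED_FXSR, etc.
	 */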
diff --git a/arch/x86/include/asm/setup.h b/arch/x86/include/asm/setup.h index bdc2ada05ae0..4093d1ed6db2 100644 --- a/arch/x86/include/asm/setup.h +++ b/arch/x86/include/asm/setup.h | |||
@@ -33,7 +33,6 @@ struct x86_quirks { | |||
33 | int (*setup_ioapic_ids)(void); | 33 | int (*setup_ioapic_ids)(void); |
34 | }; | 34 | }; |
35 | 35 | ||
36 | extern void x86_quirk_pre_intr_init(void); | ||
37 | extern void x86_quirk_intr_init(void); | 36 | extern void x86_quirk_intr_init(void); |
38 | 37 | ||
39 | extern void x86_quirk_trap_init(void); | 38 | extern void x86_quirk_trap_init(void); |
diff --git a/arch/x86/include/asm/signal.h b/arch/x86/include/asm/signal.h index 7761a5d554bb..598457cbd0f8 100644 --- a/arch/x86/include/asm/signal.h +++ b/arch/x86/include/asm/signal.h | |||
@@ -117,7 +117,7 @@ typedef unsigned long sigset_t; | |||
117 | #define MINSIGSTKSZ 2048 | 117 | #define MINSIGSTKSZ 2048 |
118 | #define SIGSTKSZ 8192 | 118 | #define SIGSTKSZ 8192 |
119 | 119 | ||
120 | #include <asm-generic/signal.h> | 120 | #include <asm-generic/signal-defs.h> |
121 | 121 | ||
122 | #ifndef __ASSEMBLY__ | 122 | #ifndef __ASSEMBLY__ |
123 | 123 | ||
diff --git a/arch/x86/include/asm/smp.h b/arch/x86/include/asm/smp.h index 19e0d88b966d..6a84ed166aec 100644 --- a/arch/x86/include/asm/smp.h +++ b/arch/x86/include/asm/smp.h | |||
@@ -180,7 +180,7 @@ extern int safe_smp_processor_id(void); | |||
180 | static inline int logical_smp_processor_id(void) | 180 | static inline int logical_smp_processor_id(void) |
181 | { | 181 | { |
182 | /* we don't want to mark this access volatile - bad code generation */ | 182 | /* we don't want to mark this access volatile - bad code generation */ |
183 | return GET_APIC_LOGICAL_ID(*(u32 *)(APIC_BASE + APIC_LDR)); | 183 | return GET_APIC_LOGICAL_ID(apic_read(APIC_LDR)); |
184 | } | 184 | } |
185 | 185 | ||
186 | #endif | 186 | #endif |
diff --git a/arch/x86/include/asm/sparsemem.h b/arch/x86/include/asm/sparsemem.h index e3cc3c063ec5..4517d6b93188 100644 --- a/arch/x86/include/asm/sparsemem.h +++ b/arch/x86/include/asm/sparsemem.h | |||
@@ -27,7 +27,7 @@ | |||
27 | #else /* CONFIG_X86_32 */ | 27 | #else /* CONFIG_X86_32 */ |
28 | # define SECTION_SIZE_BITS 27 /* matt - 128 is convenient right now */ | 28 | # define SECTION_SIZE_BITS 27 /* matt - 128 is convenient right now */ |
29 | # define MAX_PHYSADDR_BITS 44 | 29 | # define MAX_PHYSADDR_BITS 44 |
30 | # define MAX_PHYSMEM_BITS 44 /* Can be max 45 bits */ | 30 | # define MAX_PHYSMEM_BITS 46 |
31 | #endif | 31 | #endif |
32 | 32 | ||
33 | #endif /* CONFIG_SPARSEMEM */ | 33 | #endif /* CONFIG_SPARSEMEM */ |
diff --git a/arch/x86/include/asm/string_32.h b/arch/x86/include/asm/string_32.h index 0e0e3ba827f7..c86f452256de 100644 --- a/arch/x86/include/asm/string_32.h +++ b/arch/x86/include/asm/string_32.h | |||
@@ -177,10 +177,18 @@ static inline void *__memcpy3d(void *to, const void *from, size_t len) | |||
177 | * No 3D Now! | 177 | * No 3D Now! |
178 | */ | 178 | */ |
179 | 179 | ||
180 | #ifndef CONFIG_KMEMCHECK | ||
180 | #define memcpy(t, f, n) \ | 181 | #define memcpy(t, f, n) \ |
181 | (__builtin_constant_p((n)) \ | 182 | (__builtin_constant_p((n)) \ |
182 | ? __constant_memcpy((t), (f), (n)) \ | 183 | ? __constant_memcpy((t), (f), (n)) \ |
183 | : __memcpy((t), (f), (n))) | 184 | : __memcpy((t), (f), (n))) |
185 | #else | ||
186 | /* | ||
187 | * kmemcheck becomes very happy if we use the REP instructions unconditionally, | ||
188 | * because it means that we know both memory operands in advance. | ||
189 | */ | ||
190 | #define memcpy(t, f, n) __memcpy((t), (f), (n)) | ||
191 | #endif | ||
184 | 192 | ||
185 | #endif | 193 | #endif |
186 | 194 | ||
diff --git a/arch/x86/include/asm/string_64.h b/arch/x86/include/asm/string_64.h index 2afe164bf1e6..19e2c468fc2c 100644 --- a/arch/x86/include/asm/string_64.h +++ b/arch/x86/include/asm/string_64.h | |||
@@ -27,6 +27,7 @@ static __always_inline void *__inline_memcpy(void *to, const void *from, size_t | |||
27 | function. */ | 27 | function. */ |
28 | 28 | ||
29 | #define __HAVE_ARCH_MEMCPY 1 | 29 | #define __HAVE_ARCH_MEMCPY 1 |
30 | #ifndef CONFIG_KMEMCHECK | ||
30 | #if (__GNUC__ == 4 && __GNUC_MINOR__ >= 3) || __GNUC__ > 4 | 31 | #if (__GNUC__ == 4 && __GNUC_MINOR__ >= 3) || __GNUC__ > 4 |
31 | extern void *memcpy(void *to, const void *from, size_t len); | 32 | extern void *memcpy(void *to, const void *from, size_t len); |
32 | #else | 33 | #else |
@@ -42,6 +43,13 @@ extern void *__memcpy(void *to, const void *from, size_t len); | |||
42 | __ret; \ | 43 | __ret; \ |
43 | }) | 44 | }) |
44 | #endif | 45 | #endif |
46 | #else | ||
47 | /* | ||
48 | * kmemcheck becomes very happy if we use the REP instructions unconditionally, | ||
49 | * because it means that we know both memory operands in advance. | ||
50 | */ | ||
51 | #define memcpy(dst, src, len) __inline_memcpy((dst), (src), (len)) | ||
52 | #endif | ||
45 | 53 | ||
46 | #define __HAVE_ARCH_MEMSET | 54 | #define __HAVE_ARCH_MEMSET |
47 | void *memset(void *s, int c, size_t n); | 55 | void *memset(void *s, int c, size_t n); |
diff --git a/arch/x86/include/asm/svm.h b/arch/x86/include/asm/svm.h index 82ada75f3ebf..85574b7c1bc1 100644 --- a/arch/x86/include/asm/svm.h +++ b/arch/x86/include/asm/svm.h | |||
@@ -225,6 +225,7 @@ struct __attribute__ ((__packed__)) vmcb { | |||
225 | #define SVM_EVTINJ_VALID_ERR (1 << 11) | 225 | #define SVM_EVTINJ_VALID_ERR (1 << 11) |
226 | 226 | ||
227 | #define SVM_EXITINTINFO_VEC_MASK SVM_EVTINJ_VEC_MASK | 227 | #define SVM_EXITINTINFO_VEC_MASK SVM_EVTINJ_VEC_MASK |
228 | #define SVM_EXITINTINFO_TYPE_MASK SVM_EVTINJ_TYPE_MASK | ||
228 | 229 | ||
229 | #define SVM_EXITINTINFO_TYPE_INTR SVM_EVTINJ_TYPE_INTR | 230 | #define SVM_EXITINTINFO_TYPE_INTR SVM_EVTINJ_TYPE_INTR |
230 | #define SVM_EXITINTINFO_TYPE_NMI SVM_EVTINJ_TYPE_NMI | 231 | #define SVM_EXITINTINFO_TYPE_NMI SVM_EVTINJ_TYPE_NMI |
diff --git a/arch/x86/include/asm/syscalls.h b/arch/x86/include/asm/syscalls.h index 7043408f6904..372b76edd63f 100644 --- a/arch/x86/include/asm/syscalls.h +++ b/arch/x86/include/asm/syscalls.h | |||
@@ -1,7 +1,7 @@ | |||
1 | /* | 1 | /* |
2 | * syscalls.h - Linux syscall interfaces (arch-specific) | 2 | * syscalls.h - Linux syscall interfaces (arch-specific) |
3 | * | 3 | * |
4 | * Copyright (c) 2008 Jaswinder Singh | 4 | * Copyright (c) 2008 Jaswinder Singh Rajput |
5 | * | 5 | * |
6 | * This file is released under the GPLv2. | 6 | * This file is released under the GPLv2. |
7 | * See the file COPYING for more details. | 7 | * See the file COPYING for more details. |
@@ -12,50 +12,55 @@ | |||
12 | 12 | ||
13 | #include <linux/compiler.h> | 13 | #include <linux/compiler.h> |
14 | #include <linux/linkage.h> | 14 | #include <linux/linkage.h> |
15 | #include <linux/types.h> | ||
16 | #include <linux/signal.h> | 15 | #include <linux/signal.h> |
16 | #include <linux/types.h> | ||
17 | 17 | ||
18 | /* Common in X86_32 and X86_64 */ | 18 | /* Common in X86_32 and X86_64 */ |
19 | /* kernel/ioport.c */ | 19 | /* kernel/ioport.c */ |
20 | asmlinkage long sys_ioperm(unsigned long, unsigned long, int); | 20 | asmlinkage long sys_ioperm(unsigned long, unsigned long, int); |
21 | 21 | ||
22 | /* kernel/process.c */ | ||
23 | int sys_fork(struct pt_regs *); | ||
24 | int sys_vfork(struct pt_regs *); | ||
25 | |||
22 | /* kernel/ldt.c */ | 26 | /* kernel/ldt.c */ |
23 | asmlinkage int sys_modify_ldt(int, void __user *, unsigned long); | 27 | asmlinkage int sys_modify_ldt(int, void __user *, unsigned long); |
24 | 28 | ||
29 | /* kernel/signal.c */ | ||
30 | long sys_rt_sigreturn(struct pt_regs *); | ||
31 | |||
25 | /* kernel/tls.c */ | 32 | /* kernel/tls.c */ |
26 | asmlinkage int sys_set_thread_area(struct user_desc __user *); | 33 | asmlinkage int sys_set_thread_area(struct user_desc __user *); |
27 | asmlinkage int sys_get_thread_area(struct user_desc __user *); | 34 | asmlinkage int sys_get_thread_area(struct user_desc __user *); |
28 | 35 | ||
29 | /* X86_32 only */ | 36 | /* X86_32 only */ |
30 | #ifdef CONFIG_X86_32 | 37 | #ifdef CONFIG_X86_32 |
38 | /* kernel/ioport.c */ | ||
39 | long sys_iopl(struct pt_regs *); | ||
40 | |||
31 | /* kernel/process_32.c */ | 41 | /* kernel/process_32.c */ |
32 | int sys_fork(struct pt_regs *); | ||
33 | int sys_clone(struct pt_regs *); | 42 | int sys_clone(struct pt_regs *); |
34 | int sys_vfork(struct pt_regs *); | ||
35 | int sys_execve(struct pt_regs *); | 43 | int sys_execve(struct pt_regs *); |
36 | 44 | ||
37 | /* kernel/signal_32.c */ | 45 | /* kernel/signal.c */ |
38 | asmlinkage int sys_sigsuspend(int, int, old_sigset_t); | 46 | asmlinkage int sys_sigsuspend(int, int, old_sigset_t); |
39 | asmlinkage int sys_sigaction(int, const struct old_sigaction __user *, | 47 | asmlinkage int sys_sigaction(int, const struct old_sigaction __user *, |
40 | struct old_sigaction __user *); | 48 | struct old_sigaction __user *); |
41 | int sys_sigaltstack(struct pt_regs *); | 49 | int sys_sigaltstack(struct pt_regs *); |
42 | unsigned long sys_sigreturn(struct pt_regs *); | 50 | unsigned long sys_sigreturn(struct pt_regs *); |
43 | long sys_rt_sigreturn(struct pt_regs *); | ||
44 | |||
45 | /* kernel/ioport.c */ | ||
46 | long sys_iopl(struct pt_regs *); | ||
47 | 51 | ||
48 | /* kernel/sys_i386_32.c */ | 52 | /* kernel/sys_i386_32.c */ |
53 | struct mmap_arg_struct; | ||
54 | struct sel_arg_struct; | ||
55 | struct oldold_utsname; | ||
56 | struct old_utsname; | ||
57 | |||
49 | asmlinkage long sys_mmap2(unsigned long, unsigned long, unsigned long, | 58 | asmlinkage long sys_mmap2(unsigned long, unsigned long, unsigned long, |
50 | unsigned long, unsigned long, unsigned long); | 59 | unsigned long, unsigned long, unsigned long); |
51 | struct mmap_arg_struct; | ||
52 | asmlinkage int old_mmap(struct mmap_arg_struct __user *); | 60 | asmlinkage int old_mmap(struct mmap_arg_struct __user *); |
53 | struct sel_arg_struct; | ||
54 | asmlinkage int old_select(struct sel_arg_struct __user *); | 61 | asmlinkage int old_select(struct sel_arg_struct __user *); |
55 | asmlinkage int sys_ipc(uint, int, int, int, void __user *, long); | 62 | asmlinkage int sys_ipc(uint, int, int, int, void __user *, long); |
56 | struct old_utsname; | ||
57 | asmlinkage int sys_uname(struct old_utsname __user *); | 63 | asmlinkage int sys_uname(struct old_utsname __user *); |
58 | struct oldold_utsname; | ||
59 | asmlinkage int sys_olduname(struct oldold_utsname __user *); | 64 | asmlinkage int sys_olduname(struct oldold_utsname __user *); |
60 | 65 | ||
61 | /* kernel/vm86_32.c */ | 66 | /* kernel/vm86_32.c */ |
@@ -65,29 +70,27 @@ int sys_vm86(struct pt_regs *); | |||
65 | #else /* CONFIG_X86_32 */ | 70 | #else /* CONFIG_X86_32 */ |
66 | 71 | ||
67 | /* X86_64 only */ | 72 | /* X86_64 only */ |
73 | /* kernel/ioport.c */ | ||
74 | asmlinkage long sys_iopl(unsigned int, struct pt_regs *); | ||
75 | |||
68 | /* kernel/process_64.c */ | 76 | /* kernel/process_64.c */ |
69 | asmlinkage long sys_fork(struct pt_regs *); | ||
70 | asmlinkage long sys_clone(unsigned long, unsigned long, | 77 | asmlinkage long sys_clone(unsigned long, unsigned long, |
71 | void __user *, void __user *, | 78 | void __user *, void __user *, |
72 | struct pt_regs *); | 79 | struct pt_regs *); |
73 | asmlinkage long sys_vfork(struct pt_regs *); | ||
74 | asmlinkage long sys_execve(char __user *, char __user * __user *, | 80 | asmlinkage long sys_execve(char __user *, char __user * __user *, |
75 | char __user * __user *, | 81 | char __user * __user *, |
76 | struct pt_regs *); | 82 | struct pt_regs *); |
77 | long sys_arch_prctl(int, unsigned long); | 83 | long sys_arch_prctl(int, unsigned long); |
78 | 84 | ||
79 | /* kernel/ioport.c */ | 85 | /* kernel/signal.c */ |
80 | asmlinkage long sys_iopl(unsigned int, struct pt_regs *); | ||
81 | |||
82 | /* kernel/signal_64.c */ | ||
83 | asmlinkage long sys_sigaltstack(const stack_t __user *, stack_t __user *, | 86 | asmlinkage long sys_sigaltstack(const stack_t __user *, stack_t __user *, |
84 | struct pt_regs *); | 87 | struct pt_regs *); |
85 | long sys_rt_sigreturn(struct pt_regs *); | ||
86 | 88 | ||
87 | /* kernel/sys_x86_64.c */ | 89 | /* kernel/sys_x86_64.c */ |
90 | struct new_utsname; | ||
91 | |||
88 | asmlinkage long sys_mmap(unsigned long, unsigned long, unsigned long, | 92 | asmlinkage long sys_mmap(unsigned long, unsigned long, unsigned long, |
89 | unsigned long, unsigned long, unsigned long); | 93 | unsigned long, unsigned long, unsigned long); |
90 | struct new_utsname; | ||
91 | asmlinkage long sys_uname(struct new_utsname __user *); | 94 | asmlinkage long sys_uname(struct new_utsname __user *); |
92 | 95 | ||
93 | #endif /* CONFIG_X86_32 */ | 96 | #endif /* CONFIG_X86_32 */ |
diff --git a/arch/x86/include/asm/termios.h b/arch/x86/include/asm/termios.h index f72956331c49..c4ee8056baca 100644 --- a/arch/x86/include/asm/termios.h +++ b/arch/x86/include/asm/termios.h | |||
@@ -67,6 +67,7 @@ static inline int user_termio_to_kernel_termios(struct ktermios *termios, | |||
67 | SET_LOW_TERMIOS_BITS(termios, termio, c_oflag); | 67 | SET_LOW_TERMIOS_BITS(termios, termio, c_oflag); |
68 | SET_LOW_TERMIOS_BITS(termios, termio, c_cflag); | 68 | SET_LOW_TERMIOS_BITS(termios, termio, c_cflag); |
69 | SET_LOW_TERMIOS_BITS(termios, termio, c_lflag); | 69 | SET_LOW_TERMIOS_BITS(termios, termio, c_lflag); |
70 | get_user(termios->c_line, &termio->c_line); | ||
70 | return copy_from_user(termios->c_cc, termio->c_cc, NCC); | 71 | return copy_from_user(termios->c_cc, termio->c_cc, NCC); |
71 | } | 72 | } |
72 | 73 | ||
diff --git a/arch/x86/include/asm/therm_throt.h b/arch/x86/include/asm/therm_throt.h deleted file mode 100644 index c62349ee7860..000000000000 --- a/arch/x86/include/asm/therm_throt.h +++ /dev/null | |||
@@ -1,9 +0,0 @@ | |||
1 | #ifndef _ASM_X86_THERM_THROT_H | ||
2 | #define _ASM_X86_THERM_THROT_H | ||
3 | |||
4 | #include <asm/atomic.h> | ||
5 | |||
6 | extern atomic_t therm_throt_en; | ||
7 | int therm_throt_process(int curr); | ||
8 | |||
9 | #endif /* _ASM_X86_THERM_THROT_H */ | ||
diff --git a/arch/x86/include/asm/thread_info.h b/arch/x86/include/asm/thread_info.h index 8820a73ae090..b0783520988b 100644 --- a/arch/x86/include/asm/thread_info.h +++ b/arch/x86/include/asm/thread_info.h | |||
@@ -94,7 +94,8 @@ struct thread_info { | |||
94 | #define TIF_FORCED_TF 24 /* true if TF in eflags artificially */ | 94 | #define TIF_FORCED_TF 24 /* true if TF in eflags artificially */ |
95 | #define TIF_DEBUGCTLMSR 25 /* uses thread_struct.debugctlmsr */ | 95 | #define TIF_DEBUGCTLMSR 25 /* uses thread_struct.debugctlmsr */ |
96 | #define TIF_DS_AREA_MSR 26 /* uses thread_struct.ds_area_msr */ | 96 | #define TIF_DS_AREA_MSR 26 /* uses thread_struct.ds_area_msr */ |
97 | #define TIF_SYSCALL_FTRACE 27 /* for ftrace syscall instrumentation */ | 97 | #define TIF_LAZY_MMU_UPDATES 27 /* task is updating the mmu lazily */ |
98 | #define TIF_SYSCALL_FTRACE 28 /* for ftrace syscall instrumentation */ | ||
98 | 99 | ||
99 | #define _TIF_SYSCALL_TRACE (1 << TIF_SYSCALL_TRACE) | 100 | #define _TIF_SYSCALL_TRACE (1 << TIF_SYSCALL_TRACE) |
100 | #define _TIF_NOTIFY_RESUME (1 << TIF_NOTIFY_RESUME) | 101 | #define _TIF_NOTIFY_RESUME (1 << TIF_NOTIFY_RESUME) |
@@ -116,6 +117,7 @@ struct thread_info { | |||
116 | #define _TIF_FORCED_TF (1 << TIF_FORCED_TF) | 117 | #define _TIF_FORCED_TF (1 << TIF_FORCED_TF) |
117 | #define _TIF_DEBUGCTLMSR (1 << TIF_DEBUGCTLMSR) | 118 | #define _TIF_DEBUGCTLMSR (1 << TIF_DEBUGCTLMSR) |
118 | #define _TIF_DS_AREA_MSR (1 << TIF_DS_AREA_MSR) | 119 | #define _TIF_DS_AREA_MSR (1 << TIF_DS_AREA_MSR) |
120 | #define _TIF_LAZY_MMU_UPDATES (1 << TIF_LAZY_MMU_UPDATES) | ||
119 | #define _TIF_SYSCALL_FTRACE (1 << TIF_SYSCALL_FTRACE) | 121 | #define _TIF_SYSCALL_FTRACE (1 << TIF_SYSCALL_FTRACE) |
120 | 122 | ||
121 | /* work to do in syscall_trace_enter() */ | 123 | /* work to do in syscall_trace_enter() */ |
@@ -152,9 +154,9 @@ struct thread_info { | |||
152 | 154 | ||
153 | /* thread information allocation */ | 155 | /* thread information allocation */ |
154 | #ifdef CONFIG_DEBUG_STACK_USAGE | 156 | #ifdef CONFIG_DEBUG_STACK_USAGE |
155 | #define THREAD_FLAGS (GFP_KERNEL | __GFP_ZERO) | 157 | #define THREAD_FLAGS (GFP_KERNEL | __GFP_NOTRACK | __GFP_ZERO) |
156 | #else | 158 | #else |
157 | #define THREAD_FLAGS GFP_KERNEL | 159 | #define THREAD_FLAGS (GFP_KERNEL | __GFP_NOTRACK) |
158 | #endif | 160 | #endif |
159 | 161 | ||
160 | #define __HAVE_ARCH_THREAD_INFO_ALLOCATOR | 162 | #define __HAVE_ARCH_THREAD_INFO_ALLOCATOR |
diff --git a/arch/x86/include/asm/timer.h b/arch/x86/include/asm/timer.h index bd37ed444a21..20ca9c4d4686 100644 --- a/arch/x86/include/asm/timer.h +++ b/arch/x86/include/asm/timer.h | |||
@@ -45,12 +45,16 @@ extern int no_timer_check; | |||
45 | */ | 45 | */ |
46 | 46 | ||
47 | DECLARE_PER_CPU(unsigned long, cyc2ns); | 47 | DECLARE_PER_CPU(unsigned long, cyc2ns); |
48 | DECLARE_PER_CPU(unsigned long long, cyc2ns_offset); | ||
48 | 49 | ||
49 | #define CYC2NS_SCALE_FACTOR 10 /* 2^10, carefully chosen */ | 50 | #define CYC2NS_SCALE_FACTOR 10 /* 2^10, carefully chosen */ |
50 | 51 | ||
51 | static inline unsigned long long __cycles_2_ns(unsigned long long cyc) | 52 | static inline unsigned long long __cycles_2_ns(unsigned long long cyc) |
52 | { | 53 | { |
53 | return cyc * per_cpu(cyc2ns, smp_processor_id()) >> CYC2NS_SCALE_FACTOR; | 54 | int cpu = smp_processor_id(); |
55 | unsigned long long ns = per_cpu(cyc2ns_offset, cpu); | ||
56 | ns += cyc * per_cpu(cyc2ns, cpu) >> CYC2NS_SCALE_FACTOR; | ||
57 | return ns; | ||
54 | } | 58 | } |
55 | 59 | ||
56 | static inline unsigned long long cycles_2_ns(unsigned long long cyc) | 60 | static inline unsigned long long cycles_2_ns(unsigned long long cyc) |
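With the per-CPU cyc2ns_offset added here, __cycles_2_ns() computes ns = cyc2ns_offset + (cyc * cyc2ns >> CYC2NS_SCALE_FACTOR), so sched_clock() stays continuous when cyc2ns is rescaled after a CPU frequency change. A rough worked example, assuming cyc2ns is set to about (1000000 << 10) / cpu_khz as tsc.c does:

	/*
	 *   cpu_khz = 2000000 (2 GHz) -> cyc2ns = 1024000000 / 2000000 = 512
	 *   cyc     = 4000000 cycles  -> 4000000 * 512 >> 10 = 2000000 ns (2 ms)
	 *
	 * cyc2ns_offset is then chosen so the new scale continues from the last
	 * value returned under the old one.
	 */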
diff --git a/arch/x86/include/asm/timex.h b/arch/x86/include/asm/timex.h index b5c9d45c981f..1375cfc93960 100644 --- a/arch/x86/include/asm/timex.h +++ b/arch/x86/include/asm/timex.h | |||
@@ -4,9 +4,7 @@ | |||
4 | #include <asm/processor.h> | 4 | #include <asm/processor.h> |
5 | #include <asm/tsc.h> | 5 | #include <asm/tsc.h> |
6 | 6 | ||
7 | /* The PIT ticks at this frequency (in HZ): */ | 7 | /* Assume we use the PIT time source for the clock tick */ |
8 | #define PIT_TICK_RATE 1193182 | ||
9 | |||
10 | #define CLOCK_TICK_RATE PIT_TICK_RATE | 8 | #define CLOCK_TICK_RATE PIT_TICK_RATE |
11 | 9 | ||
12 | #define ARCH_HAS_READ_CURRENT_TIMER | 10 | #define ARCH_HAS_READ_CURRENT_TIMER |
diff --git a/arch/x86/include/asm/tlbflush.h b/arch/x86/include/asm/tlbflush.h index 16a5c84b0329..7f3eba08e7de 100644 --- a/arch/x86/include/asm/tlbflush.h +++ b/arch/x86/include/asm/tlbflush.h | |||
@@ -17,7 +17,7 @@ | |||
17 | 17 | ||
18 | static inline void __native_flush_tlb(void) | 18 | static inline void __native_flush_tlb(void) |
19 | { | 19 | { |
20 | write_cr3(read_cr3()); | 20 | native_write_cr3(native_read_cr3()); |
21 | } | 21 | } |
22 | 22 | ||
23 | static inline void __native_flush_tlb_global(void) | 23 | static inline void __native_flush_tlb_global(void) |
@@ -32,11 +32,11 @@ static inline void __native_flush_tlb_global(void) | |||
32 | */ | 32 | */ |
33 | raw_local_irq_save(flags); | 33 | raw_local_irq_save(flags); |
34 | 34 | ||
35 | cr4 = read_cr4(); | 35 | cr4 = native_read_cr4(); |
36 | /* clear PGE */ | 36 | /* clear PGE */ |
37 | write_cr4(cr4 & ~X86_CR4_PGE); | 37 | native_write_cr4(cr4 & ~X86_CR4_PGE); |
38 | /* write old PGE again and flush TLBs */ | 38 | /* write old PGE again and flush TLBs */ |
39 | write_cr4(cr4); | 39 | native_write_cr4(cr4); |
40 | 40 | ||
41 | raw_local_irq_restore(flags); | 41 | raw_local_irq_restore(flags); |
42 | } | 42 | } |
@@ -172,6 +172,6 @@ static inline void flush_tlb_kernel_range(unsigned long start, | |||
172 | flush_tlb_all(); | 172 | flush_tlb_all(); |
173 | } | 173 | } |
174 | 174 | ||
175 | extern void zap_low_mappings(void); | 175 | extern void zap_low_mappings(bool early); |
176 | 176 | ||
177 | #endif /* _ASM_X86_TLBFLUSH_H */ | 177 | #endif /* _ASM_X86_TLBFLUSH_H */ |
diff --git a/arch/x86/include/asm/topology.h b/arch/x86/include/asm/topology.h index f44b49abca49..066ef590d7e0 100644 --- a/arch/x86/include/asm/topology.h +++ b/arch/x86/include/asm/topology.h | |||
@@ -203,7 +203,8 @@ struct pci_bus; | |||
203 | void x86_pci_root_bus_res_quirks(struct pci_bus *b); | 203 | void x86_pci_root_bus_res_quirks(struct pci_bus *b); |
204 | 204 | ||
205 | #ifdef CONFIG_SMP | 205 | #ifdef CONFIG_SMP |
206 | #define mc_capable() (cpumask_weight(cpu_core_mask(0)) != nr_cpu_ids) | 206 | #define mc_capable() ((boot_cpu_data.x86_max_cores > 1) && \ |
207 | (cpumask_weight(cpu_core_mask(0)) != nr_cpu_ids)) | ||
207 | #define smt_capable() (smp_num_siblings > 1) | 208 | #define smt_capable() (smp_num_siblings > 1) |
208 | #endif | 209 | #endif |
209 | 210 | ||
diff --git a/arch/x86/include/asm/traps.h b/arch/x86/include/asm/traps.h index 0d5342515b86..bfd74c032fca 100644 --- a/arch/x86/include/asm/traps.h +++ b/arch/x86/include/asm/traps.h | |||
@@ -2,6 +2,7 @@ | |||
2 | #define _ASM_X86_TRAPS_H | 2 | #define _ASM_X86_TRAPS_H |
3 | 3 | ||
4 | #include <asm/debugreg.h> | 4 | #include <asm/debugreg.h> |
5 | #include <asm/siginfo.h> /* TRAP_TRACE, ... */ | ||
5 | 6 | ||
6 | #ifdef CONFIG_X86_32 | 7 | #ifdef CONFIG_X86_32 |
7 | #define dotraplinkage | 8 | #define dotraplinkage |
@@ -13,6 +14,9 @@ asmlinkage void divide_error(void); | |||
13 | asmlinkage void debug(void); | 14 | asmlinkage void debug(void); |
14 | asmlinkage void nmi(void); | 15 | asmlinkage void nmi(void); |
15 | asmlinkage void int3(void); | 16 | asmlinkage void int3(void); |
17 | asmlinkage void xen_debug(void); | ||
18 | asmlinkage void xen_int3(void); | ||
19 | asmlinkage void xen_stack_segment(void); | ||
16 | asmlinkage void overflow(void); | 20 | asmlinkage void overflow(void); |
17 | asmlinkage void bounds(void); | 21 | asmlinkage void bounds(void); |
18 | asmlinkage void invalid_op(void); | 22 | asmlinkage void invalid_op(void); |
@@ -74,7 +78,6 @@ static inline int get_si_code(unsigned long condition) | |||
74 | } | 78 | } |
75 | 79 | ||
76 | extern int panic_on_unrecovered_nmi; | 80 | extern int panic_on_unrecovered_nmi; |
77 | extern int kstack_depth_to_print; | ||
78 | 81 | ||
79 | void math_error(void __user *); | 82 | void math_error(void __user *); |
80 | void math_emulate(struct math_emu_info *); | 83 | void math_emulate(struct math_emu_info *); |
diff --git a/arch/x86/include/asm/types.h b/arch/x86/include/asm/types.h index e6f736320077..09b97745772f 100644 --- a/arch/x86/include/asm/types.h +++ b/arch/x86/include/asm/types.h | |||
@@ -14,12 +14,6 @@ typedef unsigned short umode_t; | |||
14 | */ | 14 | */ |
15 | #ifdef __KERNEL__ | 15 | #ifdef __KERNEL__ |
16 | 16 | ||
17 | #ifdef CONFIG_X86_32 | ||
18 | # define BITS_PER_LONG 32 | ||
19 | #else | ||
20 | # define BITS_PER_LONG 64 | ||
21 | #endif | ||
22 | |||
23 | #ifndef __ASSEMBLY__ | 17 | #ifndef __ASSEMBLY__ |
24 | 18 | ||
25 | typedef u64 dma64_addr_t; | 19 | typedef u64 dma64_addr_t; |
diff --git a/arch/x86/include/asm/uaccess.h b/arch/x86/include/asm/uaccess.h index b685ece89d5c..20e6a795e160 100644 --- a/arch/x86/include/asm/uaccess.h +++ b/arch/x86/include/asm/uaccess.h | |||
@@ -25,7 +25,7 @@ | |||
25 | #define MAKE_MM_SEG(s) ((mm_segment_t) { (s) }) | 25 | #define MAKE_MM_SEG(s) ((mm_segment_t) { (s) }) |
26 | 26 | ||
27 | #define KERNEL_DS MAKE_MM_SEG(-1UL) | 27 | #define KERNEL_DS MAKE_MM_SEG(-1UL) |
28 | #define USER_DS MAKE_MM_SEG(PAGE_OFFSET) | 28 | #define USER_DS MAKE_MM_SEG(TASK_SIZE_MAX) |
29 | 29 | ||
30 | #define get_ds() (KERNEL_DS) | 30 | #define get_ds() (KERNEL_DS) |
31 | #define get_fs() (current_thread_info()->addr_limit) | 31 | #define get_fs() (current_thread_info()->addr_limit) |
diff --git a/arch/x86/include/asm/unistd_32.h b/arch/x86/include/asm/unistd_32.h index 6e72d74cf8dc..732a30706153 100644 --- a/arch/x86/include/asm/unistd_32.h +++ b/arch/x86/include/asm/unistd_32.h | |||
@@ -340,6 +340,8 @@ | |||
340 | #define __NR_inotify_init1 332 | 340 | #define __NR_inotify_init1 332 |
341 | #define __NR_preadv 333 | 341 | #define __NR_preadv 333 |
342 | #define __NR_pwritev 334 | 342 | #define __NR_pwritev 334 |
343 | #define __NR_rt_tgsigqueueinfo 335 | ||
344 | #define __NR_perf_counter_open 336 | ||
343 | 345 | ||
344 | #ifdef __KERNEL__ | 346 | #ifdef __KERNEL__ |
345 | 347 | ||
diff --git a/arch/x86/include/asm/unistd_64.h b/arch/x86/include/asm/unistd_64.h index f81829462325..900e1617e672 100644 --- a/arch/x86/include/asm/unistd_64.h +++ b/arch/x86/include/asm/unistd_64.h | |||
@@ -657,7 +657,10 @@ __SYSCALL(__NR_inotify_init1, sys_inotify_init1) | |||
657 | __SYSCALL(__NR_preadv, sys_preadv) | 657 | __SYSCALL(__NR_preadv, sys_preadv) |
658 | #define __NR_pwritev 296 | 658 | #define __NR_pwritev 296 |
659 | __SYSCALL(__NR_pwritev, sys_pwritev) | 659 | __SYSCALL(__NR_pwritev, sys_pwritev) |
660 | 660 | #define __NR_rt_tgsigqueueinfo 297 | |
661 | __SYSCALL(__NR_rt_tgsigqueueinfo, sys_rt_tgsigqueueinfo) | ||
662 | #define __NR_perf_counter_open 298 | ||
663 | __SYSCALL(__NR_perf_counter_open, sys_perf_counter_open) | ||
661 | 664 | ||
662 | #ifndef __NO_STUBS | 665 | #ifndef __NO_STUBS |
663 | #define __ARCH_WANT_OLD_READDIR | 666 | #define __ARCH_WANT_OLD_READDIR |
diff --git a/arch/x86/include/asm/uv/uv_bau.h b/arch/x86/include/asm/uv/uv_bau.h index 9b0e61bf7a88..bddd44f2f0ab 100644 --- a/arch/x86/include/asm/uv/uv_bau.h +++ b/arch/x86/include/asm/uv/uv_bau.h | |||
@@ -37,7 +37,7 @@ | |||
37 | #define UV_CPUS_PER_ACT_STATUS 32 | 37 | #define UV_CPUS_PER_ACT_STATUS 32 |
38 | #define UV_ACT_STATUS_MASK 0x3 | 38 | #define UV_ACT_STATUS_MASK 0x3 |
39 | #define UV_ACT_STATUS_SIZE 2 | 39 | #define UV_ACT_STATUS_SIZE 2 |
40 | #define UV_ACTIVATION_DESCRIPTOR_SIZE 32 | 40 | #define UV_ADP_SIZE 32 |
41 | #define UV_DISTRIBUTION_SIZE 256 | 41 | #define UV_DISTRIBUTION_SIZE 256 |
42 | #define UV_SW_ACK_NPENDING 8 | 42 | #define UV_SW_ACK_NPENDING 8 |
43 | #define UV_NET_ENDPOINT_INTD 0x38 | 43 | #define UV_NET_ENDPOINT_INTD 0x38 |
diff --git a/arch/x86/include/asm/uv/uv_hub.h b/arch/x86/include/asm/uv/uv_hub.h index d3a98ea1062e..341070f7ad5c 100644 --- a/arch/x86/include/asm/uv/uv_hub.h +++ b/arch/x86/include/asm/uv/uv_hub.h | |||
@@ -133,6 +133,7 @@ struct uv_scir_s { | |||
133 | struct uv_hub_info_s { | 133 | struct uv_hub_info_s { |
134 | unsigned long global_mmr_base; | 134 | unsigned long global_mmr_base; |
135 | unsigned long gpa_mask; | 135 | unsigned long gpa_mask; |
136 | unsigned int gnode_extra; | ||
136 | unsigned long gnode_upper; | 137 | unsigned long gnode_upper; |
137 | unsigned long lowmem_remap_top; | 138 | unsigned long lowmem_remap_top; |
138 | unsigned long lowmem_remap_base; | 139 | unsigned long lowmem_remap_base; |
@@ -159,7 +160,8 @@ DECLARE_PER_CPU(struct uv_hub_info_s, __uv_hub_info); | |||
159 | * p - PNODE (local part of nsids, right shifted 1) | 160 | * p - PNODE (local part of nsids, right shifted 1) |
160 | */ | 161 | */ |
161 | #define UV_NASID_TO_PNODE(n) (((n) >> 1) & uv_hub_info->pnode_mask) | 162 | #define UV_NASID_TO_PNODE(n) (((n) >> 1) & uv_hub_info->pnode_mask) |
162 | #define UV_PNODE_TO_NASID(p)	(((p) << 1) | uv_hub_info->gnode_upper) 	| 163 | #define UV_PNODE_TO_GNODE(p)	((p) | uv_hub_info->gnode_extra) |
164 | #define UV_PNODE_TO_NASID(p) (UV_PNODE_TO_GNODE(p) << 1) | ||
163 | 165 | ||
164 | #define UV_LOCAL_MMR_BASE 0xf4000000UL | 166 | #define UV_LOCAL_MMR_BASE 0xf4000000UL |
165 | #define UV_GLOBAL_MMR32_BASE 0xf8000000UL | 167 | #define UV_GLOBAL_MMR32_BASE 0xf8000000UL |
@@ -173,7 +175,7 @@ DECLARE_PER_CPU(struct uv_hub_info_s, __uv_hub_info); | |||
173 | #define UV_GLOBAL_MMR32_PNODE_BITS(p) ((p) << (UV_GLOBAL_MMR32_PNODE_SHIFT)) | 175 | #define UV_GLOBAL_MMR32_PNODE_BITS(p) ((p) << (UV_GLOBAL_MMR32_PNODE_SHIFT)) |
174 | 176 | ||
175 | #define UV_GLOBAL_MMR64_PNODE_BITS(p) \ | 177 | #define UV_GLOBAL_MMR64_PNODE_BITS(p) \ |
176 | ((unsigned long)(p) << UV_GLOBAL_MMR64_PNODE_SHIFT) | 178 | ((unsigned long)(UV_PNODE_TO_GNODE(p)) << UV_GLOBAL_MMR64_PNODE_SHIFT) |
177 | 179 | ||
178 | #define UV_APIC_PNODE_SHIFT 6 | 180 | #define UV_APIC_PNODE_SHIFT 6 |
179 | 181 | ||
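The uv_hub.h change above routes the pnode-to-NASID conversion through the new gnode_extra field. A standalone sketch of how the rewritten macros compose; the gnode_extra value is made up for illustration (real values come from the hub configuration):

#include <stdio.h>

static unsigned int gnode_extra = 0x40;		/* assumed hub-specific value */

#define UV_PNODE_TO_GNODE(p)	((p) | gnode_extra)
#define UV_PNODE_TO_NASID(p)	(UV_PNODE_TO_GNODE(p) << 1)

int main(void)
{
	int pnode = 3;

	printf("pnode %d -> gnode 0x%x -> nasid 0x%x\n",
	       pnode, UV_PNODE_TO_GNODE(pnode), UV_PNODE_TO_NASID(pnode));
	return 0;
}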
diff --git a/arch/x86/include/asm/vmx.h b/arch/x86/include/asm/vmx.h index 498f944010b9..11be5ad2e0e9 100644 --- a/arch/x86/include/asm/vmx.h +++ b/arch/x86/include/asm/vmx.h | |||
@@ -247,6 +247,7 @@ enum vmcs_field { | |||
247 | #define EXIT_REASON_MSR_READ 31 | 247 | #define EXIT_REASON_MSR_READ 31 |
248 | #define EXIT_REASON_MSR_WRITE 32 | 248 | #define EXIT_REASON_MSR_WRITE 32 |
249 | #define EXIT_REASON_MWAIT_INSTRUCTION 36 | 249 | #define EXIT_REASON_MWAIT_INSTRUCTION 36 |
250 | #define EXIT_REASON_MCE_DURING_VMENTRY 41 | ||
250 | #define EXIT_REASON_TPR_BELOW_THRESHOLD 43 | 251 | #define EXIT_REASON_TPR_BELOW_THRESHOLD 43 |
251 | #define EXIT_REASON_APIC_ACCESS 44 | 252 | #define EXIT_REASON_APIC_ACCESS 44 |
252 | #define EXIT_REASON_EPT_VIOLATION 48 | 253 | #define EXIT_REASON_EPT_VIOLATION 48 |
diff --git a/arch/x86/include/asm/xor.h b/arch/x86/include/asm/xor.h index 11b3bb86e17b..7fcf6f3dbcc3 100644 --- a/arch/x86/include/asm/xor.h +++ b/arch/x86/include/asm/xor.h | |||
@@ -1,5 +1,10 @@ | |||
1 | #ifdef CONFIG_KMEMCHECK | ||
2 | /* kmemcheck doesn't handle MMX/SSE/SSE2 instructions */ | ||
3 | # include <asm-generic/xor.h> | ||
4 | #else | ||
1 | #ifdef CONFIG_X86_32 | 5 | #ifdef CONFIG_X86_32 |
2 | # include "xor_32.h" | 6 | # include "xor_32.h" |
3 | #else | 7 | #else |
4 | # include "xor_64.h" | 8 | # include "xor_64.h" |
5 | #endif | 9 | #endif |
10 | #endif | ||
diff --git a/arch/x86/kernel/Makefile b/arch/x86/kernel/Makefile index 88d1bfc847d3..6c327b852e23 100644 --- a/arch/x86/kernel/Makefile +++ b/arch/x86/kernel/Makefile | |||
@@ -24,11 +24,13 @@ CFLAGS_vsyscall_64.o := $(PROFILING) -g0 $(nostackp) | |||
24 | CFLAGS_hpet.o := $(nostackp) | 24 | CFLAGS_hpet.o := $(nostackp) |
25 | CFLAGS_tsc.o := $(nostackp) | 25 | CFLAGS_tsc.o := $(nostackp) |
26 | CFLAGS_paravirt.o := $(nostackp) | 26 | CFLAGS_paravirt.o := $(nostackp) |
27 | GCOV_PROFILE_vsyscall_64.o := n | ||
28 | GCOV_PROFILE_hpet.o := n | ||
27 | 29 | ||
28 | obj-y := process_$(BITS).o signal.o entry_$(BITS).o | 30 | obj-y := process_$(BITS).o signal.o entry_$(BITS).o |
29 | obj-y += traps.o irq.o irq_$(BITS).o dumpstack_$(BITS).o | 31 | obj-y += traps.o irq.o irq_$(BITS).o dumpstack_$(BITS).o |
30 | obj-y += time_$(BITS).o ioport.o ldt.o dumpstack.o | 32 | obj-y += time_$(BITS).o ioport.o ldt.o dumpstack.o |
31 | obj-y += setup.o i8259.o irqinit_$(BITS).o | 33 | obj-y += setup.o i8259.o irqinit.o |
32 | obj-$(CONFIG_X86_VISWS) += visws_quirks.o | 34 | obj-$(CONFIG_X86_VISWS) += visws_quirks.o |
33 | obj-$(CONFIG_X86_32) += probe_roms_32.o | 35 | obj-$(CONFIG_X86_32) += probe_roms_32.o |
34 | obj-$(CONFIG_X86_32) += sys_i386_32.o i386_ksyms_32.o | 36 | obj-$(CONFIG_X86_32) += sys_i386_32.o i386_ksyms_32.o |
@@ -44,6 +46,7 @@ obj-y += process.o | |||
44 | obj-y += i387.o xsave.o | 46 | obj-y += i387.o xsave.o |
45 | obj-y += ptrace.o | 47 | obj-y += ptrace.o |
46 | obj-$(CONFIG_X86_DS) += ds.o | 48 | obj-$(CONFIG_X86_DS) += ds.o |
49 | obj-$(CONFIG_X86_DS_SELFTEST) += ds_selftest.o | ||
47 | obj-$(CONFIG_X86_32) += tls.o | 50 | obj-$(CONFIG_X86_32) += tls.o |
48 | obj-$(CONFIG_IA32_EMULATION) += tls.o | 51 | obj-$(CONFIG_IA32_EMULATION) += tls.o |
49 | obj-y += step.o | 52 | obj-y += step.o |
@@ -72,7 +75,7 @@ obj-$(CONFIG_KEXEC) += machine_kexec_$(BITS).o | |||
72 | obj-$(CONFIG_KEXEC) += relocate_kernel_$(BITS).o crash.o | 75 | obj-$(CONFIG_KEXEC) += relocate_kernel_$(BITS).o crash.o |
73 | obj-$(CONFIG_CRASH_DUMP) += crash_dump_$(BITS).o | 76 | obj-$(CONFIG_CRASH_DUMP) += crash_dump_$(BITS).o |
74 | obj-$(CONFIG_KPROBES) += kprobes.o | 77 | obj-$(CONFIG_KPROBES) += kprobes.o |
75 | obj-$(CONFIG_MODULES) += module_$(BITS).o | 78 | obj-$(CONFIG_MODULES) += module.o |
76 | obj-$(CONFIG_EFI) += efi.o efi_$(BITS).o efi_stub_$(BITS).o | 79 | obj-$(CONFIG_EFI) += efi.o efi_$(BITS).o efi_stub_$(BITS).o |
77 | obj-$(CONFIG_DOUBLEFAULT) += doublefault_32.o | 80 | obj-$(CONFIG_DOUBLEFAULT) += doublefault_32.o |
78 | obj-$(CONFIG_KGDB) += kgdb.o | 81 | obj-$(CONFIG_KGDB) += kgdb.o |
diff --git a/arch/x86/kernel/acpi/boot.c b/arch/x86/kernel/acpi/boot.c index 723989d7f802..6b8ca3a0285d 100644 --- a/arch/x86/kernel/acpi/boot.c +++ b/arch/x86/kernel/acpi/boot.c | |||
@@ -33,6 +33,7 @@ | |||
33 | #include <linux/irq.h> | 33 | #include <linux/irq.h> |
34 | #include <linux/bootmem.h> | 34 | #include <linux/bootmem.h> |
35 | #include <linux/ioport.h> | 35 | #include <linux/ioport.h> |
36 | #include <linux/pci.h> | ||
36 | 37 | ||
37 | #include <asm/pgtable.h> | 38 | #include <asm/pgtable.h> |
38 | #include <asm/io_apic.h> | 39 | #include <asm/io_apic.h> |
@@ -43,11 +44,7 @@ | |||
43 | 44 | ||
44 | static int __initdata acpi_force = 0; | 45 | static int __initdata acpi_force = 0; |
45 | u32 acpi_rsdt_forced; | 46 | u32 acpi_rsdt_forced; |
46 | #ifdef CONFIG_ACPI | 47 | int acpi_disabled; |
47 | int acpi_disabled = 0; | ||
48 | #else | ||
49 | int acpi_disabled = 1; | ||
50 | #endif | ||
51 | EXPORT_SYMBOL(acpi_disabled); | 48 | EXPORT_SYMBOL(acpi_disabled); |
52 | 49 | ||
53 | #ifdef CONFIG_X86_64 | 50 | #ifdef CONFIG_X86_64 |
@@ -121,72 +118,6 @@ void __init __acpi_unmap_table(char *map, unsigned long size) | |||
121 | early_iounmap(map, size); | 118 | early_iounmap(map, size); |
122 | } | 119 | } |
123 | 120 | ||
124 | #ifdef CONFIG_PCI_MMCONFIG | ||
125 | |||
126 | static int acpi_mcfg_64bit_base_addr __initdata = FALSE; | ||
127 | |||
128 | /* The physical address of the MMCONFIG aperture. Set from ACPI tables. */ | ||
129 | struct acpi_mcfg_allocation *pci_mmcfg_config; | ||
130 | int pci_mmcfg_config_num; | ||
131 | |||
132 | static int __init acpi_mcfg_oem_check(struct acpi_table_mcfg *mcfg) | ||
133 | { | ||
134 | if (!strcmp(mcfg->header.oem_id, "SGI")) | ||
135 | acpi_mcfg_64bit_base_addr = TRUE; | ||
136 | |||
137 | return 0; | ||
138 | } | ||
139 | |||
140 | int __init acpi_parse_mcfg(struct acpi_table_header *header) | ||
141 | { | ||
142 | struct acpi_table_mcfg *mcfg; | ||
143 | unsigned long i; | ||
144 | int config_size; | ||
145 | |||
146 | if (!header) | ||
147 | return -EINVAL; | ||
148 | |||
149 | mcfg = (struct acpi_table_mcfg *)header; | ||
150 | |||
151 | /* how many config structures do we have */ | ||
152 | pci_mmcfg_config_num = 0; | ||
153 | i = header->length - sizeof(struct acpi_table_mcfg); | ||
154 | while (i >= sizeof(struct acpi_mcfg_allocation)) { | ||
155 | ++pci_mmcfg_config_num; | ||
156 | i -= sizeof(struct acpi_mcfg_allocation); | ||
157 | }; | ||
158 | if (pci_mmcfg_config_num == 0) { | ||
159 | printk(KERN_ERR PREFIX "MMCONFIG has no entries\n"); | ||
160 | return -ENODEV; | ||
161 | } | ||
162 | |||
163 | config_size = pci_mmcfg_config_num * sizeof(*pci_mmcfg_config); | ||
164 | pci_mmcfg_config = kmalloc(config_size, GFP_KERNEL); | ||
165 | if (!pci_mmcfg_config) { | ||
166 | printk(KERN_WARNING PREFIX | ||
167 | "No memory for MCFG config tables\n"); | ||
168 | return -ENOMEM; | ||
169 | } | ||
170 | |||
171 | memcpy(pci_mmcfg_config, &mcfg[1], config_size); | ||
172 | |||
173 | acpi_mcfg_oem_check(mcfg); | ||
174 | |||
175 | for (i = 0; i < pci_mmcfg_config_num; ++i) { | ||
176 | if ((pci_mmcfg_config[i].address > 0xFFFFFFFF) && | ||
177 | !acpi_mcfg_64bit_base_addr) { | ||
178 | printk(KERN_ERR PREFIX | ||
179 | "MMCONFIG not in low 4GB of memory\n"); | ||
180 | kfree(pci_mmcfg_config); | ||
181 | pci_mmcfg_config_num = 0; | ||
182 | return -ENODEV; | ||
183 | } | ||
184 | } | ||
185 | |||
186 | return 0; | ||
187 | } | ||
188 | #endif /* CONFIG_PCI_MMCONFIG */ | ||
189 | |||
190 | #ifdef CONFIG_X86_LOCAL_APIC | 121 | #ifdef CONFIG_X86_LOCAL_APIC |
191 | static int __init acpi_parse_madt(struct acpi_table_header *table) | 122 | static int __init acpi_parse_madt(struct acpi_table_header *table) |
192 | { | 123 | { |
@@ -522,7 +453,7 @@ int acpi_gsi_to_irq(u32 gsi, unsigned int *irq) | |||
522 | * success: return IRQ number (>=0) | 453 | * success: return IRQ number (>=0) |
523 | * failure: return < 0 | 454 | * failure: return < 0 |
524 | */ | 455 | */ |
525 | int acpi_register_gsi(u32 gsi, int triggering, int polarity) | 456 | int acpi_register_gsi(struct device *dev, u32 gsi, int trigger, int polarity) |
526 | { | 457 | { |
527 | unsigned int irq; | 458 | unsigned int irq; |
528 | unsigned int plat_gsi = gsi; | 459 | unsigned int plat_gsi = gsi; |
@@ -532,14 +463,14 @@ int acpi_register_gsi(u32 gsi, int triggering, int polarity) | |||
532 | * Make sure all (legacy) PCI IRQs are set as level-triggered. | 463 | * Make sure all (legacy) PCI IRQs are set as level-triggered. |
533 | */ | 464 | */ |
534 | if (acpi_irq_model == ACPI_IRQ_MODEL_PIC) { | 465 | if (acpi_irq_model == ACPI_IRQ_MODEL_PIC) { |
535 | if (triggering == ACPI_LEVEL_SENSITIVE) | 466 | if (trigger == ACPI_LEVEL_SENSITIVE) |
536 | eisa_set_level_irq(gsi); | 467 | eisa_set_level_irq(gsi); |
537 | } | 468 | } |
538 | #endif | 469 | #endif |
539 | 470 | ||
540 | #ifdef CONFIG_X86_IO_APIC | 471 | #ifdef CONFIG_X86_IO_APIC |
541 | if (acpi_irq_model == ACPI_IRQ_MODEL_IOAPIC) { | 472 | if (acpi_irq_model == ACPI_IRQ_MODEL_IOAPIC) { |
542 | plat_gsi = mp_register_gsi(gsi, triggering, polarity); | 473 | plat_gsi = mp_register_gsi(dev, gsi, trigger, polarity); |
543 | } | 474 | } |
544 | #endif | 475 | #endif |
545 | acpi_gsi_to_irq(plat_gsi, &irq); | 476 | acpi_gsi_to_irq(plat_gsi, &irq); |
@@ -903,10 +834,8 @@ extern int es7000_plat; | |||
903 | #endif | 834 | #endif |
904 | 835 | ||
905 | static struct { | 836 | static struct { |
906 | int apic_id; | ||
907 | int gsi_base; | 837 | int gsi_base; |
908 | int gsi_end; | 838 | int gsi_end; |
909 | DECLARE_BITMAP(pin_programmed, MP_MAX_IOAPIC_PIN + 1); | ||
910 | } mp_ioapic_routing[MAX_IO_APICS]; | 839 | } mp_ioapic_routing[MAX_IO_APICS]; |
911 | 840 | ||
912 | int mp_find_ioapic(int gsi) | 841 | int mp_find_ioapic(int gsi) |
@@ -986,16 +915,12 @@ void __init mp_register_ioapic(int id, u32 address, u32 gsi_base) | |||
986 | 915 | ||
987 | set_fixmap_nocache(FIX_IO_APIC_BASE_0 + idx, address); | 916 | set_fixmap_nocache(FIX_IO_APIC_BASE_0 + idx, address); |
988 | mp_ioapics[idx].apicid = uniq_ioapic_id(id); | 917 | mp_ioapics[idx].apicid = uniq_ioapic_id(id); |
989 | #ifdef CONFIG_X86_32 | ||
990 | mp_ioapics[idx].apicver = io_apic_get_version(idx); | 918 | mp_ioapics[idx].apicver = io_apic_get_version(idx); |
991 | #else | 919 | |
992 | mp_ioapics[idx].apicver = 0; | ||
993 | #endif | ||
994 | /* | 920 | /* |
995 | * Build basic GSI lookup table to facilitate gsi->io_apic lookups | 921 | * Build basic GSI lookup table to facilitate gsi->io_apic lookups |
996 | * and to prevent reprogramming of IOAPIC pins (PCI GSIs). | 922 | * and to prevent reprogramming of IOAPIC pins (PCI GSIs). |
997 | */ | 923 | */ |
998 | mp_ioapic_routing[idx].apic_id = mp_ioapics[idx].apicid; | ||
999 | mp_ioapic_routing[idx].gsi_base = gsi_base; | 924 | mp_ioapic_routing[idx].gsi_base = gsi_base; |
1000 | mp_ioapic_routing[idx].gsi_end = gsi_base + | 925 | mp_ioapic_routing[idx].gsi_end = gsi_base + |
1001 | io_apic_get_redir_entries(idx); | 926 | io_apic_get_redir_entries(idx); |
@@ -1158,26 +1083,52 @@ void __init mp_config_acpi_legacy_irqs(void) | |||
1158 | } | 1083 | } |
1159 | } | 1084 | } |
1160 | 1085 | ||
1161 | int mp_register_gsi(u32 gsi, int triggering, int polarity) | 1086 | static int mp_config_acpi_gsi(struct device *dev, u32 gsi, int trigger, |
1087 | int polarity) | ||
1162 | { | 1088 | { |
1089 | #ifdef CONFIG_X86_MPPARSE | ||
1090 | struct mpc_intsrc mp_irq; | ||
1091 | struct pci_dev *pdev; | ||
1092 | unsigned char number; | ||
1093 | unsigned int devfn; | ||
1163 | int ioapic; | 1094 | int ioapic; |
1164 | int ioapic_pin; | 1095 | u8 pin; |
1165 | #ifdef CONFIG_X86_32 | ||
1166 | #define MAX_GSI_NUM 4096 | ||
1167 | #define IRQ_COMPRESSION_START 64 | ||
1168 | 1096 | ||
1169 | static int pci_irq = IRQ_COMPRESSION_START; | 1097 | if (!acpi_ioapic) |
1170 | /* | 1098 | return 0; |
1171 | * Mapping between Global System Interrupts, which | 1099 | if (!dev) |
1172 | * represent all possible interrupts, and IRQs | 1100 | return 0; |
1173 | * assigned to actual devices. | 1101 | if (dev->bus != &pci_bus_type) |
1174 | */ | 1102 | return 0; |
1175 | static int gsi_to_irq[MAX_GSI_NUM]; | 1103 | |
1176 | #else | 1104 | pdev = to_pci_dev(dev); |
1105 | number = pdev->bus->number; | ||
1106 | devfn = pdev->devfn; | ||
1107 | pin = pdev->pin; | ||
1108 | 	/* print the entry exactly as it should appear in the mptable */ 	| ||
1109 | mp_irq.type = MP_INTSRC; | ||
1110 | mp_irq.irqtype = mp_INT; | ||
1111 | mp_irq.irqflag = (trigger == ACPI_EDGE_SENSITIVE ? 4 : 0x0c) | | ||
1112 | (polarity == ACPI_ACTIVE_HIGH ? 1 : 3); | ||
1113 | mp_irq.srcbus = number; | ||
1114 | mp_irq.srcbusirq = (((devfn >> 3) & 0x1f) << 2) | ((pin - 1) & 3); | ||
1115 | ioapic = mp_find_ioapic(gsi); | ||
1116 | mp_irq.dstapic = mp_ioapics[ioapic].apicid; | ||
1117 | mp_irq.dstirq = mp_find_ioapic_pin(ioapic, gsi); | ||
1118 | |||
1119 | save_mp_irq(&mp_irq); | ||
1120 | #endif | ||
1121 | return 0; | ||
1122 | } | ||
1123 | |||
1124 | int mp_register_gsi(struct device *dev, u32 gsi, int trigger, int polarity) | ||
1125 | { | ||
1126 | int ioapic; | ||
1127 | int ioapic_pin; | ||
1128 | struct io_apic_irq_attr irq_attr; | ||
1177 | 1129 | ||
1178 | if (acpi_irq_model != ACPI_IRQ_MODEL_IOAPIC) | 1130 | if (acpi_irq_model != ACPI_IRQ_MODEL_IOAPIC) |
1179 | return gsi; | 1131 | return gsi; |
1180 | #endif | ||
1181 | 1132 | ||
1182 | /* Don't set up the ACPI SCI because it's already set up */ | 1133 | /* Don't set up the ACPI SCI because it's already set up */ |
1183 | if (acpi_gbl_FADT.sci_interrupt == gsi) | 1134 | if (acpi_gbl_FADT.sci_interrupt == gsi) |
@@ -1196,93 +1147,22 @@ int mp_register_gsi(u32 gsi, int triggering, int polarity) | |||
1196 | gsi = ioapic_renumber_irq(ioapic, gsi); | 1147 | gsi = ioapic_renumber_irq(ioapic, gsi); |
1197 | #endif | 1148 | #endif |
1198 | 1149 | ||
1199 | /* | ||
1200 | * Avoid pin reprogramming. PRTs typically include entries | ||
1201 | * with redundant pin->gsi mappings (but unique PCI devices); | ||
1202 | * we only program the IOAPIC on the first. | ||
1203 | */ | ||
1204 | if (ioapic_pin > MP_MAX_IOAPIC_PIN) { | 1150 | if (ioapic_pin > MP_MAX_IOAPIC_PIN) { |
1205 | printk(KERN_ERR "Invalid reference to IOAPIC pin " | 1151 | printk(KERN_ERR "Invalid reference to IOAPIC pin " |
1206 | "%d-%d\n", mp_ioapic_routing[ioapic].apic_id, | 1152 | "%d-%d\n", mp_ioapics[ioapic].apicid, |
1207 | ioapic_pin); | 1153 | ioapic_pin); |
1208 | return gsi; | 1154 | return gsi; |
1209 | } | 1155 | } |
1210 | if (test_bit(ioapic_pin, mp_ioapic_routing[ioapic].pin_programmed)) { | ||
1211 | pr_debug("Pin %d-%d already programmed\n", | ||
1212 | mp_ioapic_routing[ioapic].apic_id, ioapic_pin); | ||
1213 | #ifdef CONFIG_X86_32 | ||
1214 | return (gsi < IRQ_COMPRESSION_START ? gsi : gsi_to_irq[gsi]); | ||
1215 | #else | ||
1216 | return gsi; | ||
1217 | #endif | ||
1218 | } | ||
1219 | 1156 | ||
1220 | set_bit(ioapic_pin, mp_ioapic_routing[ioapic].pin_programmed); | 1157 | if (enable_update_mptable) |
1221 | #ifdef CONFIG_X86_32 | 1158 | mp_config_acpi_gsi(dev, gsi, trigger, polarity); |
1222 | /* | ||
1223 | * For GSI >= 64, use IRQ compression | ||
1224 | */ | ||
1225 | if ((gsi >= IRQ_COMPRESSION_START) | ||
1226 | && (triggering == ACPI_LEVEL_SENSITIVE)) { | ||
1227 | /* | ||
1228 | * For PCI devices assign IRQs in order, avoiding gaps | ||
1229 | * due to unused I/O APIC pins. | ||
1230 | */ | ||
1231 | int irq = gsi; | ||
1232 | if (gsi < MAX_GSI_NUM) { | ||
1233 | /* | ||
1234 | * Retain the VIA chipset work-around (gsi > 15), but | ||
1235 | * avoid a problem where the 8254 timer (IRQ0) is setup | ||
1236 | * via an override (so it's not on pin 0 of the ioapic), | ||
1237 | * and at the same time, the pin 0 interrupt is a PCI | ||
1238 | * type. The gsi > 15 test could cause these two pins | ||
1239 | * to be shared as IRQ0, and they are not shareable. | ||
1240 | * So test for this condition, and if necessary, avoid | ||
1241 | * the pin collision. | ||
1242 | */ | ||
1243 | gsi = pci_irq++; | ||
1244 | /* | ||
1245 | * Don't assign IRQ used by ACPI SCI | ||
1246 | */ | ||
1247 | if (gsi == acpi_gbl_FADT.sci_interrupt) | ||
1248 | gsi = pci_irq++; | ||
1249 | gsi_to_irq[irq] = gsi; | ||
1250 | } else { | ||
1251 | printk(KERN_ERR "GSI %u is too high\n", gsi); | ||
1252 | return gsi; | ||
1253 | } | ||
1254 | } | ||
1255 | #endif | ||
1256 | io_apic_set_pci_routing(ioapic, ioapic_pin, gsi, | ||
1257 | triggering == ACPI_EDGE_SENSITIVE ? 0 : 1, | ||
1258 | polarity == ACPI_ACTIVE_HIGH ? 0 : 1); | ||
1259 | return gsi; | ||
1260 | } | ||
1261 | 1159 | ||
1262 | int mp_config_acpi_gsi(unsigned char number, unsigned int devfn, u8 pin, | 1160 | set_io_apic_irq_attr(&irq_attr, ioapic, ioapic_pin, |
1263 | u32 gsi, int triggering, int polarity) | 1161 | trigger == ACPI_EDGE_SENSITIVE ? 0 : 1, |
1264 | { | 1162 | polarity == ACPI_ACTIVE_HIGH ? 0 : 1); |
1265 | #ifdef CONFIG_X86_MPPARSE | 1163 | io_apic_set_pci_routing(dev, gsi, &irq_attr); |
1266 | struct mpc_intsrc mp_irq; | ||
1267 | int ioapic; | ||
1268 | |||
1269 | if (!acpi_ioapic) | ||
1270 | return 0; | ||
1271 | 1164 | ||
1272 | /* print the entry should happen on mptable identically */ | 1165 | return gsi; |
1273 | mp_irq.type = MP_INTSRC; | ||
1274 | mp_irq.irqtype = mp_INT; | ||
1275 | mp_irq.irqflag = (triggering == ACPI_EDGE_SENSITIVE ? 4 : 0x0c) | | ||
1276 | (polarity == ACPI_ACTIVE_HIGH ? 1 : 3); | ||
1277 | mp_irq.srcbus = number; | ||
1278 | mp_irq.srcbusirq = (((devfn >> 3) & 0x1f) << 2) | ((pin - 1) & 3); | ||
1279 | ioapic = mp_find_ioapic(gsi); | ||
1280 | mp_irq.dstapic = mp_ioapic_routing[ioapic].apic_id; | ||
1281 | mp_irq.dstirq = mp_find_ioapic_pin(ioapic, gsi); | ||
1282 | |||
1283 | save_mp_irq(&mp_irq); | ||
1284 | #endif | ||
1285 | return 0; | ||
1286 | } | 1166 | } |
1287 | 1167 | ||
1288 | /* | 1168 | /* |
@@ -1569,14 +1449,6 @@ static struct dmi_system_id __initdata acpi_dmi_table[] = { | |||
1569 | }, | 1449 | }, |
1570 | { | 1450 | { |
1571 | .callback = force_acpi_ht, | 1451 | .callback = force_acpi_ht, |
1572 | .ident = "ASUS P4B266", | ||
1573 | .matches = { | ||
1574 | DMI_MATCH(DMI_BOARD_VENDOR, "ASUSTeK Computer INC."), | ||
1575 | DMI_MATCH(DMI_BOARD_NAME, "P4B266"), | ||
1576 | }, | ||
1577 | }, | ||
1578 | { | ||
1579 | .callback = force_acpi_ht, | ||
1580 | .ident = "ASUS P2B-DS", | 1452 | .ident = "ASUS P2B-DS", |
1581 | .matches = { | 1453 | .matches = { |
1582 | DMI_MATCH(DMI_BOARD_VENDOR, "ASUSTeK Computer INC."), | 1454 | DMI_MATCH(DMI_BOARD_VENDOR, "ASUSTeK Computer INC."), |
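mp_config_acpi_gsi() above encodes the ACPI trigger and polarity into the MP-table irqflag field: bits 0-1 carry polarity (1 = active high, 3 = active low) and bits 2-3 carry the trigger mode (4 = edge, 0xc = level). A small userspace sketch of that encoding; the ACPI_* constants here are local stand-ins so the sketch compiles on its own, not the values from the real ACPI headers:

#include <stdio.h>

/* local stand-ins; only equality against them matters for the sketch */
enum { ACPI_LEVEL_SENSITIVE, ACPI_EDGE_SENSITIVE };
enum { ACPI_ACTIVE_LOW, ACPI_ACTIVE_HIGH };

static unsigned char mp_irqflag(int trigger, int polarity)
{
	/* same expression as in mp_config_acpi_gsi() above */
	return (trigger == ACPI_EDGE_SENSITIVE ? 4 : 0x0c) |
	       (polarity == ACPI_ACTIVE_HIGH ? 1 : 3);
}

int main(void)
{
	printf("level/low : 0x%02x\n", mp_irqflag(ACPI_LEVEL_SENSITIVE, ACPI_ACTIVE_LOW));
	printf("edge/high : 0x%02x\n", mp_irqflag(ACPI_EDGE_SENSITIVE, ACPI_ACTIVE_HIGH));
	return 0;
}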
diff --git a/arch/x86/kernel/acpi/cstate.c b/arch/x86/kernel/acpi/cstate.c index bbbe4bbb6f34..8c44c232efcb 100644 --- a/arch/x86/kernel/acpi/cstate.c +++ b/arch/x86/kernel/acpi/cstate.c | |||
@@ -34,12 +34,22 @@ void acpi_processor_power_init_bm_check(struct acpi_processor_flags *flags, | |||
34 | flags->bm_check = 1; | 34 | flags->bm_check = 1; |
35 | else if (c->x86_vendor == X86_VENDOR_INTEL) { | 35 | else if (c->x86_vendor == X86_VENDOR_INTEL) { |
36 | /* | 36 | /* |
37 | * Today all CPUs that support C3 share cache. | 37 | * Today all MP CPUs that support C3 share cache. |
38 | * TBD: This needs to look at cache shared map, once | 38 | * And caches should not be flushed by software while |
39 | * multi-core detection patch makes to the base. | 39 | * entering C3 type state. |
40 | */ | 40 | */ |
41 | flags->bm_check = 1; | 41 | flags->bm_check = 1; |
42 | } | 42 | } |
43 | |||
44 | /* | ||
45 | * On all recent Intel platforms, ARB_DISABLE is a nop. | ||
46 | * So, set bm_control to zero to indicate that ARB_DISABLE | ||
47 | * is not required while entering C3 type state on | ||
48 | * P4, Core and beyond CPUs | ||
49 | */ | ||
50 | if (c->x86_vendor == X86_VENDOR_INTEL && | ||
51 | (c->x86 > 0x6 || (c->x86 == 6 && c->x86_model >= 14))) | ||
52 | flags->bm_control = 0; | ||
43 | } | 53 | } |
44 | EXPORT_SYMBOL(acpi_processor_power_init_bm_check); | 54 | EXPORT_SYMBOL(acpi_processor_power_init_bm_check); |
45 | 55 | ||
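The cstate.c hunk above clears bm_control when ARB_DISABLE is known to be a no-op on recent Intel CPUs. A userspace sketch of just that family/model test; the CPU values are illustrative assumptions:

#include <stdio.h>
#include <stdbool.h>

struct cpu { int family; int model; };

/* same condition as the Intel check added above */
static bool arb_disable_is_nop(const struct cpu *c)
{
	return c->family > 0x6 || (c->family == 6 && c->model >= 14);
}

int main(void)
{
	struct cpu newer = { .family = 6, .model = 15 };	/* e.g. a Core 2 class part */
	struct cpu older = { .family = 6, .model = 8 };		/* an older family-6 part */

	printf("newer: bm_control = %d\n", arb_disable_is_nop(&newer) ? 0 : 1);
	printf("older: bm_control = %d\n", arb_disable_is_nop(&older) ? 0 : 1);
	return 0;
}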
diff --git a/arch/x86/kernel/acpi/processor.c b/arch/x86/kernel/acpi/processor.c index 7c074eec39fb..d296f4a195c9 100644 --- a/arch/x86/kernel/acpi/processor.c +++ b/arch/x86/kernel/acpi/processor.c | |||
@@ -72,6 +72,7 @@ static void init_intel_pdc(struct acpi_processor *pr, struct cpuinfo_x86 *c) | |||
72 | return; | 72 | return; |
73 | } | 73 | } |
74 | 74 | ||
75 | |||
75 | /* Initialize _PDC data based on the CPU vendor */ | 76 | /* Initialize _PDC data based on the CPU vendor */ |
76 | void arch_acpi_processor_init_pdc(struct acpi_processor *pr) | 77 | void arch_acpi_processor_init_pdc(struct acpi_processor *pr) |
77 | { | 78 | { |
@@ -85,3 +86,15 @@ void arch_acpi_processor_init_pdc(struct acpi_processor *pr) | |||
85 | } | 86 | } |
86 | 87 | ||
87 | EXPORT_SYMBOL(arch_acpi_processor_init_pdc); | 88 | EXPORT_SYMBOL(arch_acpi_processor_init_pdc); |
89 | |||
90 | void arch_acpi_processor_cleanup_pdc(struct acpi_processor *pr) | ||
91 | { | ||
92 | if (pr->pdc) { | ||
93 | kfree(pr->pdc->pointer->buffer.pointer); | ||
94 | kfree(pr->pdc->pointer); | ||
95 | kfree(pr->pdc); | ||
96 | pr->pdc = NULL; | ||
97 | } | ||
98 | } | ||
99 | |||
100 | EXPORT_SYMBOL(arch_acpi_processor_cleanup_pdc); | ||
diff --git a/arch/x86/kernel/acpi/realmode/Makefile b/arch/x86/kernel/acpi/realmode/Makefile index 1c31cc0e9def..6a564ac67ef5 100644 --- a/arch/x86/kernel/acpi/realmode/Makefile +++ b/arch/x86/kernel/acpi/realmode/Makefile | |||
@@ -9,7 +9,7 @@ | |||
9 | always := wakeup.bin | 9 | always := wakeup.bin |
10 | targets := wakeup.elf wakeup.lds | 10 | targets := wakeup.elf wakeup.lds |
11 | 11 | ||
12 | wakeup-y += wakeup.o wakemain.o video-mode.o copy.o | 12 | wakeup-y += wakeup.o wakemain.o video-mode.o copy.o bioscall.o regs.o |
13 | 13 | ||
14 | # The link order of the video-*.o modules can matter. In particular, | 14 | # The link order of the video-*.o modules can matter. In particular, |
15 | # video-vga.o *must* be listed first, followed by video-vesa.o. | 15 | # video-vga.o *must* be listed first, followed by video-vesa.o. |
@@ -42,6 +42,7 @@ KBUILD_CFLAGS := $(LINUXINCLUDE) -g -Os -D_SETUP -D_WAKEUP -D__KERNEL__ \ | |||
42 | $(call cc-option, -mpreferred-stack-boundary=2) | 42 | $(call cc-option, -mpreferred-stack-boundary=2) |
43 | KBUILD_CFLAGS += $(call cc-option, -m32) | 43 | KBUILD_CFLAGS += $(call cc-option, -m32) |
44 | KBUILD_AFLAGS := $(KBUILD_CFLAGS) -D__ASSEMBLY__ | 44 | KBUILD_AFLAGS := $(KBUILD_CFLAGS) -D__ASSEMBLY__ |
45 | GCOV_PROFILE := n | ||
45 | 46 | ||
46 | WAKEUP_OBJS = $(addprefix $(obj)/,$(wakeup-y)) | 47 | WAKEUP_OBJS = $(addprefix $(obj)/,$(wakeup-y)) |
47 | 48 | ||
diff --git a/arch/x86/kernel/acpi/realmode/bioscall.S b/arch/x86/kernel/acpi/realmode/bioscall.S new file mode 100644 index 000000000000..f51eb0bb56ce --- /dev/null +++ b/arch/x86/kernel/acpi/realmode/bioscall.S | |||
@@ -0,0 +1 @@ | |||
#include "../../../boot/bioscall.S" | |||
diff --git a/arch/x86/kernel/acpi/realmode/regs.c b/arch/x86/kernel/acpi/realmode/regs.c new file mode 100644 index 000000000000..6206033ba202 --- /dev/null +++ b/arch/x86/kernel/acpi/realmode/regs.c | |||
@@ -0,0 +1 @@ | |||
#include "../../../boot/regs.c" | |||
diff --git a/arch/x86/kernel/acpi/sleep.c b/arch/x86/kernel/acpi/sleep.c index 7c243a2c5115..ca93638ba430 100644 --- a/arch/x86/kernel/acpi/sleep.c +++ b/arch/x86/kernel/acpi/sleep.c | |||
@@ -104,7 +104,7 @@ int acpi_save_state_mem(void) | |||
104 | initial_gs = per_cpu_offset(smp_processor_id()); | 104 | initial_gs = per_cpu_offset(smp_processor_id()); |
105 | #endif | 105 | #endif |
106 | initial_code = (unsigned long)wakeup_long64; | 106 | initial_code = (unsigned long)wakeup_long64; |
107 | saved_magic = 0x123456789abcdef0; | 107 | saved_magic = 0x123456789abcdef0L; |
108 | #endif /* CONFIG_64BIT */ | 108 | #endif /* CONFIG_64BIT */ |
109 | 109 | ||
110 | return 0; | 110 | return 0; |
diff --git a/arch/x86/kernel/amd_iommu.c b/arch/x86/kernel/amd_iommu.c index a97db99dad52..9372f0406ad4 100644 --- a/arch/x86/kernel/amd_iommu.c +++ b/arch/x86/kernel/amd_iommu.c | |||
@@ -55,7 +55,16 @@ struct iommu_cmd { | |||
55 | static int dma_ops_unity_map(struct dma_ops_domain *dma_dom, | 55 | static int dma_ops_unity_map(struct dma_ops_domain *dma_dom, |
56 | struct unity_map_entry *e); | 56 | struct unity_map_entry *e); |
57 | static struct dma_ops_domain *find_protection_domain(u16 devid); | 57 | static struct dma_ops_domain *find_protection_domain(u16 devid); |
58 | static u64* alloc_pte(struct protection_domain *dom, | ||
59 | unsigned long address, u64 | ||
60 | **pte_page, gfp_t gfp); | ||
61 | static void dma_ops_reserve_addresses(struct dma_ops_domain *dom, | ||
62 | unsigned long start_page, | ||
63 | unsigned int pages); | ||
58 | 64 | ||
65 | #ifndef BUS_NOTIFY_UNBOUND_DRIVER | ||
66 | #define BUS_NOTIFY_UNBOUND_DRIVER 0x0005 | ||
67 | #endif | ||
59 | 68 | ||
60 | #ifdef CONFIG_AMD_IOMMU_STATS | 69 | #ifdef CONFIG_AMD_IOMMU_STATS |
61 | 70 | ||
@@ -213,7 +222,7 @@ irqreturn_t amd_iommu_int_handler(int irq, void *data) | |||
213 | { | 222 | { |
214 | struct amd_iommu *iommu; | 223 | struct amd_iommu *iommu; |
215 | 224 | ||
216 | list_for_each_entry(iommu, &amd_iommu_list, list) | 225 | for_each_iommu(iommu) |
217 | iommu_poll_events(iommu); | 226 | iommu_poll_events(iommu); |
218 | 227 | ||
219 | return IRQ_HANDLED; | 228 | return IRQ_HANDLED; |
@@ -425,6 +434,16 @@ static void iommu_flush_tlb(struct amd_iommu *iommu, u16 domid) | |||
425 | iommu_queue_inv_iommu_pages(iommu, address, domid, 0, 1); | 434 | iommu_queue_inv_iommu_pages(iommu, address, domid, 0, 1); |
426 | } | 435 | } |
427 | 436 | ||
437 | /* Flush the whole IO/TLB for a given protection domain - including PDE */ | ||
438 | static void iommu_flush_tlb_pde(struct amd_iommu *iommu, u16 domid) | ||
439 | { | ||
440 | u64 address = CMD_INV_IOMMU_ALL_PAGES_ADDRESS; | ||
441 | |||
442 | INC_STATS_COUNTER(domain_flush_single); | ||
443 | |||
444 | iommu_queue_inv_iommu_pages(iommu, address, domid, 1, 1); | ||
445 | } | ||
446 | |||
428 | /* | 447 | /* |
429 | * This function is used to flush the IO/TLB for a given protection domain | 448 | * This function is used to flush the IO/TLB for a given protection domain |
430 | * on every IOMMU in the system | 449 | * on every IOMMU in the system |
@@ -440,7 +459,7 @@ static void iommu_flush_domain(u16 domid) | |||
440 | __iommu_build_inv_iommu_pages(&cmd, CMD_INV_IOMMU_ALL_PAGES_ADDRESS, | 459 | __iommu_build_inv_iommu_pages(&cmd, CMD_INV_IOMMU_ALL_PAGES_ADDRESS, |
441 | domid, 1, 1); | 460 | domid, 1, 1); |
442 | 461 | ||
443 | list_for_each_entry(iommu, &amd_iommu_list, list) { | 462 | for_each_iommu(iommu) { |
444 | spin_lock_irqsave(&iommu->lock, flags); | 463 | spin_lock_irqsave(&iommu->lock, flags); |
445 | __iommu_queue_command(iommu, &cmd); | 464 | __iommu_queue_command(iommu, &cmd); |
446 | __iommu_completion_wait(iommu); | 465 | __iommu_completion_wait(iommu); |
@@ -449,6 +468,35 @@ static void iommu_flush_domain(u16 domid) | |||
449 | } | 468 | } |
450 | } | 469 | } |
451 | 470 | ||
471 | void amd_iommu_flush_all_domains(void) | ||
472 | { | ||
473 | int i; | ||
474 | |||
475 | for (i = 1; i < MAX_DOMAIN_ID; ++i) { | ||
476 | if (!test_bit(i, amd_iommu_pd_alloc_bitmap)) | ||
477 | continue; | ||
478 | iommu_flush_domain(i); | ||
479 | } | ||
480 | } | ||
481 | |||
482 | void amd_iommu_flush_all_devices(void) | ||
483 | { | ||
484 | struct amd_iommu *iommu; | ||
485 | int i; | ||
486 | |||
487 | for (i = 0; i <= amd_iommu_last_bdf; ++i) { | ||
488 | if (amd_iommu_pd_table[i] == NULL) | ||
489 | continue; | ||
490 | |||
491 | iommu = amd_iommu_rlookup_table[i]; | ||
492 | if (!iommu) | ||
493 | continue; | ||
494 | |||
495 | iommu_queue_inv_dev_entry(iommu, i); | ||
496 | iommu_completion_wait(iommu); | ||
497 | } | ||
498 | } | ||
499 | |||
452 | /**************************************************************************** | 500 | /**************************************************************************** |
453 | * | 501 | * |
454 |  * The functions below are used to create the page table mappings for 	| 502 |  * The functions below are used to create the page table mappings for |
@@ -468,7 +516,7 @@ static int iommu_map_page(struct protection_domain *dom, | |||
468 | unsigned long phys_addr, | 516 | unsigned long phys_addr, |
469 | int prot) | 517 | int prot) |
470 | { | 518 | { |
471 | u64 __pte, *pte, *page; | 519 | u64 __pte, *pte; |
472 | 520 | ||
473 | bus_addr = PAGE_ALIGN(bus_addr); | 521 | bus_addr = PAGE_ALIGN(bus_addr); |
474 | phys_addr = PAGE_ALIGN(phys_addr); | 522 | phys_addr = PAGE_ALIGN(phys_addr); |
@@ -477,27 +525,7 @@ static int iommu_map_page(struct protection_domain *dom, | |||
477 | if (bus_addr > IOMMU_MAP_SIZE_L3 || !(prot & IOMMU_PROT_MASK)) | 525 | if (bus_addr > IOMMU_MAP_SIZE_L3 || !(prot & IOMMU_PROT_MASK)) |
478 | return -EINVAL; | 526 | return -EINVAL; |
479 | 527 | ||
480 | pte = &dom->pt_root[IOMMU_PTE_L2_INDEX(bus_addr)]; | 528 | pte = alloc_pte(dom, bus_addr, NULL, GFP_KERNEL); |
481 | |||
482 | if (!IOMMU_PTE_PRESENT(*pte)) { | ||
483 | page = (u64 *)get_zeroed_page(GFP_KERNEL); | ||
484 | if (!page) | ||
485 | return -ENOMEM; | ||
486 | *pte = IOMMU_L2_PDE(virt_to_phys(page)); | ||
487 | } | ||
488 | |||
489 | pte = IOMMU_PTE_PAGE(*pte); | ||
490 | pte = &pte[IOMMU_PTE_L1_INDEX(bus_addr)]; | ||
491 | |||
492 | if (!IOMMU_PTE_PRESENT(*pte)) { | ||
493 | page = (u64 *)get_zeroed_page(GFP_KERNEL); | ||
494 | if (!page) | ||
495 | return -ENOMEM; | ||
496 | *pte = IOMMU_L1_PDE(virt_to_phys(page)); | ||
497 | } | ||
498 | |||
499 | pte = IOMMU_PTE_PAGE(*pte); | ||
500 | pte = &pte[IOMMU_PTE_L0_INDEX(bus_addr)]; | ||
501 | 529 | ||
502 | if (IOMMU_PTE_PRESENT(*pte)) | 530 | if (IOMMU_PTE_PRESENT(*pte)) |
503 | return -EBUSY; | 531 | return -EBUSY; |
@@ -595,7 +623,8 @@ static int dma_ops_unity_map(struct dma_ops_domain *dma_dom, | |||
595 | * as allocated in the aperture | 623 | * as allocated in the aperture |
596 | */ | 624 | */ |
597 | if (addr < dma_dom->aperture_size) | 625 | if (addr < dma_dom->aperture_size) |
598 | __set_bit(addr >> PAGE_SHIFT, dma_dom->bitmap); | 626 | __set_bit(addr >> PAGE_SHIFT, |
627 | dma_dom->aperture[0]->bitmap); | ||
599 | } | 628 | } |
600 | 629 | ||
601 | return 0; | 630 | return 0; |
@@ -632,42 +661,191 @@ static int init_unity_mappings_for_device(struct dma_ops_domain *dma_dom, | |||
632 | ****************************************************************************/ | 661 | ****************************************************************************/ |
633 | 662 | ||
634 | /* | 663 | /* |
635 | * The address allocator core function. | 664 | * The address allocator core functions. |
636 | * | 665 | * |
637 | * called with domain->lock held | 666 | * called with domain->lock held |
638 | */ | 667 | */ |
668 | |||
669 | /* | ||
670 | * This function checks if there is a PTE for a given dma address. If | ||
671 | * there is one, it returns the pointer to it. | ||
672 | */ | ||
673 | static u64* fetch_pte(struct protection_domain *domain, | ||
674 | unsigned long address) | ||
675 | { | ||
676 | u64 *pte; | ||
677 | |||
678 | pte = &domain->pt_root[IOMMU_PTE_L2_INDEX(address)]; | ||
679 | |||
680 | if (!IOMMU_PTE_PRESENT(*pte)) | ||
681 | return NULL; | ||
682 | |||
683 | pte = IOMMU_PTE_PAGE(*pte); | ||
684 | pte = &pte[IOMMU_PTE_L1_INDEX(address)]; | ||
685 | |||
686 | if (!IOMMU_PTE_PRESENT(*pte)) | ||
687 | return NULL; | ||
688 | |||
689 | pte = IOMMU_PTE_PAGE(*pte); | ||
690 | pte = &pte[IOMMU_PTE_L0_INDEX(address)]; | ||
691 | |||
692 | return pte; | ||
693 | } | ||
694 | |||
695 | /* | ||
696 | * This function is used to add a new aperture range to an existing | ||
697 | * aperture in case of dma_ops domain allocation or address allocation | ||
698 | * failure. | ||
699 | */ | ||
700 | static int alloc_new_range(struct amd_iommu *iommu, | ||
701 | struct dma_ops_domain *dma_dom, | ||
702 | bool populate, gfp_t gfp) | ||
703 | { | ||
704 | int index = dma_dom->aperture_size >> APERTURE_RANGE_SHIFT; | ||
705 | int i; | ||
706 | |||
707 | #ifdef CONFIG_IOMMU_STRESS | ||
708 | populate = false; | ||
709 | #endif | ||
710 | |||
711 | if (index >= APERTURE_MAX_RANGES) | ||
712 | return -ENOMEM; | ||
713 | |||
714 | dma_dom->aperture[index] = kzalloc(sizeof(struct aperture_range), gfp); | ||
715 | if (!dma_dom->aperture[index]) | ||
716 | return -ENOMEM; | ||
717 | |||
718 | dma_dom->aperture[index]->bitmap = (void *)get_zeroed_page(gfp); | ||
719 | if (!dma_dom->aperture[index]->bitmap) | ||
720 | goto out_free; | ||
721 | |||
722 | dma_dom->aperture[index]->offset = dma_dom->aperture_size; | ||
723 | |||
724 | if (populate) { | ||
725 | unsigned long address = dma_dom->aperture_size; | ||
726 | int i, num_ptes = APERTURE_RANGE_PAGES / 512; | ||
727 | u64 *pte, *pte_page; | ||
728 | |||
729 | for (i = 0; i < num_ptes; ++i) { | ||
730 | pte = alloc_pte(&dma_dom->domain, address, | ||
731 | &pte_page, gfp); | ||
732 | if (!pte) | ||
733 | goto out_free; | ||
734 | |||
735 | dma_dom->aperture[index]->pte_pages[i] = pte_page; | ||
736 | |||
737 | address += APERTURE_RANGE_SIZE / 64; | ||
738 | } | ||
739 | } | ||
740 | |||
741 | dma_dom->aperture_size += APERTURE_RANGE_SIZE; | ||
742 | |||
743 | 	/* Initialize the exclusion range if necessary */ 	| ||
744 | if (iommu->exclusion_start && | ||
745 | iommu->exclusion_start >= dma_dom->aperture[index]->offset && | ||
746 | iommu->exclusion_start < dma_dom->aperture_size) { | ||
747 | unsigned long startpage = iommu->exclusion_start >> PAGE_SHIFT; | ||
748 | int pages = iommu_num_pages(iommu->exclusion_start, | ||
749 | iommu->exclusion_length, | ||
750 | PAGE_SIZE); | ||
751 | dma_ops_reserve_addresses(dma_dom, startpage, pages); | ||
752 | } | ||
753 | |||
754 | /* | ||
755 | * Check for areas already mapped as present in the new aperture | ||
756 | * range and mark those pages as reserved in the allocator. Such | ||
757 | * mappings may already exist as a result of requested unity | ||
758 | * mappings for devices. | ||
759 | */ | ||
760 | for (i = dma_dom->aperture[index]->offset; | ||
761 | i < dma_dom->aperture_size; | ||
762 | i += PAGE_SIZE) { | ||
763 | u64 *pte = fetch_pte(&dma_dom->domain, i); | ||
764 | if (!pte || !IOMMU_PTE_PRESENT(*pte)) | ||
765 | continue; | ||
766 | |||
767 | dma_ops_reserve_addresses(dma_dom, i << PAGE_SHIFT, 1); | ||
768 | } | ||
769 | |||
770 | return 0; | ||
771 | |||
772 | out_free: | ||
773 | free_page((unsigned long)dma_dom->aperture[index]->bitmap); | ||
774 | |||
775 | kfree(dma_dom->aperture[index]); | ||
776 | dma_dom->aperture[index] = NULL; | ||
777 | |||
778 | return -ENOMEM; | ||
779 | } | ||
780 | |||
781 | static unsigned long dma_ops_area_alloc(struct device *dev, | ||
782 | struct dma_ops_domain *dom, | ||
783 | unsigned int pages, | ||
784 | unsigned long align_mask, | ||
785 | u64 dma_mask, | ||
786 | unsigned long start) | ||
787 | { | ||
788 | unsigned long next_bit = dom->next_address % APERTURE_RANGE_SIZE; | ||
789 | int max_index = dom->aperture_size >> APERTURE_RANGE_SHIFT; | ||
790 | int i = start >> APERTURE_RANGE_SHIFT; | ||
791 | unsigned long boundary_size; | ||
792 | unsigned long address = -1; | ||
793 | unsigned long limit; | ||
794 | |||
795 | next_bit >>= PAGE_SHIFT; | ||
796 | |||
797 | boundary_size = ALIGN(dma_get_seg_boundary(dev) + 1, | ||
798 | PAGE_SIZE) >> PAGE_SHIFT; | ||
799 | |||
800 | 	for (; i < max_index; ++i) { 	| ||
801 | unsigned long offset = dom->aperture[i]->offset >> PAGE_SHIFT; | ||
802 | |||
803 | if (dom->aperture[i]->offset >= dma_mask) | ||
804 | break; | ||
805 | |||
806 | limit = iommu_device_max_index(APERTURE_RANGE_PAGES, offset, | ||
807 | dma_mask >> PAGE_SHIFT); | ||
808 | |||
809 | address = iommu_area_alloc(dom->aperture[i]->bitmap, | ||
810 | limit, next_bit, pages, 0, | ||
811 | boundary_size, align_mask); | ||
812 | if (address != -1) { | ||
813 | address = dom->aperture[i]->offset + | ||
814 | (address << PAGE_SHIFT); | ||
815 | dom->next_address = address + (pages << PAGE_SHIFT); | ||
816 | break; | ||
817 | } | ||
818 | |||
819 | next_bit = 0; | ||
820 | } | ||
821 | |||
822 | return address; | ||
823 | } | ||
824 | |||
639 | static unsigned long dma_ops_alloc_addresses(struct device *dev, | 825 | static unsigned long dma_ops_alloc_addresses(struct device *dev, |
640 | struct dma_ops_domain *dom, | 826 | struct dma_ops_domain *dom, |
641 | unsigned int pages, | 827 | unsigned int pages, |
642 | unsigned long align_mask, | 828 | unsigned long align_mask, |
643 | u64 dma_mask) | 829 | u64 dma_mask) |
644 | { | 830 | { |
645 | unsigned long limit; | ||
646 | unsigned long address; | 831 | unsigned long address; |
647 | unsigned long boundary_size; | ||
648 | 832 | ||
649 | boundary_size = ALIGN(dma_get_seg_boundary(dev) + 1, | 833 | #ifdef CONFIG_IOMMU_STRESS |
650 | PAGE_SIZE) >> PAGE_SHIFT; | 834 | dom->next_address = 0; |
651 | limit = iommu_device_max_index(dom->aperture_size >> PAGE_SHIFT, 0, | 835 | dom->need_flush = true; |
652 | dma_mask >> PAGE_SHIFT); | 836 | #endif |
653 | 837 | ||
654 | if (dom->next_bit >= limit) { | 838 | address = dma_ops_area_alloc(dev, dom, pages, align_mask, |
655 | dom->next_bit = 0; | 839 | dma_mask, dom->next_address); |
656 | dom->need_flush = true; | ||
657 | } | ||
658 | 840 | ||
659 | address = iommu_area_alloc(dom->bitmap, limit, dom->next_bit, pages, | ||
660 | 0 , boundary_size, align_mask); | ||
661 | if (address == -1) { | 841 | if (address == -1) { |
662 | address = iommu_area_alloc(dom->bitmap, limit, 0, pages, | 842 | dom->next_address = 0; |
663 | 0, boundary_size, align_mask); | 843 | address = dma_ops_area_alloc(dev, dom, pages, align_mask, |
844 | dma_mask, 0); | ||
664 | dom->need_flush = true; | 845 | dom->need_flush = true; |
665 | } | 846 | } |
666 | 847 | ||
667 | if (likely(address != -1)) { | 848 | if (unlikely(address == -1)) |
668 | dom->next_bit = address + pages; | ||
669 | address <<= PAGE_SHIFT; | ||
670 | } else | ||
671 | address = bad_dma_address; | 849 | address = bad_dma_address; |
672 | 850 | ||
673 | WARN_ON((address + (PAGE_SIZE*pages)) > dom->aperture_size); | 851 | WARN_ON((address + (PAGE_SIZE*pages)) > dom->aperture_size); |
@@ -684,11 +862,23 @@ static void dma_ops_free_addresses(struct dma_ops_domain *dom, | |||
684 | unsigned long address, | 862 | unsigned long address, |
685 | unsigned int pages) | 863 | unsigned int pages) |
686 | { | 864 | { |
687 | address >>= PAGE_SHIFT; | 865 | unsigned i = address >> APERTURE_RANGE_SHIFT; |
688 | iommu_area_free(dom->bitmap, address, pages); | 866 | struct aperture_range *range = dom->aperture[i]; |
867 | |||
868 | BUG_ON(i >= APERTURE_MAX_RANGES || range == NULL); | ||
869 | |||
870 | #ifdef CONFIG_IOMMU_STRESS | ||
871 | if (i < 4) | ||
872 | return; | ||
873 | #endif | ||
689 | 874 | ||
690 | if (address >= dom->next_bit) | 875 | if (address >= dom->next_address) |
691 | dom->need_flush = true; | 876 | dom->need_flush = true; |
877 | |||
878 | address = (address % APERTURE_RANGE_SIZE) >> PAGE_SHIFT; | ||
879 | |||
880 | iommu_area_free(range->bitmap, address, pages); | ||
881 | |||
692 | } | 882 | } |
693 | 883 | ||
694 | /**************************************************************************** | 884 | /**************************************************************************** |
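The allocator rework above splits the dma_ops aperture into fixed-size ranges, each with its own bitmap; dma_ops_area_alloc() and dma_ops_free_addresses() derive the range index and the bit inside that range from the DMA address. A standalone sketch of that indexing, assuming the 128 MB APERTURE_RANGE_SIZE defined in amd_iommu_types.h:

#include <stdio.h>

#define PAGE_SHIFT		12
#define APERTURE_RANGE_SHIFT	27				/* assumed: 128 MB per range */
#define APERTURE_RANGE_SIZE	(1ULL << APERTURE_RANGE_SHIFT)

int main(void)
{
	unsigned long long address = 0x0801f000ULL;	/* an example DMA address */

	unsigned range = address >> APERTURE_RANGE_SHIFT;			/* which aperture[] entry */
	unsigned bit   = (address % APERTURE_RANGE_SIZE) >> PAGE_SHIFT;	/* bit in that range's bitmap */

	printf("address 0x%llx -> range %u, bitmap bit %u\n", address, range, bit);
	return 0;
}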
@@ -736,12 +926,16 @@ static void dma_ops_reserve_addresses(struct dma_ops_domain *dom, | |||
736 | unsigned long start_page, | 926 | unsigned long start_page, |
737 | unsigned int pages) | 927 | unsigned int pages) |
738 | { | 928 | { |
739 | unsigned int last_page = dom->aperture_size >> PAGE_SHIFT; | 929 | unsigned int i, last_page = dom->aperture_size >> PAGE_SHIFT; |
740 | 930 | ||
741 | if (start_page + pages > last_page) | 931 | if (start_page + pages > last_page) |
742 | pages = last_page - start_page; | 932 | pages = last_page - start_page; |
743 | 933 | ||
744 | iommu_area_reserve(dom->bitmap, start_page, pages); | 934 | for (i = start_page; i < start_page + pages; ++i) { |
935 | int index = i / APERTURE_RANGE_PAGES; | ||
936 | int page = i % APERTURE_RANGE_PAGES; | ||
937 | __set_bit(page, dom->aperture[index]->bitmap); | ||
938 | } | ||
745 | } | 939 | } |
746 | 940 | ||
747 | static void free_pagetable(struct protection_domain *domain) | 941 | static void free_pagetable(struct protection_domain *domain) |
@@ -780,14 +974,19 @@ static void free_pagetable(struct protection_domain *domain) | |||
780 | */ | 974 | */ |
781 | static void dma_ops_domain_free(struct dma_ops_domain *dom) | 975 | static void dma_ops_domain_free(struct dma_ops_domain *dom) |
782 | { | 976 | { |
977 | int i; | ||
978 | |||
783 | if (!dom) | 979 | if (!dom) |
784 | return; | 980 | return; |
785 | 981 | ||
786 | free_pagetable(&dom->domain); | 982 | free_pagetable(&dom->domain); |
787 | 983 | ||
788 | kfree(dom->pte_pages); | 984 | for (i = 0; i < APERTURE_MAX_RANGES; ++i) { |
789 | 985 | if (!dom->aperture[i]) | |
790 | kfree(dom->bitmap); | 986 | continue; |
987 | free_page((unsigned long)dom->aperture[i]->bitmap); | ||
988 | kfree(dom->aperture[i]); | ||
989 | } | ||
791 | 990 | ||
792 | kfree(dom); | 991 | kfree(dom); |
793 | } | 992 | } |
@@ -797,19 +996,9 @@ static void dma_ops_domain_free(struct dma_ops_domain *dom) | |||
797 |  * It also initializes the page table and the address allocator data 	| 996 |  * It also initializes the page table and the address allocator data |
798 | * structures required for the dma_ops interface | 997 | * structures required for the dma_ops interface |
799 | */ | 998 | */ |
800 | static struct dma_ops_domain *dma_ops_domain_alloc(struct amd_iommu *iommu, | 999 | static struct dma_ops_domain *dma_ops_domain_alloc(struct amd_iommu *iommu) |
801 | unsigned order) | ||
802 | { | 1000 | { |
803 | struct dma_ops_domain *dma_dom; | 1001 | struct dma_ops_domain *dma_dom; |
804 | unsigned i, num_pte_pages; | ||
805 | u64 *l2_pde; | ||
806 | u64 address; | ||
807 | |||
808 | /* | ||
809 | * Currently the DMA aperture must be between 32 MB and 1GB in size | ||
810 | */ | ||
811 | if ((order < 25) || (order > 30)) | ||
812 | return NULL; | ||
813 | 1002 | ||
814 | dma_dom = kzalloc(sizeof(struct dma_ops_domain), GFP_KERNEL); | 1003 | dma_dom = kzalloc(sizeof(struct dma_ops_domain), GFP_KERNEL); |
815 | if (!dma_dom) | 1004 | if (!dma_dom) |
@@ -826,55 +1015,20 @@ static struct dma_ops_domain *dma_ops_domain_alloc(struct amd_iommu *iommu, | |||
826 | dma_dom->domain.priv = dma_dom; | 1015 | dma_dom->domain.priv = dma_dom; |
827 | if (!dma_dom->domain.pt_root) | 1016 | if (!dma_dom->domain.pt_root) |
828 | goto free_dma_dom; | 1017 | goto free_dma_dom; |
829 | dma_dom->aperture_size = (1ULL << order); | ||
830 | dma_dom->bitmap = kzalloc(dma_dom->aperture_size / (PAGE_SIZE * 8), | ||
831 | GFP_KERNEL); | ||
832 | if (!dma_dom->bitmap) | ||
833 | goto free_dma_dom; | ||
834 | /* | ||
835 | * mark the first page as allocated so we never return 0 as | ||
836 | * a valid dma-address. So we can use 0 as error value | ||
837 | */ | ||
838 | dma_dom->bitmap[0] = 1; | ||
839 | dma_dom->next_bit = 0; | ||
840 | 1018 | ||
841 | dma_dom->need_flush = false; | 1019 | dma_dom->need_flush = false; |
842 | dma_dom->target_dev = 0xffff; | 1020 | dma_dom->target_dev = 0xffff; |
843 | 1021 | ||
844 | /* Intialize the exclusion range if necessary */ | 1022 | if (alloc_new_range(iommu, dma_dom, true, GFP_KERNEL)) |
845 | if (iommu->exclusion_start && | 1023 | goto free_dma_dom; |
846 | iommu->exclusion_start < dma_dom->aperture_size) { | ||
847 | unsigned long startpage = iommu->exclusion_start >> PAGE_SHIFT; | ||
848 | int pages = iommu_num_pages(iommu->exclusion_start, | ||
849 | iommu->exclusion_length, | ||
850 | PAGE_SIZE); | ||
851 | dma_ops_reserve_addresses(dma_dom, startpage, pages); | ||
852 | } | ||
853 | 1024 | ||
854 | /* | 1025 | /* |
855 | * At the last step, build the page tables so we don't need to | 1026 | * mark the first page as allocated so we never return 0 as |
856 | * allocate page table pages in the dma_ops mapping/unmapping | 1027 | * a valid dma-address. So we can use 0 as error value |
857 | * path. | ||
858 | */ | 1028 | */ |
859 | num_pte_pages = dma_dom->aperture_size / (PAGE_SIZE * 512); | 1029 | dma_dom->aperture[0]->bitmap[0] = 1; |
860 | dma_dom->pte_pages = kzalloc(num_pte_pages * sizeof(void *), | 1030 | dma_dom->next_address = 0; |
861 | GFP_KERNEL); | ||
862 | if (!dma_dom->pte_pages) | ||
863 | goto free_dma_dom; | ||
864 | |||
865 | l2_pde = (u64 *)get_zeroed_page(GFP_KERNEL); | ||
866 | if (l2_pde == NULL) | ||
867 | goto free_dma_dom; | ||
868 | 1031 | ||
869 | dma_dom->domain.pt_root[0] = IOMMU_L2_PDE(virt_to_phys(l2_pde)); | ||
870 | |||
871 | for (i = 0; i < num_pte_pages; ++i) { | ||
872 | dma_dom->pte_pages[i] = (u64 *)get_zeroed_page(GFP_KERNEL); | ||
873 | if (!dma_dom->pte_pages[i]) | ||
874 | goto free_dma_dom; | ||
875 | address = virt_to_phys(dma_dom->pte_pages[i]); | ||
876 | l2_pde[i] = IOMMU_L1_PDE(address); | ||
877 | } | ||
878 | 1032 | ||
879 | return dma_dom; | 1033 | return dma_dom; |
880 | 1034 | ||
@@ -934,7 +1088,13 @@ static void attach_device(struct amd_iommu *iommu, | |||
934 | amd_iommu_pd_table[devid] = domain; | 1088 | amd_iommu_pd_table[devid] = domain; |
935 | write_unlock_irqrestore(&amd_iommu_devtable_lock, flags); | 1089 | write_unlock_irqrestore(&amd_iommu_devtable_lock, flags); |
936 | 1090 | ||
1091 | /* | ||
1092 | * We might boot into a crash-kernel here. The crashed kernel | ||
1093 | * left the caches in the IOMMU dirty. So we have to flush | ||
1094 | * here to evict all dirty stuff. | ||
1095 | */ | ||
937 | iommu_queue_inv_dev_entry(iommu, devid); | 1096 | iommu_queue_inv_dev_entry(iommu, devid); |
1097 | iommu_flush_tlb_pde(iommu, domain->id); | ||
938 | } | 1098 | } |
939 | 1099 | ||
940 | /* | 1100 | /* |
@@ -983,7 +1143,6 @@ static int device_change_notifier(struct notifier_block *nb, | |||
983 | struct protection_domain *domain; | 1143 | struct protection_domain *domain; |
984 | struct dma_ops_domain *dma_domain; | 1144 | struct dma_ops_domain *dma_domain; |
985 | struct amd_iommu *iommu; | 1145 | struct amd_iommu *iommu; |
986 | int order = amd_iommu_aperture_order; | ||
987 | unsigned long flags; | 1146 | unsigned long flags; |
988 | 1147 | ||
989 | if (devid > amd_iommu_last_bdf) | 1148 | if (devid > amd_iommu_last_bdf) |
@@ -1002,17 +1161,7 @@ static int device_change_notifier(struct notifier_block *nb, | |||
1002 | "to a non-dma-ops domain\n", dev_name(dev)); | 1161 | "to a non-dma-ops domain\n", dev_name(dev)); |
1003 | 1162 | ||
1004 | switch (action) { | 1163 | switch (action) { |
1005 | case BUS_NOTIFY_BOUND_DRIVER: | 1164 | case BUS_NOTIFY_UNBOUND_DRIVER: |
1006 | if (domain) | ||
1007 | goto out; | ||
1008 | dma_domain = find_protection_domain(devid); | ||
1009 | if (!dma_domain) | ||
1010 | dma_domain = iommu->default_dom; | ||
1011 | attach_device(iommu, &dma_domain->domain, devid); | ||
1012 | printk(KERN_INFO "AMD IOMMU: Using protection domain %d for " | ||
1013 | "device %s\n", dma_domain->domain.id, dev_name(dev)); | ||
1014 | break; | ||
1015 | case BUS_NOTIFY_UNBIND_DRIVER: | ||
1016 | if (!domain) | 1165 | if (!domain) |
1017 | goto out; | 1166 | goto out; |
1018 | detach_device(domain, devid); | 1167 | detach_device(domain, devid); |
@@ -1022,7 +1171,7 @@ static int device_change_notifier(struct notifier_block *nb, | |||
1022 | dma_domain = find_protection_domain(devid); | 1171 | dma_domain = find_protection_domain(devid); |
1023 | if (dma_domain) | 1172 | if (dma_domain) |
1024 | goto out; | 1173 | goto out; |
1025 | dma_domain = dma_ops_domain_alloc(iommu, order); | 1174 | dma_domain = dma_ops_domain_alloc(iommu); |
1026 | if (!dma_domain) | 1175 | if (!dma_domain) |
1027 | goto out; | 1176 | goto out; |
1028 | dma_domain->target_dev = devid; | 1177 | dma_domain->target_dev = devid; |
@@ -1133,8 +1282,8 @@ static int get_device_resources(struct device *dev, | |||
1133 | dma_dom = (*iommu)->default_dom; | 1282 | dma_dom = (*iommu)->default_dom; |
1134 | *domain = &dma_dom->domain; | 1283 | *domain = &dma_dom->domain; |
1135 | attach_device(*iommu, *domain, *bdf); | 1284 | attach_device(*iommu, *domain, *bdf); |
1136 | printk(KERN_INFO "AMD IOMMU: Using protection domain %d for " | 1285 | DUMP_printk("Using protection domain %d for device %s\n", |
1137 | "device %s\n", (*domain)->id, dev_name(dev)); | 1286 | (*domain)->id, dev_name(dev)); |
1138 | } | 1287 | } |
1139 | 1288 | ||
1140 | if (domain_for_device(_bdf) == NULL) | 1289 | if (domain_for_device(_bdf) == NULL) |
@@ -1144,6 +1293,66 @@ static int get_device_resources(struct device *dev, | |||
1144 | } | 1293 | } |
1145 | 1294 | ||
1146 | /* | 1295 | /* |
1296 | * Allocate any missing page-table levels for an address and return a pointer to its PTE | ||
1297 | */ | ||
1298 | static u64* alloc_pte(struct protection_domain *dom, | ||
1299 | unsigned long address, u64 **pte_page, gfp_t gfp) | ||
1300 | { | ||
1301 | u64 *pte, *page; | ||
1302 | |||
1303 | pte = &dom->pt_root[IOMMU_PTE_L2_INDEX(address)]; | ||
1304 | |||
1305 | if (!IOMMU_PTE_PRESENT(*pte)) { | ||
1306 | page = (u64 *)get_zeroed_page(gfp); | ||
1307 | if (!page) | ||
1308 | return NULL; | ||
1309 | *pte = IOMMU_L2_PDE(virt_to_phys(page)); | ||
1310 | } | ||
1311 | |||
1312 | pte = IOMMU_PTE_PAGE(*pte); | ||
1313 | pte = &pte[IOMMU_PTE_L1_INDEX(address)]; | ||
1314 | |||
1315 | if (!IOMMU_PTE_PRESENT(*pte)) { | ||
1316 | page = (u64 *)get_zeroed_page(gfp); | ||
1317 | if (!page) | ||
1318 | return NULL; | ||
1319 | *pte = IOMMU_L1_PDE(virt_to_phys(page)); | ||
1320 | } | ||
1321 | |||
1322 | pte = IOMMU_PTE_PAGE(*pte); | ||
1323 | |||
1324 | if (pte_page) | ||
1325 | *pte_page = pte; | ||
1326 | |||
1327 | pte = &pte[IOMMU_PTE_L0_INDEX(address)]; | ||
1328 | |||
1329 | return pte; | ||
1330 | } | ||
1331 | |||
1332 | /* | ||
1333 | * This function fetches the PTE for a given address in the aperture | ||
1334 | */ | ||
1335 | static u64* dma_ops_get_pte(struct dma_ops_domain *dom, | ||
1336 | unsigned long address) | ||
1337 | { | ||
1338 | struct aperture_range *aperture; | ||
1339 | u64 *pte, *pte_page; | ||
1340 | |||
1341 | aperture = dom->aperture[APERTURE_RANGE_INDEX(address)]; | ||
1342 | if (!aperture) | ||
1343 | return NULL; | ||
1344 | |||
1345 | pte = aperture->pte_pages[APERTURE_PAGE_INDEX(address)]; | ||
1346 | if (!pte) { | ||
1347 | pte = alloc_pte(&dom->domain, address, &pte_page, GFP_ATOMIC); | ||
1348 | aperture->pte_pages[APERTURE_PAGE_INDEX(address)] = pte_page; | ||
1349 | } else | ||
1350 | pte += IOMMU_PTE_L0_INDEX(address); | ||
1351 | |||
1352 | return pte; | ||
1353 | } | ||
1354 | |||
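The two helpers above replace the old flat pte_pages array: dma_ops_get_pte() first looks up the aperture range covering the address and reuses its cached pte_pages entry if present; otherwise alloc_pte() walks the three page-table levels, allocating missing intermediate pages, and returns a pointer to the PTE. The index macros themselves are not part of this hunk; as a minimal userspace sketch, assuming the usual 4 KiB pages with 512 entries per level (an assumption for illustration, not taken from the patch), a DMA address splits into the three indices like this:

#include <stdint.h>
#include <stdio.h>

/* Assumed layout: 4 KiB pages and 512 (9-bit) entries per table level. */
#define L2_INDEX(addr)  (((addr) >> 30) & 0x1ffULL)  /* which L1 table under the root */
#define L1_INDEX(addr)  (((addr) >> 21) & 0x1ffULL)  /* which PTE page under that L1  */
#define L0_INDEX(addr)  (((addr) >> 12) & 0x1ffULL)  /* which PTE inside the PTE page */

int main(void)
{
    uint64_t iova = 0x12345000ULL;  /* hypothetical DMA address */

    /* prints: L2=0 L1=145 L0=325 */
    printf("L2=%llu L1=%llu L0=%llu\n",
           (unsigned long long)L2_INDEX(iova),
           (unsigned long long)L1_INDEX(iova),
           (unsigned long long)L0_INDEX(iova));
    return 0;
}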
1355 | /* | ||
1147 | * This is the generic map function. It maps one 4kb page at paddr to | 1356 | * This is the generic map function. It maps one 4kb page at paddr to |
1148 | * the given address in the DMA address space for the domain. | 1357 | * the given address in the DMA address space for the domain. |
1149 | */ | 1358 | */ |
@@ -1159,8 +1368,9 @@ static dma_addr_t dma_ops_domain_map(struct amd_iommu *iommu, | |||
1159 | 1368 | ||
1160 | paddr &= PAGE_MASK; | 1369 | paddr &= PAGE_MASK; |
1161 | 1370 | ||
1162 | pte = dom->pte_pages[IOMMU_PTE_L1_INDEX(address)]; | 1371 | pte = dma_ops_get_pte(dom, address); |
1163 | pte += IOMMU_PTE_L0_INDEX(address); | 1372 | if (!pte) |
1373 | return bad_dma_address; | ||
1164 | 1374 | ||
1165 | __pte = paddr | IOMMU_PTE_P | IOMMU_PTE_FC; | 1375 | __pte = paddr | IOMMU_PTE_P | IOMMU_PTE_FC; |
1166 | 1376 | ||
@@ -1185,14 +1395,20 @@ static void dma_ops_domain_unmap(struct amd_iommu *iommu, | |||
1185 | struct dma_ops_domain *dom, | 1395 | struct dma_ops_domain *dom, |
1186 | unsigned long address) | 1396 | unsigned long address) |
1187 | { | 1397 | { |
1398 | struct aperture_range *aperture; | ||
1188 | u64 *pte; | 1399 | u64 *pte; |
1189 | 1400 | ||
1190 | if (address >= dom->aperture_size) | 1401 | if (address >= dom->aperture_size) |
1191 | return; | 1402 | return; |
1192 | 1403 | ||
1193 | WARN_ON(address & ~PAGE_MASK || address >= dom->aperture_size); | 1404 | aperture = dom->aperture[APERTURE_RANGE_INDEX(address)]; |
1405 | if (!aperture) | ||
1406 | return; | ||
1407 | |||
1408 | pte = aperture->pte_pages[APERTURE_PAGE_INDEX(address)]; | ||
1409 | if (!pte) | ||
1410 | return; | ||
1194 | 1411 | ||
1195 | pte = dom->pte_pages[IOMMU_PTE_L1_INDEX(address)]; | ||
1196 | pte += IOMMU_PTE_L0_INDEX(address); | 1412 | pte += IOMMU_PTE_L0_INDEX(address); |
1197 | 1413 | ||
1198 | WARN_ON(!*pte); | 1414 | WARN_ON(!*pte); |
@@ -1216,7 +1432,7 @@ static dma_addr_t __map_single(struct device *dev, | |||
1216 | u64 dma_mask) | 1432 | u64 dma_mask) |
1217 | { | 1433 | { |
1218 | dma_addr_t offset = paddr & ~PAGE_MASK; | 1434 | dma_addr_t offset = paddr & ~PAGE_MASK; |
1219 | dma_addr_t address, start; | 1435 | dma_addr_t address, start, ret; |
1220 | unsigned int pages; | 1436 | unsigned int pages; |
1221 | unsigned long align_mask = 0; | 1437 | unsigned long align_mask = 0; |
1222 | int i; | 1438 | int i; |
@@ -1232,14 +1448,33 @@ static dma_addr_t __map_single(struct device *dev, | |||
1232 | if (align) | 1448 | if (align) |
1233 | align_mask = (1UL << get_order(size)) - 1; | 1449 | align_mask = (1UL << get_order(size)) - 1; |
1234 | 1450 | ||
1451 | retry: | ||
1235 | address = dma_ops_alloc_addresses(dev, dma_dom, pages, align_mask, | 1452 | address = dma_ops_alloc_addresses(dev, dma_dom, pages, align_mask, |
1236 | dma_mask); | 1453 | dma_mask); |
1237 | if (unlikely(address == bad_dma_address)) | 1454 | if (unlikely(address == bad_dma_address)) { |
1238 | goto out; | 1455 | /* |
1456 | * Setting next_address here lets the address allocator | ||
1457 | * scan only the newly allocated range on its first pass. | ||
1458 | * This is a small optimization. | ||
1459 | */ | ||
1460 | dma_dom->next_address = dma_dom->aperture_size; | ||
1461 | |||
1462 | if (alloc_new_range(iommu, dma_dom, false, GFP_ATOMIC)) | ||
1463 | goto out; | ||
1464 | |||
1465 | /* | ||
1466 | * aperture was successfully enlarged by 128 MB, try | ||
1467 | * allocation again | ||
1468 | */ | ||
1469 | goto retry; | ||
1470 | } | ||
1239 | 1471 | ||
1240 | start = address; | 1472 | start = address; |
1241 | for (i = 0; i < pages; ++i) { | 1473 | for (i = 0; i < pages; ++i) { |
1242 | dma_ops_domain_map(iommu, dma_dom, start, paddr, dir); | 1474 | ret = dma_ops_domain_map(iommu, dma_dom, start, paddr, dir); |
1475 | if (ret == bad_dma_address) | ||
1476 | goto out_unmap; | ||
1477 | |||
1243 | paddr += PAGE_SIZE; | 1478 | paddr += PAGE_SIZE; |
1244 | start += PAGE_SIZE; | 1479 | start += PAGE_SIZE; |
1245 | } | 1480 | } |
@@ -1255,6 +1490,17 @@ static dma_addr_t __map_single(struct device *dev, | |||
1255 | 1490 | ||
1256 | out: | 1491 | out: |
1257 | return address; | 1492 | return address; |
1493 | |||
1494 | out_unmap: | ||
1495 | |||
1496 | for (--i; i >= 0; --i) { | ||
1497 | start -= PAGE_SIZE; | ||
1498 | dma_ops_domain_unmap(iommu, dma_dom, start); | ||
1499 | } | ||
1500 | |||
1501 | dma_ops_free_addresses(dma_dom, address, pages); | ||
1502 | |||
1503 | return bad_dma_address; | ||
1258 | } | 1504 | } |
1259 | 1505 | ||
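Taken together, the retry label and the out_unmap path turn __map_single() into a grow-on-exhaustion allocator: a failed address allocation enlarges the aperture by one more range (128 MB, per the comment above) and retries, while a failed per-page mapping unwinds everything mapped so far and frees the addresses. A compressed standalone sketch of that control flow, in which the bump allocator and all names and sizes are hypothetical stand-ins, looks roughly like this:

#include <stdbool.h>
#include <stdio.h>

#define BAD_ADDR   (~0UL)
#define RANGE_SIZE (128UL << 20)   /* assumed growth step: one 128 MB range */

static unsigned long aperture_size = RANGE_SIZE;
static unsigned long next_free;

/* Toy allocator: bump pointer inside the current aperture. */
static unsigned long alloc_addresses(unsigned long bytes)
{
    if (next_free + bytes > aperture_size)
        return BAD_ADDR;
    next_free += bytes;
    return next_free - bytes;
}

static bool grow_aperture(void)
{
    aperture_size += RANGE_SIZE;   /* stands in for alloc_new_range() */
    return true;
}

static unsigned long map_single(unsigned long bytes)
{
    unsigned long addr;

retry:
    addr = alloc_addresses(bytes);
    if (addr == BAD_ADDR) {
        if (!grow_aperture())
            return BAD_ADDR;       /* give up, like returning bad_dma_address */
        goto retry;                /* scan the freshly added range next */
    }
    /* the per-page mapping loop with its rollback would run here */
    return addr;
}

int main(void)
{
    printf("first:  %#lx\n", map_single(100UL << 20));
    printf("second: %#lx\n", map_single(100UL << 20));  /* forces one grow-and-retry */
    return 0;
}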
1260 | /* | 1506 | /* |
@@ -1537,8 +1783,10 @@ static void *alloc_coherent(struct device *dev, size_t size, | |||
1537 | *dma_addr = __map_single(dev, iommu, domain->priv, paddr, | 1783 | *dma_addr = __map_single(dev, iommu, domain->priv, paddr, |
1538 | size, DMA_BIDIRECTIONAL, true, dma_mask); | 1784 | size, DMA_BIDIRECTIONAL, true, dma_mask); |
1539 | 1785 | ||
1540 | if (*dma_addr == bad_dma_address) | 1786 | if (*dma_addr == bad_dma_address) { |
1787 | spin_unlock_irqrestore(&domain->lock, flags); | ||
1541 | goto out_free; | 1788 | goto out_free; |
1789 | } | ||
1542 | 1790 | ||
1543 | iommu_completion_wait(iommu); | 1791 | iommu_completion_wait(iommu); |
1544 | 1792 | ||
@@ -1625,7 +1873,6 @@ static void prealloc_protection_domains(void) | |||
1625 | struct pci_dev *dev = NULL; | 1873 | struct pci_dev *dev = NULL; |
1626 | struct dma_ops_domain *dma_dom; | 1874 | struct dma_ops_domain *dma_dom; |
1627 | struct amd_iommu *iommu; | 1875 | struct amd_iommu *iommu; |
1628 | int order = amd_iommu_aperture_order; | ||
1629 | u16 devid; | 1876 | u16 devid; |
1630 | 1877 | ||
1631 | while ((dev = pci_get_device(PCI_ANY_ID, PCI_ANY_ID, dev)) != NULL) { | 1878 | while ((dev = pci_get_device(PCI_ANY_ID, PCI_ANY_ID, dev)) != NULL) { |
@@ -1638,7 +1885,7 @@ static void prealloc_protection_domains(void) | |||
1638 | iommu = amd_iommu_rlookup_table[devid]; | 1885 | iommu = amd_iommu_rlookup_table[devid]; |
1639 | if (!iommu) | 1886 | if (!iommu) |
1640 | continue; | 1887 | continue; |
1641 | dma_dom = dma_ops_domain_alloc(iommu, order); | 1888 | dma_dom = dma_ops_domain_alloc(iommu); |
1642 | if (!dma_dom) | 1889 | if (!dma_dom) |
1643 | continue; | 1890 | continue; |
1644 | init_unity_mappings_for_device(dma_dom, devid); | 1891 | init_unity_mappings_for_device(dma_dom, devid); |
@@ -1664,7 +1911,6 @@ static struct dma_map_ops amd_iommu_dma_ops = { | |||
1664 | int __init amd_iommu_init_dma_ops(void) | 1911 | int __init amd_iommu_init_dma_ops(void) |
1665 | { | 1912 | { |
1666 | struct amd_iommu *iommu; | 1913 | struct amd_iommu *iommu; |
1667 | int order = amd_iommu_aperture_order; | ||
1668 | int ret; | 1914 | int ret; |
1669 | 1915 | ||
1670 | /* | 1916 | /* |
@@ -1672,8 +1918,8 @@ int __init amd_iommu_init_dma_ops(void) | |||
1672 | * found in the system. Devices not assigned to any other | 1918 | * found in the system. Devices not assigned to any other |
1673 | * protection domain will be assigned to the default one. | 1919 | * protection domain will be assigned to the default one. |
1674 | */ | 1920 | */ |
1675 | list_for_each_entry(iommu, &amd_iommu_list, list) { | 1921 | for_each_iommu(iommu) { |
1676 | iommu->default_dom = dma_ops_domain_alloc(iommu, order); | 1922 | iommu->default_dom = dma_ops_domain_alloc(iommu); |
1677 | if (iommu->default_dom == NULL) | 1923 | if (iommu->default_dom == NULL) |
1678 | return -ENOMEM; | 1924 | return -ENOMEM; |
1679 | iommu->default_dom->domain.flags |= PD_DEFAULT_MASK; | 1925 | iommu->default_dom->domain.flags |= PD_DEFAULT_MASK; |
@@ -1710,7 +1956,7 @@ int __init amd_iommu_init_dma_ops(void) | |||
1710 | 1956 | ||
1711 | free_domains: | 1957 | free_domains: |
1712 | 1958 | ||
1713 | list_for_each_entry(iommu, &amd_iommu_list, list) { | 1959 | for_each_iommu(iommu) { |
1714 | if (iommu->default_dom) | 1960 | if (iommu->default_dom) |
1715 | dma_ops_domain_free(iommu->default_dom); | 1961 | dma_ops_domain_free(iommu->default_dom); |
1716 | } | 1962 | } |
@@ -1842,7 +2088,7 @@ static int amd_iommu_attach_device(struct iommu_domain *dom, | |||
1842 | 2088 | ||
1843 | old_domain = domain_for_device(devid); | 2089 | old_domain = domain_for_device(devid); |
1844 | if (old_domain) | 2090 | if (old_domain) |
1845 | return -EBUSY; | 2091 | detach_device(old_domain, devid); |
1846 | 2092 | ||
1847 | attach_device(iommu, domain, devid); | 2093 | attach_device(iommu, domain, devid); |
1848 | 2094 | ||
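The last hunk above changes amd_iommu_attach_device() so that a device already bound to another protection domain is detached from it first, instead of the call failing with -EBUSY. Seen from a consumer of the generic IOMMU API (KVM device assignment, for example), a schematic re-attach sequence could look like the sketch below; the function name is hypothetical and this only illustrates the call flow, it is not code from this patch:

#include <linux/errno.h>
#include <linux/iommu.h>
#include <linux/pci.h>

/*
 * Schematic only: move a PCI device into a freshly allocated
 * protection domain. With the change above, an existing binding
 * is detached implicitly instead of returning -EBUSY.
 */
static int move_device_to_new_domain(struct pci_dev *pdev)
{
    struct iommu_domain *dom = iommu_domain_alloc();
    int ret;

    if (!dom)
        return -ENOMEM;

    ret = iommu_attach_device(dom, &pdev->dev);
    if (ret) {
        iommu_domain_free(dom);
        return ret;
    }

    /* ... use the domain for DMA mappings ... */

    iommu_detach_device(dom, &pdev->dev);
    iommu_domain_free(dom);
    return 0;
}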
diff --git a/arch/x86/kernel/amd_iommu_init.c b/arch/x86/kernel/amd_iommu_init.c index 8c0be0902dac..10b2accd12ea 100644 --- a/arch/x86/kernel/amd_iommu_init.c +++ b/arch/x86/kernel/amd_iommu_init.c | |||
@@ -115,15 +115,21 @@ struct ivmd_header { | |||
115 | u64 range_length; | 115 | u64 range_length; |
116 | } __attribute__((packed)); | 116 | } __attribute__((packed)); |
117 | 117 | ||
118 | bool amd_iommu_dump; | ||
119 | |||
118 | static int __initdata amd_iommu_detected; | 120 | static int __initdata amd_iommu_detected; |
119 | 121 | ||
120 | u16 amd_iommu_last_bdf; /* largest PCI device id we have | 122 | u16 amd_iommu_last_bdf; /* largest PCI device id we have |
121 | to handle */ | 123 | to handle */ |
122 | LIST_HEAD(amd_iommu_unity_map); /* a list of required unity mappings | 124 | LIST_HEAD(amd_iommu_unity_map); /* a list of required unity mappings |
123 | we find in ACPI */ | 125 | we find in ACPI */ |
124 | unsigned amd_iommu_aperture_order = 26; /* size of aperture in power of 2 */ | 126 | #ifdef CONFIG_IOMMU_STRESS |
127 | bool amd_iommu_isolate = false; | ||
128 | #else | ||
125 | bool amd_iommu_isolate = true; /* if true, device isolation is | 129 | bool amd_iommu_isolate = true; /* if true, device isolation is |
126 | enabled */ | 130 | enabled */ |
131 | #endif | ||
132 | |||
127 | bool amd_iommu_unmap_flush; /* if true, flush on every unmap */ | 133 | bool amd_iommu_unmap_flush; /* if true, flush on every unmap */ |
128 | 134 | ||
129 | LIST_HEAD(amd_iommu_list); /* list of all AMD IOMMUs in the | 135 | LIST_HEAD(amd_iommu_list); /* list of all AMD IOMMUs in the |
@@ -175,7 +181,7 @@ static inline void update_last_devid(u16 devid) | |||
175 | static inline unsigned long tbl_size(int entry_size) | 181 | static inline unsigned long tbl_size(int entry_size) |
176 | { | 182 | { |
177 | unsigned shift = PAGE_SHIFT + | 183 | unsigned shift = PAGE_SHIFT + |
178 | get_order(amd_iommu_last_bdf * entry_size); | 184 | get_order(((int)amd_iommu_last_bdf + 1) * entry_size); |
179 | 185 | ||
180 | return 1UL << shift; | 186 | return 1UL << shift; |
181 | } | 187 | } |
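The tbl_size() change fixes an off-by-one: amd_iommu_last_bdf is the largest device id in use, so last_bdf + 1 entries must fit in the table. A small standalone check (the 32-byte entry size and the simplified get_order() stand-in are assumptions made for illustration) shows a case where the old formula reserves one page too few:

#include <stdio.h>

#define PAGE_SHIFT 12
#define PAGE_SIZE  (1UL << PAGE_SHIFT)

/* Simplified stand-in for the kernel's get_order(): smallest power-of-two page count. */
static int get_order(unsigned long size)
{
    int order = 0;
    unsigned long pages = (size + PAGE_SIZE - 1) >> PAGE_SHIFT;

    while ((1UL << order) < pages)
        order++;
    return order;
}

int main(void)
{
    int last_bdf = 128;   /* hypothetical largest device id */
    int entry_size = 32;  /* assumed bytes per table entry  */

    unsigned long old_size = 1UL << (PAGE_SHIFT + get_order(last_bdf * entry_size));
    unsigned long new_size = 1UL << (PAGE_SHIFT + get_order((last_bdf + 1) * entry_size));

    /* 129 entries need 4128 bytes; the old formula reserves only one 4096-byte page. */
    printf("old: %lu bytes, new: %lu bytes\n", old_size, new_size);
    return 0;
}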
@@ -193,7 +199,7 @@ static inline unsigned long tbl_size(int entry_size) | |||
193 | * This function sets the exclusion range in the IOMMU. DMA accesses to the | 199 | * This function sets the exclusion range in the IOMMU. DMA accesses to the |
194 | * exclusion range are passed through untranslated | 200 | * exclusion range are passed through untranslated |
195 | */ | 201 | */ |
196 | static void __init iommu_set_exclusion_range(struct amd_iommu *iommu) | 202 | static void iommu_set_exclusion_range(struct amd_iommu *iommu) |
197 | { | 203 | { |
198 | u64 start = iommu->exclusion_start & PAGE_MASK; | 204 | u64 start = iommu->exclusion_start & PAGE_MASK; |
199 | u64 limit = (start + iommu->exclusion_length) & PAGE_MASK; | 205 | u64 limit = (start + iommu->exclusion_length) & PAGE_MASK; |
@@ -225,7 +231,7 @@ static void __init iommu_set_device_table(struct amd_iommu *iommu) | |||
225 | } | 231 | } |
226 | 232 | ||
227 | /* Generic functions to enable/disable certain features of the IOMMU. */ | 233 | /* Generic functions to enable/disable certain features of the IOMMU. */ |
228 | static void __init iommu_feature_enable(struct amd_iommu *iommu, u8 bit) | 234 | static void iommu_feature_enable(struct amd_iommu *iommu, u8 bit) |
229 | { | 235 | { |
230 | u32 ctrl; | 236 | u32 ctrl; |
231 | 237 | ||
@@ -244,7 +250,7 @@ static void __init iommu_feature_disable(struct amd_iommu *iommu, u8 bit) | |||
244 | } | 250 | } |
245 | 251 | ||
246 | /* Function to enable the hardware */ | 252 | /* Function to enable the hardware */ |
247 | static void __init iommu_enable(struct amd_iommu *iommu) | 253 | static void iommu_enable(struct amd_iommu *iommu) |
248 | { | 254 | { |
249 | printk(KERN_INFO "AMD IOMMU: Enabling IOMMU at %s cap 0x%hx\n", | 255 | printk(KERN_INFO "AMD IOMMU: Enabling IOMMU at %s cap 0x%hx\n", |
250 | dev_name(&iommu->dev->dev), iommu->cap_ptr); | 256 | dev_name(&iommu->dev->dev), iommu->cap_ptr); |
@@ -252,11 +258,17 @@ static void __init iommu_enable(struct amd_iommu *iommu) | |||
252 | iommu_feature_enable(iommu, CONTROL_IOMMU_EN); | 258 | iommu_feature_enable(iommu, CONTROL_IOMMU_EN); |
253 | } | 259 | } |
254 | 260 | ||
255 | /* Function to enable IOMMU event logging and event interrupts */ | 261 | static void iommu_disable(struct amd_iommu *iommu) |
256 | static void __init iommu_enable_event_logging(struct amd_iommu *iommu) | ||
257 | { | 262 | { |
258 | iommu_feature_enable(iommu, CONTROL_EVT_LOG_EN); | 263 | /* Disable command buffer */ |
259 | iommu_feature_enable(iommu, CONTROL_EVT_INT_EN); | 264 | iommu_feature_disable(iommu, CONTROL_CMDBUF_EN); |
265 | |||
266 | /* Disable event logging and event interrupts */ | ||
267 | iommu_feature_disable(iommu, CONTROL_EVT_INT_EN); | ||
268 | iommu_feature_disable(iommu, CONTROL_EVT_LOG_EN); | ||
269 | |||
270 | /* Disable IOMMU hardware itself */ | ||
271 | iommu_feature_disable(iommu, CONTROL_IOMMU_EN); | ||
260 | } | 272 | } |
261 | 273 | ||
262 | /* | 274 | /* |
@@ -413,25 +425,36 @@ static u8 * __init alloc_command_buffer(struct amd_iommu *iommu) | |||
413 | { | 425 | { |
414 | u8 *cmd_buf = (u8 *)__get_free_pages(GFP_KERNEL | __GFP_ZERO, | 426 | u8 *cmd_buf = (u8 *)__get_free_pages(GFP_KERNEL | __GFP_ZERO, |
415 | get_order(CMD_BUFFER_SIZE)); | 427 | get_order(CMD_BUFFER_SIZE)); |
416 | u64 entry; | ||
417 | 428 | ||
418 | if (cmd_buf == NULL) | 429 | if (cmd_buf == NULL) |
419 | return NULL; | 430 | return NULL; |
420 | 431 | ||
421 | iommu->cmd_buf_size = CMD_BUFFER_SIZE; | 432 | iommu->cmd_buf_size = CMD_BUFFER_SIZE; |
422 | 433 | ||
423 | entry = (u64)virt_to_phys(cmd_buf); | 434 | return cmd_buf; |
435 | } | ||
436 | |||
437 | /* | ||
438 | * This function writes the command buffer address to the hardware and | ||
439 | * enables it. | ||
440 | */ | ||
441 | static void iommu_enable_command_buffer(struct amd_iommu *iommu) | ||
442 | { | ||
443 | u64 entry; | ||
444 | |||
445 | BUG_ON(iommu->cmd_buf == NULL); | ||
446 | |||
447 | entry = (u64)virt_to_phys(iommu->cmd_buf); | ||
424 | entry |= MMIO_CMD_SIZE_512; | 448 | entry |= MMIO_CMD_SIZE_512; |
449 | |||
425 | memcpy_toio(iommu->mmio_base + MMIO_CMD_BUF_OFFSET, | 450 | memcpy_toio(iommu->mmio_base + MMIO_CMD_BUF_OFFSET, |
426 | &entry, sizeof(entry)); | 451 | &entry, sizeof(entry)); |
427 | 452 | ||
428 | /* set head and tail to zero manually */ | 453 | /* set head and tail to zero manually */ |
429 | writel(0x00, iommu->mmio_base + MMIO_CMD_HEAD_OFFSET); | 454 | writel(0x00, iommu->mmio_base + MMIO_CMD_HEAD_OFFSET); |
430 | writel(0x00, iommu->mmio_base + MMIO_CMD_TAIL_OFFSET); | 455 | writel(0x00, iommu->mmio_base + MMIO_CMD_TAIL_OFFSET); |
431 | 456 | ||
432 | iommu_feature_enable(iommu, CONTROL_CMDBUF_EN); | 457 | iommu_feature_enable(iommu, CONTROL_CMDBUF_EN); |
433 | |||
434 | return cmd_buf; | ||
435 | } | 458 | } |
436 | 459 | ||
437 | static void __init free_command_buffer(struct amd_iommu *iommu) | 460 | static void __init free_command_buffer(struct amd_iommu *iommu) |
@@ -443,20 +466,31 @@ static void __init free_command_buffer(struct amd_iommu *iommu) | |||
443 | /* allocates the memory where the IOMMU will log its events to */ | 466 | /* allocates the memory where the IOMMU will log its events to */ |
444 | static u8 * __init alloc_event_buffer(struct amd_iommu *iommu) | 467 | static u8 * __init alloc_event_buffer(struct amd_iommu *iommu) |
445 | { | 468 | { |
446 | u64 entry; | ||
447 | iommu->evt_buf = (u8 *)__get_free_pages(GFP_KERNEL | __GFP_ZERO, | 469 | iommu->evt_buf = (u8 *)__get_free_pages(GFP_KERNEL | __GFP_ZERO, |
448 | get_order(EVT_BUFFER_SIZE)); | 470 | get_order(EVT_BUFFER_SIZE)); |
449 | 471 | ||
450 | if (iommu->evt_buf == NULL) | 472 | if (iommu->evt_buf == NULL) |
451 | return NULL; | 473 | return NULL; |
452 | 474 | ||
475 | return iommu->evt_buf; | ||
476 | } | ||
477 | |||
478 | static void iommu_enable_event_buffer(struct amd_iommu *iommu) | ||
479 | { | ||
480 | u64 entry; | ||
481 | |||
482 | BUG_ON(iommu->evt_buf == NULL); | ||
483 | |||
453 | entry = (u64)virt_to_phys(iommu->evt_buf) | EVT_LEN_MASK; | 484 | entry = (u64)virt_to_phys(iommu->evt_buf) | EVT_LEN_MASK; |
485 | |||
454 | memcpy_toio(iommu->mmio_base + MMIO_EVT_BUF_OFFSET, | 486 | memcpy_toio(iommu->mmio_base + MMIO_EVT_BUF_OFFSET, |
455 | &entry, sizeof(entry)); | 487 | &entry, sizeof(entry)); |
456 | 488 | ||
457 | iommu->evt_buf_size = EVT_BUFFER_SIZE; | 489 | /* set head and tail to zero manually */ |
490 | writel(0x00, iommu->mmio_base + MMIO_EVT_HEAD_OFFSET); | ||
491 | writel(0x00, iommu->mmio_base + MMIO_EVT_TAIL_OFFSET); | ||
458 | 492 | ||
459 | return iommu->evt_buf; | 493 | iommu_feature_enable(iommu, CONTROL_EVT_LOG_EN); |
460 | } | 494 | } |
461 | 495 | ||
462 | static void __init free_event_buffer(struct amd_iommu *iommu) | 496 | static void __init free_event_buffer(struct amd_iommu *iommu) |
@@ -596,32 +630,83 @@ static void __init init_iommu_from_acpi(struct amd_iommu *iommu, | |||
596 | p += sizeof(struct ivhd_header); | 630 | p += sizeof(struct ivhd_header); |
597 | end += h->length; | 631 | end += h->length; |
598 | 632 | ||
633 | |||
599 | while (p < end) { | 634 | while (p < end) { |
600 | e = (struct ivhd_entry *)p; | 635 | e = (struct ivhd_entry *)p; |
601 | switch (e->type) { | 636 | switch (e->type) { |
602 | case IVHD_DEV_ALL: | 637 | case IVHD_DEV_ALL: |
638 | |||
639 | DUMP_printk(" DEV_ALL\t\t\t first devid: %02x:%02x.%x" | ||
640 | " last device %02x:%02x.%x flags: %02x\n", | ||
641 | PCI_BUS(iommu->first_device), | ||
642 | PCI_SLOT(iommu->first_device), | ||
643 | PCI_FUNC(iommu->first_device), | ||
644 | PCI_BUS(iommu->last_device), | ||
645 | PCI_SLOT(iommu->last_device), | ||
646 | PCI_FUNC(iommu->last_device), | ||
647 | e->flags); | ||
648 | |||
603 | for (dev_i = iommu->first_device; | 649 | for (dev_i = iommu->first_device; |
604 | dev_i <= iommu->last_device; ++dev_i) | 650 | dev_i <= iommu->last_device; ++dev_i) |
605 | set_dev_entry_from_acpi(iommu, dev_i, | 651 | set_dev_entry_from_acpi(iommu, dev_i, |
606 | e->flags, 0); | 652 | e->flags, 0); |
607 | break; | 653 | break; |
608 | case IVHD_DEV_SELECT: | 654 | case IVHD_DEV_SELECT: |
655 | |||
656 | DUMP_printk(" DEV_SELECT\t\t\t devid: %02x:%02x.%x " | ||
657 | "flags: %02x\n", | ||
658 | PCI_BUS(e->devid), | ||
659 | PCI_SLOT(e->devid), | ||
660 | PCI_FUNC(e->devid), | ||
661 | e->flags); | ||
662 | |||
609 | devid = e->devid; | 663 | devid = e->devid; |
610 | set_dev_entry_from_acpi(iommu, devid, e->flags, 0); | 664 | set_dev_entry_from_acpi(iommu, devid, e->flags, 0); |
611 | break; | 665 | break; |
612 | case IVHD_DEV_SELECT_RANGE_START: | 666 | case IVHD_DEV_SELECT_RANGE_START: |
667 | |||
668 | DUMP_printk(" DEV_SELECT_RANGE_START\t " | ||
669 | "devid: %02x:%02x.%x flags: %02x\n", | ||
670 | PCI_BUS(e->devid), | ||
671 | PCI_SLOT(e->devid), | ||
672 | PCI_FUNC(e->devid), | ||
673 | e->flags); | ||
674 | |||
613 | devid_start = e->devid; | 675 | devid_start = e->devid; |
614 | flags = e->flags; | 676 | flags = e->flags; |
615 | ext_flags = 0; | 677 | ext_flags = 0; |
616 | alias = false; | 678 | alias = false; |
617 | break; | 679 | break; |
618 | case IVHD_DEV_ALIAS: | 680 | case IVHD_DEV_ALIAS: |
681 | |||
682 | DUMP_printk(" DEV_ALIAS\t\t\t devid: %02x:%02x.%x " | ||
683 | "flags: %02x devid_to: %02x:%02x.%x\n", | ||
684 | PCI_BUS(e->devid), | ||
685 | PCI_SLOT(e->devid), | ||
686 | PCI_FUNC(e->devid), | ||
687 | e->flags, | ||
688 | PCI_BUS(e->ext >> 8), | ||
689 | PCI_SLOT(e->ext >> 8), | ||
690 | PCI_FUNC(e->ext >> 8)); | ||
691 | |||
619 | devid = e->devid; | 692 | devid = e->devid; |
620 | devid_to = e->ext >> 8; | 693 | devid_to = e->ext >> 8; |
621 | set_dev_entry_from_acpi(iommu, devid, e->flags, 0); | 694 | set_dev_entry_from_acpi(iommu, devid_to, e->flags, 0); |
622 | amd_iommu_alias_table[devid] = devid_to; | 695 | amd_iommu_alias_table[devid] = devid_to; |
623 | break; | 696 | break; |
624 | case IVHD_DEV_ALIAS_RANGE: | 697 | case IVHD_DEV_ALIAS_RANGE: |
698 | |||
699 | DUMP_printk(" DEV_ALIAS_RANGE\t\t " | ||
700 | "devid: %02x:%02x.%x flags: %02x " | ||
701 | "devid_to: %02x:%02x.%x\n", | ||
702 | PCI_BUS(e->devid), | ||
703 | PCI_SLOT(e->devid), | ||
704 | PCI_FUNC(e->devid), | ||
705 | e->flags, | ||
706 | PCI_BUS(e->ext >> 8), | ||
707 | PCI_SLOT(e->ext >> 8), | ||
708 | PCI_FUNC(e->ext >> 8)); | ||
709 | |||
625 | devid_start = e->devid; | 710 | devid_start = e->devid; |
626 | flags = e->flags; | 711 | flags = e->flags; |
627 | devid_to = e->ext >> 8; | 712 | devid_to = e->ext >> 8; |
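The DUMP_printk lines added above render each 16-bit IVHD device id in bus:slot.function form. Assuming the macros follow the usual PCI encoding, bus in bits 15..8, slot in bits 7..3 and function in bits 2..0 (an assumption here, the definitions are not part of this hunk), decoding one hypothetical devid shows what the output looks like:

#include <stdint.h>
#include <stdio.h>

/* Assumed to match the kernel's encoding of a 16-bit device id. */
#define PCI_BUS(devid)   (((devid) >> 8) & 0xff)
#define PCI_SLOT(devid)  (((devid) >> 3) & 0x1f)
#define PCI_FUNC(devid)  ((devid) & 0x07)

int main(void)
{
    uint16_t devid = 0x0a10;  /* hypothetical IVHD devid */

    /* prints "0a:02.0": bus 0x0a, slot 0x02, function 0 */
    printf("%02x:%02x.%x\n", PCI_BUS(devid), PCI_SLOT(devid), PCI_FUNC(devid));
    return 0;
}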
@@ -629,17 +714,39 @@ static void __init init_iommu_from_acpi(struct amd_iommu *iommu, | |||
629 | alias = true; | 714 | alias = true; |
630 | break; | 715 | break; |
631 | case IVHD_DEV_EXT_SELECT: | 716 | case IVHD_DEV_EXT_SELECT: |
717 | |||
718 | DUMP_printk(" DEV_EXT_SELECT\t\t devid: %02x:%02x.%x " | ||
719 | "flags: %02x ext: %08x\n", | ||
720 | PCI_BUS(e->devid), | ||
721 | PCI_SLOT(e->devid), | ||
722 | PCI_FUNC(e->devid), | ||
723 | e->flags, e->ext); | ||
724 | |||
632 | devid = e->devid; | 725 | devid = e->devid; |
633 | set_dev_entry_from_acpi(iommu, devid, e->flags, | 726 | set_dev_entry_from_acpi(iommu, devid, e->flags, |
634 | e->ext); | 727 | e->ext); |
635 | break; | 728 | break; |
636 | case IVHD_DEV_EXT_SELECT_RANGE: | 729 | case IVHD_DEV_EXT_SELECT_RANGE: |
730 | |||
731 | DUMP_printk(" DEV_EXT_SELECT_RANGE\t devid: " | ||
732 | "%02x:%02x.%x flags: %02x ext: %08x\n", | ||
733 | PCI_BUS(e->devid), | ||
734 | PCI_SLOT(e->devid), | ||
735 | PCI_FUNC(e->devid), | ||
736 | e->flags, e->ext); | ||
737 | |||
637 | devid_start = e->devid; | 738 | devid_start = e->devid; |
638 | flags = e->flags; | 739 | flags = e->flags; |
639 | ext_flags = e->ext; | 740 | ext_flags = e->ext; |
640 | alias = false; | 741 | alias = false; |
641 | break; | 742 | break; |
642 | case IVHD_DEV_RANGE_END: | 743 | case IVHD_DEV_RANGE_END: |
744 | |||
745 | DUMP_printk(" DEV_RANGE_END\t\t devid: %02x:%02x.%x\n", | ||
746 | PCI_BUS(e->devid), | ||
747 | PCI_SLOT(e->devid), | ||
748 | PCI_FUNC(e->devid)); | ||
749 | |||
643 | devid = e->devid; | 750 | devid = e->devid; |
644 | for (dev_i = devid_start; dev_i <= devid; ++dev_i) { | 751 | for (dev_i = devid_start; dev_i <= devid; ++dev_i) { |
645 | if (alias) | 752 | if (alias) |
@@ -679,7 +786,7 @@ static void __init free_iommu_all(void) | |||
679 | { | 786 | { |
680 | struct amd_iommu *iommu, *next; | 787 | struct amd_iommu *iommu, *next; |
681 | 788 | ||
682 | list_for_each_entry_safe(iommu, next, &amd_iommu_list, list) { | 789 | for_each_iommu_safe(iommu, next) { |
683 | list_del(&iommu->list); | 790 | list_del(&iommu->list); |
684 | free_iommu_one(iommu); | 791 | free_iommu_one(iommu); |
685 | kfree(iommu); | 792 | kfree(iommu); |
@@ -710,7 +817,6 @@ static int __init init_iommu_one(struct amd_iommu *iommu, struct ivhd_header *h) | |||
710 | if (!iommu->mmio_base) | 817 | if (!iommu->mmio_base) |
711 | return -ENOMEM; | 818 | return -ENOMEM; |
712 | 819 | ||
713 | iommu_set_device_table(iommu); | ||
714 | iommu->cmd_buf = alloc_command_buffer(iommu); | 820 | iommu->cmd_buf = alloc_command_buffer(iommu); |
715 | if (!iommu->cmd_buf) | 821 | if (!iommu->cmd_buf) |
716 | return -ENOMEM; | 822 | return -ENOMEM; |
@@ -746,6 +852,15 @@ static int __init init_iommu_all(struct acpi_table_header *table) | |||
746 | h = (struct ivhd_header *)p; | 852 | h = (struct ivhd_header *)p; |
747 | switch (*p) { | 853 | switch (*p) { |
748 | case ACPI_IVHD_TYPE: | 854 | case ACPI_IVHD_TYPE: |
855 | |||
856 | DUMP_printk("IOMMU: device: %02x:%02x.%01x cap: %04x " | ||
857 | "seg: %d flags: %01x info %04x\n", | ||
858 | PCI_BUS(h->devid), PCI_SLOT(h->devid), | ||
859 | PCI_FUNC(h->devid), h->cap_ptr, | ||
860 | h->pci_seg, h->flags, h->info); | ||
861 | DUMP_printk(" mmio-addr: %016llx\n", | ||
862 | h->mmio_phys); | ||
863 | |||
749 | iommu = kzalloc(sizeof(struct amd_iommu), GFP_KERNEL); | 864 | iommu = kzalloc(sizeof(struct amd_iommu), GFP_KERNEL); |
750 | if (iommu == NULL) | 865 | if (iommu == NULL) |
751 | return -ENOMEM; | 866 | return -ENOMEM; |
@@ -773,56 +888,9 @@ static int __init init_iommu_all(struct acpi_table_header *table) | |||
773 | * | 888 | * |
774 | ****************************************************************************/ | 889 | ****************************************************************************/ |
775 | 890 | ||
776 | static int __init iommu_setup_msix(struct amd_iommu *iommu) | ||
777 | { | ||
778 | struct amd_iommu *curr; | ||
779 | struct msix_entry entries[32]; /* only 32 supported by AMD IOMMU */ | ||
780 | int nvec = 0, i; | ||
781 | |||
782 | list_for_each_entry(curr, &amd_iommu_list, list) { | ||
783 | if (curr->dev == iommu->dev) { | ||
784 | entries[nvec].entry = curr->evt_msi_num; | ||
785 | entries[nvec].vector = 0; | ||
786 | curr->int_enabled = true; | ||
787 | nvec++; | ||
788 | } | ||
789 | } | ||
790 | |||
791 | if (pci_enable_msix(iommu->dev, entries, nvec)) { | ||
792 | pci_disable_msix(iommu->dev); | ||
793 | return 1; | ||
794 | } | ||
795 | |||
796 | for (i = 0; i < nvec; ++i) { | ||
797 | int r = request_irq(entries->vector, amd_iommu_int_handler, | ||
798 | IRQF_SAMPLE_RANDOM, | ||
799 | "AMD IOMMU", | ||
800 | NULL); | ||
801 | if (r) | ||
802 | goto out_free; | ||
803 | } | ||
804 | |||
805 | return 0; | ||
806 | |||
807 | out_free: | ||
808 | for (i -= 1; i >= 0; --i) | ||
809 | free_irq(entries->vector, NULL); | ||
810 | |||
811 | pci_disable_msix(iommu->dev); | ||
812 | |||
813 | return 1; | ||
814 | } | ||
815 | |||
816 | static int __init iommu_setup_msi(struct amd_iommu *iommu) | 891 | static int __init iommu_setup_msi(struct amd_iommu *iommu) |
817 | { | 892 | { |
818 | int r; | 893 | int r; |
819 | struct amd_iommu *curr; | ||
820 | |||
821 | list_for_each_entry(curr, &amd_iommu_list, list) { | ||
822 | if (curr->dev == iommu->dev) | ||
823 | curr->int_enabled = true; | ||
824 | } | ||
825 | |||
826 | 894 | ||
827 | if (pci_enable_msi(iommu->dev)) | 895 | if (pci_enable_msi(iommu->dev)) |
828 | return 1; | 896 | return 1; |
@@ -837,17 +905,18 @@ static int __init iommu_setup_msi(struct amd_iommu *iommu) | |||
837 | return 1; | 905 | return 1; |
838 | } | 906 | } |
839 | 907 | ||
908 | iommu->int_enabled = true; | ||
909 | iommu_feature_enable(iommu, CONTROL_EVT_INT_EN); | ||
910 | |||
840 | return 0; | 911 | return 0; |
841 | } | 912 | } |
842 | 913 | ||
843 | static int __init iommu_init_msi(struct amd_iommu *iommu) | 914 | static int iommu_init_msi(struct amd_iommu *iommu) |
844 | { | 915 | { |
845 | if (iommu->int_enabled) | 916 | if (iommu->int_enabled) |
846 | return 0; | 917 | return 0; |
847 | 918 | ||
848 | if (pci_find_capability(iommu->dev, PCI_CAP_ID_MSIX)) | 919 | if (pci_find_capability(iommu->dev, PCI_CAP_ID_MSI)) |
849 | return iommu_setup_msix(iommu); | ||
850 | else if (pci_find_capability(iommu->dev, PCI_CAP_ID_MSI)) | ||
851 | return iommu_setup_msi(iommu); | 920 | return iommu_setup_msi(iommu); |
852 | 921 | ||
853 | return 1; | 922 | return 1; |
@@ -899,6 +968,7 @@ static int __init init_exclusion_range(struct ivmd_header *m) | |||
899 | static int __init init_unity_map_range(struct ivmd_header *m) | 968 | static int __init init_unity_map_range(struct ivmd_header *m) |
900 | { | 969 | { |
901 | struct unity_map_entry *e = 0; | 970 | struct unity_map_entry *e = 0; |
971 | char *s; | ||
902 | 972 | ||
903 | e = kzalloc(sizeof(*e), GFP_KERNEL); | 973 | e = kzalloc(sizeof(*e), GFP_KERNEL); |
904 | if (e == NULL) | 974 | if (e == NULL) |
@@ -906,14 +976,19 @@ static int __init init_unity_map_range(struct ivmd_header *m) | |||
906 | 976 | ||
907 | switch (m->type) { | 977 | switch (m->type) { |
908 | default: | 978 | default: |
979 | kfree(e); | ||
980 | return 0; | ||
909 | case ACPI_IVMD_TYPE: | 981 | case ACPI_IVMD_TYPE: |
982 | s = "IVMD_TYPE\t\t\t"; | ||
910 | e->devid_start = e->devid_end = m->devid; | 983 | e->devid_start = e->devid_end = m->devid; |
911 | break; | 984 | break; |
912 | case ACPI_IVMD_TYPE_ALL: | 985 | case ACPI_IVMD_TYPE_ALL: |
986 | s = "IVMD_TYPE_ALL\t\t"; | ||
913 | e->devid_start = 0; | 987 | e->devid_start = 0; |
914 | e->devid_end = amd_iommu_last_bdf; | 988 | e->devid_end = amd_iommu_last_bdf; |
915 | break; | 989 | break; |
916 | case ACPI_IVMD_TYPE_RANGE: | 990 | case ACPI_IVMD_TYPE_RANGE: |
991 | s = "IVMD_TYPE_RANGE\t\t"; | ||
917 | e->devid_start = m->devid; | 992 | e->devid_start = m->devid; |
918 | e->devid_end = m->aux; | 993 | e->devid_end = m->aux; |
919 | break; | 994 | break; |
@@ -922,6 +997,13 @@ static int __init init_unity_map_range(struct ivmd_header *m) | |||
922 | e->address_end = e->address_start + PAGE_ALIGN(m->range_length); | 997 | e->address_end = e->address_start + PAGE_ALIGN(m->range_length); |
923 | e->prot = m->flags >> 1; | 998 | e->prot = m->flags >> 1; |
924 | 999 | ||
1000 | DUMP_printk("%s devid_start: %02x:%02x.%x devid_end: %02x:%02x.%x" | ||
1001 | " range_start: %016llx range_end: %016llx flags: %x\n", s, | ||
1002 | PCI_BUS(e->devid_start), PCI_SLOT(e->devid_start), | ||
1003 | PCI_FUNC(e->devid_start), PCI_BUS(e->devid_end), | ||
1004 | PCI_SLOT(e->devid_end), PCI_FUNC(e->devid_end), | ||
1005 | e->address_start, e->address_end, m->flags); | ||
1006 | |||
925 | list_add_tail(&e->list, &amd_iommu_unity_map); | 1007 | list_add_tail(&e->list, &amd_iommu_unity_map); |
926 | 1008 | ||
927 | return 0; | 1009 | return 0; |
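The fields printed by the new DUMP_printk come straight from the IVMD entry: address_end is address_start plus the page-aligned range length, and prot is the flags field shifted right by one. A short sketch with made-up numbers (a 0x2800-byte range starting at 0x100000) walks through the arithmetic:

#include <stdio.h>

#define PAGE_SIZE      4096UL
#define PAGE_ALIGN(x)  (((x) + PAGE_SIZE - 1) & ~(PAGE_SIZE - 1))

int main(void)
{
    /* hypothetical IVMD fields */
    unsigned long range_start  = 0x100000;
    unsigned long range_length = 0x2800;
    unsigned char flags        = 0x6;

    unsigned long address_start = range_start;
    unsigned long address_end   = address_start + PAGE_ALIGN(range_length);
    unsigned int  prot          = flags >> 1;

    /* 0x2800 rounds up to 0x3000, so the unity range ends at 0x103000; prot is 0x3 */
    printf("start %#lx end %#lx prot %#x\n", address_start, address_end, prot);
    return 0;
}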
@@ -967,18 +1049,29 @@ static void init_device_table(void) | |||
967 | * This function finally enables all IOMMUs found in the system after | 1049 | * This function finally enables all IOMMUs found in the system after |
968 | * they have been initialized | 1050 | * they have been initialized |
969 | */ | 1051 | */ |
970 | static void __init enable_iommus(void) | 1052 | static void enable_iommus(void) |
971 | { | 1053 | { |
972 | struct amd_iommu *iommu; | 1054 | struct amd_iommu *iommu; |
973 | 1055 | ||
974 | list_for_each_entry(iommu, &amd_iommu_list, list) { | 1056 | for_each_iommu(iommu) { |
1057 | iommu_disable(iommu); | ||
1058 | iommu_set_device_table(iommu); | ||
1059 | iommu_enable_command_buffer(iommu); | ||
1060 | iommu_enable_event_buffer(iommu); | ||
975 | iommu_set_exclusion_range(iommu); | 1061 | iommu_set_exclusion_range(iommu); |
976 | iommu_init_msi(iommu); | 1062 | iommu_init_msi(iommu); |
977 | iommu_enable_event_logging(iommu); | ||
978 | iommu_enable(iommu); | 1063 | iommu_enable(iommu); |
979 | } | 1064 | } |
980 | } | 1065 | } |
981 | 1066 | ||
1067 | static void disable_iommus(void) | ||
1068 | { | ||
1069 | struct amd_iommu *iommu; | ||
1070 | |||
1071 | for_each_iommu(iommu) | ||
1072 | iommu_disable(iommu); | ||
1073 | } | ||
1074 | |||
982 | /* | 1075 | /* |
983 | * Suspend/Resume support | 1076 | * Suspend/Resume support |
984 | * disable suspend until real resume implemented | 1077 | * disable suspend until real resume implemented |
@@ -986,12 +1079,25 @@ static void __init enable_iommus(void) | |||
986 | 1079 | ||
987 | static int amd_iommu_resume(struct sys_device *dev) | 1080 | static int amd_iommu_resume(struct sys_device *dev) |
988 | { | 1081 | { |
1082 | /* re-load the hardware */ | ||
1083 | enable_iommus(); | ||
1084 | |||
1085 | /* | ||
1086 | * we have to flush after the IOMMUs are enabled because a | ||
1087 | * disabled IOMMU will never execute the commands we send | ||
1088 | */ | ||
1089 | amd_iommu_flush_all_devices(); | ||
1090 | amd_iommu_flush_all_domains(); | ||
1091 | |||
989 | return 0; | 1092 | return 0; |
990 | } | 1093 | } |
991 | 1094 | ||
992 | static int amd_iommu_suspend(struct sys_device *dev, pm_message_t state) | 1095 | static int amd_iommu_suspend(struct sys_device *dev, pm_message_t state) |
993 | { | 1096 | { |
994 | return -EINVAL; | 1097 | /* disable IOMMUs to go out of the way for BIOS */ |
1098 | disable_iommus(); | ||
1099 | |||
1100 | return 0; | ||
995 | } | 1101 | } |
996 | 1102 | ||
997 | static struct sysdev_class amd_iommu_sysdev_class = { | 1103 | static struct sysdev_class amd_iommu_sysdev_class = { |
@@ -1137,9 +1243,6 @@ int __init amd_iommu_init(void) | |||
1137 | 1243 | ||
1138 | enable_iommus(); | 1244 | enable_iommus(); |
1139 | 1245 | ||
1140 | printk(KERN_INFO "AMD IOMMU: aperture size is %d MB\n", | ||
1141 | (1 << (amd_iommu_aperture_order-20))); | ||
1142 | |||
1143 | printk(KERN_INFO "AMD IOMMU: device isolation "); | 1246 | printk(KERN_INFO "AMD IOMMU: device isolation "); |
1144 | if (amd_iommu_isolate) | 1247 | if (amd_iommu_isolate) |
1145 | printk("enabled\n"); | 1248 | printk("enabled\n"); |
@@ -1177,6 +1280,11 @@ free: | |||
1177 | goto out; | 1280 | goto out; |
1178 | } | 1281 | } |
1179 | 1282 | ||
1283 | void amd_iommu_shutdown(void) | ||
1284 | { | ||
1285 | disable_iommus(); | ||
1286 | } | ||
1287 | |||
1180 | /**************************************************************************** | 1288 | /**************************************************************************** |
1181 | * | 1289 | * |
1182 | * Early detect code. This code runs at IOMMU detection time in the DMA | 1290 | * Early detect code. This code runs at IOMMU detection time in the DMA |
@@ -1211,6 +1319,13 @@ void __init amd_iommu_detect(void) | |||
1211 | * | 1319 | * |
1212 | ****************************************************************************/ | 1320 | ****************************************************************************/ |
1213 | 1321 | ||
1322 | static int __init parse_amd_iommu_dump(char *str) | ||
1323 | { | ||
1324 | amd_iommu_dump = true; | ||
1325 | |||
1326 | return 1; | ||
1327 | } | ||
1328 | |||
1214 | static int __init parse_amd_iommu_options(char *str) | 1329 | static int __init parse_amd_iommu_options(char *str) |
1215 | { | 1330 | { |
1216 | for (; *str; ++str) { | 1331 | for (; *str; ++str) { |
@@ -1225,15 +1340,5 @@ static int __init parse_amd_iommu_options(char *str) | |||
1225 | return 1; | 1340 | return 1; |
1226 | } | 1341 | } |
1227 | 1342 | ||
1228 | static int __init parse_amd_iommu_size_options(char *str) | 1343 | __setup("amd_iommu_dump", parse_amd_iommu_dump); |
1229 | { | ||
1230 | unsigned order = PAGE_SHIFT + get_order(memparse(str, &str)); | ||
1231 | |||
1232 | if ((order > 24) && (order < 31)) | ||
1233 | amd_iommu_aperture_order = order; | ||
1234 | |||
1235 | return 1; | ||
1236 | } | ||
1237 | |||
1238 | __setup("amd_iommu=", parse_amd_iommu_options); | 1344 | __setup("amd_iommu=", parse_amd_iommu_options); |
1239 | __setup("amd_iommu_size=", parse_amd_iommu_size_options); | ||
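The amd_iommu_size= parameter is dropped because the aperture now grows on demand; in its place the new amd_iommu_dump option simply sets amd_iommu_dump = true so that the ACPI table parsers emit the DUMP_printk lines seen in the earlier hunks. The option takes no value; a hypothetical boot entry (kernel path and root device purely illustrative) would just append it:

    kernel /boot/vmlinuz ro root=/dev/sda1 amd_iommu_dump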
diff --git a/arch/x86/kernel/apic/apic.c b/arch/x86/kernel/apic/apic.c index f2870920f246..8c7c042ecad1 100644 --- a/arch/x86/kernel/apic/apic.c +++ b/arch/x86/kernel/apic/apic.c | |||
@@ -14,6 +14,7 @@ | |||
14 | * Mikael Pettersson : PM converted to driver model. | 14 | * Mikael Pettersson : PM converted to driver model. |
15 | */ | 15 | */ |
16 | 16 | ||
17 | #include <linux/perf_counter.h> | ||
17 | #include <linux/kernel_stat.h> | 18 | #include <linux/kernel_stat.h> |
18 | #include <linux/mc146818rtc.h> | 19 | #include <linux/mc146818rtc.h> |
19 | #include <linux/acpi_pmtmr.h> | 20 | #include <linux/acpi_pmtmr.h> |
@@ -34,6 +35,7 @@ | |||
34 | #include <linux/smp.h> | 35 | #include <linux/smp.h> |
35 | #include <linux/mm.h> | 36 | #include <linux/mm.h> |
36 | 37 | ||
38 | #include <asm/perf_counter.h> | ||
37 | #include <asm/pgalloc.h> | 39 | #include <asm/pgalloc.h> |
38 | #include <asm/atomic.h> | 40 | #include <asm/atomic.h> |
39 | #include <asm/mpspec.h> | 41 | #include <asm/mpspec.h> |
@@ -98,6 +100,29 @@ early_param("lapic", parse_lapic); | |||
98 | /* Local APIC was disabled by the BIOS and enabled by the kernel */ | 100 | /* Local APIC was disabled by the BIOS and enabled by the kernel */ |
99 | static int enabled_via_apicbase; | 101 | static int enabled_via_apicbase; |
100 | 102 | ||
103 | /* | ||
104 | * Handle interrupt mode configuration register (IMCR). | ||
105 | * This register controls whether the interrupt signals | ||
106 | * that reach the BSP come from the master PIC or from the | ||
107 | * local APIC. Before entering Symmetric I/O Mode, either | ||
108 | * the BIOS or the operating system must switch out of | ||
109 | * PIC Mode by changing the IMCR. | ||
110 | */ | ||
111 | static inline void imcr_pic_to_apic(void) | ||
112 | { | ||
113 | /* select IMCR register */ | ||
114 | outb(0x70, 0x22); | ||
115 | /* NMI and 8259 INTR go through APIC */ | ||
116 | outb(0x01, 0x23); | ||
117 | } | ||
118 | |||
119 | static inline void imcr_apic_to_pic(void) | ||
120 | { | ||
121 | /* select IMCR register */ | ||
122 | outb(0x70, 0x22); | ||
123 | /* NMI and 8259 INTR go directly to BSP */ | ||
124 | outb(0x00, 0x23); | ||
125 | } | ||
101 | #endif | 126 | #endif |
102 | 127 | ||
103 | #ifdef CONFIG_X86_64 | 128 | #ifdef CONFIG_X86_64 |
@@ -111,13 +136,19 @@ static __init int setup_apicpmtimer(char *s) | |||
111 | __setup("apicpmtimer", setup_apicpmtimer); | 136 | __setup("apicpmtimer", setup_apicpmtimer); |
112 | #endif | 137 | #endif |
113 | 138 | ||
139 | int x2apic_mode; | ||
114 | #ifdef CONFIG_X86_X2APIC | 140 | #ifdef CONFIG_X86_X2APIC |
115 | int x2apic; | ||
116 | /* x2apic enabled before OS handover */ | 141 | /* x2apic enabled before OS handover */ |
117 | static int x2apic_preenabled; | 142 | static int x2apic_preenabled; |
118 | static int disable_x2apic; | 143 | static int disable_x2apic; |
119 | static __init int setup_nox2apic(char *str) | 144 | static __init int setup_nox2apic(char *str) |
120 | { | 145 | { |
146 | if (x2apic_enabled()) { | ||
147 | pr_warning("Bios already enabled x2apic, " | ||
148 | "can't enforce nox2apic"); | ||
149 | return 0; | ||
150 | } | ||
151 | |||
121 | disable_x2apic = 1; | 152 | disable_x2apic = 1; |
122 | setup_clear_cpu_cap(X86_FEATURE_X2APIC); | 153 | setup_clear_cpu_cap(X86_FEATURE_X2APIC); |
123 | return 0; | 154 | return 0; |
@@ -209,6 +240,31 @@ static int modern_apic(void) | |||
209 | return lapic_get_version() >= 0x14; | 240 | return lapic_get_version() >= 0x14; |
210 | } | 241 | } |
211 | 242 | ||
243 | /* | ||
244 | * bare function to substitute write operation | ||
245 | * and it's _that_ fast :) | ||
246 | */ | ||
247 | static void native_apic_write_dummy(u32 reg, u32 v) | ||
248 | { | ||
249 | WARN_ON_ONCE((cpu_has_apic && !disable_apic)); | ||
250 | } | ||
251 | |||
252 | static u32 native_apic_read_dummy(u32 reg) | ||
253 | { | ||
254 | WARN_ON_ONCE((cpu_has_apic && !disable_apic)); | ||
255 | return 0; | ||
256 | } | ||
257 | |||
258 | /* | ||
259 | * Right after this call apic->read/write become no-ops. Note | ||
260 | * that there is no restore operation; this works one way only. | ||
261 | */ | ||
262 | void apic_disable(void) | ||
263 | { | ||
264 | apic->read = native_apic_read_dummy; | ||
265 | apic->write = native_apic_write_dummy; | ||
266 | } | ||
267 | |||
212 | void native_apic_wait_icr_idle(void) | 268 | void native_apic_wait_icr_idle(void) |
213 | { | 269 | { |
214 | while (apic_read(APIC_ICR) & APIC_ICR_BUSY) | 270 | while (apic_read(APIC_ICR) & APIC_ICR_BUSY) |
@@ -348,7 +404,7 @@ static void __setup_APIC_LVTT(unsigned int clocks, int oneshot, int irqen) | |||
348 | 404 | ||
349 | static void setup_APIC_eilvt(u8 lvt_off, u8 vector, u8 msg_type, u8 mask) | 405 | static void setup_APIC_eilvt(u8 lvt_off, u8 vector, u8 msg_type, u8 mask) |
350 | { | 406 | { |
351 | unsigned long reg = (lvt_off << 4) + APIC_EILVT0; | 407 | unsigned long reg = (lvt_off << 4) + APIC_EILVTn(0); |
352 | unsigned int v = (mask << 16) | (msg_type << 8) | vector; | 408 | unsigned int v = (mask << 16) | (msg_type << 8) | vector; |
353 | 409 | ||
354 | apic_write(reg, v); | 410 | apic_write(reg, v); |
@@ -815,7 +871,7 @@ void clear_local_APIC(void) | |||
815 | u32 v; | 871 | u32 v; |
816 | 872 | ||
817 | /* APIC hasn't been mapped yet */ | 873 | /* APIC hasn't been mapped yet */ |
818 | if (!x2apic && !apic_phys) | 874 | if (!x2apic_mode && !apic_phys) |
819 | return; | 875 | return; |
820 | 876 | ||
821 | maxlvt = lapic_get_maxlvt(); | 877 | maxlvt = lapic_get_maxlvt(); |
@@ -843,7 +899,7 @@ void clear_local_APIC(void) | |||
843 | } | 899 | } |
844 | 900 | ||
845 | /* lets not touch this if we didn't frob it */ | 901 | /* lets not touch this if we didn't frob it */ |
846 | #if defined(CONFIG_X86_MCE_P4THERMAL) || defined(CONFIG_X86_MCE_INTEL) | 902 | #ifdef CONFIG_X86_THERMAL_VECTOR |
847 | if (maxlvt >= 5) { | 903 | if (maxlvt >= 5) { |
848 | v = apic_read(APIC_LVTTHMR); | 904 | v = apic_read(APIC_LVTTHMR); |
849 | apic_write(APIC_LVTTHMR, v | APIC_LVT_MASKED); | 905 | apic_write(APIC_LVTTHMR, v | APIC_LVT_MASKED); |
@@ -1133,6 +1189,7 @@ void __cpuinit setup_local_APIC(void) | |||
1133 | apic_write(APIC_ESR, 0); | 1189 | apic_write(APIC_ESR, 0); |
1134 | } | 1190 | } |
1135 | #endif | 1191 | #endif |
1192 | perf_counters_lapic_init(); | ||
1136 | 1193 | ||
1137 | preempt_disable(); | 1194 | preempt_disable(); |
1138 | 1195 | ||
@@ -1287,7 +1344,7 @@ void check_x2apic(void) | |||
1287 | { | 1344 | { |
1288 | if (x2apic_enabled()) { | 1345 | if (x2apic_enabled()) { |
1289 | pr_info("x2apic enabled by BIOS, switching to x2apic ops\n"); | 1346 | pr_info("x2apic enabled by BIOS, switching to x2apic ops\n"); |
1290 | x2apic_preenabled = x2apic = 1; | 1347 | x2apic_preenabled = x2apic_mode = 1; |
1291 | } | 1348 | } |
1292 | } | 1349 | } |
1293 | 1350 | ||
@@ -1295,7 +1352,7 @@ void enable_x2apic(void) | |||
1295 | { | 1352 | { |
1296 | int msr, msr2; | 1353 | int msr, msr2; |
1297 | 1354 | ||
1298 | if (!x2apic) | 1355 | if (!x2apic_mode) |
1299 | return; | 1356 | return; |
1300 | 1357 | ||
1301 | rdmsr(MSR_IA32_APICBASE, msr, msr2); | 1358 | rdmsr(MSR_IA32_APICBASE, msr, msr2); |
@@ -1304,6 +1361,7 @@ void enable_x2apic(void) | |||
1304 | wrmsr(MSR_IA32_APICBASE, msr | X2APIC_ENABLE, 0); | 1361 | wrmsr(MSR_IA32_APICBASE, msr | X2APIC_ENABLE, 0); |
1305 | } | 1362 | } |
1306 | } | 1363 | } |
1364 | #endif /* CONFIG_X86_X2APIC */ | ||
1307 | 1365 | ||
1308 | void __init enable_IR_x2apic(void) | 1366 | void __init enable_IR_x2apic(void) |
1309 | { | 1367 | { |
@@ -1312,32 +1370,21 @@ void __init enable_IR_x2apic(void) | |||
1312 | unsigned long flags; | 1370 | unsigned long flags; |
1313 | struct IO_APIC_route_entry **ioapic_entries = NULL; | 1371 | struct IO_APIC_route_entry **ioapic_entries = NULL; |
1314 | 1372 | ||
1315 | if (!cpu_has_x2apic) | 1373 | ret = dmar_table_init(); |
1316 | return; | 1374 | if (ret) { |
1317 | 1375 | pr_debug("dmar_table_init() failed with %d:\n", ret); | |
1318 | if (!x2apic_preenabled && disable_x2apic) { | 1376 | goto ir_failed; |
1319 | pr_info("Skipped enabling x2apic and Interrupt-remapping " | ||
1320 | "because of nox2apic\n"); | ||
1321 | return; | ||
1322 | } | 1377 | } |
1323 | 1378 | ||
1324 | if (x2apic_preenabled && disable_x2apic) | 1379 | if (!intr_remapping_supported()) { |
1325 | panic("Bios already enabled x2apic, can't enforce nox2apic"); | 1380 | pr_debug("intr-remapping not supported\n"); |
1326 | 1381 | goto ir_failed; | |
1327 | if (!x2apic_preenabled && skip_ioapic_setup) { | ||
1328 | pr_info("Skipped enabling x2apic and Interrupt-remapping " | ||
1329 | "because of skipping io-apic setup\n"); | ||
1330 | return; | ||
1331 | } | 1382 | } |
1332 | 1383 | ||
1333 | ret = dmar_table_init(); | ||
1334 | if (ret) { | ||
1335 | pr_info("dmar_table_init() failed with %d:\n", ret); | ||
1336 | 1384 | ||
1337 | if (x2apic_preenabled) | 1385 | if (!x2apic_preenabled && skip_ioapic_setup) { |
1338 | panic("x2apic enabled by bios. But IR enabling failed"); | 1386 | pr_info("Skipped enabling intr-remap because of skipping " |
1339 | else | 1387 | "io-apic setup\n"); |
1340 | pr_info("Not enabling x2apic,Intr-remapping\n"); | ||
1341 | return; | 1388 | return; |
1342 | } | 1389 | } |
1343 | 1390 | ||
@@ -1357,19 +1404,16 @@ void __init enable_IR_x2apic(void) | |||
1357 | mask_IO_APIC_setup(ioapic_entries); | 1404 | mask_IO_APIC_setup(ioapic_entries); |
1358 | mask_8259A(); | 1405 | mask_8259A(); |
1359 | 1406 | ||
1360 | ret = enable_intr_remapping(EIM_32BIT_APIC_ID); | 1407 | ret = enable_intr_remapping(x2apic_supported()); |
1361 | |||
1362 | if (ret && x2apic_preenabled) { | ||
1363 | local_irq_restore(flags); | ||
1364 | panic("x2apic enabled by bios. But IR enabling failed"); | ||
1365 | } | ||
1366 | |||
1367 | if (ret) | 1408 | if (ret) |
1368 | goto end_restore; | 1409 | goto end_restore; |
1369 | 1410 | ||
1370 | if (!x2apic) { | 1411 | pr_info("Enabled Interrupt-remapping\n"); |
1371 | x2apic = 1; | 1412 | |
1413 | if (x2apic_supported() && !x2apic_mode) { | ||
1414 | x2apic_mode = 1; | ||
1372 | enable_x2apic(); | 1415 | enable_x2apic(); |
1416 | pr_info("Enabled x2apic\n"); | ||
1373 | } | 1417 | } |
1374 | 1418 | ||
1375 | end_restore: | 1419 | end_restore: |
@@ -1378,37 +1422,34 @@ end_restore: | |||
1378 | * IR enabling failed | 1422 | * IR enabling failed |
1379 | */ | 1423 | */ |
1380 | restore_IO_APIC_setup(ioapic_entries); | 1424 | restore_IO_APIC_setup(ioapic_entries); |
1381 | else | ||
1382 | reinit_intr_remapped_IO_APIC(x2apic_preenabled, ioapic_entries); | ||
1383 | 1425 | ||
1384 | unmask_8259A(); | 1426 | unmask_8259A(); |
1385 | local_irq_restore(flags); | 1427 | local_irq_restore(flags); |
1386 | 1428 | ||
1387 | end: | 1429 | end: |
1388 | if (!ret) { | ||
1389 | if (!x2apic_preenabled) | ||
1390 | pr_info("Enabled x2apic and interrupt-remapping\n"); | ||
1391 | else | ||
1392 | pr_info("Enabled Interrupt-remapping\n"); | ||
1393 | } else | ||
1394 | pr_err("Failed to enable Interrupt-remapping and x2apic\n"); | ||
1395 | if (ioapic_entries) | 1430 | if (ioapic_entries) |
1396 | free_ioapic_entries(ioapic_entries); | 1431 | free_ioapic_entries(ioapic_entries); |
1432 | |||
1433 | if (!ret) | ||
1434 | return; | ||
1435 | |||
1436 | ir_failed: | ||
1437 | if (x2apic_preenabled) | ||
1438 | panic("x2apic enabled by bios. But IR enabling failed"); | ||
1439 | else if (cpu_has_x2apic) | ||
1440 | pr_info("Not enabling x2apic, Intr-remapping\n"); | ||
1397 | #else | 1441 | #else |
1398 | if (!cpu_has_x2apic) | 1442 | if (!cpu_has_x2apic) |
1399 | return; | 1443 | return; |
1400 | 1444 | ||
1401 | if (x2apic_preenabled) | 1445 | if (x2apic_preenabled) |
1402 | panic("x2apic enabled prior OS handover," | 1446 | panic("x2apic enabled prior OS handover," |
1403 | " enable CONFIG_INTR_REMAP"); | 1447 | " enable CONFIG_X86_X2APIC, CONFIG_INTR_REMAP"); |
1404 | |||
1405 | pr_info("Enable CONFIG_INTR_REMAP for enabling intr-remapping " | ||
1406 | " and x2apic\n"); | ||
1407 | #endif | 1448 | #endif |
1408 | 1449 | ||
1409 | return; | 1450 | return; |
1410 | } | 1451 | } |
1411 | #endif /* CONFIG_X86_X2APIC */ | 1452 | |
1412 | 1453 | ||
1413 | #ifdef CONFIG_X86_64 | 1454 | #ifdef CONFIG_X86_64 |
1414 | /* | 1455 | /* |
@@ -1425,7 +1466,6 @@ static int __init detect_init_APIC(void) | |||
1425 | } | 1466 | } |
1426 | 1467 | ||
1427 | mp_lapic_addr = APIC_DEFAULT_PHYS_BASE; | 1468 | mp_lapic_addr = APIC_DEFAULT_PHYS_BASE; |
1428 | boot_cpu_physical_apicid = 0; | ||
1429 | return 0; | 1469 | return 0; |
1430 | } | 1470 | } |
1431 | #else | 1471 | #else |
@@ -1539,32 +1579,49 @@ void __init early_init_lapic_mapping(void) | |||
1539 | */ | 1579 | */ |
1540 | void __init init_apic_mappings(void) | 1580 | void __init init_apic_mappings(void) |
1541 | { | 1581 | { |
1542 | if (x2apic) { | 1582 | unsigned int new_apicid; |
1583 | |||
1584 | if (x2apic_mode) { | ||
1543 | boot_cpu_physical_apicid = read_apic_id(); | 1585 | boot_cpu_physical_apicid = read_apic_id(); |
1544 | return; | 1586 | return; |
1545 | } | 1587 | } |
1546 | 1588 | ||
1547 | /* | 1589 | /* If no local APIC can be found return early */ |
1548 | * If no local APIC can be found then set up a fake all | ||
1549 | * zeroes page to simulate the local APIC and another | ||
1550 | * one for the IO-APIC. | ||
1551 | */ | ||
1552 | if (!smp_found_config && detect_init_APIC()) { | 1590 | if (!smp_found_config && detect_init_APIC()) { |
1553 | apic_phys = (unsigned long) alloc_bootmem_pages(PAGE_SIZE); | 1591 | /* lets NOP'ify apic operations */ |
1554 | apic_phys = __pa(apic_phys); | 1592 | pr_info("APIC: disable apic facility\n"); |
1555 | } else | 1593 | apic_disable(); |
1594 | } else { | ||
1556 | apic_phys = mp_lapic_addr; | 1595 | apic_phys = mp_lapic_addr; |
1557 | 1596 | ||
1558 | set_fixmap_nocache(FIX_APIC_BASE, apic_phys); | 1597 | /* |
1559 | apic_printk(APIC_VERBOSE, "mapped APIC to %08lx (%08lx)\n", | 1598 | * acpi lapic path already maps that address in |
1560 | APIC_BASE, apic_phys); | 1599 | * acpi_register_lapic_address() |
1600 | */ | ||
1601 | if (!acpi_lapic) | ||
1602 | set_fixmap_nocache(FIX_APIC_BASE, apic_phys); | ||
1603 | |||
1604 | apic_printk(APIC_VERBOSE, "mapped APIC to %08lx (%08lx)\n", | ||
1605 | APIC_BASE, apic_phys); | ||
1606 | } | ||
1561 | 1607 | ||
1562 | /* | 1608 | /* |
1563 | * Fetch the APIC ID of the BSP in case we have a | 1609 | * Fetch the APIC ID of the BSP in case we have a |
1564 | * default configuration (or the MP table is broken). | 1610 | * default configuration (or the MP table is broken). |
1565 | */ | 1611 | */ |
1566 | if (boot_cpu_physical_apicid == -1U) | 1612 | new_apicid = read_apic_id(); |
1567 | boot_cpu_physical_apicid = read_apic_id(); | 1613 | if (boot_cpu_physical_apicid != new_apicid) { |
1614 | boot_cpu_physical_apicid = new_apicid; | ||
1615 | /* | ||
1616 | * yeah -- we lie about apic_version | ||
1617 | * in case if apic was disabled via boot option | ||
1618 | * but it's not a problem for SMP compiled kernel | ||
1619 | * since smp_sanity_check is prepared for such a case | ||
1620 | * and disable smp mode | ||
1621 | */ | ||
1622 | apic_version[new_apicid] = | ||
1623 | GET_APIC_VERSION(apic_read(APIC_LVR)); | ||
1624 | } | ||
1568 | } | 1625 | } |
1569 | 1626 | ||
1570 | /* | 1627 | /* |
@@ -1733,8 +1790,7 @@ void __init connect_bsp_APIC(void) | |||
1733 | */ | 1790 | */ |
1734 | apic_printk(APIC_VERBOSE, "leaving PIC mode, " | 1791 | apic_printk(APIC_VERBOSE, "leaving PIC mode, " |
1735 | "enabling APIC mode.\n"); | 1792 | "enabling APIC mode.\n"); |
1736 | outb(0x70, 0x22); | 1793 | imcr_pic_to_apic(); |
1737 | outb(0x01, 0x23); | ||
1738 | } | 1794 | } |
1739 | #endif | 1795 | #endif |
1740 | if (apic->enable_apic_mode) | 1796 | if (apic->enable_apic_mode) |
@@ -1762,8 +1818,7 @@ void disconnect_bsp_APIC(int virt_wire_setup) | |||
1762 | */ | 1818 | */ |
1763 | apic_printk(APIC_VERBOSE, "disabling APIC mode, " | 1819 | apic_printk(APIC_VERBOSE, "disabling APIC mode, " |
1764 | "entering PIC mode.\n"); | 1820 | "entering PIC mode.\n"); |
1765 | outb(0x70, 0x22); | 1821 | imcr_apic_to_pic(); |
1766 | outb(0x00, 0x23); | ||
1767 | return; | 1822 | return; |
1768 | } | 1823 | } |
1769 | #endif | 1824 | #endif |
@@ -1962,17 +2017,17 @@ static int lapic_suspend(struct sys_device *dev, pm_message_t state) | |||
1962 | apic_pm_state.apic_lvterr = apic_read(APIC_LVTERR); | 2017 | apic_pm_state.apic_lvterr = apic_read(APIC_LVTERR); |
1963 | apic_pm_state.apic_tmict = apic_read(APIC_TMICT); | 2018 | apic_pm_state.apic_tmict = apic_read(APIC_TMICT); |
1964 | apic_pm_state.apic_tdcr = apic_read(APIC_TDCR); | 2019 | apic_pm_state.apic_tdcr = apic_read(APIC_TDCR); |
1965 | #if defined(CONFIG_X86_MCE_P4THERMAL) || defined(CONFIG_X86_MCE_INTEL) | 2020 | #ifdef CONFIG_X86_THERMAL_VECTOR |
1966 | if (maxlvt >= 5) | 2021 | if (maxlvt >= 5) |
1967 | apic_pm_state.apic_thmr = apic_read(APIC_LVTTHMR); | 2022 | apic_pm_state.apic_thmr = apic_read(APIC_LVTTHMR); |
1968 | #endif | 2023 | #endif |
1969 | 2024 | ||
1970 | local_irq_save(flags); | 2025 | local_irq_save(flags); |
1971 | disable_local_APIC(); | 2026 | disable_local_APIC(); |
1972 | #ifdef CONFIG_INTR_REMAP | 2027 | |
1973 | if (intr_remapping_enabled) | 2028 | if (intr_remapping_enabled) |
1974 | disable_intr_remapping(); | 2029 | disable_intr_remapping(); |
1975 | #endif | 2030 | |
1976 | local_irq_restore(flags); | 2031 | local_irq_restore(flags); |
1977 | return 0; | 2032 | return 0; |
1978 | } | 2033 | } |
@@ -1982,42 +2037,34 @@ static int lapic_resume(struct sys_device *dev) | |||
1982 | unsigned int l, h; | 2037 | unsigned int l, h; |
1983 | unsigned long flags; | 2038 | unsigned long flags; |
1984 | int maxlvt; | 2039 | int maxlvt; |
1985 | 2040 | int ret = 0; | |
1986 | #ifdef CONFIG_INTR_REMAP | ||
1987 | int ret; | ||
1988 | struct IO_APIC_route_entry **ioapic_entries = NULL; | 2041 | struct IO_APIC_route_entry **ioapic_entries = NULL; |
1989 | 2042 | ||
1990 | if (!apic_pm_state.active) | 2043 | if (!apic_pm_state.active) |
1991 | return 0; | 2044 | return 0; |
1992 | 2045 | ||
1993 | local_irq_save(flags); | 2046 | local_irq_save(flags); |
1994 | if (x2apic) { | 2047 | if (intr_remapping_enabled) { |
1995 | ioapic_entries = alloc_ioapic_entries(); | 2048 | ioapic_entries = alloc_ioapic_entries(); |
1996 | if (!ioapic_entries) { | 2049 | if (!ioapic_entries) { |
1997 | WARN(1, "Alloc ioapic_entries in lapic resume failed."); | 2050 | WARN(1, "Alloc ioapic_entries in lapic resume failed."); |
1998 | return -ENOMEM; | 2051 | ret = -ENOMEM; |
2052 | goto restore; | ||
1999 | } | 2053 | } |
2000 | 2054 | ||
2001 | ret = save_IO_APIC_setup(ioapic_entries); | 2055 | ret = save_IO_APIC_setup(ioapic_entries); |
2002 | if (ret) { | 2056 | if (ret) { |
2003 | WARN(1, "Saving IO-APIC state failed: %d\n", ret); | 2057 | WARN(1, "Saving IO-APIC state failed: %d\n", ret); |
2004 | free_ioapic_entries(ioapic_entries); | 2058 | free_ioapic_entries(ioapic_entries); |
2005 | return ret; | 2059 | goto restore; |
2006 | } | 2060 | } |
2007 | 2061 | ||
2008 | mask_IO_APIC_setup(ioapic_entries); | 2062 | mask_IO_APIC_setup(ioapic_entries); |
2009 | mask_8259A(); | 2063 | mask_8259A(); |
2010 | enable_x2apic(); | ||
2011 | } | 2064 | } |
2012 | #else | ||
2013 | if (!apic_pm_state.active) | ||
2014 | return 0; | ||
2015 | 2065 | ||
2016 | local_irq_save(flags); | 2066 | if (x2apic_mode) |
2017 | if (x2apic) | ||
2018 | enable_x2apic(); | 2067 | enable_x2apic(); |
2019 | #endif | ||
2020 | |||
2021 | else { | 2068 | else { |
2022 | /* | 2069 | /* |
2023 | * Make sure the APICBASE points to the right address | 2070 | * Make sure the APICBASE points to the right address |
@@ -2055,21 +2102,16 @@ static int lapic_resume(struct sys_device *dev) | |||
2055 | apic_write(APIC_ESR, 0); | 2102 | apic_write(APIC_ESR, 0); |
2056 | apic_read(APIC_ESR); | 2103 | apic_read(APIC_ESR); |
2057 | 2104 | ||
2058 | #ifdef CONFIG_INTR_REMAP | 2105 | if (intr_remapping_enabled) { |
2059 | if (intr_remapping_enabled) | 2106 | reenable_intr_remapping(x2apic_mode); |
2060 | reenable_intr_remapping(EIM_32BIT_APIC_ID); | ||
2061 | |||
2062 | if (x2apic) { | ||
2063 | unmask_8259A(); | 2107 | unmask_8259A(); |
2064 | restore_IO_APIC_setup(ioapic_entries); | 2108 | restore_IO_APIC_setup(ioapic_entries); |
2065 | free_ioapic_entries(ioapic_entries); | 2109 | free_ioapic_entries(ioapic_entries); |
2066 | } | 2110 | } |
2067 | #endif | 2111 | restore: |
2068 | |||
2069 | local_irq_restore(flags); | 2112 | local_irq_restore(flags); |
2070 | 2113 | ||
2071 | 2114 | return ret; | |
2072 | return 0; | ||
2073 | } | 2115 | } |
2074 | 2116 | ||
2075 | /* | 2117 | /* |
@@ -2117,31 +2159,14 @@ static void apic_pm_activate(void) { } | |||
2117 | #endif /* CONFIG_PM */ | 2159 | #endif /* CONFIG_PM */ |
2118 | 2160 | ||
2119 | #ifdef CONFIG_X86_64 | 2161 | #ifdef CONFIG_X86_64 |
2120 | /* | 2162 | |
2121 | * apic_is_clustered_box() -- Check if we can expect good TSC | 2163 | static int __cpuinit apic_cluster_num(void) |
2122 | * | ||
2123 | * Thus far, the major user of this is IBM's Summit2 series: | ||
2124 | * | ||
2125 | * Clustered boxes may have unsynced TSC problems if they are | ||
2126 | * multi-chassis. Use available data to take a good guess. | ||
2127 | * If in doubt, go HPET. | ||
2128 | */ | ||
2129 | __cpuinit int apic_is_clustered_box(void) | ||
2130 | { | 2164 | { |
2131 | int i, clusters, zeros; | 2165 | int i, clusters, zeros; |
2132 | unsigned id; | 2166 | unsigned id; |
2133 | u16 *bios_cpu_apicid; | 2167 | u16 *bios_cpu_apicid; |
2134 | DECLARE_BITMAP(clustermap, NUM_APIC_CLUSTERS); | 2168 | DECLARE_BITMAP(clustermap, NUM_APIC_CLUSTERS); |
2135 | 2169 | ||
2136 | /* | ||
2137 | * there is not this kind of box with AMD CPU yet. | ||
2138 | * Some AMD box with quadcore cpu and 8 sockets apicid | ||
2139 | * will be [4, 0x23] or [8, 0x27] could be thought to | ||
2140 | * vsmp box still need checking... | ||
2141 | */ | ||
2142 | if ((boot_cpu_data.x86_vendor == X86_VENDOR_AMD) && !is_vsmp_box()) | ||
2143 | return 0; | ||
2144 | |||
2145 | bios_cpu_apicid = early_per_cpu_ptr(x86_bios_cpu_apicid); | 2170 | bios_cpu_apicid = early_per_cpu_ptr(x86_bios_cpu_apicid); |
2146 | bitmap_zero(clustermap, NUM_APIC_CLUSTERS); | 2171 | bitmap_zero(clustermap, NUM_APIC_CLUSTERS); |
2147 | 2172 | ||
@@ -2177,18 +2202,67 @@ __cpuinit int apic_is_clustered_box(void) | |||
2177 | ++zeros; | 2202 | ++zeros; |
2178 | } | 2203 | } |
2179 | 2204 | ||
2180 | /* ScaleMP vSMPowered boxes have one cluster per board and TSCs are | 2205 | return clusters; |
2181 | * not guaranteed to be synced between boards | 2206 | } |
2182 | */ | 2207 | |
2183 | if (is_vsmp_box() && clusters > 1) | 2208 | static int __cpuinitdata multi_checked; |
2209 | static int __cpuinitdata multi; | ||
2210 | |||
2211 | static int __cpuinit set_multi(const struct dmi_system_id *d) | ||
2212 | { | ||
2213 | if (multi) | ||
2214 | return 0; | ||
2215 | pr_info("APIC: %s detected, Multi Chassis\n", d->ident); | ||
2216 | multi = 1; | ||
2217 | return 0; | ||
2218 | } | ||
2219 | |||
2220 | static const __cpuinitconst struct dmi_system_id multi_dmi_table[] = { | ||
2221 | { | ||
2222 | .callback = set_multi, | ||
2223 | .ident = "IBM System Summit2", | ||
2224 | .matches = { | ||
2225 | DMI_MATCH(DMI_SYS_VENDOR, "IBM"), | ||
2226 | DMI_MATCH(DMI_PRODUCT_NAME, "Summit2"), | ||
2227 | }, | ||
2228 | }, | ||
2229 | {} | ||
2230 | }; | ||
2231 | |||
2232 | static void __cpuinit dmi_check_multi(void) | ||
2233 | { | ||
2234 | if (multi_checked) | ||
2235 | return; | ||
2236 | |||
2237 | dmi_check_system(multi_dmi_table); | ||
2238 | multi_checked = 1; | ||
2239 | } | ||
2240 | |||
2241 | /* | ||
2242 | * apic_is_clustered_box() -- Check if we can expect good TSC | ||
2243 | * | ||
2244 | * Thus far, the major user of this is IBM's Summit2 series: | ||
2245 | * Clustered boxes may have unsynced TSC problems if they are | ||
2246 | * multi-chassis. | ||
2247 | * Use DMI to check them | ||
2248 | */ | ||
2249 | __cpuinit int apic_is_clustered_box(void) | ||
2250 | { | ||
2251 | dmi_check_multi(); | ||
2252 | if (multi) | ||
2184 | return 1; | 2253 | return 1; |
2185 | 2254 | ||
2255 | if (!is_vsmp_box()) | ||
2256 | return 0; | ||
2257 | |||
2186 | /* | 2258 | /* |
2187 | * If clusters > 2, then should be multi-chassis. | 2259 | * ScaleMP vSMPowered boxes have one cluster per board and TSCs are |
2188 | * May have to revisit this when multi-core + hyperthreaded CPUs come | 2260 | * not guaranteed to be synced between boards |
2189 | * out, but AFAIK this will work even for them. | ||
2190 | */ | 2261 | */ |
2191 | return (clusters > 2); | 2262 | if (apic_cluster_num() > 1) |
2263 | return 1; | ||
2264 | |||
2265 | return 0; | ||
2192 | } | 2266 | } |
2193 | #endif | 2267 | #endif |
2194 | 2268 | ||
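With the rewrite above, apic_is_clustered_box() no longer special-cases AMD; it combines a DMI match on IBM Summit2 with the vSMP cluster count purely to answer "can the TSC be trusted across CPUs". A hedged sketch of the typical consumer (an unsynchronized_tsc()-style helper on the TSC side is assumed here, it is not part of this diff):

	/* Illustrative caller, not part of this patch. */
	static int tsc_unreliable_across_cpus(void)
	{
	#ifdef CONFIG_X86_64
		/* Multi-chassis Summit2 or multi-board vSMP: TSCs may not be
		 * synced between boards, so prefer another clocksource (HPET). */
		if (apic_is_clustered_box())
			return 1;
	#endif
		return 0;
	}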
diff --git a/arch/x86/kernel/apic/apic_flat_64.c b/arch/x86/kernel/apic/apic_flat_64.c index 306e5e88fb6f..d0c99abc26c3 100644 --- a/arch/x86/kernel/apic/apic_flat_64.c +++ b/arch/x86/kernel/apic/apic_flat_64.c | |||
@@ -161,7 +161,7 @@ static int flat_apic_id_registered(void) | |||
161 | 161 | ||
162 | static int flat_phys_pkg_id(int initial_apic_id, int index_msb) | 162 | static int flat_phys_pkg_id(int initial_apic_id, int index_msb) |
163 | { | 163 | { |
164 | return hard_smp_processor_id() >> index_msb; | 164 | return initial_apic_id >> index_msb; |
165 | } | 165 | } |
166 | 166 | ||
167 | struct apic apic_flat = { | 167 | struct apic apic_flat = { |
@@ -235,7 +235,7 @@ static int physflat_acpi_madt_oem_check(char *oem_id, char *oem_table_id) | |||
235 | * regardless of how many processors are present (x86_64 ES7000 | 235 | * regardless of how many processors are present (x86_64 ES7000 |
236 | * is an example). | 236 | * is an example). |
237 | */ | 237 | */ |
238 | if (acpi_gbl_FADT.header.revision > FADT2_REVISION_ID && | 238 | if (acpi_gbl_FADT.header.revision >= FADT2_REVISION_ID && |
239 | (acpi_gbl_FADT.flags & ACPI_FADT_APIC_PHYSICAL)) { | 239 | (acpi_gbl_FADT.flags & ACPI_FADT_APIC_PHYSICAL)) { |
240 | printk(KERN_DEBUG "system APIC only can use physical flat"); | 240 | printk(KERN_DEBUG "system APIC only can use physical flat"); |
241 | return 1; | 241 | return 1; |
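In the flat_phys_pkg_id() hunk above, the package id is now derived from the initial APIC id passed in rather than from re-reading the current CPU's id, so the result is also correct when the function is evaluated for a CPU other than the one it runs on. A small worked example with illustrative values:

	/* index_msb = number of APIC-id bits used for threads/cores inside
	 * one package.  With index_msb == 2:
	 *
	 *   initial_apic_id 0x5 (0b0101) >> 2  ->  package 1
	 *   initial_apic_id 0xB (0b1011) >> 2  ->  package 2
	 *
	 * hard_smp_processor_id() >> index_msb only matched this when the
	 * code happened to run on the CPU being queried. */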
diff --git a/arch/x86/kernel/apic/es7000_32.c b/arch/x86/kernel/apic/es7000_32.c index 302947775575..69328ac8de9c 100644 --- a/arch/x86/kernel/apic/es7000_32.c +++ b/arch/x86/kernel/apic/es7000_32.c | |||
@@ -145,7 +145,7 @@ es7000_rename_gsi(int ioapic, int gsi) | |||
145 | return gsi; | 145 | return gsi; |
146 | } | 146 | } |
147 | 147 | ||
148 | static int wakeup_secondary_cpu_via_mip(int cpu, unsigned long eip) | 148 | static int __cpuinit wakeup_secondary_cpu_via_mip(int cpu, unsigned long eip) |
149 | { | 149 | { |
150 | unsigned long vect = 0, psaival = 0; | 150 | unsigned long vect = 0, psaival = 0; |
151 | 151 | ||
diff --git a/arch/x86/kernel/apic/io_apic.c b/arch/x86/kernel/apic/io_apic.c index 30da617d18e4..4d0216fcb36c 100644 --- a/arch/x86/kernel/apic/io_apic.c +++ b/arch/x86/kernel/apic/io_apic.c | |||
@@ -59,6 +59,7 @@ | |||
59 | #include <asm/setup.h> | 59 | #include <asm/setup.h> |
60 | #include <asm/irq_remapping.h> | 60 | #include <asm/irq_remapping.h> |
61 | #include <asm/hpet.h> | 61 | #include <asm/hpet.h> |
62 | #include <asm/hw_irq.h> | ||
62 | #include <asm/uv/uv_hub.h> | 63 | #include <asm/uv/uv_hub.h> |
63 | #include <asm/uv/uv_irq.h> | 64 | #include <asm/uv/uv_irq.h> |
64 | 65 | ||
@@ -129,12 +130,9 @@ struct irq_pin_list { | |||
129 | struct irq_pin_list *next; | 130 | struct irq_pin_list *next; |
130 | }; | 131 | }; |
131 | 132 | ||
132 | static struct irq_pin_list *get_one_free_irq_2_pin(int cpu) | 133 | static struct irq_pin_list *get_one_free_irq_2_pin(int node) |
133 | { | 134 | { |
134 | struct irq_pin_list *pin; | 135 | struct irq_pin_list *pin; |
135 | int node; | ||
136 | |||
137 | node = cpu_to_node(cpu); | ||
138 | 136 | ||
139 | pin = kzalloc_node(sizeof(*pin), GFP_ATOMIC, node); | 137 | pin = kzalloc_node(sizeof(*pin), GFP_ATOMIC, node); |
140 | 138 | ||
@@ -148,9 +146,6 @@ struct irq_cfg { | |||
148 | unsigned move_cleanup_count; | 146 | unsigned move_cleanup_count; |
149 | u8 vector; | 147 | u8 vector; |
150 | u8 move_in_progress : 1; | 148 | u8 move_in_progress : 1; |
151 | #ifdef CONFIG_NUMA_MIGRATE_IRQ_DESC | ||
152 | u8 move_desc_pending : 1; | ||
153 | #endif | ||
154 | }; | 149 | }; |
155 | 150 | ||
156 | /* irq_cfg is indexed by the sum of all RTEs in all I/O APICs. */ | 151 | /* irq_cfg is indexed by the sum of all RTEs in all I/O APICs. */ |
@@ -182,16 +177,18 @@ int __init arch_early_irq_init(void) | |||
182 | struct irq_cfg *cfg; | 177 | struct irq_cfg *cfg; |
183 | struct irq_desc *desc; | 178 | struct irq_desc *desc; |
184 | int count; | 179 | int count; |
180 | int node; | ||
185 | int i; | 181 | int i; |
186 | 182 | ||
187 | cfg = irq_cfgx; | 183 | cfg = irq_cfgx; |
188 | count = ARRAY_SIZE(irq_cfgx); | 184 | count = ARRAY_SIZE(irq_cfgx); |
185 | node = cpu_to_node(boot_cpu_id); | ||
189 | 186 | ||
190 | for (i = 0; i < count; i++) { | 187 | for (i = 0; i < count; i++) { |
191 | desc = irq_to_desc(i); | 188 | desc = irq_to_desc(i); |
192 | desc->chip_data = &cfg[i]; | 189 | desc->chip_data = &cfg[i]; |
193 | alloc_bootmem_cpumask_var(&cfg[i].domain); | 190 | zalloc_cpumask_var_node(&cfg[i].domain, GFP_NOWAIT, node); |
194 | alloc_bootmem_cpumask_var(&cfg[i].old_domain); | 191 | zalloc_cpumask_var_node(&cfg[i].old_domain, GFP_NOWAIT, node); |
195 | if (i < NR_IRQS_LEGACY) | 192 | if (i < NR_IRQS_LEGACY) |
196 | cpumask_setall(cfg[i].domain); | 193 | cpumask_setall(cfg[i].domain); |
197 | } | 194 | } |
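The arch_early_irq_init() hunk above switches the per-IRQ cpumask allocations from bootmem to zeroed, node-local allocations on the boot CPU's node. The distinction only matters when CONFIG_CPUMASK_OFFSTACK makes cpumask_var_t a real allocation; GFP_NOWAIT is used because this runs too early to sleep. A minimal sketch of the same pattern:

	int node = cpu_to_node(boot_cpu_id);
	cpumask_var_t mask;

	/* zeroed allocation placed on 'node'; must not sleep this early */
	if (!zalloc_cpumask_var_node(&mask, GFP_NOWAIT, node))
		return -ENOMEM;	/* illustrative error handling */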
@@ -212,12 +209,9 @@ static struct irq_cfg *irq_cfg(unsigned int irq) | |||
212 | return cfg; | 209 | return cfg; |
213 | } | 210 | } |
214 | 211 | ||
215 | static struct irq_cfg *get_one_free_irq_cfg(int cpu) | 212 | static struct irq_cfg *get_one_free_irq_cfg(int node) |
216 | { | 213 | { |
217 | struct irq_cfg *cfg; | 214 | struct irq_cfg *cfg; |
218 | int node; | ||
219 | |||
220 | node = cpu_to_node(cpu); | ||
221 | 215 | ||
222 | cfg = kzalloc_node(sizeof(*cfg), GFP_ATOMIC, node); | 216 | cfg = kzalloc_node(sizeof(*cfg), GFP_ATOMIC, node); |
223 | if (cfg) { | 217 | if (cfg) { |
@@ -238,13 +232,13 @@ static struct irq_cfg *get_one_free_irq_cfg(int cpu) | |||
238 | return cfg; | 232 | return cfg; |
239 | } | 233 | } |
240 | 234 | ||
241 | int arch_init_chip_data(struct irq_desc *desc, int cpu) | 235 | int arch_init_chip_data(struct irq_desc *desc, int node) |
242 | { | 236 | { |
243 | struct irq_cfg *cfg; | 237 | struct irq_cfg *cfg; |
244 | 238 | ||
245 | cfg = desc->chip_data; | 239 | cfg = desc->chip_data; |
246 | if (!cfg) { | 240 | if (!cfg) { |
247 | desc->chip_data = get_one_free_irq_cfg(cpu); | 241 | desc->chip_data = get_one_free_irq_cfg(node); |
248 | if (!desc->chip_data) { | 242 | if (!desc->chip_data) { |
249 | printk(KERN_ERR "can not alloc irq_cfg\n"); | 243 | printk(KERN_ERR "can not alloc irq_cfg\n"); |
250 | BUG_ON(1); | 244 | BUG_ON(1); |
@@ -254,10 +248,9 @@ int arch_init_chip_data(struct irq_desc *desc, int cpu) | |||
254 | return 0; | 248 | return 0; |
255 | } | 249 | } |
256 | 250 | ||
257 | #ifdef CONFIG_NUMA_MIGRATE_IRQ_DESC | 251 | /* for move_irq_desc */ |
258 | |||
259 | static void | 252 | static void |
260 | init_copy_irq_2_pin(struct irq_cfg *old_cfg, struct irq_cfg *cfg, int cpu) | 253 | init_copy_irq_2_pin(struct irq_cfg *old_cfg, struct irq_cfg *cfg, int node) |
261 | { | 254 | { |
262 | struct irq_pin_list *old_entry, *head, *tail, *entry; | 255 | struct irq_pin_list *old_entry, *head, *tail, *entry; |
263 | 256 | ||
@@ -266,7 +259,7 @@ init_copy_irq_2_pin(struct irq_cfg *old_cfg, struct irq_cfg *cfg, int cpu) | |||
266 | if (!old_entry) | 259 | if (!old_entry) |
267 | return; | 260 | return; |
268 | 261 | ||
269 | entry = get_one_free_irq_2_pin(cpu); | 262 | entry = get_one_free_irq_2_pin(node); |
270 | if (!entry) | 263 | if (!entry) |
271 | return; | 264 | return; |
272 | 265 | ||
@@ -276,7 +269,7 @@ init_copy_irq_2_pin(struct irq_cfg *old_cfg, struct irq_cfg *cfg, int cpu) | |||
276 | tail = entry; | 269 | tail = entry; |
277 | old_entry = old_entry->next; | 270 | old_entry = old_entry->next; |
278 | while (old_entry) { | 271 | while (old_entry) { |
279 | entry = get_one_free_irq_2_pin(cpu); | 272 | entry = get_one_free_irq_2_pin(node); |
280 | if (!entry) { | 273 | if (!entry) { |
281 | entry = head; | 274 | entry = head; |
282 | while (entry) { | 275 | while (entry) { |
@@ -316,12 +309,12 @@ static void free_irq_2_pin(struct irq_cfg *old_cfg, struct irq_cfg *cfg) | |||
316 | } | 309 | } |
317 | 310 | ||
318 | void arch_init_copy_chip_data(struct irq_desc *old_desc, | 311 | void arch_init_copy_chip_data(struct irq_desc *old_desc, |
319 | struct irq_desc *desc, int cpu) | 312 | struct irq_desc *desc, int node) |
320 | { | 313 | { |
321 | struct irq_cfg *cfg; | 314 | struct irq_cfg *cfg; |
322 | struct irq_cfg *old_cfg; | 315 | struct irq_cfg *old_cfg; |
323 | 316 | ||
324 | cfg = get_one_free_irq_cfg(cpu); | 317 | cfg = get_one_free_irq_cfg(node); |
325 | 318 | ||
326 | if (!cfg) | 319 | if (!cfg) |
327 | return; | 320 | return; |
@@ -332,7 +325,7 @@ void arch_init_copy_chip_data(struct irq_desc *old_desc, | |||
332 | 325 | ||
333 | memcpy(cfg, old_cfg, sizeof(struct irq_cfg)); | 326 | memcpy(cfg, old_cfg, sizeof(struct irq_cfg)); |
334 | 327 | ||
335 | init_copy_irq_2_pin(old_cfg, cfg, cpu); | 328 | init_copy_irq_2_pin(old_cfg, cfg, node); |
336 | } | 329 | } |
337 | 330 | ||
338 | static void free_irq_cfg(struct irq_cfg *old_cfg) | 331 | static void free_irq_cfg(struct irq_cfg *old_cfg) |
@@ -356,19 +349,7 @@ void arch_free_chip_data(struct irq_desc *old_desc, struct irq_desc *desc) | |||
356 | old_desc->chip_data = NULL; | 349 | old_desc->chip_data = NULL; |
357 | } | 350 | } |
358 | } | 351 | } |
359 | 352 | /* end for move_irq_desc */ | |
360 | static void | ||
361 | set_extra_move_desc(struct irq_desc *desc, const struct cpumask *mask) | ||
362 | { | ||
363 | struct irq_cfg *cfg = desc->chip_data; | ||
364 | |||
365 | if (!cfg->move_in_progress) { | ||
366 | /* it means that domain is not changed */ | ||
367 | if (!cpumask_intersects(desc->affinity, mask)) | ||
368 | cfg->move_desc_pending = 1; | ||
369 | } | ||
370 | } | ||
371 | #endif | ||
372 | 353 | ||
373 | #else | 354 | #else |
374 | static struct irq_cfg *irq_cfg(unsigned int irq) | 355 | static struct irq_cfg *irq_cfg(unsigned int irq) |
@@ -378,13 +359,6 @@ static struct irq_cfg *irq_cfg(unsigned int irq) | |||
378 | 359 | ||
379 | #endif | 360 | #endif |
380 | 361 | ||
381 | #ifndef CONFIG_NUMA_MIGRATE_IRQ_DESC | ||
382 | static inline void | ||
383 | set_extra_move_desc(struct irq_desc *desc, const struct cpumask *mask) | ||
384 | { | ||
385 | } | ||
386 | #endif | ||
387 | |||
388 | struct io_apic { | 362 | struct io_apic { |
389 | unsigned int index; | 363 | unsigned int index; |
390 | unsigned int unused[3]; | 364 | unsigned int unused[3]; |
@@ -488,7 +462,8 @@ static struct IO_APIC_route_entry ioapic_read_entry(int apic, int pin) | |||
488 | static void | 462 | static void |
489 | __ioapic_write_entry(int apic, int pin, struct IO_APIC_route_entry e) | 463 | __ioapic_write_entry(int apic, int pin, struct IO_APIC_route_entry e) |
490 | { | 464 | { |
491 | union entry_union eu; | 465 | union entry_union eu = {{0, 0}}; |
466 | |||
492 | eu.entry = e; | 467 | eu.entry = e; |
493 | io_apic_write(apic, 0x11 + 2*pin, eu.w2); | 468 | io_apic_write(apic, 0x11 + 2*pin, eu.w2); |
494 | io_apic_write(apic, 0x10 + 2*pin, eu.w1); | 469 | io_apic_write(apic, 0x10 + 2*pin, eu.w1); |
@@ -518,132 +493,18 @@ static void ioapic_mask_entry(int apic, int pin) | |||
518 | spin_unlock_irqrestore(&ioapic_lock, flags); | 493 | spin_unlock_irqrestore(&ioapic_lock, flags); |
519 | } | 494 | } |
520 | 495 | ||
521 | #ifdef CONFIG_SMP | ||
522 | static void send_cleanup_vector(struct irq_cfg *cfg) | ||
523 | { | ||
524 | cpumask_var_t cleanup_mask; | ||
525 | |||
526 | if (unlikely(!alloc_cpumask_var(&cleanup_mask, GFP_ATOMIC))) { | ||
527 | unsigned int i; | ||
528 | cfg->move_cleanup_count = 0; | ||
529 | for_each_cpu_and(i, cfg->old_domain, cpu_online_mask) | ||
530 | cfg->move_cleanup_count++; | ||
531 | for_each_cpu_and(i, cfg->old_domain, cpu_online_mask) | ||
532 | apic->send_IPI_mask(cpumask_of(i), IRQ_MOVE_CLEANUP_VECTOR); | ||
533 | } else { | ||
534 | cpumask_and(cleanup_mask, cfg->old_domain, cpu_online_mask); | ||
535 | cfg->move_cleanup_count = cpumask_weight(cleanup_mask); | ||
536 | apic->send_IPI_mask(cleanup_mask, IRQ_MOVE_CLEANUP_VECTOR); | ||
537 | free_cpumask_var(cleanup_mask); | ||
538 | } | ||
539 | cfg->move_in_progress = 0; | ||
540 | } | ||
541 | |||
542 | static void __target_IO_APIC_irq(unsigned int irq, unsigned int dest, struct irq_cfg *cfg) | ||
543 | { | ||
544 | int apic, pin; | ||
545 | struct irq_pin_list *entry; | ||
546 | u8 vector = cfg->vector; | ||
547 | |||
548 | entry = cfg->irq_2_pin; | ||
549 | for (;;) { | ||
550 | unsigned int reg; | ||
551 | |||
552 | if (!entry) | ||
553 | break; | ||
554 | |||
555 | apic = entry->apic; | ||
556 | pin = entry->pin; | ||
557 | /* | ||
558 | * With interrupt-remapping, destination information comes | ||
559 | * from interrupt-remapping table entry. | ||
560 | */ | ||
561 | if (!irq_remapped(irq)) | ||
562 | io_apic_write(apic, 0x11 + pin*2, dest); | ||
563 | reg = io_apic_read(apic, 0x10 + pin*2); | ||
564 | reg &= ~IO_APIC_REDIR_VECTOR_MASK; | ||
565 | reg |= vector; | ||
566 | io_apic_modify(apic, 0x10 + pin*2, reg); | ||
567 | if (!entry->next) | ||
568 | break; | ||
569 | entry = entry->next; | ||
570 | } | ||
571 | } | ||
572 | |||
573 | static int | ||
574 | assign_irq_vector(int irq, struct irq_cfg *cfg, const struct cpumask *mask); | ||
575 | |||
576 | /* | ||
577 | * Either sets desc->affinity to a valid value, and returns | ||
578 | * ->cpu_mask_to_apicid of that, or returns BAD_APICID and | ||
579 | * leaves desc->affinity untouched. | ||
580 | */ | ||
581 | static unsigned int | ||
582 | set_desc_affinity(struct irq_desc *desc, const struct cpumask *mask) | ||
583 | { | ||
584 | struct irq_cfg *cfg; | ||
585 | unsigned int irq; | ||
586 | |||
587 | if (!cpumask_intersects(mask, cpu_online_mask)) | ||
588 | return BAD_APICID; | ||
589 | |||
590 | irq = desc->irq; | ||
591 | cfg = desc->chip_data; | ||
592 | if (assign_irq_vector(irq, cfg, mask)) | ||
593 | return BAD_APICID; | ||
594 | |||
595 | /* check that before desc->affinity gets updated */ | ||
596 | set_extra_move_desc(desc, mask); | ||
597 | |||
598 | cpumask_copy(desc->affinity, mask); | ||
599 | |||
600 | return apic->cpu_mask_to_apicid_and(desc->affinity, cfg->domain); | ||
601 | } | ||
602 | |||
603 | static void | ||
604 | set_ioapic_affinity_irq_desc(struct irq_desc *desc, const struct cpumask *mask) | ||
605 | { | ||
606 | struct irq_cfg *cfg; | ||
607 | unsigned long flags; | ||
608 | unsigned int dest; | ||
609 | unsigned int irq; | ||
610 | |||
611 | irq = desc->irq; | ||
612 | cfg = desc->chip_data; | ||
613 | |||
614 | spin_lock_irqsave(&ioapic_lock, flags); | ||
615 | dest = set_desc_affinity(desc, mask); | ||
616 | if (dest != BAD_APICID) { | ||
617 | /* Only the high 8 bits are valid. */ | ||
618 | dest = SET_APIC_LOGICAL_ID(dest); | ||
619 | __target_IO_APIC_irq(irq, dest, cfg); | ||
620 | } | ||
621 | spin_unlock_irqrestore(&ioapic_lock, flags); | ||
622 | } | ||
623 | |||
624 | static void | ||
625 | set_ioapic_affinity_irq(unsigned int irq, const struct cpumask *mask) | ||
626 | { | ||
627 | struct irq_desc *desc; | ||
628 | |||
629 | desc = irq_to_desc(irq); | ||
630 | |||
631 | set_ioapic_affinity_irq_desc(desc, mask); | ||
632 | } | ||
633 | #endif /* CONFIG_SMP */ | ||
634 | |||
635 | /* | 496 | /* |
636 | * The common case is 1:1 IRQ<->pin mappings. Sometimes there are | 497 | * The common case is 1:1 IRQ<->pin mappings. Sometimes there are |
637 | * shared ISA-space IRQs, so we have to support them. We are super | 498 | * shared ISA-space IRQs, so we have to support them. We are super |
638 | * fast in the common case, and fast for shared ISA-space IRQs. | 499 | * fast in the common case, and fast for shared ISA-space IRQs. |
639 | */ | 500 | */ |
640 | static void add_pin_to_irq_cpu(struct irq_cfg *cfg, int cpu, int apic, int pin) | 501 | static void add_pin_to_irq_node(struct irq_cfg *cfg, int node, int apic, int pin) |
641 | { | 502 | { |
642 | struct irq_pin_list *entry; | 503 | struct irq_pin_list *entry; |
643 | 504 | ||
644 | entry = cfg->irq_2_pin; | 505 | entry = cfg->irq_2_pin; |
645 | if (!entry) { | 506 | if (!entry) { |
646 | entry = get_one_free_irq_2_pin(cpu); | 507 | entry = get_one_free_irq_2_pin(node); |
647 | if (!entry) { | 508 | if (!entry) { |
648 | printk(KERN_ERR "can not alloc irq_2_pin to add %d - %d\n", | 509 | printk(KERN_ERR "can not alloc irq_2_pin to add %d - %d\n", |
649 | apic, pin); | 510 | apic, pin); |
@@ -663,7 +524,7 @@ static void add_pin_to_irq_cpu(struct irq_cfg *cfg, int cpu, int apic, int pin) | |||
663 | entry = entry->next; | 524 | entry = entry->next; |
664 | } | 525 | } |
665 | 526 | ||
666 | entry->next = get_one_free_irq_2_pin(cpu); | 527 | entry->next = get_one_free_irq_2_pin(node); |
667 | entry = entry->next; | 528 | entry = entry->next; |
668 | entry->apic = apic; | 529 | entry->apic = apic; |
669 | entry->pin = pin; | 530 | entry->pin = pin; |
@@ -672,7 +533,7 @@ static void add_pin_to_irq_cpu(struct irq_cfg *cfg, int cpu, int apic, int pin) | |||
672 | /* | 533 | /* |
673 | * Reroute an IRQ to a different pin. | 534 | * Reroute an IRQ to a different pin. |
674 | */ | 535 | */ |
675 | static void __init replace_pin_at_irq_cpu(struct irq_cfg *cfg, int cpu, | 536 | static void __init replace_pin_at_irq_node(struct irq_cfg *cfg, int node, |
676 | int oldapic, int oldpin, | 537 | int oldapic, int oldpin, |
677 | int newapic, int newpin) | 538 | int newapic, int newpin) |
678 | { | 539 | { |
@@ -692,7 +553,7 @@ static void __init replace_pin_at_irq_cpu(struct irq_cfg *cfg, int cpu, | |||
692 | 553 | ||
693 | /* why? call replace before add? */ | 554 | /* why? call replace before add? */ |
694 | if (!replaced) | 555 | if (!replaced) |
695 | add_pin_to_irq_cpu(cfg, cpu, newapic, newpin); | 556 | add_pin_to_irq_node(cfg, node, newapic, newpin); |
696 | } | 557 | } |
697 | 558 | ||
698 | static inline void io_apic_modify_irq(struct irq_cfg *cfg, | 559 | static inline void io_apic_modify_irq(struct irq_cfg *cfg, |
@@ -850,7 +711,6 @@ static int __init ioapic_pirq_setup(char *str) | |||
850 | __setup("pirq=", ioapic_pirq_setup); | 711 | __setup("pirq=", ioapic_pirq_setup); |
851 | #endif /* CONFIG_X86_32 */ | 712 | #endif /* CONFIG_X86_32 */ |
852 | 713 | ||
853 | #ifdef CONFIG_INTR_REMAP | ||
854 | struct IO_APIC_route_entry **alloc_ioapic_entries(void) | 714 | struct IO_APIC_route_entry **alloc_ioapic_entries(void) |
855 | { | 715 | { |
856 | int apic; | 716 | int apic; |
@@ -948,20 +808,6 @@ int restore_IO_APIC_setup(struct IO_APIC_route_entry **ioapic_entries) | |||
948 | return 0; | 808 | return 0; |
949 | } | 809 | } |
950 | 810 | ||
951 | void reinit_intr_remapped_IO_APIC(int intr_remapping, | ||
952 | struct IO_APIC_route_entry **ioapic_entries) | ||
953 | |||
954 | { | ||
955 | /* | ||
956 | * for now plain restore of previous settings. | ||
957 | * TBD: In the case of OS enabling interrupt-remapping, | ||
958 | * IO-APIC RTE's need to be setup to point to interrupt-remapping | ||
959 | * table entries. for now, do a plain restore, and wait for | ||
960 | * the setup_IO_APIC_irqs() to do proper initialization. | ||
961 | */ | ||
962 | restore_IO_APIC_setup(ioapic_entries); | ||
963 | } | ||
964 | |||
965 | void free_ioapic_entries(struct IO_APIC_route_entry **ioapic_entries) | 811 | void free_ioapic_entries(struct IO_APIC_route_entry **ioapic_entries) |
966 | { | 812 | { |
967 | int apic; | 813 | int apic; |
@@ -971,7 +817,6 @@ void free_ioapic_entries(struct IO_APIC_route_entry **ioapic_entries) | |||
971 | 817 | ||
972 | kfree(ioapic_entries); | 818 | kfree(ioapic_entries); |
973 | } | 819 | } |
974 | #endif | ||
975 | 820 | ||
976 | /* | 821 | /* |
977 | * Find the IRQ entry number of a certain pin. | 822 | * Find the IRQ entry number of a certain pin. |
@@ -1032,54 +877,6 @@ static int __init find_isa_irq_apic(int irq, int type) | |||
1032 | return -1; | 877 | return -1; |
1033 | } | 878 | } |
1034 | 879 | ||
1035 | /* | ||
1036 | * Find a specific PCI IRQ entry. | ||
1037 | * Not an __init, possibly needed by modules | ||
1038 | */ | ||
1039 | static int pin_2_irq(int idx, int apic, int pin); | ||
1040 | |||
1041 | int IO_APIC_get_PCI_irq_vector(int bus, int slot, int pin) | ||
1042 | { | ||
1043 | int apic, i, best_guess = -1; | ||
1044 | |||
1045 | apic_printk(APIC_DEBUG, "querying PCI -> IRQ mapping bus:%d, slot:%d, pin:%d.\n", | ||
1046 | bus, slot, pin); | ||
1047 | if (test_bit(bus, mp_bus_not_pci)) { | ||
1048 | apic_printk(APIC_VERBOSE, "PCI BIOS passed nonexistent PCI bus %d!\n", bus); | ||
1049 | return -1; | ||
1050 | } | ||
1051 | for (i = 0; i < mp_irq_entries; i++) { | ||
1052 | int lbus = mp_irqs[i].srcbus; | ||
1053 | |||
1054 | for (apic = 0; apic < nr_ioapics; apic++) | ||
1055 | if (mp_ioapics[apic].apicid == mp_irqs[i].dstapic || | ||
1056 | mp_irqs[i].dstapic == MP_APIC_ALL) | ||
1057 | break; | ||
1058 | |||
1059 | if (!test_bit(lbus, mp_bus_not_pci) && | ||
1060 | !mp_irqs[i].irqtype && | ||
1061 | (bus == lbus) && | ||
1062 | (slot == ((mp_irqs[i].srcbusirq >> 2) & 0x1f))) { | ||
1063 | int irq = pin_2_irq(i, apic, mp_irqs[i].dstirq); | ||
1064 | |||
1065 | if (!(apic || IO_APIC_IRQ(irq))) | ||
1066 | continue; | ||
1067 | |||
1068 | if (pin == (mp_irqs[i].srcbusirq & 3)) | ||
1069 | return irq; | ||
1070 | /* | ||
1071 | * Use the first all-but-pin matching entry as a | ||
1072 | * best-guess fuzzy result for broken mptables. | ||
1073 | */ | ||
1074 | if (best_guess < 0) | ||
1075 | best_guess = irq; | ||
1076 | } | ||
1077 | } | ||
1078 | return best_guess; | ||
1079 | } | ||
1080 | |||
1081 | EXPORT_SYMBOL(IO_APIC_get_PCI_irq_vector); | ||
1082 | |||
1083 | #if defined(CONFIG_EISA) || defined(CONFIG_MCA) | 880 | #if defined(CONFIG_EISA) || defined(CONFIG_MCA) |
1084 | /* | 881 | /* |
1085 | * EISA Edge/Level control register, ELCR | 882 | * EISA Edge/Level control register, ELCR |
@@ -1298,6 +1095,64 @@ static int pin_2_irq(int idx, int apic, int pin) | |||
1298 | return irq; | 1095 | return irq; |
1299 | } | 1096 | } |
1300 | 1097 | ||
1098 | /* | ||
1099 | * Find a specific PCI IRQ entry. | ||
1100 | * Not an __init, possibly needed by modules | ||
1101 | */ | ||
1102 | int IO_APIC_get_PCI_irq_vector(int bus, int slot, int pin, | ||
1103 | struct io_apic_irq_attr *irq_attr) | ||
1104 | { | ||
1105 | int apic, i, best_guess = -1; | ||
1106 | |||
1107 | apic_printk(APIC_DEBUG, | ||
1108 | "querying PCI -> IRQ mapping bus:%d, slot:%d, pin:%d.\n", | ||
1109 | bus, slot, pin); | ||
1110 | if (test_bit(bus, mp_bus_not_pci)) { | ||
1111 | apic_printk(APIC_VERBOSE, | ||
1112 | "PCI BIOS passed nonexistent PCI bus %d!\n", bus); | ||
1113 | return -1; | ||
1114 | } | ||
1115 | for (i = 0; i < mp_irq_entries; i++) { | ||
1116 | int lbus = mp_irqs[i].srcbus; | ||
1117 | |||
1118 | for (apic = 0; apic < nr_ioapics; apic++) | ||
1119 | if (mp_ioapics[apic].apicid == mp_irqs[i].dstapic || | ||
1120 | mp_irqs[i].dstapic == MP_APIC_ALL) | ||
1121 | break; | ||
1122 | |||
1123 | if (!test_bit(lbus, mp_bus_not_pci) && | ||
1124 | !mp_irqs[i].irqtype && | ||
1125 | (bus == lbus) && | ||
1126 | (slot == ((mp_irqs[i].srcbusirq >> 2) & 0x1f))) { | ||
1127 | int irq = pin_2_irq(i, apic, mp_irqs[i].dstirq); | ||
1128 | |||
1129 | if (!(apic || IO_APIC_IRQ(irq))) | ||
1130 | continue; | ||
1131 | |||
1132 | if (pin == (mp_irqs[i].srcbusirq & 3)) { | ||
1133 | set_io_apic_irq_attr(irq_attr, apic, | ||
1134 | mp_irqs[i].dstirq, | ||
1135 | irq_trigger(i), | ||
1136 | irq_polarity(i)); | ||
1137 | return irq; | ||
1138 | } | ||
1139 | /* | ||
1140 | * Use the first all-but-pin matching entry as a | ||
1141 | * best-guess fuzzy result for broken mptables. | ||
1142 | */ | ||
1143 | if (best_guess < 0) { | ||
1144 | set_io_apic_irq_attr(irq_attr, apic, | ||
1145 | mp_irqs[i].dstirq, | ||
1146 | irq_trigger(i), | ||
1147 | irq_polarity(i)); | ||
1148 | best_guess = irq; | ||
1149 | } | ||
1150 | } | ||
1151 | } | ||
1152 | return best_guess; | ||
1153 | } | ||
1154 | EXPORT_SYMBOL(IO_APIC_get_PCI_irq_vector); | ||
1155 | |||
1301 | void lock_vector_lock(void) | 1156 | void lock_vector_lock(void) |
1302 | { | 1157 | { |
1303 | /* Used so that the online set of cpus does not change | 1158 |
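IO_APIC_get_PCI_irq_vector() is moved below pin_2_irq() in the hunk above (so the forward declaration goes away) and gains a struct io_apic_irq_attr out-parameter recording which IO-APIC pin, trigger and polarity matched; the struct and set_io_apic_irq_attr() come from <asm/hw_irq.h>, included earlier in this diff. A hedged sketch of a PCI IRQ-routing caller using the new signature (the io_apic_set_pci_routing() call is an assumption about surrounding code, not shown in this hunk):

	struct io_apic_irq_attr irq_attr;
	int irq;

	/* resolve bus/slot/INTx pin to an IRQ and remember which IO-APIC
	 * pin, trigger and polarity the MP table supplied */
	irq = IO_APIC_get_PCI_irq_vector(dev->bus->number,
					 PCI_SLOT(dev->devfn), pin, &irq_attr);
	if (irq >= 0)
		io_apic_set_pci_routing(&dev->dev, irq, &irq_attr);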
@@ -1559,6 +1414,9 @@ int setup_ioapic_entry(int apic_id, int irq, | |||
1559 | irte.vector = vector; | 1414 | irte.vector = vector; |
1560 | irte.dest_id = IRTE_DEST(destination); | 1415 | irte.dest_id = IRTE_DEST(destination); |
1561 | 1416 | ||
1417 | /* Set source-id of interrupt request */ | ||
1418 | set_ioapic_sid(&irte, apic_id); | ||
1419 | |||
1562 | modify_irte(irq, &irte); | 1420 | modify_irte(irq, &irte); |
1563 | 1421 | ||
1564 | ir_entry->index2 = (index >> 15) & 0x1; | 1422 | ir_entry->index2 = (index >> 15) & 0x1; |
@@ -1628,58 +1486,70 @@ static void setup_IO_APIC_irq(int apic_id, int pin, unsigned int irq, struct irq | |||
1628 | ioapic_write_entry(apic_id, pin, entry); | 1486 | ioapic_write_entry(apic_id, pin, entry); |
1629 | } | 1487 | } |
1630 | 1488 | ||
1489 | static struct { | ||
1490 | DECLARE_BITMAP(pin_programmed, MP_MAX_IOAPIC_PIN + 1); | ||
1491 | } mp_ioapic_routing[MAX_IO_APICS]; | ||
1492 | |||
1631 | static void __init setup_IO_APIC_irqs(void) | 1493 | static void __init setup_IO_APIC_irqs(void) |
1632 | { | 1494 | { |
1633 | int apic_id, pin, idx, irq; | 1495 | int apic_id = 0, pin, idx, irq; |
1634 | int notcon = 0; | 1496 | int notcon = 0; |
1635 | struct irq_desc *desc; | 1497 | struct irq_desc *desc; |
1636 | struct irq_cfg *cfg; | 1498 | struct irq_cfg *cfg; |
1637 | int cpu = boot_cpu_id; | 1499 | int node = cpu_to_node(boot_cpu_id); |
1638 | 1500 | ||
1639 | apic_printk(APIC_VERBOSE, KERN_DEBUG "init IO_APIC IRQs\n"); | 1501 | apic_printk(APIC_VERBOSE, KERN_DEBUG "init IO_APIC IRQs\n"); |
1640 | 1502 | ||
1641 | for (apic_id = 0; apic_id < nr_ioapics; apic_id++) { | 1503 | #ifdef CONFIG_ACPI |
1642 | for (pin = 0; pin < nr_ioapic_registers[apic_id]; pin++) { | 1504 | if (!acpi_disabled && acpi_ioapic) { |
1643 | 1505 | apic_id = mp_find_ioapic(0); | |
1644 | idx = find_irq_entry(apic_id, pin, mp_INT); | 1506 | if (apic_id < 0) |
1645 | if (idx == -1) { | 1507 | apic_id = 0; |
1646 | if (!notcon) { | 1508 | } |
1647 | notcon = 1; | 1509 | #endif |
1648 | apic_printk(APIC_VERBOSE, | ||
1649 | KERN_DEBUG " %d-%d", | ||
1650 | mp_ioapics[apic_id].apicid, pin); | ||
1651 | } else | ||
1652 | apic_printk(APIC_VERBOSE, " %d-%d", | ||
1653 | mp_ioapics[apic_id].apicid, pin); | ||
1654 | continue; | ||
1655 | } | ||
1656 | if (notcon) { | ||
1657 | apic_printk(APIC_VERBOSE, | ||
1658 | " (apicid-pin) not connected\n"); | ||
1659 | notcon = 0; | ||
1660 | } | ||
1661 | 1510 | ||
1662 | irq = pin_2_irq(idx, apic_id, pin); | 1511 | for (pin = 0; pin < nr_ioapic_registers[apic_id]; pin++) { |
1512 | idx = find_irq_entry(apic_id, pin, mp_INT); | ||
1513 | if (idx == -1) { | ||
1514 | if (!notcon) { | ||
1515 | notcon = 1; | ||
1516 | apic_printk(APIC_VERBOSE, | ||
1517 | KERN_DEBUG " %d-%d", | ||
1518 | mp_ioapics[apic_id].apicid, pin); | ||
1519 | } else | ||
1520 | apic_printk(APIC_VERBOSE, " %d-%d", | ||
1521 | mp_ioapics[apic_id].apicid, pin); | ||
1522 | continue; | ||
1523 | } | ||
1524 | if (notcon) { | ||
1525 | apic_printk(APIC_VERBOSE, | ||
1526 | " (apicid-pin) not connected\n"); | ||
1527 | notcon = 0; | ||
1528 | } | ||
1663 | 1529 | ||
1664 | /* | 1530 | irq = pin_2_irq(idx, apic_id, pin); |
1665 | * Skip the timer IRQ if there's a quirk handler | ||
1666 | * installed and if it returns 1: | ||
1667 | */ | ||
1668 | if (apic->multi_timer_check && | ||
1669 | apic->multi_timer_check(apic_id, irq)) | ||
1670 | continue; | ||
1671 | 1531 | ||
1672 | desc = irq_to_desc_alloc_cpu(irq, cpu); | 1532 | /* |
1673 | if (!desc) { | 1533 | * Skip the timer IRQ if there's a quirk handler |
1674 | printk(KERN_INFO "can not get irq_desc for %d\n", irq); | 1534 | * installed and if it returns 1: |
1675 | continue; | 1535 | */ |
1676 | } | 1536 | if (apic->multi_timer_check && |
1677 | cfg = desc->chip_data; | 1537 | apic->multi_timer_check(apic_id, irq)) |
1678 | add_pin_to_irq_cpu(cfg, cpu, apic_id, pin); | 1538 | continue; |
1679 | 1539 | ||
1680 | setup_IO_APIC_irq(apic_id, pin, irq, desc, | 1540 | desc = irq_to_desc_alloc_node(irq, node); |
1681 | irq_trigger(idx), irq_polarity(idx)); | 1541 | if (!desc) { |
1542 | printk(KERN_INFO "can not get irq_desc for %d\n", irq); | ||
1543 | continue; | ||
1682 | } | 1544 | } |
1545 | cfg = desc->chip_data; | ||
1546 | add_pin_to_irq_node(cfg, node, apic_id, pin); | ||
1547 | /* | ||
1548 | * don't mark it in pin_programmed, so that ACPI can | ||
1549 | * still set it up correctly later when irq < 16 | ||
1550 | */ | ||
1551 | setup_IO_APIC_irq(apic_id, pin, irq, desc, | ||
1552 | irq_trigger(idx), irq_polarity(idx)); | ||
1683 | } | 1553 | } |
1684 | 1554 | ||
1685 | if (notcon) | 1555 | if (notcon) |
@@ -1869,7 +1739,7 @@ __apicdebuginit(void) print_APIC_bitfield(int base) | |||
1869 | 1739 | ||
1870 | __apicdebuginit(void) print_local_APIC(void *dummy) | 1740 | __apicdebuginit(void) print_local_APIC(void *dummy) |
1871 | { | 1741 | { |
1872 | unsigned int v, ver, maxlvt; | 1742 | unsigned int i, v, ver, maxlvt; |
1873 | u64 icr; | 1743 | u64 icr; |
1874 | 1744 | ||
1875 | if (apic_verbosity == APIC_QUIET) | 1745 | if (apic_verbosity == APIC_QUIET) |
@@ -1957,6 +1827,18 @@ __apicdebuginit(void) print_local_APIC(void *dummy) | |||
1957 | printk(KERN_DEBUG "... APIC TMCCT: %08x\n", v); | 1827 | printk(KERN_DEBUG "... APIC TMCCT: %08x\n", v); |
1958 | v = apic_read(APIC_TDCR); | 1828 | v = apic_read(APIC_TDCR); |
1959 | printk(KERN_DEBUG "... APIC TDCR: %08x\n", v); | 1829 | printk(KERN_DEBUG "... APIC TDCR: %08x\n", v); |
1830 | |||
1831 | if (boot_cpu_has(X86_FEATURE_EXTAPIC)) { | ||
1832 | v = apic_read(APIC_EFEAT); | ||
1833 | maxlvt = (v >> 16) & 0xff; | ||
1834 | printk(KERN_DEBUG "... APIC EFEAT: %08x\n", v); | ||
1835 | v = apic_read(APIC_ECTRL); | ||
1836 | printk(KERN_DEBUG "... APIC ECTRL: %08x\n", v); | ||
1837 | for (i = 0; i < maxlvt; i++) { | ||
1838 | v = apic_read(APIC_EILVTn(i)); | ||
1839 | printk(KERN_DEBUG "... APIC EILVT%d: %08x\n", i, v); | ||
1840 | } | ||
1841 | } | ||
1960 | printk("\n"); | 1842 | printk("\n"); |
1961 | } | 1843 | } |
1962 | 1844 | ||
@@ -2005,6 +1887,11 @@ __apicdebuginit(void) print_PIC(void) | |||
2005 | __apicdebuginit(int) print_all_ICs(void) | 1887 | __apicdebuginit(int) print_all_ICs(void) |
2006 | { | 1888 | { |
2007 | print_PIC(); | 1889 | print_PIC(); |
1890 | |||
1891 | /* don't print out if apic is not there */ | ||
1892 | if (!cpu_has_apic || disable_apic) | ||
1893 | return 0; | ||
1894 | |||
2008 | print_all_local_APICs(); | 1895 | print_all_local_APICs(); |
2009 | print_IO_APIC(); | 1896 | print_IO_APIC(); |
2010 | 1897 | ||
@@ -2120,7 +2007,9 @@ void disable_IO_APIC(void) | |||
2120 | /* | 2007 | /* |
2121 | * Use virtual wire A mode when interrupt remapping is enabled. | 2008 | * Use virtual wire A mode when interrupt remapping is enabled. |
2122 | */ | 2009 | */ |
2123 | disconnect_bsp_APIC(!intr_remapping_enabled && ioapic_i8259.pin != -1); | 2010 | if (cpu_has_apic) |
2011 | disconnect_bsp_APIC(!intr_remapping_enabled && | ||
2012 | ioapic_i8259.pin != -1); | ||
2124 | } | 2013 | } |
2125 | 2014 | ||
2126 | #ifdef CONFIG_X86_32 | 2015 | #ifdef CONFIG_X86_32 |
@@ -2360,6 +2249,118 @@ static int ioapic_retrigger_irq(unsigned int irq) | |||
2360 | */ | 2249 | */ |
2361 | 2250 | ||
2362 | #ifdef CONFIG_SMP | 2251 | #ifdef CONFIG_SMP |
2252 | static void send_cleanup_vector(struct irq_cfg *cfg) | ||
2253 | { | ||
2254 | cpumask_var_t cleanup_mask; | ||
2255 | |||
2256 | if (unlikely(!alloc_cpumask_var(&cleanup_mask, GFP_ATOMIC))) { | ||
2257 | unsigned int i; | ||
2258 | cfg->move_cleanup_count = 0; | ||
2259 | for_each_cpu_and(i, cfg->old_domain, cpu_online_mask) | ||
2260 | cfg->move_cleanup_count++; | ||
2261 | for_each_cpu_and(i, cfg->old_domain, cpu_online_mask) | ||
2262 | apic->send_IPI_mask(cpumask_of(i), IRQ_MOVE_CLEANUP_VECTOR); | ||
2263 | } else { | ||
2264 | cpumask_and(cleanup_mask, cfg->old_domain, cpu_online_mask); | ||
2265 | cfg->move_cleanup_count = cpumask_weight(cleanup_mask); | ||
2266 | apic->send_IPI_mask(cleanup_mask, IRQ_MOVE_CLEANUP_VECTOR); | ||
2267 | free_cpumask_var(cleanup_mask); | ||
2268 | } | ||
2269 | cfg->move_in_progress = 0; | ||
2270 | } | ||
2271 | |||
2272 | static void __target_IO_APIC_irq(unsigned int irq, unsigned int dest, struct irq_cfg *cfg) | ||
2273 | { | ||
2274 | int apic, pin; | ||
2275 | struct irq_pin_list *entry; | ||
2276 | u8 vector = cfg->vector; | ||
2277 | |||
2278 | entry = cfg->irq_2_pin; | ||
2279 | for (;;) { | ||
2280 | unsigned int reg; | ||
2281 | |||
2282 | if (!entry) | ||
2283 | break; | ||
2284 | |||
2285 | apic = entry->apic; | ||
2286 | pin = entry->pin; | ||
2287 | /* | ||
2288 | * With interrupt-remapping, destination information comes | ||
2289 | * from interrupt-remapping table entry. | ||
2290 | */ | ||
2291 | if (!irq_remapped(irq)) | ||
2292 | io_apic_write(apic, 0x11 + pin*2, dest); | ||
2293 | reg = io_apic_read(apic, 0x10 + pin*2); | ||
2294 | reg &= ~IO_APIC_REDIR_VECTOR_MASK; | ||
2295 | reg |= vector; | ||
2296 | io_apic_modify(apic, 0x10 + pin*2, reg); | ||
2297 | if (!entry->next) | ||
2298 | break; | ||
2299 | entry = entry->next; | ||
2300 | } | ||
2301 | } | ||
2302 | |||
2303 | static int | ||
2304 | assign_irq_vector(int irq, struct irq_cfg *cfg, const struct cpumask *mask); | ||
2305 | |||
2306 | /* | ||
2307 | * Either sets desc->affinity to a valid value, and returns | ||
2308 | * ->cpu_mask_to_apicid of that, or returns BAD_APICID and | ||
2309 | * leaves desc->affinity untouched. | ||
2310 | */ | ||
2311 | static unsigned int | ||
2312 | set_desc_affinity(struct irq_desc *desc, const struct cpumask *mask) | ||
2313 | { | ||
2314 | struct irq_cfg *cfg; | ||
2315 | unsigned int irq; | ||
2316 | |||
2317 | if (!cpumask_intersects(mask, cpu_online_mask)) | ||
2318 | return BAD_APICID; | ||
2319 | |||
2320 | irq = desc->irq; | ||
2321 | cfg = desc->chip_data; | ||
2322 | if (assign_irq_vector(irq, cfg, mask)) | ||
2323 | return BAD_APICID; | ||
2324 | |||
2325 | cpumask_copy(desc->affinity, mask); | ||
2326 | |||
2327 | return apic->cpu_mask_to_apicid_and(desc->affinity, cfg->domain); | ||
2328 | } | ||
2329 | |||
2330 | static int | ||
2331 | set_ioapic_affinity_irq_desc(struct irq_desc *desc, const struct cpumask *mask) | ||
2332 | { | ||
2333 | struct irq_cfg *cfg; | ||
2334 | unsigned long flags; | ||
2335 | unsigned int dest; | ||
2336 | unsigned int irq; | ||
2337 | int ret = -1; | ||
2338 | |||
2339 | irq = desc->irq; | ||
2340 | cfg = desc->chip_data; | ||
2341 | |||
2342 | spin_lock_irqsave(&ioapic_lock, flags); | ||
2343 | dest = set_desc_affinity(desc, mask); | ||
2344 | if (dest != BAD_APICID) { | ||
2345 | /* Only the high 8 bits are valid. */ | ||
2346 | dest = SET_APIC_LOGICAL_ID(dest); | ||
2347 | __target_IO_APIC_irq(irq, dest, cfg); | ||
2348 | ret = 0; | ||
2349 | } | ||
2350 | spin_unlock_irqrestore(&ioapic_lock, flags); | ||
2351 | |||
2352 | return ret; | ||
2353 | } | ||
2354 | |||
2355 | static int | ||
2356 | set_ioapic_affinity_irq(unsigned int irq, const struct cpumask *mask) | ||
2357 | { | ||
2358 | struct irq_desc *desc; | ||
2359 | |||
2360 | desc = irq_to_desc(irq); | ||
2361 | |||
2362 | return set_ioapic_affinity_irq_desc(desc, mask); | ||
2363 | } | ||
2363 | 2364 | ||
2364 | #ifdef CONFIG_INTR_REMAP | 2365 | #ifdef CONFIG_INTR_REMAP |
2365 | 2366 | ||
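The affinity setters re-added above (set_desc_affinity(), set_ioapic_affinity_irq_desc(), set_ioapic_affinity_irq()) now return int instead of void: 0 when the new mask was applied, -1 when it could not be (no online CPU in the mask, vector assignment failed, BAD_APICID). That lets the generic IRQ layer notice rejected affinity changes, roughly along these lines (illustrative, not part of this patch):

	if (desc->chip->set_affinity(irq, mask) < 0)
		pr_debug("IRQ %u: affinity change rejected\n", irq);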
@@ -2374,26 +2375,25 @@ static int ioapic_retrigger_irq(unsigned int irq) | |||
2374 | * Real vector that is used for interrupting cpu will be coming from | 2375 | * Real vector that is used for interrupting cpu will be coming from |
2375 | * the interrupt-remapping table entry. | 2376 | * the interrupt-remapping table entry. |
2376 | */ | 2377 | */ |
2377 | static void | 2378 | static int |
2378 | migrate_ioapic_irq_desc(struct irq_desc *desc, const struct cpumask *mask) | 2379 | migrate_ioapic_irq_desc(struct irq_desc *desc, const struct cpumask *mask) |
2379 | { | 2380 | { |
2380 | struct irq_cfg *cfg; | 2381 | struct irq_cfg *cfg; |
2381 | struct irte irte; | 2382 | struct irte irte; |
2382 | unsigned int dest; | 2383 | unsigned int dest; |
2383 | unsigned int irq; | 2384 | unsigned int irq; |
2385 | int ret = -1; | ||
2384 | 2386 | ||
2385 | if (!cpumask_intersects(mask, cpu_online_mask)) | 2387 | if (!cpumask_intersects(mask, cpu_online_mask)) |
2386 | return; | 2388 | return ret; |
2387 | 2389 | ||
2388 | irq = desc->irq; | 2390 | irq = desc->irq; |
2389 | if (get_irte(irq, &irte)) | 2391 | if (get_irte(irq, &irte)) |
2390 | return; | 2392 | return ret; |
2391 | 2393 | ||
2392 | cfg = desc->chip_data; | 2394 | cfg = desc->chip_data; |
2393 | if (assign_irq_vector(irq, cfg, mask)) | 2395 | if (assign_irq_vector(irq, cfg, mask)) |
2394 | return; | 2396 | return ret; |
2395 | |||
2396 | set_extra_move_desc(desc, mask); | ||
2397 | 2397 | ||
2398 | dest = apic->cpu_mask_to_apicid_and(cfg->domain, mask); | 2398 | dest = apic->cpu_mask_to_apicid_and(cfg->domain, mask); |
2399 | 2399 | ||
@@ -2409,27 +2409,30 @@ migrate_ioapic_irq_desc(struct irq_desc *desc, const struct cpumask *mask) | |||
2409 | send_cleanup_vector(cfg); | 2409 | send_cleanup_vector(cfg); |
2410 | 2410 | ||
2411 | cpumask_copy(desc->affinity, mask); | 2411 | cpumask_copy(desc->affinity, mask); |
2412 | |||
2413 | return 0; | ||
2412 | } | 2414 | } |
2413 | 2415 | ||
2414 | /* | 2416 | /* |
2415 | * Migrates the IRQ destination in the process context. | 2417 | * Migrates the IRQ destination in the process context. |
2416 | */ | 2418 | */ |
2417 | static void set_ir_ioapic_affinity_irq_desc(struct irq_desc *desc, | 2419 | static int set_ir_ioapic_affinity_irq_desc(struct irq_desc *desc, |
2418 | const struct cpumask *mask) | 2420 | const struct cpumask *mask) |
2419 | { | 2421 | { |
2420 | migrate_ioapic_irq_desc(desc, mask); | 2422 | return migrate_ioapic_irq_desc(desc, mask); |
2421 | } | 2423 | } |
2422 | static void set_ir_ioapic_affinity_irq(unsigned int irq, | 2424 | static int set_ir_ioapic_affinity_irq(unsigned int irq, |
2423 | const struct cpumask *mask) | 2425 | const struct cpumask *mask) |
2424 | { | 2426 | { |
2425 | struct irq_desc *desc = irq_to_desc(irq); | 2427 | struct irq_desc *desc = irq_to_desc(irq); |
2426 | 2428 | ||
2427 | set_ir_ioapic_affinity_irq_desc(desc, mask); | 2429 | return set_ir_ioapic_affinity_irq_desc(desc, mask); |
2428 | } | 2430 | } |
2429 | #else | 2431 | #else |
2430 | static inline void set_ir_ioapic_affinity_irq_desc(struct irq_desc *desc, | 2432 | static inline int set_ir_ioapic_affinity_irq_desc(struct irq_desc *desc, |
2431 | const struct cpumask *mask) | 2433 | const struct cpumask *mask) |
2432 | { | 2434 | { |
2435 | return 0; | ||
2433 | } | 2436 | } |
2434 | #endif | 2437 | #endif |
2435 | 2438 | ||
@@ -2491,86 +2494,19 @@ static void irq_complete_move(struct irq_desc **descp) | |||
2491 | struct irq_cfg *cfg = desc->chip_data; | 2494 | struct irq_cfg *cfg = desc->chip_data; |
2492 | unsigned vector, me; | 2495 | unsigned vector, me; |
2493 | 2496 | ||
2494 | if (likely(!cfg->move_in_progress)) { | 2497 | if (likely(!cfg->move_in_progress)) |
2495 | #ifdef CONFIG_NUMA_MIGRATE_IRQ_DESC | ||
2496 | if (likely(!cfg->move_desc_pending)) | ||
2497 | return; | ||
2498 | |||
2499 | /* domain has not changed, but affinity did */ | ||
2500 | me = smp_processor_id(); | ||
2501 | if (cpumask_test_cpu(me, desc->affinity)) { | ||
2502 | *descp = desc = move_irq_desc(desc, me); | ||
2503 | /* get the new one */ | ||
2504 | cfg = desc->chip_data; | ||
2505 | cfg->move_desc_pending = 0; | ||
2506 | } | ||
2507 | #endif | ||
2508 | return; | 2498 | return; |
2509 | } | ||
2510 | 2499 | ||
2511 | vector = ~get_irq_regs()->orig_ax; | 2500 | vector = ~get_irq_regs()->orig_ax; |
2512 | me = smp_processor_id(); | 2501 | me = smp_processor_id(); |
2513 | 2502 | ||
2514 | if (vector == cfg->vector && cpumask_test_cpu(me, cfg->domain)) { | 2503 | if (vector == cfg->vector && cpumask_test_cpu(me, cfg->domain)) |
2515 | #ifdef CONFIG_NUMA_MIGRATE_IRQ_DESC | ||
2516 | *descp = desc = move_irq_desc(desc, me); | ||
2517 | /* get the new one */ | ||
2518 | cfg = desc->chip_data; | ||
2519 | #endif | ||
2520 | send_cleanup_vector(cfg); | 2504 | send_cleanup_vector(cfg); |
2521 | } | ||
2522 | } | 2505 | } |
2523 | #else | 2506 | #else |
2524 | static inline void irq_complete_move(struct irq_desc **descp) {} | 2507 | static inline void irq_complete_move(struct irq_desc **descp) {} |
2525 | #endif | 2508 | #endif |
2526 | 2509 | ||
2527 | static void __eoi_ioapic_irq(unsigned int irq, struct irq_cfg *cfg) | ||
2528 | { | ||
2529 | int apic, pin; | ||
2530 | struct irq_pin_list *entry; | ||
2531 | |||
2532 | entry = cfg->irq_2_pin; | ||
2533 | for (;;) { | ||
2534 | |||
2535 | if (!entry) | ||
2536 | break; | ||
2537 | |||
2538 | apic = entry->apic; | ||
2539 | pin = entry->pin; | ||
2540 | io_apic_eoi(apic, pin); | ||
2541 | entry = entry->next; | ||
2542 | } | ||
2543 | } | ||
2544 | |||
2545 | static void | ||
2546 | eoi_ioapic_irq(struct irq_desc *desc) | ||
2547 | { | ||
2548 | struct irq_cfg *cfg; | ||
2549 | unsigned long flags; | ||
2550 | unsigned int irq; | ||
2551 | |||
2552 | irq = desc->irq; | ||
2553 | cfg = desc->chip_data; | ||
2554 | |||
2555 | spin_lock_irqsave(&ioapic_lock, flags); | ||
2556 | __eoi_ioapic_irq(irq, cfg); | ||
2557 | spin_unlock_irqrestore(&ioapic_lock, flags); | ||
2558 | } | ||
2559 | |||
2560 | #ifdef CONFIG_X86_X2APIC | ||
2561 | static void ack_x2apic_level(unsigned int irq) | ||
2562 | { | ||
2563 | struct irq_desc *desc = irq_to_desc(irq); | ||
2564 | ack_x2APIC_irq(); | ||
2565 | eoi_ioapic_irq(desc); | ||
2566 | } | ||
2567 | |||
2568 | static void ack_x2apic_edge(unsigned int irq) | ||
2569 | { | ||
2570 | ack_x2APIC_irq(); | ||
2571 | } | ||
2572 | #endif | ||
2573 | |||
2574 | static void ack_apic_edge(unsigned int irq) | 2510 | static void ack_apic_edge(unsigned int irq) |
2575 | { | 2511 | { |
2576 | struct irq_desc *desc = irq_to_desc(irq); | 2512 | struct irq_desc *desc = irq_to_desc(irq); |
@@ -2634,9 +2570,6 @@ static void ack_apic_level(unsigned int irq) | |||
2634 | */ | 2570 | */ |
2635 | ack_APIC_irq(); | 2571 | ack_APIC_irq(); |
2636 | 2572 | ||
2637 | if (irq_remapped(irq)) | ||
2638 | eoi_ioapic_irq(desc); | ||
2639 | |||
2640 | /* Now we can move and re-enable the irq */ | 2573 | /* Now we can move and re-enable the irq */ |
2641 | if (unlikely(do_unmask_irq)) { | 2574 | if (unlikely(do_unmask_irq)) { |
2642 | /* Only migrate the irq if the ack has been received. | 2575 | /* Only migrate the irq if the ack has been received. |
@@ -2683,22 +2616,50 @@ static void ack_apic_level(unsigned int irq) | |||
2683 | } | 2616 | } |
2684 | 2617 | ||
2685 | #ifdef CONFIG_INTR_REMAP | 2618 | #ifdef CONFIG_INTR_REMAP |
2619 | static void __eoi_ioapic_irq(unsigned int irq, struct irq_cfg *cfg) | ||
2620 | { | ||
2621 | int apic, pin; | ||
2622 | struct irq_pin_list *entry; | ||
2623 | |||
2624 | entry = cfg->irq_2_pin; | ||
2625 | for (;;) { | ||
2626 | |||
2627 | if (!entry) | ||
2628 | break; | ||
2629 | |||
2630 | apic = entry->apic; | ||
2631 | pin = entry->pin; | ||
2632 | io_apic_eoi(apic, pin); | ||
2633 | entry = entry->next; | ||
2634 | } | ||
2635 | } | ||
2636 | |||
2637 | static void | ||
2638 | eoi_ioapic_irq(struct irq_desc *desc) | ||
2639 | { | ||
2640 | struct irq_cfg *cfg; | ||
2641 | unsigned long flags; | ||
2642 | unsigned int irq; | ||
2643 | |||
2644 | irq = desc->irq; | ||
2645 | cfg = desc->chip_data; | ||
2646 | |||
2647 | spin_lock_irqsave(&ioapic_lock, flags); | ||
2648 | __eoi_ioapic_irq(irq, cfg); | ||
2649 | spin_unlock_irqrestore(&ioapic_lock, flags); | ||
2650 | } | ||
2651 | |||
2686 | static void ir_ack_apic_edge(unsigned int irq) | 2652 | static void ir_ack_apic_edge(unsigned int irq) |
2687 | { | 2653 | { |
2688 | #ifdef CONFIG_X86_X2APIC | 2654 | ack_APIC_irq(); |
2689 | if (x2apic_enabled()) | ||
2690 | return ack_x2apic_edge(irq); | ||
2691 | #endif | ||
2692 | return ack_apic_edge(irq); | ||
2693 | } | 2655 | } |
2694 | 2656 | ||
2695 | static void ir_ack_apic_level(unsigned int irq) | 2657 | static void ir_ack_apic_level(unsigned int irq) |
2696 | { | 2658 | { |
2697 | #ifdef CONFIG_X86_X2APIC | 2659 | struct irq_desc *desc = irq_to_desc(irq); |
2698 | if (x2apic_enabled()) | 2660 | |
2699 | return ack_x2apic_level(irq); | 2661 | ack_APIC_irq(); |
2700 | #endif | 2662 | eoi_ioapic_irq(desc); |
2701 | return ack_apic_level(irq); | ||
2702 | } | 2663 | } |
2703 | #endif /* CONFIG_INTR_REMAP */ | 2664 | #endif /* CONFIG_INTR_REMAP */ |
2704 | 2665 | ||
@@ -2903,7 +2864,7 @@ static inline void __init check_timer(void) | |||
2903 | { | 2864 | { |
2904 | struct irq_desc *desc = irq_to_desc(0); | 2865 | struct irq_desc *desc = irq_to_desc(0); |
2905 | struct irq_cfg *cfg = desc->chip_data; | 2866 | struct irq_cfg *cfg = desc->chip_data; |
2906 | int cpu = boot_cpu_id; | 2867 | int node = cpu_to_node(boot_cpu_id); |
2907 | int apic1, pin1, apic2, pin2; | 2868 | int apic1, pin1, apic2, pin2; |
2908 | unsigned long flags; | 2869 | unsigned long flags; |
2909 | int no_pin1 = 0; | 2870 | int no_pin1 = 0; |
@@ -2969,7 +2930,7 @@ static inline void __init check_timer(void) | |||
2969 | * Ok, does IRQ0 through the IOAPIC work? | 2930 | * Ok, does IRQ0 through the IOAPIC work? |
2970 | */ | 2931 | */ |
2971 | if (no_pin1) { | 2932 | if (no_pin1) { |
2972 | add_pin_to_irq_cpu(cfg, cpu, apic1, pin1); | 2933 | add_pin_to_irq_node(cfg, node, apic1, pin1); |
2973 | setup_timer_IRQ0_pin(apic1, pin1, cfg->vector); | 2934 | setup_timer_IRQ0_pin(apic1, pin1, cfg->vector); |
2974 | } else { | 2935 | } else { |
2975 | /* for edge trigger, setup_IO_APIC_irq already | 2936 | /* for edge trigger, setup_IO_APIC_irq already |
@@ -3006,7 +2967,7 @@ static inline void __init check_timer(void) | |||
3006 | /* | 2967 | /* |
3007 | * legacy devices should be connected to IO APIC #0 | 2968 | * legacy devices should be connected to IO APIC #0 |
3008 | */ | 2969 | */ |
3009 | replace_pin_at_irq_cpu(cfg, cpu, apic1, pin1, apic2, pin2); | 2970 | replace_pin_at_irq_node(cfg, node, apic1, pin1, apic2, pin2); |
3010 | setup_timer_IRQ0_pin(apic2, pin2, cfg->vector); | 2971 | setup_timer_IRQ0_pin(apic2, pin2, cfg->vector); |
3011 | enable_8259A_irq(0); | 2972 | enable_8259A_irq(0); |
3012 | if (timer_irq_works()) { | 2973 | if (timer_irq_works()) { |
@@ -3218,14 +3179,13 @@ static int nr_irqs_gsi = NR_IRQS_LEGACY; | |||
3218 | /* | 3179 | /* |
3219 | * Dynamic irq allocate and deallocation | 3180 | * Dynamic irq allocate and deallocation |
3220 | */ | 3181 | */ |
3221 | unsigned int create_irq_nr(unsigned int irq_want) | 3182 | unsigned int create_irq_nr(unsigned int irq_want, int node) |
3222 | { | 3183 | { |
3223 | /* Allocate an unused irq */ | 3184 | /* Allocate an unused irq */ |
3224 | unsigned int irq; | 3185 | unsigned int irq; |
3225 | unsigned int new; | 3186 | unsigned int new; |
3226 | unsigned long flags; | 3187 | unsigned long flags; |
3227 | struct irq_cfg *cfg_new = NULL; | 3188 | struct irq_cfg *cfg_new = NULL; |
3228 | int cpu = boot_cpu_id; | ||
3229 | struct irq_desc *desc_new = NULL; | 3189 | struct irq_desc *desc_new = NULL; |
3230 | 3190 | ||
3231 | irq = 0; | 3191 | irq = 0; |
@@ -3234,7 +3194,7 @@ unsigned int create_irq_nr(unsigned int irq_want) | |||
3234 | 3194 | ||
3235 | spin_lock_irqsave(&vector_lock, flags); | 3195 | spin_lock_irqsave(&vector_lock, flags); |
3236 | for (new = irq_want; new < nr_irqs; new++) { | 3196 | for (new = irq_want; new < nr_irqs; new++) { |
3237 | desc_new = irq_to_desc_alloc_cpu(new, cpu); | 3197 | desc_new = irq_to_desc_alloc_node(new, node); |
3238 | if (!desc_new) { | 3198 | if (!desc_new) { |
3239 | printk(KERN_INFO "can not get irq_desc for %d\n", new); | 3199 | printk(KERN_INFO "can not get irq_desc for %d\n", new); |
3240 | continue; | 3200 | continue; |
@@ -3243,6 +3203,9 @@ unsigned int create_irq_nr(unsigned int irq_want) | |||
3243 | 3203 | ||
3244 | if (cfg_new->vector != 0) | 3204 | if (cfg_new->vector != 0) |
3245 | continue; | 3205 | continue; |
3206 | |||
3207 | desc_new = move_irq_desc(desc_new, node); | ||
3208 | |||
3246 | if (__assign_irq_vector(new, cfg_new, apic->target_cpus()) == 0) | 3209 | if (__assign_irq_vector(new, cfg_new, apic->target_cpus()) == 0) |
3247 | irq = new; | 3210 | irq = new; |
3248 | break; | 3211 | break; |
@@ -3260,11 +3223,12 @@ unsigned int create_irq_nr(unsigned int irq_want) | |||
3260 | 3223 | ||
3261 | int create_irq(void) | 3224 | int create_irq(void) |
3262 | { | 3225 | { |
3226 | int node = cpu_to_node(boot_cpu_id); | ||
3263 | unsigned int irq_want; | 3227 | unsigned int irq_want; |
3264 | int irq; | 3228 | int irq; |
3265 | 3229 | ||
3266 | irq_want = nr_irqs_gsi; | 3230 | irq_want = nr_irqs_gsi; |
3267 | irq = create_irq_nr(irq_want); | 3231 | irq = create_irq_nr(irq_want, node); |
3268 | 3232 | ||
3269 | if (irq == 0) | 3233 | if (irq == 0) |
3270 | irq = -1; | 3234 | irq = -1; |
@@ -3329,6 +3293,9 @@ static int msi_compose_msg(struct pci_dev *pdev, unsigned int irq, struct msi_ms | |||
3329 | irte.vector = cfg->vector; | 3293 | irte.vector = cfg->vector; |
3330 | irte.dest_id = IRTE_DEST(dest); | 3294 | irte.dest_id = IRTE_DEST(dest); |
3331 | 3295 | ||
3296 | /* Set source-id of interrupt request */ | ||
3297 | set_msi_sid(&irte, pdev); | ||
3298 | |||
3332 | modify_irte(irq, &irte); | 3299 | modify_irte(irq, &irte); |
3333 | 3300 | ||
3334 | msg->address_hi = MSI_ADDR_BASE_HI; | 3301 | msg->address_hi = MSI_ADDR_BASE_HI; |
@@ -3366,7 +3333,7 @@ static int msi_compose_msg(struct pci_dev *pdev, unsigned int irq, struct msi_ms | |||
3366 | } | 3333 | } |
3367 | 3334 | ||
3368 | #ifdef CONFIG_SMP | 3335 | #ifdef CONFIG_SMP |
3369 | static void set_msi_irq_affinity(unsigned int irq, const struct cpumask *mask) | 3336 | static int set_msi_irq_affinity(unsigned int irq, const struct cpumask *mask) |
3370 | { | 3337 | { |
3371 | struct irq_desc *desc = irq_to_desc(irq); | 3338 | struct irq_desc *desc = irq_to_desc(irq); |
3372 | struct irq_cfg *cfg; | 3339 | struct irq_cfg *cfg; |
@@ -3375,7 +3342,7 @@ static void set_msi_irq_affinity(unsigned int irq, const struct cpumask *mask) | |||
3375 | 3342 | ||
3376 | dest = set_desc_affinity(desc, mask); | 3343 | dest = set_desc_affinity(desc, mask); |
3377 | if (dest == BAD_APICID) | 3344 | if (dest == BAD_APICID) |
3378 | return; | 3345 | return -1; |
3379 | 3346 | ||
3380 | cfg = desc->chip_data; | 3347 | cfg = desc->chip_data; |
3381 | 3348 | ||
@@ -3387,13 +3354,15 @@ static void set_msi_irq_affinity(unsigned int irq, const struct cpumask *mask) | |||
3387 | msg.address_lo |= MSI_ADDR_DEST_ID(dest); | 3354 | msg.address_lo |= MSI_ADDR_DEST_ID(dest); |
3388 | 3355 | ||
3389 | write_msi_msg_desc(desc, &msg); | 3356 | write_msi_msg_desc(desc, &msg); |
3357 | |||
3358 | return 0; | ||
3390 | } | 3359 | } |
3391 | #ifdef CONFIG_INTR_REMAP | 3360 | #ifdef CONFIG_INTR_REMAP |
3392 | /* | 3361 | /* |
3393 | * Migrate the MSI irq to another cpumask. This migration is | 3362 | * Migrate the MSI irq to another cpumask. This migration is |
3394 | * done in the process context using interrupt-remapping hardware. | 3363 | * done in the process context using interrupt-remapping hardware. |
3395 | */ | 3364 | */ |
3396 | static void | 3365 | static int |
3397 | ir_set_msi_irq_affinity(unsigned int irq, const struct cpumask *mask) | 3366 | ir_set_msi_irq_affinity(unsigned int irq, const struct cpumask *mask) |
3398 | { | 3367 | { |
3399 | struct irq_desc *desc = irq_to_desc(irq); | 3368 | struct irq_desc *desc = irq_to_desc(irq); |
@@ -3402,11 +3371,11 @@ ir_set_msi_irq_affinity(unsigned int irq, const struct cpumask *mask) | |||
3402 | struct irte irte; | 3371 | struct irte irte; |
3403 | 3372 | ||
3404 | if (get_irte(irq, &irte)) | 3373 | if (get_irte(irq, &irte)) |
3405 | return; | 3374 | return -1; |
3406 | 3375 | ||
3407 | dest = set_desc_affinity(desc, mask); | 3376 | dest = set_desc_affinity(desc, mask); |
3408 | if (dest == BAD_APICID) | 3377 | if (dest == BAD_APICID) |
3409 | return; | 3378 | return -1; |
3410 | 3379 | ||
3411 | irte.vector = cfg->vector; | 3380 | irte.vector = cfg->vector; |
3412 | irte.dest_id = IRTE_DEST(dest); | 3381 | irte.dest_id = IRTE_DEST(dest); |
@@ -3423,6 +3392,8 @@ ir_set_msi_irq_affinity(unsigned int irq, const struct cpumask *mask) | |||
3423 | */ | 3392 | */ |
3424 | if (cfg->move_in_progress) | 3393 | if (cfg->move_in_progress) |
3425 | send_cleanup_vector(cfg); | 3394 | send_cleanup_vector(cfg); |
3395 | |||
3396 | return 0; | ||
3426 | } | 3397 | } |
3427 | 3398 | ||
3428 | #endif | 3399 | #endif |
@@ -3518,15 +3489,17 @@ int arch_setup_msi_irqs(struct pci_dev *dev, int nvec, int type) | |||
3518 | unsigned int irq_want; | 3489 | unsigned int irq_want; |
3519 | struct intel_iommu *iommu = NULL; | 3490 | struct intel_iommu *iommu = NULL; |
3520 | int index = 0; | 3491 | int index = 0; |
3492 | int node; | ||
3521 | 3493 | ||
3522 | /* x86 doesn't support multiple MSI yet */ | 3494 | /* x86 doesn't support multiple MSI yet */ |
3523 | if (type == PCI_CAP_ID_MSI && nvec > 1) | 3495 | if (type == PCI_CAP_ID_MSI && nvec > 1) |
3524 | return 1; | 3496 | return 1; |
3525 | 3497 | ||
3498 | node = dev_to_node(&dev->dev); | ||
3526 | irq_want = nr_irqs_gsi; | 3499 | irq_want = nr_irqs_gsi; |
3527 | sub_handle = 0; | 3500 | sub_handle = 0; |
3528 | list_for_each_entry(msidesc, &dev->msi_list, list) { | 3501 | list_for_each_entry(msidesc, &dev->msi_list, list) { |
3529 | irq = create_irq_nr(irq_want); | 3502 | irq = create_irq_nr(irq_want, node); |
3530 | if (irq == 0) | 3503 | if (irq == 0) |
3531 | return -1; | 3504 | return -1; |
3532 | irq_want = irq + 1; | 3505 | irq_want = irq + 1; |
@@ -3576,7 +3549,7 @@ void arch_teardown_msi_irq(unsigned int irq) | |||
3576 | 3549 | ||
3577 | #if defined (CONFIG_DMAR) || defined (CONFIG_INTR_REMAP) | 3550 | #if defined (CONFIG_DMAR) || defined (CONFIG_INTR_REMAP) |
3578 | #ifdef CONFIG_SMP | 3551 | #ifdef CONFIG_SMP |
3579 | static void dmar_msi_set_affinity(unsigned int irq, const struct cpumask *mask) | 3552 | static int dmar_msi_set_affinity(unsigned int irq, const struct cpumask *mask) |
3580 | { | 3553 | { |
3581 | struct irq_desc *desc = irq_to_desc(irq); | 3554 | struct irq_desc *desc = irq_to_desc(irq); |
3582 | struct irq_cfg *cfg; | 3555 | struct irq_cfg *cfg; |
@@ -3585,7 +3558,7 @@ static void dmar_msi_set_affinity(unsigned int irq, const struct cpumask *mask) | |||
3585 | 3558 | ||
3586 | dest = set_desc_affinity(desc, mask); | 3559 | dest = set_desc_affinity(desc, mask); |
3587 | if (dest == BAD_APICID) | 3560 | if (dest == BAD_APICID) |
3588 | return; | 3561 | return -1; |
3589 | 3562 | ||
3590 | cfg = desc->chip_data; | 3563 | cfg = desc->chip_data; |
3591 | 3564 | ||
@@ -3597,11 +3570,13 @@ static void dmar_msi_set_affinity(unsigned int irq, const struct cpumask *mask) | |||
3597 | msg.address_lo |= MSI_ADDR_DEST_ID(dest); | 3570 | msg.address_lo |= MSI_ADDR_DEST_ID(dest); |
3598 | 3571 | ||
3599 | dmar_msi_write(irq, &msg); | 3572 | dmar_msi_write(irq, &msg); |
3573 | |||
3574 | return 0; | ||
3600 | } | 3575 | } |
3601 | 3576 | ||
3602 | #endif /* CONFIG_SMP */ | 3577 | #endif /* CONFIG_SMP */ |
3603 | 3578 | ||
3604 | struct irq_chip dmar_msi_type = { | 3579 | static struct irq_chip dmar_msi_type = { |
3605 | .name = "DMAR_MSI", | 3580 | .name = "DMAR_MSI", |
3606 | .unmask = dmar_msi_unmask, | 3581 | .unmask = dmar_msi_unmask, |
3607 | .mask = dmar_msi_mask, | 3582 | .mask = dmar_msi_mask, |
@@ -3630,7 +3605,7 @@ int arch_setup_dmar_msi(unsigned int irq) | |||
3630 | #ifdef CONFIG_HPET_TIMER | 3605 | #ifdef CONFIG_HPET_TIMER |
3631 | 3606 | ||
3632 | #ifdef CONFIG_SMP | 3607 | #ifdef CONFIG_SMP |
3633 | static void hpet_msi_set_affinity(unsigned int irq, const struct cpumask *mask) | 3608 | static int hpet_msi_set_affinity(unsigned int irq, const struct cpumask *mask) |
3634 | { | 3609 | { |
3635 | struct irq_desc *desc = irq_to_desc(irq); | 3610 | struct irq_desc *desc = irq_to_desc(irq); |
3636 | struct irq_cfg *cfg; | 3611 | struct irq_cfg *cfg; |
@@ -3639,7 +3614,7 @@ static void hpet_msi_set_affinity(unsigned int irq, const struct cpumask *mask) | |||
3639 | 3614 | ||
3640 | dest = set_desc_affinity(desc, mask); | 3615 | dest = set_desc_affinity(desc, mask); |
3641 | if (dest == BAD_APICID) | 3616 | if (dest == BAD_APICID) |
3642 | return; | 3617 | return -1; |
3643 | 3618 | ||
3644 | cfg = desc->chip_data; | 3619 | cfg = desc->chip_data; |
3645 | 3620 | ||
@@ -3651,6 +3626,8 @@ static void hpet_msi_set_affinity(unsigned int irq, const struct cpumask *mask) | |||
3651 | msg.address_lo |= MSI_ADDR_DEST_ID(dest); | 3626 | msg.address_lo |= MSI_ADDR_DEST_ID(dest); |
3652 | 3627 | ||
3653 | hpet_msi_write(irq, &msg); | 3628 | hpet_msi_write(irq, &msg); |
3629 | |||
3630 | return 0; | ||
3654 | } | 3631 | } |
3655 | 3632 | ||
3656 | #endif /* CONFIG_SMP */ | 3633 | #endif /* CONFIG_SMP */ |
@@ -3707,7 +3684,7 @@ static void target_ht_irq(unsigned int irq, unsigned int dest, u8 vector) | |||
3707 | write_ht_irq_msg(irq, &msg); | 3684 | write_ht_irq_msg(irq, &msg); |
3708 | } | 3685 | } |
3709 | 3686 | ||
3710 | static void set_ht_irq_affinity(unsigned int irq, const struct cpumask *mask) | 3687 | static int set_ht_irq_affinity(unsigned int irq, const struct cpumask *mask) |
3711 | { | 3688 | { |
3712 | struct irq_desc *desc = irq_to_desc(irq); | 3689 | struct irq_desc *desc = irq_to_desc(irq); |
3713 | struct irq_cfg *cfg; | 3690 | struct irq_cfg *cfg; |
@@ -3715,11 +3692,13 @@ static void set_ht_irq_affinity(unsigned int irq, const struct cpumask *mask) | |||
3715 | 3692 | ||
3716 | dest = set_desc_affinity(desc, mask); | 3693 | dest = set_desc_affinity(desc, mask); |
3717 | if (dest == BAD_APICID) | 3694 | if (dest == BAD_APICID) |
3718 | return; | 3695 | return -1; |
3719 | 3696 | ||
3720 | cfg = desc->chip_data; | 3697 | cfg = desc->chip_data; |
3721 | 3698 | ||
3722 | target_ht_irq(irq, dest, cfg->vector); | 3699 | target_ht_irq(irq, dest, cfg->vector); |
3700 | |||
3701 | return 0; | ||
3723 | } | 3702 | } |
3724 | 3703 | ||
3725 | #endif | 3704 | #endif |
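
The affinity setters in this file (set_msi_irq_affinity, ir_set_msi_irq_affinity, dmar_msi_set_affinity, hpet_msi_set_affinity and set_ht_irq_affinity) all change from void to int in this series, apparently so the caller can tell whether the requested mask was actually applied. The shared pattern, reduced to a sketch (the function name here is illustrative; the per-chip reprogramming details are elided):

	static int example_set_affinity(unsigned int irq, const struct cpumask *mask)
	{
		struct irq_desc *desc = irq_to_desc(irq);
		unsigned int dest;

		dest = set_desc_affinity(desc, mask);
		if (dest == BAD_APICID)
			return -1;	/* mask not usable, nothing was reprogrammed */

		/* ... rewrite the MSI message / HT route / IRTE for 'dest' ... */

		return 0;	/* new affinity is in effect */
	}
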
@@ -3794,6 +3773,8 @@ int arch_enable_uv_irq(char *irq_name, unsigned int irq, int cpu, int mmr_blade, | |||
3794 | unsigned long flags; | 3773 | unsigned long flags; |
3795 | int err; | 3774 | int err; |
3796 | 3775 | ||
3776 | BUILD_BUG_ON(sizeof(struct uv_IO_APIC_route_entry) != sizeof(unsigned long)); | ||
3777 | |||
3797 | cfg = irq_cfg(irq); | 3778 | cfg = irq_cfg(irq); |
3798 | 3779 | ||
3799 | err = assign_irq_vector(irq, cfg, eligible_cpu); | 3780 | err = assign_irq_vector(irq, cfg, eligible_cpu); |
@@ -3807,15 +3788,13 @@ int arch_enable_uv_irq(char *irq_name, unsigned int irq, int cpu, int mmr_blade, | |||
3807 | 3788 | ||
3808 | mmr_value = 0; | 3789 | mmr_value = 0; |
3809 | entry = (struct uv_IO_APIC_route_entry *)&mmr_value; | 3790 | entry = (struct uv_IO_APIC_route_entry *)&mmr_value; |
3810 | BUG_ON(sizeof(struct uv_IO_APIC_route_entry) != sizeof(unsigned long)); | 3791 | entry->vector = cfg->vector; |
3811 | 3792 | entry->delivery_mode = apic->irq_delivery_mode; | |
3812 | entry->vector = cfg->vector; | 3793 | entry->dest_mode = apic->irq_dest_mode; |
3813 | entry->delivery_mode = apic->irq_delivery_mode; | 3794 | entry->polarity = 0; |
3814 | entry->dest_mode = apic->irq_dest_mode; | 3795 | entry->trigger = 0; |
3815 | entry->polarity = 0; | 3796 | entry->mask = 0; |
3816 | entry->trigger = 0; | 3797 | entry->dest = apic->cpu_mask_to_apicid(eligible_cpu); |
3817 | entry->mask = 0; | ||
3818 | entry->dest = apic->cpu_mask_to_apicid(eligible_cpu); | ||
3819 | 3798 | ||
3820 | mmr_pnode = uv_blade_to_pnode(mmr_blade); | 3799 | mmr_pnode = uv_blade_to_pnode(mmr_blade); |
3821 | uv_write_global_mmr64(mmr_pnode, mmr_offset, mmr_value); | 3800 | uv_write_global_mmr64(mmr_pnode, mmr_offset, mmr_value); |
@@ -3833,10 +3812,10 @@ void arch_disable_uv_irq(int mmr_blade, unsigned long mmr_offset) | |||
3833 | struct uv_IO_APIC_route_entry *entry; | 3812 | struct uv_IO_APIC_route_entry *entry; |
3834 | int mmr_pnode; | 3813 | int mmr_pnode; |
3835 | 3814 | ||
3815 | BUILD_BUG_ON(sizeof(struct uv_IO_APIC_route_entry) != sizeof(unsigned long)); | ||
3816 | |||
3836 | mmr_value = 0; | 3817 | mmr_value = 0; |
3837 | entry = (struct uv_IO_APIC_route_entry *)&mmr_value; | 3818 | entry = (struct uv_IO_APIC_route_entry *)&mmr_value; |
3838 | BUG_ON(sizeof(struct uv_IO_APIC_route_entry) != sizeof(unsigned long)); | ||
3839 | |||
3840 | entry->mask = 1; | 3819 | entry->mask = 1; |
3841 | 3820 | ||
3842 | mmr_pnode = uv_blade_to_pnode(mmr_blade); | 3821 | mmr_pnode = uv_blade_to_pnode(mmr_blade); |
@@ -3900,6 +3879,71 @@ int __init arch_probe_nr_irqs(void) | |||
3900 | } | 3879 | } |
3901 | #endif | 3880 | #endif |
3902 | 3881 | ||
3882 | static int __io_apic_set_pci_routing(struct device *dev, int irq, | ||
3883 | struct io_apic_irq_attr *irq_attr) | ||
3884 | { | ||
3885 | struct irq_desc *desc; | ||
3886 | struct irq_cfg *cfg; | ||
3887 | int node; | ||
3888 | int ioapic, pin; | ||
3889 | int trigger, polarity; | ||
3890 | |||
3891 | ioapic = irq_attr->ioapic; | ||
3892 | if (!IO_APIC_IRQ(irq)) { | ||
3893 | apic_printk(APIC_QUIET,KERN_ERR "IOAPIC[%d]: Invalid reference to IRQ 0\n", | ||
3894 | ioapic); | ||
3895 | return -EINVAL; | ||
3896 | } | ||
3897 | |||
3898 | if (dev) | ||
3899 | node = dev_to_node(dev); | ||
3900 | else | ||
3901 | node = cpu_to_node(boot_cpu_id); | ||
3902 | |||
3903 | desc = irq_to_desc_alloc_node(irq, node); | ||
3904 | if (!desc) { | ||
3905 | printk(KERN_INFO "can not get irq_desc %d\n", irq); | ||
3906 | return 0; | ||
3907 | } | ||
3908 | |||
3909 | pin = irq_attr->ioapic_pin; | ||
3910 | trigger = irq_attr->trigger; | ||
3911 | polarity = irq_attr->polarity; | ||
3912 | |||
3913 | /* | ||
3914 | * IRQs < 16 are already in the irq_2_pin[] map | ||
3915 | */ | ||
3916 | if (irq >= NR_IRQS_LEGACY) { | ||
3917 | cfg = desc->chip_data; | ||
3918 | add_pin_to_irq_node(cfg, node, ioapic, pin); | ||
3919 | } | ||
3920 | |||
3921 | setup_IO_APIC_irq(ioapic, pin, irq, desc, trigger, polarity); | ||
3922 | |||
3923 | return 0; | ||
3924 | } | ||
3925 | |||
3926 | int io_apic_set_pci_routing(struct device *dev, int irq, | ||
3927 | struct io_apic_irq_attr *irq_attr) | ||
3928 | { | ||
3929 | int ioapic, pin; | ||
3930 | /* | ||
3931 | * Avoid pin reprogramming. PRTs typically include entries | ||
3932 | * with redundant pin->gsi mappings (but unique PCI devices); | ||
3933 | * we only program the IOAPIC on the first. | ||
3934 | */ | ||
3935 | ioapic = irq_attr->ioapic; | ||
3936 | pin = irq_attr->ioapic_pin; | ||
3937 | if (test_bit(pin, mp_ioapic_routing[ioapic].pin_programmed)) { | ||
3938 | pr_debug("Pin %d-%d already programmed\n", | ||
3939 | mp_ioapics[ioapic].apicid, pin); | ||
3940 | return 0; | ||
3941 | } | ||
3942 | set_bit(pin, mp_ioapic_routing[ioapic].pin_programmed); | ||
3943 | |||
3944 | return __io_apic_set_pci_routing(dev, irq, irq_attr); | ||
3945 | } | ||
3946 | |||
3903 | /* -------------------------------------------------------------------------- | 3947 | /* -------------------------------------------------------------------------- |
3904 | ACPI-based IOAPIC Configuration | 3948 | ACPI-based IOAPIC Configuration |
3905 | -------------------------------------------------------------------------- */ | 3949 | -------------------------------------------------------------------------- */ |
@@ -3980,6 +4024,7 @@ int __init io_apic_get_unique_id(int ioapic, int apic_id) | |||
3980 | 4024 | ||
3981 | return apic_id; | 4025 | return apic_id; |
3982 | } | 4026 | } |
4027 | #endif | ||
3983 | 4028 | ||
3984 | int __init io_apic_get_version(int ioapic) | 4029 | int __init io_apic_get_version(int ioapic) |
3985 | { | 4030 | { |
@@ -3992,39 +4037,6 @@ int __init io_apic_get_version(int ioapic) | |||
3992 | 4037 | ||
3993 | return reg_01.bits.version; | 4038 | return reg_01.bits.version; |
3994 | } | 4039 | } |
3995 | #endif | ||
3996 | |||
3997 | int io_apic_set_pci_routing (int ioapic, int pin, int irq, int triggering, int polarity) | ||
3998 | { | ||
3999 | struct irq_desc *desc; | ||
4000 | struct irq_cfg *cfg; | ||
4001 | int cpu = boot_cpu_id; | ||
4002 | |||
4003 | if (!IO_APIC_IRQ(irq)) { | ||
4004 | apic_printk(APIC_QUIET,KERN_ERR "IOAPIC[%d]: Invalid reference to IRQ 0\n", | ||
4005 | ioapic); | ||
4006 | return -EINVAL; | ||
4007 | } | ||
4008 | |||
4009 | desc = irq_to_desc_alloc_cpu(irq, cpu); | ||
4010 | if (!desc) { | ||
4011 | printk(KERN_INFO "can not get irq_desc %d\n", irq); | ||
4012 | return 0; | ||
4013 | } | ||
4014 | |||
4015 | /* | ||
4016 | * IRQs < 16 are already in the irq_2_pin[] map | ||
4017 | */ | ||
4018 | if (irq >= NR_IRQS_LEGACY) { | ||
4019 | cfg = desc->chip_data; | ||
4020 | add_pin_to_irq_cpu(cfg, cpu, ioapic, pin); | ||
4021 | } | ||
4022 | |||
4023 | setup_IO_APIC_irq(ioapic, pin, irq, desc, triggering, polarity); | ||
4024 | |||
4025 | return 0; | ||
4026 | } | ||
4027 | |||
4028 | 4040 | ||
4029 | int acpi_get_override_irq(int bus_irq, int *trigger, int *polarity) | 4041 | int acpi_get_override_irq(int bus_irq, int *trigger, int *polarity) |
4030 | { | 4042 | { |
@@ -4055,51 +4067,44 @@ int acpi_get_override_irq(int bus_irq, int *trigger, int *polarity) | |||
4055 | #ifdef CONFIG_SMP | 4067 | #ifdef CONFIG_SMP |
4056 | void __init setup_ioapic_dest(void) | 4068 | void __init setup_ioapic_dest(void) |
4057 | { | 4069 | { |
4058 | int pin, ioapic, irq, irq_entry; | 4070 | int pin, ioapic = 0, irq, irq_entry; |
4059 | struct irq_desc *desc; | 4071 | struct irq_desc *desc; |
4060 | struct irq_cfg *cfg; | ||
4061 | const struct cpumask *mask; | 4072 | const struct cpumask *mask; |
4062 | 4073 | ||
4063 | if (skip_ioapic_setup == 1) | 4074 | if (skip_ioapic_setup == 1) |
4064 | return; | 4075 | return; |
4065 | 4076 | ||
4066 | for (ioapic = 0; ioapic < nr_ioapics; ioapic++) { | 4077 | #ifdef CONFIG_ACPI |
4067 | for (pin = 0; pin < nr_ioapic_registers[ioapic]; pin++) { | 4078 | if (!acpi_disabled && acpi_ioapic) { |
4068 | irq_entry = find_irq_entry(ioapic, pin, mp_INT); | 4079 | ioapic = mp_find_ioapic(0); |
4069 | if (irq_entry == -1) | 4080 | if (ioapic < 0) |
4070 | continue; | 4081 | ioapic = 0; |
4071 | irq = pin_2_irq(irq_entry, ioapic, pin); | 4082 | } |
4072 | 4083 | #endif | |
4073 | /* setup_IO_APIC_irqs could fail to get vector for some device | ||
4074 | * when you have too many devices, because at that time only boot | ||
4075 | * cpu is online. | ||
4076 | */ | ||
4077 | desc = irq_to_desc(irq); | ||
4078 | cfg = desc->chip_data; | ||
4079 | if (!cfg->vector) { | ||
4080 | setup_IO_APIC_irq(ioapic, pin, irq, desc, | ||
4081 | irq_trigger(irq_entry), | ||
4082 | irq_polarity(irq_entry)); | ||
4083 | continue; | ||
4084 | 4084 | ||
4085 | } | 4085 | for (pin = 0; pin < nr_ioapic_registers[ioapic]; pin++) { |
4086 | irq_entry = find_irq_entry(ioapic, pin, mp_INT); | ||
4087 | if (irq_entry == -1) | ||
4088 | continue; | ||
4089 | irq = pin_2_irq(irq_entry, ioapic, pin); | ||
4086 | 4090 | ||
4087 | /* | 4091 | desc = irq_to_desc(irq); |
4088 | * Honour affinities which have been set in early boot | ||
4089 | */ | ||
4090 | if (desc->status & | ||
4091 | (IRQ_NO_BALANCING | IRQ_AFFINITY_SET)) | ||
4092 | mask = desc->affinity; | ||
4093 | else | ||
4094 | mask = apic->target_cpus(); | ||
4095 | 4092 | ||
4096 | if (intr_remapping_enabled) | 4093 | /* |
4097 | set_ir_ioapic_affinity_irq_desc(desc, mask); | 4094 | * Honour affinities which have been set in early boot |
4098 | else | 4095 | */ |
4099 | set_ioapic_affinity_irq_desc(desc, mask); | 4096 | if (desc->status & |
4100 | } | 4097 | (IRQ_NO_BALANCING | IRQ_AFFINITY_SET)) |
4098 | mask = desc->affinity; | ||
4099 | else | ||
4100 | mask = apic->target_cpus(); | ||
4101 | 4101 | ||
4102 | if (intr_remapping_enabled) | ||
4103 | set_ir_ioapic_affinity_irq_desc(desc, mask); | ||
4104 | else | ||
4105 | set_ioapic_affinity_irq_desc(desc, mask); | ||
4102 | } | 4106 | } |
4107 | |||
4103 | } | 4108 | } |
4104 | #endif | 4109 | #endif |
4105 | 4110 | ||
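
End of the io_apic.c changes. The old io_apic_set_pci_routing(ioapic, pin, irq, triggering, polarity) is replaced by a variant that takes the device (for NUMA placement) plus a struct io_apic_irq_attr, and that refuses to reprogram a pin already marked in mp_ioapic_routing[].pin_programmed. A minimal sketch of the new call shape, assuming the attr fields are filled directly; the ACPI call sites that actually do this are not part of this diff:

	struct io_apic_irq_attr attr;

	attr.ioapic     = ioapic;	/* which IOAPIC */
	attr.ioapic_pin = pin;		/* pin on that IOAPIC */
	attr.trigger    = trigger;	/* was the old 'triggering' argument */
	attr.polarity   = polarity;

	/* dev may be NULL; the irq_desc node then falls back to cpu_to_node(boot_cpu_id) */
	ret = io_apic_set_pci_routing(dev, irq, &attr);
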
diff --git a/arch/x86/kernel/apic/nmi.c b/arch/x86/kernel/apic/nmi.c index ce4fbfa315a1..b3025b43b63a 100644 --- a/arch/x86/kernel/apic/nmi.c +++ b/arch/x86/kernel/apic/nmi.c | |||
@@ -66,7 +66,7 @@ static inline unsigned int get_nmi_count(int cpu) | |||
66 | 66 | ||
67 | static inline int mce_in_progress(void) | 67 | static inline int mce_in_progress(void) |
68 | { | 68 | { |
69 | #if defined(CONFIG_X86_64) && defined(CONFIG_X86_MCE) | 69 | #if defined(CONFIG_X86_NEW_MCE) |
70 | return atomic_read(&mce_entry) > 0; | 70 | return atomic_read(&mce_entry) > 0; |
71 | #endif | 71 | #endif |
72 | return 0; | 72 | return 0; |
@@ -104,7 +104,7 @@ static __init void nmi_cpu_busy(void *data) | |||
104 | } | 104 | } |
105 | #endif | 105 | #endif |
106 | 106 | ||
107 | static void report_broken_nmi(int cpu, int *prev_nmi_count) | 107 | static void report_broken_nmi(int cpu, unsigned int *prev_nmi_count) |
108 | { | 108 | { |
109 | printk(KERN_CONT "\n"); | 109 | printk(KERN_CONT "\n"); |
110 | 110 | ||
diff --git a/arch/x86/kernel/apic/probe_32.c b/arch/x86/kernel/apic/probe_32.c index 01eda2ac65e4..0c0182cc947d 100644 --- a/arch/x86/kernel/apic/probe_32.c +++ b/arch/x86/kernel/apic/probe_32.c | |||
@@ -20,23 +20,12 @@ | |||
20 | #include <asm/apic.h> | 20 | #include <asm/apic.h> |
21 | #include <asm/setup.h> | 21 | #include <asm/setup.h> |
22 | 22 | ||
23 | #include <linux/threads.h> | ||
24 | #include <linux/cpumask.h> | ||
25 | #include <asm/mpspec.h> | ||
26 | #include <asm/fixmap.h> | ||
27 | #include <asm/apicdef.h> | ||
28 | #include <linux/kernel.h> | ||
29 | #include <linux/string.h> | ||
30 | #include <linux/smp.h> | 23 | #include <linux/smp.h> |
31 | #include <linux/init.h> | ||
32 | #include <asm/ipi.h> | 24 | #include <asm/ipi.h> |
33 | 25 | ||
34 | #include <linux/smp.h> | ||
35 | #include <linux/init.h> | ||
36 | #include <linux/interrupt.h> | 26 | #include <linux/interrupt.h> |
37 | #include <asm/acpi.h> | 27 | #include <asm/acpi.h> |
38 | #include <asm/e820.h> | 28 | #include <asm/e820.h> |
39 | #include <asm/setup.h> | ||
40 | 29 | ||
41 | #ifdef CONFIG_HOTPLUG_CPU | 30 | #ifdef CONFIG_HOTPLUG_CPU |
42 | #define DEFAULT_SEND_IPI (1) | 31 | #define DEFAULT_SEND_IPI (1) |
@@ -160,7 +149,6 @@ extern struct apic apic_summit; | |||
160 | extern struct apic apic_bigsmp; | 149 | extern struct apic apic_bigsmp; |
161 | extern struct apic apic_es7000; | 150 | extern struct apic apic_es7000; |
162 | extern struct apic apic_es7000_cluster; | 151 | extern struct apic apic_es7000_cluster; |
163 | extern struct apic apic_default; | ||
164 | 152 | ||
165 | struct apic *apic = &apic_default; | 153 | struct apic *apic = &apic_default; |
166 | EXPORT_SYMBOL_GPL(apic); | 154 | EXPORT_SYMBOL_GPL(apic); |
diff --git a/arch/x86/kernel/apic/probe_64.c b/arch/x86/kernel/apic/probe_64.c index 1783652bb0e5..bc3e880f9b82 100644 --- a/arch/x86/kernel/apic/probe_64.c +++ b/arch/x86/kernel/apic/probe_64.c | |||
@@ -50,7 +50,7 @@ static struct apic *apic_probe[] __initdata = { | |||
50 | void __init default_setup_apic_routing(void) | 50 | void __init default_setup_apic_routing(void) |
51 | { | 51 | { |
52 | #ifdef CONFIG_X86_X2APIC | 52 | #ifdef CONFIG_X86_X2APIC |
53 | if (x2apic && (apic != &apic_x2apic_phys && | 53 | if (x2apic_mode && (apic != &apic_x2apic_phys && |
54 | #ifdef CONFIG_X86_UV | 54 | #ifdef CONFIG_X86_UV |
55 | apic != &apic_x2apic_uv_x && | 55 | apic != &apic_x2apic_uv_x && |
56 | #endif | 56 | #endif |
diff --git a/arch/x86/kernel/apic/summit_32.c b/arch/x86/kernel/apic/summit_32.c index 9cfe1f415d81..eafdfbd1ea95 100644 --- a/arch/x86/kernel/apic/summit_32.c +++ b/arch/x86/kernel/apic/summit_32.c | |||
@@ -44,7 +44,6 @@ | |||
44 | #include <asm/ipi.h> | 44 | #include <asm/ipi.h> |
45 | #include <linux/kernel.h> | 45 | #include <linux/kernel.h> |
46 | #include <linux/string.h> | 46 | #include <linux/string.h> |
47 | #include <linux/init.h> | ||
48 | #include <linux/gfp.h> | 47 | #include <linux/gfp.h> |
49 | #include <linux/smp.h> | 48 | #include <linux/smp.h> |
50 | 49 | ||
@@ -173,13 +172,6 @@ static inline int is_WPEG(struct rio_detail *rio){ | |||
173 | rio->type == LookOutAWPEG || rio->type == LookOutBWPEG); | 172 | rio->type == LookOutAWPEG || rio->type == LookOutBWPEG); |
174 | } | 173 | } |
175 | 174 | ||
176 | |||
177 | /* In clustered mode, the high nibble of APIC ID is a cluster number. | ||
178 | * The low nibble is a 4-bit bitmap. */ | ||
179 | #define XAPIC_DEST_CPUS_SHIFT 4 | ||
180 | #define XAPIC_DEST_CPUS_MASK ((1u << XAPIC_DEST_CPUS_SHIFT) - 1) | ||
181 | #define XAPIC_DEST_CLUSTER_MASK (XAPIC_DEST_CPUS_MASK << XAPIC_DEST_CPUS_SHIFT) | ||
182 | |||
183 | #define SUMMIT_APIC_DFR_VALUE (APIC_DFR_CLUSTER) | 175 | #define SUMMIT_APIC_DFR_VALUE (APIC_DFR_CLUSTER) |
184 | 176 | ||
185 | static const struct cpumask *summit_target_cpus(void) | 177 | static const struct cpumask *summit_target_cpus(void) |
diff --git a/arch/x86/kernel/apic/x2apic_cluster.c b/arch/x86/kernel/apic/x2apic_cluster.c index 4a903e2f0d17..8e4cbb255c38 100644 --- a/arch/x86/kernel/apic/x2apic_cluster.c +++ b/arch/x86/kernel/apic/x2apic_cluster.c | |||
@@ -10,7 +10,7 @@ | |||
10 | #include <asm/apic.h> | 10 | #include <asm/apic.h> |
11 | #include <asm/ipi.h> | 11 | #include <asm/ipi.h> |
12 | 12 | ||
13 | DEFINE_PER_CPU(u32, x86_cpu_to_logical_apicid); | 13 | static DEFINE_PER_CPU(u32, x86_cpu_to_logical_apicid); |
14 | 14 | ||
15 | static int x2apic_acpi_madt_oem_check(char *oem_id, char *oem_table_id) | 15 | static int x2apic_acpi_madt_oem_check(char *oem_id, char *oem_table_id) |
16 | { | 16 | { |
diff --git a/arch/x86/kernel/apic/x2apic_uv_x.c b/arch/x86/kernel/apic/x2apic_uv_x.c index 2bda69352976..096d19aea2f7 100644 --- a/arch/x86/kernel/apic/x2apic_uv_x.c +++ b/arch/x86/kernel/apic/x2apic_uv_x.c | |||
@@ -105,7 +105,7 @@ static void uv_vector_allocation_domain(int cpu, struct cpumask *retmask) | |||
105 | cpumask_set_cpu(cpu, retmask); | 105 | cpumask_set_cpu(cpu, retmask); |
106 | } | 106 | } |
107 | 107 | ||
108 | static int uv_wakeup_secondary(int phys_apicid, unsigned long start_rip) | 108 | static int __cpuinit uv_wakeup_secondary(int phys_apicid, unsigned long start_rip) |
109 | { | 109 | { |
110 | #ifdef CONFIG_SMP | 110 | #ifdef CONFIG_SMP |
111 | unsigned long val; | 111 | unsigned long val; |
@@ -463,7 +463,7 @@ static void uv_heartbeat(unsigned long ignored) | |||
463 | uv_set_scir_bits(bits); | 463 | uv_set_scir_bits(bits); |
464 | 464 | ||
465 | /* enable next timer period */ | 465 | /* enable next timer period */ |
466 | mod_timer(timer, jiffies + SCIR_CPU_HB_INTERVAL); | 466 | mod_timer_pinned(timer, jiffies + SCIR_CPU_HB_INTERVAL); |
467 | } | 467 | } |
468 | 468 | ||
469 | static void __cpuinit uv_heartbeat_enable(int cpu) | 469 | static void __cpuinit uv_heartbeat_enable(int cpu) |
@@ -562,7 +562,7 @@ void __init uv_system_init(void) | |||
562 | union uvh_node_id_u node_id; | 562 | union uvh_node_id_u node_id; |
563 | unsigned long gnode_upper, lowmem_redir_base, lowmem_redir_size; | 563 | unsigned long gnode_upper, lowmem_redir_base, lowmem_redir_size; |
564 | int bytes, nid, cpu, lcpu, pnode, blade, i, j, m_val, n_val; | 564 | int bytes, nid, cpu, lcpu, pnode, blade, i, j, m_val, n_val; |
565 | int max_pnode = 0; | 565 | int gnode_extra, max_pnode = 0; |
566 | unsigned long mmr_base, present, paddr; | 566 | unsigned long mmr_base, present, paddr; |
567 | unsigned short pnode_mask; | 567 | unsigned short pnode_mask; |
568 | 568 | ||
@@ -574,6 +574,13 @@ void __init uv_system_init(void) | |||
574 | mmr_base = | 574 | mmr_base = |
575 | uv_read_local_mmr(UVH_RH_GAM_MMR_OVERLAY_CONFIG_MMR) & | 575 | uv_read_local_mmr(UVH_RH_GAM_MMR_OVERLAY_CONFIG_MMR) & |
576 | ~UV_MMR_ENABLE; | 576 | ~UV_MMR_ENABLE; |
577 | pnode_mask = (1 << n_val) - 1; | ||
578 | node_id.v = uv_read_local_mmr(UVH_NODE_ID); | ||
579 | gnode_extra = (node_id.s.node_id & ~((1 << n_val) - 1)) >> 1; | ||
580 | gnode_upper = ((unsigned long)gnode_extra << m_val); | ||
581 | printk(KERN_DEBUG "UV: N %d, M %d, gnode_upper 0x%lx, gnode_extra 0x%x\n", | ||
582 | n_val, m_val, gnode_upper, gnode_extra); | ||
583 | |||
577 | printk(KERN_DEBUG "UV: global MMR base 0x%lx\n", mmr_base); | 584 | printk(KERN_DEBUG "UV: global MMR base 0x%lx\n", mmr_base); |
578 | 585 | ||
579 | for(i = 0; i < UVH_NODE_PRESENT_TABLE_DEPTH; i++) | 586 | for(i = 0; i < UVH_NODE_PRESENT_TABLE_DEPTH; i++) |
@@ -583,15 +590,18 @@ void __init uv_system_init(void) | |||
583 | 590 | ||
584 | bytes = sizeof(struct uv_blade_info) * uv_num_possible_blades(); | 591 | bytes = sizeof(struct uv_blade_info) * uv_num_possible_blades(); |
585 | uv_blade_info = kmalloc(bytes, GFP_KERNEL); | 592 | uv_blade_info = kmalloc(bytes, GFP_KERNEL); |
593 | BUG_ON(!uv_blade_info); | ||
586 | 594 | ||
587 | get_lowmem_redirect(&lowmem_redir_base, &lowmem_redir_size); | 595 | get_lowmem_redirect(&lowmem_redir_base, &lowmem_redir_size); |
588 | 596 | ||
589 | bytes = sizeof(uv_node_to_blade[0]) * num_possible_nodes(); | 597 | bytes = sizeof(uv_node_to_blade[0]) * num_possible_nodes(); |
590 | uv_node_to_blade = kmalloc(bytes, GFP_KERNEL); | 598 | uv_node_to_blade = kmalloc(bytes, GFP_KERNEL); |
599 | BUG_ON(!uv_node_to_blade); | ||
591 | memset(uv_node_to_blade, 255, bytes); | 600 | memset(uv_node_to_blade, 255, bytes); |
592 | 601 | ||
593 | bytes = sizeof(uv_cpu_to_blade[0]) * num_possible_cpus(); | 602 | bytes = sizeof(uv_cpu_to_blade[0]) * num_possible_cpus(); |
594 | uv_cpu_to_blade = kmalloc(bytes, GFP_KERNEL); | 603 | uv_cpu_to_blade = kmalloc(bytes, GFP_KERNEL); |
604 | BUG_ON(!uv_cpu_to_blade); | ||
595 | memset(uv_cpu_to_blade, 255, bytes); | 605 | memset(uv_cpu_to_blade, 255, bytes); |
596 | 606 | ||
597 | blade = 0; | 607 | blade = 0; |
@@ -607,11 +617,6 @@ void __init uv_system_init(void) | |||
607 | } | 617 | } |
608 | } | 618 | } |
609 | 619 | ||
610 | pnode_mask = (1 << n_val) - 1; | ||
611 | node_id.v = uv_read_local_mmr(UVH_NODE_ID); | ||
612 | gnode_upper = (((unsigned long)node_id.s.node_id) & | ||
613 | ~((1 << n_val) - 1)) << m_val; | ||
614 | |||
615 | uv_bios_init(); | 620 | uv_bios_init(); |
616 | uv_bios_get_sn_info(0, &uv_type, &sn_partition_id, | 621 | uv_bios_get_sn_info(0, &uv_type, &sn_partition_id, |
617 | &sn_coherency_id, &sn_region_size); | 622 | &sn_coherency_id, &sn_region_size); |
@@ -634,6 +639,7 @@ void __init uv_system_init(void) | |||
634 | uv_cpu_hub_info(cpu)->pnode_mask = pnode_mask; | 639 | uv_cpu_hub_info(cpu)->pnode_mask = pnode_mask; |
635 | uv_cpu_hub_info(cpu)->gpa_mask = (1 << (m_val + n_val)) - 1; | 640 | uv_cpu_hub_info(cpu)->gpa_mask = (1 << (m_val + n_val)) - 1; |
636 | uv_cpu_hub_info(cpu)->gnode_upper = gnode_upper; | 641 | uv_cpu_hub_info(cpu)->gnode_upper = gnode_upper; |
642 | uv_cpu_hub_info(cpu)->gnode_extra = gnode_extra; | ||
637 | uv_cpu_hub_info(cpu)->global_mmr_base = mmr_base; | 643 | uv_cpu_hub_info(cpu)->global_mmr_base = mmr_base; |
638 | uv_cpu_hub_info(cpu)->coherency_domain_number = sn_coherency_id; | 644 | uv_cpu_hub_info(cpu)->coherency_domain_number = sn_coherency_id; |
639 | uv_cpu_hub_info(cpu)->scir.offset = SCIR_LOCAL_MMR_BASE + lcpu; | 645 | uv_cpu_hub_info(cpu)->scir.offset = SCIR_LOCAL_MMR_BASE + lcpu; |
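
In uv_system_init() the gnode calculation is pulled ahead of the blade scan and split into gnode_extra and gnode_upper, both of which are now stored in the per-cpu hub info. A small numeric illustration of the two formulas from the hunk above (the values are invented purely to show the bit arithmetic):

	/* suppose n_val = 2, m_val = 6 and UVH_NODE_ID reads back 0x2d */
	gnode_extra = (0x2d & ~((1 << 2) - 1)) >> 1;	/* (0x2d & ~0x3) >> 1 = 0x2c >> 1 = 0x16 */
	gnode_upper = (unsigned long)gnode_extra << 6;	/* 0x16 << 6 = 0x580 */
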
diff --git a/arch/x86/kernel/apm_32.c b/arch/x86/kernel/apm_32.c index 49e0939bac42..79302e9a33a4 100644 --- a/arch/x86/kernel/apm_32.c +++ b/arch/x86/kernel/apm_32.c | |||
@@ -1233,9 +1233,9 @@ static int suspend(int vetoable) | |||
1233 | int err; | 1233 | int err; |
1234 | struct apm_user *as; | 1234 | struct apm_user *as; |
1235 | 1235 | ||
1236 | device_suspend(PMSG_SUSPEND); | 1236 | dpm_suspend_start(PMSG_SUSPEND); |
1237 | 1237 | ||
1238 | device_power_down(PMSG_SUSPEND); | 1238 | dpm_suspend_noirq(PMSG_SUSPEND); |
1239 | 1239 | ||
1240 | local_irq_disable(); | 1240 | local_irq_disable(); |
1241 | sysdev_suspend(PMSG_SUSPEND); | 1241 | sysdev_suspend(PMSG_SUSPEND); |
@@ -1259,9 +1259,9 @@ static int suspend(int vetoable) | |||
1259 | sysdev_resume(); | 1259 | sysdev_resume(); |
1260 | local_irq_enable(); | 1260 | local_irq_enable(); |
1261 | 1261 | ||
1262 | device_power_up(PMSG_RESUME); | 1262 | dpm_resume_noirq(PMSG_RESUME); |
1263 | 1263 | ||
1264 | device_resume(PMSG_RESUME); | 1264 | dpm_resume_end(PMSG_RESUME); |
1265 | queue_event(APM_NORMAL_RESUME, NULL); | 1265 | queue_event(APM_NORMAL_RESUME, NULL); |
1266 | spin_lock(&user_list_lock); | 1266 | spin_lock(&user_list_lock); |
1267 | for (as = user_list; as != NULL; as = as->next) { | 1267 | for (as = user_list; as != NULL; as = as->next) { |
@@ -1277,7 +1277,7 @@ static void standby(void) | |||
1277 | { | 1277 | { |
1278 | int err; | 1278 | int err; |
1279 | 1279 | ||
1280 | device_power_down(PMSG_SUSPEND); | 1280 | dpm_suspend_noirq(PMSG_SUSPEND); |
1281 | 1281 | ||
1282 | local_irq_disable(); | 1282 | local_irq_disable(); |
1283 | sysdev_suspend(PMSG_SUSPEND); | 1283 | sysdev_suspend(PMSG_SUSPEND); |
@@ -1291,7 +1291,7 @@ static void standby(void) | |||
1291 | sysdev_resume(); | 1291 | sysdev_resume(); |
1292 | local_irq_enable(); | 1292 | local_irq_enable(); |
1293 | 1293 | ||
1294 | device_power_up(PMSG_RESUME); | 1294 | dpm_resume_noirq(PMSG_RESUME); |
1295 | } | 1295 | } |
1296 | 1296 | ||
1297 | static apm_event_t get_event(void) | 1297 | static apm_event_t get_event(void) |
@@ -1376,7 +1376,7 @@ static void check_events(void) | |||
1376 | ignore_bounce = 1; | 1376 | ignore_bounce = 1; |
1377 | if ((event != APM_NORMAL_RESUME) | 1377 | if ((event != APM_NORMAL_RESUME) |
1378 | || (ignore_normal_resume == 0)) { | 1378 | || (ignore_normal_resume == 0)) { |
1379 | device_resume(PMSG_RESUME); | 1379 | dpm_resume_end(PMSG_RESUME); |
1380 | queue_event(event, NULL); | 1380 | queue_event(event, NULL); |
1381 | } | 1381 | } |
1382 | ignore_normal_resume = 0; | 1382 | ignore_normal_resume = 0; |
diff --git a/arch/x86/kernel/asm-offsets_32.c b/arch/x86/kernel/asm-offsets_32.c index 5a6aa1c1162f..dfdbf6403895 100644 --- a/arch/x86/kernel/asm-offsets_32.c +++ b/arch/x86/kernel/asm-offsets_32.c | |||
@@ -126,6 +126,7 @@ void foo(void) | |||
126 | #if defined(CONFIG_LGUEST) || defined(CONFIG_LGUEST_GUEST) || defined(CONFIG_LGUEST_MODULE) | 126 | #if defined(CONFIG_LGUEST) || defined(CONFIG_LGUEST_GUEST) || defined(CONFIG_LGUEST_MODULE) |
127 | BLANK(); | 127 | BLANK(); |
128 | OFFSET(LGUEST_DATA_irq_enabled, lguest_data, irq_enabled); | 128 | OFFSET(LGUEST_DATA_irq_enabled, lguest_data, irq_enabled); |
129 | OFFSET(LGUEST_DATA_irq_pending, lguest_data, irq_pending); | ||
129 | OFFSET(LGUEST_DATA_pgdir, lguest_data, pgdir); | 130 | OFFSET(LGUEST_DATA_pgdir, lguest_data, pgdir); |
130 | 131 | ||
131 | BLANK(); | 132 | BLANK(); |
@@ -146,4 +147,5 @@ void foo(void) | |||
146 | OFFSET(BP_loadflags, boot_params, hdr.loadflags); | 147 | OFFSET(BP_loadflags, boot_params, hdr.loadflags); |
147 | OFFSET(BP_hardware_subarch, boot_params, hdr.hardware_subarch); | 148 | OFFSET(BP_hardware_subarch, boot_params, hdr.hardware_subarch); |
148 | OFFSET(BP_version, boot_params, hdr.version); | 149 | OFFSET(BP_version, boot_params, hdr.version); |
150 | OFFSET(BP_kernel_alignment, boot_params, hdr.kernel_alignment); | ||
149 | } | 151 | } |
diff --git a/arch/x86/kernel/asm-offsets_64.c b/arch/x86/kernel/asm-offsets_64.c index e72f062fb4b5..898ecc47e129 100644 --- a/arch/x86/kernel/asm-offsets_64.c +++ b/arch/x86/kernel/asm-offsets_64.c | |||
@@ -125,6 +125,7 @@ int main(void) | |||
125 | OFFSET(BP_loadflags, boot_params, hdr.loadflags); | 125 | OFFSET(BP_loadflags, boot_params, hdr.loadflags); |
126 | OFFSET(BP_hardware_subarch, boot_params, hdr.hardware_subarch); | 126 | OFFSET(BP_hardware_subarch, boot_params, hdr.hardware_subarch); |
127 | OFFSET(BP_version, boot_params, hdr.version); | 127 | OFFSET(BP_version, boot_params, hdr.version); |
128 | OFFSET(BP_kernel_alignment, boot_params, hdr.kernel_alignment); | ||
128 | 129 | ||
129 | BLANK(); | 130 | BLANK(); |
130 | DEFINE(PAGE_SIZE_asm, PAGE_SIZE); | 131 | DEFINE(PAGE_SIZE_asm, PAGE_SIZE); |
diff --git a/arch/x86/kernel/cpu/Makefile b/arch/x86/kernel/cpu/Makefile index 4e242f9a06e4..3efcb2b96a15 100644 --- a/arch/x86/kernel/cpu/Makefile +++ b/arch/x86/kernel/cpu/Makefile | |||
@@ -1,5 +1,5 @@ | |||
1 | # | 1 | # |
2 | # Makefile for x86-compatible CPU details and quirks | 2 | # Makefile for x86-compatible CPU details, features and quirks |
3 | # | 3 | # |
4 | 4 | ||
5 | # Don't trace early stages of a secondary CPU boot | 5 | # Don't trace early stages of a secondary CPU boot |
@@ -23,11 +23,13 @@ obj-$(CONFIG_CPU_SUP_CENTAUR) += centaur.o | |||
23 | obj-$(CONFIG_CPU_SUP_TRANSMETA_32) += transmeta.o | 23 | obj-$(CONFIG_CPU_SUP_TRANSMETA_32) += transmeta.o |
24 | obj-$(CONFIG_CPU_SUP_UMC_32) += umc.o | 24 | obj-$(CONFIG_CPU_SUP_UMC_32) += umc.o |
25 | 25 | ||
26 | obj-$(CONFIG_X86_MCE) += mcheck/ | 26 | obj-$(CONFIG_PERF_COUNTERS) += perf_counter.o |
27 | obj-$(CONFIG_MTRR) += mtrr/ | ||
28 | obj-$(CONFIG_CPU_FREQ) += cpufreq/ | ||
29 | 27 | ||
30 | obj-$(CONFIG_X86_LOCAL_APIC) += perfctr-watchdog.o | 28 | obj-$(CONFIG_X86_MCE) += mcheck/ |
29 | obj-$(CONFIG_MTRR) += mtrr/ | ||
30 | obj-$(CONFIG_CPU_FREQ) += cpufreq/ | ||
31 | |||
32 | obj-$(CONFIG_X86_LOCAL_APIC) += perfctr-watchdog.o | ||
31 | 33 | ||
32 | quiet_cmd_mkcapflags = MKCAP $@ | 34 | quiet_cmd_mkcapflags = MKCAP $@ |
33 | cmd_mkcapflags = $(PERL) $(srctree)/$(src)/mkcapflags.pl $< $@ | 35 | cmd_mkcapflags = $(PERL) $(srctree)/$(src)/mkcapflags.pl $< $@ |
diff --git a/arch/x86/kernel/cpu/amd.c b/arch/x86/kernel/cpu/amd.c index 7e4a459daa64..e5b27d8f1b47 100644 --- a/arch/x86/kernel/cpu/amd.c +++ b/arch/x86/kernel/cpu/amd.c | |||
@@ -6,6 +6,7 @@ | |||
6 | #include <asm/processor.h> | 6 | #include <asm/processor.h> |
7 | #include <asm/apic.h> | 7 | #include <asm/apic.h> |
8 | #include <asm/cpu.h> | 8 | #include <asm/cpu.h> |
9 | #include <asm/pci-direct.h> | ||
9 | 10 | ||
10 | #ifdef CONFIG_X86_64 | 11 | #ifdef CONFIG_X86_64 |
11 | # include <asm/numa_64.h> | 12 | # include <asm/numa_64.h> |
@@ -272,7 +273,7 @@ static void __cpuinit srat_detect_node(struct cpuinfo_x86 *c) | |||
272 | #if defined(CONFIG_NUMA) && defined(CONFIG_X86_64) | 273 | #if defined(CONFIG_NUMA) && defined(CONFIG_X86_64) |
273 | int cpu = smp_processor_id(); | 274 | int cpu = smp_processor_id(); |
274 | int node; | 275 | int node; |
275 | unsigned apicid = hard_smp_processor_id(); | 276 | unsigned apicid = cpu_has_apic ? hard_smp_processor_id() : c->apicid; |
276 | 277 | ||
277 | node = c->phys_proc_id; | 278 | node = c->phys_proc_id; |
278 | if (apicid_to_node[apicid] != NUMA_NO_NODE) | 279 | if (apicid_to_node[apicid] != NUMA_NO_NODE) |
@@ -351,6 +352,15 @@ static void __cpuinit early_init_amd(struct cpuinfo_x86 *c) | |||
351 | (c->x86_model == 8 && c->x86_mask >= 8)) | 352 | (c->x86_model == 8 && c->x86_mask >= 8)) |
352 | set_cpu_cap(c, X86_FEATURE_K6_MTRR); | 353 | set_cpu_cap(c, X86_FEATURE_K6_MTRR); |
353 | #endif | 354 | #endif |
355 | #if defined(CONFIG_X86_LOCAL_APIC) && defined(CONFIG_PCI) | ||
356 | /* check CPU config space for extended APIC ID */ | ||
357 | if (c->x86 >= 0xf) { | ||
358 | unsigned int val; | ||
359 | val = read_pci_config(0, 24, 0, 0x68); | ||
360 | if ((val & ((1 << 17) | (1 << 18))) == ((1 << 17) | (1 << 18))) | ||
361 | set_cpu_cap(c, X86_FEATURE_EXTD_APICID); | ||
362 | } | ||
363 | #endif | ||
354 | } | 364 | } |
355 | 365 | ||
356 | static void __cpuinit init_amd(struct cpuinfo_x86 *c) | 366 | static void __cpuinit init_amd(struct cpuinfo_x86 *c) |
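
early_init_amd() now also probes the CPU's PCI config space (bus 0, device 24, function 0, offset 0x68) for the extended APIC ID on family 0xf and later; X86_FEATURE_EXTD_APICID is announced only when bits 17 and 18 are both set. The mask being tested works out to:

	/* bits 17 and 18 of the 0x68 config dword */
	unsigned int mask = (1 << 17) | (1 << 18);	/* 0x00020000 | 0x00040000 = 0x00060000 */

	if ((val & mask) == mask)
		set_cpu_cap(c, X86_FEATURE_EXTD_APICID);
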
diff --git a/arch/x86/kernel/cpu/common.c b/arch/x86/kernel/cpu/common.c index 77848d9fca68..6b26d4deada0 100644 --- a/arch/x86/kernel/cpu/common.c +++ b/arch/x86/kernel/cpu/common.c | |||
@@ -13,6 +13,7 @@ | |||
13 | #include <linux/io.h> | 13 | #include <linux/io.h> |
14 | 14 | ||
15 | #include <asm/stackprotector.h> | 15 | #include <asm/stackprotector.h> |
16 | #include <asm/perf_counter.h> | ||
16 | #include <asm/mmu_context.h> | 17 | #include <asm/mmu_context.h> |
17 | #include <asm/hypervisor.h> | 18 | #include <asm/hypervisor.h> |
18 | #include <asm/processor.h> | 19 | #include <asm/processor.h> |
@@ -107,7 +108,7 @@ DEFINE_PER_CPU_PAGE_ALIGNED(struct gdt_page, gdt_page) = { .gdt = { | |||
107 | /* data */ | 108 | /* data */ |
108 | [GDT_ENTRY_APMBIOS_BASE+2] = { { { 0x0000ffff, 0x00409200 } } }, | 109 | [GDT_ENTRY_APMBIOS_BASE+2] = { { { 0x0000ffff, 0x00409200 } } }, |
109 | 110 | ||
110 | [GDT_ENTRY_ESPFIX_SS] = { { { 0x00000000, 0x00c09200 } } }, | 111 | [GDT_ENTRY_ESPFIX_SS] = { { { 0x0000ffff, 0x00cf9200 } } }, |
111 | [GDT_ENTRY_PERCPU] = { { { 0x0000ffff, 0x00cf9200 } } }, | 112 | [GDT_ENTRY_PERCPU] = { { { 0x0000ffff, 0x00cf9200 } } }, |
112 | GDT_STACK_CANARY_INIT | 113 | GDT_STACK_CANARY_INIT |
113 | #endif | 114 | #endif |
@@ -299,7 +300,8 @@ static const char *__cpuinit table_lookup_model(struct cpuinfo_x86 *c) | |||
299 | return NULL; /* Not found */ | 300 | return NULL; /* Not found */ |
300 | } | 301 | } |
301 | 302 | ||
302 | __u32 cleared_cpu_caps[NCAPINTS] __cpuinitdata; | 303 | __u32 cpu_caps_cleared[NCAPINTS] __cpuinitdata; |
304 | __u32 cpu_caps_set[NCAPINTS] __cpuinitdata; | ||
303 | 305 | ||
304 | void load_percpu_segment(int cpu) | 306 | void load_percpu_segment(int cpu) |
305 | { | 307 | { |
@@ -485,7 +487,6 @@ out: | |||
485 | static void __cpuinit get_cpu_vendor(struct cpuinfo_x86 *c) | 487 | static void __cpuinit get_cpu_vendor(struct cpuinfo_x86 *c) |
486 | { | 488 | { |
487 | char *v = c->x86_vendor_id; | 489 | char *v = c->x86_vendor_id; |
488 | static int printed; | ||
489 | int i; | 490 | int i; |
490 | 491 | ||
491 | for (i = 0; i < X86_VENDOR_NUM; i++) { | 492 | for (i = 0; i < X86_VENDOR_NUM; i++) { |
@@ -502,13 +503,9 @@ static void __cpuinit get_cpu_vendor(struct cpuinfo_x86 *c) | |||
502 | } | 503 | } |
503 | } | 504 | } |
504 | 505 | ||
505 | if (!printed) { | 506 | printk_once(KERN_ERR |
506 | printed++; | 507 | "CPU: vendor_id '%s' unknown, using generic init.\n" \ |
507 | printk(KERN_ERR | 508 | "CPU: Your system may be unstable.\n", v); |
508 | "CPU: vendor_id '%s' unknown, using generic init.\n", v); | ||
509 | |||
510 | printk(KERN_ERR "CPU: Your system may be unstable.\n"); | ||
511 | } | ||
512 | 509 | ||
513 | c->x86_vendor = X86_VENDOR_UNKNOWN; | 510 | c->x86_vendor = X86_VENDOR_UNKNOWN; |
514 | this_cpu = &default_cpu; | 511 | this_cpu = &default_cpu; |
@@ -768,6 +765,12 @@ static void __cpuinit identify_cpu(struct cpuinfo_x86 *c) | |||
768 | if (this_cpu->c_identify) | 765 | if (this_cpu->c_identify) |
769 | this_cpu->c_identify(c); | 766 | this_cpu->c_identify(c); |
770 | 767 | ||
768 | /* Clear/Set all flags overridden by options, after probe */ | ||
769 | for (i = 0; i < NCAPINTS; i++) { | ||
770 | c->x86_capability[i] &= ~cpu_caps_cleared[i]; | ||
771 | c->x86_capability[i] |= cpu_caps_set[i]; | ||
772 | } | ||
773 | |||
771 | #ifdef CONFIG_X86_64 | 774 | #ifdef CONFIG_X86_64 |
772 | c->apicid = apic->phys_pkg_id(c->initial_apicid, 0); | 775 | c->apicid = apic->phys_pkg_id(c->initial_apicid, 0); |
773 | #endif | 776 | #endif |
@@ -813,6 +816,16 @@ static void __cpuinit identify_cpu(struct cpuinfo_x86 *c) | |||
813 | #endif | 816 | #endif |
814 | 817 | ||
815 | init_hypervisor(c); | 818 | init_hypervisor(c); |
819 | |||
820 | /* | ||
821 | * Clear/Set all flags overridden by options; this must be done | ||
822 | * before the SMP capability AND across all CPUs below. | ||
823 | */ | ||
824 | for (i = 0; i < NCAPINTS; i++) { | ||
825 | c->x86_capability[i] &= ~cpu_caps_cleared[i]; | ||
826 | c->x86_capability[i] |= cpu_caps_set[i]; | ||
827 | } | ||
828 | |||
816 | /* | 829 | /* |
817 | * On SMP, boot_cpu_data holds the common feature set between | 830 | * On SMP, boot_cpu_data holds the common feature set between |
818 | * all CPUs; so make sure that we indicate which features are | 831 | * all CPUs; so make sure that we indicate which features are |
@@ -825,10 +838,6 @@ static void __cpuinit identify_cpu(struct cpuinfo_x86 *c) | |||
825 | boot_cpu_data.x86_capability[i] &= c->x86_capability[i]; | 838 | boot_cpu_data.x86_capability[i] &= c->x86_capability[i]; |
826 | } | 839 | } |
827 | 840 | ||
828 | /* Clear all flags overriden by options */ | ||
829 | for (i = 0; i < NCAPINTS; i++) | ||
830 | c->x86_capability[i] &= ~cleared_cpu_caps[i]; | ||
831 | |||
832 | #ifdef CONFIG_X86_MCE | 841 | #ifdef CONFIG_X86_MCE |
833 | /* Init Machine Check Exception if available. */ | 842 | /* Init Machine Check Exception if available. */ |
834 | mcheck_init(c); | 843 | mcheck_init(c); |
@@ -839,6 +848,9 @@ static void __cpuinit identify_cpu(struct cpuinfo_x86 *c) | |||
839 | #if defined(CONFIG_NUMA) && defined(CONFIG_X86_64) | 848 | #if defined(CONFIG_NUMA) && defined(CONFIG_X86_64) |
840 | numa_add_cpu(smp_processor_id()); | 849 | numa_add_cpu(smp_processor_id()); |
841 | #endif | 850 | #endif |
851 | |||
852 | /* Cap the iomem address space to what is addressable on all CPUs */ | ||
853 | iomem_resource.end &= (1ULL << c->x86_phys_bits) - 1; | ||
842 | } | 854 | } |
843 | 855 | ||
844 | #ifdef CONFIG_X86_64 | 856 | #ifdef CONFIG_X86_64 |
@@ -861,6 +873,7 @@ void __init identify_boot_cpu(void) | |||
861 | #else | 873 | #else |
862 | vgetcpu_set_mode(); | 874 | vgetcpu_set_mode(); |
863 | #endif | 875 | #endif |
876 | init_hw_perf_counters(); | ||
864 | } | 877 | } |
865 | 878 | ||
866 | void __cpuinit identify_secondary_cpu(struct cpuinfo_x86 *c) | 879 | void __cpuinit identify_secondary_cpu(struct cpuinfo_x86 *c) |
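
The single cleared_cpu_caps[] mask becomes a clear mask plus a set mask (cpu_caps_cleared / cpu_caps_set), and identify_cpu() now reapplies both right after the per-vendor identify hook and again just before boot_cpu_data's capability AND. In outline, a quirk that wants to force a flag off (or on) only records the override once; a sketch of the idea, not the exact helper the kernel uses:

	/* record the override once, early in boot */
	cpu_caps_cleared[bit / 32] |= 1u << (bit % 32);	/* or cpu_caps_set[] to force a flag on */

	/* identify_cpu() then reapplies the overrides for every CPU */
	for (i = 0; i < NCAPINTS; i++) {
		c->x86_capability[i] &= ~cpu_caps_cleared[i];
		c->x86_capability[i] |= cpu_caps_set[i];
	}
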
diff --git a/arch/x86/kernel/cpu/cpu_debug.c b/arch/x86/kernel/cpu/cpu_debug.c index 46e29ab96c6a..6b2a52dd0403 100644 --- a/arch/x86/kernel/cpu/cpu_debug.c +++ b/arch/x86/kernel/cpu/cpu_debug.c | |||
@@ -32,9 +32,7 @@ | |||
32 | 32 | ||
33 | static DEFINE_PER_CPU(struct cpu_cpuX_base, cpu_arr[CPU_REG_ALL_BIT]); | 33 | static DEFINE_PER_CPU(struct cpu_cpuX_base, cpu_arr[CPU_REG_ALL_BIT]); |
34 | static DEFINE_PER_CPU(struct cpu_private *, priv_arr[MAX_CPU_FILES]); | 34 | static DEFINE_PER_CPU(struct cpu_private *, priv_arr[MAX_CPU_FILES]); |
35 | static DEFINE_PER_CPU(unsigned, cpu_modelflag); | ||
36 | static DEFINE_PER_CPU(int, cpu_priv_count); | 35 | static DEFINE_PER_CPU(int, cpu_priv_count); |
37 | static DEFINE_PER_CPU(unsigned, cpu_model); | ||
38 | 36 | ||
39 | static DEFINE_MUTEX(cpu_debug_lock); | 37 | static DEFINE_MUTEX(cpu_debug_lock); |
40 | 38 | ||
@@ -80,302 +78,102 @@ static struct cpu_file_base cpu_file[] = { | |||
80 | { "value", CPU_REG_ALL, 1 }, | 78 | { "value", CPU_REG_ALL, 1 }, |
81 | }; | 79 | }; |
82 | 80 | ||
83 | /* Intel Registers Range */ | 81 | /* CPU Registers Range */ |
84 | static struct cpu_debug_range cpu_intel_range[] = { | 82 | static struct cpu_debug_range cpu_reg_range[] = { |
85 | { 0x00000000, 0x00000001, CPU_MC, CPU_INTEL_ALL }, | 83 | { 0x00000000, 0x00000001, CPU_MC, }, |
86 | { 0x00000006, 0x00000007, CPU_MONITOR, CPU_CX_AT_XE }, | 84 | { 0x00000006, 0x00000007, CPU_MONITOR, }, |
87 | { 0x00000010, 0x00000010, CPU_TIME, CPU_INTEL_ALL }, | 85 | { 0x00000010, 0x00000010, CPU_TIME, }, |
88 | { 0x00000011, 0x00000013, CPU_PMC, CPU_INTEL_PENTIUM }, | 86 | { 0x00000011, 0x00000013, CPU_PMC, }, |
89 | { 0x00000017, 0x00000017, CPU_PLATFORM, CPU_PX_CX_AT_XE }, | 87 | { 0x00000017, 0x00000017, CPU_PLATFORM, }, |
90 | { 0x0000001B, 0x0000001B, CPU_APIC, CPU_P6_CX_AT_XE }, | 88 | { 0x0000001B, 0x0000001B, CPU_APIC, }, |
91 | 89 | { 0x0000002A, 0x0000002B, CPU_POWERON, }, | |
92 | { 0x0000002A, 0x0000002A, CPU_POWERON, CPU_PX_CX_AT_XE }, | 90 | { 0x0000002C, 0x0000002C, CPU_FREQ, }, |
93 | { 0x0000002B, 0x0000002B, CPU_POWERON, CPU_INTEL_XEON }, | 91 | { 0x0000003A, 0x0000003A, CPU_CONTROL, }, |
94 | { 0x0000002C, 0x0000002C, CPU_FREQ, CPU_INTEL_XEON }, | 92 | { 0x00000040, 0x00000047, CPU_LBRANCH, }, |
95 | { 0x0000003A, 0x0000003A, CPU_CONTROL, CPU_CX_AT_XE }, | 93 | { 0x00000060, 0x00000067, CPU_LBRANCH, }, |
96 | 94 | { 0x00000079, 0x00000079, CPU_BIOS, }, | |
97 | { 0x00000040, 0x00000043, CPU_LBRANCH, CPU_PM_CX_AT_XE }, | 95 | { 0x00000088, 0x0000008A, CPU_CACHE, }, |
98 | { 0x00000044, 0x00000047, CPU_LBRANCH, CPU_PM_CO_AT }, | 96 | { 0x0000008B, 0x0000008B, CPU_BIOS, }, |
99 | { 0x00000060, 0x00000063, CPU_LBRANCH, CPU_C2_AT }, | 97 | { 0x0000009B, 0x0000009B, CPU_MONITOR, }, |
100 | { 0x00000064, 0x00000067, CPU_LBRANCH, CPU_INTEL_ATOM }, | 98 | { 0x000000C1, 0x000000C4, CPU_PMC, }, |
101 | 99 | { 0x000000CD, 0x000000CD, CPU_FREQ, }, | |
102 | { 0x00000079, 0x00000079, CPU_BIOS, CPU_P6_CX_AT_XE }, | 100 | { 0x000000E7, 0x000000E8, CPU_PERF, }, |
103 | { 0x00000088, 0x0000008A, CPU_CACHE, CPU_INTEL_P6 }, | 101 | { 0x000000FE, 0x000000FE, CPU_MTRR, }, |
104 | { 0x0000008B, 0x0000008B, CPU_BIOS, CPU_P6_CX_AT_XE }, | 102 | |
105 | { 0x0000009B, 0x0000009B, CPU_MONITOR, CPU_INTEL_XEON }, | 103 | { 0x00000116, 0x0000011E, CPU_CACHE, }, |
106 | 104 | { 0x00000174, 0x00000176, CPU_SYSENTER, }, | |
107 | { 0x000000C1, 0x000000C2, CPU_PMC, CPU_P6_CX_AT }, | 105 | { 0x00000179, 0x0000017B, CPU_MC, }, |
108 | { 0x000000CD, 0x000000CD, CPU_FREQ, CPU_CX_AT }, | 106 | { 0x00000186, 0x00000189, CPU_PMC, }, |
109 | { 0x000000E7, 0x000000E8, CPU_PERF, CPU_CX_AT }, | 107 | { 0x00000198, 0x00000199, CPU_PERF, }, |
110 | { 0x000000FE, 0x000000FE, CPU_MTRR, CPU_P6_CX_XE }, | 108 | { 0x0000019A, 0x0000019A, CPU_TIME, }, |
111 | 109 | { 0x0000019B, 0x0000019D, CPU_THERM, }, | |
112 | { 0x00000116, 0x00000116, CPU_CACHE, CPU_INTEL_P6 }, | 110 | { 0x000001A0, 0x000001A0, CPU_MISC, }, |
113 | { 0x00000118, 0x00000118, CPU_CACHE, CPU_INTEL_P6 }, | 111 | { 0x000001C9, 0x000001C9, CPU_LBRANCH, }, |
114 | { 0x00000119, 0x00000119, CPU_CACHE, CPU_INTEL_PX }, | 112 | { 0x000001D7, 0x000001D8, CPU_LBRANCH, }, |
115 | { 0x0000011A, 0x0000011B, CPU_CACHE, CPU_INTEL_P6 }, | 113 | { 0x000001D9, 0x000001D9, CPU_DEBUG, }, |
116 | { 0x0000011E, 0x0000011E, CPU_CACHE, CPU_PX_CX_AT }, | 114 | { 0x000001DA, 0x000001E0, CPU_LBRANCH, }, |
117 | 115 | ||
118 | { 0x00000174, 0x00000176, CPU_SYSENTER, CPU_P6_CX_AT_XE }, | 116 | { 0x00000200, 0x0000020F, CPU_MTRR, }, |
119 | { 0x00000179, 0x0000017A, CPU_MC, CPU_PX_CX_AT_XE }, | 117 | { 0x00000250, 0x00000250, CPU_MTRR, }, |
120 | { 0x0000017B, 0x0000017B, CPU_MC, CPU_P6_XE }, | 118 | { 0x00000258, 0x00000259, CPU_MTRR, }, |
121 | { 0x00000186, 0x00000187, CPU_PMC, CPU_P6_CX_AT }, | 119 | { 0x00000268, 0x0000026F, CPU_MTRR, }, |
122 | { 0x00000198, 0x00000199, CPU_PERF, CPU_PM_CX_AT_XE }, | 120 | { 0x00000277, 0x00000277, CPU_PAT, }, |
123 | { 0x0000019A, 0x0000019A, CPU_TIME, CPU_PM_CX_AT_XE }, | 121 | { 0x000002FF, 0x000002FF, CPU_MTRR, }, |
124 | { 0x0000019B, 0x0000019D, CPU_THERM, CPU_PM_CX_AT_XE }, | 122 | |
125 | { 0x000001A0, 0x000001A0, CPU_MISC, CPU_PM_CX_AT_XE }, | 123 | { 0x00000300, 0x00000311, CPU_PMC, }, |
126 | 124 | { 0x00000345, 0x00000345, CPU_PMC, }, | |
127 | { 0x000001C9, 0x000001C9, CPU_LBRANCH, CPU_PM_CX_AT }, | 125 | { 0x00000360, 0x00000371, CPU_PMC, }, |
128 | { 0x000001D7, 0x000001D8, CPU_LBRANCH, CPU_INTEL_XEON }, | 126 | { 0x0000038D, 0x00000390, CPU_PMC, }, |
129 | { 0x000001D9, 0x000001D9, CPU_DEBUG, CPU_CX_AT_XE }, | 127 | { 0x000003A0, 0x000003BE, CPU_PMC, }, |
130 | { 0x000001DA, 0x000001DA, CPU_LBRANCH, CPU_INTEL_XEON }, | 128 | { 0x000003C0, 0x000003CD, CPU_PMC, }, |
131 | { 0x000001DB, 0x000001DB, CPU_LBRANCH, CPU_P6_XE }, | 129 | { 0x000003E0, 0x000003E1, CPU_PMC, }, |
132 | { 0x000001DC, 0x000001DC, CPU_LBRANCH, CPU_INTEL_P6 }, | 130 | { 0x000003F0, 0x000003F2, CPU_PMC, }, |
133 | { 0x000001DD, 0x000001DE, CPU_LBRANCH, CPU_PX_CX_AT_XE }, | 131 | |
134 | { 0x000001E0, 0x000001E0, CPU_LBRANCH, CPU_INTEL_P6 }, | 132 | { 0x00000400, 0x00000417, CPU_MC, }, |
135 | 133 | { 0x00000480, 0x0000048B, CPU_VMX, }, | |
136 | { 0x00000200, 0x0000020F, CPU_MTRR, CPU_P6_CX_XE }, | 134 | |
137 | { 0x00000250, 0x00000250, CPU_MTRR, CPU_P6_CX_XE }, | 135 | { 0x00000600, 0x00000600, CPU_DEBUG, }, |
138 | { 0x00000258, 0x00000259, CPU_MTRR, CPU_P6_CX_XE }, | 136 | { 0x00000680, 0x0000068F, CPU_LBRANCH, }, |
139 | { 0x00000268, 0x0000026F, CPU_MTRR, CPU_P6_CX_XE }, | 137 | { 0x000006C0, 0x000006CF, CPU_LBRANCH, }, |
140 | { 0x00000277, 0x00000277, CPU_PAT, CPU_C2_AT_XE }, | 138 | |
141 | { 0x000002FF, 0x000002FF, CPU_MTRR, CPU_P6_CX_XE }, | 139 | { 0x000107CC, 0x000107D3, CPU_PMC, }, |
142 | 140 | ||
143 | { 0x00000300, 0x00000308, CPU_PMC, CPU_INTEL_XEON }, | 141 | { 0xC0000080, 0xC0000080, CPU_FEATURES, }, |
144 | { 0x00000309, 0x0000030B, CPU_PMC, CPU_C2_AT_XE }, | 142 | { 0xC0000081, 0xC0000084, CPU_CALL, }, |
145 | { 0x0000030C, 0x00000311, CPU_PMC, CPU_INTEL_XEON }, | 143 | { 0xC0000100, 0xC0000102, CPU_BASE, }, |
146 | { 0x00000345, 0x00000345, CPU_PMC, CPU_C2_AT }, | 144 | { 0xC0000103, 0xC0000103, CPU_TIME, }, |
147 | { 0x00000360, 0x00000371, CPU_PMC, CPU_INTEL_XEON }, | 145 | |
148 | { 0x0000038D, 0x00000390, CPU_PMC, CPU_C2_AT }, | 146 | { 0xC0010000, 0xC0010007, CPU_PMC, }, |
149 | { 0x000003A0, 0x000003BE, CPU_PMC, CPU_INTEL_XEON }, | 147 | { 0xC0010010, 0xC0010010, CPU_CONF, }, |
150 | { 0x000003C0, 0x000003CD, CPU_PMC, CPU_INTEL_XEON }, | 148 | { 0xC0010015, 0xC0010015, CPU_CONF, }, |
151 | { 0x000003E0, 0x000003E1, CPU_PMC, CPU_INTEL_XEON }, | 149 | { 0xC0010016, 0xC001001A, CPU_MTRR, }, |
152 | { 0x000003F0, 0x000003F0, CPU_PMC, CPU_INTEL_XEON }, | 150 | { 0xC001001D, 0xC001001D, CPU_MTRR, }, |
153 | { 0x000003F1, 0x000003F1, CPU_PMC, CPU_C2_AT_XE }, | 151 | { 0xC001001F, 0xC001001F, CPU_CONF, }, |
154 | { 0x000003F2, 0x000003F2, CPU_PMC, CPU_INTEL_XEON }, | 152 | { 0xC0010030, 0xC0010035, CPU_BIOS, }, |
155 | 153 | { 0xC0010044, 0xC0010048, CPU_MC, }, | |
156 | { 0x00000400, 0x00000402, CPU_MC, CPU_PM_CX_AT_XE }, | 154 | { 0xC0010050, 0xC0010056, CPU_SMM, }, |
157 | { 0x00000403, 0x00000403, CPU_MC, CPU_INTEL_XEON }, | 155 | { 0xC0010058, 0xC0010058, CPU_CONF, }, |
158 | { 0x00000404, 0x00000406, CPU_MC, CPU_PM_CX_AT_XE }, | 156 | { 0xC0010060, 0xC0010060, CPU_CACHE, }, |
159 | { 0x00000407, 0x00000407, CPU_MC, CPU_INTEL_XEON }, | 157 | { 0xC0010061, 0xC0010068, CPU_SMM, }, |
160 | { 0x00000408, 0x0000040A, CPU_MC, CPU_PM_CX_AT_XE }, | 158 | { 0xC0010069, 0xC001006B, CPU_SMM, }, |
161 | { 0x0000040B, 0x0000040B, CPU_MC, CPU_INTEL_XEON }, | 159 | { 0xC0010070, 0xC0010071, CPU_SMM, }, |
162 | { 0x0000040C, 0x0000040E, CPU_MC, CPU_PM_CX_XE }, | 160 | { 0xC0010111, 0xC0010113, CPU_SMM, }, |
163 | { 0x0000040F, 0x0000040F, CPU_MC, CPU_INTEL_XEON }, | 161 | { 0xC0010114, 0xC0010118, CPU_SVM, }, |
164 | { 0x00000410, 0x00000412, CPU_MC, CPU_PM_CX_AT_XE }, | 162 | { 0xC0010140, 0xC0010141, CPU_OSVM, }, |
165 | { 0x00000413, 0x00000417, CPU_MC, CPU_CX_AT_XE }, | 163 | { 0xC0011022, 0xC0011023, CPU_CONF, }, |
166 | { 0x00000480, 0x0000048B, CPU_VMX, CPU_CX_AT_XE }, | ||
167 | |||
168 | { 0x00000600, 0x00000600, CPU_DEBUG, CPU_PM_CX_AT_XE }, | ||
169 | { 0x00000680, 0x0000068F, CPU_LBRANCH, CPU_INTEL_XEON }, | ||
170 | { 0x000006C0, 0x000006CF, CPU_LBRANCH, CPU_INTEL_XEON }, | ||
171 | |||
172 | { 0x000107CC, 0x000107D3, CPU_PMC, CPU_INTEL_XEON_MP }, | ||
173 | |||
174 | { 0xC0000080, 0xC0000080, CPU_FEATURES, CPU_INTEL_XEON }, | ||
175 | { 0xC0000081, 0xC0000082, CPU_CALL, CPU_INTEL_XEON }, | ||
176 | { 0xC0000084, 0xC0000084, CPU_CALL, CPU_INTEL_XEON }, | ||
177 | { 0xC0000100, 0xC0000102, CPU_BASE, CPU_INTEL_XEON }, | ||
178 | }; | 164 | }; |
179 | 165 | ||
180 | /* AMD Registers Range */ | ||
181 | static struct cpu_debug_range cpu_amd_range[] = { | ||
182 | { 0x00000000, 0x00000001, CPU_MC, CPU_K10_PLUS, }, | ||
183 | { 0x00000010, 0x00000010, CPU_TIME, CPU_K8_PLUS, }, | ||
184 | { 0x0000001B, 0x0000001B, CPU_APIC, CPU_K8_PLUS, }, | ||
185 | { 0x0000002A, 0x0000002A, CPU_POWERON, CPU_K7_PLUS }, | ||
186 | { 0x0000008B, 0x0000008B, CPU_VER, CPU_K8_PLUS }, | ||
187 | { 0x000000FE, 0x000000FE, CPU_MTRR, CPU_K8_PLUS, }, | ||
188 | |||
189 | { 0x00000174, 0x00000176, CPU_SYSENTER, CPU_K8_PLUS, }, | ||
190 | { 0x00000179, 0x0000017B, CPU_MC, CPU_K8_PLUS, }, | ||
191 | { 0x000001D9, 0x000001D9, CPU_DEBUG, CPU_K8_PLUS, }, | ||
192 | { 0x000001DB, 0x000001DE, CPU_LBRANCH, CPU_K8_PLUS, }, | ||
193 | |||
194 | { 0x00000200, 0x0000020F, CPU_MTRR, CPU_K8_PLUS, }, | ||
195 | { 0x00000250, 0x00000250, CPU_MTRR, CPU_K8_PLUS, }, | ||
196 | { 0x00000258, 0x00000259, CPU_MTRR, CPU_K8_PLUS, }, | ||
197 | { 0x00000268, 0x0000026F, CPU_MTRR, CPU_K8_PLUS, }, | ||
198 | { 0x00000277, 0x00000277, CPU_PAT, CPU_K8_PLUS, }, | ||
199 | { 0x000002FF, 0x000002FF, CPU_MTRR, CPU_K8_PLUS, }, | ||
200 | |||
201 | { 0x00000400, 0x00000413, CPU_MC, CPU_K8_PLUS, }, | ||
202 | |||
203 | { 0xC0000080, 0xC0000080, CPU_FEATURES, CPU_AMD_ALL, }, | ||
204 | { 0xC0000081, 0xC0000084, CPU_CALL, CPU_K8_PLUS, }, | ||
205 | { 0xC0000100, 0xC0000102, CPU_BASE, CPU_K8_PLUS, }, | ||
206 | { 0xC0000103, 0xC0000103, CPU_TIME, CPU_K10_PLUS, }, | ||
207 | |||
208 | { 0xC0010000, 0xC0010007, CPU_PMC, CPU_K8_PLUS, }, | ||
209 | { 0xC0010010, 0xC0010010, CPU_CONF, CPU_K7_PLUS, }, | ||
210 | { 0xC0010015, 0xC0010015, CPU_CONF, CPU_K7_PLUS, }, | ||
211 | { 0xC0010016, 0xC001001A, CPU_MTRR, CPU_K8_PLUS, }, | ||
212 | { 0xC001001D, 0xC001001D, CPU_MTRR, CPU_K8_PLUS, }, | ||
213 | { 0xC001001F, 0xC001001F, CPU_CONF, CPU_K8_PLUS, }, | ||
214 | { 0xC0010030, 0xC0010035, CPU_BIOS, CPU_K8_PLUS, }, | ||
215 | { 0xC0010044, 0xC0010048, CPU_MC, CPU_K8_PLUS, }, | ||
216 | { 0xC0010050, 0xC0010056, CPU_SMM, CPU_K0F_PLUS, }, | ||
217 | { 0xC0010058, 0xC0010058, CPU_CONF, CPU_K10_PLUS, }, | ||
218 | { 0xC0010060, 0xC0010060, CPU_CACHE, CPU_AMD_11, }, | ||
219 | { 0xC0010061, 0xC0010068, CPU_SMM, CPU_K10_PLUS, }, | ||
220 | { 0xC0010069, 0xC001006B, CPU_SMM, CPU_AMD_11, }, | ||
221 | { 0xC0010070, 0xC0010071, CPU_SMM, CPU_K10_PLUS, }, | ||
222 | { 0xC0010111, 0xC0010113, CPU_SMM, CPU_K8_PLUS, }, | ||
223 | { 0xC0010114, 0xC0010118, CPU_SVM, CPU_K10_PLUS, }, | ||
224 | { 0xC0010140, 0xC0010141, CPU_OSVM, CPU_K10_PLUS, }, | ||
225 | { 0xC0011022, 0xC0011023, CPU_CONF, CPU_K10_PLUS, }, | ||
226 | }; | ||
227 | |||
228 | |||
229 | /* Intel */ | ||
230 | static int get_intel_modelflag(unsigned model) | ||
231 | { | ||
232 | int flag; | ||
233 | |||
234 | switch (model) { | ||
235 | case 0x0501: | ||
236 | case 0x0502: | ||
237 | case 0x0504: | ||
238 | flag = CPU_INTEL_PENTIUM; | ||
239 | break; | ||
240 | case 0x0601: | ||
241 | case 0x0603: | ||
242 | case 0x0605: | ||
243 | case 0x0607: | ||
244 | case 0x0608: | ||
245 | case 0x060A: | ||
246 | case 0x060B: | ||
247 | flag = CPU_INTEL_P6; | ||
248 | break; | ||
249 | case 0x0609: | ||
250 | case 0x060D: | ||
251 | flag = CPU_INTEL_PENTIUM_M; | ||
252 | break; | ||
253 | case 0x060E: | ||
254 | flag = CPU_INTEL_CORE; | ||
255 | break; | ||
256 | case 0x060F: | ||
257 | case 0x0617: | ||
258 | flag = CPU_INTEL_CORE2; | ||
259 | break; | ||
260 | case 0x061C: | ||
261 | flag = CPU_INTEL_ATOM; | ||
262 | break; | ||
263 | case 0x0F00: | ||
264 | case 0x0F01: | ||
265 | case 0x0F02: | ||
266 | case 0x0F03: | ||
267 | case 0x0F04: | ||
268 | flag = CPU_INTEL_XEON_P4; | ||
269 | break; | ||
270 | case 0x0F06: | ||
271 | flag = CPU_INTEL_XEON_MP; | ||
272 | break; | ||
273 | default: | ||
274 | flag = CPU_NONE; | ||
275 | break; | ||
276 | } | ||
277 | |||
278 | return flag; | ||
279 | } | ||
280 | |||
281 | /* AMD */ | ||
282 | static int get_amd_modelflag(unsigned model) | ||
283 | { | ||
284 | int flag; | ||
285 | |||
286 | switch (model >> 8) { | ||
287 | case 0x6: | ||
288 | flag = CPU_AMD_K6; | ||
289 | break; | ||
290 | case 0x7: | ||
291 | flag = CPU_AMD_K7; | ||
292 | break; | ||
293 | case 0x8: | ||
294 | flag = CPU_AMD_K8; | ||
295 | break; | ||
296 | case 0xf: | ||
297 | flag = CPU_AMD_0F; | ||
298 | break; | ||
299 | case 0x10: | ||
300 | flag = CPU_AMD_10; | ||
301 | break; | ||
302 | case 0x11: | ||
303 | flag = CPU_AMD_11; | ||
304 | break; | ||
305 | default: | ||
306 | flag = CPU_NONE; | ||
307 | break; | ||
308 | } | ||
309 | |||
310 | return flag; | ||
311 | } | ||
312 | |||
313 | static int get_cpu_modelflag(unsigned cpu) | ||
314 | { | ||
315 | int flag; | ||
316 | |||
317 | flag = per_cpu(cpu_model, cpu); | ||
318 | |||
319 | switch (flag >> 16) { | ||
320 | case X86_VENDOR_INTEL: | ||
321 | flag = get_intel_modelflag(flag); | ||
322 | break; | ||
323 | case X86_VENDOR_AMD: | ||
324 | flag = get_amd_modelflag(flag & 0xffff); | ||
325 | break; | ||
326 | default: | ||
327 | flag = CPU_NONE; | ||
328 | break; | ||
329 | } | ||
330 | |||
331 | return flag; | ||
332 | } | ||
333 | |||
334 | static int get_cpu_range_count(unsigned cpu) | ||
335 | { | ||
336 | int index; | ||
337 | |||
338 | switch (per_cpu(cpu_model, cpu) >> 16) { | ||
339 | case X86_VENDOR_INTEL: | ||
340 | index = ARRAY_SIZE(cpu_intel_range); | ||
341 | break; | ||
342 | case X86_VENDOR_AMD: | ||
343 | index = ARRAY_SIZE(cpu_amd_range); | ||
344 | break; | ||
345 | default: | ||
346 | index = 0; | ||
347 | break; | ||
348 | } | ||
349 | |||
350 | return index; | ||
351 | } | ||
352 | |||
353 | static int is_typeflag_valid(unsigned cpu, unsigned flag) | 166 | static int is_typeflag_valid(unsigned cpu, unsigned flag) |
354 | { | 167 | { |
355 | unsigned vendor, modelflag; | 168 | int i; |
356 | int i, index; | ||
357 | 169 | ||
358 | /* Standard Registers should always be valid */ | 170 | /* Standard Registers should always be valid */ |
359 | if (flag >= CPU_TSS) | 171 | if (flag >= CPU_TSS) |
360 | return 1; | 172 | return 1; |
361 | 173 | ||
362 | modelflag = per_cpu(cpu_modelflag, cpu); | 174 | for (i = 0; i < ARRAY_SIZE(cpu_reg_range); i++) { |
363 | vendor = per_cpu(cpu_model, cpu) >> 16; | 175 | if (cpu_reg_range[i].flag == flag) |
364 | index = get_cpu_range_count(cpu); | 176 | return 1; |
365 | |||
366 | for (i = 0; i < index; i++) { | ||
367 | switch (vendor) { | ||
368 | case X86_VENDOR_INTEL: | ||
369 | if ((cpu_intel_range[i].model & modelflag) && | ||
370 | (cpu_intel_range[i].flag & flag)) | ||
371 | return 1; | ||
372 | break; | ||
373 | case X86_VENDOR_AMD: | ||
374 | if ((cpu_amd_range[i].model & modelflag) && | ||
375 | (cpu_amd_range[i].flag & flag)) | ||
376 | return 1; | ||
377 | break; | ||
378 | } | ||
379 | } | 177 | } |
380 | 178 | ||
381 | /* Invalid */ | 179 | /* Invalid */ |
@@ -385,26 +183,11 @@ static int is_typeflag_valid(unsigned cpu, unsigned flag) | |||
385 | static unsigned get_cpu_range(unsigned cpu, unsigned *min, unsigned *max, | 183 | static unsigned get_cpu_range(unsigned cpu, unsigned *min, unsigned *max, |
386 | int index, unsigned flag) | 184 | int index, unsigned flag) |
387 | { | 185 | { |
388 | unsigned modelflag; | 186 | if (cpu_reg_range[index].flag == flag) { |
389 | 187 | *min = cpu_reg_range[index].min; | |
390 | modelflag = per_cpu(cpu_modelflag, cpu); | 188 | *max = cpu_reg_range[index].max; |
391 | *max = 0; | 189 | } else |
392 | switch (per_cpu(cpu_model, cpu) >> 16) { | 190 | *max = 0; |
393 | case X86_VENDOR_INTEL: | ||
394 | if ((cpu_intel_range[index].model & modelflag) && | ||
395 | (cpu_intel_range[index].flag & flag)) { | ||
396 | *min = cpu_intel_range[index].min; | ||
397 | *max = cpu_intel_range[index].max; | ||
398 | } | ||
399 | break; | ||
400 | case X86_VENDOR_AMD: | ||
401 | if ((cpu_amd_range[index].model & modelflag) && | ||
402 | (cpu_amd_range[index].flag & flag)) { | ||
403 | *min = cpu_amd_range[index].min; | ||
404 | *max = cpu_amd_range[index].max; | ||
405 | } | ||
406 | break; | ||
407 | } | ||
408 | 191 | ||
409 | return *max; | 192 | return *max; |
410 | } | 193 | } |
@@ -434,7 +217,7 @@ static void print_msr(struct seq_file *seq, unsigned cpu, unsigned flag) | |||
434 | unsigned msr, msr_min, msr_max; | 217 | unsigned msr, msr_min, msr_max; |
435 | struct cpu_private *priv; | 218 | struct cpu_private *priv; |
436 | u32 low, high; | 219 | u32 low, high; |
437 | int i, range; | 220 | int i; |
438 | 221 | ||
439 | if (seq) { | 222 | if (seq) { |
440 | priv = seq->private; | 223 | priv = seq->private; |
@@ -446,9 +229,7 @@ static void print_msr(struct seq_file *seq, unsigned cpu, unsigned flag) | |||
446 | } | 229 | } |
447 | } | 230 | } |
448 | 231 | ||
449 | range = get_cpu_range_count(cpu); | 232 | for (i = 0; i < ARRAY_SIZE(cpu_reg_range); i++) { |
450 | |||
451 | for (i = 0; i < range; i++) { | ||
452 | if (!get_cpu_range(cpu, &msr_min, &msr_max, i, flag)) | 233 | if (!get_cpu_range(cpu, &msr_min, &msr_max, i, flag)) |
453 | continue; | 234 | continue; |
454 | 235 | ||
@@ -588,8 +369,20 @@ static void print_apic(void *arg) | |||
588 | seq_printf(seq, " TMICT\t\t: %08x\n", apic_read(APIC_TMICT)); | 369 | seq_printf(seq, " TMICT\t\t: %08x\n", apic_read(APIC_TMICT)); |
589 | seq_printf(seq, " TMCCT\t\t: %08x\n", apic_read(APIC_TMCCT)); | 370 | seq_printf(seq, " TMCCT\t\t: %08x\n", apic_read(APIC_TMCCT)); |
590 | seq_printf(seq, " TDCR\t\t: %08x\n", apic_read(APIC_TDCR)); | 371 | seq_printf(seq, " TDCR\t\t: %08x\n", apic_read(APIC_TDCR)); |
591 | #endif /* CONFIG_X86_LOCAL_APIC */ | 372 | if (boot_cpu_has(X86_FEATURE_EXTAPIC)) { |
373 | unsigned int i, v, maxeilvt; | ||
374 | |||
375 | v = apic_read(APIC_EFEAT); | ||
376 | maxeilvt = (v >> 16) & 0xff; | ||
377 | seq_printf(seq, " EFEAT\t\t: %08x\n", v); | ||
378 | seq_printf(seq, " ECTRL\t\t: %08x\n", apic_read(APIC_ECTRL)); | ||
592 | 379 | ||
380 | for (i = 0; i < maxeilvt; i++) { | ||
381 | v = apic_read(APIC_EILVTn(i)); | ||
382 | seq_printf(seq, " EILVT%d\t\t: %08x\n", i, v); | ||
383 | } | ||
384 | } | ||
385 | #endif /* CONFIG_X86_LOCAL_APIC */ | ||
593 | seq_printf(seq, "\n MSR\t:\n"); | 386 | seq_printf(seq, "\n MSR\t:\n"); |
594 | } | 387 | } |
595 | 388 | ||
@@ -788,13 +581,11 @@ static int cpu_init_msr(unsigned cpu, unsigned type, struct dentry *dentry) | |||
788 | { | 581 | { |
789 | struct dentry *cpu_dentry = NULL; | 582 | struct dentry *cpu_dentry = NULL; |
790 | unsigned reg, reg_min, reg_max; | 583 | unsigned reg, reg_min, reg_max; |
791 | int i, range, err = 0; | 584 | int i, err = 0; |
792 | char reg_dir[12]; | 585 | char reg_dir[12]; |
793 | u32 low, high; | 586 | u32 low, high; |
794 | 587 | ||
795 | range = get_cpu_range_count(cpu); | 588 | for (i = 0; i < ARRAY_SIZE(cpu_reg_range); i++) { |
796 | |||
797 | for (i = 0; i < range; i++) { | ||
798 | if (!get_cpu_range(cpu, ®_min, ®_max, i, | 589 | if (!get_cpu_range(cpu, ®_min, ®_max, i, |
799 | cpu_base[type].flag)) | 590 | cpu_base[type].flag)) |
800 | continue; | 591 | continue; |
@@ -850,10 +641,6 @@ static int cpu_init_cpu(void) | |||
850 | cpui = &cpu_data(cpu); | 641 | cpui = &cpu_data(cpu); |
851 | if (!cpu_has(cpui, X86_FEATURE_MSR)) | 642 | if (!cpu_has(cpui, X86_FEATURE_MSR)) |
852 | continue; | 643 | continue; |
853 | per_cpu(cpu_model, cpu) = ((cpui->x86_vendor << 16) | | ||
854 | (cpui->x86 << 8) | | ||
855 | (cpui->x86_model)); | ||
856 | per_cpu(cpu_modelflag, cpu) = get_cpu_modelflag(cpu); | ||
857 | 644 | ||
858 | sprintf(cpu_dir, "cpu%d", cpu); | 645 | sprintf(cpu_dir, "cpu%d", cpu); |
859 | cpu_dentry = debugfs_create_dir(cpu_dir, cpu_debugfs_dir); | 646 | cpu_dentry = debugfs_create_dir(cpu_dir, cpu_debugfs_dir); |
diff --git a/arch/x86/kernel/cpu/cpufreq/Kconfig b/arch/x86/kernel/cpu/cpufreq/Kconfig index 52c839875478..f138c6c389b9 100644 --- a/arch/x86/kernel/cpu/cpufreq/Kconfig +++ b/arch/x86/kernel/cpu/cpufreq/Kconfig | |||
@@ -220,11 +220,14 @@ config X86_LONGHAUL | |||
220 | If in doubt, say N. | 220 | If in doubt, say N. |
221 | 221 | ||
222 | config X86_E_POWERSAVER | 222 | config X86_E_POWERSAVER |
223 | tristate "VIA C7 Enhanced PowerSaver" | 223 | tristate "VIA C7 Enhanced PowerSaver (DANGEROUS)" |
224 | select CPU_FREQ_TABLE | 224 | select CPU_FREQ_TABLE |
225 | depends on X86_32 | 225 | depends on X86_32 && EXPERIMENTAL |
226 | help | 226 | help |
227 | This adds the CPUFreq driver for VIA C7 processors. | 227 | This adds the CPUFreq driver for VIA C7 processors. However, this driver |
228 | does not have any safeguards to prevent operating the CPU out of spec | ||
229 | and is thus considered dangerous. Please use the regular ACPI cpufreq | ||
230 | driver, enabled by CONFIG_X86_ACPI_CPUFREQ. | ||
228 | 231 | ||
229 | If in doubt, say N. | 232 | If in doubt, say N. |
230 | 233 | ||
diff --git a/arch/x86/kernel/cpu/cpufreq/acpi-cpufreq.c b/arch/x86/kernel/cpu/cpufreq/acpi-cpufreq.c index 752e8c6b2c7e..ae9b503220ca 100644 --- a/arch/x86/kernel/cpu/cpufreq/acpi-cpufreq.c +++ b/arch/x86/kernel/cpu/cpufreq/acpi-cpufreq.c | |||
@@ -90,11 +90,7 @@ static int check_est_cpu(unsigned int cpuid) | |||
90 | { | 90 | { |
91 | struct cpuinfo_x86 *cpu = &cpu_data(cpuid); | 91 | struct cpuinfo_x86 *cpu = &cpu_data(cpuid); |
92 | 92 | ||
93 | if (cpu->x86_vendor != X86_VENDOR_INTEL || | 93 | return cpu_has(cpu, X86_FEATURE_EST); |
94 | !cpu_has(cpu, X86_FEATURE_EST)) | ||
95 | return 0; | ||
96 | |||
97 | return 1; | ||
98 | } | 94 | } |
99 | 95 | ||
100 | static unsigned extract_io(u32 value, struct acpi_cpufreq_data *data) | 96 | static unsigned extract_io(u32 value, struct acpi_cpufreq_data *data) |
diff --git a/arch/x86/kernel/cpu/cpufreq/powernow-k8.c b/arch/x86/kernel/cpu/cpufreq/powernow-k8.c index cf52215d9eb1..81cbe64ed6b4 100644 --- a/arch/x86/kernel/cpu/cpufreq/powernow-k8.c +++ b/arch/x86/kernel/cpu/cpufreq/powernow-k8.c | |||
@@ -1,3 +1,4 @@ | |||
1 | |||
1 | /* | 2 | /* |
2 | * (c) 2003-2006 Advanced Micro Devices, Inc. | 3 | * (c) 2003-2006 Advanced Micro Devices, Inc. |
3 | * Your use of this code is subject to the terms and conditions of the | 4 | * Your use of this code is subject to the terms and conditions of the |
@@ -117,20 +118,17 @@ static int query_current_values_with_pending_wait(struct powernow_k8_data *data) | |||
117 | u32 i = 0; | 118 | u32 i = 0; |
118 | 119 | ||
119 | if (cpu_family == CPU_HW_PSTATE) { | 120 | if (cpu_family == CPU_HW_PSTATE) { |
120 | if (data->currpstate == HW_PSTATE_INVALID) { | 121 | rdmsr(MSR_PSTATE_STATUS, lo, hi); |
121 | /* read (initial) hw pstate if not yet set */ | 122 | i = lo & HW_PSTATE_MASK; |
122 | rdmsr(MSR_PSTATE_STATUS, lo, hi); | 123 | data->currpstate = i; |
123 | i = lo & HW_PSTATE_MASK; | 124 | |
124 | 125 | /* | |
125 | /* | 126 | * a workaround for family 11h erratum 311 might cause |
126 | * a workaround for family 11h erratum 311 might cause | 127 | * an "out-of-range Pstate" if the core is in Pstate-0 |
127 | * an "out-of-range Pstate" if the core is in Pstate-0 | 128 | */ |
128 | */ | 129 | if ((boot_cpu_data.x86 == 0x11) && (i >= data->numps)) |
129 | if (i >= data->numps) | 130 | data->currpstate = HW_PSTATE_0; |
130 | data->currpstate = HW_PSTATE_0; | 131 | |
131 | else | ||
132 | data->currpstate = i; | ||
133 | } | ||
134 | return 0; | 132 | return 0; |
135 | } | 133 | } |
136 | do { | 134 | do { |
@@ -510,41 +508,34 @@ static int core_voltage_post_transition(struct powernow_k8_data *data, | |||
510 | return 0; | 508 | return 0; |
511 | } | 509 | } |
512 | 510 | ||
513 | static int check_supported_cpu(unsigned int cpu) | 511 | static void check_supported_cpu(void *_rc) |
514 | { | 512 | { |
515 | cpumask_t oldmask; | ||
516 | u32 eax, ebx, ecx, edx; | 513 | u32 eax, ebx, ecx, edx; |
517 | unsigned int rc = 0; | 514 | int *rc = _rc; |
518 | |||
519 | oldmask = current->cpus_allowed; | ||
520 | set_cpus_allowed_ptr(current, &cpumask_of_cpu(cpu)); | ||
521 | 515 | ||
522 | if (smp_processor_id() != cpu) { | 516 | *rc = -ENODEV; |
523 | printk(KERN_ERR PFX "limiting to cpu %u failed\n", cpu); | ||
524 | goto out; | ||
525 | } | ||
526 | 517 | ||
527 | if (current_cpu_data.x86_vendor != X86_VENDOR_AMD) | 518 | if (current_cpu_data.x86_vendor != X86_VENDOR_AMD) |
528 | goto out; | 519 | return; |
529 | 520 | ||
530 | eax = cpuid_eax(CPUID_PROCESSOR_SIGNATURE); | 521 | eax = cpuid_eax(CPUID_PROCESSOR_SIGNATURE); |
531 | if (((eax & CPUID_XFAM) != CPUID_XFAM_K8) && | 522 | if (((eax & CPUID_XFAM) != CPUID_XFAM_K8) && |
532 | ((eax & CPUID_XFAM) < CPUID_XFAM_10H)) | 523 | ((eax & CPUID_XFAM) < CPUID_XFAM_10H)) |
533 | goto out; | 524 | return; |
534 | 525 | ||
535 | if ((eax & CPUID_XFAM) == CPUID_XFAM_K8) { | 526 | if ((eax & CPUID_XFAM) == CPUID_XFAM_K8) { |
536 | if (((eax & CPUID_USE_XFAM_XMOD) != CPUID_USE_XFAM_XMOD) || | 527 | if (((eax & CPUID_USE_XFAM_XMOD) != CPUID_USE_XFAM_XMOD) || |
537 | ((eax & CPUID_XMOD) > CPUID_XMOD_REV_MASK)) { | 528 | ((eax & CPUID_XMOD) > CPUID_XMOD_REV_MASK)) { |
538 | printk(KERN_INFO PFX | 529 | printk(KERN_INFO PFX |
539 | "Processor cpuid %x not supported\n", eax); | 530 | "Processor cpuid %x not supported\n", eax); |
540 | goto out; | 531 | return; |
541 | } | 532 | } |
542 | 533 | ||
543 | eax = cpuid_eax(CPUID_GET_MAX_CAPABILITIES); | 534 | eax = cpuid_eax(CPUID_GET_MAX_CAPABILITIES); |
544 | if (eax < CPUID_FREQ_VOLT_CAPABILITIES) { | 535 | if (eax < CPUID_FREQ_VOLT_CAPABILITIES) { |
545 | printk(KERN_INFO PFX | 536 | printk(KERN_INFO PFX |
546 | "No frequency change capabilities detected\n"); | 537 | "No frequency change capabilities detected\n"); |
547 | goto out; | 538 | return; |
548 | } | 539 | } |
549 | 540 | ||
550 | cpuid(CPUID_FREQ_VOLT_CAPABILITIES, &eax, &ebx, &ecx, &edx); | 541 | cpuid(CPUID_FREQ_VOLT_CAPABILITIES, &eax, &ebx, &ecx, &edx); |
@@ -552,21 +543,17 @@ static int check_supported_cpu(unsigned int cpu) | |||
552 | != P_STATE_TRANSITION_CAPABLE) { | 543 | != P_STATE_TRANSITION_CAPABLE) { |
553 | printk(KERN_INFO PFX | 544 | printk(KERN_INFO PFX |
554 | "Power state transitions not supported\n"); | 545 | "Power state transitions not supported\n"); |
555 | goto out; | 546 | return; |
556 | } | 547 | } |
557 | } else { /* must be a HW Pstate capable processor */ | 548 | } else { /* must be a HW Pstate capable processor */ |
558 | cpuid(CPUID_FREQ_VOLT_CAPABILITIES, &eax, &ebx, &ecx, &edx); | 549 | cpuid(CPUID_FREQ_VOLT_CAPABILITIES, &eax, &ebx, &ecx, &edx); |
559 | if ((edx & USE_HW_PSTATE) == USE_HW_PSTATE) | 550 | if ((edx & USE_HW_PSTATE) == USE_HW_PSTATE) |
560 | cpu_family = CPU_HW_PSTATE; | 551 | cpu_family = CPU_HW_PSTATE; |
561 | else | 552 | else |
562 | goto out; | 553 | return; |
563 | } | 554 | } |
564 | 555 | ||
565 | rc = 1; | 556 | *rc = 0; |
566 | |||
567 | out: | ||
568 | set_cpus_allowed_ptr(current, &oldmask); | ||
569 | return rc; | ||
570 | } | 557 | } |
571 | 558 | ||
572 | static int check_pst_table(struct powernow_k8_data *data, struct pst_s *pst, | 559 | static int check_pst_table(struct powernow_k8_data *data, struct pst_s *pst, |
@@ -823,13 +810,14 @@ static void powernow_k8_acpi_pst_values(struct powernow_k8_data *data, | |||
823 | if (!data->acpi_data.state_count || (cpu_family == CPU_HW_PSTATE)) | 810 | if (!data->acpi_data.state_count || (cpu_family == CPU_HW_PSTATE)) |
824 | return; | 811 | return; |
825 | 812 | ||
826 | control = data->acpi_data.states[index].control; data->irt = (control | 813 | control = data->acpi_data.states[index].control; |
827 | >> IRT_SHIFT) & IRT_MASK; data->rvo = (control >> | 814 | data->irt = (control >> IRT_SHIFT) & IRT_MASK; |
828 | RVO_SHIFT) & RVO_MASK; data->exttype = (control | 815 | data->rvo = (control >> RVO_SHIFT) & RVO_MASK; |
829 | >> EXT_TYPE_SHIFT) & EXT_TYPE_MASK; | 816 | data->exttype = (control >> EXT_TYPE_SHIFT) & EXT_TYPE_MASK; |
830 | data->plllock = (control >> PLL_L_SHIFT) & PLL_L_MASK; data->vidmvs = 1 | 817 | data->plllock = (control >> PLL_L_SHIFT) & PLL_L_MASK; |
831 | << ((control >> MVS_SHIFT) & MVS_MASK); data->vstable = | 818 | data->vidmvs = 1 << ((control >> MVS_SHIFT) & MVS_MASK); |
832 | (control >> VST_SHIFT) & VST_MASK; } | 819 | data->vstable = (control >> VST_SHIFT) & VST_MASK; |
820 | } | ||
833 | 821 | ||
834 | static int powernow_k8_cpu_init_acpi(struct powernow_k8_data *data) | 822 | static int powernow_k8_cpu_init_acpi(struct powernow_k8_data *data) |
835 | { | 823 | { |
@@ -1046,6 +1034,19 @@ static int get_transition_latency(struct powernow_k8_data *data) | |||
1046 | if (cur_latency > max_latency) | 1034 | if (cur_latency > max_latency) |
1047 | max_latency = cur_latency; | 1035 | max_latency = cur_latency; |
1048 | } | 1036 | } |
1037 | if (max_latency == 0) { | ||
1038 | /* | ||
1039 | * Fam 11h always returns 0 as transition latency. | ||
1040 | * This is intended and means "very fast". While cpufreq core | ||
1041 | * and governors currently can handle that gracefully, better | ||
1042 | * set it to 1 to avoid problems in the future. | ||
1043 | * For all others it's a BIOS bug. | ||
1044 | */ | ||
1045 | if (boot_cpu_data.x86 != 0x11) | ||
1046 | printk(KERN_ERR FW_WARN PFX "Invalid zero transition " | ||
1047 | "latency\n"); | ||
1048 | max_latency = 1; | ||
1049 | } | ||
1049 | /* value in usecs, needs to be in nanoseconds */ | 1050 | /* value in usecs, needs to be in nanoseconds */ |
1050 | return 1000 * max_latency; | 1051 | return 1000 * max_latency; |
1051 | } | 1052 | } |
@@ -1093,7 +1094,7 @@ static int transition_frequency_fidvid(struct powernow_k8_data *data, | |||
1093 | freqs.old = find_khz_freq_from_fid(data->currfid); | 1094 | freqs.old = find_khz_freq_from_fid(data->currfid); |
1094 | freqs.new = find_khz_freq_from_fid(fid); | 1095 | freqs.new = find_khz_freq_from_fid(fid); |
1095 | 1096 | ||
1096 | for_each_cpu_mask_nr(i, *(data->available_cores)) { | 1097 | for_each_cpu(i, data->available_cores) { |
1097 | freqs.cpu = i; | 1098 | freqs.cpu = i; |
1098 | cpufreq_notify_transition(&freqs, CPUFREQ_PRECHANGE); | 1099 | cpufreq_notify_transition(&freqs, CPUFREQ_PRECHANGE); |
1099 | } | 1100 | } |
@@ -1101,7 +1102,7 @@ static int transition_frequency_fidvid(struct powernow_k8_data *data, | |||
1101 | res = transition_fid_vid(data, fid, vid); | 1102 | res = transition_fid_vid(data, fid, vid); |
1102 | freqs.new = find_khz_freq_from_fid(data->currfid); | 1103 | freqs.new = find_khz_freq_from_fid(data->currfid); |
1103 | 1104 | ||
1104 | for_each_cpu_mask_nr(i, *(data->available_cores)) { | 1105 | for_each_cpu(i, data->available_cores) { |
1105 | freqs.cpu = i; | 1106 | freqs.cpu = i; |
1106 | cpufreq_notify_transition(&freqs, CPUFREQ_POSTCHANGE); | 1107 | cpufreq_notify_transition(&freqs, CPUFREQ_POSTCHANGE); |
1107 | } | 1108 | } |
@@ -1126,7 +1127,7 @@ static int transition_frequency_pstate(struct powernow_k8_data *data, | |||
1126 | data->currpstate); | 1127 | data->currpstate); |
1127 | freqs.new = find_khz_freq_from_pstate(data->powernow_table, pstate); | 1128 | freqs.new = find_khz_freq_from_pstate(data->powernow_table, pstate); |
1128 | 1129 | ||
1129 | for_each_cpu_mask_nr(i, *(data->available_cores)) { | 1130 | for_each_cpu(i, data->available_cores) { |
1130 | freqs.cpu = i; | 1131 | freqs.cpu = i; |
1131 | cpufreq_notify_transition(&freqs, CPUFREQ_PRECHANGE); | 1132 | cpufreq_notify_transition(&freqs, CPUFREQ_PRECHANGE); |
1132 | } | 1133 | } |
@@ -1134,7 +1135,7 @@ static int transition_frequency_pstate(struct powernow_k8_data *data, | |||
1134 | res = transition_pstate(data, pstate); | 1135 | res = transition_pstate(data, pstate); |
1135 | freqs.new = find_khz_freq_from_pstate(data->powernow_table, pstate); | 1136 | freqs.new = find_khz_freq_from_pstate(data->powernow_table, pstate); |
1136 | 1137 | ||
1137 | for_each_cpu_mask_nr(i, *(data->available_cores)) { | 1138 | for_each_cpu(i, data->available_cores) { |
1138 | freqs.cpu = i; | 1139 | freqs.cpu = i; |
1139 | cpufreq_notify_transition(&freqs, CPUFREQ_POSTCHANGE); | 1140 | cpufreq_notify_transition(&freqs, CPUFREQ_POSTCHANGE); |
1140 | } | 1141 | } |
@@ -1235,21 +1236,47 @@ static int powernowk8_verify(struct cpufreq_policy *pol) | |||
1235 | return cpufreq_frequency_table_verify(pol, data->powernow_table); | 1236 | return cpufreq_frequency_table_verify(pol, data->powernow_table); |
1236 | } | 1237 | } |
1237 | 1238 | ||
1238 | static const char ACPI_PSS_BIOS_BUG_MSG[] = | 1239 | struct init_on_cpu { |
1239 | KERN_ERR FW_BUG PFX "No compatible ACPI _PSS objects found.\n" | 1240 | struct powernow_k8_data *data; |
1240 | KERN_ERR FW_BUG PFX "Try again with latest BIOS.\n"; | 1241 | int rc; |
1242 | }; | ||
1243 | |||
1244 | static void __cpuinit powernowk8_cpu_init_on_cpu(void *_init_on_cpu) | ||
1245 | { | ||
1246 | struct init_on_cpu *init_on_cpu = _init_on_cpu; | ||
1247 | |||
1248 | if (pending_bit_stuck()) { | ||
1249 | printk(KERN_ERR PFX "failing init, change pending bit set\n"); | ||
1250 | init_on_cpu->rc = -ENODEV; | ||
1251 | return; | ||
1252 | } | ||
1253 | |||
1254 | if (query_current_values_with_pending_wait(init_on_cpu->data)) { | ||
1255 | init_on_cpu->rc = -ENODEV; | ||
1256 | return; | ||
1257 | } | ||
1258 | |||
1259 | if (cpu_family == CPU_OPTERON) | ||
1260 | fidvid_msr_init(); | ||
1261 | |||
1262 | init_on_cpu->rc = 0; | ||
1263 | } | ||
1241 | 1264 | ||
1242 | /* per CPU init entry point to the driver */ | 1265 | /* per CPU init entry point to the driver */ |
1243 | static int __cpuinit powernowk8_cpu_init(struct cpufreq_policy *pol) | 1266 | static int __cpuinit powernowk8_cpu_init(struct cpufreq_policy *pol) |
1244 | { | 1267 | { |
1268 | static const char ACPI_PSS_BIOS_BUG_MSG[] = | ||
1269 | KERN_ERR FW_BUG PFX "No compatible ACPI _PSS objects found.\n" | ||
1270 | KERN_ERR FW_BUG PFX "Try again with latest BIOS.\n"; | ||
1245 | struct powernow_k8_data *data; | 1271 | struct powernow_k8_data *data; |
1246 | cpumask_t oldmask; | 1272 | struct init_on_cpu init_on_cpu; |
1247 | int rc; | 1273 | int rc; |
1248 | 1274 | ||
1249 | if (!cpu_online(pol->cpu)) | 1275 | if (!cpu_online(pol->cpu)) |
1250 | return -ENODEV; | 1276 | return -ENODEV; |
1251 | 1277 | ||
1252 | if (!check_supported_cpu(pol->cpu)) | 1278 | smp_call_function_single(pol->cpu, check_supported_cpu, &rc, 1); |
1279 | if (rc) | ||
1253 | return -ENODEV; | 1280 | return -ENODEV; |
1254 | 1281 | ||
1255 | data = kzalloc(sizeof(struct powernow_k8_data), GFP_KERNEL); | 1282 | data = kzalloc(sizeof(struct powernow_k8_data), GFP_KERNEL); |
@@ -1289,27 +1316,12 @@ static int __cpuinit powernowk8_cpu_init(struct cpufreq_policy *pol) | |||
1289 | pol->cpuinfo.transition_latency = get_transition_latency(data); | 1316 | pol->cpuinfo.transition_latency = get_transition_latency(data); |
1290 | 1317 | ||
1291 | /* only run on specific CPU from here on */ | 1318 | /* only run on specific CPU from here on */ |
1292 | oldmask = current->cpus_allowed; | 1319 | init_on_cpu.data = data; |
1293 | set_cpus_allowed_ptr(current, &cpumask_of_cpu(pol->cpu)); | 1320 | smp_call_function_single(data->cpu, powernowk8_cpu_init_on_cpu, |
1294 | 1321 | &init_on_cpu, 1); | |
1295 | if (smp_processor_id() != pol->cpu) { | 1322 | rc = init_on_cpu.rc; |
1296 | printk(KERN_ERR PFX "limiting to cpu %u failed\n", pol->cpu); | 1323 | if (rc != 0) |
1297 | goto err_out_unmask; | 1324 | goto err_out_exit_acpi; |
1298 | } | ||
1299 | |||
1300 | if (pending_bit_stuck()) { | ||
1301 | printk(KERN_ERR PFX "failing init, change pending bit set\n"); | ||
1302 | goto err_out_unmask; | ||
1303 | } | ||
1304 | |||
1305 | if (query_current_values_with_pending_wait(data)) | ||
1306 | goto err_out_unmask; | ||
1307 | |||
1308 | if (cpu_family == CPU_OPTERON) | ||
1309 | fidvid_msr_init(); | ||
1310 | |||
1311 | /* run on any CPU again */ | ||
1312 | set_cpus_allowed_ptr(current, &oldmask); | ||
1313 | 1325 | ||
1314 | if (cpu_family == CPU_HW_PSTATE) | 1326 | if (cpu_family == CPU_HW_PSTATE) |
1315 | cpumask_copy(pol->cpus, cpumask_of(pol->cpu)); | 1327 | cpumask_copy(pol->cpus, cpumask_of(pol->cpu)); |
@@ -1346,8 +1358,7 @@ static int __cpuinit powernowk8_cpu_init(struct cpufreq_policy *pol) | |||
1346 | 1358 | ||
1347 | return 0; | 1359 | return 0; |
1348 | 1360 | ||
1349 | err_out_unmask: | 1361 | err_out_exit_acpi: |
1350 | set_cpus_allowed_ptr(current, &oldmask); | ||
1351 | powernow_k8_cpu_exit_acpi(data); | 1362 | powernow_k8_cpu_exit_acpi(data); |
1352 | 1363 | ||
1353 | err_out: | 1364 | err_out: |
@@ -1372,28 +1383,25 @@ static int __devexit powernowk8_cpu_exit(struct cpufreq_policy *pol) | |||
1372 | return 0; | 1383 | return 0; |
1373 | } | 1384 | } |
1374 | 1385 | ||
1386 | static void query_values_on_cpu(void *_err) | ||
1387 | { | ||
1388 | int *err = _err; | ||
1389 | struct powernow_k8_data *data = __get_cpu_var(powernow_data); | ||
1390 | |||
1391 | *err = query_current_values_with_pending_wait(data); | ||
1392 | } | ||
1393 | |||
1375 | static unsigned int powernowk8_get(unsigned int cpu) | 1394 | static unsigned int powernowk8_get(unsigned int cpu) |
1376 | { | 1395 | { |
1377 | struct powernow_k8_data *data; | 1396 | struct powernow_k8_data *data = per_cpu(powernow_data, cpu); |
1378 | cpumask_t oldmask = current->cpus_allowed; | ||
1379 | unsigned int khz = 0; | 1397 | unsigned int khz = 0; |
1380 | unsigned int first; | 1398 | int err; |
1381 | |||
1382 | first = cpumask_first(cpu_core_mask(cpu)); | ||
1383 | data = per_cpu(powernow_data, first); | ||
1384 | 1399 | ||
1385 | if (!data) | 1400 | if (!data) |
1386 | return -EINVAL; | 1401 | return -EINVAL; |
1387 | 1402 | ||
1388 | set_cpus_allowed_ptr(current, &cpumask_of_cpu(cpu)); | 1403 | smp_call_function_single(cpu, query_values_on_cpu, &err, true); |
1389 | if (smp_processor_id() != cpu) { | 1404 | if (err) |
1390 | printk(KERN_ERR PFX | ||
1391 | "limiting to CPU %d failed in powernowk8_get\n", cpu); | ||
1392 | set_cpus_allowed_ptr(current, &oldmask); | ||
1393 | return 0; | ||
1394 | } | ||
1395 | |||
1396 | if (query_current_values_with_pending_wait(data)) | ||
1397 | goto out; | 1405 | goto out; |
1398 | 1406 | ||
1399 | if (cpu_family == CPU_HW_PSTATE) | 1407 | if (cpu_family == CPU_HW_PSTATE) |
@@ -1404,7 +1412,6 @@ static unsigned int powernowk8_get(unsigned int cpu) | |||
1404 | 1412 | ||
1405 | 1413 | ||
1406 | out: | 1414 | out: |
1407 | set_cpus_allowed_ptr(current, &oldmask); | ||
1408 | return khz; | 1415 | return khz; |
1409 | } | 1416 | } |
1410 | 1417 | ||
@@ -1430,7 +1437,9 @@ static int __cpuinit powernowk8_init(void) | |||
1430 | unsigned int i, supported_cpus = 0; | 1437 | unsigned int i, supported_cpus = 0; |
1431 | 1438 | ||
1432 | for_each_online_cpu(i) { | 1439 | for_each_online_cpu(i) { |
1433 | if (check_supported_cpu(i)) | 1440 | int rc; |
1441 | smp_call_function_single(i, check_supported_cpu, &rc, 1); | ||
1442 | if (rc == 0) | ||
1434 | supported_cpus++; | 1443 | supported_cpus++; |
1435 | } | 1444 | } |
1436 | 1445 | ||
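The recurring pattern in the powernow-k8 hunks above is the replacement of set_cpus_allowed_ptr() task migration with an IPI-based call on the target CPU. A minimal sketch of that pattern, with made-up names (query_on_cpu, do_query) standing in for the driver-specific helpers, looks roughly like this:

static void query_on_cpu(void *_rc)
{
	int *rc = _rc;

	/* Runs on the target CPU, in IPI context with interrupts off. */
	*rc = do_query(smp_processor_id());
}

static int query_cpu(unsigned int cpu)
{
	int rc = -ENODEV;

	/* wait == 1: block until the handler has finished on 'cpu'. */
	smp_call_function_single(cpu, query_on_cpu, &rc, 1);
	return rc;
}

The handler must not sleep or take mutexes, which is why the converted helpers (check_supported_cpu, powernowk8_cpu_init_on_cpu, query_values_on_cpu) only read MSRs and CPUID and report a result code through the passed-in pointer.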
diff --git a/arch/x86/kernel/cpu/cpufreq/powernow-k8.h b/arch/x86/kernel/cpu/cpufreq/powernow-k8.h index 6c6698feade1..c9c1190b5e1f 100644 --- a/arch/x86/kernel/cpu/cpufreq/powernow-k8.h +++ b/arch/x86/kernel/cpu/cpufreq/powernow-k8.h | |||
@@ -223,14 +223,3 @@ static void powernow_k8_acpi_pst_values(struct powernow_k8_data *data, unsigned | |||
223 | 223 | ||
224 | static int fill_powernow_table_pstate(struct powernow_k8_data *data, struct cpufreq_frequency_table *powernow_table); | 224 | static int fill_powernow_table_pstate(struct powernow_k8_data *data, struct cpufreq_frequency_table *powernow_table); |
225 | static int fill_powernow_table_fidvid(struct powernow_k8_data *data, struct cpufreq_frequency_table *powernow_table); | 225 | static int fill_powernow_table_fidvid(struct powernow_k8_data *data, struct cpufreq_frequency_table *powernow_table); |
226 | |||
227 | #ifdef CONFIG_SMP | ||
228 | static inline void define_siblings(int cpu, cpumask_t cpu_sharedcore_mask[]) | ||
229 | { | ||
230 | } | ||
231 | #else | ||
232 | static inline void define_siblings(int cpu, cpumask_t cpu_sharedcore_mask[]) | ||
233 | { | ||
234 | cpu_set(0, cpu_sharedcore_mask[0]); | ||
235 | } | ||
236 | #endif | ||
diff --git a/arch/x86/kernel/cpu/cpufreq/speedstep-centrino.c b/arch/x86/kernel/cpu/cpufreq/speedstep-centrino.c index 55c831ed71ce..8d672ef162ce 100644 --- a/arch/x86/kernel/cpu/cpufreq/speedstep-centrino.c +++ b/arch/x86/kernel/cpu/cpufreq/speedstep-centrino.c | |||
@@ -323,14 +323,8 @@ static unsigned int get_cur_freq(unsigned int cpu) | |||
323 | { | 323 | { |
324 | unsigned l, h; | 324 | unsigned l, h; |
325 | unsigned clock_freq; | 325 | unsigned clock_freq; |
326 | cpumask_t saved_mask; | ||
327 | 326 | ||
328 | saved_mask = current->cpus_allowed; | 327 | rdmsr_on_cpu(cpu, MSR_IA32_PERF_STATUS, &l, &h); |
329 | set_cpus_allowed_ptr(current, &cpumask_of_cpu(cpu)); | ||
330 | if (smp_processor_id() != cpu) | ||
331 | return 0; | ||
332 | |||
333 | rdmsr(MSR_IA32_PERF_STATUS, l, h); | ||
334 | clock_freq = extract_clock(l, cpu, 0); | 328 | clock_freq = extract_clock(l, cpu, 0); |
335 | 329 | ||
336 | if (unlikely(clock_freq == 0)) { | 330 | if (unlikely(clock_freq == 0)) { |
@@ -340,11 +334,9 @@ static unsigned int get_cur_freq(unsigned int cpu) | |||
340 | * P-state transition (like TM2). Get the last freq set | 334 | * P-state transition (like TM2). Get the last freq set |
341 | * in PERF_CTL. | 335 | * in PERF_CTL. |
342 | */ | 336 | */ |
343 | rdmsr(MSR_IA32_PERF_CTL, l, h); | 337 | rdmsr_on_cpu(cpu, MSR_IA32_PERF_CTL, &l, &h); |
344 | clock_freq = extract_clock(l, cpu, 1); | 338 | clock_freq = extract_clock(l, cpu, 1); |
345 | } | 339 | } |
346 | |||
347 | set_cpus_allowed_ptr(current, &saved_mask); | ||
348 | return clock_freq; | 340 | return clock_freq; |
349 | } | 341 | } |
350 | 342 | ||
@@ -467,15 +459,10 @@ static int centrino_target (struct cpufreq_policy *policy, | |||
467 | struct cpufreq_freqs freqs; | 459 | struct cpufreq_freqs freqs; |
468 | int retval = 0; | 460 | int retval = 0; |
469 | unsigned int j, k, first_cpu, tmp; | 461 | unsigned int j, k, first_cpu, tmp; |
470 | cpumask_var_t saved_mask, covered_cpus; | 462 | cpumask_var_t covered_cpus; |
471 | 463 | ||
472 | if (unlikely(!alloc_cpumask_var(&saved_mask, GFP_KERNEL))) | 464 | if (unlikely(!zalloc_cpumask_var(&covered_cpus, GFP_KERNEL))) |
473 | return -ENOMEM; | ||
474 | if (unlikely(!zalloc_cpumask_var(&covered_cpus, GFP_KERNEL))) { | ||
475 | free_cpumask_var(saved_mask); | ||
476 | return -ENOMEM; | 465 | return -ENOMEM; |
477 | } | ||
478 | cpumask_copy(saved_mask, ¤t->cpus_allowed); | ||
479 | 466 | ||
480 | if (unlikely(per_cpu(centrino_model, cpu) == NULL)) { | 467 | if (unlikely(per_cpu(centrino_model, cpu) == NULL)) { |
481 | retval = -ENODEV; | 468 | retval = -ENODEV; |
@@ -493,7 +480,7 @@ static int centrino_target (struct cpufreq_policy *policy, | |||
493 | 480 | ||
494 | first_cpu = 1; | 481 | first_cpu = 1; |
495 | for_each_cpu(j, policy->cpus) { | 482 | for_each_cpu(j, policy->cpus) { |
496 | const struct cpumask *mask; | 483 | int good_cpu; |
497 | 484 | ||
498 | /* cpufreq holds the hotplug lock, so we are safe here */ | 485 | /* cpufreq holds the hotplug lock, so we are safe here */ |
499 | if (!cpu_online(j)) | 486 | if (!cpu_online(j)) |
@@ -504,32 +491,30 @@ static int centrino_target (struct cpufreq_policy *policy, | |||
504 | * Make sure we are running on CPU that wants to change freq | 491 | * Make sure we are running on CPU that wants to change freq |
505 | */ | 492 | */ |
506 | if (policy->shared_type == CPUFREQ_SHARED_TYPE_ANY) | 493 | if (policy->shared_type == CPUFREQ_SHARED_TYPE_ANY) |
507 | mask = policy->cpus; | 494 | good_cpu = cpumask_any_and(policy->cpus, |
495 | cpu_online_mask); | ||
508 | else | 496 | else |
509 | mask = cpumask_of(j); | 497 | good_cpu = j; |
510 | 498 | ||
511 | set_cpus_allowed_ptr(current, mask); | 499 | if (good_cpu >= nr_cpu_ids) { |
512 | preempt_disable(); | ||
513 | if (unlikely(!cpu_isset(smp_processor_id(), *mask))) { | ||
514 | dprintk("couldn't limit to CPUs in this domain\n"); | 500 | dprintk("couldn't limit to CPUs in this domain\n"); |
515 | retval = -EAGAIN; | 501 | retval = -EAGAIN; |
516 | if (first_cpu) { | 502 | if (first_cpu) { |
517 | /* We haven't started the transition yet. */ | 503 | /* We haven't started the transition yet. */ |
518 | goto migrate_end; | 504 | goto out; |
519 | } | 505 | } |
520 | preempt_enable(); | ||
521 | break; | 506 | break; |
522 | } | 507 | } |
523 | 508 | ||
524 | msr = per_cpu(centrino_model, cpu)->op_points[newstate].index; | 509 | msr = per_cpu(centrino_model, cpu)->op_points[newstate].index; |
525 | 510 | ||
526 | if (first_cpu) { | 511 | if (first_cpu) { |
527 | rdmsr(MSR_IA32_PERF_CTL, oldmsr, h); | 512 | rdmsr_on_cpu(good_cpu, MSR_IA32_PERF_CTL, &oldmsr, &h); |
528 | if (msr == (oldmsr & 0xffff)) { | 513 | if (msr == (oldmsr & 0xffff)) { |
529 | dprintk("no change needed - msr was and needs " | 514 | dprintk("no change needed - msr was and needs " |
530 | "to be %x\n", oldmsr); | 515 | "to be %x\n", oldmsr); |
531 | retval = 0; | 516 | retval = 0; |
532 | goto migrate_end; | 517 | goto out; |
533 | } | 518 | } |
534 | 519 | ||
535 | freqs.old = extract_clock(oldmsr, cpu, 0); | 520 | freqs.old = extract_clock(oldmsr, cpu, 0); |
@@ -553,14 +538,11 @@ static int centrino_target (struct cpufreq_policy *policy, | |||
553 | oldmsr |= msr; | 538 | oldmsr |= msr; |
554 | } | 539 | } |
555 | 540 | ||
556 | wrmsr(MSR_IA32_PERF_CTL, oldmsr, h); | 541 | wrmsr_on_cpu(good_cpu, MSR_IA32_PERF_CTL, oldmsr, h); |
557 | if (policy->shared_type == CPUFREQ_SHARED_TYPE_ANY) { | 542 | if (policy->shared_type == CPUFREQ_SHARED_TYPE_ANY) |
558 | preempt_enable(); | ||
559 | break; | 543 | break; |
560 | } | ||
561 | 544 | ||
562 | cpu_set(j, *covered_cpus); | 545 | cpumask_set_cpu(j, covered_cpus); |
563 | preempt_enable(); | ||
564 | } | 546 | } |
565 | 547 | ||
566 | for_each_cpu(k, policy->cpus) { | 548 | for_each_cpu(k, policy->cpus) { |
@@ -578,10 +560,8 @@ static int centrino_target (struct cpufreq_policy *policy, | |||
578 | * Best effort undo.. | 560 | * Best effort undo.. |
579 | */ | 561 | */ |
580 | 562 | ||
581 | for_each_cpu_mask_nr(j, *covered_cpus) { | 563 | for_each_cpu(j, covered_cpus) |
582 | set_cpus_allowed_ptr(current, &cpumask_of_cpu(j)); | 564 | wrmsr_on_cpu(j, MSR_IA32_PERF_CTL, oldmsr, h); |
583 | wrmsr(MSR_IA32_PERF_CTL, oldmsr, h); | ||
584 | } | ||
585 | 565 | ||
586 | tmp = freqs.new; | 566 | tmp = freqs.new; |
587 | freqs.new = freqs.old; | 567 | freqs.new = freqs.old; |
@@ -593,15 +573,9 @@ static int centrino_target (struct cpufreq_policy *policy, | |||
593 | cpufreq_notify_transition(&freqs, CPUFREQ_POSTCHANGE); | 573 | cpufreq_notify_transition(&freqs, CPUFREQ_POSTCHANGE); |
594 | } | 574 | } |
595 | } | 575 | } |
596 | set_cpus_allowed_ptr(current, saved_mask); | ||
597 | retval = 0; | 576 | retval = 0; |
598 | goto out; | ||
599 | 577 | ||
600 | migrate_end: | ||
601 | preempt_enable(); | ||
602 | set_cpus_allowed_ptr(current, saved_mask); | ||
603 | out: | 578 | out: |
604 | free_cpumask_var(saved_mask); | ||
605 | free_cpumask_var(covered_cpus); | 579 | free_cpumask_var(covered_cpus); |
606 | return retval; | 580 | return retval; |
607 | } | 581 | } |
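Where only a single MSR access is needed, the centrino changes use rdmsr_on_cpu()/wrmsr_on_cpu() instead of an open-coded cross-call. A hedged sketch, not part of the patch and with an illustrative helper name:

static unsigned int read_perf_status_on(unsigned int cpu)
{
	u32 lo, hi;

	/* Issues the RDMSR on 'cpu' via IPI and waits for the result. */
	rdmsr_on_cpu(cpu, MSR_IA32_PERF_STATUS, &lo, &hi);
	return lo;
}

These helpers fail when the CPU is offline, which is why callers such as centrino_target() first pick a known-online CPU with cpumask_any_and(policy->cpus, cpu_online_mask).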
diff --git a/arch/x86/kernel/cpu/cpufreq/speedstep-ich.c b/arch/x86/kernel/cpu/cpufreq/speedstep-ich.c index 016c1a4fa3fc..6911e91fb4f6 100644 --- a/arch/x86/kernel/cpu/cpufreq/speedstep-ich.c +++ b/arch/x86/kernel/cpu/cpufreq/speedstep-ich.c | |||
@@ -89,7 +89,8 @@ static int speedstep_find_register(void) | |||
89 | * speedstep_set_state - set the SpeedStep state | 89 | * speedstep_set_state - set the SpeedStep state |
90 | * @state: new processor frequency state (SPEEDSTEP_LOW or SPEEDSTEP_HIGH) | 90 | * @state: new processor frequency state (SPEEDSTEP_LOW or SPEEDSTEP_HIGH) |
91 | * | 91 | * |
92 | * Tries to change the SpeedStep state. | 92 | * Tries to change the SpeedStep state. Can be called from |
93 | * smp_call_function_single. | ||
93 | */ | 94 | */ |
94 | static void speedstep_set_state(unsigned int state) | 95 | static void speedstep_set_state(unsigned int state) |
95 | { | 96 | { |
@@ -143,6 +144,11 @@ static void speedstep_set_state(unsigned int state) | |||
143 | return; | 144 | return; |
144 | } | 145 | } |
145 | 146 | ||
147 | /* Wrapper for smp_call_function_single. */ | ||
148 | static void _speedstep_set_state(void *_state) | ||
149 | { | ||
150 | speedstep_set_state(*(unsigned int *)_state); | ||
151 | } | ||
146 | 152 | ||
147 | /** | 153 | /** |
148 | * speedstep_activate - activate SpeedStep control in the chipset | 154 | * speedstep_activate - activate SpeedStep control in the chipset |
@@ -226,22 +232,28 @@ static unsigned int speedstep_detect_chipset(void) | |||
226 | return 0; | 232 | return 0; |
227 | } | 233 | } |
228 | 234 | ||
229 | static unsigned int _speedstep_get(const struct cpumask *cpus) | 235 | struct get_freq_data { |
230 | { | ||
231 | unsigned int speed; | 236 | unsigned int speed; |
232 | cpumask_t cpus_allowed; | 237 | unsigned int processor; |
233 | 238 | }; | |
234 | cpus_allowed = current->cpus_allowed; | 239 | |
235 | set_cpus_allowed_ptr(current, cpus); | 240 | static void get_freq_data(void *_data) |
236 | speed = speedstep_get_frequency(speedstep_processor); | 241 | { |
237 | set_cpus_allowed_ptr(current, &cpus_allowed); | 242 | struct get_freq_data *data = _data; |
238 | dprintk("detected %u kHz as current frequency\n", speed); | 243 | |
239 | return speed; | 244 | data->speed = speedstep_get_frequency(data->processor); |
240 | } | 245 | } |
241 | 246 | ||
242 | static unsigned int speedstep_get(unsigned int cpu) | 247 | static unsigned int speedstep_get(unsigned int cpu) |
243 | { | 248 | { |
244 | return _speedstep_get(cpumask_of(cpu)); | 249 | struct get_freq_data data = { .processor = cpu }; |
250 | |||
251 | /* You're supposed to ensure CPU is online. */ | ||
252 | if (smp_call_function_single(cpu, get_freq_data, &data, 1) != 0) | ||
253 | BUG(); | ||
254 | |||
255 | dprintk("detected %u kHz as current frequency\n", data.speed); | ||
256 | return data.speed; | ||
245 | } | 257 | } |
246 | 258 | ||
247 | /** | 259 | /** |
@@ -257,16 +269,16 @@ static int speedstep_target(struct cpufreq_policy *policy, | |||
257 | unsigned int target_freq, | 269 | unsigned int target_freq, |
258 | unsigned int relation) | 270 | unsigned int relation) |
259 | { | 271 | { |
260 | unsigned int newstate = 0; | 272 | unsigned int newstate = 0, policy_cpu; |
261 | struct cpufreq_freqs freqs; | 273 | struct cpufreq_freqs freqs; |
262 | cpumask_t cpus_allowed; | ||
263 | int i; | 274 | int i; |
264 | 275 | ||
265 | if (cpufreq_frequency_table_target(policy, &speedstep_freqs[0], | 276 | if (cpufreq_frequency_table_target(policy, &speedstep_freqs[0], |
266 | target_freq, relation, &newstate)) | 277 | target_freq, relation, &newstate)) |
267 | return -EINVAL; | 278 | return -EINVAL; |
268 | 279 | ||
269 | freqs.old = _speedstep_get(policy->cpus); | 280 | policy_cpu = cpumask_any_and(policy->cpus, cpu_online_mask); |
281 | freqs.old = speedstep_get(policy_cpu); | ||
270 | freqs.new = speedstep_freqs[newstate].frequency; | 282 | freqs.new = speedstep_freqs[newstate].frequency; |
271 | freqs.cpu = policy->cpu; | 283 | freqs.cpu = policy->cpu; |
272 | 284 | ||
@@ -276,20 +288,13 @@ static int speedstep_target(struct cpufreq_policy *policy, | |||
276 | if (freqs.old == freqs.new) | 288 | if (freqs.old == freqs.new) |
277 | return 0; | 289 | return 0; |
278 | 290 | ||
279 | cpus_allowed = current->cpus_allowed; | ||
280 | |||
281 | for_each_cpu(i, policy->cpus) { | 291 | for_each_cpu(i, policy->cpus) { |
282 | freqs.cpu = i; | 292 | freqs.cpu = i; |
283 | cpufreq_notify_transition(&freqs, CPUFREQ_PRECHANGE); | 293 | cpufreq_notify_transition(&freqs, CPUFREQ_PRECHANGE); |
284 | } | 294 | } |
285 | 295 | ||
286 | /* switch to physical CPU where state is to be changed */ | 296 | smp_call_function_single(policy_cpu, _speedstep_set_state, &newstate, |
287 | set_cpus_allowed_ptr(current, policy->cpus); | 297 | true); |
288 | |||
289 | speedstep_set_state(newstate); | ||
290 | |||
291 | /* allow to be run on all CPUs */ | ||
292 | set_cpus_allowed_ptr(current, &cpus_allowed); | ||
293 | 298 | ||
294 | for_each_cpu(i, policy->cpus) { | 299 | for_each_cpu(i, policy->cpus) { |
295 | freqs.cpu = i; | 300 | freqs.cpu = i; |
@@ -312,33 +317,43 @@ static int speedstep_verify(struct cpufreq_policy *policy) | |||
312 | return cpufreq_frequency_table_verify(policy, &speedstep_freqs[0]); | 317 | return cpufreq_frequency_table_verify(policy, &speedstep_freqs[0]); |
313 | } | 318 | } |
314 | 319 | ||
320 | struct get_freqs { | ||
321 | struct cpufreq_policy *policy; | ||
322 | int ret; | ||
323 | }; | ||
324 | |||
325 | static void get_freqs_on_cpu(void *_get_freqs) | ||
326 | { | ||
327 | struct get_freqs *get_freqs = _get_freqs; | ||
328 | |||
329 | get_freqs->ret = | ||
330 | speedstep_get_freqs(speedstep_processor, | ||
331 | &speedstep_freqs[SPEEDSTEP_LOW].frequency, | ||
332 | &speedstep_freqs[SPEEDSTEP_HIGH].frequency, | ||
333 | &get_freqs->policy->cpuinfo.transition_latency, | ||
334 | &speedstep_set_state); | ||
335 | } | ||
315 | 336 | ||
316 | static int speedstep_cpu_init(struct cpufreq_policy *policy) | 337 | static int speedstep_cpu_init(struct cpufreq_policy *policy) |
317 | { | 338 | { |
318 | int result = 0; | 339 | int result; |
319 | unsigned int speed; | 340 | unsigned int policy_cpu, speed; |
320 | cpumask_t cpus_allowed; | 341 | struct get_freqs gf; |
321 | 342 | ||
322 | /* only run on CPU to be set, or on its sibling */ | 343 | /* only run on CPU to be set, or on its sibling */ |
323 | #ifdef CONFIG_SMP | 344 | #ifdef CONFIG_SMP |
324 | cpumask_copy(policy->cpus, cpu_sibling_mask(policy->cpu)); | 345 | cpumask_copy(policy->cpus, cpu_sibling_mask(policy->cpu)); |
325 | #endif | 346 | #endif |
326 | 347 | policy_cpu = cpumask_any_and(policy->cpus, cpu_online_mask); | |
327 | cpus_allowed = current->cpus_allowed; | ||
328 | set_cpus_allowed_ptr(current, policy->cpus); | ||
329 | 348 | ||
330 | /* detect low and high frequency and transition latency */ | 349 | /* detect low and high frequency and transition latency */ |
331 | result = speedstep_get_freqs(speedstep_processor, | 350 | gf.policy = policy; |
332 | &speedstep_freqs[SPEEDSTEP_LOW].frequency, | 351 | smp_call_function_single(policy_cpu, get_freqs_on_cpu, &gf, 1); |
333 | &speedstep_freqs[SPEEDSTEP_HIGH].frequency, | 352 | if (gf.ret) |
334 | &policy->cpuinfo.transition_latency, | 353 | return gf.ret; |
335 | &speedstep_set_state); | ||
336 | set_cpus_allowed_ptr(current, &cpus_allowed); | ||
337 | if (result) | ||
338 | return result; | ||
339 | 354 | ||
340 | /* get current speed setting */ | 355 | /* get current speed setting */ |
341 | speed = _speedstep_get(policy->cpus); | 356 | speed = speedstep_get(policy_cpu); |
342 | if (!speed) | 357 | if (!speed) |
343 | return -EIO; | 358 | return -EIO; |
344 | 359 | ||
diff --git a/arch/x86/kernel/cpu/cpufreq/speedstep-lib.c b/arch/x86/kernel/cpu/cpufreq/speedstep-lib.c index 2e3c6862657b..f4c290b8482f 100644 --- a/arch/x86/kernel/cpu/cpufreq/speedstep-lib.c +++ b/arch/x86/kernel/cpu/cpufreq/speedstep-lib.c | |||
@@ -226,6 +226,7 @@ static unsigned int pentium4_get_frequency(void) | |||
226 | } | 226 | } |
227 | 227 | ||
228 | 228 | ||
229 | /* Warning: may get called from smp_call_function_single. */ | ||
229 | unsigned int speedstep_get_frequency(unsigned int processor) | 230 | unsigned int speedstep_get_frequency(unsigned int processor) |
230 | { | 231 | { |
231 | switch (processor) { | 232 | switch (processor) { |
diff --git a/arch/x86/kernel/cpu/intel.c b/arch/x86/kernel/cpu/intel.c index 7437fa133c02..3260ab044996 100644 --- a/arch/x86/kernel/cpu/intel.c +++ b/arch/x86/kernel/cpu/intel.c | |||
@@ -86,6 +86,29 @@ static void __cpuinit early_init_intel(struct cpuinfo_x86 *c) | |||
86 | */ | 86 | */ |
87 | if (c->x86 == 6 && c->x86_model < 15) | 87 | if (c->x86 == 6 && c->x86_model < 15) |
88 | clear_cpu_cap(c, X86_FEATURE_PAT); | 88 | clear_cpu_cap(c, X86_FEATURE_PAT); |
89 | |||
90 | #ifdef CONFIG_KMEMCHECK | ||
91 | /* | ||
92 | * P4s have a "fast strings" feature which causes single- | ||
93 | * stepping REP instructions to only generate a #DB on | ||
94 | * cache-line boundaries. | ||
95 | * | ||
96 | * Ingo Molnar reported a Pentium D (model 6) and a Xeon | ||
97 | * (model 2) with the same problem. | ||
98 | */ | ||
99 | if (c->x86 == 15) { | ||
100 | u64 misc_enable; | ||
101 | |||
102 | rdmsrl(MSR_IA32_MISC_ENABLE, misc_enable); | ||
103 | |||
104 | if (misc_enable & MSR_IA32_MISC_ENABLE_FAST_STRING) { | ||
105 | printk(KERN_INFO "kmemcheck: Disabling fast string operations\n"); | ||
106 | |||
107 | misc_enable &= ~MSR_IA32_MISC_ENABLE_FAST_STRING; | ||
108 | wrmsrl(MSR_IA32_MISC_ENABLE, misc_enable); | ||
109 | } | ||
110 | } | ||
111 | #endif | ||
89 | } | 112 | } |
90 | 113 | ||
91 | #ifdef CONFIG_X86_32 | 114 | #ifdef CONFIG_X86_32 |
@@ -229,12 +252,12 @@ static void __cpuinit intel_workarounds(struct cpuinfo_x86 *c) | |||
229 | } | 252 | } |
230 | #endif | 253 | #endif |
231 | 254 | ||
232 | static void __cpuinit srat_detect_node(void) | 255 | static void __cpuinit srat_detect_node(struct cpuinfo_x86 *c) |
233 | { | 256 | { |
234 | #if defined(CONFIG_NUMA) && defined(CONFIG_X86_64) | 257 | #if defined(CONFIG_NUMA) && defined(CONFIG_X86_64) |
235 | unsigned node; | 258 | unsigned node; |
236 | int cpu = smp_processor_id(); | 259 | int cpu = smp_processor_id(); |
237 | int apicid = hard_smp_processor_id(); | 260 | int apicid = cpu_has_apic ? hard_smp_processor_id() : c->apicid; |
238 | 261 | ||
239 | /* Don't do the funky fallback heuristics the AMD version employs | 262 | /* Don't do the funky fallback heuristics the AMD version employs |
240 | for now. */ | 263 | for now. */ |
@@ -400,7 +423,7 @@ static void __cpuinit init_intel(struct cpuinfo_x86 *c) | |||
400 | } | 423 | } |
401 | 424 | ||
402 | /* Work around errata */ | 425 | /* Work around errata */ |
403 | srat_detect_node(); | 426 | srat_detect_node(c); |
404 | 427 | ||
405 | if (cpu_has(c, X86_FEATURE_VMX)) | 428 | if (cpu_has(c, X86_FEATURE_VMX)) |
406 | detect_vmx_virtcap(c); | 429 | detect_vmx_virtcap(c); |
diff --git a/arch/x86/kernel/cpu/intel_cacheinfo.c b/arch/x86/kernel/cpu/intel_cacheinfo.c index 483eda96e102..789efe217e1a 100644 --- a/arch/x86/kernel/cpu/intel_cacheinfo.c +++ b/arch/x86/kernel/cpu/intel_cacheinfo.c | |||
@@ -17,6 +17,7 @@ | |||
17 | 17 | ||
18 | #include <asm/processor.h> | 18 | #include <asm/processor.h> |
19 | #include <asm/smp.h> | 19 | #include <asm/smp.h> |
20 | #include <asm/k8.h> | ||
20 | 21 | ||
21 | #define LVL_1_INST 1 | 22 | #define LVL_1_INST 1 |
22 | #define LVL_1_DATA 2 | 23 | #define LVL_1_DATA 2 |
@@ -159,14 +160,6 @@ struct _cpuid4_info_regs { | |||
159 | unsigned long can_disable; | 160 | unsigned long can_disable; |
160 | }; | 161 | }; |
161 | 162 | ||
162 | #if defined(CONFIG_PCI) && defined(CONFIG_SYSFS) | ||
163 | static struct pci_device_id k8_nb_id[] = { | ||
164 | { PCI_DEVICE(PCI_VENDOR_ID_AMD, 0x1103) }, | ||
165 | { PCI_DEVICE(PCI_VENDOR_ID_AMD, 0x1203) }, | ||
166 | {} | ||
167 | }; | ||
168 | #endif | ||
169 | |||
170 | unsigned short num_cache_leaves; | 163 | unsigned short num_cache_leaves; |
171 | 164 | ||
172 | /* AMD doesn't have CPUID4. Emulate it here to report the same | 165 | /* AMD doesn't have CPUID4. Emulate it here to report the same |
@@ -207,10 +200,17 @@ union l3_cache { | |||
207 | }; | 200 | }; |
208 | 201 | ||
209 | static const unsigned short __cpuinitconst assocs[] = { | 202 | static const unsigned short __cpuinitconst assocs[] = { |
210 | [1] = 1, [2] = 2, [4] = 4, [6] = 8, | 203 | [1] = 1, |
211 | [8] = 16, [0xa] = 32, [0xb] = 48, | 204 | [2] = 2, |
205 | [4] = 4, | ||
206 | [6] = 8, | ||
207 | [8] = 16, | ||
208 | [0xa] = 32, | ||
209 | [0xb] = 48, | ||
212 | [0xc] = 64, | 210 | [0xc] = 64, |
213 | [0xf] = 0xffff // ?? | 211 | [0xd] = 96, |
212 | [0xe] = 128, | ||
213 | [0xf] = 0xffff /* fully associative - no way to show this currently */ | ||
214 | }; | 214 | }; |
215 | 215 | ||
216 | static const unsigned char __cpuinitconst levels[] = { 1, 1, 2, 3 }; | 216 | static const unsigned char __cpuinitconst levels[] = { 1, 1, 2, 3 }; |
@@ -271,7 +271,8 @@ amd_cpuid4(int leaf, union _cpuid4_leaf_eax *eax, | |||
271 | eax->split.type = types[leaf]; | 271 | eax->split.type = types[leaf]; |
272 | eax->split.level = levels[leaf]; | 272 | eax->split.level = levels[leaf]; |
273 | if (leaf == 3) | 273 | if (leaf == 3) |
274 | eax->split.num_threads_sharing = current_cpu_data.x86_max_cores - 1; | 274 | eax->split.num_threads_sharing = |
275 | current_cpu_data.x86_max_cores - 1; | ||
275 | else | 276 | else |
276 | eax->split.num_threads_sharing = 0; | 277 | eax->split.num_threads_sharing = 0; |
277 | eax->split.num_cores_on_die = current_cpu_data.x86_max_cores - 1; | 278 | eax->split.num_cores_on_die = current_cpu_data.x86_max_cores - 1; |
@@ -291,6 +292,14 @@ amd_check_l3_disable(int index, struct _cpuid4_info_regs *this_leaf) | |||
291 | { | 292 | { |
292 | if (index < 3) | 293 | if (index < 3) |
293 | return; | 294 | return; |
295 | |||
296 | if (boot_cpu_data.x86 == 0x11) | ||
297 | return; | ||
298 | |||
299 | /* see erratum #382 */ | ||
300 | if ((boot_cpu_data.x86 == 0x10) && (boot_cpu_data.x86_model < 0x8)) | ||
301 | return; | ||
302 | |||
294 | this_leaf->can_disable = 1; | 303 | this_leaf->can_disable = 1; |
295 | } | 304 | } |
296 | 305 | ||
@@ -696,97 +705,75 @@ static ssize_t show_type(struct _cpuid4_info *this_leaf, char *buf) | |||
696 | #define to_object(k) container_of(k, struct _index_kobject, kobj) | 705 | #define to_object(k) container_of(k, struct _index_kobject, kobj) |
697 | #define to_attr(a) container_of(a, struct _cache_attr, attr) | 706 | #define to_attr(a) container_of(a, struct _cache_attr, attr) |
698 | 707 | ||
699 | #ifdef CONFIG_PCI | 708 | static ssize_t show_cache_disable(struct _cpuid4_info *this_leaf, char *buf, |
700 | static struct pci_dev *get_k8_northbridge(int node) | 709 | unsigned int index) |
701 | { | ||
702 | struct pci_dev *dev = NULL; | ||
703 | int i; | ||
704 | |||
705 | for (i = 0; i <= node; i++) { | ||
706 | do { | ||
707 | dev = pci_get_device(PCI_ANY_ID, PCI_ANY_ID, dev); | ||
708 | if (!dev) | ||
709 | break; | ||
710 | } while (!pci_match_id(&k8_nb_id[0], dev)); | ||
711 | if (!dev) | ||
712 | break; | ||
713 | } | ||
714 | return dev; | ||
715 | } | ||
716 | #else | ||
717 | static struct pci_dev *get_k8_northbridge(int node) | ||
718 | { | ||
719 | return NULL; | ||
720 | } | ||
721 | #endif | ||
722 | |||
723 | static ssize_t show_cache_disable(struct _cpuid4_info *this_leaf, char *buf) | ||
724 | { | 710 | { |
725 | const struct cpumask *mask = to_cpumask(this_leaf->shared_cpu_map); | 711 | int cpu = cpumask_first(to_cpumask(this_leaf->shared_cpu_map)); |
726 | int node = cpu_to_node(cpumask_first(mask)); | 712 | int node = cpu_to_node(cpu); |
727 | struct pci_dev *dev = NULL; | 713 | struct pci_dev *dev = node_to_k8_nb_misc(node); |
728 | ssize_t ret = 0; | 714 | unsigned int reg = 0; |
729 | int i; | ||
730 | 715 | ||
731 | if (!this_leaf->can_disable) | 716 | if (!this_leaf->can_disable) |
732 | return sprintf(buf, "Feature not enabled\n"); | ||
733 | |||
734 | dev = get_k8_northbridge(node); | ||
735 | if (!dev) { | ||
736 | printk(KERN_ERR "Attempting AMD northbridge operation on a system with no northbridge\n"); | ||
737 | return -EINVAL; | 717 | return -EINVAL; |
738 | } | ||
739 | 718 | ||
740 | for (i = 0; i < 2; i++) { | 719 | if (!dev) |
741 | unsigned int reg; | 720 | return -EINVAL; |
742 | 721 | ||
743 | pci_read_config_dword(dev, 0x1BC + i * 4, ®); | 722 | pci_read_config_dword(dev, 0x1BC + index * 4, ®); |
723 | return sprintf(buf, "%x\n", reg); | ||
724 | } | ||
744 | 725 | ||
745 | ret += sprintf(buf, "%sEntry: %d\n", buf, i); | 726 | #define SHOW_CACHE_DISABLE(index) \ |
746 | ret += sprintf(buf, "%sReads: %s\tNew Entries: %s\n", | 727 | static ssize_t \ |
747 | buf, | 728 | show_cache_disable_##index(struct _cpuid4_info *this_leaf, char *buf) \ |
748 | reg & 0x80000000 ? "Disabled" : "Allowed", | 729 | { \ |
749 | reg & 0x40000000 ? "Disabled" : "Allowed"); | 730 | return show_cache_disable(this_leaf, buf, index); \ |
750 | ret += sprintf(buf, "%sSubCache: %x\tIndex: %x\n", | ||
751 | buf, (reg & 0x30000) >> 16, reg & 0xfff); | ||
752 | } | ||
753 | return ret; | ||
754 | } | 731 | } |
732 | SHOW_CACHE_DISABLE(0) | ||
733 | SHOW_CACHE_DISABLE(1) | ||
755 | 734 | ||
756 | static ssize_t | 735 | static ssize_t store_cache_disable(struct _cpuid4_info *this_leaf, |
757 | store_cache_disable(struct _cpuid4_info *this_leaf, const char *buf, | 736 | const char *buf, size_t count, unsigned int index) |
758 | size_t count) | ||
759 | { | 737 | { |
760 | const struct cpumask *mask = to_cpumask(this_leaf->shared_cpu_map); | 738 | int cpu = cpumask_first(to_cpumask(this_leaf->shared_cpu_map)); |
761 | int node = cpu_to_node(cpumask_first(mask)); | 739 | int node = cpu_to_node(cpu); |
762 | struct pci_dev *dev = NULL; | 740 | struct pci_dev *dev = node_to_k8_nb_misc(node); |
763 | unsigned int ret, index, val; | 741 | unsigned long val = 0; |
742 | unsigned int scrubber = 0; | ||
764 | 743 | ||
765 | if (!this_leaf->can_disable) | 744 | if (!this_leaf->can_disable) |
766 | return 0; | ||
767 | |||
768 | if (strlen(buf) > 15) | ||
769 | return -EINVAL; | 745 | return -EINVAL; |
770 | 746 | ||
771 | ret = sscanf(buf, "%x %x", &index, &val); | 747 | if (!capable(CAP_SYS_ADMIN)) |
772 | if (ret != 2) | 748 | return -EPERM; |
749 | |||
750 | if (!dev) | ||
773 | return -EINVAL; | 751 | return -EINVAL; |
774 | if (index > 1) | 752 | |
753 | if (strict_strtoul(buf, 10, &val) < 0) | ||
775 | return -EINVAL; | 754 | return -EINVAL; |
776 | 755 | ||
777 | val |= 0xc0000000; | 756 | val |= 0xc0000000; |
778 | dev = get_k8_northbridge(node); | 757 | |
779 | if (!dev) { | 758 | pci_read_config_dword(dev, 0x58, &scrubber); |
780 | printk(KERN_ERR "Attempting AMD northbridge operation on a system with no northbridge\n"); | 759 | scrubber &= ~0x1f000000; |
781 | return -EINVAL; | 760 | pci_write_config_dword(dev, 0x58, scrubber); |
782 | } | ||
783 | 761 | ||
784 | pci_write_config_dword(dev, 0x1BC + index * 4, val & ~0x40000000); | 762 | pci_write_config_dword(dev, 0x1BC + index * 4, val & ~0x40000000); |
785 | wbinvd(); | 763 | wbinvd(); |
786 | pci_write_config_dword(dev, 0x1BC + index * 4, val); | 764 | pci_write_config_dword(dev, 0x1BC + index * 4, val); |
765 | return count; | ||
766 | } | ||
787 | 767 | ||
788 | return 1; | 768 | #define STORE_CACHE_DISABLE(index) \ |
769 | static ssize_t \ | ||
770 | store_cache_disable_##index(struct _cpuid4_info *this_leaf, \ | ||
771 | const char *buf, size_t count) \ | ||
772 | { \ | ||
773 | return store_cache_disable(this_leaf, buf, count, index); \ | ||
789 | } | 774 | } |
775 | STORE_CACHE_DISABLE(0) | ||
776 | STORE_CACHE_DISABLE(1) | ||
790 | 777 | ||
791 | struct _cache_attr { | 778 | struct _cache_attr { |
792 | struct attribute attr; | 779 | struct attribute attr; |
@@ -808,7 +795,10 @@ define_one_ro(size); | |||
808 | define_one_ro(shared_cpu_map); | 795 | define_one_ro(shared_cpu_map); |
809 | define_one_ro(shared_cpu_list); | 796 | define_one_ro(shared_cpu_list); |
810 | 797 | ||
811 | static struct _cache_attr cache_disable = __ATTR(cache_disable, 0644, show_cache_disable, store_cache_disable); | 798 | static struct _cache_attr cache_disable_0 = __ATTR(cache_disable_0, 0644, |
799 | show_cache_disable_0, store_cache_disable_0); | ||
800 | static struct _cache_attr cache_disable_1 = __ATTR(cache_disable_1, 0644, | ||
801 | show_cache_disable_1, store_cache_disable_1); | ||
812 | 802 | ||
813 | static struct attribute * default_attrs[] = { | 803 | static struct attribute * default_attrs[] = { |
814 | &type.attr, | 804 | &type.attr, |
@@ -820,7 +810,8 @@ static struct attribute * default_attrs[] = { | |||
820 | &size.attr, | 810 | &size.attr, |
821 | &shared_cpu_map.attr, | 811 | &shared_cpu_map.attr, |
822 | &shared_cpu_list.attr, | 812 | &shared_cpu_list.attr, |
823 | &cache_disable.attr, | 813 | &cache_disable_0.attr, |
814 | &cache_disable_1.attr, | ||
824 | NULL | 815 | NULL |
825 | }; | 816 | }; |
826 | 817 | ||
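With the cache_disable attribute split into cache_disable_0 and cache_disable_1, each L3 index-disable slot gets its own sysfs file, and store_cache_disable() now parses a single decimal cache index via strict_strtoul(). A hypothetical userspace sketch of writing one of the new files; the sysfs path and the index value 12 are assumptions for illustration, not taken from the patch:

#include <stdio.h>

int main(void)
{
	/* index3 is the L3 cache directory on these parts. */
	FILE *f = fopen("/sys/devices/system/cpu/cpu0/cache/index3/cache_disable_0", "w");

	if (!f)
		return 1;
	/* One decimal cache index per write; needs CAP_SYS_ADMIN. */
	fputs("12\n", f);
	fclose(f);
	return 0;
}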
diff --git a/arch/x86/kernel/cpu/mcheck/Makefile b/arch/x86/kernel/cpu/mcheck/Makefile index b2f89829bbe8..188a1ca5ad2b 100644 --- a/arch/x86/kernel/cpu/mcheck/Makefile +++ b/arch/x86/kernel/cpu/mcheck/Makefile | |||
@@ -1,7 +1,12 @@ | |||
1 | obj-y = mce_$(BITS).o therm_throt.o | 1 | obj-y = mce.o |
2 | 2 | ||
3 | obj-$(CONFIG_X86_32) += k7.o p4.o p5.o p6.o winchip.o | 3 | obj-$(CONFIG_X86_NEW_MCE) += mce-severity.o |
4 | obj-$(CONFIG_X86_MCE_INTEL) += mce_intel_64.o | 4 | obj-$(CONFIG_X86_OLD_MCE) += k7.o p4.o p6.o |
5 | obj-$(CONFIG_X86_MCE_AMD) += mce_amd_64.o | 5 | obj-$(CONFIG_X86_ANCIENT_MCE) += winchip.o p5.o |
6 | obj-$(CONFIG_X86_MCE_INTEL) += mce_intel.o | ||
7 | obj-$(CONFIG_X86_MCE_AMD) += mce_amd.o | ||
6 | obj-$(CONFIG_X86_MCE_NONFATAL) += non-fatal.o | 8 | obj-$(CONFIG_X86_MCE_NONFATAL) += non-fatal.o |
7 | obj-$(CONFIG_X86_MCE_THRESHOLD) += threshold.o | 9 | obj-$(CONFIG_X86_MCE_THRESHOLD) += threshold.o |
10 | obj-$(CONFIG_X86_MCE_INJECT) += mce-inject.o | ||
11 | |||
12 | obj-$(CONFIG_X86_THERMAL_VECTOR) += therm_throt.o | ||
diff --git a/arch/x86/kernel/cpu/mcheck/k7.c b/arch/x86/kernel/cpu/mcheck/k7.c index dd3af6e7b39a..b945d5dbc609 100644 --- a/arch/x86/kernel/cpu/mcheck/k7.c +++ b/arch/x86/kernel/cpu/mcheck/k7.c | |||
@@ -2,25 +2,23 @@ | |||
2 | * Athlon specific Machine Check Exception Reporting | 2 | * Athlon specific Machine Check Exception Reporting |
3 | * (C) Copyright 2002 Dave Jones <davej@redhat.com> | 3 | * (C) Copyright 2002 Dave Jones <davej@redhat.com> |
4 | */ | 4 | */ |
5 | |||
6 | #include <linux/init.h> | ||
7 | #include <linux/types.h> | ||
8 | #include <linux/kernel.h> | ||
9 | #include <linux/interrupt.h> | 5 | #include <linux/interrupt.h> |
6 | #include <linux/kernel.h> | ||
7 | #include <linux/types.h> | ||
8 | #include <linux/init.h> | ||
10 | #include <linux/smp.h> | 9 | #include <linux/smp.h> |
11 | 10 | ||
12 | #include <asm/processor.h> | 11 | #include <asm/processor.h> |
13 | #include <asm/system.h> | 12 | #include <asm/system.h> |
13 | #include <asm/mce.h> | ||
14 | #include <asm/msr.h> | 14 | #include <asm/msr.h> |
15 | 15 | ||
16 | #include "mce.h" | 16 | /* Machine Check Handler For AMD Athlon/Duron: */ |
17 | |||
18 | /* Machine Check Handler For AMD Athlon/Duron */ | ||
19 | static void k7_machine_check(struct pt_regs *regs, long error_code) | 17 | static void k7_machine_check(struct pt_regs *regs, long error_code) |
20 | { | 18 | { |
21 | int recover = 1; | ||
22 | u32 alow, ahigh, high, low; | 19 | u32 alow, ahigh, high, low; |
23 | u32 mcgstl, mcgsth; | 20 | u32 mcgstl, mcgsth; |
21 | int recover = 1; | ||
24 | int i; | 22 | int i; |
25 | 23 | ||
26 | rdmsr(MSR_IA32_MCG_STATUS, mcgstl, mcgsth); | 24 | rdmsr(MSR_IA32_MCG_STATUS, mcgstl, mcgsth); |
@@ -32,15 +30,19 @@ static void k7_machine_check(struct pt_regs *regs, long error_code) | |||
32 | 30 | ||
33 | for (i = 1; i < nr_mce_banks; i++) { | 31 | for (i = 1; i < nr_mce_banks; i++) { |
34 | rdmsr(MSR_IA32_MC0_STATUS+i*4, low, high); | 32 | rdmsr(MSR_IA32_MC0_STATUS+i*4, low, high); |
35 | if (high&(1<<31)) { | 33 | if (high & (1<<31)) { |
36 | char misc[20]; | 34 | char misc[20]; |
37 | char addr[24]; | 35 | char addr[24]; |
38 | misc[0] = addr[0] = '\0'; | 36 | |
37 | misc[0] = '\0'; | ||
38 | addr[0] = '\0'; | ||
39 | |||
39 | if (high & (1<<29)) | 40 | if (high & (1<<29)) |
40 | recover |= 1; | 41 | recover |= 1; |
41 | if (high & (1<<25)) | 42 | if (high & (1<<25)) |
42 | recover |= 2; | 43 | recover |= 2; |
43 | high &= ~(1<<31); | 44 | high &= ~(1<<31); |
45 | |||
44 | if (high & (1<<27)) { | 46 | if (high & (1<<27)) { |
45 | rdmsr(MSR_IA32_MC0_MISC+i*4, alow, ahigh); | 47 | rdmsr(MSR_IA32_MC0_MISC+i*4, alow, ahigh); |
46 | snprintf(misc, 20, "[%08x%08x]", ahigh, alow); | 48 | snprintf(misc, 20, "[%08x%08x]", ahigh, alow); |
@@ -49,27 +51,31 @@ static void k7_machine_check(struct pt_regs *regs, long error_code) | |||
49 | rdmsr(MSR_IA32_MC0_ADDR+i*4, alow, ahigh); | 51 | rdmsr(MSR_IA32_MC0_ADDR+i*4, alow, ahigh); |
50 | snprintf(addr, 24, " at %08x%08x", ahigh, alow); | 52 | snprintf(addr, 24, " at %08x%08x", ahigh, alow); |
51 | } | 53 | } |
54 | |||
52 | printk(KERN_EMERG "CPU %d: Bank %d: %08x%08x%s%s\n", | 55 | printk(KERN_EMERG "CPU %d: Bank %d: %08x%08x%s%s\n", |
53 | smp_processor_id(), i, high, low, misc, addr); | 56 | smp_processor_id(), i, high, low, misc, addr); |
54 | /* Clear it */ | 57 | |
58 | /* Clear it: */ | ||
55 | wrmsr(MSR_IA32_MC0_STATUS+i*4, 0UL, 0UL); | 59 | wrmsr(MSR_IA32_MC0_STATUS+i*4, 0UL, 0UL); |
56 | /* Serialize */ | 60 | /* Serialize: */ |
57 | wmb(); | 61 | wmb(); |
58 | add_taint(TAINT_MACHINE_CHECK); | 62 | add_taint(TAINT_MACHINE_CHECK); |
59 | } | 63 | } |
60 | } | 64 | } |
61 | 65 | ||
62 | if (recover&2) | 66 | if (recover & 2) |
63 | panic("CPU context corrupt"); | 67 | panic("CPU context corrupt"); |
64 | if (recover&1) | 68 | if (recover & 1) |
65 | panic("Unable to continue"); | 69 | panic("Unable to continue"); |
70 | |||
66 | printk(KERN_EMERG "Attempting to continue.\n"); | 71 | printk(KERN_EMERG "Attempting to continue.\n"); |
72 | |||
67 | mcgstl &= ~(1<<2); | 73 | mcgstl &= ~(1<<2); |
68 | wrmsr(MSR_IA32_MCG_STATUS, mcgstl, mcgsth); | 74 | wrmsr(MSR_IA32_MCG_STATUS, mcgstl, mcgsth); |
69 | } | 75 | } |
70 | 76 | ||
71 | 77 | ||
72 | /* AMD K7 machine check is Intel like */ | 78 | /* AMD K7 machine check is Intel like: */ |
73 | void amd_mcheck_init(struct cpuinfo_x86 *c) | 79 | void amd_mcheck_init(struct cpuinfo_x86 *c) |
74 | { | 80 | { |
75 | u32 l, h; | 81 | u32 l, h; |
@@ -79,21 +85,26 @@ void amd_mcheck_init(struct cpuinfo_x86 *c) | |||
79 | return; | 85 | return; |
80 | 86 | ||
81 | machine_check_vector = k7_machine_check; | 87 | machine_check_vector = k7_machine_check; |
88 | /* Make sure the vector pointer is visible before we enable MCEs: */ | ||
82 | wmb(); | 89 | wmb(); |
83 | 90 | ||
84 | printk(KERN_INFO "Intel machine check architecture supported.\n"); | 91 | printk(KERN_INFO "Intel machine check architecture supported.\n"); |
92 | |||
85 | rdmsr(MSR_IA32_MCG_CAP, l, h); | 93 | rdmsr(MSR_IA32_MCG_CAP, l, h); |
86 | if (l & (1<<8)) /* Control register present ? */ | 94 | if (l & (1<<8)) /* Control register present ? */ |
87 | wrmsr(MSR_IA32_MCG_CTL, 0xffffffff, 0xffffffff); | 95 | wrmsr(MSR_IA32_MCG_CTL, 0xffffffff, 0xffffffff); |
88 | nr_mce_banks = l & 0xff; | 96 | nr_mce_banks = l & 0xff; |
89 | 97 | ||
90 | /* Clear status for MC index 0 separately, we don't touch CTL, | 98 | /* |
91 | * as some K7 Athlons cause spurious MCEs when its enabled. */ | 99 | * Clear status for MC index 0 separately, we don't touch CTL, |
100 | * as some K7 Athlons cause spurious MCEs when it's enabled: | ||
101 | */ | ||
92 | if (boot_cpu_data.x86 == 6) { | 102 | if (boot_cpu_data.x86 == 6) { |
93 | wrmsr(MSR_IA32_MC0_STATUS, 0x0, 0x0); | 103 | wrmsr(MSR_IA32_MC0_STATUS, 0x0, 0x0); |
94 | i = 1; | 104 | i = 1; |
95 | } else | 105 | } else |
96 | i = 0; | 106 | i = 0; |
107 | |||
97 | for (; i < nr_mce_banks; i++) { | 108 | for (; i < nr_mce_banks; i++) { |
98 | wrmsr(MSR_IA32_MC0_CTL+4*i, 0xffffffff, 0xffffffff); | 109 | wrmsr(MSR_IA32_MC0_CTL+4*i, 0xffffffff, 0xffffffff); |
99 | wrmsr(MSR_IA32_MC0_STATUS+4*i, 0x0, 0x0); | 110 | wrmsr(MSR_IA32_MC0_STATUS+4*i, 0x0, 0x0); |
diff --git a/arch/x86/kernel/cpu/mcheck/mce-inject.c b/arch/x86/kernel/cpu/mcheck/mce-inject.c new file mode 100644 index 000000000000..a3a235a53f09 --- /dev/null +++ b/arch/x86/kernel/cpu/mcheck/mce-inject.c | |||
@@ -0,0 +1,127 @@ | |||
1 | /* | ||
2 | * Machine check injection support. | ||
3 | * Copyright 2008 Intel Corporation. | ||
4 | * | ||
5 | * This program is free software; you can redistribute it and/or | ||
6 | * modify it under the terms of the GNU General Public License | ||
7 | * as published by the Free Software Foundation; version 2 | ||
8 | * of the License. | ||
9 | * | ||
10 | * Authors: | ||
11 | * Andi Kleen | ||
12 | * Ying Huang | ||
13 | */ | ||
14 | #include <linux/uaccess.h> | ||
15 | #include <linux/module.h> | ||
16 | #include <linux/timer.h> | ||
17 | #include <linux/kernel.h> | ||
18 | #include <linux/string.h> | ||
19 | #include <linux/fs.h> | ||
20 | #include <linux/smp.h> | ||
21 | #include <asm/mce.h> | ||
22 | |||
23 | /* Update fake mce registers on current CPU. */ | ||
24 | static void inject_mce(struct mce *m) | ||
25 | { | ||
26 | struct mce *i = &per_cpu(injectm, m->extcpu); | ||
27 | |||
28 | /* Make sure no one reads a partially written injectm */ | ||
29 | i->finished = 0; | ||
30 | mb(); | ||
31 | m->finished = 0; | ||
32 | /* First set the fields after finished */ | ||
33 | i->extcpu = m->extcpu; | ||
34 | mb(); | ||
35 | /* Now write record in order, finished last (except above) */ | ||
36 | memcpy(i, m, sizeof(struct mce)); | ||
37 | /* Finally activate it */ | ||
38 | mb(); | ||
39 | i->finished = 1; | ||
40 | } | ||
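inject_mce() publishes the per-CPU injectm record with explicit barriers so that a reader which observes finished == 1 also observes a completely written record. A hedged user-space analogue of that publish-with-flag pattern using C11 atomics (not the kernel's barrier primitives):

#include <stdatomic.h>
#include <string.h>

struct record {
        long payload[4];
        atomic_int finished;            /* 1 only once payload is complete */
};

static void publish(struct record *dst, const long *src)
{
        /* Drop the flag first so late readers see "not ready" meanwhile. */
        atomic_store_explicit(&dst->finished, 0, memory_order_relaxed);
        memcpy(dst->payload, src, sizeof(dst->payload));
        /* Release store: the payload writes are ordered before the flag. */
        atomic_store_explicit(&dst->finished, 1, memory_order_release);
}

static int read_if_ready(const struct record *src, long *out)
{
        /* Acquire load pairs with the release store in publish(). */
        if (!atomic_load_explicit(&src->finished, memory_order_acquire))
                return 0;
        memcpy(out, src->payload, sizeof(src->payload));
        return 1;
}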
41 | |||
42 | struct delayed_mce { | ||
43 | struct timer_list timer; | ||
44 | struct mce m; | ||
45 | }; | ||
46 | |||
47 | /* Inject mce on current CPU */ | ||
48 | static void raise_mce(unsigned long data) | ||
49 | { | ||
50 | struct delayed_mce *dm = (struct delayed_mce *)data; | ||
51 | struct mce *m = &dm->m; | ||
52 | int cpu = m->extcpu; | ||
53 | |||
54 | inject_mce(m); | ||
55 | if (m->status & MCI_STATUS_UC) { | ||
56 | struct pt_regs regs; | ||
57 | memset(®s, 0, sizeof(struct pt_regs)); | ||
58 | regs.ip = m->ip; | ||
59 | regs.cs = m->cs; | ||
60 | printk(KERN_INFO "Triggering MCE exception on CPU %d\n", cpu); | ||
61 | do_machine_check(®s, 0); | ||
62 | printk(KERN_INFO "MCE exception done on CPU %d\n", cpu); | ||
63 | } else { | ||
64 | mce_banks_t b; | ||
65 | memset(&b, 0xff, sizeof(mce_banks_t)); | ||
66 | printk(KERN_INFO "Starting machine check poll CPU %d\n", cpu); | ||
67 | machine_check_poll(0, &b); | ||
68 | mce_notify_irq(); | ||
69 | printk(KERN_INFO "Finished machine check poll on CPU %d\n", | ||
70 | cpu); | ||
71 | } | ||
72 | kfree(dm); | ||
73 | } | ||
74 | |||
75 | /* Error injection interface */ | ||
76 | static ssize_t mce_write(struct file *filp, const char __user *ubuf, | ||
77 | size_t usize, loff_t *off) | ||
78 | { | ||
79 | struct delayed_mce *dm; | ||
80 | struct mce m; | ||
81 | |||
82 | if (!capable(CAP_SYS_ADMIN)) | ||
83 | return -EPERM; | ||
84 | /* | ||
85 | * There are some cases where real MSR reads could slip | ||
86 | * through. | ||
87 | */ | ||
88 | if (!boot_cpu_has(X86_FEATURE_MCE) || !boot_cpu_has(X86_FEATURE_MCA)) | ||
89 | return -EIO; | ||
90 | |||
91 | if ((unsigned long)usize > sizeof(struct mce)) | ||
92 | usize = sizeof(struct mce); | ||
93 | if (copy_from_user(&m, ubuf, usize)) | ||
94 | return -EFAULT; | ||
95 | |||
96 | if (m.extcpu >= num_possible_cpus() || !cpu_online(m.extcpu)) | ||
97 | return -EINVAL; | ||
98 | |||
99 | dm = kmalloc(sizeof(struct delayed_mce), GFP_KERNEL); | ||
100 | if (!dm) | ||
101 | return -ENOMEM; | ||
102 | |||
103 | /* | ||
104 | * Need to give user space some time to set everything up, | ||
105 | * so do it a jiffie or two later everywhere. | ||
106 | * Should we use a hrtimer here for better synchronization? | ||
107 | */ | ||
108 | memcpy(&dm->m, &m, sizeof(struct mce)); | ||
109 | setup_timer(&dm->timer, raise_mce, (unsigned long)dm); | ||
110 | dm->timer.expires = jiffies + 2; | ||
111 | add_timer_on(&dm->timer, m.extcpu); | ||
112 | return usize; | ||
113 | } | ||
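A hedged sketch of how user space could drive this interface once the module is loaded: fill in a struct mce and write() it to /dev/mcelog (CAP_SYS_ADMIN required). The <asm/mce.h> include and the exact field layout are assumptions based on the structure used above, not a documented API:

#include <asm/mce.h>            /* assumed to export struct mce to user space */
#include <fcntl.h>
#include <string.h>
#include <unistd.h>

static int inject_corrected_error(int cpu)
{
        struct mce m;
        int fd = open("/dev/mcelog", O_WRONLY);

        if (fd < 0)
                return -1;

        memset(&m, 0, sizeof(m));
        m.extcpu = cpu;                 /* CPU that should raise the event */
        m.bank = 1;                     /* illustrative bank number */
        m.status = 1ULL << 63;          /* MCI_STATUS_VAL set, UC clear */

        /* mce_write() copies at most sizeof(struct mce) bytes. */
        if (write(fd, &m, sizeof(m)) < 0) {
                close(fd);
                return -1;
        }
        return close(fd);
}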
114 | |||
115 | static int inject_init(void) | ||
116 | { | ||
117 | printk(KERN_INFO "Machine check injector initialized\n"); | ||
118 | mce_chrdev_ops.write = mce_write; | ||
119 | return 0; | ||
120 | } | ||
121 | |||
122 | module_init(inject_init); | ||
123 | /* | ||
124 | * Cannot tolerate unloading currently because we cannot | ||
125 | * guarantee all openers of mce_chrdev will get a reference to us. | ||
126 | */ | ||
127 | MODULE_LICENSE("GPL"); | ||
diff --git a/arch/x86/kernel/cpu/mcheck/mce-internal.h b/arch/x86/kernel/cpu/mcheck/mce-internal.h new file mode 100644 index 000000000000..54dcb8ff12e5 --- /dev/null +++ b/arch/x86/kernel/cpu/mcheck/mce-internal.h | |||
@@ -0,0 +1,15 @@ | |||
1 | #include <asm/mce.h> | ||
2 | |||
3 | enum severity_level { | ||
4 | MCE_NO_SEVERITY, | ||
5 | MCE_KEEP_SEVERITY, | ||
6 | MCE_SOME_SEVERITY, | ||
7 | MCE_AO_SEVERITY, | ||
8 | MCE_UC_SEVERITY, | ||
9 | MCE_AR_SEVERITY, | ||
10 | MCE_PANIC_SEVERITY, | ||
11 | }; | ||
12 | |||
13 | int mce_severity(struct mce *a, int tolerant, char **msg); | ||
14 | |||
15 | extern int mce_ser; | ||
diff --git a/arch/x86/kernel/cpu/mcheck/mce-severity.c b/arch/x86/kernel/cpu/mcheck/mce-severity.c new file mode 100644 index 000000000000..ff0807f97056 --- /dev/null +++ b/arch/x86/kernel/cpu/mcheck/mce-severity.c | |||
@@ -0,0 +1,218 @@ | |||
1 | /* | ||
2 | * MCE grading rules. | ||
3 | * Copyright 2008, 2009 Intel Corporation. | ||
4 | * | ||
5 | * This program is free software; you can redistribute it and/or | ||
6 | * modify it under the terms of the GNU General Public License | ||
7 | * as published by the Free Software Foundation; version 2 | ||
8 | * of the License. | ||
9 | * | ||
10 | * Author: Andi Kleen | ||
11 | */ | ||
12 | #include <linux/kernel.h> | ||
13 | #include <linux/seq_file.h> | ||
14 | #include <linux/init.h> | ||
15 | #include <linux/debugfs.h> | ||
16 | #include <asm/mce.h> | ||
17 | |||
18 | #include "mce-internal.h" | ||
19 | |||
20 | /* | ||
21 | * Grade an mce by severity. In general the most severe ones are processed | ||
22 | * first. Since there are quite a lot of combinations test the bits in a | ||
23 | * table-driven way. The rules are simply processed in order, first | ||
24 | * match wins. | ||
25 | * | ||
26 | * Note this is only used for machine check exceptions, the corrected | ||
27 | * errors use much simpler rules. The exceptions still check for the corrected | ||
28 | * errors, but only to leave them alone for the CMCI handler (except for | ||
29 | * panic situations) | ||
30 | */ | ||
31 | |||
32 | enum context { IN_KERNEL = 1, IN_USER = 2 }; | ||
33 | enum ser { SER_REQUIRED = 1, NO_SER = 2 }; | ||
34 | |||
35 | static struct severity { | ||
36 | u64 mask; | ||
37 | u64 result; | ||
38 | unsigned char sev; | ||
39 | unsigned char mcgmask; | ||
40 | unsigned char mcgres; | ||
41 | unsigned char ser; | ||
42 | unsigned char context; | ||
43 | unsigned char covered; | ||
44 | char *msg; | ||
45 | } severities[] = { | ||
46 | #define KERNEL .context = IN_KERNEL | ||
47 | #define USER .context = IN_USER | ||
48 | #define SER .ser = SER_REQUIRED | ||
49 | #define NOSER .ser = NO_SER | ||
50 | #define SEV(s) .sev = MCE_ ## s ## _SEVERITY | ||
51 | #define BITCLR(x, s, m, r...) { .mask = x, .result = 0, SEV(s), .msg = m, ## r } | ||
52 | #define BITSET(x, s, m, r...) { .mask = x, .result = x, SEV(s), .msg = m, ## r } | ||
53 | #define MCGMASK(x, res, s, m, r...) \ | ||
54 | { .mcgmask = x, .mcgres = res, SEV(s), .msg = m, ## r } | ||
55 | #define MASK(x, y, s, m, r...) \ | ||
56 | { .mask = x, .result = y, SEV(s), .msg = m, ## r } | ||
57 | #define MCI_UC_S (MCI_STATUS_UC|MCI_STATUS_S) | ||
58 | #define MCI_UC_SAR (MCI_STATUS_UC|MCI_STATUS_S|MCI_STATUS_AR) | ||
59 | #define MCACOD 0xffff | ||
60 | |||
61 | BITCLR(MCI_STATUS_VAL, NO, "Invalid"), | ||
62 | BITCLR(MCI_STATUS_EN, NO, "Not enabled"), | ||
63 | BITSET(MCI_STATUS_PCC, PANIC, "Processor context corrupt"), | ||
64 | /* When MCIP is not set something is very confused */ | ||
65 | MCGMASK(MCG_STATUS_MCIP, 0, PANIC, "MCIP not set in MCA handler"), | ||
66 | /* Neither return nor error IP -- no chance to recover -> PANIC */ | ||
67 | MCGMASK(MCG_STATUS_RIPV|MCG_STATUS_EIPV, 0, PANIC, | ||
68 | "Neither restart nor error IP"), | ||
69 | MCGMASK(MCG_STATUS_RIPV, 0, PANIC, "In kernel and no restart IP", | ||
70 | KERNEL), | ||
71 | BITCLR(MCI_STATUS_UC, KEEP, "Corrected error", NOSER), | ||
72 | MASK(MCI_STATUS_OVER|MCI_STATUS_UC|MCI_STATUS_EN, MCI_STATUS_UC, SOME, | ||
73 | "Spurious not enabled", SER), | ||
74 | |||
75 | /* ignore OVER for UCNA */ | ||
76 | MASK(MCI_UC_SAR, MCI_STATUS_UC, KEEP, | ||
77 | "Uncorrected no action required", SER), | ||
78 | MASK(MCI_STATUS_OVER|MCI_UC_SAR, MCI_STATUS_UC|MCI_STATUS_AR, PANIC, | ||
79 | "Illegal combination (UCNA with AR=1)", SER), | ||
80 | MASK(MCI_STATUS_S, 0, KEEP, "Non signalled machine check", SER), | ||
81 | |||
82 | /* AR add known MCACODs here */ | ||
83 | MASK(MCI_STATUS_OVER|MCI_UC_SAR, MCI_STATUS_OVER|MCI_UC_SAR, PANIC, | ||
84 | "Action required with lost events", SER), | ||
85 | MASK(MCI_STATUS_OVER|MCI_UC_SAR|MCACOD, MCI_UC_SAR, PANIC, | ||
86 | "Action required; unknown MCACOD", SER), | ||
87 | |||
88 | /* known AO MCACODs: */ | ||
89 | MASK(MCI_UC_SAR|MCI_STATUS_OVER|0xfff0, MCI_UC_S|0xc0, AO, | ||
90 | "Action optional: memory scrubbing error", SER), | ||
91 | MASK(MCI_UC_SAR|MCI_STATUS_OVER|MCACOD, MCI_UC_S|0x17a, AO, | ||
92 | "Action optional: last level cache writeback error", SER), | ||
93 | |||
94 | MASK(MCI_STATUS_OVER|MCI_UC_SAR, MCI_UC_S, SOME, | ||
95 | "Action optional unknown MCACOD", SER), | ||
96 | MASK(MCI_STATUS_OVER|MCI_UC_SAR, MCI_UC_S|MCI_STATUS_OVER, SOME, | ||
97 | "Action optional with lost events", SER), | ||
98 | BITSET(MCI_STATUS_UC|MCI_STATUS_OVER, PANIC, "Overflowed uncorrected"), | ||
99 | BITSET(MCI_STATUS_UC, UC, "Uncorrected"), | ||
100 | BITSET(0, SOME, "No match") /* always matches. keep at end */ | ||
101 | }; | ||
102 | |||
103 | /* | ||
104 | * If the EIPV bit is set, it means the saved IP is the | ||
105 | * instruction which caused the MCE. | ||
106 | */ | ||
107 | static int error_context(struct mce *m) | ||
108 | { | ||
109 | if (m->mcgstatus & MCG_STATUS_EIPV) | ||
110 | return (m->ip && (m->cs & 3) == 3) ? IN_USER : IN_KERNEL; | ||
111 | /* Unknown, assume kernel */ | ||
112 | return IN_KERNEL; | ||
113 | } | ||
114 | |||
115 | int mce_severity(struct mce *a, int tolerant, char **msg) | ||
116 | { | ||
117 | enum context ctx = error_context(a); | ||
118 | struct severity *s; | ||
119 | |||
120 | for (s = severities;; s++) { | ||
121 | if ((a->status & s->mask) != s->result) | ||
122 | continue; | ||
123 | if ((a->mcgstatus & s->mcgmask) != s->mcgres) | ||
124 | continue; | ||
125 | if (s->ser == SER_REQUIRED && !mce_ser) | ||
126 | continue; | ||
127 | if (s->ser == NO_SER && mce_ser) | ||
128 | continue; | ||
129 | if (s->context && ctx != s->context) | ||
130 | continue; | ||
131 | if (msg) | ||
132 | *msg = s->msg; | ||
133 | s->covered = 1; | ||
134 | if (s->sev >= MCE_UC_SEVERITY && ctx == IN_KERNEL) { | ||
135 | if (panic_on_oops || tolerant < 1) | ||
136 | return MCE_PANIC_SEVERITY; | ||
137 | } | ||
138 | return s->sev; | ||
139 | } | ||
140 | } | ||
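mce_severity() walks severities[] from top to bottom and returns the first entry whose mask/result pair (plus the SER and context constraints) matches; the catch-all entry at the end guarantees the loop terminates. A stripped-down sketch of the same first-match idea, with purely illustrative flag values rather than real MCi_STATUS bits:

#include <stdint.h>
#include <stdio.h>

#define F_VALID (1ull << 0)
#define F_UC    (1ull << 1)
#define F_PCC   (1ull << 2)

struct rule {
        uint64_t mask;          /* bits to look at */
        uint64_t result;        /* value those bits must have */
        int sev;                /* severity returned on a match */
        const char *msg;
};

static const struct rule rules[] = {
        { F_VALID, 0,     0, "Invalid"          },
        { F_PCC,   F_PCC, 3, "Context corrupt"  },
        { F_UC,    0,     1, "Corrected error"  },
        { 0,       0,     2, "No match"         },      /* always matches */
};

static int grade(uint64_t status, const char **msg)
{
        const struct rule *r;

        for (r = rules; ; r++) {
                if ((status & r->mask) != r->result)
                        continue;
                *msg = r->msg;
                return r->sev;
        }
}

int main(void)
{
        const char *msg;
        int sev = grade(F_VALID | F_PCC, &msg);

        printf("severity %d: %s\n", sev, msg);
        return 0;
}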
141 | |||
142 | static void *s_start(struct seq_file *f, loff_t *pos) | ||
143 | { | ||
144 | if (*pos >= ARRAY_SIZE(severities)) | ||
145 | return NULL; | ||
146 | return &severities[*pos]; | ||
147 | } | ||
148 | |||
149 | static void *s_next(struct seq_file *f, void *data, loff_t *pos) | ||
150 | { | ||
151 | if (++(*pos) >= ARRAY_SIZE(severities)) | ||
152 | return NULL; | ||
153 | return &severities[*pos]; | ||
154 | } | ||
155 | |||
156 | static void s_stop(struct seq_file *f, void *data) | ||
157 | { | ||
158 | } | ||
159 | |||
160 | static int s_show(struct seq_file *f, void *data) | ||
161 | { | ||
162 | struct severity *ser = data; | ||
163 | seq_printf(f, "%d\t%s\n", ser->covered, ser->msg); | ||
164 | return 0; | ||
165 | } | ||
166 | |||
167 | static const struct seq_operations severities_seq_ops = { | ||
168 | .start = s_start, | ||
169 | .next = s_next, | ||
170 | .stop = s_stop, | ||
171 | .show = s_show, | ||
172 | }; | ||
173 | |||
174 | static int severities_coverage_open(struct inode *inode, struct file *file) | ||
175 | { | ||
176 | return seq_open(file, &severities_seq_ops); | ||
177 | } | ||
178 | |||
179 | static ssize_t severities_coverage_write(struct file *file, | ||
180 | const char __user *ubuf, | ||
181 | size_t count, loff_t *ppos) | ||
182 | { | ||
183 | int i; | ||
184 | for (i = 0; i < ARRAY_SIZE(severities); i++) | ||
185 | severities[i].covered = 0; | ||
186 | return count; | ||
187 | } | ||
188 | |||
189 | static const struct file_operations severities_coverage_fops = { | ||
190 | .open = severities_coverage_open, | ||
191 | .release = seq_release, | ||
192 | .read = seq_read, | ||
193 | .write = severities_coverage_write, | ||
194 | }; | ||
195 | |||
196 | static int __init severities_debugfs_init(void) | ||
197 | { | ||
198 | struct dentry *dmce = NULL, *fseverities_coverage = NULL; | ||
199 | |||
200 | dmce = debugfs_create_dir("mce", NULL); | ||
201 | if (dmce == NULL) | ||
202 | goto err_out; | ||
203 | fseverities_coverage = debugfs_create_file("severities-coverage", | ||
204 | 0444, dmce, NULL, | ||
205 | &severities_coverage_fops); | ||
206 | if (fseverities_coverage == NULL) | ||
207 | goto err_out; | ||
208 | |||
209 | return 0; | ||
210 | |||
211 | err_out: | ||
212 | if (fseverities_coverage) | ||
213 | debugfs_remove(fseverities_coverage); | ||
214 | if (dmce) | ||
215 | debugfs_remove(dmce); | ||
216 | return -ENOMEM; | ||
217 | } | ||
218 | late_initcall(severities_debugfs_init); | ||
diff --git a/arch/x86/kernel/cpu/mcheck/mce.c b/arch/x86/kernel/cpu/mcheck/mce.c new file mode 100644 index 000000000000..284d1de968bc --- /dev/null +++ b/arch/x86/kernel/cpu/mcheck/mce.c | |||
@@ -0,0 +1,2049 @@ | |||
1 | /* | ||
2 | * Machine check handler. | ||
3 | * | ||
4 | * K8 parts Copyright 2002,2003 Andi Kleen, SuSE Labs. | ||
5 | * Rest from unknown author(s). | ||
6 | * 2004 Andi Kleen. Rewrote most of it. | ||
7 | * Copyright 2008 Intel Corporation | ||
8 | * Author: Andi Kleen | ||
9 | */ | ||
10 | #include <linux/thread_info.h> | ||
11 | #include <linux/capability.h> | ||
12 | #include <linux/miscdevice.h> | ||
13 | #include <linux/interrupt.h> | ||
14 | #include <linux/ratelimit.h> | ||
15 | #include <linux/kallsyms.h> | ||
16 | #include <linux/rcupdate.h> | ||
17 | #include <linux/kobject.h> | ||
18 | #include <linux/uaccess.h> | ||
19 | #include <linux/kdebug.h> | ||
20 | #include <linux/kernel.h> | ||
21 | #include <linux/percpu.h> | ||
22 | #include <linux/string.h> | ||
23 | #include <linux/sysdev.h> | ||
24 | #include <linux/delay.h> | ||
25 | #include <linux/ctype.h> | ||
26 | #include <linux/sched.h> | ||
27 | #include <linux/sysfs.h> | ||
28 | #include <linux/types.h> | ||
29 | #include <linux/init.h> | ||
30 | #include <linux/kmod.h> | ||
31 | #include <linux/poll.h> | ||
32 | #include <linux/nmi.h> | ||
33 | #include <linux/cpu.h> | ||
34 | #include <linux/smp.h> | ||
35 | #include <linux/fs.h> | ||
36 | #include <linux/mm.h> | ||
37 | |||
38 | #include <asm/processor.h> | ||
39 | #include <asm/hw_irq.h> | ||
40 | #include <asm/apic.h> | ||
41 | #include <asm/idle.h> | ||
42 | #include <asm/ipi.h> | ||
43 | #include <asm/mce.h> | ||
44 | #include <asm/msr.h> | ||
45 | |||
46 | #include "mce-internal.h" | ||
47 | |||
48 | /* Handle unconfigured int18 (should never happen) */ | ||
49 | static void unexpected_machine_check(struct pt_regs *regs, long error_code) | ||
50 | { | ||
51 | printk(KERN_ERR "CPU#%d: Unexpected int18 (Machine Check).\n", | ||
52 | smp_processor_id()); | ||
53 | } | ||
54 | |||
55 | /* Call the installed machine check handler for this CPU setup. */ | ||
56 | void (*machine_check_vector)(struct pt_regs *, long error_code) = | ||
57 | unexpected_machine_check; | ||
58 | |||
59 | int mce_disabled __read_mostly; | ||
60 | |||
61 | #ifdef CONFIG_X86_NEW_MCE | ||
62 | |||
63 | #define MISC_MCELOG_MINOR 227 | ||
64 | |||
65 | #define SPINUNIT 100 /* 100ns */ | ||
66 | |||
67 | atomic_t mce_entry; | ||
68 | |||
69 | DEFINE_PER_CPU(unsigned, mce_exception_count); | ||
70 | |||
71 | /* | ||
72 | * Tolerant levels: | ||
73 | * 0: always panic on uncorrected errors, log corrected errors | ||
74 | * 1: panic or SIGBUS on uncorrected errors, log corrected errors | ||
75 | * 2: SIGBUS or log uncorrected errors (if possible), log corrected errors | ||
76 | * 3: never panic or SIGBUS, log all errors (for testing only) | ||
77 | */ | ||
78 | static int tolerant __read_mostly = 1; | ||
79 | static int banks __read_mostly; | ||
80 | static u64 *bank __read_mostly; | ||
81 | static int rip_msr __read_mostly; | ||
82 | static int mce_bootlog __read_mostly = -1; | ||
83 | static int monarch_timeout __read_mostly = -1; | ||
84 | static int mce_panic_timeout __read_mostly; | ||
85 | static int mce_dont_log_ce __read_mostly; | ||
86 | int mce_cmci_disabled __read_mostly; | ||
87 | int mce_ignore_ce __read_mostly; | ||
88 | int mce_ser __read_mostly; | ||
89 | |||
90 | /* User mode helper program triggered by machine check event */ | ||
91 | static unsigned long mce_need_notify; | ||
92 | static char mce_helper[128]; | ||
93 | static char *mce_helper_argv[2] = { mce_helper, NULL }; | ||
94 | |||
95 | static unsigned long dont_init_banks; | ||
96 | |||
97 | static DECLARE_WAIT_QUEUE_HEAD(mce_wait); | ||
98 | static DEFINE_PER_CPU(struct mce, mces_seen); | ||
99 | static int cpu_missing; | ||
100 | |||
101 | |||
102 | /* MCA banks polled by the periodic polling timer for corrected events */ | ||
103 | DEFINE_PER_CPU(mce_banks_t, mce_poll_banks) = { | ||
104 | [0 ... BITS_TO_LONGS(MAX_NR_BANKS)-1] = ~0UL | ||
105 | }; | ||
106 | |||
107 | static inline int skip_bank_init(int i) | ||
108 | { | ||
109 | return i < BITS_PER_LONG && test_bit(i, &dont_init_banks); | ||
110 | } | ||
111 | |||
112 | static DEFINE_PER_CPU(struct work_struct, mce_work); | ||
113 | |||
114 | /* Do initial initialization of a struct mce */ | ||
115 | void mce_setup(struct mce *m) | ||
116 | { | ||
117 | memset(m, 0, sizeof(struct mce)); | ||
118 | m->cpu = m->extcpu = smp_processor_id(); | ||
119 | rdtscll(m->tsc); | ||
120 | /* We hope get_seconds stays lockless */ | ||
121 | m->time = get_seconds(); | ||
122 | m->cpuvendor = boot_cpu_data.x86_vendor; | ||
123 | m->cpuid = cpuid_eax(1); | ||
124 | #ifdef CONFIG_SMP | ||
125 | m->socketid = cpu_data(m->extcpu).phys_proc_id; | ||
126 | #endif | ||
127 | m->apicid = cpu_data(m->extcpu).initial_apicid; | ||
128 | rdmsrl(MSR_IA32_MCG_CAP, m->mcgcap); | ||
129 | } | ||
130 | |||
131 | DEFINE_PER_CPU(struct mce, injectm); | ||
132 | EXPORT_PER_CPU_SYMBOL_GPL(injectm); | ||
133 | |||
134 | /* | ||
135 | * Lockless MCE logging infrastructure. | ||
136 | * This avoids deadlocks on printk locks without having to break locks. Also | ||
137 | * separate MCEs from kernel messages to avoid bogus bug reports. | ||
138 | */ | ||
139 | |||
140 | static struct mce_log mcelog = { | ||
141 | .signature = MCE_LOG_SIGNATURE, | ||
142 | .len = MCE_LOG_LEN, | ||
143 | .recordlen = sizeof(struct mce), | ||
144 | }; | ||
145 | |||
146 | void mce_log(struct mce *mce) | ||
147 | { | ||
148 | unsigned next, entry; | ||
149 | |||
150 | mce->finished = 0; | ||
151 | wmb(); | ||
152 | for (;;) { | ||
153 | entry = rcu_dereference(mcelog.next); | ||
154 | for (;;) { | ||
155 | /* | ||
156 | * When the buffer fills up discard new entries. | ||
157 | * Assume that the earlier errors are the more | ||
158 | * interesting ones: | ||
159 | */ | ||
160 | if (entry >= MCE_LOG_LEN) { | ||
161 | set_bit(MCE_OVERFLOW, | ||
162 | (unsigned long *)&mcelog.flags); | ||
163 | return; | ||
164 | } | ||
165 | /* Old left over entry. Skip: */ | ||
166 | if (mcelog.entry[entry].finished) { | ||
167 | entry++; | ||
168 | continue; | ||
169 | } | ||
170 | break; | ||
171 | } | ||
172 | smp_rmb(); | ||
173 | next = entry + 1; | ||
174 | if (cmpxchg(&mcelog.next, entry, next) == entry) | ||
175 | break; | ||
176 | } | ||
177 | memcpy(mcelog.entry + entry, mce, sizeof(struct mce)); | ||
178 | wmb(); | ||
179 | mcelog.entry[entry].finished = 1; | ||
180 | wmb(); | ||
181 | |||
182 | mce->finished = 1; | ||
183 | set_bit(0, &mce_need_notify); | ||
184 | } | ||
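mce_log() reserves a slot by advancing mcelog.next with cmpxchg(): whichever context wins the race owns that entry and publishes it by setting entry->finished last. A hedged C11 sketch of just that reservation-and-publish step (the kernel version additionally skips stale entries and flags overflow):

#include <stdatomic.h>
#include <stdbool.h>

#define LOG_LEN 32

struct entry {
        long data;
        atomic_int finished;
};

static struct entry log_buf[LOG_LEN];
static atomic_uint log_next;

static bool log_event(long data)
{
        unsigned int idx = atomic_load(&log_next);

        do {
                if (idx >= LOG_LEN)
                        return false;   /* full: drop, keep earlier entries */
        } while (!atomic_compare_exchange_weak(&log_next, &idx, idx + 1));

        log_buf[idx].data = data;
        /* Publish: the slot only becomes visible once finished is set. */
        atomic_store_explicit(&log_buf[idx].finished, 1, memory_order_release);
        return true;
}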
185 | |||
186 | static void print_mce(struct mce *m) | ||
187 | { | ||
188 | printk(KERN_EMERG | ||
189 | "CPU %d: Machine Check Exception: %16Lx Bank %d: %016Lx\n", | ||
190 | m->extcpu, m->mcgstatus, m->bank, m->status); | ||
191 | if (m->ip) { | ||
192 | printk(KERN_EMERG "RIP%s %02x:<%016Lx> ", | ||
193 | !(m->mcgstatus & MCG_STATUS_EIPV) ? " !INEXACT!" : "", | ||
194 | m->cs, m->ip); | ||
195 | if (m->cs == __KERNEL_CS) | ||
196 | print_symbol("{%s}", m->ip); | ||
197 | printk("\n"); | ||
198 | } | ||
199 | printk(KERN_EMERG "TSC %llx ", m->tsc); | ||
200 | if (m->addr) | ||
201 | printk("ADDR %llx ", m->addr); | ||
202 | if (m->misc) | ||
203 | printk("MISC %llx ", m->misc); | ||
204 | printk("\n"); | ||
205 | printk(KERN_EMERG "PROCESSOR %u:%x TIME %llu SOCKET %u APIC %x\n", | ||
206 | m->cpuvendor, m->cpuid, m->time, m->socketid, | ||
207 | m->apicid); | ||
208 | } | ||
209 | |||
210 | static void print_mce_head(void) | ||
211 | { | ||
212 | printk(KERN_EMERG "\n" KERN_EMERG "HARDWARE ERROR\n"); | ||
213 | } | ||
214 | |||
215 | static void print_mce_tail(void) | ||
216 | { | ||
217 | printk(KERN_EMERG "This is not a software problem!\n" | ||
218 | KERN_EMERG "Run through mcelog --ascii to decode and contact your hardware vendor\n"); | ||
219 | } | ||
220 | |||
221 | #define PANIC_TIMEOUT 5 /* 5 seconds */ | ||
222 | |||
223 | static atomic_t mce_paniced; | ||
224 | |||
225 | /* Panic in progress. Enable interrupts and wait for final IPI */ | ||
226 | static void wait_for_panic(void) | ||
227 | { | ||
228 | long timeout = PANIC_TIMEOUT*USEC_PER_SEC; | ||
229 | preempt_disable(); | ||
230 | local_irq_enable(); | ||
231 | while (timeout-- > 0) | ||
232 | udelay(1); | ||
233 | if (panic_timeout == 0) | ||
234 | panic_timeout = mce_panic_timeout; | ||
235 | panic("Panicing machine check CPU died"); | ||
236 | } | ||
237 | |||
238 | static void mce_panic(char *msg, struct mce *final, char *exp) | ||
239 | { | ||
240 | int i; | ||
241 | |||
242 | /* | ||
243 | * Make sure only one CPU runs in machine check panic | ||
244 | */ | ||
245 | if (atomic_add_return(1, &mce_paniced) > 1) | ||
246 | wait_for_panic(); | ||
247 | barrier(); | ||
248 | |||
249 | bust_spinlocks(1); | ||
250 | console_verbose(); | ||
251 | print_mce_head(); | ||
252 | /* First print corrected ones that are still unlogged */ | ||
253 | for (i = 0; i < MCE_LOG_LEN; i++) { | ||
254 | struct mce *m = &mcelog.entry[i]; | ||
255 | if (!(m->status & MCI_STATUS_VAL)) | ||
256 | continue; | ||
257 | if (!(m->status & MCI_STATUS_UC)) | ||
258 | print_mce(m); | ||
259 | } | ||
260 | /* Now print uncorrected but with the final one last */ | ||
261 | for (i = 0; i < MCE_LOG_LEN; i++) { | ||
262 | struct mce *m = &mcelog.entry[i]; | ||
263 | if (!(m->status & MCI_STATUS_VAL)) | ||
264 | continue; | ||
265 | if (!(m->status & MCI_STATUS_UC)) | ||
266 | continue; | ||
267 | if (!final || memcmp(m, final, sizeof(struct mce))) | ||
268 | print_mce(m); | ||
269 | } | ||
270 | if (final) | ||
271 | print_mce(final); | ||
272 | if (cpu_missing) | ||
273 | printk(KERN_EMERG "Some CPUs didn't answer in synchronization\n"); | ||
274 | print_mce_tail(); | ||
275 | if (exp) | ||
276 | printk(KERN_EMERG "Machine check: %s\n", exp); | ||
277 | if (panic_timeout == 0) | ||
278 | panic_timeout = mce_panic_timeout; | ||
279 | panic(msg); | ||
280 | } | ||
281 | |||
282 | /* Support code for software error injection */ | ||
283 | |||
284 | static int msr_to_offset(u32 msr) | ||
285 | { | ||
286 | unsigned bank = __get_cpu_var(injectm.bank); | ||
287 | if (msr == rip_msr) | ||
288 | return offsetof(struct mce, ip); | ||
289 | if (msr == MSR_IA32_MC0_STATUS + bank*4) | ||
290 | return offsetof(struct mce, status); | ||
291 | if (msr == MSR_IA32_MC0_ADDR + bank*4) | ||
292 | return offsetof(struct mce, addr); | ||
293 | if (msr == MSR_IA32_MC0_MISC + bank*4) | ||
294 | return offsetof(struct mce, misc); | ||
295 | if (msr == MSR_IA32_MCG_STATUS) | ||
296 | return offsetof(struct mce, mcgstatus); | ||
297 | return -1; | ||
298 | } | ||
299 | |||
300 | /* MSR access wrappers used for error injection */ | ||
301 | static u64 mce_rdmsrl(u32 msr) | ||
302 | { | ||
303 | u64 v; | ||
304 | if (__get_cpu_var(injectm).finished) { | ||
305 | int offset = msr_to_offset(msr); | ||
306 | if (offset < 0) | ||
307 | return 0; | ||
308 | return *(u64 *)((char *)&__get_cpu_var(injectm) + offset); | ||
309 | } | ||
310 | rdmsrl(msr, v); | ||
311 | return v; | ||
312 | } | ||
313 | |||
314 | static void mce_wrmsrl(u32 msr, u64 v) | ||
315 | { | ||
316 | if (__get_cpu_var(injectm).finished) { | ||
317 | int offset = msr_to_offset(msr); | ||
318 | if (offset >= 0) | ||
319 | *(u64 *)((char *)&__get_cpu_var(injectm) + offset) = v; | ||
320 | return; | ||
321 | } | ||
322 | wrmsrl(msr, v); | ||
323 | } | ||
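These wrappers are what makes software injection work: whenever the per-CPU injectm record is armed (finished != 0), MSR reads and writes are redirected into the corresponding struct mce field chosen by msr_to_offset(). A tiny user-space sketch of that redirection idea, with hypothetical names:

#include <stddef.h>
#include <stdint.h>

struct fake_mce {
        uint64_t status;
        uint64_t addr;
        int finished;                   /* injection record is armed */
};

static struct fake_mce injected;

/* Map a "register" number onto an offset inside the fake record. */
static int reg_to_offset(unsigned int reg)
{
        switch (reg) {
        case 0:  return offsetof(struct fake_mce, status);
        case 1:  return offsetof(struct fake_mce, addr);
        default: return -1;
        }
}

static uint64_t read_reg(unsigned int reg, uint64_t hw_value)
{
        if (injected.finished) {
                int off = reg_to_offset(reg);

                if (off >= 0)
                        return *(uint64_t *)((char *)&injected + off);
                return 0;               /* unknown register reads as zero */
        }
        return hw_value;                /* stand-in for the real rdmsr */
}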
324 | |||
325 | /* | ||
326 | * Simple lockless ring to communicate PFNs from the exception handler to the | ||
327 | * process context work function. This is vastly simplified because there's | ||
328 | * only a single reader and a single writer. | ||
329 | */ | ||
330 | #define MCE_RING_SIZE 16 /* we use one entry less */ | ||
331 | |||
332 | struct mce_ring { | ||
333 | unsigned short start; | ||
334 | unsigned short end; | ||
335 | unsigned long ring[MCE_RING_SIZE]; | ||
336 | }; | ||
337 | static DEFINE_PER_CPU(struct mce_ring, mce_ring); | ||
338 | |||
339 | /* Runs with CPU affinity in workqueue */ | ||
340 | static int mce_ring_empty(void) | ||
341 | { | ||
342 | struct mce_ring *r = &__get_cpu_var(mce_ring); | ||
343 | |||
344 | return r->start == r->end; | ||
345 | } | ||
346 | |||
347 | static int mce_ring_get(unsigned long *pfn) | ||
348 | { | ||
349 | struct mce_ring *r; | ||
350 | int ret = 0; | ||
351 | |||
352 | *pfn = 0; | ||
353 | get_cpu(); | ||
354 | r = &__get_cpu_var(mce_ring); | ||
355 | if (r->start == r->end) | ||
356 | goto out; | ||
357 | *pfn = r->ring[r->start]; | ||
358 | r->start = (r->start + 1) % MCE_RING_SIZE; | ||
359 | ret = 1; | ||
360 | out: | ||
361 | put_cpu(); | ||
362 | return ret; | ||
363 | } | ||
364 | |||
365 | /* Always runs in MCE context with preempt off */ | ||
366 | static int mce_ring_add(unsigned long pfn) | ||
367 | { | ||
368 | struct mce_ring *r = &__get_cpu_var(mce_ring); | ||
369 | unsigned next; | ||
370 | |||
371 | next = (r->end + 1) % MCE_RING_SIZE; | ||
372 | if (next == r->start) | ||
373 | return -1; | ||
374 | r->ring[r->end] = pfn; | ||
375 | wmb(); | ||
376 | r->end = next; | ||
377 | return 0; | ||
378 | } | ||
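The mce_ring is a classic single-producer/single-consumer ring: with MCE_RING_SIZE slots one slot is deliberately left unused, so it queues at most 15 PFNs and start == end unambiguously means empty. A minimal user-space sketch of the same discipline (the kernel adds a wmb() between the payload write and the index update):

#include <stdbool.h>

#define RING_SIZE 16                    /* one slot always stays unused */

struct ring {
        unsigned short start;           /* consumer index */
        unsigned short end;             /* producer index */
        unsigned long slot[RING_SIZE];
};

static bool ring_add(struct ring *r, unsigned long v)  /* producer only */
{
        unsigned short next = (r->end + 1) % RING_SIZE;

        if (next == r->start)
                return false;           /* full: 15 entries already queued */
        r->slot[r->end] = v;
        r->end = next;                  /* publish after the payload write */
        return true;
}

static bool ring_get(struct ring *r, unsigned long *v) /* consumer only */
{
        if (r->start == r->end)
                return false;           /* empty */
        *v = r->slot[r->start];
        r->start = (r->start + 1) % RING_SIZE;
        return true;
}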
379 | |||
380 | int mce_available(struct cpuinfo_x86 *c) | ||
381 | { | ||
382 | if (mce_disabled) | ||
383 | return 0; | ||
384 | return cpu_has(c, X86_FEATURE_MCE) && cpu_has(c, X86_FEATURE_MCA); | ||
385 | } | ||
386 | |||
387 | static void mce_schedule_work(void) | ||
388 | { | ||
389 | if (!mce_ring_empty()) { | ||
390 | struct work_struct *work = &__get_cpu_var(mce_work); | ||
391 | if (!work_pending(work)) | ||
392 | schedule_work(work); | ||
393 | } | ||
394 | } | ||
395 | |||
396 | /* | ||
397 | * Get the address of the instruction at the time of the machine check | ||
398 | * error. | ||
399 | */ | ||
400 | static inline void mce_get_rip(struct mce *m, struct pt_regs *regs) | ||
401 | { | ||
402 | |||
403 | if (regs && (m->mcgstatus & (MCG_STATUS_RIPV|MCG_STATUS_EIPV))) { | ||
404 | m->ip = regs->ip; | ||
405 | m->cs = regs->cs; | ||
406 | } else { | ||
407 | m->ip = 0; | ||
408 | m->cs = 0; | ||
409 | } | ||
410 | if (rip_msr) | ||
411 | m->ip = mce_rdmsrl(rip_msr); | ||
412 | } | ||
413 | |||
414 | #ifdef CONFIG_X86_LOCAL_APIC | ||
415 | /* | ||
416 | * Called after interrupts have been reenabled again | ||
417 | * when an MCE happened during an interrupts-off region | ||
418 | * in the kernel. | ||
419 | */ | ||
420 | asmlinkage void smp_mce_self_interrupt(struct pt_regs *regs) | ||
421 | { | ||
422 | ack_APIC_irq(); | ||
423 | exit_idle(); | ||
424 | irq_enter(); | ||
425 | mce_notify_irq(); | ||
426 | mce_schedule_work(); | ||
427 | irq_exit(); | ||
428 | } | ||
429 | #endif | ||
430 | |||
431 | static void mce_report_event(struct pt_regs *regs) | ||
432 | { | ||
433 | if (regs->flags & (X86_VM_MASK|X86_EFLAGS_IF)) { | ||
434 | mce_notify_irq(); | ||
435 | /* | ||
436 | * Triggering the work queue here is just an insurance | ||
437 | * policy in case the syscall exit notify handler | ||
438 | * doesn't run soon enough or ends up running on the | ||
439 | * wrong CPU (can happen when audit sleeps) | ||
440 | */ | ||
441 | mce_schedule_work(); | ||
442 | return; | ||
443 | } | ||
444 | |||
445 | #ifdef CONFIG_X86_LOCAL_APIC | ||
446 | /* | ||
447 | * Without APIC do not notify. The event will be picked | ||
448 | * up eventually. | ||
449 | */ | ||
450 | if (!cpu_has_apic) | ||
451 | return; | ||
452 | |||
453 | /* | ||
454 | * When interrupts are disabled we cannot use | ||
455 | * kernel services safely. Trigger a self interrupt | ||
456 | * through the APIC to instead do the notification | ||
457 | * after interrupts are reenabled again. | ||
458 | */ | ||
459 | apic->send_IPI_self(MCE_SELF_VECTOR); | ||
460 | |||
461 | /* | ||
462 | * Wait for idle afterwards again so that we don't leave the | ||
463 | * APIC in a non idle state because the normal APIC writes | ||
464 | * cannot exclude us. | ||
465 | */ | ||
466 | apic_wait_icr_idle(); | ||
467 | #endif | ||
468 | } | ||
469 | |||
470 | DEFINE_PER_CPU(unsigned, mce_poll_count); | ||
471 | |||
472 | /* | ||
473 | * Poll for corrected events or events that happened before reset. | ||
474 | * Those are just logged through /dev/mcelog. | ||
475 | * | ||
476 | * This is executed in standard interrupt context. | ||
477 | * | ||
478 | * Note: spec recommends to panic for fatal unsignalled | ||
479 | * errors here. However this would be quite problematic -- | ||
480 | * we would need to reimplement the Monarch handling and | ||
481 | * it would mess up the exclusion between exception handler | ||
482 | * and poll handler -- so we skip this for now. | ||
483 | * These cases should not happen anyway, or only when the CPU | ||
484 | * is already totally confused. In this case it's likely it will | ||
485 | * not fully execute the machine check handler either. | ||
486 | */ | ||
487 | void machine_check_poll(enum mcp_flags flags, mce_banks_t *b) | ||
488 | { | ||
489 | struct mce m; | ||
490 | int i; | ||
491 | |||
492 | __get_cpu_var(mce_poll_count)++; | ||
493 | |||
494 | mce_setup(&m); | ||
495 | |||
496 | m.mcgstatus = mce_rdmsrl(MSR_IA32_MCG_STATUS); | ||
497 | for (i = 0; i < banks; i++) { | ||
498 | if (!bank[i] || !test_bit(i, *b)) | ||
499 | continue; | ||
500 | |||
501 | m.misc = 0; | ||
502 | m.addr = 0; | ||
503 | m.bank = i; | ||
504 | m.tsc = 0; | ||
505 | |||
506 | barrier(); | ||
507 | m.status = mce_rdmsrl(MSR_IA32_MC0_STATUS + i*4); | ||
508 | if (!(m.status & MCI_STATUS_VAL)) | ||
509 | continue; | ||
510 | |||
511 | /* | ||
512 | * Uncorrected or signalled events are handled by the exception | ||
513 | * handler when it is enabled, so don't process those here. | ||
514 | * | ||
515 | * TBD do the same check for MCI_STATUS_EN here? | ||
516 | */ | ||
517 | if (!(flags & MCP_UC) && | ||
518 | (m.status & (mce_ser ? MCI_STATUS_S : MCI_STATUS_UC))) | ||
519 | continue; | ||
520 | |||
521 | if (m.status & MCI_STATUS_MISCV) | ||
522 | m.misc = mce_rdmsrl(MSR_IA32_MC0_MISC + i*4); | ||
523 | if (m.status & MCI_STATUS_ADDRV) | ||
524 | m.addr = mce_rdmsrl(MSR_IA32_MC0_ADDR + i*4); | ||
525 | |||
526 | if (!(flags & MCP_TIMESTAMP)) | ||
527 | m.tsc = 0; | ||
528 | /* | ||
529 | * Don't get the IP here because it's unlikely to | ||
530 | * have anything to do with the actual error location. | ||
531 | */ | ||
532 | if (!(flags & MCP_DONTLOG) && !mce_dont_log_ce) { | ||
533 | mce_log(&m); | ||
534 | add_taint(TAINT_MACHINE_CHECK); | ||
535 | } | ||
536 | |||
537 | /* | ||
538 | * Clear state for this bank. | ||
539 | */ | ||
540 | mce_wrmsrl(MSR_IA32_MC0_STATUS+4*i, 0); | ||
541 | } | ||
542 | |||
543 | /* | ||
544 | * Don't clear MCG_STATUS here because it's only defined for | ||
545 | * exceptions. | ||
546 | */ | ||
547 | |||
548 | sync_core(); | ||
549 | } | ||
550 | EXPORT_SYMBOL_GPL(machine_check_poll); | ||
551 | |||
552 | /* | ||
553 | * Do a quick check if any of the events requires a panic. | ||
554 | * This decides if we keep the events around or clear them. | ||
555 | */ | ||
556 | static int mce_no_way_out(struct mce *m, char **msg) | ||
557 | { | ||
558 | int i; | ||
559 | |||
560 | for (i = 0; i < banks; i++) { | ||
561 | m->status = mce_rdmsrl(MSR_IA32_MC0_STATUS + i*4); | ||
562 | if (mce_severity(m, tolerant, msg) >= MCE_PANIC_SEVERITY) | ||
563 | return 1; | ||
564 | } | ||
565 | return 0; | ||
566 | } | ||
567 | |||
568 | /* | ||
569 | * Variable to establish order between CPUs while scanning. | ||
570 | * Each CPU spins initially until executing is equal its number. | ||
571 | */ | ||
572 | static atomic_t mce_executing; | ||
573 | |||
574 | /* | ||
575 | * Defines order of CPUs on entry. First CPU becomes Monarch. | ||
576 | */ | ||
577 | static atomic_t mce_callin; | ||
578 | |||
579 | /* | ||
580 | * Check if a timeout waiting for other CPUs happened. | ||
581 | */ | ||
582 | static int mce_timed_out(u64 *t) | ||
583 | { | ||
584 | /* | ||
585 | * The others already did panic for some reason. | ||
586 | * Bail out like in a timeout. | ||
587 | * rmb() to tell the compiler that system_state | ||
588 | * might have been modified by someone else. | ||
589 | */ | ||
590 | rmb(); | ||
591 | if (atomic_read(&mce_paniced)) | ||
592 | wait_for_panic(); | ||
593 | if (!monarch_timeout) | ||
594 | goto out; | ||
595 | if ((s64)*t < SPINUNIT) { | ||
596 | /* CHECKME: Make panic default for 1 too? */ | ||
597 | if (tolerant < 1) | ||
598 | mce_panic("Timeout synchronizing machine check over CPUs", | ||
599 | NULL, NULL); | ||
600 | cpu_missing = 1; | ||
601 | return 1; | ||
602 | } | ||
603 | *t -= SPINUNIT; | ||
604 | out: | ||
605 | touch_nmi_watchdog(); | ||
606 | return 0; | ||
607 | } | ||
608 | |||
609 | /* | ||
610 | * The Monarch's reign. The Monarch is the CPU who entered | ||
611 | * the machine check handler first. It waits for the others to | ||
612 | * raise the exception too and then grades them. When any | ||
613 | * error is fatal panic. Only then let the others continue. | ||
614 | * | ||
615 | * The other CPUs entering the MCE handler will be controlled by the | ||
616 | * Monarch. They are called Subjects. | ||
617 | * | ||
618 | * This way we prevent any potential data corruption in an unrecoverable case | ||
619 | * and also make sure that all CPUs' errors are examined. | ||
620 | * | ||
621 | * This also detects the case of a machine check event coming from outer | ||
622 | * space (not detected by any CPU). In this case some external agent wants | ||
623 | * us to shut down, so panic too. | ||
624 | * | ||
625 | * The other CPUs might still decide to panic if the handler happens | ||
626 | * in an unrecoverable place, but in this case the system is in a semi-stable | ||
627 | * state and won't corrupt anything by itself. It's ok to let the others | ||
628 | * continue for a bit first. | ||
629 | * | ||
630 | * All the spin loops have timeouts; when a timeout happens a CPU | ||
631 | * typically elects itself to be Monarch. | ||
632 | */ | ||
633 | static void mce_reign(void) | ||
634 | { | ||
635 | int cpu; | ||
636 | struct mce *m = NULL; | ||
637 | int global_worst = 0; | ||
638 | char *msg = NULL; | ||
639 | char *nmsg = NULL; | ||
640 | |||
641 | /* | ||
642 | * This CPU is the Monarch and the other CPUs have run | ||
643 | * through their handlers. | ||
644 | * Grade the severity of the errors of all the CPUs. | ||
645 | */ | ||
646 | for_each_possible_cpu(cpu) { | ||
647 | int severity = mce_severity(&per_cpu(mces_seen, cpu), tolerant, | ||
648 | &nmsg); | ||
649 | if (severity > global_worst) { | ||
650 | msg = nmsg; | ||
651 | global_worst = severity; | ||
652 | m = &per_cpu(mces_seen, cpu); | ||
653 | } | ||
654 | } | ||
655 | |||
656 | /* | ||
657 | * Cannot recover? Panic here then. | ||
658 | * This dumps all the mces in the log buffer and stops the | ||
659 | * other CPUs. | ||
660 | */ | ||
661 | if (m && global_worst >= MCE_PANIC_SEVERITY && tolerant < 3) | ||
662 | mce_panic("Fatal Machine check", m, msg); | ||
663 | |||
664 | /* | ||
665 | * For a UC error somewhere we let the CPU that detects it handle it. | ||
666 | * We must also let the others continue, otherwise the handling | ||
667 | * CPU could deadlock on a lock. | ||
668 | */ | ||
669 | |||
670 | /* | ||
671 | * No machine check event found. Must be some external | ||
672 | * source or one CPU is hung. Panic. | ||
673 | */ | ||
674 | if (!m && tolerant < 3) | ||
675 | mce_panic("Machine check from unknown source", NULL, NULL); | ||
676 | |||
677 | /* | ||
678 | * Now clear all the mces_seen so that they don't reappear on | ||
679 | * the next mce. | ||
680 | */ | ||
681 | for_each_possible_cpu(cpu) | ||
682 | memset(&per_cpu(mces_seen, cpu), 0, sizeof(struct mce)); | ||
683 | } | ||
684 | |||
685 | static atomic_t global_nwo; | ||
686 | |||
687 | /* | ||
688 | * Start of Monarch synchronization. This waits until all CPUs have | ||
689 | * entered the exception handler and then determines if any of them | ||
690 | * saw a fatal event that requires a panic. Then it lets them run | ||
691 | * one by one in the entry order. | ||
692 | * TBD double check parallel CPU hotunplug | ||
693 | */ | ||
694 | static int mce_start(int *no_way_out) | ||
695 | { | ||
696 | int order; | ||
697 | int cpus = num_online_cpus(); | ||
698 | u64 timeout = (u64)monarch_timeout * NSEC_PER_USEC; | ||
699 | |||
700 | if (!timeout) | ||
701 | return -1; | ||
702 | |||
703 | atomic_add(*no_way_out, &global_nwo); | ||
704 | /* | ||
705 | * global_nwo should be updated before mce_callin | ||
706 | */ | ||
707 | smp_wmb(); | ||
708 | order = atomic_add_return(1, &mce_callin); | ||
709 | |||
710 | /* | ||
711 | * Wait for everyone. | ||
712 | */ | ||
713 | while (atomic_read(&mce_callin) != cpus) { | ||
714 | if (mce_timed_out(&timeout)) { | ||
715 | atomic_set(&global_nwo, 0); | ||
716 | return -1; | ||
717 | } | ||
718 | ndelay(SPINUNIT); | ||
719 | } | ||
720 | |||
721 | /* | ||
722 | * mce_callin should be read before global_nwo | ||
723 | */ | ||
724 | smp_rmb(); | ||
725 | |||
726 | if (order == 1) { | ||
727 | /* | ||
728 | * Monarch: Starts executing now, the others wait. | ||
729 | */ | ||
730 | atomic_set(&mce_executing, 1); | ||
731 | } else { | ||
732 | /* | ||
733 | * Subject: Now start the scanning loop one by one in | ||
734 | * the original callin order. | ||
735 | * This way, when there are any shared banks, each event will | ||
736 | * only be seen by one CPU before it is cleared, avoiding duplicates. | ||
737 | */ | ||
738 | while (atomic_read(&mce_executing) < order) { | ||
739 | if (mce_timed_out(&timeout)) { | ||
740 | atomic_set(&global_nwo, 0); | ||
741 | return -1; | ||
742 | } | ||
743 | ndelay(SPINUNIT); | ||
744 | } | ||
745 | } | ||
746 | |||
747 | /* | ||
748 | * Cache the global no_way_out state. | ||
749 | */ | ||
750 | *no_way_out = atomic_read(&global_nwo); | ||
751 | |||
752 | return order; | ||
753 | } | ||
754 | |||
755 | /* | ||
756 | * Synchronize between CPUs after main scanning loop. | ||
757 | * This invokes the bulk of the Monarch processing. | ||
758 | */ | ||
759 | static int mce_end(int order) | ||
760 | { | ||
761 | int ret = -1; | ||
762 | u64 timeout = (u64)monarch_timeout * NSEC_PER_USEC; | ||
763 | |||
764 | if (!timeout) | ||
765 | goto reset; | ||
766 | if (order < 0) | ||
767 | goto reset; | ||
768 | |||
769 | /* | ||
770 | * Allow others to run. | ||
771 | */ | ||
772 | atomic_inc(&mce_executing); | ||
773 | |||
774 | if (order == 1) { | ||
775 | /* CHECKME: Can this race with a parallel hotplug? */ | ||
776 | int cpus = num_online_cpus(); | ||
777 | |||
778 | /* | ||
779 | * Monarch: Wait for everyone to go through their scanning | ||
780 | * loops. | ||
781 | */ | ||
782 | while (atomic_read(&mce_executing) <= cpus) { | ||
783 | if (mce_timed_out(&timeout)) | ||
784 | goto reset; | ||
785 | ndelay(SPINUNIT); | ||
786 | } | ||
787 | |||
788 | mce_reign(); | ||
789 | barrier(); | ||
790 | ret = 0; | ||
791 | } else { | ||
792 | /* | ||
793 | * Subject: Wait for Monarch to finish. | ||
794 | */ | ||
795 | while (atomic_read(&mce_executing) != 0) { | ||
796 | if (mce_timed_out(&timeout)) | ||
797 | goto reset; | ||
798 | ndelay(SPINUNIT); | ||
799 | } | ||
800 | |||
801 | /* | ||
802 | * Don't reset anything. That's done by the Monarch. | ||
803 | */ | ||
804 | return 0; | ||
805 | } | ||
806 | |||
807 | /* | ||
808 | * Reset all global state. | ||
809 | */ | ||
810 | reset: | ||
811 | atomic_set(&global_nwo, 0); | ||
812 | atomic_set(&mce_callin, 0); | ||
813 | barrier(); | ||
814 | |||
815 | /* | ||
816 | * Let others run again. | ||
817 | */ | ||
818 | atomic_set(&mce_executing, 0); | ||
819 | return ret; | ||
820 | } | ||
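A hedged user-space analogue of the mce_start()/mce_end() rendezvous, reduced to its core (no timeouts, no panic paths, and the Subjects do not wait for the Monarch's final reset): every thread takes a callin number, waits until all have arrived, then scans strictly in callin order; the first arrival acts as Monarch and grades the results last. Names and thread count are illustrative:

#include <pthread.h>
#include <stdatomic.h>
#include <stdio.h>

#define NCPUS 4

static atomic_int callin;
static atomic_int executing;

static void *mce_handler(void *arg)
{
        int order = atomic_fetch_add(&callin, 1) + 1;

        (void)arg;

        while (atomic_load(&callin) != NCPUS)
                ;                               /* wait for everyone */

        while (atomic_load(&executing) < order - 1)
                ;                               /* wait for my turn */

        printf("order %d: scanning banks\n", order);

        atomic_fetch_add(&executing, 1);        /* let the next CPU run */

        if (order == 1) {
                /* Monarch: wait for all Subjects, then grade the results. */
                while (atomic_load(&executing) != NCPUS)
                        ;
                printf("order 1: monarch grading results\n");
        }
        return NULL;
}

int main(void)
{
        pthread_t t[NCPUS];
        int i;

        for (i = 0; i < NCPUS; i++)
                pthread_create(&t[i], NULL, mce_handler, NULL);
        for (i = 0; i < NCPUS; i++)
                pthread_join(t[i], NULL);
        return 0;
}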
821 | |||
822 | /* | ||
823 | * Check if the address reported by the CPU is in a format we can parse. | ||
824 | * It would be possible to add code for most other cases, but all would | ||
825 | * be somewhat complicated (e.g. segment offset would require an instruction | ||
826 | * parser). So only support physical addresses up to page granularity for now. | ||
827 | */ | ||
828 | static int mce_usable_address(struct mce *m) | ||
829 | { | ||
830 | if (!(m->status & MCI_STATUS_MISCV) || !(m->status & MCI_STATUS_ADDRV)) | ||
831 | return 0; | ||
832 | if ((m->misc & 0x3f) > PAGE_SHIFT) | ||
833 | return 0; | ||
834 | if (((m->misc >> 6) & 7) != MCM_ADDR_PHYS) | ||
835 | return 0; | ||
836 | return 1; | ||
837 | } | ||
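mce_usable_address() only trusts an address whose MCi_MISC register reports it as a plain physical address with at least page granularity: the low 6 bits give the least significant valid address bit, bits 8:6 the address mode. A hedged decode sketch (PAGE_SHIFT assumed to be 12; MCM_ADDR_PHYS as defined in asm/mce.h):

#include <stdbool.h>
#include <stdint.h>

#define PAGE_SHIFT      12
#define MCM_ADDR_PHYS   2               /* physical address mode */

static bool misc_gives_usable_addr(uint64_t misc)
{
        unsigned int lsb  = misc & 0x3f;        /* lowest valid address bit */
        unsigned int mode = (misc >> 6) & 7;    /* address mode field */

        /* Coarser than one page, or not a plain physical address: skip. */
        return lsb <= PAGE_SHIFT && mode == MCM_ADDR_PHYS;
}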
838 | |||
839 | static void mce_clear_state(unsigned long *toclear) | ||
840 | { | ||
841 | int i; | ||
842 | |||
843 | for (i = 0; i < banks; i++) { | ||
844 | if (test_bit(i, toclear)) | ||
845 | mce_wrmsrl(MSR_IA32_MC0_STATUS+4*i, 0); | ||
846 | } | ||
847 | } | ||
848 | |||
849 | /* | ||
850 | * The actual machine check handler. This only handles real | ||
851 | * exceptions when something got corrupted coming in through int 18. | ||
852 | * | ||
853 | * This is executed in NMI context not subject to normal locking rules. This | ||
854 | * implies that most kernel services cannot be safely used. Don't even | ||
855 | * think about putting a printk in there! | ||
856 | * | ||
857 | * On Intel systems this is entered on all CPUs in parallel through | ||
858 | * MCE broadcast. However some CPUs might be broken beyond repair, | ||
859 | * so be always careful when synchronizing with others. | ||
860 | */ | ||
861 | void do_machine_check(struct pt_regs *regs, long error_code) | ||
862 | { | ||
863 | struct mce m, *final; | ||
864 | int i; | ||
865 | int worst = 0; | ||
866 | int severity; | ||
867 | /* | ||
868 | * Establish sequential order between the CPUs entering the machine | ||
869 | * check handler. | ||
870 | */ | ||
871 | int order; | ||
872 | /* | ||
873 | * If no_way_out gets set, there is no safe way to recover from this | ||
874 | * MCE. If tolerant is cranked up, we'll try anyway. | ||
875 | */ | ||
876 | int no_way_out = 0; | ||
877 | /* | ||
878 | * If kill_it gets set, there might be a way to recover from this | ||
879 | * error. | ||
880 | */ | ||
881 | int kill_it = 0; | ||
882 | DECLARE_BITMAP(toclear, MAX_NR_BANKS); | ||
883 | char *msg = "Unknown"; | ||
884 | |||
885 | atomic_inc(&mce_entry); | ||
886 | |||
887 | __get_cpu_var(mce_exception_count)++; | ||
888 | |||
889 | if (notify_die(DIE_NMI, "machine check", regs, error_code, | ||
890 | 18, SIGKILL) == NOTIFY_STOP) | ||
891 | goto out; | ||
892 | if (!banks) | ||
893 | goto out; | ||
894 | |||
895 | mce_setup(&m); | ||
896 | |||
897 | m.mcgstatus = mce_rdmsrl(MSR_IA32_MCG_STATUS); | ||
898 | no_way_out = mce_no_way_out(&m, &msg); | ||
899 | |||
900 | final = &__get_cpu_var(mces_seen); | ||
901 | *final = m; | ||
902 | |||
903 | barrier(); | ||
904 | |||
905 | /* | ||
906 | * If there is no restart IP we must always kill or panic. | ||
907 | */ | ||
908 | if (!(m.mcgstatus & MCG_STATUS_RIPV)) | ||
909 | kill_it = 1; | ||
910 | |||
911 | /* | ||
912 | * Go through all the banks in exclusion of the other CPUs. | ||
913 | * This way we don't report duplicated events on shared banks | ||
914 | * because the first one to see it will clear it. | ||
915 | */ | ||
916 | order = mce_start(&no_way_out); | ||
917 | for (i = 0; i < banks; i++) { | ||
918 | __clear_bit(i, toclear); | ||
919 | if (!bank[i]) | ||
920 | continue; | ||
921 | |||
922 | m.misc = 0; | ||
923 | m.addr = 0; | ||
924 | m.bank = i; | ||
925 | |||
926 | m.status = mce_rdmsrl(MSR_IA32_MC0_STATUS + i*4); | ||
927 | if ((m.status & MCI_STATUS_VAL) == 0) | ||
928 | continue; | ||
929 | |||
930 | /* | ||
931 | * Non uncorrected or non signaled errors are handled by | ||
932 | * machine_check_poll. Leave them alone, unless this panics. | ||
933 | */ | ||
934 | if (!(m.status & (mce_ser ? MCI_STATUS_S : MCI_STATUS_UC)) && | ||
935 | !no_way_out) | ||
936 | continue; | ||
937 | |||
938 | /* | ||
939 | * Set taint even when machine check was not enabled. | ||
940 | */ | ||
941 | add_taint(TAINT_MACHINE_CHECK); | ||
942 | |||
943 | severity = mce_severity(&m, tolerant, NULL); | ||
944 | |||
945 | /* | ||
946 | * When the machine check was for a corrected error, don't touch it | ||
947 | * here; leave it to the corrected-error handler, unless we're panicking. | ||
948 | */ | ||
949 | if (severity == MCE_KEEP_SEVERITY && !no_way_out) | ||
950 | continue; | ||
951 | __set_bit(i, toclear); | ||
952 | if (severity == MCE_NO_SEVERITY) { | ||
953 | /* | ||
954 | * Machine check event was not enabled. Clear, but | ||
955 | * ignore. | ||
956 | */ | ||
957 | continue; | ||
958 | } | ||
959 | |||
960 | /* | ||
961 | * Kill on action required. | ||
962 | */ | ||
963 | if (severity == MCE_AR_SEVERITY) | ||
964 | kill_it = 1; | ||
965 | |||
966 | if (m.status & MCI_STATUS_MISCV) | ||
967 | m.misc = mce_rdmsrl(MSR_IA32_MC0_MISC + i*4); | ||
968 | if (m.status & MCI_STATUS_ADDRV) | ||
969 | m.addr = mce_rdmsrl(MSR_IA32_MC0_ADDR + i*4); | ||
970 | |||
971 | /* | ||
972 | * Action optional error. Queue address for later processing. | ||
973 | * When the ring overflows we just ignore the AO error. | ||
974 | * RED-PEN add some logging mechanism when | ||
975 | * usable_address or mce_add_ring fails. | ||
976 | * RED-PEN don't ignore overflow for tolerant == 0 | ||
977 | */ | ||
978 | if (severity == MCE_AO_SEVERITY && mce_usable_address(&m)) | ||
979 | mce_ring_add(m.addr >> PAGE_SHIFT); | ||
980 | |||
981 | mce_get_rip(&m, regs); | ||
982 | mce_log(&m); | ||
983 | |||
984 | if (severity > worst) { | ||
985 | *final = m; | ||
986 | worst = severity; | ||
987 | } | ||
988 | } | ||
989 | |||
990 | if (!no_way_out) | ||
991 | mce_clear_state(toclear); | ||
992 | |||
993 | /* | ||
994 | * Do most of the synchronization with other CPUs. | ||
995 | * When there's any problem use only local no_way_out state. | ||
996 | */ | ||
997 | if (mce_end(order) < 0) | ||
998 | no_way_out = worst >= MCE_PANIC_SEVERITY; | ||
999 | |||
1000 | /* | ||
1001 | * If we have decided that we just CAN'T continue, and the user | ||
1002 | * has not set tolerant to an insane level, give up and die. | ||
1003 | * | ||
1004 | * This is mainly used in the case when the system doesn't | ||
1005 | * support MCE broadcasting or it has been disabled. | ||
1006 | */ | ||
1007 | if (no_way_out && tolerant < 3) | ||
1008 | mce_panic("Fatal machine check on current CPU", final, msg); | ||
1009 | |||
1010 | /* | ||
1011 | * If the error seems to be unrecoverable, something should be | ||
1012 | * done. Try to kill as little as possible. If we can kill just | ||
1013 | * one task, do that. If the user has set the tolerance very | ||
1014 | * high, don't try to do anything at all. | ||
1015 | */ | ||
1016 | |||
1017 | if (kill_it && tolerant < 3) | ||
1018 | force_sig(SIGBUS, current); | ||
1019 | |||
1020 | /* notify userspace ASAP */ | ||
1021 | set_thread_flag(TIF_MCE_NOTIFY); | ||
1022 | |||
1023 | if (worst > 0) | ||
1024 | mce_report_event(regs); | ||
1025 | mce_wrmsrl(MSR_IA32_MCG_STATUS, 0); | ||
1026 | out: | ||
1027 | atomic_dec(&mce_entry); | ||
1028 | sync_core(); | ||
1029 | } | ||
1030 | EXPORT_SYMBOL_GPL(do_machine_check); | ||
1031 | |||
1032 | /* dummy to break dependency. actual code is in mm/memory-failure.c */ | ||
1033 | void __attribute__((weak)) memory_failure(unsigned long pfn, int vector) | ||
1034 | { | ||
1035 | printk(KERN_ERR "Action optional memory failure at %lx ignored\n", pfn); | ||
1036 | } | ||
1037 | |||
1038 | /* | ||
1039 | * Called after mce notification in process context. This code | ||
1040 | * is allowed to sleep. Call the high level VM handler to process | ||
1041 | * any corrupted pages. | ||
1042 | * Assume that the work queue code only calls this one at a time | ||
1043 | * per CPU. | ||
1044 | * Note we don't disable preemption, so this code might run on the wrong | ||
1045 | * CPU. In this case the event is picked up by the scheduled work queue. | ||
1046 | * This is merely a fast path to expedite processing in some common | ||
1047 | * cases. | ||
1048 | */ | ||
1049 | void mce_notify_process(void) | ||
1050 | { | ||
1051 | unsigned long pfn; | ||
1052 | mce_notify_irq(); | ||
1053 | while (mce_ring_get(&pfn)) | ||
1054 | memory_failure(pfn, MCE_VECTOR); | ||
1055 | } | ||
1056 | |||
1057 | static void mce_process_work(struct work_struct *dummy) | ||
1058 | { | ||
1059 | mce_notify_process(); | ||
1060 | } | ||
1061 | |||
1062 | #ifdef CONFIG_X86_MCE_INTEL | ||
1063 | /*** | ||
1064 | * mce_log_therm_throt_event - Logs the thermal throttling event to mcelog | ||
1065 | * @cpu: The CPU on which the event occurred. | ||
1066 | * @status: Event status information | ||
1067 | * | ||
1068 | * This function should be called by the thermal interrupt after the | ||
1069 | * event has been processed and the decision was made to log the event | ||
1070 | * further. | ||
1071 | * | ||
1072 | * The status parameter will be saved to the 'status' field of 'struct mce' | ||
1073 | * and historically has been the register value of the | ||
1074 | * MSR_IA32_THERMAL_STATUS (Intel) msr. | ||
1075 | */ | ||
1076 | void mce_log_therm_throt_event(__u64 status) | ||
1077 | { | ||
1078 | struct mce m; | ||
1079 | |||
1080 | mce_setup(&m); | ||
1081 | m.bank = MCE_THERMAL_BANK; | ||
1082 | m.status = status; | ||
1083 | mce_log(&m); | ||
1084 | } | ||
1085 | #endif /* CONFIG_X86_MCE_INTEL */ | ||
1086 | |||
1087 | /* | ||
1088 | * Periodic polling timer for "silent" machine check errors. If the | ||
1089 | * poller finds an MCE, poll 2x faster. When the poller finds no more | ||
1090 | * errors, poll 2x slower (up to check_interval seconds). | ||
1091 | */ | ||
1092 | static int check_interval = 5 * 60; /* 5 minutes */ | ||
1093 | |||
1094 | static DEFINE_PER_CPU(int, next_interval); /* in jiffies */ | ||
1095 | static DEFINE_PER_CPU(struct timer_list, mce_timer); | ||
1096 | |||
1097 | static void mcheck_timer(unsigned long data) | ||
1098 | { | ||
1099 | struct timer_list *t = &per_cpu(mce_timer, data); | ||
1100 | int *n; | ||
1101 | |||
1102 | WARN_ON(smp_processor_id() != data); | ||
1103 | |||
1104 | if (mce_available(¤t_cpu_data)) { | ||
1105 | machine_check_poll(MCP_TIMESTAMP, | ||
1106 | &__get_cpu_var(mce_poll_banks)); | ||
1107 | } | ||
1108 | |||
1109 | /* | ||
1110 | * Alert userspace if needed. If we logged an MCE, reduce the | ||
1111 | * polling interval, otherwise increase the polling interval. | ||
1112 | */ | ||
1113 | n = &__get_cpu_var(next_interval); | ||
1114 | if (mce_notify_irq()) | ||
1115 | *n = max(*n/2, HZ/100); | ||
1116 | else | ||
1117 | *n = min(*n*2, (int)round_jiffies_relative(check_interval*HZ)); | ||
1118 | |||
1119 | t->expires = jiffies + *n; | ||
1120 | add_timer(t); | ||
1121 | } | ||
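As a rough stand-alone illustration of the adaptive polling described above (a hypothetical helper, not part of the patch; HZ and CHECK_INTERVAL are example stand-ins for the kernel's HZ and check_interval, and the round_jiffies_relative() rounding is ignored):

#include <stdio.h>

#define HZ             250          /* assumed tick rate for the example */
#define CHECK_INTERVAL (5 * 60)     /* seconds, mirrors check_interval above */

/* Halve the poll interval (in jiffies) after an event was logged, double
 * it otherwise, clamped to the range [HZ/100, CHECK_INTERVAL*HZ]. */
static int next_poll_interval(int n, int logged)
{
	if (logged)
		return n / 2 > HZ / 100 ? n / 2 : HZ / 100;
	return n * 2 < CHECK_INTERVAL * HZ ? n * 2 : CHECK_INTERVAL * HZ;
}

int main(void)
{
	int n = CHECK_INTERVAL * HZ;

	n = next_poll_interval(n, 1);   /* an MCE was logged: poll 2x faster */
	n = next_poll_interval(n, 0);   /* quiet again: back off 2x */
	printf("%d jiffies\n", n);
	return 0;
}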
1122 | |||
1123 | static void mce_do_trigger(struct work_struct *work) | ||
1124 | { | ||
1125 | call_usermodehelper(mce_helper, mce_helper_argv, NULL, UMH_NO_WAIT); | ||
1126 | } | ||
1127 | |||
1128 | static DECLARE_WORK(mce_trigger_work, mce_do_trigger); | ||
1129 | |||
1130 | /* | ||
1131 | * Notify the user(s) about new machine check events. | ||
1132 | * Can be called from interrupt context, but not from machine check/NMI | ||
1133 | * context. | ||
1134 | */ | ||
1135 | int mce_notify_irq(void) | ||
1136 | { | ||
1137 | /* Not more than two messages every minute */ | ||
1138 | static DEFINE_RATELIMIT_STATE(ratelimit, 60*HZ, 2); | ||
1139 | |||
1140 | clear_thread_flag(TIF_MCE_NOTIFY); | ||
1141 | |||
1142 | if (test_and_clear_bit(0, &mce_need_notify)) { | ||
1143 | wake_up_interruptible(&mce_wait); | ||
1144 | |||
1145 | /* | ||
1146 | * There is no risk of missing notifications because | ||
1147 | * work_pending is always cleared before the function is | ||
1148 | * executed. | ||
1149 | */ | ||
1150 | if (mce_helper[0] && !work_pending(&mce_trigger_work)) | ||
1151 | schedule_work(&mce_trigger_work); | ||
1152 | |||
1153 | if (__ratelimit(&ratelimit)) | ||
1154 | printk(KERN_INFO "Machine check events logged\n"); | ||
1155 | |||
1156 | return 1; | ||
1157 | } | ||
1158 | return 0; | ||
1159 | } | ||
1160 | EXPORT_SYMBOL_GPL(mce_notify_irq); | ||
1161 | |||
1162 | /* | ||
1163 | * Initialize Machine Checks for a CPU. | ||
1164 | */ | ||
1165 | static int mce_cap_init(void) | ||
1166 | { | ||
1167 | unsigned b; | ||
1168 | u64 cap; | ||
1169 | |||
1170 | rdmsrl(MSR_IA32_MCG_CAP, cap); | ||
1171 | |||
1172 | b = cap & MCG_BANKCNT_MASK; | ||
1173 | printk(KERN_INFO "mce: CPU supports %d MCE banks\n", b); | ||
1174 | |||
1175 | if (b > MAX_NR_BANKS) { | ||
1176 | printk(KERN_WARNING | ||
1177 | "MCE: Using only %u machine check banks out of %u\n", | ||
1178 | MAX_NR_BANKS, b); | ||
1179 | b = MAX_NR_BANKS; | ||
1180 | } | ||
1181 | |||
1182 | /* Don't support asymmetric configurations today */ | ||
1183 | WARN_ON(banks != 0 && b != banks); | ||
1184 | banks = b; | ||
1185 | if (!bank) { | ||
1186 | bank = kmalloc(banks * sizeof(u64), GFP_KERNEL); | ||
1187 | if (!bank) | ||
1188 | return -ENOMEM; | ||
1189 | memset(bank, 0xff, banks * sizeof(u64)); | ||
1190 | } | ||
1191 | |||
1192 | /* Use accurate RIP reporting if available. */ | ||
1193 | if ((cap & MCG_EXT_P) && MCG_EXT_CNT(cap) >= 9) | ||
1194 | rip_msr = MSR_IA32_MCG_EIP; | ||
1195 | |||
1196 | if (cap & MCG_SER_P) | ||
1197 | mce_ser = 1; | ||
1198 | |||
1199 | return 0; | ||
1200 | } | ||
1201 | |||
1202 | static void mce_init(void) | ||
1203 | { | ||
1204 | mce_banks_t all_banks; | ||
1205 | u64 cap; | ||
1206 | int i; | ||
1207 | |||
1208 | /* | ||
1209 | * Log the machine checks left over from the previous reset. | ||
1210 | */ | ||
1211 | bitmap_fill(all_banks, MAX_NR_BANKS); | ||
1212 | machine_check_poll(MCP_UC|(!mce_bootlog ? MCP_DONTLOG : 0), &all_banks); | ||
1213 | |||
1214 | set_in_cr4(X86_CR4_MCE); | ||
1215 | |||
1216 | rdmsrl(MSR_IA32_MCG_CAP, cap); | ||
1217 | if (cap & MCG_CTL_P) | ||
1218 | wrmsr(MSR_IA32_MCG_CTL, 0xffffffff, 0xffffffff); | ||
1219 | |||
1220 | for (i = 0; i < banks; i++) { | ||
1221 | if (skip_bank_init(i)) | ||
1222 | continue; | ||
1223 | wrmsrl(MSR_IA32_MC0_CTL+4*i, bank[i]); | ||
1224 | wrmsrl(MSR_IA32_MC0_STATUS+4*i, 0); | ||
1225 | } | ||
1226 | } | ||
1227 | |||
1228 | /* Add per CPU specific workarounds here */ | ||
1229 | static void mce_cpu_quirks(struct cpuinfo_x86 *c) | ||
1230 | { | ||
1231 | /* This should be disabled by the BIOS, but isn't always */ | ||
1232 | if (c->x86_vendor == X86_VENDOR_AMD) { | ||
1233 | if (c->x86 == 15 && banks > 4) { | ||
1234 | /* | ||
1235 | * disable GART TBL walk error reporting, which | ||
1236 | * trips off incorrectly with the IOMMU & 3ware | ||
1237 | * & Cerberus: | ||
1238 | */ | ||
1239 | clear_bit(10, (unsigned long *)&bank[4]); | ||
1240 | } | ||
1241 | if (c->x86 <= 17 && mce_bootlog < 0) { | ||
1242 | /* | ||
1243 | 	 * Lots of broken BIOSes around that don't clear them | ||
1244 | * by default and leave crap in there. Don't log: | ||
1245 | */ | ||
1246 | mce_bootlog = 0; | ||
1247 | } | ||
1248 | /* | ||
1249 | * Various K7s with broken bank 0 around. Always disable | ||
1250 | * by default. | ||
1251 | */ | ||
1252 | if (c->x86 == 6 && banks > 0) | ||
1253 | bank[0] = 0; | ||
1254 | } | ||
1255 | |||
1256 | if (c->x86_vendor == X86_VENDOR_INTEL) { | ||
1257 | /* | ||
1258 | 	 * The SDM documents that on family 6, bank 0 should not be written | ||
1259 | * because it aliases to another special BIOS controlled | ||
1260 | * register. | ||
1261 | 	 * But it's not aliased anymore on model 0x1a+. | ||
1262 | * Don't ignore bank 0 completely because there could be a | ||
1263 | * valid event later, merely don't write CTL0. | ||
1264 | */ | ||
1265 | |||
1266 | if (c->x86 == 6 && c->x86_model < 0x1A) | ||
1267 | __set_bit(0, &dont_init_banks); | ||
1268 | |||
1269 | /* | ||
1270 | * All newer Intel systems support MCE broadcasting. Enable | ||
1271 | * synchronization with a one second timeout. | ||
1272 | */ | ||
1273 | if ((c->x86 > 6 || (c->x86 == 6 && c->x86_model >= 0xe)) && | ||
1274 | monarch_timeout < 0) | ||
1275 | monarch_timeout = USEC_PER_SEC; | ||
1276 | } | ||
1277 | if (monarch_timeout < 0) | ||
1278 | monarch_timeout = 0; | ||
1279 | if (mce_bootlog != 0) | ||
1280 | mce_panic_timeout = 30; | ||
1281 | } | ||
1282 | |||
1283 | static void __cpuinit mce_ancient_init(struct cpuinfo_x86 *c) | ||
1284 | { | ||
1285 | if (c->x86 != 5) | ||
1286 | return; | ||
1287 | switch (c->x86_vendor) { | ||
1288 | case X86_VENDOR_INTEL: | ||
1289 | intel_p5_mcheck_init(c); | ||
1290 | break; | ||
1291 | case X86_VENDOR_CENTAUR: | ||
1292 | winchip_mcheck_init(c); | ||
1293 | break; | ||
1294 | } | ||
1295 | } | ||
1296 | |||
1297 | static void mce_cpu_features(struct cpuinfo_x86 *c) | ||
1298 | { | ||
1299 | switch (c->x86_vendor) { | ||
1300 | case X86_VENDOR_INTEL: | ||
1301 | mce_intel_feature_init(c); | ||
1302 | break; | ||
1303 | case X86_VENDOR_AMD: | ||
1304 | mce_amd_feature_init(c); | ||
1305 | break; | ||
1306 | default: | ||
1307 | break; | ||
1308 | } | ||
1309 | } | ||
1310 | |||
1311 | static void mce_init_timer(void) | ||
1312 | { | ||
1313 | struct timer_list *t = &__get_cpu_var(mce_timer); | ||
1314 | int *n = &__get_cpu_var(next_interval); | ||
1315 | |||
1316 | if (mce_ignore_ce) | ||
1317 | return; | ||
1318 | |||
1319 | *n = check_interval * HZ; | ||
1320 | if (!*n) | ||
1321 | return; | ||
1322 | setup_timer(t, mcheck_timer, smp_processor_id()); | ||
1323 | t->expires = round_jiffies(jiffies + *n); | ||
1324 | add_timer(t); | ||
1325 | } | ||
1326 | |||
1327 | /* | ||
1328 | * Called for each booted CPU to set up machine checks. | ||
1329 | * Must be called with preempt off: | ||
1330 | */ | ||
1331 | void __cpuinit mcheck_init(struct cpuinfo_x86 *c) | ||
1332 | { | ||
1333 | if (mce_disabled) | ||
1334 | return; | ||
1335 | |||
1336 | mce_ancient_init(c); | ||
1337 | |||
1338 | if (!mce_available(c)) | ||
1339 | return; | ||
1340 | |||
1341 | if (mce_cap_init() < 0) { | ||
1342 | mce_disabled = 1; | ||
1343 | return; | ||
1344 | } | ||
1345 | mce_cpu_quirks(c); | ||
1346 | |||
1347 | machine_check_vector = do_machine_check; | ||
1348 | |||
1349 | mce_init(); | ||
1350 | mce_cpu_features(c); | ||
1351 | mce_init_timer(); | ||
1352 | INIT_WORK(&__get_cpu_var(mce_work), mce_process_work); | ||
1353 | } | ||
1354 | |||
1355 | /* | ||
1356 | * Character device to read and clear the MCE log. | ||
1357 | */ | ||
1358 | |||
1359 | static DEFINE_SPINLOCK(mce_state_lock); | ||
1360 | static int open_count; /* #times opened */ | ||
1361 | static int open_exclu; /* already open exclusive? */ | ||
1362 | |||
1363 | static int mce_open(struct inode *inode, struct file *file) | ||
1364 | { | ||
1365 | spin_lock(&mce_state_lock); | ||
1366 | |||
1367 | if (open_exclu || (open_count && (file->f_flags & O_EXCL))) { | ||
1368 | spin_unlock(&mce_state_lock); | ||
1369 | |||
1370 | return -EBUSY; | ||
1371 | } | ||
1372 | |||
1373 | if (file->f_flags & O_EXCL) | ||
1374 | open_exclu = 1; | ||
1375 | open_count++; | ||
1376 | |||
1377 | spin_unlock(&mce_state_lock); | ||
1378 | |||
1379 | return nonseekable_open(inode, file); | ||
1380 | } | ||
1381 | |||
1382 | static int mce_release(struct inode *inode, struct file *file) | ||
1383 | { | ||
1384 | spin_lock(&mce_state_lock); | ||
1385 | |||
1386 | open_count--; | ||
1387 | open_exclu = 0; | ||
1388 | |||
1389 | spin_unlock(&mce_state_lock); | ||
1390 | |||
1391 | return 0; | ||
1392 | } | ||
1393 | |||
1394 | static void collect_tscs(void *data) | ||
1395 | { | ||
1396 | unsigned long *cpu_tsc = (unsigned long *)data; | ||
1397 | |||
1398 | rdtscll(cpu_tsc[smp_processor_id()]); | ||
1399 | } | ||
1400 | |||
1401 | static DEFINE_MUTEX(mce_read_mutex); | ||
1402 | |||
1403 | static ssize_t mce_read(struct file *filp, char __user *ubuf, size_t usize, | ||
1404 | loff_t *off) | ||
1405 | { | ||
1406 | char __user *buf = ubuf; | ||
1407 | unsigned long *cpu_tsc; | ||
1408 | unsigned prev, next; | ||
1409 | int i, err; | ||
1410 | |||
1411 | cpu_tsc = kmalloc(nr_cpu_ids * sizeof(long), GFP_KERNEL); | ||
1412 | if (!cpu_tsc) | ||
1413 | return -ENOMEM; | ||
1414 | |||
1415 | mutex_lock(&mce_read_mutex); | ||
1416 | next = rcu_dereference(mcelog.next); | ||
1417 | |||
1418 | /* Only supports full reads right now */ | ||
1419 | if (*off != 0 || usize < MCE_LOG_LEN*sizeof(struct mce)) { | ||
1420 | mutex_unlock(&mce_read_mutex); | ||
1421 | kfree(cpu_tsc); | ||
1422 | |||
1423 | return -EINVAL; | ||
1424 | } | ||
1425 | |||
1426 | err = 0; | ||
1427 | prev = 0; | ||
1428 | do { | ||
1429 | for (i = prev; i < next; i++) { | ||
1430 | unsigned long start = jiffies; | ||
1431 | |||
1432 | while (!mcelog.entry[i].finished) { | ||
1433 | if (time_after_eq(jiffies, start + 2)) { | ||
1434 | memset(mcelog.entry + i, 0, | ||
1435 | sizeof(struct mce)); | ||
1436 | goto timeout; | ||
1437 | } | ||
1438 | cpu_relax(); | ||
1439 | } | ||
1440 | smp_rmb(); | ||
1441 | err |= copy_to_user(buf, mcelog.entry + i, | ||
1442 | sizeof(struct mce)); | ||
1443 | buf += sizeof(struct mce); | ||
1444 | timeout: | ||
1445 | ; | ||
1446 | } | ||
1447 | |||
1448 | memset(mcelog.entry + prev, 0, | ||
1449 | (next - prev) * sizeof(struct mce)); | ||
1450 | prev = next; | ||
1451 | next = cmpxchg(&mcelog.next, prev, 0); | ||
1452 | } while (next != prev); | ||
1453 | |||
1454 | synchronize_sched(); | ||
1455 | |||
1456 | /* | ||
1457 | * Collect entries that were still getting written before the | ||
1458 | * synchronize. | ||
1459 | */ | ||
1460 | on_each_cpu(collect_tscs, cpu_tsc, 1); | ||
1461 | |||
1462 | for (i = next; i < MCE_LOG_LEN; i++) { | ||
1463 | if (mcelog.entry[i].finished && | ||
1464 | mcelog.entry[i].tsc < cpu_tsc[mcelog.entry[i].cpu]) { | ||
1465 | err |= copy_to_user(buf, mcelog.entry+i, | ||
1466 | sizeof(struct mce)); | ||
1467 | smp_rmb(); | ||
1468 | buf += sizeof(struct mce); | ||
1469 | memset(&mcelog.entry[i], 0, sizeof(struct mce)); | ||
1470 | } | ||
1471 | } | ||
1472 | mutex_unlock(&mce_read_mutex); | ||
1473 | kfree(cpu_tsc); | ||
1474 | |||
1475 | return err ? -EFAULT : buf - ubuf; | ||
1476 | } | ||
1477 | |||
1478 | static unsigned int mce_poll(struct file *file, poll_table *wait) | ||
1479 | { | ||
1480 | poll_wait(file, &mce_wait, wait); | ||
1481 | if (rcu_dereference(mcelog.next)) | ||
1482 | return POLLIN | POLLRDNORM; | ||
1483 | return 0; | ||
1484 | } | ||
1485 | |||
1486 | static long mce_ioctl(struct file *f, unsigned int cmd, unsigned long arg) | ||
1487 | { | ||
1488 | int __user *p = (int __user *)arg; | ||
1489 | |||
1490 | if (!capable(CAP_SYS_ADMIN)) | ||
1491 | return -EPERM; | ||
1492 | |||
1493 | switch (cmd) { | ||
1494 | case MCE_GET_RECORD_LEN: | ||
1495 | return put_user(sizeof(struct mce), p); | ||
1496 | case MCE_GET_LOG_LEN: | ||
1497 | return put_user(MCE_LOG_LEN, p); | ||
1498 | case MCE_GETCLEAR_FLAGS: { | ||
1499 | unsigned flags; | ||
1500 | |||
1501 | do { | ||
1502 | flags = mcelog.flags; | ||
1503 | } while (cmpxchg(&mcelog.flags, flags, 0) != flags); | ||
1504 | |||
1505 | return put_user(flags, p); | ||
1506 | } | ||
1507 | default: | ||
1508 | return -ENOTTY; | ||
1509 | } | ||
1510 | } | ||
1511 | |||
1512 | /* Modified in mce-inject.c, so not static or const */ | ||
1513 | struct file_operations mce_chrdev_ops = { | ||
1514 | .open = mce_open, | ||
1515 | .release = mce_release, | ||
1516 | .read = mce_read, | ||
1517 | .poll = mce_poll, | ||
1518 | .unlocked_ioctl = mce_ioctl, | ||
1519 | }; | ||
1520 | EXPORT_SYMBOL_GPL(mce_chrdev_ops); | ||
1521 | |||
1522 | static struct miscdevice mce_log_device = { | ||
1523 | MISC_MCELOG_MINOR, | ||
1524 | "mcelog", | ||
1525 | &mce_chrdev_ops, | ||
1526 | }; | ||
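A hedged user-space sketch of talking to the /dev/mcelog character device registered above; it assumes the MCE_GET_* ioctl numbers handled by mce_ioctl() are visible through <asm/mce.h> on the build host and that the device node exists:

/* Hypothetical probe of /dev/mcelog; not part of the patch. */
#include <fcntl.h>
#include <stdio.h>
#include <sys/ioctl.h>
#include <unistd.h>
#include <asm/mce.h>

int main(void)
{
	int record_len = 0, log_len = 0;
	int fd = open("/dev/mcelog", O_RDONLY);

	if (fd < 0)
		return 1;
	if (!ioctl(fd, MCE_GET_RECORD_LEN, &record_len) &&
	    !ioctl(fd, MCE_GET_LOG_LEN, &log_len))
		printf("%d records of %d bytes each\n", log_len, record_len);
	close(fd);
	return 0;
}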
1527 | |||
1528 | /* | ||
1529 | * mce=off Disables machine check | ||
1530 | * mce=no_cmci Disables CMCI | ||
1531 | * mce=dont_log_ce Clears corrected events silently, no log created for CEs. | ||
1532 | * mce=ignore_ce Disables polling and CMCI, corrected events are not cleared. | ||
1533 | * mce=TOLERANCELEVEL[,monarchtimeout] (number, see above) | ||
1534 | * monarchtimeout is how long to wait for other CPUs on machine | ||
1535 | * check, or 0 to not wait | ||
1536 | * mce=bootlog Log MCEs from before booting. Disabled by default on AMD. | ||
1537 | * mce=nobootlog Don't log MCEs from before booting. | ||
1538 | */ | ||
1539 | static int __init mcheck_enable(char *str) | ||
1540 | { | ||
1541 | if (*str == 0) | ||
1542 | enable_p5_mce(); | ||
1543 | if (*str == '=') | ||
1544 | str++; | ||
1545 | if (!strcmp(str, "off")) | ||
1546 | mce_disabled = 1; | ||
1547 | else if (!strcmp(str, "no_cmci")) | ||
1548 | mce_cmci_disabled = 1; | ||
1549 | else if (!strcmp(str, "dont_log_ce")) | ||
1550 | mce_dont_log_ce = 1; | ||
1551 | else if (!strcmp(str, "ignore_ce")) | ||
1552 | mce_ignore_ce = 1; | ||
1553 | else if (!strcmp(str, "bootlog") || !strcmp(str, "nobootlog")) | ||
1554 | mce_bootlog = (str[0] == 'b'); | ||
1555 | else if (isdigit(str[0])) { | ||
1556 | get_option(&str, &tolerant); | ||
1557 | if (*str == ',') { | ||
1558 | ++str; | ||
1559 | get_option(&str, &monarch_timeout); | ||
1560 | } | ||
1561 | } else { | ||
1562 | printk(KERN_INFO "mce argument %s ignored. Please use /sys\n", | ||
1563 | str); | ||
1564 | return 0; | ||
1565 | } | ||
1566 | return 1; | ||
1567 | } | ||
1568 | __setup("mce", mcheck_enable); | ||
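Usage illustration (made-up values): booting with "mce=2,1000000" takes the isdigit() path above and sets tolerant to 2 and monarch_timeout to 1000000, which the USEC_PER_SEC default in mce_cpu_quirks() suggests is interpreted as microseconds, i.e. wait up to one second for the other CPUs during a machine check; "mce=ignore_ce" disables both the polling timer and CMCI, and "mce=off" disables machine checks entirely.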
1569 | |||
1570 | /* | ||
1571 | * Sysfs support | ||
1572 | */ | ||
1573 | |||
1574 | /* | ||
1575 | * Disable machine checks on suspend and shutdown. We can't really handle | ||
1576 | * them later. | ||
1577 | */ | ||
1578 | static int mce_disable(void) | ||
1579 | { | ||
1580 | int i; | ||
1581 | |||
1582 | for (i = 0; i < banks; i++) { | ||
1583 | if (!skip_bank_init(i)) | ||
1584 | wrmsrl(MSR_IA32_MC0_CTL + i*4, 0); | ||
1585 | } | ||
1586 | return 0; | ||
1587 | } | ||
1588 | |||
1589 | static int mce_suspend(struct sys_device *dev, pm_message_t state) | ||
1590 | { | ||
1591 | return mce_disable(); | ||
1592 | } | ||
1593 | |||
1594 | static int mce_shutdown(struct sys_device *dev) | ||
1595 | { | ||
1596 | return mce_disable(); | ||
1597 | } | ||
1598 | |||
1599 | /* | ||
1600 | * On resume clear all MCE state. Don't want to see leftovers from the BIOS. | ||
1601 | * Only one CPU is active at this time, the others get re-added later using | ||
1602 | * CPU hotplug: | ||
1603 | */ | ||
1604 | static int mce_resume(struct sys_device *dev) | ||
1605 | { | ||
1606 | mce_init(); | ||
1607 | mce_cpu_features(¤t_cpu_data); | ||
1608 | |||
1609 | return 0; | ||
1610 | } | ||
1611 | |||
1612 | static void mce_cpu_restart(void *data) | ||
1613 | { | ||
1614 | del_timer_sync(&__get_cpu_var(mce_timer)); | ||
1615 | if (!mce_available(¤t_cpu_data)) | ||
1616 | return; | ||
1617 | mce_init(); | ||
1618 | mce_init_timer(); | ||
1619 | } | ||
1620 | |||
1621 | /* Reinit MCEs after user configuration changes */ | ||
1622 | static void mce_restart(void) | ||
1623 | { | ||
1624 | on_each_cpu(mce_cpu_restart, NULL, 1); | ||
1625 | } | ||
1626 | |||
1627 | /* Toggle features for corrected errors */ | ||
1628 | static void mce_disable_ce(void *all) | ||
1629 | { | ||
1630 | if (!mce_available(¤t_cpu_data)) | ||
1631 | return; | ||
1632 | if (all) | ||
1633 | del_timer_sync(&__get_cpu_var(mce_timer)); | ||
1634 | cmci_clear(); | ||
1635 | } | ||
1636 | |||
1637 | static void mce_enable_ce(void *all) | ||
1638 | { | ||
1639 | if (!mce_available(¤t_cpu_data)) | ||
1640 | return; | ||
1641 | cmci_reenable(); | ||
1642 | cmci_recheck(); | ||
1643 | if (all) | ||
1644 | mce_init_timer(); | ||
1645 | } | ||
1646 | |||
1647 | static struct sysdev_class mce_sysclass = { | ||
1648 | .suspend = mce_suspend, | ||
1649 | .shutdown = mce_shutdown, | ||
1650 | .resume = mce_resume, | ||
1651 | .name = "machinecheck", | ||
1652 | }; | ||
1653 | |||
1654 | DEFINE_PER_CPU(struct sys_device, mce_dev); | ||
1655 | |||
1656 | __cpuinitdata | ||
1657 | void (*threshold_cpu_callback)(unsigned long action, unsigned int cpu); | ||
1658 | |||
1659 | static struct sysdev_attribute *bank_attrs; | ||
1660 | |||
1661 | static ssize_t show_bank(struct sys_device *s, struct sysdev_attribute *attr, | ||
1662 | char *buf) | ||
1663 | { | ||
1664 | u64 b = bank[attr - bank_attrs]; | ||
1665 | |||
1666 | return sprintf(buf, "%llx\n", b); | ||
1667 | } | ||
1668 | |||
1669 | static ssize_t set_bank(struct sys_device *s, struct sysdev_attribute *attr, | ||
1670 | const char *buf, size_t size) | ||
1671 | { | ||
1672 | u64 new; | ||
1673 | |||
1674 | if (strict_strtoull(buf, 0, &new) < 0) | ||
1675 | return -EINVAL; | ||
1676 | |||
1677 | bank[attr - bank_attrs] = new; | ||
1678 | mce_restart(); | ||
1679 | |||
1680 | return size; | ||
1681 | } | ||
1682 | |||
1683 | static ssize_t | ||
1684 | show_trigger(struct sys_device *s, struct sysdev_attribute *attr, char *buf) | ||
1685 | { | ||
1686 | strcpy(buf, mce_helper); | ||
1687 | strcat(buf, "\n"); | ||
1688 | return strlen(mce_helper) + 1; | ||
1689 | } | ||
1690 | |||
1691 | static ssize_t set_trigger(struct sys_device *s, struct sysdev_attribute *attr, | ||
1692 | const char *buf, size_t siz) | ||
1693 | { | ||
1694 | char *p; | ||
1695 | int len; | ||
1696 | |||
1697 | strncpy(mce_helper, buf, sizeof(mce_helper)); | ||
1698 | mce_helper[sizeof(mce_helper)-1] = 0; | ||
1699 | len = strlen(mce_helper); | ||
1700 | p = strchr(mce_helper, '\n'); | ||
1701 | |||
1702 | 	if (p) | ||
1703 | *p = 0; | ||
1704 | |||
1705 | return len; | ||
1706 | } | ||
1707 | |||
1708 | static ssize_t set_ignore_ce(struct sys_device *s, | ||
1709 | struct sysdev_attribute *attr, | ||
1710 | const char *buf, size_t size) | ||
1711 | { | ||
1712 | u64 new; | ||
1713 | |||
1714 | if (strict_strtoull(buf, 0, &new) < 0) | ||
1715 | return -EINVAL; | ||
1716 | |||
1717 | if (mce_ignore_ce ^ !!new) { | ||
1718 | if (new) { | ||
1719 | /* disable ce features */ | ||
1720 | on_each_cpu(mce_disable_ce, (void *)1, 1); | ||
1721 | mce_ignore_ce = 1; | ||
1722 | } else { | ||
1723 | /* enable ce features */ | ||
1724 | mce_ignore_ce = 0; | ||
1725 | on_each_cpu(mce_enable_ce, (void *)1, 1); | ||
1726 | } | ||
1727 | } | ||
1728 | return size; | ||
1729 | } | ||
1730 | |||
1731 | static ssize_t set_cmci_disabled(struct sys_device *s, | ||
1732 | struct sysdev_attribute *attr, | ||
1733 | const char *buf, size_t size) | ||
1734 | { | ||
1735 | u64 new; | ||
1736 | |||
1737 | if (strict_strtoull(buf, 0, &new) < 0) | ||
1738 | return -EINVAL; | ||
1739 | |||
1740 | if (mce_cmci_disabled ^ !!new) { | ||
1741 | if (new) { | ||
1742 | /* disable cmci */ | ||
1743 | on_each_cpu(mce_disable_ce, NULL, 1); | ||
1744 | mce_cmci_disabled = 1; | ||
1745 | } else { | ||
1746 | /* enable cmci */ | ||
1747 | mce_cmci_disabled = 0; | ||
1748 | on_each_cpu(mce_enable_ce, NULL, 1); | ||
1749 | } | ||
1750 | } | ||
1751 | return size; | ||
1752 | } | ||
1753 | |||
1754 | static ssize_t store_int_with_restart(struct sys_device *s, | ||
1755 | struct sysdev_attribute *attr, | ||
1756 | const char *buf, size_t size) | ||
1757 | { | ||
1758 | ssize_t ret = sysdev_store_int(s, attr, buf, size); | ||
1759 | mce_restart(); | ||
1760 | return ret; | ||
1761 | } | ||
1762 | |||
1763 | static SYSDEV_ATTR(trigger, 0644, show_trigger, set_trigger); | ||
1764 | static SYSDEV_INT_ATTR(tolerant, 0644, tolerant); | ||
1765 | static SYSDEV_INT_ATTR(monarch_timeout, 0644, monarch_timeout); | ||
1766 | static SYSDEV_INT_ATTR(dont_log_ce, 0644, mce_dont_log_ce); | ||
1767 | |||
1768 | static struct sysdev_ext_attribute attr_check_interval = { | ||
1769 | _SYSDEV_ATTR(check_interval, 0644, sysdev_show_int, | ||
1770 | store_int_with_restart), | ||
1771 | &check_interval | ||
1772 | }; | ||
1773 | |||
1774 | static struct sysdev_ext_attribute attr_ignore_ce = { | ||
1775 | _SYSDEV_ATTR(ignore_ce, 0644, sysdev_show_int, set_ignore_ce), | ||
1776 | &mce_ignore_ce | ||
1777 | }; | ||
1778 | |||
1779 | static struct sysdev_ext_attribute attr_cmci_disabled = { | ||
1780 | _SYSDEV_ATTR(cmci_disabled, 0644, sysdev_show_int, set_cmci_disabled), | ||
1781 | &mce_cmci_disabled | ||
1782 | }; | ||
1783 | |||
1784 | static struct sysdev_attribute *mce_attrs[] = { | ||
1785 | &attr_tolerant.attr, | ||
1786 | &attr_check_interval.attr, | ||
1787 | &attr_trigger, | ||
1788 | &attr_monarch_timeout.attr, | ||
1789 | &attr_dont_log_ce.attr, | ||
1790 | &attr_ignore_ce.attr, | ||
1791 | &attr_cmci_disabled.attr, | ||
1792 | NULL | ||
1793 | }; | ||
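For illustration (the sysfs path is assumed from the "machinecheck" sysdev class registered above): writing a number to /sys/devices/system/machinecheck/machinecheck0/check_interval goes through store_int_with_restart(), which stores the value and then calls mce_restart() to reprogram every CPU; the other attributes listed here (tolerant, trigger, monarch_timeout, dont_log_ce, ignore_ce, cmci_disabled) and the per-bank bank%d files created later by mce_init_banks() appear in the same per-CPU directory.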
1794 | |||
1795 | static cpumask_var_t mce_dev_initialized; | ||
1796 | |||
1797 | /* Per cpu sysdev init. All of the cpus still share the same ctrl bank: */ | ||
1798 | static __cpuinit int mce_create_device(unsigned int cpu) | ||
1799 | { | ||
1800 | int err; | ||
1801 | int i, j; | ||
1802 | |||
1803 | if (!mce_available(&boot_cpu_data)) | ||
1804 | return -EIO; | ||
1805 | |||
1806 | memset(&per_cpu(mce_dev, cpu).kobj, 0, sizeof(struct kobject)); | ||
1807 | per_cpu(mce_dev, cpu).id = cpu; | ||
1808 | per_cpu(mce_dev, cpu).cls = &mce_sysclass; | ||
1809 | |||
1810 | err = sysdev_register(&per_cpu(mce_dev, cpu)); | ||
1811 | if (err) | ||
1812 | return err; | ||
1813 | |||
1814 | for (i = 0; mce_attrs[i]; i++) { | ||
1815 | err = sysdev_create_file(&per_cpu(mce_dev, cpu), mce_attrs[i]); | ||
1816 | if (err) | ||
1817 | goto error; | ||
1818 | } | ||
1819 | for (j = 0; j < banks; j++) { | ||
1820 | err = sysdev_create_file(&per_cpu(mce_dev, cpu), | ||
1821 | &bank_attrs[j]); | ||
1822 | if (err) | ||
1823 | goto error2; | ||
1824 | } | ||
1825 | cpumask_set_cpu(cpu, mce_dev_initialized); | ||
1826 | |||
1827 | return 0; | ||
1828 | error2: | ||
1829 | while (--j >= 0) | ||
1830 | sysdev_remove_file(&per_cpu(mce_dev, cpu), &bank_attrs[j]); | ||
1831 | error: | ||
1832 | while (--i >= 0) | ||
1833 | sysdev_remove_file(&per_cpu(mce_dev, cpu), mce_attrs[i]); | ||
1834 | |||
1835 | sysdev_unregister(&per_cpu(mce_dev, cpu)); | ||
1836 | |||
1837 | return err; | ||
1838 | } | ||
1839 | |||
1840 | static __cpuinit void mce_remove_device(unsigned int cpu) | ||
1841 | { | ||
1842 | int i; | ||
1843 | |||
1844 | if (!cpumask_test_cpu(cpu, mce_dev_initialized)) | ||
1845 | return; | ||
1846 | |||
1847 | for (i = 0; mce_attrs[i]; i++) | ||
1848 | sysdev_remove_file(&per_cpu(mce_dev, cpu), mce_attrs[i]); | ||
1849 | |||
1850 | for (i = 0; i < banks; i++) | ||
1851 | sysdev_remove_file(&per_cpu(mce_dev, cpu), &bank_attrs[i]); | ||
1852 | |||
1853 | sysdev_unregister(&per_cpu(mce_dev, cpu)); | ||
1854 | cpumask_clear_cpu(cpu, mce_dev_initialized); | ||
1855 | } | ||
1856 | |||
1857 | /* Make sure there are no machine checks on offlined CPUs. */ | ||
1858 | static void mce_disable_cpu(void *h) | ||
1859 | { | ||
1860 | unsigned long action = *(unsigned long *)h; | ||
1861 | int i; | ||
1862 | |||
1863 | if (!mce_available(¤t_cpu_data)) | ||
1864 | return; | ||
1865 | if (!(action & CPU_TASKS_FROZEN)) | ||
1866 | cmci_clear(); | ||
1867 | for (i = 0; i < banks; i++) { | ||
1868 | if (!skip_bank_init(i)) | ||
1869 | wrmsrl(MSR_IA32_MC0_CTL + i*4, 0); | ||
1870 | } | ||
1871 | } | ||
1872 | |||
1873 | static void mce_reenable_cpu(void *h) | ||
1874 | { | ||
1875 | unsigned long action = *(unsigned long *)h; | ||
1876 | int i; | ||
1877 | |||
1878 | if (!mce_available(¤t_cpu_data)) | ||
1879 | return; | ||
1880 | |||
1881 | if (!(action & CPU_TASKS_FROZEN)) | ||
1882 | cmci_reenable(); | ||
1883 | for (i = 0; i < banks; i++) { | ||
1884 | if (!skip_bank_init(i)) | ||
1885 | wrmsrl(MSR_IA32_MC0_CTL + i*4, bank[i]); | ||
1886 | } | ||
1887 | } | ||
1888 | |||
1889 | /* Get notified when a cpu comes on/off. Be hotplug friendly. */ | ||
1890 | static int __cpuinit | ||
1891 | mce_cpu_callback(struct notifier_block *nfb, unsigned long action, void *hcpu) | ||
1892 | { | ||
1893 | unsigned int cpu = (unsigned long)hcpu; | ||
1894 | struct timer_list *t = &per_cpu(mce_timer, cpu); | ||
1895 | |||
1896 | switch (action) { | ||
1897 | case CPU_ONLINE: | ||
1898 | case CPU_ONLINE_FROZEN: | ||
1899 | mce_create_device(cpu); | ||
1900 | if (threshold_cpu_callback) | ||
1901 | threshold_cpu_callback(action, cpu); | ||
1902 | break; | ||
1903 | case CPU_DEAD: | ||
1904 | case CPU_DEAD_FROZEN: | ||
1905 | if (threshold_cpu_callback) | ||
1906 | threshold_cpu_callback(action, cpu); | ||
1907 | mce_remove_device(cpu); | ||
1908 | break; | ||
1909 | case CPU_DOWN_PREPARE: | ||
1910 | case CPU_DOWN_PREPARE_FROZEN: | ||
1911 | del_timer_sync(t); | ||
1912 | smp_call_function_single(cpu, mce_disable_cpu, &action, 1); | ||
1913 | break; | ||
1914 | case CPU_DOWN_FAILED: | ||
1915 | case CPU_DOWN_FAILED_FROZEN: | ||
1916 | t->expires = round_jiffies(jiffies + | ||
1917 | __get_cpu_var(next_interval)); | ||
1918 | add_timer_on(t, cpu); | ||
1919 | smp_call_function_single(cpu, mce_reenable_cpu, &action, 1); | ||
1920 | break; | ||
1921 | case CPU_POST_DEAD: | ||
1922 | /* intentionally ignoring frozen here */ | ||
1923 | cmci_rediscover(cpu); | ||
1924 | break; | ||
1925 | } | ||
1926 | return NOTIFY_OK; | ||
1927 | } | ||
1928 | |||
1929 | static struct notifier_block mce_cpu_notifier __cpuinitdata = { | ||
1930 | .notifier_call = mce_cpu_callback, | ||
1931 | }; | ||
1932 | |||
1933 | static __init int mce_init_banks(void) | ||
1934 | { | ||
1935 | int i; | ||
1936 | |||
1937 | bank_attrs = kzalloc(sizeof(struct sysdev_attribute) * banks, | ||
1938 | GFP_KERNEL); | ||
1939 | if (!bank_attrs) | ||
1940 | return -ENOMEM; | ||
1941 | |||
1942 | for (i = 0; i < banks; i++) { | ||
1943 | struct sysdev_attribute *a = &bank_attrs[i]; | ||
1944 | |||
1945 | a->attr.name = kasprintf(GFP_KERNEL, "bank%d", i); | ||
1946 | if (!a->attr.name) | ||
1947 | goto nomem; | ||
1948 | |||
1949 | a->attr.mode = 0644; | ||
1950 | a->show = show_bank; | ||
1951 | a->store = set_bank; | ||
1952 | } | ||
1953 | return 0; | ||
1954 | |||
1955 | nomem: | ||
1956 | while (--i >= 0) | ||
1957 | kfree(bank_attrs[i].attr.name); | ||
1958 | kfree(bank_attrs); | ||
1959 | bank_attrs = NULL; | ||
1960 | |||
1961 | return -ENOMEM; | ||
1962 | } | ||
1963 | |||
1964 | static __init int mce_init_device(void) | ||
1965 | { | ||
1966 | int err; | ||
1967 | int i = 0; | ||
1968 | |||
1969 | if (!mce_available(&boot_cpu_data)) | ||
1970 | return -EIO; | ||
1971 | |||
1972 | zalloc_cpumask_var(&mce_dev_initialized, GFP_KERNEL); | ||
1973 | |||
1974 | err = mce_init_banks(); | ||
1975 | if (err) | ||
1976 | return err; | ||
1977 | |||
1978 | err = sysdev_class_register(&mce_sysclass); | ||
1979 | if (err) | ||
1980 | return err; | ||
1981 | |||
1982 | for_each_online_cpu(i) { | ||
1983 | err = mce_create_device(i); | ||
1984 | if (err) | ||
1985 | return err; | ||
1986 | } | ||
1987 | |||
1988 | register_hotcpu_notifier(&mce_cpu_notifier); | ||
1989 | misc_register(&mce_log_device); | ||
1990 | |||
1991 | return err; | ||
1992 | } | ||
1993 | |||
1994 | device_initcall(mce_init_device); | ||
1995 | |||
1996 | #else /* CONFIG_X86_OLD_MCE: */ | ||
1997 | |||
1998 | int nr_mce_banks; | ||
1999 | EXPORT_SYMBOL_GPL(nr_mce_banks); /* non-fatal.o */ | ||
2000 | |||
2001 | /* This has to be run for each processor */ | ||
2002 | void mcheck_init(struct cpuinfo_x86 *c) | ||
2003 | { | ||
2004 | if (mce_disabled) | ||
2005 | return; | ||
2006 | |||
2007 | switch (c->x86_vendor) { | ||
2008 | case X86_VENDOR_AMD: | ||
2009 | amd_mcheck_init(c); | ||
2010 | break; | ||
2011 | |||
2012 | case X86_VENDOR_INTEL: | ||
2013 | if (c->x86 == 5) | ||
2014 | intel_p5_mcheck_init(c); | ||
2015 | if (c->x86 == 6) | ||
2016 | intel_p6_mcheck_init(c); | ||
2017 | if (c->x86 == 15) | ||
2018 | intel_p4_mcheck_init(c); | ||
2019 | break; | ||
2020 | |||
2021 | case X86_VENDOR_CENTAUR: | ||
2022 | if (c->x86 == 5) | ||
2023 | winchip_mcheck_init(c); | ||
2024 | break; | ||
2025 | |||
2026 | default: | ||
2027 | break; | ||
2028 | } | ||
2029 | printk(KERN_INFO "mce: CPU supports %d MCE banks\n", nr_mce_banks); | ||
2030 | } | ||
2031 | |||
2032 | static int __init mcheck_enable(char *str) | ||
2033 | { | ||
2034 | mce_p5_enabled = 1; | ||
2035 | return 1; | ||
2036 | } | ||
2037 | __setup("mce", mcheck_enable); | ||
2038 | |||
2039 | #endif /* CONFIG_X86_OLD_MCE */ | ||
2040 | |||
2041 | /* | ||
2042 | * Old style boot options parsing. Only for compatibility. | ||
2043 | */ | ||
2044 | static int __init mcheck_disable(char *str) | ||
2045 | { | ||
2046 | mce_disabled = 1; | ||
2047 | return 1; | ||
2048 | } | ||
2049 | __setup("nomce", mcheck_disable); | ||
diff --git a/arch/x86/kernel/cpu/mcheck/mce.h b/arch/x86/kernel/cpu/mcheck/mce.h deleted file mode 100644 index ae9f628838f1..000000000000 --- a/arch/x86/kernel/cpu/mcheck/mce.h +++ /dev/null | |||
@@ -1,14 +0,0 @@ | |||
1 | #include <linux/init.h> | ||
2 | #include <asm/mce.h> | ||
3 | |||
4 | void amd_mcheck_init(struct cpuinfo_x86 *c); | ||
5 | void intel_p4_mcheck_init(struct cpuinfo_x86 *c); | ||
6 | void intel_p5_mcheck_init(struct cpuinfo_x86 *c); | ||
7 | void intel_p6_mcheck_init(struct cpuinfo_x86 *c); | ||
8 | void winchip_mcheck_init(struct cpuinfo_x86 *c); | ||
9 | |||
10 | /* Call the installed machine check handler for this CPU setup. */ | ||
11 | extern void (*machine_check_vector)(struct pt_regs *, long error_code); | ||
12 | |||
13 | extern int nr_mce_banks; | ||
14 | |||
diff --git a/arch/x86/kernel/cpu/mcheck/mce_32.c b/arch/x86/kernel/cpu/mcheck/mce_32.c deleted file mode 100644 index 3552119b091d..000000000000 --- a/arch/x86/kernel/cpu/mcheck/mce_32.c +++ /dev/null | |||
@@ -1,76 +0,0 @@ | |||
1 | /* | ||
2 | * mce.c - x86 Machine Check Exception Reporting | ||
3 | * (c) 2002 Alan Cox <alan@lxorguk.ukuu.org.uk>, Dave Jones <davej@redhat.com> | ||
4 | */ | ||
5 | |||
6 | #include <linux/init.h> | ||
7 | #include <linux/types.h> | ||
8 | #include <linux/kernel.h> | ||
9 | #include <linux/module.h> | ||
10 | #include <linux/smp.h> | ||
11 | #include <linux/thread_info.h> | ||
12 | |||
13 | #include <asm/processor.h> | ||
14 | #include <asm/system.h> | ||
15 | #include <asm/mce.h> | ||
16 | |||
17 | #include "mce.h" | ||
18 | |||
19 | int mce_disabled; | ||
20 | int nr_mce_banks; | ||
21 | |||
22 | EXPORT_SYMBOL_GPL(nr_mce_banks); /* non-fatal.o */ | ||
23 | |||
24 | /* Handle unconfigured int18 (should never happen) */ | ||
25 | static void unexpected_machine_check(struct pt_regs *regs, long error_code) | ||
26 | { | ||
27 | printk(KERN_ERR "CPU#%d: Unexpected int18 (Machine Check).\n", smp_processor_id()); | ||
28 | } | ||
29 | |||
30 | /* Call the installed machine check handler for this CPU setup. */ | ||
31 | void (*machine_check_vector)(struct pt_regs *, long error_code) = unexpected_machine_check; | ||
32 | |||
33 | /* This has to be run for each processor */ | ||
34 | void mcheck_init(struct cpuinfo_x86 *c) | ||
35 | { | ||
36 | if (mce_disabled == 1) | ||
37 | return; | ||
38 | |||
39 | switch (c->x86_vendor) { | ||
40 | case X86_VENDOR_AMD: | ||
41 | amd_mcheck_init(c); | ||
42 | break; | ||
43 | |||
44 | case X86_VENDOR_INTEL: | ||
45 | if (c->x86 == 5) | ||
46 | intel_p5_mcheck_init(c); | ||
47 | if (c->x86 == 6) | ||
48 | intel_p6_mcheck_init(c); | ||
49 | if (c->x86 == 15) | ||
50 | intel_p4_mcheck_init(c); | ||
51 | break; | ||
52 | |||
53 | case X86_VENDOR_CENTAUR: | ||
54 | if (c->x86 == 5) | ||
55 | winchip_mcheck_init(c); | ||
56 | break; | ||
57 | |||
58 | default: | ||
59 | break; | ||
60 | } | ||
61 | } | ||
62 | |||
63 | static int __init mcheck_disable(char *str) | ||
64 | { | ||
65 | mce_disabled = 1; | ||
66 | return 1; | ||
67 | } | ||
68 | |||
69 | static int __init mcheck_enable(char *str) | ||
70 | { | ||
71 | mce_disabled = -1; | ||
72 | return 1; | ||
73 | } | ||
74 | |||
75 | __setup("nomce", mcheck_disable); | ||
76 | __setup("mce", mcheck_enable); | ||
diff --git a/arch/x86/kernel/cpu/mcheck/mce_64.c b/arch/x86/kernel/cpu/mcheck/mce_64.c deleted file mode 100644 index 09dd1d414fc3..000000000000 --- a/arch/x86/kernel/cpu/mcheck/mce_64.c +++ /dev/null | |||
@@ -1,1187 +0,0 @@ | |||
1 | /* | ||
2 | * Machine check handler. | ||
3 | * K8 parts Copyright 2002,2003 Andi Kleen, SuSE Labs. | ||
4 | * Rest from unknown author(s). | ||
5 | * 2004 Andi Kleen. Rewrote most of it. | ||
6 | * Copyright 2008 Intel Corporation | ||
7 | * Author: Andi Kleen | ||
8 | */ | ||
9 | |||
10 | #include <linux/init.h> | ||
11 | #include <linux/types.h> | ||
12 | #include <linux/kernel.h> | ||
13 | #include <linux/sched.h> | ||
14 | #include <linux/smp_lock.h> | ||
15 | #include <linux/string.h> | ||
16 | #include <linux/rcupdate.h> | ||
17 | #include <linux/kallsyms.h> | ||
18 | #include <linux/sysdev.h> | ||
19 | #include <linux/miscdevice.h> | ||
20 | #include <linux/fs.h> | ||
21 | #include <linux/capability.h> | ||
22 | #include <linux/cpu.h> | ||
23 | #include <linux/percpu.h> | ||
24 | #include <linux/poll.h> | ||
25 | #include <linux/thread_info.h> | ||
26 | #include <linux/ctype.h> | ||
27 | #include <linux/kmod.h> | ||
28 | #include <linux/kdebug.h> | ||
29 | #include <linux/kobject.h> | ||
30 | #include <linux/sysfs.h> | ||
31 | #include <linux/ratelimit.h> | ||
32 | #include <asm/processor.h> | ||
33 | #include <asm/msr.h> | ||
34 | #include <asm/mce.h> | ||
35 | #include <asm/uaccess.h> | ||
36 | #include <asm/smp.h> | ||
37 | #include <asm/idle.h> | ||
38 | |||
39 | #define MISC_MCELOG_MINOR 227 | ||
40 | |||
41 | atomic_t mce_entry; | ||
42 | |||
43 | static int mce_dont_init; | ||
44 | |||
45 | /* | ||
46 | * Tolerant levels: | ||
47 | * 0: always panic on uncorrected errors, log corrected errors | ||
48 | * 1: panic or SIGBUS on uncorrected errors, log corrected errors | ||
49 | * 2: SIGBUS or log uncorrected errors (if possible), log corrected errors | ||
50 | * 3: never panic or SIGBUS, log all errors (for testing only) | ||
51 | */ | ||
52 | static int tolerant = 1; | ||
53 | static int banks; | ||
54 | static u64 *bank; | ||
55 | static unsigned long notify_user; | ||
56 | static int rip_msr; | ||
57 | static int mce_bootlog = -1; | ||
58 | static atomic_t mce_events; | ||
59 | |||
60 | static char trigger[128]; | ||
61 | static char *trigger_argv[2] = { trigger, NULL }; | ||
62 | |||
63 | static DECLARE_WAIT_QUEUE_HEAD(mce_wait); | ||
64 | |||
65 | /* MCA banks polled by the period polling timer for corrected events */ | ||
66 | DEFINE_PER_CPU(mce_banks_t, mce_poll_banks) = { | ||
67 | [0 ... BITS_TO_LONGS(MAX_NR_BANKS)-1] = ~0UL | ||
68 | }; | ||
69 | |||
70 | /* Do initial initialization of a struct mce */ | ||
71 | void mce_setup(struct mce *m) | ||
72 | { | ||
73 | memset(m, 0, sizeof(struct mce)); | ||
74 | m->cpu = smp_processor_id(); | ||
75 | rdtscll(m->tsc); | ||
76 | } | ||
77 | |||
78 | /* | ||
79 | * Lockless MCE logging infrastructure. | ||
80 | * This avoids deadlocks on printk locks without having to break locks. Also | ||
81 | * separate MCEs from kernel messages to avoid bogus bug reports. | ||
82 | */ | ||
83 | |||
84 | static struct mce_log mcelog = { | ||
85 | MCE_LOG_SIGNATURE, | ||
86 | MCE_LOG_LEN, | ||
87 | }; | ||
88 | |||
89 | void mce_log(struct mce *mce) | ||
90 | { | ||
91 | unsigned next, entry; | ||
92 | atomic_inc(&mce_events); | ||
93 | mce->finished = 0; | ||
94 | wmb(); | ||
95 | for (;;) { | ||
96 | entry = rcu_dereference(mcelog.next); | ||
97 | for (;;) { | ||
98 | /* When the buffer fills up discard new entries. Assume | ||
99 | that the earlier errors are the more interesting. */ | ||
100 | if (entry >= MCE_LOG_LEN) { | ||
101 | set_bit(MCE_OVERFLOW, (unsigned long *)&mcelog.flags); | ||
102 | return; | ||
103 | } | ||
104 | /* Old left over entry. Skip. */ | ||
105 | if (mcelog.entry[entry].finished) { | ||
106 | entry++; | ||
107 | continue; | ||
108 | } | ||
109 | break; | ||
110 | } | ||
111 | smp_rmb(); | ||
112 | next = entry + 1; | ||
113 | if (cmpxchg(&mcelog.next, entry, next) == entry) | ||
114 | break; | ||
115 | } | ||
116 | memcpy(mcelog.entry + entry, mce, sizeof(struct mce)); | ||
117 | wmb(); | ||
118 | mcelog.entry[entry].finished = 1; | ||
119 | wmb(); | ||
120 | |||
121 | set_bit(0, ¬ify_user); | ||
122 | } | ||
123 | |||
124 | static void print_mce(struct mce *m) | ||
125 | { | ||
126 | printk(KERN_EMERG "\n" | ||
127 | KERN_EMERG "HARDWARE ERROR\n" | ||
128 | KERN_EMERG | ||
129 | "CPU %d: Machine Check Exception: %16Lx Bank %d: %016Lx\n", | ||
130 | m->cpu, m->mcgstatus, m->bank, m->status); | ||
131 | if (m->ip) { | ||
132 | printk(KERN_EMERG "RIP%s %02x:<%016Lx> ", | ||
133 | !(m->mcgstatus & MCG_STATUS_EIPV) ? " !INEXACT!" : "", | ||
134 | m->cs, m->ip); | ||
135 | if (m->cs == __KERNEL_CS) | ||
136 | print_symbol("{%s}", m->ip); | ||
137 | printk("\n"); | ||
138 | } | ||
139 | printk(KERN_EMERG "TSC %llx ", m->tsc); | ||
140 | if (m->addr) | ||
141 | printk("ADDR %llx ", m->addr); | ||
142 | if (m->misc) | ||
143 | printk("MISC %llx ", m->misc); | ||
144 | printk("\n"); | ||
145 | printk(KERN_EMERG "This is not a software problem!\n"); | ||
146 | printk(KERN_EMERG "Run through mcelog --ascii to decode " | ||
147 | "and contact your hardware vendor\n"); | ||
148 | } | ||
149 | |||
150 | static void mce_panic(char *msg, struct mce *backup, unsigned long start) | ||
151 | { | ||
152 | int i; | ||
153 | |||
154 | oops_begin(); | ||
155 | for (i = 0; i < MCE_LOG_LEN; i++) { | ||
156 | unsigned long tsc = mcelog.entry[i].tsc; | ||
157 | |||
158 | if (time_before(tsc, start)) | ||
159 | continue; | ||
160 | print_mce(&mcelog.entry[i]); | ||
161 | if (backup && mcelog.entry[i].tsc == backup->tsc) | ||
162 | backup = NULL; | ||
163 | } | ||
164 | if (backup) | ||
165 | print_mce(backup); | ||
166 | panic(msg); | ||
167 | } | ||
168 | |||
169 | int mce_available(struct cpuinfo_x86 *c) | ||
170 | { | ||
171 | if (mce_dont_init) | ||
172 | return 0; | ||
173 | return cpu_has(c, X86_FEATURE_MCE) && cpu_has(c, X86_FEATURE_MCA); | ||
174 | } | ||
175 | |||
176 | static inline void mce_get_rip(struct mce *m, struct pt_regs *regs) | ||
177 | { | ||
178 | if (regs && (m->mcgstatus & MCG_STATUS_RIPV)) { | ||
179 | m->ip = regs->ip; | ||
180 | m->cs = regs->cs; | ||
181 | } else { | ||
182 | m->ip = 0; | ||
183 | m->cs = 0; | ||
184 | } | ||
185 | if (rip_msr) { | ||
186 | /* Assume the RIP in the MSR is exact. Is this true? */ | ||
187 | m->mcgstatus |= MCG_STATUS_EIPV; | ||
188 | rdmsrl(rip_msr, m->ip); | ||
189 | m->cs = 0; | ||
190 | } | ||
191 | } | ||
192 | |||
193 | /* | ||
194 | * Poll for corrected events or events that happened before reset. | ||
195 | * Those are just logged through /dev/mcelog. | ||
196 | * | ||
197 | * This is executed in standard interrupt context. | ||
198 | */ | ||
199 | void machine_check_poll(enum mcp_flags flags, mce_banks_t *b) | ||
200 | { | ||
201 | struct mce m; | ||
202 | int i; | ||
203 | |||
204 | mce_setup(&m); | ||
205 | |||
206 | rdmsrl(MSR_IA32_MCG_STATUS, m.mcgstatus); | ||
207 | for (i = 0; i < banks; i++) { | ||
208 | if (!bank[i] || !test_bit(i, *b)) | ||
209 | continue; | ||
210 | |||
211 | m.misc = 0; | ||
212 | m.addr = 0; | ||
213 | m.bank = i; | ||
214 | m.tsc = 0; | ||
215 | |||
216 | barrier(); | ||
217 | rdmsrl(MSR_IA32_MC0_STATUS + i*4, m.status); | ||
218 | if (!(m.status & MCI_STATUS_VAL)) | ||
219 | continue; | ||
220 | |||
221 | /* | ||
222 | * Uncorrected events are handled by the exception handler | ||
223 | * when it is enabled. But when the exception is disabled log | ||
224 | * everything. | ||
225 | * | ||
226 | * TBD do the same check for MCI_STATUS_EN here? | ||
227 | */ | ||
228 | if ((m.status & MCI_STATUS_UC) && !(flags & MCP_UC)) | ||
229 | continue; | ||
230 | |||
231 | if (m.status & MCI_STATUS_MISCV) | ||
232 | rdmsrl(MSR_IA32_MC0_MISC + i*4, m.misc); | ||
233 | if (m.status & MCI_STATUS_ADDRV) | ||
234 | rdmsrl(MSR_IA32_MC0_ADDR + i*4, m.addr); | ||
235 | |||
236 | if (!(flags & MCP_TIMESTAMP)) | ||
237 | m.tsc = 0; | ||
238 | /* | ||
239 | * Don't get the IP here because it's unlikely to | ||
240 | * have anything to do with the actual error location. | ||
241 | */ | ||
242 | if (!(flags & MCP_DONTLOG)) { | ||
243 | mce_log(&m); | ||
244 | add_taint(TAINT_MACHINE_CHECK); | ||
245 | } | ||
246 | |||
247 | /* | ||
248 | * Clear state for this bank. | ||
249 | */ | ||
250 | wrmsrl(MSR_IA32_MC0_STATUS+4*i, 0); | ||
251 | } | ||
252 | |||
253 | /* | ||
254 | * Don't clear MCG_STATUS here because it's only defined for | ||
255 | * exceptions. | ||
256 | */ | ||
257 | } | ||
258 | |||
259 | /* | ||
260 | * The actual machine check handler. This only handles real | ||
261 | * exceptions when something got corrupted coming in through int 18. | ||
262 | * | ||
263 | * This is executed in NMI context not subject to normal locking rules. This | ||
264 | * implies that most kernel services cannot be safely used. Don't even | ||
265 | * think about putting a printk in there! | ||
266 | */ | ||
267 | void do_machine_check(struct pt_regs * regs, long error_code) | ||
268 | { | ||
269 | struct mce m, panicm; | ||
270 | u64 mcestart = 0; | ||
271 | int i; | ||
272 | int panicm_found = 0; | ||
273 | /* | ||
274 | * If no_way_out gets set, there is no safe way to recover from this | ||
275 | * MCE. If tolerant is cranked up, we'll try anyway. | ||
276 | */ | ||
277 | int no_way_out = 0; | ||
278 | /* | ||
279 | * If kill_it gets set, there might be a way to recover from this | ||
280 | * error. | ||
281 | */ | ||
282 | int kill_it = 0; | ||
283 | DECLARE_BITMAP(toclear, MAX_NR_BANKS); | ||
284 | |||
285 | atomic_inc(&mce_entry); | ||
286 | |||
287 | if (notify_die(DIE_NMI, "machine check", regs, error_code, | ||
288 | 18, SIGKILL) == NOTIFY_STOP) | ||
289 | goto out2; | ||
290 | if (!banks) | ||
291 | goto out2; | ||
292 | |||
293 | mce_setup(&m); | ||
294 | |||
295 | rdmsrl(MSR_IA32_MCG_STATUS, m.mcgstatus); | ||
296 | /* if the restart IP is not valid, we're done for */ | ||
297 | if (!(m.mcgstatus & MCG_STATUS_RIPV)) | ||
298 | no_way_out = 1; | ||
299 | |||
300 | rdtscll(mcestart); | ||
301 | barrier(); | ||
302 | |||
303 | for (i = 0; i < banks; i++) { | ||
304 | __clear_bit(i, toclear); | ||
305 | if (!bank[i]) | ||
306 | continue; | ||
307 | |||
308 | m.misc = 0; | ||
309 | m.addr = 0; | ||
310 | m.bank = i; | ||
311 | |||
312 | rdmsrl(MSR_IA32_MC0_STATUS + i*4, m.status); | ||
313 | if ((m.status & MCI_STATUS_VAL) == 0) | ||
314 | continue; | ||
315 | |||
316 | /* | ||
317 | * Non uncorrected errors are handled by machine_check_poll | ||
318 | * Leave them alone. | ||
319 | */ | ||
320 | if ((m.status & MCI_STATUS_UC) == 0) | ||
321 | continue; | ||
322 | |||
323 | /* | ||
324 | * Set taint even when machine check was not enabled. | ||
325 | */ | ||
326 | add_taint(TAINT_MACHINE_CHECK); | ||
327 | |||
328 | __set_bit(i, toclear); | ||
329 | |||
330 | if (m.status & MCI_STATUS_EN) { | ||
331 | /* if PCC was set, there's no way out */ | ||
332 | no_way_out |= !!(m.status & MCI_STATUS_PCC); | ||
333 | /* | ||
334 | * If this error was uncorrectable and there was | ||
335 | * an overflow, we're in trouble. If no overflow, | ||
336 | * we might get away with just killing a task. | ||
337 | */ | ||
338 | if (m.status & MCI_STATUS_UC) { | ||
339 | if (tolerant < 1 || m.status & MCI_STATUS_OVER) | ||
340 | no_way_out = 1; | ||
341 | kill_it = 1; | ||
342 | } | ||
343 | } else { | ||
344 | /* | ||
345 | * Machine check event was not enabled. Clear, but | ||
346 | * ignore. | ||
347 | */ | ||
348 | continue; | ||
349 | } | ||
350 | |||
351 | if (m.status & MCI_STATUS_MISCV) | ||
352 | rdmsrl(MSR_IA32_MC0_MISC + i*4, m.misc); | ||
353 | if (m.status & MCI_STATUS_ADDRV) | ||
354 | rdmsrl(MSR_IA32_MC0_ADDR + i*4, m.addr); | ||
355 | |||
356 | mce_get_rip(&m, regs); | ||
357 | mce_log(&m); | ||
358 | |||
359 | /* Did this bank cause the exception? */ | ||
360 | /* Assume that the bank with uncorrectable errors did it, | ||
361 | and that there is only a single one. */ | ||
362 | if ((m.status & MCI_STATUS_UC) && (m.status & MCI_STATUS_EN)) { | ||
363 | panicm = m; | ||
364 | panicm_found = 1; | ||
365 | } | ||
366 | } | ||
367 | |||
368 | /* If we didn't find an uncorrectable error, pick | ||
369 | the last one (shouldn't happen, just being safe). */ | ||
370 | if (!panicm_found) | ||
371 | panicm = m; | ||
372 | |||
373 | /* | ||
374 | * If we have decided that we just CAN'T continue, and the user | ||
375 | * has not set tolerant to an insane level, give up and die. | ||
376 | */ | ||
377 | if (no_way_out && tolerant < 3) | ||
378 | mce_panic("Machine check", &panicm, mcestart); | ||
379 | |||
380 | /* | ||
381 | * If the error seems to be unrecoverable, something should be | ||
382 | * done. Try to kill as little as possible. If we can kill just | ||
383 | * one task, do that. If the user has set the tolerance very | ||
384 | * high, don't try to do anything at all. | ||
385 | */ | ||
386 | if (kill_it && tolerant < 3) { | ||
387 | int user_space = 0; | ||
388 | |||
389 | /* | ||
390 | * If the EIPV bit is set, it means the saved IP is the | ||
391 | * instruction which caused the MCE. | ||
392 | */ | ||
393 | if (m.mcgstatus & MCG_STATUS_EIPV) | ||
394 | user_space = panicm.ip && (panicm.cs & 3); | ||
395 | |||
396 | /* | ||
397 | * If we know that the error was in user space, send a | ||
398 | * SIGBUS. Otherwise, panic if tolerance is low. | ||
399 | * | ||
400 | * force_sig() takes an awful lot of locks and has a slight | ||
401 | * risk of deadlocking. | ||
402 | */ | ||
403 | if (user_space) { | ||
404 | force_sig(SIGBUS, current); | ||
405 | } else if (panic_on_oops || tolerant < 2) { | ||
406 | mce_panic("Uncorrected machine check", | ||
407 | &panicm, mcestart); | ||
408 | } | ||
409 | } | ||
410 | |||
411 | /* notify userspace ASAP */ | ||
412 | set_thread_flag(TIF_MCE_NOTIFY); | ||
413 | |||
414 | /* the last thing we do is clear state */ | ||
415 | for (i = 0; i < banks; i++) { | ||
416 | if (test_bit(i, toclear)) | ||
417 | wrmsrl(MSR_IA32_MC0_STATUS+4*i, 0); | ||
418 | } | ||
419 | wrmsrl(MSR_IA32_MCG_STATUS, 0); | ||
420 | out2: | ||
421 | atomic_dec(&mce_entry); | ||
422 | } | ||
423 | |||
424 | #ifdef CONFIG_X86_MCE_INTEL | ||
425 | /*** | ||
426 | * mce_log_therm_throt_event - Logs the thermal throttling event to mcelog | ||
427 | * @cpu: The CPU on which the event occurred. | ||
428 | * @status: Event status information | ||
429 | * | ||
430 | * This function should be called by the thermal interrupt after the | ||
431 | * event has been processed and the decision was made to log the event | ||
432 | * further. | ||
433 | * | ||
434 | * The status parameter will be saved to the 'status' field of 'struct mce' | ||
435 | * and historically has been the register value of the | ||
436 | * MSR_IA32_THERMAL_STATUS (Intel) msr. | ||
437 | */ | ||
438 | void mce_log_therm_throt_event(__u64 status) | ||
439 | { | ||
440 | struct mce m; | ||
441 | |||
442 | mce_setup(&m); | ||
443 | m.bank = MCE_THERMAL_BANK; | ||
444 | m.status = status; | ||
445 | mce_log(&m); | ||
446 | } | ||
447 | #endif /* CONFIG_X86_MCE_INTEL */ | ||
448 | |||
449 | /* | ||
450 | * Periodic polling timer for "silent" machine check errors. If the | ||
451 | * poller finds an MCE, poll 2x faster. When the poller finds no more | ||
452 | * errors, poll 2x slower (up to check_interval seconds). | ||
453 | */ | ||
454 | |||
455 | static int check_interval = 5 * 60; /* 5 minutes */ | ||
456 | static DEFINE_PER_CPU(int, next_interval); /* in jiffies */ | ||
457 | static void mcheck_timer(unsigned long); | ||
458 | static DEFINE_PER_CPU(struct timer_list, mce_timer); | ||
459 | |||
460 | static void mcheck_timer(unsigned long data) | ||
461 | { | ||
462 | struct timer_list *t = &per_cpu(mce_timer, data); | ||
463 | int *n; | ||
464 | |||
465 | WARN_ON(smp_processor_id() != data); | ||
466 | |||
467 | if (mce_available(¤t_cpu_data)) | ||
468 | machine_check_poll(MCP_TIMESTAMP, | ||
469 | &__get_cpu_var(mce_poll_banks)); | ||
470 | |||
471 | /* | ||
472 | * Alert userspace if needed. If we logged an MCE, reduce the | ||
473 | * polling interval, otherwise increase the polling interval. | ||
474 | */ | ||
475 | n = &__get_cpu_var(next_interval); | ||
476 | if (mce_notify_user()) { | ||
477 | *n = max(*n/2, HZ/100); | ||
478 | } else { | ||
479 | *n = min(*n*2, (int)round_jiffies_relative(check_interval*HZ)); | ||
480 | } | ||
481 | |||
482 | t->expires = jiffies + *n; | ||
483 | add_timer(t); | ||
484 | } | ||
485 | |||
486 | static void mce_do_trigger(struct work_struct *work) | ||
487 | { | ||
488 | call_usermodehelper(trigger, trigger_argv, NULL, UMH_NO_WAIT); | ||
489 | } | ||
490 | |||
491 | static DECLARE_WORK(mce_trigger_work, mce_do_trigger); | ||
492 | |||
493 | /* | ||
494 | * Notify the user(s) about new machine check events. | ||
495 | * Can be called from interrupt context, but not from machine check/NMI | ||
496 | * context. | ||
497 | */ | ||
498 | int mce_notify_user(void) | ||
499 | { | ||
500 | /* Not more than two messages every minute */ | ||
501 | static DEFINE_RATELIMIT_STATE(ratelimit, 60*HZ, 2); | ||
502 | |||
503 | clear_thread_flag(TIF_MCE_NOTIFY); | ||
504 | if (test_and_clear_bit(0, ¬ify_user)) { | ||
505 | wake_up_interruptible(&mce_wait); | ||
506 | |||
507 | /* | ||
508 | * There is no risk of missing notifications because | ||
509 | * work_pending is always cleared before the function is | ||
510 | * executed. | ||
511 | */ | ||
512 | if (trigger[0] && !work_pending(&mce_trigger_work)) | ||
513 | schedule_work(&mce_trigger_work); | ||
514 | |||
515 | if (__ratelimit(&ratelimit)) | ||
516 | printk(KERN_INFO "Machine check events logged\n"); | ||
517 | |||
518 | return 1; | ||
519 | } | ||
520 | return 0; | ||
521 | } | ||
522 | |||
523 | /* see if the idle task needs to notify userspace */ | ||
524 | static int | ||
525 | mce_idle_callback(struct notifier_block *nfb, unsigned long action, void *junk) | ||
526 | { | ||
527 | /* IDLE_END should be safe - interrupts are back on */ | ||
528 | if (action == IDLE_END && test_thread_flag(TIF_MCE_NOTIFY)) | ||
529 | mce_notify_user(); | ||
530 | |||
531 | return NOTIFY_OK; | ||
532 | } | ||
533 | |||
534 | static struct notifier_block mce_idle_notifier = { | ||
535 | .notifier_call = mce_idle_callback, | ||
536 | }; | ||
537 | |||
538 | static __init int periodic_mcheck_init(void) | ||
539 | { | ||
540 | idle_notifier_register(&mce_idle_notifier); | ||
541 | return 0; | ||
542 | } | ||
543 | __initcall(periodic_mcheck_init); | ||
544 | |||
545 | /* | ||
546 | * Initialize Machine Checks for a CPU. | ||
547 | */ | ||
548 | static int mce_cap_init(void) | ||
549 | { | ||
550 | u64 cap; | ||
551 | unsigned b; | ||
552 | |||
553 | rdmsrl(MSR_IA32_MCG_CAP, cap); | ||
554 | b = cap & 0xff; | ||
555 | if (b > MAX_NR_BANKS) { | ||
556 | printk(KERN_WARNING | ||
557 | "MCE: Using only %u machine check banks out of %u\n", | ||
558 | MAX_NR_BANKS, b); | ||
559 | b = MAX_NR_BANKS; | ||
560 | } | ||
561 | |||
562 | /* Don't support asymmetric configurations today */ | ||
563 | WARN_ON(banks != 0 && b != banks); | ||
564 | banks = b; | ||
565 | if (!bank) { | ||
566 | bank = kmalloc(banks * sizeof(u64), GFP_KERNEL); | ||
567 | if (!bank) | ||
568 | return -ENOMEM; | ||
569 | memset(bank, 0xff, banks * sizeof(u64)); | ||
570 | } | ||
571 | |||
572 | /* Use accurate RIP reporting if available. */ | ||
573 | if ((cap & (1<<9)) && ((cap >> 16) & 0xff) >= 9) | ||
574 | rip_msr = MSR_IA32_MCG_EIP; | ||
575 | |||
576 | return 0; | ||
577 | } | ||
578 | |||
579 | static void mce_init(void *dummy) | ||
580 | { | ||
581 | u64 cap; | ||
582 | int i; | ||
583 | mce_banks_t all_banks; | ||
584 | |||
585 | /* | ||
586 | * Log the machine checks left over from the previous reset. | ||
587 | */ | ||
588 | bitmap_fill(all_banks, MAX_NR_BANKS); | ||
589 | machine_check_poll(MCP_UC|(!mce_bootlog ? MCP_DONTLOG : 0), &all_banks); | ||
590 | |||
591 | set_in_cr4(X86_CR4_MCE); | ||
592 | |||
593 | rdmsrl(MSR_IA32_MCG_CAP, cap); | ||
594 | if (cap & MCG_CTL_P) | ||
595 | wrmsr(MSR_IA32_MCG_CTL, 0xffffffff, 0xffffffff); | ||
596 | |||
597 | for (i = 0; i < banks; i++) { | ||
598 | wrmsrl(MSR_IA32_MC0_CTL+4*i, bank[i]); | ||
599 | wrmsrl(MSR_IA32_MC0_STATUS+4*i, 0); | ||
600 | } | ||
601 | } | ||
602 | |||
603 | /* Add per CPU specific workarounds here */ | ||
604 | static void mce_cpu_quirks(struct cpuinfo_x86 *c) | ||
605 | { | ||
606 | /* This should be disabled by the BIOS, but isn't always */ | ||
607 | if (c->x86_vendor == X86_VENDOR_AMD) { | ||
608 | if (c->x86 == 15 && banks > 4) | ||
609 | /* disable GART TBL walk error reporting, which trips off | ||
610 | incorrectly with the IOMMU & 3ware & Cerberus. */ | ||
611 | clear_bit(10, (unsigned long *)&bank[4]); | ||
612 | if(c->x86 <= 17 && mce_bootlog < 0) | ||
613 | /* Lots of broken BIOS around that don't clear them | ||
614 | by default and leave crap in there. Don't log. */ | ||
615 | mce_bootlog = 0; | ||
616 | } | ||
617 | |||
618 | } | ||
619 | |||
620 | static void mce_cpu_features(struct cpuinfo_x86 *c) | ||
621 | { | ||
622 | switch (c->x86_vendor) { | ||
623 | case X86_VENDOR_INTEL: | ||
624 | mce_intel_feature_init(c); | ||
625 | break; | ||
626 | case X86_VENDOR_AMD: | ||
627 | mce_amd_feature_init(c); | ||
628 | break; | ||
629 | default: | ||
630 | break; | ||
631 | } | ||
632 | } | ||
633 | |||
634 | static void mce_init_timer(void) | ||
635 | { | ||
636 | struct timer_list *t = &__get_cpu_var(mce_timer); | ||
637 | int *n = &__get_cpu_var(next_interval); | ||
638 | |||
639 | *n = check_interval * HZ; | ||
640 | if (!*n) | ||
641 | return; | ||
642 | setup_timer(t, mcheck_timer, smp_processor_id()); | ||
643 | t->expires = round_jiffies(jiffies + *n); | ||
644 | add_timer(t); | ||
645 | } | ||
646 | |||
647 | /* | ||
648 | * Called for each booted CPU to set up machine checks. | ||
649 | * Must be called with preempt off. | ||
650 | */ | ||
651 | void __cpuinit mcheck_init(struct cpuinfo_x86 *c) | ||
652 | { | ||
653 | if (!mce_available(c)) | ||
654 | return; | ||
655 | |||
656 | if (mce_cap_init() < 0) { | ||
657 | mce_dont_init = 1; | ||
658 | return; | ||
659 | } | ||
660 | mce_cpu_quirks(c); | ||
661 | |||
662 | mce_init(NULL); | ||
663 | mce_cpu_features(c); | ||
664 | mce_init_timer(); | ||
665 | } | ||
666 | |||
667 | /* | ||
668 | * Character device to read and clear the MCE log. | ||
669 | */ | ||
670 | |||
671 | static DEFINE_SPINLOCK(mce_state_lock); | ||
672 | static int open_count; /* #times opened */ | ||
673 | static int open_exclu; /* already open exclusive? */ | ||
674 | |||
675 | static int mce_open(struct inode *inode, struct file *file) | ||
676 | { | ||
677 | lock_kernel(); | ||
678 | spin_lock(&mce_state_lock); | ||
679 | |||
680 | if (open_exclu || (open_count && (file->f_flags & O_EXCL))) { | ||
681 | spin_unlock(&mce_state_lock); | ||
682 | unlock_kernel(); | ||
683 | return -EBUSY; | ||
684 | } | ||
685 | |||
686 | if (file->f_flags & O_EXCL) | ||
687 | open_exclu = 1; | ||
688 | open_count++; | ||
689 | |||
690 | spin_unlock(&mce_state_lock); | ||
691 | unlock_kernel(); | ||
692 | |||
693 | return nonseekable_open(inode, file); | ||
694 | } | ||
695 | |||
696 | static int mce_release(struct inode *inode, struct file *file) | ||
697 | { | ||
698 | spin_lock(&mce_state_lock); | ||
699 | |||
700 | open_count--; | ||
701 | open_exclu = 0; | ||
702 | |||
703 | spin_unlock(&mce_state_lock); | ||
704 | |||
705 | return 0; | ||
706 | } | ||
707 | |||
708 | static void collect_tscs(void *data) | ||
709 | { | ||
710 | unsigned long *cpu_tsc = (unsigned long *)data; | ||
711 | |||
712 | rdtscll(cpu_tsc[smp_processor_id()]); | ||
713 | } | ||
714 | |||
715 | static ssize_t mce_read(struct file *filp, char __user *ubuf, size_t usize, | ||
716 | loff_t *off) | ||
717 | { | ||
718 | unsigned long *cpu_tsc; | ||
719 | static DEFINE_MUTEX(mce_read_mutex); | ||
720 | unsigned prev, next; | ||
721 | char __user *buf = ubuf; | ||
722 | int i, err; | ||
723 | |||
724 | cpu_tsc = kmalloc(nr_cpu_ids * sizeof(long), GFP_KERNEL); | ||
725 | if (!cpu_tsc) | ||
726 | return -ENOMEM; | ||
727 | |||
728 | mutex_lock(&mce_read_mutex); | ||
729 | next = rcu_dereference(mcelog.next); | ||
730 | |||
731 | /* Only supports full reads right now */ | ||
732 | if (*off != 0 || usize < MCE_LOG_LEN*sizeof(struct mce)) { | ||
733 | mutex_unlock(&mce_read_mutex); | ||
734 | kfree(cpu_tsc); | ||
735 | return -EINVAL; | ||
736 | } | ||
737 | |||
738 | err = 0; | ||
739 | prev = 0; | ||
740 | do { | ||
741 | for (i = prev; i < next; i++) { | ||
742 | unsigned long start = jiffies; | ||
743 | |||
744 | while (!mcelog.entry[i].finished) { | ||
745 | if (time_after_eq(jiffies, start + 2)) { | ||
746 | memset(mcelog.entry + i, 0, | ||
747 | sizeof(struct mce)); | ||
748 | goto timeout; | ||
749 | } | ||
750 | cpu_relax(); | ||
751 | } | ||
752 | smp_rmb(); | ||
753 | err |= copy_to_user(buf, mcelog.entry + i, | ||
754 | sizeof(struct mce)); | ||
755 | buf += sizeof(struct mce); | ||
756 | timeout: | ||
757 | ; | ||
758 | } | ||
759 | |||
760 | memset(mcelog.entry + prev, 0, | ||
761 | (next - prev) * sizeof(struct mce)); | ||
762 | prev = next; | ||
763 | next = cmpxchg(&mcelog.next, prev, 0); | ||
764 | } while (next != prev); | ||
765 | |||
766 | synchronize_sched(); | ||
767 | |||
768 | /* | ||
769 | * Collect entries that were still getting written before the | ||
770 | * synchronize. | ||
771 | */ | ||
772 | on_each_cpu(collect_tscs, cpu_tsc, 1); | ||
773 | for (i = next; i < MCE_LOG_LEN; i++) { | ||
774 | if (mcelog.entry[i].finished && | ||
775 | mcelog.entry[i].tsc < cpu_tsc[mcelog.entry[i].cpu]) { | ||
776 | err |= copy_to_user(buf, mcelog.entry+i, | ||
777 | sizeof(struct mce)); | ||
778 | smp_rmb(); | ||
779 | buf += sizeof(struct mce); | ||
780 | memset(&mcelog.entry[i], 0, sizeof(struct mce)); | ||
781 | } | ||
782 | } | ||
783 | mutex_unlock(&mce_read_mutex); | ||
784 | kfree(cpu_tsc); | ||
785 | return err ? -EFAULT : buf - ubuf; | ||
786 | } | ||
787 | |||
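
The read loop above drains mcelog by copying and clearing entries [prev, next) and then trying to swing mcelog.next back to 0 with cmpxchg; if producers logged more records in the meantime, the cmpxchg returns the new index without swapping and the loop runs again so nothing is lost. Below is a minimal stand-alone user-space model of that consumer protocol (not kernel code; copy_one() is a hypothetical stand-in for the copy_to_user() path, and the GCC builtin models cmpxchg):

#include <stdio.h>

static unsigned int log_next;                   /* models mcelog.next */

static unsigned int drain(void (*copy_one)(unsigned int idx))
{
        unsigned int prev = 0, next = log_next, copied = 0;

        do {
                unsigned int i;

                for (i = prev; i < next; i++) {
                        copy_one(i);            /* copy_to_user() in the kernel */
                        copied++;
                }
                prev = next;
                /* models next = cmpxchg(&mcelog.next, prev, 0) */
                next = __sync_val_compare_and_swap(&log_next, prev, 0);
        } while (next != prev);

        return copied;
}

static void copy_one(unsigned int idx)
{
        printf("copied entry %u\n", idx);
}

int main(void)
{
        log_next = 3;                           /* pretend three records are queued */
        printf("drained %u entries\n", drain(copy_one));
        return 0;
}
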
788 | static unsigned int mce_poll(struct file *file, poll_table *wait) | ||
789 | { | ||
790 | poll_wait(file, &mce_wait, wait); | ||
791 | if (rcu_dereference(mcelog.next)) | ||
792 | return POLLIN | POLLRDNORM; | ||
793 | return 0; | ||
794 | } | ||
795 | |||
796 | static long mce_ioctl(struct file *f, unsigned int cmd, unsigned long arg) | ||
797 | { | ||
798 | int __user *p = (int __user *)arg; | ||
799 | |||
800 | if (!capable(CAP_SYS_ADMIN)) | ||
801 | return -EPERM; | ||
802 | switch (cmd) { | ||
803 | case MCE_GET_RECORD_LEN: | ||
804 | return put_user(sizeof(struct mce), p); | ||
805 | case MCE_GET_LOG_LEN: | ||
806 | return put_user(MCE_LOG_LEN, p); | ||
807 | case MCE_GETCLEAR_FLAGS: { | ||
808 | unsigned flags; | ||
809 | |||
810 | do { | ||
811 | flags = mcelog.flags; | ||
812 | } while (cmpxchg(&mcelog.flags, flags, 0) != flags); | ||
813 | return put_user(flags, p); | ||
814 | } | ||
815 | default: | ||
816 | return -ENOTTY; | ||
817 | } | ||
818 | } | ||
819 | |||
820 | static const struct file_operations mce_chrdev_ops = { | ||
821 | .open = mce_open, | ||
822 | .release = mce_release, | ||
823 | .read = mce_read, | ||
824 | .poll = mce_poll, | ||
825 | .unlocked_ioctl = mce_ioctl, | ||
826 | }; | ||
827 | |||
828 | static struct miscdevice mce_log_device = { | ||
829 | MISC_MCELOG_MINOR, | ||
830 | "mcelog", | ||
831 | &mce_chrdev_ops, | ||
832 | }; | ||
833 | |||
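
The misc device registered above is what a user-space logger (such as the mcelog daemon) reads binary records from. A hedged sketch of such a reader follows; the MCE_GET_RECORD_LEN / MCE_GET_LOG_LEN ioctl definitions are assumed to match the ones exported by this tree's asm/mce.h, and note that mce_read() above only accepts whole-log-sized reads:

#include <fcntl.h>
#include <stdio.h>
#include <stdlib.h>
#include <sys/ioctl.h>
#include <linux/ioctl.h>
#include <unistd.h>

#ifndef MCE_GET_RECORD_LEN
#define MCE_GET_RECORD_LEN	_IOR('M', 1, int)	/* assumed, from asm/mce.h */
#define MCE_GET_LOG_LEN		_IOR('M', 2, int)	/* assumed, from asm/mce.h */
#endif

int main(void)
{
	int fd, rec_len, log_len;
	ssize_t n;
	char *buf;

	fd = open("/dev/mcelog", O_RDONLY);
	if (fd < 0)
		return 1;

	if (ioctl(fd, MCE_GET_RECORD_LEN, &rec_len) < 0 ||
	    ioctl(fd, MCE_GET_LOG_LEN, &log_len) < 0)
		return 1;

	/* mce_read() rejects partial reads, so size the buffer for the full log */
	buf = malloc((size_t)rec_len * log_len);
	if (!buf)
		return 1;

	n = read(fd, buf, (size_t)rec_len * log_len);
	if (n >= 0)
		printf("read %zd bytes (%d records of %d bytes)\n",
		       n, (int)(n / rec_len), rec_len);

	free(buf);
	close(fd);
	return 0;
}
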
834 | /* | ||
835 | * Old style boot options parsing. Only for compatibility. | ||
836 | */ | ||
837 | static int __init mcheck_disable(char *str) | ||
838 | { | ||
839 | mce_dont_init = 1; | ||
840 | return 1; | ||
841 | } | ||
842 | |||
843 | /* mce=off disables machine check. | ||
844 | mce=TOLERANCELEVEL (number, see above) | ||
845 | mce=bootlog Log MCEs from before booting. Disabled by default on AMD. | ||
846 | mce=nobootlog Don't log MCEs from before booting. */ | ||
847 | static int __init mcheck_enable(char *str) | ||
848 | { | ||
849 | if (!strcmp(str, "off")) | ||
850 | mce_dont_init = 1; | ||
851 | else if (!strcmp(str, "bootlog") || !strcmp(str, "nobootlog")) | ||
852 | mce_bootlog = str[0] == 'b'; | ||
853 | else if (isdigit(str[0])) | ||
854 | get_option(&str, &tolerant); | ||
855 | else | ||
856 | printk("mce= argument %s ignored. Please use /sys\n", str); | ||
857 | return 1; | ||
858 | } | ||
859 | |||
860 | __setup("nomce", mcheck_disable); | ||
861 | __setup("mce=", mcheck_enable); | ||
862 | |||
863 | /* | ||
864 | * Sysfs support | ||
865 | */ | ||
866 | |||
867 | /* | ||
868 | * Disable machine checks on suspend and shutdown. We can't really handle | ||
869 | * them later. | ||
870 | */ | ||
871 | static int mce_disable(void) | ||
872 | { | ||
873 | int i; | ||
874 | |||
875 | for (i = 0; i < banks; i++) | ||
876 | wrmsrl(MSR_IA32_MC0_CTL + i*4, 0); | ||
877 | return 0; | ||
878 | } | ||
879 | |||
880 | static int mce_suspend(struct sys_device *dev, pm_message_t state) | ||
881 | { | ||
882 | return mce_disable(); | ||
883 | } | ||
884 | |||
885 | static int mce_shutdown(struct sys_device *dev) | ||
886 | { | ||
887 | return mce_disable(); | ||
888 | } | ||
889 | |||
890 | /* On resume clear all MCE state. Don't want to see leftovers from the BIOS. | ||
891 | Only one CPU is active at this time, the others get re-added later using | ||
892 | CPU hotplug. */ | ||
893 | static int mce_resume(struct sys_device *dev) | ||
894 | { | ||
895 | mce_init(NULL); | ||
896 | mce_cpu_features(&current_cpu_data); | ||
897 | return 0; | ||
898 | } | ||
899 | |||
900 | static void mce_cpu_restart(void *data) | ||
901 | { | ||
902 | del_timer_sync(&__get_cpu_var(mce_timer)); | ||
903 | if (mce_available(&current_cpu_data)) | ||
904 | mce_init(NULL); | ||
905 | mce_init_timer(); | ||
906 | } | ||
907 | |||
908 | /* Reinit MCEs after user configuration changes */ | ||
909 | static void mce_restart(void) | ||
910 | { | ||
911 | on_each_cpu(mce_cpu_restart, NULL, 1); | ||
912 | } | ||
913 | |||
914 | static struct sysdev_class mce_sysclass = { | ||
915 | .suspend = mce_suspend, | ||
916 | .shutdown = mce_shutdown, | ||
917 | .resume = mce_resume, | ||
918 | .name = "machinecheck", | ||
919 | }; | ||
920 | |||
921 | DEFINE_PER_CPU(struct sys_device, device_mce); | ||
922 | void (*threshold_cpu_callback)(unsigned long action, unsigned int cpu) __cpuinitdata; | ||
923 | |||
924 | /* Why are there no generic functions for this? */ | ||
925 | #define ACCESSOR(name, var, start) \ | ||
926 | static ssize_t show_ ## name(struct sys_device *s, \ | ||
927 | struct sysdev_attribute *attr, \ | ||
928 | char *buf) { \ | ||
929 | return sprintf(buf, "%lx\n", (unsigned long)var); \ | ||
930 | } \ | ||
931 | static ssize_t set_ ## name(struct sys_device *s, \ | ||
932 | struct sysdev_attribute *attr, \ | ||
933 | const char *buf, size_t siz) { \ | ||
934 | char *end; \ | ||
935 | unsigned long new = simple_strtoul(buf, &end, 0); \ | ||
936 | if (end == buf) return -EINVAL; \ | ||
937 | var = new; \ | ||
938 | start; \ | ||
939 | return end-buf; \ | ||
940 | } \ | ||
941 | static SYSDEV_ATTR(name, 0644, show_ ## name, set_ ## name); | ||
942 | |||
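
For illustration, the single use of this macro further down, ACCESSOR(check_interval, check_interval, mce_restart()), expands to roughly the following show/set pair plus attribute (whitespace added):

static ssize_t show_check_interval(struct sys_device *s,
				   struct sysdev_attribute *attr, char *buf)
{
	return sprintf(buf, "%lx\n", (unsigned long)check_interval);
}

static ssize_t set_check_interval(struct sys_device *s,
				  struct sysdev_attribute *attr,
				  const char *buf, size_t siz)
{
	char *end;
	unsigned long new = simple_strtoul(buf, &end, 0);

	if (end == buf)
		return -EINVAL;
	check_interval = new;
	mce_restart();
	return end - buf;
}

static SYSDEV_ATTR(check_interval, 0644, show_check_interval, set_check_interval);
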
943 | static struct sysdev_attribute *bank_attrs; | ||
944 | |||
945 | static ssize_t show_bank(struct sys_device *s, struct sysdev_attribute *attr, | ||
946 | char *buf) | ||
947 | { | ||
948 | u64 b = bank[attr - bank_attrs]; | ||
949 | return sprintf(buf, "%llx\n", b); | ||
950 | } | ||
951 | |||
952 | static ssize_t set_bank(struct sys_device *s, struct sysdev_attribute *attr, | ||
953 | const char *buf, size_t siz) | ||
954 | { | ||
955 | char *end; | ||
956 | u64 new = simple_strtoull(buf, &end, 0); | ||
957 | if (end == buf) | ||
958 | return -EINVAL; | ||
959 | bank[attr - bank_attrs] = new; | ||
960 | mce_restart(); | ||
961 | return end-buf; | ||
962 | } | ||
963 | |||
964 | static ssize_t show_trigger(struct sys_device *s, struct sysdev_attribute *attr, | ||
965 | char *buf) | ||
966 | { | ||
967 | strcpy(buf, trigger); | ||
968 | strcat(buf, "\n"); | ||
969 | return strlen(trigger) + 1; | ||
970 | } | ||
971 | |||
972 | static ssize_t set_trigger(struct sys_device *s, struct sysdev_attribute *attr, | ||
973 | const char *buf, size_t siz) | ||
974 | { | ||
975 | char *p; | ||
976 | int len; | ||
977 | strncpy(trigger, buf, sizeof(trigger)); | ||
978 | trigger[sizeof(trigger)-1] = 0; | ||
979 | len = strlen(trigger); | ||
980 | p = strchr(trigger, '\n'); | ||
981 | if (p) *p = 0; | ||
982 | return len; | ||
983 | } | ||
984 | |||
985 | static SYSDEV_ATTR(trigger, 0644, show_trigger, set_trigger); | ||
986 | static SYSDEV_INT_ATTR(tolerant, 0644, tolerant); | ||
987 | ACCESSOR(check_interval, check_interval, mce_restart()) | ||
988 | static struct sysdev_attribute *mce_attributes[] = { | ||
989 | &attr_tolerant.attr, &attr_check_interval, &attr_trigger, | ||
990 | NULL | ||
991 | }; | ||
992 | |||
993 | static cpumask_var_t mce_device_initialized; | ||
994 | |||
995 | /* Per cpu sysdev init. All of the cpus still share the same ctl bank */ | ||
996 | static __cpuinit int mce_create_device(unsigned int cpu) | ||
997 | { | ||
998 | int err; | ||
999 | int i; | ||
1000 | |||
1001 | if (!mce_available(&boot_cpu_data)) | ||
1002 | return -EIO; | ||
1003 | |||
1004 | memset(&per_cpu(device_mce, cpu).kobj, 0, sizeof(struct kobject)); | ||
1005 | per_cpu(device_mce,cpu).id = cpu; | ||
1006 | per_cpu(device_mce,cpu).cls = &mce_sysclass; | ||
1007 | |||
1008 | err = sysdev_register(&per_cpu(device_mce,cpu)); | ||
1009 | if (err) | ||
1010 | return err; | ||
1011 | |||
1012 | for (i = 0; mce_attributes[i]; i++) { | ||
1013 | err = sysdev_create_file(&per_cpu(device_mce,cpu), | ||
1014 | mce_attributes[i]); | ||
1015 | if (err) | ||
1016 | goto error; | ||
1017 | } | ||
1018 | for (i = 0; i < banks; i++) { | ||
1019 | err = sysdev_create_file(&per_cpu(device_mce, cpu), | ||
1020 | &bank_attrs[i]); | ||
1021 | if (err) | ||
1022 | goto error2; | ||
1023 | } | ||
1024 | cpumask_set_cpu(cpu, mce_device_initialized); | ||
1025 | |||
1026 | return 0; | ||
1027 | error2: | ||
1028 | while (--i >= 0) { | ||
1029 | sysdev_remove_file(&per_cpu(device_mce, cpu), | ||
1030 | &bank_attrs[i]); | ||
1031 | } | ||
1032 | error: | ||
1033 | while (--i >= 0) { | ||
1034 | sysdev_remove_file(&per_cpu(device_mce,cpu), | ||
1035 | mce_attributes[i]); | ||
1036 | } | ||
1037 | sysdev_unregister(&per_cpu(device_mce,cpu)); | ||
1038 | |||
1039 | return err; | ||
1040 | } | ||
1041 | |||
1042 | static __cpuinit void mce_remove_device(unsigned int cpu) | ||
1043 | { | ||
1044 | int i; | ||
1045 | |||
1046 | if (!cpumask_test_cpu(cpu, mce_device_initialized)) | ||
1047 | return; | ||
1048 | |||
1049 | for (i = 0; mce_attributes[i]; i++) | ||
1050 | sysdev_remove_file(&per_cpu(device_mce,cpu), | ||
1051 | mce_attributes[i]); | ||
1052 | for (i = 0; i < banks; i++) | ||
1053 | sysdev_remove_file(&per_cpu(device_mce, cpu), | ||
1054 | &bank_attrs[i]); | ||
1055 | sysdev_unregister(&per_cpu(device_mce,cpu)); | ||
1056 | cpumask_clear_cpu(cpu, mce_device_initialized); | ||
1057 | } | ||
1058 | |||
1059 | /* Make sure there are no machine checks on offlined CPUs. */ | ||
1060 | static void mce_disable_cpu(void *h) | ||
1061 | { | ||
1062 | int i; | ||
1063 | unsigned long action = *(unsigned long *)h; | ||
1064 | |||
1065 | if (!mce_available(&current_cpu_data)) | ||
1066 | return; | ||
1067 | if (!(action & CPU_TASKS_FROZEN)) | ||
1068 | cmci_clear(); | ||
1069 | for (i = 0; i < banks; i++) | ||
1070 | wrmsrl(MSR_IA32_MC0_CTL + i*4, 0); | ||
1071 | } | ||
1072 | |||
1073 | static void mce_reenable_cpu(void *h) | ||
1074 | { | ||
1075 | int i; | ||
1076 | unsigned long action = *(unsigned long *)h; | ||
1077 | |||
1078 | if (!mce_available(&current_cpu_data)) | ||
1079 | return; | ||
1080 | if (!(action & CPU_TASKS_FROZEN)) | ||
1081 | cmci_reenable(); | ||
1082 | for (i = 0; i < banks; i++) | ||
1083 | wrmsrl(MSR_IA32_MC0_CTL + i*4, bank[i]); | ||
1084 | } | ||
1085 | |||
1086 | /* Get notified when a cpu comes on/off. Be hotplug friendly. */ | ||
1087 | static int __cpuinit mce_cpu_callback(struct notifier_block *nfb, | ||
1088 | unsigned long action, void *hcpu) | ||
1089 | { | ||
1090 | unsigned int cpu = (unsigned long)hcpu; | ||
1091 | struct timer_list *t = &per_cpu(mce_timer, cpu); | ||
1092 | |||
1093 | switch (action) { | ||
1094 | case CPU_ONLINE: | ||
1095 | case CPU_ONLINE_FROZEN: | ||
1096 | mce_create_device(cpu); | ||
1097 | if (threshold_cpu_callback) | ||
1098 | threshold_cpu_callback(action, cpu); | ||
1099 | break; | ||
1100 | case CPU_DEAD: | ||
1101 | case CPU_DEAD_FROZEN: | ||
1102 | if (threshold_cpu_callback) | ||
1103 | threshold_cpu_callback(action, cpu); | ||
1104 | mce_remove_device(cpu); | ||
1105 | break; | ||
1106 | case CPU_DOWN_PREPARE: | ||
1107 | case CPU_DOWN_PREPARE_FROZEN: | ||
1108 | del_timer_sync(t); | ||
1109 | smp_call_function_single(cpu, mce_disable_cpu, &action, 1); | ||
1110 | break; | ||
1111 | case CPU_DOWN_FAILED: | ||
1112 | case CPU_DOWN_FAILED_FROZEN: | ||
1113 | t->expires = round_jiffies(jiffies + | ||
1114 | __get_cpu_var(next_interval)); | ||
1115 | add_timer_on(t, cpu); | ||
1116 | smp_call_function_single(cpu, mce_reenable_cpu, &action, 1); | ||
1117 | break; | ||
1118 | case CPU_POST_DEAD: | ||
1119 | /* intentionally ignoring frozen here */ | ||
1120 | cmci_rediscover(cpu); | ||
1121 | break; | ||
1122 | } | ||
1123 | return NOTIFY_OK; | ||
1124 | } | ||
1125 | |||
1126 | static struct notifier_block mce_cpu_notifier __cpuinitdata = { | ||
1127 | .notifier_call = mce_cpu_callback, | ||
1128 | }; | ||
1129 | |||
1130 | static __init int mce_init_banks(void) | ||
1131 | { | ||
1132 | int i; | ||
1133 | |||
1134 | bank_attrs = kzalloc(sizeof(struct sysdev_attribute) * banks, | ||
1135 | GFP_KERNEL); | ||
1136 | if (!bank_attrs) | ||
1137 | return -ENOMEM; | ||
1138 | |||
1139 | for (i = 0; i < banks; i++) { | ||
1140 | struct sysdev_attribute *a = &bank_attrs[i]; | ||
1141 | a->attr.name = kasprintf(GFP_KERNEL, "bank%d", i); | ||
1142 | if (!a->attr.name) | ||
1143 | goto nomem; | ||
1144 | a->attr.mode = 0644; | ||
1145 | a->show = show_bank; | ||
1146 | a->store = set_bank; | ||
1147 | } | ||
1148 | return 0; | ||
1149 | |||
1150 | nomem: | ||
1151 | while (--i >= 0) | ||
1152 | kfree(bank_attrs[i].attr.name); | ||
1153 | kfree(bank_attrs); | ||
1154 | bank_attrs = NULL; | ||
1155 | return -ENOMEM; | ||
1156 | } | ||
1157 | |||
1158 | static __init int mce_init_device(void) | ||
1159 | { | ||
1160 | int err; | ||
1161 | int i = 0; | ||
1162 | |||
1163 | if (!mce_available(&boot_cpu_data)) | ||
1164 | return -EIO; | ||
1165 | |||
1166 | zalloc_cpumask_var(&mce_device_initialized, GFP_KERNEL); | ||
1167 | |||
1168 | err = mce_init_banks(); | ||
1169 | if (err) | ||
1170 | return err; | ||
1171 | |||
1172 | err = sysdev_class_register(&mce_sysclass); | ||
1173 | if (err) | ||
1174 | return err; | ||
1175 | |||
1176 | for_each_online_cpu(i) { | ||
1177 | err = mce_create_device(i); | ||
1178 | if (err) | ||
1179 | return err; | ||
1180 | } | ||
1181 | |||
1182 | register_hotcpu_notifier(&mce_cpu_notifier); | ||
1183 | misc_register(&mce_log_device); | ||
1184 | return err; | ||
1185 | } | ||
1186 | |||
1187 | device_initcall(mce_init_device); | ||
diff --git a/arch/x86/kernel/cpu/mcheck/mce_amd_64.c b/arch/x86/kernel/cpu/mcheck/mce_amd.c index 56dde9c4bc96..ddae21620bda 100644 --- a/arch/x86/kernel/cpu/mcheck/mce_amd_64.c +++ b/arch/x86/kernel/cpu/mcheck/mce_amd.c | |||
@@ -13,22 +13,22 @@ | |||
13 | * | 13 | * |
14 | * All MC4_MISCi registers are shared between multi-cores | 14 | * All MC4_MISCi registers are shared between multi-cores |
15 | */ | 15 | */ |
16 | |||
17 | #include <linux/cpu.h> | ||
18 | #include <linux/errno.h> | ||
19 | #include <linux/init.h> | ||
20 | #include <linux/interrupt.h> | 16 | #include <linux/interrupt.h> |
21 | #include <linux/kobject.h> | ||
22 | #include <linux/notifier.h> | 17 | #include <linux/notifier.h> |
23 | #include <linux/sched.h> | 18 | #include <linux/kobject.h> |
24 | #include <linux/smp.h> | 19 | #include <linux/percpu.h> |
25 | #include <linux/sysdev.h> | 20 | #include <linux/sysdev.h> |
21 | #include <linux/errno.h> | ||
22 | #include <linux/sched.h> | ||
26 | #include <linux/sysfs.h> | 23 | #include <linux/sysfs.h> |
24 | #include <linux/init.h> | ||
25 | #include <linux/cpu.h> | ||
26 | #include <linux/smp.h> | ||
27 | |||
27 | #include <asm/apic.h> | 28 | #include <asm/apic.h> |
29 | #include <asm/idle.h> | ||
28 | #include <asm/mce.h> | 30 | #include <asm/mce.h> |
29 | #include <asm/msr.h> | 31 | #include <asm/msr.h> |
30 | #include <asm/percpu.h> | ||
31 | #include <asm/idle.h> | ||
32 | 32 | ||
33 | #define PFX "mce_threshold: " | 33 | #define PFX "mce_threshold: " |
34 | #define VERSION "version 1.1.1" | 34 | #define VERSION "version 1.1.1" |
@@ -48,26 +48,26 @@ | |||
48 | #define MCG_XBLK_ADDR 0xC0000400 | 48 | #define MCG_XBLK_ADDR 0xC0000400 |
49 | 49 | ||
50 | struct threshold_block { | 50 | struct threshold_block { |
51 | unsigned int block; | 51 | unsigned int block; |
52 | unsigned int bank; | 52 | unsigned int bank; |
53 | unsigned int cpu; | 53 | unsigned int cpu; |
54 | u32 address; | 54 | u32 address; |
55 | u16 interrupt_enable; | 55 | u16 interrupt_enable; |
56 | u16 threshold_limit; | 56 | u16 threshold_limit; |
57 | struct kobject kobj; | 57 | struct kobject kobj; |
58 | struct list_head miscj; | 58 | struct list_head miscj; |
59 | }; | 59 | }; |
60 | 60 | ||
61 | /* defaults used early on boot */ | 61 | /* defaults used early on boot */ |
62 | static struct threshold_block threshold_defaults = { | 62 | static struct threshold_block threshold_defaults = { |
63 | .interrupt_enable = 0, | 63 | .interrupt_enable = 0, |
64 | .threshold_limit = THRESHOLD_MAX, | 64 | .threshold_limit = THRESHOLD_MAX, |
65 | }; | 65 | }; |
66 | 66 | ||
67 | struct threshold_bank { | 67 | struct threshold_bank { |
68 | struct kobject *kobj; | 68 | struct kobject *kobj; |
69 | struct threshold_block *blocks; | 69 | struct threshold_block *blocks; |
70 | cpumask_var_t cpus; | 70 | cpumask_var_t cpus; |
71 | }; | 71 | }; |
72 | static DEFINE_PER_CPU(struct threshold_bank *, threshold_banks[NR_BANKS]); | 72 | static DEFINE_PER_CPU(struct threshold_bank *, threshold_banks[NR_BANKS]); |
73 | 73 | ||
@@ -86,9 +86,9 @@ static void amd_threshold_interrupt(void); | |||
86 | */ | 86 | */ |
87 | 87 | ||
88 | struct thresh_restart { | 88 | struct thresh_restart { |
89 | struct threshold_block *b; | 89 | struct threshold_block *b; |
90 | int reset; | 90 | int reset; |
91 | u16 old_limit; | 91 | u16 old_limit; |
92 | }; | 92 | }; |
93 | 93 | ||
94 | /* must be called with correct cpu affinity */ | 94 | /* must be called with correct cpu affinity */ |
@@ -110,6 +110,7 @@ static void threshold_restart_bank(void *_tr) | |||
110 | } else if (tr->old_limit) { /* change limit w/o reset */ | 110 | } else if (tr->old_limit) { /* change limit w/o reset */ |
111 | int new_count = (mci_misc_hi & THRESHOLD_MAX) + | 111 | int new_count = (mci_misc_hi & THRESHOLD_MAX) + |
112 | (tr->old_limit - tr->b->threshold_limit); | 112 | (tr->old_limit - tr->b->threshold_limit); |
113 | |||
113 | mci_misc_hi = (mci_misc_hi & ~MASK_ERR_COUNT_HI) | | 114 | mci_misc_hi = (mci_misc_hi & ~MASK_ERR_COUNT_HI) | |
114 | (new_count & THRESHOLD_MAX); | 115 | (new_count & THRESHOLD_MAX); |
115 | } | 116 | } |
@@ -125,11 +126,11 @@ static void threshold_restart_bank(void *_tr) | |||
125 | /* cpu init entry point, called from mce.c with preempt off */ | 126 | /* cpu init entry point, called from mce.c with preempt off */ |
126 | void mce_amd_feature_init(struct cpuinfo_x86 *c) | 127 | void mce_amd_feature_init(struct cpuinfo_x86 *c) |
127 | { | 128 | { |
128 | unsigned int bank, block; | ||
129 | unsigned int cpu = smp_processor_id(); | 129 | unsigned int cpu = smp_processor_id(); |
130 | u8 lvt_off; | ||
131 | u32 low = 0, high = 0, address = 0; | 130 | u32 low = 0, high = 0, address = 0; |
131 | unsigned int bank, block; | ||
132 | struct thresh_restart tr; | 132 | struct thresh_restart tr; |
133 | u8 lvt_off; | ||
133 | 134 | ||
134 | for (bank = 0; bank < NR_BANKS; ++bank) { | 135 | for (bank = 0; bank < NR_BANKS; ++bank) { |
135 | for (block = 0; block < NR_BLOCKS; ++block) { | 136 | for (block = 0; block < NR_BLOCKS; ++block) { |
@@ -140,8 +141,7 @@ void mce_amd_feature_init(struct cpuinfo_x86 *c) | |||
140 | if (!address) | 141 | if (!address) |
141 | break; | 142 | break; |
142 | address += MCG_XBLK_ADDR; | 143 | address += MCG_XBLK_ADDR; |
143 | } | 144 | } else |
144 | else | ||
145 | ++address; | 145 | ++address; |
146 | 146 | ||
147 | if (rdmsr_safe(address, &low, &high)) | 147 | if (rdmsr_safe(address, &low, &high)) |
@@ -193,9 +193,9 @@ void mce_amd_feature_init(struct cpuinfo_x86 *c) | |||
193 | */ | 193 | */ |
194 | static void amd_threshold_interrupt(void) | 194 | static void amd_threshold_interrupt(void) |
195 | { | 195 | { |
196 | u32 low = 0, high = 0, address = 0; | ||
196 | unsigned int bank, block; | 197 | unsigned int bank, block; |
197 | struct mce m; | 198 | struct mce m; |
198 | u32 low = 0, high = 0, address = 0; | ||
199 | 199 | ||
200 | mce_setup(&m); | 200 | mce_setup(&m); |
201 | 201 | ||
@@ -204,16 +204,16 @@ static void amd_threshold_interrupt(void) | |||
204 | if (!(per_cpu(bank_map, m.cpu) & (1 << bank))) | 204 | if (!(per_cpu(bank_map, m.cpu) & (1 << bank))) |
205 | continue; | 205 | continue; |
206 | for (block = 0; block < NR_BLOCKS; ++block) { | 206 | for (block = 0; block < NR_BLOCKS; ++block) { |
207 | if (block == 0) | 207 | if (block == 0) { |
208 | address = MSR_IA32_MC0_MISC + bank * 4; | 208 | address = MSR_IA32_MC0_MISC + bank * 4; |
209 | else if (block == 1) { | 209 | } else if (block == 1) { |
210 | address = (low & MASK_BLKPTR_LO) >> 21; | 210 | address = (low & MASK_BLKPTR_LO) >> 21; |
211 | if (!address) | 211 | if (!address) |
212 | break; | 212 | break; |
213 | address += MCG_XBLK_ADDR; | 213 | address += MCG_XBLK_ADDR; |
214 | } | 214 | } else { |
215 | else | ||
216 | ++address; | 215 | ++address; |
216 | } | ||
217 | 217 | ||
218 | if (rdmsr_safe(address, &low, &high)) | 218 | if (rdmsr_safe(address, &low, &high)) |
219 | break; | 219 | break; |
@@ -229,8 +229,10 @@ static void amd_threshold_interrupt(void) | |||
229 | (high & MASK_LOCKED_HI)) | 229 | (high & MASK_LOCKED_HI)) |
230 | continue; | 230 | continue; |
231 | 231 | ||
232 | /* Log the machine check that caused the threshold | 232 | /* |
233 | event. */ | 233 | * Log the machine check that caused the threshold |
234 | * event. | ||
235 | */ | ||
234 | machine_check_poll(MCP_TIMESTAMP, | 236 | machine_check_poll(MCP_TIMESTAMP, |
235 | &__get_cpu_var(mce_poll_banks)); | 237 | &__get_cpu_var(mce_poll_banks)); |
236 | 238 | ||
@@ -254,48 +256,52 @@ static void amd_threshold_interrupt(void) | |||
254 | 256 | ||
255 | struct threshold_attr { | 257 | struct threshold_attr { |
256 | struct attribute attr; | 258 | struct attribute attr; |
257 | ssize_t(*show) (struct threshold_block *, char *); | 259 | ssize_t (*show) (struct threshold_block *, char *); |
258 | ssize_t(*store) (struct threshold_block *, const char *, size_t count); | 260 | ssize_t (*store) (struct threshold_block *, const char *, size_t count); |
259 | }; | 261 | }; |
260 | 262 | ||
261 | #define SHOW_FIELDS(name) \ | 263 | #define SHOW_FIELDS(name) \ |
262 | static ssize_t show_ ## name(struct threshold_block * b, char *buf) \ | 264 | static ssize_t show_ ## name(struct threshold_block *b, char *buf) \ |
263 | { \ | 265 | { \ |
264 | return sprintf(buf, "%lx\n", (unsigned long) b->name); \ | 266 | return sprintf(buf, "%lx\n", (unsigned long) b->name); \ |
265 | } | 267 | } |
266 | SHOW_FIELDS(interrupt_enable) | 268 | SHOW_FIELDS(interrupt_enable) |
267 | SHOW_FIELDS(threshold_limit) | 269 | SHOW_FIELDS(threshold_limit) |
268 | 270 | ||
269 | static ssize_t store_interrupt_enable(struct threshold_block *b, | 271 | static ssize_t |
270 | const char *buf, size_t count) | 272 | store_interrupt_enable(struct threshold_block *b, const char *buf, size_t size) |
271 | { | 273 | { |
272 | char *end; | ||
273 | struct thresh_restart tr; | 274 | struct thresh_restart tr; |
274 | unsigned long new = simple_strtoul(buf, &end, 0); | 275 | unsigned long new; |
275 | if (end == buf) | 276 | |
277 | if (strict_strtoul(buf, 0, &new) < 0) | ||
276 | return -EINVAL; | 278 | return -EINVAL; |
279 | |||
277 | b->interrupt_enable = !!new; | 280 | b->interrupt_enable = !!new; |
278 | 281 | ||
279 | tr.b = b; | 282 | tr.b = b; |
280 | tr.reset = 0; | 283 | tr.reset = 0; |
281 | tr.old_limit = 0; | 284 | tr.old_limit = 0; |
285 | |||
282 | smp_call_function_single(b->cpu, threshold_restart_bank, &tr, 1); | 286 | smp_call_function_single(b->cpu, threshold_restart_bank, &tr, 1); |
283 | 287 | ||
284 | return end - buf; | 288 | return size; |
285 | } | 289 | } |
286 | 290 | ||
287 | static ssize_t store_threshold_limit(struct threshold_block *b, | 291 | static ssize_t |
288 | const char *buf, size_t count) | 292 | store_threshold_limit(struct threshold_block *b, const char *buf, size_t size) |
289 | { | 293 | { |
290 | char *end; | ||
291 | struct thresh_restart tr; | 294 | struct thresh_restart tr; |
292 | unsigned long new = simple_strtoul(buf, &end, 0); | 295 | unsigned long new; |
293 | if (end == buf) | 296 | |
297 | if (strict_strtoul(buf, 0, &new) < 0) | ||
294 | return -EINVAL; | 298 | return -EINVAL; |
299 | |||
295 | if (new > THRESHOLD_MAX) | 300 | if (new > THRESHOLD_MAX) |
296 | new = THRESHOLD_MAX; | 301 | new = THRESHOLD_MAX; |
297 | if (new < 1) | 302 | if (new < 1) |
298 | new = 1; | 303 | new = 1; |
304 | |||
299 | tr.old_limit = b->threshold_limit; | 305 | tr.old_limit = b->threshold_limit; |
300 | b->threshold_limit = new; | 306 | b->threshold_limit = new; |
301 | tr.b = b; | 307 | tr.b = b; |
@@ -303,12 +309,12 @@ static ssize_t store_threshold_limit(struct threshold_block *b, | |||
303 | 309 | ||
304 | smp_call_function_single(b->cpu, threshold_restart_bank, &tr, 1); | 310 | smp_call_function_single(b->cpu, threshold_restart_bank, &tr, 1); |
305 | 311 | ||
306 | return end - buf; | 312 | return size; |
307 | } | 313 | } |
308 | 314 | ||
309 | struct threshold_block_cross_cpu { | 315 | struct threshold_block_cross_cpu { |
310 | struct threshold_block *tb; | 316 | struct threshold_block *tb; |
311 | long retval; | 317 | long retval; |
312 | }; | 318 | }; |
313 | 319 | ||
314 | static void local_error_count_handler(void *_tbcc) | 320 | static void local_error_count_handler(void *_tbcc) |
@@ -338,16 +344,13 @@ static ssize_t store_error_count(struct threshold_block *b, | |||
338 | return 1; | 344 | return 1; |
339 | } | 345 | } |
340 | 346 | ||
341 | #define THRESHOLD_ATTR(_name,_mode,_show,_store) { \ | 347 | #define RW_ATTR(val) \ |
342 | .attr = {.name = __stringify(_name), .mode = _mode }, \ | 348 | static struct threshold_attr val = { \ |
343 | .show = _show, \ | 349 | .attr = {.name = __stringify(val), .mode = 0644 }, \ |
344 | .store = _store, \ | 350 | .show = show_## val, \ |
351 | .store = store_## val, \ | ||
345 | }; | 352 | }; |
346 | 353 | ||
347 | #define RW_ATTR(name) \ | ||
348 | static struct threshold_attr name = \ | ||
349 | THRESHOLD_ATTR(name, 0644, show_## name, store_## name) | ||
350 | |||
351 | RW_ATTR(interrupt_enable); | 354 | RW_ATTR(interrupt_enable); |
352 | RW_ATTR(threshold_limit); | 355 | RW_ATTR(threshold_limit); |
353 | RW_ATTR(error_count); | 356 | RW_ATTR(error_count); |
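
For illustration, under the reworked macro above, RW_ATTR(threshold_limit) expands (after __stringify()) to roughly:

static struct threshold_attr threshold_limit = {
	.attr	= { .name = "threshold_limit", .mode = 0644 },
	.show	= show_threshold_limit,
	.store	= store_threshold_limit,
};
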
@@ -359,15 +362,17 @@ static struct attribute *default_attrs[] = { | |||
359 | NULL | 362 | NULL |
360 | }; | 363 | }; |
361 | 364 | ||
362 | #define to_block(k) container_of(k, struct threshold_block, kobj) | 365 | #define to_block(k) container_of(k, struct threshold_block, kobj) |
363 | #define to_attr(a) container_of(a, struct threshold_attr, attr) | 366 | #define to_attr(a) container_of(a, struct threshold_attr, attr) |
364 | 367 | ||
365 | static ssize_t show(struct kobject *kobj, struct attribute *attr, char *buf) | 368 | static ssize_t show(struct kobject *kobj, struct attribute *attr, char *buf) |
366 | { | 369 | { |
367 | struct threshold_block *b = to_block(kobj); | 370 | struct threshold_block *b = to_block(kobj); |
368 | struct threshold_attr *a = to_attr(attr); | 371 | struct threshold_attr *a = to_attr(attr); |
369 | ssize_t ret; | 372 | ssize_t ret; |
373 | |||
370 | ret = a->show ? a->show(b, buf) : -EIO; | 374 | ret = a->show ? a->show(b, buf) : -EIO; |
375 | |||
371 | return ret; | 376 | return ret; |
372 | } | 377 | } |
373 | 378 | ||
@@ -377,18 +382,20 @@ static ssize_t store(struct kobject *kobj, struct attribute *attr, | |||
377 | struct threshold_block *b = to_block(kobj); | 382 | struct threshold_block *b = to_block(kobj); |
378 | struct threshold_attr *a = to_attr(attr); | 383 | struct threshold_attr *a = to_attr(attr); |
379 | ssize_t ret; | 384 | ssize_t ret; |
385 | |||
380 | ret = a->store ? a->store(b, buf, count) : -EIO; | 386 | ret = a->store ? a->store(b, buf, count) : -EIO; |
387 | |||
381 | return ret; | 388 | return ret; |
382 | } | 389 | } |
383 | 390 | ||
384 | static struct sysfs_ops threshold_ops = { | 391 | static struct sysfs_ops threshold_ops = { |
385 | .show = show, | 392 | .show = show, |
386 | .store = store, | 393 | .store = store, |
387 | }; | 394 | }; |
388 | 395 | ||
389 | static struct kobj_type threshold_ktype = { | 396 | static struct kobj_type threshold_ktype = { |
390 | .sysfs_ops = &threshold_ops, | 397 | .sysfs_ops = &threshold_ops, |
391 | .default_attrs = default_attrs, | 398 | .default_attrs = default_attrs, |
392 | }; | 399 | }; |
393 | 400 | ||
394 | static __cpuinit int allocate_threshold_blocks(unsigned int cpu, | 401 | static __cpuinit int allocate_threshold_blocks(unsigned int cpu, |
@@ -396,9 +403,9 @@ static __cpuinit int allocate_threshold_blocks(unsigned int cpu, | |||
396 | unsigned int block, | 403 | unsigned int block, |
397 | u32 address) | 404 | u32 address) |
398 | { | 405 | { |
399 | int err; | ||
400 | u32 low, high; | ||
401 | struct threshold_block *b = NULL; | 406 | struct threshold_block *b = NULL; |
407 | u32 low, high; | ||
408 | int err; | ||
402 | 409 | ||
403 | if ((bank >= NR_BANKS) || (block >= NR_BLOCKS)) | 410 | if ((bank >= NR_BANKS) || (block >= NR_BLOCKS)) |
404 | return 0; | 411 | return 0; |
@@ -421,20 +428,21 @@ static __cpuinit int allocate_threshold_blocks(unsigned int cpu, | |||
421 | if (!b) | 428 | if (!b) |
422 | return -ENOMEM; | 429 | return -ENOMEM; |
423 | 430 | ||
424 | b->block = block; | 431 | b->block = block; |
425 | b->bank = bank; | 432 | b->bank = bank; |
426 | b->cpu = cpu; | 433 | b->cpu = cpu; |
427 | b->address = address; | 434 | b->address = address; |
428 | b->interrupt_enable = 0; | 435 | b->interrupt_enable = 0; |
429 | b->threshold_limit = THRESHOLD_MAX; | 436 | b->threshold_limit = THRESHOLD_MAX; |
430 | 437 | ||
431 | INIT_LIST_HEAD(&b->miscj); | 438 | INIT_LIST_HEAD(&b->miscj); |
432 | 439 | ||
433 | if (per_cpu(threshold_banks, cpu)[bank]->blocks) | 440 | if (per_cpu(threshold_banks, cpu)[bank]->blocks) { |
434 | list_add(&b->miscj, | 441 | list_add(&b->miscj, |
435 | &per_cpu(threshold_banks, cpu)[bank]->blocks->miscj); | 442 | &per_cpu(threshold_banks, cpu)[bank]->blocks->miscj); |
436 | else | 443 | } else { |
437 | per_cpu(threshold_banks, cpu)[bank]->blocks = b; | 444 | per_cpu(threshold_banks, cpu)[bank]->blocks = b; |
445 | } | ||
438 | 446 | ||
439 | err = kobject_init_and_add(&b->kobj, &threshold_ktype, | 447 | err = kobject_init_and_add(&b->kobj, &threshold_ktype, |
440 | per_cpu(threshold_banks, cpu)[bank]->kobj, | 448 | per_cpu(threshold_banks, cpu)[bank]->kobj, |
@@ -447,8 +455,9 @@ recurse: | |||
447 | if (!address) | 455 | if (!address) |
448 | return 0; | 456 | return 0; |
449 | address += MCG_XBLK_ADDR; | 457 | address += MCG_XBLK_ADDR; |
450 | } else | 458 | } else { |
451 | ++address; | 459 | ++address; |
460 | } | ||
452 | 461 | ||
453 | err = allocate_threshold_blocks(cpu, bank, ++block, address); | 462 | err = allocate_threshold_blocks(cpu, bank, ++block, address); |
454 | if (err) | 463 | if (err) |
@@ -500,13 +509,14 @@ static __cpuinit int threshold_create_bank(unsigned int cpu, unsigned int bank) | |||
500 | if (!b) | 509 | if (!b) |
501 | goto out; | 510 | goto out; |
502 | 511 | ||
503 | err = sysfs_create_link(&per_cpu(device_mce, cpu).kobj, | 512 | err = sysfs_create_link(&per_cpu(mce_dev, cpu).kobj, |
504 | b->kobj, name); | 513 | b->kobj, name); |
505 | if (err) | 514 | if (err) |
506 | goto out; | 515 | goto out; |
507 | 516 | ||
508 | cpumask_copy(b->cpus, cpu_core_mask(cpu)); | 517 | cpumask_copy(b->cpus, cpu_core_mask(cpu)); |
509 | per_cpu(threshold_banks, cpu)[bank] = b; | 518 | per_cpu(threshold_banks, cpu)[bank] = b; |
519 | |||
510 | goto out; | 520 | goto out; |
511 | } | 521 | } |
512 | #endif | 522 | #endif |
@@ -522,7 +532,7 @@ static __cpuinit int threshold_create_bank(unsigned int cpu, unsigned int bank) | |||
522 | goto out; | 532 | goto out; |
523 | } | 533 | } |
524 | 534 | ||
525 | b->kobj = kobject_create_and_add(name, &per_cpu(device_mce, cpu).kobj); | 535 | b->kobj = kobject_create_and_add(name, &per_cpu(mce_dev, cpu).kobj); |
526 | if (!b->kobj) | 536 | if (!b->kobj) |
527 | goto out_free; | 537 | goto out_free; |
528 | 538 | ||
@@ -542,7 +552,7 @@ static __cpuinit int threshold_create_bank(unsigned int cpu, unsigned int bank) | |||
542 | if (i == cpu) | 552 | if (i == cpu) |
543 | continue; | 553 | continue; |
544 | 554 | ||
545 | err = sysfs_create_link(&per_cpu(device_mce, i).kobj, | 555 | err = sysfs_create_link(&per_cpu(mce_dev, i).kobj, |
546 | b->kobj, name); | 556 | b->kobj, name); |
547 | if (err) | 557 | if (err) |
548 | goto out; | 558 | goto out; |
@@ -605,15 +615,13 @@ static void deallocate_threshold_block(unsigned int cpu, | |||
605 | 615 | ||
606 | static void threshold_remove_bank(unsigned int cpu, int bank) | 616 | static void threshold_remove_bank(unsigned int cpu, int bank) |
607 | { | 617 | { |
608 | int i = 0; | ||
609 | struct threshold_bank *b; | 618 | struct threshold_bank *b; |
610 | char name[32]; | 619 | char name[32]; |
620 | int i = 0; | ||
611 | 621 | ||
612 | b = per_cpu(threshold_banks, cpu)[bank]; | 622 | b = per_cpu(threshold_banks, cpu)[bank]; |
613 | |||
614 | if (!b) | 623 | if (!b) |
615 | return; | 624 | return; |
616 | |||
617 | if (!b->blocks) | 625 | if (!b->blocks) |
618 | goto free_out; | 626 | goto free_out; |
619 | 627 | ||
@@ -622,8 +630,9 @@ static void threshold_remove_bank(unsigned int cpu, int bank) | |||
622 | #ifdef CONFIG_SMP | 630 | #ifdef CONFIG_SMP |
623 | /* sibling symlink */ | 631 | /* sibling symlink */ |
624 | if (shared_bank[bank] && b->blocks->cpu != cpu) { | 632 | if (shared_bank[bank] && b->blocks->cpu != cpu) { |
625 | sysfs_remove_link(&per_cpu(device_mce, cpu).kobj, name); | 633 | sysfs_remove_link(&per_cpu(mce_dev, cpu).kobj, name); |
626 | per_cpu(threshold_banks, cpu)[bank] = NULL; | 634 | per_cpu(threshold_banks, cpu)[bank] = NULL; |
635 | |||
627 | return; | 636 | return; |
628 | } | 637 | } |
629 | #endif | 638 | #endif |
@@ -633,7 +642,7 @@ static void threshold_remove_bank(unsigned int cpu, int bank) | |||
633 | if (i == cpu) | 642 | if (i == cpu) |
634 | continue; | 643 | continue; |
635 | 644 | ||
636 | sysfs_remove_link(&per_cpu(device_mce, i).kobj, name); | 645 | sysfs_remove_link(&per_cpu(mce_dev, i).kobj, name); |
637 | per_cpu(threshold_banks, i)[bank] = NULL; | 646 | per_cpu(threshold_banks, i)[bank] = NULL; |
638 | } | 647 | } |
639 | 648 | ||
@@ -659,12 +668,9 @@ static void threshold_remove_device(unsigned int cpu) | |||
659 | } | 668 | } |
660 | 669 | ||
661 | /* get notified when a cpu comes on/off */ | 670 | /* get notified when a cpu comes on/off */ |
662 | static void __cpuinit amd_64_threshold_cpu_callback(unsigned long action, | 671 | static void __cpuinit |
663 | unsigned int cpu) | 672 | amd_64_threshold_cpu_callback(unsigned long action, unsigned int cpu) |
664 | { | 673 | { |
665 | if (cpu >= NR_CPUS) | ||
666 | return; | ||
667 | |||
668 | switch (action) { | 674 | switch (action) { |
669 | case CPU_ONLINE: | 675 | case CPU_ONLINE: |
670 | case CPU_ONLINE_FROZEN: | 676 | case CPU_ONLINE_FROZEN: |
@@ -686,11 +692,12 @@ static __init int threshold_init_device(void) | |||
686 | /* to hit CPUs online before the notifier is up */ | 692 | /* to hit CPUs online before the notifier is up */ |
687 | for_each_online_cpu(lcpu) { | 693 | for_each_online_cpu(lcpu) { |
688 | int err = threshold_create_device(lcpu); | 694 | int err = threshold_create_device(lcpu); |
695 | |||
689 | if (err) | 696 | if (err) |
690 | return err; | 697 | return err; |
691 | } | 698 | } |
692 | threshold_cpu_callback = amd_64_threshold_cpu_callback; | 699 | threshold_cpu_callback = amd_64_threshold_cpu_callback; |
700 | |||
693 | return 0; | 701 | return 0; |
694 | } | 702 | } |
695 | |||
696 | device_initcall(threshold_init_device); | 703 | device_initcall(threshold_init_device); |
diff --git a/arch/x86/kernel/cpu/mcheck/mce_intel_64.c b/arch/x86/kernel/cpu/mcheck/mce_intel.c index cef3ee30744b..e1acec0f7a32 100644 --- a/arch/x86/kernel/cpu/mcheck/mce_intel_64.c +++ b/arch/x86/kernel/cpu/mcheck/mce_intel.c | |||
@@ -8,85 +8,10 @@ | |||
8 | #include <linux/init.h> | 8 | #include <linux/init.h> |
9 | #include <linux/interrupt.h> | 9 | #include <linux/interrupt.h> |
10 | #include <linux/percpu.h> | 10 | #include <linux/percpu.h> |
11 | #include <asm/processor.h> | ||
12 | #include <asm/apic.h> | 11 | #include <asm/apic.h> |
12 | #include <asm/processor.h> | ||
13 | #include <asm/msr.h> | 13 | #include <asm/msr.h> |
14 | #include <asm/mce.h> | 14 | #include <asm/mce.h> |
15 | #include <asm/hw_irq.h> | ||
16 | #include <asm/idle.h> | ||
17 | #include <asm/therm_throt.h> | ||
18 | #include <asm/apic.h> | ||
19 | |||
20 | asmlinkage void smp_thermal_interrupt(void) | ||
21 | { | ||
22 | __u64 msr_val; | ||
23 | |||
24 | ack_APIC_irq(); | ||
25 | |||
26 | exit_idle(); | ||
27 | irq_enter(); | ||
28 | |||
29 | rdmsrl(MSR_IA32_THERM_STATUS, msr_val); | ||
30 | if (therm_throt_process(msr_val & 1)) | ||
31 | mce_log_therm_throt_event(msr_val); | ||
32 | |||
33 | inc_irq_stat(irq_thermal_count); | ||
34 | irq_exit(); | ||
35 | } | ||
36 | |||
37 | static void intel_init_thermal(struct cpuinfo_x86 *c) | ||
38 | { | ||
39 | u32 l, h; | ||
40 | int tm2 = 0; | ||
41 | unsigned int cpu = smp_processor_id(); | ||
42 | |||
43 | if (!cpu_has(c, X86_FEATURE_ACPI)) | ||
44 | return; | ||
45 | |||
46 | if (!cpu_has(c, X86_FEATURE_ACC)) | ||
47 | return; | ||
48 | |||
49 | /* first check if TM1 is already enabled by the BIOS, in which | ||
50 | * case there might be some SMM goo which handles it, so we can't even | ||
51 | * put a handler since it might be delivered via SMI already. | ||
52 | */ | ||
53 | rdmsr(MSR_IA32_MISC_ENABLE, l, h); | ||
54 | h = apic_read(APIC_LVTTHMR); | ||
55 | if ((l & MSR_IA32_MISC_ENABLE_TM1) && (h & APIC_DM_SMI)) { | ||
56 | printk(KERN_DEBUG | ||
57 | "CPU%d: Thermal monitoring handled by SMI\n", cpu); | ||
58 | return; | ||
59 | } | ||
60 | |||
61 | if (cpu_has(c, X86_FEATURE_TM2) && (l & MSR_IA32_MISC_ENABLE_TM2)) | ||
62 | tm2 = 1; | ||
63 | |||
64 | if (h & APIC_VECTOR_MASK) { | ||
65 | printk(KERN_DEBUG | ||
66 | "CPU%d: Thermal LVT vector (%#x) already " | ||
67 | "installed\n", cpu, (h & APIC_VECTOR_MASK)); | ||
68 | return; | ||
69 | } | ||
70 | |||
71 | h = THERMAL_APIC_VECTOR; | ||
72 | h |= (APIC_DM_FIXED | APIC_LVT_MASKED); | ||
73 | apic_write(APIC_LVTTHMR, h); | ||
74 | |||
75 | rdmsr(MSR_IA32_THERM_INTERRUPT, l, h); | ||
76 | wrmsr(MSR_IA32_THERM_INTERRUPT, l | 0x03, h); | ||
77 | |||
78 | rdmsr(MSR_IA32_MISC_ENABLE, l, h); | ||
79 | wrmsr(MSR_IA32_MISC_ENABLE, l | MSR_IA32_MISC_ENABLE_TM1, h); | ||
80 | |||
81 | l = apic_read(APIC_LVTTHMR); | ||
82 | apic_write(APIC_LVTTHMR, l & ~APIC_LVT_MASKED); | ||
83 | printk(KERN_INFO "CPU%d: Thermal monitoring enabled (%s)\n", | ||
84 | cpu, tm2 ? "TM2" : "TM1"); | ||
85 | |||
86 | /* enable thermal throttle processing */ | ||
87 | atomic_set(&therm_throt_en, 1); | ||
88 | return; | ||
89 | } | ||
90 | 15 | ||
91 | /* | 16 | /* |
92 | * Support for Intel Correct Machine Check Interrupts. This allows | 17 | * Support for Intel Correct Machine Check Interrupts. This allows |
@@ -109,6 +34,9 @@ static int cmci_supported(int *banks) | |||
109 | { | 34 | { |
110 | u64 cap; | 35 | u64 cap; |
111 | 36 | ||
37 | if (mce_cmci_disabled || mce_ignore_ce) | ||
38 | return 0; | ||
39 | |||
112 | /* | 40 | /* |
113 | * Vendor check is not strictly needed, but the initial | 41 | * Vendor check is not strictly needed, but the initial |
114 | * initialization is vendor keyed and this | 42 | * initialization is vendor keyed and this |
@@ -132,7 +60,7 @@ static int cmci_supported(int *banks) | |||
132 | static void intel_threshold_interrupt(void) | 60 | static void intel_threshold_interrupt(void) |
133 | { | 61 | { |
134 | machine_check_poll(MCP_TIMESTAMP, &__get_cpu_var(mce_banks_owned)); | 62 | machine_check_poll(MCP_TIMESTAMP, &__get_cpu_var(mce_banks_owned)); |
135 | mce_notify_user(); | 63 | mce_notify_irq(); |
136 | } | 64 | } |
137 | 65 | ||
138 | static void print_update(char *type, int *hdr, int num) | 66 | static void print_update(char *type, int *hdr, int num) |
@@ -248,7 +176,7 @@ void cmci_rediscover(int dying) | |||
248 | return; | 176 | return; |
249 | cpumask_copy(old, &current->cpus_allowed); | 177 | cpumask_copy(old, &current->cpus_allowed); |
250 | 178 | ||
251 | for_each_online_cpu (cpu) { | 179 | for_each_online_cpu(cpu) { |
252 | if (cpu == dying) | 180 | if (cpu == dying) |
253 | continue; | 181 | continue; |
254 | if (set_cpus_allowed_ptr(current, cpumask_of(cpu))) | 182 | if (set_cpus_allowed_ptr(current, cpumask_of(cpu))) |
diff --git a/arch/x86/kernel/cpu/mcheck/non-fatal.c b/arch/x86/kernel/cpu/mcheck/non-fatal.c index a74af128efc9..f5f2d6f71fb6 100644 --- a/arch/x86/kernel/cpu/mcheck/non-fatal.c +++ b/arch/x86/kernel/cpu/mcheck/non-fatal.c | |||
@@ -6,25 +6,23 @@ | |||
6 | * This file contains routines to check for non-fatal MCEs every 15s | 6 | * This file contains routines to check for non-fatal MCEs every 15s |
7 | * | 7 | * |
8 | */ | 8 | */ |
9 | |||
10 | #include <linux/init.h> | ||
11 | #include <linux/types.h> | ||
12 | #include <linux/kernel.h> | ||
13 | #include <linux/jiffies.h> | ||
14 | #include <linux/workqueue.h> | ||
15 | #include <linux/interrupt.h> | 9 | #include <linux/interrupt.h> |
16 | #include <linux/smp.h> | 10 | #include <linux/workqueue.h> |
11 | #include <linux/jiffies.h> | ||
12 | #include <linux/kernel.h> | ||
17 | #include <linux/module.h> | 13 | #include <linux/module.h> |
14 | #include <linux/types.h> | ||
15 | #include <linux/init.h> | ||
16 | #include <linux/smp.h> | ||
18 | 17 | ||
19 | #include <asm/processor.h> | 18 | #include <asm/processor.h> |
20 | #include <asm/system.h> | 19 | #include <asm/system.h> |
20 | #include <asm/mce.h> | ||
21 | #include <asm/msr.h> | 21 | #include <asm/msr.h> |
22 | 22 | ||
23 | #include "mce.h" | 23 | static int firstbank; |
24 | 24 | ||
25 | static int firstbank; | 25 | #define MCE_RATE (15*HZ) /* timer rate is 15s */ |
26 | |||
27 | #define MCE_RATE 15*HZ /* timer rate is 15s */ | ||
28 | 26 | ||
29 | static void mce_checkregs(void *info) | 27 | static void mce_checkregs(void *info) |
30 | { | 28 | { |
@@ -34,23 +32,24 @@ static void mce_checkregs(void *info) | |||
34 | for (i = firstbank; i < nr_mce_banks; i++) { | 32 | for (i = firstbank; i < nr_mce_banks; i++) { |
35 | rdmsr(MSR_IA32_MC0_STATUS+i*4, low, high); | 33 | rdmsr(MSR_IA32_MC0_STATUS+i*4, low, high); |
36 | 34 | ||
37 | if (high & (1<<31)) { | 35 | if (!(high & (1<<31))) |
38 | printk(KERN_INFO "MCE: The hardware reports a non " | 36 | continue; |
39 | "fatal, correctable incident occurred on " | 37 | |
40 | "CPU %d.\n", | 38 | printk(KERN_INFO "MCE: The hardware reports a non fatal, " |
39 | "correctable incident occurred on CPU %d.\n", | ||
41 | smp_processor_id()); | 40 | smp_processor_id()); |
42 | printk(KERN_INFO "Bank %d: %08x%08x\n", i, high, low); | 41 | |
43 | 42 | printk(KERN_INFO "Bank %d: %08x%08x\n", i, high, low); | |
44 | /* | 43 | |
45 | * Scrub the error so we don't pick it up in MCE_RATE | 44 | /* |
46 | * seconds time. | 45 | * Scrub the error so we don't pick it up in MCE_RATE |
47 | */ | 46 | * seconds time: |
48 | wrmsr(MSR_IA32_MC0_STATUS+i*4, 0UL, 0UL); | 47 | */ |
49 | 48 | wrmsr(MSR_IA32_MC0_STATUS+i*4, 0UL, 0UL); | |
50 | /* Serialize */ | 49 | |
51 | wmb(); | 50 | /* Serialize: */ |
52 | add_taint(TAINT_MACHINE_CHECK); | 51 | wmb(); |
53 | } | 52 | add_taint(TAINT_MACHINE_CHECK); |
54 | } | 53 | } |
55 | } | 54 | } |
56 | 55 | ||
@@ -77,16 +76,17 @@ static int __init init_nonfatal_mce_checker(void) | |||
77 | 76 | ||
78 | /* Some Athlons misbehave when we frob bank 0 */ | 77 | /* Some Athlons misbehave when we frob bank 0 */ |
79 | if (boot_cpu_data.x86_vendor == X86_VENDOR_AMD && | 78 | if (boot_cpu_data.x86_vendor == X86_VENDOR_AMD && |
80 | boot_cpu_data.x86 == 6) | 79 | boot_cpu_data.x86 == 6) |
81 | firstbank = 1; | 80 | firstbank = 1; |
82 | else | 81 | else |
83 | firstbank = 0; | 82 | firstbank = 0; |
84 | 83 | ||
85 | /* | 84 | /* |
86 | * Check for non-fatal errors every MCE_RATE s | 85 | * Check for non-fatal errors every MCE_RATE s |
87 | */ | 86 | */ |
88 | schedule_delayed_work(&mce_work, round_jiffies_relative(MCE_RATE)); | 87 | schedule_delayed_work(&mce_work, round_jiffies_relative(MCE_RATE)); |
89 | printk(KERN_INFO "Machine check exception polling timer started.\n"); | 88 | printk(KERN_INFO "Machine check exception polling timer started.\n"); |
89 | |||
90 | return 0; | 90 | return 0; |
91 | } | 91 | } |
92 | module_init(init_nonfatal_mce_checker); | 92 | module_init(init_nonfatal_mce_checker); |
diff --git a/arch/x86/kernel/cpu/mcheck/p4.c b/arch/x86/kernel/cpu/mcheck/p4.c index f53bdcbaf382..4482aea9aa2e 100644 --- a/arch/x86/kernel/cpu/mcheck/p4.c +++ b/arch/x86/kernel/cpu/mcheck/p4.c | |||
@@ -1,21 +1,14 @@ | |||
1 | /* | 1 | /* |
2 | * P4 specific Machine Check Exception Reporting | 2 | * P4 specific Machine Check Exception Reporting |
3 | */ | 3 | */ |
4 | |||
5 | #include <linux/init.h> | ||
6 | #include <linux/types.h> | ||
7 | #include <linux/kernel.h> | 4 | #include <linux/kernel.h> |
8 | #include <linux/interrupt.h> | 5 | #include <linux/types.h> |
6 | #include <linux/init.h> | ||
9 | #include <linux/smp.h> | 7 | #include <linux/smp.h> |
10 | 8 | ||
11 | #include <asm/processor.h> | 9 | #include <asm/processor.h> |
12 | #include <asm/system.h> | 10 | #include <asm/mce.h> |
13 | #include <asm/msr.h> | 11 | #include <asm/msr.h> |
14 | #include <asm/apic.h> | ||
15 | |||
16 | #include <asm/therm_throt.h> | ||
17 | |||
18 | #include "mce.h" | ||
19 | 12 | ||
20 | /* as supported by the P4/Xeon family */ | 13 | /* as supported by the P4/Xeon family */ |
21 | struct intel_mce_extended_msrs { | 14 | struct intel_mce_extended_msrs { |
@@ -34,98 +27,8 @@ struct intel_mce_extended_msrs { | |||
34 | 27 | ||
35 | static int mce_num_extended_msrs; | 28 | static int mce_num_extended_msrs; |
36 | 29 | ||
37 | |||
38 | #ifdef CONFIG_X86_MCE_P4THERMAL | ||
39 | static void unexpected_thermal_interrupt(struct pt_regs *regs) | ||
40 | { | ||
41 | printk(KERN_ERR "CPU%d: Unexpected LVT TMR interrupt!\n", | ||
42 | smp_processor_id()); | ||
43 | add_taint(TAINT_MACHINE_CHECK); | ||
44 | } | ||
45 | |||
46 | /* P4/Xeon Thermal transition interrupt handler */ | ||
47 | static void intel_thermal_interrupt(struct pt_regs *regs) | ||
48 | { | ||
49 | __u64 msr_val; | ||
50 | |||
51 | ack_APIC_irq(); | ||
52 | |||
53 | rdmsrl(MSR_IA32_THERM_STATUS, msr_val); | ||
54 | therm_throt_process(msr_val & 0x1); | ||
55 | } | ||
56 | |||
57 | /* Thermal interrupt handler for this CPU setup */ | ||
58 | static void (*vendor_thermal_interrupt)(struct pt_regs *regs) = unexpected_thermal_interrupt; | ||
59 | |||
60 | void smp_thermal_interrupt(struct pt_regs *regs) | ||
61 | { | ||
62 | irq_enter(); | ||
63 | vendor_thermal_interrupt(regs); | ||
64 | __get_cpu_var(irq_stat).irq_thermal_count++; | ||
65 | irq_exit(); | ||
66 | } | ||
67 | |||
68 | /* P4/Xeon Thermal regulation detect and init */ | ||
69 | static void intel_init_thermal(struct cpuinfo_x86 *c) | ||
70 | { | ||
71 | u32 l, h; | ||
72 | unsigned int cpu = smp_processor_id(); | ||
73 | |||
74 | /* Thermal monitoring */ | ||
75 | if (!cpu_has(c, X86_FEATURE_ACPI)) | ||
76 | return; /* -ENODEV */ | ||
77 | |||
78 | /* Clock modulation */ | ||
79 | if (!cpu_has(c, X86_FEATURE_ACC)) | ||
80 | return; /* -ENODEV */ | ||
81 | |||
82 | /* first check if its enabled already, in which case there might | ||
83 | * be some SMM goo which handles it, so we can't even put a handler | ||
84 | * since it might be delivered via SMI already -zwanem. | ||
85 | */ | ||
86 | rdmsr(MSR_IA32_MISC_ENABLE, l, h); | ||
87 | h = apic_read(APIC_LVTTHMR); | ||
88 | if ((l & MSR_IA32_MISC_ENABLE_TM1) && (h & APIC_DM_SMI)) { | ||
89 | printk(KERN_DEBUG "CPU%d: Thermal monitoring handled by SMI\n", | ||
90 | cpu); | ||
91 | return; /* -EBUSY */ | ||
92 | } | ||
93 | |||
94 | /* check whether a vector already exists, temporarily masked? */ | ||
95 | if (h & APIC_VECTOR_MASK) { | ||
96 | printk(KERN_DEBUG "CPU%d: Thermal LVT vector (%#x) already " | ||
97 | "installed\n", | ||
98 | cpu, (h & APIC_VECTOR_MASK)); | ||
99 | return; /* -EBUSY */ | ||
100 | } | ||
101 | |||
102 | /* The temperature transition interrupt handler setup */ | ||
103 | h = THERMAL_APIC_VECTOR; /* our delivery vector */ | ||
104 | h |= (APIC_DM_FIXED | APIC_LVT_MASKED); /* we'll mask till we're ready */ | ||
105 | apic_write(APIC_LVTTHMR, h); | ||
106 | |||
107 | rdmsr(MSR_IA32_THERM_INTERRUPT, l, h); | ||
108 | wrmsr(MSR_IA32_THERM_INTERRUPT, l | 0x03 , h); | ||
109 | |||
110 | /* ok we're good to go... */ | ||
111 | vendor_thermal_interrupt = intel_thermal_interrupt; | ||
112 | |||
113 | rdmsr(MSR_IA32_MISC_ENABLE, l, h); | ||
114 | wrmsr(MSR_IA32_MISC_ENABLE, l | MSR_IA32_MISC_ENABLE_TM1, h); | ||
115 | |||
116 | l = apic_read(APIC_LVTTHMR); | ||
117 | apic_write(APIC_LVTTHMR, l & ~APIC_LVT_MASKED); | ||
118 | printk(KERN_INFO "CPU%d: Thermal monitoring enabled\n", cpu); | ||
119 | |||
120 | /* enable thermal throttle processing */ | ||
121 | atomic_set(&therm_throt_en, 1); | ||
122 | return; | ||
123 | } | ||
124 | #endif /* CONFIG_X86_MCE_P4THERMAL */ | ||
125 | |||
126 | |||
127 | /* P4/Xeon Extended MCE MSR retrieval, return 0 if unsupported */ | 30 | /* P4/Xeon Extended MCE MSR retrieval, return 0 if unsupported */ |
128 | static inline void intel_get_extended_msrs(struct intel_mce_extended_msrs *r) | 31 | static void intel_get_extended_msrs(struct intel_mce_extended_msrs *r) |
129 | { | 32 | { |
130 | u32 h; | 33 | u32 h; |
131 | 34 | ||
@@ -143,9 +46,9 @@ static inline void intel_get_extended_msrs(struct intel_mce_extended_msrs *r) | |||
143 | 46 | ||
144 | static void intel_machine_check(struct pt_regs *regs, long error_code) | 47 | static void intel_machine_check(struct pt_regs *regs, long error_code) |
145 | { | 48 | { |
146 | int recover = 1; | ||
147 | u32 alow, ahigh, high, low; | 49 | u32 alow, ahigh, high, low; |
148 | u32 mcgstl, mcgsth; | 50 | u32 mcgstl, mcgsth; |
51 | int recover = 1; | ||
149 | int i; | 52 | int i; |
150 | 53 | ||
151 | rdmsr(MSR_IA32_MCG_STATUS, mcgstl, mcgsth); | 54 | rdmsr(MSR_IA32_MCG_STATUS, mcgstl, mcgsth); |
@@ -157,7 +60,9 @@ static void intel_machine_check(struct pt_regs *regs, long error_code) | |||
157 | 60 | ||
158 | if (mce_num_extended_msrs > 0) { | 61 | if (mce_num_extended_msrs > 0) { |
159 | struct intel_mce_extended_msrs dbg; | 62 | struct intel_mce_extended_msrs dbg; |
63 | |||
160 | intel_get_extended_msrs(&dbg); | 64 | intel_get_extended_msrs(&dbg); |
65 | |||
161 | printk(KERN_DEBUG "CPU %d: EIP: %08x EFLAGS: %08x\n" | 66 | printk(KERN_DEBUG "CPU %d: EIP: %08x EFLAGS: %08x\n" |
162 | "\teax: %08x ebx: %08x ecx: %08x edx: %08x\n" | 67 | "\teax: %08x ebx: %08x ecx: %08x edx: %08x\n" |
163 | "\tesi: %08x edi: %08x ebp: %08x esp: %08x\n", | 68 | "\tesi: %08x edi: %08x ebp: %08x esp: %08x\n", |
@@ -171,6 +76,7 @@ static void intel_machine_check(struct pt_regs *regs, long error_code) | |||
171 | if (high & (1<<31)) { | 76 | if (high & (1<<31)) { |
172 | char misc[20]; | 77 | char misc[20]; |
173 | char addr[24]; | 78 | char addr[24]; |
79 | |||
174 | misc[0] = addr[0] = '\0'; | 80 | misc[0] = addr[0] = '\0'; |
175 | if (high & (1<<29)) | 81 | if (high & (1<<29)) |
176 | recover |= 1; | 82 | recover |= 1; |
@@ -196,6 +102,7 @@ static void intel_machine_check(struct pt_regs *regs, long error_code) | |||
196 | panic("Unable to continue"); | 102 | panic("Unable to continue"); |
197 | 103 | ||
198 | printk(KERN_EMERG "Attempting to continue.\n"); | 104 | printk(KERN_EMERG "Attempting to continue.\n"); |
105 | |||
199 | /* | 106 | /* |
200 | * Do not clear the MSR_IA32_MCi_STATUS if the error is not | 107 | * Do not clear the MSR_IA32_MCi_STATUS if the error is not |
201 | * recoverable/continuable. This will allow BIOS to look at the MSRs | 108 | * recoverable/continuable. This will allow BIOS to look at the MSRs |
@@ -217,7 +124,6 @@ static void intel_machine_check(struct pt_regs *regs, long error_code) | |||
217 | wrmsr(MSR_IA32_MCG_STATUS, mcgstl, mcgsth); | 124 | wrmsr(MSR_IA32_MCG_STATUS, mcgstl, mcgsth); |
218 | } | 125 | } |
219 | 126 | ||
220 | |||
221 | void intel_p4_mcheck_init(struct cpuinfo_x86 *c) | 127 | void intel_p4_mcheck_init(struct cpuinfo_x86 *c) |
222 | { | 128 | { |
223 | u32 l, h; | 129 | u32 l, h; |
diff --git a/arch/x86/kernel/cpu/mcheck/p5.c b/arch/x86/kernel/cpu/mcheck/p5.c index c9f77ea69edc..5c0e6533d9bc 100644 --- a/arch/x86/kernel/cpu/mcheck/p5.c +++ b/arch/x86/kernel/cpu/mcheck/p5.c | |||
@@ -2,52 +2,67 @@ | |||
2 | * P5 specific Machine Check Exception Reporting | 2 | * P5 specific Machine Check Exception Reporting |
3 | * (C) Copyright 2002 Alan Cox <alan@lxorguk.ukuu.org.uk> | 3 | * (C) Copyright 2002 Alan Cox <alan@lxorguk.ukuu.org.uk> |
4 | */ | 4 | */ |
5 | |||
6 | #include <linux/init.h> | ||
7 | #include <linux/types.h> | ||
8 | #include <linux/kernel.h> | ||
9 | #include <linux/interrupt.h> | 5 | #include <linux/interrupt.h> |
6 | #include <linux/kernel.h> | ||
7 | #include <linux/types.h> | ||
8 | #include <linux/init.h> | ||
10 | #include <linux/smp.h> | 9 | #include <linux/smp.h> |
11 | 10 | ||
12 | #include <asm/processor.h> | 11 | #include <asm/processor.h> |
13 | #include <asm/system.h> | 12 | #include <asm/system.h> |
13 | #include <asm/mce.h> | ||
14 | #include <asm/msr.h> | 14 | #include <asm/msr.h> |
15 | 15 | ||
16 | #include "mce.h" | 16 | /* By default disabled */ |
17 | int mce_p5_enabled __read_mostly; | ||
17 | 18 | ||
18 | /* Machine check handler for Pentium class Intel */ | 19 | /* Machine check handler for Pentium class Intel CPUs: */ |
19 | static void pentium_machine_check(struct pt_regs *regs, long error_code) | 20 | static void pentium_machine_check(struct pt_regs *regs, long error_code) |
20 | { | 21 | { |
21 | u32 loaddr, hi, lotype; | 22 | u32 loaddr, hi, lotype; |
23 | |||
22 | rdmsr(MSR_IA32_P5_MC_ADDR, loaddr, hi); | 24 | rdmsr(MSR_IA32_P5_MC_ADDR, loaddr, hi); |
23 | rdmsr(MSR_IA32_P5_MC_TYPE, lotype, hi); | 25 | rdmsr(MSR_IA32_P5_MC_TYPE, lotype, hi); |
24 | printk(KERN_EMERG "CPU#%d: Machine Check Exception: 0x%8X (type 0x%8X).\n", smp_processor_id(), loaddr, lotype); | 26 | |
25 | if (lotype&(1<<5)) | 27 | printk(KERN_EMERG |
26 | printk(KERN_EMERG "CPU#%d: Possible thermal failure (CPU on fire ?).\n", smp_processor_id()); | 28 | "CPU#%d: Machine Check Exception: 0x%8X (type 0x%8X).\n", |
29 | smp_processor_id(), loaddr, lotype); | ||
30 | |||
31 | if (lotype & (1<<5)) { | ||
32 | printk(KERN_EMERG | ||
33 | "CPU#%d: Possible thermal failure (CPU on fire ?).\n", | ||
34 | smp_processor_id()); | ||
35 | } | ||
36 | |||
27 | add_taint(TAINT_MACHINE_CHECK); | 37 | add_taint(TAINT_MACHINE_CHECK); |
28 | } | 38 | } |
29 | 39 | ||
30 | /* Set up machine check reporting for processors with Intel style MCE */ | 40 | /* Set up machine check reporting for processors with Intel style MCE: */ |
31 | void intel_p5_mcheck_init(struct cpuinfo_x86 *c) | 41 | void intel_p5_mcheck_init(struct cpuinfo_x86 *c) |
32 | { | 42 | { |
33 | u32 l, h; | 43 | u32 l, h; |
34 | 44 | ||
35 | /*Check for MCE support */ | 45 | /* Default P5 to off as its often misconnected: */ |
36 | if (!cpu_has(c, X86_FEATURE_MCE)) | 46 | if (!mce_p5_enabled) |
37 | return; | 47 | return; |
38 | 48 | ||
39 | /* Default P5 to off as its often misconnected */ | 49 | /* Check for MCE support: */ |
40 | if (mce_disabled != -1) | 50 | if (!cpu_has(c, X86_FEATURE_MCE)) |
41 | return; | 51 | return; |
52 | |||
42 | machine_check_vector = pentium_machine_check; | 53 | machine_check_vector = pentium_machine_check; |
54 | /* Make sure the vector pointer is visible before we enable MCEs: */ | ||
43 | wmb(); | 55 | wmb(); |
44 | 56 | ||
45 | /* Read registers before enabling */ | 57 | /* Read registers before enabling: */ |
46 | rdmsr(MSR_IA32_P5_MC_ADDR, l, h); | 58 | rdmsr(MSR_IA32_P5_MC_ADDR, l, h); |
47 | rdmsr(MSR_IA32_P5_MC_TYPE, l, h); | 59 | rdmsr(MSR_IA32_P5_MC_TYPE, l, h); |
48 | printk(KERN_INFO "Intel old style machine check architecture supported.\n"); | 60 | printk(KERN_INFO |
61 | "Intel old style machine check architecture supported.\n"); | ||
49 | 62 | ||
50 | /* Enable MCE */ | 63 | /* Enable MCE: */ |
51 | set_in_cr4(X86_CR4_MCE); | 64 | set_in_cr4(X86_CR4_MCE); |
52 | printk(KERN_INFO "Intel old style machine check reporting enabled on CPU#%d.\n", smp_processor_id()); | 65 | printk(KERN_INFO |
66 | "Intel old style machine check reporting enabled on CPU#%d.\n", | ||
67 | smp_processor_id()); | ||
53 | } | 68 | } |
diff --git a/arch/x86/kernel/cpu/mcheck/p6.c b/arch/x86/kernel/cpu/mcheck/p6.c index 2ac52d7b434b..01e4f8178183 100644 --- a/arch/x86/kernel/cpu/mcheck/p6.c +++ b/arch/x86/kernel/cpu/mcheck/p6.c | |||
@@ -2,25 +2,23 @@ | |||
2 | * P6 specific Machine Check Exception Reporting | 2 | * P6 specific Machine Check Exception Reporting |
3 | * (C) Copyright 2002 Alan Cox <alan@lxorguk.ukuu.org.uk> | 3 | * (C) Copyright 2002 Alan Cox <alan@lxorguk.ukuu.org.uk> |
4 | */ | 4 | */ |
5 | |||
6 | #include <linux/init.h> | ||
7 | #include <linux/types.h> | ||
8 | #include <linux/kernel.h> | ||
9 | #include <linux/interrupt.h> | 5 | #include <linux/interrupt.h> |
6 | #include <linux/kernel.h> | ||
7 | #include <linux/types.h> | ||
8 | #include <linux/init.h> | ||
10 | #include <linux/smp.h> | 9 | #include <linux/smp.h> |
11 | 10 | ||
12 | #include <asm/processor.h> | 11 | #include <asm/processor.h> |
13 | #include <asm/system.h> | 12 | #include <asm/system.h> |
13 | #include <asm/mce.h> | ||
14 | #include <asm/msr.h> | 14 | #include <asm/msr.h> |
15 | 15 | ||
16 | #include "mce.h" | ||
17 | |||
18 | /* Machine Check Handler For PII/PIII */ | 16 | /* Machine Check Handler For PII/PIII */ |
19 | static void intel_machine_check(struct pt_regs *regs, long error_code) | 17 | static void intel_machine_check(struct pt_regs *regs, long error_code) |
20 | { | 18 | { |
21 | int recover = 1; | ||
22 | u32 alow, ahigh, high, low; | 19 | u32 alow, ahigh, high, low; |
23 | u32 mcgstl, mcgsth; | 20 | u32 mcgstl, mcgsth; |
21 | int recover = 1; | ||
24 | int i; | 22 | int i; |
25 | 23 | ||
26 | rdmsr(MSR_IA32_MCG_STATUS, mcgstl, mcgsth); | 24 | rdmsr(MSR_IA32_MCG_STATUS, mcgstl, mcgsth); |
@@ -35,12 +33,16 @@ static void intel_machine_check(struct pt_regs *regs, long error_code) | |||
35 | if (high & (1<<31)) { | 33 | if (high & (1<<31)) { |
36 | char misc[20]; | 34 | char misc[20]; |
37 | char addr[24]; | 35 | char addr[24]; |
38 | misc[0] = addr[0] = '\0'; | 36 | |
37 | misc[0] = '\0'; | ||
38 | addr[0] = '\0'; | ||
39 | |||
39 | if (high & (1<<29)) | 40 | if (high & (1<<29)) |
40 | recover |= 1; | 41 | recover |= 1; |
41 | if (high & (1<<25)) | 42 | if (high & (1<<25)) |
42 | recover |= 2; | 43 | recover |= 2; |
43 | high &= ~(1<<31); | 44 | high &= ~(1<<31); |
45 | |||
44 | if (high & (1<<27)) { | 46 | if (high & (1<<27)) { |
45 | rdmsr(MSR_IA32_MC0_MISC+i*4, alow, ahigh); | 47 | rdmsr(MSR_IA32_MC0_MISC+i*4, alow, ahigh); |
46 | snprintf(misc, 20, "[%08x%08x]", ahigh, alow); | 48 | snprintf(misc, 20, "[%08x%08x]", ahigh, alow); |
@@ -49,6 +51,7 @@ static void intel_machine_check(struct pt_regs *regs, long error_code) | |||
49 | rdmsr(MSR_IA32_MC0_ADDR+i*4, alow, ahigh); | 51 | rdmsr(MSR_IA32_MC0_ADDR+i*4, alow, ahigh); |
50 | snprintf(addr, 24, " at %08x%08x", ahigh, alow); | 52 | snprintf(addr, 24, " at %08x%08x", ahigh, alow); |
51 | } | 53 | } |
54 | |||
52 | printk(KERN_EMERG "CPU %d: Bank %d: %08x%08x%s%s\n", | 55 | printk(KERN_EMERG "CPU %d: Bank %d: %08x%08x%s%s\n", |
53 | smp_processor_id(), i, high, low, misc, addr); | 56 | smp_processor_id(), i, high, low, misc, addr); |
54 | } | 57 | } |
@@ -63,16 +66,17 @@ static void intel_machine_check(struct pt_regs *regs, long error_code) | |||
63 | /* | 66 | /* |
64 | * Do not clear the MSR_IA32_MCi_STATUS if the error is not | 67 | * Do not clear the MSR_IA32_MCi_STATUS if the error is not |
65 | * recoverable/continuable.This will allow BIOS to look at the MSRs | 68 | * recoverable/continuable.This will allow BIOS to look at the MSRs |
66 | * for errors if the OS could not log the error. | 69 | * for errors if the OS could not log the error: |
67 | */ | 70 | */ |
68 | for (i = 0; i < nr_mce_banks; i++) { | 71 | for (i = 0; i < nr_mce_banks; i++) { |
69 | unsigned int msr; | 72 | unsigned int msr; |
73 | |||
70 | msr = MSR_IA32_MC0_STATUS+i*4; | 74 | msr = MSR_IA32_MC0_STATUS+i*4; |
71 | rdmsr(msr, low, high); | 75 | rdmsr(msr, low, high); |
72 | if (high & (1<<31)) { | 76 | if (high & (1<<31)) { |
73 | /* Clear it */ | 77 | /* Clear it: */ |
74 | wrmsr(msr, 0UL, 0UL); | 78 | wrmsr(msr, 0UL, 0UL); |
75 | /* Serialize */ | 79 | /* Serialize: */ |
76 | wmb(); | 80 | wmb(); |
77 | add_taint(TAINT_MACHINE_CHECK); | 81 | add_taint(TAINT_MACHINE_CHECK); |
78 | } | 82 | } |
@@ -81,7 +85,7 @@ static void intel_machine_check(struct pt_regs *regs, long error_code) | |||
81 | wrmsr(MSR_IA32_MCG_STATUS, mcgstl, mcgsth); | 85 | wrmsr(MSR_IA32_MCG_STATUS, mcgstl, mcgsth); |
82 | } | 86 | } |
83 | 87 | ||
84 | /* Set up machine check reporting for processors with Intel style MCE */ | 88 | /* Set up machine check reporting for processors with Intel style MCE: */ |
85 | void intel_p6_mcheck_init(struct cpuinfo_x86 *c) | 89 | void intel_p6_mcheck_init(struct cpuinfo_x86 *c) |
86 | { | 90 | { |
87 | u32 l, h; | 91 | u32 l, h; |
@@ -97,6 +101,7 @@ void intel_p6_mcheck_init(struct cpuinfo_x86 *c) | |||
97 | 101 | ||
98 | /* Ok machine check is available */ | 102 | /* Ok machine check is available */ |
99 | machine_check_vector = intel_machine_check; | 103 | machine_check_vector = intel_machine_check; |
104 | /* Make sure the vector pointer is visible before we enable MCEs: */ | ||
100 | wmb(); | 105 | wmb(); |
101 | 106 | ||
102 | printk(KERN_INFO "Intel machine check architecture supported.\n"); | 107 | printk(KERN_INFO "Intel machine check architecture supported.\n"); |
diff --git a/arch/x86/kernel/cpu/mcheck/therm_throt.c b/arch/x86/kernel/cpu/mcheck/therm_throt.c index d5ae2243f0b9..bff8dd191dd5 100644 --- a/arch/x86/kernel/cpu/mcheck/therm_throt.c +++ b/arch/x86/kernel/cpu/mcheck/therm_throt.c | |||
@@ -1,7 +1,7 @@ | |||
1 | /* | 1 | /* |
2 | * | ||
3 | * Thermal throttle event support code (such as syslog messaging and rate | 2 | * Thermal throttle event support code (such as syslog messaging and rate |
4 | * limiting) that was factored out from x86_64 (mce_intel.c) and i386 (p4.c). | 3 | * limiting) that was factored out from x86_64 (mce_intel.c) and i386 (p4.c). |
4 | * | ||
5 | * This allows consistent reporting of CPU thermal throttle events. | 5 | * This allows consistent reporting of CPU thermal throttle events. |
6 | * | 6 | * |
7 | * Maintains a counter in /sys that keeps track of the number of thermal | 7 | * Maintains a counter in /sys that keeps track of the number of thermal |
@@ -13,43 +13,53 @@ | |||
13 | * Credits: Adapted from Zwane Mwaikambo's original code in mce_intel.c. | 13 | * Credits: Adapted from Zwane Mwaikambo's original code in mce_intel.c. |
14 | * Inspired by Ross Biro's and Al Borchers' counter code. | 14 | * Inspired by Ross Biro's and Al Borchers' counter code. |
15 | */ | 15 | */ |
16 | 16 | #include <linux/interrupt.h> | |
17 | #include <linux/notifier.h> | ||
18 | #include <linux/jiffies.h> | ||
19 | #include <linux/kernel.h> | ||
17 | #include <linux/percpu.h> | 20 | #include <linux/percpu.h> |
18 | #include <linux/sysdev.h> | 21 | #include <linux/sysdev.h> |
22 | #include <linux/types.h> | ||
23 | #include <linux/init.h> | ||
24 | #include <linux/smp.h> | ||
19 | #include <linux/cpu.h> | 25 | #include <linux/cpu.h> |
20 | #include <asm/cpu.h> | 26 | |
21 | #include <linux/notifier.h> | 27 | #include <asm/processor.h> |
22 | #include <linux/jiffies.h> | 28 | #include <asm/system.h> |
23 | #include <asm/therm_throt.h> | 29 | #include <asm/apic.h> |
30 | #include <asm/idle.h> | ||
31 | #include <asm/mce.h> | ||
32 | #include <asm/msr.h> | ||
24 | 33 | ||
25 | /* How long to wait between reporting thermal events */ | 34 | /* How long to wait between reporting thermal events */ |
26 | #define CHECK_INTERVAL (300 * HZ) | 35 | #define CHECK_INTERVAL (300 * HZ) |
27 | 36 | ||
28 | static DEFINE_PER_CPU(__u64, next_check) = INITIAL_JIFFIES; | 37 | static DEFINE_PER_CPU(__u64, next_check) = INITIAL_JIFFIES; |
29 | static DEFINE_PER_CPU(unsigned long, thermal_throttle_count); | 38 | static DEFINE_PER_CPU(unsigned long, thermal_throttle_count); |
30 | atomic_t therm_throt_en = ATOMIC_INIT(0); | 39 | |
40 | static atomic_t therm_throt_en = ATOMIC_INIT(0); | ||
31 | 41 | ||
32 | #ifdef CONFIG_SYSFS | 42 | #ifdef CONFIG_SYSFS |
33 | #define define_therm_throt_sysdev_one_ro(_name) \ | 43 | #define define_therm_throt_sysdev_one_ro(_name) \ |
34 | static SYSDEV_ATTR(_name, 0444, therm_throt_sysdev_show_##_name, NULL) | 44 | static SYSDEV_ATTR(_name, 0444, therm_throt_sysdev_show_##_name, NULL) |
35 | 45 | ||
36 | #define define_therm_throt_sysdev_show_func(name) \ | 46 | #define define_therm_throt_sysdev_show_func(name) \ |
37 | static ssize_t therm_throt_sysdev_show_##name(struct sys_device *dev, \ | 47 | static ssize_t therm_throt_sysdev_show_##name(struct sys_device *dev, \ |
38 | struct sysdev_attribute *attr, \ | 48 | struct sysdev_attribute *attr, \ |
39 | char *buf) \ | 49 | char *buf) \ |
40 | { \ | 50 | { \ |
41 | unsigned int cpu = dev->id; \ | 51 | unsigned int cpu = dev->id; \ |
42 | ssize_t ret; \ | 52 | ssize_t ret; \ |
43 | \ | 53 | \ |
44 | preempt_disable(); /* CPU hotplug */ \ | 54 | preempt_disable(); /* CPU hotplug */ \ |
45 | if (cpu_online(cpu)) \ | 55 | if (cpu_online(cpu)) \ |
46 | ret = sprintf(buf, "%lu\n", \ | 56 | ret = sprintf(buf, "%lu\n", \ |
47 | per_cpu(thermal_throttle_##name, cpu)); \ | 57 | per_cpu(thermal_throttle_##name, cpu)); \ |
48 | else \ | 58 | else \ |
49 | ret = 0; \ | 59 | ret = 0; \ |
50 | preempt_enable(); \ | 60 | preempt_enable(); \ |
51 | \ | 61 | \ |
52 | return ret; \ | 62 | return ret; \ |
53 | } | 63 | } |
54 | 64 | ||
55 | define_therm_throt_sysdev_show_func(count); | 65 | define_therm_throt_sysdev_show_func(count); |
@@ -61,8 +71,8 @@ static struct attribute *thermal_throttle_attrs[] = { | |||
61 | }; | 71 | }; |
62 | 72 | ||
63 | static struct attribute_group thermal_throttle_attr_group = { | 73 | static struct attribute_group thermal_throttle_attr_group = { |
64 | .attrs = thermal_throttle_attrs, | 74 | .attrs = thermal_throttle_attrs, |
65 | .name = "thermal_throttle" | 75 | .name = "thermal_throttle" |
66 | }; | 76 | }; |
67 | #endif /* CONFIG_SYSFS */ | 77 | #endif /* CONFIG_SYSFS */ |
68 | 78 | ||
@@ -82,7 +92,7 @@ static struct attribute_group thermal_throttle_attr_group = { | |||
82 | * 1 : Event should be logged further, and a message has been | 92 | * 1 : Event should be logged further, and a message has been |
83 | * printed to the syslog. | 93 | * printed to the syslog. |
84 | */ | 94 | */ |
85 | int therm_throt_process(int curr) | 95 | static int therm_throt_process(int curr) |
86 | { | 96 | { |
87 | unsigned int cpu = smp_processor_id(); | 97 | unsigned int cpu = smp_processor_id(); |
88 | __u64 tmp_jiffs = get_jiffies_64(); | 98 | __u64 tmp_jiffs = get_jiffies_64(); |
@@ -110,10 +120,11 @@ int therm_throt_process(int curr) | |||
110 | } | 120 | } |
111 | 121 | ||
112 | #ifdef CONFIG_SYSFS | 122 | #ifdef CONFIG_SYSFS |
113 | /* Add/Remove thermal_throttle interface for CPU device */ | 123 | /* Add/Remove thermal_throttle interface for CPU device: */ |
114 | static __cpuinit int thermal_throttle_add_dev(struct sys_device *sys_dev) | 124 | static __cpuinit int thermal_throttle_add_dev(struct sys_device *sys_dev) |
115 | { | 125 | { |
116 | return sysfs_create_group(&sys_dev->kobj, &thermal_throttle_attr_group); | 126 | return sysfs_create_group(&sys_dev->kobj, |
127 | &thermal_throttle_attr_group); | ||
117 | } | 128 | } |
118 | 129 | ||
119 | static __cpuinit void thermal_throttle_remove_dev(struct sys_device *sys_dev) | 130 | static __cpuinit void thermal_throttle_remove_dev(struct sys_device *sys_dev) |
@@ -121,19 +132,21 @@ static __cpuinit void thermal_throttle_remove_dev(struct sys_device *sys_dev) | |||
121 | sysfs_remove_group(&sys_dev->kobj, &thermal_throttle_attr_group); | 132 | sysfs_remove_group(&sys_dev->kobj, &thermal_throttle_attr_group); |
122 | } | 133 | } |
123 | 134 | ||
124 | /* Mutex protecting device creation against CPU hotplug */ | 135 | /* Mutex protecting device creation against CPU hotplug: */ |
125 | static DEFINE_MUTEX(therm_cpu_lock); | 136 | static DEFINE_MUTEX(therm_cpu_lock); |
126 | 137 | ||
127 | /* Get notified when a cpu comes on/off. Be hotplug friendly. */ | 138 | /* Get notified when a cpu comes on/off. Be hotplug friendly. */ |
128 | static __cpuinit int thermal_throttle_cpu_callback(struct notifier_block *nfb, | 139 | static __cpuinit int |
129 | unsigned long action, | 140 | thermal_throttle_cpu_callback(struct notifier_block *nfb, |
130 | void *hcpu) | 141 | unsigned long action, |
142 | void *hcpu) | ||
131 | { | 143 | { |
132 | unsigned int cpu = (unsigned long)hcpu; | 144 | unsigned int cpu = (unsigned long)hcpu; |
133 | struct sys_device *sys_dev; | 145 | struct sys_device *sys_dev; |
134 | int err = 0; | 146 | int err = 0; |
135 | 147 | ||
136 | sys_dev = get_cpu_sysdev(cpu); | 148 | sys_dev = get_cpu_sysdev(cpu); |
149 | |||
137 | switch (action) { | 150 | switch (action) { |
138 | case CPU_UP_PREPARE: | 151 | case CPU_UP_PREPARE: |
139 | case CPU_UP_PREPARE_FROZEN: | 152 | case CPU_UP_PREPARE_FROZEN: |
@@ -183,6 +196,94 @@ static __init int thermal_throttle_init_device(void) | |||
183 | 196 | ||
184 | return 0; | 197 | return 0; |
185 | } | 198 | } |
186 | |||
187 | device_initcall(thermal_throttle_init_device); | 199 | device_initcall(thermal_throttle_init_device); |
200 | |||
188 | #endif /* CONFIG_SYSFS */ | 201 | #endif /* CONFIG_SYSFS */ |
202 | |||
203 | /* Thermal transition interrupt handler */ | ||
204 | static void intel_thermal_interrupt(void) | ||
205 | { | ||
206 | __u64 msr_val; | ||
207 | |||
208 | rdmsrl(MSR_IA32_THERM_STATUS, msr_val); | ||
209 | if (therm_throt_process(msr_val & THERM_STATUS_PROCHOT)) | ||
210 | mce_log_therm_throt_event(msr_val); | ||
211 | } | ||
212 | |||
213 | static void unexpected_thermal_interrupt(void) | ||
214 | { | ||
215 | printk(KERN_ERR "CPU%d: Unexpected LVT TMR interrupt!\n", | ||
216 | smp_processor_id()); | ||
217 | add_taint(TAINT_MACHINE_CHECK); | ||
218 | } | ||
219 | |||
220 | static void (*smp_thermal_vector)(void) = unexpected_thermal_interrupt; | ||
221 | |||
222 | asmlinkage void smp_thermal_interrupt(struct pt_regs *regs) | ||
223 | { | ||
224 | exit_idle(); | ||
225 | irq_enter(); | ||
226 | inc_irq_stat(irq_thermal_count); | ||
227 | smp_thermal_vector(); | ||
228 | irq_exit(); | ||
229 | /* Ack only at the end to avoid potential reentry */ | ||
230 | ack_APIC_irq(); | ||
231 | } | ||
232 | |||
233 | void intel_init_thermal(struct cpuinfo_x86 *c) | ||
234 | { | ||
235 | unsigned int cpu = smp_processor_id(); | ||
236 | int tm2 = 0; | ||
237 | u32 l, h; | ||
238 | |||
239 | /* Thermal monitoring depends on ACPI and clock modulation*/ | ||
240 | if (!cpu_has(c, X86_FEATURE_ACPI) || !cpu_has(c, X86_FEATURE_ACC)) | ||
241 | return; | ||
242 | |||
243 | /* | ||
244 | * First check if its enabled already, in which case there might | ||
245 | * be some SMM goo which handles it, so we can't even put a handler | ||
246 | * since it might be delivered via SMI already: | ||
247 | */ | ||
248 | rdmsr(MSR_IA32_MISC_ENABLE, l, h); | ||
249 | h = apic_read(APIC_LVTTHMR); | ||
250 | if ((l & MSR_IA32_MISC_ENABLE_TM1) && (h & APIC_DM_SMI)) { | ||
251 | printk(KERN_DEBUG | ||
252 | "CPU%d: Thermal monitoring handled by SMI\n", cpu); | ||
253 | return; | ||
254 | } | ||
255 | |||
256 | if (cpu_has(c, X86_FEATURE_TM2) && (l & MSR_IA32_MISC_ENABLE_TM2)) | ||
257 | tm2 = 1; | ||
258 | |||
259 | /* Check whether a vector already exists */ | ||
260 | if (h & APIC_VECTOR_MASK) { | ||
261 | printk(KERN_DEBUG | ||
262 | "CPU%d: Thermal LVT vector (%#x) already installed\n", | ||
263 | cpu, (h & APIC_VECTOR_MASK)); | ||
264 | return; | ||
265 | } | ||
266 | |||
267 | /* We'll mask the thermal vector in the lapic till we're ready: */ | ||
268 | h = THERMAL_APIC_VECTOR | APIC_DM_FIXED | APIC_LVT_MASKED; | ||
269 | apic_write(APIC_LVTTHMR, h); | ||
270 | |||
271 | rdmsr(MSR_IA32_THERM_INTERRUPT, l, h); | ||
272 | wrmsr(MSR_IA32_THERM_INTERRUPT, | ||
273 | l | (THERM_INT_LOW_ENABLE | THERM_INT_HIGH_ENABLE), h); | ||
274 | |||
275 | smp_thermal_vector = intel_thermal_interrupt; | ||
276 | |||
277 | rdmsr(MSR_IA32_MISC_ENABLE, l, h); | ||
278 | wrmsr(MSR_IA32_MISC_ENABLE, l | MSR_IA32_MISC_ENABLE_TM1, h); | ||
279 | |||
280 | /* Unmask the thermal vector: */ | ||
281 | l = apic_read(APIC_LVTTHMR); | ||
282 | apic_write(APIC_LVTTHMR, l & ~APIC_LVT_MASKED); | ||
283 | |||
284 | printk(KERN_INFO "CPU%d: Thermal monitoring enabled (%s)\n", | ||
285 | cpu, tm2 ? "TM2" : "TM1"); | ||
286 | |||
287 | /* enable thermal throttle processing */ | ||
288 | atomic_set(&therm_throt_en, 1); | ||
289 | } | ||
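therm_throt_process() above is essentially a per-CPU rate limiter: every PROCHOT event bumps the sysfs counter, but a syslog message is allowed through at most once per CHECK_INTERVAL (300 * HZ, i.e. five minutes). A stripped-down sketch of that logic, with the per-CPU variables reduced to plain statics and jiffies wrap-around ignored; should_log_throttle_event() is an illustrative name, not code from the patch:

    #define HZ              1000            /* assumed tick rate for the sketch */
    #define CHECK_INTERVAL  (300 * HZ)      /* at most one message per 5 minutes */

    static unsigned long next_check;        /* per-CPU in the real code */
    static unsigned long event_count;       /* exported via sysfs in the real code */

    static int should_log_throttle_event(unsigned long now, int prochot)
    {
            if (prochot)
                    event_count++;          /* always counted */

            if (!prochot || now < next_check)
                    return 0;               /* suppress the printk */

            next_check = now + CHECK_INTERVAL;
            return 1;                       /* caller logs one message */
    }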
diff --git a/arch/x86/kernel/cpu/mcheck/threshold.c b/arch/x86/kernel/cpu/mcheck/threshold.c index 23ee9e730f78..d746df2909c9 100644 --- a/arch/x86/kernel/cpu/mcheck/threshold.c +++ b/arch/x86/kernel/cpu/mcheck/threshold.c | |||
@@ -17,7 +17,7 @@ static void default_threshold_interrupt(void) | |||
17 | 17 | ||
18 | void (*mce_threshold_vector)(void) = default_threshold_interrupt; | 18 | void (*mce_threshold_vector)(void) = default_threshold_interrupt; |
19 | 19 | ||
20 | asmlinkage void mce_threshold_interrupt(void) | 20 | asmlinkage void smp_threshold_interrupt(void) |
21 | { | 21 | { |
22 | exit_idle(); | 22 | exit_idle(); |
23 | irq_enter(); | 23 | irq_enter(); |
diff --git a/arch/x86/kernel/cpu/mcheck/winchip.c b/arch/x86/kernel/cpu/mcheck/winchip.c index 2a043d89811d..54060f565974 100644 --- a/arch/x86/kernel/cpu/mcheck/winchip.c +++ b/arch/x86/kernel/cpu/mcheck/winchip.c | |||
@@ -2,19 +2,17 @@ | |||
2 | * IDT Winchip specific Machine Check Exception Reporting | 2 | * IDT Winchip specific Machine Check Exception Reporting |
3 | * (C) Copyright 2002 Alan Cox <alan@lxorguk.ukuu.org.uk> | 3 | * (C) Copyright 2002 Alan Cox <alan@lxorguk.ukuu.org.uk> |
4 | */ | 4 | */ |
5 | |||
6 | #include <linux/init.h> | ||
7 | #include <linux/types.h> | ||
8 | #include <linux/kernel.h> | ||
9 | #include <linux/interrupt.h> | 5 | #include <linux/interrupt.h> |
6 | #include <linux/kernel.h> | ||
7 | #include <linux/types.h> | ||
8 | #include <linux/init.h> | ||
10 | 9 | ||
11 | #include <asm/processor.h> | 10 | #include <asm/processor.h> |
12 | #include <asm/system.h> | 11 | #include <asm/system.h> |
12 | #include <asm/mce.h> | ||
13 | #include <asm/msr.h> | 13 | #include <asm/msr.h> |
14 | 14 | ||
15 | #include "mce.h" | 15 | /* Machine check handler for WinChip C6: */ |
16 | |||
17 | /* Machine check handler for WinChip C6 */ | ||
18 | static void winchip_machine_check(struct pt_regs *regs, long error_code) | 16 | static void winchip_machine_check(struct pt_regs *regs, long error_code) |
19 | { | 17 | { |
20 | printk(KERN_EMERG "CPU0: Machine Check Exception.\n"); | 18 | printk(KERN_EMERG "CPU0: Machine Check Exception.\n"); |
@@ -25,12 +23,18 @@ static void winchip_machine_check(struct pt_regs *regs, long error_code) | |||
25 | void winchip_mcheck_init(struct cpuinfo_x86 *c) | 23 | void winchip_mcheck_init(struct cpuinfo_x86 *c) |
26 | { | 24 | { |
27 | u32 lo, hi; | 25 | u32 lo, hi; |
26 | |||
28 | machine_check_vector = winchip_machine_check; | 27 | machine_check_vector = winchip_machine_check; |
28 | /* Make sure the vector pointer is visible before we enable MCEs: */ | ||
29 | wmb(); | 29 | wmb(); |
30 | |||
30 | rdmsr(MSR_IDT_FCR1, lo, hi); | 31 | rdmsr(MSR_IDT_FCR1, lo, hi); |
31 | lo |= (1<<2); /* Enable EIERRINT (int 18 MCE) */ | 32 | lo |= (1<<2); /* Enable EIERRINT (int 18 MCE) */ |
32 | lo &= ~(1<<4); /* Enable MCE */ | 33 | lo &= ~(1<<4); /* Enable MCE */ |
33 | wrmsr(MSR_IDT_FCR1, lo, hi); | 34 | wrmsr(MSR_IDT_FCR1, lo, hi); |
35 | |||
34 | set_in_cr4(X86_CR4_MCE); | 36 | set_in_cr4(X86_CR4_MCE); |
35 | printk(KERN_INFO "Winchip machine check reporting enabled on CPU#0.\n"); | 37 | |
38 | printk(KERN_INFO | ||
39 | "Winchip machine check reporting enabled on CPU#0.\n"); | ||
36 | } | 40 | } |
diff --git a/arch/x86/kernel/cpu/mtrr/cleanup.c b/arch/x86/kernel/cpu/mtrr/cleanup.c index ce0fe4b5c04f..1d584a18a50d 100644 --- a/arch/x86/kernel/cpu/mtrr/cleanup.c +++ b/arch/x86/kernel/cpu/mtrr/cleanup.c | |||
@@ -808,7 +808,7 @@ int __init mtrr_cleanup(unsigned address_bits) | |||
808 | 808 | ||
809 | if (!is_cpu(INTEL) || enable_mtrr_cleanup < 1) | 809 | if (!is_cpu(INTEL) || enable_mtrr_cleanup < 1) |
810 | return 0; | 810 | return 0; |
811 | rdmsr(MTRRdefType_MSR, def, dummy); | 811 | rdmsr(MSR_MTRRdefType, def, dummy); |
812 | def &= 0xff; | 812 | def &= 0xff; |
813 | if (def != MTRR_TYPE_UNCACHABLE) | 813 | if (def != MTRR_TYPE_UNCACHABLE) |
814 | return 0; | 814 | return 0; |
@@ -1003,7 +1003,7 @@ int __init mtrr_trim_uncached_memory(unsigned long end_pfn) | |||
1003 | */ | 1003 | */ |
1004 | if (!is_cpu(INTEL) || disable_mtrr_trim) | 1004 | if (!is_cpu(INTEL) || disable_mtrr_trim) |
1005 | return 0; | 1005 | return 0; |
1006 | rdmsr(MTRRdefType_MSR, def, dummy); | 1006 | rdmsr(MSR_MTRRdefType, def, dummy); |
1007 | def &= 0xff; | 1007 | def &= 0xff; |
1008 | if (def != MTRR_TYPE_UNCACHABLE) | 1008 | if (def != MTRR_TYPE_UNCACHABLE) |
1009 | return 0; | 1009 | return 0; |
diff --git a/arch/x86/kernel/cpu/mtrr/generic.c b/arch/x86/kernel/cpu/mtrr/generic.c index d21d4fb161f7..0543f69f0b27 100644 --- a/arch/x86/kernel/cpu/mtrr/generic.c +++ b/arch/x86/kernel/cpu/mtrr/generic.c | |||
@@ -20,9 +20,9 @@ struct fixed_range_block { | |||
20 | }; | 20 | }; |
21 | 21 | ||
22 | static struct fixed_range_block fixed_range_blocks[] = { | 22 | static struct fixed_range_block fixed_range_blocks[] = { |
23 | { MTRRfix64K_00000_MSR, 1 }, /* one 64k MTRR */ | 23 | { MSR_MTRRfix64K_00000, 1 }, /* one 64k MTRR */ |
24 | { MTRRfix16K_80000_MSR, 2 }, /* two 16k MTRRs */ | 24 | { MSR_MTRRfix16K_80000, 2 }, /* two 16k MTRRs */ |
25 | { MTRRfix4K_C0000_MSR, 8 }, /* eight 4k MTRRs */ | 25 | { MSR_MTRRfix4K_C0000, 8 }, /* eight 4k MTRRs */ |
26 | {} | 26 | {} |
27 | }; | 27 | }; |
28 | 28 | ||
@@ -194,12 +194,12 @@ get_fixed_ranges(mtrr_type * frs) | |||
194 | 194 | ||
195 | k8_check_syscfg_dram_mod_en(); | 195 | k8_check_syscfg_dram_mod_en(); |
196 | 196 | ||
197 | rdmsr(MTRRfix64K_00000_MSR, p[0], p[1]); | 197 | rdmsr(MSR_MTRRfix64K_00000, p[0], p[1]); |
198 | 198 | ||
199 | for (i = 0; i < 2; i++) | 199 | for (i = 0; i < 2; i++) |
200 | rdmsr(MTRRfix16K_80000_MSR + i, p[2 + i * 2], p[3 + i * 2]); | 200 | rdmsr(MSR_MTRRfix16K_80000 + i, p[2 + i * 2], p[3 + i * 2]); |
201 | for (i = 0; i < 8; i++) | 201 | for (i = 0; i < 8; i++) |
202 | rdmsr(MTRRfix4K_C0000_MSR + i, p[6 + i * 2], p[7 + i * 2]); | 202 | rdmsr(MSR_MTRRfix4K_C0000 + i, p[6 + i * 2], p[7 + i * 2]); |
203 | } | 203 | } |
204 | 204 | ||
205 | void mtrr_save_fixed_ranges(void *info) | 205 | void mtrr_save_fixed_ranges(void *info) |
@@ -310,7 +310,7 @@ void __init get_mtrr_state(void) | |||
310 | 310 | ||
311 | vrs = mtrr_state.var_ranges; | 311 | vrs = mtrr_state.var_ranges; |
312 | 312 | ||
313 | rdmsr(MTRRcap_MSR, lo, dummy); | 313 | rdmsr(MSR_MTRRcap, lo, dummy); |
314 | mtrr_state.have_fixed = (lo >> 8) & 1; | 314 | mtrr_state.have_fixed = (lo >> 8) & 1; |
315 | 315 | ||
316 | for (i = 0; i < num_var_ranges; i++) | 316 | for (i = 0; i < num_var_ranges; i++) |
@@ -318,7 +318,7 @@ void __init get_mtrr_state(void) | |||
318 | if (mtrr_state.have_fixed) | 318 | if (mtrr_state.have_fixed) |
319 | get_fixed_ranges(mtrr_state.fixed_ranges); | 319 | get_fixed_ranges(mtrr_state.fixed_ranges); |
320 | 320 | ||
321 | rdmsr(MTRRdefType_MSR, lo, dummy); | 321 | rdmsr(MSR_MTRRdefType, lo, dummy); |
322 | mtrr_state.def_type = (lo & 0xff); | 322 | mtrr_state.def_type = (lo & 0xff); |
323 | mtrr_state.enabled = (lo & 0xc00) >> 10; | 323 | mtrr_state.enabled = (lo & 0xc00) >> 10; |
324 | 324 | ||
@@ -583,10 +583,10 @@ static void prepare_set(void) __acquires(set_atomicity_lock) | |||
583 | __flush_tlb(); | 583 | __flush_tlb(); |
584 | 584 | ||
585 | /* Save MTRR state */ | 585 | /* Save MTRR state */ |
586 | rdmsr(MTRRdefType_MSR, deftype_lo, deftype_hi); | 586 | rdmsr(MSR_MTRRdefType, deftype_lo, deftype_hi); |
587 | 587 | ||
588 | /* Disable MTRRs, and set the default type to uncached */ | 588 | /* Disable MTRRs, and set the default type to uncached */ |
589 | mtrr_wrmsr(MTRRdefType_MSR, deftype_lo & ~0xcff, deftype_hi); | 589 | mtrr_wrmsr(MSR_MTRRdefType, deftype_lo & ~0xcff, deftype_hi); |
590 | } | 590 | } |
591 | 591 | ||
592 | static void post_set(void) __releases(set_atomicity_lock) | 592 | static void post_set(void) __releases(set_atomicity_lock) |
@@ -595,7 +595,7 @@ static void post_set(void) __releases(set_atomicity_lock) | |||
595 | __flush_tlb(); | 595 | __flush_tlb(); |
596 | 596 | ||
597 | /* Intel (P6) standard MTRRs */ | 597 | /* Intel (P6) standard MTRRs */ |
598 | mtrr_wrmsr(MTRRdefType_MSR, deftype_lo, deftype_hi); | 598 | mtrr_wrmsr(MSR_MTRRdefType, deftype_lo, deftype_hi); |
599 | 599 | ||
600 | /* Enable caches */ | 600 | /* Enable caches */ |
601 | write_cr0(read_cr0() & 0xbfffffff); | 601 | write_cr0(read_cr0() & 0xbfffffff); |
@@ -707,7 +707,7 @@ int generic_validate_add_page(unsigned long base, unsigned long size, unsigned i | |||
707 | static int generic_have_wrcomb(void) | 707 | static int generic_have_wrcomb(void) |
708 | { | 708 | { |
709 | unsigned long config, dummy; | 709 | unsigned long config, dummy; |
710 | rdmsr(MTRRcap_MSR, config, dummy); | 710 | rdmsr(MSR_MTRRcap, config, dummy); |
711 | return (config & (1 << 10)); | 711 | return (config & (1 << 10)); |
712 | } | 712 | } |
713 | 713 | ||
diff --git a/arch/x86/kernel/cpu/mtrr/main.c b/arch/x86/kernel/cpu/mtrr/main.c index 03cda01f57c7..8fc248b5aeaf 100644 --- a/arch/x86/kernel/cpu/mtrr/main.c +++ b/arch/x86/kernel/cpu/mtrr/main.c | |||
@@ -104,7 +104,7 @@ static void __init set_num_var_ranges(void) | |||
104 | unsigned long config = 0, dummy; | 104 | unsigned long config = 0, dummy; |
105 | 105 | ||
106 | if (use_intel()) { | 106 | if (use_intel()) { |
107 | rdmsr(MTRRcap_MSR, config, dummy); | 107 | rdmsr(MSR_MTRRcap, config, dummy); |
108 | } else if (is_cpu(AMD)) | 108 | } else if (is_cpu(AMD)) |
109 | config = 2; | 109 | config = 2; |
110 | else if (is_cpu(CYRIX) || is_cpu(CENTAUR)) | 110 | else if (is_cpu(CYRIX) || is_cpu(CENTAUR)) |
diff --git a/arch/x86/kernel/cpu/mtrr/mtrr.h b/arch/x86/kernel/cpu/mtrr/mtrr.h index 77f67f7b347a..7538b767f206 100644 --- a/arch/x86/kernel/cpu/mtrr/mtrr.h +++ b/arch/x86/kernel/cpu/mtrr/mtrr.h | |||
@@ -5,21 +5,6 @@ | |||
5 | #include <linux/types.h> | 5 | #include <linux/types.h> |
6 | #include <linux/stddef.h> | 6 | #include <linux/stddef.h> |
7 | 7 | ||
8 | #define MTRRcap_MSR 0x0fe | ||
9 | #define MTRRdefType_MSR 0x2ff | ||
10 | |||
11 | #define MTRRfix64K_00000_MSR 0x250 | ||
12 | #define MTRRfix16K_80000_MSR 0x258 | ||
13 | #define MTRRfix16K_A0000_MSR 0x259 | ||
14 | #define MTRRfix4K_C0000_MSR 0x268 | ||
15 | #define MTRRfix4K_C8000_MSR 0x269 | ||
16 | #define MTRRfix4K_D0000_MSR 0x26a | ||
17 | #define MTRRfix4K_D8000_MSR 0x26b | ||
18 | #define MTRRfix4K_E0000_MSR 0x26c | ||
19 | #define MTRRfix4K_E8000_MSR 0x26d | ||
20 | #define MTRRfix4K_F0000_MSR 0x26e | ||
21 | #define MTRRfix4K_F8000_MSR 0x26f | ||
22 | |||
23 | #define MTRR_CHANGE_MASK_FIXED 0x01 | 8 | #define MTRR_CHANGE_MASK_FIXED 0x01 |
24 | #define MTRR_CHANGE_MASK_VARIABLE 0x02 | 9 | #define MTRR_CHANGE_MASK_VARIABLE 0x02 |
25 | #define MTRR_CHANGE_MASK_DEFTYPE 0x04 | 10 | #define MTRR_CHANGE_MASK_DEFTYPE 0x04 |
diff --git a/arch/x86/kernel/cpu/mtrr/state.c b/arch/x86/kernel/cpu/mtrr/state.c index 7f7e2753685b..1f5fb1588d1f 100644 --- a/arch/x86/kernel/cpu/mtrr/state.c +++ b/arch/x86/kernel/cpu/mtrr/state.c | |||
@@ -35,7 +35,7 @@ void set_mtrr_prepare_save(struct set_mtrr_context *ctxt) | |||
35 | 35 | ||
36 | if (use_intel()) | 36 | if (use_intel()) |
37 | /* Save MTRR state */ | 37 | /* Save MTRR state */ |
38 | rdmsr(MTRRdefType_MSR, ctxt->deftype_lo, ctxt->deftype_hi); | 38 | rdmsr(MSR_MTRRdefType, ctxt->deftype_lo, ctxt->deftype_hi); |
39 | else | 39 | else |
40 | /* Cyrix ARRs - everything else were excluded at the top */ | 40 | /* Cyrix ARRs - everything else were excluded at the top */ |
41 | ctxt->ccr3 = getCx86(CX86_CCR3); | 41 | ctxt->ccr3 = getCx86(CX86_CCR3); |
@@ -46,7 +46,7 @@ void set_mtrr_cache_disable(struct set_mtrr_context *ctxt) | |||
46 | { | 46 | { |
47 | if (use_intel()) | 47 | if (use_intel()) |
48 | /* Disable MTRRs, and set the default type to uncached */ | 48 | /* Disable MTRRs, and set the default type to uncached */ |
49 | mtrr_wrmsr(MTRRdefType_MSR, ctxt->deftype_lo & 0xf300UL, | 49 | mtrr_wrmsr(MSR_MTRRdefType, ctxt->deftype_lo & 0xf300UL, |
50 | ctxt->deftype_hi); | 50 | ctxt->deftype_hi); |
51 | else if (is_cpu(CYRIX)) | 51 | else if (is_cpu(CYRIX)) |
52 | /* Cyrix ARRs - everything else were excluded at the top */ | 52 | /* Cyrix ARRs - everything else were excluded at the top */ |
@@ -64,7 +64,7 @@ void set_mtrr_done(struct set_mtrr_context *ctxt) | |||
64 | /* Restore MTRRdefType */ | 64 | /* Restore MTRRdefType */ |
65 | if (use_intel()) | 65 | if (use_intel()) |
66 | /* Intel (P6) standard MTRRs */ | 66 | /* Intel (P6) standard MTRRs */ |
67 | mtrr_wrmsr(MTRRdefType_MSR, ctxt->deftype_lo, ctxt->deftype_hi); | 67 | mtrr_wrmsr(MSR_MTRRdefType, ctxt->deftype_lo, ctxt->deftype_hi); |
68 | else | 68 | else |
69 | /* Cyrix ARRs - everything else was excluded at the top */ | 69 | /* Cyrix ARRs - everything else was excluded at the top */ |
70 | setCx86(CX86_CCR3, ctxt->ccr3); | 70 | setCx86(CX86_CCR3, ctxt->ccr3); |
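The MTRR hunks above are a mechanical rename: the private MTRR*_MSR constants are dropped from mtrr.h in favour of the shared MSR_MTRR* names, so rdmsr(MSR_MTRRcap, ...) and friends touch the same registers as before. For reference, the MTRRcap fields the callers rely on are VCNT in bits 7:0 (set_num_var_ranges), FIX in bit 8 (get_mtrr_state) and WC in bit 10 (generic_have_wrcomb); decode_mtrrcap() below is only an illustrative helper, not part of the patch:

    #include <stdint.h>
    #include <stdio.h>

    /* Illustration only: split an IA32_MTRRCAP value into the fields used above. */
    static void decode_mtrrcap(uint64_t cap)
    {
            unsigned vcnt = cap & 0xff;          /* number of variable ranges */
            int have_fixed = (cap >> 8) & 1;     /* fixed-range MTRRs present */
            int have_wc = (cap >> 10) & 1;       /* write-combining supported */

            printf("%u variable ranges, fixed:%d, WC:%d\n", vcnt, have_fixed, have_wc);
    }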
diff --git a/arch/x86/kernel/cpu/perf_counter.c b/arch/x86/kernel/cpu/perf_counter.c new file mode 100644 index 000000000000..76dfef23f789 --- /dev/null +++ b/arch/x86/kernel/cpu/perf_counter.c | |||
@@ -0,0 +1,1721 @@ | |||
1 | /* | ||
2 | * Performance counter x86 architecture code | ||
3 | * | ||
4 | * Copyright (C) 2008 Thomas Gleixner <tglx@linutronix.de> | ||
5 | * Copyright (C) 2008-2009 Red Hat, Inc., Ingo Molnar | ||
6 | * Copyright (C) 2009 Jaswinder Singh Rajput | ||
7 | * Copyright (C) 2009 Advanced Micro Devices, Inc., Robert Richter | ||
8 | * Copyright (C) 2008-2009 Red Hat, Inc., Peter Zijlstra <pzijlstr@redhat.com> | ||
9 | * | ||
10 | * For licencing details see kernel-base/COPYING | ||
11 | */ | ||
12 | |||
13 | #include <linux/perf_counter.h> | ||
14 | #include <linux/capability.h> | ||
15 | #include <linux/notifier.h> | ||
16 | #include <linux/hardirq.h> | ||
17 | #include <linux/kprobes.h> | ||
18 | #include <linux/module.h> | ||
19 | #include <linux/kdebug.h> | ||
20 | #include <linux/sched.h> | ||
21 | #include <linux/uaccess.h> | ||
22 | #include <linux/highmem.h> | ||
23 | |||
24 | #include <asm/apic.h> | ||
25 | #include <asm/stacktrace.h> | ||
26 | #include <asm/nmi.h> | ||
27 | |||
28 | static u64 perf_counter_mask __read_mostly; | ||
29 | |||
30 | struct cpu_hw_counters { | ||
31 | struct perf_counter *counters[X86_PMC_IDX_MAX]; | ||
32 | unsigned long used_mask[BITS_TO_LONGS(X86_PMC_IDX_MAX)]; | ||
33 | unsigned long active_mask[BITS_TO_LONGS(X86_PMC_IDX_MAX)]; | ||
34 | unsigned long interrupts; | ||
35 | int enabled; | ||
36 | }; | ||
37 | |||
38 | /* | ||
39 | * struct x86_pmu - generic x86 pmu | ||
40 | */ | ||
41 | struct x86_pmu { | ||
42 | const char *name; | ||
43 | int version; | ||
44 | int (*handle_irq)(struct pt_regs *); | ||
45 | void (*disable_all)(void); | ||
46 | void (*enable_all)(void); | ||
47 | void (*enable)(struct hw_perf_counter *, int); | ||
48 | void (*disable)(struct hw_perf_counter *, int); | ||
49 | unsigned eventsel; | ||
50 | unsigned perfctr; | ||
51 | u64 (*event_map)(int); | ||
52 | u64 (*raw_event)(u64); | ||
53 | int max_events; | ||
54 | int num_counters; | ||
55 | int num_counters_fixed; | ||
56 | int counter_bits; | ||
57 | u64 counter_mask; | ||
58 | u64 max_period; | ||
59 | u64 intel_ctrl; | ||
60 | }; | ||
61 | |||
62 | static struct x86_pmu x86_pmu __read_mostly; | ||
63 | |||
64 | static DEFINE_PER_CPU(struct cpu_hw_counters, cpu_hw_counters) = { | ||
65 | .enabled = 1, | ||
66 | }; | ||
67 | |||
68 | /* | ||
69 | * Intel PerfMon v3. Used on Core2 and later. | ||
70 | */ | ||
71 | static const u64 intel_perfmon_event_map[] = | ||
72 | { | ||
73 | [PERF_COUNT_HW_CPU_CYCLES] = 0x003c, | ||
74 | [PERF_COUNT_HW_INSTRUCTIONS] = 0x00c0, | ||
75 | [PERF_COUNT_HW_CACHE_REFERENCES] = 0x4f2e, | ||
76 | [PERF_COUNT_HW_CACHE_MISSES] = 0x412e, | ||
77 | [PERF_COUNT_HW_BRANCH_INSTRUCTIONS] = 0x00c4, | ||
78 | [PERF_COUNT_HW_BRANCH_MISSES] = 0x00c5, | ||
79 | [PERF_COUNT_HW_BUS_CYCLES] = 0x013c, | ||
80 | }; | ||
81 | |||
82 | static u64 intel_pmu_event_map(int event) | ||
83 | { | ||
84 | return intel_perfmon_event_map[event]; | ||
85 | } | ||
86 | |||
87 | /* | ||
88 | * Generalized hw caching related event table, filled | ||
89 | * in on a per model basis. A value of 0 means | ||
90 | * 'not supported', -1 means 'event makes no sense on | ||
91 | * this CPU', any other value means the raw event | ||
92 | * ID. | ||
93 | */ | ||
94 | |||
95 | #define C(x) PERF_COUNT_HW_CACHE_##x | ||
96 | |||
97 | static u64 __read_mostly hw_cache_event_ids | ||
98 | [PERF_COUNT_HW_CACHE_MAX] | ||
99 | [PERF_COUNT_HW_CACHE_OP_MAX] | ||
100 | [PERF_COUNT_HW_CACHE_RESULT_MAX]; | ||
101 | |||
102 | static const u64 nehalem_hw_cache_event_ids | ||
103 | [PERF_COUNT_HW_CACHE_MAX] | ||
104 | [PERF_COUNT_HW_CACHE_OP_MAX] | ||
105 | [PERF_COUNT_HW_CACHE_RESULT_MAX] = | ||
106 | { | ||
107 | [ C(L1D) ] = { | ||
108 | [ C(OP_READ) ] = { | ||
109 | [ C(RESULT_ACCESS) ] = 0x0f40, /* L1D_CACHE_LD.MESI */ | ||
110 | [ C(RESULT_MISS) ] = 0x0140, /* L1D_CACHE_LD.I_STATE */ | ||
111 | }, | ||
112 | [ C(OP_WRITE) ] = { | ||
113 | [ C(RESULT_ACCESS) ] = 0x0f41, /* L1D_CACHE_ST.MESI */ | ||
114 | [ C(RESULT_MISS) ] = 0x0141, /* L1D_CACHE_ST.I_STATE */ | ||
115 | }, | ||
116 | [ C(OP_PREFETCH) ] = { | ||
117 | [ C(RESULT_ACCESS) ] = 0x014e, /* L1D_PREFETCH.REQUESTS */ | ||
118 | [ C(RESULT_MISS) ] = 0x024e, /* L1D_PREFETCH.MISS */ | ||
119 | }, | ||
120 | }, | ||
121 | [ C(L1I ) ] = { | ||
122 | [ C(OP_READ) ] = { | ||
123 | [ C(RESULT_ACCESS) ] = 0x0380, /* L1I.READS */ | ||
124 | [ C(RESULT_MISS) ] = 0x0280, /* L1I.MISSES */ | ||
125 | }, | ||
126 | [ C(OP_WRITE) ] = { | ||
127 | [ C(RESULT_ACCESS) ] = -1, | ||
128 | [ C(RESULT_MISS) ] = -1, | ||
129 | }, | ||
130 | [ C(OP_PREFETCH) ] = { | ||
131 | [ C(RESULT_ACCESS) ] = 0x0, | ||
132 | [ C(RESULT_MISS) ] = 0x0, | ||
133 | }, | ||
134 | }, | ||
135 | [ C(LL ) ] = { | ||
136 | [ C(OP_READ) ] = { | ||
137 | [ C(RESULT_ACCESS) ] = 0x0324, /* L2_RQSTS.LOADS */ | ||
138 | [ C(RESULT_MISS) ] = 0x0224, /* L2_RQSTS.LD_MISS */ | ||
139 | }, | ||
140 | [ C(OP_WRITE) ] = { | ||
141 | [ C(RESULT_ACCESS) ] = 0x0c24, /* L2_RQSTS.RFOS */ | ||
142 | [ C(RESULT_MISS) ] = 0x0824, /* L2_RQSTS.RFO_MISS */ | ||
143 | }, | ||
144 | [ C(OP_PREFETCH) ] = { | ||
145 | [ C(RESULT_ACCESS) ] = 0x4f2e, /* LLC Reference */ | ||
146 | [ C(RESULT_MISS) ] = 0x412e, /* LLC Misses */ | ||
147 | }, | ||
148 | }, | ||
149 | [ C(DTLB) ] = { | ||
150 | [ C(OP_READ) ] = { | ||
151 | [ C(RESULT_ACCESS) ] = 0x0f40, /* L1D_CACHE_LD.MESI (alias) */ | ||
152 | [ C(RESULT_MISS) ] = 0x0108, /* DTLB_LOAD_MISSES.ANY */ | ||
153 | }, | ||
154 | [ C(OP_WRITE) ] = { | ||
155 | [ C(RESULT_ACCESS) ] = 0x0f41, /* L1D_CACHE_ST.MESI (alias) */ | ||
156 | [ C(RESULT_MISS) ] = 0x010c, /* MEM_STORE_RETIRED.DTLB_MISS */ | ||
157 | }, | ||
158 | [ C(OP_PREFETCH) ] = { | ||
159 | [ C(RESULT_ACCESS) ] = 0x0, | ||
160 | [ C(RESULT_MISS) ] = 0x0, | ||
161 | }, | ||
162 | }, | ||
163 | [ C(ITLB) ] = { | ||
164 | [ C(OP_READ) ] = { | ||
165 | [ C(RESULT_ACCESS) ] = 0x01c0, /* INST_RETIRED.ANY_P */ | ||
166 | [ C(RESULT_MISS) ] = 0x20c8, /* ITLB_MISS_RETIRED */ | ||
167 | }, | ||
168 | [ C(OP_WRITE) ] = { | ||
169 | [ C(RESULT_ACCESS) ] = -1, | ||
170 | [ C(RESULT_MISS) ] = -1, | ||
171 | }, | ||
172 | [ C(OP_PREFETCH) ] = { | ||
173 | [ C(RESULT_ACCESS) ] = -1, | ||
174 | [ C(RESULT_MISS) ] = -1, | ||
175 | }, | ||
176 | }, | ||
177 | [ C(BPU ) ] = { | ||
178 | [ C(OP_READ) ] = { | ||
179 | [ C(RESULT_ACCESS) ] = 0x00c4, /* BR_INST_RETIRED.ALL_BRANCHES */ | ||
180 | [ C(RESULT_MISS) ] = 0x03e8, /* BPU_CLEARS.ANY */ | ||
181 | }, | ||
182 | [ C(OP_WRITE) ] = { | ||
183 | [ C(RESULT_ACCESS) ] = -1, | ||
184 | [ C(RESULT_MISS) ] = -1, | ||
185 | }, | ||
186 | [ C(OP_PREFETCH) ] = { | ||
187 | [ C(RESULT_ACCESS) ] = -1, | ||
188 | [ C(RESULT_MISS) ] = -1, | ||
189 | }, | ||
190 | }, | ||
191 | }; | ||
192 | |||
193 | static const u64 core2_hw_cache_event_ids | ||
194 | [PERF_COUNT_HW_CACHE_MAX] | ||
195 | [PERF_COUNT_HW_CACHE_OP_MAX] | ||
196 | [PERF_COUNT_HW_CACHE_RESULT_MAX] = | ||
197 | { | ||
198 | [ C(L1D) ] = { | ||
199 | [ C(OP_READ) ] = { | ||
200 | [ C(RESULT_ACCESS) ] = 0x0f40, /* L1D_CACHE_LD.MESI */ | ||
201 | [ C(RESULT_MISS) ] = 0x0140, /* L1D_CACHE_LD.I_STATE */ | ||
202 | }, | ||
203 | [ C(OP_WRITE) ] = { | ||
204 | [ C(RESULT_ACCESS) ] = 0x0f41, /* L1D_CACHE_ST.MESI */ | ||
205 | [ C(RESULT_MISS) ] = 0x0141, /* L1D_CACHE_ST.I_STATE */ | ||
206 | }, | ||
207 | [ C(OP_PREFETCH) ] = { | ||
208 | [ C(RESULT_ACCESS) ] = 0x104e, /* L1D_PREFETCH.REQUESTS */ | ||
209 | [ C(RESULT_MISS) ] = 0, | ||
210 | }, | ||
211 | }, | ||
212 | [ C(L1I ) ] = { | ||
213 | [ C(OP_READ) ] = { | ||
214 | [ C(RESULT_ACCESS) ] = 0x0080, /* L1I.READS */ | ||
215 | [ C(RESULT_MISS) ] = 0x0081, /* L1I.MISSES */ | ||
216 | }, | ||
217 | [ C(OP_WRITE) ] = { | ||
218 | [ C(RESULT_ACCESS) ] = -1, | ||
219 | [ C(RESULT_MISS) ] = -1, | ||
220 | }, | ||
221 | [ C(OP_PREFETCH) ] = { | ||
222 | [ C(RESULT_ACCESS) ] = 0, | ||
223 | [ C(RESULT_MISS) ] = 0, | ||
224 | }, | ||
225 | }, | ||
226 | [ C(LL ) ] = { | ||
227 | [ C(OP_READ) ] = { | ||
228 | [ C(RESULT_ACCESS) ] = 0x4f29, /* L2_LD.MESI */ | ||
229 | [ C(RESULT_MISS) ] = 0x4129, /* L2_LD.ISTATE */ | ||
230 | }, | ||
231 | [ C(OP_WRITE) ] = { | ||
232 | [ C(RESULT_ACCESS) ] = 0x4f2A, /* L2_ST.MESI */ | ||
233 | [ C(RESULT_MISS) ] = 0x412A, /* L2_ST.ISTATE */ | ||
234 | }, | ||
235 | [ C(OP_PREFETCH) ] = { | ||
236 | [ C(RESULT_ACCESS) ] = 0, | ||
237 | [ C(RESULT_MISS) ] = 0, | ||
238 | }, | ||
239 | }, | ||
240 | [ C(DTLB) ] = { | ||
241 | [ C(OP_READ) ] = { | ||
242 | [ C(RESULT_ACCESS) ] = 0x0f40, /* L1D_CACHE_LD.MESI (alias) */ | ||
243 | [ C(RESULT_MISS) ] = 0x0208, /* DTLB_MISSES.MISS_LD */ | ||
244 | }, | ||
245 | [ C(OP_WRITE) ] = { | ||
246 | [ C(RESULT_ACCESS) ] = 0x0f41, /* L1D_CACHE_ST.MESI (alias) */ | ||
247 | [ C(RESULT_MISS) ] = 0x0808, /* DTLB_MISSES.MISS_ST */ | ||
248 | }, | ||
249 | [ C(OP_PREFETCH) ] = { | ||
250 | [ C(RESULT_ACCESS) ] = 0, | ||
251 | [ C(RESULT_MISS) ] = 0, | ||
252 | }, | ||
253 | }, | ||
254 | [ C(ITLB) ] = { | ||
255 | [ C(OP_READ) ] = { | ||
256 | [ C(RESULT_ACCESS) ] = 0x00c0, /* INST_RETIRED.ANY_P */ | ||
257 | [ C(RESULT_MISS) ] = 0x1282, /* ITLBMISSES */ | ||
258 | }, | ||
259 | [ C(OP_WRITE) ] = { | ||
260 | [ C(RESULT_ACCESS) ] = -1, | ||
261 | [ C(RESULT_MISS) ] = -1, | ||
262 | }, | ||
263 | [ C(OP_PREFETCH) ] = { | ||
264 | [ C(RESULT_ACCESS) ] = -1, | ||
265 | [ C(RESULT_MISS) ] = -1, | ||
266 | }, | ||
267 | }, | ||
268 | [ C(BPU ) ] = { | ||
269 | [ C(OP_READ) ] = { | ||
270 | [ C(RESULT_ACCESS) ] = 0x00c4, /* BR_INST_RETIRED.ANY */ | ||
271 | [ C(RESULT_MISS) ] = 0x00c5, /* BP_INST_RETIRED.MISPRED */ | ||
272 | }, | ||
273 | [ C(OP_WRITE) ] = { | ||
274 | [ C(RESULT_ACCESS) ] = -1, | ||
275 | [ C(RESULT_MISS) ] = -1, | ||
276 | }, | ||
277 | [ C(OP_PREFETCH) ] = { | ||
278 | [ C(RESULT_ACCESS) ] = -1, | ||
279 | [ C(RESULT_MISS) ] = -1, | ||
280 | }, | ||
281 | }, | ||
282 | }; | ||
283 | |||
284 | static const u64 atom_hw_cache_event_ids | ||
285 | [PERF_COUNT_HW_CACHE_MAX] | ||
286 | [PERF_COUNT_HW_CACHE_OP_MAX] | ||
287 | [PERF_COUNT_HW_CACHE_RESULT_MAX] = | ||
288 | { | ||
289 | [ C(L1D) ] = { | ||
290 | [ C(OP_READ) ] = { | ||
291 | [ C(RESULT_ACCESS) ] = 0x2140, /* L1D_CACHE.LD */ | ||
292 | [ C(RESULT_MISS) ] = 0, | ||
293 | }, | ||
294 | [ C(OP_WRITE) ] = { | ||
295 | [ C(RESULT_ACCESS) ] = 0x2240, /* L1D_CACHE.ST */ | ||
296 | [ C(RESULT_MISS) ] = 0, | ||
297 | }, | ||
298 | [ C(OP_PREFETCH) ] = { | ||
299 | [ C(RESULT_ACCESS) ] = 0x0, | ||
300 | [ C(RESULT_MISS) ] = 0, | ||
301 | }, | ||
302 | }, | ||
303 | [ C(L1I ) ] = { | ||
304 | [ C(OP_READ) ] = { | ||
305 | [ C(RESULT_ACCESS) ] = 0x0380, /* L1I.READS */ | ||
306 | [ C(RESULT_MISS) ] = 0x0280, /* L1I.MISSES */ | ||
307 | }, | ||
308 | [ C(OP_WRITE) ] = { | ||
309 | [ C(RESULT_ACCESS) ] = -1, | ||
310 | [ C(RESULT_MISS) ] = -1, | ||
311 | }, | ||
312 | [ C(OP_PREFETCH) ] = { | ||
313 | [ C(RESULT_ACCESS) ] = 0, | ||
314 | [ C(RESULT_MISS) ] = 0, | ||
315 | }, | ||
316 | }, | ||
317 | [ C(LL ) ] = { | ||
318 | [ C(OP_READ) ] = { | ||
319 | [ C(RESULT_ACCESS) ] = 0x4f29, /* L2_LD.MESI */ | ||
320 | [ C(RESULT_MISS) ] = 0x4129, /* L2_LD.ISTATE */ | ||
321 | }, | ||
322 | [ C(OP_WRITE) ] = { | ||
323 | [ C(RESULT_ACCESS) ] = 0x4f2A, /* L2_ST.MESI */ | ||
324 | [ C(RESULT_MISS) ] = 0x412A, /* L2_ST.ISTATE */ | ||
325 | }, | ||
326 | [ C(OP_PREFETCH) ] = { | ||
327 | [ C(RESULT_ACCESS) ] = 0, | ||
328 | [ C(RESULT_MISS) ] = 0, | ||
329 | }, | ||
330 | }, | ||
331 | [ C(DTLB) ] = { | ||
332 | [ C(OP_READ) ] = { | ||
333 | [ C(RESULT_ACCESS) ] = 0x2140, /* L1D_CACHE_LD.MESI (alias) */ | ||
334 | [ C(RESULT_MISS) ] = 0x0508, /* DTLB_MISSES.MISS_LD */ | ||
335 | }, | ||
336 | [ C(OP_WRITE) ] = { | ||
337 | [ C(RESULT_ACCESS) ] = 0x2240, /* L1D_CACHE_ST.MESI (alias) */ | ||
338 | [ C(RESULT_MISS) ] = 0x0608, /* DTLB_MISSES.MISS_ST */ | ||
339 | }, | ||
340 | [ C(OP_PREFETCH) ] = { | ||
341 | [ C(RESULT_ACCESS) ] = 0, | ||
342 | [ C(RESULT_MISS) ] = 0, | ||
343 | }, | ||
344 | }, | ||
345 | [ C(ITLB) ] = { | ||
346 | [ C(OP_READ) ] = { | ||
347 | [ C(RESULT_ACCESS) ] = 0x00c0, /* INST_RETIRED.ANY_P */ | ||
348 | [ C(RESULT_MISS) ] = 0x0282, /* ITLB.MISSES */ | ||
349 | }, | ||
350 | [ C(OP_WRITE) ] = { | ||
351 | [ C(RESULT_ACCESS) ] = -1, | ||
352 | [ C(RESULT_MISS) ] = -1, | ||
353 | }, | ||
354 | [ C(OP_PREFETCH) ] = { | ||
355 | [ C(RESULT_ACCESS) ] = -1, | ||
356 | [ C(RESULT_MISS) ] = -1, | ||
357 | }, | ||
358 | }, | ||
359 | [ C(BPU ) ] = { | ||
360 | [ C(OP_READ) ] = { | ||
361 | [ C(RESULT_ACCESS) ] = 0x00c4, /* BR_INST_RETIRED.ANY */ | ||
362 | [ C(RESULT_MISS) ] = 0x00c5, /* BP_INST_RETIRED.MISPRED */ | ||
363 | }, | ||
364 | [ C(OP_WRITE) ] = { | ||
365 | [ C(RESULT_ACCESS) ] = -1, | ||
366 | [ C(RESULT_MISS) ] = -1, | ||
367 | }, | ||
368 | [ C(OP_PREFETCH) ] = { | ||
369 | [ C(RESULT_ACCESS) ] = -1, | ||
370 | [ C(RESULT_MISS) ] = -1, | ||
371 | }, | ||
372 | }, | ||
373 | }; | ||
374 | |||
375 | static u64 intel_pmu_raw_event(u64 event) | ||
376 | { | ||
377 | #define CORE_EVNTSEL_EVENT_MASK 0x000000FFULL | ||
378 | #define CORE_EVNTSEL_UNIT_MASK 0x0000FF00ULL | ||
379 | #define CORE_EVNTSEL_EDGE_MASK 0x00040000ULL | ||
380 | #define CORE_EVNTSEL_INV_MASK 0x00800000ULL | ||
381 | #define CORE_EVNTSEL_COUNTER_MASK 0xFF000000ULL | ||
382 | |||
383 | #define CORE_EVNTSEL_MASK \ | ||
384 | (CORE_EVNTSEL_EVENT_MASK | \ | ||
385 | CORE_EVNTSEL_UNIT_MASK | \ | ||
386 | CORE_EVNTSEL_EDGE_MASK | \ | ||
387 | CORE_EVNTSEL_INV_MASK | \ | ||
388 | CORE_EVNTSEL_COUNTER_MASK) | ||
389 | |||
390 | return event & CORE_EVNTSEL_MASK; | ||
391 | } | ||
392 | |||
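intel_pmu_raw_event() above only masks a user-supplied raw config down to the architectural EVNTSEL fields: event select in bits 7:0, unit mask in 15:8, edge detect in bit 18, invert in bit 23, counter mask in 31:24. As an illustration (these macros are not part of the patch), the LLC-miss encoding 0x412e from intel_perfmon_event_map splits up like this:

    /* Illustration only: decode a raw config such as 0x412e (LLC misses). */
    #define EVNTSEL_EVENT(cfg)   ((cfg) & 0xff)          /* 0x2e  event select  */
    #define EVNTSEL_UMASK(cfg)   (((cfg) >> 8) & 0xff)   /* 0x41  unit mask     */
    #define EVNTSEL_EDGE(cfg)    (((cfg) >> 18) & 1)     /* 0     edge detect   */
    #define EVNTSEL_INV(cfg)     (((cfg) >> 23) & 1)     /* 0     invert        */
    #define EVNTSEL_CMASK(cfg)   (((cfg) >> 24) & 0xff)  /* 0     counter mask  */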
393 | static const u64 amd_hw_cache_event_ids | ||
394 | [PERF_COUNT_HW_CACHE_MAX] | ||
395 | [PERF_COUNT_HW_CACHE_OP_MAX] | ||
396 | [PERF_COUNT_HW_CACHE_RESULT_MAX] = | ||
397 | { | ||
398 | [ C(L1D) ] = { | ||
399 | [ C(OP_READ) ] = { | ||
400 | [ C(RESULT_ACCESS) ] = 0x0040, /* Data Cache Accesses */ | ||
401 | [ C(RESULT_MISS) ] = 0x0041, /* Data Cache Misses */ | ||
402 | }, | ||
403 | [ C(OP_WRITE) ] = { | ||
404 | [ C(RESULT_ACCESS) ] = 0x0042, /* Data Cache Refills from L2 */ | ||
405 | [ C(RESULT_MISS) ] = 0, | ||
406 | }, | ||
407 | [ C(OP_PREFETCH) ] = { | ||
408 | [ C(RESULT_ACCESS) ] = 0x0267, /* Data Prefetcher :attempts */ | ||
409 | [ C(RESULT_MISS) ] = 0x0167, /* Data Prefetcher :cancelled */ | ||
410 | }, | ||
411 | }, | ||
412 | [ C(L1I ) ] = { | ||
413 | [ C(OP_READ) ] = { | ||
414 | [ C(RESULT_ACCESS) ] = 0x0080, /* Instruction cache fetches */ | ||
415 | [ C(RESULT_MISS) ] = 0x0081, /* Instruction cache misses */ | ||
416 | }, | ||
417 | [ C(OP_WRITE) ] = { | ||
418 | [ C(RESULT_ACCESS) ] = -1, | ||
419 | [ C(RESULT_MISS) ] = -1, | ||
420 | }, | ||
421 | [ C(OP_PREFETCH) ] = { | ||
422 | [ C(RESULT_ACCESS) ] = 0x014B, /* Prefetch Instructions :Load */ | ||
423 | [ C(RESULT_MISS) ] = 0, | ||
424 | }, | ||
425 | }, | ||
426 | [ C(LL ) ] = { | ||
427 | [ C(OP_READ) ] = { | ||
428 | [ C(RESULT_ACCESS) ] = 0x037D, /* Requests to L2 Cache :IC+DC */ | ||
429 | [ C(RESULT_MISS) ] = 0x037E, /* L2 Cache Misses : IC+DC */ | ||
430 | }, | ||
431 | [ C(OP_WRITE) ] = { | ||
432 | [ C(RESULT_ACCESS) ] = 0x017F, /* L2 Fill/Writeback */ | ||
433 | [ C(RESULT_MISS) ] = 0, | ||
434 | }, | ||
435 | [ C(OP_PREFETCH) ] = { | ||
436 | [ C(RESULT_ACCESS) ] = 0, | ||
437 | [ C(RESULT_MISS) ] = 0, | ||
438 | }, | ||
439 | }, | ||
440 | [ C(DTLB) ] = { | ||
441 | [ C(OP_READ) ] = { | ||
442 | [ C(RESULT_ACCESS) ] = 0x0040, /* Data Cache Accesses */ | ||
443 | [ C(RESULT_MISS) ] = 0x0046, /* L1 DTLB and L2 DLTB Miss */ | ||
444 | }, | ||
445 | [ C(OP_WRITE) ] = { | ||
446 | [ C(RESULT_ACCESS) ] = 0, | ||
447 | [ C(RESULT_MISS) ] = 0, | ||
448 | }, | ||
449 | [ C(OP_PREFETCH) ] = { | ||
450 | [ C(RESULT_ACCESS) ] = 0, | ||
451 | [ C(RESULT_MISS) ] = 0, | ||
452 | }, | ||
453 | }, | ||
454 | [ C(ITLB) ] = { | ||
455 | [ C(OP_READ) ] = { | ||
456 | [ C(RESULT_ACCESS) ] = 0x0080, /* Instruction fecthes */ | ||
457 | [ C(RESULT_MISS) ] = 0x0085, /* Instr. fetch ITLB misses */ | ||
458 | }, | ||
459 | [ C(OP_WRITE) ] = { | ||
460 | [ C(RESULT_ACCESS) ] = -1, | ||
461 | [ C(RESULT_MISS) ] = -1, | ||
462 | }, | ||
463 | [ C(OP_PREFETCH) ] = { | ||
464 | [ C(RESULT_ACCESS) ] = -1, | ||
465 | [ C(RESULT_MISS) ] = -1, | ||
466 | }, | ||
467 | }, | ||
468 | [ C(BPU ) ] = { | ||
469 | [ C(OP_READ) ] = { | ||
470 | [ C(RESULT_ACCESS) ] = 0x00c2, /* Retired Branch Instr. */ | ||
471 | [ C(RESULT_MISS) ] = 0x00c3, /* Retired Mispredicted BI */ | ||
472 | }, | ||
473 | [ C(OP_WRITE) ] = { | ||
474 | [ C(RESULT_ACCESS) ] = -1, | ||
475 | [ C(RESULT_MISS) ] = -1, | ||
476 | }, | ||
477 | [ C(OP_PREFETCH) ] = { | ||
478 | [ C(RESULT_ACCESS) ] = -1, | ||
479 | [ C(RESULT_MISS) ] = -1, | ||
480 | }, | ||
481 | }, | ||
482 | }; | ||
483 | |||
484 | /* | ||
485 | * AMD Performance Monitor K7 and later. | ||
486 | */ | ||
487 | static const u64 amd_perfmon_event_map[] = | ||
488 | { | ||
489 | [PERF_COUNT_HW_CPU_CYCLES] = 0x0076, | ||
490 | [PERF_COUNT_HW_INSTRUCTIONS] = 0x00c0, | ||
491 | [PERF_COUNT_HW_CACHE_REFERENCES] = 0x0080, | ||
492 | [PERF_COUNT_HW_CACHE_MISSES] = 0x0081, | ||
493 | [PERF_COUNT_HW_BRANCH_INSTRUCTIONS] = 0x00c4, | ||
494 | [PERF_COUNT_HW_BRANCH_MISSES] = 0x00c5, | ||
495 | }; | ||
496 | |||
497 | static u64 amd_pmu_event_map(int event) | ||
498 | { | ||
499 | return amd_perfmon_event_map[event]; | ||
500 | } | ||
501 | |||
502 | static u64 amd_pmu_raw_event(u64 event) | ||
503 | { | ||
504 | #define K7_EVNTSEL_EVENT_MASK 0x7000000FFULL | ||
505 | #define K7_EVNTSEL_UNIT_MASK 0x00000FF00ULL | ||
506 | #define K7_EVNTSEL_EDGE_MASK 0x000040000ULL | ||
507 | #define K7_EVNTSEL_INV_MASK 0x000800000ULL | ||
508 | #define K7_EVNTSEL_COUNTER_MASK 0x0FF000000ULL | ||
509 | |||
510 | #define K7_EVNTSEL_MASK \ | ||
511 | (K7_EVNTSEL_EVENT_MASK | \ | ||
512 | K7_EVNTSEL_UNIT_MASK | \ | ||
513 | K7_EVNTSEL_EDGE_MASK | \ | ||
514 | K7_EVNTSEL_INV_MASK | \ | ||
515 | K7_EVNTSEL_COUNTER_MASK) | ||
516 | |||
517 | return event & K7_EVNTSEL_MASK; | ||
518 | } | ||
519 | |||
520 | /* | ||
521 | * Propagate counter elapsed time into the generic counter. | ||
522 | * Can only be executed on the CPU where the counter is active. | ||
523 | * Returns the delta events processed. | ||
524 | */ | ||
525 | static u64 | ||
526 | x86_perf_counter_update(struct perf_counter *counter, | ||
527 | struct hw_perf_counter *hwc, int idx) | ||
528 | { | ||
529 | int shift = 64 - x86_pmu.counter_bits; | ||
530 | u64 prev_raw_count, new_raw_count; | ||
531 | s64 delta; | ||
532 | |||
533 | /* | ||
534 | * Careful: an NMI might modify the previous counter value. | ||
535 | * | ||
536 | * Our tactic to handle this is to first atomically read and | ||
537 | * exchange a new raw count - then add that new-prev delta | ||
538 | * count to the generic counter atomically: | ||
539 | */ | ||
540 | again: | ||
541 | prev_raw_count = atomic64_read(&hwc->prev_count); | ||
542 | rdmsrl(hwc->counter_base + idx, new_raw_count); | ||
543 | |||
544 | if (atomic64_cmpxchg(&hwc->prev_count, prev_raw_count, | ||
545 | new_raw_count) != prev_raw_count) | ||
546 | goto again; | ||
547 | |||
548 | /* | ||
549 | * Now we have the new raw value and have updated the prev | ||
550 | * timestamp already. We can now calculate the elapsed delta | ||
551 | * (counter-)time and add that to the generic counter. | ||
552 | * | ||
553 | * Careful, not all hw sign-extends above the physical width | ||
554 | * of the count. | ||
555 | */ | ||
556 | delta = (new_raw_count << shift) - (prev_raw_count << shift); | ||
557 | delta >>= shift; | ||
558 | |||
559 | atomic64_add(delta, &counter->count); | ||
560 | atomic64_sub(delta, &hwc->period_left); | ||
561 | |||
562 | return new_raw_count; | ||
563 | } | ||
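The shift sequence at the end of x86_perf_counter_update() is what keeps the delta correct when the hardware counter is narrower than 64 bits: both raw values are shifted up into the top of an s64, subtracted, then arithmetically shifted back down, so a counter that wrapped between reads still yields a small positive delta. A worked example (illustration only, assuming a 40-bit counter, hence shift = 24):

    /*
     * prev_raw_count = 0xFFFFFFFFFF   (counter about to wrap)
     * new_raw_count  = 0x0000000005   (read after 6 more events)
     *
     * (new << 24) - (prev << 24) =  0x0000000005000000
     *                             - 0xFFFFFFFFFF000000  (as s64: -0x1000000)
     *                             =  0x0000000006000000
     * delta >>= 24               =  6   -- the true elapsed count
     */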
564 | |||
565 | static atomic_t active_counters; | ||
566 | static DEFINE_MUTEX(pmc_reserve_mutex); | ||
567 | |||
568 | static bool reserve_pmc_hardware(void) | ||
569 | { | ||
570 | int i; | ||
571 | |||
572 | if (nmi_watchdog == NMI_LOCAL_APIC) | ||
573 | disable_lapic_nmi_watchdog(); | ||
574 | |||
575 | for (i = 0; i < x86_pmu.num_counters; i++) { | ||
576 | if (!reserve_perfctr_nmi(x86_pmu.perfctr + i)) | ||
577 | goto perfctr_fail; | ||
578 | } | ||
579 | |||
580 | for (i = 0; i < x86_pmu.num_counters; i++) { | ||
581 | if (!reserve_evntsel_nmi(x86_pmu.eventsel + i)) | ||
582 | goto eventsel_fail; | ||
583 | } | ||
584 | |||
585 | return true; | ||
586 | |||
587 | eventsel_fail: | ||
588 | for (i--; i >= 0; i--) | ||
589 | release_evntsel_nmi(x86_pmu.eventsel + i); | ||
590 | |||
591 | i = x86_pmu.num_counters; | ||
592 | |||
593 | perfctr_fail: | ||
594 | for (i--; i >= 0; i--) | ||
595 | release_perfctr_nmi(x86_pmu.perfctr + i); | ||
596 | |||
597 | if (nmi_watchdog == NMI_LOCAL_APIC) | ||
598 | enable_lapic_nmi_watchdog(); | ||
599 | |||
600 | return false; | ||
601 | } | ||
602 | |||
603 | static void release_pmc_hardware(void) | ||
604 | { | ||
605 | int i; | ||
606 | |||
607 | for (i = 0; i < x86_pmu.num_counters; i++) { | ||
608 | release_perfctr_nmi(x86_pmu.perfctr + i); | ||
609 | release_evntsel_nmi(x86_pmu.eventsel + i); | ||
610 | } | ||
611 | |||
612 | if (nmi_watchdog == NMI_LOCAL_APIC) | ||
613 | enable_lapic_nmi_watchdog(); | ||
614 | } | ||
615 | |||
616 | static void hw_perf_counter_destroy(struct perf_counter *counter) | ||
617 | { | ||
618 | if (atomic_dec_and_mutex_lock(&active_counters, &pmc_reserve_mutex)) { | ||
619 | release_pmc_hardware(); | ||
620 | mutex_unlock(&pmc_reserve_mutex); | ||
621 | } | ||
622 | } | ||
623 | |||
624 | static inline int x86_pmu_initialized(void) | ||
625 | { | ||
626 | return x86_pmu.handle_irq != NULL; | ||
627 | } | ||
628 | |||
629 | static inline int | ||
630 | set_ext_hw_attr(struct hw_perf_counter *hwc, struct perf_counter_attr *attr) | ||
631 | { | ||
632 | unsigned int cache_type, cache_op, cache_result; | ||
633 | u64 config, val; | ||
634 | |||
635 | config = attr->config; | ||
636 | |||
637 | cache_type = (config >> 0) & 0xff; | ||
638 | if (cache_type >= PERF_COUNT_HW_CACHE_MAX) | ||
639 | return -EINVAL; | ||
640 | |||
641 | cache_op = (config >> 8) & 0xff; | ||
642 | if (cache_op >= PERF_COUNT_HW_CACHE_OP_MAX) | ||
643 | return -EINVAL; | ||
644 | |||
645 | cache_result = (config >> 16) & 0xff; | ||
646 | if (cache_result >= PERF_COUNT_HW_CACHE_RESULT_MAX) | ||
647 | return -EINVAL; | ||
648 | |||
649 | val = hw_cache_event_ids[cache_type][cache_op][cache_result]; | ||
650 | |||
651 | if (val == 0) | ||
652 | return -ENOENT; | ||
653 | |||
654 | if (val == -1) | ||
655 | return -EINVAL; | ||
656 | |||
657 | hwc->config |= val; | ||
658 | |||
659 | return 0; | ||
660 | } | ||
661 | |||
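set_ext_hw_attr() above expects a PERF_TYPE_HW_CACHE config packed as (type | op << 8 | result << 16); the three bytes index the 3-dimensional hw_cache_event_ids table, and 0 (unsupported) or -1 (meaningless combination) are rejected. A hypothetical encoding helper, purely for illustration and reusing the kernel's u64 type and the C() macro from the listing:

    /* Illustration only: build the config that set_ext_hw_attr() decodes,
     * e.g. L1D read misses: make_cache_config(C(L1D), C(OP_READ), C(RESULT_MISS)) */
    static inline u64 make_cache_config(unsigned type, unsigned op, unsigned result)
    {
            return (u64)type | ((u64)op << 8) | ((u64)result << 16);
    }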
662 | /* | ||
663 | * Setup the hardware configuration for a given attr_type | ||
664 | */ | ||
665 | static int __hw_perf_counter_init(struct perf_counter *counter) | ||
666 | { | ||
667 | struct perf_counter_attr *attr = &counter->attr; | ||
668 | struct hw_perf_counter *hwc = &counter->hw; | ||
669 | int err; | ||
670 | |||
671 | if (!x86_pmu_initialized()) | ||
672 | return -ENODEV; | ||
673 | |||
674 | err = 0; | ||
675 | if (!atomic_inc_not_zero(&active_counters)) { | ||
676 | mutex_lock(&pmc_reserve_mutex); | ||
677 | if (atomic_read(&active_counters) == 0 && !reserve_pmc_hardware()) | ||
678 | err = -EBUSY; | ||
679 | else | ||
680 | atomic_inc(&active_counters); | ||
681 | mutex_unlock(&pmc_reserve_mutex); | ||
682 | } | ||
683 | if (err) | ||
684 | return err; | ||
685 | |||
686 | /* | ||
687 | * Generate PMC IRQs: | ||
688 | * (keep 'enabled' bit clear for now) | ||
689 | */ | ||
690 | hwc->config = ARCH_PERFMON_EVENTSEL_INT; | ||
691 | |||
692 | /* | ||
693 | * Count user and OS events unless requested not to. | ||
694 | */ | ||
695 | if (!attr->exclude_user) | ||
696 | hwc->config |= ARCH_PERFMON_EVENTSEL_USR; | ||
697 | if (!attr->exclude_kernel) | ||
698 | hwc->config |= ARCH_PERFMON_EVENTSEL_OS; | ||
699 | |||
700 | if (!hwc->sample_period) { | ||
701 | hwc->sample_period = x86_pmu.max_period; | ||
702 | hwc->last_period = hwc->sample_period; | ||
703 | atomic64_set(&hwc->period_left, hwc->sample_period); | ||
704 | } | ||
705 | |||
706 | counter->destroy = hw_perf_counter_destroy; | ||
707 | |||
708 | /* | ||
709 | * Raw event type provide the config in the event structure | ||
710 | */ | ||
711 | if (attr->type == PERF_TYPE_RAW) { | ||
712 | hwc->config |= x86_pmu.raw_event(attr->config); | ||
713 | return 0; | ||
714 | } | ||
715 | |||
716 | if (attr->type == PERF_TYPE_HW_CACHE) | ||
717 | return set_ext_hw_attr(hwc, attr); | ||
718 | |||
719 | if (attr->config >= x86_pmu.max_events) | ||
720 | return -EINVAL; | ||
721 | /* | ||
722 | * The generic map: | ||
723 | */ | ||
724 | hwc->config |= x86_pmu.event_map(attr->config); | ||
725 | |||
726 | return 0; | ||
727 | } | ||
728 | |||
729 | static void intel_pmu_disable_all(void) | ||
730 | { | ||
731 | wrmsrl(MSR_CORE_PERF_GLOBAL_CTRL, 0); | ||
732 | } | ||
733 | |||
734 | static void amd_pmu_disable_all(void) | ||
735 | { | ||
736 | struct cpu_hw_counters *cpuc = &__get_cpu_var(cpu_hw_counters); | ||
737 | int idx; | ||
738 | |||
739 | if (!cpuc->enabled) | ||
740 | return; | ||
741 | |||
742 | cpuc->enabled = 0; | ||
743 | /* | ||
744 | * ensure we write the disable before we start disabling the | ||
745 | * counters proper, so that amd_pmu_enable_counter() does the | ||
746 | * right thing. | ||
747 | */ | ||
748 | barrier(); | ||
749 | |||
750 | for (idx = 0; idx < x86_pmu.num_counters; idx++) { | ||
751 | u64 val; | ||
752 | |||
753 | if (!test_bit(idx, cpuc->active_mask)) | ||
754 | continue; | ||
755 | rdmsrl(MSR_K7_EVNTSEL0 + idx, val); | ||
756 | if (!(val & ARCH_PERFMON_EVENTSEL0_ENABLE)) | ||
757 | continue; | ||
758 | val &= ~ARCH_PERFMON_EVENTSEL0_ENABLE; | ||
759 | wrmsrl(MSR_K7_EVNTSEL0 + idx, val); | ||
760 | } | ||
761 | } | ||
762 | |||
763 | void hw_perf_disable(void) | ||
764 | { | ||
765 | if (!x86_pmu_initialized()) | ||
766 | return; | ||
767 | return x86_pmu.disable_all(); | ||
768 | } | ||
769 | |||
770 | static void intel_pmu_enable_all(void) | ||
771 | { | ||
772 | wrmsrl(MSR_CORE_PERF_GLOBAL_CTRL, x86_pmu.intel_ctrl); | ||
773 | } | ||
774 | |||
775 | static void amd_pmu_enable_all(void) | ||
776 | { | ||
777 | struct cpu_hw_counters *cpuc = &__get_cpu_var(cpu_hw_counters); | ||
778 | int idx; | ||
779 | |||
780 | if (cpuc->enabled) | ||
781 | return; | ||
782 | |||
783 | cpuc->enabled = 1; | ||
784 | barrier(); | ||
785 | |||
786 | for (idx = 0; idx < x86_pmu.num_counters; idx++) { | ||
787 | u64 val; | ||
788 | |||
789 | if (!test_bit(idx, cpuc->active_mask)) | ||
790 | continue; | ||
791 | rdmsrl(MSR_K7_EVNTSEL0 + idx, val); | ||
792 | if (val & ARCH_PERFMON_EVENTSEL0_ENABLE) | ||
793 | continue; | ||
794 | val |= ARCH_PERFMON_EVENTSEL0_ENABLE; | ||
795 | wrmsrl(MSR_K7_EVNTSEL0 + idx, val); | ||
796 | } | ||
797 | } | ||
798 | |||
799 | void hw_perf_enable(void) | ||
800 | { | ||
801 | if (!x86_pmu_initialized()) | ||
802 | return; | ||
803 | x86_pmu.enable_all(); | ||
804 | } | ||
805 | |||
806 | static inline u64 intel_pmu_get_status(void) | ||
807 | { | ||
808 | u64 status; | ||
809 | |||
810 | rdmsrl(MSR_CORE_PERF_GLOBAL_STATUS, status); | ||
811 | |||
812 | return status; | ||
813 | } | ||
814 | |||
815 | static inline void intel_pmu_ack_status(u64 ack) | ||
816 | { | ||
817 | wrmsrl(MSR_CORE_PERF_GLOBAL_OVF_CTRL, ack); | ||
818 | } | ||
819 | |||
820 | static inline void x86_pmu_enable_counter(struct hw_perf_counter *hwc, int idx) | ||
821 | { | ||
822 | int err; | ||
823 | err = checking_wrmsrl(hwc->config_base + idx, | ||
824 | hwc->config | ARCH_PERFMON_EVENTSEL0_ENABLE); | ||
825 | } | ||
826 | |||
827 | static inline void x86_pmu_disable_counter(struct hw_perf_counter *hwc, int idx) | ||
828 | { | ||
829 | int err; | ||
830 | err = checking_wrmsrl(hwc->config_base + idx, | ||
831 | hwc->config); | ||
832 | } | ||
833 | |||
834 | static inline void | ||
835 | intel_pmu_disable_fixed(struct hw_perf_counter *hwc, int __idx) | ||
836 | { | ||
837 | int idx = __idx - X86_PMC_IDX_FIXED; | ||
838 | u64 ctrl_val, mask; | ||
839 | int err; | ||
840 | |||
841 | mask = 0xfULL << (idx * 4); | ||
842 | |||
843 | rdmsrl(hwc->config_base, ctrl_val); | ||
844 | ctrl_val &= ~mask; | ||
845 | err = checking_wrmsrl(hwc->config_base, ctrl_val); | ||
846 | } | ||
847 | |||
848 | static inline void | ||
849 | intel_pmu_disable_counter(struct hw_perf_counter *hwc, int idx) | ||
850 | { | ||
851 | if (unlikely(hwc->config_base == MSR_ARCH_PERFMON_FIXED_CTR_CTRL)) { | ||
852 | intel_pmu_disable_fixed(hwc, idx); | ||
853 | return; | ||
854 | } | ||
855 | |||
856 | x86_pmu_disable_counter(hwc, idx); | ||
857 | } | ||
858 | |||
859 | static inline void | ||
860 | amd_pmu_disable_counter(struct hw_perf_counter *hwc, int idx) | ||
861 | { | ||
862 | x86_pmu_disable_counter(hwc, idx); | ||
863 | } | ||
864 | |||
865 | static DEFINE_PER_CPU(u64, prev_left[X86_PMC_IDX_MAX]); | ||
866 | |||
867 | /* | ||
868 | * Set the next IRQ period, based on the hwc->period_left value. | ||
869 | * To be called with the counter disabled in hw: | ||
870 | */ | ||
871 | static int | ||
872 | x86_perf_counter_set_period(struct perf_counter *counter, | ||
873 | struct hw_perf_counter *hwc, int idx) | ||
874 | { | ||
875 | s64 left = atomic64_read(&hwc->period_left); | ||
876 | s64 period = hwc->sample_period; | ||
877 | int err, ret = 0; | ||
878 | |||
879 | /* | ||
880 | * If we are way outside a reasonable range then just skip forward: | ||
881 | */ | ||
882 | if (unlikely(left <= -period)) { | ||
883 | left = period; | ||
884 | atomic64_set(&hwc->period_left, left); | ||
885 | hwc->last_period = period; | ||
886 | ret = 1; | ||
887 | } | ||
888 | |||
889 | if (unlikely(left <= 0)) { | ||
890 | left += period; | ||
891 | atomic64_set(&hwc->period_left, left); | ||
892 | hwc->last_period = period; | ||
893 | ret = 1; | ||
894 | } | ||
895 | /* | ||
896 | * Quirk: certain CPUs don't like it if just 1 event is left: | ||
897 | */ | ||
898 | if (unlikely(left < 2)) | ||
899 | left = 2; | ||
900 | |||
901 | if (left > x86_pmu.max_period) | ||
902 | left = x86_pmu.max_period; | ||
903 | |||
904 | per_cpu(prev_left[idx], smp_processor_id()) = left; | ||
905 | |||
906 | /* | ||
907 | * The hw counter starts counting from this counter offset, | ||
908 | * mark it to be able to extract future deltas: | ||
909 | */ | ||
910 | atomic64_set(&hwc->prev_count, (u64)-left); | ||
911 | |||
912 | err = checking_wrmsrl(hwc->counter_base + idx, | ||
913 | (u64)(-left) & x86_pmu.counter_mask); | ||
914 | |||
915 | return ret; | ||
916 | } | ||
917 | |||
918 | static inline void | ||
919 | intel_pmu_enable_fixed(struct hw_perf_counter *hwc, int __idx) | ||
920 | { | ||
921 | int idx = __idx - X86_PMC_IDX_FIXED; | ||
922 | u64 ctrl_val, bits, mask; | ||
923 | int err; | ||
924 | |||
925 | /* | ||
926 | * Enable IRQ generation (0x8), | ||
927 | * and enable ring-3 counting (0x2) and ring-0 counting (0x1) | ||
928 | * if requested: | ||
929 | */ | ||
930 | bits = 0x8ULL; | ||
931 | if (hwc->config & ARCH_PERFMON_EVENTSEL_USR) | ||
932 | bits |= 0x2; | ||
933 | if (hwc->config & ARCH_PERFMON_EVENTSEL_OS) | ||
934 | bits |= 0x1; | ||
935 | bits <<= (idx * 4); | ||
936 | mask = 0xfULL << (idx * 4); | ||
937 | |||
938 | rdmsrl(hwc->config_base, ctrl_val); | ||
939 | ctrl_val &= ~mask; | ||
940 | ctrl_val |= bits; | ||
941 | err = checking_wrmsrl(hwc->config_base, ctrl_val); | ||
942 | } | ||
943 | |||
944 | static void intel_pmu_enable_counter(struct hw_perf_counter *hwc, int idx) | ||
945 | { | ||
946 | if (unlikely(hwc->config_base == MSR_ARCH_PERFMON_FIXED_CTR_CTRL)) { | ||
947 | intel_pmu_enable_fixed(hwc, idx); | ||
948 | return; | ||
949 | } | ||
950 | |||
951 | x86_pmu_enable_counter(hwc, idx); | ||
952 | } | ||
953 | |||
954 | static void amd_pmu_enable_counter(struct hw_perf_counter *hwc, int idx) | ||
955 | { | ||
956 | struct cpu_hw_counters *cpuc = &__get_cpu_var(cpu_hw_counters); | ||
957 | |||
958 | if (cpuc->enabled) | ||
959 | x86_pmu_enable_counter(hwc, idx); | ||
960 | else | ||
961 | x86_pmu_disable_counter(hwc, idx); | ||
962 | } | ||
963 | |||
964 | static int | ||
965 | fixed_mode_idx(struct perf_counter *counter, struct hw_perf_counter *hwc) | ||
966 | { | ||
967 | unsigned int event; | ||
968 | |||
969 | if (!x86_pmu.num_counters_fixed) | ||
970 | return -1; | ||
971 | |||
972 | /* | ||
973 | * Quirk: IA32_FIXED_CTRs do not work on current Atom processors: | ||
974 | */ | ||
975 | if (boot_cpu_data.x86_vendor == X86_VENDOR_INTEL && | ||
976 | boot_cpu_data.x86_model == 28) | ||
977 | return -1; | ||
978 | |||
979 | event = hwc->config & ARCH_PERFMON_EVENT_MASK; | ||
980 | |||
981 | if (unlikely(event == x86_pmu.event_map(PERF_COUNT_HW_INSTRUCTIONS))) | ||
982 | return X86_PMC_IDX_FIXED_INSTRUCTIONS; | ||
983 | if (unlikely(event == x86_pmu.event_map(PERF_COUNT_HW_CPU_CYCLES))) | ||
984 | return X86_PMC_IDX_FIXED_CPU_CYCLES; | ||
985 | if (unlikely(event == x86_pmu.event_map(PERF_COUNT_HW_BUS_CYCLES))) | ||
986 | return X86_PMC_IDX_FIXED_BUS_CYCLES; | ||
987 | |||
988 | return -1; | ||
989 | } | ||
990 | |||
991 | /* | ||
992 | * Find a PMC slot for the freshly enabled / scheduled in counter: | ||
993 | */ | ||
994 | static int x86_pmu_enable(struct perf_counter *counter) | ||
995 | { | ||
996 | struct cpu_hw_counters *cpuc = &__get_cpu_var(cpu_hw_counters); | ||
997 | struct hw_perf_counter *hwc = &counter->hw; | ||
998 | int idx; | ||
999 | |||
1000 | idx = fixed_mode_idx(counter, hwc); | ||
1001 | if (idx >= 0) { | ||
1002 | /* | ||
1003 | * Try to get the fixed counter, if that is already taken | ||
1004 | * then try to get a generic counter: | ||
1005 | */ | ||
1006 | if (test_and_set_bit(idx, cpuc->used_mask)) | ||
1007 | goto try_generic; | ||
1008 | |||
1009 | hwc->config_base = MSR_ARCH_PERFMON_FIXED_CTR_CTRL; | ||
1010 | /* | ||
1011 | * We set it so that counter_base + idx in wrmsr/rdmsr maps to | ||
1012 | * MSR_ARCH_PERFMON_FIXED_CTR0 ... CTR2: | ||
1013 | */ | ||
1014 | hwc->counter_base = | ||
1015 | MSR_ARCH_PERFMON_FIXED_CTR0 - X86_PMC_IDX_FIXED; | ||
1016 | hwc->idx = idx; | ||
1017 | } else { | ||
1018 | idx = hwc->idx; | ||
1019 | /* Try to get the previous generic counter again */ | ||
1020 | if (test_and_set_bit(idx, cpuc->used_mask)) { | ||
1021 | try_generic: | ||
1022 | idx = find_first_zero_bit(cpuc->used_mask, | ||
1023 | x86_pmu.num_counters); | ||
1024 | if (idx == x86_pmu.num_counters) | ||
1025 | return -EAGAIN; | ||
1026 | |||
1027 | set_bit(idx, cpuc->used_mask); | ||
1028 | hwc->idx = idx; | ||
1029 | } | ||
1030 | hwc->config_base = x86_pmu.eventsel; | ||
1031 | hwc->counter_base = x86_pmu.perfctr; | ||
1032 | } | ||
1033 | |||
1034 | perf_counters_lapic_init(); | ||
1035 | |||
1036 | x86_pmu.disable(hwc, idx); | ||
1037 | |||
1038 | cpuc->counters[idx] = counter; | ||
1039 | set_bit(idx, cpuc->active_mask); | ||
1040 | |||
1041 | x86_perf_counter_set_period(counter, hwc, idx); | ||
1042 | x86_pmu.enable(hwc, idx); | ||
1043 | |||
1044 | return 0; | ||
1045 | } | ||
1046 | |||
1047 | static void x86_pmu_unthrottle(struct perf_counter *counter) | ||
1048 | { | ||
1049 | struct cpu_hw_counters *cpuc = &__get_cpu_var(cpu_hw_counters); | ||
1050 | struct hw_perf_counter *hwc = &counter->hw; | ||
1051 | |||
1052 | if (WARN_ON_ONCE(hwc->idx >= X86_PMC_IDX_MAX || | ||
1053 | cpuc->counters[hwc->idx] != counter)) | ||
1054 | return; | ||
1055 | |||
1056 | x86_pmu.enable(hwc, hwc->idx); | ||
1057 | } | ||
1058 | |||
1059 | void perf_counter_print_debug(void) | ||
1060 | { | ||
1061 | u64 ctrl, status, overflow, pmc_ctrl, pmc_count, prev_left, fixed; | ||
1062 | struct cpu_hw_counters *cpuc; | ||
1063 | unsigned long flags; | ||
1064 | int cpu, idx; | ||
1065 | |||
1066 | if (!x86_pmu.num_counters) | ||
1067 | return; | ||
1068 | |||
1069 | local_irq_save(flags); | ||
1070 | |||
1071 | cpu = smp_processor_id(); | ||
1072 | cpuc = &per_cpu(cpu_hw_counters, cpu); | ||
1073 | |||
1074 | if (x86_pmu.version >= 2) { | ||
1075 | rdmsrl(MSR_CORE_PERF_GLOBAL_CTRL, ctrl); | ||
1076 | rdmsrl(MSR_CORE_PERF_GLOBAL_STATUS, status); | ||
1077 | rdmsrl(MSR_CORE_PERF_GLOBAL_OVF_CTRL, overflow); | ||
1078 | rdmsrl(MSR_ARCH_PERFMON_FIXED_CTR_CTRL, fixed); | ||
1079 | |||
1080 | pr_info("\n"); | ||
1081 | pr_info("CPU#%d: ctrl: %016llx\n", cpu, ctrl); | ||
1082 | pr_info("CPU#%d: status: %016llx\n", cpu, status); | ||
1083 | pr_info("CPU#%d: overflow: %016llx\n", cpu, overflow); | ||
1084 | pr_info("CPU#%d: fixed: %016llx\n", cpu, fixed); | ||
1085 | } | ||
1086 | pr_info("CPU#%d: used: %016llx\n", cpu, *(u64 *)cpuc->used_mask); | ||
1087 | |||
1088 | for (idx = 0; idx < x86_pmu.num_counters; idx++) { | ||
1089 | rdmsrl(x86_pmu.eventsel + idx, pmc_ctrl); | ||
1090 | rdmsrl(x86_pmu.perfctr + idx, pmc_count); | ||
1091 | |||
1092 | prev_left = per_cpu(prev_left[idx], cpu); | ||
1093 | |||
1094 | pr_info("CPU#%d: gen-PMC%d ctrl: %016llx\n", | ||
1095 | cpu, idx, pmc_ctrl); | ||
1096 | pr_info("CPU#%d: gen-PMC%d count: %016llx\n", | ||
1097 | cpu, idx, pmc_count); | ||
1098 | pr_info("CPU#%d: gen-PMC%d left: %016llx\n", | ||
1099 | cpu, idx, prev_left); | ||
1100 | } | ||
1101 | for (idx = 0; idx < x86_pmu.num_counters_fixed; idx++) { | ||
1102 | rdmsrl(MSR_ARCH_PERFMON_FIXED_CTR0 + idx, pmc_count); | ||
1103 | |||
1104 | pr_info("CPU#%d: fixed-PMC%d count: %016llx\n", | ||
1105 | cpu, idx, pmc_count); | ||
1106 | } | ||
1107 | local_irq_restore(flags); | ||
1108 | } | ||
1109 | |||
1110 | static void x86_pmu_disable(struct perf_counter *counter) | ||
1111 | { | ||
1112 | struct cpu_hw_counters *cpuc = &__get_cpu_var(cpu_hw_counters); | ||
1113 | struct hw_perf_counter *hwc = &counter->hw; | ||
1114 | int idx = hwc->idx; | ||
1115 | |||
1116 | /* | ||
1117 | * Must be done before we disable; otherwise the NMI handler | ||
1118 | * could re-enable it again: | ||
1119 | */ | ||
1120 | clear_bit(idx, cpuc->active_mask); | ||
1121 | x86_pmu.disable(hwc, idx); | ||
1122 | |||
1123 | /* | ||
1124 | * Make sure the cleared pointer becomes visible before we | ||
1125 | * (potentially) free the counter: | ||
1126 | */ | ||
1127 | barrier(); | ||
1128 | |||
1129 | /* | ||
1130 | * Drain the remaining delta count out of a counter | ||
1131 | * that we are disabling: | ||
1132 | */ | ||
1133 | x86_perf_counter_update(counter, hwc, idx); | ||
1134 | cpuc->counters[idx] = NULL; | ||
1135 | clear_bit(idx, cpuc->used_mask); | ||
1136 | } | ||
1137 | |||
1138 | /* | ||
1139 | * Save and restart an expired counter. Called by NMI contexts, | ||
1140 | * so it has to be careful about preempting normal counter ops: | ||
1141 | */ | ||
1142 | static int intel_pmu_save_and_restart(struct perf_counter *counter) | ||
1143 | { | ||
1144 | struct hw_perf_counter *hwc = &counter->hw; | ||
1145 | int idx = hwc->idx; | ||
1146 | int ret; | ||
1147 | |||
1148 | x86_perf_counter_update(counter, hwc, idx); | ||
1149 | ret = x86_perf_counter_set_period(counter, hwc, idx); | ||
1150 | |||
1151 | if (counter->state == PERF_COUNTER_STATE_ACTIVE) | ||
1152 | intel_pmu_enable_counter(hwc, idx); | ||
1153 | |||
1154 | return ret; | ||
1155 | } | ||
1156 | |||
1157 | static void intel_pmu_reset(void) | ||
1158 | { | ||
1159 | unsigned long flags; | ||
1160 | int idx; | ||
1161 | |||
1162 | if (!x86_pmu.num_counters) | ||
1163 | return; | ||
1164 | |||
1165 | local_irq_save(flags); | ||
1166 | |||
1167 | printk("clearing PMU state on CPU#%d\n", smp_processor_id()); | ||
1168 | |||
1169 | for (idx = 0; idx < x86_pmu.num_counters; idx++) { | ||
1170 | checking_wrmsrl(x86_pmu.eventsel + idx, 0ull); | ||
1171 | checking_wrmsrl(x86_pmu.perfctr + idx, 0ull); | ||
1172 | } | ||
1173 | for (idx = 0; idx < x86_pmu.num_counters_fixed; idx++) { | ||
1174 | checking_wrmsrl(MSR_ARCH_PERFMON_FIXED_CTR0 + idx, 0ull); | ||
1175 | } | ||
1176 | |||
1177 | local_irq_restore(flags); | ||
1178 | } | ||
1179 | |||
1180 | |||
1181 | /* | ||
1182 | * This handler is triggered by the local APIC, so the APIC IRQ handling | ||
1183 | * rules apply: | ||
1184 | */ | ||
1185 | static int intel_pmu_handle_irq(struct pt_regs *regs) | ||
1186 | { | ||
1187 | struct perf_sample_data data; | ||
1188 | struct cpu_hw_counters *cpuc; | ||
1189 | int bit, cpu, loops; | ||
1190 | u64 ack, status; | ||
1191 | |||
1192 | data.regs = regs; | ||
1193 | data.addr = 0; | ||
1194 | |||
1195 | cpu = smp_processor_id(); | ||
1196 | cpuc = &per_cpu(cpu_hw_counters, cpu); | ||
1197 | |||
1198 | perf_disable(); | ||
1199 | status = intel_pmu_get_status(); | ||
1200 | if (!status) { | ||
1201 | perf_enable(); | ||
1202 | return 0; | ||
1203 | } | ||
1204 | |||
1205 | loops = 0; | ||
1206 | again: | ||
1207 | if (++loops > 100) { | ||
1208 | WARN_ONCE(1, "perfcounters: irq loop stuck!\n"); | ||
1209 | perf_counter_print_debug(); | ||
1210 | intel_pmu_reset(); | ||
1211 | perf_enable(); | ||
1212 | return 1; | ||
1213 | } | ||
1214 | |||
1215 | inc_irq_stat(apic_perf_irqs); | ||
1216 | ack = status; | ||
1217 | for_each_bit(bit, (unsigned long *)&status, X86_PMC_IDX_MAX) { | ||
1218 | struct perf_counter *counter = cpuc->counters[bit]; | ||
1219 | |||
1220 | clear_bit(bit, (unsigned long *) &status); | ||
1221 | if (!test_bit(bit, cpuc->active_mask)) | ||
1222 | continue; | ||
1223 | |||
1224 | if (!intel_pmu_save_and_restart(counter)) | ||
1225 | continue; | ||
1226 | |||
1227 | data.period = counter->hw.last_period; | ||
1228 | |||
1229 | if (perf_counter_overflow(counter, 1, &data)) | ||
1230 | intel_pmu_disable_counter(&counter->hw, bit); | ||
1231 | } | ||
1232 | |||
1233 | intel_pmu_ack_status(ack); | ||
1234 | |||
1235 | /* | ||
1236 | * Repeat if there is more work to be done: | ||
1237 | */ | ||
1238 | status = intel_pmu_get_status(); | ||
1239 | if (status) | ||
1240 | goto again; | ||
1241 | |||
1242 | perf_enable(); | ||
1243 | |||
1244 | return 1; | ||
1245 | } | ||
1246 | |||
1247 | static int amd_pmu_handle_irq(struct pt_regs *regs) | ||
1248 | { | ||
1249 | struct perf_sample_data data; | ||
1250 | struct cpu_hw_counters *cpuc; | ||
1251 | struct perf_counter *counter; | ||
1252 | struct hw_perf_counter *hwc; | ||
1253 | int cpu, idx, handled = 0; | ||
1254 | u64 val; | ||
1255 | |||
1256 | data.regs = regs; | ||
1257 | data.addr = 0; | ||
1258 | |||
1259 | cpu = smp_processor_id(); | ||
1260 | cpuc = &per_cpu(cpu_hw_counters, cpu); | ||
1261 | |||
1262 | for (idx = 0; idx < x86_pmu.num_counters; idx++) { | ||
1263 | if (!test_bit(idx, cpuc->active_mask)) | ||
1264 | continue; | ||
1265 | |||
1266 | counter = cpuc->counters[idx]; | ||
1267 | hwc = &counter->hw; | ||
1268 | |||
1269 | val = x86_perf_counter_update(counter, hwc, idx); | ||
1270 | if (val & (1ULL << (x86_pmu.counter_bits - 1))) | ||
1271 | continue; | ||
1272 | |||
1273 | /* | ||
1274 | * counter overflow | ||
1275 | */ | ||
1276 | handled = 1; | ||
1277 | data.period = counter->hw.last_period; | ||
1278 | |||
1279 | if (!x86_perf_counter_set_period(counter, hwc, idx)) | ||
1280 | continue; | ||
1281 | |||
1282 | if (perf_counter_overflow(counter, 1, &data)) | ||
1283 | amd_pmu_disable_counter(hwc, idx); | ||
1284 | } | ||
1285 | |||
1286 | if (handled) | ||
1287 | inc_irq_stat(apic_perf_irqs); | ||
1288 | |||
1289 | return handled; | ||
1290 | } | ||
1291 | |||
1292 | void smp_perf_pending_interrupt(struct pt_regs *regs) | ||
1293 | { | ||
1294 | irq_enter(); | ||
1295 | ack_APIC_irq(); | ||
1296 | inc_irq_stat(apic_pending_irqs); | ||
1297 | perf_counter_do_pending(); | ||
1298 | irq_exit(); | ||
1299 | } | ||
1300 | |||
1301 | void set_perf_counter_pending(void) | ||
1302 | { | ||
1303 | apic->send_IPI_self(LOCAL_PENDING_VECTOR); | ||
1304 | } | ||
1305 | |||
1306 | void perf_counters_lapic_init(void) | ||
1307 | { | ||
1308 | if (!x86_pmu_initialized()) | ||
1309 | return; | ||
1310 | |||
1311 | /* | ||
1312 | * Always use NMI for PMU | ||
1313 | */ | ||
1314 | apic_write(APIC_LVTPC, APIC_DM_NMI); | ||
1315 | } | ||
1316 | |||
1317 | static int __kprobes | ||
1318 | perf_counter_nmi_handler(struct notifier_block *self, | ||
1319 | unsigned long cmd, void *__args) | ||
1320 | { | ||
1321 | struct die_args *args = __args; | ||
1322 | struct pt_regs *regs; | ||
1323 | |||
1324 | if (!atomic_read(&active_counters)) | ||
1325 | return NOTIFY_DONE; | ||
1326 | |||
1327 | switch (cmd) { | ||
1328 | case DIE_NMI: | ||
1329 | case DIE_NMI_IPI: | ||
1330 | break; | ||
1331 | |||
1332 | default: | ||
1333 | return NOTIFY_DONE; | ||
1334 | } | ||
1335 | |||
1336 | regs = args->regs; | ||
1337 | |||
1338 | apic_write(APIC_LVTPC, APIC_DM_NMI); | ||
1339 | /* | ||
1340 | * Can't rely on the handled return value to say it was our NMI; two | ||
1341 | * counters could trigger 'simultaneously', raising two back-to-back NMIs. | ||
1342 | * | ||
1343 | * If the first NMI handles both, the latter will be empty and daze | ||
1344 | * the CPU. | ||
1345 | */ | ||
1346 | x86_pmu.handle_irq(regs); | ||
1347 | |||
1348 | return NOTIFY_STOP; | ||
1349 | } | ||
1350 | |||
1351 | static __read_mostly struct notifier_block perf_counter_nmi_notifier = { | ||
1352 | .notifier_call = perf_counter_nmi_handler, | ||
1353 | .next = NULL, | ||
1354 | .priority = 1 | ||
1355 | }; | ||
1356 | |||
1357 | static struct x86_pmu intel_pmu = { | ||
1358 | .name = "Intel", | ||
1359 | .handle_irq = intel_pmu_handle_irq, | ||
1360 | .disable_all = intel_pmu_disable_all, | ||
1361 | .enable_all = intel_pmu_enable_all, | ||
1362 | .enable = intel_pmu_enable_counter, | ||
1363 | .disable = intel_pmu_disable_counter, | ||
1364 | .eventsel = MSR_ARCH_PERFMON_EVENTSEL0, | ||
1365 | .perfctr = MSR_ARCH_PERFMON_PERFCTR0, | ||
1366 | .event_map = intel_pmu_event_map, | ||
1367 | .raw_event = intel_pmu_raw_event, | ||
1368 | .max_events = ARRAY_SIZE(intel_perfmon_event_map), | ||
1369 | /* | ||
1370 | * Intel PMCs cannot be accessed sanely above 32 bit width, | ||
1371 | * so we install an artificial 1<<31 period regardless of | ||
1372 | * the generic counter period: | ||
1373 | */ | ||
1374 | .max_period = (1ULL << 31) - 1, | ||
1375 | }; | ||
1376 | |||
1377 | static struct x86_pmu amd_pmu = { | ||
1378 | .name = "AMD", | ||
1379 | .handle_irq = amd_pmu_handle_irq, | ||
1380 | .disable_all = amd_pmu_disable_all, | ||
1381 | .enable_all = amd_pmu_enable_all, | ||
1382 | .enable = amd_pmu_enable_counter, | ||
1383 | .disable = amd_pmu_disable_counter, | ||
1384 | .eventsel = MSR_K7_EVNTSEL0, | ||
1385 | .perfctr = MSR_K7_PERFCTR0, | ||
1386 | .event_map = amd_pmu_event_map, | ||
1387 | .raw_event = amd_pmu_raw_event, | ||
1388 | .max_events = ARRAY_SIZE(amd_perfmon_event_map), | ||
1389 | .num_counters = 4, | ||
1390 | .counter_bits = 48, | ||
1391 | .counter_mask = (1ULL << 48) - 1, | ||
1392 | /* use highest bit to detect overflow */ | ||
1393 | .max_period = (1ULL << 47) - 1, | ||
1394 | }; | ||
1395 | |||
1396 | static int intel_pmu_init(void) | ||
1397 | { | ||
1398 | union cpuid10_edx edx; | ||
1399 | union cpuid10_eax eax; | ||
1400 | unsigned int unused; | ||
1401 | unsigned int ebx; | ||
1402 | int version; | ||
1403 | |||
1404 | if (!cpu_has(&boot_cpu_data, X86_FEATURE_ARCH_PERFMON)) | ||
1405 | return -ENODEV; | ||
1406 | |||
1407 | /* | ||
1408 | * Check whether the Architectural PerfMon supports | ||
1409 | * Branch Misses Retired Event or not. | ||
1410 | */ | ||
1411 | cpuid(10, &eax.full, &ebx, &unused, &edx.full); | ||
1412 | if (eax.split.mask_length <= ARCH_PERFMON_BRANCH_MISSES_RETIRED) | ||
1413 | return -ENODEV; | ||
1414 | |||
1415 | version = eax.split.version_id; | ||
1416 | if (version < 2) | ||
1417 | return -ENODEV; | ||
1418 | |||
1419 | x86_pmu = intel_pmu; | ||
1420 | x86_pmu.version = version; | ||
1421 | x86_pmu.num_counters = eax.split.num_counters; | ||
1422 | x86_pmu.counter_bits = eax.split.bit_width; | ||
1423 | x86_pmu.counter_mask = (1ULL << eax.split.bit_width) - 1; | ||
1424 | |||
1425 | /* | ||
1426 | * Quirk: v2 perfmon does not report fixed-purpose counters, so | ||
1427 | * assume at least 3 counters: | ||
1428 | */ | ||
1429 | x86_pmu.num_counters_fixed = max((int)edx.split.num_counters_fixed, 3); | ||
1430 | |||
1431 | rdmsrl(MSR_CORE_PERF_GLOBAL_CTRL, x86_pmu.intel_ctrl); | ||
1432 | |||
1433 | /* | ||
1434 | * Install the hw-cache-events table: | ||
1435 | */ | ||
1436 | switch (boot_cpu_data.x86_model) { | ||
1437 | case 15: /* original 65 nm celeron/pentium/core2/xeon, "Merom"/"Conroe" */ | ||
1438 | case 22: /* single-core 65 nm celeron/core2solo "Merom-L"/"Conroe-L" */ | ||
1439 | case 23: /* current 45 nm celeron/core2/xeon "Penryn"/"Wolfdale" */ | ||
1440 | case 29: /* six-core 45 nm xeon "Dunnington" */ | ||
1441 | memcpy(hw_cache_event_ids, core2_hw_cache_event_ids, | ||
1442 | sizeof(hw_cache_event_ids)); | ||
1443 | |||
1444 | pr_cont("Core2 events, "); | ||
1445 | break; | ||
1446 | default: | ||
1447 | case 26: | ||
1448 | memcpy(hw_cache_event_ids, nehalem_hw_cache_event_ids, | ||
1449 | sizeof(hw_cache_event_ids)); | ||
1450 | |||
1451 | pr_cont("Nehalem/Corei7 events, "); | ||
1452 | break; | ||
1453 | case 28: | ||
1454 | memcpy(hw_cache_event_ids, atom_hw_cache_event_ids, | ||
1455 | sizeof(hw_cache_event_ids)); | ||
1456 | |||
1457 | pr_cont("Atom events, "); | ||
1458 | break; | ||
1459 | } | ||
1460 | return 0; | ||
1461 | } | ||
1462 | |||
1463 | static int amd_pmu_init(void) | ||
1464 | { | ||
1465 | /* Performance-monitoring supported from K7 and later: */ | ||
1466 | if (boot_cpu_data.x86 < 6) | ||
1467 | return -ENODEV; | ||
1468 | |||
1469 | x86_pmu = amd_pmu; | ||
1470 | |||
1471 | /* Events are common for all AMDs */ | ||
1472 | memcpy(hw_cache_event_ids, amd_hw_cache_event_ids, | ||
1473 | sizeof(hw_cache_event_ids)); | ||
1474 | |||
1475 | return 0; | ||
1476 | } | ||
1477 | |||
1478 | void __init init_hw_perf_counters(void) | ||
1479 | { | ||
1480 | int err; | ||
1481 | |||
1482 | pr_info("Performance Counters: "); | ||
1483 | |||
1484 | switch (boot_cpu_data.x86_vendor) { | ||
1485 | case X86_VENDOR_INTEL: | ||
1486 | err = intel_pmu_init(); | ||
1487 | break; | ||
1488 | case X86_VENDOR_AMD: | ||
1489 | err = amd_pmu_init(); | ||
1490 | break; | ||
1491 | default: | ||
1492 | return; | ||
1493 | } | ||
1494 | if (err != 0) { | ||
1495 | pr_cont("no PMU driver, software counters only.\n"); | ||
1496 | return; | ||
1497 | } | ||
1498 | |||
1499 | pr_cont("%s PMU driver.\n", x86_pmu.name); | ||
1500 | |||
1501 | if (x86_pmu.num_counters > X86_PMC_MAX_GENERIC) { | ||
1502 | x86_pmu.num_counters = X86_PMC_MAX_GENERIC; | ||
1503 | WARN(1, KERN_ERR "hw perf counters %d > max(%d), clipping!", | ||
1504 | x86_pmu.num_counters, X86_PMC_MAX_GENERIC); | ||
1505 | } | ||
1506 | perf_counter_mask = (1 << x86_pmu.num_counters) - 1; | ||
1507 | perf_max_counters = x86_pmu.num_counters; | ||
1508 | |||
1509 | if (x86_pmu.num_counters_fixed > X86_PMC_MAX_FIXED) { | ||
1510 | x86_pmu.num_counters_fixed = X86_PMC_MAX_FIXED; | ||
1511 | WARN(1, KERN_ERR "hw perf counters fixed %d > max(%d), clipping!", | ||
1512 | x86_pmu.num_counters_fixed, X86_PMC_MAX_FIXED); | ||
1513 | } | ||
1514 | |||
1515 | perf_counter_mask |= | ||
1516 | ((1LL << x86_pmu.num_counters_fixed)-1) << X86_PMC_IDX_FIXED; | ||
1517 | |||
1518 | perf_counters_lapic_init(); | ||
1519 | register_die_notifier(&perf_counter_nmi_notifier); | ||
1520 | |||
1521 | pr_info("... version: %d\n", x86_pmu.version); | ||
1522 | pr_info("... bit width: %d\n", x86_pmu.counter_bits); | ||
1523 | pr_info("... generic counters: %d\n", x86_pmu.num_counters); | ||
1524 | pr_info("... value mask: %016Lx\n", x86_pmu.counter_mask); | ||
1525 | pr_info("... max period: %016Lx\n", x86_pmu.max_period); | ||
1526 | pr_info("... fixed-purpose counters: %d\n", x86_pmu.num_counters_fixed); | ||
1527 | pr_info("... counter mask: %016Lx\n", perf_counter_mask); | ||
1528 | } | ||
1529 | |||
1530 | static inline void x86_pmu_read(struct perf_counter *counter) | ||
1531 | { | ||
1532 | x86_perf_counter_update(counter, &counter->hw, counter->hw.idx); | ||
1533 | } | ||
1534 | |||
1535 | static const struct pmu pmu = { | ||
1536 | .enable = x86_pmu_enable, | ||
1537 | .disable = x86_pmu_disable, | ||
1538 | .read = x86_pmu_read, | ||
1539 | .unthrottle = x86_pmu_unthrottle, | ||
1540 | }; | ||
1541 | |||
1542 | const struct pmu *hw_perf_counter_init(struct perf_counter *counter) | ||
1543 | { | ||
1544 | int err; | ||
1545 | |||
1546 | err = __hw_perf_counter_init(counter); | ||
1547 | if (err) | ||
1548 | return ERR_PTR(err); | ||
1549 | |||
1550 | return &pmu; | ||
1551 | } | ||
1552 | |||
1553 | /* | ||
1554 | * callchain support | ||
1555 | */ | ||
1556 | |||
1557 | static inline | ||
1558 | void callchain_store(struct perf_callchain_entry *entry, u64 ip) | ||
1559 | { | ||
1560 | if (entry->nr < PERF_MAX_STACK_DEPTH) | ||
1561 | entry->ip[entry->nr++] = ip; | ||
1562 | } | ||
1563 | |||
1564 | static DEFINE_PER_CPU(struct perf_callchain_entry, irq_entry); | ||
1565 | static DEFINE_PER_CPU(struct perf_callchain_entry, nmi_entry); | ||
1566 | |||
1567 | |||
1568 | static void | ||
1569 | backtrace_warning_symbol(void *data, char *msg, unsigned long symbol) | ||
1570 | { | ||
1571 | /* Ignore warnings */ | ||
1572 | } | ||
1573 | |||
1574 | static void backtrace_warning(void *data, char *msg) | ||
1575 | { | ||
1576 | /* Ignore warnings */ | ||
1577 | } | ||
1578 | |||
1579 | static int backtrace_stack(void *data, char *name) | ||
1580 | { | ||
1581 | /* Process all stacks: */ | ||
1582 | return 0; | ||
1583 | } | ||
1584 | |||
1585 | static void backtrace_address(void *data, unsigned long addr, int reliable) | ||
1586 | { | ||
1587 | struct perf_callchain_entry *entry = data; | ||
1588 | |||
1589 | if (reliable) | ||
1590 | callchain_store(entry, addr); | ||
1591 | } | ||
1592 | |||
1593 | static const struct stacktrace_ops backtrace_ops = { | ||
1594 | .warning = backtrace_warning, | ||
1595 | .warning_symbol = backtrace_warning_symbol, | ||
1596 | .stack = backtrace_stack, | ||
1597 | .address = backtrace_address, | ||
1598 | }; | ||
1599 | |||
1600 | #include "../dumpstack.h" | ||
1601 | |||
1602 | static void | ||
1603 | perf_callchain_kernel(struct pt_regs *regs, struct perf_callchain_entry *entry) | ||
1604 | { | ||
1605 | callchain_store(entry, PERF_CONTEXT_KERNEL); | ||
1606 | callchain_store(entry, regs->ip); | ||
1607 | |||
1608 | dump_trace(NULL, regs, NULL, 0, &backtrace_ops, entry); | ||
1609 | } | ||
1610 | |||
1611 | /* | ||
1612 | * Best-effort, GUP-based copy_from_user() that assumes IRQ or NMI context | ||
1613 | */ | ||
1614 | static unsigned long | ||
1615 | copy_from_user_nmi(void *to, const void __user *from, unsigned long n) | ||
1616 | { | ||
1617 | unsigned long offset, addr = (unsigned long)from; | ||
1618 | int type = in_nmi() ? KM_NMI : KM_IRQ0; | ||
1619 | unsigned long size, len = 0; | ||
1620 | struct page *page; | ||
1621 | void *map; | ||
1622 | int ret; | ||
1623 | |||
1624 | do { | ||
1625 | ret = __get_user_pages_fast(addr, 1, 0, &page); | ||
1626 | if (!ret) | ||
1627 | break; | ||
1628 | |||
1629 | offset = addr & (PAGE_SIZE - 1); | ||
1630 | size = min(PAGE_SIZE - offset, n - len); | ||
1631 | |||
1632 | map = kmap_atomic(page, type); | ||
1633 | memcpy(to, map+offset, size); | ||
1634 | kunmap_atomic(map, type); | ||
1635 | put_page(page); | ||
1636 | |||
1637 | len += size; | ||
1638 | to += size; | ||
1639 | addr += size; | ||
1640 | |||
1641 | } while (len < n); | ||
1642 | |||
1643 | return len; | ||
1644 | } | ||
1645 | |||
1646 | static int copy_stack_frame(const void __user *fp, struct stack_frame *frame) | ||
1647 | { | ||
1648 | unsigned long bytes; | ||
1649 | |||
1650 | bytes = copy_from_user_nmi(frame, fp, sizeof(*frame)); | ||
1651 | |||
1652 | return bytes == sizeof(*frame); | ||
1653 | } | ||
1654 | |||
1655 | static void | ||
1656 | perf_callchain_user(struct pt_regs *regs, struct perf_callchain_entry *entry) | ||
1657 | { | ||
1658 | struct stack_frame frame; | ||
1659 | const void __user *fp; | ||
1660 | |||
1661 | if (!user_mode(regs)) | ||
1662 | regs = task_pt_regs(current); | ||
1663 | |||
1664 | fp = (void __user *)regs->bp; | ||
1665 | |||
1666 | callchain_store(entry, PERF_CONTEXT_USER); | ||
1667 | callchain_store(entry, regs->ip); | ||
1668 | |||
1669 | while (entry->nr < PERF_MAX_STACK_DEPTH) { | ||
1670 | frame.next_frame = NULL; | ||
1671 | frame.return_address = 0; | ||
1672 | |||
1673 | if (!copy_stack_frame(fp, &frame)) | ||
1674 | break; | ||
1675 | |||
1676 | if ((unsigned long)fp < regs->sp) | ||
1677 | break; | ||
1678 | |||
1679 | callchain_store(entry, frame.return_address); | ||
1680 | fp = frame.next_frame; | ||
1681 | } | ||
1682 | } | ||
1683 | |||
1684 | static void | ||
1685 | perf_do_callchain(struct pt_regs *regs, struct perf_callchain_entry *entry) | ||
1686 | { | ||
1687 | int is_user; | ||
1688 | |||
1689 | if (!regs) | ||
1690 | return; | ||
1691 | |||
1692 | is_user = user_mode(regs); | ||
1693 | |||
1694 | if (!current || current->pid == 0) | ||
1695 | return; | ||
1696 | |||
1697 | if (is_user && current->state != TASK_RUNNING) | ||
1698 | return; | ||
1699 | |||
1700 | if (!is_user) | ||
1701 | perf_callchain_kernel(regs, entry); | ||
1702 | |||
1703 | if (current->mm) | ||
1704 | perf_callchain_user(regs, entry); | ||
1705 | } | ||
1706 | |||
1707 | struct perf_callchain_entry *perf_callchain(struct pt_regs *regs) | ||
1708 | { | ||
1709 | struct perf_callchain_entry *entry; | ||
1710 | |||
1711 | if (in_nmi()) | ||
1712 | entry = &__get_cpu_var(nmi_entry); | ||
1713 | else | ||
1714 | entry = &__get_cpu_var(irq_entry); | ||
1715 | |||
1716 | entry->nr = 0; | ||
1717 | |||
1718 | perf_do_callchain(regs, entry); | ||
1719 | |||
1720 | return entry; | ||
1721 | } | ||
diff --git a/arch/x86/kernel/cpu/perfctr-watchdog.c b/arch/x86/kernel/cpu/perfctr-watchdog.c index f6c70a164e32..5c481f6205bf 100644 --- a/arch/x86/kernel/cpu/perfctr-watchdog.c +++ b/arch/x86/kernel/cpu/perfctr-watchdog.c | |||
@@ -19,8 +19,8 @@ | |||
19 | #include <linux/nmi.h> | 19 | #include <linux/nmi.h> |
20 | #include <linux/kprobes.h> | 20 | #include <linux/kprobes.h> |
21 | 21 | ||
22 | #include <asm/genapic.h> | 22 | #include <asm/apic.h> |
23 | #include <asm/intel_arch_perfmon.h> | 23 | #include <asm/perf_counter.h> |
24 | 24 | ||
25 | struct nmi_watchdog_ctlblk { | 25 | struct nmi_watchdog_ctlblk { |
26 | unsigned int cccr_msr; | 26 | unsigned int cccr_msr; |
@@ -716,11 +716,15 @@ static void probe_nmi_watchdog(void) | |||
716 | wd_ops = &k7_wd_ops; | 716 | wd_ops = &k7_wd_ops; |
717 | break; | 717 | break; |
718 | case X86_VENDOR_INTEL: | 718 | case X86_VENDOR_INTEL: |
719 | /* | 719 | /* Work around where perfctr1 doesn't have a working enable |
720 | * Work around Core Duo (Yonah) errata AE49 where perfctr1 | 720 | * bit as described in the following errata: |
721 | * doesn't have a working enable bit. | 721 | * AE49 Core Duo and Intel Core Solo 65 nm |
722 | * AN49 Intel Pentium Dual-Core | ||
723 | * AF49 Dual-Core Intel Xeon Processor LV | ||
722 | */ | 724 | */ |
723 | if (boot_cpu_data.x86 == 6 && boot_cpu_data.x86_model == 14) { | 725 | if ((boot_cpu_data.x86 == 6 && boot_cpu_data.x86_model == 14) || |
726 | ((boot_cpu_data.x86 == 6 && boot_cpu_data.x86_model == 15 && | ||
727 | boot_cpu_data.x86_mask == 4))) { | ||
724 | intel_arch_wd_ops.perfctr = MSR_ARCH_PERFMON_PERFCTR0; | 728 | intel_arch_wd_ops.perfctr = MSR_ARCH_PERFMON_PERFCTR0; |
725 | intel_arch_wd_ops.evntsel = MSR_ARCH_PERFMON_EVENTSEL0; | 729 | intel_arch_wd_ops.evntsel = MSR_ARCH_PERFMON_EVENTSEL0; |
726 | } | 730 | } |
diff --git a/arch/x86/kernel/cpuid.c b/arch/x86/kernel/cpuid.c index 2ac1f0c2beb3..b07af8861244 100644 --- a/arch/x86/kernel/cpuid.c +++ b/arch/x86/kernel/cpuid.c | |||
@@ -182,6 +182,11 @@ static struct notifier_block __refdata cpuid_class_cpu_notifier = | |||
182 | .notifier_call = cpuid_class_cpu_callback, | 182 | .notifier_call = cpuid_class_cpu_callback, |
183 | }; | 183 | }; |
184 | 184 | ||
185 | static char *cpuid_nodename(struct device *dev) | ||
186 | { | ||
187 | return kasprintf(GFP_KERNEL, "cpu/%u/cpuid", MINOR(dev->devt)); | ||
188 | } | ||
189 | |||
185 | static int __init cpuid_init(void) | 190 | static int __init cpuid_init(void) |
186 | { | 191 | { |
187 | int i, err = 0; | 192 | int i, err = 0; |
@@ -198,6 +203,7 @@ static int __init cpuid_init(void) | |||
198 | err = PTR_ERR(cpuid_class); | 203 | err = PTR_ERR(cpuid_class); |
199 | goto out_chrdev; | 204 | goto out_chrdev; |
200 | } | 205 | } |
206 | cpuid_class->nodename = cpuid_nodename; | ||
201 | for_each_online_cpu(i) { | 207 | for_each_online_cpu(i) { |
202 | err = cpuid_device_create(i); | 208 | err = cpuid_device_create(i); |
203 | if (err != 0) | 209 | if (err != 0) |
diff --git a/arch/x86/kernel/crash.c b/arch/x86/kernel/crash.c index ff958248e61d..5e409dc298a4 100644 --- a/arch/x86/kernel/crash.c +++ b/arch/x86/kernel/crash.c | |||
@@ -27,6 +27,7 @@ | |||
27 | #include <asm/cpu.h> | 27 | #include <asm/cpu.h> |
28 | #include <asm/reboot.h> | 28 | #include <asm/reboot.h> |
29 | #include <asm/virtext.h> | 29 | #include <asm/virtext.h> |
30 | #include <asm/iommu.h> | ||
30 | 31 | ||
31 | 32 | ||
32 | #if defined(CONFIG_SMP) && defined(CONFIG_X86_LOCAL_APIC) | 33 | #if defined(CONFIG_SMP) && defined(CONFIG_X86_LOCAL_APIC) |
@@ -103,5 +104,10 @@ void native_machine_crash_shutdown(struct pt_regs *regs) | |||
103 | #ifdef CONFIG_HPET_TIMER | 104 | #ifdef CONFIG_HPET_TIMER |
104 | hpet_disable(); | 105 | hpet_disable(); |
105 | #endif | 106 | #endif |
107 | |||
108 | #ifdef CONFIG_X86_64 | ||
109 | pci_iommu_shutdown(); | ||
110 | #endif | ||
111 | |||
106 | crash_save_cpu(regs, safe_smp_processor_id()); | 112 | crash_save_cpu(regs, safe_smp_processor_id()); |
107 | } | 113 | } |
diff --git a/arch/x86/kernel/ds.c b/arch/x86/kernel/ds.c index 87b67e3a765a..48bfe1386038 100644 --- a/arch/x86/kernel/ds.c +++ b/arch/x86/kernel/ds.c | |||
@@ -19,45 +19,61 @@ | |||
19 | * Markus Metzger <markus.t.metzger@intel.com>, 2007-2009 | 19 | * Markus Metzger <markus.t.metzger@intel.com>, 2007-2009 |
20 | */ | 20 | */ |
21 | 21 | ||
22 | 22 | #include <linux/kernel.h> | |
23 | #include <asm/ds.h> | ||
24 | |||
25 | #include <linux/errno.h> | ||
26 | #include <linux/string.h> | 23 | #include <linux/string.h> |
27 | #include <linux/slab.h> | 24 | #include <linux/errno.h> |
28 | #include <linux/sched.h> | 25 | #include <linux/sched.h> |
26 | #include <linux/slab.h> | ||
29 | #include <linux/mm.h> | 27 | #include <linux/mm.h> |
30 | #include <linux/kernel.h> | 28 | #include <linux/trace_clock.h> |
29 | |||
30 | #include <asm/ds.h> | ||
31 | 31 | ||
32 | #include "ds_selftest.h" | ||
32 | 33 | ||
33 | /* | 34 | /* |
34 | * The configuration for a particular DS hardware implementation. | 35 | * The configuration for a particular DS hardware implementation: |
35 | */ | 36 | */ |
36 | struct ds_configuration { | 37 | struct ds_configuration { |
37 | /* the name of the configuration */ | 38 | /* The name of the configuration: */ |
38 | const char *name; | 39 | const char *name; |
39 | /* the size of one pointer-typed field in the DS structure and | 40 | |
40 | in the BTS and PEBS buffers in bytes; | 41 | /* The size of pointer-typed fields in DS, BTS, and PEBS: */ |
41 | this covers the first 8 DS fields related to buffer management. */ | 42 | unsigned char sizeof_ptr_field; |
42 | unsigned char sizeof_field; | 43 | |
43 | /* the size of a BTS/PEBS record in bytes */ | 44 | /* The size of a BTS/PEBS record in bytes: */ |
44 | unsigned char sizeof_rec[2]; | 45 | unsigned char sizeof_rec[2]; |
45 | /* a series of bit-masks to control various features indexed | 46 | |
46 | * by enum ds_feature */ | 47 | /* The number of pebs counter reset values in the DS structure. */ |
47 | unsigned long ctl[dsf_ctl_max]; | 48 | unsigned char nr_counter_reset; |
49 | |||
50 | /* Control bit-masks indexed by enum ds_feature: */ | ||
51 | unsigned long ctl[dsf_ctl_max]; | ||
48 | }; | 52 | }; |
49 | static DEFINE_PER_CPU(struct ds_configuration, ds_cfg_array); | 53 | static struct ds_configuration ds_cfg __read_mostly; |
54 | |||
55 | |||
56 | /* Maximal size of a DS configuration: */ | ||
57 | #define MAX_SIZEOF_DS 0x80 | ||
50 | 58 | ||
51 | #define ds_cfg per_cpu(ds_cfg_array, smp_processor_id()) | 59 | /* Maximal size of a BTS record: */ |
60 | #define MAX_SIZEOF_BTS (3 * 8) | ||
52 | 61 | ||
53 | #define MAX_SIZEOF_DS (12 * 8) /* maximal size of a DS configuration */ | 62 | /* BTS and PEBS buffer alignment: */ |
54 | #define MAX_SIZEOF_BTS (3 * 8) /* maximal size of a BTS record */ | 63 | #define DS_ALIGNMENT (1 << 3) |
55 | #define DS_ALIGNMENT (1 << 3) /* BTS and PEBS buffer alignment */ | ||
56 | 64 | ||
57 | #define BTS_CONTROL \ | 65 | /* Number of buffer pointers in DS: */ |
58 | (ds_cfg.ctl[dsf_bts] | ds_cfg.ctl[dsf_bts_kernel] | ds_cfg.ctl[dsf_bts_user] |\ | 66 | #define NUM_DS_PTR_FIELDS 8 |
59 | ds_cfg.ctl[dsf_bts_overflow]) | ||
60 | 67 | ||
68 | /* Size of a pebs reset value in DS: */ | ||
69 | #define PEBS_RESET_FIELD_SIZE 8 | ||
70 | |||
71 | /* Mask of control bits in the DS MSR register: */ | ||
72 | #define BTS_CONTROL \ | ||
73 | ( ds_cfg.ctl[dsf_bts] | \ | ||
74 | ds_cfg.ctl[dsf_bts_kernel] | \ | ||
75 | ds_cfg.ctl[dsf_bts_user] | \ | ||
76 | ds_cfg.ctl[dsf_bts_overflow] ) | ||
61 | 77 | ||
62 | /* | 78 | /* |
63 | * A BTS or PEBS tracer. | 79 | * A BTS or PEBS tracer. |
@@ -66,29 +82,36 @@ static DEFINE_PER_CPU(struct ds_configuration, ds_cfg_array); | |||
66 | * to identify tracers. | 82 | * to identify tracers. |
67 | */ | 83 | */ |
68 | struct ds_tracer { | 84 | struct ds_tracer { |
69 | /* the DS context (partially) owned by this tracer */ | 85 | /* The DS context (partially) owned by this tracer. */ |
70 | struct ds_context *context; | 86 | struct ds_context *context; |
71 | /* the buffer provided on ds_request() and its size in bytes */ | 87 | /* The buffer provided on ds_request() and its size in bytes. */ |
72 | void *buffer; | 88 | void *buffer; |
73 | size_t size; | 89 | size_t size; |
74 | }; | 90 | }; |
75 | 91 | ||
76 | struct bts_tracer { | 92 | struct bts_tracer { |
77 | /* the common DS part */ | 93 | /* The common DS part: */ |
78 | struct ds_tracer ds; | 94 | struct ds_tracer ds; |
79 | /* the trace including the DS configuration */ | 95 | |
80 | struct bts_trace trace; | 96 | /* The trace including the DS configuration: */ |
81 | /* buffer overflow notification function */ | 97 | struct bts_trace trace; |
82 | bts_ovfl_callback_t ovfl; | 98 | |
99 | /* Buffer overflow notification function: */ | ||
100 | bts_ovfl_callback_t ovfl; | ||
101 | |||
102 | /* Active flags affecting trace collection. */ | ||
103 | unsigned int flags; | ||
83 | }; | 104 | }; |
84 | 105 | ||
85 | struct pebs_tracer { | 106 | struct pebs_tracer { |
86 | /* the common DS part */ | 107 | /* The common DS part: */ |
87 | struct ds_tracer ds; | 108 | struct ds_tracer ds; |
88 | /* the trace including the DS configuration */ | 109 | |
89 | struct pebs_trace trace; | 110 | /* The trace including the DS configuration: */ |
90 | /* buffer overflow notification function */ | 111 | struct pebs_trace trace; |
91 | pebs_ovfl_callback_t ovfl; | 112 | |
113 | /* Buffer overflow notification function: */ | ||
114 | pebs_ovfl_callback_t ovfl; | ||
92 | }; | 115 | }; |
93 | 116 | ||
94 | /* | 117 | /* |
@@ -97,6 +120,7 @@ struct pebs_tracer { | |||
97 | * | 120 | * |
98 | * The DS configuration consists of the following fields; different | 121 | * The DS configuration consists of the following fields; different |
99 | * architetures vary in the size of those fields. | 122 | * architetures vary in the size of those fields. |
123 | * | ||
100 | * - double-word aligned base linear address of the BTS buffer | 124 | * - double-word aligned base linear address of the BTS buffer |
101 | * - write pointer into the BTS buffer | 125 | * - write pointer into the BTS buffer |
102 | * - end linear address of the BTS buffer (one byte beyond the end of | 126 | * - end linear address of the BTS buffer (one byte beyond the end of |
@@ -135,21 +159,22 @@ enum ds_field { | |||
135 | }; | 159 | }; |
136 | 160 | ||
137 | enum ds_qualifier { | 161 | enum ds_qualifier { |
138 | ds_bts = 0, | 162 | ds_bts = 0, |
139 | ds_pebs | 163 | ds_pebs |
140 | }; | 164 | }; |
141 | 165 | ||
142 | static inline unsigned long ds_get(const unsigned char *base, | 166 | static inline unsigned long |
143 | enum ds_qualifier qual, enum ds_field field) | 167 | ds_get(const unsigned char *base, enum ds_qualifier qual, enum ds_field field) |
144 | { | 168 | { |
145 | base += (ds_cfg.sizeof_field * (field + (4 * qual))); | 169 | base += (ds_cfg.sizeof_ptr_field * (field + (4 * qual))); |
146 | return *(unsigned long *)base; | 170 | return *(unsigned long *)base; |
147 | } | 171 | } |
148 | 172 | ||
149 | static inline void ds_set(unsigned char *base, enum ds_qualifier qual, | 173 | static inline void |
150 | enum ds_field field, unsigned long value) | 174 | ds_set(unsigned char *base, enum ds_qualifier qual, enum ds_field field, |
175 | unsigned long value) | ||
151 | { | 176 | { |
152 | base += (ds_cfg.sizeof_field * (field + (4 * qual))); | 177 | base += (ds_cfg.sizeof_ptr_field * (field + (4 * qual))); |
153 | (*(unsigned long *)base) = value; | 178 | (*(unsigned long *)base) = value; |
154 | } | 179 | } |
155 | 180 | ||
@@ -159,7 +184,6 @@ static inline void ds_set(unsigned char *base, enum ds_qualifier qual, | |||
159 | */ | 184 | */ |
160 | static DEFINE_SPINLOCK(ds_lock); | 185 | static DEFINE_SPINLOCK(ds_lock); |
161 | 186 | ||
162 | |||
163 | /* | 187 | /* |
164 | * We either support (system-wide) per-cpu or per-thread allocation. | 188 | * We either support (system-wide) per-cpu or per-thread allocation. |
165 | * We distinguish the two based on the task_struct pointer, where a | 189 | * We distinguish the two based on the task_struct pointer, where a |
@@ -178,12 +202,28 @@ static DEFINE_SPINLOCK(ds_lock); | |||
178 | */ | 202 | */ |
179 | static atomic_t tracers = ATOMIC_INIT(0); | 203 | static atomic_t tracers = ATOMIC_INIT(0); |
180 | 204 | ||
181 | static inline void get_tracer(struct task_struct *task) | 205 | static inline int get_tracer(struct task_struct *task) |
182 | { | 206 | { |
183 | if (task) | 207 | int error; |
208 | |||
209 | spin_lock_irq(&ds_lock); | ||
210 | |||
211 | if (task) { | ||
212 | error = -EPERM; | ||
213 | if (atomic_read(&tracers) < 0) | ||
214 | goto out; | ||
184 | atomic_inc(&tracers); | 215 | atomic_inc(&tracers); |
185 | else | 216 | } else { |
217 | error = -EPERM; | ||
218 | if (atomic_read(&tracers) > 0) | ||
219 | goto out; | ||
186 | atomic_dec(&tracers); | 220 | atomic_dec(&tracers); |
221 | } | ||
222 | |||
223 | error = 0; | ||
224 | out: | ||
225 | spin_unlock_irq(&ds_lock); | ||
226 | return error; | ||
187 | } | 227 | } |
188 | 228 | ||
189 | static inline void put_tracer(struct task_struct *task) | 229 | static inline void put_tracer(struct task_struct *task) |
@@ -194,14 +234,6 @@ static inline void put_tracer(struct task_struct *task) | |||
194 | atomic_inc(&tracers); | 234 | atomic_inc(&tracers); |
195 | } | 235 | } |
196 | 236 | ||
197 | static inline int check_tracer(struct task_struct *task) | ||
198 | { | ||
199 | return task ? | ||
200 | (atomic_read(&tracers) >= 0) : | ||
201 | (atomic_read(&tracers) <= 0); | ||
202 | } | ||
203 | |||
204 | |||
205 | /* | 237 | /* |
206 | * The DS context is either attached to a thread or to a cpu: | 238 | * The DS context is either attached to a thread or to a cpu: |
207 | * - in the former case, the thread_struct contains a pointer to the | 239 | * - in the former case, the thread_struct contains a pointer to the |
@@ -213,61 +245,58 @@ static inline int check_tracer(struct task_struct *task) | |||
213 | * deallocated when the last user puts the context. | 245 | * deallocated when the last user puts the context. |
214 | */ | 246 | */ |
215 | struct ds_context { | 247 | struct ds_context { |
216 | /* pointer to the DS configuration; goes into MSR_IA32_DS_AREA */ | 248 | /* The DS configuration; goes into MSR_IA32_DS_AREA: */ |
217 | unsigned char ds[MAX_SIZEOF_DS]; | 249 | unsigned char ds[MAX_SIZEOF_DS]; |
218 | /* the owner of the BTS and PEBS configuration, respectively */ | 250 | |
219 | struct bts_tracer *bts_master; | 251 | /* The owner of the BTS and PEBS configuration, respectively: */ |
220 | struct pebs_tracer *pebs_master; | 252 | struct bts_tracer *bts_master; |
221 | /* use count */ | 253 | struct pebs_tracer *pebs_master; |
222 | unsigned long count; | ||
223 | /* a pointer to the context location inside the thread_struct | ||
224 | * or the per_cpu context array */ | ||
225 | struct ds_context **this; | ||
226 | /* a pointer to the task owning this context, or NULL, if the | ||
227 | * context is owned by a cpu */ | ||
228 | struct task_struct *task; | ||
229 | }; | ||
230 | 254 | ||
231 | static DEFINE_PER_CPU(struct ds_context *, system_context_array); | 255 | /* Use count: */ |
256 | unsigned long count; | ||
232 | 257 | ||
233 | #define system_context per_cpu(system_context_array, smp_processor_id()) | 258 | /* Pointer to the context pointer field: */ |
259 | struct ds_context **this; | ||
260 | |||
261 | /* The traced task; NULL for cpu tracing: */ | ||
262 | struct task_struct *task; | ||
263 | |||
264 | /* The traced cpu; only valid if task is NULL: */ | ||
265 | int cpu; | ||
266 | }; | ||
234 | 267 | ||
268 | static DEFINE_PER_CPU(struct ds_context *, cpu_context); | ||
235 | 269 | ||
236 | static inline struct ds_context *ds_get_context(struct task_struct *task) | 270 | |
271 | static struct ds_context *ds_get_context(struct task_struct *task, int cpu) | ||
237 | { | 272 | { |
238 | struct ds_context **p_context = | 273 | struct ds_context **p_context = |
239 | (task ? &task->thread.ds_ctx : &system_context); | 274 | (task ? &task->thread.ds_ctx : &per_cpu(cpu_context, cpu)); |
240 | struct ds_context *context = NULL; | 275 | struct ds_context *context = NULL; |
241 | struct ds_context *new_context = NULL; | 276 | struct ds_context *new_context = NULL; |
242 | unsigned long irq; | ||
243 | 277 | ||
244 | /* Chances are small that we already have a context. */ | 278 | /* Chances are small that we already have a context. */ |
245 | new_context = kzalloc(sizeof(*new_context), GFP_KERNEL); | 279 | new_context = kzalloc(sizeof(*new_context), GFP_KERNEL); |
246 | if (!new_context) | 280 | if (!new_context) |
247 | return NULL; | 281 | return NULL; |
248 | 282 | ||
249 | spin_lock_irqsave(&ds_lock, irq); | 283 | spin_lock_irq(&ds_lock); |
250 | 284 | ||
251 | context = *p_context; | 285 | context = *p_context; |
252 | if (!context) { | 286 | if (likely(!context)) { |
253 | context = new_context; | 287 | context = new_context; |
254 | 288 | ||
255 | context->this = p_context; | 289 | context->this = p_context; |
256 | context->task = task; | 290 | context->task = task; |
291 | context->cpu = cpu; | ||
257 | context->count = 0; | 292 | context->count = 0; |
258 | 293 | ||
259 | if (task) | ||
260 | set_tsk_thread_flag(task, TIF_DS_AREA_MSR); | ||
261 | |||
262 | if (!task || (task == current)) | ||
263 | wrmsrl(MSR_IA32_DS_AREA, (unsigned long)context->ds); | ||
264 | |||
265 | *p_context = context; | 294 | *p_context = context; |
266 | } | 295 | } |
267 | 296 | ||
268 | context->count++; | 297 | context->count++; |
269 | 298 | ||
270 | spin_unlock_irqrestore(&ds_lock, irq); | 299 | spin_unlock_irq(&ds_lock); |
271 | 300 | ||
272 | if (context != new_context) | 301 | if (context != new_context) |
273 | kfree(new_context); | 302 | kfree(new_context); |
@@ -275,8 +304,9 @@ static inline struct ds_context *ds_get_context(struct task_struct *task) | |||
275 | return context; | 304 | return context; |
276 | } | 305 | } |
277 | 306 | ||
278 | static inline void ds_put_context(struct ds_context *context) | 307 | static void ds_put_context(struct ds_context *context) |
279 | { | 308 | { |
309 | struct task_struct *task; | ||
280 | unsigned long irq; | 310 | unsigned long irq; |
281 | 311 | ||
282 | if (!context) | 312 | if (!context) |
@@ -291,17 +321,55 @@ static inline void ds_put_context(struct ds_context *context) | |||
291 | 321 | ||
292 | *(context->this) = NULL; | 322 | *(context->this) = NULL; |
293 | 323 | ||
294 | if (context->task) | 324 | task = context->task; |
295 | clear_tsk_thread_flag(context->task, TIF_DS_AREA_MSR); | 325 | |
326 | if (task) | ||
327 | clear_tsk_thread_flag(task, TIF_DS_AREA_MSR); | ||
296 | 328 | ||
297 | if (!context->task || (context->task == current)) | 329 | /* |
298 | wrmsrl(MSR_IA32_DS_AREA, 0); | 330 | * We leave the (now dangling) pointer to the DS configuration in |
331 | * the DS_AREA msr. This is as good or as bad as replacing it with | ||
332 | * NULL - the hardware would crash if we enabled tracing. | ||
333 | * | ||
334 | * This saves us some problems with having to write an msr on a | ||
335 | * different cpu while preventing others from doing the same for the | ||
336 | * next context for that same cpu. | ||
337 | */ | ||
299 | 338 | ||
300 | spin_unlock_irqrestore(&ds_lock, irq); | 339 | spin_unlock_irqrestore(&ds_lock, irq); |
301 | 340 | ||
341 | /* The context might still be in use for context switching. */ | ||
342 | if (task && (task != current)) | ||
343 | wait_task_context_switch(task); | ||
344 | |||
302 | kfree(context); | 345 | kfree(context); |
303 | } | 346 | } |
304 | 347 | ||
348 | static void ds_install_ds_area(struct ds_context *context) | ||
349 | { | ||
350 | unsigned long ds; | ||
351 | |||
352 | ds = (unsigned long)context->ds; | ||
353 | |||
354 | /* | ||
355 | * There is a race between the bts master and the pebs master. | ||
356 | * | ||
357 | * The thread/cpu access is synchronized via get/put_cpu() for | ||
358 | * task tracing and via wrmsr_on_cpu for cpu tracing. | ||
359 | * | ||
360 | * If bts and pebs are collected for the same task or same cpu, | ||
361 | * the same configuration is written twice. | ||
362 | */ | ||
363 | if (context->task) { | ||
364 | get_cpu(); | ||
365 | if (context->task == current) | ||
366 | wrmsrl(MSR_IA32_DS_AREA, ds); | ||
367 | set_tsk_thread_flag(context->task, TIF_DS_AREA_MSR); | ||
368 | put_cpu(); | ||
369 | } else | ||
370 | wrmsr_on_cpu(context->cpu, MSR_IA32_DS_AREA, | ||
371 | (u32)((u64)ds), (u32)((u64)ds >> 32)); | ||
372 | } | ||
305 | 373 | ||
306 | /* | 374 | /* |
307 | * Call the tracer's callback on a buffer overflow. | 375 | * Call the tracer's callback on a buffer overflow. |
@@ -332,9 +400,9 @@ static void ds_overflow(struct ds_context *context, enum ds_qualifier qual) | |||
332 | * The remainder of any partially written record is zeroed out. | 400 | * The remainder of any partially written record is zeroed out. |
333 | * | 401 | * |
334 | * context: the DS context | 402 | * context: the DS context |
335 | * qual: the buffer type | 403 | * qual: the buffer type |
336 | * record: the data to write | 404 | * record: the data to write |
337 | * size: the size of the data | 405 | * size: the size of the data |
338 | */ | 406 | */ |
339 | static int ds_write(struct ds_context *context, enum ds_qualifier qual, | 407 | static int ds_write(struct ds_context *context, enum ds_qualifier qual, |
340 | const void *record, size_t size) | 408 | const void *record, size_t size) |
@@ -349,14 +417,14 @@ static int ds_write(struct ds_context *context, enum ds_qualifier qual, | |||
349 | unsigned long write_size, adj_write_size; | 417 | unsigned long write_size, adj_write_size; |
350 | 418 | ||
351 | /* | 419 | /* |
352 | * write as much as possible without producing an | 420 | * Write as much as possible without producing an |
353 | * overflow interrupt. | 421 | * overflow interrupt. |
354 | * | 422 | * |
355 | * interrupt_threshold must either be | 423 | * Interrupt_threshold must either be |
356 | * - bigger than absolute_maximum or | 424 | * - bigger than absolute_maximum or |
357 | * - point to a record between buffer_base and absolute_maximum | 425 | * - point to a record between buffer_base and absolute_maximum |
358 | * | 426 | * |
359 | * index points to a valid record. | 427 | * Index points to a valid record. |
360 | */ | 428 | */ |
361 | base = ds_get(context->ds, qual, ds_buffer_base); | 429 | base = ds_get(context->ds, qual, ds_buffer_base); |
362 | index = ds_get(context->ds, qual, ds_index); | 430 | index = ds_get(context->ds, qual, ds_index); |
@@ -365,8 +433,10 @@ static int ds_write(struct ds_context *context, enum ds_qualifier qual, | |||
365 | 433 | ||
366 | write_end = min(end, int_th); | 434 | write_end = min(end, int_th); |
367 | 435 | ||
368 | /* if we are already beyond the interrupt threshold, | 436 | /* |
369 | * we fill the entire buffer */ | 437 | * If we are already beyond the interrupt threshold, |
438 | * we fill the entire buffer. | ||
439 | */ | ||
370 | if (write_end <= index) | 440 | if (write_end <= index) |
371 | write_end = end; | 441 | write_end = end; |
372 | 442 | ||
@@ -383,7 +453,7 @@ static int ds_write(struct ds_context *context, enum ds_qualifier qual, | |||
383 | adj_write_size = write_size / ds_cfg.sizeof_rec[qual]; | 453 | adj_write_size = write_size / ds_cfg.sizeof_rec[qual]; |
384 | adj_write_size *= ds_cfg.sizeof_rec[qual]; | 454 | adj_write_size *= ds_cfg.sizeof_rec[qual]; |
385 | 455 | ||
386 | /* zero out trailing bytes */ | 456 | /* Zero out trailing bytes. */ |
387 | memset((char *)index + write_size, 0, | 457 | memset((char *)index + write_size, 0, |
388 | adj_write_size - write_size); | 458 | adj_write_size - write_size); |
389 | index += adj_write_size; | 459 | index += adj_write_size; |
@@ -410,7 +480,7 @@ static int ds_write(struct ds_context *context, enum ds_qualifier qual, | |||
410 | * Later architectures use 64bit pointers throughout, whereas earlier | 480 | * Later architectures use 64bit pointers throughout, whereas earlier |
411 | * architectures use 32bit pointers in 32bit mode. | 481 | * architectures use 32bit pointers in 32bit mode. |
412 | * | 482 | * |
413 | * We compute the base address for the first 8 fields based on: | 483 | * We compute the base address for the fields based on: |
414 | * - the field size stored in the DS configuration | 484 | * - the field size stored in the DS configuration |
415 | * - the relative field position | 485 | * - the relative field position |
416 | * | 486 | * |
@@ -431,23 +501,23 @@ enum bts_field { | |||
431 | bts_to, | 501 | bts_to, |
432 | bts_flags, | 502 | bts_flags, |
433 | 503 | ||
434 | bts_qual = bts_from, | 504 | bts_qual = bts_from, |
435 | bts_jiffies = bts_to, | 505 | bts_clock = bts_to, |
436 | bts_pid = bts_flags, | 506 | bts_pid = bts_flags, |
437 | 507 | ||
438 | bts_qual_mask = (bts_qual_max - 1), | 508 | bts_qual_mask = (bts_qual_max - 1), |
439 | bts_escape = ((unsigned long)-1 & ~bts_qual_mask) | 509 | bts_escape = ((unsigned long)-1 & ~bts_qual_mask) |
440 | }; | 510 | }; |
441 | 511 | ||
442 | static inline unsigned long bts_get(const char *base, enum bts_field field) | 512 | static inline unsigned long bts_get(const char *base, enum bts_field field) |
443 | { | 513 | { |
444 | base += (ds_cfg.sizeof_field * field); | 514 | base += (ds_cfg.sizeof_ptr_field * field); |
445 | return *(unsigned long *)base; | 515 | return *(unsigned long *)base; |
446 | } | 516 | } |
447 | 517 | ||
448 | static inline void bts_set(char *base, enum bts_field field, unsigned long val) | 518 | static inline void bts_set(char *base, enum bts_field field, unsigned long val) |
449 | { | 519 | { |
450 | base += (ds_cfg.sizeof_field * field);; | 520 | base += (ds_cfg.sizeof_ptr_field * field); |
451 | (*(unsigned long *)base) = val; | 521 | (*(unsigned long *)base) = val; |
452 | } | 522 | } |
453 | 523 | ||
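To make the field addressing above concrete, here is a small stand-alone illustration; the names and the fixed 8-byte field width are assumptions for the example, while the real width comes from ds_cfg.sizeof_ptr_field:

	/* Hypothetical stand-alone illustration of the bts_get()/bts_set() layout. */
	#include <stdio.h>
	#include <string.h>

	#define SIZEOF_PTR_FIELD 8		/* ds_cfg.sizeof_ptr_field on DTES64 parts */

	enum field { FROM, TO, FLAGS };		/* mirrors bts_from / bts_to / bts_flags */

	static unsigned long long field_get(const char *rec, enum field f)
	{
		unsigned long long val;

		memcpy(&val, rec + SIZEOF_PTR_FIELD * f, sizeof(val));
		return val;
	}

	int main(void)
	{
		char rec[3 * SIZEOF_PTR_FIELD] = { 0 };
		unsigned long long from = 0xffffffff81000000ULL;

		memcpy(rec + SIZEOF_PTR_FIELD * FROM, &from, sizeof(from));
		printf("from = %#llx\n", field_get(rec, FROM));
		return 0;
	}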
@@ -463,8 +533,8 @@ static inline void bts_set(char *base, enum bts_field field, unsigned long val) | |||
463 | * | 533 | * |
464 | * return: bytes read/written on success; -Eerrno, otherwise | 534 | * return: bytes read/written on success; -Eerrno, otherwise |
465 | */ | 535 | */ |
466 | static int bts_read(struct bts_tracer *tracer, const void *at, | 536 | static int |
467 | struct bts_struct *out) | 537 | bts_read(struct bts_tracer *tracer, const void *at, struct bts_struct *out) |
468 | { | 538 | { |
469 | if (!tracer) | 539 | if (!tracer) |
470 | return -EINVAL; | 540 | return -EINVAL; |
@@ -478,8 +548,8 @@ static int bts_read(struct bts_tracer *tracer, const void *at, | |||
478 | memset(out, 0, sizeof(*out)); | 548 | memset(out, 0, sizeof(*out)); |
479 | if ((bts_get(at, bts_qual) & ~bts_qual_mask) == bts_escape) { | 549 | if ((bts_get(at, bts_qual) & ~bts_qual_mask) == bts_escape) { |
480 | out->qualifier = (bts_get(at, bts_qual) & bts_qual_mask); | 550 | out->qualifier = (bts_get(at, bts_qual) & bts_qual_mask); |
481 | out->variant.timestamp.jiffies = bts_get(at, bts_jiffies); | 551 | out->variant.event.clock = bts_get(at, bts_clock); |
482 | out->variant.timestamp.pid = bts_get(at, bts_pid); | 552 | out->variant.event.pid = bts_get(at, bts_pid); |
483 | } else { | 553 | } else { |
484 | out->qualifier = bts_branch; | 554 | out->qualifier = bts_branch; |
485 | out->variant.lbr.from = bts_get(at, bts_from); | 555 | out->variant.lbr.from = bts_get(at, bts_from); |
@@ -516,8 +586,8 @@ static int bts_write(struct bts_tracer *tracer, const struct bts_struct *in) | |||
516 | case bts_task_arrives: | 586 | case bts_task_arrives: |
517 | case bts_task_departs: | 587 | case bts_task_departs: |
518 | bts_set(raw, bts_qual, (bts_escape | in->qualifier)); | 588 | bts_set(raw, bts_qual, (bts_escape | in->qualifier)); |
519 | bts_set(raw, bts_jiffies, in->variant.timestamp.jiffies); | 589 | bts_set(raw, bts_clock, in->variant.event.clock); |
520 | bts_set(raw, bts_pid, in->variant.timestamp.pid); | 590 | bts_set(raw, bts_pid, in->variant.event.pid); |
521 | break; | 591 | break; |
522 | default: | 592 | default: |
523 | return -EINVAL; | 593 | return -EINVAL; |
@@ -555,7 +625,8 @@ static void ds_init_ds_trace(struct ds_trace *trace, enum ds_qualifier qual, | |||
555 | unsigned int flags) { | 625 | unsigned int flags) { |
556 | unsigned long buffer, adj; | 626 | unsigned long buffer, adj; |
557 | 627 | ||
558 | /* adjust the buffer address and size to meet alignment | 628 | /* |
629 | * Adjust the buffer address and size to meet alignment | ||
559 | * constraints: | 630 | * constraints: |
560 | * - buffer is double-word aligned | 631 | * - buffer is double-word aligned |
561 | * - size is multiple of record size | 632 | * - size is multiple of record size |
@@ -577,9 +648,11 @@ static void ds_init_ds_trace(struct ds_trace *trace, enum ds_qualifier qual, | |||
577 | trace->begin = (void *)buffer; | 648 | trace->begin = (void *)buffer; |
578 | trace->top = trace->begin; | 649 | trace->top = trace->begin; |
579 | trace->end = (void *)(buffer + size); | 650 | trace->end = (void *)(buffer + size); |
580 | /* The value for 'no threshold' is -1, which will set the | 651 | /* |
652 | * The value for 'no threshold' is -1, which will set the | ||
581 | * threshold outside of the buffer, just like we want it. | 653 | * threshold outside of the buffer, just like we want it. |
582 | */ | 654 | */ |
655 | ith *= ds_cfg.sizeof_rec[qual]; | ||
583 | trace->ith = (void *)(buffer + size - ith); | 656 | trace->ith = (void *)(buffer + size - ith); |
584 | 657 | ||
585 | trace->flags = flags; | 658 | trace->flags = flags; |
@@ -588,18 +661,27 @@ static void ds_init_ds_trace(struct ds_trace *trace, enum ds_qualifier qual, | |||
588 | 661 | ||
589 | static int ds_request(struct ds_tracer *tracer, struct ds_trace *trace, | 662 | static int ds_request(struct ds_tracer *tracer, struct ds_trace *trace, |
590 | enum ds_qualifier qual, struct task_struct *task, | 663 | enum ds_qualifier qual, struct task_struct *task, |
591 | void *base, size_t size, size_t th, unsigned int flags) | 664 | int cpu, void *base, size_t size, size_t th) |
592 | { | 665 | { |
593 | struct ds_context *context; | 666 | struct ds_context *context; |
594 | int error; | 667 | int error; |
668 | size_t req_size; | ||
669 | |||
670 | error = -EOPNOTSUPP; | ||
671 | if (!ds_cfg.sizeof_rec[qual]) | ||
672 | goto out; | ||
595 | 673 | ||
596 | error = -EINVAL; | 674 | error = -EINVAL; |
597 | if (!base) | 675 | if (!base) |
598 | goto out; | 676 | goto out; |
599 | 677 | ||
600 | /* we require some space to do alignment adjustments below */ | 678 | req_size = ds_cfg.sizeof_rec[qual]; |
679 | /* We might need space for alignment adjustments. */ | ||
680 | if (!IS_ALIGNED((unsigned long)base, DS_ALIGNMENT)) | ||
681 | req_size += DS_ALIGNMENT; | ||
682 | |||
601 | error = -EINVAL; | 683 | error = -EINVAL; |
602 | if (size < (DS_ALIGNMENT + ds_cfg.sizeof_rec[qual])) | 684 | if (size < req_size) |
603 | goto out; | 685 | goto out; |
604 | 686 | ||
605 | if (th != (size_t)-1) { | 687 | if (th != (size_t)-1) { |
@@ -614,182 +696,318 @@ static int ds_request(struct ds_tracer *tracer, struct ds_trace *trace, | |||
614 | tracer->size = size; | 696 | tracer->size = size; |
615 | 697 | ||
616 | error = -ENOMEM; | 698 | error = -ENOMEM; |
617 | context = ds_get_context(task); | 699 | context = ds_get_context(task, cpu); |
618 | if (!context) | 700 | if (!context) |
619 | goto out; | 701 | goto out; |
620 | tracer->context = context; | 702 | tracer->context = context; |
621 | 703 | ||
622 | ds_init_ds_trace(trace, qual, base, size, th, flags); | 704 | /* |
705 | * Defer any tracer-specific initialization work for the context until | ||
706 | * context ownership has been clarified. | ||
707 | */ | ||
623 | 708 | ||
624 | error = 0; | 709 | error = 0; |
625 | out: | 710 | out: |
626 | return error; | 711 | return error; |
627 | } | 712 | } |
628 | 713 | ||
629 | struct bts_tracer *ds_request_bts(struct task_struct *task, | 714 | static struct bts_tracer *ds_request_bts(struct task_struct *task, int cpu, |
630 | void *base, size_t size, | 715 | void *base, size_t size, |
631 | bts_ovfl_callback_t ovfl, size_t th, | 716 | bts_ovfl_callback_t ovfl, size_t th, |
632 | unsigned int flags) | 717 | unsigned int flags) |
633 | { | 718 | { |
634 | struct bts_tracer *tracer; | 719 | struct bts_tracer *tracer; |
635 | unsigned long irq; | ||
636 | int error; | 720 | int error; |
637 | 721 | ||
722 | /* Buffer overflow notification is not yet implemented. */ | ||
638 | error = -EOPNOTSUPP; | 723 | error = -EOPNOTSUPP; |
639 | if (!ds_cfg.ctl[dsf_bts]) | 724 | if (ovfl) |
640 | goto out; | 725 | goto out; |
641 | 726 | ||
642 | /* buffer overflow notification is not yet implemented */ | 727 | error = get_tracer(task); |
643 | error = -EOPNOTSUPP; | 728 | if (error < 0) |
644 | if (ovfl) | ||
645 | goto out; | 729 | goto out; |
646 | 730 | ||
647 | error = -ENOMEM; | 731 | error = -ENOMEM; |
648 | tracer = kzalloc(sizeof(*tracer), GFP_KERNEL); | 732 | tracer = kzalloc(sizeof(*tracer), GFP_KERNEL); |
649 | if (!tracer) | 733 | if (!tracer) |
650 | goto out; | 734 | goto out_put_tracer; |
651 | tracer->ovfl = ovfl; | 735 | tracer->ovfl = ovfl; |
652 | 736 | ||
737 | /* Do some more error checking and acquire a tracing context. */ | ||
653 | error = ds_request(&tracer->ds, &tracer->trace.ds, | 738 | error = ds_request(&tracer->ds, &tracer->trace.ds, |
654 | ds_bts, task, base, size, th, flags); | 739 | ds_bts, task, cpu, base, size, th); |
655 | if (error < 0) | 740 | if (error < 0) |
656 | goto out_tracer; | 741 | goto out_tracer; |
657 | 742 | ||
658 | 743 | /* Claim the bts part of the tracing context we acquired above. */ | |
659 | spin_lock_irqsave(&ds_lock, irq); | 744 | spin_lock_irq(&ds_lock); |
660 | |||
661 | error = -EPERM; | ||
662 | if (!check_tracer(task)) | ||
663 | goto out_unlock; | ||
664 | get_tracer(task); | ||
665 | 745 | ||
666 | error = -EPERM; | 746 | error = -EPERM; |
667 | if (tracer->ds.context->bts_master) | 747 | if (tracer->ds.context->bts_master) |
668 | goto out_put_tracer; | 748 | goto out_unlock; |
669 | tracer->ds.context->bts_master = tracer; | 749 | tracer->ds.context->bts_master = tracer; |
670 | 750 | ||
671 | spin_unlock_irqrestore(&ds_lock, irq); | 751 | spin_unlock_irq(&ds_lock); |
672 | 752 | ||
753 | /* | ||
754 | * Now that we own the bts part of the context, let's complete the | ||
755 | * initialization for that part. | ||
756 | */ | ||
757 | ds_init_ds_trace(&tracer->trace.ds, ds_bts, base, size, th, flags); | ||
758 | ds_write_config(tracer->ds.context, &tracer->trace.ds, ds_bts); | ||
759 | ds_install_ds_area(tracer->ds.context); | ||
673 | 760 | ||
674 | tracer->trace.read = bts_read; | 761 | tracer->trace.read = bts_read; |
675 | tracer->trace.write = bts_write; | 762 | tracer->trace.write = bts_write; |
676 | 763 | ||
677 | ds_write_config(tracer->ds.context, &tracer->trace.ds, ds_bts); | 764 | /* Start tracing. */ |
678 | ds_resume_bts(tracer); | 765 | ds_resume_bts(tracer); |
679 | 766 | ||
680 | return tracer; | 767 | return tracer; |
681 | 768 | ||
682 | out_put_tracer: | ||
683 | put_tracer(task); | ||
684 | out_unlock: | 769 | out_unlock: |
685 | spin_unlock_irqrestore(&ds_lock, irq); | 770 | spin_unlock_irq(&ds_lock); |
686 | ds_put_context(tracer->ds.context); | 771 | ds_put_context(tracer->ds.context); |
687 | out_tracer: | 772 | out_tracer: |
688 | kfree(tracer); | 773 | kfree(tracer); |
774 | out_put_tracer: | ||
775 | put_tracer(task); | ||
689 | out: | 776 | out: |
690 | return ERR_PTR(error); | 777 | return ERR_PTR(error); |
691 | } | 778 | } |
692 | 779 | ||
693 | struct pebs_tracer *ds_request_pebs(struct task_struct *task, | 780 | struct bts_tracer *ds_request_bts_task(struct task_struct *task, |
694 | void *base, size_t size, | 781 | void *base, size_t size, |
695 | pebs_ovfl_callback_t ovfl, size_t th, | 782 | bts_ovfl_callback_t ovfl, |
696 | unsigned int flags) | 783 | size_t th, unsigned int flags) |
784 | { | ||
785 | return ds_request_bts(task, 0, base, size, ovfl, th, flags); | ||
786 | } | ||
787 | |||
788 | struct bts_tracer *ds_request_bts_cpu(int cpu, void *base, size_t size, | ||
789 | bts_ovfl_callback_t ovfl, | ||
790 | size_t th, unsigned int flags) | ||
791 | { | ||
792 | return ds_request_bts(NULL, cpu, base, size, ovfl, th, flags); | ||
793 | } | ||
794 | |||
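A rough usage sketch for the new task/cpu request wrappers introduced above; this is illustrative only, and the buffer size, flags and abbreviated error handling are example choices rather than the kernel's own callers:

	/* Illustrative only: request and release a per-task BTS tracer. */
	static int example_trace_task(struct task_struct *task, void *buf, size_t size)
	{
		struct bts_tracer *tracer;

		tracer = ds_request_bts_task(task, buf, size,
					     NULL /* no overflow callback yet */,
					     (size_t)-1 /* no interrupt threshold */,
					     BTS_KERNEL | BTS_TIMESTAMPS);
		if (IS_ERR(tracer))
			return PTR_ERR(tracer);

		/* ... let the task run and collect branch records ... */

		ds_release_bts(tracer);	/* may sleep; use ds_release_bts_noirq() otherwise */
		return 0;
	}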
795 | static struct pebs_tracer *ds_request_pebs(struct task_struct *task, int cpu, | ||
796 | void *base, size_t size, | ||
797 | pebs_ovfl_callback_t ovfl, size_t th, | ||
798 | unsigned int flags) | ||
697 | { | 799 | { |
698 | struct pebs_tracer *tracer; | 800 | struct pebs_tracer *tracer; |
699 | unsigned long irq; | ||
700 | int error; | 801 | int error; |
701 | 802 | ||
702 | /* buffer overflow notification is not yet implemented */ | 803 | /* Buffer overflow notification is not yet implemented. */ |
703 | error = -EOPNOTSUPP; | 804 | error = -EOPNOTSUPP; |
704 | if (ovfl) | 805 | if (ovfl) |
705 | goto out; | 806 | goto out; |
706 | 807 | ||
808 | error = get_tracer(task); | ||
809 | if (error < 0) | ||
810 | goto out; | ||
811 | |||
707 | error = -ENOMEM; | 812 | error = -ENOMEM; |
708 | tracer = kzalloc(sizeof(*tracer), GFP_KERNEL); | 813 | tracer = kzalloc(sizeof(*tracer), GFP_KERNEL); |
709 | if (!tracer) | 814 | if (!tracer) |
710 | goto out; | 815 | goto out_put_tracer; |
711 | tracer->ovfl = ovfl; | 816 | tracer->ovfl = ovfl; |
712 | 817 | ||
818 | /* Do some more error checking and acquire a tracing context. */ | ||
713 | error = ds_request(&tracer->ds, &tracer->trace.ds, | 819 | error = ds_request(&tracer->ds, &tracer->trace.ds, |
714 | ds_pebs, task, base, size, th, flags); | 820 | ds_pebs, task, cpu, base, size, th); |
715 | if (error < 0) | 821 | if (error < 0) |
716 | goto out_tracer; | 822 | goto out_tracer; |
717 | 823 | ||
718 | spin_lock_irqsave(&ds_lock, irq); | 824 | /* Claim the pebs part of the tracing context we acquired above. */ |
719 | 825 | spin_lock_irq(&ds_lock); | |
720 | error = -EPERM; | ||
721 | if (!check_tracer(task)) | ||
722 | goto out_unlock; | ||
723 | get_tracer(task); | ||
724 | 826 | ||
725 | error = -EPERM; | 827 | error = -EPERM; |
726 | if (tracer->ds.context->pebs_master) | 828 | if (tracer->ds.context->pebs_master) |
727 | goto out_put_tracer; | 829 | goto out_unlock; |
728 | tracer->ds.context->pebs_master = tracer; | 830 | tracer->ds.context->pebs_master = tracer; |
729 | 831 | ||
730 | spin_unlock_irqrestore(&ds_lock, irq); | 832 | spin_unlock_irq(&ds_lock); |
731 | 833 | ||
834 | /* | ||
835 | * Now that we own the pebs part of the context, let's complete the | ||
836 | * initialization for that part. | ||
837 | */ | ||
838 | ds_init_ds_trace(&tracer->trace.ds, ds_pebs, base, size, th, flags); | ||
732 | ds_write_config(tracer->ds.context, &tracer->trace.ds, ds_pebs); | 839 | ds_write_config(tracer->ds.context, &tracer->trace.ds, ds_pebs); |
840 | ds_install_ds_area(tracer->ds.context); | ||
841 | |||
842 | /* Start tracing. */ | ||
733 | ds_resume_pebs(tracer); | 843 | ds_resume_pebs(tracer); |
734 | 844 | ||
735 | return tracer; | 845 | return tracer; |
736 | 846 | ||
737 | out_put_tracer: | ||
738 | put_tracer(task); | ||
739 | out_unlock: | 847 | out_unlock: |
740 | spin_unlock_irqrestore(&ds_lock, irq); | 848 | spin_unlock_irq(&ds_lock); |
741 | ds_put_context(tracer->ds.context); | 849 | ds_put_context(tracer->ds.context); |
742 | out_tracer: | 850 | out_tracer: |
743 | kfree(tracer); | 851 | kfree(tracer); |
852 | out_put_tracer: | ||
853 | put_tracer(task); | ||
744 | out: | 854 | out: |
745 | return ERR_PTR(error); | 855 | return ERR_PTR(error); |
746 | } | 856 | } |
747 | 857 | ||
748 | void ds_release_bts(struct bts_tracer *tracer) | 858 | struct pebs_tracer *ds_request_pebs_task(struct task_struct *task, |
859 | void *base, size_t size, | ||
860 | pebs_ovfl_callback_t ovfl, | ||
861 | size_t th, unsigned int flags) | ||
749 | { | 862 | { |
750 | if (!tracer) | 863 | return ds_request_pebs(task, 0, base, size, ovfl, th, flags); |
751 | return; | 864 | } |
752 | 865 | ||
753 | ds_suspend_bts(tracer); | 866 | struct pebs_tracer *ds_request_pebs_cpu(int cpu, void *base, size_t size, |
867 | pebs_ovfl_callback_t ovfl, | ||
868 | size_t th, unsigned int flags) | ||
869 | { | ||
870 | return ds_request_pebs(NULL, cpu, base, size, ovfl, th, flags); | ||
871 | } | ||
872 | |||
873 | static void ds_free_bts(struct bts_tracer *tracer) | ||
874 | { | ||
875 | struct task_struct *task; | ||
876 | |||
877 | task = tracer->ds.context->task; | ||
754 | 878 | ||
755 | WARN_ON_ONCE(tracer->ds.context->bts_master != tracer); | 879 | WARN_ON_ONCE(tracer->ds.context->bts_master != tracer); |
756 | tracer->ds.context->bts_master = NULL; | 880 | tracer->ds.context->bts_master = NULL; |
757 | 881 | ||
758 | put_tracer(tracer->ds.context->task); | 882 | /* Make sure tracing stopped and the tracer is not in use. */ |
883 | if (task && (task != current)) | ||
884 | wait_task_context_switch(task); | ||
885 | |||
759 | ds_put_context(tracer->ds.context); | 886 | ds_put_context(tracer->ds.context); |
887 | put_tracer(task); | ||
760 | 888 | ||
761 | kfree(tracer); | 889 | kfree(tracer); |
762 | } | 890 | } |
763 | 891 | ||
892 | void ds_release_bts(struct bts_tracer *tracer) | ||
893 | { | ||
894 | might_sleep(); | ||
895 | |||
896 | if (!tracer) | ||
897 | return; | ||
898 | |||
899 | ds_suspend_bts(tracer); | ||
900 | ds_free_bts(tracer); | ||
901 | } | ||
902 | |||
903 | int ds_release_bts_noirq(struct bts_tracer *tracer) | ||
904 | { | ||
905 | struct task_struct *task; | ||
906 | unsigned long irq; | ||
907 | int error; | ||
908 | |||
909 | if (!tracer) | ||
910 | return 0; | ||
911 | |||
912 | task = tracer->ds.context->task; | ||
913 | |||
914 | local_irq_save(irq); | ||
915 | |||
916 | error = -EPERM; | ||
917 | if (!task && | ||
918 | (tracer->ds.context->cpu != smp_processor_id())) | ||
919 | goto out; | ||
920 | |||
921 | error = -EPERM; | ||
922 | if (task && (task != current)) | ||
923 | goto out; | ||
924 | |||
925 | ds_suspend_bts_noirq(tracer); | ||
926 | ds_free_bts(tracer); | ||
927 | |||
928 | error = 0; | ||
929 | out: | ||
930 | local_irq_restore(irq); | ||
931 | return error; | ||
932 | } | ||
933 | |||
934 | static void update_task_debugctlmsr(struct task_struct *task, | ||
935 | unsigned long debugctlmsr) | ||
936 | { | ||
937 | task->thread.debugctlmsr = debugctlmsr; | ||
938 | |||
939 | get_cpu(); | ||
940 | if (task == current) | ||
941 | update_debugctlmsr(debugctlmsr); | ||
942 | put_cpu(); | ||
943 | } | ||
944 | |||
764 | void ds_suspend_bts(struct bts_tracer *tracer) | 945 | void ds_suspend_bts(struct bts_tracer *tracer) |
765 | { | 946 | { |
766 | struct task_struct *task; | 947 | struct task_struct *task; |
948 | unsigned long debugctlmsr; | ||
949 | int cpu; | ||
767 | 950 | ||
768 | if (!tracer) | 951 | if (!tracer) |
769 | return; | 952 | return; |
770 | 953 | ||
954 | tracer->flags = 0; | ||
955 | |||
771 | task = tracer->ds.context->task; | 956 | task = tracer->ds.context->task; |
957 | cpu = tracer->ds.context->cpu; | ||
772 | 958 | ||
773 | if (!task || (task == current)) | 959 | WARN_ON(!task && irqs_disabled()); |
774 | update_debugctlmsr(get_debugctlmsr() & ~BTS_CONTROL); | ||
775 | 960 | ||
776 | if (task) { | 961 | debugctlmsr = (task ? |
777 | task->thread.debugctlmsr &= ~BTS_CONTROL; | 962 | task->thread.debugctlmsr : |
963 | get_debugctlmsr_on_cpu(cpu)); | ||
964 | debugctlmsr &= ~BTS_CONTROL; | ||
778 | 965 | ||
779 | if (!task->thread.debugctlmsr) | 966 | if (task) |
780 | clear_tsk_thread_flag(task, TIF_DEBUGCTLMSR); | 967 | update_task_debugctlmsr(task, debugctlmsr); |
781 | } | 968 | else |
969 | update_debugctlmsr_on_cpu(cpu, debugctlmsr); | ||
782 | } | 970 | } |
783 | 971 | ||
784 | void ds_resume_bts(struct bts_tracer *tracer) | 972 | int ds_suspend_bts_noirq(struct bts_tracer *tracer) |
785 | { | 973 | { |
786 | struct task_struct *task; | 974 | struct task_struct *task; |
787 | unsigned long control; | 975 | unsigned long debugctlmsr, irq; |
976 | int cpu, error = 0; | ||
788 | 977 | ||
789 | if (!tracer) | 978 | if (!tracer) |
790 | return; | 979 | return 0; |
980 | |||
981 | tracer->flags = 0; | ||
791 | 982 | ||
792 | task = tracer->ds.context->task; | 983 | task = tracer->ds.context->task; |
984 | cpu = tracer->ds.context->cpu; | ||
985 | |||
986 | local_irq_save(irq); | ||
987 | |||
988 | error = -EPERM; | ||
989 | if (!task && (cpu != smp_processor_id())) | ||
990 | goto out; | ||
991 | |||
992 | debugctlmsr = (task ? | ||
993 | task->thread.debugctlmsr : | ||
994 | get_debugctlmsr()); | ||
995 | debugctlmsr &= ~BTS_CONTROL; | ||
996 | |||
997 | if (task) | ||
998 | update_task_debugctlmsr(task, debugctlmsr); | ||
999 | else | ||
1000 | update_debugctlmsr(debugctlmsr); | ||
1001 | |||
1002 | error = 0; | ||
1003 | out: | ||
1004 | local_irq_restore(irq); | ||
1005 | return error; | ||
1006 | } | ||
1007 | |||
1008 | static unsigned long ds_bts_control(struct bts_tracer *tracer) | ||
1009 | { | ||
1010 | unsigned long control; | ||
793 | 1011 | ||
794 | control = ds_cfg.ctl[dsf_bts]; | 1012 | control = ds_cfg.ctl[dsf_bts]; |
795 | if (!(tracer->trace.ds.flags & BTS_KERNEL)) | 1013 | if (!(tracer->trace.ds.flags & BTS_KERNEL)) |
@@ -797,41 +1015,149 @@ void ds_resume_bts(struct bts_tracer *tracer) | |||
797 | if (!(tracer->trace.ds.flags & BTS_USER)) | 1015 | if (!(tracer->trace.ds.flags & BTS_USER)) |
798 | control |= ds_cfg.ctl[dsf_bts_user]; | 1016 | control |= ds_cfg.ctl[dsf_bts_user]; |
799 | 1017 | ||
800 | if (task) { | 1018 | return control; |
801 | task->thread.debugctlmsr |= control; | ||
802 | set_tsk_thread_flag(task, TIF_DEBUGCTLMSR); | ||
803 | } | ||
804 | |||
805 | if (!task || (task == current)) | ||
806 | update_debugctlmsr(get_debugctlmsr() | control); | ||
807 | } | 1019 | } |
808 | 1020 | ||
809 | void ds_release_pebs(struct pebs_tracer *tracer) | 1021 | void ds_resume_bts(struct bts_tracer *tracer) |
810 | { | 1022 | { |
1023 | struct task_struct *task; | ||
1024 | unsigned long debugctlmsr; | ||
1025 | int cpu; | ||
1026 | |||
811 | if (!tracer) | 1027 | if (!tracer) |
812 | return; | 1028 | return; |
813 | 1029 | ||
814 | ds_suspend_pebs(tracer); | 1030 | tracer->flags = tracer->trace.ds.flags; |
1031 | |||
1032 | task = tracer->ds.context->task; | ||
1033 | cpu = tracer->ds.context->cpu; | ||
1034 | |||
1035 | WARN_ON(!task && irqs_disabled()); | ||
1036 | |||
1037 | debugctlmsr = (task ? | ||
1038 | task->thread.debugctlmsr : | ||
1039 | get_debugctlmsr_on_cpu(cpu)); | ||
1040 | debugctlmsr |= ds_bts_control(tracer); | ||
1041 | |||
1042 | if (task) | ||
1043 | update_task_debugctlmsr(task, debugctlmsr); | ||
1044 | else | ||
1045 | update_debugctlmsr_on_cpu(cpu, debugctlmsr); | ||
1046 | } | ||
1047 | |||
1048 | int ds_resume_bts_noirq(struct bts_tracer *tracer) | ||
1049 | { | ||
1050 | struct task_struct *task; | ||
1051 | unsigned long debugctlmsr, irq; | ||
1052 | int cpu, error = 0; | ||
1053 | |||
1054 | if (!tracer) | ||
1055 | return 0; | ||
1056 | |||
1057 | tracer->flags = tracer->trace.ds.flags; | ||
1058 | |||
1059 | task = tracer->ds.context->task; | ||
1060 | cpu = tracer->ds.context->cpu; | ||
1061 | |||
1062 | local_irq_save(irq); | ||
1063 | |||
1064 | error = -EPERM; | ||
1065 | if (!task && (cpu != smp_processor_id())) | ||
1066 | goto out; | ||
1067 | |||
1068 | debugctlmsr = (task ? | ||
1069 | task->thread.debugctlmsr : | ||
1070 | get_debugctlmsr()); | ||
1071 | debugctlmsr |= ds_bts_control(tracer); | ||
1072 | |||
1073 | if (task) | ||
1074 | update_task_debugctlmsr(task, debugctlmsr); | ||
1075 | else | ||
1076 | update_debugctlmsr(debugctlmsr); | ||
1077 | |||
1078 | error = 0; | ||
1079 | out: | ||
1080 | local_irq_restore(irq); | ||
1081 | return error; | ||
1082 | } | ||
1083 | |||
1084 | static void ds_free_pebs(struct pebs_tracer *tracer) | ||
1085 | { | ||
1086 | struct task_struct *task; | ||
1087 | |||
1088 | task = tracer->ds.context->task; | ||
815 | 1089 | ||
816 | WARN_ON_ONCE(tracer->ds.context->pebs_master != tracer); | 1090 | WARN_ON_ONCE(tracer->ds.context->pebs_master != tracer); |
817 | tracer->ds.context->pebs_master = NULL; | 1091 | tracer->ds.context->pebs_master = NULL; |
818 | 1092 | ||
819 | put_tracer(tracer->ds.context->task); | ||
820 | ds_put_context(tracer->ds.context); | 1093 | ds_put_context(tracer->ds.context); |
1094 | put_tracer(task); | ||
821 | 1095 | ||
822 | kfree(tracer); | 1096 | kfree(tracer); |
823 | } | 1097 | } |
824 | 1098 | ||
1099 | void ds_release_pebs(struct pebs_tracer *tracer) | ||
1100 | { | ||
1101 | might_sleep(); | ||
1102 | |||
1103 | if (!tracer) | ||
1104 | return; | ||
1105 | |||
1106 | ds_suspend_pebs(tracer); | ||
1107 | ds_free_pebs(tracer); | ||
1108 | } | ||
1109 | |||
1110 | int ds_release_pebs_noirq(struct pebs_tracer *tracer) | ||
1111 | { | ||
1112 | struct task_struct *task; | ||
1113 | unsigned long irq; | ||
1114 | int error; | ||
1115 | |||
1116 | if (!tracer) | ||
1117 | return 0; | ||
1118 | |||
1119 | task = tracer->ds.context->task; | ||
1120 | |||
1121 | local_irq_save(irq); | ||
1122 | |||
1123 | error = -EPERM; | ||
1124 | if (!task && | ||
1125 | (tracer->ds.context->cpu != smp_processor_id())) | ||
1126 | goto out; | ||
1127 | |||
1128 | error = -EPERM; | ||
1129 | if (task && (task != current)) | ||
1130 | goto out; | ||
1131 | |||
1132 | ds_suspend_pebs_noirq(tracer); | ||
1133 | ds_free_pebs(tracer); | ||
1134 | |||
1135 | error = 0; | ||
1136 | out: | ||
1137 | local_irq_restore(irq); | ||
1138 | return error; | ||
1139 | } | ||
1140 | |||
825 | void ds_suspend_pebs(struct pebs_tracer *tracer) | 1141 | void ds_suspend_pebs(struct pebs_tracer *tracer) |
826 | { | 1142 | { |
827 | 1143 | ||
828 | } | 1144 | } |
829 | 1145 | ||
1146 | int ds_suspend_pebs_noirq(struct pebs_tracer *tracer) | ||
1147 | { | ||
1148 | return 0; | ||
1149 | } | ||
1150 | |||
830 | void ds_resume_pebs(struct pebs_tracer *tracer) | 1151 | void ds_resume_pebs(struct pebs_tracer *tracer) |
831 | { | 1152 | { |
832 | 1153 | ||
833 | } | 1154 | } |
834 | 1155 | ||
1156 | int ds_resume_pebs_noirq(struct pebs_tracer *tracer) | ||
1157 | { | ||
1158 | return 0; | ||
1159 | } | ||
1160 | |||
835 | const struct bts_trace *ds_read_bts(struct bts_tracer *tracer) | 1161 | const struct bts_trace *ds_read_bts(struct bts_tracer *tracer) |
836 | { | 1162 | { |
837 | if (!tracer) | 1163 | if (!tracer) |
@@ -847,8 +1173,12 @@ const struct pebs_trace *ds_read_pebs(struct pebs_tracer *tracer) | |||
847 | return NULL; | 1173 | return NULL; |
848 | 1174 | ||
849 | ds_read_config(tracer->ds.context, &tracer->trace.ds, ds_pebs); | 1175 | ds_read_config(tracer->ds.context, &tracer->trace.ds, ds_pebs); |
850 | tracer->trace.reset_value = | 1176 | |
851 | *(u64 *)(tracer->ds.context->ds + (ds_cfg.sizeof_field * 8)); | 1177 | tracer->trace.counters = ds_cfg.nr_counter_reset; |
1178 | memcpy(tracer->trace.counter_reset, | ||
1179 | tracer->ds.context->ds + | ||
1180 | (NUM_DS_PTR_FIELDS * ds_cfg.sizeof_ptr_field), | ||
1181 | ds_cfg.nr_counter_reset * PEBS_RESET_FIELD_SIZE); | ||
852 | 1182 | ||
853 | return &tracer->trace; | 1183 | return &tracer->trace; |
854 | } | 1184 | } |
@@ -873,18 +1203,24 @@ int ds_reset_pebs(struct pebs_tracer *tracer) | |||
873 | 1203 | ||
874 | tracer->trace.ds.top = tracer->trace.ds.begin; | 1204 | tracer->trace.ds.top = tracer->trace.ds.begin; |
875 | 1205 | ||
876 | ds_set(tracer->ds.context->ds, ds_bts, ds_index, | 1206 | ds_set(tracer->ds.context->ds, ds_pebs, ds_index, |
877 | (unsigned long)tracer->trace.ds.top); | 1207 | (unsigned long)tracer->trace.ds.top); |
878 | 1208 | ||
879 | return 0; | 1209 | return 0; |
880 | } | 1210 | } |
881 | 1211 | ||
882 | int ds_set_pebs_reset(struct pebs_tracer *tracer, u64 value) | 1212 | int ds_set_pebs_reset(struct pebs_tracer *tracer, |
1213 | unsigned int counter, u64 value) | ||
883 | { | 1214 | { |
884 | if (!tracer) | 1215 | if (!tracer) |
885 | return -EINVAL; | 1216 | return -EINVAL; |
886 | 1217 | ||
887 | *(u64 *)(tracer->ds.context->ds + (ds_cfg.sizeof_field * 8)) = value; | 1218 | if (ds_cfg.nr_counter_reset < counter) |
1219 | return -EINVAL; | ||
1220 | |||
1221 | *(u64 *)(tracer->ds.context->ds + | ||
1222 | (NUM_DS_PTR_FIELDS * ds_cfg.sizeof_ptr_field) + | ||
1223 | (counter * PEBS_RESET_FIELD_SIZE)) = value; | ||
888 | 1224 | ||
889 | return 0; | 1225 | return 0; |
890 | } | 1226 | } |
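The offset arithmetic above places the per-counter reset values directly behind the pointer-sized DS management fields. A small sketch of that computation, using only names visible in this file (illustrative, not an addition to the kernel API):

	/* Sketch of the reset-slot address used by ds_set_pebs_reset()/ds_read_pebs(). */
	static u64 *pebs_reset_slot(struct ds_context *context, unsigned int counter)
	{
		unsigned char *ds = context->ds;

		ds += NUM_DS_PTR_FIELDS * ds_cfg.sizeof_ptr_field;	/* skip management fields */
		ds += counter * PEBS_RESET_FIELD_SIZE;			/* per-counter 64-bit slot */

		return (u64 *)ds;
	}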
@@ -894,73 +1230,117 @@ static const struct ds_configuration ds_cfg_netburst = { | |||
894 | .ctl[dsf_bts] = (1 << 2) | (1 << 3), | 1230 | .ctl[dsf_bts] = (1 << 2) | (1 << 3), |
895 | .ctl[dsf_bts_kernel] = (1 << 5), | 1231 | .ctl[dsf_bts_kernel] = (1 << 5), |
896 | .ctl[dsf_bts_user] = (1 << 6), | 1232 | .ctl[dsf_bts_user] = (1 << 6), |
897 | 1233 | .nr_counter_reset = 1, | |
898 | .sizeof_field = sizeof(long), | ||
899 | .sizeof_rec[ds_bts] = sizeof(long) * 3, | ||
900 | #ifdef __i386__ | ||
901 | .sizeof_rec[ds_pebs] = sizeof(long) * 10, | ||
902 | #else | ||
903 | .sizeof_rec[ds_pebs] = sizeof(long) * 18, | ||
904 | #endif | ||
905 | }; | 1234 | }; |
906 | static const struct ds_configuration ds_cfg_pentium_m = { | 1235 | static const struct ds_configuration ds_cfg_pentium_m = { |
907 | .name = "Pentium M", | 1236 | .name = "Pentium M", |
908 | .ctl[dsf_bts] = (1 << 6) | (1 << 7), | 1237 | .ctl[dsf_bts] = (1 << 6) | (1 << 7), |
909 | 1238 | .nr_counter_reset = 1, | |
910 | .sizeof_field = sizeof(long), | ||
911 | .sizeof_rec[ds_bts] = sizeof(long) * 3, | ||
912 | #ifdef __i386__ | ||
913 | .sizeof_rec[ds_pebs] = sizeof(long) * 10, | ||
914 | #else | ||
915 | .sizeof_rec[ds_pebs] = sizeof(long) * 18, | ||
916 | #endif | ||
917 | }; | 1239 | }; |
918 | static const struct ds_configuration ds_cfg_core2_atom = { | 1240 | static const struct ds_configuration ds_cfg_core2_atom = { |
919 | .name = "Core 2/Atom", | 1241 | .name = "Core 2/Atom", |
920 | .ctl[dsf_bts] = (1 << 6) | (1 << 7), | 1242 | .ctl[dsf_bts] = (1 << 6) | (1 << 7), |
921 | .ctl[dsf_bts_kernel] = (1 << 9), | 1243 | .ctl[dsf_bts_kernel] = (1 << 9), |
922 | .ctl[dsf_bts_user] = (1 << 10), | 1244 | .ctl[dsf_bts_user] = (1 << 10), |
923 | 1245 | .nr_counter_reset = 1, | |
924 | .sizeof_field = 8, | 1246 | }; |
925 | .sizeof_rec[ds_bts] = 8 * 3, | 1247 | static const struct ds_configuration ds_cfg_core_i7 = { |
926 | .sizeof_rec[ds_pebs] = 8 * 18, | 1248 | .name = "Core i7", |
1249 | .ctl[dsf_bts] = (1 << 6) | (1 << 7), | ||
1250 | .ctl[dsf_bts_kernel] = (1 << 9), | ||
1251 | .ctl[dsf_bts_user] = (1 << 10), | ||
1252 | .nr_counter_reset = 4, | ||
927 | }; | 1253 | }; |
928 | 1254 | ||
929 | static void | 1255 | static void |
930 | ds_configure(const struct ds_configuration *cfg) | 1256 | ds_configure(const struct ds_configuration *cfg, |
1257 | struct cpuinfo_x86 *cpu) | ||
931 | { | 1258 | { |
1259 | unsigned long nr_pebs_fields = 0; | ||
1260 | |||
1261 | printk(KERN_INFO "[ds] using %s configuration\n", cfg->name); | ||
1262 | |||
1263 | #ifdef __i386__ | ||
1264 | nr_pebs_fields = 10; | ||
1265 | #else | ||
1266 | nr_pebs_fields = 18; | ||
1267 | #endif | ||
1268 | |||
1269 | /* | ||
1270 | * Starting with version 2, architectural performance | ||
1271 | * monitoring supports a format specifier. | ||
1272 | */ | ||
1273 | if ((cpuid_eax(0xa) & 0xff) > 1) { | ||
1274 | unsigned long perf_capabilities, format; | ||
1275 | |||
1276 | rdmsrl(MSR_IA32_PERF_CAPABILITIES, perf_capabilities); | ||
1277 | |||
1278 | format = (perf_capabilities >> 8) & 0xf; | ||
1279 | |||
1280 | switch (format) { | ||
1281 | case 0: | ||
1282 | nr_pebs_fields = 18; | ||
1283 | break; | ||
1284 | case 1: | ||
1285 | nr_pebs_fields = 22; | ||
1286 | break; | ||
1287 | default: | ||
1288 | printk(KERN_INFO | ||
1289 | "[ds] unknown PEBS format: %lu\n", format); | ||
1290 | nr_pebs_fields = 0; | ||
1291 | break; | ||
1292 | } | ||
1293 | } | ||
1294 | |||
932 | memset(&ds_cfg, 0, sizeof(ds_cfg)); | 1295 | memset(&ds_cfg, 0, sizeof(ds_cfg)); |
933 | ds_cfg = *cfg; | 1296 | ds_cfg = *cfg; |
934 | 1297 | ||
935 | printk(KERN_INFO "[ds] using %s configuration\n", ds_cfg.name); | 1298 | ds_cfg.sizeof_ptr_field = |
1299 | (cpu_has(cpu, X86_FEATURE_DTES64) ? 8 : 4); | ||
1300 | |||
1301 | ds_cfg.sizeof_rec[ds_bts] = ds_cfg.sizeof_ptr_field * 3; | ||
1302 | ds_cfg.sizeof_rec[ds_pebs] = ds_cfg.sizeof_ptr_field * nr_pebs_fields; | ||
936 | 1303 | ||
937 | if (!cpu_has_bts) { | 1304 | if (!cpu_has(cpu, X86_FEATURE_BTS)) { |
938 | ds_cfg.ctl[dsf_bts] = 0; | 1305 | ds_cfg.sizeof_rec[ds_bts] = 0; |
939 | printk(KERN_INFO "[ds] bts not available\n"); | 1306 | printk(KERN_INFO "[ds] bts not available\n"); |
940 | } | 1307 | } |
941 | if (!cpu_has_pebs) | 1308 | if (!cpu_has(cpu, X86_FEATURE_PEBS)) { |
1309 | ds_cfg.sizeof_rec[ds_pebs] = 0; | ||
942 | printk(KERN_INFO "[ds] pebs not available\n"); | 1310 | printk(KERN_INFO "[ds] pebs not available\n"); |
1311 | } | ||
1312 | |||
1313 | printk(KERN_INFO "[ds] sizes: address: %u bit, ", | ||
1314 | 8 * ds_cfg.sizeof_ptr_field); | ||
1315 | printk("bts/pebs record: %u/%u bytes\n", | ||
1316 | ds_cfg.sizeof_rec[ds_bts], ds_cfg.sizeof_rec[ds_pebs]); | ||
943 | 1317 | ||
944 | WARN_ON_ONCE(MAX_SIZEOF_DS < (12 * ds_cfg.sizeof_field)); | 1318 | WARN_ON_ONCE(MAX_PEBS_COUNTERS < ds_cfg.nr_counter_reset); |
945 | } | 1319 | } |
946 | 1320 | ||
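As a quick back-of-the-envelope check of the sizes that ds_configure() will print, the record sizes follow directly from the pointer field width and the number of PEBS fields. The stand-alone snippet below assumes 64-bit values purely for illustration:

	/* Illustrative check of the record sizes computed in ds_configure(). */
	#include <stdio.h>

	int main(void)
	{
		unsigned int ptr_field = 8;	/* sizeof_ptr_field with X86_FEATURE_DTES64 */
		unsigned int pebs_fields = 18;	/* PEBS format 0; format 1 would use 22 */

		printf("bts record:  %u bytes\n", 3 * ptr_field);		/* 24 */
		printf("pebs record: %u bytes\n", pebs_fields * ptr_field);	/* 144 */
		return 0;
	}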
947 | void __cpuinit ds_init_intel(struct cpuinfo_x86 *c) | 1321 | void __cpuinit ds_init_intel(struct cpuinfo_x86 *c) |
948 | { | 1322 | { |
1323 | /* Only configure the first cpu. Others are identical. */ | ||
1324 | if (ds_cfg.name) | ||
1325 | return; | ||
1326 | |||
949 | switch (c->x86) { | 1327 | switch (c->x86) { |
950 | case 0x6: | 1328 | case 0x6: |
951 | switch (c->x86_model) { | 1329 | switch (c->x86_model) { |
952 | case 0x9: | 1330 | case 0x9: |
953 | case 0xd: /* Pentium M */ | 1331 | case 0xd: /* Pentium M */ |
954 | ds_configure(&ds_cfg_pentium_m); | 1332 | ds_configure(&ds_cfg_pentium_m, c); |
955 | break; | 1333 | break; |
956 | case 0xf: | 1334 | case 0xf: |
957 | case 0x17: /* Core2 */ | 1335 | case 0x17: /* Core2 */ |
958 | case 0x1c: /* Atom */ | 1336 | case 0x1c: /* Atom */ |
959 | ds_configure(&ds_cfg_core2_atom); | 1337 | ds_configure(&ds_cfg_core2_atom, c); |
1338 | break; | ||
1339 | case 0x1a: /* Core i7 */ | ||
1340 | ds_configure(&ds_cfg_core_i7, c); | ||
960 | break; | 1341 | break; |
961 | case 0x1a: /* i7 */ | ||
962 | default: | 1342 | default: |
963 | /* sorry, don't know about them */ | 1343 | /* Sorry, don't know about them. */ |
964 | break; | 1344 | break; |
965 | } | 1345 | } |
966 | break; | 1346 | break; |
@@ -969,64 +1349,89 @@ void __cpuinit ds_init_intel(struct cpuinfo_x86 *c) | |||
969 | case 0x0: | 1349 | case 0x0: |
970 | case 0x1: | 1350 | case 0x1: |
971 | case 0x2: /* Netburst */ | 1351 | case 0x2: /* Netburst */ |
972 | ds_configure(&ds_cfg_netburst); | 1352 | ds_configure(&ds_cfg_netburst, c); |
973 | break; | 1353 | break; |
974 | default: | 1354 | default: |
975 | /* sorry, don't know about them */ | 1355 | /* Sorry, don't know about them. */ |
976 | break; | 1356 | break; |
977 | } | 1357 | } |
978 | break; | 1358 | break; |
979 | default: | 1359 | default: |
980 | /* sorry, don't know about them */ | 1360 | /* Sorry, don't know about them. */ |
981 | break; | 1361 | break; |
982 | } | 1362 | } |
983 | } | 1363 | } |
984 | 1364 | ||
1365 | static inline void ds_take_timestamp(struct ds_context *context, | ||
1366 | enum bts_qualifier qualifier, | ||
1367 | struct task_struct *task) | ||
1368 | { | ||
1369 | struct bts_tracer *tracer = context->bts_master; | ||
1370 | struct bts_struct ts; | ||
1371 | |||
1372 | /* Prevent compilers from reading the tracer pointer twice. */ | ||
1373 | barrier(); | ||
1374 | |||
1375 | if (!tracer || !(tracer->flags & BTS_TIMESTAMPS)) | ||
1376 | return; | ||
1377 | |||
1378 | memset(&ts, 0, sizeof(ts)); | ||
1379 | ts.qualifier = qualifier; | ||
1380 | ts.variant.event.clock = trace_clock_global(); | ||
1381 | ts.variant.event.pid = task->pid; | ||
1382 | |||
1383 | bts_write(tracer, &ts); | ||
1384 | } | ||
1385 | |||
985 | /* | 1386 | /* |
986 | * Change the DS configuration from tracing prev to tracing next. | 1387 | * Change the DS configuration from tracing prev to tracing next. |
987 | */ | 1388 | */ |
988 | void ds_switch_to(struct task_struct *prev, struct task_struct *next) | 1389 | void ds_switch_to(struct task_struct *prev, struct task_struct *next) |
989 | { | 1390 | { |
990 | struct ds_context *prev_ctx = prev->thread.ds_ctx; | 1391 | struct ds_context *prev_ctx = prev->thread.ds_ctx; |
991 | struct ds_context *next_ctx = next->thread.ds_ctx; | 1392 | struct ds_context *next_ctx = next->thread.ds_ctx; |
1393 | unsigned long debugctlmsr = next->thread.debugctlmsr; | ||
1394 | |||
1395 | /* Make sure all data is read before we start. */ | ||
1396 | barrier(); | ||
992 | 1397 | ||
993 | if (prev_ctx) { | 1398 | if (prev_ctx) { |
994 | update_debugctlmsr(0); | 1399 | update_debugctlmsr(0); |
995 | 1400 | ||
996 | if (prev_ctx->bts_master && | 1401 | ds_take_timestamp(prev_ctx, bts_task_departs, prev); |
997 | (prev_ctx->bts_master->trace.ds.flags & BTS_TIMESTAMPS)) { | ||
998 | struct bts_struct ts = { | ||
999 | .qualifier = bts_task_departs, | ||
1000 | .variant.timestamp.jiffies = jiffies_64, | ||
1001 | .variant.timestamp.pid = prev->pid | ||
1002 | }; | ||
1003 | bts_write(prev_ctx->bts_master, &ts); | ||
1004 | } | ||
1005 | } | 1402 | } |
1006 | 1403 | ||
1007 | if (next_ctx) { | 1404 | if (next_ctx) { |
1008 | if (next_ctx->bts_master && | 1405 | ds_take_timestamp(next_ctx, bts_task_arrives, next); |
1009 | (next_ctx->bts_master->trace.ds.flags & BTS_TIMESTAMPS)) { | ||
1010 | struct bts_struct ts = { | ||
1011 | .qualifier = bts_task_arrives, | ||
1012 | .variant.timestamp.jiffies = jiffies_64, | ||
1013 | .variant.timestamp.pid = next->pid | ||
1014 | }; | ||
1015 | bts_write(next_ctx->bts_master, &ts); | ||
1016 | } | ||
1017 | 1406 | ||
1018 | wrmsrl(MSR_IA32_DS_AREA, (unsigned long)next_ctx->ds); | 1407 | wrmsrl(MSR_IA32_DS_AREA, (unsigned long)next_ctx->ds); |
1019 | } | 1408 | } |
1020 | 1409 | ||
1021 | update_debugctlmsr(next->thread.debugctlmsr); | 1410 | update_debugctlmsr(debugctlmsr); |
1022 | } | 1411 | } |
1023 | 1412 | ||
1024 | void ds_copy_thread(struct task_struct *tsk, struct task_struct *father) | 1413 | static __init int ds_selftest(void) |
1025 | { | 1414 | { |
1026 | clear_tsk_thread_flag(tsk, TIF_DS_AREA_MSR); | 1415 | if (ds_cfg.sizeof_rec[ds_bts]) { |
1027 | tsk->thread.ds_ctx = NULL; | 1416 | int error; |
1028 | } | ||
1029 | 1417 | ||
1030 | void ds_exit_thread(struct task_struct *tsk) | 1418 | error = ds_selftest_bts(); |
1031 | { | 1419 | if (error) { |
1420 | WARN(1, "[ds] selftest failed. disabling bts.\n"); | ||
1421 | ds_cfg.sizeof_rec[ds_bts] = 0; | ||
1422 | } | ||
1423 | } | ||
1424 | |||
1425 | if (ds_cfg.sizeof_rec[ds_pebs]) { | ||
1426 | int error; | ||
1427 | |||
1428 | error = ds_selftest_pebs(); | ||
1429 | if (error) { | ||
1430 | WARN(1, "[ds] selftest failed. disabling pebs.\n"); | ||
1431 | ds_cfg.sizeof_rec[ds_pebs] = 0; | ||
1432 | } | ||
1433 | } | ||
1434 | |||
1435 | return 0; | ||
1032 | } | 1436 | } |
1437 | device_initcall(ds_selftest); | ||
diff --git a/arch/x86/kernel/ds_selftest.c b/arch/x86/kernel/ds_selftest.c new file mode 100644 index 000000000000..6bc7c199ab99 --- /dev/null +++ b/arch/x86/kernel/ds_selftest.c | |||
@@ -0,0 +1,408 @@ | |||
1 | /* | ||
2 | * Debug Store support - selftest | ||
3 | * | ||
4 | * | ||
5 | * Copyright (C) 2009 Intel Corporation. | ||
6 | * Markus Metzger <markus.t.metzger@intel.com>, 2009 | ||
7 | */ | ||
8 | |||
9 | #include "ds_selftest.h" | ||
10 | |||
11 | #include <linux/kernel.h> | ||
12 | #include <linux/string.h> | ||
13 | #include <linux/smp.h> | ||
14 | #include <linux/cpu.h> | ||
15 | |||
16 | #include <asm/ds.h> | ||
17 | |||
18 | |||
19 | #define BUFFER_SIZE 521 /* Intentionally chose an odd size. */ | ||
20 | #define SMALL_BUFFER_SIZE 24 /* A single bts entry. */ | ||
21 | |||
22 | struct ds_selftest_bts_conf { | ||
23 | struct bts_tracer *tracer; | ||
24 | int error; | ||
25 | int (*suspend)(struct bts_tracer *); | ||
26 | int (*resume)(struct bts_tracer *); | ||
27 | }; | ||
28 | |||
29 | static int ds_selftest_bts_consistency(const struct bts_trace *trace) | ||
30 | { | ||
31 | int error = 0; | ||
32 | |||
33 | if (!trace) { | ||
34 | printk(KERN_CONT "failed to access trace..."); | ||
35 | /* Bail out. Other tests are pointless. */ | ||
36 | return -1; | ||
37 | } | ||
38 | |||
39 | if (!trace->read) { | ||
40 | printk(KERN_CONT "bts read not available..."); | ||
41 | error = -1; | ||
42 | } | ||
43 | |||
44 | /* Do some sanity checks on the trace configuration. */ | ||
45 | if (!trace->ds.n) { | ||
46 | printk(KERN_CONT "empty bts buffer..."); | ||
47 | error = -1; | ||
48 | } | ||
49 | if (!trace->ds.size) { | ||
50 | printk(KERN_CONT "bad bts trace setup..."); | ||
51 | error = -1; | ||
52 | } | ||
53 | if (trace->ds.end != | ||
54 | (char *)trace->ds.begin + (trace->ds.n * trace->ds.size)) { | ||
55 | printk(KERN_CONT "bad bts buffer setup..."); | ||
56 | error = -1; | ||
57 | } | ||
58 | /* | ||
59 | * We allow top in [begin; end], since it's not clear when the | ||
60 | * overflow adjustment happens: after the increment or before the | ||
61 | * write. | ||
62 | */ | ||
63 | if ((trace->ds.top < trace->ds.begin) || | ||
64 | (trace->ds.end < trace->ds.top)) { | ||
65 | printk(KERN_CONT "bts top out of bounds..."); | ||
66 | error = -1; | ||
67 | } | ||
68 | |||
69 | return error; | ||
70 | } | ||
71 | |||
72 | static int ds_selftest_bts_read(struct bts_tracer *tracer, | ||
73 | const struct bts_trace *trace, | ||
74 | const void *from, const void *to) | ||
75 | { | ||
76 | const unsigned char *at; | ||
77 | |||
78 | /* | ||
79 | * Check a few things which do not belong to this test. | ||
80 | * They should be covered by other tests. | ||
81 | */ | ||
82 | if (!trace) | ||
83 | return -1; | ||
84 | |||
85 | if (!trace->read) | ||
86 | return -1; | ||
87 | |||
88 | if (to < from) | ||
89 | return -1; | ||
90 | |||
91 | if (from < trace->ds.begin) | ||
92 | return -1; | ||
93 | |||
94 | if (trace->ds.end < to) | ||
95 | return -1; | ||
96 | |||
97 | if (!trace->ds.size) | ||
98 | return -1; | ||
99 | |||
100 | /* Now to the test itself. */ | ||
101 | for (at = from; (void *)at < to; at += trace->ds.size) { | ||
102 | struct bts_struct bts; | ||
103 | unsigned long index; | ||
104 | int error; | ||
105 | |||
106 | if (((void *)at - trace->ds.begin) % trace->ds.size) { | ||
107 | printk(KERN_CONT | ||
108 | "read from non-integer index..."); | ||
109 | return -1; | ||
110 | } | ||
111 | index = ((void *)at - trace->ds.begin) / trace->ds.size; | ||
112 | |||
113 | memset(&bts, 0, sizeof(bts)); | ||
114 | error = trace->read(tracer, at, &bts); | ||
115 | if (error < 0) { | ||
116 | printk(KERN_CONT | ||
117 | "error reading bts trace at [%lu] (0x%p)...", | ||
118 | index, at); | ||
119 | return error; | ||
120 | } | ||
121 | |||
122 | switch (bts.qualifier) { | ||
123 | case BTS_BRANCH: | ||
124 | break; | ||
125 | default: | ||
126 | printk(KERN_CONT | ||
127 | "unexpected bts entry %llu at [%lu] (0x%p)...", | ||
128 | bts.qualifier, index, at); | ||
129 | return -1; | ||
130 | } | ||
131 | } | ||
132 | |||
133 | return 0; | ||
134 | } | ||
135 | |||
136 | static void ds_selftest_bts_cpu(void *arg) | ||
137 | { | ||
138 | struct ds_selftest_bts_conf *conf = arg; | ||
139 | const struct bts_trace *trace; | ||
140 | void *top; | ||
141 | |||
142 | if (IS_ERR(conf->tracer)) { | ||
143 | conf->error = PTR_ERR(conf->tracer); | ||
144 | conf->tracer = NULL; | ||
145 | |||
146 | printk(KERN_CONT | ||
147 | "initialization failed (err: %d)...", conf->error); | ||
148 | return; | ||
149 | } | ||
150 | |||
151 | /* We should meanwhile have enough trace. */ | ||
152 | conf->error = conf->suspend(conf->tracer); | ||
153 | if (conf->error < 0) | ||
154 | return; | ||
155 | |||
156 | /* Let's see if we can access the trace. */ | ||
157 | trace = ds_read_bts(conf->tracer); | ||
158 | |||
159 | conf->error = ds_selftest_bts_consistency(trace); | ||
160 | if (conf->error < 0) | ||
161 | return; | ||
162 | |||
163 | /* If everything went well, we should have a few trace entries. */ | ||
164 | if (trace->ds.top == trace->ds.begin) { | ||
165 | /* | ||
166 | * It is possible but highly unlikely that we got a | ||
167 | * buffer overflow and end up at exactly the same | ||
168 | * position we started from. | ||
169 | * Let's issue a warning, but continue. | ||
170 | */ | ||
171 | printk(KERN_CONT "no trace/overflow..."); | ||
172 | } | ||
173 | |||
174 | /* Let's try to read the trace we collected. */ | ||
175 | conf->error = | ||
176 | ds_selftest_bts_read(conf->tracer, trace, | ||
177 | trace->ds.begin, trace->ds.top); | ||
178 | if (conf->error < 0) | ||
179 | return; | ||
180 | |||
181 | /* | ||
182 | * Let's read the trace again. | ||
183 | * Since we suspended tracing, we should get the same result. | ||
184 | */ | ||
185 | top = trace->ds.top; | ||
186 | |||
187 | trace = ds_read_bts(conf->tracer); | ||
188 | conf->error = ds_selftest_bts_consistency(trace); | ||
189 | if (conf->error < 0) | ||
190 | return; | ||
191 | |||
192 | if (top != trace->ds.top) { | ||
193 | printk(KERN_CONT "suspend not working..."); | ||
194 | conf->error = -1; | ||
195 | return; | ||
196 | } | ||
197 | |||
198 | /* Let's collect some more trace - see if resume is working. */ | ||
199 | conf->error = conf->resume(conf->tracer); | ||
200 | if (conf->error < 0) | ||
201 | return; | ||
202 | |||
203 | conf->error = conf->suspend(conf->tracer); | ||
204 | if (conf->error < 0) | ||
205 | return; | ||
206 | |||
207 | trace = ds_read_bts(conf->tracer); | ||
208 | |||
209 | conf->error = ds_selftest_bts_consistency(trace); | ||
210 | if (conf->error < 0) | ||
211 | return; | ||
212 | |||
213 | if (trace->ds.top == top) { | ||
214 | /* | ||
215 | * It is possible but highly unlikely that we got a | ||
216 | * buffer overflow and end up at exactly the same | ||
217 | * position we started from. | ||
218 | * Let's issue a warning and check the full trace. | ||
219 | */ | ||
220 | printk(KERN_CONT | ||
221 | "no resume progress/overflow..."); | ||
222 | |||
223 | conf->error = | ||
224 | ds_selftest_bts_read(conf->tracer, trace, | ||
225 | trace->ds.begin, trace->ds.end); | ||
226 | } else if (trace->ds.top < top) { | ||
227 | /* | ||
228 | * We had a buffer overflow - the entire buffer should | ||
229 | * contain trace records. | ||
230 | */ | ||
231 | conf->error = | ||
232 | ds_selftest_bts_read(conf->tracer, trace, | ||
233 | trace->ds.begin, trace->ds.end); | ||
234 | } else { | ||
235 | /* | ||
236 | * It is quite likely that the buffer did not overflow. | ||
237 | * Let's just check the delta trace. | ||
238 | */ | ||
239 | conf->error = | ||
240 | ds_selftest_bts_read(conf->tracer, trace, top, | ||
241 | trace->ds.top); | ||
242 | } | ||
243 | if (conf->error < 0) | ||
244 | return; | ||
245 | |||
246 | conf->error = 0; | ||
247 | } | ||
248 | |||
249 | static int ds_suspend_bts_wrap(struct bts_tracer *tracer) | ||
250 | { | ||
251 | ds_suspend_bts(tracer); | ||
252 | return 0; | ||
253 | } | ||
254 | |||
255 | static int ds_resume_bts_wrap(struct bts_tracer *tracer) | ||
256 | { | ||
257 | ds_resume_bts(tracer); | ||
258 | return 0; | ||
259 | } | ||
260 | |||
261 | static void ds_release_bts_noirq_wrap(void *tracer) | ||
262 | { | ||
263 | (void)ds_release_bts_noirq(tracer); | ||
264 | } | ||
265 | |||
266 | static int ds_selftest_bts_bad_release_noirq(int cpu, | ||
267 | struct bts_tracer *tracer) | ||
268 | { | ||
269 | int error = -EPERM; | ||
270 | |||
271 | /* Try to release the tracer on the wrong cpu. */ | ||
272 | get_cpu(); | ||
273 | if (cpu != smp_processor_id()) { | ||
274 | error = ds_release_bts_noirq(tracer); | ||
275 | if (error != -EPERM) | ||
276 | printk(KERN_CONT "release on wrong cpu..."); | ||
277 | } | ||
278 | put_cpu(); | ||
279 | |||
280 | return error ? 0 : -1; | ||
281 | } | ||
282 | |||
283 | static int ds_selftest_bts_bad_request_cpu(int cpu, void *buffer) | ||
284 | { | ||
285 | struct bts_tracer *tracer; | ||
286 | int error; | ||
287 | |||
288 | /* Try to request cpu tracing while task tracing is active. */ | ||
289 | tracer = ds_request_bts_cpu(cpu, buffer, BUFFER_SIZE, NULL, | ||
290 | (size_t)-1, BTS_KERNEL); | ||
291 | error = PTR_ERR(tracer); | ||
292 | if (!IS_ERR(tracer)) { | ||
293 | ds_release_bts(tracer); | ||
294 | error = 0; | ||
295 | } | ||
296 | |||
297 | if (error != -EPERM) | ||
298 | printk(KERN_CONT "cpu/task tracing overlap..."); | ||
299 | |||
300 | return error ? 0 : -1; | ||
301 | } | ||
302 | |||
303 | static int ds_selftest_bts_bad_request_task(void *buffer) | ||
304 | { | ||
305 | struct bts_tracer *tracer; | ||
306 | int error; | ||
307 | |||
308 | /* Try to request task tracing while cpu tracing is active. */ | ||
309 | tracer = ds_request_bts_task(current, buffer, BUFFER_SIZE, NULL, | ||
310 | (size_t)-1, BTS_KERNEL); | ||
311 | error = PTR_ERR(tracer); | ||
312 | if (!IS_ERR(tracer)) { | ||
313 | error = 0; | ||
314 | ds_release_bts(tracer); | ||
315 | } | ||
316 | |||
317 | if (error != -EPERM) | ||
318 | printk(KERN_CONT "task/cpu tracing overlap..."); | ||
319 | |||
320 | return error ? 0 : -1; | ||
321 | } | ||
322 | |||
323 | int ds_selftest_bts(void) | ||
324 | { | ||
325 | struct ds_selftest_bts_conf conf; | ||
326 | unsigned char buffer[BUFFER_SIZE], *small_buffer; | ||
327 | unsigned long irq; | ||
328 | int cpu; | ||
329 | |||
330 | printk(KERN_INFO "[ds] bts selftest..."); | ||
331 | conf.error = 0; | ||
332 | |||
333 | small_buffer = (unsigned char *)ALIGN((unsigned long)buffer, 8) + 8; | ||
334 | |||
335 | get_online_cpus(); | ||
336 | for_each_online_cpu(cpu) { | ||
337 | conf.suspend = ds_suspend_bts_wrap; | ||
338 | conf.resume = ds_resume_bts_wrap; | ||
339 | conf.tracer = | ||
340 | ds_request_bts_cpu(cpu, buffer, BUFFER_SIZE, | ||
341 | NULL, (size_t)-1, BTS_KERNEL); | ||
342 | ds_selftest_bts_cpu(&conf); | ||
343 | if (conf.error >= 0) | ||
344 | conf.error = ds_selftest_bts_bad_request_task(buffer); | ||
345 | ds_release_bts(conf.tracer); | ||
346 | if (conf.error < 0) | ||
347 | goto out; | ||
348 | |||
349 | conf.suspend = ds_suspend_bts_noirq; | ||
350 | conf.resume = ds_resume_bts_noirq; | ||
351 | conf.tracer = | ||
352 | ds_request_bts_cpu(cpu, buffer, BUFFER_SIZE, | ||
353 | NULL, (size_t)-1, BTS_KERNEL); | ||
354 | smp_call_function_single(cpu, ds_selftest_bts_cpu, &conf, 1); | ||
355 | if (conf.error >= 0) { | ||
356 | conf.error = | ||
357 | ds_selftest_bts_bad_release_noirq(cpu, | ||
358 | conf.tracer); | ||
359 | /* We must not release the tracer twice. */ | ||
360 | if (conf.error < 0) | ||
361 | conf.tracer = NULL; | ||
362 | } | ||
363 | if (conf.error >= 0) | ||
364 | conf.error = ds_selftest_bts_bad_request_task(buffer); | ||
365 | smp_call_function_single(cpu, ds_release_bts_noirq_wrap, | ||
366 | conf.tracer, 1); | ||
367 | if (conf.error < 0) | ||
368 | goto out; | ||
369 | } | ||
370 | |||
371 | conf.suspend = ds_suspend_bts_wrap; | ||
372 | conf.resume = ds_resume_bts_wrap; | ||
373 | conf.tracer = | ||
374 | ds_request_bts_task(current, buffer, BUFFER_SIZE, | ||
375 | NULL, (size_t)-1, BTS_KERNEL); | ||
376 | ds_selftest_bts_cpu(&conf); | ||
377 | if (conf.error >= 0) | ||
378 | conf.error = ds_selftest_bts_bad_request_cpu(0, buffer); | ||
379 | ds_release_bts(conf.tracer); | ||
380 | if (conf.error < 0) | ||
381 | goto out; | ||
382 | |||
383 | conf.suspend = ds_suspend_bts_noirq; | ||
384 | conf.resume = ds_resume_bts_noirq; | ||
385 | conf.tracer = | ||
386 | ds_request_bts_task(current, small_buffer, SMALL_BUFFER_SIZE, | ||
387 | NULL, (size_t)-1, BTS_KERNEL); | ||
388 | local_irq_save(irq); | ||
389 | ds_selftest_bts_cpu(&conf); | ||
390 | if (conf.error >= 0) | ||
391 | conf.error = ds_selftest_bts_bad_request_cpu(0, buffer); | ||
392 | ds_release_bts_noirq(conf.tracer); | ||
393 | local_irq_restore(irq); | ||
394 | if (conf.error < 0) | ||
395 | goto out; | ||
396 | |||
397 | conf.error = 0; | ||
398 | out: | ||
399 | put_online_cpus(); | ||
400 | printk(KERN_CONT "%s.\n", (conf.error ? "failed" : "passed")); | ||
401 | |||
402 | return conf.error; | ||
403 | } | ||
404 | |||
405 | int ds_selftest_pebs(void) | ||
406 | { | ||
407 | return 0; | ||
408 | } | ||
diff --git a/arch/x86/kernel/ds_selftest.h b/arch/x86/kernel/ds_selftest.h new file mode 100644 index 000000000000..2ba8745c6663 --- /dev/null +++ b/arch/x86/kernel/ds_selftest.h | |||
@@ -0,0 +1,15 @@ | |||
1 | /* | ||
2 | * Debug Store support - selftest | ||
3 | * | ||
4 | * | ||
5 | * Copyright (C) 2009 Intel Corporation. | ||
6 | * Markus Metzger <markus.t.metzger@intel.com>, 2009 | ||
7 | */ | ||
8 | |||
9 | #ifdef CONFIG_X86_DS_SELFTEST | ||
10 | extern int ds_selftest_bts(void); | ||
11 | extern int ds_selftest_pebs(void); | ||
12 | #else | ||
13 | static inline int ds_selftest_bts(void) { return 0; } | ||
14 | static inline int ds_selftest_pebs(void) { return 0; } | ||
15 | #endif | ||
diff --git a/arch/x86/kernel/dumpstack.h b/arch/x86/kernel/dumpstack.h index da87590b8698..81086c227ab7 100644 --- a/arch/x86/kernel/dumpstack.h +++ b/arch/x86/kernel/dumpstack.h | |||
@@ -29,7 +29,6 @@ show_stack_log_lvl(struct task_struct *task, struct pt_regs *regs, | |||
29 | unsigned long *sp, unsigned long bp, char *log_lvl); | 29 | unsigned long *sp, unsigned long bp, char *log_lvl); |
30 | 30 | ||
31 | extern unsigned int code_bytes; | 31 | extern unsigned int code_bytes; |
32 | extern int kstack_depth_to_print; | ||
33 | 32 | ||
34 | /* The form of the top of the frame on the stack */ | 33 | /* The form of the top of the frame on the stack */ |
35 | struct stack_frame { | 34 | struct stack_frame { |
diff --git a/arch/x86/kernel/e820.c b/arch/x86/kernel/e820.c index 006281302925..7271fa33d791 100644 --- a/arch/x86/kernel/e820.c +++ b/arch/x86/kernel/e820.c | |||
@@ -617,7 +617,7 @@ __init int e820_search_gap(unsigned long *gapstart, unsigned long *gapsize, | |||
617 | */ | 617 | */ |
618 | __init void e820_setup_gap(void) | 618 | __init void e820_setup_gap(void) |
619 | { | 619 | { |
620 | unsigned long gapstart, gapsize, round; | 620 | unsigned long gapstart, gapsize; |
621 | int found; | 621 | int found; |
622 | 622 | ||
623 | gapstart = 0x10000000; | 623 | gapstart = 0x10000000; |
@@ -635,14 +635,9 @@ __init void e820_setup_gap(void) | |||
635 | #endif | 635 | #endif |
636 | 636 | ||
637 | /* | 637 | /* |
638 | * See how much we want to round up: start off with | 638 | * e820_reserve_resources_late() already protects stolen RAM |
639 | * rounding to the next 1MB area. | ||
640 | */ | 639 | */ |
641 | round = 0x100000; | 640 | pci_mem_start = gapstart; |
642 | while ((gapsize >> 4) > round) | ||
643 | round += round; | ||
644 | /* Fun with two's complement */ | ||
645 | pci_mem_start = (gapstart + round) & -round; | ||
646 | 641 | ||
647 | printk(KERN_INFO | 642 | printk(KERN_INFO |
648 | "Allocating PCI resources starting at %lx (gap: %lx:%lx)\n", | 643 | "Allocating PCI resources starting at %lx (gap: %lx:%lx)\n", |
@@ -1371,6 +1366,23 @@ void __init e820_reserve_resources(void) | |||
1371 | } | 1366 | } |
1372 | } | 1367 | } |
1373 | 1368 | ||
1369 | /* How much should we pad the end of RAM, depending on where it is? */ | ||
1370 | static unsigned long ram_alignment(resource_size_t pos) | ||
1371 | { | ||
1372 | unsigned long mb = pos >> 20; | ||
1373 | |||
1374 | /* To 64kB in the first megabyte */ | ||
1375 | if (!mb) | ||
1376 | return 64*1024; | ||
1377 | |||
1378 | /* To 1MB in the first 16MB */ | ||
1379 | if (mb < 16) | ||
1380 | return 1024*1024; | ||
1381 | |||
1382 | /* To 32MB for anything above that */ | ||
1383 | return 32*1024*1024; | ||
1384 | } | ||
1385 | |||
1374 | void __init e820_reserve_resources_late(void) | 1386 | void __init e820_reserve_resources_late(void) |
1375 | { | 1387 | { |
1376 | int i; | 1388 | int i; |
@@ -1382,6 +1394,24 @@ void __init e820_reserve_resources_late(void) | |||
1382 | insert_resource_expand_to_fit(&iomem_resource, res); | 1394 | insert_resource_expand_to_fit(&iomem_resource, res); |
1383 | res++; | 1395 | res++; |
1384 | } | 1396 | } |
1397 | |||
1398 | /* | ||
1399 | * Try to bump up RAM regions to reasonable boundaries to | ||
1400 | * avoid stolen RAM: | ||
1401 | */ | ||
1402 | for (i = 0; i < e820.nr_map; i++) { | ||
1403 | struct e820entry *entry = &e820_saved.map[i]; | ||
1404 | resource_size_t start, end; | ||
1405 | |||
1406 | if (entry->type != E820_RAM) | ||
1407 | continue; | ||
1408 | start = entry->addr + entry->size; | ||
1409 | end = round_up(start, ram_alignment(start)); | ||
1410 | if (start == end) | ||
1411 | continue; | ||
1412 | reserve_region_with_split(&iomem_resource, start, | ||
1413 | end - 1, "RAM buffer"); | ||
1414 | } | ||
1385 | } | 1415 | } |
1386 | 1416 | ||
1387 | char *__init default_machine_specific_memory_setup(void) | 1417 | char *__init default_machine_specific_memory_setup(void) |
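To illustrate the padding rule, the snippet below reproduces ram_alignment() and round_up() in user space and shows where the "RAM buffer" reservations above would end for a few example RAM boundaries; the addresses are made up for the example:

	/* Stand-alone illustration of the RAM-buffer padding added above. */
	#include <stdio.h>

	static unsigned long ram_alignment(unsigned long long pos)
	{
		unsigned long mb = pos >> 20;

		if (!mb)
			return 64 * 1024;	/* below 1 MB: pad to 64 kB */
		if (mb < 16)
			return 1024 * 1024;	/* below 16 MB: pad to 1 MB */
		return 32 * 1024 * 1024;	/* anything above: 32 MB    */
	}

	static unsigned long long round_up(unsigned long long x, unsigned long align)
	{
		return (x + align - 1) / align * align;
	}

	int main(void)
	{
		unsigned long long ends[] = { 0x9fc00ULL, 0xff0000ULL, 0x7f7e0000ULL };
		int i;

		for (i = 0; i < 3; i++)
			printf("RAM ends at %#llx -> buffer reserved up to %#llx\n",
			       ends[i], round_up(ends[i], ram_alignment(ends[i])));
		return 0;
	}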
diff --git a/arch/x86/kernel/early-quirks.c b/arch/x86/kernel/early-quirks.c index 76b8cd953dee..ebdb85cf2686 100644 --- a/arch/x86/kernel/early-quirks.c +++ b/arch/x86/kernel/early-quirks.c | |||
@@ -97,6 +97,7 @@ static void __init nvidia_bugs(int num, int slot, int func) | |||
97 | } | 97 | } |
98 | 98 | ||
99 | #if defined(CONFIG_ACPI) && defined(CONFIG_X86_IO_APIC) | 99 | #if defined(CONFIG_ACPI) && defined(CONFIG_X86_IO_APIC) |
100 | #if defined(CONFIG_ACPI) && defined(CONFIG_X86_IO_APIC) | ||
100 | static u32 __init ati_ixp4x0_rev(int num, int slot, int func) | 101 | static u32 __init ati_ixp4x0_rev(int num, int slot, int func) |
101 | { | 102 | { |
102 | u32 d; | 103 | u32 d; |
@@ -114,6 +115,7 @@ static u32 __init ati_ixp4x0_rev(int num, int slot, int func) | |||
114 | d &= 0xff; | 115 | d &= 0xff; |
115 | return d; | 116 | return d; |
116 | } | 117 | } |
118 | #endif | ||
117 | 119 | ||
118 | static void __init ati_bugs(int num, int slot, int func) | 120 | static void __init ati_bugs(int num, int slot, int func) |
119 | { | 121 | { |
diff --git a/arch/x86/kernel/efi.c b/arch/x86/kernel/efi.c index 1736acc4d7aa..96f7ac0bbf01 100644 --- a/arch/x86/kernel/efi.c +++ b/arch/x86/kernel/efi.c | |||
@@ -240,10 +240,35 @@ static void __init do_add_efi_memmap(void) | |||
240 | unsigned long long size = md->num_pages << EFI_PAGE_SHIFT; | 240 | unsigned long long size = md->num_pages << EFI_PAGE_SHIFT; |
241 | int e820_type; | 241 | int e820_type; |
242 | 242 | ||
243 | if (md->attribute & EFI_MEMORY_WB) | 243 | switch (md->type) { |
244 | e820_type = E820_RAM; | 244 | case EFI_LOADER_CODE: |
245 | else | 245 | case EFI_LOADER_DATA: |
246 | case EFI_BOOT_SERVICES_CODE: | ||
247 | case EFI_BOOT_SERVICES_DATA: | ||
248 | case EFI_CONVENTIONAL_MEMORY: | ||
249 | if (md->attribute & EFI_MEMORY_WB) | ||
250 | e820_type = E820_RAM; | ||
251 | else | ||
252 | e820_type = E820_RESERVED; | ||
253 | break; | ||
254 | case EFI_ACPI_RECLAIM_MEMORY: | ||
255 | e820_type = E820_ACPI; | ||
256 | break; | ||
257 | case EFI_ACPI_MEMORY_NVS: | ||
258 | e820_type = E820_NVS; | ||
259 | break; | ||
260 | case EFI_UNUSABLE_MEMORY: | ||
261 | e820_type = E820_UNUSABLE; | ||
262 | break; | ||
263 | default: | ||
264 | /* | ||
265 | * EFI_RESERVED_TYPE EFI_RUNTIME_SERVICES_CODE | ||
266 | * EFI_RUNTIME_SERVICES_DATA EFI_MEMORY_MAPPED_IO | ||
267 | * EFI_MEMORY_MAPPED_IO_PORT_SPACE EFI_PAL_CODE | ||
268 | */ | ||
246 | e820_type = E820_RESERVED; | 269 | e820_type = E820_RESERVED; |
270 | break; | ||
271 | } | ||
247 | e820_add_region(start, size, e820_type); | 272 | e820_add_region(start, size, e820_type); |
248 | } | 273 | } |
249 | sanitize_e820_map(e820.map, ARRAY_SIZE(e820.map), &e820.nr_map); | 274 | sanitize_e820_map(e820.map, ARRAY_SIZE(e820.map), &e820.nr_map); |
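
The switch replaces a purely attribute-based check: previously any descriptor with EFI_MEMORY_WB became E820_RAM, including regions such as runtime-services code or memory-mapped I/O. Now only loader, boot-services and conventional memory (all reusable by the OS once ExitBootServices() has run) can become E820_RAM; ACPI reclaim and NVS regions keep their dedicated e820 types, and unusable memory is flagged E820_UNUSABLE. A standalone sketch of the mapping, with the numeric type values taken from the UEFI specification rather than this patch:

	enum e820_kind { RAM, RESERVED, ACPI, NVS, UNUSABLE };

	static enum e820_kind efi_to_e820(unsigned int efi_type, int has_wb_attr)
	{
		switch (efi_type) {
		case 1: case 2:		/* EFI_LOADER_CODE / EFI_LOADER_DATA            */
		case 3: case 4:		/* EFI_BOOT_SERVICES_CODE / _DATA               */
		case 7:			/* EFI_CONVENTIONAL_MEMORY                      */
			return has_wb_attr ? RAM : RESERVED;
		case 9:			/* EFI_ACPI_RECLAIM_MEMORY                      */
			return ACPI;
		case 10:		/* EFI_ACPI_MEMORY_NVS                          */
			return NVS;
		case 8:			/* EFI_UNUSABLE_MEMORY                          */
			return UNUSABLE;
		default:		/* runtime services, MMIO, IO port space, PAL   */
			return RESERVED;
		}
	}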
diff --git a/arch/x86/kernel/entry_32.S b/arch/x86/kernel/entry_32.S index c929add475c9..c097e7d607c6 100644 --- a/arch/x86/kernel/entry_32.S +++ b/arch/x86/kernel/entry_32.S | |||
@@ -48,7 +48,6 @@ | |||
48 | #include <asm/segment.h> | 48 | #include <asm/segment.h> |
49 | #include <asm/smp.h> | 49 | #include <asm/smp.h> |
50 | #include <asm/page_types.h> | 50 | #include <asm/page_types.h> |
51 | #include <asm/desc.h> | ||
52 | #include <asm/percpu.h> | 51 | #include <asm/percpu.h> |
53 | #include <asm/dwarf2.h> | 52 | #include <asm/dwarf2.h> |
54 | #include <asm/processor-flags.h> | 53 | #include <asm/processor-flags.h> |
@@ -84,7 +83,7 @@ | |||
84 | #define preempt_stop(clobbers) DISABLE_INTERRUPTS(clobbers); TRACE_IRQS_OFF | 83 | #define preempt_stop(clobbers) DISABLE_INTERRUPTS(clobbers); TRACE_IRQS_OFF |
85 | #else | 84 | #else |
86 | #define preempt_stop(clobbers) | 85 | #define preempt_stop(clobbers) |
87 | #define resume_kernel restore_nocheck | 86 | #define resume_kernel restore_all |
88 | #endif | 87 | #endif |
89 | 88 | ||
90 | .macro TRACE_IRQS_IRET | 89 | .macro TRACE_IRQS_IRET |
@@ -372,7 +371,7 @@ END(ret_from_exception) | |||
372 | ENTRY(resume_kernel) | 371 | ENTRY(resume_kernel) |
373 | DISABLE_INTERRUPTS(CLBR_ANY) | 372 | DISABLE_INTERRUPTS(CLBR_ANY) |
374 | cmpl $0,TI_preempt_count(%ebp) # non-zero preempt_count ? | 373 | cmpl $0,TI_preempt_count(%ebp) # non-zero preempt_count ? |
375 | jnz restore_nocheck | 374 | jnz restore_all |
376 | need_resched: | 375 | need_resched: |
377 | movl TI_flags(%ebp), %ecx # need_resched set ? | 376 | movl TI_flags(%ebp), %ecx # need_resched set ? |
378 | testb $_TIF_NEED_RESCHED, %cl | 377 | testb $_TIF_NEED_RESCHED, %cl |
@@ -540,6 +539,8 @@ syscall_exit: | |||
540 | jne syscall_exit_work | 539 | jne syscall_exit_work |
541 | 540 | ||
542 | restore_all: | 541 | restore_all: |
542 | TRACE_IRQS_IRET | ||
543 | restore_all_notrace: | ||
543 | movl PT_EFLAGS(%esp), %eax # mix EFLAGS, SS and CS | 544 | movl PT_EFLAGS(%esp), %eax # mix EFLAGS, SS and CS |
544 | # Warning: PT_OLDSS(%esp) contains the wrong/random values if we | 545 | # Warning: PT_OLDSS(%esp) contains the wrong/random values if we |
545 | # are returning to the kernel. | 546 | # are returning to the kernel. |
@@ -551,8 +552,6 @@ restore_all: | |||
551 | CFI_REMEMBER_STATE | 552 | CFI_REMEMBER_STATE |
552 | je ldt_ss # returning to user-space with LDT SS | 553 | je ldt_ss # returning to user-space with LDT SS |
553 | restore_nocheck: | 554 | restore_nocheck: |
554 | TRACE_IRQS_IRET | ||
555 | restore_nocheck_notrace: | ||
556 | RESTORE_REGS 4 # skip orig_eax/error_code | 555 | RESTORE_REGS 4 # skip orig_eax/error_code |
557 | CFI_ADJUST_CFA_OFFSET -4 | 556 | CFI_ADJUST_CFA_OFFSET -4 |
558 | irq_return: | 557 | irq_return: |
@@ -588,22 +587,34 @@ ldt_ss: | |||
588 | jne restore_nocheck | 587 | jne restore_nocheck |
589 | #endif | 588 | #endif |
590 | 589 | ||
591 | /* If returning to userspace with 16bit stack, | 590 | /* |
592 | * try to fix the higher word of ESP, as the CPU | 591 | * Setup and switch to ESPFIX stack |
593 | * won't restore it. | 592 | * |
594 | * This is an "official" bug of all the x86-compatible | 593 | * We're returning to userspace with a 16 bit stack. The CPU will not |
595 | * CPUs, which we can try to work around to make | 594 | * restore the high word of ESP for us on executing iret... This is an |
596 | * dosemu and wine happy. */ | 595 | * "official" bug of all the x86-compatible CPUs, which we can work |
597 | movl PT_OLDESP(%esp), %eax | 596 | * around to make dosemu and wine happy. We do this by preloading the |
598 | movl %esp, %edx | 597 | * high word of ESP with the high word of the userspace ESP while |
599 | call patch_espfix_desc | 598 | * compensating for the offset by changing to the ESPFIX segment with |
599 | * a base address that makes up for the difference. | ||
600 | */ | ||
601 | mov %esp, %edx /* load kernel esp */ | ||
602 | mov PT_OLDESP(%esp), %eax /* load userspace esp */ | ||
603 | mov %dx, %ax /* eax: new kernel esp */ | ||
604 | sub %eax, %edx /* offset (low word is 0) */ | ||
605 | PER_CPU(gdt_page, %ebx) | ||
606 | shr $16, %edx | ||
607 | mov %dl, GDT_ENTRY_ESPFIX_SS * 8 + 4(%ebx) /* bits 16..23 */ | ||
608 | mov %dh, GDT_ENTRY_ESPFIX_SS * 8 + 7(%ebx) /* bits 24..31 */ | ||
600 | pushl $__ESPFIX_SS | 609 | pushl $__ESPFIX_SS |
601 | CFI_ADJUST_CFA_OFFSET 4 | 610 | CFI_ADJUST_CFA_OFFSET 4 |
602 | pushl %eax | 611 | push %eax /* new kernel esp */ |
603 | CFI_ADJUST_CFA_OFFSET 4 | 612 | CFI_ADJUST_CFA_OFFSET 4 |
613 | /* Disable interrupts, but do not irqtrace this section: we | ||
614 | * will soon execute iret and the tracer was already set to | ||
615 | * the irqstate after the iret */ | ||
604 | DISABLE_INTERRUPTS(CLBR_EAX) | 616 | DISABLE_INTERRUPTS(CLBR_EAX) |
605 | TRACE_IRQS_OFF | 617 | lss (%esp), %esp /* switch to espfix segment */ |
606 | lss (%esp), %esp | ||
607 | CFI_ADJUST_CFA_OFFSET -8 | 618 | CFI_ADJUST_CFA_OFFSET -8 |
608 | jmp restore_nocheck | 619 | jmp restore_nocheck |
609 | CFI_ENDPROC | 620 | CFI_ENDPROC |
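
For readers not fluent in the AT&T syntax above, the base patching amounts to the following C-level computation (a sketch with made-up names, not code from this patch):

	/* Sketch of the base computation done in assembly above; nothing here
	 * is meant to compile into the kernel. */
	static void espfix_prepare_sketch(unsigned long kernel_esp, unsigned long user_esp)
	{
		/* New stack pointer: high word from userspace, low word from the kernel. */
		unsigned long new_esp     = (user_esp & 0xffff0000) | (kernel_esp & 0xffffu);

		/* The ESPFIX segment base makes up the difference; its low word is zero. */
		unsigned long espfix_base = kernel_esp - new_esp;

		/*
		 * Bits 16..23 and 24..31 of espfix_base go into bytes 4 and 7 of the
		 * GDT_ENTRY_ESPFIX_SS descriptor.  After the "lss", base + new_esp
		 * points at the same linear address as the old kernel stack, while
		 * the high word of %esp already holds what userspace expects, which
		 * is exactly the part iret fails to restore on a 16-bit stack.
		 */
		(void)espfix_base;
	}

FIXUP_ESPFIX_STACK below performs the inverse: it reads those two base bytes back out of the GDT and adds them to %esp before switching back to __KERNEL_DS.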
@@ -716,15 +727,24 @@ PTREGSCALL(vm86) | |||
716 | PTREGSCALL(vm86old) | 727 | PTREGSCALL(vm86old) |
717 | 728 | ||
718 | .macro FIXUP_ESPFIX_STACK | 729 | .macro FIXUP_ESPFIX_STACK |
719 | /* since we are on a wrong stack, we cant make it a C code :( */ | 730 | /* |
731 | * Switch back from the ESPFIX stack to the normal zero-based stack | ||
732 | * | ||
733 | * We can't call C functions using the ESPFIX stack. This code reads | ||
734 | * the high word of the segment base from the GDT and switches to the | ||
735 | * normal stack and adjusts ESP with the matching offset. | ||
736 | */ | ||
737 | /* fixup the stack */ | ||
720 | PER_CPU(gdt_page, %ebx) | 738 | PER_CPU(gdt_page, %ebx) |
721 | GET_DESC_BASE(GDT_ENTRY_ESPFIX_SS, %ebx, %eax, %ax, %al, %ah) | 739 | mov GDT_ENTRY_ESPFIX_SS * 8 + 4(%ebx), %al /* bits 16..23 */ |
722 | addl %esp, %eax | 740 | mov GDT_ENTRY_ESPFIX_SS * 8 + 7(%ebx), %ah /* bits 24..31 */ |
741 | shl $16, %eax | ||
742 | addl %esp, %eax /* the adjusted stack pointer */ | ||
723 | pushl $__KERNEL_DS | 743 | pushl $__KERNEL_DS |
724 | CFI_ADJUST_CFA_OFFSET 4 | 744 | CFI_ADJUST_CFA_OFFSET 4 |
725 | pushl %eax | 745 | pushl %eax |
726 | CFI_ADJUST_CFA_OFFSET 4 | 746 | CFI_ADJUST_CFA_OFFSET 4 |
727 | lss (%esp), %esp | 747 | lss (%esp), %esp /* switch to the normal stack segment */ |
728 | CFI_ADJUST_CFA_OFFSET -8 | 748 | CFI_ADJUST_CFA_OFFSET -8 |
729 | .endm | 749 | .endm |
730 | .macro UNWIND_ESPFIX_STACK | 750 | .macro UNWIND_ESPFIX_STACK |
@@ -1154,6 +1174,7 @@ ENTRY(ftrace_graph_caller) | |||
1154 | pushl %edx | 1174 | pushl %edx |
1155 | movl 0xc(%esp), %edx | 1175 | movl 0xc(%esp), %edx |
1156 | lea 0x4(%ebp), %eax | 1176 | lea 0x4(%ebp), %eax |
1177 | movl (%ebp), %ecx | ||
1157 | subl $MCOUNT_INSN_SIZE, %edx | 1178 | subl $MCOUNT_INSN_SIZE, %edx |
1158 | call prepare_ftrace_return | 1179 | call prepare_ftrace_return |
1159 | popl %edx | 1180 | popl %edx |
@@ -1168,6 +1189,7 @@ return_to_handler: | |||
1168 | pushl %eax | 1189 | pushl %eax |
1169 | pushl %ecx | 1190 | pushl %ecx |
1170 | pushl %edx | 1191 | pushl %edx |
1192 | movl %ebp, %eax | ||
1171 | call ftrace_return_to_handler | 1193 | call ftrace_return_to_handler |
1172 | movl %eax, 0xc(%esp) | 1194 | movl %eax, 0xc(%esp) |
1173 | popl %edx | 1195 | popl %edx |
@@ -1329,7 +1351,7 @@ nmi_stack_correct: | |||
1329 | xorl %edx,%edx # zero error code | 1351 | xorl %edx,%edx # zero error code |
1330 | movl %esp,%eax # pt_regs pointer | 1352 | movl %esp,%eax # pt_regs pointer |
1331 | call do_nmi | 1353 | call do_nmi |
1332 | jmp restore_nocheck_notrace | 1354 | jmp restore_all_notrace |
1333 | CFI_ENDPROC | 1355 | CFI_ENDPROC |
1334 | 1356 | ||
1335 | nmi_stack_fixup: | 1357 | nmi_stack_fixup: |
diff --git a/arch/x86/kernel/entry_64.S b/arch/x86/kernel/entry_64.S index 38946c6e8433..c251be745107 100644 --- a/arch/x86/kernel/entry_64.S +++ b/arch/x86/kernel/entry_64.S | |||
@@ -135,6 +135,7 @@ ENTRY(ftrace_graph_caller) | |||
135 | 135 | ||
136 | leaq 8(%rbp), %rdi | 136 | leaq 8(%rbp), %rdi |
137 | movq 0x38(%rsp), %rsi | 137 | movq 0x38(%rsp), %rsi |
138 | movq (%rbp), %rdx | ||
138 | subq $MCOUNT_INSN_SIZE, %rsi | 139 | subq $MCOUNT_INSN_SIZE, %rsi |
139 | 140 | ||
140 | call prepare_ftrace_return | 141 | call prepare_ftrace_return |
@@ -147,27 +148,15 @@ END(ftrace_graph_caller) | |||
147 | GLOBAL(return_to_handler) | 148 | GLOBAL(return_to_handler) |
148 | subq $80, %rsp | 149 | subq $80, %rsp |
149 | 150 | ||
151 | /* Save the return values */ | ||
150 | movq %rax, (%rsp) | 152 | movq %rax, (%rsp) |
151 | movq %rcx, 8(%rsp) | 153 | movq %rdx, 8(%rsp) |
152 | movq %rdx, 16(%rsp) | 154 | movq %rbp, %rdi |
153 | movq %rsi, 24(%rsp) | ||
154 | movq %rdi, 32(%rsp) | ||
155 | movq %r8, 40(%rsp) | ||
156 | movq %r9, 48(%rsp) | ||
157 | movq %r10, 56(%rsp) | ||
158 | movq %r11, 64(%rsp) | ||
159 | 155 | ||
160 | call ftrace_return_to_handler | 156 | call ftrace_return_to_handler |
161 | 157 | ||
162 | movq %rax, 72(%rsp) | 158 | movq %rax, 72(%rsp) |
163 | movq 64(%rsp), %r11 | 159 | movq 8(%rsp), %rdx |
164 | movq 56(%rsp), %r10 | ||
165 | movq 48(%rsp), %r9 | ||
166 | movq 40(%rsp), %r8 | ||
167 | movq 32(%rsp), %rdi | ||
168 | movq 24(%rsp), %rsi | ||
169 | movq 16(%rsp), %rdx | ||
170 | movq 8(%rsp), %rcx | ||
171 | movq (%rsp), %rax | 160 | movq (%rsp), %rax |
172 | addq $72, %rsp | 161 | addq $72, %rsp |
173 | retq | 162 | retq |
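
return_to_handler runs where a traced function's ret would have landed, so the only live registers it must preserve are the ones that can carry a return value; in the SysV x86-64 ABI an integer or small-aggregate return value occupies %rax, or %rdx:%rax for 16-byte values, which is why only those two are saved now (the old code also saved argument and scratch registers that cannot hold a return value at this point). A tiny illustration, unrelated to this patch:

	struct pair {
		long lo;	/* comes back in %rax */
		long hi;	/* comes back in %rdx */
	};

	/* A 16-byte aggregate of two integers is returned in %rdx:%rax under the
	 * SysV x86-64 ABI, so preserving %rax and %rdx around the handler call
	 * is sufficient for such callees. */
	struct pair make_pair(long a, long b)
	{
		struct pair p = { .lo = a, .hi = b };
		return p;
	}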
@@ -976,6 +965,8 @@ END(\sym) | |||
976 | #ifdef CONFIG_SMP | 965 | #ifdef CONFIG_SMP |
977 | apicinterrupt IRQ_MOVE_CLEANUP_VECTOR \ | 966 | apicinterrupt IRQ_MOVE_CLEANUP_VECTOR \ |
978 | irq_move_cleanup_interrupt smp_irq_move_cleanup_interrupt | 967 | irq_move_cleanup_interrupt smp_irq_move_cleanup_interrupt |
968 | apicinterrupt REBOOT_VECTOR \ | ||
969 | reboot_interrupt smp_reboot_interrupt | ||
979 | #endif | 970 | #endif |
980 | 971 | ||
981 | #ifdef CONFIG_X86_UV | 972 | #ifdef CONFIG_X86_UV |
@@ -1007,10 +998,15 @@ apicinterrupt INVALIDATE_TLB_VECTOR_START+7 \ | |||
1007 | #endif | 998 | #endif |
1008 | 999 | ||
1009 | apicinterrupt THRESHOLD_APIC_VECTOR \ | 1000 | apicinterrupt THRESHOLD_APIC_VECTOR \ |
1010 | threshold_interrupt mce_threshold_interrupt | 1001 | threshold_interrupt smp_threshold_interrupt |
1011 | apicinterrupt THERMAL_APIC_VECTOR \ | 1002 | apicinterrupt THERMAL_APIC_VECTOR \ |
1012 | thermal_interrupt smp_thermal_interrupt | 1003 | thermal_interrupt smp_thermal_interrupt |
1013 | 1004 | ||
1005 | #ifdef CONFIG_X86_MCE | ||
1006 | apicinterrupt MCE_SELF_VECTOR \ | ||
1007 | mce_self_interrupt smp_mce_self_interrupt | ||
1008 | #endif | ||
1009 | |||
1014 | #ifdef CONFIG_SMP | 1010 | #ifdef CONFIG_SMP |
1015 | apicinterrupt CALL_FUNCTION_SINGLE_VECTOR \ | 1011 | apicinterrupt CALL_FUNCTION_SINGLE_VECTOR \ |
1016 | call_function_single_interrupt smp_call_function_single_interrupt | 1012 | call_function_single_interrupt smp_call_function_single_interrupt |
@@ -1025,6 +1021,11 @@ apicinterrupt ERROR_APIC_VECTOR \ | |||
1025 | apicinterrupt SPURIOUS_APIC_VECTOR \ | 1021 | apicinterrupt SPURIOUS_APIC_VECTOR \ |
1026 | spurious_interrupt smp_spurious_interrupt | 1022 | spurious_interrupt smp_spurious_interrupt |
1027 | 1023 | ||
1024 | #ifdef CONFIG_PERF_COUNTERS | ||
1025 | apicinterrupt LOCAL_PENDING_VECTOR \ | ||
1026 | perf_pending_interrupt smp_perf_pending_interrupt | ||
1027 | #endif | ||
1028 | |||
1028 | /* | 1029 | /* |
1029 | * Exception entry points. | 1030 | * Exception entry points. |
1030 | */ | 1031 | */ |
@@ -1379,10 +1380,15 @@ END(xen_failsafe_callback) | |||
1379 | paranoidzeroentry_ist debug do_debug DEBUG_STACK | 1380 | paranoidzeroentry_ist debug do_debug DEBUG_STACK |
1380 | paranoidzeroentry_ist int3 do_int3 DEBUG_STACK | 1381 | paranoidzeroentry_ist int3 do_int3 DEBUG_STACK |
1381 | paranoiderrorentry stack_segment do_stack_segment | 1382 | paranoiderrorentry stack_segment do_stack_segment |
1383 | #ifdef CONFIG_XEN | ||
1384 | zeroentry xen_debug do_debug | ||
1385 | zeroentry xen_int3 do_int3 | ||
1386 | errorentry xen_stack_segment do_stack_segment | ||
1387 | #endif | ||
1382 | errorentry general_protection do_general_protection | 1388 | errorentry general_protection do_general_protection |
1383 | errorentry page_fault do_page_fault | 1389 | errorentry page_fault do_page_fault |
1384 | #ifdef CONFIG_X86_MCE | 1390 | #ifdef CONFIG_X86_MCE |
1385 | paranoidzeroentry machine_check do_machine_check | 1391 | paranoidzeroentry machine_check *machine_check_vector(%rip) |
1386 | #endif | 1392 | #endif |
1387 | 1393 | ||
1388 | /* | 1394 | /* |
diff --git a/arch/x86/kernel/ftrace.c b/arch/x86/kernel/ftrace.c index b79c5533c421..d94e1ea3b9fe 100644 --- a/arch/x86/kernel/ftrace.c +++ b/arch/x86/kernel/ftrace.c | |||
@@ -408,7 +408,8 @@ int ftrace_disable_ftrace_graph_caller(void) | |||
408 | * Hook the return address and push it in the stack of return addrs | 408 | * Hook the return address and push it in the stack of return addrs |
409 | * in current thread info. | 409 | * in current thread info. |
410 | */ | 410 | */ |
411 | void prepare_ftrace_return(unsigned long *parent, unsigned long self_addr) | 411 | void prepare_ftrace_return(unsigned long *parent, unsigned long self_addr, |
412 | unsigned long frame_pointer) | ||
412 | { | 413 | { |
413 | unsigned long old; | 414 | unsigned long old; |
414 | int faulted; | 415 | int faulted; |
@@ -453,7 +454,8 @@ void prepare_ftrace_return(unsigned long *parent, unsigned long self_addr) | |||
453 | return; | 454 | return; |
454 | } | 455 | } |
455 | 456 | ||
456 | if (ftrace_push_return_trace(old, self_addr, &trace.depth) == -EBUSY) { | 457 | if (ftrace_push_return_trace(old, self_addr, &trace.depth, |
458 | frame_pointer) == -EBUSY) { | ||
457 | *parent = old; | 459 | *parent = old; |
458 | return; | 460 | return; |
459 | } | 461 | } |
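
The extra frame_pointer argument feeds the HAVE_FUNCTION_GRAPH_FP_TEST check selected in this series' Kconfig change: the caller's frame pointer is recorded when the return address is hooked, and compared again when the hooked return fires, so a desynchronized return stack is detected instead of silently jumping to the wrong address. Roughly like this (names below are made up; the real comparison lives in the generic function-graph code):

	#include <stdio.h>

	struct ret_stack_entry {
		unsigned long ret;	/* original return address, saved at entry */
		unsigned long fp;	/* caller frame pointer, saved at entry     */
	};

	static unsigned long pop_return_sketch(struct ret_stack_entry *e,
						unsigned long current_fp)
	{
		if (e->fp != current_fp) {
			/* return stack out of sync with the real stack: bail out */
			fprintf(stderr, "bad frame pointer %#lx != %#lx\n",
				current_fp, e->fp);
			return 0;
		}
		return e->ret;
	}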
diff --git a/arch/x86/kernel/head_32.S b/arch/x86/kernel/head_32.S index 30683883e0cd..8663afb56535 100644 --- a/arch/x86/kernel/head_32.S +++ b/arch/x86/kernel/head_32.S | |||
@@ -13,7 +13,6 @@ | |||
13 | #include <asm/segment.h> | 13 | #include <asm/segment.h> |
14 | #include <asm/page_types.h> | 14 | #include <asm/page_types.h> |
15 | #include <asm/pgtable_types.h> | 15 | #include <asm/pgtable_types.h> |
16 | #include <asm/desc.h> | ||
17 | #include <asm/cache.h> | 16 | #include <asm/cache.h> |
18 | #include <asm/thread_info.h> | 17 | #include <asm/thread_info.h> |
19 | #include <asm/asm-offsets.h> | 18 | #include <asm/asm-offsets.h> |
@@ -608,13 +607,6 @@ ignore_int: | |||
608 | ENTRY(initial_code) | 607 | ENTRY(initial_code) |
609 | .long i386_start_kernel | 608 | .long i386_start_kernel |
610 | 609 | ||
611 | .section .text | ||
612 | /* | ||
613 | * Real beginning of normal "text" segment | ||
614 | */ | ||
615 | ENTRY(stext) | ||
616 | ENTRY(_stext) | ||
617 | |||
618 | /* | 610 | /* |
619 | * BSS section | 611 | * BSS section |
620 | */ | 612 | */ |
diff --git a/arch/x86/kernel/head_64.S b/arch/x86/kernel/head_64.S index 54b29bb24e71..fa54f78e2a05 100644 --- a/arch/x86/kernel/head_64.S +++ b/arch/x86/kernel/head_64.S | |||
@@ -12,7 +12,6 @@ | |||
12 | #include <linux/linkage.h> | 12 | #include <linux/linkage.h> |
13 | #include <linux/threads.h> | 13 | #include <linux/threads.h> |
14 | #include <linux/init.h> | 14 | #include <linux/init.h> |
15 | #include <asm/desc.h> | ||
16 | #include <asm/segment.h> | 15 | #include <asm/segment.h> |
17 | #include <asm/pgtable.h> | 16 | #include <asm/pgtable.h> |
18 | #include <asm/page.h> | 17 | #include <asm/page.h> |
diff --git a/arch/x86/kernel/hpet.c b/arch/x86/kernel/hpet.c index 81408b93f887..dedc2bddf7a5 100644 --- a/arch/x86/kernel/hpet.c +++ b/arch/x86/kernel/hpet.c | |||
@@ -510,7 +510,8 @@ static int hpet_setup_irq(struct hpet_dev *dev) | |||
510 | { | 510 | { |
511 | 511 | ||
512 | if (request_irq(dev->irq, hpet_interrupt_handler, | 512 | if (request_irq(dev->irq, hpet_interrupt_handler, |
513 | IRQF_DISABLED|IRQF_NOBALANCING, dev->name, dev)) | 513 | IRQF_TIMER | IRQF_DISABLED | IRQF_NOBALANCING, |
514 | dev->name, dev)) | ||
514 | return -1; | 515 | return -1; |
515 | 516 | ||
516 | disable_irq(dev->irq); | 517 | disable_irq(dev->irq); |
diff --git a/arch/x86/kernel/i8253.c b/arch/x86/kernel/i8253.c index c2e0bb0890d4..5cf36c053ac4 100644 --- a/arch/x86/kernel/i8253.c +++ b/arch/x86/kernel/i8253.c | |||
@@ -7,6 +7,7 @@ | |||
7 | #include <linux/spinlock.h> | 7 | #include <linux/spinlock.h> |
8 | #include <linux/jiffies.h> | 8 | #include <linux/jiffies.h> |
9 | #include <linux/module.h> | 9 | #include <linux/module.h> |
10 | #include <linux/timex.h> | ||
10 | #include <linux/delay.h> | 11 | #include <linux/delay.h> |
11 | #include <linux/init.h> | 12 | #include <linux/init.h> |
12 | #include <linux/io.h> | 13 | #include <linux/io.h> |
diff --git a/arch/x86/kernel/init_task.c b/arch/x86/kernel/init_task.c index df3bf269beab..270ff83efc11 100644 --- a/arch/x86/kernel/init_task.c +++ b/arch/x86/kernel/init_task.c | |||
@@ -12,7 +12,6 @@ | |||
12 | 12 | ||
13 | static struct signal_struct init_signals = INIT_SIGNALS(init_signals); | 13 | static struct signal_struct init_signals = INIT_SIGNALS(init_signals); |
14 | static struct sighand_struct init_sighand = INIT_SIGHAND(init_sighand); | 14 | static struct sighand_struct init_sighand = INIT_SIGHAND(init_sighand); |
15 | struct mm_struct init_mm = INIT_MM(init_mm); | ||
16 | 15 | ||
17 | /* | 16 | /* |
18 | * Initial thread structure. | 17 | * Initial thread structure. |
diff --git a/arch/x86/kernel/irq.c b/arch/x86/kernel/irq.c index c3fe010d74c8..b0cdde6932f5 100644 --- a/arch/x86/kernel/irq.c +++ b/arch/x86/kernel/irq.c | |||
@@ -12,6 +12,8 @@ | |||
12 | #include <asm/io_apic.h> | 12 | #include <asm/io_apic.h> |
13 | #include <asm/irq.h> | 13 | #include <asm/irq.h> |
14 | #include <asm/idle.h> | 14 | #include <asm/idle.h> |
15 | #include <asm/mce.h> | ||
16 | #include <asm/hw_irq.h> | ||
15 | 17 | ||
16 | atomic_t irq_err_count; | 18 | atomic_t irq_err_count; |
17 | 19 | ||
@@ -24,9 +26,9 @@ void (*generic_interrupt_extension)(void) = NULL; | |||
24 | */ | 26 | */ |
25 | void ack_bad_irq(unsigned int irq) | 27 | void ack_bad_irq(unsigned int irq) |
26 | { | 28 | { |
27 | printk(KERN_ERR "unexpected IRQ trap at vector %02x\n", irq); | 29 | if (printk_ratelimit()) |
30 | pr_err("unexpected IRQ trap at vector %02x\n", irq); | ||
28 | 31 | ||
29 | #ifdef CONFIG_X86_LOCAL_APIC | ||
30 | /* | 32 | /* |
31 | * Currently unexpected vectors happen only on SMP and APIC. | 33 | * Currently unexpected vectors happen only on SMP and APIC. |
32 | * We _must_ ack these because every local APIC has only N | 34 | * We _must_ ack these because every local APIC has only N |
@@ -36,9 +38,7 @@ void ack_bad_irq(unsigned int irq) | |||
36 | * completely. | 38 | * completely. |
37 | * But only ack when the APIC is enabled -AK | 39 | * But only ack when the APIC is enabled -AK |
38 | */ | 40 | */ |
39 | if (cpu_has_apic) | 41 | ack_APIC_irq(); |
40 | ack_APIC_irq(); | ||
41 | #endif | ||
42 | } | 42 | } |
43 | 43 | ||
44 | #define irq_stats(x) (&per_cpu(irq_stat, x)) | 44 | #define irq_stats(x) (&per_cpu(irq_stat, x)) |
@@ -63,6 +63,14 @@ static int show_other_interrupts(struct seq_file *p, int prec) | |||
63 | for_each_online_cpu(j) | 63 | for_each_online_cpu(j) |
64 | seq_printf(p, "%10u ", irq_stats(j)->irq_spurious_count); | 64 | seq_printf(p, "%10u ", irq_stats(j)->irq_spurious_count); |
65 | seq_printf(p, " Spurious interrupts\n"); | 65 | seq_printf(p, " Spurious interrupts\n"); |
66 | seq_printf(p, "%*s: ", prec, "CNT"); | ||
67 | for_each_online_cpu(j) | ||
68 | seq_printf(p, "%10u ", irq_stats(j)->apic_perf_irqs); | ||
69 | seq_printf(p, " Performance counter interrupts\n"); | ||
70 | seq_printf(p, "%*s: ", prec, "PND"); | ||
71 | for_each_online_cpu(j) | ||
72 | seq_printf(p, "%10u ", irq_stats(j)->apic_pending_irqs); | ||
73 | seq_printf(p, " Performance pending work\n"); | ||
66 | #endif | 74 | #endif |
67 | if (generic_interrupt_extension) { | 75 | if (generic_interrupt_extension) { |
68 | seq_printf(p, "%*s: ", prec, "PLT"); | 76 | seq_printf(p, "%*s: ", prec, "PLT"); |
@@ -89,13 +97,23 @@ static int show_other_interrupts(struct seq_file *p, int prec) | |||
89 | for_each_online_cpu(j) | 97 | for_each_online_cpu(j) |
90 | seq_printf(p, "%10u ", irq_stats(j)->irq_thermal_count); | 98 | seq_printf(p, "%10u ", irq_stats(j)->irq_thermal_count); |
91 | seq_printf(p, " Thermal event interrupts\n"); | 99 | seq_printf(p, " Thermal event interrupts\n"); |
92 | # ifdef CONFIG_X86_64 | 100 | # ifdef CONFIG_X86_MCE_THRESHOLD |
93 | seq_printf(p, "%*s: ", prec, "THR"); | 101 | seq_printf(p, "%*s: ", prec, "THR"); |
94 | for_each_online_cpu(j) | 102 | for_each_online_cpu(j) |
95 | seq_printf(p, "%10u ", irq_stats(j)->irq_threshold_count); | 103 | seq_printf(p, "%10u ", irq_stats(j)->irq_threshold_count); |
96 | seq_printf(p, " Threshold APIC interrupts\n"); | 104 | seq_printf(p, " Threshold APIC interrupts\n"); |
97 | # endif | 105 | # endif |
98 | #endif | 106 | #endif |
107 | #ifdef CONFIG_X86_NEW_MCE | ||
108 | seq_printf(p, "%*s: ", prec, "MCE"); | ||
109 | for_each_online_cpu(j) | ||
110 | seq_printf(p, "%10u ", per_cpu(mce_exception_count, j)); | ||
111 | seq_printf(p, " Machine check exceptions\n"); | ||
112 | seq_printf(p, "%*s: ", prec, "MCP"); | ||
113 | for_each_online_cpu(j) | ||
114 | seq_printf(p, "%10u ", per_cpu(mce_poll_count, j)); | ||
115 | seq_printf(p, " Machine check polls\n"); | ||
116 | #endif | ||
99 | seq_printf(p, "%*s: %10u\n", prec, "ERR", atomic_read(&irq_err_count)); | 117 | seq_printf(p, "%*s: %10u\n", prec, "ERR", atomic_read(&irq_err_count)); |
100 | #if defined(CONFIG_X86_IO_APIC) | 118 | #if defined(CONFIG_X86_IO_APIC) |
101 | seq_printf(p, "%*s: %10u\n", prec, "MIS", atomic_read(&irq_mis_count)); | 119 | seq_printf(p, "%*s: %10u\n", prec, "MIS", atomic_read(&irq_mis_count)); |
@@ -166,6 +184,8 @@ u64 arch_irq_stat_cpu(unsigned int cpu) | |||
166 | #ifdef CONFIG_X86_LOCAL_APIC | 184 | #ifdef CONFIG_X86_LOCAL_APIC |
167 | sum += irq_stats(cpu)->apic_timer_irqs; | 185 | sum += irq_stats(cpu)->apic_timer_irqs; |
168 | sum += irq_stats(cpu)->irq_spurious_count; | 186 | sum += irq_stats(cpu)->irq_spurious_count; |
187 | sum += irq_stats(cpu)->apic_perf_irqs; | ||
188 | sum += irq_stats(cpu)->apic_pending_irqs; | ||
169 | #endif | 189 | #endif |
170 | if (generic_interrupt_extension) | 190 | if (generic_interrupt_extension) |
171 | sum += irq_stats(cpu)->generic_irqs; | 191 | sum += irq_stats(cpu)->generic_irqs; |
@@ -176,9 +196,13 @@ u64 arch_irq_stat_cpu(unsigned int cpu) | |||
176 | #endif | 196 | #endif |
177 | #ifdef CONFIG_X86_MCE | 197 | #ifdef CONFIG_X86_MCE |
178 | sum += irq_stats(cpu)->irq_thermal_count; | 198 | sum += irq_stats(cpu)->irq_thermal_count; |
179 | # ifdef CONFIG_X86_64 | 199 | # ifdef CONFIG_X86_MCE_THRESHOLD |
180 | sum += irq_stats(cpu)->irq_threshold_count; | 200 | sum += irq_stats(cpu)->irq_threshold_count; |
201 | # endif | ||
181 | #endif | 202 | #endif |
203 | #ifdef CONFIG_X86_NEW_MCE | ||
204 | sum += per_cpu(mce_exception_count, cpu); | ||
205 | sum += per_cpu(mce_poll_count, cpu); | ||
182 | #endif | 206 | #endif |
183 | return sum; | 207 | return sum; |
184 | } | 208 | } |
@@ -213,14 +237,11 @@ unsigned int __irq_entry do_IRQ(struct pt_regs *regs) | |||
213 | irq = __get_cpu_var(vector_irq)[vector]; | 237 | irq = __get_cpu_var(vector_irq)[vector]; |
214 | 238 | ||
215 | if (!handle_irq(irq, regs)) { | 239 | if (!handle_irq(irq, regs)) { |
216 | #ifdef CONFIG_X86_64 | 240 | ack_APIC_irq(); |
217 | if (!disable_apic) | ||
218 | ack_APIC_irq(); | ||
219 | #endif | ||
220 | 241 | ||
221 | if (printk_ratelimit()) | 242 | if (printk_ratelimit()) |
222 | printk(KERN_EMERG "%s: %d.%d No irq handler for vector (irq %d)\n", | 243 | pr_emerg("%s: %d.%d No irq handler for vector (irq %d)\n", |
223 | __func__, smp_processor_id(), vector, irq); | 244 | __func__, smp_processor_id(), vector, irq); |
224 | } | 245 | } |
225 | 246 | ||
226 | irq_exit(); | 247 | irq_exit(); |
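
The new /proc/interrupts rows and arch_irq_stat_cpu() terms are plain per-cpu counters: "CNT" counts performance counter interrupts, "PND" counts performance pending-work self-interrupts, and "MCE"/"MCP" count machine-check exceptions and background polls. The counters are bumped in the respective handlers, roughly as in this sketch (not code from this file):

	/* Sketch: how a handler feeds one of the new columns.  inc_irq_stat()
	 * increments the named field of this CPU's irq_stat. */
	void smp_perf_pending_interrupt_sketch(void)
	{
		irq_enter();
		ack_APIC_irq();
		inc_irq_stat(apic_pending_irqs);	/* shows up as "PND" above */
		/* ... process the deferred perf work ... */
		irq_exit();
	}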
diff --git a/arch/x86/kernel/irqinit_32.c b/arch/x86/kernel/irqinit.c index 368b0a8836f9..696f0e475c2d 100644 --- a/arch/x86/kernel/irqinit_32.c +++ b/arch/x86/kernel/irqinit.c | |||
@@ -1,20 +1,25 @@ | |||
1 | #include <linux/linkage.h> | ||
1 | #include <linux/errno.h> | 2 | #include <linux/errno.h> |
2 | #include <linux/signal.h> | 3 | #include <linux/signal.h> |
3 | #include <linux/sched.h> | 4 | #include <linux/sched.h> |
4 | #include <linux/ioport.h> | 5 | #include <linux/ioport.h> |
5 | #include <linux/interrupt.h> | 6 | #include <linux/interrupt.h> |
7 | #include <linux/timex.h> | ||
6 | #include <linux/slab.h> | 8 | #include <linux/slab.h> |
7 | #include <linux/random.h> | 9 | #include <linux/random.h> |
10 | #include <linux/kprobes.h> | ||
8 | #include <linux/init.h> | 11 | #include <linux/init.h> |
9 | #include <linux/kernel_stat.h> | 12 | #include <linux/kernel_stat.h> |
10 | #include <linux/sysdev.h> | 13 | #include <linux/sysdev.h> |
11 | #include <linux/bitops.h> | 14 | #include <linux/bitops.h> |
15 | #include <linux/acpi.h> | ||
12 | #include <linux/io.h> | 16 | #include <linux/io.h> |
13 | #include <linux/delay.h> | 17 | #include <linux/delay.h> |
14 | 18 | ||
15 | #include <asm/atomic.h> | 19 | #include <asm/atomic.h> |
16 | #include <asm/system.h> | 20 | #include <asm/system.h> |
17 | #include <asm/timer.h> | 21 | #include <asm/timer.h> |
22 | #include <asm/hw_irq.h> | ||
18 | #include <asm/pgtable.h> | 23 | #include <asm/pgtable.h> |
19 | #include <asm/desc.h> | 24 | #include <asm/desc.h> |
20 | #include <asm/apic.h> | 25 | #include <asm/apic.h> |
@@ -22,7 +27,23 @@ | |||
22 | #include <asm/i8259.h> | 27 | #include <asm/i8259.h> |
23 | #include <asm/traps.h> | 28 | #include <asm/traps.h> |
24 | 29 | ||
30 | /* | ||
31 | * ISA PIC or low IO-APIC triggered (INTA-cycle or APIC) interrupts: | ||
32 | * (these are usually mapped to vectors 0x30-0x3f) | ||
33 | */ | ||
34 | |||
35 | /* | ||
36 | * The IO-APIC gives us many more interrupt sources. Most of these | ||
37 | * are unused but an SMP system is supposed to have enough memory ... | ||
38 | * sometimes (mostly wrt. hw bugs) we get corrupted vectors all | ||
39 | * across the spectrum, so we really want to be prepared to get all | ||
40 | * of these. Plus, more powerful systems might have more than 64 | ||
41 | * IO-APIC registers. | ||
42 | * | ||
43 | * (these are usually mapped into the 0x30-0xff vector range) | ||
44 | */ | ||
25 | 45 | ||
46 | #ifdef CONFIG_X86_32 | ||
26 | /* | 47 | /* |
27 | * Note that on a 486, we don't want to do a SIGFPE on an irq13 | 48 | * Note that on a 486, we don't want to do a SIGFPE on an irq13 |
28 | * as the irq is unreliable, and exception 16 works correctly | 49 | * as the irq is unreliable, and exception 16 works correctly |
@@ -52,30 +73,7 @@ static struct irqaction fpu_irq = { | |||
52 | .handler = math_error_irq, | 73 | .handler = math_error_irq, |
53 | .name = "fpu", | 74 | .name = "fpu", |
54 | }; | 75 | }; |
55 | |||
56 | void __init init_ISA_irqs(void) | ||
57 | { | ||
58 | int i; | ||
59 | |||
60 | #ifdef CONFIG_X86_LOCAL_APIC | ||
61 | init_bsp_APIC(); | ||
62 | #endif | 76 | #endif |
63 | init_8259A(0); | ||
64 | |||
65 | /* | ||
66 | * 16 old-style INTA-cycle interrupts: | ||
67 | */ | ||
68 | for (i = 0; i < NR_IRQS_LEGACY; i++) { | ||
69 | struct irq_desc *desc = irq_to_desc(i); | ||
70 | |||
71 | desc->status = IRQ_DISABLED; | ||
72 | desc->action = NULL; | ||
73 | desc->depth = 1; | ||
74 | |||
75 | set_irq_chip_and_handler_name(i, &i8259A_chip, | ||
76 | handle_level_irq, "XT"); | ||
77 | } | ||
78 | } | ||
79 | 77 | ||
80 | /* | 78 | /* |
81 | * IRQ2 is cascade interrupt to second interrupt controller | 79 | * IRQ2 is cascade interrupt to second interrupt controller |
@@ -118,29 +116,37 @@ int vector_used_by_percpu_irq(unsigned int vector) | |||
118 | return 0; | 116 | return 0; |
119 | } | 117 | } |
120 | 118 | ||
121 | /* Overridden in paravirt.c */ | 119 | static void __init init_ISA_irqs(void) |
122 | void init_IRQ(void) __attribute__((weak, alias("native_init_IRQ"))); | ||
123 | |||
124 | void __init native_init_IRQ(void) | ||
125 | { | 120 | { |
126 | int i; | 121 | int i; |
127 | 122 | ||
128 | /* Execute any quirks before the call gates are initialised: */ | 123 | #if defined(CONFIG_X86_64) || defined(CONFIG_X86_LOCAL_APIC) |
129 | x86_quirk_pre_intr_init(); | 124 | init_bsp_APIC(); |
125 | #endif | ||
126 | init_8259A(0); | ||
130 | 127 | ||
131 | /* | 128 | /* |
132 | * Cover the whole vector space, no vector can escape | 129 | * 16 old-style INTA-cycle interrupts: |
133 | * us. (some of these will be overridden and become | ||
134 | * 'special' SMP interrupts) | ||
135 | */ | 130 | */ |
136 | for (i = FIRST_EXTERNAL_VECTOR; i < NR_VECTORS; i++) { | 131 | for (i = 0; i < NR_IRQS_LEGACY; i++) { |
137 | /* SYSCALL_VECTOR was reserved in trap_init. */ | 132 | struct irq_desc *desc = irq_to_desc(i); |
138 | if (i != SYSCALL_VECTOR) | 133 | |
139 | set_intr_gate(i, interrupt[i-FIRST_EXTERNAL_VECTOR]); | 134 | desc->status = IRQ_DISABLED; |
135 | desc->action = NULL; | ||
136 | desc->depth = 1; | ||
137 | |||
138 | set_irq_chip_and_handler_name(i, &i8259A_chip, | ||
139 | handle_level_irq, "XT"); | ||
140 | } | 140 | } |
141 | } | ||
141 | 142 | ||
143 | /* Overridden in paravirt.c */ | ||
144 | void init_IRQ(void) __attribute__((weak, alias("native_init_IRQ"))); | ||
142 | 145 | ||
143 | #if defined(CONFIG_X86_LOCAL_APIC) && defined(CONFIG_SMP) | 146 | static void __init smp_intr_init(void) |
147 | { | ||
148 | #ifdef CONFIG_SMP | ||
149 | #if defined(CONFIG_X86_64) || defined(CONFIG_X86_LOCAL_APIC) | ||
144 | /* | 150 | /* |
145 | * The reschedule interrupt is a CPU-to-CPU reschedule-helper | 151 | * The reschedule interrupt is a CPU-to-CPU reschedule-helper |
146 | * IPI, driven by wakeup. | 152 | * IPI, driven by wakeup. |
@@ -160,16 +166,35 @@ void __init native_init_IRQ(void) | |||
160 | /* IPI for generic function call */ | 166 | /* IPI for generic function call */ |
161 | alloc_intr_gate(CALL_FUNCTION_VECTOR, call_function_interrupt); | 167 | alloc_intr_gate(CALL_FUNCTION_VECTOR, call_function_interrupt); |
162 | 168 | ||
163 | /* IPI for single call function */ | 169 | /* IPI for generic single function call */ |
164 | alloc_intr_gate(CALL_FUNCTION_SINGLE_VECTOR, | 170 | alloc_intr_gate(CALL_FUNCTION_SINGLE_VECTOR, |
165 | call_function_single_interrupt); | 171 | call_function_single_interrupt); |
166 | 172 | ||
167 | /* Low priority IPI to cleanup after moving an irq */ | 173 | /* Low priority IPI to cleanup after moving an irq */ |
168 | set_intr_gate(IRQ_MOVE_CLEANUP_VECTOR, irq_move_cleanup_interrupt); | 174 | set_intr_gate(IRQ_MOVE_CLEANUP_VECTOR, irq_move_cleanup_interrupt); |
169 | set_bit(IRQ_MOVE_CLEANUP_VECTOR, used_vectors); | 175 | set_bit(IRQ_MOVE_CLEANUP_VECTOR, used_vectors); |
176 | |||
177 | /* IPI used for rebooting/stopping */ | ||
178 | alloc_intr_gate(REBOOT_VECTOR, reboot_interrupt); | ||
170 | #endif | 179 | #endif |
180 | #endif /* CONFIG_SMP */ | ||
181 | } | ||
182 | |||
183 | static void __init apic_intr_init(void) | ||
184 | { | ||
185 | smp_intr_init(); | ||
171 | 186 | ||
172 | #ifdef CONFIG_X86_LOCAL_APIC | 187 | #ifdef CONFIG_X86_THERMAL_VECTOR |
188 | alloc_intr_gate(THERMAL_APIC_VECTOR, thermal_interrupt); | ||
189 | #endif | ||
190 | #ifdef CONFIG_X86_THRESHOLD | ||
191 | alloc_intr_gate(THRESHOLD_APIC_VECTOR, threshold_interrupt); | ||
192 | #endif | ||
193 | #if defined(CONFIG_X86_NEW_MCE) && defined(CONFIG_X86_LOCAL_APIC) | ||
194 | alloc_intr_gate(MCE_SELF_VECTOR, mce_self_interrupt); | ||
195 | #endif | ||
196 | |||
197 | #if defined(CONFIG_X86_64) || defined(CONFIG_X86_LOCAL_APIC) | ||
173 | /* self generated IPI for local APIC timer */ | 198 | /* self generated IPI for local APIC timer */ |
174 | alloc_intr_gate(LOCAL_TIMER_VECTOR, apic_timer_interrupt); | 199 | alloc_intr_gate(LOCAL_TIMER_VECTOR, apic_timer_interrupt); |
175 | 200 | ||
@@ -179,16 +204,59 @@ void __init native_init_IRQ(void) | |||
179 | /* IPI vectors for APIC spurious and error interrupts */ | 204 | /* IPI vectors for APIC spurious and error interrupts */ |
180 | alloc_intr_gate(SPURIOUS_APIC_VECTOR, spurious_interrupt); | 205 | alloc_intr_gate(SPURIOUS_APIC_VECTOR, spurious_interrupt); |
181 | alloc_intr_gate(ERROR_APIC_VECTOR, error_interrupt); | 206 | alloc_intr_gate(ERROR_APIC_VECTOR, error_interrupt); |
207 | |||
208 | /* Performance monitoring interrupts: */ | ||
209 | # ifdef CONFIG_PERF_COUNTERS | ||
210 | alloc_intr_gate(LOCAL_PENDING_VECTOR, perf_pending_interrupt); | ||
211 | # endif | ||
212 | |||
182 | #endif | 213 | #endif |
214 | } | ||
183 | 215 | ||
184 | #if defined(CONFIG_X86_LOCAL_APIC) && defined(CONFIG_X86_MCE_P4THERMAL) | 216 | /** |
185 | /* thermal monitor LVT interrupt */ | 217 | * x86_quirk_pre_intr_init - initialisation prior to setting up interrupt vectors |
186 | alloc_intr_gate(THERMAL_APIC_VECTOR, thermal_interrupt); | 218 | * |
219 | * Description: | ||
220 | * Perform any necessary interrupt initialisation prior to setting up | ||
221 | * the "ordinary" interrupt call gates. For legacy reasons, the ISA | ||
222 | * interrupts should be initialised here if the machine emulates a PC | ||
223 | * in any way. | ||
224 | **/ | ||
225 | static void __init x86_quirk_pre_intr_init(void) | ||
226 | { | ||
227 | #ifdef CONFIG_X86_32 | ||
228 | if (x86_quirks->arch_pre_intr_init) { | ||
229 | if (x86_quirks->arch_pre_intr_init()) | ||
230 | return; | ||
231 | } | ||
187 | #endif | 232 | #endif |
233 | init_ISA_irqs(); | ||
234 | } | ||
235 | |||
236 | void __init native_init_IRQ(void) | ||
237 | { | ||
238 | int i; | ||
239 | |||
240 | /* Execute any quirks before the call gates are initialised: */ | ||
241 | x86_quirk_pre_intr_init(); | ||
242 | |||
243 | apic_intr_init(); | ||
244 | |||
245 | /* | ||
246 | * Cover the whole vector space, no vector can escape | ||
247 | * us. (some of these will be overridden and become | ||
248 | * 'special' SMP interrupts) | ||
249 | */ | ||
250 | for (i = FIRST_EXTERNAL_VECTOR; i < NR_VECTORS; i++) { | ||
251 | /* IA32_SYSCALL_VECTOR could be used in trap_init already. */ | ||
252 | if (!test_bit(i, used_vectors)) | ||
253 | set_intr_gate(i, interrupt[i-FIRST_EXTERNAL_VECTOR]); | ||
254 | } | ||
188 | 255 | ||
189 | if (!acpi_ioapic) | 256 | if (!acpi_ioapic) |
190 | setup_irq(2, &irq2); | 257 | setup_irq(2, &irq2); |
191 | 258 | ||
259 | #ifdef CONFIG_X86_32 | ||
192 | /* | 260 | /* |
193 | * Call quirks after call gates are initialised (usually add in | 261 | * Call quirks after call gates are initialised (usually add in |
194 | * the architecture specific gates): | 262 | * the architecture specific gates): |
@@ -203,4 +271,5 @@ void __init native_init_IRQ(void) | |||
203 | setup_irq(FPU_IRQ, &fpu_irq); | 271 | setup_irq(FPU_IRQ, &fpu_irq); |
204 | 272 | ||
205 | irq_ctx_init(smp_processor_id()); | 273 | irq_ctx_init(smp_processor_id()); |
274 | #endif | ||
206 | } | 275 | } |
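
One subtle point in the unified version: apic_intr_init() now runs before the catch-all loop, not after it as the deleted 64-bit file did, because alloc_intr_gate() both installs the gate and marks its vector in used_vectors, and the loop then skips every vector that is already claimed (including the syscall vector reserved by trap_init()). A simplified model of what alloc_intr_gate() is assumed to do here; the real helpers live in the x86 descriptor headers:

	/* Simplified model, for illustration only. */
	static inline void alloc_intr_gate_sketch(unsigned int vector, void *handler)
	{
		BUG_ON(test_bit(vector, used_vectors));	/* each system vector is claimed once */
		set_bit(vector, used_vectors);		/* the catch-all loop will skip it    */
		set_intr_gate(vector, handler);
	}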
diff --git a/arch/x86/kernel/irqinit_64.c b/arch/x86/kernel/irqinit_64.c deleted file mode 100644 index 8cd10537fd46..000000000000 --- a/arch/x86/kernel/irqinit_64.c +++ /dev/null | |||
@@ -1,177 +0,0 @@ | |||
1 | #include <linux/linkage.h> | ||
2 | #include <linux/errno.h> | ||
3 | #include <linux/signal.h> | ||
4 | #include <linux/sched.h> | ||
5 | #include <linux/ioport.h> | ||
6 | #include <linux/interrupt.h> | ||
7 | #include <linux/timex.h> | ||
8 | #include <linux/slab.h> | ||
9 | #include <linux/random.h> | ||
10 | #include <linux/init.h> | ||
11 | #include <linux/kernel_stat.h> | ||
12 | #include <linux/sysdev.h> | ||
13 | #include <linux/bitops.h> | ||
14 | #include <linux/acpi.h> | ||
15 | #include <linux/io.h> | ||
16 | #include <linux/delay.h> | ||
17 | |||
18 | #include <asm/atomic.h> | ||
19 | #include <asm/system.h> | ||
20 | #include <asm/hw_irq.h> | ||
21 | #include <asm/pgtable.h> | ||
22 | #include <asm/desc.h> | ||
23 | #include <asm/apic.h> | ||
24 | #include <asm/i8259.h> | ||
25 | |||
26 | /* | ||
27 | * ISA PIC or low IO-APIC triggered (INTA-cycle or APIC) interrupts: | ||
28 | * (these are usually mapped to vectors 0x30-0x3f) | ||
29 | */ | ||
30 | |||
31 | /* | ||
32 | * The IO-APIC gives us many more interrupt sources. Most of these | ||
33 | * are unused but an SMP system is supposed to have enough memory ... | ||
34 | * sometimes (mostly wrt. hw bugs) we get corrupted vectors all | ||
35 | * across the spectrum, so we really want to be prepared to get all | ||
36 | * of these. Plus, more powerful systems might have more than 64 | ||
37 | * IO-APIC registers. | ||
38 | * | ||
39 | * (these are usually mapped into the 0x30-0xff vector range) | ||
40 | */ | ||
41 | |||
42 | /* | ||
43 | * IRQ2 is cascade interrupt to second interrupt controller | ||
44 | */ | ||
45 | |||
46 | static struct irqaction irq2 = { | ||
47 | .handler = no_action, | ||
48 | .name = "cascade", | ||
49 | }; | ||
50 | DEFINE_PER_CPU(vector_irq_t, vector_irq) = { | ||
51 | [0 ... IRQ0_VECTOR - 1] = -1, | ||
52 | [IRQ0_VECTOR] = 0, | ||
53 | [IRQ1_VECTOR] = 1, | ||
54 | [IRQ2_VECTOR] = 2, | ||
55 | [IRQ3_VECTOR] = 3, | ||
56 | [IRQ4_VECTOR] = 4, | ||
57 | [IRQ5_VECTOR] = 5, | ||
58 | [IRQ6_VECTOR] = 6, | ||
59 | [IRQ7_VECTOR] = 7, | ||
60 | [IRQ8_VECTOR] = 8, | ||
61 | [IRQ9_VECTOR] = 9, | ||
62 | [IRQ10_VECTOR] = 10, | ||
63 | [IRQ11_VECTOR] = 11, | ||
64 | [IRQ12_VECTOR] = 12, | ||
65 | [IRQ13_VECTOR] = 13, | ||
66 | [IRQ14_VECTOR] = 14, | ||
67 | [IRQ15_VECTOR] = 15, | ||
68 | [IRQ15_VECTOR + 1 ... NR_VECTORS - 1] = -1 | ||
69 | }; | ||
70 | |||
71 | int vector_used_by_percpu_irq(unsigned int vector) | ||
72 | { | ||
73 | int cpu; | ||
74 | |||
75 | for_each_online_cpu(cpu) { | ||
76 | if (per_cpu(vector_irq, cpu)[vector] != -1) | ||
77 | return 1; | ||
78 | } | ||
79 | |||
80 | return 0; | ||
81 | } | ||
82 | |||
83 | static void __init init_ISA_irqs(void) | ||
84 | { | ||
85 | int i; | ||
86 | |||
87 | init_bsp_APIC(); | ||
88 | init_8259A(0); | ||
89 | |||
90 | for (i = 0; i < NR_IRQS_LEGACY; i++) { | ||
91 | struct irq_desc *desc = irq_to_desc(i); | ||
92 | |||
93 | desc->status = IRQ_DISABLED; | ||
94 | desc->action = NULL; | ||
95 | desc->depth = 1; | ||
96 | |||
97 | /* | ||
98 | * 16 old-style INTA-cycle interrupts: | ||
99 | */ | ||
100 | set_irq_chip_and_handler_name(i, &i8259A_chip, | ||
101 | handle_level_irq, "XT"); | ||
102 | } | ||
103 | } | ||
104 | |||
105 | void init_IRQ(void) __attribute__((weak, alias("native_init_IRQ"))); | ||
106 | |||
107 | static void __init smp_intr_init(void) | ||
108 | { | ||
109 | #ifdef CONFIG_SMP | ||
110 | /* | ||
111 | * The reschedule interrupt is a CPU-to-CPU reschedule-helper | ||
112 | * IPI, driven by wakeup. | ||
113 | */ | ||
114 | alloc_intr_gate(RESCHEDULE_VECTOR, reschedule_interrupt); | ||
115 | |||
116 | /* IPIs for invalidation */ | ||
117 | alloc_intr_gate(INVALIDATE_TLB_VECTOR_START+0, invalidate_interrupt0); | ||
118 | alloc_intr_gate(INVALIDATE_TLB_VECTOR_START+1, invalidate_interrupt1); | ||
119 | alloc_intr_gate(INVALIDATE_TLB_VECTOR_START+2, invalidate_interrupt2); | ||
120 | alloc_intr_gate(INVALIDATE_TLB_VECTOR_START+3, invalidate_interrupt3); | ||
121 | alloc_intr_gate(INVALIDATE_TLB_VECTOR_START+4, invalidate_interrupt4); | ||
122 | alloc_intr_gate(INVALIDATE_TLB_VECTOR_START+5, invalidate_interrupt5); | ||
123 | alloc_intr_gate(INVALIDATE_TLB_VECTOR_START+6, invalidate_interrupt6); | ||
124 | alloc_intr_gate(INVALIDATE_TLB_VECTOR_START+7, invalidate_interrupt7); | ||
125 | |||
126 | /* IPI for generic function call */ | ||
127 | alloc_intr_gate(CALL_FUNCTION_VECTOR, call_function_interrupt); | ||
128 | |||
129 | /* IPI for generic single function call */ | ||
130 | alloc_intr_gate(CALL_FUNCTION_SINGLE_VECTOR, | ||
131 | call_function_single_interrupt); | ||
132 | |||
133 | /* Low priority IPI to cleanup after moving an irq */ | ||
134 | set_intr_gate(IRQ_MOVE_CLEANUP_VECTOR, irq_move_cleanup_interrupt); | ||
135 | set_bit(IRQ_MOVE_CLEANUP_VECTOR, used_vectors); | ||
136 | #endif | ||
137 | } | ||
138 | |||
139 | static void __init apic_intr_init(void) | ||
140 | { | ||
141 | smp_intr_init(); | ||
142 | |||
143 | alloc_intr_gate(THERMAL_APIC_VECTOR, thermal_interrupt); | ||
144 | alloc_intr_gate(THRESHOLD_APIC_VECTOR, threshold_interrupt); | ||
145 | |||
146 | /* self generated IPI for local APIC timer */ | ||
147 | alloc_intr_gate(LOCAL_TIMER_VECTOR, apic_timer_interrupt); | ||
148 | |||
149 | /* generic IPI for platform specific use */ | ||
150 | alloc_intr_gate(GENERIC_INTERRUPT_VECTOR, generic_interrupt); | ||
151 | |||
152 | /* IPI vectors for APIC spurious and error interrupts */ | ||
153 | alloc_intr_gate(SPURIOUS_APIC_VECTOR, spurious_interrupt); | ||
154 | alloc_intr_gate(ERROR_APIC_VECTOR, error_interrupt); | ||
155 | } | ||
156 | |||
157 | void __init native_init_IRQ(void) | ||
158 | { | ||
159 | int i; | ||
160 | |||
161 | init_ISA_irqs(); | ||
162 | /* | ||
163 | * Cover the whole vector space, no vector can escape | ||
164 | * us. (some of these will be overridden and become | ||
165 | * 'special' SMP interrupts) | ||
166 | */ | ||
167 | for (i = 0; i < (NR_VECTORS - FIRST_EXTERNAL_VECTOR); i++) { | ||
168 | int vector = FIRST_EXTERNAL_VECTOR + i; | ||
169 | if (vector != IA32_SYSCALL_VECTOR) | ||
170 | set_intr_gate(vector, interrupt[i]); | ||
171 | } | ||
172 | |||
173 | apic_intr_init(); | ||
174 | |||
175 | if (!acpi_ioapic) | ||
176 | setup_irq(2, &irq2); | ||
177 | } | ||
diff --git a/arch/x86/kernel/kgdb.c b/arch/x86/kernel/kgdb.c index b1f4dffb919e..8d82a77a3f3b 100644 --- a/arch/x86/kernel/kgdb.c +++ b/arch/x86/kernel/kgdb.c | |||
@@ -142,7 +142,7 @@ void sleeping_thread_to_gdb_regs(unsigned long *gdb_regs, struct task_struct *p) | |||
142 | gdb_regs32[GDB_PS] = *(unsigned long *)(p->thread.sp + 8); | 142 | gdb_regs32[GDB_PS] = *(unsigned long *)(p->thread.sp + 8); |
143 | gdb_regs32[GDB_CS] = __KERNEL_CS; | 143 | gdb_regs32[GDB_CS] = __KERNEL_CS; |
144 | gdb_regs32[GDB_SS] = __KERNEL_DS; | 144 | gdb_regs32[GDB_SS] = __KERNEL_DS; |
145 | gdb_regs[GDB_PC] = p->thread.ip; | 145 | gdb_regs[GDB_PC] = 0; |
146 | gdb_regs[GDB_R8] = 0; | 146 | gdb_regs[GDB_R8] = 0; |
147 | gdb_regs[GDB_R9] = 0; | 147 | gdb_regs[GDB_R9] = 0; |
148 | gdb_regs[GDB_R10] = 0; | 148 | gdb_regs[GDB_R10] = 0; |
diff --git a/arch/x86/kernel/kvm.c b/arch/x86/kernel/kvm.c index 33019ddb56b4..a78ecad0c900 100644 --- a/arch/x86/kernel/kvm.c +++ b/arch/x86/kernel/kvm.c | |||
@@ -27,6 +27,7 @@ | |||
27 | #include <linux/mm.h> | 27 | #include <linux/mm.h> |
28 | #include <linux/highmem.h> | 28 | #include <linux/highmem.h> |
29 | #include <linux/hardirq.h> | 29 | #include <linux/hardirq.h> |
30 | #include <asm/timer.h> | ||
30 | 31 | ||
31 | #define MMU_QUEUE_SIZE 1024 | 32 | #define MMU_QUEUE_SIZE 1024 |
32 | 33 | ||
@@ -195,7 +196,7 @@ static void kvm_leave_lazy_mmu(void) | |||
195 | struct kvm_para_state *state = kvm_para_state(); | 196 | struct kvm_para_state *state = kvm_para_state(); |
196 | 197 | ||
197 | mmu_queue_flush(state); | 198 | mmu_queue_flush(state); |
198 | paravirt_leave_lazy(paravirt_get_lazy_mode()); | 199 | paravirt_leave_lazy_mmu(); |
199 | state->mode = paravirt_get_lazy_mode(); | 200 | state->mode = paravirt_get_lazy_mode(); |
200 | } | 201 | } |
201 | 202 | ||
@@ -230,6 +231,9 @@ static void paravirt_ops_setup(void) | |||
230 | pv_mmu_ops.lazy_mode.enter = kvm_enter_lazy_mmu; | 231 | pv_mmu_ops.lazy_mode.enter = kvm_enter_lazy_mmu; |
231 | pv_mmu_ops.lazy_mode.leave = kvm_leave_lazy_mmu; | 232 | pv_mmu_ops.lazy_mode.leave = kvm_leave_lazy_mmu; |
232 | } | 233 | } |
234 | #ifdef CONFIG_X86_IO_APIC | ||
235 | no_timer_check = 1; | ||
236 | #endif | ||
233 | } | 237 | } |
234 | 238 | ||
235 | void __init kvm_guest_init(void) | 239 | void __init kvm_guest_init(void) |
diff --git a/arch/x86/kernel/microcode_amd.c b/arch/x86/kernel/microcode_amd.c index 453b5795a5c6..366baa179913 100644 --- a/arch/x86/kernel/microcode_amd.c +++ b/arch/x86/kernel/microcode_amd.c | |||
@@ -13,25 +13,13 @@ | |||
13 | * Licensed under the terms of the GNU General Public | 13 | * Licensed under the terms of the GNU General Public |
14 | * License version 2. See file COPYING for details. | 14 | * License version 2. See file COPYING for details. |
15 | */ | 15 | */ |
16 | #include <linux/platform_device.h> | ||
17 | #include <linux/capability.h> | ||
18 | #include <linux/miscdevice.h> | ||
19 | #include <linux/firmware.h> | 16 | #include <linux/firmware.h> |
20 | #include <linux/spinlock.h> | ||
21 | #include <linux/cpumask.h> | ||
22 | #include <linux/pci_ids.h> | 17 | #include <linux/pci_ids.h> |
23 | #include <linux/uaccess.h> | 18 | #include <linux/uaccess.h> |
24 | #include <linux/vmalloc.h> | 19 | #include <linux/vmalloc.h> |
25 | #include <linux/kernel.h> | 20 | #include <linux/kernel.h> |
26 | #include <linux/module.h> | 21 | #include <linux/module.h> |
27 | #include <linux/mutex.h> | ||
28 | #include <linux/sched.h> | ||
29 | #include <linux/init.h> | ||
30 | #include <linux/slab.h> | ||
31 | #include <linux/cpu.h> | ||
32 | #include <linux/pci.h> | 22 | #include <linux/pci.h> |
33 | #include <linux/fs.h> | ||
34 | #include <linux/mm.h> | ||
35 | 23 | ||
36 | #include <asm/microcode.h> | 24 | #include <asm/microcode.h> |
37 | #include <asm/processor.h> | 25 | #include <asm/processor.h> |
@@ -79,9 +67,6 @@ struct microcode_amd { | |||
79 | #define UCODE_CONTAINER_SECTION_HDR 8 | 67 | #define UCODE_CONTAINER_SECTION_HDR 8 |
80 | #define UCODE_CONTAINER_HEADER_SIZE 12 | 68 | #define UCODE_CONTAINER_HEADER_SIZE 12 |
81 | 69 | ||
82 | /* serialize access to the physical write */ | ||
83 | static DEFINE_SPINLOCK(microcode_update_lock); | ||
84 | |||
85 | static struct equiv_cpu_entry *equiv_cpu_table; | 70 | static struct equiv_cpu_entry *equiv_cpu_table; |
86 | 71 | ||
87 | static int collect_cpu_info_amd(int cpu, struct cpu_signature *csig) | 72 | static int collect_cpu_info_amd(int cpu, struct cpu_signature *csig) |
@@ -144,9 +129,8 @@ static int get_matching_microcode(int cpu, void *mc, int rev) | |||
144 | return 1; | 129 | return 1; |
145 | } | 130 | } |
146 | 131 | ||
147 | static void apply_microcode_amd(int cpu) | 132 | static int apply_microcode_amd(int cpu) |
148 | { | 133 | { |
149 | unsigned long flags; | ||
150 | u32 rev, dummy; | 134 | u32 rev, dummy; |
151 | int cpu_num = raw_smp_processor_id(); | 135 | int cpu_num = raw_smp_processor_id(); |
152 | struct ucode_cpu_info *uci = ucode_cpu_info + cpu_num; | 136 | struct ucode_cpu_info *uci = ucode_cpu_info + cpu_num; |
@@ -156,25 +140,25 @@ static void apply_microcode_amd(int cpu) | |||
156 | BUG_ON(cpu_num != cpu); | 140 | BUG_ON(cpu_num != cpu); |
157 | 141 | ||
158 | if (mc_amd == NULL) | 142 | if (mc_amd == NULL) |
159 | return; | 143 | return 0; |
160 | 144 | ||
161 | spin_lock_irqsave(µcode_update_lock, flags); | ||
162 | wrmsrl(MSR_AMD64_PATCH_LOADER, (u64)(long)&mc_amd->hdr.data_code); | 145 | wrmsrl(MSR_AMD64_PATCH_LOADER, (u64)(long)&mc_amd->hdr.data_code); |
163 | /* get patch id after patching */ | 146 | /* get patch id after patching */ |
164 | rdmsr(MSR_AMD64_PATCH_LEVEL, rev, dummy); | 147 | rdmsr(MSR_AMD64_PATCH_LEVEL, rev, dummy); |
165 | spin_unlock_irqrestore(µcode_update_lock, flags); | ||
166 | 148 | ||
167 | /* check current patch id and patch's id for match */ | 149 | /* check current patch id and patch's id for match */ |
168 | if (rev != mc_amd->hdr.patch_id) { | 150 | if (rev != mc_amd->hdr.patch_id) { |
169 | printk(KERN_ERR "microcode: CPU%d: update failed " | 151 | printk(KERN_ERR "microcode: CPU%d: update failed " |
170 | "(for patch_level=0x%x)\n", cpu, mc_amd->hdr.patch_id); | 152 | "(for patch_level=0x%x)\n", cpu, mc_amd->hdr.patch_id); |
171 | return; | 153 | return -1; |
172 | } | 154 | } |
173 | 155 | ||
174 | printk(KERN_INFO "microcode: CPU%d: updated (new patch_level=0x%x)\n", | 156 | printk(KERN_INFO "microcode: CPU%d: updated (new patch_level=0x%x)\n", |
175 | cpu, rev); | 157 | cpu, rev); |
176 | 158 | ||
177 | uci->cpu_sig.rev = rev; | 159 | uci->cpu_sig.rev = rev; |
160 | |||
161 | return 0; | ||
178 | } | 162 | } |
179 | 163 | ||
180 | static int get_ucode_data(void *to, const u8 *from, size_t n) | 164 | static int get_ucode_data(void *to, const u8 *from, size_t n) |
@@ -257,13 +241,12 @@ static int install_equiv_cpu_table(const u8 *buf) | |||
257 | 241 | ||
258 | static void free_equiv_cpu_table(void) | 242 | static void free_equiv_cpu_table(void) |
259 | { | 243 | { |
260 | if (equiv_cpu_table) { | 244 | vfree(equiv_cpu_table); |
261 | vfree(equiv_cpu_table); | 245 | equiv_cpu_table = NULL; |
262 | equiv_cpu_table = NULL; | ||
263 | } | ||
264 | } | 246 | } |
265 | 247 | ||
266 | static int generic_load_microcode(int cpu, const u8 *data, size_t size) | 248 | static enum ucode_state |
249 | generic_load_microcode(int cpu, const u8 *data, size_t size) | ||
267 | { | 250 | { |
268 | struct ucode_cpu_info *uci = ucode_cpu_info + cpu; | 251 | struct ucode_cpu_info *uci = ucode_cpu_info + cpu; |
269 | const u8 *ucode_ptr = data; | 252 | const u8 *ucode_ptr = data; |
@@ -272,12 +255,13 @@ static int generic_load_microcode(int cpu, const u8 *data, size_t size) | |||
272 | int new_rev = uci->cpu_sig.rev; | 255 | int new_rev = uci->cpu_sig.rev; |
273 | unsigned int leftover; | 256 | unsigned int leftover; |
274 | unsigned long offset; | 257 | unsigned long offset; |
258 | enum ucode_state state = UCODE_OK; | ||
275 | 259 | ||
276 | offset = install_equiv_cpu_table(ucode_ptr); | 260 | offset = install_equiv_cpu_table(ucode_ptr); |
277 | if (!offset) { | 261 | if (!offset) { |
278 | printk(KERN_ERR "microcode: failed to create " | 262 | printk(KERN_ERR "microcode: failed to create " |
279 | "equivalent cpu table\n"); | 263 | "equivalent cpu table\n"); |
280 | return -EINVAL; | 264 | return UCODE_ERROR; |
281 | } | 265 | } |
282 | 266 | ||
283 | ucode_ptr += offset; | 267 | ucode_ptr += offset; |
@@ -293,8 +277,7 @@ static int generic_load_microcode(int cpu, const u8 *data, size_t size) | |||
293 | 277 | ||
294 | mc_header = (struct microcode_header_amd *)mc; | 278 | mc_header = (struct microcode_header_amd *)mc; |
295 | if (get_matching_microcode(cpu, mc, new_rev)) { | 279 | if (get_matching_microcode(cpu, mc, new_rev)) { |
296 | if (new_mc) | 280 | vfree(new_mc); |
297 | vfree(new_mc); | ||
298 | new_rev = mc_header->patch_id; | 281 | new_rev = mc_header->patch_id; |
299 | new_mc = mc; | 282 | new_mc = mc; |
300 | } else | 283 | } else |
@@ -306,34 +289,32 @@ static int generic_load_microcode(int cpu, const u8 *data, size_t size) | |||
306 | 289 | ||
307 | if (new_mc) { | 290 | if (new_mc) { |
308 | if (!leftover) { | 291 | if (!leftover) { |
309 | if (uci->mc) | 292 | vfree(uci->mc); |
310 | vfree(uci->mc); | ||
311 | uci->mc = new_mc; | 293 | uci->mc = new_mc; |
312 | pr_debug("microcode: CPU%d found a matching microcode " | 294 | pr_debug("microcode: CPU%d found a matching microcode " |
313 | "update with version 0x%x (current=0x%x)\n", | 295 | "update with version 0x%x (current=0x%x)\n", |
314 | cpu, new_rev, uci->cpu_sig.rev); | 296 | cpu, new_rev, uci->cpu_sig.rev); |
315 | } else | 297 | } else { |
316 | vfree(new_mc); | 298 | vfree(new_mc); |
317 | } | 299 | state = UCODE_ERROR; |
300 | } | ||
301 | } else | ||
302 | state = UCODE_NFOUND; | ||
318 | 303 | ||
319 | free_equiv_cpu_table(); | 304 | free_equiv_cpu_table(); |
320 | 305 | ||
321 | return (int)leftover; | 306 | return state; |
322 | } | 307 | } |
323 | 308 | ||
324 | static int request_microcode_fw(int cpu, struct device *device) | 309 | static enum ucode_state request_microcode_fw(int cpu, struct device *device) |
325 | { | 310 | { |
326 | const char *fw_name = "amd-ucode/microcode_amd.bin"; | 311 | const char *fw_name = "amd-ucode/microcode_amd.bin"; |
327 | const struct firmware *firmware; | 312 | const struct firmware *firmware; |
328 | int ret; | 313 | enum ucode_state ret; |
329 | |||
330 | /* We should bind the task to the CPU */ | ||
331 | BUG_ON(cpu != raw_smp_processor_id()); | ||
332 | 314 | ||
333 | ret = request_firmware(&firmware, fw_name, device); | 315 | if (request_firmware(&firmware, fw_name, device)) { |
334 | if (ret) { | ||
335 | printk(KERN_ERR "microcode: failed to load file %s\n", fw_name); | 316 | printk(KERN_ERR "microcode: failed to load file %s\n", fw_name); |
336 | return ret; | 317 | return UCODE_NFOUND; |
337 | } | 318 | } |
338 | 319 | ||
339 | ret = generic_load_microcode(cpu, firmware->data, firmware->size); | 320 | ret = generic_load_microcode(cpu, firmware->data, firmware->size); |
@@ -343,11 +324,12 @@ static int request_microcode_fw(int cpu, struct device *device) | |||
343 | return ret; | 324 | return ret; |
344 | } | 325 | } |
345 | 326 | ||
346 | static int request_microcode_user(int cpu, const void __user *buf, size_t size) | 327 | static enum ucode_state |
328 | request_microcode_user(int cpu, const void __user *buf, size_t size) | ||
347 | { | 329 | { |
348 | printk(KERN_INFO "microcode: AMD microcode update via " | 330 | printk(KERN_INFO "microcode: AMD microcode update via " |
349 | "/dev/cpu/microcode not supported\n"); | 331 | "/dev/cpu/microcode not supported\n"); |
350 | return -1; | 332 | return UCODE_ERROR; |
351 | } | 333 | } |
352 | 334 | ||
353 | static void microcode_fini_cpu_amd(int cpu) | 335 | static void microcode_fini_cpu_amd(int cpu) |
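
Two things change shape here: the driver callbacks now report status through enum ucode_state instead of ad-hoc ints (only UCODE_OK, UCODE_NFOUND and UCODE_ERROR are used in this file), and apply_microcode() gains an int return so the core can tell whether the per-CPU update actually took. The local spinlock around the wrmsrl/rdmsr pair can go because, as the microcode_core.c comment below spells out, updates are now serialized by microcode_mutex and executed on one target CPU at a time. An assumed sketch of the status type (member order here is illustrative; the authoritative definition lives in the x86 microcode header):

	enum ucode_state {
		UCODE_OK,	/* image parsed and stored for this CPU          */
		UCODE_NFOUND,	/* no matching microcode for this CPU, or no file */
		UCODE_ERROR,	/* malformed image or a failure while loading it */
	};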
diff --git a/arch/x86/kernel/microcode_core.c b/arch/x86/kernel/microcode_core.c index 98c470c069d1..9371448290ac 100644 --- a/arch/x86/kernel/microcode_core.c +++ b/arch/x86/kernel/microcode_core.c | |||
@@ -71,27 +71,18 @@ | |||
71 | * Thanks to Stuart Swales for pointing out this bug. | 71 | * Thanks to Stuart Swales for pointing out this bug. |
72 | */ | 72 | */ |
73 | #include <linux/platform_device.h> | 73 | #include <linux/platform_device.h> |
74 | #include <linux/capability.h> | ||
75 | #include <linux/miscdevice.h> | 74 | #include <linux/miscdevice.h> |
76 | #include <linux/firmware.h> | 75 | #include <linux/capability.h> |
77 | #include <linux/smp_lock.h> | 76 | #include <linux/smp_lock.h> |
78 | #include <linux/spinlock.h> | ||
79 | #include <linux/cpumask.h> | ||
80 | #include <linux/uaccess.h> | ||
81 | #include <linux/vmalloc.h> | ||
82 | #include <linux/kernel.h> | 77 | #include <linux/kernel.h> |
83 | #include <linux/module.h> | 78 | #include <linux/module.h> |
84 | #include <linux/mutex.h> | 79 | #include <linux/mutex.h> |
85 | #include <linux/sched.h> | ||
86 | #include <linux/init.h> | ||
87 | #include <linux/slab.h> | ||
88 | #include <linux/cpu.h> | 80 | #include <linux/cpu.h> |
89 | #include <linux/fs.h> | 81 | #include <linux/fs.h> |
90 | #include <linux/mm.h> | 82 | #include <linux/mm.h> |
91 | 83 | ||
92 | #include <asm/microcode.h> | 84 | #include <asm/microcode.h> |
93 | #include <asm/processor.h> | 85 | #include <asm/processor.h> |
94 | #include <asm/msr.h> | ||
95 | 86 | ||
96 | MODULE_DESCRIPTION("Microcode Update Driver"); | 87 | MODULE_DESCRIPTION("Microcode Update Driver"); |
97 | MODULE_AUTHOR("Tigran Aivazian <tigran@aivazian.fsnet.co.uk>"); | 88 | MODULE_AUTHOR("Tigran Aivazian <tigran@aivazian.fsnet.co.uk>"); |
@@ -101,36 +92,110 @@ MODULE_LICENSE("GPL"); | |||
101 | 92 | ||
102 | static struct microcode_ops *microcode_ops; | 93 | static struct microcode_ops *microcode_ops; |
103 | 94 | ||
104 | /* no concurrent ->write()s are allowed on /dev/cpu/microcode */ | 95 | /* |
96 | * Synchronization. | ||
97 | * | ||
98 | * All non cpu-hotplug-callback call sites use: | ||
99 | * | ||
100 | * - microcode_mutex to synchronize with each other; | ||
101 | * - get/put_online_cpus() to synchronize with | ||
102 | * the cpu-hotplug-callback call sites. | ||
103 | * | ||
104 | * We guarantee that only a single cpu is being | ||
105 | * updated at any particular moment of time. | ||
106 | */ | ||
105 | static DEFINE_MUTEX(microcode_mutex); | 107 | static DEFINE_MUTEX(microcode_mutex); |
106 | 108 | ||
107 | struct ucode_cpu_info ucode_cpu_info[NR_CPUS]; | 109 | struct ucode_cpu_info ucode_cpu_info[NR_CPUS]; |
108 | EXPORT_SYMBOL_GPL(ucode_cpu_info); | 110 | EXPORT_SYMBOL_GPL(ucode_cpu_info); |
109 | 111 | ||
112 | /* | ||
113 | * Operations that are run on a target cpu: | ||
114 | */ | ||
115 | |||
116 | struct cpu_info_ctx { | ||
117 | struct cpu_signature *cpu_sig; | ||
118 | int err; | ||
119 | }; | ||
120 | |||
121 | static void collect_cpu_info_local(void *arg) | ||
122 | { | ||
123 | struct cpu_info_ctx *ctx = arg; | ||
124 | |||
125 | ctx->err = microcode_ops->collect_cpu_info(smp_processor_id(), | ||
126 | ctx->cpu_sig); | ||
127 | } | ||
128 | |||
129 | static int collect_cpu_info_on_target(int cpu, struct cpu_signature *cpu_sig) | ||
130 | { | ||
131 | struct cpu_info_ctx ctx = { .cpu_sig = cpu_sig, .err = 0 }; | ||
132 | int ret; | ||
133 | |||
134 | ret = smp_call_function_single(cpu, collect_cpu_info_local, &ctx, 1); | ||
135 | if (!ret) | ||
136 | ret = ctx.err; | ||
137 | |||
138 | return ret; | ||
139 | } | ||
140 | |||
141 | static int collect_cpu_info(int cpu) | ||
142 | { | ||
143 | struct ucode_cpu_info *uci = ucode_cpu_info + cpu; | ||
144 | int ret; | ||
145 | |||
146 | memset(uci, 0, sizeof(*uci)); | ||
147 | |||
148 | ret = collect_cpu_info_on_target(cpu, &uci->cpu_sig); | ||
149 | if (!ret) | ||
150 | uci->valid = 1; | ||
151 | |||
152 | return ret; | ||
153 | } | ||
154 | |||
155 | struct apply_microcode_ctx { | ||
156 | int err; | ||
157 | }; | ||
158 | |||
159 | static void apply_microcode_local(void *arg) | ||
160 | { | ||
161 | struct apply_microcode_ctx *ctx = arg; | ||
162 | |||
163 | ctx->err = microcode_ops->apply_microcode(smp_processor_id()); | ||
164 | } | ||
165 | |||
166 | static int apply_microcode_on_target(int cpu) | ||
167 | { | ||
168 | struct apply_microcode_ctx ctx = { .err = 0 }; | ||
169 | int ret; | ||
170 | |||
171 | ret = smp_call_function_single(cpu, apply_microcode_local, &ctx, 1); | ||
172 | if (!ret) | ||
173 | ret = ctx.err; | ||
174 | |||
175 | return ret; | ||
176 | } | ||
177 | |||
110 | #ifdef CONFIG_MICROCODE_OLD_INTERFACE | 178 | #ifdef CONFIG_MICROCODE_OLD_INTERFACE |
111 | static int do_microcode_update(const void __user *buf, size_t size) | 179 | static int do_microcode_update(const void __user *buf, size_t size) |
112 | { | 180 | { |
113 | cpumask_t old; | ||
114 | int error = 0; | 181 | int error = 0; |
115 | int cpu; | 182 | int cpu; |
116 | 183 | ||
117 | old = current->cpus_allowed; | ||
118 | |||
119 | for_each_online_cpu(cpu) { | 184 | for_each_online_cpu(cpu) { |
120 | struct ucode_cpu_info *uci = ucode_cpu_info + cpu; | 185 | struct ucode_cpu_info *uci = ucode_cpu_info + cpu; |
186 | enum ucode_state ustate; | ||
121 | 187 | ||
122 | if (!uci->valid) | 188 | if (!uci->valid) |
123 | continue; | 189 | continue; |
124 | 190 | ||
125 | set_cpus_allowed_ptr(current, &cpumask_of_cpu(cpu)); | 191 | ustate = microcode_ops->request_microcode_user(cpu, buf, size); |
126 | error = microcode_ops->request_microcode_user(cpu, buf, size); | 192 | if (ustate == UCODE_ERROR) { |
127 | if (error < 0) | 193 | error = -1; |
128 | goto out; | 194 | break; |
129 | if (!error) | 195 | } else if (ustate == UCODE_OK) |
130 | microcode_ops->apply_microcode(cpu); | 196 | apply_microcode_on_target(cpu); |
131 | } | 197 | } |
132 | out: | 198 | |
133 | set_cpus_allowed_ptr(current, &old); | ||
134 | return error; | 199 | return error; |
135 | } | 200 | } |
136 | 201 | ||
@@ -143,19 +208,17 @@ static int microcode_open(struct inode *unused1, struct file *unused2) | |||
143 | static ssize_t microcode_write(struct file *file, const char __user *buf, | 208 | static ssize_t microcode_write(struct file *file, const char __user *buf, |
144 | size_t len, loff_t *ppos) | 209 | size_t len, loff_t *ppos) |
145 | { | 210 | { |
146 | ssize_t ret; | 211 | ssize_t ret = -EINVAL; |
147 | 212 | ||
148 | if ((len >> PAGE_SHIFT) > num_physpages) { | 213 | if ((len >> PAGE_SHIFT) > num_physpages) { |
149 | printk(KERN_ERR "microcode: too much data (max %ld pages)\n", | 214 | pr_err("microcode: too much data (max %ld pages)\n", num_physpages); |
150 | num_physpages); | 215 | return ret; |
151 | return -EINVAL; | ||
152 | } | 216 | } |
153 | 217 | ||
154 | get_online_cpus(); | 218 | get_online_cpus(); |
155 | mutex_lock(µcode_mutex); | 219 | mutex_lock(µcode_mutex); |
156 | 220 | ||
157 | ret = do_microcode_update(buf, len); | 221 | if (do_microcode_update(buf, len) == 0) |
158 | if (!ret) | ||
159 | ret = (ssize_t)len; | 222 | ret = (ssize_t)len; |
160 | 223 | ||
161 | mutex_unlock(µcode_mutex); | 224 | mutex_unlock(µcode_mutex); |
@@ -165,15 +228,16 @@ static ssize_t microcode_write(struct file *file, const char __user *buf, | |||
165 | } | 228 | } |
166 | 229 | ||
167 | static const struct file_operations microcode_fops = { | 230 | static const struct file_operations microcode_fops = { |
168 | .owner = THIS_MODULE, | 231 | .owner = THIS_MODULE, |
169 | .write = microcode_write, | 232 | .write = microcode_write, |
170 | .open = microcode_open, | 233 | .open = microcode_open, |
171 | }; | 234 | }; |
172 | 235 | ||
173 | static struct miscdevice microcode_dev = { | 236 | static struct miscdevice microcode_dev = { |
174 | .minor = MICROCODE_MINOR, | 237 | .minor = MICROCODE_MINOR, |
175 | .name = "microcode", | 238 | .name = "microcode", |
176 | .fops = µcode_fops, | 239 | .devnode = "cpu/microcode", |
240 | .fops = µcode_fops, | ||
177 | }; | 241 | }; |
178 | 242 | ||
179 | static int __init microcode_dev_init(void) | 243 | static int __init microcode_dev_init(void) |
@@ -182,9 +246,7 @@ static int __init microcode_dev_init(void) | |||
182 | 246 | ||
183 | error = misc_register(µcode_dev); | 247 | error = misc_register(µcode_dev); |
184 | if (error) { | 248 | if (error) { |
185 | printk(KERN_ERR | 249 | pr_err("microcode: can't misc_register on minor=%d\n", MICROCODE_MINOR); |
186 | "microcode: can't misc_register on minor=%d\n", | ||
187 | MICROCODE_MINOR); | ||
188 | return error; | 250 | return error; |
189 | } | 251 | } |
190 | 252 | ||
@@ -205,42 +267,51 @@ MODULE_ALIAS_MISCDEV(MICROCODE_MINOR); | |||
205 | /* fake device for request_firmware */ | 267 | /* fake device for request_firmware */ |
206 | static struct platform_device *microcode_pdev; | 268 | static struct platform_device *microcode_pdev; |
207 | 269 | ||
208 | static long reload_for_cpu(void *unused) | 270 | static int reload_for_cpu(int cpu) |
209 | { | 271 | { |
210 | struct ucode_cpu_info *uci = ucode_cpu_info + smp_processor_id(); | 272 | struct ucode_cpu_info *uci = ucode_cpu_info + cpu; |
211 | int err = 0; | 273 | int err = 0; |
212 | 274 | ||
213 | mutex_lock(µcode_mutex); | 275 | mutex_lock(µcode_mutex); |
214 | if (uci->valid) { | 276 | if (uci->valid) { |
215 | err = microcode_ops->request_microcode_fw(smp_processor_id(), | 277 | enum ucode_state ustate; |
216 | µcode_pdev->dev); | 278 | |
217 | if (!err) | 279 | ustate = microcode_ops->request_microcode_fw(cpu, µcode_pdev->dev); |
218 | microcode_ops->apply_microcode(smp_processor_id()); | 280 | if (ustate == UCODE_OK) |
281 | apply_microcode_on_target(cpu); | ||
282 | else | ||
283 | if (ustate == UCODE_ERROR) | ||
284 | err = -EINVAL; | ||
219 | } | 285 | } |
220 | mutex_unlock(µcode_mutex); | 286 | mutex_unlock(µcode_mutex); |
287 | |||
221 | return err; | 288 | return err; |
222 | } | 289 | } |
223 | 290 | ||
224 | static ssize_t reload_store(struct sys_device *dev, | 291 | static ssize_t reload_store(struct sys_device *dev, |
225 | struct sysdev_attribute *attr, | 292 | struct sysdev_attribute *attr, |
226 | const char *buf, size_t sz) | 293 | const char *buf, size_t size) |
227 | { | 294 | { |
228 | char *end; | 295 | unsigned long val; |
229 | unsigned long val = simple_strtoul(buf, &end, 0); | ||
230 | int err = 0; | ||
231 | int cpu = dev->id; | 296 | int cpu = dev->id; |
297 | int ret = 0; | ||
298 | char *end; | ||
232 | 299 | ||
300 | val = simple_strtoul(buf, &end, 0); | ||
233 | if (end == buf) | 301 | if (end == buf) |
234 | return -EINVAL; | 302 | return -EINVAL; |
303 | |||
235 | if (val == 1) { | 304 | if (val == 1) { |
236 | get_online_cpus(); | 305 | get_online_cpus(); |
237 | if (cpu_online(cpu)) | 306 | if (cpu_online(cpu)) |
238 | err = work_on_cpu(cpu, reload_for_cpu, NULL); | 307 | ret = reload_for_cpu(cpu); |
239 | put_online_cpus(); | 308 | put_online_cpus(); |
240 | } | 309 | } |
241 | if (err) | 310 | |
242 | return err; | 311 | if (!ret) |
243 | return sz; | 312 | ret = size; |
313 | |||
314 | return ret; | ||
244 | } | 315 | } |
245 | 316 | ||
246 | static ssize_t version_show(struct sys_device *dev, | 317 | static ssize_t version_show(struct sys_device *dev, |
@@ -271,11 +342,11 @@ static struct attribute *mc_default_attrs[] = { | |||
271 | }; | 342 | }; |
272 | 343 | ||
273 | static struct attribute_group mc_attr_group = { | 344 | static struct attribute_group mc_attr_group = { |
274 | .attrs = mc_default_attrs, | 345 | .attrs = mc_default_attrs, |
275 | .name = "microcode", | 346 | .name = "microcode", |
276 | }; | 347 | }; |
277 | 348 | ||
278 | static void __microcode_fini_cpu(int cpu) | 349 | static void microcode_fini_cpu(int cpu) |
279 | { | 350 | { |
280 | struct ucode_cpu_info *uci = ucode_cpu_info + cpu; | 351 | struct ucode_cpu_info *uci = ucode_cpu_info + cpu; |
281 | 352 | ||
@@ -283,103 +354,68 @@ static void __microcode_fini_cpu(int cpu) | |||
283 | uci->valid = 0; | 354 | uci->valid = 0; |
284 | } | 355 | } |
285 | 356 | ||
286 | static void microcode_fini_cpu(int cpu) | 357 | static enum ucode_state microcode_resume_cpu(int cpu) |
287 | { | ||
288 | mutex_lock(µcode_mutex); | ||
289 | __microcode_fini_cpu(cpu); | ||
290 | mutex_unlock(µcode_mutex); | ||
291 | } | ||
292 | |||
293 | static void collect_cpu_info(int cpu) | ||
294 | { | 358 | { |
295 | struct ucode_cpu_info *uci = ucode_cpu_info + cpu; | 359 | struct ucode_cpu_info *uci = ucode_cpu_info + cpu; |
296 | 360 | ||
297 | memset(uci, 0, sizeof(*uci)); | 361 | if (!uci->mc) |
298 | if (!microcode_ops->collect_cpu_info(cpu, &uci->cpu_sig)) | 362 | return UCODE_NFOUND; |
299 | uci->valid = 1; | 363 | |
364 | pr_debug("microcode: CPU%d updated upon resume\n", cpu); | ||
365 | apply_microcode_on_target(cpu); | ||
366 | |||
367 | return UCODE_OK; | ||
300 | } | 368 | } |
301 | 369 | ||
302 | static int microcode_resume_cpu(int cpu) | 370 | static enum ucode_state microcode_init_cpu(int cpu) |
303 | { | 371 | { |
304 | struct ucode_cpu_info *uci = ucode_cpu_info + cpu; | 372 | enum ucode_state ustate; |
305 | struct cpu_signature nsig; | ||
306 | 373 | ||
307 | pr_debug("microcode: CPU%d resumed\n", cpu); | 374 | if (collect_cpu_info(cpu)) |
375 | return UCODE_ERROR; | ||
308 | 376 | ||
309 | if (!uci->mc) | 377 | /* --dimm. Trigger a delayed update? */ |
310 | return 1; | 378 | if (system_state != SYSTEM_RUNNING) |
379 | return UCODE_NFOUND; | ||
311 | 380 | ||
312 | /* | 381 | ustate = microcode_ops->request_microcode_fw(cpu, µcode_pdev->dev); |
313 | * Let's verify that the 'cached' ucode does belong | ||
314 | * to this cpu (a bit of paranoia): | ||
315 | */ | ||
316 | if (microcode_ops->collect_cpu_info(cpu, &nsig)) { | ||
317 | __microcode_fini_cpu(cpu); | ||
318 | printk(KERN_ERR "failed to collect_cpu_info for resuming cpu #%d\n", | ||
319 | cpu); | ||
320 | return -1; | ||
321 | } | ||
322 | 382 | ||
323 | if ((nsig.sig != uci->cpu_sig.sig) || (nsig.pf != uci->cpu_sig.pf)) { | 383 | if (ustate == UCODE_OK) { |
324 | __microcode_fini_cpu(cpu); | 384 | pr_debug("microcode: CPU%d updated upon init\n", cpu); |
325 | printk(KERN_ERR "cached ucode doesn't match the resuming cpu #%d\n", | 385 | apply_microcode_on_target(cpu); |
326 | cpu); | ||
327 | /* Should we look for a new ucode here? */ | ||
328 | return 1; | ||
329 | } | 386 | } |
330 | 387 | ||
331 | return 0; | 388 | return ustate; |
332 | } | 389 | } |
333 | 390 | ||
334 | static long microcode_update_cpu(void *unused) | 391 | static enum ucode_state microcode_update_cpu(int cpu) |
335 | { | 392 | { |
336 | struct ucode_cpu_info *uci = ucode_cpu_info + smp_processor_id(); | 393 | struct ucode_cpu_info *uci = ucode_cpu_info + cpu; |
337 | int err = 0; | 394 | enum ucode_state ustate; |
338 | 395 | ||
339 | /* | 396 | if (uci->valid) |
340 | * Check if the system resume is in progress (uci->valid != NULL), | 397 | ustate = microcode_resume_cpu(cpu); |
341 | * otherwise just request a firmware: | 398 | else |
342 | */ | 399 | ustate = microcode_init_cpu(cpu); |
343 | if (uci->valid) { | ||
344 | err = microcode_resume_cpu(smp_processor_id()); | ||
345 | } else { | ||
346 | collect_cpu_info(smp_processor_id()); | ||
347 | if (uci->valid && system_state == SYSTEM_RUNNING) | ||
348 | err = microcode_ops->request_microcode_fw( | ||
349 | smp_processor_id(), | ||
350 | µcode_pdev->dev); | ||
351 | } | ||
352 | if (!err) | ||
353 | microcode_ops->apply_microcode(smp_processor_id()); | ||
354 | return err; | ||
355 | } | ||
356 | 400 | ||
357 | static int microcode_init_cpu(int cpu) | 401 | return ustate; |
358 | { | ||
359 | int err; | ||
360 | mutex_lock(µcode_mutex); | ||
361 | err = work_on_cpu(cpu, microcode_update_cpu, NULL); | ||
362 | mutex_unlock(µcode_mutex); | ||
363 | |||
364 | return err; | ||
365 | } | 402 | } |
366 | 403 | ||
367 | static int mc_sysdev_add(struct sys_device *sys_dev) | 404 | static int mc_sysdev_add(struct sys_device *sys_dev) |
368 | { | 405 | { |
369 | int err, cpu = sys_dev->id; | 406 | int err, cpu = sys_dev->id; |
370 | struct ucode_cpu_info *uci = ucode_cpu_info + cpu; | ||
371 | 407 | ||
372 | if (!cpu_online(cpu)) | 408 | if (!cpu_online(cpu)) |
373 | return 0; | 409 | return 0; |
374 | 410 | ||
375 | pr_debug("microcode: CPU%d added\n", cpu); | 411 | pr_debug("microcode: CPU%d added\n", cpu); |
376 | memset(uci, 0, sizeof(*uci)); | ||
377 | 412 | ||
378 | err = sysfs_create_group(&sys_dev->kobj, &mc_attr_group); | 413 | err = sysfs_create_group(&sys_dev->kobj, &mc_attr_group); |
379 | if (err) | 414 | if (err) |
380 | return err; | 415 | return err; |
381 | 416 | ||
382 | err = microcode_init_cpu(cpu); | 417 | if (microcode_init_cpu(cpu) == UCODE_ERROR) |
418 | err = -EINVAL; | ||
383 | 419 | ||
384 | return err; | 420 | return err; |
385 | } | 421 | } |
@@ -400,19 +436,30 @@ static int mc_sysdev_remove(struct sys_device *sys_dev) | |||
400 | static int mc_sysdev_resume(struct sys_device *dev) | 436 | static int mc_sysdev_resume(struct sys_device *dev) |
401 | { | 437 | { |
402 | int cpu = dev->id; | 438 | int cpu = dev->id; |
439 | struct ucode_cpu_info *uci = ucode_cpu_info + cpu; | ||
403 | 440 | ||
404 | if (!cpu_online(cpu)) | 441 | if (!cpu_online(cpu)) |
405 | return 0; | 442 | return 0; |
406 | 443 | ||
407 | /* only CPU 0 will apply ucode here */ | 444 | /* |
408 | microcode_update_cpu(NULL); | 445 | * All non-bootup cpus are still disabled, |
446 | * so only CPU 0 will apply ucode here. | ||
447 | * | ||
448 | * Moreover, there can be no concurrent | ||
449 | * updates from any other places at this point. | ||
450 | */ | ||
451 | WARN_ON(cpu != 0); | ||
452 | |||
453 | if (uci->valid && uci->mc) | ||
454 | microcode_ops->apply_microcode(cpu); | ||
455 | |||
409 | return 0; | 456 | return 0; |
410 | } | 457 | } |
411 | 458 | ||
412 | static struct sysdev_driver mc_sysdev_driver = { | 459 | static struct sysdev_driver mc_sysdev_driver = { |
413 | .add = mc_sysdev_add, | 460 | .add = mc_sysdev_add, |
414 | .remove = mc_sysdev_remove, | 461 | .remove = mc_sysdev_remove, |
415 | .resume = mc_sysdev_resume, | 462 | .resume = mc_sysdev_resume, |
416 | }; | 463 | }; |
417 | 464 | ||
418 | static __cpuinit int | 465 | static __cpuinit int |
@@ -425,15 +472,12 @@ mc_cpu_callback(struct notifier_block *nb, unsigned long action, void *hcpu) | |||
425 | switch (action) { | 472 | switch (action) { |
426 | case CPU_ONLINE: | 473 | case CPU_ONLINE: |
427 | case CPU_ONLINE_FROZEN: | 474 | case CPU_ONLINE_FROZEN: |
428 | if (microcode_init_cpu(cpu)) | 475 | microcode_update_cpu(cpu); |
429 | printk(KERN_ERR "microcode: failed to init CPU%d\n", | ||
430 | cpu); | ||
431 | case CPU_DOWN_FAILED: | 476 | case CPU_DOWN_FAILED: |
432 | case CPU_DOWN_FAILED_FROZEN: | 477 | case CPU_DOWN_FAILED_FROZEN: |
433 | pr_debug("microcode: CPU%d added\n", cpu); | 478 | pr_debug("microcode: CPU%d added\n", cpu); |
434 | if (sysfs_create_group(&sys_dev->kobj, &mc_attr_group)) | 479 | if (sysfs_create_group(&sys_dev->kobj, &mc_attr_group)) |
435 | printk(KERN_ERR "microcode: Failed to create the sysfs " | 480 | pr_err("microcode: Failed to create group for CPU%d\n", cpu); |
436 | "group for CPU%d\n", cpu); | ||
437 | break; | 481 | break; |
438 | case CPU_DOWN_PREPARE: | 482 | case CPU_DOWN_PREPARE: |
439 | case CPU_DOWN_PREPARE_FROZEN: | 483 | case CPU_DOWN_PREPARE_FROZEN: |
@@ -465,13 +509,10 @@ static int __init microcode_init(void) | |||
465 | microcode_ops = init_amd_microcode(); | 509 | microcode_ops = init_amd_microcode(); |
466 | 510 | ||
467 | if (!microcode_ops) { | 511 | if (!microcode_ops) { |
468 | printk(KERN_ERR "microcode: no support for this CPU vendor\n"); | 512 | pr_err("microcode: no support for this CPU vendor\n"); |
469 | return -ENODEV; | 513 | return -ENODEV; |
470 | } | 514 | } |
471 | 515 | ||
472 | error = microcode_dev_init(); | ||
473 | if (error) | ||
474 | return error; | ||
475 | microcode_pdev = platform_device_register_simple("microcode", -1, | 516 | microcode_pdev = platform_device_register_simple("microcode", -1, |
476 | NULL, 0); | 517 | NULL, 0); |
477 | if (IS_ERR(microcode_pdev)) { | 518 | if (IS_ERR(microcode_pdev)) { |
@@ -480,23 +521,31 @@ static int __init microcode_init(void) | |||
480 | } | 521 | } |
481 | 522 | ||
482 | get_online_cpus(); | 523 | get_online_cpus(); |
524 | mutex_lock(µcode_mutex); | ||
525 | |||
483 | error = sysdev_driver_register(&cpu_sysdev_class, &mc_sysdev_driver); | 526 | error = sysdev_driver_register(&cpu_sysdev_class, &mc_sysdev_driver); |
527 | |||
528 | mutex_unlock(µcode_mutex); | ||
484 | put_online_cpus(); | 529 | put_online_cpus(); |
530 | |||
485 | if (error) { | 531 | if (error) { |
486 | microcode_dev_exit(); | ||
487 | platform_device_unregister(microcode_pdev); | 532 | platform_device_unregister(microcode_pdev); |
488 | return error; | 533 | return error; |
489 | } | 534 | } |
490 | 535 | ||
536 | error = microcode_dev_init(); | ||
537 | if (error) | ||
538 | return error; | ||
539 | |||
491 | register_hotcpu_notifier(&mc_cpu_notifier); | 540 | register_hotcpu_notifier(&mc_cpu_notifier); |
492 | 541 | ||
493 | printk(KERN_INFO | 542 | pr_info("Microcode Update Driver: v" MICROCODE_VERSION |
494 | "Microcode Update Driver: v" MICROCODE_VERSION | ||
495 | " <tigran@aivazian.fsnet.co.uk>," | 543 | " <tigran@aivazian.fsnet.co.uk>," |
496 | " Peter Oruba\n"); | 544 | " Peter Oruba\n"); |
497 | 545 | ||
498 | return 0; | 546 | return 0; |
499 | } | 547 | } |
548 | module_init(microcode_init); | ||
500 | 549 | ||
501 | static void __exit microcode_exit(void) | 550 | static void __exit microcode_exit(void) |
502 | { | 551 | { |
@@ -505,16 +554,17 @@ static void __exit microcode_exit(void) | |||
505 | unregister_hotcpu_notifier(&mc_cpu_notifier); | 554 | unregister_hotcpu_notifier(&mc_cpu_notifier); |
506 | 555 | ||
507 | get_online_cpus(); | 556 | get_online_cpus(); |
557 | mutex_lock(µcode_mutex); | ||
558 | |||
508 | sysdev_driver_unregister(&cpu_sysdev_class, &mc_sysdev_driver); | 559 | sysdev_driver_unregister(&cpu_sysdev_class, &mc_sysdev_driver); |
560 | |||
561 | mutex_unlock(µcode_mutex); | ||
509 | put_online_cpus(); | 562 | put_online_cpus(); |
510 | 563 | ||
511 | platform_device_unregister(microcode_pdev); | 564 | platform_device_unregister(microcode_pdev); |
512 | 565 | ||
513 | microcode_ops = NULL; | 566 | microcode_ops = NULL; |
514 | 567 | ||
515 | printk(KERN_INFO | 568 | pr_info("Microcode Update Driver: v" MICROCODE_VERSION " removed.\n"); |
516 | "Microcode Update Driver: v" MICROCODE_VERSION " removed.\n"); | ||
517 | } | 569 | } |
518 | |||
519 | module_init(microcode_init); | ||
520 | module_exit(microcode_exit); | 570 | module_exit(microcode_exit); |
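
The microcode_core.c rework above replaces the work_on_cpu()/set_cpus_allowed_ptr() games with small per-cpu helpers built on smp_call_function_single(). A minimal sketch of that pattern follows; do_something_on() and the op_* names are placeholders for illustration, not kernel symbols:

#include <linux/smp.h>

struct op_ctx {
	int err;
};

/* Placeholder for the real per-cpu operation (illustrative only). */
static int do_something_on(int cpu)
{
	return 0;
}

/* Runs on @cpu (via IPI when @cpu is not the calling cpu). */
static void op_local(void *arg)
{
	struct op_ctx *ctx = arg;

	ctx->err = do_something_on(smp_processor_id());
}

static int op_on_target(int cpu)
{
	struct op_ctx ctx = { .err = 0 };
	int ret;

	/* wait == 1: return only after op_local() has finished on @cpu. */
	ret = smp_call_function_single(cpu, op_local, &ctx, 1);
	if (!ret)
		ret = ctx.err;

	return ret;
}

Because the loaders no longer have to execute on the cpu they load for, only the apply step still runs on the target cpu, which is why the BUG_ON(cpu != raw_smp_processor_id()) checks disappear from the vendor request_microcode_*() paths.
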
diff --git a/arch/x86/kernel/microcode_intel.c b/arch/x86/kernel/microcode_intel.c index 149b9ec7c1ab..0d334ddd0a96 100644 --- a/arch/x86/kernel/microcode_intel.c +++ b/arch/x86/kernel/microcode_intel.c | |||
@@ -70,24 +70,11 @@ | |||
70 | * Fix sigmatch() macro to handle old CPUs with pf == 0. | 70 | * Fix sigmatch() macro to handle old CPUs with pf == 0. |
71 | * Thanks to Stuart Swales for pointing out this bug. | 71 | * Thanks to Stuart Swales for pointing out this bug. |
72 | */ | 72 | */ |
73 | #include <linux/platform_device.h> | ||
74 | #include <linux/capability.h> | ||
75 | #include <linux/miscdevice.h> | ||
76 | #include <linux/firmware.h> | 73 | #include <linux/firmware.h> |
77 | #include <linux/smp_lock.h> | ||
78 | #include <linux/spinlock.h> | ||
79 | #include <linux/cpumask.h> | ||
80 | #include <linux/uaccess.h> | 74 | #include <linux/uaccess.h> |
81 | #include <linux/vmalloc.h> | ||
82 | #include <linux/kernel.h> | 75 | #include <linux/kernel.h> |
83 | #include <linux/module.h> | 76 | #include <linux/module.h> |
84 | #include <linux/mutex.h> | 77 | #include <linux/vmalloc.h> |
85 | #include <linux/sched.h> | ||
86 | #include <linux/init.h> | ||
87 | #include <linux/slab.h> | ||
88 | #include <linux/cpu.h> | ||
89 | #include <linux/fs.h> | ||
90 | #include <linux/mm.h> | ||
91 | 78 | ||
92 | #include <asm/microcode.h> | 79 | #include <asm/microcode.h> |
93 | #include <asm/processor.h> | 80 | #include <asm/processor.h> |
@@ -150,13 +137,9 @@ struct extended_sigtable { | |||
150 | 137 | ||
151 | #define exttable_size(et) ((et)->count * EXT_SIGNATURE_SIZE + EXT_HEADER_SIZE) | 138 | #define exttable_size(et) ((et)->count * EXT_SIGNATURE_SIZE + EXT_HEADER_SIZE) |
152 | 139 | ||
153 | /* serialize access to the physical write to MSR 0x79 */ | ||
154 | static DEFINE_SPINLOCK(microcode_update_lock); | ||
155 | |||
156 | static int collect_cpu_info(int cpu_num, struct cpu_signature *csig) | 140 | static int collect_cpu_info(int cpu_num, struct cpu_signature *csig) |
157 | { | 141 | { |
158 | struct cpuinfo_x86 *c = &cpu_data(cpu_num); | 142 | struct cpuinfo_x86 *c = &cpu_data(cpu_num); |
159 | unsigned long flags; | ||
160 | unsigned int val[2]; | 143 | unsigned int val[2]; |
161 | 144 | ||
162 | memset(csig, 0, sizeof(*csig)); | 145 | memset(csig, 0, sizeof(*csig)); |
@@ -176,18 +159,14 @@ static int collect_cpu_info(int cpu_num, struct cpu_signature *csig) | |||
176 | csig->pf = 1 << ((val[1] >> 18) & 7); | 159 | csig->pf = 1 << ((val[1] >> 18) & 7); |
177 | } | 160 | } |
178 | 161 | ||
179 | /* serialize access to the physical write to MSR 0x79 */ | ||
180 | spin_lock_irqsave(µcode_update_lock, flags); | ||
181 | |||
182 | wrmsr(MSR_IA32_UCODE_REV, 0, 0); | 162 | wrmsr(MSR_IA32_UCODE_REV, 0, 0); |
183 | /* see notes above for revision 1.07. Apparent chip bug */ | 163 | /* see notes above for revision 1.07. Apparent chip bug */ |
184 | sync_core(); | 164 | sync_core(); |
185 | /* get the current revision from MSR 0x8B */ | 165 | /* get the current revision from MSR 0x8B */ |
186 | rdmsr(MSR_IA32_UCODE_REV, val[0], csig->rev); | 166 | rdmsr(MSR_IA32_UCODE_REV, val[0], csig->rev); |
187 | spin_unlock_irqrestore(µcode_update_lock, flags); | ||
188 | 167 | ||
189 | pr_debug("microcode: collect_cpu_info : sig=0x%x, pf=0x%x, rev=0x%x\n", | 168 | printk(KERN_INFO "microcode: CPU%d sig=0x%x, pf=0x%x, revision=0x%x\n", |
190 | csig->sig, csig->pf, csig->rev); | 169 | cpu_num, csig->sig, csig->pf, csig->rev); |
191 | 170 | ||
192 | return 0; | 171 | return 0; |
193 | } | 172 | } |
@@ -318,11 +297,10 @@ get_matching_microcode(struct cpu_signature *cpu_sig, void *mc, int rev) | |||
318 | return 0; | 297 | return 0; |
319 | } | 298 | } |
320 | 299 | ||
321 | static void apply_microcode(int cpu) | 300 | static int apply_microcode(int cpu) |
322 | { | 301 | { |
323 | struct microcode_intel *mc_intel; | 302 | struct microcode_intel *mc_intel; |
324 | struct ucode_cpu_info *uci; | 303 | struct ucode_cpu_info *uci; |
325 | unsigned long flags; | ||
326 | unsigned int val[2]; | 304 | unsigned int val[2]; |
327 | int cpu_num; | 305 | int cpu_num; |
328 | 306 | ||
@@ -334,10 +312,7 @@ static void apply_microcode(int cpu) | |||
334 | BUG_ON(cpu_num != cpu); | 312 | BUG_ON(cpu_num != cpu); |
335 | 313 | ||
336 | if (mc_intel == NULL) | 314 | if (mc_intel == NULL) |
337 | return; | 315 | return 0; |
338 | |||
339 | /* serialize access to the physical write to MSR 0x79 */ | ||
340 | spin_lock_irqsave(µcode_update_lock, flags); | ||
341 | 316 | ||
342 | /* write microcode via MSR 0x79 */ | 317 | /* write microcode via MSR 0x79 */ |
343 | wrmsr(MSR_IA32_UCODE_WRITE, | 318 | wrmsr(MSR_IA32_UCODE_WRITE, |
@@ -351,30 +326,32 @@ static void apply_microcode(int cpu) | |||
351 | /* get the current revision from MSR 0x8B */ | 326 | /* get the current revision from MSR 0x8B */ |
352 | rdmsr(MSR_IA32_UCODE_REV, val[0], val[1]); | 327 | rdmsr(MSR_IA32_UCODE_REV, val[0], val[1]); |
353 | 328 | ||
354 | spin_unlock_irqrestore(µcode_update_lock, flags); | ||
355 | if (val[1] != mc_intel->hdr.rev) { | 329 | if (val[1] != mc_intel->hdr.rev) { |
356 | printk(KERN_ERR "microcode: CPU%d update from revision " | 330 | printk(KERN_ERR "microcode: CPU%d update " |
357 | "0x%x to 0x%x failed\n", | 331 | "to revision 0x%x failed\n", |
358 | cpu_num, uci->cpu_sig.rev, val[1]); | 332 | cpu_num, mc_intel->hdr.rev); |
359 | return; | 333 | return -1; |
360 | } | 334 | } |
361 | printk(KERN_INFO "microcode: CPU%d updated from revision " | 335 | printk(KERN_INFO "microcode: CPU%d updated to revision " |
362 | "0x%x to 0x%x, date = %04x-%02x-%02x \n", | 336 | "0x%x, date = %04x-%02x-%02x \n", |
363 | cpu_num, uci->cpu_sig.rev, val[1], | 337 | cpu_num, val[1], |
364 | mc_intel->hdr.date & 0xffff, | 338 | mc_intel->hdr.date & 0xffff, |
365 | mc_intel->hdr.date >> 24, | 339 | mc_intel->hdr.date >> 24, |
366 | (mc_intel->hdr.date >> 16) & 0xff); | 340 | (mc_intel->hdr.date >> 16) & 0xff); |
367 | 341 | ||
368 | uci->cpu_sig.rev = val[1]; | 342 | uci->cpu_sig.rev = val[1]; |
343 | |||
344 | return 0; | ||
369 | } | 345 | } |
370 | 346 | ||
371 | static int generic_load_microcode(int cpu, void *data, size_t size, | 347 | static enum ucode_state generic_load_microcode(int cpu, void *data, size_t size, |
372 | int (*get_ucode_data)(void *, const void *, size_t)) | 348 | int (*get_ucode_data)(void *, const void *, size_t)) |
373 | { | 349 | { |
374 | struct ucode_cpu_info *uci = ucode_cpu_info + cpu; | 350 | struct ucode_cpu_info *uci = ucode_cpu_info + cpu; |
375 | u8 *ucode_ptr = data, *new_mc = NULL, *mc; | 351 | u8 *ucode_ptr = data, *new_mc = NULL, *mc; |
376 | int new_rev = uci->cpu_sig.rev; | 352 | int new_rev = uci->cpu_sig.rev; |
377 | unsigned int leftover = size; | 353 | unsigned int leftover = size; |
354 | enum ucode_state state = UCODE_OK; | ||
378 | 355 | ||
379 | while (leftover) { | 356 | while (leftover) { |
380 | struct microcode_header_intel mc_header; | 357 | struct microcode_header_intel mc_header; |
@@ -412,11 +389,15 @@ static int generic_load_microcode(int cpu, void *data, size_t size, | |||
412 | leftover -= mc_size; | 389 | leftover -= mc_size; |
413 | } | 390 | } |
414 | 391 | ||
415 | if (!new_mc) | 392 | if (leftover) { |
393 | if (new_mc) | ||
394 | vfree(new_mc); | ||
395 | state = UCODE_ERROR; | ||
416 | goto out; | 396 | goto out; |
397 | } | ||
417 | 398 | ||
418 | if (leftover) { | 399 | if (!new_mc) { |
419 | vfree(new_mc); | 400 | state = UCODE_NFOUND; |
420 | goto out; | 401 | goto out; |
421 | } | 402 | } |
422 | 403 | ||
@@ -427,9 +408,8 @@ static int generic_load_microcode(int cpu, void *data, size_t size, | |||
427 | pr_debug("microcode: CPU%d found a matching microcode update with" | 408 | pr_debug("microcode: CPU%d found a matching microcode update with" |
428 | " version 0x%x (current=0x%x)\n", | 409 | " version 0x%x (current=0x%x)\n", |
429 | cpu, new_rev, uci->cpu_sig.rev); | 410 | cpu, new_rev, uci->cpu_sig.rev); |
430 | 411 | out: | |
431 | out: | 412 | return state; |
432 | return (int)leftover; | ||
433 | } | 413 | } |
434 | 414 | ||
435 | static int get_ucode_fw(void *to, const void *from, size_t n) | 415 | static int get_ucode_fw(void *to, const void *from, size_t n) |
@@ -438,21 +418,19 @@ static int get_ucode_fw(void *to, const void *from, size_t n) | |||
438 | return 0; | 418 | return 0; |
439 | } | 419 | } |
440 | 420 | ||
441 | static int request_microcode_fw(int cpu, struct device *device) | 421 | static enum ucode_state request_microcode_fw(int cpu, struct device *device) |
442 | { | 422 | { |
443 | char name[30]; | 423 | char name[30]; |
444 | struct cpuinfo_x86 *c = &cpu_data(cpu); | 424 | struct cpuinfo_x86 *c = &cpu_data(cpu); |
445 | const struct firmware *firmware; | 425 | const struct firmware *firmware; |
446 | int ret; | 426 | enum ucode_state ret; |
447 | 427 | ||
448 | /* We should bind the task to the CPU */ | ||
449 | BUG_ON(cpu != raw_smp_processor_id()); | ||
450 | sprintf(name, "intel-ucode/%02x-%02x-%02x", | 428 | sprintf(name, "intel-ucode/%02x-%02x-%02x", |
451 | c->x86, c->x86_model, c->x86_mask); | 429 | c->x86, c->x86_model, c->x86_mask); |
452 | ret = request_firmware(&firmware, name, device); | 430 | |
453 | if (ret) { | 431 | if (request_firmware(&firmware, name, device)) { |
454 | pr_debug("microcode: data file %s load failed\n", name); | 432 | pr_debug("microcode: data file %s load failed\n", name); |
455 | return ret; | 433 | return UCODE_NFOUND; |
456 | } | 434 | } |
457 | 435 | ||
458 | ret = generic_load_microcode(cpu, (void *)firmware->data, | 436 | ret = generic_load_microcode(cpu, (void *)firmware->data, |
@@ -468,11 +446,9 @@ static int get_ucode_user(void *to, const void *from, size_t n) | |||
468 | return copy_from_user(to, from, n); | 446 | return copy_from_user(to, from, n); |
469 | } | 447 | } |
470 | 448 | ||
471 | static int request_microcode_user(int cpu, const void __user *buf, size_t size) | 449 | static enum ucode_state |
450 | request_microcode_user(int cpu, const void __user *buf, size_t size) | ||
472 | { | 451 | { |
473 | /* We should bind the task to the CPU */ | ||
474 | BUG_ON(cpu != raw_smp_processor_id()); | ||
475 | |||
476 | return generic_load_microcode(cpu, (void *)buf, size, &get_ucode_user); | 452 | return generic_load_microcode(cpu, (void *)buf, size, &get_ucode_user); |
477 | } | 453 | } |
478 | 454 | ||
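
Both vendor loaders now report one of three ucode_state values instead of an errno-style int. A small sketch of the convention the callers in microcode_core.c rely on; act_on() is an illustrative helper, not part of the patch, and the enum is redeclared here only for the example:

#include <linux/errno.h>

enum ucode_state { UCODE_OK, UCODE_NFOUND, UCODE_ERROR };

static int act_on(enum ucode_state ustate)
{
	switch (ustate) {
	case UCODE_OK:		/* image parsed and cached - apply it next */
		return 0;
	case UCODE_NFOUND:	/* no (matching) image - keep current revision */
		return 0;
	case UCODE_ERROR:	/* malformed data or internal failure */
	default:
		return -EINVAL;
	}
}

reload_for_cpu() and do_microcode_update() above follow exactly this split: UCODE_OK triggers apply_microcode_on_target(), UCODE_NFOUND is tolerated silently, and UCODE_ERROR is turned into an error code.
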
diff --git a/arch/x86/kernel/module_64.c b/arch/x86/kernel/module.c index c23880b90b5c..89f386f044e4 100644 --- a/arch/x86/kernel/module_64.c +++ b/arch/x86/kernel/module.c | |||
@@ -1,6 +1,5 @@ | |||
1 | /* Kernel module help for x86-64 | 1 | /* Kernel module help for x86. |
2 | Copyright (C) 2001 Rusty Russell. | 2 | Copyright (C) 2001 Rusty Russell. |
3 | Copyright (C) 2002,2003 Andi Kleen, SuSE Labs. | ||
4 | 3 | ||
5 | This program is free software; you can redistribute it and/or modify | 4 | This program is free software; you can redistribute it and/or modify |
6 | it under the terms of the GNU General Public License as published by | 5 | it under the terms of the GNU General Public License as published by |
@@ -22,23 +21,18 @@ | |||
22 | #include <linux/fs.h> | 21 | #include <linux/fs.h> |
23 | #include <linux/string.h> | 22 | #include <linux/string.h> |
24 | #include <linux/kernel.h> | 23 | #include <linux/kernel.h> |
25 | #include <linux/mm.h> | ||
26 | #include <linux/slab.h> | ||
27 | #include <linux/bug.h> | 24 | #include <linux/bug.h> |
25 | #include <linux/mm.h> | ||
28 | 26 | ||
29 | #include <asm/system.h> | 27 | #include <asm/system.h> |
30 | #include <asm/page.h> | 28 | #include <asm/page.h> |
31 | #include <asm/pgtable.h> | 29 | #include <asm/pgtable.h> |
32 | 30 | ||
31 | #if 0 | ||
32 | #define DEBUGP printk | ||
33 | #else | ||
33 | #define DEBUGP(fmt...) | 34 | #define DEBUGP(fmt...) |
34 | 35 | #endif | |
35 | #ifndef CONFIG_UML | ||
36 | void module_free(struct module *mod, void *module_region) | ||
37 | { | ||
38 | vfree(module_region); | ||
39 | /* FIXME: If module_region == mod->init_region, trim exception | ||
40 | table entries. */ | ||
41 | } | ||
42 | 36 | ||
43 | void *module_alloc(unsigned long size) | 37 | void *module_alloc(unsigned long size) |
44 | { | 38 | { |
@@ -54,9 +48,15 @@ void *module_alloc(unsigned long size) | |||
54 | if (!area) | 48 | if (!area) |
55 | return NULL; | 49 | return NULL; |
56 | 50 | ||
57 | return __vmalloc_area(area, GFP_KERNEL, PAGE_KERNEL_EXEC); | 51 | return __vmalloc_area(area, GFP_KERNEL | __GFP_HIGHMEM, |
52 | PAGE_KERNEL_EXEC); | ||
53 | } | ||
54 | |||
55 | /* Free memory returned from module_alloc */ | ||
56 | void module_free(struct module *mod, void *module_region) | ||
57 | { | ||
58 | vfree(module_region); | ||
58 | } | 59 | } |
59 | #endif | ||
60 | 60 | ||
61 | /* We don't need anything special. */ | 61 | /* We don't need anything special. */ |
62 | int module_frob_arch_sections(Elf_Ehdr *hdr, | 62 | int module_frob_arch_sections(Elf_Ehdr *hdr, |
@@ -67,6 +67,58 @@ int module_frob_arch_sections(Elf_Ehdr *hdr, | |||
67 | return 0; | 67 | return 0; |
68 | } | 68 | } |
69 | 69 | ||
70 | #ifdef CONFIG_X86_32 | ||
71 | int apply_relocate(Elf32_Shdr *sechdrs, | ||
72 | const char *strtab, | ||
73 | unsigned int symindex, | ||
74 | unsigned int relsec, | ||
75 | struct module *me) | ||
76 | { | ||
77 | unsigned int i; | ||
78 | Elf32_Rel *rel = (void *)sechdrs[relsec].sh_addr; | ||
79 | Elf32_Sym *sym; | ||
80 | uint32_t *location; | ||
81 | |||
82 | DEBUGP("Applying relocate section %u to %u\n", relsec, | ||
83 | sechdrs[relsec].sh_info); | ||
84 | for (i = 0; i < sechdrs[relsec].sh_size / sizeof(*rel); i++) { | ||
85 | /* This is where to make the change */ | ||
86 | location = (void *)sechdrs[sechdrs[relsec].sh_info].sh_addr | ||
87 | + rel[i].r_offset; | ||
88 | /* This is the symbol it is referring to. Note that all | ||
89 | undefined symbols have been resolved. */ | ||
90 | sym = (Elf32_Sym *)sechdrs[symindex].sh_addr | ||
91 | + ELF32_R_SYM(rel[i].r_info); | ||
92 | |||
93 | switch (ELF32_R_TYPE(rel[i].r_info)) { | ||
94 | case R_386_32: | ||
95 | /* We add the value into the location given */ | ||
96 | *location += sym->st_value; | ||
97 | break; | ||
98 | case R_386_PC32: | ||
99 | /* Add the value, subtract its position */ | ||
100 | *location += sym->st_value - (uint32_t)location; | ||
101 | break; | ||
102 | default: | ||
103 | printk(KERN_ERR "module %s: Unknown relocation: %u\n", | ||
104 | me->name, ELF32_R_TYPE(rel[i].r_info)); | ||
105 | return -ENOEXEC; | ||
106 | } | ||
107 | } | ||
108 | return 0; | ||
109 | } | ||
110 | |||
111 | int apply_relocate_add(Elf32_Shdr *sechdrs, | ||
112 | const char *strtab, | ||
113 | unsigned int symindex, | ||
114 | unsigned int relsec, | ||
115 | struct module *me) | ||
116 | { | ||
117 | printk(KERN_ERR "module %s: ADD RELOCATION unsupported\n", | ||
118 | me->name); | ||
119 | return -ENOEXEC; | ||
120 | } | ||
121 | #else /*X86_64*/ | ||
70 | int apply_relocate_add(Elf64_Shdr *sechdrs, | 122 | int apply_relocate_add(Elf64_Shdr *sechdrs, |
71 | const char *strtab, | 123 | const char *strtab, |
72 | unsigned int symindex, | 124 | unsigned int symindex, |
@@ -147,6 +199,8 @@ int apply_relocate(Elf_Shdr *sechdrs, | |||
147 | return -ENOSYS; | 199 | return -ENOSYS; |
148 | } | 200 | } |
149 | 201 | ||
202 | #endif | ||
203 | |||
150 | int module_finalize(const Elf_Ehdr *hdr, | 204 | int module_finalize(const Elf_Ehdr *hdr, |
151 | const Elf_Shdr *sechdrs, | 205 | const Elf_Shdr *sechdrs, |
152 | struct module *me) | 206 | struct module *me) |
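
The 32-bit relocation handling folded into module.c above is easiest to see with concrete numbers. Below is a self-contained sketch of the arithmetic; all addresses and the -4 addend are made-up example values:

#include <assert.h>
#include <stdint.h>
#include <stdio.h>

int main(void)
{
	uint32_t S = 0x2000;	/* st_value of the resolved symbol */
	uint32_t P = 0x0ffc;	/* address of the 32-bit field being patched */
	uint32_t loc;

	/* R_386_32: absolute - the field ends up holding S plus the addend. */
	loc = 0;		/* implicit addend stored in the section */
	loc += S;
	assert(loc == 0x2000);

	/*
	 * R_386_PC32: pc-relative - e.g. the rel32 of a call at 0x0ffb.
	 * The -4 addend compensates for the cpu adding the address of
	 * the *next* instruction (0x1000) at run time.
	 */
	loc = (uint32_t)-4;
	loc += S - P;
	assert(loc == 0x1000);	/* 0x1000 + 0x1000 == S at run time */

	printf("R_386_PC32 field: %#x\n", (unsigned)loc);
	return 0;
}

R_386_32 simply adds the symbol's absolute address to whatever addend is already stored; R_386_PC32 additionally subtracts the address of the patched field so the stored displacement stays correct relative to the instruction pointer.
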
diff --git a/arch/x86/kernel/module_32.c b/arch/x86/kernel/module_32.c deleted file mode 100644 index 0edd819050e7..000000000000 --- a/arch/x86/kernel/module_32.c +++ /dev/null | |||
@@ -1,152 +0,0 @@ | |||
1 | /* Kernel module help for i386. | ||
2 | Copyright (C) 2001 Rusty Russell. | ||
3 | |||
4 | This program is free software; you can redistribute it and/or modify | ||
5 | it under the terms of the GNU General Public License as published by | ||
6 | the Free Software Foundation; either version 2 of the License, or | ||
7 | (at your option) any later version. | ||
8 | |||
9 | This program is distributed in the hope that it will be useful, | ||
10 | but WITHOUT ANY WARRANTY; without even the implied warranty of | ||
11 | MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the | ||
12 | GNU General Public License for more details. | ||
13 | |||
14 | You should have received a copy of the GNU General Public License | ||
15 | along with this program; if not, write to the Free Software | ||
16 | Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA | ||
17 | */ | ||
18 | #include <linux/moduleloader.h> | ||
19 | #include <linux/elf.h> | ||
20 | #include <linux/vmalloc.h> | ||
21 | #include <linux/fs.h> | ||
22 | #include <linux/string.h> | ||
23 | #include <linux/kernel.h> | ||
24 | #include <linux/bug.h> | ||
25 | |||
26 | #if 0 | ||
27 | #define DEBUGP printk | ||
28 | #else | ||
29 | #define DEBUGP(fmt...) | ||
30 | #endif | ||
31 | |||
32 | void *module_alloc(unsigned long size) | ||
33 | { | ||
34 | if (size == 0) | ||
35 | return NULL; | ||
36 | return vmalloc_exec(size); | ||
37 | } | ||
38 | |||
39 | |||
40 | /* Free memory returned from module_alloc */ | ||
41 | void module_free(struct module *mod, void *module_region) | ||
42 | { | ||
43 | vfree(module_region); | ||
44 | /* FIXME: If module_region == mod->init_region, trim exception | ||
45 | table entries. */ | ||
46 | } | ||
47 | |||
48 | /* We don't need anything special. */ | ||
49 | int module_frob_arch_sections(Elf_Ehdr *hdr, | ||
50 | Elf_Shdr *sechdrs, | ||
51 | char *secstrings, | ||
52 | struct module *mod) | ||
53 | { | ||
54 | return 0; | ||
55 | } | ||
56 | |||
57 | int apply_relocate(Elf32_Shdr *sechdrs, | ||
58 | const char *strtab, | ||
59 | unsigned int symindex, | ||
60 | unsigned int relsec, | ||
61 | struct module *me) | ||
62 | { | ||
63 | unsigned int i; | ||
64 | Elf32_Rel *rel = (void *)sechdrs[relsec].sh_addr; | ||
65 | Elf32_Sym *sym; | ||
66 | uint32_t *location; | ||
67 | |||
68 | DEBUGP("Applying relocate section %u to %u\n", relsec, | ||
69 | sechdrs[relsec].sh_info); | ||
70 | for (i = 0; i < sechdrs[relsec].sh_size / sizeof(*rel); i++) { | ||
71 | /* This is where to make the change */ | ||
72 | location = (void *)sechdrs[sechdrs[relsec].sh_info].sh_addr | ||
73 | + rel[i].r_offset; | ||
74 | /* This is the symbol it is referring to. Note that all | ||
75 | undefined symbols have been resolved. */ | ||
76 | sym = (Elf32_Sym *)sechdrs[symindex].sh_addr | ||
77 | + ELF32_R_SYM(rel[i].r_info); | ||
78 | |||
79 | switch (ELF32_R_TYPE(rel[i].r_info)) { | ||
80 | case R_386_32: | ||
81 | /* We add the value into the location given */ | ||
82 | *location += sym->st_value; | ||
83 | break; | ||
84 | case R_386_PC32: | ||
85 | /* Add the value, subtract its postition */ | ||
86 | *location += sym->st_value - (uint32_t)location; | ||
87 | break; | ||
88 | default: | ||
89 | printk(KERN_ERR "module %s: Unknown relocation: %u\n", | ||
90 | me->name, ELF32_R_TYPE(rel[i].r_info)); | ||
91 | return -ENOEXEC; | ||
92 | } | ||
93 | } | ||
94 | return 0; | ||
95 | } | ||
96 | |||
97 | int apply_relocate_add(Elf32_Shdr *sechdrs, | ||
98 | const char *strtab, | ||
99 | unsigned int symindex, | ||
100 | unsigned int relsec, | ||
101 | struct module *me) | ||
102 | { | ||
103 | printk(KERN_ERR "module %s: ADD RELOCATION unsupported\n", | ||
104 | me->name); | ||
105 | return -ENOEXEC; | ||
106 | } | ||
107 | |||
108 | int module_finalize(const Elf_Ehdr *hdr, | ||
109 | const Elf_Shdr *sechdrs, | ||
110 | struct module *me) | ||
111 | { | ||
112 | const Elf_Shdr *s, *text = NULL, *alt = NULL, *locks = NULL, | ||
113 | *para = NULL; | ||
114 | char *secstrings = (void *)hdr + sechdrs[hdr->e_shstrndx].sh_offset; | ||
115 | |||
116 | for (s = sechdrs; s < sechdrs + hdr->e_shnum; s++) { | ||
117 | if (!strcmp(".text", secstrings + s->sh_name)) | ||
118 | text = s; | ||
119 | if (!strcmp(".altinstructions", secstrings + s->sh_name)) | ||
120 | alt = s; | ||
121 | if (!strcmp(".smp_locks", secstrings + s->sh_name)) | ||
122 | locks = s; | ||
123 | if (!strcmp(".parainstructions", secstrings + s->sh_name)) | ||
124 | para = s; | ||
125 | } | ||
126 | |||
127 | if (alt) { | ||
128 | /* patch .altinstructions */ | ||
129 | void *aseg = (void *)alt->sh_addr; | ||
130 | apply_alternatives(aseg, aseg + alt->sh_size); | ||
131 | } | ||
132 | if (locks && text) { | ||
133 | void *lseg = (void *)locks->sh_addr; | ||
134 | void *tseg = (void *)text->sh_addr; | ||
135 | alternatives_smp_module_add(me, me->name, | ||
136 | lseg, lseg + locks->sh_size, | ||
137 | tseg, tseg + text->sh_size); | ||
138 | } | ||
139 | |||
140 | if (para) { | ||
141 | void *pseg = (void *)para->sh_addr; | ||
142 | apply_paravirt(pseg, pseg + para->sh_size); | ||
143 | } | ||
144 | |||
145 | return module_bug_finalize(hdr, sechdrs, me); | ||
146 | } | ||
147 | |||
148 | void module_arch_cleanup(struct module *mod) | ||
149 | { | ||
150 | alternatives_smp_module_del(mod); | ||
151 | module_bug_cleanup(mod); | ||
152 | } | ||
diff --git a/arch/x86/kernel/mpparse.c b/arch/x86/kernel/mpparse.c index 70fd7e414c15..651c93b28862 100644 --- a/arch/x86/kernel/mpparse.c +++ b/arch/x86/kernel/mpparse.c | |||
@@ -17,6 +17,7 @@ | |||
17 | #include <linux/acpi.h> | 17 | #include <linux/acpi.h> |
18 | #include <linux/module.h> | 18 | #include <linux/module.h> |
19 | #include <linux/smp.h> | 19 | #include <linux/smp.h> |
20 | #include <linux/pci.h> | ||
20 | 21 | ||
21 | #include <asm/mtrr.h> | 22 | #include <asm/mtrr.h> |
22 | #include <asm/mpspec.h> | 23 | #include <asm/mpspec.h> |
@@ -870,24 +871,17 @@ static | |||
870 | inline void __init check_irq_src(struct mpc_intsrc *m, int *nr_m_spare) {} | 871 | inline void __init check_irq_src(struct mpc_intsrc *m, int *nr_m_spare) {} |
871 | #endif /* CONFIG_X86_IO_APIC */ | 872 | #endif /* CONFIG_X86_IO_APIC */ |
872 | 873 | ||
873 | static int check_slot(unsigned long mpc_new_phys, unsigned long mpc_new_length, | 874 | static int |
874 | int count) | 875 | check_slot(unsigned long mpc_new_phys, unsigned long mpc_new_length, int count) |
875 | { | 876 | { |
876 | if (!mpc_new_phys) { | 877 | int ret = 0; |
877 | pr_info("No spare slots, try to append...take your risk, " | 878 | |
878 | "new mpc_length %x\n", count); | 879 | if (!mpc_new_phys || count <= mpc_new_length) { |
879 | } else { | 880 | WARN(1, "update_mptable: No spare slots (length: %x)\n", count); |
880 | if (count <= mpc_new_length) | 881 | return -1; |
881 | pr_info("No spare slots, try to append..., " | ||
882 | "new mpc_length %x\n", count); | ||
883 | else { | ||
884 | pr_err("mpc_new_length %lx is too small\n", | ||
885 | mpc_new_length); | ||
886 | return -1; | ||
887 | } | ||
888 | } | 882 | } |
889 | 883 | ||
890 | return 0; | 884 | return ret; |
891 | } | 885 | } |
892 | 886 | ||
893 | static int __init replace_intsrc_all(struct mpc_table *mpc, | 887 | static int __init replace_intsrc_all(struct mpc_table *mpc, |
@@ -946,7 +940,7 @@ static int __init replace_intsrc_all(struct mpc_table *mpc, | |||
946 | } else { | 940 | } else { |
947 | struct mpc_intsrc *m = (struct mpc_intsrc *)mpt; | 941 | struct mpc_intsrc *m = (struct mpc_intsrc *)mpt; |
948 | count += sizeof(struct mpc_intsrc); | 942 | count += sizeof(struct mpc_intsrc); |
949 | if (!check_slot(mpc_new_phys, mpc_new_length, count)) | 943 | if (check_slot(mpc_new_phys, mpc_new_length, count) < 0) |
950 | goto out; | 944 | goto out; |
951 | assign_to_mpc_intsrc(&mp_irqs[i], m); | 945 | assign_to_mpc_intsrc(&mp_irqs[i], m); |
952 | mpc->length = count; | 946 | mpc->length = count; |
@@ -963,11 +957,14 @@ out: | |||
963 | return 0; | 957 | return 0; |
964 | } | 958 | } |
965 | 959 | ||
966 | static int __initdata enable_update_mptable; | 960 | int enable_update_mptable; |
967 | 961 | ||
968 | static int __init update_mptable_setup(char *str) | 962 | static int __init update_mptable_setup(char *str) |
969 | { | 963 | { |
970 | enable_update_mptable = 1; | 964 | enable_update_mptable = 1; |
965 | #ifdef CONFIG_PCI | ||
966 | pci_routeirq = 1; | ||
967 | #endif | ||
971 | return 0; | 968 | return 0; |
972 | } | 969 | } |
973 | early_param("update_mptable", update_mptable_setup); | 970 | early_param("update_mptable", update_mptable_setup); |
@@ -980,6 +977,9 @@ static int __initdata alloc_mptable; | |||
980 | static int __init parse_alloc_mptable_opt(char *p) | 977 | static int __init parse_alloc_mptable_opt(char *p) |
981 | { | 978 | { |
982 | enable_update_mptable = 1; | 979 | enable_update_mptable = 1; |
980 | #ifdef CONFIG_PCI | ||
981 | pci_routeirq = 1; | ||
982 | #endif | ||
983 | alloc_mptable = 1; | 983 | alloc_mptable = 1; |
984 | if (!p) | 984 | if (!p) |
985 | return 0; | 985 | return 0; |
diff --git a/arch/x86/kernel/msr.c b/arch/x86/kernel/msr.c index 3cf3413ec626..98fd6cd4e3a4 100644 --- a/arch/x86/kernel/msr.c +++ b/arch/x86/kernel/msr.c | |||
@@ -196,6 +196,11 @@ static struct notifier_block __refdata msr_class_cpu_notifier = { | |||
196 | .notifier_call = msr_class_cpu_callback, | 196 | .notifier_call = msr_class_cpu_callback, |
197 | }; | 197 | }; |
198 | 198 | ||
199 | static char *msr_nodename(struct device *dev) | ||
200 | { | ||
201 | return kasprintf(GFP_KERNEL, "cpu/%u/msr", MINOR(dev->devt)); | ||
202 | } | ||
203 | |||
199 | static int __init msr_init(void) | 204 | static int __init msr_init(void) |
200 | { | 205 | { |
201 | int i, err = 0; | 206 | int i, err = 0; |
@@ -212,6 +217,7 @@ static int __init msr_init(void) | |||
212 | err = PTR_ERR(msr_class); | 217 | err = PTR_ERR(msr_class); |
213 | goto out_chrdev; | 218 | goto out_chrdev; |
214 | } | 219 | } |
220 | msr_class->nodename = msr_nodename; | ||
215 | for_each_online_cpu(i) { | 221 | for_each_online_cpu(i) { |
216 | err = msr_device_create(i); | 222 | err = msr_device_create(i); |
217 | if (err != 0) | 223 | if (err != 0) |
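
With the nodename hook above in place, the msr character devices are named cpu/<N>/msr, i.e. /dev/cpu/<N>/msr. For reference, reading from that node is an 8-byte pread() at an offset equal to the MSR index. The snippet below reads MSR 0x8b, the same IA32_UCODE_REV register the microcode driver polls; it is only an illustrative userspace sketch, not part of the patch, and needs root plus the msr driver loaded:

#include <fcntl.h>
#include <stdint.h>
#include <stdio.h>
#include <unistd.h>

int main(void)
{
	uint64_t val;
	int fd = open("/dev/cpu/0/msr", O_RDONLY);

	/* Offset selects the MSR; short or failed reads are errors. */
	if (fd < 0 || pread(fd, &val, sizeof(val), 0x8b) != sizeof(val)) {
		perror("msr");
		return 1;
	}
	printf("MSR 0x8b: %#llx\n", (unsigned long long)val);
	close(fd);
	return 0;
}
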
diff --git a/arch/x86/kernel/paravirt.c b/arch/x86/kernel/paravirt.c index 9faf43bea336..70ec9b951d76 100644 --- a/arch/x86/kernel/paravirt.c +++ b/arch/x86/kernel/paravirt.c | |||
@@ -248,18 +248,16 @@ static DEFINE_PER_CPU(enum paravirt_lazy_mode, paravirt_lazy_mode) = PARAVIRT_LA | |||
248 | 248 | ||
249 | static inline void enter_lazy(enum paravirt_lazy_mode mode) | 249 | static inline void enter_lazy(enum paravirt_lazy_mode mode) |
250 | { | 250 | { |
251 | BUG_ON(__get_cpu_var(paravirt_lazy_mode) != PARAVIRT_LAZY_NONE); | 251 | BUG_ON(percpu_read(paravirt_lazy_mode) != PARAVIRT_LAZY_NONE); |
252 | BUG_ON(preemptible()); | ||
253 | 252 | ||
254 | __get_cpu_var(paravirt_lazy_mode) = mode; | 253 | percpu_write(paravirt_lazy_mode, mode); |
255 | } | 254 | } |
256 | 255 | ||
257 | void paravirt_leave_lazy(enum paravirt_lazy_mode mode) | 256 | static void leave_lazy(enum paravirt_lazy_mode mode) |
258 | { | 257 | { |
259 | BUG_ON(__get_cpu_var(paravirt_lazy_mode) != mode); | 258 | BUG_ON(percpu_read(paravirt_lazy_mode) != mode); |
260 | BUG_ON(preemptible()); | ||
261 | 259 | ||
262 | __get_cpu_var(paravirt_lazy_mode) = PARAVIRT_LAZY_NONE; | 260 | percpu_write(paravirt_lazy_mode, PARAVIRT_LAZY_NONE); |
263 | } | 261 | } |
264 | 262 | ||
265 | void paravirt_enter_lazy_mmu(void) | 263 | void paravirt_enter_lazy_mmu(void) |
@@ -269,22 +267,36 @@ void paravirt_enter_lazy_mmu(void) | |||
269 | 267 | ||
270 | void paravirt_leave_lazy_mmu(void) | 268 | void paravirt_leave_lazy_mmu(void) |
271 | { | 269 | { |
272 | paravirt_leave_lazy(PARAVIRT_LAZY_MMU); | 270 | leave_lazy(PARAVIRT_LAZY_MMU); |
273 | } | 271 | } |
274 | 272 | ||
275 | void paravirt_enter_lazy_cpu(void) | 273 | void paravirt_start_context_switch(struct task_struct *prev) |
276 | { | 274 | { |
275 | BUG_ON(preemptible()); | ||
276 | |||
277 | if (percpu_read(paravirt_lazy_mode) == PARAVIRT_LAZY_MMU) { | ||
278 | arch_leave_lazy_mmu_mode(); | ||
279 | set_ti_thread_flag(task_thread_info(prev), TIF_LAZY_MMU_UPDATES); | ||
280 | } | ||
277 | enter_lazy(PARAVIRT_LAZY_CPU); | 281 | enter_lazy(PARAVIRT_LAZY_CPU); |
278 | } | 282 | } |
279 | 283 | ||
280 | void paravirt_leave_lazy_cpu(void) | 284 | void paravirt_end_context_switch(struct task_struct *next) |
281 | { | 285 | { |
282 | paravirt_leave_lazy(PARAVIRT_LAZY_CPU); | 286 | BUG_ON(preemptible()); |
287 | |||
288 | leave_lazy(PARAVIRT_LAZY_CPU); | ||
289 | |||
290 | if (test_and_clear_ti_thread_flag(task_thread_info(next), TIF_LAZY_MMU_UPDATES)) | ||
291 | arch_enter_lazy_mmu_mode(); | ||
283 | } | 292 | } |
284 | 293 | ||
285 | enum paravirt_lazy_mode paravirt_get_lazy_mode(void) | 294 | enum paravirt_lazy_mode paravirt_get_lazy_mode(void) |
286 | { | 295 | { |
287 | return __get_cpu_var(paravirt_lazy_mode); | 296 | if (in_interrupt()) |
297 | return PARAVIRT_LAZY_NONE; | ||
298 | |||
299 | return percpu_read(paravirt_lazy_mode); | ||
288 | } | 300 | } |
289 | 301 | ||
290 | void arch_flush_lazy_mmu_mode(void) | 302 | void arch_flush_lazy_mmu_mode(void) |
@@ -292,7 +304,6 @@ void arch_flush_lazy_mmu_mode(void) | |||
292 | preempt_disable(); | 304 | preempt_disable(); |
293 | 305 | ||
294 | if (paravirt_get_lazy_mode() == PARAVIRT_LAZY_MMU) { | 306 | if (paravirt_get_lazy_mode() == PARAVIRT_LAZY_MMU) { |
295 | WARN_ON(preempt_count() == 1); | ||
296 | arch_leave_lazy_mmu_mode(); | 307 | arch_leave_lazy_mmu_mode(); |
297 | arch_enter_lazy_mmu_mode(); | 308 | arch_enter_lazy_mmu_mode(); |
298 | } | 309 | } |
@@ -300,19 +311,6 @@ void arch_flush_lazy_mmu_mode(void) | |||
300 | preempt_enable(); | 311 | preempt_enable(); |
301 | } | 312 | } |
302 | 313 | ||
303 | void arch_flush_lazy_cpu_mode(void) | ||
304 | { | ||
305 | preempt_disable(); | ||
306 | |||
307 | if (paravirt_get_lazy_mode() == PARAVIRT_LAZY_CPU) { | ||
308 | WARN_ON(preempt_count() == 1); | ||
309 | arch_leave_lazy_cpu_mode(); | ||
310 | arch_enter_lazy_cpu_mode(); | ||
311 | } | ||
312 | |||
313 | preempt_enable(); | ||
314 | } | ||
315 | |||
316 | struct pv_info pv_info = { | 314 | struct pv_info pv_info = { |
317 | .name = "bare hardware", | 315 | .name = "bare hardware", |
318 | .paravirt_enabled = 0, | 316 | .paravirt_enabled = 0, |
@@ -404,10 +402,8 @@ struct pv_cpu_ops pv_cpu_ops = { | |||
404 | .set_iopl_mask = native_set_iopl_mask, | 402 | .set_iopl_mask = native_set_iopl_mask, |
405 | .io_delay = native_io_delay, | 403 | .io_delay = native_io_delay, |
406 | 404 | ||
407 | .lazy_mode = { | 405 | .start_context_switch = paravirt_nop, |
408 | .enter = paravirt_nop, | 406 | .end_context_switch = paravirt_nop, |
409 | .leave = paravirt_nop, | ||
410 | }, | ||
411 | }; | 407 | }; |
412 | 408 | ||
413 | struct pv_apic_ops pv_apic_ops = { | 409 | struct pv_apic_ops pv_apic_ops = { |
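
The paravirt change above folds the old enter/leave lazy-cpu hooks into start/end context-switch hooks and uses a thread flag to carry a pending lazy-MMU section across the switch. A much-simplified sketch of that hand-over, with plain variables standing in for the per-cpu mode and for TIF_LAZY_MMU_UPDATES (all names here are illustrative):

enum lazy { LAZY_NONE, LAZY_MMU, LAZY_CPU };

static enum lazy lazy_mode;		/* percpu paravirt_lazy_mode */

struct task { int lazy_mmu_pending; };	/* stands in for TIF_LAZY_MMU_UPDATES */

static void start_context_switch(struct task *prev)
{
	if (lazy_mode == LAZY_MMU) {
		lazy_mode = LAZY_NONE;		/* arch_leave_lazy_mmu_mode() */
		prev->lazy_mmu_pending = 1;	/* remember to resume it */
	}
	lazy_mode = LAZY_CPU;			/* enter_lazy(PARAVIRT_LAZY_CPU) */
}

static void end_context_switch(struct task *next)
{
	lazy_mode = LAZY_NONE;			/* leave_lazy(PARAVIRT_LAZY_CPU) */

	if (next->lazy_mmu_pending) {
		next->lazy_mmu_pending = 0;
		lazy_mode = LAZY_MMU;		/* arch_enter_lazy_mmu_mode() */
	}
}

The effect is that a task preempted in the middle of a lazy MMU batch has the batch flushed before the switch and re-enters lazy MMU mode when it runs again.
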
diff --git a/arch/x86/kernel/pci-calgary_64.c b/arch/x86/kernel/pci-calgary_64.c index 755c21e906f3..971a3bec47a8 100644 --- a/arch/x86/kernel/pci-calgary_64.c +++ b/arch/x86/kernel/pci-calgary_64.c | |||
@@ -186,37 +186,6 @@ static struct cal_chipset_ops calioc2_chip_ops = { | |||
186 | 186 | ||
187 | static struct calgary_bus_info bus_info[MAX_PHB_BUS_NUM] = { { NULL, 0, 0 }, }; | 187 | static struct calgary_bus_info bus_info[MAX_PHB_BUS_NUM] = { { NULL, 0, 0 }, }; |
188 | 188 | ||
189 | /* enable this to stress test the chip's TCE cache */ | ||
190 | #ifdef CONFIG_IOMMU_DEBUG | ||
191 | static int debugging = 1; | ||
192 | |||
193 | static inline unsigned long verify_bit_range(unsigned long* bitmap, | ||
194 | int expected, unsigned long start, unsigned long end) | ||
195 | { | ||
196 | unsigned long idx = start; | ||
197 | |||
198 | BUG_ON(start >= end); | ||
199 | |||
200 | while (idx < end) { | ||
201 | if (!!test_bit(idx, bitmap) != expected) | ||
202 | return idx; | ||
203 | ++idx; | ||
204 | } | ||
205 | |||
206 | /* all bits have the expected value */ | ||
207 | return ~0UL; | ||
208 | } | ||
209 | #else /* debugging is disabled */ | ||
210 | static int debugging; | ||
211 | |||
212 | static inline unsigned long verify_bit_range(unsigned long* bitmap, | ||
213 | int expected, unsigned long start, unsigned long end) | ||
214 | { | ||
215 | return ~0UL; | ||
216 | } | ||
217 | |||
218 | #endif /* CONFIG_IOMMU_DEBUG */ | ||
219 | |||
220 | static inline int translation_enabled(struct iommu_table *tbl) | 189 | static inline int translation_enabled(struct iommu_table *tbl) |
221 | { | 190 | { |
222 | /* only PHBs with translation enabled have an IOMMU table */ | 191 | /* only PHBs with translation enabled have an IOMMU table */ |
@@ -228,7 +197,6 @@ static void iommu_range_reserve(struct iommu_table *tbl, | |||
228 | { | 197 | { |
229 | unsigned long index; | 198 | unsigned long index; |
230 | unsigned long end; | 199 | unsigned long end; |
231 | unsigned long badbit; | ||
232 | unsigned long flags; | 200 | unsigned long flags; |
233 | 201 | ||
234 | index = start_addr >> PAGE_SHIFT; | 202 | index = start_addr >> PAGE_SHIFT; |
@@ -243,14 +211,6 @@ static void iommu_range_reserve(struct iommu_table *tbl, | |||
243 | 211 | ||
244 | spin_lock_irqsave(&tbl->it_lock, flags); | 212 | spin_lock_irqsave(&tbl->it_lock, flags); |
245 | 213 | ||
246 | badbit = verify_bit_range(tbl->it_map, 0, index, end); | ||
247 | if (badbit != ~0UL) { | ||
248 | if (printk_ratelimit()) | ||
249 | printk(KERN_ERR "Calgary: entry already allocated at " | ||
250 | "0x%lx tbl %p dma 0x%lx npages %u\n", | ||
251 | badbit, tbl, start_addr, npages); | ||
252 | } | ||
253 | |||
254 | iommu_area_reserve(tbl->it_map, index, npages); | 214 | iommu_area_reserve(tbl->it_map, index, npages); |
255 | 215 | ||
256 | spin_unlock_irqrestore(&tbl->it_lock, flags); | 216 | spin_unlock_irqrestore(&tbl->it_lock, flags); |
@@ -326,7 +286,6 @@ static void iommu_free(struct iommu_table *tbl, dma_addr_t dma_addr, | |||
326 | unsigned int npages) | 286 | unsigned int npages) |
327 | { | 287 | { |
328 | unsigned long entry; | 288 | unsigned long entry; |
329 | unsigned long badbit; | ||
330 | unsigned long badend; | 289 | unsigned long badend; |
331 | unsigned long flags; | 290 | unsigned long flags; |
332 | 291 | ||
@@ -346,14 +305,6 @@ static void iommu_free(struct iommu_table *tbl, dma_addr_t dma_addr, | |||
346 | 305 | ||
347 | spin_lock_irqsave(&tbl->it_lock, flags); | 306 | spin_lock_irqsave(&tbl->it_lock, flags); |
348 | 307 | ||
349 | badbit = verify_bit_range(tbl->it_map, 1, entry, entry + npages); | ||
350 | if (badbit != ~0UL) { | ||
351 | if (printk_ratelimit()) | ||
352 | printk(KERN_ERR "Calgary: bit is off at 0x%lx " | ||
353 | "tbl %p dma 0x%Lx entry 0x%lx npages %u\n", | ||
354 | badbit, tbl, dma_addr, entry, npages); | ||
355 | } | ||
356 | |||
357 | iommu_area_free(tbl->it_map, entry, npages); | 308 | iommu_area_free(tbl->it_map, entry, npages); |
358 | 309 | ||
359 | spin_unlock_irqrestore(&tbl->it_lock, flags); | 310 | spin_unlock_irqrestore(&tbl->it_lock, flags); |
@@ -1488,9 +1439,8 @@ void __init detect_calgary(void) | |||
1488 | iommu_detected = 1; | 1439 | iommu_detected = 1; |
1489 | calgary_detected = 1; | 1440 | calgary_detected = 1; |
1490 | printk(KERN_INFO "PCI-DMA: Calgary IOMMU detected.\n"); | 1441 | printk(KERN_INFO "PCI-DMA: Calgary IOMMU detected.\n"); |
1491 | printk(KERN_INFO "PCI-DMA: Calgary TCE table spec is %d, " | 1442 | printk(KERN_INFO "PCI-DMA: Calgary TCE table spec is %d\n", |
1492 | "CONFIG_IOMMU_DEBUG is %s.\n", specified_table_size, | 1443 | specified_table_size); |
1493 | debugging ? "enabled" : "disabled"); | ||
1494 | 1444 | ||
1495 | /* swiotlb for devices that aren't behind the Calgary. */ | 1445 | /* swiotlb for devices that aren't behind the Calgary. */ |
1496 | if (max_pfn > MAX_DMA32_PFN) | 1446 | if (max_pfn > MAX_DMA32_PFN) |
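For reference, the bit-verification helper dropped from the Calgary code above scanned a range of the allocation bitmap for the first bit whose value did not match the expected one, returning ~0UL when every bit matched. A stand-alone sketch of that scan in plain C (using an array of unsigned longs directly instead of the kernel's test_bit(); names and layout are illustrative only):

    #include <limits.h>

    #define BITS_PER_LONG (sizeof(unsigned long) * CHAR_BIT)

    /* Return the index of the first bit in [start, end) whose value does not
     * match 'expected' (0 or 1), or ~0UL if every bit matches. */
    static unsigned long check_bit_range(const unsigned long *bitmap, int expected,
                                         unsigned long start, unsigned long end)
    {
            unsigned long idx;

            for (idx = start; idx < end; idx++) {
                    int bit = (int)((bitmap[idx / BITS_PER_LONG] >> (idx % BITS_PER_LONG)) & 1);
                    if (bit != expected)
                            return idx;
            }
            return ~0UL;    /* all bits have the expected value */
    }

The removed callers only used the result for a rate-limited warning, which is why the checks could go away without changing the allocator's behaviour.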
diff --git a/arch/x86/kernel/pci-dma.c b/arch/x86/kernel/pci-dma.c index 745579bc8256..47630479b067 100644 --- a/arch/x86/kernel/pci-dma.c +++ b/arch/x86/kernel/pci-dma.c | |||
@@ -32,6 +32,8 @@ int no_iommu __read_mostly; | |||
32 | /* Set this to 1 if there is a HW IOMMU in the system */ | 32 | /* Set this to 1 if there is a HW IOMMU in the system */ |
33 | int iommu_detected __read_mostly = 0; | 33 | int iommu_detected __read_mostly = 0; |
34 | 34 | ||
35 | int iommu_pass_through; | ||
36 | |||
35 | dma_addr_t bad_dma_address __read_mostly = 0; | 37 | dma_addr_t bad_dma_address __read_mostly = 0; |
36 | EXPORT_SYMBOL(bad_dma_address); | 38 | EXPORT_SYMBOL(bad_dma_address); |
37 | 39 | ||
@@ -209,6 +211,10 @@ static __init int iommu_setup(char *p) | |||
209 | #ifdef CONFIG_SWIOTLB | 211 | #ifdef CONFIG_SWIOTLB |
210 | if (!strncmp(p, "soft", 4)) | 212 | if (!strncmp(p, "soft", 4)) |
211 | swiotlb = 1; | 213 | swiotlb = 1; |
214 | if (!strncmp(p, "pt", 2)) { | ||
215 | iommu_pass_through = 1; | ||
216 | return 1; | ||
217 | } | ||
212 | #endif | 218 | #endif |
213 | 219 | ||
214 | gart_parse_options(p); | 220 | gart_parse_options(p); |
@@ -290,6 +296,8 @@ static int __init pci_iommu_init(void) | |||
290 | void pci_iommu_shutdown(void) | 296 | void pci_iommu_shutdown(void) |
291 | { | 297 | { |
292 | gart_iommu_shutdown(); | 298 | gart_iommu_shutdown(); |
299 | |||
300 | amd_iommu_shutdown(); | ||
293 | } | 301 | } |
294 | /* Must execute after PCI subsystem */ | 302 | /* Must execute after PCI subsystem */ |
295 | fs_initcall(pci_iommu_init); | 303 | fs_initcall(pci_iommu_init); |
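The iommu= option parser extended above matches sub-options by prefix with strncmp(), so "pt" enables pass-through mode and returns early. A minimal stand-alone sketch of that prefix-matching pattern (hypothetical user-space parser, not the kernel function itself):

    #include <string.h>

    static int swiotlb;
    static int iommu_pass_through;   /* mirrors the flag introduced above */

    /* Parse one "iommu=" token the way iommu_setup() does: compare known
     * prefixes and set the matching flag. */
    static void parse_iommu_opt(const char *p)
    {
            if (!strncmp(p, "soft", 4))
                    swiotlb = 1;
            if (!strncmp(p, "pt", 2))
                    iommu_pass_through = 1;
    }

Note that the hunk recognizes "pt" only inside the CONFIG_SWIOTLB block, so pass-through is tied to a SWIOTLB-capable build here.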
diff --git a/arch/x86/kernel/pci-gart_64.c b/arch/x86/kernel/pci-gart_64.c index b284b58c035c..cfd9f9063896 100644 --- a/arch/x86/kernel/pci-gart_64.c +++ b/arch/x86/kernel/pci-gart_64.c | |||
@@ -144,48 +144,21 @@ static void flush_gart(void) | |||
144 | } | 144 | } |
145 | 145 | ||
146 | #ifdef CONFIG_IOMMU_LEAK | 146 | #ifdef CONFIG_IOMMU_LEAK |
147 | |||
148 | #define SET_LEAK(x) \ | ||
149 | do { \ | ||
150 | if (iommu_leak_tab) \ | ||
151 | iommu_leak_tab[x] = __builtin_return_address(0);\ | ||
152 | } while (0) | ||
153 | |||
154 | #define CLEAR_LEAK(x) \ | ||
155 | do { \ | ||
156 | if (iommu_leak_tab) \ | ||
157 | iommu_leak_tab[x] = NULL; \ | ||
158 | } while (0) | ||
159 | |||
160 | /* Debugging aid for drivers that don't free their IOMMU tables */ | 147 | /* Debugging aid for drivers that don't free their IOMMU tables */ |
161 | static void **iommu_leak_tab; | ||
162 | static int leak_trace; | 148 | static int leak_trace; |
163 | static int iommu_leak_pages = 20; | 149 | static int iommu_leak_pages = 20; |
164 | 150 | ||
165 | static void dump_leak(void) | 151 | static void dump_leak(void) |
166 | { | 152 | { |
167 | int i; | ||
168 | static int dump; | 153 | static int dump; |
169 | 154 | ||
170 | if (dump || !iommu_leak_tab) | 155 | if (dump) |
171 | return; | 156 | return; |
172 | dump = 1; | 157 | dump = 1; |
173 | show_stack(NULL, NULL); | ||
174 | 158 | ||
175 | /* Very crude. dump some from the end of the table too */ | 159 | show_stack(NULL, NULL); |
176 | printk(KERN_DEBUG "Dumping %d pages from end of IOMMU:\n", | 160 | debug_dma_dump_mappings(NULL); |
177 | iommu_leak_pages); | ||
178 | for (i = 0; i < iommu_leak_pages; i += 2) { | ||
179 | printk(KERN_DEBUG "%lu: ", iommu_pages-i); | ||
180 | printk_address((unsigned long) iommu_leak_tab[iommu_pages-i], | ||
181 | 0); | ||
182 | printk(KERN_CONT "%c", (i+1)%2 == 0 ? '\n' : ' '); | ||
183 | } | ||
184 | printk(KERN_DEBUG "\n"); | ||
185 | } | 161 | } |
186 | #else | ||
187 | # define SET_LEAK(x) | ||
188 | # define CLEAR_LEAK(x) | ||
189 | #endif | 162 | #endif |
190 | 163 | ||
191 | static void iommu_full(struct device *dev, size_t size, int dir) | 164 | static void iommu_full(struct device *dev, size_t size, int dir) |
@@ -248,7 +221,6 @@ static dma_addr_t dma_map_area(struct device *dev, dma_addr_t phys_mem, | |||
248 | 221 | ||
249 | for (i = 0; i < npages; i++) { | 222 | for (i = 0; i < npages; i++) { |
250 | iommu_gatt_base[iommu_page + i] = GPTE_ENCODE(phys_mem); | 223 | iommu_gatt_base[iommu_page + i] = GPTE_ENCODE(phys_mem); |
251 | SET_LEAK(iommu_page + i); | ||
252 | phys_mem += PAGE_SIZE; | 224 | phys_mem += PAGE_SIZE; |
253 | } | 225 | } |
254 | return iommu_bus_base + iommu_page*PAGE_SIZE + (phys_mem & ~PAGE_MASK); | 226 | return iommu_bus_base + iommu_page*PAGE_SIZE + (phys_mem & ~PAGE_MASK); |
@@ -294,7 +266,6 @@ static void gart_unmap_page(struct device *dev, dma_addr_t dma_addr, | |||
294 | npages = iommu_num_pages(dma_addr, size, PAGE_SIZE); | 266 | npages = iommu_num_pages(dma_addr, size, PAGE_SIZE); |
295 | for (i = 0; i < npages; i++) { | 267 | for (i = 0; i < npages; i++) { |
296 | iommu_gatt_base[iommu_page + i] = gart_unmapped_entry; | 268 | iommu_gatt_base[iommu_page + i] = gart_unmapped_entry; |
297 | CLEAR_LEAK(iommu_page + i); | ||
298 | } | 269 | } |
299 | free_iommu(iommu_page, npages); | 270 | free_iommu(iommu_page, npages); |
300 | } | 271 | } |
@@ -377,7 +348,6 @@ static int __dma_map_cont(struct device *dev, struct scatterlist *start, | |||
377 | pages = iommu_num_pages(s->offset, s->length, PAGE_SIZE); | 348 | pages = iommu_num_pages(s->offset, s->length, PAGE_SIZE); |
378 | while (pages--) { | 349 | while (pages--) { |
379 | iommu_gatt_base[iommu_page] = GPTE_ENCODE(addr); | 350 | iommu_gatt_base[iommu_page] = GPTE_ENCODE(addr); |
380 | SET_LEAK(iommu_page); | ||
381 | addr += PAGE_SIZE; | 351 | addr += PAGE_SIZE; |
382 | iommu_page++; | 352 | iommu_page++; |
383 | } | 353 | } |
@@ -688,8 +658,6 @@ static __init int init_k8_gatt(struct agp_kern_info *info) | |||
688 | 658 | ||
689 | agp_gatt_table = gatt; | 659 | agp_gatt_table = gatt; |
690 | 660 | ||
691 | enable_gart_translations(); | ||
692 | |||
693 | error = sysdev_class_register(&gart_sysdev_class); | 661 | error = sysdev_class_register(&gart_sysdev_class); |
694 | if (!error) | 662 | if (!error) |
695 | error = sysdev_register(&device_gart); | 663 | error = sysdev_register(&device_gart); |
@@ -801,11 +769,12 @@ void __init gart_iommu_init(void) | |||
801 | 769 | ||
802 | #ifdef CONFIG_IOMMU_LEAK | 770 | #ifdef CONFIG_IOMMU_LEAK |
803 | if (leak_trace) { | 771 | if (leak_trace) { |
804 | iommu_leak_tab = (void *)__get_free_pages(GFP_KERNEL|__GFP_ZERO, | 772 | int ret; |
805 | get_order(iommu_pages*sizeof(void *))); | 773 | |
806 | if (!iommu_leak_tab) | 774 | ret = dma_debug_resize_entries(iommu_pages); |
775 | if (ret) | ||
807 | printk(KERN_DEBUG | 776 | printk(KERN_DEBUG |
808 | "PCI-DMA: Cannot allocate leak trace area\n"); | 777 | "PCI-DMA: Cannot trace all the entries\n"); |
809 | } | 778 | } |
810 | #endif | 779 | #endif |
811 | 780 | ||
@@ -845,6 +814,14 @@ void __init gart_iommu_init(void) | |||
845 | * the pages as Not-Present: | 814 | * the pages as Not-Present: |
846 | */ | 815 | */ |
847 | wbinvd(); | 816 | wbinvd(); |
817 | |||
818 | /* | ||
819 | * Now all caches are flushed and we can safely enable | ||
820 | * GART hardware. Doing it early leaves the possibility | ||
821 | * of stale cache entries that can lead to GART PTE | ||
822 | * errors. | ||
823 | */ | ||
824 | enable_gart_translations(); | ||
848 | 825 | ||
849 | /* | 826 | /* |
850 | * Try to workaround a bug (thanks to BenH): | 827 | * Try to workaround a bug (thanks to BenH): |
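With the private leak table removed, leak tracing in the GART driver rides on the generic DMA-API debug facility: the entry pool is resized once at init time and outstanding mappings are dumped on overflow, using the two calls visible in the hunk. A condensed sketch of that flow (the helper names gart_leak_init/gart_leak_dump are illustrative; error handling trimmed):

    #ifdef CONFIG_IOMMU_LEAK
    static void gart_leak_init(unsigned long iommu_pages)
    {
            /* Let dma-debug track one entry per IOMMU page. */
            if (dma_debug_resize_entries(iommu_pages))
                    printk(KERN_DEBUG "PCI-DMA: Cannot trace all the entries\n");
    }

    static void gart_leak_dump(void)
    {
            /* A NULL device asks dma-debug to dump mappings for all devices. */
            debug_dma_dump_mappings(NULL);
    }
    #endif

The move of enable_gart_translations() to after wbinvd() in the same file is an ordering fix: as the added comment notes, enabling the GART before the cache flush can leave stale cache lines behind and trigger GART PTE errors.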
diff --git a/arch/x86/kernel/pci-swiotlb.c b/arch/x86/kernel/pci-swiotlb.c index 221a3853e268..6af96ee44200 100644 --- a/arch/x86/kernel/pci-swiotlb.c +++ b/arch/x86/kernel/pci-swiotlb.c | |||
@@ -28,7 +28,7 @@ dma_addr_t swiotlb_phys_to_bus(struct device *hwdev, phys_addr_t paddr) | |||
28 | return paddr; | 28 | return paddr; |
29 | } | 29 | } |
30 | 30 | ||
31 | phys_addr_t swiotlb_bus_to_phys(dma_addr_t baddr) | 31 | phys_addr_t swiotlb_bus_to_phys(struct device *hwdev, dma_addr_t baddr) |
32 | { | 32 | { |
33 | return baddr; | 33 | return baddr; |
34 | } | 34 | } |
@@ -71,7 +71,8 @@ void __init pci_swiotlb_init(void) | |||
71 | { | 71 | { |
72 | /* don't initialize swiotlb if iommu=off (no_iommu=1) */ | 72 | /* don't initialize swiotlb if iommu=off (no_iommu=1) */ |
73 | #ifdef CONFIG_X86_64 | 73 | #ifdef CONFIG_X86_64 |
74 | if (!iommu_detected && !no_iommu && max_pfn > MAX_DMA32_PFN) | 74 | if ((!iommu_detected && !no_iommu && max_pfn > MAX_DMA32_PFN) || |
75 | iommu_pass_through) | ||
75 | swiotlb = 1; | 76 | swiotlb = 1; |
76 | #endif | 77 | #endif |
77 | if (swiotlb_force) | 78 | if (swiotlb_force) |
diff --git a/arch/x86/kernel/process.c b/arch/x86/kernel/process.c index ca989158e847..994dd6a4a2a0 100644 --- a/arch/x86/kernel/process.c +++ b/arch/x86/kernel/process.c | |||
@@ -8,12 +8,15 @@ | |||
8 | #include <linux/module.h> | 8 | #include <linux/module.h> |
9 | #include <linux/pm.h> | 9 | #include <linux/pm.h> |
10 | #include <linux/clockchips.h> | 10 | #include <linux/clockchips.h> |
11 | #include <linux/random.h> | ||
11 | #include <trace/power.h> | 12 | #include <trace/power.h> |
12 | #include <asm/system.h> | 13 | #include <asm/system.h> |
13 | #include <asm/apic.h> | 14 | #include <asm/apic.h> |
15 | #include <asm/syscalls.h> | ||
14 | #include <asm/idle.h> | 16 | #include <asm/idle.h> |
15 | #include <asm/uaccess.h> | 17 | #include <asm/uaccess.h> |
16 | #include <asm/i387.h> | 18 | #include <asm/i387.h> |
19 | #include <asm/ds.h> | ||
17 | 20 | ||
18 | unsigned long idle_halt; | 21 | unsigned long idle_halt; |
19 | EXPORT_SYMBOL(idle_halt); | 22 | EXPORT_SYMBOL(idle_halt); |
@@ -45,6 +48,8 @@ void free_thread_xstate(struct task_struct *tsk) | |||
45 | kmem_cache_free(task_xstate_cachep, tsk->thread.xstate); | 48 | kmem_cache_free(task_xstate_cachep, tsk->thread.xstate); |
46 | tsk->thread.xstate = NULL; | 49 | tsk->thread.xstate = NULL; |
47 | } | 50 | } |
51 | |||
52 | WARN(tsk->thread.ds_ctx, "leaking DS context\n"); | ||
48 | } | 53 | } |
49 | 54 | ||
50 | void free_thread_info(struct thread_info *ti) | 55 | void free_thread_info(struct thread_info *ti) |
@@ -58,7 +63,7 @@ void arch_task_cache_init(void) | |||
58 | task_xstate_cachep = | 63 | task_xstate_cachep = |
59 | kmem_cache_create("task_xstate", xstate_size, | 64 | kmem_cache_create("task_xstate", xstate_size, |
60 | __alignof__(union thread_xstate), | 65 | __alignof__(union thread_xstate), |
61 | SLAB_PANIC, NULL); | 66 | SLAB_PANIC | SLAB_NOTRACK, NULL); |
62 | } | 67 | } |
63 | 68 | ||
64 | /* | 69 | /* |
@@ -83,8 +88,6 @@ void exit_thread(void) | |||
83 | put_cpu(); | 88 | put_cpu(); |
84 | kfree(bp); | 89 | kfree(bp); |
85 | } | 90 | } |
86 | |||
87 | ds_exit_thread(current); | ||
88 | } | 91 | } |
89 | 92 | ||
90 | void flush_thread(void) | 93 | void flush_thread(void) |
@@ -613,3 +616,16 @@ static int __init idle_setup(char *str) | |||
613 | } | 616 | } |
614 | early_param("idle", idle_setup); | 617 | early_param("idle", idle_setup); |
615 | 618 | ||
619 | unsigned long arch_align_stack(unsigned long sp) | ||
620 | { | ||
621 | if (!(current->personality & ADDR_NO_RANDOMIZE) && randomize_va_space) | ||
622 | sp -= get_random_int() % 8192; | ||
623 | return sp & ~0xf; | ||
624 | } | ||
625 | |||
626 | unsigned long arch_randomize_brk(struct mm_struct *mm) | ||
627 | { | ||
628 | unsigned long range_end = mm->brk + 0x02000000; | ||
629 | return randomize_range(mm->brk, range_end, 0) ? : mm->brk; | ||
630 | } | ||
631 | |||
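The randomization helpers consolidated into process.c above are small enough to trace by hand: the stack pointer is pushed down by up to 8 KiB and re-aligned to 16 bytes, and the brk is placed somewhere inside the 32 MiB window above the current break (0x02000000 bytes). A user-space sketch of the stack arithmetic (illustrative only; rand() stands in for get_random_int()):

    #include <stdlib.h>

    /* Same arithmetic as arch_align_stack(): subtract up to 8191 bytes, then
     * align down to a 16-byte boundary.  (The kernel additionally gates this
     * on ADDR_NO_RANDOMIZE and randomize_va_space.) */
    static unsigned long align_stack_demo(unsigned long sp)
    {
            sp -= (unsigned long)rand() % 8192;
            return sp & ~0xfUL;
    }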
diff --git a/arch/x86/kernel/process_32.c b/arch/x86/kernel/process_32.c index 76f8f84043a2..59f4524984af 100644 --- a/arch/x86/kernel/process_32.c +++ b/arch/x86/kernel/process_32.c | |||
@@ -9,8 +9,6 @@ | |||
9 | * This file handles the architecture-dependent parts of process handling.. | 9 | * This file handles the architecture-dependent parts of process handling.. |
10 | */ | 10 | */ |
11 | 11 | ||
12 | #include <stdarg.h> | ||
13 | |||
14 | #include <linux/stackprotector.h> | 12 | #include <linux/stackprotector.h> |
15 | #include <linux/cpu.h> | 13 | #include <linux/cpu.h> |
16 | #include <linux/errno.h> | 14 | #include <linux/errno.h> |
@@ -33,7 +31,6 @@ | |||
33 | #include <linux/module.h> | 31 | #include <linux/module.h> |
34 | #include <linux/kallsyms.h> | 32 | #include <linux/kallsyms.h> |
35 | #include <linux/ptrace.h> | 33 | #include <linux/ptrace.h> |
36 | #include <linux/random.h> | ||
37 | #include <linux/personality.h> | 34 | #include <linux/personality.h> |
38 | #include <linux/tick.h> | 35 | #include <linux/tick.h> |
39 | #include <linux/percpu.h> | 36 | #include <linux/percpu.h> |
@@ -290,7 +287,8 @@ int copy_thread(unsigned long clone_flags, unsigned long sp, | |||
290 | p->thread.io_bitmap_max = 0; | 287 | p->thread.io_bitmap_max = 0; |
291 | } | 288 | } |
292 | 289 | ||
293 | ds_copy_thread(p, current); | 290 | clear_tsk_thread_flag(p, TIF_DS_AREA_MSR); |
291 | p->thread.ds_ctx = NULL; | ||
294 | 292 | ||
295 | clear_tsk_thread_flag(p, TIF_DEBUGCTLMSR); | 293 | clear_tsk_thread_flag(p, TIF_DEBUGCTLMSR); |
296 | p->thread.debugctlmsr = 0; | 294 | p->thread.debugctlmsr = 0; |
@@ -407,7 +405,7 @@ __switch_to(struct task_struct *prev_p, struct task_struct *next_p) | |||
407 | * done before math_state_restore, so the TS bit is up | 405 | * done before math_state_restore, so the TS bit is up |
408 | * to date. | 406 | * to date. |
409 | */ | 407 | */ |
410 | arch_leave_lazy_cpu_mode(); | 408 | arch_end_context_switch(next_p); |
411 | 409 | ||
412 | /* If the task has used fpu the last 5 timeslices, just do a full | 410 | /* If the task has used fpu the last 5 timeslices, just do a full |
413 | * restore of the math state immediately to avoid the trap; the | 411 | * restore of the math state immediately to avoid the trap; the |
@@ -497,15 +495,3 @@ unsigned long get_wchan(struct task_struct *p) | |||
497 | return 0; | 495 | return 0; |
498 | } | 496 | } |
499 | 497 | ||
500 | unsigned long arch_align_stack(unsigned long sp) | ||
501 | { | ||
502 | if (!(current->personality & ADDR_NO_RANDOMIZE) && randomize_va_space) | ||
503 | sp -= get_random_int() % 8192; | ||
504 | return sp & ~0xf; | ||
505 | } | ||
506 | |||
507 | unsigned long arch_randomize_brk(struct mm_struct *mm) | ||
508 | { | ||
509 | unsigned long range_end = mm->brk + 0x02000000; | ||
510 | return randomize_range(mm->brk, range_end, 0) ? : mm->brk; | ||
511 | } | ||
diff --git a/arch/x86/kernel/process_64.c b/arch/x86/kernel/process_64.c index b751a41392b1..ebefb5407b9d 100644 --- a/arch/x86/kernel/process_64.c +++ b/arch/x86/kernel/process_64.c | |||
@@ -14,8 +14,6 @@ | |||
14 | * This file handles the architecture-dependent parts of process handling.. | 14 | * This file handles the architecture-dependent parts of process handling.. |
15 | */ | 15 | */ |
16 | 16 | ||
17 | #include <stdarg.h> | ||
18 | |||
19 | #include <linux/stackprotector.h> | 17 | #include <linux/stackprotector.h> |
20 | #include <linux/cpu.h> | 18 | #include <linux/cpu.h> |
21 | #include <linux/errno.h> | 19 | #include <linux/errno.h> |
@@ -32,7 +30,6 @@ | |||
32 | #include <linux/delay.h> | 30 | #include <linux/delay.h> |
33 | #include <linux/module.h> | 31 | #include <linux/module.h> |
34 | #include <linux/ptrace.h> | 32 | #include <linux/ptrace.h> |
35 | #include <linux/random.h> | ||
36 | #include <linux/notifier.h> | 33 | #include <linux/notifier.h> |
37 | #include <linux/kprobes.h> | 34 | #include <linux/kprobes.h> |
38 | #include <linux/kdebug.h> | 35 | #include <linux/kdebug.h> |
@@ -335,7 +332,8 @@ int copy_thread(unsigned long clone_flags, unsigned long sp, | |||
335 | goto out; | 332 | goto out; |
336 | } | 333 | } |
337 | 334 | ||
338 | ds_copy_thread(p, me); | 335 | clear_tsk_thread_flag(p, TIF_DS_AREA_MSR); |
336 | p->thread.ds_ctx = NULL; | ||
339 | 337 | ||
340 | clear_tsk_thread_flag(p, TIF_DEBUGCTLMSR); | 338 | clear_tsk_thread_flag(p, TIF_DEBUGCTLMSR); |
341 | p->thread.debugctlmsr = 0; | 339 | p->thread.debugctlmsr = 0; |
@@ -428,7 +426,7 @@ __switch_to(struct task_struct *prev_p, struct task_struct *next_p) | |||
428 | * done before math_state_restore, so the TS bit is up | 426 | * done before math_state_restore, so the TS bit is up |
429 | * to date. | 427 | * to date. |
430 | */ | 428 | */ |
431 | arch_leave_lazy_cpu_mode(); | 429 | arch_end_context_switch(next_p); |
432 | 430 | ||
433 | /* | 431 | /* |
434 | * Switch FS and GS. | 432 | * Switch FS and GS. |
@@ -660,15 +658,3 @@ long sys_arch_prctl(int code, unsigned long addr) | |||
660 | return do_arch_prctl(current, code, addr); | 658 | return do_arch_prctl(current, code, addr); |
661 | } | 659 | } |
662 | 660 | ||
663 | unsigned long arch_align_stack(unsigned long sp) | ||
664 | { | ||
665 | if (!(current->personality & ADDR_NO_RANDOMIZE) && randomize_va_space) | ||
666 | sp -= get_random_int() % 8192; | ||
667 | return sp & ~0xf; | ||
668 | } | ||
669 | |||
670 | unsigned long arch_randomize_brk(struct mm_struct *mm) | ||
671 | { | ||
672 | unsigned long range_end = mm->brk + 0x02000000; | ||
673 | return randomize_range(mm->brk, range_end, 0) ? : mm->brk; | ||
674 | } | ||
diff --git a/arch/x86/kernel/ptrace.c b/arch/x86/kernel/ptrace.c index 23b7c8f017e2..09ecbde91c13 100644 --- a/arch/x86/kernel/ptrace.c +++ b/arch/x86/kernel/ptrace.c | |||
@@ -21,6 +21,7 @@ | |||
21 | #include <linux/audit.h> | 21 | #include <linux/audit.h> |
22 | #include <linux/seccomp.h> | 22 | #include <linux/seccomp.h> |
23 | #include <linux/signal.h> | 23 | #include <linux/signal.h> |
24 | #include <linux/workqueue.h> | ||
24 | 25 | ||
25 | #include <asm/uaccess.h> | 26 | #include <asm/uaccess.h> |
26 | #include <asm/pgtable.h> | 27 | #include <asm/pgtable.h> |
@@ -578,17 +579,130 @@ static int ioperm_get(struct task_struct *target, | |||
578 | } | 579 | } |
579 | 580 | ||
580 | #ifdef CONFIG_X86_PTRACE_BTS | 581 | #ifdef CONFIG_X86_PTRACE_BTS |
582 | /* | ||
583 | * A branch trace store context. | ||
584 | * | ||
585 | * Contexts may only be installed by ptrace_bts_config() and only for | ||
586 | * ptraced tasks. | ||
587 | * | ||
588 | * Contexts are destroyed when the tracee is detached from the tracer. | ||
589 | * The actual destruction work requires interrupts enabled, so the | ||
590 | * work is deferred and will be scheduled during __ptrace_unlink(). | ||
591 | * | ||
592 | * Contexts hold an additional task_struct reference on the traced | ||
593 | * task, as well as a reference on the tracer's mm. | ||
594 | * | ||
595 | * Ptrace already holds a task_struct for the duration of ptrace operations, | ||
596 | * but since destruction is deferred, it may be executed after both | ||
597 | * tracer and tracee exited. | ||
598 | */ | ||
599 | struct bts_context { | ||
600 | /* The branch trace handle. */ | ||
601 | struct bts_tracer *tracer; | ||
602 | |||
603 | /* The buffer used to store the branch trace and its size. */ | ||
604 | void *buffer; | ||
605 | unsigned int size; | ||
606 | |||
607 | /* The mm that paid for the above buffer. */ | ||
608 | struct mm_struct *mm; | ||
609 | |||
610 | /* The task this context belongs to. */ | ||
611 | struct task_struct *task; | ||
612 | |||
613 | /* The signal to send on a bts buffer overflow. */ | ||
614 | unsigned int bts_ovfl_signal; | ||
615 | |||
616 | /* The work struct to destroy a context. */ | ||
617 | struct work_struct work; | ||
618 | }; | ||
619 | |||
620 | static int alloc_bts_buffer(struct bts_context *context, unsigned int size) | ||
621 | { | ||
622 | void *buffer = NULL; | ||
623 | int err = -ENOMEM; | ||
624 | |||
625 | err = account_locked_memory(current->mm, current->signal->rlim, size); | ||
626 | if (err < 0) | ||
627 | return err; | ||
628 | |||
629 | buffer = kzalloc(size, GFP_KERNEL); | ||
630 | if (!buffer) | ||
631 | goto out_refund; | ||
632 | |||
633 | context->buffer = buffer; | ||
634 | context->size = size; | ||
635 | context->mm = get_task_mm(current); | ||
636 | |||
637 | return 0; | ||
638 | |||
639 | out_refund: | ||
640 | refund_locked_memory(current->mm, size); | ||
641 | return err; | ||
642 | } | ||
643 | |||
644 | static inline void free_bts_buffer(struct bts_context *context) | ||
645 | { | ||
646 | if (!context->buffer) | ||
647 | return; | ||
648 | |||
649 | kfree(context->buffer); | ||
650 | context->buffer = NULL; | ||
651 | |||
652 | refund_locked_memory(context->mm, context->size); | ||
653 | context->size = 0; | ||
654 | |||
655 | mmput(context->mm); | ||
656 | context->mm = NULL; | ||
657 | } | ||
658 | |||
659 | static void free_bts_context_work(struct work_struct *w) | ||
660 | { | ||
661 | struct bts_context *context; | ||
662 | |||
663 | context = container_of(w, struct bts_context, work); | ||
664 | |||
665 | ds_release_bts(context->tracer); | ||
666 | put_task_struct(context->task); | ||
667 | free_bts_buffer(context); | ||
668 | kfree(context); | ||
669 | } | ||
670 | |||
671 | static inline void free_bts_context(struct bts_context *context) | ||
672 | { | ||
673 | INIT_WORK(&context->work, free_bts_context_work); | ||
674 | schedule_work(&context->work); | ||
675 | } | ||
676 | |||
677 | static inline struct bts_context *alloc_bts_context(struct task_struct *task) | ||
678 | { | ||
679 | struct bts_context *context = kzalloc(sizeof(*context), GFP_KERNEL); | ||
680 | if (context) { | ||
681 | context->task = task; | ||
682 | task->bts = context; | ||
683 | |||
684 | get_task_struct(task); | ||
685 | } | ||
686 | |||
687 | return context; | ||
688 | } | ||
689 | |||
581 | static int ptrace_bts_read_record(struct task_struct *child, size_t index, | 690 | static int ptrace_bts_read_record(struct task_struct *child, size_t index, |
582 | struct bts_struct __user *out) | 691 | struct bts_struct __user *out) |
583 | { | 692 | { |
693 | struct bts_context *context; | ||
584 | const struct bts_trace *trace; | 694 | const struct bts_trace *trace; |
585 | struct bts_struct bts; | 695 | struct bts_struct bts; |
586 | const unsigned char *at; | 696 | const unsigned char *at; |
587 | int error; | 697 | int error; |
588 | 698 | ||
589 | trace = ds_read_bts(child->bts); | 699 | context = child->bts; |
700 | if (!context) | ||
701 | return -ESRCH; | ||
702 | |||
703 | trace = ds_read_bts(context->tracer); | ||
590 | if (!trace) | 704 | if (!trace) |
591 | return -EPERM; | 705 | return -ESRCH; |
592 | 706 | ||
593 | at = trace->ds.top - ((index + 1) * trace->ds.size); | 707 | at = trace->ds.top - ((index + 1) * trace->ds.size); |
594 | if ((void *)at < trace->ds.begin) | 708 | if ((void *)at < trace->ds.begin) |
@@ -597,7 +711,7 @@ static int ptrace_bts_read_record(struct task_struct *child, size_t index, | |||
597 | if (!trace->read) | 711 | if (!trace->read) |
598 | return -EOPNOTSUPP; | 712 | return -EOPNOTSUPP; |
599 | 713 | ||
600 | error = trace->read(child->bts, at, &bts); | 714 | error = trace->read(context->tracer, at, &bts); |
601 | if (error < 0) | 715 | if (error < 0) |
602 | return error; | 716 | return error; |
603 | 717 | ||
@@ -611,13 +725,18 @@ static int ptrace_bts_drain(struct task_struct *child, | |||
611 | long size, | 725 | long size, |
612 | struct bts_struct __user *out) | 726 | struct bts_struct __user *out) |
613 | { | 727 | { |
728 | struct bts_context *context; | ||
614 | const struct bts_trace *trace; | 729 | const struct bts_trace *trace; |
615 | const unsigned char *at; | 730 | const unsigned char *at; |
616 | int error, drained = 0; | 731 | int error, drained = 0; |
617 | 732 | ||
618 | trace = ds_read_bts(child->bts); | 733 | context = child->bts; |
734 | if (!context) | ||
735 | return -ESRCH; | ||
736 | |||
737 | trace = ds_read_bts(context->tracer); | ||
619 | if (!trace) | 738 | if (!trace) |
620 | return -EPERM; | 739 | return -ESRCH; |
621 | 740 | ||
622 | if (!trace->read) | 741 | if (!trace->read) |
623 | return -EOPNOTSUPP; | 742 | return -EOPNOTSUPP; |
@@ -628,9 +747,8 @@ static int ptrace_bts_drain(struct task_struct *child, | |||
628 | for (at = trace->ds.begin; (void *)at < trace->ds.top; | 747 | for (at = trace->ds.begin; (void *)at < trace->ds.top; |
629 | out++, drained++, at += trace->ds.size) { | 748 | out++, drained++, at += trace->ds.size) { |
630 | struct bts_struct bts; | 749 | struct bts_struct bts; |
631 | int error; | ||
632 | 750 | ||
633 | error = trace->read(child->bts, at, &bts); | 751 | error = trace->read(context->tracer, at, &bts); |
634 | if (error < 0) | 752 | if (error < 0) |
635 | return error; | 753 | return error; |
636 | 754 | ||
@@ -640,35 +758,18 @@ static int ptrace_bts_drain(struct task_struct *child, | |||
640 | 758 | ||
641 | memset(trace->ds.begin, 0, trace->ds.n * trace->ds.size); | 759 | memset(trace->ds.begin, 0, trace->ds.n * trace->ds.size); |
642 | 760 | ||
643 | error = ds_reset_bts(child->bts); | 761 | error = ds_reset_bts(context->tracer); |
644 | if (error < 0) | 762 | if (error < 0) |
645 | return error; | 763 | return error; |
646 | 764 | ||
647 | return drained; | 765 | return drained; |
648 | } | 766 | } |
649 | 767 | ||
650 | static int ptrace_bts_allocate_buffer(struct task_struct *child, size_t size) | ||
651 | { | ||
652 | child->bts_buffer = alloc_locked_buffer(size); | ||
653 | if (!child->bts_buffer) | ||
654 | return -ENOMEM; | ||
655 | |||
656 | child->bts_size = size; | ||
657 | |||
658 | return 0; | ||
659 | } | ||
660 | |||
661 | static void ptrace_bts_free_buffer(struct task_struct *child) | ||
662 | { | ||
663 | free_locked_buffer(child->bts_buffer, child->bts_size); | ||
664 | child->bts_buffer = NULL; | ||
665 | child->bts_size = 0; | ||
666 | } | ||
667 | |||
668 | static int ptrace_bts_config(struct task_struct *child, | 768 | static int ptrace_bts_config(struct task_struct *child, |
669 | long cfg_size, | 769 | long cfg_size, |
670 | const struct ptrace_bts_config __user *ucfg) | 770 | const struct ptrace_bts_config __user *ucfg) |
671 | { | 771 | { |
772 | struct bts_context *context; | ||
672 | struct ptrace_bts_config cfg; | 773 | struct ptrace_bts_config cfg; |
673 | unsigned int flags = 0; | 774 | unsigned int flags = 0; |
674 | 775 | ||
@@ -678,28 +779,33 @@ static int ptrace_bts_config(struct task_struct *child, | |||
678 | if (copy_from_user(&cfg, ucfg, sizeof(cfg))) | 779 | if (copy_from_user(&cfg, ucfg, sizeof(cfg))) |
679 | return -EFAULT; | 780 | return -EFAULT; |
680 | 781 | ||
681 | if (child->bts) { | 782 | context = child->bts; |
682 | ds_release_bts(child->bts); | 783 | if (!context) |
683 | child->bts = NULL; | 784 | context = alloc_bts_context(child); |
684 | } | 785 | if (!context) |
786 | return -ENOMEM; | ||
685 | 787 | ||
686 | if (cfg.flags & PTRACE_BTS_O_SIGNAL) { | 788 | if (cfg.flags & PTRACE_BTS_O_SIGNAL) { |
687 | if (!cfg.signal) | 789 | if (!cfg.signal) |
688 | return -EINVAL; | 790 | return -EINVAL; |
689 | 791 | ||
690 | child->thread.bts_ovfl_signal = cfg.signal; | ||
691 | return -EOPNOTSUPP; | 792 | return -EOPNOTSUPP; |
793 | context->bts_ovfl_signal = cfg.signal; | ||
692 | } | 794 | } |
693 | 795 | ||
694 | if ((cfg.flags & PTRACE_BTS_O_ALLOC) && | 796 | ds_release_bts(context->tracer); |
695 | (cfg.size != child->bts_size)) { | 797 | context->tracer = NULL; |
696 | int error; | ||
697 | 798 | ||
698 | ptrace_bts_free_buffer(child); | 799 | if ((cfg.flags & PTRACE_BTS_O_ALLOC) && (cfg.size != context->size)) { |
800 | int err; | ||
699 | 801 | ||
700 | error = ptrace_bts_allocate_buffer(child, cfg.size); | 802 | free_bts_buffer(context); |
701 | if (error < 0) | 803 | if (!cfg.size) |
702 | return error; | 804 | return 0; |
805 | |||
806 | err = alloc_bts_buffer(context, cfg.size); | ||
807 | if (err < 0) | ||
808 | return err; | ||
703 | } | 809 | } |
704 | 810 | ||
705 | if (cfg.flags & PTRACE_BTS_O_TRACE) | 811 | if (cfg.flags & PTRACE_BTS_O_TRACE) |
@@ -708,15 +814,14 @@ static int ptrace_bts_config(struct task_struct *child, | |||
708 | if (cfg.flags & PTRACE_BTS_O_SCHED) | 814 | if (cfg.flags & PTRACE_BTS_O_SCHED) |
709 | flags |= BTS_TIMESTAMPS; | 815 | flags |= BTS_TIMESTAMPS; |
710 | 816 | ||
711 | child->bts = ds_request_bts(child, child->bts_buffer, child->bts_size, | 817 | context->tracer = |
712 | /* ovfl = */ NULL, /* th = */ (size_t)-1, | 818 | ds_request_bts_task(child, context->buffer, context->size, |
713 | flags); | 819 | NULL, (size_t)-1, flags); |
714 | if (IS_ERR(child->bts)) { | 820 | if (unlikely(IS_ERR(context->tracer))) { |
715 | int error = PTR_ERR(child->bts); | 821 | int error = PTR_ERR(context->tracer); |
716 | |||
717 | ptrace_bts_free_buffer(child); | ||
718 | child->bts = NULL; | ||
719 | 822 | ||
823 | free_bts_buffer(context); | ||
824 | context->tracer = NULL; | ||
720 | return error; | 825 | return error; |
721 | } | 826 | } |
722 | 827 | ||
@@ -727,20 +832,25 @@ static int ptrace_bts_status(struct task_struct *child, | |||
727 | long cfg_size, | 832 | long cfg_size, |
728 | struct ptrace_bts_config __user *ucfg) | 833 | struct ptrace_bts_config __user *ucfg) |
729 | { | 834 | { |
835 | struct bts_context *context; | ||
730 | const struct bts_trace *trace; | 836 | const struct bts_trace *trace; |
731 | struct ptrace_bts_config cfg; | 837 | struct ptrace_bts_config cfg; |
732 | 838 | ||
839 | context = child->bts; | ||
840 | if (!context) | ||
841 | return -ESRCH; | ||
842 | |||
733 | if (cfg_size < sizeof(cfg)) | 843 | if (cfg_size < sizeof(cfg)) |
734 | return -EIO; | 844 | return -EIO; |
735 | 845 | ||
736 | trace = ds_read_bts(child->bts); | 846 | trace = ds_read_bts(context->tracer); |
737 | if (!trace) | 847 | if (!trace) |
738 | return -EPERM; | 848 | return -ESRCH; |
739 | 849 | ||
740 | memset(&cfg, 0, sizeof(cfg)); | 850 | memset(&cfg, 0, sizeof(cfg)); |
741 | cfg.size = trace->ds.end - trace->ds.begin; | 851 | cfg.size = trace->ds.end - trace->ds.begin; |
742 | cfg.signal = child->thread.bts_ovfl_signal; | 852 | cfg.signal = context->bts_ovfl_signal; |
743 | cfg.bts_size = sizeof(struct bts_struct); | 853 | cfg.bts_size = sizeof(struct bts_struct); |
744 | 854 | ||
745 | if (cfg.signal) | 855 | if (cfg.signal) |
746 | cfg.flags |= PTRACE_BTS_O_SIGNAL; | 856 | cfg.flags |= PTRACE_BTS_O_SIGNAL; |
@@ -759,80 +869,51 @@ static int ptrace_bts_status(struct task_struct *child, | |||
759 | 869 | ||
760 | static int ptrace_bts_clear(struct task_struct *child) | 870 | static int ptrace_bts_clear(struct task_struct *child) |
761 | { | 871 | { |
872 | struct bts_context *context; | ||
762 | const struct bts_trace *trace; | 873 | const struct bts_trace *trace; |
763 | 874 | ||
764 | trace = ds_read_bts(child->bts); | 875 | context = child->bts; |
876 | if (!context) | ||
877 | return -ESRCH; | ||
878 | |||
879 | trace = ds_read_bts(context->tracer); | ||
765 | if (!trace) | 880 | if (!trace) |
766 | return -EPERM; | 881 | return -ESRCH; |
767 | 882 | ||
768 | memset(trace->ds.begin, 0, trace->ds.n * trace->ds.size); | 883 | memset(trace->ds.begin, 0, trace->ds.n * trace->ds.size); |
769 | 884 | ||
770 | return ds_reset_bts(child->bts); | 885 | return ds_reset_bts(context->tracer); |
771 | } | 886 | } |
772 | 887 | ||
773 | static int ptrace_bts_size(struct task_struct *child) | 888 | static int ptrace_bts_size(struct task_struct *child) |
774 | { | 889 | { |
890 | struct bts_context *context; | ||
775 | const struct bts_trace *trace; | 891 | const struct bts_trace *trace; |
776 | 892 | ||
777 | trace = ds_read_bts(child->bts); | 893 | context = child->bts; |
894 | if (!context) | ||
895 | return -ESRCH; | ||
896 | |||
897 | trace = ds_read_bts(context->tracer); | ||
778 | if (!trace) | 898 | if (!trace) |
779 | return -EPERM; | 899 | return -ESRCH; |
780 | 900 | ||
781 | return (trace->ds.top - trace->ds.begin) / trace->ds.size; | 901 | return (trace->ds.top - trace->ds.begin) / trace->ds.size; |
782 | } | 902 | } |
783 | 903 | ||
784 | static void ptrace_bts_fork(struct task_struct *tsk) | 904 | /* |
785 | { | 905 | * Called from __ptrace_unlink() after the child has been moved back |
786 | tsk->bts = NULL; | 906 | * to its original parent. |
787 | tsk->bts_buffer = NULL; | 907 | */ |
788 | tsk->bts_size = 0; | 908 | void ptrace_bts_untrace(struct task_struct *child) |
789 | tsk->thread.bts_ovfl_signal = 0; | ||
790 | } | ||
791 | |||
792 | static void ptrace_bts_untrace(struct task_struct *child) | ||
793 | { | 909 | { |
794 | if (unlikely(child->bts)) { | 910 | if (unlikely(child->bts)) { |
795 | ds_release_bts(child->bts); | 911 | free_bts_context(child->bts); |
796 | child->bts = NULL; | 912 | child->bts = NULL; |
797 | |||
798 | /* We cannot update total_vm and locked_vm since | ||
799 | child's mm is already gone. But we can reclaim the | ||
800 | memory. */ | ||
801 | kfree(child->bts_buffer); | ||
802 | child->bts_buffer = NULL; | ||
803 | child->bts_size = 0; | ||
804 | } | 913 | } |
805 | } | 914 | } |
806 | |||
807 | static void ptrace_bts_detach(struct task_struct *child) | ||
808 | { | ||
809 | /* | ||
810 | * Ptrace_detach() races with ptrace_untrace() in case | ||
811 | * the child dies and is reaped by another thread. | ||
812 | * | ||
813 | * We only do the memory accounting at this point and | ||
814 | * leave the buffer deallocation and the bts tracer | ||
815 | * release to ptrace_bts_untrace() which will be called | ||
816 | * later on with tasklist_lock held. | ||
817 | */ | ||
818 | release_locked_buffer(child->bts_buffer, child->bts_size); | ||
819 | } | ||
820 | #else | ||
821 | static inline void ptrace_bts_fork(struct task_struct *tsk) {} | ||
822 | static inline void ptrace_bts_detach(struct task_struct *child) {} | ||
823 | static inline void ptrace_bts_untrace(struct task_struct *child) {} | ||
824 | #endif /* CONFIG_X86_PTRACE_BTS */ | 915 | #endif /* CONFIG_X86_PTRACE_BTS */ |
825 | 916 | ||
826 | void x86_ptrace_fork(struct task_struct *child, unsigned long clone_flags) | ||
827 | { | ||
828 | ptrace_bts_fork(child); | ||
829 | } | ||
830 | |||
831 | void x86_ptrace_untrace(struct task_struct *child) | ||
832 | { | ||
833 | ptrace_bts_untrace(child); | ||
834 | } | ||
835 | |||
836 | /* | 917 | /* |
837 | * Called by kernel/ptrace.c when detaching.. | 918 | * Called by kernel/ptrace.c when detaching.. |
838 | * | 919 | * |
@@ -844,7 +925,6 @@ void ptrace_disable(struct task_struct *child) | |||
844 | #ifdef TIF_SYSCALL_EMU | 925 | #ifdef TIF_SYSCALL_EMU |
845 | clear_tsk_thread_flag(child, TIF_SYSCALL_EMU); | 926 | clear_tsk_thread_flag(child, TIF_SYSCALL_EMU); |
846 | #endif | 927 | #endif |
847 | ptrace_bts_detach(child); | ||
848 | } | 928 | } |
849 | 929 | ||
850 | #if defined CONFIG_X86_32 || defined CONFIG_IA32_EMULATION | 930 | #if defined CONFIG_X86_32 || defined CONFIG_IA32_EMULATION |
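As the new comment block explains, a BTS context cannot be torn down with interrupts disabled, so free_bts_context() defers the work to a workqueue and the handler recovers the context with container_of(). The same deferred-free pattern in isolation (kernel-style sketch; struct my_obj and its helpers are illustrative names, not part of the patch):

    #include <linux/workqueue.h>
    #include <linux/slab.h>

    struct my_obj {
            struct work_struct work;        /* embedded work item */
            /* ... state that must be torn down in process context ... */
    };

    static void my_obj_free_work(struct work_struct *w)
    {
            /* Recover the containing object from its embedded work_struct. */
            struct my_obj *obj = container_of(w, struct my_obj, work);

            /* release references, accounting, etc., then free the object */
            kfree(obj);
    }

    static void my_obj_free_deferred(struct my_obj *obj)
    {
            INIT_WORK(&obj->work, my_obj_free_work);
            schedule_work(&obj->work);      /* teardown runs later, with irqs enabled */
    }

One subtlety carried over from the patch: the object must stay valid until the work item runs, which is why the context pins the traced task with get_task_struct() until free_bts_context_work() drops it.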
diff --git a/arch/x86/kernel/quirks.c b/arch/x86/kernel/quirks.c index 7563b31b4f03..af71d06624bf 100644 --- a/arch/x86/kernel/quirks.c +++ b/arch/x86/kernel/quirks.c | |||
@@ -491,5 +491,42 @@ void force_hpet_resume(void) | |||
491 | break; | 491 | break; |
492 | } | 492 | } |
493 | } | 493 | } |
494 | #endif | ||
495 | |||
496 | #if defined(CONFIG_PCI) && defined(CONFIG_NUMA) | ||
497 | /* Set correct numa_node information for AMD NB functions */ | ||
498 | static void __init quirk_amd_nb_node(struct pci_dev *dev) | ||
499 | { | ||
500 | struct pci_dev *nb_ht; | ||
501 | unsigned int devfn; | ||
502 | u32 val; | ||
503 | |||
504 | devfn = PCI_DEVFN(PCI_SLOT(dev->devfn), 0); | ||
505 | nb_ht = pci_get_slot(dev->bus, devfn); | ||
506 | if (!nb_ht) | ||
507 | return; | ||
508 | |||
509 | pci_read_config_dword(nb_ht, 0x60, &val); | ||
510 | set_dev_node(&dev->dev, val & 7); | ||
511 | pci_dev_put(dev); | ||
512 | } | ||
494 | 513 | ||
514 | DECLARE_PCI_FIXUP_FINAL(PCI_VENDOR_ID_AMD, PCI_DEVICE_ID_AMD_K8_NB, | ||
515 | quirk_amd_nb_node); | ||
516 | DECLARE_PCI_FIXUP_FINAL(PCI_VENDOR_ID_AMD, PCI_DEVICE_ID_AMD_K8_NB_ADDRMAP, | ||
517 | quirk_amd_nb_node); | ||
518 | DECLARE_PCI_FIXUP_FINAL(PCI_VENDOR_ID_AMD, PCI_DEVICE_ID_AMD_K8_NB_MEMCTL, | ||
519 | quirk_amd_nb_node); | ||
520 | DECLARE_PCI_FIXUP_FINAL(PCI_VENDOR_ID_AMD, PCI_DEVICE_ID_AMD_K8_NB_MISC, | ||
521 | quirk_amd_nb_node); | ||
522 | DECLARE_PCI_FIXUP_FINAL(PCI_VENDOR_ID_AMD, PCI_DEVICE_ID_AMD_10H_NB_HT, | ||
523 | quirk_amd_nb_node); | ||
524 | DECLARE_PCI_FIXUP_FINAL(PCI_VENDOR_ID_AMD, PCI_DEVICE_ID_AMD_10H_NB_MAP, | ||
525 | quirk_amd_nb_node); | ||
526 | DECLARE_PCI_FIXUP_FINAL(PCI_VENDOR_ID_AMD, PCI_DEVICE_ID_AMD_10H_NB_DRAM, | ||
527 | quirk_amd_nb_node); | ||
528 | DECLARE_PCI_FIXUP_FINAL(PCI_VENDOR_ID_AMD, PCI_DEVICE_ID_AMD_10H_NB_MISC, | ||
529 | quirk_amd_nb_node); | ||
530 | DECLARE_PCI_FIXUP_FINAL(PCI_VENDOR_ID_AMD, PCI_DEVICE_ID_AMD_10H_NB_LINK, | ||
531 | quirk_amd_nb_node); | ||
495 | #endif | 532 | #endif |
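The northbridge quirk above looks up function 0 of the same slot and reads the dword at config offset 0x60, whose low three bits carry the node id. A worked example with a made-up register value:

    u32 val = 0x00000013;    /* hypothetical contents of offset 0x60 */
    int node = val & 7;      /* -> 3: attach the device to NUMA node 3 */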
diff --git a/arch/x86/kernel/reboot.c b/arch/x86/kernel/reboot.c index 667188e0b5a0..d2d1ce8170f0 100644 --- a/arch/x86/kernel/reboot.c +++ b/arch/x86/kernel/reboot.c | |||
@@ -192,6 +192,15 @@ static struct dmi_system_id __initdata reboot_dmi_table[] = { | |||
192 | DMI_MATCH(DMI_BOARD_NAME, "0KP561"), | 192 | DMI_MATCH(DMI_BOARD_NAME, "0KP561"), |
193 | }, | 193 | }, |
194 | }, | 194 | }, |
195 | { /* Handle problems with rebooting on Dell OptiPlex 360 with 0T656F */ | ||
196 | .callback = set_bios_reboot, | ||
197 | .ident = "Dell OptiPlex 360", | ||
198 | .matches = { | ||
199 | DMI_MATCH(DMI_SYS_VENDOR, "Dell Inc."), | ||
200 | DMI_MATCH(DMI_PRODUCT_NAME, "OptiPlex 360"), | ||
201 | DMI_MATCH(DMI_BOARD_NAME, "0T656F"), | ||
202 | }, | ||
203 | }, | ||
195 | { /* Handle problems with rebooting on Dell 2400's */ | 204 | { /* Handle problems with rebooting on Dell 2400's */ |
196 | .callback = set_bios_reboot, | 205 | .callback = set_bios_reboot, |
197 | .ident = "Dell PowerEdge 2400", | 206 | .ident = "Dell PowerEdge 2400", |
diff --git a/arch/x86/kernel/setup.c b/arch/x86/kernel/setup.c index b4158439bf63..be5ae80f897f 100644 --- a/arch/x86/kernel/setup.c +++ b/arch/x86/kernel/setup.c | |||
@@ -112,6 +112,14 @@ | |||
112 | #define ARCH_SETUP | 112 | #define ARCH_SETUP |
113 | #endif | 113 | #endif |
114 | 114 | ||
115 | /* | ||
116 | * end_pfn only includes RAM, while max_pfn_mapped includes all e820 entries. | ||
117 | * The direct mapping extends to max_pfn_mapped, so that we can directly access | ||
118 | * apertures, ACPI and other tables without having to play with fixmaps. | ||
119 | */ | ||
120 | unsigned long max_low_pfn_mapped; | ||
121 | unsigned long max_pfn_mapped; | ||
122 | |||
115 | RESERVE_BRK(dmi_alloc, 65536); | 123 | RESERVE_BRK(dmi_alloc, 65536); |
116 | 124 | ||
117 | unsigned int boot_cpu_id __read_mostly; | 125 | unsigned int boot_cpu_id __read_mostly; |
@@ -214,8 +222,8 @@ unsigned long mmu_cr4_features; | |||
214 | unsigned long mmu_cr4_features = X86_CR4_PAE; | 222 | unsigned long mmu_cr4_features = X86_CR4_PAE; |
215 | #endif | 223 | #endif |
216 | 224 | ||
217 | /* Boot loader ID as an integer, for the benefit of proc_dointvec */ | 225 | /* Boot loader ID and version as integers, for the benefit of proc_dointvec */ |
218 | int bootloader_type; | 226 | int bootloader_type, bootloader_version; |
219 | 227 | ||
220 | /* | 228 | /* |
221 | * Setup options | 229 | * Setup options |
@@ -293,15 +301,13 @@ static void __init reserve_brk(void) | |||
293 | 301 | ||
294 | #ifdef CONFIG_BLK_DEV_INITRD | 302 | #ifdef CONFIG_BLK_DEV_INITRD |
295 | 303 | ||
296 | #ifdef CONFIG_X86_32 | ||
297 | |||
298 | #define MAX_MAP_CHUNK (NR_FIX_BTMAPS << PAGE_SHIFT) | 304 | #define MAX_MAP_CHUNK (NR_FIX_BTMAPS << PAGE_SHIFT) |
299 | static void __init relocate_initrd(void) | 305 | static void __init relocate_initrd(void) |
300 | { | 306 | { |
301 | 307 | ||
302 | u64 ramdisk_image = boot_params.hdr.ramdisk_image; | 308 | u64 ramdisk_image = boot_params.hdr.ramdisk_image; |
303 | u64 ramdisk_size = boot_params.hdr.ramdisk_size; | 309 | u64 ramdisk_size = boot_params.hdr.ramdisk_size; |
304 | u64 end_of_lowmem = max_low_pfn << PAGE_SHIFT; | 310 | u64 end_of_lowmem = max_low_pfn_mapped << PAGE_SHIFT; |
305 | u64 ramdisk_here; | 311 | u64 ramdisk_here; |
306 | unsigned long slop, clen, mapaddr; | 312 | unsigned long slop, clen, mapaddr; |
307 | char *p, *q; | 313 | char *p, *q; |
@@ -357,14 +363,13 @@ static void __init relocate_initrd(void) | |||
357 | ramdisk_image, ramdisk_image + ramdisk_size - 1, | 363 | ramdisk_image, ramdisk_image + ramdisk_size - 1, |
358 | ramdisk_here, ramdisk_here + ramdisk_size - 1); | 364 | ramdisk_here, ramdisk_here + ramdisk_size - 1); |
359 | } | 365 | } |
360 | #endif | ||
361 | 366 | ||
362 | static void __init reserve_initrd(void) | 367 | static void __init reserve_initrd(void) |
363 | { | 368 | { |
364 | u64 ramdisk_image = boot_params.hdr.ramdisk_image; | 369 | u64 ramdisk_image = boot_params.hdr.ramdisk_image; |
365 | u64 ramdisk_size = boot_params.hdr.ramdisk_size; | 370 | u64 ramdisk_size = boot_params.hdr.ramdisk_size; |
366 | u64 ramdisk_end = ramdisk_image + ramdisk_size; | 371 | u64 ramdisk_end = ramdisk_image + ramdisk_size; |
367 | u64 end_of_lowmem = max_low_pfn << PAGE_SHIFT; | 372 | u64 end_of_lowmem = max_low_pfn_mapped << PAGE_SHIFT; |
368 | 373 | ||
369 | if (!boot_params.hdr.type_of_loader || | 374 | if (!boot_params.hdr.type_of_loader || |
370 | !ramdisk_image || !ramdisk_size) | 375 | !ramdisk_image || !ramdisk_size) |
@@ -394,14 +399,8 @@ static void __init reserve_initrd(void) | |||
394 | return; | 399 | return; |
395 | } | 400 | } |
396 | 401 | ||
397 | #ifdef CONFIG_X86_32 | ||
398 | relocate_initrd(); | 402 | relocate_initrd(); |
399 | #else | 403 | |
400 | printk(KERN_ERR "initrd extends beyond end of memory " | ||
401 | "(0x%08llx > 0x%08llx)\ndisabling initrd\n", | ||
402 | ramdisk_end, end_of_lowmem); | ||
403 | initrd_start = 0; | ||
404 | #endif | ||
405 | free_early(ramdisk_image, ramdisk_end); | 404 | free_early(ramdisk_image, ramdisk_end); |
406 | } | 405 | } |
407 | #else | 406 | #else |
@@ -706,6 +705,12 @@ void __init setup_arch(char **cmdline_p) | |||
706 | #endif | 705 | #endif |
707 | saved_video_mode = boot_params.hdr.vid_mode; | 706 | saved_video_mode = boot_params.hdr.vid_mode; |
708 | bootloader_type = boot_params.hdr.type_of_loader; | 707 | bootloader_type = boot_params.hdr.type_of_loader; |
708 | if ((bootloader_type >> 4) == 0xe) { | ||
709 | bootloader_type &= 0xf; | ||
710 | bootloader_type |= (boot_params.hdr.ext_loader_type+0x10) << 4; | ||
711 | } | ||
712 | bootloader_version = bootloader_type & 0xf; | ||
713 | bootloader_version |= boot_params.hdr.ext_loader_ver << 4; | ||
709 | 714 | ||
710 | #ifdef CONFIG_BLK_DEV_RAM | 715 | #ifdef CONFIG_BLK_DEV_RAM |
711 | rd_image_start = boot_params.hdr.ram_size & RAMDISK_IMAGE_START_MASK; | 716 | rd_image_start = boot_params.hdr.ram_size & RAMDISK_IMAGE_START_MASK; |
@@ -854,12 +859,16 @@ void __init setup_arch(char **cmdline_p) | |||
854 | max_low_pfn = max_pfn; | 859 | max_low_pfn = max_pfn; |
855 | 860 | ||
856 | high_memory = (void *)__va(max_pfn * PAGE_SIZE - 1) + 1; | 861 | high_memory = (void *)__va(max_pfn * PAGE_SIZE - 1) + 1; |
862 | max_pfn_mapped = KERNEL_IMAGE_SIZE >> PAGE_SHIFT; | ||
857 | #endif | 863 | #endif |
858 | 864 | ||
859 | #ifdef CONFIG_X86_CHECK_BIOS_CORRUPTION | 865 | #ifdef CONFIG_X86_CHECK_BIOS_CORRUPTION |
860 | setup_bios_corruption_check(); | 866 | setup_bios_corruption_check(); |
861 | #endif | 867 | #endif |
862 | 868 | ||
869 | printk(KERN_DEBUG "initial memory mapped : 0 - %08lx\n", | ||
870 | max_pfn_mapped<<PAGE_SHIFT); | ||
871 | |||
863 | reserve_brk(); | 872 | reserve_brk(); |
864 | 873 | ||
865 | /* max_pfn_mapped is updated here */ | 874 | /* max_pfn_mapped is updated here */ |
@@ -997,24 +1006,6 @@ void __init setup_arch(char **cmdline_p) | |||
997 | #ifdef CONFIG_X86_32 | 1006 | #ifdef CONFIG_X86_32 |
998 | 1007 | ||
999 | /** | 1008 | /** |
1000 | * x86_quirk_pre_intr_init - initialisation prior to setting up interrupt vectors | ||
1001 | * | ||
1002 | * Description: | ||
1003 | * Perform any necessary interrupt initialisation prior to setting up | ||
1004 | * the "ordinary" interrupt call gates. For legacy reasons, the ISA | ||
1005 | * interrupts should be initialised here if the machine emulates a PC | ||
1006 | * in any way. | ||
1007 | **/ | ||
1008 | void __init x86_quirk_pre_intr_init(void) | ||
1009 | { | ||
1010 | if (x86_quirks->arch_pre_intr_init) { | ||
1011 | if (x86_quirks->arch_pre_intr_init()) | ||
1012 | return; | ||
1013 | } | ||
1014 | init_ISA_irqs(); | ||
1015 | } | ||
1016 | |||
1017 | /** | ||
1018 | * x86_quirk_intr_init - post gate setup interrupt initialisation | 1009 | * x86_quirk_intr_init - post gate setup interrupt initialisation |
1019 | * | 1010 | * |
1020 | * Description: | 1011 | * Description: |
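The bootloader id handling added to setup_arch() splits type_of_loader into a type nibble and a version nibble, and folds in the ext_loader_* header fields when the type nibble is the 0xe extended-ID value. A worked example with made-up header values: type_of_loader = 0xe4 and ext_loader_type = 0x11 give bootloader_type = 0x4 | ((0x11 + 0x10) << 4) = 0x214; with ext_loader_ver = 0x2, bootloader_version = (0x214 & 0xf) | (0x2 << 4) = 0x24.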
diff --git a/arch/x86/kernel/setup_percpu.c b/arch/x86/kernel/setup_percpu.c index 8f0e13be36b3..9c3f0823e6aa 100644 --- a/arch/x86/kernel/setup_percpu.c +++ b/arch/x86/kernel/setup_percpu.c | |||
@@ -425,6 +425,14 @@ void __init setup_per_cpu_areas(void) | |||
425 | early_per_cpu_ptr(x86_cpu_to_node_map) = NULL; | 425 | early_per_cpu_ptr(x86_cpu_to_node_map) = NULL; |
426 | #endif | 426 | #endif |
427 | 427 | ||
428 | #if defined(CONFIG_X86_64) && defined(CONFIG_NUMA) | ||
429 | /* | ||
430 | * make sure the boot cpu's node_number is right when the boot cpu is | ||
431 | * on a node that doesn't have memory installed | ||
432 | */ | ||
433 | per_cpu(node_number, boot_cpu_id) = cpu_to_node(boot_cpu_id); | ||
434 | #endif | ||
435 | |||
428 | /* Setup node to cpumask map */ | 436 | /* Setup node to cpumask map */ |
429 | setup_node_to_cpumask_map(); | 437 | setup_node_to_cpumask_map(); |
430 | 438 | ||
diff --git a/arch/x86/kernel/signal.c b/arch/x86/kernel/signal.c index 14425166b8e3..4c578751e94e 100644 --- a/arch/x86/kernel/signal.c +++ b/arch/x86/kernel/signal.c | |||
@@ -6,7 +6,6 @@ | |||
6 | * 2000-06-20 Pentium III FXSR, SSE support by Gareth Hughes | 6 | * 2000-06-20 Pentium III FXSR, SSE support by Gareth Hughes |
7 | * 2000-2002 x86-64 support by Andi Kleen | 7 | * 2000-2002 x86-64 support by Andi Kleen |
8 | */ | 8 | */ |
9 | |||
10 | #include <linux/sched.h> | 9 | #include <linux/sched.h> |
11 | #include <linux/mm.h> | 10 | #include <linux/mm.h> |
12 | #include <linux/smp.h> | 11 | #include <linux/smp.h> |
@@ -25,11 +24,11 @@ | |||
25 | #include <asm/ucontext.h> | 24 | #include <asm/ucontext.h> |
26 | #include <asm/i387.h> | 25 | #include <asm/i387.h> |
27 | #include <asm/vdso.h> | 26 | #include <asm/vdso.h> |
27 | #include <asm/mce.h> | ||
28 | 28 | ||
29 | #ifdef CONFIG_X86_64 | 29 | #ifdef CONFIG_X86_64 |
30 | #include <asm/proto.h> | 30 | #include <asm/proto.h> |
31 | #include <asm/ia32_unistd.h> | 31 | #include <asm/ia32_unistd.h> |
32 | #include <asm/mce.h> | ||
33 | #endif /* CONFIG_X86_64 */ | 32 | #endif /* CONFIG_X86_64 */ |
34 | 33 | ||
35 | #include <asm/syscall.h> | 34 | #include <asm/syscall.h> |
@@ -857,10 +856,10 @@ static void do_signal(struct pt_regs *regs) | |||
857 | void | 856 | void |
858 | do_notify_resume(struct pt_regs *regs, void *unused, __u32 thread_info_flags) | 857 | do_notify_resume(struct pt_regs *regs, void *unused, __u32 thread_info_flags) |
859 | { | 858 | { |
860 | #if defined(CONFIG_X86_64) && defined(CONFIG_X86_MCE) | 859 | #ifdef CONFIG_X86_NEW_MCE |
861 | /* notify userspace of pending MCEs */ | 860 | /* notify userspace of pending MCEs */ |
862 | if (thread_info_flags & _TIF_MCE_NOTIFY) | 861 | if (thread_info_flags & _TIF_MCE_NOTIFY) |
863 | mce_notify_user(); | 862 | mce_notify_process(); |
864 | #endif /* CONFIG_X86_64 && CONFIG_X86_MCE */ | 863 | #endif /* CONFIG_X86_64 && CONFIG_X86_MCE */ |
865 | 864 | ||
866 | /* deal with pending signal delivery */ | 865 | /* deal with pending signal delivery */ |
diff --git a/arch/x86/kernel/smp.c b/arch/x86/kernel/smp.c index 13f33ea8ccaa..ec1de97600e7 100644 --- a/arch/x86/kernel/smp.c +++ b/arch/x86/kernel/smp.c | |||
@@ -150,14 +150,40 @@ void native_send_call_func_ipi(const struct cpumask *mask) | |||
150 | * this function calls the 'stop' function on all other CPUs in the system. | 150 | * this function calls the 'stop' function on all other CPUs in the system. |
151 | */ | 151 | */ |
152 | 152 | ||
153 | asmlinkage void smp_reboot_interrupt(void) | ||
154 | { | ||
155 | ack_APIC_irq(); | ||
156 | irq_enter(); | ||
157 | stop_this_cpu(NULL); | ||
158 | irq_exit(); | ||
159 | } | ||
160 | |||
153 | static void native_smp_send_stop(void) | 161 | static void native_smp_send_stop(void) |
154 | { | 162 | { |
155 | unsigned long flags; | 163 | unsigned long flags; |
164 | unsigned long wait; | ||
156 | 165 | ||
157 | if (reboot_force) | 166 | if (reboot_force) |
158 | return; | 167 | return; |
159 | 168 | ||
160 | smp_call_function(stop_this_cpu, NULL, 0); | 169 | /* |
170 | * Use our own vector here because smp_call_function | ||
171 | * does lots of things not suitable in a panic situation. | ||
172 | * On most systems we could also use an NMI here, | ||
173 | * but there are a few systems around where NMI | ||
174 | * is problematic, so stay with a non-NMI for now | ||
175 | * (this implies we cannot stop CPUs spinning with irq off | ||
176 | * currently) | ||
177 | */ | ||
178 | if (num_online_cpus() > 1) { | ||
179 | apic->send_IPI_allbutself(REBOOT_VECTOR); | ||
180 | |||
181 | /* Don't wait longer than a second */ | ||
182 | wait = USEC_PER_SEC; | ||
183 | while (num_online_cpus() > 1 && wait--) | ||
184 | udelay(1); | ||
185 | } | ||
186 | |||
161 | local_irq_save(flags); | 187 | local_irq_save(flags); |
162 | disable_local_APIC(); | 188 | disable_local_APIC(); |
163 | local_irq_restore(flags); | 189 | local_irq_restore(flags); |
@@ -172,6 +198,9 @@ void smp_reschedule_interrupt(struct pt_regs *regs) | |||
172 | { | 198 | { |
173 | ack_APIC_irq(); | 199 | ack_APIC_irq(); |
174 | inc_irq_stat(irq_resched_count); | 200 | inc_irq_stat(irq_resched_count); |
201 | /* | ||
202 | * KVM uses this interrupt to force a cpu out of guest mode | ||
203 | */ | ||
175 | } | 204 | } |
176 | 205 | ||
177 | void smp_call_function_interrupt(struct pt_regs *regs) | 206 | void smp_call_function_interrupt(struct pt_regs *regs) |
@@ -193,19 +222,19 @@ void smp_call_function_single_interrupt(struct pt_regs *regs) | |||
193 | } | 222 | } |
194 | 223 | ||
195 | struct smp_ops smp_ops = { | 224 | struct smp_ops smp_ops = { |
196 | .smp_prepare_boot_cpu = native_smp_prepare_boot_cpu, | 225 | .smp_prepare_boot_cpu = native_smp_prepare_boot_cpu, |
197 | .smp_prepare_cpus = native_smp_prepare_cpus, | 226 | .smp_prepare_cpus = native_smp_prepare_cpus, |
198 | .smp_cpus_done = native_smp_cpus_done, | 227 | .smp_cpus_done = native_smp_cpus_done, |
199 | 228 | ||
200 | .smp_send_stop = native_smp_send_stop, | 229 | .smp_send_stop = native_smp_send_stop, |
201 | .smp_send_reschedule = native_smp_send_reschedule, | 230 | .smp_send_reschedule = native_smp_send_reschedule, |
202 | 231 | ||
203 | .cpu_up = native_cpu_up, | 232 | .cpu_up = native_cpu_up, |
204 | .cpu_die = native_cpu_die, | 233 | .cpu_die = native_cpu_die, |
205 | .cpu_disable = native_cpu_disable, | 234 | .cpu_disable = native_cpu_disable, |
206 | .play_dead = native_play_dead, | 235 | .play_dead = native_play_dead, |
207 | 236 | ||
208 | .send_call_func_ipi = native_send_call_func_ipi, | 237 | .send_call_func_ipi = native_send_call_func_ipi, |
209 | .send_call_func_single_ipi = native_send_call_func_single_ipi, | 238 | .send_call_func_single_ipi = native_send_call_func_single_ipi, |
210 | }; | 239 | }; |
211 | EXPORT_SYMBOL_GPL(smp_ops); | 240 | EXPORT_SYMBOL_GPL(smp_ops); |
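The stop path above bounds its busy-wait instead of spinning forever: at most USEC_PER_SEC iterations of udelay(1), roughly one second, before it disables the local APIC regardless of how many CPUs answered the REBOOT_VECTOR IPI. The bounded-poll pattern on its own (condition_met() is a placeholder):

    unsigned long wait = USEC_PER_SEC;      /* 1,000,000 polls of ~1 us each */

    while (!condition_met() && wait--)
            udelay(1);                      /* upper bound: about one second */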
diff --git a/arch/x86/kernel/smpboot.c b/arch/x86/kernel/smpboot.c index 58d24ef917d8..2fecda69ee64 100644 --- a/arch/x86/kernel/smpboot.c +++ b/arch/x86/kernel/smpboot.c | |||
@@ -504,7 +504,7 @@ void __inquire_remote_apic(int apicid) | |||
504 | * INIT, INIT, STARTUP sequence will reset the chip hard for us, and this | 504 | * INIT, INIT, STARTUP sequence will reset the chip hard for us, and this |
505 | * won't ... remember to clear down the APIC, etc later. | 505 | * won't ... remember to clear down the APIC, etc later. |
506 | */ | 506 | */ |
507 | int __devinit | 507 | int __cpuinit |
508 | wakeup_secondary_cpu_via_nmi(int logical_apicid, unsigned long start_eip) | 508 | wakeup_secondary_cpu_via_nmi(int logical_apicid, unsigned long start_eip) |
509 | { | 509 | { |
510 | unsigned long send_status, accept_status = 0; | 510 | unsigned long send_status, accept_status = 0; |
@@ -538,7 +538,7 @@ wakeup_secondary_cpu_via_nmi(int logical_apicid, unsigned long start_eip) | |||
538 | return (send_status | accept_status); | 538 | return (send_status | accept_status); |
539 | } | 539 | } |
540 | 540 | ||
541 | int __devinit | 541 | static int __cpuinit |
542 | wakeup_secondary_cpu_via_init(int phys_apicid, unsigned long start_eip) | 542 | wakeup_secondary_cpu_via_init(int phys_apicid, unsigned long start_eip) |
543 | { | 543 | { |
544 | unsigned long send_status, accept_status = 0; | 544 | unsigned long send_status, accept_status = 0; |
@@ -822,10 +822,12 @@ do_rest: | |||
822 | /* mark "stuck" area as not stuck */ | 822 | /* mark "stuck" area as not stuck */ |
823 | *((volatile unsigned long *)trampoline_base) = 0; | 823 | *((volatile unsigned long *)trampoline_base) = 0; |
824 | 824 | ||
825 | /* | 825 | if (get_uv_system_type() != UV_NON_UNIQUE_APIC) { |
826 | * Cleanup possible dangling ends... | 826 | /* |
827 | */ | 827 | * Cleanup possible dangling ends... |
828 | smpboot_restore_warm_reset_vector(); | 828 | */ |
829 | smpboot_restore_warm_reset_vector(); | ||
830 | } | ||
829 | 831 | ||
830 | return boot_error; | 832 | return boot_error; |
831 | } | 833 | } |
@@ -871,7 +873,7 @@ int __cpuinit native_cpu_up(unsigned int cpu) | |||
871 | 873 | ||
872 | err = do_boot_cpu(apicid, cpu); | 874 | err = do_boot_cpu(apicid, cpu); |
873 | 875 | ||
874 | zap_low_mappings(); | 876 | zap_low_mappings(false); |
875 | low_mappings = 0; | 877 | low_mappings = 0; |
876 | #else | 878 | #else |
877 | err = do_boot_cpu(apicid, cpu); | 879 | err = do_boot_cpu(apicid, cpu); |
@@ -990,10 +992,12 @@ static int __init smp_sanity_check(unsigned max_cpus) | |||
990 | */ | 992 | */ |
991 | if (APIC_INTEGRATED(apic_version[boot_cpu_physical_apicid]) && | 993 | if (APIC_INTEGRATED(apic_version[boot_cpu_physical_apicid]) && |
992 | !cpu_has_apic) { | 994 | !cpu_has_apic) { |
993 | printk(KERN_ERR "BIOS bug, local APIC #%d not detected!...\n", | 995 | if (!disable_apic) { |
994 | boot_cpu_physical_apicid); | 996 | pr_err("BIOS bug, local APIC #%d not detected!...\n", |
995 | printk(KERN_ERR "... forcing use of dummy APIC emulation." | 997 | boot_cpu_physical_apicid); |
998 | pr_err("... forcing use of dummy APIC emulation." | ||
996 | "(tell your hw vendor)\n"); | 999 | "(tell your hw vendor)\n"); |
1000 | } | ||
997 | smpboot_clear_io_apic(); | 1001 | smpboot_clear_io_apic(); |
998 | arch_disable_smp_support(); | 1002 | arch_disable_smp_support(); |
999 | return -1; | 1003 | return -1; |
diff --git a/arch/x86/kernel/stacktrace.c b/arch/x86/kernel/stacktrace.c index f7bddc2e37d1..c3eb207181fe 100644 --- a/arch/x86/kernel/stacktrace.c +++ b/arch/x86/kernel/stacktrace.c | |||
@@ -20,7 +20,7 @@ save_stack_warning_symbol(void *data, char *msg, unsigned long symbol) | |||
20 | 20 | ||
21 | static int save_stack_stack(void *data, char *name) | 21 | static int save_stack_stack(void *data, char *name) |
22 | { | 22 | { |
23 | return -1; | 23 | return 0; |
24 | } | 24 | } |
25 | 25 | ||
26 | static void save_stack_address(void *data, unsigned long addr, int reliable) | 26 | static void save_stack_address(void *data, unsigned long addr, int reliable) |
@@ -77,6 +77,13 @@ void save_stack_trace(struct stack_trace *trace) | |||
77 | } | 77 | } |
78 | EXPORT_SYMBOL_GPL(save_stack_trace); | 78 | EXPORT_SYMBOL_GPL(save_stack_trace); |
79 | 79 | ||
80 | void save_stack_trace_bp(struct stack_trace *trace, unsigned long bp) | ||
81 | { | ||
82 | dump_trace(current, NULL, NULL, bp, &save_stack_ops, trace); | ||
83 | if (trace->nr_entries < trace->max_entries) | ||
84 | trace->entries[trace->nr_entries++] = ULONG_MAX; | ||
85 | } | ||
86 | |||
80 | void save_stack_trace_tsk(struct task_struct *tsk, struct stack_trace *trace) | 87 | void save_stack_trace_tsk(struct task_struct *tsk, struct stack_trace *trace) |
81 | { | 88 | { |
82 | dump_trace(tsk, NULL, NULL, 0, &save_stack_ops_nosched, trace); | 89 | dump_trace(tsk, NULL, NULL, 0, &save_stack_ops_nosched, trace); |
diff --git a/arch/x86/kernel/syscall_table_32.S b/arch/x86/kernel/syscall_table_32.S index ff5c8736b491..d51321ddafda 100644 --- a/arch/x86/kernel/syscall_table_32.S +++ b/arch/x86/kernel/syscall_table_32.S | |||
@@ -334,3 +334,5 @@ ENTRY(sys_call_table) | |||
334 | .long sys_inotify_init1 | 334 | .long sys_inotify_init1 |
335 | .long sys_preadv | 335 | .long sys_preadv |
336 | .long sys_pwritev | 336 | .long sys_pwritev |
337 | .long sys_rt_tgsigqueueinfo /* 335 */ | ||
338 | .long sys_perf_counter_open | ||
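The two new table entries occupy slots 335 (sys_rt_tgsigqueueinfo, per the inline comment) and 336 (sys_perf_counter_open) in the 32-bit syscall table; until libc grows wrappers they are reached through syscall(2). A hypothetical user-space call for the first one, with the syscall number assumed from its table position:

    /* Illustrative only; not part of the patch. */
    #include <signal.h>
    #include <string.h>
    #include <unistd.h>
    #include <sys/types.h>
    #include <sys/syscall.h>

    #ifndef __NR_rt_tgsigqueueinfo
    #define __NR_rt_tgsigqueueinfo 335      /* assumed from the table slot above */
    #endif

    /* Queue a signal with a payload to one thread (tid) of a thread group (tgid). */
    static int tgsigqueue(pid_t tgid, pid_t tid, int sig, int value)
    {
            siginfo_t info;

            memset(&info, 0, sizeof(info));
            info.si_signo = sig;
            info.si_code  = SI_QUEUE;       /* negative si_code is accepted from user space */
            info.si_value.sival_int = value;

            return syscall(__NR_rt_tgsigqueueinfo, tgid, tid, sig, &info);
    }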
diff --git a/arch/x86/kernel/tlb_uv.c b/arch/x86/kernel/tlb_uv.c index 8c7b03b0cfcb..124d40c575df 100644 --- a/arch/x86/kernel/tlb_uv.c +++ b/arch/x86/kernel/tlb_uv.c | |||
@@ -715,7 +715,12 @@ uv_activation_descriptor_init(int node, int pnode) | |||
715 | struct bau_desc *adp; | 715 | struct bau_desc *adp; |
716 | struct bau_desc *ad2; | 716 | struct bau_desc *ad2; |
717 | 717 | ||
718 | adp = (struct bau_desc *)kmalloc_node(16384, GFP_KERNEL, node); | 718 | /* |
719 | * each bau_desc is 64 bytes; there are 8 (UV_ITEMS_PER_DESCRIPTOR) | ||
720 | * per cpu; and up to 32 (UV_ADP_SIZE) cpus per blade | ||
721 | */ | ||
722 | adp = (struct bau_desc *)kmalloc_node(sizeof(struct bau_desc)* | ||
723 | UV_ADP_SIZE*UV_ITEMS_PER_DESCRIPTOR, GFP_KERNEL, node); | ||
719 | BUG_ON(!adp); | 724 | BUG_ON(!adp); |
720 | 725 | ||
721 | pa = uv_gpa(adp); /* need the real nasid*/ | 726 | pa = uv_gpa(adp); /* need the real nasid*/ |
@@ -729,7 +734,13 @@ uv_activation_descriptor_init(int node, int pnode) | |||
729 | (n << UV_DESC_BASE_PNODE_SHIFT | m)); | 734 | (n << UV_DESC_BASE_PNODE_SHIFT | m)); |
730 | } | 735 | } |
731 | 736 | ||
732 | for (i = 0, ad2 = adp; i < UV_ACTIVATION_DESCRIPTOR_SIZE; i++, ad2++) { | 737 | /* |
738 | * initializing all 8 (UV_ITEMS_PER_DESCRIPTOR) descriptors for each | ||
739 | * cpu even though we only use the first one; one descriptor can | ||
740 | * describe a broadcast to 256 nodes. | ||
741 | */ | ||
742 | for (i = 0, ad2 = adp; i < (UV_ADP_SIZE*UV_ITEMS_PER_DESCRIPTOR); | ||
743 | i++, ad2++) { | ||
733 | memset(ad2, 0, sizeof(struct bau_desc)); | 744 | memset(ad2, 0, sizeof(struct bau_desc)); |
734 | ad2->header.sw_ack_flag = 1; | 745 | ad2->header.sw_ack_flag = 1; |
735 | /* | 746 | /* |
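The sizing comment added to uv_activation_descriptor_init() explains where the old magic number came from: 64-byte descriptors, 8 slots per cpu (UV_ITEMS_PER_DESCRIPTOR), and up to 32 cpus per blade (UV_ADP_SIZE) multiply out to exactly the 16384 bytes that used to be hard-coded. A stand-alone sanity check of that arithmetic, with the constants taken from the comment:

    /* Illustrative only: the new expression equals the replaced constant. */
    #include <assert.h>

    enum {
            BAU_DESC_BYTES          = 64,   /* sizeof(struct bau_desc), per the comment */
            UV_ITEMS_PER_DESCRIPTOR = 8,
            UV_ADP_SIZE             = 32,
    };

    int main(void)
    {
            /* matches the 16384 previously passed to kmalloc_node() */
            assert(BAU_DESC_BYTES * UV_ITEMS_PER_DESCRIPTOR * UV_ADP_SIZE == 16384);
            return 0;
    }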
diff --git a/arch/x86/kernel/traps.c b/arch/x86/kernel/traps.c index a1d288327ff0..a0f48f5671c0 100644 --- a/arch/x86/kernel/traps.c +++ b/arch/x86/kernel/traps.c | |||
@@ -45,6 +45,7 @@ | |||
45 | #include <linux/edac.h> | 45 | #include <linux/edac.h> |
46 | #endif | 46 | #endif |
47 | 47 | ||
48 | #include <asm/kmemcheck.h> | ||
48 | #include <asm/stacktrace.h> | 49 | #include <asm/stacktrace.h> |
49 | #include <asm/processor.h> | 50 | #include <asm/processor.h> |
50 | #include <asm/debugreg.h> | 51 | #include <asm/debugreg.h> |
@@ -53,6 +54,7 @@ | |||
53 | #include <asm/traps.h> | 54 | #include <asm/traps.h> |
54 | #include <asm/desc.h> | 55 | #include <asm/desc.h> |
55 | #include <asm/i387.h> | 56 | #include <asm/i387.h> |
57 | #include <asm/mce.h> | ||
56 | 58 | ||
57 | #include <asm/mach_traps.h> | 59 | #include <asm/mach_traps.h> |
58 | 60 | ||
@@ -64,8 +66,6 @@ | |||
64 | #include <asm/setup.h> | 66 | #include <asm/setup.h> |
65 | #include <asm/traps.h> | 67 | #include <asm/traps.h> |
66 | 68 | ||
67 | #include "cpu/mcheck/mce.h" | ||
68 | |||
69 | asmlinkage int system_call(void); | 69 | asmlinkage int system_call(void); |
70 | 70 | ||
71 | /* Do we ignore FPU interrupts ? */ | 71 | /* Do we ignore FPU interrupts ? */ |
@@ -534,6 +534,10 @@ dotraplinkage void __kprobes do_debug(struct pt_regs *regs, long error_code) | |||
534 | 534 | ||
535 | get_debugreg(condition, 6); | 535 | get_debugreg(condition, 6); |
536 | 536 | ||
537 | /* Catch kmemcheck conditions first of all! */ | ||
538 | if (condition & DR_STEP && kmemcheck_trap(regs)) | ||
539 | return; | ||
540 | |||
537 | /* | 541 | /* |
538 | * The processor cleared BTF, so don't mark that we need it set. | 542 | * The processor cleared BTF, so don't mark that we need it set. |
539 | */ | 543 | */ |
@@ -798,15 +802,15 @@ unsigned long patch_espfix_desc(unsigned long uesp, unsigned long kesp) | |||
798 | 802 | ||
799 | return new_kesp; | 803 | return new_kesp; |
800 | } | 804 | } |
801 | #else | 805 | #endif |
806 | |||
802 | asmlinkage void __attribute__((weak)) smp_thermal_interrupt(void) | 807 | asmlinkage void __attribute__((weak)) smp_thermal_interrupt(void) |
803 | { | 808 | { |
804 | } | 809 | } |
805 | 810 | ||
806 | asmlinkage void __attribute__((weak)) mce_threshold_interrupt(void) | 811 | asmlinkage void __attribute__((weak)) smp_threshold_interrupt(void) |
807 | { | 812 | { |
808 | } | 813 | } |
809 | #endif | ||
810 | 814 | ||
811 | /* | 815 | /* |
812 | * 'math_state_restore()' saves the current math information in the | 816 | * 'math_state_restore()' saves the current math information in the |
@@ -839,9 +843,6 @@ asmlinkage void math_state_restore(void) | |||
839 | } | 843 | } |
840 | 844 | ||
841 | clts(); /* Allow maths ops (or we recurse) */ | 845 | clts(); /* Allow maths ops (or we recurse) */ |
842 | #ifdef CONFIG_X86_32 | ||
843 | restore_fpu(tsk); | ||
844 | #else | ||
845 | /* | 846 | /* |
846 | * Paranoid restore. send a SIGSEGV if we fail to restore the state. | 847 | * Paranoid restore. send a SIGSEGV if we fail to restore the state. |
847 | */ | 848 | */ |
@@ -850,7 +851,7 @@ asmlinkage void math_state_restore(void) | |||
850 | force_sig(SIGSEGV, tsk); | 851 | force_sig(SIGSEGV, tsk); |
851 | return; | 852 | return; |
852 | } | 853 | } |
853 | #endif | 854 | |
854 | thread->status |= TS_USEDFPU; /* So we fnsave on switch_to() */ | 855 | thread->status |= TS_USEDFPU; /* So we fnsave on switch_to() */ |
855 | tsk->fpu_counter++; | 856 | tsk->fpu_counter++; |
856 | } | 857 | } |
@@ -945,8 +946,13 @@ void __init trap_init(void) | |||
945 | #endif | 946 | #endif |
946 | set_intr_gate(19, &simd_coprocessor_error); | 947 | set_intr_gate(19, &simd_coprocessor_error); |
947 | 948 | ||
949 | /* Reserve all the builtin and the syscall vector: */ | ||
950 | for (i = 0; i < FIRST_EXTERNAL_VECTOR; i++) | ||
951 | set_bit(i, used_vectors); | ||
952 | |||
948 | #ifdef CONFIG_IA32_EMULATION | 953 | #ifdef CONFIG_IA32_EMULATION |
949 | set_system_intr_gate(IA32_SYSCALL_VECTOR, ia32_syscall); | 954 | set_system_intr_gate(IA32_SYSCALL_VECTOR, ia32_syscall); |
955 | set_bit(IA32_SYSCALL_VECTOR, used_vectors); | ||
950 | #endif | 956 | #endif |
951 | 957 | ||
952 | #ifdef CONFIG_X86_32 | 958 | #ifdef CONFIG_X86_32 |
@@ -963,17 +969,9 @@ void __init trap_init(void) | |||
963 | } | 969 | } |
964 | 970 | ||
965 | set_system_trap_gate(SYSCALL_VECTOR, &system_call); | 971 | set_system_trap_gate(SYSCALL_VECTOR, &system_call); |
966 | #endif | ||
967 | |||
968 | /* Reserve all the builtin and the syscall vector: */ | ||
969 | for (i = 0; i < FIRST_EXTERNAL_VECTOR; i++) | ||
970 | set_bit(i, used_vectors); | ||
971 | |||
972 | #ifdef CONFIG_X86_64 | ||
973 | set_bit(IA32_SYSCALL_VECTOR, used_vectors); | ||
974 | #else | ||
975 | set_bit(SYSCALL_VECTOR, used_vectors); | 972 | set_bit(SYSCALL_VECTOR, used_vectors); |
976 | #endif | 973 | #endif |
974 | |||
977 | /* | 975 | /* |
978 | * Should be a barrier for any external CPU state: | 976 | * Should be a barrier for any external CPU state: |
979 | */ | 977 | */ |
diff --git a/arch/x86/kernel/tsc.c b/arch/x86/kernel/tsc.c index d57de05dc430..6e1a368d21d4 100644 --- a/arch/x86/kernel/tsc.c +++ b/arch/x86/kernel/tsc.c | |||
@@ -9,6 +9,7 @@ | |||
9 | #include <linux/delay.h> | 9 | #include <linux/delay.h> |
10 | #include <linux/clocksource.h> | 10 | #include <linux/clocksource.h> |
11 | #include <linux/percpu.h> | 11 | #include <linux/percpu.h> |
12 | #include <linux/timex.h> | ||
12 | 13 | ||
13 | #include <asm/hpet.h> | 14 | #include <asm/hpet.h> |
14 | #include <asm/timer.h> | 15 | #include <asm/timer.h> |
@@ -384,13 +385,13 @@ unsigned long native_calibrate_tsc(void) | |||
384 | { | 385 | { |
385 | u64 tsc1, tsc2, delta, ref1, ref2; | 386 | u64 tsc1, tsc2, delta, ref1, ref2; |
386 | unsigned long tsc_pit_min = ULONG_MAX, tsc_ref_min = ULONG_MAX; | 387 | unsigned long tsc_pit_min = ULONG_MAX, tsc_ref_min = ULONG_MAX; |
387 | unsigned long flags, latch, ms, fast_calibrate, tsc_khz; | 388 | unsigned long flags, latch, ms, fast_calibrate, hv_tsc_khz; |
388 | int hpet = is_hpet_enabled(), i, loopmin; | 389 | int hpet = is_hpet_enabled(), i, loopmin; |
389 | 390 | ||
390 | tsc_khz = get_hypervisor_tsc_freq(); | 391 | hv_tsc_khz = get_hypervisor_tsc_freq(); |
391 | if (tsc_khz) { | 392 | if (hv_tsc_khz) { |
392 | printk(KERN_INFO "TSC: Frequency read from the hypervisor\n"); | 393 | printk(KERN_INFO "TSC: Frequency read from the hypervisor\n"); |
393 | return tsc_khz; | 394 | return hv_tsc_khz; |
394 | } | 395 | } |
395 | 396 | ||
396 | local_irq_save(flags); | 397 | local_irq_save(flags); |
@@ -589,22 +590,26 @@ EXPORT_SYMBOL(recalibrate_cpu_khz); | |||
589 | */ | 590 | */ |
590 | 591 | ||
591 | DEFINE_PER_CPU(unsigned long, cyc2ns); | 592 | DEFINE_PER_CPU(unsigned long, cyc2ns); |
593 | DEFINE_PER_CPU(unsigned long long, cyc2ns_offset); | ||
592 | 594 | ||
593 | static void set_cyc2ns_scale(unsigned long cpu_khz, int cpu) | 595 | static void set_cyc2ns_scale(unsigned long cpu_khz, int cpu) |
594 | { | 596 | { |
595 | unsigned long long tsc_now, ns_now; | 597 | unsigned long long tsc_now, ns_now, *offset; |
596 | unsigned long flags, *scale; | 598 | unsigned long flags, *scale; |
597 | 599 | ||
598 | local_irq_save(flags); | 600 | local_irq_save(flags); |
599 | sched_clock_idle_sleep_event(); | 601 | sched_clock_idle_sleep_event(); |
600 | 602 | ||
601 | scale = &per_cpu(cyc2ns, cpu); | 603 | scale = &per_cpu(cyc2ns, cpu); |
604 | offset = &per_cpu(cyc2ns_offset, cpu); | ||
602 | 605 | ||
603 | rdtscll(tsc_now); | 606 | rdtscll(tsc_now); |
604 | ns_now = __cycles_2_ns(tsc_now); | 607 | ns_now = __cycles_2_ns(tsc_now); |
605 | 608 | ||
606 | if (cpu_khz) | 609 | if (cpu_khz) { |
607 | *scale = (NSEC_PER_MSEC << CYC2NS_SCALE_FACTOR)/cpu_khz; | 610 | *scale = (NSEC_PER_MSEC << CYC2NS_SCALE_FACTOR)/cpu_khz; |
611 | *offset = ns_now - (tsc_now * *scale >> CYC2NS_SCALE_FACTOR); | ||
612 | } | ||
608 | 613 | ||
609 | sched_clock_idle_wakeup_event(0); | 614 | sched_clock_idle_wakeup_event(0); |
610 | local_irq_restore(flags); | 615 | local_irq_restore(flags); |
@@ -631,17 +636,15 @@ static int time_cpufreq_notifier(struct notifier_block *nb, unsigned long val, | |||
631 | void *data) | 636 | void *data) |
632 | { | 637 | { |
633 | struct cpufreq_freqs *freq = data; | 638 | struct cpufreq_freqs *freq = data; |
634 | unsigned long *lpj, dummy; | 639 | unsigned long *lpj; |
635 | 640 | ||
636 | if (cpu_has(&cpu_data(freq->cpu), X86_FEATURE_CONSTANT_TSC)) | 641 | if (cpu_has(&cpu_data(freq->cpu), X86_FEATURE_CONSTANT_TSC)) |
637 | return 0; | 642 | return 0; |
638 | 643 | ||
639 | lpj = &dummy; | 644 | lpj = &boot_cpu_data.loops_per_jiffy; |
640 | if (!(freq->flags & CPUFREQ_CONST_LOOPS)) | ||
641 | #ifdef CONFIG_SMP | 645 | #ifdef CONFIG_SMP |
646 | if (!(freq->flags & CPUFREQ_CONST_LOOPS)) | ||
642 | lpj = &cpu_data(freq->cpu).loops_per_jiffy; | 647 | lpj = &cpu_data(freq->cpu).loops_per_jiffy; |
643 | #else | ||
644 | lpj = &boot_cpu_data.loops_per_jiffy; | ||
645 | #endif | 648 | #endif |
646 | 649 | ||
647 | if (!ref_freq) { | 650 | if (!ref_freq) { |
@@ -710,7 +713,16 @@ static cycle_t read_tsc(struct clocksource *cs) | |||
710 | #ifdef CONFIG_X86_64 | 713 | #ifdef CONFIG_X86_64 |
711 | static cycle_t __vsyscall_fn vread_tsc(void) | 714 | static cycle_t __vsyscall_fn vread_tsc(void) |
712 | { | 715 | { |
713 | cycle_t ret = (cycle_t)vget_cycles(); | 716 | cycle_t ret; |
717 | |||
718 | /* | ||
719 | * Surround the RDTSC by barriers, to make sure it's not | ||
720 | * speculated to outside the seqlock critical section and | ||
721 | * does not cause time warps: | ||
722 | */ | ||
723 | rdtsc_barrier(); | ||
724 | ret = (cycle_t)vget_cycles(); | ||
725 | rdtsc_barrier(); | ||
714 | 726 | ||
715 | return ret >= __vsyscall_gtod_data.clock.cycle_last ? | 727 | return ret >= __vsyscall_gtod_data.clock.cycle_last ? |
716 | ret : __vsyscall_gtod_data.clock.cycle_last; | 728 | ret : __vsyscall_gtod_data.clock.cycle_last; |
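The per-cpu cyc2ns_offset introduced in this file keeps sched_clock() continuous when the cycles-to-nanoseconds scale is recomputed (for instance after a cpufreq transition): the nanosecond value is (tsc * scale >> CYC2NS_SCALE_FACTOR) + offset, and the new offset is chosen so the conversion still yields ns_now at the instant of the change. A small sketch of that invariant; the value of CYC2NS_SCALE_FACTOR is assumed, since it is not visible in this hunk.

    /* Illustrative only: why the offset prevents a jump at rescale time. */
    #define CYC2NS_SCALE_FACTOR 10          /* assumed for the example */

    static unsigned long long cycles_to_ns(unsigned long long cyc,
                                           unsigned long scale,
                                           unsigned long long offset)
    {
            return ((cyc * scale) >> CYC2NS_SCALE_FACTOR) + offset;
    }

    /*
     * At the moment of a frequency change:
     *      new_offset = ns_now - ((tsc_now * new_scale) >> CYC2NS_SCALE_FACTOR)
     * so cycles_to_ns(tsc_now, new_scale, new_offset) == ns_now, matching the
     * computation added to set_cyc2ns_scale().
     */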
diff --git a/arch/x86/kernel/tsc_sync.c b/arch/x86/kernel/tsc_sync.c index bf36328f6ef9..027b5b498993 100644 --- a/arch/x86/kernel/tsc_sync.c +++ b/arch/x86/kernel/tsc_sync.c | |||
@@ -34,6 +34,7 @@ static __cpuinitdata atomic_t stop_count; | |||
34 | * of a critical section, to be able to prove TSC time-warps: | 34 | * of a critical section, to be able to prove TSC time-warps: |
35 | */ | 35 | */ |
36 | static __cpuinitdata raw_spinlock_t sync_lock = __RAW_SPIN_LOCK_UNLOCKED; | 36 | static __cpuinitdata raw_spinlock_t sync_lock = __RAW_SPIN_LOCK_UNLOCKED; |
37 | |||
37 | static __cpuinitdata cycles_t last_tsc; | 38 | static __cpuinitdata cycles_t last_tsc; |
38 | static __cpuinitdata cycles_t max_warp; | 39 | static __cpuinitdata cycles_t max_warp; |
39 | static __cpuinitdata int nr_warps; | 40 | static __cpuinitdata int nr_warps; |
@@ -113,13 +114,12 @@ void __cpuinit check_tsc_sync_source(int cpu) | |||
113 | return; | 114 | return; |
114 | 115 | ||
115 | if (boot_cpu_has(X86_FEATURE_TSC_RELIABLE)) { | 116 | if (boot_cpu_has(X86_FEATURE_TSC_RELIABLE)) { |
116 | printk(KERN_INFO | 117 | pr_info("Skipping synchronization checks as TSC is reliable.\n"); |
117 | "Skipping synchronization checks as TSC is reliable.\n"); | ||
118 | return; | 118 | return; |
119 | } | 119 | } |
120 | 120 | ||
121 | printk(KERN_INFO "checking TSC synchronization [CPU#%d -> CPU#%d]:", | 121 | pr_info("checking TSC synchronization [CPU#%d -> CPU#%d]:", |
122 | smp_processor_id(), cpu); | 122 | smp_processor_id(), cpu); |
123 | 123 | ||
124 | /* | 124 | /* |
125 | * Reset it - in case this is a second bootup: | 125 | * Reset it - in case this is a second bootup: |
@@ -143,8 +143,8 @@ void __cpuinit check_tsc_sync_source(int cpu) | |||
143 | 143 | ||
144 | if (nr_warps) { | 144 | if (nr_warps) { |
145 | printk("\n"); | 145 | printk("\n"); |
146 | printk(KERN_WARNING "Measured %Ld cycles TSC warp between CPUs," | 146 | pr_warning("Measured %Ld cycles TSC warp between CPUs, " |
147 | " turning off TSC clock.\n", max_warp); | 147 | "turning off TSC clock.\n", max_warp); |
148 | mark_tsc_unstable("check_tsc_sync_source failed"); | 148 | mark_tsc_unstable("check_tsc_sync_source failed"); |
149 | } else { | 149 | } else { |
150 | printk(" passed.\n"); | 150 | printk(" passed.\n"); |
@@ -195,5 +195,3 @@ void __cpuinit check_tsc_sync_target(void) | |||
195 | while (atomic_read(&stop_count) != cpus) | 195 | while (atomic_read(&stop_count) != cpus) |
196 | cpu_relax(); | 196 | cpu_relax(); |
197 | } | 197 | } |
198 | #undef NR_LOOPS | ||
199 | |||
diff --git a/arch/x86/kernel/vm86_32.c b/arch/x86/kernel/vm86_32.c index d7ac84e7fc1c..9c4e62539058 100644 --- a/arch/x86/kernel/vm86_32.c +++ b/arch/x86/kernel/vm86_32.c | |||
@@ -287,10 +287,9 @@ static void do_sys_vm86(struct kernel_vm86_struct *info, struct task_struct *tsk | |||
287 | info->regs.pt.ds = 0; | 287 | info->regs.pt.ds = 0; |
288 | info->regs.pt.es = 0; | 288 | info->regs.pt.es = 0; |
289 | info->regs.pt.fs = 0; | 289 | info->regs.pt.fs = 0; |
290 | 290 | #ifndef CONFIG_X86_32_LAZY_GS | |
291 | /* we are clearing gs later just before "jmp resume_userspace", | 291 | info->regs.pt.gs = 0; |
292 | * because it is not saved/restored. | 292 | #endif |
293 | */ | ||
294 | 293 | ||
295 | /* | 294 | /* |
296 | * The flags register is also special: we cannot trust that the user | 295 | * The flags register is also special: we cannot trust that the user |
@@ -318,9 +317,9 @@ static void do_sys_vm86(struct kernel_vm86_struct *info, struct task_struct *tsk | |||
318 | } | 317 | } |
319 | 318 | ||
320 | /* | 319 | /* |
321 | * Save old state, set default return value (%ax) to 0 | 320 | * Save old state, set default return value (%ax) to 0 (VM86_SIGNAL) |
322 | */ | 321 | */ |
323 | info->regs32->ax = 0; | 322 | info->regs32->ax = VM86_SIGNAL; |
324 | tsk->thread.saved_sp0 = tsk->thread.sp0; | 323 | tsk->thread.saved_sp0 = tsk->thread.sp0; |
325 | tsk->thread.saved_fs = info->regs32->fs; | 324 | tsk->thread.saved_fs = info->regs32->fs; |
326 | tsk->thread.saved_gs = get_user_gs(info->regs32); | 325 | tsk->thread.saved_gs = get_user_gs(info->regs32); |
@@ -343,7 +342,9 @@ static void do_sys_vm86(struct kernel_vm86_struct *info, struct task_struct *tsk | |||
343 | __asm__ __volatile__( | 342 | __asm__ __volatile__( |
344 | "movl %0,%%esp\n\t" | 343 | "movl %0,%%esp\n\t" |
345 | "movl %1,%%ebp\n\t" | 344 | "movl %1,%%ebp\n\t" |
345 | #ifdef CONFIG_X86_32_LAZY_GS | ||
346 | "mov %2, %%gs\n\t" | 346 | "mov %2, %%gs\n\t" |
347 | #endif | ||
347 | "jmp resume_userspace" | 348 | "jmp resume_userspace" |
348 | : /* no outputs */ | 349 | : /* no outputs */ |
349 | :"r" (&info->regs), "r" (task_thread_info(tsk)), "r" (0)); | 350 | :"r" (&info->regs), "r" (task_thread_info(tsk)), "r" (0)); |
diff --git a/arch/x86/kernel/vmi_32.c b/arch/x86/kernel/vmi_32.c index 95deb9f2211e..b263423fbe2a 100644 --- a/arch/x86/kernel/vmi_32.c +++ b/arch/x86/kernel/vmi_32.c | |||
@@ -462,22 +462,28 @@ vmi_startup_ipi_hook(int phys_apicid, unsigned long start_eip, | |||
462 | } | 462 | } |
463 | #endif | 463 | #endif |
464 | 464 | ||
465 | static void vmi_enter_lazy_cpu(void) | 465 | static void vmi_start_context_switch(struct task_struct *prev) |
466 | { | 466 | { |
467 | paravirt_enter_lazy_cpu(); | 467 | paravirt_start_context_switch(prev); |
468 | vmi_ops.set_lazy_mode(2); | 468 | vmi_ops.set_lazy_mode(2); |
469 | } | 469 | } |
470 | 470 | ||
471 | static void vmi_end_context_switch(struct task_struct *next) | ||
472 | { | ||
473 | vmi_ops.set_lazy_mode(0); | ||
474 | paravirt_end_context_switch(next); | ||
475 | } | ||
476 | |||
471 | static void vmi_enter_lazy_mmu(void) | 477 | static void vmi_enter_lazy_mmu(void) |
472 | { | 478 | { |
473 | paravirt_enter_lazy_mmu(); | 479 | paravirt_enter_lazy_mmu(); |
474 | vmi_ops.set_lazy_mode(1); | 480 | vmi_ops.set_lazy_mode(1); |
475 | } | 481 | } |
476 | 482 | ||
477 | static void vmi_leave_lazy(void) | 483 | static void vmi_leave_lazy_mmu(void) |
478 | { | 484 | { |
479 | paravirt_leave_lazy(paravirt_get_lazy_mode()); | ||
480 | vmi_ops.set_lazy_mode(0); | 485 | vmi_ops.set_lazy_mode(0); |
486 | paravirt_leave_lazy_mmu(); | ||
481 | } | 487 | } |
482 | 488 | ||
483 | static inline int __init check_vmi_rom(struct vrom_header *rom) | 489 | static inline int __init check_vmi_rom(struct vrom_header *rom) |
@@ -711,14 +717,14 @@ static inline int __init activate_vmi(void) | |||
711 | para_fill(pv_cpu_ops.set_iopl_mask, SetIOPLMask); | 717 | para_fill(pv_cpu_ops.set_iopl_mask, SetIOPLMask); |
712 | para_fill(pv_cpu_ops.io_delay, IODelay); | 718 | para_fill(pv_cpu_ops.io_delay, IODelay); |
713 | 719 | ||
714 | para_wrap(pv_cpu_ops.lazy_mode.enter, vmi_enter_lazy_cpu, | 720 | para_wrap(pv_cpu_ops.start_context_switch, vmi_start_context_switch, |
715 | set_lazy_mode, SetLazyMode); | 721 | set_lazy_mode, SetLazyMode); |
716 | para_wrap(pv_cpu_ops.lazy_mode.leave, vmi_leave_lazy, | 722 | para_wrap(pv_cpu_ops.end_context_switch, vmi_end_context_switch, |
717 | set_lazy_mode, SetLazyMode); | 723 | set_lazy_mode, SetLazyMode); |
718 | 724 | ||
719 | para_wrap(pv_mmu_ops.lazy_mode.enter, vmi_enter_lazy_mmu, | 725 | para_wrap(pv_mmu_ops.lazy_mode.enter, vmi_enter_lazy_mmu, |
720 | set_lazy_mode, SetLazyMode); | 726 | set_lazy_mode, SetLazyMode); |
721 | para_wrap(pv_mmu_ops.lazy_mode.leave, vmi_leave_lazy, | 727 | para_wrap(pv_mmu_ops.lazy_mode.leave, vmi_leave_lazy_mmu, |
722 | set_lazy_mode, SetLazyMode); | 728 | set_lazy_mode, SetLazyMode); |
723 | 729 | ||
724 | /* user and kernel flush are just handled with different flags to FlushTLB */ | 730 | /* user and kernel flush are just handled with different flags to FlushTLB */ |
diff --git a/arch/x86/kernel/vmlinux.lds.S b/arch/x86/kernel/vmlinux.lds.S index 849ee611f013..367e87882041 100644 --- a/arch/x86/kernel/vmlinux.lds.S +++ b/arch/x86/kernel/vmlinux.lds.S | |||
@@ -1,5 +1,433 @@ | |||
1 | /* | ||
2 | * ld script for the x86 kernel | ||
3 | * | ||
4 | * Historic 32-bit version written by Martin Mares <mj@atrey.karlin.mff.cuni.cz> | ||
5 | * | ||
6 | * Modernisation, unification and other changes and fixes: | ||
7 | * Copyright (C) 2007-2009 Sam Ravnborg <sam@ravnborg.org> | ||
8 | * | ||
9 | * | ||
10 | * Don't define absolute symbols until and unless you know that symbol | ||
11 | * value should remain constant even if kernel image is relocated | ||
12 | * at run time. Absolute symbols are not relocated. If symbol value should | ||
13 | * change if kernel is relocated, make the symbol section relative and | ||
14 | * put it inside the section definition. | ||
15 | */ | ||
16 | |||
1 | #ifdef CONFIG_X86_32 | 17 | #ifdef CONFIG_X86_32 |
2 | # include "vmlinux_32.lds.S" | 18 | #define LOAD_OFFSET __PAGE_OFFSET |
3 | #else | 19 | #else |
4 | # include "vmlinux_64.lds.S" | 20 | #define LOAD_OFFSET __START_KERNEL_map |
5 | #endif | 21 | #endif |
22 | |||
23 | #include <asm-generic/vmlinux.lds.h> | ||
24 | #include <asm/asm-offsets.h> | ||
25 | #include <asm/thread_info.h> | ||
26 | #include <asm/page_types.h> | ||
27 | #include <asm/cache.h> | ||
28 | #include <asm/boot.h> | ||
29 | |||
30 | #undef i386 /* in case the preprocessor is a 32bit one */ | ||
31 | |||
32 | OUTPUT_FORMAT(CONFIG_OUTPUT_FORMAT, CONFIG_OUTPUT_FORMAT, CONFIG_OUTPUT_FORMAT) | ||
33 | |||
34 | #ifdef CONFIG_X86_32 | ||
35 | OUTPUT_ARCH(i386) | ||
36 | ENTRY(phys_startup_32) | ||
37 | jiffies = jiffies_64; | ||
38 | #else | ||
39 | OUTPUT_ARCH(i386:x86-64) | ||
40 | ENTRY(phys_startup_64) | ||
41 | jiffies_64 = jiffies; | ||
42 | #endif | ||
43 | |||
44 | PHDRS { | ||
45 | text PT_LOAD FLAGS(5); /* R_E */ | ||
46 | data PT_LOAD FLAGS(7); /* RWE */ | ||
47 | #ifdef CONFIG_X86_64 | ||
48 | user PT_LOAD FLAGS(7); /* RWE */ | ||
49 | data.init PT_LOAD FLAGS(7); /* RWE */ | ||
50 | #ifdef CONFIG_SMP | ||
51 | percpu PT_LOAD FLAGS(7); /* RWE */ | ||
52 | #endif | ||
53 | data.init2 PT_LOAD FLAGS(7); /* RWE */ | ||
54 | #endif | ||
55 | note PT_NOTE FLAGS(0); /* ___ */ | ||
56 | } | ||
57 | |||
58 | SECTIONS | ||
59 | { | ||
60 | #ifdef CONFIG_X86_32 | ||
61 | . = LOAD_OFFSET + LOAD_PHYSICAL_ADDR; | ||
62 | phys_startup_32 = startup_32 - LOAD_OFFSET; | ||
63 | #else | ||
64 | . = __START_KERNEL; | ||
65 | phys_startup_64 = startup_64 - LOAD_OFFSET; | ||
66 | #endif | ||
67 | |||
68 | /* Text and read-only data */ | ||
69 | |||
70 | /* bootstrapping code */ | ||
71 | .text.head : AT(ADDR(.text.head) - LOAD_OFFSET) { | ||
72 | _text = .; | ||
73 | *(.text.head) | ||
74 | } :text = 0x9090 | ||
75 | |||
76 | /* The rest of the text */ | ||
77 | .text : AT(ADDR(.text) - LOAD_OFFSET) { | ||
78 | #ifdef CONFIG_X86_32 | ||
79 | /* not really needed, already page aligned */ | ||
80 | . = ALIGN(PAGE_SIZE); | ||
81 | *(.text.page_aligned) | ||
82 | #endif | ||
83 | . = ALIGN(8); | ||
84 | _stext = .; | ||
85 | TEXT_TEXT | ||
86 | SCHED_TEXT | ||
87 | LOCK_TEXT | ||
88 | KPROBES_TEXT | ||
89 | IRQENTRY_TEXT | ||
90 | *(.fixup) | ||
91 | *(.gnu.warning) | ||
92 | /* End of text section */ | ||
93 | _etext = .; | ||
94 | } :text = 0x9090 | ||
95 | |||
96 | NOTES :text :note | ||
97 | |||
98 | /* Exception table */ | ||
99 | . = ALIGN(16); | ||
100 | __ex_table : AT(ADDR(__ex_table) - LOAD_OFFSET) { | ||
101 | __start___ex_table = .; | ||
102 | *(__ex_table) | ||
103 | __stop___ex_table = .; | ||
104 | } :text = 0x9090 | ||
105 | |||
106 | RODATA | ||
107 | |||
108 | /* Data */ | ||
109 | . = ALIGN(PAGE_SIZE); | ||
110 | .data : AT(ADDR(.data) - LOAD_OFFSET) { | ||
111 | /* Start of data section */ | ||
112 | _sdata = .; | ||
113 | DATA_DATA | ||
114 | CONSTRUCTORS | ||
115 | |||
116 | #ifdef CONFIG_X86_64 | ||
117 | /* End of data section */ | ||
118 | _edata = .; | ||
119 | #endif | ||
120 | } :data | ||
121 | |||
122 | #ifdef CONFIG_X86_32 | ||
123 | /* 32 bit has nosave before _edata */ | ||
124 | . = ALIGN(PAGE_SIZE); | ||
125 | .data_nosave : AT(ADDR(.data_nosave) - LOAD_OFFSET) { | ||
126 | __nosave_begin = .; | ||
127 | *(.data.nosave) | ||
128 | . = ALIGN(PAGE_SIZE); | ||
129 | __nosave_end = .; | ||
130 | } | ||
131 | #endif | ||
132 | |||
133 | . = ALIGN(PAGE_SIZE); | ||
134 | .data.page_aligned : AT(ADDR(.data.page_aligned) - LOAD_OFFSET) { | ||
135 | *(.data.page_aligned) | ||
136 | *(.data.idt) | ||
137 | } | ||
138 | |||
139 | #ifdef CONFIG_X86_32 | ||
140 | . = ALIGN(32); | ||
141 | #else | ||
142 | . = ALIGN(PAGE_SIZE); | ||
143 | . = ALIGN(CONFIG_X86_L1_CACHE_BYTES); | ||
144 | #endif | ||
145 | .data.cacheline_aligned : | ||
146 | AT(ADDR(.data.cacheline_aligned) - LOAD_OFFSET) { | ||
147 | *(.data.cacheline_aligned) | ||
148 | } | ||
149 | |||
150 | /* rarely changed data like cpu maps */ | ||
151 | #ifdef CONFIG_X86_32 | ||
152 | . = ALIGN(32); | ||
153 | #else | ||
154 | . = ALIGN(CONFIG_X86_INTERNODE_CACHE_BYTES); | ||
155 | #endif | ||
156 | .data.read_mostly : AT(ADDR(.data.read_mostly) - LOAD_OFFSET) { | ||
157 | *(.data.read_mostly) | ||
158 | |||
159 | #ifdef CONFIG_X86_32 | ||
160 | /* End of data section */ | ||
161 | _edata = .; | ||
162 | #endif | ||
163 | } | ||
164 | |||
165 | #ifdef CONFIG_X86_64 | ||
166 | |||
167 | #define VSYSCALL_ADDR (-10*1024*1024) | ||
168 | #define VSYSCALL_PHYS_ADDR ((LOADADDR(.data.read_mostly) + \ | ||
169 | SIZEOF(.data.read_mostly) + 4095) & ~(4095)) | ||
170 | #define VSYSCALL_VIRT_ADDR ((ADDR(.data.read_mostly) + \ | ||
171 | SIZEOF(.data.read_mostly) + 4095) & ~(4095)) | ||
172 | |||
173 | #define VLOAD_OFFSET (VSYSCALL_ADDR - VSYSCALL_PHYS_ADDR) | ||
174 | #define VLOAD(x) (ADDR(x) - VLOAD_OFFSET) | ||
175 | |||
176 | #define VVIRT_OFFSET (VSYSCALL_ADDR - VSYSCALL_VIRT_ADDR) | ||
177 | #define VVIRT(x) (ADDR(x) - VVIRT_OFFSET) | ||
178 | |||
179 | . = VSYSCALL_ADDR; | ||
180 | .vsyscall_0 : AT(VSYSCALL_PHYS_ADDR) { | ||
181 | *(.vsyscall_0) | ||
182 | } :user | ||
183 | |||
184 | __vsyscall_0 = VSYSCALL_VIRT_ADDR; | ||
185 | |||
186 | . = ALIGN(CONFIG_X86_L1_CACHE_BYTES); | ||
187 | .vsyscall_fn : AT(VLOAD(.vsyscall_fn)) { | ||
188 | *(.vsyscall_fn) | ||
189 | } | ||
190 | |||
191 | . = ALIGN(CONFIG_X86_L1_CACHE_BYTES); | ||
192 | .vsyscall_gtod_data : AT(VLOAD(.vsyscall_gtod_data)) { | ||
193 | *(.vsyscall_gtod_data) | ||
194 | } | ||
195 | |||
196 | vsyscall_gtod_data = VVIRT(.vsyscall_gtod_data); | ||
197 | .vsyscall_clock : AT(VLOAD(.vsyscall_clock)) { | ||
198 | *(.vsyscall_clock) | ||
199 | } | ||
200 | vsyscall_clock = VVIRT(.vsyscall_clock); | ||
201 | |||
202 | |||
203 | .vsyscall_1 ADDR(.vsyscall_0) + 1024: AT(VLOAD(.vsyscall_1)) { | ||
204 | *(.vsyscall_1) | ||
205 | } | ||
206 | .vsyscall_2 ADDR(.vsyscall_0) + 2048: AT(VLOAD(.vsyscall_2)) { | ||
207 | *(.vsyscall_2) | ||
208 | } | ||
209 | |||
210 | .vgetcpu_mode : AT(VLOAD(.vgetcpu_mode)) { | ||
211 | *(.vgetcpu_mode) | ||
212 | } | ||
213 | vgetcpu_mode = VVIRT(.vgetcpu_mode); | ||
214 | |||
215 | . = ALIGN(CONFIG_X86_L1_CACHE_BYTES); | ||
216 | .jiffies : AT(VLOAD(.jiffies)) { | ||
217 | *(.jiffies) | ||
218 | } | ||
219 | jiffies = VVIRT(.jiffies); | ||
220 | |||
221 | .vsyscall_3 ADDR(.vsyscall_0) + 3072: AT(VLOAD(.vsyscall_3)) { | ||
222 | *(.vsyscall_3) | ||
223 | } | ||
224 | |||
225 | . = VSYSCALL_VIRT_ADDR + PAGE_SIZE; | ||
226 | |||
227 | #undef VSYSCALL_ADDR | ||
228 | #undef VSYSCALL_PHYS_ADDR | ||
229 | #undef VSYSCALL_VIRT_ADDR | ||
230 | #undef VLOAD_OFFSET | ||
231 | #undef VLOAD | ||
232 | #undef VVIRT_OFFSET | ||
233 | #undef VVIRT | ||
234 | |||
235 | #endif /* CONFIG_X86_64 */ | ||
236 | |||
237 | /* init_task */ | ||
238 | . = ALIGN(THREAD_SIZE); | ||
239 | .data.init_task : AT(ADDR(.data.init_task) - LOAD_OFFSET) { | ||
240 | *(.data.init_task) | ||
241 | } | ||
242 | #ifdef CONFIG_X86_64 | ||
243 | :data.init | ||
244 | #endif | ||
245 | |||
246 | /* | ||
247 | * smp_locks might be freed after init | ||
248 | * start/end must be page aligned | ||
249 | */ | ||
250 | . = ALIGN(PAGE_SIZE); | ||
251 | .smp_locks : AT(ADDR(.smp_locks) - LOAD_OFFSET) { | ||
252 | __smp_locks = .; | ||
253 | *(.smp_locks) | ||
254 | __smp_locks_end = .; | ||
255 | . = ALIGN(PAGE_SIZE); | ||
256 | } | ||
257 | |||
258 | /* Init code and data - will be freed after init */ | ||
259 | . = ALIGN(PAGE_SIZE); | ||
260 | .init.text : AT(ADDR(.init.text) - LOAD_OFFSET) { | ||
261 | __init_begin = .; /* paired with __init_end */ | ||
262 | _sinittext = .; | ||
263 | INIT_TEXT | ||
264 | _einittext = .; | ||
265 | } | ||
266 | |||
267 | .init.data : AT(ADDR(.init.data) - LOAD_OFFSET) { | ||
268 | INIT_DATA | ||
269 | } | ||
270 | |||
271 | . = ALIGN(16); | ||
272 | .init.setup : AT(ADDR(.init.setup) - LOAD_OFFSET) { | ||
273 | __setup_start = .; | ||
274 | *(.init.setup) | ||
275 | __setup_end = .; | ||
276 | } | ||
277 | .initcall.init : AT(ADDR(.initcall.init) - LOAD_OFFSET) { | ||
278 | __initcall_start = .; | ||
279 | INITCALLS | ||
280 | __initcall_end = .; | ||
281 | } | ||
282 | |||
283 | .con_initcall.init : AT(ADDR(.con_initcall.init) - LOAD_OFFSET) { | ||
284 | __con_initcall_start = .; | ||
285 | *(.con_initcall.init) | ||
286 | __con_initcall_end = .; | ||
287 | } | ||
288 | |||
289 | .x86_cpu_dev.init : AT(ADDR(.x86_cpu_dev.init) - LOAD_OFFSET) { | ||
290 | __x86_cpu_dev_start = .; | ||
291 | *(.x86_cpu_dev.init) | ||
292 | __x86_cpu_dev_end = .; | ||
293 | } | ||
294 | |||
295 | SECURITY_INIT | ||
296 | |||
297 | . = ALIGN(8); | ||
298 | .parainstructions : AT(ADDR(.parainstructions) - LOAD_OFFSET) { | ||
299 | __parainstructions = .; | ||
300 | *(.parainstructions) | ||
301 | __parainstructions_end = .; | ||
302 | } | ||
303 | |||
304 | . = ALIGN(8); | ||
305 | .altinstructions : AT(ADDR(.altinstructions) - LOAD_OFFSET) { | ||
306 | __alt_instructions = .; | ||
307 | *(.altinstructions) | ||
308 | __alt_instructions_end = .; | ||
309 | } | ||
310 | |||
311 | .altinstr_replacement : AT(ADDR(.altinstr_replacement) - LOAD_OFFSET) { | ||
312 | *(.altinstr_replacement) | ||
313 | } | ||
314 | |||
315 | /* | ||
316 | * .exit.text is discarded at runtime, not link time, to deal with | ||
317 | * references from .altinstructions and .eh_frame | ||
318 | */ | ||
319 | .exit.text : AT(ADDR(.exit.text) - LOAD_OFFSET) { | ||
320 | EXIT_TEXT | ||
321 | } | ||
322 | |||
323 | .exit.data : AT(ADDR(.exit.data) - LOAD_OFFSET) { | ||
324 | EXIT_DATA | ||
325 | } | ||
326 | |||
327 | #ifdef CONFIG_BLK_DEV_INITRD | ||
328 | . = ALIGN(PAGE_SIZE); | ||
329 | .init.ramfs : AT(ADDR(.init.ramfs) - LOAD_OFFSET) { | ||
330 | __initramfs_start = .; | ||
331 | *(.init.ramfs) | ||
332 | __initramfs_end = .; | ||
333 | } | ||
334 | #endif | ||
335 | |||
336 | #if defined(CONFIG_X86_64) && defined(CONFIG_SMP) | ||
337 | /* | ||
338 | * percpu offsets are zero-based on SMP. PERCPU_VADDR() changes the | ||
339 | * output PHDR, so the next output section - __data_nosave - should | ||
340 | * start another section data.init2. Also, pda should be at the head of | ||
341 | * percpu area. Preallocate it and define the percpu offset symbol | ||
342 | * so that it can be accessed as a percpu variable. | ||
343 | */ | ||
344 | . = ALIGN(PAGE_SIZE); | ||
345 | PERCPU_VADDR(0, :percpu) | ||
346 | #else | ||
347 | PERCPU(PAGE_SIZE) | ||
348 | #endif | ||
349 | |||
350 | . = ALIGN(PAGE_SIZE); | ||
351 | |||
352 | /* freed after init ends here */ | ||
353 | .init.end : AT(ADDR(.init.end) - LOAD_OFFSET) { | ||
354 | __init_end = .; | ||
355 | } | ||
356 | |||
357 | #ifdef CONFIG_X86_64 | ||
358 | .data_nosave : AT(ADDR(.data_nosave) - LOAD_OFFSET) { | ||
359 | . = ALIGN(PAGE_SIZE); | ||
360 | __nosave_begin = .; | ||
361 | *(.data.nosave) | ||
362 | . = ALIGN(PAGE_SIZE); | ||
363 | __nosave_end = .; | ||
364 | } :data.init2 | ||
365 | /* use another section data.init2, see PERCPU_VADDR() above */ | ||
366 | #endif | ||
367 | |||
368 | /* BSS */ | ||
369 | . = ALIGN(PAGE_SIZE); | ||
370 | .bss : AT(ADDR(.bss) - LOAD_OFFSET) { | ||
371 | __bss_start = .; | ||
372 | *(.bss.page_aligned) | ||
373 | *(.bss) | ||
374 | . = ALIGN(4); | ||
375 | __bss_stop = .; | ||
376 | } | ||
377 | |||
378 | . = ALIGN(PAGE_SIZE); | ||
379 | .brk : AT(ADDR(.brk) - LOAD_OFFSET) { | ||
380 | __brk_base = .; | ||
381 | . += 64 * 1024; /* 64k alignment slop space */ | ||
382 | *(.brk_reservation) /* areas brk users have reserved */ | ||
383 | __brk_limit = .; | ||
384 | } | ||
385 | |||
386 | .end : AT(ADDR(.end) - LOAD_OFFSET) { | ||
387 | _end = .; | ||
388 | } | ||
389 | |||
390 | /* Sections to be discarded */ | ||
391 | /DISCARD/ : { | ||
392 | *(.exitcall.exit) | ||
393 | *(.eh_frame) | ||
394 | *(.discard) | ||
395 | } | ||
396 | |||
397 | STABS_DEBUG | ||
398 | DWARF_DEBUG | ||
399 | } | ||
400 | |||
401 | |||
402 | #ifdef CONFIG_X86_32 | ||
403 | ASSERT((_end - LOAD_OFFSET <= KERNEL_IMAGE_SIZE), | ||
404 | "kernel image bigger than KERNEL_IMAGE_SIZE") | ||
405 | #else | ||
406 | /* | ||
407 | * Per-cpu symbols which need to be offset from __per_cpu_load | ||
408 | * for the boot processor. | ||
409 | */ | ||
410 | #define INIT_PER_CPU(x) init_per_cpu__##x = per_cpu__##x + __per_cpu_load | ||
411 | INIT_PER_CPU(gdt_page); | ||
412 | INIT_PER_CPU(irq_stack_union); | ||
413 | |||
414 | /* | ||
415 | * Build-time check on the image size: | ||
416 | */ | ||
417 | ASSERT((_end - _text <= KERNEL_IMAGE_SIZE), | ||
418 | "kernel image bigger than KERNEL_IMAGE_SIZE") | ||
419 | |||
420 | #ifdef CONFIG_SMP | ||
421 | ASSERT((per_cpu__irq_stack_union == 0), | ||
422 | "irq_stack_union is not at start of per-cpu area"); | ||
423 | #endif | ||
424 | |||
425 | #endif /* CONFIG_X86_32 */ | ||
426 | |||
427 | #ifdef CONFIG_KEXEC | ||
428 | #include <asm/kexec.h> | ||
429 | |||
430 | ASSERT(kexec_control_code_size <= KEXEC_CONTROL_CODE_MAX_SIZE, | ||
431 | "kexec control code size is too big") | ||
432 | #endif | ||
433 | |||
diff --git a/arch/x86/kernel/vmlinux_32.lds.S b/arch/x86/kernel/vmlinux_32.lds.S deleted file mode 100644 index 62ad500d55f3..000000000000 --- a/arch/x86/kernel/vmlinux_32.lds.S +++ /dev/null | |||
@@ -1,229 +0,0 @@ | |||
1 | /* ld script to make i386 Linux kernel | ||
2 | * Written by Martin Mares <mj@atrey.karlin.mff.cuni.cz>; | ||
3 | * | ||
4 | * Don't define absolute symbols until and unless you know that symbol | ||
5 | * value is should remain constant even if kernel image is relocated | ||
6 | * at run time. Absolute symbols are not relocated. If symbol value should | ||
7 | * change if kernel is relocated, make the symbol section relative and | ||
8 | * put it inside the section definition. | ||
9 | */ | ||
10 | |||
11 | #define LOAD_OFFSET __PAGE_OFFSET | ||
12 | |||
13 | #include <asm-generic/vmlinux.lds.h> | ||
14 | #include <asm/thread_info.h> | ||
15 | #include <asm/page_types.h> | ||
16 | #include <asm/cache.h> | ||
17 | #include <asm/boot.h> | ||
18 | |||
19 | OUTPUT_FORMAT("elf32-i386", "elf32-i386", "elf32-i386") | ||
20 | OUTPUT_ARCH(i386) | ||
21 | ENTRY(phys_startup_32) | ||
22 | jiffies = jiffies_64; | ||
23 | |||
24 | PHDRS { | ||
25 | text PT_LOAD FLAGS(5); /* R_E */ | ||
26 | data PT_LOAD FLAGS(7); /* RWE */ | ||
27 | note PT_NOTE FLAGS(0); /* ___ */ | ||
28 | } | ||
29 | SECTIONS | ||
30 | { | ||
31 | . = LOAD_OFFSET + LOAD_PHYSICAL_ADDR; | ||
32 | phys_startup_32 = startup_32 - LOAD_OFFSET; | ||
33 | |||
34 | .text.head : AT(ADDR(.text.head) - LOAD_OFFSET) { | ||
35 | _text = .; /* Text and read-only data */ | ||
36 | *(.text.head) | ||
37 | } :text = 0x9090 | ||
38 | |||
39 | /* read-only */ | ||
40 | .text : AT(ADDR(.text) - LOAD_OFFSET) { | ||
41 | . = ALIGN(PAGE_SIZE); /* not really needed, already page aligned */ | ||
42 | *(.text.page_aligned) | ||
43 | TEXT_TEXT | ||
44 | SCHED_TEXT | ||
45 | LOCK_TEXT | ||
46 | KPROBES_TEXT | ||
47 | IRQENTRY_TEXT | ||
48 | *(.fixup) | ||
49 | *(.gnu.warning) | ||
50 | _etext = .; /* End of text section */ | ||
51 | } :text = 0x9090 | ||
52 | |||
53 | NOTES :text :note | ||
54 | |||
55 | . = ALIGN(16); /* Exception table */ | ||
56 | __ex_table : AT(ADDR(__ex_table) - LOAD_OFFSET) { | ||
57 | __start___ex_table = .; | ||
58 | *(__ex_table) | ||
59 | __stop___ex_table = .; | ||
60 | } :text = 0x9090 | ||
61 | |||
62 | RODATA | ||
63 | |||
64 | /* writeable */ | ||
65 | . = ALIGN(PAGE_SIZE); | ||
66 | .data : AT(ADDR(.data) - LOAD_OFFSET) { /* Data */ | ||
67 | DATA_DATA | ||
68 | CONSTRUCTORS | ||
69 | } :data | ||
70 | |||
71 | . = ALIGN(PAGE_SIZE); | ||
72 | .data_nosave : AT(ADDR(.data_nosave) - LOAD_OFFSET) { | ||
73 | __nosave_begin = .; | ||
74 | *(.data.nosave) | ||
75 | . = ALIGN(PAGE_SIZE); | ||
76 | __nosave_end = .; | ||
77 | } | ||
78 | |||
79 | . = ALIGN(PAGE_SIZE); | ||
80 | .data.page_aligned : AT(ADDR(.data.page_aligned) - LOAD_OFFSET) { | ||
81 | *(.data.page_aligned) | ||
82 | *(.data.idt) | ||
83 | } | ||
84 | |||
85 | . = ALIGN(32); | ||
86 | .data.cacheline_aligned : AT(ADDR(.data.cacheline_aligned) - LOAD_OFFSET) { | ||
87 | *(.data.cacheline_aligned) | ||
88 | } | ||
89 | |||
90 | /* rarely changed data like cpu maps */ | ||
91 | . = ALIGN(32); | ||
92 | .data.read_mostly : AT(ADDR(.data.read_mostly) - LOAD_OFFSET) { | ||
93 | *(.data.read_mostly) | ||
94 | _edata = .; /* End of data section */ | ||
95 | } | ||
96 | |||
97 | . = ALIGN(THREAD_SIZE); /* init_task */ | ||
98 | .data.init_task : AT(ADDR(.data.init_task) - LOAD_OFFSET) { | ||
99 | *(.data.init_task) | ||
100 | } | ||
101 | |||
102 | /* might get freed after init */ | ||
103 | . = ALIGN(PAGE_SIZE); | ||
104 | .smp_locks : AT(ADDR(.smp_locks) - LOAD_OFFSET) { | ||
105 | __smp_locks = .; | ||
106 | *(.smp_locks) | ||
107 | __smp_locks_end = .; | ||
108 | } | ||
109 | /* will be freed after init | ||
110 | * Following ALIGN() is required to make sure no other data falls on the | ||
111 | * same page where __smp_alt_end is pointing as that page might be freed | ||
112 | * after boot. Always make sure that ALIGN() directive is present after | ||
113 | * the section which contains __smp_alt_end. | ||
114 | */ | ||
115 | . = ALIGN(PAGE_SIZE); | ||
116 | |||
117 | /* will be freed after init */ | ||
118 | . = ALIGN(PAGE_SIZE); /* Init code and data */ | ||
119 | .init.text : AT(ADDR(.init.text) - LOAD_OFFSET) { | ||
120 | __init_begin = .; | ||
121 | _sinittext = .; | ||
122 | INIT_TEXT | ||
123 | _einittext = .; | ||
124 | } | ||
125 | .init.data : AT(ADDR(.init.data) - LOAD_OFFSET) { | ||
126 | INIT_DATA | ||
127 | } | ||
128 | . = ALIGN(16); | ||
129 | .init.setup : AT(ADDR(.init.setup) - LOAD_OFFSET) { | ||
130 | __setup_start = .; | ||
131 | *(.init.setup) | ||
132 | __setup_end = .; | ||
133 | } | ||
134 | .initcall.init : AT(ADDR(.initcall.init) - LOAD_OFFSET) { | ||
135 | __initcall_start = .; | ||
136 | INITCALLS | ||
137 | __initcall_end = .; | ||
138 | } | ||
139 | .con_initcall.init : AT(ADDR(.con_initcall.init) - LOAD_OFFSET) { | ||
140 | __con_initcall_start = .; | ||
141 | *(.con_initcall.init) | ||
142 | __con_initcall_end = .; | ||
143 | } | ||
144 | .x86_cpu_dev.init : AT(ADDR(.x86_cpu_dev.init) - LOAD_OFFSET) { | ||
145 | __x86_cpu_dev_start = .; | ||
146 | *(.x86_cpu_dev.init) | ||
147 | __x86_cpu_dev_end = .; | ||
148 | } | ||
149 | SECURITY_INIT | ||
150 | . = ALIGN(4); | ||
151 | .altinstructions : AT(ADDR(.altinstructions) - LOAD_OFFSET) { | ||
152 | __alt_instructions = .; | ||
153 | *(.altinstructions) | ||
154 | __alt_instructions_end = .; | ||
155 | } | ||
156 | .altinstr_replacement : AT(ADDR(.altinstr_replacement) - LOAD_OFFSET) { | ||
157 | *(.altinstr_replacement) | ||
158 | } | ||
159 | . = ALIGN(4); | ||
160 | .parainstructions : AT(ADDR(.parainstructions) - LOAD_OFFSET) { | ||
161 | __parainstructions = .; | ||
162 | *(.parainstructions) | ||
163 | __parainstructions_end = .; | ||
164 | } | ||
165 | /* .exit.text is discard at runtime, not link time, to deal with references | ||
166 | from .altinstructions and .eh_frame */ | ||
167 | .exit.text : AT(ADDR(.exit.text) - LOAD_OFFSET) { | ||
168 | EXIT_TEXT | ||
169 | } | ||
170 | .exit.data : AT(ADDR(.exit.data) - LOAD_OFFSET) { | ||
171 | EXIT_DATA | ||
172 | } | ||
173 | #if defined(CONFIG_BLK_DEV_INITRD) | ||
174 | . = ALIGN(PAGE_SIZE); | ||
175 | .init.ramfs : AT(ADDR(.init.ramfs) - LOAD_OFFSET) { | ||
176 | __initramfs_start = .; | ||
177 | *(.init.ramfs) | ||
178 | __initramfs_end = .; | ||
179 | } | ||
180 | #endif | ||
181 | PERCPU(PAGE_SIZE) | ||
182 | . = ALIGN(PAGE_SIZE); | ||
183 | /* freed after init ends here */ | ||
184 | |||
185 | .bss : AT(ADDR(.bss) - LOAD_OFFSET) { | ||
186 | __init_end = .; | ||
187 | __bss_start = .; /* BSS */ | ||
188 | *(.bss.page_aligned) | ||
189 | *(.bss) | ||
190 | . = ALIGN(4); | ||
191 | __bss_stop = .; | ||
192 | } | ||
193 | |||
194 | .brk : AT(ADDR(.brk) - LOAD_OFFSET) { | ||
195 | . = ALIGN(PAGE_SIZE); | ||
196 | __brk_base = . ; | ||
197 | . += 64 * 1024 ; /* 64k alignment slop space */ | ||
198 | *(.brk_reservation) /* areas brk users have reserved */ | ||
199 | __brk_limit = . ; | ||
200 | } | ||
201 | |||
202 | .end : AT(ADDR(.end) - LOAD_OFFSET) { | ||
203 | _end = . ; | ||
204 | } | ||
205 | |||
206 | /* Sections to be discarded */ | ||
207 | /DISCARD/ : { | ||
208 | *(.exitcall.exit) | ||
209 | *(.discard) | ||
210 | } | ||
211 | |||
212 | STABS_DEBUG | ||
213 | |||
214 | DWARF_DEBUG | ||
215 | } | ||
216 | |||
217 | /* | ||
218 | * Build-time check on the image size: | ||
219 | */ | ||
220 | ASSERT((_end - LOAD_OFFSET <= KERNEL_IMAGE_SIZE), | ||
221 | "kernel image bigger than KERNEL_IMAGE_SIZE") | ||
222 | |||
223 | #ifdef CONFIG_KEXEC | ||
224 | /* Link time checks */ | ||
225 | #include <asm/kexec.h> | ||
226 | |||
227 | ASSERT(kexec_control_code_size <= KEXEC_CONTROL_CODE_MAX_SIZE, | ||
228 | "kexec control code size is too big") | ||
229 | #endif | ||
diff --git a/arch/x86/kernel/vmlinux_64.lds.S b/arch/x86/kernel/vmlinux_64.lds.S deleted file mode 100644 index c8742507b030..000000000000 --- a/arch/x86/kernel/vmlinux_64.lds.S +++ /dev/null | |||
@@ -1,298 +0,0 @@ | |||
1 | /* ld script to make x86-64 Linux kernel | ||
2 | * Written by Martin Mares <mj@atrey.karlin.mff.cuni.cz>; | ||
3 | */ | ||
4 | |||
5 | #define LOAD_OFFSET __START_KERNEL_map | ||
6 | |||
7 | #include <asm-generic/vmlinux.lds.h> | ||
8 | #include <asm/asm-offsets.h> | ||
9 | #include <asm/page_types.h> | ||
10 | |||
11 | #undef i386 /* in case the preprocessor is a 32bit one */ | ||
12 | |||
13 | OUTPUT_FORMAT("elf64-x86-64", "elf64-x86-64", "elf64-x86-64") | ||
14 | OUTPUT_ARCH(i386:x86-64) | ||
15 | ENTRY(phys_startup_64) | ||
16 | jiffies_64 = jiffies; | ||
17 | PHDRS { | ||
18 | text PT_LOAD FLAGS(5); /* R_E */ | ||
19 | data PT_LOAD FLAGS(7); /* RWE */ | ||
20 | user PT_LOAD FLAGS(7); /* RWE */ | ||
21 | data.init PT_LOAD FLAGS(7); /* RWE */ | ||
22 | #ifdef CONFIG_SMP | ||
23 | percpu PT_LOAD FLAGS(7); /* RWE */ | ||
24 | #endif | ||
25 | data.init2 PT_LOAD FLAGS(7); /* RWE */ | ||
26 | note PT_NOTE FLAGS(0); /* ___ */ | ||
27 | } | ||
28 | SECTIONS | ||
29 | { | ||
30 | . = __START_KERNEL; | ||
31 | phys_startup_64 = startup_64 - LOAD_OFFSET; | ||
32 | .text : AT(ADDR(.text) - LOAD_OFFSET) { | ||
33 | _text = .; /* Text and read-only data */ | ||
34 | /* First the code that has to be first for bootstrapping */ | ||
35 | *(.text.head) | ||
36 | _stext = .; | ||
37 | /* Then the rest */ | ||
38 | TEXT_TEXT | ||
39 | SCHED_TEXT | ||
40 | LOCK_TEXT | ||
41 | KPROBES_TEXT | ||
42 | IRQENTRY_TEXT | ||
43 | *(.fixup) | ||
44 | *(.gnu.warning) | ||
45 | _etext = .; /* End of text section */ | ||
46 | } :text = 0x9090 | ||
47 | |||
48 | NOTES :text :note | ||
49 | |||
50 | . = ALIGN(16); /* Exception table */ | ||
51 | __ex_table : AT(ADDR(__ex_table) - LOAD_OFFSET) { | ||
52 | __start___ex_table = .; | ||
53 | *(__ex_table) | ||
54 | __stop___ex_table = .; | ||
55 | } :text = 0x9090 | ||
56 | |||
57 | RODATA | ||
58 | |||
59 | . = ALIGN(PAGE_SIZE); /* Align data segment to page size boundary */ | ||
60 | /* Data */ | ||
61 | .data : AT(ADDR(.data) - LOAD_OFFSET) { | ||
62 | DATA_DATA | ||
63 | CONSTRUCTORS | ||
64 | _edata = .; /* End of data section */ | ||
65 | } :data | ||
66 | |||
67 | |||
68 | .data.cacheline_aligned : AT(ADDR(.data.cacheline_aligned) - LOAD_OFFSET) { | ||
69 | . = ALIGN(PAGE_SIZE); | ||
70 | . = ALIGN(CONFIG_X86_L1_CACHE_BYTES); | ||
71 | *(.data.cacheline_aligned) | ||
72 | } | ||
73 | . = ALIGN(CONFIG_X86_INTERNODE_CACHE_BYTES); | ||
74 | .data.read_mostly : AT(ADDR(.data.read_mostly) - LOAD_OFFSET) { | ||
75 | *(.data.read_mostly) | ||
76 | } | ||
77 | |||
78 | #define VSYSCALL_ADDR (-10*1024*1024) | ||
79 | #define VSYSCALL_PHYS_ADDR ((LOADADDR(.data.read_mostly) + SIZEOF(.data.read_mostly) + 4095) & ~(4095)) | ||
80 | #define VSYSCALL_VIRT_ADDR ((ADDR(.data.read_mostly) + SIZEOF(.data.read_mostly) + 4095) & ~(4095)) | ||
81 | |||
82 | #define VLOAD_OFFSET (VSYSCALL_ADDR - VSYSCALL_PHYS_ADDR) | ||
83 | #define VLOAD(x) (ADDR(x) - VLOAD_OFFSET) | ||
84 | |||
85 | #define VVIRT_OFFSET (VSYSCALL_ADDR - VSYSCALL_VIRT_ADDR) | ||
86 | #define VVIRT(x) (ADDR(x) - VVIRT_OFFSET) | ||
87 | |||
88 | . = VSYSCALL_ADDR; | ||
89 | .vsyscall_0 : AT(VSYSCALL_PHYS_ADDR) { *(.vsyscall_0) } :user | ||
90 | __vsyscall_0 = VSYSCALL_VIRT_ADDR; | ||
91 | |||
92 | . = ALIGN(CONFIG_X86_L1_CACHE_BYTES); | ||
93 | .vsyscall_fn : AT(VLOAD(.vsyscall_fn)) { *(.vsyscall_fn) } | ||
94 | . = ALIGN(CONFIG_X86_L1_CACHE_BYTES); | ||
95 | .vsyscall_gtod_data : AT(VLOAD(.vsyscall_gtod_data)) | ||
96 | { *(.vsyscall_gtod_data) } | ||
97 | vsyscall_gtod_data = VVIRT(.vsyscall_gtod_data); | ||
98 | .vsyscall_clock : AT(VLOAD(.vsyscall_clock)) | ||
99 | { *(.vsyscall_clock) } | ||
100 | vsyscall_clock = VVIRT(.vsyscall_clock); | ||
101 | |||
102 | |||
103 | .vsyscall_1 ADDR(.vsyscall_0) + 1024: AT(VLOAD(.vsyscall_1)) | ||
104 | { *(.vsyscall_1) } | ||
105 | .vsyscall_2 ADDR(.vsyscall_0) + 2048: AT(VLOAD(.vsyscall_2)) | ||
106 | { *(.vsyscall_2) } | ||
107 | |||
108 | .vgetcpu_mode : AT(VLOAD(.vgetcpu_mode)) { *(.vgetcpu_mode) } | ||
109 | vgetcpu_mode = VVIRT(.vgetcpu_mode); | ||
110 | |||
111 | . = ALIGN(CONFIG_X86_L1_CACHE_BYTES); | ||
112 | .jiffies : AT(VLOAD(.jiffies)) { *(.jiffies) } | ||
113 | jiffies = VVIRT(.jiffies); | ||
114 | |||
115 | .vsyscall_3 ADDR(.vsyscall_0) + 3072: AT(VLOAD(.vsyscall_3)) | ||
116 | { *(.vsyscall_3) } | ||
117 | |||
118 | . = VSYSCALL_VIRT_ADDR + PAGE_SIZE; | ||
119 | |||
120 | #undef VSYSCALL_ADDR | ||
121 | #undef VSYSCALL_PHYS_ADDR | ||
122 | #undef VSYSCALL_VIRT_ADDR | ||
123 | #undef VLOAD_OFFSET | ||
124 | #undef VLOAD | ||
125 | #undef VVIRT_OFFSET | ||
126 | #undef VVIRT | ||
127 | |||
128 | .data.init_task : AT(ADDR(.data.init_task) - LOAD_OFFSET) { | ||
129 | . = ALIGN(THREAD_SIZE); /* init_task */ | ||
130 | *(.data.init_task) | ||
131 | }:data.init | ||
132 | |||
133 | .data.page_aligned : AT(ADDR(.data.page_aligned) - LOAD_OFFSET) { | ||
134 | . = ALIGN(PAGE_SIZE); | ||
135 | *(.data.page_aligned) | ||
136 | } | ||
137 | |||
138 | .smp_locks : AT(ADDR(.smp_locks) - LOAD_OFFSET) { | ||
139 | /* might get freed after init */ | ||
140 | . = ALIGN(PAGE_SIZE); | ||
141 | __smp_alt_begin = .; | ||
142 | __smp_locks = .; | ||
143 | *(.smp_locks) | ||
144 | __smp_locks_end = .; | ||
145 | . = ALIGN(PAGE_SIZE); | ||
146 | __smp_alt_end = .; | ||
147 | } | ||
148 | |||
149 | . = ALIGN(PAGE_SIZE); /* Init code and data */ | ||
150 | __init_begin = .; /* paired with __init_end */ | ||
151 | .init.text : AT(ADDR(.init.text) - LOAD_OFFSET) { | ||
152 | _sinittext = .; | ||
153 | INIT_TEXT | ||
154 | _einittext = .; | ||
155 | } | ||
156 | .init.data : AT(ADDR(.init.data) - LOAD_OFFSET) { | ||
157 | __initdata_begin = .; | ||
158 | INIT_DATA | ||
159 | __initdata_end = .; | ||
160 | } | ||
161 | |||
162 | .init.setup : AT(ADDR(.init.setup) - LOAD_OFFSET) { | ||
163 | . = ALIGN(16); | ||
164 | __setup_start = .; | ||
165 | *(.init.setup) | ||
166 | __setup_end = .; | ||
167 | } | ||
168 | .initcall.init : AT(ADDR(.initcall.init) - LOAD_OFFSET) { | ||
169 | __initcall_start = .; | ||
170 | INITCALLS | ||
171 | __initcall_end = .; | ||
172 | } | ||
173 | .con_initcall.init : AT(ADDR(.con_initcall.init) - LOAD_OFFSET) { | ||
174 | __con_initcall_start = .; | ||
175 | *(.con_initcall.init) | ||
176 | __con_initcall_end = .; | ||
177 | } | ||
178 | .x86_cpu_dev.init : AT(ADDR(.x86_cpu_dev.init) - LOAD_OFFSET) { | ||
179 | __x86_cpu_dev_start = .; | ||
180 | *(.x86_cpu_dev.init) | ||
181 | __x86_cpu_dev_end = .; | ||
182 | } | ||
183 | SECURITY_INIT | ||
184 | |||
185 | . = ALIGN(8); | ||
186 | .parainstructions : AT(ADDR(.parainstructions) - LOAD_OFFSET) { | ||
187 | __parainstructions = .; | ||
188 | *(.parainstructions) | ||
189 | __parainstructions_end = .; | ||
190 | } | ||
191 | |||
192 | .altinstructions : AT(ADDR(.altinstructions) - LOAD_OFFSET) { | ||
193 | . = ALIGN(8); | ||
194 | __alt_instructions = .; | ||
195 | *(.altinstructions) | ||
196 | __alt_instructions_end = .; | ||
197 | } | ||
198 | .altinstr_replacement : AT(ADDR(.altinstr_replacement) - LOAD_OFFSET) { | ||
199 | *(.altinstr_replacement) | ||
200 | } | ||
201 | /* .exit.text is discard at runtime, not link time, to deal with references | ||
202 | from .altinstructions and .eh_frame */ | ||
203 | .exit.text : AT(ADDR(.exit.text) - LOAD_OFFSET) { | ||
204 | EXIT_TEXT | ||
205 | } | ||
206 | .exit.data : AT(ADDR(.exit.data) - LOAD_OFFSET) { | ||
207 | EXIT_DATA | ||
208 | } | ||
209 | |||
210 | #ifdef CONFIG_BLK_DEV_INITRD | ||
211 | . = ALIGN(PAGE_SIZE); | ||
212 | .init.ramfs : AT(ADDR(.init.ramfs) - LOAD_OFFSET) { | ||
213 | __initramfs_start = .; | ||
214 | *(.init.ramfs) | ||
215 | __initramfs_end = .; | ||
216 | } | ||
217 | #endif | ||
218 | |||
219 | #ifdef CONFIG_SMP | ||
220 | /* | ||
221 | * percpu offsets are zero-based on SMP. PERCPU_VADDR() changes the | ||
222 | * output PHDR, so the next output section - __data_nosave - should | ||
223 | * start another section data.init2. Also, pda should be at the head of | ||
224 | * percpu area. Preallocate it and define the percpu offset symbol | ||
225 | * so that it can be accessed as a percpu variable. | ||
226 | */ | ||
227 | . = ALIGN(PAGE_SIZE); | ||
228 | PERCPU_VADDR(0, :percpu) | ||
229 | #else | ||
230 | PERCPU(PAGE_SIZE) | ||
231 | #endif | ||
232 | |||
233 | . = ALIGN(PAGE_SIZE); | ||
234 | __init_end = .; | ||
235 | |||
236 | .data_nosave : AT(ADDR(.data_nosave) - LOAD_OFFSET) { | ||
237 | . = ALIGN(PAGE_SIZE); | ||
238 | __nosave_begin = .; | ||
239 | *(.data.nosave) | ||
240 | . = ALIGN(PAGE_SIZE); | ||
241 | __nosave_end = .; | ||
242 | } :data.init2 /* use another section data.init2, see PERCPU_VADDR() above */ | ||
243 | |||
244 | .bss : AT(ADDR(.bss) - LOAD_OFFSET) { | ||
245 | . = ALIGN(PAGE_SIZE); | ||
246 | __bss_start = .; /* BSS */ | ||
247 | *(.bss.page_aligned) | ||
248 | *(.bss) | ||
249 | __bss_stop = .; | ||
250 | } | ||
251 | |||
252 | .brk : AT(ADDR(.brk) - LOAD_OFFSET) { | ||
253 | . = ALIGN(PAGE_SIZE); | ||
254 | __brk_base = . ; | ||
255 | . += 64 * 1024 ; /* 64k alignment slop space */ | ||
256 | *(.brk_reservation) /* areas brk users have reserved */ | ||
257 | __brk_limit = . ; | ||
258 | } | ||
259 | |||
260 | _end = . ; | ||
261 | |||
262 | /* Sections to be discarded */ | ||
263 | /DISCARD/ : { | ||
264 | *(.exitcall.exit) | ||
265 | *(.eh_frame) | ||
266 | *(.discard) | ||
267 | } | ||
268 | |||
269 | STABS_DEBUG | ||
270 | |||
271 | DWARF_DEBUG | ||
272 | } | ||
273 | |||
274 | /* | ||
275 | * Per-cpu symbols which need to be offset from __per_cpu_load | ||
276 | * for the boot processor. | ||
277 | */ | ||
278 | #define INIT_PER_CPU(x) init_per_cpu__##x = per_cpu__##x + __per_cpu_load | ||
279 | INIT_PER_CPU(gdt_page); | ||
280 | INIT_PER_CPU(irq_stack_union); | ||
281 | |||
282 | /* | ||
283 | * Build-time check on the image size: | ||
284 | */ | ||
285 | ASSERT((_end - _text <= KERNEL_IMAGE_SIZE), | ||
286 | "kernel image bigger than KERNEL_IMAGE_SIZE") | ||
287 | |||
288 | #ifdef CONFIG_SMP | ||
289 | ASSERT((per_cpu__irq_stack_union == 0), | ||
290 | "irq_stack_union is not at start of per-cpu area"); | ||
291 | #endif | ||
292 | |||
293 | #ifdef CONFIG_KEXEC | ||
294 | #include <asm/kexec.h> | ||
295 | |||
296 | ASSERT(kexec_control_code_size <= KEXEC_CONTROL_CODE_MAX_SIZE, | ||
297 | "kexec control code size is too big") | ||
298 | #endif | ||
diff --git a/arch/x86/kernel/vsyscall_64.c b/arch/x86/kernel/vsyscall_64.c index 44153afc9067..25ee06a80aad 100644 --- a/arch/x86/kernel/vsyscall_64.c +++ b/arch/x86/kernel/vsyscall_64.c | |||
@@ -132,15 +132,7 @@ static __always_inline void do_vgettimeofday(struct timeval * tv) | |||
132 | return; | 132 | return; |
133 | } | 133 | } |
134 | 134 | ||
135 | /* | ||
136 | * Surround the RDTSC by barriers, to make sure it's not | ||
137 | * speculated to outside the seqlock critical section and | ||
138 | * does not cause time warps: | ||
139 | */ | ||
140 | rdtsc_barrier(); | ||
141 | now = vread(); | 135 | now = vread(); |
142 | rdtsc_barrier(); | ||
143 | |||
144 | base = __vsyscall_gtod_data.clock.cycle_last; | 136 | base = __vsyscall_gtod_data.clock.cycle_last; |
145 | mask = __vsyscall_gtod_data.clock.mask; | 137 | mask = __vsyscall_gtod_data.clock.mask; |
146 | mult = __vsyscall_gtod_data.clock.mult; | 138 | mult = __vsyscall_gtod_data.clock.mult; |
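This hunk drops the barrier pair from do_vgettimeofday() because the tsc.c change above moves it into vread_tsc() itself, so every consumer of the vread hook gets the speculation fencing, not just vgettimeofday. The reader-side ordering after the move is roughly as sketched below; the seqlock name and helpers are schematic stand-ins, since the real vsyscall code open-codes the loop against __vsyscall_gtod_data.

    /* Schematic only: where the barriers sit after the move into vread_tsc(). */
    #include <linux/seqlock.h>
    #include <asm/tsc.h>

    static DEFINE_SEQLOCK(gtod_lock);               /* name assumed for the sketch */

    static unsigned long long read_cycles_ordered(void)
    {
            unsigned long long cycles;
            unsigned int seq;

            do {
                    seq = read_seqbegin(&gtod_lock);

                    rdtsc_barrier();        /* RDTSC must not be hoisted earlier...  */
                    cycles = vget_cycles(); /* what vread_tsc() now does internally  */
                    rdtsc_barrier();        /* ...nor drift past the critical section */

            } while (read_seqretry(&gtod_lock, seq));

            return cycles;
    }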
diff --git a/arch/x86/kvm/Kconfig b/arch/x86/kvm/Kconfig index a58504ea78cc..8600a09e0c6c 100644 --- a/arch/x86/kvm/Kconfig +++ b/arch/x86/kvm/Kconfig | |||
@@ -50,6 +50,9 @@ config KVM_INTEL | |||
50 | Provides support for KVM on Intel processors equipped with the VT | 50 | Provides support for KVM on Intel processors equipped with the VT |
51 | extensions. | 51 | extensions. |
52 | 52 | ||
53 | To compile this as a module, choose M here: the module | ||
54 | will be called kvm-intel. | ||
55 | |||
53 | config KVM_AMD | 56 | config KVM_AMD |
54 | tristate "KVM for AMD processors support" | 57 | tristate "KVM for AMD processors support" |
55 | depends on KVM | 58 | depends on KVM |
@@ -57,6 +60,9 @@ config KVM_AMD | |||
57 | Provides support for KVM on AMD processors equipped with the AMD-V | 60 | Provides support for KVM on AMD processors equipped with the AMD-V |
58 | (SVM) extensions. | 61 | (SVM) extensions. |
59 | 62 | ||
63 | To compile this as a module, choose M here: the module | ||
64 | will be called kvm-amd. | ||
65 | |||
60 | config KVM_TRACE | 66 | config KVM_TRACE |
61 | bool "KVM trace support" | 67 | bool "KVM trace support" |
62 | depends on KVM && SYSFS | 68 | depends on KVM && SYSFS |
diff --git a/arch/x86/kvm/Makefile b/arch/x86/kvm/Makefile index d3ec292f00f2..b43c4efafe80 100644 --- a/arch/x86/kvm/Makefile +++ b/arch/x86/kvm/Makefile | |||
@@ -14,7 +14,7 @@ endif | |||
14 | EXTRA_CFLAGS += -Ivirt/kvm -Iarch/x86/kvm | 14 | EXTRA_CFLAGS += -Ivirt/kvm -Iarch/x86/kvm |
15 | 15 | ||
16 | kvm-objs := $(common-objs) x86.o mmu.o x86_emulate.o i8259.o irq.o lapic.o \ | 16 | kvm-objs := $(common-objs) x86.o mmu.o x86_emulate.o i8259.o irq.o lapic.o \ |
17 | i8254.o | 17 | i8254.o timer.o |
18 | obj-$(CONFIG_KVM) += kvm.o | 18 | obj-$(CONFIG_KVM) += kvm.o |
19 | kvm-intel-objs = vmx.o | 19 | kvm-intel-objs = vmx.o |
20 | obj-$(CONFIG_KVM_INTEL) += kvm-intel.o | 20 | obj-$(CONFIG_KVM_INTEL) += kvm-intel.o |
diff --git a/arch/x86/kvm/i8254.c b/arch/x86/kvm/i8254.c index c13bb92d3157..4d6f0d293ee2 100644 --- a/arch/x86/kvm/i8254.c +++ b/arch/x86/kvm/i8254.c | |||
@@ -98,6 +98,37 @@ static int pit_get_gate(struct kvm *kvm, int channel) | |||
98 | return kvm->arch.vpit->pit_state.channels[channel].gate; | 98 | return kvm->arch.vpit->pit_state.channels[channel].gate; |
99 | } | 99 | } |
100 | 100 | ||
101 | static s64 __kpit_elapsed(struct kvm *kvm) | ||
102 | { | ||
103 | s64 elapsed; | ||
104 | ktime_t remaining; | ||
105 | struct kvm_kpit_state *ps = &kvm->arch.vpit->pit_state; | ||
106 | |||
107 | /* | ||
108 | * The Counter does not stop when it reaches zero. In | ||
109 | * Modes 0, 1, 4, and 5 the Counter ``wraps around'' to | ||
110 | * the highest count, either FFFF hex for binary counting | ||
111 | * or 9999 for BCD counting, and continues counting. | ||
112 | * Modes 2 and 3 are periodic; the Counter reloads | ||
113 | * itself with the initial count and continues counting | ||
114 | * from there. | ||
115 | */ | ||
116 | remaining = hrtimer_expires_remaining(&ps->pit_timer.timer); | ||
117 | elapsed = ps->pit_timer.period - ktime_to_ns(remaining); | ||
118 | elapsed = mod_64(elapsed, ps->pit_timer.period); | ||
119 | |||
120 | return elapsed; | ||
121 | } | ||
122 | |||
123 | static s64 kpit_elapsed(struct kvm *kvm, struct kvm_kpit_channel_state *c, | ||
124 | int channel) | ||
125 | { | ||
126 | if (channel == 0) | ||
127 | return __kpit_elapsed(kvm); | ||
128 | |||
129 | return ktime_to_ns(ktime_sub(ktime_get(), c->count_load_time)); | ||
130 | } | ||
131 | |||
101 | static int pit_get_count(struct kvm *kvm, int channel) | 132 | static int pit_get_count(struct kvm *kvm, int channel) |
102 | { | 133 | { |
103 | struct kvm_kpit_channel_state *c = | 134 | struct kvm_kpit_channel_state *c = |
@@ -107,7 +138,7 @@ static int pit_get_count(struct kvm *kvm, int channel) | |||
107 | 138 | ||
108 | WARN_ON(!mutex_is_locked(&kvm->arch.vpit->pit_state.lock)); | 139 | WARN_ON(!mutex_is_locked(&kvm->arch.vpit->pit_state.lock)); |
109 | 140 | ||
110 | t = ktime_to_ns(ktime_sub(ktime_get(), c->count_load_time)); | 141 | t = kpit_elapsed(kvm, c, channel); |
111 | d = muldiv64(t, KVM_PIT_FREQ, NSEC_PER_SEC); | 142 | d = muldiv64(t, KVM_PIT_FREQ, NSEC_PER_SEC); |
112 | 143 | ||
113 | switch (c->mode) { | 144 | switch (c->mode) { |
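Note: the new __kpit_elapsed() helper derives channel 0's elapsed time from the hrtimer that now drives the PIT, and pit_get_count() turns that into PIT ticks with muldiv64(). A minimal sketch of the same arithmetic, assuming mod_64() behaves like a plain 64-bit modulo and that KVM_PIT_FREQ is the nominal ~1.193182 MHz PIT input clock (the exact constant lives in i8254.h, not shown here):

	/* Sketch only: mirrors __kpit_elapsed() plus the muldiv64() conversion
	 * in pit_get_count().  PIT_FREQ_HZ stands in for KVM_PIT_FREQ. */
	#define PIT_FREQ_HZ	1193182LL

	static s64 pit_ticks_elapsed(s64 period_ns, s64 remaining_ns)
	{
		s64 elapsed = period_ns - remaining_ns;

		elapsed %= period_ns;	/* wrap into [0, period), like mod_64() */
		/* ns -> PIT ticks; the kernel uses muldiv64() to avoid overflow */
		return elapsed * PIT_FREQ_HZ / 1000000000LL;
	}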
@@ -137,7 +168,7 @@ static int pit_get_out(struct kvm *kvm, int channel) | |||
137 | 168 | ||
138 | WARN_ON(!mutex_is_locked(&kvm->arch.vpit->pit_state.lock)); | 169 | WARN_ON(!mutex_is_locked(&kvm->arch.vpit->pit_state.lock)); |
139 | 170 | ||
140 | t = ktime_to_ns(ktime_sub(ktime_get(), c->count_load_time)); | 171 | t = kpit_elapsed(kvm, c, channel); |
141 | d = muldiv64(t, KVM_PIT_FREQ, NSEC_PER_SEC); | 172 | d = muldiv64(t, KVM_PIT_FREQ, NSEC_PER_SEC); |
142 | 173 | ||
143 | switch (c->mode) { | 174 | switch (c->mode) { |
@@ -193,28 +224,6 @@ static void pit_latch_status(struct kvm *kvm, int channel) | |||
193 | } | 224 | } |
194 | } | 225 | } |
195 | 226 | ||
196 | static int __pit_timer_fn(struct kvm_kpit_state *ps) | ||
197 | { | ||
198 | struct kvm_vcpu *vcpu0 = ps->pit->kvm->vcpus[0]; | ||
199 | struct kvm_kpit_timer *pt = &ps->pit_timer; | ||
200 | |||
201 | if (!atomic_inc_and_test(&pt->pending)) | ||
202 | set_bit(KVM_REQ_PENDING_TIMER, &vcpu0->requests); | ||
203 | |||
204 | if (!pt->reinject) | ||
205 | atomic_set(&pt->pending, 1); | ||
206 | |||
207 | if (vcpu0 && waitqueue_active(&vcpu0->wq)) | ||
208 | wake_up_interruptible(&vcpu0->wq); | ||
209 | |||
210 | hrtimer_add_expires_ns(&pt->timer, pt->period); | ||
211 | pt->scheduled = hrtimer_get_expires_ns(&pt->timer); | ||
212 | if (pt->period) | ||
213 | ps->channels[0].count_load_time = ktime_get(); | ||
214 | |||
215 | return (pt->period == 0 ? 0 : 1); | ||
216 | } | ||
217 | |||
218 | int pit_has_pending_timer(struct kvm_vcpu *vcpu) | 227 | int pit_has_pending_timer(struct kvm_vcpu *vcpu) |
219 | { | 228 | { |
220 | struct kvm_pit *pit = vcpu->kvm->arch.vpit; | 229 | struct kvm_pit *pit = vcpu->kvm->arch.vpit; |
@@ -235,21 +244,6 @@ static void kvm_pit_ack_irq(struct kvm_irq_ack_notifier *kian) | |||
235 | spin_unlock(&ps->inject_lock); | 244 | spin_unlock(&ps->inject_lock); |
236 | } | 245 | } |
237 | 246 | ||
238 | static enum hrtimer_restart pit_timer_fn(struct hrtimer *data) | ||
239 | { | ||
240 | struct kvm_kpit_state *ps; | ||
241 | int restart_timer = 0; | ||
242 | |||
243 | ps = container_of(data, struct kvm_kpit_state, pit_timer.timer); | ||
244 | |||
245 | restart_timer = __pit_timer_fn(ps); | ||
246 | |||
247 | if (restart_timer) | ||
248 | return HRTIMER_RESTART; | ||
249 | else | ||
250 | return HRTIMER_NORESTART; | ||
251 | } | ||
252 | |||
253 | void __kvm_migrate_pit_timer(struct kvm_vcpu *vcpu) | 247 | void __kvm_migrate_pit_timer(struct kvm_vcpu *vcpu) |
254 | { | 248 | { |
255 | struct kvm_pit *pit = vcpu->kvm->arch.vpit; | 249 | struct kvm_pit *pit = vcpu->kvm->arch.vpit; |
@@ -263,15 +257,26 @@ void __kvm_migrate_pit_timer(struct kvm_vcpu *vcpu) | |||
263 | hrtimer_start_expires(timer, HRTIMER_MODE_ABS); | 257 | hrtimer_start_expires(timer, HRTIMER_MODE_ABS); |
264 | } | 258 | } |
265 | 259 | ||
266 | static void destroy_pit_timer(struct kvm_kpit_timer *pt) | 260 | static void destroy_pit_timer(struct kvm_timer *pt) |
267 | { | 261 | { |
268 | pr_debug("pit: execute del timer!\n"); | 262 | pr_debug("pit: execute del timer!\n"); |
269 | hrtimer_cancel(&pt->timer); | 263 | hrtimer_cancel(&pt->timer); |
270 | } | 264 | } |
271 | 265 | ||
266 | static bool kpit_is_periodic(struct kvm_timer *ktimer) | ||
267 | { | ||
268 | struct kvm_kpit_state *ps = container_of(ktimer, struct kvm_kpit_state, | ||
269 | pit_timer); | ||
270 | return ps->is_periodic; | ||
271 | } | ||
272 | |||
273 | static struct kvm_timer_ops kpit_ops = { | ||
274 | .is_periodic = kpit_is_periodic, | ||
275 | }; | ||
276 | |||
272 | static void create_pit_timer(struct kvm_kpit_state *ps, u32 val, int is_period) | 277 | static void create_pit_timer(struct kvm_kpit_state *ps, u32 val, int is_period) |
273 | { | 278 | { |
274 | struct kvm_kpit_timer *pt = &ps->pit_timer; | 279 | struct kvm_timer *pt = &ps->pit_timer; |
275 | s64 interval; | 280 | s64 interval; |
276 | 281 | ||
277 | interval = muldiv64(val, NSEC_PER_SEC, KVM_PIT_FREQ); | 282 | interval = muldiv64(val, NSEC_PER_SEC, KVM_PIT_FREQ); |
@@ -280,8 +285,14 @@ static void create_pit_timer(struct kvm_kpit_state *ps, u32 val, int is_period) | |||
280 | 285 | ||
281 | /* TODO: The new value only takes effect after the counter is retriggered */ | 286 | /* TODO: The new value only takes effect after the counter is retriggered */ |
282 | hrtimer_cancel(&pt->timer); | 287 | hrtimer_cancel(&pt->timer); |
283 | pt->period = (is_period == 0) ? 0 : interval; | 288 | pt->period = interval; |
284 | pt->timer.function = pit_timer_fn; | 289 | ps->is_periodic = is_period; |
290 | |||
291 | pt->timer.function = kvm_timer_fn; | ||
292 | pt->t_ops = &kpit_ops; | ||
293 | pt->kvm = ps->pit->kvm; | ||
294 | pt->vcpu_id = 0; | ||
295 | |||
285 | atomic_set(&pt->pending, 0); | 296 | atomic_set(&pt->pending, 0); |
286 | ps->irq_ack = 1; | 297 | ps->irq_ack = 1; |
287 | 298 | ||
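Note: create_pit_timer() now points the hrtimer at the shared kvm_timer_fn() from the new timer.o (added to the Makefile above) instead of the per-device pit_timer_fn() this patch deletes. That file is not part of this diff; going by the removed __pit_timer_fn()/pit_timer_fn() pair, a plausible sketch of the shared callback is:

	/* Sketch only -- the real kvm_timer_fn() lives in arch/x86/kvm/timer.c,
	 * which is not shown in this diff. */
	enum hrtimer_restart kvm_timer_fn(struct hrtimer *data)
	{
		struct kvm_timer *ktimer = container_of(data, struct kvm_timer, timer);
		struct kvm_vcpu *vcpu = ktimer->kvm->vcpus[ktimer->vcpu_id];

		if (!vcpu)
			return HRTIMER_NORESTART;

		/* record the tick and poke the vcpu so it injects the interrupt */
		atomic_inc(&ktimer->pending);
		set_bit(KVM_REQ_PENDING_TIMER, &vcpu->requests);
		if (waitqueue_active(&vcpu->wq))
			wake_up_interruptible(&vcpu->wq);

		/* periodic timers re-arm via the device's is_periodic hook */
		if (ktimer->t_ops->is_periodic(ktimer)) {
			hrtimer_add_expires_ns(&ktimer->timer, ktimer->period);
			return HRTIMER_RESTART;
		}
		return HRTIMER_NORESTART;
	}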
@@ -298,23 +309,23 @@ static void pit_load_count(struct kvm *kvm, int channel, u32 val) | |||
298 | pr_debug("pit: load_count val is %d, channel is %d\n", val, channel); | 309 | pr_debug("pit: load_count val is %d, channel is %d\n", val, channel); |
299 | 310 | ||
300 | /* | 311 | /* |
301 | * Though spec said the state of 8254 is undefined after power-up, | 312 | * The largest possible initial count is 0; this is equivalent |
302 | * seems some tricky OS like Windows XP depends on IRQ0 interrupt | 313 | * to 2^16 for binary counting and 10^4 for BCD counting. |
303 | * when booting up. | ||
304 | * So here setting initialize rate for it, and not a specific number | ||
305 | */ | 314 | */ |
306 | if (val == 0) | 315 | if (val == 0) |
307 | val = 0x10000; | 316 | val = 0x10000; |
308 | 317 | ||
309 | ps->channels[channel].count_load_time = ktime_get(); | ||
310 | ps->channels[channel].count = val; | 318 | ps->channels[channel].count = val; |
311 | 319 | ||
312 | if (channel != 0) | 320 | if (channel != 0) { |
321 | ps->channels[channel].count_load_time = ktime_get(); | ||
313 | return; | 322 | return; |
323 | } | ||
314 | 324 | ||
315 | /* Two types of timer | 325 | /* Two types of timer |
316 | * mode 1 is one shot, mode 2 is period, otherwise del timer */ | 326 | * mode 1 is one shot, mode 2 is period, otherwise del timer */ |
317 | switch (ps->channels[0].mode) { | 327 | switch (ps->channels[0].mode) { |
328 | case 0: | ||
318 | case 1: | 329 | case 1: |
319 | /* FIXME: enhance mode 4 precision */ | 330 | /* FIXME: enhance mode 4 precision */ |
320 | case 4: | 331 | case 4: |
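Note: the rewritten pit_load_count() keeps the convention that an initial count of 0 means the largest possible count, and mode 0 now also gets a one-shot timer. Worked through for the val == 0 case, assuming the nominal 1.193182 MHz PIT clock:

	/* Illustrative numbers only:
	 *   val = 0   -> treated as 0x10000 (65536 ticks)
	 *   interval  = muldiv64(0x10000, NSEC_PER_SEC, KVM_PIT_FREQ)
	 *            ~= 65536 * 10^9 / 1193182 ns ~= 54,925,000 ns (~54.9 ms),
	 * i.e. the familiar ~18.2 Hz IRQ0 rate of an unprogrammed channel 0.
	 */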
diff --git a/arch/x86/kvm/i8254.h b/arch/x86/kvm/i8254.h index 6acbe4b505d5..bbd863ff60b7 100644 --- a/arch/x86/kvm/i8254.h +++ b/arch/x86/kvm/i8254.h | |||
@@ -3,15 +3,6 @@ | |||
3 | 3 | ||
4 | #include "iodev.h" | 4 | #include "iodev.h" |
5 | 5 | ||
6 | struct kvm_kpit_timer { | ||
7 | struct hrtimer timer; | ||
8 | int irq; | ||
9 | s64 period; /* unit: ns */ | ||
10 | s64 scheduled; | ||
11 | atomic_t pending; | ||
12 | bool reinject; | ||
13 | }; | ||
14 | |||
15 | struct kvm_kpit_channel_state { | 6 | struct kvm_kpit_channel_state { |
16 | u32 count; /* can be 65536 */ | 7 | u32 count; /* can be 65536 */ |
17 | u16 latched_count; | 8 | u16 latched_count; |
@@ -30,7 +21,8 @@ struct kvm_kpit_channel_state { | |||
30 | 21 | ||
31 | struct kvm_kpit_state { | 22 | struct kvm_kpit_state { |
32 | struct kvm_kpit_channel_state channels[3]; | 23 | struct kvm_kpit_channel_state channels[3]; |
33 | struct kvm_kpit_timer pit_timer; | 24 | struct kvm_timer pit_timer; |
25 | bool is_periodic; | ||
34 | u32 speaker_data_on; | 26 | u32 speaker_data_on; |
35 | struct mutex lock; | 27 | struct mutex lock; |
36 | struct kvm_pit *pit; | 28 | struct kvm_pit *pit; |
diff --git a/arch/x86/kvm/irq.c b/arch/x86/kvm/irq.c index cf17ed52f6fb..96dfbb6ad2a9 100644 --- a/arch/x86/kvm/irq.c +++ b/arch/x86/kvm/irq.c | |||
@@ -24,6 +24,7 @@ | |||
24 | 24 | ||
25 | #include "irq.h" | 25 | #include "irq.h" |
26 | #include "i8254.h" | 26 | #include "i8254.h" |
27 | #include "x86.h" | ||
27 | 28 | ||
28 | /* | 29 | /* |
29 | * check if there are pending timer events | 30 | * check if there are pending timer events |
@@ -48,6 +49,9 @@ int kvm_cpu_has_interrupt(struct kvm_vcpu *v) | |||
48 | { | 49 | { |
49 | struct kvm_pic *s; | 50 | struct kvm_pic *s; |
50 | 51 | ||
52 | if (!irqchip_in_kernel(v->kvm)) | ||
53 | return v->arch.interrupt.pending; | ||
54 | |||
51 | if (kvm_apic_has_interrupt(v) == -1) { /* LAPIC */ | 55 | if (kvm_apic_has_interrupt(v) == -1) { /* LAPIC */ |
52 | if (kvm_apic_accept_pic_intr(v)) { | 56 | if (kvm_apic_accept_pic_intr(v)) { |
53 | s = pic_irqchip(v->kvm); /* PIC */ | 57 | s = pic_irqchip(v->kvm); /* PIC */ |
@@ -67,6 +71,9 @@ int kvm_cpu_get_interrupt(struct kvm_vcpu *v) | |||
67 | struct kvm_pic *s; | 71 | struct kvm_pic *s; |
68 | int vector; | 72 | int vector; |
69 | 73 | ||
74 | if (!irqchip_in_kernel(v->kvm)) | ||
75 | return v->arch.interrupt.nr; | ||
76 | |||
70 | vector = kvm_get_apic_interrupt(v); /* APIC */ | 77 | vector = kvm_get_apic_interrupt(v); /* APIC */ |
71 | if (vector == -1) { | 78 | if (vector == -1) { |
72 | if (kvm_apic_accept_pic_intr(v)) { | 79 | if (kvm_apic_accept_pic_intr(v)) { |
diff --git a/arch/x86/kvm/kvm_timer.h b/arch/x86/kvm/kvm_timer.h new file mode 100644 index 000000000000..26bd6ba74e1c --- /dev/null +++ b/arch/x86/kvm/kvm_timer.h | |||
@@ -0,0 +1,18 @@ | |||
1 | |||
2 | struct kvm_timer { | ||
3 | struct hrtimer timer; | ||
4 | s64 period; /* unit: ns */ | ||
5 | atomic_t pending; /* accumulated triggered timers */ | ||
6 | bool reinject; | ||
7 | struct kvm_timer_ops *t_ops; | ||
8 | struct kvm *kvm; | ||
9 | int vcpu_id; | ||
10 | }; | ||
11 | |||
12 | struct kvm_timer_ops { | ||
13 | bool (*is_periodic)(struct kvm_timer *); | ||
14 | }; | ||
15 | |||
16 | |||
17 | enum hrtimer_restart kvm_timer_fn(struct hrtimer *data); | ||
18 | |||
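Note: this new header is what lets the PIT and the local APIC share one timer implementation: a device embeds a struct kvm_timer, supplies a kvm_timer_ops with its notion of "periodic", and points the hrtimer at kvm_timer_fn(). A sketch of that wiring, with my_state, my_is_periodic() and my_arm_timer() as made-up illustration names (they mirror create_pit_timer() above and kvm_create_lapic() below):

	/* Sketch only; the names prefixed my_ are hypothetical. */
	struct my_state {
		struct kvm_timer timer;
		bool periodic;
	};

	static bool my_is_periodic(struct kvm_timer *ktimer)
	{
		return container_of(ktimer, struct my_state, timer)->periodic;
	}

	static struct kvm_timer_ops my_timer_ops = {
		.is_periodic = my_is_periodic,
	};

	static void my_arm_timer(struct my_state *s, struct kvm *kvm, s64 period_ns)
	{
		struct kvm_timer *kt = &s->timer;

		hrtimer_init(&kt->timer, CLOCK_MONOTONIC, HRTIMER_MODE_ABS);
		kt->timer.function = kvm_timer_fn;	/* shared callback from timer.c */
		kt->t_ops = &my_timer_ops;
		kt->kvm = kvm;
		kt->vcpu_id = 0;			/* deliver on vcpu 0, as the PIT does */
		kt->period = period_ns;
		atomic_set(&kt->pending, 0);
		hrtimer_start(&kt->timer, ktime_add_ns(ktime_get(), period_ns),
			      HRTIMER_MODE_ABS);
	}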
diff --git a/arch/x86/kvm/lapic.c b/arch/x86/kvm/lapic.c index f0b67f2cdd69..ae99d83f81a3 100644 --- a/arch/x86/kvm/lapic.c +++ b/arch/x86/kvm/lapic.c | |||
@@ -196,20 +196,15 @@ int kvm_lapic_find_highest_irr(struct kvm_vcpu *vcpu) | |||
196 | } | 196 | } |
197 | EXPORT_SYMBOL_GPL(kvm_lapic_find_highest_irr); | 197 | EXPORT_SYMBOL_GPL(kvm_lapic_find_highest_irr); |
198 | 198 | ||
199 | int kvm_apic_set_irq(struct kvm_vcpu *vcpu, u8 vec, u8 trig) | 199 | static int __apic_accept_irq(struct kvm_lapic *apic, int delivery_mode, |
200 | int vector, int level, int trig_mode); | ||
201 | |||
202 | int kvm_apic_set_irq(struct kvm_vcpu *vcpu, struct kvm_lapic_irq *irq) | ||
200 | { | 203 | { |
201 | struct kvm_lapic *apic = vcpu->arch.apic; | 204 | struct kvm_lapic *apic = vcpu->arch.apic; |
202 | 205 | ||
203 | if (!apic_test_and_set_irr(vec, apic)) { | 206 | return __apic_accept_irq(apic, irq->delivery_mode, irq->vector, |
204 | /* a new pending irq is set in IRR */ | 207 | irq->level, irq->trig_mode); |
205 | if (trig) | ||
206 | apic_set_vector(vec, apic->regs + APIC_TMR); | ||
207 | else | ||
208 | apic_clear_vector(vec, apic->regs + APIC_TMR); | ||
209 | kvm_vcpu_kick(apic->vcpu); | ||
210 | return 1; | ||
211 | } | ||
212 | return 0; | ||
213 | } | 208 | } |
214 | 209 | ||
215 | static inline int apic_find_highest_isr(struct kvm_lapic *apic) | 210 | static inline int apic_find_highest_isr(struct kvm_lapic *apic) |
@@ -250,7 +245,7 @@ static void apic_set_tpr(struct kvm_lapic *apic, u32 tpr) | |||
250 | 245 | ||
251 | int kvm_apic_match_physical_addr(struct kvm_lapic *apic, u16 dest) | 246 | int kvm_apic_match_physical_addr(struct kvm_lapic *apic, u16 dest) |
252 | { | 247 | { |
253 | return kvm_apic_id(apic) == dest; | 248 | return dest == 0xff || kvm_apic_id(apic) == dest; |
254 | } | 249 | } |
255 | 250 | ||
256 | int kvm_apic_match_logical_addr(struct kvm_lapic *apic, u8 mda) | 251 | int kvm_apic_match_logical_addr(struct kvm_lapic *apic, u8 mda) |
@@ -279,37 +274,34 @@ int kvm_apic_match_logical_addr(struct kvm_lapic *apic, u8 mda) | |||
279 | return result; | 274 | return result; |
280 | } | 275 | } |
281 | 276 | ||
282 | static int apic_match_dest(struct kvm_vcpu *vcpu, struct kvm_lapic *source, | 277 | int kvm_apic_match_dest(struct kvm_vcpu *vcpu, struct kvm_lapic *source, |
283 | int short_hand, int dest, int dest_mode) | 278 | int short_hand, int dest, int dest_mode) |
284 | { | 279 | { |
285 | int result = 0; | 280 | int result = 0; |
286 | struct kvm_lapic *target = vcpu->arch.apic; | 281 | struct kvm_lapic *target = vcpu->arch.apic; |
287 | 282 | ||
288 | apic_debug("target %p, source %p, dest 0x%x, " | 283 | apic_debug("target %p, source %p, dest 0x%x, " |
289 | "dest_mode 0x%x, short_hand 0x%x", | 284 | "dest_mode 0x%x, short_hand 0x%x\n", |
290 | target, source, dest, dest_mode, short_hand); | 285 | target, source, dest, dest_mode, short_hand); |
291 | 286 | ||
292 | ASSERT(!target); | 287 | ASSERT(!target); |
293 | switch (short_hand) { | 288 | switch (short_hand) { |
294 | case APIC_DEST_NOSHORT: | 289 | case APIC_DEST_NOSHORT: |
295 | if (dest_mode == 0) { | 290 | if (dest_mode == 0) |
296 | /* Physical mode. */ | 291 | /* Physical mode. */ |
297 | if ((dest == 0xFF) || (dest == kvm_apic_id(target))) | 292 | result = kvm_apic_match_physical_addr(target, dest); |
298 | result = 1; | 293 | else |
299 | } else | ||
300 | /* Logical mode. */ | 294 | /* Logical mode. */ |
301 | result = kvm_apic_match_logical_addr(target, dest); | 295 | result = kvm_apic_match_logical_addr(target, dest); |
302 | break; | 296 | break; |
303 | case APIC_DEST_SELF: | 297 | case APIC_DEST_SELF: |
304 | if (target == source) | 298 | result = (target == source); |
305 | result = 1; | ||
306 | break; | 299 | break; |
307 | case APIC_DEST_ALLINC: | 300 | case APIC_DEST_ALLINC: |
308 | result = 1; | 301 | result = 1; |
309 | break; | 302 | break; |
310 | case APIC_DEST_ALLBUT: | 303 | case APIC_DEST_ALLBUT: |
311 | if (target != source) | 304 | result = (target != source); |
312 | result = 1; | ||
313 | break; | 305 | break; |
314 | default: | 306 | default: |
315 | printk(KERN_WARNING "Bad dest shorthand value %x\n", | 307 | printk(KERN_WARNING "Bad dest shorthand value %x\n", |
@@ -327,20 +319,22 @@ static int apic_match_dest(struct kvm_vcpu *vcpu, struct kvm_lapic *source, | |||
327 | static int __apic_accept_irq(struct kvm_lapic *apic, int delivery_mode, | 319 | static int __apic_accept_irq(struct kvm_lapic *apic, int delivery_mode, |
328 | int vector, int level, int trig_mode) | 320 | int vector, int level, int trig_mode) |
329 | { | 321 | { |
330 | int orig_irr, result = 0; | 322 | int result = 0; |
331 | struct kvm_vcpu *vcpu = apic->vcpu; | 323 | struct kvm_vcpu *vcpu = apic->vcpu; |
332 | 324 | ||
333 | switch (delivery_mode) { | 325 | switch (delivery_mode) { |
334 | case APIC_DM_FIXED: | ||
335 | case APIC_DM_LOWEST: | 326 | case APIC_DM_LOWEST: |
327 | vcpu->arch.apic_arb_prio++; | ||
328 | case APIC_DM_FIXED: | ||
336 | /* FIXME add logic for vcpu on reset */ | 329 | /* FIXME add logic for vcpu on reset */ |
337 | if (unlikely(!apic_enabled(apic))) | 330 | if (unlikely(!apic_enabled(apic))) |
338 | break; | 331 | break; |
339 | 332 | ||
340 | orig_irr = apic_test_and_set_irr(vector, apic); | 333 | result = !apic_test_and_set_irr(vector, apic); |
341 | if (orig_irr && trig_mode) { | 334 | if (!result) { |
342 | apic_debug("level trig mode repeatedly for vector %d", | 335 | if (trig_mode) |
343 | vector); | 336 | apic_debug("level trig mode repeatedly for " |
337 | "vector %d", vector); | ||
344 | break; | 338 | break; |
345 | } | 339 | } |
346 | 340 | ||
@@ -349,10 +343,7 @@ static int __apic_accept_irq(struct kvm_lapic *apic, int delivery_mode, | |||
349 | apic_set_vector(vector, apic->regs + APIC_TMR); | 343 | apic_set_vector(vector, apic->regs + APIC_TMR); |
350 | } else | 344 | } else |
351 | apic_clear_vector(vector, apic->regs + APIC_TMR); | 345 | apic_clear_vector(vector, apic->regs + APIC_TMR); |
352 | |||
353 | kvm_vcpu_kick(vcpu); | 346 | kvm_vcpu_kick(vcpu); |
354 | |||
355 | result = (orig_irr == 0); | ||
356 | break; | 347 | break; |
357 | 348 | ||
358 | case APIC_DM_REMRD: | 349 | case APIC_DM_REMRD: |
@@ -364,12 +355,14 @@ static int __apic_accept_irq(struct kvm_lapic *apic, int delivery_mode, | |||
364 | break; | 355 | break; |
365 | 356 | ||
366 | case APIC_DM_NMI: | 357 | case APIC_DM_NMI: |
358 | result = 1; | ||
367 | kvm_inject_nmi(vcpu); | 359 | kvm_inject_nmi(vcpu); |
368 | kvm_vcpu_kick(vcpu); | 360 | kvm_vcpu_kick(vcpu); |
369 | break; | 361 | break; |
370 | 362 | ||
371 | case APIC_DM_INIT: | 363 | case APIC_DM_INIT: |
372 | if (level) { | 364 | if (level) { |
365 | result = 1; | ||
373 | if (vcpu->arch.mp_state == KVM_MP_STATE_RUNNABLE) | 366 | if (vcpu->arch.mp_state == KVM_MP_STATE_RUNNABLE) |
374 | printk(KERN_DEBUG | 367 | printk(KERN_DEBUG |
375 | "INIT on a runnable vcpu %d\n", | 368 | "INIT on a runnable vcpu %d\n", |
@@ -386,6 +379,7 @@ static int __apic_accept_irq(struct kvm_lapic *apic, int delivery_mode, | |||
386 | apic_debug("SIPI to vcpu %d vector 0x%02x\n", | 379 | apic_debug("SIPI to vcpu %d vector 0x%02x\n", |
387 | vcpu->vcpu_id, vector); | 380 | vcpu->vcpu_id, vector); |
388 | if (vcpu->arch.mp_state == KVM_MP_STATE_INIT_RECEIVED) { | 381 | if (vcpu->arch.mp_state == KVM_MP_STATE_INIT_RECEIVED) { |
382 | result = 1; | ||
389 | vcpu->arch.sipi_vector = vector; | 383 | vcpu->arch.sipi_vector = vector; |
390 | vcpu->arch.mp_state = KVM_MP_STATE_SIPI_RECEIVED; | 384 | vcpu->arch.mp_state = KVM_MP_STATE_SIPI_RECEIVED; |
391 | kvm_vcpu_kick(vcpu); | 385 | kvm_vcpu_kick(vcpu); |
@@ -408,43 +402,9 @@ static int __apic_accept_irq(struct kvm_lapic *apic, int delivery_mode, | |||
408 | return result; | 402 | return result; |
409 | } | 403 | } |
410 | 404 | ||
411 | static struct kvm_lapic *kvm_apic_round_robin(struct kvm *kvm, u8 vector, | 405 | int kvm_apic_compare_prio(struct kvm_vcpu *vcpu1, struct kvm_vcpu *vcpu2) |
412 | unsigned long bitmap) | ||
413 | { | ||
414 | int last; | ||
415 | int next; | ||
416 | struct kvm_lapic *apic = NULL; | ||
417 | |||
418 | last = kvm->arch.round_robin_prev_vcpu; | ||
419 | next = last; | ||
420 | |||
421 | do { | ||
422 | if (++next == KVM_MAX_VCPUS) | ||
423 | next = 0; | ||
424 | if (kvm->vcpus[next] == NULL || !test_bit(next, &bitmap)) | ||
425 | continue; | ||
426 | apic = kvm->vcpus[next]->arch.apic; | ||
427 | if (apic && apic_enabled(apic)) | ||
428 | break; | ||
429 | apic = NULL; | ||
430 | } while (next != last); | ||
431 | kvm->arch.round_robin_prev_vcpu = next; | ||
432 | |||
433 | if (!apic) | ||
434 | printk(KERN_DEBUG "vcpu not ready for apic_round_robin\n"); | ||
435 | |||
436 | return apic; | ||
437 | } | ||
438 | |||
439 | struct kvm_vcpu *kvm_get_lowest_prio_vcpu(struct kvm *kvm, u8 vector, | ||
440 | unsigned long bitmap) | ||
441 | { | 406 | { |
442 | struct kvm_lapic *apic; | 407 | return vcpu1->arch.apic_arb_prio - vcpu2->arch.apic_arb_prio; |
443 | |||
444 | apic = kvm_apic_round_robin(kvm, vector, bitmap); | ||
445 | if (apic) | ||
446 | return apic->vcpu; | ||
447 | return NULL; | ||
448 | } | 408 | } |
449 | 409 | ||
450 | static void apic_set_eoi(struct kvm_lapic *apic) | 410 | static void apic_set_eoi(struct kvm_lapic *apic) |
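Note: with the round-robin picker gone, lowest-priority delivery is arbitrated through the new per-vcpu apic_arb_prio counter: __apic_accept_irq() below bumps it on every APIC_DM_LOWEST delivery, and kvm_apic_compare_prio() is exported so the common delivery path (kvm_irq_delivery_to_apic(), defined outside the files shown here) can pick the least-recently-favoured vcpu. A sketch of that selection, using only helpers this patch introduces:

	/* Sketch only: the real loop sits in the generic irq delivery code,
	 * outside this diff. */
	static struct kvm_vcpu *pick_lowest_prio(struct kvm *kvm,
						 struct kvm_lapic *src,
						 struct kvm_lapic_irq *irq)
	{
		struct kvm_vcpu *best = NULL;
		int i;

		for (i = 0; i < KVM_MAX_VCPUS; i++) {
			struct kvm_vcpu *vcpu = kvm->vcpus[i];

			if (!vcpu || !kvm_apic_present(vcpu))
				continue;
			if (!kvm_apic_match_dest(vcpu, src, irq->shorthand,
						 irq->dest_id, irq->dest_mode))
				continue;
			if (!best || kvm_apic_compare_prio(vcpu, best) < 0)
				best = vcpu;	/* fewest lowest-priority hits so far */
		}
		return best;
	}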
@@ -472,47 +432,24 @@ static void apic_send_ipi(struct kvm_lapic *apic) | |||
472 | { | 432 | { |
473 | u32 icr_low = apic_get_reg(apic, APIC_ICR); | 433 | u32 icr_low = apic_get_reg(apic, APIC_ICR); |
474 | u32 icr_high = apic_get_reg(apic, APIC_ICR2); | 434 | u32 icr_high = apic_get_reg(apic, APIC_ICR2); |
435 | struct kvm_lapic_irq irq; | ||
475 | 436 | ||
476 | unsigned int dest = GET_APIC_DEST_FIELD(icr_high); | 437 | irq.vector = icr_low & APIC_VECTOR_MASK; |
477 | unsigned int short_hand = icr_low & APIC_SHORT_MASK; | 438 | irq.delivery_mode = icr_low & APIC_MODE_MASK; |
478 | unsigned int trig_mode = icr_low & APIC_INT_LEVELTRIG; | 439 | irq.dest_mode = icr_low & APIC_DEST_MASK; |
479 | unsigned int level = icr_low & APIC_INT_ASSERT; | 440 | irq.level = icr_low & APIC_INT_ASSERT; |
480 | unsigned int dest_mode = icr_low & APIC_DEST_MASK; | 441 | irq.trig_mode = icr_low & APIC_INT_LEVELTRIG; |
481 | unsigned int delivery_mode = icr_low & APIC_MODE_MASK; | 442 | irq.shorthand = icr_low & APIC_SHORT_MASK; |
482 | unsigned int vector = icr_low & APIC_VECTOR_MASK; | 443 | irq.dest_id = GET_APIC_DEST_FIELD(icr_high); |
483 | |||
484 | struct kvm_vcpu *target; | ||
485 | struct kvm_vcpu *vcpu; | ||
486 | unsigned long lpr_map = 0; | ||
487 | int i; | ||
488 | 444 | ||
489 | apic_debug("icr_high 0x%x, icr_low 0x%x, " | 445 | apic_debug("icr_high 0x%x, icr_low 0x%x, " |
490 | "short_hand 0x%x, dest 0x%x, trig_mode 0x%x, level 0x%x, " | 446 | "short_hand 0x%x, dest 0x%x, trig_mode 0x%x, level 0x%x, " |
491 | "dest_mode 0x%x, delivery_mode 0x%x, vector 0x%x\n", | 447 | "dest_mode 0x%x, delivery_mode 0x%x, vector 0x%x\n", |
492 | icr_high, icr_low, short_hand, dest, | 448 | icr_high, icr_low, irq.shorthand, irq.dest_id, |
493 | trig_mode, level, dest_mode, delivery_mode, vector); | 449 | irq.trig_mode, irq.level, irq.dest_mode, irq.delivery_mode, |
494 | 450 | irq.vector); | |
495 | for (i = 0; i < KVM_MAX_VCPUS; i++) { | ||
496 | vcpu = apic->vcpu->kvm->vcpus[i]; | ||
497 | if (!vcpu) | ||
498 | continue; | ||
499 | |||
500 | if (vcpu->arch.apic && | ||
501 | apic_match_dest(vcpu, apic, short_hand, dest, dest_mode)) { | ||
502 | if (delivery_mode == APIC_DM_LOWEST) | ||
503 | set_bit(vcpu->vcpu_id, &lpr_map); | ||
504 | else | ||
505 | __apic_accept_irq(vcpu->arch.apic, delivery_mode, | ||
506 | vector, level, trig_mode); | ||
507 | } | ||
508 | } | ||
509 | 451 | ||
510 | if (delivery_mode == APIC_DM_LOWEST) { | 452 | kvm_irq_delivery_to_apic(apic->vcpu->kvm, apic, &irq); |
511 | target = kvm_get_lowest_prio_vcpu(vcpu->kvm, vector, lpr_map); | ||
512 | if (target != NULL) | ||
513 | __apic_accept_irq(target->arch.apic, delivery_mode, | ||
514 | vector, level, trig_mode); | ||
515 | } | ||
516 | } | 453 | } |
517 | 454 | ||
518 | static u32 apic_get_tmcct(struct kvm_lapic *apic) | 455 | static u32 apic_get_tmcct(struct kvm_lapic *apic) |
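Note: apic_send_ipi() now only unpacks the two ICR registers into a kvm_lapic_irq and hands it to the common delivery path. Worked through for one concrete write, assuming the standard apicdef.h bit layout (vector in bits 7:0, delivery mode 10:8, destination mode bit 11, assert bit 14, trigger bit 15, shorthand bits 19:18, destination in ICR2 bits 31:24):

	/* Illustrative values only: icr_low = 0x000040a0, icr_high = 0x01000000
	 * decode to a fixed, physical, edge-triggered, asserted IPI with
	 * vector 0xa0 aimed at APIC ID 1. */
	struct kvm_lapic_irq example = {
		.vector		= 0xa0,		/* icr_low & APIC_VECTOR_MASK */
		.delivery_mode	= 0x000,	/* APIC_DM_FIXED */
		.dest_mode	= 0,		/* physical (bit 11 clear) */
		.level		= 0x4000,	/* raw APIC_INT_ASSERT bit, as stored above */
		.trig_mode	= 0,		/* edge (bit 15 clear) */
		.shorthand	= 0,		/* no shorthand */
		.dest_id	= 0x01,		/* GET_APIC_DEST_FIELD(icr_high) */
	};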
@@ -527,12 +464,13 @@ static u32 apic_get_tmcct(struct kvm_lapic *apic) | |||
527 | if (apic_get_reg(apic, APIC_TMICT) == 0) | 464 | if (apic_get_reg(apic, APIC_TMICT) == 0) |
528 | return 0; | 465 | return 0; |
529 | 466 | ||
530 | remaining = hrtimer_expires_remaining(&apic->timer.dev); | 467 | remaining = hrtimer_expires_remaining(&apic->lapic_timer.timer); |
531 | if (ktime_to_ns(remaining) < 0) | 468 | if (ktime_to_ns(remaining) < 0) |
532 | remaining = ktime_set(0, 0); | 469 | remaining = ktime_set(0, 0); |
533 | 470 | ||
534 | ns = mod_64(ktime_to_ns(remaining), apic->timer.period); | 471 | ns = mod_64(ktime_to_ns(remaining), apic->lapic_timer.period); |
535 | tmcct = div64_u64(ns, (APIC_BUS_CYCLE_NS * apic->timer.divide_count)); | 472 | tmcct = div64_u64(ns, |
473 | (APIC_BUS_CYCLE_NS * apic->divide_count)); | ||
536 | 474 | ||
537 | return tmcct; | 475 | return tmcct; |
538 | } | 476 | } |
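Note: apic_get_tmcct() now reads the remaining time off the shared lapic_timer and scales it back into bus cycles, with the period coming from start_apic_timer() below. Worked through with APIC_BUS_CYCLE_NS assumed to be 1 ns, as it is in this file:

	/* Worked example of the arithmetic above:
	 *   TMICT = 1,000,000, divide_count = 16
	 *     -> lapic_timer.period = 1,000,000 * 1 * 16 = 16,000,000 ns
	 *   hrtimer reports 4,000,000 ns remaining
	 *     -> ns    = 4,000,000 % 16,000,000 = 4,000,000
	 *     -> TMCCT = 4,000,000 / (1 * 16)   = 250,000
	 */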
@@ -619,25 +557,25 @@ static void update_divide_count(struct kvm_lapic *apic) | |||
619 | tdcr = apic_get_reg(apic, APIC_TDCR); | 557 | tdcr = apic_get_reg(apic, APIC_TDCR); |
620 | tmp1 = tdcr & 0xf; | 558 | tmp1 = tdcr & 0xf; |
621 | tmp2 = ((tmp1 & 0x3) | ((tmp1 & 0x8) >> 1)) + 1; | 559 | tmp2 = ((tmp1 & 0x3) | ((tmp1 & 0x8) >> 1)) + 1; |
622 | apic->timer.divide_count = 0x1 << (tmp2 & 0x7); | 560 | apic->divide_count = 0x1 << (tmp2 & 0x7); |
623 | 561 | ||
624 | apic_debug("timer divide count is 0x%x\n", | 562 | apic_debug("timer divide count is 0x%x\n", |
625 | apic->timer.divide_count); | 563 | apic->divide_count); |
626 | } | 564 | } |
627 | 565 | ||
628 | static void start_apic_timer(struct kvm_lapic *apic) | 566 | static void start_apic_timer(struct kvm_lapic *apic) |
629 | { | 567 | { |
630 | ktime_t now = apic->timer.dev.base->get_time(); | 568 | ktime_t now = apic->lapic_timer.timer.base->get_time(); |
631 | 569 | ||
632 | apic->timer.period = apic_get_reg(apic, APIC_TMICT) * | 570 | apic->lapic_timer.period = apic_get_reg(apic, APIC_TMICT) * |
633 | APIC_BUS_CYCLE_NS * apic->timer.divide_count; | 571 | APIC_BUS_CYCLE_NS * apic->divide_count; |
634 | atomic_set(&apic->timer.pending, 0); | 572 | atomic_set(&apic->lapic_timer.pending, 0); |
635 | 573 | ||
636 | if (!apic->timer.period) | 574 | if (!apic->lapic_timer.period) |
637 | return; | 575 | return; |
638 | 576 | ||
639 | hrtimer_start(&apic->timer.dev, | 577 | hrtimer_start(&apic->lapic_timer.timer, |
640 | ktime_add_ns(now, apic->timer.period), | 578 | ktime_add_ns(now, apic->lapic_timer.period), |
641 | HRTIMER_MODE_ABS); | 579 | HRTIMER_MODE_ABS); |
642 | 580 | ||
643 | apic_debug("%s: bus cycle is %" PRId64 "ns, now 0x%016" | 581 | apic_debug("%s: bus cycle is %" PRId64 "ns, now 0x%016" |
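Note: update_divide_count() folds the TDCR divide-configuration field (bits 3 and 1:0) into a power-of-two divisor, now stored in apic->divide_count. Two worked cases:

	/* Worked examples of the decode above:
	 *   TDCR low nibble 0xb (APIC "divide by 1"):
	 *     tmp2 = ((0xb & 0x3) | ((0xb & 0x8) >> 1)) + 1 = (0x3 | 0x4) + 1 = 8
	 *     divide_count = 1 << (8 & 0x7) = 1
	 *   TDCR low nibble 0x0 ("divide by 2"):
	 *     tmp2 = ((0x0 & 0x3) | 0x0) + 1 = 1
	 *     divide_count = 1 << 1 = 2
	 */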
@@ -646,9 +584,9 @@ static void start_apic_timer(struct kvm_lapic *apic) | |||
646 | "expire @ 0x%016" PRIx64 ".\n", __func__, | 584 | "expire @ 0x%016" PRIx64 ".\n", __func__, |
647 | APIC_BUS_CYCLE_NS, ktime_to_ns(now), | 585 | APIC_BUS_CYCLE_NS, ktime_to_ns(now), |
648 | apic_get_reg(apic, APIC_TMICT), | 586 | apic_get_reg(apic, APIC_TMICT), |
649 | apic->timer.period, | 587 | apic->lapic_timer.period, |
650 | ktime_to_ns(ktime_add_ns(now, | 588 | ktime_to_ns(ktime_add_ns(now, |
651 | apic->timer.period))); | 589 | apic->lapic_timer.period))); |
652 | } | 590 | } |
653 | 591 | ||
654 | static void apic_manage_nmi_watchdog(struct kvm_lapic *apic, u32 lvt0_val) | 592 | static void apic_manage_nmi_watchdog(struct kvm_lapic *apic, u32 lvt0_val) |
@@ -730,7 +668,7 @@ static void apic_mmio_write(struct kvm_io_device *this, | |||
730 | apic_set_reg(apic, APIC_LVTT + 0x10 * i, | 668 | apic_set_reg(apic, APIC_LVTT + 0x10 * i, |
731 | lvt_val | APIC_LVT_MASKED); | 669 | lvt_val | APIC_LVT_MASKED); |
732 | } | 670 | } |
733 | atomic_set(&apic->timer.pending, 0); | 671 | atomic_set(&apic->lapic_timer.pending, 0); |
734 | 672 | ||
735 | } | 673 | } |
736 | break; | 674 | break; |
@@ -762,7 +700,7 @@ static void apic_mmio_write(struct kvm_io_device *this, | |||
762 | break; | 700 | break; |
763 | 701 | ||
764 | case APIC_TMICT: | 702 | case APIC_TMICT: |
765 | hrtimer_cancel(&apic->timer.dev); | 703 | hrtimer_cancel(&apic->lapic_timer.timer); |
766 | apic_set_reg(apic, APIC_TMICT, val); | 704 | apic_set_reg(apic, APIC_TMICT, val); |
767 | start_apic_timer(apic); | 705 | start_apic_timer(apic); |
768 | return; | 706 | return; |
@@ -802,7 +740,7 @@ void kvm_free_lapic(struct kvm_vcpu *vcpu) | |||
802 | if (!vcpu->arch.apic) | 740 | if (!vcpu->arch.apic) |
803 | return; | 741 | return; |
804 | 742 | ||
805 | hrtimer_cancel(&vcpu->arch.apic->timer.dev); | 743 | hrtimer_cancel(&vcpu->arch.apic->lapic_timer.timer); |
806 | 744 | ||
807 | if (vcpu->arch.apic->regs_page) | 745 | if (vcpu->arch.apic->regs_page) |
808 | __free_page(vcpu->arch.apic->regs_page); | 746 | __free_page(vcpu->arch.apic->regs_page); |
@@ -880,7 +818,7 @@ void kvm_lapic_reset(struct kvm_vcpu *vcpu) | |||
880 | ASSERT(apic != NULL); | 818 | ASSERT(apic != NULL); |
881 | 819 | ||
882 | /* Stop the timer in case it's a reset to an active apic */ | 820 | /* Stop the timer in case it's a reset to an active apic */ |
883 | hrtimer_cancel(&apic->timer.dev); | 821 | hrtimer_cancel(&apic->lapic_timer.timer); |
884 | 822 | ||
885 | apic_set_reg(apic, APIC_ID, vcpu->vcpu_id << 24); | 823 | apic_set_reg(apic, APIC_ID, vcpu->vcpu_id << 24); |
886 | apic_set_reg(apic, APIC_LVR, APIC_VERSION); | 824 | apic_set_reg(apic, APIC_LVR, APIC_VERSION); |
@@ -905,11 +843,13 @@ void kvm_lapic_reset(struct kvm_vcpu *vcpu) | |||
905 | apic_set_reg(apic, APIC_TMR + 0x10 * i, 0); | 843 | apic_set_reg(apic, APIC_TMR + 0x10 * i, 0); |
906 | } | 844 | } |
907 | update_divide_count(apic); | 845 | update_divide_count(apic); |
908 | atomic_set(&apic->timer.pending, 0); | 846 | atomic_set(&apic->lapic_timer.pending, 0); |
909 | if (vcpu->vcpu_id == 0) | 847 | if (vcpu->vcpu_id == 0) |
910 | vcpu->arch.apic_base |= MSR_IA32_APICBASE_BSP; | 848 | vcpu->arch.apic_base |= MSR_IA32_APICBASE_BSP; |
911 | apic_update_ppr(apic); | 849 | apic_update_ppr(apic); |
912 | 850 | ||
851 | vcpu->arch.apic_arb_prio = 0; | ||
852 | |||
913 | apic_debug(KERN_INFO "%s: vcpu=%p, id=%d, base_msr=" | 853 | apic_debug(KERN_INFO "%s: vcpu=%p, id=%d, base_msr=" |
914 | "0x%016" PRIx64 ", base_address=0x%0lx.\n", __func__, | 854 | "0x%016" PRIx64 ", base_address=0x%0lx.\n", __func__, |
915 | vcpu, kvm_apic_id(apic), | 855 | vcpu, kvm_apic_id(apic), |
@@ -917,16 +857,14 @@ void kvm_lapic_reset(struct kvm_vcpu *vcpu) | |||
917 | } | 857 | } |
918 | EXPORT_SYMBOL_GPL(kvm_lapic_reset); | 858 | EXPORT_SYMBOL_GPL(kvm_lapic_reset); |
919 | 859 | ||
920 | int kvm_lapic_enabled(struct kvm_vcpu *vcpu) | 860 | bool kvm_apic_present(struct kvm_vcpu *vcpu) |
921 | { | 861 | { |
922 | struct kvm_lapic *apic = vcpu->arch.apic; | 862 | return vcpu->arch.apic && apic_hw_enabled(vcpu->arch.apic); |
923 | int ret = 0; | 863 | } |
924 | |||
925 | if (!apic) | ||
926 | return 0; | ||
927 | ret = apic_enabled(apic); | ||
928 | 864 | ||
929 | return ret; | 865 | int kvm_lapic_enabled(struct kvm_vcpu *vcpu) |
866 | { | ||
867 | return kvm_apic_present(vcpu) && apic_sw_enabled(vcpu->arch.apic); | ||
930 | } | 868 | } |
931 | EXPORT_SYMBOL_GPL(kvm_lapic_enabled); | 869 | EXPORT_SYMBOL_GPL(kvm_lapic_enabled); |
932 | 870 | ||
@@ -936,22 +874,11 @@ EXPORT_SYMBOL_GPL(kvm_lapic_enabled); | |||
936 | *---------------------------------------------------------------------- | 874 | *---------------------------------------------------------------------- |
937 | */ | 875 | */ |
938 | 876 | ||
939 | /* TODO: make sure __apic_timer_fn runs in current pCPU */ | 877 | static bool lapic_is_periodic(struct kvm_timer *ktimer) |
940 | static int __apic_timer_fn(struct kvm_lapic *apic) | ||
941 | { | 878 | { |
942 | int result = 0; | 879 | struct kvm_lapic *apic = container_of(ktimer, struct kvm_lapic, |
943 | wait_queue_head_t *q = &apic->vcpu->wq; | 880 | lapic_timer); |
944 | 881 | return apic_lvtt_period(apic); | |
945 | if(!atomic_inc_and_test(&apic->timer.pending)) | ||
946 | set_bit(KVM_REQ_PENDING_TIMER, &apic->vcpu->requests); | ||
947 | if (waitqueue_active(q)) | ||
948 | wake_up_interruptible(q); | ||
949 | |||
950 | if (apic_lvtt_period(apic)) { | ||
951 | result = 1; | ||
952 | hrtimer_add_expires_ns(&apic->timer.dev, apic->timer.period); | ||
953 | } | ||
954 | return result; | ||
955 | } | 882 | } |
956 | 883 | ||
957 | int apic_has_pending_timer(struct kvm_vcpu *vcpu) | 884 | int apic_has_pending_timer(struct kvm_vcpu *vcpu) |
@@ -959,7 +886,7 @@ int apic_has_pending_timer(struct kvm_vcpu *vcpu) | |||
959 | struct kvm_lapic *lapic = vcpu->arch.apic; | 886 | struct kvm_lapic *lapic = vcpu->arch.apic; |
960 | 887 | ||
961 | if (lapic && apic_enabled(lapic) && apic_lvt_enabled(lapic, APIC_LVTT)) | 888 | if (lapic && apic_enabled(lapic) && apic_lvt_enabled(lapic, APIC_LVTT)) |
962 | return atomic_read(&lapic->timer.pending); | 889 | return atomic_read(&lapic->lapic_timer.pending); |
963 | 890 | ||
964 | return 0; | 891 | return 0; |
965 | } | 892 | } |
@@ -986,20 +913,9 @@ void kvm_apic_nmi_wd_deliver(struct kvm_vcpu *vcpu) | |||
986 | kvm_apic_local_deliver(apic, APIC_LVT0); | 913 | kvm_apic_local_deliver(apic, APIC_LVT0); |
987 | } | 914 | } |
988 | 915 | ||
989 | static enum hrtimer_restart apic_timer_fn(struct hrtimer *data) | 916 | static struct kvm_timer_ops lapic_timer_ops = { |
990 | { | 917 | .is_periodic = lapic_is_periodic, |
991 | struct kvm_lapic *apic; | 918 | }; |
992 | int restart_timer = 0; | ||
993 | |||
994 | apic = container_of(data, struct kvm_lapic, timer.dev); | ||
995 | |||
996 | restart_timer = __apic_timer_fn(apic); | ||
997 | |||
998 | if (restart_timer) | ||
999 | return HRTIMER_RESTART; | ||
1000 | else | ||
1001 | return HRTIMER_NORESTART; | ||
1002 | } | ||
1003 | 919 | ||
1004 | int kvm_create_lapic(struct kvm_vcpu *vcpu) | 920 | int kvm_create_lapic(struct kvm_vcpu *vcpu) |
1005 | { | 921 | { |
@@ -1024,8 +940,13 @@ int kvm_create_lapic(struct kvm_vcpu *vcpu) | |||
1024 | memset(apic->regs, 0, PAGE_SIZE); | 940 | memset(apic->regs, 0, PAGE_SIZE); |
1025 | apic->vcpu = vcpu; | 941 | apic->vcpu = vcpu; |
1026 | 942 | ||
1027 | hrtimer_init(&apic->timer.dev, CLOCK_MONOTONIC, HRTIMER_MODE_ABS); | 943 | hrtimer_init(&apic->lapic_timer.timer, CLOCK_MONOTONIC, |
1028 | apic->timer.dev.function = apic_timer_fn; | 944 | HRTIMER_MODE_ABS); |
945 | apic->lapic_timer.timer.function = kvm_timer_fn; | ||
946 | apic->lapic_timer.t_ops = &lapic_timer_ops; | ||
947 | apic->lapic_timer.kvm = vcpu->kvm; | ||
948 | apic->lapic_timer.vcpu_id = vcpu->vcpu_id; | ||
949 | |||
1029 | apic->base_address = APIC_DEFAULT_PHYS_BASE; | 950 | apic->base_address = APIC_DEFAULT_PHYS_BASE; |
1030 | vcpu->arch.apic_base = APIC_DEFAULT_PHYS_BASE; | 951 | vcpu->arch.apic_base = APIC_DEFAULT_PHYS_BASE; |
1031 | 952 | ||
@@ -1078,9 +999,9 @@ void kvm_inject_apic_timer_irqs(struct kvm_vcpu *vcpu) | |||
1078 | { | 999 | { |
1079 | struct kvm_lapic *apic = vcpu->arch.apic; | 1000 | struct kvm_lapic *apic = vcpu->arch.apic; |
1080 | 1001 | ||
1081 | if (apic && atomic_read(&apic->timer.pending) > 0) { | 1002 | if (apic && atomic_read(&apic->lapic_timer.pending) > 0) { |
1082 | if (kvm_apic_local_deliver(apic, APIC_LVTT)) | 1003 | if (kvm_apic_local_deliver(apic, APIC_LVTT)) |
1083 | atomic_dec(&apic->timer.pending); | 1004 | atomic_dec(&apic->lapic_timer.pending); |
1084 | } | 1005 | } |
1085 | } | 1006 | } |
1086 | 1007 | ||
@@ -1106,7 +1027,7 @@ void kvm_apic_post_state_restore(struct kvm_vcpu *vcpu) | |||
1106 | MSR_IA32_APICBASE_BASE; | 1027 | MSR_IA32_APICBASE_BASE; |
1107 | apic_set_reg(apic, APIC_LVR, APIC_VERSION); | 1028 | apic_set_reg(apic, APIC_LVR, APIC_VERSION); |
1108 | apic_update_ppr(apic); | 1029 | apic_update_ppr(apic); |
1109 | hrtimer_cancel(&apic->timer.dev); | 1030 | hrtimer_cancel(&apic->lapic_timer.timer); |
1110 | update_divide_count(apic); | 1031 | update_divide_count(apic); |
1111 | start_apic_timer(apic); | 1032 | start_apic_timer(apic); |
1112 | } | 1033 | } |
@@ -1119,7 +1040,7 @@ void __kvm_migrate_apic_timer(struct kvm_vcpu *vcpu) | |||
1119 | if (!apic) | 1040 | if (!apic) |
1120 | return; | 1041 | return; |
1121 | 1042 | ||
1122 | timer = &apic->timer.dev; | 1043 | timer = &apic->lapic_timer.timer; |
1123 | if (hrtimer_cancel(timer)) | 1044 | if (hrtimer_cancel(timer)) |
1124 | hrtimer_start_expires(timer, HRTIMER_MODE_ABS); | 1045 | hrtimer_start_expires(timer, HRTIMER_MODE_ABS); |
1125 | } | 1046 | } |
diff --git a/arch/x86/kvm/lapic.h b/arch/x86/kvm/lapic.h index 45ab6ee71209..a587f8349c46 100644 --- a/arch/x86/kvm/lapic.h +++ b/arch/x86/kvm/lapic.h | |||
@@ -2,18 +2,15 @@ | |||
2 | #define __KVM_X86_LAPIC_H | 2 | #define __KVM_X86_LAPIC_H |
3 | 3 | ||
4 | #include "iodev.h" | 4 | #include "iodev.h" |
5 | #include "kvm_timer.h" | ||
5 | 6 | ||
6 | #include <linux/kvm_host.h> | 7 | #include <linux/kvm_host.h> |
7 | 8 | ||
8 | struct kvm_lapic { | 9 | struct kvm_lapic { |
9 | unsigned long base_address; | 10 | unsigned long base_address; |
10 | struct kvm_io_device dev; | 11 | struct kvm_io_device dev; |
11 | struct { | 12 | struct kvm_timer lapic_timer; |
12 | atomic_t pending; | 13 | u32 divide_count; |
13 | s64 period; /* unit: ns */ | ||
14 | u32 divide_count; | ||
15 | struct hrtimer dev; | ||
16 | } timer; | ||
17 | struct kvm_vcpu *vcpu; | 14 | struct kvm_vcpu *vcpu; |
18 | struct page *regs_page; | 15 | struct page *regs_page; |
19 | void *regs; | 16 | void *regs; |
@@ -34,12 +31,13 @@ u64 kvm_lapic_get_base(struct kvm_vcpu *vcpu); | |||
34 | 31 | ||
35 | int kvm_apic_match_physical_addr(struct kvm_lapic *apic, u16 dest); | 32 | int kvm_apic_match_physical_addr(struct kvm_lapic *apic, u16 dest); |
36 | int kvm_apic_match_logical_addr(struct kvm_lapic *apic, u8 mda); | 33 | int kvm_apic_match_logical_addr(struct kvm_lapic *apic, u8 mda); |
37 | int kvm_apic_set_irq(struct kvm_vcpu *vcpu, u8 vec, u8 trig); | 34 | int kvm_apic_set_irq(struct kvm_vcpu *vcpu, struct kvm_lapic_irq *irq); |
38 | 35 | ||
39 | u64 kvm_get_apic_base(struct kvm_vcpu *vcpu); | 36 | u64 kvm_get_apic_base(struct kvm_vcpu *vcpu); |
40 | void kvm_set_apic_base(struct kvm_vcpu *vcpu, u64 data); | 37 | void kvm_set_apic_base(struct kvm_vcpu *vcpu, u64 data); |
41 | void kvm_apic_post_state_restore(struct kvm_vcpu *vcpu); | 38 | void kvm_apic_post_state_restore(struct kvm_vcpu *vcpu); |
42 | int kvm_lapic_enabled(struct kvm_vcpu *vcpu); | 39 | int kvm_lapic_enabled(struct kvm_vcpu *vcpu); |
40 | bool kvm_apic_present(struct kvm_vcpu *vcpu); | ||
43 | int kvm_lapic_find_highest_irr(struct kvm_vcpu *vcpu); | 41 | int kvm_lapic_find_highest_irr(struct kvm_vcpu *vcpu); |
44 | 42 | ||
45 | void kvm_lapic_set_vapic_addr(struct kvm_vcpu *vcpu, gpa_t vapic_addr); | 43 | void kvm_lapic_set_vapic_addr(struct kvm_vcpu *vcpu, gpa_t vapic_addr); |
diff --git a/arch/x86/kvm/mmu.c b/arch/x86/kvm/mmu.c index 32cf11e5728a..5c3d6e81a7dc 100644 --- a/arch/x86/kvm/mmu.c +++ b/arch/x86/kvm/mmu.c | |||
@@ -126,6 +126,7 @@ module_param(oos_shadow, bool, 0644); | |||
126 | #define PFERR_PRESENT_MASK (1U << 0) | 126 | #define PFERR_PRESENT_MASK (1U << 0) |
127 | #define PFERR_WRITE_MASK (1U << 1) | 127 | #define PFERR_WRITE_MASK (1U << 1) |
128 | #define PFERR_USER_MASK (1U << 2) | 128 | #define PFERR_USER_MASK (1U << 2) |
129 | #define PFERR_RSVD_MASK (1U << 3) | ||
129 | #define PFERR_FETCH_MASK (1U << 4) | 130 | #define PFERR_FETCH_MASK (1U << 4) |
130 | 131 | ||
131 | #define PT_DIRECTORY_LEVEL 2 | 132 | #define PT_DIRECTORY_LEVEL 2 |
@@ -177,7 +178,11 @@ static u64 __read_mostly shadow_x_mask; /* mutual exclusive with nx_mask */ | |||
177 | static u64 __read_mostly shadow_user_mask; | 178 | static u64 __read_mostly shadow_user_mask; |
178 | static u64 __read_mostly shadow_accessed_mask; | 179 | static u64 __read_mostly shadow_accessed_mask; |
179 | static u64 __read_mostly shadow_dirty_mask; | 180 | static u64 __read_mostly shadow_dirty_mask; |
180 | static u64 __read_mostly shadow_mt_mask; | 181 | |
182 | static inline u64 rsvd_bits(int s, int e) | ||
183 | { | ||
184 | return ((1ULL << (e - s + 1)) - 1) << s; | ||
185 | } | ||
181 | 186 | ||
182 | void kvm_mmu_set_nonpresent_ptes(u64 trap_pte, u64 notrap_pte) | 187 | void kvm_mmu_set_nonpresent_ptes(u64 trap_pte, u64 notrap_pte) |
183 | { | 188 | { |
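Note: rsvd_bits(s, e) builds a mask with bits s..e (inclusive) set; the reserved-bit tables added later in this patch are assembled from it. For instance:

	/* rsvd_bits(s, e) = ((1ULL << (e - s + 1)) - 1) << s, e.g.:
	 *   rsvd_bits(7, 8)   = ((1ULL << 2)  - 1) << 7  = 0x180
	 *   rsvd_bits(36, 51) = ((1ULL << 16) - 1) << 36 = 0x000ffff000000000
	 * (the second is the "above maxphyaddr" mask for a 36-bit machine)
	 */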
@@ -193,14 +198,13 @@ void kvm_mmu_set_base_ptes(u64 base_pte) | |||
193 | EXPORT_SYMBOL_GPL(kvm_mmu_set_base_ptes); | 198 | EXPORT_SYMBOL_GPL(kvm_mmu_set_base_ptes); |
194 | 199 | ||
195 | void kvm_mmu_set_mask_ptes(u64 user_mask, u64 accessed_mask, | 200 | void kvm_mmu_set_mask_ptes(u64 user_mask, u64 accessed_mask, |
196 | u64 dirty_mask, u64 nx_mask, u64 x_mask, u64 mt_mask) | 201 | u64 dirty_mask, u64 nx_mask, u64 x_mask) |
197 | { | 202 | { |
198 | shadow_user_mask = user_mask; | 203 | shadow_user_mask = user_mask; |
199 | shadow_accessed_mask = accessed_mask; | 204 | shadow_accessed_mask = accessed_mask; |
200 | shadow_dirty_mask = dirty_mask; | 205 | shadow_dirty_mask = dirty_mask; |
201 | shadow_nx_mask = nx_mask; | 206 | shadow_nx_mask = nx_mask; |
202 | shadow_x_mask = x_mask; | 207 | shadow_x_mask = x_mask; |
203 | shadow_mt_mask = mt_mask; | ||
204 | } | 208 | } |
205 | EXPORT_SYMBOL_GPL(kvm_mmu_set_mask_ptes); | 209 | EXPORT_SYMBOL_GPL(kvm_mmu_set_mask_ptes); |
206 | 210 | ||
@@ -219,11 +223,6 @@ static int is_nx(struct kvm_vcpu *vcpu) | |||
219 | return vcpu->arch.shadow_efer & EFER_NX; | 223 | return vcpu->arch.shadow_efer & EFER_NX; |
220 | } | 224 | } |
221 | 225 | ||
222 | static int is_present_pte(unsigned long pte) | ||
223 | { | ||
224 | return pte & PT_PRESENT_MASK; | ||
225 | } | ||
226 | |||
227 | static int is_shadow_present_pte(u64 pte) | 226 | static int is_shadow_present_pte(u64 pte) |
228 | { | 227 | { |
229 | return pte != shadow_trap_nonpresent_pte | 228 | return pte != shadow_trap_nonpresent_pte |
@@ -1074,18 +1073,10 @@ static struct kvm_mmu_page *kvm_mmu_lookup_page(struct kvm *kvm, gfn_t gfn) | |||
1074 | return NULL; | 1073 | return NULL; |
1075 | } | 1074 | } |
1076 | 1075 | ||
1077 | static void kvm_unlink_unsync_global(struct kvm *kvm, struct kvm_mmu_page *sp) | ||
1078 | { | ||
1079 | list_del(&sp->oos_link); | ||
1080 | --kvm->stat.mmu_unsync_global; | ||
1081 | } | ||
1082 | |||
1083 | static void kvm_unlink_unsync_page(struct kvm *kvm, struct kvm_mmu_page *sp) | 1076 | static void kvm_unlink_unsync_page(struct kvm *kvm, struct kvm_mmu_page *sp) |
1084 | { | 1077 | { |
1085 | WARN_ON(!sp->unsync); | 1078 | WARN_ON(!sp->unsync); |
1086 | sp->unsync = 0; | 1079 | sp->unsync = 0; |
1087 | if (sp->global) | ||
1088 | kvm_unlink_unsync_global(kvm, sp); | ||
1089 | --kvm->stat.mmu_unsync; | 1080 | --kvm->stat.mmu_unsync; |
1090 | } | 1081 | } |
1091 | 1082 | ||
@@ -1248,7 +1239,6 @@ static struct kvm_mmu_page *kvm_mmu_get_page(struct kvm_vcpu *vcpu, | |||
1248 | pgprintk("%s: adding gfn %lx role %x\n", __func__, gfn, role.word); | 1239 | pgprintk("%s: adding gfn %lx role %x\n", __func__, gfn, role.word); |
1249 | sp->gfn = gfn; | 1240 | sp->gfn = gfn; |
1250 | sp->role = role; | 1241 | sp->role = role; |
1251 | sp->global = 0; | ||
1252 | hlist_add_head(&sp->hash_link, bucket); | 1242 | hlist_add_head(&sp->hash_link, bucket); |
1253 | if (!direct) { | 1243 | if (!direct) { |
1254 | if (rmap_write_protect(vcpu->kvm, gfn)) | 1244 | if (rmap_write_protect(vcpu->kvm, gfn)) |
@@ -1616,7 +1606,7 @@ static int get_mtrr_type(struct mtrr_state_type *mtrr_state, | |||
1616 | return mtrr_state->def_type; | 1606 | return mtrr_state->def_type; |
1617 | } | 1607 | } |
1618 | 1608 | ||
1619 | static u8 get_memory_type(struct kvm_vcpu *vcpu, gfn_t gfn) | 1609 | u8 kvm_get_guest_memory_type(struct kvm_vcpu *vcpu, gfn_t gfn) |
1620 | { | 1610 | { |
1621 | u8 mtrr; | 1611 | u8 mtrr; |
1622 | 1612 | ||
@@ -1626,6 +1616,7 @@ static u8 get_memory_type(struct kvm_vcpu *vcpu, gfn_t gfn) | |||
1626 | mtrr = MTRR_TYPE_WRBACK; | 1616 | mtrr = MTRR_TYPE_WRBACK; |
1627 | return mtrr; | 1617 | return mtrr; |
1628 | } | 1618 | } |
1619 | EXPORT_SYMBOL_GPL(kvm_get_guest_memory_type); | ||
1629 | 1620 | ||
1630 | static int kvm_unsync_page(struct kvm_vcpu *vcpu, struct kvm_mmu_page *sp) | 1621 | static int kvm_unsync_page(struct kvm_vcpu *vcpu, struct kvm_mmu_page *sp) |
1631 | { | 1622 | { |
@@ -1646,11 +1637,7 @@ static int kvm_unsync_page(struct kvm_vcpu *vcpu, struct kvm_mmu_page *sp) | |||
1646 | ++vcpu->kvm->stat.mmu_unsync; | 1637 | ++vcpu->kvm->stat.mmu_unsync; |
1647 | sp->unsync = 1; | 1638 | sp->unsync = 1; |
1648 | 1639 | ||
1649 | if (sp->global) { | 1640 | kvm_mmu_mark_parents_unsync(vcpu, sp); |
1650 | list_add(&sp->oos_link, &vcpu->kvm->arch.oos_global_pages); | ||
1651 | ++vcpu->kvm->stat.mmu_unsync_global; | ||
1652 | } else | ||
1653 | kvm_mmu_mark_parents_unsync(vcpu, sp); | ||
1654 | 1641 | ||
1655 | mmu_convert_notrap(sp); | 1642 | mmu_convert_notrap(sp); |
1656 | return 0; | 1643 | return 0; |
@@ -1677,21 +1664,11 @@ static int mmu_need_write_protect(struct kvm_vcpu *vcpu, gfn_t gfn, | |||
1677 | static int set_spte(struct kvm_vcpu *vcpu, u64 *shadow_pte, | 1664 | static int set_spte(struct kvm_vcpu *vcpu, u64 *shadow_pte, |
1678 | unsigned pte_access, int user_fault, | 1665 | unsigned pte_access, int user_fault, |
1679 | int write_fault, int dirty, int largepage, | 1666 | int write_fault, int dirty, int largepage, |
1680 | int global, gfn_t gfn, pfn_t pfn, bool speculative, | 1667 | gfn_t gfn, pfn_t pfn, bool speculative, |
1681 | bool can_unsync) | 1668 | bool can_unsync) |
1682 | { | 1669 | { |
1683 | u64 spte; | 1670 | u64 spte; |
1684 | int ret = 0; | 1671 | int ret = 0; |
1685 | u64 mt_mask = shadow_mt_mask; | ||
1686 | struct kvm_mmu_page *sp = page_header(__pa(shadow_pte)); | ||
1687 | |||
1688 | if (!global && sp->global) { | ||
1689 | sp->global = 0; | ||
1690 | if (sp->unsync) { | ||
1691 | kvm_unlink_unsync_global(vcpu->kvm, sp); | ||
1692 | kvm_mmu_mark_parents_unsync(vcpu, sp); | ||
1693 | } | ||
1694 | } | ||
1695 | 1672 | ||
1696 | /* | 1673 | /* |
1697 | * We don't set the accessed bit, since we sometimes want to see | 1674 | * We don't set the accessed bit, since we sometimes want to see |
@@ -1711,16 +1688,9 @@ static int set_spte(struct kvm_vcpu *vcpu, u64 *shadow_pte, | |||
1711 | spte |= shadow_user_mask; | 1688 | spte |= shadow_user_mask; |
1712 | if (largepage) | 1689 | if (largepage) |
1713 | spte |= PT_PAGE_SIZE_MASK; | 1690 | spte |= PT_PAGE_SIZE_MASK; |
1714 | if (mt_mask) { | 1691 | if (tdp_enabled) |
1715 | if (!kvm_is_mmio_pfn(pfn)) { | 1692 | spte |= kvm_x86_ops->get_mt_mask(vcpu, gfn, |
1716 | mt_mask = get_memory_type(vcpu, gfn) << | 1693 | kvm_is_mmio_pfn(pfn)); |
1717 | kvm_x86_ops->get_mt_mask_shift(); | ||
1718 | mt_mask |= VMX_EPT_IGMT_BIT; | ||
1719 | } else | ||
1720 | mt_mask = MTRR_TYPE_UNCACHABLE << | ||
1721 | kvm_x86_ops->get_mt_mask_shift(); | ||
1722 | spte |= mt_mask; | ||
1723 | } | ||
1724 | 1694 | ||
1725 | spte |= (u64)pfn << PAGE_SHIFT; | 1695 | spte |= (u64)pfn << PAGE_SHIFT; |
1726 | 1696 | ||
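Note: the EPT memory-type computation that used to sit inline here (guest MTRR type shifted by get_mt_mask_shift(), OR'd with VMX_EPT_IGMT_BIT, or forced to UC for MMIO) moves behind a new kvm_x86_ops->get_mt_mask() hook whose implementations live in vmx.c/svm.c, outside this diff. Mirroring the removed lines, the VMX side plausibly looks like:

	/* Sketch reconstructed from the removed code above; not taken verbatim
	 * from vmx.c.  VMX_EPT_MT_EPTE_SHIFT names the EPT memory-type field
	 * offset that get_mt_mask_shift() used to return. */
	static u64 vmx_get_mt_mask(struct kvm_vcpu *vcpu, gfn_t gfn, bool is_mmio)
	{
		if (is_mmio)
			return MTRR_TYPE_UNCACHABLE << VMX_EPT_MT_EPTE_SHIFT;

		return ((u64)kvm_get_guest_memory_type(vcpu, gfn) <<
			VMX_EPT_MT_EPTE_SHIFT) | VMX_EPT_IGMT_BIT;
	}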
@@ -1765,8 +1735,8 @@ set_pte: | |||
1765 | static void mmu_set_spte(struct kvm_vcpu *vcpu, u64 *shadow_pte, | 1735 | static void mmu_set_spte(struct kvm_vcpu *vcpu, u64 *shadow_pte, |
1766 | unsigned pt_access, unsigned pte_access, | 1736 | unsigned pt_access, unsigned pte_access, |
1767 | int user_fault, int write_fault, int dirty, | 1737 | int user_fault, int write_fault, int dirty, |
1768 | int *ptwrite, int largepage, int global, | 1738 | int *ptwrite, int largepage, gfn_t gfn, |
1769 | gfn_t gfn, pfn_t pfn, bool speculative) | 1739 | pfn_t pfn, bool speculative) |
1770 | { | 1740 | { |
1771 | int was_rmapped = 0; | 1741 | int was_rmapped = 0; |
1772 | int was_writeble = is_writeble_pte(*shadow_pte); | 1742 | int was_writeble = is_writeble_pte(*shadow_pte); |
@@ -1795,7 +1765,7 @@ static void mmu_set_spte(struct kvm_vcpu *vcpu, u64 *shadow_pte, | |||
1795 | was_rmapped = 1; | 1765 | was_rmapped = 1; |
1796 | } | 1766 | } |
1797 | if (set_spte(vcpu, shadow_pte, pte_access, user_fault, write_fault, | 1767 | if (set_spte(vcpu, shadow_pte, pte_access, user_fault, write_fault, |
1798 | dirty, largepage, global, gfn, pfn, speculative, true)) { | 1768 | dirty, largepage, gfn, pfn, speculative, true)) { |
1799 | if (write_fault) | 1769 | if (write_fault) |
1800 | *ptwrite = 1; | 1770 | *ptwrite = 1; |
1801 | kvm_x86_ops->tlb_flush(vcpu); | 1771 | kvm_x86_ops->tlb_flush(vcpu); |
@@ -1843,7 +1813,7 @@ static int __direct_map(struct kvm_vcpu *vcpu, gpa_t v, int write, | |||
1843 | || (largepage && iterator.level == PT_DIRECTORY_LEVEL)) { | 1813 | || (largepage && iterator.level == PT_DIRECTORY_LEVEL)) { |
1844 | mmu_set_spte(vcpu, iterator.sptep, ACC_ALL, ACC_ALL, | 1814 | mmu_set_spte(vcpu, iterator.sptep, ACC_ALL, ACC_ALL, |
1845 | 0, write, 1, &pt_write, | 1815 | 0, write, 1, &pt_write, |
1846 | largepage, 0, gfn, pfn, false); | 1816 | largepage, gfn, pfn, false); |
1847 | ++vcpu->stat.pf_fixed; | 1817 | ++vcpu->stat.pf_fixed; |
1848 | break; | 1818 | break; |
1849 | } | 1819 | } |
@@ -1942,7 +1912,19 @@ static void mmu_free_roots(struct kvm_vcpu *vcpu) | |||
1942 | vcpu->arch.mmu.root_hpa = INVALID_PAGE; | 1912 | vcpu->arch.mmu.root_hpa = INVALID_PAGE; |
1943 | } | 1913 | } |
1944 | 1914 | ||
1945 | static void mmu_alloc_roots(struct kvm_vcpu *vcpu) | 1915 | static int mmu_check_root(struct kvm_vcpu *vcpu, gfn_t root_gfn) |
1916 | { | ||
1917 | int ret = 0; | ||
1918 | |||
1919 | if (!kvm_is_visible_gfn(vcpu->kvm, root_gfn)) { | ||
1920 | set_bit(KVM_REQ_TRIPLE_FAULT, &vcpu->requests); | ||
1921 | ret = 1; | ||
1922 | } | ||
1923 | |||
1924 | return ret; | ||
1925 | } | ||
1926 | |||
1927 | static int mmu_alloc_roots(struct kvm_vcpu *vcpu) | ||
1946 | { | 1928 | { |
1947 | int i; | 1929 | int i; |
1948 | gfn_t root_gfn; | 1930 | gfn_t root_gfn; |
@@ -1957,13 +1939,15 @@ static void mmu_alloc_roots(struct kvm_vcpu *vcpu) | |||
1957 | ASSERT(!VALID_PAGE(root)); | 1939 | ASSERT(!VALID_PAGE(root)); |
1958 | if (tdp_enabled) | 1940 | if (tdp_enabled) |
1959 | direct = 1; | 1941 | direct = 1; |
1942 | if (mmu_check_root(vcpu, root_gfn)) | ||
1943 | return 1; | ||
1960 | sp = kvm_mmu_get_page(vcpu, root_gfn, 0, | 1944 | sp = kvm_mmu_get_page(vcpu, root_gfn, 0, |
1961 | PT64_ROOT_LEVEL, direct, | 1945 | PT64_ROOT_LEVEL, direct, |
1962 | ACC_ALL, NULL); | 1946 | ACC_ALL, NULL); |
1963 | root = __pa(sp->spt); | 1947 | root = __pa(sp->spt); |
1964 | ++sp->root_count; | 1948 | ++sp->root_count; |
1965 | vcpu->arch.mmu.root_hpa = root; | 1949 | vcpu->arch.mmu.root_hpa = root; |
1966 | return; | 1950 | return 0; |
1967 | } | 1951 | } |
1968 | direct = !is_paging(vcpu); | 1952 | direct = !is_paging(vcpu); |
1969 | if (tdp_enabled) | 1953 | if (tdp_enabled) |
@@ -1980,6 +1964,8 @@ static void mmu_alloc_roots(struct kvm_vcpu *vcpu) | |||
1980 | root_gfn = vcpu->arch.pdptrs[i] >> PAGE_SHIFT; | 1964 | root_gfn = vcpu->arch.pdptrs[i] >> PAGE_SHIFT; |
1981 | } else if (vcpu->arch.mmu.root_level == 0) | 1965 | } else if (vcpu->arch.mmu.root_level == 0) |
1982 | root_gfn = 0; | 1966 | root_gfn = 0; |
1967 | if (mmu_check_root(vcpu, root_gfn)) | ||
1968 | return 1; | ||
1983 | sp = kvm_mmu_get_page(vcpu, root_gfn, i << 30, | 1969 | sp = kvm_mmu_get_page(vcpu, root_gfn, i << 30, |
1984 | PT32_ROOT_LEVEL, direct, | 1970 | PT32_ROOT_LEVEL, direct, |
1985 | ACC_ALL, NULL); | 1971 | ACC_ALL, NULL); |
@@ -1988,6 +1974,7 @@ static void mmu_alloc_roots(struct kvm_vcpu *vcpu) | |||
1988 | vcpu->arch.mmu.pae_root[i] = root | PT_PRESENT_MASK; | 1974 | vcpu->arch.mmu.pae_root[i] = root | PT_PRESENT_MASK; |
1989 | } | 1975 | } |
1990 | vcpu->arch.mmu.root_hpa = __pa(vcpu->arch.mmu.pae_root); | 1976 | vcpu->arch.mmu.root_hpa = __pa(vcpu->arch.mmu.pae_root); |
1977 | return 0; | ||
1991 | } | 1978 | } |
1992 | 1979 | ||
1993 | static void mmu_sync_roots(struct kvm_vcpu *vcpu) | 1980 | static void mmu_sync_roots(struct kvm_vcpu *vcpu) |
@@ -2006,7 +1993,7 @@ static void mmu_sync_roots(struct kvm_vcpu *vcpu) | |||
2006 | for (i = 0; i < 4; ++i) { | 1993 | for (i = 0; i < 4; ++i) { |
2007 | hpa_t root = vcpu->arch.mmu.pae_root[i]; | 1994 | hpa_t root = vcpu->arch.mmu.pae_root[i]; |
2008 | 1995 | ||
2009 | if (root) { | 1996 | if (root && VALID_PAGE(root)) { |
2010 | root &= PT64_BASE_ADDR_MASK; | 1997 | root &= PT64_BASE_ADDR_MASK; |
2011 | sp = page_header(root); | 1998 | sp = page_header(root); |
2012 | mmu_sync_children(vcpu, sp); | 1999 | mmu_sync_children(vcpu, sp); |
@@ -2014,15 +2001,6 @@ static void mmu_sync_roots(struct kvm_vcpu *vcpu) | |||
2014 | } | 2001 | } |
2015 | } | 2002 | } |
2016 | 2003 | ||
2017 | static void mmu_sync_global(struct kvm_vcpu *vcpu) | ||
2018 | { | ||
2019 | struct kvm *kvm = vcpu->kvm; | ||
2020 | struct kvm_mmu_page *sp, *n; | ||
2021 | |||
2022 | list_for_each_entry_safe(sp, n, &kvm->arch.oos_global_pages, oos_link) | ||
2023 | kvm_sync_page(vcpu, sp); | ||
2024 | } | ||
2025 | |||
2026 | void kvm_mmu_sync_roots(struct kvm_vcpu *vcpu) | 2004 | void kvm_mmu_sync_roots(struct kvm_vcpu *vcpu) |
2027 | { | 2005 | { |
2028 | spin_lock(&vcpu->kvm->mmu_lock); | 2006 | spin_lock(&vcpu->kvm->mmu_lock); |
@@ -2030,13 +2008,6 @@ void kvm_mmu_sync_roots(struct kvm_vcpu *vcpu) | |||
2030 | spin_unlock(&vcpu->kvm->mmu_lock); | 2008 | spin_unlock(&vcpu->kvm->mmu_lock); |
2031 | } | 2009 | } |
2032 | 2010 | ||
2033 | void kvm_mmu_sync_global(struct kvm_vcpu *vcpu) | ||
2034 | { | ||
2035 | spin_lock(&vcpu->kvm->mmu_lock); | ||
2036 | mmu_sync_global(vcpu); | ||
2037 | spin_unlock(&vcpu->kvm->mmu_lock); | ||
2038 | } | ||
2039 | |||
2040 | static gpa_t nonpaging_gva_to_gpa(struct kvm_vcpu *vcpu, gva_t vaddr) | 2011 | static gpa_t nonpaging_gva_to_gpa(struct kvm_vcpu *vcpu, gva_t vaddr) |
2041 | { | 2012 | { |
2042 | return vaddr; | 2013 | return vaddr; |
@@ -2151,6 +2122,14 @@ static void paging_free(struct kvm_vcpu *vcpu) | |||
2151 | nonpaging_free(vcpu); | 2122 | nonpaging_free(vcpu); |
2152 | } | 2123 | } |
2153 | 2124 | ||
2125 | static bool is_rsvd_bits_set(struct kvm_vcpu *vcpu, u64 gpte, int level) | ||
2126 | { | ||
2127 | int bit7; | ||
2128 | |||
2129 | bit7 = (gpte >> 7) & 1; | ||
2130 | return (gpte & vcpu->arch.mmu.rsvd_bits_mask[bit7][level-1]) != 0; | ||
2131 | } | ||
2132 | |||
2154 | #define PTTYPE 64 | 2133 | #define PTTYPE 64 |
2155 | #include "paging_tmpl.h" | 2134 | #include "paging_tmpl.h" |
2156 | #undef PTTYPE | 2135 | #undef PTTYPE |
@@ -2159,6 +2138,59 @@ static void paging_free(struct kvm_vcpu *vcpu) | |||
2159 | #include "paging_tmpl.h" | 2138 | #include "paging_tmpl.h" |
2160 | #undef PTTYPE | 2139 | #undef PTTYPE |
2161 | 2140 | ||
2141 | static void reset_rsvds_bits_mask(struct kvm_vcpu *vcpu, int level) | ||
2142 | { | ||
2143 | struct kvm_mmu *context = &vcpu->arch.mmu; | ||
2144 | int maxphyaddr = cpuid_maxphyaddr(vcpu); | ||
2145 | u64 exb_bit_rsvd = 0; | ||
2146 | |||
2147 | if (!is_nx(vcpu)) | ||
2148 | exb_bit_rsvd = rsvd_bits(63, 63); | ||
2149 | switch (level) { | ||
2150 | case PT32_ROOT_LEVEL: | ||
2151 | /* no rsvd bits for 2 level 4K page table entries */ | ||
2152 | context->rsvd_bits_mask[0][1] = 0; | ||
2153 | context->rsvd_bits_mask[0][0] = 0; | ||
2154 | if (is_cpuid_PSE36()) | ||
2155 | /* 36bits PSE 4MB page */ | ||
2156 | context->rsvd_bits_mask[1][1] = rsvd_bits(17, 21); | ||
2157 | else | ||
2158 | /* 32 bits PSE 4MB page */ | ||
2159 | context->rsvd_bits_mask[1][1] = rsvd_bits(13, 21); | ||
2160 | context->rsvd_bits_mask[1][0] = ~0ull; | ||
2161 | break; | ||
2162 | case PT32E_ROOT_LEVEL: | ||
2163 | context->rsvd_bits_mask[0][2] = | ||
2164 | rsvd_bits(maxphyaddr, 63) | | ||
2165 | rsvd_bits(7, 8) | rsvd_bits(1, 2); /* PDPTE */ | ||
2166 | context->rsvd_bits_mask[0][1] = exb_bit_rsvd | | ||
2167 | rsvd_bits(maxphyaddr, 62); /* PDE */ | ||
2168 | context->rsvd_bits_mask[0][0] = exb_bit_rsvd | | ||
2169 | rsvd_bits(maxphyaddr, 62); /* PTE */ | ||
2170 | context->rsvd_bits_mask[1][1] = exb_bit_rsvd | | ||
2171 | rsvd_bits(maxphyaddr, 62) | | ||
2172 | rsvd_bits(13, 20); /* large page */ | ||
2173 | context->rsvd_bits_mask[1][0] = ~0ull; | ||
2174 | break; | ||
2175 | case PT64_ROOT_LEVEL: | ||
2176 | context->rsvd_bits_mask[0][3] = exb_bit_rsvd | | ||
2177 | rsvd_bits(maxphyaddr, 51) | rsvd_bits(7, 8); | ||
2178 | context->rsvd_bits_mask[0][2] = exb_bit_rsvd | | ||
2179 | rsvd_bits(maxphyaddr, 51) | rsvd_bits(7, 8); | ||
2180 | context->rsvd_bits_mask[0][1] = exb_bit_rsvd | | ||
2181 | rsvd_bits(maxphyaddr, 51); | ||
2182 | context->rsvd_bits_mask[0][0] = exb_bit_rsvd | | ||
2183 | rsvd_bits(maxphyaddr, 51); | ||
2184 | context->rsvd_bits_mask[1][3] = context->rsvd_bits_mask[0][3]; | ||
2185 | context->rsvd_bits_mask[1][2] = context->rsvd_bits_mask[0][2]; | ||
2186 | context->rsvd_bits_mask[1][1] = exb_bit_rsvd | | ||
2187 | rsvd_bits(maxphyaddr, 51) | | ||
2188 | rsvd_bits(13, 20); /* large page */ | ||
2189 | context->rsvd_bits_mask[1][0] = ~0ull; | ||
2190 | break; | ||
2191 | } | ||
2192 | } | ||
2193 | |||
2162 | static int paging64_init_context_common(struct kvm_vcpu *vcpu, int level) | 2194 | static int paging64_init_context_common(struct kvm_vcpu *vcpu, int level) |
2163 | { | 2195 | { |
2164 | struct kvm_mmu *context = &vcpu->arch.mmu; | 2196 | struct kvm_mmu *context = &vcpu->arch.mmu; |
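Note: reset_rsvds_bits_mask() precomputes, per paging mode and level, which guest-PTE bits must be zero; is_rsvd_bits_set() then checks a walked gpte against the table so the fault path can report PFERR_RSVD_MASK. For 4-level paging with maxphyaddr = 36 and NX disabled:

	/* Worked example of the PT64 table above (maxphyaddr = 36, !is_nx so
	 * exb_bit_rsvd = rsvd_bits(63, 63) = bit 63):
	 *   rsvd_bits_mask[0][0] (4K PTE) = bit 63 | rsvd_bits(36, 51)
	 *                                 = 0x800ffff000000000
	 *   rsvd_bits_mask[1][1] (2M PDE) = bit 63 | rsvd_bits(36, 51) | rsvd_bits(13, 20)
	 *                                 = 0x800ffff0001fe000
	 */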
@@ -2179,6 +2211,7 @@ static int paging64_init_context_common(struct kvm_vcpu *vcpu, int level) | |||
2179 | 2211 | ||
2180 | static int paging64_init_context(struct kvm_vcpu *vcpu) | 2212 | static int paging64_init_context(struct kvm_vcpu *vcpu) |
2181 | { | 2213 | { |
2214 | reset_rsvds_bits_mask(vcpu, PT64_ROOT_LEVEL); | ||
2182 | return paging64_init_context_common(vcpu, PT64_ROOT_LEVEL); | 2215 | return paging64_init_context_common(vcpu, PT64_ROOT_LEVEL); |
2183 | } | 2216 | } |
2184 | 2217 | ||
@@ -2186,6 +2219,7 @@ static int paging32_init_context(struct kvm_vcpu *vcpu) | |||
2186 | { | 2219 | { |
2187 | struct kvm_mmu *context = &vcpu->arch.mmu; | 2220 | struct kvm_mmu *context = &vcpu->arch.mmu; |
2188 | 2221 | ||
2222 | reset_rsvds_bits_mask(vcpu, PT32_ROOT_LEVEL); | ||
2189 | context->new_cr3 = paging_new_cr3; | 2223 | context->new_cr3 = paging_new_cr3; |
2190 | context->page_fault = paging32_page_fault; | 2224 | context->page_fault = paging32_page_fault; |
2191 | context->gva_to_gpa = paging32_gva_to_gpa; | 2225 | context->gva_to_gpa = paging32_gva_to_gpa; |
@@ -2201,6 +2235,7 @@ static int paging32_init_context(struct kvm_vcpu *vcpu) | |||
2201 | 2235 | ||
2202 | static int paging32E_init_context(struct kvm_vcpu *vcpu) | 2236 | static int paging32E_init_context(struct kvm_vcpu *vcpu) |
2203 | { | 2237 | { |
2238 | reset_rsvds_bits_mask(vcpu, PT32E_ROOT_LEVEL); | ||
2204 | return paging64_init_context_common(vcpu, PT32E_ROOT_LEVEL); | 2239 | return paging64_init_context_common(vcpu, PT32E_ROOT_LEVEL); |
2205 | } | 2240 | } |
2206 | 2241 | ||
@@ -2221,12 +2256,15 @@ static int init_kvm_tdp_mmu(struct kvm_vcpu *vcpu) | |||
2221 | context->gva_to_gpa = nonpaging_gva_to_gpa; | 2256 | context->gva_to_gpa = nonpaging_gva_to_gpa; |
2222 | context->root_level = 0; | 2257 | context->root_level = 0; |
2223 | } else if (is_long_mode(vcpu)) { | 2258 | } else if (is_long_mode(vcpu)) { |
2259 | reset_rsvds_bits_mask(vcpu, PT64_ROOT_LEVEL); | ||
2224 | context->gva_to_gpa = paging64_gva_to_gpa; | 2260 | context->gva_to_gpa = paging64_gva_to_gpa; |
2225 | context->root_level = PT64_ROOT_LEVEL; | 2261 | context->root_level = PT64_ROOT_LEVEL; |
2226 | } else if (is_pae(vcpu)) { | 2262 | } else if (is_pae(vcpu)) { |
2263 | reset_rsvds_bits_mask(vcpu, PT32E_ROOT_LEVEL); | ||
2227 | context->gva_to_gpa = paging64_gva_to_gpa; | 2264 | context->gva_to_gpa = paging64_gva_to_gpa; |
2228 | context->root_level = PT32E_ROOT_LEVEL; | 2265 | context->root_level = PT32E_ROOT_LEVEL; |
2229 | } else { | 2266 | } else { |
2267 | reset_rsvds_bits_mask(vcpu, PT32_ROOT_LEVEL); | ||
2230 | context->gva_to_gpa = paging32_gva_to_gpa; | 2268 | context->gva_to_gpa = paging32_gva_to_gpa; |
2231 | context->root_level = PT32_ROOT_LEVEL; | 2269 | context->root_level = PT32_ROOT_LEVEL; |
2232 | } | 2270 | } |
@@ -2290,9 +2328,11 @@ int kvm_mmu_load(struct kvm_vcpu *vcpu) | |||
2290 | goto out; | 2328 | goto out; |
2291 | spin_lock(&vcpu->kvm->mmu_lock); | 2329 | spin_lock(&vcpu->kvm->mmu_lock); |
2292 | kvm_mmu_free_some_pages(vcpu); | 2330 | kvm_mmu_free_some_pages(vcpu); |
2293 | mmu_alloc_roots(vcpu); | 2331 | r = mmu_alloc_roots(vcpu); |
2294 | mmu_sync_roots(vcpu); | 2332 | mmu_sync_roots(vcpu); |
2295 | spin_unlock(&vcpu->kvm->mmu_lock); | 2333 | spin_unlock(&vcpu->kvm->mmu_lock); |
2334 | if (r) | ||
2335 | goto out; | ||
2296 | kvm_x86_ops->set_cr3(vcpu, vcpu->arch.mmu.root_hpa); | 2336 | kvm_x86_ops->set_cr3(vcpu, vcpu->arch.mmu.root_hpa); |
2297 | kvm_mmu_flush_tlb(vcpu); | 2337 | kvm_mmu_flush_tlb(vcpu); |
2298 | out: | 2338 | out: |
@@ -2638,14 +2678,6 @@ EXPORT_SYMBOL_GPL(kvm_disable_tdp); | |||
2638 | 2678 | ||
2639 | static void free_mmu_pages(struct kvm_vcpu *vcpu) | 2679 | static void free_mmu_pages(struct kvm_vcpu *vcpu) |
2640 | { | 2680 | { |
2641 | struct kvm_mmu_page *sp; | ||
2642 | |||
2643 | while (!list_empty(&vcpu->kvm->arch.active_mmu_pages)) { | ||
2644 | sp = container_of(vcpu->kvm->arch.active_mmu_pages.next, | ||
2645 | struct kvm_mmu_page, link); | ||
2646 | kvm_mmu_zap_page(vcpu->kvm, sp); | ||
2647 | cond_resched(); | ||
2648 | } | ||
2649 | free_page((unsigned long)vcpu->arch.mmu.pae_root); | 2681 | free_page((unsigned long)vcpu->arch.mmu.pae_root); |
2650 | } | 2682 | } |
2651 | 2683 | ||
@@ -2710,7 +2742,6 @@ void kvm_mmu_slot_remove_write_access(struct kvm *kvm, int slot) | |||
2710 | { | 2742 | { |
2711 | struct kvm_mmu_page *sp; | 2743 | struct kvm_mmu_page *sp; |
2712 | 2744 | ||
2713 | spin_lock(&kvm->mmu_lock); | ||
2714 | list_for_each_entry(sp, &kvm->arch.active_mmu_pages, link) { | 2745 | list_for_each_entry(sp, &kvm->arch.active_mmu_pages, link) { |
2715 | int i; | 2746 | int i; |
2716 | u64 *pt; | 2747 | u64 *pt; |
@@ -2725,7 +2756,6 @@ void kvm_mmu_slot_remove_write_access(struct kvm *kvm, int slot) | |||
2725 | pt[i] &= ~PT_WRITABLE_MASK; | 2756 | pt[i] &= ~PT_WRITABLE_MASK; |
2726 | } | 2757 | } |
2727 | kvm_flush_remote_tlbs(kvm); | 2758 | kvm_flush_remote_tlbs(kvm); |
2728 | spin_unlock(&kvm->mmu_lock); | ||
2729 | } | 2759 | } |
2730 | 2760 | ||
2731 | void kvm_mmu_zap_all(struct kvm *kvm) | 2761 | void kvm_mmu_zap_all(struct kvm *kvm) |
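With the spin_lock/spin_unlock pair removed from kvm_mmu_slot_remove_write_access(), locking becomes the caller's responsibility. A hedged sketch of the expected caller shape (the actual call sites, presumably the memslot and dirty-log paths in x86.c, are not part of this excerpt):

	spin_lock(&kvm->mmu_lock);
	kvm_mmu_slot_remove_write_access(kvm, slot);
	spin_unlock(&kvm->mmu_lock);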
@@ -3007,11 +3037,13 @@ static void audit_mappings_page(struct kvm_vcpu *vcpu, u64 page_pte, | |||
3007 | " in nonleaf level: levels %d gva %lx" | 3037 | " in nonleaf level: levels %d gva %lx" |
3008 | " level %d pte %llx\n", audit_msg, | 3038 | " level %d pte %llx\n", audit_msg, |
3009 | vcpu->arch.mmu.root_level, va, level, ent); | 3039 | vcpu->arch.mmu.root_level, va, level, ent); |
3010 | 3040 | else | |
3011 | audit_mappings_page(vcpu, ent, va, level - 1); | 3041 | audit_mappings_page(vcpu, ent, va, level - 1); |
3012 | } else { | 3042 | } else { |
3013 | gpa_t gpa = vcpu->arch.mmu.gva_to_gpa(vcpu, va); | 3043 | gpa_t gpa = vcpu->arch.mmu.gva_to_gpa(vcpu, va); |
3014 | hpa_t hpa = (hpa_t)gpa_to_pfn(vcpu, gpa) << PAGE_SHIFT; | 3044 | gfn_t gfn = gpa >> PAGE_SHIFT; |
3045 | pfn_t pfn = gfn_to_pfn(vcpu->kvm, gfn); | ||
3046 | hpa_t hpa = (hpa_t)pfn << PAGE_SHIFT; | ||
3015 | 3047 | ||
3016 | if (is_shadow_present_pte(ent) | 3048 | if (is_shadow_present_pte(ent) |
3017 | && (ent & PT64_BASE_ADDR_MASK) != hpa) | 3049 | && (ent & PT64_BASE_ADDR_MASK) != hpa) |
diff --git a/arch/x86/kvm/mmu.h b/arch/x86/kvm/mmu.h index eaab2145f62b..3494a2fb136e 100644 --- a/arch/x86/kvm/mmu.h +++ b/arch/x86/kvm/mmu.h | |||
@@ -75,4 +75,9 @@ static inline int is_paging(struct kvm_vcpu *vcpu) | |||
75 | return vcpu->arch.cr0 & X86_CR0_PG; | 75 | return vcpu->arch.cr0 & X86_CR0_PG; |
76 | } | 76 | } |
77 | 77 | ||
78 | static inline int is_present_pte(unsigned long pte) | ||
79 | { | ||
80 | return pte & PT_PRESENT_MASK; | ||
81 | } | ||
82 | |||
78 | #endif | 83 | #endif |
diff --git a/arch/x86/kvm/paging_tmpl.h b/arch/x86/kvm/paging_tmpl.h index 6bd70206c561..258e4591e1ca 100644 --- a/arch/x86/kvm/paging_tmpl.h +++ b/arch/x86/kvm/paging_tmpl.h | |||
@@ -123,6 +123,7 @@ static int FNAME(walk_addr)(struct guest_walker *walker, | |||
123 | gfn_t table_gfn; | 123 | gfn_t table_gfn; |
124 | unsigned index, pt_access, pte_access; | 124 | unsigned index, pt_access, pte_access; |
125 | gpa_t pte_gpa; | 125 | gpa_t pte_gpa; |
126 | int rsvd_fault = 0; | ||
126 | 127 | ||
127 | pgprintk("%s: addr %lx\n", __func__, addr); | 128 | pgprintk("%s: addr %lx\n", __func__, addr); |
128 | walk: | 129 | walk: |
@@ -157,6 +158,10 @@ walk: | |||
157 | if (!is_present_pte(pte)) | 158 | if (!is_present_pte(pte)) |
158 | goto not_present; | 159 | goto not_present; |
159 | 160 | ||
161 | rsvd_fault = is_rsvd_bits_set(vcpu, pte, walker->level); | ||
162 | if (rsvd_fault) | ||
163 | goto access_error; | ||
164 | |||
160 | if (write_fault && !is_writeble_pte(pte)) | 165 | if (write_fault && !is_writeble_pte(pte)) |
161 | if (user_fault || is_write_protection(vcpu)) | 166 | if (user_fault || is_write_protection(vcpu)) |
162 | goto access_error; | 167 | goto access_error; |
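walk_addr() now calls is_rsvd_bits_set(), which is defined in mmu.c rather than in this hunk. A sketch of what that check plausibly does with the per-level masks set up earlier (the bit-7 index selecting the large-page row is an assumption consistent with the mask layout above):

	static int is_rsvd_bits_set(struct kvm_vcpu *vcpu, u64 gpte, int level)
	{
		int bit7 = (gpte >> 7) & 1;	/* PSE/large-page bit */

		return (gpte & vcpu->arch.mmu.rsvd_bits_mask[bit7][level - 1]) != 0;
	}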
@@ -209,7 +214,6 @@ walk: | |||
209 | if (ret) | 214 | if (ret) |
210 | goto walk; | 215 | goto walk; |
211 | pte |= PT_DIRTY_MASK; | 216 | pte |= PT_DIRTY_MASK; |
212 | kvm_mmu_pte_write(vcpu, pte_gpa, (u8 *)&pte, sizeof(pte), 0); | ||
213 | walker->ptes[walker->level - 1] = pte; | 217 | walker->ptes[walker->level - 1] = pte; |
214 | } | 218 | } |
215 | 219 | ||
@@ -233,6 +237,8 @@ err: | |||
233 | walker->error_code |= PFERR_USER_MASK; | 237 | walker->error_code |= PFERR_USER_MASK; |
234 | if (fetch_fault) | 238 | if (fetch_fault) |
235 | walker->error_code |= PFERR_FETCH_MASK; | 239 | walker->error_code |= PFERR_FETCH_MASK; |
240 | if (rsvd_fault) | ||
241 | walker->error_code |= PFERR_RSVD_MASK; | ||
236 | return 0; | 242 | return 0; |
237 | } | 243 | } |
238 | 244 | ||
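For reference, the page-fault error-code bits OR-ed together in the err: path follow the architectural layout; the macros themselves live in kvm_host.h, not in this diff:

	#define PFERR_PRESENT_MASK	(1U << 0)	/* page was present */
	#define PFERR_WRITE_MASK	(1U << 1)	/* write access */
	#define PFERR_USER_MASK		(1U << 2)	/* user-mode access */
	#define PFERR_RSVD_MASK		(1U << 3)	/* reserved bit set in a paging entry */
	#define PFERR_FETCH_MASK	(1U << 4)	/* instruction fetch */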
@@ -262,8 +268,7 @@ static void FNAME(update_pte)(struct kvm_vcpu *vcpu, struct kvm_mmu_page *page, | |||
262 | kvm_get_pfn(pfn); | 268 | kvm_get_pfn(pfn); |
263 | mmu_set_spte(vcpu, spte, page->role.access, pte_access, 0, 0, | 269 | mmu_set_spte(vcpu, spte, page->role.access, pte_access, 0, 0, |
264 | gpte & PT_DIRTY_MASK, NULL, largepage, | 270 | gpte & PT_DIRTY_MASK, NULL, largepage, |
265 | gpte & PT_GLOBAL_MASK, gpte_to_gfn(gpte), | 271 | gpte_to_gfn(gpte), pfn, true); |
266 | pfn, true); | ||
267 | } | 272 | } |
268 | 273 | ||
269 | /* | 274 | /* |
@@ -297,7 +302,6 @@ static u64 *FNAME(fetch)(struct kvm_vcpu *vcpu, gva_t addr, | |||
297 | user_fault, write_fault, | 302 | user_fault, write_fault, |
298 | gw->ptes[gw->level-1] & PT_DIRTY_MASK, | 303 | gw->ptes[gw->level-1] & PT_DIRTY_MASK, |
299 | ptwrite, largepage, | 304 | ptwrite, largepage, |
300 | gw->ptes[gw->level-1] & PT_GLOBAL_MASK, | ||
301 | gw->gfn, pfn, false); | 305 | gw->gfn, pfn, false); |
302 | break; | 306 | break; |
303 | } | 307 | } |
@@ -380,7 +384,7 @@ static int FNAME(page_fault)(struct kvm_vcpu *vcpu, gva_t addr, | |||
380 | return r; | 384 | return r; |
381 | 385 | ||
382 | /* | 386 | /* |
383 | * Look up the shadow pte for the faulting address. | 387 | * Look up the guest pte for the faulting address. |
384 | */ | 388 | */ |
385 | r = FNAME(walk_addr)(&walker, vcpu, addr, write_fault, user_fault, | 389 | r = FNAME(walk_addr)(&walker, vcpu, addr, write_fault, user_fault, |
386 | fetch_fault); | 390 | fetch_fault); |
@@ -586,7 +590,7 @@ static int FNAME(sync_page)(struct kvm_vcpu *vcpu, struct kvm_mmu_page *sp) | |||
586 | nr_present++; | 590 | nr_present++; |
587 | pte_access = sp->role.access & FNAME(gpte_access)(vcpu, gpte); | 591 | pte_access = sp->role.access & FNAME(gpte_access)(vcpu, gpte); |
588 | set_spte(vcpu, &sp->spt[i], pte_access, 0, 0, | 592 | set_spte(vcpu, &sp->spt[i], pte_access, 0, 0, |
589 | is_dirty_pte(gpte), 0, gpte & PT_GLOBAL_MASK, gfn, | 593 | is_dirty_pte(gpte), 0, gfn, |
590 | spte_to_pfn(sp->spt[i]), true, false); | 594 | spte_to_pfn(sp->spt[i]), true, false); |
591 | } | 595 | } |
592 | 596 | ||
diff --git a/arch/x86/kvm/svm.c b/arch/x86/kvm/svm.c index 1f8510c51d6e..71510e07e69e 100644 --- a/arch/x86/kvm/svm.c +++ b/arch/x86/kvm/svm.c | |||
@@ -19,6 +19,7 @@ | |||
19 | #include "irq.h" | 19 | #include "irq.h" |
20 | #include "mmu.h" | 20 | #include "mmu.h" |
21 | #include "kvm_cache_regs.h" | 21 | #include "kvm_cache_regs.h" |
22 | #include "x86.h" | ||
22 | 23 | ||
23 | #include <linux/module.h> | 24 | #include <linux/module.h> |
24 | #include <linux/kernel.h> | 25 | #include <linux/kernel.h> |
@@ -69,7 +70,6 @@ module_param(npt, int, S_IRUGO); | |||
69 | static int nested = 0; | 70 | static int nested = 0; |
70 | module_param(nested, int, S_IRUGO); | 71 | module_param(nested, int, S_IRUGO); |
71 | 72 | ||
72 | static void kvm_reput_irq(struct vcpu_svm *svm); | ||
73 | static void svm_flush_tlb(struct kvm_vcpu *vcpu); | 73 | static void svm_flush_tlb(struct kvm_vcpu *vcpu); |
74 | 74 | ||
75 | static int nested_svm_exit_handled(struct vcpu_svm *svm, bool kvm_override); | 75 | static int nested_svm_exit_handled(struct vcpu_svm *svm, bool kvm_override); |
@@ -132,24 +132,6 @@ static inline u32 svm_has(u32 feat) | |||
132 | return svm_features & feat; | 132 | return svm_features & feat; |
133 | } | 133 | } |
134 | 134 | ||
135 | static inline u8 pop_irq(struct kvm_vcpu *vcpu) | ||
136 | { | ||
137 | int word_index = __ffs(vcpu->arch.irq_summary); | ||
138 | int bit_index = __ffs(vcpu->arch.irq_pending[word_index]); | ||
139 | int irq = word_index * BITS_PER_LONG + bit_index; | ||
140 | |||
141 | clear_bit(bit_index, &vcpu->arch.irq_pending[word_index]); | ||
142 | if (!vcpu->arch.irq_pending[word_index]) | ||
143 | clear_bit(word_index, &vcpu->arch.irq_summary); | ||
144 | return irq; | ||
145 | } | ||
146 | |||
147 | static inline void push_irq(struct kvm_vcpu *vcpu, u8 irq) | ||
148 | { | ||
149 | set_bit(irq, vcpu->arch.irq_pending); | ||
150 | set_bit(irq / BITS_PER_LONG, &vcpu->arch.irq_summary); | ||
151 | } | ||
152 | |||
153 | static inline void clgi(void) | 135 | static inline void clgi(void) |
154 | { | 136 | { |
155 | asm volatile (__ex(SVM_CLGI)); | 137 | asm volatile (__ex(SVM_CLGI)); |
@@ -214,17 +196,31 @@ static void svm_queue_exception(struct kvm_vcpu *vcpu, unsigned nr, | |||
214 | svm->vmcb->control.event_inj_err = error_code; | 196 | svm->vmcb->control.event_inj_err = error_code; |
215 | } | 197 | } |
216 | 198 | ||
217 | static bool svm_exception_injected(struct kvm_vcpu *vcpu) | 199 | static int is_external_interrupt(u32 info) |
200 | { | ||
201 | info &= SVM_EVTINJ_TYPE_MASK | SVM_EVTINJ_VALID; | ||
202 | return info == (SVM_EVTINJ_VALID | SVM_EVTINJ_TYPE_INTR); | ||
203 | } | ||
204 | |||
205 | static u32 svm_get_interrupt_shadow(struct kvm_vcpu *vcpu, int mask) | ||
218 | { | 206 | { |
219 | struct vcpu_svm *svm = to_svm(vcpu); | 207 | struct vcpu_svm *svm = to_svm(vcpu); |
208 | u32 ret = 0; | ||
220 | 209 | ||
221 | return !(svm->vmcb->control.exit_int_info & SVM_EXITINTINFO_VALID); | 210 | if (svm->vmcb->control.int_state & SVM_INTERRUPT_SHADOW_MASK) |
211 | ret |= X86_SHADOW_INT_STI | X86_SHADOW_INT_MOV_SS; | ||
212 | return ret & mask; | ||
222 | } | 213 | } |
223 | 214 | ||
224 | static int is_external_interrupt(u32 info) | 215 | static void svm_set_interrupt_shadow(struct kvm_vcpu *vcpu, int mask) |
225 | { | 216 | { |
226 | info &= SVM_EVTINJ_TYPE_MASK | SVM_EVTINJ_VALID; | 217 | struct vcpu_svm *svm = to_svm(vcpu); |
227 | return info == (SVM_EVTINJ_VALID | SVM_EVTINJ_TYPE_INTR); | 218 | |
219 | if (mask == 0) | ||
220 | svm->vmcb->control.int_state &= ~SVM_INTERRUPT_SHADOW_MASK; | ||
221 | else | ||
222 | svm->vmcb->control.int_state |= SVM_INTERRUPT_SHADOW_MASK; | ||
223 | |||
228 | } | 224 | } |
229 | 225 | ||
230 | static void skip_emulated_instruction(struct kvm_vcpu *vcpu) | 226 | static void skip_emulated_instruction(struct kvm_vcpu *vcpu) |
@@ -232,7 +228,9 @@ static void skip_emulated_instruction(struct kvm_vcpu *vcpu) | |||
232 | struct vcpu_svm *svm = to_svm(vcpu); | 228 | struct vcpu_svm *svm = to_svm(vcpu); |
233 | 229 | ||
234 | if (!svm->next_rip) { | 230 | if (!svm->next_rip) { |
235 | printk(KERN_DEBUG "%s: NOP\n", __func__); | 231 | if (emulate_instruction(vcpu, vcpu->run, 0, 0, EMULTYPE_SKIP) != |
232 | EMULATE_DONE) | ||
233 | printk(KERN_DEBUG "%s: NOP\n", __func__); | ||
236 | return; | 234 | return; |
237 | } | 235 | } |
238 | if (svm->next_rip - kvm_rip_read(vcpu) > MAX_INST_SIZE) | 236 | if (svm->next_rip - kvm_rip_read(vcpu) > MAX_INST_SIZE) |
@@ -240,9 +238,7 @@ static void skip_emulated_instruction(struct kvm_vcpu *vcpu) | |||
240 | __func__, kvm_rip_read(vcpu), svm->next_rip); | 238 | __func__, kvm_rip_read(vcpu), svm->next_rip); |
241 | 239 | ||
242 | kvm_rip_write(vcpu, svm->next_rip); | 240 | kvm_rip_write(vcpu, svm->next_rip); |
243 | svm->vmcb->control.int_state &= ~SVM_INTERRUPT_SHADOW_MASK; | 241 | svm_set_interrupt_shadow(vcpu, 0); |
244 | |||
245 | vcpu->arch.interrupt_window_open = (svm->vcpu.arch.hflags & HF_GIF_MASK); | ||
246 | } | 242 | } |
247 | 243 | ||
248 | static int has_svm(void) | 244 | static int has_svm(void) |
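svm_get_interrupt_shadow()/svm_set_interrupt_shadow() translate the single SVM_INTERRUPT_SHADOW_MASK bit into the vendor-neutral X86_SHADOW_INT_STI/X86_SHADOW_INT_MOV_SS flags. A hypothetical caller (name assumed, not part of this diff) that drops the shadow after emulating an instruction could look like:

	static void drop_interrupt_shadow(struct kvm_vcpu *vcpu)
	{
		u32 mask = X86_SHADOW_INT_STI | X86_SHADOW_INT_MOV_SS;

		/* Mirror what skip_emulated_instruction() now does after a
		 * hardware-assisted skip: clear any pending STI/MOV-SS shadow. */
		if (kvm_x86_ops->get_interrupt_shadow(vcpu, mask))
			kvm_x86_ops->set_interrupt_shadow(vcpu, 0);
	}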
@@ -830,6 +826,15 @@ static void svm_get_segment(struct kvm_vcpu *vcpu, | |||
830 | if (!var->unusable) | 826 | if (!var->unusable) |
831 | var->type |= 0x1; | 827 | var->type |= 0x1; |
832 | break; | 828 | break; |
829 | case VCPU_SREG_SS: | ||
830 | /* On AMD CPUs sometimes the DB bit in the segment | ||
831 | * descriptor is left as 1, although the whole segment has | ||
832 | * been made unusable. Clear it here to pass an Intel VMX | ||
833 | * entry check when cross vendor migrating. | ||
834 | */ | ||
835 | if (var->unusable) | ||
836 | var->db = 0; | ||
837 | break; | ||
833 | } | 838 | } |
834 | } | 839 | } |
835 | 840 | ||
@@ -960,15 +965,16 @@ static void svm_set_segment(struct kvm_vcpu *vcpu, | |||
960 | 965 | ||
961 | } | 966 | } |
962 | 967 | ||
963 | static int svm_guest_debug(struct kvm_vcpu *vcpu, struct kvm_guest_debug *dbg) | 968 | static void update_db_intercept(struct kvm_vcpu *vcpu) |
964 | { | 969 | { |
965 | int old_debug = vcpu->guest_debug; | ||
966 | struct vcpu_svm *svm = to_svm(vcpu); | 970 | struct vcpu_svm *svm = to_svm(vcpu); |
967 | 971 | ||
968 | vcpu->guest_debug = dbg->control; | ||
969 | |||
970 | svm->vmcb->control.intercept_exceptions &= | 972 | svm->vmcb->control.intercept_exceptions &= |
971 | ~((1 << DB_VECTOR) | (1 << BP_VECTOR)); | 973 | ~((1 << DB_VECTOR) | (1 << BP_VECTOR)); |
974 | |||
975 | if (vcpu->arch.singlestep) | ||
976 | svm->vmcb->control.intercept_exceptions |= (1 << DB_VECTOR); | ||
977 | |||
972 | if (vcpu->guest_debug & KVM_GUESTDBG_ENABLE) { | 978 | if (vcpu->guest_debug & KVM_GUESTDBG_ENABLE) { |
973 | if (vcpu->guest_debug & | 979 | if (vcpu->guest_debug & |
974 | (KVM_GUESTDBG_SINGLESTEP | KVM_GUESTDBG_USE_HW_BP)) | 980 | (KVM_GUESTDBG_SINGLESTEP | KVM_GUESTDBG_USE_HW_BP)) |
@@ -979,6 +985,16 @@ static int svm_guest_debug(struct kvm_vcpu *vcpu, struct kvm_guest_debug *dbg) | |||
979 | 1 << BP_VECTOR; | 985 | 1 << BP_VECTOR; |
980 | } else | 986 | } else |
981 | vcpu->guest_debug = 0; | 987 | vcpu->guest_debug = 0; |
988 | } | ||
989 | |||
990 | static int svm_guest_debug(struct kvm_vcpu *vcpu, struct kvm_guest_debug *dbg) | ||
991 | { | ||
992 | int old_debug = vcpu->guest_debug; | ||
993 | struct vcpu_svm *svm = to_svm(vcpu); | ||
994 | |||
995 | vcpu->guest_debug = dbg->control; | ||
996 | |||
997 | update_db_intercept(vcpu); | ||
982 | 998 | ||
983 | if (vcpu->guest_debug & KVM_GUESTDBG_USE_HW_BP) | 999 | if (vcpu->guest_debug & KVM_GUESTDBG_USE_HW_BP) |
984 | svm->vmcb->save.dr7 = dbg->arch.debugreg[7]; | 1000 | svm->vmcb->save.dr7 = dbg->arch.debugreg[7]; |
@@ -993,16 +1009,6 @@ static int svm_guest_debug(struct kvm_vcpu *vcpu, struct kvm_guest_debug *dbg) | |||
993 | return 0; | 1009 | return 0; |
994 | } | 1010 | } |
995 | 1011 | ||
996 | static int svm_get_irq(struct kvm_vcpu *vcpu) | ||
997 | { | ||
998 | struct vcpu_svm *svm = to_svm(vcpu); | ||
999 | u32 exit_int_info = svm->vmcb->control.exit_int_info; | ||
1000 | |||
1001 | if (is_external_interrupt(exit_int_info)) | ||
1002 | return exit_int_info & SVM_EVTINJ_VEC_MASK; | ||
1003 | return -1; | ||
1004 | } | ||
1005 | |||
1006 | static void load_host_msrs(struct kvm_vcpu *vcpu) | 1012 | static void load_host_msrs(struct kvm_vcpu *vcpu) |
1007 | { | 1013 | { |
1008 | #ifdef CONFIG_X86_64 | 1014 | #ifdef CONFIG_X86_64 |
@@ -1107,17 +1113,8 @@ static void svm_set_dr(struct kvm_vcpu *vcpu, int dr, unsigned long value, | |||
1107 | 1113 | ||
1108 | static int pf_interception(struct vcpu_svm *svm, struct kvm_run *kvm_run) | 1114 | static int pf_interception(struct vcpu_svm *svm, struct kvm_run *kvm_run) |
1109 | { | 1115 | { |
1110 | u32 exit_int_info = svm->vmcb->control.exit_int_info; | ||
1111 | struct kvm *kvm = svm->vcpu.kvm; | ||
1112 | u64 fault_address; | 1116 | u64 fault_address; |
1113 | u32 error_code; | 1117 | u32 error_code; |
1114 | bool event_injection = false; | ||
1115 | |||
1116 | if (!irqchip_in_kernel(kvm) && | ||
1117 | is_external_interrupt(exit_int_info)) { | ||
1118 | event_injection = true; | ||
1119 | push_irq(&svm->vcpu, exit_int_info & SVM_EVTINJ_VEC_MASK); | ||
1120 | } | ||
1121 | 1118 | ||
1122 | fault_address = svm->vmcb->control.exit_info_2; | 1119 | fault_address = svm->vmcb->control.exit_info_2; |
1123 | error_code = svm->vmcb->control.exit_info_1; | 1120 | error_code = svm->vmcb->control.exit_info_1; |
@@ -1137,23 +1134,40 @@ static int pf_interception(struct vcpu_svm *svm, struct kvm_run *kvm_run) | |||
1137 | */ | 1134 | */ |
1138 | if (npt_enabled) | 1135 | if (npt_enabled) |
1139 | svm_flush_tlb(&svm->vcpu); | 1136 | svm_flush_tlb(&svm->vcpu); |
1140 | 1137 | else { | |
1141 | if (!npt_enabled && event_injection) | 1138 | if (kvm_event_needs_reinjection(&svm->vcpu)) |
1142 | kvm_mmu_unprotect_page_virt(&svm->vcpu, fault_address); | 1139 | kvm_mmu_unprotect_page_virt(&svm->vcpu, fault_address); |
1140 | } | ||
1143 | return kvm_mmu_page_fault(&svm->vcpu, fault_address, error_code); | 1141 | return kvm_mmu_page_fault(&svm->vcpu, fault_address, error_code); |
1144 | } | 1142 | } |
1145 | 1143 | ||
1146 | static int db_interception(struct vcpu_svm *svm, struct kvm_run *kvm_run) | 1144 | static int db_interception(struct vcpu_svm *svm, struct kvm_run *kvm_run) |
1147 | { | 1145 | { |
1148 | if (!(svm->vcpu.guest_debug & | 1146 | if (!(svm->vcpu.guest_debug & |
1149 | (KVM_GUESTDBG_SINGLESTEP | KVM_GUESTDBG_USE_HW_BP))) { | 1147 | (KVM_GUESTDBG_SINGLESTEP | KVM_GUESTDBG_USE_HW_BP)) && |
1148 | !svm->vcpu.arch.singlestep) { | ||
1150 | kvm_queue_exception(&svm->vcpu, DB_VECTOR); | 1149 | kvm_queue_exception(&svm->vcpu, DB_VECTOR); |
1151 | return 1; | 1150 | return 1; |
1152 | } | 1151 | } |
1153 | kvm_run->exit_reason = KVM_EXIT_DEBUG; | 1152 | |
1154 | kvm_run->debug.arch.pc = svm->vmcb->save.cs.base + svm->vmcb->save.rip; | 1153 | if (svm->vcpu.arch.singlestep) { |
1155 | kvm_run->debug.arch.exception = DB_VECTOR; | 1154 | svm->vcpu.arch.singlestep = false; |
1156 | return 0; | 1155 | if (!(svm->vcpu.guest_debug & KVM_GUESTDBG_SINGLESTEP)) |
1156 | svm->vmcb->save.rflags &= | ||
1157 | ~(X86_EFLAGS_TF | X86_EFLAGS_RF); | ||
1158 | update_db_intercept(&svm->vcpu); | ||
1159 | } | ||
1160 | |||
1161 | if (svm->vcpu.guest_debug & | ||
1162 | (KVM_GUESTDBG_SINGLESTEP | KVM_GUESTDBG_USE_HW_BP)) { | ||
1163 | kvm_run->exit_reason = KVM_EXIT_DEBUG; | ||
1164 | kvm_run->debug.arch.pc = | ||
1165 | svm->vmcb->save.cs.base + svm->vmcb->save.rip; | ||
1166 | kvm_run->debug.arch.exception = DB_VECTOR; | ||
1167 | return 0; | ||
1168 | } | ||
1169 | |||
1170 | return 1; | ||
1157 | } | 1171 | } |
1158 | 1172 | ||
1159 | static int bp_interception(struct vcpu_svm *svm, struct kvm_run *kvm_run) | 1173 | static int bp_interception(struct vcpu_svm *svm, struct kvm_run *kvm_run) |
@@ -1842,17 +1856,51 @@ static int task_switch_interception(struct vcpu_svm *svm, | |||
1842 | struct kvm_run *kvm_run) | 1856 | struct kvm_run *kvm_run) |
1843 | { | 1857 | { |
1844 | u16 tss_selector; | 1858 | u16 tss_selector; |
1859 | int reason; | ||
1860 | int int_type = svm->vmcb->control.exit_int_info & | ||
1861 | SVM_EXITINTINFO_TYPE_MASK; | ||
1862 | int int_vec = svm->vmcb->control.exit_int_info & SVM_EVTINJ_VEC_MASK; | ||
1863 | uint32_t type = | ||
1864 | svm->vmcb->control.exit_int_info & SVM_EXITINTINFO_TYPE_MASK; | ||
1865 | uint32_t idt_v = | ||
1866 | svm->vmcb->control.exit_int_info & SVM_EXITINTINFO_VALID; | ||
1845 | 1867 | ||
1846 | tss_selector = (u16)svm->vmcb->control.exit_info_1; | 1868 | tss_selector = (u16)svm->vmcb->control.exit_info_1; |
1869 | |||
1847 | if (svm->vmcb->control.exit_info_2 & | 1870 | if (svm->vmcb->control.exit_info_2 & |
1848 | (1ULL << SVM_EXITINFOSHIFT_TS_REASON_IRET)) | 1871 | (1ULL << SVM_EXITINFOSHIFT_TS_REASON_IRET)) |
1849 | return kvm_task_switch(&svm->vcpu, tss_selector, | 1872 | reason = TASK_SWITCH_IRET; |
1850 | TASK_SWITCH_IRET); | 1873 | else if (svm->vmcb->control.exit_info_2 & |
1851 | if (svm->vmcb->control.exit_info_2 & | 1874 | (1ULL << SVM_EXITINFOSHIFT_TS_REASON_JMP)) |
1852 | (1ULL << SVM_EXITINFOSHIFT_TS_REASON_JMP)) | 1875 | reason = TASK_SWITCH_JMP; |
1853 | return kvm_task_switch(&svm->vcpu, tss_selector, | 1876 | else if (idt_v) |
1854 | TASK_SWITCH_JMP); | 1877 | reason = TASK_SWITCH_GATE; |
1855 | return kvm_task_switch(&svm->vcpu, tss_selector, TASK_SWITCH_CALL); | 1878 | else |
1879 | reason = TASK_SWITCH_CALL; | ||
1880 | |||
1881 | if (reason == TASK_SWITCH_GATE) { | ||
1882 | switch (type) { | ||
1883 | case SVM_EXITINTINFO_TYPE_NMI: | ||
1884 | svm->vcpu.arch.nmi_injected = false; | ||
1885 | break; | ||
1886 | case SVM_EXITINTINFO_TYPE_EXEPT: | ||
1887 | kvm_clear_exception_queue(&svm->vcpu); | ||
1888 | break; | ||
1889 | case SVM_EXITINTINFO_TYPE_INTR: | ||
1890 | kvm_clear_interrupt_queue(&svm->vcpu); | ||
1891 | break; | ||
1892 | default: | ||
1893 | break; | ||
1894 | } | ||
1895 | } | ||
1896 | |||
1897 | if (reason != TASK_SWITCH_GATE || | ||
1898 | int_type == SVM_EXITINTINFO_TYPE_SOFT || | ||
1899 | (int_type == SVM_EXITINTINFO_TYPE_EXEPT && | ||
1900 | (int_vec == OF_VECTOR || int_vec == BP_VECTOR))) | ||
1901 | skip_emulated_instruction(&svm->vcpu); | ||
1902 | |||
1903 | return kvm_task_switch(&svm->vcpu, tss_selector, reason); | ||
1856 | } | 1904 | } |
1857 | 1905 | ||
1858 | static int cpuid_interception(struct vcpu_svm *svm, struct kvm_run *kvm_run) | 1906 | static int cpuid_interception(struct vcpu_svm *svm, struct kvm_run *kvm_run) |
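task_switch_interception() now decodes exit_int_info itself. The field layout assumed by the masks used above, per asm/svm.h and the AMD APM (listed for orientation, not introduced by this diff):

	/* exit_int_info / EVTINJ layout (assumed):
	 *   bits  7:0   vector                  (SVM_EXITINTINFO_VEC_MASK)
	 *   bits 10:8   type: 0=INTR, 2=NMI,
	 *               3=EXEPT, 4=SOFT          (SVM_EXITINTINFO_TYPE_MASK)
	 *   bit  11     error code valid         (SVM_EXITINTINFO_VALID_ERR)
	 *   bit  31     info valid               (SVM_EXITINTINFO_VALID)
	 */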
@@ -1862,6 +1910,14 @@ static int cpuid_interception(struct vcpu_svm *svm, struct kvm_run *kvm_run) | |||
1862 | return 1; | 1910 | return 1; |
1863 | } | 1911 | } |
1864 | 1912 | ||
1913 | static int iret_interception(struct vcpu_svm *svm, struct kvm_run *kvm_run) | ||
1914 | { | ||
1915 | ++svm->vcpu.stat.nmi_window_exits; | ||
1916 | svm->vmcb->control.intercept &= ~(1UL << INTERCEPT_IRET); | ||
1917 | svm->vcpu.arch.hflags |= HF_IRET_MASK; | ||
1918 | return 1; | ||
1919 | } | ||
1920 | |||
1865 | static int invlpg_interception(struct vcpu_svm *svm, struct kvm_run *kvm_run) | 1921 | static int invlpg_interception(struct vcpu_svm *svm, struct kvm_run *kvm_run) |
1866 | { | 1922 | { |
1867 | if (emulate_instruction(&svm->vcpu, kvm_run, 0, 0, 0) != EMULATE_DONE) | 1923 | if (emulate_instruction(&svm->vcpu, kvm_run, 0, 0, 0) != EMULATE_DONE) |
@@ -1879,8 +1935,14 @@ static int emulate_on_interception(struct vcpu_svm *svm, | |||
1879 | 1935 | ||
1880 | static int cr8_write_interception(struct vcpu_svm *svm, struct kvm_run *kvm_run) | 1936 | static int cr8_write_interception(struct vcpu_svm *svm, struct kvm_run *kvm_run) |
1881 | { | 1937 | { |
1938 | u8 cr8_prev = kvm_get_cr8(&svm->vcpu); | ||
1939 | /* instruction emulation calls kvm_set_cr8() */ | ||
1882 | emulate_instruction(&svm->vcpu, NULL, 0, 0, 0); | 1940 | emulate_instruction(&svm->vcpu, NULL, 0, 0, 0); |
1883 | if (irqchip_in_kernel(svm->vcpu.kvm)) | 1941 | if (irqchip_in_kernel(svm->vcpu.kvm)) { |
1942 | svm->vmcb->control.intercept_cr_write &= ~INTERCEPT_CR8_MASK; | ||
1943 | return 1; | ||
1944 | } | ||
1945 | if (cr8_prev <= kvm_get_cr8(&svm->vcpu)) | ||
1884 | return 1; | 1946 | return 1; |
1885 | kvm_run->exit_reason = KVM_EXIT_SET_TPR; | 1947 | kvm_run->exit_reason = KVM_EXIT_SET_TPR; |
1886 | return 0; | 1948 | return 0; |
@@ -2090,8 +2152,9 @@ static int interrupt_window_interception(struct vcpu_svm *svm, | |||
2090 | * If the user space waits to inject interrupts, exit as soon as | 2152 | * If the user space waits to inject interrupts, exit as soon as |
2091 | * possible | 2153 | * possible |
2092 | */ | 2154 | */ |
2093 | if (kvm_run->request_interrupt_window && | 2155 | if (!irqchip_in_kernel(svm->vcpu.kvm) && |
2094 | !svm->vcpu.arch.irq_summary) { | 2156 | kvm_run->request_interrupt_window && |
2157 | !kvm_cpu_has_interrupt(&svm->vcpu)) { | ||
2095 | ++svm->vcpu.stat.irq_window_exits; | 2158 | ++svm->vcpu.stat.irq_window_exits; |
2096 | kvm_run->exit_reason = KVM_EXIT_IRQ_WINDOW_OPEN; | 2159 | kvm_run->exit_reason = KVM_EXIT_IRQ_WINDOW_OPEN; |
2097 | return 0; | 2160 | return 0; |
@@ -2134,6 +2197,7 @@ static int (*svm_exit_handlers[])(struct vcpu_svm *svm, | |||
2134 | [SVM_EXIT_VINTR] = interrupt_window_interception, | 2197 | [SVM_EXIT_VINTR] = interrupt_window_interception, |
2135 | /* [SVM_EXIT_CR0_SEL_WRITE] = emulate_on_interception, */ | 2198 | /* [SVM_EXIT_CR0_SEL_WRITE] = emulate_on_interception, */ |
2136 | [SVM_EXIT_CPUID] = cpuid_interception, | 2199 | [SVM_EXIT_CPUID] = cpuid_interception, |
2200 | [SVM_EXIT_IRET] = iret_interception, | ||
2137 | [SVM_EXIT_INVD] = emulate_on_interception, | 2201 | [SVM_EXIT_INVD] = emulate_on_interception, |
2138 | [SVM_EXIT_HLT] = halt_interception, | 2202 | [SVM_EXIT_HLT] = halt_interception, |
2139 | [SVM_EXIT_INVLPG] = invlpg_interception, | 2203 | [SVM_EXIT_INVLPG] = invlpg_interception, |
@@ -2194,7 +2258,6 @@ static int handle_exit(struct kvm_run *kvm_run, struct kvm_vcpu *vcpu) | |||
2194 | } | 2258 | } |
2195 | } | 2259 | } |
2196 | 2260 | ||
2197 | kvm_reput_irq(svm); | ||
2198 | 2261 | ||
2199 | if (svm->vmcb->control.exit_code == SVM_EXIT_ERR) { | 2262 | if (svm->vmcb->control.exit_code == SVM_EXIT_ERR) { |
2200 | kvm_run->exit_reason = KVM_EXIT_FAIL_ENTRY; | 2263 | kvm_run->exit_reason = KVM_EXIT_FAIL_ENTRY; |
@@ -2205,7 +2268,7 @@ static int handle_exit(struct kvm_run *kvm_run, struct kvm_vcpu *vcpu) | |||
2205 | 2268 | ||
2206 | if (is_external_interrupt(svm->vmcb->control.exit_int_info) && | 2269 | if (is_external_interrupt(svm->vmcb->control.exit_int_info) && |
2207 | exit_code != SVM_EXIT_EXCP_BASE + PF_VECTOR && | 2270 | exit_code != SVM_EXIT_EXCP_BASE + PF_VECTOR && |
2208 | exit_code != SVM_EXIT_NPF) | 2271 | exit_code != SVM_EXIT_NPF && exit_code != SVM_EXIT_TASK_SWITCH) |
2209 | printk(KERN_ERR "%s: unexpected exit_ini_info 0x%x " | 2272 | printk(KERN_ERR "%s: unexpected exit_ini_info 0x%x " |
2210 | "exit_code 0x%x\n", | 2273 | "exit_code 0x%x\n", |
2211 | __func__, svm->vmcb->control.exit_int_info, | 2274 | __func__, svm->vmcb->control.exit_int_info, |
@@ -2242,6 +2305,15 @@ static void pre_svm_run(struct vcpu_svm *svm) | |||
2242 | new_asid(svm, svm_data); | 2305 | new_asid(svm, svm_data); |
2243 | } | 2306 | } |
2244 | 2307 | ||
2308 | static void svm_inject_nmi(struct kvm_vcpu *vcpu) | ||
2309 | { | ||
2310 | struct vcpu_svm *svm = to_svm(vcpu); | ||
2311 | |||
2312 | svm->vmcb->control.event_inj = SVM_EVTINJ_VALID | SVM_EVTINJ_TYPE_NMI; | ||
2313 | vcpu->arch.hflags |= HF_NMI_MASK; | ||
2314 | svm->vmcb->control.intercept |= (1UL << INTERCEPT_IRET); | ||
2315 | ++vcpu->stat.nmi_injections; | ||
2316 | } | ||
2245 | 2317 | ||
2246 | static inline void svm_inject_irq(struct vcpu_svm *svm, int irq) | 2318 | static inline void svm_inject_irq(struct vcpu_svm *svm, int irq) |
2247 | { | 2319 | { |
@@ -2257,134 +2329,71 @@ static inline void svm_inject_irq(struct vcpu_svm *svm, int irq) | |||
2257 | ((/*control->int_vector >> 4*/ 0xf) << V_INTR_PRIO_SHIFT); | 2329 | ((/*control->int_vector >> 4*/ 0xf) << V_INTR_PRIO_SHIFT); |
2258 | } | 2330 | } |
2259 | 2331 | ||
2260 | static void svm_set_irq(struct kvm_vcpu *vcpu, int irq) | 2332 | static void svm_queue_irq(struct kvm_vcpu *vcpu, unsigned nr) |
2261 | { | 2333 | { |
2262 | struct vcpu_svm *svm = to_svm(vcpu); | 2334 | struct vcpu_svm *svm = to_svm(vcpu); |
2263 | 2335 | ||
2264 | nested_svm_intr(svm); | 2336 | svm->vmcb->control.event_inj = nr | |
2265 | 2337 | SVM_EVTINJ_VALID | SVM_EVTINJ_TYPE_INTR; | |
2266 | svm_inject_irq(svm, irq); | ||
2267 | } | 2338 | } |
2268 | 2339 | ||
2269 | static void update_cr8_intercept(struct kvm_vcpu *vcpu) | 2340 | static void svm_set_irq(struct kvm_vcpu *vcpu) |
2270 | { | 2341 | { |
2271 | struct vcpu_svm *svm = to_svm(vcpu); | 2342 | struct vcpu_svm *svm = to_svm(vcpu); |
2272 | struct vmcb *vmcb = svm->vmcb; | ||
2273 | int max_irr, tpr; | ||
2274 | 2343 | ||
2275 | if (!irqchip_in_kernel(vcpu->kvm) || vcpu->arch.apic->vapic_addr) | 2344 | nested_svm_intr(svm); |
2276 | return; | ||
2277 | 2345 | ||
2278 | vmcb->control.intercept_cr_write &= ~INTERCEPT_CR8_MASK; | 2346 | svm_queue_irq(vcpu, vcpu->arch.interrupt.nr); |
2347 | } | ||
2279 | 2348 | ||
2280 | max_irr = kvm_lapic_find_highest_irr(vcpu); | 2349 | static void update_cr8_intercept(struct kvm_vcpu *vcpu, int tpr, int irr) |
2281 | if (max_irr == -1) | 2350 | { |
2282 | return; | 2351 | struct vcpu_svm *svm = to_svm(vcpu); |
2283 | 2352 | ||
2284 | tpr = kvm_lapic_get_cr8(vcpu) << 4; | 2353 | if (irr == -1) |
2354 | return; | ||
2285 | 2355 | ||
2286 | if (tpr >= (max_irr & 0xf0)) | 2356 | if (tpr >= irr) |
2287 | vmcb->control.intercept_cr_write |= INTERCEPT_CR8_MASK; | 2357 | svm->vmcb->control.intercept_cr_write |= INTERCEPT_CR8_MASK; |
2288 | } | 2358 | } |
2289 | 2359 | ||
2290 | static void svm_intr_assist(struct kvm_vcpu *vcpu) | 2360 | static int svm_nmi_allowed(struct kvm_vcpu *vcpu) |
2291 | { | 2361 | { |
2292 | struct vcpu_svm *svm = to_svm(vcpu); | 2362 | struct vcpu_svm *svm = to_svm(vcpu); |
2293 | struct vmcb *vmcb = svm->vmcb; | 2363 | struct vmcb *vmcb = svm->vmcb; |
2294 | int intr_vector = -1; | 2364 | return !(vmcb->control.int_state & SVM_INTERRUPT_SHADOW_MASK) && |
2295 | 2365 | !(svm->vcpu.arch.hflags & HF_NMI_MASK); | |
2296 | if ((vmcb->control.exit_int_info & SVM_EVTINJ_VALID) && | ||
2297 | ((vmcb->control.exit_int_info & SVM_EVTINJ_TYPE_MASK) == 0)) { | ||
2298 | intr_vector = vmcb->control.exit_int_info & | ||
2299 | SVM_EVTINJ_VEC_MASK; | ||
2300 | vmcb->control.exit_int_info = 0; | ||
2301 | svm_inject_irq(svm, intr_vector); | ||
2302 | goto out; | ||
2303 | } | ||
2304 | |||
2305 | if (vmcb->control.int_ctl & V_IRQ_MASK) | ||
2306 | goto out; | ||
2307 | |||
2308 | if (!kvm_cpu_has_interrupt(vcpu)) | ||
2309 | goto out; | ||
2310 | |||
2311 | if (nested_svm_intr(svm)) | ||
2312 | goto out; | ||
2313 | |||
2314 | if (!(svm->vcpu.arch.hflags & HF_GIF_MASK)) | ||
2315 | goto out; | ||
2316 | |||
2317 | if (!(vmcb->save.rflags & X86_EFLAGS_IF) || | ||
2318 | (vmcb->control.int_state & SVM_INTERRUPT_SHADOW_MASK) || | ||
2319 | (vmcb->control.event_inj & SVM_EVTINJ_VALID)) { | ||
2320 | /* unable to deliver irq, set pending irq */ | ||
2321 | svm_set_vintr(svm); | ||
2322 | svm_inject_irq(svm, 0x0); | ||
2323 | goto out; | ||
2324 | } | ||
2325 | /* Okay, we can deliver the interrupt: grab it and update PIC state. */ | ||
2326 | intr_vector = kvm_cpu_get_interrupt(vcpu); | ||
2327 | svm_inject_irq(svm, intr_vector); | ||
2328 | out: | ||
2329 | update_cr8_intercept(vcpu); | ||
2330 | } | 2366 | } |
2331 | 2367 | ||
2332 | static void kvm_reput_irq(struct vcpu_svm *svm) | 2368 | static int svm_interrupt_allowed(struct kvm_vcpu *vcpu) |
2333 | { | 2369 | { |
2334 | struct vmcb_control_area *control = &svm->vmcb->control; | 2370 | struct vcpu_svm *svm = to_svm(vcpu); |
2335 | 2371 | struct vmcb *vmcb = svm->vmcb; | |
2336 | if ((control->int_ctl & V_IRQ_MASK) | 2372 | return (vmcb->save.rflags & X86_EFLAGS_IF) && |
2337 | && !irqchip_in_kernel(svm->vcpu.kvm)) { | 2373 | !(vmcb->control.int_state & SVM_INTERRUPT_SHADOW_MASK) && |
2338 | control->int_ctl &= ~V_IRQ_MASK; | 2374 | (svm->vcpu.arch.hflags & HF_GIF_MASK); |
2339 | push_irq(&svm->vcpu, control->int_vector); | ||
2340 | } | ||
2341 | |||
2342 | svm->vcpu.arch.interrupt_window_open = | ||
2343 | !(control->int_state & SVM_INTERRUPT_SHADOW_MASK) && | ||
2344 | (svm->vcpu.arch.hflags & HF_GIF_MASK); | ||
2345 | } | 2375 | } |
2346 | 2376 | ||
2347 | static void svm_do_inject_vector(struct vcpu_svm *svm) | 2377 | static void enable_irq_window(struct kvm_vcpu *vcpu) |
2348 | { | 2378 | { |
2349 | struct kvm_vcpu *vcpu = &svm->vcpu; | 2379 | svm_set_vintr(to_svm(vcpu)); |
2350 | int word_index = __ffs(vcpu->arch.irq_summary); | 2380 | svm_inject_irq(to_svm(vcpu), 0x0); |
2351 | int bit_index = __ffs(vcpu->arch.irq_pending[word_index]); | ||
2352 | int irq = word_index * BITS_PER_LONG + bit_index; | ||
2353 | |||
2354 | clear_bit(bit_index, &vcpu->arch.irq_pending[word_index]); | ||
2355 | if (!vcpu->arch.irq_pending[word_index]) | ||
2356 | clear_bit(word_index, &vcpu->arch.irq_summary); | ||
2357 | svm_inject_irq(svm, irq); | ||
2358 | } | 2381 | } |
2359 | 2382 | ||
2360 | static void do_interrupt_requests(struct kvm_vcpu *vcpu, | 2383 | static void enable_nmi_window(struct kvm_vcpu *vcpu) |
2361 | struct kvm_run *kvm_run) | ||
2362 | { | 2384 | { |
2363 | struct vcpu_svm *svm = to_svm(vcpu); | 2385 | struct vcpu_svm *svm = to_svm(vcpu); |
2364 | struct vmcb_control_area *control = &svm->vmcb->control; | ||
2365 | |||
2366 | if (nested_svm_intr(svm)) | ||
2367 | return; | ||
2368 | 2386 | ||
2369 | svm->vcpu.arch.interrupt_window_open = | 2387 | if ((svm->vcpu.arch.hflags & (HF_NMI_MASK | HF_IRET_MASK)) |
2370 | (!(control->int_state & SVM_INTERRUPT_SHADOW_MASK) && | 2388 | == HF_NMI_MASK) |
2371 | (svm->vmcb->save.rflags & X86_EFLAGS_IF) && | 2389 | return; /* IRET will cause a vm exit */ |
2372 | (svm->vcpu.arch.hflags & HF_GIF_MASK)); | ||
2373 | 2390 | ||
2374 | if (svm->vcpu.arch.interrupt_window_open && svm->vcpu.arch.irq_summary) | 2391 | /* Something prevents NMI from being injected. Single step over |
2375 | /* | 2392 | possible problem (IRET or exception injection or interrupt |
2376 | * If interrupts enabled, and not blocked by sti or mov ss. Good. | 2393 | shadow) */ |
2377 | */ | 2394 | vcpu->arch.singlestep = true; |
2378 | svm_do_inject_vector(svm); | 2395 | svm->vmcb->save.rflags |= (X86_EFLAGS_TF | X86_EFLAGS_RF); |
2379 | 2396 | update_db_intercept(vcpu); | |
2380 | /* | ||
2381 | * Interrupts blocked. Wait for unblock. | ||
2382 | */ | ||
2383 | if (!svm->vcpu.arch.interrupt_window_open && | ||
2384 | (svm->vcpu.arch.irq_summary || kvm_run->request_interrupt_window)) | ||
2385 | svm_set_vintr(svm); | ||
2386 | else | ||
2387 | svm_clear_vintr(svm); | ||
2388 | } | 2397 | } |
2389 | 2398 | ||
2390 | static int svm_set_tss_addr(struct kvm *kvm, unsigned int addr) | 2399 | static int svm_set_tss_addr(struct kvm *kvm, unsigned int addr) |
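Taken together, the NMI changes in this file form a small state machine around the two hflags; a summary sketch using the names from this diff:

	/* svm_inject_nmi():          set HF_NMI_MASK, intercept IRET
	 * iret_interception():       set HF_IRET_MASK, drop the IRET intercept
	 * svm_complete_interrupts(): clear both flags once the guest's IRET
	 *                            has retired
	 * enable_nmi_window():       if HF_NMI_MASK is still set alone, just
	 *                            wait for the IRET exit; otherwise
	 *                            single-step (TF|RF) over whatever blocks
	 *                            the NMI, with db_interception() undoing
	 *                            the single-step once it fires
	 */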
@@ -2407,7 +2416,7 @@ static inline void sync_cr8_to_lapic(struct kvm_vcpu *vcpu) | |||
2407 | 2416 | ||
2408 | if (!(svm->vmcb->control.intercept_cr_write & INTERCEPT_CR8_MASK)) { | 2417 | if (!(svm->vmcb->control.intercept_cr_write & INTERCEPT_CR8_MASK)) { |
2409 | int cr8 = svm->vmcb->control.int_ctl & V_TPR_MASK; | 2418 | int cr8 = svm->vmcb->control.int_ctl & V_TPR_MASK; |
2410 | kvm_lapic_set_tpr(vcpu, cr8); | 2419 | kvm_set_cr8(vcpu, cr8); |
2411 | } | 2420 | } |
2412 | } | 2421 | } |
2413 | 2422 | ||
@@ -2416,14 +2425,54 @@ static inline void sync_lapic_to_cr8(struct kvm_vcpu *vcpu) | |||
2416 | struct vcpu_svm *svm = to_svm(vcpu); | 2425 | struct vcpu_svm *svm = to_svm(vcpu); |
2417 | u64 cr8; | 2426 | u64 cr8; |
2418 | 2427 | ||
2419 | if (!irqchip_in_kernel(vcpu->kvm)) | ||
2420 | return; | ||
2421 | |||
2422 | cr8 = kvm_get_cr8(vcpu); | 2428 | cr8 = kvm_get_cr8(vcpu); |
2423 | svm->vmcb->control.int_ctl &= ~V_TPR_MASK; | 2429 | svm->vmcb->control.int_ctl &= ~V_TPR_MASK; |
2424 | svm->vmcb->control.int_ctl |= cr8 & V_TPR_MASK; | 2430 | svm->vmcb->control.int_ctl |= cr8 & V_TPR_MASK; |
2425 | } | 2431 | } |
2426 | 2432 | ||
2433 | static void svm_complete_interrupts(struct vcpu_svm *svm) | ||
2434 | { | ||
2435 | u8 vector; | ||
2436 | int type; | ||
2437 | u32 exitintinfo = svm->vmcb->control.exit_int_info; | ||
2438 | |||
2439 | if (svm->vcpu.arch.hflags & HF_IRET_MASK) | ||
2440 | svm->vcpu.arch.hflags &= ~(HF_NMI_MASK | HF_IRET_MASK); | ||
2441 | |||
2442 | svm->vcpu.arch.nmi_injected = false; | ||
2443 | kvm_clear_exception_queue(&svm->vcpu); | ||
2444 | kvm_clear_interrupt_queue(&svm->vcpu); | ||
2445 | |||
2446 | if (!(exitintinfo & SVM_EXITINTINFO_VALID)) | ||
2447 | return; | ||
2448 | |||
2449 | vector = exitintinfo & SVM_EXITINTINFO_VEC_MASK; | ||
2450 | type = exitintinfo & SVM_EXITINTINFO_TYPE_MASK; | ||
2451 | |||
2452 | switch (type) { | ||
2453 | case SVM_EXITINTINFO_TYPE_NMI: | ||
2454 | svm->vcpu.arch.nmi_injected = true; | ||
2455 | break; | ||
2456 | case SVM_EXITINTINFO_TYPE_EXEPT: | ||
2457 | /* In case of a software exception do not reinject the exception | ||
2458 | vector, but re-execute the instruction instead */ | ||
2459 | if (kvm_exception_is_soft(vector)) | ||
2460 | break; | ||
2461 | if (exitintinfo & SVM_EXITINTINFO_VALID_ERR) { | ||
2462 | u32 err = svm->vmcb->control.exit_int_info_err; | ||
2463 | kvm_queue_exception_e(&svm->vcpu, vector, err); | ||
2464 | |||
2465 | } else | ||
2466 | kvm_queue_exception(&svm->vcpu, vector); | ||
2467 | break; | ||
2468 | case SVM_EXITINTINFO_TYPE_INTR: | ||
2469 | kvm_queue_interrupt(&svm->vcpu, vector, false); | ||
2470 | break; | ||
2471 | default: | ||
2472 | break; | ||
2473 | } | ||
2474 | } | ||
2475 | |||
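svm_complete_interrupts() declines to re-inject software exceptions and lets the guest re-execute the instruction instead. kvm_exception_is_soft() comes from the newly included x86.h; judging from the matching vmx.c change further down (BP_VECTOR/OF_VECTOR), it presumably amounts to:

	static inline bool kvm_exception_is_soft(unsigned int nr)
	{
		/* INT3 (#BP) and INTO (#OF) are the software exceptions whose
		 * instruction is re-executed rather than the event re-injected. */
		return (nr == BP_VECTOR) || (nr == OF_VECTOR);
	}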
2427 | #ifdef CONFIG_X86_64 | 2476 | #ifdef CONFIG_X86_64 |
2428 | #define R "r" | 2477 | #define R "r" |
2429 | #else | 2478 | #else |
@@ -2552,6 +2601,8 @@ static void svm_vcpu_run(struct kvm_vcpu *vcpu, struct kvm_run *kvm_run) | |||
2552 | sync_cr8_to_lapic(vcpu); | 2601 | sync_cr8_to_lapic(vcpu); |
2553 | 2602 | ||
2554 | svm->next_rip = 0; | 2603 | svm->next_rip = 0; |
2604 | |||
2605 | svm_complete_interrupts(svm); | ||
2555 | } | 2606 | } |
2556 | 2607 | ||
2557 | #undef R | 2608 | #undef R |
@@ -2617,7 +2668,7 @@ static int get_npt_level(void) | |||
2617 | #endif | 2668 | #endif |
2618 | } | 2669 | } |
2619 | 2670 | ||
2620 | static int svm_get_mt_mask_shift(void) | 2671 | static u64 svm_get_mt_mask(struct kvm_vcpu *vcpu, gfn_t gfn, bool is_mmio) |
2621 | { | 2672 | { |
2622 | return 0; | 2673 | return 0; |
2623 | } | 2674 | } |
@@ -2667,17 +2718,21 @@ static struct kvm_x86_ops svm_x86_ops = { | |||
2667 | .run = svm_vcpu_run, | 2718 | .run = svm_vcpu_run, |
2668 | .handle_exit = handle_exit, | 2719 | .handle_exit = handle_exit, |
2669 | .skip_emulated_instruction = skip_emulated_instruction, | 2720 | .skip_emulated_instruction = skip_emulated_instruction, |
2721 | .set_interrupt_shadow = svm_set_interrupt_shadow, | ||
2722 | .get_interrupt_shadow = svm_get_interrupt_shadow, | ||
2670 | .patch_hypercall = svm_patch_hypercall, | 2723 | .patch_hypercall = svm_patch_hypercall, |
2671 | .get_irq = svm_get_irq, | ||
2672 | .set_irq = svm_set_irq, | 2724 | .set_irq = svm_set_irq, |
2725 | .set_nmi = svm_inject_nmi, | ||
2673 | .queue_exception = svm_queue_exception, | 2726 | .queue_exception = svm_queue_exception, |
2674 | .exception_injected = svm_exception_injected, | 2727 | .interrupt_allowed = svm_interrupt_allowed, |
2675 | .inject_pending_irq = svm_intr_assist, | 2728 | .nmi_allowed = svm_nmi_allowed, |
2676 | .inject_pending_vectors = do_interrupt_requests, | 2729 | .enable_nmi_window = enable_nmi_window, |
2730 | .enable_irq_window = enable_irq_window, | ||
2731 | .update_cr8_intercept = update_cr8_intercept, | ||
2677 | 2732 | ||
2678 | .set_tss_addr = svm_set_tss_addr, | 2733 | .set_tss_addr = svm_set_tss_addr, |
2679 | .get_tdp_level = get_npt_level, | 2734 | .get_tdp_level = get_npt_level, |
2680 | .get_mt_mask_shift = svm_get_mt_mask_shift, | 2735 | .get_mt_mask = svm_get_mt_mask, |
2681 | }; | 2736 | }; |
2682 | 2737 | ||
2683 | static int __init svm_init(void) | 2738 | static int __init svm_init(void) |
diff --git a/arch/x86/kvm/timer.c b/arch/x86/kvm/timer.c new file mode 100644 index 000000000000..86dbac072d0c --- /dev/null +++ b/arch/x86/kvm/timer.c | |||
@@ -0,0 +1,46 @@ | |||
1 | #include <linux/kvm_host.h> | ||
2 | #include <linux/kvm.h> | ||
3 | #include <linux/hrtimer.h> | ||
4 | #include <asm/atomic.h> | ||
5 | #include "kvm_timer.h" | ||
6 | |||
7 | static int __kvm_timer_fn(struct kvm_vcpu *vcpu, struct kvm_timer *ktimer) | ||
8 | { | ||
9 | int restart_timer = 0; | ||
10 | wait_queue_head_t *q = &vcpu->wq; | ||
11 | |||
12 | /* FIXME: this code should not know anything about vcpus */ | ||
13 | if (!atomic_inc_and_test(&ktimer->pending)) | ||
14 | set_bit(KVM_REQ_PENDING_TIMER, &vcpu->requests); | ||
15 | |||
16 | if (!ktimer->reinject) | ||
17 | atomic_set(&ktimer->pending, 1); | ||
18 | |||
19 | if (waitqueue_active(q)) | ||
20 | wake_up_interruptible(q); | ||
21 | |||
22 | if (ktimer->t_ops->is_periodic(ktimer)) { | ||
23 | hrtimer_add_expires_ns(&ktimer->timer, ktimer->period); | ||
24 | restart_timer = 1; | ||
25 | } | ||
26 | |||
27 | return restart_timer; | ||
28 | } | ||
29 | |||
30 | enum hrtimer_restart kvm_timer_fn(struct hrtimer *data) | ||
31 | { | ||
32 | int restart_timer; | ||
33 | struct kvm_vcpu *vcpu; | ||
34 | struct kvm_timer *ktimer = container_of(data, struct kvm_timer, timer); | ||
35 | |||
36 | vcpu = ktimer->kvm->vcpus[ktimer->vcpu_id]; | ||
37 | if (!vcpu) | ||
38 | return HRTIMER_NORESTART; | ||
39 | |||
40 | restart_timer = __kvm_timer_fn(vcpu, ktimer); | ||
41 | if (restart_timer) | ||
42 | return HRTIMER_RESTART; | ||
43 | else | ||
44 | return HRTIMER_NORESTART; | ||
45 | } | ||
46 | |||
diff --git a/arch/x86/kvm/vmx.c b/arch/x86/kvm/vmx.c index bb481330716f..e770bf349ec4 100644 --- a/arch/x86/kvm/vmx.c +++ b/arch/x86/kvm/vmx.c | |||
@@ -32,26 +32,27 @@ | |||
32 | #include <asm/desc.h> | 32 | #include <asm/desc.h> |
33 | #include <asm/vmx.h> | 33 | #include <asm/vmx.h> |
34 | #include <asm/virtext.h> | 34 | #include <asm/virtext.h> |
35 | #include <asm/mce.h> | ||
35 | 36 | ||
36 | #define __ex(x) __kvm_handle_fault_on_reboot(x) | 37 | #define __ex(x) __kvm_handle_fault_on_reboot(x) |
37 | 38 | ||
38 | MODULE_AUTHOR("Qumranet"); | 39 | MODULE_AUTHOR("Qumranet"); |
39 | MODULE_LICENSE("GPL"); | 40 | MODULE_LICENSE("GPL"); |
40 | 41 | ||
41 | static int bypass_guest_pf = 1; | 42 | static int __read_mostly bypass_guest_pf = 1; |
42 | module_param(bypass_guest_pf, bool, 0); | 43 | module_param(bypass_guest_pf, bool, S_IRUGO); |
43 | 44 | ||
44 | static int enable_vpid = 1; | 45 | static int __read_mostly enable_vpid = 1; |
45 | module_param(enable_vpid, bool, 0); | 46 | module_param_named(vpid, enable_vpid, bool, 0444); |
46 | 47 | ||
47 | static int flexpriority_enabled = 1; | 48 | static int __read_mostly flexpriority_enabled = 1; |
48 | module_param(flexpriority_enabled, bool, 0); | 49 | module_param_named(flexpriority, flexpriority_enabled, bool, S_IRUGO); |
49 | 50 | ||
50 | static int enable_ept = 1; | 51 | static int __read_mostly enable_ept = 1; |
51 | module_param(enable_ept, bool, 0); | 52 | module_param_named(ept, enable_ept, bool, S_IRUGO); |
52 | 53 | ||
53 | static int emulate_invalid_guest_state = 0; | 54 | static int __read_mostly emulate_invalid_guest_state = 0; |
54 | module_param(emulate_invalid_guest_state, bool, 0); | 55 | module_param(emulate_invalid_guest_state, bool, S_IRUGO); |
55 | 56 | ||
56 | struct vmcs { | 57 | struct vmcs { |
57 | u32 revision_id; | 58 | u32 revision_id; |
@@ -97,6 +98,7 @@ struct vcpu_vmx { | |||
97 | int soft_vnmi_blocked; | 98 | int soft_vnmi_blocked; |
98 | ktime_t entry_time; | 99 | ktime_t entry_time; |
99 | s64 vnmi_blocked_time; | 100 | s64 vnmi_blocked_time; |
101 | u32 exit_reason; | ||
100 | }; | 102 | }; |
101 | 103 | ||
102 | static inline struct vcpu_vmx *to_vmx(struct kvm_vcpu *vcpu) | 104 | static inline struct vcpu_vmx *to_vmx(struct kvm_vcpu *vcpu) |
@@ -111,9 +113,10 @@ static DEFINE_PER_CPU(struct vmcs *, vmxarea); | |||
111 | static DEFINE_PER_CPU(struct vmcs *, current_vmcs); | 113 | static DEFINE_PER_CPU(struct vmcs *, current_vmcs); |
112 | static DEFINE_PER_CPU(struct list_head, vcpus_on_cpu); | 114 | static DEFINE_PER_CPU(struct list_head, vcpus_on_cpu); |
113 | 115 | ||
114 | static struct page *vmx_io_bitmap_a; | 116 | static unsigned long *vmx_io_bitmap_a; |
115 | static struct page *vmx_io_bitmap_b; | 117 | static unsigned long *vmx_io_bitmap_b; |
116 | static struct page *vmx_msr_bitmap; | 118 | static unsigned long *vmx_msr_bitmap_legacy; |
119 | static unsigned long *vmx_msr_bitmap_longmode; | ||
117 | 120 | ||
118 | static DECLARE_BITMAP(vmx_vpid_bitmap, VMX_NR_VPIDS); | 121 | static DECLARE_BITMAP(vmx_vpid_bitmap, VMX_NR_VPIDS); |
119 | static DEFINE_SPINLOCK(vmx_vpid_lock); | 122 | static DEFINE_SPINLOCK(vmx_vpid_lock); |
@@ -213,70 +216,78 @@ static inline int is_external_interrupt(u32 intr_info) | |||
213 | == (INTR_TYPE_EXT_INTR | INTR_INFO_VALID_MASK); | 216 | == (INTR_TYPE_EXT_INTR | INTR_INFO_VALID_MASK); |
214 | } | 217 | } |
215 | 218 | ||
219 | static inline int is_machine_check(u32 intr_info) | ||
220 | { | ||
221 | return (intr_info & (INTR_INFO_INTR_TYPE_MASK | INTR_INFO_VECTOR_MASK | | ||
222 | INTR_INFO_VALID_MASK)) == | ||
223 | (INTR_TYPE_HARD_EXCEPTION | MC_VECTOR | INTR_INFO_VALID_MASK); | ||
224 | } | ||
225 | |||
216 | static inline int cpu_has_vmx_msr_bitmap(void) | 226 | static inline int cpu_has_vmx_msr_bitmap(void) |
217 | { | 227 | { |
218 | return (vmcs_config.cpu_based_exec_ctrl & CPU_BASED_USE_MSR_BITMAPS); | 228 | return vmcs_config.cpu_based_exec_ctrl & CPU_BASED_USE_MSR_BITMAPS; |
219 | } | 229 | } |
220 | 230 | ||
221 | static inline int cpu_has_vmx_tpr_shadow(void) | 231 | static inline int cpu_has_vmx_tpr_shadow(void) |
222 | { | 232 | { |
223 | return (vmcs_config.cpu_based_exec_ctrl & CPU_BASED_TPR_SHADOW); | 233 | return vmcs_config.cpu_based_exec_ctrl & CPU_BASED_TPR_SHADOW; |
224 | } | 234 | } |
225 | 235 | ||
226 | static inline int vm_need_tpr_shadow(struct kvm *kvm) | 236 | static inline int vm_need_tpr_shadow(struct kvm *kvm) |
227 | { | 237 | { |
228 | return ((cpu_has_vmx_tpr_shadow()) && (irqchip_in_kernel(kvm))); | 238 | return (cpu_has_vmx_tpr_shadow()) && (irqchip_in_kernel(kvm)); |
229 | } | 239 | } |
230 | 240 | ||
231 | static inline int cpu_has_secondary_exec_ctrls(void) | 241 | static inline int cpu_has_secondary_exec_ctrls(void) |
232 | { | 242 | { |
233 | return (vmcs_config.cpu_based_exec_ctrl & | 243 | return vmcs_config.cpu_based_exec_ctrl & |
234 | CPU_BASED_ACTIVATE_SECONDARY_CONTROLS); | 244 | CPU_BASED_ACTIVATE_SECONDARY_CONTROLS; |
235 | } | 245 | } |
236 | 246 | ||
237 | static inline bool cpu_has_vmx_virtualize_apic_accesses(void) | 247 | static inline bool cpu_has_vmx_virtualize_apic_accesses(void) |
238 | { | 248 | { |
239 | return flexpriority_enabled | 249 | return vmcs_config.cpu_based_2nd_exec_ctrl & |
240 | && (vmcs_config.cpu_based_2nd_exec_ctrl & | 250 | SECONDARY_EXEC_VIRTUALIZE_APIC_ACCESSES; |
241 | SECONDARY_EXEC_VIRTUALIZE_APIC_ACCESSES); | 251 | } |
252 | |||
253 | static inline bool cpu_has_vmx_flexpriority(void) | ||
254 | { | ||
255 | return cpu_has_vmx_tpr_shadow() && | ||
256 | cpu_has_vmx_virtualize_apic_accesses(); | ||
242 | } | 257 | } |
243 | 258 | ||
244 | static inline int cpu_has_vmx_invept_individual_addr(void) | 259 | static inline int cpu_has_vmx_invept_individual_addr(void) |
245 | { | 260 | { |
246 | return (!!(vmx_capability.ept & VMX_EPT_EXTENT_INDIVIDUAL_BIT)); | 261 | return !!(vmx_capability.ept & VMX_EPT_EXTENT_INDIVIDUAL_BIT); |
247 | } | 262 | } |
248 | 263 | ||
249 | static inline int cpu_has_vmx_invept_context(void) | 264 | static inline int cpu_has_vmx_invept_context(void) |
250 | { | 265 | { |
251 | return (!!(vmx_capability.ept & VMX_EPT_EXTENT_CONTEXT_BIT)); | 266 | return !!(vmx_capability.ept & VMX_EPT_EXTENT_CONTEXT_BIT); |
252 | } | 267 | } |
253 | 268 | ||
254 | static inline int cpu_has_vmx_invept_global(void) | 269 | static inline int cpu_has_vmx_invept_global(void) |
255 | { | 270 | { |
256 | return (!!(vmx_capability.ept & VMX_EPT_EXTENT_GLOBAL_BIT)); | 271 | return !!(vmx_capability.ept & VMX_EPT_EXTENT_GLOBAL_BIT); |
257 | } | 272 | } |
258 | 273 | ||
259 | static inline int cpu_has_vmx_ept(void) | 274 | static inline int cpu_has_vmx_ept(void) |
260 | { | 275 | { |
261 | return (vmcs_config.cpu_based_2nd_exec_ctrl & | 276 | return vmcs_config.cpu_based_2nd_exec_ctrl & |
262 | SECONDARY_EXEC_ENABLE_EPT); | 277 | SECONDARY_EXEC_ENABLE_EPT; |
263 | } | ||
264 | |||
265 | static inline int vm_need_ept(void) | ||
266 | { | ||
267 | return (cpu_has_vmx_ept() && enable_ept); | ||
268 | } | 278 | } |
269 | 279 | ||
270 | static inline int vm_need_virtualize_apic_accesses(struct kvm *kvm) | 280 | static inline int vm_need_virtualize_apic_accesses(struct kvm *kvm) |
271 | { | 281 | { |
272 | return ((cpu_has_vmx_virtualize_apic_accesses()) && | 282 | return flexpriority_enabled && |
273 | (irqchip_in_kernel(kvm))); | 283 | (cpu_has_vmx_virtualize_apic_accesses()) && |
284 | (irqchip_in_kernel(kvm)); | ||
274 | } | 285 | } |
275 | 286 | ||
276 | static inline int cpu_has_vmx_vpid(void) | 287 | static inline int cpu_has_vmx_vpid(void) |
277 | { | 288 | { |
278 | return (vmcs_config.cpu_based_2nd_exec_ctrl & | 289 | return vmcs_config.cpu_based_2nd_exec_ctrl & |
279 | SECONDARY_EXEC_ENABLE_VPID); | 290 | SECONDARY_EXEC_ENABLE_VPID; |
280 | } | 291 | } |
281 | 292 | ||
282 | static inline int cpu_has_virtual_nmis(void) | 293 | static inline int cpu_has_virtual_nmis(void) |
@@ -284,6 +295,11 @@ static inline int cpu_has_virtual_nmis(void) | |||
284 | return vmcs_config.pin_based_exec_ctrl & PIN_BASED_VIRTUAL_NMIS; | 295 | return vmcs_config.pin_based_exec_ctrl & PIN_BASED_VIRTUAL_NMIS; |
285 | } | 296 | } |
286 | 297 | ||
298 | static inline bool report_flexpriority(void) | ||
299 | { | ||
300 | return flexpriority_enabled; | ||
301 | } | ||
302 | |||
287 | static int __find_msr_index(struct vcpu_vmx *vmx, u32 msr) | 303 | static int __find_msr_index(struct vcpu_vmx *vmx, u32 msr) |
288 | { | 304 | { |
289 | int i; | 305 | int i; |
@@ -381,7 +397,7 @@ static inline void ept_sync_global(void) | |||
381 | 397 | ||
382 | static inline void ept_sync_context(u64 eptp) | 398 | static inline void ept_sync_context(u64 eptp) |
383 | { | 399 | { |
384 | if (vm_need_ept()) { | 400 | if (enable_ept) { |
385 | if (cpu_has_vmx_invept_context()) | 401 | if (cpu_has_vmx_invept_context()) |
386 | __invept(VMX_EPT_EXTENT_CONTEXT, eptp, 0); | 402 | __invept(VMX_EPT_EXTENT_CONTEXT, eptp, 0); |
387 | else | 403 | else |
@@ -391,7 +407,7 @@ static inline void ept_sync_context(u64 eptp) | |||
391 | 407 | ||
392 | static inline void ept_sync_individual_addr(u64 eptp, gpa_t gpa) | 408 | static inline void ept_sync_individual_addr(u64 eptp, gpa_t gpa) |
393 | { | 409 | { |
394 | if (vm_need_ept()) { | 410 | if (enable_ept) { |
395 | if (cpu_has_vmx_invept_individual_addr()) | 411 | if (cpu_has_vmx_invept_individual_addr()) |
396 | __invept(VMX_EPT_EXTENT_INDIVIDUAL_ADDR, | 412 | __invept(VMX_EPT_EXTENT_INDIVIDUAL_ADDR, |
397 | eptp, gpa); | 413 | eptp, gpa); |
@@ -478,7 +494,7 @@ static void update_exception_bitmap(struct kvm_vcpu *vcpu) | |||
478 | { | 494 | { |
479 | u32 eb; | 495 | u32 eb; |
480 | 496 | ||
481 | eb = (1u << PF_VECTOR) | (1u << UD_VECTOR); | 497 | eb = (1u << PF_VECTOR) | (1u << UD_VECTOR) | (1u << MC_VECTOR); |
482 | if (!vcpu->fpu_active) | 498 | if (!vcpu->fpu_active) |
483 | eb |= 1u << NM_VECTOR; | 499 | eb |= 1u << NM_VECTOR; |
484 | if (vcpu->guest_debug & KVM_GUESTDBG_ENABLE) { | 500 | if (vcpu->guest_debug & KVM_GUESTDBG_ENABLE) { |
@@ -488,9 +504,9 @@ static void update_exception_bitmap(struct kvm_vcpu *vcpu) | |||
488 | if (vcpu->guest_debug & KVM_GUESTDBG_USE_SW_BP) | 504 | if (vcpu->guest_debug & KVM_GUESTDBG_USE_SW_BP) |
489 | eb |= 1u << BP_VECTOR; | 505 | eb |= 1u << BP_VECTOR; |
490 | } | 506 | } |
491 | if (vcpu->arch.rmode.active) | 507 | if (vcpu->arch.rmode.vm86_active) |
492 | eb = ~0; | 508 | eb = ~0; |
493 | if (vm_need_ept()) | 509 | if (enable_ept) |
494 | eb &= ~(1u << PF_VECTOR); /* bypass_guest_pf = 0 */ | 510 | eb &= ~(1u << PF_VECTOR); /* bypass_guest_pf = 0 */ |
495 | vmcs_write32(EXCEPTION_BITMAP, eb); | 511 | vmcs_write32(EXCEPTION_BITMAP, eb); |
496 | } | 512 | } |
@@ -724,29 +740,50 @@ static unsigned long vmx_get_rflags(struct kvm_vcpu *vcpu) | |||
724 | 740 | ||
725 | static void vmx_set_rflags(struct kvm_vcpu *vcpu, unsigned long rflags) | 741 | static void vmx_set_rflags(struct kvm_vcpu *vcpu, unsigned long rflags) |
726 | { | 742 | { |
727 | if (vcpu->arch.rmode.active) | 743 | if (vcpu->arch.rmode.vm86_active) |
728 | rflags |= X86_EFLAGS_IOPL | X86_EFLAGS_VM; | 744 | rflags |= X86_EFLAGS_IOPL | X86_EFLAGS_VM; |
729 | vmcs_writel(GUEST_RFLAGS, rflags); | 745 | vmcs_writel(GUEST_RFLAGS, rflags); |
730 | } | 746 | } |
731 | 747 | ||
748 | static u32 vmx_get_interrupt_shadow(struct kvm_vcpu *vcpu, int mask) | ||
749 | { | ||
750 | u32 interruptibility = vmcs_read32(GUEST_INTERRUPTIBILITY_INFO); | ||
751 | int ret = 0; | ||
752 | |||
753 | if (interruptibility & GUEST_INTR_STATE_STI) | ||
754 | ret |= X86_SHADOW_INT_STI; | ||
755 | if (interruptibility & GUEST_INTR_STATE_MOV_SS) | ||
756 | ret |= X86_SHADOW_INT_MOV_SS; | ||
757 | |||
758 | return ret & mask; | ||
759 | } | ||
760 | |||
761 | static void vmx_set_interrupt_shadow(struct kvm_vcpu *vcpu, int mask) | ||
762 | { | ||
763 | u32 interruptibility_old = vmcs_read32(GUEST_INTERRUPTIBILITY_INFO); | ||
764 | u32 interruptibility = interruptibility_old; | ||
765 | |||
766 | interruptibility &= ~(GUEST_INTR_STATE_STI | GUEST_INTR_STATE_MOV_SS); | ||
767 | |||
768 | if (mask & X86_SHADOW_INT_MOV_SS) | ||
769 | interruptibility |= GUEST_INTR_STATE_MOV_SS; | ||
770 | if (mask & X86_SHADOW_INT_STI) | ||
771 | interruptibility |= GUEST_INTR_STATE_STI; | ||
772 | |||
773 | if ((interruptibility != interruptibility_old)) | ||
774 | vmcs_write32(GUEST_INTERRUPTIBILITY_INFO, interruptibility); | ||
775 | } | ||
776 | |||
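The VMX side maps the same X86_SHADOW_INT_* flags onto the guest interruptibility-state field. For orientation, the relevant architectural bits per the Intel SDM (the GUEST_INTR_STATE_* macros are defined in asm/vmx.h, not here):

	/* GUEST_INTERRUPTIBILITY_INFO bits (assumed values):
	 *   bit 0  blocking by STI        (GUEST_INTR_STATE_STI)
	 *   bit 1  blocking by MOV SS     (GUEST_INTR_STATE_MOV_SS)
	 *   bit 2  blocking by SMI        (GUEST_INTR_STATE_SMI)
	 *   bit 3  blocking by NMI        (GUEST_INTR_STATE_NMI)
	 * The old open-coded "interruptibility & 3" cleared exactly the STI
	 * and MOV SS bits; the new helpers make that explicit.
	 */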
732 | static void skip_emulated_instruction(struct kvm_vcpu *vcpu) | 777 | static void skip_emulated_instruction(struct kvm_vcpu *vcpu) |
733 | { | 778 | { |
734 | unsigned long rip; | 779 | unsigned long rip; |
735 | u32 interruptibility; | ||
736 | 780 | ||
737 | rip = kvm_rip_read(vcpu); | 781 | rip = kvm_rip_read(vcpu); |
738 | rip += vmcs_read32(VM_EXIT_INSTRUCTION_LEN); | 782 | rip += vmcs_read32(VM_EXIT_INSTRUCTION_LEN); |
739 | kvm_rip_write(vcpu, rip); | 783 | kvm_rip_write(vcpu, rip); |
740 | 784 | ||
741 | /* | 785 | /* skipping an emulated instruction also counts */ |
742 | * We emulated an instruction, so temporary interrupt blocking | 786 | vmx_set_interrupt_shadow(vcpu, 0); |
743 | * should be removed, if set. | ||
744 | */ | ||
745 | interruptibility = vmcs_read32(GUEST_INTERRUPTIBILITY_INFO); | ||
746 | if (interruptibility & 3) | ||
747 | vmcs_write32(GUEST_INTERRUPTIBILITY_INFO, | ||
748 | interruptibility & ~3); | ||
749 | vcpu->arch.interrupt_window_open = 1; | ||
750 | } | 787 | } |
751 | 788 | ||
752 | static void vmx_queue_exception(struct kvm_vcpu *vcpu, unsigned nr, | 789 | static void vmx_queue_exception(struct kvm_vcpu *vcpu, unsigned nr, |
@@ -760,7 +797,7 @@ static void vmx_queue_exception(struct kvm_vcpu *vcpu, unsigned nr, | |||
760 | intr_info |= INTR_INFO_DELIVER_CODE_MASK; | 797 | intr_info |= INTR_INFO_DELIVER_CODE_MASK; |
761 | } | 798 | } |
762 | 799 | ||
763 | if (vcpu->arch.rmode.active) { | 800 | if (vcpu->arch.rmode.vm86_active) { |
764 | vmx->rmode.irq.pending = true; | 801 | vmx->rmode.irq.pending = true; |
765 | vmx->rmode.irq.vector = nr; | 802 | vmx->rmode.irq.vector = nr; |
766 | vmx->rmode.irq.rip = kvm_rip_read(vcpu); | 803 | vmx->rmode.irq.rip = kvm_rip_read(vcpu); |
@@ -773,8 +810,9 @@ static void vmx_queue_exception(struct kvm_vcpu *vcpu, unsigned nr, | |||
773 | return; | 810 | return; |
774 | } | 811 | } |
775 | 812 | ||
776 | if (nr == BP_VECTOR || nr == OF_VECTOR) { | 813 | if (kvm_exception_is_soft(nr)) { |
777 | vmcs_write32(VM_ENTRY_INSTRUCTION_LEN, 1); | 814 | vmcs_write32(VM_ENTRY_INSTRUCTION_LEN, |
815 | vmx->vcpu.arch.event_exit_inst_len); | ||
778 | intr_info |= INTR_TYPE_SOFT_EXCEPTION; | 816 | intr_info |= INTR_TYPE_SOFT_EXCEPTION; |
779 | } else | 817 | } else |
780 | intr_info |= INTR_TYPE_HARD_EXCEPTION; | 818 | intr_info |= INTR_TYPE_HARD_EXCEPTION; |
@@ -782,11 +820,6 @@ static void vmx_queue_exception(struct kvm_vcpu *vcpu, unsigned nr, | |||
782 | vmcs_write32(VM_ENTRY_INTR_INFO_FIELD, intr_info); | 820 | vmcs_write32(VM_ENTRY_INTR_INFO_FIELD, intr_info); |
783 | } | 821 | } |
784 | 822 | ||
785 | static bool vmx_exception_injected(struct kvm_vcpu *vcpu) | ||
786 | { | ||
787 | return false; | ||
788 | } | ||
789 | |||
790 | /* | 823 | /* |
791 | * Swap MSR entry in host/guest MSR entry array. | 824 | * Swap MSR entry in host/guest MSR entry array. |
792 | */ | 825 | */ |
@@ -812,6 +845,7 @@ static void move_msr_up(struct vcpu_vmx *vmx, int from, int to) | |||
812 | static void setup_msrs(struct vcpu_vmx *vmx) | 845 | static void setup_msrs(struct vcpu_vmx *vmx) |
813 | { | 846 | { |
814 | int save_nmsrs; | 847 | int save_nmsrs; |
848 | unsigned long *msr_bitmap; | ||
815 | 849 | ||
816 | vmx_load_host_state(vmx); | 850 | vmx_load_host_state(vmx); |
817 | save_nmsrs = 0; | 851 | save_nmsrs = 0; |
@@ -847,6 +881,15 @@ static void setup_msrs(struct vcpu_vmx *vmx) | |||
847 | __find_msr_index(vmx, MSR_KERNEL_GS_BASE); | 881 | __find_msr_index(vmx, MSR_KERNEL_GS_BASE); |
848 | #endif | 882 | #endif |
849 | vmx->msr_offset_efer = __find_msr_index(vmx, MSR_EFER); | 883 | vmx->msr_offset_efer = __find_msr_index(vmx, MSR_EFER); |
884 | |||
885 | if (cpu_has_vmx_msr_bitmap()) { | ||
886 | if (is_long_mode(&vmx->vcpu)) | ||
887 | msr_bitmap = vmx_msr_bitmap_longmode; | ||
888 | else | ||
889 | msr_bitmap = vmx_msr_bitmap_legacy; | ||
890 | |||
891 | vmcs_write64(MSR_BITMAP, __pa(msr_bitmap)); | ||
892 | } | ||
850 | } | 893 | } |
851 | 894 | ||
852 | /* | 895 | /* |
@@ -1034,13 +1077,6 @@ static int set_guest_debug(struct kvm_vcpu *vcpu, struct kvm_guest_debug *dbg) | |||
1034 | return 0; | 1077 | return 0; |
1035 | } | 1078 | } |
1036 | 1079 | ||
1037 | static int vmx_get_irq(struct kvm_vcpu *vcpu) | ||
1038 | { | ||
1039 | if (!vcpu->arch.interrupt.pending) | ||
1040 | return -1; | ||
1041 | return vcpu->arch.interrupt.nr; | ||
1042 | } | ||
1043 | |||
1044 | static __init int cpu_has_kvm_support(void) | 1080 | static __init int cpu_has_kvm_support(void) |
1045 | { | 1081 | { |
1046 | return cpu_has_vmx(); | 1082 | return cpu_has_vmx(); |
@@ -1241,7 +1277,7 @@ static struct vmcs *alloc_vmcs_cpu(int cpu) | |||
1241 | struct page *pages; | 1277 | struct page *pages; |
1242 | struct vmcs *vmcs; | 1278 | struct vmcs *vmcs; |
1243 | 1279 | ||
1244 | pages = alloc_pages_node(node, GFP_KERNEL, vmcs_config.order); | 1280 | pages = alloc_pages_exact_node(node, GFP_KERNEL, vmcs_config.order); |
1245 | if (!pages) | 1281 | if (!pages) |
1246 | return NULL; | 1282 | return NULL; |
1247 | vmcs = page_address(pages); | 1283 | vmcs = page_address(pages); |
@@ -1294,6 +1330,18 @@ static __init int hardware_setup(void) | |||
1294 | if (boot_cpu_has(X86_FEATURE_NX)) | 1330 | if (boot_cpu_has(X86_FEATURE_NX)) |
1295 | kvm_enable_efer_bits(EFER_NX); | 1331 | kvm_enable_efer_bits(EFER_NX); |
1296 | 1332 | ||
1333 | if (!cpu_has_vmx_vpid()) | ||
1334 | enable_vpid = 0; | ||
1335 | |||
1336 | if (!cpu_has_vmx_ept()) | ||
1337 | enable_ept = 0; | ||
1338 | |||
1339 | if (!cpu_has_vmx_flexpriority()) | ||
1340 | flexpriority_enabled = 0; | ||
1341 | |||
1342 | if (!cpu_has_vmx_tpr_shadow()) | ||
1343 | kvm_x86_ops->update_cr8_intercept = NULL; | ||
1344 | |||
1297 | return alloc_kvm_area(); | 1345 | return alloc_kvm_area(); |
1298 | } | 1346 | } |
1299 | 1347 | ||
@@ -1324,7 +1372,7 @@ static void enter_pmode(struct kvm_vcpu *vcpu) | |||
1324 | struct vcpu_vmx *vmx = to_vmx(vcpu); | 1372 | struct vcpu_vmx *vmx = to_vmx(vcpu); |
1325 | 1373 | ||
1326 | vmx->emulation_required = 1; | 1374 | vmx->emulation_required = 1; |
1327 | vcpu->arch.rmode.active = 0; | 1375 | vcpu->arch.rmode.vm86_active = 0; |
1328 | 1376 | ||
1329 | vmcs_writel(GUEST_TR_BASE, vcpu->arch.rmode.tr.base); | 1377 | vmcs_writel(GUEST_TR_BASE, vcpu->arch.rmode.tr.base); |
1330 | vmcs_write32(GUEST_TR_LIMIT, vcpu->arch.rmode.tr.limit); | 1378 | vmcs_write32(GUEST_TR_LIMIT, vcpu->arch.rmode.tr.limit); |
@@ -1386,7 +1434,7 @@ static void enter_rmode(struct kvm_vcpu *vcpu) | |||
1386 | struct vcpu_vmx *vmx = to_vmx(vcpu); | 1434 | struct vcpu_vmx *vmx = to_vmx(vcpu); |
1387 | 1435 | ||
1388 | vmx->emulation_required = 1; | 1436 | vmx->emulation_required = 1; |
1389 | vcpu->arch.rmode.active = 1; | 1437 | vcpu->arch.rmode.vm86_active = 1; |
1390 | 1438 | ||
1391 | vcpu->arch.rmode.tr.base = vmcs_readl(GUEST_TR_BASE); | 1439 | vcpu->arch.rmode.tr.base = vmcs_readl(GUEST_TR_BASE); |
1392 | vmcs_writel(GUEST_TR_BASE, rmode_tss_base(vcpu->kvm)); | 1440 | vmcs_writel(GUEST_TR_BASE, rmode_tss_base(vcpu->kvm)); |
@@ -1485,7 +1533,7 @@ static void exit_lmode(struct kvm_vcpu *vcpu) | |||
1485 | static void vmx_flush_tlb(struct kvm_vcpu *vcpu) | 1533 | static void vmx_flush_tlb(struct kvm_vcpu *vcpu) |
1486 | { | 1534 | { |
1487 | vpid_sync_vcpu_all(to_vmx(vcpu)); | 1535 | vpid_sync_vcpu_all(to_vmx(vcpu)); |
1488 | if (vm_need_ept()) | 1536 | if (enable_ept) |
1489 | ept_sync_context(construct_eptp(vcpu->arch.mmu.root_hpa)); | 1537 | ept_sync_context(construct_eptp(vcpu->arch.mmu.root_hpa)); |
1490 | } | 1538 | } |
1491 | 1539 | ||
@@ -1555,10 +1603,10 @@ static void vmx_set_cr0(struct kvm_vcpu *vcpu, unsigned long cr0) | |||
1555 | 1603 | ||
1556 | vmx_fpu_deactivate(vcpu); | 1604 | vmx_fpu_deactivate(vcpu); |
1557 | 1605 | ||
1558 | if (vcpu->arch.rmode.active && (cr0 & X86_CR0_PE)) | 1606 | if (vcpu->arch.rmode.vm86_active && (cr0 & X86_CR0_PE)) |
1559 | enter_pmode(vcpu); | 1607 | enter_pmode(vcpu); |
1560 | 1608 | ||
1561 | if (!vcpu->arch.rmode.active && !(cr0 & X86_CR0_PE)) | 1609 | if (!vcpu->arch.rmode.vm86_active && !(cr0 & X86_CR0_PE)) |
1562 | enter_rmode(vcpu); | 1610 | enter_rmode(vcpu); |
1563 | 1611 | ||
1564 | #ifdef CONFIG_X86_64 | 1612 | #ifdef CONFIG_X86_64 |
@@ -1570,7 +1618,7 @@ static void vmx_set_cr0(struct kvm_vcpu *vcpu, unsigned long cr0) | |||
1570 | } | 1618 | } |
1571 | #endif | 1619 | #endif |
1572 | 1620 | ||
1573 | if (vm_need_ept()) | 1621 | if (enable_ept) |
1574 | ept_update_paging_mode_cr0(&hw_cr0, cr0, vcpu); | 1622 | ept_update_paging_mode_cr0(&hw_cr0, cr0, vcpu); |
1575 | 1623 | ||
1576 | vmcs_writel(CR0_READ_SHADOW, cr0); | 1624 | vmcs_writel(CR0_READ_SHADOW, cr0); |
@@ -1599,7 +1647,7 @@ static void vmx_set_cr3(struct kvm_vcpu *vcpu, unsigned long cr3) | |||
1599 | u64 eptp; | 1647 | u64 eptp; |
1600 | 1648 | ||
1601 | guest_cr3 = cr3; | 1649 | guest_cr3 = cr3; |
1602 | if (vm_need_ept()) { | 1650 | if (enable_ept) { |
1603 | eptp = construct_eptp(cr3); | 1651 | eptp = construct_eptp(cr3); |
1604 | vmcs_write64(EPT_POINTER, eptp); | 1652 | vmcs_write64(EPT_POINTER, eptp); |
1605 | ept_sync_context(eptp); | 1653 | ept_sync_context(eptp); |
@@ -1616,11 +1664,11 @@ static void vmx_set_cr3(struct kvm_vcpu *vcpu, unsigned long cr3) | |||
1616 | 1664 | ||
1617 | static void vmx_set_cr4(struct kvm_vcpu *vcpu, unsigned long cr4) | 1665 | static void vmx_set_cr4(struct kvm_vcpu *vcpu, unsigned long cr4) |
1618 | { | 1666 | { |
1619 | unsigned long hw_cr4 = cr4 | (vcpu->arch.rmode.active ? | 1667 | unsigned long hw_cr4 = cr4 | (vcpu->arch.rmode.vm86_active ? |
1620 | KVM_RMODE_VM_CR4_ALWAYS_ON : KVM_PMODE_VM_CR4_ALWAYS_ON); | 1668 | KVM_RMODE_VM_CR4_ALWAYS_ON : KVM_PMODE_VM_CR4_ALWAYS_ON); |
1621 | 1669 | ||
1622 | vcpu->arch.cr4 = cr4; | 1670 | vcpu->arch.cr4 = cr4; |
1623 | if (vm_need_ept()) | 1671 | if (enable_ept) |
1624 | ept_update_paging_mode_cr4(&hw_cr4, vcpu); | 1672 | ept_update_paging_mode_cr4(&hw_cr4, vcpu); |
1625 | 1673 | ||
1626 | vmcs_writel(CR4_READ_SHADOW, cr4); | 1674 | vmcs_writel(CR4_READ_SHADOW, cr4); |
@@ -1699,7 +1747,7 @@ static void vmx_set_segment(struct kvm_vcpu *vcpu, | |||
1699 | struct kvm_vmx_segment_field *sf = &kvm_vmx_segment_fields[seg]; | 1747 | struct kvm_vmx_segment_field *sf = &kvm_vmx_segment_fields[seg]; |
1700 | u32 ar; | 1748 | u32 ar; |
1701 | 1749 | ||
1702 | if (vcpu->arch.rmode.active && seg == VCPU_SREG_TR) { | 1750 | if (vcpu->arch.rmode.vm86_active && seg == VCPU_SREG_TR) { |
1703 | vcpu->arch.rmode.tr.selector = var->selector; | 1751 | vcpu->arch.rmode.tr.selector = var->selector; |
1704 | vcpu->arch.rmode.tr.base = var->base; | 1752 | vcpu->arch.rmode.tr.base = var->base; |
1705 | vcpu->arch.rmode.tr.limit = var->limit; | 1753 | vcpu->arch.rmode.tr.limit = var->limit; |
@@ -1709,7 +1757,7 @@ static void vmx_set_segment(struct kvm_vcpu *vcpu, | |||
1709 | vmcs_writel(sf->base, var->base); | 1757 | vmcs_writel(sf->base, var->base); |
1710 | vmcs_write32(sf->limit, var->limit); | 1758 | vmcs_write32(sf->limit, var->limit); |
1711 | vmcs_write16(sf->selector, var->selector); | 1759 | vmcs_write16(sf->selector, var->selector); |
1712 | if (vcpu->arch.rmode.active && var->s) { | 1760 | if (vcpu->arch.rmode.vm86_active && var->s) { |
1713 | /* | 1761 | /* |
1714 | * Hack real-mode segments into vm86 compatibility. | 1762 | * Hack real-mode segments into vm86 compatibility. |
1715 | */ | 1763 | */ |
@@ -1982,7 +2030,7 @@ static int init_rmode_identity_map(struct kvm *kvm) | |||
1982 | pfn_t identity_map_pfn; | 2030 | pfn_t identity_map_pfn; |
1983 | u32 tmp; | 2031 | u32 tmp; |
1984 | 2032 | ||
1985 | if (!vm_need_ept()) | 2033 | if (!enable_ept) |
1986 | return 1; | 2034 | return 1; |
1987 | if (unlikely(!kvm->arch.ept_identity_pagetable)) { | 2035 | if (unlikely(!kvm->arch.ept_identity_pagetable)) { |
1988 | printk(KERN_ERR "EPT: identity-mapping pagetable " | 2036 | printk(KERN_ERR "EPT: identity-mapping pagetable " |
@@ -2071,7 +2119,7 @@ static void allocate_vpid(struct vcpu_vmx *vmx) | |||
2071 | int vpid; | 2119 | int vpid; |
2072 | 2120 | ||
2073 | vmx->vpid = 0; | 2121 | vmx->vpid = 0; |
2074 | if (!enable_vpid || !cpu_has_vmx_vpid()) | 2122 | if (!enable_vpid) |
2075 | return; | 2123 | return; |
2076 | spin_lock(&vmx_vpid_lock); | 2124 | spin_lock(&vmx_vpid_lock); |
2077 | vpid = find_first_zero_bit(vmx_vpid_bitmap, VMX_NR_VPIDS); | 2125 | vpid = find_first_zero_bit(vmx_vpid_bitmap, VMX_NR_VPIDS); |
@@ -2082,9 +2130,9 @@ static void allocate_vpid(struct vcpu_vmx *vmx) | |||
2082 | spin_unlock(&vmx_vpid_lock); | 2130 | spin_unlock(&vmx_vpid_lock); |
2083 | } | 2131 | } |
2084 | 2132 | ||
2085 | static void vmx_disable_intercept_for_msr(struct page *msr_bitmap, u32 msr) | 2133 | static void __vmx_disable_intercept_for_msr(unsigned long *msr_bitmap, u32 msr) |
2086 | { | 2134 | { |
2087 | void *va; | 2135 | int f = sizeof(unsigned long); |
2088 | 2136 | ||
2089 | if (!cpu_has_vmx_msr_bitmap()) | 2137 | if (!cpu_has_vmx_msr_bitmap()) |
2090 | return; | 2138 | return; |
@@ -2094,16 +2142,21 @@ static void vmx_disable_intercept_for_msr(struct page *msr_bitmap, u32 msr) | |||
2094 | * have the write-low and read-high bitmap offsets the wrong way round. | 2142 | * have the write-low and read-high bitmap offsets the wrong way round. |
2095 | * We can control MSRs 0x00000000-0x00001fff and 0xc0000000-0xc0001fff. | 2143 | * We can control MSRs 0x00000000-0x00001fff and 0xc0000000-0xc0001fff. |
2096 | */ | 2144 | */ |
2097 | va = kmap(msr_bitmap); | ||
2098 | if (msr <= 0x1fff) { | 2145 | if (msr <= 0x1fff) { |
2099 | __clear_bit(msr, va + 0x000); /* read-low */ | 2146 | __clear_bit(msr, msr_bitmap + 0x000 / f); /* read-low */ |
2100 | __clear_bit(msr, va + 0x800); /* write-low */ | 2147 | __clear_bit(msr, msr_bitmap + 0x800 / f); /* write-low */ |
2101 | } else if ((msr >= 0xc0000000) && (msr <= 0xc0001fff)) { | 2148 | } else if ((msr >= 0xc0000000) && (msr <= 0xc0001fff)) { |
2102 | msr &= 0x1fff; | 2149 | msr &= 0x1fff; |
2103 | __clear_bit(msr, va + 0x400); /* read-high */ | 2150 | __clear_bit(msr, msr_bitmap + 0x400 / f); /* read-high */ |
2104 | __clear_bit(msr, va + 0xc00); /* write-high */ | 2151 | __clear_bit(msr, msr_bitmap + 0xc00 / f); /* write-high */ |
2105 | } | 2152 | } |
2106 | kunmap(msr_bitmap); | 2153 | } |
2154 | |||
2155 | static void vmx_disable_intercept_for_msr(u32 msr, bool longmode_only) | ||
2156 | { | ||
2157 | if (!longmode_only) | ||
2158 | __vmx_disable_intercept_for_msr(vmx_msr_bitmap_legacy, msr); | ||
2159 | __vmx_disable_intercept_for_msr(vmx_msr_bitmap_longmode, msr); | ||
2107 | } | 2160 | } |
2108 | 2161 | ||
2109 | /* | 2162 | /* |
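The offsets used in __vmx_disable_intercept_for_msr() come straight from the VMX MSR-bitmap layout: one 4 KiB page split into four 1024-byte regions, read-low at 0x000, read-high at 0x400, write-low at 0x800 and write-high at 0xc00, each covering 8192 MSRs. Dividing by f = sizeof(unsigned long) only converts the byte offset into an offset in longs, because __clear_bit() takes a bit index relative to an unsigned long pointer. A standalone sketch of the same address calculation (the helper name and plain-C form are illustrative, not part of the patch):

#include <stdbool.h>
#include <stddef.h>
#include <stdint.h>

/*
 * Illustrative sketch: locate the intercept bit for an MSR inside a 4 KiB
 * VMX MSR bitmap laid out as read-low 0x000, read-high 0x400,
 * write-low 0x800, write-high 0xc00. Clearing that bit lets the guest
 * access the MSR without a VM exit.
 */
static int msr_bitmap_bit(uint32_t msr, bool write, size_t *byte, unsigned int *bit)
{
        size_t base;

        if (msr <= 0x1fff) {                                    /* low range  */
                base = write ? 0x800 : 0x000;
        } else if (msr >= 0xc0000000 && msr <= 0xc0001fff) {    /* high range */
                base = write ? 0xc00 : 0x400;
                msr &= 0x1fff;
        } else {
                return -1;      /* MSR not controllable via the bitmap */
        }

        *byte = base + msr / 8;         /* byte inside the 4 KiB page */
        *bit  = msr % 8;                /* bit inside that byte       */
        return 0;
}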
@@ -2121,11 +2174,11 @@ static int vmx_vcpu_setup(struct vcpu_vmx *vmx) | |||
2121 | u32 exec_control; | 2174 | u32 exec_control; |
2122 | 2175 | ||
2123 | /* I/O */ | 2176 | /* I/O */ |
2124 | vmcs_write64(IO_BITMAP_A, page_to_phys(vmx_io_bitmap_a)); | 2177 | vmcs_write64(IO_BITMAP_A, __pa(vmx_io_bitmap_a)); |
2125 | vmcs_write64(IO_BITMAP_B, page_to_phys(vmx_io_bitmap_b)); | 2178 | vmcs_write64(IO_BITMAP_B, __pa(vmx_io_bitmap_b)); |
2126 | 2179 | ||
2127 | if (cpu_has_vmx_msr_bitmap()) | 2180 | if (cpu_has_vmx_msr_bitmap()) |
2128 | vmcs_write64(MSR_BITMAP, page_to_phys(vmx_msr_bitmap)); | 2181 | vmcs_write64(MSR_BITMAP, __pa(vmx_msr_bitmap_legacy)); |
2129 | 2182 | ||
2130 | vmcs_write64(VMCS_LINK_POINTER, -1ull); /* 22.3.1.5 */ | 2183 | vmcs_write64(VMCS_LINK_POINTER, -1ull); /* 22.3.1.5 */ |
2131 | 2184 | ||
@@ -2141,7 +2194,7 @@ static int vmx_vcpu_setup(struct vcpu_vmx *vmx) | |||
2141 | CPU_BASED_CR8_LOAD_EXITING; | 2194 | CPU_BASED_CR8_LOAD_EXITING; |
2142 | #endif | 2195 | #endif |
2143 | } | 2196 | } |
2144 | if (!vm_need_ept()) | 2197 | if (!enable_ept) |
2145 | exec_control |= CPU_BASED_CR3_STORE_EXITING | | 2198 | exec_control |= CPU_BASED_CR3_STORE_EXITING | |
2146 | CPU_BASED_CR3_LOAD_EXITING | | 2199 | CPU_BASED_CR3_LOAD_EXITING | |
2147 | CPU_BASED_INVLPG_EXITING; | 2200 | CPU_BASED_INVLPG_EXITING; |
@@ -2154,7 +2207,7 @@ static int vmx_vcpu_setup(struct vcpu_vmx *vmx) | |||
2154 | ~SECONDARY_EXEC_VIRTUALIZE_APIC_ACCESSES; | 2207 | ~SECONDARY_EXEC_VIRTUALIZE_APIC_ACCESSES; |
2155 | if (vmx->vpid == 0) | 2208 | if (vmx->vpid == 0) |
2156 | exec_control &= ~SECONDARY_EXEC_ENABLE_VPID; | 2209 | exec_control &= ~SECONDARY_EXEC_ENABLE_VPID; |
2157 | if (!vm_need_ept()) | 2210 | if (!enable_ept) |
2158 | exec_control &= ~SECONDARY_EXEC_ENABLE_EPT; | 2211 | exec_control &= ~SECONDARY_EXEC_ENABLE_EPT; |
2159 | vmcs_write32(SECONDARY_VM_EXEC_CONTROL, exec_control); | 2212 | vmcs_write32(SECONDARY_VM_EXEC_CONTROL, exec_control); |
2160 | } | 2213 | } |
@@ -2273,7 +2326,7 @@ static int vmx_vcpu_reset(struct kvm_vcpu *vcpu) | |||
2273 | goto out; | 2326 | goto out; |
2274 | } | 2327 | } |
2275 | 2328 | ||
2276 | vmx->vcpu.arch.rmode.active = 0; | 2329 | vmx->vcpu.arch.rmode.vm86_active = 0; |
2277 | 2330 | ||
2278 | vmx->soft_vnmi_blocked = 0; | 2331 | vmx->soft_vnmi_blocked = 0; |
2279 | 2332 | ||
@@ -2402,14 +2455,16 @@ static void enable_nmi_window(struct kvm_vcpu *vcpu) | |||
2402 | vmcs_write32(CPU_BASED_VM_EXEC_CONTROL, cpu_based_vm_exec_control); | 2455 | vmcs_write32(CPU_BASED_VM_EXEC_CONTROL, cpu_based_vm_exec_control); |
2403 | } | 2456 | } |
2404 | 2457 | ||
2405 | static void vmx_inject_irq(struct kvm_vcpu *vcpu, int irq) | 2458 | static void vmx_inject_irq(struct kvm_vcpu *vcpu) |
2406 | { | 2459 | { |
2407 | struct vcpu_vmx *vmx = to_vmx(vcpu); | 2460 | struct vcpu_vmx *vmx = to_vmx(vcpu); |
2461 | uint32_t intr; | ||
2462 | int irq = vcpu->arch.interrupt.nr; | ||
2408 | 2463 | ||
2409 | KVMTRACE_1D(INJ_VIRQ, vcpu, (u32)irq, handler); | 2464 | KVMTRACE_1D(INJ_VIRQ, vcpu, (u32)irq, handler); |
2410 | 2465 | ||
2411 | ++vcpu->stat.irq_injections; | 2466 | ++vcpu->stat.irq_injections; |
2412 | if (vcpu->arch.rmode.active) { | 2467 | if (vcpu->arch.rmode.vm86_active) { |
2413 | vmx->rmode.irq.pending = true; | 2468 | vmx->rmode.irq.pending = true; |
2414 | vmx->rmode.irq.vector = irq; | 2469 | vmx->rmode.irq.vector = irq; |
2415 | vmx->rmode.irq.rip = kvm_rip_read(vcpu); | 2470 | vmx->rmode.irq.rip = kvm_rip_read(vcpu); |
@@ -2419,8 +2474,14 @@ static void vmx_inject_irq(struct kvm_vcpu *vcpu, int irq) | |||
2419 | kvm_rip_write(vcpu, vmx->rmode.irq.rip - 1); | 2474 | kvm_rip_write(vcpu, vmx->rmode.irq.rip - 1); |
2420 | return; | 2475 | return; |
2421 | } | 2476 | } |
2422 | vmcs_write32(VM_ENTRY_INTR_INFO_FIELD, | 2477 | intr = irq | INTR_INFO_VALID_MASK; |
2423 | irq | INTR_TYPE_EXT_INTR | INTR_INFO_VALID_MASK); | 2478 | if (vcpu->arch.interrupt.soft) { |
2479 | intr |= INTR_TYPE_SOFT_INTR; | ||
2480 | vmcs_write32(VM_ENTRY_INSTRUCTION_LEN, | ||
2481 | vmx->vcpu.arch.event_exit_inst_len); | ||
2482 | } else | ||
2483 | intr |= INTR_TYPE_EXT_INTR; | ||
2484 | vmcs_write32(VM_ENTRY_INTR_INFO_FIELD, intr); | ||
2424 | } | 2485 | } |
2425 | 2486 | ||
2426 | static void vmx_inject_nmi(struct kvm_vcpu *vcpu) | 2487 | static void vmx_inject_nmi(struct kvm_vcpu *vcpu) |
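Both vmx_queue_exception() and vmx_inject_irq() above compose the same VM-entry interruption-information word: bits 7:0 carry the vector, bits 10:8 the event type, bit 11 flags that an error code is delivered, and bit 31 marks the field valid; software-generated events additionally need VM_ENTRY_INSTRUCTION_LEN so the CPU can push the correct return address. A worked sketch of that encoding, with the type values written out as assumptions based on the SDM rather than taken from the kernel headers:

#include <stdint.h>

/* Assumed encodings of the interruption-type field (bits 10:8). */
enum intr_type {
        TYPE_EXT_INTR       = 0u << 8,  /* external interrupt */
        TYPE_NMI            = 2u << 8,  /* non-maskable interrupt */
        TYPE_HARD_EXCEPTION = 3u << 8,  /* hardware exception */
        TYPE_SOFT_INTR      = 4u << 8,  /* software interrupt (INT n) */
        TYPE_SOFT_EXCEPTION = 6u << 8,  /* software exception (INT3, INTO) */
};

#define INTR_HAS_ERROR_CODE     (1u << 11)      /* error code pushed */
#define INTR_VALID              (1u << 31)      /* event is valid */

/* Illustrative only: build the 32-bit VM-entry interruption info word. */
static uint32_t entry_intr_info(uint8_t vector, enum intr_type type, int has_error_code)
{
        uint32_t info = vector | type | INTR_VALID;

        if (has_error_code)
                info |= INTR_HAS_ERROR_CODE;
        return info;
}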
@@ -2441,7 +2502,7 @@ static void vmx_inject_nmi(struct kvm_vcpu *vcpu) | |||
2441 | } | 2502 | } |
2442 | 2503 | ||
2443 | ++vcpu->stat.nmi_injections; | 2504 | ++vcpu->stat.nmi_injections; |
2444 | if (vcpu->arch.rmode.active) { | 2505 | if (vcpu->arch.rmode.vm86_active) { |
2445 | vmx->rmode.irq.pending = true; | 2506 | vmx->rmode.irq.pending = true; |
2446 | vmx->rmode.irq.vector = NMI_VECTOR; | 2507 | vmx->rmode.irq.vector = NMI_VECTOR; |
2447 | vmx->rmode.irq.rip = kvm_rip_read(vcpu); | 2508 | vmx->rmode.irq.rip = kvm_rip_read(vcpu); |
@@ -2456,76 +2517,21 @@ static void vmx_inject_nmi(struct kvm_vcpu *vcpu) | |||
2456 | INTR_TYPE_NMI_INTR | INTR_INFO_VALID_MASK | NMI_VECTOR); | 2517 | INTR_TYPE_NMI_INTR | INTR_INFO_VALID_MASK | NMI_VECTOR); |
2457 | } | 2518 | } |
2458 | 2519 | ||
2459 | static void vmx_update_window_states(struct kvm_vcpu *vcpu) | 2520 | static int vmx_nmi_allowed(struct kvm_vcpu *vcpu) |
2460 | { | 2521 | { |
2461 | u32 guest_intr = vmcs_read32(GUEST_INTERRUPTIBILITY_INFO); | ||
2462 | |||
2463 | vcpu->arch.nmi_window_open = | ||
2464 | !(guest_intr & (GUEST_INTR_STATE_STI | | ||
2465 | GUEST_INTR_STATE_MOV_SS | | ||
2466 | GUEST_INTR_STATE_NMI)); | ||
2467 | if (!cpu_has_virtual_nmis() && to_vmx(vcpu)->soft_vnmi_blocked) | 2522 | if (!cpu_has_virtual_nmis() && to_vmx(vcpu)->soft_vnmi_blocked) |
2468 | vcpu->arch.nmi_window_open = 0; | 2523 | return 0; |
2469 | |||
2470 | vcpu->arch.interrupt_window_open = | ||
2471 | ((vmcs_readl(GUEST_RFLAGS) & X86_EFLAGS_IF) && | ||
2472 | !(guest_intr & (GUEST_INTR_STATE_STI | | ||
2473 | GUEST_INTR_STATE_MOV_SS))); | ||
2474 | } | ||
2475 | |||
2476 | static void kvm_do_inject_irq(struct kvm_vcpu *vcpu) | ||
2477 | { | ||
2478 | int word_index = __ffs(vcpu->arch.irq_summary); | ||
2479 | int bit_index = __ffs(vcpu->arch.irq_pending[word_index]); | ||
2480 | int irq = word_index * BITS_PER_LONG + bit_index; | ||
2481 | 2524 | ||
2482 | clear_bit(bit_index, &vcpu->arch.irq_pending[word_index]); | 2525 | return !(vmcs_read32(GUEST_INTERRUPTIBILITY_INFO) & |
2483 | if (!vcpu->arch.irq_pending[word_index]) | 2526 | (GUEST_INTR_STATE_STI | GUEST_INTR_STATE_MOV_SS | |
2484 | clear_bit(word_index, &vcpu->arch.irq_summary); | 2527 | GUEST_INTR_STATE_NMI)); |
2485 | kvm_queue_interrupt(vcpu, irq); | ||
2486 | } | 2528 | } |
2487 | 2529 | ||
2488 | static void do_interrupt_requests(struct kvm_vcpu *vcpu, | 2530 | static int vmx_interrupt_allowed(struct kvm_vcpu *vcpu) |
2489 | struct kvm_run *kvm_run) | ||
2490 | { | 2531 | { |
2491 | vmx_update_window_states(vcpu); | 2532 | return (vmcs_readl(GUEST_RFLAGS) & X86_EFLAGS_IF) && |
2492 | 2533 | !(vmcs_read32(GUEST_INTERRUPTIBILITY_INFO) & | |
2493 | if (vcpu->guest_debug & KVM_GUESTDBG_SINGLESTEP) | 2534 | (GUEST_INTR_STATE_STI | GUEST_INTR_STATE_MOV_SS)); |
2494 | vmcs_clear_bits(GUEST_INTERRUPTIBILITY_INFO, | ||
2495 | GUEST_INTR_STATE_STI | | ||
2496 | GUEST_INTR_STATE_MOV_SS); | ||
2497 | |||
2498 | if (vcpu->arch.nmi_pending && !vcpu->arch.nmi_injected) { | ||
2499 | if (vcpu->arch.interrupt.pending) { | ||
2500 | enable_nmi_window(vcpu); | ||
2501 | } else if (vcpu->arch.nmi_window_open) { | ||
2502 | vcpu->arch.nmi_pending = false; | ||
2503 | vcpu->arch.nmi_injected = true; | ||
2504 | } else { | ||
2505 | enable_nmi_window(vcpu); | ||
2506 | return; | ||
2507 | } | ||
2508 | } | ||
2509 | if (vcpu->arch.nmi_injected) { | ||
2510 | vmx_inject_nmi(vcpu); | ||
2511 | if (vcpu->arch.nmi_pending) | ||
2512 | enable_nmi_window(vcpu); | ||
2513 | else if (vcpu->arch.irq_summary | ||
2514 | || kvm_run->request_interrupt_window) | ||
2515 | enable_irq_window(vcpu); | ||
2516 | return; | ||
2517 | } | ||
2518 | |||
2519 | if (vcpu->arch.interrupt_window_open) { | ||
2520 | if (vcpu->arch.irq_summary && !vcpu->arch.interrupt.pending) | ||
2521 | kvm_do_inject_irq(vcpu); | ||
2522 | |||
2523 | if (vcpu->arch.interrupt.pending) | ||
2524 | vmx_inject_irq(vcpu, vcpu->arch.interrupt.nr); | ||
2525 | } | ||
2526 | if (!vcpu->arch.interrupt_window_open && | ||
2527 | (vcpu->arch.irq_summary || kvm_run->request_interrupt_window)) | ||
2528 | enable_irq_window(vcpu); | ||
2529 | } | 2535 | } |
2530 | 2536 | ||
2531 | static int vmx_set_tss_addr(struct kvm *kvm, unsigned int addr) | 2537 | static int vmx_set_tss_addr(struct kvm *kvm, unsigned int addr) |
@@ -2585,6 +2591,31 @@ static int handle_rmode_exception(struct kvm_vcpu *vcpu, | |||
2585 | return 0; | 2591 | return 0; |
2586 | } | 2592 | } |
2587 | 2593 | ||
2594 | /* | ||
2595 | * Trigger machine check on the host. We assume all the MSRs are already set up | ||
2596 | * by the CPU and that we still run on the same CPU as the MCE occurred on. | ||
2597 | * We pass a fake environment to the machine check handler because we want | ||
2598 | * the guest to be always treated like user space, no matter what context | ||
2599 | * it used internally. | ||
2600 | */ | ||
2601 | static void kvm_machine_check(void) | ||
2602 | { | ||
2603 | #if defined(CONFIG_X86_MCE) && defined(CONFIG_X86_64) | ||
2604 | struct pt_regs regs = { | ||
2605 | .cs = 3, /* Fake ring 3 no matter what the guest ran on */ | ||
2606 | .flags = X86_EFLAGS_IF, | ||
2607 | }; | ||
2608 | |||
2609 | do_machine_check(®s, 0); | ||
2610 | #endif | ||
2611 | } | ||
2612 | |||
2613 | static int handle_machine_check(struct kvm_vcpu *vcpu, struct kvm_run *kvm_run) | ||
2614 | { | ||
2615 | /* already handled by vcpu_run */ | ||
2616 | return 1; | ||
2617 | } | ||
2618 | |||
2588 | static int handle_exception(struct kvm_vcpu *vcpu, struct kvm_run *kvm_run) | 2619 | static int handle_exception(struct kvm_vcpu *vcpu, struct kvm_run *kvm_run) |
2589 | { | 2620 | { |
2590 | struct vcpu_vmx *vmx = to_vmx(vcpu); | 2621 | struct vcpu_vmx *vmx = to_vmx(vcpu); |
@@ -2596,17 +2627,14 @@ static int handle_exception(struct kvm_vcpu *vcpu, struct kvm_run *kvm_run) | |||
2596 | vect_info = vmx->idt_vectoring_info; | 2627 | vect_info = vmx->idt_vectoring_info; |
2597 | intr_info = vmcs_read32(VM_EXIT_INTR_INFO); | 2628 | intr_info = vmcs_read32(VM_EXIT_INTR_INFO); |
2598 | 2629 | ||
2630 | if (is_machine_check(intr_info)) | ||
2631 | return handle_machine_check(vcpu, kvm_run); | ||
2632 | |||
2599 | if ((vect_info & VECTORING_INFO_VALID_MASK) && | 2633 | if ((vect_info & VECTORING_INFO_VALID_MASK) && |
2600 | !is_page_fault(intr_info)) | 2634 | !is_page_fault(intr_info)) |
2601 | printk(KERN_ERR "%s: unexpected, vectoring info 0x%x " | 2635 | printk(KERN_ERR "%s: unexpected, vectoring info 0x%x " |
2602 | "intr info 0x%x\n", __func__, vect_info, intr_info); | 2636 | "intr info 0x%x\n", __func__, vect_info, intr_info); |
2603 | 2637 | ||
2604 | if (!irqchip_in_kernel(vcpu->kvm) && is_external_interrupt(vect_info)) { | ||
2605 | int irq = vect_info & VECTORING_INFO_VECTOR_MASK; | ||
2606 | set_bit(irq, vcpu->arch.irq_pending); | ||
2607 | set_bit(irq / BITS_PER_LONG, &vcpu->arch.irq_summary); | ||
2608 | } | ||
2609 | |||
2610 | if ((intr_info & INTR_INFO_INTR_TYPE_MASK) == INTR_TYPE_NMI_INTR) | 2638 | if ((intr_info & INTR_INFO_INTR_TYPE_MASK) == INTR_TYPE_NMI_INTR) |
2611 | return 1; /* already handled by vmx_vcpu_run() */ | 2639 | return 1; /* already handled by vmx_vcpu_run() */ |
2612 | 2640 | ||
@@ -2628,17 +2656,17 @@ static int handle_exception(struct kvm_vcpu *vcpu, struct kvm_run *kvm_run) | |||
2628 | error_code = vmcs_read32(VM_EXIT_INTR_ERROR_CODE); | 2656 | error_code = vmcs_read32(VM_EXIT_INTR_ERROR_CODE); |
2629 | if (is_page_fault(intr_info)) { | 2657 | if (is_page_fault(intr_info)) { |
2630 | /* EPT won't cause page fault directly */ | 2658 | /* EPT won't cause page fault directly */ |
2631 | if (vm_need_ept()) | 2659 | if (enable_ept) |
2632 | BUG(); | 2660 | BUG(); |
2633 | cr2 = vmcs_readl(EXIT_QUALIFICATION); | 2661 | cr2 = vmcs_readl(EXIT_QUALIFICATION); |
2634 | KVMTRACE_3D(PAGE_FAULT, vcpu, error_code, (u32)cr2, | 2662 | KVMTRACE_3D(PAGE_FAULT, vcpu, error_code, (u32)cr2, |
2635 | (u32)((u64)cr2 >> 32), handler); | 2663 | (u32)((u64)cr2 >> 32), handler); |
2636 | if (vcpu->arch.interrupt.pending || vcpu->arch.exception.pending) | 2664 | if (kvm_event_needs_reinjection(vcpu)) |
2637 | kvm_mmu_unprotect_page_virt(vcpu, cr2); | 2665 | kvm_mmu_unprotect_page_virt(vcpu, cr2); |
2638 | return kvm_mmu_page_fault(vcpu, cr2, error_code); | 2666 | return kvm_mmu_page_fault(vcpu, cr2, error_code); |
2639 | } | 2667 | } |
2640 | 2668 | ||
2641 | if (vcpu->arch.rmode.active && | 2669 | if (vcpu->arch.rmode.vm86_active && |
2642 | handle_rmode_exception(vcpu, intr_info & INTR_INFO_VECTOR_MASK, | 2670 | handle_rmode_exception(vcpu, intr_info & INTR_INFO_VECTOR_MASK, |
2643 | error_code)) { | 2671 | error_code)) { |
2644 | if (vcpu->arch.halt_request) { | 2672 | if (vcpu->arch.halt_request) { |
@@ -2753,13 +2781,18 @@ static int handle_cr(struct kvm_vcpu *vcpu, struct kvm_run *kvm_run) | |||
2753 | kvm_set_cr4(vcpu, kvm_register_read(vcpu, reg)); | 2781 | kvm_set_cr4(vcpu, kvm_register_read(vcpu, reg)); |
2754 | skip_emulated_instruction(vcpu); | 2782 | skip_emulated_instruction(vcpu); |
2755 | return 1; | 2783 | return 1; |
2756 | case 8: | 2784 | case 8: { |
2757 | kvm_set_cr8(vcpu, kvm_register_read(vcpu, reg)); | 2785 | u8 cr8_prev = kvm_get_cr8(vcpu); |
2758 | skip_emulated_instruction(vcpu); | 2786 | u8 cr8 = kvm_register_read(vcpu, reg); |
2759 | if (irqchip_in_kernel(vcpu->kvm)) | 2787 | kvm_set_cr8(vcpu, cr8); |
2760 | return 1; | 2788 | skip_emulated_instruction(vcpu); |
2761 | kvm_run->exit_reason = KVM_EXIT_SET_TPR; | 2789 | if (irqchip_in_kernel(vcpu->kvm)) |
2762 | return 0; | 2790 | return 1; |
2791 | if (cr8_prev <= cr8) | ||
2792 | return 1; | ||
2793 | kvm_run->exit_reason = KVM_EXIT_SET_TPR; | ||
2794 | return 0; | ||
2795 | } | ||
2763 | }; | 2796 | }; |
2764 | break; | 2797 | break; |
2765 | case 2: /* clts */ | 2798 | case 2: /* clts */ |
@@ -2957,8 +2990,9 @@ static int handle_interrupt_window(struct kvm_vcpu *vcpu, | |||
2957 | * If the user space waits to inject interrupts, exit as soon as | 2990 | * If the user space waits to inject interrupts, exit as soon as |
2958 | * possible | 2991 | * possible |
2959 | */ | 2992 | */ |
2960 | if (kvm_run->request_interrupt_window && | 2993 | if (!irqchip_in_kernel(vcpu->kvm) && |
2961 | !vcpu->arch.irq_summary) { | 2994 | kvm_run->request_interrupt_window && |
2995 | !kvm_cpu_has_interrupt(vcpu)) { | ||
2962 | kvm_run->exit_reason = KVM_EXIT_IRQ_WINDOW_OPEN; | 2996 | kvm_run->exit_reason = KVM_EXIT_IRQ_WINDOW_OPEN; |
2963 | return 0; | 2997 | return 0; |
2964 | } | 2998 | } |
@@ -2980,7 +3014,7 @@ static int handle_vmcall(struct kvm_vcpu *vcpu, struct kvm_run *kvm_run) | |||
2980 | 3014 | ||
2981 | static int handle_invlpg(struct kvm_vcpu *vcpu, struct kvm_run *kvm_run) | 3015 | static int handle_invlpg(struct kvm_vcpu *vcpu, struct kvm_run *kvm_run) |
2982 | { | 3016 | { |
2983 | u64 exit_qualification = vmcs_read64(EXIT_QUALIFICATION); | 3017 | unsigned long exit_qualification = vmcs_readl(EXIT_QUALIFICATION); |
2984 | 3018 | ||
2985 | kvm_mmu_invlpg(vcpu, exit_qualification); | 3019 | kvm_mmu_invlpg(vcpu, exit_qualification); |
2986 | skip_emulated_instruction(vcpu); | 3020 | skip_emulated_instruction(vcpu); |
@@ -2996,11 +3030,11 @@ static int handle_wbinvd(struct kvm_vcpu *vcpu, struct kvm_run *kvm_run) | |||
2996 | 3030 | ||
2997 | static int handle_apic_access(struct kvm_vcpu *vcpu, struct kvm_run *kvm_run) | 3031 | static int handle_apic_access(struct kvm_vcpu *vcpu, struct kvm_run *kvm_run) |
2998 | { | 3032 | { |
2999 | u64 exit_qualification; | 3033 | unsigned long exit_qualification; |
3000 | enum emulation_result er; | 3034 | enum emulation_result er; |
3001 | unsigned long offset; | 3035 | unsigned long offset; |
3002 | 3036 | ||
3003 | exit_qualification = vmcs_read64(EXIT_QUALIFICATION); | 3037 | exit_qualification = vmcs_readl(EXIT_QUALIFICATION); |
3004 | offset = exit_qualification & 0xffful; | 3038 | offset = exit_qualification & 0xffful; |
3005 | 3039 | ||
3006 | er = emulate_instruction(vcpu, kvm_run, 0, 0, 0); | 3040 | er = emulate_instruction(vcpu, kvm_run, 0, 0, 0); |
@@ -3019,22 +3053,41 @@ static int handle_task_switch(struct kvm_vcpu *vcpu, struct kvm_run *kvm_run) | |||
3019 | struct vcpu_vmx *vmx = to_vmx(vcpu); | 3053 | struct vcpu_vmx *vmx = to_vmx(vcpu); |
3020 | unsigned long exit_qualification; | 3054 | unsigned long exit_qualification; |
3021 | u16 tss_selector; | 3055 | u16 tss_selector; |
3022 | int reason; | 3056 | int reason, type, idt_v; |
3057 | |||
3058 | idt_v = (vmx->idt_vectoring_info & VECTORING_INFO_VALID_MASK); | ||
3059 | type = (vmx->idt_vectoring_info & VECTORING_INFO_TYPE_MASK); | ||
3023 | 3060 | ||
3024 | exit_qualification = vmcs_readl(EXIT_QUALIFICATION); | 3061 | exit_qualification = vmcs_readl(EXIT_QUALIFICATION); |
3025 | 3062 | ||
3026 | reason = (u32)exit_qualification >> 30; | 3063 | reason = (u32)exit_qualification >> 30; |
3027 | if (reason == TASK_SWITCH_GATE && vmx->vcpu.arch.nmi_injected && | 3064 | if (reason == TASK_SWITCH_GATE && idt_v) { |
3028 | (vmx->idt_vectoring_info & VECTORING_INFO_VALID_MASK) && | 3065 | switch (type) { |
3029 | (vmx->idt_vectoring_info & VECTORING_INFO_TYPE_MASK) | 3066 | case INTR_TYPE_NMI_INTR: |
3030 | == INTR_TYPE_NMI_INTR) { | 3067 | vcpu->arch.nmi_injected = false; |
3031 | vcpu->arch.nmi_injected = false; | 3068 | if (cpu_has_virtual_nmis()) |
3032 | if (cpu_has_virtual_nmis()) | 3069 | vmcs_set_bits(GUEST_INTERRUPTIBILITY_INFO, |
3033 | vmcs_set_bits(GUEST_INTERRUPTIBILITY_INFO, | 3070 | GUEST_INTR_STATE_NMI); |
3034 | GUEST_INTR_STATE_NMI); | 3071 | break; |
3072 | case INTR_TYPE_EXT_INTR: | ||
3073 | case INTR_TYPE_SOFT_INTR: | ||
3074 | kvm_clear_interrupt_queue(vcpu); | ||
3075 | break; | ||
3076 | case INTR_TYPE_HARD_EXCEPTION: | ||
3077 | case INTR_TYPE_SOFT_EXCEPTION: | ||
3078 | kvm_clear_exception_queue(vcpu); | ||
3079 | break; | ||
3080 | default: | ||
3081 | break; | ||
3082 | } | ||
3035 | } | 3083 | } |
3036 | tss_selector = exit_qualification; | 3084 | tss_selector = exit_qualification; |
3037 | 3085 | ||
3086 | if (!idt_v || (type != INTR_TYPE_HARD_EXCEPTION && | ||
3087 | type != INTR_TYPE_EXT_INTR && | ||
3088 | type != INTR_TYPE_NMI_INTR)) | ||
3089 | skip_emulated_instruction(vcpu); | ||
3090 | |||
3038 | if (!kvm_task_switch(vcpu, tss_selector, reason)) | 3091 | if (!kvm_task_switch(vcpu, tss_selector, reason)) |
3039 | return 0; | 3092 | return 0; |
3040 | 3093 | ||
@@ -3051,11 +3104,11 @@ static int handle_task_switch(struct kvm_vcpu *vcpu, struct kvm_run *kvm_run) | |||
3051 | 3104 | ||
3052 | static int handle_ept_violation(struct kvm_vcpu *vcpu, struct kvm_run *kvm_run) | 3105 | static int handle_ept_violation(struct kvm_vcpu *vcpu, struct kvm_run *kvm_run) |
3053 | { | 3106 | { |
3054 | u64 exit_qualification; | 3107 | unsigned long exit_qualification; |
3055 | gpa_t gpa; | 3108 | gpa_t gpa; |
3056 | int gla_validity; | 3109 | int gla_validity; |
3057 | 3110 | ||
3058 | exit_qualification = vmcs_read64(EXIT_QUALIFICATION); | 3111 | exit_qualification = vmcs_readl(EXIT_QUALIFICATION); |
3059 | 3112 | ||
3060 | if (exit_qualification & (1 << 6)) { | 3113 | if (exit_qualification & (1 << 6)) { |
3061 | printk(KERN_ERR "EPT: GPA exceeds GAW!\n"); | 3114 | printk(KERN_ERR "EPT: GPA exceeds GAW!\n"); |
@@ -3067,7 +3120,7 @@ static int handle_ept_violation(struct kvm_vcpu *vcpu, struct kvm_run *kvm_run) | |||
3067 | printk(KERN_ERR "EPT: Handling EPT violation failed!\n"); | 3120 | printk(KERN_ERR "EPT: Handling EPT violation failed!\n"); |
3068 | printk(KERN_ERR "EPT: GPA: 0x%lx, GVA: 0x%lx\n", | 3121 | printk(KERN_ERR "EPT: GPA: 0x%lx, GVA: 0x%lx\n", |
3069 | (long unsigned int)vmcs_read64(GUEST_PHYSICAL_ADDRESS), | 3122 | (long unsigned int)vmcs_read64(GUEST_PHYSICAL_ADDRESS), |
3070 | (long unsigned int)vmcs_read64(GUEST_LINEAR_ADDRESS)); | 3123 | vmcs_readl(GUEST_LINEAR_ADDRESS)); |
3071 | printk(KERN_ERR "EPT: Exit qualification is 0x%lx\n", | 3124 | printk(KERN_ERR "EPT: Exit qualification is 0x%lx\n", |
3072 | (long unsigned int)exit_qualification); | 3125 | (long unsigned int)exit_qualification); |
3073 | kvm_run->exit_reason = KVM_EXIT_UNKNOWN; | 3126 | kvm_run->exit_reason = KVM_EXIT_UNKNOWN; |
@@ -3150,6 +3203,7 @@ static int (*kvm_vmx_exit_handlers[])(struct kvm_vcpu *vcpu, | |||
3150 | [EXIT_REASON_WBINVD] = handle_wbinvd, | 3203 | [EXIT_REASON_WBINVD] = handle_wbinvd, |
3151 | [EXIT_REASON_TASK_SWITCH] = handle_task_switch, | 3204 | [EXIT_REASON_TASK_SWITCH] = handle_task_switch, |
3152 | [EXIT_REASON_EPT_VIOLATION] = handle_ept_violation, | 3205 | [EXIT_REASON_EPT_VIOLATION] = handle_ept_violation, |
3206 | [EXIT_REASON_MCE_DURING_VMENTRY] = handle_machine_check, | ||
3153 | }; | 3207 | }; |
3154 | 3208 | ||
3155 | static const int kvm_vmx_max_exit_handlers = | 3209 | static const int kvm_vmx_max_exit_handlers = |
@@ -3159,10 +3213,10 @@ static const int kvm_vmx_max_exit_handlers = | |||
3159 | * The guest has exited. See if we can fix it or if we need userspace | 3213 | * The guest has exited. See if we can fix it or if we need userspace |
3160 | * assistance. | 3214 | * assistance. |
3161 | */ | 3215 | */ |
3162 | static int kvm_handle_exit(struct kvm_run *kvm_run, struct kvm_vcpu *vcpu) | 3216 | static int vmx_handle_exit(struct kvm_run *kvm_run, struct kvm_vcpu *vcpu) |
3163 | { | 3217 | { |
3164 | u32 exit_reason = vmcs_read32(VM_EXIT_REASON); | ||
3165 | struct vcpu_vmx *vmx = to_vmx(vcpu); | 3218 | struct vcpu_vmx *vmx = to_vmx(vcpu); |
3219 | u32 exit_reason = vmx->exit_reason; | ||
3166 | u32 vectoring_info = vmx->idt_vectoring_info; | 3220 | u32 vectoring_info = vmx->idt_vectoring_info; |
3167 | 3221 | ||
3168 | KVMTRACE_3D(VMEXIT, vcpu, exit_reason, (u32)kvm_rip_read(vcpu), | 3222 | KVMTRACE_3D(VMEXIT, vcpu, exit_reason, (u32)kvm_rip_read(vcpu), |
@@ -3178,7 +3232,7 @@ static int kvm_handle_exit(struct kvm_run *kvm_run, struct kvm_vcpu *vcpu) | |||
3178 | 3232 | ||
3179 | /* Access CR3 don't cause VMExit in paging mode, so we need | 3233 | /* Access CR3 don't cause VMExit in paging mode, so we need |
3180 | * to sync with guest real CR3. */ | 3234 | * to sync with guest real CR3. */ |
3181 | if (vm_need_ept() && is_paging(vcpu)) { | 3235 | if (enable_ept && is_paging(vcpu)) { |
3182 | vcpu->arch.cr3 = vmcs_readl(GUEST_CR3); | 3236 | vcpu->arch.cr3 = vmcs_readl(GUEST_CR3); |
3183 | ept_load_pdptrs(vcpu); | 3237 | ept_load_pdptrs(vcpu); |
3184 | } | 3238 | } |
@@ -3199,9 +3253,8 @@ static int kvm_handle_exit(struct kvm_run *kvm_run, struct kvm_vcpu *vcpu) | |||
3199 | __func__, vectoring_info, exit_reason); | 3253 | __func__, vectoring_info, exit_reason); |
3200 | 3254 | ||
3201 | if (unlikely(!cpu_has_virtual_nmis() && vmx->soft_vnmi_blocked)) { | 3255 | if (unlikely(!cpu_has_virtual_nmis() && vmx->soft_vnmi_blocked)) { |
3202 | if (vcpu->arch.interrupt_window_open) { | 3256 | if (vmx_interrupt_allowed(vcpu)) { |
3203 | vmx->soft_vnmi_blocked = 0; | 3257 | vmx->soft_vnmi_blocked = 0; |
3204 | vcpu->arch.nmi_window_open = 1; | ||
3205 | } else if (vmx->vnmi_blocked_time > 1000000000LL && | 3258 | } else if (vmx->vnmi_blocked_time > 1000000000LL && |
3206 | vcpu->arch.nmi_pending) { | 3259 | vcpu->arch.nmi_pending) { |
3207 | /* | 3260 | /* |
@@ -3214,7 +3267,6 @@ static int kvm_handle_exit(struct kvm_run *kvm_run, struct kvm_vcpu *vcpu) | |||
3214 | "state on VCPU %d after 1 s timeout\n", | 3267 | "state on VCPU %d after 1 s timeout\n", |
3215 | __func__, vcpu->vcpu_id); | 3268 | __func__, vcpu->vcpu_id); |
3216 | vmx->soft_vnmi_blocked = 0; | 3269 | vmx->soft_vnmi_blocked = 0; |
3217 | vmx->vcpu.arch.nmi_window_open = 1; | ||
3218 | } | 3270 | } |
3219 | } | 3271 | } |
3220 | 3272 | ||
@@ -3228,122 +3280,107 @@ static int kvm_handle_exit(struct kvm_run *kvm_run, struct kvm_vcpu *vcpu) | |||
3228 | return 0; | 3280 | return 0; |
3229 | } | 3281 | } |
3230 | 3282 | ||
3231 | static void update_tpr_threshold(struct kvm_vcpu *vcpu) | 3283 | static void update_cr8_intercept(struct kvm_vcpu *vcpu, int tpr, int irr) |
3232 | { | 3284 | { |
3233 | int max_irr, tpr; | 3285 | if (irr == -1 || tpr < irr) { |
3234 | |||
3235 | if (!vm_need_tpr_shadow(vcpu->kvm)) | ||
3236 | return; | ||
3237 | |||
3238 | if (!kvm_lapic_enabled(vcpu) || | ||
3239 | ((max_irr = kvm_lapic_find_highest_irr(vcpu)) == -1)) { | ||
3240 | vmcs_write32(TPR_THRESHOLD, 0); | 3286 | vmcs_write32(TPR_THRESHOLD, 0); |
3241 | return; | 3287 | return; |
3242 | } | 3288 | } |
3243 | 3289 | ||
3244 | tpr = (kvm_lapic_get_cr8(vcpu) & 0x0f) << 4; | 3290 | vmcs_write32(TPR_THRESHOLD, irr); |
3245 | vmcs_write32(TPR_THRESHOLD, (max_irr > tpr) ? tpr >> 4 : max_irr >> 4); | ||
3246 | } | 3291 | } |
3247 | 3292 | ||
3248 | static void vmx_complete_interrupts(struct vcpu_vmx *vmx) | 3293 | static void vmx_complete_interrupts(struct vcpu_vmx *vmx) |
3249 | { | 3294 | { |
3250 | u32 exit_intr_info; | 3295 | u32 exit_intr_info; |
3251 | u32 idt_vectoring_info; | 3296 | u32 idt_vectoring_info = vmx->idt_vectoring_info; |
3252 | bool unblock_nmi; | 3297 | bool unblock_nmi; |
3253 | u8 vector; | 3298 | u8 vector; |
3254 | int type; | 3299 | int type; |
3255 | bool idtv_info_valid; | 3300 | bool idtv_info_valid; |
3256 | u32 error; | ||
3257 | 3301 | ||
3258 | exit_intr_info = vmcs_read32(VM_EXIT_INTR_INFO); | 3302 | exit_intr_info = vmcs_read32(VM_EXIT_INTR_INFO); |
3303 | |||
3304 | vmx->exit_reason = vmcs_read32(VM_EXIT_REASON); | ||
3305 | |||
3306 | /* Handle machine checks before interrupts are enabled */ | ||
3307 | if ((vmx->exit_reason == EXIT_REASON_MCE_DURING_VMENTRY) | ||
3308 | || (vmx->exit_reason == EXIT_REASON_EXCEPTION_NMI | ||
3309 | && is_machine_check(exit_intr_info))) | ||
3310 | kvm_machine_check(); | ||
3311 | |||
3312 | /* We need to handle NMIs before interrupts are enabled */ | ||
3313 | if ((exit_intr_info & INTR_INFO_INTR_TYPE_MASK) == INTR_TYPE_NMI_INTR && | ||
3314 | (exit_intr_info & INTR_INFO_VALID_MASK)) { | ||
3315 | KVMTRACE_0D(NMI, &vmx->vcpu, handler); | ||
3316 | asm("int $2"); | ||
3317 | } | ||
3318 | |||
3319 | idtv_info_valid = idt_vectoring_info & VECTORING_INFO_VALID_MASK; | ||
3320 | |||
3259 | if (cpu_has_virtual_nmis()) { | 3321 | if (cpu_has_virtual_nmis()) { |
3260 | unblock_nmi = (exit_intr_info & INTR_INFO_UNBLOCK_NMI) != 0; | 3322 | unblock_nmi = (exit_intr_info & INTR_INFO_UNBLOCK_NMI) != 0; |
3261 | vector = exit_intr_info & INTR_INFO_VECTOR_MASK; | 3323 | vector = exit_intr_info & INTR_INFO_VECTOR_MASK; |
3262 | /* | 3324 | /* |
3263 | * SDM 3: 25.7.1.2 | 3325 | * SDM 3: 27.7.1.2 (September 2008) |
3264 | * Re-set bit "block by NMI" before VM entry if vmexit caused by | 3326 | * Re-set bit "block by NMI" before VM entry if vmexit caused by |
3265 | * a guest IRET fault. | 3327 | * a guest IRET fault. |
3328 | * SDM 3: 23.2.2 (September 2008) | ||
3329 | * Bit 12 is undefined in any of the following cases: | ||
3330 | * If the VM exit sets the valid bit in the IDT-vectoring | ||
3331 | * information field. | ||
3332 | * If the VM exit is due to a double fault. | ||
3266 | */ | 3333 | */ |
3267 | if (unblock_nmi && vector != DF_VECTOR) | 3334 | if ((exit_intr_info & INTR_INFO_VALID_MASK) && unblock_nmi && |
3335 | vector != DF_VECTOR && !idtv_info_valid) | ||
3268 | vmcs_set_bits(GUEST_INTERRUPTIBILITY_INFO, | 3336 | vmcs_set_bits(GUEST_INTERRUPTIBILITY_INFO, |
3269 | GUEST_INTR_STATE_NMI); | 3337 | GUEST_INTR_STATE_NMI); |
3270 | } else if (unlikely(vmx->soft_vnmi_blocked)) | 3338 | } else if (unlikely(vmx->soft_vnmi_blocked)) |
3271 | vmx->vnmi_blocked_time += | 3339 | vmx->vnmi_blocked_time += |
3272 | ktime_to_ns(ktime_sub(ktime_get(), vmx->entry_time)); | 3340 | ktime_to_ns(ktime_sub(ktime_get(), vmx->entry_time)); |
3273 | 3341 | ||
3274 | idt_vectoring_info = vmx->idt_vectoring_info; | 3342 | vmx->vcpu.arch.nmi_injected = false; |
3275 | idtv_info_valid = idt_vectoring_info & VECTORING_INFO_VALID_MASK; | 3343 | kvm_clear_exception_queue(&vmx->vcpu); |
3344 | kvm_clear_interrupt_queue(&vmx->vcpu); | ||
3345 | |||
3346 | if (!idtv_info_valid) | ||
3347 | return; | ||
3348 | |||
3276 | vector = idt_vectoring_info & VECTORING_INFO_VECTOR_MASK; | 3349 | vector = idt_vectoring_info & VECTORING_INFO_VECTOR_MASK; |
3277 | type = idt_vectoring_info & VECTORING_INFO_TYPE_MASK; | 3350 | type = idt_vectoring_info & VECTORING_INFO_TYPE_MASK; |
3278 | if (vmx->vcpu.arch.nmi_injected) { | 3351 | |
3352 | switch (type) { | ||
3353 | case INTR_TYPE_NMI_INTR: | ||
3354 | vmx->vcpu.arch.nmi_injected = true; | ||
3279 | /* | 3355 | /* |
3280 | * SDM 3: 25.7.1.2 | 3356 | * SDM 3: 27.7.1.2 (September 2008) |
3281 | * Clear bit "block by NMI" before VM entry if a NMI delivery | 3357 | * Clear bit "block by NMI" before VM entry if a NMI |
3282 | * faulted. | 3358 | * delivery faulted. |
3283 | */ | 3359 | */ |
3284 | if (idtv_info_valid && type == INTR_TYPE_NMI_INTR) | 3360 | vmcs_clear_bits(GUEST_INTERRUPTIBILITY_INFO, |
3285 | vmcs_clear_bits(GUEST_INTERRUPTIBILITY_INFO, | 3361 | GUEST_INTR_STATE_NMI); |
3286 | GUEST_INTR_STATE_NMI); | 3362 | break; |
3287 | else | 3363 | case INTR_TYPE_SOFT_EXCEPTION: |
3288 | vmx->vcpu.arch.nmi_injected = false; | 3364 | vmx->vcpu.arch.event_exit_inst_len = |
3289 | } | 3365 | vmcs_read32(VM_EXIT_INSTRUCTION_LEN); |
3290 | kvm_clear_exception_queue(&vmx->vcpu); | 3366 | /* fall through */ |
3291 | if (idtv_info_valid && (type == INTR_TYPE_HARD_EXCEPTION || | 3367 | case INTR_TYPE_HARD_EXCEPTION: |
3292 | type == INTR_TYPE_SOFT_EXCEPTION)) { | ||
3293 | if (idt_vectoring_info & VECTORING_INFO_DELIVER_CODE_MASK) { | 3368 | if (idt_vectoring_info & VECTORING_INFO_DELIVER_CODE_MASK) { |
3294 | error = vmcs_read32(IDT_VECTORING_ERROR_CODE); | 3369 | u32 err = vmcs_read32(IDT_VECTORING_ERROR_CODE); |
3295 | kvm_queue_exception_e(&vmx->vcpu, vector, error); | 3370 | kvm_queue_exception_e(&vmx->vcpu, vector, err); |
3296 | } else | 3371 | } else |
3297 | kvm_queue_exception(&vmx->vcpu, vector); | 3372 | kvm_queue_exception(&vmx->vcpu, vector); |
3298 | vmx->idt_vectoring_info = 0; | 3373 | break; |
3299 | } | 3374 | case INTR_TYPE_SOFT_INTR: |
3300 | kvm_clear_interrupt_queue(&vmx->vcpu); | 3375 | vmx->vcpu.arch.event_exit_inst_len = |
3301 | if (idtv_info_valid && type == INTR_TYPE_EXT_INTR) { | 3376 | vmcs_read32(VM_EXIT_INSTRUCTION_LEN); |
3302 | kvm_queue_interrupt(&vmx->vcpu, vector); | 3377 | /* fall through */ |
3303 | vmx->idt_vectoring_info = 0; | 3378 | case INTR_TYPE_EXT_INTR: |
3304 | } | 3379 | kvm_queue_interrupt(&vmx->vcpu, vector, |
3305 | } | 3380 | type == INTR_TYPE_SOFT_INTR); |
3306 | 3381 | break; | |
3307 | static void vmx_intr_assist(struct kvm_vcpu *vcpu) | 3382 | default: |
3308 | { | 3383 | break; |
3309 | update_tpr_threshold(vcpu); | ||
3310 | |||
3311 | vmx_update_window_states(vcpu); | ||
3312 | |||
3313 | if (vcpu->guest_debug & KVM_GUESTDBG_SINGLESTEP) | ||
3314 | vmcs_clear_bits(GUEST_INTERRUPTIBILITY_INFO, | ||
3315 | GUEST_INTR_STATE_STI | | ||
3316 | GUEST_INTR_STATE_MOV_SS); | ||
3317 | |||
3318 | if (vcpu->arch.nmi_pending && !vcpu->arch.nmi_injected) { | ||
3319 | if (vcpu->arch.interrupt.pending) { | ||
3320 | enable_nmi_window(vcpu); | ||
3321 | } else if (vcpu->arch.nmi_window_open) { | ||
3322 | vcpu->arch.nmi_pending = false; | ||
3323 | vcpu->arch.nmi_injected = true; | ||
3324 | } else { | ||
3325 | enable_nmi_window(vcpu); | ||
3326 | return; | ||
3327 | } | ||
3328 | } | ||
3329 | if (vcpu->arch.nmi_injected) { | ||
3330 | vmx_inject_nmi(vcpu); | ||
3331 | if (vcpu->arch.nmi_pending) | ||
3332 | enable_nmi_window(vcpu); | ||
3333 | else if (kvm_cpu_has_interrupt(vcpu)) | ||
3334 | enable_irq_window(vcpu); | ||
3335 | return; | ||
3336 | } | ||
3337 | if (!vcpu->arch.interrupt.pending && kvm_cpu_has_interrupt(vcpu)) { | ||
3338 | if (vcpu->arch.interrupt_window_open) | ||
3339 | kvm_queue_interrupt(vcpu, kvm_cpu_get_interrupt(vcpu)); | ||
3340 | else | ||
3341 | enable_irq_window(vcpu); | ||
3342 | } | ||
3343 | if (vcpu->arch.interrupt.pending) { | ||
3344 | vmx_inject_irq(vcpu, vcpu->arch.interrupt.nr); | ||
3345 | if (kvm_cpu_has_interrupt(vcpu)) | ||
3346 | enable_irq_window(vcpu); | ||
3347 | } | 3384 | } |
3348 | } | 3385 | } |
3349 | 3386 | ||
@@ -3381,7 +3418,6 @@ static void fixup_rmode_irq(struct vcpu_vmx *vmx) | |||
3381 | static void vmx_vcpu_run(struct kvm_vcpu *vcpu, struct kvm_run *kvm_run) | 3418 | static void vmx_vcpu_run(struct kvm_vcpu *vcpu, struct kvm_run *kvm_run) |
3382 | { | 3419 | { |
3383 | struct vcpu_vmx *vmx = to_vmx(vcpu); | 3420 | struct vcpu_vmx *vmx = to_vmx(vcpu); |
3384 | u32 intr_info; | ||
3385 | 3421 | ||
3386 | /* Record the guest's net vcpu time for enforced NMI injections. */ | 3422 | /* Record the guest's net vcpu time for enforced NMI injections. */ |
3387 | if (unlikely(!cpu_has_virtual_nmis() && vmx->soft_vnmi_blocked)) | 3423 | if (unlikely(!cpu_has_virtual_nmis() && vmx->soft_vnmi_blocked)) |
@@ -3505,20 +3541,9 @@ static void vmx_vcpu_run(struct kvm_vcpu *vcpu, struct kvm_run *kvm_run) | |||
3505 | if (vmx->rmode.irq.pending) | 3541 | if (vmx->rmode.irq.pending) |
3506 | fixup_rmode_irq(vmx); | 3542 | fixup_rmode_irq(vmx); |
3507 | 3543 | ||
3508 | vmx_update_window_states(vcpu); | ||
3509 | |||
3510 | asm("mov %0, %%ds; mov %0, %%es" : : "r"(__USER_DS)); | 3544 | asm("mov %0, %%ds; mov %0, %%es" : : "r"(__USER_DS)); |
3511 | vmx->launched = 1; | 3545 | vmx->launched = 1; |
3512 | 3546 | ||
3513 | intr_info = vmcs_read32(VM_EXIT_INTR_INFO); | ||
3514 | |||
3515 | /* We need to handle NMIs before interrupts are enabled */ | ||
3516 | if ((intr_info & INTR_INFO_INTR_TYPE_MASK) == INTR_TYPE_NMI_INTR && | ||
3517 | (intr_info & INTR_INFO_VALID_MASK)) { | ||
3518 | KVMTRACE_0D(NMI, vcpu, handler); | ||
3519 | asm("int $2"); | ||
3520 | } | ||
3521 | |||
3522 | vmx_complete_interrupts(vmx); | 3547 | vmx_complete_interrupts(vmx); |
3523 | } | 3548 | } |
3524 | 3549 | ||
@@ -3593,7 +3618,7 @@ static struct kvm_vcpu *vmx_create_vcpu(struct kvm *kvm, unsigned int id) | |||
3593 | if (alloc_apic_access_page(kvm) != 0) | 3618 | if (alloc_apic_access_page(kvm) != 0) |
3594 | goto free_vmcs; | 3619 | goto free_vmcs; |
3595 | 3620 | ||
3596 | if (vm_need_ept()) | 3621 | if (enable_ept) |
3597 | if (alloc_identity_pagetable(kvm) != 0) | 3622 | if (alloc_identity_pagetable(kvm) != 0) |
3598 | goto free_vmcs; | 3623 | goto free_vmcs; |
3599 | 3624 | ||
@@ -3631,9 +3656,32 @@ static int get_ept_level(void) | |||
3631 | return VMX_EPT_DEFAULT_GAW + 1; | 3656 | return VMX_EPT_DEFAULT_GAW + 1; |
3632 | } | 3657 | } |
3633 | 3658 | ||
3634 | static int vmx_get_mt_mask_shift(void) | 3659 | static u64 vmx_get_mt_mask(struct kvm_vcpu *vcpu, gfn_t gfn, bool is_mmio) |
3635 | { | 3660 | { |
3636 | return VMX_EPT_MT_EPTE_SHIFT; | 3661 | u64 ret; |
3662 | |||
3663 | /* For VT-d and EPT combination | ||
3664 | * 1. MMIO: always map as UC | ||
3665 | * 2. EPT with VT-d: | ||
3666 | * a. VT-d without snooping control feature: can't guarantee the | ||
3667 | * result, try to trust guest. | ||
3668 | * b. VT-d with snooping control feature: snooping control feature of | ||
3669 | * VT-d engine can guarantee the cache correctness. Just set it | ||
3670 | * to WB to keep consistent with host. So the same as item 3. | ||
3671 | * 3. EPT without VT-d: always map as WB and set IGMT=1 to keep | ||
3672 | * consistent with host MTRR | ||
3673 | */ | ||
3674 | if (is_mmio) | ||
3675 | ret = MTRR_TYPE_UNCACHABLE << VMX_EPT_MT_EPTE_SHIFT; | ||
3676 | else if (vcpu->kvm->arch.iommu_domain && | ||
3677 | !(vcpu->kvm->arch.iommu_flags & KVM_IOMMU_CACHE_COHERENCY)) | ||
3678 | ret = kvm_get_guest_memory_type(vcpu, gfn) << | ||
3679 | VMX_EPT_MT_EPTE_SHIFT; | ||
3680 | else | ||
3681 | ret = (MTRR_TYPE_WRBACK << VMX_EPT_MT_EPTE_SHIFT) | ||
3682 | | VMX_EPT_IGMT_BIT; | ||
3683 | |||
3684 | return ret; | ||
3637 | } | 3685 | } |
3638 | 3686 | ||
3639 | static struct kvm_x86_ops vmx_x86_ops = { | 3687 | static struct kvm_x86_ops vmx_x86_ops = { |
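The value returned by vmx_get_mt_mask() above is destined for an EPT PTE: the MTRR-style memory type is shifted into the PTE's memory-type field and the IGMT bit tells the CPU to ignore the guest PAT for that mapping. A minimal sketch of the same decision tree, with the bit positions and MTRR type numbers written out as assumptions (the usual layout puts the memory type in PTE bits 5:3 and ignore-PAT in bit 6) rather than taken from the kernel headers:

#include <stdbool.h>
#include <stdint.h>

/* Assumed encodings; the patch's VMX_EPT_MT_EPTE_SHIFT / VMX_EPT_IGMT_BIT. */
#define EPT_MT_SHIFT    3
#define EPT_IGNORE_PAT  (1ULL << 6)
#define MT_UNCACHABLE   0ULL    /* MTRR type UC */
#define MT_WRITEBACK    6ULL    /* MTRR type WB */

/*
 * Illustrative sketch of the policy: MMIO is always mapped UC; with a
 * non-coherent IOMMU attached the guest's own memory type is trusted;
 * otherwise force WB and ignore the guest PAT.
 */
static uint64_t ept_memtype_bits(bool is_mmio, bool noncoherent_iommu,
                                 uint64_t guest_type)
{
        if (is_mmio)
                return MT_UNCACHABLE << EPT_MT_SHIFT;
        if (noncoherent_iommu)
                return guest_type << EPT_MT_SHIFT;
        return (MT_WRITEBACK << EPT_MT_SHIFT) | EPT_IGNORE_PAT;
}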
@@ -3644,7 +3692,7 @@ static struct kvm_x86_ops vmx_x86_ops = { | |||
3644 | .check_processor_compatibility = vmx_check_processor_compat, | 3692 | .check_processor_compatibility = vmx_check_processor_compat, |
3645 | .hardware_enable = hardware_enable, | 3693 | .hardware_enable = hardware_enable, |
3646 | .hardware_disable = hardware_disable, | 3694 | .hardware_disable = hardware_disable, |
3647 | .cpu_has_accelerated_tpr = cpu_has_vmx_virtualize_apic_accesses, | 3695 | .cpu_has_accelerated_tpr = report_flexpriority, |
3648 | 3696 | ||
3649 | .vcpu_create = vmx_create_vcpu, | 3697 | .vcpu_create = vmx_create_vcpu, |
3650 | .vcpu_free = vmx_free_vcpu, | 3698 | .vcpu_free = vmx_free_vcpu, |
@@ -3678,78 +3726,82 @@ static struct kvm_x86_ops vmx_x86_ops = { | |||
3678 | .tlb_flush = vmx_flush_tlb, | 3726 | .tlb_flush = vmx_flush_tlb, |
3679 | 3727 | ||
3680 | .run = vmx_vcpu_run, | 3728 | .run = vmx_vcpu_run, |
3681 | .handle_exit = kvm_handle_exit, | 3729 | .handle_exit = vmx_handle_exit, |
3682 | .skip_emulated_instruction = skip_emulated_instruction, | 3730 | .skip_emulated_instruction = skip_emulated_instruction, |
3731 | .set_interrupt_shadow = vmx_set_interrupt_shadow, | ||
3732 | .get_interrupt_shadow = vmx_get_interrupt_shadow, | ||
3683 | .patch_hypercall = vmx_patch_hypercall, | 3733 | .patch_hypercall = vmx_patch_hypercall, |
3684 | .get_irq = vmx_get_irq, | ||
3685 | .set_irq = vmx_inject_irq, | 3734 | .set_irq = vmx_inject_irq, |
3735 | .set_nmi = vmx_inject_nmi, | ||
3686 | .queue_exception = vmx_queue_exception, | 3736 | .queue_exception = vmx_queue_exception, |
3687 | .exception_injected = vmx_exception_injected, | 3737 | .interrupt_allowed = vmx_interrupt_allowed, |
3688 | .inject_pending_irq = vmx_intr_assist, | 3738 | .nmi_allowed = vmx_nmi_allowed, |
3689 | .inject_pending_vectors = do_interrupt_requests, | 3739 | .enable_nmi_window = enable_nmi_window, |
3740 | .enable_irq_window = enable_irq_window, | ||
3741 | .update_cr8_intercept = update_cr8_intercept, | ||
3690 | 3742 | ||
3691 | .set_tss_addr = vmx_set_tss_addr, | 3743 | .set_tss_addr = vmx_set_tss_addr, |
3692 | .get_tdp_level = get_ept_level, | 3744 | .get_tdp_level = get_ept_level, |
3693 | .get_mt_mask_shift = vmx_get_mt_mask_shift, | 3745 | .get_mt_mask = vmx_get_mt_mask, |
3694 | }; | 3746 | }; |
3695 | 3747 | ||
3696 | static int __init vmx_init(void) | 3748 | static int __init vmx_init(void) |
3697 | { | 3749 | { |
3698 | void *va; | ||
3699 | int r; | 3750 | int r; |
3700 | 3751 | ||
3701 | vmx_io_bitmap_a = alloc_page(GFP_KERNEL | __GFP_HIGHMEM); | 3752 | vmx_io_bitmap_a = (unsigned long *)__get_free_page(GFP_KERNEL); |
3702 | if (!vmx_io_bitmap_a) | 3753 | if (!vmx_io_bitmap_a) |
3703 | return -ENOMEM; | 3754 | return -ENOMEM; |
3704 | 3755 | ||
3705 | vmx_io_bitmap_b = alloc_page(GFP_KERNEL | __GFP_HIGHMEM); | 3756 | vmx_io_bitmap_b = (unsigned long *)__get_free_page(GFP_KERNEL); |
3706 | if (!vmx_io_bitmap_b) { | 3757 | if (!vmx_io_bitmap_b) { |
3707 | r = -ENOMEM; | 3758 | r = -ENOMEM; |
3708 | goto out; | 3759 | goto out; |
3709 | } | 3760 | } |
3710 | 3761 | ||
3711 | vmx_msr_bitmap = alloc_page(GFP_KERNEL | __GFP_HIGHMEM); | 3762 | vmx_msr_bitmap_legacy = (unsigned long *)__get_free_page(GFP_KERNEL); |
3712 | if (!vmx_msr_bitmap) { | 3763 | if (!vmx_msr_bitmap_legacy) { |
3713 | r = -ENOMEM; | 3764 | r = -ENOMEM; |
3714 | goto out1; | 3765 | goto out1; |
3715 | } | 3766 | } |
3716 | 3767 | ||
3768 | vmx_msr_bitmap_longmode = (unsigned long *)__get_free_page(GFP_KERNEL); | ||
3769 | if (!vmx_msr_bitmap_longmode) { | ||
3770 | r = -ENOMEM; | ||
3771 | goto out2; | ||
3772 | } | ||
3773 | |||
3717 | /* | 3774 | /* |
3718 | * Allow direct access to the PC debug port (it is often used for I/O | 3775 | * Allow direct access to the PC debug port (it is often used for I/O |
3719 | * delays, but the vmexits simply slow things down). | 3776 | * delays, but the vmexits simply slow things down). |
3720 | */ | 3777 | */ |
3721 | va = kmap(vmx_io_bitmap_a); | 3778 | memset(vmx_io_bitmap_a, 0xff, PAGE_SIZE); |
3722 | memset(va, 0xff, PAGE_SIZE); | 3779 | clear_bit(0x80, vmx_io_bitmap_a); |
3723 | clear_bit(0x80, va); | ||
3724 | kunmap(vmx_io_bitmap_a); | ||
3725 | 3780 | ||
3726 | va = kmap(vmx_io_bitmap_b); | 3781 | memset(vmx_io_bitmap_b, 0xff, PAGE_SIZE); |
3727 | memset(va, 0xff, PAGE_SIZE); | ||
3728 | kunmap(vmx_io_bitmap_b); | ||
3729 | 3782 | ||
3730 | va = kmap(vmx_msr_bitmap); | 3783 | memset(vmx_msr_bitmap_legacy, 0xff, PAGE_SIZE); |
3731 | memset(va, 0xff, PAGE_SIZE); | 3784 | memset(vmx_msr_bitmap_longmode, 0xff, PAGE_SIZE); |
3732 | kunmap(vmx_msr_bitmap); | ||
3733 | 3785 | ||
3734 | set_bit(0, vmx_vpid_bitmap); /* 0 is reserved for host */ | 3786 | set_bit(0, vmx_vpid_bitmap); /* 0 is reserved for host */ |
3735 | 3787 | ||
3736 | r = kvm_init(&vmx_x86_ops, sizeof(struct vcpu_vmx), THIS_MODULE); | 3788 | r = kvm_init(&vmx_x86_ops, sizeof(struct vcpu_vmx), THIS_MODULE); |
3737 | if (r) | 3789 | if (r) |
3738 | goto out2; | 3790 | goto out3; |
3739 | 3791 | ||
3740 | vmx_disable_intercept_for_msr(vmx_msr_bitmap, MSR_FS_BASE); | 3792 | vmx_disable_intercept_for_msr(MSR_FS_BASE, false); |
3741 | vmx_disable_intercept_for_msr(vmx_msr_bitmap, MSR_GS_BASE); | 3793 | vmx_disable_intercept_for_msr(MSR_GS_BASE, false); |
3742 | vmx_disable_intercept_for_msr(vmx_msr_bitmap, MSR_IA32_SYSENTER_CS); | 3794 | vmx_disable_intercept_for_msr(MSR_KERNEL_GS_BASE, true); |
3743 | vmx_disable_intercept_for_msr(vmx_msr_bitmap, MSR_IA32_SYSENTER_ESP); | 3795 | vmx_disable_intercept_for_msr(MSR_IA32_SYSENTER_CS, false); |
3744 | vmx_disable_intercept_for_msr(vmx_msr_bitmap, MSR_IA32_SYSENTER_EIP); | 3796 | vmx_disable_intercept_for_msr(MSR_IA32_SYSENTER_ESP, false); |
3797 | vmx_disable_intercept_for_msr(MSR_IA32_SYSENTER_EIP, false); | ||
3745 | 3798 | ||
3746 | if (vm_need_ept()) { | 3799 | if (enable_ept) { |
3747 | bypass_guest_pf = 0; | 3800 | bypass_guest_pf = 0; |
3748 | kvm_mmu_set_base_ptes(VMX_EPT_READABLE_MASK | | 3801 | kvm_mmu_set_base_ptes(VMX_EPT_READABLE_MASK | |
3749 | VMX_EPT_WRITABLE_MASK); | 3802 | VMX_EPT_WRITABLE_MASK); |
3750 | kvm_mmu_set_mask_ptes(0ull, 0ull, 0ull, 0ull, | 3803 | kvm_mmu_set_mask_ptes(0ull, 0ull, 0ull, 0ull, |
3751 | VMX_EPT_EXECUTABLE_MASK, | 3804 | VMX_EPT_EXECUTABLE_MASK); |
3752 | VMX_EPT_DEFAULT_MT << VMX_EPT_MT_EPTE_SHIFT); | ||
3753 | kvm_enable_tdp(); | 3805 | kvm_enable_tdp(); |
3754 | } else | 3806 | } else |
3755 | kvm_disable_tdp(); | 3807 | kvm_disable_tdp(); |
@@ -3761,20 +3813,23 @@ static int __init vmx_init(void) | |||
3761 | 3813 | ||
3762 | return 0; | 3814 | return 0; |
3763 | 3815 | ||
3816 | out3: | ||
3817 | free_page((unsigned long)vmx_msr_bitmap_longmode); | ||
3764 | out2: | 3818 | out2: |
3765 | __free_page(vmx_msr_bitmap); | 3819 | free_page((unsigned long)vmx_msr_bitmap_legacy); |
3766 | out1: | 3820 | out1: |
3767 | __free_page(vmx_io_bitmap_b); | 3821 | free_page((unsigned long)vmx_io_bitmap_b); |
3768 | out: | 3822 | out: |
3769 | __free_page(vmx_io_bitmap_a); | 3823 | free_page((unsigned long)vmx_io_bitmap_a); |
3770 | return r; | 3824 | return r; |
3771 | } | 3825 | } |
3772 | 3826 | ||
3773 | static void __exit vmx_exit(void) | 3827 | static void __exit vmx_exit(void) |
3774 | { | 3828 | { |
3775 | __free_page(vmx_msr_bitmap); | 3829 | free_page((unsigned long)vmx_msr_bitmap_legacy); |
3776 | __free_page(vmx_io_bitmap_b); | 3830 | free_page((unsigned long)vmx_msr_bitmap_longmode); |
3777 | __free_page(vmx_io_bitmap_a); | 3831 | free_page((unsigned long)vmx_io_bitmap_b); |
3832 | free_page((unsigned long)vmx_io_bitmap_a); | ||
3778 | 3833 | ||
3779 | kvm_exit(); | 3834 | kvm_exit(); |
3780 | } | 3835 | } |
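The reworked vmx_init()/vmx_exit() above moves the I/O and MSR bitmaps from kmap()-ed struct pages to directly addressable pages from __get_free_page(), so they can be memset() in place and released with free_page(), and it unwinds allocation failures through chained goto labels (out3/out2/out1/out). A minimal sketch of that allocate/initialise/unwind idiom follows; the names are illustrative, not the real VMX symbols.

#include <linux/init.h>
#include <linux/gfp.h>
#include <linux/mm.h>
#include <linux/string.h>
#include <linux/bitops.h>

static unsigned long *io_bitmap, *msr_bitmap;	/* illustrative globals */

static int __init example_bitmaps_init(void)
{
	int r = -ENOMEM;

	io_bitmap = (unsigned long *)__get_free_page(GFP_KERNEL);
	if (!io_bitmap)
		goto out;

	msr_bitmap = (unsigned long *)__get_free_page(GFP_KERNEL);
	if (!msr_bitmap)
		goto out_free_io;

	memset(io_bitmap, 0xff, PAGE_SIZE);	/* 1 == exit on port access ...  */
	clear_bit(0x80, io_bitmap);		/* ... except the PC debug port  */
	memset(msr_bitmap, 0xff, PAGE_SIZE);	/* intercept every MSR for now   */

	return 0;

out_free_io:
	free_page((unsigned long)io_bitmap);
out:
	return r;
}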
diff --git a/arch/x86/kvm/x86.c b/arch/x86/kvm/x86.c index 3944e917e794..249540f98513 100644 --- a/arch/x86/kvm/x86.c +++ b/arch/x86/kvm/x86.c | |||
@@ -91,7 +91,6 @@ struct kvm_stats_debugfs_item debugfs_entries[] = { | |||
91 | { "halt_wakeup", VCPU_STAT(halt_wakeup) }, | 91 | { "halt_wakeup", VCPU_STAT(halt_wakeup) }, |
92 | { "hypercalls", VCPU_STAT(hypercalls) }, | 92 | { "hypercalls", VCPU_STAT(hypercalls) }, |
93 | { "request_irq", VCPU_STAT(request_irq_exits) }, | 93 | { "request_irq", VCPU_STAT(request_irq_exits) }, |
94 | { "request_nmi", VCPU_STAT(request_nmi_exits) }, | ||
95 | { "irq_exits", VCPU_STAT(irq_exits) }, | 94 | { "irq_exits", VCPU_STAT(irq_exits) }, |
96 | { "host_state_reload", VCPU_STAT(host_state_reload) }, | 95 | { "host_state_reload", VCPU_STAT(host_state_reload) }, |
97 | { "efer_reload", VCPU_STAT(efer_reload) }, | 96 | { "efer_reload", VCPU_STAT(efer_reload) }, |
@@ -108,7 +107,6 @@ struct kvm_stats_debugfs_item debugfs_entries[] = { | |||
108 | { "mmu_recycled", VM_STAT(mmu_recycled) }, | 107 | { "mmu_recycled", VM_STAT(mmu_recycled) }, |
109 | { "mmu_cache_miss", VM_STAT(mmu_cache_miss) }, | 108 | { "mmu_cache_miss", VM_STAT(mmu_cache_miss) }, |
110 | { "mmu_unsync", VM_STAT(mmu_unsync) }, | 109 | { "mmu_unsync", VM_STAT(mmu_unsync) }, |
111 | { "mmu_unsync_global", VM_STAT(mmu_unsync_global) }, | ||
112 | { "remote_tlb_flush", VM_STAT(remote_tlb_flush) }, | 110 | { "remote_tlb_flush", VM_STAT(remote_tlb_flush) }, |
113 | { "largepages", VM_STAT(lpages) }, | 111 | { "largepages", VM_STAT(lpages) }, |
114 | { NULL } | 112 | { NULL } |
@@ -234,7 +232,8 @@ int load_pdptrs(struct kvm_vcpu *vcpu, unsigned long cr3) | |||
234 | goto out; | 232 | goto out; |
235 | } | 233 | } |
236 | for (i = 0; i < ARRAY_SIZE(pdpte); ++i) { | 234 | for (i = 0; i < ARRAY_SIZE(pdpte); ++i) { |
237 | if ((pdpte[i] & 1) && (pdpte[i] & 0xfffffff0000001e6ull)) { | 235 | if (is_present_pte(pdpte[i]) && |
236 | (pdpte[i] & vcpu->arch.mmu.rsvd_bits_mask[0][2])) { | ||
238 | ret = 0; | 237 | ret = 0; |
239 | goto out; | 238 | goto out; |
240 | } | 239 | } |
@@ -321,7 +320,6 @@ void kvm_set_cr0(struct kvm_vcpu *vcpu, unsigned long cr0) | |||
321 | kvm_x86_ops->set_cr0(vcpu, cr0); | 320 | kvm_x86_ops->set_cr0(vcpu, cr0); |
322 | vcpu->arch.cr0 = cr0; | 321 | vcpu->arch.cr0 = cr0; |
323 | 322 | ||
324 | kvm_mmu_sync_global(vcpu); | ||
325 | kvm_mmu_reset_context(vcpu); | 323 | kvm_mmu_reset_context(vcpu); |
326 | return; | 324 | return; |
327 | } | 325 | } |
@@ -370,7 +368,6 @@ void kvm_set_cr4(struct kvm_vcpu *vcpu, unsigned long cr4) | |||
370 | kvm_x86_ops->set_cr4(vcpu, cr4); | 368 | kvm_x86_ops->set_cr4(vcpu, cr4); |
371 | vcpu->arch.cr4 = cr4; | 369 | vcpu->arch.cr4 = cr4; |
372 | vcpu->arch.mmu.base_role.cr4_pge = (cr4 & X86_CR4_PGE) && !tdp_enabled; | 370 | vcpu->arch.mmu.base_role.cr4_pge = (cr4 & X86_CR4_PGE) && !tdp_enabled; |
373 | kvm_mmu_sync_global(vcpu); | ||
374 | kvm_mmu_reset_context(vcpu); | 371 | kvm_mmu_reset_context(vcpu); |
375 | } | 372 | } |
376 | EXPORT_SYMBOL_GPL(kvm_set_cr4); | 373 | EXPORT_SYMBOL_GPL(kvm_set_cr4); |
@@ -523,6 +520,9 @@ static void set_efer(struct kvm_vcpu *vcpu, u64 efer) | |||
523 | efer |= vcpu->arch.shadow_efer & EFER_LMA; | 520 | efer |= vcpu->arch.shadow_efer & EFER_LMA; |
524 | 521 | ||
525 | vcpu->arch.shadow_efer = efer; | 522 | vcpu->arch.shadow_efer = efer; |
523 | |||
524 | vcpu->arch.mmu.base_role.nxe = (efer & EFER_NX) && !tdp_enabled; | ||
525 | kvm_mmu_reset_context(vcpu); | ||
526 | } | 526 | } |
527 | 527 | ||
528 | void kvm_enable_efer_bits(u64 mask) | 528 | void kvm_enable_efer_bits(u64 mask) |
@@ -630,14 +630,17 @@ static void kvm_write_guest_time(struct kvm_vcpu *v) | |||
630 | unsigned long flags; | 630 | unsigned long flags; |
631 | struct kvm_vcpu_arch *vcpu = &v->arch; | 631 | struct kvm_vcpu_arch *vcpu = &v->arch; |
632 | void *shared_kaddr; | 632 | void *shared_kaddr; |
633 | unsigned long this_tsc_khz; | ||
633 | 634 | ||
634 | if ((!vcpu->time_page)) | 635 | if ((!vcpu->time_page)) |
635 | return; | 636 | return; |
636 | 637 | ||
637 | if (unlikely(vcpu->hv_clock_tsc_khz != __get_cpu_var(cpu_tsc_khz))) { | 638 | this_tsc_khz = get_cpu_var(cpu_tsc_khz); |
638 | kvm_set_time_scale(__get_cpu_var(cpu_tsc_khz), &vcpu->hv_clock); | 639 | if (unlikely(vcpu->hv_clock_tsc_khz != this_tsc_khz)) { |
639 | vcpu->hv_clock_tsc_khz = __get_cpu_var(cpu_tsc_khz); | 640 | kvm_set_time_scale(this_tsc_khz, &vcpu->hv_clock); |
641 | vcpu->hv_clock_tsc_khz = this_tsc_khz; | ||
640 | } | 642 | } |
643 | put_cpu_var(cpu_tsc_khz); | ||
641 | 644 | ||
642 | /* Keep irq disabled to prevent changes to the clock */ | 645 | /* Keep irq disabled to prevent changes to the clock */ |
643 | local_irq_save(flags); | 646 | local_irq_save(flags); |
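The hunk above replaces the raw __get_cpu_var() reads with a get_cpu_var()/put_cpu_var() pair, so preemption stays disabled while the per-cpu TSC frequency is read and the vcpu thread cannot migrate between reading the value and using it. A small sketch of the pairing, using an illustrative per-cpu variable rather than the real cpu_tsc_khz:

#include <linux/percpu.h>

static DEFINE_PER_CPU(unsigned long, example_tsc_khz);

static unsigned long read_local_tsc_khz(void)
{
	unsigned long khz;

	/* get_cpu_var() disables preemption and resolves to this CPU's copy */
	khz = get_cpu_var(example_tsc_khz);
	put_cpu_var(example_tsc_khz);		/* re-enables preemption */

	return khz;
}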
@@ -893,6 +896,8 @@ int kvm_get_msr_common(struct kvm_vcpu *vcpu, u32 msr, u64 *pdata) | |||
893 | case MSR_IA32_LASTINTFROMIP: | 896 | case MSR_IA32_LASTINTFROMIP: |
894 | case MSR_IA32_LASTINTTOIP: | 897 | case MSR_IA32_LASTINTTOIP: |
895 | case MSR_VM_HSAVE_PA: | 898 | case MSR_VM_HSAVE_PA: |
899 | case MSR_P6_EVNTSEL0: | ||
900 | case MSR_P6_EVNTSEL1: | ||
896 | data = 0; | 901 | data = 0; |
897 | break; | 902 | break; |
898 | case MSR_MTRRcap: | 903 | case MSR_MTRRcap: |
@@ -1024,6 +1029,7 @@ int kvm_dev_ioctl_check_extension(long ext) | |||
1024 | case KVM_CAP_SYNC_MMU: | 1029 | case KVM_CAP_SYNC_MMU: |
1025 | case KVM_CAP_REINJECT_CONTROL: | 1030 | case KVM_CAP_REINJECT_CONTROL: |
1026 | case KVM_CAP_IRQ_INJECT_STATUS: | 1031 | case KVM_CAP_IRQ_INJECT_STATUS: |
1032 | case KVM_CAP_ASSIGN_DEV_IRQ: | ||
1027 | r = 1; | 1033 | r = 1; |
1028 | break; | 1034 | break; |
1029 | case KVM_CAP_COALESCED_MMIO: | 1035 | case KVM_CAP_COALESCED_MMIO: |
@@ -1241,41 +1247,53 @@ static void do_cpuid_1_ent(struct kvm_cpuid_entry2 *entry, u32 function, | |||
1241 | entry->flags = 0; | 1247 | entry->flags = 0; |
1242 | } | 1248 | } |
1243 | 1249 | ||
1250 | #define F(x) bit(X86_FEATURE_##x) | ||
1251 | |||
1244 | static void do_cpuid_ent(struct kvm_cpuid_entry2 *entry, u32 function, | 1252 | static void do_cpuid_ent(struct kvm_cpuid_entry2 *entry, u32 function, |
1245 | u32 index, int *nent, int maxnent) | 1253 | u32 index, int *nent, int maxnent) |
1246 | { | 1254 | { |
1247 | const u32 kvm_supported_word0_x86_features = bit(X86_FEATURE_FPU) | | 1255 | unsigned f_nx = is_efer_nx() ? F(NX) : 0; |
1248 | bit(X86_FEATURE_VME) | bit(X86_FEATURE_DE) | | ||
1249 | bit(X86_FEATURE_PSE) | bit(X86_FEATURE_TSC) | | ||
1250 | bit(X86_FEATURE_MSR) | bit(X86_FEATURE_PAE) | | ||
1251 | bit(X86_FEATURE_CX8) | bit(X86_FEATURE_APIC) | | ||
1252 | bit(X86_FEATURE_SEP) | bit(X86_FEATURE_PGE) | | ||
1253 | bit(X86_FEATURE_CMOV) | bit(X86_FEATURE_PSE36) | | ||
1254 | bit(X86_FEATURE_CLFLSH) | bit(X86_FEATURE_MMX) | | ||
1255 | bit(X86_FEATURE_FXSR) | bit(X86_FEATURE_XMM) | | ||
1256 | bit(X86_FEATURE_XMM2) | bit(X86_FEATURE_SELFSNOOP); | ||
1257 | const u32 kvm_supported_word1_x86_features = bit(X86_FEATURE_FPU) | | ||
1258 | bit(X86_FEATURE_VME) | bit(X86_FEATURE_DE) | | ||
1259 | bit(X86_FEATURE_PSE) | bit(X86_FEATURE_TSC) | | ||
1260 | bit(X86_FEATURE_MSR) | bit(X86_FEATURE_PAE) | | ||
1261 | bit(X86_FEATURE_CX8) | bit(X86_FEATURE_APIC) | | ||
1262 | bit(X86_FEATURE_PGE) | | ||
1263 | bit(X86_FEATURE_CMOV) | bit(X86_FEATURE_PSE36) | | ||
1264 | bit(X86_FEATURE_MMX) | bit(X86_FEATURE_FXSR) | | ||
1265 | bit(X86_FEATURE_SYSCALL) | | ||
1266 | (is_efer_nx() ? bit(X86_FEATURE_NX) : 0) | | ||
1267 | #ifdef CONFIG_X86_64 | 1256 | #ifdef CONFIG_X86_64 |
1268 | bit(X86_FEATURE_LM) | | 1257 | unsigned f_lm = F(LM); |
1258 | #else | ||
1259 | unsigned f_lm = 0; | ||
1269 | #endif | 1260 | #endif |
1270 | bit(X86_FEATURE_FXSR_OPT) | | 1261 | |
1271 | bit(X86_FEATURE_MMXEXT) | | 1262 | /* cpuid 1.edx */ |
1272 | bit(X86_FEATURE_3DNOWEXT) | | 1263 | const u32 kvm_supported_word0_x86_features = |
1273 | bit(X86_FEATURE_3DNOW); | 1264 | F(FPU) | F(VME) | F(DE) | F(PSE) | |
1274 | const u32 kvm_supported_word3_x86_features = | 1265 | F(TSC) | F(MSR) | F(PAE) | F(MCE) | |
1275 | bit(X86_FEATURE_XMM3) | bit(X86_FEATURE_CX16); | 1266 | F(CX8) | F(APIC) | 0 /* Reserved */ | F(SEP) | |
1267 | F(MTRR) | F(PGE) | F(MCA) | F(CMOV) | | ||
1268 | F(PAT) | F(PSE36) | 0 /* PSN */ | F(CLFLSH) | | ||
1269 | 0 /* Reserved, DS, ACPI */ | F(MMX) | | ||
1270 | F(FXSR) | F(XMM) | F(XMM2) | F(SELFSNOOP) | | ||
1271 | 0 /* HTT, TM, Reserved, PBE */; | ||
1272 | /* cpuid 0x80000001.edx */ | ||
1273 | const u32 kvm_supported_word1_x86_features = | ||
1274 | F(FPU) | F(VME) | F(DE) | F(PSE) | | ||
1275 | F(TSC) | F(MSR) | F(PAE) | F(MCE) | | ||
1276 | F(CX8) | F(APIC) | 0 /* Reserved */ | F(SYSCALL) | | ||
1277 | F(MTRR) | F(PGE) | F(MCA) | F(CMOV) | | ||
1278 | F(PAT) | F(PSE36) | 0 /* Reserved */ | | ||
1279 | f_nx | 0 /* Reserved */ | F(MMXEXT) | F(MMX) | | ||
1280 | F(FXSR) | F(FXSR_OPT) | 0 /* GBPAGES */ | 0 /* RDTSCP */ | | ||
1281 | 0 /* Reserved */ | f_lm | F(3DNOWEXT) | F(3DNOW); | ||
1282 | /* cpuid 1.ecx */ | ||
1283 | const u32 kvm_supported_word4_x86_features = | ||
1284 | F(XMM3) | 0 /* Reserved, DTES64, MONITOR */ | | ||
1285 | 0 /* DS-CPL, VMX, SMX, EST */ | | ||
1286 | 0 /* TM2 */ | F(SSSE3) | 0 /* CNXT-ID */ | 0 /* Reserved */ | | ||
1287 | 0 /* Reserved */ | F(CX16) | 0 /* xTPR Update, PDCM */ | | ||
1288 | 0 /* Reserved, DCA */ | F(XMM4_1) | | ||
1289 | F(XMM4_2) | 0 /* x2APIC */ | F(MOVBE) | F(POPCNT) | | ||
1290 | 0 /* Reserved, XSAVE, OSXSAVE */; | ||
1291 | /* cpuid 0x80000001.ecx */ | ||
1276 | const u32 kvm_supported_word6_x86_features = | 1292 | const u32 kvm_supported_word6_x86_features = |
1277 | bit(X86_FEATURE_LAHF_LM) | bit(X86_FEATURE_CMP_LEGACY) | | 1293 | F(LAHF_LM) | F(CMP_LEGACY) | F(SVM) | 0 /* ExtApicSpace */ | |
1278 | bit(X86_FEATURE_SVM); | 1294 | F(CR8_LEGACY) | F(ABM) | F(SSE4A) | F(MISALIGNSSE) | |
1295 | F(3DNOWPREFETCH) | 0 /* OSVW */ | 0 /* IBS */ | F(SSE5) | | ||
1296 | 0 /* SKINIT */ | 0 /* WDT */; | ||
1279 | 1297 | ||
1280 | /* all calls to cpuid_count() should be made on the same cpu */ | 1298 | /* all calls to cpuid_count() should be made on the same cpu */ |
1281 | get_cpu(); | 1299 | get_cpu(); |
@@ -1288,7 +1306,7 @@ static void do_cpuid_ent(struct kvm_cpuid_entry2 *entry, u32 function, | |||
1288 | break; | 1306 | break; |
1289 | case 1: | 1307 | case 1: |
1290 | entry->edx &= kvm_supported_word0_x86_features; | 1308 | entry->edx &= kvm_supported_word0_x86_features; |
1291 | entry->ecx &= kvm_supported_word3_x86_features; | 1309 | entry->ecx &= kvm_supported_word4_x86_features; |
1292 | break; | 1310 | break; |
1293 | /* function 2 entries are STATEFUL. That is, repeated cpuid commands | 1311 | /* function 2 entries are STATEFUL. That is, repeated cpuid commands |
1294 | * may return different values. This forces us to get_cpu() before | 1312 | * may return different values. This forces us to get_cpu() before |
@@ -1350,6 +1368,8 @@ static void do_cpuid_ent(struct kvm_cpuid_entry2 *entry, u32 function, | |||
1350 | put_cpu(); | 1368 | put_cpu(); |
1351 | } | 1369 | } |
1352 | 1370 | ||
1371 | #undef F | ||
1372 | |||
1353 | static int kvm_dev_ioctl_get_supported_cpuid(struct kvm_cpuid2 *cpuid, | 1373 | static int kvm_dev_ioctl_get_supported_cpuid(struct kvm_cpuid2 *cpuid, |
1354 | struct kvm_cpuid_entry2 __user *entries) | 1374 | struct kvm_cpuid_entry2 __user *entries) |
1355 | { | 1375 | { |
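The F() macro introduced above is shorthand for bit(X86_FEATURE_x), which lets each supported-feature word be written in CPUID register order with reserved positions kept visible as 0 comments; the resulting mask is then ANDed into the guest-visible leaf. A compressed sketch of the idea, assuming the same kvm headers as x86.c and using a made-up subset word:

#define F(x) bit(X86_FEATURE_##x)	/* same helper as in the hunk above */

/* illustrative subset of a cpuid 1.edx style feature word */
static const u32 example_word0 =
	F(FPU) | F(VME) | F(DE) | F(PSE) |
	F(TSC) | F(MSR) | 0 /* reserved slot kept visible */ | F(SEP);

static void mask_leaf_1(struct kvm_cpuid_entry2 *entry)
{
	entry->edx &= example_word0;	/* guest only sees what KVM supports */
}

#undef F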
@@ -1421,8 +1441,7 @@ static int kvm_vcpu_ioctl_interrupt(struct kvm_vcpu *vcpu, | |||
1421 | return -ENXIO; | 1441 | return -ENXIO; |
1422 | vcpu_load(vcpu); | 1442 | vcpu_load(vcpu); |
1423 | 1443 | ||
1424 | set_bit(irq->irq, vcpu->arch.irq_pending); | 1444 | kvm_queue_interrupt(vcpu, irq->irq, false); |
1425 | set_bit(irq->irq / BITS_PER_LONG, &vcpu->arch.irq_summary); | ||
1426 | 1445 | ||
1427 | vcpu_put(vcpu); | 1446 | vcpu_put(vcpu); |
1428 | 1447 | ||
@@ -1584,8 +1603,7 @@ long kvm_arch_vcpu_ioctl(struct file *filp, | |||
1584 | r = -EINVAL; | 1603 | r = -EINVAL; |
1585 | } | 1604 | } |
1586 | out: | 1605 | out: |
1587 | if (lapic) | 1606 | kfree(lapic); |
1588 | kfree(lapic); | ||
1589 | return r; | 1607 | return r; |
1590 | } | 1608 | } |
1591 | 1609 | ||
@@ -1606,10 +1624,12 @@ static int kvm_vm_ioctl_set_nr_mmu_pages(struct kvm *kvm, | |||
1606 | return -EINVAL; | 1624 | return -EINVAL; |
1607 | 1625 | ||
1608 | down_write(&kvm->slots_lock); | 1626 | down_write(&kvm->slots_lock); |
1627 | spin_lock(&kvm->mmu_lock); | ||
1609 | 1628 | ||
1610 | kvm_mmu_change_mmu_pages(kvm, kvm_nr_mmu_pages); | 1629 | kvm_mmu_change_mmu_pages(kvm, kvm_nr_mmu_pages); |
1611 | kvm->arch.n_requested_mmu_pages = kvm_nr_mmu_pages; | 1630 | kvm->arch.n_requested_mmu_pages = kvm_nr_mmu_pages; |
1612 | 1631 | ||
1632 | spin_unlock(&kvm->mmu_lock); | ||
1613 | up_write(&kvm->slots_lock); | 1633 | up_write(&kvm->slots_lock); |
1614 | return 0; | 1634 | return 0; |
1615 | } | 1635 | } |
@@ -1785,7 +1805,9 @@ int kvm_vm_ioctl_get_dirty_log(struct kvm *kvm, | |||
1785 | 1805 | ||
1786 | /* If nothing is dirty, don't bother messing with page tables. */ | 1806 | /* If nothing is dirty, don't bother messing with page tables. */ |
1787 | if (is_dirty) { | 1807 | if (is_dirty) { |
1808 | spin_lock(&kvm->mmu_lock); | ||
1788 | kvm_mmu_slot_remove_write_access(kvm, log->slot); | 1809 | kvm_mmu_slot_remove_write_access(kvm, log->slot); |
1810 | spin_unlock(&kvm->mmu_lock); | ||
1789 | kvm_flush_remote_tlbs(kvm); | 1811 | kvm_flush_remote_tlbs(kvm); |
1790 | memslot = &kvm->memslots[log->slot]; | 1812 | memslot = &kvm->memslots[log->slot]; |
1791 | n = ALIGN(memslot->npages, BITS_PER_LONG) / 8; | 1813 | n = ALIGN(memslot->npages, BITS_PER_LONG) / 8; |
@@ -2360,7 +2382,7 @@ int emulate_instruction(struct kvm_vcpu *vcpu, | |||
2360 | u16 error_code, | 2382 | u16 error_code, |
2361 | int emulation_type) | 2383 | int emulation_type) |
2362 | { | 2384 | { |
2363 | int r; | 2385 | int r, shadow_mask; |
2364 | struct decode_cache *c; | 2386 | struct decode_cache *c; |
2365 | 2387 | ||
2366 | kvm_clear_exception_queue(vcpu); | 2388 | kvm_clear_exception_queue(vcpu); |
@@ -2408,7 +2430,16 @@ int emulate_instruction(struct kvm_vcpu *vcpu, | |||
2408 | } | 2430 | } |
2409 | } | 2431 | } |
2410 | 2432 | ||
2433 | if (emulation_type & EMULTYPE_SKIP) { | ||
2434 | kvm_rip_write(vcpu, vcpu->arch.emulate_ctxt.decode.eip); | ||
2435 | return EMULATE_DONE; | ||
2436 | } | ||
2437 | |||
2411 | r = x86_emulate_insn(&vcpu->arch.emulate_ctxt, &emulate_ops); | 2438 | r = x86_emulate_insn(&vcpu->arch.emulate_ctxt, &emulate_ops); |
2439 | shadow_mask = vcpu->arch.emulate_ctxt.interruptibility; | ||
2440 | |||
2441 | if (r == 0) | ||
2442 | kvm_x86_ops->set_interrupt_shadow(vcpu, shadow_mask); | ||
2412 | 2443 | ||
2413 | if (vcpu->arch.pio.string) | 2444 | if (vcpu->arch.pio.string) |
2414 | return EMULATE_DO_MMIO; | 2445 | return EMULATE_DO_MMIO; |
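EMULTYPE_SKIP, added above, lets a caller run only the decoder and then advance the guest RIP past the instruction without executing it, while the interruptibility state computed by the emulator is pushed back through the new set_interrupt_shadow() hook after a successful emulation. A hedged sketch of how a vendor exit handler might use the skip-only mode; the call site below is hypothetical and assumes the same kvm headers as x86.c:

/* Hypothetical exit handler fragment: the instruction's side effect has
 * already been performed by the hypervisor, so only the RIP needs to move. */
static int handle_already_emulated(struct kvm_vcpu *vcpu, struct kvm_run *run)
{
	if (emulate_instruction(vcpu, run, 0, 0, EMULTYPE_SKIP) != EMULATE_DONE)
		return 0;	/* let the caller retry or report an error */
	return 1;
}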
@@ -2761,7 +2792,7 @@ int kvm_arch_init(void *opaque) | |||
2761 | kvm_mmu_set_nonpresent_ptes(0ull, 0ull); | 2792 | kvm_mmu_set_nonpresent_ptes(0ull, 0ull); |
2762 | kvm_mmu_set_base_ptes(PT_PRESENT_MASK); | 2793 | kvm_mmu_set_base_ptes(PT_PRESENT_MASK); |
2763 | kvm_mmu_set_mask_ptes(PT_USER_MASK, PT_ACCESSED_MASK, | 2794 | kvm_mmu_set_mask_ptes(PT_USER_MASK, PT_ACCESSED_MASK, |
2764 | PT_DIRTY_MASK, PT64_NX_MASK, 0, 0); | 2795 | PT_DIRTY_MASK, PT64_NX_MASK, 0); |
2765 | 2796 | ||
2766 | for_each_possible_cpu(cpu) | 2797 | for_each_possible_cpu(cpu) |
2767 | per_cpu(cpu_tsc_khz, cpu) = tsc_khz; | 2798 | per_cpu(cpu_tsc_khz, cpu) = tsc_khz; |
@@ -3012,6 +3043,16 @@ struct kvm_cpuid_entry2 *kvm_find_cpuid_entry(struct kvm_vcpu *vcpu, | |||
3012 | return best; | 3043 | return best; |
3013 | } | 3044 | } |
3014 | 3045 | ||
3046 | int cpuid_maxphyaddr(struct kvm_vcpu *vcpu) | ||
3047 | { | ||
3048 | struct kvm_cpuid_entry2 *best; | ||
3049 | |||
3050 | best = kvm_find_cpuid_entry(vcpu, 0x80000008, 0); | ||
3051 | if (best) | ||
3052 | return best->eax & 0xff; | ||
3053 | return 36; | ||
3054 | } | ||
3055 | |||
3015 | void kvm_emulate_cpuid(struct kvm_vcpu *vcpu) | 3056 | void kvm_emulate_cpuid(struct kvm_vcpu *vcpu) |
3016 | { | 3057 | { |
3017 | u32 function, index; | 3058 | u32 function, index; |
@@ -3048,10 +3089,9 @@ EXPORT_SYMBOL_GPL(kvm_emulate_cpuid); | |||
3048 | static int dm_request_for_irq_injection(struct kvm_vcpu *vcpu, | 3089 | static int dm_request_for_irq_injection(struct kvm_vcpu *vcpu, |
3049 | struct kvm_run *kvm_run) | 3090 | struct kvm_run *kvm_run) |
3050 | { | 3091 | { |
3051 | return (!vcpu->arch.irq_summary && | 3092 | return (!irqchip_in_kernel(vcpu->kvm) && !kvm_cpu_has_interrupt(vcpu) && |
3052 | kvm_run->request_interrupt_window && | 3093 | kvm_run->request_interrupt_window && |
3053 | vcpu->arch.interrupt_window_open && | 3094 | kvm_arch_interrupt_allowed(vcpu)); |
3054 | (kvm_x86_ops->get_rflags(vcpu) & X86_EFLAGS_IF)); | ||
3055 | } | 3095 | } |
3056 | 3096 | ||
3057 | static void post_kvm_run_save(struct kvm_vcpu *vcpu, | 3097 | static void post_kvm_run_save(struct kvm_vcpu *vcpu, |
@@ -3064,8 +3104,9 @@ static void post_kvm_run_save(struct kvm_vcpu *vcpu, | |||
3064 | kvm_run->ready_for_interrupt_injection = 1; | 3104 | kvm_run->ready_for_interrupt_injection = 1; |
3065 | else | 3105 | else |
3066 | kvm_run->ready_for_interrupt_injection = | 3106 | kvm_run->ready_for_interrupt_injection = |
3067 | (vcpu->arch.interrupt_window_open && | 3107 | kvm_arch_interrupt_allowed(vcpu) && |
3068 | vcpu->arch.irq_summary == 0); | 3108 | !kvm_cpu_has_interrupt(vcpu) && |
3109 | !kvm_event_needs_reinjection(vcpu); | ||
3069 | } | 3110 | } |
3070 | 3111 | ||
3071 | static void vapic_enter(struct kvm_vcpu *vcpu) | 3112 | static void vapic_enter(struct kvm_vcpu *vcpu) |
@@ -3094,9 +3135,63 @@ static void vapic_exit(struct kvm_vcpu *vcpu) | |||
3094 | up_read(&vcpu->kvm->slots_lock); | 3135 | up_read(&vcpu->kvm->slots_lock); |
3095 | } | 3136 | } |
3096 | 3137 | ||
3138 | static void update_cr8_intercept(struct kvm_vcpu *vcpu) | ||
3139 | { | ||
3140 | int max_irr, tpr; | ||
3141 | |||
3142 | if (!kvm_x86_ops->update_cr8_intercept) | ||
3143 | return; | ||
3144 | |||
3145 | if (!vcpu->arch.apic->vapic_addr) | ||
3146 | max_irr = kvm_lapic_find_highest_irr(vcpu); | ||
3147 | else | ||
3148 | max_irr = -1; | ||
3149 | |||
3150 | if (max_irr != -1) | ||
3151 | max_irr >>= 4; | ||
3152 | |||
3153 | tpr = kvm_lapic_get_cr8(vcpu); | ||
3154 | |||
3155 | kvm_x86_ops->update_cr8_intercept(vcpu, tpr, max_irr); | ||
3156 | } | ||
3157 | |||
3158 | static void inject_pending_irq(struct kvm_vcpu *vcpu, struct kvm_run *kvm_run) | ||
3159 | { | ||
3160 | if (vcpu->guest_debug & KVM_GUESTDBG_SINGLESTEP) | ||
3161 | kvm_x86_ops->set_interrupt_shadow(vcpu, 0); | ||
3162 | |||
3163 | /* try to reinject previous events if any */ | ||
3164 | if (vcpu->arch.nmi_injected) { | ||
3165 | kvm_x86_ops->set_nmi(vcpu); | ||
3166 | return; | ||
3167 | } | ||
3168 | |||
3169 | if (vcpu->arch.interrupt.pending) { | ||
3170 | kvm_x86_ops->set_irq(vcpu); | ||
3171 | return; | ||
3172 | } | ||
3173 | |||
3174 | /* try to inject new event if pending */ | ||
3175 | if (vcpu->arch.nmi_pending) { | ||
3176 | if (kvm_x86_ops->nmi_allowed(vcpu)) { | ||
3177 | vcpu->arch.nmi_pending = false; | ||
3178 | vcpu->arch.nmi_injected = true; | ||
3179 | kvm_x86_ops->set_nmi(vcpu); | ||
3180 | } | ||
3181 | } else if (kvm_cpu_has_interrupt(vcpu)) { | ||
3182 | if (kvm_x86_ops->interrupt_allowed(vcpu)) { | ||
3183 | kvm_queue_interrupt(vcpu, kvm_cpu_get_interrupt(vcpu), | ||
3184 | false); | ||
3185 | kvm_x86_ops->set_irq(vcpu); | ||
3186 | } | ||
3187 | } | ||
3188 | } | ||
3189 | |||
3097 | static int vcpu_enter_guest(struct kvm_vcpu *vcpu, struct kvm_run *kvm_run) | 3190 | static int vcpu_enter_guest(struct kvm_vcpu *vcpu, struct kvm_run *kvm_run) |
3098 | { | 3191 | { |
3099 | int r; | 3192 | int r; |
3193 | bool req_int_win = !irqchip_in_kernel(vcpu->kvm) && | ||
3194 | kvm_run->request_interrupt_window; | ||
3100 | 3195 | ||
3101 | if (vcpu->requests) | 3196 | if (vcpu->requests) |
3102 | if (test_and_clear_bit(KVM_REQ_MMU_RELOAD, &vcpu->requests)) | 3197 | if (test_and_clear_bit(KVM_REQ_MMU_RELOAD, &vcpu->requests)) |
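update_cr8_intercept() above compares the highest pending vector's priority class with the task-priority register: both live in the top four bits, which is why max_irr is shifted right by four before being handed, together with the TPR value, to the vendor hook. A tiny illustration of that comparison; the helper name is invented:

#include <linux/types.h>

/* An interrupt can only be delivered while its priority class (vector >> 4)
 * is above the class held in CR8/TPR; otherwise the hypervisor keeps
 * intercepting CR8 writes so it notices when the guest lowers the TPR. */
static bool vector_blocked_by_tpr(u8 vector, u8 cr8_class)
{
	return (vector >> 4) <= cr8_class;
}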
@@ -3128,9 +3223,6 @@ static int vcpu_enter_guest(struct kvm_vcpu *vcpu, struct kvm_run *kvm_run) | |||
3128 | } | 3223 | } |
3129 | } | 3224 | } |
3130 | 3225 | ||
3131 | clear_bit(KVM_REQ_PENDING_TIMER, &vcpu->requests); | ||
3132 | kvm_inject_pending_timer_irqs(vcpu); | ||
3133 | |||
3134 | preempt_disable(); | 3226 | preempt_disable(); |
3135 | 3227 | ||
3136 | kvm_x86_ops->prepare_guest_switch(vcpu); | 3228 | kvm_x86_ops->prepare_guest_switch(vcpu); |
@@ -3138,6 +3230,9 @@ static int vcpu_enter_guest(struct kvm_vcpu *vcpu, struct kvm_run *kvm_run) | |||
3138 | 3230 | ||
3139 | local_irq_disable(); | 3231 | local_irq_disable(); |
3140 | 3232 | ||
3233 | clear_bit(KVM_REQ_KICK, &vcpu->requests); | ||
3234 | smp_mb__after_clear_bit(); | ||
3235 | |||
3141 | if (vcpu->requests || need_resched() || signal_pending(current)) { | 3236 | if (vcpu->requests || need_resched() || signal_pending(current)) { |
3142 | local_irq_enable(); | 3237 | local_irq_enable(); |
3143 | preempt_enable(); | 3238 | preempt_enable(); |
@@ -3145,21 +3240,21 @@ static int vcpu_enter_guest(struct kvm_vcpu *vcpu, struct kvm_run *kvm_run) | |||
3145 | goto out; | 3240 | goto out; |
3146 | } | 3241 | } |
3147 | 3242 | ||
3148 | vcpu->guest_mode = 1; | ||
3149 | /* | ||
3150 | * Make sure that guest_mode assignment won't happen after | ||
3151 | * testing the pending IRQ vector bitmap. | ||
3152 | */ | ||
3153 | smp_wmb(); | ||
3154 | |||
3155 | if (vcpu->arch.exception.pending) | 3243 | if (vcpu->arch.exception.pending) |
3156 | __queue_exception(vcpu); | 3244 | __queue_exception(vcpu); |
3157 | else if (irqchip_in_kernel(vcpu->kvm)) | ||
3158 | kvm_x86_ops->inject_pending_irq(vcpu); | ||
3159 | else | 3245 | else |
3160 | kvm_x86_ops->inject_pending_vectors(vcpu, kvm_run); | 3246 | inject_pending_irq(vcpu, kvm_run); |
3161 | 3247 | ||
3162 | kvm_lapic_sync_to_vapic(vcpu); | 3248 | /* enable NMI/IRQ window open exits if needed */ |
3249 | if (vcpu->arch.nmi_pending) | ||
3250 | kvm_x86_ops->enable_nmi_window(vcpu); | ||
3251 | else if (kvm_cpu_has_interrupt(vcpu) || req_int_win) | ||
3252 | kvm_x86_ops->enable_irq_window(vcpu); | ||
3253 | |||
3254 | if (kvm_lapic_enabled(vcpu)) { | ||
3255 | update_cr8_intercept(vcpu); | ||
3256 | kvm_lapic_sync_to_vapic(vcpu); | ||
3257 | } | ||
3163 | 3258 | ||
3164 | up_read(&vcpu->kvm->slots_lock); | 3259 | up_read(&vcpu->kvm->slots_lock); |
3165 | 3260 | ||
@@ -3193,7 +3288,7 @@ static int vcpu_enter_guest(struct kvm_vcpu *vcpu, struct kvm_run *kvm_run) | |||
3193 | set_debugreg(vcpu->arch.host_dr6, 6); | 3288 | set_debugreg(vcpu->arch.host_dr6, 6); |
3194 | set_debugreg(vcpu->arch.host_dr7, 7); | 3289 | set_debugreg(vcpu->arch.host_dr7, 7); |
3195 | 3290 | ||
3196 | vcpu->guest_mode = 0; | 3291 | set_bit(KVM_REQ_KICK, &vcpu->requests); |
3197 | local_irq_enable(); | 3292 | local_irq_enable(); |
3198 | 3293 | ||
3199 | ++vcpu->stat.exits; | 3294 | ++vcpu->stat.exits; |
@@ -3220,8 +3315,6 @@ static int vcpu_enter_guest(struct kvm_vcpu *vcpu, struct kvm_run *kvm_run) | |||
3220 | profile_hit(KVM_PROFILING, (void *)rip); | 3315 | profile_hit(KVM_PROFILING, (void *)rip); |
3221 | } | 3316 | } |
3222 | 3317 | ||
3223 | if (vcpu->arch.exception.pending && kvm_x86_ops->exception_injected(vcpu)) | ||
3224 | vcpu->arch.exception.pending = false; | ||
3225 | 3318 | ||
3226 | kvm_lapic_sync_from_vapic(vcpu); | 3319 | kvm_lapic_sync_from_vapic(vcpu); |
3227 | 3320 | ||
@@ -3230,6 +3323,7 @@ out: | |||
3230 | return r; | 3323 | return r; |
3231 | } | 3324 | } |
3232 | 3325 | ||
3326 | |||
3233 | static int __vcpu_run(struct kvm_vcpu *vcpu, struct kvm_run *kvm_run) | 3327 | static int __vcpu_run(struct kvm_vcpu *vcpu, struct kvm_run *kvm_run) |
3234 | { | 3328 | { |
3235 | int r; | 3329 | int r; |
@@ -3256,29 +3350,42 @@ static int __vcpu_run(struct kvm_vcpu *vcpu, struct kvm_run *kvm_run) | |||
3256 | kvm_vcpu_block(vcpu); | 3350 | kvm_vcpu_block(vcpu); |
3257 | down_read(&vcpu->kvm->slots_lock); | 3351 | down_read(&vcpu->kvm->slots_lock); |
3258 | if (test_and_clear_bit(KVM_REQ_UNHALT, &vcpu->requests)) | 3352 | if (test_and_clear_bit(KVM_REQ_UNHALT, &vcpu->requests)) |
3259 | if (vcpu->arch.mp_state == KVM_MP_STATE_HALTED) | 3353 | { |
3354 | switch(vcpu->arch.mp_state) { | ||
3355 | case KVM_MP_STATE_HALTED: | ||
3260 | vcpu->arch.mp_state = | 3356 | vcpu->arch.mp_state = |
3261 | KVM_MP_STATE_RUNNABLE; | 3357 | KVM_MP_STATE_RUNNABLE; |
3262 | if (vcpu->arch.mp_state != KVM_MP_STATE_RUNNABLE) | 3358 | case KVM_MP_STATE_RUNNABLE: |
3263 | r = -EINTR; | 3359 | break; |
3360 | case KVM_MP_STATE_SIPI_RECEIVED: | ||
3361 | default: | ||
3362 | r = -EINTR; | ||
3363 | break; | ||
3364 | } | ||
3365 | } | ||
3264 | } | 3366 | } |
3265 | 3367 | ||
3266 | if (r > 0) { | 3368 | if (r <= 0) |
3267 | if (dm_request_for_irq_injection(vcpu, kvm_run)) { | 3369 | break; |
3268 | r = -EINTR; | 3370 | |
3269 | kvm_run->exit_reason = KVM_EXIT_INTR; | 3371 | clear_bit(KVM_REQ_PENDING_TIMER, &vcpu->requests); |
3270 | ++vcpu->stat.request_irq_exits; | 3372 | if (kvm_cpu_has_pending_timer(vcpu)) |
3271 | } | 3373 | kvm_inject_pending_timer_irqs(vcpu); |
3272 | if (signal_pending(current)) { | 3374 | |
3273 | r = -EINTR; | 3375 | if (dm_request_for_irq_injection(vcpu, kvm_run)) { |
3274 | kvm_run->exit_reason = KVM_EXIT_INTR; | 3376 | r = -EINTR; |
3275 | ++vcpu->stat.signal_exits; | 3377 | kvm_run->exit_reason = KVM_EXIT_INTR; |
3276 | } | 3378 | ++vcpu->stat.request_irq_exits; |
3277 | if (need_resched()) { | 3379 | } |
3278 | up_read(&vcpu->kvm->slots_lock); | 3380 | if (signal_pending(current)) { |
3279 | kvm_resched(vcpu); | 3381 | r = -EINTR; |
3280 | down_read(&vcpu->kvm->slots_lock); | 3382 | kvm_run->exit_reason = KVM_EXIT_INTR; |
3281 | } | 3383 | ++vcpu->stat.signal_exits; |
3384 | } | ||
3385 | if (need_resched()) { | ||
3386 | up_read(&vcpu->kvm->slots_lock); | ||
3387 | kvm_resched(vcpu); | ||
3388 | down_read(&vcpu->kvm->slots_lock); | ||
3282 | } | 3389 | } |
3283 | } | 3390 | } |
3284 | 3391 | ||
@@ -3442,7 +3549,6 @@ int kvm_arch_vcpu_ioctl_get_sregs(struct kvm_vcpu *vcpu, | |||
3442 | struct kvm_sregs *sregs) | 3549 | struct kvm_sregs *sregs) |
3443 | { | 3550 | { |
3444 | struct descriptor_table dt; | 3551 | struct descriptor_table dt; |
3445 | int pending_vec; | ||
3446 | 3552 | ||
3447 | vcpu_load(vcpu); | 3553 | vcpu_load(vcpu); |
3448 | 3554 | ||
@@ -3472,16 +3578,11 @@ int kvm_arch_vcpu_ioctl_get_sregs(struct kvm_vcpu *vcpu, | |||
3472 | sregs->efer = vcpu->arch.shadow_efer; | 3578 | sregs->efer = vcpu->arch.shadow_efer; |
3473 | sregs->apic_base = kvm_get_apic_base(vcpu); | 3579 | sregs->apic_base = kvm_get_apic_base(vcpu); |
3474 | 3580 | ||
3475 | if (irqchip_in_kernel(vcpu->kvm)) { | 3581 | memset(sregs->interrupt_bitmap, 0, sizeof sregs->interrupt_bitmap); |
3476 | memset(sregs->interrupt_bitmap, 0, | 3582 | |
3477 | sizeof sregs->interrupt_bitmap); | 3583 | if (vcpu->arch.interrupt.pending && !vcpu->arch.interrupt.soft) |
3478 | pending_vec = kvm_x86_ops->get_irq(vcpu); | 3584 | set_bit(vcpu->arch.interrupt.nr, |
3479 | if (pending_vec >= 0) | 3585 | (unsigned long *)sregs->interrupt_bitmap); |
3480 | set_bit(pending_vec, | ||
3481 | (unsigned long *)sregs->interrupt_bitmap); | ||
3482 | } else | ||
3483 | memcpy(sregs->interrupt_bitmap, vcpu->arch.irq_pending, | ||
3484 | sizeof sregs->interrupt_bitmap); | ||
3485 | 3586 | ||
3486 | vcpu_put(vcpu); | 3587 | vcpu_put(vcpu); |
3487 | 3588 | ||
@@ -3688,7 +3789,6 @@ static void save_state_to_tss32(struct kvm_vcpu *vcpu, | |||
3688 | tss->fs = get_segment_selector(vcpu, VCPU_SREG_FS); | 3789 | tss->fs = get_segment_selector(vcpu, VCPU_SREG_FS); |
3689 | tss->gs = get_segment_selector(vcpu, VCPU_SREG_GS); | 3790 | tss->gs = get_segment_selector(vcpu, VCPU_SREG_GS); |
3690 | tss->ldt_selector = get_segment_selector(vcpu, VCPU_SREG_LDTR); | 3791 | tss->ldt_selector = get_segment_selector(vcpu, VCPU_SREG_LDTR); |
3691 | tss->prev_task_link = get_segment_selector(vcpu, VCPU_SREG_TR); | ||
3692 | } | 3792 | } |
3693 | 3793 | ||
3694 | static int load_state_from_tss32(struct kvm_vcpu *vcpu, | 3794 | static int load_state_from_tss32(struct kvm_vcpu *vcpu, |
@@ -3785,8 +3885,8 @@ static int load_state_from_tss16(struct kvm_vcpu *vcpu, | |||
3785 | } | 3885 | } |
3786 | 3886 | ||
3787 | static int kvm_task_switch_16(struct kvm_vcpu *vcpu, u16 tss_selector, | 3887 | static int kvm_task_switch_16(struct kvm_vcpu *vcpu, u16 tss_selector, |
3788 | u32 old_tss_base, | 3888 | u16 old_tss_sel, u32 old_tss_base, |
3789 | struct desc_struct *nseg_desc) | 3889 | struct desc_struct *nseg_desc) |
3790 | { | 3890 | { |
3791 | struct tss_segment_16 tss_segment_16; | 3891 | struct tss_segment_16 tss_segment_16; |
3792 | int ret = 0; | 3892 | int ret = 0; |
@@ -3805,6 +3905,16 @@ static int kvm_task_switch_16(struct kvm_vcpu *vcpu, u16 tss_selector, | |||
3805 | &tss_segment_16, sizeof tss_segment_16)) | 3905 | &tss_segment_16, sizeof tss_segment_16)) |
3806 | goto out; | 3906 | goto out; |
3807 | 3907 | ||
3908 | if (old_tss_sel != 0xffff) { | ||
3909 | tss_segment_16.prev_task_link = old_tss_sel; | ||
3910 | |||
3911 | if (kvm_write_guest(vcpu->kvm, | ||
3912 | get_tss_base_addr(vcpu, nseg_desc), | ||
3913 | &tss_segment_16.prev_task_link, | ||
3914 | sizeof tss_segment_16.prev_task_link)) | ||
3915 | goto out; | ||
3916 | } | ||
3917 | |||
3808 | if (load_state_from_tss16(vcpu, &tss_segment_16)) | 3918 | if (load_state_from_tss16(vcpu, &tss_segment_16)) |
3809 | goto out; | 3919 | goto out; |
3810 | 3920 | ||
@@ -3814,7 +3924,7 @@ out: | |||
3814 | } | 3924 | } |
3815 | 3925 | ||
3816 | static int kvm_task_switch_32(struct kvm_vcpu *vcpu, u16 tss_selector, | 3926 | static int kvm_task_switch_32(struct kvm_vcpu *vcpu, u16 tss_selector, |
3817 | u32 old_tss_base, | 3927 | u16 old_tss_sel, u32 old_tss_base, |
3818 | struct desc_struct *nseg_desc) | 3928 | struct desc_struct *nseg_desc) |
3819 | { | 3929 | { |
3820 | struct tss_segment_32 tss_segment_32; | 3930 | struct tss_segment_32 tss_segment_32; |
@@ -3834,6 +3944,16 @@ static int kvm_task_switch_32(struct kvm_vcpu *vcpu, u16 tss_selector, | |||
3834 | &tss_segment_32, sizeof tss_segment_32)) | 3944 | &tss_segment_32, sizeof tss_segment_32)) |
3835 | goto out; | 3945 | goto out; |
3836 | 3946 | ||
3947 | if (old_tss_sel != 0xffff) { | ||
3948 | tss_segment_32.prev_task_link = old_tss_sel; | ||
3949 | |||
3950 | if (kvm_write_guest(vcpu->kvm, | ||
3951 | get_tss_base_addr(vcpu, nseg_desc), | ||
3952 | &tss_segment_32.prev_task_link, | ||
3953 | sizeof tss_segment_32.prev_task_link)) | ||
3954 | goto out; | ||
3955 | } | ||
3956 | |||
3837 | if (load_state_from_tss32(vcpu, &tss_segment_32)) | 3957 | if (load_state_from_tss32(vcpu, &tss_segment_32)) |
3838 | goto out; | 3958 | goto out; |
3839 | 3959 | ||
@@ -3887,14 +4007,22 @@ int kvm_task_switch(struct kvm_vcpu *vcpu, u16 tss_selector, int reason) | |||
3887 | kvm_x86_ops->set_rflags(vcpu, eflags & ~X86_EFLAGS_NT); | 4007 | kvm_x86_ops->set_rflags(vcpu, eflags & ~X86_EFLAGS_NT); |
3888 | } | 4008 | } |
3889 | 4009 | ||
3890 | kvm_x86_ops->skip_emulated_instruction(vcpu); | 4010 | /* set back link to prev task only if NT bit is set in eflags |
4011 | note that old_tss_sel is not used after this point */ | ||
4012 | if (reason != TASK_SWITCH_CALL && reason != TASK_SWITCH_GATE) | ||
4013 | old_tss_sel = 0xffff; | ||
3891 | 4019 | ||
3892 | if (nseg_desc.type & 8) | 4020 | if (nseg_desc.type & 8) |
3893 | ret = kvm_task_switch_32(vcpu, tss_selector, old_tss_base, | 4021 | ret = kvm_task_switch_32(vcpu, tss_selector, old_tss_sel, |
3894 | &nseg_desc); | 4022 | old_tss_base, &nseg_desc); |
3895 | else | 4023 | else |
3896 | ret = kvm_task_switch_16(vcpu, tss_selector, old_tss_base, | 4024 | ret = kvm_task_switch_16(vcpu, tss_selector, old_tss_sel, |
3897 | &nseg_desc); | 4025 | old_tss_base, &nseg_desc); |
3898 | 4026 | ||
3899 | if (reason == TASK_SWITCH_CALL || reason == TASK_SWITCH_GATE) { | 4027 | if (reason == TASK_SWITCH_CALL || reason == TASK_SWITCH_GATE) { |
3900 | u32 eflags = kvm_x86_ops->get_rflags(vcpu); | 4028 | u32 eflags = kvm_x86_ops->get_rflags(vcpu); |
@@ -3920,7 +4048,7 @@ int kvm_arch_vcpu_ioctl_set_sregs(struct kvm_vcpu *vcpu, | |||
3920 | struct kvm_sregs *sregs) | 4048 | struct kvm_sregs *sregs) |
3921 | { | 4049 | { |
3922 | int mmu_reset_needed = 0; | 4050 | int mmu_reset_needed = 0; |
3923 | int i, pending_vec, max_bits; | 4051 | int pending_vec, max_bits; |
3924 | struct descriptor_table dt; | 4052 | struct descriptor_table dt; |
3925 | 4053 | ||
3926 | vcpu_load(vcpu); | 4054 | vcpu_load(vcpu); |
@@ -3934,7 +4062,13 @@ int kvm_arch_vcpu_ioctl_set_sregs(struct kvm_vcpu *vcpu, | |||
3934 | 4062 | ||
3935 | vcpu->arch.cr2 = sregs->cr2; | 4063 | vcpu->arch.cr2 = sregs->cr2; |
3936 | mmu_reset_needed |= vcpu->arch.cr3 != sregs->cr3; | 4064 | mmu_reset_needed |= vcpu->arch.cr3 != sregs->cr3; |
3937 | vcpu->arch.cr3 = sregs->cr3; | 4065 | |
4066 | down_read(&vcpu->kvm->slots_lock); | ||
4067 | if (gfn_to_memslot(vcpu->kvm, sregs->cr3 >> PAGE_SHIFT)) | ||
4068 | vcpu->arch.cr3 = sregs->cr3; | ||
4069 | else | ||
4070 | set_bit(KVM_REQ_TRIPLE_FAULT, &vcpu->requests); | ||
4071 | up_read(&vcpu->kvm->slots_lock); | ||
3938 | 4072 | ||
3939 | kvm_set_cr8(vcpu, sregs->cr8); | 4073 | kvm_set_cr8(vcpu, sregs->cr8); |
3940 | 4074 | ||
@@ -3956,25 +4090,14 @@ int kvm_arch_vcpu_ioctl_set_sregs(struct kvm_vcpu *vcpu, | |||
3956 | if (mmu_reset_needed) | 4090 | if (mmu_reset_needed) |
3957 | kvm_mmu_reset_context(vcpu); | 4091 | kvm_mmu_reset_context(vcpu); |
3958 | 4092 | ||
3959 | if (!irqchip_in_kernel(vcpu->kvm)) { | 4093 | max_bits = (sizeof sregs->interrupt_bitmap) << 3; |
3960 | memcpy(vcpu->arch.irq_pending, sregs->interrupt_bitmap, | 4094 | pending_vec = find_first_bit( |
3961 | sizeof vcpu->arch.irq_pending); | 4095 | (const unsigned long *)sregs->interrupt_bitmap, max_bits); |
3962 | vcpu->arch.irq_summary = 0; | 4096 | if (pending_vec < max_bits) { |
3963 | for (i = 0; i < ARRAY_SIZE(vcpu->arch.irq_pending); ++i) | 4097 | kvm_queue_interrupt(vcpu, pending_vec, false); |
3964 | if (vcpu->arch.irq_pending[i]) | 4098 | pr_debug("Set back pending irq %d\n", pending_vec); |
3965 | __set_bit(i, &vcpu->arch.irq_summary); | 4099 | if (irqchip_in_kernel(vcpu->kvm)) |
3966 | } else { | 4100 | kvm_pic_clear_isr_ack(vcpu->kvm); |
3967 | max_bits = (sizeof sregs->interrupt_bitmap) << 3; | ||
3968 | pending_vec = find_first_bit( | ||
3969 | (const unsigned long *)sregs->interrupt_bitmap, | ||
3970 | max_bits); | ||
3971 | /* Only pending external irq is handled here */ | ||
3972 | if (pending_vec < max_bits) { | ||
3973 | kvm_x86_ops->set_irq(vcpu, pending_vec); | ||
3974 | pr_debug("Set back pending irq %d\n", | ||
3975 | pending_vec); | ||
3976 | } | ||
3977 | kvm_pic_clear_isr_ack(vcpu->kvm); | ||
3978 | } | 4101 | } |
3979 | 4102 | ||
3980 | kvm_set_segment(vcpu, &sregs->cs, VCPU_SREG_CS); | 4103 | kvm_set_segment(vcpu, &sregs->cs, VCPU_SREG_CS); |
@@ -4308,7 +4431,6 @@ struct kvm *kvm_arch_create_vm(void) | |||
4308 | return ERR_PTR(-ENOMEM); | 4431 | return ERR_PTR(-ENOMEM); |
4309 | 4432 | ||
4310 | INIT_LIST_HEAD(&kvm->arch.active_mmu_pages); | 4433 | INIT_LIST_HEAD(&kvm->arch.active_mmu_pages); |
4311 | INIT_LIST_HEAD(&kvm->arch.oos_global_pages); | ||
4312 | INIT_LIST_HEAD(&kvm->arch.assigned_dev_head); | 4434 | INIT_LIST_HEAD(&kvm->arch.assigned_dev_head); |
4313 | 4435 | ||
4314 | /* Reserve bit 0 of irq_sources_bitmap for userspace irq source */ | 4436 | /* Reserve bit 0 of irq_sources_bitmap for userspace irq source */ |
@@ -4411,12 +4533,14 @@ int kvm_arch_set_memory_region(struct kvm *kvm, | |||
4411 | } | 4533 | } |
4412 | } | 4534 | } |
4413 | 4535 | ||
4536 | spin_lock(&kvm->mmu_lock); | ||
4414 | if (!kvm->arch.n_requested_mmu_pages) { | 4537 | if (!kvm->arch.n_requested_mmu_pages) { |
4415 | unsigned int nr_mmu_pages = kvm_mmu_calculate_mmu_pages(kvm); | 4538 | unsigned int nr_mmu_pages = kvm_mmu_calculate_mmu_pages(kvm); |
4416 | kvm_mmu_change_mmu_pages(kvm, nr_mmu_pages); | 4539 | kvm_mmu_change_mmu_pages(kvm, nr_mmu_pages); |
4417 | } | 4540 | } |
4418 | 4541 | ||
4419 | kvm_mmu_slot_remove_write_access(kvm, mem->slot); | 4542 | kvm_mmu_slot_remove_write_access(kvm, mem->slot); |
4543 | spin_unlock(&kvm->mmu_lock); | ||
4420 | kvm_flush_remote_tlbs(kvm); | 4544 | kvm_flush_remote_tlbs(kvm); |
4421 | 4545 | ||
4422 | return 0; | 4546 | return 0; |
@@ -4425,6 +4549,7 @@ int kvm_arch_set_memory_region(struct kvm *kvm, | |||
4425 | void kvm_arch_flush_shadow(struct kvm *kvm) | 4549 | void kvm_arch_flush_shadow(struct kvm *kvm) |
4426 | { | 4550 | { |
4427 | kvm_mmu_zap_all(kvm); | 4551 | kvm_mmu_zap_all(kvm); |
4552 | kvm_reload_remote_mmus(kvm); | ||
4428 | } | 4553 | } |
4429 | 4554 | ||
4430 | int kvm_arch_vcpu_runnable(struct kvm_vcpu *vcpu) | 4555 | int kvm_arch_vcpu_runnable(struct kvm_vcpu *vcpu) |
@@ -4434,28 +4559,24 @@ int kvm_arch_vcpu_runnable(struct kvm_vcpu *vcpu) | |||
4434 | || vcpu->arch.nmi_pending; | 4559 | || vcpu->arch.nmi_pending; |
4435 | } | 4560 | } |
4436 | 4561 | ||
4437 | static void vcpu_kick_intr(void *info) | ||
4438 | { | ||
4439 | #ifdef DEBUG | ||
4440 | struct kvm_vcpu *vcpu = (struct kvm_vcpu *)info; | ||
4441 | printk(KERN_DEBUG "vcpu_kick_intr %p \n", vcpu); | ||
4442 | #endif | ||
4443 | } | ||
4444 | |||
4445 | void kvm_vcpu_kick(struct kvm_vcpu *vcpu) | 4562 | void kvm_vcpu_kick(struct kvm_vcpu *vcpu) |
4446 | { | 4563 | { |
4447 | int ipi_pcpu = vcpu->cpu; | 4564 | int me; |
4448 | int cpu = get_cpu(); | 4565 | int cpu = vcpu->cpu; |
4449 | 4566 | ||
4450 | if (waitqueue_active(&vcpu->wq)) { | 4567 | if (waitqueue_active(&vcpu->wq)) { |
4451 | wake_up_interruptible(&vcpu->wq); | 4568 | wake_up_interruptible(&vcpu->wq); |
4452 | ++vcpu->stat.halt_wakeup; | 4569 | ++vcpu->stat.halt_wakeup; |
4453 | } | 4570 | } |
4454 | /* | 4571 | |
4455 | * We may be called synchronously with irqs disabled in guest mode, | 4572 | me = get_cpu(); |
4456 | * So need not to call smp_call_function_single() in that case. | 4573 | if (cpu != me && (unsigned)cpu < nr_cpu_ids && cpu_online(cpu)) |
4457 | */ | 4574 | if (!test_and_set_bit(KVM_REQ_KICK, &vcpu->requests)) |
4458 | if (vcpu->guest_mode && vcpu->cpu != cpu) | 4575 | smp_send_reschedule(cpu); |
4459 | smp_call_function_single(ipi_pcpu, vcpu_kick_intr, vcpu, 0); | ||
4460 | put_cpu(); | 4576 | put_cpu(); |
4461 | } | 4577 | } |
4578 | |||
4579 | int kvm_arch_interrupt_allowed(struct kvm_vcpu *vcpu) | ||
4580 | { | ||
4581 | return kvm_x86_ops->interrupt_allowed(vcpu); | ||
4582 | } | ||
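The rewritten kvm_vcpu_kick() above pairs with the KVM_REQ_KICK handling added to vcpu_enter_guest(): the vcpu clears the bit just before entering the guest, and a kicker only needs an IPI when it is the first to set the bit again, i.e. when the target may really be executing guest code. A condensed sketch of the two sides, assuming the same kvm_host context as x86.c; the flow is illustrative, not the exact kernel code:

/* kicker side: wake a halted vcpu, or IPI one that may be in guest mode */
static void example_kick(struct kvm_vcpu *vcpu)
{
	int me = get_cpu();

	if (vcpu->cpu != me && cpu_online(vcpu->cpu) &&
	    !test_and_set_bit(KVM_REQ_KICK, &vcpu->requests))
		smp_send_reschedule(vcpu->cpu);
	put_cpu();
}

/* vcpu side, with interrupts disabled: open the window for new kicks */
static void example_enter_guest(struct kvm_vcpu *vcpu)
{
	clear_bit(KVM_REQ_KICK, &vcpu->requests);
	smp_mb__after_clear_bit();	/* order against the kicker's test_and_set */
	/* ... re-check vcpu->requests, then enter the guest ... */
}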
diff --git a/arch/x86/kvm/x86.h b/arch/x86/kvm/x86.h index 6a4be78a7384..4c8e10af78e8 100644 --- a/arch/x86/kvm/x86.h +++ b/arch/x86/kvm/x86.h | |||
@@ -8,9 +8,11 @@ static inline void kvm_clear_exception_queue(struct kvm_vcpu *vcpu) | |||
8 | vcpu->arch.exception.pending = false; | 8 | vcpu->arch.exception.pending = false; |
9 | } | 9 | } |
10 | 10 | ||
11 | static inline void kvm_queue_interrupt(struct kvm_vcpu *vcpu, u8 vector) | 11 | static inline void kvm_queue_interrupt(struct kvm_vcpu *vcpu, u8 vector, |
12 | bool soft) | ||
12 | { | 13 | { |
13 | vcpu->arch.interrupt.pending = true; | 14 | vcpu->arch.interrupt.pending = true; |
15 | vcpu->arch.interrupt.soft = soft; | ||
14 | vcpu->arch.interrupt.nr = vector; | 16 | vcpu->arch.interrupt.nr = vector; |
15 | } | 17 | } |
16 | 18 | ||
@@ -19,4 +21,14 @@ static inline void kvm_clear_interrupt_queue(struct kvm_vcpu *vcpu) | |||
19 | vcpu->arch.interrupt.pending = false; | 21 | vcpu->arch.interrupt.pending = false; |
20 | } | 22 | } |
21 | 23 | ||
24 | static inline bool kvm_event_needs_reinjection(struct kvm_vcpu *vcpu) | ||
25 | { | ||
26 | return vcpu->arch.exception.pending || vcpu->arch.interrupt.pending || | ||
27 | vcpu->arch.nmi_injected; | ||
28 | } | ||
29 | |||
30 | static inline bool kvm_exception_is_soft(unsigned int nr) | ||
31 | { | ||
32 | return (nr == BP_VECTOR) || (nr == OF_VECTOR); | ||
33 | } | ||
22 | #endif | 34 | #endif |
diff --git a/arch/x86/kvm/x86_emulate.c b/arch/x86/kvm/x86_emulate.c index ca91749d2083..c1b6c232e02b 100644 --- a/arch/x86/kvm/x86_emulate.c +++ b/arch/x86/kvm/x86_emulate.c | |||
@@ -59,13 +59,14 @@ | |||
59 | #define SrcImm (5<<4) /* Immediate operand. */ | 59 | #define SrcImm (5<<4) /* Immediate operand. */ |
60 | #define SrcImmByte (6<<4) /* 8-bit sign-extended immediate operand. */ | 60 | #define SrcImmByte (6<<4) /* 8-bit sign-extended immediate operand. */ |
61 | #define SrcOne (7<<4) /* Implied '1' */ | 61 | #define SrcOne (7<<4) /* Implied '1' */ |
62 | #define SrcMask (7<<4) | 62 | #define SrcImmUByte (8<<4) /* 8-bit unsigned immediate operand. */ |
63 | #define SrcMask (0xf<<4) | ||
63 | /* Generic ModRM decode. */ | 64 | /* Generic ModRM decode. */ |
64 | #define ModRM (1<<7) | 65 | #define ModRM (1<<8) |
65 | /* Destination is only written; never read. */ | 66 | /* Destination is only written; never read. */ |
66 | #define Mov (1<<8) | 67 | #define Mov (1<<9) |
67 | #define BitOp (1<<9) | 68 | #define BitOp (1<<10) |
68 | #define MemAbs (1<<10) /* Memory operand is absolute displacement */ | 69 | #define MemAbs (1<<11) /* Memory operand is absolute displacement */ |
69 | #define String (1<<12) /* String instruction (rep capable) */ | 70 | #define String (1<<12) /* String instruction (rep capable) */ |
70 | #define Stack (1<<13) /* Stack instruction (push/pop) */ | 71 | #define Stack (1<<13) /* Stack instruction (push/pop) */ |
71 | #define Group (1<<14) /* Bits 3:5 of modrm byte extend opcode */ | 72 | #define Group (1<<14) /* Bits 3:5 of modrm byte extend opcode */ |
@@ -76,6 +77,7 @@ | |||
76 | #define Src2CL (1<<29) | 77 | #define Src2CL (1<<29) |
77 | #define Src2ImmByte (2<<29) | 78 | #define Src2ImmByte (2<<29) |
78 | #define Src2One (3<<29) | 79 | #define Src2One (3<<29) |
80 | #define Src2Imm16 (4<<29) | ||
79 | #define Src2Mask (7<<29) | 81 | #define Src2Mask (7<<29) |
80 | 82 | ||
81 | enum { | 83 | enum { |
@@ -135,11 +137,11 @@ static u32 opcode_table[256] = { | |||
135 | SrcNone | ByteOp | ImplicitOps, SrcNone | ImplicitOps, /* insb, insw/insd */ | 137 | SrcNone | ByteOp | ImplicitOps, SrcNone | ImplicitOps, /* insb, insw/insd */ |
136 | SrcNone | ByteOp | ImplicitOps, SrcNone | ImplicitOps, /* outsb, outsw/outsd */ | 138 | SrcNone | ByteOp | ImplicitOps, SrcNone | ImplicitOps, /* outsb, outsw/outsd */ |
137 | /* 0x70 - 0x77 */ | 139 | /* 0x70 - 0x77 */ |
138 | ImplicitOps, ImplicitOps, ImplicitOps, ImplicitOps, | 140 | SrcImmByte, SrcImmByte, SrcImmByte, SrcImmByte, |
139 | ImplicitOps, ImplicitOps, ImplicitOps, ImplicitOps, | 141 | SrcImmByte, SrcImmByte, SrcImmByte, SrcImmByte, |
140 | /* 0x78 - 0x7F */ | 142 | /* 0x78 - 0x7F */ |
141 | ImplicitOps, ImplicitOps, ImplicitOps, ImplicitOps, | 143 | SrcImmByte, SrcImmByte, SrcImmByte, SrcImmByte, |
142 | ImplicitOps, ImplicitOps, ImplicitOps, ImplicitOps, | 144 | SrcImmByte, SrcImmByte, SrcImmByte, SrcImmByte, |
143 | /* 0x80 - 0x87 */ | 145 | /* 0x80 - 0x87 */ |
144 | Group | Group1_80, Group | Group1_81, | 146 | Group | Group1_80, Group | Group1_81, |
145 | Group | Group1_82, Group | Group1_83, | 147 | Group | Group1_82, Group | Group1_83, |
@@ -153,7 +155,8 @@ static u32 opcode_table[256] = { | |||
153 | /* 0x90 - 0x97 */ | 155 | /* 0x90 - 0x97 */ |
154 | DstReg, DstReg, DstReg, DstReg, DstReg, DstReg, DstReg, DstReg, | 156 | DstReg, DstReg, DstReg, DstReg, DstReg, DstReg, DstReg, DstReg, |
155 | /* 0x98 - 0x9F */ | 157 | /* 0x98 - 0x9F */ |
156 | 0, 0, 0, 0, ImplicitOps | Stack, ImplicitOps | Stack, 0, 0, | 158 | 0, 0, SrcImm | Src2Imm16, 0, |
159 | ImplicitOps | Stack, ImplicitOps | Stack, 0, 0, | ||
157 | /* 0xA0 - 0xA7 */ | 160 | /* 0xA0 - 0xA7 */ |
158 | ByteOp | DstReg | SrcMem | Mov | MemAbs, DstReg | SrcMem | Mov | MemAbs, | 161 | ByteOp | DstReg | SrcMem | Mov | MemAbs, DstReg | SrcMem | Mov | MemAbs, |
159 | ByteOp | DstMem | SrcReg | Mov | MemAbs, DstMem | SrcReg | Mov | MemAbs, | 162 | ByteOp | DstMem | SrcReg | Mov | MemAbs, DstMem | SrcReg | Mov | MemAbs, |
@@ -178,7 +181,8 @@ static u32 opcode_table[256] = { | |||
178 | 0, ImplicitOps | Stack, 0, 0, | 181 | 0, ImplicitOps | Stack, 0, 0, |
179 | ByteOp | DstMem | SrcImm | ModRM | Mov, DstMem | SrcImm | ModRM | Mov, | 182 | ByteOp | DstMem | SrcImm | ModRM | Mov, DstMem | SrcImm | ModRM | Mov, |
180 | /* 0xC8 - 0xCF */ | 183 | /* 0xC8 - 0xCF */ |
181 | 0, 0, 0, ImplicitOps | Stack, 0, 0, 0, 0, | 184 | 0, 0, 0, ImplicitOps | Stack, |
185 | ImplicitOps, SrcImmByte, ImplicitOps, ImplicitOps, | ||
182 | /* 0xD0 - 0xD7 */ | 186 | /* 0xD0 - 0xD7 */ |
183 | ByteOp | DstMem | SrcImplicit | ModRM, DstMem | SrcImplicit | ModRM, | 187 | ByteOp | DstMem | SrcImplicit | ModRM, DstMem | SrcImplicit | ModRM, |
184 | ByteOp | DstMem | SrcImplicit | ModRM, DstMem | SrcImplicit | ModRM, | 188 | ByteOp | DstMem | SrcImplicit | ModRM, DstMem | SrcImplicit | ModRM, |
@@ -187,11 +191,11 @@ static u32 opcode_table[256] = { | |||
187 | 0, 0, 0, 0, 0, 0, 0, 0, | 191 | 0, 0, 0, 0, 0, 0, 0, 0, |
188 | /* 0xE0 - 0xE7 */ | 192 | /* 0xE0 - 0xE7 */ |
189 | 0, 0, 0, 0, | 193 | 0, 0, 0, 0, |
190 | SrcNone | ByteOp | ImplicitOps, SrcNone | ImplicitOps, | 194 | ByteOp | SrcImmUByte, SrcImmUByte, |
191 | SrcNone | ByteOp | ImplicitOps, SrcNone | ImplicitOps, | 195 | ByteOp | SrcImmUByte, SrcImmUByte, |
192 | /* 0xE8 - 0xEF */ | 196 | /* 0xE8 - 0xEF */ |
193 | ImplicitOps | Stack, SrcImm | ImplicitOps, | 197 | SrcImm | Stack, SrcImm | ImplicitOps, |
194 | ImplicitOps, SrcImmByte | ImplicitOps, | 198 | SrcImm | Src2Imm16, SrcImmByte | ImplicitOps, |
195 | SrcNone | ByteOp | ImplicitOps, SrcNone | ImplicitOps, | 199 | SrcNone | ByteOp | ImplicitOps, SrcNone | ImplicitOps, |
196 | SrcNone | ByteOp | ImplicitOps, SrcNone | ImplicitOps, | 200 | SrcNone | ByteOp | ImplicitOps, SrcNone | ImplicitOps, |
197 | /* 0xF0 - 0xF7 */ | 201 | /* 0xF0 - 0xF7 */ |
@@ -230,10 +234,8 @@ static u32 twobyte_table[256] = { | |||
230 | /* 0x70 - 0x7F */ | 234 | /* 0x70 - 0x7F */ |
231 | 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, | 235 | 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, |
232 | /* 0x80 - 0x8F */ | 236 | /* 0x80 - 0x8F */ |
233 | ImplicitOps, ImplicitOps, ImplicitOps, ImplicitOps, | 237 | SrcImm, SrcImm, SrcImm, SrcImm, SrcImm, SrcImm, SrcImm, SrcImm, |
234 | ImplicitOps, ImplicitOps, ImplicitOps, ImplicitOps, | 238 | SrcImm, SrcImm, SrcImm, SrcImm, SrcImm, SrcImm, SrcImm, SrcImm, |
235 | ImplicitOps, ImplicitOps, ImplicitOps, ImplicitOps, | ||
236 | ImplicitOps, ImplicitOps, ImplicitOps, ImplicitOps, | ||
237 | /* 0x90 - 0x9F */ | 239 | /* 0x90 - 0x9F */ |
238 | 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, | 240 | 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, |
239 | /* 0xA0 - 0xA7 */ | 241 | /* 0xA0 - 0xA7 */ |
@@ -1044,10 +1046,14 @@ done_prefixes: | |||
1044 | } | 1046 | } |
1045 | break; | 1047 | break; |
1046 | case SrcImmByte: | 1048 | case SrcImmByte: |
1049 | case SrcImmUByte: | ||
1047 | c->src.type = OP_IMM; | 1050 | c->src.type = OP_IMM; |
1048 | c->src.ptr = (unsigned long *)c->eip; | 1051 | c->src.ptr = (unsigned long *)c->eip; |
1049 | c->src.bytes = 1; | 1052 | c->src.bytes = 1; |
1050 | c->src.val = insn_fetch(s8, 1, c->eip); | 1053 | if ((c->d & SrcMask) == SrcImmByte) |
1054 | c->src.val = insn_fetch(s8, 1, c->eip); | ||
1055 | else | ||
1056 | c->src.val = insn_fetch(u8, 1, c->eip); | ||
1051 | break; | 1057 | break; |
1052 | case SrcOne: | 1058 | case SrcOne: |
1053 | c->src.bytes = 1; | 1059 | c->src.bytes = 1; |
@@ -1072,6 +1078,12 @@ done_prefixes: | |||
1072 | c->src2.bytes = 1; | 1078 | c->src2.bytes = 1; |
1073 | c->src2.val = insn_fetch(u8, 1, c->eip); | 1079 | c->src2.val = insn_fetch(u8, 1, c->eip); |
1074 | break; | 1080 | break; |
1081 | case Src2Imm16: | ||
1082 | c->src2.type = OP_IMM; | ||
1083 | c->src2.ptr = (unsigned long *)c->eip; | ||
1084 | c->src2.bytes = 2; | ||
1085 | c->src2.val = insn_fetch(u16, 2, c->eip); | ||
1086 | break; | ||
1075 | case Src2One: | 1087 | case Src2One: |
1076 | c->src2.bytes = 1; | 1088 | c->src2.bytes = 1; |
1077 | c->src2.val = 1; | 1089 | c->src2.val = 1; |
@@ -1349,6 +1361,20 @@ static inline int writeback(struct x86_emulate_ctxt *ctxt, | |||
1349 | return 0; | 1361 | return 0; |
1350 | } | 1362 | } |
1351 | 1363 | ||
1364 | void toggle_interruptibility(struct x86_emulate_ctxt *ctxt, u32 mask) | ||
1365 | { | ||
1366 | u32 int_shadow = kvm_x86_ops->get_interrupt_shadow(ctxt->vcpu, mask); | ||
1367 | /* | ||
1368 | * an sti; sti; sequence only disables interrupts for the first | ||
1369 | * instruction. So, if the last instruction, be it emulated or | ||
1370 | * not, left the system with the INT_STI flag enabled, it | ||
1371 | * means that the last instruction is an sti. We should not | ||
1372 | * leave the flag on in this case. The same goes for mov ss | ||
1373 | */ | ||
1374 | if (!(int_shadow & mask)) | ||
1375 | ctxt->interruptibility = mask; | ||
1376 | } | ||
1377 | |||
1352 | int | 1378 | int |
1353 | x86_emulate_insn(struct x86_emulate_ctxt *ctxt, struct x86_emulate_ops *ops) | 1379 | x86_emulate_insn(struct x86_emulate_ctxt *ctxt, struct x86_emulate_ops *ops) |
1354 | { | 1380 | { |
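toggle_interruptibility() above implements the rule spelled out in its comment: the shadow is armed only when it was not already active, so a second consecutive sti (or mov ss) does not extend the one-instruction interrupt blackout. The decision reduces to roughly the following; the helper is illustrative and assumes the emulator's X86_SHADOW_INT_STI definition:

static u32 new_shadow_after_sti(u32 current_shadow)
{
	if (current_shadow & X86_SHADOW_INT_STI)
		return 0;			/* back-to-back sti: drop the shadow */
	return X86_SHADOW_INT_STI;		/* first sti: block the next insn    */
}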
@@ -1360,6 +1386,8 @@ x86_emulate_insn(struct x86_emulate_ctxt *ctxt, struct x86_emulate_ops *ops) | |||
1360 | int io_dir_in; | 1386 | int io_dir_in; |
1361 | int rc = 0; | 1387 | int rc = 0; |
1362 | 1388 | ||
1389 | ctxt->interruptibility = 0; | ||
1390 | |||
1363 | /* Shadow copy of register state. Committed on successful emulation. | 1391 | /* Shadow copy of register state. Committed on successful emulation. |
1364 | * NOTE: we can copy them from vcpu as x86_decode_insn() doesn't | 1392 | * NOTE: we can copy them from vcpu as x86_decode_insn() doesn't |
1365 | * modify them. | 1393 | * modify them. |
@@ -1531,13 +1559,10 @@ special_insn: | |||
1531 | return -1; | 1559 | return -1; |
1532 | } | 1560 | } |
1533 | return 0; | 1561 | return 0; |
1534 | case 0x70 ... 0x7f: /* jcc (short) */ { | 1562 | case 0x70 ... 0x7f: /* jcc (short) */ |
1535 | int rel = insn_fetch(s8, 1, c->eip); | ||
1536 | |||
1537 | if (test_cc(c->b, ctxt->eflags)) | 1563 | if (test_cc(c->b, ctxt->eflags)) |
1538 | jmp_rel(c, rel); | 1564 | jmp_rel(c, c->src.val); |
1539 | break; | 1565 | break; |
1540 | } | ||
1541 | case 0x80 ... 0x83: /* Grp1 */ | 1566 | case 0x80 ... 0x83: /* Grp1 */ |
1542 | switch (c->modrm_reg) { | 1567 | switch (c->modrm_reg) { |
1543 | case 0: | 1568 | case 0: |
@@ -1609,6 +1634,9 @@ special_insn: | |||
1609 | int err; | 1634 | int err; |
1610 | 1635 | ||
1611 | sel = c->src.val; | 1636 | sel = c->src.val; |
1637 | if (c->modrm_reg == VCPU_SREG_SS) | ||
1638 | toggle_interruptibility(ctxt, X86_SHADOW_INT_MOV_SS); | ||
1639 | |||
1612 | if (c->modrm_reg <= 5) { | 1640 | if (c->modrm_reg <= 5) { |
1613 | type_bits = (c->modrm_reg == 1) ? 9 : 1; | 1641 | type_bits = (c->modrm_reg == 1) ? 9 : 1; |
1614 | err = kvm_load_segment_descriptor(ctxt->vcpu, sel, | 1642 | err = kvm_load_segment_descriptor(ctxt->vcpu, sel, |
@@ -1769,59 +1797,32 @@ special_insn: | |||
1769 | break; | 1797 | break; |
1770 | case 0xe4: /* inb */ | 1798 | case 0xe4: /* inb */ |
1771 | case 0xe5: /* in */ | 1799 | case 0xe5: /* in */ |
1772 | port = insn_fetch(u8, 1, c->eip); | 1800 | port = c->src.val; |
1773 | io_dir_in = 1; | 1801 | io_dir_in = 1; |
1774 | goto do_io; | 1802 | goto do_io; |
1775 | case 0xe6: /* outb */ | 1803 | case 0xe6: /* outb */ |
1776 | case 0xe7: /* out */ | 1804 | case 0xe7: /* out */ |
1777 | port = insn_fetch(u8, 1, c->eip); | 1805 | port = c->src.val; |
1778 | io_dir_in = 0; | 1806 | io_dir_in = 0; |
1779 | goto do_io; | 1807 | goto do_io; |
1780 | case 0xe8: /* call (near) */ { | 1808 | case 0xe8: /* call (near) */ { |
1781 | long int rel; | 1809 | long int rel = c->src.val; |
1782 | switch (c->op_bytes) { | ||
1783 | case 2: | ||
1784 | rel = insn_fetch(s16, 2, c->eip); | ||
1785 | break; | ||
1786 | case 4: | ||
1787 | rel = insn_fetch(s32, 4, c->eip); | ||
1788 | break; | ||
1789 | default: | ||
1790 | DPRINTF("Call: Invalid op_bytes\n"); | ||
1791 | goto cannot_emulate; | ||
1792 | } | ||
1793 | c->src.val = (unsigned long) c->eip; | 1810 | c->src.val = (unsigned long) c->eip; |
1794 | jmp_rel(c, rel); | 1811 | jmp_rel(c, rel); |
1795 | c->op_bytes = c->ad_bytes; | ||
1796 | emulate_push(ctxt); | 1812 | emulate_push(ctxt); |
1797 | break; | 1813 | break; |
1798 | } | 1814 | } |
1799 | case 0xe9: /* jmp rel */ | 1815 | case 0xe9: /* jmp rel */ |
1800 | goto jmp; | 1816 | goto jmp; |
1801 | case 0xea: /* jmp far */ { | 1817 | case 0xea: /* jmp far */ |
1802 | uint32_t eip; | 1818 | if (kvm_load_segment_descriptor(ctxt->vcpu, c->src2.val, 9, |
1803 | uint16_t sel; | 1819 | VCPU_SREG_CS) < 0) { |
1804 | |||
1805 | switch (c->op_bytes) { | ||
1806 | case 2: | ||
1807 | eip = insn_fetch(u16, 2, c->eip); | ||
1808 | break; | ||
1809 | case 4: | ||
1810 | eip = insn_fetch(u32, 4, c->eip); | ||
1811 | break; | ||
1812 | default: | ||
1813 | DPRINTF("jmp far: Invalid op_bytes\n"); | ||
1814 | goto cannot_emulate; | ||
1815 | } | ||
1816 | sel = insn_fetch(u16, 2, c->eip); | ||
1817 | if (kvm_load_segment_descriptor(ctxt->vcpu, sel, 9, VCPU_SREG_CS) < 0) { | ||
1818 | DPRINTF("jmp far: Failed to load CS descriptor\n"); | 1820 | DPRINTF("jmp far: Failed to load CS descriptor\n"); |
1819 | goto cannot_emulate; | 1821 | goto cannot_emulate; |
1820 | } | 1822 | } |
1821 | 1823 | ||
1822 | c->eip = eip; | 1824 | c->eip = c->src.val; |
1823 | break; | 1825 | break; |
1824 | } | ||
1825 | case 0xeb: | 1826 | case 0xeb: |
1826 | jmp: /* jmp rel short */ | 1827 | jmp: /* jmp rel short */ |
1827 | jmp_rel(c, c->src.val); | 1828 | jmp_rel(c, c->src.val); |
@@ -1865,6 +1866,7 @@ special_insn: | |||
1865 | c->dst.type = OP_NONE; /* Disable writeback. */ | 1866 | c->dst.type = OP_NONE; /* Disable writeback. */ |
1866 | break; | 1867 | break; |
1867 | case 0xfb: /* sti */ | 1868 | case 0xfb: /* sti */ |
1869 | toggle_interruptibility(ctxt, X86_SHADOW_INT_STI); | ||
1868 | ctxt->eflags |= X86_EFLAGS_IF; | 1870 | ctxt->eflags |= X86_EFLAGS_IF; |
1869 | c->dst.type = OP_NONE; /* Disable writeback. */ | 1871 | c->dst.type = OP_NONE; /* Disable writeback. */ |
1870 | break; | 1872 | break; |
@@ -2039,28 +2041,11 @@ twobyte_insn: | |||
2039 | if (!test_cc(c->b, ctxt->eflags)) | 2041 | if (!test_cc(c->b, ctxt->eflags)) |
2040 | c->dst.type = OP_NONE; /* no writeback */ | 2042 | c->dst.type = OP_NONE; /* no writeback */ |
2041 | break; | 2043 | break; |
2042 | case 0x80 ... 0x8f: /* jnz rel, etc*/ { | 2044 | case 0x80 ... 0x8f: /* jnz rel, etc*/ |
2043 | long int rel; | ||
2044 | |||
2045 | switch (c->op_bytes) { | ||
2046 | case 2: | ||
2047 | rel = insn_fetch(s16, 2, c->eip); | ||
2048 | break; | ||
2049 | case 4: | ||
2050 | rel = insn_fetch(s32, 4, c->eip); | ||
2051 | break; | ||
2052 | case 8: | ||
2053 | rel = insn_fetch(s64, 8, c->eip); | ||
2054 | break; | ||
2055 | default: | ||
2056 | DPRINTF("jnz: Invalid op_bytes\n"); | ||
2057 | goto cannot_emulate; | ||
2058 | } | ||
2059 | if (test_cc(c->b, ctxt->eflags)) | 2045 | if (test_cc(c->b, ctxt->eflags)) |
2060 | jmp_rel(c, rel); | 2046 | jmp_rel(c, c->src.val); |
2061 | c->dst.type = OP_NONE; | 2047 | c->dst.type = OP_NONE; |
2062 | break; | 2048 | break; |
2063 | } | ||
2064 | case 0xa3: | 2049 | case 0xa3: |
2065 | bt: /* bt */ | 2050 | bt: /* bt */ |
2066 | c->dst.type = OP_NONE; | 2051 | c->dst.type = OP_NONE; |
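With the jcc, call and jmp cases consolidated above, the relative displacement is no longer re-fetched inside each case: it is decoded once as the instruction's source operand (sign-extended for SrcImm/SrcImmByte) and then handed to jmp_rel(). Roughly, that helper does no more than the following; the real one also wraps the result to the current address size:

static void jmp_rel_sketch(struct decode_cache *c, long rel)
{
	c->eip += rel;	/* displacement was already sign-extended at decode time */
}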
diff --git a/arch/x86/lguest/Kconfig b/arch/x86/lguest/Kconfig index 8dab8f7844d3..38718041efc3 100644 --- a/arch/x86/lguest/Kconfig +++ b/arch/x86/lguest/Kconfig | |||
@@ -2,7 +2,6 @@ config LGUEST_GUEST | |||
2 | bool "Lguest guest support" | 2 | bool "Lguest guest support" |
3 | select PARAVIRT | 3 | select PARAVIRT |
4 | depends on X86_32 | 4 | depends on X86_32 |
5 | depends on !X86_PAE | ||
6 | select VIRTIO | 5 | select VIRTIO |
7 | select VIRTIO_RING | 6 | select VIRTIO_RING |
8 | select VIRTIO_CONSOLE | 7 | select VIRTIO_CONSOLE |
diff --git a/arch/x86/lguest/boot.c b/arch/x86/lguest/boot.c index 33a93b417396..7bc65f0f62c4 100644 --- a/arch/x86/lguest/boot.c +++ b/arch/x86/lguest/boot.c | |||
@@ -87,7 +87,7 @@ struct lguest_data lguest_data = { | |||
87 | 87 | ||
88 | /*G:037 async_hcall() is pretty simple: I'm quite proud of it really. We have a | 88 | /*G:037 async_hcall() is pretty simple: I'm quite proud of it really. We have a |
89 | * ring buffer of stored hypercalls which the Host will run though next time we | 89 | * ring buffer of stored hypercalls which the Host will run though next time we |
(typo in the original comment: "run though" should read "run through")
90 | * do a normal hypercall. Each entry in the ring has 4 slots for the hypercall | 90 | * do a normal hypercall. Each entry in the ring has 5 slots for the hypercall |
91 | * arguments, and a "hcall_status" word which is 0 if the call is ready to go, | 91 | * arguments, and a "hcall_status" word which is 0 if the call is ready to go, |
92 | * and 255 once the Host has finished with it. | 92 | * and 255 once the Host has finished with it. |
93 | * | 93 | * |
@@ -96,7 +96,8 @@ struct lguest_data lguest_data = { | |||
96 | * effect of causing the Host to run all the stored calls in the ring buffer | 96 | * effect of causing the Host to run all the stored calls in the ring buffer |
97 | * which empties it for next time! */ | 97 | * which empties it for next time! */ |
98 | static void async_hcall(unsigned long call, unsigned long arg1, | 98 | static void async_hcall(unsigned long call, unsigned long arg1, |
99 | unsigned long arg2, unsigned long arg3) | 99 | unsigned long arg2, unsigned long arg3, |
100 | unsigned long arg4) | ||
100 | { | 101 | { |
101 | /* Note: This code assumes we're uniprocessor. */ | 102 | /* Note: This code assumes we're uniprocessor. */ |
102 | static unsigned int next_call; | 103 | static unsigned int next_call; |
@@ -108,12 +109,13 @@ static void async_hcall(unsigned long call, unsigned long arg1, | |||
108 | local_irq_save(flags); | 109 | local_irq_save(flags); |
109 | if (lguest_data.hcall_status[next_call] != 0xFF) { | 110 | if (lguest_data.hcall_status[next_call] != 0xFF) { |
110 | /* Table full, so do normal hcall which will flush table. */ | 111 | /* Table full, so do normal hcall which will flush table. */ |
111 | kvm_hypercall3(call, arg1, arg2, arg3); | 112 | kvm_hypercall4(call, arg1, arg2, arg3, arg4); |
112 | } else { | 113 | } else { |
113 | lguest_data.hcalls[next_call].arg0 = call; | 114 | lguest_data.hcalls[next_call].arg0 = call; |
114 | lguest_data.hcalls[next_call].arg1 = arg1; | 115 | lguest_data.hcalls[next_call].arg1 = arg1; |
115 | lguest_data.hcalls[next_call].arg2 = arg2; | 116 | lguest_data.hcalls[next_call].arg2 = arg2; |
116 | lguest_data.hcalls[next_call].arg3 = arg3; | 117 | lguest_data.hcalls[next_call].arg3 = arg3; |
118 | lguest_data.hcalls[next_call].arg4 = arg4; | ||
117 | /* Arguments must all be written before we mark it to go */ | 119 | /* Arguments must all be written before we mark it to go */ |
118 | wmb(); | 120 | wmb(); |
119 | lguest_data.hcall_status[next_call] = 0; | 121 | lguest_data.hcall_status[next_call] = 0; |
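Reading the ring-buffer code above, each slot of lguest_data.hcalls[] cycles through a simple protocol:

    slot free      hcall_status[i] == 0xFF (Host finished with it)
    Guest queues   write arg0..arg4, wmb(), then hcall_status[i] = 0
    Host replays   runs the stored call on the next real hypercall,
                   then sets hcall_status[i] back to 0xFF
    ring full      status != 0xFF, so fall back to an immediate kvm_hypercall4()

The extra arg4 slot added here is what lets the PAE code pass both halves of a 64-bit PTE in a single stored call.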
@@ -141,7 +143,7 @@ static void lazy_hcall1(unsigned long call, | |||
141 | if (paravirt_get_lazy_mode() == PARAVIRT_LAZY_NONE) | 143 | if (paravirt_get_lazy_mode() == PARAVIRT_LAZY_NONE) |
142 | kvm_hypercall1(call, arg1); | 144 | kvm_hypercall1(call, arg1); |
143 | else | 145 | else |
144 | async_hcall(call, arg1, 0, 0); | 146 | async_hcall(call, arg1, 0, 0, 0); |
145 | } | 147 | } |
146 | 148 | ||
147 | static void lazy_hcall2(unsigned long call, | 149 | static void lazy_hcall2(unsigned long call, |
@@ -151,7 +153,7 @@ static void lazy_hcall2(unsigned long call, | |||
151 | if (paravirt_get_lazy_mode() == PARAVIRT_LAZY_NONE) | 153 | if (paravirt_get_lazy_mode() == PARAVIRT_LAZY_NONE) |
152 | kvm_hypercall2(call, arg1, arg2); | 154 | kvm_hypercall2(call, arg1, arg2); |
153 | else | 155 | else |
154 | async_hcall(call, arg1, arg2, 0); | 156 | async_hcall(call, arg1, arg2, 0, 0); |
155 | } | 157 | } |
156 | 158 | ||
157 | static void lazy_hcall3(unsigned long call, | 159 | static void lazy_hcall3(unsigned long call, |
@@ -162,18 +164,38 @@ static void lazy_hcall3(unsigned long call, | |||
162 | if (paravirt_get_lazy_mode() == PARAVIRT_LAZY_NONE) | 164 | if (paravirt_get_lazy_mode() == PARAVIRT_LAZY_NONE) |
163 | kvm_hypercall3(call, arg1, arg2, arg3); | 165 | kvm_hypercall3(call, arg1, arg2, arg3); |
164 | else | 166 | else |
165 | async_hcall(call, arg1, arg2, arg3); | 167 | async_hcall(call, arg1, arg2, arg3, 0); |
168 | } | ||
169 | |||
170 | #ifdef CONFIG_X86_PAE | ||
171 | static void lazy_hcall4(unsigned long call, | ||
172 | unsigned long arg1, | ||
173 | unsigned long arg2, | ||
174 | unsigned long arg3, | ||
175 | unsigned long arg4) | ||
176 | { | ||
177 | if (paravirt_get_lazy_mode() == PARAVIRT_LAZY_NONE) | ||
178 | kvm_hypercall4(call, arg1, arg2, arg3, arg4); | ||
179 | else | ||
180 | async_hcall(call, arg1, arg2, arg3, arg4); | ||
166 | } | 181 | } |
182 | #endif | ||
167 | 183 | ||
168 | /* When lazy mode is turned off reset the per-cpu lazy mode variable and then | 184 | /* When lazy mode is turned off reset the per-cpu lazy mode variable and then |
169 | * issue the do-nothing hypercall to flush any stored calls. */ | 185 | * issue the do-nothing hypercall to flush any stored calls. */ |
170 | static void lguest_leave_lazy_mode(void) | 186 | static void lguest_leave_lazy_mmu_mode(void) |
187 | { | ||
188 | kvm_hypercall0(LHCALL_FLUSH_ASYNC); | ||
189 | paravirt_leave_lazy_mmu(); | ||
190 | } | ||
191 | |||
192 | static void lguest_end_context_switch(struct task_struct *next) | ||
171 | { | 193 | { |
172 | paravirt_leave_lazy(paravirt_get_lazy_mode()); | ||
173 | kvm_hypercall0(LHCALL_FLUSH_ASYNC); | 194 | kvm_hypercall0(LHCALL_FLUSH_ASYNC); |
195 | paravirt_end_context_switch(next); | ||
174 | } | 196 | } |
175 | 197 | ||
176 | /*G:033 | 198 | /*G:032 |
177 | * After that diversion we return to our first native-instruction | 199 | * After that diversion we return to our first native-instruction |
178 | * replacements: four functions for interrupt control. | 200 | * replacements: four functions for interrupt control. |
179 | * | 201 | * |
@@ -193,30 +215,28 @@ static unsigned long save_fl(void) | |||
193 | { | 215 | { |
194 | return lguest_data.irq_enabled; | 216 | return lguest_data.irq_enabled; |
195 | } | 217 | } |
196 | PV_CALLEE_SAVE_REGS_THUNK(save_fl); | ||
197 | |||
198 | /* restore_flags() just sets the flags back to the value given. */ | ||
199 | static void restore_fl(unsigned long flags) | ||
200 | { | ||
201 | lguest_data.irq_enabled = flags; | ||
202 | } | ||
203 | PV_CALLEE_SAVE_REGS_THUNK(restore_fl); | ||
204 | 218 | ||
205 | /* Interrupts go off... */ | 219 | /* Interrupts go off... */ |
206 | static void irq_disable(void) | 220 | static void irq_disable(void) |
207 | { | 221 | { |
208 | lguest_data.irq_enabled = 0; | 222 | lguest_data.irq_enabled = 0; |
209 | } | 223 | } |
224 | |||
225 | /* Let's pause a moment. Remember how I said these are called so often? | ||
226 | * Jeremy Fitzhardinge optimized them so hard early in 2009 that he had to | ||
227 | * break some rules. In particular, these functions are assumed to save their | ||
228 | * own registers if they need to: normal C functions assume they can trash the | ||
229 | * eax register. To use normal C functions, we use | ||
230 | * PV_CALLEE_SAVE_REGS_THUNK(), which pushes %eax onto the stack, calls the | ||
231 | * C function, then restores it. */ | ||
232 | PV_CALLEE_SAVE_REGS_THUNK(save_fl); | ||
210 | PV_CALLEE_SAVE_REGS_THUNK(irq_disable); | 233 | PV_CALLEE_SAVE_REGS_THUNK(irq_disable); |
234 | /*:*/ | ||
211 | 235 | ||
212 | /* Interrupts go on... */ | 236 | /* These are in i386_head.S */ |
213 | static void irq_enable(void) | 237 | extern void lg_irq_enable(void); |
214 | { | 238 | extern void lg_restore_fl(unsigned long flags); |
215 | lguest_data.irq_enabled = X86_EFLAGS_IF; | ||
216 | } | ||
217 | PV_CALLEE_SAVE_REGS_THUNK(irq_enable); | ||
218 | 239 | ||
219 | /*:*/ | ||
220 | /*M:003 Note that we don't check for outstanding interrupts when we re-enable | 240 | /*M:003 Note that we don't check for outstanding interrupts when we re-enable |
221 | * them (or when we unmask an interrupt). This seems to work for the moment, | 241 | * them (or when we unmask an interrupt). This seems to work for the moment, |
222 | * since interrupts are rare and we'll just get the interrupt on the next timer | 242 | * since interrupts are rare and we'll just get the interrupt on the next timer |
@@ -362,8 +382,8 @@ static void lguest_cpuid(unsigned int *ax, unsigned int *bx, | |||
362 | case 1: /* Basic feature request. */ | 382 | case 1: /* Basic feature request. */ |
363 | /* We only allow kernel to see SSE3, CMPXCHG16B and SSSE3 */ | 383 | /* We only allow kernel to see SSE3, CMPXCHG16B and SSSE3 */ |
364 | *cx &= 0x00002201; | 384 | *cx &= 0x00002201; |
365 | /* SSE, SSE2, FXSR, MMX, CMOV, CMPXCHG8B, TSC, FPU. */ | 385 | /* SSE, SSE2, FXSR, MMX, CMOV, CMPXCHG8B, TSC, FPU, PAE. */ |
366 | *dx &= 0x07808111; | 386 | *dx &= 0x07808151; |
367 | /* The Host can do a nice optimization if it knows that the | 387 | /* The Host can do a nice optimization if it knows that the |
368 | * kernel mappings (addresses above 0xC0000000 or whatever | 388 | * kernel mappings (addresses above 0xC0000000 or whatever |
369 | * PAGE_OFFSET is set to) haven't changed. But Linux calls | 389 | * PAGE_OFFSET is set to) haven't changed. But Linux calls |
@@ -382,6 +402,11 @@ static void lguest_cpuid(unsigned int *ax, unsigned int *bx, | |||
382 | if (*ax > 0x80000008) | 402 | if (*ax > 0x80000008) |
383 | *ax = 0x80000008; | 403 | *ax = 0x80000008; |
384 | break; | 404 | break; |
405 | case 0x80000001: | ||
406 | /* Here we should fix nx cap depending on host. */ | ||
407 | /* For this version of PAE, we just clear NX bit. */ | ||
408 | *dx &= ~(1 << 20); | ||
409 | break; | ||
385 | } | 410 | } |
386 | } | 411 | } |
387 | 412 | ||
@@ -515,25 +540,52 @@ static void lguest_write_cr4(unsigned long val) | |||
515 | static void lguest_pte_update(struct mm_struct *mm, unsigned long addr, | 540 | static void lguest_pte_update(struct mm_struct *mm, unsigned long addr, |
516 | pte_t *ptep) | 541 | pte_t *ptep) |
517 | { | 542 | { |
543 | #ifdef CONFIG_X86_PAE | ||
544 | lazy_hcall4(LHCALL_SET_PTE, __pa(mm->pgd), addr, | ||
545 | ptep->pte_low, ptep->pte_high); | ||
546 | #else | ||
518 | lazy_hcall3(LHCALL_SET_PTE, __pa(mm->pgd), addr, ptep->pte_low); | 547 | lazy_hcall3(LHCALL_SET_PTE, __pa(mm->pgd), addr, ptep->pte_low); |
548 | #endif | ||
519 | } | 549 | } |
520 | 550 | ||
521 | static void lguest_set_pte_at(struct mm_struct *mm, unsigned long addr, | 551 | static void lguest_set_pte_at(struct mm_struct *mm, unsigned long addr, |
522 | pte_t *ptep, pte_t pteval) | 552 | pte_t *ptep, pte_t pteval) |
523 | { | 553 | { |
524 | *ptep = pteval; | 554 | native_set_pte(ptep, pteval); |
525 | lguest_pte_update(mm, addr, ptep); | 555 | lguest_pte_update(mm, addr, ptep); |
526 | } | 556 | } |
527 | 557 | ||
528 | /* The Guest calls this to set a top-level entry. Again, we set the entry then | 558 | /* The Guest calls lguest_set_pud to set a top-level entry and lguest_set_pmd |
529 | * tell the Host which top-level page we changed, and the index of the entry we | 559 | * to set a middle-level entry when PAE is activated. |
530 | * changed. */ | 560 | * Again, we set the entry then tell the Host which page we changed, |
561 | * and the index of the entry we changed. */ | ||
562 | #ifdef CONFIG_X86_PAE | ||
563 | static void lguest_set_pud(pud_t *pudp, pud_t pudval) | ||
564 | { | ||
565 | native_set_pud(pudp, pudval); | ||
566 | |||
567 | /* 32 bytes aligned pdpt address and the index. */ | ||
568 | lazy_hcall2(LHCALL_SET_PGD, __pa(pudp) & 0xFFFFFFE0, | ||
569 | (__pa(pudp) & 0x1F) / sizeof(pud_t)); | ||
570 | } | ||
571 | |||
531 | static void lguest_set_pmd(pmd_t *pmdp, pmd_t pmdval) | 572 | static void lguest_set_pmd(pmd_t *pmdp, pmd_t pmdval) |
532 | { | 573 | { |
533 | *pmdp = pmdval; | 574 | native_set_pmd(pmdp, pmdval); |
534 | lazy_hcall2(LHCALL_SET_PMD, __pa(pmdp) & PAGE_MASK, | 575 | lazy_hcall2(LHCALL_SET_PMD, __pa(pmdp) & PAGE_MASK, |
535 | (__pa(pmdp) & (PAGE_SIZE - 1)) / 4); | 576 | (__pa(pmdp) & (PAGE_SIZE - 1)) / sizeof(pmd_t)); |
577 | } | ||
578 | #else | ||
579 | |||
580 | /* The Guest calls lguest_set_pmd to set a top-level entry when PAE is not | ||
581 | * activated. */ | ||
582 | static void lguest_set_pmd(pmd_t *pmdp, pmd_t pmdval) | ||
583 | { | ||
584 | native_set_pmd(pmdp, pmdval); | ||
585 | lazy_hcall2(LHCALL_SET_PGD, __pa(pmdp) & PAGE_MASK, | ||
586 | (__pa(pmdp) & (PAGE_SIZE - 1)) / sizeof(pmd_t)); | ||
536 | } | 587 | } |
588 | #endif | ||
537 | 589 | ||
538 | /* There are a couple of legacy places where the kernel sets a PTE, but we | 590 | /* There are a couple of legacy places where the kernel sets a PTE, but we |
539 | * don't know the top level any more. This is useless for us, since we don't | 591 | * don't know the top level any more. This is useless for us, since we don't |
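The 32-byte mask in lguest_set_pud() follows from the PAE layout: the top level is a 4-entry page-directory-pointer table, each entry 8 bytes, and the table itself only has to be 32-byte aligned, so several can share a page. A worked example (physical address invented for illustration):

    __pa(pudp)                   = 0x00345a78
    table base  (& 0xffffffe0)   = 0x00345a60
    entry index ((& 0x1f) / 8)   = 0x18 / 8 = 3

so LHCALL_SET_PGD is handed the pdpt base plus an index that is always in 0..3.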
@@ -546,11 +598,31 @@ static void lguest_set_pmd(pmd_t *pmdp, pmd_t pmdval) | |||
546 | * which brings boot back to 0.25 seconds. */ | 598 | * which brings boot back to 0.25 seconds. */ |
547 | static void lguest_set_pte(pte_t *ptep, pte_t pteval) | 599 | static void lguest_set_pte(pte_t *ptep, pte_t pteval) |
548 | { | 600 | { |
549 | *ptep = pteval; | 601 | native_set_pte(ptep, pteval); |
602 | if (cr3_changed) | ||
603 | lazy_hcall1(LHCALL_FLUSH_TLB, 1); | ||
604 | } | ||
605 | |||
606 | #ifdef CONFIG_X86_PAE | ||
607 | static void lguest_set_pte_atomic(pte_t *ptep, pte_t pte) | ||
608 | { | ||
609 | native_set_pte_atomic(ptep, pte); | ||
550 | if (cr3_changed) | 610 | if (cr3_changed) |
551 | lazy_hcall1(LHCALL_FLUSH_TLB, 1); | 611 | lazy_hcall1(LHCALL_FLUSH_TLB, 1); |
552 | } | 612 | } |
553 | 613 | ||
614 | void lguest_pte_clear(struct mm_struct *mm, unsigned long addr, pte_t *ptep) | ||
615 | { | ||
616 | native_pte_clear(mm, addr, ptep); | ||
617 | lguest_pte_update(mm, addr, ptep); | ||
618 | } | ||
619 | |||
620 | void lguest_pmd_clear(pmd_t *pmdp) | ||
621 | { | ||
622 | lguest_set_pmd(pmdp, __pmd(0)); | ||
623 | } | ||
624 | #endif | ||
625 | |||
554 | /* Unfortunately for Lguest, the pv_mmu_ops for page tables were based on | 626 | /* Unfortunately for Lguest, the pv_mmu_ops for page tables were based on |
555 | * native page table operations. On native hardware you can set a new page | 627 | * native page table operations. On native hardware you can set a new page |
556 | * table entry whenever you want, but if you want to remove one you have to do | 628 | * table entry whenever you want, but if you want to remove one you have to do |
@@ -622,13 +694,12 @@ static void __init lguest_init_IRQ(void) | |||
622 | { | 694 | { |
623 | unsigned int i; | 695 | unsigned int i; |
624 | 696 | ||
625 | for (i = 0; i < LGUEST_IRQS; i++) { | 697 | for (i = FIRST_EXTERNAL_VECTOR; i < NR_VECTORS; i++) { |
626 | int vector = FIRST_EXTERNAL_VECTOR + i; | ||
627 | /* Some systems map "vectors" to interrupts weirdly. Lguest has | 698 | /* Some systems map "vectors" to interrupts weirdly. Lguest has |
628 | * a straightforward 1 to 1 mapping, so force that here. */ | 699 | * a straightforward 1 to 1 mapping, so force that here. */ |
629 | __get_cpu_var(vector_irq)[vector] = i; | 700 | __get_cpu_var(vector_irq)[i] = i - FIRST_EXTERNAL_VECTOR; |
630 | if (vector != SYSCALL_VECTOR) | 701 | if (i != SYSCALL_VECTOR) |
631 | set_intr_gate(vector, interrupt[i]); | 702 | set_intr_gate(i, interrupt[i - FIRST_EXTERNAL_VECTOR]); |
632 | } | 703 | } |
633 | /* This call is required to set up for 4k stacks, where we have | 704 | /* This call is required to set up for 4k stacks, where we have |
634 | * separate stacks for hard and soft interrupts. */ | 705 | * separate stacks for hard and soft interrupts. */ |
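With the loop above now walking every vector instead of LGUEST_IRQS, the mapping is simply irq = vector - FIRST_EXTERNAL_VECTOR. Using the usual 32-bit values (FIRST_EXTERNAL_VECTOR = 0x20, SYSCALL_VECTOR = 0x80) as a worked example:

    vector 0x20 -> irq 0,  vector 0x21 -> irq 1,  ...
    vector 0x80 -> vector_irq entry set to irq 0x60, but no gate is
                   installed, so int $0x80 stays the system-call entry
    vector 0xff -> irq 0xdf (223)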
@@ -637,7 +708,7 @@ static void __init lguest_init_IRQ(void) | |||
637 | 708 | ||
638 | void lguest_setup_irq(unsigned int irq) | 709 | void lguest_setup_irq(unsigned int irq) |
639 | { | 710 | { |
640 | irq_to_desc_alloc_cpu(irq, 0); | 711 | irq_to_desc_alloc_node(irq, 0); |
641 | set_irq_chip_and_handler_name(irq, &lguest_irq_controller, | 712 | set_irq_chip_and_handler_name(irq, &lguest_irq_controller, |
642 | handle_level_irq, "level"); | 713 | handle_level_irq, "level"); |
643 | } | 714 | } |
@@ -967,10 +1038,10 @@ static void lguest_restart(char *reason) | |||
967 | * | 1038 | * |
968 | * Our current solution is to allow the paravirt back end to optionally patch | 1039 | * Our current solution is to allow the paravirt back end to optionally patch |
969 | * over the indirect calls to replace them with something more efficient. We | 1040 | * over the indirect calls to replace them with something more efficient. We |
970 | * patch the four most commonly called functions: disable interrupts, enable | 1041 | * patch two of the simplest of the most commonly called functions: disable |
971 | * interrupts, restore interrupts and save interrupts. We usually have 6 or 10 | 1042 | * interrupts and save interrupts. We usually have 6 or 10 bytes to patch |
972 | * bytes to patch into: the Guest versions of these operations are small enough | 1043 | * into: the Guest versions of these operations are small enough that we can |
973 | * that we can fit comfortably. | 1044 | * fit comfortably. |
974 | * | 1045 | * |
975 | * First we need assembly templates of each of the patchable Guest operations, | 1046 | * First we need assembly templates of each of the patchable Guest operations, |
976 | * and these are in i386_head.S. */ | 1047 | * and these are in i386_head.S. */ |
@@ -981,8 +1052,6 @@ static const struct lguest_insns | |||
981 | const char *start, *end; | 1052 | const char *start, *end; |
982 | } lguest_insns[] = { | 1053 | } lguest_insns[] = { |
983 | [PARAVIRT_PATCH(pv_irq_ops.irq_disable)] = { lgstart_cli, lgend_cli }, | 1054 | [PARAVIRT_PATCH(pv_irq_ops.irq_disable)] = { lgstart_cli, lgend_cli }, |
984 | [PARAVIRT_PATCH(pv_irq_ops.irq_enable)] = { lgstart_sti, lgend_sti }, | ||
985 | [PARAVIRT_PATCH(pv_irq_ops.restore_fl)] = { lgstart_popf, lgend_popf }, | ||
986 | [PARAVIRT_PATCH(pv_irq_ops.save_fl)] = { lgstart_pushf, lgend_pushf }, | 1055 | [PARAVIRT_PATCH(pv_irq_ops.save_fl)] = { lgstart_pushf, lgend_pushf }, |
987 | }; | 1056 | }; |
988 | 1057 | ||
@@ -1020,6 +1089,7 @@ __init void lguest_init(void) | |||
1020 | pv_info.name = "lguest"; | 1089 | pv_info.name = "lguest"; |
1021 | pv_info.paravirt_enabled = 1; | 1090 | pv_info.paravirt_enabled = 1; |
1022 | pv_info.kernel_rpl = 1; | 1091 | pv_info.kernel_rpl = 1; |
1092 | pv_info.shared_kernel_pmd = 1; | ||
1023 | 1093 | ||
1024 | /* We set up all the lguest overrides for sensitive operations. These | 1094 | /* We set up all the lguest overrides for sensitive operations. These |
1025 | * are detailed with the operations themselves. */ | 1095 | * are detailed with the operations themselves. */ |
@@ -1027,9 +1097,9 @@ __init void lguest_init(void) | |||
1027 | /* interrupt-related operations */ | 1097 | /* interrupt-related operations */ |
1028 | pv_irq_ops.init_IRQ = lguest_init_IRQ; | 1098 | pv_irq_ops.init_IRQ = lguest_init_IRQ; |
1029 | pv_irq_ops.save_fl = PV_CALLEE_SAVE(save_fl); | 1099 | pv_irq_ops.save_fl = PV_CALLEE_SAVE(save_fl); |
1030 | pv_irq_ops.restore_fl = PV_CALLEE_SAVE(restore_fl); | 1100 | pv_irq_ops.restore_fl = __PV_IS_CALLEE_SAVE(lg_restore_fl); |
1031 | pv_irq_ops.irq_disable = PV_CALLEE_SAVE(irq_disable); | 1101 | pv_irq_ops.irq_disable = PV_CALLEE_SAVE(irq_disable); |
1032 | pv_irq_ops.irq_enable = PV_CALLEE_SAVE(irq_enable); | 1102 | pv_irq_ops.irq_enable = __PV_IS_CALLEE_SAVE(lg_irq_enable); |
1033 | pv_irq_ops.safe_halt = lguest_safe_halt; | 1103 | pv_irq_ops.safe_halt = lguest_safe_halt; |
1034 | 1104 | ||
1035 | /* init-time operations */ | 1105 | /* init-time operations */ |
@@ -1054,8 +1124,8 @@ __init void lguest_init(void) | |||
1054 | pv_cpu_ops.write_gdt_entry = lguest_write_gdt_entry; | 1124 | pv_cpu_ops.write_gdt_entry = lguest_write_gdt_entry; |
1055 | pv_cpu_ops.write_idt_entry = lguest_write_idt_entry; | 1125 | pv_cpu_ops.write_idt_entry = lguest_write_idt_entry; |
1056 | pv_cpu_ops.wbinvd = lguest_wbinvd; | 1126 | pv_cpu_ops.wbinvd = lguest_wbinvd; |
1057 | pv_cpu_ops.lazy_mode.enter = paravirt_enter_lazy_cpu; | 1127 | pv_cpu_ops.start_context_switch = paravirt_start_context_switch; |
1058 | pv_cpu_ops.lazy_mode.leave = lguest_leave_lazy_mode; | 1128 | pv_cpu_ops.end_context_switch = lguest_end_context_switch; |
1059 | 1129 | ||
1060 | /* pagetable management */ | 1130 | /* pagetable management */ |
1061 | pv_mmu_ops.write_cr3 = lguest_write_cr3; | 1131 | pv_mmu_ops.write_cr3 = lguest_write_cr3; |
@@ -1065,10 +1135,16 @@ __init void lguest_init(void) | |||
1065 | pv_mmu_ops.set_pte = lguest_set_pte; | 1135 | pv_mmu_ops.set_pte = lguest_set_pte; |
1066 | pv_mmu_ops.set_pte_at = lguest_set_pte_at; | 1136 | pv_mmu_ops.set_pte_at = lguest_set_pte_at; |
1067 | pv_mmu_ops.set_pmd = lguest_set_pmd; | 1137 | pv_mmu_ops.set_pmd = lguest_set_pmd; |
1138 | #ifdef CONFIG_X86_PAE | ||
1139 | pv_mmu_ops.set_pte_atomic = lguest_set_pte_atomic; | ||
1140 | pv_mmu_ops.pte_clear = lguest_pte_clear; | ||
1141 | pv_mmu_ops.pmd_clear = lguest_pmd_clear; | ||
1142 | pv_mmu_ops.set_pud = lguest_set_pud; | ||
1143 | #endif | ||
1068 | pv_mmu_ops.read_cr2 = lguest_read_cr2; | 1144 | pv_mmu_ops.read_cr2 = lguest_read_cr2; |
1069 | pv_mmu_ops.read_cr3 = lguest_read_cr3; | 1145 | pv_mmu_ops.read_cr3 = lguest_read_cr3; |
1070 | pv_mmu_ops.lazy_mode.enter = paravirt_enter_lazy_mmu; | 1146 | pv_mmu_ops.lazy_mode.enter = paravirt_enter_lazy_mmu; |
1071 | pv_mmu_ops.lazy_mode.leave = lguest_leave_lazy_mode; | 1147 | pv_mmu_ops.lazy_mode.leave = lguest_leave_lazy_mmu_mode; |
1072 | pv_mmu_ops.pte_update = lguest_pte_update; | 1148 | pv_mmu_ops.pte_update = lguest_pte_update; |
1073 | pv_mmu_ops.pte_update_defer = lguest_pte_update; | 1149 | pv_mmu_ops.pte_update_defer = lguest_pte_update; |
1074 | 1150 | ||
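Taken together with the lazy-mode hooks installed above, PAE page-table updates batch naturally. A minimal sketch (not part of the patch; the function and variable names are invented) of how two PTE writes share one trip into the Host:

    #include <linux/mm.h>

    static void example_batched_pte_writes(struct mm_struct *mm,
                                           unsigned long addr1, pte_t *ptep1, pte_t val1,
                                           unsigned long addr2, pte_t *ptep2, pte_t val2)
    {
            arch_enter_lazy_mmu_mode();             /* paravirt_enter_lazy_mmu */
            set_pte_at(mm, addr1, ptep1, val1);     /* queued: lazy_hcall4(LHCALL_SET_PTE, ...) */
            set_pte_at(mm, addr2, ptep2, val2);     /* queued in the same async ring */
            arch_leave_lazy_mmu_mode();             /* lguest_leave_lazy_mmu_mode() issues
                                                     * LHCALL_FLUSH_ASYNC, replaying both */
    }

Outside lazy mode the same set_pte_at() calls would each become an immediate hypercall, which is exactly the overhead the async ring is there to avoid.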
diff --git a/arch/x86/lguest/i386_head.S b/arch/x86/lguest/i386_head.S index f79541989471..a9c8cfe61cd4 100644 --- a/arch/x86/lguest/i386_head.S +++ b/arch/x86/lguest/i386_head.S | |||
@@ -46,10 +46,64 @@ ENTRY(lguest_entry) | |||
46 | .globl lgstart_##name; .globl lgend_##name | 46 | .globl lgstart_##name; .globl lgend_##name |
47 | 47 | ||
48 | LGUEST_PATCH(cli, movl $0, lguest_data+LGUEST_DATA_irq_enabled) | 48 | LGUEST_PATCH(cli, movl $0, lguest_data+LGUEST_DATA_irq_enabled) |
49 | LGUEST_PATCH(sti, movl $X86_EFLAGS_IF, lguest_data+LGUEST_DATA_irq_enabled) | ||
50 | LGUEST_PATCH(popf, movl %eax, lguest_data+LGUEST_DATA_irq_enabled) | ||
51 | LGUEST_PATCH(pushf, movl lguest_data+LGUEST_DATA_irq_enabled, %eax) | 49 | LGUEST_PATCH(pushf, movl lguest_data+LGUEST_DATA_irq_enabled, %eax) |
52 | /*:*/ | 50 | |
51 | /*G:033 But using those wrappers is inefficient (we'll see why that doesn't | ||
52 | * matter for save_fl and irq_disable later). If we write our routines | ||
53 | * carefully in assembler, we can avoid clobbering any registers and avoid | ||
54 | * jumping through the wrapper functions. | ||
55 | * | ||
56 | * I skipped over our first piece of assembler, but this one is worth studying | ||
57 | * in a bit more detail so I'll describe in easy stages. First, the routine | ||
58 | * to enable interrupts: */ | ||
59 | ENTRY(lg_irq_enable) | ||
60 | /* The reverse of irq_disable, this sets lguest_data.irq_enabled to | ||
61 | * X86_EFLAGS_IF (ie. "Interrupts enabled"). */ | ||
62 | movl $X86_EFLAGS_IF, lguest_data+LGUEST_DATA_irq_enabled | ||
63 | /* But now we need to check if the Host wants to know: there might have | ||
64 | * been interrupts waiting to be delivered, in which case it will have | ||
65 | * set lguest_data.irq_pending to X86_EFLAGS_IF. If it's not zero, we | ||
66 | * jump to send_interrupts, otherwise we're done. */ | ||
67 | testl $0, lguest_data+LGUEST_DATA_irq_pending | ||
68 | jnz send_interrupts | ||
69 | /* One cool thing about x86 is that you can do many things without using | ||
70 | * a register. In this case, the normal path hasn't needed to save or | ||
71 | * restore any registers at all! */ | ||
72 | ret | ||
73 | send_interrupts: | ||
74 | /* OK, now we need a register: eax is used for the hypercall number, | ||
75 | * which is LHCALL_SEND_INTERRUPTS. | ||
76 | * | ||
77 | * We used not to bother with this pending detection at all, which was | ||
78 | * much simpler. Sooner or later the Host would realize it had to | ||
79 | * send us an interrupt. But that turns out to make performance 7 | ||
80 | * times worse on a simple tcp benchmark. So now we do this the hard | ||
81 | * way. */ | ||
82 | pushl %eax | ||
83 | movl $LHCALL_SEND_INTERRUPTS, %eax | ||
84 | /* This is a vmcall instruction (same thing that KVM uses). Older | ||
85 | * assembler versions might not know the "vmcall" instruction, so we | ||
86 | * create one manually here. */ | ||
87 | .byte 0x0f,0x01,0xc1 /* KVM_HYPERCALL */ | ||
88 | popl %eax | ||
89 | ret | ||
90 | |||
91 | /* Finally, the "popf" or "restore flags" routine. The %eax register holds the | ||
92 | * flags (in practice, either X86_EFLAGS_IF or 0): if it's X86_EFLAGS_IF we're | ||
93 | * enabling interrupts again, if it's 0 we're leaving them off. */ | ||
94 | ENTRY(lg_restore_fl) | ||
95 | /* This is just "lguest_data.irq_enabled = flags;" */ | ||
96 | movl %eax, lguest_data+LGUEST_DATA_irq_enabled | ||
97 | /* Now, if the %eax value has enabled interrupts and | ||
98 | * lguest_data.irq_pending is set, we want to tell the Host so it can | ||
99 | * deliver any outstanding interrupts. Fortunately, both values will | ||
100 | * be X86_EFLAGS_IF (ie. 512) in that case, and the "testl" | ||
101 | * instruction will AND them together for us. If both are set, we | ||
102 | * jump to send_interrupts. */ | ||
103 | testl lguest_data+LGUEST_DATA_irq_pending, %eax | ||
104 | jnz send_interrupts | ||
105 | /* Again, the normal path has used no extra registers. Clever, huh? */ | ||
106 | ret | ||
53 | 107 | ||
54 | /* These demark the EIP range where host should never deliver interrupts. */ | 108 | /* These demark the EIP range where host should never deliver interrupts. */ |
55 | .global lguest_noirq_start | 109 | .global lguest_noirq_start |
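For reference, lg_restore_fl above boils down to the following C; the real routine stays in assembler so the common path touches no register beyond %eax, which already holds the argument (function name invented; illustration only):

    void lg_restore_fl_equivalent(unsigned long flags)
    {
            lguest_data.irq_enabled = flags;
            /* Both sides are X86_EFLAGS_IF exactly when we are re-enabling
             * interrupts and the Host has one pending, so AND-ing works. */
            if (flags & lguest_data.irq_pending)
                    kvm_hypercall0(LHCALL_SEND_INTERRUPTS);
    }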
diff --git a/arch/x86/lib/Makefile b/arch/x86/lib/Makefile index 55e11aa6d66c..f9d35632666b 100644 --- a/arch/x86/lib/Makefile +++ b/arch/x86/lib/Makefile | |||
@@ -2,7 +2,7 @@ | |||
2 | # Makefile for x86 specific library files. | 2 | # Makefile for x86 specific library files. |
3 | # | 3 | # |
4 | 4 | ||
5 | obj-$(CONFIG_SMP) := msr-on-cpu.o | 5 | obj-$(CONFIG_SMP) := msr.o |
6 | 6 | ||
7 | lib-y := delay.o | 7 | lib-y := delay.o |
8 | lib-y += thunk_$(BITS).o | 8 | lib-y += thunk_$(BITS).o |
diff --git a/arch/x86/lib/msr-on-cpu.c b/arch/x86/lib/msr-on-cpu.c deleted file mode 100644 index 321cf720dbb6..000000000000 --- a/arch/x86/lib/msr-on-cpu.c +++ /dev/null | |||
@@ -1,97 +0,0 @@ | |||
1 | #include <linux/module.h> | ||
2 | #include <linux/preempt.h> | ||
3 | #include <linux/smp.h> | ||
4 | #include <asm/msr.h> | ||
5 | |||
6 | struct msr_info { | ||
7 | u32 msr_no; | ||
8 | u32 l, h; | ||
9 | int err; | ||
10 | }; | ||
11 | |||
12 | static void __rdmsr_on_cpu(void *info) | ||
13 | { | ||
14 | struct msr_info *rv = info; | ||
15 | |||
16 | rdmsr(rv->msr_no, rv->l, rv->h); | ||
17 | } | ||
18 | |||
19 | static void __wrmsr_on_cpu(void *info) | ||
20 | { | ||
21 | struct msr_info *rv = info; | ||
22 | |||
23 | wrmsr(rv->msr_no, rv->l, rv->h); | ||
24 | } | ||
25 | |||
26 | int rdmsr_on_cpu(unsigned int cpu, u32 msr_no, u32 *l, u32 *h) | ||
27 | { | ||
28 | int err; | ||
29 | struct msr_info rv; | ||
30 | |||
31 | rv.msr_no = msr_no; | ||
32 | err = smp_call_function_single(cpu, __rdmsr_on_cpu, &rv, 1); | ||
33 | *l = rv.l; | ||
34 | *h = rv.h; | ||
35 | |||
36 | return err; | ||
37 | } | ||
38 | |||
39 | int wrmsr_on_cpu(unsigned int cpu, u32 msr_no, u32 l, u32 h) | ||
40 | { | ||
41 | int err; | ||
42 | struct msr_info rv; | ||
43 | |||
44 | rv.msr_no = msr_no; | ||
45 | rv.l = l; | ||
46 | rv.h = h; | ||
47 | err = smp_call_function_single(cpu, __wrmsr_on_cpu, &rv, 1); | ||
48 | |||
49 | return err; | ||
50 | } | ||
51 | |||
52 | /* These "safe" variants are slower and should be used when the target MSR | ||
53 | may not actually exist. */ | ||
54 | static void __rdmsr_safe_on_cpu(void *info) | ||
55 | { | ||
56 | struct msr_info *rv = info; | ||
57 | |||
58 | rv->err = rdmsr_safe(rv->msr_no, &rv->l, &rv->h); | ||
59 | } | ||
60 | |||
61 | static void __wrmsr_safe_on_cpu(void *info) | ||
62 | { | ||
63 | struct msr_info *rv = info; | ||
64 | |||
65 | rv->err = wrmsr_safe(rv->msr_no, rv->l, rv->h); | ||
66 | } | ||
67 | |||
68 | int rdmsr_safe_on_cpu(unsigned int cpu, u32 msr_no, u32 *l, u32 *h) | ||
69 | { | ||
70 | int err; | ||
71 | struct msr_info rv; | ||
72 | |||
73 | rv.msr_no = msr_no; | ||
74 | err = smp_call_function_single(cpu, __rdmsr_safe_on_cpu, &rv, 1); | ||
75 | *l = rv.l; | ||
76 | *h = rv.h; | ||
77 | |||
78 | return err ? err : rv.err; | ||
79 | } | ||
80 | |||
81 | int wrmsr_safe_on_cpu(unsigned int cpu, u32 msr_no, u32 l, u32 h) | ||
82 | { | ||
83 | int err; | ||
84 | struct msr_info rv; | ||
85 | |||
86 | rv.msr_no = msr_no; | ||
87 | rv.l = l; | ||
88 | rv.h = h; | ||
89 | err = smp_call_function_single(cpu, __wrmsr_safe_on_cpu, &rv, 1); | ||
90 | |||
91 | return err ? err : rv.err; | ||
92 | } | ||
93 | |||
94 | EXPORT_SYMBOL(rdmsr_on_cpu); | ||
95 | EXPORT_SYMBOL(wrmsr_on_cpu); | ||
96 | EXPORT_SYMBOL(rdmsr_safe_on_cpu); | ||
97 | EXPORT_SYMBOL(wrmsr_safe_on_cpu); | ||
diff --git a/arch/x86/lib/msr.c b/arch/x86/lib/msr.c new file mode 100644 index 000000000000..1440b9c0547e --- /dev/null +++ b/arch/x86/lib/msr.c | |||
@@ -0,0 +1,183 @@ | |||
1 | #include <linux/module.h> | ||
2 | #include <linux/preempt.h> | ||
3 | #include <linux/smp.h> | ||
4 | #include <asm/msr.h> | ||
5 | |||
6 | struct msr_info { | ||
7 | u32 msr_no; | ||
8 | struct msr reg; | ||
9 | struct msr *msrs; | ||
10 | int off; | ||
11 | int err; | ||
12 | }; | ||
13 | |||
14 | static void __rdmsr_on_cpu(void *info) | ||
15 | { | ||
16 | struct msr_info *rv = info; | ||
17 | struct msr *reg; | ||
18 | int this_cpu = raw_smp_processor_id(); | ||
19 | |||
20 | if (rv->msrs) | ||
21 | reg = &rv->msrs[this_cpu - rv->off]; | ||
22 | else | ||
23 | reg = &rv->reg; | ||
24 | |||
25 | rdmsr(rv->msr_no, reg->l, reg->h); | ||
26 | } | ||
27 | |||
28 | static void __wrmsr_on_cpu(void *info) | ||
29 | { | ||
30 | struct msr_info *rv = info; | ||
31 | struct msr *reg; | ||
32 | int this_cpu = raw_smp_processor_id(); | ||
33 | |||
34 | if (rv->msrs) | ||
35 | reg = &rv->msrs[this_cpu - rv->off]; | ||
36 | else | ||
37 | reg = &rv->reg; | ||
38 | |||
39 | wrmsr(rv->msr_no, reg->l, reg->h); | ||
40 | } | ||
41 | |||
42 | int rdmsr_on_cpu(unsigned int cpu, u32 msr_no, u32 *l, u32 *h) | ||
43 | { | ||
44 | int err; | ||
45 | struct msr_info rv; | ||
46 | |||
47 | memset(&rv, 0, sizeof(rv)); | ||
48 | |||
49 | rv.msr_no = msr_no; | ||
50 | err = smp_call_function_single(cpu, __rdmsr_on_cpu, &rv, 1); | ||
51 | *l = rv.reg.l; | ||
52 | *h = rv.reg.h; | ||
53 | |||
54 | return err; | ||
55 | } | ||
56 | EXPORT_SYMBOL(rdmsr_on_cpu); | ||
57 | |||
58 | int wrmsr_on_cpu(unsigned int cpu, u32 msr_no, u32 l, u32 h) | ||
59 | { | ||
60 | int err; | ||
61 | struct msr_info rv; | ||
62 | |||
63 | memset(&rv, 0, sizeof(rv)); | ||
64 | |||
65 | rv.msr_no = msr_no; | ||
66 | rv.reg.l = l; | ||
67 | rv.reg.h = h; | ||
68 | err = smp_call_function_single(cpu, __wrmsr_on_cpu, &rv, 1); | ||
69 | |||
70 | return err; | ||
71 | } | ||
72 | EXPORT_SYMBOL(wrmsr_on_cpu); | ||
73 | |||
74 | /* rdmsr on a bunch of CPUs | ||
75 | * | ||
76 | * @mask: which CPUs | ||
77 | * @msr_no: which MSR | ||
78 | * @msrs: array of MSR values | ||
79 | * | ||
80 | */ | ||
81 | void rdmsr_on_cpus(const cpumask_t *mask, u32 msr_no, struct msr *msrs) | ||
82 | { | ||
83 | struct msr_info rv; | ||
84 | int this_cpu; | ||
85 | |||
86 | memset(&rv, 0, sizeof(rv)); | ||
87 | |||
88 | rv.off = cpumask_first(mask); | ||
89 | rv.msrs = msrs; | ||
90 | rv.msr_no = msr_no; | ||
91 | |||
92 | preempt_disable(); | ||
93 | /* | ||
94 | * FIXME: handle the CPU we're executing on separately for now until | ||
95 | * smp_call_function_many has been fixed to not skip it. | ||
96 | */ | ||
97 | this_cpu = raw_smp_processor_id(); | ||
98 | smp_call_function_single(this_cpu, __rdmsr_on_cpu, &rv, 1); | ||
99 | |||
100 | smp_call_function_many(mask, __rdmsr_on_cpu, &rv, 1); | ||
101 | preempt_enable(); | ||
102 | } | ||
103 | EXPORT_SYMBOL(rdmsr_on_cpus); | ||
104 | |||
105 | /* | ||
106 | * wrmsr on a bunch of CPUs | ||
107 | * | ||
108 | * @mask: which CPUs | ||
109 | * @msr_no: which MSR | ||
110 | * @msrs: array of MSR values | ||
111 | * | ||
112 | */ | ||
113 | void wrmsr_on_cpus(const cpumask_t *mask, u32 msr_no, struct msr *msrs) | ||
114 | { | ||
115 | struct msr_info rv; | ||
116 | int this_cpu; | ||
117 | |||
118 | memset(&rv, 0, sizeof(rv)); | ||
119 | |||
120 | rv.off = cpumask_first(mask); | ||
121 | rv.msrs = msrs; | ||
122 | rv.msr_no = msr_no; | ||
123 | |||
124 | preempt_disable(); | ||
125 | /* | ||
126 | * FIXME: handle the CPU we're executing on separately for now until | ||
127 | * smp_call_function_many has been fixed to not skip it. | ||
128 | */ | ||
129 | this_cpu = raw_smp_processor_id(); | ||
130 | smp_call_function_single(this_cpu, __wrmsr_on_cpu, &rv, 1); | ||
131 | |||
132 | smp_call_function_many(mask, __wrmsr_on_cpu, &rv, 1); | ||
133 | preempt_enable(); | ||
134 | } | ||
135 | EXPORT_SYMBOL(wrmsr_on_cpus); | ||
136 | |||
137 | /* These "safe" variants are slower and should be used when the target MSR | ||
138 | may not actually exist. */ | ||
139 | static void __rdmsr_safe_on_cpu(void *info) | ||
140 | { | ||
141 | struct msr_info *rv = info; | ||
142 | |||
143 | rv->err = rdmsr_safe(rv->msr_no, &rv->reg.l, &rv->reg.h); | ||
144 | } | ||
145 | |||
146 | static void __wrmsr_safe_on_cpu(void *info) | ||
147 | { | ||
148 | struct msr_info *rv = info; | ||
149 | |||
150 | rv->err = wrmsr_safe(rv->msr_no, rv->reg.l, rv->reg.h); | ||
151 | } | ||
152 | |||
153 | int rdmsr_safe_on_cpu(unsigned int cpu, u32 msr_no, u32 *l, u32 *h) | ||
154 | { | ||
155 | int err; | ||
156 | struct msr_info rv; | ||
157 | |||
158 | memset(&rv, 0, sizeof(rv)); | ||
159 | |||
160 | rv.msr_no = msr_no; | ||
161 | err = smp_call_function_single(cpu, __rdmsr_safe_on_cpu, &rv, 1); | ||
162 | *l = rv.reg.l; | ||
163 | *h = rv.reg.h; | ||
164 | |||
165 | return err ? err : rv.err; | ||
166 | } | ||
167 | EXPORT_SYMBOL(rdmsr_safe_on_cpu); | ||
168 | |||
169 | int wrmsr_safe_on_cpu(unsigned int cpu, u32 msr_no, u32 l, u32 h) | ||
170 | { | ||
171 | int err; | ||
172 | struct msr_info rv; | ||
173 | |||
174 | memset(&rv, 0, sizeof(rv)); | ||
175 | |||
176 | rv.msr_no = msr_no; | ||
177 | rv.reg.l = l; | ||
178 | rv.reg.h = h; | ||
179 | err = smp_call_function_single(cpu, __wrmsr_safe_on_cpu, &rv, 1); | ||
180 | |||
181 | return err ? err : rv.err; | ||
182 | } | ||
183 | EXPORT_SYMBOL(wrmsr_safe_on_cpu); | ||
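A usage sketch for the new batch helper (not from the patch; the MSR and all names are only examples). Results land in the array indexed by cpu minus the first cpu in the mask, matching the __rdmsr_on_cpu() indexing above:

    #include <linux/kernel.h>
    #include <linux/smp.h>
    #include <asm/msr.h>

    static struct msr example_revs[NR_CPUS];

    static void example_read_msr_everywhere(void)
    {
            unsigned int first = cpumask_first(cpu_online_mask);
            unsigned int cpu;

            rdmsr_on_cpus(cpu_online_mask, MSR_IA32_UCODE_REV, example_revs);

            for_each_online_cpu(cpu)
                    pr_info("cpu%u: %#x:%#x\n", cpu,
                            example_revs[cpu - first].h,
                            example_revs[cpu - first].l);
    }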
diff --git a/arch/x86/lib/usercopy_64.c b/arch/x86/lib/usercopy_64.c index ec13cb5f17ed..b7c2849ffb66 100644 --- a/arch/x86/lib/usercopy_64.c +++ b/arch/x86/lib/usercopy_64.c | |||
@@ -127,7 +127,7 @@ EXPORT_SYMBOL(__strnlen_user); | |||
127 | 127 | ||
128 | long strnlen_user(const char __user *s, long n) | 128 | long strnlen_user(const char __user *s, long n) |
129 | { | 129 | { |
130 | if (!access_ok(VERIFY_READ, s, n)) | 130 | if (!access_ok(VERIFY_READ, s, 1)) |
131 | return 0; | 131 | return 0; |
132 | return __strnlen_user(s, n); | 132 | return __strnlen_user(s, n); |
133 | } | 133 | } |
diff --git a/arch/x86/mm/Makefile b/arch/x86/mm/Makefile index fdd30d08ab52..eefdeee8a871 100644 --- a/arch/x86/mm/Makefile +++ b/arch/x86/mm/Makefile | |||
@@ -10,6 +10,8 @@ obj-$(CONFIG_X86_PTDUMP) += dump_pagetables.o | |||
10 | 10 | ||
11 | obj-$(CONFIG_HIGHMEM) += highmem_32.o | 11 | obj-$(CONFIG_HIGHMEM) += highmem_32.o |
12 | 12 | ||
13 | obj-$(CONFIG_KMEMCHECK) += kmemcheck/ | ||
14 | |||
13 | obj-$(CONFIG_MMIOTRACE) += mmiotrace.o | 15 | obj-$(CONFIG_MMIOTRACE) += mmiotrace.o |
14 | mmiotrace-y := kmmio.o pf_in.o mmio-mod.o | 16 | mmiotrace-y := kmmio.o pf_in.o mmio-mod.o |
15 | obj-$(CONFIG_MMIOTRACE_TEST) += testmmiotrace.o | 17 | obj-$(CONFIG_MMIOTRACE_TEST) += testmmiotrace.o |
diff --git a/arch/x86/mm/dump_pagetables.c b/arch/x86/mm/dump_pagetables.c index e7277cbcfb40..a725b7f760ae 100644 --- a/arch/x86/mm/dump_pagetables.c +++ b/arch/x86/mm/dump_pagetables.c | |||
@@ -161,13 +161,14 @@ static void note_page(struct seq_file *m, struct pg_state *st, | |||
161 | st->current_address >= st->marker[1].start_address) { | 161 | st->current_address >= st->marker[1].start_address) { |
162 | const char *unit = units; | 162 | const char *unit = units; |
163 | unsigned long delta; | 163 | unsigned long delta; |
164 | int width = sizeof(unsigned long) * 2; | ||
164 | 165 | ||
165 | /* | 166 | /* |
166 | * Now print the actual finished series | 167 | * Now print the actual finished series |
167 | */ | 168 | */ |
168 | seq_printf(m, "0x%p-0x%p ", | 169 | seq_printf(m, "0x%0*lx-0x%0*lx ", |
169 | (void *)st->start_address, | 170 | width, st->start_address, |
170 | (void *)st->current_address); | 171 | width, st->current_address); |
171 | 172 | ||
172 | delta = (st->current_address - st->start_address) >> 10; | 173 | delta = (st->current_address - st->start_address) >> 10; |
173 | while (!(delta & 1023) && unit[1]) { | 174 | while (!(delta & 1023) && unit[1]) { |
diff --git a/arch/x86/mm/fault.c b/arch/x86/mm/fault.c index a03b7279efa0..78a5fff857be 100644 --- a/arch/x86/mm/fault.c +++ b/arch/x86/mm/fault.c | |||
@@ -3,40 +3,18 @@ | |||
3 | * Copyright (C) 2001, 2002 Andi Kleen, SuSE Labs. | 3 | * Copyright (C) 2001, 2002 Andi Kleen, SuSE Labs. |
4 | * Copyright (C) 2008-2009, Red Hat Inc., Ingo Molnar | 4 | * Copyright (C) 2008-2009, Red Hat Inc., Ingo Molnar |
5 | */ | 5 | */ |
6 | #include <linux/interrupt.h> | 6 | #include <linux/magic.h> /* STACK_END_MAGIC */ |
7 | #include <linux/mmiotrace.h> | 7 | #include <linux/sched.h> /* test_thread_flag(), ... */ |
8 | #include <linux/bootmem.h> | 8 | #include <linux/kdebug.h> /* oops_begin/end, ... */ |
9 | #include <linux/compiler.h> | 9 | #include <linux/module.h> /* search_exception_table */ |
10 | #include <linux/highmem.h> | 10 | #include <linux/bootmem.h> /* max_low_pfn */ |
11 | #include <linux/kprobes.h> | 11 | #include <linux/kprobes.h> /* __kprobes, ... */ |
12 | #include <linux/uaccess.h> | 12 | #include <linux/mmiotrace.h> /* kmmio_handler, ... */ |
13 | #include <linux/vmalloc.h> | 13 | #include <linux/perf_counter.h> /* perf_swcounter_event */ |
14 | #include <linux/vt_kern.h> | 14 | |
15 | #include <linux/signal.h> | 15 | #include <asm/traps.h> /* dotraplinkage, ... */ |
16 | #include <linux/kernel.h> | 16 | #include <asm/pgalloc.h> /* pgd_*(), ... */ |
17 | #include <linux/ptrace.h> | 17 | #include <asm/kmemcheck.h> /* kmemcheck_*(), ... */ |
18 | #include <linux/string.h> | ||
19 | #include <linux/module.h> | ||
20 | #include <linux/kdebug.h> | ||
21 | #include <linux/errno.h> | ||
22 | #include <linux/magic.h> | ||
23 | #include <linux/sched.h> | ||
24 | #include <linux/types.h> | ||
25 | #include <linux/init.h> | ||
26 | #include <linux/mman.h> | ||
27 | #include <linux/tty.h> | ||
28 | #include <linux/smp.h> | ||
29 | #include <linux/mm.h> | ||
30 | |||
31 | #include <asm-generic/sections.h> | ||
32 | |||
33 | #include <asm/tlbflush.h> | ||
34 | #include <asm/pgalloc.h> | ||
35 | #include <asm/segment.h> | ||
36 | #include <asm/system.h> | ||
37 | #include <asm/proto.h> | ||
38 | #include <asm/traps.h> | ||
39 | #include <asm/desc.h> | ||
40 | 18 | ||
41 | /* | 19 | /* |
42 | * Page fault error code bits: | 20 | * Page fault error code bits: |
@@ -225,12 +203,10 @@ static inline pmd_t *vmalloc_sync_one(pgd_t *pgd, unsigned long address) | |||
225 | if (!pmd_present(*pmd_k)) | 203 | if (!pmd_present(*pmd_k)) |
226 | return NULL; | 204 | return NULL; |
227 | 205 | ||
228 | if (!pmd_present(*pmd)) { | 206 | if (!pmd_present(*pmd)) |
229 | set_pmd(pmd, *pmd_k); | 207 | set_pmd(pmd, *pmd_k); |
230 | arch_flush_lazy_mmu_mode(); | 208 | else |
231 | } else { | ||
232 | BUG_ON(pmd_page(*pmd) != pmd_page(*pmd_k)); | 209 | BUG_ON(pmd_page(*pmd) != pmd_page(*pmd_k)); |
233 | } | ||
234 | 210 | ||
235 | return pmd_k; | 211 | return pmd_k; |
236 | } | 212 | } |
@@ -538,8 +514,6 @@ bad: | |||
538 | static int is_errata93(struct pt_regs *regs, unsigned long address) | 514 | static int is_errata93(struct pt_regs *regs, unsigned long address) |
539 | { | 515 | { |
540 | #ifdef CONFIG_X86_64 | 516 | #ifdef CONFIG_X86_64 |
541 | static int once; | ||
542 | |||
543 | if (address != regs->ip) | 517 | if (address != regs->ip) |
544 | return 0; | 518 | return 0; |
545 | 519 | ||
@@ -549,10 +523,7 @@ static int is_errata93(struct pt_regs *regs, unsigned long address) | |||
549 | address |= 0xffffffffUL << 32; | 523 | address |= 0xffffffffUL << 32; |
550 | if ((address >= (u64)_stext && address <= (u64)_etext) || | 524 | if ((address >= (u64)_stext && address <= (u64)_etext) || |
551 | (address >= MODULES_VADDR && address <= MODULES_END)) { | 525 | (address >= MODULES_VADDR && address <= MODULES_END)) { |
552 | if (!once) { | 526 | printk_once(errata93_warning); |
553 | printk(errata93_warning); | ||
554 | once = 1; | ||
555 | } | ||
556 | regs->ip = address; | 527 | regs->ip = address; |
557 | return 1; | 528 | return 1; |
558 | } | 529 | } |
@@ -981,11 +952,17 @@ do_page_fault(struct pt_regs *regs, unsigned long error_code) | |||
981 | tsk = current; | 952 | tsk = current; |
982 | mm = tsk->mm; | 953 | mm = tsk->mm; |
983 | 954 | ||
984 | prefetchw(&mm->mmap_sem); | ||
985 | |||
986 | /* Get the faulting address: */ | 955 | /* Get the faulting address: */ |
987 | address = read_cr2(); | 956 | address = read_cr2(); |
988 | 957 | ||
958 | /* | ||
959 | * Detect and handle instructions that would cause a page fault for | ||
960 | * both a tracked kernel page and a userspace page. | ||
961 | */ | ||
962 | if (kmemcheck_active(regs)) | ||
963 | kmemcheck_hide(regs); | ||
964 | prefetchw(&mm->mmap_sem); | ||
965 | |||
989 | if (unlikely(kmmio_fault(regs, address))) | 966 | if (unlikely(kmmio_fault(regs, address))) |
990 | return; | 967 | return; |
991 | 968 | ||
@@ -1003,9 +980,13 @@ do_page_fault(struct pt_regs *regs, unsigned long error_code) | |||
1003 | * protection error (error_code & 9) == 0. | 980 | * protection error (error_code & 9) == 0. |
1004 | */ | 981 | */ |
1005 | if (unlikely(fault_in_kernel_space(address))) { | 982 | if (unlikely(fault_in_kernel_space(address))) { |
1006 | if (!(error_code & (PF_RSVD|PF_USER|PF_PROT)) && | 983 | if (!(error_code & (PF_RSVD | PF_USER | PF_PROT))) { |
1007 | vmalloc_fault(address) >= 0) | 984 | if (vmalloc_fault(address) >= 0) |
1008 | return; | 985 | return; |
986 | |||
987 | if (kmemcheck_fault(regs, address, error_code)) | ||
988 | return; | ||
989 | } | ||
1009 | 990 | ||
1010 | /* Can handle a stale RO->RW TLB: */ | 991 | /* Can handle a stale RO->RW TLB: */ |
1011 | if (spurious_fault(error_code, address)) | 992 | if (spurious_fault(error_code, address)) |
@@ -1044,6 +1025,8 @@ do_page_fault(struct pt_regs *regs, unsigned long error_code) | |||
1044 | if (unlikely(error_code & PF_RSVD)) | 1025 | if (unlikely(error_code & PF_RSVD)) |
1045 | pgtable_bad(regs, error_code, address); | 1026 | pgtable_bad(regs, error_code, address); |
1046 | 1027 | ||
1028 | perf_swcounter_event(PERF_COUNT_SW_PAGE_FAULTS, 1, 0, regs, address); | ||
1029 | |||
1047 | /* | 1030 | /* |
1048 | * If we're in an interrupt, have no user context or are running | 1031 | * If we're in an interrupt, have no user context or are running |
1049 | * in an atomic region then we must not take the fault: | 1032 | * in an atomic region then we must not take the fault: |
@@ -1130,17 +1113,22 @@ good_area: | |||
1130 | * make sure we exit gracefully rather than endlessly redo | 1113 | * make sure we exit gracefully rather than endlessly redo |
1131 | * the fault: | 1114 | * the fault: |
1132 | */ | 1115 | */ |
1133 | fault = handle_mm_fault(mm, vma, address, write); | 1116 | fault = handle_mm_fault(mm, vma, address, write ? FAULT_FLAG_WRITE : 0); |
1134 | 1117 | ||
1135 | if (unlikely(fault & VM_FAULT_ERROR)) { | 1118 | if (unlikely(fault & VM_FAULT_ERROR)) { |
1136 | mm_fault_error(regs, error_code, address, fault); | 1119 | mm_fault_error(regs, error_code, address, fault); |
1137 | return; | 1120 | return; |
1138 | } | 1121 | } |
1139 | 1122 | ||
1140 | if (fault & VM_FAULT_MAJOR) | 1123 | if (fault & VM_FAULT_MAJOR) { |
1141 | tsk->maj_flt++; | 1124 | tsk->maj_flt++; |
1142 | else | 1125 | perf_swcounter_event(PERF_COUNT_SW_PAGE_FAULTS_MAJ, 1, 0, |
1126 | regs, address); | ||
1127 | } else { | ||
1143 | tsk->min_flt++; | 1128 | tsk->min_flt++; |
1129 | perf_swcounter_event(PERF_COUNT_SW_PAGE_FAULTS_MIN, 1, 0, | ||
1130 | regs, address); | ||
1131 | } | ||
1144 | 1132 | ||
1145 | check_v8086_mode(regs, address, tsk); | 1133 | check_v8086_mode(regs, address, tsk); |
1146 | 1134 | ||
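The rearranged kernel-address fast path above keys off the hardware error code: vmalloc_fault() and kmemcheck_fault() are only worth trying when none of PF_RSVD, PF_USER or PF_PROT is set. With the standard x86 encoding (bit 0 PF_PROT, bit 1 PF_WRITE, bit 2 PF_USER, bit 3 PF_RSVD, bit 4 PF_INSTR), two worked examples:

    error_code = 0x02  kernel write to a not-present page
                       -> vmalloc_fault(), then kmemcheck_fault(), may fix it up
    error_code = 0x07  user-mode write that hit a protection violation
                       -> both shortcuts are skipped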
diff --git a/arch/x86/mm/gup.c b/arch/x86/mm/gup.c index 6340cef6798a..71da1bca13cb 100644 --- a/arch/x86/mm/gup.c +++ b/arch/x86/mm/gup.c | |||
@@ -14,7 +14,7 @@ | |||
14 | static inline pte_t gup_get_pte(pte_t *ptep) | 14 | static inline pte_t gup_get_pte(pte_t *ptep) |
15 | { | 15 | { |
16 | #ifndef CONFIG_X86_PAE | 16 | #ifndef CONFIG_X86_PAE |
17 | return *ptep; | 17 | return ACCESS_ONCE(*ptep); |
18 | #else | 18 | #else |
19 | /* | 19 | /* |
20 | * With get_user_pages_fast, we walk down the pagetables without taking | 20 | * With get_user_pages_fast, we walk down the pagetables without taking |
@@ -219,6 +219,62 @@ static int gup_pud_range(pgd_t pgd, unsigned long addr, unsigned long end, | |||
219 | return 1; | 219 | return 1; |
220 | } | 220 | } |
221 | 221 | ||
222 | /* | ||
223 | * Like get_user_pages_fast() except its IRQ-safe in that it won't fall | ||
224 | * back to the regular GUP. | ||
225 | */ | ||
226 | int __get_user_pages_fast(unsigned long start, int nr_pages, int write, | ||
227 | struct page **pages) | ||
228 | { | ||
229 | struct mm_struct *mm = current->mm; | ||
230 | unsigned long addr, len, end; | ||
231 | unsigned long next; | ||
232 | unsigned long flags; | ||
233 | pgd_t *pgdp; | ||
234 | int nr = 0; | ||
235 | |||
236 | start &= PAGE_MASK; | ||
237 | addr = start; | ||
238 | len = (unsigned long) nr_pages << PAGE_SHIFT; | ||
239 | end = start + len; | ||
240 | if (unlikely(!access_ok(write ? VERIFY_WRITE : VERIFY_READ, | ||
241 | (void __user *)start, len))) | ||
242 | return 0; | ||
243 | |||
244 | /* | ||
245 | * XXX: batch / limit 'nr', to avoid large irq off latency | ||
246 | * needs some instrumenting to determine the common sizes used by | ||
247 | * important workloads (eg. DB2), and whether limiting the batch size | ||
248 | * will decrease performance. | ||
249 | * | ||
250 | * It seems like we're in the clear for the moment. Direct-IO is | ||
251 | * the main guy that batches up lots of get_user_pages, and even | ||
252 | * they are limited to 64-at-a-time which is not so many. | ||
253 | */ | ||
254 | /* | ||
255 | * This doesn't prevent pagetable teardown, but does prevent | ||
256 | * the pagetables and pages from being freed on x86. | ||
257 | * | ||
258 | * So long as we atomically load page table pointers versus teardown | ||
259 | * (which we do on x86, with the above PAE exception), we can follow the | ||
260 | * address down to the the page and take a ref on it. | ||
(typo in the added comment: "the the page" should read "the page")
261 | */ | ||
262 | local_irq_save(flags); | ||
263 | pgdp = pgd_offset(mm, addr); | ||
264 | do { | ||
265 | pgd_t pgd = *pgdp; | ||
266 | |||
267 | next = pgd_addr_end(addr, end); | ||
268 | if (pgd_none(pgd)) | ||
269 | break; | ||
270 | if (!gup_pud_range(pgd, addr, next, write, pages, &nr)) | ||
271 | break; | ||
272 | } while (pgdp++, addr = next, addr != end); | ||
273 | local_irq_restore(flags); | ||
274 | |||
275 | return nr; | ||
276 | } | ||
277 | |||
222 | /** | 278 | /** |
223 | * get_user_pages_fast() - pin user pages in memory | 279 | * get_user_pages_fast() - pin user pages in memory |
224 | * @start: starting user address | 280 | * @start: starting user address |
@@ -247,11 +303,16 @@ int get_user_pages_fast(unsigned long start, int nr_pages, int write, | |||
247 | start &= PAGE_MASK; | 303 | start &= PAGE_MASK; |
248 | addr = start; | 304 | addr = start; |
249 | len = (unsigned long) nr_pages << PAGE_SHIFT; | 305 | len = (unsigned long) nr_pages << PAGE_SHIFT; |
306 | |||
250 | end = start + len; | 307 | end = start + len; |
251 | if (unlikely(!access_ok(write ? VERIFY_WRITE : VERIFY_READ, | 308 | if (end < start) |
252 | (void __user *)start, len))) | ||
253 | goto slow_irqon; | 309 | goto slow_irqon; |
254 | 310 | ||
311 | #ifdef CONFIG_X86_64 | ||
312 | if (end >> __VIRTUAL_MASK_SHIFT) | ||
313 | goto slow_irqon; | ||
314 | #endif | ||
315 | |||
255 | /* | 316 | /* |
256 | * XXX: batch / limit 'nr', to avoid large irq off latency | 317 | * XXX: batch / limit 'nr', to avoid large irq off latency |
257 | * needs some instrumenting to determine the common sizes used by | 318 | * needs some instrumenting to determine the common sizes used by |
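__get_user_pages_fast() above is the variant callers can use with interrupts already disabled: it only walks the page tables under local_irq_save() and reports how many pages it pinned, never falling back to the sleeping mmap_sem path. A usage sketch (not from the patch; the function name is invented):

    #include <linux/mm.h>

    static int example_pin_one_page(unsigned long uaddr, struct page **page)
    {
            /* Never blocks; a short count just means the page was not
             * present, and the caller must use a slower path instead. */
            if (__get_user_pages_fast(uaddr, 1, 0, page) != 1)
                    return -EFAULT;

            /* ... read the page contents ... */

            put_page(*page);        /* drop the reference taken above */
            return 0;
    }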
diff --git a/arch/x86/mm/highmem_32.c b/arch/x86/mm/highmem_32.c index 8126e8d1a2a4..58f621e81919 100644 --- a/arch/x86/mm/highmem_32.c +++ b/arch/x86/mm/highmem_32.c | |||
@@ -44,7 +44,6 @@ void *kmap_atomic_prot(struct page *page, enum km_type type, pgprot_t prot) | |||
44 | vaddr = __fix_to_virt(FIX_KMAP_BEGIN + idx); | 44 | vaddr = __fix_to_virt(FIX_KMAP_BEGIN + idx); |
45 | BUG_ON(!pte_none(*(kmap_pte-idx))); | 45 | BUG_ON(!pte_none(*(kmap_pte-idx))); |
46 | set_pte(kmap_pte-idx, mk_pte(page, prot)); | 46 | set_pte(kmap_pte-idx, mk_pte(page, prot)); |
47 | arch_flush_lazy_mmu_mode(); | ||
48 | 47 | ||
49 | return (void *)vaddr; | 48 | return (void *)vaddr; |
50 | } | 49 | } |
@@ -74,7 +73,6 @@ void kunmap_atomic(void *kvaddr, enum km_type type) | |||
74 | #endif | 73 | #endif |
75 | } | 74 | } |
76 | 75 | ||
77 | arch_flush_lazy_mmu_mode(); | ||
78 | pagefault_enable(); | 76 | pagefault_enable(); |
79 | } | 77 | } |
80 | 78 | ||
diff --git a/arch/x86/mm/init.c b/arch/x86/mm/init.c index ae4f7b5d7104..f53b57e4086f 100644 --- a/arch/x86/mm/init.c +++ b/arch/x86/mm/init.c | |||
@@ -1,3 +1,4 @@ | |||
1 | #include <linux/initrd.h> | ||
1 | #include <linux/ioport.h> | 2 | #include <linux/ioport.h> |
2 | #include <linux/swap.h> | 3 | #include <linux/swap.h> |
3 | 4 | ||
@@ -10,6 +11,9 @@ | |||
10 | #include <asm/setup.h> | 11 | #include <asm/setup.h> |
11 | #include <asm/system.h> | 12 | #include <asm/system.h> |
12 | #include <asm/tlbflush.h> | 13 | #include <asm/tlbflush.h> |
14 | #include <asm/tlb.h> | ||
15 | |||
16 | DEFINE_PER_CPU(struct mmu_gather, mmu_gathers); | ||
13 | 17 | ||
14 | unsigned long __initdata e820_table_start; | 18 | unsigned long __initdata e820_table_start; |
15 | unsigned long __meminitdata e820_table_end; | 19 | unsigned long __meminitdata e820_table_end; |
@@ -23,6 +27,69 @@ int direct_gbpages | |||
23 | #endif | 27 | #endif |
24 | ; | 28 | ; |
25 | 29 | ||
30 | int nx_enabled; | ||
31 | |||
32 | #if defined(CONFIG_X86_64) || defined(CONFIG_X86_PAE) | ||
33 | static int disable_nx __cpuinitdata; | ||
34 | |||
35 | /* | ||
36 | * noexec = on|off | ||
37 | * | ||
38 | * Control non-executable mappings for processes. | ||
39 | * | ||
40 | * on Enable | ||
41 | * off Disable | ||
42 | */ | ||
43 | static int __init noexec_setup(char *str) | ||
44 | { | ||
45 | if (!str) | ||
46 | return -EINVAL; | ||
47 | if (!strncmp(str, "on", 2)) { | ||
48 | __supported_pte_mask |= _PAGE_NX; | ||
49 | disable_nx = 0; | ||
50 | } else if (!strncmp(str, "off", 3)) { | ||
51 | disable_nx = 1; | ||
52 | __supported_pte_mask &= ~_PAGE_NX; | ||
53 | } | ||
54 | return 0; | ||
55 | } | ||
56 | early_param("noexec", noexec_setup); | ||
57 | #endif | ||
58 | |||
59 | #ifdef CONFIG_X86_PAE | ||
60 | static void __init set_nx(void) | ||
61 | { | ||
62 | unsigned int v[4], l, h; | ||
63 | |||
64 | if (cpu_has_pae && (cpuid_eax(0x80000000) > 0x80000001)) { | ||
65 | cpuid(0x80000001, &v[0], &v[1], &v[2], &v[3]); | ||
66 | |||
67 | if ((v[3] & (1 << 20)) && !disable_nx) { | ||
68 | rdmsr(MSR_EFER, l, h); | ||
69 | l |= EFER_NX; | ||
70 | wrmsr(MSR_EFER, l, h); | ||
71 | nx_enabled = 1; | ||
72 | __supported_pte_mask |= _PAGE_NX; | ||
73 | } | ||
74 | } | ||
75 | } | ||
76 | #else | ||
77 | static inline void set_nx(void) | ||
78 | { | ||
79 | } | ||
80 | #endif | ||
81 | |||
82 | #ifdef CONFIG_X86_64 | ||
83 | void __cpuinit check_efer(void) | ||
84 | { | ||
85 | unsigned long efer; | ||
86 | |||
87 | rdmsrl(MSR_EFER, efer); | ||
88 | if (!(efer & EFER_NX) || disable_nx) | ||
89 | __supported_pte_mask &= ~_PAGE_NX; | ||
90 | } | ||
91 | #endif | ||
92 | |||
26 | static void __init find_early_table_space(unsigned long end, int use_pse, | 93 | static void __init find_early_table_space(unsigned long end, int use_pse, |
27 | int use_gbpages) | 94 | int use_gbpages) |
28 | { | 95 | { |
@@ -66,12 +133,11 @@ static void __init find_early_table_space(unsigned long end, int use_pse, | |||
66 | */ | 133 | */ |
67 | #ifdef CONFIG_X86_32 | 134 | #ifdef CONFIG_X86_32 |
68 | start = 0x7000; | 135 | start = 0x7000; |
69 | e820_table_start = find_e820_area(start, max_pfn_mapped<<PAGE_SHIFT, | 136 | #else |
70 | tables, PAGE_SIZE); | ||
71 | #else /* CONFIG_X86_64 */ | ||
72 | start = 0x8000; | 137 | start = 0x8000; |
73 | e820_table_start = find_e820_area(start, end, tables, PAGE_SIZE); | ||
74 | #endif | 138 | #endif |
139 | e820_table_start = find_e820_area(start, max_pfn_mapped<<PAGE_SHIFT, | ||
140 | tables, PAGE_SIZE); | ||
75 | if (e820_table_start == -1UL) | 141 | if (e820_table_start == -1UL) |
76 | panic("Cannot find space for the kernel page tables"); | 142 | panic("Cannot find space for the kernel page tables"); |
77 | 143 | ||
@@ -147,7 +213,7 @@ unsigned long __init_refok init_memory_mapping(unsigned long start, | |||
147 | if (!after_bootmem) | 213 | if (!after_bootmem) |
148 | init_gbpages(); | 214 | init_gbpages(); |
149 | 215 | ||
150 | #ifdef CONFIG_DEBUG_PAGEALLOC | 216 | #if defined(CONFIG_DEBUG_PAGEALLOC) || defined(CONFIG_KMEMCHECK) |
151 | /* | 217 | /* |
152 | * For CONFIG_DEBUG_PAGEALLOC, identity mapping will use small pages. | 218 | * For CONFIG_DEBUG_PAGEALLOC, identity mapping will use small pages. |
153 | * This will simplify cpa(), which otherwise needs to support splitting | 219 | * This will simplify cpa(), which otherwise needs to support splitting |
@@ -159,12 +225,9 @@ unsigned long __init_refok init_memory_mapping(unsigned long start, | |||
159 | use_gbpages = direct_gbpages; | 225 | use_gbpages = direct_gbpages; |
160 | #endif | 226 | #endif |
161 | 227 | ||
162 | #ifdef CONFIG_X86_32 | ||
163 | #ifdef CONFIG_X86_PAE | ||
164 | set_nx(); | 228 | set_nx(); |
165 | if (nx_enabled) | 229 | if (nx_enabled) |
166 | printk(KERN_INFO "NX (Execute Disable) protection: active\n"); | 230 | printk(KERN_INFO "NX (Execute Disable) protection: active\n"); |
167 | #endif | ||
168 | 231 | ||
169 | /* Enable PSE if available */ | 232 | /* Enable PSE if available */ |
170 | if (cpu_has_pse) | 233 | if (cpu_has_pse) |
@@ -175,7 +238,6 @@ unsigned long __init_refok init_memory_mapping(unsigned long start, | |||
175 | set_in_cr4(X86_CR4_PGE); | 238 | set_in_cr4(X86_CR4_PGE); |
176 | __supported_pte_mask |= _PAGE_GLOBAL; | 239 | __supported_pte_mask |= _PAGE_GLOBAL; |
177 | } | 240 | } |
178 | #endif | ||
179 | 241 | ||
180 | if (use_gbpages) | 242 | if (use_gbpages) |
181 | page_size_mask |= 1 << PG_LEVEL_1G; | 243 | page_size_mask |= 1 << PG_LEVEL_1G; |
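With noexec_setup(), set_nx() and check_efer() now living in the shared init.c, the same command-line switch covers 32-bit PAE and 64-bit kernels. Reading the moved code, the behaviour is:

    (no option)    NX is used whenever the CPU advertises it; the boot log then
                   shows "NX (Execute Disable) protection: active"
    noexec=on      force _PAGE_NX into __supported_pte_mask (clears disable_nx)
    noexec=off     set disable_nx and mask _PAGE_NX out, so no mapping is
                   marked non-executable even on NX-capable hardware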
diff --git a/arch/x86/mm/init_32.c b/arch/x86/mm/init_32.c index 749559ed80f5..3cd7711bb949 100644 --- a/arch/x86/mm/init_32.c +++ b/arch/x86/mm/init_32.c | |||
@@ -49,12 +49,9 @@ | |||
49 | #include <asm/paravirt.h> | 49 | #include <asm/paravirt.h> |
50 | #include <asm/setup.h> | 50 | #include <asm/setup.h> |
51 | #include <asm/cacheflush.h> | 51 | #include <asm/cacheflush.h> |
52 | #include <asm/page_types.h> | ||
52 | #include <asm/init.h> | 53 | #include <asm/init.h> |
53 | 54 | ||
54 | unsigned long max_low_pfn_mapped; | ||
55 | unsigned long max_pfn_mapped; | ||
56 | |||
57 | DEFINE_PER_CPU(struct mmu_gather, mmu_gathers); | ||
58 | unsigned long highstart_pfn, highend_pfn; | 55 | unsigned long highstart_pfn, highend_pfn; |
59 | 56 | ||
60 | static noinline int do_test_wp_bit(void); | 57 | static noinline int do_test_wp_bit(void); |
@@ -114,7 +111,7 @@ static pte_t * __init one_page_table_init(pmd_t *pmd) | |||
114 | pte_t *page_table = NULL; | 111 | pte_t *page_table = NULL; |
115 | 112 | ||
116 | if (after_bootmem) { | 113 | if (after_bootmem) { |
117 | #ifdef CONFIG_DEBUG_PAGEALLOC | 114 | #if defined(CONFIG_DEBUG_PAGEALLOC) || defined(CONFIG_KMEMCHECK) |
118 | page_table = (pte_t *) alloc_bootmem_pages(PAGE_SIZE); | 115 | page_table = (pte_t *) alloc_bootmem_pages(PAGE_SIZE); |
119 | #endif | 116 | #endif |
120 | if (!page_table) | 117 | if (!page_table) |
@@ -567,7 +564,7 @@ static inline void save_pg_dir(void) | |||
567 | } | 564 | } |
568 | #endif /* !CONFIG_ACPI_SLEEP */ | 565 | #endif /* !CONFIG_ACPI_SLEEP */ |
569 | 566 | ||
570 | void zap_low_mappings(void) | 567 | void zap_low_mappings(bool early) |
571 | { | 568 | { |
572 | int i; | 569 | int i; |
573 | 570 | ||
@@ -584,64 +581,16 @@ void zap_low_mappings(void) | |||
584 | set_pgd(swapper_pg_dir+i, __pgd(0)); | 581 | set_pgd(swapper_pg_dir+i, __pgd(0)); |
585 | #endif | 582 | #endif |
586 | } | 583 | } |
587 | flush_tlb_all(); | ||
588 | } | ||
589 | 584 | ||
590 | int nx_enabled; | 585 | if (early) |
586 | __flush_tlb(); | ||
587 | else | ||
588 | flush_tlb_all(); | ||
589 | } | ||
591 | 590 | ||
592 | pteval_t __supported_pte_mask __read_mostly = ~(_PAGE_NX | _PAGE_GLOBAL | _PAGE_IOMAP); | 591 | pteval_t __supported_pte_mask __read_mostly = ~(_PAGE_NX | _PAGE_GLOBAL | _PAGE_IOMAP); |
593 | EXPORT_SYMBOL_GPL(__supported_pte_mask); | 592 | EXPORT_SYMBOL_GPL(__supported_pte_mask); |
594 | 593 | ||
595 | #ifdef CONFIG_X86_PAE | ||
596 | |||
597 | static int disable_nx __initdata; | ||
598 | |||
599 | /* | ||
600 | * noexec = on|off | ||
601 | * | ||
602 | * Control non executable mappings. | ||
603 | * | ||
604 | * on Enable | ||
605 | * off Disable | ||
606 | */ | ||
607 | static int __init noexec_setup(char *str) | ||
608 | { | ||
609 | if (!str || !strcmp(str, "on")) { | ||
610 | if (cpu_has_nx) { | ||
611 | __supported_pte_mask |= _PAGE_NX; | ||
612 | disable_nx = 0; | ||
613 | } | ||
614 | } else { | ||
615 | if (!strcmp(str, "off")) { | ||
616 | disable_nx = 1; | ||
617 | __supported_pte_mask &= ~_PAGE_NX; | ||
618 | } else { | ||
619 | return -EINVAL; | ||
620 | } | ||
621 | } | ||
622 | |||
623 | return 0; | ||
624 | } | ||
625 | early_param("noexec", noexec_setup); | ||
626 | |||
627 | void __init set_nx(void) | ||
628 | { | ||
629 | unsigned int v[4], l, h; | ||
630 | |||
631 | if (cpu_has_pae && (cpuid_eax(0x80000000) > 0x80000001)) { | ||
632 | cpuid(0x80000001, &v[0], &v[1], &v[2], &v[3]); | ||
633 | |||
634 | if ((v[3] & (1 << 20)) && !disable_nx) { | ||
635 | rdmsr(MSR_EFER, l, h); | ||
636 | l |= EFER_NX; | ||
637 | wrmsr(MSR_EFER, l, h); | ||
638 | nx_enabled = 1; | ||
639 | __supported_pte_mask |= _PAGE_NX; | ||
640 | } | ||
641 | } | ||
642 | } | ||
643 | #endif | ||
644 | |||
645 | /* user-defined highmem size */ | 594 | /* user-defined highmem size */ |
646 | static unsigned int highmem_pages = -1; | 595 | static unsigned int highmem_pages = -1; |
647 | 596 | ||
@@ -761,15 +710,15 @@ void __init initmem_init(unsigned long start_pfn, | |||
761 | highstart_pfn = highend_pfn = max_pfn; | 710 | highstart_pfn = highend_pfn = max_pfn; |
762 | if (max_pfn > max_low_pfn) | 711 | if (max_pfn > max_low_pfn) |
763 | highstart_pfn = max_low_pfn; | 712 | highstart_pfn = max_low_pfn; |
764 | memory_present(0, 0, highend_pfn); | ||
765 | e820_register_active_regions(0, 0, highend_pfn); | 713 | e820_register_active_regions(0, 0, highend_pfn); |
714 | sparse_memory_present_with_active_regions(0); | ||
766 | printk(KERN_NOTICE "%ldMB HIGHMEM available.\n", | 715 | printk(KERN_NOTICE "%ldMB HIGHMEM available.\n", |
767 | pages_to_mb(highend_pfn - highstart_pfn)); | 716 | pages_to_mb(highend_pfn - highstart_pfn)); |
768 | num_physpages = highend_pfn; | 717 | num_physpages = highend_pfn; |
769 | high_memory = (void *) __va(highstart_pfn * PAGE_SIZE - 1) + 1; | 718 | high_memory = (void *) __va(highstart_pfn * PAGE_SIZE - 1) + 1; |
770 | #else | 719 | #else |
771 | memory_present(0, 0, max_low_pfn); | ||
772 | e820_register_active_regions(0, 0, max_low_pfn); | 720 | e820_register_active_regions(0, 0, max_low_pfn); |
721 | sparse_memory_present_with_active_regions(0); | ||
773 | num_physpages = max_low_pfn; | 722 | num_physpages = max_low_pfn; |
774 | high_memory = (void *) __va(max_low_pfn * PAGE_SIZE - 1) + 1; | 723 | high_memory = (void *) __va(max_low_pfn * PAGE_SIZE - 1) + 1; |
775 | #endif | 724 | #endif |
@@ -1011,7 +960,7 @@ void __init mem_init(void) | |||
1011 | test_wp_bit(); | 960 | test_wp_bit(); |
1012 | 961 | ||
1013 | save_pg_dir(); | 962 | save_pg_dir(); |
1014 | zap_low_mappings(); | 963 | zap_low_mappings(true); |
1015 | } | 964 | } |
1016 | 965 | ||
1017 | #ifdef CONFIG_MEMORY_HOTPLUG | 966 | #ifdef CONFIG_MEMORY_HOTPLUG |
diff --git a/arch/x86/mm/init_64.c b/arch/x86/mm/init_64.c index 1753e8020df6..c4378f4fd4a5 100644 --- a/arch/x86/mm/init_64.c +++ b/arch/x86/mm/init_64.c | |||
@@ -50,18 +50,8 @@ | |||
50 | #include <asm/cacheflush.h> | 50 | #include <asm/cacheflush.h> |
51 | #include <asm/init.h> | 51 | #include <asm/init.h> |
52 | 52 | ||
53 | /* | ||
54 | * end_pfn only includes RAM, while max_pfn_mapped includes all e820 entries. | ||
55 | * The direct mapping extends to max_pfn_mapped, so that we can directly access | ||
56 | * apertures, ACPI and other tables without having to play with fixmaps. | ||
57 | */ | ||
58 | unsigned long max_low_pfn_mapped; | ||
59 | unsigned long max_pfn_mapped; | ||
60 | |||
61 | static unsigned long dma_reserve __initdata; | 53 | static unsigned long dma_reserve __initdata; |
62 | 54 | ||
63 | DEFINE_PER_CPU(struct mmu_gather, mmu_gathers); | ||
64 | |||
65 | static int __init parse_direct_gbpages_off(char *arg) | 55 | static int __init parse_direct_gbpages_off(char *arg) |
66 | { | 56 | { |
67 | direct_gbpages = 0; | 57 | direct_gbpages = 0; |
@@ -85,39 +75,6 @@ early_param("gbpages", parse_direct_gbpages_on); | |||
85 | pteval_t __supported_pte_mask __read_mostly = ~_PAGE_IOMAP; | 75 | pteval_t __supported_pte_mask __read_mostly = ~_PAGE_IOMAP; |
86 | EXPORT_SYMBOL_GPL(__supported_pte_mask); | 76 | EXPORT_SYMBOL_GPL(__supported_pte_mask); |
87 | 77 | ||
88 | static int disable_nx __cpuinitdata; | ||
89 | |||
90 | /* | ||
91 | * noexec=on|off | ||
92 | * Control non-executable mappings for 64-bit processes. | ||
93 | * | ||
94 | * on Enable (default) | ||
95 | * off Disable | ||
96 | */ | ||
97 | static int __init nonx_setup(char *str) | ||
98 | { | ||
99 | if (!str) | ||
100 | return -EINVAL; | ||
101 | if (!strncmp(str, "on", 2)) { | ||
102 | __supported_pte_mask |= _PAGE_NX; | ||
103 | disable_nx = 0; | ||
104 | } else if (!strncmp(str, "off", 3)) { | ||
105 | disable_nx = 1; | ||
106 | __supported_pte_mask &= ~_PAGE_NX; | ||
107 | } | ||
108 | return 0; | ||
109 | } | ||
110 | early_param("noexec", nonx_setup); | ||
111 | |||
112 | void __cpuinit check_efer(void) | ||
113 | { | ||
114 | unsigned long efer; | ||
115 | |||
116 | rdmsrl(MSR_EFER, efer); | ||
117 | if (!(efer & EFER_NX) || disable_nx) | ||
118 | __supported_pte_mask &= ~_PAGE_NX; | ||
119 | } | ||
120 | |||
121 | int force_personality32; | 78 | int force_personality32; |
122 | 79 | ||
123 | /* | 80 | /* |
@@ -147,7 +104,7 @@ static __ref void *spp_getpage(void) | |||
147 | void *ptr; | 104 | void *ptr; |
148 | 105 | ||
149 | if (after_bootmem) | 106 | if (after_bootmem) |
150 | ptr = (void *) get_zeroed_page(GFP_ATOMIC); | 107 | ptr = (void *) get_zeroed_page(GFP_ATOMIC | __GFP_NOTRACK); |
151 | else | 108 | else |
152 | ptr = alloc_bootmem_pages(PAGE_SIZE); | 109 | ptr = alloc_bootmem_pages(PAGE_SIZE); |
153 | 110 | ||
@@ -324,7 +281,7 @@ static __ref void *alloc_low_page(unsigned long *phys) | |||
324 | void *adr; | 281 | void *adr; |
325 | 282 | ||
326 | if (after_bootmem) { | 283 | if (after_bootmem) { |
327 | adr = (void *)get_zeroed_page(GFP_ATOMIC); | 284 | adr = (void *)get_zeroed_page(GFP_ATOMIC | __GFP_NOTRACK); |
328 | *phys = __pa(adr); | 285 | *phys = __pa(adr); |
329 | 286 | ||
330 | return adr; | 287 | return adr; |
@@ -570,7 +527,7 @@ phys_pud_update(pgd_t *pgd, unsigned long addr, unsigned long end, | |||
570 | return phys_pud_init(pud, addr, end, page_size_mask); | 527 | return phys_pud_init(pud, addr, end, page_size_mask); |
571 | } | 528 | } |
572 | 529 | ||
573 | unsigned long __init | 530 | unsigned long __meminit |
574 | kernel_physical_mapping_init(unsigned long start, | 531 | kernel_physical_mapping_init(unsigned long start, |
575 | unsigned long end, | 532 | unsigned long end, |
576 | unsigned long page_size_mask) | 533 | unsigned long page_size_mask) |
@@ -628,6 +585,7 @@ void __init initmem_init(unsigned long start_pfn, unsigned long end_pfn) | |||
628 | early_res_to_bootmem(0, end_pfn<<PAGE_SHIFT); | 585 | early_res_to_bootmem(0, end_pfn<<PAGE_SHIFT); |
629 | reserve_bootmem(bootmap, bootmap_size, BOOTMEM_DEFAULT); | 586 | reserve_bootmem(bootmap, bootmap_size, BOOTMEM_DEFAULT); |
630 | } | 587 | } |
588 | #endif | ||
631 | 589 | ||
632 | void __init paging_init(void) | 590 | void __init paging_init(void) |
633 | { | 591 | { |
@@ -638,11 +596,10 @@ void __init paging_init(void) | |||
638 | max_zone_pfns[ZONE_DMA32] = MAX_DMA32_PFN; | 596 | max_zone_pfns[ZONE_DMA32] = MAX_DMA32_PFN; |
639 | max_zone_pfns[ZONE_NORMAL] = max_pfn; | 597 | max_zone_pfns[ZONE_NORMAL] = max_pfn; |
640 | 598 | ||
641 | memory_present(0, 0, max_pfn); | 599 | sparse_memory_present_with_active_regions(MAX_NUMNODES); |
642 | sparse_init(); | 600 | sparse_init(); |
643 | free_area_init_nodes(max_zone_pfns); | 601 | free_area_init_nodes(max_zone_pfns); |
644 | } | 602 | } |
645 | #endif | ||
646 | 603 | ||
647 | /* | 604 | /* |
648 | * Memory hotplug specific functions | 605 | * Memory hotplug specific functions |
diff --git a/arch/x86/mm/iomap_32.c b/arch/x86/mm/iomap_32.c index 8056545e2d39..fe6f84ca121e 100644 --- a/arch/x86/mm/iomap_32.c +++ b/arch/x86/mm/iomap_32.c | |||
@@ -82,7 +82,6 @@ iounmap_atomic(void *kvaddr, enum km_type type) | |||
82 | if (vaddr == __fix_to_virt(FIX_KMAP_BEGIN+idx)) | 82 | if (vaddr == __fix_to_virt(FIX_KMAP_BEGIN+idx)) |
83 | kpte_clear_flush(kmap_pte-idx, vaddr); | 83 | kpte_clear_flush(kmap_pte-idx, vaddr); |
84 | 84 | ||
85 | arch_flush_lazy_mmu_mode(); | ||
86 | pagefault_enable(); | 85 | pagefault_enable(); |
87 | } | 86 | } |
88 | EXPORT_SYMBOL_GPL(iounmap_atomic); | 87 | EXPORT_SYMBOL_GPL(iounmap_atomic); |
diff --git a/arch/x86/mm/kmemcheck/Makefile b/arch/x86/mm/kmemcheck/Makefile new file mode 100644 index 000000000000..520b3bce4095 --- /dev/null +++ b/arch/x86/mm/kmemcheck/Makefile | |||
@@ -0,0 +1 @@ | |||
1 | obj-y := error.o kmemcheck.o opcode.o pte.o selftest.o shadow.o ||
diff --git a/arch/x86/mm/kmemcheck/error.c b/arch/x86/mm/kmemcheck/error.c new file mode 100644 index 000000000000..4901d0dafda6 --- /dev/null +++ b/arch/x86/mm/kmemcheck/error.c | |||
@@ -0,0 +1,228 @@ | |||
1 | #include <linux/interrupt.h> | ||
2 | #include <linux/kdebug.h> | ||
3 | #include <linux/kmemcheck.h> | ||
4 | #include <linux/kernel.h> | ||
5 | #include <linux/types.h> | ||
6 | #include <linux/ptrace.h> | ||
7 | #include <linux/stacktrace.h> | ||
8 | #include <linux/string.h> | ||
9 | |||
10 | #include "error.h" | ||
11 | #include "shadow.h" | ||
12 | |||
13 | enum kmemcheck_error_type { | ||
14 | KMEMCHECK_ERROR_INVALID_ACCESS, | ||
15 | KMEMCHECK_ERROR_BUG, | ||
16 | }; | ||
17 | |||
18 | #define SHADOW_COPY_SIZE (1 << CONFIG_KMEMCHECK_SHADOW_COPY_SHIFT) | ||
19 | |||
20 | struct kmemcheck_error { | ||
21 | enum kmemcheck_error_type type; | ||
22 | |||
23 | union { | ||
24 | /* KMEMCHECK_ERROR_INVALID_ACCESS */ | ||
25 | struct { | ||
26 | /* Kind of access that caused the error */ | ||
27 | enum kmemcheck_shadow state; | ||
28 | /* Address and size of the erroneous read */ | ||
29 | unsigned long address; | ||
30 | unsigned int size; | ||
31 | }; | ||
32 | }; | ||
33 | |||
34 | struct pt_regs regs; | ||
35 | struct stack_trace trace; | ||
36 | unsigned long trace_entries[32]; | ||
37 | |||
38 | /* We compress it to a char. */ | ||
39 | unsigned char shadow_copy[SHADOW_COPY_SIZE]; | ||
40 | unsigned char memory_copy[SHADOW_COPY_SIZE]; | ||
41 | }; | ||
42 | |||
43 | /* | ||
44 | * Create a ring queue of errors to output. We can't call printk() directly | ||
45 | * from the kmemcheck traps, since this may call the console drivers and | ||
46 | * result in a recursive fault. | ||
47 | */ | ||
48 | static struct kmemcheck_error error_fifo[CONFIG_KMEMCHECK_QUEUE_SIZE]; | ||
49 | static unsigned int error_count; | ||
50 | static unsigned int error_rd; | ||
51 | static unsigned int error_wr; | ||
52 | static unsigned int error_missed_count; | ||
53 | |||
54 | static struct kmemcheck_error *error_next_wr(void) | ||
55 | { | ||
56 | struct kmemcheck_error *e; | ||
57 | |||
58 | if (error_count == ARRAY_SIZE(error_fifo)) { | ||
59 | ++error_missed_count; | ||
60 | return NULL; | ||
61 | } | ||
62 | |||
63 | e = &error_fifo[error_wr]; | ||
64 | if (++error_wr == ARRAY_SIZE(error_fifo)) | ||
65 | error_wr = 0; | ||
66 | ++error_count; | ||
67 | return e; | ||
68 | } | ||
69 | |||
70 | static struct kmemcheck_error *error_next_rd(void) | ||
71 | { | ||
72 | struct kmemcheck_error *e; | ||
73 | |||
74 | if (error_count == 0) | ||
75 | return NULL; | ||
76 | |||
77 | e = &error_fifo[error_rd]; | ||
78 | if (++error_rd == ARRAY_SIZE(error_fifo)) | ||
79 | error_rd = 0; | ||
80 | --error_count; | ||
81 | return e; | ||
82 | } | ||
83 | |||
84 | void kmemcheck_error_recall(void) | ||
85 | { | ||
86 | static const char *desc[] = { | ||
87 | [KMEMCHECK_SHADOW_UNALLOCATED] = "unallocated", | ||
88 | [KMEMCHECK_SHADOW_UNINITIALIZED] = "uninitialized", | ||
89 | [KMEMCHECK_SHADOW_INITIALIZED] = "initialized", | ||
90 | [KMEMCHECK_SHADOW_FREED] = "freed", | ||
91 | }; | ||
92 | |||
93 | static const char short_desc[] = { | ||
94 | [KMEMCHECK_SHADOW_UNALLOCATED] = 'a', | ||
95 | [KMEMCHECK_SHADOW_UNINITIALIZED] = 'u', | ||
96 | [KMEMCHECK_SHADOW_INITIALIZED] = 'i', | ||
97 | [KMEMCHECK_SHADOW_FREED] = 'f', | ||
98 | }; | ||
99 | |||
100 | struct kmemcheck_error *e; | ||
101 | unsigned int i; | ||
102 | |||
103 | e = error_next_rd(); | ||
104 | if (!e) | ||
105 | return; | ||
106 | |||
107 | switch (e->type) { | ||
108 | case KMEMCHECK_ERROR_INVALID_ACCESS: | ||
109 | printk(KERN_ERR "WARNING: kmemcheck: Caught %d-bit read " | ||
110 | "from %s memory (%p)\n", | ||
111 | 8 * e->size, e->state < ARRAY_SIZE(desc) ? | ||
112 | desc[e->state] : "(invalid shadow state)", | ||
113 | (void *) e->address); | ||
114 | |||
115 | printk(KERN_INFO); | ||
116 | for (i = 0; i < SHADOW_COPY_SIZE; ++i) | ||
117 | printk("%02x", e->memory_copy[i]); | ||
118 | printk("\n"); | ||
119 | |||
120 | printk(KERN_INFO); | ||
121 | for (i = 0; i < SHADOW_COPY_SIZE; ++i) { | ||
122 | if (e->shadow_copy[i] < ARRAY_SIZE(short_desc)) | ||
123 | printk(" %c", short_desc[e->shadow_copy[i]]); | ||
124 | else | ||
125 | printk(" ?"); | ||
126 | } | ||
127 | printk("\n"); | ||
128 | printk(KERN_INFO "%*c\n", 2 + 2 | ||
129 | * (int) (e->address & (SHADOW_COPY_SIZE - 1)), '^'); | ||
130 | break; | ||
131 | case KMEMCHECK_ERROR_BUG: | ||
132 | printk(KERN_EMERG "ERROR: kmemcheck: Fatal error\n"); | ||
133 | break; | ||
134 | } | ||
135 | |||
136 | __show_regs(&e->regs, 1); | ||
137 | print_stack_trace(&e->trace, 0); | ||
138 | } | ||
139 | |||
140 | static void do_wakeup(unsigned long data) | ||
141 | { | ||
142 | while (error_count > 0) | ||
143 | kmemcheck_error_recall(); | ||
144 | |||
145 | if (error_missed_count > 0) { | ||
146 | printk(KERN_WARNING "kmemcheck: Lost %d error reports because " | ||
147 | "the queue was too small\n", error_missed_count); | ||
148 | error_missed_count = 0; | ||
149 | } | ||
150 | } | ||
151 | |||
152 | static DECLARE_TASKLET(kmemcheck_tasklet, &do_wakeup, 0); | ||
153 | |||
154 | /* | ||
155 | * Save the context of an error report. | ||
156 | */ | ||
157 | void kmemcheck_error_save(enum kmemcheck_shadow state, | ||
158 | unsigned long address, unsigned int size, struct pt_regs *regs) | ||
159 | { | ||
160 | static unsigned long prev_ip; | ||
161 | |||
162 | struct kmemcheck_error *e; | ||
163 | void *shadow_copy; | ||
164 | void *memory_copy; | ||
165 | |||
166 | /* Don't report several adjacent errors from the same EIP. */ | ||
167 | if (regs->ip == prev_ip) | ||
168 | return; | ||
169 | prev_ip = regs->ip; | ||
170 | |||
171 | e = error_next_wr(); | ||
172 | if (!e) | ||
173 | return; | ||
174 | |||
175 | e->type = KMEMCHECK_ERROR_INVALID_ACCESS; | ||
176 | |||
177 | e->state = state; | ||
178 | e->address = address; | ||
179 | e->size = size; | ||
180 | |||
181 | /* Save regs */ | ||
182 | memcpy(&e->regs, regs, sizeof(*regs)); | ||
183 | |||
184 | /* Save stack trace */ | ||
185 | e->trace.nr_entries = 0; | ||
186 | e->trace.entries = e->trace_entries; | ||
187 | e->trace.max_entries = ARRAY_SIZE(e->trace_entries); | ||
188 | e->trace.skip = 0; | ||
189 | save_stack_trace_bp(&e->trace, regs->bp); | ||
190 | |||
191 | /* Round address down to nearest 16 bytes */ | ||
192 | shadow_copy = kmemcheck_shadow_lookup(address | ||
193 | & ~(SHADOW_COPY_SIZE - 1)); | ||
194 | BUG_ON(!shadow_copy); | ||
195 | |||
196 | memcpy(e->shadow_copy, shadow_copy, SHADOW_COPY_SIZE); | ||
197 | |||
198 | kmemcheck_show_addr(address); | ||
199 | memory_copy = (void *) (address & ~(SHADOW_COPY_SIZE - 1)); | ||
200 | memcpy(e->memory_copy, memory_copy, SHADOW_COPY_SIZE); | ||
201 | kmemcheck_hide_addr(address); | ||
202 | |||
203 | tasklet_hi_schedule_first(&kmemcheck_tasklet); | ||
204 | } | ||
205 | |||
206 | /* | ||
207 | * Save the context of a kmemcheck bug. | ||
208 | */ | ||
209 | void kmemcheck_error_save_bug(struct pt_regs *regs) | ||
210 | { | ||
211 | struct kmemcheck_error *e; | ||
212 | |||
213 | e = error_next_wr(); | ||
214 | if (!e) | ||
215 | return; | ||
216 | |||
217 | e->type = KMEMCHECK_ERROR_BUG; | ||
218 | |||
219 | memcpy(&e->regs, regs, sizeof(*regs)); | ||
220 | |||
221 | e->trace.nr_entries = 0; | ||
222 | e->trace.entries = e->trace_entries; | ||
223 | e->trace.max_entries = ARRAY_SIZE(e->trace_entries); | ||
224 | e->trace.skip = 1; | ||
225 | save_stack_trace(&e->trace); | ||
226 | |||
227 | tasklet_hi_schedule_first(&kmemcheck_tasklet); | ||
228 | } | ||
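
The error path above never calls printk() directly from the #PF/#DB traps; reports are queued in a fixed-size ring and printed later from a tasklet, because the console drivers themselves could fault recursively. Below is a minimal user-space sketch of that ring discipline (drop on overflow, count what was lost, drain from a deferred context); the queue size and report format are illustrative, not taken from the kernel:

#include <stdio.h>

#define QUEUE_SIZE 64                  /* stand-in for CONFIG_KMEMCHECK_QUEUE_SIZE */

struct report { char msg[80]; };

static struct report fifo[QUEUE_SIZE];
static unsigned int count, rd, wr, missed;

/* Producer side: would run in trap context, so it must not print. */
static int report_save(const char *msg)
{
	if (count == QUEUE_SIZE) {
		++missed;                      /* queue full: drop, but remember it */
		return -1;
	}
	snprintf(fifo[wr].msg, sizeof(fifo[wr].msg), "%s", msg);
	if (++wr == QUEUE_SIZE)
		wr = 0;
	++count;
	return 0;
}

/* Consumer side: the deferred context (a tasklet in the kernel) drains it. */
static void report_drain(void)
{
	while (count > 0) {
		printf("%s\n", fifo[rd].msg);
		if (++rd == QUEUE_SIZE)
			rd = 0;
		--count;
	}
	if (missed) {
		printf("lost %u reports because the queue was too small\n", missed);
		missed = 0;
	}
}

int main(void)
{
	report_save("16-bit read from uninitialized memory");
	report_save("fatal error");
	report_drain();
	return 0;
}

The kernel version gets away without locking on the indices because kmemcheck limits the machine to a single CPU (see kmemcheck_init() further down), so producer and consumer never race across CPUs.
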
diff --git a/arch/x86/mm/kmemcheck/error.h b/arch/x86/mm/kmemcheck/error.h new file mode 100644 index 000000000000..0efc2e8d0a20 --- /dev/null +++ b/arch/x86/mm/kmemcheck/error.h | |||
@@ -0,0 +1,15 @@ | |||
1 | #ifndef ARCH__X86__MM__KMEMCHECK__ERROR_H | ||
2 | #define ARCH__X86__MM__KMEMCHECK__ERROR_H | ||
3 | |||
4 | #include <linux/ptrace.h> | ||
5 | |||
6 | #include "shadow.h" | ||
7 | |||
8 | void kmemcheck_error_save(enum kmemcheck_shadow state, | ||
9 | unsigned long address, unsigned int size, struct pt_regs *regs); | ||
10 | |||
11 | void kmemcheck_error_save_bug(struct pt_regs *regs); | ||
12 | |||
13 | void kmemcheck_error_recall(void); | ||
14 | |||
15 | #endif | ||
diff --git a/arch/x86/mm/kmemcheck/kmemcheck.c b/arch/x86/mm/kmemcheck/kmemcheck.c new file mode 100644 index 000000000000..2c55ed098654 --- /dev/null +++ b/arch/x86/mm/kmemcheck/kmemcheck.c | |||
@@ -0,0 +1,640 @@ | |||
1 | /** | ||
2 | * kmemcheck - a heavyweight memory checker for the linux kernel | ||
3 | * Copyright (C) 2007, 2008 Vegard Nossum <vegardno@ifi.uio.no> | ||
4 | * (With a lot of help from Ingo Molnar and Pekka Enberg.) | ||
5 | * | ||
6 | * This program is free software; you can redistribute it and/or modify | ||
7 | * it under the terms of the GNU General Public License (version 2) as | ||
8 | * published by the Free Software Foundation. | ||
9 | */ | ||
10 | |||
11 | #include <linux/init.h> | ||
12 | #include <linux/interrupt.h> | ||
13 | #include <linux/kallsyms.h> | ||
14 | #include <linux/kernel.h> | ||
15 | #include <linux/kmemcheck.h> | ||
16 | #include <linux/mm.h> | ||
17 | #include <linux/module.h> | ||
18 | #include <linux/page-flags.h> | ||
19 | #include <linux/percpu.h> | ||
20 | #include <linux/ptrace.h> | ||
21 | #include <linux/string.h> | ||
22 | #include <linux/types.h> | ||
23 | |||
24 | #include <asm/cacheflush.h> | ||
25 | #include <asm/kmemcheck.h> | ||
26 | #include <asm/pgtable.h> | ||
27 | #include <asm/tlbflush.h> | ||
28 | |||
29 | #include "error.h" | ||
30 | #include "opcode.h" | ||
31 | #include "pte.h" | ||
32 | #include "selftest.h" | ||
33 | #include "shadow.h" | ||
34 | |||
35 | |||
36 | #ifdef CONFIG_KMEMCHECK_DISABLED_BY_DEFAULT | ||
37 | # define KMEMCHECK_ENABLED 0 | ||
38 | #endif | ||
39 | |||
40 | #ifdef CONFIG_KMEMCHECK_ENABLED_BY_DEFAULT | ||
41 | # define KMEMCHECK_ENABLED 1 | ||
42 | #endif | ||
43 | |||
44 | #ifdef CONFIG_KMEMCHECK_ONESHOT_BY_DEFAULT | ||
45 | # define KMEMCHECK_ENABLED 2 | ||
46 | #endif | ||
47 | |||
48 | int kmemcheck_enabled = KMEMCHECK_ENABLED; | ||
49 | |||
50 | int __init kmemcheck_init(void) | ||
51 | { | ||
52 | #ifdef CONFIG_SMP | ||
53 | /* | ||
54 | * Limit SMP to use a single CPU. We rely on the fact that this code | ||
55 | * runs before SMP is set up. | ||
56 | */ | ||
57 | if (setup_max_cpus > 1) { | ||
58 | printk(KERN_INFO | ||
59 | "kmemcheck: Limiting number of CPUs to 1.\n"); | ||
60 | setup_max_cpus = 1; | ||
61 | } | ||
62 | #endif | ||
63 | |||
64 | if (!kmemcheck_selftest()) { | ||
65 | printk(KERN_INFO "kmemcheck: self-tests failed; disabling\n"); | ||
66 | kmemcheck_enabled = 0; | ||
67 | return -EINVAL; | ||
68 | } | ||
69 | |||
70 | printk(KERN_INFO "kmemcheck: Initialized\n"); | ||
71 | return 0; | ||
72 | } | ||
73 | |||
74 | early_initcall(kmemcheck_init); | ||
75 | |||
76 | /* | ||
77 | * We need to parse the kmemcheck= option before any memory is allocated. | ||
78 | */ | ||
79 | static int __init param_kmemcheck(char *str) | ||
80 | { | ||
81 | if (!str) | ||
82 | return -EINVAL; | ||
83 | |||
84 | sscanf(str, "%d", &kmemcheck_enabled); | ||
85 | return 0; | ||
86 | } | ||
87 | |||
88 | early_param("kmemcheck", param_kmemcheck); | ||
89 | |||
90 | int kmemcheck_show_addr(unsigned long address) | ||
91 | { | ||
92 | pte_t *pte; | ||
93 | |||
94 | pte = kmemcheck_pte_lookup(address); | ||
95 | if (!pte) | ||
96 | return 0; | ||
97 | |||
98 | set_pte(pte, __pte(pte_val(*pte) | _PAGE_PRESENT)); | ||
99 | __flush_tlb_one(address); | ||
100 | return 1; | ||
101 | } | ||
102 | |||
103 | int kmemcheck_hide_addr(unsigned long address) | ||
104 | { | ||
105 | pte_t *pte; | ||
106 | |||
107 | pte = kmemcheck_pte_lookup(address); | ||
108 | if (!pte) | ||
109 | return 0; | ||
110 | |||
111 | set_pte(pte, __pte(pte_val(*pte) & ~_PAGE_PRESENT)); | ||
112 | __flush_tlb_one(address); | ||
113 | return 1; | ||
114 | } | ||
115 | |||
116 | struct kmemcheck_context { | ||
117 | bool busy; | ||
118 | int balance; | ||
119 | |||
120 | /* | ||
121 | * There can be at most two memory operands to an instruction, but | ||
122 | * each address can cross a page boundary -- so we may need up to | ||
123 | * four addresses that must be hidden/revealed for each fault. | ||
124 | */ | ||
125 | unsigned long addr[4]; | ||
126 | unsigned long n_addrs; | ||
127 | unsigned long flags; | ||
128 | |||
129 | /* Data size of the instruction that caused a fault. */ | ||
130 | unsigned int size; | ||
131 | }; | ||
132 | |||
133 | static DEFINE_PER_CPU(struct kmemcheck_context, kmemcheck_context); | ||
134 | |||
135 | bool kmemcheck_active(struct pt_regs *regs) | ||
136 | { | ||
137 | struct kmemcheck_context *data = &__get_cpu_var(kmemcheck_context); | ||
138 | |||
139 | return data->balance > 0; | ||
140 | } | ||
141 | |||
142 | /* Save an address that needs to be shown/hidden */ | ||
143 | static void kmemcheck_save_addr(unsigned long addr) | ||
144 | { | ||
145 | struct kmemcheck_context *data = &__get_cpu_var(kmemcheck_context); | ||
146 | |||
147 | BUG_ON(data->n_addrs >= ARRAY_SIZE(data->addr)); | ||
148 | data->addr[data->n_addrs++] = addr; | ||
149 | } | ||
150 | |||
151 | static unsigned int kmemcheck_show_all(void) | ||
152 | { | ||
153 | struct kmemcheck_context *data = &__get_cpu_var(kmemcheck_context); | ||
154 | unsigned int i; | ||
155 | unsigned int n; | ||
156 | |||
157 | n = 0; | ||
158 | for (i = 0; i < data->n_addrs; ++i) | ||
159 | n += kmemcheck_show_addr(data->addr[i]); | ||
160 | |||
161 | return n; | ||
162 | } | ||
163 | |||
164 | static unsigned int kmemcheck_hide_all(void) | ||
165 | { | ||
166 | struct kmemcheck_context *data = &__get_cpu_var(kmemcheck_context); | ||
167 | unsigned int i; | ||
168 | unsigned int n; | ||
169 | |||
170 | n = 0; | ||
171 | for (i = 0; i < data->n_addrs; ++i) | ||
172 | n += kmemcheck_hide_addr(data->addr[i]); | ||
173 | |||
174 | return n; | ||
175 | } | ||
176 | |||
177 | /* | ||
178 | * Called from the #PF handler. | ||
179 | */ | ||
180 | void kmemcheck_show(struct pt_regs *regs) | ||
181 | { | ||
182 | struct kmemcheck_context *data = &__get_cpu_var(kmemcheck_context); | ||
183 | |||
184 | BUG_ON(!irqs_disabled()); | ||
185 | |||
186 | if (unlikely(data->balance != 0)) { | ||
187 | kmemcheck_show_all(); | ||
188 | kmemcheck_error_save_bug(regs); | ||
189 | data->balance = 0; | ||
190 | return; | ||
191 | } | ||
192 | |||
193 | /* | ||
194 | * None of the addresses actually belonged to kmemcheck. Note that | ||
195 | * this is not an error. | ||
196 | */ | ||
197 | if (kmemcheck_show_all() == 0) | ||
198 | return; | ||
199 | |||
200 | ++data->balance; | ||
201 | |||
202 | /* | ||
203 | * The IF needs to be cleared as well, so that the faulting | ||
204 | * instruction can run "uninterrupted". Otherwise, we might take | ||
205 | * an interrupt and start executing that before we've had a chance | ||
206 | * to hide the page again. | ||
207 | * | ||
208 | * NOTE: In the rare case of multiple faults, we must not override | ||
209 | * the original flags: | ||
210 | */ | ||
211 | if (!(regs->flags & X86_EFLAGS_TF)) | ||
212 | data->flags = regs->flags; | ||
213 | |||
214 | regs->flags |= X86_EFLAGS_TF; | ||
215 | regs->flags &= ~X86_EFLAGS_IF; | ||
216 | } | ||
217 | |||
218 | /* | ||
219 | * Called from the #DB handler. | ||
220 | */ | ||
221 | void kmemcheck_hide(struct pt_regs *regs) | ||
222 | { | ||
223 | struct kmemcheck_context *data = &__get_cpu_var(kmemcheck_context); | ||
224 | int n; | ||
225 | |||
226 | BUG_ON(!irqs_disabled()); | ||
227 | |||
228 | if (data->balance == 0) | ||
229 | return; | ||
230 | |||
231 | if (unlikely(data->balance != 1)) { | ||
232 | kmemcheck_show_all(); | ||
233 | kmemcheck_error_save_bug(regs); | ||
234 | data->n_addrs = 0; | ||
235 | data->balance = 0; | ||
236 | |||
237 | if (!(data->flags & X86_EFLAGS_TF)) | ||
238 | regs->flags &= ~X86_EFLAGS_TF; | ||
239 | if (data->flags & X86_EFLAGS_IF) | ||
240 | regs->flags |= X86_EFLAGS_IF; | ||
241 | return; | ||
242 | } | ||
243 | |||
244 | if (kmemcheck_enabled) | ||
245 | n = kmemcheck_hide_all(); | ||
246 | else | ||
247 | n = kmemcheck_show_all(); | ||
248 | |||
249 | if (n == 0) | ||
250 | return; | ||
251 | |||
252 | --data->balance; | ||
253 | |||
254 | data->n_addrs = 0; | ||
255 | |||
256 | if (!(data->flags & X86_EFLAGS_TF)) | ||
257 | regs->flags &= ~X86_EFLAGS_TF; | ||
258 | if (data->flags & X86_EFLAGS_IF) | ||
259 | regs->flags |= X86_EFLAGS_IF; | ||
260 | } | ||
261 | |||
262 | void kmemcheck_show_pages(struct page *p, unsigned int n) | ||
263 | { | ||
264 | unsigned int i; | ||
265 | |||
266 | for (i = 0; i < n; ++i) { | ||
267 | unsigned long address; | ||
268 | pte_t *pte; | ||
269 | unsigned int level; | ||
270 | |||
271 | address = (unsigned long) page_address(&p[i]); | ||
272 | pte = lookup_address(address, &level); | ||
273 | BUG_ON(!pte); | ||
274 | BUG_ON(level != PG_LEVEL_4K); | ||
275 | |||
276 | set_pte(pte, __pte(pte_val(*pte) | _PAGE_PRESENT)); | ||
277 | set_pte(pte, __pte(pte_val(*pte) & ~_PAGE_HIDDEN)); | ||
278 | __flush_tlb_one(address); | ||
279 | } | ||
280 | } | ||
281 | |||
282 | bool kmemcheck_page_is_tracked(struct page *p) | ||
283 | { | ||
284 | /* This will also check the "hidden" flag of the PTE. */ | ||
285 | return kmemcheck_pte_lookup((unsigned long) page_address(p)); | ||
286 | } | ||
287 | |||
288 | void kmemcheck_hide_pages(struct page *p, unsigned int n) | ||
289 | { | ||
290 | unsigned int i; | ||
291 | |||
292 | for (i = 0; i < n; ++i) { | ||
293 | unsigned long address; | ||
294 | pte_t *pte; | ||
295 | unsigned int level; | ||
296 | |||
297 | address = (unsigned long) page_address(&p[i]); | ||
298 | pte = lookup_address(address, &level); | ||
299 | BUG_ON(!pte); | ||
300 | BUG_ON(level != PG_LEVEL_4K); | ||
301 | |||
302 | set_pte(pte, __pte(pte_val(*pte) & ~_PAGE_PRESENT)); | ||
303 | set_pte(pte, __pte(pte_val(*pte) | _PAGE_HIDDEN)); | ||
304 | __flush_tlb_one(address); | ||
305 | } | ||
306 | } | ||
307 | |||
308 | /* Access may NOT cross page boundary */ | ||
309 | static void kmemcheck_read_strict(struct pt_regs *regs, | ||
310 | unsigned long addr, unsigned int size) | ||
311 | { | ||
312 | void *shadow; | ||
313 | enum kmemcheck_shadow status; | ||
314 | |||
315 | shadow = kmemcheck_shadow_lookup(addr); | ||
316 | if (!shadow) | ||
317 | return; | ||
318 | |||
319 | kmemcheck_save_addr(addr); | ||
320 | status = kmemcheck_shadow_test(shadow, size); | ||
321 | if (status == KMEMCHECK_SHADOW_INITIALIZED) | ||
322 | return; | ||
323 | |||
324 | if (kmemcheck_enabled) | ||
325 | kmemcheck_error_save(status, addr, size, regs); | ||
326 | |||
327 | if (kmemcheck_enabled == 2) | ||
328 | kmemcheck_enabled = 0; | ||
329 | |||
330 | /* Don't warn about it again. */ | ||
331 | kmemcheck_shadow_set(shadow, size); | ||
332 | } | ||
333 | |||
334 | /* Access may cross page boundary */ | ||
335 | static void kmemcheck_read(struct pt_regs *regs, | ||
336 | unsigned long addr, unsigned int size) | ||
337 | { | ||
338 | unsigned long page = addr & PAGE_MASK; | ||
339 | unsigned long next_addr = addr + size - 1; | ||
340 | unsigned long next_page = next_addr & PAGE_MASK; | ||
341 | |||
342 | if (likely(page == next_page)) { | ||
343 | kmemcheck_read_strict(regs, addr, size); | ||
344 | return; | ||
345 | } | ||
346 | |||
347 | /* | ||
348 | * What we do is basically to split the access across the | ||
349 | * two pages and handle each part separately. Yes, this means | ||
350 | * that we may now see reads that are 3 + 5 bytes, for | ||
351 | * example (and if both are uninitialized, there will be two | ||
352 | * reports), but it makes the code a lot simpler. | ||
353 | */ | ||
354 | kmemcheck_read_strict(regs, addr, next_page - addr); | ||
355 | kmemcheck_read_strict(regs, next_page, next_addr - next_page); | ||
356 | } | ||
357 | |||
358 | static void kmemcheck_write_strict(struct pt_regs *regs, | ||
359 | unsigned long addr, unsigned int size) | ||
360 | { | ||
361 | void *shadow; | ||
362 | |||
363 | shadow = kmemcheck_shadow_lookup(addr); | ||
364 | if (!shadow) | ||
365 | return; | ||
366 | |||
367 | kmemcheck_save_addr(addr); | ||
368 | kmemcheck_shadow_set(shadow, size); | ||
369 | } | ||
370 | |||
371 | static void kmemcheck_write(struct pt_regs *regs, | ||
372 | unsigned long addr, unsigned int size) | ||
373 | { | ||
374 | unsigned long page = addr & PAGE_MASK; | ||
375 | unsigned long next_addr = addr + size - 1; | ||
376 | unsigned long next_page = next_addr & PAGE_MASK; | ||
377 | |||
378 | if (likely(page == next_page)) { | ||
379 | kmemcheck_write_strict(regs, addr, size); | ||
380 | return; | ||
381 | } | ||
382 | |||
383 | /* See comment in kmemcheck_read(). */ | ||
384 | kmemcheck_write_strict(regs, addr, next_page - addr); | ||
385 | kmemcheck_write_strict(regs, next_page, next_addr - next_page); | ||
386 | } | ||
387 | |||
388 | /* | ||
389 | * Copying is hard. We have two addresses, each of which may be split across | ||
390 | * a page (and each page will have different shadow addresses). | ||
391 | */ | ||
392 | static void kmemcheck_copy(struct pt_regs *regs, | ||
393 | unsigned long src_addr, unsigned long dst_addr, unsigned int size) | ||
394 | { | ||
395 | uint8_t shadow[8]; | ||
396 | enum kmemcheck_shadow status; | ||
397 | |||
398 | unsigned long page; | ||
399 | unsigned long next_addr; | ||
400 | unsigned long next_page; | ||
401 | |||
402 | uint8_t *x; | ||
403 | unsigned int i; | ||
404 | unsigned int n; | ||
405 | |||
406 | BUG_ON(size > sizeof(shadow)); | ||
407 | |||
408 | page = src_addr & PAGE_MASK; | ||
409 | next_addr = src_addr + size - 1; | ||
410 | next_page = next_addr & PAGE_MASK; | ||
411 | |||
412 | if (likely(page == next_page)) { | ||
413 | /* Same page */ | ||
414 | x = kmemcheck_shadow_lookup(src_addr); | ||
415 | if (x) { | ||
416 | kmemcheck_save_addr(src_addr); | ||
417 | for (i = 0; i < size; ++i) | ||
418 | shadow[i] = x[i]; | ||
419 | } else { | ||
420 | for (i = 0; i < size; ++i) | ||
421 | shadow[i] = KMEMCHECK_SHADOW_INITIALIZED; | ||
422 | } | ||
423 | } else { | ||
424 | n = next_page - src_addr; | ||
425 | BUG_ON(n > sizeof(shadow)); | ||
426 | |||
427 | /* First page */ | ||
428 | x = kmemcheck_shadow_lookup(src_addr); | ||
429 | if (x) { | ||
430 | kmemcheck_save_addr(src_addr); | ||
431 | for (i = 0; i < n; ++i) | ||
432 | shadow[i] = x[i]; | ||
433 | } else { | ||
434 | /* Not tracked */ | ||
435 | for (i = 0; i < n; ++i) | ||
436 | shadow[i] = KMEMCHECK_SHADOW_INITIALIZED; | ||
437 | } | ||
438 | |||
439 | /* Second page */ | ||
440 | x = kmemcheck_shadow_lookup(next_page); | ||
441 | if (x) { | ||
442 | kmemcheck_save_addr(next_page); | ||
443 | for (i = n; i < size; ++i) | ||
444 | shadow[i] = x[i - n]; | ||
445 | } else { | ||
446 | /* Not tracked */ | ||
447 | for (i = n; i < size; ++i) | ||
448 | shadow[i] = KMEMCHECK_SHADOW_INITIALIZED; | ||
449 | } | ||
450 | } | ||
451 | |||
452 | page = dst_addr & PAGE_MASK; | ||
453 | next_addr = dst_addr + size - 1; | ||
454 | next_page = next_addr & PAGE_MASK; | ||
455 | |||
456 | if (likely(page == next_page)) { | ||
457 | /* Same page */ | ||
458 | x = kmemcheck_shadow_lookup(dst_addr); | ||
459 | if (x) { | ||
460 | kmemcheck_save_addr(dst_addr); | ||
461 | for (i = 0; i < size; ++i) { | ||
462 | x[i] = shadow[i]; | ||
463 | shadow[i] = KMEMCHECK_SHADOW_INITIALIZED; | ||
464 | } | ||
465 | } | ||
466 | } else { | ||
467 | n = next_page - dst_addr; | ||
468 | BUG_ON(n > sizeof(shadow)); | ||
469 | |||
470 | /* First page */ | ||
471 | x = kmemcheck_shadow_lookup(dst_addr); | ||
472 | if (x) { | ||
473 | kmemcheck_save_addr(dst_addr); | ||
474 | for (i = 0; i < n; ++i) { | ||
475 | x[i] = shadow[i]; | ||
476 | shadow[i] = KMEMCHECK_SHADOW_INITIALIZED; | ||
477 | } | ||
478 | } | ||
479 | |||
480 | /* Second page */ | ||
481 | x = kmemcheck_shadow_lookup(next_page); | ||
482 | if (x) { | ||
483 | kmemcheck_save_addr(next_page); | ||
484 | for (i = n; i < size; ++i) { | ||
485 | x[i - n] = shadow[i]; | ||
486 | shadow[i] = KMEMCHECK_SHADOW_INITIALIZED; | ||
487 | } | ||
488 | } | ||
489 | } | ||
490 | |||
491 | status = kmemcheck_shadow_test(shadow, size); | ||
492 | if (status == KMEMCHECK_SHADOW_INITIALIZED) | ||
493 | return; | ||
494 | |||
495 | if (kmemcheck_enabled) | ||
496 | kmemcheck_error_save(status, src_addr, size, regs); | ||
497 | |||
498 | if (kmemcheck_enabled == 2) | ||
499 | kmemcheck_enabled = 0; | ||
500 | } | ||
501 | |||
502 | enum kmemcheck_method { | ||
503 | KMEMCHECK_READ, | ||
504 | KMEMCHECK_WRITE, | ||
505 | }; | ||
506 | |||
507 | static void kmemcheck_access(struct pt_regs *regs, | ||
508 | unsigned long fallback_address, enum kmemcheck_method fallback_method) | ||
509 | { | ||
510 | const uint8_t *insn; | ||
511 | const uint8_t *insn_primary; | ||
512 | unsigned int size; | ||
513 | |||
514 | struct kmemcheck_context *data = &__get_cpu_var(kmemcheck_context); | ||
515 | |||
516 | /* Recursive fault -- ouch. */ | ||
517 | if (data->busy) { | ||
518 | kmemcheck_show_addr(fallback_address); | ||
519 | kmemcheck_error_save_bug(regs); | ||
520 | return; | ||
521 | } | ||
522 | |||
523 | data->busy = true; | ||
524 | |||
525 | insn = (const uint8_t *) regs->ip; | ||
526 | insn_primary = kmemcheck_opcode_get_primary(insn); | ||
527 | |||
528 | kmemcheck_opcode_decode(insn, &size); | ||
529 | |||
530 | switch (insn_primary[0]) { | ||
531 | #ifdef CONFIG_KMEMCHECK_BITOPS_OK | ||
532 | /* AND, OR, XOR */ | ||
533 | /* | ||
534 | * Unfortunately, these instructions have to be excluded from | ||
535 | * our regular checking since they access only some (and not | ||
536 | * all) bits. This clears out "bogus" bitfield-access warnings. | ||
537 | */ | ||
538 | case 0x80: | ||
539 | case 0x81: | ||
540 | case 0x82: | ||
541 | case 0x83: | ||
542 | switch ((insn_primary[1] >> 3) & 7) { | ||
543 | /* OR */ | ||
544 | case 1: | ||
545 | /* AND */ | ||
546 | case 4: | ||
547 | /* XOR */ | ||
548 | case 6: | ||
549 | kmemcheck_write(regs, fallback_address, size); | ||
550 | goto out; | ||
551 | |||
552 | /* ADD */ | ||
553 | case 0: | ||
554 | /* ADC */ | ||
555 | case 2: | ||
556 | /* SBB */ | ||
557 | case 3: | ||
558 | /* SUB */ | ||
559 | case 5: | ||
560 | /* CMP */ | ||
561 | case 7: | ||
562 | break; | ||
563 | } | ||
564 | break; | ||
565 | #endif | ||
566 | |||
567 | /* MOVS, MOVSB, MOVSW, MOVSD */ | ||
568 | case 0xa4: | ||
569 | case 0xa5: | ||
570 | /* | ||
571 | * These instructions are special because they take two | ||
572 | * addresses, but we only get one page fault. | ||
573 | */ | ||
574 | kmemcheck_copy(regs, regs->si, regs->di, size); | ||
575 | goto out; | ||
576 | |||
577 | /* CMPS, CMPSB, CMPSW, CMPSD */ | ||
578 | case 0xa6: | ||
579 | case 0xa7: | ||
580 | kmemcheck_read(regs, regs->si, size); | ||
581 | kmemcheck_read(regs, regs->di, size); | ||
582 | goto out; | ||
583 | } | ||
584 | |||
585 | /* | ||
586 | * If the opcode isn't special in any way, we use the data from the | ||
587 | * page fault handler to determine the address and type of memory | ||
588 | * access. | ||
589 | */ | ||
590 | switch (fallback_method) { | ||
591 | case KMEMCHECK_READ: | ||
592 | kmemcheck_read(regs, fallback_address, size); | ||
593 | goto out; | ||
594 | case KMEMCHECK_WRITE: | ||
595 | kmemcheck_write(regs, fallback_address, size); | ||
596 | goto out; | ||
597 | } | ||
598 | |||
599 | out: | ||
600 | data->busy = false; | ||
601 | } | ||
602 | |||
603 | bool kmemcheck_fault(struct pt_regs *regs, unsigned long address, | ||
604 | unsigned long error_code) | ||
605 | { | ||
606 | pte_t *pte; | ||
607 | |||
608 | /* | ||
609 | * XXX: Is it safe to assume that memory accesses from virtual 86 | ||
610 | * mode or non-kernel code segments will _never_ access kernel | ||
611 | * memory (e.g. tracked pages)? For now, we need this to avoid | ||
612 | * invoking kmemcheck for PnP BIOS calls. | ||
613 | */ | ||
614 | if (regs->flags & X86_VM_MASK) | ||
615 | return false; | ||
616 | if (regs->cs != __KERNEL_CS) | ||
617 | return false; | ||
618 | |||
619 | pte = kmemcheck_pte_lookup(address); | ||
620 | if (!pte) | ||
621 | return false; | ||
622 | |||
623 | if (error_code & 2) | ||
624 | kmemcheck_access(regs, address, KMEMCHECK_WRITE); | ||
625 | else | ||
626 | kmemcheck_access(regs, address, KMEMCHECK_READ); | ||
627 | |||
628 | kmemcheck_show(regs); | ||
629 | return true; | ||
630 | } | ||
631 | |||
632 | bool kmemcheck_trap(struct pt_regs *regs) | ||
633 | { | ||
634 | if (!kmemcheck_active(regs)) | ||
635 | return false; | ||
636 | |||
637 | /* We're done. */ | ||
638 | kmemcheck_hide(regs); | ||
639 | return true; | ||
640 | } | ||
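
In kmemcheck_fault() above, the expression error_code & 2 is what separates writes from reads: bit 1 of the x86 hardware page-fault error code is the architectural write/read flag. A tiny sketch of the relevant bits follows; the macro names are illustrative, only the bit positions are architectural:

#include <stdio.h>

/* Low bits of the x86 #PF hardware error code. */
#define PF_PROT  0x1   /* 0: page not present, 1: protection violation */
#define PF_WRITE 0x2   /* 0: the access was a read, 1: it was a write  */
#define PF_USER  0x4   /* 0: fault in kernel mode, 1: in user mode     */

static const char *fault_kind(unsigned long error_code)
{
	return (error_code & PF_WRITE) ? "write" : "read";
}

int main(void)
{
	/* For kmemcheck faults the page is not present and the access is from kernel mode. */
	printf("%s\n", fault_kind(0x2));   /* write */
	printf("%s\n", fault_kind(0x0));   /* read  */
	return 0;
}
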
diff --git a/arch/x86/mm/kmemcheck/opcode.c b/arch/x86/mm/kmemcheck/opcode.c new file mode 100644 index 000000000000..63c19e27aa6f --- /dev/null +++ b/arch/x86/mm/kmemcheck/opcode.c | |||
@@ -0,0 +1,106 @@ | |||
1 | #include <linux/types.h> | ||
2 | |||
3 | #include "opcode.h" | ||
4 | |||
5 | static bool opcode_is_prefix(uint8_t b) | ||
6 | { | ||
7 | return | ||
8 | /* Group 1 */ | ||
9 | b == 0xf0 || b == 0xf2 || b == 0xf3 | ||
10 | /* Group 2 */ | ||
11 | || b == 0x2e || b == 0x36 || b == 0x3e || b == 0x26 | ||
12 | || b == 0x64 || b == 0x65 || b == 0x2e || b == 0x3e | ||
13 | /* Group 3 */ | ||
14 | || b == 0x66 | ||
15 | /* Group 4 */ | ||
16 | || b == 0x67; | ||
17 | } | ||
18 | |||
19 | #ifdef CONFIG_X86_64 | ||
20 | static bool opcode_is_rex_prefix(uint8_t b) | ||
21 | { | ||
22 | return (b & 0xf0) == 0x40; | ||
23 | } | ||
24 | #else | ||
25 | static bool opcode_is_rex_prefix(uint8_t b) | ||
26 | { | ||
27 | return false; | ||
28 | } | ||
29 | #endif | ||
30 | |||
31 | #define REX_W (1 << 3) | ||
32 | |||
33 | /* | ||
34 | * This is a VERY crude opcode decoder. We only need to find the size of the | ||
35 | * load/store that caused our #PF and this should work for all the opcodes | ||
36 | * that we care about. Moreover, the ones who invented this instruction set | ||
37 | * should be shot. | ||
38 | */ | ||
39 | void kmemcheck_opcode_decode(const uint8_t *op, unsigned int *size) | ||
40 | { | ||
41 | /* Default operand size */ | ||
42 | int operand_size_override = 4; | ||
43 | |||
44 | /* prefixes */ | ||
45 | for (; opcode_is_prefix(*op); ++op) { | ||
46 | if (*op == 0x66) | ||
47 | operand_size_override = 2; | ||
48 | } | ||
49 | |||
50 | /* REX prefix */ | ||
51 | if (opcode_is_rex_prefix(*op)) { | ||
52 | uint8_t rex = *op; | ||
53 | |||
54 | ++op; | ||
55 | if (rex & REX_W) { | ||
56 | switch (*op) { | ||
57 | case 0x63: | ||
58 | *size = 4; | ||
59 | return; | ||
60 | case 0x0f: | ||
61 | ++op; | ||
62 | |||
63 | switch (*op) { | ||
64 | case 0xb6: | ||
65 | case 0xbe: | ||
66 | *size = 1; | ||
67 | return; | ||
68 | case 0xb7: | ||
69 | case 0xbf: | ||
70 | *size = 2; | ||
71 | return; | ||
72 | } | ||
73 | |||
74 | break; | ||
75 | } | ||
76 | |||
77 | *size = 8; | ||
78 | return; | ||
79 | } | ||
80 | } | ||
81 | |||
82 | /* escape opcode */ | ||
83 | if (*op == 0x0f) { | ||
84 | ++op; | ||
85 | |||
86 | /* | ||
87 | * This is move with zero-extend and sign-extend, respectively; | ||
88 | * we don't have to think about 0xb6/0xbe, because this is | ||
89 | * already handled in the conditional below. | ||
90 | */ | ||
91 | if (*op == 0xb7 || *op == 0xbf) | ||
92 | operand_size_override = 2; | ||
93 | } | ||
94 | |||
95 | *size = (*op & 1) ? operand_size_override : 1; | ||
96 | } | ||
97 | |||
98 | const uint8_t *kmemcheck_opcode_get_primary(const uint8_t *op) | ||
99 | { | ||
100 | /* skip prefixes */ | ||
101 | while (opcode_is_prefix(*op)) | ||
102 | ++op; | ||
103 | if (opcode_is_rex_prefix(*op)) | ||
104 | ++op; | ||
105 | return op; | ||
106 | } | ||
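
As a quick sanity check of the decoder above, the expected operand sizes for a few encodings can be printed from a small host-side harness. This sketch assumes opcode.c/opcode.h are built outside the kernel with a stub for <linux/types.h>; that harness is an assumption of the sketch, not something the patch provides (the in-kernel equivalent is selftest.c below):

#include <stdio.h>
#include <stdint.h>

#include "opcode.h"   /* kmemcheck_opcode_decode() from the file above */

int main(void)
{
	unsigned int size;

	/* 0x66 operand-size prefix: 16-bit store */
	kmemcheck_opcode_decode((const uint8_t *)"\x66\x89\x08", &size);
	printf("66 89 08  mov %%cx,(%%eax)   -> %u byte(s)\n", size);   /* 2 */

	/* plain 32-bit store */
	kmemcheck_opcode_decode((const uint8_t *)"\x89\x08", &size);
	printf("89 08     mov %%ecx,(%%eax)  -> %u byte(s)\n", size);   /* 4 */

	/* REX.W: 64-bit load (recognized only when built for x86_64) */
	kmemcheck_opcode_decode((const uint8_t *)"\x48\x8b\x08", &size);
	printf("48 8b 08  mov (%%rax),%%rcx  -> %u byte(s)\n", size);   /* 8 */

	return 0;
}
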
diff --git a/arch/x86/mm/kmemcheck/opcode.h b/arch/x86/mm/kmemcheck/opcode.h new file mode 100644 index 000000000000..6956aad66b5b --- /dev/null +++ b/arch/x86/mm/kmemcheck/opcode.h | |||
@@ -0,0 +1,9 @@ | |||
1 | #ifndef ARCH__X86__MM__KMEMCHECK__OPCODE_H | ||
2 | #define ARCH__X86__MM__KMEMCHECK__OPCODE_H | ||
3 | |||
4 | #include <linux/types.h> | ||
5 | |||
6 | void kmemcheck_opcode_decode(const uint8_t *op, unsigned int *size); | ||
7 | const uint8_t *kmemcheck_opcode_get_primary(const uint8_t *op); | ||
8 | |||
9 | #endif | ||
diff --git a/arch/x86/mm/kmemcheck/pte.c b/arch/x86/mm/kmemcheck/pte.c new file mode 100644 index 000000000000..4ead26eeaf96 --- /dev/null +++ b/arch/x86/mm/kmemcheck/pte.c | |||
@@ -0,0 +1,22 @@ | |||
1 | #include <linux/mm.h> | ||
2 | |||
3 | #include <asm/pgtable.h> | ||
4 | |||
5 | #include "pte.h" | ||
6 | |||
7 | pte_t *kmemcheck_pte_lookup(unsigned long address) | ||
8 | { | ||
9 | pte_t *pte; | ||
10 | unsigned int level; | ||
11 | |||
12 | pte = lookup_address(address, &level); | ||
13 | if (!pte) | ||
14 | return NULL; | ||
15 | if (level != PG_LEVEL_4K) | ||
16 | return NULL; | ||
17 | if (!pte_hidden(*pte)) | ||
18 | return NULL; | ||
19 | |||
20 | return pte; | ||
21 | } | ||
22 | |||
diff --git a/arch/x86/mm/kmemcheck/pte.h b/arch/x86/mm/kmemcheck/pte.h new file mode 100644 index 000000000000..9f5966456492 --- /dev/null +++ b/arch/x86/mm/kmemcheck/pte.h | |||
@@ -0,0 +1,10 @@ | |||
1 | #ifndef ARCH__X86__MM__KMEMCHECK__PTE_H | ||
2 | #define ARCH__X86__MM__KMEMCHECK__PTE_H | ||
3 | |||
4 | #include <linux/mm.h> | ||
5 | |||
6 | #include <asm/pgtable.h> | ||
7 | |||
8 | pte_t *kmemcheck_pte_lookup(unsigned long address); | ||
9 | |||
10 | #endif | ||
diff --git a/arch/x86/mm/kmemcheck/selftest.c b/arch/x86/mm/kmemcheck/selftest.c new file mode 100644 index 000000000000..036efbea8b28 --- /dev/null +++ b/arch/x86/mm/kmemcheck/selftest.c | |||
@@ -0,0 +1,69 @@ | |||
1 | #include <linux/kernel.h> | ||
2 | |||
3 | #include "opcode.h" | ||
4 | #include "selftest.h" | ||
5 | |||
6 | struct selftest_opcode { | ||
7 | unsigned int expected_size; | ||
8 | const uint8_t *insn; | ||
9 | const char *desc; | ||
10 | }; | ||
11 | |||
12 | static const struct selftest_opcode selftest_opcodes[] = { | ||
13 | /* REP MOVS */ | ||
14 | {1, "\xf3\xa4", "rep movsb <mem8>, <mem8>"}, | ||
15 | {4, "\xf3\xa5", "rep movsl <mem32>, <mem32>"}, | ||
16 | |||
17 | /* MOVZX / MOVZXD */ | ||
18 | {1, "\x66\x0f\xb6\x51\xf8", "movzwq <mem8>, <reg16>"}, | ||
19 | {1, "\x0f\xb6\x51\xf8", "movzwq <mem8>, <reg32>"}, | ||
20 | |||
21 | /* MOVSX / MOVSXD */ | ||
22 | {1, "\x66\x0f\xbe\x51\xf8", "movswq <mem8>, <reg16>"}, | ||
23 | {1, "\x0f\xbe\x51\xf8", "movswq <mem8>, <reg32>"}, | ||
24 | |||
25 | #ifdef CONFIG_X86_64 | ||
26 | /* MOVZX / MOVZXD */ | ||
27 | {1, "\x49\x0f\xb6\x51\xf8", "movzbq <mem8>, <reg64>"}, | ||
28 | {2, "\x49\x0f\xb7\x51\xf8", "movzbq <mem16>, <reg64>"}, | ||
29 | |||
30 | /* MOVSX / MOVSXD */ | ||
31 | {1, "\x49\x0f\xbe\x51\xf8", "movsbq <mem8>, <reg64>"}, | ||
32 | {2, "\x49\x0f\xbf\x51\xf8", "movsbq <mem16>, <reg64>"}, | ||
33 | {4, "\x49\x63\x51\xf8", "movslq <mem32>, <reg64>"}, | ||
34 | #endif | ||
35 | }; | ||
36 | |||
37 | static bool selftest_opcode_one(const struct selftest_opcode *op) | ||
38 | { | ||
39 | unsigned size; | ||
40 | |||
41 | kmemcheck_opcode_decode(op->insn, &size); | ||
42 | |||
43 | if (size == op->expected_size) | ||
44 | return true; | ||
45 | |||
46 | printk(KERN_WARNING "kmemcheck: opcode %s: expected size %d, got %d\n", | ||
47 | op->desc, op->expected_size, size); | ||
48 | return false; | ||
49 | } | ||
50 | |||
51 | static bool selftest_opcodes_all(void) | ||
52 | { | ||
53 | bool pass = true; | ||
54 | unsigned int i; | ||
55 | |||
56 | for (i = 0; i < ARRAY_SIZE(selftest_opcodes); ++i) | ||
57 | pass = pass && selftest_opcode_one(&selftest_opcodes[i]); | ||
58 | |||
59 | return pass; | ||
60 | } | ||
61 | |||
62 | bool kmemcheck_selftest(void) | ||
63 | { | ||
64 | bool pass = true; | ||
65 | |||
66 | pass = pass && selftest_opcodes_all(); | ||
67 | |||
68 | return pass; | ||
69 | } | ||
diff --git a/arch/x86/mm/kmemcheck/selftest.h b/arch/x86/mm/kmemcheck/selftest.h new file mode 100644 index 000000000000..8fed4fe11f95 --- /dev/null +++ b/arch/x86/mm/kmemcheck/selftest.h | |||
@@ -0,0 +1,6 @@ | |||
1 | #ifndef ARCH_X86_MM_KMEMCHECK_SELFTEST_H | ||
2 | #define ARCH_X86_MM_KMEMCHECK_SELFTEST_H | ||
3 | |||
4 | bool kmemcheck_selftest(void); | ||
5 | |||
6 | #endif | ||
diff --git a/arch/x86/mm/kmemcheck/shadow.c b/arch/x86/mm/kmemcheck/shadow.c new file mode 100644 index 000000000000..e773b6bd0079 --- /dev/null +++ b/arch/x86/mm/kmemcheck/shadow.c | |||
@@ -0,0 +1,162 @@ | |||
1 | #include <linux/kmemcheck.h> | ||
2 | #include <linux/module.h> | ||
3 | #include <linux/mm.h> | ||
4 | #include <linux/module.h> | ||
5 | |||
6 | #include <asm/page.h> | ||
7 | #include <asm/pgtable.h> | ||
8 | |||
9 | #include "pte.h" | ||
10 | #include "shadow.h" | ||
11 | |||
12 | /* | ||
13 | * Return the shadow address for the given address. Returns NULL if the | ||
14 | * address is not tracked. | ||
15 | * | ||
16 | * We need to be extremely careful not to follow any invalid pointers, | ||
17 | * because this function can be called for *any* possible address. | ||
18 | */ | ||
19 | void *kmemcheck_shadow_lookup(unsigned long address) | ||
20 | { | ||
21 | pte_t *pte; | ||
22 | struct page *page; | ||
23 | |||
24 | if (!virt_addr_valid(address)) | ||
25 | return NULL; | ||
26 | |||
27 | pte = kmemcheck_pte_lookup(address); | ||
28 | if (!pte) | ||
29 | return NULL; | ||
30 | |||
31 | page = virt_to_page(address); | ||
32 | if (!page->shadow) | ||
33 | return NULL; | ||
34 | return page->shadow + (address & (PAGE_SIZE - 1)); | ||
35 | } | ||
36 | |||
37 | static void mark_shadow(void *address, unsigned int n, | ||
38 | enum kmemcheck_shadow status) | ||
39 | { | ||
40 | unsigned long addr = (unsigned long) address; | ||
41 | unsigned long last_addr = addr + n - 1; | ||
42 | unsigned long page = addr & PAGE_MASK; | ||
43 | unsigned long last_page = last_addr & PAGE_MASK; | ||
44 | unsigned int first_n; | ||
45 | void *shadow; | ||
46 | |||
47 | /* If the memory range crosses a page boundary, stop there. */ | ||
48 | if (page == last_page) | ||
49 | first_n = n; | ||
50 | else | ||
51 | first_n = page + PAGE_SIZE - addr; | ||
52 | |||
53 | shadow = kmemcheck_shadow_lookup(addr); | ||
54 | if (shadow) | ||
55 | memset(shadow, status, first_n); | ||
56 | |||
57 | addr += first_n; | ||
58 | n -= first_n; | ||
59 | |||
60 | /* Do full-page memset()s. */ | ||
61 | while (n >= PAGE_SIZE) { | ||
62 | shadow = kmemcheck_shadow_lookup(addr); | ||
63 | if (shadow) | ||
64 | memset(shadow, status, PAGE_SIZE); | ||
65 | |||
66 | addr += PAGE_SIZE; | ||
67 | n -= PAGE_SIZE; | ||
68 | } | ||
69 | |||
70 | /* Do the remaining page, if any. */ | ||
71 | if (n > 0) { | ||
72 | shadow = kmemcheck_shadow_lookup(addr); | ||
73 | if (shadow) | ||
74 | memset(shadow, status, n); | ||
75 | } | ||
76 | } | ||
77 | |||
78 | void kmemcheck_mark_unallocated(void *address, unsigned int n) | ||
79 | { | ||
80 | mark_shadow(address, n, KMEMCHECK_SHADOW_UNALLOCATED); | ||
81 | } | ||
82 | |||
83 | void kmemcheck_mark_uninitialized(void *address, unsigned int n) | ||
84 | { | ||
85 | mark_shadow(address, n, KMEMCHECK_SHADOW_UNINITIALIZED); | ||
86 | } | ||
87 | |||
88 | /* | ||
89 | * Fill the shadow memory of the given address such that the memory at that | ||
90 | * address is marked as being initialized. | ||
91 | */ | ||
92 | void kmemcheck_mark_initialized(void *address, unsigned int n) | ||
93 | { | ||
94 | mark_shadow(address, n, KMEMCHECK_SHADOW_INITIALIZED); | ||
95 | } | ||
96 | EXPORT_SYMBOL_GPL(kmemcheck_mark_initialized); | ||
97 | |||
98 | void kmemcheck_mark_freed(void *address, unsigned int n) | ||
99 | { | ||
100 | mark_shadow(address, n, KMEMCHECK_SHADOW_FREED); | ||
101 | } | ||
102 | |||
103 | void kmemcheck_mark_unallocated_pages(struct page *p, unsigned int n) | ||
104 | { | ||
105 | unsigned int i; | ||
106 | |||
107 | for (i = 0; i < n; ++i) | ||
108 | kmemcheck_mark_unallocated(page_address(&p[i]), PAGE_SIZE); | ||
109 | } | ||
110 | |||
111 | void kmemcheck_mark_uninitialized_pages(struct page *p, unsigned int n) | ||
112 | { | ||
113 | unsigned int i; | ||
114 | |||
115 | for (i = 0; i < n; ++i) | ||
116 | kmemcheck_mark_uninitialized(page_address(&p[i]), PAGE_SIZE); | ||
117 | } | ||
118 | |||
119 | void kmemcheck_mark_initialized_pages(struct page *p, unsigned int n) | ||
120 | { | ||
121 | unsigned int i; | ||
122 | |||
123 | for (i = 0; i < n; ++i) | ||
124 | kmemcheck_mark_initialized(page_address(&p[i]), PAGE_SIZE); | ||
125 | } | ||
126 | |||
127 | enum kmemcheck_shadow kmemcheck_shadow_test(void *shadow, unsigned int size) | ||
128 | { | ||
129 | uint8_t *x; | ||
130 | unsigned int i; | ||
131 | |||
132 | x = shadow; | ||
133 | |||
134 | #ifdef CONFIG_KMEMCHECK_PARTIAL_OK | ||
135 | /* | ||
136 | * Make sure _some_ bytes are initialized. Gcc frequently generates | ||
137 | * code to access neighboring bytes. | ||
138 | */ | ||
139 | for (i = 0; i < size; ++i) { | ||
140 | if (x[i] == KMEMCHECK_SHADOW_INITIALIZED) | ||
141 | return x[i]; | ||
142 | } | ||
143 | #else | ||
144 | /* All bytes must be initialized. */ | ||
145 | for (i = 0; i < size; ++i) { | ||
146 | if (x[i] != KMEMCHECK_SHADOW_INITIALIZED) | ||
147 | return x[i]; | ||
148 | } | ||
149 | #endif | ||
150 | |||
151 | return x[0]; | ||
152 | } | ||
153 | |||
154 | void kmemcheck_shadow_set(void *shadow, unsigned int size) | ||
155 | { | ||
156 | uint8_t *x; | ||
157 | unsigned int i; | ||
158 | |||
159 | x = shadow; | ||
160 | for (i = 0; i < size; ++i) | ||
161 | x[i] = KMEMCHECK_SHADOW_INITIALIZED; | ||
162 | } | ||
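
shadow.c keeps one status byte per tracked data byte (page->shadow plus the offset within the page), and kmemcheck_shadow_test() decides whether an access touched anything that was never written. Below is a self-contained sketch of the strict variant (the !CONFIG_KMEMCHECK_PARTIAL_OK branch); the enum ordering mirrors shadow.h above, everything else is illustrative:

#include <stdio.h>
#include <stdint.h>

/* Same ordering as enum kmemcheck_shadow in shadow.h. */
enum shadow { UNALLOCATED, UNINITIALIZED, INITIALIZED, FREED };

/* Every byte covered by the access must be INITIALIZED; otherwise the
 * first offending state is returned and a report would be generated. */
static enum shadow shadow_test_strict(const uint8_t *x, unsigned int size)
{
	unsigned int i;

	for (i = 0; i < size; ++i) {
		if (x[i] != INITIALIZED)
			return (enum shadow)x[i];
	}
	return (enum shadow)x[0];
}

int main(void)
{
	/* Shadow for an 8-byte object of which only the first 4 bytes were written. */
	uint8_t obj_shadow[8] = {
		INITIALIZED, INITIALIZED, INITIALIZED, INITIALIZED,
		UNINITIALIZED, UNINITIALIZED, UNINITIALIZED, UNINITIALIZED,
	};

	printf("%d\n", shadow_test_strict(obj_shadow, 4));   /* 2 == INITIALIZED   */
	printf("%d\n", shadow_test_strict(obj_shadow, 8));   /* 1 == UNINITIALIZED */
	return 0;
}
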
diff --git a/arch/x86/mm/kmemcheck/shadow.h b/arch/x86/mm/kmemcheck/shadow.h new file mode 100644 index 000000000000..af46d9ab9d86 --- /dev/null +++ b/arch/x86/mm/kmemcheck/shadow.h | |||
@@ -0,0 +1,16 @@ | |||
1 | #ifndef ARCH__X86__MM__KMEMCHECK__SHADOW_H | ||
2 | #define ARCH__X86__MM__KMEMCHECK__SHADOW_H | ||
3 | |||
4 | enum kmemcheck_shadow { | ||
5 | KMEMCHECK_SHADOW_UNALLOCATED, | ||
6 | KMEMCHECK_SHADOW_UNINITIALIZED, | ||
7 | KMEMCHECK_SHADOW_INITIALIZED, | ||
8 | KMEMCHECK_SHADOW_FREED, | ||
9 | }; | ||
10 | |||
11 | void *kmemcheck_shadow_lookup(unsigned long address); | ||
12 | |||
13 | enum kmemcheck_shadow kmemcheck_shadow_test(void *shadow, unsigned int size); | ||
14 | void kmemcheck_shadow_set(void *shadow, unsigned int size); | ||
15 | |||
16 | #endif | ||
diff --git a/arch/x86/mm/kmmio.c b/arch/x86/mm/kmmio.c index 50dc802a1c46..16ccbd77917f 100644 --- a/arch/x86/mm/kmmio.c +++ b/arch/x86/mm/kmmio.c | |||
@@ -32,7 +32,7 @@ struct kmmio_fault_page { | |||
32 | struct list_head list; | 32 | struct list_head list; |
33 | struct kmmio_fault_page *release_next; | 33 | struct kmmio_fault_page *release_next; |
34 | unsigned long page; /* location of the fault page */ | 34 | unsigned long page; /* location of the fault page */ |
35 | bool old_presence; /* page presence prior to arming */ | 35 | pteval_t old_presence; /* page presence prior to arming */ |
36 | bool armed; | 36 | bool armed; |
37 | 37 | ||
38 | /* | 38 | /* |
@@ -97,60 +97,62 @@ static struct kmmio_probe *get_kmmio_probe(unsigned long addr) | |||
97 | static struct kmmio_fault_page *get_kmmio_fault_page(unsigned long page) | 97 | static struct kmmio_fault_page *get_kmmio_fault_page(unsigned long page) |
98 | { | 98 | { |
99 | struct list_head *head; | 99 | struct list_head *head; |
100 | struct kmmio_fault_page *p; | 100 | struct kmmio_fault_page *f; |
101 | 101 | ||
102 | page &= PAGE_MASK; | 102 | page &= PAGE_MASK; |
103 | head = kmmio_page_list(page); | 103 | head = kmmio_page_list(page); |
104 | list_for_each_entry_rcu(p, head, list) { | 104 | list_for_each_entry_rcu(f, head, list) { |
105 | if (p->page == page) | 105 | if (f->page == page) |
106 | return p; | 106 | return f; |
107 | } | 107 | } |
108 | return NULL; | 108 | return NULL; |
109 | } | 109 | } |
110 | 110 | ||
111 | static void set_pmd_presence(pmd_t *pmd, bool present, bool *old) | 111 | static void clear_pmd_presence(pmd_t *pmd, bool clear, pmdval_t *old) |
112 | { | 112 | { |
113 | pmdval_t v = pmd_val(*pmd); | 113 | pmdval_t v = pmd_val(*pmd); |
114 | *old = !!(v & _PAGE_PRESENT); | 114 | if (clear) { |
115 | v &= ~_PAGE_PRESENT; | 115 | *old = v & _PAGE_PRESENT; |
116 | if (present) | 116 | v &= ~_PAGE_PRESENT; |
117 | v |= _PAGE_PRESENT; | 117 | } else /* presume this has been called with clear==true previously */ |
118 | v |= *old; | ||
118 | set_pmd(pmd, __pmd(v)); | 119 | set_pmd(pmd, __pmd(v)); |
119 | } | 120 | } |
120 | 121 | ||
121 | static void set_pte_presence(pte_t *pte, bool present, bool *old) | 122 | static void clear_pte_presence(pte_t *pte, bool clear, pteval_t *old) |
122 | { | 123 | { |
123 | pteval_t v = pte_val(*pte); | 124 | pteval_t v = pte_val(*pte); |
124 | *old = !!(v & _PAGE_PRESENT); | 125 | if (clear) { |
125 | v &= ~_PAGE_PRESENT; | 126 | *old = v & _PAGE_PRESENT; |
126 | if (present) | 127 | v &= ~_PAGE_PRESENT; |
127 | v |= _PAGE_PRESENT; | 128 | } else /* presume this has been called with clear==true previously */ |
129 | v |= *old; | ||
128 | set_pte_atomic(pte, __pte(v)); | 130 | set_pte_atomic(pte, __pte(v)); |
129 | } | 131 | } |
130 | 132 | ||
131 | static int set_page_presence(unsigned long addr, bool present, bool *old) | 133 | static int clear_page_presence(struct kmmio_fault_page *f, bool clear) |
132 | { | 134 | { |
133 | unsigned int level; | 135 | unsigned int level; |
134 | pte_t *pte = lookup_address(addr, &level); | 136 | pte_t *pte = lookup_address(f->page, &level); |
135 | 137 | ||
136 | if (!pte) { | 138 | if (!pte) { |
137 | pr_err("kmmio: no pte for page 0x%08lx\n", addr); | 139 | pr_err("kmmio: no pte for page 0x%08lx\n", f->page); |
138 | return -1; | 140 | return -1; |
139 | } | 141 | } |
140 | 142 | ||
141 | switch (level) { | 143 | switch (level) { |
142 | case PG_LEVEL_2M: | 144 | case PG_LEVEL_2M: |
143 | set_pmd_presence((pmd_t *)pte, present, old); | 145 | clear_pmd_presence((pmd_t *)pte, clear, &f->old_presence); |
144 | break; | 146 | break; |
145 | case PG_LEVEL_4K: | 147 | case PG_LEVEL_4K: |
146 | set_pte_presence(pte, present, old); | 148 | clear_pte_presence(pte, clear, &f->old_presence); |
147 | break; | 149 | break; |
148 | default: | 150 | default: |
149 | pr_err("kmmio: unexpected page level 0x%x.\n", level); | 151 | pr_err("kmmio: unexpected page level 0x%x.\n", level); |
150 | return -1; | 152 | return -1; |
151 | } | 153 | } |
152 | 154 | ||
153 | __flush_tlb_one(addr); | 155 | __flush_tlb_one(f->page); |
154 | return 0; | 156 | return 0; |
155 | } | 157 | } |
156 | 158 | ||
@@ -171,9 +173,9 @@ static int arm_kmmio_fault_page(struct kmmio_fault_page *f) | |||
171 | WARN_ONCE(f->armed, KERN_ERR "kmmio page already armed.\n"); | 173 | WARN_ONCE(f->armed, KERN_ERR "kmmio page already armed.\n"); |
172 | if (f->armed) { | 174 | if (f->armed) { |
173 | pr_warning("kmmio double-arm: page 0x%08lx, ref %d, old %d\n", | 175 | pr_warning("kmmio double-arm: page 0x%08lx, ref %d, old %d\n", |
174 | f->page, f->count, f->old_presence); | 176 | f->page, f->count, !!f->old_presence); |
175 | } | 177 | } |
176 | ret = set_page_presence(f->page, false, &f->old_presence); | 178 | ret = clear_page_presence(f, true); |
177 | WARN_ONCE(ret < 0, KERN_ERR "kmmio arming 0x%08lx failed.\n", f->page); | 179 | WARN_ONCE(ret < 0, KERN_ERR "kmmio arming 0x%08lx failed.\n", f->page); |
178 | f->armed = true; | 180 | f->armed = true; |
179 | return ret; | 181 | return ret; |
@@ -182,8 +184,7 @@ static int arm_kmmio_fault_page(struct kmmio_fault_page *f) | |||
182 | /** Restore the given page to saved presence state. */ | 184 | /** Restore the given page to saved presence state. */ |
183 | static void disarm_kmmio_fault_page(struct kmmio_fault_page *f) | 185 | static void disarm_kmmio_fault_page(struct kmmio_fault_page *f) |
184 | { | 186 | { |
185 | bool tmp; | 187 | int ret = clear_page_presence(f, false); |
186 | int ret = set_page_presence(f->page, f->old_presence, &tmp); | ||
187 | WARN_ONCE(ret < 0, | 188 | WARN_ONCE(ret < 0, |
188 | KERN_ERR "kmmio disarming 0x%08lx failed.\n", f->page); | 189 | KERN_ERR "kmmio disarming 0x%08lx failed.\n", f->page); |
189 | f->armed = false; | 190 | f->armed = false; |
@@ -310,7 +311,12 @@ static int post_kmmio_handler(unsigned long condition, struct pt_regs *regs) | |||
310 | struct kmmio_context *ctx = &get_cpu_var(kmmio_ctx); | 311 | struct kmmio_context *ctx = &get_cpu_var(kmmio_ctx); |
311 | 312 | ||
312 | if (!ctx->active) { | 313 | if (!ctx->active) { |
313 | pr_debug("kmmio: spurious debug trap on CPU %d.\n", | 314 | /* |
315 | * debug traps without an active context are due to either | ||
316 | * something external causing them (e.g. using a debugger while | ||
317 | * mmio tracing is enabled), or erroneous behaviour | ||
318 | */ | ||
319 | pr_warning("kmmio: unexpected debug trap on CPU %d.\n", | ||
314 | smp_processor_id()); | 320 | smp_processor_id()); |
315 | goto out; | 321 | goto out; |
316 | } | 322 | } |
@@ -439,12 +445,12 @@ static void rcu_free_kmmio_fault_pages(struct rcu_head *head) | |||
439 | head, | 445 | head, |
440 | struct kmmio_delayed_release, | 446 | struct kmmio_delayed_release, |
441 | rcu); | 447 | rcu); |
442 | struct kmmio_fault_page *p = dr->release_list; | 448 | struct kmmio_fault_page *f = dr->release_list; |
443 | while (p) { | 449 | while (f) { |
444 | struct kmmio_fault_page *next = p->release_next; | 450 | struct kmmio_fault_page *next = f->release_next; |
445 | BUG_ON(p->count); | 451 | BUG_ON(f->count); |
446 | kfree(p); | 452 | kfree(f); |
447 | p = next; | 453 | f = next; |
448 | } | 454 | } |
449 | kfree(dr); | 455 | kfree(dr); |
450 | } | 456 | } |
@@ -453,19 +459,19 @@ static void remove_kmmio_fault_pages(struct rcu_head *head) | |||
453 | { | 459 | { |
454 | struct kmmio_delayed_release *dr = | 460 | struct kmmio_delayed_release *dr = |
455 | container_of(head, struct kmmio_delayed_release, rcu); | 461 | container_of(head, struct kmmio_delayed_release, rcu); |
456 | struct kmmio_fault_page *p = dr->release_list; | 462 | struct kmmio_fault_page *f = dr->release_list; |
457 | struct kmmio_fault_page **prevp = &dr->release_list; | 463 | struct kmmio_fault_page **prevp = &dr->release_list; |
458 | unsigned long flags; | 464 | unsigned long flags; |
459 | 465 | ||
460 | spin_lock_irqsave(&kmmio_lock, flags); | 466 | spin_lock_irqsave(&kmmio_lock, flags); |
461 | while (p) { | 467 | while (f) { |
462 | if (!p->count) { | 468 | if (!f->count) { |
463 | list_del_rcu(&p->list); | 469 | list_del_rcu(&f->list); |
464 | prevp = &p->release_next; | 470 | prevp = &f->release_next; |
465 | } else { | 471 | } else { |
466 | *prevp = p->release_next; | 472 | *prevp = f->release_next; |
467 | } | 473 | } |
468 | p = p->release_next; | 474 | f = f->release_next; |
469 | } | 475 | } |
470 | spin_unlock_irqrestore(&kmmio_lock, flags); | 476 | spin_unlock_irqrestore(&kmmio_lock, flags); |
471 | 477 | ||
@@ -528,8 +534,8 @@ void unregister_kmmio_probe(struct kmmio_probe *p) | |||
528 | } | 534 | } |
529 | EXPORT_SYMBOL(unregister_kmmio_probe); | 535 | EXPORT_SYMBOL(unregister_kmmio_probe); |
530 | 536 | ||
531 | static int kmmio_die_notifier(struct notifier_block *nb, unsigned long val, | 537 | static int |
532 | void *args) | 538 | kmmio_die_notifier(struct notifier_block *nb, unsigned long val, void *args) |
533 | { | 539 | { |
534 | struct die_args *arg = args; | 540 | struct die_args *arg = args; |
535 | 541 | ||
@@ -544,11 +550,23 @@ static struct notifier_block nb_die = { | |||
544 | .notifier_call = kmmio_die_notifier | 550 | .notifier_call = kmmio_die_notifier |
545 | }; | 551 | }; |
546 | 552 | ||
547 | static int __init init_kmmio(void) | 553 | int kmmio_init(void) |
548 | { | 554 | { |
549 | int i; | 555 | int i; |
556 | |||
550 | for (i = 0; i < KMMIO_PAGE_TABLE_SIZE; i++) | 557 | for (i = 0; i < KMMIO_PAGE_TABLE_SIZE; i++) |
551 | INIT_LIST_HEAD(&kmmio_page_table[i]); | 558 | INIT_LIST_HEAD(&kmmio_page_table[i]); |
559 | |||
552 | return register_die_notifier(&nb_die); | 560 | return register_die_notifier(&nb_die); |
553 | } | 561 | } |
554 | fs_initcall(init_kmmio); /* should be before device_initcall() */ | 562 | |
563 | void kmmio_cleanup(void) | ||
564 | { | ||
565 | int i; | ||
566 | |||
567 | unregister_die_notifier(&nb_die); | ||
568 | for (i = 0; i < KMMIO_PAGE_TABLE_SIZE; i++) { | ||
569 | WARN_ONCE(!list_empty(&kmmio_page_table[i]), | ||
570 | KERN_ERR "kmmio_page_table not empty at cleanup, any further tracing will leak memory.\n"); | ||
571 | } | ||
572 | } | ||
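The kmmio.c hunks above change old_presence from a bool to the raw pteval_t value of _PAGE_PRESENT, so arming saves the bit verbatim and disarming simply ORs it back, leaving a page that was already not-present untouched. Below is a minimal user-space sketch of that save/clear/restore pattern; pteval_t and PAGE_PRESENT here are local stand-ins, not the kernel definitions.

/*
 * Analogue of clear_pte_presence()/clear_pmd_presence(): save the raw
 * presence bit on "arm", restore exactly that bit on "disarm".
 */
#include <stdint.h>
#include <stdio.h>

typedef uint64_t pteval_t;
#define PAGE_PRESENT ((pteval_t)1 << 0)

static void clear_presence(pteval_t *pte, int clear, pteval_t *old)
{
	if (clear) {
		*old = *pte & PAGE_PRESENT;	/* save the raw bit */
		*pte &= ~PAGE_PRESENT;		/* arm: hide the page */
	} else {
		*pte |= *old;			/* disarm: restore saved bit */
	}
}

int main(void)
{
	pteval_t pte = 0x1a3 | PAGE_PRESENT, saved = 0;

	clear_presence(&pte, 1, &saved);	/* arm */
	printf("armed:    %#llx\n", (unsigned long long)pte);
	clear_presence(&pte, 0, &saved);	/* disarm */
	printf("disarmed: %#llx\n", (unsigned long long)pte);
	return 0;
}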
diff --git a/arch/x86/mm/memtest.c b/arch/x86/mm/memtest.c index 605c8be06217..18d244f70205 100644 --- a/arch/x86/mm/memtest.c +++ b/arch/x86/mm/memtest.c | |||
@@ -40,23 +40,22 @@ static void __init reserve_bad_mem(u64 pattern, u64 start_bad, u64 end_bad) | |||
40 | 40 | ||
41 | static void __init memtest(u64 pattern, u64 start_phys, u64 size) | 41 | static void __init memtest(u64 pattern, u64 start_phys, u64 size) |
42 | { | 42 | { |
43 | u64 i, count; | 43 | u64 *p, *start, *end; |
44 | u64 *start; | ||
45 | u64 start_bad, last_bad; | 44 | u64 start_bad, last_bad; |
46 | u64 start_phys_aligned; | 45 | u64 start_phys_aligned; |
47 | size_t incr; | 46 | const size_t incr = sizeof(pattern); |
48 | 47 | ||
49 | incr = sizeof(pattern); | ||
50 | start_phys_aligned = ALIGN(start_phys, incr); | 48 | start_phys_aligned = ALIGN(start_phys, incr); |
51 | count = (size - (start_phys_aligned - start_phys))/incr; | ||
52 | start = __va(start_phys_aligned); | 49 | start = __va(start_phys_aligned); |
50 | end = start + (size - (start_phys_aligned - start_phys)) / incr; | ||
53 | start_bad = 0; | 51 | start_bad = 0; |
54 | last_bad = 0; | 52 | last_bad = 0; |
55 | 53 | ||
56 | for (i = 0; i < count; i++) | 54 | for (p = start; p < end; p++) |
57 | start[i] = pattern; | 55 | *p = pattern; |
58 | for (i = 0; i < count; i++, start++, start_phys_aligned += incr) { | 56 | |
59 | if (*start == pattern) | 57 | for (p = start; p < end; p++, start_phys_aligned += incr) { |
58 | if (*p == pattern) | ||
60 | continue; | 59 | continue; |
61 | if (start_phys_aligned == last_bad + incr) { | 60 | if (start_phys_aligned == last_bad + incr) { |
62 | last_bad += incr; | 61 | last_bad += incr; |
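The memtest rewrite above drops the separate index/count bookkeeping and walks the region with u64 pointers between start and end, with incr hoisted into a const. A standalone sketch of the same two-pass write/verify loop over an ordinary heap buffer; the pattern value and buffer size are arbitrary choices for illustration.

#include <stdint.h>
#include <stdio.h>
#include <stdlib.h>

int main(void)
{
	const uint64_t pattern = 0xaaaaaaaaaaaaaaaaULL;
	size_t bytes = 1 << 20;
	uint64_t *start = malloc(bytes);
	uint64_t *end = start + bytes / sizeof(pattern);
	uint64_t *p;

	if (!start)
		return 1;

	for (p = start; p < end; p++)		/* write pass */
		*p = pattern;
	for (p = start; p < end; p++)		/* verify pass */
		if (*p != pattern)
			printf("bad word at byte offset %zu\n",
			       (size_t)(p - start) * sizeof(pattern));

	free(start);
	return 0;
}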
diff --git a/arch/x86/mm/mmio-mod.c b/arch/x86/mm/mmio-mod.c index c9342ed8b402..132772a8ec57 100644 --- a/arch/x86/mm/mmio-mod.c +++ b/arch/x86/mm/mmio-mod.c | |||
@@ -451,6 +451,7 @@ void enable_mmiotrace(void) | |||
451 | 451 | ||
452 | if (nommiotrace) | 452 | if (nommiotrace) |
453 | pr_info(NAME "MMIO tracing disabled.\n"); | 453 | pr_info(NAME "MMIO tracing disabled.\n"); |
454 | kmmio_init(); | ||
454 | enter_uniprocessor(); | 455 | enter_uniprocessor(); |
455 | spin_lock_irq(&trace_lock); | 456 | spin_lock_irq(&trace_lock); |
456 | atomic_inc(&mmiotrace_enabled); | 457 | atomic_inc(&mmiotrace_enabled); |
@@ -473,6 +474,7 @@ void disable_mmiotrace(void) | |||
473 | 474 | ||
474 | clear_trace_list(); /* guarantees: no more kmmio callbacks */ | 475 | clear_trace_list(); /* guarantees: no more kmmio callbacks */ |
475 | leave_uniprocessor(); | 476 | leave_uniprocessor(); |
477 | kmmio_cleanup(); | ||
476 | pr_info(NAME "disabled.\n"); | 478 | pr_info(NAME "disabled.\n"); |
477 | out: | 479 | out: |
478 | mutex_unlock(&mmiotrace_mutex); | 480 | mutex_unlock(&mmiotrace_mutex); |
diff --git a/arch/x86/mm/numa_64.c b/arch/x86/mm/numa_64.c index 2d05a12029dc..459913beac71 100644 --- a/arch/x86/mm/numa_64.c +++ b/arch/x86/mm/numa_64.c | |||
@@ -179,18 +179,25 @@ static void * __init early_node_mem(int nodeid, unsigned long start, | |||
179 | } | 179 | } |
180 | 180 | ||
181 | /* Initialize bootmem allocator for a node */ | 181 | /* Initialize bootmem allocator for a node */ |
182 | void __init setup_node_bootmem(int nodeid, unsigned long start, | 182 | void __init |
183 | unsigned long end) | 183 | setup_node_bootmem(int nodeid, unsigned long start, unsigned long end) |
184 | { | 184 | { |
185 | unsigned long start_pfn, last_pfn, bootmap_pages, bootmap_size; | 185 | unsigned long start_pfn, last_pfn, bootmap_pages, bootmap_size; |
186 | const int pgdat_size = roundup(sizeof(pg_data_t), PAGE_SIZE); | ||
186 | unsigned long bootmap_start, nodedata_phys; | 187 | unsigned long bootmap_start, nodedata_phys; |
187 | void *bootmap; | 188 | void *bootmap; |
188 | const int pgdat_size = roundup(sizeof(pg_data_t), PAGE_SIZE); | ||
189 | int nid; | 189 | int nid; |
190 | 190 | ||
191 | if (!end) | 191 | if (!end) |
192 | return; | 192 | return; |
193 | 193 | ||
194 | /* | ||
195 | * Don't confuse VM with a node that doesn't have the | ||
196 | * minimum amount of memory: | ||
197 | */ | ||
198 | if (end && (end - start) < NODE_MIN_SIZE) | ||
199 | return; | ||
200 | |||
194 | start = roundup(start, ZONE_ALIGN); | 201 | start = roundup(start, ZONE_ALIGN); |
195 | 202 | ||
196 | printk(KERN_INFO "Bootmem setup node %d %016lx-%016lx\n", nodeid, | 203 | printk(KERN_INFO "Bootmem setup node %d %016lx-%016lx\n", nodeid, |
@@ -272,9 +279,6 @@ void __init setup_node_bootmem(int nodeid, unsigned long start, | |||
272 | reserve_bootmem_node(NODE_DATA(nodeid), bootmap_start, | 279 | reserve_bootmem_node(NODE_DATA(nodeid), bootmap_start, |
273 | bootmap_pages<<PAGE_SHIFT, BOOTMEM_DEFAULT); | 280 | bootmap_pages<<PAGE_SHIFT, BOOTMEM_DEFAULT); |
274 | 281 | ||
275 | #ifdef CONFIG_ACPI_NUMA | ||
276 | srat_reserve_add_area(nodeid); | ||
277 | #endif | ||
278 | node_set_online(nodeid); | 282 | node_set_online(nodeid); |
279 | } | 283 | } |
280 | 284 | ||
@@ -578,21 +582,6 @@ unsigned long __init numa_free_all_bootmem(void) | |||
578 | return pages; | 582 | return pages; |
579 | } | 583 | } |
580 | 584 | ||
581 | void __init paging_init(void) | ||
582 | { | ||
583 | unsigned long max_zone_pfns[MAX_NR_ZONES]; | ||
584 | |||
585 | memset(max_zone_pfns, 0, sizeof(max_zone_pfns)); | ||
586 | max_zone_pfns[ZONE_DMA] = MAX_DMA_PFN; | ||
587 | max_zone_pfns[ZONE_DMA32] = MAX_DMA32_PFN; | ||
588 | max_zone_pfns[ZONE_NORMAL] = max_pfn; | ||
589 | |||
590 | sparse_memory_present_with_active_regions(MAX_NUMNODES); | ||
591 | sparse_init(); | ||
592 | |||
593 | free_area_init_nodes(max_zone_pfns); | ||
594 | } | ||
595 | |||
596 | static __init int numa_setup(char *opt) | 585 | static __init int numa_setup(char *opt) |
597 | { | 586 | { |
598 | if (!opt) | 587 | if (!opt) |
@@ -606,8 +595,6 @@ static __init int numa_setup(char *opt) | |||
606 | #ifdef CONFIG_ACPI_NUMA | 595 | #ifdef CONFIG_ACPI_NUMA |
607 | if (!strncmp(opt, "noacpi", 6)) | 596 | if (!strncmp(opt, "noacpi", 6)) |
608 | acpi_numa = -1; | 597 | acpi_numa = -1; |
609 | if (!strncmp(opt, "hotadd=", 7)) | ||
610 | hotadd_percent = simple_strtoul(opt+7, NULL, 10); | ||
611 | #endif | 598 | #endif |
612 | return 0; | 599 | return 0; |
613 | } | 600 | } |
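The numa_64.c hunk adds an early bail-out so setup_node_bootmem() never hands the VM a node smaller than NODE_MIN_SIZE (the constant that used to live as a 4 MB #define in srat_64.c and is presumably now provided by a shared header). A standalone sketch of that check, with NODE_MIN_SIZE as a local stand-in and made-up node ranges:

#include <stdio.h>

#define NODE_MIN_SIZE (4UL * 1024 * 1024)	/* 4 MB, as in the old define */

static void setup_node(int nodeid, unsigned long start, unsigned long end)
{
	if (!end)
		return;

	/* Don't confuse the VM with a node below the minimum size. */
	if (end - start < NODE_MIN_SIZE) {
		printf("node %d: %lu bytes, too small, skipped\n",
		       nodeid, end - start);
		return;
	}

	printf("node %d: setting up %016lx-%016lx\n", nodeid, start, end);
}

int main(void)
{
	setup_node(0, 0x0, 0x10000000UL);		/* 256 MB, accepted */
	setup_node(1, 0x10000000UL, 0x10200000UL);	/* 2 MB, skipped */
	return 0;
}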
diff --git a/arch/x86/mm/pageattr.c b/arch/x86/mm/pageattr.c index e17efed088c5..3cfe9ced8a4c 100644 --- a/arch/x86/mm/pageattr.c +++ b/arch/x86/mm/pageattr.c | |||
@@ -470,7 +470,7 @@ static int split_large_page(pte_t *kpte, unsigned long address) | |||
470 | 470 | ||
471 | if (!debug_pagealloc) | 471 | if (!debug_pagealloc) |
472 | spin_unlock(&cpa_lock); | 472 | spin_unlock(&cpa_lock); |
473 | base = alloc_pages(GFP_KERNEL, 0); | 473 | base = alloc_pages(GFP_KERNEL | __GFP_NOTRACK, 0); |
474 | if (!debug_pagealloc) | 474 | if (!debug_pagealloc) |
475 | spin_lock(&cpa_lock); | 475 | spin_lock(&cpa_lock); |
476 | if (!base) | 476 | if (!base) |
@@ -839,13 +839,6 @@ static int change_page_attr_set_clr(unsigned long *addr, int numpages, | |||
839 | 839 | ||
840 | vm_unmap_aliases(); | 840 | vm_unmap_aliases(); |
841 | 841 | ||
842 | /* | ||
843 | * If we're called with lazy mmu updates enabled, the | ||
844 | * in-memory pte state may be stale. Flush pending updates to | ||
845 | * bring them up to date. | ||
846 | */ | ||
847 | arch_flush_lazy_mmu_mode(); | ||
848 | |||
849 | cpa.vaddr = addr; | 842 | cpa.vaddr = addr; |
850 | cpa.pages = pages; | 843 | cpa.pages = pages; |
851 | cpa.numpages = numpages; | 844 | cpa.numpages = numpages; |
@@ -890,13 +883,6 @@ static int change_page_attr_set_clr(unsigned long *addr, int numpages, | |||
890 | } else | 883 | } else |
891 | cpa_flush_all(cache); | 884 | cpa_flush_all(cache); |
892 | 885 | ||
893 | /* | ||
894 | * If we've been called with lazy mmu updates enabled, then | ||
895 | * make sure that everything gets flushed out before we | ||
896 | * return. | ||
897 | */ | ||
898 | arch_flush_lazy_mmu_mode(); | ||
899 | |||
900 | out: | 886 | out: |
901 | return ret; | 887 | return ret; |
902 | } | 888 | } |
diff --git a/arch/x86/mm/pgtable.c b/arch/x86/mm/pgtable.c index 7aa03a5389f5..8e43bdd45456 100644 --- a/arch/x86/mm/pgtable.c +++ b/arch/x86/mm/pgtable.c | |||
@@ -4,9 +4,11 @@ | |||
4 | #include <asm/tlb.h> | 4 | #include <asm/tlb.h> |
5 | #include <asm/fixmap.h> | 5 | #include <asm/fixmap.h> |
6 | 6 | ||
7 | #define PGALLOC_GFP GFP_KERNEL | __GFP_NOTRACK | __GFP_REPEAT | __GFP_ZERO | ||
8 | |||
7 | pte_t *pte_alloc_one_kernel(struct mm_struct *mm, unsigned long address) | 9 | pte_t *pte_alloc_one_kernel(struct mm_struct *mm, unsigned long address) |
8 | { | 10 | { |
9 | return (pte_t *)__get_free_page(GFP_KERNEL|__GFP_REPEAT|__GFP_ZERO); | 11 | return (pte_t *)__get_free_page(PGALLOC_GFP); |
10 | } | 12 | } |
11 | 13 | ||
12 | pgtable_t pte_alloc_one(struct mm_struct *mm, unsigned long address) | 14 | pgtable_t pte_alloc_one(struct mm_struct *mm, unsigned long address) |
@@ -14,9 +16,9 @@ pgtable_t pte_alloc_one(struct mm_struct *mm, unsigned long address) | |||
14 | struct page *pte; | 16 | struct page *pte; |
15 | 17 | ||
16 | #ifdef CONFIG_HIGHPTE | 18 | #ifdef CONFIG_HIGHPTE |
17 | pte = alloc_pages(GFP_KERNEL|__GFP_HIGHMEM|__GFP_REPEAT|__GFP_ZERO, 0); | 19 | pte = alloc_pages(PGALLOC_GFP | __GFP_HIGHMEM, 0); |
18 | #else | 20 | #else |
19 | pte = alloc_pages(GFP_KERNEL|__GFP_REPEAT|__GFP_ZERO, 0); | 21 | pte = alloc_pages(PGALLOC_GFP, 0); |
20 | #endif | 22 | #endif |
21 | if (pte) | 23 | if (pte) |
22 | pgtable_page_ctor(pte); | 24 | pgtable_page_ctor(pte); |
@@ -161,7 +163,7 @@ static int preallocate_pmds(pmd_t *pmds[]) | |||
161 | bool failed = false; | 163 | bool failed = false; |
162 | 164 | ||
163 | for(i = 0; i < PREALLOCATED_PMDS; i++) { | 165 | for(i = 0; i < PREALLOCATED_PMDS; i++) { |
164 | pmd_t *pmd = (pmd_t *)get_zeroed_page(GFP_KERNEL|__GFP_REPEAT); | 166 | pmd_t *pmd = (pmd_t *)__get_free_page(PGALLOC_GFP); |
165 | if (pmd == NULL) | 167 | if (pmd == NULL) |
166 | failed = true; | 168 | failed = true; |
167 | pmds[i] = pmd; | 169 | pmds[i] = pmd; |
@@ -228,7 +230,7 @@ pgd_t *pgd_alloc(struct mm_struct *mm) | |||
228 | pmd_t *pmds[PREALLOCATED_PMDS]; | 230 | pmd_t *pmds[PREALLOCATED_PMDS]; |
229 | unsigned long flags; | 231 | unsigned long flags; |
230 | 232 | ||
231 | pgd = (pgd_t *)__get_free_page(GFP_KERNEL | __GFP_ZERO); | 233 | pgd = (pgd_t *)__get_free_page(PGALLOC_GFP); |
232 | 234 | ||
233 | if (pgd == NULL) | 235 | if (pgd == NULL) |
234 | goto out; | 236 | goto out; |
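The pgtable.c hunks funnel every page-table allocation through one PGALLOC_GFP mask (now including __GFP_NOTRACK for kmemcheck), with call sites ORing in only what is specific to them. A user-space sketch of that consolidation; the flag bit values below are made up for the example, not the kernel's.

#include <stdio.h>

#define GFP_KERNEL	0x01
#define __GFP_NOTRACK	0x02	/* keep kmemcheck away from the page */
#define __GFP_REPEAT	0x04
#define __GFP_ZERO	0x08
#define __GFP_HIGHMEM	0x10

#define PGALLOC_GFP	(GFP_KERNEL | __GFP_NOTRACK | __GFP_REPEAT | __GFP_ZERO)

static void alloc_with(const char *what, unsigned int gfp)
{
	printf("%-16s gfp=%#x\n", what, gfp);
}

int main(void)
{
	alloc_with("pte (kernel)", PGALLOC_GFP);
	alloc_with("pte (highmem)", PGALLOC_GFP | __GFP_HIGHMEM);
	alloc_with("pmd", PGALLOC_GFP);
	alloc_with("pgd", PGALLOC_GFP);
	return 0;
}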
diff --git a/arch/x86/mm/srat_64.c b/arch/x86/mm/srat_64.c index 01765955baaf..2dfcbf9df2ae 100644 --- a/arch/x86/mm/srat_64.c +++ b/arch/x86/mm/srat_64.c | |||
@@ -31,17 +31,11 @@ static nodemask_t nodes_parsed __initdata; | |||
31 | static nodemask_t cpu_nodes_parsed __initdata; | 31 | static nodemask_t cpu_nodes_parsed __initdata; |
32 | static struct bootnode nodes[MAX_NUMNODES] __initdata; | 32 | static struct bootnode nodes[MAX_NUMNODES] __initdata; |
33 | static struct bootnode nodes_add[MAX_NUMNODES]; | 33 | static struct bootnode nodes_add[MAX_NUMNODES]; |
34 | static int found_add_area __initdata; | ||
35 | int hotadd_percent __initdata = 0; | ||
36 | 34 | ||
37 | static int num_node_memblks __initdata; | 35 | static int num_node_memblks __initdata; |
38 | static struct bootnode node_memblk_range[NR_NODE_MEMBLKS] __initdata; | 36 | static struct bootnode node_memblk_range[NR_NODE_MEMBLKS] __initdata; |
39 | static int memblk_nodeid[NR_NODE_MEMBLKS] __initdata; | 37 | static int memblk_nodeid[NR_NODE_MEMBLKS] __initdata; |
40 | 38 | ||
41 | /* Too small nodes confuse the VM badly. Usually they result | ||
42 | from BIOS bugs. */ | ||
43 | #define NODE_MIN_SIZE (4*1024*1024) | ||
44 | |||
45 | static __init int setup_node(int pxm) | 39 | static __init int setup_node(int pxm) |
46 | { | 40 | { |
47 | return acpi_map_pxm_to_node(pxm); | 41 | return acpi_map_pxm_to_node(pxm); |
@@ -66,9 +60,6 @@ static __init void cutoff_node(int i, unsigned long start, unsigned long end) | |||
66 | { | 60 | { |
67 | struct bootnode *nd = &nodes[i]; | 61 | struct bootnode *nd = &nodes[i]; |
68 | 62 | ||
69 | if (found_add_area) | ||
70 | return; | ||
71 | |||
72 | if (nd->start < start) { | 63 | if (nd->start < start) { |
73 | nd->start = start; | 64 | nd->start = start; |
74 | if (nd->end < nd->start) | 65 | if (nd->end < nd->start) |
@@ -86,7 +77,6 @@ static __init void bad_srat(void) | |||
86 | int i; | 77 | int i; |
87 | printk(KERN_ERR "SRAT: SRAT not used.\n"); | 78 | printk(KERN_ERR "SRAT: SRAT not used.\n"); |
88 | acpi_numa = -1; | 79 | acpi_numa = -1; |
89 | found_add_area = 0; | ||
90 | for (i = 0; i < MAX_LOCAL_APIC; i++) | 80 | for (i = 0; i < MAX_LOCAL_APIC; i++) |
91 | apicid_to_node[i] = NUMA_NO_NODE; | 81 | apicid_to_node[i] = NUMA_NO_NODE; |
92 | for (i = 0; i < MAX_NUMNODES; i++) | 82 | for (i = 0; i < MAX_NUMNODES; i++) |
@@ -182,24 +172,21 @@ acpi_numa_processor_affinity_init(struct acpi_srat_cpu_affinity *pa) | |||
182 | pxm, apic_id, node); | 172 | pxm, apic_id, node); |
183 | } | 173 | } |
184 | 174 | ||
185 | static int update_end_of_memory(unsigned long end) {return -1;} | ||
186 | static int hotadd_enough_memory(struct bootnode *nd) {return 1;} | ||
187 | #ifdef CONFIG_MEMORY_HOTPLUG_SPARSE | 175 | #ifdef CONFIG_MEMORY_HOTPLUG_SPARSE |
188 | static inline int save_add_info(void) {return 1;} | 176 | static inline int save_add_info(void) {return 1;} |
189 | #else | 177 | #else |
190 | static inline int save_add_info(void) {return 0;} | 178 | static inline int save_add_info(void) {return 0;} |
191 | #endif | 179 | #endif |
192 | /* | 180 | /* |
193 | * Update nodes_add and decide if to include add are in the zone. | 181 | * Update nodes_add[] |
194 | * Both SPARSE and RESERVE need nodes_add information. | 182 | * This code supports one contiguous hot add area per node |
195 | * This code supports one contiguous hot add area per node. | ||
196 | */ | 183 | */ |
197 | static int __init | 184 | static void __init |
198 | reserve_hotadd(int node, unsigned long start, unsigned long end) | 185 | update_nodes_add(int node, unsigned long start, unsigned long end) |
199 | { | 186 | { |
200 | unsigned long s_pfn = start >> PAGE_SHIFT; | 187 | unsigned long s_pfn = start >> PAGE_SHIFT; |
201 | unsigned long e_pfn = end >> PAGE_SHIFT; | 188 | unsigned long e_pfn = end >> PAGE_SHIFT; |
202 | int ret = 0, changed = 0; | 189 | int changed = 0; |
203 | struct bootnode *nd = &nodes_add[node]; | 190 | struct bootnode *nd = &nodes_add[node]; |
204 | 191 | ||
205 | /* I had some trouble with strange memory hotadd regions breaking | 192 | /* I had some trouble with strange memory hotadd regions breaking |
@@ -210,7 +197,7 @@ reserve_hotadd(int node, unsigned long start, unsigned long end) | |||
210 | mistakes */ | 197 | mistakes */ |
211 | if ((signed long)(end - start) < NODE_MIN_SIZE) { | 198 | if ((signed long)(end - start) < NODE_MIN_SIZE) { |
212 | printk(KERN_ERR "SRAT: Hotplug area too small\n"); | 199 | printk(KERN_ERR "SRAT: Hotplug area too small\n"); |
213 | return -1; | 200 | return; |
214 | } | 201 | } |
215 | 202 | ||
216 | /* This check might be a bit too strict, but I'm keeping it for now. */ | 203 | /* This check might be a bit too strict, but I'm keeping it for now. */ |
@@ -218,12 +205,7 @@ reserve_hotadd(int node, unsigned long start, unsigned long end) | |||
218 | printk(KERN_ERR | 205 | printk(KERN_ERR |
219 | "SRAT: Hotplug area %lu -> %lu has existing memory\n", | 206 | "SRAT: Hotplug area %lu -> %lu has existing memory\n", |
220 | s_pfn, e_pfn); | 207 | s_pfn, e_pfn); |
221 | return -1; | 208 | return; |
222 | } | ||
223 | |||
224 | if (!hotadd_enough_memory(&nodes_add[node])) { | ||
225 | printk(KERN_ERR "SRAT: Hotplug area too large\n"); | ||
226 | return -1; | ||
227 | } | 209 | } |
228 | 210 | ||
229 | /* Looks good */ | 211 | /* Looks good */ |
@@ -245,11 +227,9 @@ reserve_hotadd(int node, unsigned long start, unsigned long end) | |||
245 | printk(KERN_ERR "SRAT: Hotplug zone not continuous. Partly ignored\n"); | 227 | printk(KERN_ERR "SRAT: Hotplug zone not continuous. Partly ignored\n"); |
246 | } | 228 | } |
247 | 229 | ||
248 | ret = update_end_of_memory(nd->end); | ||
249 | |||
250 | if (changed) | 230 | if (changed) |
251 | printk(KERN_INFO "SRAT: hot plug zone found %Lx - %Lx\n", nd->start, nd->end); | 231 | printk(KERN_INFO "SRAT: hot plug zone found %Lx - %Lx\n", |
252 | return ret; | 232 | nd->start, nd->end); |
253 | } | 233 | } |
254 | 234 | ||
255 | /* Callback for parsing of the Proximity Domain <-> Memory Area mappings */ | 235 | /* Callback for parsing of the Proximity Domain <-> Memory Area mappings */ |
@@ -310,13 +290,10 @@ acpi_numa_memory_affinity_init(struct acpi_srat_mem_affinity *ma) | |||
310 | start, end); | 290 | start, end); |
311 | e820_register_active_regions(node, start >> PAGE_SHIFT, | 291 | e820_register_active_regions(node, start >> PAGE_SHIFT, |
312 | end >> PAGE_SHIFT); | 292 | end >> PAGE_SHIFT); |
313 | push_node_boundaries(node, nd->start >> PAGE_SHIFT, | ||
314 | nd->end >> PAGE_SHIFT); | ||
315 | 293 | ||
316 | if ((ma->flags & ACPI_SRAT_MEM_HOT_PLUGGABLE) && | 294 | if (ma->flags & ACPI_SRAT_MEM_HOT_PLUGGABLE) { |
317 | (reserve_hotadd(node, start, end) < 0)) { | 295 | update_nodes_add(node, start, end); |
318 | /* Ignore hotadd region. Undo damage */ | 296 | /* restore nodes[node] */ |
319 | printk(KERN_NOTICE "SRAT: Hotplug region ignored\n"); | ||
320 | *nd = oldnode; | 297 | *nd = oldnode; |
321 | if ((nd->start | nd->end) == 0) | 298 | if ((nd->start | nd->end) == 0) |
322 | node_clear(node, nodes_parsed); | 299 | node_clear(node, nodes_parsed); |
@@ -345,9 +322,9 @@ static int __init nodes_cover_memory(const struct bootnode *nodes) | |||
345 | pxmram = 0; | 322 | pxmram = 0; |
346 | } | 323 | } |
347 | 324 | ||
348 | e820ram = max_pfn - absent_pages_in_range(0, max_pfn); | 325 | e820ram = max_pfn - (e820_hole_size(0, max_pfn<<PAGE_SHIFT)>>PAGE_SHIFT); |
349 | /* We seem to lose 3 pages somewhere. Allow a bit of slack. */ | 326 | /* We seem to lose 3 pages somewhere. Allow 1M of slack. */ |
350 | if ((long)(e820ram - pxmram) >= 1*1024*1024) { | 327 | if ((long)(e820ram - pxmram) >= (1<<(20 - PAGE_SHIFT))) { |
351 | printk(KERN_ERR | 328 | printk(KERN_ERR |
352 | "SRAT: PXMs only cover %luMB of your %luMB e820 RAM. Not used.\n", | 329 | "SRAT: PXMs only cover %luMB of your %luMB e820 RAM. Not used.\n", |
353 | (pxmram << PAGE_SHIFT) >> 20, | 330 | (pxmram << PAGE_SHIFT) >> 20, |
@@ -357,17 +334,6 @@ static int __init nodes_cover_memory(const struct bootnode *nodes) | |||
357 | return 1; | 334 | return 1; |
358 | } | 335 | } |
359 | 336 | ||
360 | static void __init unparse_node(int node) | ||
361 | { | ||
362 | int i; | ||
363 | node_clear(node, nodes_parsed); | ||
364 | node_clear(node, cpu_nodes_parsed); | ||
365 | for (i = 0; i < MAX_LOCAL_APIC; i++) { | ||
366 | if (apicid_to_node[i] == node) | ||
367 | apicid_to_node[i] = NUMA_NO_NODE; | ||
368 | } | ||
369 | } | ||
370 | |||
371 | void __init acpi_numa_arch_fixup(void) {} | 337 | void __init acpi_numa_arch_fixup(void) {} |
372 | 338 | ||
373 | /* Use the information discovered above to actually set up the nodes. */ | 339 | /* Use the information discovered above to actually set up the nodes. */ |
@@ -379,18 +345,8 @@ int __init acpi_scan_nodes(unsigned long start, unsigned long end) | |||
379 | return -1; | 345 | return -1; |
380 | 346 | ||
381 | /* First clean up the node list */ | 347 | /* First clean up the node list */ |
382 | for (i = 0; i < MAX_NUMNODES; i++) { | 348 | for (i = 0; i < MAX_NUMNODES; i++) |
383 | cutoff_node(i, start, end); | 349 | cutoff_node(i, start, end); |
384 | /* | ||
385 | * don't confuse VM with a node that doesn't have the | ||
386 | * minimum memory. | ||
387 | */ | ||
388 | if (nodes[i].end && | ||
389 | (nodes[i].end - nodes[i].start) < NODE_MIN_SIZE) { | ||
390 | unparse_node(i); | ||
391 | node_set_offline(i); | ||
392 | } | ||
393 | } | ||
394 | 350 | ||
395 | if (!nodes_cover_memory(nodes)) { | 351 | if (!nodes_cover_memory(nodes)) { |
396 | bad_srat(); | 352 | bad_srat(); |
@@ -423,7 +379,7 @@ int __init acpi_scan_nodes(unsigned long start, unsigned long end) | |||
423 | 379 | ||
424 | if (node == NUMA_NO_NODE) | 380 | if (node == NUMA_NO_NODE) |
425 | continue; | 381 | continue; |
426 | if (!node_isset(node, node_possible_map)) | 382 | if (!node_online(node)) |
427 | numa_clear_node(i); | 383 | numa_clear_node(i); |
428 | } | 384 | } |
429 | numa_init_array(); | 385 | numa_init_array(); |
@@ -510,26 +466,6 @@ static int null_slit_node_compare(int a, int b) | |||
510 | } | 466 | } |
511 | #endif /* CONFIG_NUMA_EMU */ | 467 | #endif /* CONFIG_NUMA_EMU */ |
512 | 468 | ||
513 | void __init srat_reserve_add_area(int nodeid) | ||
514 | { | ||
515 | if (found_add_area && nodes_add[nodeid].end) { | ||
516 | u64 total_mb; | ||
517 | |||
518 | printk(KERN_INFO "SRAT: Reserving hot-add memory space " | ||
519 | "for node %d at %Lx-%Lx\n", | ||
520 | nodeid, nodes_add[nodeid].start, nodes_add[nodeid].end); | ||
521 | total_mb = (nodes_add[nodeid].end - nodes_add[nodeid].start) | ||
522 | >> PAGE_SHIFT; | ||
523 | total_mb *= sizeof(struct page); | ||
524 | total_mb >>= 20; | ||
525 | printk(KERN_INFO "SRAT: This will cost you %Lu MB of " | ||
526 | "pre-allocated memory.\n", (unsigned long long)total_mb); | ||
527 | reserve_bootmem_node(NODE_DATA(nodeid), nodes_add[nodeid].start, | ||
528 | nodes_add[nodeid].end - nodes_add[nodeid].start, | ||
529 | BOOTMEM_DEFAULT); | ||
530 | } | ||
531 | } | ||
532 | |||
533 | int __node_distance(int a, int b) | 469 | int __node_distance(int a, int b) |
534 | { | 470 | { |
535 | int index; | 471 | int index; |
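One detail of the srat_64.c change worth spelling out: the old nodes_cover_memory() literal 1*1024*1024, compared against a page count, effectively allowed about 4 GB of slack with 4 KB pages; the new 1 << (20 - PAGE_SHIFT) expresses 1 MB in pages, matching the updated comment. A quick check of that arithmetic, assuming the usual x86 PAGE_SHIFT of 12:

#include <stdio.h>

#define PAGE_SHIFT 12

int main(void)
{
	unsigned long slack_pages = 1UL << (20 - PAGE_SHIFT);

	printf("slack: %lu pages = %lu KB\n",
	       slack_pages, slack_pages << (PAGE_SHIFT - 10));
	return 0;
}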
diff --git a/arch/x86/oprofile/nmi_int.c b/arch/x86/oprofile/nmi_int.c index 202864ad49a7..b07dd8d0b321 100644 --- a/arch/x86/oprofile/nmi_int.c +++ b/arch/x86/oprofile/nmi_int.c | |||
@@ -40,8 +40,9 @@ static int profile_exceptions_notify(struct notifier_block *self, | |||
40 | 40 | ||
41 | switch (val) { | 41 | switch (val) { |
42 | case DIE_NMI: | 42 | case DIE_NMI: |
43 | if (model->check_ctrs(args->regs, &per_cpu(cpu_msrs, cpu))) | 43 | case DIE_NMI_IPI: |
44 | ret = NOTIFY_STOP; | 44 | model->check_ctrs(args->regs, &per_cpu(cpu_msrs, cpu)); |
45 | ret = NOTIFY_STOP; | ||
45 | break; | 46 | break; |
46 | default: | 47 | default: |
47 | break; | 48 | break; |
@@ -134,7 +135,7 @@ static void nmi_cpu_setup(void *dummy) | |||
134 | static struct notifier_block profile_exceptions_nb = { | 135 | static struct notifier_block profile_exceptions_nb = { |
135 | .notifier_call = profile_exceptions_notify, | 136 | .notifier_call = profile_exceptions_notify, |
136 | .next = NULL, | 137 | .next = NULL, |
137 | .priority = 0 | 138 | .priority = 2 |
138 | }; | 139 | }; |
139 | 140 | ||
140 | static int nmi_setup(void) | 141 | static int nmi_setup(void) |
@@ -356,14 +357,11 @@ static void exit_sysfs(void) | |||
356 | #define exit_sysfs() do { } while (0) | 357 | #define exit_sysfs() do { } while (0) |
357 | #endif /* CONFIG_PM */ | 358 | #endif /* CONFIG_PM */ |
358 | 359 | ||
359 | static int p4force; | ||
360 | module_param(p4force, int, 0); | ||
361 | |||
362 | static int __init p4_init(char **cpu_type) | 360 | static int __init p4_init(char **cpu_type) |
363 | { | 361 | { |
364 | __u8 cpu_model = boot_cpu_data.x86_model; | 362 | __u8 cpu_model = boot_cpu_data.x86_model; |
365 | 363 | ||
366 | if (!p4force && (cpu_model > 6 || cpu_model == 5)) | 364 | if (cpu_model > 6 || cpu_model == 5) |
367 | return 0; | 365 | return 0; |
368 | 366 | ||
369 | #ifndef CONFIG_SMP | 367 | #ifndef CONFIG_SMP |
@@ -389,10 +387,25 @@ static int __init p4_init(char **cpu_type) | |||
389 | return 0; | 387 | return 0; |
390 | } | 388 | } |
391 | 389 | ||
390 | static int force_arch_perfmon; | ||
391 | static int force_cpu_type(const char *str, struct kernel_param *kp) | ||
392 | { | ||
393 | if (!strcmp(str, "archperfmon")) { | ||
394 | force_arch_perfmon = 1; | ||
395 | printk(KERN_INFO "oprofile: forcing architectural perfmon\n"); | ||
396 | } | ||
397 | |||
398 | return 0; | ||
399 | } | ||
400 | module_param_call(cpu_type, force_cpu_type, NULL, NULL, 0); | ||
401 | |||
392 | static int __init ppro_init(char **cpu_type) | 402 | static int __init ppro_init(char **cpu_type) |
393 | { | 403 | { |
394 | __u8 cpu_model = boot_cpu_data.x86_model; | 404 | __u8 cpu_model = boot_cpu_data.x86_model; |
395 | 405 | ||
406 | if (force_arch_perfmon && cpu_has_arch_perfmon) | ||
407 | return 0; | ||
408 | |||
396 | switch (cpu_model) { | 409 | switch (cpu_model) { |
397 | case 0 ... 2: | 410 | case 0 ... 2: |
398 | *cpu_type = "i386/ppro"; | 411 | *cpu_type = "i386/ppro"; |
@@ -414,6 +427,13 @@ static int __init ppro_init(char **cpu_type) | |||
414 | case 15: case 23: | 427 | case 15: case 23: |
415 | *cpu_type = "i386/core_2"; | 428 | *cpu_type = "i386/core_2"; |
416 | break; | 429 | break; |
430 | case 26: | ||
431 | arch_perfmon_setup_counters(); | ||
432 | *cpu_type = "i386/core_i7"; | ||
433 | break; | ||
434 | case 28: | ||
435 | *cpu_type = "i386/atom"; | ||
436 | break; | ||
417 | default: | 437 | default: |
418 | /* Unknown */ | 438 | /* Unknown */ |
419 | return 0; | 439 | return 0; |
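The nmi_int.c hunks replace the old p4force parameter with a cpu_type=archperfmon override: a module_param_call setter flips force_arch_perfmon, and ppro_init() then declines the family-specific driver when architectural perfmon is available. A user-space sketch of that flow; the function and string names mirror the hunk, the rest (the model-number switch, the has_arch_perfmon flag) is simplified for illustration.

#include <stdio.h>
#include <string.h>

static int force_arch_perfmon;

static int force_cpu_type(const char *str)
{
	if (!strcmp(str, "archperfmon")) {
		force_arch_perfmon = 1;
		printf("oprofile: forcing architectural perfmon\n");
	}
	return 0;
}

static const char *ppro_init(int cpu_model, int has_arch_perfmon)
{
	/* as in the hunk above: when forced, decline the family driver */
	if (force_arch_perfmon && has_arch_perfmon)
		return NULL;

	switch (cpu_model) {
	case 26: return "i386/core_i7";
	case 28: return "i386/atom";
	default: return "i386/core_2";	/* stand-in for the other models */
	}
}

int main(void)
{
	const char *t;

	force_cpu_type("archperfmon");
	t = ppro_init(26, 1);
	printf("cpu_type: %s\n", t ? t : "(fall back to arch perfmon)");
	return 0;
}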
diff --git a/arch/x86/oprofile/op_model_ppro.c b/arch/x86/oprofile/op_model_ppro.c index 10131fbdaada..4da7230b3d17 100644 --- a/arch/x86/oprofile/op_model_ppro.c +++ b/arch/x86/oprofile/op_model_ppro.c | |||
@@ -18,7 +18,7 @@ | |||
18 | #include <asm/msr.h> | 18 | #include <asm/msr.h> |
19 | #include <asm/apic.h> | 19 | #include <asm/apic.h> |
20 | #include <asm/nmi.h> | 20 | #include <asm/nmi.h> |
21 | #include <asm/intel_arch_perfmon.h> | 21 | #include <asm/perf_counter.h> |
22 | 22 | ||
23 | #include "op_x86_model.h" | 23 | #include "op_x86_model.h" |
24 | #include "op_counter.h" | 24 | #include "op_counter.h" |
@@ -136,6 +136,13 @@ static int ppro_check_ctrs(struct pt_regs * const regs, | |||
136 | u64 val; | 136 | u64 val; |
137 | int i; | 137 | int i; |
138 | 138 | ||
139 | /* | ||
140 | * This can happen if perf counters are in use when | ||
141 | * we steal the die notifier NMI. | ||
142 | */ | ||
143 | if (unlikely(!reset_value)) | ||
144 | goto out; | ||
145 | |||
139 | for (i = 0 ; i < num_counters; ++i) { | 146 | for (i = 0 ; i < num_counters; ++i) { |
140 | if (!reset_value[i]) | 147 | if (!reset_value[i]) |
141 | continue; | 148 | continue; |
@@ -146,6 +153,7 @@ static int ppro_check_ctrs(struct pt_regs * const regs, | |||
146 | } | 153 | } |
147 | } | 154 | } |
148 | 155 | ||
156 | out: | ||
149 | /* Only P6 based Pentium M need to re-unmask the apic vector but it | 157 | /* Only P6 based Pentium M need to re-unmask the apic vector but it |
150 | * doesn't hurt other P6 variant */ | 158 | * doesn't hurt other P6 variant */ |
151 | apic_write(APIC_LVTPC, apic_read(APIC_LVTPC) & ~APIC_LVT_MASKED); | 159 | apic_write(APIC_LVTPC, apic_read(APIC_LVTPC) & ~APIC_LVT_MASKED); |
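The op_model_ppro.c guard exists because the die-notifier NMI is shared with the perf counter code, so ppro_check_ctrs() can run before its own reset_value array has been set up; it must skip its work and still fall through to re-unmasking the APIC vector. A simplified user-space sketch of that defensive pattern, with stand-in types:

#include <stdio.h>

static unsigned int *reset_value;	/* allocated only once setup ran */

static int check_ctrs(int num_counters)
{
	int i;

	if (!reset_value)		/* notifier fired before our setup */
		goto out;

	for (i = 0; i < num_counters; i++)
		if (reset_value[i])
			printf("counter %d: handle overflow\n", i);
out:
	printf("re-unmask the APIC LVTPC vector\n");
	return 1;
}

int main(void)
{
	check_ctrs(2);			/* called before setup: still safe */
	return 0;
}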
diff --git a/arch/x86/pci/acpi.c b/arch/x86/pci/acpi.c index c0ecf250fe51..b26626dc517c 100644 --- a/arch/x86/pci/acpi.c +++ b/arch/x86/pci/acpi.c | |||
@@ -38,15 +38,26 @@ count_resource(struct acpi_resource *acpi_res, void *data) | |||
38 | struct acpi_resource_address64 addr; | 38 | struct acpi_resource_address64 addr; |
39 | acpi_status status; | 39 | acpi_status status; |
40 | 40 | ||
41 | if (info->res_num >= PCI_BUS_NUM_RESOURCES) | ||
42 | return AE_OK; | ||
43 | |||
44 | status = resource_to_addr(acpi_res, &addr); | 41 | status = resource_to_addr(acpi_res, &addr); |
45 | if (ACPI_SUCCESS(status)) | 42 | if (ACPI_SUCCESS(status)) |
46 | info->res_num++; | 43 | info->res_num++; |
47 | return AE_OK; | 44 | return AE_OK; |
48 | } | 45 | } |
49 | 46 | ||
47 | static int | ||
48 | bus_has_transparent_bridge(struct pci_bus *bus) | ||
49 | { | ||
50 | struct pci_dev *dev; | ||
51 | |||
52 | list_for_each_entry(dev, &bus->devices, bus_list) { | ||
53 | u16 class = dev->class >> 8; | ||
54 | |||
55 | if (class == PCI_CLASS_BRIDGE_PCI && dev->transparent) | ||
56 | return true; | ||
57 | } | ||
58 | return false; | ||
59 | } | ||
60 | |||
50 | static acpi_status | 61 | static acpi_status |
51 | setup_resource(struct acpi_resource *acpi_res, void *data) | 62 | setup_resource(struct acpi_resource *acpi_res, void *data) |
52 | { | 63 | { |
@@ -56,9 +67,7 @@ setup_resource(struct acpi_resource *acpi_res, void *data) | |||
56 | acpi_status status; | 67 | acpi_status status; |
57 | unsigned long flags; | 68 | unsigned long flags; |
58 | struct resource *root; | 69 | struct resource *root; |
59 | 70 | int max_root_bus_resources = PCI_BUS_NUM_RESOURCES; | |
60 | if (info->res_num >= PCI_BUS_NUM_RESOURCES) | ||
61 | return AE_OK; | ||
62 | 71 | ||
63 | status = resource_to_addr(acpi_res, &addr); | 72 | status = resource_to_addr(acpi_res, &addr); |
64 | if (!ACPI_SUCCESS(status)) | 73 | if (!ACPI_SUCCESS(status)) |
@@ -82,6 +91,18 @@ setup_resource(struct acpi_resource *acpi_res, void *data) | |||
82 | res->end = res->start + addr.address_length - 1; | 91 | res->end = res->start + addr.address_length - 1; |
83 | res->child = NULL; | 92 | res->child = NULL; |
84 | 93 | ||
94 | if (bus_has_transparent_bridge(info->bus)) | ||
95 | max_root_bus_resources -= 3; | ||
96 | if (info->res_num >= max_root_bus_resources) { | ||
97 | printk(KERN_WARNING "PCI: Failed to allocate 0x%lx-0x%lx " | ||
98 | "from %s for %s due to _CRS returning more than " | ||
99 | "%d resource descriptors\n", (unsigned long) res->start, | ||
100 | (unsigned long) res->end, root->name, info->name, | ||
101 | max_root_bus_resources); | ||
102 | info->res_num++; | ||
103 | return AE_OK; | ||
104 | } | ||
105 | |||
85 | if (insert_resource(root, res)) { | 106 | if (insert_resource(root, res)) { |
86 | printk(KERN_ERR "PCI: Failed to allocate 0x%lx-0x%lx " | 107 | printk(KERN_ERR "PCI: Failed to allocate 0x%lx-0x%lx " |
87 | "from %s for %s\n", (unsigned long) res->start, | 108 | "from %s for %s\n", (unsigned long) res->start, |
diff --git a/arch/x86/pci/i386.c b/arch/x86/pci/i386.c index a85bef20a3b9..0fb56db16d18 100644 --- a/arch/x86/pci/i386.c +++ b/arch/x86/pci/i386.c | |||
@@ -116,7 +116,7 @@ static void __init pcibios_allocate_bus_resources(struct list_head *bus_list) | |||
116 | struct pci_bus *bus; | 116 | struct pci_bus *bus; |
117 | struct pci_dev *dev; | 117 | struct pci_dev *dev; |
118 | int idx; | 118 | int idx; |
119 | struct resource *r, *pr; | 119 | struct resource *r; |
120 | 120 | ||
121 | /* Depth-First Search on bus tree */ | 121 | /* Depth-First Search on bus tree */ |
122 | list_for_each_entry(bus, bus_list, node) { | 122 | list_for_each_entry(bus, bus_list, node) { |
@@ -126,9 +126,8 @@ static void __init pcibios_allocate_bus_resources(struct list_head *bus_list) | |||
126 | r = &dev->resource[idx]; | 126 | r = &dev->resource[idx]; |
127 | if (!r->flags) | 127 | if (!r->flags) |
128 | continue; | 128 | continue; |
129 | pr = pci_find_parent_resource(dev, r); | 129 | if (!r->start || |
130 | if (!r->start || !pr || | 130 | pci_claim_resource(dev, idx) < 0) { |
131 | request_resource(pr, r) < 0) { | ||
132 | dev_info(&dev->dev, "BAR %d: can't allocate resource\n", idx); | 131 | dev_info(&dev->dev, "BAR %d: can't allocate resource\n", idx); |
133 | /* | 132 | /* |
134 | * Something is wrong with the region. | 133 | * Something is wrong with the region. |
@@ -149,7 +148,7 @@ static void __init pcibios_allocate_resources(int pass) | |||
149 | struct pci_dev *dev = NULL; | 148 | struct pci_dev *dev = NULL; |
150 | int idx, disabled; | 149 | int idx, disabled; |
151 | u16 command; | 150 | u16 command; |
152 | struct resource *r, *pr; | 151 | struct resource *r; |
153 | 152 | ||
154 | for_each_pci_dev(dev) { | 153 | for_each_pci_dev(dev) { |
155 | pci_read_config_word(dev, PCI_COMMAND, &command); | 154 | pci_read_config_word(dev, PCI_COMMAND, &command); |
@@ -168,8 +167,7 @@ static void __init pcibios_allocate_resources(int pass) | |||
168 | (unsigned long long) r->start, | 167 | (unsigned long long) r->start, |
169 | (unsigned long long) r->end, | 168 | (unsigned long long) r->end, |
170 | r->flags, disabled, pass); | 169 | r->flags, disabled, pass); |
171 | pr = pci_find_parent_resource(dev, r); | 170 | if (pci_claim_resource(dev, idx) < 0) { |
172 | if (!pr || request_resource(pr, r) < 0) { | ||
173 | dev_info(&dev->dev, "BAR %d: can't allocate resource\n", idx); | 171 | dev_info(&dev->dev, "BAR %d: can't allocate resource\n", idx); |
174 | /* We'll assign a new address later */ | 172 | /* We'll assign a new address later */ |
175 | r->end -= r->start; | 173 | r->end -= r->start; |
@@ -197,7 +195,7 @@ static void __init pcibios_allocate_resources(int pass) | |||
197 | static int __init pcibios_assign_resources(void) | 195 | static int __init pcibios_assign_resources(void) |
198 | { | 196 | { |
199 | struct pci_dev *dev = NULL; | 197 | struct pci_dev *dev = NULL; |
200 | struct resource *r, *pr; | 198 | struct resource *r; |
201 | 199 | ||
202 | if (!(pci_probe & PCI_ASSIGN_ROMS)) { | 200 | if (!(pci_probe & PCI_ASSIGN_ROMS)) { |
203 | /* | 201 | /* |
@@ -209,8 +207,7 @@ static int __init pcibios_assign_resources(void) | |||
209 | r = &dev->resource[PCI_ROM_RESOURCE]; | 207 | r = &dev->resource[PCI_ROM_RESOURCE]; |
210 | if (!r->flags || !r->start) | 208 | if (!r->flags || !r->start) |
211 | continue; | 209 | continue; |
212 | pr = pci_find_parent_resource(dev, r); | 210 | if (pci_claim_resource(dev, PCI_ROM_RESOURCE) < 0) { |
213 | if (!pr || request_resource(pr, r) < 0) { | ||
214 | r->end -= r->start; | 211 | r->end -= r->start; |
215 | r->start = 0; | 212 | r->start = 0; |
216 | } | 213 | } |
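The pci/i386.c hunks collapse the repeated pci_find_parent_resource() + request_resource() sequence into a single pci_claim_resource() call, so the failure handling lives in one helper instead of three call sites. A user-space sketch of why that consolidation helps; all names below are illustrative, not the kernel API.

#include <stdio.h>

struct res { long start, end; int claimed; };

static struct res *find_parent(struct res *r)
{
	static struct res root = { 0, 0xffff, 1 };

	return (r->start >= root.start && r->end <= root.end) ? &root : NULL;
}

static int claim_resource(struct res *r)
{
	struct res *parent = find_parent(r);

	if (!parent || r->claimed)
		return -1;	/* one place to detect and report failure */
	r->claimed = 1;
	return 0;
}

int main(void)
{
	struct res bar = { 0x1000, 0x1fff, 0 };

	if (claim_resource(&bar) < 0)
		printf("BAR: can't allocate resource\n");
	else
		printf("BAR claimed\n");
	return 0;
}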
diff --git a/arch/x86/pci/irq.c b/arch/x86/pci/irq.c index fecbce6e7d7c..0696d506c4ad 100644 --- a/arch/x86/pci/irq.c +++ b/arch/x86/pci/irq.c | |||
@@ -889,6 +889,9 @@ static int pcibios_lookup_irq(struct pci_dev *dev, int assign) | |||
889 | return 0; | 889 | return 0; |
890 | } | 890 | } |
891 | 891 | ||
892 | if (io_apic_assign_pci_irqs) | ||
893 | return 0; | ||
894 | |||
892 | /* Find IRQ routing entry */ | 895 | /* Find IRQ routing entry */ |
893 | 896 | ||
894 | if (!pirq_table) | 897 | if (!pirq_table) |
@@ -1039,56 +1042,15 @@ static void __init pcibios_fixup_irqs(void) | |||
1039 | pirq_penalty[dev->irq]++; | 1042 | pirq_penalty[dev->irq]++; |
1040 | } | 1043 | } |
1041 | 1044 | ||
1045 | if (io_apic_assign_pci_irqs) | ||
1046 | return; | ||
1047 | |||
1042 | dev = NULL; | 1048 | dev = NULL; |
1043 | while ((dev = pci_get_device(PCI_ANY_ID, PCI_ANY_ID, dev)) != NULL) { | 1049 | while ((dev = pci_get_device(PCI_ANY_ID, PCI_ANY_ID, dev)) != NULL) { |
1044 | pci_read_config_byte(dev, PCI_INTERRUPT_PIN, &pin); | 1050 | pci_read_config_byte(dev, PCI_INTERRUPT_PIN, &pin); |
1045 | if (!pin) | 1051 | if (!pin) |
1046 | continue; | 1052 | continue; |
1047 | 1053 | ||
1048 | #ifdef CONFIG_X86_IO_APIC | ||
1049 | /* | ||
1050 | * Recalculate IRQ numbers if we use the I/O APIC. | ||
1051 | */ | ||
1052 | if (io_apic_assign_pci_irqs) { | ||
1053 | int irq; | ||
1054 | |||
1055 | /* | ||
1056 | * interrupt pins are numbered starting from 1 | ||
1057 | */ | ||
1058 | irq = IO_APIC_get_PCI_irq_vector(dev->bus->number, | ||
1059 | PCI_SLOT(dev->devfn), pin - 1); | ||
1060 | /* | ||
1061 | * Busses behind bridges are typically not listed in the | ||
1062 | * MP-table. In this case we have to look up the IRQ | ||
1063 | * based on the parent bus, parent slot, and pin number. | ||
1064 | * The SMP code detects such bridged busses itself so we | ||
1065 | * should get into this branch reliably. | ||
1066 | */ | ||
1067 | if (irq < 0 && dev->bus->parent) { | ||
1068 | /* go back to the bridge */ | ||
1069 | struct pci_dev *bridge = dev->bus->self; | ||
1070 | int bus; | ||
1071 | |||
1072 | pin = pci_swizzle_interrupt_pin(dev, pin); | ||
1073 | bus = bridge->bus->number; | ||
1074 | irq = IO_APIC_get_PCI_irq_vector(bus, | ||
1075 | PCI_SLOT(bridge->devfn), pin - 1); | ||
1076 | if (irq >= 0) | ||
1077 | dev_warn(&dev->dev, | ||
1078 | "using bridge %s INT %c to " | ||
1079 | "get IRQ %d\n", | ||
1080 | pci_name(bridge), | ||
1081 | 'A' + pin - 1, irq); | ||
1082 | } | ||
1083 | if (irq >= 0) { | ||
1084 | dev_info(&dev->dev, | ||
1085 | "PCI->APIC IRQ transform: INT %c " | ||
1086 | "-> IRQ %d\n", | ||
1087 | 'A' + pin - 1, irq); | ||
1088 | dev->irq = irq; | ||
1089 | } | ||
1090 | } | ||
1091 | #endif | ||
1092 | /* | 1054 | /* |
1093 | * Still no IRQ? Try to lookup one... | 1055 | * Still no IRQ? Try to lookup one... |
1094 | */ | 1056 | */ |
@@ -1183,6 +1145,19 @@ int __init pcibios_irq_init(void) | |||
1183 | pcibios_enable_irq = pirq_enable_irq; | 1145 | pcibios_enable_irq = pirq_enable_irq; |
1184 | 1146 | ||
1185 | pcibios_fixup_irqs(); | 1147 | pcibios_fixup_irqs(); |
1148 | |||
1149 | if (io_apic_assign_pci_irqs && pci_routeirq) { | ||
1150 | struct pci_dev *dev = NULL; | ||
1151 | /* | ||
1152 | * PCI IRQ routing is set up by pci_enable_device(), but we | ||
1153 | * also do it here in case there are still broken drivers that | ||
1154 | * don't use pci_enable_device(). | ||
1155 | */ | ||
1156 | printk(KERN_INFO "PCI: Routing PCI interrupts for all devices because \"pci=routeirq\" specified\n"); | ||
1157 | for_each_pci_dev(dev) | ||
1158 | pirq_enable_irq(dev); | ||
1159 | } | ||
1160 | |||
1186 | return 0; | 1161 | return 0; |
1187 | } | 1162 | } |
1188 | 1163 | ||
@@ -1213,16 +1188,23 @@ void pcibios_penalize_isa_irq(int irq, int active) | |||
1213 | static int pirq_enable_irq(struct pci_dev *dev) | 1188 | static int pirq_enable_irq(struct pci_dev *dev) |
1214 | { | 1189 | { |
1215 | u8 pin; | 1190 | u8 pin; |
1216 | struct pci_dev *temp_dev; | ||
1217 | 1191 | ||
1218 | pci_read_config_byte(dev, PCI_INTERRUPT_PIN, &pin); | 1192 | pci_read_config_byte(dev, PCI_INTERRUPT_PIN, &pin); |
1219 | if (pin && !pcibios_lookup_irq(dev, 1) && !dev->irq) { | 1193 | if (pin && !pcibios_lookup_irq(dev, 1)) { |
1220 | char *msg = ""; | 1194 | char *msg = ""; |
1221 | 1195 | ||
1196 | if (!io_apic_assign_pci_irqs && dev->irq) | ||
1197 | return 0; | ||
1198 | |||
1222 | if (io_apic_assign_pci_irqs) { | 1199 | if (io_apic_assign_pci_irqs) { |
1200 | #ifdef CONFIG_X86_IO_APIC | ||
1201 | struct pci_dev *temp_dev; | ||
1223 | int irq; | 1202 | int irq; |
1203 | struct io_apic_irq_attr irq_attr; | ||
1224 | 1204 | ||
1225 | irq = IO_APIC_get_PCI_irq_vector(dev->bus->number, PCI_SLOT(dev->devfn), pin - 1); | 1205 | irq = IO_APIC_get_PCI_irq_vector(dev->bus->number, |
1206 | PCI_SLOT(dev->devfn), | ||
1207 | pin - 1, &irq_attr); | ||
1226 | /* | 1208 | /* |
1227 | * Busses behind bridges are typically not listed in the MP-table. | 1209 | * Busses behind bridges are typically not listed in the MP-table. |
1228 | * In this case we have to look up the IRQ based on the parent bus, | 1210 | * In this case we have to look up the IRQ based on the parent bus, |
@@ -1235,7 +1217,8 @@ static int pirq_enable_irq(struct pci_dev *dev) | |||
1235 | 1217 | ||
1236 | pin = pci_swizzle_interrupt_pin(dev, pin); | 1218 | pin = pci_swizzle_interrupt_pin(dev, pin); |
1237 | irq = IO_APIC_get_PCI_irq_vector(bridge->bus->number, | 1219 | irq = IO_APIC_get_PCI_irq_vector(bridge->bus->number, |
1238 | PCI_SLOT(bridge->devfn), pin - 1); | 1220 | PCI_SLOT(bridge->devfn), |
1221 | pin - 1, &irq_attr); | ||
1239 | if (irq >= 0) | 1222 | if (irq >= 0) |
1240 | dev_warn(&dev->dev, "using bridge %s " | 1223 | dev_warn(&dev->dev, "using bridge %s " |
1241 | "INT %c to get IRQ %d\n", | 1224 | "INT %c to get IRQ %d\n", |
@@ -1245,12 +1228,15 @@ static int pirq_enable_irq(struct pci_dev *dev) | |||
1245 | } | 1228 | } |
1246 | dev = temp_dev; | 1229 | dev = temp_dev; |
1247 | if (irq >= 0) { | 1230 | if (irq >= 0) { |
1231 | io_apic_set_pci_routing(&dev->dev, irq, | ||
1232 | &irq_attr); | ||
1233 | dev->irq = irq; | ||
1248 | dev_info(&dev->dev, "PCI->APIC IRQ transform: " | 1234 | dev_info(&dev->dev, "PCI->APIC IRQ transform: " |
1249 | "INT %c -> IRQ %d\n", 'A' + pin - 1, irq); | 1235 | "INT %c -> IRQ %d\n", 'A' + pin - 1, irq); |
1250 | dev->irq = irq; | ||
1251 | return 0; | 1236 | return 0; |
1252 | } else | 1237 | } else |
1253 | msg = "; probably buggy MP table"; | 1238 | msg = "; probably buggy MP table"; |
1239 | #endif | ||
1254 | } else if (pci_probe & PCI_BIOS_IRQ_SCAN) | 1240 | } else if (pci_probe & PCI_BIOS_IRQ_SCAN) |
1255 | msg = ""; | 1241 | msg = ""; |
1256 | else | 1242 | else |

diff --git a/arch/x86/pci/mmconfig-shared.c b/arch/x86/pci/mmconfig-shared.c index 8766b0e216c5..712443ec6d43 100644 --- a/arch/x86/pci/mmconfig-shared.c +++ b/arch/x86/pci/mmconfig-shared.c | |||
@@ -523,6 +523,69 @@ reject: | |||
523 | 523 | ||
524 | static int __initdata known_bridge; | 524 | static int __initdata known_bridge; |
525 | 525 | ||
526 | static int acpi_mcfg_64bit_base_addr __initdata = FALSE; | ||
527 | |||
528 | /* The physical address of the MMCONFIG aperture. Set from ACPI tables. */ | ||
529 | struct acpi_mcfg_allocation *pci_mmcfg_config; | ||
530 | int pci_mmcfg_config_num; | ||
531 | |||
532 | static int __init acpi_mcfg_oem_check(struct acpi_table_mcfg *mcfg) | ||
533 | { | ||
534 | if (!strcmp(mcfg->header.oem_id, "SGI")) | ||
535 | acpi_mcfg_64bit_base_addr = TRUE; | ||
536 | |||
537 | return 0; | ||
538 | } | ||
539 | |||
540 | static int __init pci_parse_mcfg(struct acpi_table_header *header) | ||
541 | { | ||
542 | struct acpi_table_mcfg *mcfg; | ||
543 | unsigned long i; | ||
544 | int config_size; | ||
545 | |||
546 | if (!header) | ||
547 | return -EINVAL; | ||
548 | |||
549 | mcfg = (struct acpi_table_mcfg *)header; | ||
550 | |||
551 | /* how many config structures do we have */ | ||
552 | pci_mmcfg_config_num = 0; | ||
553 | i = header->length - sizeof(struct acpi_table_mcfg); | ||
554 | while (i >= sizeof(struct acpi_mcfg_allocation)) { | ||
555 | ++pci_mmcfg_config_num; | ||
556 | i -= sizeof(struct acpi_mcfg_allocation); | ||
557 | }; | ||
558 | if (pci_mmcfg_config_num == 0) { | ||
559 | printk(KERN_ERR PREFIX "MMCONFIG has no entries\n"); | ||
560 | return -ENODEV; | ||
561 | } | ||
562 | |||
563 | config_size = pci_mmcfg_config_num * sizeof(*pci_mmcfg_config); | ||
564 | pci_mmcfg_config = kmalloc(config_size, GFP_KERNEL); | ||
565 | if (!pci_mmcfg_config) { | ||
566 | printk(KERN_WARNING PREFIX | ||
567 | "No memory for MCFG config tables\n"); | ||
568 | return -ENOMEM; | ||
569 | } | ||
570 | |||
571 | memcpy(pci_mmcfg_config, &mcfg[1], config_size); | ||
572 | |||
573 | acpi_mcfg_oem_check(mcfg); | ||
574 | |||
575 | for (i = 0; i < pci_mmcfg_config_num; ++i) { | ||
576 | if ((pci_mmcfg_config[i].address > 0xFFFFFFFF) && | ||
577 | !acpi_mcfg_64bit_base_addr) { | ||
578 | printk(KERN_ERR PREFIX | ||
579 | "MMCONFIG not in low 4GB of memory\n"); | ||
580 | kfree(pci_mmcfg_config); | ||
581 | pci_mmcfg_config_num = 0; | ||
582 | return -ENODEV; | ||
583 | } | ||
584 | } | ||
585 | |||
586 | return 0; | ||
587 | } | ||
588 | |||
526 | static void __init __pci_mmcfg_init(int early) | 589 | static void __init __pci_mmcfg_init(int early) |
527 | { | 590 | { |
528 | /* MMCONFIG disabled */ | 591 | /* MMCONFIG disabled */ |
@@ -543,7 +606,7 @@ static void __init __pci_mmcfg_init(int early) | |||
543 | } | 606 | } |
544 | 607 | ||
545 | if (!known_bridge) | 608 | if (!known_bridge) |
546 | acpi_table_parse(ACPI_SIG_MCFG, acpi_parse_mcfg); | 609 | acpi_table_parse(ACPI_SIG_MCFG, pci_parse_mcfg); |
547 | 610 | ||
548 | pci_mmcfg_reject_broken(early); | 611 | pci_mmcfg_reject_broken(early); |
549 | 612 | ||
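The mmconfig-shared.c addition parses the ACPI MCFG table locally: the number of fixed-size allocation entries is derived from the table's total length, the entries are copied out in one allocation, and each address is then validated against the low-4GB rule (waived for SGI OEM tables). A user-space analogue of that count/copy/validate flow; the struct layouts below are simplified stand-ins for the ACPI ones.

#include <stdint.h>
#include <stdio.h>
#include <stdlib.h>
#include <string.h>

struct mcfg_header { uint32_t length; uint32_t reserved; };
struct mcfg_alloc  { uint64_t address; uint16_t segment; };

int main(void)
{
	/* fake table: header followed by two allocation entries */
	struct { struct mcfg_header h; struct mcfg_alloc e[2]; } table = {
		.h = { .length = sizeof(table) },
		.e = { { 0xe0000000ULL, 0 }, { 0x1f0000000ULL, 1 } },
	};
	size_t left = table.h.length - sizeof(struct mcfg_header);
	size_t num  = left / sizeof(struct mcfg_alloc);
	struct mcfg_alloc *cfg = malloc(num * sizeof(*cfg));
	size_t i;

	if (!num || !cfg)
		return 1;
	memcpy(cfg, table.e, num * sizeof(*cfg));

	for (i = 0; i < num; i++)
		if (cfg[i].address > 0xFFFFFFFFULL)
			printf("entry %zu: MMCONFIG not in low 4GB\n", i);

	free(cfg);
	return 0;
}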
diff --git a/arch/x86/power/Makefile b/arch/x86/power/Makefile index 58b32db33125..de2abbd07544 100644 --- a/arch/x86/power/Makefile +++ b/arch/x86/power/Makefile | |||
@@ -3,5 +3,5 @@ | |||
3 | nostackp := $(call cc-option, -fno-stack-protector) | 3 | nostackp := $(call cc-option, -fno-stack-protector) |
4 | CFLAGS_cpu_$(BITS).o := $(nostackp) | 4 | CFLAGS_cpu_$(BITS).o := $(nostackp) |
5 | 5 | ||
6 | obj-$(CONFIG_PM_SLEEP) += cpu_$(BITS).o | 6 | obj-$(CONFIG_PM_SLEEP) += cpu.o |
7 | obj-$(CONFIG_HIBERNATION) += hibernate_$(BITS).o hibernate_asm_$(BITS).o | 7 | obj-$(CONFIG_HIBERNATION) += hibernate_$(BITS).o hibernate_asm_$(BITS).o |
diff --git a/arch/x86/power/cpu_64.c b/arch/x86/power/cpu.c index 5343540f2607..d277ef1eea51 100644 --- a/arch/x86/power/cpu_64.c +++ b/arch/x86/power/cpu.c | |||
@@ -1,5 +1,5 @@ | |||
1 | /* | 1 | /* |
2 | * Suspend and hibernation support for x86-64 | 2 | * Suspend support specific for i386/x86-64. |
3 | * | 3 | * |
4 | * Distribute under GPLv2 | 4 | * Distribute under GPLv2 |
5 | * | 5 | * |
@@ -8,18 +8,28 @@ | |||
8 | * Copyright (c) 2001 Patrick Mochel <mochel@osdl.org> | 8 | * Copyright (c) 2001 Patrick Mochel <mochel@osdl.org> |
9 | */ | 9 | */ |
10 | 10 | ||
11 | #include <linux/smp.h> | ||
12 | #include <linux/suspend.h> | 11 | #include <linux/suspend.h> |
13 | #include <asm/proto.h> | 12 | #include <linux/smp.h> |
14 | #include <asm/page.h> | 13 | |
15 | #include <asm/pgtable.h> | 14 | #include <asm/pgtable.h> |
15 | #include <asm/proto.h> | ||
16 | #include <asm/mtrr.h> | 16 | #include <asm/mtrr.h> |
17 | #include <asm/page.h> | ||
18 | #include <asm/mce.h> | ||
17 | #include <asm/xcr.h> | 19 | #include <asm/xcr.h> |
18 | #include <asm/suspend.h> | 20 | #include <asm/suspend.h> |
19 | 21 | ||
20 | static void fix_processor_context(void); | 22 | #ifdef CONFIG_X86_32 |
23 | static struct saved_context saved_context; | ||
21 | 24 | ||
25 | unsigned long saved_context_ebx; | ||
26 | unsigned long saved_context_esp, saved_context_ebp; | ||
27 | unsigned long saved_context_esi, saved_context_edi; | ||
28 | unsigned long saved_context_eflags; | ||
29 | #else | ||
30 | /* CONFIG_X86_64 */ | ||
22 | struct saved_context saved_context; | 31 | struct saved_context saved_context; |
32 | #endif | ||
23 | 33 | ||
24 | /** | 34 | /** |
25 | * __save_processor_state - save CPU registers before creating a | 35 | * __save_processor_state - save CPU registers before creating a |
@@ -38,19 +48,35 @@ struct saved_context saved_context; | |||
38 | */ | 48 | */ |
39 | static void __save_processor_state(struct saved_context *ctxt) | 49 | static void __save_processor_state(struct saved_context *ctxt) |
40 | { | 50 | { |
51 | #ifdef CONFIG_X86_32 | ||
52 | mtrr_save_fixed_ranges(NULL); | ||
53 | #endif | ||
41 | kernel_fpu_begin(); | 54 | kernel_fpu_begin(); |
42 | 55 | ||
43 | /* | 56 | /* |
44 | * descriptor tables | 57 | * descriptor tables |
45 | */ | 58 | */ |
59 | #ifdef CONFIG_X86_32 | ||
60 | store_gdt(&ctxt->gdt); | ||
61 | store_idt(&ctxt->idt); | ||
62 | #else | ||
63 | /* CONFIG_X86_64 */ | ||
46 | store_gdt((struct desc_ptr *)&ctxt->gdt_limit); | 64 | store_gdt((struct desc_ptr *)&ctxt->gdt_limit); |
47 | store_idt((struct desc_ptr *)&ctxt->idt_limit); | 65 | store_idt((struct desc_ptr *)&ctxt->idt_limit); |
66 | #endif | ||
48 | store_tr(ctxt->tr); | 67 | store_tr(ctxt->tr); |
49 | 68 | ||
50 | /* XMM0..XMM15 should be handled by kernel_fpu_begin(). */ | 69 | /* XMM0..XMM15 should be handled by kernel_fpu_begin(). */ |
51 | /* | 70 | /* |
52 | * segment registers | 71 | * segment registers |
53 | */ | 72 | */ |
73 | #ifdef CONFIG_X86_32 | ||
74 | savesegment(es, ctxt->es); | ||
75 | savesegment(fs, ctxt->fs); | ||
76 | savesegment(gs, ctxt->gs); | ||
77 | savesegment(ss, ctxt->ss); | ||
78 | #else | ||
79 | /* CONFIG_X86_64 */ | ||
54 | asm volatile ("movw %%ds, %0" : "=m" (ctxt->ds)); | 80 | asm volatile ("movw %%ds, %0" : "=m" (ctxt->ds)); |
55 | asm volatile ("movw %%es, %0" : "=m" (ctxt->es)); | 81 | asm volatile ("movw %%es, %0" : "=m" (ctxt->es)); |
56 | asm volatile ("movw %%fs, %0" : "=m" (ctxt->fs)); | 82 | asm volatile ("movw %%fs, %0" : "=m" (ctxt->fs)); |
@@ -62,30 +88,87 @@ static void __save_processor_state(struct saved_context *ctxt) | |||
62 | rdmsrl(MSR_KERNEL_GS_BASE, ctxt->gs_kernel_base); | 88 | rdmsrl(MSR_KERNEL_GS_BASE, ctxt->gs_kernel_base); |
63 | mtrr_save_fixed_ranges(NULL); | 89 | mtrr_save_fixed_ranges(NULL); |
64 | 90 | ||
91 | rdmsrl(MSR_EFER, ctxt->efer); | ||
92 | #endif | ||
93 | |||
65 | /* | 94 | /* |
66 | * control registers | 95 | * control registers |
67 | */ | 96 | */ |
68 | rdmsrl(MSR_EFER, ctxt->efer); | ||
69 | ctxt->cr0 = read_cr0(); | 97 | ctxt->cr0 = read_cr0(); |
70 | ctxt->cr2 = read_cr2(); | 98 | ctxt->cr2 = read_cr2(); |
71 | ctxt->cr3 = read_cr3(); | 99 | ctxt->cr3 = read_cr3(); |
100 | #ifdef CONFIG_X86_32 | ||
101 | ctxt->cr4 = read_cr4_safe(); | ||
102 | #else | ||
103 | /* CONFIG_X86_64 */ | ||
72 | ctxt->cr4 = read_cr4(); | 104 | ctxt->cr4 = read_cr4(); |
73 | ctxt->cr8 = read_cr8(); | 105 | ctxt->cr8 = read_cr8(); |
106 | #endif | ||
74 | } | 107 | } |
75 | 108 | ||
109 | /* Needed by apm.c */ | ||
76 | void save_processor_state(void) | 110 | void save_processor_state(void) |
77 | { | 111 | { |
78 | __save_processor_state(&saved_context); | 112 | __save_processor_state(&saved_context); |
79 | } | 113 | } |
114 | #ifdef CONFIG_X86_32 | ||
115 | EXPORT_SYMBOL(save_processor_state); | ||
116 | #endif | ||
80 | 117 | ||
81 | static void do_fpu_end(void) | 118 | static void do_fpu_end(void) |
82 | { | 119 | { |
83 | /* | 120 | /* |
84 | * Restore FPU regs if necessary | 121 | * Restore FPU regs if necessary. |
85 | */ | 122 | */ |
86 | kernel_fpu_end(); | 123 | kernel_fpu_end(); |
87 | } | 124 | } |
88 | 125 | ||
126 | static void fix_processor_context(void) | ||
127 | { | ||
128 | int cpu = smp_processor_id(); | ||
129 | struct tss_struct *t = &per_cpu(init_tss, cpu); | ||
130 | |||
131 | set_tss_desc(cpu, t); /* | ||
132 | * This just modifies memory; should not be | ||
133 | * necessary. But... This is necessary, because | ||
134 | * 386 hardware has concept of busy TSS or some | ||
135 | * similar stupidity. | ||
136 | */ | ||
137 | |||
138 | #ifdef CONFIG_X86_64 | ||
139 | get_cpu_gdt_table(cpu)[GDT_ENTRY_TSS].type = 9; | ||
140 | |||
141 | syscall_init(); /* This sets MSR_*STAR and related */ | ||
142 | #endif | ||
143 | load_TR_desc(); /* This does ltr */ | ||
144 | load_LDT(¤t->active_mm->context); /* This does lldt */ | ||
145 | |||
146 | /* | ||
147 | * Now maybe reload the debug registers | ||
148 | */ | ||
149 | if (current->thread.debugreg7) { | ||
150 | #ifdef CONFIG_X86_32 | ||
151 | set_debugreg(current->thread.debugreg0, 0); | ||
152 | set_debugreg(current->thread.debugreg1, 1); | ||
153 | set_debugreg(current->thread.debugreg2, 2); | ||
154 | set_debugreg(current->thread.debugreg3, 3); | ||
155 | /* no 4 and 5 */ | ||
156 | set_debugreg(current->thread.debugreg6, 6); | ||
157 | set_debugreg(current->thread.debugreg7, 7); | ||
158 | #else | ||
159 | /* CONFIG_X86_64 */ | ||
160 | loaddebug(¤t->thread, 0); | ||
161 | loaddebug(¤t->thread, 1); | ||
162 | loaddebug(¤t->thread, 2); | ||
163 | loaddebug(¤t->thread, 3); | ||
164 | /* no 4 and 5 */ | ||
165 | loaddebug(¤t->thread, 6); | ||
166 | loaddebug(¤t->thread, 7); | ||
167 | #endif | ||
168 | } | ||
169 | |||
170 | } | ||
171 | |||
89 | /** | 172 | /** |
90 | * __restore_processor_state - restore the contents of CPU registers saved | 173 | * __restore_processor_state - restore the contents of CPU registers saved |
91 | * by __save_processor_state() | 174 | * by __save_processor_state() |
@@ -96,9 +179,16 @@ static void __restore_processor_state(struct saved_context *ctxt) | |||
96 | /* | 179 | /* |
97 | * control registers | 180 | * control registers |
98 | */ | 181 | */ |
182 | /* cr4 was introduced in the Pentium CPU */ | ||
183 | #ifdef CONFIG_X86_32 | ||
184 | if (ctxt->cr4) | ||
185 | write_cr4(ctxt->cr4); | ||
186 | #else | ||
187 | /* CONFIG_X86_64 */ | ||
99 | wrmsrl(MSR_EFER, ctxt->efer); | 188 | wrmsrl(MSR_EFER, ctxt->efer); |
100 | write_cr8(ctxt->cr8); | 189 | write_cr8(ctxt->cr8); |
101 | write_cr4(ctxt->cr4); | 190 | write_cr4(ctxt->cr4); |
191 | #endif | ||
102 | write_cr3(ctxt->cr3); | 192 | write_cr3(ctxt->cr3); |
103 | write_cr2(ctxt->cr2); | 193 | write_cr2(ctxt->cr2); |
104 | write_cr0(ctxt->cr0); | 194 | write_cr0(ctxt->cr0); |
@@ -107,13 +197,31 @@ static void __restore_processor_state(struct saved_context *ctxt) | |||
107 | * now restore the descriptor tables to their proper values | 197 | * now restore the descriptor tables to their proper values |
108 | * ltr is done in fix_processor_context(). | 198 | * ltr is done in fix_processor_context(). |
109 | */ | 199 | */ |
200 | #ifdef CONFIG_X86_32 | ||
201 | load_gdt(&ctxt->gdt); | ||
202 | load_idt(&ctxt->idt); | ||
203 | #else | ||
204 | /* CONFIG_X86_64 */ | ||
110 | load_gdt((const struct desc_ptr *)&ctxt->gdt_limit); | 205 | load_gdt((const struct desc_ptr *)&ctxt->gdt_limit); |
111 | load_idt((const struct desc_ptr *)&ctxt->idt_limit); | 206 | load_idt((const struct desc_ptr *)&ctxt->idt_limit); |
112 | 207 | #endif | |
113 | 208 | ||
114 | /* | 209 | /* |
115 | * segment registers | 210 | * segment registers |
116 | */ | 211 | */ |
212 | #ifdef CONFIG_X86_32 | ||
213 | loadsegment(es, ctxt->es); | ||
214 | loadsegment(fs, ctxt->fs); | ||
215 | loadsegment(gs, ctxt->gs); | ||
216 | loadsegment(ss, ctxt->ss); | ||
217 | |||
218 | /* | ||
219 | * sysenter MSRs | ||
220 | */ | ||
221 | if (boot_cpu_has(X86_FEATURE_SEP)) | ||
222 | enable_sep_cpu(); | ||
223 | #else | ||
224 | /* CONFIG_X86_64 */ | ||
117 | asm volatile ("movw %0, %%ds" :: "r" (ctxt->ds)); | 225 | asm volatile ("movw %0, %%ds" :: "r" (ctxt->ds)); |
118 | asm volatile ("movw %0, %%es" :: "r" (ctxt->es)); | 226 | asm volatile ("movw %0, %%es" :: "r" (ctxt->es)); |
119 | asm volatile ("movw %0, %%fs" :: "r" (ctxt->fs)); | 227 | asm volatile ("movw %0, %%fs" :: "r" (ctxt->fs)); |
@@ -123,6 +231,7 @@ static void __restore_processor_state(struct saved_context *ctxt) | |||
123 | wrmsrl(MSR_FS_BASE, ctxt->fs_base); | 231 | wrmsrl(MSR_FS_BASE, ctxt->fs_base); |
124 | wrmsrl(MSR_GS_BASE, ctxt->gs_base); | 232 | wrmsrl(MSR_GS_BASE, ctxt->gs_base); |
125 | wrmsrl(MSR_KERNEL_GS_BASE, ctxt->gs_kernel_base); | 233 | wrmsrl(MSR_KERNEL_GS_BASE, ctxt->gs_kernel_base); |
234 | #endif | ||
126 | 235 | ||
127 | /* | 236 | /* |
128 | * restore XCR0 for xsave capable CPUs. | 237 | * restore XCR0 for xsave capable CPUs. |
@@ -134,41 +243,17 @@ static void __restore_processor_state(struct saved_context *ctxt) | |||
134 | 243 | ||
135 | do_fpu_end(); | 244 | do_fpu_end(); |
136 | mtrr_ap_init(); | 245 | mtrr_ap_init(); |
246 | |||
247 | #ifdef CONFIG_X86_32 | ||
248 | mcheck_init(&boot_cpu_data); | ||
249 | #endif | ||
137 | } | 250 | } |
138 | 251 | ||
252 | /* Needed by apm.c */ | ||
139 | void restore_processor_state(void) | 253 | void restore_processor_state(void) |
140 | { | 254 | { |
141 | __restore_processor_state(&saved_context); | 255 | __restore_processor_state(&saved_context); |
142 | } | 256 | } |
143 | 257 | #ifdef CONFIG_X86_32 | |
144 | static void fix_processor_context(void) | 258 | EXPORT_SYMBOL(restore_processor_state); |
145 | { | 259 | #endif |
146 | int cpu = smp_processor_id(); | ||
147 | struct tss_struct *t = &per_cpu(init_tss, cpu); | ||
148 | |||
149 | /* | ||
150 | * This just modifies memory; should not be necessary. But... This | ||
151 | * is necessary, because 386 hardware has concept of busy TSS or some | ||
152 | * similar stupidity. | ||
153 | */ | ||
154 | set_tss_desc(cpu, t); | ||
155 | |||
156 | get_cpu_gdt_table(cpu)[GDT_ENTRY_TSS].type = 9; | ||
157 | |||
158 | syscall_init(); /* This sets MSR_*STAR and related */ | ||
159 | load_TR_desc(); /* This does ltr */ | ||
160 | load_LDT(¤t->active_mm->context); /* This does lldt */ | ||
161 | |||
162 | /* | ||
163 | * Now maybe reload the debug registers | ||
164 | */ | ||
165 | if (current->thread.debugreg7){ | ||
166 | loaddebug(¤t->thread, 0); | ||
167 | loaddebug(¤t->thread, 1); | ||
168 | loaddebug(¤t->thread, 2); | ||
169 | loaddebug(¤t->thread, 3); | ||
170 | /* no 4 and 5 */ | ||
171 | loaddebug(¤t->thread, 6); | ||
172 | loaddebug(¤t->thread, 7); | ||
173 | } | ||
174 | } | ||
diff --git a/arch/x86/power/cpu_32.c b/arch/x86/power/cpu_32.c deleted file mode 100644 index ce702c5b3a2c..000000000000 --- a/arch/x86/power/cpu_32.c +++ /dev/null | |||
@@ -1,148 +0,0 @@ | |||
1 | /* | ||
2 | * Suspend support specific for i386. | ||
3 | * | ||
4 | * Distribute under GPLv2 | ||
5 | * | ||
6 | * Copyright (c) 2002 Pavel Machek <pavel@suse.cz> | ||
7 | * Copyright (c) 2001 Patrick Mochel <mochel@osdl.org> | ||
8 | */ | ||
9 | |||
10 | #include <linux/module.h> | ||
11 | #include <linux/suspend.h> | ||
12 | #include <asm/mtrr.h> | ||
13 | #include <asm/mce.h> | ||
14 | #include <asm/xcr.h> | ||
15 | #include <asm/suspend.h> | ||
16 | |||
17 | static struct saved_context saved_context; | ||
18 | |||
19 | unsigned long saved_context_ebx; | ||
20 | unsigned long saved_context_esp, saved_context_ebp; | ||
21 | unsigned long saved_context_esi, saved_context_edi; | ||
22 | unsigned long saved_context_eflags; | ||
23 | |||
24 | static void __save_processor_state(struct saved_context *ctxt) | ||
25 | { | ||
26 | mtrr_save_fixed_ranges(NULL); | ||
27 | kernel_fpu_begin(); | ||
28 | |||
29 | /* | ||
30 | * descriptor tables | ||
31 | */ | ||
32 | store_gdt(&ctxt->gdt); | ||
33 | store_idt(&ctxt->idt); | ||
34 | store_tr(ctxt->tr); | ||
35 | |||
36 | /* | ||
37 | * segment registers | ||
38 | */ | ||
39 | savesegment(es, ctxt->es); | ||
40 | savesegment(fs, ctxt->fs); | ||
41 | savesegment(gs, ctxt->gs); | ||
42 | savesegment(ss, ctxt->ss); | ||
43 | |||
44 | /* | ||
45 | * control registers | ||
46 | */ | ||
47 | ctxt->cr0 = read_cr0(); | ||
48 | ctxt->cr2 = read_cr2(); | ||
49 | ctxt->cr3 = read_cr3(); | ||
50 | ctxt->cr4 = read_cr4_safe(); | ||
51 | } | ||
52 | |||
53 | /* Needed by apm.c */ | ||
54 | void save_processor_state(void) | ||
55 | { | ||
56 | __save_processor_state(&saved_context); | ||
57 | } | ||
58 | EXPORT_SYMBOL(save_processor_state); | ||
59 | |||
60 | static void do_fpu_end(void) | ||
61 | { | ||
62 | /* | ||
63 | * Restore FPU regs if necessary. | ||
64 | */ | ||
65 | kernel_fpu_end(); | ||
66 | } | ||
67 | |||
68 | static void fix_processor_context(void) | ||
69 | { | ||
70 | int cpu = smp_processor_id(); | ||
71 | struct tss_struct *t = &per_cpu(init_tss, cpu); | ||
72 | |||
73 | set_tss_desc(cpu, t); /* | ||
74 | * This just modifies memory; should not be | ||
75 | * necessary. But... This is necessary, because | ||
76 | * 386 hardware has concept of busy TSS or some | ||
77 | * similar stupidity. | ||
78 | */ | ||
79 | |||
80 | load_TR_desc(); /* This does ltr */ | ||
81 | load_LDT(¤t->active_mm->context); /* This does lldt */ | ||
82 | |||
83 | /* | ||
84 | * Now maybe reload the debug registers | ||
85 | */ | ||
86 | if (current->thread.debugreg7) { | ||
87 | set_debugreg(current->thread.debugreg0, 0); | ||
88 | set_debugreg(current->thread.debugreg1, 1); | ||
89 | set_debugreg(current->thread.debugreg2, 2); | ||
90 | set_debugreg(current->thread.debugreg3, 3); | ||
91 | /* no 4 and 5 */ | ||
92 | set_debugreg(current->thread.debugreg6, 6); | ||
93 | set_debugreg(current->thread.debugreg7, 7); | ||
94 | } | ||
95 | |||
96 | } | ||
97 | |||
98 | static void __restore_processor_state(struct saved_context *ctxt) | ||
99 | { | ||
100 | /* | ||
101 | * control registers | ||
102 | */ | ||
103 | /* cr4 was introduced in the Pentium CPU */ | ||
104 | if (ctxt->cr4) | ||
105 | write_cr4(ctxt->cr4); | ||
106 | write_cr3(ctxt->cr3); | ||
107 | write_cr2(ctxt->cr2); | ||
108 | write_cr0(ctxt->cr0); | ||
109 | |||
110 | /* | ||
111 | * now restore the descriptor tables to their proper values | ||
112 | * ltr is done i fix_processor_context(). | ||
113 | */ | ||
114 | load_gdt(&ctxt->gdt); | ||
115 | load_idt(&ctxt->idt); | ||
116 | |||
117 | /* | ||
118 | * segment registers | ||
119 | */ | ||
120 | loadsegment(es, ctxt->es); | ||
121 | loadsegment(fs, ctxt->fs); | ||
122 | loadsegment(gs, ctxt->gs); | ||
123 | loadsegment(ss, ctxt->ss); | ||
124 | |||
125 | /* | ||
126 | * sysenter MSRs | ||
127 | */ | ||
128 | if (boot_cpu_has(X86_FEATURE_SEP)) | ||
129 | enable_sep_cpu(); | ||
130 | |||
131 | /* | ||
132 | * restore XCR0 for xsave capable cpu's. | ||
133 | */ | ||
134 | if (cpu_has_xsave) | ||
135 | xsetbv(XCR_XFEATURE_ENABLED_MASK, pcntxt_mask); | ||
136 | |||
137 | fix_processor_context(); | ||
138 | do_fpu_end(); | ||
139 | mtrr_ap_init(); | ||
140 | mcheck_init(&boot_cpu_data); | ||
141 | } | ||
142 | |||
143 | /* Needed by apm.c */ | ||
144 | void restore_processor_state(void) | ||
145 | { | ||
146 | __restore_processor_state(&saved_context); | ||
147 | } | ||
148 | EXPORT_SYMBOL(restore_processor_state); | ||
diff --git a/arch/x86/vdso/Makefile b/arch/x86/vdso/Makefile index 16a9020c8f11..88112b49f02c 100644 --- a/arch/x86/vdso/Makefile +++ b/arch/x86/vdso/Makefile | |||
@@ -123,6 +123,7 @@ quiet_cmd_vdso = VDSO $@ | |||
123 | -Wl,-T,$(filter %.lds,$^) $(filter %.o,$^) | 123 | -Wl,-T,$(filter %.lds,$^) $(filter %.o,$^) |
124 | 124 | ||
125 | VDSO_LDFLAGS = -fPIC -shared $(call ld-option, -Wl$(comma)--hash-style=sysv) | 125 | VDSO_LDFLAGS = -fPIC -shared $(call ld-option, -Wl$(comma)--hash-style=sysv) |
126 | GCOV_PROFILE := n | ||
126 | 127 | ||
127 | # | 128 | # |
128 | # Install the unstripped copy of vdso*.so listed in $(vdso-install-y). | 129 | # Install the unstripped copy of vdso*.so listed in $(vdso-install-y). |
diff --git a/arch/x86/vdso/vdso32-setup.c b/arch/x86/vdso/vdso32-setup.c index 1241f118ab56..58bc00f68b12 100644 --- a/arch/x86/vdso/vdso32-setup.c +++ b/arch/x86/vdso/vdso32-setup.c | |||
@@ -338,6 +338,8 @@ int arch_setup_additional_pages(struct linux_binprm *bprm, int uses_interp) | |||
338 | } | 338 | } |
339 | } | 339 | } |
340 | 340 | ||
341 | current->mm->context.vdso = (void *)addr; | ||
342 | |||
341 | if (compat_uses_vma || !compat) { | 343 | if (compat_uses_vma || !compat) { |
342 | /* | 344 | /* |
343 | * MAYWRITE to allow gdb to COW and set breakpoints | 345 | * MAYWRITE to allow gdb to COW and set breakpoints |
@@ -358,11 +360,13 @@ int arch_setup_additional_pages(struct linux_binprm *bprm, int uses_interp) | |||
358 | goto up_fail; | 360 | goto up_fail; |
359 | } | 361 | } |
360 | 362 | ||
361 | current->mm->context.vdso = (void *)addr; | ||
362 | current_thread_info()->sysenter_return = | 363 | current_thread_info()->sysenter_return = |
363 | VDSO32_SYMBOL(addr, SYSENTER_RETURN); | 364 | VDSO32_SYMBOL(addr, SYSENTER_RETURN); |
364 | 365 | ||
365 | up_fail: | 366 | up_fail: |
367 | if (ret) | ||
368 | current->mm->context.vdso = NULL; | ||
369 | |||
366 | up_write(&mm->mmap_sem); | 370 | up_write(&mm->mmap_sem); |
367 | 371 | ||
368 | return ret; | 372 | return ret; |
diff --git a/arch/x86/vdso/vma.c b/arch/x86/vdso/vma.c index 7133cdf9098b..21e1aeb9f3ea 100644 --- a/arch/x86/vdso/vma.c +++ b/arch/x86/vdso/vma.c | |||
@@ -8,6 +8,7 @@ | |||
8 | #include <linux/sched.h> | 8 | #include <linux/sched.h> |
9 | #include <linux/init.h> | 9 | #include <linux/init.h> |
10 | #include <linux/random.h> | 10 | #include <linux/random.h> |
11 | #include <linux/elf.h> | ||
11 | #include <asm/vsyscall.h> | 12 | #include <asm/vsyscall.h> |
12 | #include <asm/vgtod.h> | 13 | #include <asm/vgtod.h> |
13 | #include <asm/proto.h> | 14 | #include <asm/proto.h> |
@@ -115,15 +116,18 @@ int arch_setup_additional_pages(struct linux_binprm *bprm, int uses_interp) | |||
115 | goto up_fail; | 116 | goto up_fail; |
116 | } | 117 | } |
117 | 118 | ||
119 | current->mm->context.vdso = (void *)addr; | ||
120 | |||
118 | ret = install_special_mapping(mm, addr, vdso_size, | 121 | ret = install_special_mapping(mm, addr, vdso_size, |
119 | VM_READ|VM_EXEC| | 122 | VM_READ|VM_EXEC| |
120 | VM_MAYREAD|VM_MAYWRITE|VM_MAYEXEC| | 123 | VM_MAYREAD|VM_MAYWRITE|VM_MAYEXEC| |
121 | VM_ALWAYSDUMP, | 124 | VM_ALWAYSDUMP, |
122 | vdso_pages); | 125 | vdso_pages); |
123 | if (ret) | 126 | if (ret) { |
127 | current->mm->context.vdso = NULL; | ||
124 | goto up_fail; | 128 | goto up_fail; |
129 | } | ||
125 | 130 | ||
126 | current->mm->context.vdso = (void *)addr; | ||
127 | up_fail: | 131 | up_fail: |
128 | up_write(&mm->mmap_sem); | 132 | up_write(&mm->mmap_sem); |
129 | return ret; | 133 | return ret; |
diff --git a/arch/x86/xen/enlighten.c b/arch/x86/xen/enlighten.c index f09e8c36ee80..0a1700a2be9c 100644 --- a/arch/x86/xen/enlighten.c +++ b/arch/x86/xen/enlighten.c | |||
@@ -20,6 +20,7 @@ | |||
20 | #include <linux/delay.h> | 20 | #include <linux/delay.h> |
21 | #include <linux/start_kernel.h> | 21 | #include <linux/start_kernel.h> |
22 | #include <linux/sched.h> | 22 | #include <linux/sched.h> |
23 | #include <linux/kprobes.h> | ||
23 | #include <linux/bootmem.h> | 24 | #include <linux/bootmem.h> |
24 | #include <linux/module.h> | 25 | #include <linux/module.h> |
25 | #include <linux/mm.h> | 26 | #include <linux/mm.h> |
@@ -44,6 +45,7 @@ | |||
44 | #include <asm/processor.h> | 45 | #include <asm/processor.h> |
45 | #include <asm/proto.h> | 46 | #include <asm/proto.h> |
46 | #include <asm/msr-index.h> | 47 | #include <asm/msr-index.h> |
48 | #include <asm/traps.h> | ||
47 | #include <asm/setup.h> | 49 | #include <asm/setup.h> |
48 | #include <asm/desc.h> | 50 | #include <asm/desc.h> |
49 | #include <asm/pgtable.h> | 51 | #include <asm/pgtable.h> |
@@ -240,10 +242,10 @@ static unsigned long xen_get_debugreg(int reg) | |||
240 | return HYPERVISOR_get_debugreg(reg); | 242 | return HYPERVISOR_get_debugreg(reg); |
241 | } | 243 | } |
242 | 244 | ||
243 | void xen_leave_lazy(void) | 245 | static void xen_end_context_switch(struct task_struct *next) |
244 | { | 246 | { |
245 | paravirt_leave_lazy(paravirt_get_lazy_mode()); | ||
246 | xen_mc_flush(); | 247 | xen_mc_flush(); |
248 | paravirt_end_context_switch(next); | ||
247 | } | 249 | } |
248 | 250 | ||
249 | static unsigned long xen_store_tr(void) | 251 | static unsigned long xen_store_tr(void) |
@@ -428,11 +430,44 @@ static void xen_write_ldt_entry(struct desc_struct *dt, int entrynum, | |||
428 | static int cvt_gate_to_trap(int vector, const gate_desc *val, | 430 | static int cvt_gate_to_trap(int vector, const gate_desc *val, |
429 | struct trap_info *info) | 431 | struct trap_info *info) |
430 | { | 432 | { |
433 | unsigned long addr; | ||
434 | |||
431 | if (val->type != GATE_TRAP && val->type != GATE_INTERRUPT) | 435 | if (val->type != GATE_TRAP && val->type != GATE_INTERRUPT) |
432 | return 0; | 436 | return 0; |
433 | 437 | ||
434 | info->vector = vector; | 438 | info->vector = vector; |
435 | info->address = gate_offset(*val); | 439 | |
440 | addr = gate_offset(*val); | ||
441 | #ifdef CONFIG_X86_64 | ||
442 | /* | ||
443 | * Look for known traps using IST, and substitute them | ||
444 | * appropriately. The debugger ones are the only ones we care | ||
445 | * about. Xen will handle faults like double_fault and | ||
446 | * machine_check, so we should never see them. Warn if | ||
447 | * there's an unexpected IST-using fault handler. | ||
448 | */ | ||
449 | if (addr == (unsigned long)debug) | ||
450 | addr = (unsigned long)xen_debug; | ||
451 | else if (addr == (unsigned long)int3) | ||
452 | addr = (unsigned long)xen_int3; | ||
453 | else if (addr == (unsigned long)stack_segment) | ||
454 | addr = (unsigned long)xen_stack_segment; | ||
455 | else if (addr == (unsigned long)double_fault || | ||
456 | addr == (unsigned long)nmi) { | ||
457 | /* Don't need to handle these */ | ||
458 | return 0; | ||
459 | #ifdef CONFIG_X86_MCE | ||
460 | } else if (addr == (unsigned long)machine_check) { | ||
461 | return 0; | ||
462 | #endif | ||
463 | } else { | ||
464 | /* Some other trap using IST? */ | ||
465 | if (WARN_ON(val->ist != 0)) | ||
466 | return 0; | ||
467 | } | ||
468 | #endif /* CONFIG_X86_64 */ | ||
469 | info->address = addr; | ||
470 | |||
436 | info->cs = gate_segment(*val); | 471 | info->cs = gate_segment(*val); |
437 | info->flags = val->dpl; | 472 | info->flags = val->dpl; |
438 | /* interrupt gates clear IF */ | 473 | /* interrupt gates clear IF */ |
@@ -623,10 +658,26 @@ static void xen_clts(void) | |||
623 | xen_mc_issue(PARAVIRT_LAZY_CPU); | 658 | xen_mc_issue(PARAVIRT_LAZY_CPU); |
624 | } | 659 | } |
625 | 660 | ||
661 | static DEFINE_PER_CPU(unsigned long, xen_cr0_value); | ||
662 | |||
663 | static unsigned long xen_read_cr0(void) | ||
664 | { | ||
665 | unsigned long cr0 = percpu_read(xen_cr0_value); | ||
666 | |||
667 | if (unlikely(cr0 == 0)) { | ||
668 | cr0 = native_read_cr0(); | ||
669 | percpu_write(xen_cr0_value, cr0); | ||
670 | } | ||
671 | |||
672 | return cr0; | ||
673 | } | ||
674 | |||
626 | static void xen_write_cr0(unsigned long cr0) | 675 | static void xen_write_cr0(unsigned long cr0) |
627 | { | 676 | { |
628 | struct multicall_space mcs; | 677 | struct multicall_space mcs; |
629 | 678 | ||
679 | percpu_write(xen_cr0_value, cr0); | ||
680 | |||
630 | /* Only pay attention to cr0.TS; everything else is | 681 | /* Only pay attention to cr0.TS; everything else is |
631 | ignored. */ | 682 | ignored. */ |
632 | mcs = xen_mc_entry(0); | 683 | mcs = xen_mc_entry(0); |
@@ -812,7 +863,7 @@ static const struct pv_cpu_ops xen_cpu_ops __initdata = { | |||
812 | 863 | ||
813 | .clts = xen_clts, | 864 | .clts = xen_clts, |
814 | 865 | ||
815 | .read_cr0 = native_read_cr0, | 866 | .read_cr0 = xen_read_cr0, |
816 | .write_cr0 = xen_write_cr0, | 867 | .write_cr0 = xen_write_cr0, |
817 | 868 | ||
818 | .read_cr4 = native_read_cr4, | 869 | .read_cr4 = native_read_cr4, |
@@ -860,10 +911,8 @@ static const struct pv_cpu_ops xen_cpu_ops __initdata = { | |||
860 | /* Xen takes care of %gs when switching to usermode for us */ | 911 | /* Xen takes care of %gs when switching to usermode for us */ |
861 | .swapgs = paravirt_nop, | 912 | .swapgs = paravirt_nop, |
862 | 913 | ||
863 | .lazy_mode = { | 914 | .start_context_switch = paravirt_start_context_switch, |
864 | .enter = paravirt_enter_lazy_cpu, | 915 | .end_context_switch = xen_end_context_switch, |
865 | .leave = xen_leave_lazy, | ||
866 | }, | ||
867 | }; | 916 | }; |
868 | 917 | ||
869 | static const struct pv_apic_ops xen_apic_ops __initdata = { | 918 | static const struct pv_apic_ops xen_apic_ops __initdata = { |
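The xen_read_cr0()/xen_write_cr0() pair added above keeps a per-CPU shadow of %cr0 so that reads are served from memory instead of trapping to the hypervisor; the shadow is filled lazily from the native read the first time it is consulted and refreshed on every write. A rough stand-alone sketch of that lazily-filled read cache (hypothetical names; the kernel uses percpu_read()/percpu_write()):

    #include <stdio.h>

    static unsigned long cr0_shadow;          /* per-CPU variable in the kernel */

    static unsigned long slow_read_cr0(void)  /* stands in for the trapping read */
    {
            puts("expensive read");
            return 0x80050033UL;
    }

    static unsigned long cached_read_cr0(void)
    {
            unsigned long cr0 = cr0_shadow;

            if (cr0 == 0) {                   /* not cached yet: fill lazily */
                    cr0 = slow_read_cr0();
                    cr0_shadow = cr0;
            }
            return cr0;
    }

    static void cached_write_cr0(unsigned long cr0)
    {
            cr0_shadow = cr0;                 /* keep the shadow in sync */
            /* ...the real (batched) write would be issued here... */
    }

    int main(void)
    {
            printf("%lx\n", cached_read_cr0());  /* slow path, fills the shadow */
            printf("%lx\n", cached_read_cr0());  /* served from the shadow */
            cached_write_cr0(0x80050033UL);
            return 0;
    }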
diff --git a/arch/x86/xen/mmu.c b/arch/x86/xen/mmu.c index fba55b1a4021..4ceb28581652 100644 --- a/arch/x86/xen/mmu.c +++ b/arch/x86/xen/mmu.c | |||
@@ -452,10 +452,6 @@ void set_pte_mfn(unsigned long vaddr, unsigned long mfn, pgprot_t flags) | |||
452 | void xen_set_pte_at(struct mm_struct *mm, unsigned long addr, | 452 | void xen_set_pte_at(struct mm_struct *mm, unsigned long addr, |
453 | pte_t *ptep, pte_t pteval) | 453 | pte_t *ptep, pte_t pteval) |
454 | { | 454 | { |
455 | /* updates to init_mm may be done without lock */ | ||
456 | if (mm == &init_mm) | ||
457 | preempt_disable(); | ||
458 | |||
459 | ADD_STATS(set_pte_at, 1); | 455 | ADD_STATS(set_pte_at, 1); |
460 | // ADD_STATS(set_pte_at_pinned, xen_page_pinned(ptep)); | 456 | // ADD_STATS(set_pte_at_pinned, xen_page_pinned(ptep)); |
461 | ADD_STATS(set_pte_at_current, mm == current->mm); | 457 | ADD_STATS(set_pte_at_current, mm == current->mm); |
@@ -476,9 +472,7 @@ void xen_set_pte_at(struct mm_struct *mm, unsigned long addr, | |||
476 | } | 472 | } |
477 | xen_set_pte(ptep, pteval); | 473 | xen_set_pte(ptep, pteval); |
478 | 474 | ||
479 | out: | 475 | out: return; |
480 | if (mm == &init_mm) | ||
481 | preempt_enable(); | ||
482 | } | 476 | } |
483 | 477 | ||
484 | pte_t xen_ptep_modify_prot_start(struct mm_struct *mm, | 478 | pte_t xen_ptep_modify_prot_start(struct mm_struct *mm, |
@@ -1152,10 +1146,8 @@ static void drop_other_mm_ref(void *info) | |||
1152 | 1146 | ||
1153 | /* If this cpu still has a stale cr3 reference, then make sure | 1147 | /* If this cpu still has a stale cr3 reference, then make sure |
1154 | it has been flushed. */ | 1148 | it has been flushed. */ |
1155 | if (percpu_read(xen_current_cr3) == __pa(mm->pgd)) { | 1149 | if (percpu_read(xen_current_cr3) == __pa(mm->pgd)) |
1156 | load_cr3(swapper_pg_dir); | 1150 | load_cr3(swapper_pg_dir); |
1157 | arch_flush_lazy_cpu_mode(); | ||
1158 | } | ||
1159 | } | 1151 | } |
1160 | 1152 | ||
1161 | static void xen_drop_mm_ref(struct mm_struct *mm) | 1153 | static void xen_drop_mm_ref(struct mm_struct *mm) |
@@ -1168,7 +1160,6 @@ static void xen_drop_mm_ref(struct mm_struct *mm) | |||
1168 | load_cr3(swapper_pg_dir); | 1160 | load_cr3(swapper_pg_dir); |
1169 | else | 1161 | else |
1170 | leave_mm(smp_processor_id()); | 1162 | leave_mm(smp_processor_id()); |
1171 | arch_flush_lazy_cpu_mode(); | ||
1172 | } | 1163 | } |
1173 | 1164 | ||
1174 | /* Get the "official" set of cpus referring to our pagetable. */ | 1165 | /* Get the "official" set of cpus referring to our pagetable. */ |
@@ -1876,6 +1867,14 @@ __init void xen_post_allocator_init(void) | |||
1876 | xen_mark_init_mm_pinned(); | 1867 | xen_mark_init_mm_pinned(); |
1877 | } | 1868 | } |
1878 | 1869 | ||
1870 | static void xen_leave_lazy_mmu(void) | ||
1871 | { | ||
1872 | preempt_disable(); | ||
1873 | xen_mc_flush(); | ||
1874 | paravirt_leave_lazy_mmu(); | ||
1875 | preempt_enable(); | ||
1876 | } | ||
1877 | |||
1879 | const struct pv_mmu_ops xen_mmu_ops __initdata = { | 1878 | const struct pv_mmu_ops xen_mmu_ops __initdata = { |
1880 | .pagetable_setup_start = xen_pagetable_setup_start, | 1879 | .pagetable_setup_start = xen_pagetable_setup_start, |
1881 | .pagetable_setup_done = xen_pagetable_setup_done, | 1880 | .pagetable_setup_done = xen_pagetable_setup_done, |
@@ -1949,7 +1948,7 @@ const struct pv_mmu_ops xen_mmu_ops __initdata = { | |||
1949 | 1948 | ||
1950 | .lazy_mode = { | 1949 | .lazy_mode = { |
1951 | .enter = paravirt_enter_lazy_mmu, | 1950 | .enter = paravirt_enter_lazy_mmu, |
1952 | .leave = xen_leave_lazy, | 1951 | .leave = xen_leave_lazy_mmu, |
1953 | }, | 1952 | }, |
1954 | 1953 | ||
1955 | .set_fixmap = xen_set_fixmap, | 1954 | .set_fixmap = xen_set_fixmap, |
diff --git a/arch/x86/xen/setup.c b/arch/x86/xen/setup.c index 15c6c68db6a2..ad0047f47cd4 100644 --- a/arch/x86/xen/setup.c +++ b/arch/x86/xen/setup.c | |||
@@ -61,9 +61,9 @@ char * __init xen_memory_setup(void) | |||
61 | * - xen_start_info | 61 | * - xen_start_info |
62 | * See comment above "struct start_info" in <xen/interface/xen.h> | 62 | * See comment above "struct start_info" in <xen/interface/xen.h> |
63 | */ | 63 | */ |
64 | e820_add_region(__pa(xen_start_info->mfn_list), | 64 | reserve_early(__pa(xen_start_info->mfn_list), |
65 | xen_start_info->pt_base - xen_start_info->mfn_list, | 65 | __pa(xen_start_info->pt_base), |
66 | E820_RESERVED); | 66 | "XEN START INFO"); |
67 | 67 | ||
68 | sanitize_e820_map(e820.map, ARRAY_SIZE(e820.map), &e820.nr_map); | 68 | sanitize_e820_map(e820.map, ARRAY_SIZE(e820.map), &e820.nr_map); |
69 | 69 | ||
diff --git a/arch/x86/xen/xen-ops.h b/arch/x86/xen/xen-ops.h index ca6596b05d53..22494fd4c9b5 100644 --- a/arch/x86/xen/xen-ops.h +++ b/arch/x86/xen/xen-ops.h | |||
@@ -30,7 +30,6 @@ pgd_t *xen_setup_kernel_pagetable(pgd_t *pgd, unsigned long max_pfn); | |||
30 | void xen_ident_map_ISA(void); | 30 | void xen_ident_map_ISA(void); |
31 | void xen_reserve_top(void); | 31 | void xen_reserve_top(void); |
32 | 32 | ||
33 | void xen_leave_lazy(void); | ||
34 | void xen_post_allocator_init(void); | 33 | void xen_post_allocator_init(void); |
35 | 34 | ||
36 | char * __init xen_memory_setup(void); | 35 | char * __init xen_memory_setup(void); |