diff options
author | Ingo Molnar <mingo@elte.hu> | 2008-10-03 13:28:46 -0400 |
---|---|---|
committer | Ingo Molnar <mingo@elte.hu> | 2008-10-03 13:28:46 -0400 |
commit | f68ec0c24755e5cdb779be6240925f2175311d84 (patch) | |
tree | a7b7128e61a8456385d82bd1c7ca5f14eecbf2ca /arch/x86 | |
parent | 98920dc3d1113b883cbc73e3293446d3525c6042 (diff) | |
parent | 94aca1dac6f6d21f4b07e4864baf7768cabcc6e7 (diff) |
Merge commit 'v2.6.27-rc8' into x86/setup
Diffstat (limited to 'arch/x86')
190 files changed, 6835 insertions, 3381 deletions
diff --git a/arch/x86/Kconfig b/arch/x86/Kconfig index 96e0c2ebc388..ed92864d1325 100644 --- a/arch/x86/Kconfig +++ b/arch/x86/Kconfig | |||
@@ -21,12 +21,16 @@ config X86 | |||
21 | select HAVE_UNSTABLE_SCHED_CLOCK | 21 | select HAVE_UNSTABLE_SCHED_CLOCK |
22 | select HAVE_IDE | 22 | select HAVE_IDE |
23 | select HAVE_OPROFILE | 23 | select HAVE_OPROFILE |
24 | select HAVE_IOREMAP_PROT | ||
24 | select HAVE_KPROBES | 25 | select HAVE_KPROBES |
26 | select ARCH_WANT_OPTIONAL_GPIOLIB | ||
25 | select HAVE_KRETPROBES | 27 | select HAVE_KRETPROBES |
26 | select HAVE_DYNAMIC_FTRACE | 28 | select HAVE_DYNAMIC_FTRACE |
27 | select HAVE_FTRACE | 29 | select HAVE_FTRACE |
28 | select HAVE_KVM if ((X86_32 && !X86_VOYAGER && !X86_VISWS && !X86_NUMAQ) || X86_64) | 30 | select HAVE_KVM if ((X86_32 && !X86_VOYAGER && !X86_VISWS && !X86_NUMAQ) || X86_64) |
29 | select HAVE_ARCH_KGDB if !X86_VOYAGER | 31 | select HAVE_ARCH_KGDB if !X86_VOYAGER |
32 | select HAVE_GENERIC_DMA_COHERENT if X86_32 | ||
33 | select HAVE_EFFICIENT_UNALIGNED_ACCESS | ||
30 | 34 | ||
31 | config ARCH_DEFCONFIG | 35 | config ARCH_DEFCONFIG |
32 | string | 36 | string |
@@ -329,20 +333,6 @@ config X86_BIGSMP | |||
329 | 333 | ||
330 | endif | 334 | endif |
331 | 335 | ||
332 | config X86_RDC321X | ||
333 | bool "RDC R-321x SoC" | ||
334 | depends on X86_32 | ||
335 | select M486 | ||
336 | select X86_REBOOTFIXUPS | ||
337 | select GENERIC_GPIO | ||
338 | select LEDS_CLASS | ||
339 | select LEDS_GPIO | ||
340 | select NEW_LEDS | ||
341 | help | ||
342 | This option is needed for RDC R-321x system-on-chip, also known | ||
343 | as R-8610-(G). | ||
344 | If you don't have one of these chips, you should say N here. | ||
345 | |||
346 | config X86_VSMP | 336 | config X86_VSMP |
347 | bool "Support for ScaleMP vSMP" | 337 | bool "Support for ScaleMP vSMP" |
348 | select PARAVIRT | 338 | select PARAVIRT |
@@ -366,6 +356,16 @@ config X86_VISWS | |||
366 | A kernel compiled for the Visual Workstation will run on general | 356 | A kernel compiled for the Visual Workstation will run on general |
367 | PCs as well. See <file:Documentation/sgi-visws.txt> for details. | 357 | PCs as well. See <file:Documentation/sgi-visws.txt> for details. |
368 | 358 | ||
359 | config X86_RDC321X | ||
360 | bool "RDC R-321x SoC" | ||
361 | depends on X86_32 | ||
362 | select M486 | ||
363 | select X86_REBOOTFIXUPS | ||
364 | help | ||
365 | This option is needed for RDC R-321x system-on-chip, also known | ||
366 | as R-8610-(G). | ||
367 | If you don't have one of these chips, you should say N here. | ||
368 | |||
369 | config SCHED_NO_NO_OMIT_FRAME_POINTER | 369 | config SCHED_NO_NO_OMIT_FRAME_POINTER |
370 | def_bool y | 370 | def_bool y |
371 | prompt "Single-depth WCHAN output" | 371 | prompt "Single-depth WCHAN output" |
@@ -447,7 +447,6 @@ config PARAVIRT_DEBUG | |||
447 | 447 | ||
448 | config MEMTEST | 448 | config MEMTEST |
449 | bool "Memtest" | 449 | bool "Memtest" |
450 | depends on X86_64 | ||
451 | help | 450 | help |
452 | This option adds a kernel parameter 'memtest', which allows memtest | 451 | This option adds a kernel parameter 'memtest', which allows memtest |
453 | to be set. | 452 | to be set. |
@@ -578,35 +577,29 @@ config SWIOTLB | |||
578 | 577 | ||
579 | config IOMMU_HELPER | 578 | config IOMMU_HELPER |
580 | def_bool (CALGARY_IOMMU || GART_IOMMU || SWIOTLB || AMD_IOMMU) | 579 | def_bool (CALGARY_IOMMU || GART_IOMMU || SWIOTLB || AMD_IOMMU) |
580 | |||
581 | config MAXSMP | 581 | config MAXSMP |
582 | bool "Configure Maximum number of SMP Processors and NUMA Nodes" | 582 | bool "Configure Maximum number of SMP Processors and NUMA Nodes" |
583 | depends on X86_64 && SMP | 583 | depends on X86_64 && SMP && BROKEN |
584 | default n | 584 | default n |
585 | help | 585 | help |
586 | Configure maximum number of CPUs and NUMA Nodes for this architecture. | 586 | Configure maximum number of CPUs and NUMA Nodes for this architecture. |
587 | If unsure, say N. | 587 | If unsure, say N. |
588 | 588 | ||
589 | if MAXSMP | ||
590 | config NR_CPUS | 589 | config NR_CPUS |
591 | int | 590 | int "Maximum number of CPUs (2-512)" if !MAXSMP |
592 | default "4096" | 591 | range 2 512 |
593 | endif | ||
594 | |||
595 | if !MAXSMP | ||
596 | config NR_CPUS | ||
597 | int "Maximum number of CPUs (2-4096)" | ||
598 | range 2 4096 | ||
599 | depends on SMP | 592 | depends on SMP |
593 | default "4096" if MAXSMP | ||
600 | default "32" if X86_NUMAQ || X86_SUMMIT || X86_BIGSMP || X86_ES7000 | 594 | default "32" if X86_NUMAQ || X86_SUMMIT || X86_BIGSMP || X86_ES7000 |
601 | default "8" | 595 | default "8" |
602 | help | 596 | help |
603 | This allows you to specify the maximum number of CPUs which this | 597 | This allows you to specify the maximum number of CPUs which this |
604 | kernel will support. The maximum supported value is 4096 and the | 598 | kernel will support. The maximum supported value is 512 and the |
605 | minimum value which makes sense is 2. | 599 | minimum value which makes sense is 2. |
606 | 600 | ||
607 | This is purely to save memory - each supported CPU adds | 601 | This is purely to save memory - each supported CPU adds |
608 | approximately eight kilobytes to the kernel image. | 602 | approximately eight kilobytes to the kernel image. |
609 | endif | ||
610 | 603 | ||
611 | config SCHED_SMT | 604 | config SCHED_SMT |
612 | bool "SMT (Hyperthreading) scheduler support" | 605 | bool "SMT (Hyperthreading) scheduler support" |
@@ -952,9 +945,9 @@ config NUMA | |||
952 | local memory controller of the CPU and add some more | 945 | local memory controller of the CPU and add some more |
953 | NUMA awareness to the kernel. | 946 | NUMA awareness to the kernel. |
954 | 947 | ||
955 | For i386 this is currently highly experimental and should be only | 948 | For 32-bit this is currently highly experimental and should be only |
956 | used for kernel development. It might also cause boot failures. | 949 | used for kernel development. It might also cause boot failures. |
957 | For x86_64 this is recommended on all multiprocessor Opteron systems. | 950 | For 64-bit this is recommended on all multiprocessor Opteron systems. |
958 | If the system is EM64T, you should say N unless your system is | 951 | If the system is EM64T, you should say N unless your system is |
959 | EM64T NUMA. | 952 | EM64T NUMA. |
960 | 953 | ||
@@ -997,17 +990,10 @@ config NUMA_EMU | |||
997 | into virtual nodes when booted with "numa=fake=N", where N is the | 990 | into virtual nodes when booted with "numa=fake=N", where N is the |
998 | number of nodes. This is only useful for debugging. | 991 | number of nodes. This is only useful for debugging. |
999 | 992 | ||
1000 | if MAXSMP | ||
1001 | |||
1002 | config NODES_SHIFT | ||
1003 | int | ||
1004 | default "9" | ||
1005 | endif | ||
1006 | |||
1007 | if !MAXSMP | ||
1008 | config NODES_SHIFT | 993 | config NODES_SHIFT |
1009 | int "Maximum NUMA Nodes (as a power of 2)" | 994 | int "Maximum NUMA Nodes (as a power of 2)" if !MAXSMP |
1010 | range 1 9 if X86_64 | 995 | range 1 9 if X86_64 |
996 | default "9" if MAXSMP | ||
1011 | default "6" if X86_64 | 997 | default "6" if X86_64 |
1012 | default "4" if X86_NUMAQ | 998 | default "4" if X86_NUMAQ |
1013 | default "3" | 999 | default "3" |
@@ -1015,7 +1001,6 @@ config NODES_SHIFT | |||
1015 | help | 1001 | help |
1016 | Specify the maximum number of NUMA Nodes available on the target | 1002 | Specify the maximum number of NUMA Nodes available on the target |
1017 | system. Increases memory reserved to accommodate various tables. | 1003 | system. Increases memory reserved to accommodate various tables. |
1018 | endif | ||
1019 | 1004 | ||
1020 | config HAVE_ARCH_BOOTMEM_NODE | 1005 | config HAVE_ARCH_BOOTMEM_NODE |
1021 | def_bool y | 1006 | def_bool y |
@@ -1264,7 +1249,7 @@ config KEXEC | |||
1264 | strongly in flux, so no good recommendation can be made. | 1249 | strongly in flux, so no good recommendation can be made. |
1265 | 1250 | ||
1266 | config CRASH_DUMP | 1251 | config CRASH_DUMP |
1267 | bool "kernel crash dumps (EXPERIMENTAL)" | 1252 | bool "kernel crash dumps" |
1268 | depends on X86_64 || (X86_32 && HIGHMEM) | 1253 | depends on X86_64 || (X86_32 && HIGHMEM) |
1269 | help | 1254 | help |
1270 | Generate crash dump after being started by kexec. | 1255 | Generate crash dump after being started by kexec. |
@@ -1277,6 +1262,14 @@ config CRASH_DUMP | |||
1277 | (CONFIG_RELOCATABLE=y). | 1262 | (CONFIG_RELOCATABLE=y). |
1278 | For more details see Documentation/kdump/kdump.txt | 1263 | For more details see Documentation/kdump/kdump.txt |
1279 | 1264 | ||
1265 | config KEXEC_JUMP | ||
1266 | bool "kexec jump (EXPERIMENTAL)" | ||
1267 | depends on EXPERIMENTAL | ||
1268 | depends on KEXEC && HIBERNATION && X86_32 | ||
1269 | help | ||
1270 | Jump between original kernel and kexeced kernel and invoke | ||
1271 | code in physical address mode via KEXEC | ||
1272 | |||
1280 | config PHYSICAL_START | 1273 | config PHYSICAL_START |
1281 | hex "Physical address where the kernel is loaded" if (EMBEDDED || CRASH_DUMP) | 1274 | hex "Physical address where the kernel is loaded" if (EMBEDDED || CRASH_DUMP) |
1282 | default "0x1000000" if X86_NUMAQ | 1275 | default "0x1000000" if X86_NUMAQ |
diff --git a/arch/x86/Kconfig.cpu b/arch/x86/Kconfig.cpu index abff1b84ed5b..b225219c448c 100644 --- a/arch/x86/Kconfig.cpu +++ b/arch/x86/Kconfig.cpu | |||
@@ -362,10 +362,6 @@ config X86_ALIGNMENT_16 | |||
362 | def_bool y | 362 | def_bool y |
363 | depends on MWINCHIP3D || MWINCHIP2 || MWINCHIPC6 || MCYRIXIII || X86_ELAN || MK6 || M586MMX || M586TSC || M586 || M486 || MVIAC3_2 || MGEODEGX1 | 363 | depends on MWINCHIP3D || MWINCHIP2 || MWINCHIPC6 || MCYRIXIII || X86_ELAN || MK6 || M586MMX || M586TSC || M586 || M486 || MVIAC3_2 || MGEODEGX1 |
364 | 364 | ||
365 | config X86_GOOD_APIC | ||
366 | def_bool y | ||
367 | depends on MK7 || MPENTIUM4 || MPENTIUMM || MPENTIUMIII || MPENTIUMII || M686 || M586MMX || MK8 || MEFFICEON || MCORE2 || MVIAC7 || X86_64 | ||
368 | |||
369 | config X86_INTEL_USERCOPY | 365 | config X86_INTEL_USERCOPY |
370 | def_bool y | 366 | def_bool y |
371 | depends on MPENTIUM4 || MPENTIUMM || MPENTIUMIII || MPENTIUMII || M586MMX || X86_GENERIC || MK8 || MK7 || MEFFICEON || MCORE2 | 367 | depends on MPENTIUM4 || MPENTIUMM || MPENTIUMIII || MPENTIUMII || M586MMX || X86_GENERIC || MK8 || MK7 || MEFFICEON || MCORE2 |
@@ -386,14 +382,17 @@ config X86_OOSTORE | |||
386 | # P6_NOPs are a relatively minor optimization that require a family >= | 382 | # P6_NOPs are a relatively minor optimization that require a family >= |
387 | # 6 processor, except that it is broken on certain VIA chips. | 383 | # 6 processor, except that it is broken on certain VIA chips. |
388 | # Furthermore, AMD chips prefer a totally different sequence of NOPs | 384 | # Furthermore, AMD chips prefer a totally different sequence of NOPs |
389 | # (which work on all CPUs). As a result, disallow these if we're | 385 | # (which work on all CPUs). In addition, it looks like Virtual PC |
390 | # compiling X86_GENERIC but not X86_64 (these NOPs do work on all | 386 | # does not understand them. |
391 | # x86-64 capable chips); the list of processors in the right-hand clause | 387 | # |
392 | # are the cores that benefit from this optimization. | 388 | # As a result, disallow these if we're not compiling for X86_64 (these |
389 | # NOPs do work on all x86-64 capable chips); the list of processors in | ||
390 | # the right-hand clause are the cores that benefit from this optimization. | ||
393 | # | 391 | # |
394 | config X86_P6_NOP | 392 | config X86_P6_NOP |
395 | def_bool y | 393 | def_bool y |
396 | depends on (X86_64 || !X86_GENERIC) && (M686 || MPENTIUMII || MPENTIUMIII || MPENTIUMM || MCORE2 || MPENTIUM4 || MPSC) | 394 | depends on X86_64 |
395 | depends on (MCORE2 || MPENTIUM4 || MPSC) | ||
397 | 396 | ||
398 | config X86_TSC | 397 | config X86_TSC |
399 | def_bool y | 398 | def_bool y |
@@ -418,4 +417,4 @@ config X86_MINIMUM_CPU_FAMILY | |||
418 | 417 | ||
419 | config X86_DEBUGCTLMSR | 418 | config X86_DEBUGCTLMSR |
420 | def_bool y | 419 | def_bool y |
421 | depends on !(M586MMX || M586TSC || M586 || M486 || M386) | 420 | depends on !(MK6 || MWINCHIPC6 || MWINCHIP2 || MWINCHIP3D || MCYRIXIII || M586MMX || M586TSC || M586 || M486 || M386) |
diff --git a/arch/x86/Kconfig.debug b/arch/x86/Kconfig.debug index ae36bfa814e5..092f019e033a 100644 --- a/arch/x86/Kconfig.debug +++ b/arch/x86/Kconfig.debug | |||
@@ -5,13 +5,15 @@ config TRACE_IRQFLAGS_SUPPORT | |||
5 | 5 | ||
6 | source "lib/Kconfig.debug" | 6 | source "lib/Kconfig.debug" |
7 | 7 | ||
8 | config NONPROMISC_DEVMEM | 8 | config STRICT_DEVMEM |
9 | bool "Filter access to /dev/mem" | 9 | bool "Filter access to /dev/mem" |
10 | help | 10 | help |
11 | If this option is left off, you allow userspace access to all | 11 | If this option is disabled, you allow userspace (root) access to all |
12 | of memory, including kernel and userspace memory. Accidental | 12 | of memory, including kernel and userspace memory. Accidental |
13 | access to this is obviously disastrous, but specific access can | 13 | access to this is obviously disastrous, but specific access can |
14 | be used by people debugging the kernel. | 14 | be used by people debugging the kernel. Note that with PAT support |
15 | enabled, even in this case there are restrictions on /dev/mem | ||
16 | use due to the cache aliasing requirements. | ||
15 | 17 | ||
16 | If this option is switched on, the /dev/mem file only allows | 18 | If this option is switched on, the /dev/mem file only allows |
17 | userspace access to PCI space and the BIOS code and data regions. | 19 | userspace access to PCI space and the BIOS code and data regions. |
@@ -287,7 +289,6 @@ config CPA_DEBUG | |||
287 | 289 | ||
288 | config OPTIMIZE_INLINING | 290 | config OPTIMIZE_INLINING |
289 | bool "Allow gcc to uninline functions marked 'inline'" | 291 | bool "Allow gcc to uninline functions marked 'inline'" |
290 | depends on BROKEN | ||
291 | help | 292 | help |
292 | This option determines if the kernel forces gcc to inline the functions | 293 | This option determines if the kernel forces gcc to inline the functions |
293 | developers have marked 'inline'. Doing so takes away freedom from gcc to | 294 | developers have marked 'inline'. Doing so takes away freedom from gcc to |
@@ -298,5 +299,7 @@ config OPTIMIZE_INLINING | |||
298 | become the default in the future, until then this option is there to | 299 | become the default in the future, until then this option is there to |
299 | test gcc for this. | 300 | test gcc for this. |
300 | 301 | ||
302 | If unsure, say N. | ||
303 | |||
301 | endmenu | 304 | endmenu |
302 | 305 | ||
diff --git a/arch/x86/Makefile b/arch/x86/Makefile index 919ce21ea654..f5631da585b6 100644 --- a/arch/x86/Makefile +++ b/arch/x86/Makefile | |||
@@ -118,11 +118,6 @@ mflags-$(CONFIG_X86_GENERICARCH):= -Iinclude/asm-x86/mach-generic | |||
118 | fcore-$(CONFIG_X86_GENERICARCH) += arch/x86/mach-generic/ | 118 | fcore-$(CONFIG_X86_GENERICARCH) += arch/x86/mach-generic/ |
119 | mcore-$(CONFIG_X86_GENERICARCH) := arch/x86/mach-default/ | 119 | mcore-$(CONFIG_X86_GENERICARCH) := arch/x86/mach-default/ |
120 | 120 | ||
121 | # RDC R-321x subarch support | ||
122 | mflags-$(CONFIG_X86_RDC321X) := -Iinclude/asm-x86/mach-rdc321x | ||
123 | mcore-$(CONFIG_X86_RDC321X) := arch/x86/mach-default/ | ||
124 | core-$(CONFIG_X86_RDC321X) += arch/x86/mach-rdc321x/ | ||
125 | |||
126 | # default subarch .h files | 121 | # default subarch .h files |
127 | mflags-y += -Iinclude/asm-x86/mach-default | 122 | mflags-y += -Iinclude/asm-x86/mach-default |
128 | 123 | ||
diff --git a/arch/x86/boot/boot.h b/arch/x86/boot/boot.h index a34b9982c7cb..cc0ef13fba7a 100644 --- a/arch/x86/boot/boot.h +++ b/arch/x86/boot/boot.h | |||
@@ -24,10 +24,14 @@ | |||
24 | #include <linux/edd.h> | 24 | #include <linux/edd.h> |
25 | #include <asm/boot.h> | 25 | #include <asm/boot.h> |
26 | #include <asm/setup.h> | 26 | #include <asm/setup.h> |
27 | #include "bitops.h" | ||
28 | #include <asm/cpufeature.h> | ||
27 | 29 | ||
28 | /* Useful macros */ | 30 | /* Useful macros */ |
29 | #define BUILD_BUG_ON(condition) ((void)sizeof(char[1 - 2*!!(condition)])) | 31 | #define BUILD_BUG_ON(condition) ((void)sizeof(char[1 - 2*!!(condition)])) |
30 | 32 | ||
33 | #define ARRAY_SIZE(x) (sizeof(x) / sizeof(*(x))) | ||
34 | |||
31 | extern struct setup_header hdr; | 35 | extern struct setup_header hdr; |
32 | extern struct boot_params boot_params; | 36 | extern struct boot_params boot_params; |
33 | 37 | ||
@@ -242,6 +246,12 @@ int cmdline_find_option(const char *option, char *buffer, int bufsize); | |||
242 | int cmdline_find_option_bool(const char *option); | 246 | int cmdline_find_option_bool(const char *option); |
243 | 247 | ||
244 | /* cpu.c, cpucheck.c */ | 248 | /* cpu.c, cpucheck.c */ |
249 | struct cpu_features { | ||
250 | int level; /* Family, or 64 for x86-64 */ | ||
251 | int model; | ||
252 | u32 flags[NCAPINTS]; | ||
253 | }; | ||
254 | extern struct cpu_features cpu; | ||
245 | int check_cpu(int *cpu_level_ptr, int *req_level_ptr, u32 **err_flags_ptr); | 255 | int check_cpu(int *cpu_level_ptr, int *req_level_ptr, u32 **err_flags_ptr); |
246 | int validate_cpu(void); | 256 | int validate_cpu(void); |
247 | 257 | ||
diff --git a/arch/x86/boot/compressed/misc.c b/arch/x86/boot/compressed/misc.c index bc5553b496f7..9fea73706479 100644 --- a/arch/x86/boot/compressed/misc.c +++ b/arch/x86/boot/compressed/misc.c | |||
@@ -182,8 +182,6 @@ static unsigned outcnt; | |||
182 | static int fill_inbuf(void); | 182 | static int fill_inbuf(void); |
183 | static void flush_window(void); | 183 | static void flush_window(void); |
184 | static void error(char *m); | 184 | static void error(char *m); |
185 | static void gzip_mark(void **); | ||
186 | static void gzip_release(void **); | ||
187 | 185 | ||
188 | /* | 186 | /* |
189 | * This is set up by the setup-routine at boot-time | 187 | * This is set up by the setup-routine at boot-time |
@@ -196,9 +194,6 @@ extern int input_len; | |||
196 | 194 | ||
197 | static long bytes_out; | 195 | static long bytes_out; |
198 | 196 | ||
199 | static void *malloc(int size); | ||
200 | static void free(void *where); | ||
201 | |||
202 | static void *memset(void *s, int c, unsigned n); | 197 | static void *memset(void *s, int c, unsigned n); |
203 | static void *memcpy(void *dest, const void *src, unsigned n); | 198 | static void *memcpy(void *dest, const void *src, unsigned n); |
204 | 199 | ||
@@ -220,40 +215,6 @@ static int lines, cols; | |||
220 | 215 | ||
221 | #include "../../../../lib/inflate.c" | 216 | #include "../../../../lib/inflate.c" |
222 | 217 | ||
223 | static void *malloc(int size) | ||
224 | { | ||
225 | void *p; | ||
226 | |||
227 | if (size < 0) | ||
228 | error("Malloc error"); | ||
229 | if (free_mem_ptr <= 0) | ||
230 | error("Memory error"); | ||
231 | |||
232 | free_mem_ptr = (free_mem_ptr + 3) & ~3; /* Align */ | ||
233 | |||
234 | p = (void *)free_mem_ptr; | ||
235 | free_mem_ptr += size; | ||
236 | |||
237 | if (free_mem_ptr >= free_mem_end_ptr) | ||
238 | error("Out of memory"); | ||
239 | |||
240 | return p; | ||
241 | } | ||
242 | |||
243 | static void free(void *where) | ||
244 | { /* Don't care */ | ||
245 | } | ||
246 | |||
247 | static void gzip_mark(void **ptr) | ||
248 | { | ||
249 | *ptr = (void *) free_mem_ptr; | ||
250 | } | ||
251 | |||
252 | static void gzip_release(void **ptr) | ||
253 | { | ||
254 | free_mem_ptr = (memptr) *ptr; | ||
255 | } | ||
256 | |||
257 | static void scroll(void) | 218 | static void scroll(void) |
258 | { | 219 | { |
259 | int i; | 220 | int i; |
diff --git a/arch/x86/boot/cpu.c b/arch/x86/boot/cpu.c index 92d6fd73dc7d..75298fe2edca 100644 --- a/arch/x86/boot/cpu.c +++ b/arch/x86/boot/cpu.c | |||
@@ -16,9 +16,6 @@ | |||
16 | */ | 16 | */ |
17 | 17 | ||
18 | #include "boot.h" | 18 | #include "boot.h" |
19 | #include "bitops.h" | ||
20 | #include <asm/cpufeature.h> | ||
21 | |||
22 | #include "cpustr.h" | 19 | #include "cpustr.h" |
23 | 20 | ||
24 | static char *cpu_name(int level) | 21 | static char *cpu_name(int level) |
diff --git a/arch/x86/boot/cpucheck.c b/arch/x86/boot/cpucheck.c index 7804389ee005..4d3ff037201f 100644 --- a/arch/x86/boot/cpucheck.c +++ b/arch/x86/boot/cpucheck.c | |||
@@ -22,21 +22,13 @@ | |||
22 | 22 | ||
23 | #ifdef _SETUP | 23 | #ifdef _SETUP |
24 | # include "boot.h" | 24 | # include "boot.h" |
25 | # include "bitops.h" | ||
26 | #endif | 25 | #endif |
27 | #include <linux/types.h> | 26 | #include <linux/types.h> |
28 | #include <asm/cpufeature.h> | ||
29 | #include <asm/processor-flags.h> | 27 | #include <asm/processor-flags.h> |
30 | #include <asm/required-features.h> | 28 | #include <asm/required-features.h> |
31 | #include <asm/msr-index.h> | 29 | #include <asm/msr-index.h> |
32 | 30 | ||
33 | struct cpu_features { | 31 | struct cpu_features cpu; |
34 | int level; /* Family, or 64 for x86-64 */ | ||
35 | int model; | ||
36 | u32 flags[NCAPINTS]; | ||
37 | }; | ||
38 | |||
39 | static struct cpu_features cpu; | ||
40 | static u32 cpu_vendor[3]; | 32 | static u32 cpu_vendor[3]; |
41 | static u32 err_flags[NCAPINTS]; | 33 | static u32 err_flags[NCAPINTS]; |
42 | 34 | ||
@@ -46,12 +38,12 @@ static const u32 req_flags[NCAPINTS] = | |||
46 | { | 38 | { |
47 | REQUIRED_MASK0, | 39 | REQUIRED_MASK0, |
48 | REQUIRED_MASK1, | 40 | REQUIRED_MASK1, |
49 | REQUIRED_MASK2, | 41 | 0, /* REQUIRED_MASK2 not implemented in this file */ |
50 | REQUIRED_MASK3, | 42 | 0, /* REQUIRED_MASK3 not implemented in this file */ |
51 | REQUIRED_MASK4, | 43 | REQUIRED_MASK4, |
52 | REQUIRED_MASK5, | 44 | 0, /* REQUIRED_MASK5 not implemented in this file */ |
53 | REQUIRED_MASK6, | 45 | REQUIRED_MASK6, |
54 | REQUIRED_MASK7, | 46 | 0, /* REQUIRED_MASK7 not implemented in this file */ |
55 | }; | 47 | }; |
56 | 48 | ||
57 | #define A32(a, b, c, d) (((d) << 24)+((c) << 16)+((b) << 8)+(a)) | 49 | #define A32(a, b, c, d) (((d) << 24)+((c) << 16)+((b) << 8)+(a)) |
diff --git a/arch/x86/boot/main.c b/arch/x86/boot/main.c index 2296164b54d2..197421db1af1 100644 --- a/arch/x86/boot/main.c +++ b/arch/x86/boot/main.c | |||
@@ -73,6 +73,11 @@ static void keyboard_set_repeat(void) | |||
73 | */ | 73 | */ |
74 | static void query_ist(void) | 74 | static void query_ist(void) |
75 | { | 75 | { |
76 | /* Some older BIOSes apparently crash on this call, so filter | ||
77 | it from machines too old to have SpeedStep at all. */ | ||
78 | if (cpu.level < 6) | ||
79 | return; | ||
80 | |||
76 | asm("int $0x15" | 81 | asm("int $0x15" |
77 | : "=a" (boot_params.ist_info.signature), | 82 | : "=a" (boot_params.ist_info.signature), |
78 | "=b" (boot_params.ist_info.command), | 83 | "=b" (boot_params.ist_info.command), |
diff --git a/arch/x86/boot/memory.c b/arch/x86/boot/memory.c index 53165c97336b..8c3c25f35578 100644 --- a/arch/x86/boot/memory.c +++ b/arch/x86/boot/memory.c | |||
@@ -13,7 +13,6 @@ | |||
13 | */ | 13 | */ |
14 | 14 | ||
15 | #include "boot.h" | 15 | #include "boot.h" |
16 | #include <linux/kernel.h> | ||
17 | 16 | ||
18 | #define SMAP 0x534d4150 /* ASCII "SMAP" */ | 17 | #define SMAP 0x534d4150 /* ASCII "SMAP" */ |
19 | 18 | ||
diff --git a/arch/x86/configs/i386_defconfig b/arch/x86/configs/i386_defconfig index a7ae4385670e..8cc9eea839e4 100644 --- a/arch/x86/configs/i386_defconfig +++ b/arch/x86/configs/i386_defconfig | |||
@@ -1,13 +1,13 @@ | |||
1 | # | 1 | # |
2 | # Automatically generated make config: don't edit | 2 | # Automatically generated make config: don't edit |
3 | # Linux kernel version: 2.6.26-rc1 | 3 | # Linux kernel version: 2.6.27-rc4 |
4 | # Sun May 4 19:59:02 2008 | 4 | # Mon Aug 25 15:04:00 2008 |
5 | # | 5 | # |
6 | # CONFIG_64BIT is not set | 6 | # CONFIG_64BIT is not set |
7 | CONFIG_X86_32=y | 7 | CONFIG_X86_32=y |
8 | # CONFIG_X86_64 is not set | 8 | # CONFIG_X86_64 is not set |
9 | CONFIG_X86=y | 9 | CONFIG_X86=y |
10 | CONFIG_DEFCONFIG_LIST="arch/x86/configs/i386_defconfig" | 10 | CONFIG_ARCH_DEFCONFIG="arch/x86/configs/i386_defconfig" |
11 | # CONFIG_GENERIC_LOCKBREAK is not set | 11 | # CONFIG_GENERIC_LOCKBREAK is not set |
12 | CONFIG_GENERIC_TIME=y | 12 | CONFIG_GENERIC_TIME=y |
13 | CONFIG_GENERIC_CMOS_UPDATE=y | 13 | CONFIG_GENERIC_CMOS_UPDATE=y |
@@ -53,6 +53,7 @@ CONFIG_X86_HT=y | |||
53 | CONFIG_X86_BIOS_REBOOT=y | 53 | CONFIG_X86_BIOS_REBOOT=y |
54 | CONFIG_X86_TRAMPOLINE=y | 54 | CONFIG_X86_TRAMPOLINE=y |
55 | CONFIG_KTIME_SCALAR=y | 55 | CONFIG_KTIME_SCALAR=y |
56 | CONFIG_DEFCONFIG_LIST="/lib/modules/$UNAME_RELEASE/.config" | ||
56 | 57 | ||
57 | # | 58 | # |
58 | # General setup | 59 | # General setup |
@@ -82,6 +83,7 @@ CONFIG_CGROUPS=y | |||
82 | CONFIG_CGROUP_NS=y | 83 | CONFIG_CGROUP_NS=y |
83 | # CONFIG_CGROUP_DEVICE is not set | 84 | # CONFIG_CGROUP_DEVICE is not set |
84 | CONFIG_CPUSETS=y | 85 | CONFIG_CPUSETS=y |
86 | CONFIG_HAVE_UNSTABLE_SCHED_CLOCK=y | ||
85 | CONFIG_GROUP_SCHED=y | 87 | CONFIG_GROUP_SCHED=y |
86 | CONFIG_FAIR_GROUP_SCHED=y | 88 | CONFIG_FAIR_GROUP_SCHED=y |
87 | # CONFIG_RT_GROUP_SCHED is not set | 89 | # CONFIG_RT_GROUP_SCHED is not set |
@@ -105,7 +107,6 @@ CONFIG_SYSCTL=y | |||
105 | # CONFIG_EMBEDDED is not set | 107 | # CONFIG_EMBEDDED is not set |
106 | CONFIG_UID16=y | 108 | CONFIG_UID16=y |
107 | CONFIG_SYSCTL_SYSCALL=y | 109 | CONFIG_SYSCTL_SYSCALL=y |
108 | CONFIG_SYSCTL_SYSCALL_CHECK=y | ||
109 | CONFIG_KALLSYMS=y | 110 | CONFIG_KALLSYMS=y |
110 | CONFIG_KALLSYMS_ALL=y | 111 | CONFIG_KALLSYMS_ALL=y |
111 | CONFIG_KALLSYMS_EXTRA_PASS=y | 112 | CONFIG_KALLSYMS_EXTRA_PASS=y |
@@ -113,6 +114,7 @@ CONFIG_HOTPLUG=y | |||
113 | CONFIG_PRINTK=y | 114 | CONFIG_PRINTK=y |
114 | CONFIG_BUG=y | 115 | CONFIG_BUG=y |
115 | CONFIG_ELF_CORE=y | 116 | CONFIG_ELF_CORE=y |
117 | CONFIG_PCSPKR_PLATFORM=y | ||
116 | # CONFIG_COMPAT_BRK is not set | 118 | # CONFIG_COMPAT_BRK is not set |
117 | CONFIG_BASE_FULL=y | 119 | CONFIG_BASE_FULL=y |
118 | CONFIG_FUTEX=y | 120 | CONFIG_FUTEX=y |
@@ -132,27 +134,35 @@ CONFIG_MARKERS=y | |||
132 | # CONFIG_OPROFILE is not set | 134 | # CONFIG_OPROFILE is not set |
133 | CONFIG_HAVE_OPROFILE=y | 135 | CONFIG_HAVE_OPROFILE=y |
134 | CONFIG_KPROBES=y | 136 | CONFIG_KPROBES=y |
137 | CONFIG_HAVE_EFFICIENT_UNALIGNED_ACCESS=y | ||
135 | CONFIG_KRETPROBES=y | 138 | CONFIG_KRETPROBES=y |
139 | CONFIG_HAVE_IOREMAP_PROT=y | ||
136 | CONFIG_HAVE_KPROBES=y | 140 | CONFIG_HAVE_KPROBES=y |
137 | CONFIG_HAVE_KRETPROBES=y | 141 | CONFIG_HAVE_KRETPROBES=y |
142 | # CONFIG_HAVE_ARCH_TRACEHOOK is not set | ||
138 | # CONFIG_HAVE_DMA_ATTRS is not set | 143 | # CONFIG_HAVE_DMA_ATTRS is not set |
144 | CONFIG_USE_GENERIC_SMP_HELPERS=y | ||
145 | # CONFIG_HAVE_CLK is not set | ||
139 | CONFIG_PROC_PAGE_MONITOR=y | 146 | CONFIG_PROC_PAGE_MONITOR=y |
147 | CONFIG_HAVE_GENERIC_DMA_COHERENT=y | ||
140 | CONFIG_SLABINFO=y | 148 | CONFIG_SLABINFO=y |
141 | CONFIG_RT_MUTEXES=y | 149 | CONFIG_RT_MUTEXES=y |
142 | # CONFIG_TINY_SHMEM is not set | 150 | # CONFIG_TINY_SHMEM is not set |
143 | CONFIG_BASE_SMALL=0 | 151 | CONFIG_BASE_SMALL=0 |
144 | CONFIG_MODULES=y | 152 | CONFIG_MODULES=y |
153 | # CONFIG_MODULE_FORCE_LOAD is not set | ||
145 | CONFIG_MODULE_UNLOAD=y | 154 | CONFIG_MODULE_UNLOAD=y |
146 | CONFIG_MODULE_FORCE_UNLOAD=y | 155 | CONFIG_MODULE_FORCE_UNLOAD=y |
147 | # CONFIG_MODVERSIONS is not set | 156 | # CONFIG_MODVERSIONS is not set |
148 | # CONFIG_MODULE_SRCVERSION_ALL is not set | 157 | # CONFIG_MODULE_SRCVERSION_ALL is not set |
149 | # CONFIG_KMOD is not set | 158 | CONFIG_KMOD=y |
150 | CONFIG_STOP_MACHINE=y | 159 | CONFIG_STOP_MACHINE=y |
151 | CONFIG_BLOCK=y | 160 | CONFIG_BLOCK=y |
152 | # CONFIG_LBD is not set | 161 | # CONFIG_LBD is not set |
153 | CONFIG_BLK_DEV_IO_TRACE=y | 162 | CONFIG_BLK_DEV_IO_TRACE=y |
154 | # CONFIG_LSF is not set | 163 | # CONFIG_LSF is not set |
155 | CONFIG_BLK_DEV_BSG=y | 164 | CONFIG_BLK_DEV_BSG=y |
165 | # CONFIG_BLK_DEV_INTEGRITY is not set | ||
156 | 166 | ||
157 | # | 167 | # |
158 | # IO Schedulers | 168 | # IO Schedulers |
@@ -176,19 +186,17 @@ CONFIG_NO_HZ=y | |||
176 | CONFIG_HIGH_RES_TIMERS=y | 186 | CONFIG_HIGH_RES_TIMERS=y |
177 | CONFIG_GENERIC_CLOCKEVENTS_BUILD=y | 187 | CONFIG_GENERIC_CLOCKEVENTS_BUILD=y |
178 | CONFIG_SMP=y | 188 | CONFIG_SMP=y |
189 | CONFIG_X86_FIND_SMP_CONFIG=y | ||
190 | CONFIG_X86_MPPARSE=y | ||
179 | CONFIG_X86_PC=y | 191 | CONFIG_X86_PC=y |
180 | # CONFIG_X86_ELAN is not set | 192 | # CONFIG_X86_ELAN is not set |
181 | # CONFIG_X86_VOYAGER is not set | 193 | # CONFIG_X86_VOYAGER is not set |
182 | # CONFIG_X86_NUMAQ is not set | ||
183 | # CONFIG_X86_SUMMIT is not set | ||
184 | # CONFIG_X86_BIGSMP is not set | ||
185 | # CONFIG_X86_VISWS is not set | ||
186 | # CONFIG_X86_GENERICARCH is not set | 194 | # CONFIG_X86_GENERICARCH is not set |
187 | # CONFIG_X86_ES7000 is not set | ||
188 | # CONFIG_X86_RDC321X is not set | ||
189 | # CONFIG_X86_VSMP is not set | 195 | # CONFIG_X86_VSMP is not set |
196 | # CONFIG_X86_RDC321X is not set | ||
190 | CONFIG_SCHED_NO_NO_OMIT_FRAME_POINTER=y | 197 | CONFIG_SCHED_NO_NO_OMIT_FRAME_POINTER=y |
191 | # CONFIG_PARAVIRT_GUEST is not set | 198 | # CONFIG_PARAVIRT_GUEST is not set |
199 | # CONFIG_MEMTEST is not set | ||
192 | # CONFIG_M386 is not set | 200 | # CONFIG_M386 is not set |
193 | # CONFIG_M486 is not set | 201 | # CONFIG_M486 is not set |
194 | # CONFIG_M586 is not set | 202 | # CONFIG_M586 is not set |
@@ -215,21 +223,19 @@ CONFIG_SCHED_NO_NO_OMIT_FRAME_POINTER=y | |||
215 | # CONFIG_MPSC is not set | 223 | # CONFIG_MPSC is not set |
216 | CONFIG_MCORE2=y | 224 | CONFIG_MCORE2=y |
217 | # CONFIG_GENERIC_CPU is not set | 225 | # CONFIG_GENERIC_CPU is not set |
218 | # CONFIG_X86_GENERIC is not set | 226 | CONFIG_X86_GENERIC=y |
219 | CONFIG_X86_CPU=y | 227 | CONFIG_X86_CPU=y |
220 | CONFIG_X86_CMPXCHG=y | 228 | CONFIG_X86_CMPXCHG=y |
221 | CONFIG_X86_L1_CACHE_SHIFT=6 | 229 | CONFIG_X86_L1_CACHE_SHIFT=7 |
222 | CONFIG_X86_XADD=y | 230 | CONFIG_X86_XADD=y |
223 | CONFIG_X86_WP_WORKS_OK=y | 231 | CONFIG_X86_WP_WORKS_OK=y |
224 | CONFIG_X86_INVLPG=y | 232 | CONFIG_X86_INVLPG=y |
225 | CONFIG_X86_BSWAP=y | 233 | CONFIG_X86_BSWAP=y |
226 | CONFIG_X86_POPAD_OK=y | 234 | CONFIG_X86_POPAD_OK=y |
227 | CONFIG_X86_GOOD_APIC=y | ||
228 | CONFIG_X86_INTEL_USERCOPY=y | 235 | CONFIG_X86_INTEL_USERCOPY=y |
229 | CONFIG_X86_USE_PPRO_CHECKSUM=y | 236 | CONFIG_X86_USE_PPRO_CHECKSUM=y |
230 | CONFIG_X86_P6_NOP=y | ||
231 | CONFIG_X86_TSC=y | 237 | CONFIG_X86_TSC=y |
232 | CONFIG_X86_MINIMUM_CPU_FAMILY=6 | 238 | CONFIG_X86_MINIMUM_CPU_FAMILY=4 |
233 | CONFIG_X86_DEBUGCTLMSR=y | 239 | CONFIG_X86_DEBUGCTLMSR=y |
234 | CONFIG_HPET_TIMER=y | 240 | CONFIG_HPET_TIMER=y |
235 | CONFIG_HPET_EMULATE_RTC=y | 241 | CONFIG_HPET_EMULATE_RTC=y |
@@ -247,7 +253,7 @@ CONFIG_X86_IO_APIC=y | |||
247 | CONFIG_VM86=y | 253 | CONFIG_VM86=y |
248 | # CONFIG_TOSHIBA is not set | 254 | # CONFIG_TOSHIBA is not set |
249 | # CONFIG_I8K is not set | 255 | # CONFIG_I8K is not set |
250 | # CONFIG_X86_REBOOTFIXUPS is not set | 256 | CONFIG_X86_REBOOTFIXUPS=y |
251 | # CONFIG_MICROCODE is not set | 257 | # CONFIG_MICROCODE is not set |
252 | CONFIG_X86_MSR=y | 258 | CONFIG_X86_MSR=y |
253 | CONFIG_X86_CPUID=y | 259 | CONFIG_X86_CPUID=y |
@@ -256,32 +262,28 @@ CONFIG_HIGHMEM4G=y | |||
256 | # CONFIG_HIGHMEM64G is not set | 262 | # CONFIG_HIGHMEM64G is not set |
257 | CONFIG_PAGE_OFFSET=0xC0000000 | 263 | CONFIG_PAGE_OFFSET=0xC0000000 |
258 | CONFIG_HIGHMEM=y | 264 | CONFIG_HIGHMEM=y |
259 | CONFIG_NEED_NODE_MEMMAP_SIZE=y | ||
260 | CONFIG_ARCH_FLATMEM_ENABLE=y | 265 | CONFIG_ARCH_FLATMEM_ENABLE=y |
261 | CONFIG_ARCH_SPARSEMEM_ENABLE=y | 266 | CONFIG_ARCH_SPARSEMEM_ENABLE=y |
262 | CONFIG_ARCH_SELECT_MEMORY_MODEL=y | 267 | CONFIG_ARCH_SELECT_MEMORY_MODEL=y |
263 | CONFIG_SELECT_MEMORY_MODEL=y | 268 | CONFIG_SELECT_MEMORY_MODEL=y |
264 | # CONFIG_FLATMEM_MANUAL is not set | 269 | CONFIG_FLATMEM_MANUAL=y |
265 | # CONFIG_DISCONTIGMEM_MANUAL is not set | 270 | # CONFIG_DISCONTIGMEM_MANUAL is not set |
266 | CONFIG_SPARSEMEM_MANUAL=y | 271 | # CONFIG_SPARSEMEM_MANUAL is not set |
267 | CONFIG_SPARSEMEM=y | 272 | CONFIG_FLATMEM=y |
268 | CONFIG_HAVE_MEMORY_PRESENT=y | 273 | CONFIG_FLAT_NODE_MEM_MAP=y |
269 | CONFIG_SPARSEMEM_STATIC=y | 274 | CONFIG_SPARSEMEM_STATIC=y |
270 | # CONFIG_SPARSEMEM_VMEMMAP_ENABLE is not set | 275 | # CONFIG_SPARSEMEM_VMEMMAP_ENABLE is not set |
271 | |||
272 | # | ||
273 | # Memory hotplug is currently incompatible with Software Suspend | ||
274 | # | ||
275 | CONFIG_PAGEFLAGS_EXTENDED=y | 276 | CONFIG_PAGEFLAGS_EXTENDED=y |
276 | CONFIG_SPLIT_PTLOCK_CPUS=4 | 277 | CONFIG_SPLIT_PTLOCK_CPUS=4 |
277 | CONFIG_RESOURCES_64BIT=y | 278 | CONFIG_RESOURCES_64BIT=y |
278 | CONFIG_ZONE_DMA_FLAG=1 | 279 | CONFIG_ZONE_DMA_FLAG=1 |
279 | CONFIG_BOUNCE=y | 280 | CONFIG_BOUNCE=y |
280 | CONFIG_VIRT_TO_BUS=y | 281 | CONFIG_VIRT_TO_BUS=y |
281 | # CONFIG_HIGHPTE is not set | 282 | CONFIG_HIGHPTE=y |
282 | # CONFIG_MATH_EMULATION is not set | 283 | # CONFIG_MATH_EMULATION is not set |
283 | CONFIG_MTRR=y | 284 | CONFIG_MTRR=y |
284 | # CONFIG_X86_PAT is not set | 285 | # CONFIG_MTRR_SANITIZER is not set |
286 | CONFIG_X86_PAT=y | ||
285 | CONFIG_EFI=y | 287 | CONFIG_EFI=y |
286 | # CONFIG_IRQBALANCE is not set | 288 | # CONFIG_IRQBALANCE is not set |
287 | CONFIG_SECCOMP=y | 289 | CONFIG_SECCOMP=y |
@@ -293,6 +295,7 @@ CONFIG_HZ=1000 | |||
293 | CONFIG_SCHED_HRTICK=y | 295 | CONFIG_SCHED_HRTICK=y |
294 | CONFIG_KEXEC=y | 296 | CONFIG_KEXEC=y |
295 | CONFIG_CRASH_DUMP=y | 297 | CONFIG_CRASH_DUMP=y |
298 | # CONFIG_KEXEC_JUMP is not set | ||
296 | CONFIG_PHYSICAL_START=0x1000000 | 299 | CONFIG_PHYSICAL_START=0x1000000 |
297 | CONFIG_RELOCATABLE=y | 300 | CONFIG_RELOCATABLE=y |
298 | CONFIG_PHYSICAL_ALIGN=0x200000 | 301 | CONFIG_PHYSICAL_ALIGN=0x200000 |
@@ -312,6 +315,7 @@ CONFIG_PM_TRACE_RTC=y | |||
312 | CONFIG_PM_SLEEP_SMP=y | 315 | CONFIG_PM_SLEEP_SMP=y |
313 | CONFIG_PM_SLEEP=y | 316 | CONFIG_PM_SLEEP=y |
314 | CONFIG_SUSPEND=y | 317 | CONFIG_SUSPEND=y |
318 | # CONFIG_PM_TEST_SUSPEND is not set | ||
315 | CONFIG_SUSPEND_FREEZER=y | 319 | CONFIG_SUSPEND_FREEZER=y |
316 | CONFIG_HIBERNATION=y | 320 | CONFIG_HIBERNATION=y |
317 | CONFIG_PM_STD_PARTITION="" | 321 | CONFIG_PM_STD_PARTITION="" |
@@ -337,6 +341,7 @@ CONFIG_ACPI_THERMAL=y | |||
337 | CONFIG_ACPI_BLACKLIST_YEAR=0 | 341 | CONFIG_ACPI_BLACKLIST_YEAR=0 |
338 | # CONFIG_ACPI_DEBUG is not set | 342 | # CONFIG_ACPI_DEBUG is not set |
339 | CONFIG_ACPI_EC=y | 343 | CONFIG_ACPI_EC=y |
344 | # CONFIG_ACPI_PCI_SLOT is not set | ||
340 | CONFIG_ACPI_POWER=y | 345 | CONFIG_ACPI_POWER=y |
341 | CONFIG_ACPI_SYSTEM=y | 346 | CONFIG_ACPI_SYSTEM=y |
342 | CONFIG_X86_PM_TIMER=y | 347 | CONFIG_X86_PM_TIMER=y |
@@ -395,8 +400,8 @@ CONFIG_PCI=y | |||
395 | # CONFIG_PCI_GOBIOS is not set | 400 | # CONFIG_PCI_GOBIOS is not set |
396 | # CONFIG_PCI_GOMMCONFIG is not set | 401 | # CONFIG_PCI_GOMMCONFIG is not set |
397 | # CONFIG_PCI_GODIRECT is not set | 402 | # CONFIG_PCI_GODIRECT is not set |
398 | CONFIG_PCI_GOANY=y | ||
399 | # CONFIG_PCI_GOOLPC is not set | 403 | # CONFIG_PCI_GOOLPC is not set |
404 | CONFIG_PCI_GOANY=y | ||
400 | CONFIG_PCI_BIOS=y | 405 | CONFIG_PCI_BIOS=y |
401 | CONFIG_PCI_DIRECT=y | 406 | CONFIG_PCI_DIRECT=y |
402 | CONFIG_PCI_MMCONFIG=y | 407 | CONFIG_PCI_MMCONFIG=y |
@@ -448,10 +453,6 @@ CONFIG_HOTPLUG_PCI=y | |||
448 | CONFIG_BINFMT_ELF=y | 453 | CONFIG_BINFMT_ELF=y |
449 | # CONFIG_BINFMT_AOUT is not set | 454 | # CONFIG_BINFMT_AOUT is not set |
450 | CONFIG_BINFMT_MISC=y | 455 | CONFIG_BINFMT_MISC=y |
451 | |||
452 | # | ||
453 | # Networking | ||
454 | # | ||
455 | CONFIG_NET=y | 456 | CONFIG_NET=y |
456 | 457 | ||
457 | # | 458 | # |
@@ -475,7 +476,10 @@ CONFIG_IP_FIB_HASH=y | |||
475 | CONFIG_IP_MULTIPLE_TABLES=y | 476 | CONFIG_IP_MULTIPLE_TABLES=y |
476 | CONFIG_IP_ROUTE_MULTIPATH=y | 477 | CONFIG_IP_ROUTE_MULTIPATH=y |
477 | CONFIG_IP_ROUTE_VERBOSE=y | 478 | CONFIG_IP_ROUTE_VERBOSE=y |
478 | # CONFIG_IP_PNP is not set | 479 | CONFIG_IP_PNP=y |
480 | CONFIG_IP_PNP_DHCP=y | ||
481 | CONFIG_IP_PNP_BOOTP=y | ||
482 | CONFIG_IP_PNP_RARP=y | ||
479 | # CONFIG_NET_IPIP is not set | 483 | # CONFIG_NET_IPIP is not set |
480 | # CONFIG_NET_IPGRE is not set | 484 | # CONFIG_NET_IPGRE is not set |
481 | CONFIG_IP_MROUTE=y | 485 | CONFIG_IP_MROUTE=y |
@@ -618,7 +622,6 @@ CONFIG_NET_SCHED=y | |||
618 | # CONFIG_NET_SCH_HTB is not set | 622 | # CONFIG_NET_SCH_HTB is not set |
619 | # CONFIG_NET_SCH_HFSC is not set | 623 | # CONFIG_NET_SCH_HFSC is not set |
620 | # CONFIG_NET_SCH_PRIO is not set | 624 | # CONFIG_NET_SCH_PRIO is not set |
621 | # CONFIG_NET_SCH_RR is not set | ||
622 | # CONFIG_NET_SCH_RED is not set | 625 | # CONFIG_NET_SCH_RED is not set |
623 | # CONFIG_NET_SCH_SFQ is not set | 626 | # CONFIG_NET_SCH_SFQ is not set |
624 | # CONFIG_NET_SCH_TEQL is not set | 627 | # CONFIG_NET_SCH_TEQL is not set |
@@ -680,28 +683,19 @@ CONFIG_FIB_RULES=y | |||
680 | CONFIG_CFG80211=y | 683 | CONFIG_CFG80211=y |
681 | CONFIG_NL80211=y | 684 | CONFIG_NL80211=y |
682 | CONFIG_WIRELESS_EXT=y | 685 | CONFIG_WIRELESS_EXT=y |
686 | CONFIG_WIRELESS_EXT_SYSFS=y | ||
683 | CONFIG_MAC80211=y | 687 | CONFIG_MAC80211=y |
684 | 688 | ||
685 | # | 689 | # |
686 | # Rate control algorithm selection | 690 | # Rate control algorithm selection |
687 | # | 691 | # |
692 | CONFIG_MAC80211_RC_PID=y | ||
688 | CONFIG_MAC80211_RC_DEFAULT_PID=y | 693 | CONFIG_MAC80211_RC_DEFAULT_PID=y |
689 | # CONFIG_MAC80211_RC_DEFAULT_NONE is not set | ||
690 | |||
691 | # | ||
692 | # Selecting 'y' for an algorithm will | ||
693 | # | ||
694 | |||
695 | # | ||
696 | # build the algorithm into mac80211. | ||
697 | # | ||
698 | CONFIG_MAC80211_RC_DEFAULT="pid" | 694 | CONFIG_MAC80211_RC_DEFAULT="pid" |
699 | CONFIG_MAC80211_RC_PID=y | ||
700 | # CONFIG_MAC80211_MESH is not set | 695 | # CONFIG_MAC80211_MESH is not set |
701 | CONFIG_MAC80211_LEDS=y | 696 | CONFIG_MAC80211_LEDS=y |
702 | # CONFIG_MAC80211_DEBUGFS is not set | 697 | # CONFIG_MAC80211_DEBUGFS is not set |
703 | # CONFIG_MAC80211_DEBUG_PACKET_ALIGNMENT is not set | 698 | # CONFIG_MAC80211_DEBUG_MENU is not set |
704 | # CONFIG_MAC80211_DEBUG is not set | ||
705 | # CONFIG_IEEE80211 is not set | 699 | # CONFIG_IEEE80211 is not set |
706 | # CONFIG_RFKILL is not set | 700 | # CONFIG_RFKILL is not set |
707 | # CONFIG_NET_9P is not set | 701 | # CONFIG_NET_9P is not set |
@@ -717,6 +711,8 @@ CONFIG_UEVENT_HELPER_PATH="/sbin/hotplug" | |||
717 | CONFIG_STANDALONE=y | 711 | CONFIG_STANDALONE=y |
718 | CONFIG_PREVENT_FIRMWARE_BUILD=y | 712 | CONFIG_PREVENT_FIRMWARE_BUILD=y |
719 | CONFIG_FW_LOADER=y | 713 | CONFIG_FW_LOADER=y |
714 | CONFIG_FIRMWARE_IN_KERNEL=y | ||
715 | CONFIG_EXTRA_FIRMWARE="" | ||
720 | # CONFIG_DEBUG_DRIVER is not set | 716 | # CONFIG_DEBUG_DRIVER is not set |
721 | CONFIG_DEBUG_DEVRES=y | 717 | CONFIG_DEBUG_DEVRES=y |
722 | # CONFIG_SYS_HYPERVISOR is not set | 718 | # CONFIG_SYS_HYPERVISOR is not set |
@@ -749,6 +745,7 @@ CONFIG_BLK_DEV_RAM_SIZE=16384 | |||
749 | # CONFIG_BLK_DEV_XIP is not set | 745 | # CONFIG_BLK_DEV_XIP is not set |
750 | # CONFIG_CDROM_PKTCDVD is not set | 746 | # CONFIG_CDROM_PKTCDVD is not set |
751 | # CONFIG_ATA_OVER_ETH is not set | 747 | # CONFIG_ATA_OVER_ETH is not set |
748 | # CONFIG_BLK_DEV_HD is not set | ||
752 | CONFIG_MISC_DEVICES=y | 749 | CONFIG_MISC_DEVICES=y |
753 | # CONFIG_IBM_ASM is not set | 750 | # CONFIG_IBM_ASM is not set |
754 | # CONFIG_PHANTOM is not set | 751 | # CONFIG_PHANTOM is not set |
@@ -760,10 +757,12 @@ CONFIG_MISC_DEVICES=y | |||
760 | # CONFIG_FUJITSU_LAPTOP is not set | 757 | # CONFIG_FUJITSU_LAPTOP is not set |
761 | # CONFIG_TC1100_WMI is not set | 758 | # CONFIG_TC1100_WMI is not set |
762 | # CONFIG_MSI_LAPTOP is not set | 759 | # CONFIG_MSI_LAPTOP is not set |
760 | # CONFIG_COMPAL_LAPTOP is not set | ||
763 | # CONFIG_SONY_LAPTOP is not set | 761 | # CONFIG_SONY_LAPTOP is not set |
764 | # CONFIG_THINKPAD_ACPI is not set | 762 | # CONFIG_THINKPAD_ACPI is not set |
765 | # CONFIG_INTEL_MENLOW is not set | 763 | # CONFIG_INTEL_MENLOW is not set |
766 | # CONFIG_ENCLOSURE_SERVICES is not set | 764 | # CONFIG_ENCLOSURE_SERVICES is not set |
765 | # CONFIG_HP_ILO is not set | ||
767 | CONFIG_HAVE_IDE=y | 766 | CONFIG_HAVE_IDE=y |
768 | # CONFIG_IDE is not set | 767 | # CONFIG_IDE is not set |
769 | 768 | ||
@@ -802,12 +801,13 @@ CONFIG_SCSI_WAIT_SCAN=m | |||
802 | # | 801 | # |
803 | CONFIG_SCSI_SPI_ATTRS=y | 802 | CONFIG_SCSI_SPI_ATTRS=y |
804 | # CONFIG_SCSI_FC_ATTRS is not set | 803 | # CONFIG_SCSI_FC_ATTRS is not set |
805 | # CONFIG_SCSI_ISCSI_ATTRS is not set | 804 | CONFIG_SCSI_ISCSI_ATTRS=y |
806 | # CONFIG_SCSI_SAS_ATTRS is not set | 805 | # CONFIG_SCSI_SAS_ATTRS is not set |
807 | # CONFIG_SCSI_SAS_LIBSAS is not set | 806 | # CONFIG_SCSI_SAS_LIBSAS is not set |
808 | # CONFIG_SCSI_SRP_ATTRS is not set | 807 | # CONFIG_SCSI_SRP_ATTRS is not set |
809 | # CONFIG_SCSI_LOWLEVEL is not set | 808 | # CONFIG_SCSI_LOWLEVEL is not set |
810 | # CONFIG_SCSI_LOWLEVEL_PCMCIA is not set | 809 | # CONFIG_SCSI_LOWLEVEL_PCMCIA is not set |
810 | # CONFIG_SCSI_DH is not set | ||
811 | CONFIG_ATA=y | 811 | CONFIG_ATA=y |
812 | # CONFIG_ATA_NONSTANDARD is not set | 812 | # CONFIG_ATA_NONSTANDARD is not set |
813 | CONFIG_ATA_ACPI=y | 813 | CONFIG_ATA_ACPI=y |
@@ -842,7 +842,7 @@ CONFIG_PATA_AMD=y | |||
842 | # CONFIG_PATA_CS5536 is not set | 842 | # CONFIG_PATA_CS5536 is not set |
843 | # CONFIG_PATA_CYPRESS is not set | 843 | # CONFIG_PATA_CYPRESS is not set |
844 | # CONFIG_PATA_EFAR is not set | 844 | # CONFIG_PATA_EFAR is not set |
845 | # CONFIG_ATA_GENERIC is not set | 845 | CONFIG_ATA_GENERIC=y |
846 | # CONFIG_PATA_HPT366 is not set | 846 | # CONFIG_PATA_HPT366 is not set |
847 | # CONFIG_PATA_HPT37X is not set | 847 | # CONFIG_PATA_HPT37X is not set |
848 | # CONFIG_PATA_HPT3X2N is not set | 848 | # CONFIG_PATA_HPT3X2N is not set |
@@ -852,7 +852,7 @@ CONFIG_PATA_AMD=y | |||
852 | # CONFIG_PATA_JMICRON is not set | 852 | # CONFIG_PATA_JMICRON is not set |
853 | # CONFIG_PATA_TRIFLEX is not set | 853 | # CONFIG_PATA_TRIFLEX is not set |
854 | # CONFIG_PATA_MARVELL is not set | 854 | # CONFIG_PATA_MARVELL is not set |
855 | # CONFIG_PATA_MPIIX is not set | 855 | CONFIG_PATA_MPIIX=y |
856 | CONFIG_PATA_OLDPIIX=y | 856 | CONFIG_PATA_OLDPIIX=y |
857 | # CONFIG_PATA_NETCELL is not set | 857 | # CONFIG_PATA_NETCELL is not set |
858 | # CONFIG_PATA_NINJA32 is not set | 858 | # CONFIG_PATA_NINJA32 is not set |
@@ -871,6 +871,7 @@ CONFIG_PATA_OLDPIIX=y | |||
871 | # CONFIG_PATA_SIS is not set | 871 | # CONFIG_PATA_SIS is not set |
872 | # CONFIG_PATA_VIA is not set | 872 | # CONFIG_PATA_VIA is not set |
873 | # CONFIG_PATA_WINBOND is not set | 873 | # CONFIG_PATA_WINBOND is not set |
874 | CONFIG_PATA_SCH=y | ||
874 | CONFIG_MD=y | 875 | CONFIG_MD=y |
875 | CONFIG_BLK_DEV_MD=y | 876 | CONFIG_BLK_DEV_MD=y |
876 | # CONFIG_MD_LINEAR is not set | 877 | # CONFIG_MD_LINEAR is not set |
@@ -894,13 +895,16 @@ CONFIG_DM_ZERO=y | |||
894 | # | 895 | # |
895 | # IEEE 1394 (FireWire) support | 896 | # IEEE 1394 (FireWire) support |
896 | # | 897 | # |
898 | |||
899 | # | ||
900 | # Enable only one of the two stacks, unless you know what you are doing | ||
901 | # | ||
897 | # CONFIG_FIREWIRE is not set | 902 | # CONFIG_FIREWIRE is not set |
898 | # CONFIG_IEEE1394 is not set | 903 | # CONFIG_IEEE1394 is not set |
899 | # CONFIG_I2O is not set | 904 | # CONFIG_I2O is not set |
900 | CONFIG_MACINTOSH_DRIVERS=y | 905 | CONFIG_MACINTOSH_DRIVERS=y |
901 | CONFIG_MAC_EMUMOUSEBTN=y | 906 | CONFIG_MAC_EMUMOUSEBTN=y |
902 | CONFIG_NETDEVICES=y | 907 | CONFIG_NETDEVICES=y |
903 | # CONFIG_NETDEVICES_MULTIQUEUE is not set | ||
904 | # CONFIG_IFB is not set | 908 | # CONFIG_IFB is not set |
905 | # CONFIG_DUMMY is not set | 909 | # CONFIG_DUMMY is not set |
906 | # CONFIG_BONDING is not set | 910 | # CONFIG_BONDING is not set |
@@ -910,7 +914,23 @@ CONFIG_NETDEVICES=y | |||
910 | # CONFIG_VETH is not set | 914 | # CONFIG_VETH is not set |
911 | # CONFIG_NET_SB1000 is not set | 915 | # CONFIG_NET_SB1000 is not set |
912 | # CONFIG_ARCNET is not set | 916 | # CONFIG_ARCNET is not set |
913 | # CONFIG_PHYLIB is not set | 917 | CONFIG_PHYLIB=y |
918 | |||
919 | # | ||
920 | # MII PHY device drivers | ||
921 | # | ||
922 | # CONFIG_MARVELL_PHY is not set | ||
923 | # CONFIG_DAVICOM_PHY is not set | ||
924 | # CONFIG_QSEMI_PHY is not set | ||
925 | # CONFIG_LXT_PHY is not set | ||
926 | # CONFIG_CICADA_PHY is not set | ||
927 | # CONFIG_VITESSE_PHY is not set | ||
928 | # CONFIG_SMSC_PHY is not set | ||
929 | # CONFIG_BROADCOM_PHY is not set | ||
930 | # CONFIG_ICPLUS_PHY is not set | ||
931 | # CONFIG_REALTEK_PHY is not set | ||
932 | # CONFIG_FIXED_PHY is not set | ||
933 | # CONFIG_MDIO_BITBANG is not set | ||
914 | CONFIG_NET_ETHERNET=y | 934 | CONFIG_NET_ETHERNET=y |
915 | CONFIG_MII=y | 935 | CONFIG_MII=y |
916 | # CONFIG_HAPPYMEAL is not set | 936 | # CONFIG_HAPPYMEAL is not set |
@@ -943,10 +963,10 @@ CONFIG_FORCEDETH=y | |||
943 | CONFIG_E100=y | 963 | CONFIG_E100=y |
944 | # CONFIG_FEALNX is not set | 964 | # CONFIG_FEALNX is not set |
945 | # CONFIG_NATSEMI is not set | 965 | # CONFIG_NATSEMI is not set |
946 | # CONFIG_NE2K_PCI is not set | 966 | CONFIG_NE2K_PCI=y |
947 | # CONFIG_8139CP is not set | 967 | # CONFIG_8139CP is not set |
948 | CONFIG_8139TOO=y | 968 | CONFIG_8139TOO=y |
949 | CONFIG_8139TOO_PIO=y | 969 | # CONFIG_8139TOO_PIO is not set |
950 | # CONFIG_8139TOO_TUNE_TWISTER is not set | 970 | # CONFIG_8139TOO_TUNE_TWISTER is not set |
951 | # CONFIG_8139TOO_8129 is not set | 971 | # CONFIG_8139TOO_8129 is not set |
952 | # CONFIG_8139_OLD_RX_RESET is not set | 972 | # CONFIG_8139_OLD_RX_RESET is not set |
@@ -961,25 +981,24 @@ CONFIG_NETDEV_1000=y | |||
961 | # CONFIG_ACENIC is not set | 981 | # CONFIG_ACENIC is not set |
962 | # CONFIG_DL2K is not set | 982 | # CONFIG_DL2K is not set |
963 | CONFIG_E1000=y | 983 | CONFIG_E1000=y |
964 | # CONFIG_E1000_NAPI is not set | ||
965 | # CONFIG_E1000_DISABLE_PACKET_SPLIT is not set | 984 | # CONFIG_E1000_DISABLE_PACKET_SPLIT is not set |
966 | # CONFIG_E1000E is not set | 985 | CONFIG_E1000E=y |
967 | # CONFIG_E1000E_ENABLED is not set | ||
968 | # CONFIG_IP1000 is not set | 986 | # CONFIG_IP1000 is not set |
969 | # CONFIG_IGB is not set | 987 | # CONFIG_IGB is not set |
970 | # CONFIG_NS83820 is not set | 988 | # CONFIG_NS83820 is not set |
971 | # CONFIG_HAMACHI is not set | 989 | # CONFIG_HAMACHI is not set |
972 | # CONFIG_YELLOWFIN is not set | 990 | # CONFIG_YELLOWFIN is not set |
973 | # CONFIG_R8169 is not set | 991 | CONFIG_R8169=y |
974 | # CONFIG_SIS190 is not set | 992 | # CONFIG_SIS190 is not set |
975 | # CONFIG_SKGE is not set | 993 | # CONFIG_SKGE is not set |
976 | CONFIG_SKY2=y | 994 | CONFIG_SKY2=y |
977 | # CONFIG_SKY2_DEBUG is not set | 995 | # CONFIG_SKY2_DEBUG is not set |
978 | # CONFIG_VIA_VELOCITY is not set | 996 | # CONFIG_VIA_VELOCITY is not set |
979 | CONFIG_TIGON3=y | 997 | CONFIG_TIGON3=y |
980 | # CONFIG_BNX2 is not set | 998 | CONFIG_BNX2=y |
981 | # CONFIG_QLA3XXX is not set | 999 | # CONFIG_QLA3XXX is not set |
982 | # CONFIG_ATL1 is not set | 1000 | # CONFIG_ATL1 is not set |
1001 | # CONFIG_ATL1E is not set | ||
983 | CONFIG_NETDEV_10000=y | 1002 | CONFIG_NETDEV_10000=y |
984 | # CONFIG_CHELSIO_T1 is not set | 1003 | # CONFIG_CHELSIO_T1 is not set |
985 | # CONFIG_CHELSIO_T3 is not set | 1004 | # CONFIG_CHELSIO_T3 is not set |
@@ -1019,13 +1038,14 @@ CONFIG_WLAN_80211=y | |||
1019 | # CONFIG_RTL8180 is not set | 1038 | # CONFIG_RTL8180 is not set |
1020 | # CONFIG_RTL8187 is not set | 1039 | # CONFIG_RTL8187 is not set |
1021 | # CONFIG_ADM8211 is not set | 1040 | # CONFIG_ADM8211 is not set |
1041 | # CONFIG_MAC80211_HWSIM is not set | ||
1022 | # CONFIG_P54_COMMON is not set | 1042 | # CONFIG_P54_COMMON is not set |
1023 | CONFIG_ATH5K=y | 1043 | CONFIG_ATH5K=y |
1024 | # CONFIG_ATH5K_DEBUG is not set | 1044 | # CONFIG_ATH5K_DEBUG is not set |
1025 | # CONFIG_IWLWIFI is not set | 1045 | # CONFIG_ATH9K is not set |
1026 | # CONFIG_IWLCORE is not set | 1046 | # CONFIG_IWLCORE is not set |
1027 | # CONFIG_IWLWIFI_LEDS is not set | 1047 | # CONFIG_IWLWIFI_LEDS is not set |
1028 | # CONFIG_IWL4965 is not set | 1048 | # CONFIG_IWLAGN is not set |
1029 | # CONFIG_IWL3945 is not set | 1049 | # CONFIG_IWL3945 is not set |
1030 | # CONFIG_HOSTAP is not set | 1050 | # CONFIG_HOSTAP is not set |
1031 | # CONFIG_B43 is not set | 1051 | # CONFIG_B43 is not set |
@@ -1105,6 +1125,7 @@ CONFIG_MOUSE_PS2_TRACKPOINT=y | |||
1105 | # CONFIG_MOUSE_PS2_TOUCHKIT is not set | 1125 | # CONFIG_MOUSE_PS2_TOUCHKIT is not set |
1106 | # CONFIG_MOUSE_SERIAL is not set | 1126 | # CONFIG_MOUSE_SERIAL is not set |
1107 | # CONFIG_MOUSE_APPLETOUCH is not set | 1127 | # CONFIG_MOUSE_APPLETOUCH is not set |
1128 | # CONFIG_MOUSE_BCM5974 is not set | ||
1108 | # CONFIG_MOUSE_VSXXXAA is not set | 1129 | # CONFIG_MOUSE_VSXXXAA is not set |
1109 | CONFIG_INPUT_JOYSTICK=y | 1130 | CONFIG_INPUT_JOYSTICK=y |
1110 | # CONFIG_JOYSTICK_ANALOG is not set | 1131 | # CONFIG_JOYSTICK_ANALOG is not set |
@@ -1139,12 +1160,14 @@ CONFIG_INPUT_TOUCHSCREEN=y | |||
1139 | # CONFIG_TOUCHSCREEN_GUNZE is not set | 1160 | # CONFIG_TOUCHSCREEN_GUNZE is not set |
1140 | # CONFIG_TOUCHSCREEN_ELO is not set | 1161 | # CONFIG_TOUCHSCREEN_ELO is not set |
1141 | # CONFIG_TOUCHSCREEN_MTOUCH is not set | 1162 | # CONFIG_TOUCHSCREEN_MTOUCH is not set |
1163 | # CONFIG_TOUCHSCREEN_INEXIO is not set | ||
1142 | # CONFIG_TOUCHSCREEN_MK712 is not set | 1164 | # CONFIG_TOUCHSCREEN_MK712 is not set |
1143 | # CONFIG_TOUCHSCREEN_PENMOUNT is not set | 1165 | # CONFIG_TOUCHSCREEN_PENMOUNT is not set |
1144 | # CONFIG_TOUCHSCREEN_TOUCHRIGHT is not set | 1166 | # CONFIG_TOUCHSCREEN_TOUCHRIGHT is not set |
1145 | # CONFIG_TOUCHSCREEN_TOUCHWIN is not set | 1167 | # CONFIG_TOUCHSCREEN_TOUCHWIN is not set |
1146 | # CONFIG_TOUCHSCREEN_UCB1400 is not set | 1168 | # CONFIG_TOUCHSCREEN_UCB1400 is not set |
1147 | # CONFIG_TOUCHSCREEN_USB_COMPOSITE is not set | 1169 | # CONFIG_TOUCHSCREEN_USB_COMPOSITE is not set |
1170 | # CONFIG_TOUCHSCREEN_TOUCHIT213 is not set | ||
1148 | CONFIG_INPUT_MISC=y | 1171 | CONFIG_INPUT_MISC=y |
1149 | # CONFIG_INPUT_PCSPKR is not set | 1172 | # CONFIG_INPUT_PCSPKR is not set |
1150 | # CONFIG_INPUT_APANEL is not set | 1173 | # CONFIG_INPUT_APANEL is not set |
@@ -1173,6 +1196,7 @@ CONFIG_SERIO_LIBPS2=y | |||
1173 | # Character devices | 1196 | # Character devices |
1174 | # | 1197 | # |
1175 | CONFIG_VT=y | 1198 | CONFIG_VT=y |
1199 | CONFIG_CONSOLE_TRANSLATIONS=y | ||
1176 | CONFIG_VT_CONSOLE=y | 1200 | CONFIG_VT_CONSOLE=y |
1177 | CONFIG_HW_CONSOLE=y | 1201 | CONFIG_HW_CONSOLE=y |
1178 | CONFIG_VT_HW_CONSOLE_BINDING=y | 1202 | CONFIG_VT_HW_CONSOLE_BINDING=y |
@@ -1223,8 +1247,8 @@ CONFIG_UNIX98_PTYS=y | |||
1223 | # CONFIG_LEGACY_PTYS is not set | 1247 | # CONFIG_LEGACY_PTYS is not set |
1224 | # CONFIG_IPMI_HANDLER is not set | 1248 | # CONFIG_IPMI_HANDLER is not set |
1225 | CONFIG_HW_RANDOM=y | 1249 | CONFIG_HW_RANDOM=y |
1226 | # CONFIG_HW_RANDOM_INTEL is not set | 1250 | CONFIG_HW_RANDOM_INTEL=y |
1227 | # CONFIG_HW_RANDOM_AMD is not set | 1251 | CONFIG_HW_RANDOM_AMD=y |
1228 | CONFIG_HW_RANDOM_GEODE=y | 1252 | CONFIG_HW_RANDOM_GEODE=y |
1229 | CONFIG_HW_RANDOM_VIA=y | 1253 | CONFIG_HW_RANDOM_VIA=y |
1230 | CONFIG_NVRAM=y | 1254 | CONFIG_NVRAM=y |
@@ -1245,7 +1269,6 @@ CONFIG_NVRAM=y | |||
1245 | # CONFIG_CS5535_GPIO is not set | 1269 | # CONFIG_CS5535_GPIO is not set |
1246 | # CONFIG_RAW_DRIVER is not set | 1270 | # CONFIG_RAW_DRIVER is not set |
1247 | CONFIG_HPET=y | 1271 | CONFIG_HPET=y |
1248 | # CONFIG_HPET_RTC_IRQ is not set | ||
1249 | # CONFIG_HPET_MMAP is not set | 1272 | # CONFIG_HPET_MMAP is not set |
1250 | # CONFIG_HANGCHECK_TIMER is not set | 1273 | # CONFIG_HANGCHECK_TIMER is not set |
1251 | # CONFIG_TCG_TPM is not set | 1274 | # CONFIG_TCG_TPM is not set |
@@ -1254,43 +1277,64 @@ CONFIG_DEVPORT=y | |||
1254 | CONFIG_I2C=y | 1277 | CONFIG_I2C=y |
1255 | CONFIG_I2C_BOARDINFO=y | 1278 | CONFIG_I2C_BOARDINFO=y |
1256 | # CONFIG_I2C_CHARDEV is not set | 1279 | # CONFIG_I2C_CHARDEV is not set |
1280 | CONFIG_I2C_HELPER_AUTO=y | ||
1257 | 1281 | ||
1258 | # | 1282 | # |
1259 | # I2C Hardware Bus support | 1283 | # I2C Hardware Bus support |
1260 | # | 1284 | # |
1285 | |||
1286 | # | ||
1287 | # PC SMBus host controller drivers | ||
1288 | # | ||
1261 | # CONFIG_I2C_ALI1535 is not set | 1289 | # CONFIG_I2C_ALI1535 is not set |
1262 | # CONFIG_I2C_ALI1563 is not set | 1290 | # CONFIG_I2C_ALI1563 is not set |
1263 | # CONFIG_I2C_ALI15X3 is not set | 1291 | # CONFIG_I2C_ALI15X3 is not set |
1264 | # CONFIG_I2C_AMD756 is not set | 1292 | # CONFIG_I2C_AMD756 is not set |
1265 | # CONFIG_I2C_AMD8111 is not set | 1293 | # CONFIG_I2C_AMD8111 is not set |
1266 | CONFIG_I2C_I801=y | 1294 | CONFIG_I2C_I801=y |
1267 | # CONFIG_I2C_I810 is not set | 1295 | # CONFIG_I2C_ISCH is not set |
1268 | # CONFIG_I2C_PIIX4 is not set | 1296 | # CONFIG_I2C_PIIX4 is not set |
1269 | # CONFIG_I2C_NFORCE2 is not set | 1297 | # CONFIG_I2C_NFORCE2 is not set |
1270 | # CONFIG_I2C_OCORES is not set | ||
1271 | # CONFIG_I2C_PARPORT_LIGHT is not set | ||
1272 | # CONFIG_I2C_PROSAVAGE is not set | ||
1273 | # CONFIG_I2C_SAVAGE4 is not set | ||
1274 | # CONFIG_I2C_SIMTEC is not set | ||
1275 | # CONFIG_SCx200_ACB is not set | ||
1276 | # CONFIG_I2C_SIS5595 is not set | 1298 | # CONFIG_I2C_SIS5595 is not set |
1277 | # CONFIG_I2C_SIS630 is not set | 1299 | # CONFIG_I2C_SIS630 is not set |
1278 | # CONFIG_I2C_SIS96X is not set | 1300 | # CONFIG_I2C_SIS96X is not set |
1279 | # CONFIG_I2C_TAOS_EVM is not set | ||
1280 | # CONFIG_I2C_STUB is not set | ||
1281 | # CONFIG_I2C_TINY_USB is not set | ||
1282 | # CONFIG_I2C_VIA is not set | 1301 | # CONFIG_I2C_VIA is not set |
1283 | # CONFIG_I2C_VIAPRO is not set | 1302 | # CONFIG_I2C_VIAPRO is not set |
1303 | |||
1304 | # | ||
1305 | # I2C system bus drivers (mostly embedded / system-on-chip) | ||
1306 | # | ||
1307 | # CONFIG_I2C_OCORES is not set | ||
1308 | # CONFIG_I2C_SIMTEC is not set | ||
1309 | |||
1310 | # | ||
1311 | # External I2C/SMBus adapter drivers | ||
1312 | # | ||
1313 | # CONFIG_I2C_PARPORT_LIGHT is not set | ||
1314 | # CONFIG_I2C_TAOS_EVM is not set | ||
1315 | # CONFIG_I2C_TINY_USB is not set | ||
1316 | |||
1317 | # | ||
1318 | # Graphics adapter I2C/DDC channel drivers | ||
1319 | # | ||
1284 | # CONFIG_I2C_VOODOO3 is not set | 1320 | # CONFIG_I2C_VOODOO3 is not set |
1321 | |||
1322 | # | ||
1323 | # Other I2C/SMBus bus drivers | ||
1324 | # | ||
1285 | # CONFIG_I2C_PCA_PLATFORM is not set | 1325 | # CONFIG_I2C_PCA_PLATFORM is not set |
1326 | # CONFIG_I2C_STUB is not set | ||
1327 | # CONFIG_SCx200_ACB is not set | ||
1286 | 1328 | ||
1287 | # | 1329 | # |
1288 | # Miscellaneous I2C Chip support | 1330 | # Miscellaneous I2C Chip support |
1289 | # | 1331 | # |
1290 | # CONFIG_DS1682 is not set | 1332 | # CONFIG_DS1682 is not set |
1333 | # CONFIG_AT24 is not set | ||
1291 | # CONFIG_SENSORS_EEPROM is not set | 1334 | # CONFIG_SENSORS_EEPROM is not set |
1292 | # CONFIG_SENSORS_PCF8574 is not set | 1335 | # CONFIG_SENSORS_PCF8574 is not set |
1293 | # CONFIG_PCF8575 is not set | 1336 | # CONFIG_PCF8575 is not set |
1337 | # CONFIG_SENSORS_PCA9539 is not set | ||
1294 | # CONFIG_SENSORS_PCF8591 is not set | 1338 | # CONFIG_SENSORS_PCF8591 is not set |
1295 | # CONFIG_SENSORS_MAX6875 is not set | 1339 | # CONFIG_SENSORS_MAX6875 is not set |
1296 | # CONFIG_SENSORS_TSL2550 is not set | 1340 | # CONFIG_SENSORS_TSL2550 is not set |
@@ -1299,6 +1343,8 @@ CONFIG_I2C_I801=y | |||
1299 | # CONFIG_I2C_DEBUG_BUS is not set | 1343 | # CONFIG_I2C_DEBUG_BUS is not set |
1300 | # CONFIG_I2C_DEBUG_CHIP is not set | 1344 | # CONFIG_I2C_DEBUG_CHIP is not set |
1301 | # CONFIG_SPI is not set | 1345 | # CONFIG_SPI is not set |
1346 | CONFIG_ARCH_WANT_OPTIONAL_GPIOLIB=y | ||
1347 | # CONFIG_GPIOLIB is not set | ||
1302 | # CONFIG_W1 is not set | 1348 | # CONFIG_W1 is not set |
1303 | CONFIG_POWER_SUPPLY=y | 1349 | CONFIG_POWER_SUPPLY=y |
1304 | # CONFIG_POWER_SUPPLY_DEBUG is not set | 1350 | # CONFIG_POWER_SUPPLY_DEBUG is not set |
@@ -1360,8 +1406,10 @@ CONFIG_SSB_POSSIBLE=y | |||
1360 | # | 1406 | # |
1361 | # Multifunction device drivers | 1407 | # Multifunction device drivers |
1362 | # | 1408 | # |
1409 | # CONFIG_MFD_CORE is not set | ||
1363 | # CONFIG_MFD_SM501 is not set | 1410 | # CONFIG_MFD_SM501 is not set |
1364 | # CONFIG_HTC_PASIC3 is not set | 1411 | # CONFIG_HTC_PASIC3 is not set |
1412 | # CONFIG_MFD_TMIO is not set | ||
1365 | 1413 | ||
1366 | # | 1414 | # |
1367 | # Multimedia devices | 1415 | # Multimedia devices |
@@ -1372,6 +1420,7 @@ CONFIG_SSB_POSSIBLE=y | |||
1372 | # | 1420 | # |
1373 | # CONFIG_VIDEO_DEV is not set | 1421 | # CONFIG_VIDEO_DEV is not set |
1374 | # CONFIG_DVB_CORE is not set | 1422 | # CONFIG_DVB_CORE is not set |
1423 | # CONFIG_VIDEO_MEDIA is not set | ||
1375 | 1424 | ||
1376 | # | 1425 | # |
1377 | # Multimedia drivers | 1426 | # Multimedia drivers |
@@ -1418,7 +1467,6 @@ CONFIG_FB_CFB_IMAGEBLIT=y | |||
1418 | # CONFIG_FB_SYS_IMAGEBLIT is not set | 1467 | # CONFIG_FB_SYS_IMAGEBLIT is not set |
1419 | # CONFIG_FB_FOREIGN_ENDIAN is not set | 1468 | # CONFIG_FB_FOREIGN_ENDIAN is not set |
1420 | # CONFIG_FB_SYS_FOPS is not set | 1469 | # CONFIG_FB_SYS_FOPS is not set |
1421 | CONFIG_FB_DEFERRED_IO=y | ||
1422 | # CONFIG_FB_SVGALIB is not set | 1470 | # CONFIG_FB_SVGALIB is not set |
1423 | # CONFIG_FB_MACMODES is not set | 1471 | # CONFIG_FB_MACMODES is not set |
1424 | # CONFIG_FB_BACKLIGHT is not set | 1472 | # CONFIG_FB_BACKLIGHT is not set |
@@ -1463,6 +1511,7 @@ CONFIG_FB_EFI=y | |||
1463 | # CONFIG_FB_TRIDENT is not set | 1511 | # CONFIG_FB_TRIDENT is not set |
1464 | # CONFIG_FB_ARK is not set | 1512 | # CONFIG_FB_ARK is not set |
1465 | # CONFIG_FB_PM3 is not set | 1513 | # CONFIG_FB_PM3 is not set |
1514 | # CONFIG_FB_CARMINE is not set | ||
1466 | # CONFIG_FB_GEODE is not set | 1515 | # CONFIG_FB_GEODE is not set |
1467 | # CONFIG_FB_VIRTUAL is not set | 1516 | # CONFIG_FB_VIRTUAL is not set |
1468 | CONFIG_BACKLIGHT_LCD_SUPPORT=y | 1517 | CONFIG_BACKLIGHT_LCD_SUPPORT=y |
@@ -1470,6 +1519,7 @@ CONFIG_BACKLIGHT_LCD_SUPPORT=y | |||
1470 | CONFIG_BACKLIGHT_CLASS_DEVICE=y | 1519 | CONFIG_BACKLIGHT_CLASS_DEVICE=y |
1471 | # CONFIG_BACKLIGHT_CORGI is not set | 1520 | # CONFIG_BACKLIGHT_CORGI is not set |
1472 | # CONFIG_BACKLIGHT_PROGEAR is not set | 1521 | # CONFIG_BACKLIGHT_PROGEAR is not set |
1522 | # CONFIG_BACKLIGHT_MBP_NVIDIA is not set | ||
1473 | 1523 | ||
1474 | # | 1524 | # |
1475 | # Display device support | 1525 | # Display device support |
@@ -1488,15 +1538,7 @@ CONFIG_LOGO=y | |||
1488 | # CONFIG_LOGO_LINUX_MONO is not set | 1538 | # CONFIG_LOGO_LINUX_MONO is not set |
1489 | # CONFIG_LOGO_LINUX_VGA16 is not set | 1539 | # CONFIG_LOGO_LINUX_VGA16 is not set |
1490 | CONFIG_LOGO_LINUX_CLUT224=y | 1540 | CONFIG_LOGO_LINUX_CLUT224=y |
1491 | |||
1492 | # | ||
1493 | # Sound | ||
1494 | # | ||
1495 | CONFIG_SOUND=y | 1541 | CONFIG_SOUND=y |
1496 | |||
1497 | # | ||
1498 | # Advanced Linux Sound Architecture | ||
1499 | # | ||
1500 | CONFIG_SND=y | 1542 | CONFIG_SND=y |
1501 | CONFIG_SND_TIMER=y | 1543 | CONFIG_SND_TIMER=y |
1502 | CONFIG_SND_PCM=y | 1544 | CONFIG_SND_PCM=y |
@@ -1514,20 +1556,14 @@ CONFIG_SND_VERBOSE_PROCFS=y | |||
1514 | # CONFIG_SND_VERBOSE_PRINTK is not set | 1556 | # CONFIG_SND_VERBOSE_PRINTK is not set |
1515 | # CONFIG_SND_DEBUG is not set | 1557 | # CONFIG_SND_DEBUG is not set |
1516 | CONFIG_SND_VMASTER=y | 1558 | CONFIG_SND_VMASTER=y |
1517 | 1559 | CONFIG_SND_DRIVERS=y | |
1518 | # | ||
1519 | # Generic devices | ||
1520 | # | ||
1521 | # CONFIG_SND_PCSP is not set | 1560 | # CONFIG_SND_PCSP is not set |
1522 | # CONFIG_SND_DUMMY is not set | 1561 | # CONFIG_SND_DUMMY is not set |
1523 | # CONFIG_SND_VIRMIDI is not set | 1562 | # CONFIG_SND_VIRMIDI is not set |
1524 | # CONFIG_SND_MTPAV is not set | 1563 | # CONFIG_SND_MTPAV is not set |
1525 | # CONFIG_SND_SERIAL_U16550 is not set | 1564 | # CONFIG_SND_SERIAL_U16550 is not set |
1526 | # CONFIG_SND_MPU401 is not set | 1565 | # CONFIG_SND_MPU401 is not set |
1527 | 1566 | CONFIG_SND_PCI=y | |
1528 | # | ||
1529 | # PCI devices | ||
1530 | # | ||
1531 | # CONFIG_SND_AD1889 is not set | 1567 | # CONFIG_SND_AD1889 is not set |
1532 | # CONFIG_SND_ALS300 is not set | 1568 | # CONFIG_SND_ALS300 is not set |
1533 | # CONFIG_SND_ALS4000 is not set | 1569 | # CONFIG_SND_ALS4000 is not set |
@@ -1602,36 +1638,14 @@ CONFIG_SND_HDA_GENERIC=y | |||
1602 | # CONFIG_SND_VIRTUOSO is not set | 1638 | # CONFIG_SND_VIRTUOSO is not set |
1603 | # CONFIG_SND_VX222 is not set | 1639 | # CONFIG_SND_VX222 is not set |
1604 | # CONFIG_SND_YMFPCI is not set | 1640 | # CONFIG_SND_YMFPCI is not set |
1605 | 1641 | CONFIG_SND_USB=y | |
1606 | # | ||
1607 | # USB devices | ||
1608 | # | ||
1609 | # CONFIG_SND_USB_AUDIO is not set | 1642 | # CONFIG_SND_USB_AUDIO is not set |
1610 | # CONFIG_SND_USB_USX2Y is not set | 1643 | # CONFIG_SND_USB_USX2Y is not set |
1611 | # CONFIG_SND_USB_CAIAQ is not set | 1644 | # CONFIG_SND_USB_CAIAQ is not set |
1612 | 1645 | CONFIG_SND_PCMCIA=y | |
1613 | # | ||
1614 | # PCMCIA devices | ||
1615 | # | ||
1616 | # CONFIG_SND_VXPOCKET is not set | 1646 | # CONFIG_SND_VXPOCKET is not set |
1617 | # CONFIG_SND_PDAUDIOCF is not set | 1647 | # CONFIG_SND_PDAUDIOCF is not set |
1618 | |||
1619 | # | ||
1620 | # System on Chip audio support | ||
1621 | # | ||
1622 | # CONFIG_SND_SOC is not set | 1648 | # CONFIG_SND_SOC is not set |
1623 | |||
1624 | # | ||
1625 | # ALSA SoC audio for Freescale SOCs | ||
1626 | # | ||
1627 | |||
1628 | # | ||
1629 | # SoC Audio for the Texas Instruments OMAP | ||
1630 | # | ||
1631 | |||
1632 | # | ||
1633 | # Open Sound System | ||
1634 | # | ||
1635 | # CONFIG_SOUND_PRIME is not set | 1649 | # CONFIG_SOUND_PRIME is not set |
1636 | CONFIG_HID_SUPPORT=y | 1650 | CONFIG_HID_SUPPORT=y |
1637 | CONFIG_HID=y | 1651 | CONFIG_HID=y |
@@ -1667,6 +1681,7 @@ CONFIG_USB_DEVICEFS=y | |||
1667 | # CONFIG_USB_DYNAMIC_MINORS is not set | 1681 | # CONFIG_USB_DYNAMIC_MINORS is not set |
1668 | CONFIG_USB_SUSPEND=y | 1682 | CONFIG_USB_SUSPEND=y |
1669 | # CONFIG_USB_OTG is not set | 1683 | # CONFIG_USB_OTG is not set |
1684 | CONFIG_USB_MON=y | ||
1670 | 1685 | ||
1671 | # | 1686 | # |
1672 | # USB Host Controller Drivers | 1687 | # USB Host Controller Drivers |
@@ -1690,6 +1705,7 @@ CONFIG_USB_UHCI_HCD=y | |||
1690 | # | 1705 | # |
1691 | # CONFIG_USB_ACM is not set | 1706 | # CONFIG_USB_ACM is not set |
1692 | CONFIG_USB_PRINTER=y | 1707 | CONFIG_USB_PRINTER=y |
1708 | # CONFIG_USB_WDM is not set | ||
1693 | 1709 | ||
1694 | # | 1710 | # |
1695 | # NOTE: USB_STORAGE enables SCSI, and 'SCSI disk support' | 1711 | # NOTE: USB_STORAGE enables SCSI, and 'SCSI disk support' |
@@ -1711,6 +1727,7 @@ CONFIG_USB_STORAGE=y | |||
1711 | # CONFIG_USB_STORAGE_ALAUDA is not set | 1727 | # CONFIG_USB_STORAGE_ALAUDA is not set |
1712 | # CONFIG_USB_STORAGE_ONETOUCH is not set | 1728 | # CONFIG_USB_STORAGE_ONETOUCH is not set |
1713 | # CONFIG_USB_STORAGE_KARMA is not set | 1729 | # CONFIG_USB_STORAGE_KARMA is not set |
1730 | # CONFIG_USB_STORAGE_SIERRA is not set | ||
1714 | # CONFIG_USB_STORAGE_CYPRESS_ATACB is not set | 1731 | # CONFIG_USB_STORAGE_CYPRESS_ATACB is not set |
1715 | CONFIG_USB_LIBUSUAL=y | 1732 | CONFIG_USB_LIBUSUAL=y |
1716 | 1733 | ||
@@ -1719,7 +1736,6 @@ CONFIG_USB_LIBUSUAL=y | |||
1719 | # | 1736 | # |
1720 | # CONFIG_USB_MDC800 is not set | 1737 | # CONFIG_USB_MDC800 is not set |
1721 | # CONFIG_USB_MICROTEK is not set | 1738 | # CONFIG_USB_MICROTEK is not set |
1722 | CONFIG_USB_MON=y | ||
1723 | 1739 | ||
1724 | # | 1740 | # |
1725 | # USB port drivers | 1741 | # USB port drivers |
@@ -1732,7 +1748,6 @@ CONFIG_USB_MON=y | |||
1732 | # CONFIG_USB_EMI62 is not set | 1748 | # CONFIG_USB_EMI62 is not set |
1733 | # CONFIG_USB_EMI26 is not set | 1749 | # CONFIG_USB_EMI26 is not set |
1734 | # CONFIG_USB_ADUTUX is not set | 1750 | # CONFIG_USB_ADUTUX is not set |
1735 | # CONFIG_USB_AUERSWALD is not set | ||
1736 | # CONFIG_USB_RIO500 is not set | 1751 | # CONFIG_USB_RIO500 is not set |
1737 | # CONFIG_USB_LEGOTOWER is not set | 1752 | # CONFIG_USB_LEGOTOWER is not set |
1738 | # CONFIG_USB_LCD is not set | 1753 | # CONFIG_USB_LCD is not set |
@@ -1749,6 +1764,7 @@ CONFIG_USB_MON=y | |||
1749 | # CONFIG_USB_TRANCEVIBRATOR is not set | 1764 | # CONFIG_USB_TRANCEVIBRATOR is not set |
1750 | # CONFIG_USB_IOWARRIOR is not set | 1765 | # CONFIG_USB_IOWARRIOR is not set |
1751 | # CONFIG_USB_TEST is not set | 1766 | # CONFIG_USB_TEST is not set |
1767 | # CONFIG_USB_ISIGHTFW is not set | ||
1752 | # CONFIG_USB_GADGET is not set | 1768 | # CONFIG_USB_GADGET is not set |
1753 | # CONFIG_MMC is not set | 1769 | # CONFIG_MMC is not set |
1754 | # CONFIG_MEMSTICK is not set | 1770 | # CONFIG_MEMSTICK is not set |
@@ -1758,7 +1774,9 @@ CONFIG_LEDS_CLASS=y | |||
1758 | # | 1774 | # |
1759 | # LED drivers | 1775 | # LED drivers |
1760 | # | 1776 | # |
1777 | # CONFIG_LEDS_PCA9532 is not set | ||
1761 | # CONFIG_LEDS_CLEVO_MAIL is not set | 1778 | # CONFIG_LEDS_CLEVO_MAIL is not set |
1779 | # CONFIG_LEDS_PCA955X is not set | ||
1762 | 1780 | ||
1763 | # | 1781 | # |
1764 | # LED Triggers | 1782 | # LED Triggers |
@@ -1804,6 +1822,7 @@ CONFIG_RTC_INTF_DEV=y | |||
1804 | # CONFIG_RTC_DRV_PCF8583 is not set | 1822 | # CONFIG_RTC_DRV_PCF8583 is not set |
1805 | # CONFIG_RTC_DRV_M41T80 is not set | 1823 | # CONFIG_RTC_DRV_M41T80 is not set |
1806 | # CONFIG_RTC_DRV_S35390A is not set | 1824 | # CONFIG_RTC_DRV_S35390A is not set |
1825 | # CONFIG_RTC_DRV_FM3130 is not set | ||
1807 | 1826 | ||
1808 | # | 1827 | # |
1809 | # SPI RTC drivers | 1828 | # SPI RTC drivers |
@@ -1836,11 +1855,13 @@ CONFIG_DMADEVICES=y | |||
1836 | # Firmware Drivers | 1855 | # Firmware Drivers |
1837 | # | 1856 | # |
1838 | # CONFIG_EDD is not set | 1857 | # CONFIG_EDD is not set |
1858 | CONFIG_FIRMWARE_MEMMAP=y | ||
1839 | CONFIG_EFI_VARS=y | 1859 | CONFIG_EFI_VARS=y |
1840 | # CONFIG_DELL_RBU is not set | 1860 | # CONFIG_DELL_RBU is not set |
1841 | # CONFIG_DCDBAS is not set | 1861 | # CONFIG_DCDBAS is not set |
1842 | CONFIG_DMIID=y | 1862 | CONFIG_DMIID=y |
1843 | # CONFIG_ISCSI_IBFT_FIND is not set | 1863 | CONFIG_ISCSI_IBFT_FIND=y |
1864 | CONFIG_ISCSI_IBFT=y | ||
1844 | 1865 | ||
1845 | # | 1866 | # |
1846 | # File systems | 1867 | # File systems |
@@ -1919,14 +1940,27 @@ CONFIG_HUGETLB_PAGE=y | |||
1919 | # CONFIG_CRAMFS is not set | 1940 | # CONFIG_CRAMFS is not set |
1920 | # CONFIG_VXFS_FS is not set | 1941 | # CONFIG_VXFS_FS is not set |
1921 | # CONFIG_MINIX_FS is not set | 1942 | # CONFIG_MINIX_FS is not set |
1943 | # CONFIG_OMFS_FS is not set | ||
1922 | # CONFIG_HPFS_FS is not set | 1944 | # CONFIG_HPFS_FS is not set |
1923 | # CONFIG_QNX4FS_FS is not set | 1945 | # CONFIG_QNX4FS_FS is not set |
1924 | # CONFIG_ROMFS_FS is not set | 1946 | # CONFIG_ROMFS_FS is not set |
1925 | # CONFIG_SYSV_FS is not set | 1947 | # CONFIG_SYSV_FS is not set |
1926 | # CONFIG_UFS_FS is not set | 1948 | # CONFIG_UFS_FS is not set |
1927 | CONFIG_NETWORK_FILESYSTEMS=y | 1949 | CONFIG_NETWORK_FILESYSTEMS=y |
1928 | # CONFIG_NFS_FS is not set | 1950 | CONFIG_NFS_FS=y |
1951 | CONFIG_NFS_V3=y | ||
1952 | CONFIG_NFS_V3_ACL=y | ||
1953 | CONFIG_NFS_V4=y | ||
1954 | CONFIG_ROOT_NFS=y | ||
1929 | # CONFIG_NFSD is not set | 1955 | # CONFIG_NFSD is not set |
1956 | CONFIG_LOCKD=y | ||
1957 | CONFIG_LOCKD_V4=y | ||
1958 | CONFIG_NFS_ACL_SUPPORT=y | ||
1959 | CONFIG_NFS_COMMON=y | ||
1960 | CONFIG_SUNRPC=y | ||
1961 | CONFIG_SUNRPC_GSS=y | ||
1962 | CONFIG_RPCSEC_GSS_KRB5=y | ||
1963 | # CONFIG_RPCSEC_GSS_SPKM3 is not set | ||
1930 | # CONFIG_SMB_FS is not set | 1964 | # CONFIG_SMB_FS is not set |
1931 | # CONFIG_CIFS is not set | 1965 | # CONFIG_CIFS is not set |
1932 | # CONFIG_NCP_FS is not set | 1966 | # CONFIG_NCP_FS is not set |
@@ -2000,9 +2034,9 @@ CONFIG_NLS_UTF8=y | |||
2000 | # Kernel hacking | 2034 | # Kernel hacking |
2001 | # | 2035 | # |
2002 | CONFIG_TRACE_IRQFLAGS_SUPPORT=y | 2036 | CONFIG_TRACE_IRQFLAGS_SUPPORT=y |
2003 | # CONFIG_PRINTK_TIME is not set | 2037 | CONFIG_PRINTK_TIME=y |
2004 | # CONFIG_ENABLE_WARN_DEPRECATED is not set | 2038 | CONFIG_ENABLE_WARN_DEPRECATED=y |
2005 | # CONFIG_ENABLE_MUST_CHECK is not set | 2039 | CONFIG_ENABLE_MUST_CHECK=y |
2006 | CONFIG_FRAME_WARN=2048 | 2040 | CONFIG_FRAME_WARN=2048 |
2007 | CONFIG_MAGIC_SYSRQ=y | 2041 | CONFIG_MAGIC_SYSRQ=y |
2008 | # CONFIG_UNUSED_SYMBOLS is not set | 2042 | # CONFIG_UNUSED_SYMBOLS is not set |
@@ -2032,6 +2066,7 @@ CONFIG_DEBUG_BUGVERBOSE=y | |||
2032 | # CONFIG_DEBUG_INFO is not set | 2066 | # CONFIG_DEBUG_INFO is not set |
2033 | # CONFIG_DEBUG_VM is not set | 2067 | # CONFIG_DEBUG_VM is not set |
2034 | # CONFIG_DEBUG_WRITECOUNT is not set | 2068 | # CONFIG_DEBUG_WRITECOUNT is not set |
2069 | CONFIG_DEBUG_MEMORY_INIT=y | ||
2035 | # CONFIG_DEBUG_LIST is not set | 2070 | # CONFIG_DEBUG_LIST is not set |
2036 | # CONFIG_DEBUG_SG is not set | 2071 | # CONFIG_DEBUG_SG is not set |
2037 | CONFIG_FRAME_POINTER=y | 2072 | CONFIG_FRAME_POINTER=y |
@@ -2042,23 +2077,32 @@ CONFIG_FRAME_POINTER=y | |||
2042 | # CONFIG_LKDTM is not set | 2077 | # CONFIG_LKDTM is not set |
2043 | # CONFIG_FAULT_INJECTION is not set | 2078 | # CONFIG_FAULT_INJECTION is not set |
2044 | # CONFIG_LATENCYTOP is not set | 2079 | # CONFIG_LATENCYTOP is not set |
2080 | CONFIG_SYSCTL_SYSCALL_CHECK=y | ||
2081 | CONFIG_HAVE_FTRACE=y | ||
2082 | CONFIG_HAVE_DYNAMIC_FTRACE=y | ||
2083 | # CONFIG_FTRACE is not set | ||
2084 | # CONFIG_IRQSOFF_TRACER is not set | ||
2085 | # CONFIG_SYSPROF_TRACER is not set | ||
2086 | # CONFIG_SCHED_TRACER is not set | ||
2087 | # CONFIG_CONTEXT_SWITCH_TRACER is not set | ||
2045 | CONFIG_PROVIDE_OHCI1394_DMA_INIT=y | 2088 | CONFIG_PROVIDE_OHCI1394_DMA_INIT=y |
2046 | # CONFIG_SAMPLES is not set | 2089 | # CONFIG_SAMPLES is not set |
2047 | # CONFIG_KGDB is not set | ||
2048 | CONFIG_HAVE_ARCH_KGDB=y | 2090 | CONFIG_HAVE_ARCH_KGDB=y |
2049 | # CONFIG_NONPROMISC_DEVMEM is not set | 2091 | # CONFIG_KGDB is not set |
2092 | # CONFIG_STRICT_DEVMEM is not set | ||
2093 | CONFIG_X86_VERBOSE_BOOTUP=y | ||
2050 | CONFIG_EARLY_PRINTK=y | 2094 | CONFIG_EARLY_PRINTK=y |
2051 | CONFIG_DEBUG_STACKOVERFLOW=y | 2095 | CONFIG_DEBUG_STACKOVERFLOW=y |
2052 | CONFIG_DEBUG_STACK_USAGE=y | 2096 | CONFIG_DEBUG_STACK_USAGE=y |
2053 | # CONFIG_DEBUG_PAGEALLOC is not set | 2097 | # CONFIG_DEBUG_PAGEALLOC is not set |
2098 | # CONFIG_DEBUG_PER_CPU_MAPS is not set | ||
2054 | # CONFIG_X86_PTDUMP is not set | 2099 | # CONFIG_X86_PTDUMP is not set |
2055 | CONFIG_DEBUG_RODATA=y | 2100 | CONFIG_DEBUG_RODATA=y |
2056 | # CONFIG_DEBUG_RODATA_TEST is not set | 2101 | # CONFIG_DEBUG_RODATA_TEST is not set |
2057 | CONFIG_DEBUG_NX_TEST=m | 2102 | CONFIG_DEBUG_NX_TEST=m |
2058 | # CONFIG_4KSTACKS is not set | 2103 | # CONFIG_4KSTACKS is not set |
2059 | CONFIG_X86_FIND_SMP_CONFIG=y | ||
2060 | CONFIG_X86_MPPARSE=y | ||
2061 | CONFIG_DOUBLEFAULT=y | 2104 | CONFIG_DOUBLEFAULT=y |
2105 | # CONFIG_MMIOTRACE is not set | ||
2062 | CONFIG_IO_DELAY_TYPE_0X80=0 | 2106 | CONFIG_IO_DELAY_TYPE_0X80=0 |
2063 | CONFIG_IO_DELAY_TYPE_0XED=1 | 2107 | CONFIG_IO_DELAY_TYPE_0XED=1 |
2064 | CONFIG_IO_DELAY_TYPE_UDELAY=2 | 2108 | CONFIG_IO_DELAY_TYPE_UDELAY=2 |
@@ -2070,6 +2114,7 @@ CONFIG_IO_DELAY_0X80=y | |||
2070 | CONFIG_DEFAULT_IO_DELAY_TYPE=0 | 2114 | CONFIG_DEFAULT_IO_DELAY_TYPE=0 |
2071 | CONFIG_DEBUG_BOOT_PARAMS=y | 2115 | CONFIG_DEBUG_BOOT_PARAMS=y |
2072 | # CONFIG_CPA_DEBUG is not set | 2116 | # CONFIG_CPA_DEBUG is not set |
2117 | # CONFIG_OPTIMIZE_INLINING is not set | ||
2073 | 2118 | ||
2074 | # | 2119 | # |
2075 | # Security options | 2120 | # Security options |
@@ -2079,7 +2124,6 @@ CONFIG_KEYS_DEBUG_PROC_KEYS=y | |||
2079 | CONFIG_SECURITY=y | 2124 | CONFIG_SECURITY=y |
2080 | CONFIG_SECURITY_NETWORK=y | 2125 | CONFIG_SECURITY_NETWORK=y |
2081 | # CONFIG_SECURITY_NETWORK_XFRM is not set | 2126 | # CONFIG_SECURITY_NETWORK_XFRM is not set |
2082 | CONFIG_SECURITY_CAPABILITIES=y | ||
2083 | CONFIG_SECURITY_FILE_CAPABILITIES=y | 2127 | CONFIG_SECURITY_FILE_CAPABILITIES=y |
2084 | # CONFIG_SECURITY_ROOTPLUG is not set | 2128 | # CONFIG_SECURITY_ROOTPLUG is not set |
2085 | CONFIG_SECURITY_DEFAULT_MMAP_MIN_ADDR=65536 | 2129 | CONFIG_SECURITY_DEFAULT_MMAP_MIN_ADDR=65536 |
@@ -2140,6 +2184,10 @@ CONFIG_CRYPTO_HMAC=y | |||
2140 | # CONFIG_CRYPTO_MD4 is not set | 2184 | # CONFIG_CRYPTO_MD4 is not set |
2141 | CONFIG_CRYPTO_MD5=y | 2185 | CONFIG_CRYPTO_MD5=y |
2142 | # CONFIG_CRYPTO_MICHAEL_MIC is not set | 2186 | # CONFIG_CRYPTO_MICHAEL_MIC is not set |
2187 | # CONFIG_CRYPTO_RMD128 is not set | ||
2188 | # CONFIG_CRYPTO_RMD160 is not set | ||
2189 | # CONFIG_CRYPTO_RMD256 is not set | ||
2190 | # CONFIG_CRYPTO_RMD320 is not set | ||
2143 | CONFIG_CRYPTO_SHA1=y | 2191 | CONFIG_CRYPTO_SHA1=y |
2144 | # CONFIG_CRYPTO_SHA256 is not set | 2192 | # CONFIG_CRYPTO_SHA256 is not set |
2145 | # CONFIG_CRYPTO_SHA512 is not set | 2193 | # CONFIG_CRYPTO_SHA512 is not set |
@@ -2150,7 +2198,7 @@ CONFIG_CRYPTO_SHA1=y | |||
2150 | # Ciphers | 2198 | # Ciphers |
2151 | # | 2199 | # |
2152 | CONFIG_CRYPTO_AES=y | 2200 | CONFIG_CRYPTO_AES=y |
2153 | # CONFIG_CRYPTO_AES_586 is not set | 2201 | CONFIG_CRYPTO_AES_586=y |
2154 | # CONFIG_CRYPTO_ANUBIS is not set | 2202 | # CONFIG_CRYPTO_ANUBIS is not set |
2155 | CONFIG_CRYPTO_ARC4=y | 2203 | CONFIG_CRYPTO_ARC4=y |
2156 | # CONFIG_CRYPTO_BLOWFISH is not set | 2204 | # CONFIG_CRYPTO_BLOWFISH is not set |
@@ -2192,6 +2240,7 @@ CONFIG_GENERIC_FIND_FIRST_BIT=y | |||
2192 | CONFIG_GENERIC_FIND_NEXT_BIT=y | 2240 | CONFIG_GENERIC_FIND_NEXT_BIT=y |
2193 | # CONFIG_CRC_CCITT is not set | 2241 | # CONFIG_CRC_CCITT is not set |
2194 | # CONFIG_CRC16 is not set | 2242 | # CONFIG_CRC16 is not set |
2243 | CONFIG_CRC_T10DIF=y | ||
2195 | # CONFIG_CRC_ITU_T is not set | 2244 | # CONFIG_CRC_ITU_T is not set |
2196 | CONFIG_CRC32=y | 2245 | CONFIG_CRC32=y |
2197 | # CONFIG_CRC7 is not set | 2246 | # CONFIG_CRC7 is not set |
diff --git a/arch/x86/configs/x86_64_defconfig b/arch/x86/configs/x86_64_defconfig index 743e80392731..ab91941e5496 100644 --- a/arch/x86/configs/x86_64_defconfig +++ b/arch/x86/configs/x86_64_defconfig | |||
@@ -1,13 +1,13 @@ | |||
1 | # | 1 | # |
2 | # Automatically generated make config: don't edit | 2 | # Automatically generated make config: don't edit |
3 | # Linux kernel version: 2.6.26-rc1 | 3 | # Linux kernel version: 2.6.27-rc4 |
4 | # Sun May 4 19:59:57 2008 | 4 | # Mon Aug 25 14:40:46 2008 |
5 | # | 5 | # |
6 | CONFIG_64BIT=y | 6 | CONFIG_64BIT=y |
7 | # CONFIG_X86_32 is not set | 7 | # CONFIG_X86_32 is not set |
8 | CONFIG_X86_64=y | 8 | CONFIG_X86_64=y |
9 | CONFIG_X86=y | 9 | CONFIG_X86=y |
10 | CONFIG_DEFCONFIG_LIST="arch/x86/configs/x86_64_defconfig" | 10 | CONFIG_ARCH_DEFCONFIG="arch/x86/configs/x86_64_defconfig" |
11 | # CONFIG_GENERIC_LOCKBREAK is not set | 11 | # CONFIG_GENERIC_LOCKBREAK is not set |
12 | CONFIG_GENERIC_TIME=y | 12 | CONFIG_GENERIC_TIME=y |
13 | CONFIG_GENERIC_CMOS_UPDATE=y | 13 | CONFIG_GENERIC_CMOS_UPDATE=y |
@@ -53,6 +53,7 @@ CONFIG_X86_HT=y | |||
53 | CONFIG_X86_BIOS_REBOOT=y | 53 | CONFIG_X86_BIOS_REBOOT=y |
54 | CONFIG_X86_TRAMPOLINE=y | 54 | CONFIG_X86_TRAMPOLINE=y |
55 | # CONFIG_KTIME_SCALAR is not set | 55 | # CONFIG_KTIME_SCALAR is not set |
56 | CONFIG_DEFCONFIG_LIST="/lib/modules/$UNAME_RELEASE/.config" | ||
56 | 57 | ||
57 | # | 58 | # |
58 | # General setup | 59 | # General setup |
@@ -82,6 +83,7 @@ CONFIG_CGROUPS=y | |||
82 | CONFIG_CGROUP_NS=y | 83 | CONFIG_CGROUP_NS=y |
83 | # CONFIG_CGROUP_DEVICE is not set | 84 | # CONFIG_CGROUP_DEVICE is not set |
84 | CONFIG_CPUSETS=y | 85 | CONFIG_CPUSETS=y |
86 | CONFIG_HAVE_UNSTABLE_SCHED_CLOCK=y | ||
85 | CONFIG_GROUP_SCHED=y | 87 | CONFIG_GROUP_SCHED=y |
86 | CONFIG_FAIR_GROUP_SCHED=y | 88 | CONFIG_FAIR_GROUP_SCHED=y |
87 | # CONFIG_RT_GROUP_SCHED is not set | 89 | # CONFIG_RT_GROUP_SCHED is not set |
@@ -105,7 +107,6 @@ CONFIG_SYSCTL=y | |||
105 | # CONFIG_EMBEDDED is not set | 107 | # CONFIG_EMBEDDED is not set |
106 | CONFIG_UID16=y | 108 | CONFIG_UID16=y |
107 | CONFIG_SYSCTL_SYSCALL=y | 109 | CONFIG_SYSCTL_SYSCALL=y |
108 | CONFIG_SYSCTL_SYSCALL_CHECK=y | ||
109 | CONFIG_KALLSYMS=y | 110 | CONFIG_KALLSYMS=y |
110 | CONFIG_KALLSYMS_ALL=y | 111 | CONFIG_KALLSYMS_ALL=y |
111 | CONFIG_KALLSYMS_EXTRA_PASS=y | 112 | CONFIG_KALLSYMS_EXTRA_PASS=y |
@@ -113,6 +114,7 @@ CONFIG_HOTPLUG=y | |||
113 | CONFIG_PRINTK=y | 114 | CONFIG_PRINTK=y |
114 | CONFIG_BUG=y | 115 | CONFIG_BUG=y |
115 | CONFIG_ELF_CORE=y | 116 | CONFIG_ELF_CORE=y |
117 | CONFIG_PCSPKR_PLATFORM=y | ||
116 | # CONFIG_COMPAT_BRK is not set | 118 | # CONFIG_COMPAT_BRK is not set |
117 | CONFIG_BASE_FULL=y | 119 | CONFIG_BASE_FULL=y |
118 | CONFIG_FUTEX=y | 120 | CONFIG_FUTEX=y |
@@ -132,25 +134,33 @@ CONFIG_MARKERS=y | |||
132 | # CONFIG_OPROFILE is not set | 134 | # CONFIG_OPROFILE is not set |
133 | CONFIG_HAVE_OPROFILE=y | 135 | CONFIG_HAVE_OPROFILE=y |
134 | CONFIG_KPROBES=y | 136 | CONFIG_KPROBES=y |
137 | CONFIG_HAVE_EFFICIENT_UNALIGNED_ACCESS=y | ||
135 | CONFIG_KRETPROBES=y | 138 | CONFIG_KRETPROBES=y |
139 | CONFIG_HAVE_IOREMAP_PROT=y | ||
136 | CONFIG_HAVE_KPROBES=y | 140 | CONFIG_HAVE_KPROBES=y |
137 | CONFIG_HAVE_KRETPROBES=y | 141 | CONFIG_HAVE_KRETPROBES=y |
142 | # CONFIG_HAVE_ARCH_TRACEHOOK is not set | ||
138 | # CONFIG_HAVE_DMA_ATTRS is not set | 143 | # CONFIG_HAVE_DMA_ATTRS is not set |
144 | CONFIG_USE_GENERIC_SMP_HELPERS=y | ||
145 | # CONFIG_HAVE_CLK is not set | ||
139 | CONFIG_PROC_PAGE_MONITOR=y | 146 | CONFIG_PROC_PAGE_MONITOR=y |
147 | # CONFIG_HAVE_GENERIC_DMA_COHERENT is not set | ||
140 | CONFIG_SLABINFO=y | 148 | CONFIG_SLABINFO=y |
141 | CONFIG_RT_MUTEXES=y | 149 | CONFIG_RT_MUTEXES=y |
142 | # CONFIG_TINY_SHMEM is not set | 150 | # CONFIG_TINY_SHMEM is not set |
143 | CONFIG_BASE_SMALL=0 | 151 | CONFIG_BASE_SMALL=0 |
144 | CONFIG_MODULES=y | 152 | CONFIG_MODULES=y |
153 | # CONFIG_MODULE_FORCE_LOAD is not set | ||
145 | CONFIG_MODULE_UNLOAD=y | 154 | CONFIG_MODULE_UNLOAD=y |
146 | CONFIG_MODULE_FORCE_UNLOAD=y | 155 | CONFIG_MODULE_FORCE_UNLOAD=y |
147 | # CONFIG_MODVERSIONS is not set | 156 | # CONFIG_MODVERSIONS is not set |
148 | # CONFIG_MODULE_SRCVERSION_ALL is not set | 157 | # CONFIG_MODULE_SRCVERSION_ALL is not set |
149 | # CONFIG_KMOD is not set | 158 | CONFIG_KMOD=y |
150 | CONFIG_STOP_MACHINE=y | 159 | CONFIG_STOP_MACHINE=y |
151 | CONFIG_BLOCK=y | 160 | CONFIG_BLOCK=y |
152 | CONFIG_BLK_DEV_IO_TRACE=y | 161 | CONFIG_BLK_DEV_IO_TRACE=y |
153 | CONFIG_BLK_DEV_BSG=y | 162 | CONFIG_BLK_DEV_BSG=y |
163 | # CONFIG_BLK_DEV_INTEGRITY is not set | ||
154 | CONFIG_BLOCK_COMPAT=y | 164 | CONFIG_BLOCK_COMPAT=y |
155 | 165 | ||
156 | # | 166 | # |
@@ -175,20 +185,15 @@ CONFIG_NO_HZ=y | |||
175 | CONFIG_HIGH_RES_TIMERS=y | 185 | CONFIG_HIGH_RES_TIMERS=y |
176 | CONFIG_GENERIC_CLOCKEVENTS_BUILD=y | 186 | CONFIG_GENERIC_CLOCKEVENTS_BUILD=y |
177 | CONFIG_SMP=y | 187 | CONFIG_SMP=y |
188 | CONFIG_X86_FIND_SMP_CONFIG=y | ||
189 | CONFIG_X86_MPPARSE=y | ||
178 | CONFIG_X86_PC=y | 190 | CONFIG_X86_PC=y |
179 | # CONFIG_X86_ELAN is not set | 191 | # CONFIG_X86_ELAN is not set |
180 | # CONFIG_X86_VOYAGER is not set | 192 | # CONFIG_X86_VOYAGER is not set |
181 | # CONFIG_X86_NUMAQ is not set | ||
182 | # CONFIG_X86_SUMMIT is not set | ||
183 | # CONFIG_X86_BIGSMP is not set | ||
184 | # CONFIG_X86_VISWS is not set | ||
185 | # CONFIG_X86_GENERICARCH is not set | 193 | # CONFIG_X86_GENERICARCH is not set |
186 | # CONFIG_X86_ES7000 is not set | ||
187 | # CONFIG_X86_RDC321X is not set | ||
188 | # CONFIG_X86_VSMP is not set | 194 | # CONFIG_X86_VSMP is not set |
189 | # CONFIG_PARAVIRT_GUEST is not set | 195 | # CONFIG_PARAVIRT_GUEST is not set |
190 | CONFIG_MEMTEST_BOOTPARAM=y | 196 | # CONFIG_MEMTEST is not set |
191 | CONFIG_MEMTEST_BOOTPARAM_VALUE=0 | ||
192 | # CONFIG_M386 is not set | 197 | # CONFIG_M386 is not set |
193 | # CONFIG_M486 is not set | 198 | # CONFIG_M486 is not set |
194 | # CONFIG_M586 is not set | 199 | # CONFIG_M586 is not set |
@@ -220,11 +225,12 @@ CONFIG_X86_L1_CACHE_BYTES=64 | |||
220 | CONFIG_X86_INTERNODE_CACHE_BYTES=64 | 225 | CONFIG_X86_INTERNODE_CACHE_BYTES=64 |
221 | CONFIG_X86_CMPXCHG=y | 226 | CONFIG_X86_CMPXCHG=y |
222 | CONFIG_X86_L1_CACHE_SHIFT=6 | 227 | CONFIG_X86_L1_CACHE_SHIFT=6 |
223 | CONFIG_X86_GOOD_APIC=y | 228 | CONFIG_X86_WP_WORKS_OK=y |
224 | CONFIG_X86_INTEL_USERCOPY=y | 229 | CONFIG_X86_INTEL_USERCOPY=y |
225 | CONFIG_X86_USE_PPRO_CHECKSUM=y | 230 | CONFIG_X86_USE_PPRO_CHECKSUM=y |
226 | CONFIG_X86_P6_NOP=y | 231 | CONFIG_X86_P6_NOP=y |
227 | CONFIG_X86_TSC=y | 232 | CONFIG_X86_TSC=y |
233 | CONFIG_X86_CMPXCHG64=y | ||
228 | CONFIG_X86_CMOV=y | 234 | CONFIG_X86_CMOV=y |
229 | CONFIG_X86_MINIMUM_CPU_FAMILY=64 | 235 | CONFIG_X86_MINIMUM_CPU_FAMILY=64 |
230 | CONFIG_X86_DEBUGCTLMSR=y | 236 | CONFIG_X86_DEBUGCTLMSR=y |
@@ -234,8 +240,10 @@ CONFIG_DMI=y | |||
234 | CONFIG_GART_IOMMU=y | 240 | CONFIG_GART_IOMMU=y |
235 | CONFIG_CALGARY_IOMMU=y | 241 | CONFIG_CALGARY_IOMMU=y |
236 | CONFIG_CALGARY_IOMMU_ENABLED_BY_DEFAULT=y | 242 | CONFIG_CALGARY_IOMMU_ENABLED_BY_DEFAULT=y |
243 | CONFIG_AMD_IOMMU=y | ||
237 | CONFIG_SWIOTLB=y | 244 | CONFIG_SWIOTLB=y |
238 | CONFIG_IOMMU_HELPER=y | 245 | CONFIG_IOMMU_HELPER=y |
246 | # CONFIG_MAXSMP is not set | ||
239 | CONFIG_NR_CPUS=4 | 247 | CONFIG_NR_CPUS=4 |
240 | # CONFIG_SCHED_SMT is not set | 248 | # CONFIG_SCHED_SMT is not set |
241 | CONFIG_SCHED_MC=y | 249 | CONFIG_SCHED_MC=y |
@@ -281,6 +289,7 @@ CONFIG_ZONE_DMA_FLAG=1 | |||
281 | CONFIG_BOUNCE=y | 289 | CONFIG_BOUNCE=y |
282 | CONFIG_VIRT_TO_BUS=y | 290 | CONFIG_VIRT_TO_BUS=y |
283 | CONFIG_MTRR=y | 291 | CONFIG_MTRR=y |
292 | # CONFIG_MTRR_SANITIZER is not set | ||
284 | # CONFIG_X86_PAT is not set | 293 | # CONFIG_X86_PAT is not set |
285 | CONFIG_EFI=y | 294 | CONFIG_EFI=y |
286 | CONFIG_SECCOMP=y | 295 | CONFIG_SECCOMP=y |
@@ -313,6 +322,7 @@ CONFIG_PM_TRACE_RTC=y | |||
313 | CONFIG_PM_SLEEP_SMP=y | 322 | CONFIG_PM_SLEEP_SMP=y |
314 | CONFIG_PM_SLEEP=y | 323 | CONFIG_PM_SLEEP=y |
315 | CONFIG_SUSPEND=y | 324 | CONFIG_SUSPEND=y |
325 | # CONFIG_PM_TEST_SUSPEND is not set | ||
316 | CONFIG_SUSPEND_FREEZER=y | 326 | CONFIG_SUSPEND_FREEZER=y |
317 | CONFIG_HIBERNATION=y | 327 | CONFIG_HIBERNATION=y |
318 | CONFIG_PM_STD_PARTITION="" | 328 | CONFIG_PM_STD_PARTITION="" |
@@ -339,6 +349,7 @@ CONFIG_ACPI_NUMA=y | |||
339 | CONFIG_ACPI_BLACKLIST_YEAR=0 | 349 | CONFIG_ACPI_BLACKLIST_YEAR=0 |
340 | # CONFIG_ACPI_DEBUG is not set | 350 | # CONFIG_ACPI_DEBUG is not set |
341 | CONFIG_ACPI_EC=y | 351 | CONFIG_ACPI_EC=y |
352 | # CONFIG_ACPI_PCI_SLOT is not set | ||
342 | CONFIG_ACPI_POWER=y | 353 | CONFIG_ACPI_POWER=y |
343 | CONFIG_ACPI_SYSTEM=y | 354 | CONFIG_ACPI_SYSTEM=y |
344 | CONFIG_X86_PM_TIMER=y | 355 | CONFIG_X86_PM_TIMER=y |
@@ -437,10 +448,6 @@ CONFIG_IA32_EMULATION=y | |||
437 | CONFIG_COMPAT=y | 448 | CONFIG_COMPAT=y |
438 | CONFIG_COMPAT_FOR_U64_ALIGNMENT=y | 449 | CONFIG_COMPAT_FOR_U64_ALIGNMENT=y |
439 | CONFIG_SYSVIPC_COMPAT=y | 450 | CONFIG_SYSVIPC_COMPAT=y |
440 | |||
441 | # | ||
442 | # Networking | ||
443 | # | ||
444 | CONFIG_NET=y | 451 | CONFIG_NET=y |
445 | 452 | ||
446 | # | 453 | # |
@@ -464,7 +471,10 @@ CONFIG_IP_FIB_HASH=y | |||
464 | CONFIG_IP_MULTIPLE_TABLES=y | 471 | CONFIG_IP_MULTIPLE_TABLES=y |
465 | CONFIG_IP_ROUTE_MULTIPATH=y | 472 | CONFIG_IP_ROUTE_MULTIPATH=y |
466 | CONFIG_IP_ROUTE_VERBOSE=y | 473 | CONFIG_IP_ROUTE_VERBOSE=y |
467 | # CONFIG_IP_PNP is not set | 474 | CONFIG_IP_PNP=y |
475 | CONFIG_IP_PNP_DHCP=y | ||
476 | CONFIG_IP_PNP_BOOTP=y | ||
477 | CONFIG_IP_PNP_RARP=y | ||
468 | # CONFIG_NET_IPIP is not set | 478 | # CONFIG_NET_IPIP is not set |
469 | # CONFIG_NET_IPGRE is not set | 479 | # CONFIG_NET_IPGRE is not set |
470 | CONFIG_IP_MROUTE=y | 480 | CONFIG_IP_MROUTE=y |
@@ -607,7 +617,6 @@ CONFIG_NET_SCHED=y | |||
607 | # CONFIG_NET_SCH_HTB is not set | 617 | # CONFIG_NET_SCH_HTB is not set |
608 | # CONFIG_NET_SCH_HFSC is not set | 618 | # CONFIG_NET_SCH_HFSC is not set |
609 | # CONFIG_NET_SCH_PRIO is not set | 619 | # CONFIG_NET_SCH_PRIO is not set |
610 | # CONFIG_NET_SCH_RR is not set | ||
611 | # CONFIG_NET_SCH_RED is not set | 620 | # CONFIG_NET_SCH_RED is not set |
612 | # CONFIG_NET_SCH_SFQ is not set | 621 | # CONFIG_NET_SCH_SFQ is not set |
613 | # CONFIG_NET_SCH_TEQL is not set | 622 | # CONFIG_NET_SCH_TEQL is not set |
@@ -669,28 +678,19 @@ CONFIG_FIB_RULES=y | |||
669 | CONFIG_CFG80211=y | 678 | CONFIG_CFG80211=y |
670 | CONFIG_NL80211=y | 679 | CONFIG_NL80211=y |
671 | CONFIG_WIRELESS_EXT=y | 680 | CONFIG_WIRELESS_EXT=y |
681 | CONFIG_WIRELESS_EXT_SYSFS=y | ||
672 | CONFIG_MAC80211=y | 682 | CONFIG_MAC80211=y |
673 | 683 | ||
674 | # | 684 | # |
675 | # Rate control algorithm selection | 685 | # Rate control algorithm selection |
676 | # | 686 | # |
687 | CONFIG_MAC80211_RC_PID=y | ||
677 | CONFIG_MAC80211_RC_DEFAULT_PID=y | 688 | CONFIG_MAC80211_RC_DEFAULT_PID=y |
678 | # CONFIG_MAC80211_RC_DEFAULT_NONE is not set | ||
679 | |||
680 | # | ||
681 | # Selecting 'y' for an algorithm will | ||
682 | # | ||
683 | |||
684 | # | ||
685 | # build the algorithm into mac80211. | ||
686 | # | ||
687 | CONFIG_MAC80211_RC_DEFAULT="pid" | 689 | CONFIG_MAC80211_RC_DEFAULT="pid" |
688 | CONFIG_MAC80211_RC_PID=y | ||
689 | # CONFIG_MAC80211_MESH is not set | 690 | # CONFIG_MAC80211_MESH is not set |
690 | CONFIG_MAC80211_LEDS=y | 691 | CONFIG_MAC80211_LEDS=y |
691 | # CONFIG_MAC80211_DEBUGFS is not set | 692 | # CONFIG_MAC80211_DEBUGFS is not set |
692 | # CONFIG_MAC80211_DEBUG_PACKET_ALIGNMENT is not set | 693 | # CONFIG_MAC80211_DEBUG_MENU is not set |
693 | # CONFIG_MAC80211_DEBUG is not set | ||
694 | # CONFIG_IEEE80211 is not set | 694 | # CONFIG_IEEE80211 is not set |
695 | # CONFIG_RFKILL is not set | 695 | # CONFIG_RFKILL is not set |
696 | # CONFIG_NET_9P is not set | 696 | # CONFIG_NET_9P is not set |
@@ -706,6 +706,8 @@ CONFIG_UEVENT_HELPER_PATH="/sbin/hotplug" | |||
706 | CONFIG_STANDALONE=y | 706 | CONFIG_STANDALONE=y |
707 | CONFIG_PREVENT_FIRMWARE_BUILD=y | 707 | CONFIG_PREVENT_FIRMWARE_BUILD=y |
708 | CONFIG_FW_LOADER=y | 708 | CONFIG_FW_LOADER=y |
709 | CONFIG_FIRMWARE_IN_KERNEL=y | ||
710 | CONFIG_EXTRA_FIRMWARE="" | ||
709 | # CONFIG_DEBUG_DRIVER is not set | 711 | # CONFIG_DEBUG_DRIVER is not set |
710 | CONFIG_DEBUG_DEVRES=y | 712 | CONFIG_DEBUG_DEVRES=y |
711 | # CONFIG_SYS_HYPERVISOR is not set | 713 | # CONFIG_SYS_HYPERVISOR is not set |
@@ -738,6 +740,7 @@ CONFIG_BLK_DEV_RAM_SIZE=16384 | |||
738 | # CONFIG_BLK_DEV_XIP is not set | 740 | # CONFIG_BLK_DEV_XIP is not set |
739 | # CONFIG_CDROM_PKTCDVD is not set | 741 | # CONFIG_CDROM_PKTCDVD is not set |
740 | # CONFIG_ATA_OVER_ETH is not set | 742 | # CONFIG_ATA_OVER_ETH is not set |
743 | # CONFIG_BLK_DEV_HD is not set | ||
741 | CONFIG_MISC_DEVICES=y | 744 | CONFIG_MISC_DEVICES=y |
742 | # CONFIG_IBM_ASM is not set | 745 | # CONFIG_IBM_ASM is not set |
743 | # CONFIG_PHANTOM is not set | 746 | # CONFIG_PHANTOM is not set |
@@ -748,10 +751,14 @@ CONFIG_MISC_DEVICES=y | |||
748 | # CONFIG_ASUS_LAPTOP is not set | 751 | # CONFIG_ASUS_LAPTOP is not set |
749 | # CONFIG_FUJITSU_LAPTOP is not set | 752 | # CONFIG_FUJITSU_LAPTOP is not set |
750 | # CONFIG_MSI_LAPTOP is not set | 753 | # CONFIG_MSI_LAPTOP is not set |
754 | # CONFIG_COMPAL_LAPTOP is not set | ||
751 | # CONFIG_SONY_LAPTOP is not set | 755 | # CONFIG_SONY_LAPTOP is not set |
752 | # CONFIG_THINKPAD_ACPI is not set | 756 | # CONFIG_THINKPAD_ACPI is not set |
753 | # CONFIG_INTEL_MENLOW is not set | 757 | # CONFIG_INTEL_MENLOW is not set |
754 | # CONFIG_ENCLOSURE_SERVICES is not set | 758 | # CONFIG_ENCLOSURE_SERVICES is not set |
759 | # CONFIG_SGI_XP is not set | ||
760 | # CONFIG_HP_ILO is not set | ||
761 | # CONFIG_SGI_GRU is not set | ||
755 | CONFIG_HAVE_IDE=y | 762 | CONFIG_HAVE_IDE=y |
756 | # CONFIG_IDE is not set | 763 | # CONFIG_IDE is not set |
757 | 764 | ||
@@ -790,12 +797,13 @@ CONFIG_SCSI_WAIT_SCAN=m | |||
790 | # | 797 | # |
791 | CONFIG_SCSI_SPI_ATTRS=y | 798 | CONFIG_SCSI_SPI_ATTRS=y |
792 | # CONFIG_SCSI_FC_ATTRS is not set | 799 | # CONFIG_SCSI_FC_ATTRS is not set |
793 | # CONFIG_SCSI_ISCSI_ATTRS is not set | 800 | CONFIG_SCSI_ISCSI_ATTRS=y |
794 | # CONFIG_SCSI_SAS_ATTRS is not set | 801 | # CONFIG_SCSI_SAS_ATTRS is not set |
795 | # CONFIG_SCSI_SAS_LIBSAS is not set | 802 | # CONFIG_SCSI_SAS_LIBSAS is not set |
796 | # CONFIG_SCSI_SRP_ATTRS is not set | 803 | # CONFIG_SCSI_SRP_ATTRS is not set |
797 | # CONFIG_SCSI_LOWLEVEL is not set | 804 | # CONFIG_SCSI_LOWLEVEL is not set |
798 | # CONFIG_SCSI_LOWLEVEL_PCMCIA is not set | 805 | # CONFIG_SCSI_LOWLEVEL_PCMCIA is not set |
806 | # CONFIG_SCSI_DH is not set | ||
799 | CONFIG_ATA=y | 807 | CONFIG_ATA=y |
800 | # CONFIG_ATA_NONSTANDARD is not set | 808 | # CONFIG_ATA_NONSTANDARD is not set |
801 | CONFIG_ATA_ACPI=y | 809 | CONFIG_ATA_ACPI=y |
@@ -857,6 +865,7 @@ CONFIG_PATA_OLDPIIX=y | |||
857 | # CONFIG_PATA_SIS is not set | 865 | # CONFIG_PATA_SIS is not set |
858 | # CONFIG_PATA_VIA is not set | 866 | # CONFIG_PATA_VIA is not set |
859 | # CONFIG_PATA_WINBOND is not set | 867 | # CONFIG_PATA_WINBOND is not set |
868 | CONFIG_PATA_SCH=y | ||
860 | CONFIG_MD=y | 869 | CONFIG_MD=y |
861 | CONFIG_BLK_DEV_MD=y | 870 | CONFIG_BLK_DEV_MD=y |
862 | # CONFIG_MD_LINEAR is not set | 871 | # CONFIG_MD_LINEAR is not set |
@@ -880,13 +889,16 @@ CONFIG_DM_ZERO=y | |||
880 | # | 889 | # |
881 | # IEEE 1394 (FireWire) support | 890 | # IEEE 1394 (FireWire) support |
882 | # | 891 | # |
892 | |||
893 | # | ||
894 | # Enable only one of the two stacks, unless you know what you are doing | ||
895 | # | ||
883 | # CONFIG_FIREWIRE is not set | 896 | # CONFIG_FIREWIRE is not set |
884 | # CONFIG_IEEE1394 is not set | 897 | # CONFIG_IEEE1394 is not set |
885 | # CONFIG_I2O is not set | 898 | # CONFIG_I2O is not set |
886 | CONFIG_MACINTOSH_DRIVERS=y | 899 | CONFIG_MACINTOSH_DRIVERS=y |
887 | CONFIG_MAC_EMUMOUSEBTN=y | 900 | CONFIG_MAC_EMUMOUSEBTN=y |
888 | CONFIG_NETDEVICES=y | 901 | CONFIG_NETDEVICES=y |
889 | # CONFIG_NETDEVICES_MULTIQUEUE is not set | ||
890 | # CONFIG_IFB is not set | 902 | # CONFIG_IFB is not set |
891 | # CONFIG_DUMMY is not set | 903 | # CONFIG_DUMMY is not set |
892 | # CONFIG_BONDING is not set | 904 | # CONFIG_BONDING is not set |
@@ -896,7 +908,23 @@ CONFIG_NETDEVICES=y | |||
896 | # CONFIG_VETH is not set | 908 | # CONFIG_VETH is not set |
897 | # CONFIG_NET_SB1000 is not set | 909 | # CONFIG_NET_SB1000 is not set |
898 | # CONFIG_ARCNET is not set | 910 | # CONFIG_ARCNET is not set |
899 | # CONFIG_PHYLIB is not set | 911 | CONFIG_PHYLIB=y |
912 | |||
913 | # | ||
914 | # MII PHY device drivers | ||
915 | # | ||
916 | # CONFIG_MARVELL_PHY is not set | ||
917 | # CONFIG_DAVICOM_PHY is not set | ||
918 | # CONFIG_QSEMI_PHY is not set | ||
919 | # CONFIG_LXT_PHY is not set | ||
920 | # CONFIG_CICADA_PHY is not set | ||
921 | # CONFIG_VITESSE_PHY is not set | ||
922 | # CONFIG_SMSC_PHY is not set | ||
923 | # CONFIG_BROADCOM_PHY is not set | ||
924 | # CONFIG_ICPLUS_PHY is not set | ||
925 | # CONFIG_REALTEK_PHY is not set | ||
926 | # CONFIG_FIXED_PHY is not set | ||
927 | # CONFIG_MDIO_BITBANG is not set | ||
900 | CONFIG_NET_ETHERNET=y | 928 | CONFIG_NET_ETHERNET=y |
901 | CONFIG_MII=y | 929 | CONFIG_MII=y |
902 | # CONFIG_HAPPYMEAL is not set | 930 | # CONFIG_HAPPYMEAL is not set |
@@ -940,16 +968,15 @@ CONFIG_8139TOO_PIO=y | |||
940 | # CONFIG_SIS900 is not set | 968 | # CONFIG_SIS900 is not set |
941 | # CONFIG_EPIC100 is not set | 969 | # CONFIG_EPIC100 is not set |
942 | # CONFIG_SUNDANCE is not set | 970 | # CONFIG_SUNDANCE is not set |
971 | # CONFIG_TLAN is not set | ||
943 | # CONFIG_VIA_RHINE is not set | 972 | # CONFIG_VIA_RHINE is not set |
944 | # CONFIG_SC92031 is not set | 973 | # CONFIG_SC92031 is not set |
945 | CONFIG_NETDEV_1000=y | 974 | CONFIG_NETDEV_1000=y |
946 | # CONFIG_ACENIC is not set | 975 | # CONFIG_ACENIC is not set |
947 | # CONFIG_DL2K is not set | 976 | # CONFIG_DL2K is not set |
948 | CONFIG_E1000=y | 977 | CONFIG_E1000=y |
949 | # CONFIG_E1000_NAPI is not set | ||
950 | # CONFIG_E1000_DISABLE_PACKET_SPLIT is not set | 978 | # CONFIG_E1000_DISABLE_PACKET_SPLIT is not set |
951 | # CONFIG_E1000E is not set | 979 | # CONFIG_E1000E is not set |
952 | # CONFIG_E1000E_ENABLED is not set | ||
953 | # CONFIG_IP1000 is not set | 980 | # CONFIG_IP1000 is not set |
954 | # CONFIG_IGB is not set | 981 | # CONFIG_IGB is not set |
955 | # CONFIG_NS83820 is not set | 982 | # CONFIG_NS83820 is not set |
@@ -965,6 +992,7 @@ CONFIG_TIGON3=y | |||
965 | # CONFIG_BNX2 is not set | 992 | # CONFIG_BNX2 is not set |
966 | # CONFIG_QLA3XXX is not set | 993 | # CONFIG_QLA3XXX is not set |
967 | # CONFIG_ATL1 is not set | 994 | # CONFIG_ATL1 is not set |
995 | # CONFIG_ATL1E is not set | ||
968 | CONFIG_NETDEV_10000=y | 996 | CONFIG_NETDEV_10000=y |
969 | # CONFIG_CHELSIO_T1 is not set | 997 | # CONFIG_CHELSIO_T1 is not set |
970 | # CONFIG_CHELSIO_T3 is not set | 998 | # CONFIG_CHELSIO_T3 is not set |
@@ -1003,13 +1031,14 @@ CONFIG_WLAN_80211=y | |||
1003 | # CONFIG_RTL8180 is not set | 1031 | # CONFIG_RTL8180 is not set |
1004 | # CONFIG_RTL8187 is not set | 1032 | # CONFIG_RTL8187 is not set |
1005 | # CONFIG_ADM8211 is not set | 1033 | # CONFIG_ADM8211 is not set |
1034 | # CONFIG_MAC80211_HWSIM is not set | ||
1006 | # CONFIG_P54_COMMON is not set | 1035 | # CONFIG_P54_COMMON is not set |
1007 | CONFIG_ATH5K=y | 1036 | CONFIG_ATH5K=y |
1008 | # CONFIG_ATH5K_DEBUG is not set | 1037 | # CONFIG_ATH5K_DEBUG is not set |
1009 | # CONFIG_IWLWIFI is not set | 1038 | # CONFIG_ATH9K is not set |
1010 | # CONFIG_IWLCORE is not set | 1039 | # CONFIG_IWLCORE is not set |
1011 | # CONFIG_IWLWIFI_LEDS is not set | 1040 | # CONFIG_IWLWIFI_LEDS is not set |
1012 | # CONFIG_IWL4965 is not set | 1041 | # CONFIG_IWLAGN is not set |
1013 | # CONFIG_IWL3945 is not set | 1042 | # CONFIG_IWL3945 is not set |
1014 | # CONFIG_HOSTAP is not set | 1043 | # CONFIG_HOSTAP is not set |
1015 | # CONFIG_B43 is not set | 1044 | # CONFIG_B43 is not set |
@@ -1088,6 +1117,7 @@ CONFIG_MOUSE_PS2_TRACKPOINT=y | |||
1088 | # CONFIG_MOUSE_PS2_TOUCHKIT is not set | 1117 | # CONFIG_MOUSE_PS2_TOUCHKIT is not set |
1089 | # CONFIG_MOUSE_SERIAL is not set | 1118 | # CONFIG_MOUSE_SERIAL is not set |
1090 | # CONFIG_MOUSE_APPLETOUCH is not set | 1119 | # CONFIG_MOUSE_APPLETOUCH is not set |
1120 | # CONFIG_MOUSE_BCM5974 is not set | ||
1091 | # CONFIG_MOUSE_VSXXXAA is not set | 1121 | # CONFIG_MOUSE_VSXXXAA is not set |
1092 | CONFIG_INPUT_JOYSTICK=y | 1122 | CONFIG_INPUT_JOYSTICK=y |
1093 | # CONFIG_JOYSTICK_ANALOG is not set | 1123 | # CONFIG_JOYSTICK_ANALOG is not set |
@@ -1122,12 +1152,14 @@ CONFIG_INPUT_TOUCHSCREEN=y | |||
1122 | # CONFIG_TOUCHSCREEN_GUNZE is not set | 1152 | # CONFIG_TOUCHSCREEN_GUNZE is not set |
1123 | # CONFIG_TOUCHSCREEN_ELO is not set | 1153 | # CONFIG_TOUCHSCREEN_ELO is not set |
1124 | # CONFIG_TOUCHSCREEN_MTOUCH is not set | 1154 | # CONFIG_TOUCHSCREEN_MTOUCH is not set |
1155 | # CONFIG_TOUCHSCREEN_INEXIO is not set | ||
1125 | # CONFIG_TOUCHSCREEN_MK712 is not set | 1156 | # CONFIG_TOUCHSCREEN_MK712 is not set |
1126 | # CONFIG_TOUCHSCREEN_PENMOUNT is not set | 1157 | # CONFIG_TOUCHSCREEN_PENMOUNT is not set |
1127 | # CONFIG_TOUCHSCREEN_TOUCHRIGHT is not set | 1158 | # CONFIG_TOUCHSCREEN_TOUCHRIGHT is not set |
1128 | # CONFIG_TOUCHSCREEN_TOUCHWIN is not set | 1159 | # CONFIG_TOUCHSCREEN_TOUCHWIN is not set |
1129 | # CONFIG_TOUCHSCREEN_UCB1400 is not set | 1160 | # CONFIG_TOUCHSCREEN_UCB1400 is not set |
1130 | # CONFIG_TOUCHSCREEN_USB_COMPOSITE is not set | 1161 | # CONFIG_TOUCHSCREEN_USB_COMPOSITE is not set |
1162 | # CONFIG_TOUCHSCREEN_TOUCHIT213 is not set | ||
1131 | CONFIG_INPUT_MISC=y | 1163 | CONFIG_INPUT_MISC=y |
1132 | # CONFIG_INPUT_PCSPKR is not set | 1164 | # CONFIG_INPUT_PCSPKR is not set |
1133 | # CONFIG_INPUT_APANEL is not set | 1165 | # CONFIG_INPUT_APANEL is not set |
@@ -1155,6 +1187,7 @@ CONFIG_SERIO_LIBPS2=y | |||
1155 | # Character devices | 1187 | # Character devices |
1156 | # | 1188 | # |
1157 | CONFIG_VT=y | 1189 | CONFIG_VT=y |
1190 | CONFIG_CONSOLE_TRANSLATIONS=y | ||
1158 | CONFIG_VT_CONSOLE=y | 1191 | CONFIG_VT_CONSOLE=y |
1159 | CONFIG_HW_CONSOLE=y | 1192 | CONFIG_HW_CONSOLE=y |
1160 | CONFIG_VT_HW_CONSOLE_BINDING=y | 1193 | CONFIG_VT_HW_CONSOLE_BINDING=y |
@@ -1222,7 +1255,6 @@ CONFIG_NVRAM=y | |||
1222 | # CONFIG_PC8736x_GPIO is not set | 1255 | # CONFIG_PC8736x_GPIO is not set |
1223 | # CONFIG_RAW_DRIVER is not set | 1256 | # CONFIG_RAW_DRIVER is not set |
1224 | CONFIG_HPET=y | 1257 | CONFIG_HPET=y |
1225 | # CONFIG_HPET_RTC_IRQ is not set | ||
1226 | # CONFIG_HPET_MMAP is not set | 1258 | # CONFIG_HPET_MMAP is not set |
1227 | # CONFIG_HANGCHECK_TIMER is not set | 1259 | # CONFIG_HANGCHECK_TIMER is not set |
1228 | # CONFIG_TCG_TPM is not set | 1260 | # CONFIG_TCG_TPM is not set |
@@ -1231,42 +1263,63 @@ CONFIG_DEVPORT=y | |||
1231 | CONFIG_I2C=y | 1263 | CONFIG_I2C=y |
1232 | CONFIG_I2C_BOARDINFO=y | 1264 | CONFIG_I2C_BOARDINFO=y |
1233 | # CONFIG_I2C_CHARDEV is not set | 1265 | # CONFIG_I2C_CHARDEV is not set |
1266 | CONFIG_I2C_HELPER_AUTO=y | ||
1234 | 1267 | ||
1235 | # | 1268 | # |
1236 | # I2C Hardware Bus support | 1269 | # I2C Hardware Bus support |
1237 | # | 1270 | # |
1271 | |||
1272 | # | ||
1273 | # PC SMBus host controller drivers | ||
1274 | # | ||
1238 | # CONFIG_I2C_ALI1535 is not set | 1275 | # CONFIG_I2C_ALI1535 is not set |
1239 | # CONFIG_I2C_ALI1563 is not set | 1276 | # CONFIG_I2C_ALI1563 is not set |
1240 | # CONFIG_I2C_ALI15X3 is not set | 1277 | # CONFIG_I2C_ALI15X3 is not set |
1241 | # CONFIG_I2C_AMD756 is not set | 1278 | # CONFIG_I2C_AMD756 is not set |
1242 | # CONFIG_I2C_AMD8111 is not set | 1279 | # CONFIG_I2C_AMD8111 is not set |
1243 | CONFIG_I2C_I801=y | 1280 | CONFIG_I2C_I801=y |
1244 | # CONFIG_I2C_I810 is not set | 1281 | # CONFIG_I2C_ISCH is not set |
1245 | # CONFIG_I2C_PIIX4 is not set | 1282 | # CONFIG_I2C_PIIX4 is not set |
1246 | # CONFIG_I2C_NFORCE2 is not set | 1283 | # CONFIG_I2C_NFORCE2 is not set |
1247 | # CONFIG_I2C_OCORES is not set | ||
1248 | # CONFIG_I2C_PARPORT_LIGHT is not set | ||
1249 | # CONFIG_I2C_PROSAVAGE is not set | ||
1250 | # CONFIG_I2C_SAVAGE4 is not set | ||
1251 | # CONFIG_I2C_SIMTEC is not set | ||
1252 | # CONFIG_I2C_SIS5595 is not set | 1284 | # CONFIG_I2C_SIS5595 is not set |
1253 | # CONFIG_I2C_SIS630 is not set | 1285 | # CONFIG_I2C_SIS630 is not set |
1254 | # CONFIG_I2C_SIS96X is not set | 1286 | # CONFIG_I2C_SIS96X is not set |
1255 | # CONFIG_I2C_TAOS_EVM is not set | ||
1256 | # CONFIG_I2C_STUB is not set | ||
1257 | # CONFIG_I2C_TINY_USB is not set | ||
1258 | # CONFIG_I2C_VIA is not set | 1287 | # CONFIG_I2C_VIA is not set |
1259 | # CONFIG_I2C_VIAPRO is not set | 1288 | # CONFIG_I2C_VIAPRO is not set |
1289 | |||
1290 | # | ||
1291 | # I2C system bus drivers (mostly embedded / system-on-chip) | ||
1292 | # | ||
1293 | # CONFIG_I2C_OCORES is not set | ||
1294 | # CONFIG_I2C_SIMTEC is not set | ||
1295 | |||
1296 | # | ||
1297 | # External I2C/SMBus adapter drivers | ||
1298 | # | ||
1299 | # CONFIG_I2C_PARPORT_LIGHT is not set | ||
1300 | # CONFIG_I2C_TAOS_EVM is not set | ||
1301 | # CONFIG_I2C_TINY_USB is not set | ||
1302 | |||
1303 | # | ||
1304 | # Graphics adapter I2C/DDC channel drivers | ||
1305 | # | ||
1260 | # CONFIG_I2C_VOODOO3 is not set | 1306 | # CONFIG_I2C_VOODOO3 is not set |
1307 | |||
1308 | # | ||
1309 | # Other I2C/SMBus bus drivers | ||
1310 | # | ||
1261 | # CONFIG_I2C_PCA_PLATFORM is not set | 1311 | # CONFIG_I2C_PCA_PLATFORM is not set |
1312 | # CONFIG_I2C_STUB is not set | ||
1262 | 1313 | ||
1263 | # | 1314 | # |
1264 | # Miscellaneous I2C Chip support | 1315 | # Miscellaneous I2C Chip support |
1265 | # | 1316 | # |
1266 | # CONFIG_DS1682 is not set | 1317 | # CONFIG_DS1682 is not set |
1318 | # CONFIG_AT24 is not set | ||
1267 | # CONFIG_SENSORS_EEPROM is not set | 1319 | # CONFIG_SENSORS_EEPROM is not set |
1268 | # CONFIG_SENSORS_PCF8574 is not set | 1320 | # CONFIG_SENSORS_PCF8574 is not set |
1269 | # CONFIG_PCF8575 is not set | 1321 | # CONFIG_PCF8575 is not set |
1322 | # CONFIG_SENSORS_PCA9539 is not set | ||
1270 | # CONFIG_SENSORS_PCF8591 is not set | 1323 | # CONFIG_SENSORS_PCF8591 is not set |
1271 | # CONFIG_SENSORS_MAX6875 is not set | 1324 | # CONFIG_SENSORS_MAX6875 is not set |
1272 | # CONFIG_SENSORS_TSL2550 is not set | 1325 | # CONFIG_SENSORS_TSL2550 is not set |
@@ -1275,6 +1328,8 @@ CONFIG_I2C_I801=y | |||
1275 | # CONFIG_I2C_DEBUG_BUS is not set | 1328 | # CONFIG_I2C_DEBUG_BUS is not set |
1276 | # CONFIG_I2C_DEBUG_CHIP is not set | 1329 | # CONFIG_I2C_DEBUG_CHIP is not set |
1277 | # CONFIG_SPI is not set | 1330 | # CONFIG_SPI is not set |
1331 | CONFIG_ARCH_WANT_OPTIONAL_GPIOLIB=y | ||
1332 | # CONFIG_GPIOLIB is not set | ||
1278 | # CONFIG_W1 is not set | 1333 | # CONFIG_W1 is not set |
1279 | CONFIG_POWER_SUPPLY=y | 1334 | CONFIG_POWER_SUPPLY=y |
1280 | # CONFIG_POWER_SUPPLY_DEBUG is not set | 1335 | # CONFIG_POWER_SUPPLY_DEBUG is not set |
@@ -1335,8 +1390,10 @@ CONFIG_SSB_POSSIBLE=y | |||
1335 | # | 1390 | # |
1336 | # Multifunction device drivers | 1391 | # Multifunction device drivers |
1337 | # | 1392 | # |
1393 | # CONFIG_MFD_CORE is not set | ||
1338 | # CONFIG_MFD_SM501 is not set | 1394 | # CONFIG_MFD_SM501 is not set |
1339 | # CONFIG_HTC_PASIC3 is not set | 1395 | # CONFIG_HTC_PASIC3 is not set |
1396 | # CONFIG_MFD_TMIO is not set | ||
1340 | 1397 | ||
1341 | # | 1398 | # |
1342 | # Multimedia devices | 1399 | # Multimedia devices |
@@ -1347,6 +1404,7 @@ CONFIG_SSB_POSSIBLE=y | |||
1347 | # | 1404 | # |
1348 | # CONFIG_VIDEO_DEV is not set | 1405 | # CONFIG_VIDEO_DEV is not set |
1349 | # CONFIG_DVB_CORE is not set | 1406 | # CONFIG_DVB_CORE is not set |
1407 | # CONFIG_VIDEO_MEDIA is not set | ||
1350 | 1408 | ||
1351 | # | 1409 | # |
1352 | # Multimedia drivers | 1410 | # Multimedia drivers |
@@ -1387,7 +1445,6 @@ CONFIG_FB_CFB_IMAGEBLIT=y | |||
1387 | # CONFIG_FB_SYS_IMAGEBLIT is not set | 1445 | # CONFIG_FB_SYS_IMAGEBLIT is not set |
1388 | # CONFIG_FB_FOREIGN_ENDIAN is not set | 1446 | # CONFIG_FB_FOREIGN_ENDIAN is not set |
1389 | # CONFIG_FB_SYS_FOPS is not set | 1447 | # CONFIG_FB_SYS_FOPS is not set |
1390 | CONFIG_FB_DEFERRED_IO=y | ||
1391 | # CONFIG_FB_SVGALIB is not set | 1448 | # CONFIG_FB_SVGALIB is not set |
1392 | # CONFIG_FB_MACMODES is not set | 1449 | # CONFIG_FB_MACMODES is not set |
1393 | # CONFIG_FB_BACKLIGHT is not set | 1450 | # CONFIG_FB_BACKLIGHT is not set |
@@ -1430,6 +1487,7 @@ CONFIG_FB_EFI=y | |||
1430 | # CONFIG_FB_TRIDENT is not set | 1487 | # CONFIG_FB_TRIDENT is not set |
1431 | # CONFIG_FB_ARK is not set | 1488 | # CONFIG_FB_ARK is not set |
1432 | # CONFIG_FB_PM3 is not set | 1489 | # CONFIG_FB_PM3 is not set |
1490 | # CONFIG_FB_CARMINE is not set | ||
1433 | # CONFIG_FB_GEODE is not set | 1491 | # CONFIG_FB_GEODE is not set |
1434 | # CONFIG_FB_VIRTUAL is not set | 1492 | # CONFIG_FB_VIRTUAL is not set |
1435 | CONFIG_BACKLIGHT_LCD_SUPPORT=y | 1493 | CONFIG_BACKLIGHT_LCD_SUPPORT=y |
@@ -1437,6 +1495,7 @@ CONFIG_BACKLIGHT_LCD_SUPPORT=y | |||
1437 | CONFIG_BACKLIGHT_CLASS_DEVICE=y | 1495 | CONFIG_BACKLIGHT_CLASS_DEVICE=y |
1438 | # CONFIG_BACKLIGHT_CORGI is not set | 1496 | # CONFIG_BACKLIGHT_CORGI is not set |
1439 | # CONFIG_BACKLIGHT_PROGEAR is not set | 1497 | # CONFIG_BACKLIGHT_PROGEAR is not set |
1498 | # CONFIG_BACKLIGHT_MBP_NVIDIA is not set | ||
1440 | 1499 | ||
1441 | # | 1500 | # |
1442 | # Display device support | 1501 | # Display device support |
@@ -1455,15 +1514,7 @@ CONFIG_LOGO=y | |||
1455 | # CONFIG_LOGO_LINUX_MONO is not set | 1514 | # CONFIG_LOGO_LINUX_MONO is not set |
1456 | # CONFIG_LOGO_LINUX_VGA16 is not set | 1515 | # CONFIG_LOGO_LINUX_VGA16 is not set |
1457 | CONFIG_LOGO_LINUX_CLUT224=y | 1516 | CONFIG_LOGO_LINUX_CLUT224=y |
1458 | |||
1459 | # | ||
1460 | # Sound | ||
1461 | # | ||
1462 | CONFIG_SOUND=y | 1517 | CONFIG_SOUND=y |
1463 | |||
1464 | # | ||
1465 | # Advanced Linux Sound Architecture | ||
1466 | # | ||
1467 | CONFIG_SND=y | 1518 | CONFIG_SND=y |
1468 | CONFIG_SND_TIMER=y | 1519 | CONFIG_SND_TIMER=y |
1469 | CONFIG_SND_PCM=y | 1520 | CONFIG_SND_PCM=y |
@@ -1481,20 +1532,14 @@ CONFIG_SND_VERBOSE_PROCFS=y | |||
1481 | # CONFIG_SND_VERBOSE_PRINTK is not set | 1532 | # CONFIG_SND_VERBOSE_PRINTK is not set |
1482 | # CONFIG_SND_DEBUG is not set | 1533 | # CONFIG_SND_DEBUG is not set |
1483 | CONFIG_SND_VMASTER=y | 1534 | CONFIG_SND_VMASTER=y |
1484 | 1535 | CONFIG_SND_DRIVERS=y | |
1485 | # | ||
1486 | # Generic devices | ||
1487 | # | ||
1488 | # CONFIG_SND_PCSP is not set | 1536 | # CONFIG_SND_PCSP is not set |
1489 | # CONFIG_SND_DUMMY is not set | 1537 | # CONFIG_SND_DUMMY is not set |
1490 | # CONFIG_SND_VIRMIDI is not set | 1538 | # CONFIG_SND_VIRMIDI is not set |
1491 | # CONFIG_SND_MTPAV is not set | 1539 | # CONFIG_SND_MTPAV is not set |
1492 | # CONFIG_SND_SERIAL_U16550 is not set | 1540 | # CONFIG_SND_SERIAL_U16550 is not set |
1493 | # CONFIG_SND_MPU401 is not set | 1541 | # CONFIG_SND_MPU401 is not set |
1494 | 1542 | CONFIG_SND_PCI=y | |
1495 | # | ||
1496 | # PCI devices | ||
1497 | # | ||
1498 | # CONFIG_SND_AD1889 is not set | 1543 | # CONFIG_SND_AD1889 is not set |
1499 | # CONFIG_SND_ALS300 is not set | 1544 | # CONFIG_SND_ALS300 is not set |
1500 | # CONFIG_SND_ALS4000 is not set | 1545 | # CONFIG_SND_ALS4000 is not set |
@@ -1567,36 +1612,14 @@ CONFIG_SND_HDA_GENERIC=y | |||
1567 | # CONFIG_SND_VIRTUOSO is not set | 1612 | # CONFIG_SND_VIRTUOSO is not set |
1568 | # CONFIG_SND_VX222 is not set | 1613 | # CONFIG_SND_VX222 is not set |
1569 | # CONFIG_SND_YMFPCI is not set | 1614 | # CONFIG_SND_YMFPCI is not set |
1570 | 1615 | CONFIG_SND_USB=y | |
1571 | # | ||
1572 | # USB devices | ||
1573 | # | ||
1574 | # CONFIG_SND_USB_AUDIO is not set | 1616 | # CONFIG_SND_USB_AUDIO is not set |
1575 | # CONFIG_SND_USB_USX2Y is not set | 1617 | # CONFIG_SND_USB_USX2Y is not set |
1576 | # CONFIG_SND_USB_CAIAQ is not set | 1618 | # CONFIG_SND_USB_CAIAQ is not set |
1577 | 1619 | CONFIG_SND_PCMCIA=y | |
1578 | # | ||
1579 | # PCMCIA devices | ||
1580 | # | ||
1581 | # CONFIG_SND_VXPOCKET is not set | 1620 | # CONFIG_SND_VXPOCKET is not set |
1582 | # CONFIG_SND_PDAUDIOCF is not set | 1621 | # CONFIG_SND_PDAUDIOCF is not set |
1583 | |||
1584 | # | ||
1585 | # System on Chip audio support | ||
1586 | # | ||
1587 | # CONFIG_SND_SOC is not set | 1622 | # CONFIG_SND_SOC is not set |
1588 | |||
1589 | # | ||
1590 | # ALSA SoC audio for Freescale SOCs | ||
1591 | # | ||
1592 | |||
1593 | # | ||
1594 | # SoC Audio for the Texas Instruments OMAP | ||
1595 | # | ||
1596 | |||
1597 | # | ||
1598 | # Open Sound System | ||
1599 | # | ||
1600 | # CONFIG_SOUND_PRIME is not set | 1623 | # CONFIG_SOUND_PRIME is not set |
1601 | CONFIG_HID_SUPPORT=y | 1624 | CONFIG_HID_SUPPORT=y |
1602 | CONFIG_HID=y | 1625 | CONFIG_HID=y |
@@ -1632,6 +1655,7 @@ CONFIG_USB_DEVICEFS=y | |||
1632 | # CONFIG_USB_DYNAMIC_MINORS is not set | 1655 | # CONFIG_USB_DYNAMIC_MINORS is not set |
1633 | CONFIG_USB_SUSPEND=y | 1656 | CONFIG_USB_SUSPEND=y |
1634 | # CONFIG_USB_OTG is not set | 1657 | # CONFIG_USB_OTG is not set |
1658 | CONFIG_USB_MON=y | ||
1635 | 1659 | ||
1636 | # | 1660 | # |
1637 | # USB Host Controller Drivers | 1661 | # USB Host Controller Drivers |
@@ -1655,6 +1679,7 @@ CONFIG_USB_UHCI_HCD=y | |||
1655 | # | 1679 | # |
1656 | # CONFIG_USB_ACM is not set | 1680 | # CONFIG_USB_ACM is not set |
1657 | CONFIG_USB_PRINTER=y | 1681 | CONFIG_USB_PRINTER=y |
1682 | # CONFIG_USB_WDM is not set | ||
1658 | 1683 | ||
1659 | # | 1684 | # |
1660 | # NOTE: USB_STORAGE enables SCSI, and 'SCSI disk support' | 1685 | # NOTE: USB_STORAGE enables SCSI, and 'SCSI disk support' |
@@ -1676,6 +1701,7 @@ CONFIG_USB_STORAGE=y | |||
1676 | # CONFIG_USB_STORAGE_ALAUDA is not set | 1701 | # CONFIG_USB_STORAGE_ALAUDA is not set |
1677 | # CONFIG_USB_STORAGE_ONETOUCH is not set | 1702 | # CONFIG_USB_STORAGE_ONETOUCH is not set |
1678 | # CONFIG_USB_STORAGE_KARMA is not set | 1703 | # CONFIG_USB_STORAGE_KARMA is not set |
1704 | # CONFIG_USB_STORAGE_SIERRA is not set | ||
1679 | # CONFIG_USB_STORAGE_CYPRESS_ATACB is not set | 1705 | # CONFIG_USB_STORAGE_CYPRESS_ATACB is not set |
1680 | CONFIG_USB_LIBUSUAL=y | 1706 | CONFIG_USB_LIBUSUAL=y |
1681 | 1707 | ||
@@ -1684,7 +1710,6 @@ CONFIG_USB_LIBUSUAL=y | |||
1684 | # | 1710 | # |
1685 | # CONFIG_USB_MDC800 is not set | 1711 | # CONFIG_USB_MDC800 is not set |
1686 | # CONFIG_USB_MICROTEK is not set | 1712 | # CONFIG_USB_MICROTEK is not set |
1687 | CONFIG_USB_MON=y | ||
1688 | 1713 | ||
1689 | # | 1714 | # |
1690 | # USB port drivers | 1715 | # USB port drivers |
@@ -1697,7 +1722,6 @@ CONFIG_USB_MON=y | |||
1697 | # CONFIG_USB_EMI62 is not set | 1722 | # CONFIG_USB_EMI62 is not set |
1698 | # CONFIG_USB_EMI26 is not set | 1723 | # CONFIG_USB_EMI26 is not set |
1699 | # CONFIG_USB_ADUTUX is not set | 1724 | # CONFIG_USB_ADUTUX is not set |
1700 | # CONFIG_USB_AUERSWALD is not set | ||
1701 | # CONFIG_USB_RIO500 is not set | 1725 | # CONFIG_USB_RIO500 is not set |
1702 | # CONFIG_USB_LEGOTOWER is not set | 1726 | # CONFIG_USB_LEGOTOWER is not set |
1703 | # CONFIG_USB_LCD is not set | 1727 | # CONFIG_USB_LCD is not set |
@@ -1714,6 +1738,7 @@ CONFIG_USB_MON=y | |||
1714 | # CONFIG_USB_TRANCEVIBRATOR is not set | 1738 | # CONFIG_USB_TRANCEVIBRATOR is not set |
1715 | # CONFIG_USB_IOWARRIOR is not set | 1739 | # CONFIG_USB_IOWARRIOR is not set |
1716 | # CONFIG_USB_TEST is not set | 1740 | # CONFIG_USB_TEST is not set |
1741 | # CONFIG_USB_ISIGHTFW is not set | ||
1717 | # CONFIG_USB_GADGET is not set | 1742 | # CONFIG_USB_GADGET is not set |
1718 | # CONFIG_MMC is not set | 1743 | # CONFIG_MMC is not set |
1719 | # CONFIG_MEMSTICK is not set | 1744 | # CONFIG_MEMSTICK is not set |
@@ -1723,7 +1748,9 @@ CONFIG_LEDS_CLASS=y | |||
1723 | # | 1748 | # |
1724 | # LED drivers | 1749 | # LED drivers |
1725 | # | 1750 | # |
1751 | # CONFIG_LEDS_PCA9532 is not set | ||
1726 | # CONFIG_LEDS_CLEVO_MAIL is not set | 1752 | # CONFIG_LEDS_CLEVO_MAIL is not set |
1753 | # CONFIG_LEDS_PCA955X is not set | ||
1727 | 1754 | ||
1728 | # | 1755 | # |
1729 | # LED Triggers | 1756 | # LED Triggers |
@@ -1769,6 +1796,7 @@ CONFIG_RTC_INTF_DEV=y | |||
1769 | # CONFIG_RTC_DRV_PCF8583 is not set | 1796 | # CONFIG_RTC_DRV_PCF8583 is not set |
1770 | # CONFIG_RTC_DRV_M41T80 is not set | 1797 | # CONFIG_RTC_DRV_M41T80 is not set |
1771 | # CONFIG_RTC_DRV_S35390A is not set | 1798 | # CONFIG_RTC_DRV_S35390A is not set |
1799 | # CONFIG_RTC_DRV_FM3130 is not set | ||
1772 | 1800 | ||
1773 | # | 1801 | # |
1774 | # SPI RTC drivers | 1802 | # SPI RTC drivers |
@@ -1801,11 +1829,13 @@ CONFIG_DMADEVICES=y | |||
1801 | # Firmware Drivers | 1829 | # Firmware Drivers |
1802 | # | 1830 | # |
1803 | # CONFIG_EDD is not set | 1831 | # CONFIG_EDD is not set |
1832 | CONFIG_FIRMWARE_MEMMAP=y | ||
1804 | CONFIG_EFI_VARS=y | 1833 | CONFIG_EFI_VARS=y |
1805 | # CONFIG_DELL_RBU is not set | 1834 | # CONFIG_DELL_RBU is not set |
1806 | # CONFIG_DCDBAS is not set | 1835 | # CONFIG_DCDBAS is not set |
1807 | CONFIG_DMIID=y | 1836 | CONFIG_DMIID=y |
1808 | # CONFIG_ISCSI_IBFT_FIND is not set | 1837 | CONFIG_ISCSI_IBFT_FIND=y |
1838 | CONFIG_ISCSI_IBFT=y | ||
1809 | 1839 | ||
1810 | # | 1840 | # |
1811 | # File systems | 1841 | # File systems |
@@ -1885,14 +1915,27 @@ CONFIG_HUGETLB_PAGE=y | |||
1885 | # CONFIG_CRAMFS is not set | 1915 | # CONFIG_CRAMFS is not set |
1886 | # CONFIG_VXFS_FS is not set | 1916 | # CONFIG_VXFS_FS is not set |
1887 | # CONFIG_MINIX_FS is not set | 1917 | # CONFIG_MINIX_FS is not set |
1918 | # CONFIG_OMFS_FS is not set | ||
1888 | # CONFIG_HPFS_FS is not set | 1919 | # CONFIG_HPFS_FS is not set |
1889 | # CONFIG_QNX4FS_FS is not set | 1920 | # CONFIG_QNX4FS_FS is not set |
1890 | # CONFIG_ROMFS_FS is not set | 1921 | # CONFIG_ROMFS_FS is not set |
1891 | # CONFIG_SYSV_FS is not set | 1922 | # CONFIG_SYSV_FS is not set |
1892 | # CONFIG_UFS_FS is not set | 1923 | # CONFIG_UFS_FS is not set |
1893 | CONFIG_NETWORK_FILESYSTEMS=y | 1924 | CONFIG_NETWORK_FILESYSTEMS=y |
1894 | # CONFIG_NFS_FS is not set | 1925 | CONFIG_NFS_FS=y |
1926 | CONFIG_NFS_V3=y | ||
1927 | CONFIG_NFS_V3_ACL=y | ||
1928 | CONFIG_NFS_V4=y | ||
1929 | CONFIG_ROOT_NFS=y | ||
1895 | # CONFIG_NFSD is not set | 1930 | # CONFIG_NFSD is not set |
1931 | CONFIG_LOCKD=y | ||
1932 | CONFIG_LOCKD_V4=y | ||
1933 | CONFIG_NFS_ACL_SUPPORT=y | ||
1934 | CONFIG_NFS_COMMON=y | ||
1935 | CONFIG_SUNRPC=y | ||
1936 | CONFIG_SUNRPC_GSS=y | ||
1937 | CONFIG_RPCSEC_GSS_KRB5=y | ||
1938 | # CONFIG_RPCSEC_GSS_SPKM3 is not set | ||
1896 | # CONFIG_SMB_FS is not set | 1939 | # CONFIG_SMB_FS is not set |
1897 | # CONFIG_CIFS is not set | 1940 | # CONFIG_CIFS is not set |
1898 | # CONFIG_NCP_FS is not set | 1941 | # CONFIG_NCP_FS is not set |
@@ -1966,9 +2009,9 @@ CONFIG_NLS_UTF8=y | |||
1966 | # Kernel hacking | 2009 | # Kernel hacking |
1967 | # | 2010 | # |
1968 | CONFIG_TRACE_IRQFLAGS_SUPPORT=y | 2011 | CONFIG_TRACE_IRQFLAGS_SUPPORT=y |
1969 | # CONFIG_PRINTK_TIME is not set | 2012 | CONFIG_PRINTK_TIME=y |
1970 | # CONFIG_ENABLE_WARN_DEPRECATED is not set | 2013 | CONFIG_ENABLE_WARN_DEPRECATED=y |
1971 | # CONFIG_ENABLE_MUST_CHECK is not set | 2014 | CONFIG_ENABLE_MUST_CHECK=y |
1972 | CONFIG_FRAME_WARN=2048 | 2015 | CONFIG_FRAME_WARN=2048 |
1973 | CONFIG_MAGIC_SYSRQ=y | 2016 | CONFIG_MAGIC_SYSRQ=y |
1974 | # CONFIG_UNUSED_SYMBOLS is not set | 2017 | # CONFIG_UNUSED_SYMBOLS is not set |
@@ -1997,6 +2040,7 @@ CONFIG_DEBUG_BUGVERBOSE=y | |||
1997 | # CONFIG_DEBUG_INFO is not set | 2040 | # CONFIG_DEBUG_INFO is not set |
1998 | # CONFIG_DEBUG_VM is not set | 2041 | # CONFIG_DEBUG_VM is not set |
1999 | # CONFIG_DEBUG_WRITECOUNT is not set | 2042 | # CONFIG_DEBUG_WRITECOUNT is not set |
2043 | CONFIG_DEBUG_MEMORY_INIT=y | ||
2000 | # CONFIG_DEBUG_LIST is not set | 2044 | # CONFIG_DEBUG_LIST is not set |
2001 | # CONFIG_DEBUG_SG is not set | 2045 | # CONFIG_DEBUG_SG is not set |
2002 | CONFIG_FRAME_POINTER=y | 2046 | CONFIG_FRAME_POINTER=y |
@@ -2007,11 +2051,20 @@ CONFIG_FRAME_POINTER=y | |||
2007 | # CONFIG_LKDTM is not set | 2051 | # CONFIG_LKDTM is not set |
2008 | # CONFIG_FAULT_INJECTION is not set | 2052 | # CONFIG_FAULT_INJECTION is not set |
2009 | # CONFIG_LATENCYTOP is not set | 2053 | # CONFIG_LATENCYTOP is not set |
2054 | CONFIG_SYSCTL_SYSCALL_CHECK=y | ||
2055 | CONFIG_HAVE_FTRACE=y | ||
2056 | CONFIG_HAVE_DYNAMIC_FTRACE=y | ||
2057 | # CONFIG_FTRACE is not set | ||
2058 | # CONFIG_IRQSOFF_TRACER is not set | ||
2059 | # CONFIG_SYSPROF_TRACER is not set | ||
2060 | # CONFIG_SCHED_TRACER is not set | ||
2061 | # CONFIG_CONTEXT_SWITCH_TRACER is not set | ||
2010 | CONFIG_PROVIDE_OHCI1394_DMA_INIT=y | 2062 | CONFIG_PROVIDE_OHCI1394_DMA_INIT=y |
2011 | # CONFIG_SAMPLES is not set | 2063 | # CONFIG_SAMPLES is not set |
2012 | # CONFIG_KGDB is not set | ||
2013 | CONFIG_HAVE_ARCH_KGDB=y | 2064 | CONFIG_HAVE_ARCH_KGDB=y |
2014 | # CONFIG_NONPROMISC_DEVMEM is not set | 2065 | # CONFIG_KGDB is not set |
2066 | # CONFIG_STRICT_DEVMEM is not set | ||
2067 | CONFIG_X86_VERBOSE_BOOTUP=y | ||
2015 | CONFIG_EARLY_PRINTK=y | 2068 | CONFIG_EARLY_PRINTK=y |
2016 | CONFIG_DEBUG_STACKOVERFLOW=y | 2069 | CONFIG_DEBUG_STACKOVERFLOW=y |
2017 | CONFIG_DEBUG_STACK_USAGE=y | 2070 | CONFIG_DEBUG_STACK_USAGE=y |
@@ -2022,8 +2075,8 @@ CONFIG_DEBUG_RODATA=y | |||
2022 | # CONFIG_DIRECT_GBPAGES is not set | 2075 | # CONFIG_DIRECT_GBPAGES is not set |
2023 | # CONFIG_DEBUG_RODATA_TEST is not set | 2076 | # CONFIG_DEBUG_RODATA_TEST is not set |
2024 | CONFIG_DEBUG_NX_TEST=m | 2077 | CONFIG_DEBUG_NX_TEST=m |
2025 | CONFIG_X86_MPPARSE=y | ||
2026 | # CONFIG_IOMMU_DEBUG is not set | 2078 | # CONFIG_IOMMU_DEBUG is not set |
2079 | # CONFIG_MMIOTRACE is not set | ||
2027 | CONFIG_IO_DELAY_TYPE_0X80=0 | 2080 | CONFIG_IO_DELAY_TYPE_0X80=0 |
2028 | CONFIG_IO_DELAY_TYPE_0XED=1 | 2081 | CONFIG_IO_DELAY_TYPE_0XED=1 |
2029 | CONFIG_IO_DELAY_TYPE_UDELAY=2 | 2082 | CONFIG_IO_DELAY_TYPE_UDELAY=2 |
@@ -2035,6 +2088,7 @@ CONFIG_IO_DELAY_0X80=y | |||
2035 | CONFIG_DEFAULT_IO_DELAY_TYPE=0 | 2088 | CONFIG_DEFAULT_IO_DELAY_TYPE=0 |
2036 | CONFIG_DEBUG_BOOT_PARAMS=y | 2089 | CONFIG_DEBUG_BOOT_PARAMS=y |
2037 | # CONFIG_CPA_DEBUG is not set | 2090 | # CONFIG_CPA_DEBUG is not set |
2091 | # CONFIG_OPTIMIZE_INLINING is not set | ||
2038 | 2092 | ||
2039 | # | 2093 | # |
2040 | # Security options | 2094 | # Security options |
@@ -2044,7 +2098,6 @@ CONFIG_KEYS_DEBUG_PROC_KEYS=y | |||
2044 | CONFIG_SECURITY=y | 2098 | CONFIG_SECURITY=y |
2045 | CONFIG_SECURITY_NETWORK=y | 2099 | CONFIG_SECURITY_NETWORK=y |
2046 | # CONFIG_SECURITY_NETWORK_XFRM is not set | 2100 | # CONFIG_SECURITY_NETWORK_XFRM is not set |
2047 | CONFIG_SECURITY_CAPABILITIES=y | ||
2048 | CONFIG_SECURITY_FILE_CAPABILITIES=y | 2101 | CONFIG_SECURITY_FILE_CAPABILITIES=y |
2049 | # CONFIG_SECURITY_ROOTPLUG is not set | 2102 | # CONFIG_SECURITY_ROOTPLUG is not set |
2050 | CONFIG_SECURITY_DEFAULT_MMAP_MIN_ADDR=65536 | 2103 | CONFIG_SECURITY_DEFAULT_MMAP_MIN_ADDR=65536 |
@@ -2105,6 +2158,10 @@ CONFIG_CRYPTO_HMAC=y | |||
2105 | # CONFIG_CRYPTO_MD4 is not set | 2158 | # CONFIG_CRYPTO_MD4 is not set |
2106 | CONFIG_CRYPTO_MD5=y | 2159 | CONFIG_CRYPTO_MD5=y |
2107 | # CONFIG_CRYPTO_MICHAEL_MIC is not set | 2160 | # CONFIG_CRYPTO_MICHAEL_MIC is not set |
2161 | # CONFIG_CRYPTO_RMD128 is not set | ||
2162 | # CONFIG_CRYPTO_RMD160 is not set | ||
2163 | # CONFIG_CRYPTO_RMD256 is not set | ||
2164 | # CONFIG_CRYPTO_RMD320 is not set | ||
2108 | CONFIG_CRYPTO_SHA1=y | 2165 | CONFIG_CRYPTO_SHA1=y |
2109 | # CONFIG_CRYPTO_SHA256 is not set | 2166 | # CONFIG_CRYPTO_SHA256 is not set |
2110 | # CONFIG_CRYPTO_SHA512 is not set | 2167 | # CONFIG_CRYPTO_SHA512 is not set |
@@ -2154,6 +2211,7 @@ CONFIG_GENERIC_FIND_FIRST_BIT=y | |||
2154 | CONFIG_GENERIC_FIND_NEXT_BIT=y | 2211 | CONFIG_GENERIC_FIND_NEXT_BIT=y |
2155 | # CONFIG_CRC_CCITT is not set | 2212 | # CONFIG_CRC_CCITT is not set |
2156 | # CONFIG_CRC16 is not set | 2213 | # CONFIG_CRC16 is not set |
2214 | CONFIG_CRC_T10DIF=y | ||
2157 | # CONFIG_CRC_ITU_T is not set | 2215 | # CONFIG_CRC_ITU_T is not set |
2158 | CONFIG_CRC32=y | 2216 | CONFIG_CRC32=y |
2159 | # CONFIG_CRC7 is not set | 2217 | # CONFIG_CRC7 is not set |
diff --git a/arch/x86/ia32/ia32_aout.c b/arch/x86/ia32/ia32_aout.c index 58cccb6483b0..a0e1dbe67dc1 100644 --- a/arch/x86/ia32/ia32_aout.c +++ b/arch/x86/ia32/ia32_aout.c | |||
@@ -441,12 +441,6 @@ beyond_if: | |||
441 | regs->r8 = regs->r9 = regs->r10 = regs->r11 = | 441 | regs->r8 = regs->r9 = regs->r10 = regs->r11 = |
442 | regs->r12 = regs->r13 = regs->r14 = regs->r15 = 0; | 442 | regs->r12 = regs->r13 = regs->r14 = regs->r15 = 0; |
443 | set_fs(USER_DS); | 443 | set_fs(USER_DS); |
444 | if (unlikely(current->ptrace & PT_PTRACED)) { | ||
445 | if (current->ptrace & PT_TRACE_EXEC) | ||
446 | ptrace_notify((PTRACE_EVENT_EXEC << 8) | SIGTRAP); | ||
447 | else | ||
448 | send_sig(SIGTRAP, current, 0); | ||
449 | } | ||
450 | return 0; | 444 | return 0; |
451 | } | 445 | } |
452 | 446 | ||
diff --git a/arch/x86/ia32/ia32_signal.c b/arch/x86/ia32/ia32_signal.c index cb3856a18c85..20af4c79579a 100644 --- a/arch/x86/ia32/ia32_signal.c +++ b/arch/x86/ia32/ia32_signal.c | |||
@@ -36,6 +36,11 @@ | |||
36 | 36 | ||
37 | #define _BLOCKABLE (~(sigmask(SIGKILL) | sigmask(SIGSTOP))) | 37 | #define _BLOCKABLE (~(sigmask(SIGKILL) | sigmask(SIGSTOP))) |
38 | 38 | ||
39 | #define FIX_EFLAGS (X86_EFLAGS_AC | X86_EFLAGS_OF | \ | ||
40 | X86_EFLAGS_DF | X86_EFLAGS_TF | X86_EFLAGS_SF | \ | ||
41 | X86_EFLAGS_ZF | X86_EFLAGS_AF | X86_EFLAGS_PF | \ | ||
42 | X86_EFLAGS_CF) | ||
43 | |||
39 | asmlinkage int do_signal(struct pt_regs *regs, sigset_t *oldset); | 44 | asmlinkage int do_signal(struct pt_regs *regs, sigset_t *oldset); |
40 | void signal_fault(struct pt_regs *regs, void __user *frame, char *where); | 45 | void signal_fault(struct pt_regs *regs, void __user *frame, char *where); |
41 | 46 | ||
@@ -248,7 +253,7 @@ static int ia32_restore_sigcontext(struct pt_regs *regs, | |||
248 | regs->ss |= 3; | 253 | regs->ss |= 3; |
249 | 254 | ||
250 | err |= __get_user(tmpflags, &sc->flags); | 255 | err |= __get_user(tmpflags, &sc->flags); |
251 | regs->flags = (regs->flags & ~0x40DD5) | (tmpflags & 0x40DD5); | 256 | regs->flags = (regs->flags & ~FIX_EFLAGS) | (tmpflags & FIX_EFLAGS); |
252 | /* disable syscall checks */ | 257 | /* disable syscall checks */ |
253 | regs->orig_ax = -1; | 258 | regs->orig_ax = -1; |
254 | 259 | ||
@@ -515,7 +520,6 @@ int ia32_setup_rt_frame(int sig, struct k_sigaction *ka, siginfo_t *info, | |||
515 | compat_sigset_t *set, struct pt_regs *regs) | 520 | compat_sigset_t *set, struct pt_regs *regs) |
516 | { | 521 | { |
517 | struct rt_sigframe __user *frame; | 522 | struct rt_sigframe __user *frame; |
518 | struct exec_domain *ed = current_thread_info()->exec_domain; | ||
519 | void __user *restorer; | 523 | void __user *restorer; |
520 | int err = 0; | 524 | int err = 0; |
521 | 525 | ||
@@ -538,8 +542,7 @@ int ia32_setup_rt_frame(int sig, struct k_sigaction *ka, siginfo_t *info, | |||
538 | if (!access_ok(VERIFY_WRITE, frame, sizeof(*frame))) | 542 | if (!access_ok(VERIFY_WRITE, frame, sizeof(*frame))) |
539 | goto give_sigsegv; | 543 | goto give_sigsegv; |
540 | 544 | ||
541 | err |= __put_user((ed && ed->signal_invmap && sig < 32 | 545 | err |= __put_user(sig, &frame->sig); |
542 | ? ed->signal_invmap[sig] : sig), &frame->sig); | ||
543 | err |= __put_user(ptr_to_compat(&frame->info), &frame->pinfo); | 546 | err |= __put_user(ptr_to_compat(&frame->info), &frame->pinfo); |
544 | err |= __put_user(ptr_to_compat(&frame->uc), &frame->puc); | 547 | err |= __put_user(ptr_to_compat(&frame->uc), &frame->puc); |
545 | err |= copy_siginfo_to_user32(&frame->info, info); | 548 | err |= copy_siginfo_to_user32(&frame->info, info); |
diff --git a/arch/x86/ia32/ia32entry.S b/arch/x86/ia32/ia32entry.S index 20371d0635e4..ffc1bb4fed7d 100644 --- a/arch/x86/ia32/ia32entry.S +++ b/arch/x86/ia32/ia32entry.S | |||
@@ -15,6 +15,16 @@ | |||
15 | #include <asm/irqflags.h> | 15 | #include <asm/irqflags.h> |
16 | #include <linux/linkage.h> | 16 | #include <linux/linkage.h> |
17 | 17 | ||
18 | /* Avoid __ASSEMBLER__'ifying <linux/audit.h> just for this. */ | ||
19 | #include <linux/elf-em.h> | ||
20 | #define AUDIT_ARCH_I386 (EM_386|__AUDIT_ARCH_LE) | ||
21 | #define __AUDIT_ARCH_LE 0x40000000 | ||
22 | |||
23 | #ifndef CONFIG_AUDITSYSCALL | ||
24 | #define sysexit_audit int_ret_from_sys_call | ||
25 | #define sysretl_audit int_ret_from_sys_call | ||
26 | #endif | ||
27 | |||
18 | #define IA32_NR_syscalls ((ia32_syscall_end - ia32_sys_call_table)/8) | 28 | #define IA32_NR_syscalls ((ia32_syscall_end - ia32_sys_call_table)/8) |
19 | 29 | ||
20 | .macro IA32_ARG_FIXUP noebp=0 | 30 | .macro IA32_ARG_FIXUP noebp=0 |
@@ -37,6 +47,11 @@ | |||
37 | movq %rax,R8(%rsp) | 47 | movq %rax,R8(%rsp) |
38 | .endm | 48 | .endm |
39 | 49 | ||
50 | /* | ||
51 | * Reload arg registers from stack in case ptrace changed them. | ||
52 | * We don't reload %eax because syscall_trace_enter() returned | ||
53 | * the value it wants us to use in the table lookup. | ||
54 | */ | ||
40 | .macro LOAD_ARGS32 offset | 55 | .macro LOAD_ARGS32 offset |
41 | movl \offset(%rsp),%r11d | 56 | movl \offset(%rsp),%r11d |
42 | movl \offset+8(%rsp),%r10d | 57 | movl \offset+8(%rsp),%r10d |
@@ -46,7 +61,6 @@ | |||
46 | movl \offset+48(%rsp),%edx | 61 | movl \offset+48(%rsp),%edx |
47 | movl \offset+56(%rsp),%esi | 62 | movl \offset+56(%rsp),%esi |
48 | movl \offset+64(%rsp),%edi | 63 | movl \offset+64(%rsp),%edi |
49 | movl \offset+72(%rsp),%eax | ||
50 | .endm | 64 | .endm |
51 | 65 | ||
52 | .macro CFI_STARTPROC32 simple | 66 | .macro CFI_STARTPROC32 simple |
@@ -137,21 +151,22 @@ ENTRY(ia32_sysenter_target) | |||
137 | .previous | 151 | .previous |
138 | GET_THREAD_INFO(%r10) | 152 | GET_THREAD_INFO(%r10) |
139 | orl $TS_COMPAT,TI_status(%r10) | 153 | orl $TS_COMPAT,TI_status(%r10) |
140 | testl $(_TIF_SYSCALL_TRACE|_TIF_SYSCALL_AUDIT|_TIF_SECCOMP), \ | 154 | testl $_TIF_WORK_SYSCALL_ENTRY,TI_flags(%r10) |
141 | TI_flags(%r10) | ||
142 | CFI_REMEMBER_STATE | 155 | CFI_REMEMBER_STATE |
143 | jnz sysenter_tracesys | 156 | jnz sysenter_tracesys |
144 | sysenter_do_call: | ||
145 | cmpl $(IA32_NR_syscalls-1),%eax | 157 | cmpl $(IA32_NR_syscalls-1),%eax |
146 | ja ia32_badsys | 158 | ja ia32_badsys |
159 | sysenter_do_call: | ||
147 | IA32_ARG_FIXUP 1 | 160 | IA32_ARG_FIXUP 1 |
161 | sysenter_dispatch: | ||
148 | call *ia32_sys_call_table(,%rax,8) | 162 | call *ia32_sys_call_table(,%rax,8) |
149 | movq %rax,RAX-ARGOFFSET(%rsp) | 163 | movq %rax,RAX-ARGOFFSET(%rsp) |
150 | GET_THREAD_INFO(%r10) | 164 | GET_THREAD_INFO(%r10) |
151 | DISABLE_INTERRUPTS(CLBR_NONE) | 165 | DISABLE_INTERRUPTS(CLBR_NONE) |
152 | TRACE_IRQS_OFF | 166 | TRACE_IRQS_OFF |
153 | testl $_TIF_ALLWORK_MASK,TI_flags(%r10) | 167 | testl $_TIF_ALLWORK_MASK,TI_flags(%r10) |
154 | jnz int_ret_from_sys_call | 168 | jnz sysexit_audit |
169 | sysexit_from_sys_call: | ||
155 | andl $~TS_COMPAT,TI_status(%r10) | 170 | andl $~TS_COMPAT,TI_status(%r10) |
156 | /* clear IF, that popfq doesn't enable interrupts early */ | 171 | /* clear IF, that popfq doesn't enable interrupts early */ |
157 | andl $~0x200,EFLAGS-R11(%rsp) | 172 | andl $~0x200,EFLAGS-R11(%rsp) |
@@ -167,9 +182,63 @@ sysenter_do_call: | |||
167 | TRACE_IRQS_ON | 182 | TRACE_IRQS_ON |
168 | ENABLE_INTERRUPTS_SYSEXIT32 | 183 | ENABLE_INTERRUPTS_SYSEXIT32 |
169 | 184 | ||
170 | sysenter_tracesys: | 185 | #ifdef CONFIG_AUDITSYSCALL |
186 | .macro auditsys_entry_common | ||
187 | movl %esi,%r9d /* 6th arg: 4th syscall arg */ | ||
188 | movl %edx,%r8d /* 5th arg: 3rd syscall arg */ | ||
189 | /* (already in %ecx) 4th arg: 2nd syscall arg */ | ||
190 | movl %ebx,%edx /* 3rd arg: 1st syscall arg */ | ||
191 | movl %eax,%esi /* 2nd arg: syscall number */ | ||
192 | movl $AUDIT_ARCH_I386,%edi /* 1st arg: audit arch */ | ||
193 | call audit_syscall_entry | ||
194 | movl RAX-ARGOFFSET(%rsp),%eax /* reload syscall number */ | ||
195 | cmpl $(IA32_NR_syscalls-1),%eax | ||
196 | ja ia32_badsys | ||
197 | movl %ebx,%edi /* reload 1st syscall arg */ | ||
198 | movl RCX-ARGOFFSET(%rsp),%esi /* reload 2nd syscall arg */ | ||
199 | movl RDX-ARGOFFSET(%rsp),%edx /* reload 3rd syscall arg */ | ||
200 | movl RSI-ARGOFFSET(%rsp),%ecx /* reload 4th syscall arg */ | ||
201 | movl RDI-ARGOFFSET(%rsp),%r8d /* reload 5th syscall arg */ | ||
202 | .endm | ||
203 | |||
204 | .macro auditsys_exit exit,ebpsave=RBP | ||
205 | testl $(_TIF_ALLWORK_MASK & ~_TIF_SYSCALL_AUDIT),TI_flags(%r10) | ||
206 | jnz int_ret_from_sys_call | ||
207 | TRACE_IRQS_ON | ||
208 | sti | ||
209 | movl %eax,%esi /* second arg, syscall return value */ | ||
210 | cmpl $0,%eax /* is it < 0? */ | ||
211 | setl %al /* 1 if so, 0 if not */ | ||
212 | movzbl %al,%edi /* zero-extend that into %edi */ | ||
213 | inc %edi /* first arg, 0->1(AUDITSC_SUCCESS), 1->2(AUDITSC_FAILURE) */ | ||
214 | call audit_syscall_exit | ||
215 | GET_THREAD_INFO(%r10) | ||
216 | movl RAX-ARGOFFSET(%rsp),%eax /* reload syscall return value */ | ||
217 | movl \ebpsave-ARGOFFSET(%rsp),%ebp /* reload user register value */ | ||
218 | movl $(_TIF_ALLWORK_MASK & ~_TIF_SYSCALL_AUDIT),%edi | ||
219 | cli | ||
220 | TRACE_IRQS_OFF | ||
221 | testl %edi,TI_flags(%r10) | ||
222 | jnz int_with_check | ||
223 | jmp \exit | ||
224 | .endm | ||
225 | |||
226 | sysenter_auditsys: | ||
171 | CFI_RESTORE_STATE | 227 | CFI_RESTORE_STATE |
228 | auditsys_entry_common | ||
229 | movl %ebp,%r9d /* reload 6th syscall arg */ | ||
230 | jmp sysenter_dispatch | ||
231 | |||
232 | sysexit_audit: | ||
233 | auditsys_exit sysexit_from_sys_call | ||
234 | #endif | ||
235 | |||
236 | sysenter_tracesys: | ||
172 | xchgl %r9d,%ebp | 237 | xchgl %r9d,%ebp |
238 | #ifdef CONFIG_AUDITSYSCALL | ||
239 | testl $(_TIF_WORK_SYSCALL_ENTRY & ~_TIF_SYSCALL_AUDIT),TI_flags(%r10) | ||
240 | jz sysenter_auditsys | ||
241 | #endif | ||
173 | SAVE_REST | 242 | SAVE_REST |
174 | CLEAR_RREGS | 243 | CLEAR_RREGS |
175 | movq %r9,R9(%rsp) | 244 | movq %r9,R9(%rsp) |
@@ -242,21 +311,22 @@ ENTRY(ia32_cstar_target) | |||
242 | .previous | 311 | .previous |
243 | GET_THREAD_INFO(%r10) | 312 | GET_THREAD_INFO(%r10) |
244 | orl $TS_COMPAT,TI_status(%r10) | 313 | orl $TS_COMPAT,TI_status(%r10) |
245 | testl $(_TIF_SYSCALL_TRACE|_TIF_SYSCALL_AUDIT|_TIF_SECCOMP), \ | 314 | testl $_TIF_WORK_SYSCALL_ENTRY,TI_flags(%r10) |
246 | TI_flags(%r10) | ||
247 | CFI_REMEMBER_STATE | 315 | CFI_REMEMBER_STATE |
248 | jnz cstar_tracesys | 316 | jnz cstar_tracesys |
249 | cstar_do_call: | 317 | cstar_do_call: |
250 | cmpl $IA32_NR_syscalls-1,%eax | 318 | cmpl $IA32_NR_syscalls-1,%eax |
251 | ja ia32_badsys | 319 | ja ia32_badsys |
252 | IA32_ARG_FIXUP 1 | 320 | IA32_ARG_FIXUP 1 |
321 | cstar_dispatch: | ||
253 | call *ia32_sys_call_table(,%rax,8) | 322 | call *ia32_sys_call_table(,%rax,8) |
254 | movq %rax,RAX-ARGOFFSET(%rsp) | 323 | movq %rax,RAX-ARGOFFSET(%rsp) |
255 | GET_THREAD_INFO(%r10) | 324 | GET_THREAD_INFO(%r10) |
256 | DISABLE_INTERRUPTS(CLBR_NONE) | 325 | DISABLE_INTERRUPTS(CLBR_NONE) |
257 | TRACE_IRQS_OFF | 326 | TRACE_IRQS_OFF |
258 | testl $_TIF_ALLWORK_MASK,TI_flags(%r10) | 327 | testl $_TIF_ALLWORK_MASK,TI_flags(%r10) |
259 | jnz int_ret_from_sys_call | 328 | jnz sysretl_audit |
329 | sysretl_from_sys_call: | ||
260 | andl $~TS_COMPAT,TI_status(%r10) | 330 | andl $~TS_COMPAT,TI_status(%r10) |
261 | RESTORE_ARGS 1,-ARG_SKIP,1,1,1 | 331 | RESTORE_ARGS 1,-ARG_SKIP,1,1,1 |
262 | movl RIP-ARGOFFSET(%rsp),%ecx | 332 | movl RIP-ARGOFFSET(%rsp),%ecx |
@@ -268,8 +338,23 @@ cstar_do_call: | |||
268 | CFI_RESTORE rsp | 338 | CFI_RESTORE rsp |
269 | USERGS_SYSRET32 | 339 | USERGS_SYSRET32 |
270 | 340 | ||
271 | cstar_tracesys: | 341 | #ifdef CONFIG_AUDITSYSCALL |
342 | cstar_auditsys: | ||
272 | CFI_RESTORE_STATE | 343 | CFI_RESTORE_STATE |
344 | movl %r9d,R9-ARGOFFSET(%rsp) /* register to be clobbered by call */ | ||
345 | auditsys_entry_common | ||
346 | movl R9-ARGOFFSET(%rsp),%r9d /* reload 6th syscall arg */ | ||
347 | jmp cstar_dispatch | ||
348 | |||
349 | sysretl_audit: | ||
350 | auditsys_exit sysretl_from_sys_call, RCX /* user %ebp in RCX slot */ | ||
351 | #endif | ||
352 | |||
353 | cstar_tracesys: | ||
354 | #ifdef CONFIG_AUDITSYSCALL | ||
355 | testl $(_TIF_WORK_SYSCALL_ENTRY & ~_TIF_SYSCALL_AUDIT),TI_flags(%r10) | ||
356 | jz cstar_auditsys | ||
357 | #endif | ||
273 | xchgl %r9d,%ebp | 358 | xchgl %r9d,%ebp |
274 | SAVE_REST | 359 | SAVE_REST |
275 | CLEAR_RREGS | 360 | CLEAR_RREGS |
@@ -321,6 +406,7 @@ ENTRY(ia32_syscall) | |||
321 | /*CFI_REL_OFFSET rflags,EFLAGS-RIP*/ | 406 | /*CFI_REL_OFFSET rflags,EFLAGS-RIP*/ |
322 | /*CFI_REL_OFFSET cs,CS-RIP*/ | 407 | /*CFI_REL_OFFSET cs,CS-RIP*/ |
323 | CFI_REL_OFFSET rip,RIP-RIP | 408 | CFI_REL_OFFSET rip,RIP-RIP |
409 | PARAVIRT_ADJUST_EXCEPTION_FRAME | ||
324 | SWAPGS | 410 | SWAPGS |
325 | /* | 411 | /* |
326 | * No need to follow this irqs on/off section: the syscall | 412 | * No need to follow this irqs on/off section: the syscall |
@@ -336,8 +422,7 @@ ENTRY(ia32_syscall) | |||
336 | SAVE_ARGS 0,0,1 | 422 | SAVE_ARGS 0,0,1 |
337 | GET_THREAD_INFO(%r10) | 423 | GET_THREAD_INFO(%r10) |
338 | orl $TS_COMPAT,TI_status(%r10) | 424 | orl $TS_COMPAT,TI_status(%r10) |
339 | testl $(_TIF_SYSCALL_TRACE|_TIF_SYSCALL_AUDIT|_TIF_SECCOMP), \ | 425 | testl $_TIF_WORK_SYSCALL_ENTRY,TI_flags(%r10) |
340 | TI_flags(%r10) | ||
341 | jnz ia32_tracesys | 426 | jnz ia32_tracesys |
342 | ia32_do_syscall: | 427 | ia32_do_syscall: |
343 | cmpl $(IA32_NR_syscalls-1),%eax | 428 | cmpl $(IA32_NR_syscalls-1),%eax |
@@ -741,4 +826,10 @@ ia32_sys_call_table: | |||
741 | .quad sys32_fallocate | 826 | .quad sys32_fallocate |
742 | .quad compat_sys_timerfd_settime /* 325 */ | 827 | .quad compat_sys_timerfd_settime /* 325 */ |
743 | .quad compat_sys_timerfd_gettime | 828 | .quad compat_sys_timerfd_gettime |
829 | .quad compat_sys_signalfd4 | ||
830 | .quad sys_eventfd2 | ||
831 | .quad sys_epoll_create1 | ||
832 | .quad sys_dup3 /* 330 */ | ||
833 | .quad sys_pipe2 | ||
834 | .quad sys_inotify_init1 | ||
744 | ia32_syscall_end: | 835 | ia32_syscall_end: |
diff --git a/arch/x86/ia32/sys_ia32.c b/arch/x86/ia32/sys_ia32.c index f00afdf61e67..d3c64088b981 100644 --- a/arch/x86/ia32/sys_ia32.c +++ b/arch/x86/ia32/sys_ia32.c | |||
@@ -238,7 +238,7 @@ asmlinkage long sys32_pipe(int __user *fd) | |||
238 | int retval; | 238 | int retval; |
239 | int fds[2]; | 239 | int fds[2]; |
240 | 240 | ||
241 | retval = do_pipe(fds); | 241 | retval = do_pipe_flags(fds, 0); |
242 | if (retval) | 242 | if (retval) |
243 | goto out; | 243 | goto out; |
244 | if (copy_to_user(fd, fds, sizeof(fds))) | 244 | if (copy_to_user(fd, fds, sizeof(fds))) |
diff --git a/arch/x86/kernel/Makefile b/arch/x86/kernel/Makefile index da140611bb57..3db651fc8ec5 100644 --- a/arch/x86/kernel/Makefile +++ b/arch/x86/kernel/Makefile | |||
@@ -7,9 +7,10 @@ extra-y := head_$(BITS).o head$(BITS).o head.o init_task.o vmlinu | |||
7 | CPPFLAGS_vmlinux.lds += -U$(UTS_MACHINE) | 7 | CPPFLAGS_vmlinux.lds += -U$(UTS_MACHINE) |
8 | 8 | ||
9 | ifdef CONFIG_FTRACE | 9 | ifdef CONFIG_FTRACE |
10 | # Do not profile debug utilities | 10 | # Do not profile debug and lowlevel utilities |
11 | CFLAGS_REMOVE_tsc.o = -pg | 11 | CFLAGS_REMOVE_tsc.o = -pg |
12 | CFLAGS_REMOVE_rtc.o = -pg | 12 | CFLAGS_REMOVE_rtc.o = -pg |
13 | CFLAGS_REMOVE_paravirt.o = -pg | ||
13 | endif | 14 | endif |
14 | 15 | ||
15 | # | 16 | # |
@@ -102,6 +103,7 @@ obj-$(CONFIG_OLPC) += olpc.o | |||
102 | # 64 bit specific files | 103 | # 64 bit specific files |
103 | ifeq ($(CONFIG_X86_64),y) | 104 | ifeq ($(CONFIG_X86_64),y) |
104 | obj-y += genapic_64.o genapic_flat_64.o genx2apic_uv_x.o tlb_uv.o | 105 | obj-y += genapic_64.o genapic_flat_64.o genx2apic_uv_x.o tlb_uv.o |
106 | obj-y += bios_uv.o | ||
105 | obj-$(CONFIG_X86_PM_TIMER) += pmtimer_64.o | 107 | obj-$(CONFIG_X86_PM_TIMER) += pmtimer_64.o |
106 | obj-$(CONFIG_AUDIT) += audit_64.o | 108 | obj-$(CONFIG_AUDIT) += audit_64.o |
107 | 109 | ||
diff --git a/arch/x86/kernel/acpi/boot.c b/arch/x86/kernel/acpi/boot.c index f489d7a9be92..bfd10fd211cd 100644 --- a/arch/x86/kernel/acpi/boot.c +++ b/arch/x86/kernel/acpi/boot.c | |||
@@ -97,6 +97,8 @@ static u64 acpi_lapic_addr __initdata = APIC_DEFAULT_PHYS_BASE; | |||
97 | #warning ACPI uses CMPXCHG, i486 and later hardware | 97 | #warning ACPI uses CMPXCHG, i486 and later hardware |
98 | #endif | 98 | #endif |
99 | 99 | ||
100 | static int acpi_mcfg_64bit_base_addr __initdata = FALSE; | ||
101 | |||
100 | /* -------------------------------------------------------------------------- | 102 | /* -------------------------------------------------------------------------- |
101 | Boot-time Configuration | 103 | Boot-time Configuration |
102 | -------------------------------------------------------------------------- */ | 104 | -------------------------------------------------------------------------- */ |
@@ -158,6 +160,14 @@ char *__init __acpi_map_table(unsigned long phys, unsigned long size) | |||
158 | struct acpi_mcfg_allocation *pci_mmcfg_config; | 160 | struct acpi_mcfg_allocation *pci_mmcfg_config; |
159 | int pci_mmcfg_config_num; | 161 | int pci_mmcfg_config_num; |
160 | 162 | ||
163 | static int __init acpi_mcfg_oem_check(struct acpi_table_mcfg *mcfg) | ||
164 | { | ||
165 | if (!strcmp(mcfg->header.oem_id, "SGI")) | ||
166 | acpi_mcfg_64bit_base_addr = TRUE; | ||
167 | |||
168 | return 0; | ||
169 | } | ||
170 | |||
161 | int __init acpi_parse_mcfg(struct acpi_table_header *header) | 171 | int __init acpi_parse_mcfg(struct acpi_table_header *header) |
162 | { | 172 | { |
163 | struct acpi_table_mcfg *mcfg; | 173 | struct acpi_table_mcfg *mcfg; |
@@ -190,8 +200,12 @@ int __init acpi_parse_mcfg(struct acpi_table_header *header) | |||
190 | } | 200 | } |
191 | 201 | ||
192 | memcpy(pci_mmcfg_config, &mcfg[1], config_size); | 202 | memcpy(pci_mmcfg_config, &mcfg[1], config_size); |
203 | |||
204 | acpi_mcfg_oem_check(mcfg); | ||
205 | |||
193 | for (i = 0; i < pci_mmcfg_config_num; ++i) { | 206 | for (i = 0; i < pci_mmcfg_config_num; ++i) { |
194 | if (pci_mmcfg_config[i].address > 0xFFFFFFFF) { | 207 | if ((pci_mmcfg_config[i].address > 0xFFFFFFFF) && |
208 | !acpi_mcfg_64bit_base_addr) { | ||
195 | printk(KERN_ERR PREFIX | 209 | printk(KERN_ERR PREFIX |
196 | "MMCONFIG not in low 4GB of memory\n"); | 210 | "MMCONFIG not in low 4GB of memory\n"); |
197 | kfree(pci_mmcfg_config); | 211 | kfree(pci_mmcfg_config); |
@@ -1021,7 +1035,7 @@ void __init mp_config_acpi_legacy_irqs(void) | |||
1021 | mp_bus_id_to_type[MP_ISA_BUS] = MP_BUS_ISA; | 1035 | mp_bus_id_to_type[MP_ISA_BUS] = MP_BUS_ISA; |
1022 | #endif | 1036 | #endif |
1023 | set_bit(MP_ISA_BUS, mp_bus_not_pci); | 1037 | set_bit(MP_ISA_BUS, mp_bus_not_pci); |
1024 | Dprintk("Bus #%d is ISA\n", MP_ISA_BUS); | 1038 | pr_debug("Bus #%d is ISA\n", MP_ISA_BUS); |
1025 | 1039 | ||
1026 | #ifdef CONFIG_X86_ES7000 | 1040 | #ifdef CONFIG_X86_ES7000 |
1027 | /* | 1041 | /* |
@@ -1127,8 +1141,8 @@ int mp_register_gsi(u32 gsi, int triggering, int polarity) | |||
1127 | return gsi; | 1141 | return gsi; |
1128 | } | 1142 | } |
1129 | if (test_bit(ioapic_pin, mp_ioapic_routing[ioapic].pin_programmed)) { | 1143 | if (test_bit(ioapic_pin, mp_ioapic_routing[ioapic].pin_programmed)) { |
1130 | Dprintk(KERN_DEBUG "Pin %d-%d already programmed\n", | 1144 | pr_debug(KERN_DEBUG "Pin %d-%d already programmed\n", |
1131 | mp_ioapic_routing[ioapic].apic_id, ioapic_pin); | 1145 | mp_ioapic_routing[ioapic].apic_id, ioapic_pin); |
1132 | #ifdef CONFIG_X86_32 | 1146 | #ifdef CONFIG_X86_32 |
1133 | return (gsi < IRQ_COMPRESSION_START ? gsi : gsi_to_irq[gsi]); | 1147 | return (gsi < IRQ_COMPRESSION_START ? gsi : gsi_to_irq[gsi]); |
1134 | #else | 1148 | #else |
diff --git a/arch/x86/kernel/acpi/sleep.c b/arch/x86/kernel/acpi/sleep.c index a3ddad18aaa3..426e5d91b63a 100644 --- a/arch/x86/kernel/acpi/sleep.c +++ b/arch/x86/kernel/acpi/sleep.c | |||
@@ -20,7 +20,7 @@ unsigned long acpi_realmode_flags; | |||
20 | /* address in low memory of the wakeup routine. */ | 20 | /* address in low memory of the wakeup routine. */ |
21 | static unsigned long acpi_realmode; | 21 | static unsigned long acpi_realmode; |
22 | 22 | ||
23 | #ifdef CONFIG_64BIT | 23 | #if defined(CONFIG_SMP) && defined(CONFIG_64BIT) |
24 | static char temp_stack[10240]; | 24 | static char temp_stack[10240]; |
25 | #endif | 25 | #endif |
26 | 26 | ||
@@ -86,7 +86,7 @@ int acpi_save_state_mem(void) | |||
86 | #endif /* !CONFIG_64BIT */ | 86 | #endif /* !CONFIG_64BIT */ |
87 | 87 | ||
88 | header->pmode_cr0 = read_cr0(); | 88 | header->pmode_cr0 = read_cr0(); |
89 | header->pmode_cr4 = read_cr4(); | 89 | header->pmode_cr4 = read_cr4_safe(); |
90 | header->realmode_flags = acpi_realmode_flags; | 90 | header->realmode_flags = acpi_realmode_flags; |
91 | header->real_magic = 0x12345678; | 91 | header->real_magic = 0x12345678; |
92 | 92 | ||
@@ -150,6 +150,10 @@ static int __init acpi_sleep_setup(char *str) | |||
150 | acpi_realmode_flags |= 2; | 150 | acpi_realmode_flags |= 2; |
151 | if (strncmp(str, "s3_beep", 7) == 0) | 151 | if (strncmp(str, "s3_beep", 7) == 0) |
152 | acpi_realmode_flags |= 4; | 152 | acpi_realmode_flags |= 4; |
153 | #ifdef CONFIG_HIBERNATION | ||
154 | if (strncmp(str, "s4_nohwsig", 10) == 0) | ||
155 | acpi_no_s4_hw_signature(); | ||
156 | #endif | ||
153 | if (strncmp(str, "old_ordering", 12) == 0) | 157 | if (strncmp(str, "old_ordering", 12) == 0) |
154 | acpi_old_suspend_ordering(); | 158 | acpi_old_suspend_ordering(); |
155 | str = strchr(str, ','); | 159 | str = strchr(str, ','); |
diff --git a/arch/x86/kernel/alternative.c b/arch/x86/kernel/alternative.c index 2763cb37b553..65a0c1b48696 100644 --- a/arch/x86/kernel/alternative.c +++ b/arch/x86/kernel/alternative.c | |||
@@ -145,35 +145,25 @@ static const unsigned char *const p6_nops[ASM_NOP_MAX+1] = { | |||
145 | extern char __vsyscall_0; | 145 | extern char __vsyscall_0; |
146 | const unsigned char *const *find_nop_table(void) | 146 | const unsigned char *const *find_nop_table(void) |
147 | { | 147 | { |
148 | return boot_cpu_data.x86_vendor != X86_VENDOR_INTEL || | 148 | if (boot_cpu_data.x86_vendor == X86_VENDOR_INTEL && |
149 | boot_cpu_data.x86 < 6 ? k8_nops : p6_nops; | 149 | boot_cpu_has(X86_FEATURE_NOPL)) |
150 | return p6_nops; | ||
151 | else | ||
152 | return k8_nops; | ||
150 | } | 153 | } |
151 | 154 | ||
152 | #else /* CONFIG_X86_64 */ | 155 | #else /* CONFIG_X86_64 */ |
153 | 156 | ||
154 | static const struct nop { | ||
155 | int cpuid; | ||
156 | const unsigned char *const *noptable; | ||
157 | } noptypes[] = { | ||
158 | { X86_FEATURE_K8, k8_nops }, | ||
159 | { X86_FEATURE_K7, k7_nops }, | ||
160 | { X86_FEATURE_P4, p6_nops }, | ||
161 | { X86_FEATURE_P3, p6_nops }, | ||
162 | { -1, NULL } | ||
163 | }; | ||
164 | |||
165 | const unsigned char *const *find_nop_table(void) | 157 | const unsigned char *const *find_nop_table(void) |
166 | { | 158 | { |
167 | const unsigned char *const *noptable = intel_nops; | 159 | if (boot_cpu_has(X86_FEATURE_K8)) |
168 | int i; | 160 | return k8_nops; |
169 | 161 | else if (boot_cpu_has(X86_FEATURE_K7)) | |
170 | for (i = 0; noptypes[i].cpuid >= 0; i++) { | 162 | return k7_nops; |
171 | if (boot_cpu_has(noptypes[i].cpuid)) { | 163 | else if (boot_cpu_has(X86_FEATURE_NOPL)) |
172 | noptable = noptypes[i].noptable; | 164 | return p6_nops; |
173 | break; | 165 | else |
174 | } | 166 | return intel_nops; |
175 | } | ||
176 | return noptable; | ||
177 | } | 167 | } |
178 | 168 | ||
179 | #endif /* CONFIG_X86_64 */ | 169 | #endif /* CONFIG_X86_64 */ |
diff --git a/arch/x86/kernel/amd_iommu.c b/arch/x86/kernel/amd_iommu.c index f2766d84c7a0..042fdc27bc92 100644 --- a/arch/x86/kernel/amd_iommu.c +++ b/arch/x86/kernel/amd_iommu.c | |||
@@ -23,36 +23,49 @@ | |||
23 | #include <linux/scatterlist.h> | 23 | #include <linux/scatterlist.h> |
24 | #include <linux/iommu-helper.h> | 24 | #include <linux/iommu-helper.h> |
25 | #include <asm/proto.h> | 25 | #include <asm/proto.h> |
26 | #include <asm/gart.h> | 26 | #include <asm/iommu.h> |
27 | #include <asm/amd_iommu_types.h> | 27 | #include <asm/amd_iommu_types.h> |
28 | #include <asm/amd_iommu.h> | 28 | #include <asm/amd_iommu.h> |
29 | 29 | ||
30 | #define CMD_SET_TYPE(cmd, t) ((cmd)->data[1] |= ((t) << 28)) | 30 | #define CMD_SET_TYPE(cmd, t) ((cmd)->data[1] |= ((t) << 28)) |
31 | 31 | ||
32 | #define to_pages(addr, size) \ | 32 | #define EXIT_LOOP_COUNT 10000000 |
33 | (round_up(((addr) & ~PAGE_MASK) + (size), PAGE_SIZE) >> PAGE_SHIFT) | ||
34 | 33 | ||
35 | static DEFINE_RWLOCK(amd_iommu_devtable_lock); | 34 | static DEFINE_RWLOCK(amd_iommu_devtable_lock); |
36 | 35 | ||
37 | struct command { | 36 | /* |
37 | * general struct to manage commands sent to an IOMMU | ||
38 | */ | ||
39 | struct iommu_cmd { | ||
38 | u32 data[4]; | 40 | u32 data[4]; |
39 | }; | 41 | }; |
40 | 42 | ||
41 | static int dma_ops_unity_map(struct dma_ops_domain *dma_dom, | 43 | static int dma_ops_unity_map(struct dma_ops_domain *dma_dom, |
42 | struct unity_map_entry *e); | 44 | struct unity_map_entry *e); |
43 | 45 | ||
46 | /* returns !0 if the IOMMU is caching non-present entries in its TLB */ | ||
44 | static int iommu_has_npcache(struct amd_iommu *iommu) | 47 | static int iommu_has_npcache(struct amd_iommu *iommu) |
45 | { | 48 | { |
46 | return iommu->cap & IOMMU_CAP_NPCACHE; | 49 | return iommu->cap & IOMMU_CAP_NPCACHE; |
47 | } | 50 | } |
48 | 51 | ||
49 | static int __iommu_queue_command(struct amd_iommu *iommu, struct command *cmd) | 52 | /**************************************************************************** |
53 | * | ||
54 | * IOMMU command queuing functions | ||
55 | * | ||
56 | ****************************************************************************/ | ||
57 | |||
58 | /* | ||
59 | * Writes the command to the IOMMUs command buffer and informs the | ||
60 | * hardware about the new command. Must be called with iommu->lock held. | ||
61 | */ | ||
62 | static int __iommu_queue_command(struct amd_iommu *iommu, struct iommu_cmd *cmd) | ||
50 | { | 63 | { |
51 | u32 tail, head; | 64 | u32 tail, head; |
52 | u8 *target; | 65 | u8 *target; |
53 | 66 | ||
54 | tail = readl(iommu->mmio_base + MMIO_CMD_TAIL_OFFSET); | 67 | tail = readl(iommu->mmio_base + MMIO_CMD_TAIL_OFFSET); |
55 | target = (iommu->cmd_buf + tail); | 68 | target = iommu->cmd_buf + tail; |
56 | memcpy_toio(target, cmd, sizeof(*cmd)); | 69 | memcpy_toio(target, cmd, sizeof(*cmd)); |
57 | tail = (tail + sizeof(*cmd)) % iommu->cmd_buf_size; | 70 | tail = (tail + sizeof(*cmd)) % iommu->cmd_buf_size; |
58 | head = readl(iommu->mmio_base + MMIO_CMD_HEAD_OFFSET); | 71 | head = readl(iommu->mmio_base + MMIO_CMD_HEAD_OFFSET); |
@@ -63,7 +76,11 @@ static int __iommu_queue_command(struct amd_iommu *iommu, struct command *cmd) | |||
63 | return 0; | 76 | return 0; |
64 | } | 77 | } |
65 | 78 | ||
66 | static int iommu_queue_command(struct amd_iommu *iommu, struct command *cmd) | 79 | /* |
80 | * General queuing function for commands. Takes iommu->lock and calls | ||
81 | * __iommu_queue_command(). | ||
82 | */ | ||
83 | static int iommu_queue_command(struct amd_iommu *iommu, struct iommu_cmd *cmd) | ||
67 | { | 84 | { |
68 | unsigned long flags; | 85 | unsigned long flags; |
69 | int ret; | 86 | int ret; |
@@ -75,35 +92,59 @@ static int iommu_queue_command(struct amd_iommu *iommu, struct command *cmd) | |||
75 | return ret; | 92 | return ret; |
76 | } | 93 | } |
77 | 94 | ||
95 | /* | ||
96 | * This function is called whenever we need to ensure that the IOMMU has | ||
97 | * completed execution of all commands we sent. It sends a | ||
98 | * COMPLETION_WAIT command and waits for it to finish. The IOMMU informs | ||
99 | * us about that by writing a value to a physical address we pass with | ||
100 | * the command. | ||
101 | */ | ||
78 | static int iommu_completion_wait(struct amd_iommu *iommu) | 102 | static int iommu_completion_wait(struct amd_iommu *iommu) |
79 | { | 103 | { |
80 | int ret; | 104 | int ret = 0, ready = 0; |
81 | struct command cmd; | 105 | unsigned status = 0; |
82 | volatile u64 ready = 0; | 106 | struct iommu_cmd cmd; |
83 | unsigned long ready_phys = virt_to_phys(&ready); | 107 | unsigned long flags, i = 0; |
84 | 108 | ||
85 | memset(&cmd, 0, sizeof(cmd)); | 109 | memset(&cmd, 0, sizeof(cmd)); |
86 | cmd.data[0] = LOW_U32(ready_phys) | CMD_COMPL_WAIT_STORE_MASK; | 110 | cmd.data[0] = CMD_COMPL_WAIT_INT_MASK; |
87 | cmd.data[1] = HIGH_U32(ready_phys); | ||
88 | cmd.data[2] = 1; /* value written to 'ready' */ | ||
89 | CMD_SET_TYPE(&cmd, CMD_COMPL_WAIT); | 111 | CMD_SET_TYPE(&cmd, CMD_COMPL_WAIT); |
90 | 112 | ||
91 | iommu->need_sync = 0; | 113 | iommu->need_sync = 0; |
92 | 114 | ||
93 | ret = iommu_queue_command(iommu, &cmd); | 115 | spin_lock_irqsave(&iommu->lock, flags); |
116 | |||
117 | ret = __iommu_queue_command(iommu, &cmd); | ||
94 | 118 | ||
95 | if (ret) | 119 | if (ret) |
96 | return ret; | 120 | goto out; |
121 | |||
122 | while (!ready && (i < EXIT_LOOP_COUNT)) { | ||
123 | ++i; | ||
124 | /* wait for the bit to become one */ | ||
125 | status = readl(iommu->mmio_base + MMIO_STATUS_OFFSET); | ||
126 | ready = status & MMIO_STATUS_COM_WAIT_INT_MASK; | ||
127 | } | ||
97 | 128 | ||
98 | while (!ready) | 129 | /* set bit back to zero */ |
99 | cpu_relax(); | 130 | status &= ~MMIO_STATUS_COM_WAIT_INT_MASK; |
131 | writel(status, iommu->mmio_base + MMIO_STATUS_OFFSET); | ||
132 | |||
133 | if (unlikely((i == EXIT_LOOP_COUNT) && printk_ratelimit())) | ||
134 | printk(KERN_WARNING "AMD IOMMU: Completion wait loop failed\n"); | ||
135 | out: | ||
136 | spin_unlock_irqrestore(&iommu->lock, flags); | ||
100 | 137 | ||
101 | return 0; | 138 | return 0; |
102 | } | 139 | } |
103 | 140 | ||
141 | /* | ||
142 | * Command send function for invalidating a device table entry | ||
143 | */ | ||
104 | static int iommu_queue_inv_dev_entry(struct amd_iommu *iommu, u16 devid) | 144 | static int iommu_queue_inv_dev_entry(struct amd_iommu *iommu, u16 devid) |
105 | { | 145 | { |
106 | struct command cmd; | 146 | struct iommu_cmd cmd; |
147 | int ret; | ||
107 | 148 | ||
108 | BUG_ON(iommu == NULL); | 149 | BUG_ON(iommu == NULL); |
109 | 150 | ||
@@ -111,37 +152,50 @@ static int iommu_queue_inv_dev_entry(struct amd_iommu *iommu, u16 devid) | |||
111 | CMD_SET_TYPE(&cmd, CMD_INV_DEV_ENTRY); | 152 | CMD_SET_TYPE(&cmd, CMD_INV_DEV_ENTRY); |
112 | cmd.data[0] = devid; | 153 | cmd.data[0] = devid; |
113 | 154 | ||
155 | ret = iommu_queue_command(iommu, &cmd); | ||
156 | |||
114 | iommu->need_sync = 1; | 157 | iommu->need_sync = 1; |
115 | 158 | ||
116 | return iommu_queue_command(iommu, &cmd); | 159 | return ret; |
117 | } | 160 | } |
118 | 161 | ||
162 | /* | ||
163 | * Generic command send function for invalidating TLB entries | ||
164 | */ | ||
119 | static int iommu_queue_inv_iommu_pages(struct amd_iommu *iommu, | 165 | static int iommu_queue_inv_iommu_pages(struct amd_iommu *iommu, |
120 | u64 address, u16 domid, int pde, int s) | 166 | u64 address, u16 domid, int pde, int s) |
121 | { | 167 | { |
122 | struct command cmd; | 168 | struct iommu_cmd cmd; |
169 | int ret; | ||
123 | 170 | ||
124 | memset(&cmd, 0, sizeof(cmd)); | 171 | memset(&cmd, 0, sizeof(cmd)); |
125 | address &= PAGE_MASK; | 172 | address &= PAGE_MASK; |
126 | CMD_SET_TYPE(&cmd, CMD_INV_IOMMU_PAGES); | 173 | CMD_SET_TYPE(&cmd, CMD_INV_IOMMU_PAGES); |
127 | cmd.data[1] |= domid; | 174 | cmd.data[1] |= domid; |
128 | cmd.data[2] = LOW_U32(address); | 175 | cmd.data[2] = lower_32_bits(address); |
129 | cmd.data[3] = HIGH_U32(address); | 176 | cmd.data[3] = upper_32_bits(address); |
130 | if (s) | 177 | if (s) /* size bit - we flush more than one 4kb page */ |
131 | cmd.data[2] |= CMD_INV_IOMMU_PAGES_SIZE_MASK; | 178 | cmd.data[2] |= CMD_INV_IOMMU_PAGES_SIZE_MASK; |
132 | if (pde) | 179 | if (pde) /* PDE bit - we want to flush everything, not only the PTEs */ |
133 | cmd.data[2] |= CMD_INV_IOMMU_PAGES_PDE_MASK; | 180 | cmd.data[2] |= CMD_INV_IOMMU_PAGES_PDE_MASK; |
134 | 181 | ||
182 | ret = iommu_queue_command(iommu, &cmd); | ||
183 | |||
135 | iommu->need_sync = 1; | 184 | iommu->need_sync = 1; |
136 | 185 | ||
137 | return iommu_queue_command(iommu, &cmd); | 186 | return ret; |
138 | } | 187 | } |
139 | 188 | ||
189 | /* | ||
190 | * TLB invalidation function which is called from the mapping functions. | ||
191 | * It invalidates a single PTE if the range to flush is within a single | ||
192 | * page. Otherwise it flushes the whole TLB of the IOMMU. | ||
193 | */ | ||
140 | static int iommu_flush_pages(struct amd_iommu *iommu, u16 domid, | 194 | static int iommu_flush_pages(struct amd_iommu *iommu, u16 domid, |
141 | u64 address, size_t size) | 195 | u64 address, size_t size) |
142 | { | 196 | { |
143 | int s = 0; | 197 | int s = 0; |
144 | unsigned pages = to_pages(address, size); | 198 | unsigned pages = iommu_num_pages(address, size); |
145 | 199 | ||
146 | address &= PAGE_MASK; | 200 | address &= PAGE_MASK; |
147 | 201 | ||
@@ -159,6 +213,20 @@ static int iommu_flush_pages(struct amd_iommu *iommu, u16 domid, | |||
159 | return 0; | 213 | return 0; |
160 | } | 214 | } |
161 | 215 | ||
216 | /**************************************************************************** | ||
217 | * | ||
218 | * The functions below are used to create the page table mappings for | ||
219 | * unity mapped regions. | ||
220 | * | ||
221 | ****************************************************************************/ | ||
222 | |||
223 | /* | ||
224 | * Generic mapping functions. It maps a physical address into a DMA | ||
225 | * address space. It allocates the page table pages if necessary. | ||
226 | * In the future it can be extended to a generic mapping function | ||
227 | * supporting all features of AMD IOMMU page tables like level skipping | ||
228 | * and full 64 bit address spaces. | ||
229 | */ | ||
162 | static int iommu_map(struct protection_domain *dom, | 230 | static int iommu_map(struct protection_domain *dom, |
163 | unsigned long bus_addr, | 231 | unsigned long bus_addr, |
164 | unsigned long phys_addr, | 232 | unsigned long phys_addr, |
@@ -209,6 +277,10 @@ static int iommu_map(struct protection_domain *dom, | |||
209 | return 0; | 277 | return 0; |
210 | } | 278 | } |
211 | 279 | ||
280 | /* | ||
281 | * This function checks if a specific unity mapping entry is needed for | ||
282 | * this specific IOMMU. | ||
283 | */ | ||
212 | static int iommu_for_unity_map(struct amd_iommu *iommu, | 284 | static int iommu_for_unity_map(struct amd_iommu *iommu, |
213 | struct unity_map_entry *entry) | 285 | struct unity_map_entry *entry) |
214 | { | 286 | { |
@@ -223,6 +295,12 @@ static int iommu_for_unity_map(struct amd_iommu *iommu, | |||
223 | return 0; | 295 | return 0; |
224 | } | 296 | } |
225 | 297 | ||
298 | /* | ||
299 | * Init the unity mappings for a specific IOMMU in the system | ||
300 | * | ||
301 | * Basically iterates over all unity mapping entries and applies them to | ||
302 | * the default domain DMA of that IOMMU if necessary. | ||
303 | */ | ||
226 | static int iommu_init_unity_mappings(struct amd_iommu *iommu) | 304 | static int iommu_init_unity_mappings(struct amd_iommu *iommu) |
227 | { | 305 | { |
228 | struct unity_map_entry *entry; | 306 | struct unity_map_entry *entry; |
@@ -239,6 +317,10 @@ static int iommu_init_unity_mappings(struct amd_iommu *iommu) | |||
239 | return 0; | 317 | return 0; |
240 | } | 318 | } |
241 | 319 | ||
320 | /* | ||
321 | * This function actually applies the mapping to the page table of the | ||
322 | * dma_ops domain. | ||
323 | */ | ||
242 | static int dma_ops_unity_map(struct dma_ops_domain *dma_dom, | 324 | static int dma_ops_unity_map(struct dma_ops_domain *dma_dom, |
243 | struct unity_map_entry *e) | 325 | struct unity_map_entry *e) |
244 | { | 326 | { |
@@ -261,6 +343,9 @@ static int dma_ops_unity_map(struct dma_ops_domain *dma_dom, | |||
261 | return 0; | 343 | return 0; |
262 | } | 344 | } |
263 | 345 | ||
346 | /* | ||
347 | * Inits the unity mappings required for a specific device | ||
348 | */ | ||
264 | static int init_unity_mappings_for_device(struct dma_ops_domain *dma_dom, | 349 | static int init_unity_mappings_for_device(struct dma_ops_domain *dma_dom, |
265 | u16 devid) | 350 | u16 devid) |
266 | { | 351 | { |
@@ -278,12 +363,26 @@ static int init_unity_mappings_for_device(struct dma_ops_domain *dma_dom, | |||
278 | return 0; | 363 | return 0; |
279 | } | 364 | } |
280 | 365 | ||
366 | /**************************************************************************** | ||
367 | * | ||
368 | * The next functions belong to the address allocator for the dma_ops | ||
369 | * interface functions. They work like the allocators in the other IOMMU | ||
370 | * drivers. Its basically a bitmap which marks the allocated pages in | ||
371 | * the aperture. Maybe it could be enhanced in the future to a more | ||
372 | * efficient allocator. | ||
373 | * | ||
374 | ****************************************************************************/ | ||
281 | static unsigned long dma_mask_to_pages(unsigned long mask) | 375 | static unsigned long dma_mask_to_pages(unsigned long mask) |
282 | { | 376 | { |
283 | return (mask >> PAGE_SHIFT) + | 377 | return (mask >> PAGE_SHIFT) + |
284 | (PAGE_ALIGN(mask & ~PAGE_MASK) >> PAGE_SHIFT); | 378 | (PAGE_ALIGN(mask & ~PAGE_MASK) >> PAGE_SHIFT); |
285 | } | 379 | } |
286 | 380 | ||
381 | /* | ||
382 | * The address allocator core function. | ||
383 | * | ||
384 | * called with domain->lock held | ||
385 | */ | ||
287 | static unsigned long dma_ops_alloc_addresses(struct device *dev, | 386 | static unsigned long dma_ops_alloc_addresses(struct device *dev, |
288 | struct dma_ops_domain *dom, | 387 | struct dma_ops_domain *dom, |
289 | unsigned int pages) | 388 | unsigned int pages) |
@@ -317,6 +416,11 @@ static unsigned long dma_ops_alloc_addresses(struct device *dev, | |||
317 | return address; | 416 | return address; |
318 | } | 417 | } |
319 | 418 | ||
419 | /* | ||
420 | * The address free function. | ||
421 | * | ||
422 | * called with domain->lock held | ||
423 | */ | ||
320 | static void dma_ops_free_addresses(struct dma_ops_domain *dom, | 424 | static void dma_ops_free_addresses(struct dma_ops_domain *dom, |
321 | unsigned long address, | 425 | unsigned long address, |
322 | unsigned int pages) | 426 | unsigned int pages) |
@@ -325,6 +429,16 @@ static void dma_ops_free_addresses(struct dma_ops_domain *dom, | |||
325 | iommu_area_free(dom->bitmap, address, pages); | 429 | iommu_area_free(dom->bitmap, address, pages); |
326 | } | 430 | } |
327 | 431 | ||
432 | /**************************************************************************** | ||
433 | * | ||
434 | * The next functions belong to the domain allocation. A domain is | ||
435 | * allocated for every IOMMU as the default domain. If device isolation | ||
436 | * is enabled, every device get its own domain. The most important thing | ||
437 | * about domains is the page table mapping the DMA address space they | ||
438 | * contain. | ||
439 | * | ||
440 | ****************************************************************************/ | ||
441 | |||
328 | static u16 domain_id_alloc(void) | 442 | static u16 domain_id_alloc(void) |
329 | { | 443 | { |
330 | unsigned long flags; | 444 | unsigned long flags; |
@@ -342,6 +456,10 @@ static u16 domain_id_alloc(void) | |||
342 | return id; | 456 | return id; |
343 | } | 457 | } |
344 | 458 | ||
459 | /* | ||
460 | * Used to reserve address ranges in the aperture (e.g. for exclusion | ||
461 | * ranges). | ||
462 | */ | ||
345 | static void dma_ops_reserve_addresses(struct dma_ops_domain *dom, | 463 | static void dma_ops_reserve_addresses(struct dma_ops_domain *dom, |
346 | unsigned long start_page, | 464 | unsigned long start_page, |
347 | unsigned int pages) | 465 | unsigned int pages) |
@@ -382,6 +500,10 @@ static void dma_ops_free_pagetable(struct dma_ops_domain *dma_dom) | |||
382 | free_page((unsigned long)p1); | 500 | free_page((unsigned long)p1); |
383 | } | 501 | } |
384 | 502 | ||
503 | /* | ||
504 | * Free a domain, only used if something went wrong in the | ||
505 | * allocation path and we need to free an already allocated page table | ||
506 | */ | ||
385 | static void dma_ops_domain_free(struct dma_ops_domain *dom) | 507 | static void dma_ops_domain_free(struct dma_ops_domain *dom) |
386 | { | 508 | { |
387 | if (!dom) | 509 | if (!dom) |
@@ -396,6 +518,11 @@ static void dma_ops_domain_free(struct dma_ops_domain *dom) | |||
396 | kfree(dom); | 518 | kfree(dom); |
397 | } | 519 | } |
398 | 520 | ||
521 | /* | ||
522 | * Allocates a new protection domain usable for the dma_ops functions. | ||
523 | * It also initializes the page table and the address allocator data | ||
524 | * structures required for the dma_ops interface | ||
525 | */ | ||
399 | static struct dma_ops_domain *dma_ops_domain_alloc(struct amd_iommu *iommu, | 526 | static struct dma_ops_domain *dma_ops_domain_alloc(struct amd_iommu *iommu, |
400 | unsigned order) | 527 | unsigned order) |
401 | { | 528 | { |
@@ -436,14 +563,20 @@ static struct dma_ops_domain *dma_ops_domain_alloc(struct amd_iommu *iommu, | |||
436 | dma_dom->bitmap[0] = 1; | 563 | dma_dom->bitmap[0] = 1; |
437 | dma_dom->next_bit = 0; | 564 | dma_dom->next_bit = 0; |
438 | 565 | ||
566 | /* Initialize the exclusion range if necessary */ | ||
439 | if (iommu->exclusion_start && | 567 | if (iommu->exclusion_start && |
440 | iommu->exclusion_start < dma_dom->aperture_size) { | 568 | iommu->exclusion_start < dma_dom->aperture_size) { |
441 | unsigned long startpage = iommu->exclusion_start >> PAGE_SHIFT; | 569 | unsigned long startpage = iommu->exclusion_start >> PAGE_SHIFT; |
442 | int pages = to_pages(iommu->exclusion_start, | 570 | int pages = iommu_num_pages(iommu->exclusion_start, |
443 | iommu->exclusion_length); | 571 | iommu->exclusion_length); |
444 | dma_ops_reserve_addresses(dma_dom, startpage, pages); | 572 | dma_ops_reserve_addresses(dma_dom, startpage, pages); |
445 | } | 573 | } |
446 | 574 | ||
575 | /* | ||
576 | * At the last step, build the page tables so we don't need to | ||
577 | * allocate page table pages in the dma_ops mapping/unmapping | ||
578 | * path. | ||
579 | */ | ||
447 | num_pte_pages = dma_dom->aperture_size / (PAGE_SIZE * 512); | 580 | num_pte_pages = dma_dom->aperture_size / (PAGE_SIZE * 512); |
448 | dma_dom->pte_pages = kzalloc(num_pte_pages * sizeof(void *), | 581 | dma_dom->pte_pages = kzalloc(num_pte_pages * sizeof(void *), |
449 | GFP_KERNEL); | 582 | GFP_KERNEL); |
@@ -472,6 +605,10 @@ free_dma_dom: | |||
472 | return NULL; | 605 | return NULL; |
473 | } | 606 | } |
474 | 607 | ||
608 | /* | ||
609 | * Find out the protection domain structure for a given PCI device. This | ||
610 | * will give us the pointer to the page table root for example. | ||
611 | */ | ||
475 | static struct protection_domain *domain_for_device(u16 devid) | 612 | static struct protection_domain *domain_for_device(u16 devid) |
476 | { | 613 | { |
477 | struct protection_domain *dom; | 614 | struct protection_domain *dom; |
@@ -484,6 +621,10 @@ static struct protection_domain *domain_for_device(u16 devid) | |||
484 | return dom; | 621 | return dom; |
485 | } | 622 | } |
486 | 623 | ||
624 | /* | ||
625 | * If a device is not yet associated with a domain, this function | ||
626 | * assigns it in a way that is visible to the hardware | ||
627 | */ | ||
487 | static void set_device_domain(struct amd_iommu *iommu, | 628 | static void set_device_domain(struct amd_iommu *iommu, |
488 | struct protection_domain *domain, | 629 | struct protection_domain *domain, |
489 | u16 devid) | 630 | u16 devid) |
@@ -508,6 +649,19 @@ static void set_device_domain(struct amd_iommu *iommu, | |||
508 | iommu->need_sync = 1; | 649 | iommu->need_sync = 1; |
509 | } | 650 | } |
510 | 651 | ||
652 | /***************************************************************************** | ||
653 | * | ||
654 | * The next functions belong to the dma_ops mapping/unmapping code. | ||
655 | * | ||
656 | *****************************************************************************/ | ||
657 | |||
658 | /* | ||
659 | * In the dma_ops path we only have the struct device. This function | ||
660 | * finds the corresponding IOMMU, the protection domain and the | ||
661 | * requestor id for a given device. | ||
662 | * If the device is not yet associated with a domain this is also done | ||
663 | * in this function. | ||
664 | */ | ||
511 | static int get_device_resources(struct device *dev, | 665 | static int get_device_resources(struct device *dev, |
512 | struct amd_iommu **iommu, | 666 | struct amd_iommu **iommu, |
513 | struct protection_domain **domain, | 667 | struct protection_domain **domain, |
@@ -520,9 +674,10 @@ static int get_device_resources(struct device *dev, | |||
520 | BUG_ON(!dev || dev->bus != &pci_bus_type || !dev->dma_mask); | 674 | BUG_ON(!dev || dev->bus != &pci_bus_type || !dev->dma_mask); |
521 | 675 | ||
522 | pcidev = to_pci_dev(dev); | 676 | pcidev = to_pci_dev(dev); |
523 | _bdf = (pcidev->bus->number << 8) | pcidev->devfn; | 677 | _bdf = calc_devid(pcidev->bus->number, pcidev->devfn); |
524 | 678 | ||
525 | if (_bdf >= amd_iommu_last_bdf) { | 679 | /* device not translated by any IOMMU in the system? */ |
680 | if (_bdf > amd_iommu_last_bdf) { | ||
526 | *iommu = NULL; | 681 | *iommu = NULL; |
527 | *domain = NULL; | 682 | *domain = NULL; |
528 | *bdf = 0xffff; | 683 | *bdf = 0xffff; |
@@ -547,6 +702,10 @@ static int get_device_resources(struct device *dev, | |||
547 | return 1; | 702 | return 1; |
548 | } | 703 | } |
549 | 704 | ||
705 | /* | ||
706 | * This is the generic map function. It maps one 4kb page at paddr to | ||
707 | * the given address in the DMA address space for the domain. | ||
708 | */ | ||
550 | static dma_addr_t dma_ops_domain_map(struct amd_iommu *iommu, | 709 | static dma_addr_t dma_ops_domain_map(struct amd_iommu *iommu, |
551 | struct dma_ops_domain *dom, | 710 | struct dma_ops_domain *dom, |
552 | unsigned long address, | 711 | unsigned long address, |
@@ -578,6 +737,9 @@ static dma_addr_t dma_ops_domain_map(struct amd_iommu *iommu, | |||
578 | return (dma_addr_t)address; | 737 | return (dma_addr_t)address; |
579 | } | 738 | } |
580 | 739 | ||
740 | /* | ||
741 | * The generic unmapping function for one page in the DMA address space. | ||
742 | */ | ||
581 | static void dma_ops_domain_unmap(struct amd_iommu *iommu, | 743 | static void dma_ops_domain_unmap(struct amd_iommu *iommu, |
582 | struct dma_ops_domain *dom, | 744 | struct dma_ops_domain *dom, |
583 | unsigned long address) | 745 | unsigned long address) |
@@ -597,6 +759,12 @@ static void dma_ops_domain_unmap(struct amd_iommu *iommu, | |||
597 | *pte = 0ULL; | 759 | *pte = 0ULL; |
598 | } | 760 | } |
599 | 761 | ||
762 | /* | ||
763 | * This function contains common code for mapping of a physically | ||
764 | * contiguous memory region into DMA address space. It is used by all | ||
765 | * mapping functions provided by this IOMMU driver. | ||
766 | * Must be called with the domain lock held. | ||
767 | */ | ||
600 | static dma_addr_t __map_single(struct device *dev, | 768 | static dma_addr_t __map_single(struct device *dev, |
601 | struct amd_iommu *iommu, | 769 | struct amd_iommu *iommu, |
602 | struct dma_ops_domain *dma_dom, | 770 | struct dma_ops_domain *dma_dom, |
@@ -609,7 +777,7 @@ static dma_addr_t __map_single(struct device *dev, | |||
609 | unsigned int pages; | 777 | unsigned int pages; |
610 | int i; | 778 | int i; |
611 | 779 | ||
612 | pages = to_pages(paddr, size); | 780 | pages = iommu_num_pages(paddr, size); |
613 | paddr &= PAGE_MASK; | 781 | paddr &= PAGE_MASK; |
614 | 782 | ||
615 | address = dma_ops_alloc_addresses(dev, dma_dom, pages); | 783 | address = dma_ops_alloc_addresses(dev, dma_dom, pages); |
@@ -628,6 +796,10 @@ out: | |||
628 | return address; | 796 | return address; |
629 | } | 797 | } |
630 | 798 | ||
799 | /* | ||
800 | * Does the reverse of the __map_single function. Must be called with | ||
801 | * the domain lock held too | ||
802 | */ | ||
631 | static void __unmap_single(struct amd_iommu *iommu, | 803 | static void __unmap_single(struct amd_iommu *iommu, |
632 | struct dma_ops_domain *dma_dom, | 804 | struct dma_ops_domain *dma_dom, |
633 | dma_addr_t dma_addr, | 805 | dma_addr_t dma_addr, |
@@ -640,7 +812,7 @@ static void __unmap_single(struct amd_iommu *iommu, | |||
640 | if ((dma_addr == 0) || (dma_addr + size > dma_dom->aperture_size)) | 812 | if ((dma_addr == 0) || (dma_addr + size > dma_dom->aperture_size)) |
641 | return; | 813 | return; |
642 | 814 | ||
643 | pages = to_pages(dma_addr, size); | 815 | pages = iommu_num_pages(dma_addr, size); |
644 | dma_addr &= PAGE_MASK; | 816 | dma_addr &= PAGE_MASK; |
645 | start = dma_addr; | 817 | start = dma_addr; |
646 | 818 | ||
@@ -652,6 +824,9 @@ static void __unmap_single(struct amd_iommu *iommu, | |||
652 | dma_ops_free_addresses(dma_dom, dma_addr, pages); | 824 | dma_ops_free_addresses(dma_dom, dma_addr, pages); |
653 | } | 825 | } |
654 | 826 | ||
827 | /* | ||
828 | * The exported map_single function for dma_ops. | ||
829 | */ | ||
655 | static dma_addr_t map_single(struct device *dev, phys_addr_t paddr, | 830 | static dma_addr_t map_single(struct device *dev, phys_addr_t paddr, |
656 | size_t size, int dir) | 831 | size_t size, int dir) |
657 | { | 832 | { |
@@ -664,6 +839,7 @@ static dma_addr_t map_single(struct device *dev, phys_addr_t paddr, | |||
664 | get_device_resources(dev, &iommu, &domain, &devid); | 839 | get_device_resources(dev, &iommu, &domain, &devid); |
665 | 840 | ||
666 | if (iommu == NULL || domain == NULL) | 841 | if (iommu == NULL || domain == NULL) |
842 | /* device not handled by any AMD IOMMU */ | ||
667 | return (dma_addr_t)paddr; | 843 | return (dma_addr_t)paddr; |
668 | 844 | ||
669 | spin_lock_irqsave(&domain->lock, flags); | 845 | spin_lock_irqsave(&domain->lock, flags); |
@@ -683,6 +859,9 @@ out: | |||
683 | return addr; | 859 | return addr; |
684 | } | 860 | } |
685 | 861 | ||
862 | /* | ||
863 | * The exported unmap_single function for dma_ops. | ||
864 | */ | ||
686 | static void unmap_single(struct device *dev, dma_addr_t dma_addr, | 865 | static void unmap_single(struct device *dev, dma_addr_t dma_addr, |
687 | size_t size, int dir) | 866 | size_t size, int dir) |
688 | { | 867 | { |
@@ -692,6 +871,7 @@ static void unmap_single(struct device *dev, dma_addr_t dma_addr, | |||
692 | u16 devid; | 871 | u16 devid; |
693 | 872 | ||
694 | if (!get_device_resources(dev, &iommu, &domain, &devid)) | 873 | if (!get_device_resources(dev, &iommu, &domain, &devid)) |
874 | /* device not handled by any AMD IOMMU */ | ||
695 | return; | 875 | return; |
696 | 876 | ||
697 | spin_lock_irqsave(&domain->lock, flags); | 877 | spin_lock_irqsave(&domain->lock, flags); |
@@ -706,6 +886,10 @@ static void unmap_single(struct device *dev, dma_addr_t dma_addr, | |||
706 | spin_unlock_irqrestore(&domain->lock, flags); | 886 | spin_unlock_irqrestore(&domain->lock, flags); |
707 | } | 887 | } |
708 | 888 | ||
889 | /* | ||
890 | * This is a special map_sg function which is used if we should map a | ||
891 | * device which is not handled by an AMD IOMMU in the system. | ||
892 | */ | ||
709 | static int map_sg_no_iommu(struct device *dev, struct scatterlist *sglist, | 893 | static int map_sg_no_iommu(struct device *dev, struct scatterlist *sglist, |
710 | int nelems, int dir) | 894 | int nelems, int dir) |
711 | { | 895 | { |
@@ -720,6 +904,10 @@ static int map_sg_no_iommu(struct device *dev, struct scatterlist *sglist, | |||
720 | return nelems; | 904 | return nelems; |
721 | } | 905 | } |
722 | 906 | ||
907 | /* | ||
908 | * The exported map_sg function for dma_ops (handles scatter-gather | ||
909 | * lists). | ||
910 | */ | ||
723 | static int map_sg(struct device *dev, struct scatterlist *sglist, | 911 | static int map_sg(struct device *dev, struct scatterlist *sglist, |
724 | int nelems, int dir) | 912 | int nelems, int dir) |
725 | { | 913 | { |
@@ -775,6 +963,10 @@ unmap: | |||
775 | goto out; | 963 | goto out; |
776 | } | 964 | } |
777 | 965 | ||
966 | /* | ||
967 | * The exported unmap_sg function for dma_ops (handles scatter-gather | ||
968 | * lists). | ||
969 | */ | ||
778 | static void unmap_sg(struct device *dev, struct scatterlist *sglist, | 970 | static void unmap_sg(struct device *dev, struct scatterlist *sglist, |
779 | int nelems, int dir) | 971 | int nelems, int dir) |
780 | { | 972 | { |
@@ -804,6 +996,9 @@ static void unmap_sg(struct device *dev, struct scatterlist *sglist, | |||
804 | spin_unlock_irqrestore(&domain->lock, flags); | 996 | spin_unlock_irqrestore(&domain->lock, flags); |
805 | } | 997 | } |
806 | 998 | ||
999 | /* | ||
1000 | * The exported alloc_coherent function for dma_ops. | ||
1001 | */ | ||
807 | static void *alloc_coherent(struct device *dev, size_t size, | 1002 | static void *alloc_coherent(struct device *dev, size_t size, |
808 | dma_addr_t *dma_addr, gfp_t flag) | 1003 | dma_addr_t *dma_addr, gfp_t flag) |
809 | { | 1004 | { |
@@ -851,6 +1046,11 @@ out: | |||
851 | return virt_addr; | 1046 | return virt_addr; |
852 | } | 1047 | } |
853 | 1048 | ||
1049 | /* | ||
1050 | * The exported free_coherent function for dma_ops. | ||
1051 | * FIXME: fix the generic x86 DMA layer so that it actually calls that | ||
1052 | * function. | ||
1053 | */ | ||
854 | static void free_coherent(struct device *dev, size_t size, | 1054 | static void free_coherent(struct device *dev, size_t size, |
855 | void *virt_addr, dma_addr_t dma_addr) | 1055 | void *virt_addr, dma_addr_t dma_addr) |
856 | { | 1056 | { |
@@ -879,6 +1079,8 @@ free_mem: | |||
879 | } | 1079 | } |
880 | 1080 | ||
881 | /* | 1081 | /* |
1082 | * The function for pre-allocating protection domains. | ||
1083 | * | ||
882 | * If the driver core informs the DMA layer if a driver grabs a device | 1084 | * If the driver core informs the DMA layer if a driver grabs a device |
883 | * we don't need to preallocate the protection domains anymore. | 1085 | * we don't need to preallocate the protection domains anymore. |
884 | * For now we have to. | 1086 | * For now we have to. |
@@ -893,7 +1095,7 @@ void prealloc_protection_domains(void) | |||
893 | 1095 | ||
894 | while ((dev = pci_get_device(PCI_ANY_ID, PCI_ANY_ID, dev)) != NULL) { | 1096 | while ((dev = pci_get_device(PCI_ANY_ID, PCI_ANY_ID, dev)) != NULL) { |
895 | devid = (dev->bus->number << 8) | dev->devfn; | 1097 | devid = (dev->bus->number << 8) | dev->devfn; |
896 | if (devid >= amd_iommu_last_bdf) | 1098 | if (devid > amd_iommu_last_bdf) |
897 | continue; | 1099 | continue; |
898 | devid = amd_iommu_alias_table[devid]; | 1100 | devid = amd_iommu_alias_table[devid]; |
899 | if (domain_for_device(devid)) | 1101 | if (domain_for_device(devid)) |
@@ -921,12 +1123,20 @@ static struct dma_mapping_ops amd_iommu_dma_ops = { | |||
921 | .unmap_sg = unmap_sg, | 1123 | .unmap_sg = unmap_sg, |
922 | }; | 1124 | }; |
923 | 1125 | ||
1126 | /* | ||
1127 | * The function which clues the AMD IOMMU driver into dma_ops. | ||
1128 | */ | ||
924 | int __init amd_iommu_init_dma_ops(void) | 1129 | int __init amd_iommu_init_dma_ops(void) |
925 | { | 1130 | { |
926 | struct amd_iommu *iommu; | 1131 | struct amd_iommu *iommu; |
927 | int order = amd_iommu_aperture_order; | 1132 | int order = amd_iommu_aperture_order; |
928 | int ret; | 1133 | int ret; |
929 | 1134 | ||
1135 | /* | ||
1136 | * first allocate a default protection domain for every IOMMU we | ||
1137 | * found in the system. Devices not assigned to any other | ||
1138 | * protection domain will be assigned to the default one. | ||
1139 | */ | ||
930 | list_for_each_entry(iommu, &amd_iommu_list, list) { | 1140 | list_for_each_entry(iommu, &amd_iommu_list, list) { |
931 | iommu->default_dom = dma_ops_domain_alloc(iommu, order); | 1141 | iommu->default_dom = dma_ops_domain_alloc(iommu, order); |
932 | if (iommu->default_dom == NULL) | 1142 | if (iommu->default_dom == NULL) |
@@ -936,6 +1146,10 @@ int __init amd_iommu_init_dma_ops(void) | |||
936 | goto free_domains; | 1146 | goto free_domains; |
937 | } | 1147 | } |
938 | 1148 | ||
1149 | /* | ||
1150 | * If device isolation is enabled, pre-allocate the protection | ||
1151 | * domains for each device. | ||
1152 | */ | ||
939 | if (amd_iommu_isolate) | 1153 | if (amd_iommu_isolate) |
940 | prealloc_protection_domains(); | 1154 | prealloc_protection_domains(); |
941 | 1155 | ||
@@ -947,6 +1161,7 @@ int __init amd_iommu_init_dma_ops(void) | |||
947 | gart_iommu_aperture = 0; | 1161 | gart_iommu_aperture = 0; |
948 | #endif | 1162 | #endif |
949 | 1163 | ||
1164 | /* Finally make the driver visible to the device drivers */ | ||
950 | dma_ops = &amd_iommu_dma_ops; | 1165 | dma_ops = &amd_iommu_dma_ops; |
951 | 1166 | ||
952 | return 0; | 1167 | return 0; |
diff --git a/arch/x86/kernel/amd_iommu_init.c b/arch/x86/kernel/amd_iommu_init.c index 2a13e430437d..a69cc0f52042 100644 --- a/arch/x86/kernel/amd_iommu_init.c +++ b/arch/x86/kernel/amd_iommu_init.c | |||
@@ -25,20 +25,13 @@ | |||
25 | #include <asm/pci-direct.h> | 25 | #include <asm/pci-direct.h> |
26 | #include <asm/amd_iommu_types.h> | 26 | #include <asm/amd_iommu_types.h> |
27 | #include <asm/amd_iommu.h> | 27 | #include <asm/amd_iommu.h> |
28 | #include <asm/gart.h> | 28 | #include <asm/iommu.h> |
29 | 29 | ||
30 | /* | 30 | /* |
31 | * definitions for the ACPI scanning code | 31 | * definitions for the ACPI scanning code |
32 | */ | 32 | */ |
33 | #define UPDATE_LAST_BDF(x) do {\ | ||
34 | if ((x) > amd_iommu_last_bdf) \ | ||
35 | amd_iommu_last_bdf = (x); \ | ||
36 | } while (0); | ||
37 | |||
38 | #define DEVID(bus, devfn) (((bus) << 8) | (devfn)) | ||
39 | #define PCI_BUS(x) (((x) >> 8) & 0xff) | 33 | #define PCI_BUS(x) (((x) >> 8) & 0xff) |
40 | #define IVRS_HEADER_LENGTH 48 | 34 | #define IVRS_HEADER_LENGTH 48 |
41 | #define TBL_SIZE(x) (1 << (PAGE_SHIFT + get_order(amd_iommu_last_bdf * (x)))) | ||
42 | 35 | ||
43 | #define ACPI_IVHD_TYPE 0x10 | 36 | #define ACPI_IVHD_TYPE 0x10 |
44 | #define ACPI_IVMD_TYPE_ALL 0x20 | 37 | #define ACPI_IVMD_TYPE_ALL 0x20 |
@@ -71,6 +64,17 @@ | |||
71 | #define ACPI_DEVFLAG_LINT1 0x80 | 64 | #define ACPI_DEVFLAG_LINT1 0x80 |
72 | #define ACPI_DEVFLAG_ATSDIS 0x10000000 | 65 | #define ACPI_DEVFLAG_ATSDIS 0x10000000 |
73 | 66 | ||
67 | /* | ||
68 | * ACPI table definitions | ||
69 | * | ||
70 | * These data structures are laid over the table to parse the important values | ||
71 | * out of it. | ||
72 | */ | ||
73 | |||
74 | /* | ||
75 | * structure describing one IOMMU in the ACPI table. Typically followed by one | ||
76 | * or more ivhd_entrys. | ||
77 | */ | ||
74 | struct ivhd_header { | 78 | struct ivhd_header { |
75 | u8 type; | 79 | u8 type; |
76 | u8 flags; | 80 | u8 flags; |
@@ -83,6 +87,10 @@ struct ivhd_header { | |||
83 | u32 reserved; | 87 | u32 reserved; |
84 | } __attribute__((packed)); | 88 | } __attribute__((packed)); |
85 | 89 | ||
90 | /* | ||
91 | * A device entry describing which devices a specific IOMMU translates and | ||
92 | * which requestor ids they use. | ||
93 | */ | ||
86 | struct ivhd_entry { | 94 | struct ivhd_entry { |
87 | u8 type; | 95 | u8 type; |
88 | u16 devid; | 96 | u16 devid; |
@@ -90,6 +98,10 @@ struct ivhd_entry { | |||
90 | u32 ext; | 98 | u32 ext; |
91 | } __attribute__((packed)); | 99 | } __attribute__((packed)); |
92 | 100 | ||
101 | /* | ||
102 | * An AMD IOMMU memory definition structure. It defines things like exclusion | ||
103 | * ranges for devices and regions that should be unity mapped. | ||
104 | */ | ||
93 | struct ivmd_header { | 105 | struct ivmd_header { |
94 | u8 type; | 106 | u8 type; |
95 | u8 flags; | 107 | u8 flags; |
@@ -103,22 +115,80 @@ struct ivmd_header { | |||
103 | 115 | ||
104 | static int __initdata amd_iommu_detected; | 116 | static int __initdata amd_iommu_detected; |
105 | 117 | ||
106 | u16 amd_iommu_last_bdf; | 118 | u16 amd_iommu_last_bdf; /* largest PCI device id we have |
107 | struct list_head amd_iommu_unity_map; | 119 | to handle */ |
108 | unsigned amd_iommu_aperture_order = 26; | 120 | LIST_HEAD(amd_iommu_unity_map); /* a list of required unity mappings |
109 | int amd_iommu_isolate; | 121 | we find in ACPI */ |
122 | unsigned amd_iommu_aperture_order = 26; /* size of aperture in power of 2 */ | ||
123 | int amd_iommu_isolate; /* if 1, device isolation is enabled */ | ||
124 | |||
125 | LIST_HEAD(amd_iommu_list); /* list of all AMD IOMMUs in the | ||
126 | system */ | ||
110 | 127 | ||
111 | struct list_head amd_iommu_list; | 128 | /* |
129 | * Pointer to the device table which is shared by all AMD IOMMUs | ||
130 | * it is indexed by the PCI device id or the HT unit id and contains | ||
131 | * information about the domain the device belongs to as well as the | ||
132 | * page table root pointer. | ||
133 | */ | ||
112 | struct dev_table_entry *amd_iommu_dev_table; | 134 | struct dev_table_entry *amd_iommu_dev_table; |
135 | |||
136 | /* | ||
137 | * The alias table is a driver specific data structure which contains the | ||
138 | * mappings of the PCI device ids to the actual requestor ids on the IOMMU. | ||
139 | * More than one device can share the same requestor id. | ||
140 | */ | ||
113 | u16 *amd_iommu_alias_table; | 141 | u16 *amd_iommu_alias_table; |
142 | |||
143 | /* | ||
144 | * The rlookup table is used to find the IOMMU which is responsible | ||
145 | * for a specific device. It is also indexed by the PCI device id. | ||
146 | */ | ||
114 | struct amd_iommu **amd_iommu_rlookup_table; | 147 | struct amd_iommu **amd_iommu_rlookup_table; |
148 | |||
149 | /* | ||
150 | * The pd table (protection domain table) is used to find the protection domain | ||
151 | * data structure a device belongs to. Indexed with the PCI device id too. | ||
152 | */ | ||
115 | struct protection_domain **amd_iommu_pd_table; | 153 | struct protection_domain **amd_iommu_pd_table; |
154 | |||
155 | /* | ||
156 | * AMD IOMMU allows up to 2^16 different protection domains. This is a bitmap | ||
157 | * to know which ones are already in use. | ||
158 | */ | ||
116 | unsigned long *amd_iommu_pd_alloc_bitmap; | 159 | unsigned long *amd_iommu_pd_alloc_bitmap; |
117 | 160 | ||
118 | static u32 dev_table_size; | 161 | static u32 dev_table_size; /* size of the device table */ |
119 | static u32 alias_table_size; | 162 | static u32 alias_table_size; /* size of the alias table */ |
120 | static u32 rlookup_table_size; | 163 | static u32 rlookup_table_size; /* size of the rlookup table */ |
164 | |||
165 | static inline void update_last_devid(u16 devid) | ||
166 | { | ||
167 | if (devid > amd_iommu_last_bdf) | ||
168 | amd_iommu_last_bdf = devid; | ||
169 | } | ||
170 | |||
171 | static inline unsigned long tbl_size(int entry_size) | ||
172 | { | ||
173 | unsigned shift = PAGE_SHIFT + | ||
174 | get_order(amd_iommu_last_bdf * entry_size); | ||
175 | |||
176 | return 1UL << shift; | ||
177 | } | ||
178 | |||
179 | /**************************************************************************** | ||
180 | * | ||
181 | * AMD IOMMU MMIO register space handling functions | ||
182 | * | ||
183 | * These functions are used to program the IOMMU device registers in | ||
184 | * MMIO space required for that driver. | ||
185 | * | ||
186 | ****************************************************************************/ | ||
121 | 187 | ||
188 | /* | ||
189 | * This function sets the exclusion range in the IOMMU. DMA accesses to the | ||
190 | * exclusion range are passed through untranslated | ||
191 | */ | ||
122 | static void __init iommu_set_exclusion_range(struct amd_iommu *iommu) | 192 | static void __init iommu_set_exclusion_range(struct amd_iommu *iommu) |
123 | { | 193 | { |
124 | u64 start = iommu->exclusion_start & PAGE_MASK; | 194 | u64 start = iommu->exclusion_start & PAGE_MASK; |
@@ -137,6 +207,7 @@ static void __init iommu_set_exclusion_range(struct amd_iommu *iommu) | |||
137 | &entry, sizeof(entry)); | 207 | &entry, sizeof(entry)); |
138 | } | 208 | } |
139 | 209 | ||
210 | /* Programs the physical address of the device table into the IOMMU hardware */ | ||
140 | static void __init iommu_set_device_table(struct amd_iommu *iommu) | 211 | static void __init iommu_set_device_table(struct amd_iommu *iommu) |
141 | { | 212 | { |
142 | u32 entry; | 213 | u32 entry; |
@@ -149,6 +220,7 @@ static void __init iommu_set_device_table(struct amd_iommu *iommu) | |||
149 | &entry, sizeof(entry)); | 220 | &entry, sizeof(entry)); |
150 | } | 221 | } |
151 | 222 | ||
223 | /* Generic functions to enable/disable certain features of the IOMMU. */ | ||
152 | static void __init iommu_feature_enable(struct amd_iommu *iommu, u8 bit) | 224 | static void __init iommu_feature_enable(struct amd_iommu *iommu, u8 bit) |
153 | { | 225 | { |
154 | u32 ctrl; | 226 | u32 ctrl; |
@@ -167,6 +239,7 @@ static void __init iommu_feature_disable(struct amd_iommu *iommu, u8 bit) | |||
167 | writel(ctrl, iommu->mmio_base + MMIO_CONTROL_OFFSET); | 239 | writel(ctrl, iommu->mmio_base + MMIO_CONTROL_OFFSET); |
168 | } | 240 | } |
169 | 241 | ||
242 | /* Function to enable the hardware */ | ||
170 | void __init iommu_enable(struct amd_iommu *iommu) | 243 | void __init iommu_enable(struct amd_iommu *iommu) |
171 | { | 244 | { |
172 | printk(KERN_INFO "AMD IOMMU: Enabling IOMMU at "); | 245 | printk(KERN_INFO "AMD IOMMU: Enabling IOMMU at "); |
@@ -176,6 +249,10 @@ void __init iommu_enable(struct amd_iommu *iommu) | |||
176 | iommu_feature_enable(iommu, CONTROL_IOMMU_EN); | 249 | iommu_feature_enable(iommu, CONTROL_IOMMU_EN); |
177 | } | 250 | } |
178 | 251 | ||
252 | /* | ||
253 | * mapping and unmapping functions for the IOMMU MMIO space. Each AMD IOMMU in | ||
254 | * the system has one. | ||
255 | */ | ||
179 | static u8 * __init iommu_map_mmio_space(u64 address) | 256 | static u8 * __init iommu_map_mmio_space(u64 address) |
180 | { | 257 | { |
181 | u8 *ret; | 258 | u8 *ret; |
@@ -199,16 +276,33 @@ static void __init iommu_unmap_mmio_space(struct amd_iommu *iommu) | |||
199 | release_mem_region(iommu->mmio_phys, MMIO_REGION_LENGTH); | 276 | release_mem_region(iommu->mmio_phys, MMIO_REGION_LENGTH); |
200 | } | 277 | } |
201 | 278 | ||
279 | /**************************************************************************** | ||
280 | * | ||
281 | * The functions below belong to the first pass of AMD IOMMU ACPI table | ||
282 | * parsing. In this pass we try to find out the highest device id this | ||
283 | * code has to handle. Upon this information the size of the shared data | ||
284 | * structures is determined later. | ||
285 | * | ||
286 | ****************************************************************************/ | ||
287 | |||
288 | /* | ||
289 | * This function reads the last device id the IOMMU has to handle from the PCI | ||
290 | * capability header for this IOMMU | ||
291 | */ | ||
202 | static int __init find_last_devid_on_pci(int bus, int dev, int fn, int cap_ptr) | 292 | static int __init find_last_devid_on_pci(int bus, int dev, int fn, int cap_ptr) |
203 | { | 293 | { |
204 | u32 cap; | 294 | u32 cap; |
205 | 295 | ||
206 | cap = read_pci_config(bus, dev, fn, cap_ptr+MMIO_RANGE_OFFSET); | 296 | cap = read_pci_config(bus, dev, fn, cap_ptr+MMIO_RANGE_OFFSET); |
207 | UPDATE_LAST_BDF(DEVID(MMIO_GET_BUS(cap), MMIO_GET_LD(cap))); | 297 | update_last_devid(calc_devid(MMIO_GET_BUS(cap), MMIO_GET_LD(cap))); |
208 | 298 | ||
209 | return 0; | 299 | return 0; |
210 | } | 300 | } |
211 | 301 | ||
302 | /* | ||
303 | * After reading the highest device id from the IOMMU PCI capability header | ||
304 | * this function looks if there is a higher device id defined in the ACPI table | ||
305 | */ | ||
212 | static int __init find_last_devid_from_ivhd(struct ivhd_header *h) | 306 | static int __init find_last_devid_from_ivhd(struct ivhd_header *h) |
213 | { | 307 | { |
214 | u8 *p = (void *)h, *end = (void *)h; | 308 | u8 *p = (void *)h, *end = (void *)h; |
@@ -229,7 +323,8 @@ static int __init find_last_devid_from_ivhd(struct ivhd_header *h) | |||
229 | case IVHD_DEV_RANGE_END: | 323 | case IVHD_DEV_RANGE_END: |
230 | case IVHD_DEV_ALIAS: | 324 | case IVHD_DEV_ALIAS: |
231 | case IVHD_DEV_EXT_SELECT: | 325 | case IVHD_DEV_EXT_SELECT: |
232 | UPDATE_LAST_BDF(dev->devid); | 326 | /* all the above subfield types refer to device ids */ |
327 | update_last_devid(dev->devid); | ||
233 | break; | 328 | break; |
234 | default: | 329 | default: |
235 | break; | 330 | break; |
@@ -242,6 +337,11 @@ static int __init find_last_devid_from_ivhd(struct ivhd_header *h) | |||
242 | return 0; | 337 | return 0; |
243 | } | 338 | } |
244 | 339 | ||
340 | /* | ||
341 | * Iterate over all IVHD entries in the ACPI table and find the highest device | ||
342 | * id which we need to handle. This is the first of three functions which parse | ||
343 | * the ACPI table. So we check the checksum here. | ||
344 | */ | ||
245 | static int __init find_last_devid_acpi(struct acpi_table_header *table) | 345 | static int __init find_last_devid_acpi(struct acpi_table_header *table) |
246 | { | 346 | { |
247 | int i; | 347 | int i; |
@@ -277,19 +377,31 @@ static int __init find_last_devid_acpi(struct acpi_table_header *table) | |||
277 | return 0; | 377 | return 0; |
278 | } | 378 | } |
279 | 379 | ||
380 | /**************************************************************************** | ||
381 | * | ||
382 | * The following functions belong to the code path which parses the ACPI table | ||
383 | * the second time. In this ACPI parsing iteration we allocate IOMMU specific | ||
384 | * data structures, initialize the device/alias/rlookup table and also | ||
385 | * basically initialize the hardware. | ||
386 | * | ||
387 | ****************************************************************************/ | ||
388 | |||
389 | /* | ||
390 | * Allocates the command buffer. This buffer is per AMD IOMMU. We can | ||
391 | * write commands to that buffer later and the IOMMU will execute them | ||
392 | * asynchronously | ||
393 | */ | ||
280 | static u8 * __init alloc_command_buffer(struct amd_iommu *iommu) | 394 | static u8 * __init alloc_command_buffer(struct amd_iommu *iommu) |
281 | { | 395 | { |
282 | u8 *cmd_buf = (u8 *)__get_free_pages(GFP_KERNEL, | 396 | u8 *cmd_buf = (u8 *)__get_free_pages(GFP_KERNEL | __GFP_ZERO, |
283 | get_order(CMD_BUFFER_SIZE)); | 397 | get_order(CMD_BUFFER_SIZE)); |
284 | u64 entry = 0; | 398 | u64 entry; |
285 | 399 | ||
286 | if (cmd_buf == NULL) | 400 | if (cmd_buf == NULL) |
287 | return NULL; | 401 | return NULL; |
288 | 402 | ||
289 | iommu->cmd_buf_size = CMD_BUFFER_SIZE; | 403 | iommu->cmd_buf_size = CMD_BUFFER_SIZE; |
290 | 404 | ||
291 | memset(cmd_buf, 0, CMD_BUFFER_SIZE); | ||
292 | |||
293 | entry = (u64)virt_to_phys(cmd_buf); | 405 | entry = (u64)virt_to_phys(cmd_buf); |
294 | entry |= MMIO_CMD_SIZE_512; | 406 | entry |= MMIO_CMD_SIZE_512; |
295 | memcpy_toio(iommu->mmio_base + MMIO_CMD_BUF_OFFSET, | 407 | memcpy_toio(iommu->mmio_base + MMIO_CMD_BUF_OFFSET, |
@@ -302,11 +414,10 @@ static u8 * __init alloc_command_buffer(struct amd_iommu *iommu) | |||
302 | 414 | ||
303 | static void __init free_command_buffer(struct amd_iommu *iommu) | 415 | static void __init free_command_buffer(struct amd_iommu *iommu) |
304 | { | 416 | { |
305 | if (iommu->cmd_buf) | 417 | free_pages((unsigned long)iommu->cmd_buf, get_order(CMD_BUFFER_SIZE)); |
306 | free_pages((unsigned long)iommu->cmd_buf, | ||
307 | get_order(CMD_BUFFER_SIZE)); | ||
308 | } | 418 | } |
309 | 419 | ||
420 | /* sets a specific bit in the device table entry. */ | ||
310 | static void set_dev_entry_bit(u16 devid, u8 bit) | 421 | static void set_dev_entry_bit(u16 devid, u8 bit) |
311 | { | 422 | { |
312 | int i = (bit >> 5) & 0x07; | 423 | int i = (bit >> 5) & 0x07; |
@@ -315,7 +426,18 @@ static void set_dev_entry_bit(u16 devid, u8 bit) | |||
315 | amd_iommu_dev_table[devid].data[i] |= (1 << _bit); | 426 | amd_iommu_dev_table[devid].data[i] |= (1 << _bit); |
316 | } | 427 | } |
317 | 428 | ||
318 | static void __init set_dev_entry_from_acpi(u16 devid, u32 flags, u32 ext_flags) | 429 | /* Writes the specific IOMMU for a device into the rlookup table */ |
430 | static void __init set_iommu_for_device(struct amd_iommu *iommu, u16 devid) | ||
431 | { | ||
432 | amd_iommu_rlookup_table[devid] = iommu; | ||
433 | } | ||
434 | |||
435 | /* | ||
436 | * This function takes the device specific flags read from the ACPI | ||
437 | * table and sets up the device table entry with that information | ||
438 | */ | ||
439 | static void __init set_dev_entry_from_acpi(struct amd_iommu *iommu, | ||
440 | u16 devid, u32 flags, u32 ext_flags) | ||
319 | { | 441 | { |
320 | if (flags & ACPI_DEVFLAG_INITPASS) | 442 | if (flags & ACPI_DEVFLAG_INITPASS) |
321 | set_dev_entry_bit(devid, DEV_ENTRY_INIT_PASS); | 443 | set_dev_entry_bit(devid, DEV_ENTRY_INIT_PASS); |
@@ -331,13 +453,14 @@ static void __init set_dev_entry_from_acpi(u16 devid, u32 flags, u32 ext_flags) | |||
331 | set_dev_entry_bit(devid, DEV_ENTRY_LINT0_PASS); | 453 | set_dev_entry_bit(devid, DEV_ENTRY_LINT0_PASS); |
332 | if (flags & ACPI_DEVFLAG_LINT1) | 454 | if (flags & ACPI_DEVFLAG_LINT1) |
333 | set_dev_entry_bit(devid, DEV_ENTRY_LINT1_PASS); | 455 | set_dev_entry_bit(devid, DEV_ENTRY_LINT1_PASS); |
334 | } | ||
335 | 456 | ||
336 | static void __init set_iommu_for_device(struct amd_iommu *iommu, u16 devid) | 457 | set_iommu_for_device(iommu, devid); |
337 | { | ||
338 | amd_iommu_rlookup_table[devid] = iommu; | ||
339 | } | 458 | } |
340 | 459 | ||
460 | /* | ||
461 | * Reads the device exclusion range from ACPI and initializes the IOMMU with | ||
462 | * it | ||
463 | */ | ||
341 | static void __init set_device_exclusion_range(u16 devid, struct ivmd_header *m) | 464 | static void __init set_device_exclusion_range(u16 devid, struct ivmd_header *m) |
342 | { | 465 | { |
343 | struct amd_iommu *iommu = amd_iommu_rlookup_table[devid]; | 466 | struct amd_iommu *iommu = amd_iommu_rlookup_table[devid]; |
@@ -346,12 +469,22 @@ static void __init set_device_exclusion_range(u16 devid, struct ivmd_header *m) | |||
346 | return; | 469 | return; |
347 | 470 | ||
348 | if (iommu) { | 471 | if (iommu) { |
472 | /* | ||
473 | * We only can configure exclusion ranges per IOMMU, not | ||
474 | * per device. But we can enable the exclusion range per | ||
475 | * device. This is done here | ||
476 | */ | ||
349 | set_dev_entry_bit(m->devid, DEV_ENTRY_EX); | 477 | set_dev_entry_bit(m->devid, DEV_ENTRY_EX); |
350 | iommu->exclusion_start = m->range_start; | 478 | iommu->exclusion_start = m->range_start; |
351 | iommu->exclusion_length = m->range_length; | 479 | iommu->exclusion_length = m->range_length; |
352 | } | 480 | } |
353 | } | 481 | } |
354 | 482 | ||
483 | /* | ||
484 | * This function reads some important data from the IOMMU PCI space and | ||
485 | * initializes the driver data structure with it. It reads the hardware | ||
486 | * capabilities and the first/last device entries | ||
487 | */ | ||
355 | static void __init init_iommu_from_pci(struct amd_iommu *iommu) | 488 | static void __init init_iommu_from_pci(struct amd_iommu *iommu) |
356 | { | 489 | { |
357 | int bus = PCI_BUS(iommu->devid); | 490 | int bus = PCI_BUS(iommu->devid); |
@@ -363,10 +496,16 @@ static void __init init_iommu_from_pci(struct amd_iommu *iommu) | |||
363 | iommu->cap = read_pci_config(bus, dev, fn, cap_ptr+MMIO_CAP_HDR_OFFSET); | 496 | iommu->cap = read_pci_config(bus, dev, fn, cap_ptr+MMIO_CAP_HDR_OFFSET); |
364 | 497 | ||
365 | range = read_pci_config(bus, dev, fn, cap_ptr+MMIO_RANGE_OFFSET); | 498 | range = read_pci_config(bus, dev, fn, cap_ptr+MMIO_RANGE_OFFSET); |
366 | iommu->first_device = DEVID(MMIO_GET_BUS(range), MMIO_GET_FD(range)); | 499 | iommu->first_device = calc_devid(MMIO_GET_BUS(range), |
367 | iommu->last_device = DEVID(MMIO_GET_BUS(range), MMIO_GET_LD(range)); | 500 | MMIO_GET_FD(range)); |
501 | iommu->last_device = calc_devid(MMIO_GET_BUS(range), | ||
502 | MMIO_GET_LD(range)); | ||
368 | } | 503 | } |
369 | 504 | ||
505 | /* | ||
506 | * Takes a pointer to an AMD IOMMU entry in the ACPI table and | ||
507 | * initializes the hardware and our data structures with it. | ||
508 | */ | ||
370 | static void __init init_iommu_from_acpi(struct amd_iommu *iommu, | 509 | static void __init init_iommu_from_acpi(struct amd_iommu *iommu, |
371 | struct ivhd_header *h) | 510 | struct ivhd_header *h) |
372 | { | 511 | { |
@@ -374,7 +513,7 @@ static void __init init_iommu_from_acpi(struct amd_iommu *iommu, | |||
374 | u8 *end = p, flags = 0; | 513 | u8 *end = p, flags = 0; |
375 | u16 dev_i, devid = 0, devid_start = 0, devid_to = 0; | 514 | u16 dev_i, devid = 0, devid_start = 0, devid_to = 0; |
376 | u32 ext_flags = 0; | 515 | u32 ext_flags = 0; |
377 | bool alias = 0; | 516 | bool alias = false; |
378 | struct ivhd_entry *e; | 517 | struct ivhd_entry *e; |
379 | 518 | ||
380 | /* | 519 | /* |
@@ -414,22 +553,23 @@ static void __init init_iommu_from_acpi(struct amd_iommu *iommu, | |||
414 | case IVHD_DEV_ALL: | 553 | case IVHD_DEV_ALL: |
415 | for (dev_i = iommu->first_device; | 554 | for (dev_i = iommu->first_device; |
416 | dev_i <= iommu->last_device; ++dev_i) | 555 | dev_i <= iommu->last_device; ++dev_i) |
417 | set_dev_entry_from_acpi(dev_i, e->flags, 0); | 556 | set_dev_entry_from_acpi(iommu, dev_i, |
557 | e->flags, 0); | ||
418 | break; | 558 | break; |
419 | case IVHD_DEV_SELECT: | 559 | case IVHD_DEV_SELECT: |
420 | devid = e->devid; | 560 | devid = e->devid; |
421 | set_dev_entry_from_acpi(devid, e->flags, 0); | 561 | set_dev_entry_from_acpi(iommu, devid, e->flags, 0); |
422 | break; | 562 | break; |
423 | case IVHD_DEV_SELECT_RANGE_START: | 563 | case IVHD_DEV_SELECT_RANGE_START: |
424 | devid_start = e->devid; | 564 | devid_start = e->devid; |
425 | flags = e->flags; | 565 | flags = e->flags; |
426 | ext_flags = 0; | 566 | ext_flags = 0; |
427 | alias = 0; | 567 | alias = false; |
428 | break; | 568 | break; |
429 | case IVHD_DEV_ALIAS: | 569 | case IVHD_DEV_ALIAS: |
430 | devid = e->devid; | 570 | devid = e->devid; |
431 | devid_to = e->ext >> 8; | 571 | devid_to = e->ext >> 8; |
432 | set_dev_entry_from_acpi(devid, e->flags, 0); | 572 | set_dev_entry_from_acpi(iommu, devid, e->flags, 0); |
433 | amd_iommu_alias_table[devid] = devid_to; | 573 | amd_iommu_alias_table[devid] = devid_to; |
434 | break; | 574 | break; |
435 | case IVHD_DEV_ALIAS_RANGE: | 575 | case IVHD_DEV_ALIAS_RANGE: |
@@ -437,24 +577,25 @@ static void __init init_iommu_from_acpi(struct amd_iommu *iommu, | |||
437 | flags = e->flags; | 577 | flags = e->flags; |
438 | devid_to = e->ext >> 8; | 578 | devid_to = e->ext >> 8; |
439 | ext_flags = 0; | 579 | ext_flags = 0; |
440 | alias = 1; | 580 | alias = true; |
441 | break; | 581 | break; |
442 | case IVHD_DEV_EXT_SELECT: | 582 | case IVHD_DEV_EXT_SELECT: |
443 | devid = e->devid; | 583 | devid = e->devid; |
444 | set_dev_entry_from_acpi(devid, e->flags, e->ext); | 584 | set_dev_entry_from_acpi(iommu, devid, e->flags, |
585 | e->ext); | ||
445 | break; | 586 | break; |
446 | case IVHD_DEV_EXT_SELECT_RANGE: | 587 | case IVHD_DEV_EXT_SELECT_RANGE: |
447 | devid_start = e->devid; | 588 | devid_start = e->devid; |
448 | flags = e->flags; | 589 | flags = e->flags; |
449 | ext_flags = e->ext; | 590 | ext_flags = e->ext; |
450 | alias = 0; | 591 | alias = false; |
451 | break; | 592 | break; |
452 | case IVHD_DEV_RANGE_END: | 593 | case IVHD_DEV_RANGE_END: |
453 | devid = e->devid; | 594 | devid = e->devid; |
454 | for (dev_i = devid_start; dev_i <= devid; ++dev_i) { | 595 | for (dev_i = devid_start; dev_i <= devid; ++dev_i) { |
455 | if (alias) | 596 | if (alias) |
456 | amd_iommu_alias_table[dev_i] = devid_to; | 597 | amd_iommu_alias_table[dev_i] = devid_to; |
457 | set_dev_entry_from_acpi( | 598 | set_dev_entry_from_acpi(iommu, |
458 | amd_iommu_alias_table[dev_i], | 599 | amd_iommu_alias_table[dev_i], |
459 | flags, ext_flags); | 600 | flags, ext_flags); |
460 | } | 601 | } |
@@ -467,6 +608,7 @@ static void __init init_iommu_from_acpi(struct amd_iommu *iommu, | |||
467 | } | 608 | } |
468 | } | 609 | } |
469 | 610 | ||
611 | /* Initializes the device->iommu mapping for the driver */ | ||
470 | static int __init init_iommu_devices(struct amd_iommu *iommu) | 612 | static int __init init_iommu_devices(struct amd_iommu *iommu) |
471 | { | 613 | { |
472 | u16 i; | 614 | u16 i; |
@@ -494,6 +636,11 @@ static void __init free_iommu_all(void) | |||
494 | } | 636 | } |
495 | } | 637 | } |
496 | 638 | ||
639 | /* | ||
640 | * This function glues the initialization functions for one IOMMU | ||
641 | * together and also allocates the command buffer and programs the | ||
642 | * hardware. It does NOT enable the IOMMU. This is done afterwards. | ||
643 | */ | ||
497 | static int __init init_iommu_one(struct amd_iommu *iommu, struct ivhd_header *h) | 644 | static int __init init_iommu_one(struct amd_iommu *iommu, struct ivhd_header *h) |
498 | { | 645 | { |
499 | spin_lock_init(&iommu->lock); | 646 | spin_lock_init(&iommu->lock); |
@@ -521,6 +668,10 @@ static int __init init_iommu_one(struct amd_iommu *iommu, struct ivhd_header *h) | |||
521 | return 0; | 668 | return 0; |
522 | } | 669 | } |
523 | 670 | ||
671 | /* | ||
672 | * Iterates over all IOMMU entries in the ACPI table, allocates the | ||
673 | * IOMMU structure and initializes it with init_iommu_one() | ||
674 | */ | ||
524 | static int __init init_iommu_all(struct acpi_table_header *table) | 675 | static int __init init_iommu_all(struct acpi_table_header *table) |
525 | { | 676 | { |
526 | u8 *p = (u8 *)table, *end = (u8 *)table; | 677 | u8 *p = (u8 *)table, *end = (u8 *)table; |
@@ -528,8 +679,6 @@ static int __init init_iommu_all(struct acpi_table_header *table) | |||
528 | struct amd_iommu *iommu; | 679 | struct amd_iommu *iommu; |
529 | int ret; | 680 | int ret; |
530 | 681 | ||
531 | INIT_LIST_HEAD(&amd_iommu_list); | ||
532 | |||
533 | end += table->length; | 682 | end += table->length; |
534 | p += IVRS_HEADER_LENGTH; | 683 | p += IVRS_HEADER_LENGTH; |
535 | 684 | ||
@@ -555,6 +704,14 @@ static int __init init_iommu_all(struct acpi_table_header *table) | |||
555 | return 0; | 704 | return 0; |
556 | } | 705 | } |
557 | 706 | ||
707 | /**************************************************************************** | ||
708 | * | ||
709 | * The next functions belong to the third pass of parsing the ACPI | ||
710 | * table. In this last pass the memory mapping requirements are | ||
711 | * gathered (like exclusion and unity mapping ranges). | ||
712 | * | ||
713 | ****************************************************************************/ | ||
714 | |||
558 | static void __init free_unity_maps(void) | 715 | static void __init free_unity_maps(void) |
559 | { | 716 | { |
560 | struct unity_map_entry *entry, *next; | 717 | struct unity_map_entry *entry, *next; |
@@ -565,6 +722,7 @@ static void __init free_unity_maps(void) | |||
565 | } | 722 | } |
566 | } | 723 | } |
567 | 724 | ||
725 | /* called when we find an exclusion range definition in ACPI */ | ||
568 | static int __init init_exclusion_range(struct ivmd_header *m) | 726 | static int __init init_exclusion_range(struct ivmd_header *m) |
569 | { | 727 | { |
570 | int i; | 728 | int i; |
@@ -574,7 +732,7 @@ static int __init init_exclusion_range(struct ivmd_header *m) | |||
574 | set_device_exclusion_range(m->devid, m); | 732 | set_device_exclusion_range(m->devid, m); |
575 | break; | 733 | break; |
576 | case ACPI_IVMD_TYPE_ALL: | 734 | case ACPI_IVMD_TYPE_ALL: |
577 | for (i = 0; i < amd_iommu_last_bdf; ++i) | 735 | for (i = 0; i <= amd_iommu_last_bdf; ++i) |
578 | set_device_exclusion_range(i, m); | 736 | set_device_exclusion_range(i, m); |
579 | break; | 737 | break; |
580 | case ACPI_IVMD_TYPE_RANGE: | 738 | case ACPI_IVMD_TYPE_RANGE: |
@@ -588,6 +746,7 @@ static int __init init_exclusion_range(struct ivmd_header *m) | |||
588 | return 0; | 746 | return 0; |
589 | } | 747 | } |
590 | 748 | ||
749 | /* called for unity map ACPI definition */ | ||
591 | static int __init init_unity_map_range(struct ivmd_header *m) | 750 | static int __init init_unity_map_range(struct ivmd_header *m) |
592 | { | 751 | { |
593 | struct unity_map_entry *e = 0; | 752 | struct unity_map_entry *e = 0; |
@@ -619,13 +778,12 @@ static int __init init_unity_map_range(struct ivmd_header *m) | |||
619 | return 0; | 778 | return 0; |
620 | } | 779 | } |
621 | 780 | ||
781 | /* iterates over all memory definitions we find in the ACPI table */ | ||
622 | static int __init init_memory_definitions(struct acpi_table_header *table) | 782 | static int __init init_memory_definitions(struct acpi_table_header *table) |
623 | { | 783 | { |
624 | u8 *p = (u8 *)table, *end = (u8 *)table; | 784 | u8 *p = (u8 *)table, *end = (u8 *)table; |
625 | struct ivmd_header *m; | 785 | struct ivmd_header *m; |
626 | 786 | ||
627 | INIT_LIST_HEAD(&amd_iommu_unity_map); | ||
628 | |||
629 | end += table->length; | 787 | end += table->length; |
630 | p += IVRS_HEADER_LENGTH; | 788 | p += IVRS_HEADER_LENGTH; |
631 | 789 | ||
@@ -642,6 +800,25 @@ static int __init init_memory_definitions(struct acpi_table_header *table) | |||
642 | return 0; | 800 | return 0; |
643 | } | 801 | } |
644 | 802 | ||
803 | /* | ||
804 | * Init the device table to not allow DMA access for devices and | ||
805 | * suppress all page faults | ||
806 | */ | ||
807 | static void init_device_table(void) | ||
808 | { | ||
809 | u16 devid; | ||
810 | |||
811 | for (devid = 0; devid <= amd_iommu_last_bdf; ++devid) { | ||
812 | set_dev_entry_bit(devid, DEV_ENTRY_VALID); | ||
813 | set_dev_entry_bit(devid, DEV_ENTRY_TRANSLATION); | ||
814 | set_dev_entry_bit(devid, DEV_ENTRY_NO_PAGE_FAULT); | ||
815 | } | ||
816 | } | ||
817 | |||
818 | /* | ||
819 | * This function finally enables all IOMMUs found in the system after | ||
820 | * they have been initialized | ||
821 | */ | ||
645 | static void __init enable_iommus(void) | 822 | static void __init enable_iommus(void) |
646 | { | 823 | { |
647 | struct amd_iommu *iommu; | 824 | struct amd_iommu *iommu; |
@@ -678,6 +855,34 @@ static struct sys_device device_amd_iommu = { | |||
678 | .cls = &amd_iommu_sysdev_class, | 855 | .cls = &amd_iommu_sysdev_class, |
679 | }; | 856 | }; |
680 | 857 | ||
858 | /* | ||
859 | * This is the core init function for AMD IOMMU hardware in the system. | ||
860 | * This function is called from the generic x86 DMA layer initialization | ||
861 | * code. | ||
862 | * | ||
863 | * This function basically parses the ACPI table for AMD IOMMU (IVRS) | ||
864 | * three times: | ||
865 | * | ||
866 | * 1 pass) Find the highest PCI device id the driver has to handle. | ||
867 | * Upon this information the size of the data structures is | ||
868 | * determined that needs to be allocated. | ||
869 | * | ||
870 | * 2 pass) Initialize the data structures just allocated with the | ||
871 | * information in the ACPI table about available AMD IOMMUs | ||
872 | * in the system. It also maps the PCI devices in the | ||
873 | * system to specific IOMMUs | ||
874 | * | ||
875 | * 3 pass) After the basic data structures are allocated and | ||
876 | * initialized we update them with information about memory | ||
877 | * remapping requirements parsed out of the ACPI table in | ||
878 | * this last pass. | ||
879 | * | ||
880 | * After that the hardware is initialized and ready to go. In the last | ||
881 | * step we do some Linux specific things like registering the driver in | ||
882 | * the dma_ops interface and initializing the suspend/resume support | ||
883 | * functions. Finally it prints some information about AMD IOMMUs and | ||
884 | * the driver state and enables the hardware. | ||
885 | */ | ||
681 | int __init amd_iommu_init(void) | 886 | int __init amd_iommu_init(void) |
682 | { | 887 | { |
683 | int i, ret = 0; | 888 | int i, ret = 0; |
@@ -699,14 +904,14 @@ int __init amd_iommu_init(void) | |||
699 | if (acpi_table_parse("IVRS", find_last_devid_acpi) != 0) | 904 | if (acpi_table_parse("IVRS", find_last_devid_acpi) != 0) |
700 | return -ENODEV; | 905 | return -ENODEV; |
701 | 906 | ||
702 | dev_table_size = TBL_SIZE(DEV_TABLE_ENTRY_SIZE); | 907 | dev_table_size = tbl_size(DEV_TABLE_ENTRY_SIZE); |
703 | alias_table_size = TBL_SIZE(ALIAS_TABLE_ENTRY_SIZE); | 908 | alias_table_size = tbl_size(ALIAS_TABLE_ENTRY_SIZE); |
704 | rlookup_table_size = TBL_SIZE(RLOOKUP_TABLE_ENTRY_SIZE); | 909 | rlookup_table_size = tbl_size(RLOOKUP_TABLE_ENTRY_SIZE); |
705 | 910 | ||
706 | ret = -ENOMEM; | 911 | ret = -ENOMEM; |
707 | 912 | ||
708 | /* Device table - directly used by all IOMMUs */ | 913 | /* Device table - directly used by all IOMMUs */ |
709 | amd_iommu_dev_table = (void *)__get_free_pages(GFP_KERNEL, | 914 | amd_iommu_dev_table = (void *)__get_free_pages(GFP_KERNEL | __GFP_ZERO, |
710 | get_order(dev_table_size)); | 915 | get_order(dev_table_size)); |
711 | if (amd_iommu_dev_table == NULL) | 916 | if (amd_iommu_dev_table == NULL) |
712 | goto out; | 917 | goto out; |
@@ -730,27 +935,26 @@ int __init amd_iommu_init(void) | |||
730 | * Protection Domain table - maps devices to protection domains | 935 | * Protection Domain table - maps devices to protection domains |
731 | * This table has the same size as the rlookup_table | 936 | * This table has the same size as the rlookup_table |
732 | */ | 937 | */ |
733 | amd_iommu_pd_table = (void *)__get_free_pages(GFP_KERNEL, | 938 | amd_iommu_pd_table = (void *)__get_free_pages(GFP_KERNEL | __GFP_ZERO, |
734 | get_order(rlookup_table_size)); | 939 | get_order(rlookup_table_size)); |
735 | if (amd_iommu_pd_table == NULL) | 940 | if (amd_iommu_pd_table == NULL) |
736 | goto free; | 941 | goto free; |
737 | 942 | ||
738 | amd_iommu_pd_alloc_bitmap = (void *)__get_free_pages(GFP_KERNEL, | 943 | amd_iommu_pd_alloc_bitmap = (void *)__get_free_pages( |
944 | GFP_KERNEL | __GFP_ZERO, | ||
739 | get_order(MAX_DOMAIN_ID/8)); | 945 | get_order(MAX_DOMAIN_ID/8)); |
740 | if (amd_iommu_pd_alloc_bitmap == NULL) | 946 | if (amd_iommu_pd_alloc_bitmap == NULL) |
741 | goto free; | 947 | goto free; |
742 | 948 | ||
949 | /* init the device table */ | ||
950 | init_device_table(); | ||
951 | |||
743 | /* | 952 | /* |
744 | * memory is allocated now; initialize the device table with all zeroes | 953 | * let all alias entries point to itself |
745 | * and let all alias entries point to itself | ||
746 | */ | 954 | */ |
747 | memset(amd_iommu_dev_table, 0, dev_table_size); | 955 | for (i = 0; i <= amd_iommu_last_bdf; ++i) |
748 | for (i = 0; i < amd_iommu_last_bdf; ++i) | ||
749 | amd_iommu_alias_table[i] = i; | 956 | amd_iommu_alias_table[i] = i; |
750 | 957 | ||
751 | memset(amd_iommu_pd_table, 0, rlookup_table_size); | ||
752 | memset(amd_iommu_pd_alloc_bitmap, 0, MAX_DOMAIN_ID / 8); | ||
753 | |||
754 | /* | 958 | /* |
755 | * never allocate domain 0 because its used as the non-allocated and | 959 | * never allocate domain 0 because its used as the non-allocated and |
756 | * error value placeholder | 960 | * error value placeholder |
@@ -768,15 +972,15 @@ int __init amd_iommu_init(void) | |||
768 | if (acpi_table_parse("IVRS", init_memory_definitions) != 0) | 972 | if (acpi_table_parse("IVRS", init_memory_definitions) != 0) |
769 | goto free; | 973 | goto free; |
770 | 974 | ||
771 | ret = amd_iommu_init_dma_ops(); | 975 | ret = sysdev_class_register(&amd_iommu_sysdev_class); |
772 | if (ret) | 976 | if (ret) |
773 | goto free; | 977 | goto free; |
774 | 978 | ||
775 | ret = sysdev_class_register(&amd_iommu_sysdev_class); | 979 | ret = sysdev_register(&device_amd_iommu); |
776 | if (ret) | 980 | if (ret) |
777 | goto free; | 981 | goto free; |
778 | 982 | ||
779 | ret = sysdev_register(&device_amd_iommu); | 983 | ret = amd_iommu_init_dma_ops(); |
780 | if (ret) | 984 | if (ret) |
781 | goto free; | 985 | goto free; |
782 | 986 | ||
@@ -795,24 +999,19 @@ out: | |||
795 | return ret; | 999 | return ret; |
796 | 1000 | ||
797 | free: | 1001 | free: |
798 | if (amd_iommu_pd_alloc_bitmap) | 1002 | free_pages((unsigned long)amd_iommu_pd_alloc_bitmap, 1); |
799 | free_pages((unsigned long)amd_iommu_pd_alloc_bitmap, 1); | ||
800 | 1003 | ||
801 | if (amd_iommu_pd_table) | 1004 | free_pages((unsigned long)amd_iommu_pd_table, |
802 | free_pages((unsigned long)amd_iommu_pd_table, | 1005 | get_order(rlookup_table_size)); |
803 | get_order(rlookup_table_size)); | ||
804 | 1006 | ||
805 | if (amd_iommu_rlookup_table) | 1007 | free_pages((unsigned long)amd_iommu_rlookup_table, |
806 | free_pages((unsigned long)amd_iommu_rlookup_table, | 1008 | get_order(rlookup_table_size)); |
807 | get_order(rlookup_table_size)); | ||
808 | 1009 | ||
809 | if (amd_iommu_alias_table) | 1010 | free_pages((unsigned long)amd_iommu_alias_table, |
810 | free_pages((unsigned long)amd_iommu_alias_table, | 1011 | get_order(alias_table_size)); |
811 | get_order(alias_table_size)); | ||
812 | 1012 | ||
813 | if (amd_iommu_dev_table) | 1013 | free_pages((unsigned long)amd_iommu_dev_table, |
814 | free_pages((unsigned long)amd_iommu_dev_table, | 1014 | get_order(dev_table_size)); |
815 | get_order(dev_table_size)); | ||
816 | 1015 | ||
817 | free_iommu_all(); | 1016 | free_iommu_all(); |
818 | 1017 | ||
@@ -821,6 +1020,13 @@ free: | |||
821 | goto out; | 1020 | goto out; |
822 | } | 1021 | } |
823 | 1022 | ||
1023 | /**************************************************************************** | ||
1024 | * | ||
1025 | * Early detect code. This code runs at IOMMU detection time in the DMA | ||
1026 | * layer. It just looks if there is an IVRS ACPI table to detect AMD | ||
1027 | * IOMMUs | ||
1028 | * | ||
1029 | ****************************************************************************/ | ||
824 | static int __init early_amd_iommu_detect(struct acpi_table_header *table) | 1030 | static int __init early_amd_iommu_detect(struct acpi_table_header *table) |
825 | { | 1031 | { |
826 | return 0; | 1032 | return 0; |
@@ -828,7 +1034,7 @@ static int __init early_amd_iommu_detect(struct acpi_table_header *table) | |||
828 | 1034 | ||
829 | void __init amd_iommu_detect(void) | 1035 | void __init amd_iommu_detect(void) |
830 | { | 1036 | { |
831 | if (swiotlb || no_iommu || iommu_detected) | 1037 | if (swiotlb || no_iommu || (iommu_detected && !gart_iommu_aperture)) |
832 | return; | 1038 | return; |
833 | 1039 | ||
834 | if (acpi_table_parse("IVRS", early_amd_iommu_detect) == 0) { | 1040 | if (acpi_table_parse("IVRS", early_amd_iommu_detect) == 0) { |
@@ -841,6 +1047,13 @@ void __init amd_iommu_detect(void) | |||
841 | } | 1047 | } |
842 | } | 1048 | } |
843 | 1049 | ||
1050 | /**************************************************************************** | ||
1051 | * | ||
1052 | * Parsing functions for the AMD IOMMU specific kernel command line | ||
1053 | * options. | ||
1054 | * | ||
1055 | ****************************************************************************/ | ||
1056 | |||
844 | static int __init parse_amd_iommu_options(char *str) | 1057 | static int __init parse_amd_iommu_options(char *str) |
845 | { | 1058 | { |
846 | for (; *str; ++str) { | 1059 | for (; *str; ++str) { |
@@ -853,20 +1066,10 @@ static int __init parse_amd_iommu_options(char *str) | |||
853 | 1066 | ||
854 | static int __init parse_amd_iommu_size_options(char *str) | 1067 | static int __init parse_amd_iommu_size_options(char *str) |
855 | { | 1068 | { |
856 | for (; *str; ++str) { | 1069 | unsigned order = PAGE_SHIFT + get_order(memparse(str, &str)); |
857 | if (strcmp(str, "32M") == 0) | 1070 | |
858 | amd_iommu_aperture_order = 25; | 1071 | if ((order > 24) && (order < 31)) |
859 | if (strcmp(str, "64M") == 0) | 1072 | amd_iommu_aperture_order = order; |
860 | amd_iommu_aperture_order = 26; | ||
861 | if (strcmp(str, "128M") == 0) | ||
862 | amd_iommu_aperture_order = 27; | ||
863 | if (strcmp(str, "256M") == 0) | ||
864 | amd_iommu_aperture_order = 28; | ||
865 | if (strcmp(str, "512M") == 0) | ||
866 | amd_iommu_aperture_order = 29; | ||
867 | if (strcmp(str, "1G") == 0) | ||
868 | amd_iommu_aperture_order = 30; | ||
869 | } | ||
870 | 1073 | ||
871 | return 1; | 1074 | return 1; |
872 | } | 1075 | } |
diff --git a/arch/x86/kernel/aperture_64.c b/arch/x86/kernel/aperture_64.c index 9f907806c1a5..44e21826db11 100644 --- a/arch/x86/kernel/aperture_64.c +++ b/arch/x86/kernel/aperture_64.c | |||
@@ -21,6 +21,7 @@ | |||
21 | #include <linux/suspend.h> | 21 | #include <linux/suspend.h> |
22 | #include <asm/e820.h> | 22 | #include <asm/e820.h> |
23 | #include <asm/io.h> | 23 | #include <asm/io.h> |
24 | #include <asm/iommu.h> | ||
24 | #include <asm/gart.h> | 25 | #include <asm/gart.h> |
25 | #include <asm/pci-direct.h> | 26 | #include <asm/pci-direct.h> |
26 | #include <asm/dma.h> | 27 | #include <asm/dma.h> |
diff --git a/arch/x86/kernel/apic_32.c b/arch/x86/kernel/apic_32.c index a437d027f20b..f88bd0d982b0 100644 --- a/arch/x86/kernel/apic_32.c +++ b/arch/x86/kernel/apic_32.c | |||
@@ -75,7 +75,7 @@ char system_vectors[NR_VECTORS] = { [0 ... NR_VECTORS-1] = SYS_VECTOR_FREE}; | |||
75 | /* | 75 | /* |
76 | * Debug level, exported for io_apic.c | 76 | * Debug level, exported for io_apic.c |
77 | */ | 77 | */ |
78 | int apic_verbosity; | 78 | unsigned int apic_verbosity; |
79 | 79 | ||
80 | int pic_mode; | 80 | int pic_mode; |
81 | 81 | ||
@@ -177,7 +177,7 @@ void __cpuinit enable_NMI_through_LVT0(void) | |||
177 | /* Level triggered for 82489DX */ | 177 | /* Level triggered for 82489DX */ |
178 | if (!lapic_is_integrated()) | 178 | if (!lapic_is_integrated()) |
179 | v |= APIC_LVT_LEVEL_TRIGGER; | 179 | v |= APIC_LVT_LEVEL_TRIGGER; |
180 | apic_write_around(APIC_LVT0, v); | 180 | apic_write(APIC_LVT0, v); |
181 | } | 181 | } |
182 | 182 | ||
183 | /** | 183 | /** |
@@ -212,9 +212,6 @@ int lapic_get_maxlvt(void) | |||
212 | * this function twice on the boot CPU, once with a bogus timeout | 212 | * this function twice on the boot CPU, once with a bogus timeout |
213 | * value, second time for real. The other (noncalibrating) CPUs | 213 | * value, second time for real. The other (noncalibrating) CPUs |
214 | * call this function only once, with the real, calibrated value. | 214 | * call this function only once, with the real, calibrated value. |
215 | * | ||
216 | * We do reads before writes even if unnecessary, to get around the | ||
217 | * P5 APIC double write bug. | ||
218 | */ | 215 | */ |
219 | static void __setup_APIC_LVTT(unsigned int clocks, int oneshot, int irqen) | 216 | static void __setup_APIC_LVTT(unsigned int clocks, int oneshot, int irqen) |
220 | { | 217 | { |
@@ -229,18 +226,18 @@ static void __setup_APIC_LVTT(unsigned int clocks, int oneshot, int irqen) | |||
229 | if (!irqen) | 226 | if (!irqen) |
230 | lvtt_value |= APIC_LVT_MASKED; | 227 | lvtt_value |= APIC_LVT_MASKED; |
231 | 228 | ||
232 | apic_write_around(APIC_LVTT, lvtt_value); | 229 | apic_write(APIC_LVTT, lvtt_value); |
233 | 230 | ||
234 | /* | 231 | /* |
235 | * Divide PICLK by 16 | 232 | * Divide PICLK by 16 |
236 | */ | 233 | */ |
237 | tmp_value = apic_read(APIC_TDCR); | 234 | tmp_value = apic_read(APIC_TDCR); |
238 | apic_write_around(APIC_TDCR, (tmp_value | 235 | apic_write(APIC_TDCR, |
239 | & ~(APIC_TDR_DIV_1 | APIC_TDR_DIV_TMBASE)) | 236 | (tmp_value & ~(APIC_TDR_DIV_1 | APIC_TDR_DIV_TMBASE)) | |
240 | | APIC_TDR_DIV_16); | 237 | APIC_TDR_DIV_16); |
241 | 238 | ||
242 | if (!oneshot) | 239 | if (!oneshot) |
243 | apic_write_around(APIC_TMICT, clocks/APIC_DIVISOR); | 240 | apic_write(APIC_TMICT, clocks / APIC_DIVISOR); |
244 | } | 241 | } |
245 | 242 | ||
246 | /* | 243 | /* |
@@ -249,7 +246,7 @@ static void __setup_APIC_LVTT(unsigned int clocks, int oneshot, int irqen) | |||
249 | static int lapic_next_event(unsigned long delta, | 246 | static int lapic_next_event(unsigned long delta, |
250 | struct clock_event_device *evt) | 247 | struct clock_event_device *evt) |
251 | { | 248 | { |
252 | apic_write_around(APIC_TMICT, delta); | 249 | apic_write(APIC_TMICT, delta); |
253 | return 0; | 250 | return 0; |
254 | } | 251 | } |
255 | 252 | ||
@@ -278,7 +275,7 @@ static void lapic_timer_setup(enum clock_event_mode mode, | |||
278 | case CLOCK_EVT_MODE_SHUTDOWN: | 275 | case CLOCK_EVT_MODE_SHUTDOWN: |
279 | v = apic_read(APIC_LVTT); | 276 | v = apic_read(APIC_LVTT); |
280 | v |= (APIC_LVT_MASKED | LOCAL_TIMER_VECTOR); | 277 | v |= (APIC_LVT_MASKED | LOCAL_TIMER_VECTOR); |
281 | apic_write_around(APIC_LVTT, v); | 278 | apic_write(APIC_LVTT, v); |
282 | break; | 279 | break; |
283 | case CLOCK_EVT_MODE_RESUME: | 280 | case CLOCK_EVT_MODE_RESUME: |
284 | /* Nothing to do here */ | 281 | /* Nothing to do here */ |
@@ -372,12 +369,7 @@ static void __init lapic_cal_handler(struct clock_event_device *dev) | |||
372 | } | 369 | } |
373 | } | 370 | } |
374 | 371 | ||
375 | /* | 372 | static int __init calibrate_APIC_clock(void) |
376 | * Setup the boot APIC | ||
377 | * | ||
378 | * Calibrate and verify the result. | ||
379 | */ | ||
380 | void __init setup_boot_APIC_clock(void) | ||
381 | { | 373 | { |
382 | struct clock_event_device *levt = &__get_cpu_var(lapic_events); | 374 | struct clock_event_device *levt = &__get_cpu_var(lapic_events); |
383 | const long pm_100ms = PMTMR_TICKS_PER_SEC/10; | 375 | const long pm_100ms = PMTMR_TICKS_PER_SEC/10; |
@@ -387,24 +379,6 @@ void __init setup_boot_APIC_clock(void) | |||
387 | long delta, deltapm; | 379 | long delta, deltapm; |
388 | int pm_referenced = 0; | 380 | int pm_referenced = 0; |
389 | 381 | ||
390 | /* | ||
391 | * The local apic timer can be disabled via the kernel | ||
392 | * commandline or from the CPU detection code. Register the lapic | ||
393 | * timer as a dummy clock event source on SMP systems, so the | ||
394 | * broadcast mechanism is used. On UP systems simply ignore it. | ||
395 | */ | ||
396 | if (local_apic_timer_disabled) { | ||
397 | /* No broadcast on UP ! */ | ||
398 | if (num_possible_cpus() > 1) { | ||
399 | lapic_clockevent.mult = 1; | ||
400 | setup_APIC_timer(); | ||
401 | } | ||
402 | return; | ||
403 | } | ||
404 | |||
405 | apic_printk(APIC_VERBOSE, "Using local APIC timer interrupts.\n" | ||
406 | "calibrating APIC timer ...\n"); | ||
407 | |||
408 | local_irq_disable(); | 382 | local_irq_disable(); |
409 | 383 | ||
410 | /* Replace the global interrupt handler */ | 384 | /* Replace the global interrupt handler */ |
@@ -489,8 +463,6 @@ void __init setup_boot_APIC_clock(void) | |||
489 | calibration_result / (1000000 / HZ), | 463 | calibration_result / (1000000 / HZ), |
490 | calibration_result % (1000000 / HZ)); | 464 | calibration_result % (1000000 / HZ)); |
491 | 465 | ||
492 | local_apic_timer_verify_ok = 1; | ||
493 | |||
494 | /* | 466 | /* |
495 | * Do a sanity check on the APIC calibration result | 467 | * Do a sanity check on the APIC calibration result |
496 | */ | 468 | */ |
@@ -498,12 +470,11 @@ void __init setup_boot_APIC_clock(void) | |||
498 | local_irq_enable(); | 470 | local_irq_enable(); |
499 | printk(KERN_WARNING | 471 | printk(KERN_WARNING |
500 | "APIC frequency too slow, disabling apic timer\n"); | 472 | "APIC frequency too slow, disabling apic timer\n"); |
501 | /* No broadcast on UP ! */ | 473 | return -1; |
502 | if (num_possible_cpus() > 1) | ||
503 | setup_APIC_timer(); | ||
504 | return; | ||
505 | } | 474 | } |
506 | 475 | ||
476 | local_apic_timer_verify_ok = 1; | ||
477 | |||
507 | /* We trust the pm timer based calibration */ | 478 | /* We trust the pm timer based calibration */ |
508 | if (!pm_referenced) { | 479 | if (!pm_referenced) { |
509 | apic_printk(APIC_VERBOSE, "... verify APIC timer\n"); | 480 | apic_printk(APIC_VERBOSE, "... verify APIC timer\n"); |
@@ -543,22 +514,55 @@ void __init setup_boot_APIC_clock(void) | |||
543 | if (!local_apic_timer_verify_ok) { | 514 | if (!local_apic_timer_verify_ok) { |
544 | printk(KERN_WARNING | 515 | printk(KERN_WARNING |
545 | "APIC timer disabled due to verification failure.\n"); | 516 | "APIC timer disabled due to verification failure.\n"); |
517 | return -1; | ||
518 | } | ||
519 | |||
520 | return 0; | ||
521 | } | ||
522 | |||
523 | /* | ||
524 | * Setup the boot APIC | ||
525 | * | ||
526 | * Calibrate and verify the result. | ||
527 | */ | ||
528 | void __init setup_boot_APIC_clock(void) | ||
529 | { | ||
530 | /* | ||
531 | * The local apic timer can be disabled via the kernel | ||
532 | * commandline or from the CPU detection code. Register the lapic | ||
533 | * timer as a dummy clock event source on SMP systems, so the | ||
534 | * broadcast mechanism is used. On UP systems simply ignore it. | ||
535 | */ | ||
536 | if (local_apic_timer_disabled) { | ||
546 | /* No broadcast on UP ! */ | 537 | /* No broadcast on UP ! */ |
547 | if (num_possible_cpus() == 1) | 538 | if (num_possible_cpus() > 1) { |
548 | return; | 539 | lapic_clockevent.mult = 1; |
549 | } else { | 540 | setup_APIC_timer(); |
550 | /* | 541 | } |
551 | * If nmi_watchdog is set to IO_APIC, we need the | 542 | return; |
552 | * PIT/HPET going. Otherwise register lapic as a dummy | ||
553 | * device. | ||
554 | */ | ||
555 | if (nmi_watchdog != NMI_IO_APIC) | ||
556 | lapic_clockevent.features &= ~CLOCK_EVT_FEAT_DUMMY; | ||
557 | else | ||
558 | printk(KERN_WARNING "APIC timer registered as dummy," | ||
559 | " due to nmi_watchdog=%d!\n", nmi_watchdog); | ||
560 | } | 543 | } |
561 | 544 | ||
545 | apic_printk(APIC_VERBOSE, "Using local APIC timer interrupts.\n" | ||
546 | "calibrating APIC timer ...\n"); | ||
547 | |||
548 | if (calibrate_APIC_clock()) { | ||
549 | /* No broadcast on UP ! */ | ||
550 | if (num_possible_cpus() > 1) | ||
551 | setup_APIC_timer(); | ||
552 | return; | ||
553 | } | ||
554 | |||
555 | /* | ||
556 | * If nmi_watchdog is set to IO_APIC, we need the | ||
557 | * PIT/HPET going. Otherwise register lapic as a dummy | ||
558 | * device. | ||
559 | */ | ||
560 | if (nmi_watchdog != NMI_IO_APIC) | ||
561 | lapic_clockevent.features &= ~CLOCK_EVT_FEAT_DUMMY; | ||
562 | else | ||
563 | printk(KERN_WARNING "APIC timer registered as dummy," | ||
564 | " due to nmi_watchdog=%d!\n", nmi_watchdog); | ||
565 | |||
562 | /* Setup the lapic or request the broadcast */ | 566 | /* Setup the lapic or request the broadcast */ |
563 | setup_APIC_timer(); | 567 | setup_APIC_timer(); |
564 | } | 568 | } |
@@ -693,44 +697,44 @@ void clear_local_APIC(void) | |||
693 | */ | 697 | */ |
694 | if (maxlvt >= 3) { | 698 | if (maxlvt >= 3) { |
695 | v = ERROR_APIC_VECTOR; /* any non-zero vector will do */ | 699 | v = ERROR_APIC_VECTOR; /* any non-zero vector will do */ |
696 | apic_write_around(APIC_LVTERR, v | APIC_LVT_MASKED); | 700 | apic_write(APIC_LVTERR, v | APIC_LVT_MASKED); |
697 | } | 701 | } |
698 | /* | 702 | /* |
699 | * Careful: we have to set masks only first to deassert | 703 | * Careful: we have to set masks only first to deassert |
700 | * any level-triggered sources. | 704 | * any level-triggered sources. |
701 | */ | 705 | */ |
702 | v = apic_read(APIC_LVTT); | 706 | v = apic_read(APIC_LVTT); |
703 | apic_write_around(APIC_LVTT, v | APIC_LVT_MASKED); | 707 | apic_write(APIC_LVTT, v | APIC_LVT_MASKED); |
704 | v = apic_read(APIC_LVT0); | 708 | v = apic_read(APIC_LVT0); |
705 | apic_write_around(APIC_LVT0, v | APIC_LVT_MASKED); | 709 | apic_write(APIC_LVT0, v | APIC_LVT_MASKED); |
706 | v = apic_read(APIC_LVT1); | 710 | v = apic_read(APIC_LVT1); |
707 | apic_write_around(APIC_LVT1, v | APIC_LVT_MASKED); | 711 | apic_write(APIC_LVT1, v | APIC_LVT_MASKED); |
708 | if (maxlvt >= 4) { | 712 | if (maxlvt >= 4) { |
709 | v = apic_read(APIC_LVTPC); | 713 | v = apic_read(APIC_LVTPC); |
710 | apic_write_around(APIC_LVTPC, v | APIC_LVT_MASKED); | 714 | apic_write(APIC_LVTPC, v | APIC_LVT_MASKED); |
711 | } | 715 | } |
712 | 716 | ||
713 | /* lets not touch this if we didn't frob it */ | 717 | /* lets not touch this if we didn't frob it */ |
714 | #ifdef CONFIG_X86_MCE_P4THERMAL | 718 | #ifdef CONFIG_X86_MCE_P4THERMAL |
715 | if (maxlvt >= 5) { | 719 | if (maxlvt >= 5) { |
716 | v = apic_read(APIC_LVTTHMR); | 720 | v = apic_read(APIC_LVTTHMR); |
717 | apic_write_around(APIC_LVTTHMR, v | APIC_LVT_MASKED); | 721 | apic_write(APIC_LVTTHMR, v | APIC_LVT_MASKED); |
718 | } | 722 | } |
719 | #endif | 723 | #endif |
720 | /* | 724 | /* |
721 | * Clean APIC state for other OSs: | 725 | * Clean APIC state for other OSs: |
722 | */ | 726 | */ |
723 | apic_write_around(APIC_LVTT, APIC_LVT_MASKED); | 727 | apic_write(APIC_LVTT, APIC_LVT_MASKED); |
724 | apic_write_around(APIC_LVT0, APIC_LVT_MASKED); | 728 | apic_write(APIC_LVT0, APIC_LVT_MASKED); |
725 | apic_write_around(APIC_LVT1, APIC_LVT_MASKED); | 729 | apic_write(APIC_LVT1, APIC_LVT_MASKED); |
726 | if (maxlvt >= 3) | 730 | if (maxlvt >= 3) |
727 | apic_write_around(APIC_LVTERR, APIC_LVT_MASKED); | 731 | apic_write(APIC_LVTERR, APIC_LVT_MASKED); |
728 | if (maxlvt >= 4) | 732 | if (maxlvt >= 4) |
729 | apic_write_around(APIC_LVTPC, APIC_LVT_MASKED); | 733 | apic_write(APIC_LVTPC, APIC_LVT_MASKED); |
730 | 734 | ||
731 | #ifdef CONFIG_X86_MCE_P4THERMAL | 735 | #ifdef CONFIG_X86_MCE_P4THERMAL |
732 | if (maxlvt >= 5) | 736 | if (maxlvt >= 5) |
733 | apic_write_around(APIC_LVTTHMR, APIC_LVT_MASKED); | 737 | apic_write(APIC_LVTTHMR, APIC_LVT_MASKED); |
734 | #endif | 738 | #endif |
735 | /* Integrated APIC (!82489DX) ? */ | 739 | /* Integrated APIC (!82489DX) ? */ |
736 | if (lapic_is_integrated()) { | 740 | if (lapic_is_integrated()) { |
@@ -756,7 +760,7 @@ void disable_local_APIC(void) | |||
756 | */ | 760 | */ |
757 | value = apic_read(APIC_SPIV); | 761 | value = apic_read(APIC_SPIV); |
758 | value &= ~APIC_SPIV_APIC_ENABLED; | 762 | value &= ~APIC_SPIV_APIC_ENABLED; |
759 | apic_write_around(APIC_SPIV, value); | 763 | apic_write(APIC_SPIV, value); |
760 | 764 | ||
761 | /* | 765 | /* |
762 | * When LAPIC was disabled by the BIOS and enabled by the kernel, | 766 | * When LAPIC was disabled by the BIOS and enabled by the kernel, |
@@ -865,8 +869,8 @@ void __init sync_Arb_IDs(void) | |||
865 | apic_wait_icr_idle(); | 869 | apic_wait_icr_idle(); |
866 | 870 | ||
867 | apic_printk(APIC_DEBUG, "Synchronizing Arb IDs.\n"); | 871 | apic_printk(APIC_DEBUG, "Synchronizing Arb IDs.\n"); |
868 | apic_write_around(APIC_ICR, APIC_DEST_ALLINC | APIC_INT_LEVELTRIG | 872 | apic_write(APIC_ICR, |
869 | | APIC_DM_INIT); | 873 | APIC_DEST_ALLINC | APIC_INT_LEVELTRIG | APIC_DM_INIT); |
870 | } | 874 | } |
871 | 875 | ||
872 | /* | 876 | /* |
@@ -902,16 +906,16 @@ void __init init_bsp_APIC(void) | |||
902 | else | 906 | else |
903 | value |= APIC_SPIV_FOCUS_DISABLED; | 907 | value |= APIC_SPIV_FOCUS_DISABLED; |
904 | value |= SPURIOUS_APIC_VECTOR; | 908 | value |= SPURIOUS_APIC_VECTOR; |
905 | apic_write_around(APIC_SPIV, value); | 909 | apic_write(APIC_SPIV, value); |
906 | 910 | ||
907 | /* | 911 | /* |
908 | * Set up the virtual wire mode. | 912 | * Set up the virtual wire mode. |
909 | */ | 913 | */ |
910 | apic_write_around(APIC_LVT0, APIC_DM_EXTINT); | 914 | apic_write(APIC_LVT0, APIC_DM_EXTINT); |
911 | value = APIC_DM_NMI; | 915 | value = APIC_DM_NMI; |
912 | if (!lapic_is_integrated()) /* 82489DX */ | 916 | if (!lapic_is_integrated()) /* 82489DX */ |
913 | value |= APIC_LVT_LEVEL_TRIGGER; | 917 | value |= APIC_LVT_LEVEL_TRIGGER; |
914 | apic_write_around(APIC_LVT1, value); | 918 | apic_write(APIC_LVT1, value); |
915 | } | 919 | } |
916 | 920 | ||
917 | static void __cpuinit lapic_setup_esr(void) | 921 | static void __cpuinit lapic_setup_esr(void) |
@@ -926,7 +930,7 @@ static void __cpuinit lapic_setup_esr(void) | |||
926 | 930 | ||
927 | /* enables sending errors */ | 931 | /* enables sending errors */ |
928 | value = ERROR_APIC_VECTOR; | 932 | value = ERROR_APIC_VECTOR; |
929 | apic_write_around(APIC_LVTERR, value); | 933 | apic_write(APIC_LVTERR, value); |
930 | /* | 934 | /* |
931 | * spec says clear errors after enabling vector. | 935 | * spec says clear errors after enabling vector. |
932 | */ | 936 | */ |
@@ -989,7 +993,7 @@ void __cpuinit setup_local_APIC(void) | |||
989 | */ | 993 | */ |
990 | value = apic_read(APIC_TASKPRI); | 994 | value = apic_read(APIC_TASKPRI); |
991 | value &= ~APIC_TPRI_MASK; | 995 | value &= ~APIC_TPRI_MASK; |
992 | apic_write_around(APIC_TASKPRI, value); | 996 | apic_write(APIC_TASKPRI, value); |
993 | 997 | ||
994 | /* | 998 | /* |
995 | * After a crash, we no longer service the interrupts and a pending | 999 | * After a crash, we no longer service the interrupts and a pending |
@@ -1047,7 +1051,7 @@ void __cpuinit setup_local_APIC(void) | |||
1047 | * Set spurious IRQ vector | 1051 | * Set spurious IRQ vector |
1048 | */ | 1052 | */ |
1049 | value |= SPURIOUS_APIC_VECTOR; | 1053 | value |= SPURIOUS_APIC_VECTOR; |
1050 | apic_write_around(APIC_SPIV, value); | 1054 | apic_write(APIC_SPIV, value); |
1051 | 1055 | ||
1052 | /* | 1056 | /* |
1053 | * Set up LVT0, LVT1: | 1057 | * Set up LVT0, LVT1: |
@@ -1069,7 +1073,7 @@ void __cpuinit setup_local_APIC(void) | |||
1069 | apic_printk(APIC_VERBOSE, "masked ExtINT on CPU#%d\n", | 1073 | apic_printk(APIC_VERBOSE, "masked ExtINT on CPU#%d\n", |
1070 | smp_processor_id()); | 1074 | smp_processor_id()); |
1071 | } | 1075 | } |
1072 | apic_write_around(APIC_LVT0, value); | 1076 | apic_write(APIC_LVT0, value); |
1073 | 1077 | ||
1074 | /* | 1078 | /* |
1075 | * only the BP should see the LINT1 NMI signal, obviously. | 1079 | * only the BP should see the LINT1 NMI signal, obviously. |
@@ -1080,7 +1084,7 @@ void __cpuinit setup_local_APIC(void) | |||
1080 | value = APIC_DM_NMI | APIC_LVT_MASKED; | 1084 | value = APIC_DM_NMI | APIC_LVT_MASKED; |
1081 | if (!integrated) /* 82489DX */ | 1085 | if (!integrated) /* 82489DX */ |
1082 | value |= APIC_LVT_LEVEL_TRIGGER; | 1086 | value |= APIC_LVT_LEVEL_TRIGGER; |
1083 | apic_write_around(APIC_LVT1, value); | 1087 | apic_write(APIC_LVT1, value); |
1084 | } | 1088 | } |
1085 | 1089 | ||
1086 | void __cpuinit end_local_APIC_setup(void) | 1090 | void __cpuinit end_local_APIC_setup(void) |
@@ -1091,7 +1095,7 @@ void __cpuinit end_local_APIC_setup(void) | |||
1091 | /* Disable the local apic timer */ | 1095 | /* Disable the local apic timer */ |
1092 | value = apic_read(APIC_LVTT); | 1096 | value = apic_read(APIC_LVTT); |
1093 | value |= (APIC_LVT_MASKED | LOCAL_TIMER_VECTOR); | 1097 | value |= (APIC_LVT_MASKED | LOCAL_TIMER_VECTOR); |
1094 | apic_write_around(APIC_LVTT, value); | 1098 | apic_write(APIC_LVTT, value); |
1095 | 1099 | ||
1096 | setup_apic_nmi_watchdog(NULL); | 1100 | setup_apic_nmi_watchdog(NULL); |
1097 | apic_pm_activate(); | 1101 | apic_pm_activate(); |
@@ -1214,9 +1218,6 @@ int apic_version[MAX_APICS]; | |||
1214 | 1218 | ||
1215 | int __init APIC_init_uniprocessor(void) | 1219 | int __init APIC_init_uniprocessor(void) |
1216 | { | 1220 | { |
1217 | if (disable_apic) | ||
1218 | clear_cpu_cap(&boot_cpu_data, X86_FEATURE_APIC); | ||
1219 | |||
1220 | if (!smp_found_config && !cpu_has_apic) | 1221 | if (!smp_found_config && !cpu_has_apic) |
1221 | return -1; | 1222 | return -1; |
1222 | 1223 | ||
@@ -1419,7 +1420,7 @@ void disconnect_bsp_APIC(int virt_wire_setup) | |||
1419 | value &= ~APIC_VECTOR_MASK; | 1420 | value &= ~APIC_VECTOR_MASK; |
1420 | value |= APIC_SPIV_APIC_ENABLED; | 1421 | value |= APIC_SPIV_APIC_ENABLED; |
1421 | value |= 0xf; | 1422 | value |= 0xf; |
1422 | apic_write_around(APIC_SPIV, value); | 1423 | apic_write(APIC_SPIV, value); |
1423 | 1424 | ||
1424 | if (!virt_wire_setup) { | 1425 | if (!virt_wire_setup) { |
1425 | /* | 1426 | /* |
@@ -1432,10 +1433,10 @@ void disconnect_bsp_APIC(int virt_wire_setup) | |||
1432 | APIC_LVT_LEVEL_TRIGGER | APIC_LVT_MASKED); | 1433 | APIC_LVT_LEVEL_TRIGGER | APIC_LVT_MASKED); |
1433 | value |= APIC_LVT_REMOTE_IRR | APIC_SEND_PENDING; | 1434 | value |= APIC_LVT_REMOTE_IRR | APIC_SEND_PENDING; |
1434 | value = SET_APIC_DELIVERY_MODE(value, APIC_MODE_EXTINT); | 1435 | value = SET_APIC_DELIVERY_MODE(value, APIC_MODE_EXTINT); |
1435 | apic_write_around(APIC_LVT0, value); | 1436 | apic_write(APIC_LVT0, value); |
1436 | } else { | 1437 | } else { |
1437 | /* Disable LVT0 */ | 1438 | /* Disable LVT0 */ |
1438 | apic_write_around(APIC_LVT0, APIC_LVT_MASKED); | 1439 | apic_write(APIC_LVT0, APIC_LVT_MASKED); |
1439 | } | 1440 | } |
1440 | 1441 | ||
1441 | /* | 1442 | /* |
@@ -1449,12 +1450,10 @@ void disconnect_bsp_APIC(int virt_wire_setup) | |||
1449 | APIC_LVT_LEVEL_TRIGGER | APIC_LVT_MASKED); | 1450 | APIC_LVT_LEVEL_TRIGGER | APIC_LVT_MASKED); |
1450 | value |= APIC_LVT_REMOTE_IRR | APIC_SEND_PENDING; | 1451 | value |= APIC_LVT_REMOTE_IRR | APIC_SEND_PENDING; |
1451 | value = SET_APIC_DELIVERY_MODE(value, APIC_MODE_NMI); | 1452 | value = SET_APIC_DELIVERY_MODE(value, APIC_MODE_NMI); |
1452 | apic_write_around(APIC_LVT1, value); | 1453 | apic_write(APIC_LVT1, value); |
1453 | } | 1454 | } |
1454 | } | 1455 | } |
1455 | 1456 | ||
1456 | unsigned int __cpuinitdata maxcpus = NR_CPUS; | ||
1457 | |||
1458 | void __cpuinit generic_processor_info(int apicid, int version) | 1457 | void __cpuinit generic_processor_info(int apicid, int version) |
1459 | { | 1458 | { |
1460 | int cpu; | 1459 | int cpu; |
@@ -1481,12 +1480,6 @@ void __cpuinit generic_processor_info(int apicid, int version) | |||
1481 | return; | 1480 | return; |
1482 | } | 1481 | } |
1483 | 1482 | ||
1484 | if (num_processors >= maxcpus) { | ||
1485 | printk(KERN_WARNING "WARNING: maxcpus limit of %i reached." | ||
1486 | " Processor ignored.\n", maxcpus); | ||
1487 | return; | ||
1488 | } | ||
1489 | |||
1490 | num_processors++; | 1483 | num_processors++; |
1491 | cpus_complement(tmp_map, cpu_present_map); | 1484 | cpus_complement(tmp_map, cpu_present_map); |
1492 | cpu = first_cpu(tmp_map); | 1485 | cpu = first_cpu(tmp_map); |
@@ -1700,7 +1693,7 @@ early_param("lapic", parse_lapic); | |||
1700 | static int __init parse_nolapic(char *arg) | 1693 | static int __init parse_nolapic(char *arg) |
1701 | { | 1694 | { |
1702 | disable_apic = 1; | 1695 | disable_apic = 1; |
1703 | clear_cpu_cap(&boot_cpu_data, X86_FEATURE_APIC); | 1696 | setup_clear_cpu_cap(X86_FEATURE_APIC); |
1704 | return 0; | 1697 | return 0; |
1705 | } | 1698 | } |
1706 | early_param("nolapic", parse_nolapic); | 1699 | early_param("nolapic", parse_nolapic); |
@@ -1719,15 +1712,19 @@ static int __init parse_lapic_timer_c2_ok(char *arg) | |||
1719 | } | 1712 | } |
1720 | early_param("lapic_timer_c2_ok", parse_lapic_timer_c2_ok); | 1713 | early_param("lapic_timer_c2_ok", parse_lapic_timer_c2_ok); |
1721 | 1714 | ||
1722 | static int __init apic_set_verbosity(char *str) | 1715 | static int __init apic_set_verbosity(char *arg) |
1723 | { | 1716 | { |
1724 | if (strcmp("debug", str) == 0) | 1717 | if (!arg) |
1718 | return -EINVAL; | ||
1719 | |||
1720 | if (strcmp(arg, "debug") == 0) | ||
1725 | apic_verbosity = APIC_DEBUG; | 1721 | apic_verbosity = APIC_DEBUG; |
1726 | else if (strcmp("verbose", str) == 0) | 1722 | else if (strcmp(arg, "verbose") == 0) |
1727 | apic_verbosity = APIC_VERBOSE; | 1723 | apic_verbosity = APIC_VERBOSE; |
1728 | return 1; | 1724 | |
1725 | return 0; | ||
1729 | } | 1726 | } |
1730 | __setup("apic=", apic_set_verbosity); | 1727 | early_param("apic", apic_set_verbosity); |
1731 | 1728 | ||
1732 | static int __init lapic_insert_resource(void) | 1729 | static int __init lapic_insert_resource(void) |
1733 | { | 1730 | { |
diff --git a/arch/x86/kernel/apic_64.c b/arch/x86/kernel/apic_64.c index 1e3d32e27c14..446c062e831c 100644 --- a/arch/x86/kernel/apic_64.c +++ b/arch/x86/kernel/apic_64.c | |||
@@ -54,7 +54,7 @@ EXPORT_SYMBOL_GPL(local_apic_timer_c2_ok); | |||
54 | /* | 54 | /* |
55 | * Debug level, exported for io_apic.c | 55 | * Debug level, exported for io_apic.c |
56 | */ | 56 | */ |
57 | int apic_verbosity; | 57 | unsigned int apic_verbosity; |
58 | 58 | ||
59 | /* Have we found an MP table */ | 59 | /* Have we found an MP table */ |
60 | int smp_found_config; | 60 | int smp_found_config; |
@@ -90,7 +90,6 @@ static unsigned long apic_phys; | |||
90 | 90 | ||
91 | unsigned long mp_lapic_addr; | 91 | unsigned long mp_lapic_addr; |
92 | 92 | ||
93 | unsigned int __cpuinitdata maxcpus = NR_CPUS; | ||
94 | /* | 93 | /* |
95 | * Get the LAPIC version | 94 | * Get the LAPIC version |
96 | */ | 95 | */ |
@@ -314,7 +313,7 @@ static void setup_APIC_timer(void) | |||
314 | 313 | ||
315 | #define TICK_COUNT 100000000 | 314 | #define TICK_COUNT 100000000 |
316 | 315 | ||
317 | static void __init calibrate_APIC_clock(void) | 316 | static int __init calibrate_APIC_clock(void) |
318 | { | 317 | { |
319 | unsigned apic, apic_start; | 318 | unsigned apic, apic_start; |
320 | unsigned long tsc, tsc_start; | 319 | unsigned long tsc, tsc_start; |
@@ -368,6 +367,17 @@ static void __init calibrate_APIC_clock(void) | |||
368 | clockevent_delta2ns(0xF, &lapic_clockevent); | 367 | clockevent_delta2ns(0xF, &lapic_clockevent); |
369 | 368 | ||
370 | calibration_result = result / HZ; | 369 | calibration_result = result / HZ; |
370 | |||
371 | /* | ||
372 | * Do a sanity check on the APIC calibration result | ||
373 | */ | ||
374 | if (calibration_result < (1000000 / HZ)) { | ||
375 | printk(KERN_WARNING | ||
376 | "APIC frequency too slow, disabling apic timer\n"); | ||
377 | return -1; | ||
378 | } | ||
379 | |||
380 | return 0; | ||
371 | } | 381 | } |
372 | 382 | ||
373 | /* | 383 | /* |
@@ -394,14 +404,7 @@ void __init setup_boot_APIC_clock(void) | |||
394 | } | 404 | } |
395 | 405 | ||
396 | printk(KERN_INFO "Using local APIC timer interrupts.\n"); | 406 | printk(KERN_INFO "Using local APIC timer interrupts.\n"); |
397 | calibrate_APIC_clock(); | 407 | if (calibrate_APIC_clock()) { |
398 | |||
399 | /* | ||
400 | * Do a sanity check on the APIC calibration result | ||
401 | */ | ||
402 | if (calibration_result < (1000000 / HZ)) { | ||
403 | printk(KERN_WARNING | ||
404 | "APIC frequency too slow, disabling apic timer\n"); | ||
405 | /* No broadcast on UP ! */ | 408 | /* No broadcast on UP ! */ |
406 | if (num_possible_cpus() > 1) | 409 | if (num_possible_cpus() > 1) |
407 | setup_APIC_timer(); | 410 | setup_APIC_timer(); |
@@ -1058,12 +1061,6 @@ void __cpuinit generic_processor_info(int apicid, int version) | |||
1058 | return; | 1061 | return; |
1059 | } | 1062 | } |
1060 | 1063 | ||
1061 | if (num_processors >= maxcpus) { | ||
1062 | printk(KERN_WARNING "WARNING: maxcpus limit of %i reached." | ||
1063 | " Processor ignored.\n", maxcpus); | ||
1064 | return; | ||
1065 | } | ||
1066 | |||
1067 | num_processors++; | 1064 | num_processors++; |
1068 | cpus_complement(tmp_map, cpu_present_map); | 1065 | cpus_complement(tmp_map, cpu_present_map); |
1069 | cpu = first_cpu(tmp_map); | 1066 | cpu = first_cpu(tmp_map); |
@@ -1337,7 +1334,7 @@ early_param("apic", apic_set_verbosity); | |||
1337 | static __init int setup_disableapic(char *str) | 1334 | static __init int setup_disableapic(char *str) |
1338 | { | 1335 | { |
1339 | disable_apic = 1; | 1336 | disable_apic = 1; |
1340 | clear_cpu_cap(&boot_cpu_data, X86_FEATURE_APIC); | 1337 | setup_clear_cpu_cap(X86_FEATURE_APIC); |
1341 | return 0; | 1338 | return 0; |
1342 | } | 1339 | } |
1343 | early_param("disableapic", setup_disableapic); | 1340 | early_param("disableapic", setup_disableapic); |
diff --git a/arch/x86/kernel/apm_32.c b/arch/x86/kernel/apm_32.c index bf9b441331e9..732d1f4e10ee 100644 --- a/arch/x86/kernel/apm_32.c +++ b/arch/x86/kernel/apm_32.c | |||
@@ -219,7 +219,6 @@ | |||
219 | #include <linux/time.h> | 219 | #include <linux/time.h> |
220 | #include <linux/sched.h> | 220 | #include <linux/sched.h> |
221 | #include <linux/pm.h> | 221 | #include <linux/pm.h> |
222 | #include <linux/pm_legacy.h> | ||
223 | #include <linux/capability.h> | 222 | #include <linux/capability.h> |
224 | #include <linux/device.h> | 223 | #include <linux/device.h> |
225 | #include <linux/kernel.h> | 224 | #include <linux/kernel.h> |
@@ -235,6 +234,7 @@ | |||
235 | #include <asm/uaccess.h> | 234 | #include <asm/uaccess.h> |
236 | #include <asm/desc.h> | 235 | #include <asm/desc.h> |
237 | #include <asm/i8253.h> | 236 | #include <asm/i8253.h> |
237 | #include <asm/olpc.h> | ||
238 | #include <asm/paravirt.h> | 238 | #include <asm/paravirt.h> |
239 | #include <asm/reboot.h> | 239 | #include <asm/reboot.h> |
240 | 240 | ||
@@ -2218,7 +2218,7 @@ static int __init apm_init(void) | |||
2218 | 2218 | ||
2219 | dmi_check_system(apm_dmi_table); | 2219 | dmi_check_system(apm_dmi_table); |
2220 | 2220 | ||
2221 | if (apm_info.bios.version == 0 || paravirt_enabled()) { | 2221 | if (apm_info.bios.version == 0 || paravirt_enabled() || machine_is_olpc()) { |
2222 | printk(KERN_INFO "apm: BIOS not found.\n"); | 2222 | printk(KERN_INFO "apm: BIOS not found.\n"); |
2223 | return -ENODEV; | 2223 | return -ENODEV; |
2224 | } | 2224 | } |
diff --git a/arch/x86/kernel/asm-offsets_64.c b/arch/x86/kernel/asm-offsets_64.c index bacf5deeec2d..aa89387006fe 100644 --- a/arch/x86/kernel/asm-offsets_64.c +++ b/arch/x86/kernel/asm-offsets_64.c | |||
@@ -18,6 +18,8 @@ | |||
18 | #include <asm/ia32.h> | 18 | #include <asm/ia32.h> |
19 | #include <asm/bootparam.h> | 19 | #include <asm/bootparam.h> |
20 | 20 | ||
21 | #include <xen/interface/xen.h> | ||
22 | |||
21 | #define __NO_STUBS 1 | 23 | #define __NO_STUBS 1 |
22 | #undef __SYSCALL | 24 | #undef __SYSCALL |
23 | #undef _ASM_X86_64_UNISTD_H_ | 25 | #undef _ASM_X86_64_UNISTD_H_ |
@@ -131,5 +133,14 @@ int main(void) | |||
131 | OFFSET(BP_loadflags, boot_params, hdr.loadflags); | 133 | OFFSET(BP_loadflags, boot_params, hdr.loadflags); |
132 | OFFSET(BP_hardware_subarch, boot_params, hdr.hardware_subarch); | 134 | OFFSET(BP_hardware_subarch, boot_params, hdr.hardware_subarch); |
133 | OFFSET(BP_version, boot_params, hdr.version); | 135 | OFFSET(BP_version, boot_params, hdr.version); |
136 | |||
137 | BLANK(); | ||
138 | DEFINE(PAGE_SIZE_asm, PAGE_SIZE); | ||
139 | #ifdef CONFIG_XEN | ||
140 | BLANK(); | ||
141 | OFFSET(XEN_vcpu_info_mask, vcpu_info, evtchn_upcall_mask); | ||
142 | OFFSET(XEN_vcpu_info_pending, vcpu_info, evtchn_upcall_pending); | ||
143 | #undef ENTRY | ||
144 | #endif | ||
134 | return 0; | 145 | return 0; |
135 | } | 146 | } |
diff --git a/arch/x86/kernel/bios_uv.c b/arch/x86/kernel/bios_uv.c new file mode 100644 index 000000000000..c639bd55391c --- /dev/null +++ b/arch/x86/kernel/bios_uv.c | |||
@@ -0,0 +1,48 @@ | |||
1 | /* | ||
2 | * BIOS run time interface routines. | ||
3 | * | ||
4 | * Copyright (c) 2008 Silicon Graphics, Inc. All Rights Reserved. | ||
5 | * | ||
6 | * This program is free software; you can redistribute it and/or modify | ||
7 | * it under the terms of the GNU General Public License as published by | ||
8 | * the Free Software Foundation; either version 2 of the License, or | ||
9 | * (at your option) any later version. | ||
10 | * | ||
11 | * This program is distributed in the hope that it will be useful, | ||
12 | * but WITHOUT ANY WARRANTY; without even the implied warranty of | ||
13 | * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the | ||
14 | * GNU General Public License for more details. | ||
15 | * | ||
16 | * You should have received a copy of the GNU General Public License | ||
17 | * along with this program; if not, write to the Free Software | ||
18 | * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA | ||
19 | */ | ||
20 | |||
21 | #include <asm/uv/bios.h> | ||
22 | |||
23 | const char * | ||
24 | x86_bios_strerror(long status) | ||
25 | { | ||
26 | const char *str; | ||
27 | switch (status) { | ||
28 | case 0: str = "Call completed without error"; break; | ||
29 | case -1: str = "Not implemented"; break; | ||
30 | case -2: str = "Invalid argument"; break; | ||
31 | case -3: str = "Call completed with error"; break; | ||
32 | default: str = "Unknown BIOS status code"; break; | ||
33 | } | ||
34 | return str; | ||
35 | } | ||
36 | |||
37 | long | ||
38 | x86_bios_freq_base(unsigned long which, unsigned long *ticks_per_second, | ||
39 | unsigned long *drift_info) | ||
40 | { | ||
41 | struct uv_bios_retval isrv; | ||
42 | |||
43 | BIOS_CALL(isrv, BIOS_FREQ_BASE, which, 0, 0, 0, 0, 0, 0); | ||
44 | *ticks_per_second = isrv.v0; | ||
45 | *drift_info = isrv.v1; | ||
46 | return isrv.status; | ||
47 | } | ||
48 | EXPORT_SYMBOL_GPL(x86_bios_freq_base); | ||
diff --git a/arch/x86/kernel/cpu/addon_cpuid_features.c b/arch/x86/kernel/cpu/addon_cpuid_features.c index 84a8220a6072..a6ef672adbba 100644 --- a/arch/x86/kernel/cpu/addon_cpuid_features.c +++ b/arch/x86/kernel/cpu/addon_cpuid_features.c | |||
@@ -56,9 +56,22 @@ void __cpuinit validate_pat_support(struct cpuinfo_x86 *c) | |||
56 | 56 | ||
57 | switch (c->x86_vendor) { | 57 | switch (c->x86_vendor) { |
58 | case X86_VENDOR_INTEL: | 58 | case X86_VENDOR_INTEL: |
59 | if (c->x86 == 0xF || (c->x86 == 6 && c->x86_model >= 15)) | 59 | /* |
60 | * There is a known erratum on Pentium III and Core Solo | ||
61 | * and Core Duo CPUs. | ||
62 | * " Page with PAT set to WC while associated MTRR is UC | ||
63 | * may consolidate to UC " | ||
64 | * Because of this erratum, it is better to stick with | ||
65 | * setting WC in MTRR rather than using PAT on these CPUs. | ||
66 | * | ||
67 | * Enable PAT WC only on P4, Core 2 or later CPUs. | ||
68 | */ | ||
69 | if (c->x86 > 0x6 || (c->x86 == 6 && c->x86_model >= 15)) | ||
60 | return; | 70 | return; |
61 | break; | 71 | |
72 | pat_disable("PAT WC disabled due to known CPU erratum."); | ||
73 | return; | ||
74 | |||
62 | case X86_VENDOR_AMD: | 75 | case X86_VENDOR_AMD: |
63 | case X86_VENDOR_CENTAUR: | 76 | case X86_VENDOR_CENTAUR: |
64 | case X86_VENDOR_TRANSMETA: | 77 | case X86_VENDOR_TRANSMETA: |
diff --git a/arch/x86/kernel/cpu/amd.c b/arch/x86/kernel/cpu/amd.c index 81a07ca65d44..18514ed26104 100644 --- a/arch/x86/kernel/cpu/amd.c +++ b/arch/x86/kernel/cpu/amd.c | |||
@@ -24,8 +24,6 @@ | |||
24 | extern void vide(void); | 24 | extern void vide(void); |
25 | __asm__(".align 4\nvide: ret"); | 25 | __asm__(".align 4\nvide: ret"); |
26 | 26 | ||
27 | int force_mwait __cpuinitdata; | ||
28 | |||
29 | static void __cpuinit early_init_amd(struct cpuinfo_x86 *c) | 27 | static void __cpuinit early_init_amd(struct cpuinfo_x86 *c) |
30 | { | 28 | { |
31 | if (cpuid_eax(0x80000000) >= 0x80000007) { | 29 | if (cpuid_eax(0x80000000) >= 0x80000007) { |
@@ -33,6 +31,11 @@ static void __cpuinit early_init_amd(struct cpuinfo_x86 *c) | |||
33 | if (c->x86_power & (1<<8)) | 31 | if (c->x86_power & (1<<8)) |
34 | set_cpu_cap(c, X86_FEATURE_CONSTANT_TSC); | 32 | set_cpu_cap(c, X86_FEATURE_CONSTANT_TSC); |
35 | } | 33 | } |
34 | |||
35 | /* Set MTRR capability flag if appropriate */ | ||
36 | if (c->x86_model == 13 || c->x86_model == 9 || | ||
37 | (c->x86_model == 8 && c->x86_mask >= 8)) | ||
38 | set_cpu_cap(c, X86_FEATURE_K6_MTRR); | ||
36 | } | 39 | } |
37 | 40 | ||
38 | static void __cpuinit init_amd(struct cpuinfo_x86 *c) | 41 | static void __cpuinit init_amd(struct cpuinfo_x86 *c) |
@@ -168,10 +171,6 @@ static void __cpuinit init_amd(struct cpuinfo_x86 *c) | |||
168 | mbytes); | 171 | mbytes); |
169 | } | 172 | } |
170 | 173 | ||
171 | /* Set MTRR capability flag if appropriate */ | ||
172 | if (c->x86_model == 13 || c->x86_model == 9 || | ||
173 | (c->x86_model == 8 && c->x86_mask >= 8)) | ||
174 | set_cpu_cap(c, X86_FEATURE_K6_MTRR); | ||
175 | break; | 174 | break; |
176 | } | 175 | } |
177 | 176 | ||
diff --git a/arch/x86/kernel/cpu/amd_64.c b/arch/x86/kernel/cpu/amd_64.c index 7c36fb8a28d4..d1692b2a41ff 100644 --- a/arch/x86/kernel/cpu/amd_64.c +++ b/arch/x86/kernel/cpu/amd_64.c | |||
@@ -115,6 +115,8 @@ static void __cpuinit early_init_amd(struct cpuinfo_x86 *c) | |||
115 | /* c->x86_power is 8000_0007 edx. Bit 8 is constant TSC */ | 115 | /* c->x86_power is 8000_0007 edx. Bit 8 is constant TSC */ |
116 | if (c->x86_power & (1<<8)) | 116 | if (c->x86_power & (1<<8)) |
117 | set_cpu_cap(c, X86_FEATURE_CONSTANT_TSC); | 117 | set_cpu_cap(c, X86_FEATURE_CONSTANT_TSC); |
118 | |||
119 | set_cpu_cap(c, X86_FEATURE_SYSCALL32); | ||
118 | } | 120 | } |
119 | 121 | ||
120 | static void __cpuinit init_amd(struct cpuinfo_x86 *c) | 122 | static void __cpuinit init_amd(struct cpuinfo_x86 *c) |
diff --git a/arch/x86/kernel/cpu/bugs.c b/arch/x86/kernel/cpu/bugs.c index 1b1c56bb338f..c8e315f1aa83 100644 --- a/arch/x86/kernel/cpu/bugs.c +++ b/arch/x86/kernel/cpu/bugs.c | |||
@@ -50,6 +50,8 @@ static double __initdata y = 3145727.0; | |||
50 | */ | 50 | */ |
51 | static void __init check_fpu(void) | 51 | static void __init check_fpu(void) |
52 | { | 52 | { |
53 | s32 fdiv_bug; | ||
54 | |||
53 | if (!boot_cpu_data.hard_math) { | 55 | if (!boot_cpu_data.hard_math) { |
54 | #ifndef CONFIG_MATH_EMULATION | 56 | #ifndef CONFIG_MATH_EMULATION |
55 | printk(KERN_EMERG "No coprocessor found and no math emulation present.\n"); | 57 | printk(KERN_EMERG "No coprocessor found and no math emulation present.\n"); |
@@ -74,8 +76,10 @@ static void __init check_fpu(void) | |||
74 | "fistpl %0\n\t" | 76 | "fistpl %0\n\t" |
75 | "fwait\n\t" | 77 | "fwait\n\t" |
76 | "fninit" | 78 | "fninit" |
77 | : "=m" (*&boot_cpu_data.fdiv_bug) | 79 | : "=m" (*&fdiv_bug) |
78 | : "m" (*&x), "m" (*&y)); | 80 | : "m" (*&x), "m" (*&y)); |
81 | |||
82 | boot_cpu_data.fdiv_bug = fdiv_bug; | ||
79 | if (boot_cpu_data.fdiv_bug) | 83 | if (boot_cpu_data.fdiv_bug) |
80 | printk("Hmm, FPU with FDIV bug.\n"); | 84 | printk("Hmm, FPU with FDIV bug.\n"); |
81 | } | 85 | } |
@@ -131,13 +135,7 @@ static void __init check_popad(void) | |||
131 | * (for due to lack of "invlpg" and working WP on a i386) | 135 | * (for due to lack of "invlpg" and working WP on a i386) |
132 | * - In order to run on anything without a TSC, we need to be | 136 | * - In order to run on anything without a TSC, we need to be |
133 | * compiled for a i486. | 137 | * compiled for a i486. |
134 | * - In order to support the local APIC on a buggy Pentium machine, | 138 | */ |
135 | * we need to be compiled with CONFIG_X86_GOOD_APIC disabled, | ||
136 | * which happens implicitly if compiled for a Pentium or lower | ||
137 | * (unless an advanced selection of CPU features is used) as an | ||
138 | * otherwise config implies a properly working local APIC without | ||
139 | * the need to do extra reads from the APIC. | ||
140 | */ | ||
141 | 139 | ||
142 | static void __init check_config(void) | 140 | static void __init check_config(void) |
143 | { | 141 | { |
@@ -151,21 +149,6 @@ static void __init check_config(void) | |||
151 | if (boot_cpu_data.x86 == 3) | 149 | if (boot_cpu_data.x86 == 3) |
152 | panic("Kernel requires i486+ for 'invlpg' and other features"); | 150 | panic("Kernel requires i486+ for 'invlpg' and other features"); |
153 | #endif | 151 | #endif |
154 | |||
155 | /* | ||
156 | * If we were told we had a good local APIC, check for buggy Pentia, | ||
157 | * i.e. all B steppings and the C2 stepping of P54C when using their | ||
158 | * integrated APIC (see 11AP erratum in "Pentium Processor | ||
159 | * Specification Update"). | ||
160 | */ | ||
161 | #if defined(CONFIG_X86_LOCAL_APIC) && defined(CONFIG_X86_GOOD_APIC) | ||
162 | if (boot_cpu_data.x86_vendor == X86_VENDOR_INTEL | ||
163 | && cpu_has_apic | ||
164 | && boot_cpu_data.x86 == 5 | ||
165 | && boot_cpu_data.x86_model == 2 | ||
166 | && (boot_cpu_data.x86_mask < 6 || boot_cpu_data.x86_mask == 11)) | ||
167 | panic("Kernel compiled for PMMX+, assumes a local APIC without the read-before-write bug!"); | ||
168 | #endif | ||
169 | } | 152 | } |
170 | 153 | ||
171 | 154 | ||
diff --git a/arch/x86/kernel/cpu/centaur.c b/arch/x86/kernel/cpu/centaur.c index e0f45edd6a55..a0534c04d38a 100644 --- a/arch/x86/kernel/cpu/centaur.c +++ b/arch/x86/kernel/cpu/centaur.c | |||
@@ -314,6 +314,16 @@ enum { | |||
314 | EAMD3D = 1<<20, | 314 | EAMD3D = 1<<20, |
315 | }; | 315 | }; |
316 | 316 | ||
317 | static void __cpuinit early_init_centaur(struct cpuinfo_x86 *c) | ||
318 | { | ||
319 | switch (c->x86) { | ||
320 | case 5: | ||
321 | /* Emulate MTRRs using Centaur's MCR. */ | ||
322 | set_cpu_cap(c, X86_FEATURE_CENTAUR_MCR); | ||
323 | break; | ||
324 | } | ||
325 | } | ||
326 | |||
317 | static void __cpuinit init_centaur(struct cpuinfo_x86 *c) | 327 | static void __cpuinit init_centaur(struct cpuinfo_x86 *c) |
318 | { | 328 | { |
319 | 329 | ||
@@ -462,6 +472,7 @@ centaur_size_cache(struct cpuinfo_x86 *c, unsigned int size) | |||
462 | static struct cpu_dev centaur_cpu_dev __cpuinitdata = { | 472 | static struct cpu_dev centaur_cpu_dev __cpuinitdata = { |
463 | .c_vendor = "Centaur", | 473 | .c_vendor = "Centaur", |
464 | .c_ident = { "CentaurHauls" }, | 474 | .c_ident = { "CentaurHauls" }, |
475 | .c_early_init = early_init_centaur, | ||
465 | .c_init = init_centaur, | 476 | .c_init = init_centaur, |
466 | .c_size_cache = centaur_size_cache, | 477 | .c_size_cache = centaur_size_cache, |
467 | }; | 478 | }; |
diff --git a/arch/x86/kernel/cpu/common.c b/arch/x86/kernel/cpu/common.c index 80ab20d4fa39..4e456bd955bb 100644 --- a/arch/x86/kernel/cpu/common.c +++ b/arch/x86/kernel/cpu/common.c | |||
@@ -13,6 +13,7 @@ | |||
13 | #include <asm/mtrr.h> | 13 | #include <asm/mtrr.h> |
14 | #include <asm/mce.h> | 14 | #include <asm/mce.h> |
15 | #include <asm/pat.h> | 15 | #include <asm/pat.h> |
16 | #include <asm/asm.h> | ||
16 | #ifdef CONFIG_X86_LOCAL_APIC | 17 | #ifdef CONFIG_X86_LOCAL_APIC |
17 | #include <asm/mpspec.h> | 18 | #include <asm/mpspec.h> |
18 | #include <asm/apic.h> | 19 | #include <asm/apic.h> |
@@ -334,11 +335,24 @@ static void __init early_cpu_detect(void) | |||
334 | 335 | ||
335 | get_cpu_vendor(c, 1); | 336 | get_cpu_vendor(c, 1); |
336 | 337 | ||
338 | early_get_cap(c); | ||
339 | |||
337 | if (c->x86_vendor != X86_VENDOR_UNKNOWN && | 340 | if (c->x86_vendor != X86_VENDOR_UNKNOWN && |
338 | cpu_devs[c->x86_vendor]->c_early_init) | 341 | cpu_devs[c->x86_vendor]->c_early_init) |
339 | cpu_devs[c->x86_vendor]->c_early_init(c); | 342 | cpu_devs[c->x86_vendor]->c_early_init(c); |
343 | } | ||
340 | 344 | ||
341 | early_get_cap(c); | 345 | /* |
346 | * The NOPL instruction is supposed to exist on all CPUs with | ||
347 | * family >= 6; unfortunately, that's not true in practice because | ||
348 | * of early VIA chips and (more importantly) broken virtualizers that | ||
349 | * are not easy to detect. In the latter case it doesn't even *fail* | ||
350 | * reliably, so probing for it doesn't even work. Disable it completely | ||
351 | * unless we can find a reliable way to detect all the broken cases. | ||
352 | */ | ||
353 | static void __cpuinit detect_nopl(struct cpuinfo_x86 *c) | ||
354 | { | ||
355 | clear_cpu_cap(c, X86_FEATURE_NOPL); | ||
342 | } | 356 | } |
343 | 357 | ||
344 | static void __cpuinit generic_identify(struct cpuinfo_x86 *c) | 358 | static void __cpuinit generic_identify(struct cpuinfo_x86 *c) |
@@ -395,8 +409,8 @@ static void __cpuinit generic_identify(struct cpuinfo_x86 *c) | |||
395 | } | 409 | } |
396 | 410 | ||
397 | init_scattered_cpuid_features(c); | 411 | init_scattered_cpuid_features(c); |
412 | detect_nopl(c); | ||
398 | } | 413 | } |
399 | |||
400 | } | 414 | } |
401 | 415 | ||
402 | static void __cpuinit squash_the_stupid_serial_number(struct cpuinfo_x86 *c) | 416 | static void __cpuinit squash_the_stupid_serial_number(struct cpuinfo_x86 *c) |
diff --git a/arch/x86/kernel/cpu/common_64.c b/arch/x86/kernel/cpu/common_64.c index 7b8cc72feb40..a11f5d4477cd 100644 --- a/arch/x86/kernel/cpu/common_64.c +++ b/arch/x86/kernel/cpu/common_64.c | |||
@@ -7,19 +7,18 @@ | |||
7 | #include <linux/module.h> | 7 | #include <linux/module.h> |
8 | #include <linux/kgdb.h> | 8 | #include <linux/kgdb.h> |
9 | #include <linux/topology.h> | 9 | #include <linux/topology.h> |
10 | #include <linux/string.h> | ||
11 | #include <linux/delay.h> | 10 | #include <linux/delay.h> |
12 | #include <linux/smp.h> | 11 | #include <linux/smp.h> |
13 | #include <linux/module.h> | ||
14 | #include <linux/percpu.h> | 12 | #include <linux/percpu.h> |
15 | #include <asm/processor.h> | ||
16 | #include <asm/i387.h> | 13 | #include <asm/i387.h> |
17 | #include <asm/msr.h> | 14 | #include <asm/msr.h> |
18 | #include <asm/io.h> | 15 | #include <asm/io.h> |
16 | #include <asm/linkage.h> | ||
19 | #include <asm/mmu_context.h> | 17 | #include <asm/mmu_context.h> |
20 | #include <asm/mtrr.h> | 18 | #include <asm/mtrr.h> |
21 | #include <asm/mce.h> | 19 | #include <asm/mce.h> |
22 | #include <asm/pat.h> | 20 | #include <asm/pat.h> |
21 | #include <asm/asm.h> | ||
23 | #include <asm/numa.h> | 22 | #include <asm/numa.h> |
24 | #ifdef CONFIG_X86_LOCAL_APIC | 23 | #ifdef CONFIG_X86_LOCAL_APIC |
25 | #include <asm/mpspec.h> | 24 | #include <asm/mpspec.h> |
@@ -217,6 +216,39 @@ static void __init early_cpu_support_print(void) | |||
217 | } | 216 | } |
218 | } | 217 | } |
219 | 218 | ||
219 | /* | ||
220 | * The NOPL instruction is supposed to exist on all CPUs with | ||
221 | * family >= 6, unfortunately, that's not true in practice because | ||
222 | * of early VIA chips and (more importantly) broken virtualizers that | ||
223 | * are not easy to detect. Hence, probe for it based on first | ||
224 | * principles. | ||
225 | * | ||
226 | * Note: no 64-bit chip is known to lack these, but put the code here | ||
227 | * for consistency with 32 bits, and to make it utterly trivial to | ||
228 | * diagnose the problem should it ever surface. | ||
229 | */ | ||
230 | static void __cpuinit detect_nopl(struct cpuinfo_x86 *c) | ||
231 | { | ||
232 | const u32 nopl_signature = 0x888c53b1; /* Random number */ | ||
233 | u32 has_nopl = nopl_signature; | ||
234 | |||
235 | clear_cpu_cap(c, X86_FEATURE_NOPL); | ||
236 | if (c->x86 >= 6) { | ||
237 | asm volatile("\n" | ||
238 | "1: .byte 0x0f,0x1f,0xc0\n" /* nopl %eax */ | ||
239 | "2:\n" | ||
240 | " .section .fixup,\"ax\"\n" | ||
241 | "3: xor %0,%0\n" | ||
242 | " jmp 2b\n" | ||
243 | " .previous\n" | ||
244 | _ASM_EXTABLE(1b,3b) | ||
245 | : "+a" (has_nopl)); | ||
246 | |||
247 | if (has_nopl == nopl_signature) | ||
248 | set_cpu_cap(c, X86_FEATURE_NOPL); | ||
249 | } | ||
250 | } | ||
251 | |||
220 | static void __cpuinit early_identify_cpu(struct cpuinfo_x86 *c); | 252 | static void __cpuinit early_identify_cpu(struct cpuinfo_x86 *c); |
221 | 253 | ||
222 | void __init early_cpu_init(void) | 254 | void __init early_cpu_init(void) |
@@ -305,7 +337,6 @@ static void __cpuinit early_identify_cpu(struct cpuinfo_x86 *c) | |||
305 | c->x86_capability[2] = cpuid_edx(0x80860001); | 337 | c->x86_capability[2] = cpuid_edx(0x80860001); |
306 | } | 338 | } |
307 | 339 | ||
308 | c->extended_cpuid_level = cpuid_eax(0x80000000); | ||
309 | if (c->extended_cpuid_level >= 0x80000007) | 340 | if (c->extended_cpuid_level >= 0x80000007) |
310 | c->x86_power = cpuid_edx(0x80000007); | 341 | c->x86_power = cpuid_edx(0x80000007); |
311 | 342 | ||
@@ -316,18 +347,13 @@ static void __cpuinit early_identify_cpu(struct cpuinfo_x86 *c) | |||
316 | c->x86_phys_bits = eax & 0xff; | 347 | c->x86_phys_bits = eax & 0xff; |
317 | } | 348 | } |
318 | 349 | ||
319 | /* Assume all 64-bit CPUs support 32-bit syscall */ | 350 | detect_nopl(c); |
320 | set_cpu_cap(c, X86_FEATURE_SYSCALL32); | ||
321 | 351 | ||
322 | if (c->x86_vendor != X86_VENDOR_UNKNOWN && | 352 | if (c->x86_vendor != X86_VENDOR_UNKNOWN && |
323 | cpu_devs[c->x86_vendor]->c_early_init) | 353 | cpu_devs[c->x86_vendor]->c_early_init) |
324 | cpu_devs[c->x86_vendor]->c_early_init(c); | 354 | cpu_devs[c->x86_vendor]->c_early_init(c); |
325 | 355 | ||
326 | validate_pat_support(c); | 356 | validate_pat_support(c); |
327 | |||
328 | /* early_param could clear that, but recall get it set again */ | ||
329 | if (disable_apic) | ||
330 | clear_cpu_cap(c, X86_FEATURE_APIC); | ||
331 | } | 357 | } |
332 | 358 | ||
333 | /* | 359 | /* |
@@ -503,22 +529,24 @@ void pda_init(int cpu) | |||
503 | /* others are initialized in smpboot.c */ | 529 | /* others are initialized in smpboot.c */ |
504 | pda->pcurrent = &init_task; | 530 | pda->pcurrent = &init_task; |
505 | pda->irqstackptr = boot_cpu_stack; | 531 | pda->irqstackptr = boot_cpu_stack; |
532 | pda->irqstackptr += IRQSTACKSIZE - 64; | ||
506 | } else { | 533 | } else { |
507 | pda->irqstackptr = (char *) | 534 | if (!pda->irqstackptr) { |
508 | __get_free_pages(GFP_ATOMIC, IRQSTACK_ORDER); | 535 | pda->irqstackptr = (char *) |
509 | if (!pda->irqstackptr) | 536 | __get_free_pages(GFP_ATOMIC, IRQSTACK_ORDER); |
510 | panic("cannot allocate irqstack for cpu %d", cpu); | 537 | if (!pda->irqstackptr) |
538 | panic("cannot allocate irqstack for cpu %d", | ||
539 | cpu); | ||
540 | pda->irqstackptr += IRQSTACKSIZE - 64; | ||
541 | } | ||
511 | 542 | ||
512 | if (pda->nodenumber == 0 && cpu_to_node(cpu) != NUMA_NO_NODE) | 543 | if (pda->nodenumber == 0 && cpu_to_node(cpu) != NUMA_NO_NODE) |
513 | pda->nodenumber = cpu_to_node(cpu); | 544 | pda->nodenumber = cpu_to_node(cpu); |
514 | } | 545 | } |
515 | |||
516 | pda->irqstackptr += IRQSTACKSIZE-64; | ||
517 | } | 546 | } |
518 | 547 | ||
519 | char boot_exception_stacks[(N_EXCEPTION_STACKS - 1) * EXCEPTION_STKSZ + | 548 | char boot_exception_stacks[(N_EXCEPTION_STACKS - 1) * EXCEPTION_STKSZ + |
520 | DEBUG_STKSZ] | 549 | DEBUG_STKSZ] __page_aligned_bss; |
521 | __attribute__((section(".bss.page_aligned"))); | ||
522 | 550 | ||
523 | extern asmlinkage void ignore_sysret(void); | 551 | extern asmlinkage void ignore_sysret(void); |
524 | 552 | ||
@@ -612,19 +640,22 @@ void __cpuinit cpu_init(void) | |||
612 | /* | 640 | /* |
613 | * set up and load the per-CPU TSS | 641 | * set up and load the per-CPU TSS |
614 | */ | 642 | */ |
615 | for (v = 0; v < N_EXCEPTION_STACKS; v++) { | 643 | if (!orig_ist->ist[0]) { |
616 | static const unsigned int order[N_EXCEPTION_STACKS] = { | 644 | static const unsigned int order[N_EXCEPTION_STACKS] = { |
617 | [0 ... N_EXCEPTION_STACKS - 1] = EXCEPTION_STACK_ORDER, | 645 | [0 ... N_EXCEPTION_STACKS - 1] = EXCEPTION_STACK_ORDER, |
618 | [DEBUG_STACK - 1] = DEBUG_STACK_ORDER | 646 | [DEBUG_STACK - 1] = DEBUG_STACK_ORDER |
619 | }; | 647 | }; |
620 | if (cpu) { | 648 | for (v = 0; v < N_EXCEPTION_STACKS; v++) { |
621 | estacks = (char *)__get_free_pages(GFP_ATOMIC, order[v]); | 649 | if (cpu) { |
622 | if (!estacks) | 650 | estacks = (char *)__get_free_pages(GFP_ATOMIC, order[v]); |
623 | panic("Cannot allocate exception stack %ld %d\n", | 651 | if (!estacks) |
624 | v, cpu); | 652 | panic("Cannot allocate exception " |
653 | "stack %ld %d\n", v, cpu); | ||
654 | } | ||
655 | estacks += PAGE_SIZE << order[v]; | ||
656 | orig_ist->ist[v] = t->x86_tss.ist[v] = | ||
657 | (unsigned long)estacks; | ||
625 | } | 658 | } |
626 | estacks += PAGE_SIZE << order[v]; | ||
627 | orig_ist->ist[v] = t->x86_tss.ist[v] = (unsigned long)estacks; | ||
628 | } | 659 | } |
629 | 660 | ||
630 | t->x86_tss.io_bitmap_base = offsetof(struct tss_struct, io_bitmap); | 661 | t->x86_tss.io_bitmap_base = offsetof(struct tss_struct, io_bitmap); |
diff --git a/arch/x86/kernel/cpu/cpufreq/Kconfig b/arch/x86/kernel/cpu/cpufreq/Kconfig index cb7a5715596d..efae3b22a0ff 100644 --- a/arch/x86/kernel/cpu/cpufreq/Kconfig +++ b/arch/x86/kernel/cpu/cpufreq/Kconfig | |||
@@ -235,9 +235,9 @@ config X86_LONGHAUL | |||
235 | If in doubt, say N. | 235 | If in doubt, say N. |
236 | 236 | ||
237 | config X86_E_POWERSAVER | 237 | config X86_E_POWERSAVER |
238 | tristate "VIA C7 Enhanced PowerSaver (EXPERIMENTAL)" | 238 | tristate "VIA C7 Enhanced PowerSaver" |
239 | select CPU_FREQ_TABLE | 239 | select CPU_FREQ_TABLE |
240 | depends on X86_32 && EXPERIMENTAL | 240 | depends on X86_32 |
241 | help | 241 | help |
242 | This adds the CPUFreq driver for VIA C7 processors. | 242 | This adds the CPUFreq driver for VIA C7 processors. |
243 | 243 | ||
diff --git a/arch/x86/kernel/cpu/cpufreq/acpi-cpufreq.c b/arch/x86/kernel/cpu/cpufreq/acpi-cpufreq.c index b0c8208df9fa..dd097b835839 100644 --- a/arch/x86/kernel/cpu/cpufreq/acpi-cpufreq.c +++ b/arch/x86/kernel/cpu/cpufreq/acpi-cpufreq.c | |||
@@ -202,7 +202,7 @@ static void drv_write(struct drv_cmd *cmd) | |||
202 | cpumask_t saved_mask = current->cpus_allowed; | 202 | cpumask_t saved_mask = current->cpus_allowed; |
203 | unsigned int i; | 203 | unsigned int i; |
204 | 204 | ||
205 | for_each_cpu_mask(i, cmd->mask) { | 205 | for_each_cpu_mask_nr(i, cmd->mask) { |
206 | set_cpus_allowed_ptr(current, &cpumask_of_cpu(i)); | 206 | set_cpus_allowed_ptr(current, &cpumask_of_cpu(i)); |
207 | do_drv_write(cmd); | 207 | do_drv_write(cmd); |
208 | } | 208 | } |
@@ -451,7 +451,7 @@ static int acpi_cpufreq_target(struct cpufreq_policy *policy, | |||
451 | 451 | ||
452 | freqs.old = perf->states[perf->state].core_frequency * 1000; | 452 | freqs.old = perf->states[perf->state].core_frequency * 1000; |
453 | freqs.new = data->freq_table[next_state].frequency; | 453 | freqs.new = data->freq_table[next_state].frequency; |
454 | for_each_cpu_mask(i, cmd.mask) { | 454 | for_each_cpu_mask_nr(i, cmd.mask) { |
455 | freqs.cpu = i; | 455 | freqs.cpu = i; |
456 | cpufreq_notify_transition(&freqs, CPUFREQ_PRECHANGE); | 456 | cpufreq_notify_transition(&freqs, CPUFREQ_PRECHANGE); |
457 | } | 457 | } |
@@ -466,7 +466,7 @@ static int acpi_cpufreq_target(struct cpufreq_policy *policy, | |||
466 | } | 466 | } |
467 | } | 467 | } |
468 | 468 | ||
469 | for_each_cpu_mask(i, cmd.mask) { | 469 | for_each_cpu_mask_nr(i, cmd.mask) { |
470 | freqs.cpu = i; | 470 | freqs.cpu = i; |
471 | cpufreq_notify_transition(&freqs, CPUFREQ_POSTCHANGE); | 471 | cpufreq_notify_transition(&freqs, CPUFREQ_POSTCHANGE); |
472 | } | 472 | } |
diff --git a/arch/x86/kernel/cpu/cpufreq/elanfreq.c b/arch/x86/kernel/cpu/cpufreq/elanfreq.c index 94619c22f563..e4a4bf870e94 100644 --- a/arch/x86/kernel/cpu/cpufreq/elanfreq.c +++ b/arch/x86/kernel/cpu/cpufreq/elanfreq.c | |||
@@ -44,7 +44,7 @@ struct s_elan_multiplier { | |||
44 | * It is important that the frequencies | 44 | * It is important that the frequencies |
45 | * are listed in ascending order here! | 45 | * are listed in ascending order here! |
46 | */ | 46 | */ |
47 | struct s_elan_multiplier elan_multiplier[] = { | 47 | static struct s_elan_multiplier elan_multiplier[] = { |
48 | {1000, 0x02, 0x18}, | 48 | {1000, 0x02, 0x18}, |
49 | {2000, 0x02, 0x10}, | 49 | {2000, 0x02, 0x10}, |
50 | {4000, 0x02, 0x08}, | 50 | {4000, 0x02, 0x08}, |
diff --git a/arch/x86/kernel/cpu/cpufreq/p4-clockmod.c b/arch/x86/kernel/cpu/cpufreq/p4-clockmod.c index 199e4e05e5dc..f1685fb91fbd 100644 --- a/arch/x86/kernel/cpu/cpufreq/p4-clockmod.c +++ b/arch/x86/kernel/cpu/cpufreq/p4-clockmod.c | |||
@@ -122,7 +122,7 @@ static int cpufreq_p4_target(struct cpufreq_policy *policy, | |||
122 | return 0; | 122 | return 0; |
123 | 123 | ||
124 | /* notifiers */ | 124 | /* notifiers */ |
125 | for_each_cpu_mask(i, policy->cpus) { | 125 | for_each_cpu_mask_nr(i, policy->cpus) { |
126 | freqs.cpu = i; | 126 | freqs.cpu = i; |
127 | cpufreq_notify_transition(&freqs, CPUFREQ_PRECHANGE); | 127 | cpufreq_notify_transition(&freqs, CPUFREQ_PRECHANGE); |
128 | } | 128 | } |
@@ -130,11 +130,11 @@ static int cpufreq_p4_target(struct cpufreq_policy *policy, | |||
130 | /* run on each logical CPU, see section 13.15.3 of IA32 Intel Architecture Software | 130 | /* run on each logical CPU, see section 13.15.3 of IA32 Intel Architecture Software |
131 | * Developer's Manual, Volume 3 | 131 | * Developer's Manual, Volume 3 |
132 | */ | 132 | */ |
133 | for_each_cpu_mask(i, policy->cpus) | 133 | for_each_cpu_mask_nr(i, policy->cpus) |
134 | cpufreq_p4_setdc(i, p4clockmod_table[newstate].index); | 134 | cpufreq_p4_setdc(i, p4clockmod_table[newstate].index); |
135 | 135 | ||
136 | /* notifiers */ | 136 | /* notifiers */ |
137 | for_each_cpu_mask(i, policy->cpus) { | 137 | for_each_cpu_mask_nr(i, policy->cpus) { |
138 | freqs.cpu = i; | 138 | freqs.cpu = i; |
139 | cpufreq_notify_transition(&freqs, CPUFREQ_POSTCHANGE); | 139 | cpufreq_notify_transition(&freqs, CPUFREQ_POSTCHANGE); |
140 | } | 140 | } |
diff --git a/arch/x86/kernel/cpu/cpufreq/powernow-k7.h b/arch/x86/kernel/cpu/cpufreq/powernow-k7.h index f8a63b3664e3..35fb4eaf6e1c 100644 --- a/arch/x86/kernel/cpu/cpufreq/powernow-k7.h +++ b/arch/x86/kernel/cpu/cpufreq/powernow-k7.h | |||
@@ -1,5 +1,4 @@ | |||
1 | /* | 1 | /* |
2 | * $Id: powernow-k7.h,v 1.2 2003/02/10 18:26:01 davej Exp $ | ||
3 | * (C) 2003 Dave Jones. | 2 | * (C) 2003 Dave Jones. |
4 | * | 3 | * |
5 | * Licensed under the terms of the GNU GPL License version 2. | 4 | * Licensed under the terms of the GNU GPL License version 2. |
diff --git a/arch/x86/kernel/cpu/cpufreq/powernow-k8.c b/arch/x86/kernel/cpu/cpufreq/powernow-k8.c index 206791eb46e3..84bb395038d8 100644 --- a/arch/x86/kernel/cpu/cpufreq/powernow-k8.c +++ b/arch/x86/kernel/cpu/cpufreq/powernow-k8.c | |||
@@ -66,7 +66,6 @@ static u32 find_freq_from_fid(u32 fid) | |||
66 | return 800 + (fid * 100); | 66 | return 800 + (fid * 100); |
67 | } | 67 | } |
68 | 68 | ||
69 | |||
70 | /* Return a frequency in KHz, given an input fid */ | 69 | /* Return a frequency in KHz, given an input fid */ |
71 | static u32 find_khz_freq_from_fid(u32 fid) | 70 | static u32 find_khz_freq_from_fid(u32 fid) |
72 | { | 71 | { |
@@ -78,7 +77,6 @@ static u32 find_khz_freq_from_pstate(struct cpufreq_frequency_table *data, u32 p | |||
78 | return data[pstate].frequency; | 77 | return data[pstate].frequency; |
79 | } | 78 | } |
80 | 79 | ||
81 | |||
82 | /* Return the vco fid for an input fid | 80 | /* Return the vco fid for an input fid |
83 | * | 81 | * |
84 | * Each "low" fid has corresponding "high" fid, and you can get to "low" fids | 82 | * Each "low" fid has corresponding "high" fid, and you can get to "low" fids |
@@ -166,7 +164,6 @@ static void fidvid_msr_init(void) | |||
166 | wrmsr(MSR_FIDVID_CTL, lo, hi); | 164 | wrmsr(MSR_FIDVID_CTL, lo, hi); |
167 | } | 165 | } |
168 | 166 | ||
169 | |||
170 | /* write the new fid value along with the other control fields to the msr */ | 167 | /* write the new fid value along with the other control fields to the msr */ |
171 | static int write_new_fid(struct powernow_k8_data *data, u32 fid) | 168 | static int write_new_fid(struct powernow_k8_data *data, u32 fid) |
172 | { | 169 | { |
@@ -966,7 +963,7 @@ static int transition_frequency_fidvid(struct powernow_k8_data *data, unsigned i | |||
966 | freqs.old = find_khz_freq_from_fid(data->currfid); | 963 | freqs.old = find_khz_freq_from_fid(data->currfid); |
967 | freqs.new = find_khz_freq_from_fid(fid); | 964 | freqs.new = find_khz_freq_from_fid(fid); |
968 | 965 | ||
969 | for_each_cpu_mask(i, *(data->available_cores)) { | 966 | for_each_cpu_mask_nr(i, *(data->available_cores)) { |
970 | freqs.cpu = i; | 967 | freqs.cpu = i; |
971 | cpufreq_notify_transition(&freqs, CPUFREQ_PRECHANGE); | 968 | cpufreq_notify_transition(&freqs, CPUFREQ_PRECHANGE); |
972 | } | 969 | } |
@@ -974,7 +971,7 @@ static int transition_frequency_fidvid(struct powernow_k8_data *data, unsigned i | |||
974 | res = transition_fid_vid(data, fid, vid); | 971 | res = transition_fid_vid(data, fid, vid); |
975 | freqs.new = find_khz_freq_from_fid(data->currfid); | 972 | freqs.new = find_khz_freq_from_fid(data->currfid); |
976 | 973 | ||
977 | for_each_cpu_mask(i, *(data->available_cores)) { | 974 | for_each_cpu_mask_nr(i, *(data->available_cores)) { |
978 | freqs.cpu = i; | 975 | freqs.cpu = i; |
979 | cpufreq_notify_transition(&freqs, CPUFREQ_POSTCHANGE); | 976 | cpufreq_notify_transition(&freqs, CPUFREQ_POSTCHANGE); |
980 | } | 977 | } |
@@ -997,7 +994,7 @@ static int transition_frequency_pstate(struct powernow_k8_data *data, unsigned i | |||
997 | freqs.old = find_khz_freq_from_pstate(data->powernow_table, data->currpstate); | 994 | freqs.old = find_khz_freq_from_pstate(data->powernow_table, data->currpstate); |
998 | freqs.new = find_khz_freq_from_pstate(data->powernow_table, pstate); | 995 | freqs.new = find_khz_freq_from_pstate(data->powernow_table, pstate); |
999 | 996 | ||
1000 | for_each_cpu_mask(i, *(data->available_cores)) { | 997 | for_each_cpu_mask_nr(i, *(data->available_cores)) { |
1001 | freqs.cpu = i; | 998 | freqs.cpu = i; |
1002 | cpufreq_notify_transition(&freqs, CPUFREQ_PRECHANGE); | 999 | cpufreq_notify_transition(&freqs, CPUFREQ_PRECHANGE); |
1003 | } | 1000 | } |
@@ -1005,7 +1002,7 @@ static int transition_frequency_pstate(struct powernow_k8_data *data, unsigned i | |||
1005 | res = transition_pstate(data, pstate); | 1002 | res = transition_pstate(data, pstate); |
1006 | freqs.new = find_khz_freq_from_pstate(data->powernow_table, pstate); | 1003 | freqs.new = find_khz_freq_from_pstate(data->powernow_table, pstate); |
1007 | 1004 | ||
1008 | for_each_cpu_mask(i, *(data->available_cores)) { | 1005 | for_each_cpu_mask_nr(i, *(data->available_cores)) { |
1009 | freqs.cpu = i; | 1006 | freqs.cpu = i; |
1010 | cpufreq_notify_transition(&freqs, CPUFREQ_POSTCHANGE); | 1007 | cpufreq_notify_transition(&freqs, CPUFREQ_POSTCHANGE); |
1011 | } | 1008 | } |
diff --git a/arch/x86/kernel/cpu/cpufreq/speedstep-centrino.c b/arch/x86/kernel/cpu/cpufreq/speedstep-centrino.c index 908dd347c67e..15e13c01cc36 100644 --- a/arch/x86/kernel/cpu/cpufreq/speedstep-centrino.c +++ b/arch/x86/kernel/cpu/cpufreq/speedstep-centrino.c | |||
@@ -28,7 +28,8 @@ | |||
28 | #define PFX "speedstep-centrino: " | 28 | #define PFX "speedstep-centrino: " |
29 | #define MAINTAINER "cpufreq@lists.linux.org.uk" | 29 | #define MAINTAINER "cpufreq@lists.linux.org.uk" |
30 | 30 | ||
31 | #define dprintk(msg...) cpufreq_debug_printk(CPUFREQ_DEBUG_DRIVER, "speedstep-centrino", msg) | 31 | #define dprintk(msg...) \ |
32 | cpufreq_debug_printk(CPUFREQ_DEBUG_DRIVER, "speedstep-centrino", msg) | ||
32 | 33 | ||
33 | #define INTEL_MSR_RANGE (0xffff) | 34 | #define INTEL_MSR_RANGE (0xffff) |
34 | 35 | ||
@@ -66,11 +67,12 @@ struct cpu_model | |||
66 | 67 | ||
67 | struct cpufreq_frequency_table *op_points; /* clock/voltage pairs */ | 68 | struct cpufreq_frequency_table *op_points; /* clock/voltage pairs */ |
68 | }; | 69 | }; |
69 | static int centrino_verify_cpu_id(const struct cpuinfo_x86 *c, const struct cpu_id *x); | 70 | static int centrino_verify_cpu_id(const struct cpuinfo_x86 *c, |
71 | const struct cpu_id *x); | ||
70 | 72 | ||
71 | /* Operating points for current CPU */ | 73 | /* Operating points for current CPU */ |
72 | static struct cpu_model *centrino_model[NR_CPUS]; | 74 | static DEFINE_PER_CPU(struct cpu_model *, centrino_model); |
73 | static const struct cpu_id *centrino_cpu[NR_CPUS]; | 75 | static DEFINE_PER_CPU(const struct cpu_id *, centrino_cpu); |
74 | 76 | ||
75 | static struct cpufreq_driver centrino_driver; | 77 | static struct cpufreq_driver centrino_driver; |
76 | 78 | ||
@@ -255,7 +257,7 @@ static int centrino_cpu_init_table(struct cpufreq_policy *policy) | |||
255 | return -ENOENT; | 257 | return -ENOENT; |
256 | } | 258 | } |
257 | 259 | ||
258 | centrino_model[policy->cpu] = model; | 260 | per_cpu(centrino_model, policy->cpu) = model; |
259 | 261 | ||
260 | dprintk("found \"%s\": max frequency: %dkHz\n", | 262 | dprintk("found \"%s\": max frequency: %dkHz\n", |
261 | model->model_name, model->max_freq); | 263 | model->model_name, model->max_freq); |
@@ -264,10 +266,14 @@ static int centrino_cpu_init_table(struct cpufreq_policy *policy) | |||
264 | } | 266 | } |
265 | 267 | ||
266 | #else | 268 | #else |
267 | static inline int centrino_cpu_init_table(struct cpufreq_policy *policy) { return -ENODEV; } | 269 | static inline int centrino_cpu_init_table(struct cpufreq_policy *policy) |
270 | { | ||
271 | return -ENODEV; | ||
272 | } | ||
268 | #endif /* CONFIG_X86_SPEEDSTEP_CENTRINO_TABLE */ | 273 | #endif /* CONFIG_X86_SPEEDSTEP_CENTRINO_TABLE */ |
269 | 274 | ||
270 | static int centrino_verify_cpu_id(const struct cpuinfo_x86 *c, const struct cpu_id *x) | 275 | static int centrino_verify_cpu_id(const struct cpuinfo_x86 *c, |
276 | const struct cpu_id *x) | ||
271 | { | 277 | { |
272 | if ((c->x86 == x->x86) && | 278 | if ((c->x86 == x->x86) && |
273 | (c->x86_model == x->x86_model) && | 279 | (c->x86_model == x->x86_model) && |
@@ -286,23 +292,28 @@ static unsigned extract_clock(unsigned msr, unsigned int cpu, int failsafe) | |||
286 | * for centrino, as some DSDTs are buggy. | 292 | * for centrino, as some DSDTs are buggy. |
287 | * Ideally, this can be done using the acpi_data structure. | 293 | * Ideally, this can be done using the acpi_data structure. |
288 | */ | 294 | */ |
289 | if ((centrino_cpu[cpu] == &cpu_ids[CPU_BANIAS]) || | 295 | if ((per_cpu(centrino_cpu, cpu) == &cpu_ids[CPU_BANIAS]) || |
290 | (centrino_cpu[cpu] == &cpu_ids[CPU_DOTHAN_A1]) || | 296 | (per_cpu(centrino_cpu, cpu) == &cpu_ids[CPU_DOTHAN_A1]) || |
291 | (centrino_cpu[cpu] == &cpu_ids[CPU_DOTHAN_B0])) { | 297 | (per_cpu(centrino_cpu, cpu) == &cpu_ids[CPU_DOTHAN_B0])) { |
292 | msr = (msr >> 8) & 0xff; | 298 | msr = (msr >> 8) & 0xff; |
293 | return msr * 100000; | 299 | return msr * 100000; |
294 | } | 300 | } |
295 | 301 | ||
296 | if ((!centrino_model[cpu]) || (!centrino_model[cpu]->op_points)) | 302 | if ((!per_cpu(centrino_model, cpu)) || |
303 | (!per_cpu(centrino_model, cpu)->op_points)) | ||
297 | return 0; | 304 | return 0; |
298 | 305 | ||
299 | msr &= 0xffff; | 306 | msr &= 0xffff; |
300 | for (i=0;centrino_model[cpu]->op_points[i].frequency != CPUFREQ_TABLE_END; i++) { | 307 | for (i = 0; |
301 | if (msr == centrino_model[cpu]->op_points[i].index) | 308 | per_cpu(centrino_model, cpu)->op_points[i].frequency |
302 | return centrino_model[cpu]->op_points[i].frequency; | 309 | != CPUFREQ_TABLE_END; |
310 | i++) { | ||
311 | if (msr == per_cpu(centrino_model, cpu)->op_points[i].index) | ||
312 | return per_cpu(centrino_model, cpu)-> | ||
313 | op_points[i].frequency; | ||
303 | } | 314 | } |
304 | if (failsafe) | 315 | if (failsafe) |
305 | return centrino_model[cpu]->op_points[i-1].frequency; | 316 | return per_cpu(centrino_model, cpu)->op_points[i-1].frequency; |
306 | else | 317 | else |
307 | return 0; | 318 | return 0; |
308 | } | 319 | } |
@@ -347,7 +358,8 @@ static int centrino_cpu_init(struct cpufreq_policy *policy) | |||
347 | int i; | 358 | int i; |
348 | 359 | ||
349 | /* Only Intel makes Enhanced Speedstep-capable CPUs */ | 360 | /* Only Intel makes Enhanced Speedstep-capable CPUs */ |
350 | if (cpu->x86_vendor != X86_VENDOR_INTEL || !cpu_has(cpu, X86_FEATURE_EST)) | 361 | if (cpu->x86_vendor != X86_VENDOR_INTEL || |
362 | !cpu_has(cpu, X86_FEATURE_EST)) | ||
351 | return -ENODEV; | 363 | return -ENODEV; |
352 | 364 | ||
353 | if (cpu_has(cpu, X86_FEATURE_CONSTANT_TSC)) | 365 | if (cpu_has(cpu, X86_FEATURE_CONSTANT_TSC)) |
@@ -361,9 +373,9 @@ static int centrino_cpu_init(struct cpufreq_policy *policy) | |||
361 | break; | 373 | break; |
362 | 374 | ||
363 | if (i != N_IDS) | 375 | if (i != N_IDS) |
364 | centrino_cpu[policy->cpu] = &cpu_ids[i]; | 376 | per_cpu(centrino_cpu, policy->cpu) = &cpu_ids[i]; |
365 | 377 | ||
366 | if (!centrino_cpu[policy->cpu]) { | 378 | if (!per_cpu(centrino_cpu, policy->cpu)) { |
367 | dprintk("found unsupported CPU with " | 379 | dprintk("found unsupported CPU with " |
368 | "Enhanced SpeedStep: send /proc/cpuinfo to " | 380 | "Enhanced SpeedStep: send /proc/cpuinfo to " |
369 | MAINTAINER "\n"); | 381 | MAINTAINER "\n"); |
@@ -386,23 +398,26 @@ static int centrino_cpu_init(struct cpufreq_policy *policy) | |||
386 | /* check to see if it stuck */ | 398 | /* check to see if it stuck */ |
387 | rdmsr(MSR_IA32_MISC_ENABLE, l, h); | 399 | rdmsr(MSR_IA32_MISC_ENABLE, l, h); |
388 | if (!(l & (1<<16))) { | 400 | if (!(l & (1<<16))) { |
389 | printk(KERN_INFO PFX "couldn't enable Enhanced SpeedStep\n"); | 401 | printk(KERN_INFO PFX |
402 | "couldn't enable Enhanced SpeedStep\n"); | ||
390 | return -ENODEV; | 403 | return -ENODEV; |
391 | } | 404 | } |
392 | } | 405 | } |
393 | 406 | ||
394 | freq = get_cur_freq(policy->cpu); | 407 | freq = get_cur_freq(policy->cpu); |
395 | 408 | policy->cpuinfo.transition_latency = 10000; | |
396 | policy->cpuinfo.transition_latency = 10000; /* 10uS transition latency */ | 409 | /* 10uS transition latency */ |
397 | policy->cur = freq; | 410 | policy->cur = freq; |
398 | 411 | ||
399 | dprintk("centrino_cpu_init: cur=%dkHz\n", policy->cur); | 412 | dprintk("centrino_cpu_init: cur=%dkHz\n", policy->cur); |
400 | 413 | ||
401 | ret = cpufreq_frequency_table_cpuinfo(policy, centrino_model[policy->cpu]->op_points); | 414 | ret = cpufreq_frequency_table_cpuinfo(policy, |
415 | per_cpu(centrino_model, policy->cpu)->op_points); | ||
402 | if (ret) | 416 | if (ret) |
403 | return (ret); | 417 | return (ret); |
404 | 418 | ||
405 | cpufreq_frequency_table_get_attr(centrino_model[policy->cpu]->op_points, policy->cpu); | 419 | cpufreq_frequency_table_get_attr( |
420 | per_cpu(centrino_model, policy->cpu)->op_points, policy->cpu); | ||
406 | 421 | ||
407 | return 0; | 422 | return 0; |
408 | } | 423 | } |
@@ -411,12 +426,12 @@ static int centrino_cpu_exit(struct cpufreq_policy *policy) | |||
411 | { | 426 | { |
412 | unsigned int cpu = policy->cpu; | 427 | unsigned int cpu = policy->cpu; |
413 | 428 | ||
414 | if (!centrino_model[cpu]) | 429 | if (!per_cpu(centrino_model, cpu)) |
415 | return -ENODEV; | 430 | return -ENODEV; |
416 | 431 | ||
417 | cpufreq_frequency_table_put_attr(cpu); | 432 | cpufreq_frequency_table_put_attr(cpu); |
418 | 433 | ||
419 | centrino_model[cpu] = NULL; | 434 | per_cpu(centrino_model, cpu) = NULL; |
420 | 435 | ||
421 | return 0; | 436 | return 0; |
422 | } | 437 | } |
@@ -430,17 +445,26 @@ static int centrino_cpu_exit(struct cpufreq_policy *policy) | |||
430 | */ | 445 | */ |
431 | static int centrino_verify (struct cpufreq_policy *policy) | 446 | static int centrino_verify (struct cpufreq_policy *policy) |
432 | { | 447 | { |
433 | return cpufreq_frequency_table_verify(policy, centrino_model[policy->cpu]->op_points); | 448 | return cpufreq_frequency_table_verify(policy, |
449 | per_cpu(centrino_model, policy->cpu)->op_points); | ||
434 | } | 450 | } |
435 | 451 | ||
436 | /** | 452 | /** |
437 | * centrino_setpolicy - set a new CPUFreq policy | 453 | * centrino_setpolicy - set a new CPUFreq policy |
438 | * @policy: new policy | 454 | * @policy: new policy |
439 | * @target_freq: the target frequency | 455 | * @target_freq: the target frequency |
440 | * @relation: how that frequency relates to achieved frequency (CPUFREQ_RELATION_L or CPUFREQ_RELATION_H) | 456 | * @relation: how that frequency relates to achieved frequency |
457 | * (CPUFREQ_RELATION_L or CPUFREQ_RELATION_H) | ||
441 | * | 458 | * |
442 | * Sets a new CPUFreq policy. | 459 | * Sets a new CPUFreq policy. |
443 | */ | 460 | */ |
461 | struct allmasks { | ||
462 | cpumask_t online_policy_cpus; | ||
463 | cpumask_t saved_mask; | ||
464 | cpumask_t set_mask; | ||
465 | cpumask_t covered_cpus; | ||
466 | }; | ||
467 | |||
444 | static int centrino_target (struct cpufreq_policy *policy, | 468 | static int centrino_target (struct cpufreq_policy *policy, |
445 | unsigned int target_freq, | 469 | unsigned int target_freq, |
446 | unsigned int relation) | 470 | unsigned int relation) |
@@ -448,48 +472,55 @@ static int centrino_target (struct cpufreq_policy *policy, | |||
448 | unsigned int newstate = 0; | 472 | unsigned int newstate = 0; |
449 | unsigned int msr, oldmsr = 0, h = 0, cpu = policy->cpu; | 473 | unsigned int msr, oldmsr = 0, h = 0, cpu = policy->cpu; |
450 | struct cpufreq_freqs freqs; | 474 | struct cpufreq_freqs freqs; |
451 | cpumask_t online_policy_cpus; | ||
452 | cpumask_t saved_mask; | ||
453 | cpumask_t set_mask; | ||
454 | cpumask_t covered_cpus; | ||
455 | int retval = 0; | 475 | int retval = 0; |
456 | unsigned int j, k, first_cpu, tmp; | 476 | unsigned int j, k, first_cpu, tmp; |
457 | 477 | CPUMASK_ALLOC(allmasks); | |
458 | if (unlikely(centrino_model[cpu] == NULL)) | 478 | CPUMASK_PTR(online_policy_cpus, allmasks); |
459 | return -ENODEV; | 479 | CPUMASK_PTR(saved_mask, allmasks); |
480 | CPUMASK_PTR(set_mask, allmasks); | ||
481 | CPUMASK_PTR(covered_cpus, allmasks); | ||
482 | |||
483 | if (unlikely(allmasks == NULL)) | ||
484 | return -ENOMEM; | ||
485 | |||
486 | if (unlikely(per_cpu(centrino_model, cpu) == NULL)) { | ||
487 | retval = -ENODEV; | ||
488 | goto out; | ||
489 | } | ||
460 | 490 | ||
461 | if (unlikely(cpufreq_frequency_table_target(policy, | 491 | if (unlikely(cpufreq_frequency_table_target(policy, |
462 | centrino_model[cpu]->op_points, | 492 | per_cpu(centrino_model, cpu)->op_points, |
463 | target_freq, | 493 | target_freq, |
464 | relation, | 494 | relation, |
465 | &newstate))) { | 495 | &newstate))) { |
466 | return -EINVAL; | 496 | retval = -EINVAL; |
497 | goto out; | ||
467 | } | 498 | } |
468 | 499 | ||
469 | #ifdef CONFIG_HOTPLUG_CPU | 500 | #ifdef CONFIG_HOTPLUG_CPU |
470 | /* cpufreq holds the hotplug lock, so we are safe from here on */ | 501 | /* cpufreq holds the hotplug lock, so we are safe from here on */ |
471 | cpus_and(online_policy_cpus, cpu_online_map, policy->cpus); | 502 | cpus_and(*online_policy_cpus, cpu_online_map, policy->cpus); |
472 | #else | 503 | #else |
473 | online_policy_cpus = policy->cpus; | 504 | *online_policy_cpus = policy->cpus; |
474 | #endif | 505 | #endif |
475 | 506 | ||
476 | saved_mask = current->cpus_allowed; | 507 | *saved_mask = current->cpus_allowed; |
477 | first_cpu = 1; | 508 | first_cpu = 1; |
478 | cpus_clear(covered_cpus); | 509 | cpus_clear(*covered_cpus); |
479 | for_each_cpu_mask(j, online_policy_cpus) { | 510 | for_each_cpu_mask_nr(j, *online_policy_cpus) { |
480 | /* | 511 | /* |
481 | * Support for SMP systems. | 512 | * Support for SMP systems. |
482 | * Make sure we are running on CPU that wants to change freq | 513 | * Make sure we are running on CPU that wants to change freq |
483 | */ | 514 | */ |
484 | cpus_clear(set_mask); | 515 | cpus_clear(*set_mask); |
485 | if (policy->shared_type == CPUFREQ_SHARED_TYPE_ANY) | 516 | if (policy->shared_type == CPUFREQ_SHARED_TYPE_ANY) |
486 | cpus_or(set_mask, set_mask, online_policy_cpus); | 517 | cpus_or(*set_mask, *set_mask, *online_policy_cpus); |
487 | else | 518 | else |
488 | cpu_set(j, set_mask); | 519 | cpu_set(j, *set_mask); |
489 | 520 | ||
490 | set_cpus_allowed_ptr(current, &set_mask); | 521 | set_cpus_allowed_ptr(current, set_mask); |
491 | preempt_disable(); | 522 | preempt_disable(); |
492 | if (unlikely(!cpu_isset(smp_processor_id(), set_mask))) { | 523 | if (unlikely(!cpu_isset(smp_processor_id(), *set_mask))) { |
493 | dprintk("couldn't limit to CPUs in this domain\n"); | 524 | dprintk("couldn't limit to CPUs in this domain\n"); |
494 | retval = -EAGAIN; | 525 | retval = -EAGAIN; |
495 | if (first_cpu) { | 526 | if (first_cpu) { |
@@ -500,7 +531,7 @@ static int centrino_target (struct cpufreq_policy *policy, | |||
500 | break; | 531 | break; |
501 | } | 532 | } |
502 | 533 | ||
503 | msr = centrino_model[cpu]->op_points[newstate].index; | 534 | msr = per_cpu(centrino_model, cpu)->op_points[newstate].index; |
504 | 535 | ||
505 | if (first_cpu) { | 536 | if (first_cpu) { |
506 | rdmsr(MSR_IA32_PERF_CTL, oldmsr, h); | 537 | rdmsr(MSR_IA32_PERF_CTL, oldmsr, h); |
@@ -517,7 +548,7 @@ static int centrino_target (struct cpufreq_policy *policy, | |||
517 | dprintk("target=%dkHz old=%d new=%d msr=%04x\n", | 548 | dprintk("target=%dkHz old=%d new=%d msr=%04x\n", |
518 | target_freq, freqs.old, freqs.new, msr); | 549 | target_freq, freqs.old, freqs.new, msr); |
519 | 550 | ||
520 | for_each_cpu_mask(k, online_policy_cpus) { | 551 | for_each_cpu_mask_nr(k, *online_policy_cpus) { |
521 | freqs.cpu = k; | 552 | freqs.cpu = k; |
522 | cpufreq_notify_transition(&freqs, | 553 | cpufreq_notify_transition(&freqs, |
523 | CPUFREQ_PRECHANGE); | 554 | CPUFREQ_PRECHANGE); |
@@ -536,11 +567,11 @@ static int centrino_target (struct cpufreq_policy *policy, | |||
536 | break; | 567 | break; |
537 | } | 568 | } |
538 | 569 | ||
539 | cpu_set(j, covered_cpus); | 570 | cpu_set(j, *covered_cpus); |
540 | preempt_enable(); | 571 | preempt_enable(); |
541 | } | 572 | } |
542 | 573 | ||
543 | for_each_cpu_mask(k, online_policy_cpus) { | 574 | for_each_cpu_mask_nr(k, *online_policy_cpus) { |
544 | freqs.cpu = k; | 575 | freqs.cpu = k; |
545 | cpufreq_notify_transition(&freqs, CPUFREQ_POSTCHANGE); | 576 | cpufreq_notify_transition(&freqs, CPUFREQ_POSTCHANGE); |
546 | } | 577 | } |
@@ -553,30 +584,32 @@ static int centrino_target (struct cpufreq_policy *policy, | |||
553 | * Best effort undo.. | 584 | * Best effort undo.. |
554 | */ | 585 | */ |
555 | 586 | ||
556 | if (!cpus_empty(covered_cpus)) { | 587 | if (!cpus_empty(*covered_cpus)) |
557 | for_each_cpu_mask(j, covered_cpus) { | 588 | for_each_cpu_mask_nr(j, *covered_cpus) { |
558 | set_cpus_allowed_ptr(current, | 589 | set_cpus_allowed_ptr(current, |
559 | &cpumask_of_cpu(j)); | 590 | &cpumask_of_cpu(j)); |
560 | wrmsr(MSR_IA32_PERF_CTL, oldmsr, h); | 591 | wrmsr(MSR_IA32_PERF_CTL, oldmsr, h); |
561 | } | 592 | } |
562 | } | ||
563 | 593 | ||
564 | tmp = freqs.new; | 594 | tmp = freqs.new; |
565 | freqs.new = freqs.old; | 595 | freqs.new = freqs.old; |
566 | freqs.old = tmp; | 596 | freqs.old = tmp; |
567 | for_each_cpu_mask(j, online_policy_cpus) { | 597 | for_each_cpu_mask_nr(j, *online_policy_cpus) { |
568 | freqs.cpu = j; | 598 | freqs.cpu = j; |
569 | cpufreq_notify_transition(&freqs, CPUFREQ_PRECHANGE); | 599 | cpufreq_notify_transition(&freqs, CPUFREQ_PRECHANGE); |
570 | cpufreq_notify_transition(&freqs, CPUFREQ_POSTCHANGE); | 600 | cpufreq_notify_transition(&freqs, CPUFREQ_POSTCHANGE); |
571 | } | 601 | } |
572 | } | 602 | } |
573 | set_cpus_allowed_ptr(current, &saved_mask); | 603 | set_cpus_allowed_ptr(current, saved_mask); |
574 | return 0; | 604 | retval = 0; |
605 | goto out; | ||
575 | 606 | ||
576 | migrate_end: | 607 | migrate_end: |
577 | preempt_enable(); | 608 | preempt_enable(); |
578 | set_cpus_allowed_ptr(current, &saved_mask); | 609 | set_cpus_allowed_ptr(current, saved_mask); |
579 | return 0; | 610 | out: |
611 | CPUMASK_FREE(allmasks); | ||
612 | return retval; | ||
580 | } | 613 | } |
581 | 614 | ||
582 | static struct freq_attr* centrino_attr[] = { | 615 | static struct freq_attr* centrino_attr[] = { |
diff --git a/arch/x86/kernel/cpu/cpufreq/speedstep-ich.c b/arch/x86/kernel/cpu/cpufreq/speedstep-ich.c index 1b50244b1fdf..191f7263c61d 100644 --- a/arch/x86/kernel/cpu/cpufreq/speedstep-ich.c +++ b/arch/x86/kernel/cpu/cpufreq/speedstep-ich.c | |||
@@ -279,7 +279,7 @@ static int speedstep_target (struct cpufreq_policy *policy, | |||
279 | 279 | ||
280 | cpus_allowed = current->cpus_allowed; | 280 | cpus_allowed = current->cpus_allowed; |
281 | 281 | ||
282 | for_each_cpu_mask(i, policy->cpus) { | 282 | for_each_cpu_mask_nr(i, policy->cpus) { |
283 | freqs.cpu = i; | 283 | freqs.cpu = i; |
284 | cpufreq_notify_transition(&freqs, CPUFREQ_PRECHANGE); | 284 | cpufreq_notify_transition(&freqs, CPUFREQ_PRECHANGE); |
285 | } | 285 | } |
@@ -292,7 +292,7 @@ static int speedstep_target (struct cpufreq_policy *policy, | |||
292 | /* allow to be run on all CPUs */ | 292 | /* allow to be run on all CPUs */ |
293 | set_cpus_allowed_ptr(current, &cpus_allowed); | 293 | set_cpus_allowed_ptr(current, &cpus_allowed); |
294 | 294 | ||
295 | for_each_cpu_mask(i, policy->cpus) { | 295 | for_each_cpu_mask_nr(i, policy->cpus) { |
296 | freqs.cpu = i; | 296 | freqs.cpu = i; |
297 | cpufreq_notify_transition(&freqs, CPUFREQ_POSTCHANGE); | 297 | cpufreq_notify_transition(&freqs, CPUFREQ_POSTCHANGE); |
298 | } | 298 | } |
diff --git a/arch/x86/kernel/cpu/cyrix.c b/arch/x86/kernel/cpu/cyrix.c index 3fd7a67bb06a..898a5a2002ed 100644 --- a/arch/x86/kernel/cpu/cyrix.c +++ b/arch/x86/kernel/cpu/cyrix.c | |||
@@ -15,13 +15,11 @@ | |||
15 | /* | 15 | /* |
16 | * Read NSC/Cyrix DEVID registers (DIR) to get more detailed info. about the CPU | 16 | * Read NSC/Cyrix DEVID registers (DIR) to get more detailed info. about the CPU |
17 | */ | 17 | */ |
18 | static void __cpuinit do_cyrix_devid(unsigned char *dir0, unsigned char *dir1) | 18 | static void __cpuinit __do_cyrix_devid(unsigned char *dir0, unsigned char *dir1) |
19 | { | 19 | { |
20 | unsigned char ccr2, ccr3; | 20 | unsigned char ccr2, ccr3; |
21 | unsigned long flags; | ||
22 | 21 | ||
23 | /* we test for DEVID by checking whether CCR3 is writable */ | 22 | /* we test for DEVID by checking whether CCR3 is writable */ |
24 | local_irq_save(flags); | ||
25 | ccr3 = getCx86(CX86_CCR3); | 23 | ccr3 = getCx86(CX86_CCR3); |
26 | setCx86(CX86_CCR3, ccr3 ^ 0x80); | 24 | setCx86(CX86_CCR3, ccr3 ^ 0x80); |
27 | getCx86(0xc0); /* dummy to change bus */ | 25 | getCx86(0xc0); /* dummy to change bus */ |
@@ -44,9 +42,16 @@ static void __cpuinit do_cyrix_devid(unsigned char *dir0, unsigned char *dir1) | |||
44 | *dir0 = getCx86(CX86_DIR0); | 42 | *dir0 = getCx86(CX86_DIR0); |
45 | *dir1 = getCx86(CX86_DIR1); | 43 | *dir1 = getCx86(CX86_DIR1); |
46 | } | 44 | } |
47 | local_irq_restore(flags); | ||
48 | } | 45 | } |
49 | 46 | ||
47 | static void __cpuinit do_cyrix_devid(unsigned char *dir0, unsigned char *dir1) | ||
48 | { | ||
49 | unsigned long flags; | ||
50 | |||
51 | local_irq_save(flags); | ||
52 | __do_cyrix_devid(dir0, dir1); | ||
53 | local_irq_restore(flags); | ||
54 | } | ||
50 | /* | 55 | /* |
51 | * Cx86_dir0_msb is a HACK needed by check_cx686_cpuid/slop in bugs.h in | 56 | * Cx86_dir0_msb is a HACK needed by check_cx686_cpuid/slop in bugs.h in |
52 | * order to identify the Cyrix CPU model after we're out of setup.c | 57 | * order to identify the Cyrix CPU model after we're out of setup.c |
@@ -134,23 +139,6 @@ static void __cpuinit set_cx86_memwb(void) | |||
134 | setCx86(CX86_CCR2, getCx86(CX86_CCR2) | 0x14); | 139 | setCx86(CX86_CCR2, getCx86(CX86_CCR2) | 0x14); |
135 | } | 140 | } |
136 | 141 | ||
137 | static void __cpuinit set_cx86_inc(void) | ||
138 | { | ||
139 | unsigned char ccr3; | ||
140 | |||
141 | printk(KERN_INFO "Enable Incrementor on Cyrix/NSC processor.\n"); | ||
142 | |||
143 | ccr3 = getCx86(CX86_CCR3); | ||
144 | setCx86(CX86_CCR3, (ccr3 & 0x0f) | 0x10); /* enable MAPEN */ | ||
145 | /* PCR1 -- Performance Control */ | ||
146 | /* Incrementor on, whatever that is */ | ||
147 | setCx86(CX86_PCR1, getCx86(CX86_PCR1) | 0x02); | ||
148 | /* PCR0 -- Performance Control */ | ||
149 | /* Incrementor Margin 10 */ | ||
150 | setCx86(CX86_PCR0, getCx86(CX86_PCR0) | 0x04); | ||
151 | setCx86(CX86_CCR3, ccr3); /* disable MAPEN */ | ||
152 | } | ||
153 | |||
154 | /* | 142 | /* |
155 | * Configure later MediaGX and/or Geode processor. | 143 | * Configure later MediaGX and/or Geode processor. |
156 | */ | 144 | */ |
@@ -174,11 +162,28 @@ static void __cpuinit geode_configure(void) | |||
174 | 162 | ||
175 | set_cx86_memwb(); | 163 | set_cx86_memwb(); |
176 | set_cx86_reorder(); | 164 | set_cx86_reorder(); |
177 | set_cx86_inc(); | ||
178 | 165 | ||
179 | local_irq_restore(flags); | 166 | local_irq_restore(flags); |
180 | } | 167 | } |
181 | 168 | ||
169 | static void __cpuinit early_init_cyrix(struct cpuinfo_x86 *c) | ||
170 | { | ||
171 | unsigned char dir0, dir0_msn, dir1 = 0; | ||
172 | |||
173 | __do_cyrix_devid(&dir0, &dir1); | ||
174 | dir0_msn = dir0 >> 4; /* identifies CPU "family" */ | ||
175 | |||
176 | switch (dir0_msn) { | ||
177 | case 3: /* 6x86/6x86L */ | ||
178 | /* Emulate MTRRs using Cyrix's ARRs. */ | ||
179 | set_cpu_cap(c, X86_FEATURE_CYRIX_ARR); | ||
180 | break; | ||
181 | case 5: /* 6x86MX/M II */ | ||
182 | /* Emulate MTRRs using Cyrix's ARRs. */ | ||
183 | set_cpu_cap(c, X86_FEATURE_CYRIX_ARR); | ||
184 | break; | ||
185 | } | ||
186 | } | ||
182 | 187 | ||
183 | static void __cpuinit init_cyrix(struct cpuinfo_x86 *c) | 188 | static void __cpuinit init_cyrix(struct cpuinfo_x86 *c) |
184 | { | 189 | { |
@@ -434,6 +439,7 @@ static void __cpuinit cyrix_identify(struct cpuinfo_x86 *c) | |||
434 | static struct cpu_dev cyrix_cpu_dev __cpuinitdata = { | 439 | static struct cpu_dev cyrix_cpu_dev __cpuinitdata = { |
435 | .c_vendor = "Cyrix", | 440 | .c_vendor = "Cyrix", |
436 | .c_ident = { "CyrixInstead" }, | 441 | .c_ident = { "CyrixInstead" }, |
442 | .c_early_init = early_init_cyrix, | ||
437 | .c_init = init_cyrix, | 443 | .c_init = init_cyrix, |
438 | .c_identify = cyrix_identify, | 444 | .c_identify = cyrix_identify, |
439 | }; | 445 | }; |
diff --git a/arch/x86/kernel/cpu/feature_names.c b/arch/x86/kernel/cpu/feature_names.c index e43ad4ad4cba..c9017799497c 100644 --- a/arch/x86/kernel/cpu/feature_names.c +++ b/arch/x86/kernel/cpu/feature_names.c | |||
@@ -39,7 +39,8 @@ const char * const x86_cap_flags[NCAPINTS*32] = { | |||
39 | NULL, NULL, NULL, NULL, | 39 | NULL, NULL, NULL, NULL, |
40 | "constant_tsc", "up", NULL, "arch_perfmon", | 40 | "constant_tsc", "up", NULL, "arch_perfmon", |
41 | "pebs", "bts", NULL, NULL, | 41 | "pebs", "bts", NULL, NULL, |
42 | "rep_good", NULL, NULL, NULL, NULL, NULL, NULL, NULL, | 42 | "rep_good", NULL, NULL, NULL, |
43 | "nopl", NULL, NULL, NULL, | ||
43 | NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, | 44 | NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, |
44 | 45 | ||
45 | /* Intel-defined (#2) */ | 46 | /* Intel-defined (#2) */ |
diff --git a/arch/x86/kernel/cpu/intel.c b/arch/x86/kernel/cpu/intel.c index 70609efdf1da..b75f2569b8f8 100644 --- a/arch/x86/kernel/cpu/intel.c +++ b/arch/x86/kernel/cpu/intel.c | |||
@@ -227,6 +227,16 @@ static void __cpuinit init_intel(struct cpuinfo_x86 *c) | |||
227 | if (cpu_has_bts) | 227 | if (cpu_has_bts) |
228 | ds_init_intel(c); | 228 | ds_init_intel(c); |
229 | 229 | ||
230 | /* | ||
231 | * See if we have a good local APIC by checking for buggy Pentia, | ||
232 | * i.e. all B steppings and the C2 stepping of P54C when using their | ||
233 | * integrated APIC (see 11AP erratum in "Pentium Processor | ||
234 | * Specification Update"). | ||
235 | */ | ||
236 | if (cpu_has_apic && (c->x86<<8 | c->x86_model<<4) == 0x520 && | ||
237 | (c->x86_mask < 0x6 || c->x86_mask == 0xb)) | ||
238 | set_cpu_cap(c, X86_FEATURE_11AP); | ||
239 | |||
230 | #ifdef CONFIG_X86_NUMAQ | 240 | #ifdef CONFIG_X86_NUMAQ |
231 | numaq_tsc_disable(); | 241 | numaq_tsc_disable(); |
232 | #endif | 242 | #endif |
diff --git a/arch/x86/kernel/cpu/intel_cacheinfo.c b/arch/x86/kernel/cpu/intel_cacheinfo.c index 2c8afafa18e8..6b0a10b002f1 100644 --- a/arch/x86/kernel/cpu/intel_cacheinfo.c +++ b/arch/x86/kernel/cpu/intel_cacheinfo.c | |||
@@ -489,7 +489,7 @@ static void __cpuinit cache_remove_shared_cpu_map(unsigned int cpu, int index) | |||
489 | int sibling; | 489 | int sibling; |
490 | 490 | ||
491 | this_leaf = CPUID4_INFO_IDX(cpu, index); | 491 | this_leaf = CPUID4_INFO_IDX(cpu, index); |
492 | for_each_cpu_mask(sibling, this_leaf->shared_cpu_map) { | 492 | for_each_cpu_mask_nr(sibling, this_leaf->shared_cpu_map) { |
493 | sibling_leaf = CPUID4_INFO_IDX(sibling, index); | 493 | sibling_leaf = CPUID4_INFO_IDX(sibling, index); |
494 | cpu_clear(cpu, sibling_leaf->shared_cpu_map); | 494 | cpu_clear(cpu, sibling_leaf->shared_cpu_map); |
495 | } | 495 | } |
@@ -780,15 +780,14 @@ static int __cpuinit cache_add_dev(struct sys_device * sys_dev) | |||
780 | } | 780 | } |
781 | kobject_put(per_cpu(cache_kobject, cpu)); | 781 | kobject_put(per_cpu(cache_kobject, cpu)); |
782 | cpuid4_cache_sysfs_exit(cpu); | 782 | cpuid4_cache_sysfs_exit(cpu); |
783 | break; | 783 | return retval; |
784 | } | 784 | } |
785 | kobject_uevent(&(this_object->kobj), KOBJ_ADD); | 785 | kobject_uevent(&(this_object->kobj), KOBJ_ADD); |
786 | } | 786 | } |
787 | if (!retval) | 787 | cpu_set(cpu, cache_dev_map); |
788 | cpu_set(cpu, cache_dev_map); | ||
789 | 788 | ||
790 | kobject_uevent(per_cpu(cache_kobject, cpu), KOBJ_ADD); | 789 | kobject_uevent(per_cpu(cache_kobject, cpu), KOBJ_ADD); |
791 | return retval; | 790 | return 0; |
792 | } | 791 | } |
793 | 792 | ||
794 | static void __cpuinit cache_remove_dev(struct sys_device * sys_dev) | 793 | static void __cpuinit cache_remove_dev(struct sys_device * sys_dev) |
diff --git a/arch/x86/kernel/cpu/mcheck/mce_64.c b/arch/x86/kernel/cpu/mcheck/mce_64.c index c4a7ec31394c..726a5fcdf341 100644 --- a/arch/x86/kernel/cpu/mcheck/mce_64.c +++ b/arch/x86/kernel/cpu/mcheck/mce_64.c | |||
@@ -580,7 +580,7 @@ static ssize_t mce_read(struct file *filp, char __user *ubuf, size_t usize, | |||
580 | char __user *buf = ubuf; | 580 | char __user *buf = ubuf; |
581 | int i, err; | 581 | int i, err; |
582 | 582 | ||
583 | cpu_tsc = kmalloc(NR_CPUS * sizeof(long), GFP_KERNEL); | 583 | cpu_tsc = kmalloc(nr_cpu_ids * sizeof(long), GFP_KERNEL); |
584 | if (!cpu_tsc) | 584 | if (!cpu_tsc) |
585 | return -ENOMEM; | 585 | return -ENOMEM; |
586 | 586 | ||
@@ -759,13 +759,18 @@ static struct sysdev_class mce_sysclass = { | |||
759 | }; | 759 | }; |
760 | 760 | ||
761 | DEFINE_PER_CPU(struct sys_device, device_mce); | 761 | DEFINE_PER_CPU(struct sys_device, device_mce); |
762 | void (*threshold_cpu_callback)(unsigned long action, unsigned int cpu) __cpuinitdata; | ||
762 | 763 | ||
763 | /* Why are there no generic functions for this? */ | 764 | /* Why are there no generic functions for this? */ |
764 | #define ACCESSOR(name, var, start) \ | 765 | #define ACCESSOR(name, var, start) \ |
765 | static ssize_t show_ ## name(struct sys_device *s, char *buf) { \ | 766 | static ssize_t show_ ## name(struct sys_device *s, \ |
767 | struct sysdev_attribute *attr, \ | ||
768 | char *buf) { \ | ||
766 | return sprintf(buf, "%lx\n", (unsigned long)var); \ | 769 | return sprintf(buf, "%lx\n", (unsigned long)var); \ |
767 | } \ | 770 | } \ |
768 | static ssize_t set_ ## name(struct sys_device *s,const char *buf,size_t siz) { \ | 771 | static ssize_t set_ ## name(struct sys_device *s, \ |
772 | struct sysdev_attribute *attr, \ | ||
773 | const char *buf, size_t siz) { \ | ||
769 | char *end; \ | 774 | char *end; \ |
770 | unsigned long new = simple_strtoul(buf, &end, 0); \ | 775 | unsigned long new = simple_strtoul(buf, &end, 0); \ |
771 | if (end == buf) return -EINVAL; \ | 776 | if (end == buf) return -EINVAL; \ |
@@ -786,14 +791,16 @@ ACCESSOR(bank3ctl,bank[3],mce_restart()) | |||
786 | ACCESSOR(bank4ctl,bank[4],mce_restart()) | 791 | ACCESSOR(bank4ctl,bank[4],mce_restart()) |
787 | ACCESSOR(bank5ctl,bank[5],mce_restart()) | 792 | ACCESSOR(bank5ctl,bank[5],mce_restart()) |
788 | 793 | ||
789 | static ssize_t show_trigger(struct sys_device *s, char *buf) | 794 | static ssize_t show_trigger(struct sys_device *s, struct sysdev_attribute *attr, |
795 | char *buf) | ||
790 | { | 796 | { |
791 | strcpy(buf, trigger); | 797 | strcpy(buf, trigger); |
792 | strcat(buf, "\n"); | 798 | strcat(buf, "\n"); |
793 | return strlen(trigger) + 1; | 799 | return strlen(trigger) + 1; |
794 | } | 800 | } |
795 | 801 | ||
796 | static ssize_t set_trigger(struct sys_device *s,const char *buf,size_t siz) | 802 | static ssize_t set_trigger(struct sys_device *s, struct sysdev_attribute *attr, |
803 | const char *buf,size_t siz) | ||
797 | { | 804 | { |
798 | char *p; | 805 | char *p; |
799 | int len; | 806 | int len; |
@@ -806,12 +813,12 @@ static ssize_t set_trigger(struct sys_device *s,const char *buf,size_t siz) | |||
806 | } | 813 | } |
807 | 814 | ||
808 | static SYSDEV_ATTR(trigger, 0644, show_trigger, set_trigger); | 815 | static SYSDEV_ATTR(trigger, 0644, show_trigger, set_trigger); |
809 | ACCESSOR(tolerant,tolerant,) | 816 | static SYSDEV_INT_ATTR(tolerant, 0644, tolerant); |
810 | ACCESSOR(check_interval,check_interval,mce_restart()) | 817 | ACCESSOR(check_interval,check_interval,mce_restart()) |
811 | static struct sysdev_attribute *mce_attributes[] = { | 818 | static struct sysdev_attribute *mce_attributes[] = { |
812 | &attr_bank0ctl, &attr_bank1ctl, &attr_bank2ctl, | 819 | &attr_bank0ctl, &attr_bank1ctl, &attr_bank2ctl, |
813 | &attr_bank3ctl, &attr_bank4ctl, &attr_bank5ctl, | 820 | &attr_bank3ctl, &attr_bank4ctl, &attr_bank5ctl, |
814 | &attr_tolerant, &attr_check_interval, &attr_trigger, | 821 | &attr_tolerant.attr, &attr_check_interval, &attr_trigger, |
815 | NULL | 822 | NULL |
816 | }; | 823 | }; |
817 | 824 | ||
@@ -877,9 +884,13 @@ static int __cpuinit mce_cpu_callback(struct notifier_block *nfb, | |||
877 | case CPU_ONLINE: | 884 | case CPU_ONLINE: |
878 | case CPU_ONLINE_FROZEN: | 885 | case CPU_ONLINE_FROZEN: |
879 | mce_create_device(cpu); | 886 | mce_create_device(cpu); |
887 | if (threshold_cpu_callback) | ||
888 | threshold_cpu_callback(action, cpu); | ||
880 | break; | 889 | break; |
881 | case CPU_DEAD: | 890 | case CPU_DEAD: |
882 | case CPU_DEAD_FROZEN: | 891 | case CPU_DEAD_FROZEN: |
892 | if (threshold_cpu_callback) | ||
893 | threshold_cpu_callback(action, cpu); | ||
883 | mce_remove_device(cpu); | 894 | mce_remove_device(cpu); |
884 | break; | 895 | break; |
885 | } | 896 | } |
diff --git a/arch/x86/kernel/cpu/mcheck/mce_amd_64.c b/arch/x86/kernel/cpu/mcheck/mce_amd_64.c index 7c9a813e1193..5eb390a4b2e9 100644 --- a/arch/x86/kernel/cpu/mcheck/mce_amd_64.c +++ b/arch/x86/kernel/cpu/mcheck/mce_amd_64.c | |||
@@ -527,7 +527,7 @@ static __cpuinit int threshold_create_bank(unsigned int cpu, unsigned int bank) | |||
527 | if (err) | 527 | if (err) |
528 | goto out_free; | 528 | goto out_free; |
529 | 529 | ||
530 | for_each_cpu_mask(i, b->cpus) { | 530 | for_each_cpu_mask_nr(i, b->cpus) { |
531 | if (i == cpu) | 531 | if (i == cpu) |
532 | continue; | 532 | continue; |
533 | 533 | ||
@@ -617,7 +617,7 @@ static void threshold_remove_bank(unsigned int cpu, int bank) | |||
617 | #endif | 617 | #endif |
618 | 618 | ||
619 | /* remove all sibling symlinks before unregistering */ | 619 | /* remove all sibling symlinks before unregistering */ |
620 | for_each_cpu_mask(i, b->cpus) { | 620 | for_each_cpu_mask_nr(i, b->cpus) { |
621 | if (i == cpu) | 621 | if (i == cpu) |
622 | continue; | 622 | continue; |
623 | 623 | ||
@@ -628,6 +628,7 @@ static void threshold_remove_bank(unsigned int cpu, int bank) | |||
628 | deallocate_threshold_block(cpu, bank); | 628 | deallocate_threshold_block(cpu, bank); |
629 | 629 | ||
630 | free_out: | 630 | free_out: |
631 | kobject_del(b->kobj); | ||
631 | kobject_put(b->kobj); | 632 | kobject_put(b->kobj); |
632 | kfree(b); | 633 | kfree(b); |
633 | per_cpu(threshold_banks, cpu)[bank] = NULL; | 634 | per_cpu(threshold_banks, cpu)[bank] = NULL; |
@@ -645,14 +646,11 @@ static void threshold_remove_device(unsigned int cpu) | |||
645 | } | 646 | } |
646 | 647 | ||
647 | /* get notified when a cpu comes on/off */ | 648 | /* get notified when a cpu comes on/off */ |
648 | static int __cpuinit threshold_cpu_callback(struct notifier_block *nfb, | 649 | static void __cpuinit amd_64_threshold_cpu_callback(unsigned long action, |
649 | unsigned long action, void *hcpu) | 650 | unsigned int cpu) |
650 | { | 651 | { |
651 | /* cpu was unsigned int to begin with */ | ||
652 | unsigned int cpu = (unsigned long)hcpu; | ||
653 | |||
654 | if (cpu >= NR_CPUS) | 652 | if (cpu >= NR_CPUS) |
655 | goto out; | 653 | return; |
656 | 654 | ||
657 | switch (action) { | 655 | switch (action) { |
658 | case CPU_ONLINE: | 656 | case CPU_ONLINE: |
@@ -666,14 +664,8 @@ static int __cpuinit threshold_cpu_callback(struct notifier_block *nfb, | |||
666 | default: | 664 | default: |
667 | break; | 665 | break; |
668 | } | 666 | } |
669 | out: | ||
670 | return NOTIFY_OK; | ||
671 | } | 667 | } |
672 | 668 | ||
673 | static struct notifier_block threshold_cpu_notifier __cpuinitdata = { | ||
674 | .notifier_call = threshold_cpu_callback, | ||
675 | }; | ||
676 | |||
677 | static __init int threshold_init_device(void) | 669 | static __init int threshold_init_device(void) |
678 | { | 670 | { |
679 | unsigned lcpu = 0; | 671 | unsigned lcpu = 0; |
@@ -684,7 +676,7 @@ static __init int threshold_init_device(void) | |||
684 | if (err) | 676 | if (err) |
685 | return err; | 677 | return err; |
686 | } | 678 | } |
687 | register_hotcpu_notifier(&threshold_cpu_notifier); | 679 | threshold_cpu_callback = amd_64_threshold_cpu_callback; |
688 | return 0; | 680 | return 0; |
689 | } | 681 | } |
690 | 682 | ||
diff --git a/arch/x86/kernel/cpu/mcheck/p4.c b/arch/x86/kernel/cpu/mcheck/p4.c index eef001ad3bde..9b60fce09f75 100644 --- a/arch/x86/kernel/cpu/mcheck/p4.c +++ b/arch/x86/kernel/cpu/mcheck/p4.c | |||
@@ -102,7 +102,7 @@ static void intel_init_thermal(struct cpuinfo_x86 *c) | |||
102 | /* The temperature transition interrupt handler setup */ | 102 | /* The temperature transition interrupt handler setup */ |
103 | h = THERMAL_APIC_VECTOR; /* our delivery vector */ | 103 | h = THERMAL_APIC_VECTOR; /* our delivery vector */ |
104 | h |= (APIC_DM_FIXED | APIC_LVT_MASKED); /* we'll mask till we're ready */ | 104 | h |= (APIC_DM_FIXED | APIC_LVT_MASKED); /* we'll mask till we're ready */ |
105 | apic_write_around(APIC_LVTTHMR, h); | 105 | apic_write(APIC_LVTTHMR, h); |
106 | 106 | ||
107 | rdmsr(MSR_IA32_THERM_INTERRUPT, l, h); | 107 | rdmsr(MSR_IA32_THERM_INTERRUPT, l, h); |
108 | wrmsr(MSR_IA32_THERM_INTERRUPT, l | 0x03 , h); | 108 | wrmsr(MSR_IA32_THERM_INTERRUPT, l | 0x03 , h); |
@@ -114,7 +114,7 @@ static void intel_init_thermal(struct cpuinfo_x86 *c) | |||
114 | wrmsr(MSR_IA32_MISC_ENABLE, l | (1<<3), h); | 114 | wrmsr(MSR_IA32_MISC_ENABLE, l | (1<<3), h); |
115 | 115 | ||
116 | l = apic_read(APIC_LVTTHMR); | 116 | l = apic_read(APIC_LVTTHMR); |
117 | apic_write_around(APIC_LVTTHMR, l & ~APIC_LVT_MASKED); | 117 | apic_write(APIC_LVTTHMR, l & ~APIC_LVT_MASKED); |
118 | printk(KERN_INFO "CPU%d: Thermal monitoring enabled\n", cpu); | 118 | printk(KERN_INFO "CPU%d: Thermal monitoring enabled\n", cpu); |
119 | 119 | ||
120 | /* enable thermal throttle processing */ | 120 | /* enable thermal throttle processing */ |
diff --git a/arch/x86/kernel/cpu/mcheck/therm_throt.c b/arch/x86/kernel/cpu/mcheck/therm_throt.c index 1f4cc48c14c6..d5ae2243f0b9 100644 --- a/arch/x86/kernel/cpu/mcheck/therm_throt.c +++ b/arch/x86/kernel/cpu/mcheck/therm_throt.c | |||
@@ -35,6 +35,7 @@ atomic_t therm_throt_en = ATOMIC_INIT(0); | |||
35 | 35 | ||
36 | #define define_therm_throt_sysdev_show_func(name) \ | 36 | #define define_therm_throt_sysdev_show_func(name) \ |
37 | static ssize_t therm_throt_sysdev_show_##name(struct sys_device *dev, \ | 37 | static ssize_t therm_throt_sysdev_show_##name(struct sys_device *dev, \ |
38 | struct sysdev_attribute *attr, \ | ||
38 | char *buf) \ | 39 | char *buf) \ |
39 | { \ | 40 | { \ |
40 | unsigned int cpu = dev->id; \ | 41 | unsigned int cpu = dev->id; \ |
diff --git a/arch/x86/kernel/cpu/mtrr/generic.c b/arch/x86/kernel/cpu/mtrr/generic.c index 509bd3d9eacd..cb7d3b6a80eb 100644 --- a/arch/x86/kernel/cpu/mtrr/generic.c +++ b/arch/x86/kernel/cpu/mtrr/generic.c | |||
@@ -379,6 +379,7 @@ static void generic_get_mtrr(unsigned int reg, unsigned long *base, | |||
379 | unsigned long *size, mtrr_type *type) | 379 | unsigned long *size, mtrr_type *type) |
380 | { | 380 | { |
381 | unsigned int mask_lo, mask_hi, base_lo, base_hi; | 381 | unsigned int mask_lo, mask_hi, base_lo, base_hi; |
382 | unsigned int tmp, hi; | ||
382 | 383 | ||
383 | rdmsr(MTRRphysMask_MSR(reg), mask_lo, mask_hi); | 384 | rdmsr(MTRRphysMask_MSR(reg), mask_lo, mask_hi); |
384 | if ((mask_lo & 0x800) == 0) { | 385 | if ((mask_lo & 0x800) == 0) { |
@@ -392,8 +393,23 @@ static void generic_get_mtrr(unsigned int reg, unsigned long *base, | |||
392 | rdmsr(MTRRphysBase_MSR(reg), base_lo, base_hi); | 393 | rdmsr(MTRRphysBase_MSR(reg), base_lo, base_hi); |
393 | 394 | ||
394 | /* Work out the shifted address mask. */ | 395 | /* Work out the shifted address mask. */ |
395 | mask_lo = size_or_mask | mask_hi << (32 - PAGE_SHIFT) | 396 | tmp = mask_hi << (32 - PAGE_SHIFT) | mask_lo >> PAGE_SHIFT; |
396 | | mask_lo >> PAGE_SHIFT; | 397 | mask_lo = size_or_mask | tmp; |
398 | /* Expand tmp with high bits to all 1s*/ | ||
399 | hi = fls(tmp); | ||
400 | if (hi > 0) { | ||
401 | tmp |= ~((1<<(hi - 1)) - 1); | ||
402 | |||
403 | if (tmp != mask_lo) { | ||
404 | static int once = 1; | ||
405 | |||
406 | if (once) { | ||
407 | printk(KERN_INFO "mtrr: your BIOS has set up an incorrect mask, fixing it up.\n"); | ||
408 | once = 0; | ||
409 | } | ||
410 | mask_lo = tmp; | ||
411 | } | ||
412 | } | ||
397 | 413 | ||
398 | /* This works correctly if size is a power of two, i.e. a | 414 | /* This works correctly if size is a power of two, i.e. a |
399 | contiguous range. */ | 415 | contiguous range. */ |
diff --git a/arch/x86/kernel/cpu/mtrr/main.c b/arch/x86/kernel/cpu/mtrr/main.c index 6f23969c8faf..b117d7f8a564 100644 --- a/arch/x86/kernel/cpu/mtrr/main.c +++ b/arch/x86/kernel/cpu/mtrr/main.c | |||
@@ -1496,11 +1496,8 @@ int __init mtrr_trim_uncached_memory(unsigned long end_pfn) | |||
1496 | 1496 | ||
1497 | /* kvm/qemu doesn't have mtrr set right, don't trim them all */ | 1497 | /* kvm/qemu doesn't have mtrr set right, don't trim them all */ |
1498 | if (!highest_pfn) { | 1498 | if (!highest_pfn) { |
1499 | if (!kvm_para_available()) { | 1499 | WARN(!kvm_para_available(), KERN_WARNING |
1500 | printk(KERN_WARNING | ||
1501 | "WARNING: strange, CPU MTRRs all blank?\n"); | 1500 | "WARNING: strange, CPU MTRRs all blank?\n"); |
1502 | WARN_ON(1); | ||
1503 | } | ||
1504 | return 0; | 1501 | return 0; |
1505 | } | 1502 | } |
1506 | 1503 | ||
diff --git a/arch/x86/kernel/cpu/perfctr-watchdog.c b/arch/x86/kernel/cpu/perfctr-watchdog.c index 6d4bdc02388a..05cc22dbd4ff 100644 --- a/arch/x86/kernel/cpu/perfctr-watchdog.c +++ b/arch/x86/kernel/cpu/perfctr-watchdog.c | |||
@@ -250,7 +250,7 @@ static void write_watchdog_counter(unsigned int perfctr_msr, | |||
250 | 250 | ||
251 | do_div(count, nmi_hz); | 251 | do_div(count, nmi_hz); |
252 | if(descr) | 252 | if(descr) |
253 | Dprintk("setting %s to -0x%08Lx\n", descr, count); | 253 | pr_debug("setting %s to -0x%08Lx\n", descr, count); |
254 | wrmsrl(perfctr_msr, 0 - count); | 254 | wrmsrl(perfctr_msr, 0 - count); |
255 | } | 255 | } |
256 | 256 | ||
@@ -261,7 +261,7 @@ static void write_watchdog_counter32(unsigned int perfctr_msr, | |||
261 | 261 | ||
262 | do_div(count, nmi_hz); | 262 | do_div(count, nmi_hz); |
263 | if(descr) | 263 | if(descr) |
264 | Dprintk("setting %s to -0x%08Lx\n", descr, count); | 264 | pr_debug("setting %s to -0x%08Lx\n", descr, count); |
265 | wrmsr(perfctr_msr, (u32)(-count), 0); | 265 | wrmsr(perfctr_msr, (u32)(-count), 0); |
266 | } | 266 | } |
267 | 267 | ||
@@ -478,7 +478,13 @@ static int setup_p4_watchdog(unsigned nmi_hz) | |||
478 | perfctr_msr = MSR_P4_IQ_PERFCTR1; | 478 | perfctr_msr = MSR_P4_IQ_PERFCTR1; |
479 | evntsel_msr = MSR_P4_CRU_ESCR0; | 479 | evntsel_msr = MSR_P4_CRU_ESCR0; |
480 | cccr_msr = MSR_P4_IQ_CCCR1; | 480 | cccr_msr = MSR_P4_IQ_CCCR1; |
481 | cccr_val = P4_CCCR_OVF_PMI1 | P4_CCCR_ESCR_SELECT(4); | 481 | |
482 | /* Pentium 4 D processors don't support P4_CCCR_OVF_PMI1 */ | ||
483 | if (boot_cpu_data.x86_model == 4 && boot_cpu_data.x86_mask == 4) | ||
484 | cccr_val = P4_CCCR_OVF_PMI0; | ||
485 | else | ||
486 | cccr_val = P4_CCCR_OVF_PMI1; | ||
487 | cccr_val |= P4_CCCR_ESCR_SELECT(4); | ||
482 | } | 488 | } |
483 | 489 | ||
484 | evntsel = P4_ESCR_EVENT_SELECT(0x3F) | 490 | evntsel = P4_ESCR_EVENT_SELECT(0x3F) |
diff --git a/arch/x86/kernel/cpu/proc.c b/arch/x86/kernel/cpu/proc.c index 0d0d9057e7c0..a26c480b9491 100644 --- a/arch/x86/kernel/cpu/proc.c +++ b/arch/x86/kernel/cpu/proc.c | |||
@@ -160,7 +160,7 @@ static void *c_start(struct seq_file *m, loff_t *pos) | |||
160 | { | 160 | { |
161 | if (*pos == 0) /* just in case, cpu 0 is not the first */ | 161 | if (*pos == 0) /* just in case, cpu 0 is not the first */ |
162 | *pos = first_cpu(cpu_online_map); | 162 | *pos = first_cpu(cpu_online_map); |
163 | if ((*pos) < NR_CPUS && cpu_online(*pos)) | 163 | if ((*pos) < nr_cpu_ids && cpu_online(*pos)) |
164 | return &cpu_data(*pos); | 164 | return &cpu_data(*pos); |
165 | return NULL; | 165 | return NULL; |
166 | } | 166 | } |
diff --git a/arch/x86/kernel/cpuid.c b/arch/x86/kernel/cpuid.c index 2de5fa2bbf77..8e9cd6a8ec12 100644 --- a/arch/x86/kernel/cpuid.c +++ b/arch/x86/kernel/cpuid.c | |||
@@ -89,6 +89,8 @@ static ssize_t cpuid_read(struct file *file, char __user *buf, | |||
89 | struct cpuid_regs cmd; | 89 | struct cpuid_regs cmd; |
90 | int cpu = iminor(file->f_path.dentry->d_inode); | 90 | int cpu = iminor(file->f_path.dentry->d_inode); |
91 | u64 pos = *ppos; | 91 | u64 pos = *ppos; |
92 | ssize_t bytes = 0; | ||
93 | int err = 0; | ||
92 | 94 | ||
93 | if (count % 16) | 95 | if (count % 16) |
94 | return -EINVAL; /* Invalid chunk size */ | 96 | return -EINVAL; /* Invalid chunk size */ |
@@ -96,14 +98,19 @@ static ssize_t cpuid_read(struct file *file, char __user *buf, | |||
96 | for (; count; count -= 16) { | 98 | for (; count; count -= 16) { |
97 | cmd.eax = pos; | 99 | cmd.eax = pos; |
98 | cmd.ecx = pos >> 32; | 100 | cmd.ecx = pos >> 32; |
99 | smp_call_function_single(cpu, cpuid_smp_cpuid, &cmd, 1); | 101 | err = smp_call_function_single(cpu, cpuid_smp_cpuid, &cmd, 1); |
100 | if (copy_to_user(tmp, &cmd, 16)) | 102 | if (err) |
101 | return -EFAULT; | 103 | break; |
104 | if (copy_to_user(tmp, &cmd, 16)) { | ||
105 | err = -EFAULT; | ||
106 | break; | ||
107 | } | ||
102 | tmp += 16; | 108 | tmp += 16; |
109 | bytes += 16; | ||
103 | *ppos = ++pos; | 110 | *ppos = ++pos; |
104 | } | 111 | } |
105 | 112 | ||
106 | return tmp - buf; | 113 | return bytes ? bytes : err; |
107 | } | 114 | } |
108 | 115 | ||
109 | static int cpuid_open(struct inode *inode, struct file *file) | 116 | static int cpuid_open(struct inode *inode, struct file *file) |
@@ -141,8 +148,8 @@ static __cpuinit int cpuid_device_create(int cpu) | |||
141 | { | 148 | { |
142 | struct device *dev; | 149 | struct device *dev; |
143 | 150 | ||
144 | dev = device_create(cpuid_class, NULL, MKDEV(CPUID_MAJOR, cpu), | 151 | dev = device_create_drvdata(cpuid_class, NULL, MKDEV(CPUID_MAJOR, cpu), |
145 | "cpu%d", cpu); | 152 | NULL, "cpu%d", cpu); |
146 | return IS_ERR(dev) ? PTR_ERR(dev) : 0; | 153 | return IS_ERR(dev) ? PTR_ERR(dev) : 0; |
147 | } | 154 | } |
148 | 155 | ||
diff --git a/arch/x86/kernel/e820.c b/arch/x86/kernel/e820.c index 28c29180b380..66e48aa2dd1b 100644 --- a/arch/x86/kernel/e820.c +++ b/arch/x86/kernel/e820.c | |||
@@ -877,7 +877,8 @@ void __init early_res_to_bootmem(u64 start, u64 end) | |||
877 | for (i = 0; i < MAX_EARLY_RES && early_res[i].end; i++) | 877 | for (i = 0; i < MAX_EARLY_RES && early_res[i].end; i++) |
878 | count++; | 878 | count++; |
879 | 879 | ||
880 | printk(KERN_INFO "(%d early reservations) ==> bootmem\n", count); | 880 | printk(KERN_INFO "(%d early reservations) ==> bootmem [%010llx - %010llx]\n", |
881 | count, start, end); | ||
881 | for (i = 0; i < count; i++) { | 882 | for (i = 0; i < count; i++) { |
882 | struct early_res *r = &early_res[i]; | 883 | struct early_res *r = &early_res[i]; |
883 | printk(KERN_INFO " #%d [%010llx - %010llx] %16s", i, | 884 | printk(KERN_INFO " #%d [%010llx - %010llx] %16s", i, |
@@ -1202,7 +1203,7 @@ static int __init parse_memmap_opt(char *p) | |||
1202 | if (!p) | 1203 | if (!p) |
1203 | return -EINVAL; | 1204 | return -EINVAL; |
1204 | 1205 | ||
1205 | if (!strcmp(p, "exactmap")) { | 1206 | if (!strncmp(p, "exactmap", 8)) { |
1206 | #ifdef CONFIG_CRASH_DUMP | 1207 | #ifdef CONFIG_CRASH_DUMP |
1207 | /* | 1208 | /* |
1208 | * If we are doing a crash dump, we still need to know | 1209 | * If we are doing a crash dump, we still need to know |
@@ -1298,11 +1299,6 @@ void __init e820_reserve_resources(void) | |||
1298 | } | 1299 | } |
1299 | } | 1300 | } |
1300 | 1301 | ||
1301 | /* | ||
1302 | * Non-standard memory setup can be specified via this quirk: | ||
1303 | */ | ||
1304 | char * (*arch_memory_setup_quirk)(void); | ||
1305 | |||
1306 | char *__init default_machine_specific_memory_setup(void) | 1302 | char *__init default_machine_specific_memory_setup(void) |
1307 | { | 1303 | { |
1308 | char *who = "BIOS-e820"; | 1304 | char *who = "BIOS-e820"; |
@@ -1343,8 +1339,8 @@ char *__init default_machine_specific_memory_setup(void) | |||
1343 | 1339 | ||
1344 | char *__init __attribute__((weak)) machine_specific_memory_setup(void) | 1340 | char *__init __attribute__((weak)) machine_specific_memory_setup(void) |
1345 | { | 1341 | { |
1346 | if (arch_memory_setup_quirk) { | 1342 | if (x86_quirks->arch_memory_setup) { |
1347 | char *who = arch_memory_setup_quirk(); | 1343 | char *who = x86_quirks->arch_memory_setup(); |
1348 | 1344 | ||
1349 | if (who) | 1345 | if (who) |
1350 | return who; | 1346 | return who; |
@@ -1367,24 +1363,3 @@ void __init setup_memory_map(void) | |||
1367 | printk(KERN_INFO "BIOS-provided physical RAM map:\n"); | 1363 | printk(KERN_INFO "BIOS-provided physical RAM map:\n"); |
1368 | e820_print_map(who); | 1364 | e820_print_map(who); |
1369 | } | 1365 | } |
1370 | |||
1371 | #ifdef CONFIG_X86_64 | ||
1372 | int __init arch_get_ram_range(int slot, u64 *addr, u64 *size) | ||
1373 | { | ||
1374 | int i; | ||
1375 | |||
1376 | if (slot < 0 || slot >= e820.nr_map) | ||
1377 | return -1; | ||
1378 | for (i = slot; i < e820.nr_map; i++) { | ||
1379 | if (e820.map[i].type != E820_RAM) | ||
1380 | continue; | ||
1381 | break; | ||
1382 | } | ||
1383 | if (i == e820.nr_map || e820.map[i].addr > (max_pfn << PAGE_SHIFT)) | ||
1384 | return -1; | ||
1385 | *addr = e820.map[i].addr; | ||
1386 | *size = min_t(u64, e820.map[i].size + e820.map[i].addr, | ||
1387 | max_pfn << PAGE_SHIFT) - *addr; | ||
1388 | return i + 1; | ||
1389 | } | ||
1390 | #endif | ||
diff --git a/arch/x86/kernel/early-quirks.c b/arch/x86/kernel/early-quirks.c index a0e11c0cc872..4353cf5e6fac 100644 --- a/arch/x86/kernel/early-quirks.c +++ b/arch/x86/kernel/early-quirks.c | |||
@@ -16,10 +16,7 @@ | |||
16 | #include <asm/dma.h> | 16 | #include <asm/dma.h> |
17 | #include <asm/io_apic.h> | 17 | #include <asm/io_apic.h> |
18 | #include <asm/apic.h> | 18 | #include <asm/apic.h> |
19 | 19 | #include <asm/iommu.h> | |
20 | #ifdef CONFIG_GART_IOMMU | ||
21 | #include <asm/gart.h> | ||
22 | #endif | ||
23 | 20 | ||
24 | static void __init fix_hypertransport_config(int num, int slot, int func) | 21 | static void __init fix_hypertransport_config(int num, int slot, int func) |
25 | { | 22 | { |
diff --git a/arch/x86/kernel/efi_32.c b/arch/x86/kernel/efi_32.c index 4b63c8e1f13b..5cab48ee61a4 100644 --- a/arch/x86/kernel/efi_32.c +++ b/arch/x86/kernel/efi_32.c | |||
@@ -53,7 +53,7 @@ void efi_call_phys_prelog(void) | |||
53 | * directory. If I have PAE, I just need to duplicate one entry in | 53 | * directory. If I have PAE, I just need to duplicate one entry in |
54 | * page directory. | 54 | * page directory. |
55 | */ | 55 | */ |
56 | cr4 = read_cr4(); | 56 | cr4 = read_cr4_safe(); |
57 | 57 | ||
58 | if (cr4 & X86_CR4_PAE) { | 58 | if (cr4 & X86_CR4_PAE) { |
59 | efi_bak_pg_dir_pointer[0].pgd = | 59 | efi_bak_pg_dir_pointer[0].pgd = |
@@ -91,7 +91,7 @@ void efi_call_phys_epilog(void) | |||
91 | gdt_descr.size = GDT_SIZE - 1; | 91 | gdt_descr.size = GDT_SIZE - 1; |
92 | load_gdt(&gdt_descr); | 92 | load_gdt(&gdt_descr); |
93 | 93 | ||
94 | cr4 = read_cr4(); | 94 | cr4 = read_cr4_safe(); |
95 | 95 | ||
96 | if (cr4 & X86_CR4_PAE) { | 96 | if (cr4 & X86_CR4_PAE) { |
97 | swapper_pg_dir[pgd_index(0)].pgd = | 97 | swapper_pg_dir[pgd_index(0)].pgd = |
diff --git a/arch/x86/kernel/entry_32.S b/arch/x86/kernel/entry_32.S index 6bc07f0f1202..109792bc7cfa 100644 --- a/arch/x86/kernel/entry_32.S +++ b/arch/x86/kernel/entry_32.S | |||
@@ -54,6 +54,16 @@ | |||
54 | #include <asm/ftrace.h> | 54 | #include <asm/ftrace.h> |
55 | #include <asm/irq_vectors.h> | 55 | #include <asm/irq_vectors.h> |
56 | 56 | ||
57 | /* Avoid __ASSEMBLER__'ifying <linux/audit.h> just for this. */ | ||
58 | #include <linux/elf-em.h> | ||
59 | #define AUDIT_ARCH_I386 (EM_386|__AUDIT_ARCH_LE) | ||
60 | #define __AUDIT_ARCH_LE 0x40000000 | ||
61 | |||
62 | #ifndef CONFIG_AUDITSYSCALL | ||
63 | #define sysenter_audit syscall_trace_entry | ||
64 | #define sysexit_audit syscall_exit_work | ||
65 | #endif | ||
66 | |||
57 | /* | 67 | /* |
58 | * We use macros for low-level operations which need to be overridden | 68 | * We use macros for low-level operations which need to be overridden |
59 | * for paravirtualization. The following will never clobber any registers: | 69 | * for paravirtualization. The following will never clobber any registers: |
@@ -332,8 +342,9 @@ sysenter_past_esp: | |||
332 | GET_THREAD_INFO(%ebp) | 342 | GET_THREAD_INFO(%ebp) |
333 | 343 | ||
334 | /* Note, _TIF_SECCOMP is bit number 8, and so it needs testw and not testb */ | 344 | /* Note, _TIF_SECCOMP is bit number 8, and so it needs testw and not testb */ |
335 | testw $(_TIF_SYSCALL_EMU|_TIF_SYSCALL_TRACE|_TIF_SECCOMP|_TIF_SYSCALL_AUDIT),TI_flags(%ebp) | 345 | testw $_TIF_WORK_SYSCALL_ENTRY,TI_flags(%ebp) |
336 | jnz syscall_trace_entry | 346 | jnz sysenter_audit |
347 | sysenter_do_call: | ||
337 | cmpl $(nr_syscalls), %eax | 348 | cmpl $(nr_syscalls), %eax |
338 | jae syscall_badsys | 349 | jae syscall_badsys |
339 | call *sys_call_table(,%eax,4) | 350 | call *sys_call_table(,%eax,4) |
@@ -343,7 +354,8 @@ sysenter_past_esp: | |||
343 | TRACE_IRQS_OFF | 354 | TRACE_IRQS_OFF |
344 | movl TI_flags(%ebp), %ecx | 355 | movl TI_flags(%ebp), %ecx |
345 | testw $_TIF_ALLWORK_MASK, %cx | 356 | testw $_TIF_ALLWORK_MASK, %cx |
346 | jne syscall_exit_work | 357 | jne sysexit_audit |
358 | sysenter_exit: | ||
347 | /* if something modifies registers it must also disable sysexit */ | 359 | /* if something modifies registers it must also disable sysexit */ |
348 | movl PT_EIP(%esp), %edx | 360 | movl PT_EIP(%esp), %edx |
349 | movl PT_OLDESP(%esp), %ecx | 361 | movl PT_OLDESP(%esp), %ecx |
@@ -351,6 +363,45 @@ sysenter_past_esp: | |||
351 | TRACE_IRQS_ON | 363 | TRACE_IRQS_ON |
352 | 1: mov PT_FS(%esp), %fs | 364 | 1: mov PT_FS(%esp), %fs |
353 | ENABLE_INTERRUPTS_SYSEXIT | 365 | ENABLE_INTERRUPTS_SYSEXIT |
366 | |||
367 | #ifdef CONFIG_AUDITSYSCALL | ||
368 | sysenter_audit: | ||
369 | testw $(_TIF_WORK_SYSCALL_ENTRY & ~_TIF_SYSCALL_AUDIT),TI_flags(%ebp) | ||
370 | jnz syscall_trace_entry | ||
371 | addl $4,%esp | ||
372 | CFI_ADJUST_CFA_OFFSET -4 | ||
373 | /* %esi already in 8(%esp) 6th arg: 4th syscall arg */ | ||
374 | /* %edx already in 4(%esp) 5th arg: 3rd syscall arg */ | ||
375 | /* %ecx already in 0(%esp) 4th arg: 2nd syscall arg */ | ||
376 | movl %ebx,%ecx /* 3rd arg: 1st syscall arg */ | ||
377 | movl %eax,%edx /* 2nd arg: syscall number */ | ||
378 | movl $AUDIT_ARCH_I386,%eax /* 1st arg: audit arch */ | ||
379 | call audit_syscall_entry | ||
380 | pushl %ebx | ||
381 | CFI_ADJUST_CFA_OFFSET 4 | ||
382 | movl PT_EAX(%esp),%eax /* reload syscall number */ | ||
383 | jmp sysenter_do_call | ||
384 | |||
385 | sysexit_audit: | ||
386 | testw $(_TIF_ALLWORK_MASK & ~_TIF_SYSCALL_AUDIT), %cx | ||
387 | jne syscall_exit_work | ||
388 | TRACE_IRQS_ON | ||
389 | ENABLE_INTERRUPTS(CLBR_ANY) | ||
390 | movl %eax,%edx /* second arg, syscall return value */ | ||
391 | cmpl $0,%eax /* is it < 0? */ | ||
392 | setl %al /* 1 if so, 0 if not */ | ||
393 | movzbl %al,%eax /* zero-extend that */ | ||
394 | inc %eax /* first arg, 0->1(AUDITSC_SUCCESS), 1->2(AUDITSC_FAILURE) */ | ||
395 | call audit_syscall_exit | ||
396 | DISABLE_INTERRUPTS(CLBR_ANY) | ||
397 | TRACE_IRQS_OFF | ||
398 | movl TI_flags(%ebp), %ecx | ||
399 | testw $(_TIF_ALLWORK_MASK & ~_TIF_SYSCALL_AUDIT), %cx | ||
400 | jne syscall_exit_work | ||
401 | movl PT_EAX(%esp),%eax /* reload syscall return value */ | ||
402 | jmp sysenter_exit | ||
403 | #endif | ||
404 | |||
354 | CFI_ENDPROC | 405 | CFI_ENDPROC |
355 | .pushsection .fixup,"ax" | 406 | .pushsection .fixup,"ax" |
356 | 2: movl $0,PT_FS(%esp) | 407 | 2: movl $0,PT_FS(%esp) |
@@ -370,7 +421,7 @@ ENTRY(system_call) | |||
370 | GET_THREAD_INFO(%ebp) | 421 | GET_THREAD_INFO(%ebp) |
371 | # system call tracing in operation / emulation | 422 | # system call tracing in operation / emulation |
372 | /* Note, _TIF_SECCOMP is bit number 8, and so it needs testw and not testb */ | 423 | /* Note, _TIF_SECCOMP is bit number 8, and so it needs testw and not testb */ |
373 | testw $(_TIF_SYSCALL_EMU|_TIF_SYSCALL_TRACE|_TIF_SECCOMP|_TIF_SYSCALL_AUDIT),TI_flags(%ebp) | 424 | testw $_TIF_WORK_SYSCALL_ENTRY,TI_flags(%ebp) |
374 | jnz syscall_trace_entry | 425 | jnz syscall_trace_entry |
375 | cmpl $(nr_syscalls), %eax | 426 | cmpl $(nr_syscalls), %eax |
376 | jae syscall_badsys | 427 | jae syscall_badsys |
@@ -383,10 +434,6 @@ syscall_exit: | |||
383 | # setting need_resched or sigpending | 434 | # setting need_resched or sigpending |
384 | # between sampling and the iret | 435 | # between sampling and the iret |
385 | TRACE_IRQS_OFF | 436 | TRACE_IRQS_OFF |
386 | testl $X86_EFLAGS_TF,PT_EFLAGS(%esp) # If tracing set singlestep flag on exit | ||
387 | jz no_singlestep | ||
388 | orl $_TIF_SINGLESTEP,TI_flags(%ebp) | ||
389 | no_singlestep: | ||
390 | movl TI_flags(%ebp), %ecx | 437 | movl TI_flags(%ebp), %ecx |
391 | testw $_TIF_ALLWORK_MASK, %cx # current->work | 438 | testw $_TIF_ALLWORK_MASK, %cx # current->work |
392 | jne syscall_exit_work | 439 | jne syscall_exit_work |
@@ -514,12 +561,8 @@ END(work_pending) | |||
514 | syscall_trace_entry: | 561 | syscall_trace_entry: |
515 | movl $-ENOSYS,PT_EAX(%esp) | 562 | movl $-ENOSYS,PT_EAX(%esp) |
516 | movl %esp, %eax | 563 | movl %esp, %eax |
517 | xorl %edx,%edx | 564 | call syscall_trace_enter |
518 | call do_syscall_trace | 565 | /* What it returned is what we'll actually use. */ |
519 | cmpl $0, %eax | ||
520 | jne resume_userspace # ret != 0 -> running under PTRACE_SYSEMU, | ||
521 | # so must skip actual syscall | ||
522 | movl PT_ORIG_EAX(%esp), %eax | ||
523 | cmpl $(nr_syscalls), %eax | 566 | cmpl $(nr_syscalls), %eax |
524 | jnae syscall_call | 567 | jnae syscall_call |
525 | jmp syscall_exit | 568 | jmp syscall_exit |
@@ -528,14 +571,13 @@ END(syscall_trace_entry) | |||
528 | # perform syscall exit tracing | 571 | # perform syscall exit tracing |
529 | ALIGN | 572 | ALIGN |
530 | syscall_exit_work: | 573 | syscall_exit_work: |
531 | testb $(_TIF_SYSCALL_TRACE|_TIF_SYSCALL_AUDIT|_TIF_SINGLESTEP), %cl | 574 | testb $_TIF_WORK_SYSCALL_EXIT, %cl |
532 | jz work_pending | 575 | jz work_pending |
533 | TRACE_IRQS_ON | 576 | TRACE_IRQS_ON |
534 | ENABLE_INTERRUPTS(CLBR_ANY) # could let do_syscall_trace() call | 577 | ENABLE_INTERRUPTS(CLBR_ANY) # could let syscall_trace_leave() call |
535 | # schedule() instead | 578 | # schedule() instead |
536 | movl %esp, %eax | 579 | movl %esp, %eax |
537 | movl $1, %edx | 580 | call syscall_trace_leave |
538 | call do_syscall_trace | ||
539 | jmp resume_userspace | 581 | jmp resume_userspace |
540 | END(syscall_exit_work) | 582 | END(syscall_exit_work) |
541 | CFI_ENDPROC | 583 | CFI_ENDPROC |
@@ -1024,6 +1066,7 @@ ENDPROC(kernel_thread_helper) | |||
1024 | ENTRY(xen_sysenter_target) | 1066 | ENTRY(xen_sysenter_target) |
1025 | RING0_INT_FRAME | 1067 | RING0_INT_FRAME |
1026 | addl $5*4, %esp /* remove xen-provided frame */ | 1068 | addl $5*4, %esp /* remove xen-provided frame */ |
1069 | CFI_ADJUST_CFA_OFFSET -5*4 | ||
1027 | jmp sysenter_past_esp | 1070 | jmp sysenter_past_esp |
1028 | CFI_ENDPROC | 1071 | CFI_ENDPROC |
1029 | 1072 | ||
diff --git a/arch/x86/kernel/entry_64.S b/arch/x86/kernel/entry_64.S index ae63e584c340..89434d439605 100644 --- a/arch/x86/kernel/entry_64.S +++ b/arch/x86/kernel/entry_64.S | |||
@@ -53,6 +53,12 @@ | |||
53 | #include <asm/paravirt.h> | 53 | #include <asm/paravirt.h> |
54 | #include <asm/ftrace.h> | 54 | #include <asm/ftrace.h> |
55 | 55 | ||
56 | /* Avoid __ASSEMBLER__'ifying <linux/audit.h> just for this. */ | ||
57 | #include <linux/elf-em.h> | ||
58 | #define AUDIT_ARCH_X86_64 (EM_X86_64|__AUDIT_ARCH_64BIT|__AUDIT_ARCH_LE) | ||
59 | #define __AUDIT_ARCH_64BIT 0x80000000 | ||
60 | #define __AUDIT_ARCH_LE 0x40000000 | ||
61 | |||
56 | .code64 | 62 | .code64 |
57 | 63 | ||
58 | #ifdef CONFIG_FTRACE | 64 | #ifdef CONFIG_FTRACE |
@@ -349,9 +355,9 @@ ENTRY(system_call_after_swapgs) | |||
349 | movq %rcx,RIP-ARGOFFSET(%rsp) | 355 | movq %rcx,RIP-ARGOFFSET(%rsp) |
350 | CFI_REL_OFFSET rip,RIP-ARGOFFSET | 356 | CFI_REL_OFFSET rip,RIP-ARGOFFSET |
351 | GET_THREAD_INFO(%rcx) | 357 | GET_THREAD_INFO(%rcx) |
352 | testl $(_TIF_SYSCALL_TRACE|_TIF_SYSCALL_AUDIT|_TIF_SECCOMP), \ | 358 | testl $_TIF_WORK_SYSCALL_ENTRY,TI_flags(%rcx) |
353 | TI_flags(%rcx) | ||
354 | jnz tracesys | 359 | jnz tracesys |
360 | system_call_fastpath: | ||
355 | cmpq $__NR_syscall_max,%rax | 361 | cmpq $__NR_syscall_max,%rax |
356 | ja badsys | 362 | ja badsys |
357 | movq %r10,%rcx | 363 | movq %r10,%rcx |
@@ -403,16 +409,16 @@ sysret_careful: | |||
403 | sysret_signal: | 409 | sysret_signal: |
404 | TRACE_IRQS_ON | 410 | TRACE_IRQS_ON |
405 | ENABLE_INTERRUPTS(CLBR_NONE) | 411 | ENABLE_INTERRUPTS(CLBR_NONE) |
406 | testl $_TIF_DO_NOTIFY_MASK,%edx | 412 | #ifdef CONFIG_AUDITSYSCALL |
407 | jz 1f | 413 | bt $TIF_SYSCALL_AUDIT,%edx |
408 | 414 | jc sysret_audit | |
409 | /* Really a signal */ | 415 | #endif |
410 | /* edx: work flags (arg3) */ | 416 | /* edx: work flags (arg3) */ |
411 | leaq do_notify_resume(%rip),%rax | 417 | leaq do_notify_resume(%rip),%rax |
412 | leaq -ARGOFFSET(%rsp),%rdi # &pt_regs -> arg1 | 418 | leaq -ARGOFFSET(%rsp),%rdi # &pt_regs -> arg1 |
413 | xorl %esi,%esi # oldset -> arg2 | 419 | xorl %esi,%esi # oldset -> arg2 |
414 | call ptregscall_common | 420 | call ptregscall_common |
415 | 1: movl $_TIF_WORK_MASK,%edi | 421 | movl $_TIF_WORK_MASK,%edi |
416 | /* Use IRET because user could have changed frame. This | 422 | /* Use IRET because user could have changed frame. This |
417 | works because ptregscall_common has called FIXUP_TOP_OF_STACK. */ | 423 | works because ptregscall_common has called FIXUP_TOP_OF_STACK. */ |
418 | DISABLE_INTERRUPTS(CLBR_NONE) | 424 | DISABLE_INTERRUPTS(CLBR_NONE) |
@@ -423,14 +429,56 @@ badsys: | |||
423 | movq $-ENOSYS,RAX-ARGOFFSET(%rsp) | 429 | movq $-ENOSYS,RAX-ARGOFFSET(%rsp) |
424 | jmp ret_from_sys_call | 430 | jmp ret_from_sys_call |
425 | 431 | ||
432 | #ifdef CONFIG_AUDITSYSCALL | ||
433 | /* | ||
434 | * Fast path for syscall audit without full syscall trace. | ||
435 | * We just call audit_syscall_entry() directly, and then | ||
436 | * jump back to the normal fast path. | ||
437 | */ | ||
438 | auditsys: | ||
439 | movq %r10,%r9 /* 6th arg: 4th syscall arg */ | ||
440 | movq %rdx,%r8 /* 5th arg: 3rd syscall arg */ | ||
441 | movq %rsi,%rcx /* 4th arg: 2nd syscall arg */ | ||
442 | movq %rdi,%rdx /* 3rd arg: 1st syscall arg */ | ||
443 | movq %rax,%rsi /* 2nd arg: syscall number */ | ||
444 | movl $AUDIT_ARCH_X86_64,%edi /* 1st arg: audit arch */ | ||
445 | call audit_syscall_entry | ||
446 | LOAD_ARGS 0 /* reload call-clobbered registers */ | ||
447 | jmp system_call_fastpath | ||
448 | |||
449 | /* | ||
450 | * Return fast path for syscall audit. Call audit_syscall_exit() | ||
451 | * directly and then jump back to the fast path with TIF_SYSCALL_AUDIT | ||
452 | * masked off. | ||
453 | */ | ||
454 | sysret_audit: | ||
455 | movq %rax,%rsi /* second arg, syscall return value */ | ||
456 | cmpq $0,%rax /* is it < 0? */ | ||
457 | setl %al /* 1 if so, 0 if not */ | ||
458 | movzbl %al,%edi /* zero-extend that into %edi */ | ||
459 | inc %edi /* first arg, 0->1(AUDITSC_SUCCESS), 1->2(AUDITSC_FAILURE) */ | ||
460 | call audit_syscall_exit | ||
461 | movl $(_TIF_ALLWORK_MASK & ~_TIF_SYSCALL_AUDIT),%edi | ||
462 | jmp sysret_check | ||
463 | #endif /* CONFIG_AUDITSYSCALL */ | ||
464 | |||
426 | /* Do syscall tracing */ | 465 | /* Do syscall tracing */ |
427 | tracesys: | 466 | tracesys: |
467 | #ifdef CONFIG_AUDITSYSCALL | ||
468 | testl $(_TIF_WORK_SYSCALL_ENTRY & ~_TIF_SYSCALL_AUDIT),TI_flags(%rcx) | ||
469 | jz auditsys | ||
470 | #endif | ||
428 | SAVE_REST | 471 | SAVE_REST |
429 | movq $-ENOSYS,RAX(%rsp) /* ptrace can change this for a bad syscall */ | 472 | movq $-ENOSYS,RAX(%rsp) /* ptrace can change this for a bad syscall */ |
430 | FIXUP_TOP_OF_STACK %rdi | 473 | FIXUP_TOP_OF_STACK %rdi |
431 | movq %rsp,%rdi | 474 | movq %rsp,%rdi |
432 | call syscall_trace_enter | 475 | call syscall_trace_enter |
433 | LOAD_ARGS ARGOFFSET /* reload args from stack in case ptrace changed it */ | 476 | /* |
477 | * Reload arg registers from stack in case ptrace changed them. | ||
478 | * We don't reload %rax because syscall_trace_enter() returned | ||
479 | * the value it wants us to use in the table lookup. | ||
480 | */ | ||
481 | LOAD_ARGS ARGOFFSET, 1 | ||
434 | RESTORE_REST | 482 | RESTORE_REST |
435 | cmpq $__NR_syscall_max,%rax | 483 | cmpq $__NR_syscall_max,%rax |
436 | ja int_ret_from_sys_call /* RAX(%rsp) set to -ENOSYS above */ | 484 | ja int_ret_from_sys_call /* RAX(%rsp) set to -ENOSYS above */ |
@@ -444,6 +492,7 @@ tracesys: | |||
444 | * Has correct top of stack, but partial stack frame. | 492 | * Has correct top of stack, but partial stack frame. |
445 | */ | 493 | */ |
446 | .globl int_ret_from_sys_call | 494 | .globl int_ret_from_sys_call |
495 | .globl int_with_check | ||
447 | int_ret_from_sys_call: | 496 | int_ret_from_sys_call: |
448 | DISABLE_INTERRUPTS(CLBR_NONE) | 497 | DISABLE_INTERRUPTS(CLBR_NONE) |
449 | TRACE_IRQS_OFF | 498 | TRACE_IRQS_OFF |
@@ -483,7 +532,7 @@ int_very_careful: | |||
483 | ENABLE_INTERRUPTS(CLBR_NONE) | 532 | ENABLE_INTERRUPTS(CLBR_NONE) |
484 | SAVE_REST | 533 | SAVE_REST |
485 | /* Check for syscall exit trace */ | 534 | /* Check for syscall exit trace */ |
486 | testl $(_TIF_SYSCALL_TRACE|_TIF_SYSCALL_AUDIT|_TIF_SINGLESTEP),%edx | 535 | testl $_TIF_WORK_SYSCALL_EXIT,%edx |
487 | jz int_signal | 536 | jz int_signal |
488 | pushq %rdi | 537 | pushq %rdi |
489 | CFI_ADJUST_CFA_OFFSET 8 | 538 | CFI_ADJUST_CFA_OFFSET 8 |
@@ -491,7 +540,7 @@ int_very_careful: | |||
491 | call syscall_trace_leave | 540 | call syscall_trace_leave |
492 | popq %rdi | 541 | popq %rdi |
493 | CFI_ADJUST_CFA_OFFSET -8 | 542 | CFI_ADJUST_CFA_OFFSET -8 |
494 | andl $~(_TIF_SYSCALL_TRACE|_TIF_SYSCALL_AUDIT|_TIF_SINGLESTEP),%edi | 543 | andl $~(_TIF_WORK_SYSCALL_EXIT|_TIF_SYSCALL_EMU),%edi |
495 | jmp int_restore_rest | 544 | jmp int_restore_rest |
496 | 545 | ||
497 | int_signal: | 546 | int_signal: |
@@ -1189,6 +1238,7 @@ END(device_not_available) | |||
1189 | /* runs on exception stack */ | 1238 | /* runs on exception stack */ |
1190 | KPROBE_ENTRY(debug) | 1239 | KPROBE_ENTRY(debug) |
1191 | INTR_FRAME | 1240 | INTR_FRAME |
1241 | PARAVIRT_ADJUST_EXCEPTION_FRAME | ||
1192 | pushq $0 | 1242 | pushq $0 |
1193 | CFI_ADJUST_CFA_OFFSET 8 | 1243 | CFI_ADJUST_CFA_OFFSET 8 |
1194 | paranoidentry do_debug, DEBUG_STACK | 1244 | paranoidentry do_debug, DEBUG_STACK |
@@ -1198,6 +1248,7 @@ KPROBE_END(debug) | |||
1198 | /* runs on exception stack */ | 1248 | /* runs on exception stack */ |
1199 | KPROBE_ENTRY(nmi) | 1249 | KPROBE_ENTRY(nmi) |
1200 | INTR_FRAME | 1250 | INTR_FRAME |
1251 | PARAVIRT_ADJUST_EXCEPTION_FRAME | ||
1201 | pushq $-1 | 1252 | pushq $-1 |
1202 | CFI_ADJUST_CFA_OFFSET 8 | 1253 | CFI_ADJUST_CFA_OFFSET 8 |
1203 | paranoidentry do_nmi, 0, 0 | 1254 | paranoidentry do_nmi, 0, 0 |
@@ -1211,6 +1262,7 @@ KPROBE_END(nmi) | |||
1211 | 1262 | ||
1212 | KPROBE_ENTRY(int3) | 1263 | KPROBE_ENTRY(int3) |
1213 | INTR_FRAME | 1264 | INTR_FRAME |
1265 | PARAVIRT_ADJUST_EXCEPTION_FRAME | ||
1214 | pushq $0 | 1266 | pushq $0 |
1215 | CFI_ADJUST_CFA_OFFSET 8 | 1267 | CFI_ADJUST_CFA_OFFSET 8 |
1216 | paranoidentry do_int3, DEBUG_STACK | 1268 | paranoidentry do_int3, DEBUG_STACK |
@@ -1237,6 +1289,7 @@ END(coprocessor_segment_overrun) | |||
1237 | /* runs on exception stack */ | 1289 | /* runs on exception stack */ |
1238 | ENTRY(double_fault) | 1290 | ENTRY(double_fault) |
1239 | XCPT_FRAME | 1291 | XCPT_FRAME |
1292 | PARAVIRT_ADJUST_EXCEPTION_FRAME | ||
1240 | paranoidentry do_double_fault | 1293 | paranoidentry do_double_fault |
1241 | jmp paranoid_exit1 | 1294 | jmp paranoid_exit1 |
1242 | CFI_ENDPROC | 1295 | CFI_ENDPROC |
@@ -1253,6 +1306,7 @@ END(segment_not_present) | |||
1253 | /* runs on exception stack */ | 1306 | /* runs on exception stack */ |
1254 | ENTRY(stack_segment) | 1307 | ENTRY(stack_segment) |
1255 | XCPT_FRAME | 1308 | XCPT_FRAME |
1309 | PARAVIRT_ADJUST_EXCEPTION_FRAME | ||
1256 | paranoidentry do_stack_segment | 1310 | paranoidentry do_stack_segment |
1257 | jmp paranoid_exit1 | 1311 | jmp paranoid_exit1 |
1258 | CFI_ENDPROC | 1312 | CFI_ENDPROC |
@@ -1278,6 +1332,7 @@ END(spurious_interrupt_bug) | |||
1278 | /* runs on exception stack */ | 1332 | /* runs on exception stack */ |
1279 | ENTRY(machine_check) | 1333 | ENTRY(machine_check) |
1280 | INTR_FRAME | 1334 | INTR_FRAME |
1335 | PARAVIRT_ADJUST_EXCEPTION_FRAME | ||
1281 | pushq $0 | 1336 | pushq $0 |
1282 | CFI_ADJUST_CFA_OFFSET 8 | 1337 | CFI_ADJUST_CFA_OFFSET 8 |
1283 | paranoidentry do_machine_check | 1338 | paranoidentry do_machine_check |
@@ -1312,3 +1367,103 @@ KPROBE_ENTRY(ignore_sysret) | |||
1312 | sysret | 1367 | sysret |
1313 | CFI_ENDPROC | 1368 | CFI_ENDPROC |
1314 | ENDPROC(ignore_sysret) | 1369 | ENDPROC(ignore_sysret) |
1370 | |||
1371 | #ifdef CONFIG_XEN | ||
1372 | ENTRY(xen_hypervisor_callback) | ||
1373 | zeroentry xen_do_hypervisor_callback | ||
1374 | END(xen_hypervisor_callback) | ||
1375 | |||
1376 | /* | ||
1377 | # A note on the "critical region" in our callback handler. | ||
1378 | # We want to avoid stacking callback handlers due to events occurring | ||
1379 | # during handling of the last event. To do this, we keep events disabled | ||
1380 | # until we've done all processing. HOWEVER, we must enable events before | ||
1381 | # popping the stack frame (can't be done atomically) and so it would still | ||
1382 | # be possible to get enough handler activations to overflow the stack. | ||
1383 | # Although unlikely, bugs of that kind are hard to track down, so we'd | ||
1384 | # like to avoid the possibility. | ||
1385 | # So, on entry to the handler we detect whether we interrupted an | ||
1386 | # existing activation in its critical region -- if so, we pop the current | ||
1387 | # activation and restart the handler using the previous one. | ||
1388 | */ | ||
1389 | ENTRY(xen_do_hypervisor_callback) # do_hypervisor_callback(struct *pt_regs) | ||
1390 | CFI_STARTPROC | ||
1391 | /* Since we don't modify %rdi, xen_evtchn_do_upcall(struct pt_regs *) will | ||
1392 | see the correct pointer to the pt_regs */ | ||
1393 | movq %rdi, %rsp # we don't return, adjust the stack frame | ||
1394 | CFI_ENDPROC | ||
1395 | CFI_DEFAULT_STACK | ||
1396 | 11: incl %gs:pda_irqcount | ||
1397 | movq %rsp,%rbp | ||
1398 | CFI_DEF_CFA_REGISTER rbp | ||
1399 | cmovzq %gs:pda_irqstackptr,%rsp | ||
1400 | pushq %rbp # backlink for old unwinder | ||
1401 | call xen_evtchn_do_upcall | ||
1402 | popq %rsp | ||
1403 | CFI_DEF_CFA_REGISTER rsp | ||
1404 | decl %gs:pda_irqcount | ||
1405 | jmp error_exit | ||
1406 | CFI_ENDPROC | ||
1407 | END(do_hypervisor_callback) | ||
1408 | |||
1409 | /* | ||
1410 | # Hypervisor uses this for application faults while it executes. | ||
1411 | # We get here for two reasons: | ||
1412 | # 1. Fault while reloading DS, ES, FS or GS | ||
1413 | # 2. Fault while executing IRET | ||
1414 | # Category 1 we do not need to fix up as Xen has already reloaded all segment | ||
1415 | # registers that could be reloaded and zeroed the others. | ||
1416 | # Category 2 we fix up by killing the current process. We cannot use the | ||
1417 | # normal Linux return path in this case because if we use the IRET hypercall | ||
1418 | # to pop the stack frame we end up in an infinite loop of failsafe callbacks. | ||
1419 | # We distinguish between categories by comparing each saved segment register | ||
1420 | # with its current contents: any discrepancy means we are in category 1. | ||
1421 | */ | ||
1422 | ENTRY(xen_failsafe_callback) | ||
1423 | framesz = (RIP-0x30) /* workaround buggy gas */ | ||
1424 | _frame framesz | ||
1425 | CFI_REL_OFFSET rcx, 0 | ||
1426 | CFI_REL_OFFSET r11, 8 | ||
1427 | movw %ds,%cx | ||
1428 | cmpw %cx,0x10(%rsp) | ||
1429 | CFI_REMEMBER_STATE | ||
1430 | jne 1f | ||
1431 | movw %es,%cx | ||
1432 | cmpw %cx,0x18(%rsp) | ||
1433 | jne 1f | ||
1434 | movw %fs,%cx | ||
1435 | cmpw %cx,0x20(%rsp) | ||
1436 | jne 1f | ||
1437 | movw %gs,%cx | ||
1438 | cmpw %cx,0x28(%rsp) | ||
1439 | jne 1f | ||
1440 | /* All segments match their saved values => Category 2 (Bad IRET). */ | ||
1441 | movq (%rsp),%rcx | ||
1442 | CFI_RESTORE rcx | ||
1443 | movq 8(%rsp),%r11 | ||
1444 | CFI_RESTORE r11 | ||
1445 | addq $0x30,%rsp | ||
1446 | CFI_ADJUST_CFA_OFFSET -0x30 | ||
1447 | pushq $0 | ||
1448 | CFI_ADJUST_CFA_OFFSET 8 | ||
1449 | pushq %r11 | ||
1450 | CFI_ADJUST_CFA_OFFSET 8 | ||
1451 | pushq %rcx | ||
1452 | CFI_ADJUST_CFA_OFFSET 8 | ||
1453 | jmp general_protection | ||
1454 | CFI_RESTORE_STATE | ||
1455 | 1: /* Segment mismatch => Category 1 (Bad segment). Retry the IRET. */ | ||
1456 | movq (%rsp),%rcx | ||
1457 | CFI_RESTORE rcx | ||
1458 | movq 8(%rsp),%r11 | ||
1459 | CFI_RESTORE r11 | ||
1460 | addq $0x30,%rsp | ||
1461 | CFI_ADJUST_CFA_OFFSET -0x30 | ||
1462 | pushq $0 | ||
1463 | CFI_ADJUST_CFA_OFFSET 8 | ||
1464 | SAVE_ALL | ||
1465 | jmp error_exit | ||
1466 | CFI_ENDPROC | ||
1467 | END(xen_failsafe_callback) | ||
1468 | |||
1469 | #endif /* CONFIG_XEN */ | ||
diff --git a/arch/x86/kernel/genapic_64.c b/arch/x86/kernel/genapic_64.c index 1fa8be5bd217..eaff0bbb1444 100644 --- a/arch/x86/kernel/genapic_64.c +++ b/arch/x86/kernel/genapic_64.c | |||
@@ -99,3 +99,4 @@ int is_uv_system(void) | |||
99 | { | 99 | { |
100 | return uv_system_type != UV_NONE; | 100 | return uv_system_type != UV_NONE; |
101 | } | 101 | } |
102 | EXPORT_SYMBOL_GPL(is_uv_system); | ||
diff --git a/arch/x86/kernel/genapic_flat_64.c b/arch/x86/kernel/genapic_flat_64.c index 1a9c68845ee8..786548a62d38 100644 --- a/arch/x86/kernel/genapic_flat_64.c +++ b/arch/x86/kernel/genapic_flat_64.c | |||
@@ -168,7 +168,7 @@ static unsigned int physflat_cpu_mask_to_apicid(cpumask_t cpumask) | |||
168 | * May as well be the first. | 168 | * May as well be the first. |
169 | */ | 169 | */ |
170 | cpu = first_cpu(cpumask); | 170 | cpu = first_cpu(cpumask); |
171 | if ((unsigned)cpu < NR_CPUS) | 171 | if ((unsigned)cpu < nr_cpu_ids) |
172 | return per_cpu(x86_cpu_to_apicid, cpu); | 172 | return per_cpu(x86_cpu_to_apicid, cpu); |
173 | else | 173 | else |
174 | return BAD_APICID; | 174 | return BAD_APICID; |
diff --git a/arch/x86/kernel/genx2apic_uv_x.c b/arch/x86/kernel/genx2apic_uv_x.c index 711f11c30b06..bfa837cb16be 100644 --- a/arch/x86/kernel/genx2apic_uv_x.c +++ b/arch/x86/kernel/genx2apic_uv_x.c | |||
@@ -24,6 +24,7 @@ | |||
24 | #include <asm/pgtable.h> | 24 | #include <asm/pgtable.h> |
25 | #include <asm/uv/uv_mmrs.h> | 25 | #include <asm/uv/uv_mmrs.h> |
26 | #include <asm/uv/uv_hub.h> | 26 | #include <asm/uv/uv_hub.h> |
27 | #include <asm/uv/bios.h> | ||
27 | 28 | ||
28 | DEFINE_PER_CPU(struct uv_hub_info_s, __uv_hub_info); | 29 | DEFINE_PER_CPU(struct uv_hub_info_s, __uv_hub_info); |
29 | EXPORT_PER_CPU_SYMBOL_GPL(__uv_hub_info); | 30 | EXPORT_PER_CPU_SYMBOL_GPL(__uv_hub_info); |
@@ -40,6 +41,9 @@ EXPORT_SYMBOL_GPL(uv_cpu_to_blade); | |||
40 | short uv_possible_blades; | 41 | short uv_possible_blades; |
41 | EXPORT_SYMBOL_GPL(uv_possible_blades); | 42 | EXPORT_SYMBOL_GPL(uv_possible_blades); |
42 | 43 | ||
44 | unsigned long sn_rtc_cycles_per_second; | ||
45 | EXPORT_SYMBOL(sn_rtc_cycles_per_second); | ||
46 | |||
43 | /* Start with all IRQs pointing to boot CPU. IRQ balancing will shift them. */ | 47 | /* Start with all IRQs pointing to boot CPU. IRQ balancing will shift them. */ |
44 | 48 | ||
45 | static cpumask_t uv_target_cpus(void) | 49 | static cpumask_t uv_target_cpus(void) |
@@ -94,7 +98,7 @@ static void uv_send_IPI_mask(cpumask_t mask, int vector) | |||
94 | { | 98 | { |
95 | unsigned int cpu; | 99 | unsigned int cpu; |
96 | 100 | ||
97 | for (cpu = 0; cpu < NR_CPUS; ++cpu) | 101 | for_each_possible_cpu(cpu) |
98 | if (cpu_isset(cpu, mask)) | 102 | if (cpu_isset(cpu, mask)) |
99 | uv_send_IPI_one(cpu, vector); | 103 | uv_send_IPI_one(cpu, vector); |
100 | } | 104 | } |
@@ -128,7 +132,7 @@ static unsigned int uv_cpu_mask_to_apicid(cpumask_t cpumask) | |||
128 | * May as well be the first. | 132 | * May as well be the first. |
129 | */ | 133 | */ |
130 | cpu = first_cpu(cpumask); | 134 | cpu = first_cpu(cpumask); |
131 | if ((unsigned)cpu < NR_CPUS) | 135 | if ((unsigned)cpu < nr_cpu_ids) |
132 | return per_cpu(x86_cpu_to_apicid, cpu); | 136 | return per_cpu(x86_cpu_to_apicid, cpu); |
133 | else | 137 | else |
134 | return BAD_APICID; | 138 | return BAD_APICID; |
@@ -218,7 +222,7 @@ static __init void map_low_mmrs(void) | |||
218 | 222 | ||
219 | enum map_type {map_wb, map_uc}; | 223 | enum map_type {map_wb, map_uc}; |
220 | 224 | ||
221 | static void map_high(char *id, unsigned long base, int shift, enum map_type map_type) | 225 | static __init void map_high(char *id, unsigned long base, int shift, enum map_type map_type) |
222 | { | 226 | { |
223 | unsigned long bytes, paddr; | 227 | unsigned long bytes, paddr; |
224 | 228 | ||
@@ -272,7 +276,26 @@ static __init void map_mmioh_high(int max_pnode) | |||
272 | map_high("MMIOH", mmioh.s.base, shift, map_uc); | 276 | map_high("MMIOH", mmioh.s.base, shift, map_uc); |
273 | } | 277 | } |
274 | 278 | ||
275 | static __init void uv_system_init(void) | 279 | static __init void uv_rtc_init(void) |
280 | { | ||
281 | long status, ticks_per_sec, drift; | ||
282 | |||
283 | status = | ||
284 | x86_bios_freq_base(BIOS_FREQ_BASE_REALTIME_CLOCK, &ticks_per_sec, | ||
285 | &drift); | ||
286 | if (status != 0 || ticks_per_sec < 100000) { | ||
287 | printk(KERN_WARNING | ||
288 | "unable to determine platform RTC clock frequency, " | ||
289 | "guessing.\n"); | ||
290 | /* BIOS gives wrong value for clock freq. so guess */ | ||
291 | sn_rtc_cycles_per_second = 1000000000000UL / 30000UL; | ||
292 | } else | ||
293 | sn_rtc_cycles_per_second = ticks_per_sec; | ||
294 | } | ||
295 | |||
296 | static bool uv_system_inited; | ||
297 | |||
298 | void __init uv_system_init(void) | ||
276 | { | 299 | { |
277 | union uvh_si_addr_map_config_u m_n_config; | 300 | union uvh_si_addr_map_config_u m_n_config; |
278 | union uvh_node_id_u node_id; | 301 | union uvh_node_id_u node_id; |
@@ -326,6 +349,8 @@ static __init void uv_system_init(void) | |||
326 | gnode_upper = (((unsigned long)node_id.s.node_id) & | 349 | gnode_upper = (((unsigned long)node_id.s.node_id) & |
327 | ~((1 << n_val) - 1)) << m_val; | 350 | ~((1 << n_val) - 1)) << m_val; |
328 | 351 | ||
352 | uv_rtc_init(); | ||
353 | |||
329 | for_each_present_cpu(cpu) { | 354 | for_each_present_cpu(cpu) { |
330 | nid = cpu_to_node(cpu); | 355 | nid = cpu_to_node(cpu); |
331 | pnode = uv_apicid_to_pnode(per_cpu(x86_cpu_to_apicid, cpu)); | 356 | pnode = uv_apicid_to_pnode(per_cpu(x86_cpu_to_apicid, cpu)); |
@@ -360,6 +385,7 @@ static __init void uv_system_init(void) | |||
360 | map_mmr_high(max_pnode); | 385 | map_mmr_high(max_pnode); |
361 | map_config_high(max_pnode); | 386 | map_config_high(max_pnode); |
362 | map_mmioh_high(max_pnode); | 387 | map_mmioh_high(max_pnode); |
388 | uv_system_inited = true; | ||
363 | } | 389 | } |
364 | 390 | ||
365 | /* | 391 | /* |
@@ -368,8 +394,7 @@ static __init void uv_system_init(void) | |||
368 | */ | 394 | */ |
369 | void __cpuinit uv_cpu_init(void) | 395 | void __cpuinit uv_cpu_init(void) |
370 | { | 396 | { |
371 | if (!uv_node_to_blade) | 397 | BUG_ON(!uv_system_inited); |
372 | uv_system_init(); | ||
373 | 398 | ||
374 | uv_blade_info[uv_numa_blade_id()].nr_online_cpus++; | 399 | uv_blade_info[uv_numa_blade_id()].nr_online_cpus++; |
375 | 400 | ||
diff --git a/arch/x86/kernel/head64.c b/arch/x86/kernel/head64.c index c97819829146..9bfc4d72fb2e 100644 --- a/arch/x86/kernel/head64.c +++ b/arch/x86/kernel/head64.c | |||
@@ -39,6 +39,13 @@ static struct x8664_pda *__cpu_pda[NR_CPUS] __initdata; | |||
39 | static struct x8664_pda *__cpu_pda[NR_CPUS] __read_mostly; | 39 | static struct x8664_pda *__cpu_pda[NR_CPUS] __read_mostly; |
40 | #endif | 40 | #endif |
41 | 41 | ||
42 | void __init x86_64_init_pda(void) | ||
43 | { | ||
44 | _cpu_pda = __cpu_pda; | ||
45 | cpu_pda(0) = &_boot_cpu_pda; | ||
46 | pda_init(0); | ||
47 | } | ||
48 | |||
42 | static void __init zap_identity_mappings(void) | 49 | static void __init zap_identity_mappings(void) |
43 | { | 50 | { |
44 | pgd_t *pgd = pgd_offset_k(0UL); | 51 | pgd_t *pgd = pgd_offset_k(0UL); |
@@ -81,6 +88,7 @@ void __init x86_64_start_kernel(char * real_mode_data) | |||
81 | BUILD_BUG_ON(!(MODULES_VADDR > __START_KERNEL)); | 88 | BUILD_BUG_ON(!(MODULES_VADDR > __START_KERNEL)); |
82 | BUILD_BUG_ON(!(((MODULES_END - 1) & PGDIR_MASK) == | 89 | BUILD_BUG_ON(!(((MODULES_END - 1) & PGDIR_MASK) == |
83 | (__START_KERNEL & PGDIR_MASK))); | 90 | (__START_KERNEL & PGDIR_MASK))); |
91 | BUILD_BUG_ON(__fix_to_virt(__end_of_fixed_addresses) <= MODULES_END); | ||
84 | 92 | ||
85 | /* clear bss before set_intr_gate with early_idt_handler */ | 93 | /* clear bss before set_intr_gate with early_idt_handler */ |
86 | clear_bss(); | 94 | clear_bss(); |
@@ -102,9 +110,7 @@ void __init x86_64_start_kernel(char * real_mode_data) | |||
102 | 110 | ||
103 | early_printk("Kernel alive\n"); | 111 | early_printk("Kernel alive\n"); |
104 | 112 | ||
105 | _cpu_pda = __cpu_pda; | 113 | x86_64_init_pda(); |
106 | cpu_pda(0) = &_boot_cpu_pda; | ||
107 | pda_init(0); | ||
108 | 114 | ||
109 | early_printk("Kernel really alive\n"); | 115 | early_printk("Kernel really alive\n"); |
110 | 116 | ||
diff --git a/arch/x86/kernel/head_32.S b/arch/x86/kernel/head_32.S index f67e93441caf..a7010c3a377a 100644 --- a/arch/x86/kernel/head_32.S +++ b/arch/x86/kernel/head_32.S | |||
@@ -456,9 +456,6 @@ is386: movl $2,%ecx # set MP | |||
456 | 1: | 456 | 1: |
457 | #endif /* CONFIG_SMP */ | 457 | #endif /* CONFIG_SMP */ |
458 | jmp *(initial_code) | 458 | jmp *(initial_code) |
459 | .align 4 | ||
460 | ENTRY(initial_code) | ||
461 | .long i386_start_kernel | ||
462 | 459 | ||
463 | /* | 460 | /* |
464 | * We depend on ET to be correct. This checks for 287/387. | 461 | * We depend on ET to be correct. This checks for 287/387. |
@@ -601,6 +598,11 @@ ignore_int: | |||
601 | #endif | 598 | #endif |
602 | iret | 599 | iret |
603 | 600 | ||
601 | .section .cpuinit.data,"wa" | ||
602 | .align 4 | ||
603 | ENTRY(initial_code) | ||
604 | .long i386_start_kernel | ||
605 | |||
604 | .section .text | 606 | .section .text |
605 | /* | 607 | /* |
606 | * Real beginning of normal "text" segment | 608 | * Real beginning of normal "text" segment |
diff --git a/arch/x86/kernel/head_64.S b/arch/x86/kernel/head_64.S index b07ac7b217cb..db3280afe886 100644 --- a/arch/x86/kernel/head_64.S +++ b/arch/x86/kernel/head_64.S | |||
@@ -407,6 +407,7 @@ ENTRY(phys_base) | |||
407 | /* This must match the first entry in level2_kernel_pgt */ | 407 | /* This must match the first entry in level2_kernel_pgt */ |
408 | .quad 0x0000000000000000 | 408 | .quad 0x0000000000000000 |
409 | 409 | ||
410 | #include "../../x86/xen/xen-head.S" | ||
410 | 411 | ||
411 | .section .bss, "aw", @nobits | 412 | .section .bss, "aw", @nobits |
412 | .align L1_CACHE_BYTES | 413 | .align L1_CACHE_BYTES |
diff --git a/arch/x86/kernel/hpet.c b/arch/x86/kernel/hpet.c index 0ea6a19bfdfe..73deaffadd03 100644 --- a/arch/x86/kernel/hpet.c +++ b/arch/x86/kernel/hpet.c | |||
@@ -210,8 +210,8 @@ static void hpet_legacy_clockevent_register(void) | |||
210 | /* Calculate the min / max delta */ | 210 | /* Calculate the min / max delta */ |
211 | hpet_clockevent.max_delta_ns = clockevent_delta2ns(0x7FFFFFFF, | 211 | hpet_clockevent.max_delta_ns = clockevent_delta2ns(0x7FFFFFFF, |
212 | &hpet_clockevent); | 212 | &hpet_clockevent); |
213 | hpet_clockevent.min_delta_ns = clockevent_delta2ns(0x30, | 213 | /* 5 usec minimum reprogramming delta. */ |
214 | &hpet_clockevent); | 214 | hpet_clockevent.min_delta_ns = 5000; |
215 | 215 | ||
216 | /* | 216 | /* |
217 | * Start hpet with the boot cpu mask and make it | 217 | * Start hpet with the boot cpu mask and make it |
@@ -270,15 +270,22 @@ static void hpet_legacy_set_mode(enum clock_event_mode mode, | |||
270 | } | 270 | } |
271 | 271 | ||
272 | static int hpet_legacy_next_event(unsigned long delta, | 272 | static int hpet_legacy_next_event(unsigned long delta, |
273 | struct clock_event_device *evt) | 273 | struct clock_event_device *evt) |
274 | { | 274 | { |
275 | unsigned long cnt; | 275 | u32 cnt; |
276 | 276 | ||
277 | cnt = hpet_readl(HPET_COUNTER); | 277 | cnt = hpet_readl(HPET_COUNTER); |
278 | cnt += delta; | 278 | cnt += (u32) delta; |
279 | hpet_writel(cnt, HPET_T0_CMP); | 279 | hpet_writel(cnt, HPET_T0_CMP); |
280 | 280 | ||
281 | return ((long)(hpet_readl(HPET_COUNTER) - cnt ) > 0) ? -ETIME : 0; | 281 | /* |
282 | * We need to read back the CMP register to make sure that | ||
283 | * what we wrote hit the chip before we compare it to the | ||
284 | * counter. | ||
285 | */ | ||
286 | WARN_ON((u32)hpet_readl(HPET_T0_CMP) != cnt); | ||
287 | |||
288 | return (s32)((u32)hpet_readl(HPET_COUNTER) - cnt) >= 0 ? -ETIME : 0; | ||
282 | } | 289 | } |
283 | 290 | ||
284 | /* | 291 | /* |
@@ -359,6 +366,7 @@ static int hpet_clocksource_register(void) | |||
359 | int __init hpet_enable(void) | 366 | int __init hpet_enable(void) |
360 | { | 367 | { |
361 | unsigned long id; | 368 | unsigned long id; |
369 | int i; | ||
362 | 370 | ||
363 | if (!is_hpet_capable()) | 371 | if (!is_hpet_capable()) |
364 | return 0; | 372 | return 0; |
@@ -369,6 +377,29 @@ int __init hpet_enable(void) | |||
369 | * Read the period and check for a sane value: | 377 | * Read the period and check for a sane value: |
370 | */ | 378 | */ |
371 | hpet_period = hpet_readl(HPET_PERIOD); | 379 | hpet_period = hpet_readl(HPET_PERIOD); |
380 | |||
381 | /* | ||
382 | * AMD SB700 based systems with spread spectrum enabled use a | ||
383 | * SMM based HPET emulation to provide proper frequency | ||
384 | * setting. The SMM code is initialized with the first HPET | ||
385 | * register access and takes some time to complete. During | ||
386 | * this time the config register reads 0xffffffff. We check | ||
387 | * for max. 1000 loops whether the config register reads a non | ||
388 | * 0xffffffff value to make sure that HPET is up and running | ||
389 | * before we go further. A counting loop is safe, as the HPET | ||
390 | * access takes thousands of CPU cycles. On non SB700 based | ||
391 | * machines this check is only done once and has no side | ||
392 | * effects. | ||
393 | */ | ||
394 | for (i = 0; hpet_readl(HPET_CFG) == 0xFFFFFFFF; i++) { | ||
395 | if (i == 1000) { | ||
396 | printk(KERN_WARNING | ||
397 | "HPET config register value = 0xFFFFFFFF. " | ||
398 | "Disabling HPET\n"); | ||
399 | goto out_nohpet; | ||
400 | } | ||
401 | } | ||
402 | |||
372 | if (hpet_period < HPET_MIN_PERIOD || hpet_period > HPET_MAX_PERIOD) | 403 | if (hpet_period < HPET_MIN_PERIOD || hpet_period > HPET_MAX_PERIOD) |
373 | goto out_nohpet; | 404 | goto out_nohpet; |
374 | 405 | ||
@@ -468,7 +499,7 @@ void hpet_disable(void) | |||
468 | #define RTC_NUM_INTS 1 | 499 | #define RTC_NUM_INTS 1 |
469 | 500 | ||
470 | static unsigned long hpet_rtc_flags; | 501 | static unsigned long hpet_rtc_flags; |
471 | static unsigned long hpet_prev_update_sec; | 502 | static int hpet_prev_update_sec; |
472 | static struct rtc_time hpet_alarm_time; | 503 | static struct rtc_time hpet_alarm_time; |
473 | static unsigned long hpet_pie_count; | 504 | static unsigned long hpet_pie_count; |
474 | static unsigned long hpet_t1_cmp; | 505 | static unsigned long hpet_t1_cmp; |
@@ -575,6 +606,9 @@ int hpet_set_rtc_irq_bit(unsigned long bit_mask) | |||
575 | 606 | ||
576 | hpet_rtc_flags |= bit_mask; | 607 | hpet_rtc_flags |= bit_mask; |
577 | 608 | ||
609 | if ((bit_mask & RTC_UIE) && !(oldbits & RTC_UIE)) | ||
610 | hpet_prev_update_sec = -1; | ||
611 | |||
578 | if (!oldbits) | 612 | if (!oldbits) |
579 | hpet_rtc_timer_init(); | 613 | hpet_rtc_timer_init(); |
580 | 614 | ||
@@ -652,7 +686,7 @@ static void hpet_rtc_timer_reinit(void) | |||
652 | if (hpet_rtc_flags & RTC_PIE) | 686 | if (hpet_rtc_flags & RTC_PIE) |
653 | hpet_pie_count += lost_ints; | 687 | hpet_pie_count += lost_ints; |
654 | if (printk_ratelimit()) | 688 | if (printk_ratelimit()) |
655 | printk(KERN_WARNING "rtc: lost %d interrupts\n", | 689 | printk(KERN_WARNING "hpet1: lost %d rtc interrupts\n", |
656 | lost_ints); | 690 | lost_ints); |
657 | } | 691 | } |
658 | } | 692 | } |
@@ -670,7 +704,8 @@ irqreturn_t hpet_rtc_interrupt(int irq, void *dev_id) | |||
670 | 704 | ||
671 | if (hpet_rtc_flags & RTC_UIE && | 705 | if (hpet_rtc_flags & RTC_UIE && |
672 | curr_time.tm_sec != hpet_prev_update_sec) { | 706 | curr_time.tm_sec != hpet_prev_update_sec) { |
673 | rtc_int_flag = RTC_UF; | 707 | if (hpet_prev_update_sec >= 0) |
708 | rtc_int_flag = RTC_UF; | ||
674 | hpet_prev_update_sec = curr_time.tm_sec; | 709 | hpet_prev_update_sec = curr_time.tm_sec; |
675 | } | 710 | } |
676 | 711 | ||
diff --git a/arch/x86/kernel/io_apic_32.c b/arch/x86/kernel/io_apic_32.c index 558abf4c796a..09cddb57bec4 100644 --- a/arch/x86/kernel/io_apic_32.c +++ b/arch/x86/kernel/io_apic_32.c | |||
@@ -57,7 +57,7 @@ atomic_t irq_mis_count; | |||
57 | static struct { int pin, apic; } ioapic_i8259 = { -1, -1 }; | 57 | static struct { int pin, apic; } ioapic_i8259 = { -1, -1 }; |
58 | 58 | ||
59 | static DEFINE_SPINLOCK(ioapic_lock); | 59 | static DEFINE_SPINLOCK(ioapic_lock); |
60 | static DEFINE_SPINLOCK(vector_lock); | 60 | DEFINE_SPINLOCK(vector_lock); |
61 | 61 | ||
62 | int timer_through_8259 __initdata; | 62 | int timer_through_8259 __initdata; |
63 | 63 | ||
@@ -756,7 +756,7 @@ void send_IPI_self(int vector) | |||
756 | /* | 756 | /* |
757 | * Send the IPI. The write to APIC_ICR fires this off. | 757 | * Send the IPI. The write to APIC_ICR fires this off. |
758 | */ | 758 | */ |
759 | apic_write_around(APIC_ICR, cfg); | 759 | apic_write(APIC_ICR, cfg); |
760 | } | 760 | } |
761 | #endif /* !CONFIG_SMP */ | 761 | #endif /* !CONFIG_SMP */ |
762 | 762 | ||
@@ -1209,10 +1209,6 @@ static int assign_irq_vector(int irq) | |||
1209 | return vector; | 1209 | return vector; |
1210 | } | 1210 | } |
1211 | 1211 | ||
1212 | void setup_vector_irq(int cpu) | ||
1213 | { | ||
1214 | } | ||
1215 | |||
1216 | static struct irq_chip ioapic_chip; | 1212 | static struct irq_chip ioapic_chip; |
1217 | 1213 | ||
1218 | #define IOAPIC_AUTO -1 | 1214 | #define IOAPIC_AUTO -1 |
@@ -2030,7 +2026,7 @@ static void mask_lapic_irq(unsigned int irq) | |||
2030 | unsigned long v; | 2026 | unsigned long v; |
2031 | 2027 | ||
2032 | v = apic_read(APIC_LVT0); | 2028 | v = apic_read(APIC_LVT0); |
2033 | apic_write_around(APIC_LVT0, v | APIC_LVT_MASKED); | 2029 | apic_write(APIC_LVT0, v | APIC_LVT_MASKED); |
2034 | } | 2030 | } |
2035 | 2031 | ||
2036 | static void unmask_lapic_irq(unsigned int irq) | 2032 | static void unmask_lapic_irq(unsigned int irq) |
@@ -2038,7 +2034,7 @@ static void unmask_lapic_irq(unsigned int irq) | |||
2038 | unsigned long v; | 2034 | unsigned long v; |
2039 | 2035 | ||
2040 | v = apic_read(APIC_LVT0); | 2036 | v = apic_read(APIC_LVT0); |
2041 | apic_write_around(APIC_LVT0, v & ~APIC_LVT_MASKED); | 2037 | apic_write(APIC_LVT0, v & ~APIC_LVT_MASKED); |
2042 | } | 2038 | } |
2043 | 2039 | ||
2044 | static struct irq_chip lapic_chip __read_mostly = { | 2040 | static struct irq_chip lapic_chip __read_mostly = { |
@@ -2168,7 +2164,7 @@ static inline void __init check_timer(void) | |||
2168 | * The AEOI mode will finish them in the 8259A | 2164 | * The AEOI mode will finish them in the 8259A |
2169 | * automatically. | 2165 | * automatically. |
2170 | */ | 2166 | */ |
2171 | apic_write_around(APIC_LVT0, APIC_LVT_MASKED | APIC_DM_EXTINT); | 2167 | apic_write(APIC_LVT0, APIC_LVT_MASKED | APIC_DM_EXTINT); |
2172 | init_8259A(1); | 2168 | init_8259A(1); |
2173 | timer_ack = (nmi_watchdog == NMI_IO_APIC && !APIC_INTEGRATED(ver)); | 2169 | timer_ack = (nmi_watchdog == NMI_IO_APIC && !APIC_INTEGRATED(ver)); |
2174 | 2170 | ||
@@ -2177,8 +2173,9 @@ static inline void __init check_timer(void) | |||
2177 | pin2 = ioapic_i8259.pin; | 2173 | pin2 = ioapic_i8259.pin; |
2178 | apic2 = ioapic_i8259.apic; | 2174 | apic2 = ioapic_i8259.apic; |
2179 | 2175 | ||
2180 | printk(KERN_INFO "..TIMER: vector=0x%02X apic1=%d pin1=%d apic2=%d pin2=%d\n", | 2176 | apic_printk(APIC_QUIET, KERN_INFO "..TIMER: vector=0x%02X " |
2181 | vector, apic1, pin1, apic2, pin2); | 2177 | "apic1=%d pin1=%d apic2=%d pin2=%d\n", |
2178 | vector, apic1, pin1, apic2, pin2); | ||
2182 | 2179 | ||
2183 | /* | 2180 | /* |
2184 | * Some BIOS writers are clueless and report the ExtINTA | 2181 | * Some BIOS writers are clueless and report the ExtINTA |
@@ -2216,12 +2213,13 @@ static inline void __init check_timer(void) | |||
2216 | } | 2213 | } |
2217 | clear_IO_APIC_pin(apic1, pin1); | 2214 | clear_IO_APIC_pin(apic1, pin1); |
2218 | if (!no_pin1) | 2215 | if (!no_pin1) |
2219 | printk(KERN_ERR "..MP-BIOS bug: " | 2216 | apic_printk(APIC_QUIET, KERN_ERR "..MP-BIOS bug: " |
2220 | "8254 timer not connected to IO-APIC\n"); | 2217 | "8254 timer not connected to IO-APIC\n"); |
2221 | 2218 | ||
2222 | printk(KERN_INFO "...trying to set up timer (IRQ0) " | 2219 | apic_printk(APIC_QUIET, KERN_INFO "...trying to set up timer " |
2223 | "through the 8259A ... "); | 2220 | "(IRQ0) through the 8259A ...\n"); |
2224 | printk("\n..... (found pin %d) ...", pin2); | 2221 | apic_printk(APIC_QUIET, KERN_INFO |
2222 | "..... (found apic %d pin %d) ...\n", apic2, pin2); | ||
2225 | /* | 2223 | /* |
2226 | * legacy devices should be connected to IO APIC #0 | 2224 | * legacy devices should be connected to IO APIC #0 |
2227 | */ | 2225 | */ |
@@ -2230,7 +2228,7 @@ static inline void __init check_timer(void) | |||
2230 | unmask_IO_APIC_irq(0); | 2228 | unmask_IO_APIC_irq(0); |
2231 | enable_8259A_irq(0); | 2229 | enable_8259A_irq(0); |
2232 | if (timer_irq_works()) { | 2230 | if (timer_irq_works()) { |
2233 | printk("works.\n"); | 2231 | apic_printk(APIC_QUIET, KERN_INFO "....... works.\n"); |
2234 | timer_through_8259 = 1; | 2232 | timer_through_8259 = 1; |
2235 | if (nmi_watchdog == NMI_IO_APIC) { | 2233 | if (nmi_watchdog == NMI_IO_APIC) { |
2236 | disable_8259A_irq(0); | 2234 | disable_8259A_irq(0); |
@@ -2244,44 +2242,47 @@ static inline void __init check_timer(void) | |||
2244 | */ | 2242 | */ |
2245 | disable_8259A_irq(0); | 2243 | disable_8259A_irq(0); |
2246 | clear_IO_APIC_pin(apic2, pin2); | 2244 | clear_IO_APIC_pin(apic2, pin2); |
2247 | printk(" failed.\n"); | 2245 | apic_printk(APIC_QUIET, KERN_INFO "....... failed.\n"); |
2248 | } | 2246 | } |
2249 | 2247 | ||
2250 | if (nmi_watchdog == NMI_IO_APIC) { | 2248 | if (nmi_watchdog == NMI_IO_APIC) { |
2251 | printk(KERN_WARNING "timer doesn't work through the IO-APIC - disabling NMI Watchdog!\n"); | 2249 | apic_printk(APIC_QUIET, KERN_WARNING "timer doesn't work " |
2250 | "through the IO-APIC - disabling NMI Watchdog!\n"); | ||
2252 | nmi_watchdog = NMI_NONE; | 2251 | nmi_watchdog = NMI_NONE; |
2253 | } | 2252 | } |
2254 | timer_ack = 0; | 2253 | timer_ack = 0; |
2255 | 2254 | ||
2256 | printk(KERN_INFO "...trying to set up timer as Virtual Wire IRQ..."); | 2255 | apic_printk(APIC_QUIET, KERN_INFO |
2256 | "...trying to set up timer as Virtual Wire IRQ...\n"); | ||
2257 | 2257 | ||
2258 | lapic_register_intr(0, vector); | 2258 | lapic_register_intr(0, vector); |
2259 | apic_write_around(APIC_LVT0, APIC_DM_FIXED | vector); /* Fixed mode */ | 2259 | apic_write(APIC_LVT0, APIC_DM_FIXED | vector); /* Fixed mode */ |
2260 | enable_8259A_irq(0); | 2260 | enable_8259A_irq(0); |
2261 | 2261 | ||
2262 | if (timer_irq_works()) { | 2262 | if (timer_irq_works()) { |
2263 | printk(" works.\n"); | 2263 | apic_printk(APIC_QUIET, KERN_INFO "..... works.\n"); |
2264 | goto out; | 2264 | goto out; |
2265 | } | 2265 | } |
2266 | disable_8259A_irq(0); | 2266 | disable_8259A_irq(0); |
2267 | apic_write_around(APIC_LVT0, APIC_LVT_MASKED | APIC_DM_FIXED | vector); | 2267 | apic_write(APIC_LVT0, APIC_LVT_MASKED | APIC_DM_FIXED | vector); |
2268 | printk(" failed.\n"); | 2268 | apic_printk(APIC_QUIET, KERN_INFO "..... failed.\n"); |
2269 | 2269 | ||
2270 | printk(KERN_INFO "...trying to set up timer as ExtINT IRQ..."); | 2270 | apic_printk(APIC_QUIET, KERN_INFO |
2271 | "...trying to set up timer as ExtINT IRQ...\n"); | ||
2271 | 2272 | ||
2272 | init_8259A(0); | 2273 | init_8259A(0); |
2273 | make_8259A_irq(0); | 2274 | make_8259A_irq(0); |
2274 | apic_write_around(APIC_LVT0, APIC_DM_EXTINT); | 2275 | apic_write(APIC_LVT0, APIC_DM_EXTINT); |
2275 | 2276 | ||
2276 | unlock_ExtINT_logic(); | 2277 | unlock_ExtINT_logic(); |
2277 | 2278 | ||
2278 | if (timer_irq_works()) { | 2279 | if (timer_irq_works()) { |
2279 | printk(" works.\n"); | 2280 | apic_printk(APIC_QUIET, KERN_INFO "..... works.\n"); |
2280 | goto out; | 2281 | goto out; |
2281 | } | 2282 | } |
2282 | printk(" failed :(.\n"); | 2283 | apic_printk(APIC_QUIET, KERN_INFO "..... failed :(.\n"); |
2283 | panic("IO-APIC + timer doesn't work! Boot with apic=debug and send a " | 2284 | panic("IO-APIC + timer doesn't work! Boot with apic=debug and send a " |
2284 | "report. Then try booting with the 'noapic' option"); | 2285 | "report. Then try booting with the 'noapic' option.\n"); |
2285 | out: | 2286 | out: |
2286 | local_irq_restore(flags); | 2287 | local_irq_restore(flags); |
2287 | } | 2288 | } |
diff --git a/arch/x86/kernel/io_apic_64.c b/arch/x86/kernel/io_apic_64.c index 6510cde36b35..61a83b70c18f 100644 --- a/arch/x86/kernel/io_apic_64.c +++ b/arch/x86/kernel/io_apic_64.c | |||
@@ -45,6 +45,7 @@ | |||
45 | #include <asm/proto.h> | 45 | #include <asm/proto.h> |
46 | #include <asm/acpi.h> | 46 | #include <asm/acpi.h> |
47 | #include <asm/dma.h> | 47 | #include <asm/dma.h> |
48 | #include <asm/i8259.h> | ||
48 | #include <asm/nmi.h> | 49 | #include <asm/nmi.h> |
49 | #include <asm/msidef.h> | 50 | #include <asm/msidef.h> |
50 | #include <asm/hypertransport.h> | 51 | #include <asm/hypertransport.h> |
@@ -100,7 +101,7 @@ int timer_through_8259 __initdata; | |||
100 | static struct { int pin, apic; } ioapic_i8259 = { -1, -1 }; | 101 | static struct { int pin, apic; } ioapic_i8259 = { -1, -1 }; |
101 | 102 | ||
102 | static DEFINE_SPINLOCK(ioapic_lock); | 103 | static DEFINE_SPINLOCK(ioapic_lock); |
103 | DEFINE_SPINLOCK(vector_lock); | 104 | static DEFINE_SPINLOCK(vector_lock); |
104 | 105 | ||
105 | /* | 106 | /* |
106 | * # of IRQ routing registers | 107 | * # of IRQ routing registers |
@@ -696,6 +697,19 @@ static int pin_2_irq(int idx, int apic, int pin) | |||
696 | return irq; | 697 | return irq; |
697 | } | 698 | } |
698 | 699 | ||
700 | void lock_vector_lock(void) | ||
701 | { | ||
702 | /* Used to ensure the online set of cpus does not change | ||
703 | * during assign_irq_vector. | ||
704 | */ | ||
705 | spin_lock(&vector_lock); | ||
706 | } | ||
707 | |||
708 | void unlock_vector_lock(void) | ||
709 | { | ||
710 | spin_unlock(&vector_lock); | ||
711 | } | ||
712 | |||
699 | static int __assign_irq_vector(int irq, cpumask_t mask) | 713 | static int __assign_irq_vector(int irq, cpumask_t mask) |
700 | { | 714 | { |
701 | /* | 715 | /* |
@@ -731,7 +745,7 @@ static int __assign_irq_vector(int irq, cpumask_t mask) | |||
731 | return 0; | 745 | return 0; |
732 | } | 746 | } |
733 | 747 | ||
734 | for_each_cpu_mask(cpu, mask) { | 748 | for_each_cpu_mask_nr(cpu, mask) { |
735 | cpumask_t domain, new_mask; | 749 | cpumask_t domain, new_mask; |
736 | int new_cpu; | 750 | int new_cpu; |
737 | int vector, offset; | 751 | int vector, offset; |
@@ -752,7 +766,7 @@ next: | |||
752 | continue; | 766 | continue; |
753 | if (vector == IA32_SYSCALL_VECTOR) | 767 | if (vector == IA32_SYSCALL_VECTOR) |
754 | goto next; | 768 | goto next; |
755 | for_each_cpu_mask(new_cpu, new_mask) | 769 | for_each_cpu_mask_nr(new_cpu, new_mask) |
756 | if (per_cpu(vector_irq, new_cpu)[vector] != -1) | 770 | if (per_cpu(vector_irq, new_cpu)[vector] != -1) |
757 | goto next; | 771 | goto next; |
758 | /* Found one! */ | 772 | /* Found one! */ |
@@ -762,7 +776,7 @@ next: | |||
762 | cfg->move_in_progress = 1; | 776 | cfg->move_in_progress = 1; |
763 | cfg->old_domain = cfg->domain; | 777 | cfg->old_domain = cfg->domain; |
764 | } | 778 | } |
765 | for_each_cpu_mask(new_cpu, new_mask) | 779 | for_each_cpu_mask_nr(new_cpu, new_mask) |
766 | per_cpu(vector_irq, new_cpu)[vector] = irq; | 780 | per_cpu(vector_irq, new_cpu)[vector] = irq; |
767 | cfg->vector = vector; | 781 | cfg->vector = vector; |
768 | cfg->domain = domain; | 782 | cfg->domain = domain; |
@@ -794,14 +808,14 @@ static void __clear_irq_vector(int irq) | |||
794 | 808 | ||
795 | vector = cfg->vector; | 809 | vector = cfg->vector; |
796 | cpus_and(mask, cfg->domain, cpu_online_map); | 810 | cpus_and(mask, cfg->domain, cpu_online_map); |
797 | for_each_cpu_mask(cpu, mask) | 811 | for_each_cpu_mask_nr(cpu, mask) |
798 | per_cpu(vector_irq, cpu)[vector] = -1; | 812 | per_cpu(vector_irq, cpu)[vector] = -1; |
799 | 813 | ||
800 | cfg->vector = 0; | 814 | cfg->vector = 0; |
801 | cpus_clear(cfg->domain); | 815 | cpus_clear(cfg->domain); |
802 | } | 816 | } |
803 | 817 | ||
804 | static void __setup_vector_irq(int cpu) | 818 | void __setup_vector_irq(int cpu) |
805 | { | 819 | { |
806 | /* Initialize vector_irq on a new cpu */ | 820 | /* Initialize vector_irq on a new cpu */ |
807 | /* This function must be called with vector_lock held */ | 821 | /* This function must be called with vector_lock held */ |
@@ -824,14 +838,6 @@ static void __setup_vector_irq(int cpu) | |||
824 | } | 838 | } |
825 | } | 839 | } |
826 | 840 | ||
827 | void setup_vector_irq(int cpu) | ||
828 | { | ||
829 | spin_lock(&vector_lock); | ||
830 | __setup_vector_irq(smp_processor_id()); | ||
831 | spin_unlock(&vector_lock); | ||
832 | } | ||
833 | |||
834 | |||
835 | static struct irq_chip ioapic_chip; | 841 | static struct irq_chip ioapic_chip; |
836 | 842 | ||
837 | static void ioapic_register_intr(int irq, unsigned long trigger) | 843 | static void ioapic_register_intr(int irq, unsigned long trigger) |
@@ -1372,12 +1378,10 @@ static unsigned int startup_ioapic_irq(unsigned int irq) | |||
1372 | static int ioapic_retrigger_irq(unsigned int irq) | 1378 | static int ioapic_retrigger_irq(unsigned int irq) |
1373 | { | 1379 | { |
1374 | struct irq_cfg *cfg = &irq_cfg[irq]; | 1380 | struct irq_cfg *cfg = &irq_cfg[irq]; |
1375 | cpumask_t mask; | ||
1376 | unsigned long flags; | 1381 | unsigned long flags; |
1377 | 1382 | ||
1378 | spin_lock_irqsave(&vector_lock, flags); | 1383 | spin_lock_irqsave(&vector_lock, flags); |
1379 | mask = cpumask_of_cpu(first_cpu(cfg->domain)); | 1384 | send_IPI_mask(cpumask_of_cpu(first_cpu(cfg->domain)), cfg->vector); |
1380 | send_IPI_mask(mask, cfg->vector); | ||
1381 | spin_unlock_irqrestore(&vector_lock, flags); | 1385 | spin_unlock_irqrestore(&vector_lock, flags); |
1382 | 1386 | ||
1383 | return 1; | 1387 | return 1; |
@@ -1696,8 +1700,9 @@ static inline void __init check_timer(void) | |||
1696 | pin2 = ioapic_i8259.pin; | 1700 | pin2 = ioapic_i8259.pin; |
1697 | apic2 = ioapic_i8259.apic; | 1701 | apic2 = ioapic_i8259.apic; |
1698 | 1702 | ||
1699 | apic_printk(APIC_VERBOSE,KERN_INFO "..TIMER: vector=0x%02X apic1=%d pin1=%d apic2=%d pin2=%d\n", | 1703 | apic_printk(APIC_QUIET, KERN_INFO "..TIMER: vector=0x%02X " |
1700 | cfg->vector, apic1, pin1, apic2, pin2); | 1704 | "apic1=%d pin1=%d apic2=%d pin2=%d\n", |
1705 | cfg->vector, apic1, pin1, apic2, pin2); | ||
1701 | 1706 | ||
1702 | /* | 1707 | /* |
1703 | * Some BIOS writers are clueless and report the ExtINTA | 1708 | * Some BIOS writers are clueless and report the ExtINTA |
@@ -1735,14 +1740,13 @@ static inline void __init check_timer(void) | |||
1735 | } | 1740 | } |
1736 | clear_IO_APIC_pin(apic1, pin1); | 1741 | clear_IO_APIC_pin(apic1, pin1); |
1737 | if (!no_pin1) | 1742 | if (!no_pin1) |
1738 | apic_printk(APIC_QUIET,KERN_ERR "..MP-BIOS bug: " | 1743 | apic_printk(APIC_QUIET, KERN_ERR "..MP-BIOS bug: " |
1739 | "8254 timer not connected to IO-APIC\n"); | 1744 | "8254 timer not connected to IO-APIC\n"); |
1740 | 1745 | ||
1741 | apic_printk(APIC_VERBOSE,KERN_INFO | 1746 | apic_printk(APIC_QUIET, KERN_INFO "...trying to set up timer " |
1742 | "...trying to set up timer (IRQ0) " | 1747 | "(IRQ0) through the 8259A ...\n"); |
1743 | "through the 8259A ... "); | 1748 | apic_printk(APIC_QUIET, KERN_INFO |
1744 | apic_printk(APIC_VERBOSE,"\n..... (found apic %d pin %d) ...", | 1749 | "..... (found apic %d pin %d) ...\n", apic2, pin2); |
1745 | apic2, pin2); | ||
1746 | /* | 1750 | /* |
1747 | * legacy devices should be connected to IO APIC #0 | 1751 | * legacy devices should be connected to IO APIC #0 |
1748 | */ | 1752 | */ |
@@ -1751,7 +1755,7 @@ static inline void __init check_timer(void) | |||
1751 | unmask_IO_APIC_irq(0); | 1755 | unmask_IO_APIC_irq(0); |
1752 | enable_8259A_irq(0); | 1756 | enable_8259A_irq(0); |
1753 | if (timer_irq_works()) { | 1757 | if (timer_irq_works()) { |
1754 | apic_printk(APIC_VERBOSE," works.\n"); | 1758 | apic_printk(APIC_QUIET, KERN_INFO "....... works.\n"); |
1755 | timer_through_8259 = 1; | 1759 | timer_through_8259 = 1; |
1756 | if (nmi_watchdog == NMI_IO_APIC) { | 1760 | if (nmi_watchdog == NMI_IO_APIC) { |
1757 | disable_8259A_irq(0); | 1761 | disable_8259A_irq(0); |
@@ -1765,29 +1769,32 @@ static inline void __init check_timer(void) | |||
1765 | */ | 1769 | */ |
1766 | disable_8259A_irq(0); | 1770 | disable_8259A_irq(0); |
1767 | clear_IO_APIC_pin(apic2, pin2); | 1771 | clear_IO_APIC_pin(apic2, pin2); |
1768 | apic_printk(APIC_VERBOSE," failed.\n"); | 1772 | apic_printk(APIC_QUIET, KERN_INFO "....... failed.\n"); |
1769 | } | 1773 | } |
1770 | 1774 | ||
1771 | if (nmi_watchdog == NMI_IO_APIC) { | 1775 | if (nmi_watchdog == NMI_IO_APIC) { |
1772 | printk(KERN_WARNING "timer doesn't work through the IO-APIC - disabling NMI Watchdog!\n"); | 1776 | apic_printk(APIC_QUIET, KERN_WARNING "timer doesn't work " |
1777 | "through the IO-APIC - disabling NMI Watchdog!\n"); | ||
1773 | nmi_watchdog = NMI_NONE; | 1778 | nmi_watchdog = NMI_NONE; |
1774 | } | 1779 | } |
1775 | 1780 | ||
1776 | apic_printk(APIC_VERBOSE, KERN_INFO "...trying to set up timer as Virtual Wire IRQ..."); | 1781 | apic_printk(APIC_QUIET, KERN_INFO |
1782 | "...trying to set up timer as Virtual Wire IRQ...\n"); | ||
1777 | 1783 | ||
1778 | lapic_register_intr(0); | 1784 | lapic_register_intr(0); |
1779 | apic_write(APIC_LVT0, APIC_DM_FIXED | cfg->vector); /* Fixed mode */ | 1785 | apic_write(APIC_LVT0, APIC_DM_FIXED | cfg->vector); /* Fixed mode */ |
1780 | enable_8259A_irq(0); | 1786 | enable_8259A_irq(0); |
1781 | 1787 | ||
1782 | if (timer_irq_works()) { | 1788 | if (timer_irq_works()) { |
1783 | apic_printk(APIC_VERBOSE," works.\n"); | 1789 | apic_printk(APIC_QUIET, KERN_INFO "..... works.\n"); |
1784 | goto out; | 1790 | goto out; |
1785 | } | 1791 | } |
1786 | disable_8259A_irq(0); | 1792 | disable_8259A_irq(0); |
1787 | apic_write(APIC_LVT0, APIC_LVT_MASKED | APIC_DM_FIXED | cfg->vector); | 1793 | apic_write(APIC_LVT0, APIC_LVT_MASKED | APIC_DM_FIXED | cfg->vector); |
1788 | apic_printk(APIC_VERBOSE," failed.\n"); | 1794 | apic_printk(APIC_QUIET, KERN_INFO "..... failed.\n"); |
1789 | 1795 | ||
1790 | apic_printk(APIC_VERBOSE, KERN_INFO "...trying to set up timer as ExtINT IRQ..."); | 1796 | apic_printk(APIC_QUIET, KERN_INFO |
1797 | "...trying to set up timer as ExtINT IRQ...\n"); | ||
1791 | 1798 | ||
1792 | init_8259A(0); | 1799 | init_8259A(0); |
1793 | make_8259A_irq(0); | 1800 | make_8259A_irq(0); |
@@ -1796,11 +1803,12 @@ static inline void __init check_timer(void) | |||
1796 | unlock_ExtINT_logic(); | 1803 | unlock_ExtINT_logic(); |
1797 | 1804 | ||
1798 | if (timer_irq_works()) { | 1805 | if (timer_irq_works()) { |
1799 | apic_printk(APIC_VERBOSE," works.\n"); | 1806 | apic_printk(APIC_QUIET, KERN_INFO "..... works.\n"); |
1800 | goto out; | 1807 | goto out; |
1801 | } | 1808 | } |
1802 | apic_printk(APIC_VERBOSE," failed :(.\n"); | 1809 | apic_printk(APIC_QUIET, KERN_INFO "..... failed :(.\n"); |
1803 | panic("IO-APIC + timer doesn't work! Try using the 'noapic' kernel parameter\n"); | 1810 | panic("IO-APIC + timer doesn't work! Boot with apic=debug and send a " |
1811 | "report. Then try booting with the 'noapic' option.\n"); | ||
1804 | out: | 1812 | out: |
1805 | local_irq_restore(flags); | 1813 | local_irq_restore(flags); |
1806 | } | 1814 | } |
diff --git a/arch/x86/kernel/io_delay.c b/arch/x86/kernel/io_delay.c index 5921e5f0a640..720d2607aacb 100644 --- a/arch/x86/kernel/io_delay.c +++ b/arch/x86/kernel/io_delay.c | |||
@@ -92,6 +92,14 @@ static struct dmi_system_id __initdata io_delay_0xed_port_dmi_table[] = { | |||
92 | DMI_MATCH(DMI_BOARD_NAME, "30BF") | 92 | DMI_MATCH(DMI_BOARD_NAME, "30BF") |
93 | } | 93 | } |
94 | }, | 94 | }, |
95 | { | ||
96 | .callback = dmi_io_delay_0xed_port, | ||
97 | .ident = "Presario F700", | ||
98 | .matches = { | ||
99 | DMI_MATCH(DMI_BOARD_VENDOR, "Quanta"), | ||
100 | DMI_MATCH(DMI_BOARD_NAME, "30D3") | ||
101 | } | ||
102 | }, | ||
95 | { } | 103 | { } |
96 | }; | 104 | }; |
97 | 105 | ||
@@ -103,6 +111,9 @@ void __init io_delay_init(void) | |||
103 | 111 | ||
104 | static int __init io_delay_param(char *s) | 112 | static int __init io_delay_param(char *s) |
105 | { | 113 | { |
114 | if (!s) | ||
115 | return -EINVAL; | ||
116 | |||
106 | if (!strcmp(s, "0x80")) | 117 | if (!strcmp(s, "0x80")) |
107 | io_delay_type = CONFIG_IO_DELAY_TYPE_0X80; | 118 | io_delay_type = CONFIG_IO_DELAY_TYPE_0X80; |
108 | else if (!strcmp(s, "0xed")) | 119 | else if (!strcmp(s, "0xed")) |
diff --git a/arch/x86/kernel/ipi.c b/arch/x86/kernel/ipi.c index 9d98cda39ad9..3f7537b669d3 100644 --- a/arch/x86/kernel/ipi.c +++ b/arch/x86/kernel/ipi.c | |||
@@ -70,7 +70,7 @@ void __send_IPI_shortcut(unsigned int shortcut, int vector) | |||
70 | /* | 70 | /* |
71 | * Send the IPI. The write to APIC_ICR fires this off. | 71 | * Send the IPI. The write to APIC_ICR fires this off. |
72 | */ | 72 | */ |
73 | apic_write_around(APIC_ICR, cfg); | 73 | apic_write(APIC_ICR, cfg); |
74 | } | 74 | } |
75 | 75 | ||
76 | void send_IPI_self(int vector) | 76 | void send_IPI_self(int vector) |
@@ -98,7 +98,7 @@ static inline void __send_IPI_dest_field(unsigned long mask, int vector) | |||
98 | * prepare target chip field | 98 | * prepare target chip field |
99 | */ | 99 | */ |
100 | cfg = __prepare_ICR2(mask); | 100 | cfg = __prepare_ICR2(mask); |
101 | apic_write_around(APIC_ICR2, cfg); | 101 | apic_write(APIC_ICR2, cfg); |
102 | 102 | ||
103 | /* | 103 | /* |
104 | * program the ICR | 104 | * program the ICR |
@@ -108,7 +108,7 @@ static inline void __send_IPI_dest_field(unsigned long mask, int vector) | |||
108 | /* | 108 | /* |
109 | * Send the IPI. The write to APIC_ICR fires this off. | 109 | * Send the IPI. The write to APIC_ICR fires this off. |
110 | */ | 110 | */ |
111 | apic_write_around(APIC_ICR, cfg); | 111 | apic_write(APIC_ICR, cfg); |
112 | } | 112 | } |
113 | 113 | ||
114 | /* | 114 | /* |
diff --git a/arch/x86/kernel/irq_32.c b/arch/x86/kernel/irq_32.c index 47a6f6f12478..1cf8c1fcc088 100644 --- a/arch/x86/kernel/irq_32.c +++ b/arch/x86/kernel/irq_32.c | |||
@@ -83,11 +83,8 @@ union irq_ctx { | |||
83 | static union irq_ctx *hardirq_ctx[NR_CPUS] __read_mostly; | 83 | static union irq_ctx *hardirq_ctx[NR_CPUS] __read_mostly; |
84 | static union irq_ctx *softirq_ctx[NR_CPUS] __read_mostly; | 84 | static union irq_ctx *softirq_ctx[NR_CPUS] __read_mostly; |
85 | 85 | ||
86 | static char softirq_stack[NR_CPUS * THREAD_SIZE] | 86 | static char softirq_stack[NR_CPUS * THREAD_SIZE] __page_aligned_bss; |
87 | __attribute__((__section__(".bss.page_aligned"))); | 87 | static char hardirq_stack[NR_CPUS * THREAD_SIZE] __page_aligned_bss; |
88 | |||
89 | static char hardirq_stack[NR_CPUS * THREAD_SIZE] | ||
90 | __attribute__((__section__(".bss.page_aligned"))); | ||
91 | 88 | ||
92 | static void call_on_stack(void *func, void *stack) | 89 | static void call_on_stack(void *func, void *stack) |
93 | { | 90 | { |
diff --git a/arch/x86/kernel/irqinit_64.c b/arch/x86/kernel/irqinit_64.c index 0373e88de95a..1f26fd9ec4f4 100644 --- a/arch/x86/kernel/irqinit_64.c +++ b/arch/x86/kernel/irqinit_64.c | |||
@@ -43,10 +43,11 @@ | |||
43 | 43 | ||
44 | #define BUILD_IRQ(nr) \ | 44 | #define BUILD_IRQ(nr) \ |
45 | asmlinkage void IRQ_NAME(nr); \ | 45 | asmlinkage void IRQ_NAME(nr); \ |
46 | asm("\n.p2align\n" \ | 46 | asm("\n.text\n.p2align\n" \ |
47 | "IRQ" #nr "_interrupt:\n\t" \ | 47 | "IRQ" #nr "_interrupt:\n\t" \ |
48 | "push $~(" #nr ") ; " \ | 48 | "push $~(" #nr ") ; " \ |
49 | "jmp common_interrupt"); | 49 | "jmp common_interrupt\n" \ |
50 | ".previous"); | ||
50 | 51 | ||
51 | #define BI(x,y) \ | 52 | #define BI(x,y) \ |
52 | BUILD_IRQ(x##y) | 53 | BUILD_IRQ(x##y) |
diff --git a/arch/x86/kernel/kdebugfs.c b/arch/x86/kernel/kdebugfs.c index c03205991718..ff7d3b0124f1 100644 --- a/arch/x86/kernel/kdebugfs.c +++ b/arch/x86/kernel/kdebugfs.c | |||
@@ -12,9 +12,13 @@ | |||
12 | #include <linux/init.h> | 12 | #include <linux/init.h> |
13 | #include <linux/io.h> | 13 | #include <linux/io.h> |
14 | #include <linux/mm.h> | 14 | #include <linux/mm.h> |
15 | #include <linux/module.h> | ||
15 | 16 | ||
16 | #include <asm/setup.h> | 17 | #include <asm/setup.h> |
17 | 18 | ||
19 | struct dentry *arch_debugfs_dir; | ||
20 | EXPORT_SYMBOL(arch_debugfs_dir); | ||
21 | |||
18 | #ifdef CONFIG_DEBUG_BOOT_PARAMS | 22 | #ifdef CONFIG_DEBUG_BOOT_PARAMS |
19 | struct setup_data_node { | 23 | struct setup_data_node { |
20 | u64 paddr; | 24 | u64 paddr; |
@@ -135,6 +139,7 @@ static int __init create_setup_data_nodes(struct dentry *parent) | |||
135 | if (PageHighMem(pg)) { | 139 | if (PageHighMem(pg)) { |
136 | data = ioremap_cache(pa_data, sizeof(*data)); | 140 | data = ioremap_cache(pa_data, sizeof(*data)); |
137 | if (!data) { | 141 | if (!data) { |
142 | kfree(node); | ||
138 | error = -ENXIO; | 143 | error = -ENXIO; |
139 | goto err_dir; | 144 | goto err_dir; |
140 | } | 145 | } |
@@ -209,6 +214,10 @@ static int __init arch_kdebugfs_init(void) | |||
209 | { | 214 | { |
210 | int error = 0; | 215 | int error = 0; |
211 | 216 | ||
217 | arch_debugfs_dir = debugfs_create_dir("x86", NULL); | ||
218 | if (!arch_debugfs_dir) | ||
219 | return -ENOMEM; | ||
220 | |||
212 | #ifdef CONFIG_DEBUG_BOOT_PARAMS | 221 | #ifdef CONFIG_DEBUG_BOOT_PARAMS |
213 | error = boot_params_kdebugfs_init(); | 222 | error = boot_params_kdebugfs_init(); |
214 | #endif | 223 | #endif |
diff --git a/arch/x86/kernel/kgdb.c b/arch/x86/kernel/kgdb.c index f47f0eb886b8..8282a2139681 100644 --- a/arch/x86/kernel/kgdb.c +++ b/arch/x86/kernel/kgdb.c | |||
@@ -69,6 +69,9 @@ static int gdb_x86vector = -1; | |||
69 | */ | 69 | */ |
70 | void pt_regs_to_gdb_regs(unsigned long *gdb_regs, struct pt_regs *regs) | 70 | void pt_regs_to_gdb_regs(unsigned long *gdb_regs, struct pt_regs *regs) |
71 | { | 71 | { |
72 | #ifndef CONFIG_X86_32 | ||
73 | u32 *gdb_regs32 = (u32 *)gdb_regs; | ||
74 | #endif | ||
72 | gdb_regs[GDB_AX] = regs->ax; | 75 | gdb_regs[GDB_AX] = regs->ax; |
73 | gdb_regs[GDB_BX] = regs->bx; | 76 | gdb_regs[GDB_BX] = regs->bx; |
74 | gdb_regs[GDB_CX] = regs->cx; | 77 | gdb_regs[GDB_CX] = regs->cx; |
@@ -76,9 +79,9 @@ void pt_regs_to_gdb_regs(unsigned long *gdb_regs, struct pt_regs *regs) | |||
76 | gdb_regs[GDB_SI] = regs->si; | 79 | gdb_regs[GDB_SI] = regs->si; |
77 | gdb_regs[GDB_DI] = regs->di; | 80 | gdb_regs[GDB_DI] = regs->di; |
78 | gdb_regs[GDB_BP] = regs->bp; | 81 | gdb_regs[GDB_BP] = regs->bp; |
79 | gdb_regs[GDB_PS] = regs->flags; | ||
80 | gdb_regs[GDB_PC] = regs->ip; | 82 | gdb_regs[GDB_PC] = regs->ip; |
81 | #ifdef CONFIG_X86_32 | 83 | #ifdef CONFIG_X86_32 |
84 | gdb_regs[GDB_PS] = regs->flags; | ||
82 | gdb_regs[GDB_DS] = regs->ds; | 85 | gdb_regs[GDB_DS] = regs->ds; |
83 | gdb_regs[GDB_ES] = regs->es; | 86 | gdb_regs[GDB_ES] = regs->es; |
84 | gdb_regs[GDB_CS] = regs->cs; | 87 | gdb_regs[GDB_CS] = regs->cs; |
@@ -94,6 +97,9 @@ void pt_regs_to_gdb_regs(unsigned long *gdb_regs, struct pt_regs *regs) | |||
94 | gdb_regs[GDB_R13] = regs->r13; | 97 | gdb_regs[GDB_R13] = regs->r13; |
95 | gdb_regs[GDB_R14] = regs->r14; | 98 | gdb_regs[GDB_R14] = regs->r14; |
96 | gdb_regs[GDB_R15] = regs->r15; | 99 | gdb_regs[GDB_R15] = regs->r15; |
100 | gdb_regs32[GDB_PS] = regs->flags; | ||
101 | gdb_regs32[GDB_CS] = regs->cs; | ||
102 | gdb_regs32[GDB_SS] = regs->ss; | ||
97 | #endif | 103 | #endif |
98 | gdb_regs[GDB_SP] = regs->sp; | 104 | gdb_regs[GDB_SP] = regs->sp; |
99 | } | 105 | } |
@@ -112,6 +118,9 @@ void pt_regs_to_gdb_regs(unsigned long *gdb_regs, struct pt_regs *regs) | |||
112 | */ | 118 | */ |
113 | void sleeping_thread_to_gdb_regs(unsigned long *gdb_regs, struct task_struct *p) | 119 | void sleeping_thread_to_gdb_regs(unsigned long *gdb_regs, struct task_struct *p) |
114 | { | 120 | { |
121 | #ifndef CONFIG_X86_32 | ||
122 | u32 *gdb_regs32 = (u32 *)gdb_regs; | ||
123 | #endif | ||
115 | gdb_regs[GDB_AX] = 0; | 124 | gdb_regs[GDB_AX] = 0; |
116 | gdb_regs[GDB_BX] = 0; | 125 | gdb_regs[GDB_BX] = 0; |
117 | gdb_regs[GDB_CX] = 0; | 126 | gdb_regs[GDB_CX] = 0; |
@@ -129,8 +138,10 @@ void sleeping_thread_to_gdb_regs(unsigned long *gdb_regs, struct task_struct *p) | |||
129 | gdb_regs[GDB_FS] = 0xFFFF; | 138 | gdb_regs[GDB_FS] = 0xFFFF; |
130 | gdb_regs[GDB_GS] = 0xFFFF; | 139 | gdb_regs[GDB_GS] = 0xFFFF; |
131 | #else | 140 | #else |
132 | gdb_regs[GDB_PS] = *(unsigned long *)(p->thread.sp + 8); | 141 | gdb_regs32[GDB_PS] = *(unsigned long *)(p->thread.sp + 8); |
133 | gdb_regs[GDB_PC] = 0; | 142 | gdb_regs32[GDB_CS] = __KERNEL_CS; |
143 | gdb_regs32[GDB_SS] = __KERNEL_DS; | ||
144 | gdb_regs[GDB_PC] = p->thread.ip; | ||
134 | gdb_regs[GDB_R8] = 0; | 145 | gdb_regs[GDB_R8] = 0; |
135 | gdb_regs[GDB_R9] = 0; | 146 | gdb_regs[GDB_R9] = 0; |
136 | gdb_regs[GDB_R10] = 0; | 147 | gdb_regs[GDB_R10] = 0; |
@@ -153,6 +164,9 @@ void sleeping_thread_to_gdb_regs(unsigned long *gdb_regs, struct task_struct *p) | |||
153 | */ | 164 | */ |
154 | void gdb_regs_to_pt_regs(unsigned long *gdb_regs, struct pt_regs *regs) | 165 | void gdb_regs_to_pt_regs(unsigned long *gdb_regs, struct pt_regs *regs) |
155 | { | 166 | { |
167 | #ifndef CONFIG_X86_32 | ||
168 | u32 *gdb_regs32 = (u32 *)gdb_regs; | ||
169 | #endif | ||
156 | regs->ax = gdb_regs[GDB_AX]; | 170 | regs->ax = gdb_regs[GDB_AX]; |
157 | regs->bx = gdb_regs[GDB_BX]; | 171 | regs->bx = gdb_regs[GDB_BX]; |
158 | regs->cx = gdb_regs[GDB_CX]; | 172 | regs->cx = gdb_regs[GDB_CX]; |
@@ -160,9 +174,9 @@ void gdb_regs_to_pt_regs(unsigned long *gdb_regs, struct pt_regs *regs) | |||
160 | regs->si = gdb_regs[GDB_SI]; | 174 | regs->si = gdb_regs[GDB_SI]; |
161 | regs->di = gdb_regs[GDB_DI]; | 175 | regs->di = gdb_regs[GDB_DI]; |
162 | regs->bp = gdb_regs[GDB_BP]; | 176 | regs->bp = gdb_regs[GDB_BP]; |
163 | regs->flags = gdb_regs[GDB_PS]; | ||
164 | regs->ip = gdb_regs[GDB_PC]; | 177 | regs->ip = gdb_regs[GDB_PC]; |
165 | #ifdef CONFIG_X86_32 | 178 | #ifdef CONFIG_X86_32 |
179 | regs->flags = gdb_regs[GDB_PS]; | ||
166 | regs->ds = gdb_regs[GDB_DS]; | 180 | regs->ds = gdb_regs[GDB_DS]; |
167 | regs->es = gdb_regs[GDB_ES]; | 181 | regs->es = gdb_regs[GDB_ES]; |
168 | regs->cs = gdb_regs[GDB_CS]; | 182 | regs->cs = gdb_regs[GDB_CS]; |
@@ -175,6 +189,9 @@ void gdb_regs_to_pt_regs(unsigned long *gdb_regs, struct pt_regs *regs) | |||
175 | regs->r13 = gdb_regs[GDB_R13]; | 189 | regs->r13 = gdb_regs[GDB_R13]; |
176 | regs->r14 = gdb_regs[GDB_R14]; | 190 | regs->r14 = gdb_regs[GDB_R14]; |
177 | regs->r15 = gdb_regs[GDB_R15]; | 191 | regs->r15 = gdb_regs[GDB_R15]; |
192 | regs->flags = gdb_regs32[GDB_PS]; | ||
193 | regs->cs = gdb_regs32[GDB_CS]; | ||
194 | regs->ss = gdb_regs32[GDB_SS]; | ||
178 | #endif | 195 | #endif |
179 | } | 196 | } |
180 | 197 | ||
@@ -378,10 +395,8 @@ int kgdb_arch_handle_exception(int e_vector, int signo, int err_code, | |||
378 | if (remcomInBuffer[0] == 's') { | 395 | if (remcomInBuffer[0] == 's') { |
379 | linux_regs->flags |= X86_EFLAGS_TF; | 396 | linux_regs->flags |= X86_EFLAGS_TF; |
380 | kgdb_single_step = 1; | 397 | kgdb_single_step = 1; |
381 | if (kgdb_contthread) { | 398 | atomic_set(&kgdb_cpu_doing_single_step, |
382 | atomic_set(&kgdb_cpu_doing_single_step, | 399 | raw_smp_processor_id()); |
383 | raw_smp_processor_id()); | ||
384 | } | ||
385 | } | 400 | } |
386 | 401 | ||
387 | get_debugreg(dr6, 6); | 402 | get_debugreg(dr6, 6); |
@@ -466,9 +481,15 @@ static int __kgdb_notify(struct die_args *args, unsigned long cmd) | |||
466 | 481 | ||
467 | case DIE_DEBUG: | 482 | case DIE_DEBUG: |
468 | if (atomic_read(&kgdb_cpu_doing_single_step) == | 483 | if (atomic_read(&kgdb_cpu_doing_single_step) == |
469 | raw_smp_processor_id() && | 484 | raw_smp_processor_id()) { |
470 | user_mode(regs)) | 485 | if (user_mode(regs)) |
471 | return single_step_cont(regs, args); | 486 | return single_step_cont(regs, args); |
487 | break; | ||
488 | } else if (test_thread_flag(TIF_SINGLESTEP)) | ||
489 | /* This means a user thread is single stepping | ||
490 | * a system call which should be ignored | ||
491 | */ | ||
492 | return NOTIFY_DONE; | ||
472 | /* fall through */ | 493 | /* fall through */ |
473 | default: | 494 | default: |
474 | if (user_mode(regs)) | 495 | if (user_mode(regs)) |
diff --git a/arch/x86/kernel/kprobes.c b/arch/x86/kernel/kprobes.c index b8c6743a13da..6c27679ec6aa 100644 --- a/arch/x86/kernel/kprobes.c +++ b/arch/x86/kernel/kprobes.c | |||
@@ -431,7 +431,6 @@ static void __kprobes prepare_singlestep(struct kprobe *p, struct pt_regs *regs) | |||
431 | regs->ip = (unsigned long)p->ainsn.insn; | 431 | regs->ip = (unsigned long)p->ainsn.insn; |
432 | } | 432 | } |
433 | 433 | ||
434 | /* Called with kretprobe_lock held */ | ||
435 | void __kprobes arch_prepare_kretprobe(struct kretprobe_instance *ri, | 434 | void __kprobes arch_prepare_kretprobe(struct kretprobe_instance *ri, |
436 | struct pt_regs *regs) | 435 | struct pt_regs *regs) |
437 | { | 436 | { |
@@ -682,8 +681,7 @@ static __used __kprobes void *trampoline_handler(struct pt_regs *regs) | |||
682 | unsigned long trampoline_address = (unsigned long)&kretprobe_trampoline; | 681 | unsigned long trampoline_address = (unsigned long)&kretprobe_trampoline; |
683 | 682 | ||
684 | INIT_HLIST_HEAD(&empty_rp); | 683 | INIT_HLIST_HEAD(&empty_rp); |
685 | spin_lock_irqsave(&kretprobe_lock, flags); | 684 | kretprobe_hash_lock(current, &head, &flags); |
686 | head = kretprobe_inst_table_head(current); | ||
687 | /* fixup registers */ | 685 | /* fixup registers */ |
688 | #ifdef CONFIG_X86_64 | 686 | #ifdef CONFIG_X86_64 |
689 | regs->cs = __KERNEL_CS; | 687 | regs->cs = __KERNEL_CS; |
@@ -732,7 +730,7 @@ static __used __kprobes void *trampoline_handler(struct pt_regs *regs) | |||
732 | 730 | ||
733 | kretprobe_assert(ri, orig_ret_address, trampoline_address); | 731 | kretprobe_assert(ri, orig_ret_address, trampoline_address); |
734 | 732 | ||
735 | spin_unlock_irqrestore(&kretprobe_lock, flags); | 733 | kretprobe_hash_unlock(current, &flags); |
736 | 734 | ||
737 | hlist_for_each_entry_safe(ri, node, tmp, &empty_rp, hlist) { | 735 | hlist_for_each_entry_safe(ri, node, tmp, &empty_rp, hlist) { |
738 | hlist_del(&ri->hlist); | 736 | hlist_del(&ri->hlist); |
@@ -860,7 +858,6 @@ static int __kprobes post_kprobe_handler(struct pt_regs *regs) | |||
860 | 858 | ||
861 | resume_execution(cur, regs, kcb); | 859 | resume_execution(cur, regs, kcb); |
862 | regs->flags |= kcb->kprobe_saved_flags; | 860 | regs->flags |= kcb->kprobe_saved_flags; |
863 | trace_hardirqs_fixup_flags(regs->flags); | ||
864 | 861 | ||
865 | if ((kcb->kprobe_status != KPROBE_REENTER) && cur->post_handler) { | 862 | if ((kcb->kprobe_status != KPROBE_REENTER) && cur->post_handler) { |
866 | kcb->kprobe_status = KPROBE_HIT_SSDONE; | 863 | kcb->kprobe_status = KPROBE_HIT_SSDONE; |
diff --git a/arch/x86/kernel/kvmclock.c b/arch/x86/kernel/kvmclock.c index 87edf1ceb1df..d02def06ca91 100644 --- a/arch/x86/kernel/kvmclock.c +++ b/arch/x86/kernel/kvmclock.c | |||
@@ -113,7 +113,7 @@ static void kvm_setup_secondary_clock(void) | |||
113 | #endif | 113 | #endif |
114 | 114 | ||
115 | #ifdef CONFIG_SMP | 115 | #ifdef CONFIG_SMP |
116 | void __init kvm_smp_prepare_boot_cpu(void) | 116 | static void __init kvm_smp_prepare_boot_cpu(void) |
117 | { | 117 | { |
118 | WARN_ON(kvm_register_clock("primary cpu clock")); | 118 | WARN_ON(kvm_register_clock("primary cpu clock")); |
119 | native_smp_prepare_boot_cpu(); | 119 | native_smp_prepare_boot_cpu(); |
diff --git a/arch/x86/kernel/ldt.c b/arch/x86/kernel/ldt.c index a8449571858a..b68e21f06f4f 100644 --- a/arch/x86/kernel/ldt.c +++ b/arch/x86/kernel/ldt.c | |||
@@ -62,12 +62,10 @@ static int alloc_ldt(mm_context_t *pc, int mincount, int reload) | |||
62 | 62 | ||
63 | if (reload) { | 63 | if (reload) { |
64 | #ifdef CONFIG_SMP | 64 | #ifdef CONFIG_SMP |
65 | cpumask_t mask; | ||
66 | |||
67 | preempt_disable(); | 65 | preempt_disable(); |
68 | load_LDT(pc); | 66 | load_LDT(pc); |
69 | mask = cpumask_of_cpu(smp_processor_id()); | 67 | if (!cpus_equal(current->mm->cpu_vm_mask, |
70 | if (!cpus_equal(current->mm->cpu_vm_mask, mask)) | 68 | cpumask_of_cpu(smp_processor_id()))) |
71 | smp_call_function(flush_ldt, current->mm, 1); | 69 | smp_call_function(flush_ldt, current->mm, 1); |
72 | preempt_enable(); | 70 | preempt_enable(); |
73 | #else | 71 | #else |
diff --git a/arch/x86/kernel/machine_kexec_32.c b/arch/x86/kernel/machine_kexec_32.c index 8864230d55af..0732adba05ca 100644 --- a/arch/x86/kernel/machine_kexec_32.c +++ b/arch/x86/kernel/machine_kexec_32.c | |||
@@ -12,6 +12,7 @@ | |||
12 | #include <linux/init.h> | 12 | #include <linux/init.h> |
13 | #include <linux/numa.h> | 13 | #include <linux/numa.h> |
14 | #include <linux/ftrace.h> | 14 | #include <linux/ftrace.h> |
15 | #include <linux/suspend.h> | ||
15 | 16 | ||
16 | #include <asm/pgtable.h> | 17 | #include <asm/pgtable.h> |
17 | #include <asm/pgalloc.h> | 18 | #include <asm/pgalloc.h> |
@@ -22,6 +23,7 @@ | |||
22 | #include <asm/cpufeature.h> | 23 | #include <asm/cpufeature.h> |
23 | #include <asm/desc.h> | 24 | #include <asm/desc.h> |
24 | #include <asm/system.h> | 25 | #include <asm/system.h> |
26 | #include <asm/cacheflush.h> | ||
25 | 27 | ||
26 | #define PAGE_ALIGNED __attribute__ ((__aligned__(PAGE_SIZE))) | 28 | #define PAGE_ALIGNED __attribute__ ((__aligned__(PAGE_SIZE))) |
27 | static u32 kexec_pgd[1024] PAGE_ALIGNED; | 29 | static u32 kexec_pgd[1024] PAGE_ALIGNED; |
@@ -77,7 +79,7 @@ static void load_segments(void) | |||
77 | /* | 79 | /* |
78 | * A architecture hook called to validate the | 80 | * A architecture hook called to validate the |
79 | * proposed image and prepare the control pages | 81 | * proposed image and prepare the control pages |
80 | * as needed. The pages for KEXEC_CONTROL_CODE_SIZE | 82 | * as needed. The pages for KEXEC_CONTROL_PAGE_SIZE |
81 | * have been allocated, but the segments have yet | 83 | * have been allocated, but the segments have yet |
82 | * been copied into the kernel. | 84 | * been copied into the kernel. |
83 | * | 85 | * |
@@ -85,10 +87,12 @@ static void load_segments(void) | |||
85 | * reboot code buffer to allow us to avoid allocations | 87 | * reboot code buffer to allow us to avoid allocations |
86 | * later. | 88 | * later. |
87 | * | 89 | * |
88 | * Currently nothing. | 90 | * Make control page executable. |
89 | */ | 91 | */ |
90 | int machine_kexec_prepare(struct kimage *image) | 92 | int machine_kexec_prepare(struct kimage *image) |
91 | { | 93 | { |
94 | if (nx_enabled) | ||
95 | set_pages_x(image->control_code_page, 1); | ||
92 | return 0; | 96 | return 0; |
93 | } | 97 | } |
94 | 98 | ||
@@ -98,27 +102,54 @@ int machine_kexec_prepare(struct kimage *image) | |||
98 | */ | 102 | */ |
99 | void machine_kexec_cleanup(struct kimage *image) | 103 | void machine_kexec_cleanup(struct kimage *image) |
100 | { | 104 | { |
105 | if (nx_enabled) | ||
106 | set_pages_nx(image->control_code_page, 1); | ||
101 | } | 107 | } |
102 | 108 | ||
103 | /* | 109 | /* |
104 | * Do not allocate memory (or fail in any way) in machine_kexec(). | 110 | * Do not allocate memory (or fail in any way) in machine_kexec(). |
105 | * We are past the point of no return, committed to rebooting now. | 111 | * We are past the point of no return, committed to rebooting now. |
106 | */ | 112 | */ |
107 | NORET_TYPE void machine_kexec(struct kimage *image) | 113 | void machine_kexec(struct kimage *image) |
108 | { | 114 | { |
109 | unsigned long page_list[PAGES_NR]; | 115 | unsigned long page_list[PAGES_NR]; |
110 | void *control_page; | 116 | void *control_page; |
117 | int save_ftrace_enabled; | ||
118 | asmlinkage unsigned long | ||
119 | (*relocate_kernel_ptr)(unsigned long indirection_page, | ||
120 | unsigned long control_page, | ||
121 | unsigned long start_address, | ||
122 | unsigned int has_pae, | ||
123 | unsigned int preserve_context); | ||
124 | |||
125 | #ifdef CONFIG_KEXEC_JUMP | ||
126 | if (kexec_image->preserve_context) | ||
127 | save_processor_state(); | ||
128 | #endif | ||
111 | 129 | ||
112 | tracer_disable(); | 130 | save_ftrace_enabled = __ftrace_enabled_save(); |
113 | 131 | ||
114 | /* Interrupts aren't acceptable while we reboot */ | 132 | /* Interrupts aren't acceptable while we reboot */ |
115 | local_irq_disable(); | 133 | local_irq_disable(); |
116 | 134 | ||
135 | if (image->preserve_context) { | ||
136 | #ifdef CONFIG_X86_IO_APIC | ||
137 | /* We need to put APICs in legacy mode so that we can | ||
138 | * get timer interrupts in second kernel. kexec/kdump | ||
139 | * paths already have calls to disable_IO_APIC() in | ||
140 | * one form or other. kexec jump path also need | ||
141 | * one. | ||
142 | */ | ||
143 | disable_IO_APIC(); | ||
144 | #endif | ||
145 | } | ||
146 | |||
117 | control_page = page_address(image->control_code_page); | 147 | control_page = page_address(image->control_code_page); |
118 | memcpy(control_page, relocate_kernel, PAGE_SIZE); | 148 | memcpy(control_page, relocate_kernel, KEXEC_CONTROL_CODE_MAX_SIZE); |
119 | 149 | ||
150 | relocate_kernel_ptr = control_page; | ||
120 | page_list[PA_CONTROL_PAGE] = __pa(control_page); | 151 | page_list[PA_CONTROL_PAGE] = __pa(control_page); |
121 | page_list[VA_CONTROL_PAGE] = (unsigned long)relocate_kernel; | 152 | page_list[VA_CONTROL_PAGE] = (unsigned long)control_page; |
122 | page_list[PA_PGD] = __pa(kexec_pgd); | 153 | page_list[PA_PGD] = __pa(kexec_pgd); |
123 | page_list[VA_PGD] = (unsigned long)kexec_pgd; | 154 | page_list[VA_PGD] = (unsigned long)kexec_pgd; |
124 | #ifdef CONFIG_X86_PAE | 155 | #ifdef CONFIG_X86_PAE |
@@ -131,6 +162,7 @@ NORET_TYPE void machine_kexec(struct kimage *image) | |||
131 | page_list[VA_PTE_0] = (unsigned long)kexec_pte0; | 162 | page_list[VA_PTE_0] = (unsigned long)kexec_pte0; |
132 | page_list[PA_PTE_1] = __pa(kexec_pte1); | 163 | page_list[PA_PTE_1] = __pa(kexec_pte1); |
133 | page_list[VA_PTE_1] = (unsigned long)kexec_pte1; | 164 | page_list[VA_PTE_1] = (unsigned long)kexec_pte1; |
165 | page_list[PA_SWAP_PAGE] = (page_to_pfn(image->swap_page) << PAGE_SHIFT); | ||
134 | 166 | ||
135 | /* The segment registers are funny things, they have both a | 167 | /* The segment registers are funny things, they have both a |
136 | * visible and an invisible part. Whenever the visible part is | 168 | * visible and an invisible part. Whenever the visible part is |
@@ -149,8 +181,17 @@ NORET_TYPE void machine_kexec(struct kimage *image) | |||
149 | set_idt(phys_to_virt(0),0); | 181 | set_idt(phys_to_virt(0),0); |
150 | 182 | ||
151 | /* now call it */ | 183 | /* now call it */ |
152 | relocate_kernel((unsigned long)image->head, (unsigned long)page_list, | 184 | image->start = relocate_kernel_ptr((unsigned long)image->head, |
153 | image->start, cpu_has_pae); | 185 | (unsigned long)page_list, |
186 | image->start, cpu_has_pae, | ||
187 | image->preserve_context); | ||
188 | |||
189 | #ifdef CONFIG_KEXEC_JUMP | ||
190 | if (kexec_image->preserve_context) | ||
191 | restore_processor_state(); | ||
192 | #endif | ||
193 | |||
194 | __ftrace_enabled_restore(save_ftrace_enabled); | ||
154 | } | 195 | } |
155 | 196 | ||
156 | void arch_crash_save_vmcoreinfo(void) | 197 | void arch_crash_save_vmcoreinfo(void) |
diff --git a/arch/x86/kernel/machine_kexec_64.c b/arch/x86/kernel/machine_kexec_64.c index 9dd9262693a3..c43caa3a91f3 100644 --- a/arch/x86/kernel/machine_kexec_64.c +++ b/arch/x86/kernel/machine_kexec_64.c | |||
@@ -181,7 +181,7 @@ void machine_kexec_cleanup(struct kimage *image) | |||
181 | * Do not allocate memory (or fail in any way) in machine_kexec(). | 181 | * Do not allocate memory (or fail in any way) in machine_kexec(). |
182 | * We are past the point of no return, committed to rebooting now. | 182 | * We are past the point of no return, committed to rebooting now. |
183 | */ | 183 | */ |
184 | NORET_TYPE void machine_kexec(struct kimage *image) | 184 | void machine_kexec(struct kimage *image) |
185 | { | 185 | { |
186 | unsigned long page_list[PAGES_NR]; | 186 | unsigned long page_list[PAGES_NR]; |
187 | void *control_page; | 187 | void *control_page; |
diff --git a/arch/x86/kernel/mfgpt_32.c b/arch/x86/kernel/mfgpt_32.c index 07c0f828f488..3b599518c322 100644 --- a/arch/x86/kernel/mfgpt_32.c +++ b/arch/x86/kernel/mfgpt_32.c | |||
@@ -33,6 +33,8 @@ | |||
33 | #include <linux/module.h> | 33 | #include <linux/module.h> |
34 | #include <asm/geode.h> | 34 | #include <asm/geode.h> |
35 | 35 | ||
36 | #define MFGPT_DEFAULT_IRQ 7 | ||
37 | |||
36 | static struct mfgpt_timer_t { | 38 | static struct mfgpt_timer_t { |
37 | unsigned int avail:1; | 39 | unsigned int avail:1; |
38 | } mfgpt_timers[MFGPT_MAX_TIMERS]; | 40 | } mfgpt_timers[MFGPT_MAX_TIMERS]; |
@@ -157,29 +159,48 @@ int geode_mfgpt_toggle_event(int timer, int cmp, int event, int enable) | |||
157 | } | 159 | } |
158 | EXPORT_SYMBOL_GPL(geode_mfgpt_toggle_event); | 160 | EXPORT_SYMBOL_GPL(geode_mfgpt_toggle_event); |
159 | 161 | ||
160 | int geode_mfgpt_set_irq(int timer, int cmp, int irq, int enable) | 162 | int geode_mfgpt_set_irq(int timer, int cmp, int *irq, int enable) |
161 | { | 163 | { |
162 | u32 val, dummy; | 164 | u32 zsel, lpc, dummy; |
163 | int offset; | 165 | int shift; |
164 | 166 | ||
165 | if (timer < 0 || timer >= MFGPT_MAX_TIMERS) | 167 | if (timer < 0 || timer >= MFGPT_MAX_TIMERS) |
166 | return -EIO; | 168 | return -EIO; |
167 | 169 | ||
168 | if (geode_mfgpt_toggle_event(timer, cmp, MFGPT_EVENT_IRQ, enable)) | 170 | /* |
171 | * Unfortunately, MFGPTs come in pairs sharing their IRQ lines. If VSA | ||
172 | * is using the same CMP of the timer's Siamese twin, the IRQ is set to | ||
173 | * 2, and we mustn't use nor change it. | ||
174 | * XXX: Likewise, 2 Linux drivers might clash if the 2nd overwrites the | ||
175 | * IRQ of the 1st. This can only happen if forcing an IRQ, calling this | ||
176 | * with *irq==0 is safe. Currently there _are_ no 2 drivers. | ||
177 | */ | ||
178 | rdmsr(MSR_PIC_ZSEL_LOW, zsel, dummy); | ||
179 | shift = ((cmp == MFGPT_CMP1 ? 0 : 4) + timer % 4) * 4; | ||
180 | if (((zsel >> shift) & 0xF) == 2) | ||
169 | return -EIO; | 181 | return -EIO; |
170 | 182 | ||
171 | rdmsr(MSR_PIC_ZSEL_LOW, val, dummy); | 183 | /* Choose IRQ: if none supplied, keep IRQ already set or use default */ |
184 | if (!*irq) | ||
185 | *irq = (zsel >> shift) & 0xF; | ||
186 | if (!*irq) | ||
187 | *irq = MFGPT_DEFAULT_IRQ; | ||
172 | 188 | ||
173 | offset = (timer % 4) * 4; | 189 | /* Can't use IRQ if it's 0 (=disabled), 2, or routed to LPC */ |
174 | 190 | if (*irq < 1 || *irq == 2 || *irq > 15) | |
175 | val &= ~((0xF << offset) | (0xF << (offset + 16))); | 191 | return -EIO; |
192 | rdmsr(MSR_PIC_IRQM_LPC, lpc, dummy); | ||
193 | if (lpc & (1 << *irq)) | ||
194 | return -EIO; | ||
176 | 195 | ||
196 | /* All chosen and checked - go for it */ | ||
197 | if (geode_mfgpt_toggle_event(timer, cmp, MFGPT_EVENT_IRQ, enable)) | ||
198 | return -EIO; | ||
177 | if (enable) { | 199 | if (enable) { |
178 | val |= (irq & 0x0F) << (offset); | 200 | zsel = (zsel & ~(0xF << shift)) | (*irq << shift); |
179 | val |= (irq & 0x0F) << (offset + 16); | 201 | wrmsr(MSR_PIC_ZSEL_LOW, zsel, dummy); |
180 | } | 202 | } |
181 | 203 | ||
182 | wrmsr(MSR_PIC_ZSEL_LOW, val, dummy); | ||
183 | return 0; | 204 | return 0; |
184 | } | 205 | } |
185 | 206 | ||
@@ -242,7 +263,7 @@ EXPORT_SYMBOL_GPL(geode_mfgpt_alloc_timer); | |||
242 | static unsigned int mfgpt_tick_mode = CLOCK_EVT_MODE_SHUTDOWN; | 263 | static unsigned int mfgpt_tick_mode = CLOCK_EVT_MODE_SHUTDOWN; |
243 | static u16 mfgpt_event_clock; | 264 | static u16 mfgpt_event_clock; |
244 | 265 | ||
245 | static int irq = 7; | 266 | static int irq; |
246 | static int __init mfgpt_setup(char *str) | 267 | static int __init mfgpt_setup(char *str) |
247 | { | 268 | { |
248 | get_option(&str, &irq); | 269 | get_option(&str, &irq); |
@@ -346,7 +367,7 @@ int __init mfgpt_timer_setup(void) | |||
346 | mfgpt_event_clock = timer; | 367 | mfgpt_event_clock = timer; |
347 | 368 | ||
348 | /* Set up the IRQ on the MFGPT side */ | 369 | /* Set up the IRQ on the MFGPT side */ |
349 | if (geode_mfgpt_setup_irq(mfgpt_event_clock, MFGPT_CMP2, irq)) { | 370 | if (geode_mfgpt_setup_irq(mfgpt_event_clock, MFGPT_CMP2, &irq)) { |
350 | printk(KERN_ERR "mfgpt-timer: Could not set up IRQ %d\n", irq); | 371 | printk(KERN_ERR "mfgpt-timer: Could not set up IRQ %d\n", irq); |
351 | return -EIO; | 372 | return -EIO; |
352 | } | 373 | } |
@@ -374,13 +395,14 @@ int __init mfgpt_timer_setup(void) | |||
374 | &mfgpt_clockevent); | 395 | &mfgpt_clockevent); |
375 | 396 | ||
376 | printk(KERN_INFO | 397 | printk(KERN_INFO |
377 | "mfgpt-timer: registering the MFGPT timer as a clock event.\n"); | 398 | "mfgpt-timer: Registering MFGPT timer %d as a clock event, using IRQ %d\n", |
399 | timer, irq); | ||
378 | clockevents_register_device(&mfgpt_clockevent); | 400 | clockevents_register_device(&mfgpt_clockevent); |
379 | 401 | ||
380 | return 0; | 402 | return 0; |
381 | 403 | ||
382 | err: | 404 | err: |
383 | geode_mfgpt_release_irq(mfgpt_event_clock, MFGPT_CMP2, irq); | 405 | geode_mfgpt_release_irq(mfgpt_event_clock, MFGPT_CMP2, &irq); |
384 | printk(KERN_ERR | 406 | printk(KERN_ERR |
385 | "mfgpt-timer: Unable to set up the MFGPT clock source\n"); | 407 | "mfgpt-timer: Unable to set up the MFGPT clock source\n"); |
386 | return -EIO; | 408 | return -EIO; |
diff --git a/arch/x86/kernel/microcode.c b/arch/x86/kernel/microcode.c index 56b933119a04..652fa5c38ebe 100644 --- a/arch/x86/kernel/microcode.c +++ b/arch/x86/kernel/microcode.c | |||
@@ -644,7 +644,9 @@ static void microcode_fini_cpu(int cpu) | |||
644 | mutex_unlock(&microcode_mutex); | 644 | mutex_unlock(&microcode_mutex); |
645 | } | 645 | } |
646 | 646 | ||
647 | static ssize_t reload_store(struct sys_device *dev, const char *buf, size_t sz) | 647 | static ssize_t reload_store(struct sys_device *dev, |
648 | struct sysdev_attribute *attr, | ||
649 | const char *buf, size_t sz) | ||
648 | { | 650 | { |
649 | struct ucode_cpu_info *uci = ucode_cpu_info + dev->id; | 651 | struct ucode_cpu_info *uci = ucode_cpu_info + dev->id; |
650 | char *end; | 652 | char *end; |
@@ -655,9 +657,7 @@ static ssize_t reload_store(struct sys_device *dev, const char *buf, size_t sz) | |||
655 | if (end == buf) | 657 | if (end == buf) |
656 | return -EINVAL; | 658 | return -EINVAL; |
657 | if (val == 1) { | 659 | if (val == 1) { |
658 | cpumask_t old; | 660 | cpumask_t old = current->cpus_allowed; |
659 | |||
660 | old = current->cpus_allowed; | ||
661 | 661 | ||
662 | get_online_cpus(); | 662 | get_online_cpus(); |
663 | set_cpus_allowed_ptr(current, &cpumask_of_cpu(cpu)); | 663 | set_cpus_allowed_ptr(current, &cpumask_of_cpu(cpu)); |
@@ -674,14 +674,16 @@ static ssize_t reload_store(struct sys_device *dev, const char *buf, size_t sz) | |||
674 | return sz; | 674 | return sz; |
675 | } | 675 | } |
676 | 676 | ||
677 | static ssize_t version_show(struct sys_device *dev, char *buf) | 677 | static ssize_t version_show(struct sys_device *dev, |
678 | struct sysdev_attribute *attr, char *buf) | ||
678 | { | 679 | { |
679 | struct ucode_cpu_info *uci = ucode_cpu_info + dev->id; | 680 | struct ucode_cpu_info *uci = ucode_cpu_info + dev->id; |
680 | 681 | ||
681 | return sprintf(buf, "0x%x\n", uci->rev); | 682 | return sprintf(buf, "0x%x\n", uci->rev); |
682 | } | 683 | } |
683 | 684 | ||
684 | static ssize_t pf_show(struct sys_device *dev, char *buf) | 685 | static ssize_t pf_show(struct sys_device *dev, |
686 | struct sysdev_attribute *attr, char *buf) | ||
685 | { | 687 | { |
686 | struct ucode_cpu_info *uci = ucode_cpu_info + dev->id; | 688 | struct ucode_cpu_info *uci = ucode_cpu_info + dev->id; |
687 | 689 | ||
diff --git a/arch/x86/kernel/mmconf-fam10h_64.c b/arch/x86/kernel/mmconf-fam10h_64.c index fdfdc550b366..efc2f361fe85 100644 --- a/arch/x86/kernel/mmconf-fam10h_64.c +++ b/arch/x86/kernel/mmconf-fam10h_64.c | |||
@@ -238,7 +238,7 @@ static struct dmi_system_id __devinitdata mmconf_dmi_table[] = { | |||
238 | {} | 238 | {} |
239 | }; | 239 | }; |
240 | 240 | ||
241 | void __init check_enable_amd_mmconf_dmi(void) | 241 | void __cpuinit check_enable_amd_mmconf_dmi(void) |
242 | { | 242 | { |
243 | dmi_check_system(mmconf_dmi_table); | 243 | dmi_check_system(mmconf_dmi_table); |
244 | } | 244 | } |
diff --git a/arch/x86/kernel/module_64.c b/arch/x86/kernel/module_64.c index a888e67f5874..6ba87830d4b1 100644 --- a/arch/x86/kernel/module_64.c +++ b/arch/x86/kernel/module_64.c | |||
@@ -22,6 +22,7 @@ | |||
22 | #include <linux/fs.h> | 22 | #include <linux/fs.h> |
23 | #include <linux/string.h> | 23 | #include <linux/string.h> |
24 | #include <linux/kernel.h> | 24 | #include <linux/kernel.h> |
25 | #include <linux/mm.h> | ||
25 | #include <linux/slab.h> | 26 | #include <linux/slab.h> |
26 | #include <linux/bug.h> | 27 | #include <linux/bug.h> |
27 | 28 | ||
@@ -150,7 +151,8 @@ int module_finalize(const Elf_Ehdr *hdr, | |||
150 | const Elf_Shdr *sechdrs, | 151 | const Elf_Shdr *sechdrs, |
151 | struct module *me) | 152 | struct module *me) |
152 | { | 153 | { |
153 | const Elf_Shdr *s, *text = NULL, *alt = NULL, *locks = NULL; | 154 | const Elf_Shdr *s, *text = NULL, *alt = NULL, *locks = NULL, |
155 | *para = NULL; | ||
154 | char *secstrings = (void *)hdr + sechdrs[hdr->e_shstrndx].sh_offset; | 156 | char *secstrings = (void *)hdr + sechdrs[hdr->e_shstrndx].sh_offset; |
155 | 157 | ||
156 | for (s = sechdrs; s < sechdrs + hdr->e_shnum; s++) { | 158 | for (s = sechdrs; s < sechdrs + hdr->e_shnum; s++) { |
@@ -160,6 +162,8 @@ int module_finalize(const Elf_Ehdr *hdr, | |||
160 | alt = s; | 162 | alt = s; |
161 | if (!strcmp(".smp_locks", secstrings + s->sh_name)) | 163 | if (!strcmp(".smp_locks", secstrings + s->sh_name)) |
162 | locks= s; | 164 | locks= s; |
165 | if (!strcmp(".parainstructions", secstrings + s->sh_name)) | ||
166 | para = s; | ||
163 | } | 167 | } |
164 | 168 | ||
165 | if (alt) { | 169 | if (alt) { |
@@ -175,6 +179,11 @@ int module_finalize(const Elf_Ehdr *hdr, | |||
175 | tseg, tseg + text->sh_size); | 179 | tseg, tseg + text->sh_size); |
176 | } | 180 | } |
177 | 181 | ||
182 | if (para) { | ||
183 | void *pseg = (void *)para->sh_addr; | ||
184 | apply_paravirt(pseg, pseg + para->sh_size); | ||
185 | } | ||
186 | |||
178 | return module_bug_finalize(hdr, sechdrs, me); | 187 | return module_bug_finalize(hdr, sechdrs, me); |
179 | } | 188 | } |
180 | 189 | ||
diff --git a/arch/x86/kernel/mpparse.c b/arch/x86/kernel/mpparse.c index 3b25e49380c6..b3fb430725cb 100644 --- a/arch/x86/kernel/mpparse.c +++ b/arch/x86/kernel/mpparse.c | |||
@@ -27,6 +27,7 @@ | |||
27 | #include <asm/bios_ebda.h> | 27 | #include <asm/bios_ebda.h> |
28 | #include <asm/e820.h> | 28 | #include <asm/e820.h> |
29 | #include <asm/trampoline.h> | 29 | #include <asm/trampoline.h> |
30 | #include <asm/setup.h> | ||
30 | 31 | ||
31 | #include <mach_apic.h> | 32 | #include <mach_apic.h> |
32 | #ifdef CONFIG_X86_32 | 33 | #ifdef CONFIG_X86_32 |
@@ -48,77 +49,7 @@ static int __init mpf_checksum(unsigned char *mp, int len) | |||
48 | return sum & 0xFF; | 49 | return sum & 0xFF; |
49 | } | 50 | } |
50 | 51 | ||
51 | #ifdef CONFIG_X86_NUMAQ | 52 | static void __init MP_processor_info(struct mpc_config_processor *m) |
52 | int found_numaq; | ||
53 | /* | ||
54 | * Have to match translation table entries to main table entries by counter | ||
55 | * hence the mpc_record variable .... can't see a less disgusting way of | ||
56 | * doing this .... | ||
57 | */ | ||
58 | struct mpc_config_translation { | ||
59 | unsigned char mpc_type; | ||
60 | unsigned char trans_len; | ||
61 | unsigned char trans_type; | ||
62 | unsigned char trans_quad; | ||
63 | unsigned char trans_global; | ||
64 | unsigned char trans_local; | ||
65 | unsigned short trans_reserved; | ||
66 | }; | ||
67 | |||
68 | |||
69 | static int mpc_record; | ||
70 | static struct mpc_config_translation *translation_table[MAX_MPC_ENTRY] | ||
71 | __cpuinitdata; | ||
72 | |||
73 | static inline int generate_logical_apicid(int quad, int phys_apicid) | ||
74 | { | ||
75 | return (quad << 4) + (phys_apicid ? phys_apicid << 1 : 1); | ||
76 | } | ||
77 | |||
78 | |||
79 | static inline int mpc_apic_id(struct mpc_config_processor *m, | ||
80 | struct mpc_config_translation *translation_record) | ||
81 | { | ||
82 | int quad = translation_record->trans_quad; | ||
83 | int logical_apicid = generate_logical_apicid(quad, m->mpc_apicid); | ||
84 | |||
85 | printk(KERN_DEBUG "Processor #%d %u:%u APIC version %d (quad %d, apic %d)\n", | ||
86 | m->mpc_apicid, | ||
87 | (m->mpc_cpufeature & CPU_FAMILY_MASK) >> 8, | ||
88 | (m->mpc_cpufeature & CPU_MODEL_MASK) >> 4, | ||
89 | m->mpc_apicver, quad, logical_apicid); | ||
90 | return logical_apicid; | ||
91 | } | ||
92 | |||
93 | int mp_bus_id_to_node[MAX_MP_BUSSES]; | ||
94 | |||
95 | int mp_bus_id_to_local[MAX_MP_BUSSES]; | ||
96 | |||
97 | static void mpc_oem_bus_info(struct mpc_config_bus *m, char *name, | ||
98 | struct mpc_config_translation *translation) | ||
99 | { | ||
100 | int quad = translation->trans_quad; | ||
101 | int local = translation->trans_local; | ||
102 | |||
103 | mp_bus_id_to_node[m->mpc_busid] = quad; | ||
104 | mp_bus_id_to_local[m->mpc_busid] = local; | ||
105 | printk(KERN_INFO "Bus #%d is %s (node %d)\n", | ||
106 | m->mpc_busid, name, quad); | ||
107 | } | ||
108 | |||
109 | int quad_local_to_mp_bus_id [NR_CPUS/4][4]; | ||
110 | static void mpc_oem_pci_bus(struct mpc_config_bus *m, | ||
111 | struct mpc_config_translation *translation) | ||
112 | { | ||
113 | int quad = translation->trans_quad; | ||
114 | int local = translation->trans_local; | ||
115 | |||
116 | quad_local_to_mp_bus_id[quad][local] = m->mpc_busid; | ||
117 | } | ||
118 | |||
119 | #endif | ||
120 | |||
121 | static void __cpuinit MP_processor_info(struct mpc_config_processor *m) | ||
122 | { | 53 | { |
123 | int apicid; | 54 | int apicid; |
124 | char *bootup_cpu = ""; | 55 | char *bootup_cpu = ""; |
@@ -127,14 +58,12 @@ static void __cpuinit MP_processor_info(struct mpc_config_processor *m) | |||
127 | disabled_cpus++; | 58 | disabled_cpus++; |
128 | return; | 59 | return; |
129 | } | 60 | } |
130 | #ifdef CONFIG_X86_NUMAQ | 61 | |
131 | if (found_numaq) | 62 | if (x86_quirks->mpc_apic_id) |
132 | apicid = mpc_apic_id(m, translation_table[mpc_record]); | 63 | apicid = x86_quirks->mpc_apic_id(m); |
133 | else | 64 | else |
134 | apicid = m->mpc_apicid; | 65 | apicid = m->mpc_apicid; |
135 | #else | 66 | |
136 | apicid = m->mpc_apicid; | ||
137 | #endif | ||
138 | if (m->mpc_cpuflag & CPU_BOOTPROCESSOR) { | 67 | if (m->mpc_cpuflag & CPU_BOOTPROCESSOR) { |
139 | bootup_cpu = " (Bootup-CPU)"; | 68 | bootup_cpu = " (Bootup-CPU)"; |
140 | boot_cpu_physical_apicid = m->mpc_apicid; | 69 | boot_cpu_physical_apicid = m->mpc_apicid; |
@@ -151,12 +80,10 @@ static void __init MP_bus_info(struct mpc_config_bus *m) | |||
151 | memcpy(str, m->mpc_bustype, 6); | 80 | memcpy(str, m->mpc_bustype, 6); |
152 | str[6] = 0; | 81 | str[6] = 0; |
153 | 82 | ||
154 | #ifdef CONFIG_X86_NUMAQ | 83 | if (x86_quirks->mpc_oem_bus_info) |
155 | if (found_numaq) | 84 | x86_quirks->mpc_oem_bus_info(m, str); |
156 | mpc_oem_bus_info(m, str, translation_table[mpc_record]); | 85 | else |
157 | #else | 86 | apic_printk(APIC_VERBOSE, "Bus #%d is %s\n", m->mpc_busid, str); |
158 | printk(KERN_INFO "Bus #%d is %s\n", m->mpc_busid, str); | ||
159 | #endif | ||
160 | 87 | ||
161 | #if MAX_MP_BUSSES < 256 | 88 | #if MAX_MP_BUSSES < 256 |
162 | if (m->mpc_busid >= MAX_MP_BUSSES) { | 89 | if (m->mpc_busid >= MAX_MP_BUSSES) { |
@@ -173,10 +100,9 @@ static void __init MP_bus_info(struct mpc_config_bus *m) | |||
173 | mp_bus_id_to_type[m->mpc_busid] = MP_BUS_ISA; | 100 | mp_bus_id_to_type[m->mpc_busid] = MP_BUS_ISA; |
174 | #endif | 101 | #endif |
175 | } else if (strncmp(str, BUSTYPE_PCI, sizeof(BUSTYPE_PCI) - 1) == 0) { | 102 | } else if (strncmp(str, BUSTYPE_PCI, sizeof(BUSTYPE_PCI) - 1) == 0) { |
176 | #ifdef CONFIG_X86_NUMAQ | 103 | if (x86_quirks->mpc_oem_pci_bus) |
177 | if (found_numaq) | 104 | x86_quirks->mpc_oem_pci_bus(m); |
178 | mpc_oem_pci_bus(m, translation_table[mpc_record]); | 105 | |
179 | #endif | ||
180 | clear_bit(m->mpc_busid, mp_bus_not_pci); | 106 | clear_bit(m->mpc_busid, mp_bus_not_pci); |
181 | #if defined(CONFIG_EISA) || defined (CONFIG_MCA) | 107 | #if defined(CONFIG_EISA) || defined (CONFIG_MCA) |
182 | mp_bus_id_to_type[m->mpc_busid] = MP_BUS_PCI; | 108 | mp_bus_id_to_type[m->mpc_busid] = MP_BUS_PCI; |
@@ -228,7 +154,7 @@ static void __init MP_ioapic_info(struct mpc_config_ioapic *m) | |||
228 | 154 | ||
229 | static void print_MP_intsrc_info(struct mpc_config_intsrc *m) | 155 | static void print_MP_intsrc_info(struct mpc_config_intsrc *m) |
230 | { | 156 | { |
231 | printk(KERN_CONT "Int: type %d, pol %d, trig %d, bus %02x," | 157 | apic_printk(APIC_VERBOSE, "Int: type %d, pol %d, trig %d, bus %02x," |
232 | " IRQ %02x, APIC ID %x, APIC INT %02x\n", | 158 | " IRQ %02x, APIC ID %x, APIC INT %02x\n", |
233 | m->mpc_irqtype, m->mpc_irqflag & 3, | 159 | m->mpc_irqtype, m->mpc_irqflag & 3, |
234 | (m->mpc_irqflag >> 2) & 3, m->mpc_srcbus, | 160 | (m->mpc_irqflag >> 2) & 3, m->mpc_srcbus, |
@@ -237,7 +163,7 @@ static void print_MP_intsrc_info(struct mpc_config_intsrc *m) | |||
237 | 163 | ||
238 | static void __init print_mp_irq_info(struct mp_config_intsrc *mp_irq) | 164 | static void __init print_mp_irq_info(struct mp_config_intsrc *mp_irq) |
239 | { | 165 | { |
240 | printk(KERN_CONT "Int: type %d, pol %d, trig %d, bus %02x," | 166 | apic_printk(APIC_VERBOSE, "Int: type %d, pol %d, trig %d, bus %02x," |
241 | " IRQ %02x, APIC ID %x, APIC INT %02x\n", | 167 | " IRQ %02x, APIC ID %x, APIC INT %02x\n", |
242 | mp_irq->mp_irqtype, mp_irq->mp_irqflag & 3, | 168 | mp_irq->mp_irqtype, mp_irq->mp_irqflag & 3, |
243 | (mp_irq->mp_irqflag >> 2) & 3, mp_irq->mp_srcbus, | 169 | (mp_irq->mp_irqflag >> 2) & 3, mp_irq->mp_srcbus, |
@@ -309,90 +235,13 @@ static void __init MP_intsrc_info(struct mpc_config_intsrc *m) | |||
309 | 235 | ||
310 | static void __init MP_lintsrc_info(struct mpc_config_lintsrc *m) | 236 | static void __init MP_lintsrc_info(struct mpc_config_lintsrc *m) |
311 | { | 237 | { |
312 | printk(KERN_INFO "Lint: type %d, pol %d, trig %d, bus %02x," | 238 | apic_printk(APIC_VERBOSE, "Lint: type %d, pol %d, trig %d, bus %02x," |
313 | " IRQ %02x, APIC ID %x, APIC LINT %02x\n", | 239 | " IRQ %02x, APIC ID %x, APIC LINT %02x\n", |
314 | m->mpc_irqtype, m->mpc_irqflag & 3, | 240 | m->mpc_irqtype, m->mpc_irqflag & 3, |
315 | (m->mpc_irqflag >> 2) & 3, m->mpc_srcbusid, | 241 | (m->mpc_irqflag >> 2) & 3, m->mpc_srcbusid, |
316 | m->mpc_srcbusirq, m->mpc_destapic, m->mpc_destapiclint); | 242 | m->mpc_srcbusirq, m->mpc_destapic, m->mpc_destapiclint); |
317 | } | 243 | } |
318 | 244 | ||
319 | #ifdef CONFIG_X86_NUMAQ | ||
320 | static void __init MP_translation_info(struct mpc_config_translation *m) | ||
321 | { | ||
322 | printk(KERN_INFO | ||
323 | "Translation: record %d, type %d, quad %d, global %d, local %d\n", | ||
324 | mpc_record, m->trans_type, m->trans_quad, m->trans_global, | ||
325 | m->trans_local); | ||
326 | |||
327 | if (mpc_record >= MAX_MPC_ENTRY) | ||
328 | printk(KERN_ERR "MAX_MPC_ENTRY exceeded!\n"); | ||
329 | else | ||
330 | translation_table[mpc_record] = m; /* stash this for later */ | ||
331 | if (m->trans_quad < MAX_NUMNODES && !node_online(m->trans_quad)) | ||
332 | node_set_online(m->trans_quad); | ||
333 | } | ||
334 | |||
335 | /* | ||
336 | * Read/parse the MPC oem tables | ||
337 | */ | ||
338 | |||
339 | static void __init smp_read_mpc_oem(struct mp_config_oemtable *oemtable, | ||
340 | unsigned short oemsize) | ||
341 | { | ||
342 | int count = sizeof(*oemtable); /* the header size */ | ||
343 | unsigned char *oemptr = ((unsigned char *)oemtable) + count; | ||
344 | |||
345 | mpc_record = 0; | ||
346 | printk(KERN_INFO "Found an OEM MPC table at %8p - parsing it ... \n", | ||
347 | oemtable); | ||
348 | if (memcmp(oemtable->oem_signature, MPC_OEM_SIGNATURE, 4)) { | ||
349 | printk(KERN_WARNING | ||
350 | "SMP mpc oemtable: bad signature [%c%c%c%c]!\n", | ||
351 | oemtable->oem_signature[0], oemtable->oem_signature[1], | ||
352 | oemtable->oem_signature[2], oemtable->oem_signature[3]); | ||
353 | return; | ||
354 | } | ||
355 | if (mpf_checksum((unsigned char *)oemtable, oemtable->oem_length)) { | ||
356 | printk(KERN_WARNING "SMP oem mptable: checksum error!\n"); | ||
357 | return; | ||
358 | } | ||
359 | while (count < oemtable->oem_length) { | ||
360 | switch (*oemptr) { | ||
361 | case MP_TRANSLATION: | ||
362 | { | ||
363 | struct mpc_config_translation *m = | ||
364 | (struct mpc_config_translation *)oemptr; | ||
365 | MP_translation_info(m); | ||
366 | oemptr += sizeof(*m); | ||
367 | count += sizeof(*m); | ||
368 | ++mpc_record; | ||
369 | break; | ||
370 | } | ||
371 | default: | ||
372 | { | ||
373 | printk(KERN_WARNING | ||
374 | "Unrecognised OEM table entry type! - %d\n", | ||
375 | (int)*oemptr); | ||
376 | return; | ||
377 | } | ||
378 | } | ||
379 | } | ||
380 | } | ||
381 | |||
382 | void numaq_mps_oem_check(struct mp_config_table *mpc, char *oem, | ||
383 | char *productid) | ||
384 | { | ||
385 | if (strncmp(oem, "IBM NUMA", 8)) | ||
386 | printk("Warning! Not a NUMA-Q system!\n"); | ||
387 | else | ||
388 | found_numaq = 1; | ||
389 | |||
390 | if (mpc->mpc_oemptr) | ||
391 | smp_read_mpc_oem((struct mp_config_oemtable *)mpc->mpc_oemptr, | ||
392 | mpc->mpc_oemsize); | ||
393 | } | ||
394 | #endif /* CONFIG_X86_NUMAQ */ | ||
395 | |||
396 | /* | 245 | /* |
397 | * Read/parse the MPC | 246 | * Read/parse the MPC |
398 | */ | 247 | */ |
@@ -457,7 +306,6 @@ static int __init smp_read_mpc(struct mp_config_table *mpc, unsigned early) | |||
457 | } else | 306 | } else |
458 | mps_oem_check(mpc, oem, str); | 307 | mps_oem_check(mpc, oem, str); |
459 | #endif | 308 | #endif |
460 | |||
461 | /* save the local APIC address, it might be non-default */ | 309 | /* save the local APIC address, it might be non-default */ |
462 | if (!acpi_lapic) | 310 | if (!acpi_lapic) |
463 | mp_lapic_addr = mpc->mpc_lapic; | 311 | mp_lapic_addr = mpc->mpc_lapic; |
@@ -465,12 +313,17 @@ static int __init smp_read_mpc(struct mp_config_table *mpc, unsigned early) | |||
465 | if (early) | 313 | if (early) |
466 | return 1; | 314 | return 1; |
467 | 315 | ||
316 | if (mpc->mpc_oemptr && x86_quirks->smp_read_mpc_oem) { | ||
317 | struct mp_config_oemtable *oem_table = (struct mp_config_oemtable *)(unsigned long)mpc->mpc_oemptr; | ||
318 | x86_quirks->smp_read_mpc_oem(oem_table, mpc->mpc_oemsize); | ||
319 | } | ||
320 | |||
468 | /* | 321 | /* |
469 | * Now process the configuration blocks. | 322 | * Now process the configuration blocks. |
470 | */ | 323 | */ |
471 | #ifdef CONFIG_X86_NUMAQ | 324 | if (x86_quirks->mpc_record) |
472 | mpc_record = 0; | 325 | *x86_quirks->mpc_record = 0; |
473 | #endif | 326 | |
474 | while (count < mpc->mpc_length) { | 327 | while (count < mpc->mpc_length) { |
475 | switch (*mpt) { | 328 | switch (*mpt) { |
476 | case MP_PROCESSOR: | 329 | case MP_PROCESSOR: |
@@ -536,9 +389,8 @@ static int __init smp_read_mpc(struct mp_config_table *mpc, unsigned early) | |||
536 | count = mpc->mpc_length; | 389 | count = mpc->mpc_length; |
537 | break; | 390 | break; |
538 | } | 391 | } |
539 | #ifdef CONFIG_X86_NUMAQ | 392 | if (x86_quirks->mpc_record) |
540 | ++mpc_record; | 393 | (*x86_quirks->mpc_record)++; |
541 | #endif | ||
542 | } | 394 | } |
543 | 395 | ||
544 | #ifdef CONFIG_X86_GENERICARCH | 396 | #ifdef CONFIG_X86_GENERICARCH |
@@ -632,7 +484,7 @@ static void __init construct_default_ioirq_mptable(int mpc_default_type) | |||
632 | } | 484 | } |
633 | 485 | ||
634 | 486 | ||
635 | static void construct_ioapic_table(int mpc_default_type) | 487 | static void __init construct_ioapic_table(int mpc_default_type) |
636 | { | 488 | { |
637 | struct mpc_config_ioapic ioapic; | 489 | struct mpc_config_ioapic ioapic; |
638 | struct mpc_config_bus bus; | 490 | struct mpc_config_bus bus; |
@@ -677,7 +529,7 @@ static void construct_ioapic_table(int mpc_default_type) | |||
677 | construct_default_ioirq_mptable(mpc_default_type); | 529 | construct_default_ioirq_mptable(mpc_default_type); |
678 | } | 530 | } |
679 | #else | 531 | #else |
680 | static inline void construct_ioapic_table(int mpc_default_type) { } | 532 | static inline void __init construct_ioapic_table(int mpc_default_type) { } |
681 | #endif | 533 | #endif |
682 | 534 | ||
683 | static inline void __init construct_default_ISA_mptable(int mpc_default_type) | 535 | static inline void __init construct_default_ISA_mptable(int mpc_default_type) |
@@ -726,20 +578,14 @@ static inline void __init construct_default_ISA_mptable(int mpc_default_type) | |||
726 | static struct intel_mp_floating *mpf_found; | 578 | static struct intel_mp_floating *mpf_found; |
727 | 579 | ||
728 | /* | 580 | /* |
729 | * Machine specific quirk for finding the SMP config before other setup | ||
730 | * activities destroy the table: | ||
731 | */ | ||
732 | int (*mach_get_smp_config_quirk)(unsigned int early); | ||
733 | |||
734 | /* | ||
735 | * Scan the memory blocks for an SMP configuration block. | 581 | * Scan the memory blocks for an SMP configuration block. |
736 | */ | 582 | */ |
737 | static void __init __get_smp_config(unsigned int early) | 583 | static void __init __get_smp_config(unsigned int early) |
738 | { | 584 | { |
739 | struct intel_mp_floating *mpf = mpf_found; | 585 | struct intel_mp_floating *mpf = mpf_found; |
740 | 586 | ||
741 | if (mach_get_smp_config_quirk) { | 587 | if (x86_quirks->mach_get_smp_config) { |
742 | if (mach_get_smp_config_quirk(early)) | 588 | if (x86_quirks->mach_get_smp_config(early)) |
743 | return; | 589 | return; |
744 | } | 590 | } |
745 | if (acpi_lapic && early) | 591 | if (acpi_lapic && early) |
@@ -849,7 +695,8 @@ static int __init smp_scan_config(unsigned long base, unsigned long length, | |||
849 | unsigned int *bp = phys_to_virt(base); | 695 | unsigned int *bp = phys_to_virt(base); |
850 | struct intel_mp_floating *mpf; | 696 | struct intel_mp_floating *mpf; |
851 | 697 | ||
852 | printk(KERN_DEBUG "Scan SMP from %p for %ld bytes.\n", bp, length); | 698 | apic_printk(APIC_VERBOSE, "Scan SMP from %p for %ld bytes.\n", |
699 | bp, length); | ||
853 | BUILD_BUG_ON(sizeof(*mpf) != 16); | 700 | BUILD_BUG_ON(sizeof(*mpf) != 16); |
854 | 701 | ||
855 | while (length > 0) { | 702 | while (length > 0) { |
@@ -899,14 +746,12 @@ static int __init smp_scan_config(unsigned long base, unsigned long length, | |||
899 | return 0; | 746 | return 0; |
900 | } | 747 | } |
901 | 748 | ||
902 | int (*mach_find_smp_config_quirk)(unsigned int reserve); | ||
903 | |||
904 | static void __init __find_smp_config(unsigned int reserve) | 749 | static void __init __find_smp_config(unsigned int reserve) |
905 | { | 750 | { |
906 | unsigned int address; | 751 | unsigned int address; |
907 | 752 | ||
908 | if (mach_find_smp_config_quirk) { | 753 | if (x86_quirks->mach_find_smp_config) { |
909 | if (mach_find_smp_config_quirk(reserve)) | 754 | if (x86_quirks->mach_find_smp_config(reserve)) |
910 | return; | 755 | return; |
911 | } | 756 | } |
912 | /* | 757 | /* |
diff --git a/arch/x86/kernel/msr.c b/arch/x86/kernel/msr.c index a153b3905f60..2e2af5d18191 100644 --- a/arch/x86/kernel/msr.c +++ b/arch/x86/kernel/msr.c | |||
@@ -72,21 +72,28 @@ static ssize_t msr_read(struct file *file, char __user *buf, | |||
72 | u32 data[2]; | 72 | u32 data[2]; |
73 | u32 reg = *ppos; | 73 | u32 reg = *ppos; |
74 | int cpu = iminor(file->f_path.dentry->d_inode); | 74 | int cpu = iminor(file->f_path.dentry->d_inode); |
75 | int err; | 75 | int err = 0; |
76 | ssize_t bytes = 0; | ||
76 | 77 | ||
77 | if (count % 8) | 78 | if (count % 8) |
78 | return -EINVAL; /* Invalid chunk size */ | 79 | return -EINVAL; /* Invalid chunk size */ |
79 | 80 | ||
80 | for (; count; count -= 8) { | 81 | for (; count; count -= 8) { |
81 | err = rdmsr_safe_on_cpu(cpu, reg, &data[0], &data[1]); | 82 | err = rdmsr_safe_on_cpu(cpu, reg, &data[0], &data[1]); |
82 | if (err) | 83 | if (err) { |
83 | return -EIO; | 84 | if (err == -EFAULT) /* Fix idiotic error code */ |
84 | if (copy_to_user(tmp, &data, 8)) | 85 | err = -EIO; |
85 | return -EFAULT; | 86 | break; |
87 | } | ||
88 | if (copy_to_user(tmp, &data, 8)) { | ||
89 | err = -EFAULT; | ||
90 | break; | ||
91 | } | ||
86 | tmp += 2; | 92 | tmp += 2; |
93 | bytes += 8; | ||
87 | } | 94 | } |
88 | 95 | ||
89 | return ((char __user *)tmp) - buf; | 96 | return bytes ? bytes : err; |
90 | } | 97 | } |
91 | 98 | ||
92 | static ssize_t msr_write(struct file *file, const char __user *buf, | 99 | static ssize_t msr_write(struct file *file, const char __user *buf, |
@@ -96,21 +103,28 @@ static ssize_t msr_write(struct file *file, const char __user *buf, | |||
96 | u32 data[2]; | 103 | u32 data[2]; |
97 | u32 reg = *ppos; | 104 | u32 reg = *ppos; |
98 | int cpu = iminor(file->f_path.dentry->d_inode); | 105 | int cpu = iminor(file->f_path.dentry->d_inode); |
99 | int err; | 106 | int err = 0; |
107 | ssize_t bytes = 0; | ||
100 | 108 | ||
101 | if (count % 8) | 109 | if (count % 8) |
102 | return -EINVAL; /* Invalid chunk size */ | 110 | return -EINVAL; /* Invalid chunk size */ |
103 | 111 | ||
104 | for (; count; count -= 8) { | 112 | for (; count; count -= 8) { |
105 | if (copy_from_user(&data, tmp, 8)) | 113 | if (copy_from_user(&data, tmp, 8)) { |
106 | return -EFAULT; | 114 | err = -EFAULT; |
115 | break; | ||
116 | } | ||
107 | err = wrmsr_safe_on_cpu(cpu, reg, data[0], data[1]); | 117 | err = wrmsr_safe_on_cpu(cpu, reg, data[0], data[1]); |
108 | if (err) | 118 | if (err) { |
109 | return -EIO; | 119 | if (err == -EFAULT) /* Fix idiotic error code */ |
120 | err = -EIO; | ||
121 | break; | ||
122 | } | ||
110 | tmp += 2; | 123 | tmp += 2; |
124 | bytes += 8; | ||
111 | } | 125 | } |
112 | 126 | ||
113 | return ((char __user *)tmp) - buf; | 127 | return bytes ? bytes : err; |
114 | } | 128 | } |
115 | 129 | ||
116 | static int msr_open(struct inode *inode, struct file *file) | 130 | static int msr_open(struct inode *inode, struct file *file) |
@@ -131,7 +145,7 @@ static int msr_open(struct inode *inode, struct file *file) | |||
131 | ret = -EIO; /* MSR not supported */ | 145 | ret = -EIO; /* MSR not supported */ |
132 | out: | 146 | out: |
133 | unlock_kernel(); | 147 | unlock_kernel(); |
134 | return 0; | 148 | return ret; |
135 | } | 149 | } |
136 | 150 | ||
137 | /* | 151 | /* |
@@ -149,8 +163,8 @@ static int __cpuinit msr_device_create(int cpu) | |||
149 | { | 163 | { |
150 | struct device *dev; | 164 | struct device *dev; |
151 | 165 | ||
152 | dev = device_create(msr_class, NULL, MKDEV(MSR_MAJOR, cpu), | 166 | dev = device_create_drvdata(msr_class, NULL, MKDEV(MSR_MAJOR, cpu), |
153 | "msr%d", cpu); | 167 | NULL, "msr%d", cpu); |
154 | return IS_ERR(dev) ? PTR_ERR(dev) : 0; | 168 | return IS_ERR(dev) ? PTR_ERR(dev) : 0; |
155 | } | 169 | } |
156 | 170 | ||
diff --git a/arch/x86/kernel/nmi.c b/arch/x86/kernel/nmi.c index ec024b3baad0..abb78a2cc4ad 100644 --- a/arch/x86/kernel/nmi.c +++ b/arch/x86/kernel/nmi.c | |||
@@ -114,6 +114,23 @@ static __init void nmi_cpu_busy(void *data) | |||
114 | } | 114 | } |
115 | #endif | 115 | #endif |
116 | 116 | ||
117 | static void report_broken_nmi(int cpu, int *prev_nmi_count) | ||
118 | { | ||
119 | printk(KERN_CONT "\n"); | ||
120 | |||
121 | printk(KERN_WARNING | ||
122 | "WARNING: CPU#%d: NMI appears to be stuck (%d->%d)!\n", | ||
123 | cpu, prev_nmi_count[cpu], get_nmi_count(cpu)); | ||
124 | |||
125 | printk(KERN_WARNING | ||
126 | "Please report this to bugzilla.kernel.org,\n"); | ||
127 | printk(KERN_WARNING | ||
128 | "and attach the output of the 'dmesg' command.\n"); | ||
129 | |||
130 | per_cpu(wd_enabled, cpu) = 0; | ||
131 | atomic_dec(&nmi_active); | ||
132 | } | ||
133 | |||
117 | int __init check_nmi_watchdog(void) | 134 | int __init check_nmi_watchdog(void) |
118 | { | 135 | { |
119 | unsigned int *prev_nmi_count; | 136 | unsigned int *prev_nmi_count; |
@@ -141,15 +158,8 @@ int __init check_nmi_watchdog(void) | |||
141 | for_each_online_cpu(cpu) { | 158 | for_each_online_cpu(cpu) { |
142 | if (!per_cpu(wd_enabled, cpu)) | 159 | if (!per_cpu(wd_enabled, cpu)) |
143 | continue; | 160 | continue; |
144 | if (get_nmi_count(cpu) - prev_nmi_count[cpu] <= 5) { | 161 | if (get_nmi_count(cpu) - prev_nmi_count[cpu] <= 5) |
145 | printk(KERN_WARNING "WARNING: CPU#%d: NMI " | 162 | report_broken_nmi(cpu, prev_nmi_count); |
146 | "appears to be stuck (%d->%d)!\n", | ||
147 | cpu, | ||
148 | prev_nmi_count[cpu], | ||
149 | get_nmi_count(cpu)); | ||
150 | per_cpu(wd_enabled, cpu) = 0; | ||
151 | atomic_dec(&nmi_active); | ||
152 | } | ||
153 | } | 163 | } |
154 | endflag = 1; | 164 | endflag = 1; |
155 | if (!atomic_read(&nmi_active)) { | 165 | if (!atomic_read(&nmi_active)) { |
@@ -263,7 +273,7 @@ late_initcall(init_lapic_nmi_sysfs); | |||
263 | 273 | ||
264 | static void __acpi_nmi_enable(void *__unused) | 274 | static void __acpi_nmi_enable(void *__unused) |
265 | { | 275 | { |
266 | apic_write_around(APIC_LVT0, APIC_DM_NMI); | 276 | apic_write(APIC_LVT0, APIC_DM_NMI); |
267 | } | 277 | } |
268 | 278 | ||
269 | /* | 279 | /* |
@@ -277,7 +287,7 @@ void acpi_nmi_enable(void) | |||
277 | 287 | ||
278 | static void __acpi_nmi_disable(void *__unused) | 288 | static void __acpi_nmi_disable(void *__unused) |
279 | { | 289 | { |
280 | apic_write_around(APIC_LVT0, APIC_DM_NMI | APIC_LVT_MASKED); | 290 | apic_write(APIC_LVT0, APIC_DM_NMI | APIC_LVT_MASKED); |
281 | } | 291 | } |
282 | 292 | ||
283 | /* | 293 | /* |
@@ -448,6 +458,13 @@ nmi_watchdog_tick(struct pt_regs *regs, unsigned reason) | |||
448 | 458 | ||
449 | #ifdef CONFIG_SYSCTL | 459 | #ifdef CONFIG_SYSCTL |
450 | 460 | ||
461 | static int __init setup_unknown_nmi_panic(char *str) | ||
462 | { | ||
463 | unknown_nmi_panic = 1; | ||
464 | return 1; | ||
465 | } | ||
466 | __setup("unknown_nmi_panic", setup_unknown_nmi_panic); | ||
467 | |||
451 | static int unknown_nmi_panic_callback(struct pt_regs *regs, int cpu) | 468 | static int unknown_nmi_panic_callback(struct pt_regs *regs, int cpu) |
452 | { | 469 | { |
453 | unsigned char reason = get_nmi_reason(); | 470 | unsigned char reason = get_nmi_reason(); |
diff --git a/arch/x86/kernel/numaq_32.c b/arch/x86/kernel/numaq_32.c index a23e8233b9ac..eecc8c18f010 100644 --- a/arch/x86/kernel/numaq_32.c +++ b/arch/x86/kernel/numaq_32.c | |||
@@ -33,6 +33,7 @@ | |||
33 | #include <asm/processor.h> | 33 | #include <asm/processor.h> |
34 | #include <asm/mpspec.h> | 34 | #include <asm/mpspec.h> |
35 | #include <asm/e820.h> | 35 | #include <asm/e820.h> |
36 | #include <asm/setup.h> | ||
36 | 37 | ||
37 | #define MB_TO_PAGES(addr) ((addr) << (20 - PAGE_SHIFT)) | 38 | #define MB_TO_PAGES(addr) ((addr) << (20 - PAGE_SHIFT)) |
38 | 39 | ||
@@ -71,6 +72,188 @@ static void __init smp_dump_qct(void) | |||
71 | } | 72 | } |
72 | } | 73 | } |
73 | 74 | ||
75 | |||
76 | void __cpuinit numaq_tsc_disable(void) | ||
77 | { | ||
78 | if (!found_numaq) | ||
79 | return; | ||
80 | |||
81 | if (num_online_nodes() > 1) { | ||
82 | printk(KERN_DEBUG "NUMAQ: disabling TSC\n"); | ||
83 | setup_clear_cpu_cap(X86_FEATURE_TSC); | ||
84 | } | ||
85 | } | ||
86 | |||
87 | static int __init numaq_pre_time_init(void) | ||
88 | { | ||
89 | numaq_tsc_disable(); | ||
90 | return 0; | ||
91 | } | ||
92 | |||
93 | int found_numaq; | ||
94 | /* | ||
95 | * Have to match translation table entries to main table entries by counter | ||
96 | * hence the mpc_record variable .... can't see a less disgusting way of | ||
97 | * doing this .... | ||
98 | */ | ||
99 | struct mpc_config_translation { | ||
100 | unsigned char mpc_type; | ||
101 | unsigned char trans_len; | ||
102 | unsigned char trans_type; | ||
103 | unsigned char trans_quad; | ||
104 | unsigned char trans_global; | ||
105 | unsigned char trans_local; | ||
106 | unsigned short trans_reserved; | ||
107 | }; | ||
108 | |||
109 | /* x86_quirks member */ | ||
110 | static int mpc_record; | ||
111 | static struct mpc_config_translation *translation_table[MAX_MPC_ENTRY] | ||
112 | __cpuinitdata; | ||
113 | |||
114 | static inline int generate_logical_apicid(int quad, int phys_apicid) | ||
115 | { | ||
116 | return (quad << 4) + (phys_apicid ? phys_apicid << 1 : 1); | ||
117 | } | ||
118 | |||
119 | /* x86_quirks member */ | ||
120 | static int mpc_apic_id(struct mpc_config_processor *m) | ||
121 | { | ||
122 | int quad = translation_table[mpc_record]->trans_quad; | ||
123 | int logical_apicid = generate_logical_apicid(quad, m->mpc_apicid); | ||
124 | |||
125 | printk(KERN_DEBUG "Processor #%d %u:%u APIC version %d (quad %d, apic %d)\n", | ||
126 | m->mpc_apicid, | ||
127 | (m->mpc_cpufeature & CPU_FAMILY_MASK) >> 8, | ||
128 | (m->mpc_cpufeature & CPU_MODEL_MASK) >> 4, | ||
129 | m->mpc_apicver, quad, logical_apicid); | ||
130 | return logical_apicid; | ||
131 | } | ||
132 | |||
133 | int mp_bus_id_to_node[MAX_MP_BUSSES]; | ||
134 | |||
135 | int mp_bus_id_to_local[MAX_MP_BUSSES]; | ||
136 | |||
137 | /* x86_quirks member */ | ||
138 | static void mpc_oem_bus_info(struct mpc_config_bus *m, char *name) | ||
139 | { | ||
140 | int quad = translation_table[mpc_record]->trans_quad; | ||
141 | int local = translation_table[mpc_record]->trans_local; | ||
142 | |||
143 | mp_bus_id_to_node[m->mpc_busid] = quad; | ||
144 | mp_bus_id_to_local[m->mpc_busid] = local; | ||
145 | printk(KERN_INFO "Bus #%d is %s (node %d)\n", | ||
146 | m->mpc_busid, name, quad); | ||
147 | } | ||
148 | |||
149 | int quad_local_to_mp_bus_id [NR_CPUS/4][4]; | ||
150 | |||
151 | /* x86_quirks member */ | ||
152 | static void mpc_oem_pci_bus(struct mpc_config_bus *m) | ||
153 | { | ||
154 | int quad = translation_table[mpc_record]->trans_quad; | ||
155 | int local = translation_table[mpc_record]->trans_local; | ||
156 | |||
157 | quad_local_to_mp_bus_id[quad][local] = m->mpc_busid; | ||
158 | } | ||
159 | |||
160 | static void __init MP_translation_info(struct mpc_config_translation *m) | ||
161 | { | ||
162 | printk(KERN_INFO | ||
163 | "Translation: record %d, type %d, quad %d, global %d, local %d\n", | ||
164 | mpc_record, m->trans_type, m->trans_quad, m->trans_global, | ||
165 | m->trans_local); | ||
166 | |||
167 | if (mpc_record >= MAX_MPC_ENTRY) | ||
168 | printk(KERN_ERR "MAX_MPC_ENTRY exceeded!\n"); | ||
169 | else | ||
170 | translation_table[mpc_record] = m; /* stash this for later */ | ||
171 | if (m->trans_quad < MAX_NUMNODES && !node_online(m->trans_quad)) | ||
172 | node_set_online(m->trans_quad); | ||
173 | } | ||
174 | |||
175 | static int __init mpf_checksum(unsigned char *mp, int len) | ||
176 | { | ||
177 | int sum = 0; | ||
178 | |||
179 | while (len--) | ||
180 | sum += *mp++; | ||
181 | |||
182 | return sum & 0xFF; | ||
183 | } | ||
184 | |||
185 | /* | ||
186 | * Read/parse the MPC oem tables | ||
187 | */ | ||
188 | |||
189 | static void __init smp_read_mpc_oem(struct mp_config_oemtable *oemtable, | ||
190 | unsigned short oemsize) | ||
191 | { | ||
192 | int count = sizeof(*oemtable); /* the header size */ | ||
193 | unsigned char *oemptr = ((unsigned char *)oemtable) + count; | ||
194 | |||
195 | mpc_record = 0; | ||
196 | printk(KERN_INFO "Found an OEM MPC table at %8p - parsing it ... \n", | ||
197 | oemtable); | ||
198 | if (memcmp(oemtable->oem_signature, MPC_OEM_SIGNATURE, 4)) { | ||
199 | printk(KERN_WARNING | ||
200 | "SMP mpc oemtable: bad signature [%c%c%c%c]!\n", | ||
201 | oemtable->oem_signature[0], oemtable->oem_signature[1], | ||
202 | oemtable->oem_signature[2], oemtable->oem_signature[3]); | ||
203 | return; | ||
204 | } | ||
205 | if (mpf_checksum((unsigned char *)oemtable, oemtable->oem_length)) { | ||
206 | printk(KERN_WARNING "SMP oem mptable: checksum error!\n"); | ||
207 | return; | ||
208 | } | ||
209 | while (count < oemtable->oem_length) { | ||
210 | switch (*oemptr) { | ||
211 | case MP_TRANSLATION: | ||
212 | { | ||
213 | struct mpc_config_translation *m = | ||
214 | (struct mpc_config_translation *)oemptr; | ||
215 | MP_translation_info(m); | ||
216 | oemptr += sizeof(*m); | ||
217 | count += sizeof(*m); | ||
218 | ++mpc_record; | ||
219 | break; | ||
220 | } | ||
221 | default: | ||
222 | { | ||
223 | printk(KERN_WARNING | ||
224 | "Unrecognised OEM table entry type! - %d\n", | ||
225 | (int)*oemptr); | ||
226 | return; | ||
227 | } | ||
228 | } | ||
229 | } | ||
230 | } | ||
231 | |||
232 | static struct x86_quirks numaq_x86_quirks __initdata = { | ||
233 | .arch_pre_time_init = numaq_pre_time_init, | ||
234 | .arch_time_init = NULL, | ||
235 | .arch_pre_intr_init = NULL, | ||
236 | .arch_memory_setup = NULL, | ||
237 | .arch_intr_init = NULL, | ||
238 | .arch_trap_init = NULL, | ||
239 | .mach_get_smp_config = NULL, | ||
240 | .mach_find_smp_config = NULL, | ||
241 | .mpc_record = &mpc_record, | ||
242 | .mpc_apic_id = mpc_apic_id, | ||
243 | .mpc_oem_bus_info = mpc_oem_bus_info, | ||
244 | .mpc_oem_pci_bus = mpc_oem_pci_bus, | ||
245 | .smp_read_mpc_oem = smp_read_mpc_oem, | ||
246 | }; | ||
247 | |||
248 | void numaq_mps_oem_check(struct mp_config_table *mpc, char *oem, | ||
249 | char *productid) | ||
250 | { | ||
251 | if (strncmp(oem, "IBM NUMA", 8)) | ||
252 | printk("Warning! Not a NUMA-Q system!\n"); | ||
253 | else | ||
254 | found_numaq = 1; | ||
255 | } | ||
256 | |||
74 | static __init void early_check_numaq(void) | 257 | static __init void early_check_numaq(void) |
75 | { | 258 | { |
76 | /* | 259 | /* |
@@ -82,6 +265,9 @@ static __init void early_check_numaq(void) | |||
82 | */ | 265 | */ |
83 | if (smp_found_config) | 266 | if (smp_found_config) |
84 | early_get_smp_config(); | 267 | early_get_smp_config(); |
268 | |||
269 | if (found_numaq) | ||
270 | x86_quirks = &numaq_x86_quirks; | ||
85 | } | 271 | } |
86 | 272 | ||
87 | int __init get_memcfg_numaq(void) | 273 | int __init get_memcfg_numaq(void) |
@@ -92,14 +278,3 @@ int __init get_memcfg_numaq(void) | |||
92 | smp_dump_qct(); | 278 | smp_dump_qct(); |
93 | return 1; | 279 | return 1; |
94 | } | 280 | } |
95 | |||
96 | void __init numaq_tsc_disable(void) | ||
97 | { | ||
98 | if (!found_numaq) | ||
99 | return; | ||
100 | |||
101 | if (num_online_nodes() > 1) { | ||
102 | printk(KERN_DEBUG "NUMAQ: disabling TSC\n"); | ||
103 | setup_clear_cpu_cap(X86_FEATURE_TSC); | ||
104 | } | ||
105 | } | ||
diff --git a/arch/x86/kernel/paravirt.c b/arch/x86/kernel/paravirt.c index e0f571d58c19..300da17e61cb 100644 --- a/arch/x86/kernel/paravirt.c +++ b/arch/x86/kernel/paravirt.c | |||
@@ -29,6 +29,7 @@ | |||
29 | #include <asm/desc.h> | 29 | #include <asm/desc.h> |
30 | #include <asm/setup.h> | 30 | #include <asm/setup.h> |
31 | #include <asm/arch_hooks.h> | 31 | #include <asm/arch_hooks.h> |
32 | #include <asm/pgtable.h> | ||
32 | #include <asm/time.h> | 33 | #include <asm/time.h> |
33 | #include <asm/pgalloc.h> | 34 | #include <asm/pgalloc.h> |
34 | #include <asm/irq.h> | 35 | #include <asm/irq.h> |
@@ -123,6 +124,7 @@ static void *get_call_destination(u8 type) | |||
123 | .pv_irq_ops = pv_irq_ops, | 124 | .pv_irq_ops = pv_irq_ops, |
124 | .pv_apic_ops = pv_apic_ops, | 125 | .pv_apic_ops = pv_apic_ops, |
125 | .pv_mmu_ops = pv_mmu_ops, | 126 | .pv_mmu_ops = pv_mmu_ops, |
127 | .pv_lock_ops = pv_lock_ops, | ||
126 | }; | 128 | }; |
127 | return *((void **)&tmpl + type); | 129 | return *((void **)&tmpl + type); |
128 | } | 130 | } |
@@ -266,6 +268,17 @@ enum paravirt_lazy_mode paravirt_get_lazy_mode(void) | |||
266 | return __get_cpu_var(paravirt_lazy_mode); | 268 | return __get_cpu_var(paravirt_lazy_mode); |
267 | } | 269 | } |
268 | 270 | ||
271 | void __init paravirt_use_bytelocks(void) | ||
272 | { | ||
273 | #ifdef CONFIG_SMP | ||
274 | pv_lock_ops.spin_is_locked = __byte_spin_is_locked; | ||
275 | pv_lock_ops.spin_is_contended = __byte_spin_is_contended; | ||
276 | pv_lock_ops.spin_lock = __byte_spin_lock; | ||
277 | pv_lock_ops.spin_trylock = __byte_spin_trylock; | ||
278 | pv_lock_ops.spin_unlock = __byte_spin_unlock; | ||
279 | #endif | ||
280 | } | ||
281 | |||
269 | struct pv_info pv_info = { | 282 | struct pv_info pv_info = { |
270 | .name = "bare hardware", | 283 | .name = "bare hardware", |
271 | .paravirt_enabled = 0, | 284 | .paravirt_enabled = 0, |
@@ -361,7 +374,6 @@ struct pv_cpu_ops pv_cpu_ops = { | |||
361 | struct pv_apic_ops pv_apic_ops = { | 374 | struct pv_apic_ops pv_apic_ops = { |
362 | #ifdef CONFIG_X86_LOCAL_APIC | 375 | #ifdef CONFIG_X86_LOCAL_APIC |
363 | .apic_write = native_apic_write, | 376 | .apic_write = native_apic_write, |
364 | .apic_write_atomic = native_apic_write_atomic, | ||
365 | .apic_read = native_apic_read, | 377 | .apic_read = native_apic_read, |
366 | .setup_boot_clock = setup_boot_APIC_clock, | 378 | .setup_boot_clock = setup_boot_APIC_clock, |
367 | .setup_secondary_clock = setup_secondary_APIC_clock, | 379 | .setup_secondary_clock = setup_secondary_APIC_clock, |
@@ -373,6 +385,9 @@ struct pv_mmu_ops pv_mmu_ops = { | |||
373 | #ifndef CONFIG_X86_64 | 385 | #ifndef CONFIG_X86_64 |
374 | .pagetable_setup_start = native_pagetable_setup_start, | 386 | .pagetable_setup_start = native_pagetable_setup_start, |
375 | .pagetable_setup_done = native_pagetable_setup_done, | 387 | .pagetable_setup_done = native_pagetable_setup_done, |
388 | #else | ||
389 | .pagetable_setup_start = paravirt_nop, | ||
390 | .pagetable_setup_done = paravirt_nop, | ||
376 | #endif | 391 | #endif |
377 | 392 | ||
378 | .read_cr2 = native_read_cr2, | 393 | .read_cr2 = native_read_cr2, |
@@ -428,7 +443,7 @@ struct pv_mmu_ops pv_mmu_ops = { | |||
428 | #endif /* PAGETABLE_LEVELS >= 3 */ | 443 | #endif /* PAGETABLE_LEVELS >= 3 */ |
429 | 444 | ||
430 | .pte_val = native_pte_val, | 445 | .pte_val = native_pte_val, |
431 | .pte_flags = native_pte_val, | 446 | .pte_flags = native_pte_flags, |
432 | .pgd_val = native_pgd_val, | 447 | .pgd_val = native_pgd_val, |
433 | 448 | ||
434 | .make_pte = native_make_pte, | 449 | .make_pte = native_make_pte, |
@@ -446,6 +461,18 @@ struct pv_mmu_ops pv_mmu_ops = { | |||
446 | .set_fixmap = native_set_fixmap, | 461 | .set_fixmap = native_set_fixmap, |
447 | }; | 462 | }; |
448 | 463 | ||
464 | struct pv_lock_ops pv_lock_ops = { | ||
465 | #ifdef CONFIG_SMP | ||
466 | .spin_is_locked = __ticket_spin_is_locked, | ||
467 | .spin_is_contended = __ticket_spin_is_contended, | ||
468 | |||
469 | .spin_lock = __ticket_spin_lock, | ||
470 | .spin_trylock = __ticket_spin_trylock, | ||
471 | .spin_unlock = __ticket_spin_unlock, | ||
472 | #endif | ||
473 | }; | ||
474 | EXPORT_SYMBOL(pv_lock_ops); | ||
475 | |||
449 | EXPORT_SYMBOL_GPL(pv_time_ops); | 476 | EXPORT_SYMBOL_GPL(pv_time_ops); |
450 | EXPORT_SYMBOL (pv_cpu_ops); | 477 | EXPORT_SYMBOL (pv_cpu_ops); |
451 | EXPORT_SYMBOL (pv_mmu_ops); | 478 | EXPORT_SYMBOL (pv_mmu_ops); |
diff --git a/arch/x86/kernel/pci-calgary_64.c b/arch/x86/kernel/pci-calgary_64.c index 6959b5c45df4..dcdac6c826e9 100644 --- a/arch/x86/kernel/pci-calgary_64.c +++ b/arch/x86/kernel/pci-calgary_64.c | |||
@@ -29,6 +29,7 @@ | |||
29 | #include <linux/mm.h> | 29 | #include <linux/mm.h> |
30 | #include <linux/spinlock.h> | 30 | #include <linux/spinlock.h> |
31 | #include <linux/string.h> | 31 | #include <linux/string.h> |
32 | #include <linux/crash_dump.h> | ||
32 | #include <linux/dma-mapping.h> | 33 | #include <linux/dma-mapping.h> |
33 | #include <linux/bitops.h> | 34 | #include <linux/bitops.h> |
34 | #include <linux/pci_ids.h> | 35 | #include <linux/pci_ids.h> |
@@ -36,7 +37,8 @@ | |||
36 | #include <linux/delay.h> | 37 | #include <linux/delay.h> |
37 | #include <linux/scatterlist.h> | 38 | #include <linux/scatterlist.h> |
38 | #include <linux/iommu-helper.h> | 39 | #include <linux/iommu-helper.h> |
39 | #include <asm/gart.h> | 40 | |
41 | #include <asm/iommu.h> | ||
40 | #include <asm/calgary.h> | 42 | #include <asm/calgary.h> |
41 | #include <asm/tce.h> | 43 | #include <asm/tce.h> |
42 | #include <asm/pci-direct.h> | 44 | #include <asm/pci-direct.h> |
@@ -167,6 +169,8 @@ static void calgary_dump_error_regs(struct iommu_table *tbl); | |||
167 | static void calioc2_handle_quirks(struct iommu_table *tbl, struct pci_dev *dev); | 169 | static void calioc2_handle_quirks(struct iommu_table *tbl, struct pci_dev *dev); |
168 | static void calioc2_tce_cache_blast(struct iommu_table *tbl); | 170 | static void calioc2_tce_cache_blast(struct iommu_table *tbl); |
169 | static void calioc2_dump_error_regs(struct iommu_table *tbl); | 171 | static void calioc2_dump_error_regs(struct iommu_table *tbl); |
172 | static void calgary_init_bitmap_from_tce_table(struct iommu_table *tbl); | ||
173 | static void get_tce_space_from_tar(void); | ||
170 | 174 | ||
171 | static struct cal_chipset_ops calgary_chip_ops = { | 175 | static struct cal_chipset_ops calgary_chip_ops = { |
172 | .handle_quirks = calgary_handle_quirks, | 176 | .handle_quirks = calgary_handle_quirks, |
@@ -339,9 +343,8 @@ static void iommu_free(struct iommu_table *tbl, dma_addr_t dma_addr, | |||
339 | /* were we called with bad_dma_address? */ | 343 | /* were we called with bad_dma_address? */ |
340 | badend = bad_dma_address + (EMERGENCY_PAGES * PAGE_SIZE); | 344 | badend = bad_dma_address + (EMERGENCY_PAGES * PAGE_SIZE); |
341 | if (unlikely((dma_addr >= bad_dma_address) && (dma_addr < badend))) { | 345 | if (unlikely((dma_addr >= bad_dma_address) && (dma_addr < badend))) { |
342 | printk(KERN_ERR "Calgary: driver tried unmapping bad DMA " | 346 | WARN(1, KERN_ERR "Calgary: driver tried unmapping bad DMA " |
343 | "address 0x%Lx\n", dma_addr); | 347 | "address 0x%Lx\n", dma_addr); |
344 | WARN_ON(1); | ||
345 | return; | 348 | return; |
346 | } | 349 | } |
347 | 350 | ||
@@ -410,22 +413,6 @@ static void calgary_unmap_sg(struct device *dev, | |||
410 | } | 413 | } |
411 | } | 414 | } |
412 | 415 | ||
413 | static int calgary_nontranslate_map_sg(struct device* dev, | ||
414 | struct scatterlist *sg, int nelems, int direction) | ||
415 | { | ||
416 | struct scatterlist *s; | ||
417 | int i; | ||
418 | |||
419 | for_each_sg(sg, s, nelems, i) { | ||
420 | struct page *p = sg_page(s); | ||
421 | |||
422 | BUG_ON(!p); | ||
423 | s->dma_address = virt_to_bus(sg_virt(s)); | ||
424 | s->dma_length = s->length; | ||
425 | } | ||
426 | return nelems; | ||
427 | } | ||
428 | |||
429 | static int calgary_map_sg(struct device *dev, struct scatterlist *sg, | 416 | static int calgary_map_sg(struct device *dev, struct scatterlist *sg, |
430 | int nelems, int direction) | 417 | int nelems, int direction) |
431 | { | 418 | { |
@@ -436,9 +423,6 @@ static int calgary_map_sg(struct device *dev, struct scatterlist *sg, | |||
436 | unsigned long entry; | 423 | unsigned long entry; |
437 | int i; | 424 | int i; |
438 | 425 | ||
439 | if (!translation_enabled(tbl)) | ||
440 | return calgary_nontranslate_map_sg(dev, sg, nelems, direction); | ||
441 | |||
442 | for_each_sg(sg, s, nelems, i) { | 426 | for_each_sg(sg, s, nelems, i) { |
443 | BUG_ON(!sg_page(s)); | 427 | BUG_ON(!sg_page(s)); |
444 | 428 | ||
@@ -474,7 +458,6 @@ error: | |||
474 | static dma_addr_t calgary_map_single(struct device *dev, phys_addr_t paddr, | 458 | static dma_addr_t calgary_map_single(struct device *dev, phys_addr_t paddr, |
475 | size_t size, int direction) | 459 | size_t size, int direction) |
476 | { | 460 | { |
477 | dma_addr_t dma_handle = bad_dma_address; | ||
478 | void *vaddr = phys_to_virt(paddr); | 461 | void *vaddr = phys_to_virt(paddr); |
479 | unsigned long uaddr; | 462 | unsigned long uaddr; |
480 | unsigned int npages; | 463 | unsigned int npages; |
@@ -483,12 +466,7 @@ static dma_addr_t calgary_map_single(struct device *dev, phys_addr_t paddr, | |||
483 | uaddr = (unsigned long)vaddr; | 466 | uaddr = (unsigned long)vaddr; |
484 | npages = num_dma_pages(uaddr, size); | 467 | npages = num_dma_pages(uaddr, size); |
485 | 468 | ||
486 | if (translation_enabled(tbl)) | 469 | return iommu_alloc(dev, tbl, vaddr, npages, direction); |
487 | dma_handle = iommu_alloc(dev, tbl, vaddr, npages, direction); | ||
488 | else | ||
489 | dma_handle = virt_to_bus(vaddr); | ||
490 | |||
491 | return dma_handle; | ||
492 | } | 470 | } |
493 | 471 | ||
494 | static void calgary_unmap_single(struct device *dev, dma_addr_t dma_handle, | 472 | static void calgary_unmap_single(struct device *dev, dma_addr_t dma_handle, |
@@ -497,9 +475,6 @@ static void calgary_unmap_single(struct device *dev, dma_addr_t dma_handle, | |||
497 | struct iommu_table *tbl = find_iommu_table(dev); | 475 | struct iommu_table *tbl = find_iommu_table(dev); |
498 | unsigned int npages; | 476 | unsigned int npages; |
499 | 477 | ||
500 | if (!translation_enabled(tbl)) | ||
501 | return; | ||
502 | |||
503 | npages = num_dma_pages(dma_handle, size); | 478 | npages = num_dma_pages(dma_handle, size); |
504 | iommu_free(tbl, dma_handle, npages); | 479 | iommu_free(tbl, dma_handle, npages); |
505 | } | 480 | } |
@@ -522,18 +497,12 @@ static void* calgary_alloc_coherent(struct device *dev, size_t size, | |||
522 | goto error; | 497 | goto error; |
523 | memset(ret, 0, size); | 498 | memset(ret, 0, size); |
524 | 499 | ||
525 | if (translation_enabled(tbl)) { | 500 | /* set up tces to cover the allocated range */ |
526 | /* set up tces to cover the allocated range */ | 501 | mapping = iommu_alloc(dev, tbl, ret, npages, DMA_BIDIRECTIONAL); |
527 | mapping = iommu_alloc(dev, tbl, ret, npages, DMA_BIDIRECTIONAL); | 502 | if (mapping == bad_dma_address) |
528 | if (mapping == bad_dma_address) | 503 | goto free; |
529 | goto free; | 504 | *dma_handle = mapping; |
530 | |||
531 | *dma_handle = mapping; | ||
532 | } else /* non translated slot */ | ||
533 | *dma_handle = virt_to_bus(ret); | ||
534 | |||
535 | return ret; | 505 | return ret; |
536 | |||
537 | free: | 506 | free: |
538 | free_pages((unsigned long)ret, get_order(size)); | 507 | free_pages((unsigned long)ret, get_order(size)); |
539 | ret = NULL; | 508 | ret = NULL; |
@@ -541,7 +510,7 @@ error: | |||
541 | return ret; | 510 | return ret; |
542 | } | 511 | } |
543 | 512 | ||
544 | static const struct dma_mapping_ops calgary_dma_ops = { | 513 | static struct dma_mapping_ops calgary_dma_ops = { |
545 | .alloc_coherent = calgary_alloc_coherent, | 514 | .alloc_coherent = calgary_alloc_coherent, |
546 | .map_single = calgary_map_single, | 515 | .map_single = calgary_map_single, |
547 | .unmap_single = calgary_unmap_single, | 516 | .unmap_single = calgary_unmap_single, |
@@ -830,7 +799,11 @@ static int __init calgary_setup_tar(struct pci_dev *dev, void __iomem *bbar) | |||
830 | 799 | ||
831 | tbl = pci_iommu(dev->bus); | 800 | tbl = pci_iommu(dev->bus); |
832 | tbl->it_base = (unsigned long)bus_info[dev->bus->number].tce_space; | 801 | tbl->it_base = (unsigned long)bus_info[dev->bus->number].tce_space; |
833 | tce_free(tbl, 0, tbl->it_size); | 802 | |
803 | if (is_kdump_kernel()) | ||
804 | calgary_init_bitmap_from_tce_table(tbl); | ||
805 | else | ||
806 | tce_free(tbl, 0, tbl->it_size); | ||
834 | 807 | ||
835 | if (is_calgary(dev->device)) | 808 | if (is_calgary(dev->device)) |
836 | tbl->chip_ops = &calgary_chip_ops; | 809 | tbl->chip_ops = &calgary_chip_ops; |
@@ -1209,6 +1182,10 @@ static int __init calgary_init(void) | |||
1209 | if (ret) | 1182 | if (ret) |
1210 | return ret; | 1183 | return ret; |
1211 | 1184 | ||
1185 | /* Purely for kdump kernel case */ | ||
1186 | if (is_kdump_kernel()) | ||
1187 | get_tce_space_from_tar(); | ||
1188 | |||
1212 | do { | 1189 | do { |
1213 | dev = pci_get_device(PCI_VENDOR_ID_IBM, PCI_ANY_ID, dev); | 1190 | dev = pci_get_device(PCI_VENDOR_ID_IBM, PCI_ANY_ID, dev); |
1214 | if (!dev) | 1191 | if (!dev) |
@@ -1230,6 +1207,16 @@ static int __init calgary_init(void) | |||
1230 | goto error; | 1207 | goto error; |
1231 | } while (1); | 1208 | } while (1); |
1232 | 1209 | ||
1210 | dev = NULL; | ||
1211 | for_each_pci_dev(dev) { | ||
1212 | struct iommu_table *tbl; | ||
1213 | |||
1214 | tbl = find_iommu_table(&dev->dev); | ||
1215 | |||
1216 | if (translation_enabled(tbl)) | ||
1217 | dev->dev.archdata.dma_ops = &calgary_dma_ops; | ||
1218 | } | ||
1219 | |||
1233 | return ret; | 1220 | return ret; |
1234 | 1221 | ||
1235 | error: | 1222 | error: |
@@ -1251,6 +1238,7 @@ error: | |||
1251 | calgary_disable_translation(dev); | 1238 | calgary_disable_translation(dev); |
1252 | calgary_free_bus(dev); | 1239 | calgary_free_bus(dev); |
1253 | pci_dev_put(dev); /* Undo calgary_init_one()'s pci_dev_get() */ | 1240 | pci_dev_put(dev); /* Undo calgary_init_one()'s pci_dev_get() */ |
1241 | dev->dev.archdata.dma_ops = NULL; | ||
1254 | } while (1); | 1242 | } while (1); |
1255 | 1243 | ||
1256 | return ret; | 1244 | return ret; |
@@ -1280,13 +1268,15 @@ static inline int __init determine_tce_table_size(u64 ram) | |||
1280 | static int __init build_detail_arrays(void) | 1268 | static int __init build_detail_arrays(void) |
1281 | { | 1269 | { |
1282 | unsigned long ptr; | 1270 | unsigned long ptr; |
1283 | int i, scal_detail_size, rio_detail_size; | 1271 | unsigned numnodes, i; |
1272 | int scal_detail_size, rio_detail_size; | ||
1284 | 1273 | ||
1285 | if (rio_table_hdr->num_scal_dev > MAX_NUMNODES){ | 1274 | numnodes = rio_table_hdr->num_scal_dev; |
1275 | if (numnodes > MAX_NUMNODES){ | ||
1286 | printk(KERN_WARNING | 1276 | printk(KERN_WARNING |
1287 | "Calgary: MAX_NUMNODES too low! Defined as %d, " | 1277 | "Calgary: MAX_NUMNODES too low! Defined as %d, " |
1288 | "but system has %d nodes.\n", | 1278 | "but system has %d nodes.\n", |
1289 | MAX_NUMNODES, rio_table_hdr->num_scal_dev); | 1279 | MAX_NUMNODES, numnodes); |
1290 | return -ENODEV; | 1280 | return -ENODEV; |
1291 | } | 1281 | } |
1292 | 1282 | ||
@@ -1307,8 +1297,7 @@ static int __init build_detail_arrays(void) | |||
1307 | } | 1297 | } |
1308 | 1298 | ||
1309 | ptr = ((unsigned long)rio_table_hdr) + 3; | 1299 | ptr = ((unsigned long)rio_table_hdr) + 3; |
1310 | for (i = 0; i < rio_table_hdr->num_scal_dev; | 1300 | for (i = 0; i < numnodes; i++, ptr += scal_detail_size) |
1311 | i++, ptr += scal_detail_size) | ||
1312 | scal_devs[i] = (struct scal_detail *)ptr; | 1301 | scal_devs[i] = (struct scal_detail *)ptr; |
1313 | 1302 | ||
1314 | for (i = 0; i < rio_table_hdr->num_rio_dev; | 1303 | for (i = 0; i < rio_table_hdr->num_rio_dev; |
@@ -1339,6 +1328,61 @@ static int __init calgary_bus_has_devices(int bus, unsigned short pci_dev) | |||
1339 | return (val != 0xffffffff); | 1328 | return (val != 0xffffffff); |
1340 | } | 1329 | } |
1341 | 1330 | ||
1331 | /* | ||
1332 | * calgary_init_bitmap_from_tce_table(): | ||
1333 | * Funtion for kdump case. In the second/kdump kernel initialize | ||
1334 | * the bitmap based on the tce table entries obtained from first kernel | ||
1335 | */ | ||
1336 | static void calgary_init_bitmap_from_tce_table(struct iommu_table *tbl) | ||
1337 | { | ||
1338 | u64 *tp; | ||
1339 | unsigned int index; | ||
1340 | tp = ((u64 *)tbl->it_base); | ||
1341 | for (index = 0 ; index < tbl->it_size; index++) { | ||
1342 | if (*tp != 0x0) | ||
1343 | set_bit(index, tbl->it_map); | ||
1344 | tp++; | ||
1345 | } | ||
1346 | } | ||
1347 | |||
1348 | /* | ||
1349 | * get_tce_space_from_tar(): | ||
1350 | * Function for kdump case. Get the tce tables from first kernel | ||
1351 | * by reading the contents of the base adress register of calgary iommu | ||
1352 | */ | ||
1353 | static void __init get_tce_space_from_tar(void) | ||
1354 | { | ||
1355 | int bus; | ||
1356 | void __iomem *target; | ||
1357 | unsigned long tce_space; | ||
1358 | |||
1359 | for (bus = 0; bus < MAX_PHB_BUS_NUM; bus++) { | ||
1360 | struct calgary_bus_info *info = &bus_info[bus]; | ||
1361 | unsigned short pci_device; | ||
1362 | u32 val; | ||
1363 | |||
1364 | val = read_pci_config(bus, 0, 0, 0); | ||
1365 | pci_device = (val & 0xFFFF0000) >> 16; | ||
1366 | |||
1367 | if (!is_cal_pci_dev(pci_device)) | ||
1368 | continue; | ||
1369 | if (info->translation_disabled) | ||
1370 | continue; | ||
1371 | |||
1372 | if (calgary_bus_has_devices(bus, pci_device) || | ||
1373 | translate_empty_slots) { | ||
1374 | target = calgary_reg(bus_info[bus].bbar, | ||
1375 | tar_offset(bus)); | ||
1376 | tce_space = be64_to_cpu(readq(target)); | ||
1377 | tce_space = tce_space & TAR_SW_BITS; | ||
1378 | |||
1379 | tce_space = tce_space & (~specified_table_size); | ||
1380 | info->tce_space = (u64 *)__va(tce_space); | ||
1381 | } | ||
1382 | } | ||
1383 | return; | ||
1384 | } | ||
1385 | |||
1342 | void __init detect_calgary(void) | 1386 | void __init detect_calgary(void) |
1343 | { | 1387 | { |
1344 | int bus; | 1388 | int bus; |
@@ -1394,7 +1438,8 @@ void __init detect_calgary(void) | |||
1394 | return; | 1438 | return; |
1395 | } | 1439 | } |
1396 | 1440 | ||
1397 | specified_table_size = determine_tce_table_size(max_pfn * PAGE_SIZE); | 1441 | specified_table_size = determine_tce_table_size((is_kdump_kernel() ? |
1442 | saved_max_pfn : max_pfn) * PAGE_SIZE); | ||
1398 | 1443 | ||
1399 | for (bus = 0; bus < MAX_PHB_BUS_NUM; bus++) { | 1444 | for (bus = 0; bus < MAX_PHB_BUS_NUM; bus++) { |
1400 | struct calgary_bus_info *info = &bus_info[bus]; | 1445 | struct calgary_bus_info *info = &bus_info[bus]; |
@@ -1412,10 +1457,16 @@ void __init detect_calgary(void) | |||
1412 | 1457 | ||
1413 | if (calgary_bus_has_devices(bus, pci_device) || | 1458 | if (calgary_bus_has_devices(bus, pci_device) || |
1414 | translate_empty_slots) { | 1459 | translate_empty_slots) { |
1415 | tbl = alloc_tce_table(); | 1460 | /* |
1416 | if (!tbl) | 1461 | * If it is kdump kernel, find and use tce tables |
1417 | goto cleanup; | 1462 | * from first kernel, else allocate tce tables here |
1418 | info->tce_space = tbl; | 1463 | */ |
1464 | if (!is_kdump_kernel()) { | ||
1465 | tbl = alloc_tce_table(); | ||
1466 | if (!tbl) | ||
1467 | goto cleanup; | ||
1468 | info->tce_space = tbl; | ||
1469 | } | ||
1419 | calgary_found = 1; | 1470 | calgary_found = 1; |
1420 | } | 1471 | } |
1421 | } | 1472 | } |
@@ -1430,6 +1481,10 @@ void __init detect_calgary(void) | |||
1430 | printk(KERN_INFO "PCI-DMA: Calgary TCE table spec is %d, " | 1481 | printk(KERN_INFO "PCI-DMA: Calgary TCE table spec is %d, " |
1431 | "CONFIG_IOMMU_DEBUG is %s.\n", specified_table_size, | 1482 | "CONFIG_IOMMU_DEBUG is %s.\n", specified_table_size, |
1432 | debugging ? "enabled" : "disabled"); | 1483 | debugging ? "enabled" : "disabled"); |
1484 | |||
1485 | /* swiotlb for devices that aren't behind the Calgary. */ | ||
1486 | if (max_pfn > MAX_DMA32_PFN) | ||
1487 | swiotlb = 1; | ||
1433 | } | 1488 | } |
1434 | return; | 1489 | return; |
1435 | 1490 | ||
@@ -1446,7 +1501,7 @@ int __init calgary_iommu_init(void) | |||
1446 | { | 1501 | { |
1447 | int ret; | 1502 | int ret; |
1448 | 1503 | ||
1449 | if (no_iommu || swiotlb) | 1504 | if (no_iommu || (swiotlb && !calgary_detected)) |
1450 | return -ENODEV; | 1505 | return -ENODEV; |
1451 | 1506 | ||
1452 | if (!calgary_detected) | 1507 | if (!calgary_detected) |
@@ -1459,15 +1514,14 @@ int __init calgary_iommu_init(void) | |||
1459 | if (ret) { | 1514 | if (ret) { |
1460 | printk(KERN_ERR "PCI-DMA: Calgary init failed %d, " | 1515 | printk(KERN_ERR "PCI-DMA: Calgary init failed %d, " |
1461 | "falling back to no_iommu\n", ret); | 1516 | "falling back to no_iommu\n", ret); |
1462 | if (max_pfn > MAX_DMA32_PFN) | ||
1463 | printk(KERN_ERR "WARNING more than 4GB of memory, " | ||
1464 | "32bit PCI may malfunction.\n"); | ||
1465 | return ret; | 1517 | return ret; |
1466 | } | 1518 | } |
1467 | 1519 | ||
1468 | force_iommu = 1; | 1520 | force_iommu = 1; |
1469 | bad_dma_address = 0x0; | 1521 | bad_dma_address = 0x0; |
1470 | dma_ops = &calgary_dma_ops; | 1522 | /* dma_ops is set to swiotlb or nommu */ |
1523 | if (!dma_ops) | ||
1524 | dma_ops = &nommu_dma_ops; | ||
1471 | 1525 | ||
1472 | return 0; | 1526 | return 0; |
1473 | } | 1527 | } |
diff --git a/arch/x86/kernel/pci-dma.c b/arch/x86/kernel/pci-dma.c index 8467ec2320f1..87d4d6964ec2 100644 --- a/arch/x86/kernel/pci-dma.c +++ b/arch/x86/kernel/pci-dma.c | |||
@@ -5,14 +5,13 @@ | |||
5 | 5 | ||
6 | #include <asm/proto.h> | 6 | #include <asm/proto.h> |
7 | #include <asm/dma.h> | 7 | #include <asm/dma.h> |
8 | #include <asm/gart.h> | 8 | #include <asm/iommu.h> |
9 | #include <asm/calgary.h> | 9 | #include <asm/calgary.h> |
10 | #include <asm/amd_iommu.h> | 10 | #include <asm/amd_iommu.h> |
11 | 11 | ||
12 | int forbid_dac __read_mostly; | 12 | static int forbid_dac __read_mostly; |
13 | EXPORT_SYMBOL(forbid_dac); | ||
14 | 13 | ||
15 | const struct dma_mapping_ops *dma_ops; | 14 | struct dma_mapping_ops *dma_ops; |
16 | EXPORT_SYMBOL(dma_ops); | 15 | EXPORT_SYMBOL(dma_ops); |
17 | 16 | ||
18 | static int iommu_sac_force __read_mostly; | 17 | static int iommu_sac_force __read_mostly; |
@@ -114,22 +113,24 @@ void __init pci_iommu_alloc(void) | |||
114 | * The order of these functions is important for | 113 | * The order of these functions is important for |
115 | * fall-back/fail-over reasons | 114 | * fall-back/fail-over reasons |
116 | */ | 115 | */ |
117 | #ifdef CONFIG_GART_IOMMU | ||
118 | gart_iommu_hole_init(); | 116 | gart_iommu_hole_init(); |
119 | #endif | ||
120 | 117 | ||
121 | #ifdef CONFIG_CALGARY_IOMMU | ||
122 | detect_calgary(); | 118 | detect_calgary(); |
123 | #endif | ||
124 | 119 | ||
125 | detect_intel_iommu(); | 120 | detect_intel_iommu(); |
126 | 121 | ||
127 | amd_iommu_detect(); | 122 | amd_iommu_detect(); |
128 | 123 | ||
129 | #ifdef CONFIG_SWIOTLB | ||
130 | pci_swiotlb_init(); | 124 | pci_swiotlb_init(); |
131 | #endif | ||
132 | } | 125 | } |
126 | |||
127 | unsigned long iommu_num_pages(unsigned long addr, unsigned long len) | ||
128 | { | ||
129 | unsigned long size = roundup((addr & ~PAGE_MASK) + len, PAGE_SIZE); | ||
130 | |||
131 | return size >> PAGE_SHIFT; | ||
132 | } | ||
133 | EXPORT_SYMBOL(iommu_num_pages); | ||
133 | #endif | 134 | #endif |
134 | 135 | ||
135 | /* | 136 | /* |
@@ -184,9 +185,7 @@ static __init int iommu_setup(char *p) | |||
184 | swiotlb = 1; | 185 | swiotlb = 1; |
185 | #endif | 186 | #endif |
186 | 187 | ||
187 | #ifdef CONFIG_GART_IOMMU | ||
188 | gart_parse_options(p); | 188 | gart_parse_options(p); |
189 | #endif | ||
190 | 189 | ||
191 | #ifdef CONFIG_CALGARY_IOMMU | 190 | #ifdef CONFIG_CALGARY_IOMMU |
192 | if (!strncmp(p, "calgary", 7)) | 191 | if (!strncmp(p, "calgary", 7)) |
@@ -201,136 +200,19 @@ static __init int iommu_setup(char *p) | |||
201 | } | 200 | } |
202 | early_param("iommu", iommu_setup); | 201 | early_param("iommu", iommu_setup); |
203 | 202 | ||
204 | #ifdef CONFIG_X86_32 | ||
205 | int dma_declare_coherent_memory(struct device *dev, dma_addr_t bus_addr, | ||
206 | dma_addr_t device_addr, size_t size, int flags) | ||
207 | { | ||
208 | void __iomem *mem_base = NULL; | ||
209 | int pages = size >> PAGE_SHIFT; | ||
210 | int bitmap_size = BITS_TO_LONGS(pages) * sizeof(long); | ||
211 | |||
212 | if ((flags & (DMA_MEMORY_MAP | DMA_MEMORY_IO)) == 0) | ||
213 | goto out; | ||
214 | if (!size) | ||
215 | goto out; | ||
216 | if (dev->dma_mem) | ||
217 | goto out; | ||
218 | |||
219 | /* FIXME: this routine just ignores DMA_MEMORY_INCLUDES_CHILDREN */ | ||
220 | |||
221 | mem_base = ioremap(bus_addr, size); | ||
222 | if (!mem_base) | ||
223 | goto out; | ||
224 | |||
225 | dev->dma_mem = kzalloc(sizeof(struct dma_coherent_mem), GFP_KERNEL); | ||
226 | if (!dev->dma_mem) | ||
227 | goto out; | ||
228 | dev->dma_mem->bitmap = kzalloc(bitmap_size, GFP_KERNEL); | ||
229 | if (!dev->dma_mem->bitmap) | ||
230 | goto free1_out; | ||
231 | |||
232 | dev->dma_mem->virt_base = mem_base; | ||
233 | dev->dma_mem->device_base = device_addr; | ||
234 | dev->dma_mem->size = pages; | ||
235 | dev->dma_mem->flags = flags; | ||
236 | |||
237 | if (flags & DMA_MEMORY_MAP) | ||
238 | return DMA_MEMORY_MAP; | ||
239 | |||
240 | return DMA_MEMORY_IO; | ||
241 | |||
242 | free1_out: | ||
243 | kfree(dev->dma_mem); | ||
244 | out: | ||
245 | if (mem_base) | ||
246 | iounmap(mem_base); | ||
247 | return 0; | ||
248 | } | ||
249 | EXPORT_SYMBOL(dma_declare_coherent_memory); | ||
250 | |||
251 | void dma_release_declared_memory(struct device *dev) | ||
252 | { | ||
253 | struct dma_coherent_mem *mem = dev->dma_mem; | ||
254 | |||
255 | if (!mem) | ||
256 | return; | ||
257 | dev->dma_mem = NULL; | ||
258 | iounmap(mem->virt_base); | ||
259 | kfree(mem->bitmap); | ||
260 | kfree(mem); | ||
261 | } | ||
262 | EXPORT_SYMBOL(dma_release_declared_memory); | ||
263 | |||
264 | void *dma_mark_declared_memory_occupied(struct device *dev, | ||
265 | dma_addr_t device_addr, size_t size) | ||
266 | { | ||
267 | struct dma_coherent_mem *mem = dev->dma_mem; | ||
268 | int pos, err; | ||
269 | int pages = (size + (device_addr & ~PAGE_MASK) + PAGE_SIZE - 1); | ||
270 | |||
271 | pages >>= PAGE_SHIFT; | ||
272 | |||
273 | if (!mem) | ||
274 | return ERR_PTR(-EINVAL); | ||
275 | |||
276 | pos = (device_addr - mem->device_base) >> PAGE_SHIFT; | ||
277 | err = bitmap_allocate_region(mem->bitmap, pos, get_order(pages)); | ||
278 | if (err != 0) | ||
279 | return ERR_PTR(err); | ||
280 | return mem->virt_base + (pos << PAGE_SHIFT); | ||
281 | } | ||
282 | EXPORT_SYMBOL(dma_mark_declared_memory_occupied); | ||
283 | |||
284 | static int dma_alloc_from_coherent_mem(struct device *dev, ssize_t size, | ||
285 | dma_addr_t *dma_handle, void **ret) | ||
286 | { | ||
287 | struct dma_coherent_mem *mem = dev ? dev->dma_mem : NULL; | ||
288 | int order = get_order(size); | ||
289 | |||
290 | if (mem) { | ||
291 | int page = bitmap_find_free_region(mem->bitmap, mem->size, | ||
292 | order); | ||
293 | if (page >= 0) { | ||
294 | *dma_handle = mem->device_base + (page << PAGE_SHIFT); | ||
295 | *ret = mem->virt_base + (page << PAGE_SHIFT); | ||
296 | memset(*ret, 0, size); | ||
297 | } | ||
298 | if (mem->flags & DMA_MEMORY_EXCLUSIVE) | ||
299 | *ret = NULL; | ||
300 | } | ||
301 | return (mem != NULL); | ||
302 | } | ||
303 | |||
304 | static int dma_release_coherent(struct device *dev, int order, void *vaddr) | ||
305 | { | ||
306 | struct dma_coherent_mem *mem = dev ? dev->dma_mem : NULL; | ||
307 | |||
308 | if (mem && vaddr >= mem->virt_base && vaddr < | ||
309 | (mem->virt_base + (mem->size << PAGE_SHIFT))) { | ||
310 | int page = (vaddr - mem->virt_base) >> PAGE_SHIFT; | ||
311 | |||
312 | bitmap_release_region(mem->bitmap, page, order); | ||
313 | return 1; | ||
314 | } | ||
315 | return 0; | ||
316 | } | ||
317 | #else | ||
318 | #define dma_alloc_from_coherent_mem(dev, size, handle, ret) (0) | ||
319 | #define dma_release_coherent(dev, order, vaddr) (0) | ||
320 | #endif /* CONFIG_X86_32 */ | ||
321 | |||
322 | int dma_supported(struct device *dev, u64 mask) | 203 | int dma_supported(struct device *dev, u64 mask) |
323 | { | 204 | { |
205 | struct dma_mapping_ops *ops = get_dma_ops(dev); | ||
206 | |||
324 | #ifdef CONFIG_PCI | 207 | #ifdef CONFIG_PCI |
325 | if (mask > 0xffffffff && forbid_dac > 0) { | 208 | if (mask > 0xffffffff && forbid_dac > 0) { |
326 | printk(KERN_INFO "PCI: Disallowing DAC for device %s\n", | 209 | dev_info(dev, "PCI: Disallowing DAC for device\n"); |
327 | dev->bus_id); | ||
328 | return 0; | 210 | return 0; |
329 | } | 211 | } |
330 | #endif | 212 | #endif |
331 | 213 | ||
332 | if (dma_ops->dma_supported) | 214 | if (ops->dma_supported) |
333 | return dma_ops->dma_supported(dev, mask); | 215 | return ops->dma_supported(dev, mask); |
334 | 216 | ||
335 | /* Copied from i386. Doesn't make much sense, because it will | 217 | /* Copied from i386. Doesn't make much sense, because it will |
336 | only work for pci_alloc_coherent. | 218 | only work for pci_alloc_coherent. |
@@ -351,8 +233,7 @@ int dma_supported(struct device *dev, u64 mask) | |||
351 | type. Normally this doesn't make any difference, but gives | 233 | type. Normally this doesn't make any difference, but gives |
352 | more gentle handling of IOMMU overflow. */ | 234 | more gentle handling of IOMMU overflow. */ |
353 | if (iommu_sac_force && (mask >= DMA_40BIT_MASK)) { | 235 | if (iommu_sac_force && (mask >= DMA_40BIT_MASK)) { |
354 | printk(KERN_INFO "%s: Force SAC with mask %Lx\n", | 236 | dev_info(dev, "Force SAC with mask %Lx\n", mask); |
355 | dev->bus_id, mask); | ||
356 | return 0; | 237 | return 0; |
357 | } | 238 | } |
358 | 239 | ||
@@ -378,6 +259,7 @@ void * | |||
378 | dma_alloc_coherent(struct device *dev, size_t size, dma_addr_t *dma_handle, | 259 | dma_alloc_coherent(struct device *dev, size_t size, dma_addr_t *dma_handle, |
379 | gfp_t gfp) | 260 | gfp_t gfp) |
380 | { | 261 | { |
262 | struct dma_mapping_ops *ops = get_dma_ops(dev); | ||
381 | void *memory = NULL; | 263 | void *memory = NULL; |
382 | struct page *page; | 264 | struct page *page; |
383 | unsigned long dma_mask = 0; | 265 | unsigned long dma_mask = 0; |
@@ -387,7 +269,7 @@ dma_alloc_coherent(struct device *dev, size_t size, dma_addr_t *dma_handle, | |||
387 | /* ignore region specifiers */ | 269 | /* ignore region specifiers */ |
388 | gfp &= ~(__GFP_DMA | __GFP_HIGHMEM | __GFP_DMA32); | 270 | gfp &= ~(__GFP_DMA | __GFP_HIGHMEM | __GFP_DMA32); |
389 | 271 | ||
390 | if (dma_alloc_from_coherent_mem(dev, size, dma_handle, &memory)) | 272 | if (dma_alloc_from_coherent(dev, size, dma_handle, &memory)) |
391 | return memory; | 273 | return memory; |
392 | 274 | ||
393 | if (!dev) { | 275 | if (!dev) { |
@@ -446,8 +328,8 @@ dma_alloc_coherent(struct device *dev, size_t size, dma_addr_t *dma_handle, | |||
446 | /* Let low level make its own zone decisions */ | 328 | /* Let low level make its own zone decisions */ |
447 | gfp &= ~(GFP_DMA32|GFP_DMA); | 329 | gfp &= ~(GFP_DMA32|GFP_DMA); |
448 | 330 | ||
449 | if (dma_ops->alloc_coherent) | 331 | if (ops->alloc_coherent) |
450 | return dma_ops->alloc_coherent(dev, size, | 332 | return ops->alloc_coherent(dev, size, |
451 | dma_handle, gfp); | 333 | dma_handle, gfp); |
452 | return NULL; | 334 | return NULL; |
453 | } | 335 | } |
@@ -459,14 +341,14 @@ dma_alloc_coherent(struct device *dev, size_t size, dma_addr_t *dma_handle, | |||
459 | } | 341 | } |
460 | } | 342 | } |
461 | 343 | ||
462 | if (dma_ops->alloc_coherent) { | 344 | if (ops->alloc_coherent) { |
463 | free_pages((unsigned long)memory, get_order(size)); | 345 | free_pages((unsigned long)memory, get_order(size)); |
464 | gfp &= ~(GFP_DMA|GFP_DMA32); | 346 | gfp &= ~(GFP_DMA|GFP_DMA32); |
465 | return dma_ops->alloc_coherent(dev, size, dma_handle, gfp); | 347 | return ops->alloc_coherent(dev, size, dma_handle, gfp); |
466 | } | 348 | } |
467 | 349 | ||
468 | if (dma_ops->map_simple) { | 350 | if (ops->map_simple) { |
469 | *dma_handle = dma_ops->map_simple(dev, virt_to_phys(memory), | 351 | *dma_handle = ops->map_simple(dev, virt_to_phys(memory), |
470 | size, | 352 | size, |
471 | PCI_DMA_BIDIRECTIONAL); | 353 | PCI_DMA_BIDIRECTIONAL); |
472 | if (*dma_handle != bad_dma_address) | 354 | if (*dma_handle != bad_dma_address) |
@@ -488,29 +370,27 @@ EXPORT_SYMBOL(dma_alloc_coherent); | |||
488 | void dma_free_coherent(struct device *dev, size_t size, | 370 | void dma_free_coherent(struct device *dev, size_t size, |
489 | void *vaddr, dma_addr_t bus) | 371 | void *vaddr, dma_addr_t bus) |
490 | { | 372 | { |
373 | struct dma_mapping_ops *ops = get_dma_ops(dev); | ||
374 | |||
491 | int order = get_order(size); | 375 | int order = get_order(size); |
492 | WARN_ON(irqs_disabled()); /* for portability */ | 376 | WARN_ON(irqs_disabled()); /* for portability */ |
493 | if (dma_release_coherent(dev, order, vaddr)) | 377 | if (dma_release_from_coherent(dev, order, vaddr)) |
494 | return; | 378 | return; |
495 | if (dma_ops->unmap_single) | 379 | if (ops->unmap_single) |
496 | dma_ops->unmap_single(dev, bus, size, 0); | 380 | ops->unmap_single(dev, bus, size, 0); |
497 | free_pages((unsigned long)vaddr, order); | 381 | free_pages((unsigned long)vaddr, order); |
498 | } | 382 | } |
499 | EXPORT_SYMBOL(dma_free_coherent); | 383 | EXPORT_SYMBOL(dma_free_coherent); |
500 | 384 | ||
501 | static int __init pci_iommu_init(void) | 385 | static int __init pci_iommu_init(void) |
502 | { | 386 | { |
503 | #ifdef CONFIG_CALGARY_IOMMU | ||
504 | calgary_iommu_init(); | 387 | calgary_iommu_init(); |
505 | #endif | ||
506 | 388 | ||
507 | intel_iommu_init(); | 389 | intel_iommu_init(); |
508 | 390 | ||
509 | amd_iommu_init(); | 391 | amd_iommu_init(); |
510 | 392 | ||
511 | #ifdef CONFIG_GART_IOMMU | ||
512 | gart_iommu_init(); | 393 | gart_iommu_init(); |
513 | #endif | ||
514 | 394 | ||
515 | no_iommu_init(); | 395 | no_iommu_init(); |
516 | return 0; | 396 | return 0; |
diff --git a/arch/x86/kernel/pci-gart_64.c b/arch/x86/kernel/pci-gart_64.c index c3fe78406d18..49285f8fd4d5 100644 --- a/arch/x86/kernel/pci-gart_64.c +++ b/arch/x86/kernel/pci-gart_64.c | |||
@@ -32,6 +32,7 @@ | |||
32 | #include <asm/mtrr.h> | 32 | #include <asm/mtrr.h> |
33 | #include <asm/pgtable.h> | 33 | #include <asm/pgtable.h> |
34 | #include <asm/proto.h> | 34 | #include <asm/proto.h> |
35 | #include <asm/iommu.h> | ||
35 | #include <asm/gart.h> | 36 | #include <asm/gart.h> |
36 | #include <asm/cacheflush.h> | 37 | #include <asm/cacheflush.h> |
37 | #include <asm/swiotlb.h> | 38 | #include <asm/swiotlb.h> |
@@ -66,9 +67,6 @@ static u32 gart_unmapped_entry; | |||
66 | (((x) & 0xfffff000) | (((x) >> 32) << 4) | GPTE_VALID | GPTE_COHERENT) | 67 | (((x) & 0xfffff000) | (((x) >> 32) << 4) | GPTE_VALID | GPTE_COHERENT) |
67 | #define GPTE_DECODE(x) (((x) & 0xfffff000) | (((u64)(x) & 0xff0) << 28)) | 68 | #define GPTE_DECODE(x) (((x) & 0xfffff000) | (((u64)(x) & 0xff0) << 28)) |
68 | 69 | ||
69 | #define to_pages(addr, size) \ | ||
70 | (round_up(((addr) & ~PAGE_MASK) + (size), PAGE_SIZE) >> PAGE_SHIFT) | ||
71 | |||
72 | #define EMERGENCY_PAGES 32 /* = 128KB */ | 70 | #define EMERGENCY_PAGES 32 /* = 128KB */ |
73 | 71 | ||
74 | #ifdef CONFIG_AGP | 72 | #ifdef CONFIG_AGP |
@@ -197,9 +195,7 @@ static void iommu_full(struct device *dev, size_t size, int dir) | |||
197 | * out. Hopefully no network devices use single mappings that big. | 195 | * out. Hopefully no network devices use single mappings that big. |
198 | */ | 196 | */ |
199 | 197 | ||
200 | printk(KERN_ERR | 198 | dev_err(dev, "PCI-DMA: Out of IOMMU space for %lu bytes\n", size); |
201 | "PCI-DMA: Out of IOMMU space for %lu bytes at device %s\n", | ||
202 | size, dev->bus_id); | ||
203 | 199 | ||
204 | if (size > PAGE_SIZE*EMERGENCY_PAGES) { | 200 | if (size > PAGE_SIZE*EMERGENCY_PAGES) { |
205 | if (dir == PCI_DMA_FROMDEVICE || dir == PCI_DMA_BIDIRECTIONAL) | 201 | if (dir == PCI_DMA_FROMDEVICE || dir == PCI_DMA_BIDIRECTIONAL) |
@@ -242,7 +238,7 @@ nonforced_iommu(struct device *dev, unsigned long addr, size_t size) | |||
242 | static dma_addr_t dma_map_area(struct device *dev, dma_addr_t phys_mem, | 238 | static dma_addr_t dma_map_area(struct device *dev, dma_addr_t phys_mem, |
243 | size_t size, int dir) | 239 | size_t size, int dir) |
244 | { | 240 | { |
245 | unsigned long npages = to_pages(phys_mem, size); | 241 | unsigned long npages = iommu_num_pages(phys_mem, size); |
246 | unsigned long iommu_page = alloc_iommu(dev, npages); | 242 | unsigned long iommu_page = alloc_iommu(dev, npages); |
247 | int i; | 243 | int i; |
248 | 244 | ||
@@ -305,7 +301,7 @@ static void gart_unmap_single(struct device *dev, dma_addr_t dma_addr, | |||
305 | return; | 301 | return; |
306 | 302 | ||
307 | iommu_page = (dma_addr - iommu_bus_base)>>PAGE_SHIFT; | 303 | iommu_page = (dma_addr - iommu_bus_base)>>PAGE_SHIFT; |
308 | npages = to_pages(dma_addr, size); | 304 | npages = iommu_num_pages(dma_addr, size); |
309 | for (i = 0; i < npages; i++) { | 305 | for (i = 0; i < npages; i++) { |
310 | iommu_gatt_base[iommu_page + i] = gart_unmapped_entry; | 306 | iommu_gatt_base[iommu_page + i] = gart_unmapped_entry; |
311 | CLEAR_LEAK(iommu_page + i); | 307 | CLEAR_LEAK(iommu_page + i); |
@@ -388,7 +384,7 @@ static int __dma_map_cont(struct device *dev, struct scatterlist *start, | |||
388 | } | 384 | } |
389 | 385 | ||
390 | addr = phys_addr; | 386 | addr = phys_addr; |
391 | pages = to_pages(s->offset, s->length); | 387 | pages = iommu_num_pages(s->offset, s->length); |
392 | while (pages--) { | 388 | while (pages--) { |
393 | iommu_gatt_base[iommu_page] = GPTE_ENCODE(addr); | 389 | iommu_gatt_base[iommu_page] = GPTE_ENCODE(addr); |
394 | SET_LEAK(iommu_page); | 390 | SET_LEAK(iommu_page); |
@@ -471,7 +467,7 @@ gart_map_sg(struct device *dev, struct scatterlist *sg, int nents, int dir) | |||
471 | 467 | ||
472 | seg_size += s->length; | 468 | seg_size += s->length; |
473 | need = nextneed; | 469 | need = nextneed; |
474 | pages += to_pages(s->offset, s->length); | 470 | pages += iommu_num_pages(s->offset, s->length); |
475 | ps = s; | 471 | ps = s; |
476 | } | 472 | } |
477 | if (dma_map_cont(dev, start_sg, i - start, sgmap, pages, need) < 0) | 473 | if (dma_map_cont(dev, start_sg, i - start, sgmap, pages, need) < 0) |
@@ -693,8 +689,7 @@ static __init int init_k8_gatt(struct agp_kern_info *info) | |||
693 | 689 | ||
694 | extern int agp_amd64_init(void); | 690 | extern int agp_amd64_init(void); |
695 | 691 | ||
696 | static const struct dma_mapping_ops gart_dma_ops = { | 692 | static struct dma_mapping_ops gart_dma_ops = { |
697 | .mapping_error = NULL, | ||
698 | .map_single = gart_map_single, | 693 | .map_single = gart_map_single, |
699 | .map_simple = gart_map_simple, | 694 | .map_simple = gart_map_simple, |
700 | .unmap_single = gart_unmap_single, | 695 | .unmap_single = gart_unmap_single, |
diff --git a/arch/x86/kernel/pci-nommu.c b/arch/x86/kernel/pci-nommu.c index aec43d56f49c..3f91f71cdc3e 100644 --- a/arch/x86/kernel/pci-nommu.c +++ b/arch/x86/kernel/pci-nommu.c | |||
@@ -7,7 +7,7 @@ | |||
7 | #include <linux/dma-mapping.h> | 7 | #include <linux/dma-mapping.h> |
8 | #include <linux/scatterlist.h> | 8 | #include <linux/scatterlist.h> |
9 | 9 | ||
10 | #include <asm/gart.h> | 10 | #include <asm/iommu.h> |
11 | #include <asm/processor.h> | 11 | #include <asm/processor.h> |
12 | #include <asm/dma.h> | 12 | #include <asm/dma.h> |
13 | 13 | ||
@@ -72,21 +72,9 @@ static int nommu_map_sg(struct device *hwdev, struct scatterlist *sg, | |||
72 | return nents; | 72 | return nents; |
73 | } | 73 | } |
74 | 74 | ||
75 | /* Make sure we keep the same behaviour */ | 75 | struct dma_mapping_ops nommu_dma_ops = { |
76 | static int nommu_mapping_error(dma_addr_t dma_addr) | ||
77 | { | ||
78 | #ifdef CONFIG_X86_32 | ||
79 | return 0; | ||
80 | #else | ||
81 | return (dma_addr == bad_dma_address); | ||
82 | #endif | ||
83 | } | ||
84 | |||
85 | |||
86 | const struct dma_mapping_ops nommu_dma_ops = { | ||
87 | .map_single = nommu_map_single, | 76 | .map_single = nommu_map_single, |
88 | .map_sg = nommu_map_sg, | 77 | .map_sg = nommu_map_sg, |
89 | .mapping_error = nommu_mapping_error, | ||
90 | .is_phys = 1, | 78 | .is_phys = 1, |
91 | }; | 79 | }; |
92 | 80 | ||
diff --git a/arch/x86/kernel/pci-swiotlb_64.c b/arch/x86/kernel/pci-swiotlb_64.c index 82299cd1d04d..c4ce0332759e 100644 --- a/arch/x86/kernel/pci-swiotlb_64.c +++ b/arch/x86/kernel/pci-swiotlb_64.c | |||
@@ -5,7 +5,7 @@ | |||
5 | #include <linux/module.h> | 5 | #include <linux/module.h> |
6 | #include <linux/dma-mapping.h> | 6 | #include <linux/dma-mapping.h> |
7 | 7 | ||
8 | #include <asm/gart.h> | 8 | #include <asm/iommu.h> |
9 | #include <asm/swiotlb.h> | 9 | #include <asm/swiotlb.h> |
10 | #include <asm/dma.h> | 10 | #include <asm/dma.h> |
11 | 11 | ||
@@ -18,7 +18,7 @@ swiotlb_map_single_phys(struct device *hwdev, phys_addr_t paddr, size_t size, | |||
18 | return swiotlb_map_single(hwdev, phys_to_virt(paddr), size, direction); | 18 | return swiotlb_map_single(hwdev, phys_to_virt(paddr), size, direction); |
19 | } | 19 | } |
20 | 20 | ||
21 | const struct dma_mapping_ops swiotlb_dma_ops = { | 21 | struct dma_mapping_ops swiotlb_dma_ops = { |
22 | .mapping_error = swiotlb_dma_mapping_error, | 22 | .mapping_error = swiotlb_dma_mapping_error, |
23 | .alloc_coherent = swiotlb_alloc_coherent, | 23 | .alloc_coherent = swiotlb_alloc_coherent, |
24 | .free_coherent = swiotlb_free_coherent, | 24 | .free_coherent = swiotlb_free_coherent, |
diff --git a/arch/x86/kernel/process.c b/arch/x86/kernel/process.c index 4d629c62f4f8..876e91890777 100644 --- a/arch/x86/kernel/process.c +++ b/arch/x86/kernel/process.c | |||
@@ -15,6 +15,7 @@ unsigned long idle_nomwait; | |||
15 | EXPORT_SYMBOL(idle_nomwait); | 15 | EXPORT_SYMBOL(idle_nomwait); |
16 | 16 | ||
17 | struct kmem_cache *task_xstate_cachep; | 17 | struct kmem_cache *task_xstate_cachep; |
18 | static int force_mwait __cpuinitdata; | ||
18 | 19 | ||
19 | int arch_dup_task_struct(struct task_struct *dst, struct task_struct *src) | 20 | int arch_dup_task_struct(struct task_struct *dst, struct task_struct *src) |
20 | { | 21 | { |
@@ -199,6 +200,7 @@ static void poll_idle(void) | |||
199 | * | 200 | * |
200 | * idle=mwait overrides this decision and forces the usage of mwait. | 201 | * idle=mwait overrides this decision and forces the usage of mwait. |
201 | */ | 202 | */ |
203 | static int __cpuinitdata force_mwait; | ||
202 | 204 | ||
203 | #define MWAIT_INFO 0x05 | 205 | #define MWAIT_INFO 0x05 |
204 | #define MWAIT_ECX_EXTENDED_INFO 0x01 | 206 | #define MWAIT_ECX_EXTENDED_INFO 0x01 |
@@ -244,6 +246,14 @@ static int __cpuinit check_c1e_idle(const struct cpuinfo_x86 *c) | |||
244 | return 1; | 246 | return 1; |
245 | } | 247 | } |
246 | 248 | ||
249 | static cpumask_t c1e_mask = CPU_MASK_NONE; | ||
250 | static int c1e_detected; | ||
251 | |||
252 | void c1e_remove_cpu(int cpu) | ||
253 | { | ||
254 | cpu_clear(cpu, c1e_mask); | ||
255 | } | ||
256 | |||
247 | /* | 257 | /* |
248 | * C1E aware idle routine. We check for C1E active in the interrupt | 258 | * C1E aware idle routine. We check for C1E active in the interrupt |
249 | * pending message MSR. If we detect C1E, then we handle it the same | 259 | * pending message MSR. If we detect C1E, then we handle it the same |
@@ -251,9 +261,6 @@ static int __cpuinit check_c1e_idle(const struct cpuinfo_x86 *c) | |||
251 | */ | 261 | */ |
252 | static void c1e_idle(void) | 262 | static void c1e_idle(void) |
253 | { | 263 | { |
254 | static cpumask_t c1e_mask = CPU_MASK_NONE; | ||
255 | static int c1e_detected; | ||
256 | |||
257 | if (need_resched()) | 264 | if (need_resched()) |
258 | return; | 265 | return; |
259 | 266 | ||
@@ -263,8 +270,10 @@ static void c1e_idle(void) | |||
263 | rdmsr(MSR_K8_INT_PENDING_MSG, lo, hi); | 270 | rdmsr(MSR_K8_INT_PENDING_MSG, lo, hi); |
264 | if (lo & K8_INTP_C1E_ACTIVE_MASK) { | 271 | if (lo & K8_INTP_C1E_ACTIVE_MASK) { |
265 | c1e_detected = 1; | 272 | c1e_detected = 1; |
266 | mark_tsc_unstable("TSC halt in C1E"); | 273 | if (!boot_cpu_has(X86_FEATURE_CONSTANT_TSC)) |
267 | printk(KERN_INFO "System has C1E enabled\n"); | 274 | mark_tsc_unstable("TSC halt in AMD C1E"); |
275 | printk(KERN_INFO "System has AMD C1E enabled\n"); | ||
276 | set_cpu_cap(&boot_cpu_data, X86_FEATURE_AMDC1E); | ||
268 | } | 277 | } |
269 | } | 278 | } |
270 | 279 | ||
@@ -326,6 +335,9 @@ void __cpuinit select_idle_routine(const struct cpuinfo_x86 *c) | |||
326 | 335 | ||
327 | static int __init idle_setup(char *str) | 336 | static int __init idle_setup(char *str) |
328 | { | 337 | { |
338 | if (!str) | ||
339 | return -EINVAL; | ||
340 | |||
329 | if (!strcmp(str, "poll")) { | 341 | if (!strcmp(str, "poll")) { |
330 | printk("using polling idle threads.\n"); | 342 | printk("using polling idle threads.\n"); |
331 | pm_idle = poll_idle; | 343 | pm_idle = poll_idle; |
diff --git a/arch/x86/kernel/process_32.c b/arch/x86/kernel/process_32.c index 0c3927accb00..31f40b24bf5d 100644 --- a/arch/x86/kernel/process_32.c +++ b/arch/x86/kernel/process_32.c | |||
@@ -55,6 +55,7 @@ | |||
55 | #include <asm/tlbflush.h> | 55 | #include <asm/tlbflush.h> |
56 | #include <asm/cpu.h> | 56 | #include <asm/cpu.h> |
57 | #include <asm/kdebug.h> | 57 | #include <asm/kdebug.h> |
58 | #include <asm/idle.h> | ||
58 | 59 | ||
59 | asmlinkage void ret_from_fork(void) __asm__("ret_from_fork"); | 60 | asmlinkage void ret_from_fork(void) __asm__("ret_from_fork"); |
60 | 61 | ||
@@ -88,6 +89,7 @@ static void cpu_exit_clear(void) | |||
88 | cpu_clear(cpu, cpu_callin_map); | 89 | cpu_clear(cpu, cpu_callin_map); |
89 | 90 | ||
90 | numa_remove_cpu(cpu); | 91 | numa_remove_cpu(cpu); |
92 | c1e_remove_cpu(cpu); | ||
91 | } | 93 | } |
92 | 94 | ||
93 | /* We don't actually take CPU down, just spin without interrupts. */ | 95 | /* We don't actually take CPU down, just spin without interrupts. */ |
@@ -95,7 +97,6 @@ static inline void play_dead(void) | |||
95 | { | 97 | { |
96 | /* This must be done before dead CPU ack */ | 98 | /* This must be done before dead CPU ack */ |
97 | cpu_exit_clear(); | 99 | cpu_exit_clear(); |
98 | wbinvd(); | ||
99 | mb(); | 100 | mb(); |
100 | /* Ack it */ | 101 | /* Ack it */ |
101 | __get_cpu_var(cpu_state) = CPU_DEAD; | 102 | __get_cpu_var(cpu_state) = CPU_DEAD; |
@@ -104,8 +105,8 @@ static inline void play_dead(void) | |||
104 | * With physical CPU hotplug, we should halt the cpu | 105 | * With physical CPU hotplug, we should halt the cpu |
105 | */ | 106 | */ |
106 | local_irq_disable(); | 107 | local_irq_disable(); |
107 | while (1) | 108 | /* mask all interrupts, flush any and all caches, and halt */ |
108 | halt(); | 109 | wbinvd_halt(); |
109 | } | 110 | } |
110 | #else | 111 | #else |
111 | static inline void play_dead(void) | 112 | static inline void play_dead(void) |
@@ -128,7 +129,7 @@ void cpu_idle(void) | |||
128 | 129 | ||
129 | /* endless idle loop with no priority at all */ | 130 | /* endless idle loop with no priority at all */ |
130 | while (1) { | 131 | while (1) { |
131 | tick_nohz_stop_sched_tick(); | 132 | tick_nohz_stop_sched_tick(1); |
132 | while (!need_resched()) { | 133 | while (!need_resched()) { |
133 | 134 | ||
134 | check_pgt_cache(); | 135 | check_pgt_cache(); |
diff --git a/arch/x86/kernel/process_64.c b/arch/x86/kernel/process_64.c index a8e53626ac9a..e12e0e4dd256 100644 --- a/arch/x86/kernel/process_64.c +++ b/arch/x86/kernel/process_64.c | |||
@@ -93,14 +93,15 @@ DECLARE_PER_CPU(int, cpu_state); | |||
93 | static inline void play_dead(void) | 93 | static inline void play_dead(void) |
94 | { | 94 | { |
95 | idle_task_exit(); | 95 | idle_task_exit(); |
96 | wbinvd(); | 96 | c1e_remove_cpu(raw_smp_processor_id()); |
97 | |||
97 | mb(); | 98 | mb(); |
98 | /* Ack it */ | 99 | /* Ack it */ |
99 | __get_cpu_var(cpu_state) = CPU_DEAD; | 100 | __get_cpu_var(cpu_state) = CPU_DEAD; |
100 | 101 | ||
101 | local_irq_disable(); | 102 | local_irq_disable(); |
102 | while (1) | 103 | /* mask all interrupts, flush any and all caches, and halt */ |
103 | halt(); | 104 | wbinvd_halt(); |
104 | } | 105 | } |
105 | #else | 106 | #else |
106 | static inline void play_dead(void) | 107 | static inline void play_dead(void) |
@@ -120,7 +121,7 @@ void cpu_idle(void) | |||
120 | current_thread_info()->status |= TS_POLLING; | 121 | current_thread_info()->status |= TS_POLLING; |
121 | /* endless idle loop with no priority at all */ | 122 | /* endless idle loop with no priority at all */ |
122 | while (1) { | 123 | while (1) { |
123 | tick_nohz_stop_sched_tick(); | 124 | tick_nohz_stop_sched_tick(1); |
124 | while (!need_resched()) { | 125 | while (!need_resched()) { |
125 | 126 | ||
126 | rmb(); | 127 | rmb(); |
@@ -537,8 +538,8 @@ static inline void __switch_to_xtra(struct task_struct *prev_p, | |||
537 | struct task_struct * | 538 | struct task_struct * |
538 | __switch_to(struct task_struct *prev_p, struct task_struct *next_p) | 539 | __switch_to(struct task_struct *prev_p, struct task_struct *next_p) |
539 | { | 540 | { |
540 | struct thread_struct *prev = &prev_p->thread, | 541 | struct thread_struct *prev = &prev_p->thread; |
541 | *next = &next_p->thread; | 542 | struct thread_struct *next = &next_p->thread; |
542 | int cpu = smp_processor_id(); | 543 | int cpu = smp_processor_id(); |
543 | struct tss_struct *tss = &per_cpu(init_tss, cpu); | 544 | struct tss_struct *tss = &per_cpu(init_tss, cpu); |
544 | unsigned fsindex, gsindex; | 545 | unsigned fsindex, gsindex; |
@@ -586,35 +587,34 @@ __switch_to(struct task_struct *prev_p, struct task_struct *next_p) | |||
586 | 587 | ||
587 | /* | 588 | /* |
588 | * Switch FS and GS. | 589 | * Switch FS and GS. |
590 | * | ||
591 | * Segment register != 0 always requires a reload. Also | ||
592 | * reload when it has changed. When prev process used 64bit | ||
593 | * base always reload to avoid an information leak. | ||
589 | */ | 594 | */ |
590 | { | 595 | if (unlikely(fsindex | next->fsindex | prev->fs)) { |
591 | /* segment register != 0 always requires a reload. | 596 | loadsegment(fs, next->fsindex); |
592 | also reload when it has changed. | 597 | /* |
593 | when prev process used 64bit base always reload | 598 | * Check if the user used a selector != 0; if yes |
594 | to avoid an information leak. */ | 599 | * clear 64bit base, since overloaded base is always |
595 | if (unlikely(fsindex | next->fsindex | prev->fs)) { | 600 | * mapped to the Null selector |
596 | loadsegment(fs, next->fsindex); | 601 | */ |
597 | /* check if the user used a selector != 0 | 602 | if (fsindex) |
598 | * if yes clear 64bit base, since overloaded base | ||
599 | * is always mapped to the Null selector | ||
600 | */ | ||
601 | if (fsindex) | ||
602 | prev->fs = 0; | 603 | prev->fs = 0; |
603 | } | 604 | } |
604 | /* when next process has a 64bit base use it */ | 605 | /* when next process has a 64bit base use it */ |
605 | if (next->fs) | 606 | if (next->fs) |
606 | wrmsrl(MSR_FS_BASE, next->fs); | 607 | wrmsrl(MSR_FS_BASE, next->fs); |
607 | prev->fsindex = fsindex; | 608 | prev->fsindex = fsindex; |
608 | 609 | ||
609 | if (unlikely(gsindex | next->gsindex | prev->gs)) { | 610 | if (unlikely(gsindex | next->gsindex | prev->gs)) { |
610 | load_gs_index(next->gsindex); | 611 | load_gs_index(next->gsindex); |
611 | if (gsindex) | 612 | if (gsindex) |
612 | prev->gs = 0; | 613 | prev->gs = 0; |
613 | } | ||
614 | if (next->gs) | ||
615 | wrmsrl(MSR_KERNEL_GS_BASE, next->gs); | ||
616 | prev->gsindex = gsindex; | ||
617 | } | 614 | } |
615 | if (next->gs) | ||
616 | wrmsrl(MSR_KERNEL_GS_BASE, next->gs); | ||
617 | prev->gsindex = gsindex; | ||
618 | 618 | ||
619 | /* Must be after DS reload */ | 619 | /* Must be after DS reload */ |
620 | unlazy_fpu(prev_p); | 620 | unlazy_fpu(prev_p); |
@@ -627,7 +627,8 @@ __switch_to(struct task_struct *prev_p, struct task_struct *next_p) | |||
627 | write_pda(pcurrent, next_p); | 627 | write_pda(pcurrent, next_p); |
628 | 628 | ||
629 | write_pda(kernelstack, | 629 | write_pda(kernelstack, |
630 | (unsigned long)task_stack_page(next_p) + THREAD_SIZE - PDA_STACKOFFSET); | 630 | (unsigned long)task_stack_page(next_p) + |
631 | THREAD_SIZE - PDA_STACKOFFSET); | ||
631 | #ifdef CONFIG_CC_STACKPROTECTOR | 632 | #ifdef CONFIG_CC_STACKPROTECTOR |
632 | write_pda(stack_canary, next_p->stack_canary); | 633 | write_pda(stack_canary, next_p->stack_canary); |
633 | /* | 634 | /* |
diff --git a/arch/x86/kernel/ptrace.c b/arch/x86/kernel/ptrace.c index 77040b6070e1..e37dccce85db 100644 --- a/arch/x86/kernel/ptrace.c +++ b/arch/x86/kernel/ptrace.c | |||
@@ -1357,8 +1357,6 @@ const struct user_regset_view *task_user_regset_view(struct task_struct *task) | |||
1357 | #endif | 1357 | #endif |
1358 | } | 1358 | } |
1359 | 1359 | ||
1360 | #ifdef CONFIG_X86_32 | ||
1361 | |||
1362 | void send_sigtrap(struct task_struct *tsk, struct pt_regs *regs, int error_code) | 1360 | void send_sigtrap(struct task_struct *tsk, struct pt_regs *regs, int error_code) |
1363 | { | 1361 | { |
1364 | struct siginfo info; | 1362 | struct siginfo info; |
@@ -1377,89 +1375,10 @@ void send_sigtrap(struct task_struct *tsk, struct pt_regs *regs, int error_code) | |||
1377 | force_sig_info(SIGTRAP, &info, tsk); | 1375 | force_sig_info(SIGTRAP, &info, tsk); |
1378 | } | 1376 | } |
1379 | 1377 | ||
1380 | /* notification of system call entry/exit | ||
1381 | * - triggered by current->work.syscall_trace | ||
1382 | */ | ||
1383 | int do_syscall_trace(struct pt_regs *regs, int entryexit) | ||
1384 | { | ||
1385 | int is_sysemu = test_thread_flag(TIF_SYSCALL_EMU); | ||
1386 | /* | ||
1387 | * With TIF_SYSCALL_EMU set we want to ignore TIF_SINGLESTEP for syscall | ||
1388 | * interception | ||
1389 | */ | ||
1390 | int is_singlestep = !is_sysemu && test_thread_flag(TIF_SINGLESTEP); | ||
1391 | int ret = 0; | ||
1392 | |||
1393 | /* do the secure computing check first */ | ||
1394 | if (!entryexit) | ||
1395 | secure_computing(regs->orig_ax); | ||
1396 | |||
1397 | if (unlikely(current->audit_context)) { | ||
1398 | if (entryexit) | ||
1399 | audit_syscall_exit(AUDITSC_RESULT(regs->ax), | ||
1400 | regs->ax); | ||
1401 | /* Debug traps, when using PTRACE_SINGLESTEP, must be sent only | ||
1402 | * on the syscall exit path. Normally, when TIF_SYSCALL_AUDIT is | ||
1403 | * not used, entry.S will call us only on syscall exit, not | ||
1404 | * entry; so when TIF_SYSCALL_AUDIT is used we must avoid | ||
1405 | * calling send_sigtrap() on syscall entry. | ||
1406 | * | ||
1407 | * Note that when PTRACE_SYSEMU_SINGLESTEP is used, | ||
1408 | * is_singlestep is false, despite his name, so we will still do | ||
1409 | * the correct thing. | ||
1410 | */ | ||
1411 | else if (is_singlestep) | ||
1412 | goto out; | ||
1413 | } | ||
1414 | |||
1415 | if (!(current->ptrace & PT_PTRACED)) | ||
1416 | goto out; | ||
1417 | |||
1418 | /* If a process stops on the 1st tracepoint with SYSCALL_TRACE | ||
1419 | * and then is resumed with SYSEMU_SINGLESTEP, it will come in | ||
1420 | * here. We have to check this and return */ | ||
1421 | if (is_sysemu && entryexit) | ||
1422 | return 0; | ||
1423 | |||
1424 | /* Fake a debug trap */ | ||
1425 | if (is_singlestep) | ||
1426 | send_sigtrap(current, regs, 0); | ||
1427 | |||
1428 | if (!test_thread_flag(TIF_SYSCALL_TRACE) && !is_sysemu) | ||
1429 | goto out; | ||
1430 | |||
1431 | /* the 0x80 provides a way for the tracing parent to distinguish | ||
1432 | between a syscall stop and SIGTRAP delivery */ | ||
1433 | /* Note that the debugger could change the result of test_thread_flag!*/ | ||
1434 | ptrace_notify(SIGTRAP | ((current->ptrace & PT_TRACESYSGOOD) ? 0x80:0)); | ||
1435 | |||
1436 | /* | ||
1437 | * this isn't the same as continuing with a signal, but it will do | ||
1438 | * for normal use. strace only continues with a signal if the | ||
1439 | * stopping signal is not SIGTRAP. -brl | ||
1440 | */ | ||
1441 | if (current->exit_code) { | ||
1442 | send_sig(current->exit_code, current, 1); | ||
1443 | current->exit_code = 0; | ||
1444 | } | ||
1445 | ret = is_sysemu; | ||
1446 | out: | ||
1447 | if (unlikely(current->audit_context) && !entryexit) | ||
1448 | audit_syscall_entry(AUDIT_ARCH_I386, regs->orig_ax, | ||
1449 | regs->bx, regs->cx, regs->dx, regs->si); | ||
1450 | if (ret == 0) | ||
1451 | return 0; | ||
1452 | |||
1453 | regs->orig_ax = -1; /* force skip of syscall restarting */ | ||
1454 | if (unlikely(current->audit_context)) | ||
1455 | audit_syscall_exit(AUDITSC_RESULT(regs->ax), regs->ax); | ||
1456 | return 1; | ||
1457 | } | ||
1458 | |||
1459 | #else /* CONFIG_X86_64 */ | ||
1460 | |||
1461 | static void syscall_trace(struct pt_regs *regs) | 1378 | static void syscall_trace(struct pt_regs *regs) |
1462 | { | 1379 | { |
1380 | if (!(current->ptrace & PT_PTRACED)) | ||
1381 | return; | ||
1463 | 1382 | ||
1464 | #if 0 | 1383 | #if 0 |
1465 | printk("trace %s ip %lx sp %lx ax %d origrax %d caller %lx tiflags %x ptrace %x\n", | 1384 | printk("trace %s ip %lx sp %lx ax %d origrax %d caller %lx tiflags %x ptrace %x\n", |
@@ -1481,39 +1400,81 @@ static void syscall_trace(struct pt_regs *regs) | |||
1481 | } | 1400 | } |
1482 | } | 1401 | } |
1483 | 1402 | ||
1484 | asmlinkage void syscall_trace_enter(struct pt_regs *regs) | 1403 | #ifdef CONFIG_X86_32 |
1404 | # define IS_IA32 1 | ||
1405 | #elif defined CONFIG_IA32_EMULATION | ||
1406 | # define IS_IA32 test_thread_flag(TIF_IA32) | ||
1407 | #else | ||
1408 | # define IS_IA32 0 | ||
1409 | #endif | ||
1410 | |||
1411 | /* | ||
1412 | * We must return the syscall number to actually look up in the table. | ||
1413 | * This can be -1L to skip running any syscall at all. | ||
1414 | */ | ||
1415 | asmregparm long syscall_trace_enter(struct pt_regs *regs) | ||
1485 | { | 1416 | { |
1417 | long ret = 0; | ||
1418 | |||
1419 | /* | ||
1420 | * If we stepped into a sysenter/syscall insn, it trapped in | ||
1421 | * kernel mode; do_debug() cleared TF and set TIF_SINGLESTEP. | ||
1422 | * If user-mode had set TF itself, then it's still clear from | ||
1423 | * do_debug() and we need to set it again to restore the user | ||
1424 | * state. If we entered on the slow path, TF was already set. | ||
1425 | */ | ||
1426 | if (test_thread_flag(TIF_SINGLESTEP)) | ||
1427 | regs->flags |= X86_EFLAGS_TF; | ||
1428 | |||
1486 | /* do the secure computing check first */ | 1429 | /* do the secure computing check first */ |
1487 | secure_computing(regs->orig_ax); | 1430 | secure_computing(regs->orig_ax); |
1488 | 1431 | ||
1489 | if (test_thread_flag(TIF_SYSCALL_TRACE) | 1432 | if (unlikely(test_thread_flag(TIF_SYSCALL_EMU))) |
1490 | && (current->ptrace & PT_PTRACED)) | 1433 | ret = -1L; |
1434 | |||
1435 | if (ret || test_thread_flag(TIF_SYSCALL_TRACE)) | ||
1491 | syscall_trace(regs); | 1436 | syscall_trace(regs); |
1492 | 1437 | ||
1493 | if (unlikely(current->audit_context)) { | 1438 | if (unlikely(current->audit_context)) { |
1494 | if (test_thread_flag(TIF_IA32)) { | 1439 | if (IS_IA32) |
1495 | audit_syscall_entry(AUDIT_ARCH_I386, | 1440 | audit_syscall_entry(AUDIT_ARCH_I386, |
1496 | regs->orig_ax, | 1441 | regs->orig_ax, |
1497 | regs->bx, regs->cx, | 1442 | regs->bx, regs->cx, |
1498 | regs->dx, regs->si); | 1443 | regs->dx, regs->si); |
1499 | } else { | 1444 | #ifdef CONFIG_X86_64 |
1445 | else | ||
1500 | audit_syscall_entry(AUDIT_ARCH_X86_64, | 1446 | audit_syscall_entry(AUDIT_ARCH_X86_64, |
1501 | regs->orig_ax, | 1447 | regs->orig_ax, |
1502 | regs->di, regs->si, | 1448 | regs->di, regs->si, |
1503 | regs->dx, regs->r10); | 1449 | regs->dx, regs->r10); |
1504 | } | 1450 | #endif |
1505 | } | 1451 | } |
1452 | |||
1453 | return ret ?: regs->orig_ax; | ||
1506 | } | 1454 | } |
1507 | 1455 | ||
1508 | asmlinkage void syscall_trace_leave(struct pt_regs *regs) | 1456 | asmregparm void syscall_trace_leave(struct pt_regs *regs) |
1509 | { | 1457 | { |
1510 | if (unlikely(current->audit_context)) | 1458 | if (unlikely(current->audit_context)) |
1511 | audit_syscall_exit(AUDITSC_RESULT(regs->ax), regs->ax); | 1459 | audit_syscall_exit(AUDITSC_RESULT(regs->ax), regs->ax); |
1512 | 1460 | ||
1513 | if ((test_thread_flag(TIF_SYSCALL_TRACE) | 1461 | if (test_thread_flag(TIF_SYSCALL_TRACE)) |
1514 | || test_thread_flag(TIF_SINGLESTEP)) | ||
1515 | && (current->ptrace & PT_PTRACED)) | ||
1516 | syscall_trace(regs); | 1462 | syscall_trace(regs); |
1517 | } | ||
1518 | 1463 | ||
1519 | #endif /* CONFIG_X86_32 */ | 1464 | /* |
1465 | * If TIF_SYSCALL_EMU is set, we only get here because of | ||
1466 | * TIF_SINGLESTEP (i.e. this is PTRACE_SYSEMU_SINGLESTEP). | ||
1467 | * We already reported this syscall instruction in | ||
1468 | * syscall_trace_enter(), so don't do any more now. | ||
1469 | */ | ||
1470 | if (unlikely(test_thread_flag(TIF_SYSCALL_EMU))) | ||
1471 | return; | ||
1472 | |||
1473 | /* | ||
1474 | * If we are single-stepping, synthesize a trap to follow the | ||
1475 | * system call instruction. | ||
1476 | */ | ||
1477 | if (test_thread_flag(TIF_SINGLESTEP) && | ||
1478 | (current->ptrace & PT_PTRACED)) | ||
1479 | send_sigtrap(current, regs, 0); | ||
1480 | } | ||
diff --git a/arch/x86/kernel/reboot.c b/arch/x86/kernel/reboot.c index f8a62160e151..724adfc63cb9 100644 --- a/arch/x86/kernel/reboot.c +++ b/arch/x86/kernel/reboot.c | |||
@@ -177,6 +177,14 @@ static struct dmi_system_id __initdata reboot_dmi_table[] = { | |||
177 | DMI_MATCH(DMI_PRODUCT_NAME, "PowerEdge 2400"), | 177 | DMI_MATCH(DMI_PRODUCT_NAME, "PowerEdge 2400"), |
178 | }, | 178 | }, |
179 | }, | 179 | }, |
180 | { /* Handle problems with rebooting on Dell T5400's */ | ||
181 | .callback = set_bios_reboot, | ||
182 | .ident = "Dell Precision T5400", | ||
183 | .matches = { | ||
184 | DMI_MATCH(DMI_SYS_VENDOR, "Dell Inc."), | ||
185 | DMI_MATCH(DMI_PRODUCT_NAME, "Precision WorkStation T5400"), | ||
186 | }, | ||
187 | }, | ||
180 | { /* Handle problems with rebooting on HP laptops */ | 188 | { /* Handle problems with rebooting on HP laptops */ |
181 | .callback = set_bios_reboot, | 189 | .callback = set_bios_reboot, |
182 | .ident = "HP Compaq Laptop", | 190 | .ident = "HP Compaq Laptop", |
@@ -403,10 +411,9 @@ void native_machine_shutdown(void) | |||
403 | { | 411 | { |
404 | /* Stop the cpus and apics */ | 412 | /* Stop the cpus and apics */ |
405 | #ifdef CONFIG_SMP | 413 | #ifdef CONFIG_SMP |
406 | int reboot_cpu_id; | ||
407 | 414 | ||
408 | /* The boot cpu is always logical cpu 0 */ | 415 | /* The boot cpu is always logical cpu 0 */ |
409 | reboot_cpu_id = 0; | 416 | int reboot_cpu_id = 0; |
410 | 417 | ||
411 | #ifdef CONFIG_X86_32 | 418 | #ifdef CONFIG_X86_32 |
412 | /* See if a command line override has been given */ | 419 | /* See if a command line override has been given */ |
diff --git a/arch/x86/kernel/relocate_kernel_32.S b/arch/x86/kernel/relocate_kernel_32.S index c30fe25d470d..6f50664b2ba5 100644 --- a/arch/x86/kernel/relocate_kernel_32.S +++ b/arch/x86/kernel/relocate_kernel_32.S | |||
@@ -20,11 +20,45 @@ | |||
20 | #define PAGE_ATTR (_PAGE_PRESENT | _PAGE_RW | _PAGE_ACCESSED | _PAGE_DIRTY) | 20 | #define PAGE_ATTR (_PAGE_PRESENT | _PAGE_RW | _PAGE_ACCESSED | _PAGE_DIRTY) |
21 | #define PAE_PGD_ATTR (_PAGE_PRESENT) | 21 | #define PAE_PGD_ATTR (_PAGE_PRESENT) |
22 | 22 | ||
23 | /* The region from control_page + KEXEC_CONTROL_CODE_MAX_SIZE up to | ||
24 | * control_page + PAGE_SIZE is used as data storage and stack for | ||
25 | * jumping back. | ||
26 | */ | ||
27 | #define DATA(offset) (KEXEC_CONTROL_CODE_MAX_SIZE+(offset)) | ||
28 | |||
29 | /* Minimal CPU state */ | ||
30 | #define ESP DATA(0x0) | ||
31 | #define CR0 DATA(0x4) | ||
32 | #define CR3 DATA(0x8) | ||
33 | #define CR4 DATA(0xc) | ||
34 | |||
35 | /* other data */ | ||
36 | #define CP_VA_CONTROL_PAGE DATA(0x10) | ||
37 | #define CP_PA_PGD DATA(0x14) | ||
38 | #define CP_PA_SWAP_PAGE DATA(0x18) | ||
39 | #define CP_PA_BACKUP_PAGES_MAP DATA(0x1c) | ||
40 | |||
23 | .text | 41 | .text |
24 | .align PAGE_SIZE | 42 | .align PAGE_SIZE |
25 | .globl relocate_kernel | 43 | .globl relocate_kernel |
26 | relocate_kernel: | 44 | relocate_kernel: |
27 | movl 8(%esp), %ebp /* list of pages */ | 45 | /* Save the CPU context, used for jumping back */ |
46 | |||
47 | pushl %ebx | ||
48 | pushl %esi | ||
49 | pushl %edi | ||
50 | pushl %ebp | ||
51 | pushf | ||
52 | |||
53 | movl 20+8(%esp), %ebp /* list of pages */ | ||
54 | movl PTR(VA_CONTROL_PAGE)(%ebp), %edi | ||
55 | movl %esp, ESP(%edi) | ||
56 | movl %cr0, %eax | ||
57 | movl %eax, CR0(%edi) | ||
58 | movl %cr3, %eax | ||
59 | movl %eax, CR3(%edi) | ||
60 | movl %cr4, %eax | ||
61 | movl %eax, CR4(%edi) | ||
28 | 62 | ||
29 | #ifdef CONFIG_X86_PAE | 63 | #ifdef CONFIG_X86_PAE |
30 | /* map the control page at its virtual address */ | 64 | /* map the control page at its virtual address */ |
@@ -138,15 +172,25 @@ relocate_kernel: | |||
138 | 172 | ||
139 | relocate_new_kernel: | 173 | relocate_new_kernel: |
140 | /* read the arguments and say goodbye to the stack */ | 174 | /* read the arguments and say goodbye to the stack */ |
141 | movl 4(%esp), %ebx /* page_list */ | 175 | movl 20+4(%esp), %ebx /* page_list */ |
142 | movl 8(%esp), %ebp /* list of pages */ | 176 | movl 20+8(%esp), %ebp /* list of pages */ |
143 | movl 12(%esp), %edx /* start address */ | 177 | movl 20+12(%esp), %edx /* start address */ |
144 | movl 16(%esp), %ecx /* cpu_has_pae */ | 178 | movl 20+16(%esp), %ecx /* cpu_has_pae */ |
179 | movl 20+20(%esp), %esi /* preserve_context */ | ||
145 | 180 | ||
146 | /* zero out flags, and disable interrupts */ | 181 | /* zero out flags, and disable interrupts */ |
147 | pushl $0 | 182 | pushl $0 |
148 | popfl | 183 | popfl |
149 | 184 | ||
185 | /* save some information for jumping back */ | ||
186 | movl PTR(VA_CONTROL_PAGE)(%ebp), %edi | ||
187 | movl %edi, CP_VA_CONTROL_PAGE(%edi) | ||
188 | movl PTR(PA_PGD)(%ebp), %eax | ||
189 | movl %eax, CP_PA_PGD(%edi) | ||
190 | movl PTR(PA_SWAP_PAGE)(%ebp), %eax | ||
191 | movl %eax, CP_PA_SWAP_PAGE(%edi) | ||
192 | movl %ebx, CP_PA_BACKUP_PAGES_MAP(%edi) | ||
193 | |||
150 | /* get physical address of control page now */ | 194 | /* get physical address of control page now */ |
151 | /* this is impossible after page table switch */ | 195 | /* this is impossible after page table switch */ |
152 | movl PTR(PA_CONTROL_PAGE)(%ebp), %edi | 196 | movl PTR(PA_CONTROL_PAGE)(%ebp), %edi |
@@ -197,8 +241,90 @@ identity_mapped: | |||
197 | xorl %eax, %eax | 241 | xorl %eax, %eax |
198 | movl %eax, %cr3 | 242 | movl %eax, %cr3 |
199 | 243 | ||
244 | movl CP_PA_SWAP_PAGE(%edi), %eax | ||
245 | pushl %eax | ||
246 | pushl %ebx | ||
247 | call swap_pages | ||
248 | addl $8, %esp | ||
249 | |||
250 | /* To be certain of avoiding problems with self-modifying code | ||
251 | * I need to execute a serializing instruction here. | ||
252 | * So I flush the TLB, it's handy, and not processor dependent. | ||
253 | */ | ||
254 | xorl %eax, %eax | ||
255 | movl %eax, %cr3 | ||
256 | |||
257 | /* set all of the registers to known values */ | ||
258 | /* leave %esp alone */ | ||
259 | |||
260 | testl %esi, %esi | ||
261 | jnz 1f | ||
262 | xorl %edi, %edi | ||
263 | xorl %eax, %eax | ||
264 | xorl %ebx, %ebx | ||
265 | xorl %ecx, %ecx | ||
266 | xorl %edx, %edx | ||
267 | xorl %esi, %esi | ||
268 | xorl %ebp, %ebp | ||
269 | ret | ||
270 | 1: | ||
271 | popl %edx | ||
272 | movl CP_PA_SWAP_PAGE(%edi), %esp | ||
273 | addl $PAGE_SIZE, %esp | ||
274 | 2: | ||
275 | call *%edx | ||
276 | |||
277 | /* get the re-entry point of the peer system */ | ||
278 | movl 0(%esp), %ebp | ||
279 | call 1f | ||
280 | 1: | ||
281 | popl %ebx | ||
282 | subl $(1b - relocate_kernel), %ebx | ||
283 | movl CP_VA_CONTROL_PAGE(%ebx), %edi | ||
284 | lea PAGE_SIZE(%ebx), %esp | ||
285 | movl CP_PA_SWAP_PAGE(%ebx), %eax | ||
286 | movl CP_PA_BACKUP_PAGES_MAP(%ebx), %edx | ||
287 | pushl %eax | ||
288 | pushl %edx | ||
289 | call swap_pages | ||
290 | addl $8, %esp | ||
291 | movl CP_PA_PGD(%ebx), %eax | ||
292 | movl %eax, %cr3 | ||
293 | movl %cr0, %eax | ||
294 | orl $(1<<31), %eax | ||
295 | movl %eax, %cr0 | ||
296 | lea PAGE_SIZE(%edi), %esp | ||
297 | movl %edi, %eax | ||
298 | addl $(virtual_mapped - relocate_kernel), %eax | ||
299 | pushl %eax | ||
300 | ret | ||
301 | |||
302 | virtual_mapped: | ||
303 | movl CR4(%edi), %eax | ||
304 | movl %eax, %cr4 | ||
305 | movl CR3(%edi), %eax | ||
306 | movl %eax, %cr3 | ||
307 | movl CR0(%edi), %eax | ||
308 | movl %eax, %cr0 | ||
309 | movl ESP(%edi), %esp | ||
310 | movl %ebp, %eax | ||
311 | |||
312 | popf | ||
313 | popl %ebp | ||
314 | popl %edi | ||
315 | popl %esi | ||
316 | popl %ebx | ||
317 | ret | ||
318 | |||
200 | /* Do the copies */ | 319 | /* Do the copies */ |
201 | movl %ebx, %ecx | 320 | swap_pages: |
321 | movl 8(%esp), %edx | ||
322 | movl 4(%esp), %ecx | ||
323 | pushl %ebp | ||
324 | pushl %ebx | ||
325 | pushl %edi | ||
326 | pushl %esi | ||
327 | movl %ecx, %ebx | ||
202 | jmp 1f | 328 | jmp 1f |
203 | 329 | ||
204 | 0: /* top, read another word from the indirection page */ | 330 | 0: /* top, read another word from the indirection page */ |
@@ -226,27 +352,31 @@ identity_mapped: | |||
226 | movl %ecx, %esi /* For every source page do a copy */ | 352 | movl %ecx, %esi /* For every source page do a copy */ |
227 | andl $0xfffff000, %esi | 353 | andl $0xfffff000, %esi |
228 | 354 | ||
355 | movl %edi, %eax | ||
356 | movl %esi, %ebp | ||
357 | |||
358 | movl %edx, %edi | ||
229 | movl $1024, %ecx | 359 | movl $1024, %ecx |
230 | rep ; movsl | 360 | rep ; movsl |
231 | jmp 0b | ||
232 | |||
233 | 3: | ||
234 | 361 | ||
235 | /* To be certain of avoiding problems with self-modifying code | 362 | movl %ebp, %edi |
236 | * I need to execute a serializing instruction here. | 363 | movl %eax, %esi |
237 | * So I flush the TLB, it's handy, and not processor dependent. | 364 | movl $1024, %ecx |
238 | */ | 365 | rep ; movsl |
239 | xorl %eax, %eax | ||
240 | movl %eax, %cr3 | ||
241 | 366 | ||
242 | /* set all of the registers to known values */ | 367 | movl %eax, %edi |
243 | /* leave %esp alone */ | 368 | movl %edx, %esi |
369 | movl $1024, %ecx | ||
370 | rep ; movsl | ||
244 | 371 | ||
245 | xorl %eax, %eax | 372 | lea PAGE_SIZE(%ebp), %esi |
246 | xorl %ebx, %ebx | 373 | jmp 0b |
247 | xorl %ecx, %ecx | 374 | 3: |
248 | xorl %edx, %edx | 375 | popl %esi |
249 | xorl %esi, %esi | 376 | popl %edi |
250 | xorl %edi, %edi | 377 | popl %ebx |
251 | xorl %ebp, %ebp | 378 | popl %ebp |
252 | ret | 379 | ret |
380 | |||
381 | .globl kexec_control_code_size | ||
382 | .set kexec_control_code_size, . - relocate_kernel | ||
diff --git a/arch/x86/kernel/setup.c b/arch/x86/kernel/setup.c index 531b55b8e81a..9838f2539dfc 100644 --- a/arch/x86/kernel/setup.c +++ b/arch/x86/kernel/setup.c | |||
@@ -57,12 +57,8 @@ | |||
57 | #include <linux/slab.h> | 57 | #include <linux/slab.h> |
58 | #include <linux/user.h> | 58 | #include <linux/user.h> |
59 | #include <linux/delay.h> | 59 | #include <linux/delay.h> |
60 | #include <linux/highmem.h> | ||
61 | 60 | ||
62 | #include <linux/kallsyms.h> | 61 | #include <linux/kallsyms.h> |
63 | #include <linux/edd.h> | ||
64 | #include <linux/iscsi_ibft.h> | ||
65 | #include <linux/kexec.h> | ||
66 | #include <linux/cpufreq.h> | 62 | #include <linux/cpufreq.h> |
67 | #include <linux/dma-mapping.h> | 63 | #include <linux/dma-mapping.h> |
68 | #include <linux/ctype.h> | 64 | #include <linux/ctype.h> |
@@ -96,7 +92,7 @@ | |||
96 | #include <asm/smp.h> | 92 | #include <asm/smp.h> |
97 | #include <asm/desc.h> | 93 | #include <asm/desc.h> |
98 | #include <asm/dma.h> | 94 | #include <asm/dma.h> |
99 | #include <asm/gart.h> | 95 | #include <asm/iommu.h> |
100 | #include <asm/mmu_context.h> | 96 | #include <asm/mmu_context.h> |
101 | #include <asm/proto.h> | 97 | #include <asm/proto.h> |
102 | 98 | ||
@@ -104,7 +100,6 @@ | |||
104 | #include <asm/paravirt.h> | 100 | #include <asm/paravirt.h> |
105 | 101 | ||
106 | #include <asm/percpu.h> | 102 | #include <asm/percpu.h> |
107 | #include <asm/sections.h> | ||
108 | #include <asm/topology.h> | 103 | #include <asm/topology.h> |
109 | #include <asm/apicdef.h> | 104 | #include <asm/apicdef.h> |
110 | #ifdef CONFIG_X86_64 | 105 | #ifdef CONFIG_X86_64 |
@@ -450,7 +445,7 @@ static void __init reserve_early_setup_data(void) | |||
450 | * @size: Size of the crashkernel memory to reserve. | 445 | * @size: Size of the crashkernel memory to reserve. |
451 | * Returns the base address on success, and -1ULL on failure. | 446 | * Returns the base address on success, and -1ULL on failure. |
452 | */ | 447 | */ |
453 | unsigned long long find_and_reserve_crashkernel(unsigned long long size) | 448 | unsigned long long __init find_and_reserve_crashkernel(unsigned long long size) |
454 | { | 449 | { |
455 | const unsigned long long alignment = 16<<20; /* 16M */ | 450 | const unsigned long long alignment = 16<<20; /* 16M */ |
456 | unsigned long long start = 0LL; | 451 | unsigned long long start = 0LL; |
@@ -579,6 +574,10 @@ static int __init setup_elfcorehdr(char *arg) | |||
579 | early_param("elfcorehdr", setup_elfcorehdr); | 574 | early_param("elfcorehdr", setup_elfcorehdr); |
580 | #endif | 575 | #endif |
581 | 576 | ||
577 | static struct x86_quirks default_x86_quirks __initdata; | ||
578 | |||
579 | struct x86_quirks *x86_quirks __initdata = &default_x86_quirks; | ||
580 | |||
582 | /* | 581 | /* |
583 | * Determine if we were loaded by an EFI loader. If so, then we have also been | 582 | * Determine if we were loaded by an EFI loader. If so, then we have also been |
584 | * passed the efi memmap, systab, etc., so we should use these data structures | 583 | * passed the efi memmap, systab, etc., so we should use these data structures |
@@ -598,11 +597,11 @@ void __init setup_arch(char **cmdline_p) | |||
598 | memcpy(&boot_cpu_data, &new_cpu_data, sizeof(new_cpu_data)); | 597 | memcpy(&boot_cpu_data, &new_cpu_data, sizeof(new_cpu_data)); |
599 | visws_early_detect(); | 598 | visws_early_detect(); |
600 | pre_setup_arch_hook(); | 599 | pre_setup_arch_hook(); |
601 | early_cpu_init(); | ||
602 | #else | 600 | #else |
603 | printk(KERN_INFO "Command line: %s\n", boot_command_line); | 601 | printk(KERN_INFO "Command line: %s\n", boot_command_line); |
604 | #endif | 602 | #endif |
605 | 603 | ||
604 | early_cpu_init(); | ||
606 | early_ioremap_init(); | 605 | early_ioremap_init(); |
607 | 606 | ||
608 | ROOT_DEV = old_decode_dev(boot_params.hdr.root_dev); | 607 | ROOT_DEV = old_decode_dev(boot_params.hdr.root_dev); |
@@ -666,14 +665,23 @@ void __init setup_arch(char **cmdline_p) | |||
666 | bss_resource.start = virt_to_phys(&__bss_start); | 665 | bss_resource.start = virt_to_phys(&__bss_start); |
667 | bss_resource.end = virt_to_phys(&__bss_stop)-1; | 666 | bss_resource.end = virt_to_phys(&__bss_stop)-1; |
668 | 667 | ||
669 | #ifdef CONFIG_X86_64 | ||
670 | early_cpu_init(); | ||
671 | #endif | ||
672 | strlcpy(command_line, boot_command_line, COMMAND_LINE_SIZE); | 668 | strlcpy(command_line, boot_command_line, COMMAND_LINE_SIZE); |
673 | *cmdline_p = command_line; | 669 | *cmdline_p = command_line; |
674 | 670 | ||
675 | parse_early_param(); | 671 | parse_early_param(); |
676 | 672 | ||
673 | #ifdef CONFIG_X86_64 | ||
674 | check_efer(); | ||
675 | #endif | ||
676 | |||
677 | #if defined(CONFIG_VMI) && defined(CONFIG_X86_32) | ||
678 | /* | ||
679 | * Must be before kernel pagetables are setup | ||
680 | * or fixmap area is touched. | ||
681 | */ | ||
682 | vmi_init(); | ||
683 | #endif | ||
684 | |||
677 | /* after early param, so could get panic from serial */ | 685 | /* after early param, so could get panic from serial */ |
678 | reserve_early_setup_data(); | 686 | reserve_early_setup_data(); |
679 | 687 | ||
@@ -681,7 +689,7 @@ void __init setup_arch(char **cmdline_p) | |||
681 | #ifdef CONFIG_X86_LOCAL_APIC | 689 | #ifdef CONFIG_X86_LOCAL_APIC |
682 | disable_apic = 1; | 690 | disable_apic = 1; |
683 | #endif | 691 | #endif |
684 | clear_cpu_cap(&boot_cpu_data, X86_FEATURE_APIC); | 692 | setup_clear_cpu_cap(X86_FEATURE_APIC); |
685 | } | 693 | } |
686 | 694 | ||
687 | #ifdef CONFIG_PCI | 695 | #ifdef CONFIG_PCI |
@@ -734,7 +742,6 @@ void __init setup_arch(char **cmdline_p) | |||
734 | #else | 742 | #else |
735 | num_physpages = max_pfn; | 743 | num_physpages = max_pfn; |
736 | 744 | ||
737 | check_efer(); | ||
738 | 745 | ||
739 | /* How many end-of-memory variables you have, grandma! */ | 746 | /* How many end-of-memory variables you have, grandma! */ |
740 | /* need this before calling reserve_initrd */ | 747 | /* need this before calling reserve_initrd */ |
@@ -792,10 +799,6 @@ void __init setup_arch(char **cmdline_p) | |||
792 | 799 | ||
793 | initmem_init(0, max_pfn); | 800 | initmem_init(0, max_pfn); |
794 | 801 | ||
795 | #ifdef CONFIG_X86_64 | ||
796 | dma32_reserve_bootmem(); | ||
797 | #endif | ||
798 | |||
799 | #ifdef CONFIG_ACPI_SLEEP | 802 | #ifdef CONFIG_ACPI_SLEEP |
800 | /* | 803 | /* |
801 | * Reserve low memory region for sleep support. | 804 | * Reserve low memory region for sleep support. |
@@ -810,21 +813,25 @@ void __init setup_arch(char **cmdline_p) | |||
810 | #endif | 813 | #endif |
811 | reserve_crashkernel(); | 814 | reserve_crashkernel(); |
812 | 815 | ||
816 | #ifdef CONFIG_X86_64 | ||
817 | /* | ||
818 | * dma32_reserve_bootmem() allocates bootmem which may conflict | ||
819 | * with the crashkernel command line, so do that after | ||
820 | * reserve_crashkernel() | ||
821 | */ | ||
822 | dma32_reserve_bootmem(); | ||
823 | #endif | ||
824 | |||
813 | reserve_ibft_region(); | 825 | reserve_ibft_region(); |
814 | 826 | ||
815 | #ifdef CONFIG_KVM_CLOCK | 827 | #ifdef CONFIG_KVM_CLOCK |
816 | kvmclock_init(); | 828 | kvmclock_init(); |
817 | #endif | 829 | #endif |
818 | 830 | ||
819 | #if defined(CONFIG_VMI) && defined(CONFIG_X86_32) | 831 | paravirt_pagetable_setup_start(swapper_pg_dir); |
820 | /* | ||
821 | * Must be after max_low_pfn is determined, and before kernel | ||
822 | * pagetables are setup. | ||
823 | */ | ||
824 | vmi_init(); | ||
825 | #endif | ||
826 | |||
827 | paging_init(); | 832 | paging_init(); |
833 | paravirt_pagetable_setup_done(swapper_pg_dir); | ||
834 | paravirt_post_allocator_init(); | ||
828 | 835 | ||
829 | #ifdef CONFIG_X86_64 | 836 | #ifdef CONFIG_X86_64 |
830 | map_vsyscall(); | 837 | map_vsyscall(); |
@@ -854,23 +861,9 @@ void __init setup_arch(char **cmdline_p) | |||
854 | init_cpu_to_node(); | 861 | init_cpu_to_node(); |
855 | #endif | 862 | #endif |
856 | 863 | ||
857 | #ifdef CONFIG_X86_NUMAQ | ||
858 | /* | ||
859 | * need to check online nodes num, call it | ||
860 | * here before time_init/tsc_init | ||
861 | */ | ||
862 | numaq_tsc_disable(); | ||
863 | #endif | ||
864 | |||
865 | init_apic_mappings(); | 864 | init_apic_mappings(); |
866 | ioapic_init_mappings(); | 865 | ioapic_init_mappings(); |
867 | 866 | ||
868 | #if defined(CONFIG_SMP) && defined(CONFIG_X86_PC) && defined(CONFIG_X86_32) | ||
869 | if (def_to_bigsmp) | ||
870 | printk(KERN_WARNING "More than 8 CPUs detected and " | ||
871 | "CONFIG_X86_PC cannot handle it.\nUse " | ||
872 | "CONFIG_X86_GENERICARCH or CONFIG_X86_BIGSMP.\n"); | ||
873 | #endif | ||
874 | kvm_guest_init(); | 867 | kvm_guest_init(); |
875 | 868 | ||
876 | e820_reserve_resources(); | 869 | e820_reserve_resources(); |
diff --git a/arch/x86/kernel/setup_percpu.c b/arch/x86/kernel/setup_percpu.c index cac68430d31f..76e305e064f9 100644 --- a/arch/x86/kernel/setup_percpu.c +++ b/arch/x86/kernel/setup_percpu.c | |||
@@ -80,24 +80,6 @@ static void __init setup_per_cpu_maps(void) | |||
80 | #endif | 80 | #endif |
81 | } | 81 | } |
82 | 82 | ||
83 | #ifdef CONFIG_HAVE_CPUMASK_OF_CPU_MAP | ||
84 | cpumask_t *cpumask_of_cpu_map __read_mostly; | ||
85 | EXPORT_SYMBOL(cpumask_of_cpu_map); | ||
86 | |||
87 | /* requires nr_cpu_ids to be initialized */ | ||
88 | static void __init setup_cpumask_of_cpu(void) | ||
89 | { | ||
90 | int i; | ||
91 | |||
92 | /* alloc_bootmem zeroes memory */ | ||
93 | cpumask_of_cpu_map = alloc_bootmem_low(sizeof(cpumask_t) * nr_cpu_ids); | ||
94 | for (i = 0; i < nr_cpu_ids; i++) | ||
95 | cpu_set(i, cpumask_of_cpu_map[i]); | ||
96 | } | ||
97 | #else | ||
98 | static inline void setup_cpumask_of_cpu(void) { } | ||
99 | #endif | ||
100 | |||
101 | #ifdef CONFIG_X86_32 | 83 | #ifdef CONFIG_X86_32 |
102 | /* | 84 | /* |
103 | * Great future not-so-futuristic plan: make i386 and x86_64 do it | 85 | * Great future not-so-futuristic plan: make i386 and x86_64 do it |
@@ -197,9 +179,6 @@ void __init setup_per_cpu_areas(void) | |||
197 | 179 | ||
198 | /* Setup node to cpumask map */ | 180 | /* Setup node to cpumask map */ |
199 | setup_node_to_cpumask_map(); | 181 | setup_node_to_cpumask_map(); |
200 | |||
201 | /* Setup cpumask_of_cpu map */ | ||
202 | setup_cpumask_of_cpu(); | ||
203 | } | 182 | } |
204 | 183 | ||
205 | #endif | 184 | #endif |
@@ -227,8 +206,8 @@ static void __init setup_node_to_cpumask_map(void) | |||
227 | /* allocate the map */ | 206 | /* allocate the map */ |
228 | map = alloc_bootmem_low(nr_node_ids * sizeof(cpumask_t)); | 207 | map = alloc_bootmem_low(nr_node_ids * sizeof(cpumask_t)); |
229 | 208 | ||
230 | Dprintk(KERN_DEBUG "Node to cpumask map at %p for %d nodes\n", | 209 | pr_debug(KERN_DEBUG "Node to cpumask map at %p for %d nodes\n", |
231 | map, nr_node_ids); | 210 | map, nr_node_ids); |
232 | 211 | ||
233 | /* node_to_cpumask() will now work */ | 212 | /* node_to_cpumask() will now work */ |
234 | node_to_cpumask_map = map; | 213 | node_to_cpumask_map = map; |
@@ -248,7 +227,7 @@ void __cpuinit numa_set_node(int cpu, int node) | |||
248 | per_cpu(x86_cpu_to_node_map, cpu) = node; | 227 | per_cpu(x86_cpu_to_node_map, cpu) = node; |
249 | 228 | ||
250 | else | 229 | else |
251 | Dprintk(KERN_INFO "Setting node for non-present cpu %d\n", cpu); | 230 | pr_debug("Setting node for non-present cpu %d\n", cpu); |
252 | } | 231 | } |
253 | 232 | ||
254 | void __cpuinit numa_clear_node(int cpu) | 233 | void __cpuinit numa_clear_node(int cpu) |
diff --git a/arch/x86/kernel/signal_32.c b/arch/x86/kernel/signal_32.c index d92373630963..6fb5bcdd8933 100644 --- a/arch/x86/kernel/signal_32.c +++ b/arch/x86/kernel/signal_32.c | |||
@@ -212,7 +212,7 @@ asmlinkage unsigned long sys_sigreturn(unsigned long __unused) | |||
212 | 212 | ||
213 | badframe: | 213 | badframe: |
214 | if (show_unhandled_signals && printk_ratelimit()) { | 214 | if (show_unhandled_signals && printk_ratelimit()) { |
215 | printk(KERN_INFO "%s%s[%d] bad frame in sigreturn frame:" | 215 | printk("%s%s[%d] bad frame in sigreturn frame:" |
216 | "%p ip:%lx sp:%lx oeax:%lx", | 216 | "%p ip:%lx sp:%lx oeax:%lx", |
217 | task_pid_nr(current) > 1 ? KERN_INFO : KERN_EMERG, | 217 | task_pid_nr(current) > 1 ? KERN_INFO : KERN_EMERG, |
218 | current->comm, task_pid_nr(current), frame, regs->ip, | 218 | current->comm, task_pid_nr(current), frame, regs->ip, |
@@ -657,18 +657,9 @@ static void do_signal(struct pt_regs *regs) | |||
657 | void | 657 | void |
658 | do_notify_resume(struct pt_regs *regs, void *unused, __u32 thread_info_flags) | 658 | do_notify_resume(struct pt_regs *regs, void *unused, __u32 thread_info_flags) |
659 | { | 659 | { |
660 | /* Pending single-step? */ | ||
661 | if (thread_info_flags & _TIF_SINGLESTEP) { | ||
662 | regs->flags |= X86_EFLAGS_TF; | ||
663 | clear_thread_flag(TIF_SINGLESTEP); | ||
664 | } | ||
665 | |||
666 | /* deal with pending signal delivery */ | 660 | /* deal with pending signal delivery */ |
667 | if (thread_info_flags & _TIF_SIGPENDING) | 661 | if (thread_info_flags & _TIF_SIGPENDING) |
668 | do_signal(regs); | 662 | do_signal(regs); |
669 | 663 | ||
670 | if (thread_info_flags & _TIF_HRTICK_RESCHED) | ||
671 | hrtick_resched(); | ||
672 | |||
673 | clear_thread_flag(TIF_IRET); | 664 | clear_thread_flag(TIF_IRET); |
674 | } | 665 | } |
diff --git a/arch/x86/kernel/signal_64.c b/arch/x86/kernel/signal_64.c index e53b267662e7..ca316b5b742c 100644 --- a/arch/x86/kernel/signal_64.c +++ b/arch/x86/kernel/signal_64.c | |||
@@ -53,6 +53,68 @@ sys_sigaltstack(const stack_t __user *uss, stack_t __user *uoss, | |||
53 | return do_sigaltstack(uss, uoss, regs->sp); | 53 | return do_sigaltstack(uss, uoss, regs->sp); |
54 | } | 54 | } |
55 | 55 | ||
56 | /* | ||
57 | * Signal frame handlers. | ||
58 | */ | ||
59 | |||
60 | static inline int save_i387(struct _fpstate __user *buf) | ||
61 | { | ||
62 | struct task_struct *tsk = current; | ||
63 | int err = 0; | ||
64 | |||
65 | BUILD_BUG_ON(sizeof(struct user_i387_struct) != | ||
66 | sizeof(tsk->thread.xstate->fxsave)); | ||
67 | |||
68 | if ((unsigned long)buf % 16) | ||
69 | printk("save_i387: bad fpstate %p\n", buf); | ||
70 | |||
71 | if (!used_math()) | ||
72 | return 0; | ||
73 | clear_used_math(); /* trigger finit */ | ||
74 | if (task_thread_info(tsk)->status & TS_USEDFPU) { | ||
75 | err = save_i387_checking((struct i387_fxsave_struct __user *) | ||
76 | buf); | ||
77 | if (err) | ||
78 | return err; | ||
79 | task_thread_info(tsk)->status &= ~TS_USEDFPU; | ||
80 | stts(); | ||
81 | } else { | ||
82 | if (__copy_to_user(buf, &tsk->thread.xstate->fxsave, | ||
83 | sizeof(struct i387_fxsave_struct))) | ||
84 | return -1; | ||
85 | } | ||
86 | return 1; | ||
87 | } | ||
88 | |||
89 | /* | ||
90 | * This restores directly out of user space. Exceptions are handled. | ||
91 | */ | ||
92 | static inline int restore_i387(struct _fpstate __user *buf) | ||
93 | { | ||
94 | struct task_struct *tsk = current; | ||
95 | int err; | ||
96 | |||
97 | if (!used_math()) { | ||
98 | err = init_fpu(tsk); | ||
99 | if (err) | ||
100 | return err; | ||
101 | } | ||
102 | |||
103 | if (!(task_thread_info(current)->status & TS_USEDFPU)) { | ||
104 | clts(); | ||
105 | task_thread_info(current)->status |= TS_USEDFPU; | ||
106 | } | ||
107 | err = restore_fpu_checking((__force struct i387_fxsave_struct *)buf); | ||
108 | if (unlikely(err)) { | ||
109 | /* | ||
110 | * Encountered an error while doing the restore from the | ||
111 | * user buffer, clear the fpu state. | ||
112 | */ | ||
113 | clear_fpu(tsk); | ||
114 | clear_used_math(); | ||
115 | } | ||
116 | return err; | ||
117 | } | ||
56 | 118 | ||
57 | /* | 119 | /* |
58 | * Do a signal return; undo the signal stack. | 120 | * Do a signal return; undo the signal stack. |
@@ -487,12 +549,6 @@ static void do_signal(struct pt_regs *regs) | |||
487 | void do_notify_resume(struct pt_regs *regs, void *unused, | 549 | void do_notify_resume(struct pt_regs *regs, void *unused, |
488 | __u32 thread_info_flags) | 550 | __u32 thread_info_flags) |
489 | { | 551 | { |
490 | /* Pending single-step? */ | ||
491 | if (thread_info_flags & _TIF_SINGLESTEP) { | ||
492 | regs->flags |= X86_EFLAGS_TF; | ||
493 | clear_thread_flag(TIF_SINGLESTEP); | ||
494 | } | ||
495 | |||
496 | #ifdef CONFIG_X86_MCE | 552 | #ifdef CONFIG_X86_MCE |
497 | /* notify userspace of pending MCEs */ | 553 | /* notify userspace of pending MCEs */ |
498 | if (thread_info_flags & _TIF_MCE_NOTIFY) | 554 | if (thread_info_flags & _TIF_MCE_NOTIFY) |
@@ -502,9 +558,6 @@ void do_notify_resume(struct pt_regs *regs, void *unused, | |||
502 | /* deal with pending signal delivery */ | 558 | /* deal with pending signal delivery */ |
503 | if (thread_info_flags & _TIF_SIGPENDING) | 559 | if (thread_info_flags & _TIF_SIGPENDING) |
504 | do_signal(regs); | 560 | do_signal(regs); |
505 | |||
506 | if (thread_info_flags & _TIF_HRTICK_RESCHED) | ||
507 | hrtick_resched(); | ||
508 | } | 561 | } |
509 | 562 | ||
510 | void signal_fault(struct pt_regs *regs, void __user *frame, char *where) | 563 | void signal_fault(struct pt_regs *regs, void __user *frame, char *where) |
diff --git a/arch/x86/kernel/smpboot.c b/arch/x86/kernel/smpboot.c index 687376ab07e8..7985c5b3f916 100644 --- a/arch/x86/kernel/smpboot.c +++ b/arch/x86/kernel/smpboot.c | |||
@@ -216,7 +216,7 @@ static void __cpuinit smp_callin(void) | |||
216 | panic("%s: phys CPU#%d, CPU#%d already present??\n", __func__, | 216 | panic("%s: phys CPU#%d, CPU#%d already present??\n", __func__, |
217 | phys_id, cpuid); | 217 | phys_id, cpuid); |
218 | } | 218 | } |
219 | Dprintk("CPU#%d (phys ID: %d) waiting for CALLOUT\n", cpuid, phys_id); | 219 | pr_debug("CPU#%d (phys ID: %d) waiting for CALLOUT\n", cpuid, phys_id); |
220 | 220 | ||
221 | /* | 221 | /* |
222 | * STARTUP IPIs are fragile beasts as they might sometimes | 222 | * STARTUP IPIs are fragile beasts as they might sometimes |
@@ -251,7 +251,7 @@ static void __cpuinit smp_callin(void) | |||
251 | * boards) | 251 | * boards) |
252 | */ | 252 | */ |
253 | 253 | ||
254 | Dprintk("CALLIN, before setup_local_APIC().\n"); | 254 | pr_debug("CALLIN, before setup_local_APIC().\n"); |
255 | smp_callin_clear_local_apic(); | 255 | smp_callin_clear_local_apic(); |
256 | setup_local_APIC(); | 256 | setup_local_APIC(); |
257 | end_local_APIC_setup(); | 257 | end_local_APIC_setup(); |
@@ -266,7 +266,7 @@ static void __cpuinit smp_callin(void) | |||
266 | local_irq_enable(); | 266 | local_irq_enable(); |
267 | calibrate_delay(); | 267 | calibrate_delay(); |
268 | local_irq_disable(); | 268 | local_irq_disable(); |
269 | Dprintk("Stack at about %p\n", &cpuid); | 269 | pr_debug("Stack at about %p\n", &cpuid); |
270 | 270 | ||
271 | /* | 271 | /* |
272 | * Save our processor parameters | 272 | * Save our processor parameters |
@@ -326,12 +326,16 @@ static void __cpuinit start_secondary(void *unused) | |||
326 | * for which cpus receive the IPI. Holding this | 326 | * for which cpus receive the IPI. Holding this |
327 | * lock helps us to not include this cpu in a currently in progress | 327 | * lock helps us to not include this cpu in a currently in progress |
328 | * smp_call_function(). | 328 | * smp_call_function(). |
329 | * | ||
330 | * We need to hold vector_lock so there the set of online cpus | ||
331 | * does not change while we are assigning vectors to cpus. Holding | ||
332 | * this lock ensures we don't half assign or remove an irq from a cpu. | ||
329 | */ | 333 | */ |
330 | ipi_call_lock_irq(); | 334 | ipi_call_lock_irq(); |
331 | #ifdef CONFIG_X86_IO_APIC | 335 | lock_vector_lock(); |
332 | setup_vector_irq(smp_processor_id()); | 336 | __setup_vector_irq(smp_processor_id()); |
333 | #endif | ||
334 | cpu_set(smp_processor_id(), cpu_online_map); | 337 | cpu_set(smp_processor_id(), cpu_online_map); |
338 | unlock_vector_lock(); | ||
335 | ipi_call_unlock_irq(); | 339 | ipi_call_unlock_irq(); |
336 | per_cpu(cpu_state, smp_processor_id()) = CPU_ONLINE; | 340 | per_cpu(cpu_state, smp_processor_id()) = CPU_ONLINE; |
337 | 341 | ||
@@ -438,7 +442,7 @@ void __cpuinit set_cpu_sibling_map(int cpu) | |||
438 | cpu_set(cpu, cpu_sibling_setup_map); | 442 | cpu_set(cpu, cpu_sibling_setup_map); |
439 | 443 | ||
440 | if (smp_num_siblings > 1) { | 444 | if (smp_num_siblings > 1) { |
441 | for_each_cpu_mask(i, cpu_sibling_setup_map) { | 445 | for_each_cpu_mask_nr(i, cpu_sibling_setup_map) { |
442 | if (c->phys_proc_id == cpu_data(i).phys_proc_id && | 446 | if (c->phys_proc_id == cpu_data(i).phys_proc_id && |
443 | c->cpu_core_id == cpu_data(i).cpu_core_id) { | 447 | c->cpu_core_id == cpu_data(i).cpu_core_id) { |
444 | cpu_set(i, per_cpu(cpu_sibling_map, cpu)); | 448 | cpu_set(i, per_cpu(cpu_sibling_map, cpu)); |
@@ -461,7 +465,7 @@ void __cpuinit set_cpu_sibling_map(int cpu) | |||
461 | return; | 465 | return; |
462 | } | 466 | } |
463 | 467 | ||
464 | for_each_cpu_mask(i, cpu_sibling_setup_map) { | 468 | for_each_cpu_mask_nr(i, cpu_sibling_setup_map) { |
465 | if (per_cpu(cpu_llc_id, cpu) != BAD_APICID && | 469 | if (per_cpu(cpu_llc_id, cpu) != BAD_APICID && |
466 | per_cpu(cpu_llc_id, cpu) == per_cpu(cpu_llc_id, i)) { | 470 | per_cpu(cpu_llc_id, cpu) == per_cpu(cpu_llc_id, i)) { |
467 | cpu_set(i, c->llc_shared_map); | 471 | cpu_set(i, c->llc_shared_map); |
@@ -513,7 +517,7 @@ static void impress_friends(void) | |||
513 | /* | 517 | /* |
514 | * Allow the user to impress friends. | 518 | * Allow the user to impress friends. |
515 | */ | 519 | */ |
516 | Dprintk("Before bogomips.\n"); | 520 | pr_debug("Before bogomips.\n"); |
517 | for_each_possible_cpu(cpu) | 521 | for_each_possible_cpu(cpu) |
518 | if (cpu_isset(cpu, cpu_callout_map)) | 522 | if (cpu_isset(cpu, cpu_callout_map)) |
519 | bogosum += cpu_data(cpu).loops_per_jiffy; | 523 | bogosum += cpu_data(cpu).loops_per_jiffy; |
@@ -523,7 +527,7 @@ static void impress_friends(void) | |||
523 | bogosum/(500000/HZ), | 527 | bogosum/(500000/HZ), |
524 | (bogosum/(5000/HZ))%100); | 528 | (bogosum/(5000/HZ))%100); |
525 | 529 | ||
526 | Dprintk("Before bogocount - setting activated=1.\n"); | 530 | pr_debug("Before bogocount - setting activated=1.\n"); |
527 | } | 531 | } |
528 | 532 | ||
529 | static inline void __inquire_remote_apic(int apicid) | 533 | static inline void __inquire_remote_apic(int apicid) |
@@ -546,8 +550,8 @@ static inline void __inquire_remote_apic(int apicid) | |||
546 | printk(KERN_CONT | 550 | printk(KERN_CONT |
547 | "a previous APIC delivery may have failed\n"); | 551 | "a previous APIC delivery may have failed\n"); |
548 | 552 | ||
549 | apic_write_around(APIC_ICR2, SET_APIC_DEST_FIELD(apicid)); | 553 | apic_write(APIC_ICR2, SET_APIC_DEST_FIELD(apicid)); |
550 | apic_write_around(APIC_ICR, APIC_DM_REMRD | regs[i]); | 554 | apic_write(APIC_ICR, APIC_DM_REMRD | regs[i]); |
551 | 555 | ||
552 | timeout = 0; | 556 | timeout = 0; |
553 | do { | 557 | do { |
@@ -579,29 +583,24 @@ wakeup_secondary_cpu(int logical_apicid, unsigned long start_eip) | |||
579 | int maxlvt; | 583 | int maxlvt; |
580 | 584 | ||
581 | /* Target chip */ | 585 | /* Target chip */ |
582 | apic_write_around(APIC_ICR2, SET_APIC_DEST_FIELD(logical_apicid)); | 586 | apic_write(APIC_ICR2, SET_APIC_DEST_FIELD(logical_apicid)); |
583 | 587 | ||
584 | /* Boot on the stack */ | 588 | /* Boot on the stack */ |
585 | /* Kick the second */ | 589 | /* Kick the second */ |
586 | apic_write_around(APIC_ICR, APIC_DM_NMI | APIC_DEST_LOGICAL); | 590 | apic_write(APIC_ICR, APIC_DM_NMI | APIC_DEST_LOGICAL); |
587 | 591 | ||
588 | Dprintk("Waiting for send to finish...\n"); | 592 | pr_debug("Waiting for send to finish...\n"); |
589 | send_status = safe_apic_wait_icr_idle(); | 593 | send_status = safe_apic_wait_icr_idle(); |
590 | 594 | ||
591 | /* | 595 | /* |
592 | * Give the other CPU some time to accept the IPI. | 596 | * Give the other CPU some time to accept the IPI. |
593 | */ | 597 | */ |
594 | udelay(200); | 598 | udelay(200); |
595 | /* | ||
596 | * Due to the Pentium erratum 3AP. | ||
597 | */ | ||
598 | maxlvt = lapic_get_maxlvt(); | 599 | maxlvt = lapic_get_maxlvt(); |
599 | if (maxlvt > 3) { | 600 | if (maxlvt > 3) /* Due to the Pentium erratum 3AP. */ |
600 | apic_read_around(APIC_SPIV); | ||
601 | apic_write(APIC_ESR, 0); | 601 | apic_write(APIC_ESR, 0); |
602 | } | ||
603 | accept_status = (apic_read(APIC_ESR) & 0xEF); | 602 | accept_status = (apic_read(APIC_ESR) & 0xEF); |
604 | Dprintk("NMI sent.\n"); | 603 | pr_debug("NMI sent.\n"); |
605 | 604 | ||
606 | if (send_status) | 605 | if (send_status) |
607 | printk(KERN_ERR "APIC never delivered???\n"); | 606 | printk(KERN_ERR "APIC never delivered???\n"); |
@@ -625,42 +624,44 @@ wakeup_secondary_cpu(int phys_apicid, unsigned long start_eip) | |||
625 | return send_status; | 624 | return send_status; |
626 | } | 625 | } |
627 | 626 | ||
627 | maxlvt = lapic_get_maxlvt(); | ||
628 | |||
628 | /* | 629 | /* |
629 | * Be paranoid about clearing APIC errors. | 630 | * Be paranoid about clearing APIC errors. |
630 | */ | 631 | */ |
631 | if (APIC_INTEGRATED(apic_version[phys_apicid])) { | 632 | if (APIC_INTEGRATED(apic_version[phys_apicid])) { |
632 | apic_read_around(APIC_SPIV); | 633 | if (maxlvt > 3) /* Due to the Pentium erratum 3AP. */ |
633 | apic_write(APIC_ESR, 0); | 634 | apic_write(APIC_ESR, 0); |
634 | apic_read(APIC_ESR); | 635 | apic_read(APIC_ESR); |
635 | } | 636 | } |
636 | 637 | ||
637 | Dprintk("Asserting INIT.\n"); | 638 | pr_debug("Asserting INIT.\n"); |
638 | 639 | ||
639 | /* | 640 | /* |
640 | * Turn INIT on target chip | 641 | * Turn INIT on target chip |
641 | */ | 642 | */ |
642 | apic_write_around(APIC_ICR2, SET_APIC_DEST_FIELD(phys_apicid)); | 643 | apic_write(APIC_ICR2, SET_APIC_DEST_FIELD(phys_apicid)); |
643 | 644 | ||
644 | /* | 645 | /* |
645 | * Send IPI | 646 | * Send IPI |
646 | */ | 647 | */ |
647 | apic_write_around(APIC_ICR, APIC_INT_LEVELTRIG | APIC_INT_ASSERT | 648 | apic_write(APIC_ICR, |
648 | | APIC_DM_INIT); | 649 | APIC_INT_LEVELTRIG | APIC_INT_ASSERT | APIC_DM_INIT); |
649 | 650 | ||
650 | Dprintk("Waiting for send to finish...\n"); | 651 | pr_debug("Waiting for send to finish...\n"); |
651 | send_status = safe_apic_wait_icr_idle(); | 652 | send_status = safe_apic_wait_icr_idle(); |
652 | 653 | ||
653 | mdelay(10); | 654 | mdelay(10); |
654 | 655 | ||
655 | Dprintk("Deasserting INIT.\n"); | 656 | pr_debug("Deasserting INIT.\n"); |
656 | 657 | ||
657 | /* Target chip */ | 658 | /* Target chip */ |
658 | apic_write_around(APIC_ICR2, SET_APIC_DEST_FIELD(phys_apicid)); | 659 | apic_write(APIC_ICR2, SET_APIC_DEST_FIELD(phys_apicid)); |
659 | 660 | ||
660 | /* Send IPI */ | 661 | /* Send IPI */ |
661 | apic_write_around(APIC_ICR, APIC_INT_LEVELTRIG | APIC_DM_INIT); | 662 | apic_write(APIC_ICR, APIC_INT_LEVELTRIG | APIC_DM_INIT); |
662 | 663 | ||
663 | Dprintk("Waiting for send to finish...\n"); | 664 | pr_debug("Waiting for send to finish...\n"); |
664 | send_status = safe_apic_wait_icr_idle(); | 665 | send_status = safe_apic_wait_icr_idle(); |
665 | 666 | ||
666 | mb(); | 667 | mb(); |
@@ -687,55 +688,47 @@ wakeup_secondary_cpu(int phys_apicid, unsigned long start_eip) | |||
687 | /* | 688 | /* |
688 | * Run STARTUP IPI loop. | 689 | * Run STARTUP IPI loop. |
689 | */ | 690 | */ |
690 | Dprintk("#startup loops: %d.\n", num_starts); | 691 | pr_debug("#startup loops: %d.\n", num_starts); |
691 | |||
692 | maxlvt = lapic_get_maxlvt(); | ||
693 | 692 | ||
694 | for (j = 1; j <= num_starts; j++) { | 693 | for (j = 1; j <= num_starts; j++) { |
695 | Dprintk("Sending STARTUP #%d.\n", j); | 694 | pr_debug("Sending STARTUP #%d.\n", j); |
696 | apic_read_around(APIC_SPIV); | 695 | if (maxlvt > 3) /* Due to the Pentium erratum 3AP. */ |
697 | apic_write(APIC_ESR, 0); | 696 | apic_write(APIC_ESR, 0); |
698 | apic_read(APIC_ESR); | 697 | apic_read(APIC_ESR); |
699 | Dprintk("After apic_write.\n"); | 698 | pr_debug("After apic_write.\n"); |
700 | 699 | ||
701 | /* | 700 | /* |
702 | * STARTUP IPI | 701 | * STARTUP IPI |
703 | */ | 702 | */ |
704 | 703 | ||
705 | /* Target chip */ | 704 | /* Target chip */ |
706 | apic_write_around(APIC_ICR2, SET_APIC_DEST_FIELD(phys_apicid)); | 705 | apic_write(APIC_ICR2, SET_APIC_DEST_FIELD(phys_apicid)); |
707 | 706 | ||
708 | /* Boot on the stack */ | 707 | /* Boot on the stack */ |
709 | /* Kick the second */ | 708 | /* Kick the second */ |
710 | apic_write_around(APIC_ICR, APIC_DM_STARTUP | 709 | apic_write(APIC_ICR, APIC_DM_STARTUP | (start_eip >> 12)); |
711 | | (start_eip >> 12)); | ||
712 | 710 | ||
713 | /* | 711 | /* |
714 | * Give the other CPU some time to accept the IPI. | 712 | * Give the other CPU some time to accept the IPI. |
715 | */ | 713 | */ |
716 | udelay(300); | 714 | udelay(300); |
717 | 715 | ||
718 | Dprintk("Startup point 1.\n"); | 716 | pr_debug("Startup point 1.\n"); |
719 | 717 | ||
720 | Dprintk("Waiting for send to finish...\n"); | 718 | pr_debug("Waiting for send to finish...\n"); |
721 | send_status = safe_apic_wait_icr_idle(); | 719 | send_status = safe_apic_wait_icr_idle(); |
722 | 720 | ||
723 | /* | 721 | /* |
724 | * Give the other CPU some time to accept the IPI. | 722 | * Give the other CPU some time to accept the IPI. |
725 | */ | 723 | */ |
726 | udelay(200); | 724 | udelay(200); |
727 | /* | 725 | if (maxlvt > 3) /* Due to the Pentium erratum 3AP. */ |
728 | * Due to the Pentium erratum 3AP. | ||
729 | */ | ||
730 | if (maxlvt > 3) { | ||
731 | apic_read_around(APIC_SPIV); | ||
732 | apic_write(APIC_ESR, 0); | 726 | apic_write(APIC_ESR, 0); |
733 | } | ||
734 | accept_status = (apic_read(APIC_ESR) & 0xEF); | 727 | accept_status = (apic_read(APIC_ESR) & 0xEF); |
735 | if (send_status || accept_status) | 728 | if (send_status || accept_status) |
736 | break; | 729 | break; |
737 | } | 730 | } |
738 | Dprintk("After Startup.\n"); | 731 | pr_debug("After Startup.\n"); |
739 | 732 | ||
740 | if (send_status) | 733 | if (send_status) |
741 | printk(KERN_ERR "APIC never delivered???\n"); | 734 | printk(KERN_ERR "APIC never delivered???\n"); |
@@ -763,12 +756,20 @@ static void __cpuinit do_fork_idle(struct work_struct *work) | |||
763 | } | 756 | } |
764 | 757 | ||
765 | #ifdef CONFIG_X86_64 | 758 | #ifdef CONFIG_X86_64 |
759 | |||
760 | /* __ref because it's safe to call free_bootmem when after_bootmem == 0. */ | ||
761 | static void __ref free_bootmem_pda(struct x8664_pda *oldpda) | ||
762 | { | ||
763 | if (!after_bootmem) | ||
764 | free_bootmem((unsigned long)oldpda, sizeof(*oldpda)); | ||
765 | } | ||
766 | |||
766 | /* | 767 | /* |
767 | * Allocate node local memory for the AP pda. | 768 | * Allocate node local memory for the AP pda. |
768 | * | 769 | * |
769 | * Must be called after the _cpu_pda pointer table is initialized. | 770 | * Must be called after the _cpu_pda pointer table is initialized. |
770 | */ | 771 | */ |
771 | static int __cpuinit get_local_pda(int cpu) | 772 | int __cpuinit get_local_pda(int cpu) |
772 | { | 773 | { |
773 | struct x8664_pda *oldpda, *newpda; | 774 | struct x8664_pda *oldpda, *newpda; |
774 | unsigned long size = sizeof(struct x8664_pda); | 775 | unsigned long size = sizeof(struct x8664_pda); |
@@ -791,8 +792,7 @@ static int __cpuinit get_local_pda(int cpu) | |||
791 | 792 | ||
792 | if (oldpda) { | 793 | if (oldpda) { |
793 | memcpy(newpda, oldpda, size); | 794 | memcpy(newpda, oldpda, size); |
794 | if (!after_bootmem) | 795 | free_bootmem_pda(oldpda); |
795 | free_bootmem((unsigned long)oldpda, size); | ||
796 | } | 796 | } |
797 | 797 | ||
798 | newpda->in_bootmem = 0; | 798 | newpda->in_bootmem = 0; |
@@ -886,7 +886,7 @@ do_rest: | |||
886 | 886 | ||
887 | if (get_uv_system_type() != UV_NON_UNIQUE_APIC) { | 887 | if (get_uv_system_type() != UV_NON_UNIQUE_APIC) { |
888 | 888 | ||
889 | Dprintk("Setting warm reset code and vector.\n"); | 889 | pr_debug("Setting warm reset code and vector.\n"); |
890 | 890 | ||
891 | store_NMI_vector(&nmi_high, &nmi_low); | 891 | store_NMI_vector(&nmi_high, &nmi_low); |
892 | 892 | ||
@@ -907,9 +907,9 @@ do_rest: | |||
907 | /* | 907 | /* |
908 | * allow APs to start initializing. | 908 | * allow APs to start initializing. |
909 | */ | 909 | */ |
910 | Dprintk("Before Callout %d.\n", cpu); | 910 | pr_debug("Before Callout %d.\n", cpu); |
911 | cpu_set(cpu, cpu_callout_map); | 911 | cpu_set(cpu, cpu_callout_map); |
912 | Dprintk("After Callout %d.\n", cpu); | 912 | pr_debug("After Callout %d.\n", cpu); |
913 | 913 | ||
914 | /* | 914 | /* |
915 | * Wait 5s total for a response | 915 | * Wait 5s total for a response |
@@ -922,10 +922,10 @@ do_rest: | |||
922 | 922 | ||
923 | if (cpu_isset(cpu, cpu_callin_map)) { | 923 | if (cpu_isset(cpu, cpu_callin_map)) { |
924 | /* number CPUs logically, starting from 1 (BSP is 0) */ | 924 | /* number CPUs logically, starting from 1 (BSP is 0) */ |
925 | Dprintk("OK.\n"); | 925 | pr_debug("OK.\n"); |
926 | printk(KERN_INFO "CPU%d: ", cpu); | 926 | printk(KERN_INFO "CPU%d: ", cpu); |
927 | print_cpu_info(&cpu_data(cpu)); | 927 | print_cpu_info(&cpu_data(cpu)); |
928 | Dprintk("CPU has booted.\n"); | 928 | pr_debug("CPU has booted.\n"); |
929 | } else { | 929 | } else { |
930 | boot_error = 1; | 930 | boot_error = 1; |
931 | if (*((volatile unsigned char *)trampoline_base) | 931 | if (*((volatile unsigned char *)trampoline_base) |
@@ -970,7 +970,7 @@ int __cpuinit native_cpu_up(unsigned int cpu) | |||
970 | 970 | ||
971 | WARN_ON(irqs_disabled()); | 971 | WARN_ON(irqs_disabled()); |
972 | 972 | ||
973 | Dprintk("++++++++++++++++++++=_---CPU UP %u\n", cpu); | 973 | pr_debug("++++++++++++++++++++=_---CPU UP %u\n", cpu); |
974 | 974 | ||
975 | if (apicid == BAD_APICID || apicid == boot_cpu_physical_apicid || | 975 | if (apicid == BAD_APICID || apicid == boot_cpu_physical_apicid || |
976 | !physid_isset(apicid, phys_cpu_present_map)) { | 976 | !physid_isset(apicid, phys_cpu_present_map)) { |
@@ -982,7 +982,7 @@ int __cpuinit native_cpu_up(unsigned int cpu) | |||
982 | * Already booted CPU? | 982 | * Already booted CPU? |
983 | */ | 983 | */ |
984 | if (cpu_isset(cpu, cpu_callin_map)) { | 984 | if (cpu_isset(cpu, cpu_callin_map)) { |
985 | Dprintk("do_boot_cpu %d Already started\n", cpu); | 985 | pr_debug("do_boot_cpu %d Already started\n", cpu); |
986 | return -ENOSYS; | 986 | return -ENOSYS; |
987 | } | 987 | } |
988 | 988 | ||
@@ -1009,7 +1009,7 @@ int __cpuinit native_cpu_up(unsigned int cpu) | |||
1009 | err = do_boot_cpu(apicid, cpu); | 1009 | err = do_boot_cpu(apicid, cpu); |
1010 | #endif | 1010 | #endif |
1011 | if (err) { | 1011 | if (err) { |
1012 | Dprintk("do_boot_cpu failed %d\n", err); | 1012 | pr_debug("do_boot_cpu failed %d\n", err); |
1013 | return -EIO; | 1013 | return -EIO; |
1014 | } | 1014 | } |
1015 | 1015 | ||
@@ -1055,6 +1055,34 @@ static __init void disable_smp(void) | |||
1055 | static int __init smp_sanity_check(unsigned max_cpus) | 1055 | static int __init smp_sanity_check(unsigned max_cpus) |
1056 | { | 1056 | { |
1057 | preempt_disable(); | 1057 | preempt_disable(); |
1058 | |||
1059 | #if defined(CONFIG_X86_PC) && defined(CONFIG_X86_32) | ||
1060 | if (def_to_bigsmp && nr_cpu_ids > 8) { | ||
1061 | unsigned int cpu; | ||
1062 | unsigned nr; | ||
1063 | |||
1064 | printk(KERN_WARNING | ||
1065 | "More than 8 CPUs detected - skipping them.\n" | ||
1066 | "Use CONFIG_X86_GENERICARCH and CONFIG_X86_BIGSMP.\n"); | ||
1067 | |||
1068 | nr = 0; | ||
1069 | for_each_present_cpu(cpu) { | ||
1070 | if (nr >= 8) | ||
1071 | cpu_clear(cpu, cpu_present_map); | ||
1072 | nr++; | ||
1073 | } | ||
1074 | |||
1075 | nr = 0; | ||
1076 | for_each_possible_cpu(cpu) { | ||
1077 | if (nr >= 8) | ||
1078 | cpu_clear(cpu, cpu_possible_map); | ||
1079 | nr++; | ||
1080 | } | ||
1081 | |||
1082 | nr_cpu_ids = 8; | ||
1083 | } | ||
1084 | #endif | ||
1085 | |||
1058 | if (!physid_isset(hard_smp_processor_id(), phys_cpu_present_map)) { | 1086 | if (!physid_isset(hard_smp_processor_id(), phys_cpu_present_map)) { |
1059 | printk(KERN_WARNING "weird, boot CPU (#%d) not listed" | 1087 | printk(KERN_WARNING "weird, boot CPU (#%d) not listed" |
1060 | "by the BIOS.\n", hard_smp_processor_id()); | 1088 | "by the BIOS.\n", hard_smp_processor_id()); |
@@ -1193,6 +1221,9 @@ void __init native_smp_prepare_cpus(unsigned int max_cpus) | |||
1193 | printk(KERN_INFO "CPU%d: ", 0); | 1221 | printk(KERN_INFO "CPU%d: ", 0); |
1194 | print_cpu_info(&cpu_data(0)); | 1222 | print_cpu_info(&cpu_data(0)); |
1195 | setup_boot_clock(); | 1223 | setup_boot_clock(); |
1224 | |||
1225 | if (is_uv_system()) | ||
1226 | uv_system_init(); | ||
1196 | out: | 1227 | out: |
1197 | preempt_enable(); | 1228 | preempt_enable(); |
1198 | } | 1229 | } |
@@ -1213,7 +1244,7 @@ void __init native_smp_prepare_boot_cpu(void) | |||
1213 | 1244 | ||
1214 | void __init native_smp_cpus_done(unsigned int max_cpus) | 1245 | void __init native_smp_cpus_done(unsigned int max_cpus) |
1215 | { | 1246 | { |
1216 | Dprintk("Boot done.\n"); | 1247 | pr_debug("Boot done.\n"); |
1217 | 1248 | ||
1218 | impress_friends(); | 1249 | impress_friends(); |
1219 | smp_checks(); | 1250 | smp_checks(); |
@@ -1230,7 +1261,7 @@ static void remove_siblinginfo(int cpu) | |||
1230 | int sibling; | 1261 | int sibling; |
1231 | struct cpuinfo_x86 *c = &cpu_data(cpu); | 1262 | struct cpuinfo_x86 *c = &cpu_data(cpu); |
1232 | 1263 | ||
1233 | for_each_cpu_mask(sibling, per_cpu(cpu_core_map, cpu)) { | 1264 | for_each_cpu_mask_nr(sibling, per_cpu(cpu_core_map, cpu)) { |
1234 | cpu_clear(cpu, per_cpu(cpu_core_map, sibling)); | 1265 | cpu_clear(cpu, per_cpu(cpu_core_map, sibling)); |
1235 | /*/ | 1266 | /*/ |
1236 | * last thread sibling in this cpu core going down | 1267 | * last thread sibling in this cpu core going down |
@@ -1239,7 +1270,7 @@ static void remove_siblinginfo(int cpu) | |||
1239 | cpu_data(sibling).booted_cores--; | 1270 | cpu_data(sibling).booted_cores--; |
1240 | } | 1271 | } |
1241 | 1272 | ||
1242 | for_each_cpu_mask(sibling, per_cpu(cpu_sibling_map, cpu)) | 1273 | for_each_cpu_mask_nr(sibling, per_cpu(cpu_sibling_map, cpu)) |
1243 | cpu_clear(cpu, per_cpu(cpu_sibling_map, sibling)); | 1274 | cpu_clear(cpu, per_cpu(cpu_sibling_map, sibling)); |
1244 | cpus_clear(per_cpu(cpu_sibling_map, cpu)); | 1275 | cpus_clear(per_cpu(cpu_sibling_map, cpu)); |
1245 | cpus_clear(per_cpu(cpu_core_map, cpu)); | 1276 | cpus_clear(per_cpu(cpu_core_map, cpu)); |
@@ -1311,7 +1342,7 @@ static void __ref remove_cpu_from_maps(int cpu) | |||
1311 | cpu_clear(cpu, cpu_callout_map); | 1342 | cpu_clear(cpu, cpu_callout_map); |
1312 | cpu_clear(cpu, cpu_callin_map); | 1343 | cpu_clear(cpu, cpu_callin_map); |
1313 | /* was set by cpu_init() */ | 1344 | /* was set by cpu_init() */ |
1314 | clear_bit(cpu, (unsigned long *)&cpu_initialized); | 1345 | cpu_clear(cpu, cpu_initialized); |
1315 | numa_remove_cpu(cpu); | 1346 | numa_remove_cpu(cpu); |
1316 | } | 1347 | } |
1317 | 1348 | ||
@@ -1347,7 +1378,9 @@ int __cpu_disable(void) | |||
1347 | remove_siblinginfo(cpu); | 1378 | remove_siblinginfo(cpu); |
1348 | 1379 | ||
1349 | /* It's now safe to remove this processor from the online map */ | 1380 | /* It's now safe to remove this processor from the online map */ |
1381 | lock_vector_lock(); | ||
1350 | remove_cpu_from_maps(cpu); | 1382 | remove_cpu_from_maps(cpu); |
1383 | unlock_vector_lock(); | ||
1351 | fixup_irqs(cpu_online_map); | 1384 | fixup_irqs(cpu_online_map); |
1352 | return 0; | 1385 | return 0; |
1353 | } | 1386 | } |
@@ -1381,16 +1414,3 @@ void __cpu_die(unsigned int cpu) | |||
1381 | BUG(); | 1414 | BUG(); |
1382 | } | 1415 | } |
1383 | #endif | 1416 | #endif |
1384 | |||
1385 | /* | ||
1386 | * If the BIOS enumerates physical processors before logical, | ||
1387 | * maxcpus=N at enumeration-time can be used to disable HT. | ||
1388 | */ | ||
1389 | static int __init parse_maxcpus(char *arg) | ||
1390 | { | ||
1391 | extern unsigned int maxcpus; | ||
1392 | |||
1393 | maxcpus = simple_strtoul(arg, NULL, 0); | ||
1394 | return 0; | ||
1395 | } | ||
1396 | early_param("maxcpus", parse_maxcpus); | ||
diff --git a/arch/x86/kernel/smpcommon.c b/arch/x86/kernel/smpcommon.c index 99941b37eca0..397e309839dd 100644 --- a/arch/x86/kernel/smpcommon.c +++ b/arch/x86/kernel/smpcommon.c | |||
@@ -8,18 +8,21 @@ | |||
8 | DEFINE_PER_CPU(unsigned long, this_cpu_off); | 8 | DEFINE_PER_CPU(unsigned long, this_cpu_off); |
9 | EXPORT_PER_CPU_SYMBOL(this_cpu_off); | 9 | EXPORT_PER_CPU_SYMBOL(this_cpu_off); |
10 | 10 | ||
11 | /* Initialize the CPU's GDT. This is either the boot CPU doing itself | 11 | /* |
12 | (still using the master per-cpu area), or a CPU doing it for a | 12 | * Initialize the CPU's GDT. This is either the boot CPU doing itself |
13 | secondary which will soon come up. */ | 13 | * (still using the master per-cpu area), or a CPU doing it for a |
14 | * secondary which will soon come up. | ||
15 | */ | ||
14 | __cpuinit void init_gdt(int cpu) | 16 | __cpuinit void init_gdt(int cpu) |
15 | { | 17 | { |
16 | struct desc_struct *gdt = get_cpu_gdt_table(cpu); | 18 | struct desc_struct gdt; |
17 | 19 | ||
18 | pack_descriptor(&gdt[GDT_ENTRY_PERCPU], | 20 | pack_descriptor(&gdt, __per_cpu_offset[cpu], 0xFFFFF, |
19 | __per_cpu_offset[cpu], 0xFFFFF, | ||
20 | 0x2 | DESCTYPE_S, 0x8); | 21 | 0x2 | DESCTYPE_S, 0x8); |
22 | gdt.s = 1; | ||
21 | 23 | ||
22 | gdt[GDT_ENTRY_PERCPU].s = 1; | 24 | write_gdt_entry(get_cpu_gdt_table(cpu), |
25 | GDT_ENTRY_PERCPU, &gdt, DESCTYPE_S); | ||
23 | 26 | ||
24 | per_cpu(this_cpu_off, cpu) = __per_cpu_offset[cpu]; | 27 | per_cpu(this_cpu_off, cpu) = __per_cpu_offset[cpu]; |
25 | per_cpu(cpu_number, cpu) = cpu; | 28 | per_cpu(cpu_number, cpu) = cpu; |
diff --git a/arch/x86/kernel/smpcommon_32.c b/arch/x86/kernel/smpcommon_32.c deleted file mode 100644 index 8b137891791f..000000000000 --- a/arch/x86/kernel/smpcommon_32.c +++ /dev/null | |||
@@ -1 +0,0 @@ | |||
1 | |||
diff --git a/arch/x86/kernel/step.c b/arch/x86/kernel/step.c index 92c20fee6781..e8b9863ef8c4 100644 --- a/arch/x86/kernel/step.c +++ b/arch/x86/kernel/step.c | |||
@@ -105,6 +105,20 @@ static int is_setting_trap_flag(struct task_struct *child, struct pt_regs *regs) | |||
105 | static int enable_single_step(struct task_struct *child) | 105 | static int enable_single_step(struct task_struct *child) |
106 | { | 106 | { |
107 | struct pt_regs *regs = task_pt_regs(child); | 107 | struct pt_regs *regs = task_pt_regs(child); |
108 | unsigned long oflags; | ||
109 | |||
110 | /* | ||
111 | * If we stepped into a sysenter/syscall insn, it trapped in | ||
112 | * kernel mode; do_debug() cleared TF and set TIF_SINGLESTEP. | ||
113 | * If user-mode had set TF itself, then it's still clear from | ||
114 | * do_debug() and we need to set it again to restore the user | ||
115 | * state so we don't wrongly set TIF_FORCED_TF below. | ||
116 | * If enable_single_step() was used last and that is what | ||
117 | * set TIF_SINGLESTEP, then both TF and TIF_FORCED_TF are | ||
118 | * already set and our bookkeeping is fine. | ||
119 | */ | ||
120 | if (unlikely(test_tsk_thread_flag(child, TIF_SINGLESTEP))) | ||
121 | regs->flags |= X86_EFLAGS_TF; | ||
108 | 122 | ||
109 | /* | 123 | /* |
110 | * Always set TIF_SINGLESTEP - this guarantees that | 124 | * Always set TIF_SINGLESTEP - this guarantees that |
@@ -113,11 +127,7 @@ static int enable_single_step(struct task_struct *child) | |||
113 | */ | 127 | */ |
114 | set_tsk_thread_flag(child, TIF_SINGLESTEP); | 128 | set_tsk_thread_flag(child, TIF_SINGLESTEP); |
115 | 129 | ||
116 | /* | 130 | oflags = regs->flags; |
117 | * If TF was already set, don't do anything else | ||
118 | */ | ||
119 | if (regs->flags & X86_EFLAGS_TF) | ||
120 | return 0; | ||
121 | 131 | ||
122 | /* Set TF on the kernel stack.. */ | 132 | /* Set TF on the kernel stack.. */ |
123 | regs->flags |= X86_EFLAGS_TF; | 133 | regs->flags |= X86_EFLAGS_TF; |
@@ -126,9 +136,22 @@ static int enable_single_step(struct task_struct *child) | |||
126 | * ..but if TF is changed by the instruction we will trace, | 136 | * ..but if TF is changed by the instruction we will trace, |
127 | * don't mark it as being "us" that set it, so that we | 137 | * don't mark it as being "us" that set it, so that we |
128 | * won't clear it by hand later. | 138 | * won't clear it by hand later. |
139 | * | ||
140 | * Note that if we don't actually execute the popf because | ||
141 | * of a signal arriving right now or suchlike, we will lose | ||
142 | * track of the fact that it really was "us" that set it. | ||
129 | */ | 143 | */ |
130 | if (is_setting_trap_flag(child, regs)) | 144 | if (is_setting_trap_flag(child, regs)) { |
145 | clear_tsk_thread_flag(child, TIF_FORCED_TF); | ||
131 | return 0; | 146 | return 0; |
147 | } | ||
148 | |||
149 | /* | ||
150 | * If TF was already set, check whether it was us who set it. | ||
151 | * If not, we should never attempt a block step. | ||
152 | */ | ||
153 | if (oflags & X86_EFLAGS_TF) | ||
154 | return test_tsk_thread_flag(child, TIF_FORCED_TF); | ||
132 | 155 | ||
133 | set_tsk_thread_flag(child, TIF_FORCED_TF); | 156 | set_tsk_thread_flag(child, TIF_FORCED_TF); |
134 | 157 | ||
diff --git a/arch/x86/kernel/syscall_table_32.S b/arch/x86/kernel/syscall_table_32.S index adff5562f5fd..d44395ff34c3 100644 --- a/arch/x86/kernel/syscall_table_32.S +++ b/arch/x86/kernel/syscall_table_32.S | |||
@@ -326,3 +326,9 @@ ENTRY(sys_call_table) | |||
326 | .long sys_fallocate | 326 | .long sys_fallocate |
327 | .long sys_timerfd_settime /* 325 */ | 327 | .long sys_timerfd_settime /* 325 */ |
328 | .long sys_timerfd_gettime | 328 | .long sys_timerfd_gettime |
329 | .long sys_signalfd4 | ||
330 | .long sys_eventfd2 | ||
331 | .long sys_epoll_create1 | ||
332 | .long sys_dup3 /* 330 */ | ||
333 | .long sys_pipe2 | ||
334 | .long sys_inotify_init1 | ||
diff --git a/arch/x86/kernel/time_32.c b/arch/x86/kernel/time_32.c index 059ca6ee59b4..ffe3c664afc0 100644 --- a/arch/x86/kernel/time_32.c +++ b/arch/x86/kernel/time_32.c | |||
@@ -129,6 +129,7 @@ void __init hpet_time_init(void) | |||
129 | */ | 129 | */ |
130 | void __init time_init(void) | 130 | void __init time_init(void) |
131 | { | 131 | { |
132 | pre_time_init_hook(); | ||
132 | tsc_init(); | 133 | tsc_init(); |
133 | late_time_init = choose_time_init(); | 134 | late_time_init = choose_time_init(); |
134 | } | 135 | } |
diff --git a/arch/x86/kernel/tlb_uv.c b/arch/x86/kernel/tlb_uv.c index d0fbb7712ab0..8b8c0d6640fa 100644 --- a/arch/x86/kernel/tlb_uv.c +++ b/arch/x86/kernel/tlb_uv.c | |||
@@ -17,6 +17,7 @@ | |||
17 | #include <asm/genapic.h> | 17 | #include <asm/genapic.h> |
18 | #include <asm/idle.h> | 18 | #include <asm/idle.h> |
19 | #include <asm/tsc.h> | 19 | #include <asm/tsc.h> |
20 | #include <asm/irq_vectors.h> | ||
20 | 21 | ||
21 | #include <mach_apic.h> | 22 | #include <mach_apic.h> |
22 | 23 | ||
@@ -783,7 +784,7 @@ static int __init uv_bau_init(void) | |||
783 | uv_init_blade(blade, node, cur_cpu); | 784 | uv_init_blade(blade, node, cur_cpu); |
784 | cur_cpu += uv_blade_nr_possible_cpus(blade); | 785 | cur_cpu += uv_blade_nr_possible_cpus(blade); |
785 | } | 786 | } |
786 | set_intr_gate(UV_BAU_MESSAGE, uv_bau_message_intr1); | 787 | alloc_intr_gate(UV_BAU_MESSAGE, uv_bau_message_intr1); |
787 | uv_enable_timeouts(); | 788 | uv_enable_timeouts(); |
788 | 789 | ||
789 | return 0; | 790 | return 0; |
diff --git a/arch/x86/kernel/traps_32.c b/arch/x86/kernel/traps_32.c index 8a768973c4f0..03df8e45e5a1 100644 --- a/arch/x86/kernel/traps_32.c +++ b/arch/x86/kernel/traps_32.c | |||
@@ -58,6 +58,7 @@ | |||
58 | #include <asm/nmi.h> | 58 | #include <asm/nmi.h> |
59 | #include <asm/smp.h> | 59 | #include <asm/smp.h> |
60 | #include <asm/io.h> | 60 | #include <asm/io.h> |
61 | #include <asm/traps.h> | ||
61 | 62 | ||
62 | #include "mach_traps.h" | 63 | #include "mach_traps.h" |
63 | 64 | ||
@@ -77,26 +78,6 @@ char ignore_fpu_irq; | |||
77 | gate_desc idt_table[256] | 78 | gate_desc idt_table[256] |
78 | __attribute__((__section__(".data.idt"))) = { { { { 0, 0 } } }, }; | 79 | __attribute__((__section__(".data.idt"))) = { { { { 0, 0 } } }, }; |
79 | 80 | ||
80 | asmlinkage void divide_error(void); | ||
81 | asmlinkage void debug(void); | ||
82 | asmlinkage void nmi(void); | ||
83 | asmlinkage void int3(void); | ||
84 | asmlinkage void overflow(void); | ||
85 | asmlinkage void bounds(void); | ||
86 | asmlinkage void invalid_op(void); | ||
87 | asmlinkage void device_not_available(void); | ||
88 | asmlinkage void coprocessor_segment_overrun(void); | ||
89 | asmlinkage void invalid_TSS(void); | ||
90 | asmlinkage void segment_not_present(void); | ||
91 | asmlinkage void stack_segment(void); | ||
92 | asmlinkage void general_protection(void); | ||
93 | asmlinkage void page_fault(void); | ||
94 | asmlinkage void coprocessor_error(void); | ||
95 | asmlinkage void simd_coprocessor_error(void); | ||
96 | asmlinkage void alignment_check(void); | ||
97 | asmlinkage void spurious_interrupt_bug(void); | ||
98 | asmlinkage void machine_check(void); | ||
99 | |||
100 | int panic_on_unrecovered_nmi; | 81 | int panic_on_unrecovered_nmi; |
101 | int kstack_depth_to_print = 24; | 82 | int kstack_depth_to_print = 24; |
102 | static unsigned int code_bytes = 64; | 83 | static unsigned int code_bytes = 64; |
@@ -256,7 +237,7 @@ static const struct stacktrace_ops print_trace_ops = { | |||
256 | 237 | ||
257 | static void | 238 | static void |
258 | show_trace_log_lvl(struct task_struct *task, struct pt_regs *regs, | 239 | show_trace_log_lvl(struct task_struct *task, struct pt_regs *regs, |
259 | unsigned long *stack, unsigned long bp, char *log_lvl) | 240 | unsigned long *stack, unsigned long bp, char *log_lvl) |
260 | { | 241 | { |
261 | dump_trace(task, regs, stack, bp, &print_trace_ops, log_lvl); | 242 | dump_trace(task, regs, stack, bp, &print_trace_ops, log_lvl); |
262 | printk("%s =======================\n", log_lvl); | 243 | printk("%s =======================\n", log_lvl); |
@@ -383,6 +364,54 @@ int is_valid_bugaddr(unsigned long ip) | |||
383 | return ud2 == 0x0b0f; | 364 | return ud2 == 0x0b0f; |
384 | } | 365 | } |
385 | 366 | ||
367 | static raw_spinlock_t die_lock = __RAW_SPIN_LOCK_UNLOCKED; | ||
368 | static int die_owner = -1; | ||
369 | static unsigned int die_nest_count; | ||
370 | |||
371 | unsigned __kprobes long oops_begin(void) | ||
372 | { | ||
373 | unsigned long flags; | ||
374 | |||
375 | oops_enter(); | ||
376 | |||
377 | if (die_owner != raw_smp_processor_id()) { | ||
378 | console_verbose(); | ||
379 | raw_local_irq_save(flags); | ||
380 | __raw_spin_lock(&die_lock); | ||
381 | die_owner = smp_processor_id(); | ||
382 | die_nest_count = 0; | ||
383 | bust_spinlocks(1); | ||
384 | } else { | ||
385 | raw_local_irq_save(flags); | ||
386 | } | ||
387 | die_nest_count++; | ||
388 | return flags; | ||
389 | } | ||
390 | |||
391 | void __kprobes oops_end(unsigned long flags, struct pt_regs *regs, int signr) | ||
392 | { | ||
393 | bust_spinlocks(0); | ||
394 | die_owner = -1; | ||
395 | add_taint(TAINT_DIE); | ||
396 | __raw_spin_unlock(&die_lock); | ||
397 | raw_local_irq_restore(flags); | ||
398 | |||
399 | if (!regs) | ||
400 | return; | ||
401 | |||
402 | if (kexec_should_crash(current)) | ||
403 | crash_kexec(regs); | ||
404 | |||
405 | if (in_interrupt()) | ||
406 | panic("Fatal exception in interrupt"); | ||
407 | |||
408 | if (panic_on_oops) | ||
409 | panic("Fatal exception"); | ||
410 | |||
411 | oops_exit(); | ||
412 | do_exit(signr); | ||
413 | } | ||
414 | |||
386 | int __kprobes __die(const char *str, struct pt_regs *regs, long err) | 415 | int __kprobes __die(const char *str, struct pt_regs *regs, long err) |
387 | { | 416 | { |
388 | unsigned short ss; | 417 | unsigned short ss; |
@@ -423,31 +452,9 @@ int __kprobes __die(const char *str, struct pt_regs *regs, long err) | |||
423 | */ | 452 | */ |
424 | void die(const char *str, struct pt_regs *regs, long err) | 453 | void die(const char *str, struct pt_regs *regs, long err) |
425 | { | 454 | { |
426 | static struct { | 455 | unsigned long flags = oops_begin(); |
427 | raw_spinlock_t lock; | ||
428 | u32 lock_owner; | ||
429 | int lock_owner_depth; | ||
430 | } die = { | ||
431 | .lock = __RAW_SPIN_LOCK_UNLOCKED, | ||
432 | .lock_owner = -1, | ||
433 | .lock_owner_depth = 0 | ||
434 | }; | ||
435 | unsigned long flags; | ||
436 | |||
437 | oops_enter(); | ||
438 | |||
439 | if (die.lock_owner != raw_smp_processor_id()) { | ||
440 | console_verbose(); | ||
441 | raw_local_irq_save(flags); | ||
442 | __raw_spin_lock(&die.lock); | ||
443 | die.lock_owner = smp_processor_id(); | ||
444 | die.lock_owner_depth = 0; | ||
445 | bust_spinlocks(1); | ||
446 | } else { | ||
447 | raw_local_irq_save(flags); | ||
448 | } | ||
449 | 456 | ||
450 | if (++die.lock_owner_depth < 3) { | 457 | if (die_nest_count < 3) { |
451 | report_bug(regs->ip, regs); | 458 | report_bug(regs->ip, regs); |
452 | 459 | ||
453 | if (__die(str, regs, err)) | 460 | if (__die(str, regs, err)) |
@@ -456,26 +463,7 @@ void die(const char *str, struct pt_regs *regs, long err) | |||
456 | printk(KERN_EMERG "Recursive die() failure, output suppressed\n"); | 463 | printk(KERN_EMERG "Recursive die() failure, output suppressed\n"); |
457 | } | 464 | } |
458 | 465 | ||
459 | bust_spinlocks(0); | 466 | oops_end(flags, regs, SIGSEGV); |
460 | die.lock_owner = -1; | ||
461 | add_taint(TAINT_DIE); | ||
462 | __raw_spin_unlock(&die.lock); | ||
463 | raw_local_irq_restore(flags); | ||
464 | |||
465 | if (!regs) | ||
466 | return; | ||
467 | |||
468 | if (kexec_should_crash(current)) | ||
469 | crash_kexec(regs); | ||
470 | |||
471 | if (in_interrupt()) | ||
472 | panic("Fatal exception in interrupt"); | ||
473 | |||
474 | if (panic_on_oops) | ||
475 | panic("Fatal exception"); | ||
476 | |||
477 | oops_exit(); | ||
478 | do_exit(SIGSEGV); | ||
479 | } | 467 | } |
480 | 468 | ||
481 | static inline void | 469 | static inline void |
diff --git a/arch/x86/kernel/traps_64.c b/arch/x86/kernel/traps_64.c index 2696a6837782..513caaca7115 100644 --- a/arch/x86/kernel/traps_64.c +++ b/arch/x86/kernel/traps_64.c | |||
@@ -51,30 +51,10 @@ | |||
51 | #include <asm/pgalloc.h> | 51 | #include <asm/pgalloc.h> |
52 | #include <asm/proto.h> | 52 | #include <asm/proto.h> |
53 | #include <asm/pda.h> | 53 | #include <asm/pda.h> |
54 | #include <asm/traps.h> | ||
54 | 55 | ||
55 | #include <mach_traps.h> | 56 | #include <mach_traps.h> |
56 | 57 | ||
57 | asmlinkage void divide_error(void); | ||
58 | asmlinkage void debug(void); | ||
59 | asmlinkage void nmi(void); | ||
60 | asmlinkage void int3(void); | ||
61 | asmlinkage void overflow(void); | ||
62 | asmlinkage void bounds(void); | ||
63 | asmlinkage void invalid_op(void); | ||
64 | asmlinkage void device_not_available(void); | ||
65 | asmlinkage void double_fault(void); | ||
66 | asmlinkage void coprocessor_segment_overrun(void); | ||
67 | asmlinkage void invalid_TSS(void); | ||
68 | asmlinkage void segment_not_present(void); | ||
69 | asmlinkage void stack_segment(void); | ||
70 | asmlinkage void general_protection(void); | ||
71 | asmlinkage void page_fault(void); | ||
72 | asmlinkage void coprocessor_error(void); | ||
73 | asmlinkage void simd_coprocessor_error(void); | ||
74 | asmlinkage void alignment_check(void); | ||
75 | asmlinkage void spurious_interrupt_bug(void); | ||
76 | asmlinkage void machine_check(void); | ||
77 | |||
78 | int panic_on_unrecovered_nmi; | 58 | int panic_on_unrecovered_nmi; |
79 | int kstack_depth_to_print = 12; | 59 | int kstack_depth_to_print = 12; |
80 | static unsigned int code_bytes = 64; | 60 | static unsigned int code_bytes = 64; |
@@ -355,17 +335,24 @@ static const struct stacktrace_ops print_trace_ops = { | |||
355 | .address = print_trace_address, | 335 | .address = print_trace_address, |
356 | }; | 336 | }; |
357 | 337 | ||
358 | void show_trace(struct task_struct *task, struct pt_regs *regs, | 338 | static void |
359 | unsigned long *stack, unsigned long bp) | 339 | show_trace_log_lvl(struct task_struct *task, struct pt_regs *regs, |
340 | unsigned long *stack, unsigned long bp, char *log_lvl) | ||
360 | { | 341 | { |
361 | printk("\nCall Trace:\n"); | 342 | printk("\nCall Trace:\n"); |
362 | dump_trace(task, regs, stack, bp, &print_trace_ops, NULL); | 343 | dump_trace(task, regs, stack, bp, &print_trace_ops, log_lvl); |
363 | printk("\n"); | 344 | printk("\n"); |
364 | } | 345 | } |
365 | 346 | ||
347 | void show_trace(struct task_struct *task, struct pt_regs *regs, | ||
348 | unsigned long *stack, unsigned long bp) | ||
349 | { | ||
350 | show_trace_log_lvl(task, regs, stack, bp, ""); | ||
351 | } | ||
352 | |||
366 | static void | 353 | static void |
367 | _show_stack(struct task_struct *task, struct pt_regs *regs, | 354 | show_stack_log_lvl(struct task_struct *task, struct pt_regs *regs, |
368 | unsigned long *sp, unsigned long bp) | 355 | unsigned long *sp, unsigned long bp, char *log_lvl) |
369 | { | 356 | { |
370 | unsigned long *stack; | 357 | unsigned long *stack; |
371 | int i; | 358 | int i; |
@@ -399,12 +386,12 @@ _show_stack(struct task_struct *task, struct pt_regs *regs, | |||
399 | printk(" %016lx", *stack++); | 386 | printk(" %016lx", *stack++); |
400 | touch_nmi_watchdog(); | 387 | touch_nmi_watchdog(); |
401 | } | 388 | } |
402 | show_trace(task, regs, sp, bp); | 389 | show_trace_log_lvl(task, regs, sp, bp, log_lvl); |
403 | } | 390 | } |
404 | 391 | ||
405 | void show_stack(struct task_struct *task, unsigned long *sp) | 392 | void show_stack(struct task_struct *task, unsigned long *sp) |
406 | { | 393 | { |
407 | _show_stack(task, NULL, sp, 0); | 394 | show_stack_log_lvl(task, NULL, sp, 0, ""); |
408 | } | 395 | } |
409 | 396 | ||
410 | /* | 397 | /* |
@@ -454,7 +441,8 @@ void show_registers(struct pt_regs *regs) | |||
454 | u8 *ip; | 441 | u8 *ip; |
455 | 442 | ||
456 | printk("Stack: "); | 443 | printk("Stack: "); |
457 | _show_stack(NULL, regs, (unsigned long *)sp, regs->bp); | 444 | show_stack_log_lvl(NULL, regs, (unsigned long *)sp, |
445 | regs->bp, ""); | ||
458 | printk("\n"); | 446 | printk("\n"); |
459 | 447 | ||
460 | printk(KERN_EMERG "Code: "); | 448 | printk(KERN_EMERG "Code: "); |
@@ -518,7 +506,7 @@ unsigned __kprobes long oops_begin(void) | |||
518 | } | 506 | } |
519 | 507 | ||
520 | void __kprobes oops_end(unsigned long flags, struct pt_regs *regs, int signr) | 508 | void __kprobes oops_end(unsigned long flags, struct pt_regs *regs, int signr) |
521 | { | 509 | { |
522 | die_owner = -1; | 510 | die_owner = -1; |
523 | bust_spinlocks(0); | 511 | bust_spinlocks(0); |
524 | die_nest_count--; | 512 | die_nest_count--; |
@@ -1143,7 +1131,14 @@ asmlinkage void math_state_restore(void) | |||
1143 | } | 1131 | } |
1144 | 1132 | ||
1145 | clts(); /* Allow maths ops (or we recurse) */ | 1133 | clts(); /* Allow maths ops (or we recurse) */ |
1146 | restore_fpu_checking(&me->thread.xstate->fxsave); | 1134 | /* |
1135 | * Paranoid restore. Send a SIGSEGV if we fail to restore the state. | ||
1136 | */ | ||
1137 | if (unlikely(restore_fpu_checking(&me->thread.xstate->fxsave))) { | ||
1138 | stts(); | ||
1139 | force_sig(SIGSEGV, me); | ||
1140 | return; | ||
1141 | } | ||
1147 | task_thread_info(me)->status |= TS_USEDFPU; | 1142 | task_thread_info(me)->status |= TS_USEDFPU; |
1148 | me->fpu_counter++; | 1143 | me->fpu_counter++; |
1149 | } | 1144 | } |
diff --git a/arch/x86/kernel/tsc.c b/arch/x86/kernel/tsc.c index 7603c0553909..8f98e9de1b82 100644 --- a/arch/x86/kernel/tsc.c +++ b/arch/x86/kernel/tsc.c | |||
@@ -104,7 +104,7 @@ __setup("notsc", notsc_setup); | |||
104 | /* | 104 | /* |
105 | * Read TSC and the reference counters. Take care of SMI disturbance | 105 | * Read TSC and the reference counters. Take care of SMI disturbance |
106 | */ | 106 | */ |
107 | static u64 __init tsc_read_refs(u64 *pm, u64 *hpet) | 107 | static u64 tsc_read_refs(u64 *pm, u64 *hpet) |
108 | { | 108 | { |
109 | u64 t1, t2; | 109 | u64 t1, t2; |
110 | int i; | 110 | int i; |
@@ -122,80 +122,216 @@ static u64 __init tsc_read_refs(u64 *pm, u64 *hpet) | |||
122 | return ULLONG_MAX; | 122 | return ULLONG_MAX; |
123 | } | 123 | } |
124 | 124 | ||
125 | /** | 125 | /* |
126 | * native_calibrate_tsc - calibrate the tsc on boot | 126 | * Try to calibrate the TSC against the Programmable |
127 | * Interrupt Timer and return the frequency of the TSC | ||
128 | * in kHz. | ||
129 | * | ||
130 | * Return ULONG_MAX on failure to calibrate. | ||
127 | */ | 131 | */ |
128 | unsigned long native_calibrate_tsc(void) | 132 | static unsigned long pit_calibrate_tsc(void) |
129 | { | 133 | { |
130 | unsigned long flags; | 134 | u64 tsc, t1, t2, delta; |
131 | u64 tsc1, tsc2, tr1, tr2, delta, pm1, pm2, hpet1, hpet2; | 135 | unsigned long tscmin, tscmax; |
132 | int hpet = is_hpet_enabled(); | 136 | int pitcnt; |
133 | unsigned int tsc_khz_val = 0; | ||
134 | |||
135 | local_irq_save(flags); | ||
136 | |||
137 | tsc1 = tsc_read_refs(&pm1, hpet ? &hpet1 : NULL); | ||
138 | 137 | ||
138 | /* Set the Gate high, disable speaker */ | ||
139 | outb((inb(0x61) & ~0x02) | 0x01, 0x61); | 139 | outb((inb(0x61) & ~0x02) | 0x01, 0x61); |
140 | 140 | ||
141 | /* | ||
142 | * Setup CTC channel 2* for mode 0, (interrupt on terminal | ||
143 | * count mode), binary count. Set the latch register to 50ms | ||
144 | * (LSB then MSB) to begin countdown. | ||
145 | */ | ||
141 | outb(0xb0, 0x43); | 146 | outb(0xb0, 0x43); |
142 | outb((CLOCK_TICK_RATE / (1000 / 50)) & 0xff, 0x42); | 147 | outb((CLOCK_TICK_RATE / (1000 / 50)) & 0xff, 0x42); |
143 | outb((CLOCK_TICK_RATE / (1000 / 50)) >> 8, 0x42); | 148 | outb((CLOCK_TICK_RATE / (1000 / 50)) >> 8, 0x42); |
144 | tr1 = get_cycles(); | ||
145 | while ((inb(0x61) & 0x20) == 0); | ||
146 | tr2 = get_cycles(); | ||
147 | 149 | ||
148 | tsc2 = tsc_read_refs(&pm2, hpet ? &hpet2 : NULL); | 150 | tsc = t1 = t2 = get_cycles(); |
149 | 151 | ||
150 | local_irq_restore(flags); | 152 | pitcnt = 0; |
153 | tscmax = 0; | ||
154 | tscmin = ULONG_MAX; | ||
155 | while ((inb(0x61) & 0x20) == 0) { | ||
156 | t2 = get_cycles(); | ||
157 | delta = t2 - tsc; | ||
158 | tsc = t2; | ||
159 | if ((unsigned long) delta < tscmin) | ||
160 | tscmin = (unsigned int) delta; | ||
161 | if ((unsigned long) delta > tscmax) | ||
162 | tscmax = (unsigned int) delta; | ||
163 | pitcnt++; | ||
164 | } | ||
151 | 165 | ||
152 | /* | 166 | /* |
153 | * Preset the result with the raw and inaccurate PIT | 167 | * Sanity checks: |
154 | * calibration value | 168 | * |
169 | * If we were not able to read the PIT more than 5000 | ||
170 | * times, then we have been hit by a massive SMI | ||
171 | * | ||
172 | * If the maximum is 10 times larger than the minimum, | ||
173 | * then we got hit by an SMI as well. | ||
155 | */ | 174 | */ |
156 | delta = (tr2 - tr1); | 175 | if (pitcnt < 5000 || tscmax > 10 * tscmin) |
176 | return ULONG_MAX; | ||
177 | |||
178 | /* Calculate the PIT value */ | ||
179 | delta = t2 - t1; | ||
157 | do_div(delta, 50); | 180 | do_div(delta, 50); |
158 | tsc_khz_val = delta; | 181 | return delta; |
182 | } | ||
183 | |||
184 | |||
185 | /** | ||
186 | * native_calibrate_tsc - calibrate the tsc on boot | ||
187 | */ | ||
188 | unsigned long native_calibrate_tsc(void) | ||
189 | { | ||
190 | u64 tsc1, tsc2, delta, pm1, pm2, hpet1, hpet2; | ||
191 | unsigned long tsc_pit_min = ULONG_MAX, tsc_ref_min = ULONG_MAX; | ||
192 | unsigned long flags; | ||
193 | int hpet = is_hpet_enabled(), i; | ||
194 | |||
195 | /* | ||
196 | * Run 5 calibration loops to get the lowest frequency value | ||
197 | * (the best estimate). We use two different calibration modes | ||
198 | * here: | ||
199 | * | ||
200 | * 1) PIT loop. We set the PIT Channel 2 to oneshot mode and | ||
201 | * load a timeout of 50ms. We read the time right after we | ||
202 | * started the timer and wait until the PIT count down reaches | ||
203 | * zero. In each wait loop iteration we read the TSC and check | ||
204 | * the delta to the previous read. We keep track of the min | ||
205 | * and max values of that delta. The delta is mostly defined | ||
206 | * by the IO time of the PIT access, so we can detect when a | ||
207 | * SMI/SMM disturbance happened between the two reads. If the | ||
208 | * maximum time is significantly larger than the minimum time, | ||
209 | * then we discard the result and have another try. | ||
210 | * | ||
211 | * 2) Reference counter. If available we use the HPET or the | ||
212 | * PMTIMER as a reference to check the sanity of that value. | ||
213 | * We use separate TSC readouts and check inside of the | ||
214 | * reference read for a SMI/SMM disturbance. We discard | ||
215 | * disturbed values here as well. We do that around the PIT | ||
216 | * calibration delay loop as we have to wait for a certain | ||
217 | * amount of time anyway. | ||
218 | */ | ||
219 | for (i = 0; i < 5; i++) { | ||
220 | unsigned long tsc_pit_khz; | ||
221 | |||
222 | /* | ||
223 | * Read the start value and the reference count of | ||
224 | * hpet/pmtimer when available. Then do the PIT | ||
225 | * calibration, which will take at least 50ms, and | ||
226 | * read the end value. | ||
227 | */ | ||
228 | local_irq_save(flags); | ||
229 | tsc1 = tsc_read_refs(&pm1, hpet ? &hpet1 : NULL); | ||
230 | tsc_pit_khz = pit_calibrate_tsc(); | ||
231 | tsc2 = tsc_read_refs(&pm2, hpet ? &hpet2 : NULL); | ||
232 | local_irq_restore(flags); | ||
233 | |||
234 | /* Pick the lowest PIT TSC calibration so far */ | ||
235 | tsc_pit_min = min(tsc_pit_min, tsc_pit_khz); | ||
236 | |||
237 | /* hpet or pmtimer available ? */ | ||
238 | if (!hpet && !pm1 && !pm2) | ||
239 | continue; | ||
240 | |||
241 | /* Check, whether the sampling was disturbed by an SMI */ | ||
242 | if (tsc1 == ULLONG_MAX || tsc2 == ULLONG_MAX) | ||
243 | continue; | ||
244 | |||
245 | tsc2 = (tsc2 - tsc1) * 1000000LL; | ||
246 | |||
247 | if (hpet) { | ||
248 | if (hpet2 < hpet1) | ||
249 | hpet2 += 0x100000000ULL; | ||
250 | hpet2 -= hpet1; | ||
251 | tsc1 = ((u64)hpet2 * hpet_readl(HPET_PERIOD)); | ||
252 | do_div(tsc1, 1000000); | ||
253 | } else { | ||
254 | if (pm2 < pm1) | ||
255 | pm2 += (u64)ACPI_PM_OVRRUN; | ||
256 | pm2 -= pm1; | ||
257 | tsc1 = pm2 * 1000000000LL; | ||
258 | do_div(tsc1, PMTMR_TICKS_PER_SEC); | ||
259 | } | ||
260 | |||
261 | do_div(tsc2, tsc1); | ||
262 | tsc_ref_min = min(tsc_ref_min, (unsigned long) tsc2); | ||
263 | } | ||
264 | |||
265 | /* | ||
266 | * Now check the results. | ||
267 | */ | ||
268 | if (tsc_pit_min == ULONG_MAX) { | ||
269 | /* PIT gave no useful value */ | ||
270 | printk(KERN_WARNING "TSC: Unable to calibrate against PIT\n"); | ||
271 | |||
272 | /* We don't have an alternative source, disable TSC */ | ||
273 | if (!hpet && !pm1 && !pm2) { | ||
274 | printk("TSC: No reference (HPET/PMTIMER) available\n"); | ||
275 | return 0; | ||
276 | } | ||
277 | |||
278 | /* The alternative source failed as well, disable TSC */ | ||
279 | if (tsc_ref_min == ULONG_MAX) { | ||
280 | printk(KERN_WARNING "TSC: HPET/PMTIMER calibration " | ||
281 | "failed due to SMI disturbance.\n"); | ||
282 | return 0; | ||
283 | } | ||
284 | |||
285 | /* Use the alternative source */ | ||
286 | printk(KERN_INFO "TSC: using %s reference calibration\n", | ||
287 | hpet ? "HPET" : "PMTIMER"); | ||
288 | |||
289 | return tsc_ref_min; | ||
290 | } | ||
159 | 291 | ||
160 | /* hpet or pmtimer available ? */ | 292 | /* We don't have an alternative source, use the PIT calibration value */ |
161 | if (!hpet && !pm1 && !pm2) { | 293 | if (!hpet && !pm1 && !pm2) { |
162 | printk(KERN_INFO "TSC calibrated against PIT\n"); | 294 | printk(KERN_INFO "TSC: Using PIT calibration value\n"); |
163 | goto out; | 295 | return tsc_pit_min; |
164 | } | 296 | } |
165 | 297 | ||
166 | /* Check, whether the sampling was disturbed by an SMI */ | 298 | /* The alternative source failed, use the PIT calibration value */ |
167 | if (tsc1 == ULLONG_MAX || tsc2 == ULLONG_MAX) { | 299 | if (tsc_ref_min == ULONG_MAX) { |
168 | printk(KERN_WARNING "TSC calibration disturbed by SMI, " | 300 | printk(KERN_WARNING "TSC: HPET/PMTIMER calibration failed due " |
169 | "using PIT calibration result\n"); | 301 | "to SMI disturbance. Using PIT calibration\n"); |
170 | goto out; | 302 | return tsc_pit_min; |
171 | } | 303 | } |
172 | 304 | ||
173 | tsc2 = (tsc2 - tsc1) * 1000000LL; | 305 | /* Check the reference deviation */ |
174 | 306 | delta = ((u64) tsc_pit_min) * 100; | |
175 | if (hpet) { | 307 | do_div(delta, tsc_ref_min); |
176 | printk(KERN_INFO "TSC calibrated against HPET\n"); | 308 | |
177 | if (hpet2 < hpet1) | 309 | /* |
178 | hpet2 += 0x100000000ULL; | 310 | * If both calibration results are inside a 5% window, then we |
179 | hpet2 -= hpet1; | 311 | * use the lower frequency of those as it is probably the |
180 | tsc1 = ((u64)hpet2 * hpet_readl(HPET_PERIOD)); | 312 | * closest estimate. |
181 | do_div(tsc1, 1000000); | 313 | */ |
182 | } else { | 314 | if (delta >= 95 && delta <= 105) { |
183 | printk(KERN_INFO "TSC calibrated against PM_TIMER\n"); | 315 | printk(KERN_INFO "TSC: PIT calibration confirmed by %s.\n", |
184 | if (pm2 < pm1) | 316 | hpet ? "HPET" : "PMTIMER"); |
185 | pm2 += (u64)ACPI_PM_OVRRUN; | 317 | printk(KERN_INFO "TSC: using %s calibration value\n", |
186 | pm2 -= pm1; | 318 | tsc_pit_min <= tsc_ref_min ? "PIT" : |
187 | tsc1 = pm2 * 1000000000LL; | 319 | hpet ? "HPET" : "PMTIMER"); |
188 | do_div(tsc1, PMTMR_TICKS_PER_SEC); | 320 | return tsc_pit_min <= tsc_ref_min ? tsc_pit_min : tsc_ref_min; |
189 | } | 321 | } |
190 | 322 | ||
191 | do_div(tsc2, tsc1); | 323 | printk(KERN_WARNING "TSC: PIT calibration deviates from %s: %lu %lu.\n", |
192 | tsc_khz_val = tsc2; | 324 | hpet ? "HPET" : "PMTIMER", tsc_pit_min, tsc_ref_min); |
193 | 325 | ||
194 | out: | 326 | /* |
195 | return tsc_khz_val; | 327 | * The calibration values differ too much. In doubt, we use |
328 | * the PIT value as we know that there are PMTIMERs around | ||
329 | * running at double speed. | ||
330 | */ | ||
331 | printk(KERN_INFO "TSC: Using PIT calibration value\n"); | ||
332 | return tsc_pit_min; | ||
196 | } | 333 | } |
197 | 334 | ||
198 | |||
199 | #ifdef CONFIG_X86_32 | 335 | #ifdef CONFIG_X86_32 |
200 | /* Only called from the Powernow K7 cpu freq driver */ | 336 | /* Only called from the Powernow K7 cpu freq driver */ |
201 | int recalibrate_cpu_khz(void) | 337 | int recalibrate_cpu_khz(void) |
@@ -314,7 +450,7 @@ static int time_cpufreq_notifier(struct notifier_block *nb, unsigned long val, | |||
314 | mark_tsc_unstable("cpufreq changes"); | 450 | mark_tsc_unstable("cpufreq changes"); |
315 | } | 451 | } |
316 | 452 | ||
317 | set_cyc2ns_scale(tsc_khz_ref, freq->cpu); | 453 | set_cyc2ns_scale(tsc_khz, freq->cpu); |
318 | 454 | ||
319 | return 0; | 455 | return 0; |
320 | } | 456 | } |
@@ -325,6 +461,10 @@ static struct notifier_block time_cpufreq_notifier_block = { | |||
325 | 461 | ||
326 | static int __init cpufreq_tsc(void) | 462 | static int __init cpufreq_tsc(void) |
327 | { | 463 | { |
464 | if (!cpu_has_tsc) | ||
465 | return 0; | ||
466 | if (boot_cpu_has(X86_FEATURE_CONSTANT_TSC)) | ||
467 | return 0; | ||
328 | cpufreq_register_notifier(&time_cpufreq_notifier_block, | 468 | cpufreq_register_notifier(&time_cpufreq_notifier_block, |
329 | CPUFREQ_TRANSITION_NOTIFIER); | 469 | CPUFREQ_TRANSITION_NOTIFIER); |
330 | return 0; | 470 | return 0; |
diff --git a/arch/x86/kernel/tsc_sync.c b/arch/x86/kernel/tsc_sync.c index 0577825cf89b..9ffb01c31c40 100644 --- a/arch/x86/kernel/tsc_sync.c +++ b/arch/x86/kernel/tsc_sync.c | |||
@@ -88,11 +88,9 @@ static __cpuinit void check_tsc_warp(void) | |||
88 | __raw_spin_unlock(&sync_lock); | 88 | __raw_spin_unlock(&sync_lock); |
89 | } | 89 | } |
90 | } | 90 | } |
91 | if (!(now-start)) { | 91 | WARN(!(now-start), |
92 | printk("Warning: zero tsc calibration delta: %Ld [max: %Ld]\n", | 92 | "Warning: zero tsc calibration delta: %Ld [max: %Ld]\n", |
93 | now-start, end-start); | 93 | now-start, end-start); |
94 | WARN_ON(1); | ||
95 | } | ||
96 | } | 94 | } |
97 | 95 | ||
98 | /* | 96 | /* |
diff --git a/arch/x86/kernel/visws_quirks.c b/arch/x86/kernel/visws_quirks.c index e94bdb6add1d..594ef47f0a63 100644 --- a/arch/x86/kernel/visws_quirks.c +++ b/arch/x86/kernel/visws_quirks.c | |||
@@ -73,7 +73,7 @@ int is_visws_box(void) | |||
73 | return visws_board_type >= 0; | 73 | return visws_board_type >= 0; |
74 | } | 74 | } |
75 | 75 | ||
76 | static int __init visws_time_init_quirk(void) | 76 | static int __init visws_time_init(void) |
77 | { | 77 | { |
78 | printk(KERN_INFO "Starting Cobalt Timer system clock\n"); | 78 | printk(KERN_INFO "Starting Cobalt Timer system clock\n"); |
79 | 79 | ||
@@ -93,7 +93,7 @@ static int __init visws_time_init_quirk(void) | |||
93 | return 0; | 93 | return 0; |
94 | } | 94 | } |
95 | 95 | ||
96 | static int __init visws_pre_intr_init_quirk(void) | 96 | static int __init visws_pre_intr_init(void) |
97 | { | 97 | { |
98 | init_VISWS_APIC_irqs(); | 98 | init_VISWS_APIC_irqs(); |
99 | 99 | ||
@@ -114,7 +114,7 @@ EXPORT_SYMBOL(sgivwfb_mem_size); | |||
114 | 114 | ||
115 | long long mem_size __initdata = 0; | 115 | long long mem_size __initdata = 0; |
116 | 116 | ||
117 | static char * __init visws_memory_setup_quirk(void) | 117 | static char * __init visws_memory_setup(void) |
118 | { | 118 | { |
119 | long long gfx_mem_size = 8 * MB; | 119 | long long gfx_mem_size = 8 * MB; |
120 | 120 | ||
@@ -176,7 +176,7 @@ static void visws_machine_power_off(void) | |||
176 | outl(PIIX_SPECIAL_STOP, 0xCFC); | 176 | outl(PIIX_SPECIAL_STOP, 0xCFC); |
177 | } | 177 | } |
178 | 178 | ||
179 | static int __init visws_get_smp_config_quirk(unsigned int early) | 179 | static int __init visws_get_smp_config(unsigned int early) |
180 | { | 180 | { |
181 | /* | 181 | /* |
182 | * Prevent MP-table parsing by the generic code: | 182 | * Prevent MP-table parsing by the generic code: |
@@ -184,15 +184,13 @@ static int __init visws_get_smp_config_quirk(unsigned int early) | |||
184 | return 1; | 184 | return 1; |
185 | } | 185 | } |
186 | 186 | ||
187 | extern unsigned int __cpuinitdata maxcpus; | ||
188 | |||
189 | /* | 187 | /* |
190 | * The Visual Workstation is Intel MP compliant in the hardware | 188 | * The Visual Workstation is Intel MP compliant in the hardware |
191 | * sense, but it doesn't have a BIOS(-configuration table). | 189 | * sense, but it doesn't have a BIOS(-configuration table). |
192 | * No problem for Linux. | 190 | * No problem for Linux. |
193 | */ | 191 | */ |
194 | 192 | ||
195 | static void __init MP_processor_info (struct mpc_config_processor *m) | 193 | static void __init MP_processor_info(struct mpc_config_processor *m) |
196 | { | 194 | { |
197 | int ver, logical_apicid; | 195 | int ver, logical_apicid; |
198 | physid_mask_t apic_cpus; | 196 | physid_mask_t apic_cpus; |
@@ -232,7 +230,7 @@ static void __init MP_processor_info (struct mpc_config_processor *m) | |||
232 | apic_version[m->mpc_apicid] = ver; | 230 | apic_version[m->mpc_apicid] = ver; |
233 | } | 231 | } |
234 | 232 | ||
235 | int __init visws_find_smp_config_quirk(unsigned int reserve) | 233 | static int __init visws_find_smp_config(unsigned int reserve) |
236 | { | 234 | { |
237 | struct mpc_config_processor *mp = phys_to_virt(CO_CPU_TAB_PHYS); | 235 | struct mpc_config_processor *mp = phys_to_virt(CO_CPU_TAB_PHYS); |
238 | unsigned short ncpus = readw(phys_to_virt(CO_CPU_NUM_PHYS)); | 236 | unsigned short ncpus = readw(phys_to_virt(CO_CPU_NUM_PHYS)); |
@@ -244,8 +242,8 @@ int __init visws_find_smp_config_quirk(unsigned int reserve) | |||
244 | ncpus = CO_CPU_MAX; | 242 | ncpus = CO_CPU_MAX; |
245 | } | 243 | } |
246 | 244 | ||
247 | if (ncpus > maxcpus) | 245 | if (ncpus > setup_max_cpus) |
248 | ncpus = maxcpus; | 246 | ncpus = setup_max_cpus; |
249 | 247 | ||
250 | #ifdef CONFIG_X86_LOCAL_APIC | 248 | #ifdef CONFIG_X86_LOCAL_APIC |
251 | smp_found_config = 1; | 249 | smp_found_config = 1; |
@@ -258,7 +256,17 @@ int __init visws_find_smp_config_quirk(unsigned int reserve) | |||
258 | return 1; | 256 | return 1; |
259 | } | 257 | } |
260 | 258 | ||
261 | extern int visws_trap_init_quirk(void); | 259 | static int visws_trap_init(void); |
260 | |||
261 | static struct x86_quirks visws_x86_quirks __initdata = { | ||
262 | .arch_time_init = visws_time_init, | ||
263 | .arch_pre_intr_init = visws_pre_intr_init, | ||
264 | .arch_memory_setup = visws_memory_setup, | ||
265 | .arch_intr_init = NULL, | ||
266 | .arch_trap_init = visws_trap_init, | ||
267 | .mach_get_smp_config = visws_get_smp_config, | ||
268 | .mach_find_smp_config = visws_find_smp_config, | ||
269 | }; | ||
262 | 270 | ||
263 | void __init visws_early_detect(void) | 271 | void __init visws_early_detect(void) |
264 | { | 272 | { |
@@ -272,16 +280,10 @@ void __init visws_early_detect(void) | |||
272 | 280 | ||
273 | /* | 281 | /* |
274 | * Install special quirks for timer, interrupt and memory setup: | 282 | * Install special quirks for timer, interrupt and memory setup: |
275 | */ | ||
276 | arch_time_init_quirk = visws_time_init_quirk; | ||
277 | arch_pre_intr_init_quirk = visws_pre_intr_init_quirk; | ||
278 | arch_memory_setup_quirk = visws_memory_setup_quirk; | ||
279 | |||
280 | /* | ||
281 | * Fall back to generic behavior for traps: | 283 | * Fall back to generic behavior for traps: |
284 | * Override generic MP-table parsing: | ||
282 | */ | 285 | */ |
283 | arch_intr_init_quirk = NULL; | 286 | x86_quirks = &visws_x86_quirks; |
284 | arch_trap_init_quirk = visws_trap_init_quirk; | ||
285 | 287 | ||
286 | /* | 288 | /* |
287 | * Install reboot quirks: | 289 | * Install reboot quirks: |
@@ -294,12 +296,6 @@ void __init visws_early_detect(void) | |||
294 | */ | 296 | */ |
295 | no_broadcast = 0; | 297 | no_broadcast = 0; |
296 | 298 | ||
297 | /* | ||
298 | * Override generic MP-table parsing: | ||
299 | */ | ||
300 | mach_get_smp_config_quirk = visws_get_smp_config_quirk; | ||
301 | mach_find_smp_config_quirk = visws_find_smp_config_quirk; | ||
302 | |||
303 | #ifdef CONFIG_X86_IO_APIC | 299 | #ifdef CONFIG_X86_IO_APIC |
304 | /* | 300 | /* |
305 | * Turn off IO-APIC detection and initialization: | 301 | * Turn off IO-APIC detection and initialization: |
@@ -426,7 +422,7 @@ static __init void cobalt_init(void) | |||
426 | co_apic_read(CO_APIC_ID)); | 422 | co_apic_read(CO_APIC_ID)); |
427 | } | 423 | } |
428 | 424 | ||
429 | int __init visws_trap_init_quirk(void) | 425 | static int __init visws_trap_init(void) |
430 | { | 426 | { |
431 | lithium_init(); | 427 | lithium_init(); |
432 | cobalt_init(); | 428 | cobalt_init(); |
diff --git a/arch/x86/kernel/vmi_32.c b/arch/x86/kernel/vmi_32.c index b15346092b7b..6ca515d6db54 100644 --- a/arch/x86/kernel/vmi_32.c +++ b/arch/x86/kernel/vmi_32.c | |||
@@ -37,6 +37,7 @@ | |||
37 | #include <asm/timer.h> | 37 | #include <asm/timer.h> |
38 | #include <asm/vmi_time.h> | 38 | #include <asm/vmi_time.h> |
39 | #include <asm/kmap_types.h> | 39 | #include <asm/kmap_types.h> |
40 | #include <asm/setup.h> | ||
40 | 41 | ||
41 | /* Convenient for calling VMI functions indirectly in the ROM */ | 42 | /* Convenient for calling VMI functions indirectly in the ROM */ |
42 | typedef u32 __attribute__((regparm(1))) (VROMFUNC)(void); | 43 | typedef u32 __attribute__((regparm(1))) (VROMFUNC)(void); |
@@ -683,7 +684,7 @@ void vmi_bringup(void) | |||
683 | { | 684 | { |
684 | /* We must establish the lowmem mapping for MMU ops to work */ | 685 | /* We must establish the lowmem mapping for MMU ops to work */ |
685 | if (vmi_ops.set_linear_mapping) | 686 | if (vmi_ops.set_linear_mapping) |
686 | vmi_ops.set_linear_mapping(0, (void *)__PAGE_OFFSET, max_low_pfn, 0); | 687 | vmi_ops.set_linear_mapping(0, (void *)__PAGE_OFFSET, MAXMEM_PFN, 0); |
687 | } | 688 | } |
688 | 689 | ||
689 | /* | 690 | /* |
@@ -906,7 +907,6 @@ static inline int __init activate_vmi(void) | |||
906 | #ifdef CONFIG_X86_LOCAL_APIC | 907 | #ifdef CONFIG_X86_LOCAL_APIC |
907 | para_fill(pv_apic_ops.apic_read, APICRead); | 908 | para_fill(pv_apic_ops.apic_read, APICRead); |
908 | para_fill(pv_apic_ops.apic_write, APICWrite); | 909 | para_fill(pv_apic_ops.apic_write, APICWrite); |
909 | para_fill(pv_apic_ops.apic_write_atomic, APICWrite); | ||
910 | #endif | 910 | #endif |
911 | 911 | ||
912 | /* | 912 | /* |
diff --git a/arch/x86/kernel/vmlinux_32.lds.S b/arch/x86/kernel/vmlinux_32.lds.S index cdb2363697d2..af5bdad84604 100644 --- a/arch/x86/kernel/vmlinux_32.lds.S +++ b/arch/x86/kernel/vmlinux_32.lds.S | |||
@@ -209,3 +209,11 @@ SECTIONS | |||
209 | 209 | ||
210 | DWARF_DEBUG | 210 | DWARF_DEBUG |
211 | } | 211 | } |
212 | |||
213 | #ifdef CONFIG_KEXEC | ||
214 | /* Link time checks */ | ||
215 | #include <asm/kexec.h> | ||
216 | |||
217 | ASSERT(kexec_control_code_size <= KEXEC_CONTROL_CODE_MAX_SIZE, | ||
218 | "kexec control code size is too big") | ||
219 | #endif | ||
diff --git a/arch/x86/kernel/vsmp_64.c b/arch/x86/kernel/vsmp_64.c index 0c029e8959c7..7766d36983fc 100644 --- a/arch/x86/kernel/vsmp_64.c +++ b/arch/x86/kernel/vsmp_64.c | |||
@@ -61,7 +61,7 @@ static void vsmp_irq_enable(void) | |||
61 | native_restore_fl((flags | X86_EFLAGS_IF) & (~X86_EFLAGS_AC)); | 61 | native_restore_fl((flags | X86_EFLAGS_IF) & (~X86_EFLAGS_AC)); |
62 | } | 62 | } |
63 | 63 | ||
64 | static unsigned __init vsmp_patch(u8 type, u16 clobbers, void *ibuf, | 64 | static unsigned __init_or_module vsmp_patch(u8 type, u16 clobbers, void *ibuf, |
65 | unsigned long addr, unsigned len) | 65 | unsigned long addr, unsigned len) |
66 | { | 66 | { |
67 | switch (type) { | 67 | switch (type) { |
diff --git a/arch/x86/kvm/Kconfig b/arch/x86/kvm/Kconfig index 8d45fabc5f3b..ce3251ce5504 100644 --- a/arch/x86/kvm/Kconfig +++ b/arch/x86/kvm/Kconfig | |||
@@ -21,6 +21,7 @@ config KVM | |||
21 | tristate "Kernel-based Virtual Machine (KVM) support" | 21 | tristate "Kernel-based Virtual Machine (KVM) support" |
22 | depends on HAVE_KVM | 22 | depends on HAVE_KVM |
23 | select PREEMPT_NOTIFIERS | 23 | select PREEMPT_NOTIFIERS |
24 | select MMU_NOTIFIER | ||
24 | select ANON_INODES | 25 | select ANON_INODES |
25 | ---help--- | 26 | ---help--- |
26 | Support hosting fully virtualized guest machines using hardware | 27 | Support hosting fully virtualized guest machines using hardware |
diff --git a/arch/x86/kvm/Makefile b/arch/x86/kvm/Makefile index c97d35c218db..d0e940bb6f40 100644 --- a/arch/x86/kvm/Makefile +++ b/arch/x86/kvm/Makefile | |||
@@ -2,7 +2,8 @@ | |||
2 | # Makefile for Kernel-based Virtual Machine module | 2 | # Makefile for Kernel-based Virtual Machine module |
3 | # | 3 | # |
4 | 4 | ||
5 | common-objs = $(addprefix ../../../virt/kvm/, kvm_main.o ioapic.o) | 5 | common-objs = $(addprefix ../../../virt/kvm/, kvm_main.o ioapic.o \ |
6 | coalesced_mmio.o) | ||
6 | ifeq ($(CONFIG_KVM_TRACE),y) | 7 | ifeq ($(CONFIG_KVM_TRACE),y) |
7 | common-objs += $(addprefix ../../../virt/kvm/, kvm_trace.o) | 8 | common-objs += $(addprefix ../../../virt/kvm/, kvm_trace.o) |
8 | endif | 9 | endif |
diff --git a/arch/x86/kvm/i8254.c b/arch/x86/kvm/i8254.c index 3829aa7b663f..c0f7872a9124 100644 --- a/arch/x86/kvm/i8254.c +++ b/arch/x86/kvm/i8254.c | |||
@@ -91,7 +91,7 @@ static void pit_set_gate(struct kvm *kvm, int channel, u32 val) | |||
91 | c->gate = val; | 91 | c->gate = val; |
92 | } | 92 | } |
93 | 93 | ||
94 | int pit_get_gate(struct kvm *kvm, int channel) | 94 | static int pit_get_gate(struct kvm *kvm, int channel) |
95 | { | 95 | { |
96 | WARN_ON(!mutex_is_locked(&kvm->arch.vpit->pit_state.lock)); | 96 | WARN_ON(!mutex_is_locked(&kvm->arch.vpit->pit_state.lock)); |
97 | 97 | ||
@@ -193,19 +193,16 @@ static void pit_latch_status(struct kvm *kvm, int channel) | |||
193 | } | 193 | } |
194 | } | 194 | } |
195 | 195 | ||
196 | int __pit_timer_fn(struct kvm_kpit_state *ps) | 196 | static int __pit_timer_fn(struct kvm_kpit_state *ps) |
197 | { | 197 | { |
198 | struct kvm_vcpu *vcpu0 = ps->pit->kvm->vcpus[0]; | 198 | struct kvm_vcpu *vcpu0 = ps->pit->kvm->vcpus[0]; |
199 | struct kvm_kpit_timer *pt = &ps->pit_timer; | 199 | struct kvm_kpit_timer *pt = &ps->pit_timer; |
200 | 200 | ||
201 | atomic_inc(&pt->pending); | 201 | if (!atomic_inc_and_test(&pt->pending)) |
202 | smp_mb__after_atomic_inc(); | ||
203 | if (vcpu0) { | ||
204 | set_bit(KVM_REQ_PENDING_TIMER, &vcpu0->requests); | 202 | set_bit(KVM_REQ_PENDING_TIMER, &vcpu0->requests); |
205 | if (waitqueue_active(&vcpu0->wq)) { | 203 | if (vcpu0 && waitqueue_active(&vcpu0->wq)) { |
206 | vcpu0->arch.mp_state = KVM_MP_STATE_RUNNABLE; | 204 | vcpu0->arch.mp_state = KVM_MP_STATE_RUNNABLE; |
207 | wake_up_interruptible(&vcpu0->wq); | 205 | wake_up_interruptible(&vcpu0->wq); |
208 | } | ||
209 | } | 206 | } |
210 | 207 | ||
211 | pt->timer.expires = ktime_add_ns(pt->timer.expires, pt->period); | 208 | pt->timer.expires = ktime_add_ns(pt->timer.expires, pt->period); |
@@ -308,6 +305,7 @@ static void pit_load_count(struct kvm *kvm, int channel, u32 val) | |||
308 | create_pit_timer(&ps->pit_timer, val, 0); | 305 | create_pit_timer(&ps->pit_timer, val, 0); |
309 | break; | 306 | break; |
310 | case 2: | 307 | case 2: |
308 | case 3: | ||
311 | create_pit_timer(&ps->pit_timer, val, 1); | 309 | create_pit_timer(&ps->pit_timer, val, 1); |
312 | break; | 310 | break; |
313 | default: | 311 | default: |
@@ -459,7 +457,8 @@ static void pit_ioport_read(struct kvm_io_device *this, | |||
459 | mutex_unlock(&pit_state->lock); | 457 | mutex_unlock(&pit_state->lock); |
460 | } | 458 | } |
461 | 459 | ||
462 | static int pit_in_range(struct kvm_io_device *this, gpa_t addr) | 460 | static int pit_in_range(struct kvm_io_device *this, gpa_t addr, |
461 | int len, int is_write) | ||
463 | { | 462 | { |
464 | return ((addr >= KVM_PIT_BASE_ADDRESS) && | 463 | return ((addr >= KVM_PIT_BASE_ADDRESS) && |
465 | (addr < KVM_PIT_BASE_ADDRESS + KVM_PIT_MEM_LENGTH)); | 464 | (addr < KVM_PIT_BASE_ADDRESS + KVM_PIT_MEM_LENGTH)); |
@@ -500,7 +499,8 @@ static void speaker_ioport_read(struct kvm_io_device *this, | |||
500 | mutex_unlock(&pit_state->lock); | 499 | mutex_unlock(&pit_state->lock); |
501 | } | 500 | } |
502 | 501 | ||
503 | static int speaker_in_range(struct kvm_io_device *this, gpa_t addr) | 502 | static int speaker_in_range(struct kvm_io_device *this, gpa_t addr, |
503 | int len, int is_write) | ||
504 | { | 504 | { |
505 | return (addr == KVM_SPEAKER_BASE_ADDRESS); | 505 | return (addr == KVM_SPEAKER_BASE_ADDRESS); |
506 | } | 506 | } |
@@ -575,7 +575,7 @@ void kvm_free_pit(struct kvm *kvm) | |||
575 | } | 575 | } |
576 | } | 576 | } |
577 | 577 | ||
578 | void __inject_pit_timer_intr(struct kvm *kvm) | 578 | static void __inject_pit_timer_intr(struct kvm *kvm) |
579 | { | 579 | { |
580 | mutex_lock(&kvm->lock); | 580 | mutex_lock(&kvm->lock); |
581 | kvm_ioapic_set_irq(kvm->arch.vioapic, 0, 1); | 581 | kvm_ioapic_set_irq(kvm->arch.vioapic, 0, 1); |
diff --git a/arch/x86/kvm/i8259.c b/arch/x86/kvm/i8259.c index ab29cf2def47..c31164e8aa46 100644 --- a/arch/x86/kvm/i8259.c +++ b/arch/x86/kvm/i8259.c | |||
@@ -130,8 +130,10 @@ void kvm_pic_set_irq(void *opaque, int irq, int level) | |||
130 | { | 130 | { |
131 | struct kvm_pic *s = opaque; | 131 | struct kvm_pic *s = opaque; |
132 | 132 | ||
133 | pic_set_irq1(&s->pics[irq >> 3], irq & 7, level); | 133 | if (irq >= 0 && irq < PIC_NUM_PINS) { |
134 | pic_update_irq(s); | 134 | pic_set_irq1(&s->pics[irq >> 3], irq & 7, level); |
135 | pic_update_irq(s); | ||
136 | } | ||
135 | } | 137 | } |
136 | 138 | ||
137 | /* | 139 | /* |
@@ -346,7 +348,8 @@ static u32 elcr_ioport_read(void *opaque, u32 addr1) | |||
346 | return s->elcr; | 348 | return s->elcr; |
347 | } | 349 | } |
348 | 350 | ||
349 | static int picdev_in_range(struct kvm_io_device *this, gpa_t addr) | 351 | static int picdev_in_range(struct kvm_io_device *this, gpa_t addr, |
352 | int len, int is_write) | ||
350 | { | 353 | { |
351 | switch (addr) { | 354 | switch (addr) { |
352 | case 0x20: | 355 | case 0x20: |
diff --git a/arch/x86/kvm/irq.h b/arch/x86/kvm/irq.h index 2a15be2275c0..7ca47cbb48bb 100644 --- a/arch/x86/kvm/irq.h +++ b/arch/x86/kvm/irq.h | |||
@@ -30,6 +30,8 @@ | |||
30 | #include "ioapic.h" | 30 | #include "ioapic.h" |
31 | #include "lapic.h" | 31 | #include "lapic.h" |
32 | 32 | ||
33 | #define PIC_NUM_PINS 16 | ||
34 | |||
33 | struct kvm; | 35 | struct kvm; |
34 | struct kvm_vcpu; | 36 | struct kvm_vcpu; |
35 | 37 | ||
diff --git a/arch/x86/kvm/lapic.c b/arch/x86/kvm/lapic.c index ebc03f5ae162..73f43de69f67 100644 --- a/arch/x86/kvm/lapic.c +++ b/arch/x86/kvm/lapic.c | |||
@@ -356,8 +356,9 @@ static int __apic_accept_irq(struct kvm_lapic *apic, int delivery_mode, | |||
356 | case APIC_DM_SMI: | 356 | case APIC_DM_SMI: |
357 | printk(KERN_DEBUG "Ignoring guest SMI\n"); | 357 | printk(KERN_DEBUG "Ignoring guest SMI\n"); |
358 | break; | 358 | break; |
359 | |||
359 | case APIC_DM_NMI: | 360 | case APIC_DM_NMI: |
360 | printk(KERN_DEBUG "Ignoring guest NMI\n"); | 361 | kvm_inject_nmi(vcpu); |
361 | break; | 362 | break; |
362 | 363 | ||
363 | case APIC_DM_INIT: | 364 | case APIC_DM_INIT: |
@@ -572,6 +573,8 @@ static u32 __apic_read(struct kvm_lapic *apic, unsigned int offset) | |||
572 | { | 573 | { |
573 | u32 val = 0; | 574 | u32 val = 0; |
574 | 575 | ||
576 | KVMTRACE_1D(APIC_ACCESS, apic->vcpu, (u32)offset, handler); | ||
577 | |||
575 | if (offset >= LAPIC_MMIO_LENGTH) | 578 | if (offset >= LAPIC_MMIO_LENGTH) |
576 | return 0; | 579 | return 0; |
577 | 580 | ||
@@ -695,6 +698,8 @@ static void apic_mmio_write(struct kvm_io_device *this, | |||
695 | 698 | ||
696 | offset &= 0xff0; | 699 | offset &= 0xff0; |
697 | 700 | ||
701 | KVMTRACE_1D(APIC_ACCESS, apic->vcpu, (u32)offset, handler); | ||
702 | |||
698 | switch (offset) { | 703 | switch (offset) { |
699 | case APIC_ID: /* Local APIC ID */ | 704 | case APIC_ID: /* Local APIC ID */ |
700 | apic_set_reg(apic, APIC_ID, val); | 705 | apic_set_reg(apic, APIC_ID, val); |
@@ -780,7 +785,8 @@ static void apic_mmio_write(struct kvm_io_device *this, | |||
780 | 785 | ||
781 | } | 786 | } |
782 | 787 | ||
783 | static int apic_mmio_range(struct kvm_io_device *this, gpa_t addr) | 788 | static int apic_mmio_range(struct kvm_io_device *this, gpa_t addr, |
789 | int len, int size) | ||
784 | { | 790 | { |
785 | struct kvm_lapic *apic = (struct kvm_lapic *)this->private; | 791 | struct kvm_lapic *apic = (struct kvm_lapic *)this->private; |
786 | int ret = 0; | 792 | int ret = 0; |
@@ -939,8 +945,8 @@ static int __apic_timer_fn(struct kvm_lapic *apic) | |||
939 | int result = 0; | 945 | int result = 0; |
940 | wait_queue_head_t *q = &apic->vcpu->wq; | 946 | wait_queue_head_t *q = &apic->vcpu->wq; |
941 | 947 | ||
942 | atomic_inc(&apic->timer.pending); | 948 | if(!atomic_inc_and_test(&apic->timer.pending)) |
943 | set_bit(KVM_REQ_PENDING_TIMER, &apic->vcpu->requests); | 949 | set_bit(KVM_REQ_PENDING_TIMER, &apic->vcpu->requests); |
944 | if (waitqueue_active(q)) { | 950 | if (waitqueue_active(q)) { |
945 | apic->vcpu->arch.mp_state = KVM_MP_STATE_RUNNABLE; | 951 | apic->vcpu->arch.mp_state = KVM_MP_STATE_RUNNABLE; |
946 | wake_up_interruptible(q); | 952 | wake_up_interruptible(q); |
diff --git a/arch/x86/kvm/lapic.h b/arch/x86/kvm/lapic.h index 676c396c9cee..81858881287e 100644 --- a/arch/x86/kvm/lapic.h +++ b/arch/x86/kvm/lapic.h | |||
@@ -31,6 +31,7 @@ void kvm_lapic_reset(struct kvm_vcpu *vcpu); | |||
31 | u64 kvm_lapic_get_cr8(struct kvm_vcpu *vcpu); | 31 | u64 kvm_lapic_get_cr8(struct kvm_vcpu *vcpu); |
32 | void kvm_lapic_set_tpr(struct kvm_vcpu *vcpu, unsigned long cr8); | 32 | void kvm_lapic_set_tpr(struct kvm_vcpu *vcpu, unsigned long cr8); |
33 | void kvm_lapic_set_base(struct kvm_vcpu *vcpu, u64 value); | 33 | void kvm_lapic_set_base(struct kvm_vcpu *vcpu, u64 value); |
34 | u64 kvm_lapic_get_base(struct kvm_vcpu *vcpu); | ||
34 | 35 | ||
35 | int kvm_apic_match_physical_addr(struct kvm_lapic *apic, u16 dest); | 36 | int kvm_apic_match_physical_addr(struct kvm_lapic *apic, u16 dest); |
36 | int kvm_apic_match_logical_addr(struct kvm_lapic *apic, u8 mda); | 37 | int kvm_apic_match_logical_addr(struct kvm_lapic *apic, u8 mda); |
diff --git a/arch/x86/kvm/mmu.c b/arch/x86/kvm/mmu.c index 7e7c3969f7a2..3da2508eb22a 100644 --- a/arch/x86/kvm/mmu.c +++ b/arch/x86/kvm/mmu.c | |||
@@ -66,7 +66,8 @@ static void kvm_mmu_audit(struct kvm_vcpu *vcpu, const char *msg) {} | |||
66 | #endif | 66 | #endif |
67 | 67 | ||
68 | #if defined(MMU_DEBUG) || defined(AUDIT) | 68 | #if defined(MMU_DEBUG) || defined(AUDIT) |
69 | static int dbg = 1; | 69 | static int dbg = 0; |
70 | module_param(dbg, bool, 0644); | ||
70 | #endif | 71 | #endif |
71 | 72 | ||
72 | #ifndef MMU_DEBUG | 73 | #ifndef MMU_DEBUG |
@@ -652,6 +653,88 @@ static void rmap_write_protect(struct kvm *kvm, u64 gfn) | |||
652 | account_shadowed(kvm, gfn); | 653 | account_shadowed(kvm, gfn); |
653 | } | 654 | } |
654 | 655 | ||
656 | static int kvm_unmap_rmapp(struct kvm *kvm, unsigned long *rmapp) | ||
657 | { | ||
658 | u64 *spte; | ||
659 | int need_tlb_flush = 0; | ||
660 | |||
661 | while ((spte = rmap_next(kvm, rmapp, NULL))) { | ||
662 | BUG_ON(!(*spte & PT_PRESENT_MASK)); | ||
663 | rmap_printk("kvm_rmap_unmap_hva: spte %p %llx\n", spte, *spte); | ||
664 | rmap_remove(kvm, spte); | ||
665 | set_shadow_pte(spte, shadow_trap_nonpresent_pte); | ||
666 | need_tlb_flush = 1; | ||
667 | } | ||
668 | return need_tlb_flush; | ||
669 | } | ||
670 | |||
671 | static int kvm_handle_hva(struct kvm *kvm, unsigned long hva, | ||
672 | int (*handler)(struct kvm *kvm, unsigned long *rmapp)) | ||
673 | { | ||
674 | int i; | ||
675 | int retval = 0; | ||
676 | |||
677 | /* | ||
678 | * If mmap_sem isn't taken, we can look the memslots with only | ||
679 | * the mmu_lock by skipping over the slots with userspace_addr == 0. | ||
680 | */ | ||
681 | for (i = 0; i < kvm->nmemslots; i++) { | ||
682 | struct kvm_memory_slot *memslot = &kvm->memslots[i]; | ||
683 | unsigned long start = memslot->userspace_addr; | ||
684 | unsigned long end; | ||
685 | |||
686 | /* mmu_lock protects userspace_addr */ | ||
687 | if (!start) | ||
688 | continue; | ||
689 | |||
690 | end = start + (memslot->npages << PAGE_SHIFT); | ||
691 | if (hva >= start && hva < end) { | ||
692 | gfn_t gfn_offset = (hva - start) >> PAGE_SHIFT; | ||
693 | retval |= handler(kvm, &memslot->rmap[gfn_offset]); | ||
694 | retval |= handler(kvm, | ||
695 | &memslot->lpage_info[ | ||
696 | gfn_offset / | ||
697 | KVM_PAGES_PER_HPAGE].rmap_pde); | ||
698 | } | ||
699 | } | ||
700 | |||
701 | return retval; | ||
702 | } | ||
703 | |||
704 | int kvm_unmap_hva(struct kvm *kvm, unsigned long hva) | ||
705 | { | ||
706 | return kvm_handle_hva(kvm, hva, kvm_unmap_rmapp); | ||
707 | } | ||
708 | |||
709 | static int kvm_age_rmapp(struct kvm *kvm, unsigned long *rmapp) | ||
710 | { | ||
711 | u64 *spte; | ||
712 | int young = 0; | ||
713 | |||
714 | /* always return old for EPT */ | ||
715 | if (!shadow_accessed_mask) | ||
716 | return 0; | ||
717 | |||
718 | spte = rmap_next(kvm, rmapp, NULL); | ||
719 | while (spte) { | ||
720 | int _young; | ||
721 | u64 _spte = *spte; | ||
722 | BUG_ON(!(_spte & PT_PRESENT_MASK)); | ||
723 | _young = _spte & PT_ACCESSED_MASK; | ||
724 | if (_young) { | ||
725 | young = 1; | ||
726 | clear_bit(PT_ACCESSED_SHIFT, (unsigned long *)spte); | ||
727 | } | ||
728 | spte = rmap_next(kvm, rmapp, spte); | ||
729 | } | ||
730 | return young; | ||
731 | } | ||
732 | |||
733 | int kvm_age_hva(struct kvm *kvm, unsigned long hva) | ||
734 | { | ||
735 | return kvm_handle_hva(kvm, hva, kvm_age_rmapp); | ||
736 | } | ||
737 | |||
655 | #ifdef MMU_DEBUG | 738 | #ifdef MMU_DEBUG |
656 | static int is_empty_shadow_page(u64 *spt) | 739 | static int is_empty_shadow_page(u64 *spt) |
657 | { | 740 | { |
@@ -776,6 +859,15 @@ static void mmu_page_remove_parent_pte(struct kvm_mmu_page *sp, | |||
776 | BUG(); | 859 | BUG(); |
777 | } | 860 | } |
778 | 861 | ||
862 | static void nonpaging_prefetch_page(struct kvm_vcpu *vcpu, | ||
863 | struct kvm_mmu_page *sp) | ||
864 | { | ||
865 | int i; | ||
866 | |||
867 | for (i = 0; i < PT64_ENT_PER_PAGE; ++i) | ||
868 | sp->spt[i] = shadow_trap_nonpresent_pte; | ||
869 | } | ||
870 | |||
779 | static struct kvm_mmu_page *kvm_mmu_lookup_page(struct kvm *kvm, gfn_t gfn) | 871 | static struct kvm_mmu_page *kvm_mmu_lookup_page(struct kvm *kvm, gfn_t gfn) |
780 | { | 872 | { |
781 | unsigned index; | 873 | unsigned index; |
@@ -841,7 +933,10 @@ static struct kvm_mmu_page *kvm_mmu_get_page(struct kvm_vcpu *vcpu, | |||
841 | hlist_add_head(&sp->hash_link, bucket); | 933 | hlist_add_head(&sp->hash_link, bucket); |
842 | if (!metaphysical) | 934 | if (!metaphysical) |
843 | rmap_write_protect(vcpu->kvm, gfn); | 935 | rmap_write_protect(vcpu->kvm, gfn); |
844 | vcpu->arch.mmu.prefetch_page(vcpu, sp); | 936 | if (shadow_trap_nonpresent_pte != shadow_notrap_nonpresent_pte) |
937 | vcpu->arch.mmu.prefetch_page(vcpu, sp); | ||
938 | else | ||
939 | nonpaging_prefetch_page(vcpu, sp); | ||
845 | return sp; | 940 | return sp; |
846 | } | 941 | } |
847 | 942 | ||
@@ -917,14 +1012,17 @@ static void kvm_mmu_zap_page(struct kvm *kvm, struct kvm_mmu_page *sp) | |||
917 | } | 1012 | } |
918 | kvm_mmu_page_unlink_children(kvm, sp); | 1013 | kvm_mmu_page_unlink_children(kvm, sp); |
919 | if (!sp->root_count) { | 1014 | if (!sp->root_count) { |
920 | if (!sp->role.metaphysical) | 1015 | if (!sp->role.metaphysical && !sp->role.invalid) |
921 | unaccount_shadowed(kvm, sp->gfn); | 1016 | unaccount_shadowed(kvm, sp->gfn); |
922 | hlist_del(&sp->hash_link); | 1017 | hlist_del(&sp->hash_link); |
923 | kvm_mmu_free_page(kvm, sp); | 1018 | kvm_mmu_free_page(kvm, sp); |
924 | } else { | 1019 | } else { |
1020 | int invalid = sp->role.invalid; | ||
925 | list_move(&sp->link, &kvm->arch.active_mmu_pages); | 1021 | list_move(&sp->link, &kvm->arch.active_mmu_pages); |
926 | sp->role.invalid = 1; | 1022 | sp->role.invalid = 1; |
927 | kvm_reload_remote_mmus(kvm); | 1023 | kvm_reload_remote_mmus(kvm); |
1024 | if (!sp->role.metaphysical && !invalid) | ||
1025 | unaccount_shadowed(kvm, sp->gfn); | ||
928 | } | 1026 | } |
929 | kvm_mmu_reset_last_pte_updated(kvm); | 1027 | kvm_mmu_reset_last_pte_updated(kvm); |
930 | } | 1028 | } |
@@ -1103,7 +1201,7 @@ static void mmu_set_spte(struct kvm_vcpu *vcpu, u64 *shadow_pte, | |||
1103 | mark_page_dirty(vcpu->kvm, gfn); | 1201 | mark_page_dirty(vcpu->kvm, gfn); |
1104 | 1202 | ||
1105 | pgprintk("%s: setting spte %llx\n", __func__, spte); | 1203 | pgprintk("%s: setting spte %llx\n", __func__, spte); |
1106 | pgprintk("instantiating %s PTE (%s) at %d (%llx) addr %llx\n", | 1204 | pgprintk("instantiating %s PTE (%s) at %ld (%llx) addr %p\n", |
1107 | (spte&PT_PAGE_SIZE_MASK)? "2MB" : "4kB", | 1205 | (spte&PT_PAGE_SIZE_MASK)? "2MB" : "4kB", |
1108 | (spte&PT_WRITABLE_MASK)?"RW":"R", gfn, spte, shadow_pte); | 1206 | (spte&PT_WRITABLE_MASK)?"RW":"R", gfn, spte, shadow_pte); |
1109 | set_shadow_pte(shadow_pte, spte); | 1207 | set_shadow_pte(shadow_pte, spte); |
@@ -1122,8 +1220,10 @@ static void mmu_set_spte(struct kvm_vcpu *vcpu, u64 *shadow_pte, | |||
1122 | else | 1220 | else |
1123 | kvm_release_pfn_clean(pfn); | 1221 | kvm_release_pfn_clean(pfn); |
1124 | } | 1222 | } |
1125 | if (!ptwrite || !*ptwrite) | 1223 | if (speculative) { |
1126 | vcpu->arch.last_pte_updated = shadow_pte; | 1224 | vcpu->arch.last_pte_updated = shadow_pte; |
1225 | vcpu->arch.last_pte_gfn = gfn; | ||
1226 | } | ||
1127 | } | 1227 | } |
1128 | 1228 | ||
1129 | static void nonpaging_new_cr3(struct kvm_vcpu *vcpu) | 1229 | static void nonpaging_new_cr3(struct kvm_vcpu *vcpu) |
@@ -1171,9 +1271,10 @@ static int __direct_map(struct kvm_vcpu *vcpu, gpa_t v, int write, | |||
1171 | return -ENOMEM; | 1271 | return -ENOMEM; |
1172 | } | 1272 | } |
1173 | 1273 | ||
1174 | table[index] = __pa(new_table->spt) | 1274 | set_shadow_pte(&table[index], |
1175 | | PT_PRESENT_MASK | PT_WRITABLE_MASK | 1275 | __pa(new_table->spt) |
1176 | | shadow_user_mask | shadow_x_mask; | 1276 | | PT_PRESENT_MASK | PT_WRITABLE_MASK |
1277 | | shadow_user_mask | shadow_x_mask); | ||
1177 | } | 1278 | } |
1178 | table_addr = table[index] & PT64_BASE_ADDR_MASK; | 1279 | table_addr = table[index] & PT64_BASE_ADDR_MASK; |
1179 | } | 1280 | } |
@@ -1184,6 +1285,7 @@ static int nonpaging_map(struct kvm_vcpu *vcpu, gva_t v, int write, gfn_t gfn) | |||
1184 | int r; | 1285 | int r; |
1185 | int largepage = 0; | 1286 | int largepage = 0; |
1186 | pfn_t pfn; | 1287 | pfn_t pfn; |
1288 | unsigned long mmu_seq; | ||
1187 | 1289 | ||
1188 | down_read(¤t->mm->mmap_sem); | 1290 | down_read(¤t->mm->mmap_sem); |
1189 | if (is_largepage_backed(vcpu, gfn & ~(KVM_PAGES_PER_HPAGE-1))) { | 1291 | if (is_largepage_backed(vcpu, gfn & ~(KVM_PAGES_PER_HPAGE-1))) { |
@@ -1191,6 +1293,8 @@ static int nonpaging_map(struct kvm_vcpu *vcpu, gva_t v, int write, gfn_t gfn) | |||
1191 | largepage = 1; | 1293 | largepage = 1; |
1192 | } | 1294 | } |
1193 | 1295 | ||
1296 | mmu_seq = vcpu->kvm->mmu_notifier_seq; | ||
1297 | /* implicit mb(), we'll read before PT lock is unlocked */ | ||
1194 | pfn = gfn_to_pfn(vcpu->kvm, gfn); | 1298 | pfn = gfn_to_pfn(vcpu->kvm, gfn); |
1195 | up_read(¤t->mm->mmap_sem); | 1299 | up_read(¤t->mm->mmap_sem); |
1196 | 1300 | ||
@@ -1201,6 +1305,8 @@ static int nonpaging_map(struct kvm_vcpu *vcpu, gva_t v, int write, gfn_t gfn) | |||
1201 | } | 1305 | } |
1202 | 1306 | ||
1203 | spin_lock(&vcpu->kvm->mmu_lock); | 1307 | spin_lock(&vcpu->kvm->mmu_lock); |
1308 | if (mmu_notifier_retry(vcpu, mmu_seq)) | ||
1309 | goto out_unlock; | ||
1204 | kvm_mmu_free_some_pages(vcpu); | 1310 | kvm_mmu_free_some_pages(vcpu); |
1205 | r = __direct_map(vcpu, v, write, largepage, gfn, pfn, | 1311 | r = __direct_map(vcpu, v, write, largepage, gfn, pfn, |
1206 | PT32E_ROOT_LEVEL); | 1312 | PT32E_ROOT_LEVEL); |
@@ -1208,18 +1314,14 @@ static int nonpaging_map(struct kvm_vcpu *vcpu, gva_t v, int write, gfn_t gfn) | |||
1208 | 1314 | ||
1209 | 1315 | ||
1210 | return r; | 1316 | return r; |
1211 | } | ||
1212 | |||
1213 | 1317 | ||
1214 | static void nonpaging_prefetch_page(struct kvm_vcpu *vcpu, | 1318 | out_unlock: |
1215 | struct kvm_mmu_page *sp) | 1319 | spin_unlock(&vcpu->kvm->mmu_lock); |
1216 | { | 1320 | kvm_release_pfn_clean(pfn); |
1217 | int i; | 1321 | return 0; |
1218 | |||
1219 | for (i = 0; i < PT64_ENT_PER_PAGE; ++i) | ||
1220 | sp->spt[i] = shadow_trap_nonpresent_pte; | ||
1221 | } | 1322 | } |
1222 | 1323 | ||
1324 | |||
1223 | static void mmu_free_roots(struct kvm_vcpu *vcpu) | 1325 | static void mmu_free_roots(struct kvm_vcpu *vcpu) |
1224 | { | 1326 | { |
1225 | int i; | 1327 | int i; |
@@ -1335,6 +1437,7 @@ static int tdp_page_fault(struct kvm_vcpu *vcpu, gva_t gpa, | |||
1335 | int r; | 1437 | int r; |
1336 | int largepage = 0; | 1438 | int largepage = 0; |
1337 | gfn_t gfn = gpa >> PAGE_SHIFT; | 1439 | gfn_t gfn = gpa >> PAGE_SHIFT; |
1440 | unsigned long mmu_seq; | ||
1338 | 1441 | ||
1339 | ASSERT(vcpu); | 1442 | ASSERT(vcpu); |
1340 | ASSERT(VALID_PAGE(vcpu->arch.mmu.root_hpa)); | 1443 | ASSERT(VALID_PAGE(vcpu->arch.mmu.root_hpa)); |
@@ -1348,6 +1451,8 @@ static int tdp_page_fault(struct kvm_vcpu *vcpu, gva_t gpa, | |||
1348 | gfn &= ~(KVM_PAGES_PER_HPAGE-1); | 1451 | gfn &= ~(KVM_PAGES_PER_HPAGE-1); |
1349 | largepage = 1; | 1452 | largepage = 1; |
1350 | } | 1453 | } |
1454 | mmu_seq = vcpu->kvm->mmu_notifier_seq; | ||
1455 | /* implicit mb(), we'll read before PT lock is unlocked */ | ||
1351 | pfn = gfn_to_pfn(vcpu->kvm, gfn); | 1456 | pfn = gfn_to_pfn(vcpu->kvm, gfn); |
1352 | up_read(¤t->mm->mmap_sem); | 1457 | up_read(¤t->mm->mmap_sem); |
1353 | if (is_error_pfn(pfn)) { | 1458 | if (is_error_pfn(pfn)) { |
@@ -1355,12 +1460,19 @@ static int tdp_page_fault(struct kvm_vcpu *vcpu, gva_t gpa, | |||
1355 | return 1; | 1460 | return 1; |
1356 | } | 1461 | } |
1357 | spin_lock(&vcpu->kvm->mmu_lock); | 1462 | spin_lock(&vcpu->kvm->mmu_lock); |
1463 | if (mmu_notifier_retry(vcpu, mmu_seq)) | ||
1464 | goto out_unlock; | ||
1358 | kvm_mmu_free_some_pages(vcpu); | 1465 | kvm_mmu_free_some_pages(vcpu); |
1359 | r = __direct_map(vcpu, gpa, error_code & PFERR_WRITE_MASK, | 1466 | r = __direct_map(vcpu, gpa, error_code & PFERR_WRITE_MASK, |
1360 | largepage, gfn, pfn, kvm_x86_ops->get_tdp_level()); | 1467 | largepage, gfn, pfn, kvm_x86_ops->get_tdp_level()); |
1361 | spin_unlock(&vcpu->kvm->mmu_lock); | 1468 | spin_unlock(&vcpu->kvm->mmu_lock); |
1362 | 1469 | ||
1363 | return r; | 1470 | return r; |
1471 | |||
1472 | out_unlock: | ||
1473 | spin_unlock(&vcpu->kvm->mmu_lock); | ||
1474 | kvm_release_pfn_clean(pfn); | ||
1475 | return 0; | ||
1364 | } | 1476 | } |
1365 | 1477 | ||
1366 | static void nonpaging_free(struct kvm_vcpu *vcpu) | 1478 | static void nonpaging_free(struct kvm_vcpu *vcpu) |
@@ -1660,6 +1772,8 @@ static void mmu_guess_page_from_pte_write(struct kvm_vcpu *vcpu, gpa_t gpa, | |||
1660 | gfn &= ~(KVM_PAGES_PER_HPAGE-1); | 1772 | gfn &= ~(KVM_PAGES_PER_HPAGE-1); |
1661 | vcpu->arch.update_pte.largepage = 1; | 1773 | vcpu->arch.update_pte.largepage = 1; |
1662 | } | 1774 | } |
1775 | vcpu->arch.update_pte.mmu_seq = vcpu->kvm->mmu_notifier_seq; | ||
1776 | /* implicit mb(), we'll read before PT lock is unlocked */ | ||
1663 | pfn = gfn_to_pfn(vcpu->kvm, gfn); | 1777 | pfn = gfn_to_pfn(vcpu->kvm, gfn); |
1664 | up_read(¤t->mm->mmap_sem); | 1778 | up_read(¤t->mm->mmap_sem); |
1665 | 1779 | ||
@@ -1671,6 +1785,18 @@ static void mmu_guess_page_from_pte_write(struct kvm_vcpu *vcpu, gpa_t gpa, | |||
1671 | vcpu->arch.update_pte.pfn = pfn; | 1785 | vcpu->arch.update_pte.pfn = pfn; |
1672 | } | 1786 | } |
1673 | 1787 | ||
1788 | static void kvm_mmu_access_page(struct kvm_vcpu *vcpu, gfn_t gfn) | ||
1789 | { | ||
1790 | u64 *spte = vcpu->arch.last_pte_updated; | ||
1791 | |||
1792 | if (spte | ||
1793 | && vcpu->arch.last_pte_gfn == gfn | ||
1794 | && shadow_accessed_mask | ||
1795 | && !(*spte & shadow_accessed_mask) | ||
1796 | && is_shadow_present_pte(*spte)) | ||
1797 | set_bit(PT_ACCESSED_SHIFT, (unsigned long *)spte); | ||
1798 | } | ||
1799 | |||
1674 | void kvm_mmu_pte_write(struct kvm_vcpu *vcpu, gpa_t gpa, | 1800 | void kvm_mmu_pte_write(struct kvm_vcpu *vcpu, gpa_t gpa, |
1675 | const u8 *new, int bytes) | 1801 | const u8 *new, int bytes) |
1676 | { | 1802 | { |
@@ -1694,6 +1820,7 @@ void kvm_mmu_pte_write(struct kvm_vcpu *vcpu, gpa_t gpa, | |||
1694 | pgprintk("%s: gpa %llx bytes %d\n", __func__, gpa, bytes); | 1820 | pgprintk("%s: gpa %llx bytes %d\n", __func__, gpa, bytes); |
1695 | mmu_guess_page_from_pte_write(vcpu, gpa, new, bytes); | 1821 | mmu_guess_page_from_pte_write(vcpu, gpa, new, bytes); |
1696 | spin_lock(&vcpu->kvm->mmu_lock); | 1822 | spin_lock(&vcpu->kvm->mmu_lock); |
1823 | kvm_mmu_access_page(vcpu, gfn); | ||
1697 | kvm_mmu_free_some_pages(vcpu); | 1824 | kvm_mmu_free_some_pages(vcpu); |
1698 | ++vcpu->kvm->stat.mmu_pte_write; | 1825 | ++vcpu->kvm->stat.mmu_pte_write; |
1699 | kvm_mmu_audit(vcpu, "pre pte write"); | 1826 | kvm_mmu_audit(vcpu, "pre pte write"); |
@@ -1791,6 +1918,7 @@ int kvm_mmu_unprotect_page_virt(struct kvm_vcpu *vcpu, gva_t gva) | |||
1791 | spin_unlock(&vcpu->kvm->mmu_lock); | 1918 | spin_unlock(&vcpu->kvm->mmu_lock); |
1792 | return r; | 1919 | return r; |
1793 | } | 1920 | } |
1921 | EXPORT_SYMBOL_GPL(kvm_mmu_unprotect_page_virt); | ||
1794 | 1922 | ||
1795 | void __kvm_mmu_free_some_pages(struct kvm_vcpu *vcpu) | 1923 | void __kvm_mmu_free_some_pages(struct kvm_vcpu *vcpu) |
1796 | { | 1924 | { |
@@ -1847,6 +1975,12 @@ void kvm_enable_tdp(void) | |||
1847 | } | 1975 | } |
1848 | EXPORT_SYMBOL_GPL(kvm_enable_tdp); | 1976 | EXPORT_SYMBOL_GPL(kvm_enable_tdp); |
1849 | 1977 | ||
1978 | void kvm_disable_tdp(void) | ||
1979 | { | ||
1980 | tdp_enabled = false; | ||
1981 | } | ||
1982 | EXPORT_SYMBOL_GPL(kvm_disable_tdp); | ||
1983 | |||
1850 | static void free_mmu_pages(struct kvm_vcpu *vcpu) | 1984 | static void free_mmu_pages(struct kvm_vcpu *vcpu) |
1851 | { | 1985 | { |
1852 | struct kvm_mmu_page *sp; | 1986 | struct kvm_mmu_page *sp; |
@@ -1948,7 +2082,7 @@ void kvm_mmu_zap_all(struct kvm *kvm) | |||
1948 | kvm_flush_remote_tlbs(kvm); | 2082 | kvm_flush_remote_tlbs(kvm); |
1949 | } | 2083 | } |
1950 | 2084 | ||
1951 | void kvm_mmu_remove_one_alloc_mmu_page(struct kvm *kvm) | 2085 | static void kvm_mmu_remove_one_alloc_mmu_page(struct kvm *kvm) |
1952 | { | 2086 | { |
1953 | struct kvm_mmu_page *page; | 2087 | struct kvm_mmu_page *page; |
1954 | 2088 | ||
@@ -1968,6 +2102,8 @@ static int mmu_shrink(int nr_to_scan, gfp_t gfp_mask) | |||
1968 | list_for_each_entry(kvm, &vm_list, vm_list) { | 2102 | list_for_each_entry(kvm, &vm_list, vm_list) { |
1969 | int npages; | 2103 | int npages; |
1970 | 2104 | ||
2105 | if (!down_read_trylock(&kvm->slots_lock)) | ||
2106 | continue; | ||
1971 | spin_lock(&kvm->mmu_lock); | 2107 | spin_lock(&kvm->mmu_lock); |
1972 | npages = kvm->arch.n_alloc_mmu_pages - | 2108 | npages = kvm->arch.n_alloc_mmu_pages - |
1973 | kvm->arch.n_free_mmu_pages; | 2109 | kvm->arch.n_free_mmu_pages; |
@@ -1980,6 +2116,7 @@ static int mmu_shrink(int nr_to_scan, gfp_t gfp_mask) | |||
1980 | nr_to_scan--; | 2116 | nr_to_scan--; |
1981 | 2117 | ||
1982 | spin_unlock(&kvm->mmu_lock); | 2118 | spin_unlock(&kvm->mmu_lock); |
2119 | up_read(&kvm->slots_lock); | ||
1983 | } | 2120 | } |
1984 | if (kvm_freed) | 2121 | if (kvm_freed) |
1985 | list_move_tail(&kvm_freed->vm_list, &vm_list); | 2122 | list_move_tail(&kvm_freed->vm_list, &vm_list); |
diff --git a/arch/x86/kvm/mmu.h b/arch/x86/kvm/mmu.h index 1730757bbc7a..258e5d56298e 100644 --- a/arch/x86/kvm/mmu.h +++ b/arch/x86/kvm/mmu.h | |||
@@ -15,7 +15,8 @@ | |||
15 | #define PT_USER_MASK (1ULL << 2) | 15 | #define PT_USER_MASK (1ULL << 2) |
16 | #define PT_PWT_MASK (1ULL << 3) | 16 | #define PT_PWT_MASK (1ULL << 3) |
17 | #define PT_PCD_MASK (1ULL << 4) | 17 | #define PT_PCD_MASK (1ULL << 4) |
18 | #define PT_ACCESSED_MASK (1ULL << 5) | 18 | #define PT_ACCESSED_SHIFT 5 |
19 | #define PT_ACCESSED_MASK (1ULL << PT_ACCESSED_SHIFT) | ||
19 | #define PT_DIRTY_MASK (1ULL << 6) | 20 | #define PT_DIRTY_MASK (1ULL << 6) |
20 | #define PT_PAGE_SIZE_MASK (1ULL << 7) | 21 | #define PT_PAGE_SIZE_MASK (1ULL << 7) |
21 | #define PT_PAT_MASK (1ULL << 7) | 22 | #define PT_PAT_MASK (1ULL << 7) |
diff --git a/arch/x86/kvm/paging_tmpl.h b/arch/x86/kvm/paging_tmpl.h index 934c7b619396..4a814bff21f2 100644 --- a/arch/x86/kvm/paging_tmpl.h +++ b/arch/x86/kvm/paging_tmpl.h | |||
@@ -263,6 +263,8 @@ static void FNAME(update_pte)(struct kvm_vcpu *vcpu, struct kvm_mmu_page *page, | |||
263 | pfn = vcpu->arch.update_pte.pfn; | 263 | pfn = vcpu->arch.update_pte.pfn; |
264 | if (is_error_pfn(pfn)) | 264 | if (is_error_pfn(pfn)) |
265 | return; | 265 | return; |
266 | if (mmu_notifier_retry(vcpu, vcpu->arch.update_pte.mmu_seq)) | ||
267 | return; | ||
266 | kvm_get_pfn(pfn); | 268 | kvm_get_pfn(pfn); |
267 | mmu_set_spte(vcpu, spte, page->role.access, pte_access, 0, 0, | 269 | mmu_set_spte(vcpu, spte, page->role.access, pte_access, 0, 0, |
268 | gpte & PT_DIRTY_MASK, NULL, largepage, gpte_to_gfn(gpte), | 270 | gpte & PT_DIRTY_MASK, NULL, largepage, gpte_to_gfn(gpte), |
@@ -343,7 +345,7 @@ static u64 *FNAME(fetch)(struct kvm_vcpu *vcpu, gva_t addr, | |||
343 | shadow_addr = __pa(shadow_page->spt); | 345 | shadow_addr = __pa(shadow_page->spt); |
344 | shadow_pte = shadow_addr | PT_PRESENT_MASK | PT_ACCESSED_MASK | 346 | shadow_pte = shadow_addr | PT_PRESENT_MASK | PT_ACCESSED_MASK |
345 | | PT_WRITABLE_MASK | PT_USER_MASK; | 347 | | PT_WRITABLE_MASK | PT_USER_MASK; |
346 | *shadow_ent = shadow_pte; | 348 | set_shadow_pte(shadow_ent, shadow_pte); |
347 | } | 349 | } |
348 | 350 | ||
349 | mmu_set_spte(vcpu, shadow_ent, access, walker->pte_access & access, | 351 | mmu_set_spte(vcpu, shadow_ent, access, walker->pte_access & access, |
@@ -380,6 +382,7 @@ static int FNAME(page_fault)(struct kvm_vcpu *vcpu, gva_t addr, | |||
380 | int r; | 382 | int r; |
381 | pfn_t pfn; | 383 | pfn_t pfn; |
382 | int largepage = 0; | 384 | int largepage = 0; |
385 | unsigned long mmu_seq; | ||
383 | 386 | ||
384 | pgprintk("%s: addr %lx err %x\n", __func__, addr, error_code); | 387 | pgprintk("%s: addr %lx err %x\n", __func__, addr, error_code); |
385 | kvm_mmu_audit(vcpu, "pre page fault"); | 388 | kvm_mmu_audit(vcpu, "pre page fault"); |
@@ -413,6 +416,8 @@ static int FNAME(page_fault)(struct kvm_vcpu *vcpu, gva_t addr, | |||
413 | largepage = 1; | 416 | largepage = 1; |
414 | } | 417 | } |
415 | } | 418 | } |
419 | mmu_seq = vcpu->kvm->mmu_notifier_seq; | ||
420 | /* implicit mb(), we'll read before PT lock is unlocked */ | ||
416 | pfn = gfn_to_pfn(vcpu->kvm, walker.gfn); | 421 | pfn = gfn_to_pfn(vcpu->kvm, walker.gfn); |
417 | up_read(¤t->mm->mmap_sem); | 422 | up_read(¤t->mm->mmap_sem); |
418 | 423 | ||
@@ -424,6 +429,8 @@ static int FNAME(page_fault)(struct kvm_vcpu *vcpu, gva_t addr, | |||
424 | } | 429 | } |
425 | 430 | ||
426 | spin_lock(&vcpu->kvm->mmu_lock); | 431 | spin_lock(&vcpu->kvm->mmu_lock); |
432 | if (mmu_notifier_retry(vcpu, mmu_seq)) | ||
433 | goto out_unlock; | ||
427 | kvm_mmu_free_some_pages(vcpu); | 434 | kvm_mmu_free_some_pages(vcpu); |
428 | shadow_pte = FNAME(fetch)(vcpu, addr, &walker, user_fault, write_fault, | 435 | shadow_pte = FNAME(fetch)(vcpu, addr, &walker, user_fault, write_fault, |
429 | largepage, &write_pt, pfn); | 436 | largepage, &write_pt, pfn); |
@@ -439,6 +446,11 @@ static int FNAME(page_fault)(struct kvm_vcpu *vcpu, gva_t addr, | |||
439 | spin_unlock(&vcpu->kvm->mmu_lock); | 446 | spin_unlock(&vcpu->kvm->mmu_lock); |
440 | 447 | ||
441 | return write_pt; | 448 | return write_pt; |
449 | |||
450 | out_unlock: | ||
451 | spin_unlock(&vcpu->kvm->mmu_lock); | ||
452 | kvm_release_pfn_clean(pfn); | ||
453 | return 0; | ||
442 | } | 454 | } |
443 | 455 | ||
444 | static gpa_t FNAME(gva_to_gpa)(struct kvm_vcpu *vcpu, gva_t vaddr) | 456 | static gpa_t FNAME(gva_to_gpa)(struct kvm_vcpu *vcpu, gva_t vaddr) |
@@ -460,8 +472,9 @@ static gpa_t FNAME(gva_to_gpa)(struct kvm_vcpu *vcpu, gva_t vaddr) | |||
460 | static void FNAME(prefetch_page)(struct kvm_vcpu *vcpu, | 472 | static void FNAME(prefetch_page)(struct kvm_vcpu *vcpu, |
461 | struct kvm_mmu_page *sp) | 473 | struct kvm_mmu_page *sp) |
462 | { | 474 | { |
463 | int i, offset = 0, r = 0; | 475 | int i, j, offset, r; |
464 | pt_element_t pt; | 476 | pt_element_t pt[256 / sizeof(pt_element_t)]; |
477 | gpa_t pte_gpa; | ||
465 | 478 | ||
466 | if (sp->role.metaphysical | 479 | if (sp->role.metaphysical |
467 | || (PTTYPE == 32 && sp->role.level > PT_PAGE_TABLE_LEVEL)) { | 480 | || (PTTYPE == 32 && sp->role.level > PT_PAGE_TABLE_LEVEL)) { |
@@ -469,19 +482,20 @@ static void FNAME(prefetch_page)(struct kvm_vcpu *vcpu, | |||
469 | return; | 482 | return; |
470 | } | 483 | } |
471 | 484 | ||
472 | if (PTTYPE == 32) | 485 | pte_gpa = gfn_to_gpa(sp->gfn); |
486 | if (PTTYPE == 32) { | ||
473 | offset = sp->role.quadrant << PT64_LEVEL_BITS; | 487 | offset = sp->role.quadrant << PT64_LEVEL_BITS; |
488 | pte_gpa += offset * sizeof(pt_element_t); | ||
489 | } | ||
474 | 490 | ||
475 | for (i = 0; i < PT64_ENT_PER_PAGE; ++i) { | 491 | for (i = 0; i < PT64_ENT_PER_PAGE; i += ARRAY_SIZE(pt)) { |
476 | gpa_t pte_gpa = gfn_to_gpa(sp->gfn); | 492 | r = kvm_read_guest_atomic(vcpu->kvm, pte_gpa, pt, sizeof pt); |
477 | pte_gpa += (i+offset) * sizeof(pt_element_t); | 493 | pte_gpa += ARRAY_SIZE(pt) * sizeof(pt_element_t); |
478 | 494 | for (j = 0; j < ARRAY_SIZE(pt); ++j) | |
479 | r = kvm_read_guest_atomic(vcpu->kvm, pte_gpa, &pt, | 495 | if (r || is_present_pte(pt[j])) |
480 | sizeof(pt_element_t)); | 496 | sp->spt[i+j] = shadow_trap_nonpresent_pte; |
481 | if (r || is_present_pte(pt)) | 497 | else |
482 | sp->spt[i] = shadow_trap_nonpresent_pte; | 498 | sp->spt[i+j] = shadow_notrap_nonpresent_pte; |
483 | else | ||
484 | sp->spt[i] = shadow_notrap_nonpresent_pte; | ||
485 | } | 499 | } |
486 | } | 500 | } |
487 | 501 | ||
diff --git a/arch/x86/kvm/svm.c b/arch/x86/kvm/svm.c index 6b0d5fa5bab3..8233b86c778c 100644 --- a/arch/x86/kvm/svm.c +++ b/arch/x86/kvm/svm.c | |||
@@ -27,6 +27,8 @@ | |||
27 | 27 | ||
28 | #include <asm/desc.h> | 28 | #include <asm/desc.h> |
29 | 29 | ||
30 | #define __ex(x) __kvm_handle_fault_on_reboot(x) | ||
31 | |||
30 | MODULE_AUTHOR("Qumranet"); | 32 | MODULE_AUTHOR("Qumranet"); |
31 | MODULE_LICENSE("GPL"); | 33 | MODULE_LICENSE("GPL"); |
32 | 34 | ||
@@ -60,6 +62,7 @@ static int npt = 1; | |||
60 | module_param(npt, int, S_IRUGO); | 62 | module_param(npt, int, S_IRUGO); |
61 | 63 | ||
62 | static void kvm_reput_irq(struct vcpu_svm *svm); | 64 | static void kvm_reput_irq(struct vcpu_svm *svm); |
65 | static void svm_flush_tlb(struct kvm_vcpu *vcpu); | ||
63 | 66 | ||
64 | static inline struct vcpu_svm *to_svm(struct kvm_vcpu *vcpu) | 67 | static inline struct vcpu_svm *to_svm(struct kvm_vcpu *vcpu) |
65 | { | 68 | { |
@@ -129,17 +132,17 @@ static inline void push_irq(struct kvm_vcpu *vcpu, u8 irq) | |||
129 | 132 | ||
130 | static inline void clgi(void) | 133 | static inline void clgi(void) |
131 | { | 134 | { |
132 | asm volatile (SVM_CLGI); | 135 | asm volatile (__ex(SVM_CLGI)); |
133 | } | 136 | } |
134 | 137 | ||
135 | static inline void stgi(void) | 138 | static inline void stgi(void) |
136 | { | 139 | { |
137 | asm volatile (SVM_STGI); | 140 | asm volatile (__ex(SVM_STGI)); |
138 | } | 141 | } |
139 | 142 | ||
140 | static inline void invlpga(unsigned long addr, u32 asid) | 143 | static inline void invlpga(unsigned long addr, u32 asid) |
141 | { | 144 | { |
142 | asm volatile (SVM_INVLPGA :: "a"(addr), "c"(asid)); | 145 | asm volatile (__ex(SVM_INVLPGA) :: "a"(addr), "c"(asid)); |
143 | } | 146 | } |
144 | 147 | ||
145 | static inline unsigned long kvm_read_cr2(void) | 148 | static inline unsigned long kvm_read_cr2(void) |
@@ -270,19 +273,11 @@ static int has_svm(void) | |||
270 | 273 | ||
271 | static void svm_hardware_disable(void *garbage) | 274 | static void svm_hardware_disable(void *garbage) |
272 | { | 275 | { |
273 | struct svm_cpu_data *svm_data | 276 | uint64_t efer; |
274 | = per_cpu(svm_data, raw_smp_processor_id()); | ||
275 | |||
276 | if (svm_data) { | ||
277 | uint64_t efer; | ||
278 | 277 | ||
279 | wrmsrl(MSR_VM_HSAVE_PA, 0); | 278 | wrmsrl(MSR_VM_HSAVE_PA, 0); |
280 | rdmsrl(MSR_EFER, efer); | 279 | rdmsrl(MSR_EFER, efer); |
281 | wrmsrl(MSR_EFER, efer & ~MSR_EFER_SVME_MASK); | 280 | wrmsrl(MSR_EFER, efer & ~MSR_EFER_SVME_MASK); |
282 | per_cpu(svm_data, raw_smp_processor_id()) = NULL; | ||
283 | __free_page(svm_data->save_area); | ||
284 | kfree(svm_data); | ||
285 | } | ||
286 | } | 281 | } |
287 | 282 | ||
288 | static void svm_hardware_enable(void *garbage) | 283 | static void svm_hardware_enable(void *garbage) |
@@ -321,6 +316,19 @@ static void svm_hardware_enable(void *garbage) | |||
321 | page_to_pfn(svm_data->save_area) << PAGE_SHIFT); | 316 | page_to_pfn(svm_data->save_area) << PAGE_SHIFT); |
322 | } | 317 | } |
323 | 318 | ||
319 | static void svm_cpu_uninit(int cpu) | ||
320 | { | ||
321 | struct svm_cpu_data *svm_data | ||
322 | = per_cpu(svm_data, raw_smp_processor_id()); | ||
323 | |||
324 | if (!svm_data) | ||
325 | return; | ||
326 | |||
327 | per_cpu(svm_data, raw_smp_processor_id()) = NULL; | ||
328 | __free_page(svm_data->save_area); | ||
329 | kfree(svm_data); | ||
330 | } | ||
331 | |||
324 | static int svm_cpu_init(int cpu) | 332 | static int svm_cpu_init(int cpu) |
325 | { | 333 | { |
326 | struct svm_cpu_data *svm_data; | 334 | struct svm_cpu_data *svm_data; |
@@ -446,7 +454,8 @@ static __init int svm_hardware_setup(void) | |||
446 | if (npt_enabled) { | 454 | if (npt_enabled) { |
447 | printk(KERN_INFO "kvm: Nested Paging enabled\n"); | 455 | printk(KERN_INFO "kvm: Nested Paging enabled\n"); |
448 | kvm_enable_tdp(); | 456 | kvm_enable_tdp(); |
449 | } | 457 | } else |
458 | kvm_disable_tdp(); | ||
450 | 459 | ||
451 | return 0; | 460 | return 0; |
452 | 461 | ||
@@ -458,6 +467,11 @@ err: | |||
458 | 467 | ||
459 | static __exit void svm_hardware_unsetup(void) | 468 | static __exit void svm_hardware_unsetup(void) |
460 | { | 469 | { |
470 | int cpu; | ||
471 | |||
472 | for_each_online_cpu(cpu) | ||
473 | svm_cpu_uninit(cpu); | ||
474 | |||
461 | __free_pages(pfn_to_page(iopm_base >> PAGE_SHIFT), IOPM_ALLOC_ORDER); | 475 | __free_pages(pfn_to_page(iopm_base >> PAGE_SHIFT), IOPM_ALLOC_ORDER); |
462 | iopm_base = 0; | 476 | iopm_base = 0; |
463 | } | 477 | } |
@@ -707,10 +721,6 @@ static void svm_vcpu_put(struct kvm_vcpu *vcpu) | |||
707 | rdtscll(vcpu->arch.host_tsc); | 721 | rdtscll(vcpu->arch.host_tsc); |
708 | } | 722 | } |
709 | 723 | ||
710 | static void svm_vcpu_decache(struct kvm_vcpu *vcpu) | ||
711 | { | ||
712 | } | ||
713 | |||
714 | static void svm_cache_regs(struct kvm_vcpu *vcpu) | 724 | static void svm_cache_regs(struct kvm_vcpu *vcpu) |
715 | { | 725 | { |
716 | struct vcpu_svm *svm = to_svm(vcpu); | 726 | struct vcpu_svm *svm = to_svm(vcpu); |
@@ -869,6 +879,10 @@ set: | |||
869 | static void svm_set_cr4(struct kvm_vcpu *vcpu, unsigned long cr4) | 879 | static void svm_set_cr4(struct kvm_vcpu *vcpu, unsigned long cr4) |
870 | { | 880 | { |
871 | unsigned long host_cr4_mce = read_cr4() & X86_CR4_MCE; | 881 | unsigned long host_cr4_mce = read_cr4() & X86_CR4_MCE; |
882 | unsigned long old_cr4 = to_svm(vcpu)->vmcb->save.cr4; | ||
883 | |||
884 | if (npt_enabled && ((old_cr4 ^ cr4) & X86_CR4_PGE)) | ||
885 | force_new_asid(vcpu); | ||
872 | 886 | ||
873 | vcpu->arch.cr4 = cr4; | 887 | vcpu->arch.cr4 = cr4; |
874 | if (!npt_enabled) | 888 | if (!npt_enabled) |
@@ -949,7 +963,9 @@ static void new_asid(struct vcpu_svm *svm, struct svm_cpu_data *svm_data) | |||
949 | 963 | ||
950 | static unsigned long svm_get_dr(struct kvm_vcpu *vcpu, int dr) | 964 | static unsigned long svm_get_dr(struct kvm_vcpu *vcpu, int dr) |
951 | { | 965 | { |
952 | return to_svm(vcpu)->db_regs[dr]; | 966 | unsigned long val = to_svm(vcpu)->db_regs[dr]; |
967 | KVMTRACE_2D(DR_READ, vcpu, (u32)dr, (u32)val, handler); | ||
968 | return val; | ||
953 | } | 969 | } |
954 | 970 | ||
955 | static void svm_set_dr(struct kvm_vcpu *vcpu, int dr, unsigned long value, | 971 | static void svm_set_dr(struct kvm_vcpu *vcpu, int dr, unsigned long value, |
@@ -997,13 +1013,35 @@ static int pf_interception(struct vcpu_svm *svm, struct kvm_run *kvm_run) | |||
997 | struct kvm *kvm = svm->vcpu.kvm; | 1013 | struct kvm *kvm = svm->vcpu.kvm; |
998 | u64 fault_address; | 1014 | u64 fault_address; |
999 | u32 error_code; | 1015 | u32 error_code; |
1016 | bool event_injection = false; | ||
1000 | 1017 | ||
1001 | if (!irqchip_in_kernel(kvm) && | 1018 | if (!irqchip_in_kernel(kvm) && |
1002 | is_external_interrupt(exit_int_info)) | 1019 | is_external_interrupt(exit_int_info)) { |
1020 | event_injection = true; | ||
1003 | push_irq(&svm->vcpu, exit_int_info & SVM_EVTINJ_VEC_MASK); | 1021 | push_irq(&svm->vcpu, exit_int_info & SVM_EVTINJ_VEC_MASK); |
1022 | } | ||
1004 | 1023 | ||
1005 | fault_address = svm->vmcb->control.exit_info_2; | 1024 | fault_address = svm->vmcb->control.exit_info_2; |
1006 | error_code = svm->vmcb->control.exit_info_1; | 1025 | error_code = svm->vmcb->control.exit_info_1; |
1026 | |||
1027 | if (!npt_enabled) | ||
1028 | KVMTRACE_3D(PAGE_FAULT, &svm->vcpu, error_code, | ||
1029 | (u32)fault_address, (u32)(fault_address >> 32), | ||
1030 | handler); | ||
1031 | else | ||
1032 | KVMTRACE_3D(TDP_FAULT, &svm->vcpu, error_code, | ||
1033 | (u32)fault_address, (u32)(fault_address >> 32), | ||
1034 | handler); | ||
1035 | /* | ||
1036 | * FIXME: This shouldn't be necessary here, but there is a flush | ||
1037 | * missing in the MMU code. Until we find this bug, flush the | ||
1038 | * complete TLB here on an NPF | ||
1039 | */ | ||
1040 | if (npt_enabled) | ||
1041 | svm_flush_tlb(&svm->vcpu); | ||
1042 | |||
1043 | if (event_injection) | ||
1044 | kvm_mmu_unprotect_page_virt(&svm->vcpu, fault_address); | ||
1007 | return kvm_mmu_page_fault(&svm->vcpu, fault_address, error_code); | 1045 | return kvm_mmu_page_fault(&svm->vcpu, fault_address, error_code); |
1008 | } | 1046 | } |
1009 | 1047 | ||
@@ -1081,6 +1119,19 @@ static int io_interception(struct vcpu_svm *svm, struct kvm_run *kvm_run) | |||
1081 | return kvm_emulate_pio(&svm->vcpu, kvm_run, in, size, port); | 1119 | return kvm_emulate_pio(&svm->vcpu, kvm_run, in, size, port); |
1082 | } | 1120 | } |
1083 | 1121 | ||
1122 | static int nmi_interception(struct vcpu_svm *svm, struct kvm_run *kvm_run) | ||
1123 | { | ||
1124 | KVMTRACE_0D(NMI, &svm->vcpu, handler); | ||
1125 | return 1; | ||
1126 | } | ||
1127 | |||
1128 | static int intr_interception(struct vcpu_svm *svm, struct kvm_run *kvm_run) | ||
1129 | { | ||
1130 | ++svm->vcpu.stat.irq_exits; | ||
1131 | KVMTRACE_0D(INTR, &svm->vcpu, handler); | ||
1132 | return 1; | ||
1133 | } | ||
1134 | |||
1084 | static int nop_on_interception(struct vcpu_svm *svm, struct kvm_run *kvm_run) | 1135 | static int nop_on_interception(struct vcpu_svm *svm, struct kvm_run *kvm_run) |
1085 | { | 1136 | { |
1086 | return 1; | 1137 | return 1; |
@@ -1219,6 +1270,9 @@ static int rdmsr_interception(struct vcpu_svm *svm, struct kvm_run *kvm_run) | |||
1219 | if (svm_get_msr(&svm->vcpu, ecx, &data)) | 1270 | if (svm_get_msr(&svm->vcpu, ecx, &data)) |
1220 | kvm_inject_gp(&svm->vcpu, 0); | 1271 | kvm_inject_gp(&svm->vcpu, 0); |
1221 | else { | 1272 | else { |
1273 | KVMTRACE_3D(MSR_READ, &svm->vcpu, ecx, (u32)data, | ||
1274 | (u32)(data >> 32), handler); | ||
1275 | |||
1222 | svm->vmcb->save.rax = data & 0xffffffff; | 1276 | svm->vmcb->save.rax = data & 0xffffffff; |
1223 | svm->vcpu.arch.regs[VCPU_REGS_RDX] = data >> 32; | 1277 | svm->vcpu.arch.regs[VCPU_REGS_RDX] = data >> 32; |
1224 | svm->next_rip = svm->vmcb->save.rip + 2; | 1278 | svm->next_rip = svm->vmcb->save.rip + 2; |
@@ -1284,16 +1338,19 @@ static int svm_set_msr(struct kvm_vcpu *vcpu, unsigned ecx, u64 data) | |||
1284 | case MSR_K7_EVNTSEL1: | 1338 | case MSR_K7_EVNTSEL1: |
1285 | case MSR_K7_EVNTSEL2: | 1339 | case MSR_K7_EVNTSEL2: |
1286 | case MSR_K7_EVNTSEL3: | 1340 | case MSR_K7_EVNTSEL3: |
1341 | case MSR_K7_PERFCTR0: | ||
1342 | case MSR_K7_PERFCTR1: | ||
1343 | case MSR_K7_PERFCTR2: | ||
1344 | case MSR_K7_PERFCTR3: | ||
1287 | /* | 1345 | /* |
1288 | * only support writing 0 to the performance counters for now | 1346 | * Just discard all writes to the performance counters; this |
1289 | * to make Windows happy. Should be replaced by a real | 1347 | * should keep both older linux and windows 64-bit guests |
1290 | * performance counter emulation later. | 1348 | * happy |
1291 | */ | 1349 | */ |
1292 | if (data != 0) | 1350 | pr_unimpl(vcpu, "unimplemented perfctr wrmsr: 0x%x data 0x%llx\n", ecx, data); |
1293 | goto unhandled; | 1351 | |
1294 | break; | 1352 | break; |
1295 | default: | 1353 | default: |
1296 | unhandled: | ||
1297 | return kvm_set_msr_common(vcpu, ecx, data); | 1354 | return kvm_set_msr_common(vcpu, ecx, data); |
1298 | } | 1355 | } |
1299 | return 0; | 1356 | return 0; |
@@ -1304,6 +1361,10 @@ static int wrmsr_interception(struct vcpu_svm *svm, struct kvm_run *kvm_run) | |||
1304 | u32 ecx = svm->vcpu.arch.regs[VCPU_REGS_RCX]; | 1361 | u32 ecx = svm->vcpu.arch.regs[VCPU_REGS_RCX]; |
1305 | u64 data = (svm->vmcb->save.rax & -1u) | 1362 | u64 data = (svm->vmcb->save.rax & -1u) |
1306 | | ((u64)(svm->vcpu.arch.regs[VCPU_REGS_RDX] & -1u) << 32); | 1363 | | ((u64)(svm->vcpu.arch.regs[VCPU_REGS_RDX] & -1u) << 32); |
1364 | |||
1365 | KVMTRACE_3D(MSR_WRITE, &svm->vcpu, ecx, (u32)data, (u32)(data >> 32), | ||
1366 | handler); | ||
1367 | |||
1307 | svm->next_rip = svm->vmcb->save.rip + 2; | 1368 | svm->next_rip = svm->vmcb->save.rip + 2; |
1308 | if (svm_set_msr(&svm->vcpu, ecx, data)) | 1369 | if (svm_set_msr(&svm->vcpu, ecx, data)) |
1309 | kvm_inject_gp(&svm->vcpu, 0); | 1370 | kvm_inject_gp(&svm->vcpu, 0); |
@@ -1323,6 +1384,8 @@ static int msr_interception(struct vcpu_svm *svm, struct kvm_run *kvm_run) | |||
1323 | static int interrupt_window_interception(struct vcpu_svm *svm, | 1384 | static int interrupt_window_interception(struct vcpu_svm *svm, |
1324 | struct kvm_run *kvm_run) | 1385 | struct kvm_run *kvm_run) |
1325 | { | 1386 | { |
1387 | KVMTRACE_0D(PEND_INTR, &svm->vcpu, handler); | ||
1388 | |||
1326 | svm->vmcb->control.intercept &= ~(1ULL << INTERCEPT_VINTR); | 1389 | svm->vmcb->control.intercept &= ~(1ULL << INTERCEPT_VINTR); |
1327 | svm->vmcb->control.int_ctl &= ~V_IRQ_MASK; | 1390 | svm->vmcb->control.int_ctl &= ~V_IRQ_MASK; |
1328 | /* | 1391 | /* |
@@ -1364,8 +1427,8 @@ static int (*svm_exit_handlers[])(struct vcpu_svm *svm, | |||
1364 | [SVM_EXIT_EXCP_BASE + PF_VECTOR] = pf_interception, | 1427 | [SVM_EXIT_EXCP_BASE + PF_VECTOR] = pf_interception, |
1365 | [SVM_EXIT_EXCP_BASE + NM_VECTOR] = nm_interception, | 1428 | [SVM_EXIT_EXCP_BASE + NM_VECTOR] = nm_interception, |
1366 | [SVM_EXIT_EXCP_BASE + MC_VECTOR] = mc_interception, | 1429 | [SVM_EXIT_EXCP_BASE + MC_VECTOR] = mc_interception, |
1367 | [SVM_EXIT_INTR] = nop_on_interception, | 1430 | [SVM_EXIT_INTR] = intr_interception, |
1368 | [SVM_EXIT_NMI] = nop_on_interception, | 1431 | [SVM_EXIT_NMI] = nmi_interception, |
1369 | [SVM_EXIT_SMI] = nop_on_interception, | 1432 | [SVM_EXIT_SMI] = nop_on_interception, |
1370 | [SVM_EXIT_INIT] = nop_on_interception, | 1433 | [SVM_EXIT_INIT] = nop_on_interception, |
1371 | [SVM_EXIT_VINTR] = interrupt_window_interception, | 1434 | [SVM_EXIT_VINTR] = interrupt_window_interception, |
@@ -1397,6 +1460,9 @@ static int handle_exit(struct kvm_run *kvm_run, struct kvm_vcpu *vcpu) | |||
1397 | struct vcpu_svm *svm = to_svm(vcpu); | 1460 | struct vcpu_svm *svm = to_svm(vcpu); |
1398 | u32 exit_code = svm->vmcb->control.exit_code; | 1461 | u32 exit_code = svm->vmcb->control.exit_code; |
1399 | 1462 | ||
1463 | KVMTRACE_3D(VMEXIT, vcpu, exit_code, (u32)svm->vmcb->save.rip, | ||
1464 | (u32)((u64)svm->vmcb->save.rip >> 32), entryexit); | ||
1465 | |||
1400 | if (npt_enabled) { | 1466 | if (npt_enabled) { |
1401 | int mmu_reload = 0; | 1467 | int mmu_reload = 0; |
1402 | if ((vcpu->arch.cr0 ^ svm->vmcb->save.cr0) & X86_CR0_PG) { | 1468 | if ((vcpu->arch.cr0 ^ svm->vmcb->save.cr0) & X86_CR0_PG) { |
@@ -1470,6 +1536,8 @@ static inline void svm_inject_irq(struct vcpu_svm *svm, int irq) | |||
1470 | { | 1536 | { |
1471 | struct vmcb_control_area *control; | 1537 | struct vmcb_control_area *control; |
1472 | 1538 | ||
1539 | KVMTRACE_1D(INJ_VIRQ, &svm->vcpu, (u32)irq, handler); | ||
1540 | |||
1473 | control = &svm->vmcb->control; | 1541 | control = &svm->vmcb->control; |
1474 | control->int_vector = irq; | 1542 | control->int_vector = irq; |
1475 | control->int_ctl &= ~V_INTR_PRIO_MASK; | 1543 | control->int_ctl &= ~V_INTR_PRIO_MASK; |
@@ -1660,9 +1728,9 @@ static void svm_vcpu_run(struct kvm_vcpu *vcpu, struct kvm_run *kvm_run) | |||
1660 | sync_lapic_to_cr8(vcpu); | 1728 | sync_lapic_to_cr8(vcpu); |
1661 | 1729 | ||
1662 | save_host_msrs(vcpu); | 1730 | save_host_msrs(vcpu); |
1663 | fs_selector = read_fs(); | 1731 | fs_selector = kvm_read_fs(); |
1664 | gs_selector = read_gs(); | 1732 | gs_selector = kvm_read_gs(); |
1665 | ldt_selector = read_ldt(); | 1733 | ldt_selector = kvm_read_ldt(); |
1666 | svm->host_cr2 = kvm_read_cr2(); | 1734 | svm->host_cr2 = kvm_read_cr2(); |
1667 | svm->host_dr6 = read_dr6(); | 1735 | svm->host_dr6 = read_dr6(); |
1668 | svm->host_dr7 = read_dr7(); | 1736 | svm->host_dr7 = read_dr7(); |
@@ -1716,17 +1784,17 @@ static void svm_vcpu_run(struct kvm_vcpu *vcpu, struct kvm_run *kvm_run) | |||
1716 | /* Enter guest mode */ | 1784 | /* Enter guest mode */ |
1717 | "push %%rax \n\t" | 1785 | "push %%rax \n\t" |
1718 | "mov %c[vmcb](%[svm]), %%rax \n\t" | 1786 | "mov %c[vmcb](%[svm]), %%rax \n\t" |
1719 | SVM_VMLOAD "\n\t" | 1787 | __ex(SVM_VMLOAD) "\n\t" |
1720 | SVM_VMRUN "\n\t" | 1788 | __ex(SVM_VMRUN) "\n\t" |
1721 | SVM_VMSAVE "\n\t" | 1789 | __ex(SVM_VMSAVE) "\n\t" |
1722 | "pop %%rax \n\t" | 1790 | "pop %%rax \n\t" |
1723 | #else | 1791 | #else |
1724 | /* Enter guest mode */ | 1792 | /* Enter guest mode */ |
1725 | "push %%eax \n\t" | 1793 | "push %%eax \n\t" |
1726 | "mov %c[vmcb](%[svm]), %%eax \n\t" | 1794 | "mov %c[vmcb](%[svm]), %%eax \n\t" |
1727 | SVM_VMLOAD "\n\t" | 1795 | __ex(SVM_VMLOAD) "\n\t" |
1728 | SVM_VMRUN "\n\t" | 1796 | __ex(SVM_VMRUN) "\n\t" |
1729 | SVM_VMSAVE "\n\t" | 1797 | __ex(SVM_VMSAVE) "\n\t" |
1730 | "pop %%eax \n\t" | 1798 | "pop %%eax \n\t" |
1731 | #endif | 1799 | #endif |
1732 | 1800 | ||
@@ -1795,9 +1863,9 @@ static void svm_vcpu_run(struct kvm_vcpu *vcpu, struct kvm_run *kvm_run) | |||
1795 | write_dr7(svm->host_dr7); | 1863 | write_dr7(svm->host_dr7); |
1796 | kvm_write_cr2(svm->host_cr2); | 1864 | kvm_write_cr2(svm->host_cr2); |
1797 | 1865 | ||
1798 | load_fs(fs_selector); | 1866 | kvm_load_fs(fs_selector); |
1799 | load_gs(gs_selector); | 1867 | kvm_load_gs(gs_selector); |
1800 | load_ldt(ldt_selector); | 1868 | kvm_load_ldt(ldt_selector); |
1801 | load_host_msrs(vcpu); | 1869 | load_host_msrs(vcpu); |
1802 | 1870 | ||
1803 | reload_tss(vcpu); | 1871 | reload_tss(vcpu); |
@@ -1889,7 +1957,6 @@ static struct kvm_x86_ops svm_x86_ops = { | |||
1889 | .prepare_guest_switch = svm_prepare_guest_switch, | 1957 | .prepare_guest_switch = svm_prepare_guest_switch, |
1890 | .vcpu_load = svm_vcpu_load, | 1958 | .vcpu_load = svm_vcpu_load, |
1891 | .vcpu_put = svm_vcpu_put, | 1959 | .vcpu_put = svm_vcpu_put, |
1892 | .vcpu_decache = svm_vcpu_decache, | ||
1893 | 1960 | ||
1894 | .set_guest_debug = svm_guest_debug, | 1961 | .set_guest_debug = svm_guest_debug, |
1895 | .get_msr = svm_get_msr, | 1962 | .get_msr = svm_get_msr, |
diff --git a/arch/x86/kvm/vmx.c b/arch/x86/kvm/vmx.c index 10ce6ee4c491..7041cc52b562 100644 --- a/arch/x86/kvm/vmx.c +++ b/arch/x86/kvm/vmx.c | |||
@@ -30,6 +30,8 @@ | |||
30 | #include <asm/io.h> | 30 | #include <asm/io.h> |
31 | #include <asm/desc.h> | 31 | #include <asm/desc.h> |
32 | 32 | ||
33 | #define __ex(x) __kvm_handle_fault_on_reboot(x) | ||
34 | |||
33 | MODULE_AUTHOR("Qumranet"); | 35 | MODULE_AUTHOR("Qumranet"); |
34 | MODULE_LICENSE("GPL"); | 36 | MODULE_LICENSE("GPL"); |
35 | 37 | ||
@@ -53,6 +55,7 @@ struct vmcs { | |||
53 | 55 | ||
54 | struct vcpu_vmx { | 56 | struct vcpu_vmx { |
55 | struct kvm_vcpu vcpu; | 57 | struct kvm_vcpu vcpu; |
58 | struct list_head local_vcpus_link; | ||
56 | int launched; | 59 | int launched; |
57 | u8 fail; | 60 | u8 fail; |
58 | u32 idt_vectoring_info; | 61 | u32 idt_vectoring_info; |
@@ -88,9 +91,11 @@ static inline struct vcpu_vmx *to_vmx(struct kvm_vcpu *vcpu) | |||
88 | } | 91 | } |
89 | 92 | ||
90 | static int init_rmode(struct kvm *kvm); | 93 | static int init_rmode(struct kvm *kvm); |
94 | static u64 construct_eptp(unsigned long root_hpa); | ||
91 | 95 | ||
92 | static DEFINE_PER_CPU(struct vmcs *, vmxarea); | 96 | static DEFINE_PER_CPU(struct vmcs *, vmxarea); |
93 | static DEFINE_PER_CPU(struct vmcs *, current_vmcs); | 97 | static DEFINE_PER_CPU(struct vmcs *, current_vmcs); |
98 | static DEFINE_PER_CPU(struct list_head, vcpus_on_cpu); | ||
94 | 99 | ||
95 | static struct page *vmx_io_bitmap_a; | 100 | static struct page *vmx_io_bitmap_a; |
96 | static struct page *vmx_io_bitmap_b; | 101 | static struct page *vmx_io_bitmap_b; |
@@ -260,6 +265,11 @@ static inline int cpu_has_vmx_vpid(void) | |||
260 | SECONDARY_EXEC_ENABLE_VPID); | 265 | SECONDARY_EXEC_ENABLE_VPID); |
261 | } | 266 | } |
262 | 267 | ||
268 | static inline int cpu_has_virtual_nmis(void) | ||
269 | { | ||
270 | return vmcs_config.pin_based_exec_ctrl & PIN_BASED_VIRTUAL_NMIS; | ||
271 | } | ||
272 | |||
263 | static int __find_msr_index(struct vcpu_vmx *vmx, u32 msr) | 273 | static int __find_msr_index(struct vcpu_vmx *vmx, u32 msr) |
264 | { | 274 | { |
265 | int i; | 275 | int i; |
@@ -278,7 +288,7 @@ static inline void __invvpid(int ext, u16 vpid, gva_t gva) | |||
278 | u64 gva; | 288 | u64 gva; |
279 | } operand = { vpid, 0, gva }; | 289 | } operand = { vpid, 0, gva }; |
280 | 290 | ||
281 | asm volatile (ASM_VMX_INVVPID | 291 | asm volatile (__ex(ASM_VMX_INVVPID) |
282 | /* CF==1 or ZF==1 --> rc = -1 */ | 292 | /* CF==1 or ZF==1 --> rc = -1 */ |
283 | "; ja 1f ; ud2 ; 1:" | 293 | "; ja 1f ; ud2 ; 1:" |
284 | : : "a"(&operand), "c"(ext) : "cc", "memory"); | 294 | : : "a"(&operand), "c"(ext) : "cc", "memory"); |
@@ -290,7 +300,7 @@ static inline void __invept(int ext, u64 eptp, gpa_t gpa) | |||
290 | u64 eptp, gpa; | 300 | u64 eptp, gpa; |
291 | } operand = {eptp, gpa}; | 301 | } operand = {eptp, gpa}; |
292 | 302 | ||
293 | asm volatile (ASM_VMX_INVEPT | 303 | asm volatile (__ex(ASM_VMX_INVEPT) |
294 | /* CF==1 or ZF==1 --> rc = -1 */ | 304 | /* CF==1 or ZF==1 --> rc = -1 */ |
295 | "; ja 1f ; ud2 ; 1:\n" | 305 | "; ja 1f ; ud2 ; 1:\n" |
296 | : : "a" (&operand), "c" (ext) : "cc", "memory"); | 306 | : : "a" (&operand), "c" (ext) : "cc", "memory"); |
@@ -311,7 +321,7 @@ static void vmcs_clear(struct vmcs *vmcs) | |||
311 | u64 phys_addr = __pa(vmcs); | 321 | u64 phys_addr = __pa(vmcs); |
312 | u8 error; | 322 | u8 error; |
313 | 323 | ||
314 | asm volatile (ASM_VMX_VMCLEAR_RAX "; setna %0" | 324 | asm volatile (__ex(ASM_VMX_VMCLEAR_RAX) "; setna %0" |
315 | : "=g"(error) : "a"(&phys_addr), "m"(phys_addr) | 325 | : "=g"(error) : "a"(&phys_addr), "m"(phys_addr) |
316 | : "cc", "memory"); | 326 | : "cc", "memory"); |
317 | if (error) | 327 | if (error) |
@@ -329,6 +339,9 @@ static void __vcpu_clear(void *arg) | |||
329 | if (per_cpu(current_vmcs, cpu) == vmx->vmcs) | 339 | if (per_cpu(current_vmcs, cpu) == vmx->vmcs) |
330 | per_cpu(current_vmcs, cpu) = NULL; | 340 | per_cpu(current_vmcs, cpu) = NULL; |
331 | rdtscll(vmx->vcpu.arch.host_tsc); | 341 | rdtscll(vmx->vcpu.arch.host_tsc); |
342 | list_del(&vmx->local_vcpus_link); | ||
343 | vmx->vcpu.cpu = -1; | ||
344 | vmx->launched = 0; | ||
332 | } | 345 | } |
333 | 346 | ||
334 | static void vcpu_clear(struct vcpu_vmx *vmx) | 347 | static void vcpu_clear(struct vcpu_vmx *vmx) |
@@ -336,7 +349,6 @@ static void vcpu_clear(struct vcpu_vmx *vmx) | |||
336 | if (vmx->vcpu.cpu == -1) | 349 | if (vmx->vcpu.cpu == -1) |
337 | return; | 350 | return; |
338 | smp_call_function_single(vmx->vcpu.cpu, __vcpu_clear, vmx, 1); | 351 | smp_call_function_single(vmx->vcpu.cpu, __vcpu_clear, vmx, 1); |
339 | vmx->launched = 0; | ||
340 | } | 352 | } |
341 | 353 | ||
342 | static inline void vpid_sync_vcpu_all(struct vcpu_vmx *vmx) | 354 | static inline void vpid_sync_vcpu_all(struct vcpu_vmx *vmx) |
@@ -378,7 +390,7 @@ static unsigned long vmcs_readl(unsigned long field) | |||
378 | { | 390 | { |
379 | unsigned long value; | 391 | unsigned long value; |
380 | 392 | ||
381 | asm volatile (ASM_VMX_VMREAD_RDX_RAX | 393 | asm volatile (__ex(ASM_VMX_VMREAD_RDX_RAX) |
382 | : "=a"(value) : "d"(field) : "cc"); | 394 | : "=a"(value) : "d"(field) : "cc"); |
383 | return value; | 395 | return value; |
384 | } | 396 | } |
@@ -413,7 +425,7 @@ static void vmcs_writel(unsigned long field, unsigned long value) | |||
413 | { | 425 | { |
414 | u8 error; | 426 | u8 error; |
415 | 427 | ||
416 | asm volatile (ASM_VMX_VMWRITE_RAX_RDX "; setna %0" | 428 | asm volatile (__ex(ASM_VMX_VMWRITE_RAX_RDX) "; setna %0" |
417 | : "=q"(error) : "a"(value), "d"(field) : "cc"); | 429 | : "=q"(error) : "a"(value), "d"(field) : "cc"); |
418 | if (unlikely(error)) | 430 | if (unlikely(error)) |
419 | vmwrite_error(field, value); | 431 | vmwrite_error(field, value); |
@@ -431,10 +443,8 @@ static void vmcs_write32(unsigned long field, u32 value) | |||
431 | 443 | ||
432 | static void vmcs_write64(unsigned long field, u64 value) | 444 | static void vmcs_write64(unsigned long field, u64 value) |
433 | { | 445 | { |
434 | #ifdef CONFIG_X86_64 | ||
435 | vmcs_writel(field, value); | ||
436 | #else | ||
437 | vmcs_writel(field, value); | 446 | vmcs_writel(field, value); |
447 | #ifndef CONFIG_X86_64 | ||
438 | asm volatile (""); | 448 | asm volatile (""); |
439 | vmcs_writel(field+1, value >> 32); | 449 | vmcs_writel(field+1, value >> 32); |
440 | #endif | 450 | #endif |
@@ -474,7 +484,7 @@ static void reload_tss(void) | |||
474 | struct descriptor_table gdt; | 484 | struct descriptor_table gdt; |
475 | struct desc_struct *descs; | 485 | struct desc_struct *descs; |
476 | 486 | ||
477 | get_gdt(&gdt); | 487 | kvm_get_gdt(&gdt); |
478 | descs = (void *)gdt.base; | 488 | descs = (void *)gdt.base; |
479 | descs[GDT_ENTRY_TSS].type = 9; /* available TSS */ | 489 | descs[GDT_ENTRY_TSS].type = 9; /* available TSS */ |
480 | load_TR_desc(); | 490 | load_TR_desc(); |
@@ -530,9 +540,9 @@ static void vmx_save_host_state(struct kvm_vcpu *vcpu) | |||
530 | * Set host fs and gs selectors. Unfortunately, 22.2.3 does not | 540 | * Set host fs and gs selectors. Unfortunately, 22.2.3 does not |
531 | * allow segment selectors with cpl > 0 or ti == 1. | 541 | * allow segment selectors with cpl > 0 or ti == 1. |
532 | */ | 542 | */ |
533 | vmx->host_state.ldt_sel = read_ldt(); | 543 | vmx->host_state.ldt_sel = kvm_read_ldt(); |
534 | vmx->host_state.gs_ldt_reload_needed = vmx->host_state.ldt_sel; | 544 | vmx->host_state.gs_ldt_reload_needed = vmx->host_state.ldt_sel; |
535 | vmx->host_state.fs_sel = read_fs(); | 545 | vmx->host_state.fs_sel = kvm_read_fs(); |
536 | if (!(vmx->host_state.fs_sel & 7)) { | 546 | if (!(vmx->host_state.fs_sel & 7)) { |
537 | vmcs_write16(HOST_FS_SELECTOR, vmx->host_state.fs_sel); | 547 | vmcs_write16(HOST_FS_SELECTOR, vmx->host_state.fs_sel); |
538 | vmx->host_state.fs_reload_needed = 0; | 548 | vmx->host_state.fs_reload_needed = 0; |
@@ -540,7 +550,7 @@ static void vmx_save_host_state(struct kvm_vcpu *vcpu) | |||
540 | vmcs_write16(HOST_FS_SELECTOR, 0); | 550 | vmcs_write16(HOST_FS_SELECTOR, 0); |
541 | vmx->host_state.fs_reload_needed = 1; | 551 | vmx->host_state.fs_reload_needed = 1; |
542 | } | 552 | } |
543 | vmx->host_state.gs_sel = read_gs(); | 553 | vmx->host_state.gs_sel = kvm_read_gs(); |
544 | if (!(vmx->host_state.gs_sel & 7)) | 554 | if (!(vmx->host_state.gs_sel & 7)) |
545 | vmcs_write16(HOST_GS_SELECTOR, vmx->host_state.gs_sel); | 555 | vmcs_write16(HOST_GS_SELECTOR, vmx->host_state.gs_sel); |
546 | else { | 556 | else { |
@@ -576,15 +586,15 @@ static void __vmx_load_host_state(struct vcpu_vmx *vmx) | |||
576 | ++vmx->vcpu.stat.host_state_reload; | 586 | ++vmx->vcpu.stat.host_state_reload; |
577 | vmx->host_state.loaded = 0; | 587 | vmx->host_state.loaded = 0; |
578 | if (vmx->host_state.fs_reload_needed) | 588 | if (vmx->host_state.fs_reload_needed) |
579 | load_fs(vmx->host_state.fs_sel); | 589 | kvm_load_fs(vmx->host_state.fs_sel); |
580 | if (vmx->host_state.gs_ldt_reload_needed) { | 590 | if (vmx->host_state.gs_ldt_reload_needed) { |
581 | load_ldt(vmx->host_state.ldt_sel); | 591 | kvm_load_ldt(vmx->host_state.ldt_sel); |
582 | /* | 592 | /* |
583 | * If we have to reload gs, we must take care to | 593 | * If we have to reload gs, we must take care to |
584 | * preserve our gs base. | 594 | * preserve our gs base. |
585 | */ | 595 | */ |
586 | local_irq_save(flags); | 596 | local_irq_save(flags); |
587 | load_gs(vmx->host_state.gs_sel); | 597 | kvm_load_gs(vmx->host_state.gs_sel); |
588 | #ifdef CONFIG_X86_64 | 598 | #ifdef CONFIG_X86_64 |
589 | wrmsrl(MSR_GS_BASE, vmcs_readl(HOST_GS_BASE)); | 599 | wrmsrl(MSR_GS_BASE, vmcs_readl(HOST_GS_BASE)); |
590 | #endif | 600 | #endif |
@@ -617,13 +627,17 @@ static void vmx_vcpu_load(struct kvm_vcpu *vcpu, int cpu) | |||
617 | vcpu_clear(vmx); | 627 | vcpu_clear(vmx); |
618 | kvm_migrate_timers(vcpu); | 628 | kvm_migrate_timers(vcpu); |
619 | vpid_sync_vcpu_all(vmx); | 629 | vpid_sync_vcpu_all(vmx); |
630 | local_irq_disable(); | ||
631 | list_add(&vmx->local_vcpus_link, | ||
632 | &per_cpu(vcpus_on_cpu, cpu)); | ||
633 | local_irq_enable(); | ||
620 | } | 634 | } |
621 | 635 | ||
622 | if (per_cpu(current_vmcs, cpu) != vmx->vmcs) { | 636 | if (per_cpu(current_vmcs, cpu) != vmx->vmcs) { |
623 | u8 error; | 637 | u8 error; |
624 | 638 | ||
625 | per_cpu(current_vmcs, cpu) = vmx->vmcs; | 639 | per_cpu(current_vmcs, cpu) = vmx->vmcs; |
626 | asm volatile (ASM_VMX_VMPTRLD_RAX "; setna %0" | 640 | asm volatile (__ex(ASM_VMX_VMPTRLD_RAX) "; setna %0" |
627 | : "=g"(error) : "a"(&phys_addr), "m"(phys_addr) | 641 | : "=g"(error) : "a"(&phys_addr), "m"(phys_addr) |
628 | : "cc"); | 642 | : "cc"); |
629 | if (error) | 643 | if (error) |
@@ -640,8 +654,8 @@ static void vmx_vcpu_load(struct kvm_vcpu *vcpu, int cpu) | |||
640 | * Linux uses per-cpu TSS and GDT, so set these when switching | 654 | * Linux uses per-cpu TSS and GDT, so set these when switching |
641 | * processors. | 655 | * processors. |
642 | */ | 656 | */ |
643 | vmcs_writel(HOST_TR_BASE, read_tr_base()); /* 22.2.4 */ | 657 | vmcs_writel(HOST_TR_BASE, kvm_read_tr_base()); /* 22.2.4 */ |
644 | get_gdt(&dt); | 658 | kvm_get_gdt(&dt); |
645 | vmcs_writel(HOST_GDTR_BASE, dt.base); /* 22.2.4 */ | 659 | vmcs_writel(HOST_GDTR_BASE, dt.base); /* 22.2.4 */ |
646 | 660 | ||
647 | rdmsrl(MSR_IA32_SYSENTER_ESP, sysenter_esp); | 661 | rdmsrl(MSR_IA32_SYSENTER_ESP, sysenter_esp); |
@@ -684,11 +698,6 @@ static void vmx_fpu_deactivate(struct kvm_vcpu *vcpu) | |||
684 | update_exception_bitmap(vcpu); | 698 | update_exception_bitmap(vcpu); |
685 | } | 699 | } |
686 | 700 | ||
687 | static void vmx_vcpu_decache(struct kvm_vcpu *vcpu) | ||
688 | { | ||
689 | vcpu_clear(to_vmx(vcpu)); | ||
690 | } | ||
691 | |||
692 | static unsigned long vmx_get_rflags(struct kvm_vcpu *vcpu) | 701 | static unsigned long vmx_get_rflags(struct kvm_vcpu *vcpu) |
693 | { | 702 | { |
694 | return vmcs_readl(GUEST_RFLAGS); | 703 | return vmcs_readl(GUEST_RFLAGS); |
@@ -913,6 +922,18 @@ static int vmx_set_msr(struct kvm_vcpu *vcpu, u32 msr_index, u64 data) | |||
913 | case MSR_IA32_TIME_STAMP_COUNTER: | 922 | case MSR_IA32_TIME_STAMP_COUNTER: |
914 | guest_write_tsc(data); | 923 | guest_write_tsc(data); |
915 | break; | 924 | break; |
925 | case MSR_P6_PERFCTR0: | ||
926 | case MSR_P6_PERFCTR1: | ||
927 | case MSR_P6_EVNTSEL0: | ||
928 | case MSR_P6_EVNTSEL1: | ||
929 | /* | ||
930 | * Just discard all writes to the performance counters; this | ||
931 | * should keep both older linux and windows 64-bit guests | ||
932 | * happy | ||
933 | */ | ||
934 | pr_unimpl(vcpu, "unimplemented perfctr wrmsr: 0x%x data 0x%llx\n", msr_index, data); | ||
935 | |||
936 | break; | ||
916 | default: | 937 | default: |
917 | vmx_load_host_state(vmx); | 938 | vmx_load_host_state(vmx); |
918 | msr = find_msr_entry(vmx, msr_index); | 939 | msr = find_msr_entry(vmx, msr_index); |
@@ -1022,6 +1043,7 @@ static void hardware_enable(void *garbage) | |||
1022 | u64 phys_addr = __pa(per_cpu(vmxarea, cpu)); | 1043 | u64 phys_addr = __pa(per_cpu(vmxarea, cpu)); |
1023 | u64 old; | 1044 | u64 old; |
1024 | 1045 | ||
1046 | INIT_LIST_HEAD(&per_cpu(vcpus_on_cpu, cpu)); | ||
1025 | rdmsrl(MSR_IA32_FEATURE_CONTROL, old); | 1047 | rdmsrl(MSR_IA32_FEATURE_CONTROL, old); |
1026 | if ((old & (MSR_IA32_FEATURE_CONTROL_LOCKED | | 1048 | if ((old & (MSR_IA32_FEATURE_CONTROL_LOCKED | |
1027 | MSR_IA32_FEATURE_CONTROL_VMXON_ENABLED)) | 1049 | MSR_IA32_FEATURE_CONTROL_VMXON_ENABLED)) |
@@ -1032,13 +1054,25 @@ static void hardware_enable(void *garbage) | |||
1032 | MSR_IA32_FEATURE_CONTROL_LOCKED | | 1054 | MSR_IA32_FEATURE_CONTROL_LOCKED | |
1033 | MSR_IA32_FEATURE_CONTROL_VMXON_ENABLED); | 1055 | MSR_IA32_FEATURE_CONTROL_VMXON_ENABLED); |
1034 | write_cr4(read_cr4() | X86_CR4_VMXE); /* FIXME: not cpu hotplug safe */ | 1056 | write_cr4(read_cr4() | X86_CR4_VMXE); /* FIXME: not cpu hotplug safe */ |
1035 | asm volatile (ASM_VMX_VMXON_RAX : : "a"(&phys_addr), "m"(phys_addr) | 1057 | asm volatile (ASM_VMX_VMXON_RAX |
1058 | : : "a"(&phys_addr), "m"(phys_addr) | ||
1036 | : "memory", "cc"); | 1059 | : "memory", "cc"); |
1037 | } | 1060 | } |
1038 | 1061 | ||
1062 | static void vmclear_local_vcpus(void) | ||
1063 | { | ||
1064 | int cpu = raw_smp_processor_id(); | ||
1065 | struct vcpu_vmx *vmx, *n; | ||
1066 | |||
1067 | list_for_each_entry_safe(vmx, n, &per_cpu(vcpus_on_cpu, cpu), | ||
1068 | local_vcpus_link) | ||
1069 | __vcpu_clear(vmx); | ||
1070 | } | ||
1071 | |||
1039 | static void hardware_disable(void *garbage) | 1072 | static void hardware_disable(void *garbage) |
1040 | { | 1073 | { |
1041 | asm volatile (ASM_VMX_VMXOFF : : : "cc"); | 1074 | vmclear_local_vcpus(); |
1075 | asm volatile (__ex(ASM_VMX_VMXOFF) : : : "cc"); | ||
1042 | write_cr4(read_cr4() & ~X86_CR4_VMXE); | 1076 | write_cr4(read_cr4() & ~X86_CR4_VMXE); |
1043 | } | 1077 | } |
1044 | 1078 | ||
@@ -1072,7 +1106,7 @@ static __init int setup_vmcs_config(struct vmcs_config *vmcs_conf) | |||
1072 | u32 _vmentry_control = 0; | 1106 | u32 _vmentry_control = 0; |
1073 | 1107 | ||
1074 | min = PIN_BASED_EXT_INTR_MASK | PIN_BASED_NMI_EXITING; | 1108 | min = PIN_BASED_EXT_INTR_MASK | PIN_BASED_NMI_EXITING; |
1075 | opt = 0; | 1109 | opt = PIN_BASED_VIRTUAL_NMIS; |
1076 | if (adjust_vmx_controls(min, opt, MSR_IA32_VMX_PINBASED_CTLS, | 1110 | if (adjust_vmx_controls(min, opt, MSR_IA32_VMX_PINBASED_CTLS, |
1077 | &_pin_based_exec_control) < 0) | 1111 | &_pin_based_exec_control) < 0) |
1078 | return -EIO; | 1112 | return -EIO; |
@@ -1389,6 +1423,8 @@ static void exit_lmode(struct kvm_vcpu *vcpu) | |||
1389 | static void vmx_flush_tlb(struct kvm_vcpu *vcpu) | 1423 | static void vmx_flush_tlb(struct kvm_vcpu *vcpu) |
1390 | { | 1424 | { |
1391 | vpid_sync_vcpu_all(to_vmx(vcpu)); | 1425 | vpid_sync_vcpu_all(to_vmx(vcpu)); |
1426 | if (vm_need_ept()) | ||
1427 | ept_sync_context(construct_eptp(vcpu->arch.mmu.root_hpa)); | ||
1392 | } | 1428 | } |
1393 | 1429 | ||
1394 | static void vmx_decache_cr4_guest_bits(struct kvm_vcpu *vcpu) | 1430 | static void vmx_decache_cr4_guest_bits(struct kvm_vcpu *vcpu) |
@@ -1420,7 +1456,7 @@ static void ept_update_paging_mode_cr0(unsigned long *hw_cr0, | |||
1420 | if (!(cr0 & X86_CR0_PG)) { | 1456 | if (!(cr0 & X86_CR0_PG)) { |
1421 | /* From paging/starting to nonpaging */ | 1457 | /* From paging/starting to nonpaging */ |
1422 | vmcs_write32(CPU_BASED_VM_EXEC_CONTROL, | 1458 | vmcs_write32(CPU_BASED_VM_EXEC_CONTROL, |
1423 | vmcs_config.cpu_based_exec_ctrl | | 1459 | vmcs_read32(CPU_BASED_VM_EXEC_CONTROL) | |
1424 | (CPU_BASED_CR3_LOAD_EXITING | | 1460 | (CPU_BASED_CR3_LOAD_EXITING | |
1425 | CPU_BASED_CR3_STORE_EXITING)); | 1461 | CPU_BASED_CR3_STORE_EXITING)); |
1426 | vcpu->arch.cr0 = cr0; | 1462 | vcpu->arch.cr0 = cr0; |
@@ -1430,7 +1466,7 @@ static void ept_update_paging_mode_cr0(unsigned long *hw_cr0, | |||
1430 | } else if (!is_paging(vcpu)) { | 1466 | } else if (!is_paging(vcpu)) { |
1431 | /* From nonpaging to paging */ | 1467 | /* From nonpaging to paging */ |
1432 | vmcs_write32(CPU_BASED_VM_EXEC_CONTROL, | 1468 | vmcs_write32(CPU_BASED_VM_EXEC_CONTROL, |
1433 | vmcs_config.cpu_based_exec_ctrl & | 1469 | vmcs_read32(CPU_BASED_VM_EXEC_CONTROL) & |
1434 | ~(CPU_BASED_CR3_LOAD_EXITING | | 1470 | ~(CPU_BASED_CR3_LOAD_EXITING | |
1435 | CPU_BASED_CR3_STORE_EXITING)); | 1471 | CPU_BASED_CR3_STORE_EXITING)); |
1436 | vcpu->arch.cr0 = cr0; | 1472 | vcpu->arch.cr0 = cr0; |
@@ -1821,7 +1857,7 @@ static void allocate_vpid(struct vcpu_vmx *vmx) | |||
1821 | spin_unlock(&vmx_vpid_lock); | 1857 | spin_unlock(&vmx_vpid_lock); |
1822 | } | 1858 | } |
1823 | 1859 | ||
1824 | void vmx_disable_intercept_for_msr(struct page *msr_bitmap, u32 msr) | 1860 | static void vmx_disable_intercept_for_msr(struct page *msr_bitmap, u32 msr) |
1825 | { | 1861 | { |
1826 | void *va; | 1862 | void *va; |
1827 | 1863 | ||
@@ -1907,8 +1943,8 @@ static int vmx_vcpu_setup(struct vcpu_vmx *vmx) | |||
1907 | vmcs_write16(HOST_CS_SELECTOR, __KERNEL_CS); /* 22.2.4 */ | 1943 | vmcs_write16(HOST_CS_SELECTOR, __KERNEL_CS); /* 22.2.4 */ |
1908 | vmcs_write16(HOST_DS_SELECTOR, __KERNEL_DS); /* 22.2.4 */ | 1944 | vmcs_write16(HOST_DS_SELECTOR, __KERNEL_DS); /* 22.2.4 */ |
1909 | vmcs_write16(HOST_ES_SELECTOR, __KERNEL_DS); /* 22.2.4 */ | 1945 | vmcs_write16(HOST_ES_SELECTOR, __KERNEL_DS); /* 22.2.4 */ |
1910 | vmcs_write16(HOST_FS_SELECTOR, read_fs()); /* 22.2.4 */ | 1946 | vmcs_write16(HOST_FS_SELECTOR, kvm_read_fs()); /* 22.2.4 */ |
1911 | vmcs_write16(HOST_GS_SELECTOR, read_gs()); /* 22.2.4 */ | 1947 | vmcs_write16(HOST_GS_SELECTOR, kvm_read_gs()); /* 22.2.4 */ |
1912 | vmcs_write16(HOST_SS_SELECTOR, __KERNEL_DS); /* 22.2.4 */ | 1948 | vmcs_write16(HOST_SS_SELECTOR, __KERNEL_DS); /* 22.2.4 */ |
1913 | #ifdef CONFIG_X86_64 | 1949 | #ifdef CONFIG_X86_64 |
1914 | rdmsrl(MSR_FS_BASE, a); | 1950 | rdmsrl(MSR_FS_BASE, a); |
@@ -1922,7 +1958,7 @@ static int vmx_vcpu_setup(struct vcpu_vmx *vmx) | |||
1922 | 1958 | ||
1923 | vmcs_write16(HOST_TR_SELECTOR, GDT_ENTRY_TSS*8); /* 22.2.4 */ | 1959 | vmcs_write16(HOST_TR_SELECTOR, GDT_ENTRY_TSS*8); /* 22.2.4 */ |
1924 | 1960 | ||
1925 | get_idt(&dt); | 1961 | kvm_get_idt(&dt); |
1926 | vmcs_writel(HOST_IDTR_BASE, dt.base); /* 22.2.4 */ | 1962 | vmcs_writel(HOST_IDTR_BASE, dt.base); /* 22.2.4 */ |
1927 | 1963 | ||
1928 | asm("mov $.Lkvm_vmx_return, %0" : "=r"(kvm_vmx_return)); | 1964 | asm("mov $.Lkvm_vmx_return, %0" : "=r"(kvm_vmx_return)); |
@@ -2114,6 +2150,13 @@ static void vmx_inject_irq(struct kvm_vcpu *vcpu, int irq) | |||
2114 | irq | INTR_TYPE_EXT_INTR | INTR_INFO_VALID_MASK); | 2150 | irq | INTR_TYPE_EXT_INTR | INTR_INFO_VALID_MASK); |
2115 | } | 2151 | } |
2116 | 2152 | ||
2153 | static void vmx_inject_nmi(struct kvm_vcpu *vcpu) | ||
2154 | { | ||
2155 | vmcs_write32(VM_ENTRY_INTR_INFO_FIELD, | ||
2156 | INTR_TYPE_NMI_INTR | INTR_INFO_VALID_MASK | NMI_VECTOR); | ||
2157 | vcpu->arch.nmi_pending = 0; | ||
2158 | } | ||
2159 | |||
2117 | static void kvm_do_inject_irq(struct kvm_vcpu *vcpu) | 2160 | static void kvm_do_inject_irq(struct kvm_vcpu *vcpu) |
2118 | { | 2161 | { |
2119 | int word_index = __ffs(vcpu->arch.irq_summary); | 2162 | int word_index = __ffs(vcpu->arch.irq_summary); |
@@ -2255,6 +2298,8 @@ static int handle_exception(struct kvm_vcpu *vcpu, struct kvm_run *kvm_run) | |||
2255 | cr2 = vmcs_readl(EXIT_QUALIFICATION); | 2298 | cr2 = vmcs_readl(EXIT_QUALIFICATION); |
2256 | KVMTRACE_3D(PAGE_FAULT, vcpu, error_code, (u32)cr2, | 2299 | KVMTRACE_3D(PAGE_FAULT, vcpu, error_code, (u32)cr2, |
2257 | (u32)((u64)cr2 >> 32), handler); | 2300 | (u32)((u64)cr2 >> 32), handler); |
2301 | if (vect_info & VECTORING_INFO_VALID_MASK) | ||
2302 | kvm_mmu_unprotect_page_virt(vcpu, cr2); | ||
2258 | return kvm_mmu_page_fault(vcpu, cr2, error_code); | 2303 | return kvm_mmu_page_fault(vcpu, cr2, error_code); |
2259 | } | 2304 | } |
2260 | 2305 | ||
@@ -2554,8 +2599,6 @@ static int handle_apic_access(struct kvm_vcpu *vcpu, struct kvm_run *kvm_run) | |||
2554 | exit_qualification = vmcs_read64(EXIT_QUALIFICATION); | 2599 | exit_qualification = vmcs_read64(EXIT_QUALIFICATION); |
2555 | offset = exit_qualification & 0xffful; | 2600 | offset = exit_qualification & 0xffful; |
2556 | 2601 | ||
2557 | KVMTRACE_1D(APIC_ACCESS, vcpu, (u32)offset, handler); | ||
2558 | |||
2559 | er = emulate_instruction(vcpu, kvm_run, 0, 0, 0); | 2602 | er = emulate_instruction(vcpu, kvm_run, 0, 0, 0); |
2560 | 2603 | ||
2561 | if (er != EMULATE_DONE) { | 2604 | if (er != EMULATE_DONE) { |
@@ -2639,6 +2682,19 @@ static int handle_ept_violation(struct kvm_vcpu *vcpu, struct kvm_run *kvm_run) | |||
2639 | return 1; | 2682 | return 1; |
2640 | } | 2683 | } |
2641 | 2684 | ||
2685 | static int handle_nmi_window(struct kvm_vcpu *vcpu, struct kvm_run *kvm_run) | ||
2686 | { | ||
2687 | u32 cpu_based_vm_exec_control; | ||
2688 | |||
2689 | /* clear pending NMI */ | ||
2690 | cpu_based_vm_exec_control = vmcs_read32(CPU_BASED_VM_EXEC_CONTROL); | ||
2691 | cpu_based_vm_exec_control &= ~CPU_BASED_VIRTUAL_NMI_PENDING; | ||
2692 | vmcs_write32(CPU_BASED_VM_EXEC_CONTROL, cpu_based_vm_exec_control); | ||
2693 | ++vcpu->stat.nmi_window_exits; | ||
2694 | |||
2695 | return 1; | ||
2696 | } | ||
2697 | |||
2642 | /* | 2698 | /* |
2643 | * The exit handlers return 1 if the exit was handled fully and guest execution | 2699 | * The exit handlers return 1 if the exit was handled fully and guest execution |
2644 | * may resume. Otherwise they set the kvm_run parameter to indicate what needs | 2700 | * may resume. Otherwise they set the kvm_run parameter to indicate what needs |
@@ -2649,6 +2705,7 @@ static int (*kvm_vmx_exit_handlers[])(struct kvm_vcpu *vcpu, | |||
2649 | [EXIT_REASON_EXCEPTION_NMI] = handle_exception, | 2705 | [EXIT_REASON_EXCEPTION_NMI] = handle_exception, |
2650 | [EXIT_REASON_EXTERNAL_INTERRUPT] = handle_external_interrupt, | 2706 | [EXIT_REASON_EXTERNAL_INTERRUPT] = handle_external_interrupt, |
2651 | [EXIT_REASON_TRIPLE_FAULT] = handle_triple_fault, | 2707 | [EXIT_REASON_TRIPLE_FAULT] = handle_triple_fault, |
2708 | [EXIT_REASON_NMI_WINDOW] = handle_nmi_window, | ||
2652 | [EXIT_REASON_IO_INSTRUCTION] = handle_io, | 2709 | [EXIT_REASON_IO_INSTRUCTION] = handle_io, |
2653 | [EXIT_REASON_CR_ACCESS] = handle_cr, | 2710 | [EXIT_REASON_CR_ACCESS] = handle_cr, |
2654 | [EXIT_REASON_DR_ACCESS] = handle_dr, | 2711 | [EXIT_REASON_DR_ACCESS] = handle_dr, |
@@ -2736,17 +2793,52 @@ static void enable_irq_window(struct kvm_vcpu *vcpu) | |||
2736 | vmcs_write32(CPU_BASED_VM_EXEC_CONTROL, cpu_based_vm_exec_control); | 2793 | vmcs_write32(CPU_BASED_VM_EXEC_CONTROL, cpu_based_vm_exec_control); |
2737 | } | 2794 | } |
2738 | 2795 | ||
2796 | static void enable_nmi_window(struct kvm_vcpu *vcpu) | ||
2797 | { | ||
2798 | u32 cpu_based_vm_exec_control; | ||
2799 | |||
2800 | if (!cpu_has_virtual_nmis()) | ||
2801 | return; | ||
2802 | |||
2803 | cpu_based_vm_exec_control = vmcs_read32(CPU_BASED_VM_EXEC_CONTROL); | ||
2804 | cpu_based_vm_exec_control |= CPU_BASED_VIRTUAL_NMI_PENDING; | ||
2805 | vmcs_write32(CPU_BASED_VM_EXEC_CONTROL, cpu_based_vm_exec_control); | ||
2806 | } | ||
2807 | |||
2808 | static int vmx_nmi_enabled(struct kvm_vcpu *vcpu) | ||
2809 | { | ||
2810 | u32 guest_intr = vmcs_read32(GUEST_INTERRUPTIBILITY_INFO); | ||
2811 | return !(guest_intr & (GUEST_INTR_STATE_NMI | | ||
2812 | GUEST_INTR_STATE_MOV_SS | | ||
2813 | GUEST_INTR_STATE_STI)); | ||
2814 | } | ||
2815 | |||
2816 | static int vmx_irq_enabled(struct kvm_vcpu *vcpu) | ||
2817 | { | ||
2818 | u32 guest_intr = vmcs_read32(GUEST_INTERRUPTIBILITY_INFO); | ||
2819 | return (!(guest_intr & (GUEST_INTR_STATE_MOV_SS | | ||
2820 | GUEST_INTR_STATE_STI)) && | ||
2821 | (vmcs_readl(GUEST_RFLAGS) & X86_EFLAGS_IF)); | ||
2822 | } | ||
2823 | |||
2824 | static void enable_intr_window(struct kvm_vcpu *vcpu) | ||
2825 | { | ||
2826 | if (vcpu->arch.nmi_pending) | ||
2827 | enable_nmi_window(vcpu); | ||
2828 | else if (kvm_cpu_has_interrupt(vcpu)) | ||
2829 | enable_irq_window(vcpu); | ||
2830 | } | ||
2831 | |||
2739 | static void vmx_intr_assist(struct kvm_vcpu *vcpu) | 2832 | static void vmx_intr_assist(struct kvm_vcpu *vcpu) |
2740 | { | 2833 | { |
2741 | struct vcpu_vmx *vmx = to_vmx(vcpu); | 2834 | struct vcpu_vmx *vmx = to_vmx(vcpu); |
2742 | u32 idtv_info_field, intr_info_field; | 2835 | u32 idtv_info_field, intr_info_field, exit_intr_info_field; |
2743 | int has_ext_irq, interrupt_window_open; | ||
2744 | int vector; | 2836 | int vector; |
2745 | 2837 | ||
2746 | update_tpr_threshold(vcpu); | 2838 | update_tpr_threshold(vcpu); |
2747 | 2839 | ||
2748 | has_ext_irq = kvm_cpu_has_interrupt(vcpu); | ||
2749 | intr_info_field = vmcs_read32(VM_ENTRY_INTR_INFO_FIELD); | 2840 | intr_info_field = vmcs_read32(VM_ENTRY_INTR_INFO_FIELD); |
2841 | exit_intr_info_field = vmcs_read32(VM_EXIT_INTR_INFO); | ||
2750 | idtv_info_field = vmx->idt_vectoring_info; | 2842 | idtv_info_field = vmx->idt_vectoring_info; |
2751 | if (intr_info_field & INTR_INFO_VALID_MASK) { | 2843 | if (intr_info_field & INTR_INFO_VALID_MASK) { |
2752 | if (idtv_info_field & INTR_INFO_VALID_MASK) { | 2844 | if (idtv_info_field & INTR_INFO_VALID_MASK) { |
@@ -2754,8 +2846,7 @@ static void vmx_intr_assist(struct kvm_vcpu *vcpu) | |||
2754 | if (printk_ratelimit()) | 2846 | if (printk_ratelimit()) |
2755 | printk(KERN_ERR "Fault when IDT_Vectoring\n"); | 2847 | printk(KERN_ERR "Fault when IDT_Vectoring\n"); |
2756 | } | 2848 | } |
2757 | if (has_ext_irq) | 2849 | enable_intr_window(vcpu); |
2758 | enable_irq_window(vcpu); | ||
2759 | return; | 2850 | return; |
2760 | } | 2851 | } |
2761 | if (unlikely(idtv_info_field & INTR_INFO_VALID_MASK)) { | 2852 | if (unlikely(idtv_info_field & INTR_INFO_VALID_MASK)) { |
@@ -2765,30 +2856,56 @@ static void vmx_intr_assist(struct kvm_vcpu *vcpu) | |||
2765 | u8 vect = idtv_info_field & VECTORING_INFO_VECTOR_MASK; | 2856 | u8 vect = idtv_info_field & VECTORING_INFO_VECTOR_MASK; |
2766 | 2857 | ||
2767 | vmx_inject_irq(vcpu, vect); | 2858 | vmx_inject_irq(vcpu, vect); |
2768 | if (unlikely(has_ext_irq)) | 2859 | enable_intr_window(vcpu); |
2769 | enable_irq_window(vcpu); | ||
2770 | return; | 2860 | return; |
2771 | } | 2861 | } |
2772 | 2862 | ||
2773 | KVMTRACE_1D(REDELIVER_EVT, vcpu, idtv_info_field, handler); | 2863 | KVMTRACE_1D(REDELIVER_EVT, vcpu, idtv_info_field, handler); |
2774 | 2864 | ||
2775 | vmcs_write32(VM_ENTRY_INTR_INFO_FIELD, idtv_info_field); | 2865 | /* |
2866 | * SDM 3: 25.7.1.2 | ||
2867 | * Clear bit "block by NMI" before VM entry if a NMI delivery | ||
2868 | * faulted. | ||
2869 | */ | ||
2870 | if ((idtv_info_field & VECTORING_INFO_TYPE_MASK) | ||
2871 | == INTR_TYPE_NMI_INTR && cpu_has_virtual_nmis()) | ||
2872 | vmcs_write32(GUEST_INTERRUPTIBILITY_INFO, | ||
2873 | vmcs_read32(GUEST_INTERRUPTIBILITY_INFO) & | ||
2874 | ~GUEST_INTR_STATE_NMI); | ||
2875 | |||
2876 | vmcs_write32(VM_ENTRY_INTR_INFO_FIELD, idtv_info_field | ||
2877 | & ~INTR_INFO_RESVD_BITS_MASK); | ||
2776 | vmcs_write32(VM_ENTRY_INSTRUCTION_LEN, | 2878 | vmcs_write32(VM_ENTRY_INSTRUCTION_LEN, |
2777 | vmcs_read32(VM_EXIT_INSTRUCTION_LEN)); | 2879 | vmcs_read32(VM_EXIT_INSTRUCTION_LEN)); |
2778 | 2880 | ||
2779 | if (unlikely(idtv_info_field & INTR_INFO_DELIVER_CODE_MASK)) | 2881 | if (unlikely(idtv_info_field & INTR_INFO_DELIVER_CODE_MASK)) |
2780 | vmcs_write32(VM_ENTRY_EXCEPTION_ERROR_CODE, | 2882 | vmcs_write32(VM_ENTRY_EXCEPTION_ERROR_CODE, |
2781 | vmcs_read32(IDT_VECTORING_ERROR_CODE)); | 2883 | vmcs_read32(IDT_VECTORING_ERROR_CODE)); |
2782 | if (unlikely(has_ext_irq)) | 2884 | enable_intr_window(vcpu); |
2783 | enable_irq_window(vcpu); | ||
2784 | return; | 2885 | return; |
2785 | } | 2886 | } |
2786 | if (!has_ext_irq) | 2887 | if (cpu_has_virtual_nmis()) { |
2888 | /* | ||
2889 | * SDM 3: 25.7.1.2 | ||
2890 | * Re-set bit "block by NMI" before VM entry if vmexit caused by | ||
2891 | * a guest IRET fault. | ||
2892 | */ | ||
2893 | if ((exit_intr_info_field & INTR_INFO_UNBLOCK_NMI) && | ||
2894 | (exit_intr_info_field & INTR_INFO_VECTOR_MASK) != 8) | ||
2895 | vmcs_write32(GUEST_INTERRUPTIBILITY_INFO, | ||
2896 | vmcs_read32(GUEST_INTERRUPTIBILITY_INFO) | | ||
2897 | GUEST_INTR_STATE_NMI); | ||
2898 | else if (vcpu->arch.nmi_pending) { | ||
2899 | if (vmx_nmi_enabled(vcpu)) | ||
2900 | vmx_inject_nmi(vcpu); | ||
2901 | enable_intr_window(vcpu); | ||
2902 | return; | ||
2903 | } | ||
2904 | |||
2905 | } | ||
2906 | if (!kvm_cpu_has_interrupt(vcpu)) | ||
2787 | return; | 2907 | return; |
2788 | interrupt_window_open = | 2908 | if (vmx_irq_enabled(vcpu)) { |
2789 | ((vmcs_readl(GUEST_RFLAGS) & X86_EFLAGS_IF) && | ||
2790 | (vmcs_read32(GUEST_INTERRUPTIBILITY_INFO) & 3) == 0); | ||
2791 | if (interrupt_window_open) { | ||
2792 | vector = kvm_cpu_get_interrupt(vcpu); | 2909 | vector = kvm_cpu_get_interrupt(vcpu); |
2793 | vmx_inject_irq(vcpu, vector); | 2910 | vmx_inject_irq(vcpu, vector); |
2794 | kvm_timer_intr_post(vcpu, vector); | 2911 | kvm_timer_intr_post(vcpu, vector); |
@@ -2838,7 +2955,7 @@ static void vmx_vcpu_run(struct kvm_vcpu *vcpu, struct kvm_run *kvm_run) | |||
2838 | "push %%edx; push %%ebp;" | 2955 | "push %%edx; push %%ebp;" |
2839 | "push %%ecx \n\t" | 2956 | "push %%ecx \n\t" |
2840 | #endif | 2957 | #endif |
2841 | ASM_VMX_VMWRITE_RSP_RDX "\n\t" | 2958 | __ex(ASM_VMX_VMWRITE_RSP_RDX) "\n\t" |
2842 | /* Check if vmlaunch of vmresume is needed */ | 2959 | /* Check if vmlaunch of vmresume is needed */ |
2843 | "cmpl $0, %c[launched](%0) \n\t" | 2960 | "cmpl $0, %c[launched](%0) \n\t" |
2844 | /* Load guest registers. Don't clobber flags. */ | 2961 | /* Load guest registers. Don't clobber flags. */ |
@@ -2873,9 +2990,9 @@ static void vmx_vcpu_run(struct kvm_vcpu *vcpu, struct kvm_run *kvm_run) | |||
2873 | #endif | 2990 | #endif |
2874 | /* Enter guest mode */ | 2991 | /* Enter guest mode */ |
2875 | "jne .Llaunched \n\t" | 2992 | "jne .Llaunched \n\t" |
2876 | ASM_VMX_VMLAUNCH "\n\t" | 2993 | __ex(ASM_VMX_VMLAUNCH) "\n\t" |
2877 | "jmp .Lkvm_vmx_return \n\t" | 2994 | "jmp .Lkvm_vmx_return \n\t" |
2878 | ".Llaunched: " ASM_VMX_VMRESUME "\n\t" | 2995 | ".Llaunched: " __ex(ASM_VMX_VMRESUME) "\n\t" |
2879 | ".Lkvm_vmx_return: " | 2996 | ".Lkvm_vmx_return: " |
2880 | /* Save guest registers, load host registers, keep flags */ | 2997 | /* Save guest registers, load host registers, keep flags */ |
2881 | #ifdef CONFIG_X86_64 | 2998 | #ifdef CONFIG_X86_64 |
@@ -2949,7 +3066,8 @@ static void vmx_vcpu_run(struct kvm_vcpu *vcpu, struct kvm_run *kvm_run) | |||
2949 | fixup_rmode_irq(vmx); | 3066 | fixup_rmode_irq(vmx); |
2950 | 3067 | ||
2951 | vcpu->arch.interrupt_window_open = | 3068 | vcpu->arch.interrupt_window_open = |
2952 | (vmcs_read32(GUEST_INTERRUPTIBILITY_INFO) & 3) == 0; | 3069 | (vmcs_read32(GUEST_INTERRUPTIBILITY_INFO) & |
3070 | (GUEST_INTR_STATE_STI | GUEST_INTR_STATE_MOV_SS)) == 0; | ||
2953 | 3071 | ||
2954 | asm("mov %0, %%ds; mov %0, %%es" : : "r"(__USER_DS)); | 3072 | asm("mov %0, %%ds; mov %0, %%es" : : "r"(__USER_DS)); |
2955 | vmx->launched = 1; | 3073 | vmx->launched = 1; |
@@ -2957,7 +3075,8 @@ static void vmx_vcpu_run(struct kvm_vcpu *vcpu, struct kvm_run *kvm_run) | |||
2957 | intr_info = vmcs_read32(VM_EXIT_INTR_INFO); | 3075 | intr_info = vmcs_read32(VM_EXIT_INTR_INFO); |
2958 | 3076 | ||
2959 | /* We need to handle NMIs before interrupts are enabled */ | 3077 | /* We need to handle NMIs before interrupts are enabled */ |
2960 | if ((intr_info & INTR_INFO_INTR_TYPE_MASK) == 0x200) { /* nmi */ | 3078 | if ((intr_info & INTR_INFO_INTR_TYPE_MASK) == 0x200 && |
3079 | (intr_info & INTR_INFO_VALID_MASK)) { | ||
2961 | KVMTRACE_0D(NMI, vcpu, handler); | 3080 | KVMTRACE_0D(NMI, vcpu, handler); |
2962 | asm("int $2"); | 3081 | asm("int $2"); |
2963 | } | 3082 | } |
@@ -2968,7 +3087,7 @@ static void vmx_free_vmcs(struct kvm_vcpu *vcpu) | |||
2968 | struct vcpu_vmx *vmx = to_vmx(vcpu); | 3087 | struct vcpu_vmx *vmx = to_vmx(vcpu); |
2969 | 3088 | ||
2970 | if (vmx->vmcs) { | 3089 | if (vmx->vmcs) { |
2971 | on_each_cpu(__vcpu_clear, vmx, 1); | 3090 | vcpu_clear(vmx); |
2972 | free_vmcs(vmx->vmcs); | 3091 | free_vmcs(vmx->vmcs); |
2973 | vmx->vmcs = NULL; | 3092 | vmx->vmcs = NULL; |
2974 | } | 3093 | } |
@@ -2999,15 +3118,6 @@ static struct kvm_vcpu *vmx_create_vcpu(struct kvm *kvm, unsigned int id) | |||
2999 | return ERR_PTR(-ENOMEM); | 3118 | return ERR_PTR(-ENOMEM); |
3000 | 3119 | ||
3001 | allocate_vpid(vmx); | 3120 | allocate_vpid(vmx); |
3002 | if (id == 0 && vm_need_ept()) { | ||
3003 | kvm_mmu_set_base_ptes(VMX_EPT_READABLE_MASK | | ||
3004 | VMX_EPT_WRITABLE_MASK | | ||
3005 | VMX_EPT_DEFAULT_MT << VMX_EPT_MT_EPTE_SHIFT); | ||
3006 | kvm_mmu_set_mask_ptes(0ull, VMX_EPT_FAKE_ACCESSED_MASK, | ||
3007 | VMX_EPT_FAKE_DIRTY_MASK, 0ull, | ||
3008 | VMX_EPT_EXECUTABLE_MASK); | ||
3009 | kvm_enable_tdp(); | ||
3010 | } | ||
3011 | 3121 | ||
3012 | err = kvm_vcpu_init(&vmx->vcpu, kvm, id); | 3122 | err = kvm_vcpu_init(&vmx->vcpu, kvm, id); |
3013 | if (err) | 3123 | if (err) |
@@ -3095,7 +3205,6 @@ static struct kvm_x86_ops vmx_x86_ops = { | |||
3095 | .prepare_guest_switch = vmx_save_host_state, | 3205 | .prepare_guest_switch = vmx_save_host_state, |
3096 | .vcpu_load = vmx_vcpu_load, | 3206 | .vcpu_load = vmx_vcpu_load, |
3097 | .vcpu_put = vmx_vcpu_put, | 3207 | .vcpu_put = vmx_vcpu_put, |
3098 | .vcpu_decache = vmx_vcpu_decache, | ||
3099 | 3208 | ||
3100 | .set_guest_debug = set_guest_debug, | 3209 | .set_guest_debug = set_guest_debug, |
3101 | .guest_debug_pre = kvm_guest_debug_pre, | 3210 | .guest_debug_pre = kvm_guest_debug_pre, |
@@ -3187,8 +3296,16 @@ static int __init vmx_init(void) | |||
3187 | vmx_disable_intercept_for_msr(vmx_msr_bitmap, MSR_IA32_SYSENTER_ESP); | 3296 | vmx_disable_intercept_for_msr(vmx_msr_bitmap, MSR_IA32_SYSENTER_ESP); |
3188 | vmx_disable_intercept_for_msr(vmx_msr_bitmap, MSR_IA32_SYSENTER_EIP); | 3297 | vmx_disable_intercept_for_msr(vmx_msr_bitmap, MSR_IA32_SYSENTER_EIP); |
3189 | 3298 | ||
3190 | if (cpu_has_vmx_ept()) | 3299 | if (vm_need_ept()) { |
3191 | bypass_guest_pf = 0; | 3300 | bypass_guest_pf = 0; |
3301 | kvm_mmu_set_base_ptes(VMX_EPT_READABLE_MASK | | ||
3302 | VMX_EPT_WRITABLE_MASK | | ||
3303 | VMX_EPT_DEFAULT_MT << VMX_EPT_MT_EPTE_SHIFT); | ||
3304 | kvm_mmu_set_mask_ptes(0ull, 0ull, 0ull, 0ull, | ||
3305 | VMX_EPT_EXECUTABLE_MASK); | ||
3306 | kvm_enable_tdp(); | ||
3307 | } else | ||
3308 | kvm_disable_tdp(); | ||
3192 | 3309 | ||
3193 | if (bypass_guest_pf) | 3310 | if (bypass_guest_pf) |
3194 | kvm_mmu_set_nonpresent_ptes(~0xffeull, 0ull); | 3311 | kvm_mmu_set_nonpresent_ptes(~0xffeull, 0ull); |
diff --git a/arch/x86/kvm/vmx.h b/arch/x86/kvm/vmx.h index 79d94c610dfe..23e8373507ad 100644 --- a/arch/x86/kvm/vmx.h +++ b/arch/x86/kvm/vmx.h | |||
@@ -40,6 +40,7 @@ | |||
40 | #define CPU_BASED_CR8_LOAD_EXITING 0x00080000 | 40 | #define CPU_BASED_CR8_LOAD_EXITING 0x00080000 |
41 | #define CPU_BASED_CR8_STORE_EXITING 0x00100000 | 41 | #define CPU_BASED_CR8_STORE_EXITING 0x00100000 |
42 | #define CPU_BASED_TPR_SHADOW 0x00200000 | 42 | #define CPU_BASED_TPR_SHADOW 0x00200000 |
43 | #define CPU_BASED_VIRTUAL_NMI_PENDING 0x00400000 | ||
43 | #define CPU_BASED_MOV_DR_EXITING 0x00800000 | 44 | #define CPU_BASED_MOV_DR_EXITING 0x00800000 |
44 | #define CPU_BASED_UNCOND_IO_EXITING 0x01000000 | 45 | #define CPU_BASED_UNCOND_IO_EXITING 0x01000000 |
45 | #define CPU_BASED_USE_IO_BITMAPS 0x02000000 | 46 | #define CPU_BASED_USE_IO_BITMAPS 0x02000000 |
@@ -216,7 +217,7 @@ enum vmcs_field { | |||
216 | #define EXIT_REASON_TRIPLE_FAULT 2 | 217 | #define EXIT_REASON_TRIPLE_FAULT 2 |
217 | 218 | ||
218 | #define EXIT_REASON_PENDING_INTERRUPT 7 | 219 | #define EXIT_REASON_PENDING_INTERRUPT 7 |
219 | 220 | #define EXIT_REASON_NMI_WINDOW 8 | |
220 | #define EXIT_REASON_TASK_SWITCH 9 | 221 | #define EXIT_REASON_TASK_SWITCH 9 |
221 | #define EXIT_REASON_CPUID 10 | 222 | #define EXIT_REASON_CPUID 10 |
222 | #define EXIT_REASON_HLT 12 | 223 | #define EXIT_REASON_HLT 12 |
@@ -251,7 +252,9 @@ enum vmcs_field { | |||
251 | #define INTR_INFO_VECTOR_MASK 0xff /* 7:0 */ | 252 | #define INTR_INFO_VECTOR_MASK 0xff /* 7:0 */ |
252 | #define INTR_INFO_INTR_TYPE_MASK 0x700 /* 10:8 */ | 253 | #define INTR_INFO_INTR_TYPE_MASK 0x700 /* 10:8 */ |
253 | #define INTR_INFO_DELIVER_CODE_MASK 0x800 /* 11 */ | 254 | #define INTR_INFO_DELIVER_CODE_MASK 0x800 /* 11 */ |
255 | #define INTR_INFO_UNBLOCK_NMI 0x1000 /* 12 */ | ||
254 | #define INTR_INFO_VALID_MASK 0x80000000 /* 31 */ | 256 | #define INTR_INFO_VALID_MASK 0x80000000 /* 31 */ |
257 | #define INTR_INFO_RESVD_BITS_MASK 0x7ffff000 | ||
255 | 258 | ||
256 | #define VECTORING_INFO_VECTOR_MASK INTR_INFO_VECTOR_MASK | 259 | #define VECTORING_INFO_VECTOR_MASK INTR_INFO_VECTOR_MASK |
257 | #define VECTORING_INFO_TYPE_MASK INTR_INFO_INTR_TYPE_MASK | 260 | #define VECTORING_INFO_TYPE_MASK INTR_INFO_INTR_TYPE_MASK |
@@ -259,9 +262,16 @@ enum vmcs_field { | |||
259 | #define VECTORING_INFO_VALID_MASK INTR_INFO_VALID_MASK | 262 | #define VECTORING_INFO_VALID_MASK INTR_INFO_VALID_MASK |
260 | 263 | ||
261 | #define INTR_TYPE_EXT_INTR (0 << 8) /* external interrupt */ | 264 | #define INTR_TYPE_EXT_INTR (0 << 8) /* external interrupt */ |
265 | #define INTR_TYPE_NMI_INTR (2 << 8) /* NMI */ | ||
262 | #define INTR_TYPE_EXCEPTION (3 << 8) /* processor exception */ | 266 | #define INTR_TYPE_EXCEPTION (3 << 8) /* processor exception */ |
263 | #define INTR_TYPE_SOFT_INTR (4 << 8) /* software interrupt */ | 267 | #define INTR_TYPE_SOFT_INTR (4 << 8) /* software interrupt */ |
264 | 268 | ||
269 | /* GUEST_INTERRUPTIBILITY_INFO flags. */ | ||
270 | #define GUEST_INTR_STATE_STI 0x00000001 | ||
271 | #define GUEST_INTR_STATE_MOV_SS 0x00000002 | ||
272 | #define GUEST_INTR_STATE_SMI 0x00000004 | ||
273 | #define GUEST_INTR_STATE_NMI 0x00000008 | ||
274 | |||
265 | /* | 275 | /* |
266 | * Exit Qualifications for MOV for Control Register Access | 276 | * Exit Qualifications for MOV for Control Register Access |
267 | */ | 277 | */ |
@@ -360,8 +370,6 @@ enum vmcs_field { | |||
360 | #define VMX_EPT_READABLE_MASK 0x1ull | 370 | #define VMX_EPT_READABLE_MASK 0x1ull |
361 | #define VMX_EPT_WRITABLE_MASK 0x2ull | 371 | #define VMX_EPT_WRITABLE_MASK 0x2ull |
362 | #define VMX_EPT_EXECUTABLE_MASK 0x4ull | 372 | #define VMX_EPT_EXECUTABLE_MASK 0x4ull |
363 | #define VMX_EPT_FAKE_ACCESSED_MASK (1ull << 62) | ||
364 | #define VMX_EPT_FAKE_DIRTY_MASK (1ull << 63) | ||
365 | 373 | ||
366 | #define VMX_EPT_IDENTITY_PAGETABLE_ADDR 0xfffbc000ul | 374 | #define VMX_EPT_IDENTITY_PAGETABLE_ADDR 0xfffbc000ul |
367 | 375 | ||
diff --git a/arch/x86/kvm/x86.c b/arch/x86/kvm/x86.c index 0faa2546b1cd..0d682fc6aeb3 100644 --- a/arch/x86/kvm/x86.c +++ b/arch/x86/kvm/x86.c | |||
@@ -72,6 +72,7 @@ struct kvm_stats_debugfs_item debugfs_entries[] = { | |||
72 | { "mmio_exits", VCPU_STAT(mmio_exits) }, | 72 | { "mmio_exits", VCPU_STAT(mmio_exits) }, |
73 | { "signal_exits", VCPU_STAT(signal_exits) }, | 73 | { "signal_exits", VCPU_STAT(signal_exits) }, |
74 | { "irq_window", VCPU_STAT(irq_window_exits) }, | 74 | { "irq_window", VCPU_STAT(irq_window_exits) }, |
75 | { "nmi_window", VCPU_STAT(nmi_window_exits) }, | ||
75 | { "halt_exits", VCPU_STAT(halt_exits) }, | 76 | { "halt_exits", VCPU_STAT(halt_exits) }, |
76 | { "halt_wakeup", VCPU_STAT(halt_wakeup) }, | 77 | { "halt_wakeup", VCPU_STAT(halt_wakeup) }, |
77 | { "hypercalls", VCPU_STAT(hypercalls) }, | 78 | { "hypercalls", VCPU_STAT(hypercalls) }, |
@@ -173,6 +174,12 @@ void kvm_inject_page_fault(struct kvm_vcpu *vcpu, unsigned long addr, | |||
173 | kvm_queue_exception_e(vcpu, PF_VECTOR, error_code); | 174 | kvm_queue_exception_e(vcpu, PF_VECTOR, error_code); |
174 | } | 175 | } |
175 | 176 | ||
177 | void kvm_inject_nmi(struct kvm_vcpu *vcpu) | ||
178 | { | ||
179 | vcpu->arch.nmi_pending = 1; | ||
180 | } | ||
181 | EXPORT_SYMBOL_GPL(kvm_inject_nmi); | ||
182 | |||
176 | void kvm_queue_exception_e(struct kvm_vcpu *vcpu, unsigned nr, u32 error_code) | 183 | void kvm_queue_exception_e(struct kvm_vcpu *vcpu, unsigned nr, u32 error_code) |
177 | { | 184 | { |
178 | WARN_ON(vcpu->arch.exception.pending); | 185 | WARN_ON(vcpu->arch.exception.pending); |
@@ -604,6 +611,38 @@ static void kvm_write_guest_time(struct kvm_vcpu *v) | |||
604 | mark_page_dirty(v->kvm, vcpu->time >> PAGE_SHIFT); | 611 | mark_page_dirty(v->kvm, vcpu->time >> PAGE_SHIFT); |
605 | } | 612 | } |
606 | 613 | ||
614 | static bool msr_mtrr_valid(unsigned msr) | ||
615 | { | ||
616 | switch (msr) { | ||
617 | case 0x200 ... 0x200 + 2 * KVM_NR_VAR_MTRR - 1: | ||
618 | case MSR_MTRRfix64K_00000: | ||
619 | case MSR_MTRRfix16K_80000: | ||
620 | case MSR_MTRRfix16K_A0000: | ||
621 | case MSR_MTRRfix4K_C0000: | ||
622 | case MSR_MTRRfix4K_C8000: | ||
623 | case MSR_MTRRfix4K_D0000: | ||
624 | case MSR_MTRRfix4K_D8000: | ||
625 | case MSR_MTRRfix4K_E0000: | ||
626 | case MSR_MTRRfix4K_E8000: | ||
627 | case MSR_MTRRfix4K_F0000: | ||
628 | case MSR_MTRRfix4K_F8000: | ||
629 | case MSR_MTRRdefType: | ||
630 | case MSR_IA32_CR_PAT: | ||
631 | return true; | ||
632 | case 0x2f8: | ||
633 | return true; | ||
634 | } | ||
635 | return false; | ||
636 | } | ||
637 | |||
638 | static int set_msr_mtrr(struct kvm_vcpu *vcpu, u32 msr, u64 data) | ||
639 | { | ||
640 | if (!msr_mtrr_valid(msr)) | ||
641 | return 1; | ||
642 | |||
643 | vcpu->arch.mtrr[msr - 0x200] = data; | ||
644 | return 0; | ||
645 | } | ||
607 | 646 | ||
608 | int kvm_set_msr_common(struct kvm_vcpu *vcpu, u32 msr, u64 data) | 647 | int kvm_set_msr_common(struct kvm_vcpu *vcpu, u32 msr, u64 data) |
609 | { | 648 | { |
@@ -625,8 +664,9 @@ int kvm_set_msr_common(struct kvm_vcpu *vcpu, u32 msr, u64 data) | |||
625 | break; | 664 | break; |
626 | case MSR_IA32_UCODE_REV: | 665 | case MSR_IA32_UCODE_REV: |
627 | case MSR_IA32_UCODE_WRITE: | 666 | case MSR_IA32_UCODE_WRITE: |
628 | case 0x200 ... 0x2ff: /* MTRRs */ | ||
629 | break; | 667 | break; |
668 | case 0x200 ... 0x2ff: | ||
669 | return set_msr_mtrr(vcpu, msr, data); | ||
630 | case MSR_IA32_APICBASE: | 670 | case MSR_IA32_APICBASE: |
631 | kvm_set_apic_base(vcpu, data); | 671 | kvm_set_apic_base(vcpu, data); |
632 | break; | 672 | break; |
@@ -684,6 +724,15 @@ int kvm_get_msr(struct kvm_vcpu *vcpu, u32 msr_index, u64 *pdata) | |||
684 | return kvm_x86_ops->get_msr(vcpu, msr_index, pdata); | 724 | return kvm_x86_ops->get_msr(vcpu, msr_index, pdata); |
685 | } | 725 | } |
686 | 726 | ||
727 | static int get_msr_mtrr(struct kvm_vcpu *vcpu, u32 msr, u64 *pdata) | ||
728 | { | ||
729 | if (!msr_mtrr_valid(msr)) | ||
730 | return 1; | ||
731 | |||
732 | *pdata = vcpu->arch.mtrr[msr - 0x200]; | ||
733 | return 0; | ||
734 | } | ||
735 | |||
687 | int kvm_get_msr_common(struct kvm_vcpu *vcpu, u32 msr, u64 *pdata) | 736 | int kvm_get_msr_common(struct kvm_vcpu *vcpu, u32 msr, u64 *pdata) |
688 | { | 737 | { |
689 | u64 data; | 738 | u64 data; |
@@ -705,11 +754,13 @@ int kvm_get_msr_common(struct kvm_vcpu *vcpu, u32 msr, u64 *pdata) | |||
705 | case MSR_IA32_MC0_MISC+16: | 754 | case MSR_IA32_MC0_MISC+16: |
706 | case MSR_IA32_UCODE_REV: | 755 | case MSR_IA32_UCODE_REV: |
707 | case MSR_IA32_EBL_CR_POWERON: | 756 | case MSR_IA32_EBL_CR_POWERON: |
708 | /* MTRR registers */ | ||
709 | case 0xfe: | ||
710 | case 0x200 ... 0x2ff: | ||
711 | data = 0; | 757 | data = 0; |
712 | break; | 758 | break; |
759 | case MSR_MTRRcap: | ||
760 | data = 0x500 | KVM_NR_VAR_MTRR; | ||
761 | break; | ||
762 | case 0x200 ... 0x2ff: | ||
763 | return get_msr_mtrr(vcpu, msr, pdata); | ||
713 | case 0xcd: /* fsb frequency */ | 764 | case 0xcd: /* fsb frequency */ |
714 | data = 3; | 765 | data = 3; |
715 | break; | 766 | break; |
@@ -817,41 +868,6 @@ out: | |||
817 | return r; | 868 | return r; |
818 | } | 869 | } |
819 | 870 | ||
820 | /* | ||
821 | * Make sure that a cpu that is being hot-unplugged does not have any vcpus | ||
822 | * cached on it. | ||
823 | */ | ||
824 | void decache_vcpus_on_cpu(int cpu) | ||
825 | { | ||
826 | struct kvm *vm; | ||
827 | struct kvm_vcpu *vcpu; | ||
828 | int i; | ||
829 | |||
830 | spin_lock(&kvm_lock); | ||
831 | list_for_each_entry(vm, &vm_list, vm_list) | ||
832 | for (i = 0; i < KVM_MAX_VCPUS; ++i) { | ||
833 | vcpu = vm->vcpus[i]; | ||
834 | if (!vcpu) | ||
835 | continue; | ||
836 | /* | ||
837 | * If the vcpu is locked, then it is running on some | ||
838 | * other cpu and therefore it is not cached on the | ||
839 | * cpu in question. | ||
840 | * | ||
841 | * If it's not locked, check the last cpu it executed | ||
842 | * on. | ||
843 | */ | ||
844 | if (mutex_trylock(&vcpu->mutex)) { | ||
845 | if (vcpu->cpu == cpu) { | ||
846 | kvm_x86_ops->vcpu_decache(vcpu); | ||
847 | vcpu->cpu = -1; | ||
848 | } | ||
849 | mutex_unlock(&vcpu->mutex); | ||
850 | } | ||
851 | } | ||
852 | spin_unlock(&kvm_lock); | ||
853 | } | ||
854 | |||
855 | int kvm_dev_ioctl_check_extension(long ext) | 871 | int kvm_dev_ioctl_check_extension(long ext) |
856 | { | 872 | { |
857 | int r; | 873 | int r; |
@@ -867,8 +883,12 @@ int kvm_dev_ioctl_check_extension(long ext) | |||
867 | case KVM_CAP_PIT: | 883 | case KVM_CAP_PIT: |
868 | case KVM_CAP_NOP_IO_DELAY: | 884 | case KVM_CAP_NOP_IO_DELAY: |
869 | case KVM_CAP_MP_STATE: | 885 | case KVM_CAP_MP_STATE: |
886 | case KVM_CAP_SYNC_MMU: | ||
870 | r = 1; | 887 | r = 1; |
871 | break; | 888 | break; |
889 | case KVM_CAP_COALESCED_MMIO: | ||
890 | r = KVM_COALESCED_MMIO_PAGE_OFFSET; | ||
891 | break; | ||
872 | case KVM_CAP_VAPIC: | 892 | case KVM_CAP_VAPIC: |
873 | r = !kvm_x86_ops->cpu_has_accelerated_tpr(); | 893 | r = !kvm_x86_ops->cpu_has_accelerated_tpr(); |
874 | break; | 894 | break; |
@@ -1476,6 +1496,7 @@ static int kvm_vm_ioctl_set_memory_alias(struct kvm *kvm, | |||
1476 | goto out; | 1496 | goto out; |
1477 | 1497 | ||
1478 | down_write(&kvm->slots_lock); | 1498 | down_write(&kvm->slots_lock); |
1499 | spin_lock(&kvm->mmu_lock); | ||
1479 | 1500 | ||
1480 | p = &kvm->arch.aliases[alias->slot]; | 1501 | p = &kvm->arch.aliases[alias->slot]; |
1481 | p->base_gfn = alias->guest_phys_addr >> PAGE_SHIFT; | 1502 | p->base_gfn = alias->guest_phys_addr >> PAGE_SHIFT; |
@@ -1487,6 +1508,7 @@ static int kvm_vm_ioctl_set_memory_alias(struct kvm *kvm, | |||
1487 | break; | 1508 | break; |
1488 | kvm->arch.naliases = n; | 1509 | kvm->arch.naliases = n; |
1489 | 1510 | ||
1511 | spin_unlock(&kvm->mmu_lock); | ||
1490 | kvm_mmu_zap_all(kvm); | 1512 | kvm_mmu_zap_all(kvm); |
1491 | 1513 | ||
1492 | up_write(&kvm->slots_lock); | 1514 | up_write(&kvm->slots_lock); |
@@ -1781,13 +1803,14 @@ static void kvm_init_msr_list(void) | |||
1781 | * Only apic need an MMIO device hook, so shortcut now.. | 1803 | * Only apic need an MMIO device hook, so shortcut now.. |
1782 | */ | 1804 | */ |
1783 | static struct kvm_io_device *vcpu_find_pervcpu_dev(struct kvm_vcpu *vcpu, | 1805 | static struct kvm_io_device *vcpu_find_pervcpu_dev(struct kvm_vcpu *vcpu, |
1784 | gpa_t addr) | 1806 | gpa_t addr, int len, |
1807 | int is_write) | ||
1785 | { | 1808 | { |
1786 | struct kvm_io_device *dev; | 1809 | struct kvm_io_device *dev; |
1787 | 1810 | ||
1788 | if (vcpu->arch.apic) { | 1811 | if (vcpu->arch.apic) { |
1789 | dev = &vcpu->arch.apic->dev; | 1812 | dev = &vcpu->arch.apic->dev; |
1790 | if (dev->in_range(dev, addr)) | 1813 | if (dev->in_range(dev, addr, len, is_write)) |
1791 | return dev; | 1814 | return dev; |
1792 | } | 1815 | } |
1793 | return NULL; | 1816 | return NULL; |
@@ -1795,13 +1818,15 @@ static struct kvm_io_device *vcpu_find_pervcpu_dev(struct kvm_vcpu *vcpu, | |||
1795 | 1818 | ||
1796 | 1819 | ||
1797 | static struct kvm_io_device *vcpu_find_mmio_dev(struct kvm_vcpu *vcpu, | 1820 | static struct kvm_io_device *vcpu_find_mmio_dev(struct kvm_vcpu *vcpu, |
1798 | gpa_t addr) | 1821 | gpa_t addr, int len, |
1822 | int is_write) | ||
1799 | { | 1823 | { |
1800 | struct kvm_io_device *dev; | 1824 | struct kvm_io_device *dev; |
1801 | 1825 | ||
1802 | dev = vcpu_find_pervcpu_dev(vcpu, addr); | 1826 | dev = vcpu_find_pervcpu_dev(vcpu, addr, len, is_write); |
1803 | if (dev == NULL) | 1827 | if (dev == NULL) |
1804 | dev = kvm_io_bus_find_dev(&vcpu->kvm->mmio_bus, addr); | 1828 | dev = kvm_io_bus_find_dev(&vcpu->kvm->mmio_bus, addr, len, |
1829 | is_write); | ||
1805 | return dev; | 1830 | return dev; |
1806 | } | 1831 | } |
1807 | 1832 | ||
@@ -1869,7 +1894,7 @@ mmio: | |||
1869 | * Is this MMIO handled locally? | 1894 | * Is this MMIO handled locally? |
1870 | */ | 1895 | */ |
1871 | mutex_lock(&vcpu->kvm->lock); | 1896 | mutex_lock(&vcpu->kvm->lock); |
1872 | mmio_dev = vcpu_find_mmio_dev(vcpu, gpa); | 1897 | mmio_dev = vcpu_find_mmio_dev(vcpu, gpa, bytes, 0); |
1873 | if (mmio_dev) { | 1898 | if (mmio_dev) { |
1874 | kvm_iodevice_read(mmio_dev, gpa, bytes, val); | 1899 | kvm_iodevice_read(mmio_dev, gpa, bytes, val); |
1875 | mutex_unlock(&vcpu->kvm->lock); | 1900 | mutex_unlock(&vcpu->kvm->lock); |
@@ -1924,7 +1949,7 @@ mmio: | |||
1924 | * Is this MMIO handled locally? | 1949 | * Is this MMIO handled locally? |
1925 | */ | 1950 | */ |
1926 | mutex_lock(&vcpu->kvm->lock); | 1951 | mutex_lock(&vcpu->kvm->lock); |
1927 | mmio_dev = vcpu_find_mmio_dev(vcpu, gpa); | 1952 | mmio_dev = vcpu_find_mmio_dev(vcpu, gpa, bytes, 1); |
1928 | if (mmio_dev) { | 1953 | if (mmio_dev) { |
1929 | kvm_iodevice_write(mmio_dev, gpa, bytes, val); | 1954 | kvm_iodevice_write(mmio_dev, gpa, bytes, val); |
1930 | mutex_unlock(&vcpu->kvm->lock); | 1955 | mutex_unlock(&vcpu->kvm->lock); |
@@ -2020,6 +2045,7 @@ int emulate_invlpg(struct kvm_vcpu *vcpu, gva_t address) | |||
2020 | 2045 | ||
2021 | int emulate_clts(struct kvm_vcpu *vcpu) | 2046 | int emulate_clts(struct kvm_vcpu *vcpu) |
2022 | { | 2047 | { |
2048 | KVMTRACE_0D(CLTS, vcpu, handler); | ||
2023 | kvm_x86_ops->set_cr0(vcpu, vcpu->arch.cr0 & ~X86_CR0_TS); | 2049 | kvm_x86_ops->set_cr0(vcpu, vcpu->arch.cr0 & ~X86_CR0_TS); |
2024 | return X86EMUL_CONTINUE; | 2050 | return X86EMUL_CONTINUE; |
2025 | } | 2051 | } |
@@ -2053,21 +2079,19 @@ int emulator_set_dr(struct x86_emulate_ctxt *ctxt, int dr, unsigned long value) | |||
2053 | 2079 | ||
2054 | void kvm_report_emulation_failure(struct kvm_vcpu *vcpu, const char *context) | 2080 | void kvm_report_emulation_failure(struct kvm_vcpu *vcpu, const char *context) |
2055 | { | 2081 | { |
2056 | static int reported; | ||
2057 | u8 opcodes[4]; | 2082 | u8 opcodes[4]; |
2058 | unsigned long rip = vcpu->arch.rip; | 2083 | unsigned long rip = vcpu->arch.rip; |
2059 | unsigned long rip_linear; | 2084 | unsigned long rip_linear; |
2060 | 2085 | ||
2061 | rip_linear = rip + get_segment_base(vcpu, VCPU_SREG_CS); | 2086 | if (!printk_ratelimit()) |
2062 | |||
2063 | if (reported) | ||
2064 | return; | 2087 | return; |
2065 | 2088 | ||
2089 | rip_linear = rip + get_segment_base(vcpu, VCPU_SREG_CS); | ||
2090 | |||
2066 | emulator_read_std(rip_linear, (void *)opcodes, 4, vcpu); | 2091 | emulator_read_std(rip_linear, (void *)opcodes, 4, vcpu); |
2067 | 2092 | ||
2068 | printk(KERN_ERR "emulation failed (%s) rip %lx %02x %02x %02x %02x\n", | 2093 | printk(KERN_ERR "emulation failed (%s) rip %lx %02x %02x %02x %02x\n", |
2069 | context, rip, opcodes[0], opcodes[1], opcodes[2], opcodes[3]); | 2094 | context, rip, opcodes[0], opcodes[1], opcodes[2], opcodes[3]); |
2070 | reported = 1; | ||
2071 | } | 2095 | } |
2072 | EXPORT_SYMBOL_GPL(kvm_report_emulation_failure); | 2096 | EXPORT_SYMBOL_GPL(kvm_report_emulation_failure); |
2073 | 2097 | ||
@@ -2105,27 +2129,6 @@ int emulate_instruction(struct kvm_vcpu *vcpu, | |||
2105 | ? X86EMUL_MODE_PROT64 : cs_db | 2129 | ? X86EMUL_MODE_PROT64 : cs_db |
2106 | ? X86EMUL_MODE_PROT32 : X86EMUL_MODE_PROT16; | 2130 | ? X86EMUL_MODE_PROT32 : X86EMUL_MODE_PROT16; |
2107 | 2131 | ||
2108 | if (vcpu->arch.emulate_ctxt.mode == X86EMUL_MODE_PROT64) { | ||
2109 | vcpu->arch.emulate_ctxt.cs_base = 0; | ||
2110 | vcpu->arch.emulate_ctxt.ds_base = 0; | ||
2111 | vcpu->arch.emulate_ctxt.es_base = 0; | ||
2112 | vcpu->arch.emulate_ctxt.ss_base = 0; | ||
2113 | } else { | ||
2114 | vcpu->arch.emulate_ctxt.cs_base = | ||
2115 | get_segment_base(vcpu, VCPU_SREG_CS); | ||
2116 | vcpu->arch.emulate_ctxt.ds_base = | ||
2117 | get_segment_base(vcpu, VCPU_SREG_DS); | ||
2118 | vcpu->arch.emulate_ctxt.es_base = | ||
2119 | get_segment_base(vcpu, VCPU_SREG_ES); | ||
2120 | vcpu->arch.emulate_ctxt.ss_base = | ||
2121 | get_segment_base(vcpu, VCPU_SREG_SS); | ||
2122 | } | ||
2123 | |||
2124 | vcpu->arch.emulate_ctxt.gs_base = | ||
2125 | get_segment_base(vcpu, VCPU_SREG_GS); | ||
2126 | vcpu->arch.emulate_ctxt.fs_base = | ||
2127 | get_segment_base(vcpu, VCPU_SREG_FS); | ||
2128 | |||
2129 | r = x86_decode_insn(&vcpu->arch.emulate_ctxt, &emulate_ops); | 2132 | r = x86_decode_insn(&vcpu->arch.emulate_ctxt, &emulate_ops); |
2130 | 2133 | ||
2131 | /* Reject the instructions other than VMCALL/VMMCALL when | 2134 | /* Reject the instructions other than VMCALL/VMMCALL when |
@@ -2300,9 +2303,10 @@ static void pio_string_write(struct kvm_io_device *pio_dev, | |||
2300 | } | 2303 | } |
2301 | 2304 | ||
2302 | static struct kvm_io_device *vcpu_find_pio_dev(struct kvm_vcpu *vcpu, | 2305 | static struct kvm_io_device *vcpu_find_pio_dev(struct kvm_vcpu *vcpu, |
2303 | gpa_t addr) | 2306 | gpa_t addr, int len, |
2307 | int is_write) | ||
2304 | { | 2308 | { |
2305 | return kvm_io_bus_find_dev(&vcpu->kvm->pio_bus, addr); | 2309 | return kvm_io_bus_find_dev(&vcpu->kvm->pio_bus, addr, len, is_write); |
2306 | } | 2310 | } |
2307 | 2311 | ||
2308 | int kvm_emulate_pio(struct kvm_vcpu *vcpu, struct kvm_run *run, int in, | 2312 | int kvm_emulate_pio(struct kvm_vcpu *vcpu, struct kvm_run *run, int in, |
@@ -2331,11 +2335,10 @@ int kvm_emulate_pio(struct kvm_vcpu *vcpu, struct kvm_run *run, int in, | |||
2331 | 2335 | ||
2332 | kvm_x86_ops->cache_regs(vcpu); | 2336 | kvm_x86_ops->cache_regs(vcpu); |
2333 | memcpy(vcpu->arch.pio_data, &vcpu->arch.regs[VCPU_REGS_RAX], 4); | 2337 | memcpy(vcpu->arch.pio_data, &vcpu->arch.regs[VCPU_REGS_RAX], 4); |
2334 | kvm_x86_ops->decache_regs(vcpu); | ||
2335 | 2338 | ||
2336 | kvm_x86_ops->skip_emulated_instruction(vcpu); | 2339 | kvm_x86_ops->skip_emulated_instruction(vcpu); |
2337 | 2340 | ||
2338 | pio_dev = vcpu_find_pio_dev(vcpu, port); | 2341 | pio_dev = vcpu_find_pio_dev(vcpu, port, size, !in); |
2339 | if (pio_dev) { | 2342 | if (pio_dev) { |
2340 | kernel_pio(pio_dev, vcpu, vcpu->arch.pio_data); | 2343 | kernel_pio(pio_dev, vcpu, vcpu->arch.pio_data); |
2341 | complete_pio(vcpu); | 2344 | complete_pio(vcpu); |
@@ -2417,7 +2420,9 @@ int kvm_emulate_pio_string(struct kvm_vcpu *vcpu, struct kvm_run *run, int in, | |||
2417 | } | 2420 | } |
2418 | } | 2421 | } |
2419 | 2422 | ||
2420 | pio_dev = vcpu_find_pio_dev(vcpu, port); | 2423 | pio_dev = vcpu_find_pio_dev(vcpu, port, |
2424 | vcpu->arch.pio.cur_count, | ||
2425 | !vcpu->arch.pio.in); | ||
2421 | if (!vcpu->arch.pio.in) { | 2426 | if (!vcpu->arch.pio.in) { |
2422 | /* string PIO write */ | 2427 | /* string PIO write */ |
2423 | ret = pio_copy_data(vcpu); | 2428 | ret = pio_copy_data(vcpu); |
@@ -2600,27 +2605,41 @@ void realmode_lmsw(struct kvm_vcpu *vcpu, unsigned long msw, | |||
2600 | 2605 | ||
2601 | unsigned long realmode_get_cr(struct kvm_vcpu *vcpu, int cr) | 2606 | unsigned long realmode_get_cr(struct kvm_vcpu *vcpu, int cr) |
2602 | { | 2607 | { |
2608 | unsigned long value; | ||
2609 | |||
2603 | kvm_x86_ops->decache_cr4_guest_bits(vcpu); | 2610 | kvm_x86_ops->decache_cr4_guest_bits(vcpu); |
2604 | switch (cr) { | 2611 | switch (cr) { |
2605 | case 0: | 2612 | case 0: |
2606 | return vcpu->arch.cr0; | 2613 | value = vcpu->arch.cr0; |
2614 | break; | ||
2607 | case 2: | 2615 | case 2: |
2608 | return vcpu->arch.cr2; | 2616 | value = vcpu->arch.cr2; |
2617 | break; | ||
2609 | case 3: | 2618 | case 3: |
2610 | return vcpu->arch.cr3; | 2619 | value = vcpu->arch.cr3; |
2620 | break; | ||
2611 | case 4: | 2621 | case 4: |
2612 | return vcpu->arch.cr4; | 2622 | value = vcpu->arch.cr4; |
2623 | break; | ||
2613 | case 8: | 2624 | case 8: |
2614 | return kvm_get_cr8(vcpu); | 2625 | value = kvm_get_cr8(vcpu); |
2626 | break; | ||
2615 | default: | 2627 | default: |
2616 | vcpu_printf(vcpu, "%s: unexpected cr %u\n", __func__, cr); | 2628 | vcpu_printf(vcpu, "%s: unexpected cr %u\n", __func__, cr); |
2617 | return 0; | 2629 | return 0; |
2618 | } | 2630 | } |
2631 | KVMTRACE_3D(CR_READ, vcpu, (u32)cr, (u32)value, | ||
2632 | (u32)((u64)value >> 32), handler); | ||
2633 | |||
2634 | return value; | ||
2619 | } | 2635 | } |
2620 | 2636 | ||
2621 | void realmode_set_cr(struct kvm_vcpu *vcpu, int cr, unsigned long val, | 2637 | void realmode_set_cr(struct kvm_vcpu *vcpu, int cr, unsigned long val, |
2622 | unsigned long *rflags) | 2638 | unsigned long *rflags) |
2623 | { | 2639 | { |
2640 | KVMTRACE_3D(CR_WRITE, vcpu, (u32)cr, (u32)val, | ||
2641 | (u32)((u64)val >> 32), handler); | ||
2642 | |||
2624 | switch (cr) { | 2643 | switch (cr) { |
2625 | case 0: | 2644 | case 0: |
2626 | kvm_set_cr0(vcpu, mk_cr_64(vcpu->arch.cr0, val)); | 2645 | kvm_set_cr0(vcpu, mk_cr_64(vcpu->arch.cr0, val)); |
@@ -2771,8 +2790,10 @@ static void vapic_exit(struct kvm_vcpu *vcpu) | |||
2771 | if (!apic || !apic->vapic_addr) | 2790 | if (!apic || !apic->vapic_addr) |
2772 | return; | 2791 | return; |
2773 | 2792 | ||
2793 | down_read(&vcpu->kvm->slots_lock); | ||
2774 | kvm_release_page_dirty(apic->vapic_page); | 2794 | kvm_release_page_dirty(apic->vapic_page); |
2775 | mark_page_dirty(vcpu->kvm, apic->vapic_addr >> PAGE_SHIFT); | 2795 | mark_page_dirty(vcpu->kvm, apic->vapic_addr >> PAGE_SHIFT); |
2796 | up_read(&vcpu->kvm->slots_lock); | ||
2776 | } | 2797 | } |
2777 | 2798 | ||
2778 | static int __vcpu_run(struct kvm_vcpu *vcpu, struct kvm_run *kvm_run) | 2799 | static int __vcpu_run(struct kvm_vcpu *vcpu, struct kvm_run *kvm_run) |
@@ -2928,9 +2949,7 @@ out: | |||
2928 | 2949 | ||
2929 | post_kvm_run_save(vcpu, kvm_run); | 2950 | post_kvm_run_save(vcpu, kvm_run); |
2930 | 2951 | ||
2931 | down_read(&vcpu->kvm->slots_lock); | ||
2932 | vapic_exit(vcpu); | 2952 | vapic_exit(vcpu); |
2933 | up_read(&vcpu->kvm->slots_lock); | ||
2934 | 2953 | ||
2935 | return r; | 2954 | return r; |
2936 | } | 2955 | } |
@@ -2942,15 +2961,15 @@ int kvm_arch_vcpu_ioctl_run(struct kvm_vcpu *vcpu, struct kvm_run *kvm_run) | |||
2942 | 2961 | ||
2943 | vcpu_load(vcpu); | 2962 | vcpu_load(vcpu); |
2944 | 2963 | ||
2964 | if (vcpu->sigset_active) | ||
2965 | sigprocmask(SIG_SETMASK, &vcpu->sigset, &sigsaved); | ||
2966 | |||
2945 | if (unlikely(vcpu->arch.mp_state == KVM_MP_STATE_UNINITIALIZED)) { | 2967 | if (unlikely(vcpu->arch.mp_state == KVM_MP_STATE_UNINITIALIZED)) { |
2946 | kvm_vcpu_block(vcpu); | 2968 | kvm_vcpu_block(vcpu); |
2947 | vcpu_put(vcpu); | 2969 | r = -EAGAIN; |
2948 | return -EAGAIN; | 2970 | goto out; |
2949 | } | 2971 | } |
2950 | 2972 | ||
2951 | if (vcpu->sigset_active) | ||
2952 | sigprocmask(SIG_SETMASK, &vcpu->sigset, &sigsaved); | ||
2953 | |||
2954 | /* re-sync apic's tpr */ | 2973 | /* re-sync apic's tpr */ |
2955 | if (!irqchip_in_kernel(vcpu->kvm)) | 2974 | if (!irqchip_in_kernel(vcpu->kvm)) |
2956 | kvm_set_cr8(vcpu, kvm_run->cr8); | 2975 | kvm_set_cr8(vcpu, kvm_run->cr8); |
@@ -3070,8 +3089,8 @@ int kvm_arch_vcpu_ioctl_set_regs(struct kvm_vcpu *vcpu, struct kvm_regs *regs) | |||
3070 | return 0; | 3089 | return 0; |
3071 | } | 3090 | } |
3072 | 3091 | ||
3073 | static void get_segment(struct kvm_vcpu *vcpu, | 3092 | void kvm_get_segment(struct kvm_vcpu *vcpu, |
3074 | struct kvm_segment *var, int seg) | 3093 | struct kvm_segment *var, int seg) |
3075 | { | 3094 | { |
3076 | kvm_x86_ops->get_segment(vcpu, var, seg); | 3095 | kvm_x86_ops->get_segment(vcpu, var, seg); |
3077 | } | 3096 | } |
@@ -3080,7 +3099,7 @@ void kvm_get_cs_db_l_bits(struct kvm_vcpu *vcpu, int *db, int *l) | |||
3080 | { | 3099 | { |
3081 | struct kvm_segment cs; | 3100 | struct kvm_segment cs; |
3082 | 3101 | ||
3083 | get_segment(vcpu, &cs, VCPU_SREG_CS); | 3102 | kvm_get_segment(vcpu, &cs, VCPU_SREG_CS); |
3084 | *db = cs.db; | 3103 | *db = cs.db; |
3085 | *l = cs.l; | 3104 | *l = cs.l; |
3086 | } | 3105 | } |
@@ -3094,15 +3113,15 @@ int kvm_arch_vcpu_ioctl_get_sregs(struct kvm_vcpu *vcpu, | |||
3094 | 3113 | ||
3095 | vcpu_load(vcpu); | 3114 | vcpu_load(vcpu); |
3096 | 3115 | ||
3097 | get_segment(vcpu, &sregs->cs, VCPU_SREG_CS); | 3116 | kvm_get_segment(vcpu, &sregs->cs, VCPU_SREG_CS); |
3098 | get_segment(vcpu, &sregs->ds, VCPU_SREG_DS); | 3117 | kvm_get_segment(vcpu, &sregs->ds, VCPU_SREG_DS); |
3099 | get_segment(vcpu, &sregs->es, VCPU_SREG_ES); | 3118 | kvm_get_segment(vcpu, &sregs->es, VCPU_SREG_ES); |
3100 | get_segment(vcpu, &sregs->fs, VCPU_SREG_FS); | 3119 | kvm_get_segment(vcpu, &sregs->fs, VCPU_SREG_FS); |
3101 | get_segment(vcpu, &sregs->gs, VCPU_SREG_GS); | 3120 | kvm_get_segment(vcpu, &sregs->gs, VCPU_SREG_GS); |
3102 | get_segment(vcpu, &sregs->ss, VCPU_SREG_SS); | 3121 | kvm_get_segment(vcpu, &sregs->ss, VCPU_SREG_SS); |
3103 | 3122 | ||
3104 | get_segment(vcpu, &sregs->tr, VCPU_SREG_TR); | 3123 | kvm_get_segment(vcpu, &sregs->tr, VCPU_SREG_TR); |
3105 | get_segment(vcpu, &sregs->ldt, VCPU_SREG_LDTR); | 3124 | kvm_get_segment(vcpu, &sregs->ldt, VCPU_SREG_LDTR); |
3106 | 3125 | ||
3107 | kvm_x86_ops->get_idt(vcpu, &dt); | 3126 | kvm_x86_ops->get_idt(vcpu, &dt); |
3108 | sregs->idt.limit = dt.limit; | 3127 | sregs->idt.limit = dt.limit; |
@@ -3154,7 +3173,7 @@ int kvm_arch_vcpu_ioctl_set_mpstate(struct kvm_vcpu *vcpu, | |||
3154 | return 0; | 3173 | return 0; |
3155 | } | 3174 | } |
3156 | 3175 | ||
3157 | static void set_segment(struct kvm_vcpu *vcpu, | 3176 | static void kvm_set_segment(struct kvm_vcpu *vcpu, |
3158 | struct kvm_segment *var, int seg) | 3177 | struct kvm_segment *var, int seg) |
3159 | { | 3178 | { |
3160 | kvm_x86_ops->set_segment(vcpu, var, seg); | 3179 | kvm_x86_ops->set_segment(vcpu, var, seg); |
@@ -3168,6 +3187,10 @@ static void seg_desct_to_kvm_desct(struct desc_struct *seg_desc, u16 selector, | |||
3168 | kvm_desct->base |= seg_desc->base2 << 24; | 3187 | kvm_desct->base |= seg_desc->base2 << 24; |
3169 | kvm_desct->limit = seg_desc->limit0; | 3188 | kvm_desct->limit = seg_desc->limit0; |
3170 | kvm_desct->limit |= seg_desc->limit << 16; | 3189 | kvm_desct->limit |= seg_desc->limit << 16; |
3190 | if (seg_desc->g) { | ||
3191 | kvm_desct->limit <<= 12; | ||
3192 | kvm_desct->limit |= 0xfff; | ||
3193 | } | ||
3171 | kvm_desct->selector = selector; | 3194 | kvm_desct->selector = selector; |
3172 | kvm_desct->type = seg_desc->type; | 3195 | kvm_desct->type = seg_desc->type; |
3173 | kvm_desct->present = seg_desc->p; | 3196 | kvm_desct->present = seg_desc->p; |
@@ -3191,7 +3214,7 @@ static void get_segment_descritptor_dtable(struct kvm_vcpu *vcpu, | |||
3191 | if (selector & 1 << 2) { | 3214 | if (selector & 1 << 2) { |
3192 | struct kvm_segment kvm_seg; | 3215 | struct kvm_segment kvm_seg; |
3193 | 3216 | ||
3194 | get_segment(vcpu, &kvm_seg, VCPU_SREG_LDTR); | 3217 | kvm_get_segment(vcpu, &kvm_seg, VCPU_SREG_LDTR); |
3195 | 3218 | ||
3196 | if (kvm_seg.unusable) | 3219 | if (kvm_seg.unusable) |
3197 | dtable->limit = 0; | 3220 | dtable->limit = 0; |
@@ -3207,6 +3230,7 @@ static void get_segment_descritptor_dtable(struct kvm_vcpu *vcpu, | |||
3207 | static int load_guest_segment_descriptor(struct kvm_vcpu *vcpu, u16 selector, | 3230 | static int load_guest_segment_descriptor(struct kvm_vcpu *vcpu, u16 selector, |
3208 | struct desc_struct *seg_desc) | 3231 | struct desc_struct *seg_desc) |
3209 | { | 3232 | { |
3233 | gpa_t gpa; | ||
3210 | struct descriptor_table dtable; | 3234 | struct descriptor_table dtable; |
3211 | u16 index = selector >> 3; | 3235 | u16 index = selector >> 3; |
3212 | 3236 | ||
@@ -3216,13 +3240,16 @@ static int load_guest_segment_descriptor(struct kvm_vcpu *vcpu, u16 selector, | |||
3216 | kvm_queue_exception_e(vcpu, GP_VECTOR, selector & 0xfffc); | 3240 | kvm_queue_exception_e(vcpu, GP_VECTOR, selector & 0xfffc); |
3217 | return 1; | 3241 | return 1; |
3218 | } | 3242 | } |
3219 | return kvm_read_guest(vcpu->kvm, dtable.base + index * 8, seg_desc, 8); | 3243 | gpa = vcpu->arch.mmu.gva_to_gpa(vcpu, dtable.base); |
3244 | gpa += index * 8; | ||
3245 | return kvm_read_guest(vcpu->kvm, gpa, seg_desc, 8); | ||
3220 | } | 3246 | } |
3221 | 3247 | ||
3222 | /* allowed just for 8 bytes segments */ | 3248 | /* allowed just for 8 bytes segments */ |
3223 | static int save_guest_segment_descriptor(struct kvm_vcpu *vcpu, u16 selector, | 3249 | static int save_guest_segment_descriptor(struct kvm_vcpu *vcpu, u16 selector, |
3224 | struct desc_struct *seg_desc) | 3250 | struct desc_struct *seg_desc) |
3225 | { | 3251 | { |
3252 | gpa_t gpa; | ||
3226 | struct descriptor_table dtable; | 3253 | struct descriptor_table dtable; |
3227 | u16 index = selector >> 3; | 3254 | u16 index = selector >> 3; |
3228 | 3255 | ||
@@ -3230,7 +3257,9 @@ static int save_guest_segment_descriptor(struct kvm_vcpu *vcpu, u16 selector, | |||
3230 | 3257 | ||
3231 | if (dtable.limit < index * 8 + 7) | 3258 | if (dtable.limit < index * 8 + 7) |
3232 | return 1; | 3259 | return 1; |
3233 | return kvm_write_guest(vcpu->kvm, dtable.base + index * 8, seg_desc, 8); | 3260 | gpa = vcpu->arch.mmu.gva_to_gpa(vcpu, dtable.base); |
3261 | gpa += index * 8; | ||
3262 | return kvm_write_guest(vcpu->kvm, gpa, seg_desc, 8); | ||
3234 | } | 3263 | } |
3235 | 3264 | ||
3236 | static u32 get_tss_base_addr(struct kvm_vcpu *vcpu, | 3265 | static u32 get_tss_base_addr(struct kvm_vcpu *vcpu, |
@@ -3242,62 +3271,14 @@ static u32 get_tss_base_addr(struct kvm_vcpu *vcpu, | |||
3242 | base_addr |= (seg_desc->base1 << 16); | 3271 | base_addr |= (seg_desc->base1 << 16); |
3243 | base_addr |= (seg_desc->base2 << 24); | 3272 | base_addr |= (seg_desc->base2 << 24); |
3244 | 3273 | ||
3245 | return base_addr; | 3274 | return vcpu->arch.mmu.gva_to_gpa(vcpu, base_addr); |
3246 | } | ||
3247 | |||
3248 | static int load_tss_segment32(struct kvm_vcpu *vcpu, | ||
3249 | struct desc_struct *seg_desc, | ||
3250 | struct tss_segment_32 *tss) | ||
3251 | { | ||
3252 | u32 base_addr; | ||
3253 | |||
3254 | base_addr = get_tss_base_addr(vcpu, seg_desc); | ||
3255 | |||
3256 | return kvm_read_guest(vcpu->kvm, base_addr, tss, | ||
3257 | sizeof(struct tss_segment_32)); | ||
3258 | } | ||
3259 | |||
3260 | static int save_tss_segment32(struct kvm_vcpu *vcpu, | ||
3261 | struct desc_struct *seg_desc, | ||
3262 | struct tss_segment_32 *tss) | ||
3263 | { | ||
3264 | u32 base_addr; | ||
3265 | |||
3266 | base_addr = get_tss_base_addr(vcpu, seg_desc); | ||
3267 | |||
3268 | return kvm_write_guest(vcpu->kvm, base_addr, tss, | ||
3269 | sizeof(struct tss_segment_32)); | ||
3270 | } | ||
3271 | |||
3272 | static int load_tss_segment16(struct kvm_vcpu *vcpu, | ||
3273 | struct desc_struct *seg_desc, | ||
3274 | struct tss_segment_16 *tss) | ||
3275 | { | ||
3276 | u32 base_addr; | ||
3277 | |||
3278 | base_addr = get_tss_base_addr(vcpu, seg_desc); | ||
3279 | |||
3280 | return kvm_read_guest(vcpu->kvm, base_addr, tss, | ||
3281 | sizeof(struct tss_segment_16)); | ||
3282 | } | ||
3283 | |||
3284 | static int save_tss_segment16(struct kvm_vcpu *vcpu, | ||
3285 | struct desc_struct *seg_desc, | ||
3286 | struct tss_segment_16 *tss) | ||
3287 | { | ||
3288 | u32 base_addr; | ||
3289 | |||
3290 | base_addr = get_tss_base_addr(vcpu, seg_desc); | ||
3291 | |||
3292 | return kvm_write_guest(vcpu->kvm, base_addr, tss, | ||
3293 | sizeof(struct tss_segment_16)); | ||
3294 | } | 3275 | } |
3295 | 3276 | ||
3296 | static u16 get_segment_selector(struct kvm_vcpu *vcpu, int seg) | 3277 | static u16 get_segment_selector(struct kvm_vcpu *vcpu, int seg) |
3297 | { | 3278 | { |
3298 | struct kvm_segment kvm_seg; | 3279 | struct kvm_segment kvm_seg; |
3299 | 3280 | ||
3300 | get_segment(vcpu, &kvm_seg, seg); | 3281 | kvm_get_segment(vcpu, &kvm_seg, seg); |
3301 | return kvm_seg.selector; | 3282 | return kvm_seg.selector; |
3302 | } | 3283 | } |
3303 | 3284 | ||
@@ -3313,8 +3294,8 @@ static int load_segment_descriptor_to_kvm_desct(struct kvm_vcpu *vcpu, | |||
3313 | return 0; | 3294 | return 0; |
3314 | } | 3295 | } |
3315 | 3296 | ||
3316 | static int load_segment_descriptor(struct kvm_vcpu *vcpu, u16 selector, | 3297 | int kvm_load_segment_descriptor(struct kvm_vcpu *vcpu, u16 selector, |
3317 | int type_bits, int seg) | 3298 | int type_bits, int seg) |
3318 | { | 3299 | { |
3319 | struct kvm_segment kvm_seg; | 3300 | struct kvm_segment kvm_seg; |
3320 | 3301 | ||
@@ -3327,7 +3308,7 @@ static int load_segment_descriptor(struct kvm_vcpu *vcpu, u16 selector, | |||
3327 | if (!kvm_seg.s) | 3308 | if (!kvm_seg.s) |
3328 | kvm_seg.unusable = 1; | 3309 | kvm_seg.unusable = 1; |
3329 | 3310 | ||
3330 | set_segment(vcpu, &kvm_seg, seg); | 3311 | kvm_set_segment(vcpu, &kvm_seg, seg); |
3331 | return 0; | 3312 | return 0; |
3332 | } | 3313 | } |
3333 | 3314 | ||
@@ -3373,25 +3354,25 @@ static int load_state_from_tss32(struct kvm_vcpu *vcpu, | |||
3373 | vcpu->arch.regs[VCPU_REGS_RSI] = tss->esi; | 3354 | vcpu->arch.regs[VCPU_REGS_RSI] = tss->esi; |
3374 | vcpu->arch.regs[VCPU_REGS_RDI] = tss->edi; | 3355 | vcpu->arch.regs[VCPU_REGS_RDI] = tss->edi; |
3375 | 3356 | ||
3376 | if (load_segment_descriptor(vcpu, tss->ldt_selector, 0, VCPU_SREG_LDTR)) | 3357 | if (kvm_load_segment_descriptor(vcpu, tss->ldt_selector, 0, VCPU_SREG_LDTR)) |
3377 | return 1; | 3358 | return 1; |
3378 | 3359 | ||
3379 | if (load_segment_descriptor(vcpu, tss->es, 1, VCPU_SREG_ES)) | 3360 | if (kvm_load_segment_descriptor(vcpu, tss->es, 1, VCPU_SREG_ES)) |
3380 | return 1; | 3361 | return 1; |
3381 | 3362 | ||
3382 | if (load_segment_descriptor(vcpu, tss->cs, 9, VCPU_SREG_CS)) | 3363 | if (kvm_load_segment_descriptor(vcpu, tss->cs, 9, VCPU_SREG_CS)) |
3383 | return 1; | 3364 | return 1; |
3384 | 3365 | ||
3385 | if (load_segment_descriptor(vcpu, tss->ss, 1, VCPU_SREG_SS)) | 3366 | if (kvm_load_segment_descriptor(vcpu, tss->ss, 1, VCPU_SREG_SS)) |
3386 | return 1; | 3367 | return 1; |
3387 | 3368 | ||
3388 | if (load_segment_descriptor(vcpu, tss->ds, 1, VCPU_SREG_DS)) | 3369 | if (kvm_load_segment_descriptor(vcpu, tss->ds, 1, VCPU_SREG_DS)) |
3389 | return 1; | 3370 | return 1; |
3390 | 3371 | ||
3391 | if (load_segment_descriptor(vcpu, tss->fs, 1, VCPU_SREG_FS)) | 3372 | if (kvm_load_segment_descriptor(vcpu, tss->fs, 1, VCPU_SREG_FS)) |
3392 | return 1; | 3373 | return 1; |
3393 | 3374 | ||
3394 | if (load_segment_descriptor(vcpu, tss->gs, 1, VCPU_SREG_GS)) | 3375 | if (kvm_load_segment_descriptor(vcpu, tss->gs, 1, VCPU_SREG_GS)) |
3395 | return 1; | 3376 | return 1; |
3396 | return 0; | 3377 | return 0; |
3397 | } | 3378 | } |
@@ -3432,38 +3413,44 @@ static int load_state_from_tss16(struct kvm_vcpu *vcpu, | |||
3432 | vcpu->arch.regs[VCPU_REGS_RSI] = tss->si; | 3413 | vcpu->arch.regs[VCPU_REGS_RSI] = tss->si; |
3433 | vcpu->arch.regs[VCPU_REGS_RDI] = tss->di; | 3414 | vcpu->arch.regs[VCPU_REGS_RDI] = tss->di; |
3434 | 3415 | ||
3435 | if (load_segment_descriptor(vcpu, tss->ldt, 0, VCPU_SREG_LDTR)) | 3416 | if (kvm_load_segment_descriptor(vcpu, tss->ldt, 0, VCPU_SREG_LDTR)) |
3436 | return 1; | 3417 | return 1; |
3437 | 3418 | ||
3438 | if (load_segment_descriptor(vcpu, tss->es, 1, VCPU_SREG_ES)) | 3419 | if (kvm_load_segment_descriptor(vcpu, tss->es, 1, VCPU_SREG_ES)) |
3439 | return 1; | 3420 | return 1; |
3440 | 3421 | ||
3441 | if (load_segment_descriptor(vcpu, tss->cs, 9, VCPU_SREG_CS)) | 3422 | if (kvm_load_segment_descriptor(vcpu, tss->cs, 9, VCPU_SREG_CS)) |
3442 | return 1; | 3423 | return 1; |
3443 | 3424 | ||
3444 | if (load_segment_descriptor(vcpu, tss->ss, 1, VCPU_SREG_SS)) | 3425 | if (kvm_load_segment_descriptor(vcpu, tss->ss, 1, VCPU_SREG_SS)) |
3445 | return 1; | 3426 | return 1; |
3446 | 3427 | ||
3447 | if (load_segment_descriptor(vcpu, tss->ds, 1, VCPU_SREG_DS)) | 3428 | if (kvm_load_segment_descriptor(vcpu, tss->ds, 1, VCPU_SREG_DS)) |
3448 | return 1; | 3429 | return 1; |
3449 | return 0; | 3430 | return 0; |
3450 | } | 3431 | } |
3451 | 3432 | ||
3452 | int kvm_task_switch_16(struct kvm_vcpu *vcpu, u16 tss_selector, | 3433 | static int kvm_task_switch_16(struct kvm_vcpu *vcpu, u16 tss_selector, |
3453 | struct desc_struct *cseg_desc, | 3434 | u32 old_tss_base, |
3454 | struct desc_struct *nseg_desc) | 3435 | struct desc_struct *nseg_desc) |
3455 | { | 3436 | { |
3456 | struct tss_segment_16 tss_segment_16; | 3437 | struct tss_segment_16 tss_segment_16; |
3457 | int ret = 0; | 3438 | int ret = 0; |
3458 | 3439 | ||
3459 | if (load_tss_segment16(vcpu, cseg_desc, &tss_segment_16)) | 3440 | if (kvm_read_guest(vcpu->kvm, old_tss_base, &tss_segment_16, |
3441 | sizeof tss_segment_16)) | ||
3460 | goto out; | 3442 | goto out; |
3461 | 3443 | ||
3462 | save_state_to_tss16(vcpu, &tss_segment_16); | 3444 | save_state_to_tss16(vcpu, &tss_segment_16); |
3463 | save_tss_segment16(vcpu, cseg_desc, &tss_segment_16); | ||
3464 | 3445 | ||
3465 | if (load_tss_segment16(vcpu, nseg_desc, &tss_segment_16)) | 3446 | if (kvm_write_guest(vcpu->kvm, old_tss_base, &tss_segment_16, |
3447 | sizeof tss_segment_16)) | ||
3448 | goto out; | ||
3449 | |||
3450 | if (kvm_read_guest(vcpu->kvm, get_tss_base_addr(vcpu, nseg_desc), | ||
3451 | &tss_segment_16, sizeof tss_segment_16)) | ||
3466 | goto out; | 3452 | goto out; |
3453 | |||
3467 | if (load_state_from_tss16(vcpu, &tss_segment_16)) | 3454 | if (load_state_from_tss16(vcpu, &tss_segment_16)) |
3468 | goto out; | 3455 | goto out; |
3469 | 3456 | ||
@@ -3472,21 +3459,27 @@ out: | |||
3472 | return ret; | 3459 | return ret; |
3473 | } | 3460 | } |
3474 | 3461 | ||
3475 | int kvm_task_switch_32(struct kvm_vcpu *vcpu, u16 tss_selector, | 3462 | static int kvm_task_switch_32(struct kvm_vcpu *vcpu, u16 tss_selector, |
3476 | struct desc_struct *cseg_desc, | 3463 | u32 old_tss_base, |
3477 | struct desc_struct *nseg_desc) | 3464 | struct desc_struct *nseg_desc) |
3478 | { | 3465 | { |
3479 | struct tss_segment_32 tss_segment_32; | 3466 | struct tss_segment_32 tss_segment_32; |
3480 | int ret = 0; | 3467 | int ret = 0; |
3481 | 3468 | ||
3482 | if (load_tss_segment32(vcpu, cseg_desc, &tss_segment_32)) | 3469 | if (kvm_read_guest(vcpu->kvm, old_tss_base, &tss_segment_32, |
3470 | sizeof tss_segment_32)) | ||
3483 | goto out; | 3471 | goto out; |
3484 | 3472 | ||
3485 | save_state_to_tss32(vcpu, &tss_segment_32); | 3473 | save_state_to_tss32(vcpu, &tss_segment_32); |
3486 | save_tss_segment32(vcpu, cseg_desc, &tss_segment_32); | ||
3487 | 3474 | ||
3488 | if (load_tss_segment32(vcpu, nseg_desc, &tss_segment_32)) | 3475 | if (kvm_write_guest(vcpu->kvm, old_tss_base, &tss_segment_32, |
3476 | sizeof tss_segment_32)) | ||
3477 | goto out; | ||
3478 | |||
3479 | if (kvm_read_guest(vcpu->kvm, get_tss_base_addr(vcpu, nseg_desc), | ||
3480 | &tss_segment_32, sizeof tss_segment_32)) | ||
3489 | goto out; | 3481 | goto out; |
3482 | |||
3490 | if (load_state_from_tss32(vcpu, &tss_segment_32)) | 3483 | if (load_state_from_tss32(vcpu, &tss_segment_32)) |
3491 | goto out; | 3484 | goto out; |
3492 | 3485 | ||
@@ -3501,16 +3494,20 @@ int kvm_task_switch(struct kvm_vcpu *vcpu, u16 tss_selector, int reason) | |||
3501 | struct desc_struct cseg_desc; | 3494 | struct desc_struct cseg_desc; |
3502 | struct desc_struct nseg_desc; | 3495 | struct desc_struct nseg_desc; |
3503 | int ret = 0; | 3496 | int ret = 0; |
3497 | u32 old_tss_base = get_segment_base(vcpu, VCPU_SREG_TR); | ||
3498 | u16 old_tss_sel = get_segment_selector(vcpu, VCPU_SREG_TR); | ||
3504 | 3499 | ||
3505 | get_segment(vcpu, &tr_seg, VCPU_SREG_TR); | 3500 | old_tss_base = vcpu->arch.mmu.gva_to_gpa(vcpu, old_tss_base); |
3506 | 3501 | ||
3502 | /* FIXME: Handle errors. Failure to read either TSS or their | ||
3503 | * descriptors should generate a pagefault. | ||
3504 | */ | ||
3507 | if (load_guest_segment_descriptor(vcpu, tss_selector, &nseg_desc)) | 3505 | if (load_guest_segment_descriptor(vcpu, tss_selector, &nseg_desc)) |
3508 | goto out; | 3506 | goto out; |
3509 | 3507 | ||
3510 | if (load_guest_segment_descriptor(vcpu, tr_seg.selector, &cseg_desc)) | 3508 | if (load_guest_segment_descriptor(vcpu, old_tss_sel, &cseg_desc)) |
3511 | goto out; | 3509 | goto out; |
3512 | 3510 | ||
3513 | |||
3514 | if (reason != TASK_SWITCH_IRET) { | 3511 | if (reason != TASK_SWITCH_IRET) { |
3515 | int cpl; | 3512 | int cpl; |
3516 | 3513 | ||
@@ -3528,8 +3525,7 @@ int kvm_task_switch(struct kvm_vcpu *vcpu, u16 tss_selector, int reason) | |||
3528 | 3525 | ||
3529 | if (reason == TASK_SWITCH_IRET || reason == TASK_SWITCH_JMP) { | 3526 | if (reason == TASK_SWITCH_IRET || reason == TASK_SWITCH_JMP) { |
3530 | cseg_desc.type &= ~(1 << 1); //clear the B flag | 3527 | cseg_desc.type &= ~(1 << 1); //clear the B flag |
3531 | save_guest_segment_descriptor(vcpu, tr_seg.selector, | 3528 | save_guest_segment_descriptor(vcpu, old_tss_sel, &cseg_desc); |
3532 | &cseg_desc); | ||
3533 | } | 3529 | } |
3534 | 3530 | ||
3535 | if (reason == TASK_SWITCH_IRET) { | 3531 | if (reason == TASK_SWITCH_IRET) { |
@@ -3541,10 +3537,10 @@ int kvm_task_switch(struct kvm_vcpu *vcpu, u16 tss_selector, int reason) | |||
3541 | kvm_x86_ops->cache_regs(vcpu); | 3537 | kvm_x86_ops->cache_regs(vcpu); |
3542 | 3538 | ||
3543 | if (nseg_desc.type & 8) | 3539 | if (nseg_desc.type & 8) |
3544 | ret = kvm_task_switch_32(vcpu, tss_selector, &cseg_desc, | 3540 | ret = kvm_task_switch_32(vcpu, tss_selector, old_tss_base, |
3545 | &nseg_desc); | 3541 | &nseg_desc); |
3546 | else | 3542 | else |
3547 | ret = kvm_task_switch_16(vcpu, tss_selector, &cseg_desc, | 3543 | ret = kvm_task_switch_16(vcpu, tss_selector, old_tss_base, |
3548 | &nseg_desc); | 3544 | &nseg_desc); |
3549 | 3545 | ||
3550 | if (reason == TASK_SWITCH_CALL || reason == TASK_SWITCH_GATE) { | 3546 | if (reason == TASK_SWITCH_CALL || reason == TASK_SWITCH_GATE) { |
@@ -3561,7 +3557,7 @@ int kvm_task_switch(struct kvm_vcpu *vcpu, u16 tss_selector, int reason) | |||
3561 | kvm_x86_ops->set_cr0(vcpu, vcpu->arch.cr0 | X86_CR0_TS); | 3557 | kvm_x86_ops->set_cr0(vcpu, vcpu->arch.cr0 | X86_CR0_TS); |
3562 | seg_desct_to_kvm_desct(&nseg_desc, tss_selector, &tr_seg); | 3558 | seg_desct_to_kvm_desct(&nseg_desc, tss_selector, &tr_seg); |
3563 | tr_seg.type = 11; | 3559 | tr_seg.type = 11; |
3564 | set_segment(vcpu, &tr_seg, VCPU_SREG_TR); | 3560 | kvm_set_segment(vcpu, &tr_seg, VCPU_SREG_TR); |
3565 | out: | 3561 | out: |
3566 | kvm_x86_ops->decache_regs(vcpu); | 3562 | kvm_x86_ops->decache_regs(vcpu); |
3567 | return ret; | 3563 | return ret; |
@@ -3628,15 +3624,15 @@ int kvm_arch_vcpu_ioctl_set_sregs(struct kvm_vcpu *vcpu, | |||
3628 | } | 3624 | } |
3629 | } | 3625 | } |
3630 | 3626 | ||
3631 | set_segment(vcpu, &sregs->cs, VCPU_SREG_CS); | 3627 | kvm_set_segment(vcpu, &sregs->cs, VCPU_SREG_CS); |
3632 | set_segment(vcpu, &sregs->ds, VCPU_SREG_DS); | 3628 | kvm_set_segment(vcpu, &sregs->ds, VCPU_SREG_DS); |
3633 | set_segment(vcpu, &sregs->es, VCPU_SREG_ES); | 3629 | kvm_set_segment(vcpu, &sregs->es, VCPU_SREG_ES); |
3634 | set_segment(vcpu, &sregs->fs, VCPU_SREG_FS); | 3630 | kvm_set_segment(vcpu, &sregs->fs, VCPU_SREG_FS); |
3635 | set_segment(vcpu, &sregs->gs, VCPU_SREG_GS); | 3631 | kvm_set_segment(vcpu, &sregs->gs, VCPU_SREG_GS); |
3636 | set_segment(vcpu, &sregs->ss, VCPU_SREG_SS); | 3632 | kvm_set_segment(vcpu, &sregs->ss, VCPU_SREG_SS); |
3637 | 3633 | ||
3638 | set_segment(vcpu, &sregs->tr, VCPU_SREG_TR); | 3634 | kvm_set_segment(vcpu, &sregs->tr, VCPU_SREG_TR); |
3639 | set_segment(vcpu, &sregs->ldt, VCPU_SREG_LDTR); | 3635 | kvm_set_segment(vcpu, &sregs->ldt, VCPU_SREG_LDTR); |
3640 | 3636 | ||
3641 | vcpu_put(vcpu); | 3637 | vcpu_put(vcpu); |
3642 | 3638 | ||
@@ -3751,14 +3747,14 @@ void fx_init(struct kvm_vcpu *vcpu) | |||
3751 | * allocate ram with GFP_KERNEL. | 3747 | * allocate ram with GFP_KERNEL. |
3752 | */ | 3748 | */ |
3753 | if (!used_math()) | 3749 | if (!used_math()) |
3754 | fx_save(&vcpu->arch.host_fx_image); | 3750 | kvm_fx_save(&vcpu->arch.host_fx_image); |
3755 | 3751 | ||
3756 | /* Initialize guest FPU by resetting ours and saving into guest's */ | 3752 | /* Initialize guest FPU by resetting ours and saving into guest's */ |
3757 | preempt_disable(); | 3753 | preempt_disable(); |
3758 | fx_save(&vcpu->arch.host_fx_image); | 3754 | kvm_fx_save(&vcpu->arch.host_fx_image); |
3759 | fx_finit(); | 3755 | kvm_fx_finit(); |
3760 | fx_save(&vcpu->arch.guest_fx_image); | 3756 | kvm_fx_save(&vcpu->arch.guest_fx_image); |
3761 | fx_restore(&vcpu->arch.host_fx_image); | 3757 | kvm_fx_restore(&vcpu->arch.host_fx_image); |
3762 | preempt_enable(); | 3758 | preempt_enable(); |
3763 | 3759 | ||
3764 | vcpu->arch.cr0 |= X86_CR0_ET; | 3760 | vcpu->arch.cr0 |= X86_CR0_ET; |
@@ -3775,8 +3771,8 @@ void kvm_load_guest_fpu(struct kvm_vcpu *vcpu) | |||
3775 | return; | 3771 | return; |
3776 | 3772 | ||
3777 | vcpu->guest_fpu_loaded = 1; | 3773 | vcpu->guest_fpu_loaded = 1; |
3778 | fx_save(&vcpu->arch.host_fx_image); | 3774 | kvm_fx_save(&vcpu->arch.host_fx_image); |
3779 | fx_restore(&vcpu->arch.guest_fx_image); | 3775 | kvm_fx_restore(&vcpu->arch.guest_fx_image); |
3780 | } | 3776 | } |
3781 | EXPORT_SYMBOL_GPL(kvm_load_guest_fpu); | 3777 | EXPORT_SYMBOL_GPL(kvm_load_guest_fpu); |
3782 | 3778 | ||
@@ -3786,8 +3782,8 @@ void kvm_put_guest_fpu(struct kvm_vcpu *vcpu) | |||
3786 | return; | 3782 | return; |
3787 | 3783 | ||
3788 | vcpu->guest_fpu_loaded = 0; | 3784 | vcpu->guest_fpu_loaded = 0; |
3789 | fx_save(&vcpu->arch.guest_fx_image); | 3785 | kvm_fx_save(&vcpu->arch.guest_fx_image); |
3790 | fx_restore(&vcpu->arch.host_fx_image); | 3786 | kvm_fx_restore(&vcpu->arch.host_fx_image); |
3791 | ++vcpu->stat.fpu_reload; | 3787 | ++vcpu->stat.fpu_reload; |
3792 | } | 3788 | } |
3793 | EXPORT_SYMBOL_GPL(kvm_put_guest_fpu); | 3789 | EXPORT_SYMBOL_GPL(kvm_put_guest_fpu); |
@@ -3979,16 +3975,23 @@ int kvm_arch_set_memory_region(struct kvm *kvm, | |||
3979 | */ | 3975 | */ |
3980 | if (!user_alloc) { | 3976 | if (!user_alloc) { |
3981 | if (npages && !old.rmap) { | 3977 | if (npages && !old.rmap) { |
3978 | unsigned long userspace_addr; | ||
3979 | |||
3982 | down_write(&current->mm->mmap_sem); | 3980 | down_write(&current->mm->mmap_sem); |
3983 | memslot->userspace_addr = do_mmap(NULL, 0, | 3981 | userspace_addr = do_mmap(NULL, 0, |
3984 | npages * PAGE_SIZE, | 3982 | npages * PAGE_SIZE, |
3985 | PROT_READ | PROT_WRITE, | 3983 | PROT_READ | PROT_WRITE, |
3986 | MAP_SHARED | MAP_ANONYMOUS, | 3984 | MAP_SHARED | MAP_ANONYMOUS, |
3987 | 0); | 3985 | 0); |
3988 | up_write(&current->mm->mmap_sem); | 3986 | up_write(&current->mm->mmap_sem); |
3989 | 3987 | ||
3990 | if (IS_ERR((void *)memslot->userspace_addr)) | 3988 | if (IS_ERR((void *)userspace_addr)) |
3991 | return PTR_ERR((void *)memslot->userspace_addr); | 3989 | return PTR_ERR((void *)userspace_addr); |
3990 | |||
3991 | /* set userspace_addr atomically for kvm_hva_to_rmapp */ | ||
3992 | spin_lock(&kvm->mmu_lock); | ||
3993 | memslot->userspace_addr = userspace_addr; | ||
3994 | spin_unlock(&kvm->mmu_lock); | ||
3992 | } else { | 3995 | } else { |
3993 | if (!old.user_alloc && old.rmap) { | 3996 | if (!old.user_alloc && old.rmap) { |
3994 | int ret; | 3997 | int ret; |
@@ -4016,6 +4019,11 @@ int kvm_arch_set_memory_region(struct kvm *kvm, | |||
4016 | return 0; | 4019 | return 0; |
4017 | } | 4020 | } |
4018 | 4021 | ||
4022 | void kvm_arch_flush_shadow(struct kvm *kvm) | ||
4023 | { | ||
4024 | kvm_mmu_zap_all(kvm); | ||
4025 | } | ||
4026 | |||
4019 | int kvm_arch_vcpu_runnable(struct kvm_vcpu *vcpu) | 4027 | int kvm_arch_vcpu_runnable(struct kvm_vcpu *vcpu) |
4020 | { | 4028 | { |
4021 | return vcpu->arch.mp_state == KVM_MP_STATE_RUNNABLE | 4029 | return vcpu->arch.mp_state == KVM_MP_STATE_RUNNABLE |
diff --git a/arch/x86/kvm/x86_emulate.c b/arch/x86/kvm/x86_emulate.c index 932f216d890c..f2f90468f8b1 100644 --- a/arch/x86/kvm/x86_emulate.c +++ b/arch/x86/kvm/x86_emulate.c | |||
@@ -121,7 +121,7 @@ static u16 opcode_table[256] = { | |||
121 | 0, 0, 0, DstReg | SrcMem32 | ModRM | Mov /* movsxd (x86/64) */ , | 121 | 0, 0, 0, DstReg | SrcMem32 | ModRM | Mov /* movsxd (x86/64) */ , |
122 | 0, 0, 0, 0, | 122 | 0, 0, 0, 0, |
123 | /* 0x68 - 0x6F */ | 123 | /* 0x68 - 0x6F */ |
124 | 0, 0, ImplicitOps | Mov | Stack, 0, | 124 | SrcImm | Mov | Stack, 0, SrcImmByte | Mov | Stack, 0, |
125 | SrcNone | ByteOp | ImplicitOps, SrcNone | ImplicitOps, /* insb, insw/insd */ | 125 | SrcNone | ByteOp | ImplicitOps, SrcNone | ImplicitOps, /* insb, insw/insd */ |
126 | SrcNone | ByteOp | ImplicitOps, SrcNone | ImplicitOps, /* outsb, outsw/outsd */ | 126 | SrcNone | ByteOp | ImplicitOps, SrcNone | ImplicitOps, /* outsb, outsw/outsd */ |
127 | /* 0x70 - 0x77 */ | 127 | /* 0x70 - 0x77 */ |
@@ -138,9 +138,11 @@ static u16 opcode_table[256] = { | |||
138 | /* 0x88 - 0x8F */ | 138 | /* 0x88 - 0x8F */ |
139 | ByteOp | DstMem | SrcReg | ModRM | Mov, DstMem | SrcReg | ModRM | Mov, | 139 | ByteOp | DstMem | SrcReg | ModRM | Mov, DstMem | SrcReg | ModRM | Mov, |
140 | ByteOp | DstReg | SrcMem | ModRM | Mov, DstReg | SrcMem | ModRM | Mov, | 140 | ByteOp | DstReg | SrcMem | ModRM | Mov, DstReg | SrcMem | ModRM | Mov, |
141 | 0, ModRM | DstReg, 0, Group | Group1A, | 141 | DstMem | SrcReg | ModRM | Mov, ModRM | DstReg, |
142 | /* 0x90 - 0x9F */ | 142 | DstReg | SrcMem | ModRM | Mov, Group | Group1A, |
143 | 0, 0, 0, 0, 0, 0, 0, 0, | 143 | /* 0x90 - 0x97 */ |
144 | DstReg, DstReg, DstReg, DstReg, DstReg, DstReg, DstReg, DstReg, | ||
145 | /* 0x98 - 0x9F */ | ||
144 | 0, 0, 0, 0, ImplicitOps | Stack, ImplicitOps | Stack, 0, 0, | 146 | 0, 0, 0, 0, ImplicitOps | Stack, ImplicitOps | Stack, 0, 0, |
145 | /* 0xA0 - 0xA7 */ | 147 | /* 0xA0 - 0xA7 */ |
146 | ByteOp | DstReg | SrcMem | Mov | MemAbs, DstReg | SrcMem | Mov | MemAbs, | 148 | ByteOp | DstReg | SrcMem | Mov | MemAbs, DstReg | SrcMem | Mov | MemAbs, |
@@ -152,7 +154,8 @@ static u16 opcode_table[256] = { | |||
152 | ByteOp | ImplicitOps | Mov | String, ImplicitOps | Mov | String, | 154 | ByteOp | ImplicitOps | Mov | String, ImplicitOps | Mov | String, |
153 | ByteOp | ImplicitOps | String, ImplicitOps | String, | 155 | ByteOp | ImplicitOps | String, ImplicitOps | String, |
154 | /* 0xB0 - 0xBF */ | 156 | /* 0xB0 - 0xBF */ |
155 | 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, | 157 | 0, 0, 0, 0, 0, 0, 0, 0, |
158 | DstReg | SrcImm | Mov, 0, 0, 0, 0, 0, 0, 0, | ||
156 | /* 0xC0 - 0xC7 */ | 159 | /* 0xC0 - 0xC7 */ |
157 | ByteOp | DstMem | SrcImm | ModRM, DstMem | SrcImmByte | ModRM, | 160 | ByteOp | DstMem | SrcImm | ModRM, DstMem | SrcImmByte | ModRM, |
158 | 0, ImplicitOps | Stack, 0, 0, | 161 | 0, ImplicitOps | Stack, 0, 0, |
@@ -168,7 +171,8 @@ static u16 opcode_table[256] = { | |||
168 | /* 0xE0 - 0xE7 */ | 171 | /* 0xE0 - 0xE7 */ |
169 | 0, 0, 0, 0, 0, 0, 0, 0, | 172 | 0, 0, 0, 0, 0, 0, 0, 0, |
170 | /* 0xE8 - 0xEF */ | 173 | /* 0xE8 - 0xEF */ |
171 | ImplicitOps | Stack, SrcImm|ImplicitOps, 0, SrcImmByte|ImplicitOps, | 174 | ImplicitOps | Stack, SrcImm | ImplicitOps, |
175 | ImplicitOps, SrcImmByte | ImplicitOps, | ||
172 | 0, 0, 0, 0, | 176 | 0, 0, 0, 0, |
173 | /* 0xF0 - 0xF7 */ | 177 | /* 0xF0 - 0xF7 */ |
174 | 0, 0, 0, 0, | 178 | 0, 0, 0, 0, |
@@ -215,7 +219,7 @@ static u16 twobyte_table[256] = { | |||
215 | /* 0xA0 - 0xA7 */ | 219 | /* 0xA0 - 0xA7 */ |
216 | 0, 0, 0, DstMem | SrcReg | ModRM | BitOp, 0, 0, 0, 0, | 220 | 0, 0, 0, DstMem | SrcReg | ModRM | BitOp, 0, 0, 0, 0, |
217 | /* 0xA8 - 0xAF */ | 221 | /* 0xA8 - 0xAF */ |
218 | 0, 0, 0, DstMem | SrcReg | ModRM | BitOp, 0, 0, 0, 0, | 222 | 0, 0, 0, DstMem | SrcReg | ModRM | BitOp, 0, 0, ModRM, 0, |
219 | /* 0xB0 - 0xB7 */ | 223 | /* 0xB0 - 0xB7 */ |
220 | ByteOp | DstMem | SrcReg | ModRM, DstMem | SrcReg | ModRM, 0, | 224 | ByteOp | DstMem | SrcReg | ModRM, DstMem | SrcReg | ModRM, 0, |
221 | DstMem | SrcReg | ModRM | BitOp, | 225 | DstMem | SrcReg | ModRM | BitOp, |
@@ -518,6 +522,39 @@ static inline void jmp_rel(struct decode_cache *c, int rel) | |||
518 | register_address_increment(c, &c->eip, rel); | 522 | register_address_increment(c, &c->eip, rel); |
519 | } | 523 | } |
520 | 524 | ||
525 | static void set_seg_override(struct decode_cache *c, int seg) | ||
526 | { | ||
527 | c->has_seg_override = true; | ||
528 | c->seg_override = seg; | ||
529 | } | ||
530 | |||
531 | static unsigned long seg_base(struct x86_emulate_ctxt *ctxt, int seg) | ||
532 | { | ||
533 | if (ctxt->mode == X86EMUL_MODE_PROT64 && seg < VCPU_SREG_FS) | ||
534 | return 0; | ||
535 | |||
536 | return kvm_x86_ops->get_segment_base(ctxt->vcpu, seg); | ||
537 | } | ||
538 | |||
539 | static unsigned long seg_override_base(struct x86_emulate_ctxt *ctxt, | ||
540 | struct decode_cache *c) | ||
541 | { | ||
542 | if (!c->has_seg_override) | ||
543 | return 0; | ||
544 | |||
545 | return seg_base(ctxt, c->seg_override); | ||
546 | } | ||
547 | |||
548 | static unsigned long es_base(struct x86_emulate_ctxt *ctxt) | ||
549 | { | ||
550 | return seg_base(ctxt, VCPU_SREG_ES); | ||
551 | } | ||
552 | |||
553 | static unsigned long ss_base(struct x86_emulate_ctxt *ctxt) | ||
554 | { | ||
555 | return seg_base(ctxt, VCPU_SREG_SS); | ||
556 | } | ||
557 | |||
521 | static int do_fetch_insn_byte(struct x86_emulate_ctxt *ctxt, | 558 | static int do_fetch_insn_byte(struct x86_emulate_ctxt *ctxt, |
522 | struct x86_emulate_ops *ops, | 559 | struct x86_emulate_ops *ops, |
523 | unsigned long linear, u8 *dest) | 560 | unsigned long linear, u8 *dest) |
@@ -660,7 +697,7 @@ static int decode_modrm(struct x86_emulate_ctxt *ctxt, | |||
660 | { | 697 | { |
661 | struct decode_cache *c = &ctxt->decode; | 698 | struct decode_cache *c = &ctxt->decode; |
662 | u8 sib; | 699 | u8 sib; |
663 | int index_reg = 0, base_reg = 0, scale, rip_relative = 0; | 700 | int index_reg = 0, base_reg = 0, scale; |
664 | int rc = 0; | 701 | int rc = 0; |
665 | 702 | ||
666 | if (c->rex_prefix) { | 703 | if (c->rex_prefix) { |
@@ -731,47 +768,28 @@ static int decode_modrm(struct x86_emulate_ctxt *ctxt, | |||
731 | } | 768 | } |
732 | if (c->modrm_rm == 2 || c->modrm_rm == 3 || | 769 | if (c->modrm_rm == 2 || c->modrm_rm == 3 || |
733 | (c->modrm_rm == 6 && c->modrm_mod != 0)) | 770 | (c->modrm_rm == 6 && c->modrm_mod != 0)) |
734 | if (!c->override_base) | 771 | if (!c->has_seg_override) |
735 | c->override_base = &ctxt->ss_base; | 772 | set_seg_override(c, VCPU_SREG_SS); |
736 | c->modrm_ea = (u16)c->modrm_ea; | 773 | c->modrm_ea = (u16)c->modrm_ea; |
737 | } else { | 774 | } else { |
738 | /* 32/64-bit ModR/M decode. */ | 775 | /* 32/64-bit ModR/M decode. */ |
739 | switch (c->modrm_rm) { | 776 | if ((c->modrm_rm & 7) == 4) { |
740 | case 4: | ||
741 | case 12: | ||
742 | sib = insn_fetch(u8, 1, c->eip); | 777 | sib = insn_fetch(u8, 1, c->eip); |
743 | index_reg |= (sib >> 3) & 7; | 778 | index_reg |= (sib >> 3) & 7; |
744 | base_reg |= sib & 7; | 779 | base_reg |= sib & 7; |
745 | scale = sib >> 6; | 780 | scale = sib >> 6; |
746 | 781 | ||
747 | switch (base_reg) { | 782 | if ((base_reg & 7) == 5 && c->modrm_mod == 0) |
748 | case 5: | 783 | c->modrm_ea += insn_fetch(s32, 4, c->eip); |
749 | if (c->modrm_mod != 0) | 784 | else |
750 | c->modrm_ea += c->regs[base_reg]; | ||
751 | else | ||
752 | c->modrm_ea += | ||
753 | insn_fetch(s32, 4, c->eip); | ||
754 | break; | ||
755 | default: | ||
756 | c->modrm_ea += c->regs[base_reg]; | 785 | c->modrm_ea += c->regs[base_reg]; |
757 | } | 786 | if (index_reg != 4) |
758 | switch (index_reg) { | ||
759 | case 4: | ||
760 | break; | ||
761 | default: | ||
762 | c->modrm_ea += c->regs[index_reg] << scale; | 787 | c->modrm_ea += c->regs[index_reg] << scale; |
763 | } | 788 | } else if ((c->modrm_rm & 7) == 5 && c->modrm_mod == 0) { |
764 | break; | 789 | if (ctxt->mode == X86EMUL_MODE_PROT64) |
765 | case 5: | 790 | c->rip_relative = 1; |
766 | if (c->modrm_mod != 0) | 791 | } else |
767 | c->modrm_ea += c->regs[c->modrm_rm]; | ||
768 | else if (ctxt->mode == X86EMUL_MODE_PROT64) | ||
769 | rip_relative = 1; | ||
770 | break; | ||
771 | default: | ||
772 | c->modrm_ea += c->regs[c->modrm_rm]; | 792 | c->modrm_ea += c->regs[c->modrm_rm]; |
773 | break; | ||
774 | } | ||
775 | switch (c->modrm_mod) { | 793 | switch (c->modrm_mod) { |
776 | case 0: | 794 | case 0: |
777 | if (c->modrm_rm == 5) | 795 | if (c->modrm_rm == 5) |
@@ -785,22 +803,6 @@ static int decode_modrm(struct x86_emulate_ctxt *ctxt, | |||
785 | break; | 803 | break; |
786 | } | 804 | } |
787 | } | 805 | } |
788 | if (rip_relative) { | ||
789 | c->modrm_ea += c->eip; | ||
790 | switch (c->d & SrcMask) { | ||
791 | case SrcImmByte: | ||
792 | c->modrm_ea += 1; | ||
793 | break; | ||
794 | case SrcImm: | ||
795 | if (c->d & ByteOp) | ||
796 | c->modrm_ea += 1; | ||
797 | else | ||
798 | if (c->op_bytes == 8) | ||
799 | c->modrm_ea += 4; | ||
800 | else | ||
801 | c->modrm_ea += c->op_bytes; | ||
802 | } | ||
803 | } | ||
804 | done: | 806 | done: |
805 | return rc; | 807 | return rc; |
806 | } | 808 | } |
@@ -838,6 +840,7 @@ x86_decode_insn(struct x86_emulate_ctxt *ctxt, struct x86_emulate_ops *ops) | |||
838 | 840 | ||
839 | memset(c, 0, sizeof(struct decode_cache)); | 841 | memset(c, 0, sizeof(struct decode_cache)); |
840 | c->eip = ctxt->vcpu->arch.rip; | 842 | c->eip = ctxt->vcpu->arch.rip; |
843 | ctxt->cs_base = seg_base(ctxt, VCPU_SREG_CS); | ||
841 | memcpy(c->regs, ctxt->vcpu->arch.regs, sizeof c->regs); | 844 | memcpy(c->regs, ctxt->vcpu->arch.regs, sizeof c->regs); |
842 | 845 | ||
843 | switch (mode) { | 846 | switch (mode) { |
@@ -876,23 +879,15 @@ x86_decode_insn(struct x86_emulate_ctxt *ctxt, struct x86_emulate_ops *ops) | |||
876 | /* switch between 2/4 bytes */ | 879 | /* switch between 2/4 bytes */ |
877 | c->ad_bytes = def_ad_bytes ^ 6; | 880 | c->ad_bytes = def_ad_bytes ^ 6; |
878 | break; | 881 | break; |
882 | case 0x26: /* ES override */ | ||
879 | case 0x2e: /* CS override */ | 883 | case 0x2e: /* CS override */ |
880 | c->override_base = &ctxt->cs_base; | 884 | case 0x36: /* SS override */ |
881 | break; | ||
882 | case 0x3e: /* DS override */ | 885 | case 0x3e: /* DS override */ |
883 | c->override_base = &ctxt->ds_base; | 886 | set_seg_override(c, (c->b >> 3) & 3); |
884 | break; | ||
885 | case 0x26: /* ES override */ | ||
886 | c->override_base = &ctxt->es_base; | ||
887 | break; | 887 | break; |
888 | case 0x64: /* FS override */ | 888 | case 0x64: /* FS override */ |
889 | c->override_base = &ctxt->fs_base; | ||
890 | break; | ||
891 | case 0x65: /* GS override */ | 889 | case 0x65: /* GS override */ |
892 | c->override_base = &ctxt->gs_base; | 890 | set_seg_override(c, c->b & 7); |
893 | break; | ||
894 | case 0x36: /* SS override */ | ||
895 | c->override_base = &ctxt->ss_base; | ||
896 | break; | 891 | break; |
897 | case 0x40 ... 0x4f: /* REX */ | 892 | case 0x40 ... 0x4f: /* REX */ |
898 | if (mode != X86EMUL_MODE_PROT64) | 893 | if (mode != X86EMUL_MODE_PROT64) |
@@ -964,15 +959,11 @@ done_prefixes: | |||
964 | if (rc) | 959 | if (rc) |
965 | goto done; | 960 | goto done; |
966 | 961 | ||
967 | if (!c->override_base) | 962 | if (!c->has_seg_override) |
968 | c->override_base = &ctxt->ds_base; | 963 | set_seg_override(c, VCPU_SREG_DS); |
969 | if (mode == X86EMUL_MODE_PROT64 && | ||
970 | c->override_base != &ctxt->fs_base && | ||
971 | c->override_base != &ctxt->gs_base) | ||
972 | c->override_base = NULL; | ||
973 | 964 | ||
974 | if (c->override_base) | 965 | if (!(!c->twobyte && c->b == 0x8d)) |
975 | c->modrm_ea += *c->override_base; | 966 | c->modrm_ea += seg_override_base(ctxt, c); |
976 | 967 | ||
977 | if (c->ad_bytes != 8) | 968 | if (c->ad_bytes != 8) |
978 | c->modrm_ea = (u32)c->modrm_ea; | 969 | c->modrm_ea = (u32)c->modrm_ea; |
@@ -1049,6 +1040,7 @@ done_prefixes: | |||
1049 | break; | 1040 | break; |
1050 | case DstMem: | 1041 | case DstMem: |
1051 | if ((c->d & ModRM) && c->modrm_mod == 3) { | 1042 | if ((c->d & ModRM) && c->modrm_mod == 3) { |
1043 | c->dst.bytes = (c->d & ByteOp) ? 1 : c->op_bytes; | ||
1052 | c->dst.type = OP_REG; | 1044 | c->dst.type = OP_REG; |
1053 | c->dst.val = c->dst.orig_val = c->modrm_val; | 1045 | c->dst.val = c->dst.orig_val = c->modrm_val; |
1054 | c->dst.ptr = c->modrm_ptr; | 1046 | c->dst.ptr = c->modrm_ptr; |
@@ -1058,6 +1050,9 @@ done_prefixes: | |||
1058 | break; | 1050 | break; |
1059 | } | 1051 | } |
1060 | 1052 | ||
1053 | if (c->rip_relative) | ||
1054 | c->modrm_ea += c->eip; | ||
1055 | |||
1061 | done: | 1056 | done: |
1062 | return (rc == X86EMUL_UNHANDLEABLE) ? -1 : 0; | 1057 | return (rc == X86EMUL_UNHANDLEABLE) ? -1 : 0; |
1063 | } | 1058 | } |
@@ -1070,7 +1065,7 @@ static inline void emulate_push(struct x86_emulate_ctxt *ctxt) | |||
1070 | c->dst.bytes = c->op_bytes; | 1065 | c->dst.bytes = c->op_bytes; |
1071 | c->dst.val = c->src.val; | 1066 | c->dst.val = c->src.val; |
1072 | register_address_increment(c, &c->regs[VCPU_REGS_RSP], -c->op_bytes); | 1067 | register_address_increment(c, &c->regs[VCPU_REGS_RSP], -c->op_bytes); |
1073 | c->dst.ptr = (void *) register_address(c, ctxt->ss_base, | 1068 | c->dst.ptr = (void *) register_address(c, ss_base(ctxt), |
1074 | c->regs[VCPU_REGS_RSP]); | 1069 | c->regs[VCPU_REGS_RSP]); |
1075 | } | 1070 | } |
1076 | 1071 | ||
@@ -1080,7 +1075,7 @@ static inline int emulate_grp1a(struct x86_emulate_ctxt *ctxt, | |||
1080 | struct decode_cache *c = &ctxt->decode; | 1075 | struct decode_cache *c = &ctxt->decode; |
1081 | int rc; | 1076 | int rc; |
1082 | 1077 | ||
1083 | rc = ops->read_std(register_address(c, ctxt->ss_base, | 1078 | rc = ops->read_std(register_address(c, ss_base(ctxt), |
1084 | c->regs[VCPU_REGS_RSP]), | 1079 | c->regs[VCPU_REGS_RSP]), |
1085 | &c->dst.val, c->dst.bytes, ctxt->vcpu); | 1080 | &c->dst.val, c->dst.bytes, ctxt->vcpu); |
1086 | if (rc != 0) | 1081 | if (rc != 0) |
@@ -1402,11 +1397,11 @@ special_insn: | |||
1402 | register_address_increment(c, &c->regs[VCPU_REGS_RSP], | 1397 | register_address_increment(c, &c->regs[VCPU_REGS_RSP], |
1403 | -c->op_bytes); | 1398 | -c->op_bytes); |
1404 | c->dst.ptr = (void *) register_address( | 1399 | c->dst.ptr = (void *) register_address( |
1405 | c, ctxt->ss_base, c->regs[VCPU_REGS_RSP]); | 1400 | c, ss_base(ctxt), c->regs[VCPU_REGS_RSP]); |
1406 | break; | 1401 | break; |
1407 | case 0x58 ... 0x5f: /* pop reg */ | 1402 | case 0x58 ... 0x5f: /* pop reg */ |
1408 | pop_instruction: | 1403 | pop_instruction: |
1409 | if ((rc = ops->read_std(register_address(c, ctxt->ss_base, | 1404 | if ((rc = ops->read_std(register_address(c, ss_base(ctxt), |
1410 | c->regs[VCPU_REGS_RSP]), c->dst.ptr, | 1405 | c->regs[VCPU_REGS_RSP]), c->dst.ptr, |
1411 | c->op_bytes, ctxt->vcpu)) != 0) | 1406 | c->op_bytes, ctxt->vcpu)) != 0) |
1412 | goto done; | 1407 | goto done; |
@@ -1420,9 +1415,8 @@ special_insn: | |||
1420 | goto cannot_emulate; | 1415 | goto cannot_emulate; |
1421 | c->dst.val = (s32) c->src.val; | 1416 | c->dst.val = (s32) c->src.val; |
1422 | break; | 1417 | break; |
1418 | case 0x68: /* push imm */ | ||
1423 | case 0x6a: /* push imm8 */ | 1419 | case 0x6a: /* push imm8 */ |
1424 | c->src.val = 0L; | ||
1425 | c->src.val = insn_fetch(s8, 1, c->eip); | ||
1426 | emulate_push(ctxt); | 1420 | emulate_push(ctxt); |
1427 | break; | 1421 | break; |
1428 | case 0x6c: /* insb */ | 1422 | case 0x6c: /* insb */ |
@@ -1433,7 +1427,7 @@ special_insn: | |||
1433 | c->rep_prefix ? | 1427 | c->rep_prefix ? |
1434 | address_mask(c, c->regs[VCPU_REGS_RCX]) : 1, | 1428 | address_mask(c, c->regs[VCPU_REGS_RCX]) : 1, |
1435 | (ctxt->eflags & EFLG_DF), | 1429 | (ctxt->eflags & EFLG_DF), |
1436 | register_address(c, ctxt->es_base, | 1430 | register_address(c, es_base(ctxt), |
1437 | c->regs[VCPU_REGS_RDI]), | 1431 | c->regs[VCPU_REGS_RDI]), |
1438 | c->rep_prefix, | 1432 | c->rep_prefix, |
1439 | c->regs[VCPU_REGS_RDX]) == 0) { | 1433 | c->regs[VCPU_REGS_RDX]) == 0) { |
@@ -1449,9 +1443,8 @@ special_insn: | |||
1449 | c->rep_prefix ? | 1443 | c->rep_prefix ? |
1450 | address_mask(c, c->regs[VCPU_REGS_RCX]) : 1, | 1444 | address_mask(c, c->regs[VCPU_REGS_RCX]) : 1, |
1451 | (ctxt->eflags & EFLG_DF), | 1445 | (ctxt->eflags & EFLG_DF), |
1452 | register_address(c, c->override_base ? | 1446 | register_address(c, |
1453 | *c->override_base : | 1447 | seg_override_base(ctxt, c), |
1454 | ctxt->ds_base, | ||
1455 | c->regs[VCPU_REGS_RSI]), | 1448 | c->regs[VCPU_REGS_RSI]), |
1456 | c->rep_prefix, | 1449 | c->rep_prefix, |
1457 | c->regs[VCPU_REGS_RDX]) == 0) { | 1450 | c->regs[VCPU_REGS_RDX]) == 0) { |
@@ -1490,6 +1483,7 @@ special_insn: | |||
1490 | emulate_2op_SrcV("test", c->src, c->dst, ctxt->eflags); | 1483 | emulate_2op_SrcV("test", c->src, c->dst, ctxt->eflags); |
1491 | break; | 1484 | break; |
1492 | case 0x86 ... 0x87: /* xchg */ | 1485 | case 0x86 ... 0x87: /* xchg */ |
1486 | xchg: | ||
1493 | /* Write back the register source. */ | 1487 | /* Write back the register source. */ |
1494 | switch (c->dst.bytes) { | 1488 | switch (c->dst.bytes) { |
1495 | case 1: | 1489 | case 1: |
@@ -1514,14 +1508,60 @@ special_insn: | |||
1514 | break; | 1508 | break; |
1515 | case 0x88 ... 0x8b: /* mov */ | 1509 | case 0x88 ... 0x8b: /* mov */ |
1516 | goto mov; | 1510 | goto mov; |
1511 | case 0x8c: { /* mov r/m, sreg */ | ||
1512 | struct kvm_segment segreg; | ||
1513 | |||
1514 | if (c->modrm_reg <= 5) | ||
1515 | kvm_get_segment(ctxt->vcpu, &segreg, c->modrm_reg); | ||
1516 | else { | ||
1517 | printk(KERN_INFO "0x8c: Invalid segreg in modrm byte 0x%02x\n", | ||
1518 | c->modrm); | ||
1519 | goto cannot_emulate; | ||
1520 | } | ||
1521 | c->dst.val = segreg.selector; | ||
1522 | break; | ||
1523 | } | ||
1517 | case 0x8d: /* lea r16/r32, m */ | 1524 | case 0x8d: /* lea r16/r32, m */ |
1518 | c->dst.val = c->modrm_ea; | 1525 | c->dst.val = c->modrm_ea; |
1519 | break; | 1526 | break; |
1527 | case 0x8e: { /* mov seg, r/m16 */ | ||
1528 | uint16_t sel; | ||
1529 | int type_bits; | ||
1530 | int err; | ||
1531 | |||
1532 | sel = c->src.val; | ||
1533 | if (c->modrm_reg <= 5) { | ||
1534 | type_bits = (c->modrm_reg == 1) ? 9 : 1; | ||
1535 | err = kvm_load_segment_descriptor(ctxt->vcpu, sel, | ||
1536 | type_bits, c->modrm_reg); | ||
1537 | } else { | ||
1538 | printk(KERN_INFO "Invalid segreg in modrm byte 0x%02x\n", | ||
1539 | c->modrm); | ||
1540 | goto cannot_emulate; | ||
1541 | } | ||
1542 | |||
1543 | if (err < 0) | ||
1544 | goto cannot_emulate; | ||
1545 | |||
1546 | c->dst.type = OP_NONE; /* Disable writeback. */ | ||
1547 | break; | ||
1548 | } | ||
1520 | case 0x8f: /* pop (sole member of Grp1a) */ | 1549 | case 0x8f: /* pop (sole member of Grp1a) */ |
1521 | rc = emulate_grp1a(ctxt, ops); | 1550 | rc = emulate_grp1a(ctxt, ops); |
1522 | if (rc != 0) | 1551 | if (rc != 0) |
1523 | goto done; | 1552 | goto done; |
1524 | break; | 1553 | break; |
1554 | case 0x90: /* nop / xchg r8,rax */ | ||
1555 | if (!(c->rex_prefix & 1)) { /* nop */ | ||
1556 | c->dst.type = OP_NONE; | ||
1557 | break; | ||
1558 | } | ||
1559 | case 0x91 ... 0x97: /* xchg reg,rax */ | ||
1560 | c->src.type = c->dst.type = OP_REG; | ||
1561 | c->src.bytes = c->dst.bytes = c->op_bytes; | ||
1562 | c->src.ptr = (unsigned long *) &c->regs[VCPU_REGS_RAX]; | ||
1563 | c->src.val = *(c->src.ptr); | ||
1564 | goto xchg; | ||
1525 | case 0x9c: /* pushf */ | 1565 | case 0x9c: /* pushf */ |
1526 | c->src.val = (unsigned long) ctxt->eflags; | 1566 | c->src.val = (unsigned long) ctxt->eflags; |
1527 | emulate_push(ctxt); | 1567 | emulate_push(ctxt); |
@@ -1540,11 +1580,10 @@ special_insn: | |||
1540 | c->dst.type = OP_MEM; | 1580 | c->dst.type = OP_MEM; |
1541 | c->dst.bytes = (c->d & ByteOp) ? 1 : c->op_bytes; | 1581 | c->dst.bytes = (c->d & ByteOp) ? 1 : c->op_bytes; |
1542 | c->dst.ptr = (unsigned long *)register_address(c, | 1582 | c->dst.ptr = (unsigned long *)register_address(c, |
1543 | ctxt->es_base, | 1583 | es_base(ctxt), |
1544 | c->regs[VCPU_REGS_RDI]); | 1584 | c->regs[VCPU_REGS_RDI]); |
1545 | if ((rc = ops->read_emulated(register_address(c, | 1585 | if ((rc = ops->read_emulated(register_address(c, |
1546 | c->override_base ? *c->override_base : | 1586 | seg_override_base(ctxt, c), |
1547 | ctxt->ds_base, | ||
1548 | c->regs[VCPU_REGS_RSI]), | 1587 | c->regs[VCPU_REGS_RSI]), |
1549 | &c->dst.val, | 1588 | &c->dst.val, |
1550 | c->dst.bytes, ctxt->vcpu)) != 0) | 1589 | c->dst.bytes, ctxt->vcpu)) != 0) |
@@ -1560,8 +1599,7 @@ special_insn: | |||
1560 | c->src.type = OP_NONE; /* Disable writeback. */ | 1599 | c->src.type = OP_NONE; /* Disable writeback. */ |
1561 | c->src.bytes = (c->d & ByteOp) ? 1 : c->op_bytes; | 1600 | c->src.bytes = (c->d & ByteOp) ? 1 : c->op_bytes; |
1562 | c->src.ptr = (unsigned long *)register_address(c, | 1601 | c->src.ptr = (unsigned long *)register_address(c, |
1563 | c->override_base ? *c->override_base : | 1602 | seg_override_base(ctxt, c), |
1564 | ctxt->ds_base, | ||
1565 | c->regs[VCPU_REGS_RSI]); | 1603 | c->regs[VCPU_REGS_RSI]); |
1566 | if ((rc = ops->read_emulated((unsigned long)c->src.ptr, | 1604 | if ((rc = ops->read_emulated((unsigned long)c->src.ptr, |
1567 | &c->src.val, | 1605 | &c->src.val, |
@@ -1572,7 +1610,7 @@ special_insn: | |||
1572 | c->dst.type = OP_NONE; /* Disable writeback. */ | 1610 | c->dst.type = OP_NONE; /* Disable writeback. */ |
1573 | c->dst.bytes = (c->d & ByteOp) ? 1 : c->op_bytes; | 1611 | c->dst.bytes = (c->d & ByteOp) ? 1 : c->op_bytes; |
1574 | c->dst.ptr = (unsigned long *)register_address(c, | 1612 | c->dst.ptr = (unsigned long *)register_address(c, |
1575 | ctxt->es_base, | 1613 | es_base(ctxt), |
1576 | c->regs[VCPU_REGS_RDI]); | 1614 | c->regs[VCPU_REGS_RDI]); |
1577 | if ((rc = ops->read_emulated((unsigned long)c->dst.ptr, | 1615 | if ((rc = ops->read_emulated((unsigned long)c->dst.ptr, |
1578 | &c->dst.val, | 1616 | &c->dst.val, |
@@ -1596,7 +1634,7 @@ special_insn: | |||
1596 | c->dst.type = OP_MEM; | 1634 | c->dst.type = OP_MEM; |
1597 | c->dst.bytes = (c->d & ByteOp) ? 1 : c->op_bytes; | 1635 | c->dst.bytes = (c->d & ByteOp) ? 1 : c->op_bytes; |
1598 | c->dst.ptr = (unsigned long *)register_address(c, | 1636 | c->dst.ptr = (unsigned long *)register_address(c, |
1599 | ctxt->es_base, | 1637 | es_base(ctxt), |
1600 | c->regs[VCPU_REGS_RDI]); | 1638 | c->regs[VCPU_REGS_RDI]); |
1601 | c->dst.val = c->regs[VCPU_REGS_RAX]; | 1639 | c->dst.val = c->regs[VCPU_REGS_RAX]; |
1602 | register_address_increment(c, &c->regs[VCPU_REGS_RDI], | 1640 | register_address_increment(c, &c->regs[VCPU_REGS_RDI], |
@@ -1608,8 +1646,7 @@ special_insn: | |||
1608 | c->dst.bytes = (c->d & ByteOp) ? 1 : c->op_bytes; | 1646 | c->dst.bytes = (c->d & ByteOp) ? 1 : c->op_bytes; |
1609 | c->dst.ptr = (unsigned long *)&c->regs[VCPU_REGS_RAX]; | 1647 | c->dst.ptr = (unsigned long *)&c->regs[VCPU_REGS_RAX]; |
1610 | if ((rc = ops->read_emulated(register_address(c, | 1648 | if ((rc = ops->read_emulated(register_address(c, |
1611 | c->override_base ? *c->override_base : | 1649 | seg_override_base(ctxt, c), |
1612 | ctxt->ds_base, | ||
1613 | c->regs[VCPU_REGS_RSI]), | 1650 | c->regs[VCPU_REGS_RSI]), |
1614 | &c->dst.val, | 1651 | &c->dst.val, |
1615 | c->dst.bytes, | 1652 | c->dst.bytes, |
@@ -1622,6 +1659,8 @@ special_insn: | |||
1622 | case 0xae ... 0xaf: /* scas */ | 1659 | case 0xae ... 0xaf: /* scas */ |
1623 | DPRINTF("Urk! I don't handle SCAS.\n"); | 1660 | DPRINTF("Urk! I don't handle SCAS.\n"); |
1624 | goto cannot_emulate; | 1661 | goto cannot_emulate; |
1662 | case 0xb8: /* mov r, imm */ | ||
1663 | goto mov; | ||
1625 | case 0xc0 ... 0xc1: | 1664 | case 0xc0 ... 0xc1: |
1626 | emulate_grp2(ctxt); | 1665 | emulate_grp2(ctxt); |
1627 | break; | 1666 | break; |
@@ -1660,13 +1699,39 @@ special_insn: | |||
1660 | break; | 1699 | break; |
1661 | } | 1700 | } |
1662 | case 0xe9: /* jmp rel */ | 1701 | case 0xe9: /* jmp rel */ |
1663 | case 0xeb: /* jmp rel short */ | 1702 | goto jmp; |
1703 | case 0xea: /* jmp far */ { | ||
1704 | uint32_t eip; | ||
1705 | uint16_t sel; | ||
1706 | |||
1707 | switch (c->op_bytes) { | ||
1708 | case 2: | ||
1709 | eip = insn_fetch(u16, 2, c->eip); | ||
1710 | break; | ||
1711 | case 4: | ||
1712 | eip = insn_fetch(u32, 4, c->eip); | ||
1713 | break; | ||
1714 | default: | ||
1715 | DPRINTF("jmp far: Invalid op_bytes\n"); | ||
1716 | goto cannot_emulate; | ||
1717 | } | ||
1718 | sel = insn_fetch(u16, 2, c->eip); | ||
1719 | if (kvm_load_segment_descriptor(ctxt->vcpu, sel, 9, VCPU_SREG_CS) < 0) { | ||
1720 | DPRINTF("jmp far: Failed to load CS descriptor\n"); | ||
1721 | goto cannot_emulate; | ||
1722 | } | ||
1723 | |||
1724 | c->eip = eip; | ||
1725 | break; | ||
1726 | } | ||
1727 | case 0xeb: | ||
1728 | jmp: /* jmp rel short */ | ||
1664 | jmp_rel(c, c->src.val); | 1729 | jmp_rel(c, c->src.val); |
1665 | c->dst.type = OP_NONE; /* Disable writeback. */ | 1730 | c->dst.type = OP_NONE; /* Disable writeback. */ |
1666 | break; | 1731 | break; |
1667 | case 0xf4: /* hlt */ | 1732 | case 0xf4: /* hlt */ |
1668 | ctxt->vcpu->arch.halt_request = 1; | 1733 | ctxt->vcpu->arch.halt_request = 1; |
1669 | goto done; | 1734 | break; |
1670 | case 0xf5: /* cmc */ | 1735 | case 0xf5: /* cmc */ |
1671 | /* complement carry flag from eflags reg */ | 1736 | /* complement carry flag from eflags reg */ |
1672 | ctxt->eflags ^= EFLG_CF; | 1737 | ctxt->eflags ^= EFLG_CF; |
@@ -1882,6 +1947,8 @@ twobyte_insn: | |||
1882 | c->src.val &= (c->dst.bytes << 3) - 1; | 1947 | c->src.val &= (c->dst.bytes << 3) - 1; |
1883 | emulate_2op_SrcV_nobyte("bts", c->src, c->dst, ctxt->eflags); | 1948 | emulate_2op_SrcV_nobyte("bts", c->src, c->dst, ctxt->eflags); |
1884 | break; | 1949 | break; |
1950 | case 0xae: /* clflush */ | ||
1951 | break; | ||
1885 | case 0xb0 ... 0xb1: /* cmpxchg */ | 1952 | case 0xb0 ... 0xb1: /* cmpxchg */ |
1886 | /* | 1953 | /* |
1887 | * Save real source value, then compare EAX against | 1954 | * Save real source value, then compare EAX against |
diff --git a/arch/x86/lguest/boot.c b/arch/x86/lguest/boot.c index 50dad44fb542..d9249a882aa5 100644 --- a/arch/x86/lguest/boot.c +++ b/arch/x86/lguest/boot.c | |||
@@ -991,7 +991,6 @@ __init void lguest_init(void) | |||
991 | #ifdef CONFIG_X86_LOCAL_APIC | 991 | #ifdef CONFIG_X86_LOCAL_APIC |
992 | /* apic read/write intercepts */ | 992 | /* apic read/write intercepts */ |
993 | pv_apic_ops.apic_write = lguest_apic_write; | 993 | pv_apic_ops.apic_write = lguest_apic_write; |
994 | pv_apic_ops.apic_write_atomic = lguest_apic_write; | ||
995 | pv_apic_ops.apic_read = lguest_apic_read; | 994 | pv_apic_ops.apic_read = lguest_apic_read; |
996 | #endif | 995 | #endif |
997 | 996 | ||
@@ -1015,6 +1014,9 @@ __init void lguest_init(void) | |||
1015 | init_pg_tables_start = __pa(pg0); | 1014 | init_pg_tables_start = __pa(pg0); |
1016 | init_pg_tables_end = __pa(pg0); | 1015 | init_pg_tables_end = __pa(pg0); |
1017 | 1016 | ||
1017 | /* As described in head_32.S, we map the first 128M of memory. */ | ||
1018 | max_pfn_mapped = (128*1024*1024) >> PAGE_SHIFT; | ||
1019 | |||
1018 | /* Load the %fs segment register (the per-cpu segment register) with | 1020 | /* Load the %fs segment register (the per-cpu segment register) with |
1019 | * the normal data segment to get through booting. */ | 1021 | * the normal data segment to get through booting. */ |
1020 | asm volatile ("mov %0, %%fs" : : "r" (__KERNEL_DS) : "memory"); | 1022 | asm volatile ("mov %0, %%fs" : : "r" (__KERNEL_DS) : "memory"); |
diff --git a/arch/x86/lib/copy_user_64.S b/arch/x86/lib/copy_user_64.S index dfdf428975c0..f118c110af32 100644 --- a/arch/x86/lib/copy_user_64.S +++ b/arch/x86/lib/copy_user_64.S | |||
@@ -52,7 +52,7 @@ | |||
52 | jnz 100b | 52 | jnz 100b |
53 | 102: | 53 | 102: |
54 | .section .fixup,"ax" | 54 | .section .fixup,"ax" |
55 | 103: addl %r8d,%edx /* ecx is zerorest also */ | 55 | 103: addl %ecx,%edx /* ecx is zerorest also */ |
56 | jmp copy_user_handle_tail | 56 | jmp copy_user_handle_tail |
57 | .previous | 57 | .previous |
58 | 58 | ||
diff --git a/arch/x86/lib/copy_user_nocache_64.S b/arch/x86/lib/copy_user_nocache_64.S index 40e0e309d27e..cb0c112386fb 100644 --- a/arch/x86/lib/copy_user_nocache_64.S +++ b/arch/x86/lib/copy_user_nocache_64.S | |||
@@ -32,7 +32,7 @@ | |||
32 | jnz 100b | 32 | jnz 100b |
33 | 102: | 33 | 102: |
34 | .section .fixup,"ax" | 34 | .section .fixup,"ax" |
35 | 103: addl %r8d,%edx /* ecx is zerorest also */ | 35 | 103: addl %ecx,%edx /* ecx is zerorest also */ |
36 | jmp copy_user_handle_tail | 36 | jmp copy_user_handle_tail |
37 | .previous | 37 | .previous |
38 | 38 | ||
@@ -108,7 +108,6 @@ ENTRY(__copy_user_nocache) | |||
108 | jmp 60f | 108 | jmp 60f |
109 | 50: movl %ecx,%edx | 109 | 50: movl %ecx,%edx |
110 | 60: sfence | 110 | 60: sfence |
111 | movl %r8d,%ecx | ||
112 | jmp copy_user_handle_tail | 111 | jmp copy_user_handle_tail |
113 | .previous | 112 | .previous |
114 | 113 | ||
diff --git a/arch/x86/lib/msr-on-cpu.c b/arch/x86/lib/msr-on-cpu.c index d5a2b39f882b..01b868ba82f8 100644 --- a/arch/x86/lib/msr-on-cpu.c +++ b/arch/x86/lib/msr-on-cpu.c | |||
@@ -30,10 +30,11 @@ static int _rdmsr_on_cpu(unsigned int cpu, u32 msr_no, u32 *l, u32 *h, int safe) | |||
30 | 30 | ||
31 | rv.msr_no = msr_no; | 31 | rv.msr_no = msr_no; |
32 | if (safe) { | 32 | if (safe) { |
33 | smp_call_function_single(cpu, __rdmsr_safe_on_cpu, &rv, 1); | 33 | err = smp_call_function_single(cpu, __rdmsr_safe_on_cpu, |
34 | err = rv.err; | 34 | &rv, 1); |
35 | err = err ? err : rv.err; | ||
35 | } else { | 36 | } else { |
36 | smp_call_function_single(cpu, __rdmsr_on_cpu, &rv, 1); | 37 | err = smp_call_function_single(cpu, __rdmsr_on_cpu, &rv, 1); |
37 | } | 38 | } |
38 | *l = rv.l; | 39 | *l = rv.l; |
39 | *h = rv.h; | 40 | *h = rv.h; |
@@ -64,23 +65,24 @@ static int _wrmsr_on_cpu(unsigned int cpu, u32 msr_no, u32 l, u32 h, int safe) | |||
64 | rv.l = l; | 65 | rv.l = l; |
65 | rv.h = h; | 66 | rv.h = h; |
66 | if (safe) { | 67 | if (safe) { |
67 | smp_call_function_single(cpu, __wrmsr_safe_on_cpu, &rv, 1); | 68 | err = smp_call_function_single(cpu, __wrmsr_safe_on_cpu, |
68 | err = rv.err; | 69 | &rv, 1); |
70 | err = err ? err : rv.err; | ||
69 | } else { | 71 | } else { |
70 | smp_call_function_single(cpu, __wrmsr_on_cpu, &rv, 1); | 72 | err = smp_call_function_single(cpu, __wrmsr_on_cpu, &rv, 1); |
71 | } | 73 | } |
72 | 74 | ||
73 | return err; | 75 | return err; |
74 | } | 76 | } |
75 | 77 | ||
76 | void wrmsr_on_cpu(unsigned int cpu, u32 msr_no, u32 l, u32 h) | 78 | int wrmsr_on_cpu(unsigned int cpu, u32 msr_no, u32 l, u32 h) |
77 | { | 79 | { |
78 | _wrmsr_on_cpu(cpu, msr_no, l, h, 0); | 80 | return _wrmsr_on_cpu(cpu, msr_no, l, h, 0); |
79 | } | 81 | } |
80 | 82 | ||
81 | void rdmsr_on_cpu(unsigned int cpu, u32 msr_no, u32 *l, u32 *h) | 83 | int rdmsr_on_cpu(unsigned int cpu, u32 msr_no, u32 *l, u32 *h) |
82 | { | 84 | { |
83 | _rdmsr_on_cpu(cpu, msr_no, l, h, 0); | 85 | return _rdmsr_on_cpu(cpu, msr_no, l, h, 0); |
84 | } | 86 | } |
85 | 87 | ||
86 | /* These "safe" variants are slower and should be used when the target MSR | 88 | /* These "safe" variants are slower and should be used when the target MSR |
diff --git a/arch/x86/mach-default/setup.c b/arch/x86/mach-default/setup.c index 48278fa7d3de..3d317836be9e 100644 --- a/arch/x86/mach-default/setup.c +++ b/arch/x86/mach-default/setup.c | |||
@@ -10,14 +10,6 @@ | |||
10 | #include <asm/e820.h> | 10 | #include <asm/e820.h> |
11 | #include <asm/setup.h> | 11 | #include <asm/setup.h> |
12 | 12 | ||
13 | /* | ||
14 | * Any quirks to be performed to initialize timers/irqs/etc? | ||
15 | */ | ||
16 | int (*arch_time_init_quirk)(void); | ||
17 | int (*arch_pre_intr_init_quirk)(void); | ||
18 | int (*arch_intr_init_quirk)(void); | ||
19 | int (*arch_trap_init_quirk)(void); | ||
20 | |||
21 | #ifdef CONFIG_HOTPLUG_CPU | 13 | #ifdef CONFIG_HOTPLUG_CPU |
22 | #define DEFAULT_SEND_IPI (1) | 14 | #define DEFAULT_SEND_IPI (1) |
23 | #else | 15 | #else |
@@ -37,8 +29,8 @@ int no_broadcast=DEFAULT_SEND_IPI; | |||
37 | **/ | 29 | **/ |
38 | void __init pre_intr_init_hook(void) | 30 | void __init pre_intr_init_hook(void) |
39 | { | 31 | { |
40 | if (arch_pre_intr_init_quirk) { | 32 | if (x86_quirks->arch_pre_intr_init) { |
41 | if (arch_pre_intr_init_quirk()) | 33 | if (x86_quirks->arch_pre_intr_init()) |
42 | return; | 34 | return; |
43 | } | 35 | } |
44 | init_ISA_irqs(); | 36 | init_ISA_irqs(); |
@@ -64,8 +56,8 @@ static struct irqaction irq2 = { | |||
64 | **/ | 56 | **/ |
65 | void __init intr_init_hook(void) | 57 | void __init intr_init_hook(void) |
66 | { | 58 | { |
67 | if (arch_intr_init_quirk) { | 59 | if (x86_quirks->arch_intr_init) { |
68 | if (arch_intr_init_quirk()) | 60 | if (x86_quirks->arch_intr_init()) |
69 | return; | 61 | return; |
70 | } | 62 | } |
71 | #ifdef CONFIG_X86_LOCAL_APIC | 63 | #ifdef CONFIG_X86_LOCAL_APIC |
@@ -97,8 +89,8 @@ void __init pre_setup_arch_hook(void) | |||
97 | **/ | 89 | **/ |
98 | void __init trap_init_hook(void) | 90 | void __init trap_init_hook(void) |
99 | { | 91 | { |
100 | if (arch_trap_init_quirk) { | 92 | if (x86_quirks->arch_trap_init) { |
101 | if (arch_trap_init_quirk()) | 93 | if (x86_quirks->arch_trap_init()) |
102 | return; | 94 | return; |
103 | } | 95 | } |
104 | } | 96 | } |
@@ -111,6 +103,16 @@ static struct irqaction irq0 = { | |||
111 | }; | 103 | }; |
112 | 104 | ||
113 | /** | 105 | /** |
106 | * pre_time_init_hook - do any specific initialisations before. | ||
107 | * | ||
108 | **/ | ||
109 | void __init pre_time_init_hook(void) | ||
110 | { | ||
111 | if (x86_quirks->arch_pre_time_init) | ||
112 | x86_quirks->arch_pre_time_init(); | ||
113 | } | ||
114 | |||
115 | /** | ||
114 | * time_init_hook - do any specific initialisations for the system timer. | 116 | * time_init_hook - do any specific initialisations for the system timer. |
115 | * | 117 | * |
116 | * Description: | 118 | * Description: |
@@ -119,13 +121,13 @@ static struct irqaction irq0 = { | |||
119 | **/ | 121 | **/ |
120 | void __init time_init_hook(void) | 122 | void __init time_init_hook(void) |
121 | { | 123 | { |
122 | if (arch_time_init_quirk) { | 124 | if (x86_quirks->arch_time_init) { |
123 | /* | 125 | /* |
124 | * A nonzero return code does not mean failure, it means | 126 | * A nonzero return code does not mean failure, it means |
125 | * that the architecture quirk does not want any | 127 | * that the architecture quirk does not want any |
126 | * generic (timer) setup to be performed after this: | 128 | * generic (timer) setup to be performed after this: |
127 | */ | 129 | */ |
128 | if (arch_time_init_quirk()) | 130 | if (x86_quirks->arch_time_init()) |
129 | return; | 131 | return; |
130 | } | 132 | } |
131 | 133 | ||
diff --git a/arch/x86/mach-es7000/es7000plat.c b/arch/x86/mach-es7000/es7000plat.c index 4354ce804889..50189af14b85 100644 --- a/arch/x86/mach-es7000/es7000plat.c +++ b/arch/x86/mach-es7000/es7000plat.c | |||
@@ -130,10 +130,10 @@ parse_unisys_oem (char *oemptr) | |||
130 | mip_addr = val; | 130 | mip_addr = val; |
131 | mip = (struct mip_reg *)val; | 131 | mip = (struct mip_reg *)val; |
132 | mip_reg = __va(mip); | 132 | mip_reg = __va(mip); |
133 | Dprintk("es7000_mipcfg: host_reg = 0x%lx \n", | 133 | pr_debug("es7000_mipcfg: host_reg = 0x%lx \n", |
134 | (unsigned long)host_reg); | 134 | (unsigned long)host_reg); |
135 | Dprintk("es7000_mipcfg: mip_reg = 0x%lx \n", | 135 | pr_debug("es7000_mipcfg: mip_reg = 0x%lx \n", |
136 | (unsigned long)mip_reg); | 136 | (unsigned long)mip_reg); |
137 | success++; | 137 | success++; |
138 | break; | 138 | break; |
139 | case MIP_PSAI_REG: | 139 | case MIP_PSAI_REG: |
diff --git a/arch/x86/mach-rdc321x/platform.c b/arch/x86/mach-rdc321x/platform.c index a037041817c7..4f4e50c3ad3b 100644 --- a/arch/x86/mach-rdc321x/platform.c +++ b/arch/x86/mach-rdc321x/platform.c | |||
@@ -25,7 +25,6 @@ | |||
25 | #include <linux/list.h> | 25 | #include <linux/list.h> |
26 | #include <linux/device.h> | 26 | #include <linux/device.h> |
27 | #include <linux/platform_device.h> | 27 | #include <linux/platform_device.h> |
28 | #include <linux/version.h> | ||
29 | #include <linux/leds.h> | 28 | #include <linux/leds.h> |
30 | 29 | ||
31 | #include <asm/gpio.h> | 30 | #include <asm/gpio.h> |
diff --git a/arch/x86/mm/Makefile b/arch/x86/mm/Makefile index 9873716e9f76..dfb932dcf136 100644 --- a/arch/x86/mm/Makefile +++ b/arch/x86/mm/Makefile | |||
@@ -1,5 +1,5 @@ | |||
1 | obj-y := init_$(BITS).o fault.o ioremap.o extable.o pageattr.o mmap.o \ | 1 | obj-y := init_$(BITS).o fault.o ioremap.o extable.o pageattr.o mmap.o \ |
2 | pat.o pgtable.o | 2 | pat.o pgtable.o gup.o |
3 | 3 | ||
4 | obj-$(CONFIG_X86_32) += pgtable_32.o | 4 | obj-$(CONFIG_X86_32) += pgtable_32.o |
5 | 5 | ||
@@ -21,3 +21,4 @@ obj-$(CONFIG_K8_NUMA) += k8topology_64.o | |||
21 | endif | 21 | endif |
22 | obj-$(CONFIG_ACPI_NUMA) += srat_$(BITS).o | 22 | obj-$(CONFIG_ACPI_NUMA) += srat_$(BITS).o |
23 | 23 | ||
24 | obj-$(CONFIG_MEMTEST) += memtest.o | ||
diff --git a/arch/x86/mm/discontig_32.c b/arch/x86/mm/discontig_32.c index 5dfef9fa061a..62fa440678d8 100644 --- a/arch/x86/mm/discontig_32.c +++ b/arch/x86/mm/discontig_32.c | |||
@@ -42,7 +42,6 @@ | |||
42 | 42 | ||
43 | struct pglist_data *node_data[MAX_NUMNODES] __read_mostly; | 43 | struct pglist_data *node_data[MAX_NUMNODES] __read_mostly; |
44 | EXPORT_SYMBOL(node_data); | 44 | EXPORT_SYMBOL(node_data); |
45 | static bootmem_data_t node0_bdata; | ||
46 | 45 | ||
47 | /* | 46 | /* |
48 | * numa interface - we expect the numa architecture specific code to have | 47 | * numa interface - we expect the numa architecture specific code to have |
@@ -385,7 +384,7 @@ void __init initmem_init(unsigned long start_pfn, | |||
385 | for_each_online_node(nid) | 384 | for_each_online_node(nid) |
386 | memset(NODE_DATA(nid), 0, sizeof(struct pglist_data)); | 385 | memset(NODE_DATA(nid), 0, sizeof(struct pglist_data)); |
387 | 386 | ||
388 | NODE_DATA(0)->bdata = &node0_bdata; | 387 | NODE_DATA(0)->bdata = &bootmem_node_data[0]; |
389 | setup_bootmem_allocator(); | 388 | setup_bootmem_allocator(); |
390 | } | 389 | } |
391 | 390 | ||
diff --git a/arch/x86/mm/dump_pagetables.c b/arch/x86/mm/dump_pagetables.c index 0bb0caed8971..a20d1fa64b4e 100644 --- a/arch/x86/mm/dump_pagetables.c +++ b/arch/x86/mm/dump_pagetables.c | |||
@@ -148,8 +148,8 @@ static void note_page(struct seq_file *m, struct pg_state *st, | |||
148 | * we have now. "break" is either changing perms, levels or | 148 | * we have now. "break" is either changing perms, levels or |
149 | * address space marker. | 149 | * address space marker. |
150 | */ | 150 | */ |
151 | prot = pgprot_val(new_prot) & ~(PTE_MASK); | 151 | prot = pgprot_val(new_prot) & ~(PTE_PFN_MASK); |
152 | cur = pgprot_val(st->current_prot) & ~(PTE_MASK); | 152 | cur = pgprot_val(st->current_prot) & ~(PTE_PFN_MASK); |
153 | 153 | ||
154 | if (!st->level) { | 154 | if (!st->level) { |
155 | /* First entry */ | 155 | /* First entry */ |
@@ -221,7 +221,7 @@ static void walk_pmd_level(struct seq_file *m, struct pg_state *st, pud_t addr, | |||
221 | for (i = 0; i < PTRS_PER_PMD; i++) { | 221 | for (i = 0; i < PTRS_PER_PMD; i++) { |
222 | st->current_address = normalize_addr(P + i * PMD_LEVEL_MULT); | 222 | st->current_address = normalize_addr(P + i * PMD_LEVEL_MULT); |
223 | if (!pmd_none(*start)) { | 223 | if (!pmd_none(*start)) { |
224 | pgprotval_t prot = pmd_val(*start) & ~PTE_MASK; | 224 | pgprotval_t prot = pmd_val(*start) & PTE_FLAGS_MASK; |
225 | 225 | ||
226 | if (pmd_large(*start) || !pmd_present(*start)) | 226 | if (pmd_large(*start) || !pmd_present(*start)) |
227 | note_page(m, st, __pgprot(prot), 3); | 227 | note_page(m, st, __pgprot(prot), 3); |
@@ -253,7 +253,7 @@ static void walk_pud_level(struct seq_file *m, struct pg_state *st, pgd_t addr, | |||
253 | for (i = 0; i < PTRS_PER_PUD; i++) { | 253 | for (i = 0; i < PTRS_PER_PUD; i++) { |
254 | st->current_address = normalize_addr(P + i * PUD_LEVEL_MULT); | 254 | st->current_address = normalize_addr(P + i * PUD_LEVEL_MULT); |
255 | if (!pud_none(*start)) { | 255 | if (!pud_none(*start)) { |
256 | pgprotval_t prot = pud_val(*start) & ~PTE_MASK; | 256 | pgprotval_t prot = pud_val(*start) & PTE_FLAGS_MASK; |
257 | 257 | ||
258 | if (pud_large(*start) || !pud_present(*start)) | 258 | if (pud_large(*start) || !pud_present(*start)) |
259 | note_page(m, st, __pgprot(prot), 2); | 259 | note_page(m, st, __pgprot(prot), 2); |
@@ -288,7 +288,7 @@ static void walk_pgd_level(struct seq_file *m) | |||
288 | for (i = 0; i < PTRS_PER_PGD; i++) { | 288 | for (i = 0; i < PTRS_PER_PGD; i++) { |
289 | st.current_address = normalize_addr(i * PGD_LEVEL_MULT); | 289 | st.current_address = normalize_addr(i * PGD_LEVEL_MULT); |
290 | if (!pgd_none(*start)) { | 290 | if (!pgd_none(*start)) { |
291 | pgprotval_t prot = pgd_val(*start) & ~PTE_MASK; | 291 | pgprotval_t prot = pgd_val(*start) & PTE_FLAGS_MASK; |
292 | 292 | ||
293 | if (pgd_large(*start) || !pgd_present(*start)) | 293 | if (pgd_large(*start) || !pgd_present(*start)) |
294 | note_page(m, &st, __pgprot(prot), 1); | 294 | note_page(m, &st, __pgprot(prot), 1); |
diff --git a/arch/x86/mm/gup.c b/arch/x86/mm/gup.c new file mode 100644 index 000000000000..007bb06c7504 --- /dev/null +++ b/arch/x86/mm/gup.c | |||
@@ -0,0 +1,298 @@ | |||
1 | /* | ||
2 | * Lockless get_user_pages_fast for x86 | ||
3 | * | ||
4 | * Copyright (C) 2008 Nick Piggin | ||
5 | * Copyright (C) 2008 Novell Inc. | ||
6 | */ | ||
7 | #include <linux/sched.h> | ||
8 | #include <linux/mm.h> | ||
9 | #include <linux/vmstat.h> | ||
10 | #include <linux/highmem.h> | ||
11 | |||
12 | #include <asm/pgtable.h> | ||
13 | |||
14 | static inline pte_t gup_get_pte(pte_t *ptep) | ||
15 | { | ||
16 | #ifndef CONFIG_X86_PAE | ||
17 | return *ptep; | ||
18 | #else | ||
19 | /* | ||
20 | * With get_user_pages_fast, we walk down the pagetables without taking | ||
21 | * any locks. For this we would like to load the pointers atomically, | |
22 | * but that is not possible (without expensive cmpxchg8b) on PAE. What | ||
23 | * we do have is the guarantee that a pte will only either go from not | ||
24 | * present to present, or present to not present or both -- it will not | ||
25 | * switch to a completely different present page without a TLB flush in | ||
26 | * between; something that we are blocking by holding interrupts off. | ||
27 | * | ||
28 | * Setting ptes from not present to present goes: | ||
29 | * ptep->pte_high = h; | ||
30 | * smp_wmb(); | ||
31 | * ptep->pte_low = l; | ||
32 | * | ||
33 | * And present to not present goes: | ||
34 | * ptep->pte_low = 0; | ||
35 | * smp_wmb(); | ||
36 | * ptep->pte_high = 0; | ||
37 | * | ||
38 | * We must ensure here that the load of pte_low sees l iff pte_high | ||
39 | * sees h. We load pte_high *after* loading pte_low, which ensures we | ||
40 | * don't see an older value of pte_high. *Then* we recheck pte_low, | ||
41 | * which ensures that we haven't picked up a changed pte high. We might | ||
42 | * have got rubbish values from pte_low and pte_high, but we are | ||
43 | * guaranteed that pte_low will not have the present bit set *unless* | ||
44 | * it is 'l'. And get_user_pages_fast only operates on present ptes, so | ||
45 | * we're safe. | ||
46 | * | ||
47 | * gup_get_pte should not be used or copied outside gup.c without being | ||
48 | * very careful -- it does not atomically load the pte or anything that | ||
49 | * is likely to be useful for you. | ||
50 | */ | ||
51 | pte_t pte; | ||
52 | |||
53 | retry: | ||
54 | pte.pte_low = ptep->pte_low; | ||
55 | smp_rmb(); | ||
56 | pte.pte_high = ptep->pte_high; | ||
57 | smp_rmb(); | ||
58 | if (unlikely(pte.pte_low != ptep->pte_low)) | ||
59 | goto retry; | ||
60 | |||
61 | return pte; | ||
62 | #endif | ||
63 | } | ||
64 | |||
65 | /* | ||
66 | * The performance critical leaf functions are made noinline otherwise gcc | ||
67 | * inlines everything into a single function which results in too much | ||
68 | * register pressure. | ||
69 | */ | ||
70 | static noinline int gup_pte_range(pmd_t pmd, unsigned long addr, | ||
71 | unsigned long end, int write, struct page **pages, int *nr) | ||
72 | { | ||
73 | unsigned long mask; | ||
74 | pte_t *ptep; | ||
75 | |||
76 | mask = _PAGE_PRESENT|_PAGE_USER; | ||
77 | if (write) | ||
78 | mask |= _PAGE_RW; | ||
79 | |||
80 | ptep = pte_offset_map(&pmd, addr); | ||
81 | do { | ||
82 | pte_t pte = gup_get_pte(ptep); | ||
83 | struct page *page; | ||
84 | |||
85 | if ((pte_val(pte) & (mask | _PAGE_SPECIAL)) != mask) { | ||
86 | pte_unmap(ptep); | ||
87 | return 0; | ||
88 | } | ||
89 | VM_BUG_ON(!pfn_valid(pte_pfn(pte))); | ||
90 | page = pte_page(pte); | ||
91 | get_page(page); | ||
92 | pages[*nr] = page; | ||
93 | (*nr)++; | ||
94 | |||
95 | } while (ptep++, addr += PAGE_SIZE, addr != end); | ||
96 | pte_unmap(ptep - 1); | ||
97 | |||
98 | return 1; | ||
99 | } | ||
100 | |||
101 | static inline void get_head_page_multiple(struct page *page, int nr) | ||
102 | { | ||
103 | VM_BUG_ON(page != compound_head(page)); | ||
104 | VM_BUG_ON(page_count(page) == 0); | ||
105 | atomic_add(nr, &page->_count); | ||
106 | } | ||
107 | |||
108 | static noinline int gup_huge_pmd(pmd_t pmd, unsigned long addr, | ||
109 | unsigned long end, int write, struct page **pages, int *nr) | ||
110 | { | ||
111 | unsigned long mask; | ||
112 | pte_t pte = *(pte_t *)&pmd; | ||
113 | struct page *head, *page; | ||
114 | int refs; | ||
115 | |||
116 | mask = _PAGE_PRESENT|_PAGE_USER; | ||
117 | if (write) | ||
118 | mask |= _PAGE_RW; | ||
119 | if ((pte_val(pte) & mask) != mask) | ||
120 | return 0; | ||
121 | /* hugepages are never "special" */ | ||
122 | VM_BUG_ON(pte_val(pte) & _PAGE_SPECIAL); | ||
123 | VM_BUG_ON(!pfn_valid(pte_pfn(pte))); | ||
124 | |||
125 | refs = 0; | ||
126 | head = pte_page(pte); | ||
127 | page = head + ((addr & ~PMD_MASK) >> PAGE_SHIFT); | ||
128 | do { | ||
129 | VM_BUG_ON(compound_head(page) != head); | ||
130 | pages[*nr] = page; | ||
131 | (*nr)++; | ||
132 | page++; | ||
133 | refs++; | ||
134 | } while (addr += PAGE_SIZE, addr != end); | ||
135 | get_head_page_multiple(head, refs); | ||
136 | |||
137 | return 1; | ||
138 | } | ||
139 | |||
140 | static int gup_pmd_range(pud_t pud, unsigned long addr, unsigned long end, | ||
141 | int write, struct page **pages, int *nr) | ||
142 | { | ||
143 | unsigned long next; | ||
144 | pmd_t *pmdp; | ||
145 | |||
146 | pmdp = pmd_offset(&pud, addr); | ||
147 | do { | ||
148 | pmd_t pmd = *pmdp; | ||
149 | |||
150 | next = pmd_addr_end(addr, end); | ||
151 | if (pmd_none(pmd)) | ||
152 | return 0; | ||
153 | if (unlikely(pmd_large(pmd))) { | ||
154 | if (!gup_huge_pmd(pmd, addr, next, write, pages, nr)) | ||
155 | return 0; | ||
156 | } else { | ||
157 | if (!gup_pte_range(pmd, addr, next, write, pages, nr)) | ||
158 | return 0; | ||
159 | } | ||
160 | } while (pmdp++, addr = next, addr != end); | ||
161 | |||
162 | return 1; | ||
163 | } | ||
164 | |||
165 | static noinline int gup_huge_pud(pud_t pud, unsigned long addr, | ||
166 | unsigned long end, int write, struct page **pages, int *nr) | ||
167 | { | ||
168 | unsigned long mask; | ||
169 | pte_t pte = *(pte_t *)&pud; | ||
170 | struct page *head, *page; | ||
171 | int refs; | ||
172 | |||
173 | mask = _PAGE_PRESENT|_PAGE_USER; | ||
174 | if (write) | ||
175 | mask |= _PAGE_RW; | ||
176 | if ((pte_val(pte) & mask) != mask) | ||
177 | return 0; | ||
178 | /* hugepages are never "special" */ | ||
179 | VM_BUG_ON(pte_val(pte) & _PAGE_SPECIAL); | ||
180 | VM_BUG_ON(!pfn_valid(pte_pfn(pte))); | ||
181 | |||
182 | refs = 0; | ||
183 | head = pte_page(pte); | ||
184 | page = head + ((addr & ~PUD_MASK) >> PAGE_SHIFT); | ||
185 | do { | ||
186 | VM_BUG_ON(compound_head(page) != head); | ||
187 | pages[*nr] = page; | ||
188 | (*nr)++; | ||
189 | page++; | ||
190 | refs++; | ||
191 | } while (addr += PAGE_SIZE, addr != end); | ||
192 | get_head_page_multiple(head, refs); | ||
193 | |||
194 | return 1; | ||
195 | } | ||
196 | |||
197 | static int gup_pud_range(pgd_t pgd, unsigned long addr, unsigned long end, | ||
198 | int write, struct page **pages, int *nr) | ||
199 | { | ||
200 | unsigned long next; | ||
201 | pud_t *pudp; | ||
202 | |||
203 | pudp = pud_offset(&pgd, addr); | ||
204 | do { | ||
205 | pud_t pud = *pudp; | ||
206 | |||
207 | next = pud_addr_end(addr, end); | ||
208 | if (pud_none(pud)) | ||
209 | return 0; | ||
210 | if (unlikely(pud_large(pud))) { | ||
211 | if (!gup_huge_pud(pud, addr, next, write, pages, nr)) | ||
212 | return 0; | ||
213 | } else { | ||
214 | if (!gup_pmd_range(pud, addr, next, write, pages, nr)) | ||
215 | return 0; | ||
216 | } | ||
217 | } while (pudp++, addr = next, addr != end); | ||
218 | |||
219 | return 1; | ||
220 | } | ||
221 | |||
222 | int get_user_pages_fast(unsigned long start, int nr_pages, int write, | ||
223 | struct page **pages) | ||
224 | { | ||
225 | struct mm_struct *mm = current->mm; | ||
226 | unsigned long addr, len, end; | ||
227 | unsigned long next; | ||
228 | pgd_t *pgdp; | ||
229 | int nr = 0; | ||
230 | |||
231 | start &= PAGE_MASK; | ||
232 | addr = start; | ||
233 | len = (unsigned long) nr_pages << PAGE_SHIFT; | ||
234 | end = start + len; | ||
235 | if (unlikely(!access_ok(write ? VERIFY_WRITE : VERIFY_READ, | ||
236 | start, len))) | ||
237 | goto slow_irqon; | ||
238 | |||
239 | /* | ||
240 | * XXX: batch / limit 'nr', to avoid large irq off latency | ||
241 | * needs some instrumenting to determine the common sizes used by | ||
242 | * important workloads (eg. DB2), and whether limiting the batch size | ||
243 | * will decrease performance. | ||
244 | * | ||
245 | * It seems like we're in the clear for the moment. Direct-IO is | ||
246 | * the main guy that batches up lots of get_user_pages, and even | ||
247 | * they are limited to 64-at-a-time which is not so many. | ||
248 | */ | ||
249 | /* | ||
250 | * This doesn't prevent pagetable teardown, but does prevent | ||
251 | * the pagetables and pages from being freed on x86. | ||
252 | * | ||
253 | * So long as we atomically load page table pointers versus teardown | ||
254 | * (which we do on x86, with the above PAE exception), we can follow the | ||
255 | * address down to the page and take a ref on it. | |
256 | */ | ||
257 | local_irq_disable(); | ||
258 | pgdp = pgd_offset(mm, addr); | ||
259 | do { | ||
260 | pgd_t pgd = *pgdp; | ||
261 | |||
262 | next = pgd_addr_end(addr, end); | ||
263 | if (pgd_none(pgd)) | ||
264 | goto slow; | ||
265 | if (!gup_pud_range(pgd, addr, next, write, pages, &nr)) | ||
266 | goto slow; | ||
267 | } while (pgdp++, addr = next, addr != end); | ||
268 | local_irq_enable(); | ||
269 | |||
270 | VM_BUG_ON(nr != (end - start) >> PAGE_SHIFT); | ||
271 | return nr; | ||
272 | |||
273 | { | ||
274 | int ret; | ||
275 | |||
276 | slow: | ||
277 | local_irq_enable(); | ||
278 | slow_irqon: | ||
279 | /* Try to get the remaining pages with get_user_pages */ | ||
280 | start += nr << PAGE_SHIFT; | ||
281 | pages += nr; | ||
282 | |||
283 | down_read(&mm->mmap_sem); | ||
284 | ret = get_user_pages(current, mm, start, | ||
285 | (end - start) >> PAGE_SHIFT, write, 0, pages, NULL); | ||
286 | up_read(&mm->mmap_sem); | ||
287 | |||
288 | /* Have to be a bit careful with return values */ | ||
289 | if (nr > 0) { | ||
290 | if (ret < 0) | ||
291 | ret = nr; | ||
292 | else | ||
293 | ret += nr; | ||
294 | } | ||
295 | |||
296 | return ret; | ||
297 | } | ||
298 | } | ||
diff --git a/arch/x86/mm/hugetlbpage.c b/arch/x86/mm/hugetlbpage.c index 0b3d567e686d..8f307d914c2e 100644 --- a/arch/x86/mm/hugetlbpage.c +++ b/arch/x86/mm/hugetlbpage.c | |||
@@ -124,7 +124,8 @@ int huge_pmd_unshare(struct mm_struct *mm, unsigned long *addr, pte_t *ptep) | |||
124 | return 1; | 124 | return 1; |
125 | } | 125 | } |
126 | 126 | ||
127 | pte_t *huge_pte_alloc(struct mm_struct *mm, unsigned long addr) | 127 | pte_t *huge_pte_alloc(struct mm_struct *mm, |
128 | unsigned long addr, unsigned long sz) | ||
128 | { | 129 | { |
129 | pgd_t *pgd; | 130 | pgd_t *pgd; |
130 | pud_t *pud; | 131 | pud_t *pud; |
@@ -133,9 +134,14 @@ pte_t *huge_pte_alloc(struct mm_struct *mm, unsigned long addr) | |||
133 | pgd = pgd_offset(mm, addr); | 134 | pgd = pgd_offset(mm, addr); |
134 | pud = pud_alloc(mm, pgd, addr); | 135 | pud = pud_alloc(mm, pgd, addr); |
135 | if (pud) { | 136 | if (pud) { |
136 | if (pud_none(*pud)) | 137 | if (sz == PUD_SIZE) { |
137 | huge_pmd_share(mm, addr, pud); | 138 | pte = (pte_t *)pud; |
138 | pte = (pte_t *) pmd_alloc(mm, pud, addr); | 139 | } else { |
140 | BUG_ON(sz != PMD_SIZE); | ||
141 | if (pud_none(*pud)) | ||
142 | huge_pmd_share(mm, addr, pud); | ||
143 | pte = (pte_t *) pmd_alloc(mm, pud, addr); | ||
144 | } | ||
139 | } | 145 | } |
140 | BUG_ON(pte && !pte_none(*pte) && !pte_huge(*pte)); | 146 | BUG_ON(pte && !pte_none(*pte) && !pte_huge(*pte)); |
141 | 147 | ||
@@ -151,8 +157,11 @@ pte_t *huge_pte_offset(struct mm_struct *mm, unsigned long addr) | |||
151 | pgd = pgd_offset(mm, addr); | 157 | pgd = pgd_offset(mm, addr); |
152 | if (pgd_present(*pgd)) { | 158 | if (pgd_present(*pgd)) { |
153 | pud = pud_offset(pgd, addr); | 159 | pud = pud_offset(pgd, addr); |
154 | if (pud_present(*pud)) | 160 | if (pud_present(*pud)) { |
161 | if (pud_large(*pud)) | ||
162 | return (pte_t *)pud; | ||
155 | pmd = pmd_offset(pud, addr); | 163 | pmd = pmd_offset(pud, addr); |
164 | } | ||
156 | } | 165 | } |
157 | return (pte_t *) pmd; | 166 | return (pte_t *) pmd; |
158 | } | 167 | } |
@@ -188,6 +197,11 @@ int pmd_huge(pmd_t pmd) | |||
188 | return 0; | 197 | return 0; |
189 | } | 198 | } |
190 | 199 | ||
200 | int pud_huge(pud_t pud) | ||
201 | { | ||
202 | return 0; | ||
203 | } | ||
204 | |||
191 | struct page * | 205 | struct page * |
192 | follow_huge_pmd(struct mm_struct *mm, unsigned long address, | 206 | follow_huge_pmd(struct mm_struct *mm, unsigned long address, |
193 | pmd_t *pmd, int write) | 207 | pmd_t *pmd, int write) |
@@ -208,6 +222,11 @@ int pmd_huge(pmd_t pmd) | |||
208 | return !!(pmd_val(pmd) & _PAGE_PSE); | 222 | return !!(pmd_val(pmd) & _PAGE_PSE); |
209 | } | 223 | } |
210 | 224 | ||
225 | int pud_huge(pud_t pud) | ||
226 | { | ||
227 | return !!(pud_val(pud) & _PAGE_PSE); | ||
228 | } | ||
229 | |||
211 | struct page * | 230 | struct page * |
212 | follow_huge_pmd(struct mm_struct *mm, unsigned long address, | 231 | follow_huge_pmd(struct mm_struct *mm, unsigned long address, |
213 | pmd_t *pmd, int write) | 232 | pmd_t *pmd, int write) |
@@ -216,9 +235,22 @@ follow_huge_pmd(struct mm_struct *mm, unsigned long address, | |||
216 | 235 | ||
217 | page = pte_page(*(pte_t *)pmd); | 236 | page = pte_page(*(pte_t *)pmd); |
218 | if (page) | 237 | if (page) |
219 | page += ((address & ~HPAGE_MASK) >> PAGE_SHIFT); | 238 | page += ((address & ~PMD_MASK) >> PAGE_SHIFT); |
239 | return page; | ||
240 | } | ||
241 | |||
242 | struct page * | ||
243 | follow_huge_pud(struct mm_struct *mm, unsigned long address, | ||
244 | pud_t *pud, int write) | ||
245 | { | ||
246 | struct page *page; | ||
247 | |||
248 | page = pte_page(*(pte_t *)pud); | ||
249 | if (page) | ||
250 | page += ((address & ~PUD_MASK) >> PAGE_SHIFT); | ||
220 | return page; | 251 | return page; |
221 | } | 252 | } |
253 | |||
222 | #endif | 254 | #endif |
223 | 255 | ||
224 | /* x86_64 also uses this file */ | 256 | /* x86_64 also uses this file */ |
@@ -228,6 +260,7 @@ static unsigned long hugetlb_get_unmapped_area_bottomup(struct file *file, | |||
228 | unsigned long addr, unsigned long len, | 260 | unsigned long addr, unsigned long len, |
229 | unsigned long pgoff, unsigned long flags) | 261 | unsigned long pgoff, unsigned long flags) |
230 | { | 262 | { |
263 | struct hstate *h = hstate_file(file); | ||
231 | struct mm_struct *mm = current->mm; | 264 | struct mm_struct *mm = current->mm; |
232 | struct vm_area_struct *vma; | 265 | struct vm_area_struct *vma; |
233 | unsigned long start_addr; | 266 | unsigned long start_addr; |
@@ -240,7 +273,7 @@ static unsigned long hugetlb_get_unmapped_area_bottomup(struct file *file, | |||
240 | } | 273 | } |
241 | 274 | ||
242 | full_search: | 275 | full_search: |
243 | addr = ALIGN(start_addr, HPAGE_SIZE); | 276 | addr = ALIGN(start_addr, huge_page_size(h)); |
244 | 277 | ||
245 | for (vma = find_vma(mm, addr); ; vma = vma->vm_next) { | 278 | for (vma = find_vma(mm, addr); ; vma = vma->vm_next) { |
246 | /* At this point: (!vma || addr < vma->vm_end). */ | 279 | /* At this point: (!vma || addr < vma->vm_end). */ |
@@ -262,7 +295,7 @@ full_search: | |||
262 | } | 295 | } |
263 | if (addr + mm->cached_hole_size < vma->vm_start) | 296 | if (addr + mm->cached_hole_size < vma->vm_start) |
264 | mm->cached_hole_size = vma->vm_start - addr; | 297 | mm->cached_hole_size = vma->vm_start - addr; |
265 | addr = ALIGN(vma->vm_end, HPAGE_SIZE); | 298 | addr = ALIGN(vma->vm_end, huge_page_size(h)); |
266 | } | 299 | } |
267 | } | 300 | } |
268 | 301 | ||
@@ -270,6 +303,7 @@ static unsigned long hugetlb_get_unmapped_area_topdown(struct file *file, | |||
270 | unsigned long addr0, unsigned long len, | 303 | unsigned long addr0, unsigned long len, |
271 | unsigned long pgoff, unsigned long flags) | 304 | unsigned long pgoff, unsigned long flags) |
272 | { | 305 | { |
306 | struct hstate *h = hstate_file(file); | ||
273 | struct mm_struct *mm = current->mm; | 307 | struct mm_struct *mm = current->mm; |
274 | struct vm_area_struct *vma, *prev_vma; | 308 | struct vm_area_struct *vma, *prev_vma; |
275 | unsigned long base = mm->mmap_base, addr = addr0; | 309 | unsigned long base = mm->mmap_base, addr = addr0; |
@@ -290,7 +324,7 @@ try_again: | |||
290 | goto fail; | 324 | goto fail; |
291 | 325 | ||
292 | /* either no address requested or cant fit in requested address hole */ | 326 | /* either no address requested or cant fit in requested address hole */ |
293 | addr = (mm->free_area_cache - len) & HPAGE_MASK; | 327 | addr = (mm->free_area_cache - len) & huge_page_mask(h); |
294 | do { | 328 | do { |
295 | /* | 329 | /* |
296 | * Lookup failure means no vma is above this address, | 330 | * Lookup failure means no vma is above this address, |
@@ -321,7 +355,7 @@ try_again: | |||
321 | largest_hole = vma->vm_start - addr; | 355 | largest_hole = vma->vm_start - addr; |
322 | 356 | ||
323 | /* try just below the current vma->vm_start */ | 357 | /* try just below the current vma->vm_start */ |
324 | addr = (vma->vm_start - len) & HPAGE_MASK; | 358 | addr = (vma->vm_start - len) & huge_page_mask(h); |
325 | } while (len <= vma->vm_start); | 359 | } while (len <= vma->vm_start); |
326 | 360 | ||
327 | fail: | 361 | fail: |
@@ -359,22 +393,23 @@ unsigned long | |||
359 | hugetlb_get_unmapped_area(struct file *file, unsigned long addr, | 393 | hugetlb_get_unmapped_area(struct file *file, unsigned long addr, |
360 | unsigned long len, unsigned long pgoff, unsigned long flags) | 394 | unsigned long len, unsigned long pgoff, unsigned long flags) |
361 | { | 395 | { |
396 | struct hstate *h = hstate_file(file); | ||
362 | struct mm_struct *mm = current->mm; | 397 | struct mm_struct *mm = current->mm; |
363 | struct vm_area_struct *vma; | 398 | struct vm_area_struct *vma; |
364 | 399 | ||
365 | if (len & ~HPAGE_MASK) | 400 | if (len & ~huge_page_mask(h)) |
366 | return -EINVAL; | 401 | return -EINVAL; |
367 | if (len > TASK_SIZE) | 402 | if (len > TASK_SIZE) |
368 | return -ENOMEM; | 403 | return -ENOMEM; |
369 | 404 | ||
370 | if (flags & MAP_FIXED) { | 405 | if (flags & MAP_FIXED) { |
371 | if (prepare_hugepage_range(addr, len)) | 406 | if (prepare_hugepage_range(file, addr, len)) |
372 | return -EINVAL; | 407 | return -EINVAL; |
373 | return addr; | 408 | return addr; |
374 | } | 409 | } |
375 | 410 | ||
376 | if (addr) { | 411 | if (addr) { |
377 | addr = ALIGN(addr, HPAGE_SIZE); | 412 | addr = ALIGN(addr, huge_page_size(h)); |
378 | vma = find_vma(mm, addr); | 413 | vma = find_vma(mm, addr); |
379 | if (TASK_SIZE - len >= addr && | 414 | if (TASK_SIZE - len >= addr && |
380 | (!vma || addr + len <= vma->vm_start)) | 415 | (!vma || addr + len <= vma->vm_start)) |
@@ -390,3 +425,20 @@ hugetlb_get_unmapped_area(struct file *file, unsigned long addr, | |||
390 | 425 | ||
391 | #endif /*HAVE_ARCH_HUGETLB_UNMAPPED_AREA*/ | 426 | #endif /*HAVE_ARCH_HUGETLB_UNMAPPED_AREA*/ |
392 | 427 | ||
428 | #ifdef CONFIG_X86_64 | ||
429 | static __init int setup_hugepagesz(char *opt) | ||
430 | { | ||
431 | unsigned long ps = memparse(opt, &opt); | ||
432 | if (ps == PMD_SIZE) { | ||
433 | hugetlb_add_hstate(PMD_SHIFT - PAGE_SHIFT); | ||
434 | } else if (ps == PUD_SIZE && cpu_has_gbpages) { | ||
435 | hugetlb_add_hstate(PUD_SHIFT - PAGE_SHIFT); | ||
436 | } else { | ||
437 | printk(KERN_ERR "hugepagesz: Unsupported page size %lu M\n", | ||
438 | ps >> 20); | ||
439 | return 0; | ||
440 | } | ||
441 | return 1; | ||
442 | } | ||
443 | __setup("hugepagesz=", setup_hugepagesz); | ||
444 | #endif | ||
diff --git a/arch/x86/mm/init_32.c b/arch/x86/mm/init_32.c index 9689a5138e64..60ec1d08ff24 100644 --- a/arch/x86/mm/init_32.c +++ b/arch/x86/mm/init_32.c | |||
@@ -458,11 +458,7 @@ static void __init pagetable_init(void) | |||
458 | { | 458 | { |
459 | pgd_t *pgd_base = swapper_pg_dir; | 459 | pgd_t *pgd_base = swapper_pg_dir; |
460 | 460 | ||
461 | paravirt_pagetable_setup_start(pgd_base); | ||
462 | |||
463 | permanent_kmaps_init(pgd_base); | 461 | permanent_kmaps_init(pgd_base); |
464 | |||
465 | paravirt_pagetable_setup_done(pgd_base); | ||
466 | } | 462 | } |
467 | 463 | ||
468 | #ifdef CONFIG_ACPI_SLEEP | 464 | #ifdef CONFIG_ACPI_SLEEP |
@@ -844,6 +840,9 @@ unsigned long __init_refok init_memory_mapping(unsigned long start, | |||
844 | reserve_early(table_start << PAGE_SHIFT, | 840 | reserve_early(table_start << PAGE_SHIFT, |
845 | table_end << PAGE_SHIFT, "PGTABLE"); | 841 | table_end << PAGE_SHIFT, "PGTABLE"); |
846 | 842 | ||
843 | if (!after_init_bootmem) | ||
844 | early_memtest(start, end); | ||
845 | |||
847 | return end >> PAGE_SHIFT; | 846 | return end >> PAGE_SHIFT; |
848 | } | 847 | } |
849 | 848 | ||
@@ -868,8 +867,6 @@ void __init paging_init(void) | |||
868 | */ | 867 | */ |
869 | sparse_init(); | 868 | sparse_init(); |
870 | zone_sizes_init(); | 869 | zone_sizes_init(); |
871 | |||
872 | paravirt_post_allocator_init(); | ||
873 | } | 870 | } |
874 | 871 | ||
875 | /* | 872 | /* |
diff --git a/arch/x86/mm/init_64.c b/arch/x86/mm/init_64.c index 306049edd553..d3746efb060d 100644 --- a/arch/x86/mm/init_64.c +++ b/arch/x86/mm/init_64.c | |||
@@ -60,7 +60,7 @@ static unsigned long dma_reserve __initdata; | |||
60 | 60 | ||
61 | DEFINE_PER_CPU(struct mmu_gather, mmu_gathers); | 61 | DEFINE_PER_CPU(struct mmu_gather, mmu_gathers); |
62 | 62 | ||
63 | int direct_gbpages __meminitdata | 63 | int direct_gbpages |
64 | #ifdef CONFIG_DIRECT_GBPAGES | 64 | #ifdef CONFIG_DIRECT_GBPAGES |
65 | = 1 | 65 | = 1 |
66 | #endif | 66 | #endif |
@@ -86,46 +86,13 @@ early_param("gbpages", parse_direct_gbpages_on); | |||
86 | * around without checking the pgd every time. | 86 | * around without checking the pgd every time. |
87 | */ | 87 | */ |
88 | 88 | ||
89 | void show_mem(void) | ||
90 | { | ||
91 | long i, total = 0, reserved = 0; | ||
92 | long shared = 0, cached = 0; | ||
93 | struct page *page; | ||
94 | pg_data_t *pgdat; | ||
95 | |||
96 | printk(KERN_INFO "Mem-info:\n"); | ||
97 | show_free_areas(); | ||
98 | for_each_online_pgdat(pgdat) { | ||
99 | for (i = 0; i < pgdat->node_spanned_pages; ++i) { | ||
100 | /* | ||
101 | * This loop can take a while with 256 GB and | ||
102 | * 4k pages so defer the NMI watchdog: | ||
103 | */ | ||
104 | if (unlikely(i % MAX_ORDER_NR_PAGES == 0)) | ||
105 | touch_nmi_watchdog(); | ||
106 | |||
107 | if (!pfn_valid(pgdat->node_start_pfn + i)) | ||
108 | continue; | ||
109 | |||
110 | page = pfn_to_page(pgdat->node_start_pfn + i); | ||
111 | total++; | ||
112 | if (PageReserved(page)) | ||
113 | reserved++; | ||
114 | else if (PageSwapCache(page)) | ||
115 | cached++; | ||
116 | else if (page_count(page)) | ||
117 | shared += page_count(page) - 1; | ||
118 | } | ||
119 | } | ||
120 | printk(KERN_INFO "%lu pages of RAM\n", total); | ||
121 | printk(KERN_INFO "%lu reserved pages\n", reserved); | ||
122 | printk(KERN_INFO "%lu pages shared\n", shared); | ||
123 | printk(KERN_INFO "%lu pages swap cached\n", cached); | ||
124 | } | ||
125 | |||
126 | int after_bootmem; | 89 | int after_bootmem; |
127 | 90 | ||
128 | static __init void *spp_getpage(void) | 91 | /* |
92 | * NOTE: This function is marked __ref because it calls __init function | ||
93 | * (alloc_bootmem_pages). It's safe to do it ONLY when after_bootmem == 0. | ||
94 | */ | ||
95 | static __ref void *spp_getpage(void) | ||
129 | { | 96 | { |
130 | void *ptr; | 97 | void *ptr; |
131 | 98 | ||
@@ -274,7 +241,7 @@ static unsigned long __initdata table_start; | |||
274 | static unsigned long __meminitdata table_end; | 241 | static unsigned long __meminitdata table_end; |
275 | static unsigned long __meminitdata table_top; | 242 | static unsigned long __meminitdata table_top; |
276 | 243 | ||
277 | static __meminit void *alloc_low_page(unsigned long *phys) | 244 | static __ref void *alloc_low_page(unsigned long *phys) |
278 | { | 245 | { |
279 | unsigned long pfn = table_end++; | 246 | unsigned long pfn = table_end++; |
280 | void *adr; | 247 | void *adr; |
@@ -295,7 +262,7 @@ static __meminit void *alloc_low_page(unsigned long *phys) | |||
295 | return adr; | 262 | return adr; |
296 | } | 263 | } |
297 | 264 | ||
298 | static __meminit void unmap_low_page(void *adr) | 265 | static __ref void unmap_low_page(void *adr) |
299 | { | 266 | { |
300 | if (after_bootmem) | 267 | if (after_bootmem) |
301 | return; | 268 | return; |
@@ -351,6 +318,7 @@ phys_pmd_init(pmd_t *pmd_page, unsigned long address, unsigned long end, | |||
351 | { | 318 | { |
352 | unsigned long pages = 0; | 319 | unsigned long pages = 0; |
353 | unsigned long last_map_addr = end; | 320 | unsigned long last_map_addr = end; |
321 | unsigned long start = address; | ||
354 | 322 | ||
355 | int i = pmd_index(address); | 323 | int i = pmd_index(address); |
356 | 324 | ||
@@ -368,16 +336,24 @@ phys_pmd_init(pmd_t *pmd_page, unsigned long address, unsigned long end, | |||
368 | } | 336 | } |
369 | 337 | ||
370 | if (pmd_val(*pmd)) { | 338 | if (pmd_val(*pmd)) { |
371 | if (!pmd_large(*pmd)) | 339 | if (!pmd_large(*pmd)) { |
340 | spin_lock(&init_mm.page_table_lock); | ||
372 | last_map_addr = phys_pte_update(pmd, address, | 341 | last_map_addr = phys_pte_update(pmd, address, |
373 | end); | 342 | end); |
343 | spin_unlock(&init_mm.page_table_lock); | ||
344 | } | ||
345 | /* Count entries we're using from level2_ident_pgt */ | ||
346 | if (start == 0) | ||
347 | pages++; | ||
374 | continue; | 348 | continue; |
375 | } | 349 | } |
376 | 350 | ||
377 | if (page_size_mask & (1<<PG_LEVEL_2M)) { | 351 | if (page_size_mask & (1<<PG_LEVEL_2M)) { |
378 | pages++; | 352 | pages++; |
353 | spin_lock(&init_mm.page_table_lock); | ||
379 | set_pte((pte_t *)pmd, | 354 | set_pte((pte_t *)pmd, |
380 | pfn_pte(address >> PAGE_SHIFT, PAGE_KERNEL_LARGE)); | 355 | pfn_pte(address >> PAGE_SHIFT, PAGE_KERNEL_LARGE)); |
356 | spin_unlock(&init_mm.page_table_lock); | ||
381 | last_map_addr = (address & PMD_MASK) + PMD_SIZE; | 357 | last_map_addr = (address & PMD_MASK) + PMD_SIZE; |
382 | continue; | 358 | continue; |
383 | } | 359 | } |
@@ -386,7 +362,9 @@ phys_pmd_init(pmd_t *pmd_page, unsigned long address, unsigned long end, | |||
386 | last_map_addr = phys_pte_init(pte, address, end); | 362 | last_map_addr = phys_pte_init(pte, address, end); |
387 | unmap_low_page(pte); | 363 | unmap_low_page(pte); |
388 | 364 | ||
365 | spin_lock(&init_mm.page_table_lock); | ||
389 | pmd_populate_kernel(&init_mm, pmd, __va(pte_phys)); | 366 | pmd_populate_kernel(&init_mm, pmd, __va(pte_phys)); |
367 | spin_unlock(&init_mm.page_table_lock); | ||
390 | } | 368 | } |
391 | update_page_count(PG_LEVEL_2M, pages); | 369 | update_page_count(PG_LEVEL_2M, pages); |
392 | return last_map_addr; | 370 | return last_map_addr; |
@@ -399,9 +377,7 @@ phys_pmd_update(pud_t *pud, unsigned long address, unsigned long end, | |||
399 | pmd_t *pmd = pmd_offset(pud, 0); | 377 | pmd_t *pmd = pmd_offset(pud, 0); |
400 | unsigned long last_map_addr; | 378 | unsigned long last_map_addr; |
401 | 379 | ||
402 | spin_lock(&init_mm.page_table_lock); | ||
403 | last_map_addr = phys_pmd_init(pmd, address, end, page_size_mask); | 380 | last_map_addr = phys_pmd_init(pmd, address, end, page_size_mask); |
404 | spin_unlock(&init_mm.page_table_lock); | ||
405 | __flush_tlb_all(); | 381 | __flush_tlb_all(); |
406 | return last_map_addr; | 382 | return last_map_addr; |
407 | } | 383 | } |
@@ -437,20 +413,21 @@ phys_pud_init(pud_t *pud_page, unsigned long addr, unsigned long end, | |||
437 | 413 | ||
438 | if (page_size_mask & (1<<PG_LEVEL_1G)) { | 414 | if (page_size_mask & (1<<PG_LEVEL_1G)) { |
439 | pages++; | 415 | pages++; |
416 | spin_lock(&init_mm.page_table_lock); | ||
440 | set_pte((pte_t *)pud, | 417 | set_pte((pte_t *)pud, |
441 | pfn_pte(addr >> PAGE_SHIFT, PAGE_KERNEL_LARGE)); | 418 | pfn_pte(addr >> PAGE_SHIFT, PAGE_KERNEL_LARGE)); |
419 | spin_unlock(&init_mm.page_table_lock); | ||
442 | last_map_addr = (addr & PUD_MASK) + PUD_SIZE; | 420 | last_map_addr = (addr & PUD_MASK) + PUD_SIZE; |
443 | continue; | 421 | continue; |
444 | } | 422 | } |
445 | 423 | ||
446 | pmd = alloc_low_page(&pmd_phys); | 424 | pmd = alloc_low_page(&pmd_phys); |
447 | |||
448 | spin_lock(&init_mm.page_table_lock); | ||
449 | last_map_addr = phys_pmd_init(pmd, addr, end, page_size_mask); | 425 | last_map_addr = phys_pmd_init(pmd, addr, end, page_size_mask); |
450 | unmap_low_page(pmd); | 426 | unmap_low_page(pmd); |
427 | |||
428 | spin_lock(&init_mm.page_table_lock); | ||
451 | pud_populate(&init_mm, pud, __va(pmd_phys)); | 429 | pud_populate(&init_mm, pud, __va(pmd_phys)); |
452 | spin_unlock(&init_mm.page_table_lock); | 430 | spin_unlock(&init_mm.page_table_lock); |
453 | |||
454 | } | 431 | } |
455 | __flush_tlb_all(); | 432 | __flush_tlb_all(); |
456 | update_page_count(PG_LEVEL_1G, pages); | 433 | update_page_count(PG_LEVEL_1G, pages); |
@@ -517,118 +494,6 @@ static void __init init_gbpages(void) | |||
517 | direct_gbpages = 0; | 494 | direct_gbpages = 0; |
518 | } | 495 | } |
519 | 496 | ||
520 | #ifdef CONFIG_MEMTEST | ||
521 | |||
522 | static void __init memtest(unsigned long start_phys, unsigned long size, | ||
523 | unsigned pattern) | ||
524 | { | ||
525 | unsigned long i; | ||
526 | unsigned long *start; | ||
527 | unsigned long start_bad; | ||
528 | unsigned long last_bad; | ||
529 | unsigned long val; | ||
530 | unsigned long start_phys_aligned; | ||
531 | unsigned long count; | ||
532 | unsigned long incr; | ||
533 | |||
534 | switch (pattern) { | ||
535 | case 0: | ||
536 | val = 0UL; | ||
537 | break; | ||
538 | case 1: | ||
539 | val = -1UL; | ||
540 | break; | ||
541 | case 2: | ||
542 | val = 0x5555555555555555UL; | ||
543 | break; | ||
544 | case 3: | ||
545 | val = 0xaaaaaaaaaaaaaaaaUL; | ||
546 | break; | ||
547 | default: | ||
548 | return; | ||
549 | } | ||
550 | |||
551 | incr = sizeof(unsigned long); | ||
552 | start_phys_aligned = ALIGN(start_phys, incr); | ||
553 | count = (size - (start_phys_aligned - start_phys))/incr; | ||
554 | start = __va(start_phys_aligned); | ||
555 | start_bad = 0; | ||
556 | last_bad = 0; | ||
557 | |||
558 | for (i = 0; i < count; i++) | ||
559 | start[i] = val; | ||
560 | for (i = 0; i < count; i++, start++, start_phys_aligned += incr) { | ||
561 | if (*start != val) { | ||
562 | if (start_phys_aligned == last_bad + incr) { | ||
563 | last_bad += incr; | ||
564 | } else { | ||
565 | if (start_bad) { | ||
566 | printk(KERN_CONT "\n %016lx bad mem addr %016lx - %016lx reserved", | ||
567 | val, start_bad, last_bad + incr); | ||
568 | reserve_early(start_bad, last_bad - start_bad, "BAD RAM"); | ||
569 | } | ||
570 | start_bad = last_bad = start_phys_aligned; | ||
571 | } | ||
572 | } | ||
573 | } | ||
574 | if (start_bad) { | ||
575 | printk(KERN_CONT "\n %016lx bad mem addr %016lx - %016lx reserved", | ||
576 | val, start_bad, last_bad + incr); | ||
577 | reserve_early(start_bad, last_bad - start_bad, "BAD RAM"); | ||
578 | } | ||
579 | |||
580 | } | ||
581 | |||
582 | /* default is disabled */ | ||
583 | static int memtest_pattern __initdata; | ||
584 | |||
585 | static int __init parse_memtest(char *arg) | ||
586 | { | ||
587 | if (arg) | ||
588 | memtest_pattern = simple_strtoul(arg, NULL, 0); | ||
589 | return 0; | ||
590 | } | ||
591 | |||
592 | early_param("memtest", parse_memtest); | ||
593 | |||
594 | static void __init early_memtest(unsigned long start, unsigned long end) | ||
595 | { | ||
596 | u64 t_start, t_size; | ||
597 | unsigned pattern; | ||
598 | |||
599 | if (!memtest_pattern) | ||
600 | return; | ||
601 | |||
602 | printk(KERN_INFO "early_memtest: pattern num %d", memtest_pattern); | ||
603 | for (pattern = 0; pattern < memtest_pattern; pattern++) { | ||
604 | t_start = start; | ||
605 | t_size = 0; | ||
606 | while (t_start < end) { | ||
607 | t_start = find_e820_area_size(t_start, &t_size, 1); | ||
608 | |||
609 | /* done ? */ | ||
610 | if (t_start >= end) | ||
611 | break; | ||
612 | if (t_start + t_size > end) | ||
613 | t_size = end - t_start; | ||
614 | |||
615 | printk(KERN_CONT "\n %016llx - %016llx pattern %d", | ||
616 | (unsigned long long)t_start, | ||
617 | (unsigned long long)t_start + t_size, pattern); | ||
618 | |||
619 | memtest(t_start, t_size, pattern); | ||
620 | |||
621 | t_start += t_size; | ||
622 | } | ||
623 | } | ||
624 | printk(KERN_CONT "\n"); | ||
625 | } | ||
626 | #else | ||
627 | static void __init early_memtest(unsigned long start, unsigned long end) | ||
628 | { | ||
629 | } | ||
630 | #endif | ||
631 | |||
632 | static unsigned long __init kernel_physical_mapping_init(unsigned long start, | 497 | static unsigned long __init kernel_physical_mapping_init(unsigned long start, |
633 | unsigned long end, | 498 | unsigned long end, |
634 | unsigned long page_size_mask) | 499 | unsigned long page_size_mask) |
@@ -654,16 +519,14 @@ static unsigned long __init kernel_physical_mapping_init(unsigned long start, | |||
654 | continue; | 519 | continue; |
655 | } | 520 | } |
656 | 521 | ||
657 | if (after_bootmem) | 522 | pud = alloc_low_page(&pud_phys); |
658 | pud = pud_offset(pgd, start & PGDIR_MASK); | ||
659 | else | ||
660 | pud = alloc_low_page(&pud_phys); | ||
661 | |||
662 | last_map_addr = phys_pud_init(pud, __pa(start), __pa(next), | 523 | last_map_addr = phys_pud_init(pud, __pa(start), __pa(next), |
663 | page_size_mask); | 524 | page_size_mask); |
664 | unmap_low_page(pud); | 525 | unmap_low_page(pud); |
665 | pgd_populate(&init_mm, pgd_offset_k(start), | 526 | |
666 | __va(pud_phys)); | 527 | spin_lock(&init_mm.page_table_lock); |
528 | pgd_populate(&init_mm, pgd, __va(pud_phys)); | ||
529 | spin_unlock(&init_mm.page_table_lock); | ||
667 | } | 530 | } |
668 | 531 | ||
669 | return last_map_addr; | 532 | return last_map_addr; |
diff --git a/arch/x86/mm/ioremap.c b/arch/x86/mm/ioremap.c index 24c1d3c30186..d4b6e6a29ae3 100644 --- a/arch/x86/mm/ioremap.c +++ b/arch/x86/mm/ioremap.c | |||
@@ -170,7 +170,7 @@ static void __iomem *__ioremap_caller(resource_size_t phys_addr, | |||
170 | phys_addr &= PAGE_MASK; | 170 | phys_addr &= PAGE_MASK; |
171 | size = PAGE_ALIGN(last_addr+1) - phys_addr; | 171 | size = PAGE_ALIGN(last_addr+1) - phys_addr; |
172 | 172 | ||
173 | retval = reserve_memtype(phys_addr, phys_addr + size, | 173 | retval = reserve_memtype(phys_addr, (u64)phys_addr + size, |
174 | prot_val, &new_prot_val); | 174 | prot_val, &new_prot_val); |
175 | if (retval) { | 175 | if (retval) { |
176 | pr_debug("Warning: reserve_memtype returned %d\n", retval); | 176 | pr_debug("Warning: reserve_memtype returned %d\n", retval); |
@@ -330,6 +330,14 @@ static void __iomem *ioremap_default(resource_size_t phys_addr, | |||
330 | return (void __iomem *)ret; | 330 | return (void __iomem *)ret; |
331 | } | 331 | } |
332 | 332 | ||
333 | void __iomem *ioremap_prot(resource_size_t phys_addr, unsigned long size, | ||
334 | unsigned long prot_val) | ||
335 | { | ||
336 | return __ioremap_caller(phys_addr, size, (prot_val & _PAGE_CACHE_MASK), | ||
337 | __builtin_return_address(0)); | ||
338 | } | ||
339 | EXPORT_SYMBOL(ioremap_prot); | ||
340 | |||
333 | /** | 341 | /** |
334 | * iounmap - Free a IO remapping | 342 | * iounmap - Free a IO remapping |
335 | * @addr: virtual address from ioremap_* | 343 | * @addr: virtual address from ioremap_* |
@@ -545,13 +553,11 @@ static int __init check_early_ioremap_leak(void) | |||
545 | { | 553 | { |
546 | if (!early_ioremap_nested) | 554 | if (!early_ioremap_nested) |
547 | return 0; | 555 | return 0; |
548 | 556 | WARN(1, KERN_WARNING | |
549 | printk(KERN_WARNING | ||
550 | "Debug warning: early ioremap leak of %d areas detected.\n", | 557 | "Debug warning: early ioremap leak of %d areas detected.\n", |
551 | early_ioremap_nested); | 558 | early_ioremap_nested); |
552 | printk(KERN_WARNING | 559 | printk(KERN_WARNING |
553 | "please boot with early_ioremap_debug and report the dmesg.\n"); | 560 | "please boot with early_ioremap_debug and report the dmesg.\n"); |
554 | WARN_ON(1); | ||
555 | 561 | ||
556 | return 1; | 562 | return 1; |
557 | } | 563 | } |
diff --git a/arch/x86/mm/memtest.c b/arch/x86/mm/memtest.c new file mode 100644 index 000000000000..672e17f8262a --- /dev/null +++ b/arch/x86/mm/memtest.c | |||
@@ -0,0 +1,123 @@ | |||
1 | #include <linux/kernel.h> | ||
2 | #include <linux/errno.h> | ||
3 | #include <linux/string.h> | ||
4 | #include <linux/types.h> | ||
5 | #include <linux/mm.h> | ||
6 | #include <linux/smp.h> | ||
7 | #include <linux/init.h> | ||
8 | #include <linux/pfn.h> | ||
9 | |||
10 | #include <asm/e820.h> | ||
11 | |||
12 | static void __init memtest(unsigned long start_phys, unsigned long size, | ||
13 | unsigned pattern) | ||
14 | { | ||
15 | unsigned long i; | ||
16 | unsigned long *start; | ||
17 | unsigned long start_bad; | ||
18 | unsigned long last_bad; | ||
19 | unsigned long val; | ||
20 | unsigned long start_phys_aligned; | ||
21 | unsigned long count; | ||
22 | unsigned long incr; | ||
23 | |||
24 | switch (pattern) { | ||
25 | case 0: | ||
26 | val = 0UL; | ||
27 | break; | ||
28 | case 1: | ||
29 | val = -1UL; | ||
30 | break; | ||
31 | case 2: | ||
32 | #ifdef CONFIG_X86_64 | ||
33 | val = 0x5555555555555555UL; | ||
34 | #else | ||
35 | val = 0x55555555UL; | ||
36 | #endif | ||
37 | break; | ||
38 | case 3: | ||
39 | #ifdef CONFIG_X86_64 | ||
40 | val = 0xaaaaaaaaaaaaaaaaUL; | ||
41 | #else | ||
42 | val = 0xaaaaaaaaUL; | ||
43 | #endif | ||
44 | break; | ||
45 | default: | ||
46 | return; | ||
47 | } | ||
48 | |||
49 | incr = sizeof(unsigned long); | ||
50 | start_phys_aligned = ALIGN(start_phys, incr); | ||
51 | count = (size - (start_phys_aligned - start_phys))/incr; | ||
52 | start = __va(start_phys_aligned); | ||
53 | start_bad = 0; | ||
54 | last_bad = 0; | ||
55 | |||
56 | for (i = 0; i < count; i++) | ||
57 | start[i] = val; | ||
58 | for (i = 0; i < count; i++, start++, start_phys_aligned += incr) { | ||
59 | if (*start != val) { | ||
60 | if (start_phys_aligned == last_bad + incr) { | ||
61 | last_bad += incr; | ||
62 | } else { | ||
63 | if (start_bad) { | ||
64 | printk(KERN_CONT "\n %010lx bad mem addr %010lx - %010lx reserved", | ||
65 | val, start_bad, last_bad + incr); | ||
66 | reserve_early(start_bad, last_bad - start_bad, "BAD RAM"); | ||
67 | } | ||
68 | start_bad = last_bad = start_phys_aligned; | ||
69 | } | ||
70 | } | ||
71 | } | ||
72 | if (start_bad) { | ||
73 | printk(KERN_CONT "\n %016lx bad mem addr %010lx - %010lx reserved", | ||
74 | val, start_bad, last_bad + incr); | ||
75 | reserve_early(start_bad, last_bad - start_bad, "BAD RAM"); | ||
76 | } | ||
77 | |||
78 | } | ||
79 | |||
80 | /* default is disabled */ | ||
81 | static int memtest_pattern __initdata; | ||
82 | |||
83 | static int __init parse_memtest(char *arg) | ||
84 | { | ||
85 | if (arg) | ||
86 | memtest_pattern = simple_strtoul(arg, NULL, 0); | ||
87 | return 0; | ||
88 | } | ||
89 | |||
90 | early_param("memtest", parse_memtest); | ||
91 | |||
92 | void __init early_memtest(unsigned long start, unsigned long end) | ||
93 | { | ||
94 | u64 t_start, t_size; | ||
95 | unsigned pattern; | ||
96 | |||
97 | if (!memtest_pattern) | ||
98 | return; | ||
99 | |||
100 | printk(KERN_INFO "early_memtest: pattern num %d", memtest_pattern); | ||
101 | for (pattern = 0; pattern < memtest_pattern; pattern++) { | ||
102 | t_start = start; | ||
103 | t_size = 0; | ||
104 | while (t_start < end) { | ||
105 | t_start = find_e820_area_size(t_start, &t_size, 1); | ||
106 | |||
107 | /* done ? */ | ||
108 | if (t_start >= end) | ||
109 | break; | ||
110 | if (t_start + t_size > end) | ||
111 | t_size = end - t_start; | ||
112 | |||
113 | printk(KERN_CONT "\n %010llx - %010llx pattern %d", | ||
114 | (unsigned long long)t_start, | ||
115 | (unsigned long long)t_start + t_size, pattern); | ||
116 | |||
117 | memtest(t_start, t_size, pattern); | ||
118 | |||
119 | t_start += t_size; | ||
120 | } | ||
121 | } | ||
122 | printk(KERN_CONT "\n"); | ||
123 | } | ||
diff --git a/arch/x86/mm/mmio-mod.c b/arch/x86/mm/mmio-mod.c index e7397e108beb..635b50e85581 100644 --- a/arch/x86/mm/mmio-mod.c +++ b/arch/x86/mm/mmio-mod.c | |||
@@ -430,7 +430,9 @@ static void enter_uniprocessor(void) | |||
430 | "may miss events.\n"); | 430 | "may miss events.\n"); |
431 | } | 431 | } |
432 | 432 | ||
433 | static void leave_uniprocessor(void) | 433 | /* __ref because leave_uniprocessor calls cpu_up which is __cpuinit, |
434 | but this whole function is ifdefed CONFIG_HOTPLUG_CPU */ | ||
435 | static void __ref leave_uniprocessor(void) | ||
434 | { | 436 | { |
435 | int cpu; | 437 | int cpu; |
436 | int err; | 438 | int err; |
diff --git a/arch/x86/mm/numa_64.c b/arch/x86/mm/numa_64.c index b432d5781773..a4dd793d6003 100644 --- a/arch/x86/mm/numa_64.c +++ b/arch/x86/mm/numa_64.c | |||
@@ -20,15 +20,9 @@ | |||
20 | #include <asm/acpi.h> | 20 | #include <asm/acpi.h> |
21 | #include <asm/k8.h> | 21 | #include <asm/k8.h> |
22 | 22 | ||
23 | #ifndef Dprintk | ||
24 | #define Dprintk(x...) | ||
25 | #endif | ||
26 | |||
27 | struct pglist_data *node_data[MAX_NUMNODES] __read_mostly; | 23 | struct pglist_data *node_data[MAX_NUMNODES] __read_mostly; |
28 | EXPORT_SYMBOL(node_data); | 24 | EXPORT_SYMBOL(node_data); |
29 | 25 | ||
30 | static bootmem_data_t plat_node_bdata[MAX_NUMNODES]; | ||
31 | |||
32 | struct memnode memnode; | 26 | struct memnode memnode; |
33 | 27 | ||
34 | s16 apicid_to_node[MAX_LOCAL_APIC] __cpuinitdata = { | 28 | s16 apicid_to_node[MAX_LOCAL_APIC] __cpuinitdata = { |
@@ -202,7 +196,7 @@ void __init setup_node_bootmem(int nodeid, unsigned long start, | |||
202 | nodedata_phys + pgdat_size - 1); | 196 | nodedata_phys + pgdat_size - 1); |
203 | 197 | ||
204 | memset(NODE_DATA(nodeid), 0, sizeof(pg_data_t)); | 198 | memset(NODE_DATA(nodeid), 0, sizeof(pg_data_t)); |
205 | NODE_DATA(nodeid)->bdata = &plat_node_bdata[nodeid]; | 199 | NODE_DATA(nodeid)->bdata = &bootmem_node_data[nodeid]; |
206 | NODE_DATA(nodeid)->node_start_pfn = start_pfn; | 200 | NODE_DATA(nodeid)->node_start_pfn = start_pfn; |
207 | NODE_DATA(nodeid)->node_spanned_pages = last_pfn - start_pfn; | 201 | NODE_DATA(nodeid)->node_spanned_pages = last_pfn - start_pfn; |
208 | 202 | ||
diff --git a/arch/x86/mm/pageattr-test.c b/arch/x86/mm/pageattr-test.c index 0dcd42eb94e6..d4aa503caaa2 100644 --- a/arch/x86/mm/pageattr-test.c +++ b/arch/x86/mm/pageattr-test.c | |||
@@ -221,8 +221,7 @@ static int pageattr_test(void) | |||
221 | failed += print_split(&sc); | 221 | failed += print_split(&sc); |
222 | 222 | ||
223 | if (failed) { | 223 | if (failed) { |
224 | printk(KERN_ERR "NOT PASSED. Please report.\n"); | 224 | WARN(1, KERN_ERR "NOT PASSED. Please report.\n"); |
225 | WARN_ON(1); | ||
226 | return -EINVAL; | 225 | return -EINVAL; |
227 | } else { | 226 | } else { |
228 | if (print) | 227 | if (print) |
diff --git a/arch/x86/mm/pageattr.c b/arch/x86/mm/pageattr.c index 65c6e46bf059..43e2f8483e4f 100644 --- a/arch/x86/mm/pageattr.c +++ b/arch/x86/mm/pageattr.c | |||
@@ -55,13 +55,19 @@ static void split_page_count(int level) | |||
55 | 55 | ||
56 | int arch_report_meminfo(char *page) | 56 | int arch_report_meminfo(char *page) |
57 | { | 57 | { |
58 | int n = sprintf(page, "DirectMap4k: %8lu\n" | 58 | int n = sprintf(page, "DirectMap4k: %8lu kB\n", |
59 | "DirectMap2M: %8lu\n", | 59 | direct_pages_count[PG_LEVEL_4K] << 2); |
60 | direct_pages_count[PG_LEVEL_4K], | 60 | #if defined(CONFIG_X86_64) || defined(CONFIG_X86_PAE) |
61 | direct_pages_count[PG_LEVEL_2M]); | 61 | n += sprintf(page + n, "DirectMap2M: %8lu kB\n", |
62 | direct_pages_count[PG_LEVEL_2M] << 11); | ||
63 | #else | ||
64 | n += sprintf(page + n, "DirectMap4M: %8lu kB\n", | ||
65 | direct_pages_count[PG_LEVEL_2M] << 12); | ||
66 | #endif | ||
62 | #ifdef CONFIG_X86_64 | 67 | #ifdef CONFIG_X86_64 |
63 | n += sprintf(page + n, "DirectMap1G: %8lu\n", | 68 | if (direct_gbpages) |
64 | direct_pages_count[PG_LEVEL_1G]); | 69 | n += sprintf(page + n, "DirectMap1G: %8lu kB\n", |
70 | direct_pages_count[PG_LEVEL_1G] << 20); | ||
65 | #endif | 71 | #endif |
66 | return n; | 72 | return n; |
67 | } | 73 | } |
@@ -592,10 +598,9 @@ repeat: | |||
592 | if (!pte_val(old_pte)) { | 598 | if (!pte_val(old_pte)) { |
593 | if (!primary) | 599 | if (!primary) |
594 | return 0; | 600 | return 0; |
595 | printk(KERN_WARNING "CPA: called for zero pte. " | 601 | WARN(1, KERN_WARNING "CPA: called for zero pte. " |
596 | "vaddr = %lx cpa->vaddr = %lx\n", address, | 602 | "vaddr = %lx cpa->vaddr = %lx\n", address, |
597 | cpa->vaddr); | 603 | cpa->vaddr); |
598 | WARN_ON(1); | ||
599 | return -EINVAL; | 604 | return -EINVAL; |
600 | } | 605 | } |
601 | 606 | ||
@@ -844,7 +849,7 @@ int set_memory_uc(unsigned long addr, int numpages) | |||
844 | /* | 849 | /* |
845 | * for now UC MINUS. see comments in ioremap_nocache() | 850 | * for now UC MINUS. see comments in ioremap_nocache() |
846 | */ | 851 | */ |
847 | if (reserve_memtype(addr, addr + numpages * PAGE_SIZE, | 852 | if (reserve_memtype(__pa(addr), __pa(addr) + numpages * PAGE_SIZE, |
848 | _PAGE_CACHE_UC_MINUS, NULL)) | 853 | _PAGE_CACHE_UC_MINUS, NULL)) |
849 | return -EINVAL; | 854 | return -EINVAL; |
850 | 855 | ||
@@ -863,7 +868,7 @@ int set_memory_wc(unsigned long addr, int numpages) | |||
863 | if (!pat_enabled) | 868 | if (!pat_enabled) |
864 | return set_memory_uc(addr, numpages); | 869 | return set_memory_uc(addr, numpages); |
865 | 870 | ||
866 | if (reserve_memtype(addr, addr + numpages * PAGE_SIZE, | 871 | if (reserve_memtype(__pa(addr), __pa(addr) + numpages * PAGE_SIZE, |
867 | _PAGE_CACHE_WC, NULL)) | 872 | _PAGE_CACHE_WC, NULL)) |
868 | return -EINVAL; | 873 | return -EINVAL; |
869 | 874 | ||
@@ -879,7 +884,7 @@ int _set_memory_wb(unsigned long addr, int numpages) | |||
879 | 884 | ||
880 | int set_memory_wb(unsigned long addr, int numpages) | 885 | int set_memory_wb(unsigned long addr, int numpages) |
881 | { | 886 | { |
882 | free_memtype(addr, addr + numpages * PAGE_SIZE); | 887 | free_memtype(__pa(addr), __pa(addr) + numpages * PAGE_SIZE); |
883 | 888 | ||
884 | return _set_memory_wb(addr, numpages); | 889 | return _set_memory_wb(addr, numpages); |
885 | } | 890 | } |
diff --git a/arch/x86/mm/pat.c b/arch/x86/mm/pat.c index d4585077977a..2a50e0fa64a5 100644 --- a/arch/x86/mm/pat.c +++ b/arch/x86/mm/pat.c | |||
@@ -12,6 +12,8 @@ | |||
12 | #include <linux/gfp.h> | 12 | #include <linux/gfp.h> |
13 | #include <linux/fs.h> | 13 | #include <linux/fs.h> |
14 | #include <linux/bootmem.h> | 14 | #include <linux/bootmem.h> |
15 | #include <linux/debugfs.h> | ||
16 | #include <linux/seq_file.h> | ||
15 | 17 | ||
16 | #include <asm/msr.h> | 18 | #include <asm/msr.h> |
17 | #include <asm/tlbflush.h> | 19 | #include <asm/tlbflush.h> |
@@ -205,6 +207,9 @@ static int chk_conflict(struct memtype *new, struct memtype *entry, | |||
205 | return -EBUSY; | 207 | return -EBUSY; |
206 | } | 208 | } |
207 | 209 | ||
210 | static struct memtype *cached_entry; | ||
211 | static u64 cached_start; | ||
212 | |||
208 | /* | 213 | /* |
209 | * req_type typically has one of the: | 214 | * req_type typically has one of the: |
210 | * - _PAGE_CACHE_WB | 215 | * - _PAGE_CACHE_WB |
@@ -278,11 +283,17 @@ int reserve_memtype(u64 start, u64 end, unsigned long req_type, | |||
278 | 283 | ||
279 | spin_lock(&memtype_lock); | 284 | spin_lock(&memtype_lock); |
280 | 285 | ||
286 | if (cached_entry && start >= cached_start) | ||
287 | entry = cached_entry; | ||
288 | else | ||
289 | entry = list_entry(&memtype_list, struct memtype, nd); | ||
290 | |||
281 | /* Search for existing mapping that overlaps the current range */ | 291 | /* Search for existing mapping that overlaps the current range */ |
282 | where = NULL; | 292 | where = NULL; |
283 | list_for_each_entry(entry, &memtype_list, nd) { | 293 | list_for_each_entry_continue(entry, &memtype_list, nd) { |
284 | if (end <= entry->start) { | 294 | if (end <= entry->start) { |
285 | where = entry->nd.prev; | 295 | where = entry->nd.prev; |
296 | cached_entry = list_entry(where, struct memtype, nd); | ||
286 | break; | 297 | break; |
287 | } else if (start <= entry->start) { /* end > entry->start */ | 298 | } else if (start <= entry->start) { /* end > entry->start */ |
288 | err = chk_conflict(new, entry, new_type); | 299 | err = chk_conflict(new, entry, new_type); |
@@ -290,6 +301,8 @@ int reserve_memtype(u64 start, u64 end, unsigned long req_type, | |||
290 | dprintk("Overlap at 0x%Lx-0x%Lx\n", | 301 | dprintk("Overlap at 0x%Lx-0x%Lx\n", |
291 | entry->start, entry->end); | 302 | entry->start, entry->end); |
292 | where = entry->nd.prev; | 303 | where = entry->nd.prev; |
304 | cached_entry = list_entry(where, | ||
305 | struct memtype, nd); | ||
293 | } | 306 | } |
294 | break; | 307 | break; |
295 | } else if (start < entry->end) { /* start > entry->start */ | 308 | } else if (start < entry->end) { /* start > entry->start */ |
@@ -297,7 +310,20 @@ int reserve_memtype(u64 start, u64 end, unsigned long req_type, | |||
297 | if (!err) { | 310 | if (!err) { |
298 | dprintk("Overlap at 0x%Lx-0x%Lx\n", | 311 | dprintk("Overlap at 0x%Lx-0x%Lx\n", |
299 | entry->start, entry->end); | 312 | entry->start, entry->end); |
300 | where = &entry->nd; | 313 | cached_entry = list_entry(entry->nd.prev, |
314 | struct memtype, nd); | ||
315 | |||
316 | /* | ||
317 | * Move to right position in the linked | ||
318 | * list to add this new entry | ||
319 | */ | ||
320 | list_for_each_entry_continue(entry, | ||
321 | &memtype_list, nd) { | ||
322 | if (start <= entry->start) { | ||
323 | where = entry->nd.prev; | ||
324 | break; | ||
325 | } | ||
326 | } | ||
301 | } | 327 | } |
302 | break; | 328 | break; |
303 | } | 329 | } |
@@ -312,6 +338,8 @@ int reserve_memtype(u64 start, u64 end, unsigned long req_type, | |||
312 | return err; | 338 | return err; |
313 | } | 339 | } |
314 | 340 | ||
341 | cached_start = start; | ||
342 | |||
315 | if (where) | 343 | if (where) |
316 | list_add(&new->nd, where); | 344 | list_add(&new->nd, where); |
317 | else | 345 | else |
@@ -341,6 +369,9 @@ int free_memtype(u64 start, u64 end) | |||
341 | spin_lock(&memtype_lock); | 369 | spin_lock(&memtype_lock); |
342 | list_for_each_entry(entry, &memtype_list, nd) { | 370 | list_for_each_entry(entry, &memtype_list, nd) { |
343 | if (entry->start == start && entry->end == end) { | 371 | if (entry->start == start && entry->end == end) { |
372 | if (cached_entry == entry || cached_start == start) | ||
373 | cached_entry = NULL; | ||
374 | |||
344 | list_del(&entry->nd); | 375 | list_del(&entry->nd); |
345 | kfree(entry); | 376 | kfree(entry); |
346 | err = 0; | 377 | err = 0; |
@@ -359,22 +390,14 @@ int free_memtype(u64 start, u64 end) | |||
359 | } | 390 | } |
360 | 391 | ||
361 | 392 | ||
362 | /* | ||
363 | * /dev/mem mmap interface. The memtype used for mapping varies: | ||
364 | * - Use UC for mappings with O_SYNC flag | ||
365 | * - Without O_SYNC flag, if there is any conflict in reserve_memtype, | ||
366 | * inherit the memtype from existing mapping. | ||
367 | * - Else use UC_MINUS memtype (for backward compatibility with existing | ||
368 | * X drivers. | ||
369 | */ | ||
370 | pgprot_t phys_mem_access_prot(struct file *file, unsigned long pfn, | 393 | pgprot_t phys_mem_access_prot(struct file *file, unsigned long pfn, |
371 | unsigned long size, pgprot_t vma_prot) | 394 | unsigned long size, pgprot_t vma_prot) |
372 | { | 395 | { |
373 | return vma_prot; | 396 | return vma_prot; |
374 | } | 397 | } |
375 | 398 | ||
376 | #ifdef CONFIG_NONPROMISC_DEVMEM | 399 | #ifdef CONFIG_STRICT_DEVMEM |
377 | /* This check is done in drivers/char/mem.c in case of NONPROMISC_DEVMEM*/ | 400 | /* This check is done in drivers/char/mem.c in case of STRICT_DEVMEM*/ |
378 | static inline int range_is_allowed(unsigned long pfn, unsigned long size) | 401 | static inline int range_is_allowed(unsigned long pfn, unsigned long size) |
379 | { | 402 | { |
380 | return 1; | 403 | return 1; |
@@ -398,20 +421,20 @@ static inline int range_is_allowed(unsigned long pfn, unsigned long size) | |||
398 | } | 421 | } |
399 | return 1; | 422 | return 1; |
400 | } | 423 | } |
401 | #endif /* CONFIG_NONPROMISC_DEVMEM */ | 424 | #endif /* CONFIG_STRICT_DEVMEM */ |
402 | 425 | ||
403 | int phys_mem_access_prot_allowed(struct file *file, unsigned long pfn, | 426 | int phys_mem_access_prot_allowed(struct file *file, unsigned long pfn, |
404 | unsigned long size, pgprot_t *vma_prot) | 427 | unsigned long size, pgprot_t *vma_prot) |
405 | { | 428 | { |
406 | u64 offset = ((u64) pfn) << PAGE_SHIFT; | 429 | u64 offset = ((u64) pfn) << PAGE_SHIFT; |
407 | unsigned long flags = _PAGE_CACHE_UC_MINUS; | 430 | unsigned long flags = -1; |
408 | int retval; | 431 | int retval; |
409 | 432 | ||
410 | if (!range_is_allowed(pfn, size)) | 433 | if (!range_is_allowed(pfn, size)) |
411 | return 0; | 434 | return 0; |
412 | 435 | ||
413 | if (file->f_flags & O_SYNC) { | 436 | if (file->f_flags & O_SYNC) { |
414 | flags = _PAGE_CACHE_UC; | 437 | flags = _PAGE_CACHE_UC_MINUS; |
415 | } | 438 | } |
416 | 439 | ||
417 | #ifdef CONFIG_X86_32 | 440 | #ifdef CONFIG_X86_32 |
@@ -434,13 +457,14 @@ int phys_mem_access_prot_allowed(struct file *file, unsigned long pfn, | |||
434 | #endif | 457 | #endif |
435 | 458 | ||
436 | /* | 459 | /* |
437 | * With O_SYNC, we can only take UC mapping. Fail if we cannot. | 460 | * With O_SYNC, we can only take UC_MINUS mapping. Fail if we cannot. |
461 | * | ||
438 | * Without O_SYNC, we want to get | 462 | * Without O_SYNC, we want to get |
439 | * - WB for WB-able memory and no other conflicting mappings | 463 | * - WB for WB-able memory and no other conflicting mappings |
440 | * - UC_MINUS for non-WB-able memory with no other conflicting mappings | 464 | * - UC_MINUS for non-WB-able memory with no other conflicting mappings |
441 | * - Inherit from confliting mappings otherwise | 465 | * - Inherit from confliting mappings otherwise |
442 | */ | 466 | */ |
443 | if (flags != _PAGE_CACHE_UC_MINUS) { | 467 | if (flags != -1) { |
444 | retval = reserve_memtype(offset, offset + size, flags, NULL); | 468 | retval = reserve_memtype(offset, offset + size, flags, NULL); |
445 | } else { | 469 | } else { |
446 | retval = reserve_memtype(offset, offset + size, -1, &flags); | 470 | retval = reserve_memtype(offset, offset + size, -1, &flags); |
@@ -489,3 +513,89 @@ void unmap_devmem(unsigned long pfn, unsigned long size, pgprot_t vma_prot) | |||
489 | 513 | ||
490 | free_memtype(addr, addr + size); | 514 | free_memtype(addr, addr + size); |
491 | } | 515 | } |
516 | |||
517 | #if defined(CONFIG_DEBUG_FS) | ||
518 | |||
519 | /* get Nth element of the linked list */ | ||
520 | static struct memtype *memtype_get_idx(loff_t pos) | ||
521 | { | ||
522 | struct memtype *list_node, *print_entry; | ||
523 | int i = 1; | ||
524 | |||
525 | print_entry = kmalloc(sizeof(struct memtype), GFP_KERNEL); | ||
526 | if (!print_entry) | ||
527 | return NULL; | ||
528 | |||
529 | spin_lock(&memtype_lock); | ||
530 | list_for_each_entry(list_node, &memtype_list, nd) { | ||
531 | if (pos == i) { | ||
532 | *print_entry = *list_node; | ||
533 | spin_unlock(&memtype_lock); | ||
534 | return print_entry; | ||
535 | } | ||
536 | ++i; | ||
537 | } | ||
538 | spin_unlock(&memtype_lock); | ||
539 | kfree(print_entry); | ||
540 | return NULL; | ||
541 | } | ||
542 | |||
543 | static void *memtype_seq_start(struct seq_file *seq, loff_t *pos) | ||
544 | { | ||
545 | if (*pos == 0) { | ||
546 | ++*pos; | ||
547 | seq_printf(seq, "PAT memtype list:\n"); | ||
548 | } | ||
549 | |||
550 | return memtype_get_idx(*pos); | ||
551 | } | ||
552 | |||
553 | static void *memtype_seq_next(struct seq_file *seq, void *v, loff_t *pos) | ||
554 | { | ||
555 | ++*pos; | ||
556 | return memtype_get_idx(*pos); | ||
557 | } | ||
558 | |||
559 | static void memtype_seq_stop(struct seq_file *seq, void *v) | ||
560 | { | ||
561 | } | ||
562 | |||
563 | static int memtype_seq_show(struct seq_file *seq, void *v) | ||
564 | { | ||
565 | struct memtype *print_entry = (struct memtype *)v; | ||
566 | |||
567 | seq_printf(seq, "%s @ 0x%Lx-0x%Lx\n", cattr_name(print_entry->type), | ||
568 | print_entry->start, print_entry->end); | ||
569 | kfree(print_entry); | ||
570 | return 0; | ||
571 | } | ||
572 | |||
573 | static struct seq_operations memtype_seq_ops = { | ||
574 | .start = memtype_seq_start, | ||
575 | .next = memtype_seq_next, | ||
576 | .stop = memtype_seq_stop, | ||
577 | .show = memtype_seq_show, | ||
578 | }; | ||
579 | |||
580 | static int memtype_seq_open(struct inode *inode, struct file *file) | ||
581 | { | ||
582 | return seq_open(file, &memtype_seq_ops); | ||
583 | } | ||
584 | |||
585 | static const struct file_operations memtype_fops = { | ||
586 | .open = memtype_seq_open, | ||
587 | .read = seq_read, | ||
588 | .llseek = seq_lseek, | ||
589 | .release = seq_release, | ||
590 | }; | ||
591 | |||
592 | static int __init pat_memtype_list_init(void) | ||
593 | { | ||
594 | debugfs_create_file("pat_memtype_list", S_IRUSR, arch_debugfs_dir, | ||
595 | NULL, &memtype_fops); | ||
596 | return 0; | ||
597 | } | ||
598 | |||
599 | late_initcall(pat_memtype_list_init); | ||
600 | |||
601 | #endif /* CONFIG_DEBUG_FS */ | ||
diff --git a/arch/x86/mm/pgtable.c b/arch/x86/mm/pgtable.c index 557b2abceef8..d50302774fe2 100644 --- a/arch/x86/mm/pgtable.c +++ b/arch/x86/mm/pgtable.c | |||
@@ -207,6 +207,9 @@ static void pgd_prepopulate_pmd(struct mm_struct *mm, pgd_t *pgd, pmd_t *pmds[]) | |||
207 | unsigned long addr; | 207 | unsigned long addr; |
208 | int i; | 208 | int i; |
209 | 209 | ||
210 | if (PREALLOCATED_PMDS == 0) /* Work around gcc-3.4.x bug */ | ||
211 | return; | ||
212 | |||
210 | pud = pud_offset(pgd, 0); | 213 | pud = pud_offset(pgd, 0); |
211 | 214 | ||
212 | for (addr = i = 0; i < PREALLOCATED_PMDS; | 215 | for (addr = i = 0; i < PREALLOCATED_PMDS; |
diff --git a/arch/x86/mm/pgtable_32.c b/arch/x86/mm/pgtable_32.c index b4becbf8c570..cab0abbd1ebe 100644 --- a/arch/x86/mm/pgtable_32.c +++ b/arch/x86/mm/pgtable_32.c | |||
@@ -20,53 +20,6 @@ | |||
20 | #include <asm/tlb.h> | 20 | #include <asm/tlb.h> |
21 | #include <asm/tlbflush.h> | 21 | #include <asm/tlbflush.h> |
22 | 22 | ||
23 | void show_mem(void) | ||
24 | { | ||
25 | int total = 0, reserved = 0; | ||
26 | int shared = 0, cached = 0; | ||
27 | int highmem = 0; | ||
28 | struct page *page; | ||
29 | pg_data_t *pgdat; | ||
30 | unsigned long i; | ||
31 | unsigned long flags; | ||
32 | |||
33 | printk(KERN_INFO "Mem-info:\n"); | ||
34 | show_free_areas(); | ||
35 | for_each_online_pgdat(pgdat) { | ||
36 | pgdat_resize_lock(pgdat, &flags); | ||
37 | for (i = 0; i < pgdat->node_spanned_pages; ++i) { | ||
38 | if (unlikely(i % MAX_ORDER_NR_PAGES == 0)) | ||
39 | touch_nmi_watchdog(); | ||
40 | page = pgdat_page_nr(pgdat, i); | ||
41 | total++; | ||
42 | if (PageHighMem(page)) | ||
43 | highmem++; | ||
44 | if (PageReserved(page)) | ||
45 | reserved++; | ||
46 | else if (PageSwapCache(page)) | ||
47 | cached++; | ||
48 | else if (page_count(page)) | ||
49 | shared += page_count(page) - 1; | ||
50 | } | ||
51 | pgdat_resize_unlock(pgdat, &flags); | ||
52 | } | ||
53 | printk(KERN_INFO "%d pages of RAM\n", total); | ||
54 | printk(KERN_INFO "%d pages of HIGHMEM\n", highmem); | ||
55 | printk(KERN_INFO "%d reserved pages\n", reserved); | ||
56 | printk(KERN_INFO "%d pages shared\n", shared); | ||
57 | printk(KERN_INFO "%d pages swap cached\n", cached); | ||
58 | |||
59 | printk(KERN_INFO "%lu pages dirty\n", global_page_state(NR_FILE_DIRTY)); | ||
60 | printk(KERN_INFO "%lu pages writeback\n", | ||
61 | global_page_state(NR_WRITEBACK)); | ||
62 | printk(KERN_INFO "%lu pages mapped\n", global_page_state(NR_FILE_MAPPED)); | ||
63 | printk(KERN_INFO "%lu pages slab\n", | ||
64 | global_page_state(NR_SLAB_RECLAIMABLE) + | ||
65 | global_page_state(NR_SLAB_UNRECLAIMABLE)); | ||
66 | printk(KERN_INFO "%lu pages pagetables\n", | ||
67 | global_page_state(NR_PAGETABLE)); | ||
68 | } | ||
69 | |||
70 | /* | 23 | /* |
71 | * Associate a virtual page frame with a given physical page frame | 24 | * Associate a virtual page frame with a given physical page frame |
72 | * and protection flags for that frame. | 25 | * and protection flags for that frame. |
diff --git a/arch/x86/mm/srat_32.c b/arch/x86/mm/srat_32.c index 1eb2973a301c..16ae70fc57e7 100644 --- a/arch/x86/mm/srat_32.c +++ b/arch/x86/mm/srat_32.c | |||
@@ -178,7 +178,7 @@ void acpi_numa_arch_fixup(void) | |||
178 | * start of the node, and that the current "end" address is after | 178 | * start of the node, and that the current "end" address is after |
179 | * the previous one. | 179 | * the previous one. |
180 | */ | 180 | */ |
181 | static __init void node_read_chunk(int nid, struct node_memory_chunk_s *memory_chunk) | 181 | static __init int node_read_chunk(int nid, struct node_memory_chunk_s *memory_chunk) |
182 | { | 182 | { |
183 | /* | 183 | /* |
184 | * Only add present memory as told by the e820. | 184 | * Only add present memory as told by the e820. |
@@ -189,10 +189,10 @@ static __init void node_read_chunk(int nid, struct node_memory_chunk_s *memory_c | |||
189 | if (memory_chunk->start_pfn >= max_pfn) { | 189 | if (memory_chunk->start_pfn >= max_pfn) { |
190 | printk(KERN_INFO "Ignoring SRAT pfns: %08lx - %08lx\n", | 190 | printk(KERN_INFO "Ignoring SRAT pfns: %08lx - %08lx\n", |
191 | memory_chunk->start_pfn, memory_chunk->end_pfn); | 191 | memory_chunk->start_pfn, memory_chunk->end_pfn); |
192 | return; | 192 | return -1; |
193 | } | 193 | } |
194 | if (memory_chunk->nid != nid) | 194 | if (memory_chunk->nid != nid) |
195 | return; | 195 | return -1; |
196 | 196 | ||
197 | if (!node_has_online_mem(nid)) | 197 | if (!node_has_online_mem(nid)) |
198 | node_start_pfn[nid] = memory_chunk->start_pfn; | 198 | node_start_pfn[nid] = memory_chunk->start_pfn; |
@@ -202,6 +202,8 @@ static __init void node_read_chunk(int nid, struct node_memory_chunk_s *memory_c | |||
202 | 202 | ||
203 | if (node_end_pfn[nid] < memory_chunk->end_pfn) | 203 | if (node_end_pfn[nid] < memory_chunk->end_pfn) |
204 | node_end_pfn[nid] = memory_chunk->end_pfn; | 204 | node_end_pfn[nid] = memory_chunk->end_pfn; |
205 | |||
206 | return 0; | ||
205 | } | 207 | } |
206 | 208 | ||
207 | int __init get_memcfg_from_srat(void) | 209 | int __init get_memcfg_from_srat(void) |
@@ -259,7 +261,9 @@ int __init get_memcfg_from_srat(void) | |||
259 | printk(KERN_DEBUG | 261 | printk(KERN_DEBUG |
260 | "chunk %d nid %d start_pfn %08lx end_pfn %08lx\n", | 262 | "chunk %d nid %d start_pfn %08lx end_pfn %08lx\n", |
261 | j, chunk->nid, chunk->start_pfn, chunk->end_pfn); | 263 | j, chunk->nid, chunk->start_pfn, chunk->end_pfn); |
262 | node_read_chunk(chunk->nid, chunk); | 264 | if (node_read_chunk(chunk->nid, chunk)) |
265 | continue; | ||
266 | |||
263 | e820_register_active_regions(chunk->nid, chunk->start_pfn, | 267 | e820_register_active_regions(chunk->nid, chunk->start_pfn, |
264 | min(chunk->end_pfn, max_pfn)); | 268 | min(chunk->end_pfn, max_pfn)); |
265 | } | 269 | } |
diff --git a/arch/x86/oprofile/nmi_int.c b/arch/x86/oprofile/nmi_int.c index 7f3329b55d2e..8a5f1614a3d5 100644 --- a/arch/x86/oprofile/nmi_int.c +++ b/arch/x86/oprofile/nmi_int.c | |||
@@ -15,6 +15,7 @@ | |||
15 | #include <linux/slab.h> | 15 | #include <linux/slab.h> |
16 | #include <linux/moduleparam.h> | 16 | #include <linux/moduleparam.h> |
17 | #include <linux/kdebug.h> | 17 | #include <linux/kdebug.h> |
18 | #include <linux/cpu.h> | ||
18 | #include <asm/nmi.h> | 19 | #include <asm/nmi.h> |
19 | #include <asm/msr.h> | 20 | #include <asm/msr.h> |
20 | #include <asm/apic.h> | 21 | #include <asm/apic.h> |
@@ -28,23 +29,48 @@ static DEFINE_PER_CPU(unsigned long, saved_lvtpc); | |||
28 | 29 | ||
29 | static int nmi_start(void); | 30 | static int nmi_start(void); |
30 | static void nmi_stop(void); | 31 | static void nmi_stop(void); |
32 | static void nmi_cpu_start(void *dummy); | ||
33 | static void nmi_cpu_stop(void *dummy); | ||
31 | 34 | ||
32 | /* 0 == registered but off, 1 == registered and on */ | 35 | /* 0 == registered but off, 1 == registered and on */ |
33 | static int nmi_enabled = 0; | 36 | static int nmi_enabled = 0; |
34 | 37 | ||
38 | #ifdef CONFIG_SMP | ||
39 | static int oprofile_cpu_notifier(struct notifier_block *b, unsigned long action, | ||
40 | void *data) | ||
41 | { | ||
42 | int cpu = (unsigned long)data; | ||
43 | switch (action) { | ||
44 | case CPU_DOWN_FAILED: | ||
45 | case CPU_ONLINE: | ||
46 | smp_call_function_single(cpu, nmi_cpu_start, NULL, 0); | ||
47 | break; | ||
48 | case CPU_DOWN_PREPARE: | ||
49 | smp_call_function_single(cpu, nmi_cpu_stop, NULL, 1); | ||
50 | break; | ||
51 | } | ||
52 | return NOTIFY_DONE; | ||
53 | } | ||
54 | |||
55 | static struct notifier_block oprofile_cpu_nb = { | ||
56 | .notifier_call = oprofile_cpu_notifier | ||
57 | }; | ||
58 | #endif | ||
59 | |||
35 | #ifdef CONFIG_PM | 60 | #ifdef CONFIG_PM |
36 | 61 | ||
37 | static int nmi_suspend(struct sys_device *dev, pm_message_t state) | 62 | static int nmi_suspend(struct sys_device *dev, pm_message_t state) |
38 | { | 63 | { |
64 | /* Only one CPU left, just stop that one */ | ||
39 | if (nmi_enabled == 1) | 65 | if (nmi_enabled == 1) |
40 | nmi_stop(); | 66 | nmi_cpu_stop(NULL); |
41 | return 0; | 67 | return 0; |
42 | } | 68 | } |
43 | 69 | ||
44 | static int nmi_resume(struct sys_device *dev) | 70 | static int nmi_resume(struct sys_device *dev) |
45 | { | 71 | { |
46 | if (nmi_enabled == 1) | 72 | if (nmi_enabled == 1) |
47 | nmi_start(); | 73 | nmi_cpu_start(NULL); |
48 | return 0; | 74 | return 0; |
49 | } | 75 | } |
50 | 76 | ||
@@ -269,10 +295,12 @@ static void nmi_cpu_shutdown(void *dummy) | |||
269 | 295 | ||
270 | static void nmi_shutdown(void) | 296 | static void nmi_shutdown(void) |
271 | { | 297 | { |
272 | struct op_msrs *msrs = &get_cpu_var(cpu_msrs); | 298 | struct op_msrs *msrs; |
299 | |||
273 | nmi_enabled = 0; | 300 | nmi_enabled = 0; |
274 | on_each_cpu(nmi_cpu_shutdown, NULL, 1); | 301 | on_each_cpu(nmi_cpu_shutdown, NULL, 1); |
275 | unregister_die_notifier(&profile_exceptions_nb); | 302 | unregister_die_notifier(&profile_exceptions_nb); |
303 | msrs = &get_cpu_var(cpu_msrs); | ||
276 | model->shutdown(msrs); | 304 | model->shutdown(msrs); |
277 | free_msrs(); | 305 | free_msrs(); |
278 | put_cpu_var(cpu_msrs); | 306 | put_cpu_var(cpu_msrs); |
@@ -369,20 +397,34 @@ static int __init ppro_init(char **cpu_type) | |||
369 | { | 397 | { |
370 | __u8 cpu_model = boot_cpu_data.x86_model; | 398 | __u8 cpu_model = boot_cpu_data.x86_model; |
371 | 399 | ||
372 | if (cpu_model == 14) | 400 | switch (cpu_model) { |
401 | case 0 ... 2: | ||
402 | *cpu_type = "i386/ppro"; | ||
403 | break; | ||
404 | case 3 ... 5: | ||
405 | *cpu_type = "i386/pii"; | ||
406 | break; | ||
407 | case 6 ... 8: | ||
408 | *cpu_type = "i386/piii"; | ||
409 | break; | ||
410 | case 9: | ||
411 | *cpu_type = "i386/p6_mobile"; | ||
412 | break; | ||
413 | case 10 ... 13: | ||
414 | *cpu_type = "i386/p6"; | ||
415 | break; | ||
416 | case 14: | ||
373 | *cpu_type = "i386/core"; | 417 | *cpu_type = "i386/core"; |
374 | else if (cpu_model == 15 || cpu_model == 23) | 418 | break; |
419 | case 15: case 23: | ||
375 | *cpu_type = "i386/core_2"; | 420 | *cpu_type = "i386/core_2"; |
376 | else if (cpu_model > 0xd) | 421 | break; |
422 | case 26: | ||
423 | *cpu_type = "i386/core_2"; | ||
424 | break; | ||
425 | default: | ||
426 | /* Unknown */ | ||
377 | return 0; | 427 | return 0; |
378 | else if (cpu_model == 9) { | ||
379 | *cpu_type = "i386/p6_mobile"; | ||
380 | } else if (cpu_model > 5) { | ||
381 | *cpu_type = "i386/piii"; | ||
382 | } else if (cpu_model > 2) { | ||
383 | *cpu_type = "i386/pii"; | ||
384 | } else { | ||
385 | *cpu_type = "i386/ppro"; | ||
386 | } | 428 | } |
387 | 429 | ||
388 | model = &op_ppro_spec; | 430 | model = &op_ppro_spec; |
@@ -449,6 +491,9 @@ int __init op_nmi_init(struct oprofile_operations *ops) | |||
449 | } | 491 | } |
450 | 492 | ||
451 | init_sysfs(); | 493 | init_sysfs(); |
494 | #ifdef CONFIG_SMP | ||
495 | register_cpu_notifier(&oprofile_cpu_nb); | ||
496 | #endif | ||
452 | using_nmi = 1; | 497 | using_nmi = 1; |
453 | ops->create_files = nmi_create_files; | 498 | ops->create_files = nmi_create_files; |
454 | ops->setup = nmi_setup; | 499 | ops->setup = nmi_setup; |
@@ -462,6 +507,10 @@ int __init op_nmi_init(struct oprofile_operations *ops) | |||
462 | 507 | ||
463 | void op_nmi_exit(void) | 508 | void op_nmi_exit(void) |
464 | { | 509 | { |
465 | if (using_nmi) | 510 | if (using_nmi) { |
466 | exit_sysfs(); | 511 | exit_sysfs(); |
512 | #ifdef CONFIG_SMP | ||
513 | unregister_cpu_notifier(&oprofile_cpu_nb); | ||
514 | #endif | ||
515 | } | ||
467 | } | 516 | } |
diff --git a/arch/x86/pci/Makefile b/arch/x86/pci/Makefile index e515e8db842a..d49202e740ea 100644 --- a/arch/x86/pci/Makefile +++ b/arch/x86/pci/Makefile | |||
@@ -5,13 +5,13 @@ obj-$(CONFIG_PCI_MMCONFIG) += mmconfig_$(BITS).o direct.o mmconfig-shared.o | |||
5 | obj-$(CONFIG_PCI_DIRECT) += direct.o | 5 | obj-$(CONFIG_PCI_DIRECT) += direct.o |
6 | obj-$(CONFIG_PCI_OLPC) += olpc.o | 6 | obj-$(CONFIG_PCI_OLPC) += olpc.o |
7 | 7 | ||
8 | pci-y := fixup.o | 8 | obj-y += fixup.o |
9 | pci-$(CONFIG_ACPI) += acpi.o | 9 | obj-$(CONFIG_ACPI) += acpi.o |
10 | pci-y += legacy.o irq.o | 10 | obj-y += legacy.o irq.o |
11 | 11 | ||
12 | pci-$(CONFIG_X86_VISWS) += visws.o | 12 | obj-$(CONFIG_X86_VISWS) += visws.o |
13 | 13 | ||
14 | pci-$(CONFIG_X86_NUMAQ) += numa.o | 14 | obj-$(CONFIG_X86_NUMAQ) += numaq_32.o |
15 | 15 | ||
16 | obj-y += $(pci-y) common.o early.o | 16 | obj-y += common.o early.o |
17 | obj-y += amd_bus.o | 17 | obj-y += amd_bus.o |
diff --git a/arch/x86/pci/amd_bus.c b/arch/x86/pci/amd_bus.c index dbf532369711..6a0fca78c362 100644 --- a/arch/x86/pci/amd_bus.c +++ b/arch/x86/pci/amd_bus.c | |||
@@ -1,6 +1,7 @@ | |||
1 | #include <linux/init.h> | 1 | #include <linux/init.h> |
2 | #include <linux/pci.h> | 2 | #include <linux/pci.h> |
3 | #include <linux/topology.h> | 3 | #include <linux/topology.h> |
4 | #include <linux/cpu.h> | ||
4 | #include "pci.h" | 5 | #include "pci.h" |
5 | 6 | ||
6 | #ifdef CONFIG_X86_64 | 7 | #ifdef CONFIG_X86_64 |
@@ -555,15 +556,17 @@ static int __init early_fill_mp_bus_info(void) | |||
555 | return 0; | 556 | return 0; |
556 | } | 557 | } |
557 | 558 | ||
558 | postcore_initcall(early_fill_mp_bus_info); | 559 | #else /* !CONFIG_X86_64 */ |
559 | 560 | ||
560 | #endif | 561 | static int __init early_fill_mp_bus_info(void) { return 0; } |
562 | |||
563 | #endif /* !CONFIG_X86_64 */ | ||
561 | 564 | ||
562 | /* common 32/64 bit code */ | 565 | /* common 32/64 bit code */ |
563 | 566 | ||
564 | #define ENABLE_CF8_EXT_CFG (1ULL << 46) | 567 | #define ENABLE_CF8_EXT_CFG (1ULL << 46) |
565 | 568 | ||
566 | static void enable_pci_io_ecs_per_cpu(void *unused) | 569 | static void enable_pci_io_ecs(void *unused) |
567 | { | 570 | { |
568 | u64 reg; | 571 | u64 reg; |
569 | rdmsrl(MSR_AMD64_NB_CFG, reg); | 572 | rdmsrl(MSR_AMD64_NB_CFG, reg); |
@@ -573,14 +576,51 @@ static void enable_pci_io_ecs_per_cpu(void *unused) | |||
573 | } | 576 | } |
574 | } | 577 | } |
575 | 578 | ||
576 | static int __init enable_pci_io_ecs(void) | 579 | static int __cpuinit amd_cpu_notify(struct notifier_block *self, |
580 | unsigned long action, void *hcpu) | ||
577 | { | 581 | { |
582 | int cpu = (long)hcpu; | ||
583 | switch(action) { | ||
584 | case CPU_ONLINE: | ||
585 | case CPU_ONLINE_FROZEN: | ||
586 | smp_call_function_single(cpu, enable_pci_io_ecs, NULL, 0); | ||
587 | break; | ||
588 | default: | ||
589 | break; | ||
590 | } | ||
591 | return NOTIFY_OK; | ||
592 | } | ||
593 | |||
594 | static struct notifier_block __cpuinitdata amd_cpu_notifier = { | ||
595 | .notifier_call = amd_cpu_notify, | ||
596 | }; | ||
597 | |||
598 | static int __init pci_io_ecs_init(void) | ||
599 | { | ||
600 | int cpu; | ||
601 | |||
578 | /* assume all cpus from fam10h have IO ECS */ | 602 | /* assume all cpus from fam10h have IO ECS */ |
579 | if (boot_cpu_data.x86 < 0x10) | 603 | if (boot_cpu_data.x86 < 0x10) |
580 | return 0; | 604 | return 0; |
581 | on_each_cpu(enable_pci_io_ecs_per_cpu, NULL, 1); | 605 | |
606 | register_cpu_notifier(&amd_cpu_notifier); | ||
607 | for_each_online_cpu(cpu) | ||
608 | amd_cpu_notify(&amd_cpu_notifier, (unsigned long)CPU_ONLINE, | ||
609 | (void *)(long)cpu); | ||
582 | pci_probe |= PCI_HAS_IO_ECS; | 610 | pci_probe |= PCI_HAS_IO_ECS; |
611 | |||
612 | return 0; | ||
613 | } | ||
614 | |||
615 | static int __init amd_postcore_init(void) | ||
616 | { | ||
617 | if (boot_cpu_data.x86_vendor != X86_VENDOR_AMD) | ||
618 | return 0; | ||
619 | |||
620 | early_fill_mp_bus_info(); | ||
621 | pci_io_ecs_init(); | ||
622 | |||
583 | return 0; | 623 | return 0; |
584 | } | 624 | } |
585 | 625 | ||
586 | postcore_initcall(enable_pci_io_ecs); | 626 | postcore_initcall(amd_postcore_init); |
diff --git a/arch/x86/pci/early.c b/arch/x86/pci/early.c index 858dbe3399f9..86631ccbc25a 100644 --- a/arch/x86/pci/early.c +++ b/arch/x86/pci/early.c | |||
@@ -7,15 +7,13 @@ | |||
7 | /* Direct PCI access. This is used for PCI accesses in early boot before | 7 | /* Direct PCI access. This is used for PCI accesses in early boot before |
8 | the PCI subsystem works. */ | 8 | the PCI subsystem works. */ |
9 | 9 | ||
10 | #define PDprintk(x...) | ||
11 | |||
12 | u32 read_pci_config(u8 bus, u8 slot, u8 func, u8 offset) | 10 | u32 read_pci_config(u8 bus, u8 slot, u8 func, u8 offset) |
13 | { | 11 | { |
14 | u32 v; | 12 | u32 v; |
15 | outl(0x80000000 | (bus<<16) | (slot<<11) | (func<<8) | offset, 0xcf8); | 13 | outl(0x80000000 | (bus<<16) | (slot<<11) | (func<<8) | offset, 0xcf8); |
16 | v = inl(0xcfc); | 14 | v = inl(0xcfc); |
17 | if (v != 0xffffffff) | 15 | if (v != 0xffffffff) |
18 | PDprintk("%x reading 4 from %x: %x\n", slot, offset, v); | 16 | pr_debug("%x reading 4 from %x: %x\n", slot, offset, v); |
19 | return v; | 17 | return v; |
20 | } | 18 | } |
21 | 19 | ||
@@ -24,7 +22,7 @@ u8 read_pci_config_byte(u8 bus, u8 slot, u8 func, u8 offset) | |||
24 | u8 v; | 22 | u8 v; |
25 | outl(0x80000000 | (bus<<16) | (slot<<11) | (func<<8) | offset, 0xcf8); | 23 | outl(0x80000000 | (bus<<16) | (slot<<11) | (func<<8) | offset, 0xcf8); |
26 | v = inb(0xcfc + (offset&3)); | 24 | v = inb(0xcfc + (offset&3)); |
27 | PDprintk("%x reading 1 from %x: %x\n", slot, offset, v); | 25 | pr_debug("%x reading 1 from %x: %x\n", slot, offset, v); |
28 | return v; | 26 | return v; |
29 | } | 27 | } |
30 | 28 | ||
@@ -33,28 +31,28 @@ u16 read_pci_config_16(u8 bus, u8 slot, u8 func, u8 offset) | |||
33 | u16 v; | 31 | u16 v; |
34 | outl(0x80000000 | (bus<<16) | (slot<<11) | (func<<8) | offset, 0xcf8); | 32 | outl(0x80000000 | (bus<<16) | (slot<<11) | (func<<8) | offset, 0xcf8); |
35 | v = inw(0xcfc + (offset&2)); | 33 | v = inw(0xcfc + (offset&2)); |
36 | PDprintk("%x reading 2 from %x: %x\n", slot, offset, v); | 34 | pr_debug("%x reading 2 from %x: %x\n", slot, offset, v); |
37 | return v; | 35 | return v; |
38 | } | 36 | } |
39 | 37 | ||
40 | void write_pci_config(u8 bus, u8 slot, u8 func, u8 offset, | 38 | void write_pci_config(u8 bus, u8 slot, u8 func, u8 offset, |
41 | u32 val) | 39 | u32 val) |
42 | { | 40 | { |
43 | PDprintk("%x writing to %x: %x\n", slot, offset, val); | 41 | pr_debug("%x writing to %x: %x\n", slot, offset, val); |
44 | outl(0x80000000 | (bus<<16) | (slot<<11) | (func<<8) | offset, 0xcf8); | 42 | outl(0x80000000 | (bus<<16) | (slot<<11) | (func<<8) | offset, 0xcf8); |
45 | outl(val, 0xcfc); | 43 | outl(val, 0xcfc); |
46 | } | 44 | } |
47 | 45 | ||
48 | void write_pci_config_byte(u8 bus, u8 slot, u8 func, u8 offset, u8 val) | 46 | void write_pci_config_byte(u8 bus, u8 slot, u8 func, u8 offset, u8 val) |
49 | { | 47 | { |
50 | PDprintk("%x writing to %x: %x\n", slot, offset, val); | 48 | pr_debug("%x writing to %x: %x\n", slot, offset, val); |
51 | outl(0x80000000 | (bus<<16) | (slot<<11) | (func<<8) | offset, 0xcf8); | 49 | outl(0x80000000 | (bus<<16) | (slot<<11) | (func<<8) | offset, 0xcf8); |
52 | outb(val, 0xcfc + (offset&3)); | 50 | outb(val, 0xcfc + (offset&3)); |
53 | } | 51 | } |
54 | 52 | ||
55 | void write_pci_config_16(u8 bus, u8 slot, u8 func, u8 offset, u16 val) | 53 | void write_pci_config_16(u8 bus, u8 slot, u8 func, u8 offset, u16 val) |
56 | { | 54 | { |
57 | PDprintk("%x writing to %x: %x\n", slot, offset, val); | 55 | pr_debug("%x writing to %x: %x\n", slot, offset, val); |
58 | outl(0x80000000 | (bus<<16) | (slot<<11) | (func<<8) | offset, 0xcf8); | 56 | outl(0x80000000 | (bus<<16) | (slot<<11) | (func<<8) | offset, 0xcf8); |
59 | outw(val, 0xcfc + (offset&2)); | 57 | outw(val, 0xcfc + (offset&2)); |
60 | } | 58 | } |
@@ -71,7 +69,7 @@ void early_dump_pci_device(u8 bus, u8 slot, u8 func) | |||
71 | int j; | 69 | int j; |
72 | u32 val; | 70 | u32 val; |
73 | 71 | ||
74 | printk("PCI: %02x:%02x:%02x", bus, slot, func); | 72 | printk(KERN_INFO "PCI: %02x:%02x:%02x", bus, slot, func); |
75 | 73 | ||
76 | for (i = 0; i < 256; i += 4) { | 74 | for (i = 0; i < 256; i += 4) { |
77 | if (!(i & 0x0f)) | 75 | if (!(i & 0x0f)) |
diff --git a/arch/x86/pci/fixup.c b/arch/x86/pci/fixup.c index ff3a6a336342..4bdaa590375d 100644 --- a/arch/x86/pci/fixup.c +++ b/arch/x86/pci/fixup.c | |||
@@ -23,7 +23,8 @@ static void __devinit pci_fixup_i450nx(struct pci_dev *d) | |||
23 | pci_read_config_byte(d, reg++, &busno); | 23 | pci_read_config_byte(d, reg++, &busno); |
24 | pci_read_config_byte(d, reg++, &suba); | 24 | pci_read_config_byte(d, reg++, &suba); |
25 | pci_read_config_byte(d, reg++, &subb); | 25 | pci_read_config_byte(d, reg++, &subb); |
26 | DBG("i450NX PXB %d: %02x/%02x/%02x\n", pxb, busno, suba, subb); | 26 | dev_dbg(&d->dev, "i450NX PXB %d: %02x/%02x/%02x\n", pxb, busno, |
27 | suba, subb); | ||
27 | if (busno) | 28 | if (busno) |
28 | pci_scan_bus_with_sysdata(busno); /* Bus A */ | 29 | pci_scan_bus_with_sysdata(busno); /* Bus A */ |
29 | if (suba < subb) | 30 | if (suba < subb) |
diff --git a/arch/x86/pci/i386.c b/arch/x86/pci/i386.c index 2aafb67dc5f1..8791fc55e715 100644 --- a/arch/x86/pci/i386.c +++ b/arch/x86/pci/i386.c | |||
@@ -128,10 +128,7 @@ static void __init pcibios_allocate_bus_resources(struct list_head *bus_list) | |||
128 | pr = pci_find_parent_resource(dev, r); | 128 | pr = pci_find_parent_resource(dev, r); |
129 | if (!r->start || !pr || | 129 | if (!r->start || !pr || |
130 | request_resource(pr, r) < 0) { | 130 | request_resource(pr, r) < 0) { |
131 | printk(KERN_ERR "PCI: Cannot allocate " | 131 | dev_err(&dev->dev, "BAR %d: can't allocate resource\n", idx); |
132 | "resource region %d " | ||
133 | "of bridge %s\n", | ||
134 | idx, pci_name(dev)); | ||
135 | /* | 132 | /* |
136 | * Something is wrong with the region. | 133 | * Something is wrong with the region. |
137 | * Invalidate the resource to prevent | 134 | * Invalidate the resource to prevent |
@@ -166,15 +163,13 @@ static void __init pcibios_allocate_resources(int pass) | |||
166 | else | 163 | else |
167 | disabled = !(command & PCI_COMMAND_MEMORY); | 164 | disabled = !(command & PCI_COMMAND_MEMORY); |
168 | if (pass == disabled) { | 165 | if (pass == disabled) { |
169 | DBG("PCI: Resource %08lx-%08lx " | 166 | dev_dbg(&dev->dev, "resource %#08llx-%#08llx (f=%lx, d=%d, p=%d)\n", |
170 | "(f=%lx, d=%d, p=%d)\n", | 167 | (unsigned long long) r->start, |
171 | r->start, r->end, r->flags, disabled, pass); | 168 | (unsigned long long) r->end, |
169 | r->flags, disabled, pass); | ||
172 | pr = pci_find_parent_resource(dev, r); | 170 | pr = pci_find_parent_resource(dev, r); |
173 | if (!pr || request_resource(pr, r) < 0) { | 171 | if (!pr || request_resource(pr, r) < 0) { |
174 | printk(KERN_ERR "PCI: Cannot allocate " | 172 | dev_err(&dev->dev, "BAR %d: can't allocate resource\n", idx); |
175 | "resource region %d " | ||
176 | "of device %s\n", | ||
177 | idx, pci_name(dev)); | ||
178 | /* We'll assign a new address later */ | 173 | /* We'll assign a new address later */ |
179 | r->end -= r->start; | 174 | r->end -= r->start; |
180 | r->start = 0; | 175 | r->start = 0; |
@@ -187,8 +182,7 @@ static void __init pcibios_allocate_resources(int pass) | |||
187 | /* Turn the ROM off, leave the resource region, | 182 | /* Turn the ROM off, leave the resource region, |
188 | * but keep it unregistered. */ | 183 | * but keep it unregistered. */ |
189 | u32 reg; | 184 | u32 reg; |
190 | DBG("PCI: Switching off ROM of %s\n", | 185 | dev_dbg(&dev->dev, "disabling ROM\n"); |
191 | pci_name(dev)); | ||
192 | r->flags &= ~IORESOURCE_ROM_ENABLE; | 186 | r->flags &= ~IORESOURCE_ROM_ENABLE; |
193 | pci_read_config_dword(dev, | 187 | pci_read_config_dword(dev, |
194 | dev->rom_base_reg, &reg); | 188 | dev->rom_base_reg, &reg); |
@@ -257,8 +251,7 @@ void pcibios_set_master(struct pci_dev *dev) | |||
257 | lat = pcibios_max_latency; | 251 | lat = pcibios_max_latency; |
258 | else | 252 | else |
259 | return; | 253 | return; |
260 | printk(KERN_DEBUG "PCI: Setting latency timer of device %s to %d\n", | 254 | dev_printk(KERN_DEBUG, &dev->dev, "setting latency timer to %d\n", lat); |
261 | pci_name(dev), lat); | ||
262 | pci_write_config_byte(dev, PCI_LATENCY_TIMER, lat); | 255 | pci_write_config_byte(dev, PCI_LATENCY_TIMER, lat); |
263 | } | 256 | } |
264 | 257 | ||
@@ -280,6 +273,7 @@ static void pci_track_mmap_page_range(struct vm_area_struct *vma) | |||
280 | static struct vm_operations_struct pci_mmap_ops = { | 273 | static struct vm_operations_struct pci_mmap_ops = { |
281 | .open = pci_track_mmap_page_range, | 274 | .open = pci_track_mmap_page_range, |
282 | .close = pci_unmap_page_range, | 275 | .close = pci_unmap_page_range, |
276 | .access = generic_access_phys, | ||
283 | }; | 277 | }; |
284 | 278 | ||
285 | int pci_mmap_page_range(struct pci_dev *dev, struct vm_area_struct *vma, | 279 | int pci_mmap_page_range(struct pci_dev *dev, struct vm_area_struct *vma, |
diff --git a/arch/x86/pci/irq.c b/arch/x86/pci/irq.c index 6a06a2eb0597..8e077185e185 100644 --- a/arch/x86/pci/irq.c +++ b/arch/x86/pci/irq.c | |||
@@ -436,7 +436,7 @@ static int pirq_vlsi_get(struct pci_dev *router, struct pci_dev *dev, int pirq) | |||
436 | { | 436 | { |
437 | WARN_ON_ONCE(pirq >= 9); | 437 | WARN_ON_ONCE(pirq >= 9); |
438 | if (pirq > 8) { | 438 | if (pirq > 8) { |
439 | printk(KERN_INFO "VLSI router pirq escape (%d)\n", pirq); | 439 | dev_info(&dev->dev, "VLSI router PIRQ escape (%d)\n", pirq); |
440 | return 0; | 440 | return 0; |
441 | } | 441 | } |
442 | return read_config_nybble(router, 0x74, pirq-1); | 442 | return read_config_nybble(router, 0x74, pirq-1); |
@@ -446,7 +446,7 @@ static int pirq_vlsi_set(struct pci_dev *router, struct pci_dev *dev, int pirq, | |||
446 | { | 446 | { |
447 | WARN_ON_ONCE(pirq >= 9); | 447 | WARN_ON_ONCE(pirq >= 9); |
448 | if (pirq > 8) { | 448 | if (pirq > 8) { |
449 | printk(KERN_INFO "VLSI router pirq escape (%d)\n", pirq); | 449 | dev_info(&dev->dev, "VLSI router PIRQ escape (%d)\n", pirq); |
450 | return 0; | 450 | return 0; |
451 | } | 451 | } |
452 | write_config_nybble(router, 0x74, pirq-1, irq); | 452 | write_config_nybble(router, 0x74, pirq-1, irq); |
@@ -492,15 +492,17 @@ static int pirq_amd756_get(struct pci_dev *router, struct pci_dev *dev, int pirq | |||
492 | irq = 0; | 492 | irq = 0; |
493 | if (pirq <= 4) | 493 | if (pirq <= 4) |
494 | irq = read_config_nybble(router, 0x56, pirq - 1); | 494 | irq = read_config_nybble(router, 0x56, pirq - 1); |
495 | printk(KERN_INFO "AMD756: dev %04x:%04x, router pirq : %d get irq : %2d\n", | 495 | dev_info(&dev->dev, |
496 | dev->vendor, dev->device, pirq, irq); | 496 | "AMD756: dev [%04x/%04x], router PIRQ %d get IRQ %d\n", |
497 | dev->vendor, dev->device, pirq, irq); | ||
497 | return irq; | 498 | return irq; |
498 | } | 499 | } |
499 | 500 | ||
500 | static int pirq_amd756_set(struct pci_dev *router, struct pci_dev *dev, int pirq, int irq) | 501 | static int pirq_amd756_set(struct pci_dev *router, struct pci_dev *dev, int pirq, int irq) |
501 | { | 502 | { |
502 | printk(KERN_INFO "AMD756: dev %04x:%04x, router pirq : %d SET irq : %2d\n", | 503 | dev_info(&dev->dev, |
503 | dev->vendor, dev->device, pirq, irq); | 504 | "AMD756: dev [%04x/%04x], router PIRQ %d set IRQ %d\n", |
505 | dev->vendor, dev->device, pirq, irq); | ||
504 | if (pirq <= 4) | 506 | if (pirq <= 4) |
505 | write_config_nybble(router, 0x56, pirq - 1, irq); | 507 | write_config_nybble(router, 0x56, pirq - 1, irq); |
506 | return 1; | 508 | return 1; |
@@ -588,6 +590,8 @@ static __init int intel_router_probe(struct irq_router *r, struct pci_dev *route | |||
588 | case PCI_DEVICE_ID_INTEL_ICH10_1: | 590 | case PCI_DEVICE_ID_INTEL_ICH10_1: |
589 | case PCI_DEVICE_ID_INTEL_ICH10_2: | 591 | case PCI_DEVICE_ID_INTEL_ICH10_2: |
590 | case PCI_DEVICE_ID_INTEL_ICH10_3: | 592 | case PCI_DEVICE_ID_INTEL_ICH10_3: |
593 | case PCI_DEVICE_ID_INTEL_PCH_0: | ||
594 | case PCI_DEVICE_ID_INTEL_PCH_1: | ||
591 | r->name = "PIIX/ICH"; | 595 | r->name = "PIIX/ICH"; |
592 | r->get = pirq_piix_get; | 596 | r->get = pirq_piix_get; |
593 | r->set = pirq_piix_set; | 597 | r->set = pirq_piix_set; |
@@ -730,7 +734,6 @@ static __init int ali_router_probe(struct irq_router *r, struct pci_dev *router, | |||
730 | switch (device) { | 734 | switch (device) { |
731 | case PCI_DEVICE_ID_AL_M1533: | 735 | case PCI_DEVICE_ID_AL_M1533: |
732 | case PCI_DEVICE_ID_AL_M1563: | 736 | case PCI_DEVICE_ID_AL_M1563: |
733 | printk(KERN_DEBUG "PCI: Using ALI IRQ Router\n"); | ||
734 | r->name = "ALI"; | 737 | r->name = "ALI"; |
735 | r->get = pirq_ali_get; | 738 | r->get = pirq_ali_get; |
736 | r->set = pirq_ali_set; | 739 | r->set = pirq_ali_set; |
@@ -840,11 +843,9 @@ static void __init pirq_find_router(struct irq_router *r) | |||
840 | h->probe(r, pirq_router_dev, pirq_router_dev->device)) | 843 | h->probe(r, pirq_router_dev, pirq_router_dev->device)) |
841 | break; | 844 | break; |
842 | } | 845 | } |
843 | printk(KERN_INFO "PCI: Using IRQ router %s [%04x/%04x] at %s\n", | 846 | dev_info(&pirq_router_dev->dev, "%s IRQ router [%04x/%04x]\n", |
844 | pirq_router.name, | 847 | pirq_router.name, |
845 | pirq_router_dev->vendor, | 848 | pirq_router_dev->vendor, pirq_router_dev->device); |
846 | pirq_router_dev->device, | ||
847 | pci_name(pirq_router_dev)); | ||
848 | 849 | ||
849 | /* The device remains referenced for the kernel lifetime */ | 850 | /* The device remains referenced for the kernel lifetime */ |
850 | } | 851 | } |
@@ -877,7 +878,7 @@ static int pcibios_lookup_irq(struct pci_dev *dev, int assign) | |||
877 | /* Find IRQ pin */ | 878 | /* Find IRQ pin */ |
878 | pci_read_config_byte(dev, PCI_INTERRUPT_PIN, &pin); | 879 | pci_read_config_byte(dev, PCI_INTERRUPT_PIN, &pin); |
879 | if (!pin) { | 880 | if (!pin) { |
880 | DBG(KERN_DEBUG " -> no interrupt pin\n"); | 881 | dev_dbg(&dev->dev, "no interrupt pin\n"); |
881 | return 0; | 882 | return 0; |
882 | } | 883 | } |
883 | pin = pin - 1; | 884 | pin = pin - 1; |
@@ -887,20 +888,20 @@ static int pcibios_lookup_irq(struct pci_dev *dev, int assign) | |||
887 | if (!pirq_table) | 888 | if (!pirq_table) |
888 | return 0; | 889 | return 0; |
889 | 890 | ||
890 | DBG(KERN_DEBUG "IRQ for %s[%c]", pci_name(dev), 'A' + pin); | ||
891 | info = pirq_get_info(dev); | 891 | info = pirq_get_info(dev); |
892 | if (!info) { | 892 | if (!info) { |
893 | DBG(" -> not found in routing table\n" KERN_DEBUG); | 893 | dev_dbg(&dev->dev, "PCI INT %c not found in routing table\n", |
894 | 'A' + pin); | ||
894 | return 0; | 895 | return 0; |
895 | } | 896 | } |
896 | pirq = info->irq[pin].link; | 897 | pirq = info->irq[pin].link; |
897 | mask = info->irq[pin].bitmap; | 898 | mask = info->irq[pin].bitmap; |
898 | if (!pirq) { | 899 | if (!pirq) { |
899 | DBG(" -> not routed\n" KERN_DEBUG); | 900 | dev_dbg(&dev->dev, "PCI INT %c not routed\n", 'A' + pin); |
900 | return 0; | 901 | return 0; |
901 | } | 902 | } |
902 | DBG(" -> PIRQ %02x, mask %04x, excl %04x", pirq, mask, | 903 | dev_dbg(&dev->dev, "PCI INT %c -> PIRQ %02x, mask %04x, excl %04x", |
903 | pirq_table->exclusive_irqs); | 904 | 'A' + pin, pirq, mask, pirq_table->exclusive_irqs); |
904 | mask &= pcibios_irq_mask; | 905 | mask &= pcibios_irq_mask; |
905 | 906 | ||
906 | /* Work around broken HP Pavilion Notebooks which assign USB to | 907 | /* Work around broken HP Pavilion Notebooks which assign USB to |
@@ -930,10 +931,8 @@ static int pcibios_lookup_irq(struct pci_dev *dev, int assign) | |||
930 | if (pci_probe & PCI_USE_PIRQ_MASK) | 931 | if (pci_probe & PCI_USE_PIRQ_MASK) |
931 | newirq = 0; | 932 | newirq = 0; |
932 | else | 933 | else |
933 | printk("\n" KERN_WARNING | 934 | dev_warn(&dev->dev, "IRQ %d doesn't match PIRQ mask " |
934 | "PCI: IRQ %i for device %s doesn't match PIRQ mask - try pci=usepirqmask\n" | 935 | "%#x; try pci=usepirqmask\n", newirq, mask); |
935 | KERN_DEBUG, newirq, | ||
936 | pci_name(dev)); | ||
937 | } | 936 | } |
938 | if (!newirq && assign) { | 937 | if (!newirq && assign) { |
939 | for (i = 0; i < 16; i++) { | 938 | for (i = 0; i < 16; i++) { |
@@ -944,39 +943,35 @@ static int pcibios_lookup_irq(struct pci_dev *dev, int assign) | |||
944 | newirq = i; | 943 | newirq = i; |
945 | } | 944 | } |
946 | } | 945 | } |
947 | DBG(" -> newirq=%d", newirq); | 946 | dev_dbg(&dev->dev, "PCI INT %c -> newirq %d", 'A' + pin, newirq); |
948 | 947 | ||
949 | /* Check if it is hardcoded */ | 948 | /* Check if it is hardcoded */ |
950 | if ((pirq & 0xf0) == 0xf0) { | 949 | if ((pirq & 0xf0) == 0xf0) { |
951 | irq = pirq & 0xf; | 950 | irq = pirq & 0xf; |
952 | DBG(" -> hardcoded IRQ %d\n", irq); | 951 | msg = "hardcoded"; |
953 | msg = "Hardcoded"; | ||
954 | } else if (r->get && (irq = r->get(pirq_router_dev, dev, pirq)) && \ | 952 | } else if (r->get && (irq = r->get(pirq_router_dev, dev, pirq)) && \ |
955 | ((!(pci_probe & PCI_USE_PIRQ_MASK)) || ((1 << irq) & mask))) { | 953 | ((!(pci_probe & PCI_USE_PIRQ_MASK)) || ((1 << irq) & mask))) { |
956 | DBG(" -> got IRQ %d\n", irq); | 954 | msg = "found"; |
957 | msg = "Found"; | ||
958 | eisa_set_level_irq(irq); | 955 | eisa_set_level_irq(irq); |
959 | } else if (newirq && r->set && | 956 | } else if (newirq && r->set && |
960 | (dev->class >> 8) != PCI_CLASS_DISPLAY_VGA) { | 957 | (dev->class >> 8) != PCI_CLASS_DISPLAY_VGA) { |
961 | DBG(" -> assigning IRQ %d", newirq); | ||
962 | if (r->set(pirq_router_dev, dev, pirq, newirq)) { | 958 | if (r->set(pirq_router_dev, dev, pirq, newirq)) { |
963 | eisa_set_level_irq(newirq); | 959 | eisa_set_level_irq(newirq); |
964 | DBG(" ... OK\n"); | 960 | msg = "assigned"; |
965 | msg = "Assigned"; | ||
966 | irq = newirq; | 961 | irq = newirq; |
967 | } | 962 | } |
968 | } | 963 | } |
969 | 964 | ||
970 | if (!irq) { | 965 | if (!irq) { |
971 | DBG(" ... failed\n"); | ||
972 | if (newirq && mask == (1 << newirq)) { | 966 | if (newirq && mask == (1 << newirq)) { |
973 | msg = "Guessed"; | 967 | msg = "guessed"; |
974 | irq = newirq; | 968 | irq = newirq; |
975 | } else | 969 | } else { |
970 | dev_dbg(&dev->dev, "can't route interrupt\n"); | ||
976 | return 0; | 971 | return 0; |
972 | } | ||
977 | } | 973 | } |
978 | printk(KERN_INFO "PCI: %s IRQ %d for device %s\n", msg, irq, | 974 | dev_info(&dev->dev, "%s PCI INT %c -> IRQ %d\n", msg, 'A' + pin, irq); |
979 | pci_name(dev)); | ||
980 | 975 | ||
981 | /* Update IRQ for all devices with the same pirq value */ | 976 | /* Update IRQ for all devices with the same pirq value */ |
982 | while ((dev2 = pci_get_device(PCI_ANY_ID, PCI_ANY_ID, dev2)) != NULL) { | 977 | while ((dev2 = pci_get_device(PCI_ANY_ID, PCI_ANY_ID, dev2)) != NULL) { |
@@ -996,17 +991,17 @@ static int pcibios_lookup_irq(struct pci_dev *dev, int assign) | |||
996 | (!(pci_probe & PCI_USE_PIRQ_MASK) || \ | 991 | (!(pci_probe & PCI_USE_PIRQ_MASK) || \ |
997 | ((1 << dev2->irq) & mask))) { | 992 | ((1 << dev2->irq) & mask))) { |
998 | #ifndef CONFIG_PCI_MSI | 993 | #ifndef CONFIG_PCI_MSI |
999 | printk(KERN_INFO "IRQ routing conflict for %s, have irq %d, want irq %d\n", | 994 | dev_info(&dev2->dev, "IRQ routing conflict: " |
1000 | pci_name(dev2), dev2->irq, irq); | 995 | "have IRQ %d, want IRQ %d\n", |
996 | dev2->irq, irq); | ||
1001 | #endif | 997 | #endif |
1002 | continue; | 998 | continue; |
1003 | } | 999 | } |
1004 | dev2->irq = irq; | 1000 | dev2->irq = irq; |
1005 | pirq_penalty[irq]++; | 1001 | pirq_penalty[irq]++; |
1006 | if (dev != dev2) | 1002 | if (dev != dev2) |
1007 | printk(KERN_INFO | 1003 | dev_info(&dev->dev, "sharing IRQ %d with %s\n", |
1008 | "PCI: Sharing IRQ %d with %s\n", | 1004 | irq, pci_name(dev2)); |
1009 | irq, pci_name(dev2)); | ||
1010 | } | 1005 | } |
1011 | } | 1006 | } |
1012 | return 1; | 1007 | return 1; |
@@ -1025,8 +1020,7 @@ static void __init pcibios_fixup_irqs(void) | |||
1025 | * already in use. | 1020 | * already in use. |
1026 | */ | 1021 | */ |
1027 | if (dev->irq >= 16) { | 1022 | if (dev->irq >= 16) { |
1028 | DBG(KERN_DEBUG "%s: ignoring bogus IRQ %d\n", | 1023 | dev_dbg(&dev->dev, "ignoring bogus IRQ %d\n", dev->irq); |
1029 | pci_name(dev), dev->irq); | ||
1030 | dev->irq = 0; | 1024 | dev->irq = 0; |
1031 | } | 1025 | } |
1032 | /* | 1026 | /* |
@@ -1070,12 +1064,12 @@ static void __init pcibios_fixup_irqs(void) | |||
1070 | irq = IO_APIC_get_PCI_irq_vector(bridge->bus->number, | 1064 | irq = IO_APIC_get_PCI_irq_vector(bridge->bus->number, |
1071 | PCI_SLOT(bridge->devfn), pin); | 1065 | PCI_SLOT(bridge->devfn), pin); |
1072 | if (irq >= 0) | 1066 | if (irq >= 0) |
1073 | printk(KERN_WARNING "PCI: using PPB %s[%c] to get irq %d\n", | 1067 | dev_warn(&dev->dev, "using bridge %s INT %c to get IRQ %d\n", |
1074 | pci_name(bridge), 'A' + pin, irq); | 1068 | pci_name(bridge), |
1069 | 'A' + pin, irq); | ||
1075 | } | 1070 | } |
1076 | if (irq >= 0) { | 1071 | if (irq >= 0) { |
1077 | printk(KERN_INFO "PCI->APIC IRQ transform: %s[%c] -> IRQ %d\n", | 1072 | dev_info(&dev->dev, "PCI->APIC IRQ transform: INT %c -> IRQ %d\n", 'A' + pin, irq); |
1078 | pci_name(dev), 'A' + pin, irq); | ||
1079 | dev->irq = irq; | 1073 | dev->irq = irq; |
1080 | } | 1074 | } |
1081 | } | 1075 | } |
@@ -1231,25 +1225,24 @@ static int pirq_enable_irq(struct pci_dev *dev) | |||
1231 | irq = IO_APIC_get_PCI_irq_vector(bridge->bus->number, | 1225 | irq = IO_APIC_get_PCI_irq_vector(bridge->bus->number, |
1232 | PCI_SLOT(bridge->devfn), pin); | 1226 | PCI_SLOT(bridge->devfn), pin); |
1233 | if (irq >= 0) | 1227 | if (irq >= 0) |
1234 | printk(KERN_WARNING | 1228 | dev_warn(&dev->dev, "using bridge %s " |
1235 | "PCI: using PPB %s[%c] to get irq %d\n", | 1229 | "INT %c to get IRQ %d\n", |
1236 | pci_name(bridge), | 1230 | pci_name(bridge), 'A' + pin, |
1237 | 'A' + pin, irq); | 1231 | irq); |
1238 | dev = bridge; | 1232 | dev = bridge; |
1239 | } | 1233 | } |
1240 | dev = temp_dev; | 1234 | dev = temp_dev; |
1241 | if (irq >= 0) { | 1235 | if (irq >= 0) { |
1242 | printk(KERN_INFO | 1236 | dev_info(&dev->dev, "PCI->APIC IRQ transform: " |
1243 | "PCI->APIC IRQ transform: %s[%c] -> IRQ %d\n", | 1237 | "INT %c -> IRQ %d\n", 'A' + pin, irq); |
1244 | pci_name(dev), 'A' + pin, irq); | ||
1245 | dev->irq = irq; | 1238 | dev->irq = irq; |
1246 | return 0; | 1239 | return 0; |
1247 | } else | 1240 | } else |
1248 | msg = " Probably buggy MP table."; | 1241 | msg = "; probably buggy MP table"; |
1249 | } else if (pci_probe & PCI_BIOS_IRQ_SCAN) | 1242 | } else if (pci_probe & PCI_BIOS_IRQ_SCAN) |
1250 | msg = ""; | 1243 | msg = ""; |
1251 | else | 1244 | else |
1252 | msg = " Please try using pci=biosirq."; | 1245 | msg = "; please try using pci=biosirq"; |
1253 | 1246 | ||
1254 | /* | 1247 | /* |
1255 | * With IDE legacy devices the IRQ lookup failure is not | 1248 | * With IDE legacy devices the IRQ lookup failure is not |
@@ -1259,9 +1252,8 @@ static int pirq_enable_irq(struct pci_dev *dev) | |||
1259 | !(dev->class & 0x5)) | 1252 | !(dev->class & 0x5)) |
1260 | return 0; | 1253 | return 0; |
1261 | 1254 | ||
1262 | printk(KERN_WARNING | 1255 | dev_warn(&dev->dev, "can't find IRQ for PCI INT %c%s\n", |
1263 | "PCI: No IRQ known for interrupt pin %c of device %s.%s\n", | 1256 | 'A' + pin, msg); |
1264 | 'A' + pin, pci_name(dev), msg); | ||
1265 | } | 1257 | } |
1266 | return 0; | 1258 | return 0; |
1267 | } | 1259 | } |
diff --git a/arch/x86/pci/legacy.c b/arch/x86/pci/legacy.c index 132876cc6fca..b722dd481b39 100644 --- a/arch/x86/pci/legacy.c +++ b/arch/x86/pci/legacy.c | |||
@@ -14,7 +14,7 @@ static void __devinit pcibios_fixup_peer_bridges(void) | |||
14 | int n, devfn; | 14 | int n, devfn; |
15 | long node; | 15 | long node; |
16 | 16 | ||
17 | if (pcibios_last_bus <= 0 || pcibios_last_bus >= 0xff) | 17 | if (pcibios_last_bus <= 0 || pcibios_last_bus > 0xff) |
18 | return; | 18 | return; |
19 | DBG("PCI: Peer bridge fixup\n"); | 19 | DBG("PCI: Peer bridge fixup\n"); |
20 | 20 | ||
@@ -57,14 +57,17 @@ static int __init pci_legacy_init(void) | |||
57 | 57 | ||
58 | int __init pci_subsys_init(void) | 58 | int __init pci_subsys_init(void) |
59 | { | 59 | { |
60 | #ifdef CONFIG_X86_NUMAQ | ||
61 | pci_numaq_init(); | ||
62 | #endif | ||
60 | #ifdef CONFIG_ACPI | 63 | #ifdef CONFIG_ACPI |
61 | pci_acpi_init(); | 64 | pci_acpi_init(); |
62 | #endif | 65 | #endif |
66 | #ifdef CONFIG_X86_VISWS | ||
67 | pci_visws_init(); | ||
68 | #endif | ||
63 | pci_legacy_init(); | 69 | pci_legacy_init(); |
64 | pcibios_irq_init(); | 70 | pcibios_irq_init(); |
65 | #ifdef CONFIG_X86_NUMAQ | ||
66 | pci_numa_init(); | ||
67 | #endif | ||
68 | pcibios_init(); | 71 | pcibios_init(); |
69 | 72 | ||
70 | return 0; | 73 | return 0; |
diff --git a/arch/x86/pci/mmconfig-shared.c b/arch/x86/pci/mmconfig-shared.c index 23faaa890ffc..d9635764ce3d 100644 --- a/arch/x86/pci/mmconfig-shared.c +++ b/arch/x86/pci/mmconfig-shared.c | |||
@@ -293,7 +293,7 @@ static acpi_status __init find_mboard_resource(acpi_handle handle, u32 lvl, | |||
293 | return AE_OK; | 293 | return AE_OK; |
294 | } | 294 | } |
295 | 295 | ||
296 | static int __init is_acpi_reserved(unsigned long start, unsigned long end) | 296 | static int __init is_acpi_reserved(u64 start, u64 end, unsigned not_used) |
297 | { | 297 | { |
298 | struct resource mcfg_res; | 298 | struct resource mcfg_res; |
299 | 299 | ||
@@ -310,6 +310,41 @@ static int __init is_acpi_reserved(unsigned long start, unsigned long end) | |||
310 | return mcfg_res.flags; | 310 | return mcfg_res.flags; |
311 | } | 311 | } |
312 | 312 | ||
313 | typedef int (*check_reserved_t)(u64 start, u64 end, unsigned type); | ||
314 | |||
315 | static int __init is_mmconf_reserved(check_reserved_t is_reserved, | ||
316 | u64 addr, u64 size, int i, | ||
317 | typeof(pci_mmcfg_config[0]) *cfg, int with_e820) | ||
318 | { | ||
319 | u64 old_size = size; | ||
320 | int valid = 0; | ||
321 | |||
322 | while (!is_reserved(addr, addr + size - 1, E820_RESERVED)) { | ||
323 | size >>= 1; | ||
324 | if (size < (16UL<<20)) | ||
325 | break; | ||
326 | } | ||
327 | |||
328 | if (size >= (16UL<<20) || size == old_size) { | ||
329 | printk(KERN_NOTICE | ||
330 | "PCI: MCFG area at %Lx reserved in %s\n", | ||
331 | addr, with_e820?"E820":"ACPI motherboard resources"); | ||
332 | valid = 1; | ||
333 | |||
334 | if (old_size != size) { | ||
335 | /* update end_bus_number */ | ||
336 | cfg->end_bus_number = cfg->start_bus_number + ((size>>20) - 1); | ||
337 | printk(KERN_NOTICE "PCI: updated MCFG configuration %d: base %lx " | ||
338 | "segment %hu buses %u - %u\n", | ||
339 | i, (unsigned long)cfg->address, cfg->pci_segment, | ||
340 | (unsigned int)cfg->start_bus_number, | ||
341 | (unsigned int)cfg->end_bus_number); | ||
342 | } | ||
343 | } | ||
344 | |||
345 | return valid; | ||
346 | } | ||
347 | |||
313 | static void __init pci_mmcfg_reject_broken(int early) | 348 | static void __init pci_mmcfg_reject_broken(int early) |
314 | { | 349 | { |
315 | typeof(pci_mmcfg_config[0]) *cfg; | 350 | typeof(pci_mmcfg_config[0]) *cfg; |
@@ -324,21 +359,22 @@ static void __init pci_mmcfg_reject_broken(int early) | |||
324 | 359 | ||
325 | for (i = 0; i < pci_mmcfg_config_num; i++) { | 360 | for (i = 0; i < pci_mmcfg_config_num; i++) { |
326 | int valid = 0; | 361 | int valid = 0; |
327 | u32 size = (cfg->end_bus_number + 1) << 20; | 362 | u64 addr, size; |
363 | |||
328 | cfg = &pci_mmcfg_config[i]; | 364 | cfg = &pci_mmcfg_config[i]; |
365 | addr = cfg->start_bus_number; | ||
366 | addr <<= 20; | ||
367 | addr += cfg->address; | ||
368 | size = cfg->end_bus_number + 1 - cfg->start_bus_number; | ||
369 | size <<= 20; | ||
329 | printk(KERN_NOTICE "PCI: MCFG configuration %d: base %lx " | 370 | printk(KERN_NOTICE "PCI: MCFG configuration %d: base %lx " |
330 | "segment %hu buses %u - %u\n", | 371 | "segment %hu buses %u - %u\n", |
331 | i, (unsigned long)cfg->address, cfg->pci_segment, | 372 | i, (unsigned long)cfg->address, cfg->pci_segment, |
332 | (unsigned int)cfg->start_bus_number, | 373 | (unsigned int)cfg->start_bus_number, |
333 | (unsigned int)cfg->end_bus_number); | 374 | (unsigned int)cfg->end_bus_number); |
334 | 375 | ||
335 | if (!early && | 376 | if (!early) |
336 | is_acpi_reserved(cfg->address, cfg->address + size - 1)) { | 377 | valid = is_mmconf_reserved(is_acpi_reserved, addr, size, i, cfg, 0); |
337 | printk(KERN_NOTICE "PCI: MCFG area at %Lx reserved " | ||
338 | "in ACPI motherboard resources\n", | ||
339 | cfg->address); | ||
340 | valid = 1; | ||
341 | } | ||
342 | 378 | ||
343 | if (valid) | 379 | if (valid) |
344 | continue; | 380 | continue; |
@@ -347,16 +383,11 @@ static void __init pci_mmcfg_reject_broken(int early) | |||
347 | printk(KERN_ERR "PCI: BIOS Bug: MCFG area at %Lx is not" | 383 | printk(KERN_ERR "PCI: BIOS Bug: MCFG area at %Lx is not" |
348 | " reserved in ACPI motherboard resources\n", | 384 | " reserved in ACPI motherboard resources\n", |
349 | cfg->address); | 385 | cfg->address); |
386 | |||
350 | /* Don't try to do this check unless configuration | 387 | /* Don't try to do this check unless configuration |
351 | type 1 is available. how about type 2 ?*/ | 388 | type 1 is available. how about type 2 ?*/ |
352 | if (raw_pci_ops && e820_all_mapped(cfg->address, | 389 | if (raw_pci_ops) |
353 | cfg->address + size - 1, | 390 | valid = is_mmconf_reserved(e820_all_mapped, addr, size, i, cfg, 1); |
354 | E820_RESERVED)) { | ||
355 | printk(KERN_NOTICE | ||
356 | "PCI: MCFG area at %Lx reserved in E820\n", | ||
357 | cfg->address); | ||
358 | valid = 1; | ||
359 | } | ||
360 | 391 | ||
361 | if (!valid) | 392 | if (!valid) |
362 | goto reject; | 393 | goto reject; |
@@ -365,7 +396,7 @@ static void __init pci_mmcfg_reject_broken(int early) | |||
365 | return; | 396 | return; |
366 | 397 | ||
367 | reject: | 398 | reject: |
368 | printk(KERN_ERR "PCI: Not using MMCONFIG.\n"); | 399 | printk(KERN_INFO "PCI: Not using MMCONFIG.\n"); |
369 | pci_mmcfg_arch_free(); | 400 | pci_mmcfg_arch_free(); |
370 | kfree(pci_mmcfg_config); | 401 | kfree(pci_mmcfg_config); |
371 | pci_mmcfg_config = NULL; | 402 | pci_mmcfg_config = NULL; |
diff --git a/arch/x86/pci/numa.c b/arch/x86/pci/numaq_32.c index 8b5ca1966731..1177845d3186 100644 --- a/arch/x86/pci/numa.c +++ b/arch/x86/pci/numaq_32.c | |||
@@ -1,5 +1,5 @@ | |||
1 | /* | 1 | /* |
2 | * numa.c - Low-level PCI access for NUMA-Q machines | 2 | * numaq_32.c - Low-level PCI access for NUMA-Q machines |
3 | */ | 3 | */ |
4 | 4 | ||
5 | #include <linux/pci.h> | 5 | #include <linux/pci.h> |
@@ -131,13 +131,14 @@ static void __devinit pci_fixup_i450nx(struct pci_dev *d) | |||
131 | u8 busno, suba, subb; | 131 | u8 busno, suba, subb; |
132 | int quad = BUS2QUAD(d->bus->number); | 132 | int quad = BUS2QUAD(d->bus->number); |
133 | 133 | ||
134 | printk("PCI: Searching for i450NX host bridges on %s\n", pci_name(d)); | 134 | dev_info(&d->dev, "searching for i450NX host bridges\n"); |
135 | reg = 0xd0; | 135 | reg = 0xd0; |
136 | for(pxb=0; pxb<2; pxb++) { | 136 | for(pxb=0; pxb<2; pxb++) { |
137 | pci_read_config_byte(d, reg++, &busno); | 137 | pci_read_config_byte(d, reg++, &busno); |
138 | pci_read_config_byte(d, reg++, &suba); | 138 | pci_read_config_byte(d, reg++, &suba); |
139 | pci_read_config_byte(d, reg++, &subb); | 139 | pci_read_config_byte(d, reg++, &subb); |
140 | DBG("i450NX PXB %d: %02x/%02x/%02x\n", pxb, busno, suba, subb); | 140 | dev_dbg(&d->dev, "i450NX PXB %d: %02x/%02x/%02x\n", |
141 | pxb, busno, suba, subb); | ||
141 | if (busno) { | 142 | if (busno) { |
142 | /* Bus A */ | 143 | /* Bus A */ |
143 | pci_scan_bus_with_sysdata(QUADLOCAL2BUS(quad, busno)); | 144 | pci_scan_bus_with_sysdata(QUADLOCAL2BUS(quad, busno)); |
@@ -151,7 +152,7 @@ static void __devinit pci_fixup_i450nx(struct pci_dev *d) | |||
151 | } | 152 | } |
152 | DECLARE_PCI_FIXUP_HEADER(PCI_VENDOR_ID_INTEL, PCI_DEVICE_ID_INTEL_82451NX, pci_fixup_i450nx); | 153 | DECLARE_PCI_FIXUP_HEADER(PCI_VENDOR_ID_INTEL, PCI_DEVICE_ID_INTEL_82451NX, pci_fixup_i450nx); |
153 | 154 | ||
154 | int __init pci_numa_init(void) | 155 | int __init pci_numaq_init(void) |
155 | { | 156 | { |
156 | int quad; | 157 | int quad; |
157 | 158 | ||
diff --git a/arch/x86/pci/pci.h b/arch/x86/pci/pci.h index 3e25deb821ac..15b9cf6be729 100644 --- a/arch/x86/pci/pci.h +++ b/arch/x86/pci/pci.h | |||
@@ -108,7 +108,8 @@ extern void __init dmi_check_skip_isa_align(void); | |||
108 | /* some common used subsys_initcalls */ | 108 | /* some common used subsys_initcalls */ |
109 | extern int __init pci_acpi_init(void); | 109 | extern int __init pci_acpi_init(void); |
110 | extern int __init pcibios_irq_init(void); | 110 | extern int __init pcibios_irq_init(void); |
111 | extern int __init pci_numa_init(void); | 111 | extern int __init pci_visws_init(void); |
112 | extern int __init pci_numaq_init(void); | ||
112 | extern int __init pcibios_init(void); | 113 | extern int __init pcibios_init(void); |
113 | 114 | ||
114 | /* pci-mmconfig.c */ | 115 | /* pci-mmconfig.c */ |
diff --git a/arch/x86/pci/visws.c b/arch/x86/pci/visws.c index 1a7bed492bb1..42f4cb19faca 100644 --- a/arch/x86/pci/visws.c +++ b/arch/x86/pci/visws.c | |||
@@ -86,8 +86,14 @@ void __init pcibios_update_irq(struct pci_dev *dev, int irq) | |||
86 | pci_write_config_byte(dev, PCI_INTERRUPT_LINE, irq); | 86 | pci_write_config_byte(dev, PCI_INTERRUPT_LINE, irq); |
87 | } | 87 | } |
88 | 88 | ||
89 | static int __init pci_visws_init(void) | 89 | int __init pci_visws_init(void) |
90 | { | 90 | { |
91 | if (!is_visws_box()) | ||
92 | return -1; | ||
93 | |||
94 | pcibios_enable_irq = &pci_visws_enable_irq; | ||
95 | pcibios_disable_irq = &pci_visws_disable_irq; | ||
96 | |||
91 | /* The VISWS supports configuration access type 1 only */ | 97 | /* The VISWS supports configuration access type 1 only */ |
92 | pci_probe = (pci_probe | PCI_PROBE_CONF1) & | 98 | pci_probe = (pci_probe | PCI_PROBE_CONF1) & |
93 | ~(PCI_PROBE_BIOS | PCI_PROBE_CONF2); | 99 | ~(PCI_PROBE_BIOS | PCI_PROBE_CONF2); |
@@ -105,18 +111,3 @@ static int __init pci_visws_init(void) | |||
105 | pcibios_resource_survey(); | 111 | pcibios_resource_survey(); |
106 | return 0; | 112 | return 0; |
107 | } | 113 | } |
108 | |||
109 | static __init int pci_subsys_init(void) | ||
110 | { | ||
111 | if (!is_visws_box()) | ||
112 | return -1; | ||
113 | |||
114 | pcibios_enable_irq = &pci_visws_enable_irq; | ||
115 | pcibios_disable_irq = &pci_visws_disable_irq; | ||
116 | |||
117 | pci_visws_init(); | ||
118 | pcibios_init(); | ||
119 | |||
120 | return 0; | ||
121 | } | ||
122 | subsys_initcall(pci_subsys_init); | ||
diff --git a/arch/x86/power/cpu_32.c b/arch/x86/power/cpu_32.c index 7dc5d5cf50a2..d3e083dea720 100644 --- a/arch/x86/power/cpu_32.c +++ b/arch/x86/power/cpu_32.c | |||
@@ -45,7 +45,7 @@ static void __save_processor_state(struct saved_context *ctxt) | |||
45 | ctxt->cr0 = read_cr0(); | 45 | ctxt->cr0 = read_cr0(); |
46 | ctxt->cr2 = read_cr2(); | 46 | ctxt->cr2 = read_cr2(); |
47 | ctxt->cr3 = read_cr3(); | 47 | ctxt->cr3 = read_cr3(); |
48 | ctxt->cr4 = read_cr4(); | 48 | ctxt->cr4 = read_cr4_safe(); |
49 | } | 49 | } |
50 | 50 | ||
51 | /* Needed by apm.c */ | 51 | /* Needed by apm.c */ |
@@ -98,7 +98,9 @@ static void __restore_processor_state(struct saved_context *ctxt) | |||
98 | /* | 98 | /* |
99 | * control registers | 99 | * control registers |
100 | */ | 100 | */ |
101 | write_cr4(ctxt->cr4); | 101 | /* cr4 was introduced in the Pentium CPU */ |
102 | if (ctxt->cr4) | ||
103 | write_cr4(ctxt->cr4); | ||
102 | write_cr3(ctxt->cr3); | 104 | write_cr3(ctxt->cr3); |
103 | write_cr2(ctxt->cr2); | 105 | write_cr2(ctxt->cr2); |
104 | write_cr0(ctxt->cr0); | 106 | write_cr0(ctxt->cr0); |
diff --git a/arch/x86/power/hibernate_asm_32.S b/arch/x86/power/hibernate_asm_32.S index b95aa6cfe3cb..4fc7e872c85e 100644 --- a/arch/x86/power/hibernate_asm_32.S +++ b/arch/x86/power/hibernate_asm_32.S | |||
@@ -28,9 +28,9 @@ ENTRY(swsusp_arch_suspend) | |||
28 | ret | 28 | ret |
29 | 29 | ||
30 | ENTRY(restore_image) | 30 | ENTRY(restore_image) |
31 | movl resume_pg_dir, %ecx | 31 | movl resume_pg_dir, %eax |
32 | subl $__PAGE_OFFSET, %ecx | 32 | subl $__PAGE_OFFSET, %eax |
33 | movl %ecx, %cr3 | 33 | movl %eax, %cr3 |
34 | 34 | ||
35 | movl restore_pblist, %edx | 35 | movl restore_pblist, %edx |
36 | .p2align 4,,7 | 36 | .p2align 4,,7 |
@@ -52,17 +52,21 @@ copy_loop: | |||
52 | 52 | ||
53 | done: | 53 | done: |
54 | /* go back to the original page tables */ | 54 | /* go back to the original page tables */ |
55 | movl $swapper_pg_dir, %ecx | 55 | movl $swapper_pg_dir, %eax |
56 | subl $__PAGE_OFFSET, %ecx | 56 | subl $__PAGE_OFFSET, %eax |
57 | movl %ecx, %cr3 | 57 | movl %eax, %cr3 |
58 | /* Flush TLB, including "global" things (vmalloc) */ | 58 | /* Flush TLB, including "global" things (vmalloc) */ |
59 | movl mmu_cr4_features, %eax | 59 | movl mmu_cr4_features, %ecx |
60 | movl %eax, %edx | 60 | jecxz 1f # cr4 Pentium and higher, skip if zero |
61 | movl %ecx, %edx | ||
61 | andl $~(1<<7), %edx; # PGE | 62 | andl $~(1<<7), %edx; # PGE |
62 | movl %edx, %cr4; # turn off PGE | 63 | movl %edx, %cr4; # turn off PGE |
63 | movl %cr3, %ecx; # flush TLB | 64 | 1: |
64 | movl %ecx, %cr3 | 65 | movl %cr3, %eax; # flush TLB |
65 | movl %eax, %cr4; # turn PGE back on | 66 | movl %eax, %cr3 |
67 | jecxz 1f # cr4 Pentium and higher, skip if zero | ||
68 | movl %ecx, %cr4; # turn PGE back on | ||
69 | 1: | ||
66 | 70 | ||
67 | movl saved_context_esp, %esp | 71 | movl saved_context_esp, %esp |
68 | movl saved_context_ebp, %ebp | 72 | movl saved_context_ebp, %ebp |
diff --git a/arch/x86/vdso/Makefile b/arch/x86/vdso/Makefile index b7ad9f89d21f..4d6ef0a336d6 100644 --- a/arch/x86/vdso/Makefile +++ b/arch/x86/vdso/Makefile | |||
@@ -62,7 +62,7 @@ $(obj)/%-syms.lds: $(obj)/%.so.dbg FORCE | |||
62 | # Build multiple 32-bit vDSO images to choose from at boot time. | 62 | # Build multiple 32-bit vDSO images to choose from at boot time. |
63 | # | 63 | # |
64 | obj-$(VDSO32-y) += vdso32-syms.lds | 64 | obj-$(VDSO32-y) += vdso32-syms.lds |
65 | vdso32.so-$(CONFIG_X86_32) += int80 | 65 | vdso32.so-$(VDSO32-y) += int80 |
66 | vdso32.so-$(CONFIG_COMPAT) += syscall | 66 | vdso32.so-$(CONFIG_COMPAT) += syscall |
67 | vdso32.so-$(VDSO32-y) += sysenter | 67 | vdso32.so-$(VDSO32-y) += sysenter |
68 | 68 | ||
diff --git a/arch/x86/vdso/vdso32-setup.c b/arch/x86/vdso/vdso32-setup.c index 0bce5429a515..513f330c5832 100644 --- a/arch/x86/vdso/vdso32-setup.c +++ b/arch/x86/vdso/vdso32-setup.c | |||
@@ -193,17 +193,12 @@ static __init void relocate_vdso(Elf32_Ehdr *ehdr) | |||
193 | } | 193 | } |
194 | } | 194 | } |
195 | 195 | ||
196 | /* | ||
197 | * These symbols are defined by vdso32.S to mark the bounds | ||
198 | * of the ELF DSO images included therein. | ||
199 | */ | ||
200 | extern const char vdso32_default_start, vdso32_default_end; | ||
201 | extern const char vdso32_sysenter_start, vdso32_sysenter_end; | ||
202 | static struct page *vdso32_pages[1]; | 196 | static struct page *vdso32_pages[1]; |
203 | 197 | ||
204 | #ifdef CONFIG_X86_64 | 198 | #ifdef CONFIG_X86_64 |
205 | 199 | ||
206 | #define vdso32_sysenter() (boot_cpu_has(X86_FEATURE_SYSENTER32)) | 200 | #define vdso32_sysenter() (boot_cpu_has(X86_FEATURE_SYSENTER32)) |
201 | #define vdso32_syscall() (boot_cpu_has(X86_FEATURE_SYSCALL32)) | ||
207 | 202 | ||
208 | /* May not be __init: called during resume */ | 203 | /* May not be __init: called during resume */ |
209 | void syscall32_cpu_init(void) | 204 | void syscall32_cpu_init(void) |
@@ -226,6 +221,7 @@ static inline void map_compat_vdso(int map) | |||
226 | #else /* CONFIG_X86_32 */ | 221 | #else /* CONFIG_X86_32 */ |
227 | 222 | ||
228 | #define vdso32_sysenter() (boot_cpu_has(X86_FEATURE_SEP)) | 223 | #define vdso32_sysenter() (boot_cpu_has(X86_FEATURE_SEP)) |
224 | #define vdso32_syscall() (0) | ||
229 | 225 | ||
230 | void enable_sep_cpu(void) | 226 | void enable_sep_cpu(void) |
231 | { | 227 | { |
@@ -296,12 +292,15 @@ int __init sysenter_setup(void) | |||
296 | gate_vma_init(); | 292 | gate_vma_init(); |
297 | #endif | 293 | #endif |
298 | 294 | ||
299 | if (!vdso32_sysenter()) { | 295 | if (vdso32_syscall()) { |
300 | vsyscall = &vdso32_default_start; | 296 | vsyscall = &vdso32_syscall_start; |
301 | vsyscall_len = &vdso32_default_end - &vdso32_default_start; | 297 | vsyscall_len = &vdso32_syscall_end - &vdso32_syscall_start; |
302 | } else { | 298 | } else if (vdso32_sysenter()){ |
303 | vsyscall = &vdso32_sysenter_start; | 299 | vsyscall = &vdso32_sysenter_start; |
304 | vsyscall_len = &vdso32_sysenter_end - &vdso32_sysenter_start; | 300 | vsyscall_len = &vdso32_sysenter_end - &vdso32_sysenter_start; |
301 | } else { | ||
302 | vsyscall = &vdso32_int80_start; | ||
303 | vsyscall_len = &vdso32_int80_end - &vdso32_int80_start; | ||
305 | } | 304 | } |
306 | 305 | ||
307 | memcpy(syscall_page, vsyscall, vsyscall_len); | 306 | memcpy(syscall_page, vsyscall, vsyscall_len); |
diff --git a/arch/x86/vdso/vdso32.S b/arch/x86/vdso/vdso32.S index 1e36f72cab86..2ce5f82c333b 100644 --- a/arch/x86/vdso/vdso32.S +++ b/arch/x86/vdso/vdso32.S | |||
@@ -2,14 +2,17 @@ | |||
2 | 2 | ||
3 | __INITDATA | 3 | __INITDATA |
4 | 4 | ||
5 | .globl vdso32_default_start, vdso32_default_end | 5 | .globl vdso32_int80_start, vdso32_int80_end |
6 | vdso32_default_start: | 6 | vdso32_int80_start: |
7 | #ifdef CONFIG_X86_32 | ||
8 | .incbin "arch/x86/vdso/vdso32-int80.so" | 7 | .incbin "arch/x86/vdso/vdso32-int80.so" |
9 | #else | 8 | vdso32_int80_end: |
9 | |||
10 | .globl vdso32_syscall_start, vdso32_syscall_end | ||
11 | vdso32_syscall_start: | ||
12 | #ifdef CONFIG_COMPAT | ||
10 | .incbin "arch/x86/vdso/vdso32-syscall.so" | 13 | .incbin "arch/x86/vdso/vdso32-syscall.so" |
11 | #endif | 14 | #endif |
12 | vdso32_default_end: | 15 | vdso32_syscall_end: |
13 | 16 | ||
14 | .globl vdso32_sysenter_start, vdso32_sysenter_end | 17 | .globl vdso32_sysenter_start, vdso32_sysenter_end |
15 | vdso32_sysenter_start: | 18 | vdso32_sysenter_start: |
diff --git a/arch/x86/vdso/vma.c b/arch/x86/vdso/vma.c index 19a6cfaf5db9..257ba4a10abf 100644 --- a/arch/x86/vdso/vma.c +++ b/arch/x86/vdso/vma.c | |||
@@ -21,7 +21,8 @@ unsigned int __read_mostly vdso_enabled = 1; | |||
21 | extern char vdso_start[], vdso_end[]; | 21 | extern char vdso_start[], vdso_end[]; |
22 | extern unsigned short vdso_sync_cpuid; | 22 | extern unsigned short vdso_sync_cpuid; |
23 | 23 | ||
24 | struct page **vdso_pages; | 24 | static struct page **vdso_pages; |
25 | static unsigned vdso_size; | ||
25 | 26 | ||
26 | static inline void *var_ref(void *p, char *name) | 27 | static inline void *var_ref(void *p, char *name) |
27 | { | 28 | { |
@@ -38,6 +39,7 @@ static int __init init_vdso_vars(void) | |||
38 | int i; | 39 | int i; |
39 | char *vbase; | 40 | char *vbase; |
40 | 41 | ||
42 | vdso_size = npages << PAGE_SHIFT; | ||
41 | vdso_pages = kmalloc(sizeof(struct page *) * npages, GFP_KERNEL); | 43 | vdso_pages = kmalloc(sizeof(struct page *) * npages, GFP_KERNEL); |
42 | if (!vdso_pages) | 44 | if (!vdso_pages) |
43 | goto oom; | 45 | goto oom; |
@@ -101,20 +103,19 @@ int arch_setup_additional_pages(struct linux_binprm *bprm, int exstack) | |||
101 | struct mm_struct *mm = current->mm; | 103 | struct mm_struct *mm = current->mm; |
102 | unsigned long addr; | 104 | unsigned long addr; |
103 | int ret; | 105 | int ret; |
104 | unsigned len = round_up(vdso_end - vdso_start, PAGE_SIZE); | ||
105 | 106 | ||
106 | if (!vdso_enabled) | 107 | if (!vdso_enabled) |
107 | return 0; | 108 | return 0; |
108 | 109 | ||
109 | down_write(&mm->mmap_sem); | 110 | down_write(&mm->mmap_sem); |
110 | addr = vdso_addr(mm->start_stack, len); | 111 | addr = vdso_addr(mm->start_stack, vdso_size); |
111 | addr = get_unmapped_area(NULL, addr, len, 0, 0); | 112 | addr = get_unmapped_area(NULL, addr, vdso_size, 0, 0); |
112 | if (IS_ERR_VALUE(addr)) { | 113 | if (IS_ERR_VALUE(addr)) { |
113 | ret = addr; | 114 | ret = addr; |
114 | goto up_fail; | 115 | goto up_fail; |
115 | } | 116 | } |
116 | 117 | ||
117 | ret = install_special_mapping(mm, addr, len, | 118 | ret = install_special_mapping(mm, addr, vdso_size, |
118 | VM_READ|VM_EXEC| | 119 | VM_READ|VM_EXEC| |
119 | VM_MAYREAD|VM_MAYWRITE|VM_MAYEXEC| | 120 | VM_MAYREAD|VM_MAYWRITE|VM_MAYEXEC| |
120 | VM_ALWAYSDUMP, | 121 | VM_ALWAYSDUMP, |
diff --git a/arch/x86/xen/Kconfig b/arch/x86/xen/Kconfig index c2cc99580871..3815e425f470 100644 --- a/arch/x86/xen/Kconfig +++ b/arch/x86/xen/Kconfig | |||
@@ -6,8 +6,8 @@ config XEN | |||
6 | bool "Xen guest support" | 6 | bool "Xen guest support" |
7 | select PARAVIRT | 7 | select PARAVIRT |
8 | select PARAVIRT_CLOCK | 8 | select PARAVIRT_CLOCK |
9 | depends on X86_32 | 9 | depends on X86_64 || (X86_32 && X86_PAE && !(X86_VISWS || X86_VOYAGER)) |
10 | depends on X86_CMPXCHG && X86_TSC && X86_PAE && !(X86_VISWS || X86_VOYAGER) | 10 | depends on X86_CMPXCHG && X86_TSC |
11 | help | 11 | help |
12 | This is the Linux Xen port. Enabling this will allow the | 12 | This is the Linux Xen port. Enabling this will allow the |
13 | kernel to boot in a paravirtualized environment under the | 13 | kernel to boot in a paravirtualized environment under the |
@@ -15,10 +15,16 @@ config XEN | |||
15 | 15 | ||
16 | config XEN_MAX_DOMAIN_MEMORY | 16 | config XEN_MAX_DOMAIN_MEMORY |
17 | int "Maximum allowed size of a domain in gigabytes" | 17 | int "Maximum allowed size of a domain in gigabytes" |
18 | default 8 | 18 | default 8 if X86_32 |
19 | default 32 if X86_64 | ||
19 | depends on XEN | 20 | depends on XEN |
20 | help | 21 | help |
21 | The pseudo-physical to machine address array is sized | 22 | The pseudo-physical to machine address array is sized |
22 | according to the maximum possible memory size of a Xen | 23 | according to the maximum possible memory size of a Xen |
23 | domain. This array uses 1 page per gigabyte, so there's no | 24 | domain. This array uses 1 page per gigabyte, so there's no |
24 | need to be too stingy here. \ No newline at end of file | 25 | need to be too stingy here. |
26 | |||
27 | config XEN_SAVE_RESTORE | ||
28 | bool | ||
29 | depends on PM | ||
30 | default y \ No newline at end of file | ||
diff --git a/arch/x86/xen/Makefile b/arch/x86/xen/Makefile index 2ba2d1649131..59c1e539aed2 100644 --- a/arch/x86/xen/Makefile +++ b/arch/x86/xen/Makefile | |||
@@ -1,4 +1,4 @@ | |||
1 | obj-y := enlighten.o setup.o multicalls.o mmu.o \ | 1 | obj-y := enlighten.o setup.o multicalls.o mmu.o \ |
2 | time.o xen-asm.o grant-table.o suspend.o | 2 | time.o xen-asm_$(BITS).o grant-table.o suspend.o |
3 | 3 | ||
4 | obj-$(CONFIG_SMP) += smp.o | 4 | obj-$(CONFIG_SMP) += smp.o |
diff --git a/arch/x86/xen/enlighten.c b/arch/x86/xen/enlighten.c index bb508456ef52..a4e201b47f64 100644 --- a/arch/x86/xen/enlighten.c +++ b/arch/x86/xen/enlighten.c | |||
@@ -33,6 +33,7 @@ | |||
33 | #include <xen/interface/sched.h> | 33 | #include <xen/interface/sched.h> |
34 | #include <xen/features.h> | 34 | #include <xen/features.h> |
35 | #include <xen/page.h> | 35 | #include <xen/page.h> |
36 | #include <xen/hvc-console.h> | ||
36 | 37 | ||
37 | #include <asm/paravirt.h> | 38 | #include <asm/paravirt.h> |
38 | #include <asm/page.h> | 39 | #include <asm/page.h> |
@@ -40,12 +41,12 @@ | |||
40 | #include <asm/xen/hypervisor.h> | 41 | #include <asm/xen/hypervisor.h> |
41 | #include <asm/fixmap.h> | 42 | #include <asm/fixmap.h> |
42 | #include <asm/processor.h> | 43 | #include <asm/processor.h> |
44 | #include <asm/msr-index.h> | ||
43 | #include <asm/setup.h> | 45 | #include <asm/setup.h> |
44 | #include <asm/desc.h> | 46 | #include <asm/desc.h> |
45 | #include <asm/pgtable.h> | 47 | #include <asm/pgtable.h> |
46 | #include <asm/tlbflush.h> | 48 | #include <asm/tlbflush.h> |
47 | #include <asm/reboot.h> | 49 | #include <asm/reboot.h> |
48 | #include <asm/pgalloc.h> | ||
49 | 50 | ||
50 | #include "xen-ops.h" | 51 | #include "xen-ops.h" |
51 | #include "mmu.h" | 52 | #include "mmu.h" |
@@ -57,6 +58,18 @@ DEFINE_PER_CPU(struct vcpu_info *, xen_vcpu); | |||
57 | DEFINE_PER_CPU(struct vcpu_info, xen_vcpu_info); | 58 | DEFINE_PER_CPU(struct vcpu_info, xen_vcpu_info); |
58 | 59 | ||
59 | /* | 60 | /* |
61 | * Identity map, in addition to plain kernel map. This needs to be | ||
62 | * large enough to allocate page table pages to allocate the rest. | ||
63 | * Each page can map 2MB. | ||
64 | */ | ||
65 | static pte_t level1_ident_pgt[PTRS_PER_PTE * 4] __page_aligned_bss; | ||
66 | |||
67 | #ifdef CONFIG_X86_64 | ||
68 | /* l3 pud for userspace vsyscall mapping */ | ||
69 | static pud_t level3_user_vsyscall[PTRS_PER_PUD] __page_aligned_bss; | ||
70 | #endif /* CONFIG_X86_64 */ | ||
71 | |||
72 | /* | ||
60 | * Note about cr3 (pagetable base) values: | 73 | * Note about cr3 (pagetable base) values: |
61 | * | 74 | * |
62 | * xen_cr3 contains the current logical cr3 value; it contains the | 75 | * xen_cr3 contains the current logical cr3 value; it contains the |
@@ -167,10 +180,14 @@ void xen_vcpu_restore(void) | |||
167 | 180 | ||
168 | static void __init xen_banner(void) | 181 | static void __init xen_banner(void) |
169 | { | 182 | { |
183 | unsigned version = HYPERVISOR_xen_version(XENVER_version, NULL); | ||
184 | struct xen_extraversion extra; | ||
185 | HYPERVISOR_xen_version(XENVER_extraversion, &extra); | ||
186 | |||
170 | printk(KERN_INFO "Booting paravirtualized kernel on %s\n", | 187 | printk(KERN_INFO "Booting paravirtualized kernel on %s\n", |
171 | pv_info.name); | 188 | pv_info.name); |
172 | printk(KERN_INFO "Hypervisor signature: %s%s\n", | 189 | printk(KERN_INFO "Xen version: %d.%d%s%s\n", |
173 | xen_start_info->magic, | 190 | version >> 16, version & 0xffff, extra.extraversion, |
174 | xen_feature(XENFEAT_mmu_pt_update_preserve_ad) ? " (preserve-AD)" : ""); | 191 | xen_feature(XENFEAT_mmu_pt_update_preserve_ad) ? " (preserve-AD)" : ""); |
175 | } | 192 | } |
176 | 193 | ||
@@ -363,14 +380,6 @@ static void load_TLS_descriptor(struct thread_struct *t, | |||
363 | 380 | ||
364 | static void xen_load_tls(struct thread_struct *t, unsigned int cpu) | 381 | static void xen_load_tls(struct thread_struct *t, unsigned int cpu) |
365 | { | 382 | { |
366 | xen_mc_batch(); | ||
367 | |||
368 | load_TLS_descriptor(t, cpu, 0); | ||
369 | load_TLS_descriptor(t, cpu, 1); | ||
370 | load_TLS_descriptor(t, cpu, 2); | ||
371 | |||
372 | xen_mc_issue(PARAVIRT_LAZY_CPU); | ||
373 | |||
374 | /* | 383 | /* |
375 | * XXX sleazy hack: If we're being called in a lazy-cpu zone, | 384 | * XXX sleazy hack: If we're being called in a lazy-cpu zone, |
376 | * it means we're in a context switch, and %gs has just been | 385 | * it means we're in a context switch, and %gs has just been |
@@ -379,10 +388,39 @@ static void xen_load_tls(struct thread_struct *t, unsigned int cpu) | |||
379 | * Either way, it has been saved, and the new value will get | 388 | * Either way, it has been saved, and the new value will get |
380 | * loaded properly. This will go away as soon as Xen has been | 389 | * loaded properly. This will go away as soon as Xen has been |
381 | * modified to not save/restore %gs for normal hypercalls. | 390 | * modified to not save/restore %gs for normal hypercalls. |
391 | * | ||
392 | * On x86_64, this hack is not used for %gs, because gs points | ||
393 | * to KERNEL_GS_BASE (and uses it for PDA references), so we | ||
394 | * must not zero %gs on x86_64 | ||
395 | * | ||
396 | * For x86_64, we need to zero %fs, otherwise we may get an | ||
397 | * exception between the new %fs descriptor being loaded and | ||
398 | * %fs being effectively cleared at __switch_to(). | ||
382 | */ | 399 | */ |
383 | if (paravirt_get_lazy_mode() == PARAVIRT_LAZY_CPU) | 400 | if (paravirt_get_lazy_mode() == PARAVIRT_LAZY_CPU) { |
401 | #ifdef CONFIG_X86_32 | ||
384 | loadsegment(gs, 0); | 402 | loadsegment(gs, 0); |
403 | #else | ||
404 | loadsegment(fs, 0); | ||
405 | #endif | ||
406 | } | ||
407 | |||
408 | xen_mc_batch(); | ||
409 | |||
410 | load_TLS_descriptor(t, cpu, 0); | ||
411 | load_TLS_descriptor(t, cpu, 1); | ||
412 | load_TLS_descriptor(t, cpu, 2); | ||
413 | |||
414 | xen_mc_issue(PARAVIRT_LAZY_CPU); | ||
415 | } | ||
416 | |||
417 | #ifdef CONFIG_X86_64 | ||
418 | static void xen_load_gs_index(unsigned int idx) | ||
419 | { | ||
420 | if (HYPERVISOR_set_segment_base(SEGBASE_GS_USER_SEL, idx)) | ||
421 | BUG(); | ||
385 | } | 422 | } |
423 | #endif | ||
386 | 424 | ||
387 | static void xen_write_ldt_entry(struct desc_struct *dt, int entrynum, | 425 | static void xen_write_ldt_entry(struct desc_struct *dt, int entrynum, |
388 | const void *ptr) | 426 | const void *ptr) |
@@ -400,23 +438,18 @@ static void xen_write_ldt_entry(struct desc_struct *dt, int entrynum, | |||
400 | preempt_enable(); | 438 | preempt_enable(); |
401 | } | 439 | } |
402 | 440 | ||
403 | static int cvt_gate_to_trap(int vector, u32 low, u32 high, | 441 | static int cvt_gate_to_trap(int vector, const gate_desc *val, |
404 | struct trap_info *info) | 442 | struct trap_info *info) |
405 | { | 443 | { |
406 | u8 type, dpl; | 444 | if (val->type != 0xf && val->type != 0xe) |
407 | |||
408 | type = (high >> 8) & 0x1f; | ||
409 | dpl = (high >> 13) & 3; | ||
410 | |||
411 | if (type != 0xf && type != 0xe) | ||
412 | return 0; | 445 | return 0; |
413 | 446 | ||
414 | info->vector = vector; | 447 | info->vector = vector; |
415 | info->address = (high & 0xffff0000) | (low & 0x0000ffff); | 448 | info->address = gate_offset(*val); |
416 | info->cs = low >> 16; | 449 | info->cs = gate_segment(*val); |
417 | info->flags = dpl; | 450 | info->flags = val->dpl; |
418 | /* interrupt gates clear IF */ | 451 | /* interrupt gates clear IF */ |
419 | if (type == 0xe) | 452 | if (val->type == 0xe) |
420 | info->flags |= 4; | 453 | info->flags |= 4; |
421 | 454 | ||
422 | return 1; | 455 | return 1; |
@@ -443,11 +476,10 @@ static void xen_write_idt_entry(gate_desc *dt, int entrynum, const gate_desc *g) | |||
443 | 476 | ||
444 | if (p >= start && (p + 8) <= end) { | 477 | if (p >= start && (p + 8) <= end) { |
445 | struct trap_info info[2]; | 478 | struct trap_info info[2]; |
446 | u32 *desc = (u32 *)g; | ||
447 | 479 | ||
448 | info[1].address = 0; | 480 | info[1].address = 0; |
449 | 481 | ||
450 | if (cvt_gate_to_trap(entrynum, desc[0], desc[1], &info[0])) | 482 | if (cvt_gate_to_trap(entrynum, g, &info[0])) |
451 | if (HYPERVISOR_set_trap_table(info)) | 483 | if (HYPERVISOR_set_trap_table(info)) |
452 | BUG(); | 484 | BUG(); |
453 | } | 485 | } |
@@ -460,13 +492,13 @@ static void xen_convert_trap_info(const struct desc_ptr *desc, | |||
460 | { | 492 | { |
461 | unsigned in, out, count; | 493 | unsigned in, out, count; |
462 | 494 | ||
463 | count = (desc->size+1) / 8; | 495 | count = (desc->size+1) / sizeof(gate_desc); |
464 | BUG_ON(count > 256); | 496 | BUG_ON(count > 256); |
465 | 497 | ||
466 | for (in = out = 0; in < count; in++) { | 498 | for (in = out = 0; in < count; in++) { |
467 | const u32 *entry = (u32 *)(desc->address + in * 8); | 499 | gate_desc *entry = (gate_desc*)(desc->address) + in; |
468 | 500 | ||
469 | if (cvt_gate_to_trap(in, entry[0], entry[1], &traps[out])) | 501 | if (cvt_gate_to_trap(in, entry, &traps[out])) |
470 | out++; | 502 | out++; |
471 | } | 503 | } |
472 | traps[out].address = 0; | 504 | traps[out].address = 0; |
@@ -695,33 +727,89 @@ static void set_current_cr3(void *v) | |||
695 | x86_write_percpu(xen_current_cr3, (unsigned long)v); | 727 | x86_write_percpu(xen_current_cr3, (unsigned long)v); |
696 | } | 728 | } |
697 | 729 | ||
698 | static void xen_write_cr3(unsigned long cr3) | 730 | static void __xen_write_cr3(bool kernel, unsigned long cr3) |
699 | { | 731 | { |
700 | struct mmuext_op *op; | 732 | struct mmuext_op *op; |
701 | struct multicall_space mcs; | 733 | struct multicall_space mcs; |
702 | unsigned long mfn = pfn_to_mfn(PFN_DOWN(cr3)); | 734 | unsigned long mfn; |
703 | 735 | ||
704 | BUG_ON(preemptible()); | 736 | if (cr3) |
737 | mfn = pfn_to_mfn(PFN_DOWN(cr3)); | ||
738 | else | ||
739 | mfn = 0; | ||
705 | 740 | ||
706 | mcs = xen_mc_entry(sizeof(*op)); /* disables interrupts */ | 741 | WARN_ON(mfn == 0 && kernel); |
707 | 742 | ||
708 | /* Update while interrupts are disabled, so its atomic with | 743 | mcs = __xen_mc_entry(sizeof(*op)); |
709 | respect to ipis */ | ||
710 | x86_write_percpu(xen_cr3, cr3); | ||
711 | 744 | ||
712 | op = mcs.args; | 745 | op = mcs.args; |
713 | op->cmd = MMUEXT_NEW_BASEPTR; | 746 | op->cmd = kernel ? MMUEXT_NEW_BASEPTR : MMUEXT_NEW_USER_BASEPTR; |
714 | op->arg1.mfn = mfn; | 747 | op->arg1.mfn = mfn; |
715 | 748 | ||
716 | MULTI_mmuext_op(mcs.mc, op, 1, NULL, DOMID_SELF); | 749 | MULTI_mmuext_op(mcs.mc, op, 1, NULL, DOMID_SELF); |
717 | 750 | ||
718 | /* Update xen_update_cr3 once the batch has actually | 751 | if (kernel) { |
719 | been submitted. */ | 752 | x86_write_percpu(xen_cr3, cr3); |
720 | xen_mc_callback(set_current_cr3, (void *)cr3); | 753 | |
754 | /* Update xen_current_cr3 once the batch has actually | ||
755 | been submitted. */ | ||
756 | xen_mc_callback(set_current_cr3, (void *)cr3); | ||
757 | } | ||
758 | } | ||
759 | |||
760 | static void xen_write_cr3(unsigned long cr3) | ||
761 | { | ||
762 | BUG_ON(preemptible()); | ||
763 | |||
764 | xen_mc_batch(); /* disables interrupts */ | ||
765 | |||
766 | /* Update while interrupts are disabled, so its atomic with | ||
767 | respect to ipis */ | ||
768 | x86_write_percpu(xen_cr3, cr3); | ||
769 | |||
770 | __xen_write_cr3(true, cr3); | ||
771 | |||
772 | #ifdef CONFIG_X86_64 | ||
773 | { | ||
774 | pgd_t *user_pgd = xen_get_user_pgd(__va(cr3)); | ||
775 | if (user_pgd) | ||
776 | __xen_write_cr3(false, __pa(user_pgd)); | ||
777 | else | ||
778 | __xen_write_cr3(false, 0); | ||
779 | } | ||
780 | #endif | ||
721 | 781 | ||
722 | xen_mc_issue(PARAVIRT_LAZY_CPU); /* interrupts restored */ | 782 | xen_mc_issue(PARAVIRT_LAZY_CPU); /* interrupts restored */ |
723 | } | 783 | } |
724 | 784 | ||
785 | static int xen_write_msr_safe(unsigned int msr, unsigned low, unsigned high) | ||
786 | { | ||
787 | int ret; | ||
788 | |||
789 | ret = 0; | ||
790 | |||
791 | switch(msr) { | ||
792 | #ifdef CONFIG_X86_64 | ||
793 | unsigned which; | ||
794 | u64 base; | ||
795 | |||
796 | case MSR_FS_BASE: which = SEGBASE_FS; goto set; | ||
797 | case MSR_KERNEL_GS_BASE: which = SEGBASE_GS_USER; goto set; | ||
798 | case MSR_GS_BASE: which = SEGBASE_GS_KERNEL; goto set; | ||
799 | |||
800 | set: | ||
801 | base = ((u64)high << 32) | low; | ||
802 | if (HYPERVISOR_set_segment_base(which, base) != 0) | ||
803 | ret = -EFAULT; | ||
804 | break; | ||
805 | #endif | ||
806 | default: | ||
807 | ret = native_write_msr_safe(msr, low, high); | ||
808 | } | ||
809 | |||
810 | return ret; | ||
811 | } | ||
812 | |||
725 | /* Early in boot, while setting up the initial pagetable, assume | 813 | /* Early in boot, while setting up the initial pagetable, assume |
726 | everything is pinned. */ | 814 | everything is pinned. */ |
727 | static __init void xen_alloc_pte_init(struct mm_struct *mm, u32 pfn) | 815 | static __init void xen_alloc_pte_init(struct mm_struct *mm, u32 pfn) |
@@ -778,6 +866,48 @@ static void xen_alloc_pmd(struct mm_struct *mm, u32 pfn) | |||
778 | xen_alloc_ptpage(mm, pfn, PT_PMD); | 866 | xen_alloc_ptpage(mm, pfn, PT_PMD); |
779 | } | 867 | } |
780 | 868 | ||
869 | static int xen_pgd_alloc(struct mm_struct *mm) | ||
870 | { | ||
871 | pgd_t *pgd = mm->pgd; | ||
872 | int ret = 0; | ||
873 | |||
874 | BUG_ON(PagePinned(virt_to_page(pgd))); | ||
875 | |||
876 | #ifdef CONFIG_X86_64 | ||
877 | { | ||
878 | struct page *page = virt_to_page(pgd); | ||
879 | pgd_t *user_pgd; | ||
880 | |||
881 | BUG_ON(page->private != 0); | ||
882 | |||
883 | ret = -ENOMEM; | ||
884 | |||
885 | user_pgd = (pgd_t *)__get_free_page(GFP_KERNEL | __GFP_ZERO); | ||
886 | page->private = (unsigned long)user_pgd; | ||
887 | |||
888 | if (user_pgd != NULL) { | ||
889 | user_pgd[pgd_index(VSYSCALL_START)] = | ||
890 | __pgd(__pa(level3_user_vsyscall) | _PAGE_TABLE); | ||
891 | ret = 0; | ||
892 | } | ||
893 | |||
894 | BUG_ON(PagePinned(virt_to_page(xen_get_user_pgd(pgd)))); | ||
895 | } | ||
896 | #endif | ||
897 | |||
898 | return ret; | ||
899 | } | ||
900 | |||
901 | static void xen_pgd_free(struct mm_struct *mm, pgd_t *pgd) | ||
902 | { | ||
903 | #ifdef CONFIG_X86_64 | ||
904 | pgd_t *user_pgd = xen_get_user_pgd(pgd); | ||
905 | |||
906 | if (user_pgd) | ||
907 | free_page((unsigned long)user_pgd); | ||
908 | #endif | ||
909 | } | ||
910 | |||
781 | /* This should never happen until we're OK to use struct page */ | 911 | /* This should never happen until we're OK to use struct page */ |
782 | static void xen_release_ptpage(u32 pfn, unsigned level) | 912 | static void xen_release_ptpage(u32 pfn, unsigned level) |
783 | { | 913 | { |
@@ -803,6 +933,18 @@ static void xen_release_pmd(u32 pfn) | |||
803 | xen_release_ptpage(pfn, PT_PMD); | 933 | xen_release_ptpage(pfn, PT_PMD); |
804 | } | 934 | } |
805 | 935 | ||
936 | #if PAGETABLE_LEVELS == 4 | ||
937 | static void xen_alloc_pud(struct mm_struct *mm, u32 pfn) | ||
938 | { | ||
939 | xen_alloc_ptpage(mm, pfn, PT_PUD); | ||
940 | } | ||
941 | |||
942 | static void xen_release_pud(u32 pfn) | ||
943 | { | ||
944 | xen_release_ptpage(pfn, PT_PUD); | ||
945 | } | ||
946 | #endif | ||
947 | |||
806 | #ifdef CONFIG_HIGHPTE | 948 | #ifdef CONFIG_HIGHPTE |
807 | static void *xen_kmap_atomic_pte(struct page *page, enum km_type type) | 949 | static void *xen_kmap_atomic_pte(struct page *page, enum km_type type) |
808 | { | 950 | { |
@@ -841,68 +983,16 @@ static __init void xen_set_pte_init(pte_t *ptep, pte_t pte) | |||
841 | 983 | ||
842 | static __init void xen_pagetable_setup_start(pgd_t *base) | 984 | static __init void xen_pagetable_setup_start(pgd_t *base) |
843 | { | 985 | { |
844 | pgd_t *xen_pgd = (pgd_t *)xen_start_info->pt_base; | ||
845 | int i; | ||
846 | |||
847 | /* special set_pte for pagetable initialization */ | ||
848 | pv_mmu_ops.set_pte = xen_set_pte_init; | ||
849 | |||
850 | init_mm.pgd = base; | ||
851 | /* | ||
852 | * copy top-level of Xen-supplied pagetable into place. This | ||
853 | * is a stand-in while we copy the pmd pages. | ||
854 | */ | ||
855 | memcpy(base, xen_pgd, PTRS_PER_PGD * sizeof(pgd_t)); | ||
856 | |||
857 | /* | ||
858 | * For PAE, need to allocate new pmds, rather than | ||
859 | * share Xen's, since Xen doesn't like pmd's being | ||
860 | * shared between address spaces. | ||
861 | */ | ||
862 | for (i = 0; i < PTRS_PER_PGD; i++) { | ||
863 | if (pgd_val_ma(xen_pgd[i]) & _PAGE_PRESENT) { | ||
864 | pmd_t *pmd = (pmd_t *)alloc_bootmem_low_pages(PAGE_SIZE); | ||
865 | |||
866 | memcpy(pmd, (void *)pgd_page_vaddr(xen_pgd[i]), | ||
867 | PAGE_SIZE); | ||
868 | |||
869 | make_lowmem_page_readonly(pmd); | ||
870 | |||
871 | set_pgd(&base[i], __pgd(1 + __pa(pmd))); | ||
872 | } else | ||
873 | pgd_clear(&base[i]); | ||
874 | } | ||
875 | |||
876 | /* make sure zero_page is mapped RO so we can use it in pagetables */ | ||
877 | make_lowmem_page_readonly(empty_zero_page); | ||
878 | make_lowmem_page_readonly(base); | ||
879 | /* | ||
880 | * Switch to new pagetable. This is done before | ||
881 | * pagetable_init has done anything so that the new pages | ||
882 | * added to the table can be prepared properly for Xen. | ||
883 | */ | ||
884 | xen_write_cr3(__pa(base)); | ||
885 | |||
886 | /* Unpin initial Xen pagetable */ | ||
887 | pin_pagetable_pfn(MMUEXT_UNPIN_TABLE, | ||
888 | PFN_DOWN(__pa(xen_start_info->pt_base))); | ||
889 | } | 986 | } |
890 | 987 | ||
891 | void xen_setup_shared_info(void) | 988 | void xen_setup_shared_info(void) |
892 | { | 989 | { |
893 | if (!xen_feature(XENFEAT_auto_translated_physmap)) { | 990 | if (!xen_feature(XENFEAT_auto_translated_physmap)) { |
894 | unsigned long addr = fix_to_virt(FIX_PARAVIRT_BOOTMAP); | 991 | set_fixmap(FIX_PARAVIRT_BOOTMAP, |
895 | 992 | xen_start_info->shared_info); | |
896 | /* | 993 | |
897 | * Create a mapping for the shared info page. | 994 | HYPERVISOR_shared_info = |
898 | * Should be set_fixmap(), but shared_info is a machine | 995 | (struct shared_info *)fix_to_virt(FIX_PARAVIRT_BOOTMAP); |
899 | * address with no corresponding pseudo-phys address. | ||
900 | */ | ||
901 | set_pte_mfn(addr, | ||
902 | PFN_DOWN(xen_start_info->shared_info), | ||
903 | PAGE_KERNEL); | ||
904 | |||
905 | HYPERVISOR_shared_info = (struct shared_info *)addr; | ||
906 | } else | 996 | } else |
907 | HYPERVISOR_shared_info = | 997 | HYPERVISOR_shared_info = |
908 | (struct shared_info *)__va(xen_start_info->shared_info); | 998 | (struct shared_info *)__va(xen_start_info->shared_info); |
@@ -917,26 +1007,32 @@ void xen_setup_shared_info(void) | |||
917 | 1007 | ||
918 | static __init void xen_pagetable_setup_done(pgd_t *base) | 1008 | static __init void xen_pagetable_setup_done(pgd_t *base) |
919 | { | 1009 | { |
920 | /* This will work as long as patching hasn't happened yet | ||
921 | (which it hasn't) */ | ||
922 | pv_mmu_ops.alloc_pte = xen_alloc_pte; | ||
923 | pv_mmu_ops.alloc_pmd = xen_alloc_pmd; | ||
924 | pv_mmu_ops.release_pte = xen_release_pte; | ||
925 | pv_mmu_ops.release_pmd = xen_release_pmd; | ||
926 | pv_mmu_ops.set_pte = xen_set_pte; | ||
927 | |||
928 | xen_setup_shared_info(); | 1010 | xen_setup_shared_info(); |
929 | |||
930 | /* Actually pin the pagetable down, but we can't set PG_pinned | ||
931 | yet because the page structures don't exist yet. */ | ||
932 | pin_pagetable_pfn(MMUEXT_PIN_L3_TABLE, PFN_DOWN(__pa(base))); | ||
933 | } | 1011 | } |
934 | 1012 | ||
935 | static __init void xen_post_allocator_init(void) | 1013 | static __init void xen_post_allocator_init(void) |
936 | { | 1014 | { |
1015 | pv_mmu_ops.set_pte = xen_set_pte; | ||
937 | pv_mmu_ops.set_pmd = xen_set_pmd; | 1016 | pv_mmu_ops.set_pmd = xen_set_pmd; |
938 | pv_mmu_ops.set_pud = xen_set_pud; | 1017 | pv_mmu_ops.set_pud = xen_set_pud; |
1018 | #if PAGETABLE_LEVELS == 4 | ||
1019 | pv_mmu_ops.set_pgd = xen_set_pgd; | ||
1020 | #endif | ||
1021 | |||
1022 | /* This will work as long as patching hasn't happened yet | ||
1023 | (which it hasn't) */ | ||
1024 | pv_mmu_ops.alloc_pte = xen_alloc_pte; | ||
1025 | pv_mmu_ops.alloc_pmd = xen_alloc_pmd; | ||
1026 | pv_mmu_ops.release_pte = xen_release_pte; | ||
1027 | pv_mmu_ops.release_pmd = xen_release_pmd; | ||
1028 | #if PAGETABLE_LEVELS == 4 | ||
1029 | pv_mmu_ops.alloc_pud = xen_alloc_pud; | ||
1030 | pv_mmu_ops.release_pud = xen_release_pud; | ||
1031 | #endif | ||
939 | 1032 | ||
1033 | #ifdef CONFIG_X86_64 | ||
1034 | SetPagePinned(virt_to_page(level3_user_vsyscall)); | ||
1035 | #endif | ||
940 | xen_mark_init_mm_pinned(); | 1036 | xen_mark_init_mm_pinned(); |
941 | } | 1037 | } |
942 | 1038 | ||
@@ -950,6 +1046,7 @@ void xen_setup_vcpu_info_placement(void) | |||
950 | 1046 | ||
951 | /* xen_vcpu_setup managed to place the vcpu_info within the | 1047 | /* xen_vcpu_setup managed to place the vcpu_info within the |
952 | percpu area for all cpus, so make use of it */ | 1048 | percpu area for all cpus, so make use of it */ |
1049 | #ifdef CONFIG_X86_32 | ||
953 | if (have_vcpu_info_placement) { | 1050 | if (have_vcpu_info_placement) { |
954 | printk(KERN_INFO "Xen: using vcpu_info placement\n"); | 1051 | printk(KERN_INFO "Xen: using vcpu_info placement\n"); |
955 | 1052 | ||
@@ -959,6 +1056,7 @@ void xen_setup_vcpu_info_placement(void) | |||
959 | pv_irq_ops.irq_enable = xen_irq_enable_direct; | 1056 | pv_irq_ops.irq_enable = xen_irq_enable_direct; |
960 | pv_mmu_ops.read_cr2 = xen_read_cr2_direct; | 1057 | pv_mmu_ops.read_cr2 = xen_read_cr2_direct; |
961 | } | 1058 | } |
1059 | #endif | ||
962 | } | 1060 | } |
963 | 1061 | ||
964 | static unsigned xen_patch(u8 type, u16 clobbers, void *insnbuf, | 1062 | static unsigned xen_patch(u8 type, u16 clobbers, void *insnbuf, |
@@ -979,10 +1077,12 @@ static unsigned xen_patch(u8 type, u16 clobbers, void *insnbuf, | |||
979 | goto patch_site | 1077 | goto patch_site |
980 | 1078 | ||
981 | switch (type) { | 1079 | switch (type) { |
1080 | #ifdef CONFIG_X86_32 | ||
982 | SITE(pv_irq_ops, irq_enable); | 1081 | SITE(pv_irq_ops, irq_enable); |
983 | SITE(pv_irq_ops, irq_disable); | 1082 | SITE(pv_irq_ops, irq_disable); |
984 | SITE(pv_irq_ops, save_fl); | 1083 | SITE(pv_irq_ops, save_fl); |
985 | SITE(pv_irq_ops, restore_fl); | 1084 | SITE(pv_irq_ops, restore_fl); |
1085 | #endif /* CONFIG_X86_32 */ | ||
986 | #undef SITE | 1086 | #undef SITE |
987 | 1087 | ||
988 | patch_site: | 1088 | patch_site: |
@@ -1025,8 +1125,15 @@ static void xen_set_fixmap(unsigned idx, unsigned long phys, pgprot_t prot) | |||
1025 | #ifdef CONFIG_X86_F00F_BUG | 1125 | #ifdef CONFIG_X86_F00F_BUG |
1026 | case FIX_F00F_IDT: | 1126 | case FIX_F00F_IDT: |
1027 | #endif | 1127 | #endif |
1128 | #ifdef CONFIG_X86_32 | ||
1028 | case FIX_WP_TEST: | 1129 | case FIX_WP_TEST: |
1029 | case FIX_VDSO: | 1130 | case FIX_VDSO: |
1131 | # ifdef CONFIG_HIGHMEM | ||
1132 | case FIX_KMAP_BEGIN ... FIX_KMAP_END: | ||
1133 | # endif | ||
1134 | #else | ||
1135 | case VSYSCALL_LAST_PAGE ... VSYSCALL_FIRST_PAGE: | ||
1136 | #endif | ||
1030 | #ifdef CONFIG_X86_LOCAL_APIC | 1137 | #ifdef CONFIG_X86_LOCAL_APIC |
1031 | case FIX_APIC_BASE: /* maps dummy local APIC */ | 1138 | case FIX_APIC_BASE: /* maps dummy local APIC */ |
1032 | #endif | 1139 | #endif |
@@ -1039,6 +1146,15 @@ static void xen_set_fixmap(unsigned idx, unsigned long phys, pgprot_t prot) | |||
1039 | } | 1146 | } |
1040 | 1147 | ||
1041 | __native_set_fixmap(idx, pte); | 1148 | __native_set_fixmap(idx, pte); |
1149 | |||
1150 | #ifdef CONFIG_X86_64 | ||
1151 | /* Replicate changes to map the vsyscall page into the user | ||
1152 | pagetable vsyscall mapping. */ | ||
1153 | if (idx >= VSYSCALL_LAST_PAGE && idx <= VSYSCALL_FIRST_PAGE) { | ||
1154 | unsigned long vaddr = __fix_to_virt(idx); | ||
1155 | set_pte_vaddr_pud(level3_user_vsyscall, vaddr, pte); | ||
1156 | } | ||
1157 | #endif | ||
1042 | } | 1158 | } |
1043 | 1159 | ||
1044 | static const struct pv_info xen_info __initdata = { | 1160 | static const struct pv_info xen_info __initdata = { |
@@ -1084,18 +1200,25 @@ static const struct pv_cpu_ops xen_cpu_ops __initdata = { | |||
1084 | .wbinvd = native_wbinvd, | 1200 | .wbinvd = native_wbinvd, |
1085 | 1201 | ||
1086 | .read_msr = native_read_msr_safe, | 1202 | .read_msr = native_read_msr_safe, |
1087 | .write_msr = native_write_msr_safe, | 1203 | .write_msr = xen_write_msr_safe, |
1088 | .read_tsc = native_read_tsc, | 1204 | .read_tsc = native_read_tsc, |
1089 | .read_pmc = native_read_pmc, | 1205 | .read_pmc = native_read_pmc, |
1090 | 1206 | ||
1091 | .iret = xen_iret, | 1207 | .iret = xen_iret, |
1092 | .irq_enable_sysexit = xen_sysexit, | 1208 | .irq_enable_sysexit = xen_sysexit, |
1209 | #ifdef CONFIG_X86_64 | ||
1210 | .usergs_sysret32 = xen_sysret32, | ||
1211 | .usergs_sysret64 = xen_sysret64, | ||
1212 | #endif | ||
1093 | 1213 | ||
1094 | .load_tr_desc = paravirt_nop, | 1214 | .load_tr_desc = paravirt_nop, |
1095 | .set_ldt = xen_set_ldt, | 1215 | .set_ldt = xen_set_ldt, |
1096 | .load_gdt = xen_load_gdt, | 1216 | .load_gdt = xen_load_gdt, |
1097 | .load_idt = xen_load_idt, | 1217 | .load_idt = xen_load_idt, |
1098 | .load_tls = xen_load_tls, | 1218 | .load_tls = xen_load_tls, |
1219 | #ifdef CONFIG_X86_64 | ||
1220 | .load_gs_index = xen_load_gs_index, | ||
1221 | #endif | ||
1099 | 1222 | ||
1100 | .store_gdt = native_store_gdt, | 1223 | .store_gdt = native_store_gdt, |
1101 | .store_idt = native_store_idt, | 1224 | .store_idt = native_store_idt, |
@@ -1109,14 +1232,34 @@ static const struct pv_cpu_ops xen_cpu_ops __initdata = { | |||
1109 | .set_iopl_mask = xen_set_iopl_mask, | 1232 | .set_iopl_mask = xen_set_iopl_mask, |
1110 | .io_delay = xen_io_delay, | 1233 | .io_delay = xen_io_delay, |
1111 | 1234 | ||
1235 | /* Xen takes care of %gs when switching to usermode for us */ | ||
1236 | .swapgs = paravirt_nop, | ||
1237 | |||
1112 | .lazy_mode = { | 1238 | .lazy_mode = { |
1113 | .enter = paravirt_enter_lazy_cpu, | 1239 | .enter = paravirt_enter_lazy_cpu, |
1114 | .leave = xen_leave_lazy, | 1240 | .leave = xen_leave_lazy, |
1115 | }, | 1241 | }, |
1116 | }; | 1242 | }; |
1117 | 1243 | ||
1244 | static void __init __xen_init_IRQ(void) | ||
1245 | { | ||
1246 | #ifdef CONFIG_X86_64 | ||
1247 | int i; | ||
1248 | |||
1249 | /* Create identity vector->irq map */ | ||
1250 | for(i = 0; i < NR_VECTORS; i++) { | ||
1251 | int cpu; | ||
1252 | |||
1253 | for_each_possible_cpu(cpu) | ||
1254 | per_cpu(vector_irq, cpu)[i] = i; | ||
1255 | } | ||
1256 | #endif /* CONFIG_X86_64 */ | ||
1257 | |||
1258 | xen_init_IRQ(); | ||
1259 | } | ||
1260 | |||
1118 | static const struct pv_irq_ops xen_irq_ops __initdata = { | 1261 | static const struct pv_irq_ops xen_irq_ops __initdata = { |
1119 | .init_IRQ = xen_init_IRQ, | 1262 | .init_IRQ = __xen_init_IRQ, |
1120 | .save_fl = xen_save_fl, | 1263 | .save_fl = xen_save_fl, |
1121 | .restore_fl = xen_restore_fl, | 1264 | .restore_fl = xen_restore_fl, |
1122 | .irq_disable = xen_irq_disable, | 1265 | .irq_disable = xen_irq_disable, |
@@ -1124,14 +1267,13 @@ static const struct pv_irq_ops xen_irq_ops __initdata = { | |||
1124 | .safe_halt = xen_safe_halt, | 1267 | .safe_halt = xen_safe_halt, |
1125 | .halt = xen_halt, | 1268 | .halt = xen_halt, |
1126 | #ifdef CONFIG_X86_64 | 1269 | #ifdef CONFIG_X86_64 |
1127 | .adjust_exception_frame = paravirt_nop, | 1270 | .adjust_exception_frame = xen_adjust_exception_frame, |
1128 | #endif | 1271 | #endif |
1129 | }; | 1272 | }; |
1130 | 1273 | ||
1131 | static const struct pv_apic_ops xen_apic_ops __initdata = { | 1274 | static const struct pv_apic_ops xen_apic_ops __initdata = { |
1132 | #ifdef CONFIG_X86_LOCAL_APIC | 1275 | #ifdef CONFIG_X86_LOCAL_APIC |
1133 | .apic_write = xen_apic_write, | 1276 | .apic_write = xen_apic_write, |
1134 | .apic_write_atomic = xen_apic_write, | ||
1135 | .apic_read = xen_apic_read, | 1277 | .apic_read = xen_apic_read, |
1136 | .setup_boot_clock = paravirt_nop, | 1278 | .setup_boot_clock = paravirt_nop, |
1137 | .setup_secondary_clock = paravirt_nop, | 1279 | .setup_secondary_clock = paravirt_nop, |
@@ -1157,8 +1299,8 @@ static const struct pv_mmu_ops xen_mmu_ops __initdata = { | |||
1157 | .pte_update = paravirt_nop, | 1299 | .pte_update = paravirt_nop, |
1158 | .pte_update_defer = paravirt_nop, | 1300 | .pte_update_defer = paravirt_nop, |
1159 | 1301 | ||
1160 | .pgd_alloc = __paravirt_pgd_alloc, | 1302 | .pgd_alloc = xen_pgd_alloc, |
1161 | .pgd_free = paravirt_nop, | 1303 | .pgd_free = xen_pgd_free, |
1162 | 1304 | ||
1163 | .alloc_pte = xen_alloc_pte_init, | 1305 | .alloc_pte = xen_alloc_pte_init, |
1164 | .release_pte = xen_release_pte_init, | 1306 | .release_pte = xen_release_pte_init, |
@@ -1170,7 +1312,11 @@ static const struct pv_mmu_ops xen_mmu_ops __initdata = { | |||
1170 | .kmap_atomic_pte = xen_kmap_atomic_pte, | 1312 | .kmap_atomic_pte = xen_kmap_atomic_pte, |
1171 | #endif | 1313 | #endif |
1172 | 1314 | ||
1173 | .set_pte = NULL, /* see xen_pagetable_setup_* */ | 1315 | #ifdef CONFIG_X86_64 |
1316 | .set_pte = xen_set_pte, | ||
1317 | #else | ||
1318 | .set_pte = xen_set_pte_init, | ||
1319 | #endif | ||
1174 | .set_pte_at = xen_set_pte_at, | 1320 | .set_pte_at = xen_set_pte_at, |
1175 | .set_pmd = xen_set_pmd_hyper, | 1321 | .set_pmd = xen_set_pmd_hyper, |
1176 | 1322 | ||
@@ -1178,21 +1324,32 @@ static const struct pv_mmu_ops xen_mmu_ops __initdata = { | |||
1178 | .ptep_modify_prot_commit = __ptep_modify_prot_commit, | 1324 | .ptep_modify_prot_commit = __ptep_modify_prot_commit, |
1179 | 1325 | ||
1180 | .pte_val = xen_pte_val, | 1326 | .pte_val = xen_pte_val, |
1181 | .pte_flags = native_pte_val, | 1327 | .pte_flags = native_pte_flags, |
1182 | .pgd_val = xen_pgd_val, | 1328 | .pgd_val = xen_pgd_val, |
1183 | 1329 | ||
1184 | .make_pte = xen_make_pte, | 1330 | .make_pte = xen_make_pte, |
1185 | .make_pgd = xen_make_pgd, | 1331 | .make_pgd = xen_make_pgd, |
1186 | 1332 | ||
1333 | #ifdef CONFIG_X86_PAE | ||
1187 | .set_pte_atomic = xen_set_pte_atomic, | 1334 | .set_pte_atomic = xen_set_pte_atomic, |
1188 | .set_pte_present = xen_set_pte_at, | 1335 | .set_pte_present = xen_set_pte_at, |
1189 | .set_pud = xen_set_pud_hyper, | ||
1190 | .pte_clear = xen_pte_clear, | 1336 | .pte_clear = xen_pte_clear, |
1191 | .pmd_clear = xen_pmd_clear, | 1337 | .pmd_clear = xen_pmd_clear, |
1338 | #endif /* CONFIG_X86_PAE */ | ||
1339 | .set_pud = xen_set_pud_hyper, | ||
1192 | 1340 | ||
1193 | .make_pmd = xen_make_pmd, | 1341 | .make_pmd = xen_make_pmd, |
1194 | .pmd_val = xen_pmd_val, | 1342 | .pmd_val = xen_pmd_val, |
1195 | 1343 | ||
1344 | #if PAGETABLE_LEVELS == 4 | ||
1345 | .pud_val = xen_pud_val, | ||
1346 | .make_pud = xen_make_pud, | ||
1347 | .set_pgd = xen_set_pgd_hyper, | ||
1348 | |||
1349 | .alloc_pud = xen_alloc_pte_init, | ||
1350 | .release_pud = xen_release_pte_init, | ||
1351 | #endif /* PAGETABLE_LEVELS == 4 */ | ||
1352 | |||
1196 | .activate_mm = xen_activate_mm, | 1353 | .activate_mm = xen_activate_mm, |
1197 | .dup_mmap = xen_dup_mmap, | 1354 | .dup_mmap = xen_dup_mmap, |
1198 | .exit_mmap = xen_exit_mmap, | 1355 | .exit_mmap = xen_exit_mmap, |
@@ -1205,21 +1362,6 @@ static const struct pv_mmu_ops xen_mmu_ops __initdata = { | |||
1205 | .set_fixmap = xen_set_fixmap, | 1362 | .set_fixmap = xen_set_fixmap, |
1206 | }; | 1363 | }; |
1207 | 1364 | ||
1208 | #ifdef CONFIG_SMP | ||
1209 | static const struct smp_ops xen_smp_ops __initdata = { | ||
1210 | .smp_prepare_boot_cpu = xen_smp_prepare_boot_cpu, | ||
1211 | .smp_prepare_cpus = xen_smp_prepare_cpus, | ||
1212 | .cpu_up = xen_cpu_up, | ||
1213 | .smp_cpus_done = xen_smp_cpus_done, | ||
1214 | |||
1215 | .smp_send_stop = xen_smp_send_stop, | ||
1216 | .smp_send_reschedule = xen_smp_send_reschedule, | ||
1217 | |||
1218 | .send_call_func_ipi = xen_smp_send_call_function_ipi, | ||
1219 | .send_call_func_single_ipi = xen_smp_send_call_function_single_ipi, | ||
1220 | }; | ||
1221 | #endif /* CONFIG_SMP */ | ||
1222 | |||
1223 | static void xen_reboot(int reason) | 1365 | static void xen_reboot(int reason) |
1224 | { | 1366 | { |
1225 | struct sched_shutdown r = { .reason = reason }; | 1367 | struct sched_shutdown r = { .reason = reason }; |
@@ -1264,6 +1406,7 @@ static const struct machine_ops __initdata xen_machine_ops = { | |||
1264 | 1406 | ||
1265 | static void __init xen_reserve_top(void) | 1407 | static void __init xen_reserve_top(void) |
1266 | { | 1408 | { |
1409 | #ifdef CONFIG_X86_32 | ||
1267 | unsigned long top = HYPERVISOR_VIRT_START; | 1410 | unsigned long top = HYPERVISOR_VIRT_START; |
1268 | struct xen_platform_parameters pp; | 1411 | struct xen_platform_parameters pp; |
1269 | 1412 | ||
@@ -1271,8 +1414,248 @@ static void __init xen_reserve_top(void) | |||
1271 | top = pp.virt_start; | 1414 | top = pp.virt_start; |
1272 | 1415 | ||
1273 | reserve_top_address(-top + 2 * PAGE_SIZE); | 1416 | reserve_top_address(-top + 2 * PAGE_SIZE); |
1417 | #endif /* CONFIG_X86_32 */ | ||
1418 | } | ||
1419 | |||
1420 | /* | ||
1421 | * Like __va(), but returns address in the kernel mapping (which is | ||
1422 | * all we have until the physical memory mapping has been set up. | ||
1423 | */ | ||
1424 | static void *__ka(phys_addr_t paddr) | ||
1425 | { | ||
1426 | #ifdef CONFIG_X86_64 | ||
1427 | return (void *)(paddr + __START_KERNEL_map); | ||
1428 | #else | ||
1429 | return __va(paddr); | ||
1430 | #endif | ||
1274 | } | 1431 | } |
1275 | 1432 | ||
1433 | /* Convert a machine address to physical address */ | ||
1434 | static unsigned long m2p(phys_addr_t maddr) | ||
1435 | { | ||
1436 | phys_addr_t paddr; | ||
1437 | |||
1438 | maddr &= PTE_PFN_MASK; | ||
1439 | paddr = mfn_to_pfn(maddr >> PAGE_SHIFT) << PAGE_SHIFT; | ||
1440 | |||
1441 | return paddr; | ||
1442 | } | ||
1443 | |||
1444 | /* Convert a machine address to kernel virtual */ | ||
1445 | static void *m2v(phys_addr_t maddr) | ||
1446 | { | ||
1447 | return __ka(m2p(maddr)); | ||
1448 | } | ||
1449 | |||
1450 | #ifdef CONFIG_X86_64 | ||
1451 | static void walk(pgd_t *pgd, unsigned long addr) | ||
1452 | { | ||
1453 | unsigned l4idx = pgd_index(addr); | ||
1454 | unsigned l3idx = pud_index(addr); | ||
1455 | unsigned l2idx = pmd_index(addr); | ||
1456 | unsigned l1idx = pte_index(addr); | ||
1457 | pgd_t l4; | ||
1458 | pud_t l3; | ||
1459 | pmd_t l2; | ||
1460 | pte_t l1; | ||
1461 | |||
1462 | xen_raw_printk("walk %p, %lx -> %d %d %d %d\n", | ||
1463 | pgd, addr, l4idx, l3idx, l2idx, l1idx); | ||
1464 | |||
1465 | l4 = pgd[l4idx]; | ||
1466 | xen_raw_printk(" l4: %016lx\n", l4.pgd); | ||
1467 | xen_raw_printk(" %016lx\n", pgd_val(l4)); | ||
1468 | |||
1469 | l3 = ((pud_t *)(m2v(l4.pgd)))[l3idx]; | ||
1470 | xen_raw_printk(" l3: %016lx\n", l3.pud); | ||
1471 | xen_raw_printk(" %016lx\n", pud_val(l3)); | ||
1472 | |||
1473 | l2 = ((pmd_t *)(m2v(l3.pud)))[l2idx]; | ||
1474 | xen_raw_printk(" l2: %016lx\n", l2.pmd); | ||
1475 | xen_raw_printk(" %016lx\n", pmd_val(l2)); | ||
1476 | |||
1477 | l1 = ((pte_t *)(m2v(l2.pmd)))[l1idx]; | ||
1478 | xen_raw_printk(" l1: %016lx\n", l1.pte); | ||
1479 | xen_raw_printk(" %016lx\n", pte_val(l1)); | ||
1480 | } | ||
1481 | #endif | ||
1482 | |||
1483 | static void set_page_prot(void *addr, pgprot_t prot) | ||
1484 | { | ||
1485 | unsigned long pfn = __pa(addr) >> PAGE_SHIFT; | ||
1486 | pte_t pte = pfn_pte(pfn, prot); | ||
1487 | |||
1488 | xen_raw_printk("addr=%p pfn=%lx mfn=%lx prot=%016llx pte=%016llx\n", | ||
1489 | addr, pfn, get_phys_to_machine(pfn), | ||
1490 | pgprot_val(prot), pte.pte); | ||
1491 | |||
1492 | if (HYPERVISOR_update_va_mapping((unsigned long)addr, pte, 0)) | ||
1493 | BUG(); | ||
1494 | } | ||
1495 | |||
1496 | static __init void xen_map_identity_early(pmd_t *pmd, unsigned long max_pfn) | ||
1497 | { | ||
1498 | unsigned pmdidx, pteidx; | ||
1499 | unsigned ident_pte; | ||
1500 | unsigned long pfn; | ||
1501 | |||
1502 | ident_pte = 0; | ||
1503 | pfn = 0; | ||
1504 | for(pmdidx = 0; pmdidx < PTRS_PER_PMD && pfn < max_pfn; pmdidx++) { | ||
1505 | pte_t *pte_page; | ||
1506 | |||
1507 | /* Reuse or allocate a page of ptes */ | ||
1508 | if (pmd_present(pmd[pmdidx])) | ||
1509 | pte_page = m2v(pmd[pmdidx].pmd); | ||
1510 | else { | ||
1511 | /* Check for free pte pages */ | ||
1512 | if (ident_pte == ARRAY_SIZE(level1_ident_pgt)) | ||
1513 | break; | ||
1514 | |||
1515 | pte_page = &level1_ident_pgt[ident_pte]; | ||
1516 | ident_pte += PTRS_PER_PTE; | ||
1517 | |||
1518 | pmd[pmdidx] = __pmd(__pa(pte_page) | _PAGE_TABLE); | ||
1519 | } | ||
1520 | |||
1521 | /* Install mappings */ | ||
1522 | for(pteidx = 0; pteidx < PTRS_PER_PTE; pteidx++, pfn++) { | ||
1523 | pte_t pte; | ||
1524 | |||
1525 | if (pfn > max_pfn_mapped) | ||
1526 | max_pfn_mapped = pfn; | ||
1527 | |||
1528 | if (!pte_none(pte_page[pteidx])) | ||
1529 | continue; | ||
1530 | |||
1531 | pte = pfn_pte(pfn, PAGE_KERNEL_EXEC); | ||
1532 | pte_page[pteidx] = pte; | ||
1533 | } | ||
1534 | } | ||
1535 | |||
1536 | for(pteidx = 0; pteidx < ident_pte; pteidx += PTRS_PER_PTE) | ||
1537 | set_page_prot(&level1_ident_pgt[pteidx], PAGE_KERNEL_RO); | ||
1538 | |||
1539 | set_page_prot(pmd, PAGE_KERNEL_RO); | ||
1540 | } | ||
1541 | |||
1542 | #ifdef CONFIG_X86_64 | ||
1543 | static void convert_pfn_mfn(void *v) | ||
1544 | { | ||
1545 | pte_t *pte = v; | ||
1546 | int i; | ||
1547 | |||
1548 | /* All levels are converted the same way, so just treat them | ||
1549 | as ptes. */ | ||
1550 | for(i = 0; i < PTRS_PER_PTE; i++) | ||
1551 | pte[i] = xen_make_pte(pte[i].pte); | ||
1552 | } | ||
1553 | |||
1554 | /* | ||
1555 | * Set up the initial kernel pagetable. | ||
1556 | * | ||
1557 | * We can construct this by grafting the Xen provided pagetable into | ||
1558 | * head_64.S's preconstructed pagetables. We copy the Xen L2's into | ||
1559 | * level2_ident_pgt, level2_kernel_pgt and level2_fixmap_pgt. This | ||
1560 | * means that only the kernel has a physical mapping to start with - | ||
1561 | * but that's enough to get __va working. We need to fill in the rest | ||
1562 | * of the physical mapping once some sort of allocator has been set | ||
1563 | * up. | ||
1564 | */ | ||
1565 | static __init pgd_t *xen_setup_kernel_pagetable(pgd_t *pgd, unsigned long max_pfn) | ||
1566 | { | ||
1567 | pud_t *l3; | ||
1568 | pmd_t *l2; | ||
1569 | |||
1570 | /* Zap identity mapping */ | ||
1571 | init_level4_pgt[0] = __pgd(0); | ||
1572 | |||
1573 | /* Pre-constructed entries are in pfn, so convert to mfn */ | ||
1574 | convert_pfn_mfn(init_level4_pgt); | ||
1575 | convert_pfn_mfn(level3_ident_pgt); | ||
1576 | convert_pfn_mfn(level3_kernel_pgt); | ||
1577 | |||
1578 | l3 = m2v(pgd[pgd_index(__START_KERNEL_map)].pgd); | ||
1579 | l2 = m2v(l3[pud_index(__START_KERNEL_map)].pud); | ||
1580 | |||
1581 | memcpy(level2_ident_pgt, l2, sizeof(pmd_t) * PTRS_PER_PMD); | ||
1582 | memcpy(level2_kernel_pgt, l2, sizeof(pmd_t) * PTRS_PER_PMD); | ||
1583 | |||
1584 | l3 = m2v(pgd[pgd_index(__START_KERNEL_map + PMD_SIZE)].pgd); | ||
1585 | l2 = m2v(l3[pud_index(__START_KERNEL_map + PMD_SIZE)].pud); | ||
1586 | memcpy(level2_fixmap_pgt, l2, sizeof(pmd_t) * PTRS_PER_PMD); | ||
1587 | |||
1588 | /* Set up identity map */ | ||
1589 | xen_map_identity_early(level2_ident_pgt, max_pfn); | ||
1590 | |||
1591 | /* Make pagetable pieces RO */ | ||
1592 | set_page_prot(init_level4_pgt, PAGE_KERNEL_RO); | ||
1593 | set_page_prot(level3_ident_pgt, PAGE_KERNEL_RO); | ||
1594 | set_page_prot(level3_kernel_pgt, PAGE_KERNEL_RO); | ||
1595 | set_page_prot(level3_user_vsyscall, PAGE_KERNEL_RO); | ||
1596 | set_page_prot(level2_kernel_pgt, PAGE_KERNEL_RO); | ||
1597 | set_page_prot(level2_fixmap_pgt, PAGE_KERNEL_RO); | ||
1598 | |||
1599 | /* Pin down new L4 */ | ||
1600 | pin_pagetable_pfn(MMUEXT_PIN_L4_TABLE, | ||
1601 | PFN_DOWN(__pa_symbol(init_level4_pgt))); | ||
1602 | |||
1603 | /* Unpin Xen-provided one */ | ||
1604 | pin_pagetable_pfn(MMUEXT_UNPIN_TABLE, PFN_DOWN(__pa(pgd))); | ||
1605 | |||
1606 | /* Switch over */ | ||
1607 | pgd = init_level4_pgt; | ||
1608 | |||
1609 | /* | ||
1610 | * At this stage there can be no user pgd, and no page | ||
1611 | * structure to attach it to, so make sure we just set kernel | ||
1612 | * pgd. | ||
1613 | */ | ||
1614 | xen_mc_batch(); | ||
1615 | __xen_write_cr3(true, __pa(pgd)); | ||
1616 | xen_mc_issue(PARAVIRT_LAZY_CPU); | ||
1617 | |||
1618 | reserve_early(__pa(xen_start_info->pt_base), | ||
1619 | __pa(xen_start_info->pt_base + | ||
1620 | xen_start_info->nr_pt_frames * PAGE_SIZE), | ||
1621 | "XEN PAGETABLES"); | ||
1622 | |||
1623 | return pgd; | ||
1624 | } | ||
1625 | #else /* !CONFIG_X86_64 */ | ||
1626 | static pmd_t level2_kernel_pgt[PTRS_PER_PMD] __page_aligned_bss; | ||
1627 | |||
1628 | static __init pgd_t *xen_setup_kernel_pagetable(pgd_t *pgd, unsigned long max_pfn) | ||
1629 | { | ||
1630 | pmd_t *kernel_pmd; | ||
1631 | |||
1632 | init_pg_tables_start = __pa(pgd); | ||
1633 | init_pg_tables_end = __pa(pgd) + xen_start_info->nr_pt_frames*PAGE_SIZE; | ||
1634 | max_pfn_mapped = PFN_DOWN(init_pg_tables_end + 512*1024); | ||
1635 | |||
1636 | kernel_pmd = m2v(pgd[KERNEL_PGD_BOUNDARY].pgd); | ||
1637 | memcpy(level2_kernel_pgt, kernel_pmd, sizeof(pmd_t) * PTRS_PER_PMD); | ||
1638 | |||
1639 | xen_map_identity_early(level2_kernel_pgt, max_pfn); | ||
1640 | |||
1641 | memcpy(swapper_pg_dir, pgd, sizeof(pgd_t) * PTRS_PER_PGD); | ||
1642 | set_pgd(&swapper_pg_dir[KERNEL_PGD_BOUNDARY], | ||
1643 | __pgd(__pa(level2_kernel_pgt) | _PAGE_PRESENT)); | ||
1644 | |||
1645 | set_page_prot(level2_kernel_pgt, PAGE_KERNEL_RO); | ||
1646 | set_page_prot(swapper_pg_dir, PAGE_KERNEL_RO); | ||
1647 | set_page_prot(empty_zero_page, PAGE_KERNEL_RO); | ||
1648 | |||
1649 | pin_pagetable_pfn(MMUEXT_UNPIN_TABLE, PFN_DOWN(__pa(pgd))); | ||
1650 | |||
1651 | xen_write_cr3(__pa(swapper_pg_dir)); | ||
1652 | |||
1653 | pin_pagetable_pfn(MMUEXT_PIN_L3_TABLE, PFN_DOWN(__pa(swapper_pg_dir))); | ||
1654 | |||
1655 | return swapper_pg_dir; | ||
1656 | } | ||
1657 | #endif /* CONFIG_X86_64 */ | ||
1658 | |||
1276 | /* First C function to be called on Xen boot */ | 1659 | /* First C function to be called on Xen boot */ |
1277 | asmlinkage void __init xen_start_kernel(void) | 1660 | asmlinkage void __init xen_start_kernel(void) |
1278 | { | 1661 | { |
@@ -1301,53 +1684,56 @@ asmlinkage void __init xen_start_kernel(void) | |||
1301 | 1684 | ||
1302 | machine_ops = xen_machine_ops; | 1685 | machine_ops = xen_machine_ops; |
1303 | 1686 | ||
1304 | #ifdef CONFIG_SMP | 1687 | #ifdef CONFIG_X86_64 |
1305 | smp_ops = xen_smp_ops; | 1688 | /* Disable until direct per-cpu data access. */ |
1689 | have_vcpu_info_placement = 0; | ||
1690 | x86_64_init_pda(); | ||
1306 | #endif | 1691 | #endif |
1307 | 1692 | ||
1693 | xen_smp_init(); | ||
1694 | |||
1308 | /* Get mfn list */ | 1695 | /* Get mfn list */ |
1309 | if (!xen_feature(XENFEAT_auto_translated_physmap)) | 1696 | if (!xen_feature(XENFEAT_auto_translated_physmap)) |
1310 | xen_build_dynamic_phys_to_machine(); | 1697 | xen_build_dynamic_phys_to_machine(); |
1311 | 1698 | ||
1312 | pgd = (pgd_t *)xen_start_info->pt_base; | 1699 | pgd = (pgd_t *)xen_start_info->pt_base; |
1313 | 1700 | ||
1314 | init_pg_tables_start = __pa(pgd); | 1701 | /* Prevent unwanted bits from being set in PTEs. */ |
1315 | init_pg_tables_end = __pa(pgd) + xen_start_info->nr_pt_frames*PAGE_SIZE; | 1702 | __supported_pte_mask &= ~_PAGE_GLOBAL; |
1316 | max_pfn_mapped = (init_pg_tables_end + 512*1024) >> PAGE_SHIFT; | 1703 | if (!is_initial_xendomain()) |
1317 | 1704 | __supported_pte_mask &= ~(_PAGE_PWT | _PAGE_PCD); | |
1318 | init_mm.pgd = pgd; /* use the Xen pagetables to start */ | ||
1319 | |||
1320 | /* keep using Xen gdt for now; no urgent need to change it */ | ||
1321 | |||
1322 | x86_write_percpu(xen_cr3, __pa(pgd)); | ||
1323 | x86_write_percpu(xen_current_cr3, __pa(pgd)); | ||
1324 | 1705 | ||
1325 | /* Don't do the full vcpu_info placement stuff until we have a | 1706 | /* Don't do the full vcpu_info placement stuff until we have a |
1326 | possible map and a non-dummy shared_info. */ | 1707 | possible map and a non-dummy shared_info. */ |
1327 | per_cpu(xen_vcpu, 0) = &HYPERVISOR_shared_info->vcpu_info[0]; | 1708 | per_cpu(xen_vcpu, 0) = &HYPERVISOR_shared_info->vcpu_info[0]; |
1328 | 1709 | ||
1710 | xen_raw_console_write("mapping kernel into physical memory\n"); | ||
1711 | pgd = xen_setup_kernel_pagetable(pgd, xen_start_info->nr_pages); | ||
1712 | |||
1713 | init_mm.pgd = pgd; | ||
1714 | |||
1715 | /* keep using Xen gdt for now; no urgent need to change it */ | ||
1716 | |||
1329 | pv_info.kernel_rpl = 1; | 1717 | pv_info.kernel_rpl = 1; |
1330 | if (xen_feature(XENFEAT_supervisor_mode_kernel)) | 1718 | if (xen_feature(XENFEAT_supervisor_mode_kernel)) |
1331 | pv_info.kernel_rpl = 0; | 1719 | pv_info.kernel_rpl = 0; |
1332 | 1720 | ||
1333 | /* Prevent unwanted bits from being set in PTEs. */ | ||
1334 | __supported_pte_mask &= ~_PAGE_GLOBAL; | ||
1335 | if (!is_initial_xendomain()) | ||
1336 | __supported_pte_mask &= ~(_PAGE_PWT | _PAGE_PCD); | ||
1337 | |||
1338 | /* set the limit of our address space */ | 1721 | /* set the limit of our address space */ |
1339 | xen_reserve_top(); | 1722 | xen_reserve_top(); |
1340 | 1723 | ||
1724 | #ifdef CONFIG_X86_32 | ||
1341 | /* set up basic CPUID stuff */ | 1725 | /* set up basic CPUID stuff */ |
1342 | cpu_detect(&new_cpu_data); | 1726 | cpu_detect(&new_cpu_data); |
1343 | new_cpu_data.hard_math = 1; | 1727 | new_cpu_data.hard_math = 1; |
1344 | new_cpu_data.x86_capability[0] = cpuid_edx(1); | 1728 | new_cpu_data.x86_capability[0] = cpuid_edx(1); |
1729 | #endif | ||
1345 | 1730 | ||
1346 | /* Poke various useful things into boot_params */ | 1731 | /* Poke various useful things into boot_params */ |
1347 | boot_params.hdr.type_of_loader = (9 << 4) | 0; | 1732 | boot_params.hdr.type_of_loader = (9 << 4) | 0; |
1348 | boot_params.hdr.ramdisk_image = xen_start_info->mod_start | 1733 | boot_params.hdr.ramdisk_image = xen_start_info->mod_start |
1349 | ? __pa(xen_start_info->mod_start) : 0; | 1734 | ? __pa(xen_start_info->mod_start) : 0; |
1350 | boot_params.hdr.ramdisk_size = xen_start_info->mod_len; | 1735 | boot_params.hdr.ramdisk_size = xen_start_info->mod_len; |
1736 | boot_params.hdr.cmd_line_ptr = __pa(xen_start_info->cmd_line); | ||
1351 | 1737 | ||
1352 | if (!is_initial_xendomain()) { | 1738 | if (!is_initial_xendomain()) { |
1353 | add_preferred_console("xenboot", 0, NULL); | 1739 | add_preferred_console("xenboot", 0, NULL); |
@@ -1355,6 +1741,21 @@ asmlinkage void __init xen_start_kernel(void) | |||
1355 | add_preferred_console("hvc", 0, NULL); | 1741 | add_preferred_console("hvc", 0, NULL); |
1356 | } | 1742 | } |
1357 | 1743 | ||
1744 | xen_raw_console_write("about to get started...\n"); | ||
1745 | |||
1746 | #if 0 | ||
1747 | xen_raw_printk("&boot_params=%p __pa(&boot_params)=%lx __va(__pa(&boot_params))=%lx\n", | ||
1748 | &boot_params, __pa_symbol(&boot_params), | ||
1749 | __va(__pa_symbol(&boot_params))); | ||
1750 | |||
1751 | walk(pgd, &boot_params); | ||
1752 | walk(pgd, __va(__pa(&boot_params))); | ||
1753 | #endif | ||
1754 | |||
1358 | /* Start the world */ | 1755 | /* Start the world */ |
1756 | #ifdef CONFIG_X86_32 | ||
1359 | i386_start_kernel(); | 1757 | i386_start_kernel(); |
1758 | #else | ||
1759 | x86_64_start_reservations((char *)__pa_symbol(&boot_params)); | ||
1760 | #endif | ||
1360 | } | 1761 | } |
diff --git a/arch/x86/xen/mmu.c b/arch/x86/xen/mmu.c index ff0aa74afaa1..aa37469da696 100644 --- a/arch/x86/xen/mmu.c +++ b/arch/x86/xen/mmu.c | |||
@@ -44,8 +44,10 @@ | |||
44 | 44 | ||
45 | #include <asm/pgtable.h> | 45 | #include <asm/pgtable.h> |
46 | #include <asm/tlbflush.h> | 46 | #include <asm/tlbflush.h> |
47 | #include <asm/fixmap.h> | ||
47 | #include <asm/mmu_context.h> | 48 | #include <asm/mmu_context.h> |
48 | #include <asm/paravirt.h> | 49 | #include <asm/paravirt.h> |
50 | #include <asm/linkage.h> | ||
49 | 51 | ||
50 | #include <asm/xen/hypercall.h> | 52 | #include <asm/xen/hypercall.h> |
51 | #include <asm/xen/hypervisor.h> | 53 | #include <asm/xen/hypervisor.h> |
@@ -56,26 +58,29 @@ | |||
56 | #include "multicalls.h" | 58 | #include "multicalls.h" |
57 | #include "mmu.h" | 59 | #include "mmu.h" |
58 | 60 | ||
61 | /* | ||
62 | * Just beyond the highest usermode address. STACK_TOP_MAX has a | ||
63 | * redzone above it, so round it up to a PGD boundary. | ||
64 | */ | ||
65 | #define USER_LIMIT ((STACK_TOP_MAX + PGDIR_SIZE - 1) & PGDIR_MASK) | ||
66 | |||
67 | |||
59 | #define P2M_ENTRIES_PER_PAGE (PAGE_SIZE / sizeof(unsigned long)) | 68 | #define P2M_ENTRIES_PER_PAGE (PAGE_SIZE / sizeof(unsigned long)) |
60 | #define TOP_ENTRIES (MAX_DOMAIN_PAGES / P2M_ENTRIES_PER_PAGE) | 69 | #define TOP_ENTRIES (MAX_DOMAIN_PAGES / P2M_ENTRIES_PER_PAGE) |
61 | 70 | ||
62 | /* Placeholder for holes in the address space */ | 71 | /* Placeholder for holes in the address space */ |
63 | static unsigned long p2m_missing[P2M_ENTRIES_PER_PAGE] | 72 | static unsigned long p2m_missing[P2M_ENTRIES_PER_PAGE] __page_aligned_data = |
64 | __attribute__((section(".data.page_aligned"))) = | ||
65 | { [ 0 ... P2M_ENTRIES_PER_PAGE-1 ] = ~0UL }; | 73 | { [ 0 ... P2M_ENTRIES_PER_PAGE-1 ] = ~0UL }; |
66 | 74 | ||
67 | /* Array of pointers to pages containing p2m entries */ | 75 | /* Array of pointers to pages containing p2m entries */ |
68 | static unsigned long *p2m_top[TOP_ENTRIES] | 76 | static unsigned long *p2m_top[TOP_ENTRIES] __page_aligned_data = |
69 | __attribute__((section(".data.page_aligned"))) = | ||
70 | { [ 0 ... TOP_ENTRIES - 1] = &p2m_missing[0] }; | 77 | { [ 0 ... TOP_ENTRIES - 1] = &p2m_missing[0] }; |
71 | 78 | ||
72 | /* Arrays of p2m arrays expressed in mfns used for save/restore */ | 79 | /* Arrays of p2m arrays expressed in mfns used for save/restore */ |
73 | static unsigned long p2m_top_mfn[TOP_ENTRIES] | 80 | static unsigned long p2m_top_mfn[TOP_ENTRIES] __page_aligned_bss; |
74 | __attribute__((section(".bss.page_aligned"))); | ||
75 | 81 | ||
76 | static unsigned long p2m_top_mfn_list[ | 82 | static unsigned long p2m_top_mfn_list[TOP_ENTRIES / P2M_ENTRIES_PER_PAGE] |
77 | PAGE_ALIGN(TOP_ENTRIES / P2M_ENTRIES_PER_PAGE)] | 83 | __page_aligned_bss; |
78 | __attribute__((section(".bss.page_aligned"))); | ||
79 | 84 | ||
80 | static inline unsigned p2m_top_index(unsigned long pfn) | 85 | static inline unsigned p2m_top_index(unsigned long pfn) |
81 | { | 86 | { |
@@ -181,15 +186,16 @@ void set_phys_to_machine(unsigned long pfn, unsigned long mfn) | |||
181 | p2m_top[topidx][idx] = mfn; | 186 | p2m_top[topidx][idx] = mfn; |
182 | } | 187 | } |
183 | 188 | ||
184 | xmaddr_t arbitrary_virt_to_machine(unsigned long address) | 189 | xmaddr_t arbitrary_virt_to_machine(void *vaddr) |
185 | { | 190 | { |
191 | unsigned long address = (unsigned long)vaddr; | ||
186 | unsigned int level; | 192 | unsigned int level; |
187 | pte_t *pte = lookup_address(address, &level); | 193 | pte_t *pte = lookup_address(address, &level); |
188 | unsigned offset = address & ~PAGE_MASK; | 194 | unsigned offset = address & ~PAGE_MASK; |
189 | 195 | ||
190 | BUG_ON(pte == NULL); | 196 | BUG_ON(pte == NULL); |
191 | 197 | ||
192 | return XMADDR((pte_mfn(*pte) << PAGE_SHIFT) + offset); | 198 | return XMADDR(((phys_addr_t)pte_mfn(*pte) << PAGE_SHIFT) + offset); |
193 | } | 199 | } |
194 | 200 | ||
195 | void make_lowmem_page_readonly(void *vaddr) | 201 | void make_lowmem_page_readonly(void *vaddr) |
@@ -256,7 +262,8 @@ void xen_set_pmd_hyper(pmd_t *ptr, pmd_t val) | |||
256 | 262 | ||
257 | xen_mc_batch(); | 263 | xen_mc_batch(); |
258 | 264 | ||
259 | u.ptr = virt_to_machine(ptr).maddr; | 265 | /* ptr may be ioremapped for 64-bit pagetable setup */ |
266 | u.ptr = arbitrary_virt_to_machine(ptr).maddr; | ||
260 | u.val = pmd_val_ma(val); | 267 | u.val = pmd_val_ma(val); |
261 | extend_mmu_update(&u); | 268 | extend_mmu_update(&u); |
262 | 269 | ||
@@ -283,35 +290,7 @@ void xen_set_pmd(pmd_t *ptr, pmd_t val) | |||
283 | */ | 290 | */ |
284 | void set_pte_mfn(unsigned long vaddr, unsigned long mfn, pgprot_t flags) | 291 | void set_pte_mfn(unsigned long vaddr, unsigned long mfn, pgprot_t flags) |
285 | { | 292 | { |
286 | pgd_t *pgd; | 293 | set_pte_vaddr(vaddr, mfn_pte(mfn, flags)); |
287 | pud_t *pud; | ||
288 | pmd_t *pmd; | ||
289 | pte_t *pte; | ||
290 | |||
291 | pgd = swapper_pg_dir + pgd_index(vaddr); | ||
292 | if (pgd_none(*pgd)) { | ||
293 | BUG(); | ||
294 | return; | ||
295 | } | ||
296 | pud = pud_offset(pgd, vaddr); | ||
297 | if (pud_none(*pud)) { | ||
298 | BUG(); | ||
299 | return; | ||
300 | } | ||
301 | pmd = pmd_offset(pud, vaddr); | ||
302 | if (pmd_none(*pmd)) { | ||
303 | BUG(); | ||
304 | return; | ||
305 | } | ||
306 | pte = pte_offset_kernel(pmd, vaddr); | ||
307 | /* <mfn,flags> stored as-is, to permit clearing entries */ | ||
308 | xen_set_pte(pte, mfn_pte(mfn, flags)); | ||
309 | |||
310 | /* | ||
311 | * It's enough to flush this one mapping. | ||
312 | * (PGE mappings get flushed as well) | ||
313 | */ | ||
314 | __flush_tlb_one(vaddr); | ||
315 | } | 294 | } |
316 | 295 | ||
317 | void xen_set_pte_at(struct mm_struct *mm, unsigned long addr, | 296 | void xen_set_pte_at(struct mm_struct *mm, unsigned long addr, |
@@ -364,8 +343,8 @@ void xen_ptep_modify_prot_commit(struct mm_struct *mm, unsigned long addr, | |||
364 | static pteval_t pte_mfn_to_pfn(pteval_t val) | 343 | static pteval_t pte_mfn_to_pfn(pteval_t val) |
365 | { | 344 | { |
366 | if (val & _PAGE_PRESENT) { | 345 | if (val & _PAGE_PRESENT) { |
367 | unsigned long mfn = (val & PTE_MASK) >> PAGE_SHIFT; | 346 | unsigned long mfn = (val & PTE_PFN_MASK) >> PAGE_SHIFT; |
368 | pteval_t flags = val & ~PTE_MASK; | 347 | pteval_t flags = val & PTE_FLAGS_MASK; |
369 | val = ((pteval_t)mfn_to_pfn(mfn) << PAGE_SHIFT) | flags; | 348 | val = ((pteval_t)mfn_to_pfn(mfn) << PAGE_SHIFT) | flags; |
370 | } | 349 | } |
371 | 350 | ||
@@ -375,8 +354,8 @@ static pteval_t pte_mfn_to_pfn(pteval_t val) | |||
375 | static pteval_t pte_pfn_to_mfn(pteval_t val) | 354 | static pteval_t pte_pfn_to_mfn(pteval_t val) |
376 | { | 355 | { |
377 | if (val & _PAGE_PRESENT) { | 356 | if (val & _PAGE_PRESENT) { |
378 | unsigned long pfn = (val & PTE_MASK) >> PAGE_SHIFT; | 357 | unsigned long pfn = (val & PTE_PFN_MASK) >> PAGE_SHIFT; |
379 | pteval_t flags = val & ~PTE_MASK; | 358 | pteval_t flags = val & PTE_FLAGS_MASK; |
380 | val = ((pteval_t)pfn_to_mfn(pfn) << PAGE_SHIFT) | flags; | 359 | val = ((pteval_t)pfn_to_mfn(pfn) << PAGE_SHIFT) | flags; |
381 | } | 360 | } |
382 | 361 | ||
@@ -418,7 +397,8 @@ void xen_set_pud_hyper(pud_t *ptr, pud_t val) | |||
418 | 397 | ||
419 | xen_mc_batch(); | 398 | xen_mc_batch(); |
420 | 399 | ||
421 | u.ptr = virt_to_machine(ptr).maddr; | 400 | /* ptr may be ioremapped for 64-bit pagetable setup */ |
401 | u.ptr = arbitrary_virt_to_machine(ptr).maddr; | ||
422 | u.val = pud_val_ma(val); | 402 | u.val = pud_val_ma(val); |
423 | extend_mmu_update(&u); | 403 | extend_mmu_update(&u); |
424 | 404 | ||
@@ -441,14 +421,19 @@ void xen_set_pud(pud_t *ptr, pud_t val) | |||
441 | 421 | ||
442 | void xen_set_pte(pte_t *ptep, pte_t pte) | 422 | void xen_set_pte(pte_t *ptep, pte_t pte) |
443 | { | 423 | { |
424 | #ifdef CONFIG_X86_PAE | ||
444 | ptep->pte_high = pte.pte_high; | 425 | ptep->pte_high = pte.pte_high; |
445 | smp_wmb(); | 426 | smp_wmb(); |
446 | ptep->pte_low = pte.pte_low; | 427 | ptep->pte_low = pte.pte_low; |
428 | #else | ||
429 | *ptep = pte; | ||
430 | #endif | ||
447 | } | 431 | } |
448 | 432 | ||
433 | #ifdef CONFIG_X86_PAE | ||
449 | void xen_set_pte_atomic(pte_t *ptep, pte_t pte) | 434 | void xen_set_pte_atomic(pte_t *ptep, pte_t pte) |
450 | { | 435 | { |
451 | set_64bit((u64 *)ptep, pte_val_ma(pte)); | 436 | set_64bit((u64 *)ptep, native_pte_val(pte)); |
452 | } | 437 | } |
453 | 438 | ||
454 | void xen_pte_clear(struct mm_struct *mm, unsigned long addr, pte_t *ptep) | 439 | void xen_pte_clear(struct mm_struct *mm, unsigned long addr, pte_t *ptep) |
@@ -462,6 +447,7 @@ void xen_pmd_clear(pmd_t *pmdp) | |||
462 | { | 447 | { |
463 | set_pmd(pmdp, __pmd(0)); | 448 | set_pmd(pmdp, __pmd(0)); |
464 | } | 449 | } |
450 | #endif /* CONFIG_X86_PAE */ | ||
465 | 451 | ||
466 | pmd_t xen_make_pmd(pmdval_t pmd) | 452 | pmd_t xen_make_pmd(pmdval_t pmd) |
467 | { | 453 | { |
@@ -469,78 +455,189 @@ pmd_t xen_make_pmd(pmdval_t pmd) | |||
469 | return native_make_pmd(pmd); | 455 | return native_make_pmd(pmd); |
470 | } | 456 | } |
471 | 457 | ||
458 | #if PAGETABLE_LEVELS == 4 | ||
459 | pudval_t xen_pud_val(pud_t pud) | ||
460 | { | ||
461 | return pte_mfn_to_pfn(pud.pud); | ||
462 | } | ||
463 | |||
464 | pud_t xen_make_pud(pudval_t pud) | ||
465 | { | ||
466 | pud = pte_pfn_to_mfn(pud); | ||
467 | |||
468 | return native_make_pud(pud); | ||
469 | } | ||
470 | |||
471 | pgd_t *xen_get_user_pgd(pgd_t *pgd) | ||
472 | { | ||
473 | pgd_t *pgd_page = (pgd_t *)(((unsigned long)pgd) & PAGE_MASK); | ||
474 | unsigned offset = pgd - pgd_page; | ||
475 | pgd_t *user_ptr = NULL; | ||
476 | |||
477 | if (offset < pgd_index(USER_LIMIT)) { | ||
478 | struct page *page = virt_to_page(pgd_page); | ||
479 | user_ptr = (pgd_t *)page->private; | ||
480 | if (user_ptr) | ||
481 | user_ptr += offset; | ||
482 | } | ||
483 | |||
484 | return user_ptr; | ||
485 | } | ||
486 | |||
487 | static void __xen_set_pgd_hyper(pgd_t *ptr, pgd_t val) | ||
488 | { | ||
489 | struct mmu_update u; | ||
490 | |||
491 | u.ptr = virt_to_machine(ptr).maddr; | ||
492 | u.val = pgd_val_ma(val); | ||
493 | extend_mmu_update(&u); | ||
494 | } | ||
495 | |||
496 | /* | ||
497 | * Raw hypercall-based set_pgd, intended for in early boot before | ||
498 | * there's a page structure. This implies: | ||
499 | * 1. The only existing pagetable is the kernel's | ||
500 | * 2. It is always pinned | ||
501 | * 3. It has no user pagetable attached to it | ||
502 | */ | ||
503 | void __init xen_set_pgd_hyper(pgd_t *ptr, pgd_t val) | ||
504 | { | ||
505 | preempt_disable(); | ||
506 | |||
507 | xen_mc_batch(); | ||
508 | |||
509 | __xen_set_pgd_hyper(ptr, val); | ||
510 | |||
511 | xen_mc_issue(PARAVIRT_LAZY_MMU); | ||
512 | |||
513 | preempt_enable(); | ||
514 | } | ||
515 | |||
516 | void xen_set_pgd(pgd_t *ptr, pgd_t val) | ||
517 | { | ||
518 | pgd_t *user_ptr = xen_get_user_pgd(ptr); | ||
519 | |||
520 | /* If page is not pinned, we can just update the entry | ||
521 | directly */ | ||
522 | if (!page_pinned(ptr)) { | ||
523 | *ptr = val; | ||
524 | if (user_ptr) { | ||
525 | WARN_ON(page_pinned(user_ptr)); | ||
526 | *user_ptr = val; | ||
527 | } | ||
528 | return; | ||
529 | } | ||
530 | |||
531 | /* If it's pinned, then we can at least batch the kernel and | ||
532 | user updates together. */ | ||
533 | xen_mc_batch(); | ||
534 | |||
535 | __xen_set_pgd_hyper(ptr, val); | ||
536 | if (user_ptr) | ||
537 | __xen_set_pgd_hyper(user_ptr, val); | ||
538 | |||
539 | xen_mc_issue(PARAVIRT_LAZY_MMU); | ||
540 | } | ||
541 | #endif /* PAGETABLE_LEVELS == 4 */ | ||
542 | |||
472 | /* | 543 | /* |
473 | (Yet another) pagetable walker. This one is intended for pinning a | 544 | * (Yet another) pagetable walker. This one is intended for pinning a |
474 | pagetable. This means that it walks a pagetable and calls the | 545 | * pagetable. This means that it walks a pagetable and calls the |
475 | callback function on each page it finds making up the page table, | 546 | * callback function on each page it finds making up the page table, |
476 | at every level. It walks the entire pagetable, but it only bothers | 547 | * at every level. It walks the entire pagetable, but it only bothers |
477 | pinning pte pages which are below pte_limit. In the normal case | 548 | * pinning pte pages which are below limit. In the normal case this |
478 | this will be TASK_SIZE, but at boot we need to pin up to | 549 | * will be STACK_TOP_MAX, but at boot we need to pin up to |
479 | FIXADDR_TOP. But the important bit is that we don't pin beyond | 550 | * FIXADDR_TOP. |
480 | there, because then we start getting into Xen's ptes. | 551 | * |
481 | */ | 552 | * For 32-bit the important bit is that we don't pin beyond there, |
482 | static int pgd_walk(pgd_t *pgd_base, int (*func)(struct page *, enum pt_level), | 553 | * because then we start getting into Xen's ptes. |
554 | * | ||
555 | * For 64-bit, we must skip the Xen hole in the middle of the address | ||
556 | * space, just after the big x86-64 virtual hole. | ||
557 | */ | ||
558 | static int pgd_walk(pgd_t *pgd, int (*func)(struct page *, enum pt_level), | ||
483 | unsigned long limit) | 559 | unsigned long limit) |
484 | { | 560 | { |
485 | pgd_t *pgd = pgd_base; | ||
486 | int flush = 0; | 561 | int flush = 0; |
487 | unsigned long addr = 0; | 562 | unsigned hole_low, hole_high; |
488 | unsigned long pgd_next; | 563 | unsigned pgdidx_limit, pudidx_limit, pmdidx_limit; |
564 | unsigned pgdidx, pudidx, pmdidx; | ||
489 | 565 | ||
490 | BUG_ON(limit > FIXADDR_TOP); | 566 | /* The limit is the last byte to be touched */ |
567 | limit--; | ||
568 | BUG_ON(limit >= FIXADDR_TOP); | ||
491 | 569 | ||
492 | if (xen_feature(XENFEAT_auto_translated_physmap)) | 570 | if (xen_feature(XENFEAT_auto_translated_physmap)) |
493 | return 0; | 571 | return 0; |
494 | 572 | ||
495 | for (; addr != FIXADDR_TOP; pgd++, addr = pgd_next) { | 573 | /* |
574 | * 64-bit has a great big hole in the middle of the address | ||
575 | * space, which contains the Xen mappings. On 32-bit these | ||
576 | * will end up making a zero-sized hole and so is a no-op. | ||
577 | */ | ||
578 | hole_low = pgd_index(USER_LIMIT); | ||
579 | hole_high = pgd_index(PAGE_OFFSET); | ||
580 | |||
581 | pgdidx_limit = pgd_index(limit); | ||
582 | #if PTRS_PER_PUD > 1 | ||
583 | pudidx_limit = pud_index(limit); | ||
584 | #else | ||
585 | pudidx_limit = 0; | ||
586 | #endif | ||
587 | #if PTRS_PER_PMD > 1 | ||
588 | pmdidx_limit = pmd_index(limit); | ||
589 | #else | ||
590 | pmdidx_limit = 0; | ||
591 | #endif | ||
592 | |||
593 | flush |= (*func)(virt_to_page(pgd), PT_PGD); | ||
594 | |||
595 | for (pgdidx = 0; pgdidx <= pgdidx_limit; pgdidx++) { | ||
496 | pud_t *pud; | 596 | pud_t *pud; |
497 | unsigned long pud_limit, pud_next; | ||
498 | 597 | ||
499 | pgd_next = pud_limit = pgd_addr_end(addr, FIXADDR_TOP); | 598 | if (pgdidx >= hole_low && pgdidx < hole_high) |
599 | continue; | ||
500 | 600 | ||
501 | if (!pgd_val(*pgd)) | 601 | if (!pgd_val(pgd[pgdidx])) |
502 | continue; | 602 | continue; |
503 | 603 | ||
504 | pud = pud_offset(pgd, 0); | 604 | pud = pud_offset(&pgd[pgdidx], 0); |
505 | 605 | ||
506 | if (PTRS_PER_PUD > 1) /* not folded */ | 606 | if (PTRS_PER_PUD > 1) /* not folded */ |
507 | flush |= (*func)(virt_to_page(pud), PT_PUD); | 607 | flush |= (*func)(virt_to_page(pud), PT_PUD); |
508 | 608 | ||
509 | for (; addr != pud_limit; pud++, addr = pud_next) { | 609 | for (pudidx = 0; pudidx < PTRS_PER_PUD; pudidx++) { |
510 | pmd_t *pmd; | 610 | pmd_t *pmd; |
511 | unsigned long pmd_limit; | ||
512 | 611 | ||
513 | pud_next = pud_addr_end(addr, pud_limit); | 612 | if (pgdidx == pgdidx_limit && |
514 | 613 | pudidx > pudidx_limit) | |
515 | if (pud_next < limit) | 614 | goto out; |
516 | pmd_limit = pud_next; | ||
517 | else | ||
518 | pmd_limit = limit; | ||
519 | 615 | ||
520 | if (pud_none(*pud)) | 616 | if (pud_none(pud[pudidx])) |
521 | continue; | 617 | continue; |
522 | 618 | ||
523 | pmd = pmd_offset(pud, 0); | 619 | pmd = pmd_offset(&pud[pudidx], 0); |
524 | 620 | ||
525 | if (PTRS_PER_PMD > 1) /* not folded */ | 621 | if (PTRS_PER_PMD > 1) /* not folded */ |
526 | flush |= (*func)(virt_to_page(pmd), PT_PMD); | 622 | flush |= (*func)(virt_to_page(pmd), PT_PMD); |
527 | 623 | ||
528 | for (; addr != pmd_limit; pmd++) { | 624 | for (pmdidx = 0; pmdidx < PTRS_PER_PMD; pmdidx++) { |
529 | addr += (PAGE_SIZE * PTRS_PER_PTE); | 625 | struct page *pte; |
530 | if ((pmd_limit-1) < (addr-1)) { | 626 | |
531 | addr = pmd_limit; | 627 | if (pgdidx == pgdidx_limit && |
532 | break; | 628 | pudidx == pudidx_limit && |
533 | } | 629 | pmdidx > pmdidx_limit) |
630 | goto out; | ||
534 | 631 | ||
535 | if (pmd_none(*pmd)) | 632 | if (pmd_none(pmd[pmdidx])) |
536 | continue; | 633 | continue; |
537 | 634 | ||
538 | flush |= (*func)(pmd_page(*pmd), PT_PTE); | 635 | pte = pmd_page(pmd[pmdidx]); |
636 | flush |= (*func)(pte, PT_PTE); | ||
539 | } | 637 | } |
540 | } | 638 | } |
541 | } | 639 | } |
542 | 640 | out: | |
543 | flush |= (*func)(virt_to_page(pgd_base), PT_PGD); | ||
544 | 641 | ||
545 | return flush; | 642 | return flush; |
546 | } | 643 | } |
@@ -622,14 +719,31 @@ void xen_pgd_pin(pgd_t *pgd) | |||
622 | { | 719 | { |
623 | xen_mc_batch(); | 720 | xen_mc_batch(); |
624 | 721 | ||
625 | if (pgd_walk(pgd, pin_page, TASK_SIZE)) { | 722 | if (pgd_walk(pgd, pin_page, USER_LIMIT)) { |
626 | /* re-enable interrupts for kmap_flush_unused */ | 723 | /* re-enable interrupts for kmap_flush_unused */ |
627 | xen_mc_issue(0); | 724 | xen_mc_issue(0); |
628 | kmap_flush_unused(); | 725 | kmap_flush_unused(); |
629 | xen_mc_batch(); | 726 | xen_mc_batch(); |
630 | } | 727 | } |
631 | 728 | ||
729 | #ifdef CONFIG_X86_64 | ||
730 | { | ||
731 | pgd_t *user_pgd = xen_get_user_pgd(pgd); | ||
732 | |||
733 | xen_do_pin(MMUEXT_PIN_L4_TABLE, PFN_DOWN(__pa(pgd))); | ||
734 | |||
735 | if (user_pgd) { | ||
736 | pin_page(virt_to_page(user_pgd), PT_PGD); | ||
737 | xen_do_pin(MMUEXT_PIN_L4_TABLE, PFN_DOWN(__pa(user_pgd))); | ||
738 | } | ||
739 | } | ||
740 | #else /* CONFIG_X86_32 */ | ||
741 | #ifdef CONFIG_X86_PAE | ||
742 | /* Need to make sure unshared kernel PMD is pinnable */ | ||
743 | pin_page(virt_to_page(pgd_page(pgd[pgd_index(TASK_SIZE)])), PT_PMD); | ||
744 | #endif | ||
632 | xen_do_pin(MMUEXT_PIN_L3_TABLE, PFN_DOWN(__pa(pgd))); | 745 | xen_do_pin(MMUEXT_PIN_L3_TABLE, PFN_DOWN(__pa(pgd))); |
746 | #endif /* CONFIG_X86_64 */ | ||
633 | xen_mc_issue(0); | 747 | xen_mc_issue(0); |
634 | } | 748 | } |
635 | 749 | ||
@@ -656,9 +770,11 @@ void xen_mm_pin_all(void) | |||
656 | spin_unlock_irqrestore(&pgd_lock, flags); | 770 | spin_unlock_irqrestore(&pgd_lock, flags); |
657 | } | 771 | } |
658 | 772 | ||
659 | /* The init_mm pagetable is really pinned as soon as its created, but | 773 | /* |
660 | that's before we have page structures to store the bits. So do all | 774 | * The init_mm pagetable is really pinned as soon as its created, but |
661 | the book-keeping now. */ | 775 | * that's before we have page structures to store the bits. So do all |
776 | * the book-keeping now. | ||
777 | */ | ||
662 | static __init int mark_pinned(struct page *page, enum pt_level level) | 778 | static __init int mark_pinned(struct page *page, enum pt_level level) |
663 | { | 779 | { |
664 | SetPagePinned(page); | 780 | SetPagePinned(page); |
@@ -708,7 +824,23 @@ static void xen_pgd_unpin(pgd_t *pgd) | |||
708 | 824 | ||
709 | xen_do_pin(MMUEXT_UNPIN_TABLE, PFN_DOWN(__pa(pgd))); | 825 | xen_do_pin(MMUEXT_UNPIN_TABLE, PFN_DOWN(__pa(pgd))); |
710 | 826 | ||
711 | pgd_walk(pgd, unpin_page, TASK_SIZE); | 827 | #ifdef CONFIG_X86_64 |
828 | { | ||
829 | pgd_t *user_pgd = xen_get_user_pgd(pgd); | ||
830 | |||
831 | if (user_pgd) { | ||
832 | xen_do_pin(MMUEXT_UNPIN_TABLE, PFN_DOWN(__pa(user_pgd))); | ||
833 | unpin_page(virt_to_page(user_pgd), PT_PGD); | ||
834 | } | ||
835 | } | ||
836 | #endif | ||
837 | |||
838 | #ifdef CONFIG_X86_PAE | ||
839 | /* Need to make sure unshared kernel PMD is unpinned */ | ||
840 | pin_page(virt_to_page(pgd_page(pgd[pgd_index(TASK_SIZE)])), PT_PMD); | ||
841 | #endif | ||
842 | |||
843 | pgd_walk(pgd, unpin_page, USER_LIMIT); | ||
712 | 844 | ||
713 | xen_mc_issue(0); | 845 | xen_mc_issue(0); |
714 | } | 846 | } |
@@ -727,7 +859,6 @@ void xen_mm_unpin_all(void) | |||
727 | list_for_each_entry(page, &pgd_list, lru) { | 859 | list_for_each_entry(page, &pgd_list, lru) { |
728 | if (PageSavePinned(page)) { | 860 | if (PageSavePinned(page)) { |
729 | BUG_ON(!PagePinned(page)); | 861 | BUG_ON(!PagePinned(page)); |
730 | printk("unpinning pinned %p\n", page_address(page)); | ||
731 | xen_pgd_unpin((pgd_t *)page_address(page)); | 862 | xen_pgd_unpin((pgd_t *)page_address(page)); |
732 | ClearPageSavePinned(page); | 863 | ClearPageSavePinned(page); |
733 | } | 864 | } |
@@ -757,8 +888,15 @@ void xen_dup_mmap(struct mm_struct *oldmm, struct mm_struct *mm) | |||
757 | static void drop_other_mm_ref(void *info) | 888 | static void drop_other_mm_ref(void *info) |
758 | { | 889 | { |
759 | struct mm_struct *mm = info; | 890 | struct mm_struct *mm = info; |
891 | struct mm_struct *active_mm; | ||
892 | |||
893 | #ifdef CONFIG_X86_64 | ||
894 | active_mm = read_pda(active_mm); | ||
895 | #else | ||
896 | active_mm = __get_cpu_var(cpu_tlbstate).active_mm; | ||
897 | #endif | ||
760 | 898 | ||
761 | if (__get_cpu_var(cpu_tlbstate).active_mm == mm) | 899 | if (active_mm == mm) |
762 | leave_mm(smp_processor_id()); | 900 | leave_mm(smp_processor_id()); |
763 | 901 | ||
764 | /* If this cpu still has a stale cr3 reference, then make sure | 902 | /* If this cpu still has a stale cr3 reference, then make sure |
diff --git a/arch/x86/xen/mmu.h b/arch/x86/xen/mmu.h index 297bf9f5b8bc..0f59bd03f9e3 100644 --- a/arch/x86/xen/mmu.h +++ b/arch/x86/xen/mmu.h | |||
@@ -10,18 +10,6 @@ enum pt_level { | |||
10 | PT_PTE | 10 | PT_PTE |
11 | }; | 11 | }; |
12 | 12 | ||
13 | /* | ||
14 | * Page-directory addresses above 4GB do not fit into architectural %cr3. | ||
15 | * When accessing %cr3, or equivalent field in vcpu_guest_context, guests | ||
16 | * must use the following accessor macros to pack/unpack valid MFNs. | ||
17 | * | ||
18 | * Note that Xen is using the fact that the pagetable base is always | ||
19 | * page-aligned, and putting the 12 MSB of the address into the 12 LSB | ||
20 | * of cr3. | ||
21 | */ | ||
22 | #define xen_pfn_to_cr3(pfn) (((unsigned)(pfn) << 12) | ((unsigned)(pfn) >> 20)) | ||
23 | #define xen_cr3_to_pfn(cr3) (((unsigned)(cr3) >> 12) | ((unsigned)(cr3) << 20)) | ||
24 | |||
25 | 13 | ||
26 | void set_pte_mfn(unsigned long vaddr, unsigned long pfn, pgprot_t flags); | 14 | void set_pte_mfn(unsigned long vaddr, unsigned long pfn, pgprot_t flags); |
27 | 15 | ||
@@ -44,13 +32,26 @@ pgd_t xen_make_pgd(pgdval_t); | |||
44 | void xen_set_pte(pte_t *ptep, pte_t pteval); | 32 | void xen_set_pte(pte_t *ptep, pte_t pteval); |
45 | void xen_set_pte_at(struct mm_struct *mm, unsigned long addr, | 33 | void xen_set_pte_at(struct mm_struct *mm, unsigned long addr, |
46 | pte_t *ptep, pte_t pteval); | 34 | pte_t *ptep, pte_t pteval); |
35 | |||
36 | #ifdef CONFIG_X86_PAE | ||
47 | void xen_set_pte_atomic(pte_t *ptep, pte_t pte); | 37 | void xen_set_pte_atomic(pte_t *ptep, pte_t pte); |
38 | void xen_pte_clear(struct mm_struct *mm, unsigned long addr, pte_t *ptep); | ||
39 | void xen_pmd_clear(pmd_t *pmdp); | ||
40 | #endif /* CONFIG_X86_PAE */ | ||
41 | |||
48 | void xen_set_pmd(pmd_t *pmdp, pmd_t pmdval); | 42 | void xen_set_pmd(pmd_t *pmdp, pmd_t pmdval); |
49 | void xen_set_pud(pud_t *ptr, pud_t val); | 43 | void xen_set_pud(pud_t *ptr, pud_t val); |
50 | void xen_set_pmd_hyper(pmd_t *pmdp, pmd_t pmdval); | 44 | void xen_set_pmd_hyper(pmd_t *pmdp, pmd_t pmdval); |
51 | void xen_set_pud_hyper(pud_t *ptr, pud_t val); | 45 | void xen_set_pud_hyper(pud_t *ptr, pud_t val); |
52 | void xen_pte_clear(struct mm_struct *mm, unsigned long addr, pte_t *ptep); | 46 | |
53 | void xen_pmd_clear(pmd_t *pmdp); | 47 | #if PAGETABLE_LEVELS == 4 |
48 | pudval_t xen_pud_val(pud_t pud); | ||
49 | pud_t xen_make_pud(pudval_t pudval); | ||
50 | void xen_set_pgd(pgd_t *pgdp, pgd_t pgd); | ||
51 | void xen_set_pgd_hyper(pgd_t *pgdp, pgd_t pgd); | ||
52 | #endif | ||
53 | |||
54 | pgd_t *xen_get_user_pgd(pgd_t *pgd); | ||
54 | 55 | ||
55 | pte_t xen_ptep_modify_prot_start(struct mm_struct *mm, unsigned long addr, pte_t *ptep); | 56 | pte_t xen_ptep_modify_prot_start(struct mm_struct *mm, unsigned long addr, pte_t *ptep); |
56 | void xen_ptep_modify_prot_commit(struct mm_struct *mm, unsigned long addr, | 57 | void xen_ptep_modify_prot_commit(struct mm_struct *mm, unsigned long addr, |
diff --git a/arch/x86/xen/multicalls.c b/arch/x86/xen/multicalls.c index 3c63c4da7ed1..9efd1c6c9776 100644 --- a/arch/x86/xen/multicalls.c +++ b/arch/x86/xen/multicalls.c | |||
@@ -76,6 +76,7 @@ void xen_mc_flush(void) | |||
76 | if (ret) { | 76 | if (ret) { |
77 | printk(KERN_ERR "%d multicall(s) failed: cpu %d\n", | 77 | printk(KERN_ERR "%d multicall(s) failed: cpu %d\n", |
78 | ret, smp_processor_id()); | 78 | ret, smp_processor_id()); |
79 | dump_stack(); | ||
79 | for (i = 0; i < b->mcidx; i++) { | 80 | for (i = 0; i < b->mcidx; i++) { |
80 | printk(" call %2d/%d: op=%lu arg=[%lx] result=%ld\n", | 81 | printk(" call %2d/%d: op=%lu arg=[%lx] result=%ld\n", |
81 | i+1, b->mcidx, | 82 | i+1, b->mcidx, |
diff --git a/arch/x86/xen/setup.c b/arch/x86/xen/setup.c index e0a39595bde3..d67901083888 100644 --- a/arch/x86/xen/setup.c +++ b/arch/x86/xen/setup.c | |||
@@ -42,7 +42,7 @@ char * __init xen_memory_setup(void) | |||
42 | 42 | ||
43 | e820.nr_map = 0; | 43 | e820.nr_map = 0; |
44 | 44 | ||
45 | e820_add_region(0, PFN_PHYS(max_pfn), E820_RAM); | 45 | e820_add_region(0, PFN_PHYS((u64)max_pfn), E820_RAM); |
46 | 46 | ||
47 | /* | 47 | /* |
48 | * Even though this is normal, usable memory under Xen, reserve | 48 | * Even though this is normal, usable memory under Xen, reserve |
@@ -83,30 +83,72 @@ static void xen_idle(void) | |||
83 | 83 | ||
84 | /* | 84 | /* |
85 | * Set the bit indicating "nosegneg" library variants should be used. | 85 | * Set the bit indicating "nosegneg" library variants should be used. |
86 | * We only need to bother in pure 32-bit mode; compat 32-bit processes | ||
87 | * can have un-truncated segments, so wrapping around is allowed. | ||
86 | */ | 88 | */ |
87 | static void __init fiddle_vdso(void) | 89 | static void __init fiddle_vdso(void) |
88 | { | 90 | { |
89 | extern const char vdso32_default_start; | 91 | #ifdef CONFIG_X86_32 |
90 | u32 *mask = VDSO32_SYMBOL(&vdso32_default_start, NOTE_MASK); | 92 | u32 *mask; |
93 | mask = VDSO32_SYMBOL(&vdso32_int80_start, NOTE_MASK); | ||
91 | *mask |= 1 << VDSO_NOTE_NONEGSEG_BIT; | 94 | *mask |= 1 << VDSO_NOTE_NONEGSEG_BIT; |
95 | mask = VDSO32_SYMBOL(&vdso32_sysenter_start, NOTE_MASK); | ||
96 | *mask |= 1 << VDSO_NOTE_NONEGSEG_BIT; | ||
97 | #endif | ||
92 | } | 98 | } |
93 | 99 | ||
94 | void xen_enable_sysenter(void) | 100 | static __cpuinit int register_callback(unsigned type, const void *func) |
95 | { | 101 | { |
96 | int cpu = smp_processor_id(); | 102 | struct callback_register callback = { |
97 | extern void xen_sysenter_target(void); | 103 | .type = type, |
98 | /* Mask events on entry, even though they get enabled immediately */ | 104 | .address = XEN_CALLBACK(__KERNEL_CS, func), |
99 | static struct callback_register sysenter = { | ||
100 | .type = CALLBACKTYPE_sysenter, | ||
101 | .address = { __KERNEL_CS, (unsigned long)xen_sysenter_target }, | ||
102 | .flags = CALLBACKF_mask_events, | 105 | .flags = CALLBACKF_mask_events, |
103 | }; | 106 | }; |
104 | 107 | ||
105 | if (!boot_cpu_has(X86_FEATURE_SEP) || | 108 | return HYPERVISOR_callback_op(CALLBACKOP_register, &callback); |
106 | HYPERVISOR_callback_op(CALLBACKOP_register, &sysenter) != 0) { | 109 | } |
107 | clear_cpu_cap(&cpu_data(cpu), X86_FEATURE_SEP); | 110 | |
108 | clear_cpu_cap(&boot_cpu_data, X86_FEATURE_SEP); | 111 | void __cpuinit xen_enable_sysenter(void) |
112 | { | ||
113 | extern void xen_sysenter_target(void); | ||
114 | int ret; | ||
115 | unsigned sysenter_feature; | ||
116 | |||
117 | #ifdef CONFIG_X86_32 | ||
118 | sysenter_feature = X86_FEATURE_SEP; | ||
119 | #else | ||
120 | sysenter_feature = X86_FEATURE_SYSENTER32; | ||
121 | #endif | ||
122 | |||
123 | if (!boot_cpu_has(sysenter_feature)) | ||
124 | return; | ||
125 | |||
126 | ret = register_callback(CALLBACKTYPE_sysenter, xen_sysenter_target); | ||
127 | if(ret != 0) | ||
128 | setup_clear_cpu_cap(sysenter_feature); | ||
129 | } | ||
130 | |||
131 | void __cpuinit xen_enable_syscall(void) | ||
132 | { | ||
133 | #ifdef CONFIG_X86_64 | ||
134 | int ret; | ||
135 | extern void xen_syscall_target(void); | ||
136 | extern void xen_syscall32_target(void); | ||
137 | |||
138 | ret = register_callback(CALLBACKTYPE_syscall, xen_syscall_target); | ||
139 | if (ret != 0) { | ||
140 | printk(KERN_ERR "Failed to set syscall callback: %d\n", ret); | ||
141 | /* Pretty fatal; 64-bit userspace has no other | ||
142 | mechanism for syscalls. */ | ||
109 | } | 143 | } |
144 | |||
145 | if (boot_cpu_has(X86_FEATURE_SYSCALL32)) { | ||
146 | ret = register_callback(CALLBACKTYPE_syscall32, | ||
147 | xen_syscall32_target); | ||
148 | if (ret != 0) | ||
149 | setup_clear_cpu_cap(X86_FEATURE_SYSCALL32); | ||
150 | } | ||
151 | #endif /* CONFIG_X86_64 */ | ||
110 | } | 152 | } |
111 | 153 | ||
112 | void __init xen_arch_setup(void) | 154 | void __init xen_arch_setup(void) |
@@ -120,10 +162,12 @@ void __init xen_arch_setup(void) | |||
120 | if (!xen_feature(XENFEAT_auto_translated_physmap)) | 162 | if (!xen_feature(XENFEAT_auto_translated_physmap)) |
121 | HYPERVISOR_vm_assist(VMASST_CMD_enable, VMASST_TYPE_pae_extended_cr3); | 163 | HYPERVISOR_vm_assist(VMASST_CMD_enable, VMASST_TYPE_pae_extended_cr3); |
122 | 164 | ||
123 | HYPERVISOR_set_callbacks(__KERNEL_CS, (unsigned long)xen_hypervisor_callback, | 165 | if (register_callback(CALLBACKTYPE_event, xen_hypervisor_callback) || |
124 | __KERNEL_CS, (unsigned long)xen_failsafe_callback); | 166 | register_callback(CALLBACKTYPE_failsafe, xen_failsafe_callback)) |
167 | BUG(); | ||
125 | 168 | ||
126 | xen_enable_sysenter(); | 169 | xen_enable_sysenter(); |
170 | xen_enable_syscall(); | ||
127 | 171 | ||
128 | set_iopl.iopl = 1; | 172 | set_iopl.iopl = 1; |
129 | rc = HYPERVISOR_physdev_op(PHYSDEVOP_set_iopl, &set_iopl); | 173 | rc = HYPERVISOR_physdev_op(PHYSDEVOP_set_iopl, &set_iopl); |
@@ -143,11 +187,6 @@ void __init xen_arch_setup(void) | |||
143 | 187 | ||
144 | pm_idle = xen_idle; | 188 | pm_idle = xen_idle; |
145 | 189 | ||
146 | #ifdef CONFIG_SMP | ||
147 | /* fill cpus_possible with all available cpus */ | ||
148 | xen_fill_possible_map(); | ||
149 | #endif | ||
150 | |||
151 | paravirt_disable_iospace(); | 190 | paravirt_disable_iospace(); |
152 | 191 | ||
153 | fiddle_vdso(); | 192 | fiddle_vdso(); |
diff --git a/arch/x86/xen/smp.c b/arch/x86/xen/smp.c index 233156f39b7f..d8faf79a0a1d 100644 --- a/arch/x86/xen/smp.c +++ b/arch/x86/xen/smp.c | |||
@@ -15,6 +15,7 @@ | |||
15 | * This does not handle HOTPLUG_CPU yet. | 15 | * This does not handle HOTPLUG_CPU yet. |
16 | */ | 16 | */ |
17 | #include <linux/sched.h> | 17 | #include <linux/sched.h> |
18 | #include <linux/kernel_stat.h> | ||
18 | #include <linux/err.h> | 19 | #include <linux/err.h> |
19 | #include <linux/smp.h> | 20 | #include <linux/smp.h> |
20 | 21 | ||
@@ -35,6 +36,8 @@ | |||
35 | #include "xen-ops.h" | 36 | #include "xen-ops.h" |
36 | #include "mmu.h" | 37 | #include "mmu.h" |
37 | 38 | ||
39 | static void __cpuinit xen_init_lock_cpu(int cpu); | ||
40 | |||
38 | cpumask_t xen_cpu_initialized_map; | 41 | cpumask_t xen_cpu_initialized_map; |
39 | 42 | ||
40 | static DEFINE_PER_CPU(int, resched_irq); | 43 | static DEFINE_PER_CPU(int, resched_irq); |
@@ -66,13 +69,22 @@ static __cpuinit void cpu_bringup_and_idle(void) | |||
66 | int cpu = smp_processor_id(); | 69 | int cpu = smp_processor_id(); |
67 | 70 | ||
68 | cpu_init(); | 71 | cpu_init(); |
72 | preempt_disable(); | ||
73 | |||
69 | xen_enable_sysenter(); | 74 | xen_enable_sysenter(); |
75 | xen_enable_syscall(); | ||
70 | 76 | ||
71 | preempt_disable(); | 77 | cpu = smp_processor_id(); |
72 | per_cpu(cpu_state, cpu) = CPU_ONLINE; | 78 | smp_store_cpu_info(cpu); |
79 | cpu_data(cpu).x86_max_cores = 1; | ||
80 | set_cpu_sibling_map(cpu); | ||
73 | 81 | ||
74 | xen_setup_cpu_clockevents(); | 82 | xen_setup_cpu_clockevents(); |
75 | 83 | ||
84 | cpu_set(cpu, cpu_online_map); | ||
85 | x86_write_percpu(cpu_state, CPU_ONLINE); | ||
86 | wmb(); | ||
87 | |||
76 | /* We can take interrupts now: we're officially "up". */ | 88 | /* We can take interrupts now: we're officially "up". */ |
77 | local_irq_enable(); | 89 | local_irq_enable(); |
78 | 90 | ||
@@ -141,56 +153,39 @@ static int xen_smp_intr_init(unsigned int cpu) | |||
141 | return rc; | 153 | return rc; |
142 | } | 154 | } |
143 | 155 | ||
144 | void __init xen_fill_possible_map(void) | 156 | static void __init xen_fill_possible_map(void) |
145 | { | 157 | { |
146 | int i, rc; | 158 | int i, rc; |
147 | 159 | ||
148 | for (i = 0; i < NR_CPUS; i++) { | 160 | for (i = 0; i < NR_CPUS; i++) { |
149 | rc = HYPERVISOR_vcpu_op(VCPUOP_is_up, i, NULL); | 161 | rc = HYPERVISOR_vcpu_op(VCPUOP_is_up, i, NULL); |
150 | if (rc >= 0) | 162 | if (rc >= 0) { |
163 | num_processors++; | ||
151 | cpu_set(i, cpu_possible_map); | 164 | cpu_set(i, cpu_possible_map); |
165 | } | ||
152 | } | 166 | } |
153 | } | 167 | } |
154 | 168 | ||
155 | void __init xen_smp_prepare_boot_cpu(void) | 169 | static void __init xen_smp_prepare_boot_cpu(void) |
156 | { | 170 | { |
157 | int cpu; | ||
158 | |||
159 | BUG_ON(smp_processor_id() != 0); | 171 | BUG_ON(smp_processor_id() != 0); |
160 | native_smp_prepare_boot_cpu(); | 172 | native_smp_prepare_boot_cpu(); |
161 | 173 | ||
162 | /* We've switched to the "real" per-cpu gdt, so make sure the | 174 | /* We've switched to the "real" per-cpu gdt, so make sure the |
163 | old memory can be recycled */ | 175 | old memory can be recycled */ |
164 | make_lowmem_page_readwrite(&per_cpu__gdt_page); | 176 | make_lowmem_page_readwrite(&per_cpu_var(gdt_page)); |
165 | |||
166 | for_each_possible_cpu(cpu) { | ||
167 | cpus_clear(per_cpu(cpu_sibling_map, cpu)); | ||
168 | /* | ||
169 | * cpu_core_map lives in a per cpu area that is cleared | ||
170 | * when the per cpu array is allocated. | ||
171 | * | ||
172 | * cpus_clear(per_cpu(cpu_core_map, cpu)); | ||
173 | */ | ||
174 | } | ||
175 | 177 | ||
176 | xen_setup_vcpu_info_placement(); | 178 | xen_setup_vcpu_info_placement(); |
177 | } | 179 | } |
178 | 180 | ||
179 | void __init xen_smp_prepare_cpus(unsigned int max_cpus) | 181 | static void __init xen_smp_prepare_cpus(unsigned int max_cpus) |
180 | { | 182 | { |
181 | unsigned cpu; | 183 | unsigned cpu; |
182 | 184 | ||
183 | for_each_possible_cpu(cpu) { | 185 | xen_init_lock_cpu(0); |
184 | cpus_clear(per_cpu(cpu_sibling_map, cpu)); | ||
185 | /* | ||
186 | * cpu_core_ map will be zeroed when the per | ||
187 | * cpu area is allocated. | ||
188 | * | ||
189 | * cpus_clear(per_cpu(cpu_core_map, cpu)); | ||
190 | */ | ||
191 | } | ||
192 | 186 | ||
193 | smp_store_cpu_info(0); | 187 | smp_store_cpu_info(0); |
188 | cpu_data(0).x86_max_cores = 1; | ||
194 | set_cpu_sibling_map(0); | 189 | set_cpu_sibling_map(0); |
195 | 190 | ||
196 | if (xen_smp_intr_init(0)) | 191 | if (xen_smp_intr_init(0)) |
@@ -225,7 +220,7 @@ static __cpuinit int | |||
225 | cpu_initialize_context(unsigned int cpu, struct task_struct *idle) | 220 | cpu_initialize_context(unsigned int cpu, struct task_struct *idle) |
226 | { | 221 | { |
227 | struct vcpu_guest_context *ctxt; | 222 | struct vcpu_guest_context *ctxt; |
228 | struct gdt_page *gdt = &per_cpu(gdt_page, cpu); | 223 | struct desc_struct *gdt; |
229 | 224 | ||
230 | if (cpu_test_and_set(cpu, xen_cpu_initialized_map)) | 225 | if (cpu_test_and_set(cpu, xen_cpu_initialized_map)) |
231 | return 0; | 226 | return 0; |
@@ -234,12 +229,15 @@ cpu_initialize_context(unsigned int cpu, struct task_struct *idle) | |||
234 | if (ctxt == NULL) | 229 | if (ctxt == NULL) |
235 | return -ENOMEM; | 230 | return -ENOMEM; |
236 | 231 | ||
232 | gdt = get_cpu_gdt_table(cpu); | ||
233 | |||
237 | ctxt->flags = VGCF_IN_KERNEL; | 234 | ctxt->flags = VGCF_IN_KERNEL; |
238 | ctxt->user_regs.ds = __USER_DS; | 235 | ctxt->user_regs.ds = __USER_DS; |
239 | ctxt->user_regs.es = __USER_DS; | 236 | ctxt->user_regs.es = __USER_DS; |
240 | ctxt->user_regs.fs = __KERNEL_PERCPU; | ||
241 | ctxt->user_regs.gs = 0; | ||
242 | ctxt->user_regs.ss = __KERNEL_DS; | 237 | ctxt->user_regs.ss = __KERNEL_DS; |
238 | #ifdef CONFIG_X86_32 | ||
239 | ctxt->user_regs.fs = __KERNEL_PERCPU; | ||
240 | #endif | ||
243 | ctxt->user_regs.eip = (unsigned long)cpu_bringup_and_idle; | 241 | ctxt->user_regs.eip = (unsigned long)cpu_bringup_and_idle; |
244 | ctxt->user_regs.eflags = 0x1000; /* IOPL_RING1 */ | 242 | ctxt->user_regs.eflags = 0x1000; /* IOPL_RING1 */ |
245 | 243 | ||
@@ -249,11 +247,11 @@ cpu_initialize_context(unsigned int cpu, struct task_struct *idle) | |||
249 | 247 | ||
250 | ctxt->ldt_ents = 0; | 248 | ctxt->ldt_ents = 0; |
251 | 249 | ||
252 | BUG_ON((unsigned long)gdt->gdt & ~PAGE_MASK); | 250 | BUG_ON((unsigned long)gdt & ~PAGE_MASK); |
253 | make_lowmem_page_readonly(gdt->gdt); | 251 | make_lowmem_page_readonly(gdt); |
254 | 252 | ||
255 | ctxt->gdt_frames[0] = virt_to_mfn(gdt->gdt); | 253 | ctxt->gdt_frames[0] = virt_to_mfn(gdt); |
256 | ctxt->gdt_ents = ARRAY_SIZE(gdt->gdt); | 254 | ctxt->gdt_ents = GDT_ENTRIES; |
257 | 255 | ||
258 | ctxt->user_regs.cs = __KERNEL_CS; | 256 | ctxt->user_regs.cs = __KERNEL_CS; |
259 | ctxt->user_regs.esp = idle->thread.sp0 - sizeof(struct pt_regs); | 257 | ctxt->user_regs.esp = idle->thread.sp0 - sizeof(struct pt_regs); |
@@ -261,9 +259,11 @@ cpu_initialize_context(unsigned int cpu, struct task_struct *idle) | |||
261 | ctxt->kernel_ss = __KERNEL_DS; | 259 | ctxt->kernel_ss = __KERNEL_DS; |
262 | ctxt->kernel_sp = idle->thread.sp0; | 260 | ctxt->kernel_sp = idle->thread.sp0; |
263 | 261 | ||
262 | #ifdef CONFIG_X86_32 | ||
264 | ctxt->event_callback_cs = __KERNEL_CS; | 263 | ctxt->event_callback_cs = __KERNEL_CS; |
265 | ctxt->event_callback_eip = (unsigned long)xen_hypervisor_callback; | ||
266 | ctxt->failsafe_callback_cs = __KERNEL_CS; | 264 | ctxt->failsafe_callback_cs = __KERNEL_CS; |
265 | #endif | ||
266 | ctxt->event_callback_eip = (unsigned long)xen_hypervisor_callback; | ||
267 | ctxt->failsafe_callback_eip = (unsigned long)xen_failsafe_callback; | 267 | ctxt->failsafe_callback_eip = (unsigned long)xen_failsafe_callback; |
268 | 268 | ||
269 | per_cpu(xen_cr3, cpu) = __pa(swapper_pg_dir); | 269 | per_cpu(xen_cr3, cpu) = __pa(swapper_pg_dir); |
@@ -276,7 +276,7 @@ cpu_initialize_context(unsigned int cpu, struct task_struct *idle) | |||
276 | return 0; | 276 | return 0; |
277 | } | 277 | } |
278 | 278 | ||
279 | int __cpuinit xen_cpu_up(unsigned int cpu) | 279 | static int __cpuinit xen_cpu_up(unsigned int cpu) |
280 | { | 280 | { |
281 | struct task_struct *idle = idle_task(cpu); | 281 | struct task_struct *idle = idle_task(cpu); |
282 | int rc; | 282 | int rc; |
@@ -287,10 +287,28 @@ int __cpuinit xen_cpu_up(unsigned int cpu) | |||
287 | return rc; | 287 | return rc; |
288 | #endif | 288 | #endif |
289 | 289 | ||
290 | #ifdef CONFIG_X86_64 | ||
291 | /* Allocate node local memory for AP pdas */ | ||
292 | WARN_ON(cpu == 0); | ||
293 | if (cpu > 0) { | ||
294 | rc = get_local_pda(cpu); | ||
295 | if (rc) | ||
296 | return rc; | ||
297 | } | ||
298 | #endif | ||
299 | |||
300 | #ifdef CONFIG_X86_32 | ||
290 | init_gdt(cpu); | 301 | init_gdt(cpu); |
291 | per_cpu(current_task, cpu) = idle; | 302 | per_cpu(current_task, cpu) = idle; |
292 | irq_ctx_init(cpu); | 303 | irq_ctx_init(cpu); |
304 | #else | ||
305 | cpu_pda(cpu)->pcurrent = idle; | ||
306 | clear_tsk_thread_flag(idle, TIF_FORK); | ||
307 | #endif | ||
293 | xen_setup_timer(cpu); | 308 | xen_setup_timer(cpu); |
309 | xen_init_lock_cpu(cpu); | ||
310 | |||
311 | per_cpu(cpu_state, cpu) = CPU_UP_PREPARE; | ||
294 | 312 | ||
295 | /* make sure interrupts start blocked */ | 313 | /* make sure interrupts start blocked */ |
296 | per_cpu(xen_vcpu, cpu)->evtchn_upcall_mask = 1; | 314 | per_cpu(xen_vcpu, cpu)->evtchn_upcall_mask = 1; |
@@ -306,20 +324,18 @@ int __cpuinit xen_cpu_up(unsigned int cpu) | |||
306 | if (rc) | 324 | if (rc) |
307 | return rc; | 325 | return rc; |
308 | 326 | ||
309 | smp_store_cpu_info(cpu); | ||
310 | set_cpu_sibling_map(cpu); | ||
311 | /* This must be done before setting cpu_online_map */ | ||
312 | wmb(); | ||
313 | |||
314 | cpu_set(cpu, cpu_online_map); | ||
315 | |||
316 | rc = HYPERVISOR_vcpu_op(VCPUOP_up, cpu, NULL); | 327 | rc = HYPERVISOR_vcpu_op(VCPUOP_up, cpu, NULL); |
317 | BUG_ON(rc); | 328 | BUG_ON(rc); |
318 | 329 | ||
330 | while(per_cpu(cpu_state, cpu) != CPU_ONLINE) { | ||
331 | HYPERVISOR_sched_op(SCHEDOP_yield, 0); | ||
332 | barrier(); | ||
333 | } | ||
334 | |||
319 | return 0; | 335 | return 0; |
320 | } | 336 | } |
321 | 337 | ||
322 | void xen_smp_cpus_done(unsigned int max_cpus) | 338 | static void xen_smp_cpus_done(unsigned int max_cpus) |
323 | { | 339 | { |
324 | } | 340 | } |
325 | 341 | ||
@@ -335,12 +351,12 @@ static void stop_self(void *v) | |||
335 | BUG(); | 351 | BUG(); |
336 | } | 352 | } |
337 | 353 | ||
338 | void xen_smp_send_stop(void) | 354 | static void xen_smp_send_stop(void) |
339 | { | 355 | { |
340 | smp_call_function(stop_self, NULL, 0); | 356 | smp_call_function(stop_self, NULL, 0); |
341 | } | 357 | } |
342 | 358 | ||
343 | void xen_smp_send_reschedule(int cpu) | 359 | static void xen_smp_send_reschedule(int cpu) |
344 | { | 360 | { |
345 | xen_send_IPI_one(cpu, XEN_RESCHEDULE_VECTOR); | 361 | xen_send_IPI_one(cpu, XEN_RESCHEDULE_VECTOR); |
346 | } | 362 | } |
@@ -351,18 +367,18 @@ static void xen_send_IPI_mask(cpumask_t mask, enum ipi_vector vector) | |||
351 | 367 | ||
352 | cpus_and(mask, mask, cpu_online_map); | 368 | cpus_and(mask, mask, cpu_online_map); |
353 | 369 | ||
354 | for_each_cpu_mask(cpu, mask) | 370 | for_each_cpu_mask_nr(cpu, mask) |
355 | xen_send_IPI_one(cpu, vector); | 371 | xen_send_IPI_one(cpu, vector); |
356 | } | 372 | } |
357 | 373 | ||
358 | void xen_smp_send_call_function_ipi(cpumask_t mask) | 374 | static void xen_smp_send_call_function_ipi(cpumask_t mask) |
359 | { | 375 | { |
360 | int cpu; | 376 | int cpu; |
361 | 377 | ||
362 | xen_send_IPI_mask(mask, XEN_CALL_FUNCTION_VECTOR); | 378 | xen_send_IPI_mask(mask, XEN_CALL_FUNCTION_VECTOR); |
363 | 379 | ||
364 | /* Make sure other vcpus get a chance to run if they need to. */ | 380 | /* Make sure other vcpus get a chance to run if they need to. */ |
365 | for_each_cpu_mask(cpu, mask) { | 381 | for_each_cpu_mask_nr(cpu, mask) { |
366 | if (xen_vcpu_stolen(cpu)) { | 382 | if (xen_vcpu_stolen(cpu)) { |
367 | HYPERVISOR_sched_op(SCHEDOP_yield, 0); | 383 | HYPERVISOR_sched_op(SCHEDOP_yield, 0); |
368 | break; | 384 | break; |
@@ -370,7 +386,7 @@ void xen_smp_send_call_function_ipi(cpumask_t mask) | |||
370 | } | 386 | } |
371 | } | 387 | } |
372 | 388 | ||
373 | void xen_smp_send_call_function_single_ipi(int cpu) | 389 | static void xen_smp_send_call_function_single_ipi(int cpu) |
374 | { | 390 | { |
375 | xen_send_IPI_mask(cpumask_of_cpu(cpu), XEN_CALL_FUNCTION_SINGLE_VECTOR); | 391 | xen_send_IPI_mask(cpumask_of_cpu(cpu), XEN_CALL_FUNCTION_SINGLE_VECTOR); |
376 | } | 392 | } |
@@ -379,7 +395,11 @@ static irqreturn_t xen_call_function_interrupt(int irq, void *dev_id) | |||
379 | { | 395 | { |
380 | irq_enter(); | 396 | irq_enter(); |
381 | generic_smp_call_function_interrupt(); | 397 | generic_smp_call_function_interrupt(); |
398 | #ifdef CONFIG_X86_32 | ||
382 | __get_cpu_var(irq_stat).irq_call_count++; | 399 | __get_cpu_var(irq_stat).irq_call_count++; |
400 | #else | ||
401 | add_pda(irq_call_count, 1); | ||
402 | #endif | ||
383 | irq_exit(); | 403 | irq_exit(); |
384 | 404 | ||
385 | return IRQ_HANDLED; | 405 | return IRQ_HANDLED; |
@@ -389,8 +409,196 @@ static irqreturn_t xen_call_function_single_interrupt(int irq, void *dev_id) | |||
389 | { | 409 | { |
390 | irq_enter(); | 410 | irq_enter(); |
391 | generic_smp_call_function_single_interrupt(); | 411 | generic_smp_call_function_single_interrupt(); |
412 | #ifdef CONFIG_X86_32 | ||
392 | __get_cpu_var(irq_stat).irq_call_count++; | 413 | __get_cpu_var(irq_stat).irq_call_count++; |
414 | #else | ||
415 | add_pda(irq_call_count, 1); | ||
416 | #endif | ||
393 | irq_exit(); | 417 | irq_exit(); |
394 | 418 | ||
395 | return IRQ_HANDLED; | 419 | return IRQ_HANDLED; |
396 | } | 420 | } |
421 | |||
422 | struct xen_spinlock { | ||
423 | unsigned char lock; /* 0 -> free; 1 -> locked */ | ||
424 | unsigned short spinners; /* count of waiting cpus */ | ||
425 | }; | ||
426 | |||
427 | static int xen_spin_is_locked(struct raw_spinlock *lock) | ||
428 | { | ||
429 | struct xen_spinlock *xl = (struct xen_spinlock *)lock; | ||
430 | |||
431 | return xl->lock != 0; | ||
432 | } | ||
433 | |||
434 | static int xen_spin_is_contended(struct raw_spinlock *lock) | ||
435 | { | ||
436 | struct xen_spinlock *xl = (struct xen_spinlock *)lock; | ||
437 | |||
438 | /* Not strictly true; this is only the count of contended | ||
439 | lock-takers entering the slow path. */ | ||
440 | return xl->spinners != 0; | ||
441 | } | ||
442 | |||
443 | static int xen_spin_trylock(struct raw_spinlock *lock) | ||
444 | { | ||
445 | struct xen_spinlock *xl = (struct xen_spinlock *)lock; | ||
446 | u8 old = 1; | ||
447 | |||
448 | asm("xchgb %b0,%1" | ||
449 | : "+q" (old), "+m" (xl->lock) : : "memory"); | ||
450 | |||
451 | return old == 0; | ||
452 | } | ||
453 | |||
454 | static DEFINE_PER_CPU(int, lock_kicker_irq) = -1; | ||
455 | static DEFINE_PER_CPU(struct xen_spinlock *, lock_spinners); | ||
456 | |||
457 | static inline void spinning_lock(struct xen_spinlock *xl) | ||
458 | { | ||
459 | __get_cpu_var(lock_spinners) = xl; | ||
460 | wmb(); /* set lock of interest before count */ | ||
461 | asm(LOCK_PREFIX " incw %0" | ||
462 | : "+m" (xl->spinners) : : "memory"); | ||
463 | } | ||
464 | |||
465 | static inline void unspinning_lock(struct xen_spinlock *xl) | ||
466 | { | ||
467 | asm(LOCK_PREFIX " decw %0" | ||
468 | : "+m" (xl->spinners) : : "memory"); | ||
469 | wmb(); /* decrement count before clearing lock */ | ||
470 | __get_cpu_var(lock_spinners) = NULL; | ||
471 | } | ||
472 | |||
473 | static noinline int xen_spin_lock_slow(struct raw_spinlock *lock) | ||
474 | { | ||
475 | struct xen_spinlock *xl = (struct xen_spinlock *)lock; | ||
476 | int irq = __get_cpu_var(lock_kicker_irq); | ||
477 | int ret; | ||
478 | |||
479 | /* If kicker interrupts not initialized yet, just spin */ | ||
480 | if (irq == -1) | ||
481 | return 0; | ||
482 | |||
483 | /* announce we're spinning */ | ||
484 | spinning_lock(xl); | ||
485 | |||
486 | /* clear pending */ | ||
487 | xen_clear_irq_pending(irq); | ||
488 | |||
489 | /* check again make sure it didn't become free while | ||
490 | we weren't looking */ | ||
491 | ret = xen_spin_trylock(lock); | ||
492 | if (ret) | ||
493 | goto out; | ||
494 | |||
495 | /* block until irq becomes pending */ | ||
496 | xen_poll_irq(irq); | ||
497 | kstat_this_cpu.irqs[irq]++; | ||
498 | |||
499 | out: | ||
500 | unspinning_lock(xl); | ||
501 | return ret; | ||
502 | } | ||
503 | |||
504 | static void xen_spin_lock(struct raw_spinlock *lock) | ||
505 | { | ||
506 | struct xen_spinlock *xl = (struct xen_spinlock *)lock; | ||
507 | int timeout; | ||
508 | u8 oldval; | ||
509 | |||
510 | do { | ||
511 | timeout = 1 << 10; | ||
512 | |||
513 | asm("1: xchgb %1,%0\n" | ||
514 | " testb %1,%1\n" | ||
515 | " jz 3f\n" | ||
516 | "2: rep;nop\n" | ||
517 | " cmpb $0,%0\n" | ||
518 | " je 1b\n" | ||
519 | " dec %2\n" | ||
520 | " jnz 2b\n" | ||
521 | "3:\n" | ||
522 | : "+m" (xl->lock), "=q" (oldval), "+r" (timeout) | ||
523 | : "1" (1) | ||
524 | : "memory"); | ||
525 | |||
526 | } while (unlikely(oldval != 0 && !xen_spin_lock_slow(lock))); | ||
527 | } | ||
528 | |||
529 | static noinline void xen_spin_unlock_slow(struct xen_spinlock *xl) | ||
530 | { | ||
531 | int cpu; | ||
532 | |||
533 | for_each_online_cpu(cpu) { | ||
534 | /* XXX should mix up next cpu selection */ | ||
535 | if (per_cpu(lock_spinners, cpu) == xl) { | ||
536 | xen_send_IPI_one(cpu, XEN_SPIN_UNLOCK_VECTOR); | ||
537 | break; | ||
538 | } | ||
539 | } | ||
540 | } | ||
541 | |||
542 | static void xen_spin_unlock(struct raw_spinlock *lock) | ||
543 | { | ||
544 | struct xen_spinlock *xl = (struct xen_spinlock *)lock; | ||
545 | |||
546 | smp_wmb(); /* make sure no writes get moved after unlock */ | ||
547 | xl->lock = 0; /* release lock */ | ||
548 | |||
549 | /* make sure unlock happens before kick */ | ||
550 | barrier(); | ||
551 | |||
552 | if (unlikely(xl->spinners)) | ||
553 | xen_spin_unlock_slow(xl); | ||
554 | } | ||
555 | |||
556 | static __cpuinit void xen_init_lock_cpu(int cpu) | ||
557 | { | ||
558 | int irq; | ||
559 | const char *name; | ||
560 | |||
561 | name = kasprintf(GFP_KERNEL, "spinlock%d", cpu); | ||
562 | irq = bind_ipi_to_irqhandler(XEN_SPIN_UNLOCK_VECTOR, | ||
563 | cpu, | ||
564 | xen_reschedule_interrupt, | ||
565 | IRQF_DISABLED|IRQF_PERCPU|IRQF_NOBALANCING, | ||
566 | name, | ||
567 | NULL); | ||
568 | |||
569 | if (irq >= 0) { | ||
570 | disable_irq(irq); /* make sure it's never delivered */ | ||
571 | per_cpu(lock_kicker_irq, cpu) = irq; | ||
572 | } | ||
573 | |||
574 | printk("cpu %d spinlock event irq %d\n", cpu, irq); | ||
575 | } | ||
576 | |||
577 | static void __init xen_init_spinlocks(void) | ||
578 | { | ||
579 | pv_lock_ops.spin_is_locked = xen_spin_is_locked; | ||
580 | pv_lock_ops.spin_is_contended = xen_spin_is_contended; | ||
581 | pv_lock_ops.spin_lock = xen_spin_lock; | ||
582 | pv_lock_ops.spin_trylock = xen_spin_trylock; | ||
583 | pv_lock_ops.spin_unlock = xen_spin_unlock; | ||
584 | } | ||
585 | |||
586 | static const struct smp_ops xen_smp_ops __initdata = { | ||
587 | .smp_prepare_boot_cpu = xen_smp_prepare_boot_cpu, | ||
588 | .smp_prepare_cpus = xen_smp_prepare_cpus, | ||
589 | .cpu_up = xen_cpu_up, | ||
590 | .smp_cpus_done = xen_smp_cpus_done, | ||
591 | |||
592 | .smp_send_stop = xen_smp_send_stop, | ||
593 | .smp_send_reschedule = xen_smp_send_reschedule, | ||
594 | |||
595 | .send_call_func_ipi = xen_smp_send_call_function_ipi, | ||
596 | .send_call_func_single_ipi = xen_smp_send_call_function_single_ipi, | ||
597 | }; | ||
598 | |||
599 | void __init xen_smp_init(void) | ||
600 | { | ||
601 | smp_ops = xen_smp_ops; | ||
602 | xen_fill_possible_map(); | ||
603 | xen_init_spinlocks(); | ||
604 | } | ||
diff --git a/arch/x86/xen/suspend.c b/arch/x86/xen/suspend.c index 251669a932d4..2a234db5949b 100644 --- a/arch/x86/xen/suspend.c +++ b/arch/x86/xen/suspend.c | |||
@@ -38,8 +38,11 @@ void xen_post_suspend(int suspend_cancelled) | |||
38 | xen_cpu_initialized_map = cpu_online_map; | 38 | xen_cpu_initialized_map = cpu_online_map; |
39 | #endif | 39 | #endif |
40 | xen_vcpu_restore(); | 40 | xen_vcpu_restore(); |
41 | xen_timer_resume(); | ||
42 | } | 41 | } |
43 | 42 | ||
44 | } | 43 | } |
45 | 44 | ||
45 | void xen_arch_resume(void) | ||
46 | { | ||
47 | /* nothing */ | ||
48 | } | ||
diff --git a/arch/x86/xen/xen-asm.S b/arch/x86/xen/xen-asm_32.S index 2497a30f41de..2497a30f41de 100644 --- a/arch/x86/xen/xen-asm.S +++ b/arch/x86/xen/xen-asm_32.S | |||
diff --git a/arch/x86/xen/xen-asm_64.S b/arch/x86/xen/xen-asm_64.S new file mode 100644 index 000000000000..7f58304fafb3 --- /dev/null +++ b/arch/x86/xen/xen-asm_64.S | |||
@@ -0,0 +1,271 @@ | |||
1 | /* | ||
2 | Asm versions of Xen pv-ops, suitable for either direct use or inlining. | ||
3 | The inline versions are the same as the direct-use versions, with the | ||
4 | pre- and post-amble chopped off. | ||
5 | |||
6 | This code is encoded for size rather than absolute efficiency, | ||
7 | with a view to being able to inline as much as possible. | ||
8 | |||
9 | We only bother with direct forms (ie, vcpu in pda) of the operations | ||
10 | here; the indirect forms are better handled in C, since they're | ||
11 | generally too large to inline anyway. | ||
12 | */ | ||
13 | |||
14 | #include <linux/linkage.h> | ||
15 | |||
16 | #include <asm/asm-offsets.h> | ||
17 | #include <asm/processor-flags.h> | ||
18 | #include <asm/errno.h> | ||
19 | #include <asm/segment.h> | ||
20 | |||
21 | #include <xen/interface/xen.h> | ||
22 | |||
23 | #define RELOC(x, v) .globl x##_reloc; x##_reloc=v | ||
24 | #define ENDPATCH(x) .globl x##_end; x##_end=. | ||
25 | |||
26 | /* Pseudo-flag used for virtual NMI, which we don't implement yet */ | ||
27 | #define XEN_EFLAGS_NMI 0x80000000 | ||
28 | |||
29 | #if 0 | ||
30 | #include <asm/percpu.h> | ||
31 | |||
32 | /* | ||
33 | Enable events. This clears the event mask and tests the pending | ||
34 | event status with one and operation. If there are pending | ||
35 | events, then enter the hypervisor to get them handled. | ||
36 | */ | ||
37 | ENTRY(xen_irq_enable_direct) | ||
38 | /* Unmask events */ | ||
39 | movb $0, PER_CPU_VAR(xen_vcpu_info, XEN_vcpu_info_mask) | ||
40 | |||
41 | /* Preempt here doesn't matter because that will deal with | ||
42 | any pending interrupts. The pending check may end up being | ||
43 | run on the wrong CPU, but that doesn't hurt. */ | ||
44 | |||
45 | /* Test for pending */ | ||
46 | testb $0xff, PER_CPU_VAR(xen_vcpu_info, XEN_vcpu_info_pending) | ||
47 | jz 1f | ||
48 | |||
49 | 2: call check_events | ||
50 | 1: | ||
51 | ENDPATCH(xen_irq_enable_direct) | ||
52 | ret | ||
53 | ENDPROC(xen_irq_enable_direct) | ||
54 | RELOC(xen_irq_enable_direct, 2b+1) | ||
55 | |||
56 | /* | ||
57 | Disabling events is simply a matter of making the event mask | ||
58 | non-zero. | ||
59 | */ | ||
60 | ENTRY(xen_irq_disable_direct) | ||
61 | movb $1, PER_CPU_VAR(xen_vcpu_info, XEN_vcpu_info_mask) | ||
62 | ENDPATCH(xen_irq_disable_direct) | ||
63 | ret | ||
64 | ENDPROC(xen_irq_disable_direct) | ||
65 | RELOC(xen_irq_disable_direct, 0) | ||
66 | |||
67 | /* | ||
68 | (xen_)save_fl is used to get the current interrupt enable status. | ||
69 | Callers expect the status to be in X86_EFLAGS_IF, and other bits | ||
70 | may be set in the return value. We take advantage of this by | ||
71 | making sure that X86_EFLAGS_IF has the right value (and other bits | ||
72 | in that byte are 0), but other bits in the return value are | ||
73 | undefined. We need to toggle the state of the bit, because | ||
74 | Xen and x86 use opposite senses (mask vs enable). | ||
75 | */ | ||
76 | ENTRY(xen_save_fl_direct) | ||
77 | testb $0xff, PER_CPU_VAR(xen_vcpu_info, XEN_vcpu_info_mask) | ||
78 | setz %ah | ||
79 | addb %ah,%ah | ||
80 | ENDPATCH(xen_save_fl_direct) | ||
81 | ret | ||
82 | ENDPROC(xen_save_fl_direct) | ||
83 | RELOC(xen_save_fl_direct, 0) | ||
84 | |||
85 | /* | ||
86 | In principle the caller should be passing us a value return | ||
87 | from xen_save_fl_direct, but for robustness sake we test only | ||
88 | the X86_EFLAGS_IF flag rather than the whole byte. After | ||
89 | setting the interrupt mask state, it checks for unmasked | ||
90 | pending events and enters the hypervisor to get them delivered | ||
91 | if so. | ||
92 | */ | ||
93 | ENTRY(xen_restore_fl_direct) | ||
94 | testb $X86_EFLAGS_IF>>8, %ah | ||
95 | setz PER_CPU_VAR(xen_vcpu_info, XEN_vcpu_info_mask) | ||
96 | /* Preempt here doesn't matter because that will deal with | ||
97 | any pending interrupts. The pending check may end up being | ||
98 | run on the wrong CPU, but that doesn't hurt. */ | ||
99 | |||
100 | /* check for unmasked and pending */ | ||
101 | cmpw $0x0001, PER_CPU_VAR(xen_vcpu_info, XEN_vcpu_info_pending) | ||
102 | jz 1f | ||
103 | 2: call check_events | ||
104 | 1: | ||
105 | ENDPATCH(xen_restore_fl_direct) | ||
106 | ret | ||
107 | ENDPROC(xen_restore_fl_direct) | ||
108 | RELOC(xen_restore_fl_direct, 2b+1) | ||
109 | |||
110 | |||
111 | /* | ||
112 | Force an event check by making a hypercall, | ||
113 | but preserve regs before making the call. | ||
114 | */ | ||
115 | check_events: | ||
116 | push %rax | ||
117 | push %rcx | ||
118 | push %rdx | ||
119 | push %rsi | ||
120 | push %rdi | ||
121 | push %r8 | ||
122 | push %r9 | ||
123 | push %r10 | ||
124 | push %r11 | ||
125 | call force_evtchn_callback | ||
126 | pop %r11 | ||
127 | pop %r10 | ||
128 | pop %r9 | ||
129 | pop %r8 | ||
130 | pop %rdi | ||
131 | pop %rsi | ||
132 | pop %rdx | ||
133 | pop %rcx | ||
134 | pop %rax | ||
135 | ret | ||
136 | #endif | ||
137 | |||
138 | ENTRY(xen_adjust_exception_frame) | ||
139 | mov 8+0(%rsp),%rcx | ||
140 | mov 8+8(%rsp),%r11 | ||
141 | ret $16 | ||
142 | |||
143 | hypercall_iret = hypercall_page + __HYPERVISOR_iret * 32 | ||
144 | /* | ||
145 | Xen64 iret frame: | ||
146 | |||
147 | ss | ||
148 | rsp | ||
149 | rflags | ||
150 | cs | ||
151 | rip <-- standard iret frame | ||
152 | |||
153 | flags | ||
154 | |||
155 | rcx } | ||
156 | r11 }<-- pushed by hypercall page | ||
157 | rsp -> rax } | ||
158 | */ | ||
159 | ENTRY(xen_iret) | ||
160 | pushq $0 | ||
161 | 1: jmp hypercall_iret | ||
162 | ENDPATCH(xen_iret) | ||
163 | RELOC(xen_iret, 1b+1) | ||
164 | |||
165 | /* | ||
166 | sysexit is not used for 64-bit processes, so it's | ||
167 | only ever used to return to 32-bit compat userspace. | ||
168 | */ | ||
169 | ENTRY(xen_sysexit) | ||
170 | pushq $__USER32_DS | ||
171 | pushq %rcx | ||
172 | pushq $X86_EFLAGS_IF | ||
173 | pushq $__USER32_CS | ||
174 | pushq %rdx | ||
175 | |||
176 | pushq $0 | ||
177 | 1: jmp hypercall_iret | ||
178 | ENDPATCH(xen_sysexit) | ||
179 | RELOC(xen_sysexit, 1b+1) | ||
180 | |||
181 | ENTRY(xen_sysret64) | ||
182 | /* We're already on the usermode stack at this point, but still | ||
183 | with the kernel gs, so we can easily switch back */ | ||
184 | movq %rsp, %gs:pda_oldrsp | ||
185 | movq %gs:pda_kernelstack,%rsp | ||
186 | |||
187 | pushq $__USER_DS | ||
188 | pushq %gs:pda_oldrsp | ||
189 | pushq %r11 | ||
190 | pushq $__USER_CS | ||
191 | pushq %rcx | ||
192 | |||
193 | pushq $VGCF_in_syscall | ||
194 | 1: jmp hypercall_iret | ||
195 | ENDPATCH(xen_sysret64) | ||
196 | RELOC(xen_sysret64, 1b+1) | ||
197 | |||
198 | ENTRY(xen_sysret32) | ||
199 | /* We're already on the usermode stack at this point, but still | ||
200 | with the kernel gs, so we can easily switch back */ | ||
201 | movq %rsp, %gs:pda_oldrsp | ||
202 | movq %gs:pda_kernelstack, %rsp | ||
203 | |||
204 | pushq $__USER32_DS | ||
205 | pushq %gs:pda_oldrsp | ||
206 | pushq %r11 | ||
207 | pushq $__USER32_CS | ||
208 | pushq %rcx | ||
209 | |||
210 | pushq $VGCF_in_syscall | ||
211 | 1: jmp hypercall_iret | ||
212 | ENDPATCH(xen_sysret32) | ||
213 | RELOC(xen_sysret32, 1b+1) | ||
214 | |||
215 | /* | ||
216 | Xen handles syscall callbacks much like ordinary exceptions, | ||
217 | which means we have: | ||
218 | - kernel gs | ||
219 | - kernel rsp | ||
220 | - an iret-like stack frame on the stack (including rcx and r11): | ||
221 | ss | ||
222 | rsp | ||
223 | rflags | ||
224 | cs | ||
225 | rip | ||
226 | r11 | ||
227 | rsp-> rcx | ||
228 | |||
229 | In all the entrypoints, we undo all that to make it look | ||
230 | like a CPU-generated syscall/sysenter and jump to the normal | ||
231 | entrypoint. | ||
232 | */ | ||
233 | |||
234 | .macro undo_xen_syscall | ||
235 | mov 0*8(%rsp),%rcx | ||
236 | mov 1*8(%rsp),%r11 | ||
237 | mov 5*8(%rsp),%rsp | ||
238 | .endm | ||
239 | |||
240 | /* Normal 64-bit system call target */ | ||
241 | ENTRY(xen_syscall_target) | ||
242 | undo_xen_syscall | ||
243 | jmp system_call_after_swapgs | ||
244 | ENDPROC(xen_syscall_target) | ||
245 | |||
246 | #ifdef CONFIG_IA32_EMULATION | ||
247 | |||
248 | /* 32-bit compat syscall target */ | ||
249 | ENTRY(xen_syscall32_target) | ||
250 | undo_xen_syscall | ||
251 | jmp ia32_cstar_target | ||
252 | ENDPROC(xen_syscall32_target) | ||
253 | |||
254 | /* 32-bit compat sysenter target */ | ||
255 | ENTRY(xen_sysenter_target) | ||
256 | undo_xen_syscall | ||
257 | jmp ia32_sysenter_target | ||
258 | ENDPROC(xen_sysenter_target) | ||
259 | |||
260 | #else /* !CONFIG_IA32_EMULATION */ | ||
261 | |||
262 | ENTRY(xen_syscall32_target) | ||
263 | ENTRY(xen_sysenter_target) | ||
264 | lea 16(%rsp), %rsp /* strip %rcx,%r11 */ | ||
265 | mov $-ENOSYS, %rax | ||
266 | pushq $VGCF_in_syscall | ||
267 | jmp hypercall_iret | ||
268 | ENDPROC(xen_syscall32_target) | ||
269 | ENDPROC(xen_sysenter_target) | ||
270 | |||
271 | #endif /* CONFIG_IA32_EMULATION */ | ||
diff --git a/arch/x86/xen/xen-head.S b/arch/x86/xen/xen-head.S index 7c0cf6320a0a..63d49a523ed3 100644 --- a/arch/x86/xen/xen-head.S +++ b/arch/x86/xen/xen-head.S | |||
@@ -5,15 +5,24 @@ | |||
5 | 5 | ||
6 | #include <linux/elfnote.h> | 6 | #include <linux/elfnote.h> |
7 | #include <linux/init.h> | 7 | #include <linux/init.h> |
8 | |||
8 | #include <asm/boot.h> | 9 | #include <asm/boot.h> |
10 | #include <asm/asm.h> | ||
11 | #include <asm/page.h> | ||
12 | |||
9 | #include <xen/interface/elfnote.h> | 13 | #include <xen/interface/elfnote.h> |
10 | #include <asm/xen/interface.h> | 14 | #include <asm/xen/interface.h> |
11 | 15 | ||
12 | __INIT | 16 | __INIT |
13 | ENTRY(startup_xen) | 17 | ENTRY(startup_xen) |
14 | movl %esi,xen_start_info | ||
15 | cld | 18 | cld |
16 | movl $(init_thread_union+THREAD_SIZE),%esp | 19 | #ifdef CONFIG_X86_32 |
20 | mov %esi,xen_start_info | ||
21 | mov $init_thread_union+THREAD_SIZE,%esp | ||
22 | #else | ||
23 | mov %rsi,xen_start_info | ||
24 | mov $init_thread_union+THREAD_SIZE,%rsp | ||
25 | #endif | ||
17 | jmp xen_start_kernel | 26 | jmp xen_start_kernel |
18 | 27 | ||
19 | __FINIT | 28 | __FINIT |
@@ -21,21 +30,26 @@ ENTRY(startup_xen) | |||
21 | .pushsection .text | 30 | .pushsection .text |
22 | .align PAGE_SIZE_asm | 31 | .align PAGE_SIZE_asm |
23 | ENTRY(hypercall_page) | 32 | ENTRY(hypercall_page) |
24 | .skip 0x1000 | 33 | .skip PAGE_SIZE_asm |
25 | .popsection | 34 | .popsection |
26 | 35 | ||
27 | ELFNOTE(Xen, XEN_ELFNOTE_GUEST_OS, .asciz "linux") | 36 | ELFNOTE(Xen, XEN_ELFNOTE_GUEST_OS, .asciz "linux") |
28 | ELFNOTE(Xen, XEN_ELFNOTE_GUEST_VERSION, .asciz "2.6") | 37 | ELFNOTE(Xen, XEN_ELFNOTE_GUEST_VERSION, .asciz "2.6") |
29 | ELFNOTE(Xen, XEN_ELFNOTE_XEN_VERSION, .asciz "xen-3.0") | 38 | ELFNOTE(Xen, XEN_ELFNOTE_XEN_VERSION, .asciz "xen-3.0") |
30 | ELFNOTE(Xen, XEN_ELFNOTE_VIRT_BASE, .long __PAGE_OFFSET) | 39 | #ifdef CONFIG_X86_32 |
31 | ELFNOTE(Xen, XEN_ELFNOTE_ENTRY, .long startup_xen) | 40 | ELFNOTE(Xen, XEN_ELFNOTE_VIRT_BASE, _ASM_PTR __PAGE_OFFSET) |
32 | ELFNOTE(Xen, XEN_ELFNOTE_HYPERCALL_PAGE, .long hypercall_page) | 41 | #else |
42 | ELFNOTE(Xen, XEN_ELFNOTE_VIRT_BASE, _ASM_PTR __START_KERNEL_map) | ||
43 | #endif | ||
44 | ELFNOTE(Xen, XEN_ELFNOTE_ENTRY, _ASM_PTR startup_xen) | ||
45 | ELFNOTE(Xen, XEN_ELFNOTE_HYPERCALL_PAGE, _ASM_PTR hypercall_page) | ||
33 | ELFNOTE(Xen, XEN_ELFNOTE_FEATURES, .asciz "!writable_page_tables|pae_pgdir_above_4gb") | 46 | ELFNOTE(Xen, XEN_ELFNOTE_FEATURES, .asciz "!writable_page_tables|pae_pgdir_above_4gb") |
34 | ELFNOTE(Xen, XEN_ELFNOTE_PAE_MODE, .asciz "yes") | 47 | ELFNOTE(Xen, XEN_ELFNOTE_PAE_MODE, .asciz "yes") |
35 | ELFNOTE(Xen, XEN_ELFNOTE_LOADER, .asciz "generic") | 48 | ELFNOTE(Xen, XEN_ELFNOTE_LOADER, .asciz "generic") |
36 | ELFNOTE(Xen, XEN_ELFNOTE_L1_MFN_VALID, | 49 | ELFNOTE(Xen, XEN_ELFNOTE_L1_MFN_VALID, |
37 | .quad _PAGE_PRESENT; .quad _PAGE_PRESENT) | 50 | .quad _PAGE_PRESENT; .quad _PAGE_PRESENT) |
38 | ELFNOTE(Xen, XEN_ELFNOTE_SUSPEND_CANCEL, .long 1) | 51 | ELFNOTE(Xen, XEN_ELFNOTE_SUSPEND_CANCEL, .long 1) |
39 | ELFNOTE(Xen, XEN_ELFNOTE_HV_START_LOW, .long __HYPERVISOR_VIRT_START) | 52 | ELFNOTE(Xen, XEN_ELFNOTE_HV_START_LOW, _ASM_PTR __HYPERVISOR_VIRT_START) |
53 | ELFNOTE(Xen, XEN_ELFNOTE_PADDR_OFFSET, _ASM_PTR 0) | ||
40 | 54 | ||
41 | #endif /*CONFIG_XEN */ | 55 | #endif /*CONFIG_XEN */ |
diff --git a/arch/x86/xen/xen-ops.h b/arch/x86/xen/xen-ops.h index 6f4b1045c1c2..dd3c23152a2e 100644 --- a/arch/x86/xen/xen-ops.h +++ b/arch/x86/xen/xen-ops.h | |||
@@ -26,6 +26,7 @@ char * __init xen_memory_setup(void); | |||
26 | void __init xen_arch_setup(void); | 26 | void __init xen_arch_setup(void); |
27 | void __init xen_init_IRQ(void); | 27 | void __init xen_init_IRQ(void); |
28 | void xen_enable_sysenter(void); | 28 | void xen_enable_sysenter(void); |
29 | void xen_enable_syscall(void); | ||
29 | void xen_vcpu_restore(void); | 30 | void xen_vcpu_restore(void); |
30 | 31 | ||
31 | void __init xen_build_dynamic_phys_to_machine(void); | 32 | void __init xen_build_dynamic_phys_to_machine(void); |
@@ -37,7 +38,6 @@ void __init xen_time_init(void); | |||
37 | unsigned long xen_get_wallclock(void); | 38 | unsigned long xen_get_wallclock(void); |
38 | int xen_set_wallclock(unsigned long time); | 39 | int xen_set_wallclock(unsigned long time); |
39 | unsigned long long xen_sched_clock(void); | 40 | unsigned long long xen_sched_clock(void); |
40 | void xen_timer_resume(void); | ||
41 | 41 | ||
42 | irqreturn_t xen_debug_interrupt(int irq, void *dev_id); | 42 | irqreturn_t xen_debug_interrupt(int irq, void *dev_id); |
43 | 43 | ||
@@ -45,20 +45,15 @@ bool xen_vcpu_stolen(int vcpu); | |||
45 | 45 | ||
46 | void xen_mark_init_mm_pinned(void); | 46 | void xen_mark_init_mm_pinned(void); |
47 | 47 | ||
48 | void __init xen_fill_possible_map(void); | ||
49 | |||
50 | void __init xen_setup_vcpu_info_placement(void); | 48 | void __init xen_setup_vcpu_info_placement(void); |
51 | void xen_smp_prepare_boot_cpu(void); | ||
52 | void xen_smp_prepare_cpus(unsigned int max_cpus); | ||
53 | int xen_cpu_up(unsigned int cpu); | ||
54 | void xen_smp_cpus_done(unsigned int max_cpus); | ||
55 | 49 | ||
56 | void xen_smp_send_stop(void); | 50 | #ifdef CONFIG_SMP |
57 | void xen_smp_send_reschedule(int cpu); | 51 | void xen_smp_init(void); |
58 | void xen_smp_send_call_function_ipi(cpumask_t mask); | ||
59 | void xen_smp_send_call_function_single_ipi(int cpu); | ||
60 | 52 | ||
61 | extern cpumask_t xen_cpu_initialized_map; | 53 | extern cpumask_t xen_cpu_initialized_map; |
54 | #else | ||
55 | static inline void xen_smp_init(void) {} | ||
56 | #endif | ||
62 | 57 | ||
63 | 58 | ||
64 | /* Declare an asm function, along with symbols needed to make it | 59 | /* Declare an asm function, along with symbols needed to make it |
@@ -73,7 +68,11 @@ DECL_ASM(void, xen_irq_disable_direct, void); | |||
73 | DECL_ASM(unsigned long, xen_save_fl_direct, void); | 68 | DECL_ASM(unsigned long, xen_save_fl_direct, void); |
74 | DECL_ASM(void, xen_restore_fl_direct, unsigned long); | 69 | DECL_ASM(void, xen_restore_fl_direct, unsigned long); |
75 | 70 | ||
71 | /* These are not functions, and cannot be called normally */ | ||
76 | void xen_iret(void); | 72 | void xen_iret(void); |
77 | void xen_sysexit(void); | 73 | void xen_sysexit(void); |
74 | void xen_sysret32(void); | ||
75 | void xen_sysret64(void); | ||
76 | void xen_adjust_exception_frame(void); | ||
78 | 77 | ||
79 | #endif /* XEN_OPS_H */ | 78 | #endif /* XEN_OPS_H */ |