diff options
author | Dave Kleikamp <shaggy@austin.ibm.com> | 2006-01-24 15:34:47 -0500 |
---|---|---|
committer | Dave Kleikamp <shaggy@austin.ibm.com> | 2006-01-24 15:34:47 -0500 |
commit | 0a0fc0ddbe732779366ab6b1b879f62195e65967 (patch) | |
tree | 7b42490a676cf39ae0691b6859ecf7fd410f229b /arch/x86_64 | |
parent | 4d5dbd0945d9e0833dd7964a3d6ee33157f7cc7a (diff) | |
parent | 3ee68c4af3fd7228c1be63254b9f884614f9ebb2 (diff) |
Merge with /home/shaggy/git/linus-clean/
Diffstat (limited to 'arch/x86_64')
89 files changed, 3475 insertions, 2352 deletions
diff --git a/arch/x86_64/Kconfig b/arch/x86_64/Kconfig index 4cce2f6f170c..2f9deca31cc9 100644 --- a/arch/x86_64/Kconfig +++ b/arch/x86_64/Kconfig | |||
@@ -69,12 +69,34 @@ config ARCH_MAY_HAVE_PC_FDC | |||
69 | bool | 69 | bool |
70 | default y | 70 | default y |
71 | 71 | ||
72 | config DMI | ||
73 | bool | ||
74 | default y | ||
75 | |||
72 | source "init/Kconfig" | 76 | source "init/Kconfig" |
73 | 77 | ||
74 | 78 | ||
75 | menu "Processor type and features" | 79 | menu "Processor type and features" |
76 | 80 | ||
77 | choice | 81 | choice |
82 | prompt "Subarchitecture Type" | ||
83 | default X86_PC | ||
84 | |||
85 | config X86_PC | ||
86 | bool "PC-compatible" | ||
87 | help | ||
88 | Choose this option if your computer is a standard PC or compatible. | ||
89 | |||
90 | config X86_VSMP | ||
91 | bool "Support for ScaleMP vSMP" | ||
92 | help | ||
93 | Support for ScaleMP vSMP systems. Say 'Y' here if this kernel is | ||
94 | supposed to run on these EM64T-based machines. Only choose this option | ||
95 | if you have one of these machines. | ||
96 | |||
97 | endchoice | ||
98 | |||
99 | choice | ||
78 | prompt "Processor family" | 100 | prompt "Processor family" |
79 | default MK8 | 101 | default MK8 |
80 | 102 | ||
@@ -226,22 +248,42 @@ config SCHED_SMT | |||
226 | 248 | ||
227 | source "kernel/Kconfig.preempt" | 249 | source "kernel/Kconfig.preempt" |
228 | 250 | ||
229 | config K8_NUMA | 251 | config NUMA |
230 | bool "K8 NUMA support" | 252 | bool "Non Uniform Memory Access (NUMA) Support" |
231 | select NUMA | ||
232 | depends on SMP | 253 | depends on SMP |
233 | help | 254 | help |
234 | Enable NUMA (Non Unified Memory Architecture) support for | 255 | Enable NUMA (Non Uniform Memory Access) support. The kernel |
235 | AMD Opteron Multiprocessor systems. The kernel will try to allocate | 256 | will try to allocate memory used by a CPU on the local memory |
236 | memory used by a CPU on the local memory controller of the CPU | 257 | controller of the CPU and add some more NUMA awareness to the kernel. |
237 | and add some more NUMA awareness to the kernel. | 258 | This code is recommended on all multiprocessor Opteron systems. |
238 | This code is recommended on all multiprocessor Opteron systems | 259 | If the system is EM64T, you should say N unless your system is EM64T |
239 | and normally doesn't hurt on others. | 260 | NUMA. |
261 | |||
262 | config K8_NUMA | ||
263 | bool "Old style AMD Opteron NUMA detection" | ||
264 | depends on NUMA | ||
265 | default y | ||
266 | help | ||
267 | Enable K8 NUMA node topology detection. You should say Y here if | ||
268 | you have a multi processor AMD K8 system. This uses an old | ||
269 | method to read the NUMA configuration directly from the builtin | ||
270 | Northbridge of Opteron. It is recommended to use X86_64_ACPI_NUMA | ||
271 | instead, which also takes priority if both are compiled in. | ||
272 | |||
273 | # Dummy CONFIG option to select ACPI_NUMA from drivers/acpi/Kconfig. | ||
274 | |||
275 | config X86_64_ACPI_NUMA | ||
276 | bool "ACPI NUMA detection" | ||
277 | depends on NUMA | ||
278 | select ACPI | ||
279 | select ACPI_NUMA | ||
280 | default y | ||
281 | help | ||
282 | Enable ACPI SRAT based node topology detection. | ||
240 | 283 | ||
241 | config NUMA_EMU | 284 | config NUMA_EMU |
242 | bool "NUMA emulation support" | 285 | bool "NUMA emulation" |
243 | select NUMA | 286 | depends on NUMA |
244 | depends on SMP | ||
245 | help | 287 | help |
246 | Enable NUMA emulation. A flat machine will be split | 288 | Enable NUMA emulation. A flat machine will be split |
247 | into virtual nodes when booted with "numa=fake=N", where N is the | 289 | into virtual nodes when booted with "numa=fake=N", where N is the |
@@ -252,9 +294,6 @@ config ARCH_DISCONTIGMEM_ENABLE | |||
252 | depends on NUMA | 294 | depends on NUMA |
253 | default y | 295 | default y |
254 | 296 | ||
255 | config NUMA | ||
256 | bool | ||
257 | default n | ||
258 | 297 | ||
259 | config ARCH_DISCONTIGMEM_ENABLE | 298 | config ARCH_DISCONTIGMEM_ENABLE |
260 | def_bool y | 299 | def_bool y |
@@ -266,7 +305,11 @@ config ARCH_DISCONTIGMEM_DEFAULT | |||
266 | 305 | ||
267 | config ARCH_SPARSEMEM_ENABLE | 306 | config ARCH_SPARSEMEM_ENABLE |
268 | def_bool y | 307 | def_bool y |
269 | depends on NUMA | 308 | depends on (NUMA || EXPERIMENTAL) |
309 | |||
310 | config ARCH_MEMORY_PROBE | ||
311 | def_bool y | ||
312 | depends on MEMORY_HOTPLUG | ||
270 | 313 | ||
271 | config ARCH_FLATMEM_ENABLE | 314 | config ARCH_FLATMEM_ENABLE |
272 | def_bool y | 315 | def_bool y |
@@ -276,6 +319,7 @@ source "mm/Kconfig" | |||
276 | 319 | ||
277 | config HAVE_ARCH_EARLY_PFN_TO_NID | 320 | config HAVE_ARCH_EARLY_PFN_TO_NID |
278 | def_bool y | 321 | def_bool y |
322 | depends on NUMA | ||
279 | 323 | ||
280 | config NR_CPUS | 324 | config NR_CPUS |
281 | int "Maximum number of CPUs (2-256)" | 325 | int "Maximum number of CPUs (2-256)" |
@@ -311,7 +355,7 @@ config HPET_TIMER | |||
311 | <http://www.intel.com/hardwaredesign/hpetspec.htm>. | 355 | <http://www.intel.com/hardwaredesign/hpetspec.htm>. |
312 | 356 | ||
313 | config X86_PM_TIMER | 357 | config X86_PM_TIMER |
314 | bool "PM timer" | 358 | bool "PM timer" if EMBEDDED |
315 | depends on ACPI | 359 | depends on ACPI |
316 | default y | 360 | default y |
317 | help | 361 | help |
@@ -330,32 +374,24 @@ config HPET_EMULATE_RTC | |||
330 | depends on HPET_TIMER && RTC=y | 374 | depends on HPET_TIMER && RTC=y |
331 | 375 | ||
332 | config GART_IOMMU | 376 | config GART_IOMMU |
333 | bool "IOMMU support" | 377 | bool "K8 GART IOMMU support" |
334 | default y | 378 | default y |
379 | select SWIOTLB | ||
335 | depends on PCI | 380 | depends on PCI |
336 | help | 381 | help |
337 | Support the IOMMU. Needed to run systems with more than 3GB of memory | 382 | Support the IOMMU. Needed to run systems with more than 3GB of memory |
338 | properly with 32-bit PCI devices that do not support DAC (Double Address | 383 | properly with 32-bit PCI devices that do not support DAC (Double Address |
339 | Cycle). The IOMMU can be turned off at runtime with the iommu=off parameter. | 384 | Cycle). The IOMMU can be turned off at runtime with the iommu=off parameter. |
340 | Normally the kernel will take the right choice by itself. | 385 | Normally the kernel will take the right choice by itself. |
341 | This option includes a driver for the AMD Opteron/Athlon64 IOMMU | 386 | This option includes a driver for the AMD Opteron/Athlon64 northbridge IOMMU |
342 | and a software emulation used on some other systems. | 387 | and a software emulation used on other systems. |
343 | If unsure, say Y. | 388 | If unsure, say Y. |
344 | 389 | ||
345 | # need this always enabled with GART_IOMMU for the VIA workaround | 390 | # need this always enabled with GART_IOMMU for the VIA workaround |
346 | config SWIOTLB | 391 | config SWIOTLB |
347 | bool | ||
348 | depends on GART_IOMMU | ||
349 | default y | ||
350 | |||
351 | config DUMMY_IOMMU | ||
352 | bool | 392 | bool |
353 | depends on !GART_IOMMU && !SWIOTLB | ||
354 | default y | 393 | default y |
355 | help | 394 | depends on GART_IOMMU |
356 | Don't use IOMMU code. This will cause problems when you have more than 4GB | ||
357 | of memory and any 32-bit devices. Don't turn on unless you know what you | ||
358 | are doing. | ||
359 | 395 | ||
360 | config X86_MCE | 396 | config X86_MCE |
361 | bool "Machine check support" if EMBEDDED | 397 | bool "Machine check support" if EMBEDDED |
@@ -374,16 +410,13 @@ config X86_MCE_INTEL | |||
374 | Additional support for intel specific MCE features such as | 410 | Additional support for intel specific MCE features such as |
375 | the thermal monitor. | 411 | the thermal monitor. |
376 | 412 | ||
377 | config PHYSICAL_START | 413 | config X86_MCE_AMD |
378 | hex "Physical address where the kernel is loaded" if EMBEDDED | 414 | bool "AMD MCE features" |
379 | default "0x100000" | 415 | depends on X86_MCE && X86_LOCAL_APIC |
416 | default y | ||
380 | help | 417 | help |
381 | This gives the physical address where the kernel is loaded. | 418 | Additional support for AMD specific MCE features such as |
382 | Primarily used in the case of kexec on panic where the | 419 | the DRAM Error Threshold. |
383 | fail safe kernel needs to run at a different address than | ||
384 | the panic-ed kernel. | ||
385 | |||
386 | Don't change this unless you know what you are doing. | ||
387 | 420 | ||
388 | config KEXEC | 421 | config KEXEC |
389 | bool "kexec system call (EXPERIMENTAL)" | 422 | bool "kexec system call (EXPERIMENTAL)" |
@@ -402,6 +435,31 @@ config KEXEC | |||
402 | support. As of this writing the exact hardware interface is | 435 | support. As of this writing the exact hardware interface is |
403 | strongly in flux, so no good recommendation can be made. | 436 | strongly in flux, so no good recommendation can be made. |
404 | 437 | ||
438 | config CRASH_DUMP | ||
439 | bool "kernel crash dumps (EXPERIMENTAL)" | ||
440 | depends on EXPERIMENTAL | ||
441 | help | ||
442 | Generate crash dump after being started by kexec. | ||
443 | |||
444 | config PHYSICAL_START | ||
445 | hex "Physical address where the kernel is loaded" if (EMBEDDED || CRASH_DUMP) | ||
446 | default "0x1000000" if CRASH_DUMP | ||
447 | default "0x100000" | ||
448 | help | ||
449 | This gives the physical address where the kernel is loaded. Normally | ||
450 | for regular kernels this value is 0x100000 (1MB). But in the case | ||
451 | of kexec on panic the fail safe kernel needs to run at a different | ||
452 | address than the panic-ed kernel. This option is used to set the load | ||
453 | address for kernels used to capture crash dump on being kexec'ed | ||
454 | after panic. The default value for crash dump kernels is | ||
455 | 0x1000000 (16MB). This can also be set based on the "X" value as | ||
456 | specified in the "crashkernel=YM@XM" command line boot parameter | ||
457 | passed to the panic-ed kernel. Typically this parameter is set as | ||
458 | crashkernel=64M@16M. Please take a look at | ||
459 | Documentation/kdump/kdump.txt for more details about crash dumps. | ||
460 | |||
461 | Don't change this unless you know what you are doing. | ||
462 | |||
405 | config SECCOMP | 463 | config SECCOMP |
406 | bool "Enable seccomp to safely compute untrusted bytecode" | 464 | bool "Enable seccomp to safely compute untrusted bytecode" |
407 | depends on PROC_FS | 465 | depends on PROC_FS |
@@ -502,7 +560,7 @@ config IA32_EMULATION | |||
502 | left. | 560 | left. |
503 | 561 | ||
504 | config IA32_AOUT | 562 | config IA32_AOUT |
505 | bool "IA32 a.out support" | 563 | tristate "IA32 a.out support" |
506 | depends on IA32_EMULATION | 564 | depends on IA32_EMULATION |
507 | help | 565 | help |
508 | Support old a.out binaries in the 32bit emulation. | 566 | Support old a.out binaries in the 32bit emulation. |
@@ -517,11 +575,6 @@ config SYSVIPC_COMPAT | |||
517 | depends on COMPAT && SYSVIPC | 575 | depends on COMPAT && SYSVIPC |
518 | default y | 576 | default y |
519 | 577 | ||
520 | config UID16 | ||
521 | bool | ||
522 | depends on IA32_EMULATION | ||
523 | default y | ||
524 | |||
525 | endmenu | 578 | endmenu |
526 | 579 | ||
527 | source "net/Kconfig" | 580 | source "net/Kconfig" |
diff --git a/arch/x86_64/Kconfig.debug b/arch/x86_64/Kconfig.debug index d584ecc27ea1..fcb06a50fdd2 100644 --- a/arch/x86_64/Kconfig.debug +++ b/arch/x86_64/Kconfig.debug | |||
@@ -2,15 +2,6 @@ menu "Kernel hacking" | |||
2 | 2 | ||
3 | source "lib/Kconfig.debug" | 3 | source "lib/Kconfig.debug" |
4 | 4 | ||
5 | # !SMP for now because the context switch early causes GPF in segment reloading | ||
6 | # and the GS base checking does the wrong thing then, causing a hang. | ||
7 | config CHECKING | ||
8 | bool "Additional run-time checks" | ||
9 | depends on DEBUG_KERNEL && !SMP | ||
10 | help | ||
11 | Enables some internal consistency checks for kernel debugging. | ||
12 | You should normally say N. | ||
13 | |||
14 | config INIT_DEBUG | 5 | config INIT_DEBUG |
15 | bool "Debug __init statements" | 6 | bool "Debug __init statements" |
16 | depends on DEBUG_KERNEL | 7 | depends on DEBUG_KERNEL |
@@ -18,6 +9,16 @@ config INIT_DEBUG | |||
18 | Fill __init and __initdata at the end of boot. This helps debugging | 9 | Fill __init and __initdata at the end of boot. This helps debugging |
19 | illegal uses of __init and __initdata after initialization. | 10 | illegal uses of __init and __initdata after initialization. |
20 | 11 | ||
12 | config DEBUG_RODATA | ||
13 | bool "Write protect kernel read-only data structures" | ||
14 | depends on DEBUG_KERNEL | ||
15 | help | ||
16 | Mark the kernel read-only data as write-protected in the pagetables, | ||
17 | in order to catch accidental (and incorrect) writes to such const data. | ||
18 | This option may have a slight performance impact because a portion | ||
19 | of the kernel code won't be covered by a 2MB TLB anymore. | ||
20 | If in doubt, say "N". | ||
21 | |||
21 | config IOMMU_DEBUG | 22 | config IOMMU_DEBUG |
22 | depends on GART_IOMMU && DEBUG_KERNEL | 23 | depends on GART_IOMMU && DEBUG_KERNEL |
23 | bool "Enable IOMMU debugging" | 24 | bool "Enable IOMMU debugging" |
diff --git a/arch/x86_64/Makefile b/arch/x86_64/Makefile index a9cd42e61828..d7fd46479c55 100644 --- a/arch/x86_64/Makefile +++ b/arch/x86_64/Makefile | |||
@@ -31,6 +31,7 @@ cflags-$(CONFIG_MK8) += $(call cc-option,-march=k8) | |||
31 | cflags-$(CONFIG_MPSC) += $(call cc-option,-march=nocona) | 31 | cflags-$(CONFIG_MPSC) += $(call cc-option,-march=nocona) |
32 | CFLAGS += $(cflags-y) | 32 | CFLAGS += $(cflags-y) |
33 | 33 | ||
34 | CFLAGS += -m64 | ||
34 | CFLAGS += -mno-red-zone | 35 | CFLAGS += -mno-red-zone |
35 | CFLAGS += -mcmodel=kernel | 36 | CFLAGS += -mcmodel=kernel |
36 | CFLAGS += -pipe | 37 | CFLAGS += -pipe |
@@ -38,8 +39,10 @@ CFLAGS += -pipe | |||
38 | # actually it makes the kernel smaller too. | 39 | # actually it makes the kernel smaller too. |
39 | CFLAGS += -fno-reorder-blocks | 40 | CFLAGS += -fno-reorder-blocks |
40 | CFLAGS += -Wno-sign-compare | 41 | CFLAGS += -Wno-sign-compare |
41 | ifneq ($(CONFIG_DEBUG_INFO),y) | 42 | ifneq ($(CONFIG_UNWIND_INFO),y) |
42 | CFLAGS += -fno-asynchronous-unwind-tables | 43 | CFLAGS += -fno-asynchronous-unwind-tables |
44 | endif | ||
45 | ifneq ($(CONFIG_DEBUG_INFO),y) | ||
43 | # -fweb shrinks the kernel a bit, but the difference is very small | 46 | # -fweb shrinks the kernel a bit, but the difference is very small |
44 | # it also messes up debugging, so don't use it for now. | 47 | # it also messes up debugging, so don't use it for now. |
45 | #CFLAGS += $(call cc-option,-fweb) | 48 | #CFLAGS += $(call cc-option,-fweb) |
@@ -50,6 +53,8 @@ CFLAGS += $(call cc-option,-funit-at-a-time) | |||
50 | # prevent gcc from generating any FP code by mistake | 53 | # prevent gcc from generating any FP code by mistake |
51 | CFLAGS += $(call cc-option,-mno-sse -mno-mmx -mno-sse2 -mno-3dnow,) | 54 | CFLAGS += $(call cc-option,-mno-sse -mno-mmx -mno-sse2 -mno-3dnow,) |
52 | 55 | ||
56 | AFLAGS += -m64 | ||
57 | |||
53 | head-y := arch/x86_64/kernel/head.o arch/x86_64/kernel/head64.o arch/x86_64/kernel/init_task.o | 58 | head-y := arch/x86_64/kernel/head.o arch/x86_64/kernel/head64.o arch/x86_64/kernel/init_task.o |
54 | 59 | ||
55 | libs-y += arch/x86_64/lib/ | 60 | libs-y += arch/x86_64/lib/ |
@@ -80,9 +85,12 @@ bzlilo: vmlinux | |||
80 | bzdisk: vmlinux | 85 | bzdisk: vmlinux |
81 | $(Q)$(MAKE) $(build)=$(boot) BOOTIMAGE=$(BOOTIMAGE) zdisk | 86 | $(Q)$(MAKE) $(build)=$(boot) BOOTIMAGE=$(BOOTIMAGE) zdisk |
82 | 87 | ||
83 | install fdimage fdimage144 fdimage288: vmlinux | 88 | fdimage fdimage144 fdimage288: vmlinux |
84 | $(Q)$(MAKE) $(build)=$(boot) BOOTIMAGE=$(BOOTIMAGE) $@ | 89 | $(Q)$(MAKE) $(build)=$(boot) BOOTIMAGE=$(BOOTIMAGE) $@ |
85 | 90 | ||
91 | install: | ||
92 | $(Q)$(MAKE) $(build)=$(boot) BOOTIMAGE=$(BOOTIMAGE) $@ | ||
93 | |||
86 | archclean: | 94 | archclean: |
87 | $(Q)$(MAKE) $(clean)=$(boot) | 95 | $(Q)$(MAKE) $(clean)=$(boot) |
88 | 96 | ||
diff --git a/arch/x86_64/boot/.gitignore b/arch/x86_64/boot/.gitignore new file mode 100644 index 000000000000..495f20c085de --- /dev/null +++ b/arch/x86_64/boot/.gitignore | |||
@@ -0,0 +1,3 @@ | |||
1 | bootsect | ||
2 | bzImage | ||
3 | setup | ||
diff --git a/arch/x86_64/boot/Makefile b/arch/x86_64/boot/Makefile index 18c6e915d69b..29f8396ed151 100644 --- a/arch/x86_64/boot/Makefile +++ b/arch/x86_64/boot/Makefile | |||
@@ -98,5 +98,5 @@ zlilo: $(BOOTIMAGE) | |||
98 | cp System.map $(INSTALL_PATH)/ | 98 | cp System.map $(INSTALL_PATH)/ |
99 | if [ -x /sbin/lilo ]; then /sbin/lilo; else /etc/lilo/install; fi | 99 | if [ -x /sbin/lilo ]; then /sbin/lilo; else /etc/lilo/install; fi |
100 | 100 | ||
101 | install: $(BOOTIMAGE) | 101 | install: |
102 | sh $(srctree)/$(src)/install.sh $(KERNELRELEASE) $(BOOTIMAGE) System.map "$(INSTALL_PATH)" | 102 | sh $(srctree)/$(src)/install.sh $(KERNELRELEASE) $(BOOTIMAGE) System.map "$(INSTALL_PATH)" |
diff --git a/arch/x86_64/boot/compressed/misc.c b/arch/x86_64/boot/compressed/misc.c index 0e10fd84c7cc..cf4b88c416dc 100644 --- a/arch/x86_64/boot/compressed/misc.c +++ b/arch/x86_64/boot/compressed/misc.c | |||
@@ -9,7 +9,7 @@ | |||
9 | * High loaded stuff by Hans Lermen & Werner Almesberger, Feb. 1996 | 9 | * High loaded stuff by Hans Lermen & Werner Almesberger, Feb. 1996 |
10 | */ | 10 | */ |
11 | 11 | ||
12 | #include "miscsetup.h" | 12 | #include <linux/screen_info.h> |
13 | #include <asm/io.h> | 13 | #include <asm/io.h> |
14 | #include <asm/page.h> | 14 | #include <asm/page.h> |
15 | 15 | ||
diff --git a/arch/x86_64/boot/compressed/miscsetup.h b/arch/x86_64/boot/compressed/miscsetup.h deleted file mode 100644 index bb1620531703..000000000000 --- a/arch/x86_64/boot/compressed/miscsetup.h +++ /dev/null | |||
@@ -1,39 +0,0 @@ | |||
1 | #define NULL 0 | ||
2 | //typedef unsigned int size_t; | ||
3 | |||
4 | |||
5 | struct screen_info { | ||
6 | unsigned char orig_x; /* 0x00 */ | ||
7 | unsigned char orig_y; /* 0x01 */ | ||
8 | unsigned short dontuse1; /* 0x02 -- EXT_MEM_K sits here */ | ||
9 | unsigned short orig_video_page; /* 0x04 */ | ||
10 | unsigned char orig_video_mode; /* 0x06 */ | ||
11 | unsigned char orig_video_cols; /* 0x07 */ | ||
12 | unsigned short unused2; /* 0x08 */ | ||
13 | unsigned short orig_video_ega_bx; /* 0x0a */ | ||
14 | unsigned short unused3; /* 0x0c */ | ||
15 | unsigned char orig_video_lines; /* 0x0e */ | ||
16 | unsigned char orig_video_isVGA; /* 0x0f */ | ||
17 | unsigned short orig_video_points; /* 0x10 */ | ||
18 | |||
19 | /* VESA graphic mode -- linear frame buffer */ | ||
20 | unsigned short lfb_width; /* 0x12 */ | ||
21 | unsigned short lfb_height; /* 0x14 */ | ||
22 | unsigned short lfb_depth; /* 0x16 */ | ||
23 | unsigned long lfb_base; /* 0x18 */ | ||
24 | unsigned long lfb_size; /* 0x1c */ | ||
25 | unsigned short dontuse2, dontuse3; /* 0x20 -- CL_MAGIC and CL_OFFSET here */ | ||
26 | unsigned short lfb_linelength; /* 0x24 */ | ||
27 | unsigned char red_size; /* 0x26 */ | ||
28 | unsigned char red_pos; /* 0x27 */ | ||
29 | unsigned char green_size; /* 0x28 */ | ||
30 | unsigned char green_pos; /* 0x29 */ | ||
31 | unsigned char blue_size; /* 0x2a */ | ||
32 | unsigned char blue_pos; /* 0x2b */ | ||
33 | unsigned char rsvd_size; /* 0x2c */ | ||
34 | unsigned char rsvd_pos; /* 0x2d */ | ||
35 | unsigned short vesapm_seg; /* 0x2e */ | ||
36 | unsigned short vesapm_off; /* 0x30 */ | ||
37 | unsigned short pages; /* 0x32 */ | ||
38 | /* 0x34 -- 0x3f reserved for future expansion */ | ||
39 | }; | ||
diff --git a/arch/x86_64/boot/install.sh b/arch/x86_64/boot/install.sh index 198af15a7758..baaa2369bdb8 100644 --- a/arch/x86_64/boot/install.sh +++ b/arch/x86_64/boot/install.sh | |||
@@ -1,40 +1,2 @@ | |||
1 | #!/bin/sh | 1 | #!/bin/sh |
2 | # | 2 | . $srctree/arch/i386/boot/install.sh |
3 | # arch/x86_64/boot/install.sh | ||
4 | # | ||
5 | # This file is subject to the terms and conditions of the GNU General Public | ||
6 | # License. See the file "COPYING" in the main directory of this archive | ||
7 | # for more details. | ||
8 | # | ||
9 | # Copyright (C) 1995 by Linus Torvalds | ||
10 | # | ||
11 | # Adapted from code in arch/i386/boot/Makefile by H. Peter Anvin | ||
12 | # | ||
13 | # "make install" script for i386 architecture | ||
14 | # | ||
15 | # Arguments: | ||
16 | # $1 - kernel version | ||
17 | # $2 - kernel image file | ||
18 | # $3 - kernel map file | ||
19 | # $4 - default install path (blank if root directory) | ||
20 | # | ||
21 | |||
22 | # User may have a custom install script | ||
23 | |||
24 | if [ -x ~/bin/${CROSS_COMPILE}installkernel ]; then exec ~/bin/${CROSS_COMPILE}installkernel "$@"; fi | ||
25 | if [ -x /sbin/${CROSS_COMPILE}installkernel ]; then exec /sbin/${CROSS_COMPILE}installkernel "$@"; fi | ||
26 | |||
27 | # Default install - same as make zlilo | ||
28 | |||
29 | if [ -f $4/vmlinuz ]; then | ||
30 | mv $4/vmlinuz $4/vmlinuz.old | ||
31 | fi | ||
32 | |||
33 | if [ -f $4/System.map ]; then | ||
34 | mv $4/System.map $4/System.old | ||
35 | fi | ||
36 | |||
37 | cat $2 > $4/vmlinuz | ||
38 | cp $3 $4/System.map | ||
39 | |||
40 | if [ -x /sbin/lilo ]; then /sbin/lilo; else /etc/lilo/install; fi | ||
diff --git a/arch/x86_64/boot/tools/.gitignore b/arch/x86_64/boot/tools/.gitignore new file mode 100644 index 000000000000..378eac25d311 --- /dev/null +++ b/arch/x86_64/boot/tools/.gitignore | |||
@@ -0,0 +1 @@ | |||
build | |||
diff --git a/arch/x86_64/crypto/aes.c b/arch/x86_64/crypto/aes.c index acfdaa28791e..fb1b961a2e2f 100644 --- a/arch/x86_64/crypto/aes.c +++ b/arch/x86_64/crypto/aes.c | |||
@@ -74,8 +74,6 @@ static inline u8 byte(const u32 x, const unsigned n) | |||
74 | return x >> (n << 3); | 74 | return x >> (n << 3); |
75 | } | 75 | } |
76 | 76 | ||
77 | #define u32_in(x) le32_to_cpu(*(const __le32 *)(x)) | ||
78 | |||
79 | struct aes_ctx | 77 | struct aes_ctx |
80 | { | 78 | { |
81 | u32 key_length; | 79 | u32 key_length; |
@@ -234,6 +232,7 @@ static int aes_set_key(void *ctx_arg, const u8 *in_key, unsigned int key_len, | |||
234 | u32 *flags) | 232 | u32 *flags) |
235 | { | 233 | { |
236 | struct aes_ctx *ctx = ctx_arg; | 234 | struct aes_ctx *ctx = ctx_arg; |
235 | const __le32 *key = (const __le32 *)in_key; | ||
237 | u32 i, j, t, u, v, w; | 236 | u32 i, j, t, u, v, w; |
238 | 237 | ||
239 | if (key_len != 16 && key_len != 24 && key_len != 32) { | 238 | if (key_len != 16 && key_len != 24 && key_len != 32) { |
@@ -243,10 +242,10 @@ static int aes_set_key(void *ctx_arg, const u8 *in_key, unsigned int key_len, | |||
243 | 242 | ||
244 | ctx->key_length = key_len; | 243 | ctx->key_length = key_len; |
245 | 244 | ||
246 | D_KEY[key_len + 24] = E_KEY[0] = u32_in(in_key); | 245 | D_KEY[key_len + 24] = E_KEY[0] = le32_to_cpu(key[0]); |
247 | D_KEY[key_len + 25] = E_KEY[1] = u32_in(in_key + 4); | 246 | D_KEY[key_len + 25] = E_KEY[1] = le32_to_cpu(key[1]); |
248 | D_KEY[key_len + 26] = E_KEY[2] = u32_in(in_key + 8); | 247 | D_KEY[key_len + 26] = E_KEY[2] = le32_to_cpu(key[2]); |
249 | D_KEY[key_len + 27] = E_KEY[3] = u32_in(in_key + 12); | 248 | D_KEY[key_len + 27] = E_KEY[3] = le32_to_cpu(key[3]); |
250 | 249 | ||
251 | switch (key_len) { | 250 | switch (key_len) { |
252 | case 16: | 251 | case 16: |
@@ -256,17 +255,17 @@ static int aes_set_key(void *ctx_arg, const u8 *in_key, unsigned int key_len, | |||
256 | break; | 255 | break; |
257 | 256 | ||
258 | case 24: | 257 | case 24: |
259 | E_KEY[4] = u32_in(in_key + 16); | 258 | E_KEY[4] = le32_to_cpu(key[4]); |
260 | t = E_KEY[5] = u32_in(in_key + 20); | 259 | t = E_KEY[5] = le32_to_cpu(key[5]); |
261 | for (i = 0; i < 8; ++i) | 260 | for (i = 0; i < 8; ++i) |
262 | loop6 (i); | 261 | loop6 (i); |
263 | break; | 262 | break; |
264 | 263 | ||
265 | case 32: | 264 | case 32: |
266 | E_KEY[4] = u32_in(in_key + 16); | 265 | E_KEY[4] = le32_to_cpu(key[4]); |
267 | E_KEY[5] = u32_in(in_key + 20); | 266 | E_KEY[5] = le32_to_cpu(key[5]); |
268 | E_KEY[6] = u32_in(in_key + 24); | 267 | E_KEY[6] = le32_to_cpu(key[6]); |
269 | t = E_KEY[7] = u32_in(in_key + 28); | 268 | t = E_KEY[7] = le32_to_cpu(key[7]); |
270 | for (i = 0; i < 7; ++i) | 269 | for (i = 0; i < 7; ++i) |
271 | loop8(i); | 270 | loop8(i); |
272 | break; | 271 | break; |
@@ -290,6 +289,8 @@ extern void aes_decrypt(void *ctx_arg, u8 *out, const u8 *in); | |||
290 | 289 | ||
291 | static struct crypto_alg aes_alg = { | 290 | static struct crypto_alg aes_alg = { |
292 | .cra_name = "aes", | 291 | .cra_name = "aes", |
292 | .cra_driver_name = "aes-x86_64", | ||
293 | .cra_priority = 200, | ||
293 | .cra_flags = CRYPTO_ALG_TYPE_CIPHER, | 294 | .cra_flags = CRYPTO_ALG_TYPE_CIPHER, |
294 | .cra_blocksize = AES_BLOCK_SIZE, | 295 | .cra_blocksize = AES_BLOCK_SIZE, |
295 | .cra_ctxsize = sizeof(struct aes_ctx), | 296 | .cra_ctxsize = sizeof(struct aes_ctx), |
diff --git a/arch/x86_64/defconfig b/arch/x86_64/defconfig index f8db7e500fbf..09a3eb743315 100644 --- a/arch/x86_64/defconfig +++ b/arch/x86_64/defconfig | |||
@@ -1,7 +1,7 @@ | |||
1 | # | 1 | # |
2 | # Automatically generated make config: don't edit | 2 | # Automatically generated make config: don't edit |
3 | # Linux kernel version: 2.6.13-git11 | 3 | # Linux kernel version: 2.6.15-git12 |
4 | # Mon Sep 12 16:16:16 2005 | 4 | # Mon Jan 16 13:09:08 2006 |
5 | # | 5 | # |
6 | CONFIG_X86_64=y | 6 | CONFIG_X86_64=y |
7 | CONFIG_64BIT=y | 7 | CONFIG_64BIT=y |
@@ -15,6 +15,7 @@ CONFIG_EARLY_PRINTK=y | |||
15 | CONFIG_GENERIC_ISA_DMA=y | 15 | CONFIG_GENERIC_ISA_DMA=y |
16 | CONFIG_GENERIC_IOMAP=y | 16 | CONFIG_GENERIC_IOMAP=y |
17 | CONFIG_ARCH_MAY_HAVE_PC_FDC=y | 17 | CONFIG_ARCH_MAY_HAVE_PC_FDC=y |
18 | CONFIG_DMI=y | ||
18 | 19 | ||
19 | # | 20 | # |
20 | # Code maturity level options | 21 | # Code maturity level options |
@@ -35,18 +36,21 @@ CONFIG_POSIX_MQUEUE=y | |||
35 | # CONFIG_BSD_PROCESS_ACCT is not set | 36 | # CONFIG_BSD_PROCESS_ACCT is not set |
36 | CONFIG_SYSCTL=y | 37 | CONFIG_SYSCTL=y |
37 | # CONFIG_AUDIT is not set | 38 | # CONFIG_AUDIT is not set |
38 | # CONFIG_HOTPLUG is not set | ||
39 | CONFIG_KOBJECT_UEVENT=y | ||
40 | CONFIG_IKCONFIG=y | 39 | CONFIG_IKCONFIG=y |
41 | CONFIG_IKCONFIG_PROC=y | 40 | CONFIG_IKCONFIG_PROC=y |
42 | # CONFIG_CPUSETS is not set | 41 | # CONFIG_CPUSETS is not set |
43 | CONFIG_INITRAMFS_SOURCE="" | 42 | CONFIG_INITRAMFS_SOURCE="" |
43 | CONFIG_UID16=y | ||
44 | CONFIG_VM86=y | ||
45 | CONFIG_CC_OPTIMIZE_FOR_SIZE=y | ||
44 | # CONFIG_EMBEDDED is not set | 46 | # CONFIG_EMBEDDED is not set |
45 | CONFIG_KALLSYMS=y | 47 | CONFIG_KALLSYMS=y |
46 | CONFIG_KALLSYMS_ALL=y | 48 | CONFIG_KALLSYMS_ALL=y |
47 | # CONFIG_KALLSYMS_EXTRA_PASS is not set | 49 | # CONFIG_KALLSYMS_EXTRA_PASS is not set |
50 | CONFIG_HOTPLUG=y | ||
48 | CONFIG_PRINTK=y | 51 | CONFIG_PRINTK=y |
49 | CONFIG_BUG=y | 52 | CONFIG_BUG=y |
53 | CONFIG_ELF_CORE=y | ||
50 | CONFIG_BASE_FULL=y | 54 | CONFIG_BASE_FULL=y |
51 | CONFIG_FUTEX=y | 55 | CONFIG_FUTEX=y |
52 | CONFIG_EPOLL=y | 56 | CONFIG_EPOLL=y |
@@ -55,8 +59,10 @@ CONFIG_CC_ALIGN_FUNCTIONS=0 | |||
55 | CONFIG_CC_ALIGN_LABELS=0 | 59 | CONFIG_CC_ALIGN_LABELS=0 |
56 | CONFIG_CC_ALIGN_LOOPS=0 | 60 | CONFIG_CC_ALIGN_LOOPS=0 |
57 | CONFIG_CC_ALIGN_JUMPS=0 | 61 | CONFIG_CC_ALIGN_JUMPS=0 |
62 | CONFIG_SLAB=y | ||
58 | # CONFIG_TINY_SHMEM is not set | 63 | # CONFIG_TINY_SHMEM is not set |
59 | CONFIG_BASE_SMALL=0 | 64 | CONFIG_BASE_SMALL=0 |
65 | # CONFIG_SLOB is not set | ||
60 | 66 | ||
61 | # | 67 | # |
62 | # Loadable module support | 68 | # Loadable module support |
@@ -71,8 +77,28 @@ CONFIG_OBSOLETE_MODPARM=y | |||
71 | CONFIG_STOP_MACHINE=y | 77 | CONFIG_STOP_MACHINE=y |
72 | 78 | ||
73 | # | 79 | # |
80 | # Block layer | ||
81 | # | ||
82 | CONFIG_LBD=y | ||
83 | |||
84 | # | ||
85 | # IO Schedulers | ||
86 | # | ||
87 | CONFIG_IOSCHED_NOOP=y | ||
88 | # CONFIG_IOSCHED_AS is not set | ||
89 | CONFIG_IOSCHED_DEADLINE=y | ||
90 | CONFIG_IOSCHED_CFQ=y | ||
91 | # CONFIG_DEFAULT_AS is not set | ||
92 | # CONFIG_DEFAULT_DEADLINE is not set | ||
93 | CONFIG_DEFAULT_CFQ=y | ||
94 | # CONFIG_DEFAULT_NOOP is not set | ||
95 | CONFIG_DEFAULT_IOSCHED="cfq" | ||
96 | |||
97 | # | ||
74 | # Processor type and features | 98 | # Processor type and features |
75 | # | 99 | # |
100 | CONFIG_X86_PC=y | ||
101 | # CONFIG_X86_VSMP is not set | ||
76 | # CONFIG_MK8 is not set | 102 | # CONFIG_MK8 is not set |
77 | # CONFIG_MPSC is not set | 103 | # CONFIG_MPSC is not set |
78 | CONFIG_GENERIC_CPU=y | 104 | CONFIG_GENERIC_CPU=y |
@@ -89,14 +115,15 @@ CONFIG_X86_LOCAL_APIC=y | |||
89 | CONFIG_MTRR=y | 115 | CONFIG_MTRR=y |
90 | CONFIG_SMP=y | 116 | CONFIG_SMP=y |
91 | CONFIG_SCHED_SMT=y | 117 | CONFIG_SCHED_SMT=y |
92 | CONFIG_PREEMPT_NONE=y | 118 | # CONFIG_PREEMPT_NONE is not set |
93 | # CONFIG_PREEMPT_VOLUNTARY is not set | 119 | CONFIG_PREEMPT_VOLUNTARY=y |
94 | # CONFIG_PREEMPT is not set | 120 | # CONFIG_PREEMPT is not set |
95 | CONFIG_PREEMPT_BKL=y | 121 | CONFIG_PREEMPT_BKL=y |
122 | CONFIG_NUMA=y | ||
96 | CONFIG_K8_NUMA=y | 123 | CONFIG_K8_NUMA=y |
97 | # CONFIG_NUMA_EMU is not set | 124 | CONFIG_X86_64_ACPI_NUMA=y |
125 | CONFIG_NUMA_EMU=y | ||
98 | CONFIG_ARCH_DISCONTIGMEM_ENABLE=y | 126 | CONFIG_ARCH_DISCONTIGMEM_ENABLE=y |
99 | CONFIG_NUMA=y | ||
100 | CONFIG_ARCH_DISCONTIGMEM_DEFAULT=y | 127 | CONFIG_ARCH_DISCONTIGMEM_DEFAULT=y |
101 | CONFIG_ARCH_SPARSEMEM_ENABLE=y | 128 | CONFIG_ARCH_SPARSEMEM_ENABLE=y |
102 | CONFIG_SELECT_MEMORY_MODEL=y | 129 | CONFIG_SELECT_MEMORY_MODEL=y |
@@ -107,9 +134,11 @@ CONFIG_DISCONTIGMEM=y | |||
107 | CONFIG_FLAT_NODE_MEM_MAP=y | 134 | CONFIG_FLAT_NODE_MEM_MAP=y |
108 | CONFIG_NEED_MULTIPLE_NODES=y | 135 | CONFIG_NEED_MULTIPLE_NODES=y |
109 | # CONFIG_SPARSEMEM_STATIC is not set | 136 | # CONFIG_SPARSEMEM_STATIC is not set |
137 | CONFIG_SPLIT_PTLOCK_CPUS=4 | ||
138 | CONFIG_MIGRATION=y | ||
110 | CONFIG_HAVE_ARCH_EARLY_PFN_TO_NID=y | 139 | CONFIG_HAVE_ARCH_EARLY_PFN_TO_NID=y |
111 | CONFIG_HAVE_DEC_LOCK=y | ||
112 | CONFIG_NR_CPUS=32 | 140 | CONFIG_NR_CPUS=32 |
141 | CONFIG_HOTPLUG_CPU=y | ||
113 | CONFIG_HPET_TIMER=y | 142 | CONFIG_HPET_TIMER=y |
114 | CONFIG_X86_PM_TIMER=y | 143 | CONFIG_X86_PM_TIMER=y |
115 | CONFIG_HPET_EMULATE_RTC=y | 144 | CONFIG_HPET_EMULATE_RTC=y |
@@ -117,8 +146,10 @@ CONFIG_GART_IOMMU=y | |||
117 | CONFIG_SWIOTLB=y | 146 | CONFIG_SWIOTLB=y |
118 | CONFIG_X86_MCE=y | 147 | CONFIG_X86_MCE=y |
119 | CONFIG_X86_MCE_INTEL=y | 148 | CONFIG_X86_MCE_INTEL=y |
120 | CONFIG_PHYSICAL_START=0x100000 | 149 | CONFIG_X86_MCE_AMD=y |
121 | # CONFIG_KEXEC is not set | 150 | # CONFIG_KEXEC is not set |
151 | # CONFIG_CRASH_DUMP is not set | ||
152 | CONFIG_PHYSICAL_START=0x100000 | ||
122 | CONFIG_SECCOMP=y | 153 | CONFIG_SECCOMP=y |
123 | # CONFIG_HZ_100 is not set | 154 | # CONFIG_HZ_100 is not set |
124 | CONFIG_HZ_250=y | 155 | CONFIG_HZ_250=y |
@@ -133,21 +164,27 @@ CONFIG_GENERIC_PENDING_IRQ=y | |||
133 | # Power management options | 164 | # Power management options |
134 | # | 165 | # |
135 | CONFIG_PM=y | 166 | CONFIG_PM=y |
167 | # CONFIG_PM_LEGACY is not set | ||
136 | # CONFIG_PM_DEBUG is not set | 168 | # CONFIG_PM_DEBUG is not set |
137 | CONFIG_SOFTWARE_SUSPEND=y | 169 | CONFIG_SOFTWARE_SUSPEND=y |
138 | CONFIG_PM_STD_PARTITION="" | 170 | CONFIG_PM_STD_PARTITION="" |
171 | CONFIG_SUSPEND_SMP=y | ||
139 | 172 | ||
140 | # | 173 | # |
141 | # ACPI (Advanced Configuration and Power Interface) Support | 174 | # ACPI (Advanced Configuration and Power Interface) Support |
142 | # | 175 | # |
143 | CONFIG_ACPI=y | 176 | CONFIG_ACPI=y |
177 | CONFIG_ACPI_SLEEP=y | ||
178 | CONFIG_ACPI_SLEEP_PROC_FS=y | ||
179 | CONFIG_ACPI_SLEEP_PROC_SLEEP=y | ||
144 | CONFIG_ACPI_AC=y | 180 | CONFIG_ACPI_AC=y |
145 | CONFIG_ACPI_BATTERY=y | 181 | CONFIG_ACPI_BATTERY=y |
146 | CONFIG_ACPI_BUTTON=y | 182 | CONFIG_ACPI_BUTTON=y |
147 | # CONFIG_ACPI_VIDEO is not set | 183 | # CONFIG_ACPI_VIDEO is not set |
148 | CONFIG_ACPI_HOTKEY=m | 184 | # CONFIG_ACPI_HOTKEY is not set |
149 | CONFIG_ACPI_FAN=y | 185 | CONFIG_ACPI_FAN=y |
150 | CONFIG_ACPI_PROCESSOR=y | 186 | CONFIG_ACPI_PROCESSOR=y |
187 | CONFIG_ACPI_HOTPLUG_CPU=y | ||
151 | CONFIG_ACPI_THERMAL=y | 188 | CONFIG_ACPI_THERMAL=y |
152 | CONFIG_ACPI_NUMA=y | 189 | CONFIG_ACPI_NUMA=y |
153 | # CONFIG_ACPI_ASUS is not set | 190 | # CONFIG_ACPI_ASUS is not set |
@@ -158,7 +195,7 @@ CONFIG_ACPI_BLACKLIST_YEAR=2001 | |||
158 | CONFIG_ACPI_EC=y | 195 | CONFIG_ACPI_EC=y |
159 | CONFIG_ACPI_POWER=y | 196 | CONFIG_ACPI_POWER=y |
160 | CONFIG_ACPI_SYSTEM=y | 197 | CONFIG_ACPI_SYSTEM=y |
161 | # CONFIG_ACPI_CONTAINER is not set | 198 | CONFIG_ACPI_CONTAINER=y |
162 | 199 | ||
163 | # | 200 | # |
164 | # CPU Frequency scaling | 201 | # CPU Frequency scaling |
@@ -197,7 +234,7 @@ CONFIG_PCI=y | |||
197 | CONFIG_PCI_DIRECT=y | 234 | CONFIG_PCI_DIRECT=y |
198 | CONFIG_PCI_MMCONFIG=y | 235 | CONFIG_PCI_MMCONFIG=y |
199 | CONFIG_UNORDERED_IO=y | 236 | CONFIG_UNORDERED_IO=y |
200 | # CONFIG_PCIEPORTBUS is not set | 237 | CONFIG_PCIEPORTBUS=y |
201 | CONFIG_PCI_MSI=y | 238 | CONFIG_PCI_MSI=y |
202 | # CONFIG_PCI_LEGACY_PROC is not set | 239 | # CONFIG_PCI_LEGACY_PROC is not set |
203 | # CONFIG_PCI_DEBUG is not set | 240 | # CONFIG_PCI_DEBUG is not set |
@@ -221,7 +258,6 @@ CONFIG_IA32_EMULATION=y | |||
221 | CONFIG_IA32_AOUT=y | 258 | CONFIG_IA32_AOUT=y |
222 | CONFIG_COMPAT=y | 259 | CONFIG_COMPAT=y |
223 | CONFIG_SYSVIPC_COMPAT=y | 260 | CONFIG_SYSVIPC_COMPAT=y |
224 | CONFIG_UID16=y | ||
225 | 261 | ||
226 | # | 262 | # |
227 | # Networking | 263 | # Networking |
@@ -283,17 +319,24 @@ CONFIG_IPV6=y | |||
283 | # CONFIG_ATALK is not set | 319 | # CONFIG_ATALK is not set |
284 | # CONFIG_X25 is not set | 320 | # CONFIG_X25 is not set |
285 | # CONFIG_LAPB is not set | 321 | # CONFIG_LAPB is not set |
322 | |||
323 | # | ||
324 | # TIPC Configuration (EXPERIMENTAL) | ||
325 | # | ||
326 | # CONFIG_TIPC is not set | ||
286 | # CONFIG_NET_DIVERT is not set | 327 | # CONFIG_NET_DIVERT is not set |
287 | # CONFIG_ECONET is not set | 328 | # CONFIG_ECONET is not set |
288 | # CONFIG_WAN_ROUTER is not set | 329 | # CONFIG_WAN_ROUTER is not set |
330 | |||
331 | # | ||
332 | # QoS and/or fair queueing | ||
333 | # | ||
289 | # CONFIG_NET_SCHED is not set | 334 | # CONFIG_NET_SCHED is not set |
290 | # CONFIG_NET_CLS_ROUTE is not set | ||
291 | 335 | ||
292 | # | 336 | # |
293 | # Network testing | 337 | # Network testing |
294 | # | 338 | # |
295 | # CONFIG_NET_PKTGEN is not set | 339 | # CONFIG_NET_PKTGEN is not set |
296 | # CONFIG_NETFILTER_NETLINK is not set | ||
297 | # CONFIG_HAMRADIO is not set | 340 | # CONFIG_HAMRADIO is not set |
298 | # CONFIG_IRDA is not set | 341 | # CONFIG_IRDA is not set |
299 | # CONFIG_BT is not set | 342 | # CONFIG_BT is not set |
@@ -308,10 +351,15 @@ CONFIG_IPV6=y | |||
308 | # | 351 | # |
309 | CONFIG_STANDALONE=y | 352 | CONFIG_STANDALONE=y |
310 | CONFIG_PREVENT_FIRMWARE_BUILD=y | 353 | CONFIG_PREVENT_FIRMWARE_BUILD=y |
311 | # CONFIG_FW_LOADER is not set | 354 | CONFIG_FW_LOADER=y |
312 | # CONFIG_DEBUG_DRIVER is not set | 355 | # CONFIG_DEBUG_DRIVER is not set |
313 | 356 | ||
314 | # | 357 | # |
358 | # Connector - unified userspace <-> kernelspace linker | ||
359 | # | ||
360 | # CONFIG_CONNECTOR is not set | ||
361 | |||
362 | # | ||
315 | # Memory Technology Devices (MTD) | 363 | # Memory Technology Devices (MTD) |
316 | # | 364 | # |
317 | # CONFIG_MTD is not set | 365 | # CONFIG_MTD is not set |
@@ -344,16 +392,7 @@ CONFIG_BLK_DEV_RAM=y | |||
344 | CONFIG_BLK_DEV_RAM_COUNT=16 | 392 | CONFIG_BLK_DEV_RAM_COUNT=16 |
345 | CONFIG_BLK_DEV_RAM_SIZE=4096 | 393 | CONFIG_BLK_DEV_RAM_SIZE=4096 |
346 | CONFIG_BLK_DEV_INITRD=y | 394 | CONFIG_BLK_DEV_INITRD=y |
347 | CONFIG_LBD=y | ||
348 | # CONFIG_CDROM_PKTCDVD is not set | 395 | # CONFIG_CDROM_PKTCDVD is not set |
349 | |||
350 | # | ||
351 | # IO Schedulers | ||
352 | # | ||
353 | CONFIG_IOSCHED_NOOP=y | ||
354 | # CONFIG_IOSCHED_AS is not set | ||
355 | CONFIG_IOSCHED_DEADLINE=y | ||
356 | CONFIG_IOSCHED_CFQ=y | ||
357 | # CONFIG_ATA_OVER_ETH is not set | 396 | # CONFIG_ATA_OVER_ETH is not set |
358 | 397 | ||
359 | # | 398 | # |
@@ -393,7 +432,7 @@ CONFIG_IDEDMA_PCI_AUTO=y | |||
393 | # CONFIG_BLK_DEV_AEC62XX is not set | 432 | # CONFIG_BLK_DEV_AEC62XX is not set |
394 | # CONFIG_BLK_DEV_ALI15X3 is not set | 433 | # CONFIG_BLK_DEV_ALI15X3 is not set |
395 | CONFIG_BLK_DEV_AMD74XX=y | 434 | CONFIG_BLK_DEV_AMD74XX=y |
396 | # CONFIG_BLK_DEV_ATIIXP is not set | 435 | CONFIG_BLK_DEV_ATIIXP=y |
397 | # CONFIG_BLK_DEV_CMD64X is not set | 436 | # CONFIG_BLK_DEV_CMD64X is not set |
398 | # CONFIG_BLK_DEV_TRIFLEX is not set | 437 | # CONFIG_BLK_DEV_TRIFLEX is not set |
399 | # CONFIG_BLK_DEV_CY82C693 is not set | 438 | # CONFIG_BLK_DEV_CY82C693 is not set |
@@ -441,19 +480,21 @@ CONFIG_BLK_DEV_SD=y | |||
441 | # Some SCSI devices (e.g. CD jukebox) support multiple LUNs | 480 | # Some SCSI devices (e.g. CD jukebox) support multiple LUNs |
442 | # | 481 | # |
443 | # CONFIG_SCSI_MULTI_LUN is not set | 482 | # CONFIG_SCSI_MULTI_LUN is not set |
444 | # CONFIG_SCSI_CONSTANTS is not set | 483 | CONFIG_SCSI_CONSTANTS=y |
445 | # CONFIG_SCSI_LOGGING is not set | 484 | # CONFIG_SCSI_LOGGING is not set |
446 | 485 | ||
447 | # | 486 | # |
448 | # SCSI Transport Attributes | 487 | # SCSI Transport Attributes |
449 | # | 488 | # |
450 | CONFIG_SCSI_SPI_ATTRS=y | 489 | CONFIG_SCSI_SPI_ATTRS=y |
451 | # CONFIG_SCSI_FC_ATTRS is not set | 490 | CONFIG_SCSI_FC_ATTRS=y |
452 | # CONFIG_SCSI_ISCSI_ATTRS is not set | 491 | # CONFIG_SCSI_ISCSI_ATTRS is not set |
492 | # CONFIG_SCSI_SAS_ATTRS is not set | ||
453 | 493 | ||
454 | # | 494 | # |
455 | # SCSI low-level drivers | 495 | # SCSI low-level drivers |
456 | # | 496 | # |
497 | # CONFIG_ISCSI_TCP is not set | ||
457 | # CONFIG_BLK_DEV_3W_XXXX_RAID is not set | 498 | # CONFIG_BLK_DEV_3W_XXXX_RAID is not set |
458 | # CONFIG_SCSI_3W_9XXX is not set | 499 | # CONFIG_SCSI_3W_9XXX is not set |
459 | # CONFIG_SCSI_ACARD is not set | 500 | # CONFIG_SCSI_ACARD is not set |
@@ -467,22 +508,28 @@ CONFIG_AIC79XX_RESET_DELAY_MS=4000 | |||
467 | # CONFIG_AIC79XX_DEBUG_ENABLE is not set | 508 | # CONFIG_AIC79XX_DEBUG_ENABLE is not set |
468 | CONFIG_AIC79XX_DEBUG_MASK=0 | 509 | CONFIG_AIC79XX_DEBUG_MASK=0 |
469 | # CONFIG_AIC79XX_REG_PRETTY_PRINT is not set | 510 | # CONFIG_AIC79XX_REG_PRETTY_PRINT is not set |
470 | # CONFIG_MEGARAID_NEWGEN is not set | 511 | CONFIG_MEGARAID_NEWGEN=y |
512 | CONFIG_MEGARAID_MM=y | ||
513 | CONFIG_MEGARAID_MAILBOX=y | ||
471 | # CONFIG_MEGARAID_LEGACY is not set | 514 | # CONFIG_MEGARAID_LEGACY is not set |
515 | CONFIG_MEGARAID_SAS=y | ||
472 | CONFIG_SCSI_SATA=y | 516 | CONFIG_SCSI_SATA=y |
473 | # CONFIG_SCSI_SATA_AHCI is not set | 517 | CONFIG_SCSI_SATA_AHCI=y |
474 | # CONFIG_SCSI_SATA_SVW is not set | 518 | # CONFIG_SCSI_SATA_SVW is not set |
475 | CONFIG_SCSI_ATA_PIIX=y | 519 | CONFIG_SCSI_ATA_PIIX=y |
476 | # CONFIG_SCSI_SATA_MV is not set | 520 | # CONFIG_SCSI_SATA_MV is not set |
477 | # CONFIG_SCSI_SATA_NV is not set | 521 | CONFIG_SCSI_SATA_NV=y |
478 | # CONFIG_SCSI_SATA_PROMISE is not set | 522 | # CONFIG_SCSI_PDC_ADMA is not set |
479 | # CONFIG_SCSI_SATA_QSTOR is not set | 523 | # CONFIG_SCSI_SATA_QSTOR is not set |
524 | # CONFIG_SCSI_SATA_PROMISE is not set | ||
480 | # CONFIG_SCSI_SATA_SX4 is not set | 525 | # CONFIG_SCSI_SATA_SX4 is not set |
481 | # CONFIG_SCSI_SATA_SIL is not set | 526 | CONFIG_SCSI_SATA_SIL=y |
527 | # CONFIG_SCSI_SATA_SIL24 is not set | ||
482 | # CONFIG_SCSI_SATA_SIS is not set | 528 | # CONFIG_SCSI_SATA_SIS is not set |
483 | # CONFIG_SCSI_SATA_ULI is not set | 529 | # CONFIG_SCSI_SATA_ULI is not set |
484 | CONFIG_SCSI_SATA_VIA=y | 530 | CONFIG_SCSI_SATA_VIA=y |
485 | # CONFIG_SCSI_SATA_VITESSE is not set | 531 | # CONFIG_SCSI_SATA_VITESSE is not set |
532 | CONFIG_SCSI_SATA_INTEL_COMBINED=y | ||
486 | # CONFIG_SCSI_BUSLOGIC is not set | 533 | # CONFIG_SCSI_BUSLOGIC is not set |
487 | # CONFIG_SCSI_DMX3191D is not set | 534 | # CONFIG_SCSI_DMX3191D is not set |
488 | # CONFIG_SCSI_EATA is not set | 535 | # CONFIG_SCSI_EATA is not set |
@@ -495,13 +542,7 @@ CONFIG_SCSI_SATA_VIA=y | |||
495 | # CONFIG_SCSI_IPR is not set | 542 | # CONFIG_SCSI_IPR is not set |
496 | # CONFIG_SCSI_QLOGIC_FC is not set | 543 | # CONFIG_SCSI_QLOGIC_FC is not set |
497 | # CONFIG_SCSI_QLOGIC_1280 is not set | 544 | # CONFIG_SCSI_QLOGIC_1280 is not set |
498 | CONFIG_SCSI_QLA2XXX=y | 545 | # CONFIG_SCSI_QLA_FC is not set |
499 | # CONFIG_SCSI_QLA21XX is not set | ||
500 | # CONFIG_SCSI_QLA22XX is not set | ||
501 | # CONFIG_SCSI_QLA2300 is not set | ||
502 | # CONFIG_SCSI_QLA2322 is not set | ||
503 | # CONFIG_SCSI_QLA6312 is not set | ||
504 | # CONFIG_SCSI_QLA24XX is not set | ||
505 | # CONFIG_SCSI_LPFC is not set | 546 | # CONFIG_SCSI_LPFC is not set |
506 | # CONFIG_SCSI_DC395x is not set | 547 | # CONFIG_SCSI_DC395x is not set |
507 | # CONFIG_SCSI_DC390T is not set | 548 | # CONFIG_SCSI_DC390T is not set |
@@ -525,6 +566,7 @@ CONFIG_BLK_DEV_DM=y | |||
525 | CONFIG_FUSION=y | 566 | CONFIG_FUSION=y |
526 | CONFIG_FUSION_SPI=y | 567 | CONFIG_FUSION_SPI=y |
527 | # CONFIG_FUSION_FC is not set | 568 | # CONFIG_FUSION_FC is not set |
569 | # CONFIG_FUSION_SAS is not set | ||
528 | CONFIG_FUSION_MAX_SGE=128 | 570 | CONFIG_FUSION_MAX_SGE=128 |
529 | # CONFIG_FUSION_CTL is not set | 571 | # CONFIG_FUSION_CTL is not set |
530 | 572 | ||
@@ -564,6 +606,7 @@ CONFIG_NET_ETHERNET=y | |||
564 | CONFIG_MII=y | 606 | CONFIG_MII=y |
565 | # CONFIG_HAPPYMEAL is not set | 607 | # CONFIG_HAPPYMEAL is not set |
566 | # CONFIG_SUNGEM is not set | 608 | # CONFIG_SUNGEM is not set |
609 | # CONFIG_CASSINI is not set | ||
567 | CONFIG_NET_VENDOR_3COM=y | 610 | CONFIG_NET_VENDOR_3COM=y |
568 | CONFIG_VORTEX=y | 611 | CONFIG_VORTEX=y |
569 | # CONFIG_TYPHOON is not set | 612 | # CONFIG_TYPHOON is not set |
@@ -603,12 +646,14 @@ CONFIG_8139TOO=y | |||
603 | # CONFIG_DL2K is not set | 646 | # CONFIG_DL2K is not set |
604 | CONFIG_E1000=y | 647 | CONFIG_E1000=y |
605 | # CONFIG_E1000_NAPI is not set | 648 | # CONFIG_E1000_NAPI is not set |
649 | # CONFIG_E1000_DISABLE_PACKET_SPLIT is not set | ||
606 | # CONFIG_NS83820 is not set | 650 | # CONFIG_NS83820 is not set |
607 | # CONFIG_HAMACHI is not set | 651 | # CONFIG_HAMACHI is not set |
608 | # CONFIG_YELLOWFIN is not set | 652 | # CONFIG_YELLOWFIN is not set |
609 | # CONFIG_R8169 is not set | 653 | # CONFIG_R8169 is not set |
610 | # CONFIG_SIS190 is not set | 654 | # CONFIG_SIS190 is not set |
611 | # CONFIG_SKGE is not set | 655 | # CONFIG_SKGE is not set |
656 | # CONFIG_SKY2 is not set | ||
612 | # CONFIG_SK98LIN is not set | 657 | # CONFIG_SK98LIN is not set |
613 | # CONFIG_VIA_VELOCITY is not set | 658 | # CONFIG_VIA_VELOCITY is not set |
614 | CONFIG_TIGON3=y | 659 | CONFIG_TIGON3=y |
@@ -621,7 +666,6 @@ CONFIG_TIGON3=y | |||
621 | # CONFIG_IXGB is not set | 666 | # CONFIG_IXGB is not set |
622 | CONFIG_S2IO=m | 667 | CONFIG_S2IO=m |
623 | # CONFIG_S2IO_NAPI is not set | 668 | # CONFIG_S2IO_NAPI is not set |
624 | # CONFIG_2BUFF_MODE is not set | ||
625 | 669 | ||
626 | # | 670 | # |
627 | # Token Ring devices | 671 | # Token Ring devices |
@@ -720,6 +764,7 @@ CONFIG_SERIAL_8250=y | |||
720 | CONFIG_SERIAL_8250_CONSOLE=y | 764 | CONFIG_SERIAL_8250_CONSOLE=y |
721 | # CONFIG_SERIAL_8250_ACPI is not set | 765 | # CONFIG_SERIAL_8250_ACPI is not set |
722 | CONFIG_SERIAL_8250_NR_UARTS=4 | 766 | CONFIG_SERIAL_8250_NR_UARTS=4 |
767 | CONFIG_SERIAL_8250_RUNTIME_UARTS=4 | ||
723 | # CONFIG_SERIAL_8250_EXTENDED is not set | 768 | # CONFIG_SERIAL_8250_EXTENDED is not set |
724 | 769 | ||
725 | # | 770 | # |
@@ -727,7 +772,6 @@ CONFIG_SERIAL_8250_NR_UARTS=4 | |||
727 | # | 772 | # |
728 | CONFIG_SERIAL_CORE=y | 773 | CONFIG_SERIAL_CORE=y |
729 | CONFIG_SERIAL_CORE_CONSOLE=y | 774 | CONFIG_SERIAL_CORE_CONSOLE=y |
730 | # CONFIG_SERIAL_JSM is not set | ||
731 | CONFIG_UNIX98_PTYS=y | 775 | CONFIG_UNIX98_PTYS=y |
732 | CONFIG_LEGACY_PTYS=y | 776 | CONFIG_LEGACY_PTYS=y |
733 | CONFIG_LEGACY_PTY_COUNT=256 | 777 | CONFIG_LEGACY_PTY_COUNT=256 |
@@ -740,7 +784,44 @@ CONFIG_LEGACY_PTY_COUNT=256 | |||
740 | # | 784 | # |
741 | # Watchdog Cards | 785 | # Watchdog Cards |
742 | # | 786 | # |
743 | # CONFIG_WATCHDOG is not set | 787 | CONFIG_WATCHDOG=y |
788 | # CONFIG_WATCHDOG_NOWAYOUT is not set | ||
789 | |||
790 | # | ||
791 | # Watchdog Device Drivers | ||
792 | # | ||
793 | CONFIG_SOFT_WATCHDOG=y | ||
794 | # CONFIG_ACQUIRE_WDT is not set | ||
795 | # CONFIG_ADVANTECH_WDT is not set | ||
796 | # CONFIG_ALIM1535_WDT is not set | ||
797 | # CONFIG_ALIM7101_WDT is not set | ||
798 | # CONFIG_SC520_WDT is not set | ||
799 | # CONFIG_EUROTECH_WDT is not set | ||
800 | # CONFIG_IB700_WDT is not set | ||
801 | # CONFIG_IBMASR is not set | ||
802 | # CONFIG_WAFER_WDT is not set | ||
803 | # CONFIG_I6300ESB_WDT is not set | ||
804 | # CONFIG_I8XX_TCO is not set | ||
805 | # CONFIG_SC1200_WDT is not set | ||
806 | # CONFIG_60XX_WDT is not set | ||
807 | # CONFIG_SBC8360_WDT is not set | ||
808 | # CONFIG_CPU5_WDT is not set | ||
809 | # CONFIG_W83627HF_WDT is not set | ||
810 | # CONFIG_W83877F_WDT is not set | ||
811 | # CONFIG_W83977F_WDT is not set | ||
812 | # CONFIG_MACHZ_WDT is not set | ||
813 | # CONFIG_SBC_EPX_C3_WATCHDOG is not set | ||
814 | |||
815 | # | ||
816 | # PCI-based Watchdog Cards | ||
817 | # | ||
818 | # CONFIG_PCIPCWATCHDOG is not set | ||
819 | # CONFIG_WDTPCI is not set | ||
820 | |||
821 | # | ||
822 | # USB-based Watchdog Cards | ||
823 | # | ||
824 | # CONFIG_USBPCWATCHDOG is not set | ||
744 | CONFIG_HW_RANDOM=y | 825 | CONFIG_HW_RANDOM=y |
745 | # CONFIG_NVRAM is not set | 826 | # CONFIG_NVRAM is not set |
746 | CONFIG_RTC=y | 827 | CONFIG_RTC=y |
@@ -757,16 +838,17 @@ CONFIG_AGP_INTEL=y | |||
757 | # CONFIG_DRM is not set | 838 | # CONFIG_DRM is not set |
758 | # CONFIG_MWAVE is not set | 839 | # CONFIG_MWAVE is not set |
759 | CONFIG_RAW_DRIVER=y | 840 | CONFIG_RAW_DRIVER=y |
841 | CONFIG_MAX_RAW_DEVS=256 | ||
760 | CONFIG_HPET=y | 842 | CONFIG_HPET=y |
761 | # CONFIG_HPET_RTC_IRQ is not set | 843 | # CONFIG_HPET_RTC_IRQ is not set |
762 | CONFIG_HPET_MMAP=y | 844 | CONFIG_HPET_MMAP=y |
763 | CONFIG_MAX_RAW_DEVS=256 | ||
764 | # CONFIG_HANGCHECK_TIMER is not set | 845 | # CONFIG_HANGCHECK_TIMER is not set |
765 | 846 | ||
766 | # | 847 | # |
767 | # TPM devices | 848 | # TPM devices |
768 | # | 849 | # |
769 | # CONFIG_TCG_TPM is not set | 850 | # CONFIG_TCG_TPM is not set |
851 | # CONFIG_TELCLOCK is not set | ||
770 | 852 | ||
771 | # | 853 | # |
772 | # I2C support | 854 | # I2C support |
@@ -774,6 +856,12 @@ CONFIG_MAX_RAW_DEVS=256 | |||
774 | # CONFIG_I2C is not set | 856 | # CONFIG_I2C is not set |
775 | 857 | ||
776 | # | 858 | # |
859 | # SPI support | ||
860 | # | ||
861 | # CONFIG_SPI is not set | ||
862 | # CONFIG_SPI_MASTER is not set | ||
863 | |||
864 | # | ||
777 | # Dallas's 1-wire bus | 865 | # Dallas's 1-wire bus |
778 | # | 866 | # |
779 | # CONFIG_W1 is not set | 867 | # CONFIG_W1 is not set |
@@ -783,6 +871,7 @@ CONFIG_MAX_RAW_DEVS=256 | |||
783 | # | 871 | # |
784 | CONFIG_HWMON=y | 872 | CONFIG_HWMON=y |
785 | # CONFIG_HWMON_VID is not set | 873 | # CONFIG_HWMON_VID is not set |
874 | # CONFIG_SENSORS_HDAPS is not set | ||
786 | # CONFIG_HWMON_DEBUG_CHIP is not set | 875 | # CONFIG_HWMON_DEBUG_CHIP is not set |
787 | 876 | ||
788 | # | 877 | # |
@@ -830,6 +919,7 @@ CONFIG_SOUND=y | |||
830 | # Open Sound System | 919 | # Open Sound System |
831 | # | 920 | # |
832 | CONFIG_SOUND_PRIME=y | 921 | CONFIG_SOUND_PRIME=y |
922 | CONFIG_OBSOLETE_OSS_DRIVER=y | ||
833 | # CONFIG_SOUND_BT878 is not set | 923 | # CONFIG_SOUND_BT878 is not set |
834 | # CONFIG_SOUND_CMPCI is not set | 924 | # CONFIG_SOUND_CMPCI is not set |
835 | # CONFIG_SOUND_EMU10K1 is not set | 925 | # CONFIG_SOUND_EMU10K1 is not set |
@@ -886,12 +976,15 @@ CONFIG_USB_UHCI_HCD=y | |||
886 | # USB Device Class drivers | 976 | # USB Device Class drivers |
887 | # | 977 | # |
888 | # CONFIG_OBSOLETE_OSS_USB_DRIVER is not set | 978 | # CONFIG_OBSOLETE_OSS_USB_DRIVER is not set |
889 | # CONFIG_USB_BLUETOOTH_TTY is not set | ||
890 | # CONFIG_USB_ACM is not set | 979 | # CONFIG_USB_ACM is not set |
891 | CONFIG_USB_PRINTER=y | 980 | CONFIG_USB_PRINTER=y |
892 | 981 | ||
893 | # | 982 | # |
894 | # NOTE: USB_STORAGE enables SCSI, and 'SCSI disk support' may also be needed; see USB_STORAGE Help for more information | 983 | # NOTE: USB_STORAGE enables SCSI, and 'SCSI disk support' |
984 | # | ||
985 | |||
986 | # | ||
987 | # may also be needed; see USB_STORAGE Help for more information | ||
895 | # | 988 | # |
896 | CONFIG_USB_STORAGE=y | 989 | CONFIG_USB_STORAGE=y |
897 | # CONFIG_USB_STORAGE_DEBUG is not set | 990 | # CONFIG_USB_STORAGE_DEBUG is not set |
@@ -903,13 +996,15 @@ CONFIG_USB_STORAGE=y | |||
903 | # CONFIG_USB_STORAGE_SDDR09 is not set | 996 | # CONFIG_USB_STORAGE_SDDR09 is not set |
904 | # CONFIG_USB_STORAGE_SDDR55 is not set | 997 | # CONFIG_USB_STORAGE_SDDR55 is not set |
905 | # CONFIG_USB_STORAGE_JUMPSHOT is not set | 998 | # CONFIG_USB_STORAGE_JUMPSHOT is not set |
906 | # CONFIG_USB_STORAGE_ONETOUCH is not set | 999 | # CONFIG_USB_STORAGE_ALAUDA is not set |
1000 | # CONFIG_USB_LIBUSUAL is not set | ||
907 | 1001 | ||
908 | # | 1002 | # |
909 | # USB Input Devices | 1003 | # USB Input Devices |
910 | # | 1004 | # |
911 | CONFIG_USB_HID=y | 1005 | CONFIG_USB_HID=y |
912 | CONFIG_USB_HIDINPUT=y | 1006 | CONFIG_USB_HIDINPUT=y |
1007 | # CONFIG_USB_HIDINPUT_POWERBOOK is not set | ||
913 | # CONFIG_HID_FF is not set | 1008 | # CONFIG_HID_FF is not set |
914 | # CONFIG_USB_HIDDEV is not set | 1009 | # CONFIG_USB_HIDDEV is not set |
915 | # CONFIG_USB_AIPTEK is not set | 1010 | # CONFIG_USB_AIPTEK is not set |
@@ -923,7 +1018,9 @@ CONFIG_USB_HIDINPUT=y | |||
923 | # CONFIG_USB_YEALINK is not set | 1018 | # CONFIG_USB_YEALINK is not set |
924 | # CONFIG_USB_XPAD is not set | 1019 | # CONFIG_USB_XPAD is not set |
925 | # CONFIG_USB_ATI_REMOTE is not set | 1020 | # CONFIG_USB_ATI_REMOTE is not set |
1021 | # CONFIG_USB_ATI_REMOTE2 is not set | ||
926 | # CONFIG_USB_KEYSPAN_REMOTE is not set | 1022 | # CONFIG_USB_KEYSPAN_REMOTE is not set |
1023 | # CONFIG_USB_APPLETOUCH is not set | ||
927 | 1024 | ||
928 | # | 1025 | # |
929 | # USB Imaging devices | 1026 | # USB Imaging devices |
@@ -1005,7 +1102,7 @@ CONFIG_USB_MON=y | |||
1005 | # | 1102 | # |
1006 | # CONFIG_EDD is not set | 1103 | # CONFIG_EDD is not set |
1007 | # CONFIG_DELL_RBU is not set | 1104 | # CONFIG_DELL_RBU is not set |
1008 | CONFIG_DCDBAS=m | 1105 | # CONFIG_DCDBAS is not set |
1009 | 1106 | ||
1010 | # | 1107 | # |
1011 | # File systems | 1108 | # File systems |
@@ -1031,13 +1128,14 @@ CONFIG_REISERFS_FS_POSIX_ACL=y | |||
1031 | # CONFIG_JFS_FS is not set | 1128 | # CONFIG_JFS_FS is not set |
1032 | CONFIG_FS_POSIX_ACL=y | 1129 | CONFIG_FS_POSIX_ACL=y |
1033 | # CONFIG_XFS_FS is not set | 1130 | # CONFIG_XFS_FS is not set |
1131 | # CONFIG_OCFS2_FS is not set | ||
1034 | # CONFIG_MINIX_FS is not set | 1132 | # CONFIG_MINIX_FS is not set |
1035 | # CONFIG_ROMFS_FS is not set | 1133 | # CONFIG_ROMFS_FS is not set |
1036 | CONFIG_INOTIFY=y | 1134 | CONFIG_INOTIFY=y |
1037 | # CONFIG_QUOTA is not set | 1135 | # CONFIG_QUOTA is not set |
1038 | CONFIG_DNOTIFY=y | 1136 | CONFIG_DNOTIFY=y |
1039 | CONFIG_AUTOFS_FS=y | 1137 | CONFIG_AUTOFS_FS=y |
1040 | # CONFIG_AUTOFS4_FS is not set | 1138 | CONFIG_AUTOFS4_FS=y |
1041 | # CONFIG_FUSE_FS is not set | 1139 | # CONFIG_FUSE_FS is not set |
1042 | 1140 | ||
1043 | # | 1141 | # |
@@ -1068,7 +1166,8 @@ CONFIG_TMPFS=y | |||
1068 | CONFIG_HUGETLBFS=y | 1166 | CONFIG_HUGETLBFS=y |
1069 | CONFIG_HUGETLB_PAGE=y | 1167 | CONFIG_HUGETLB_PAGE=y |
1070 | CONFIG_RAMFS=y | 1168 | CONFIG_RAMFS=y |
1071 | # CONFIG_RELAYFS_FS is not set | 1169 | CONFIG_RELAYFS_FS=y |
1170 | # CONFIG_CONFIGFS_FS is not set | ||
1072 | 1171 | ||
1073 | # | 1172 | # |
1074 | # Miscellaneous filesystems | 1173 | # Miscellaneous filesystems |
@@ -1166,30 +1265,35 @@ CONFIG_NLS_ISO8859_15=y | |||
1166 | CONFIG_NLS_UTF8=y | 1265 | CONFIG_NLS_UTF8=y |
1167 | 1266 | ||
1168 | # | 1267 | # |
1169 | # Profiling support | 1268 | # Instrumentation Support |
1170 | # | 1269 | # |
1171 | CONFIG_PROFILING=y | 1270 | CONFIG_PROFILING=y |
1172 | CONFIG_OPROFILE=y | 1271 | CONFIG_OPROFILE=y |
1272 | CONFIG_KPROBES=y | ||
1173 | 1273 | ||
1174 | # | 1274 | # |
1175 | # Kernel hacking | 1275 | # Kernel hacking |
1176 | # | 1276 | # |
1177 | # CONFIG_PRINTK_TIME is not set | 1277 | # CONFIG_PRINTK_TIME is not set |
1178 | CONFIG_DEBUG_KERNEL=y | ||
1179 | CONFIG_MAGIC_SYSRQ=y | 1278 | CONFIG_MAGIC_SYSRQ=y |
1279 | CONFIG_DEBUG_KERNEL=y | ||
1180 | CONFIG_LOG_BUF_SHIFT=18 | 1280 | CONFIG_LOG_BUF_SHIFT=18 |
1181 | CONFIG_DETECT_SOFTLOCKUP=y | 1281 | CONFIG_DETECT_SOFTLOCKUP=y |
1182 | # CONFIG_SCHEDSTATS is not set | 1282 | # CONFIG_SCHEDSTATS is not set |
1183 | # CONFIG_DEBUG_SLAB is not set | 1283 | # CONFIG_DEBUG_SLAB is not set |
1284 | # CONFIG_DEBUG_MUTEXES is not set | ||
1184 | # CONFIG_DEBUG_SPINLOCK is not set | 1285 | # CONFIG_DEBUG_SPINLOCK is not set |
1185 | # CONFIG_DEBUG_SPINLOCK_SLEEP is not set | 1286 | # CONFIG_DEBUG_SPINLOCK_SLEEP is not set |
1186 | # CONFIG_DEBUG_KOBJECT is not set | 1287 | # CONFIG_DEBUG_KOBJECT is not set |
1187 | # CONFIG_DEBUG_INFO is not set | 1288 | # CONFIG_DEBUG_INFO is not set |
1188 | CONFIG_DEBUG_FS=y | 1289 | CONFIG_DEBUG_FS=y |
1290 | # CONFIG_DEBUG_VM is not set | ||
1189 | # CONFIG_FRAME_POINTER is not set | 1291 | # CONFIG_FRAME_POINTER is not set |
1292 | # CONFIG_FORCED_INLINING is not set | ||
1293 | # CONFIG_RCU_TORTURE_TEST is not set | ||
1190 | CONFIG_INIT_DEBUG=y | 1294 | CONFIG_INIT_DEBUG=y |
1295 | # CONFIG_DEBUG_RODATA is not set | ||
1191 | # CONFIG_IOMMU_DEBUG is not set | 1296 | # CONFIG_IOMMU_DEBUG is not set |
1192 | CONFIG_KPROBES=y | ||
1193 | 1297 | ||
1194 | # | 1298 | # |
1195 | # Security options | 1299 | # Security options |
diff --git a/arch/x86_64/ia32/Makefile b/arch/x86_64/ia32/Makefile index f76217d8f579..929e6b0771f8 100644 --- a/arch/x86_64/ia32/Makefile +++ b/arch/x86_64/ia32/Makefile | |||
@@ -2,9 +2,9 @@ | |||
2 | # Makefile for the ia32 kernel emulation subsystem. | 2 | # Makefile for the ia32 kernel emulation subsystem. |
3 | # | 3 | # |
4 | 4 | ||
5 | obj-$(CONFIG_IA32_EMULATION) := ia32entry.o sys_ia32.o ia32_ioctl.o \ | 5 | obj-$(CONFIG_IA32_EMULATION) := ia32entry.o sys_ia32.o ia32_signal.o tls32.o \ |
6 | ia32_signal.o tls32.o \ | 6 | ia32_binfmt.o fpu32.o ptrace32.o syscall32.o syscall32_syscall.o \ |
7 | ia32_binfmt.o fpu32.o ptrace32.o syscall32.o syscall32_syscall.o | 7 | mmap32.o |
8 | 8 | ||
9 | sysv-$(CONFIG_SYSVIPC) := ipc32.o | 9 | sysv-$(CONFIG_SYSVIPC) := ipc32.o |
10 | obj-$(CONFIG_IA32_EMULATION) += $(sysv-y) | 10 | obj-$(CONFIG_IA32_EMULATION) += $(sysv-y) |
@@ -29,4 +29,3 @@ $(obj)/vsyscall-%.so: $(src)/vsyscall.lds $(obj)/vsyscall-%.o FORCE | |||
29 | 29 | ||
30 | AFLAGS_vsyscall-sysenter.o = -m32 | 30 | AFLAGS_vsyscall-sysenter.o = -m32 |
31 | AFLAGS_vsyscall-syscall.o = -m32 | 31 | AFLAGS_vsyscall-syscall.o = -m32 |
32 | CFLAGS_ia32_ioctl.o += -Ifs/ | ||
diff --git a/arch/x86_64/ia32/ia32_aout.c b/arch/x86_64/ia32/ia32_aout.c index 93c60f4aa47a..3bf58af98936 100644 --- a/arch/x86_64/ia32/ia32_aout.c +++ b/arch/x86_64/ia32/ia32_aout.c | |||
@@ -36,9 +36,6 @@ | |||
36 | #undef WARN_OLD | 36 | #undef WARN_OLD |
37 | #undef CORE_DUMP /* probably broken */ | 37 | #undef CORE_DUMP /* probably broken */ |
38 | 38 | ||
39 | extern int ia32_setup_arg_pages(struct linux_binprm *bprm, | ||
40 | unsigned long stack_top, int exec_stack); | ||
41 | |||
42 | static int load_aout_binary(struct linux_binprm *, struct pt_regs * regs); | 39 | static int load_aout_binary(struct linux_binprm *, struct pt_regs * regs); |
43 | static int load_aout_library(struct file*); | 40 | static int load_aout_library(struct file*); |
44 | 41 | ||
diff --git a/arch/x86_64/ia32/ia32_binfmt.c b/arch/x86_64/ia32/ia32_binfmt.c index d9161e395978..572b3b28772d 100644 --- a/arch/x86_64/ia32/ia32_binfmt.c +++ b/arch/x86_64/ia32/ia32_binfmt.c | |||
@@ -197,8 +197,7 @@ static inline void elf_core_copy_regs(elf_gregset_t *elfregs, struct pt_regs *re | |||
197 | 197 | ||
198 | static inline int elf_core_copy_task_regs(struct task_struct *t, elf_gregset_t* elfregs) | 198 | static inline int elf_core_copy_task_regs(struct task_struct *t, elf_gregset_t* elfregs) |
199 | { | 199 | { |
200 | struct pt_regs *pp = (struct pt_regs *)(t->thread.rsp0); | 200 | struct pt_regs *pp = task_pt_regs(t); |
201 | --pp; | ||
202 | ELF_CORE_COPY_REGS((*elfregs), pp); | 201 | ELF_CORE_COPY_REGS((*elfregs), pp); |
203 | /* fix wrong segments */ | 202 | /* fix wrong segments */ |
204 | (*elfregs)[7] = t->thread.ds; | 203 | (*elfregs)[7] = t->thread.ds; |
@@ -217,8 +216,7 @@ elf_core_copy_task_fpregs(struct task_struct *tsk, struct pt_regs *regs, elf_fpr | |||
217 | if (!tsk_used_math(tsk)) | 216 | if (!tsk_used_math(tsk)) |
218 | return 0; | 217 | return 0; |
219 | if (!regs) | 218 | if (!regs) |
220 | regs = (struct pt_regs *)tsk->thread.rsp0; | 219 | regs = task_pt_regs(tsk); |
221 | --regs; | ||
222 | if (tsk == current) | 220 | if (tsk == current) |
223 | unlazy_fpu(tsk); | 221 | unlazy_fpu(tsk); |
224 | set_fs(KERNEL_DS); | 222 | set_fs(KERNEL_DS); |
@@ -234,7 +232,7 @@ elf_core_copy_task_fpregs(struct task_struct *tsk, struct pt_regs *regs, elf_fpr | |||
234 | static inline int | 232 | static inline int |
235 | elf_core_copy_task_xfpregs(struct task_struct *t, elf_fpxregset_t *xfpu) | 233 | elf_core_copy_task_xfpregs(struct task_struct *t, elf_fpxregset_t *xfpu) |
236 | { | 234 | { |
237 | struct pt_regs *regs = ((struct pt_regs *)(t->thread.rsp0))-1; | 235 | struct pt_regs *regs = task_pt_regs(t); |
238 | if (!tsk_used_math(t)) | 236 | if (!tsk_used_math(t)) |
239 | return 0; | 237 | return 0; |
240 | if (t == current) | 238 | if (t == current) |
@@ -295,8 +293,6 @@ int ia32_setup_arg_pages(struct linux_binprm *bprm, unsigned long stack_top, int | |||
295 | } while(0) | 293 | } while(0) |
296 | 294 | ||
297 | 295 | ||
298 | #define elf_map elf32_map | ||
299 | |||
300 | #include <linux/module.h> | 296 | #include <linux/module.h> |
301 | 297 | ||
302 | MODULE_DESCRIPTION("Binary format loader for compatibility with IA32 ELF binaries."); | 298 | MODULE_DESCRIPTION("Binary format loader for compatibility with IA32 ELF binaries."); |
@@ -335,7 +331,8 @@ static void elf32_init(struct pt_regs *regs) | |||
335 | me->thread.es = __USER_DS; | 331 | me->thread.es = __USER_DS; |
336 | } | 332 | } |
337 | 333 | ||
338 | int setup_arg_pages(struct linux_binprm *bprm, unsigned long stack_top, int executable_stack) | 334 | int ia32_setup_arg_pages(struct linux_binprm *bprm, unsigned long stack_top, |
335 | int executable_stack) | ||
339 | { | 336 | { |
340 | unsigned long stack_base; | 337 | unsigned long stack_base; |
341 | struct vm_area_struct *mpnt; | 338 | struct vm_area_struct *mpnt; |
@@ -389,21 +386,7 @@ int setup_arg_pages(struct linux_binprm *bprm, unsigned long stack_top, int exec | |||
389 | 386 | ||
390 | return 0; | 387 | return 0; |
391 | } | 388 | } |
392 | 389 | EXPORT_SYMBOL(ia32_setup_arg_pages); | |
393 | static unsigned long | ||
394 | elf32_map (struct file *filep, unsigned long addr, struct elf_phdr *eppnt, int prot, int type) | ||
395 | { | ||
396 | unsigned long map_addr; | ||
397 | struct task_struct *me = current; | ||
398 | |||
399 | down_write(&me->mm->mmap_sem); | ||
400 | map_addr = do_mmap(filep, ELF_PAGESTART(addr), | ||
401 | eppnt->p_filesz + ELF_PAGEOFFSET(eppnt->p_vaddr), prot, | ||
402 | type, | ||
403 | eppnt->p_offset - ELF_PAGEOFFSET(eppnt->p_vaddr)); | ||
404 | up_write(&me->mm->mmap_sem); | ||
405 | return(map_addr); | ||
406 | } | ||
407 | 390 | ||
408 | #ifdef CONFIG_SYSCTL | 391 | #ifdef CONFIG_SYSCTL |
409 | /* Register vsyscall32 into the ABI table */ | 392 | /* Register vsyscall32 into the ABI table */ |
diff --git a/arch/x86_64/ia32/ia32_ioctl.c b/arch/x86_64/ia32/ia32_ioctl.c deleted file mode 100644 index e335bd0b637d..000000000000 --- a/arch/x86_64/ia32/ia32_ioctl.c +++ /dev/null | |||
@@ -1,79 +0,0 @@ | |||
1 | /* $Id: ia32_ioctl.c,v 1.25 2002/10/11 07:17:06 ak Exp $ | ||
2 | * ioctl32.c: Conversion between 32bit and 64bit native ioctls. | ||
3 | * | ||
4 | * Copyright (C) 1997-2000 Jakub Jelinek (jakub@redhat.com) | ||
5 | * Copyright (C) 1998 Eddie C. Dost (ecd@skynet.be) | ||
6 | * Copyright (C) 2001,2002 Andi Kleen, SuSE Labs | ||
7 | * | ||
8 | * These routines maintain argument size conversion between 32bit and 64bit | ||
9 | * ioctls. | ||
10 | */ | ||
11 | |||
12 | #define INCLUDES | ||
13 | #include <linux/syscalls.h> | ||
14 | #include "compat_ioctl.c" | ||
15 | #include <asm/ia32.h> | ||
16 | |||
17 | #define CODE | ||
18 | #include "compat_ioctl.c" | ||
19 | |||
20 | #define RTC_IRQP_READ32 _IOR('p', 0x0b, unsigned int) /* Read IRQ rate */ | ||
21 | #define RTC_IRQP_SET32 _IOW('p', 0x0c, unsigned int) /* Set IRQ rate */ | ||
22 | #define RTC_EPOCH_READ32 _IOR('p', 0x0d, unsigned) /* Read epoch */ | ||
23 | #define RTC_EPOCH_SET32 _IOW('p', 0x0e, unsigned) /* Set epoch */ | ||
24 | |||
25 | static int rtc32_ioctl(unsigned fd, unsigned cmd, unsigned long arg) | ||
26 | { | ||
27 | unsigned long val; | ||
28 | mm_segment_t oldfs = get_fs(); | ||
29 | int ret; | ||
30 | |||
31 | switch (cmd) { | ||
32 | case RTC_IRQP_READ32: | ||
33 | set_fs(KERNEL_DS); | ||
34 | ret = sys_ioctl(fd, RTC_IRQP_READ, (unsigned long)&val); | ||
35 | set_fs(oldfs); | ||
36 | if (!ret) | ||
37 | ret = put_user(val, (unsigned int __user *) arg); | ||
38 | return ret; | ||
39 | |||
40 | case RTC_IRQP_SET32: | ||
41 | cmd = RTC_IRQP_SET; | ||
42 | break; | ||
43 | |||
44 | case RTC_EPOCH_READ32: | ||
45 | set_fs(KERNEL_DS); | ||
46 | ret = sys_ioctl(fd, RTC_EPOCH_READ, (unsigned long) &val); | ||
47 | set_fs(oldfs); | ||
48 | if (!ret) | ||
49 | ret = put_user(val, (unsigned int __user *) arg); | ||
50 | return ret; | ||
51 | |||
52 | case RTC_EPOCH_SET32: | ||
53 | cmd = RTC_EPOCH_SET; | ||
54 | break; | ||
55 | } | ||
56 | return sys_ioctl(fd,cmd,arg); | ||
57 | } | ||
58 | |||
59 | |||
60 | #define HANDLE_IOCTL(cmd,handler) { (cmd), (ioctl_trans_handler_t)(handler) }, | ||
61 | #define COMPATIBLE_IOCTL(cmd) HANDLE_IOCTL(cmd,sys_ioctl) | ||
62 | |||
63 | struct ioctl_trans ioctl_start[] = { | ||
64 | #include <linux/compat_ioctl.h> | ||
65 | #define DECLARES | ||
66 | #include "compat_ioctl.c" | ||
67 | |||
68 | /* And these ioctls need translation */ | ||
69 | /* realtime device */ | ||
70 | HANDLE_IOCTL(RTC_IRQP_READ, rtc32_ioctl) | ||
71 | HANDLE_IOCTL(RTC_IRQP_READ32,rtc32_ioctl) | ||
72 | HANDLE_IOCTL(RTC_IRQP_SET32, rtc32_ioctl) | ||
73 | HANDLE_IOCTL(RTC_EPOCH_READ32, rtc32_ioctl) | ||
74 | HANDLE_IOCTL(RTC_EPOCH_SET32, rtc32_ioctl) | ||
75 | /* take care of sizeof(sizeof()) breakage */ | ||
76 | }; | ||
77 | |||
78 | int ioctl_table_size = ARRAY_SIZE(ioctl_start); | ||
79 | |||
diff --git a/arch/x86_64/ia32/ia32_signal.c b/arch/x86_64/ia32/ia32_signal.c index 0903cc1faef2..e0a92439f634 100644 --- a/arch/x86_64/ia32/ia32_signal.c +++ b/arch/x86_64/ia32/ia32_signal.c | |||
@@ -353,7 +353,6 @@ ia32_setup_sigcontext(struct sigcontext_ia32 __user *sc, struct _fpstate_ia32 __ | |||
353 | struct pt_regs *regs, unsigned int mask) | 353 | struct pt_regs *regs, unsigned int mask) |
354 | { | 354 | { |
355 | int tmp, err = 0; | 355 | int tmp, err = 0; |
356 | u32 eflags; | ||
357 | 356 | ||
358 | tmp = 0; | 357 | tmp = 0; |
359 | __asm__("movl %%gs,%0" : "=r"(tmp): "0"(tmp)); | 358 | __asm__("movl %%gs,%0" : "=r"(tmp): "0"(tmp)); |
@@ -378,10 +377,7 @@ ia32_setup_sigcontext(struct sigcontext_ia32 __user *sc, struct _fpstate_ia32 __ | |||
378 | err |= __put_user(current->thread.trap_no, &sc->trapno); | 377 | err |= __put_user(current->thread.trap_no, &sc->trapno); |
379 | err |= __put_user(current->thread.error_code, &sc->err); | 378 | err |= __put_user(current->thread.error_code, &sc->err); |
380 | err |= __put_user((u32)regs->rip, &sc->eip); | 379 | err |= __put_user((u32)regs->rip, &sc->eip); |
381 | eflags = regs->eflags; | 380 | err |= __put_user((u32)regs->eflags, &sc->eflags); |
382 | if (current->ptrace & PT_PTRACED) | ||
383 | eflags &= ~TF_MASK; | ||
384 | err |= __put_user((u32)eflags, &sc->eflags); | ||
385 | err |= __put_user((u32)regs->rsp, &sc->esp_at_signal); | 381 | err |= __put_user((u32)regs->rsp, &sc->esp_at_signal); |
386 | 382 | ||
387 | tmp = save_i387_ia32(current, fpstate, regs, 0); | 383 | tmp = save_i387_ia32(current, fpstate, regs, 0); |
@@ -505,13 +501,9 @@ int ia32_setup_frame(int sig, struct k_sigaction *ka, | |||
505 | regs->ss = __USER32_DS; | 501 | regs->ss = __USER32_DS; |
506 | 502 | ||
507 | set_fs(USER_DS); | 503 | set_fs(USER_DS); |
508 | if (regs->eflags & TF_MASK) { | 504 | regs->eflags &= ~TF_MASK; |
509 | if (current->ptrace & PT_PTRACED) { | 505 | if (test_thread_flag(TIF_SINGLESTEP)) |
510 | ptrace_notify(SIGTRAP); | 506 | ptrace_notify(SIGTRAP); |
511 | } else { | ||
512 | regs->eflags &= ~TF_MASK; | ||
513 | } | ||
514 | } | ||
515 | 507 | ||
516 | #if DEBUG_SIG | 508 | #if DEBUG_SIG |
517 | printk("SIG deliver (%s:%d): sp=%p pc=%p ra=%p\n", | 509 | printk("SIG deliver (%s:%d): sp=%p pc=%p ra=%p\n", |
@@ -605,13 +597,9 @@ int ia32_setup_rt_frame(int sig, struct k_sigaction *ka, siginfo_t *info, | |||
605 | regs->ss = __USER32_DS; | 597 | regs->ss = __USER32_DS; |
606 | 598 | ||
607 | set_fs(USER_DS); | 599 | set_fs(USER_DS); |
608 | if (regs->eflags & TF_MASK) { | 600 | regs->eflags &= ~TF_MASK; |
609 | if (current->ptrace & PT_PTRACED) { | 601 | if (test_thread_flag(TIF_SINGLESTEP)) |
610 | ptrace_notify(SIGTRAP); | 602 | ptrace_notify(SIGTRAP); |
611 | } else { | ||
612 | regs->eflags &= ~TF_MASK; | ||
613 | } | ||
614 | } | ||
615 | 603 | ||
616 | #if DEBUG_SIG | 604 | #if DEBUG_SIG |
617 | printk("SIG deliver (%s:%d): sp=%p pc=%p ra=%p\n", | 605 | printk("SIG deliver (%s:%d): sp=%p pc=%p ra=%p\n", |
diff --git a/arch/x86_64/ia32/ia32entry.S b/arch/x86_64/ia32/ia32entry.S index e0eb0c712fe9..f05c2a802489 100644 --- a/arch/x86_64/ia32/ia32entry.S +++ b/arch/x86_64/ia32/ia32entry.S | |||
@@ -35,6 +35,18 @@ | |||
35 | movq %rax,R8(%rsp) | 35 | movq %rax,R8(%rsp) |
36 | .endm | 36 | .endm |
37 | 37 | ||
38 | .macro CFI_STARTPROC32 simple | ||
39 | CFI_STARTPROC \simple | ||
40 | CFI_UNDEFINED r8 | ||
41 | CFI_UNDEFINED r9 | ||
42 | CFI_UNDEFINED r10 | ||
43 | CFI_UNDEFINED r11 | ||
44 | CFI_UNDEFINED r12 | ||
45 | CFI_UNDEFINED r13 | ||
46 | CFI_UNDEFINED r14 | ||
47 | CFI_UNDEFINED r15 | ||
48 | .endm | ||
49 | |||
38 | /* | 50 | /* |
39 | * 32bit SYSENTER instruction entry. | 51 | * 32bit SYSENTER instruction entry. |
40 | * | 52 | * |
@@ -55,7 +67,7 @@ | |||
55 | * with the int 0x80 path. | 67 | * with the int 0x80 path. |
56 | */ | 68 | */ |
57 | ENTRY(ia32_sysenter_target) | 69 | ENTRY(ia32_sysenter_target) |
58 | CFI_STARTPROC simple | 70 | CFI_STARTPROC32 simple |
59 | CFI_DEF_CFA rsp,0 | 71 | CFI_DEF_CFA rsp,0 |
60 | CFI_REGISTER rsp,rbp | 72 | CFI_REGISTER rsp,rbp |
61 | swapgs | 73 | swapgs |
@@ -92,6 +104,7 @@ ENTRY(ia32_sysenter_target) | |||
92 | .quad 1b,ia32_badarg | 104 | .quad 1b,ia32_badarg |
93 | .previous | 105 | .previous |
94 | GET_THREAD_INFO(%r10) | 106 | GET_THREAD_INFO(%r10) |
107 | orl $TS_COMPAT,threadinfo_status(%r10) | ||
95 | testl $(_TIF_SYSCALL_TRACE|_TIF_SYSCALL_AUDIT|_TIF_SECCOMP),threadinfo_flags(%r10) | 108 | testl $(_TIF_SYSCALL_TRACE|_TIF_SYSCALL_AUDIT|_TIF_SECCOMP),threadinfo_flags(%r10) |
96 | CFI_REMEMBER_STATE | 109 | CFI_REMEMBER_STATE |
97 | jnz sysenter_tracesys | 110 | jnz sysenter_tracesys |
@@ -105,6 +118,7 @@ sysenter_do_call: | |||
105 | cli | 118 | cli |
106 | testl $_TIF_ALLWORK_MASK,threadinfo_flags(%r10) | 119 | testl $_TIF_ALLWORK_MASK,threadinfo_flags(%r10) |
107 | jnz int_ret_from_sys_call | 120 | jnz int_ret_from_sys_call |
121 | andl $~TS_COMPAT,threadinfo_status(%r10) | ||
108 | /* clear IF, that popfq doesn't enable interrupts early */ | 122 | /* clear IF, that popfq doesn't enable interrupts early */ |
109 | andl $~0x200,EFLAGS-R11(%rsp) | 123 | andl $~0x200,EFLAGS-R11(%rsp) |
110 | RESTORE_ARGS 1,24,1,1,1,1 | 124 | RESTORE_ARGS 1,24,1,1,1,1 |
@@ -161,7 +175,7 @@ sysenter_tracesys: | |||
161 | * with the int 0x80 path. | 175 | * with the int 0x80 path. |
162 | */ | 176 | */ |
163 | ENTRY(ia32_cstar_target) | 177 | ENTRY(ia32_cstar_target) |
164 | CFI_STARTPROC simple | 178 | CFI_STARTPROC32 simple |
165 | CFI_DEF_CFA rsp,0 | 179 | CFI_DEF_CFA rsp,0 |
166 | CFI_REGISTER rip,rcx | 180 | CFI_REGISTER rip,rcx |
167 | /*CFI_REGISTER rflags,r11*/ | 181 | /*CFI_REGISTER rflags,r11*/ |
@@ -191,6 +205,7 @@ ENTRY(ia32_cstar_target) | |||
191 | .quad 1b,ia32_badarg | 205 | .quad 1b,ia32_badarg |
192 | .previous | 206 | .previous |
193 | GET_THREAD_INFO(%r10) | 207 | GET_THREAD_INFO(%r10) |
208 | orl $TS_COMPAT,threadinfo_status(%r10) | ||
194 | testl $(_TIF_SYSCALL_TRACE|_TIF_SYSCALL_AUDIT|_TIF_SECCOMP),threadinfo_flags(%r10) | 209 | testl $(_TIF_SYSCALL_TRACE|_TIF_SYSCALL_AUDIT|_TIF_SECCOMP),threadinfo_flags(%r10) |
195 | CFI_REMEMBER_STATE | 210 | CFI_REMEMBER_STATE |
196 | jnz cstar_tracesys | 211 | jnz cstar_tracesys |
@@ -204,6 +219,7 @@ cstar_do_call: | |||
204 | cli | 219 | cli |
205 | testl $_TIF_ALLWORK_MASK,threadinfo_flags(%r10) | 220 | testl $_TIF_ALLWORK_MASK,threadinfo_flags(%r10) |
206 | jnz int_ret_from_sys_call | 221 | jnz int_ret_from_sys_call |
222 | andl $~TS_COMPAT,threadinfo_status(%r10) | ||
207 | RESTORE_ARGS 1,-ARG_SKIP,1,1,1 | 223 | RESTORE_ARGS 1,-ARG_SKIP,1,1,1 |
208 | movl RIP-ARGOFFSET(%rsp),%ecx | 224 | movl RIP-ARGOFFSET(%rsp),%ecx |
209 | CFI_REGISTER rip,rcx | 225 | CFI_REGISTER rip,rcx |
@@ -276,6 +292,7 @@ ENTRY(ia32_syscall) | |||
276 | this could be a problem. */ | 292 | this could be a problem. */ |
277 | SAVE_ARGS 0,0,1 | 293 | SAVE_ARGS 0,0,1 |
278 | GET_THREAD_INFO(%r10) | 294 | GET_THREAD_INFO(%r10) |
295 | orl $TS_COMPAT,threadinfo_status(%r10) | ||
279 | testl $(_TIF_SYSCALL_TRACE|_TIF_SYSCALL_AUDIT|_TIF_SECCOMP),threadinfo_flags(%r10) | 296 | testl $(_TIF_SYSCALL_TRACE|_TIF_SYSCALL_AUDIT|_TIF_SECCOMP),threadinfo_flags(%r10) |
280 | jnz ia32_tracesys | 297 | jnz ia32_tracesys |
281 | ia32_do_syscall: | 298 | ia32_do_syscall: |
@@ -318,7 +335,7 @@ quiet_ni_syscall: | |||
318 | jmp ia32_ptregs_common | 335 | jmp ia32_ptregs_common |
319 | .endm | 336 | .endm |
320 | 337 | ||
321 | CFI_STARTPROC | 338 | CFI_STARTPROC32 |
322 | 339 | ||
323 | PTREGSCALL stub32_rt_sigreturn, sys32_rt_sigreturn, %rdi | 340 | PTREGSCALL stub32_rt_sigreturn, sys32_rt_sigreturn, %rdi |
324 | PTREGSCALL stub32_sigreturn, sys32_sigreturn, %rdi | 341 | PTREGSCALL stub32_sigreturn, sys32_sigreturn, %rdi |
@@ -333,15 +350,26 @@ quiet_ni_syscall: | |||
333 | 350 | ||
334 | ENTRY(ia32_ptregs_common) | 351 | ENTRY(ia32_ptregs_common) |
335 | popq %r11 | 352 | popq %r11 |
336 | CFI_ADJUST_CFA_OFFSET -8 | 353 | CFI_ENDPROC |
337 | CFI_REGISTER rip, r11 | 354 | CFI_STARTPROC32 simple |
355 | CFI_DEF_CFA rsp,SS+8-ARGOFFSET | ||
356 | CFI_REL_OFFSET rax,RAX-ARGOFFSET | ||
357 | CFI_REL_OFFSET rcx,RCX-ARGOFFSET | ||
358 | CFI_REL_OFFSET rdx,RDX-ARGOFFSET | ||
359 | CFI_REL_OFFSET rsi,RSI-ARGOFFSET | ||
360 | CFI_REL_OFFSET rdi,RDI-ARGOFFSET | ||
361 | CFI_REL_OFFSET rip,RIP-ARGOFFSET | ||
362 | /* CFI_REL_OFFSET cs,CS-ARGOFFSET*/ | ||
363 | /* CFI_REL_OFFSET rflags,EFLAGS-ARGOFFSET*/ | ||
364 | CFI_REL_OFFSET rsp,RSP-ARGOFFSET | ||
365 | /* CFI_REL_OFFSET ss,SS-ARGOFFSET*/ | ||
338 | SAVE_REST | 366 | SAVE_REST |
339 | call *%rax | 367 | call *%rax |
340 | RESTORE_REST | 368 | RESTORE_REST |
341 | jmp ia32_sysret /* misbalances the return cache */ | 369 | jmp ia32_sysret /* misbalances the return cache */ |
342 | CFI_ENDPROC | 370 | CFI_ENDPROC |
343 | 371 | ||
344 | .data | 372 | .section .rodata,"a" |
345 | .align 8 | 373 | .align 8 |
346 | .globl ia32_sys_call_table | 374 | .globl ia32_sys_call_table |
347 | ia32_sys_call_table: | 375 | ia32_sys_call_table: |
@@ -608,7 +636,7 @@ ia32_sys_call_table: | |||
608 | .quad sys_epoll_wait | 636 | .quad sys_epoll_wait |
609 | .quad sys_remap_file_pages | 637 | .quad sys_remap_file_pages |
610 | .quad sys_set_tid_address | 638 | .quad sys_set_tid_address |
611 | .quad sys32_timer_create | 639 | .quad compat_sys_timer_create |
612 | .quad compat_sys_timer_settime /* 260 */ | 640 | .quad compat_sys_timer_settime /* 260 */ |
613 | .quad compat_sys_timer_gettime | 641 | .quad compat_sys_timer_gettime |
614 | .quad sys_timer_getoverrun | 642 | .quad sys_timer_getoverrun |
@@ -643,6 +671,20 @@ ia32_sys_call_table: | |||
643 | .quad sys_inotify_init | 671 | .quad sys_inotify_init |
644 | .quad sys_inotify_add_watch | 672 | .quad sys_inotify_add_watch |
645 | .quad sys_inotify_rm_watch | 673 | .quad sys_inotify_rm_watch |
674 | .quad sys_migrate_pages | ||
675 | .quad compat_sys_openat /* 295 */ | ||
676 | .quad sys_mkdirat | ||
677 | .quad sys_mknodat | ||
678 | .quad sys_fchownat | ||
679 | .quad sys_futimesat | ||
680 | .quad compat_sys_newfstatat /* 300 */ | ||
681 | .quad sys_unlinkat | ||
682 | .quad sys_renameat | ||
683 | .quad sys_linkat | ||
684 | .quad sys_symlinkat | ||
685 | .quad sys_readlinkat /* 305 */ | ||
686 | .quad sys_fchmodat | ||
687 | .quad sys_faccessat | ||
646 | ia32_syscall_end: | 688 | ia32_syscall_end: |
647 | .rept IA32_NR_syscalls-(ia32_syscall_end-ia32_sys_call_table)/8 | 689 | .rept IA32_NR_syscalls-(ia32_syscall_end-ia32_sys_call_table)/8 |
648 | .quad ni_syscall | 690 | .quad ni_syscall |
diff --git a/arch/x86_64/ia32/mmap32.c b/arch/x86_64/ia32/mmap32.c new file mode 100644 index 000000000000..079f4132575c --- /dev/null +++ b/arch/x86_64/ia32/mmap32.c | |||
@@ -0,0 +1,78 @@ | |||
1 | /* | ||
2 | * linux/arch/x86_64/ia32/mm/mmap.c | ||
3 | * | ||
4 | * flexible mmap layout support | ||
5 | * | ||
6 | * Based on the i386 version which was | ||
7 | * | ||
8 | * Copyright 2003-2004 Red Hat Inc., Durham, North Carolina. | ||
9 | * All Rights Reserved. | ||
10 | * | ||
11 | * This program is free software; you can redistribute it and/or modify | ||
12 | * it under the terms of the GNU General Public License as published by | ||
13 | * the Free Software Foundation; either version 2 of the License, or | ||
14 | * (at your option) any later version. | ||
15 | * | ||
16 | * This program is distributed in the hope that it will be useful, | ||
17 | * but WITHOUT ANY WARRANTY; without even the implied warranty of | ||
18 | * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the | ||
19 | * GNU General Public License for more details. | ||
20 | * | ||
21 | * You should have received a copy of the GNU General Public License | ||
22 | * along with this program; if not, write to the Free Software | ||
23 | * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA | ||
24 | * | ||
25 | * | ||
26 | * Started by Ingo Molnar <mingo@elte.hu> | ||
27 | */ | ||
28 | |||
29 | #include <linux/personality.h> | ||
30 | #include <linux/mm.h> | ||
31 | #include <linux/random.h> | ||
32 | |||
33 | /* | ||
34 | * Top of mmap area (just below the process stack). | ||
35 | * | ||
36 | * Leave an at least ~128 MB hole. | ||
37 | */ | ||
38 | #define MIN_GAP (128*1024*1024) | ||
39 | #define MAX_GAP (TASK_SIZE/6*5) | ||
40 | |||
41 | static inline unsigned long mmap_base(struct mm_struct *mm) | ||
42 | { | ||
43 | unsigned long gap = current->signal->rlim[RLIMIT_STACK].rlim_cur; | ||
44 | unsigned long random_factor = 0; | ||
45 | |||
46 | if (current->flags & PF_RANDOMIZE) | ||
47 | random_factor = get_random_int() % (1024*1024); | ||
48 | |||
49 | if (gap < MIN_GAP) | ||
50 | gap = MIN_GAP; | ||
51 | else if (gap > MAX_GAP) | ||
52 | gap = MAX_GAP; | ||
53 | |||
54 | return PAGE_ALIGN(TASK_SIZE - gap - random_factor); | ||
55 | } | ||
56 | |||
57 | /* | ||
58 | * This function, called very early during the creation of a new | ||
59 | * process VM image, sets up which VM layout function to use: | ||
60 | */ | ||
61 | void ia32_pick_mmap_layout(struct mm_struct *mm) | ||
62 | { | ||
63 | /* | ||
64 | * Fall back to the standard layout if the personality | ||
65 | * bit is set, or if the expected stack growth is unlimited: | ||
66 | */ | ||
67 | if (sysctl_legacy_va_layout || | ||
68 | (current->personality & ADDR_COMPAT_LAYOUT) || | ||
69 | current->signal->rlim[RLIMIT_STACK].rlim_cur == RLIM_INFINITY) { | ||
70 | mm->mmap_base = TASK_UNMAPPED_BASE; | ||
71 | mm->get_unmapped_area = arch_get_unmapped_area; | ||
72 | mm->unmap_area = arch_unmap_area; | ||
73 | } else { | ||
74 | mm->mmap_base = mmap_base(mm); | ||
75 | mm->get_unmapped_area = arch_get_unmapped_area_topdown; | ||
76 | mm->unmap_area = arch_unmap_area_topdown; | ||
77 | } | ||
78 | } | ||
diff --git a/arch/x86_64/ia32/ptrace32.c b/arch/x86_64/ia32/ptrace32.c index 2a925e2af390..23a4515a73b4 100644 --- a/arch/x86_64/ia32/ptrace32.c +++ b/arch/x86_64/ia32/ptrace32.c | |||
@@ -28,9 +28,12 @@ | |||
28 | #include <asm/i387.h> | 28 | #include <asm/i387.h> |
29 | #include <asm/fpu32.h> | 29 | #include <asm/fpu32.h> |
30 | 30 | ||
31 | /* determines which flags the user has access to. */ | 31 | /* |
32 | /* 1 = access 0 = no access */ | 32 | * Determines which flags the user has access to [1 = access, 0 = no access]. |
33 | #define FLAG_MASK 0x44dd5UL | 33 | * Prohibits changing ID(21), VIP(20), VIF(19), VM(17), IOPL(12-13), IF(9). |
34 | * Also masks reserved bits (31-22, 15, 5, 3, 1). | ||
35 | */ | ||
36 | #define FLAG_MASK 0x54dd5UL | ||
34 | 37 | ||
35 | #define R32(l,q) \ | 38 | #define R32(l,q) \ |
36 | case offsetof(struct user32, regs.l): stack[offsetof(struct pt_regs, q)/8] = val; break | 39 | case offsetof(struct user32, regs.l): stack[offsetof(struct pt_regs, q)/8] = val; break |
@@ -38,7 +41,7 @@ | |||
38 | static int putreg32(struct task_struct *child, unsigned regno, u32 val) | 41 | static int putreg32(struct task_struct *child, unsigned regno, u32 val) |
39 | { | 42 | { |
40 | int i; | 43 | int i; |
41 | __u64 *stack = (__u64 *)(child->thread.rsp0 - sizeof(struct pt_regs)); | 44 | __u64 *stack = (__u64 *)task_pt_regs(child); |
42 | 45 | ||
43 | switch (regno) { | 46 | switch (regno) { |
44 | case offsetof(struct user32, regs.fs): | 47 | case offsetof(struct user32, regs.fs): |
@@ -134,7 +137,7 @@ static int putreg32(struct task_struct *child, unsigned regno, u32 val) | |||
134 | 137 | ||
135 | static int getreg32(struct task_struct *child, unsigned regno, u32 *val) | 138 | static int getreg32(struct task_struct *child, unsigned regno, u32 *val) |
136 | { | 139 | { |
137 | __u64 *stack = (__u64 *)(child->thread.rsp0 - sizeof(struct pt_regs)); | 140 | __u64 *stack = (__u64 *)task_pt_regs(child); |
138 | 141 | ||
139 | switch (regno) { | 142 | switch (regno) { |
140 | case offsetof(struct user32, regs.fs): | 143 | case offsetof(struct user32, regs.fs): |
@@ -196,36 +199,6 @@ static int getreg32(struct task_struct *child, unsigned regno, u32 *val) | |||
196 | 199 | ||
197 | #undef R32 | 200 | #undef R32 |
198 | 201 | ||
199 | static struct task_struct *find_target(int request, int pid, int *err) | ||
200 | { | ||
201 | struct task_struct *child; | ||
202 | |||
203 | *err = -EPERM; | ||
204 | if (pid == 1) | ||
205 | return NULL; | ||
206 | |||
207 | *err = -ESRCH; | ||
208 | read_lock(&tasklist_lock); | ||
209 | child = find_task_by_pid(pid); | ||
210 | if (child) | ||
211 | get_task_struct(child); | ||
212 | read_unlock(&tasklist_lock); | ||
213 | if (child) { | ||
214 | *err = -EPERM; | ||
215 | if (child->pid == 1) | ||
216 | goto out; | ||
217 | *err = ptrace_check_attach(child, request == PTRACE_KILL); | ||
218 | if (*err < 0) | ||
219 | goto out; | ||
220 | return child; | ||
221 | } | ||
222 | out: | ||
223 | if (child) | ||
224 | put_task_struct(child); | ||
225 | return NULL; | ||
226 | |||
227 | } | ||
228 | |||
229 | asmlinkage long sys32_ptrace(long request, u32 pid, u32 addr, u32 data) | 202 | asmlinkage long sys32_ptrace(long request, u32 pid, u32 addr, u32 data) |
230 | { | 203 | { |
231 | struct task_struct *child; | 204 | struct task_struct *child; |
@@ -254,11 +227,18 @@ asmlinkage long sys32_ptrace(long request, u32 pid, u32 addr, u32 data) | |||
254 | break; | 227 | break; |
255 | } | 228 | } |
256 | 229 | ||
257 | child = find_target(request, pid, &ret); | 230 | if (request == PTRACE_TRACEME) |
258 | if (!child) | 231 | return ptrace_traceme(); |
259 | return ret; | 232 | |
233 | child = ptrace_get_task_struct(pid); | ||
234 | if (IS_ERR(child)) | ||
235 | return PTR_ERR(child); | ||
260 | 236 | ||
261 | childregs = (struct pt_regs *)(child->thread.rsp0 - sizeof(struct pt_regs)); | 237 | ret = ptrace_check_attach(child, request == PTRACE_KILL); |
238 | if (ret < 0) | ||
239 | goto out; | ||
240 | |||
241 | childregs = task_pt_regs(child); | ||
262 | 242 | ||
263 | switch (request) { | 243 | switch (request) { |
264 | case PTRACE_PEEKDATA: | 244 | case PTRACE_PEEKDATA: |
@@ -373,6 +353,7 @@ asmlinkage long sys32_ptrace(long request, u32 pid, u32 addr, u32 data) | |||
373 | break; | 353 | break; |
374 | } | 354 | } |
375 | 355 | ||
356 | out: | ||
376 | put_task_struct(child); | 357 | put_task_struct(child); |
377 | return ret; | 358 | return ret; |
378 | } | 359 | } |
diff --git a/arch/x86_64/ia32/sys_ia32.c b/arch/x86_64/ia32/sys_ia32.c index 5389df610e78..54481af5344a 100644 --- a/arch/x86_64/ia32/sys_ia32.c +++ b/arch/x86_64/ia32/sys_ia32.c | |||
@@ -969,25 +969,6 @@ long sys32_kill(int pid, int sig) | |||
969 | return sys_kill(pid, sig); | 969 | return sys_kill(pid, sig); |
970 | } | 970 | } |
971 | 971 | ||
972 | extern asmlinkage long | ||
973 | sys_timer_create(clockid_t which_clock, | ||
974 | struct sigevent __user *timer_event_spec, | ||
975 | timer_t __user * created_timer_id); | ||
976 | |||
977 | long | ||
978 | sys32_timer_create(u32 clock, struct compat_sigevent __user *se32, timer_t __user *timer_id) | ||
979 | { | ||
980 | struct sigevent __user *p = NULL; | ||
981 | if (se32) { | ||
982 | struct sigevent se; | ||
983 | p = compat_alloc_user_space(sizeof(struct sigevent)); | ||
984 | if (get_compat_sigevent(&se, se32) || | ||
985 | copy_to_user(p, &se, sizeof(se))) | ||
986 | return -EFAULT; | ||
987 | } | ||
988 | return sys_timer_create(clock, p, timer_id); | ||
989 | } | ||
990 | |||
991 | long sys32_fadvise64_64(int fd, __u32 offset_low, __u32 offset_high, | 972 | long sys32_fadvise64_64(int fd, __u32 offset_low, __u32 offset_high, |
992 | __u32 len_low, __u32 len_high, int advice) | 973 | __u32 len_low, __u32 len_high, int advice) |
993 | { | 974 | { |
diff --git a/arch/x86_64/ia32/vsyscall-sigreturn.S b/arch/x86_64/ia32/vsyscall-sigreturn.S index 8b5a4b060bb5..d90321fe9bba 100644 --- a/arch/x86_64/ia32/vsyscall-sigreturn.S +++ b/arch/x86_64/ia32/vsyscall-sigreturn.S | |||
@@ -7,6 +7,7 @@ | |||
7 | * by doing ".balign 32" must match in both versions of the page. | 7 | * by doing ".balign 32" must match in both versions of the page. |
8 | */ | 8 | */ |
9 | 9 | ||
10 | .code32 | ||
10 | .section .text.sigreturn,"ax" | 11 | .section .text.sigreturn,"ax" |
11 | .balign 32 | 12 | .balign 32 |
12 | .globl __kernel_sigreturn | 13 | .globl __kernel_sigreturn |
diff --git a/arch/x86_64/ia32/vsyscall-syscall.S b/arch/x86_64/ia32/vsyscall-syscall.S index b024965bb689..cf9ef678de3e 100644 --- a/arch/x86_64/ia32/vsyscall-syscall.S +++ b/arch/x86_64/ia32/vsyscall-syscall.S | |||
@@ -6,6 +6,7 @@ | |||
6 | #include <asm/asm-offsets.h> | 6 | #include <asm/asm-offsets.h> |
7 | #include <asm/segment.h> | 7 | #include <asm/segment.h> |
8 | 8 | ||
9 | .code32 | ||
9 | .text | 10 | .text |
10 | .section .text.vsyscall,"ax" | 11 | .section .text.vsyscall,"ax" |
11 | .globl __kernel_vsyscall | 12 | .globl __kernel_vsyscall |
diff --git a/arch/x86_64/ia32/vsyscall-sysenter.S b/arch/x86_64/ia32/vsyscall-sysenter.S index 71f3de586b56..ae056e553d13 100644 --- a/arch/x86_64/ia32/vsyscall-sysenter.S +++ b/arch/x86_64/ia32/vsyscall-sysenter.S | |||
@@ -5,6 +5,7 @@ | |||
5 | #include <asm/ia32_unistd.h> | 5 | #include <asm/ia32_unistd.h> |
6 | #include <asm/asm-offsets.h> | 6 | #include <asm/asm-offsets.h> |
7 | 7 | ||
8 | .code32 | ||
8 | .text | 9 | .text |
9 | .section .text.vsyscall,"ax" | 10 | .section .text.vsyscall,"ax" |
10 | .globl __kernel_vsyscall | 11 | .globl __kernel_vsyscall |
diff --git a/arch/x86_64/kernel/Makefile b/arch/x86_64/kernel/Makefile index 14328cab5d3a..72fe60c20d39 100644 --- a/arch/x86_64/kernel/Makefile +++ b/arch/x86_64/kernel/Makefile | |||
@@ -7,10 +7,12 @@ EXTRA_AFLAGS := -traditional | |||
7 | obj-y := process.o signal.o entry.o traps.o irq.o \ | 7 | obj-y := process.o signal.o entry.o traps.o irq.o \ |
8 | ptrace.o time.o ioport.o ldt.o setup.o i8259.o sys_x86_64.o \ | 8 | ptrace.o time.o ioport.o ldt.o setup.o i8259.o sys_x86_64.o \ |
9 | x8664_ksyms.o i387.o syscall.o vsyscall.o \ | 9 | x8664_ksyms.o i387.o syscall.o vsyscall.o \ |
10 | setup64.o bootflag.o e820.o reboot.o quirks.o i8237.o | 10 | setup64.o bootflag.o e820.o reboot.o quirks.o i8237.o \ |
11 | dmi_scan.o pci-dma.o pci-nommu.o | ||
11 | 12 | ||
12 | obj-$(CONFIG_X86_MCE) += mce.o | 13 | obj-$(CONFIG_X86_MCE) += mce.o |
13 | obj-$(CONFIG_X86_MCE_INTEL) += mce_intel.o | 14 | obj-$(CONFIG_X86_MCE_INTEL) += mce_intel.o |
15 | obj-$(CONFIG_X86_MCE_AMD) += mce_amd.o | ||
14 | obj-$(CONFIG_MTRR) += ../../i386/kernel/cpu/mtrr/ | 16 | obj-$(CONFIG_MTRR) += ../../i386/kernel/cpu/mtrr/ |
15 | obj-$(CONFIG_ACPI) += acpi/ | 17 | obj-$(CONFIG_ACPI) += acpi/ |
16 | obj-$(CONFIG_X86_MSR) += msr.o | 18 | obj-$(CONFIG_X86_MSR) += msr.o |
@@ -21,14 +23,16 @@ obj-$(CONFIG_X86_LOCAL_APIC) += apic.o nmi.o | |||
21 | obj-$(CONFIG_X86_IO_APIC) += io_apic.o mpparse.o \ | 23 | obj-$(CONFIG_X86_IO_APIC) += io_apic.o mpparse.o \ |
22 | genapic.o genapic_cluster.o genapic_flat.o | 24 | genapic.o genapic_cluster.o genapic_flat.o |
23 | obj-$(CONFIG_KEXEC) += machine_kexec.o relocate_kernel.o crash.o | 25 | obj-$(CONFIG_KEXEC) += machine_kexec.o relocate_kernel.o crash.o |
26 | obj-$(CONFIG_CRASH_DUMP) += crash_dump.o | ||
24 | obj-$(CONFIG_PM) += suspend.o | 27 | obj-$(CONFIG_PM) += suspend.o |
25 | obj-$(CONFIG_SOFTWARE_SUSPEND) += suspend_asm.o | 28 | obj-$(CONFIG_SOFTWARE_SUSPEND) += suspend_asm.o |
26 | obj-$(CONFIG_CPU_FREQ) += cpufreq/ | 29 | obj-$(CONFIG_CPU_FREQ) += cpufreq/ |
27 | obj-$(CONFIG_EARLY_PRINTK) += early_printk.o | 30 | obj-$(CONFIG_EARLY_PRINTK) += early_printk.o |
28 | obj-$(CONFIG_GART_IOMMU) += pci-gart.o aperture.o | 31 | obj-$(CONFIG_GART_IOMMU) += pci-gart.o aperture.o |
29 | obj-$(CONFIG_DUMMY_IOMMU) += pci-nommu.o pci-dma.o | 32 | obj-$(CONFIG_SWIOTLB) += pci-swiotlb.o |
30 | obj-$(CONFIG_KPROBES) += kprobes.o | 33 | obj-$(CONFIG_KPROBES) += kprobes.o |
31 | obj-$(CONFIG_X86_PM_TIMER) += pmtimer.o | 34 | obj-$(CONFIG_X86_PM_TIMER) += pmtimer.o |
35 | obj-$(CONFIG_X86_VSMP) += vsmp.o | ||
32 | 36 | ||
33 | obj-$(CONFIG_MODULES) += module.o | 37 | obj-$(CONFIG_MODULES) += module.o |
34 | 38 | ||
@@ -45,3 +49,5 @@ intel_cacheinfo-y += ../../i386/kernel/cpu/intel_cacheinfo.o | |||
45 | quirks-y += ../../i386/kernel/quirks.o | 49 | quirks-y += ../../i386/kernel/quirks.o |
46 | i8237-y += ../../i386/kernel/i8237.o | 50 | i8237-y += ../../i386/kernel/i8237.o |
47 | msr-$(subst m,y,$(CONFIG_X86_MSR)) += ../../i386/kernel/msr.o | 51 | msr-$(subst m,y,$(CONFIG_X86_MSR)) += ../../i386/kernel/msr.o |
52 | dmi_scan-y += ../../i386/kernel/dmi_scan.o | ||
53 | |||
diff --git a/arch/x86_64/kernel/aperture.c b/arch/x86_64/kernel/aperture.c index 962ad4823b6a..e4e2b7d01f89 100644 --- a/arch/x86_64/kernel/aperture.c +++ b/arch/x86_64/kernel/aperture.c | |||
@@ -23,6 +23,7 @@ | |||
23 | #include <asm/io.h> | 23 | #include <asm/io.h> |
24 | #include <asm/proto.h> | 24 | #include <asm/proto.h> |
25 | #include <asm/pci-direct.h> | 25 | #include <asm/pci-direct.h> |
26 | #include <asm/dma.h> | ||
26 | 27 | ||
27 | int iommu_aperture; | 28 | int iommu_aperture; |
28 | int iommu_aperture_disabled __initdata = 0; | 29 | int iommu_aperture_disabled __initdata = 0; |
@@ -196,7 +197,7 @@ static __u32 __init search_agp_bridge(u32 *order, int *valid_agp) | |||
196 | void __init iommu_hole_init(void) | 197 | void __init iommu_hole_init(void) |
197 | { | 198 | { |
198 | int fix, num; | 199 | int fix, num; |
199 | u32 aper_size, aper_alloc = 0, aper_order, last_aper_order = 0; | 200 | u32 aper_size, aper_alloc = 0, aper_order = 0, last_aper_order = 0; |
200 | u64 aper_base, last_aper_base = 0; | 201 | u64 aper_base, last_aper_base = 0; |
201 | int valid_agp = 0; | 202 | int valid_agp = 0; |
202 | 203 | ||
@@ -247,7 +248,7 @@ void __init iommu_hole_init(void) | |||
247 | /* Got the aperture from the AGP bridge */ | 248 | /* Got the aperture from the AGP bridge */ |
248 | } else if (swiotlb && !valid_agp) { | 249 | } else if (swiotlb && !valid_agp) { |
249 | /* Do nothing */ | 250 | /* Do nothing */ |
250 | } else if ((!no_iommu && end_pfn >= 0xffffffff>>PAGE_SHIFT) || | 251 | } else if ((!no_iommu && end_pfn >= MAX_DMA32_PFN) || |
251 | force_iommu || | 252 | force_iommu || |
252 | valid_agp || | 253 | valid_agp || |
253 | fallback_aper_force) { | 254 | fallback_aper_force) { |
diff --git a/arch/x86_64/kernel/apic.c b/arch/x86_64/kernel/apic.c index b6e7715d877f..5d3c5b07b8db 100644 --- a/arch/x86_64/kernel/apic.c +++ b/arch/x86_64/kernel/apic.c | |||
@@ -25,6 +25,7 @@ | |||
25 | #include <linux/mc146818rtc.h> | 25 | #include <linux/mc146818rtc.h> |
26 | #include <linux/kernel_stat.h> | 26 | #include <linux/kernel_stat.h> |
27 | #include <linux/sysdev.h> | 27 | #include <linux/sysdev.h> |
28 | #include <linux/module.h> | ||
28 | 29 | ||
29 | #include <asm/atomic.h> | 30 | #include <asm/atomic.h> |
30 | #include <asm/smp.h> | 31 | #include <asm/smp.h> |
@@ -33,36 +34,36 @@ | |||
33 | #include <asm/pgalloc.h> | 34 | #include <asm/pgalloc.h> |
34 | #include <asm/mach_apic.h> | 35 | #include <asm/mach_apic.h> |
35 | #include <asm/nmi.h> | 36 | #include <asm/nmi.h> |
37 | #include <asm/idle.h> | ||
36 | 38 | ||
37 | int apic_verbosity; | 39 | int apic_verbosity; |
38 | 40 | ||
39 | int disable_apic_timer __initdata; | 41 | int disable_apic_timer __initdata; |
40 | 42 | ||
43 | /* | ||
44 | * cpu_mask that denotes the CPUs that needs timer interrupt coming in as | ||
45 | * IPIs in place of local APIC timers | ||
46 | */ | ||
47 | static cpumask_t timer_interrupt_broadcast_ipi_mask; | ||
48 | |||
41 | /* Using APIC to generate smp_local_timer_interrupt? */ | 49 | /* Using APIC to generate smp_local_timer_interrupt? */ |
42 | int using_apic_timer = 0; | 50 | int using_apic_timer = 0; |
43 | 51 | ||
44 | static DEFINE_PER_CPU(int, prof_multiplier) = 1; | ||
45 | static DEFINE_PER_CPU(int, prof_old_multiplier) = 1; | ||
46 | static DEFINE_PER_CPU(int, prof_counter) = 1; | ||
47 | |||
48 | static void apic_pm_activate(void); | 52 | static void apic_pm_activate(void); |
49 | 53 | ||
50 | void enable_NMI_through_LVT0 (void * dummy) | 54 | void enable_NMI_through_LVT0 (void * dummy) |
51 | { | 55 | { |
52 | unsigned int v, ver; | 56 | unsigned int v; |
53 | 57 | ||
54 | ver = apic_read(APIC_LVR); | ||
55 | ver = GET_APIC_VERSION(ver); | ||
56 | v = APIC_DM_NMI; /* unmask and set to NMI */ | 58 | v = APIC_DM_NMI; /* unmask and set to NMI */ |
57 | apic_write_around(APIC_LVT0, v); | 59 | apic_write(APIC_LVT0, v); |
58 | } | 60 | } |
59 | 61 | ||
60 | int get_maxlvt(void) | 62 | int get_maxlvt(void) |
61 | { | 63 | { |
62 | unsigned int v, ver, maxlvt; | 64 | unsigned int v, maxlvt; |
63 | 65 | ||
64 | v = apic_read(APIC_LVR); | 66 | v = apic_read(APIC_LVR); |
65 | ver = GET_APIC_VERSION(v); | ||
66 | maxlvt = GET_APIC_MAXLVT(v); | 67 | maxlvt = GET_APIC_MAXLVT(v); |
67 | return maxlvt; | 68 | return maxlvt; |
68 | } | 69 | } |
@@ -80,33 +81,33 @@ void clear_local_APIC(void) | |||
80 | */ | 81 | */ |
81 | if (maxlvt >= 3) { | 82 | if (maxlvt >= 3) { |
82 | v = ERROR_APIC_VECTOR; /* any non-zero vector will do */ | 83 | v = ERROR_APIC_VECTOR; /* any non-zero vector will do */ |
83 | apic_write_around(APIC_LVTERR, v | APIC_LVT_MASKED); | 84 | apic_write(APIC_LVTERR, v | APIC_LVT_MASKED); |
84 | } | 85 | } |
85 | /* | 86 | /* |
86 | * Careful: we have to set masks only first to deassert | 87 | * Careful: we have to set masks only first to deassert |
87 | * any level-triggered sources. | 88 | * any level-triggered sources. |
88 | */ | 89 | */ |
89 | v = apic_read(APIC_LVTT); | 90 | v = apic_read(APIC_LVTT); |
90 | apic_write_around(APIC_LVTT, v | APIC_LVT_MASKED); | 91 | apic_write(APIC_LVTT, v | APIC_LVT_MASKED); |
91 | v = apic_read(APIC_LVT0); | 92 | v = apic_read(APIC_LVT0); |
92 | apic_write_around(APIC_LVT0, v | APIC_LVT_MASKED); | 93 | apic_write(APIC_LVT0, v | APIC_LVT_MASKED); |
93 | v = apic_read(APIC_LVT1); | 94 | v = apic_read(APIC_LVT1); |
94 | apic_write_around(APIC_LVT1, v | APIC_LVT_MASKED); | 95 | apic_write(APIC_LVT1, v | APIC_LVT_MASKED); |
95 | if (maxlvt >= 4) { | 96 | if (maxlvt >= 4) { |
96 | v = apic_read(APIC_LVTPC); | 97 | v = apic_read(APIC_LVTPC); |
97 | apic_write_around(APIC_LVTPC, v | APIC_LVT_MASKED); | 98 | apic_write(APIC_LVTPC, v | APIC_LVT_MASKED); |
98 | } | 99 | } |
99 | 100 | ||
100 | /* | 101 | /* |
101 | * Clean APIC state for other OSs: | 102 | * Clean APIC state for other OSs: |
102 | */ | 103 | */ |
103 | apic_write_around(APIC_LVTT, APIC_LVT_MASKED); | 104 | apic_write(APIC_LVTT, APIC_LVT_MASKED); |
104 | apic_write_around(APIC_LVT0, APIC_LVT_MASKED); | 105 | apic_write(APIC_LVT0, APIC_LVT_MASKED); |
105 | apic_write_around(APIC_LVT1, APIC_LVT_MASKED); | 106 | apic_write(APIC_LVT1, APIC_LVT_MASKED); |
106 | if (maxlvt >= 3) | 107 | if (maxlvt >= 3) |
107 | apic_write_around(APIC_LVTERR, APIC_LVT_MASKED); | 108 | apic_write(APIC_LVTERR, APIC_LVT_MASKED); |
108 | if (maxlvt >= 4) | 109 | if (maxlvt >= 4) |
109 | apic_write_around(APIC_LVTPC, APIC_LVT_MASKED); | 110 | apic_write(APIC_LVTPC, APIC_LVT_MASKED); |
110 | v = GET_APIC_VERSION(apic_read(APIC_LVR)); | 111 | v = GET_APIC_VERSION(apic_read(APIC_LVR)); |
111 | apic_write(APIC_ESR, 0); | 112 | apic_write(APIC_ESR, 0); |
112 | apic_read(APIC_ESR); | 113 | apic_read(APIC_ESR); |
@@ -151,7 +152,7 @@ void disconnect_bsp_APIC(int virt_wire_setup) | |||
151 | value &= ~APIC_VECTOR_MASK; | 152 | value &= ~APIC_VECTOR_MASK; |
152 | value |= APIC_SPIV_APIC_ENABLED; | 153 | value |= APIC_SPIV_APIC_ENABLED; |
153 | value |= 0xf; | 154 | value |= 0xf; |
154 | apic_write_around(APIC_SPIV, value); | 155 | apic_write(APIC_SPIV, value); |
155 | 156 | ||
156 | if (!virt_wire_setup) { | 157 | if (!virt_wire_setup) { |
157 | /* For LVT0 make it edge triggered, active high, external and enabled */ | 158 | /* For LVT0 make it edge triggered, active high, external and enabled */ |
@@ -161,11 +162,11 @@ void disconnect_bsp_APIC(int virt_wire_setup) | |||
161 | APIC_LVT_LEVEL_TRIGGER | APIC_LVT_MASKED ); | 162 | APIC_LVT_LEVEL_TRIGGER | APIC_LVT_MASKED ); |
162 | value |= APIC_LVT_REMOTE_IRR | APIC_SEND_PENDING; | 163 | value |= APIC_LVT_REMOTE_IRR | APIC_SEND_PENDING; |
163 | value = SET_APIC_DELIVERY_MODE(value, APIC_MODE_EXTINT); | 164 | value = SET_APIC_DELIVERY_MODE(value, APIC_MODE_EXTINT); |
164 | apic_write_around(APIC_LVT0, value); | 165 | apic_write(APIC_LVT0, value); |
165 | } | 166 | } |
166 | else { | 167 | else { |
167 | /* Disable LVT0 */ | 168 | /* Disable LVT0 */ |
168 | apic_write_around(APIC_LVT0, APIC_LVT_MASKED); | 169 | apic_write(APIC_LVT0, APIC_LVT_MASKED); |
169 | } | 170 | } |
170 | 171 | ||
171 | /* For LVT1 make it edge triggered, active high, nmi and enabled */ | 172 | /* For LVT1 make it edge triggered, active high, nmi and enabled */ |
@@ -176,7 +177,7 @@ void disconnect_bsp_APIC(int virt_wire_setup) | |||
176 | APIC_LVT_LEVEL_TRIGGER | APIC_LVT_MASKED); | 177 | APIC_LVT_LEVEL_TRIGGER | APIC_LVT_MASKED); |
177 | value |= APIC_LVT_REMOTE_IRR | APIC_SEND_PENDING; | 178 | value |= APIC_LVT_REMOTE_IRR | APIC_SEND_PENDING; |
178 | value = SET_APIC_DELIVERY_MODE(value, APIC_MODE_NMI); | 179 | value = SET_APIC_DELIVERY_MODE(value, APIC_MODE_NMI); |
179 | apic_write_around(APIC_LVT1, value); | 180 | apic_write(APIC_LVT1, value); |
180 | } | 181 | } |
181 | } | 182 | } |
182 | 183 | ||
@@ -192,7 +193,7 @@ void disable_local_APIC(void) | |||
192 | */ | 193 | */ |
193 | value = apic_read(APIC_SPIV); | 194 | value = apic_read(APIC_SPIV); |
194 | value &= ~APIC_SPIV_APIC_ENABLED; | 195 | value &= ~APIC_SPIV_APIC_ENABLED; |
195 | apic_write_around(APIC_SPIV, value); | 196 | apic_write(APIC_SPIV, value); |
196 | } | 197 | } |
197 | 198 | ||
198 | /* | 199 | /* |
@@ -269,7 +270,7 @@ void __init sync_Arb_IDs(void) | |||
269 | apic_wait_icr_idle(); | 270 | apic_wait_icr_idle(); |
270 | 271 | ||
271 | apic_printk(APIC_DEBUG, "Synchronizing Arb IDs.\n"); | 272 | apic_printk(APIC_DEBUG, "Synchronizing Arb IDs.\n"); |
272 | apic_write_around(APIC_ICR, APIC_DEST_ALLINC | APIC_INT_LEVELTRIG | 273 | apic_write(APIC_ICR, APIC_DEST_ALLINC | APIC_INT_LEVELTRIG |
273 | | APIC_DM_INIT); | 274 | | APIC_DM_INIT); |
274 | } | 275 | } |
275 | 276 | ||
@@ -280,7 +281,7 @@ extern void __error_in_apic_c (void); | |||
280 | */ | 281 | */ |
281 | void __init init_bsp_APIC(void) | 282 | void __init init_bsp_APIC(void) |
282 | { | 283 | { |
283 | unsigned int value, ver; | 284 | unsigned int value; |
284 | 285 | ||
285 | /* | 286 | /* |
286 | * Don't do the setup now if we have a SMP BIOS as the | 287 | * Don't do the setup now if we have a SMP BIOS as the |
@@ -290,7 +291,6 @@ void __init init_bsp_APIC(void) | |||
290 | return; | 291 | return; |
291 | 292 | ||
292 | value = apic_read(APIC_LVR); | 293 | value = apic_read(APIC_LVR); |
293 | ver = GET_APIC_VERSION(value); | ||
294 | 294 | ||
295 | /* | 295 | /* |
296 | * Do not trust the local APIC being empty at bootup. | 296 | * Do not trust the local APIC being empty at bootup. |
@@ -305,22 +305,21 @@ void __init init_bsp_APIC(void) | |||
305 | value |= APIC_SPIV_APIC_ENABLED; | 305 | value |= APIC_SPIV_APIC_ENABLED; |
306 | value |= APIC_SPIV_FOCUS_DISABLED; | 306 | value |= APIC_SPIV_FOCUS_DISABLED; |
307 | value |= SPURIOUS_APIC_VECTOR; | 307 | value |= SPURIOUS_APIC_VECTOR; |
308 | apic_write_around(APIC_SPIV, value); | 308 | apic_write(APIC_SPIV, value); |
309 | 309 | ||
310 | /* | 310 | /* |
311 | * Set up the virtual wire mode. | 311 | * Set up the virtual wire mode. |
312 | */ | 312 | */ |
313 | apic_write_around(APIC_LVT0, APIC_DM_EXTINT); | 313 | apic_write(APIC_LVT0, APIC_DM_EXTINT); |
314 | value = APIC_DM_NMI; | 314 | value = APIC_DM_NMI; |
315 | apic_write_around(APIC_LVT1, value); | 315 | apic_write(APIC_LVT1, value); |
316 | } | 316 | } |
317 | 317 | ||
318 | void __cpuinit setup_local_APIC (void) | 318 | void __cpuinit setup_local_APIC (void) |
319 | { | 319 | { |
320 | unsigned int value, ver, maxlvt; | 320 | unsigned int value, maxlvt; |
321 | 321 | ||
322 | value = apic_read(APIC_LVR); | 322 | value = apic_read(APIC_LVR); |
323 | ver = GET_APIC_VERSION(value); | ||
324 | 323 | ||
325 | if ((SPURIOUS_APIC_VECTOR & 0x0f) != 0x0f) | 324 | if ((SPURIOUS_APIC_VECTOR & 0x0f) != 0x0f) |
326 | __error_in_apic_c(); | 325 | __error_in_apic_c(); |
@@ -345,7 +344,7 @@ void __cpuinit setup_local_APIC (void) | |||
345 | */ | 344 | */ |
346 | value = apic_read(APIC_TASKPRI); | 345 | value = apic_read(APIC_TASKPRI); |
347 | value &= ~APIC_TPRI_MASK; | 346 | value &= ~APIC_TPRI_MASK; |
348 | apic_write_around(APIC_TASKPRI, value); | 347 | apic_write(APIC_TASKPRI, value); |
349 | 348 | ||
350 | /* | 349 | /* |
351 | * Now that we are all set up, enable the APIC | 350 | * Now that we are all set up, enable the APIC |
@@ -387,7 +386,7 @@ void __cpuinit setup_local_APIC (void) | |||
387 | * Set spurious IRQ vector | 386 | * Set spurious IRQ vector |
388 | */ | 387 | */ |
389 | value |= SPURIOUS_APIC_VECTOR; | 388 | value |= SPURIOUS_APIC_VECTOR; |
390 | apic_write_around(APIC_SPIV, value); | 389 | apic_write(APIC_SPIV, value); |
391 | 390 | ||
392 | /* | 391 | /* |
393 | * Set up LVT0, LVT1: | 392 | * Set up LVT0, LVT1: |
@@ -407,7 +406,7 @@ void __cpuinit setup_local_APIC (void) | |||
407 | value = APIC_DM_EXTINT | APIC_LVT_MASKED; | 406 | value = APIC_DM_EXTINT | APIC_LVT_MASKED; |
408 | apic_printk(APIC_VERBOSE, "masked ExtINT on CPU#%d\n", smp_processor_id()); | 407 | apic_printk(APIC_VERBOSE, "masked ExtINT on CPU#%d\n", smp_processor_id()); |
409 | } | 408 | } |
410 | apic_write_around(APIC_LVT0, value); | 409 | apic_write(APIC_LVT0, value); |
411 | 410 | ||
412 | /* | 411 | /* |
413 | * only the BP should see the LINT1 NMI signal, obviously. | 412 | * only the BP should see the LINT1 NMI signal, obviously. |
@@ -416,14 +415,14 @@ void __cpuinit setup_local_APIC (void) | |||
416 | value = APIC_DM_NMI; | 415 | value = APIC_DM_NMI; |
417 | else | 416 | else |
418 | value = APIC_DM_NMI | APIC_LVT_MASKED; | 417 | value = APIC_DM_NMI | APIC_LVT_MASKED; |
419 | apic_write_around(APIC_LVT1, value); | 418 | apic_write(APIC_LVT1, value); |
420 | 419 | ||
421 | { | 420 | { |
422 | unsigned oldvalue; | 421 | unsigned oldvalue; |
423 | maxlvt = get_maxlvt(); | 422 | maxlvt = get_maxlvt(); |
424 | oldvalue = apic_read(APIC_ESR); | 423 | oldvalue = apic_read(APIC_ESR); |
425 | value = ERROR_APIC_VECTOR; // enables sending errors | 424 | value = ERROR_APIC_VECTOR; // enables sending errors |
426 | apic_write_around(APIC_LVTERR, value); | 425 | apic_write(APIC_LVTERR, value); |
427 | /* | 426 | /* |
428 | * spec says clear errors after enabling vector. | 427 | * spec says clear errors after enabling vector. |
429 | */ | 428 | */ |
@@ -500,13 +499,10 @@ static int lapic_resume(struct sys_device *dev) | |||
500 | if (!apic_pm_state.active) | 499 | if (!apic_pm_state.active) |
501 | return 0; | 500 | return 0; |
502 | 501 | ||
503 | /* XXX: Pavel needs this for S3 resume, but can't explain why */ | ||
504 | set_fixmap_nocache(FIX_APIC_BASE, APIC_DEFAULT_PHYS_BASE); | ||
505 | |||
506 | local_irq_save(flags); | 502 | local_irq_save(flags); |
507 | rdmsr(MSR_IA32_APICBASE, l, h); | 503 | rdmsr(MSR_IA32_APICBASE, l, h); |
508 | l &= ~MSR_IA32_APICBASE_BASE; | 504 | l &= ~MSR_IA32_APICBASE_BASE; |
509 | l |= MSR_IA32_APICBASE_ENABLE | APIC_DEFAULT_PHYS_BASE; | 505 | l |= MSR_IA32_APICBASE_ENABLE | mp_lapic_addr; |
510 | wrmsr(MSR_IA32_APICBASE, l, h); | 506 | wrmsr(MSR_IA32_APICBASE, l, h); |
511 | apic_write(APIC_LVTERR, ERROR_APIC_VECTOR | APIC_LVT_MASKED); | 507 | apic_write(APIC_LVTERR, ERROR_APIC_VECTOR | APIC_LVT_MASKED); |
512 | apic_write(APIC_ID, apic_pm_state.apic_id); | 508 | apic_write(APIC_ID, apic_pm_state.apic_id); |
@@ -660,20 +656,25 @@ void __init init_apic_mappings(void) | |||
660 | static void __setup_APIC_LVTT(unsigned int clocks) | 656 | static void __setup_APIC_LVTT(unsigned int clocks) |
661 | { | 657 | { |
662 | unsigned int lvtt_value, tmp_value, ver; | 658 | unsigned int lvtt_value, tmp_value, ver; |
659 | int cpu = smp_processor_id(); | ||
663 | 660 | ||
664 | ver = GET_APIC_VERSION(apic_read(APIC_LVR)); | 661 | ver = GET_APIC_VERSION(apic_read(APIC_LVR)); |
665 | lvtt_value = APIC_LVT_TIMER_PERIODIC | LOCAL_TIMER_VECTOR; | 662 | lvtt_value = APIC_LVT_TIMER_PERIODIC | LOCAL_TIMER_VECTOR; |
666 | apic_write_around(APIC_LVTT, lvtt_value); | 663 | |
664 | if (cpu_isset(cpu, timer_interrupt_broadcast_ipi_mask)) | ||
665 | lvtt_value |= APIC_LVT_MASKED; | ||
666 | |||
667 | apic_write(APIC_LVTT, lvtt_value); | ||
667 | 668 | ||
668 | /* | 669 | /* |
669 | * Divide PICLK by 16 | 670 | * Divide PICLK by 16 |
670 | */ | 671 | */ |
671 | tmp_value = apic_read(APIC_TDCR); | 672 | tmp_value = apic_read(APIC_TDCR); |
672 | apic_write_around(APIC_TDCR, (tmp_value | 673 | apic_write(APIC_TDCR, (tmp_value |
673 | & ~(APIC_TDR_DIV_1 | APIC_TDR_DIV_TMBASE)) | 674 | & ~(APIC_TDR_DIV_1 | APIC_TDR_DIV_TMBASE)) |
674 | | APIC_TDR_DIV_16); | 675 | | APIC_TDR_DIV_16); |
675 | 676 | ||
676 | apic_write_around(APIC_TMICT, clocks/APIC_DIVISOR); | 677 | apic_write(APIC_TMICT, clocks/APIC_DIVISOR); |
677 | } | 678 | } |
678 | 679 | ||
679 | static void setup_APIC_timer(unsigned int clocks) | 680 | static void setup_APIC_timer(unsigned int clocks) |
@@ -682,12 +683,6 @@ static void setup_APIC_timer(unsigned int clocks) | |||
682 | 683 | ||
683 | local_irq_save(flags); | 684 | local_irq_save(flags); |
684 | 685 | ||
685 | /* For some reasons this doesn't work on Simics, so fake it for now */ | ||
686 | if (!strstr(boot_cpu_data.x86_model_id, "Screwdriver")) { | ||
687 | __setup_APIC_LVTT(clocks); | ||
688 | return; | ||
689 | } | ||
690 | |||
691 | /* wait for irq slice */ | 686 | /* wait for irq slice */ |
692 | if (vxtime.hpet_address) { | 687 | if (vxtime.hpet_address) { |
693 | int trigger = hpet_readl(HPET_T0_CMP); | 688 | int trigger = hpet_readl(HPET_T0_CMP); |
@@ -700,7 +695,7 @@ static void setup_APIC_timer(unsigned int clocks) | |||
700 | outb_p(0x00, 0x43); | 695 | outb_p(0x00, 0x43); |
701 | c2 = inb_p(0x40); | 696 | c2 = inb_p(0x40); |
702 | c2 |= inb_p(0x40) << 8; | 697 | c2 |= inb_p(0x40) << 8; |
703 | do { | 698 | do { |
704 | c1 = c2; | 699 | c1 = c2; |
705 | outb_p(0x00, 0x43); | 700 | outb_p(0x00, 0x43); |
706 | c2 = inb_p(0x40); | 701 | c2 = inb_p(0x40); |
@@ -785,54 +780,80 @@ void __cpuinit setup_secondary_APIC_clock(void) | |||
785 | local_irq_enable(); | 780 | local_irq_enable(); |
786 | } | 781 | } |
787 | 782 | ||
788 | void __cpuinit disable_APIC_timer(void) | 783 | void disable_APIC_timer(void) |
789 | { | 784 | { |
790 | if (using_apic_timer) { | 785 | if (using_apic_timer) { |
791 | unsigned long v; | 786 | unsigned long v; |
792 | 787 | ||
793 | v = apic_read(APIC_LVTT); | 788 | v = apic_read(APIC_LVTT); |
794 | apic_write_around(APIC_LVTT, v | APIC_LVT_MASKED); | 789 | apic_write(APIC_LVTT, v | APIC_LVT_MASKED); |
795 | } | 790 | } |
796 | } | 791 | } |
797 | 792 | ||
798 | void enable_APIC_timer(void) | 793 | void enable_APIC_timer(void) |
799 | { | 794 | { |
800 | if (using_apic_timer) { | 795 | int cpu = smp_processor_id(); |
796 | |||
797 | if (using_apic_timer && | ||
798 | !cpu_isset(cpu, timer_interrupt_broadcast_ipi_mask)) { | ||
801 | unsigned long v; | 799 | unsigned long v; |
802 | 800 | ||
803 | v = apic_read(APIC_LVTT); | 801 | v = apic_read(APIC_LVTT); |
804 | apic_write_around(APIC_LVTT, v & ~APIC_LVT_MASKED); | 802 | apic_write(APIC_LVTT, v & ~APIC_LVT_MASKED); |
805 | } | 803 | } |
806 | } | 804 | } |
807 | 805 | ||
808 | /* | 806 | void switch_APIC_timer_to_ipi(void *cpumask) |
809 | * the frequency of the profiling timer can be changed | ||
810 | * by writing a multiplier value into /proc/profile. | ||
811 | */ | ||
812 | int setup_profiling_timer(unsigned int multiplier) | ||
813 | { | 807 | { |
814 | int i; | 808 | cpumask_t mask = *(cpumask_t *)cpumask; |
809 | int cpu = smp_processor_id(); | ||
815 | 810 | ||
816 | /* | 811 | if (cpu_isset(cpu, mask) && |
817 | * Sanity check. [at least 500 APIC cycles should be | 812 | !cpu_isset(cpu, timer_interrupt_broadcast_ipi_mask)) { |
818 | * between APIC interrupts as a rule of thumb, to avoid | 813 | disable_APIC_timer(); |
819 | * irqs flooding us] | 814 | cpu_set(cpu, timer_interrupt_broadcast_ipi_mask); |
820 | */ | 815 | } |
821 | if ( (!multiplier) || (calibration_result/multiplier < 500)) | 816 | } |
822 | return -EINVAL; | 817 | EXPORT_SYMBOL(switch_APIC_timer_to_ipi); |
823 | |||
824 | /* | ||
825 | * Set the new multiplier for each CPU. CPUs don't start using the | ||
826 | * new values until the next timer interrupt in which they do process | ||
827 | * accounting. At that time they also adjust their APIC timers | ||
828 | * accordingly. | ||
829 | */ | ||
830 | for (i = 0; i < NR_CPUS; ++i) | ||
831 | per_cpu(prof_multiplier, i) = multiplier; | ||
832 | 818 | ||
833 | return 0; | 819 | void smp_send_timer_broadcast_ipi(void) |
820 | { | ||
821 | cpumask_t mask; | ||
822 | |||
823 | cpus_and(mask, cpu_online_map, timer_interrupt_broadcast_ipi_mask); | ||
824 | if (!cpus_empty(mask)) { | ||
825 | send_IPI_mask(mask, LOCAL_TIMER_VECTOR); | ||
826 | } | ||
827 | } | ||
828 | |||
829 | void switch_ipi_to_APIC_timer(void *cpumask) | ||
830 | { | ||
831 | cpumask_t mask = *(cpumask_t *)cpumask; | ||
832 | int cpu = smp_processor_id(); | ||
833 | |||
834 | if (cpu_isset(cpu, mask) && | ||
835 | cpu_isset(cpu, timer_interrupt_broadcast_ipi_mask)) { | ||
836 | cpu_clear(cpu, timer_interrupt_broadcast_ipi_mask); | ||
837 | enable_APIC_timer(); | ||
838 | } | ||
839 | } | ||
840 | EXPORT_SYMBOL(switch_ipi_to_APIC_timer); | ||
841 | |||
842 | int setup_profiling_timer(unsigned int multiplier) | ||
843 | { | ||
844 | return -EINVAL; | ||
834 | } | 845 | } |
835 | 846 | ||
847 | #ifdef CONFIG_X86_MCE_AMD | ||
848 | void setup_threshold_lvt(unsigned long lvt_off) | ||
849 | { | ||
850 | unsigned int v = 0; | ||
851 | unsigned long reg = (lvt_off << 4) + 0x500; | ||
852 | v |= THRESHOLD_APIC_VECTOR; | ||
853 | apic_write(reg, v); | ||
854 | } | ||
855 | #endif /* CONFIG_X86_MCE_AMD */ | ||
856 | |||
836 | #undef APIC_DIVISOR | 857 | #undef APIC_DIVISOR |
837 | 858 | ||
838 | /* | 859 | /* |
@@ -847,32 +868,10 @@ int setup_profiling_timer(unsigned int multiplier) | |||
847 | 868 | ||
848 | void smp_local_timer_interrupt(struct pt_regs *regs) | 869 | void smp_local_timer_interrupt(struct pt_regs *regs) |
849 | { | 870 | { |
850 | int cpu = smp_processor_id(); | ||
851 | |||
852 | profile_tick(CPU_PROFILING, regs); | 871 | profile_tick(CPU_PROFILING, regs); |
853 | if (--per_cpu(prof_counter, cpu) <= 0) { | ||
854 | /* | ||
855 | * The multiplier may have changed since the last time we got | ||
856 | * to this point as a result of the user writing to | ||
857 | * /proc/profile. In this case we need to adjust the APIC | ||
858 | * timer accordingly. | ||
859 | * | ||
860 | * Interrupts are already masked off at this point. | ||
861 | */ | ||
862 | per_cpu(prof_counter, cpu) = per_cpu(prof_multiplier, cpu); | ||
863 | if (per_cpu(prof_counter, cpu) != | ||
864 | per_cpu(prof_old_multiplier, cpu)) { | ||
865 | __setup_APIC_LVTT(calibration_result/ | ||
866 | per_cpu(prof_counter, cpu)); | ||
867 | per_cpu(prof_old_multiplier, cpu) = | ||
868 | per_cpu(prof_counter, cpu); | ||
869 | } | ||
870 | |||
871 | #ifdef CONFIG_SMP | 872 | #ifdef CONFIG_SMP |
872 | update_process_times(user_mode(regs)); | 873 | update_process_times(user_mode(regs)); |
873 | #endif | 874 | #endif |
874 | } | ||
875 | |||
876 | /* | 875 | /* |
877 | * We take the 'long' return path, and there every subsystem | 876 | * We take the 'long' return path, and there every subsystem |
878 | * grabs the appropriate locks (kernel lock/ irq lock). | 877 | * grabs the appropriate locks (kernel lock/ irq lock). |
@@ -910,6 +909,7 @@ void smp_apic_timer_interrupt(struct pt_regs *regs) | |||
910 | * Besides, if we don't timer interrupts ignore the global | 909 | * Besides, if we don't timer interrupts ignore the global |
911 | * interrupt lock, which is the WrongThing (tm) to do. | 910 | * interrupt lock, which is the WrongThing (tm) to do. |
912 | */ | 911 | */ |
912 | exit_idle(); | ||
913 | irq_enter(); | 913 | irq_enter(); |
914 | smp_local_timer_interrupt(regs); | 914 | smp_local_timer_interrupt(regs); |
915 | irq_exit(); | 915 | irq_exit(); |
@@ -969,6 +969,7 @@ __init int oem_force_hpet_timer(void) | |||
969 | asmlinkage void smp_spurious_interrupt(void) | 969 | asmlinkage void smp_spurious_interrupt(void) |
970 | { | 970 | { |
971 | unsigned int v; | 971 | unsigned int v; |
972 | exit_idle(); | ||
972 | irq_enter(); | 973 | irq_enter(); |
973 | /* | 974 | /* |
974 | * Check if this really is a spurious interrupt and ACK it | 975 | * Check if this really is a spurious interrupt and ACK it |
@@ -1004,6 +1005,7 @@ asmlinkage void smp_error_interrupt(void) | |||
1004 | { | 1005 | { |
1005 | unsigned int v, v1; | 1006 | unsigned int v, v1; |
1006 | 1007 | ||
1008 | exit_idle(); | ||
1007 | irq_enter(); | 1009 | irq_enter(); |
1008 | /* First tickle the hardware, only then report what went on. -- REW */ | 1010 | /* First tickle the hardware, only then report what went on. -- REW */ |
1009 | v = apic_read(APIC_ESR); | 1011 | v = apic_read(APIC_ESR); |
@@ -1050,7 +1052,7 @@ int __init APIC_init_uniprocessor (void) | |||
1050 | connect_bsp_APIC(); | 1052 | connect_bsp_APIC(); |
1051 | 1053 | ||
1052 | phys_cpu_present_map = physid_mask_of_physid(boot_cpu_id); | 1054 | phys_cpu_present_map = physid_mask_of_physid(boot_cpu_id); |
1053 | apic_write_around(APIC_ID, boot_cpu_id); | 1055 | apic_write(APIC_ID, SET_APIC_ID(boot_cpu_id)); |
1054 | 1056 | ||
1055 | setup_local_APIC(); | 1057 | setup_local_APIC(); |
1056 | 1058 | ||
diff --git a/arch/x86_64/kernel/asm-offsets.c b/arch/x86_64/kernel/asm-offsets.c index aaa6d3833517..38834bbbae11 100644 --- a/arch/x86_64/kernel/asm-offsets.c +++ b/arch/x86_64/kernel/asm-offsets.c | |||
@@ -33,6 +33,7 @@ int main(void) | |||
33 | ENTRY(flags); | 33 | ENTRY(flags); |
34 | ENTRY(addr_limit); | 34 | ENTRY(addr_limit); |
35 | ENTRY(preempt_count); | 35 | ENTRY(preempt_count); |
36 | ENTRY(status); | ||
36 | BLANK(); | 37 | BLANK(); |
37 | #undef ENTRY | 38 | #undef ENTRY |
38 | #define ENTRY(entry) DEFINE(pda_ ## entry, offsetof(struct x8664_pda, entry)) | 39 | #define ENTRY(entry) DEFINE(pda_ ## entry, offsetof(struct x8664_pda, entry)) |
@@ -42,6 +43,7 @@ int main(void) | |||
42 | ENTRY(irqcount); | 43 | ENTRY(irqcount); |
43 | ENTRY(cpunumber); | 44 | ENTRY(cpunumber); |
44 | ENTRY(irqstackptr); | 45 | ENTRY(irqstackptr); |
46 | ENTRY(data_offset); | ||
45 | BLANK(); | 47 | BLANK(); |
46 | #undef ENTRY | 48 | #undef ENTRY |
47 | #ifdef CONFIG_IA32_EMULATION | 49 | #ifdef CONFIG_IA32_EMULATION |
@@ -64,5 +66,7 @@ int main(void) | |||
64 | DEFINE(pbe_address, offsetof(struct pbe, address)); | 66 | DEFINE(pbe_address, offsetof(struct pbe, address)); |
65 | DEFINE(pbe_orig_address, offsetof(struct pbe, orig_address)); | 67 | DEFINE(pbe_orig_address, offsetof(struct pbe, orig_address)); |
66 | DEFINE(pbe_next, offsetof(struct pbe, next)); | 68 | DEFINE(pbe_next, offsetof(struct pbe, next)); |
69 | BLANK(); | ||
70 | DEFINE(TSS_ist, offsetof(struct tss_struct, ist)); | ||
67 | return 0; | 71 | return 0; |
68 | } | 72 | } |
diff --git a/arch/x86_64/kernel/crash.c b/arch/x86_64/kernel/crash.c index 535e04466079..4e6c3b729e39 100644 --- a/arch/x86_64/kernel/crash.c +++ b/arch/x86_64/kernel/crash.c | |||
@@ -11,19 +11,156 @@ | |||
11 | #include <linux/types.h> | 11 | #include <linux/types.h> |
12 | #include <linux/kernel.h> | 12 | #include <linux/kernel.h> |
13 | #include <linux/smp.h> | 13 | #include <linux/smp.h> |
14 | #include <linux/irq.h> | ||
14 | #include <linux/reboot.h> | 15 | #include <linux/reboot.h> |
15 | #include <linux/kexec.h> | 16 | #include <linux/kexec.h> |
17 | #include <linux/delay.h> | ||
18 | #include <linux/elf.h> | ||
19 | #include <linux/elfcore.h> | ||
16 | 20 | ||
17 | #include <asm/processor.h> | 21 | #include <asm/processor.h> |
18 | #include <asm/hardirq.h> | 22 | #include <asm/hardirq.h> |
19 | #include <asm/nmi.h> | 23 | #include <asm/nmi.h> |
20 | #include <asm/hw_irq.h> | 24 | #include <asm/hw_irq.h> |
25 | #include <asm/mach_apic.h> | ||
21 | 26 | ||
22 | note_buf_t crash_notes[NR_CPUS]; | 27 | /* This keeps a track of which one is crashing cpu. */ |
28 | static int crashing_cpu; | ||
29 | |||
30 | static u32 *append_elf_note(u32 *buf, char *name, unsigned type, | ||
31 | void *data, size_t data_len) | ||
32 | { | ||
33 | struct elf_note note; | ||
34 | |||
35 | note.n_namesz = strlen(name) + 1; | ||
36 | note.n_descsz = data_len; | ||
37 | note.n_type = type; | ||
38 | memcpy(buf, ¬e, sizeof(note)); | ||
39 | buf += (sizeof(note) +3)/4; | ||
40 | memcpy(buf, name, note.n_namesz); | ||
41 | buf += (note.n_namesz + 3)/4; | ||
42 | memcpy(buf, data, note.n_descsz); | ||
43 | buf += (note.n_descsz + 3)/4; | ||
44 | |||
45 | return buf; | ||
46 | } | ||
47 | |||
48 | static void final_note(u32 *buf) | ||
49 | { | ||
50 | struct elf_note note; | ||
51 | |||
52 | note.n_namesz = 0; | ||
53 | note.n_descsz = 0; | ||
54 | note.n_type = 0; | ||
55 | memcpy(buf, ¬e, sizeof(note)); | ||
56 | } | ||
57 | |||
58 | static void crash_save_this_cpu(struct pt_regs *regs, int cpu) | ||
59 | { | ||
60 | struct elf_prstatus prstatus; | ||
61 | u32 *buf; | ||
62 | |||
63 | if ((cpu < 0) || (cpu >= NR_CPUS)) | ||
64 | return; | ||
65 | |||
66 | /* Using ELF notes here is opportunistic. | ||
67 | * I need a well defined structure format | ||
68 | * for the data I pass, and I need tags | ||
69 | * on the data to indicate what information I have | ||
70 | * squirrelled away. ELF notes happen to provide | ||
71 | * all of that that no need to invent something new. | ||
72 | */ | ||
73 | |||
74 | buf = (u32*)per_cpu_ptr(crash_notes, cpu); | ||
75 | |||
76 | if (!buf) | ||
77 | return; | ||
78 | |||
79 | memset(&prstatus, 0, sizeof(prstatus)); | ||
80 | prstatus.pr_pid = current->pid; | ||
81 | elf_core_copy_regs(&prstatus.pr_reg, regs); | ||
82 | buf = append_elf_note(buf, "CORE", NT_PRSTATUS, &prstatus, | ||
83 | sizeof(prstatus)); | ||
84 | final_note(buf); | ||
85 | } | ||
86 | |||
87 | static void crash_save_self(struct pt_regs *regs) | ||
88 | { | ||
89 | int cpu; | ||
90 | |||
91 | cpu = smp_processor_id(); | ||
92 | crash_save_this_cpu(regs, cpu); | ||
93 | } | ||
94 | |||
95 | #ifdef CONFIG_SMP | ||
96 | static atomic_t waiting_for_crash_ipi; | ||
97 | |||
98 | static int crash_nmi_callback(struct pt_regs *regs, int cpu) | ||
99 | { | ||
100 | /* | ||
101 | * Don't do anything if this handler is invoked on crashing cpu. | ||
102 | * Otherwise, system will completely hang. Crashing cpu can get | ||
103 | * an NMI if system was initially booted with nmi_watchdog parameter. | ||
104 | */ | ||
105 | if (cpu == crashing_cpu) | ||
106 | return 1; | ||
107 | local_irq_disable(); | ||
108 | |||
109 | crash_save_this_cpu(regs, cpu); | ||
110 | disable_local_APIC(); | ||
111 | atomic_dec(&waiting_for_crash_ipi); | ||
112 | /* Assume hlt works */ | ||
113 | for(;;) | ||
114 | asm("hlt"); | ||
115 | |||
116 | return 1; | ||
117 | } | ||
118 | |||
119 | static void smp_send_nmi_allbutself(void) | ||
120 | { | ||
121 | send_IPI_allbutself(APIC_DM_NMI); | ||
122 | } | ||
123 | |||
124 | /* | ||
125 | * This code is a best effort heuristic to get the | ||
126 | * other cpus to stop executing. So races with | ||
127 | * cpu hotplug shouldn't matter. | ||
128 | */ | ||
129 | |||
130 | static void nmi_shootdown_cpus(void) | ||
131 | { | ||
132 | unsigned long msecs; | ||
133 | |||
134 | atomic_set(&waiting_for_crash_ipi, num_online_cpus() - 1); | ||
135 | set_nmi_callback(crash_nmi_callback); | ||
136 | |||
137 | /* | ||
138 | * Ensure the new callback function is set before sending | ||
139 | * out the NMI | ||
140 | */ | ||
141 | wmb(); | ||
142 | |||
143 | smp_send_nmi_allbutself(); | ||
144 | |||
145 | msecs = 1000; /* Wait at most a second for the other cpus to stop */ | ||
146 | while ((atomic_read(&waiting_for_crash_ipi) > 0) && msecs) { | ||
147 | mdelay(1); | ||
148 | msecs--; | ||
149 | } | ||
150 | /* Leave the nmi callback set */ | ||
151 | disable_local_APIC(); | ||
152 | } | ||
153 | #else | ||
154 | static void nmi_shootdown_cpus(void) | ||
155 | { | ||
156 | /* There are no cpus to shootdown */ | ||
157 | } | ||
158 | #endif | ||
23 | 159 | ||
24 | void machine_crash_shutdown(struct pt_regs *regs) | 160 | void machine_crash_shutdown(struct pt_regs *regs) |
25 | { | 161 | { |
26 | /* This function is only called after the system | 162 | /* |
163 | * This function is only called after the system | ||
27 | * has paniced or is otherwise in a critical state. | 164 | * has paniced or is otherwise in a critical state. |
28 | * The minimum amount of code to allow a kexec'd kernel | 165 | * The minimum amount of code to allow a kexec'd kernel |
29 | * to run successfully needs to happen here. | 166 | * to run successfully needs to happen here. |
@@ -31,4 +168,19 @@ void machine_crash_shutdown(struct pt_regs *regs) | |||
31 | * In practice this means shooting down the other cpus in | 168 | * In practice this means shooting down the other cpus in |
32 | * an SMP system. | 169 | * an SMP system. |
33 | */ | 170 | */ |
171 | /* The kernel is broken so disable interrupts */ | ||
172 | local_irq_disable(); | ||
173 | |||
174 | /* Make a note of crashing cpu. Will be used in NMI callback.*/ | ||
175 | crashing_cpu = smp_processor_id(); | ||
176 | nmi_shootdown_cpus(); | ||
177 | |||
178 | if(cpu_has_apic) | ||
179 | disable_local_APIC(); | ||
180 | |||
181 | #if defined(CONFIG_X86_IO_APIC) | ||
182 | disable_IO_APIC(); | ||
183 | #endif | ||
184 | |||
185 | crash_save_self(regs); | ||
34 | } | 186 | } |
diff --git a/arch/x86_64/kernel/crash_dump.c b/arch/x86_64/kernel/crash_dump.c new file mode 100644 index 000000000000..942deac4d43a --- /dev/null +++ b/arch/x86_64/kernel/crash_dump.c | |||
@@ -0,0 +1,47 @@ | |||
1 | /* | ||
2 | * kernel/crash_dump.c - Memory preserving reboot related code. | ||
3 | * | ||
4 | * Created by: Hariprasad Nellitheertha (hari@in.ibm.com) | ||
5 | * Copyright (C) IBM Corporation, 2004. All rights reserved | ||
6 | */ | ||
7 | |||
8 | #include <linux/errno.h> | ||
9 | #include <linux/crash_dump.h> | ||
10 | |||
11 | #include <asm/uaccess.h> | ||
12 | #include <asm/io.h> | ||
13 | |||
14 | /** | ||
15 | * copy_oldmem_page - copy one page from "oldmem" | ||
16 | * @pfn: page frame number to be copied | ||
17 | * @buf: target memory address for the copy; this can be in kernel address | ||
18 | * space or user address space (see @userbuf) | ||
19 | * @csize: number of bytes to copy | ||
20 | * @offset: offset in bytes into the page (based on pfn) to begin the copy | ||
21 | * @userbuf: if set, @buf is in user address space, use copy_to_user(), | ||
22 | * otherwise @buf is in kernel address space, use memcpy(). | ||
23 | * | ||
24 | * Copy a page from "oldmem". For this page, there is no pte mapped | ||
25 | * in the current kernel. We stitch up a pte, similar to kmap_atomic. | ||
26 | */ | ||
27 | ssize_t copy_oldmem_page(unsigned long pfn, char *buf, | ||
28 | size_t csize, unsigned long offset, int userbuf) | ||
29 | { | ||
30 | void *vaddr; | ||
31 | |||
32 | if (!csize) | ||
33 | return 0; | ||
34 | |||
35 | vaddr = ioremap(pfn << PAGE_SHIFT, PAGE_SIZE); | ||
36 | |||
37 | if (userbuf) { | ||
38 | if (copy_to_user(buf, (vaddr + offset), csize)) { | ||
39 | iounmap(vaddr); | ||
40 | return -EFAULT; | ||
41 | } | ||
42 | } else | ||
43 | memcpy(buf, (vaddr + offset), csize); | ||
44 | |||
45 | iounmap(vaddr); | ||
46 | return csize; | ||
47 | } | ||
diff --git a/arch/x86_64/kernel/e820.c b/arch/x86_64/kernel/e820.c index ab3f87aaff70..293cd71a266a 100644 --- a/arch/x86_64/kernel/e820.c +++ b/arch/x86_64/kernel/e820.c | |||
@@ -23,8 +23,7 @@ | |||
23 | #include <asm/e820.h> | 23 | #include <asm/e820.h> |
24 | #include <asm/proto.h> | 24 | #include <asm/proto.h> |
25 | #include <asm/bootsetup.h> | 25 | #include <asm/bootsetup.h> |
26 | 26 | #include <asm/sections.h> | |
27 | extern char _end[]; | ||
28 | 27 | ||
29 | /* | 28 | /* |
30 | * PFN of last memory page. | 29 | * PFN of last memory page. |
@@ -560,6 +559,27 @@ void __init parse_memopt(char *p, char **from) | |||
560 | end_user_pfn >>= PAGE_SHIFT; | 559 | end_user_pfn >>= PAGE_SHIFT; |
561 | } | 560 | } |
562 | 561 | ||
562 | void __init parse_memmapopt(char *p, char **from) | ||
563 | { | ||
564 | unsigned long long start_at, mem_size; | ||
565 | |||
566 | mem_size = memparse(p, from); | ||
567 | p = *from; | ||
568 | if (*p == '@') { | ||
569 | start_at = memparse(p+1, from); | ||
570 | add_memory_region(start_at, mem_size, E820_RAM); | ||
571 | } else if (*p == '#') { | ||
572 | start_at = memparse(p+1, from); | ||
573 | add_memory_region(start_at, mem_size, E820_ACPI); | ||
574 | } else if (*p == '$') { | ||
575 | start_at = memparse(p+1, from); | ||
576 | add_memory_region(start_at, mem_size, E820_RESERVED); | ||
577 | } else { | ||
578 | end_user_pfn = (mem_size >> PAGE_SHIFT); | ||
579 | } | ||
580 | p = *from; | ||
581 | } | ||
582 | |||
563 | unsigned long pci_mem_start = 0xaeedbabe; | 583 | unsigned long pci_mem_start = 0xaeedbabe; |
564 | 584 | ||
565 | /* | 585 | /* |
diff --git a/arch/x86_64/kernel/early_printk.c b/arch/x86_64/kernel/early_printk.c index 9cd968dd0f5a..6dffb498ccd7 100644 --- a/arch/x86_64/kernel/early_printk.c +++ b/arch/x86_64/kernel/early_printk.c | |||
@@ -216,7 +216,7 @@ void early_printk(const char *fmt, ...) | |||
216 | va_end(ap); | 216 | va_end(ap); |
217 | } | 217 | } |
218 | 218 | ||
219 | static int keep_early; | 219 | static int __initdata keep_early; |
220 | 220 | ||
221 | int __init setup_early_printk(char *opt) | 221 | int __init setup_early_printk(char *opt) |
222 | { | 222 | { |
@@ -226,8 +226,6 @@ int __init setup_early_printk(char *opt) | |||
226 | if (early_console_initialized) | 226 | if (early_console_initialized) |
227 | return -1; | 227 | return -1; |
228 | 228 | ||
229 | opt = strchr(opt, '=') + 1; | ||
230 | |||
231 | strlcpy(buf,opt,sizeof(buf)); | 229 | strlcpy(buf,opt,sizeof(buf)); |
232 | space = strchr(buf, ' '); | 230 | space = strchr(buf, ' '); |
233 | if (space) | 231 | if (space) |
diff --git a/arch/x86_64/kernel/entry.S b/arch/x86_64/kernel/entry.S index 7937971d1853..dbdba56e8faa 100644 --- a/arch/x86_64/kernel/entry.S +++ b/arch/x86_64/kernel/entry.S | |||
@@ -41,6 +41,7 @@ | |||
41 | #include <asm/unistd.h> | 41 | #include <asm/unistd.h> |
42 | #include <asm/thread_info.h> | 42 | #include <asm/thread_info.h> |
43 | #include <asm/hw_irq.h> | 43 | #include <asm/hw_irq.h> |
44 | #include <asm/page.h> | ||
44 | 45 | ||
45 | .code64 | 46 | .code64 |
46 | 47 | ||
@@ -313,6 +314,7 @@ int_with_check: | |||
313 | movl threadinfo_flags(%rcx),%edx | 314 | movl threadinfo_flags(%rcx),%edx |
314 | andl %edi,%edx | 315 | andl %edi,%edx |
315 | jnz int_careful | 316 | jnz int_careful |
317 | andl $~TS_COMPAT,threadinfo_status(%rcx) | ||
316 | jmp retint_swapgs | 318 | jmp retint_swapgs |
317 | 319 | ||
318 | /* Either reschedule or signal or syscall exit tracking needed. */ | 320 | /* Either reschedule or signal or syscall exit tracking needed. */ |
@@ -612,6 +614,9 @@ retint_kernel: | |||
612 | ENTRY(thermal_interrupt) | 614 | ENTRY(thermal_interrupt) |
613 | apicinterrupt THERMAL_APIC_VECTOR,smp_thermal_interrupt | 615 | apicinterrupt THERMAL_APIC_VECTOR,smp_thermal_interrupt |
614 | 616 | ||
617 | ENTRY(threshold_interrupt) | ||
618 | apicinterrupt THRESHOLD_APIC_VECTOR,mce_threshold_interrupt | ||
619 | |||
615 | #ifdef CONFIG_SMP | 620 | #ifdef CONFIG_SMP |
616 | ENTRY(reschedule_interrupt) | 621 | ENTRY(reschedule_interrupt) |
617 | apicinterrupt RESCHEDULE_VECTOR,smp_reschedule_interrupt | 622 | apicinterrupt RESCHEDULE_VECTOR,smp_reschedule_interrupt |
@@ -670,7 +675,7 @@ ENTRY(spurious_interrupt) | |||
670 | 675 | ||
671 | /* error code is on the stack already */ | 676 | /* error code is on the stack already */ |
672 | /* handle NMI like exceptions that can happen everywhere */ | 677 | /* handle NMI like exceptions that can happen everywhere */ |
673 | .macro paranoidentry sym | 678 | .macro paranoidentry sym, ist=0 |
674 | SAVE_ALL | 679 | SAVE_ALL |
675 | cld | 680 | cld |
676 | movl $1,%ebx | 681 | movl $1,%ebx |
@@ -680,10 +685,20 @@ ENTRY(spurious_interrupt) | |||
680 | js 1f | 685 | js 1f |
681 | swapgs | 686 | swapgs |
682 | xorl %ebx,%ebx | 687 | xorl %ebx,%ebx |
683 | 1: movq %rsp,%rdi | 688 | 1: |
689 | .if \ist | ||
690 | movq %gs:pda_data_offset, %rbp | ||
691 | .endif | ||
692 | movq %rsp,%rdi | ||
684 | movq ORIG_RAX(%rsp),%rsi | 693 | movq ORIG_RAX(%rsp),%rsi |
685 | movq $-1,ORIG_RAX(%rsp) | 694 | movq $-1,ORIG_RAX(%rsp) |
695 | .if \ist | ||
696 | subq $EXCEPTION_STKSZ, per_cpu__init_tss + TSS_ist + (\ist - 1) * 8(%rbp) | ||
697 | .endif | ||
686 | call \sym | 698 | call \sym |
699 | .if \ist | ||
700 | addq $EXCEPTION_STKSZ, per_cpu__init_tss + TSS_ist + (\ist - 1) * 8(%rbp) | ||
701 | .endif | ||
687 | cli | 702 | cli |
688 | .endm | 703 | .endm |
689 | 704 | ||
@@ -751,7 +766,7 @@ error_exit: | |||
751 | jnz retint_careful | 766 | jnz retint_careful |
752 | swapgs | 767 | swapgs |
753 | RESTORE_ARGS 0,8,0 | 768 | RESTORE_ARGS 0,8,0 |
754 | iretq | 769 | jmp iret_label |
755 | CFI_ENDPROC | 770 | CFI_ENDPROC |
756 | 771 | ||
757 | error_kernelspace: | 772 | error_kernelspace: |
@@ -901,7 +916,7 @@ KPROBE_ENTRY(debug) | |||
901 | INTR_FRAME | 916 | INTR_FRAME |
902 | pushq $0 | 917 | pushq $0 |
903 | CFI_ADJUST_CFA_OFFSET 8 | 918 | CFI_ADJUST_CFA_OFFSET 8 |
904 | paranoidentry do_debug | 919 | paranoidentry do_debug, DEBUG_STACK |
905 | jmp paranoid_exit | 920 | jmp paranoid_exit |
906 | CFI_ENDPROC | 921 | CFI_ENDPROC |
907 | .previous .text | 922 | .previous .text |
@@ -956,7 +971,12 @@ paranoid_schedule: | |||
956 | CFI_ENDPROC | 971 | CFI_ENDPROC |
957 | 972 | ||
958 | KPROBE_ENTRY(int3) | 973 | KPROBE_ENTRY(int3) |
959 | zeroentry do_int3 | 974 | INTR_FRAME |
975 | pushq $0 | ||
976 | CFI_ADJUST_CFA_OFFSET 8 | ||
977 | paranoidentry do_int3, DEBUG_STACK | ||
978 | jmp paranoid_exit | ||
979 | CFI_ENDPROC | ||
960 | .previous .text | 980 | .previous .text |
961 | 981 | ||
962 | ENTRY(overflow) | 982 | ENTRY(overflow) |
@@ -1018,23 +1038,18 @@ ENTRY(machine_check) | |||
1018 | CFI_ENDPROC | 1038 | CFI_ENDPROC |
1019 | #endif | 1039 | #endif |
1020 | 1040 | ||
1021 | ENTRY(call_debug) | ||
1022 | zeroentry do_call_debug | ||
1023 | |||
1024 | ENTRY(call_softirq) | 1041 | ENTRY(call_softirq) |
1025 | CFI_STARTPROC | 1042 | CFI_STARTPROC |
1026 | movq %gs:pda_irqstackptr,%rax | 1043 | movq %gs:pda_irqstackptr,%rax |
1027 | pushq %r15 | 1044 | movq %rsp,%rdx |
1028 | CFI_ADJUST_CFA_OFFSET 8 | 1045 | CFI_DEF_CFA_REGISTER rdx |
1029 | movq %rsp,%r15 | ||
1030 | CFI_DEF_CFA_REGISTER r15 | ||
1031 | incl %gs:pda_irqcount | 1046 | incl %gs:pda_irqcount |
1032 | cmove %rax,%rsp | 1047 | cmove %rax,%rsp |
1048 | pushq %rdx | ||
1049 | /*todo CFI_DEF_CFA_EXPRESSION ...*/ | ||
1033 | call __do_softirq | 1050 | call __do_softirq |
1034 | movq %r15,%rsp | 1051 | popq %rsp |
1035 | CFI_DEF_CFA_REGISTER rsp | 1052 | CFI_DEF_CFA_REGISTER rsp |
1036 | decl %gs:pda_irqcount | 1053 | decl %gs:pda_irqcount |
1037 | popq %r15 | ||
1038 | CFI_ADJUST_CFA_OFFSET -8 | ||
1039 | ret | 1054 | ret |
1040 | CFI_ENDPROC | 1055 | CFI_ENDPROC |
diff --git a/arch/x86_64/kernel/genapic_cluster.c b/arch/x86_64/kernel/genapic_cluster.c index a472d62f899a..43fcf62fef0f 100644 --- a/arch/x86_64/kernel/genapic_cluster.c +++ b/arch/x86_64/kernel/genapic_cluster.c | |||
@@ -72,14 +72,11 @@ static void cluster_send_IPI_mask(cpumask_t mask, int vector) | |||
72 | static void cluster_send_IPI_allbutself(int vector) | 72 | static void cluster_send_IPI_allbutself(int vector) |
73 | { | 73 | { |
74 | cpumask_t mask = cpu_online_map; | 74 | cpumask_t mask = cpu_online_map; |
75 | int me = get_cpu(); /* Ensure we are not preempted when we clear */ | ||
76 | 75 | ||
77 | cpu_clear(me, mask); | 76 | cpu_clear(smp_processor_id(), mask); |
78 | 77 | ||
79 | if (!cpus_empty(mask)) | 78 | if (!cpus_empty(mask)) |
80 | cluster_send_IPI_mask(mask, vector); | 79 | cluster_send_IPI_mask(mask, vector); |
81 | |||
82 | put_cpu(); | ||
83 | } | 80 | } |
84 | 81 | ||
85 | static void cluster_send_IPI_all(int vector) | 82 | static void cluster_send_IPI_all(int vector) |
diff --git a/arch/x86_64/kernel/genapic_flat.c b/arch/x86_64/kernel/genapic_flat.c index 9da3edb799ea..1a2ab825be98 100644 --- a/arch/x86_64/kernel/genapic_flat.c +++ b/arch/x86_64/kernel/genapic_flat.c | |||
@@ -83,12 +83,11 @@ static void flat_send_IPI_allbutself(int vector) | |||
83 | __send_IPI_shortcut(APIC_DEST_ALLBUT, vector,APIC_DEST_LOGICAL); | 83 | __send_IPI_shortcut(APIC_DEST_ALLBUT, vector,APIC_DEST_LOGICAL); |
84 | #else | 84 | #else |
85 | cpumask_t allbutme = cpu_online_map; | 85 | cpumask_t allbutme = cpu_online_map; |
86 | int me = get_cpu(); /* Ensure we are not preempted when we clear */ | 86 | |
87 | cpu_clear(me, allbutme); | 87 | cpu_clear(smp_processor_id(), allbutme); |
88 | 88 | ||
89 | if (!cpus_empty(allbutme)) | 89 | if (!cpus_empty(allbutme)) |
90 | flat_send_IPI_mask(allbutme, vector); | 90 | flat_send_IPI_mask(allbutme, vector); |
91 | put_cpu(); | ||
92 | #endif | 91 | #endif |
93 | } | 92 | } |
94 | 93 | ||
@@ -149,10 +148,9 @@ static void physflat_send_IPI_mask(cpumask_t cpumask, int vector) | |||
149 | static void physflat_send_IPI_allbutself(int vector) | 148 | static void physflat_send_IPI_allbutself(int vector) |
150 | { | 149 | { |
151 | cpumask_t allbutme = cpu_online_map; | 150 | cpumask_t allbutme = cpu_online_map; |
152 | int me = get_cpu(); | 151 | |
153 | cpu_clear(me, allbutme); | 152 | cpu_clear(smp_processor_id(), allbutme); |
154 | physflat_send_IPI_mask(allbutme, vector); | 153 | physflat_send_IPI_mask(allbutme, vector); |
155 | put_cpu(); | ||
156 | } | 154 | } |
157 | 155 | ||
158 | static void physflat_send_IPI_all(int vector) | 156 | static void physflat_send_IPI_all(int vector) |
diff --git a/arch/x86_64/kernel/head.S b/arch/x86_64/kernel/head.S index b92e5f45ed46..692c737feddb 100644 --- a/arch/x86_64/kernel/head.S +++ b/arch/x86_64/kernel/head.S | |||
@@ -12,6 +12,7 @@ | |||
12 | 12 | ||
13 | #include <linux/linkage.h> | 13 | #include <linux/linkage.h> |
14 | #include <linux/threads.h> | 14 | #include <linux/threads.h> |
15 | #include <linux/init.h> | ||
15 | #include <asm/desc.h> | 16 | #include <asm/desc.h> |
16 | #include <asm/segment.h> | 17 | #include <asm/segment.h> |
17 | #include <asm/page.h> | 18 | #include <asm/page.h> |
@@ -70,7 +71,7 @@ startup_32: | |||
70 | movl %eax, %cr4 | 71 | movl %eax, %cr4 |
71 | 72 | ||
72 | /* Setup early boot stage 4 level pagetables */ | 73 | /* Setup early boot stage 4 level pagetables */ |
73 | movl $(init_level4_pgt - __START_KERNEL_map), %eax | 74 | movl $(boot_level4_pgt - __START_KERNEL_map), %eax |
74 | movl %eax, %cr3 | 75 | movl %eax, %cr3 |
75 | 76 | ||
76 | /* Setup EFER (Extended Feature Enable Register) */ | 77 | /* Setup EFER (Extended Feature Enable Register) */ |
@@ -113,7 +114,7 @@ startup_64: | |||
113 | movq %rax, %cr4 | 114 | movq %rax, %cr4 |
114 | 115 | ||
115 | /* Setup early boot stage 4 level pagetables. */ | 116 | /* Setup early boot stage 4 level pagetables. */ |
116 | movq $(init_level4_pgt - __START_KERNEL_map), %rax | 117 | movq $(boot_level4_pgt - __START_KERNEL_map), %rax |
117 | movq %rax, %cr3 | 118 | movq %rax, %cr3 |
118 | 119 | ||
119 | /* Check if nx is implemented */ | 120 | /* Check if nx is implemented */ |
@@ -240,116 +241,90 @@ ljumpvector: | |||
240 | ENTRY(stext) | 241 | ENTRY(stext) |
241 | ENTRY(_stext) | 242 | ENTRY(_stext) |
242 | 243 | ||
243 | /* | 244 | $page = 0 |
244 | * This default setting generates an ident mapping at address 0x100000 | 245 | #define NEXT_PAGE(name) \ |
245 | * and a mapping for the kernel that precisely maps virtual address | 246 | $page = $page + 1; \ |
246 | * 0xffffffff80000000 to physical address 0x000000. (always using | 247 | .org $page * 0x1000; \ |
247 | * 2Mbyte large pages provided by PAE mode) | 248 | phys_/**/name = $page * 0x1000 + __PHYSICAL_START; \ |
248 | */ | 249 | ENTRY(name) |
249 | .org 0x1000 | ||
250 | ENTRY(init_level4_pgt) | ||
251 | .quad 0x0000000000002007 + __PHYSICAL_START /* -> level3_ident_pgt */ | ||
252 | .fill 255,8,0 | ||
253 | .quad 0x000000000000a007 + __PHYSICAL_START | ||
254 | .fill 254,8,0 | ||
255 | /* (2^48-(2*1024*1024*1024))/(2^39) = 511 */ | ||
256 | .quad 0x0000000000003007 + __PHYSICAL_START /* -> level3_kernel_pgt */ | ||
257 | 250 | ||
258 | .org 0x2000 | 251 | NEXT_PAGE(init_level4_pgt) |
259 | ENTRY(level3_ident_pgt) | 252 | /* This gets initialized in x86_64_start_kernel */ |
260 | .quad 0x0000000000004007 + __PHYSICAL_START | 253 | .fill 512,8,0 |
254 | |||
255 | NEXT_PAGE(level3_ident_pgt) | ||
256 | .quad phys_level2_ident_pgt | 0x007 | ||
261 | .fill 511,8,0 | 257 | .fill 511,8,0 |
262 | 258 | ||
263 | .org 0x3000 | 259 | NEXT_PAGE(level3_kernel_pgt) |
264 | ENTRY(level3_kernel_pgt) | ||
265 | .fill 510,8,0 | 260 | .fill 510,8,0 |
266 | /* (2^48-(2*1024*1024*1024)-((2^39)*511))/(2^30) = 510 */ | 261 | /* (2^48-(2*1024*1024*1024)-((2^39)*511))/(2^30) = 510 */ |
267 | .quad 0x0000000000005007 + __PHYSICAL_START /* -> level2_kernel_pgt */ | 262 | .quad phys_level2_kernel_pgt | 0x007 |
268 | .fill 1,8,0 | 263 | .fill 1,8,0 |
269 | 264 | ||
270 | .org 0x4000 | 265 | NEXT_PAGE(level2_ident_pgt) |
271 | ENTRY(level2_ident_pgt) | ||
272 | /* 40MB for bootup. */ | 266 | /* 40MB for bootup. */ |
273 | .quad 0x0000000000000083 | 267 | i = 0 |
274 | .quad 0x0000000000200083 | 268 | .rept 20 |
275 | .quad 0x0000000000400083 | 269 | .quad i << 21 | 0x083 |
276 | .quad 0x0000000000600083 | 270 | i = i + 1 |
277 | .quad 0x0000000000800083 | 271 | .endr |
278 | .quad 0x0000000000A00083 | ||
279 | .quad 0x0000000000C00083 | ||
280 | .quad 0x0000000000E00083 | ||
281 | .quad 0x0000000001000083 | ||
282 | .quad 0x0000000001200083 | ||
283 | .quad 0x0000000001400083 | ||
284 | .quad 0x0000000001600083 | ||
285 | .quad 0x0000000001800083 | ||
286 | .quad 0x0000000001A00083 | ||
287 | .quad 0x0000000001C00083 | ||
288 | .quad 0x0000000001E00083 | ||
289 | .quad 0x0000000002000083 | ||
290 | .quad 0x0000000002200083 | ||
291 | .quad 0x0000000002400083 | ||
292 | .quad 0x0000000002600083 | ||
293 | /* Temporary mappings for the super early allocator in arch/x86_64/mm/init.c */ | 272 | /* Temporary mappings for the super early allocator in arch/x86_64/mm/init.c */ |
294 | .globl temp_boot_pmds | 273 | .globl temp_boot_pmds |
295 | temp_boot_pmds: | 274 | temp_boot_pmds: |
296 | .fill 492,8,0 | 275 | .fill 492,8,0 |
297 | 276 | ||
298 | .org 0x5000 | 277 | NEXT_PAGE(level2_kernel_pgt) |
299 | ENTRY(level2_kernel_pgt) | ||
300 | /* 40MB kernel mapping. The kernel code cannot be bigger than that. | 278 | /* 40MB kernel mapping. The kernel code cannot be bigger than that. |
301 | When you change this change KERNEL_TEXT_SIZE in page.h too. */ | 279 | When you change this change KERNEL_TEXT_SIZE in page.h too. */ |
302 | /* (2^48-(2*1024*1024*1024)-((2^39)*511)-((2^30)*510)) = 0 */ | 280 | /* (2^48-(2*1024*1024*1024)-((2^39)*511)-((2^30)*510)) = 0 */ |
303 | .quad 0x0000000000000183 | 281 | i = 0 |
304 | .quad 0x0000000000200183 | 282 | .rept 20 |
305 | .quad 0x0000000000400183 | 283 | .quad i << 21 | 0x183 |
306 | .quad 0x0000000000600183 | 284 | i = i + 1 |
307 | .quad 0x0000000000800183 | 285 | .endr |
308 | .quad 0x0000000000A00183 | ||
309 | .quad 0x0000000000C00183 | ||
310 | .quad 0x0000000000E00183 | ||
311 | .quad 0x0000000001000183 | ||
312 | .quad 0x0000000001200183 | ||
313 | .quad 0x0000000001400183 | ||
314 | .quad 0x0000000001600183 | ||
315 | .quad 0x0000000001800183 | ||
316 | .quad 0x0000000001A00183 | ||
317 | .quad 0x0000000001C00183 | ||
318 | .quad 0x0000000001E00183 | ||
319 | .quad 0x0000000002000183 | ||
320 | .quad 0x0000000002200183 | ||
321 | .quad 0x0000000002400183 | ||
322 | .quad 0x0000000002600183 | ||
323 | /* Module mapping starts here */ | 286 | /* Module mapping starts here */ |
324 | .fill 492,8,0 | 287 | .fill 492,8,0 |
325 | 288 | ||
326 | .org 0x6000 | 289 | NEXT_PAGE(empty_zero_page) |
327 | ENTRY(empty_zero_page) | ||
328 | |||
329 | .org 0x7000 | ||
330 | ENTRY(empty_bad_page) | ||
331 | 290 | ||
332 | .org 0x8000 | 291 | NEXT_PAGE(level3_physmem_pgt) |
333 | ENTRY(empty_bad_pte_table) | 292 | .quad phys_level2_kernel_pgt | 0x007 /* so that __va works even before pagetable_init */ |
293 | .fill 511,8,0 | ||
334 | 294 | ||
335 | .org 0x9000 | 295 | #undef NEXT_PAGE |
336 | ENTRY(empty_bad_pmd_table) | ||
337 | 296 | ||
338 | .org 0xa000 | 297 | .data |
339 | ENTRY(level3_physmem_pgt) | ||
340 | .quad 0x0000000000005007 + __PHYSICAL_START /* -> level2_kernel_pgt (so that __va works even before pagetable_init) */ | ||
341 | 298 | ||
342 | .org 0xb000 | ||
343 | #ifdef CONFIG_ACPI_SLEEP | 299 | #ifdef CONFIG_ACPI_SLEEP |
300 | .align PAGE_SIZE | ||
344 | ENTRY(wakeup_level4_pgt) | 301 | ENTRY(wakeup_level4_pgt) |
345 | .quad 0x0000000000002007 + __PHYSICAL_START /* -> level3_ident_pgt */ | 302 | .quad phys_level3_ident_pgt | 0x007 |
346 | .fill 255,8,0 | 303 | .fill 255,8,0 |
347 | .quad 0x000000000000a007 + __PHYSICAL_START | 304 | .quad phys_level3_physmem_pgt | 0x007 |
348 | .fill 254,8,0 | 305 | .fill 254,8,0 |
349 | /* (2^48-(2*1024*1024*1024))/(2^39) = 511 */ | 306 | /* (2^48-(2*1024*1024*1024))/(2^39) = 511 */ |
350 | .quad 0x0000000000003007 + __PHYSICAL_START /* -> level3_kernel_pgt */ | 307 | .quad phys_level3_kernel_pgt | 0x007 |
351 | #endif | 308 | #endif |
352 | 309 | ||
310 | #ifndef CONFIG_HOTPLUG_CPU | ||
311 | __INITDATA | ||
312 | #endif | ||
313 | /* | ||
314 | * This default setting generates an ident mapping at address 0x100000 | ||
315 | * and a mapping for the kernel that precisely maps virtual address | ||
316 | * 0xffffffff80000000 to physical address 0x000000. (always using | ||
317 | * 2Mbyte large pages provided by PAE mode) | ||
318 | */ | ||
319 | .align PAGE_SIZE | ||
320 | ENTRY(boot_level4_pgt) | ||
321 | .quad phys_level3_ident_pgt | 0x007 | ||
322 | .fill 255,8,0 | ||
323 | .quad phys_level3_physmem_pgt | 0x007 | ||
324 | .fill 254,8,0 | ||
325 | /* (2^48-(2*1024*1024*1024))/(2^39) = 511 */ | ||
326 | .quad phys_level3_kernel_pgt | 0x007 | ||
327 | |||
353 | .data | 328 | .data |
354 | 329 | ||
355 | .align 16 | 330 | .align 16 |
@@ -370,14 +345,14 @@ gdt: | |||
370 | * Also sysret mandates a special GDT layout | 345 | * Also sysret mandates a special GDT layout |
371 | */ | 346 | */ |
372 | 347 | ||
373 | .align L1_CACHE_BYTES | 348 | .align PAGE_SIZE |
374 | 349 | ||
375 | /* The TLS descriptors are currently at a different place compared to i386. | 350 | /* The TLS descriptors are currently at a different place compared to i386. |
376 | Hopefully nobody expects them at a fixed place (Wine?) */ | 351 | Hopefully nobody expects them at a fixed place (Wine?) */ |
377 | 352 | ||
378 | ENTRY(cpu_gdt_table) | 353 | ENTRY(cpu_gdt_table) |
379 | .quad 0x0000000000000000 /* NULL descriptor */ | 354 | .quad 0x0000000000000000 /* NULL descriptor */ |
380 | .quad 0x008f9a000000ffff /* __KERNEL_COMPAT32_CS */ | 355 | .quad 0x0 /* unused */ |
381 | .quad 0x00af9a000000ffff /* __KERNEL_CS */ | 356 | .quad 0x00af9a000000ffff /* __KERNEL_CS */ |
382 | .quad 0x00cf92000000ffff /* __KERNEL_DS */ | 357 | .quad 0x00cf92000000ffff /* __KERNEL_DS */ |
383 | .quad 0x00cffa000000ffff /* __USER32_CS */ | 358 | .quad 0x00cffa000000ffff /* __USER32_CS */ |
@@ -387,15 +362,15 @@ ENTRY(cpu_gdt_table) | |||
387 | .quad 0,0 /* TSS */ | 362 | .quad 0,0 /* TSS */ |
388 | .quad 0,0 /* LDT */ | 363 | .quad 0,0 /* LDT */ |
389 | .quad 0,0,0 /* three TLS descriptors */ | 364 | .quad 0,0,0 /* three TLS descriptors */ |
390 | .quad 0x00009a000000ffff /* __KERNEL16_CS - 16bit PM for S3 wakeup. */ | 365 | .quad 0 /* unused */ |
391 | /* base must be patched for real base address. */ | ||
392 | gdt_end: | 366 | gdt_end: |
393 | /* asm/segment.h:GDT_ENTRIES must match this */ | 367 | /* asm/segment.h:GDT_ENTRIES must match this */ |
394 | /* This should be a multiple of the cache line size */ | 368 | /* This should be a multiple of the cache line size */ |
395 | /* GDTs of other CPUs: */ | 369 | /* GDTs of other CPUs are now dynamically allocated */ |
396 | .fill (GDT_SIZE * NR_CPUS) - (gdt_end - cpu_gdt_table) | 370 | |
371 | /* zero the remaining page */ | ||
372 | .fill PAGE_SIZE / 8 - GDT_ENTRIES,8,0 | ||
397 | 373 | ||
398 | .align L1_CACHE_BYTES | ||
399 | ENTRY(idt_table) | 374 | ENTRY(idt_table) |
400 | .rept 256 | 375 | .rept 256 |
401 | .quad 0 | 376 | .quad 0 |
diff --git a/arch/x86_64/kernel/head64.c b/arch/x86_64/kernel/head64.c index cf6ab147a2a5..cea20a66c150 100644 --- a/arch/x86_64/kernel/head64.c +++ b/arch/x86_64/kernel/head64.c | |||
@@ -19,14 +19,15 @@ | |||
19 | #include <asm/bootsetup.h> | 19 | #include <asm/bootsetup.h> |
20 | #include <asm/setup.h> | 20 | #include <asm/setup.h> |
21 | #include <asm/desc.h> | 21 | #include <asm/desc.h> |
22 | #include <asm/pgtable.h> | ||
23 | #include <asm/sections.h> | ||
22 | 24 | ||
23 | /* Don't add a printk in there. printk relies on the PDA which is not initialized | 25 | /* Don't add a printk in there. printk relies on the PDA which is not initialized |
24 | yet. */ | 26 | yet. */ |
25 | static void __init clear_bss(void) | 27 | static void __init clear_bss(void) |
26 | { | 28 | { |
27 | extern char __bss_start[], __bss_end[]; | ||
28 | memset(__bss_start, 0, | 29 | memset(__bss_start, 0, |
29 | (unsigned long) __bss_end - (unsigned long) __bss_start); | 30 | (unsigned long) __bss_stop - (unsigned long) __bss_start); |
30 | } | 31 | } |
31 | 32 | ||
32 | #define NEW_CL_POINTER 0x228 /* Relative to real mode data */ | 33 | #define NEW_CL_POINTER 0x228 /* Relative to real mode data */ |
@@ -75,8 +76,6 @@ static void __init setup_boot_cpu_data(void) | |||
75 | boot_cpu_data.x86_mask = eax & 0xf; | 76 | boot_cpu_data.x86_mask = eax & 0xf; |
76 | } | 77 | } |
77 | 78 | ||
78 | extern char _end[]; | ||
79 | |||
80 | void __init x86_64_start_kernel(char * real_mode_data) | 79 | void __init x86_64_start_kernel(char * real_mode_data) |
81 | { | 80 | { |
82 | char *s; | 81 | char *s; |
@@ -86,6 +85,16 @@ void __init x86_64_start_kernel(char * real_mode_data) | |||
86 | set_intr_gate(i, early_idt_handler); | 85 | set_intr_gate(i, early_idt_handler); |
87 | asm volatile("lidt %0" :: "m" (idt_descr)); | 86 | asm volatile("lidt %0" :: "m" (idt_descr)); |
88 | clear_bss(); | 87 | clear_bss(); |
88 | |||
89 | /* | ||
90 | * switch to init_level4_pgt from boot_level4_pgt | ||
91 | */ | ||
92 | memcpy(init_level4_pgt, boot_level4_pgt, PTRS_PER_PGD*sizeof(pgd_t)); | ||
93 | asm volatile("movq %0,%%cr3" :: "r" (__pa_symbol(&init_level4_pgt))); | ||
94 | |||
95 | for (i = 0; i < NR_CPUS; i++) | ||
96 | cpu_pda(i) = &boot_cpu_pda[i]; | ||
97 | |||
89 | pda_init(0); | 98 | pda_init(0); |
90 | copy_bootdata(real_mode_data); | 99 | copy_bootdata(real_mode_data); |
91 | #ifdef CONFIG_SMP | 100 | #ifdef CONFIG_SMP |
@@ -93,7 +102,7 @@ void __init x86_64_start_kernel(char * real_mode_data) | |||
93 | #endif | 102 | #endif |
94 | s = strstr(saved_command_line, "earlyprintk="); | 103 | s = strstr(saved_command_line, "earlyprintk="); |
95 | if (s != NULL) | 104 | if (s != NULL) |
96 | setup_early_printk(s); | 105 | setup_early_printk(strchr(s, '=') + 1); |
97 | #ifdef CONFIG_NUMA | 106 | #ifdef CONFIG_NUMA |
98 | s = strstr(saved_command_line, "numa="); | 107 | s = strstr(saved_command_line, "numa="); |
99 | if (s != NULL) | 108 | if (s != NULL) |
diff --git a/arch/x86_64/kernel/i387.c b/arch/x86_64/kernel/i387.c index d9b22b633e39..a5d7e16b928e 100644 --- a/arch/x86_64/kernel/i387.c +++ b/arch/x86_64/kernel/i387.c | |||
@@ -95,7 +95,7 @@ int save_i387(struct _fpstate __user *buf) | |||
95 | if (!used_math()) | 95 | if (!used_math()) |
96 | return 0; | 96 | return 0; |
97 | clear_used_math(); /* trigger finit */ | 97 | clear_used_math(); /* trigger finit */ |
98 | if (tsk->thread_info->status & TS_USEDFPU) { | 98 | if (task_thread_info(tsk)->status & TS_USEDFPU) { |
99 | err = save_i387_checking((struct i387_fxsave_struct __user *)buf); | 99 | err = save_i387_checking((struct i387_fxsave_struct __user *)buf); |
100 | if (err) return err; | 100 | if (err) return err; |
101 | stts(); | 101 | stts(); |
diff --git a/arch/x86_64/kernel/i8259.c b/arch/x86_64/kernel/i8259.c index c6c9791d77c1..5ecd34ab8c2b 100644 --- a/arch/x86_64/kernel/i8259.c +++ b/arch/x86_64/kernel/i8259.c | |||
@@ -133,7 +133,7 @@ static void end_8259A_irq (unsigned int irq) | |||
133 | { | 133 | { |
134 | if (irq > 256) { | 134 | if (irq > 256) { |
135 | char var; | 135 | char var; |
136 | printk("return %p stack %p ti %p\n", __builtin_return_address(0), &var, current->thread_info); | 136 | printk("return %p stack %p ti %p\n", __builtin_return_address(0), &var, task_thread_info(current)); |
137 | 137 | ||
138 | BUG(); | 138 | BUG(); |
139 | } | 139 | } |
@@ -492,6 +492,7 @@ void invalidate_interrupt5(void); | |||
492 | void invalidate_interrupt6(void); | 492 | void invalidate_interrupt6(void); |
493 | void invalidate_interrupt7(void); | 493 | void invalidate_interrupt7(void); |
494 | void thermal_interrupt(void); | 494 | void thermal_interrupt(void); |
495 | void threshold_interrupt(void); | ||
495 | void i8254_timer_resume(void); | 496 | void i8254_timer_resume(void); |
496 | 497 | ||
497 | static void setup_timer_hardware(void) | 498 | static void setup_timer_hardware(void) |
@@ -515,7 +516,7 @@ void i8254_timer_resume(void) | |||
515 | } | 516 | } |
516 | 517 | ||
517 | static struct sysdev_class timer_sysclass = { | 518 | static struct sysdev_class timer_sysclass = { |
518 | set_kset_name("timer"), | 519 | set_kset_name("timer_pit"), |
519 | .resume = timer_resume, | 520 | .resume = timer_resume, |
520 | }; | 521 | }; |
521 | 522 | ||
@@ -548,10 +549,9 @@ void __init init_IRQ(void) | |||
548 | int vector = FIRST_EXTERNAL_VECTOR + i; | 549 | int vector = FIRST_EXTERNAL_VECTOR + i; |
549 | if (i >= NR_IRQS) | 550 | if (i >= NR_IRQS) |
550 | break; | 551 | break; |
551 | if (vector != IA32_SYSCALL_VECTOR && vector != KDB_VECTOR) { | 552 | if (vector != IA32_SYSCALL_VECTOR) |
552 | set_intr_gate(vector, interrupt[i]); | 553 | set_intr_gate(vector, interrupt[i]); |
553 | } | 554 | } |
554 | } | ||
555 | 555 | ||
556 | #ifdef CONFIG_SMP | 556 | #ifdef CONFIG_SMP |
557 | /* | 557 | /* |
@@ -580,6 +580,7 @@ void __init init_IRQ(void) | |||
580 | set_intr_gate(CALL_FUNCTION_VECTOR, call_function_interrupt); | 580 | set_intr_gate(CALL_FUNCTION_VECTOR, call_function_interrupt); |
581 | #endif | 581 | #endif |
582 | set_intr_gate(THERMAL_APIC_VECTOR, thermal_interrupt); | 582 | set_intr_gate(THERMAL_APIC_VECTOR, thermal_interrupt); |
583 | set_intr_gate(THRESHOLD_APIC_VECTOR, threshold_interrupt); | ||
583 | 584 | ||
584 | #ifdef CONFIG_X86_LOCAL_APIC | 585 | #ifdef CONFIG_X86_LOCAL_APIC |
585 | /* self generated IPI for local APIC timer */ | 586 | /* self generated IPI for local APIC timer */ |
diff --git a/arch/x86_64/kernel/init_task.c b/arch/x86_64/kernel/init_task.c index e0ba5c1043fd..ce31d904d601 100644 --- a/arch/x86_64/kernel/init_task.c +++ b/arch/x86_64/kernel/init_task.c | |||
@@ -44,6 +44,6 @@ EXPORT_SYMBOL(init_task); | |||
44 | * section. Since TSS's are completely CPU-local, we want them | 44 | * section. Since TSS's are completely CPU-local, we want them |
45 | * on exact cacheline boundaries, to eliminate cacheline ping-pong. | 45 | * on exact cacheline boundaries, to eliminate cacheline ping-pong. |
46 | */ | 46 | */ |
47 | DEFINE_PER_CPU(struct tss_struct, init_tss) ____cacheline_maxaligned_in_smp = INIT_TSS; | 47 | DEFINE_PER_CPU(struct tss_struct, init_tss) ____cacheline_internodealigned_in_smp = INIT_TSS; |
48 | 48 | ||
49 | #define ALIGN_TO_4K __attribute__((section(".data.init_task"))) | 49 | #define ALIGN_TO_4K __attribute__((section(".data.init_task"))) |
diff --git a/arch/x86_64/kernel/io_apic.c b/arch/x86_64/kernel/io_apic.c index c8eee20cd519..e8cf44ef8778 100644 --- a/arch/x86_64/kernel/io_apic.c +++ b/arch/x86_64/kernel/io_apic.c | |||
@@ -37,6 +37,7 @@ | |||
37 | #include <asm/proto.h> | 37 | #include <asm/proto.h> |
38 | #include <asm/mach_apic.h> | 38 | #include <asm/mach_apic.h> |
39 | #include <asm/acpi.h> | 39 | #include <asm/acpi.h> |
40 | #include <asm/dma.h> | ||
40 | 41 | ||
41 | #define __apicdebuginit __init | 42 | #define __apicdebuginit __init |
42 | 43 | ||
@@ -46,6 +47,9 @@ static int no_timer_check; | |||
46 | 47 | ||
47 | int disable_timer_pin_1 __initdata; | 48 | int disable_timer_pin_1 __initdata; |
48 | 49 | ||
50 | /* Where if anywhere is the i8259 connect in external int mode */ | ||
51 | static struct { int pin, apic; } ioapic_i8259 = { -1, -1 }; | ||
52 | |||
49 | static DEFINE_SPINLOCK(ioapic_lock); | 53 | static DEFINE_SPINLOCK(ioapic_lock); |
50 | 54 | ||
51 | /* | 55 | /* |
@@ -57,7 +61,7 @@ int nr_ioapic_registers[MAX_IO_APICS]; | |||
57 | * Rough estimation of how many shared IRQs there are, can | 61 | * Rough estimation of how many shared IRQs there are, can |
58 | * be changed anytime. | 62 | * be changed anytime. |
59 | */ | 63 | */ |
60 | #define MAX_PLUS_SHARED_IRQS NR_IRQS | 64 | #define MAX_PLUS_SHARED_IRQS NR_IRQ_VECTORS |
61 | #define PIN_MAP_SIZE (MAX_PLUS_SHARED_IRQS + NR_IRQS) | 65 | #define PIN_MAP_SIZE (MAX_PLUS_SHARED_IRQS + NR_IRQS) |
62 | 66 | ||
63 | /* | 67 | /* |
@@ -85,6 +89,7 @@ int vector_irq[NR_VECTORS] __read_mostly = { [0 ... NR_VECTORS - 1] = -1}; | |||
85 | int pin; \ | 89 | int pin; \ |
86 | struct irq_pin_list *entry = irq_2_pin + irq; \ | 90 | struct irq_pin_list *entry = irq_2_pin + irq; \ |
87 | \ | 91 | \ |
92 | BUG_ON(irq >= NR_IRQS); \ | ||
88 | for (;;) { \ | 93 | for (;;) { \ |
89 | unsigned int reg; \ | 94 | unsigned int reg; \ |
90 | pin = entry->pin; \ | 95 | pin = entry->pin; \ |
@@ -127,6 +132,8 @@ static void set_ioapic_affinity_irq(unsigned int irq, cpumask_t mask) | |||
127 | } | 132 | } |
128 | #endif | 133 | #endif |
129 | 134 | ||
135 | static u8 gsi_2_irq[NR_IRQ_VECTORS] = { [0 ... NR_IRQ_VECTORS-1] = 0xFF }; | ||
136 | |||
130 | /* | 137 | /* |
131 | * The common case is 1:1 IRQ<->pin mappings. Sometimes there are | 138 | * The common case is 1:1 IRQ<->pin mappings. Sometimes there are |
132 | * shared ISA-space IRQs, so we have to support them. We are super | 139 | * shared ISA-space IRQs, so we have to support them. We are super |
@@ -137,6 +144,7 @@ static void add_pin_to_irq(unsigned int irq, int apic, int pin) | |||
137 | static int first_free_entry = NR_IRQS; | 144 | static int first_free_entry = NR_IRQS; |
138 | struct irq_pin_list *entry = irq_2_pin + irq; | 145 | struct irq_pin_list *entry = irq_2_pin + irq; |
139 | 146 | ||
147 | BUG_ON(irq >= NR_IRQS); | ||
140 | while (entry->next) | 148 | while (entry->next) |
141 | entry = irq_2_pin + entry->next; | 149 | entry = irq_2_pin + entry->next; |
142 | 150 | ||
@@ -144,7 +152,7 @@ static void add_pin_to_irq(unsigned int irq, int apic, int pin) | |||
144 | entry->next = first_free_entry; | 152 | entry->next = first_free_entry; |
145 | entry = irq_2_pin + entry->next; | 153 | entry = irq_2_pin + entry->next; |
146 | if (++first_free_entry >= PIN_MAP_SIZE) | 154 | if (++first_free_entry >= PIN_MAP_SIZE) |
147 | panic("io_apic.c: whoops"); | 155 | panic("io_apic.c: ran out of irq_2_pin entries!"); |
148 | } | 156 | } |
149 | entry->apic = apic; | 157 | entry->apic = apic; |
150 | entry->pin = pin; | 158 | entry->pin = pin; |
@@ -256,9 +264,6 @@ __setup("apic", enable_ioapic_setup); | |||
256 | void __init check_ioapic(void) | 264 | void __init check_ioapic(void) |
257 | { | 265 | { |
258 | int num,slot,func; | 266 | int num,slot,func; |
259 | if (ioapic_force) | ||
260 | return; | ||
261 | |||
262 | /* Poor man's PCI discovery */ | 267 | /* Poor man's PCI discovery */ |
263 | for (num = 0; num < 32; num++) { | 268 | for (num = 0; num < 32; num++) { |
264 | for (slot = 0; slot < 32; slot++) { | 269 | for (slot = 0; slot < 32; slot++) { |
@@ -280,7 +285,7 @@ void __init check_ioapic(void) | |||
280 | switch (vendor) { | 285 | switch (vendor) { |
281 | case PCI_VENDOR_ID_VIA: | 286 | case PCI_VENDOR_ID_VIA: |
282 | #ifdef CONFIG_GART_IOMMU | 287 | #ifdef CONFIG_GART_IOMMU |
283 | if ((end_pfn >= (0xffffffff>>PAGE_SHIFT) || | 288 | if ((end_pfn > MAX_DMA32_PFN || |
284 | force_iommu) && | 289 | force_iommu) && |
285 | !iommu_aperture_allowed) { | 290 | !iommu_aperture_allowed) { |
286 | printk(KERN_INFO | 291 | printk(KERN_INFO |
@@ -359,7 +364,7 @@ static int find_irq_entry(int apic, int pin, int type) | |||
359 | /* | 364 | /* |
360 | * Find the pin to which IRQ[irq] (ISA) is connected | 365 | * Find the pin to which IRQ[irq] (ISA) is connected |
361 | */ | 366 | */ |
362 | static int find_isa_irq_pin(int irq, int type) | 367 | static int __init find_isa_irq_pin(int irq, int type) |
363 | { | 368 | { |
364 | int i; | 369 | int i; |
365 | 370 | ||
@@ -377,6 +382,31 @@ static int find_isa_irq_pin(int irq, int type) | |||
377 | return -1; | 382 | return -1; |
378 | } | 383 | } |
379 | 384 | ||
385 | static int __init find_isa_irq_apic(int irq, int type) | ||
386 | { | ||
387 | int i; | ||
388 | |||
389 | for (i = 0; i < mp_irq_entries; i++) { | ||
390 | int lbus = mp_irqs[i].mpc_srcbus; | ||
391 | |||
392 | if ((mp_bus_id_to_type[lbus] == MP_BUS_ISA || | ||
393 | mp_bus_id_to_type[lbus] == MP_BUS_EISA || | ||
394 | mp_bus_id_to_type[lbus] == MP_BUS_MCA) && | ||
395 | (mp_irqs[i].mpc_irqtype == type) && | ||
396 | (mp_irqs[i].mpc_srcbusirq == irq)) | ||
397 | break; | ||
398 | } | ||
399 | if (i < mp_irq_entries) { | ||
400 | int apic; | ||
401 | for(apic = 0; apic < nr_ioapics; apic++) { | ||
402 | if (mp_ioapics[apic].mpc_apicid == mp_irqs[i].mpc_dstapic) | ||
403 | return apic; | ||
404 | } | ||
405 | } | ||
406 | |||
407 | return -1; | ||
408 | } | ||
409 | |||
380 | /* | 410 | /* |
381 | * Find a specific PCI IRQ entry. | 411 | * Find a specific PCI IRQ entry. |
382 | * Not an __init, possibly needed by modules | 412 | * Not an __init, possibly needed by modules |
@@ -420,6 +450,7 @@ int IO_APIC_get_PCI_irq_vector(int bus, int slot, int pin) | |||
420 | best_guess = irq; | 450 | best_guess = irq; |
421 | } | 451 | } |
422 | } | 452 | } |
453 | BUG_ON(best_guess >= NR_IRQS); | ||
423 | return best_guess; | 454 | return best_guess; |
424 | } | 455 | } |
425 | 456 | ||
@@ -610,6 +641,64 @@ static inline int irq_trigger(int idx) | |||
610 | return MPBIOS_trigger(idx); | 641 | return MPBIOS_trigger(idx); |
611 | } | 642 | } |
612 | 643 | ||
644 | static int next_irq = 16; | ||
645 | |||
646 | /* | ||
647 | * gsi_irq_sharing -- Name overload! "irq" can be either a legacy IRQ | ||
648 | * in the range 0-15, a linux IRQ in the range 0-223, or a GSI number | ||
649 | * from ACPI, which can reach 800 in large boxen. | ||
650 | * | ||
651 | * Compact the sparse GSI space into a sequential IRQ series and reuse | ||
652 | * vectors if possible. | ||
653 | */ | ||
654 | int gsi_irq_sharing(int gsi) | ||
655 | { | ||
656 | int i, tries, vector; | ||
657 | |||
658 | BUG_ON(gsi >= NR_IRQ_VECTORS); | ||
659 | |||
660 | if (platform_legacy_irq(gsi)) | ||
661 | return gsi; | ||
662 | |||
663 | if (gsi_2_irq[gsi] != 0xFF) | ||
664 | return (int)gsi_2_irq[gsi]; | ||
665 | |||
666 | tries = NR_IRQS; | ||
667 | try_again: | ||
668 | vector = assign_irq_vector(gsi); | ||
669 | |||
670 | /* | ||
671 | * Sharing vectors means sharing IRQs, so scan irq_vectors for previous | ||
672 | * use of vector and if found, return that IRQ. However, we never want | ||
673 | * to share legacy IRQs, which usually have a different trigger mode | ||
674 | * than PCI. | ||
675 | */ | ||
676 | for (i = 0; i < NR_IRQS; i++) | ||
677 | if (IO_APIC_VECTOR(i) == vector) | ||
678 | break; | ||
679 | if (platform_legacy_irq(i)) { | ||
680 | if (--tries >= 0) { | ||
681 | IO_APIC_VECTOR(i) = 0; | ||
682 | goto try_again; | ||
683 | } | ||
684 | panic("gsi_irq_sharing: didn't find an IRQ using vector 0x%02X for GSI %d", vector, gsi); | ||
685 | } | ||
686 | if (i < NR_IRQS) { | ||
687 | gsi_2_irq[gsi] = i; | ||
688 | printk(KERN_INFO "GSI %d sharing vector 0x%02X and IRQ %d\n", | ||
689 | gsi, vector, i); | ||
690 | return i; | ||
691 | } | ||
692 | |||
693 | i = next_irq++; | ||
694 | BUG_ON(i >= NR_IRQS); | ||
695 | gsi_2_irq[gsi] = i; | ||
696 | IO_APIC_VECTOR(i) = vector; | ||
697 | printk(KERN_INFO "GSI %d assigned vector 0x%02X and IRQ %d\n", | ||
698 | gsi, vector, i); | ||
699 | return i; | ||
700 | } | ||
701 | |||
613 | static int pin_2_irq(int idx, int apic, int pin) | 702 | static int pin_2_irq(int idx, int apic, int pin) |
614 | { | 703 | { |
615 | int irq, i; | 704 | int irq, i; |
@@ -639,6 +728,7 @@ static int pin_2_irq(int idx, int apic, int pin) | |||
639 | while (i < apic) | 728 | while (i < apic) |
640 | irq += nr_ioapic_registers[i++]; | 729 | irq += nr_ioapic_registers[i++]; |
641 | irq += pin; | 730 | irq += pin; |
731 | irq = gsi_irq_sharing(irq); | ||
642 | break; | 732 | break; |
643 | } | 733 | } |
644 | default: | 734 | default: |
@@ -648,6 +738,7 @@ static int pin_2_irq(int idx, int apic, int pin) | |||
648 | break; | 738 | break; |
649 | } | 739 | } |
650 | } | 740 | } |
741 | BUG_ON(irq >= NR_IRQS); | ||
651 | 742 | ||
652 | /* | 743 | /* |
653 | * PCI IRQ command line redirection. Yes, limits are hardcoded. | 744 | * PCI IRQ command line redirection. Yes, limits are hardcoded. |
@@ -663,6 +754,7 @@ static int pin_2_irq(int idx, int apic, int pin) | |||
663 | } | 754 | } |
664 | } | 755 | } |
665 | } | 756 | } |
757 | BUG_ON(irq >= NR_IRQS); | ||
666 | return irq; | 758 | return irq; |
667 | } | 759 | } |
668 | 760 | ||
@@ -690,8 +782,8 @@ int assign_irq_vector(int irq) | |||
690 | { | 782 | { |
691 | static int current_vector = FIRST_DEVICE_VECTOR, offset = 0; | 783 | static int current_vector = FIRST_DEVICE_VECTOR, offset = 0; |
692 | 784 | ||
693 | BUG_ON(irq >= NR_IRQ_VECTORS); | 785 | BUG_ON(irq != AUTO_ASSIGN && (unsigned)irq >= NR_IRQ_VECTORS); |
694 | if (IO_APIC_VECTOR(irq) > 0) | 786 | if (irq != AUTO_ASSIGN && IO_APIC_VECTOR(irq) > 0) |
695 | return IO_APIC_VECTOR(irq); | 787 | return IO_APIC_VECTOR(irq); |
696 | next: | 788 | next: |
697 | current_vector += 8; | 789 | current_vector += 8; |
@@ -699,9 +791,8 @@ next: | |||
699 | goto next; | 791 | goto next; |
700 | 792 | ||
701 | if (current_vector >= FIRST_SYSTEM_VECTOR) { | 793 | if (current_vector >= FIRST_SYSTEM_VECTOR) { |
702 | offset++; | 794 | /* If we run out of vectors on large boxen, must share them. */ |
703 | if (!(offset%8)) | 795 | offset = (offset + 1) % 8; |
704 | return -ENOSPC; | ||
705 | current_vector = FIRST_DEVICE_VECTOR + offset; | 796 | current_vector = FIRST_DEVICE_VECTOR + offset; |
706 | } | 797 | } |
707 | 798 | ||
@@ -809,7 +900,7 @@ static void __init setup_IO_APIC_irqs(void) | |||
809 | * Set up the 8259A-master output pin as broadcast to all | 900 | * Set up the 8259A-master output pin as broadcast to all |
810 | * CPUs. | 901 | * CPUs. |
811 | */ | 902 | */ |
812 | static void __init setup_ExtINT_IRQ0_pin(unsigned int pin, int vector) | 903 | static void __init setup_ExtINT_IRQ0_pin(unsigned int apic, unsigned int pin, int vector) |
813 | { | 904 | { |
814 | struct IO_APIC_route_entry entry; | 905 | struct IO_APIC_route_entry entry; |
815 | unsigned long flags; | 906 | unsigned long flags; |
@@ -819,7 +910,7 @@ static void __init setup_ExtINT_IRQ0_pin(unsigned int pin, int vector) | |||
819 | disable_8259A_irq(0); | 910 | disable_8259A_irq(0); |
820 | 911 | ||
821 | /* mask LVT0 */ | 912 | /* mask LVT0 */ |
822 | apic_write_around(APIC_LVT0, APIC_LVT_MASKED | APIC_DM_EXTINT); | 913 | apic_write(APIC_LVT0, APIC_LVT_MASKED | APIC_DM_EXTINT); |
823 | 914 | ||
824 | /* | 915 | /* |
825 | * We use logical delivery to get the timer IRQ | 916 | * We use logical delivery to get the timer IRQ |
@@ -843,8 +934,8 @@ static void __init setup_ExtINT_IRQ0_pin(unsigned int pin, int vector) | |||
843 | * Add it to the IO-APIC irq-routing table: | 934 | * Add it to the IO-APIC irq-routing table: |
844 | */ | 935 | */ |
845 | spin_lock_irqsave(&ioapic_lock, flags); | 936 | spin_lock_irqsave(&ioapic_lock, flags); |
846 | io_apic_write(0, 0x11+2*pin, *(((int *)&entry)+1)); | 937 | io_apic_write(apic, 0x11+2*pin, *(((int *)&entry)+1)); |
847 | io_apic_write(0, 0x10+2*pin, *(((int *)&entry)+0)); | 938 | io_apic_write(apic, 0x10+2*pin, *(((int *)&entry)+0)); |
848 | spin_unlock_irqrestore(&ioapic_lock, flags); | 939 | spin_unlock_irqrestore(&ioapic_lock, flags); |
849 | 940 | ||
850 | enable_8259A_irq(0); | 941 | enable_8259A_irq(0); |
@@ -1123,7 +1214,8 @@ void __apicdebuginit print_PIC(void) | |||
1123 | static void __init enable_IO_APIC(void) | 1214 | static void __init enable_IO_APIC(void) |
1124 | { | 1215 | { |
1125 | union IO_APIC_reg_01 reg_01; | 1216 | union IO_APIC_reg_01 reg_01; |
1126 | int i; | 1217 | int i8259_apic, i8259_pin; |
1218 | int i, apic; | ||
1127 | unsigned long flags; | 1219 | unsigned long flags; |
1128 | 1220 | ||
1129 | for (i = 0; i < PIN_MAP_SIZE; i++) { | 1221 | for (i = 0; i < PIN_MAP_SIZE; i++) { |
@@ -1137,11 +1229,48 @@ static void __init enable_IO_APIC(void) | |||
1137 | /* | 1229 | /* |
1138 | * The number of IO-APIC IRQ registers (== #pins): | 1230 | * The number of IO-APIC IRQ registers (== #pins): |
1139 | */ | 1231 | */ |
1140 | for (i = 0; i < nr_ioapics; i++) { | 1232 | for (apic = 0; apic < nr_ioapics; apic++) { |
1141 | spin_lock_irqsave(&ioapic_lock, flags); | 1233 | spin_lock_irqsave(&ioapic_lock, flags); |
1142 | reg_01.raw = io_apic_read(i, 1); | 1234 | reg_01.raw = io_apic_read(apic, 1); |
1143 | spin_unlock_irqrestore(&ioapic_lock, flags); | 1235 | spin_unlock_irqrestore(&ioapic_lock, flags); |
1144 | nr_ioapic_registers[i] = reg_01.bits.entries+1; | 1236 | nr_ioapic_registers[apic] = reg_01.bits.entries+1; |
1237 | } | ||
1238 | for(apic = 0; apic < nr_ioapics; apic++) { | ||
1239 | int pin; | ||
1240 | /* See if any of the pins is in ExtINT mode */ | ||
1241 | for (pin = 0; pin < nr_ioapic_registers[apic]; pin++) { | ||
1242 | struct IO_APIC_route_entry entry; | ||
1243 | spin_lock_irqsave(&ioapic_lock, flags); | ||
1244 | *(((int *)&entry) + 0) = io_apic_read(apic, 0x10 + 2 * pin); | ||
1245 | *(((int *)&entry) + 1) = io_apic_read(apic, 0x11 + 2 * pin); | ||
1246 | spin_unlock_irqrestore(&ioapic_lock, flags); | ||
1247 | |||
1248 | |||
1249 | /* If the interrupt line is enabled and in ExtInt mode | ||
1250 | * I have found the pin where the i8259 is connected. | ||
1251 | */ | ||
1252 | if ((entry.mask == 0) && (entry.delivery_mode == dest_ExtINT)) { | ||
1253 | ioapic_i8259.apic = apic; | ||
1254 | ioapic_i8259.pin = pin; | ||
1255 | goto found_i8259; | ||
1256 | } | ||
1257 | } | ||
1258 | } | ||
1259 | found_i8259: | ||
1260 | /* Look to see what if the MP table has reported the ExtINT */ | ||
1261 | i8259_pin = find_isa_irq_pin(0, mp_ExtINT); | ||
1262 | i8259_apic = find_isa_irq_apic(0, mp_ExtINT); | ||
1263 | /* Trust the MP table if nothing is setup in the hardware */ | ||
1264 | if ((ioapic_i8259.pin == -1) && (i8259_pin >= 0)) { | ||
1265 | printk(KERN_WARNING "ExtINT not setup in hardware but reported by MP table\n"); | ||
1266 | ioapic_i8259.pin = i8259_pin; | ||
1267 | ioapic_i8259.apic = i8259_apic; | ||
1268 | } | ||
1269 | /* Complain if the MP table and the hardware disagree */ | ||
1270 | if (((ioapic_i8259.apic != i8259_apic) || (ioapic_i8259.pin != i8259_pin)) && | ||
1271 | (i8259_pin >= 0) && (ioapic_i8259.pin >= 0)) | ||
1272 | { | ||
1273 | printk(KERN_WARNING "ExtINT in hardware and MP table differ\n"); | ||
1145 | } | 1274 | } |
1146 | 1275 | ||
1147 | /* | 1276 | /* |
@@ -1155,7 +1284,6 @@ static void __init enable_IO_APIC(void) | |||
1155 | */ | 1284 | */ |
1156 | void disable_IO_APIC(void) | 1285 | void disable_IO_APIC(void) |
1157 | { | 1286 | { |
1158 | int pin; | ||
1159 | /* | 1287 | /* |
1160 | * Clear the IO-APIC before rebooting: | 1288 | * Clear the IO-APIC before rebooting: |
1161 | */ | 1289 | */ |
@@ -1166,8 +1294,7 @@ void disable_IO_APIC(void) | |||
1166 | * Put that IOAPIC in virtual wire mode | 1294 | * Put that IOAPIC in virtual wire mode |
1167 | * so legacy interrupts can be delivered. | 1295 | * so legacy interrupts can be delivered. |
1168 | */ | 1296 | */ |
1169 | pin = find_isa_irq_pin(0, mp_ExtINT); | 1297 | if (ioapic_i8259.pin != -1) { |
1170 | if (pin != -1) { | ||
1171 | struct IO_APIC_route_entry entry; | 1298 | struct IO_APIC_route_entry entry; |
1172 | unsigned long flags; | 1299 | unsigned long flags; |
1173 | 1300 | ||
@@ -1178,21 +1305,23 @@ void disable_IO_APIC(void) | |||
1178 | entry.polarity = 0; /* High */ | 1305 | entry.polarity = 0; /* High */ |
1179 | entry.delivery_status = 0; | 1306 | entry.delivery_status = 0; |
1180 | entry.dest_mode = 0; /* Physical */ | 1307 | entry.dest_mode = 0; /* Physical */ |
1181 | entry.delivery_mode = 7; /* ExtInt */ | 1308 | entry.delivery_mode = dest_ExtINT; /* ExtInt */ |
1182 | entry.vector = 0; | 1309 | entry.vector = 0; |
1183 | entry.dest.physical.physical_dest = 0; | 1310 | entry.dest.physical.physical_dest = |
1184 | 1311 | GET_APIC_ID(apic_read(APIC_ID)); | |
1185 | 1312 | ||
1186 | /* | 1313 | /* |
1187 | * Add it to the IO-APIC irq-routing table: | 1314 | * Add it to the IO-APIC irq-routing table: |
1188 | */ | 1315 | */ |
1189 | spin_lock_irqsave(&ioapic_lock, flags); | 1316 | spin_lock_irqsave(&ioapic_lock, flags); |
1190 | io_apic_write(0, 0x11+2*pin, *(((int *)&entry)+1)); | 1317 | io_apic_write(ioapic_i8259.apic, 0x11+2*ioapic_i8259.pin, |
1191 | io_apic_write(0, 0x10+2*pin, *(((int *)&entry)+0)); | 1318 | *(((int *)&entry)+1)); |
1319 | io_apic_write(ioapic_i8259.apic, 0x10+2*ioapic_i8259.pin, | ||
1320 | *(((int *)&entry)+0)); | ||
1192 | spin_unlock_irqrestore(&ioapic_lock, flags); | 1321 | spin_unlock_irqrestore(&ioapic_lock, flags); |
1193 | } | 1322 | } |
1194 | 1323 | ||
1195 | disconnect_bsp_APIC(pin != -1); | 1324 | disconnect_bsp_APIC(ioapic_i8259.pin != -1); |
1196 | } | 1325 | } |
1197 | 1326 | ||
1198 | /* | 1327 | /* |
@@ -1506,7 +1635,7 @@ static void enable_lapic_irq (unsigned int irq) | |||
1506 | unsigned long v; | 1635 | unsigned long v; |
1507 | 1636 | ||
1508 | v = apic_read(APIC_LVT0); | 1637 | v = apic_read(APIC_LVT0); |
1509 | apic_write_around(APIC_LVT0, v & ~APIC_LVT_MASKED); | 1638 | apic_write(APIC_LVT0, v & ~APIC_LVT_MASKED); |
1510 | } | 1639 | } |
1511 | 1640 | ||
1512 | static void disable_lapic_irq (unsigned int irq) | 1641 | static void disable_lapic_irq (unsigned int irq) |
@@ -1514,7 +1643,7 @@ static void disable_lapic_irq (unsigned int irq) | |||
1514 | unsigned long v; | 1643 | unsigned long v; |
1515 | 1644 | ||
1516 | v = apic_read(APIC_LVT0); | 1645 | v = apic_read(APIC_LVT0); |
1517 | apic_write_around(APIC_LVT0, v | APIC_LVT_MASKED); | 1646 | apic_write(APIC_LVT0, v | APIC_LVT_MASKED); |
1518 | } | 1647 | } |
1519 | 1648 | ||
1520 | static void ack_lapic_irq (unsigned int irq) | 1649 | static void ack_lapic_irq (unsigned int irq) |
@@ -1561,20 +1690,21 @@ static void setup_nmi (void) | |||
1561 | */ | 1690 | */ |
1562 | static inline void unlock_ExtINT_logic(void) | 1691 | static inline void unlock_ExtINT_logic(void) |
1563 | { | 1692 | { |
1564 | int pin, i; | 1693 | int apic, pin, i; |
1565 | struct IO_APIC_route_entry entry0, entry1; | 1694 | struct IO_APIC_route_entry entry0, entry1; |
1566 | unsigned char save_control, save_freq_select; | 1695 | unsigned char save_control, save_freq_select; |
1567 | unsigned long flags; | 1696 | unsigned long flags; |
1568 | 1697 | ||
1569 | pin = find_isa_irq_pin(8, mp_INT); | 1698 | pin = find_isa_irq_pin(8, mp_INT); |
1699 | apic = find_isa_irq_apic(8, mp_INT); | ||
1570 | if (pin == -1) | 1700 | if (pin == -1) |
1571 | return; | 1701 | return; |
1572 | 1702 | ||
1573 | spin_lock_irqsave(&ioapic_lock, flags); | 1703 | spin_lock_irqsave(&ioapic_lock, flags); |
1574 | *(((int *)&entry0) + 1) = io_apic_read(0, 0x11 + 2 * pin); | 1704 | *(((int *)&entry0) + 1) = io_apic_read(apic, 0x11 + 2 * pin); |
1575 | *(((int *)&entry0) + 0) = io_apic_read(0, 0x10 + 2 * pin); | 1705 | *(((int *)&entry0) + 0) = io_apic_read(apic, 0x10 + 2 * pin); |
1576 | spin_unlock_irqrestore(&ioapic_lock, flags); | 1706 | spin_unlock_irqrestore(&ioapic_lock, flags); |
1577 | clear_IO_APIC_pin(0, pin); | 1707 | clear_IO_APIC_pin(apic, pin); |
1578 | 1708 | ||
1579 | memset(&entry1, 0, sizeof(entry1)); | 1709 | memset(&entry1, 0, sizeof(entry1)); |
1580 | 1710 | ||
@@ -1587,8 +1717,8 @@ static inline void unlock_ExtINT_logic(void) | |||
1587 | entry1.vector = 0; | 1717 | entry1.vector = 0; |
1588 | 1718 | ||
1589 | spin_lock_irqsave(&ioapic_lock, flags); | 1719 | spin_lock_irqsave(&ioapic_lock, flags); |
1590 | io_apic_write(0, 0x11 + 2 * pin, *(((int *)&entry1) + 1)); | 1720 | io_apic_write(apic, 0x11 + 2 * pin, *(((int *)&entry1) + 1)); |
1591 | io_apic_write(0, 0x10 + 2 * pin, *(((int *)&entry1) + 0)); | 1721 | io_apic_write(apic, 0x10 + 2 * pin, *(((int *)&entry1) + 0)); |
1592 | spin_unlock_irqrestore(&ioapic_lock, flags); | 1722 | spin_unlock_irqrestore(&ioapic_lock, flags); |
1593 | 1723 | ||
1594 | save_control = CMOS_READ(RTC_CONTROL); | 1724 | save_control = CMOS_READ(RTC_CONTROL); |
@@ -1606,11 +1736,11 @@ static inline void unlock_ExtINT_logic(void) | |||
1606 | 1736 | ||
1607 | CMOS_WRITE(save_control, RTC_CONTROL); | 1737 | CMOS_WRITE(save_control, RTC_CONTROL); |
1608 | CMOS_WRITE(save_freq_select, RTC_FREQ_SELECT); | 1738 | CMOS_WRITE(save_freq_select, RTC_FREQ_SELECT); |
1609 | clear_IO_APIC_pin(0, pin); | 1739 | clear_IO_APIC_pin(apic, pin); |
1610 | 1740 | ||
1611 | spin_lock_irqsave(&ioapic_lock, flags); | 1741 | spin_lock_irqsave(&ioapic_lock, flags); |
1612 | io_apic_write(0, 0x11 + 2 * pin, *(((int *)&entry0) + 1)); | 1742 | io_apic_write(apic, 0x11 + 2 * pin, *(((int *)&entry0) + 1)); |
1613 | io_apic_write(0, 0x10 + 2 * pin, *(((int *)&entry0) + 0)); | 1743 | io_apic_write(apic, 0x10 + 2 * pin, *(((int *)&entry0) + 0)); |
1614 | spin_unlock_irqrestore(&ioapic_lock, flags); | 1744 | spin_unlock_irqrestore(&ioapic_lock, flags); |
1615 | } | 1745 | } |
1616 | 1746 | ||
@@ -1622,7 +1752,7 @@ static inline void unlock_ExtINT_logic(void) | |||
1622 | */ | 1752 | */ |
1623 | static inline void check_timer(void) | 1753 | static inline void check_timer(void) |
1624 | { | 1754 | { |
1625 | int pin1, pin2; | 1755 | int apic1, pin1, apic2, pin2; |
1626 | int vector; | 1756 | int vector; |
1627 | 1757 | ||
1628 | /* | 1758 | /* |
@@ -1639,14 +1769,17 @@ static inline void check_timer(void) | |||
1639 | * the 8259A which implies the virtual wire has to be | 1769 | * the 8259A which implies the virtual wire has to be |
1640 | * disabled in the local APIC. | 1770 | * disabled in the local APIC. |
1641 | */ | 1771 | */ |
1642 | apic_write_around(APIC_LVT0, APIC_LVT_MASKED | APIC_DM_EXTINT); | 1772 | apic_write(APIC_LVT0, APIC_LVT_MASKED | APIC_DM_EXTINT); |
1643 | init_8259A(1); | 1773 | init_8259A(1); |
1644 | enable_8259A_irq(0); | 1774 | enable_8259A_irq(0); |
1645 | 1775 | ||
1646 | pin1 = find_isa_irq_pin(0, mp_INT); | 1776 | pin1 = find_isa_irq_pin(0, mp_INT); |
1647 | pin2 = find_isa_irq_pin(0, mp_ExtINT); | 1777 | apic1 = find_isa_irq_apic(0, mp_INT); |
1778 | pin2 = ioapic_i8259.pin; | ||
1779 | apic2 = ioapic_i8259.apic; | ||
1648 | 1780 | ||
1649 | apic_printk(APIC_VERBOSE,KERN_INFO "..TIMER: vector=0x%02X pin1=%d pin2=%d\n", vector, pin1, pin2); | 1781 | apic_printk(APIC_VERBOSE,KERN_INFO "..TIMER: vector=0x%02X apic1=%d pin1=%d apic2=%d pin2=%d\n", |
1782 | vector, apic1, pin1, apic2, pin2); | ||
1650 | 1783 | ||
1651 | if (pin1 != -1) { | 1784 | if (pin1 != -1) { |
1652 | /* | 1785 | /* |
@@ -1664,17 +1797,20 @@ static inline void check_timer(void) | |||
1664 | clear_IO_APIC_pin(0, pin1); | 1797 | clear_IO_APIC_pin(0, pin1); |
1665 | return; | 1798 | return; |
1666 | } | 1799 | } |
1667 | clear_IO_APIC_pin(0, pin1); | 1800 | clear_IO_APIC_pin(apic1, pin1); |
1668 | apic_printk(APIC_QUIET,KERN_ERR "..MP-BIOS bug: 8254 timer not connected to IO-APIC\n"); | 1801 | apic_printk(APIC_QUIET,KERN_ERR "..MP-BIOS bug: 8254 timer not " |
1802 | "connected to IO-APIC\n"); | ||
1669 | } | 1803 | } |
1670 | 1804 | ||
1671 | apic_printk(APIC_VERBOSE,KERN_INFO "...trying to set up timer (IRQ0) through the 8259A ... "); | 1805 | apic_printk(APIC_VERBOSE,KERN_INFO "...trying to set up timer (IRQ0) " |
1806 | "through the 8259A ... "); | ||
1672 | if (pin2 != -1) { | 1807 | if (pin2 != -1) { |
1673 | apic_printk(APIC_VERBOSE,"\n..... (found pin %d) ...", pin2); | 1808 | apic_printk(APIC_VERBOSE,"\n..... (found apic %d pin %d) ...", |
1809 | apic2, pin2); | ||
1674 | /* | 1810 | /* |
1675 | * legacy devices should be connected to IO APIC #0 | 1811 | * legacy devices should be connected to IO APIC #0 |
1676 | */ | 1812 | */ |
1677 | setup_ExtINT_IRQ0_pin(pin2, vector); | 1813 | setup_ExtINT_IRQ0_pin(apic2, pin2, vector); |
1678 | if (timer_irq_works()) { | 1814 | if (timer_irq_works()) { |
1679 | printk("works.\n"); | 1815 | printk("works.\n"); |
1680 | nmi_watchdog_default(); | 1816 | nmi_watchdog_default(); |
@@ -1686,7 +1822,7 @@ static inline void check_timer(void) | |||
1686 | /* | 1822 | /* |
1687 | * Cleanup, just in case ... | 1823 | * Cleanup, just in case ... |
1688 | */ | 1824 | */ |
1689 | clear_IO_APIC_pin(0, pin2); | 1825 | clear_IO_APIC_pin(apic2, pin2); |
1690 | } | 1826 | } |
1691 | printk(" failed.\n"); | 1827 | printk(" failed.\n"); |
1692 | 1828 | ||
@@ -1699,21 +1835,21 @@ static inline void check_timer(void) | |||
1699 | 1835 | ||
1700 | disable_8259A_irq(0); | 1836 | disable_8259A_irq(0); |
1701 | irq_desc[0].handler = &lapic_irq_type; | 1837 | irq_desc[0].handler = &lapic_irq_type; |
1702 | apic_write_around(APIC_LVT0, APIC_DM_FIXED | vector); /* Fixed mode */ | 1838 | apic_write(APIC_LVT0, APIC_DM_FIXED | vector); /* Fixed mode */ |
1703 | enable_8259A_irq(0); | 1839 | enable_8259A_irq(0); |
1704 | 1840 | ||
1705 | if (timer_irq_works()) { | 1841 | if (timer_irq_works()) { |
1706 | apic_printk(APIC_QUIET, " works.\n"); | 1842 | apic_printk(APIC_QUIET, " works.\n"); |
1707 | return; | 1843 | return; |
1708 | } | 1844 | } |
1709 | apic_write_around(APIC_LVT0, APIC_LVT_MASKED | APIC_DM_FIXED | vector); | 1845 | apic_write(APIC_LVT0, APIC_LVT_MASKED | APIC_DM_FIXED | vector); |
1710 | apic_printk(APIC_VERBOSE," failed.\n"); | 1846 | apic_printk(APIC_VERBOSE," failed.\n"); |
1711 | 1847 | ||
1712 | apic_printk(APIC_VERBOSE, KERN_INFO "...trying to set up timer as ExtINT IRQ..."); | 1848 | apic_printk(APIC_VERBOSE, KERN_INFO "...trying to set up timer as ExtINT IRQ..."); |
1713 | 1849 | ||
1714 | init_8259A(0); | 1850 | init_8259A(0); |
1715 | make_8259A_irq(0); | 1851 | make_8259A_irq(0); |
1716 | apic_write_around(APIC_LVT0, APIC_DM_EXTINT); | 1852 | apic_write(APIC_LVT0, APIC_DM_EXTINT); |
1717 | 1853 | ||
1718 | unlock_ExtINT_logic(); | 1854 | unlock_ExtINT_logic(); |
1719 | 1855 | ||
@@ -1917,6 +2053,7 @@ int io_apic_set_pci_routing (int ioapic, int pin, int irq, int edge_level, int a | |||
1917 | entry.polarity = active_high_low; | 2053 | entry.polarity = active_high_low; |
1918 | entry.mask = 1; /* Disabled (masked) */ | 2054 | entry.mask = 1; /* Disabled (masked) */ |
1919 | 2055 | ||
2056 | irq = gsi_irq_sharing(irq); | ||
1920 | /* | 2057 | /* |
1921 | * IRQs < 16 are already in the irq_2_pin[] map | 2058 | * IRQs < 16 are already in the irq_2_pin[] map |
1922 | */ | 2059 | */ |
diff --git a/arch/x86_64/kernel/ioport.c b/arch/x86_64/kernel/ioport.c index cde0e868efe3..b81614970ecc 100644 --- a/arch/x86_64/kernel/ioport.c +++ b/arch/x86_64/kernel/ioport.c | |||
@@ -7,6 +7,7 @@ | |||
7 | 7 | ||
8 | #include <linux/sched.h> | 8 | #include <linux/sched.h> |
9 | #include <linux/kernel.h> | 9 | #include <linux/kernel.h> |
10 | #include <linux/capability.h> | ||
10 | #include <linux/errno.h> | 11 | #include <linux/errno.h> |
11 | #include <linux/types.h> | 12 | #include <linux/types.h> |
12 | #include <linux/ioport.h> | 13 | #include <linux/ioport.h> |
diff --git a/arch/x86_64/kernel/irq.c b/arch/x86_64/kernel/irq.c index d6a04a8320a3..30d2a1e545fe 100644 --- a/arch/x86_64/kernel/irq.c +++ b/arch/x86_64/kernel/irq.c | |||
@@ -17,6 +17,7 @@ | |||
17 | #include <linux/delay.h> | 17 | #include <linux/delay.h> |
18 | #include <asm/uaccess.h> | 18 | #include <asm/uaccess.h> |
19 | #include <asm/io_apic.h> | 19 | #include <asm/io_apic.h> |
20 | #include <asm/idle.h> | ||
20 | 21 | ||
21 | atomic_t irq_err_count; | 22 | atomic_t irq_err_count; |
22 | #ifdef CONFIG_X86_IO_APIC | 23 | #ifdef CONFIG_X86_IO_APIC |
@@ -69,13 +70,13 @@ skip: | |||
69 | seq_printf(p, "NMI: "); | 70 | seq_printf(p, "NMI: "); |
70 | for (j = 0; j < NR_CPUS; j++) | 71 | for (j = 0; j < NR_CPUS; j++) |
71 | if (cpu_online(j)) | 72 | if (cpu_online(j)) |
72 | seq_printf(p, "%10u ", cpu_pda[j].__nmi_count); | 73 | seq_printf(p, "%10u ", cpu_pda(j)->__nmi_count); |
73 | seq_putc(p, '\n'); | 74 | seq_putc(p, '\n'); |
74 | #ifdef CONFIG_X86_LOCAL_APIC | 75 | #ifdef CONFIG_X86_LOCAL_APIC |
75 | seq_printf(p, "LOC: "); | 76 | seq_printf(p, "LOC: "); |
76 | for (j = 0; j < NR_CPUS; j++) | 77 | for (j = 0; j < NR_CPUS; j++) |
77 | if (cpu_online(j)) | 78 | if (cpu_online(j)) |
78 | seq_printf(p, "%10u ", cpu_pda[j].apic_timer_irqs); | 79 | seq_printf(p, "%10u ", cpu_pda(j)->apic_timer_irqs); |
79 | seq_putc(p, '\n'); | 80 | seq_putc(p, '\n'); |
80 | #endif | 81 | #endif |
81 | seq_printf(p, "ERR: %10u\n", atomic_read(&irq_err_count)); | 82 | seq_printf(p, "ERR: %10u\n", atomic_read(&irq_err_count)); |
@@ -98,6 +99,7 @@ asmlinkage unsigned int do_IRQ(struct pt_regs *regs) | |||
98 | /* high bits used in ret_from_ code */ | 99 | /* high bits used in ret_from_ code */ |
99 | unsigned irq = regs->orig_rax & 0xff; | 100 | unsigned irq = regs->orig_rax & 0xff; |
100 | 101 | ||
102 | exit_idle(); | ||
101 | irq_enter(); | 103 | irq_enter(); |
102 | 104 | ||
103 | __do_IRQ(irq, regs); | 105 | __do_IRQ(irq, regs); |
diff --git a/arch/x86_64/kernel/kprobes.c b/arch/x86_64/kernel/kprobes.c index dddeb678b440..8b866a8572cf 100644 --- a/arch/x86_64/kernel/kprobes.c +++ b/arch/x86_64/kernel/kprobes.c | |||
@@ -42,8 +42,8 @@ | |||
42 | #include <asm/pgtable.h> | 42 | #include <asm/pgtable.h> |
43 | #include <asm/kdebug.h> | 43 | #include <asm/kdebug.h> |
44 | 44 | ||
45 | static DECLARE_MUTEX(kprobe_mutex); | ||
46 | void jprobe_return_end(void); | 45 | void jprobe_return_end(void); |
46 | static void __kprobes arch_copy_kprobe(struct kprobe *p); | ||
47 | 47 | ||
48 | DEFINE_PER_CPU(struct kprobe *, current_kprobe) = NULL; | 48 | DEFINE_PER_CPU(struct kprobe *, current_kprobe) = NULL; |
49 | DEFINE_PER_CPU(struct kprobe_ctlblk, kprobe_ctlblk); | 49 | DEFINE_PER_CPU(struct kprobe_ctlblk, kprobe_ctlblk); |
@@ -69,12 +69,11 @@ static inline int is_IF_modifier(kprobe_opcode_t *insn) | |||
69 | int __kprobes arch_prepare_kprobe(struct kprobe *p) | 69 | int __kprobes arch_prepare_kprobe(struct kprobe *p) |
70 | { | 70 | { |
71 | /* insn: must be on special executable page on x86_64. */ | 71 | /* insn: must be on special executable page on x86_64. */ |
72 | down(&kprobe_mutex); | ||
73 | p->ainsn.insn = get_insn_slot(); | 72 | p->ainsn.insn = get_insn_slot(); |
74 | up(&kprobe_mutex); | ||
75 | if (!p->ainsn.insn) { | 73 | if (!p->ainsn.insn) { |
76 | return -ENOMEM; | 74 | return -ENOMEM; |
77 | } | 75 | } |
76 | arch_copy_kprobe(p); | ||
78 | return 0; | 77 | return 0; |
79 | } | 78 | } |
80 | 79 | ||
@@ -181,7 +180,7 @@ static inline s32 *is_riprel(u8 *insn) | |||
181 | return NULL; | 180 | return NULL; |
182 | } | 181 | } |
183 | 182 | ||
184 | void __kprobes arch_copy_kprobe(struct kprobe *p) | 183 | static void __kprobes arch_copy_kprobe(struct kprobe *p) |
185 | { | 184 | { |
186 | s32 *ripdisp; | 185 | s32 *ripdisp; |
187 | memcpy(p->ainsn.insn, p->addr, MAX_INSN_SIZE); | 186 | memcpy(p->ainsn.insn, p->addr, MAX_INSN_SIZE); |
@@ -329,12 +328,21 @@ int __kprobes kprobe_handler(struct pt_regs *regs) | |||
329 | */ | 328 | */ |
330 | save_previous_kprobe(kcb); | 329 | save_previous_kprobe(kcb); |
331 | set_current_kprobe(p, regs, kcb); | 330 | set_current_kprobe(p, regs, kcb); |
332 | p->nmissed++; | 331 | kprobes_inc_nmissed_count(p); |
333 | prepare_singlestep(p, regs); | 332 | prepare_singlestep(p, regs); |
334 | kcb->kprobe_status = KPROBE_REENTER; | 333 | kcb->kprobe_status = KPROBE_REENTER; |
335 | return 1; | 334 | return 1; |
336 | } | 335 | } |
337 | } else { | 336 | } else { |
337 | if (*addr != BREAKPOINT_INSTRUCTION) { | ||
338 | /* The breakpoint instruction was removed by | ||
339 | * another cpu right after we hit, no further | ||
340 | * handling of this interrupt is appropriate | ||
341 | */ | ||
342 | regs->rip = (unsigned long)addr; | ||
343 | ret = 1; | ||
344 | goto no_kprobe; | ||
345 | } | ||
338 | p = __get_cpu_var(current_kprobe); | 346 | p = __get_cpu_var(current_kprobe); |
339 | if (p->break_handler && p->break_handler(p, regs)) { | 347 | if (p->break_handler && p->break_handler(p, regs)) { |
340 | goto ss_probe; | 348 | goto ss_probe; |
diff --git a/arch/x86_64/kernel/mce.c b/arch/x86_64/kernel/mce.c index 69541db5ff2c..13a2eada6c95 100644 --- a/arch/x86_64/kernel/mce.c +++ b/arch/x86_64/kernel/mce.c | |||
@@ -15,6 +15,7 @@ | |||
15 | #include <linux/sysdev.h> | 15 | #include <linux/sysdev.h> |
16 | #include <linux/miscdevice.h> | 16 | #include <linux/miscdevice.h> |
17 | #include <linux/fs.h> | 17 | #include <linux/fs.h> |
18 | #include <linux/capability.h> | ||
18 | #include <linux/cpu.h> | 19 | #include <linux/cpu.h> |
19 | #include <linux/percpu.h> | 20 | #include <linux/percpu.h> |
20 | #include <linux/ctype.h> | 21 | #include <linux/ctype.h> |
@@ -23,9 +24,10 @@ | |||
23 | #include <asm/mce.h> | 24 | #include <asm/mce.h> |
24 | #include <asm/kdebug.h> | 25 | #include <asm/kdebug.h> |
25 | #include <asm/uaccess.h> | 26 | #include <asm/uaccess.h> |
27 | #include <asm/smp.h> | ||
26 | 28 | ||
27 | #define MISC_MCELOG_MINOR 227 | 29 | #define MISC_MCELOG_MINOR 227 |
28 | #define NR_BANKS 5 | 30 | #define NR_BANKS 6 |
29 | 31 | ||
30 | static int mce_dont_init; | 32 | static int mce_dont_init; |
31 | 33 | ||
@@ -37,7 +39,7 @@ static unsigned long bank[NR_BANKS] = { [0 ... NR_BANKS-1] = ~0UL }; | |||
37 | static unsigned long console_logged; | 39 | static unsigned long console_logged; |
38 | static int notify_user; | 40 | static int notify_user; |
39 | static int rip_msr; | 41 | static int rip_msr; |
40 | static int mce_bootlog; | 42 | static int mce_bootlog = 1; |
41 | 43 | ||
42 | /* | 44 | /* |
43 | * Lockless MCE logging infrastructure. | 45 | * Lockless MCE logging infrastructure. |
@@ -91,6 +93,7 @@ void mce_log(struct mce *mce) | |||
91 | static void print_mce(struct mce *m) | 93 | static void print_mce(struct mce *m) |
92 | { | 94 | { |
93 | printk(KERN_EMERG "\n" | 95 | printk(KERN_EMERG "\n" |
96 | KERN_EMERG "HARDWARE ERROR\n" | ||
94 | KERN_EMERG | 97 | KERN_EMERG |
95 | "CPU %d: Machine Check Exception: %16Lx Bank %d: %016Lx\n", | 98 | "CPU %d: Machine Check Exception: %16Lx Bank %d: %016Lx\n", |
96 | m->cpu, m->mcgstatus, m->bank, m->status); | 99 | m->cpu, m->mcgstatus, m->bank, m->status); |
@@ -109,6 +112,9 @@ static void print_mce(struct mce *m) | |||
109 | if (m->misc) | 112 | if (m->misc) |
110 | printk("MISC %Lx ", m->misc); | 113 | printk("MISC %Lx ", m->misc); |
111 | printk("\n"); | 114 | printk("\n"); |
115 | printk(KERN_EMERG "This is not a software problem!\n"); | ||
116 | printk(KERN_EMERG | ||
117 | "Run through mcelog --ascii to decode and contact your hardware vendor\n"); | ||
112 | } | 118 | } |
113 | 119 | ||
114 | static void mce_panic(char *msg, struct mce *backup, unsigned long start) | 120 | static void mce_panic(char *msg, struct mce *backup, unsigned long start) |
@@ -168,12 +174,12 @@ void do_machine_check(struct pt_regs * regs, long error_code) | |||
168 | int panicm_found = 0; | 174 | int panicm_found = 0; |
169 | 175 | ||
170 | if (regs) | 176 | if (regs) |
171 | notify_die(DIE_NMI, "machine check", regs, error_code, 255, SIGKILL); | 177 | notify_die(DIE_NMI, "machine check", regs, error_code, 18, SIGKILL); |
172 | if (!banks) | 178 | if (!banks) |
173 | return; | 179 | return; |
174 | 180 | ||
175 | memset(&m, 0, sizeof(struct mce)); | 181 | memset(&m, 0, sizeof(struct mce)); |
176 | m.cpu = hard_smp_processor_id(); | 182 | m.cpu = safe_smp_processor_id(); |
177 | rdmsrl(MSR_IA32_MCG_STATUS, m.mcgstatus); | 183 | rdmsrl(MSR_IA32_MCG_STATUS, m.mcgstatus); |
178 | if (!(m.mcgstatus & MCG_STATUS_RIPV)) | 184 | if (!(m.mcgstatus & MCG_STATUS_RIPV)) |
179 | kill_it = 1; | 185 | kill_it = 1; |
@@ -347,7 +353,11 @@ static void __cpuinit mce_cpu_quirks(struct cpuinfo_x86 *c) | |||
347 | /* disable GART TBL walk error reporting, which trips off | 353 | /* disable GART TBL walk error reporting, which trips off |
348 | incorrectly with the IOMMU & 3ware & Cerberus. */ | 354 | incorrectly with the IOMMU & 3ware & Cerberus. */ |
349 | clear_bit(10, &bank[4]); | 355 | clear_bit(10, &bank[4]); |
356 | /* Lots of broken BIOS around that don't clear them | ||
357 | by default and leave crap in there. Don't log. */ | ||
358 | mce_bootlog = 0; | ||
350 | } | 359 | } |
360 | |||
351 | } | 361 | } |
352 | 362 | ||
353 | static void __cpuinit mce_cpu_features(struct cpuinfo_x86 *c) | 363 | static void __cpuinit mce_cpu_features(struct cpuinfo_x86 *c) |
@@ -356,6 +366,9 @@ static void __cpuinit mce_cpu_features(struct cpuinfo_x86 *c) | |||
356 | case X86_VENDOR_INTEL: | 366 | case X86_VENDOR_INTEL: |
357 | mce_intel_feature_init(c); | 367 | mce_intel_feature_init(c); |
358 | break; | 368 | break; |
369 | case X86_VENDOR_AMD: | ||
370 | mce_amd_feature_init(c); | ||
371 | break; | ||
359 | default: | 372 | default: |
360 | break; | 373 | break; |
361 | } | 374 | } |
@@ -495,16 +508,16 @@ static int __init mcheck_disable(char *str) | |||
495 | /* mce=off disables machine check. Note you can reenable it later | 508 | /* mce=off disables machine check. Note you can reenable it later |
496 | using sysfs. | 509 | using sysfs. |
497 | mce=TOLERANCELEVEL (number, see above) | 510 | mce=TOLERANCELEVEL (number, see above) |
498 | mce=bootlog Log MCEs from before booting. Disabled by default to work | 511 | mce=bootlog Log MCEs from before booting. Disabled by default on AMD. |
499 | around buggy BIOS that leave bogus MCEs. */ | 512 | mce=nobootlog Don't log MCEs from before booting. */ |
500 | static int __init mcheck_enable(char *str) | 513 | static int __init mcheck_enable(char *str) |
501 | { | 514 | { |
502 | if (*str == '=') | 515 | if (*str == '=') |
503 | str++; | 516 | str++; |
504 | if (!strcmp(str, "off")) | 517 | if (!strcmp(str, "off")) |
505 | mce_dont_init = 1; | 518 | mce_dont_init = 1; |
506 | else if (!strcmp(str, "bootlog")) | 519 | else if (!strcmp(str, "bootlog") || !strcmp(str,"nobootlog")) |
507 | mce_bootlog = 1; | 520 | mce_bootlog = str[0] == 'b'; |
508 | else if (isdigit(str[0])) | 521 | else if (isdigit(str[0])) |
509 | get_option(&str, &tolerant); | 522 | get_option(&str, &tolerant); |
510 | else | 523 | else |
@@ -566,6 +579,10 @@ ACCESSOR(bank1ctl,bank[1],mce_restart()) | |||
566 | ACCESSOR(bank2ctl,bank[2],mce_restart()) | 579 | ACCESSOR(bank2ctl,bank[2],mce_restart()) |
567 | ACCESSOR(bank3ctl,bank[3],mce_restart()) | 580 | ACCESSOR(bank3ctl,bank[3],mce_restart()) |
568 | ACCESSOR(bank4ctl,bank[4],mce_restart()) | 581 | ACCESSOR(bank4ctl,bank[4],mce_restart()) |
582 | ACCESSOR(bank5ctl,bank[5],mce_restart()) | ||
583 | static struct sysdev_attribute * bank_attributes[NR_BANKS] = { | ||
584 | &attr_bank0ctl, &attr_bank1ctl, &attr_bank2ctl, | ||
585 | &attr_bank3ctl, &attr_bank4ctl, &attr_bank5ctl}; | ||
569 | ACCESSOR(tolerant,tolerant,) | 586 | ACCESSOR(tolerant,tolerant,) |
570 | ACCESSOR(check_interval,check_interval,mce_restart()) | 587 | ACCESSOR(check_interval,check_interval,mce_restart()) |
571 | 588 | ||
@@ -573,6 +590,7 @@ ACCESSOR(check_interval,check_interval,mce_restart()) | |||
573 | static __cpuinit int mce_create_device(unsigned int cpu) | 590 | static __cpuinit int mce_create_device(unsigned int cpu) |
574 | { | 591 | { |
575 | int err; | 592 | int err; |
593 | int i; | ||
576 | if (!mce_available(&cpu_data[cpu])) | 594 | if (!mce_available(&cpu_data[cpu])) |
577 | return -EIO; | 595 | return -EIO; |
578 | 596 | ||
@@ -582,11 +600,9 @@ static __cpuinit int mce_create_device(unsigned int cpu) | |||
582 | err = sysdev_register(&per_cpu(device_mce,cpu)); | 600 | err = sysdev_register(&per_cpu(device_mce,cpu)); |
583 | 601 | ||
584 | if (!err) { | 602 | if (!err) { |
585 | sysdev_create_file(&per_cpu(device_mce,cpu), &attr_bank0ctl); | 603 | for (i = 0; i < banks; i++) |
586 | sysdev_create_file(&per_cpu(device_mce,cpu), &attr_bank1ctl); | 604 | sysdev_create_file(&per_cpu(device_mce,cpu), |
587 | sysdev_create_file(&per_cpu(device_mce,cpu), &attr_bank2ctl); | 605 | bank_attributes[i]); |
588 | sysdev_create_file(&per_cpu(device_mce,cpu), &attr_bank3ctl); | ||
589 | sysdev_create_file(&per_cpu(device_mce,cpu), &attr_bank4ctl); | ||
590 | sysdev_create_file(&per_cpu(device_mce,cpu), &attr_tolerant); | 606 | sysdev_create_file(&per_cpu(device_mce,cpu), &attr_tolerant); |
591 | sysdev_create_file(&per_cpu(device_mce,cpu), &attr_check_interval); | 607 | sysdev_create_file(&per_cpu(device_mce,cpu), &attr_check_interval); |
592 | } | 608 | } |
@@ -596,11 +612,11 @@ static __cpuinit int mce_create_device(unsigned int cpu) | |||
596 | #ifdef CONFIG_HOTPLUG_CPU | 612 | #ifdef CONFIG_HOTPLUG_CPU |
597 | static __cpuinit void mce_remove_device(unsigned int cpu) | 613 | static __cpuinit void mce_remove_device(unsigned int cpu) |
598 | { | 614 | { |
599 | sysdev_remove_file(&per_cpu(device_mce,cpu), &attr_bank0ctl); | 615 | int i; |
600 | sysdev_remove_file(&per_cpu(device_mce,cpu), &attr_bank1ctl); | 616 | |
601 | sysdev_remove_file(&per_cpu(device_mce,cpu), &attr_bank2ctl); | 617 | for (i = 0; i < banks; i++) |
602 | sysdev_remove_file(&per_cpu(device_mce,cpu), &attr_bank3ctl); | 618 | sysdev_remove_file(&per_cpu(device_mce,cpu), |
603 | sysdev_remove_file(&per_cpu(device_mce,cpu), &attr_bank4ctl); | 619 | bank_attributes[i]); |
604 | sysdev_remove_file(&per_cpu(device_mce,cpu), &attr_tolerant); | 620 | sysdev_remove_file(&per_cpu(device_mce,cpu), &attr_tolerant); |
605 | sysdev_remove_file(&per_cpu(device_mce,cpu), &attr_check_interval); | 621 | sysdev_remove_file(&per_cpu(device_mce,cpu), &attr_check_interval); |
606 | sysdev_unregister(&per_cpu(device_mce,cpu)); | 622 | sysdev_unregister(&per_cpu(device_mce,cpu)); |
diff --git a/arch/x86_64/kernel/mce_amd.c b/arch/x86_64/kernel/mce_amd.c new file mode 100644 index 000000000000..d3ad7d81266d --- /dev/null +++ b/arch/x86_64/kernel/mce_amd.c | |||
@@ -0,0 +1,540 @@ | |||
1 | /* | ||
2 | * (c) 2005 Advanced Micro Devices, Inc. | ||
3 | * Your use of this code is subject to the terms and conditions of the | ||
4 | * GNU general public license version 2. See "COPYING" or | ||
5 | * http://www.gnu.org/licenses/gpl.html | ||
6 | * | ||
7 | * Written by Jacob Shin - AMD, Inc. | ||
8 | * | ||
9 | * Support : jacob.shin@amd.com | ||
10 | * | ||
11 | * MC4_MISC0 DRAM ECC Error Threshold available under AMD K8 Rev F. | ||
12 | * MC4_MISC0 exists per physical processor. | ||
13 | * | ||
14 | */ | ||
15 | |||
16 | #include <linux/cpu.h> | ||
17 | #include <linux/errno.h> | ||
18 | #include <linux/init.h> | ||
19 | #include <linux/interrupt.h> | ||
20 | #include <linux/kobject.h> | ||
21 | #include <linux/notifier.h> | ||
22 | #include <linux/sched.h> | ||
23 | #include <linux/smp.h> | ||
24 | #include <linux/sysdev.h> | ||
25 | #include <linux/sysfs.h> | ||
26 | #include <asm/apic.h> | ||
27 | #include <asm/mce.h> | ||
28 | #include <asm/msr.h> | ||
29 | #include <asm/percpu.h> | ||
30 | #include <asm/idle.h> | ||
31 | |||
32 | #define PFX "mce_threshold: " | ||
33 | #define VERSION "version 1.00.9" | ||
34 | #define NR_BANKS 5 | ||
35 | #define THRESHOLD_MAX 0xFFF | ||
36 | #define INT_TYPE_APIC 0x00020000 | ||
37 | #define MASK_VALID_HI 0x80000000 | ||
38 | #define MASK_LVTOFF_HI 0x00F00000 | ||
39 | #define MASK_COUNT_EN_HI 0x00080000 | ||
40 | #define MASK_INT_TYPE_HI 0x00060000 | ||
41 | #define MASK_OVERFLOW_HI 0x00010000 | ||
42 | #define MASK_ERR_COUNT_HI 0x00000FFF | ||
43 | #define MASK_OVERFLOW 0x0001000000000000L | ||
44 | |||
45 | struct threshold_bank { | ||
46 | unsigned int cpu; | ||
47 | u8 bank; | ||
48 | u8 interrupt_enable; | ||
49 | u16 threshold_limit; | ||
50 | struct kobject kobj; | ||
51 | }; | ||
52 | |||
53 | static struct threshold_bank threshold_defaults = { | ||
54 | .interrupt_enable = 0, | ||
55 | .threshold_limit = THRESHOLD_MAX, | ||
56 | }; | ||
57 | |||
58 | #ifdef CONFIG_SMP | ||
59 | static unsigned char shared_bank[NR_BANKS] = { | ||
60 | 0, 0, 0, 0, 1 | ||
61 | }; | ||
62 | #endif | ||
63 | |||
64 | static DEFINE_PER_CPU(unsigned char, bank_map); /* see which banks are on */ | ||
65 | |||
66 | /* | ||
67 | * CPU Initialization | ||
68 | */ | ||
69 | |||
70 | /* must be called with correct cpu affinity */ | ||
71 | static void threshold_restart_bank(struct threshold_bank *b, | ||
72 | int reset, u16 old_limit) | ||
73 | { | ||
74 | u32 mci_misc_hi, mci_misc_lo; | ||
75 | |||
76 | rdmsr(MSR_IA32_MC0_MISC + b->bank * 4, mci_misc_lo, mci_misc_hi); | ||
77 | |||
78 | if (b->threshold_limit < (mci_misc_hi & THRESHOLD_MAX)) | ||
79 | reset = 1; /* limit cannot be lower than err count */ | ||
80 | |||
81 | if (reset) { /* reset err count and overflow bit */ | ||
82 | mci_misc_hi = | ||
83 | (mci_misc_hi & ~(MASK_ERR_COUNT_HI | MASK_OVERFLOW_HI)) | | ||
84 | (THRESHOLD_MAX - b->threshold_limit); | ||
85 | } else if (old_limit) { /* change limit w/o reset */ | ||
86 | int new_count = (mci_misc_hi & THRESHOLD_MAX) + | ||
87 | (old_limit - b->threshold_limit); | ||
88 | mci_misc_hi = (mci_misc_hi & ~MASK_ERR_COUNT_HI) | | ||
89 | (new_count & THRESHOLD_MAX); | ||
90 | } | ||
91 | |||
92 | b->interrupt_enable ? | ||
93 | (mci_misc_hi = (mci_misc_hi & ~MASK_INT_TYPE_HI) | INT_TYPE_APIC) : | ||
94 | (mci_misc_hi &= ~MASK_INT_TYPE_HI); | ||
95 | |||
96 | mci_misc_hi |= MASK_COUNT_EN_HI; | ||
97 | wrmsr(MSR_IA32_MC0_MISC + b->bank * 4, mci_misc_lo, mci_misc_hi); | ||
98 | } | ||
99 | |||
100 | void __cpuinit mce_amd_feature_init(struct cpuinfo_x86 *c) | ||
101 | { | ||
102 | int bank; | ||
103 | u32 mci_misc_lo, mci_misc_hi; | ||
104 | unsigned int cpu = smp_processor_id(); | ||
105 | |||
106 | for (bank = 0; bank < NR_BANKS; ++bank) { | ||
107 | rdmsr(MSR_IA32_MC0_MISC + bank * 4, mci_misc_lo, mci_misc_hi); | ||
108 | |||
109 | /* !valid, !counter present, bios locked */ | ||
110 | if (!(mci_misc_hi & MASK_VALID_HI) || | ||
111 | !(mci_misc_hi & MASK_VALID_HI >> 1) || | ||
112 | (mci_misc_hi & MASK_VALID_HI >> 2)) | ||
113 | continue; | ||
114 | |||
115 | per_cpu(bank_map, cpu) |= (1 << bank); | ||
116 | |||
117 | #ifdef CONFIG_SMP | ||
118 | if (shared_bank[bank] && cpu_core_id[cpu]) | ||
119 | continue; | ||
120 | #endif | ||
121 | |||
122 | setup_threshold_lvt((mci_misc_hi & MASK_LVTOFF_HI) >> 20); | ||
123 | threshold_defaults.cpu = cpu; | ||
124 | threshold_defaults.bank = bank; | ||
125 | threshold_restart_bank(&threshold_defaults, 0, 0); | ||
126 | } | ||
127 | } | ||
128 | |||
129 | /* | ||
130 | * APIC Interrupt Handler | ||
131 | */ | ||
132 | |||
133 | /* | ||
134 | * threshold interrupt handler will service THRESHOLD_APIC_VECTOR. | ||
135 | * the interrupt goes off when error_count reaches threshold_limit. | ||
136 | * the handler will simply log mcelog w/ software defined bank number. | ||
137 | */ | ||
138 | asmlinkage void mce_threshold_interrupt(void) | ||
139 | { | ||
140 | int bank; | ||
141 | struct mce m; | ||
142 | |||
143 | ack_APIC_irq(); | ||
144 | exit_idle(); | ||
145 | irq_enter(); | ||
146 | |||
147 | memset(&m, 0, sizeof(m)); | ||
148 | rdtscll(m.tsc); | ||
149 | m.cpu = smp_processor_id(); | ||
150 | |||
151 | /* assume first bank caused it */ | ||
152 | for (bank = 0; bank < NR_BANKS; ++bank) { | ||
153 | m.bank = MCE_THRESHOLD_BASE + bank; | ||
154 | rdmsrl(MSR_IA32_MC0_MISC + bank * 4, m.misc); | ||
155 | |||
156 | if (m.misc & MASK_OVERFLOW) { | ||
157 | mce_log(&m); | ||
158 | goto out; | ||
159 | } | ||
160 | } | ||
161 | out: | ||
162 | irq_exit(); | ||
163 | } | ||
164 | |||
165 | /* | ||
166 | * Sysfs Interface | ||
167 | */ | ||
168 | |||
169 | static struct sysdev_class threshold_sysclass = { | ||
170 | set_kset_name("threshold"), | ||
171 | }; | ||
172 | |||
173 | static DEFINE_PER_CPU(struct sys_device, device_threshold); | ||
174 | |||
175 | struct threshold_attr { | ||
176 | struct attribute attr; | ||
177 | ssize_t(*show) (struct threshold_bank *, char *); | ||
178 | ssize_t(*store) (struct threshold_bank *, const char *, size_t count); | ||
179 | }; | ||
180 | |||
181 | static DEFINE_PER_CPU(struct threshold_bank *, threshold_banks[NR_BANKS]); | ||
182 | |||
183 | static cpumask_t affinity_set(unsigned int cpu) | ||
184 | { | ||
185 | cpumask_t oldmask = current->cpus_allowed; | ||
186 | cpumask_t newmask = CPU_MASK_NONE; | ||
187 | cpu_set(cpu, newmask); | ||
188 | set_cpus_allowed(current, newmask); | ||
189 | return oldmask; | ||
190 | } | ||
191 | |||
192 | static void affinity_restore(cpumask_t oldmask) | ||
193 | { | ||
194 | set_cpus_allowed(current, oldmask); | ||
195 | } | ||
196 | |||
197 | #define SHOW_FIELDS(name) \ | ||
198 | static ssize_t show_ ## name(struct threshold_bank * b, char *buf) \ | ||
199 | { \ | ||
200 | return sprintf(buf, "%lx\n", (unsigned long) b->name); \ | ||
201 | } | ||
202 | SHOW_FIELDS(interrupt_enable) | ||
203 | SHOW_FIELDS(threshold_limit) | ||
204 | |||
205 | static ssize_t store_interrupt_enable(struct threshold_bank *b, | ||
206 | const char *buf, size_t count) | ||
207 | { | ||
208 | char *end; | ||
209 | cpumask_t oldmask; | ||
210 | unsigned long new = simple_strtoul(buf, &end, 0); | ||
211 | if (end == buf) | ||
212 | return -EINVAL; | ||
213 | b->interrupt_enable = !!new; | ||
214 | |||
215 | oldmask = affinity_set(b->cpu); | ||
216 | threshold_restart_bank(b, 0, 0); | ||
217 | affinity_restore(oldmask); | ||
218 | |||
219 | return end - buf; | ||
220 | } | ||
221 | |||
222 | static ssize_t store_threshold_limit(struct threshold_bank *b, | ||
223 | const char *buf, size_t count) | ||
224 | { | ||
225 | char *end; | ||
226 | cpumask_t oldmask; | ||
227 | u16 old; | ||
228 | unsigned long new = simple_strtoul(buf, &end, 0); | ||
229 | if (end == buf) | ||
230 | return -EINVAL; | ||
231 | if (new > THRESHOLD_MAX) | ||
232 | new = THRESHOLD_MAX; | ||
233 | if (new < 1) | ||
234 | new = 1; | ||
235 | old = b->threshold_limit; | ||
236 | b->threshold_limit = new; | ||
237 | |||
238 | oldmask = affinity_set(b->cpu); | ||
239 | threshold_restart_bank(b, 0, old); | ||
240 | affinity_restore(oldmask); | ||
241 | |||
242 | return end - buf; | ||
243 | } | ||
244 | |||
245 | static ssize_t show_error_count(struct threshold_bank *b, char *buf) | ||
246 | { | ||
247 | u32 high, low; | ||
248 | cpumask_t oldmask; | ||
249 | oldmask = affinity_set(b->cpu); | ||
250 | rdmsr(MSR_IA32_MC0_MISC + b->bank * 4, low, high); /* ignore low 32 */ | ||
251 | affinity_restore(oldmask); | ||
252 | return sprintf(buf, "%x\n", | ||
253 | (high & 0xFFF) - (THRESHOLD_MAX - b->threshold_limit)); | ||
254 | } | ||
255 | |||
256 | static ssize_t store_error_count(struct threshold_bank *b, | ||
257 | const char *buf, size_t count) | ||
258 | { | ||
259 | cpumask_t oldmask; | ||
260 | oldmask = affinity_set(b->cpu); | ||
261 | threshold_restart_bank(b, 1, 0); | ||
262 | affinity_restore(oldmask); | ||
263 | return 1; | ||
264 | } | ||
265 | |||
266 | #define THRESHOLD_ATTR(_name,_mode,_show,_store) { \ | ||
267 | .attr = {.name = __stringify(_name), .mode = _mode }, \ | ||
268 | .show = _show, \ | ||
269 | .store = _store, \ | ||
270 | }; | ||
271 | |||
272 | #define ATTR_FIELDS(name) \ | ||
273 | static struct threshold_attr name = \ | ||
274 | THRESHOLD_ATTR(name, 0644, show_## name, store_## name) | ||
275 | |||
276 | ATTR_FIELDS(interrupt_enable); | ||
277 | ATTR_FIELDS(threshold_limit); | ||
278 | ATTR_FIELDS(error_count); | ||
279 | |||
280 | static struct attribute *default_attrs[] = { | ||
281 | &interrupt_enable.attr, | ||
282 | &threshold_limit.attr, | ||
283 | &error_count.attr, | ||
284 | NULL | ||
285 | }; | ||
286 | |||
287 | #define to_bank(k) container_of(k,struct threshold_bank,kobj) | ||
288 | #define to_attr(a) container_of(a,struct threshold_attr,attr) | ||
289 | |||
290 | static ssize_t show(struct kobject *kobj, struct attribute *attr, char *buf) | ||
291 | { | ||
292 | struct threshold_bank *b = to_bank(kobj); | ||
293 | struct threshold_attr *a = to_attr(attr); | ||
294 | ssize_t ret; | ||
295 | ret = a->show ? a->show(b, buf) : -EIO; | ||
296 | return ret; | ||
297 | } | ||
298 | |||
299 | static ssize_t store(struct kobject *kobj, struct attribute *attr, | ||
300 | const char *buf, size_t count) | ||
301 | { | ||
302 | struct threshold_bank *b = to_bank(kobj); | ||
303 | struct threshold_attr *a = to_attr(attr); | ||
304 | ssize_t ret; | ||
305 | ret = a->store ? a->store(b, buf, count) : -EIO; | ||
306 | return ret; | ||
307 | } | ||
308 | |||
309 | static struct sysfs_ops threshold_ops = { | ||
310 | .show = show, | ||
311 | .store = store, | ||
312 | }; | ||
313 | |||
314 | static struct kobj_type threshold_ktype = { | ||
315 | .sysfs_ops = &threshold_ops, | ||
316 | .default_attrs = default_attrs, | ||
317 | }; | ||
318 | |||
319 | /* symlinks sibling shared banks to first core. first core owns dir/files. */ | ||
320 | static __cpuinit int threshold_create_bank(unsigned int cpu, int bank) | ||
321 | { | ||
322 | int err = 0; | ||
323 | struct threshold_bank *b = NULL; | ||
324 | |||
325 | #ifdef CONFIG_SMP | ||
326 | if (cpu_core_id[cpu] && shared_bank[bank]) { /* symlink */ | ||
327 | char name[16]; | ||
328 | unsigned lcpu = first_cpu(cpu_core_map[cpu]); | ||
329 | if (cpu_core_id[lcpu]) | ||
330 | goto out; /* first core not up yet */ | ||
331 | |||
332 | b = per_cpu(threshold_banks, lcpu)[bank]; | ||
333 | if (!b) | ||
334 | goto out; | ||
335 | sprintf(name, "bank%i", bank); | ||
336 | err = sysfs_create_link(&per_cpu(device_threshold, cpu).kobj, | ||
337 | &b->kobj, name); | ||
338 | if (err) | ||
339 | goto out; | ||
340 | per_cpu(threshold_banks, cpu)[bank] = b; | ||
341 | goto out; | ||
342 | } | ||
343 | #endif | ||
344 | |||
345 | b = kmalloc(sizeof(struct threshold_bank), GFP_KERNEL); | ||
346 | if (!b) { | ||
347 | err = -ENOMEM; | ||
348 | goto out; | ||
349 | } | ||
350 | memset(b, 0, sizeof(struct threshold_bank)); | ||
351 | |||
352 | b->cpu = cpu; | ||
353 | b->bank = bank; | ||
354 | b->interrupt_enable = 0; | ||
355 | b->threshold_limit = THRESHOLD_MAX; | ||
356 | kobject_set_name(&b->kobj, "bank%i", bank); | ||
357 | b->kobj.parent = &per_cpu(device_threshold, cpu).kobj; | ||
358 | b->kobj.ktype = &threshold_ktype; | ||
359 | |||
360 | err = kobject_register(&b->kobj); | ||
361 | if (err) { | ||
362 | kfree(b); | ||
363 | goto out; | ||
364 | } | ||
365 | per_cpu(threshold_banks, cpu)[bank] = b; | ||
366 | out: | ||
367 | return err; | ||
368 | } | ||
369 | |||
370 | /* create dir/files for all valid threshold banks */ | ||
371 | static __cpuinit int threshold_create_device(unsigned int cpu) | ||
372 | { | ||
373 | int bank; | ||
374 | int err = 0; | ||
375 | |||
376 | per_cpu(device_threshold, cpu).id = cpu; | ||
377 | per_cpu(device_threshold, cpu).cls = &threshold_sysclass; | ||
378 | err = sysdev_register(&per_cpu(device_threshold, cpu)); | ||
379 | if (err) | ||
380 | goto out; | ||
381 | |||
382 | for (bank = 0; bank < NR_BANKS; ++bank) { | ||
383 | if (!(per_cpu(bank_map, cpu) & 1 << bank)) | ||
384 | continue; | ||
385 | err = threshold_create_bank(cpu, bank); | ||
386 | if (err) | ||
387 | goto out; | ||
388 | } | ||
389 | out: | ||
390 | return err; | ||
391 | } | ||
392 | |||
393 | #ifdef CONFIG_HOTPLUG_CPU | ||
394 | /* | ||
395 | * let's be hotplug friendly. | ||
396 | * in case of multiple core processors, the first core always takes ownership | ||
397 | * of shared sysfs dir/files, and rest of the cores will be symlinked to it. | ||
398 | */ | ||
399 | |||
400 | /* cpu hotplug call removes all symlinks before first core dies */ | ||
401 | static __cpuinit void threshold_remove_bank(unsigned int cpu, int bank) | ||
402 | { | ||
403 | struct threshold_bank *b; | ||
404 | char name[16]; | ||
405 | |||
406 | b = per_cpu(threshold_banks, cpu)[bank]; | ||
407 | if (!b) | ||
408 | return; | ||
409 | if (shared_bank[bank] && atomic_read(&b->kobj.kref.refcount) > 2) { | ||
410 | sprintf(name, "bank%i", bank); | ||
411 | sysfs_remove_link(&per_cpu(device_threshold, cpu).kobj, name); | ||
412 | per_cpu(threshold_banks, cpu)[bank] = NULL; | ||
413 | } else { | ||
414 | kobject_unregister(&b->kobj); | ||
415 | kfree(per_cpu(threshold_banks, cpu)[bank]); | ||
416 | } | ||
417 | } | ||
418 | |||
419 | static __cpuinit void threshold_remove_device(unsigned int cpu) | ||
420 | { | ||
421 | int bank; | ||
422 | |||
423 | for (bank = 0; bank < NR_BANKS; ++bank) { | ||
424 | if (!(per_cpu(bank_map, cpu) & 1 << bank)) | ||
425 | continue; | ||
426 | threshold_remove_bank(cpu, bank); | ||
427 | } | ||
428 | sysdev_unregister(&per_cpu(device_threshold, cpu)); | ||
429 | } | ||
430 | |||
431 | /* link all existing siblings when first core comes up */ | ||
432 | static __cpuinit int threshold_create_symlinks(unsigned int cpu) | ||
433 | { | ||
434 | int bank, err = 0; | ||
435 | unsigned int lcpu = 0; | ||
436 | |||
437 | if (cpu_core_id[cpu]) | ||
438 | return 0; | ||
439 | for_each_cpu_mask(lcpu, cpu_core_map[cpu]) { | ||
440 | if (lcpu == cpu) | ||
441 | continue; | ||
442 | for (bank = 0; bank < NR_BANKS; ++bank) { | ||
443 | if (!(per_cpu(bank_map, cpu) & 1 << bank)) | ||
444 | continue; | ||
445 | if (!shared_bank[bank]) | ||
446 | continue; | ||
447 | err = threshold_create_bank(lcpu, bank); | ||
448 | } | ||
449 | } | ||
450 | return err; | ||
451 | } | ||
452 | |||
453 | /* remove all symlinks before first core dies. */ | ||
454 | static __cpuinit void threshold_remove_symlinks(unsigned int cpu) | ||
455 | { | ||
456 | int bank; | ||
457 | unsigned int lcpu = 0; | ||
458 | if (cpu_core_id[cpu]) | ||
459 | return; | ||
460 | for_each_cpu_mask(lcpu, cpu_core_map[cpu]) { | ||
461 | if (lcpu == cpu) | ||
462 | continue; | ||
463 | for (bank = 0; bank < NR_BANKS; ++bank) { | ||
464 | if (!(per_cpu(bank_map, cpu) & 1 << bank)) | ||
465 | continue; | ||
466 | if (!shared_bank[bank]) | ||
467 | continue; | ||
468 | threshold_remove_bank(lcpu, bank); | ||
469 | } | ||
470 | } | ||
471 | } | ||
472 | #else /* !CONFIG_HOTPLUG_CPU */ | ||
473 | static __cpuinit void threshold_create_symlinks(unsigned int cpu) | ||
474 | { | ||
475 | } | ||
476 | static __cpuinit void threshold_remove_symlinks(unsigned int cpu) | ||
477 | { | ||
478 | } | ||
479 | static void threshold_remove_device(unsigned int cpu) | ||
480 | { | ||
481 | } | ||
482 | #endif | ||
483 | |||
484 | /* get notified when a cpu comes on/off */ | ||
485 | static __cpuinit int threshold_cpu_callback(struct notifier_block *nfb, | ||
486 | unsigned long action, void *hcpu) | ||
487 | { | ||
488 | /* cpu was unsigned int to begin with */ | ||
489 | unsigned int cpu = (unsigned long)hcpu; | ||
490 | |||
491 | if (cpu >= NR_CPUS) | ||
492 | goto out; | ||
493 | |||
494 | switch (action) { | ||
495 | case CPU_ONLINE: | ||
496 | threshold_create_device(cpu); | ||
497 | threshold_create_symlinks(cpu); | ||
498 | break; | ||
499 | case CPU_DOWN_PREPARE: | ||
500 | threshold_remove_symlinks(cpu); | ||
501 | break; | ||
502 | case CPU_DOWN_FAILED: | ||
503 | threshold_create_symlinks(cpu); | ||
504 | break; | ||
505 | case CPU_DEAD: | ||
506 | threshold_remove_device(cpu); | ||
507 | break; | ||
508 | default: | ||
509 | break; | ||
510 | } | ||
511 | out: | ||
512 | return NOTIFY_OK; | ||
513 | } | ||
514 | |||
515 | static struct notifier_block threshold_cpu_notifier = { | ||
516 | .notifier_call = threshold_cpu_callback, | ||
517 | }; | ||
518 | |||
519 | static __init int threshold_init_device(void) | ||
520 | { | ||
521 | int err; | ||
522 | int lcpu = 0; | ||
523 | |||
524 | err = sysdev_class_register(&threshold_sysclass); | ||
525 | if (err) | ||
526 | goto out; | ||
527 | |||
528 | /* to hit CPUs online before the notifier is up */ | ||
529 | for_each_online_cpu(lcpu) { | ||
530 | err = threshold_create_device(lcpu); | ||
531 | if (err) | ||
532 | goto out; | ||
533 | } | ||
534 | register_cpu_notifier(&threshold_cpu_notifier); | ||
535 | |||
536 | out: | ||
537 | return err; | ||
538 | } | ||
539 | |||
540 | device_initcall(threshold_init_device); | ||
diff --git a/arch/x86_64/kernel/mce_intel.c b/arch/x86_64/kernel/mce_intel.c index 0be0a7959814..8f533d2c40cb 100644 --- a/arch/x86_64/kernel/mce_intel.c +++ b/arch/x86_64/kernel/mce_intel.c | |||
@@ -10,6 +10,7 @@ | |||
10 | #include <asm/msr.h> | 10 | #include <asm/msr.h> |
11 | #include <asm/mce.h> | 11 | #include <asm/mce.h> |
12 | #include <asm/hw_irq.h> | 12 | #include <asm/hw_irq.h> |
13 | #include <asm/idle.h> | ||
13 | 14 | ||
14 | static DEFINE_PER_CPU(unsigned long, next_check); | 15 | static DEFINE_PER_CPU(unsigned long, next_check); |
15 | 16 | ||
@@ -19,6 +20,7 @@ asmlinkage void smp_thermal_interrupt(void) | |||
19 | 20 | ||
20 | ack_APIC_irq(); | 21 | ack_APIC_irq(); |
21 | 22 | ||
23 | exit_idle(); | ||
22 | irq_enter(); | 24 | irq_enter(); |
23 | if (time_before(jiffies, __get_cpu_var(next_check))) | 25 | if (time_before(jiffies, __get_cpu_var(next_check))) |
24 | goto done; | 26 | goto done; |
@@ -78,7 +80,7 @@ static void __cpuinit intel_init_thermal(struct cpuinfo_x86 *c) | |||
78 | 80 | ||
79 | h = THERMAL_APIC_VECTOR; | 81 | h = THERMAL_APIC_VECTOR; |
80 | h |= (APIC_DM_FIXED | APIC_LVT_MASKED); | 82 | h |= (APIC_DM_FIXED | APIC_LVT_MASKED); |
81 | apic_write_around(APIC_LVTTHMR, h); | 83 | apic_write(APIC_LVTTHMR, h); |
82 | 84 | ||
83 | rdmsr(MSR_IA32_THERM_INTERRUPT, l, h); | 85 | rdmsr(MSR_IA32_THERM_INTERRUPT, l, h); |
84 | wrmsr(MSR_IA32_THERM_INTERRUPT, l | 0x03, h); | 86 | wrmsr(MSR_IA32_THERM_INTERRUPT, l | 0x03, h); |
@@ -87,7 +89,7 @@ static void __cpuinit intel_init_thermal(struct cpuinfo_x86 *c) | |||
87 | wrmsr(MSR_IA32_MISC_ENABLE, l | (1 << 3), h); | 89 | wrmsr(MSR_IA32_MISC_ENABLE, l | (1 << 3), h); |
88 | 90 | ||
89 | l = apic_read(APIC_LVTTHMR); | 91 | l = apic_read(APIC_LVTTHMR); |
90 | apic_write_around(APIC_LVTTHMR, l & ~APIC_LVT_MASKED); | 92 | apic_write(APIC_LVTTHMR, l & ~APIC_LVT_MASKED); |
91 | printk(KERN_INFO "CPU%d: Thermal monitoring enabled (%s)\n", | 93 | printk(KERN_INFO "CPU%d: Thermal monitoring enabled (%s)\n", |
92 | cpu, tm2 ? "TM2" : "TM1"); | 94 | cpu, tm2 ? "TM2" : "TM1"); |
93 | return; | 95 | return; |
diff --git a/arch/x86_64/kernel/mpparse.c b/arch/x86_64/kernel/mpparse.c index f16d38d09daf..1105250bf02c 100644 --- a/arch/x86_64/kernel/mpparse.c +++ b/arch/x86_64/kernel/mpparse.c | |||
@@ -42,7 +42,7 @@ int acpi_found_madt; | |||
42 | * Various Linux-internal data structures created from the | 42 | * Various Linux-internal data structures created from the |
43 | * MP-table. | 43 | * MP-table. |
44 | */ | 44 | */ |
45 | int apic_version [MAX_APICS]; | 45 | unsigned char apic_version [MAX_APICS]; |
46 | unsigned char mp_bus_id_to_type [MAX_MP_BUSSES] = { [0 ... MAX_MP_BUSSES-1] = -1 }; | 46 | unsigned char mp_bus_id_to_type [MAX_MP_BUSSES] = { [0 ... MAX_MP_BUSSES-1] = -1 }; |
47 | int mp_bus_id_to_pci_bus [MAX_MP_BUSSES] = { [0 ... MAX_MP_BUSSES-1] = -1 }; | 47 | int mp_bus_id_to_pci_bus [MAX_MP_BUSSES] = { [0 ... MAX_MP_BUSSES-1] = -1 }; |
48 | 48 | ||
@@ -65,7 +65,9 @@ unsigned long mp_lapic_addr = 0; | |||
65 | /* Processor that is doing the boot up */ | 65 | /* Processor that is doing the boot up */ |
66 | unsigned int boot_cpu_id = -1U; | 66 | unsigned int boot_cpu_id = -1U; |
67 | /* Internal processor count */ | 67 | /* Internal processor count */ |
68 | static unsigned int num_processors = 0; | 68 | unsigned int num_processors __initdata = 0; |
69 | |||
70 | unsigned disabled_cpus __initdata; | ||
69 | 71 | ||
70 | /* Bitmask of physically existing CPUs */ | 72 | /* Bitmask of physically existing CPUs */ |
71 | physid_mask_t phys_cpu_present_map = PHYSID_MASK_NONE; | 73 | physid_mask_t phys_cpu_present_map = PHYSID_MASK_NONE; |
@@ -106,11 +108,14 @@ static int __init mpf_checksum(unsigned char *mp, int len) | |||
106 | 108 | ||
107 | static void __init MP_processor_info (struct mpc_config_processor *m) | 109 | static void __init MP_processor_info (struct mpc_config_processor *m) |
108 | { | 110 | { |
109 | int ver, cpu; | 111 | int cpu; |
112 | unsigned char ver; | ||
110 | static int found_bsp=0; | 113 | static int found_bsp=0; |
111 | 114 | ||
112 | if (!(m->mpc_cpuflag & CPU_ENABLED)) | 115 | if (!(m->mpc_cpuflag & CPU_ENABLED)) { |
116 | disabled_cpus++; | ||
113 | return; | 117 | return; |
118 | } | ||
114 | 119 | ||
115 | printk(KERN_INFO "Processor #%d %d:%d APIC version %d\n", | 120 | printk(KERN_INFO "Processor #%d %d:%d APIC version %d\n", |
116 | m->mpc_apicid, | 121 | m->mpc_apicid, |
@@ -129,12 +134,14 @@ static void __init MP_processor_info (struct mpc_config_processor *m) | |||
129 | } | 134 | } |
130 | 135 | ||
131 | cpu = num_processors++; | 136 | cpu = num_processors++; |
132 | 137 | ||
133 | if (m->mpc_apicid > MAX_APICS) { | 138 | #if MAX_APICS < 255 |
139 | if ((int)m->mpc_apicid > MAX_APICS) { | ||
134 | printk(KERN_ERR "Processor #%d INVALID. (Max ID: %d).\n", | 140 | printk(KERN_ERR "Processor #%d INVALID. (Max ID: %d).\n", |
135 | m->mpc_apicid, MAX_APICS); | 141 | m->mpc_apicid, MAX_APICS); |
136 | return; | 142 | return; |
137 | } | 143 | } |
144 | #endif | ||
138 | ver = m->mpc_apicver; | 145 | ver = m->mpc_apicver; |
139 | 146 | ||
140 | physid_set(m->mpc_apicid, phys_cpu_present_map); | 147 | physid_set(m->mpc_apicid, phys_cpu_present_map); |
@@ -218,7 +225,7 @@ static void __init MP_intsrc_info (struct mpc_config_intsrc *m) | |||
218 | m->mpc_irqtype, m->mpc_irqflag & 3, | 225 | m->mpc_irqtype, m->mpc_irqflag & 3, |
219 | (m->mpc_irqflag >> 2) & 3, m->mpc_srcbus, | 226 | (m->mpc_irqflag >> 2) & 3, m->mpc_srcbus, |
220 | m->mpc_srcbusirq, m->mpc_dstapic, m->mpc_dstirq); | 227 | m->mpc_srcbusirq, m->mpc_dstapic, m->mpc_dstirq); |
221 | if (++mp_irq_entries == MAX_IRQ_SOURCES) | 228 | if (++mp_irq_entries >= MAX_IRQ_SOURCES) |
222 | panic("Max # of irq sources exceeded!!\n"); | 229 | panic("Max # of irq sources exceeded!!\n"); |
223 | } | 230 | } |
224 | 231 | ||
@@ -549,7 +556,7 @@ void __init get_smp_config (void) | |||
549 | * Read the physical hardware table. Anything here will | 556 | * Read the physical hardware table. Anything here will |
550 | * override the defaults. | 557 | * override the defaults. |
551 | */ | 558 | */ |
552 | if (!smp_read_mpc((void *)(unsigned long)mpf->mpf_physptr)) { | 559 | if (!smp_read_mpc(phys_to_virt(mpf->mpf_physptr))) { |
553 | smp_found_config = 0; | 560 | smp_found_config = 0; |
554 | printk(KERN_ERR "BIOS bug, MP table errors detected!...\n"); | 561 | printk(KERN_ERR "BIOS bug, MP table errors detected!...\n"); |
555 | printk(KERN_ERR "... disabling SMP support. (tell your hw vendor)\n"); | 562 | printk(KERN_ERR "... disabling SMP support. (tell your hw vendor)\n"); |
diff --git a/arch/x86_64/kernel/nmi.c b/arch/x86_64/kernel/nmi.c index 39d445e16f22..5fae6f0cd994 100644 --- a/arch/x86_64/kernel/nmi.c +++ b/arch/x86_64/kernel/nmi.c | |||
@@ -151,23 +151,25 @@ int __init check_nmi_watchdog (void) | |||
151 | 151 | ||
152 | printk(KERN_INFO "testing NMI watchdog ... "); | 152 | printk(KERN_INFO "testing NMI watchdog ... "); |
153 | 153 | ||
154 | #ifdef CONFIG_SMP | ||
154 | if (nmi_watchdog == NMI_LOCAL_APIC) | 155 | if (nmi_watchdog == NMI_LOCAL_APIC) |
155 | smp_call_function(nmi_cpu_busy, (void *)&endflag, 0, 0); | 156 | smp_call_function(nmi_cpu_busy, (void *)&endflag, 0, 0); |
157 | #endif | ||
156 | 158 | ||
157 | for (cpu = 0; cpu < NR_CPUS; cpu++) | 159 | for (cpu = 0; cpu < NR_CPUS; cpu++) |
158 | counts[cpu] = cpu_pda[cpu].__nmi_count; | 160 | counts[cpu] = cpu_pda(cpu)->__nmi_count; |
159 | local_irq_enable(); | 161 | local_irq_enable(); |
160 | mdelay((10*1000)/nmi_hz); // wait 10 ticks | 162 | mdelay((10*1000)/nmi_hz); // wait 10 ticks |
161 | 163 | ||
162 | for (cpu = 0; cpu < NR_CPUS; cpu++) { | 164 | for (cpu = 0; cpu < NR_CPUS; cpu++) { |
163 | if (!cpu_online(cpu)) | 165 | if (!cpu_online(cpu)) |
164 | continue; | 166 | continue; |
165 | if (cpu_pda[cpu].__nmi_count - counts[cpu] <= 5) { | 167 | if (cpu_pda(cpu)->__nmi_count - counts[cpu] <= 5) { |
166 | endflag = 1; | 168 | endflag = 1; |
167 | printk("CPU#%d: NMI appears to be stuck (%d->%d)!\n", | 169 | printk("CPU#%d: NMI appears to be stuck (%d->%d)!\n", |
168 | cpu, | 170 | cpu, |
169 | counts[cpu], | 171 | counts[cpu], |
170 | cpu_pda[cpu].__nmi_count); | 172 | cpu_pda(cpu)->__nmi_count); |
171 | nmi_active = 0; | 173 | nmi_active = 0; |
172 | lapic_nmi_owner &= ~LAPIC_NMI_WATCHDOG; | 174 | lapic_nmi_owner &= ~LAPIC_NMI_WATCHDOG; |
173 | nmi_perfctr_msr = 0; | 175 | nmi_perfctr_msr = 0; |
diff --git a/arch/x86_64/kernel/pci-dma.c b/arch/x86_64/kernel/pci-dma.c index cab471cf3edb..2f5d8328e2b9 100644 --- a/arch/x86_64/kernel/pci-dma.c +++ b/arch/x86_64/kernel/pci-dma.c | |||
@@ -8,53 +8,259 @@ | |||
8 | #include <linux/pci.h> | 8 | #include <linux/pci.h> |
9 | #include <linux/module.h> | 9 | #include <linux/module.h> |
10 | #include <asm/io.h> | 10 | #include <asm/io.h> |
11 | #include <asm/proto.h> | ||
11 | 12 | ||
12 | /* Map a set of buffers described by scatterlist in streaming | 13 | int iommu_merge __read_mostly = 0; |
13 | * mode for DMA. This is the scatter-gather version of the | 14 | EXPORT_SYMBOL(iommu_merge); |
14 | * above pci_map_single interface. Here the scatter gather list | 15 | |
15 | * elements are each tagged with the appropriate dma address | 16 | dma_addr_t bad_dma_address __read_mostly; |
16 | * and length. They are obtained via sg_dma_{address,length}(SG). | 17 | EXPORT_SYMBOL(bad_dma_address); |
17 | * | 18 | |
18 | * NOTE: An implementation may be able to use a smaller number of | 19 | /* This tells the BIO block layer to assume merging. Default to off |
19 | * DMA address/length pairs than there are SG table elements. | 20 | because we cannot guarantee merging later. */ |
20 | * (for example via virtual mapping capabilities) | 21 | int iommu_bio_merge __read_mostly = 0; |
21 | * The routine returns the number of addr/length pairs actually | 22 | EXPORT_SYMBOL(iommu_bio_merge); |
22 | * used, at most nents. | 23 | |
23 | * | 24 | int iommu_sac_force __read_mostly = 0; |
24 | * Device ownership issues as mentioned above for pci_map_single are | 25 | EXPORT_SYMBOL(iommu_sac_force); |
25 | * the same here. | 26 | |
27 | int no_iommu __read_mostly; | ||
28 | #ifdef CONFIG_IOMMU_DEBUG | ||
29 | int panic_on_overflow __read_mostly = 1; | ||
30 | int force_iommu __read_mostly = 1; | ||
31 | #else | ||
32 | int panic_on_overflow __read_mostly = 0; | ||
33 | int force_iommu __read_mostly= 0; | ||
34 | #endif | ||
35 | |||
36 | /* Dummy device used for NULL arguments (normally ISA). Better would | ||
37 | be probably a smaller DMA mask, but this is bug-to-bug compatible | ||
38 | to i386. */ | ||
39 | struct device fallback_dev = { | ||
40 | .bus_id = "fallback device", | ||
41 | .coherent_dma_mask = 0xffffffff, | ||
42 | .dma_mask = &fallback_dev.coherent_dma_mask, | ||
43 | }; | ||
44 | |||
45 | /* Allocate DMA memory on node near device */ | ||
46 | noinline static void * | ||
47 | dma_alloc_pages(struct device *dev, gfp_t gfp, unsigned order) | ||
48 | { | ||
49 | struct page *page; | ||
50 | int node; | ||
51 | if (dev->bus == &pci_bus_type) | ||
52 | node = pcibus_to_node(to_pci_dev(dev)->bus); | ||
53 | else | ||
54 | node = numa_node_id(); | ||
55 | page = alloc_pages_node(node, gfp, order); | ||
56 | return page ? page_address(page) : NULL; | ||
57 | } | ||
58 | |||
59 | /* | ||
60 | * Allocate memory for a coherent mapping. | ||
26 | */ | 61 | */ |
27 | int dma_map_sg(struct device *hwdev, struct scatterlist *sg, | 62 | void * |
28 | int nents, int direction) | 63 | dma_alloc_coherent(struct device *dev, size_t size, dma_addr_t *dma_handle, |
64 | gfp_t gfp) | ||
29 | { | 65 | { |
30 | int i; | 66 | void *memory; |
31 | 67 | unsigned long dma_mask = 0; | |
32 | BUG_ON(direction == DMA_NONE); | 68 | u64 bus; |
33 | for (i = 0; i < nents; i++ ) { | 69 | |
34 | struct scatterlist *s = &sg[i]; | 70 | if (!dev) |
35 | BUG_ON(!s->page); | 71 | dev = &fallback_dev; |
36 | s->dma_address = virt_to_bus(page_address(s->page) +s->offset); | 72 | dma_mask = dev->coherent_dma_mask; |
37 | s->dma_length = s->length; | 73 | if (dma_mask == 0) |
74 | dma_mask = 0xffffffff; | ||
75 | |||
76 | /* Kludge to make it bug-to-bug compatible with i386. i386 | ||
77 | uses the normal dma_mask for alloc_coherent. */ | ||
78 | dma_mask &= *dev->dma_mask; | ||
79 | |||
80 | /* Why <=? Even when the mask is smaller than 4GB it is often | ||
81 | larger than 16MB and in this case we have a chance of | ||
82 | finding fitting memory in the next higher zone first. If | ||
83 | not retry with true GFP_DMA. -AK */ | ||
84 | if (dma_mask <= 0xffffffff) | ||
85 | gfp |= GFP_DMA32; | ||
86 | |||
87 | again: | ||
88 | memory = dma_alloc_pages(dev, gfp, get_order(size)); | ||
89 | if (memory == NULL) | ||
90 | return NULL; | ||
91 | |||
92 | { | ||
93 | int high, mmu; | ||
94 | bus = virt_to_bus(memory); | ||
95 | high = (bus + size) >= dma_mask; | ||
96 | mmu = high; | ||
97 | if (force_iommu && !(gfp & GFP_DMA)) | ||
98 | mmu = 1; | ||
99 | else if (high) { | ||
100 | free_pages((unsigned long)memory, | ||
101 | get_order(size)); | ||
102 | |||
103 | /* Don't use the 16MB ZONE_DMA unless absolutely | ||
104 | needed. It's better to use remapping first. */ | ||
105 | if (dma_mask < 0xffffffff && !(gfp & GFP_DMA)) { | ||
106 | gfp = (gfp & ~GFP_DMA32) | GFP_DMA; | ||
107 | goto again; | ||
108 | } | ||
109 | |||
110 | if (dma_ops->alloc_coherent) | ||
111 | return dma_ops->alloc_coherent(dev, size, | ||
112 | dma_handle, gfp); | ||
113 | return NULL; | ||
114 | } | ||
115 | |||
116 | memset(memory, 0, size); | ||
117 | if (!mmu) { | ||
118 | *dma_handle = virt_to_bus(memory); | ||
119 | return memory; | ||
120 | } | ||
121 | } | ||
122 | |||
123 | if (dma_ops->alloc_coherent) { | ||
124 | free_pages((unsigned long)memory, get_order(size)); | ||
125 | gfp &= ~(GFP_DMA|GFP_DMA32); | ||
126 | return dma_ops->alloc_coherent(dev, size, dma_handle, gfp); | ||
127 | } | ||
128 | |||
129 | if (dma_ops->map_simple) { | ||
130 | *dma_handle = dma_ops->map_simple(dev, memory, | ||
131 | size, | ||
132 | PCI_DMA_BIDIRECTIONAL); | ||
133 | if (*dma_handle != bad_dma_address) | ||
134 | return memory; | ||
38 | } | 135 | } |
39 | return nents; | ||
40 | } | ||
41 | 136 | ||
42 | EXPORT_SYMBOL(dma_map_sg); | 137 | if (panic_on_overflow) |
138 | panic("dma_alloc_coherent: IOMMU overflow by %lu bytes\n",size); | ||
139 | free_pages((unsigned long)memory, get_order(size)); | ||
140 | return NULL; | ||
141 | } | ||
142 | EXPORT_SYMBOL(dma_alloc_coherent); | ||
43 | 143 | ||
44 | /* Unmap a set of streaming mode DMA translations. | 144 | /* |
45 | * Again, cpu read rules concerning calls here are the same as for | 145 | * Unmap coherent memory. |
46 | * pci_unmap_single() above. | 146 | * The caller must ensure that the device has finished accessing the mapping. |
47 | */ | 147 | */ |
48 | void dma_unmap_sg(struct device *dev, struct scatterlist *sg, | 148 | void dma_free_coherent(struct device *dev, size_t size, |
49 | int nents, int dir) | 149 | void *vaddr, dma_addr_t bus) |
150 | { | ||
151 | if (dma_ops->unmap_single) | ||
152 | dma_ops->unmap_single(dev, bus, size, 0); | ||
153 | free_pages((unsigned long)vaddr, get_order(size)); | ||
154 | } | ||
155 | EXPORT_SYMBOL(dma_free_coherent); | ||
156 | |||
157 | int dma_supported(struct device *dev, u64 mask) | ||
158 | { | ||
159 | if (dma_ops->dma_supported) | ||
160 | return dma_ops->dma_supported(dev, mask); | ||
161 | |||
162 | /* Copied from i386. Doesn't make much sense, because it will | ||
163 | only work for pci_alloc_coherent. | ||
164 | The caller just has to use GFP_DMA in this case. */ | ||
165 | if (mask < 0x00ffffff) | ||
166 | return 0; | ||
167 | |||
168 | /* Tell the device to use SAC when IOMMU force is on. This | ||
169 | allows the driver to use cheaper accesses in some cases. | ||
170 | |||
171 | Problem with this is that if we overflow the IOMMU area and | ||
172 | return DAC as fallback address the device may not handle it | ||
173 | correctly. | ||
174 | |||
175 | As a special case some controllers have a 39bit address | ||
176 | mode that is as efficient as 32bit (aic79xx). Don't force | ||
177 | SAC for these. Assume all masks <= 40 bits are of this | ||
178 | type. Normally this doesn't make any difference, but gives | ||
179 | more gentle handling of IOMMU overflow. */ | ||
180 | if (iommu_sac_force && (mask >= 0xffffffffffULL)) { | ||
181 | printk(KERN_INFO "%s: Force SAC with mask %Lx\n", dev->bus_id,mask); | ||
182 | return 0; | ||
183 | } | ||
184 | |||
185 | return 1; | ||
186 | } | ||
187 | EXPORT_SYMBOL(dma_supported); | ||
188 | |||
189 | int dma_set_mask(struct device *dev, u64 mask) | ||
50 | { | 190 | { |
51 | int i; | 191 | if (!dev->dma_mask || !dma_supported(dev, mask)) |
52 | for (i = 0; i < nents; i++) { | 192 | return -EIO; |
53 | struct scatterlist *s = &sg[i]; | 193 | *dev->dma_mask = mask; |
54 | BUG_ON(s->page == NULL); | 194 | return 0; |
55 | BUG_ON(s->dma_address == 0); | ||
56 | dma_unmap_single(dev, s->dma_address, s->dma_length, dir); | ||
57 | } | ||
58 | } | 195 | } |
196 | EXPORT_SYMBOL(dma_set_mask); | ||
197 | |||
198 | /* iommu=[size][,noagp][,off][,force][,noforce][,leak][,memaper[=order]][,merge] | ||
199 | [,forcesac][,fullflush][,nomerge][,biomerge] | ||
200 | size set size of iommu (in bytes) | ||
201 | noagp don't initialize the AGP driver and use full aperture. | ||
202 | off don't use the IOMMU | ||
203 | leak turn on simple iommu leak tracing (only when CONFIG_IOMMU_LEAK is on) | ||
204 | memaper[=order] allocate an own aperture over RAM with size 32MB^order. | ||
205 | noforce don't force IOMMU usage. Default. | ||
206 | force Force IOMMU. | ||
207 | merge Do lazy merging. This may improve performance on some block devices. | ||
208 | Implies force (experimental) | ||
209 | biomerge Do merging at the BIO layer. This is more efficient than merge, | ||
210 | but should be only done with very big IOMMUs. Implies merge,force. | ||
211 | nomerge Don't do SG merging. | ||
212 | forcesac For SAC mode for masks <40bits (experimental) | ||
213 | fullflush Flush IOMMU on each allocation (default) | ||
214 | nofullflush Don't use IOMMU fullflush | ||
215 | allowed overwrite iommu off workarounds for specific chipsets. | ||
216 | soft Use software bounce buffering (default for Intel machines) | ||
217 | noaperture Don't touch the aperture for AGP. | ||
218 | */ | ||
219 | __init int iommu_setup(char *p) | ||
220 | { | ||
221 | iommu_merge = 1; | ||
59 | 222 | ||
60 | EXPORT_SYMBOL(dma_unmap_sg); | 223 | while (*p) { |
224 | if (!strncmp(p,"off",3)) | ||
225 | no_iommu = 1; | ||
226 | /* gart_parse_options has more force support */ | ||
227 | if (!strncmp(p,"force",5)) | ||
228 | force_iommu = 1; | ||
229 | if (!strncmp(p,"noforce",7)) { | ||
230 | iommu_merge = 0; | ||
231 | force_iommu = 0; | ||
232 | } | ||
233 | |||
234 | if (!strncmp(p, "biomerge",8)) { | ||
235 | iommu_bio_merge = 4096; | ||
236 | iommu_merge = 1; | ||
237 | force_iommu = 1; | ||
238 | } | ||
239 | if (!strncmp(p, "panic",5)) | ||
240 | panic_on_overflow = 1; | ||
241 | if (!strncmp(p, "nopanic",7)) | ||
242 | panic_on_overflow = 0; | ||
243 | if (!strncmp(p, "merge",5)) { | ||
244 | iommu_merge = 1; | ||
245 | force_iommu = 1; | ||
246 | } | ||
247 | if (!strncmp(p, "nomerge",7)) | ||
248 | iommu_merge = 0; | ||
249 | if (!strncmp(p, "forcesac",8)) | ||
250 | iommu_sac_force = 1; | ||
251 | |||
252 | #ifdef CONFIG_SWIOTLB | ||
253 | if (!strncmp(p, "soft",4)) | ||
254 | swiotlb = 1; | ||
255 | #endif | ||
256 | |||
257 | #ifdef CONFIG_GART_IOMMU | ||
258 | gart_parse_options(p); | ||
259 | #endif | ||
260 | |||
261 | p += strcspn(p, ","); | ||
262 | if (*p == ',') | ||
263 | ++p; | ||
264 | } | ||
265 | return 1; | ||
266 | } | ||
diff --git a/arch/x86_64/kernel/pci-gart.c b/arch/x86_64/kernel/pci-gart.c index 88be97c96987..c37fc7726ba6 100644 --- a/arch/x86_64/kernel/pci-gart.c +++ b/arch/x86_64/kernel/pci-gart.c | |||
@@ -30,8 +30,8 @@ | |||
30 | #include <asm/proto.h> | 30 | #include <asm/proto.h> |
31 | #include <asm/cacheflush.h> | 31 | #include <asm/cacheflush.h> |
32 | #include <asm/kdebug.h> | 32 | #include <asm/kdebug.h> |
33 | 33 | #include <asm/swiotlb.h> | |
34 | dma_addr_t bad_dma_address; | 34 | #include <asm/dma.h> |
35 | 35 | ||
36 | unsigned long iommu_bus_base; /* GART remapping area (physical) */ | 36 | unsigned long iommu_bus_base; /* GART remapping area (physical) */ |
37 | static unsigned long iommu_size; /* size of remapping area bytes */ | 37 | static unsigned long iommu_size; /* size of remapping area bytes */ |
@@ -39,18 +39,6 @@ static unsigned long iommu_pages; /* .. and in pages */ | |||
39 | 39 | ||
40 | u32 *iommu_gatt_base; /* Remapping table */ | 40 | u32 *iommu_gatt_base; /* Remapping table */ |
41 | 41 | ||
42 | int no_iommu; | ||
43 | static int no_agp; | ||
44 | #ifdef CONFIG_IOMMU_DEBUG | ||
45 | int panic_on_overflow = 1; | ||
46 | int force_iommu = 1; | ||
47 | #else | ||
48 | int panic_on_overflow = 0; | ||
49 | int force_iommu = 0; | ||
50 | #endif | ||
51 | int iommu_merge = 1; | ||
52 | int iommu_sac_force = 0; | ||
53 | |||
54 | /* If this is disabled the IOMMU will use an optimized flushing strategy | 42 | /* If this is disabled the IOMMU will use an optimized flushing strategy |
55 | of only flushing when an mapping is reused. With it true the GART is flushed | 43 | of only flushing when an mapping is reused. With it true the GART is flushed |
56 | for every mapping. Problem is that doing the lazy flush seems to trigger | 44 | for every mapping. Problem is that doing the lazy flush seems to trigger |
@@ -58,10 +46,6 @@ int iommu_sac_force = 0; | |||
58 | also seen with Qlogic at least). */ | 46 | also seen with Qlogic at least). */ |
59 | int iommu_fullflush = 1; | 47 | int iommu_fullflush = 1; |
60 | 48 | ||
61 | /* This tells the BIO block layer to assume merging. Default to off | ||
62 | because we cannot guarantee merging later. */ | ||
63 | int iommu_bio_merge = 0; | ||
64 | |||
65 | #define MAX_NB 8 | 49 | #define MAX_NB 8 |
66 | 50 | ||
67 | /* Allocation bitmap for the remapping area */ | 51 | /* Allocation bitmap for the remapping area */ |
@@ -102,16 +86,6 @@ AGPEXTERN __u32 *agp_gatt_table; | |||
102 | 86 | ||
103 | static unsigned long next_bit; /* protected by iommu_bitmap_lock */ | 87 | static unsigned long next_bit; /* protected by iommu_bitmap_lock */ |
104 | static int need_flush; /* global flush state. set for each gart wrap */ | 88 | static int need_flush; /* global flush state. set for each gart wrap */ |
105 | static dma_addr_t dma_map_area(struct device *dev, unsigned long phys_mem, | ||
106 | size_t size, int dir, int do_panic); | ||
107 | |||
108 | /* Dummy device used for NULL arguments (normally ISA). Better would | ||
109 | be probably a smaller DMA mask, but this is bug-to-bug compatible to i386. */ | ||
110 | static struct device fallback_dev = { | ||
111 | .bus_id = "fallback device", | ||
112 | .coherent_dma_mask = 0xffffffff, | ||
113 | .dma_mask = &fallback_dev.coherent_dma_mask, | ||
114 | }; | ||
115 | 89 | ||
116 | static unsigned long alloc_iommu(int size) | 90 | static unsigned long alloc_iommu(int size) |
117 | { | 91 | { |
@@ -185,108 +159,7 @@ static void flush_gart(struct device *dev) | |||
185 | spin_unlock_irqrestore(&iommu_bitmap_lock, flags); | 159 | spin_unlock_irqrestore(&iommu_bitmap_lock, flags); |
186 | } | 160 | } |
187 | 161 | ||
188 | /* Allocate DMA memory on node near device */ | ||
189 | noinline | ||
190 | static void *dma_alloc_pages(struct device *dev, gfp_t gfp, unsigned order) | ||
191 | { | ||
192 | struct page *page; | ||
193 | int node; | ||
194 | if (dev->bus == &pci_bus_type) | ||
195 | node = pcibus_to_node(to_pci_dev(dev)->bus); | ||
196 | else | ||
197 | node = numa_node_id(); | ||
198 | page = alloc_pages_node(node, gfp, order); | ||
199 | return page ? page_address(page) : NULL; | ||
200 | } | ||
201 | |||
202 | /* | ||
203 | * Allocate memory for a coherent mapping. | ||
204 | */ | ||
205 | void * | ||
206 | dma_alloc_coherent(struct device *dev, size_t size, dma_addr_t *dma_handle, | ||
207 | gfp_t gfp) | ||
208 | { | ||
209 | void *memory; | ||
210 | unsigned long dma_mask = 0; | ||
211 | u64 bus; | ||
212 | 162 | ||
213 | if (!dev) | ||
214 | dev = &fallback_dev; | ||
215 | dma_mask = dev->coherent_dma_mask; | ||
216 | if (dma_mask == 0) | ||
217 | dma_mask = 0xffffffff; | ||
218 | |||
219 | /* Kludge to make it bug-to-bug compatible with i386. i386 | ||
220 | uses the normal dma_mask for alloc_coherent. */ | ||
221 | dma_mask &= *dev->dma_mask; | ||
222 | |||
223 | again: | ||
224 | memory = dma_alloc_pages(dev, gfp, get_order(size)); | ||
225 | if (memory == NULL) | ||
226 | return NULL; | ||
227 | |||
228 | { | ||
229 | int high, mmu; | ||
230 | bus = virt_to_bus(memory); | ||
231 | high = (bus + size) >= dma_mask; | ||
232 | mmu = high; | ||
233 | if (force_iommu && !(gfp & GFP_DMA)) | ||
234 | mmu = 1; | ||
235 | if (no_iommu || dma_mask < 0xffffffffUL) { | ||
236 | if (high) { | ||
237 | free_pages((unsigned long)memory, | ||
238 | get_order(size)); | ||
239 | |||
240 | if (swiotlb) { | ||
241 | return | ||
242 | swiotlb_alloc_coherent(dev, size, | ||
243 | dma_handle, | ||
244 | gfp); | ||
245 | } | ||
246 | |||
247 | if (!(gfp & GFP_DMA)) { | ||
248 | gfp |= GFP_DMA; | ||
249 | goto again; | ||
250 | } | ||
251 | return NULL; | ||
252 | } | ||
253 | mmu = 0; | ||
254 | } | ||
255 | memset(memory, 0, size); | ||
256 | if (!mmu) { | ||
257 | *dma_handle = virt_to_bus(memory); | ||
258 | return memory; | ||
259 | } | ||
260 | } | ||
261 | |||
262 | *dma_handle = dma_map_area(dev, bus, size, PCI_DMA_BIDIRECTIONAL, 0); | ||
263 | if (*dma_handle == bad_dma_address) | ||
264 | goto error; | ||
265 | flush_gart(dev); | ||
266 | return memory; | ||
267 | |||
268 | error: | ||
269 | if (panic_on_overflow) | ||
270 | panic("dma_alloc_coherent: IOMMU overflow by %lu bytes\n", size); | ||
271 | free_pages((unsigned long)memory, get_order(size)); | ||
272 | return NULL; | ||
273 | } | ||
274 | |||
275 | /* | ||
276 | * Unmap coherent memory. | ||
277 | * The caller must ensure that the device has finished accessing the mapping. | ||
278 | */ | ||
279 | void dma_free_coherent(struct device *dev, size_t size, | ||
280 | void *vaddr, dma_addr_t bus) | ||
281 | { | ||
282 | if (swiotlb) { | ||
283 | swiotlb_free_coherent(dev, size, vaddr, bus); | ||
284 | return; | ||
285 | } | ||
286 | |||
287 | dma_unmap_single(dev, bus, size, 0); | ||
288 | free_pages((unsigned long)vaddr, get_order(size)); | ||
289 | } | ||
290 | 163 | ||
291 | #ifdef CONFIG_IOMMU_LEAK | 164 | #ifdef CONFIG_IOMMU_LEAK |
292 | 165 | ||
@@ -320,7 +193,7 @@ void dump_leak(void) | |||
320 | #define CLEAR_LEAK(x) | 193 | #define CLEAR_LEAK(x) |
321 | #endif | 194 | #endif |
322 | 195 | ||
323 | static void iommu_full(struct device *dev, size_t size, int dir, int do_panic) | 196 | static void iommu_full(struct device *dev, size_t size, int dir) |
324 | { | 197 | { |
325 | /* | 198 | /* |
326 | * Ran out of IOMMU space for this operation. This is very bad. | 199 | * Ran out of IOMMU space for this operation. This is very bad. |
@@ -336,11 +209,11 @@ static void iommu_full(struct device *dev, size_t size, int dir, int do_panic) | |||
336 | "PCI-DMA: Out of IOMMU space for %lu bytes at device %s\n", | 209 | "PCI-DMA: Out of IOMMU space for %lu bytes at device %s\n", |
337 | size, dev->bus_id); | 210 | size, dev->bus_id); |
338 | 211 | ||
339 | if (size > PAGE_SIZE*EMERGENCY_PAGES && do_panic) { | 212 | if (size > PAGE_SIZE*EMERGENCY_PAGES) { |
340 | if (dir == PCI_DMA_FROMDEVICE || dir == PCI_DMA_BIDIRECTIONAL) | 213 | if (dir == PCI_DMA_FROMDEVICE || dir == PCI_DMA_BIDIRECTIONAL) |
341 | panic("PCI-DMA: Memory would be corrupted\n"); | 214 | panic("PCI-DMA: Memory would be corrupted\n"); |
342 | if (dir == PCI_DMA_TODEVICE || dir == PCI_DMA_BIDIRECTIONAL) | 215 | if (dir == PCI_DMA_TODEVICE || dir == PCI_DMA_BIDIRECTIONAL) |
343 | panic("PCI-DMA: Random memory would be DMAed\n"); | 216 | panic(KERN_ERR "PCI-DMA: Random memory would be DMAed\n"); |
344 | } | 217 | } |
345 | 218 | ||
346 | #ifdef CONFIG_IOMMU_LEAK | 219 | #ifdef CONFIG_IOMMU_LEAK |
@@ -379,8 +252,8 @@ static inline int nonforced_iommu(struct device *dev, unsigned long addr, size_t | |||
379 | /* Map a single continuous physical area into the IOMMU. | 252 | /* Map a single continuous physical area into the IOMMU. |
380 | * Caller needs to check if the iommu is needed and flush. | 253 | * Caller needs to check if the iommu is needed and flush. |
381 | */ | 254 | */ |
382 | static dma_addr_t dma_map_area(struct device *dev, unsigned long phys_mem, | 255 | static dma_addr_t dma_map_area(struct device *dev, dma_addr_t phys_mem, |
383 | size_t size, int dir, int do_panic) | 256 | size_t size, int dir) |
384 | { | 257 | { |
385 | unsigned long npages = to_pages(phys_mem, size); | 258 | unsigned long npages = to_pages(phys_mem, size); |
386 | unsigned long iommu_page = alloc_iommu(npages); | 259 | unsigned long iommu_page = alloc_iommu(npages); |
@@ -390,7 +263,7 @@ static dma_addr_t dma_map_area(struct device *dev, unsigned long phys_mem, | |||
390 | return phys_mem; | 263 | return phys_mem; |
391 | if (panic_on_overflow) | 264 | if (panic_on_overflow) |
392 | panic("dma_map_area overflow %lu bytes\n", size); | 265 | panic("dma_map_area overflow %lu bytes\n", size); |
393 | iommu_full(dev, size, dir, do_panic); | 266 | iommu_full(dev, size, dir); |
394 | return bad_dma_address; | 267 | return bad_dma_address; |
395 | } | 268 | } |
396 | 269 | ||
@@ -402,15 +275,21 @@ static dma_addr_t dma_map_area(struct device *dev, unsigned long phys_mem, | |||
402 | return iommu_bus_base + iommu_page*PAGE_SIZE + (phys_mem & ~PAGE_MASK); | 275 | return iommu_bus_base + iommu_page*PAGE_SIZE + (phys_mem & ~PAGE_MASK); |
403 | } | 276 | } |
404 | 277 | ||
278 | static dma_addr_t gart_map_simple(struct device *dev, char *buf, | ||
279 | size_t size, int dir) | ||
280 | { | ||
281 | dma_addr_t map = dma_map_area(dev, virt_to_bus(buf), size, dir); | ||
282 | flush_gart(dev); | ||
283 | return map; | ||
284 | } | ||
285 | |||
405 | /* Map a single area into the IOMMU */ | 286 | /* Map a single area into the IOMMU */ |
406 | dma_addr_t dma_map_single(struct device *dev, void *addr, size_t size, int dir) | 287 | dma_addr_t gart_map_single(struct device *dev, void *addr, size_t size, int dir) |
407 | { | 288 | { |
408 | unsigned long phys_mem, bus; | 289 | unsigned long phys_mem, bus; |
409 | 290 | ||
410 | BUG_ON(dir == DMA_NONE); | 291 | BUG_ON(dir == DMA_NONE); |
411 | 292 | ||
412 | if (swiotlb) | ||
413 | return swiotlb_map_single(dev,addr,size,dir); | ||
414 | if (!dev) | 293 | if (!dev) |
415 | dev = &fallback_dev; | 294 | dev = &fallback_dev; |
416 | 295 | ||
@@ -418,10 +297,24 @@ dma_addr_t dma_map_single(struct device *dev, void *addr, size_t size, int dir) | |||
418 | if (!need_iommu(dev, phys_mem, size)) | 297 | if (!need_iommu(dev, phys_mem, size)) |
419 | return phys_mem; | 298 | return phys_mem; |
420 | 299 | ||
421 | bus = dma_map_area(dev, phys_mem, size, dir, 1); | 300 | bus = gart_map_simple(dev, addr, size, dir); |
422 | flush_gart(dev); | ||
423 | return bus; | 301 | return bus; |
424 | } | 302 | } |
303 | |||
304 | /* | ||
305 | * Wrapper for pci_unmap_single working with scatterlists. | ||
306 | */ | ||
307 | void gart_unmap_sg(struct device *dev, struct scatterlist *sg, int nents, int dir) | ||
308 | { | ||
309 | int i; | ||
310 | |||
311 | for (i = 0; i < nents; i++) { | ||
312 | struct scatterlist *s = &sg[i]; | ||
313 | if (!s->dma_length || !s->length) | ||
314 | break; | ||
315 | dma_unmap_single(dev, s->dma_address, s->dma_length, dir); | ||
316 | } | ||
317 | } | ||
425 | 318 | ||
426 | /* Fallback for dma_map_sg in case of overflow */ | 319 | /* Fallback for dma_map_sg in case of overflow */ |
427 | static int dma_map_sg_nonforce(struct device *dev, struct scatterlist *sg, | 320 | static int dma_map_sg_nonforce(struct device *dev, struct scatterlist *sg, |
@@ -437,10 +330,10 @@ static int dma_map_sg_nonforce(struct device *dev, struct scatterlist *sg, | |||
437 | struct scatterlist *s = &sg[i]; | 330 | struct scatterlist *s = &sg[i]; |
438 | unsigned long addr = page_to_phys(s->page) + s->offset; | 331 | unsigned long addr = page_to_phys(s->page) + s->offset; |
439 | if (nonforced_iommu(dev, addr, s->length)) { | 332 | if (nonforced_iommu(dev, addr, s->length)) { |
440 | addr = dma_map_area(dev, addr, s->length, dir, 0); | 333 | addr = dma_map_area(dev, addr, s->length, dir); |
441 | if (addr == bad_dma_address) { | 334 | if (addr == bad_dma_address) { |
442 | if (i > 0) | 335 | if (i > 0) |
443 | dma_unmap_sg(dev, sg, i, dir); | 336 | gart_unmap_sg(dev, sg, i, dir); |
444 | nents = 0; | 337 | nents = 0; |
445 | sg[0].dma_length = 0; | 338 | sg[0].dma_length = 0; |
446 | break; | 339 | break; |
@@ -509,7 +402,7 @@ static inline int dma_map_cont(struct scatterlist *sg, int start, int stopat, | |||
509 | * DMA map all entries in a scatterlist. | 402 | * DMA map all entries in a scatterlist. |
510 | * Merge chunks that have page aligned sizes into a continuous mapping. | 403 | * Merge chunks that have page aligned sizes into a continuous mapping. |
511 | */ | 404 | */ |
512 | int dma_map_sg(struct device *dev, struct scatterlist *sg, int nents, int dir) | 405 | int gart_map_sg(struct device *dev, struct scatterlist *sg, int nents, int dir) |
513 | { | 406 | { |
514 | int i; | 407 | int i; |
515 | int out; | 408 | int out; |
@@ -521,8 +414,6 @@ int dma_map_sg(struct device *dev, struct scatterlist *sg, int nents, int dir) | |||
521 | if (nents == 0) | 414 | if (nents == 0) |
522 | return 0; | 415 | return 0; |
523 | 416 | ||
524 | if (swiotlb) | ||
525 | return swiotlb_map_sg(dev,sg,nents,dir); | ||
526 | if (!dev) | 417 | if (!dev) |
527 | dev = &fallback_dev; | 418 | dev = &fallback_dev; |
528 | 419 | ||
@@ -565,13 +456,13 @@ int dma_map_sg(struct device *dev, struct scatterlist *sg, int nents, int dir) | |||
565 | 456 | ||
566 | error: | 457 | error: |
567 | flush_gart(NULL); | 458 | flush_gart(NULL); |
568 | dma_unmap_sg(dev, sg, nents, dir); | 459 | gart_unmap_sg(dev, sg, nents, dir); |
569 | /* When it was forced try again unforced */ | 460 | /* When it was forced try again unforced */ |
570 | if (force_iommu) | 461 | if (force_iommu) |
571 | return dma_map_sg_nonforce(dev, sg, nents, dir); | 462 | return dma_map_sg_nonforce(dev, sg, nents, dir); |
572 | if (panic_on_overflow) | 463 | if (panic_on_overflow) |
573 | panic("dma_map_sg: overflow on %lu pages\n", pages); | 464 | panic("dma_map_sg: overflow on %lu pages\n", pages); |
574 | iommu_full(dev, pages << PAGE_SHIFT, dir, 0); | 465 | iommu_full(dev, pages << PAGE_SHIFT, dir); |
575 | for (i = 0; i < nents; i++) | 466 | for (i = 0; i < nents; i++) |
576 | sg[i].dma_address = bad_dma_address; | 467 | sg[i].dma_address = bad_dma_address; |
577 | return 0; | 468 | return 0; |
@@ -580,18 +471,13 @@ error: | |||
580 | /* | 471 | /* |
581 | * Free a DMA mapping. | 472 | * Free a DMA mapping. |
582 | */ | 473 | */ |
583 | void dma_unmap_single(struct device *dev, dma_addr_t dma_addr, | 474 | void gart_unmap_single(struct device *dev, dma_addr_t dma_addr, |
584 | size_t size, int direction) | 475 | size_t size, int direction) |
585 | { | 476 | { |
586 | unsigned long iommu_page; | 477 | unsigned long iommu_page; |
587 | int npages; | 478 | int npages; |
588 | int i; | 479 | int i; |
589 | 480 | ||
590 | if (swiotlb) { | ||
591 | swiotlb_unmap_single(dev,dma_addr,size,direction); | ||
592 | return; | ||
593 | } | ||
594 | |||
595 | if (dma_addr < iommu_bus_base + EMERGENCY_PAGES*PAGE_SIZE || | 481 | if (dma_addr < iommu_bus_base + EMERGENCY_PAGES*PAGE_SIZE || |
596 | dma_addr >= iommu_bus_base + iommu_size) | 482 | dma_addr >= iommu_bus_base + iommu_size) |
597 | return; | 483 | return; |
@@ -604,68 +490,7 @@ void dma_unmap_single(struct device *dev, dma_addr_t dma_addr, | |||
604 | free_iommu(iommu_page, npages); | 490 | free_iommu(iommu_page, npages); |
605 | } | 491 | } |
606 | 492 | ||
607 | /* | 493 | static int no_agp; |
608 | * Wrapper for pci_unmap_single working with scatterlists. | ||
609 | */ | ||
610 | void dma_unmap_sg(struct device *dev, struct scatterlist *sg, int nents, int dir) | ||
611 | { | ||
612 | int i; | ||
613 | if (swiotlb) { | ||
614 | swiotlb_unmap_sg(dev,sg,nents,dir); | ||
615 | return; | ||
616 | } | ||
617 | for (i = 0; i < nents; i++) { | ||
618 | struct scatterlist *s = &sg[i]; | ||
619 | if (!s->dma_length || !s->length) | ||
620 | break; | ||
621 | dma_unmap_single(dev, s->dma_address, s->dma_length, dir); | ||
622 | } | ||
623 | } | ||
624 | |||
625 | int dma_supported(struct device *dev, u64 mask) | ||
626 | { | ||
627 | /* Copied from i386. Doesn't make much sense, because it will | ||
628 | only work for pci_alloc_coherent. | ||
629 | The caller just has to use GFP_DMA in this case. */ | ||
630 | if (mask < 0x00ffffff) | ||
631 | return 0; | ||
632 | |||
633 | /* Tell the device to use SAC when IOMMU force is on. | ||
634 | This allows the driver to use cheaper accesses in some cases. | ||
635 | |||
636 | Problem with this is that if we overflow the IOMMU area | ||
637 | and return DAC as fallback address the device may not handle it correctly. | ||
638 | |||
639 | As a special case some controllers have a 39bit address mode | ||
640 | that is as efficient as 32bit (aic79xx). Don't force SAC for these. | ||
641 | Assume all masks <= 40 bits are of this type. Normally this doesn't | ||
642 | make any difference, but gives more gentle handling of IOMMU overflow. */ | ||
643 | if (iommu_sac_force && (mask >= 0xffffffffffULL)) { | ||
644 | printk(KERN_INFO "%s: Force SAC with mask %Lx\n", dev->bus_id,mask); | ||
645 | return 0; | ||
646 | } | ||
647 | |||
648 | return 1; | ||
649 | } | ||
650 | |||
651 | int dma_get_cache_alignment(void) | ||
652 | { | ||
653 | return boot_cpu_data.x86_clflush_size; | ||
654 | } | ||
655 | |||
656 | EXPORT_SYMBOL(dma_unmap_sg); | ||
657 | EXPORT_SYMBOL(dma_map_sg); | ||
658 | EXPORT_SYMBOL(dma_map_single); | ||
659 | EXPORT_SYMBOL(dma_unmap_single); | ||
660 | EXPORT_SYMBOL(dma_supported); | ||
661 | EXPORT_SYMBOL(no_iommu); | ||
662 | EXPORT_SYMBOL(force_iommu); | ||
663 | EXPORT_SYMBOL(bad_dma_address); | ||
664 | EXPORT_SYMBOL(iommu_bio_merge); | ||
665 | EXPORT_SYMBOL(iommu_sac_force); | ||
666 | EXPORT_SYMBOL(dma_get_cache_alignment); | ||
667 | EXPORT_SYMBOL(dma_alloc_coherent); | ||
668 | EXPORT_SYMBOL(dma_free_coherent); | ||
669 | 494 | ||
670 | static __init unsigned long check_iommu_size(unsigned long aper, u64 aper_size) | 495 | static __init unsigned long check_iommu_size(unsigned long aper, u64 aper_size) |
671 | { | 496 | { |
@@ -766,12 +591,27 @@ static __init int init_k8_gatt(struct agp_kern_info *info) | |||
766 | nommu: | 591 | nommu: |
767 | /* Should not happen anymore */ | 592 | /* Should not happen anymore */ |
768 | printk(KERN_ERR "PCI-DMA: More than 4GB of RAM and no IOMMU\n" | 593 | printk(KERN_ERR "PCI-DMA: More than 4GB of RAM and no IOMMU\n" |
769 | KERN_ERR "PCI-DMA: 32bit PCI IO may malfunction."); | 594 | KERN_ERR "PCI-DMA: 32bit PCI IO may malfunction.\n"); |
770 | return -1; | 595 | return -1; |
771 | } | 596 | } |
772 | 597 | ||
773 | extern int agp_amd64_init(void); | 598 | extern int agp_amd64_init(void); |
774 | 599 | ||
600 | static struct dma_mapping_ops gart_dma_ops = { | ||
601 | .mapping_error = NULL, | ||
602 | .map_single = gart_map_single, | ||
603 | .map_simple = gart_map_simple, | ||
604 | .unmap_single = gart_unmap_single, | ||
605 | .sync_single_for_cpu = NULL, | ||
606 | .sync_single_for_device = NULL, | ||
607 | .sync_single_range_for_cpu = NULL, | ||
608 | .sync_single_range_for_device = NULL, | ||
609 | .sync_sg_for_cpu = NULL, | ||
610 | .sync_sg_for_device = NULL, | ||
611 | .map_sg = gart_map_sg, | ||
612 | .unmap_sg = gart_unmap_sg, | ||
613 | }; | ||
614 | |||
775 | static int __init pci_iommu_init(void) | 615 | static int __init pci_iommu_init(void) |
776 | { | 616 | { |
777 | struct agp_kern_info info; | 617 | struct agp_kern_info info; |
@@ -793,16 +633,15 @@ static int __init pci_iommu_init(void) | |||
793 | 633 | ||
794 | if (swiotlb) { | 634 | if (swiotlb) { |
795 | no_iommu = 1; | 635 | no_iommu = 1; |
796 | printk(KERN_INFO "PCI-DMA: Using software bounce buffering for IO (SWIOTLB)\n"); | ||
797 | return -1; | 636 | return -1; |
798 | } | 637 | } |
799 | 638 | ||
800 | if (no_iommu || | 639 | if (no_iommu || |
801 | (!force_iommu && end_pfn < 0xffffffff>>PAGE_SHIFT) || | 640 | (!force_iommu && end_pfn <= MAX_DMA32_PFN) || |
802 | !iommu_aperture || | 641 | !iommu_aperture || |
803 | (no_agp && init_k8_gatt(&info) < 0)) { | 642 | (no_agp && init_k8_gatt(&info) < 0)) { |
804 | printk(KERN_INFO "PCI-DMA: Disabling IOMMU.\n"); | ||
805 | no_iommu = 1; | 643 | no_iommu = 1; |
644 | no_iommu_init(); | ||
806 | return -1; | 645 | return -1; |
807 | } | 646 | } |
808 | 647 | ||
@@ -879,100 +718,50 @@ static int __init pci_iommu_init(void) | |||
879 | 718 | ||
880 | flush_gart(NULL); | 719 | flush_gart(NULL); |
881 | 720 | ||
721 | printk(KERN_INFO "PCI-DMA: using GART IOMMU.\n"); | ||
722 | dma_ops = &gart_dma_ops; | ||
723 | |||
882 | return 0; | 724 | return 0; |
883 | } | 725 | } |
884 | 726 | ||
885 | /* Must execute after PCI subsystem */ | 727 | /* Must execute after PCI subsystem */ |
886 | fs_initcall(pci_iommu_init); | 728 | fs_initcall(pci_iommu_init); |
887 | 729 | ||
888 | /* iommu=[size][,noagp][,off][,force][,noforce][,leak][,memaper[=order]][,merge] | 730 | void gart_parse_options(char *p) |
889 | [,forcesac][,fullflush][,nomerge][,biomerge] | 731 | { |
890 | size set size of iommu (in bytes) | 732 | int arg; |
891 | noagp don't initialize the AGP driver and use full aperture. | 733 | |
892 | off don't use the IOMMU | ||
893 | leak turn on simple iommu leak tracing (only when CONFIG_IOMMU_LEAK is on) | ||
894 | memaper[=order] allocate an own aperture over RAM with size 32MB^order. | ||
895 | noforce don't force IOMMU usage. Default. | ||
896 | force Force IOMMU. | ||
897 | merge Do lazy merging. This may improve performance on some block devices. | ||
898 | Implies force (experimental) | ||
899 | biomerge Do merging at the BIO layer. This is more efficient than merge, | ||
900 | but should be only done with very big IOMMUs. Implies merge,force. | ||
901 | nomerge Don't do SG merging. | ||
902 | forcesac For SAC mode for masks <40bits (experimental) | ||
903 | fullflush Flush IOMMU on each allocation (default) | ||
904 | nofullflush Don't use IOMMU fullflush | ||
905 | allowed overwrite iommu off workarounds for specific chipsets. | ||
906 | soft Use software bounce buffering (default for Intel machines) | ||
907 | noaperture Don't touch the aperture for AGP. | ||
908 | */ | ||
909 | __init int iommu_setup(char *p) | ||
910 | { | ||
911 | int arg; | ||
912 | |||
913 | while (*p) { | ||
914 | if (!strncmp(p,"noagp",5)) | ||
915 | no_agp = 1; | ||
916 | if (!strncmp(p,"off",3)) | ||
917 | no_iommu = 1; | ||
918 | if (!strncmp(p,"force",5)) { | ||
919 | force_iommu = 1; | ||
920 | iommu_aperture_allowed = 1; | ||
921 | } | ||
922 | if (!strncmp(p,"allowed",7)) | ||
923 | iommu_aperture_allowed = 1; | ||
924 | if (!strncmp(p,"noforce",7)) { | ||
925 | iommu_merge = 0; | ||
926 | force_iommu = 0; | ||
927 | } | ||
928 | if (!strncmp(p, "memaper", 7)) { | ||
929 | fallback_aper_force = 1; | ||
930 | p += 7; | ||
931 | if (*p == '=') { | ||
932 | ++p; | ||
933 | if (get_option(&p, &arg)) | ||
934 | fallback_aper_order = arg; | ||
935 | } | ||
936 | } | ||
937 | if (!strncmp(p, "biomerge",8)) { | ||
938 | iommu_bio_merge = 4096; | ||
939 | iommu_merge = 1; | ||
940 | force_iommu = 1; | ||
941 | } | ||
942 | if (!strncmp(p, "panic",5)) | ||
943 | panic_on_overflow = 1; | ||
944 | if (!strncmp(p, "nopanic",7)) | ||
945 | panic_on_overflow = 0; | ||
946 | if (!strncmp(p, "merge",5)) { | ||
947 | iommu_merge = 1; | ||
948 | force_iommu = 1; | ||
949 | } | ||
950 | if (!strncmp(p, "nomerge",7)) | ||
951 | iommu_merge = 0; | ||
952 | if (!strncmp(p, "forcesac",8)) | ||
953 | iommu_sac_force = 1; | ||
954 | if (!strncmp(p, "fullflush",8)) | ||
955 | iommu_fullflush = 1; | ||
956 | if (!strncmp(p, "nofullflush",11)) | ||
957 | iommu_fullflush = 0; | ||
958 | if (!strncmp(p, "soft",4)) | ||
959 | swiotlb = 1; | ||
960 | if (!strncmp(p, "noaperture",10)) | ||
961 | fix_aperture = 0; | ||
962 | #ifdef CONFIG_IOMMU_LEAK | 734 | #ifdef CONFIG_IOMMU_LEAK |
963 | if (!strncmp(p,"leak",4)) { | 735 | if (!strncmp(p,"leak",4)) { |
964 | leak_trace = 1; | 736 | leak_trace = 1; |
965 | p += 4; | 737 | p += 4; |
966 | if (*p == '=') ++p; | 738 | if (*p == '=') ++p; |
967 | if (isdigit(*p) && get_option(&p, &arg)) | 739 | if (isdigit(*p) && get_option(&p, &arg)) |
968 | iommu_leak_pages = arg; | 740 | iommu_leak_pages = arg; |
969 | } else | 741 | } |
970 | #endif | 742 | #endif |
971 | if (isdigit(*p) && get_option(&p, &arg)) | 743 | if (isdigit(*p) && get_option(&p, &arg)) |
972 | iommu_size = arg; | 744 | iommu_size = arg; |
973 | p += strcspn(p, ","); | 745 | if (!strncmp(p, "fullflush",8)) |
974 | if (*p == ',') | 746 | iommu_fullflush = 1; |
975 | ++p; | 747 | if (!strncmp(p, "nofullflush",11)) |
976 | } | 748 | iommu_fullflush = 0; |
977 | return 1; | 749 | if (!strncmp(p,"noagp",5)) |
978 | } | 750 | no_agp = 1; |
751 | if (!strncmp(p, "noaperture",10)) | ||
752 | fix_aperture = 0; | ||
753 | /* duplicated from pci-dma.c */ | ||
754 | if (!strncmp(p,"force",5)) | ||
755 | iommu_aperture_allowed = 1; | ||
756 | if (!strncmp(p,"allowed",7)) | ||
757 | iommu_aperture_allowed = 1; | ||
758 | if (!strncmp(p, "memaper", 7)) { | ||
759 | fallback_aper_force = 1; | ||
760 | p += 7; | ||
761 | if (*p == '=') { | ||
762 | ++p; | ||
763 | if (get_option(&p, &arg)) | ||
764 | fallback_aper_order = arg; | ||
765 | } | ||
766 | } | ||
767 | } | ||
diff --git a/arch/x86_64/kernel/pci-nommu.c b/arch/x86_64/kernel/pci-nommu.c index 5a981dca87ff..e41564975195 100644 --- a/arch/x86_64/kernel/pci-nommu.c +++ b/arch/x86_64/kernel/pci-nommu.c | |||
@@ -6,89 +6,94 @@ | |||
6 | #include <linux/string.h> | 6 | #include <linux/string.h> |
7 | #include <asm/proto.h> | 7 | #include <asm/proto.h> |
8 | #include <asm/processor.h> | 8 | #include <asm/processor.h> |
9 | #include <asm/dma.h> | ||
9 | 10 | ||
10 | int iommu_merge = 0; | 11 | static int |
11 | EXPORT_SYMBOL(iommu_merge); | 12 | check_addr(char *name, struct device *hwdev, dma_addr_t bus, size_t size) |
12 | |||
13 | dma_addr_t bad_dma_address; | ||
14 | EXPORT_SYMBOL(bad_dma_address); | ||
15 | |||
16 | int iommu_bio_merge = 0; | ||
17 | EXPORT_SYMBOL(iommu_bio_merge); | ||
18 | |||
19 | int iommu_sac_force = 0; | ||
20 | EXPORT_SYMBOL(iommu_sac_force); | ||
21 | |||
22 | /* | ||
23 | * Dummy IO MMU functions | ||
24 | */ | ||
25 | |||
26 | void *dma_alloc_coherent(struct device *hwdev, size_t size, | ||
27 | dma_addr_t *dma_handle, gfp_t gfp) | ||
28 | { | 13 | { |
29 | void *ret; | 14 | if (hwdev && bus + size > *hwdev->dma_mask) { |
30 | u64 mask; | 15 | printk(KERN_ERR |
31 | int order = get_order(size); | 16 | "nommu_%s: overflow %Lx+%lu of device mask %Lx\n", |
32 | 17 | name, (long long)bus, size, (long long)*hwdev->dma_mask); | |
33 | if (hwdev) | 18 | return 0; |
34 | mask = hwdev->coherent_dma_mask & *hwdev->dma_mask; | ||
35 | else | ||
36 | mask = 0xffffffff; | ||
37 | for (;;) { | ||
38 | ret = (void *)__get_free_pages(gfp, order); | ||
39 | if (ret == NULL) | ||
40 | return NULL; | ||
41 | *dma_handle = virt_to_bus(ret); | ||
42 | if ((*dma_handle & ~mask) == 0) | ||
43 | break; | ||
44 | free_pages((unsigned long)ret, order); | ||
45 | if (gfp & GFP_DMA) | ||
46 | return NULL; | ||
47 | gfp |= GFP_DMA; | ||
48 | } | 19 | } |
20 | return 1; | ||
21 | } | ||
49 | 22 | ||
50 | memset(ret, 0, size); | 23 | static dma_addr_t |
51 | return ret; | 24 | nommu_map_single(struct device *hwdev, void *ptr, size_t size, |
25 | int direction) | ||
26 | { | ||
27 | dma_addr_t bus = virt_to_bus(ptr); | ||
28 | if (!check_addr("map_single", hwdev, bus, size)) | ||
29 | return bad_dma_address; | ||
30 | return bus; | ||
52 | } | 31 | } |
53 | EXPORT_SYMBOL(dma_alloc_coherent); | ||
54 | 32 | ||
55 | void dma_free_coherent(struct device *hwdev, size_t size, | 33 | void nommu_unmap_single(struct device *dev, dma_addr_t addr,size_t size, |
56 | void *vaddr, dma_addr_t dma_handle) | 34 | int direction) |
57 | { | 35 | { |
58 | free_pages((unsigned long)vaddr, get_order(size)); | ||
59 | } | 36 | } |
60 | EXPORT_SYMBOL(dma_free_coherent); | ||
61 | 37 | ||
62 | int dma_supported(struct device *hwdev, u64 mask) | 38 | /* Map a set of buffers described by scatterlist in streaming |
39 | * mode for DMA. This is the scatter-gather version of the | ||
40 | * above pci_map_single interface. Here the scatter gather list | ||
41 | * elements are each tagged with the appropriate dma address | ||
42 | * and length. They are obtained via sg_dma_{address,length}(SG). | ||
43 | * | ||
44 | * NOTE: An implementation may be able to use a smaller number of | ||
45 | * DMA address/length pairs than there are SG table elements. | ||
46 | * (for example via virtual mapping capabilities) | ||
47 | * The routine returns the number of addr/length pairs actually | ||
48 | * used, at most nents. | ||
49 | * | ||
50 | * Device ownership issues as mentioned above for pci_map_single are | ||
51 | * the same here. | ||
52 | */ | ||
53 | int nommu_map_sg(struct device *hwdev, struct scatterlist *sg, | ||
54 | int nents, int direction) | ||
63 | { | 55 | { |
64 | /* | 56 | int i; |
65 | * we fall back to GFP_DMA when the mask isn't all 1s, | ||
66 | * so we can't guarantee allocations that must be | ||
67 | * within a tighter range than GFP_DMA.. | ||
68 | * RED-PEN this won't work for pci_map_single. Caller has to | ||
69 | * use GFP_DMA in the first place. | ||
70 | */ | ||
71 | if (mask < 0x00ffffff) | ||
72 | return 0; | ||
73 | 57 | ||
74 | return 1; | 58 | BUG_ON(direction == DMA_NONE); |
75 | } | 59 | for (i = 0; i < nents; i++ ) { |
76 | EXPORT_SYMBOL(dma_supported); | 60 | struct scatterlist *s = &sg[i]; |
61 | BUG_ON(!s->page); | ||
62 | s->dma_address = virt_to_bus(page_address(s->page) +s->offset); | ||
63 | if (!check_addr("map_sg", hwdev, s->dma_address, s->length)) | ||
64 | return 0; | ||
65 | s->dma_length = s->length; | ||
66 | } | ||
67 | return nents; | ||
68 | } | ||
77 | 69 | ||
78 | int dma_get_cache_alignment(void) | 70 | /* Unmap a set of streaming mode DMA translations. |
71 | * Again, cpu read rules concerning calls here are the same as for | ||
72 | * pci_unmap_single() above. | ||
73 | */ | ||
74 | void nommu_unmap_sg(struct device *dev, struct scatterlist *sg, | ||
75 | int nents, int dir) | ||
79 | { | 76 | { |
80 | return boot_cpu_data.x86_clflush_size; | ||
81 | } | 77 | } |
82 | EXPORT_SYMBOL(dma_get_cache_alignment); | ||
83 | 78 | ||
84 | static int __init check_ram(void) | 79 | struct dma_mapping_ops nommu_dma_ops = { |
85 | { | 80 | .map_single = nommu_map_single, |
86 | if (end_pfn >= 0xffffffff>>PAGE_SHIFT) { | 81 | .unmap_single = nommu_unmap_single, |
87 | printk( | 82 | .map_sg = nommu_map_sg, |
88 | KERN_ERR "WARNING more than 4GB of memory but IOMMU not compiled in.\n" | 83 | .unmap_sg = nommu_unmap_sg, |
89 | KERN_ERR "WARNING 32bit PCI may malfunction.\n"); | 84 | .is_phys = 1, |
90 | } | 85 | }; |
91 | return 0; | ||
92 | } | ||
93 | __initcall(check_ram); | ||
94 | 86 | ||
87 | void __init no_iommu_init(void) | ||
88 | { | ||
89 | if (dma_ops) | ||
90 | return; | ||
91 | printk(KERN_INFO "PCI-DMA: Disabling IOMMU.\n"); | ||
92 | dma_ops = &nommu_dma_ops; | ||
93 | if (end_pfn > MAX_DMA32_PFN) { | ||
94 | printk(KERN_ERR | ||
95 | "WARNING more than 4GB of memory but IOMMU not compiled in.\n" | ||
96 | KERN_ERR "WARNING 32bit PCI may malfunction.\n" | ||
97 | KERN_ERR "You might want to enable CONFIG_GART_IOMMU\n"); | ||
98 | } | ||
99 | } | ||
diff --git a/arch/x86_64/kernel/pci-swiotlb.c b/arch/x86_64/kernel/pci-swiotlb.c new file mode 100644 index 000000000000..3569a25ad7fb --- /dev/null +++ b/arch/x86_64/kernel/pci-swiotlb.c | |||
@@ -0,0 +1,42 @@ | |||
1 | /* Glue code to lib/swiotlb.c */ | ||
2 | |||
3 | #include <linux/pci.h> | ||
4 | #include <linux/cache.h> | ||
5 | #include <linux/module.h> | ||
6 | #include <asm/dma-mapping.h> | ||
7 | #include <asm/proto.h> | ||
8 | #include <asm/swiotlb.h> | ||
9 | #include <asm/dma.h> | ||
10 | |||
11 | int swiotlb __read_mostly; | ||
12 | EXPORT_SYMBOL(swiotlb); | ||
13 | |||
14 | struct dma_mapping_ops swiotlb_dma_ops = { | ||
15 | .mapping_error = swiotlb_dma_mapping_error, | ||
16 | .alloc_coherent = swiotlb_alloc_coherent, | ||
17 | .free_coherent = swiotlb_free_coherent, | ||
18 | .map_single = swiotlb_map_single, | ||
19 | .unmap_single = swiotlb_unmap_single, | ||
20 | .sync_single_for_cpu = swiotlb_sync_single_for_cpu, | ||
21 | .sync_single_for_device = swiotlb_sync_single_for_device, | ||
22 | .sync_single_range_for_cpu = swiotlb_sync_single_range_for_cpu, | ||
23 | .sync_single_range_for_device = swiotlb_sync_single_range_for_device, | ||
24 | .sync_sg_for_cpu = swiotlb_sync_sg_for_cpu, | ||
25 | .sync_sg_for_device = swiotlb_sync_sg_for_device, | ||
26 | .map_sg = swiotlb_map_sg, | ||
27 | .unmap_sg = swiotlb_unmap_sg, | ||
28 | .dma_supported = NULL, | ||
29 | }; | ||
30 | |||
31 | void pci_swiotlb_init(void) | ||
32 | { | ||
33 | /* don't initialize swiotlb if iommu=off (no_iommu=1) */ | ||
34 | if (!iommu_aperture && !no_iommu && | ||
35 | (end_pfn > MAX_DMA32_PFN || force_iommu)) | ||
36 | swiotlb = 1; | ||
37 | if (swiotlb) { | ||
38 | swiotlb_init(); | ||
39 | printk(KERN_INFO "PCI-DMA: Using software bounce buffering for IO (SWIOTLB)\n"); | ||
40 | dma_ops = &swiotlb_dma_ops; | ||
41 | } | ||
42 | } | ||
diff --git a/arch/x86_64/kernel/process.c b/arch/x86_64/kernel/process.c index 59be85d9a4bc..8ded407e4a94 100644 --- a/arch/x86_64/kernel/process.c +++ b/arch/x86_64/kernel/process.c | |||
@@ -36,6 +36,7 @@ | |||
36 | #include <linux/utsname.h> | 36 | #include <linux/utsname.h> |
37 | #include <linux/random.h> | 37 | #include <linux/random.h> |
38 | #include <linux/kprobes.h> | 38 | #include <linux/kprobes.h> |
39 | #include <linux/notifier.h> | ||
39 | 40 | ||
40 | #include <asm/uaccess.h> | 41 | #include <asm/uaccess.h> |
41 | #include <asm/pgtable.h> | 42 | #include <asm/pgtable.h> |
@@ -50,13 +51,12 @@ | |||
50 | #include <asm/desc.h> | 51 | #include <asm/desc.h> |
51 | #include <asm/proto.h> | 52 | #include <asm/proto.h> |
52 | #include <asm/ia32.h> | 53 | #include <asm/ia32.h> |
54 | #include <asm/idle.h> | ||
53 | 55 | ||
54 | asmlinkage extern void ret_from_fork(void); | 56 | asmlinkage extern void ret_from_fork(void); |
55 | 57 | ||
56 | unsigned long kernel_thread_flags = CLONE_VM | CLONE_UNTRACED; | 58 | unsigned long kernel_thread_flags = CLONE_VM | CLONE_UNTRACED; |
57 | 59 | ||
58 | static atomic_t hlt_counter = ATOMIC_INIT(0); | ||
59 | |||
60 | unsigned long boot_option_idle_override = 0; | 60 | unsigned long boot_option_idle_override = 0; |
61 | EXPORT_SYMBOL(boot_option_idle_override); | 61 | EXPORT_SYMBOL(boot_option_idle_override); |
62 | 62 | ||
@@ -66,19 +66,49 @@ EXPORT_SYMBOL(boot_option_idle_override); | |||
66 | void (*pm_idle)(void); | 66 | void (*pm_idle)(void); |
67 | static DEFINE_PER_CPU(unsigned int, cpu_idle_state); | 67 | static DEFINE_PER_CPU(unsigned int, cpu_idle_state); |
68 | 68 | ||
69 | void disable_hlt(void) | 69 | static struct notifier_block *idle_notifier; |
70 | static DEFINE_SPINLOCK(idle_notifier_lock); | ||
71 | |||
72 | void idle_notifier_register(struct notifier_block *n) | ||
73 | { | ||
74 | unsigned long flags; | ||
75 | spin_lock_irqsave(&idle_notifier_lock, flags); | ||
76 | notifier_chain_register(&idle_notifier, n); | ||
77 | spin_unlock_irqrestore(&idle_notifier_lock, flags); | ||
78 | } | ||
79 | EXPORT_SYMBOL_GPL(idle_notifier_register); | ||
80 | |||
81 | void idle_notifier_unregister(struct notifier_block *n) | ||
70 | { | 82 | { |
71 | atomic_inc(&hlt_counter); | 83 | unsigned long flags; |
84 | spin_lock_irqsave(&idle_notifier_lock, flags); | ||
85 | notifier_chain_unregister(&idle_notifier, n); | ||
86 | spin_unlock_irqrestore(&idle_notifier_lock, flags); | ||
72 | } | 87 | } |
88 | EXPORT_SYMBOL(idle_notifier_unregister); | ||
73 | 89 | ||
74 | EXPORT_SYMBOL(disable_hlt); | 90 | enum idle_state { CPU_IDLE, CPU_NOT_IDLE }; |
91 | static DEFINE_PER_CPU(enum idle_state, idle_state) = CPU_NOT_IDLE; | ||
75 | 92 | ||
76 | void enable_hlt(void) | 93 | void enter_idle(void) |
77 | { | 94 | { |
78 | atomic_dec(&hlt_counter); | 95 | __get_cpu_var(idle_state) = CPU_IDLE; |
96 | notifier_call_chain(&idle_notifier, IDLE_START, NULL); | ||
79 | } | 97 | } |
80 | 98 | ||
81 | EXPORT_SYMBOL(enable_hlt); | 99 | static void __exit_idle(void) |
100 | { | ||
101 | __get_cpu_var(idle_state) = CPU_NOT_IDLE; | ||
102 | notifier_call_chain(&idle_notifier, IDLE_END, NULL); | ||
103 | } | ||
104 | |||
105 | /* Called from interrupts to signify idle end */ | ||
106 | void exit_idle(void) | ||
107 | { | ||
108 | if (current->pid | read_pda(irqcount)) | ||
109 | return; | ||
110 | __exit_idle(); | ||
111 | } | ||
82 | 112 | ||
83 | /* | 113 | /* |
84 | * We use this if we don't have any better | 114 | * We use this if we don't have any better |
@@ -88,21 +118,16 @@ void default_idle(void) | |||
88 | { | 118 | { |
89 | local_irq_enable(); | 119 | local_irq_enable(); |
90 | 120 | ||
91 | if (!atomic_read(&hlt_counter)) { | 121 | clear_thread_flag(TIF_POLLING_NRFLAG); |
92 | clear_thread_flag(TIF_POLLING_NRFLAG); | 122 | smp_mb__after_clear_bit(); |
93 | smp_mb__after_clear_bit(); | 123 | while (!need_resched()) { |
94 | while (!need_resched()) { | 124 | local_irq_disable(); |
95 | local_irq_disable(); | 125 | if (!need_resched()) |
96 | if (!need_resched()) | 126 | safe_halt(); |
97 | safe_halt(); | 127 | else |
98 | else | 128 | local_irq_enable(); |
99 | local_irq_enable(); | ||
100 | } | ||
101 | set_thread_flag(TIF_POLLING_NRFLAG); | ||
102 | } else { | ||
103 | while (!need_resched()) | ||
104 | cpu_relax(); | ||
105 | } | 129 | } |
130 | set_thread_flag(TIF_POLLING_NRFLAG); | ||
106 | } | 131 | } |
107 | 132 | ||
108 | /* | 133 | /* |
@@ -144,7 +169,8 @@ void cpu_idle_wait(void) | |||
144 | do { | 169 | do { |
145 | ssleep(1); | 170 | ssleep(1); |
146 | for_each_online_cpu(cpu) { | 171 | for_each_online_cpu(cpu) { |
147 | if (cpu_isset(cpu, map) && !per_cpu(cpu_idle_state, cpu)) | 172 | if (cpu_isset(cpu, map) && |
173 | !per_cpu(cpu_idle_state, cpu)) | ||
148 | cpu_clear(cpu, map); | 174 | cpu_clear(cpu, map); |
149 | } | 175 | } |
150 | cpus_and(map, map, cpu_online_map); | 176 | cpus_and(map, map, cpu_online_map); |
@@ -156,7 +182,7 @@ EXPORT_SYMBOL_GPL(cpu_idle_wait); | |||
156 | DECLARE_PER_CPU(int, cpu_state); | 182 | DECLARE_PER_CPU(int, cpu_state); |
157 | 183 | ||
158 | #include <asm/nmi.h> | 184 | #include <asm/nmi.h> |
159 | /* We don't actually take CPU down, just spin without interrupts. */ | 185 | /* We halt the CPU with physical CPU hotplug */ |
160 | static inline void play_dead(void) | 186 | static inline void play_dead(void) |
161 | { | 187 | { |
162 | idle_task_exit(); | 188 | idle_task_exit(); |
@@ -165,8 +191,9 @@ static inline void play_dead(void) | |||
165 | /* Ack it */ | 191 | /* Ack it */ |
166 | __get_cpu_var(cpu_state) = CPU_DEAD; | 192 | __get_cpu_var(cpu_state) = CPU_DEAD; |
167 | 193 | ||
194 | local_irq_disable(); | ||
168 | while (1) | 195 | while (1) |
169 | safe_halt(); | 196 | halt(); |
170 | } | 197 | } |
171 | #else | 198 | #else |
172 | static inline void play_dead(void) | 199 | static inline void play_dead(void) |
@@ -199,7 +226,9 @@ void cpu_idle (void) | |||
199 | idle = default_idle; | 226 | idle = default_idle; |
200 | if (cpu_is_offline(smp_processor_id())) | 227 | if (cpu_is_offline(smp_processor_id())) |
201 | play_dead(); | 228 | play_dead(); |
229 | enter_idle(); | ||
202 | idle(); | 230 | idle(); |
231 | __exit_idle(); | ||
203 | } | 232 | } |
204 | 233 | ||
205 | preempt_enable_no_resched(); | 234 | preempt_enable_no_resched(); |
@@ -275,7 +304,8 @@ void __show_regs(struct pt_regs * regs) | |||
275 | system_utsname.version); | 304 | system_utsname.version); |
276 | printk("RIP: %04lx:[<%016lx>] ", regs->cs & 0xffff, regs->rip); | 305 | printk("RIP: %04lx:[<%016lx>] ", regs->cs & 0xffff, regs->rip); |
277 | printk_address(regs->rip); | 306 | printk_address(regs->rip); |
278 | printk("\nRSP: %04lx:%016lx EFLAGS: %08lx\n", regs->ss, regs->rsp, regs->eflags); | 307 | printk("\nRSP: %04lx:%016lx EFLAGS: %08lx\n", regs->ss, regs->rsp, |
308 | regs->eflags); | ||
279 | printk("RAX: %016lx RBX: %016lx RCX: %016lx\n", | 309 | printk("RAX: %016lx RBX: %016lx RCX: %016lx\n", |
280 | regs->rax, regs->rbx, regs->rcx); | 310 | regs->rax, regs->rbx, regs->rcx); |
281 | printk("RDX: %016lx RSI: %016lx RDI: %016lx\n", | 311 | printk("RDX: %016lx RSI: %016lx RDI: %016lx\n", |
@@ -349,13 +379,6 @@ void flush_thread(void) | |||
349 | struct task_struct *tsk = current; | 379 | struct task_struct *tsk = current; |
350 | struct thread_info *t = current_thread_info(); | 380 | struct thread_info *t = current_thread_info(); |
351 | 381 | ||
352 | /* | ||
353 | * Remove function-return probe instances associated with this task | ||
354 | * and put them back on the free list. Do not insert an exit probe for | ||
355 | * this function, it will be disabled by kprobe_flush_task if you do. | ||
356 | */ | ||
357 | kprobe_flush_task(tsk); | ||
358 | |||
359 | if (t->flags & _TIF_ABI_PENDING) | 382 | if (t->flags & _TIF_ABI_PENDING) |
360 | t->flags ^= (_TIF_ABI_PENDING | _TIF_IA32); | 383 | t->flags ^= (_TIF_ABI_PENDING | _TIF_IA32); |
361 | 384 | ||
@@ -427,21 +450,20 @@ int copy_thread(int nr, unsigned long clone_flags, unsigned long rsp, | |||
427 | struct pt_regs * childregs; | 450 | struct pt_regs * childregs; |
428 | struct task_struct *me = current; | 451 | struct task_struct *me = current; |
429 | 452 | ||
430 | childregs = ((struct pt_regs *) (THREAD_SIZE + (unsigned long) p->thread_info)) - 1; | 453 | childregs = ((struct pt_regs *) |
431 | 454 | (THREAD_SIZE + task_stack_page(p))) - 1; | |
432 | *childregs = *regs; | 455 | *childregs = *regs; |
433 | 456 | ||
434 | childregs->rax = 0; | 457 | childregs->rax = 0; |
435 | childregs->rsp = rsp; | 458 | childregs->rsp = rsp; |
436 | if (rsp == ~0UL) { | 459 | if (rsp == ~0UL) |
437 | childregs->rsp = (unsigned long)childregs; | 460 | childregs->rsp = (unsigned long)childregs; |
438 | } | ||
439 | 461 | ||
440 | p->thread.rsp = (unsigned long) childregs; | 462 | p->thread.rsp = (unsigned long) childregs; |
441 | p->thread.rsp0 = (unsigned long) (childregs+1); | 463 | p->thread.rsp0 = (unsigned long) (childregs+1); |
442 | p->thread.userrsp = me->thread.userrsp; | 464 | p->thread.userrsp = me->thread.userrsp; |
443 | 465 | ||
444 | set_ti_thread_flag(p->thread_info, TIF_FORK); | 466 | set_tsk_thread_flag(p, TIF_FORK); |
445 | 467 | ||
446 | p->thread.fs = me->thread.fs; | 468 | p->thread.fs = me->thread.fs; |
447 | p->thread.gs = me->thread.gs; | 469 | p->thread.gs = me->thread.gs; |
@@ -457,7 +479,8 @@ int copy_thread(int nr, unsigned long clone_flags, unsigned long rsp, | |||
457 | p->thread.io_bitmap_max = 0; | 479 | p->thread.io_bitmap_max = 0; |
458 | return -ENOMEM; | 480 | return -ENOMEM; |
459 | } | 481 | } |
460 | memcpy(p->thread.io_bitmap_ptr, me->thread.io_bitmap_ptr, IO_BITMAP_BYTES); | 482 | memcpy(p->thread.io_bitmap_ptr, me->thread.io_bitmap_ptr, |
483 | IO_BITMAP_BYTES); | ||
461 | } | 484 | } |
462 | 485 | ||
463 | /* | 486 | /* |
@@ -494,7 +517,8 @@ out: | |||
494 | * - fold all the options into a flag word and test it with a single test. | 517 | * - fold all the options into a flag word and test it with a single test. |
495 | * - could test fs/gs bitsliced | 518 | * - could test fs/gs bitsliced |
496 | */ | 519 | */ |
497 | struct task_struct *__switch_to(struct task_struct *prev_p, struct task_struct *next_p) | 520 | struct task_struct * |
521 | __switch_to(struct task_struct *prev_p, struct task_struct *next_p) | ||
498 | { | 522 | { |
499 | struct thread_struct *prev = &prev_p->thread, | 523 | struct thread_struct *prev = &prev_p->thread, |
500 | *next = &next_p->thread; | 524 | *next = &next_p->thread; |
@@ -565,7 +589,8 @@ struct task_struct *__switch_to(struct task_struct *prev_p, struct task_struct * | |||
565 | prev->userrsp = read_pda(oldrsp); | 589 | prev->userrsp = read_pda(oldrsp); |
566 | write_pda(oldrsp, next->userrsp); | 590 | write_pda(oldrsp, next->userrsp); |
567 | write_pda(pcurrent, next_p); | 591 | write_pda(pcurrent, next_p); |
568 | write_pda(kernelstack, (unsigned long)next_p->thread_info + THREAD_SIZE - PDA_STACKOFFSET); | 592 | write_pda(kernelstack, |
593 | task_stack_page(next_p) + THREAD_SIZE - PDA_STACKOFFSET); | ||
569 | 594 | ||
570 | /* | 595 | /* |
571 | * Now maybe reload the debug registers | 596 | * Now maybe reload the debug registers |
@@ -646,7 +671,9 @@ asmlinkage long sys_fork(struct pt_regs *regs) | |||
646 | return do_fork(SIGCHLD, regs->rsp, regs, 0, NULL, NULL); | 671 | return do_fork(SIGCHLD, regs->rsp, regs, 0, NULL, NULL); |
647 | } | 672 | } |
648 | 673 | ||
649 | asmlinkage long sys_clone(unsigned long clone_flags, unsigned long newsp, void __user *parent_tid, void __user *child_tid, struct pt_regs *regs) | 674 | asmlinkage long |
675 | sys_clone(unsigned long clone_flags, unsigned long newsp, | ||
676 | void __user *parent_tid, void __user *child_tid, struct pt_regs *regs) | ||
650 | { | 677 | { |
651 | if (!newsp) | 678 | if (!newsp) |
652 | newsp = regs->rsp; | 679 | newsp = regs->rsp; |
@@ -677,12 +704,13 @@ unsigned long get_wchan(struct task_struct *p) | |||
677 | 704 | ||
678 | if (!p || p == current || p->state==TASK_RUNNING) | 705 | if (!p || p == current || p->state==TASK_RUNNING) |
679 | return 0; | 706 | return 0; |
680 | stack = (unsigned long)p->thread_info; | 707 | stack = (unsigned long)task_stack_page(p); |
681 | if (p->thread.rsp < stack || p->thread.rsp > stack+THREAD_SIZE) | 708 | if (p->thread.rsp < stack || p->thread.rsp > stack+THREAD_SIZE) |
682 | return 0; | 709 | return 0; |
683 | fp = *(u64 *)(p->thread.rsp); | 710 | fp = *(u64 *)(p->thread.rsp); |
684 | do { | 711 | do { |
685 | if (fp < (unsigned long)stack || fp > (unsigned long)stack+THREAD_SIZE) | 712 | if (fp < (unsigned long)stack || |
713 | fp > (unsigned long)stack+THREAD_SIZE) | ||
686 | return 0; | 714 | return 0; |
687 | rip = *(u64 *)(fp+8); | 715 | rip = *(u64 *)(fp+8); |
688 | if (!in_sched_functions(rip)) | 716 | if (!in_sched_functions(rip)) |
@@ -717,8 +745,8 @@ long do_arch_prctl(struct task_struct *task, int code, unsigned long addr) | |||
717 | task->thread.gsindex = 0; | 745 | task->thread.gsindex = 0; |
718 | task->thread.gs = addr; | 746 | task->thread.gs = addr; |
719 | if (doit) { | 747 | if (doit) { |
720 | load_gs_index(0); | 748 | load_gs_index(0); |
721 | ret = checking_wrmsrl(MSR_KERNEL_GS_BASE, addr); | 749 | ret = checking_wrmsrl(MSR_KERNEL_GS_BASE, addr); |
722 | } | 750 | } |
723 | } | 751 | } |
724 | put_cpu(); | 752 | put_cpu(); |
@@ -735,7 +763,7 @@ long do_arch_prctl(struct task_struct *task, int code, unsigned long addr) | |||
735 | set_32bit_tls(task, FS_TLS, addr); | 763 | set_32bit_tls(task, FS_TLS, addr); |
736 | if (doit) { | 764 | if (doit) { |
737 | load_TLS(&task->thread, cpu); | 765 | load_TLS(&task->thread, cpu); |
738 | asm volatile("movl %0,%%fs" :: "r" (FS_TLS_SEL)); | 766 | asm volatile("movl %0,%%fs" :: "r"(FS_TLS_SEL)); |
739 | } | 767 | } |
740 | task->thread.fsindex = FS_TLS_SEL; | 768 | task->thread.fsindex = FS_TLS_SEL; |
741 | task->thread.fs = 0; | 769 | task->thread.fs = 0; |
@@ -745,8 +773,8 @@ long do_arch_prctl(struct task_struct *task, int code, unsigned long addr) | |||
745 | if (doit) { | 773 | if (doit) { |
746 | /* set the selector to 0 to not confuse | 774 | /* set the selector to 0 to not confuse |
747 | __switch_to */ | 775 | __switch_to */ |
748 | asm volatile("movl %0,%%fs" :: "r" (0)); | 776 | asm volatile("movl %0,%%fs" :: "r" (0)); |
749 | ret = checking_wrmsrl(MSR_FS_BASE, addr); | 777 | ret = checking_wrmsrl(MSR_FS_BASE, addr); |
750 | } | 778 | } |
751 | } | 779 | } |
752 | put_cpu(); | 780 | put_cpu(); |
@@ -755,9 +783,9 @@ long do_arch_prctl(struct task_struct *task, int code, unsigned long addr) | |||
755 | unsigned long base; | 783 | unsigned long base; |
756 | if (task->thread.fsindex == FS_TLS_SEL) | 784 | if (task->thread.fsindex == FS_TLS_SEL) |
757 | base = read_32bit_tls(task, FS_TLS); | 785 | base = read_32bit_tls(task, FS_TLS); |
758 | else if (doit) { | 786 | else if (doit) |
759 | rdmsrl(MSR_FS_BASE, base); | 787 | rdmsrl(MSR_FS_BASE, base); |
760 | } else | 788 | else |
761 | base = task->thread.fs; | 789 | base = task->thread.fs; |
762 | ret = put_user(base, (unsigned long __user *)addr); | 790 | ret = put_user(base, (unsigned long __user *)addr); |
763 | break; | 791 | break; |
@@ -766,9 +794,9 @@ long do_arch_prctl(struct task_struct *task, int code, unsigned long addr) | |||
766 | unsigned long base; | 794 | unsigned long base; |
767 | if (task->thread.gsindex == GS_TLS_SEL) | 795 | if (task->thread.gsindex == GS_TLS_SEL) |
768 | base = read_32bit_tls(task, GS_TLS); | 796 | base = read_32bit_tls(task, GS_TLS); |
769 | else if (doit) { | 797 | else if (doit) |
770 | rdmsrl(MSR_KERNEL_GS_BASE, base); | 798 | rdmsrl(MSR_KERNEL_GS_BASE, base); |
771 | } else | 799 | else |
772 | base = task->thread.gs; | 800 | base = task->thread.gs; |
773 | ret = put_user(base, (unsigned long __user *)addr); | 801 | ret = put_user(base, (unsigned long __user *)addr); |
774 | break; | 802 | break; |
@@ -794,8 +822,7 @@ int dump_task_regs(struct task_struct *tsk, elf_gregset_t *regs) | |||
794 | { | 822 | { |
795 | struct pt_regs *pp, ptregs; | 823 | struct pt_regs *pp, ptregs; |
796 | 824 | ||
797 | pp = (struct pt_regs *)(tsk->thread.rsp0); | 825 | pp = task_pt_regs(tsk); |
798 | --pp; | ||
799 | 826 | ||
800 | ptregs = *pp; | 827 | ptregs = *pp; |
801 | ptregs.cs &= 0xffff; | 828 | ptregs.cs &= 0xffff; |
diff --git a/arch/x86_64/kernel/ptrace.c b/arch/x86_64/kernel/ptrace.c index a87b6cebe80f..53205622351c 100644 --- a/arch/x86_64/kernel/ptrace.c +++ b/arch/x86_64/kernel/ptrace.c | |||
@@ -36,9 +36,12 @@ | |||
36 | * in exit.c or in signal.c. | 36 | * in exit.c or in signal.c. |
37 | */ | 37 | */ |
38 | 38 | ||
39 | /* determines which flags the user has access to. */ | 39 | /* |
40 | /* 1 = access 0 = no access */ | 40 | * Determines which flags the user has access to [1 = access, 0 = no access]. |
41 | #define FLAG_MASK 0x44dd5UL | 41 | * Prohibits changing ID(21), VIP(20), VIF(19), VM(17), IOPL(12-13), IF(9). |
42 | * Also masks reserved bits (63-22, 15, 5, 3, 1). | ||
43 | */ | ||
44 | #define FLAG_MASK 0x54dd5UL | ||
42 | 45 | ||
43 | /* set's the trap flag. */ | 46 | /* set's the trap flag. */ |
44 | #define TRAP_FLAG 0x100UL | 47 | #define TRAP_FLAG 0x100UL |
@@ -64,12 +67,6 @@ static inline unsigned long get_stack_long(struct task_struct *task, int offset) | |||
64 | return (*((unsigned long *)stack)); | 67 | return (*((unsigned long *)stack)); |
65 | } | 68 | } |
66 | 69 | ||
67 | static inline struct pt_regs *get_child_regs(struct task_struct *task) | ||
68 | { | ||
69 | struct pt_regs *regs = (void *)task->thread.rsp0; | ||
70 | return regs - 1; | ||
71 | } | ||
72 | |||
73 | /* | 70 | /* |
74 | * this routine will put a word on the processes privileged stack. | 71 | * this routine will put a word on the processes privileged stack. |
75 | * the offset is how far from the base addr as stored in the TSS. | 72 | * the offset is how far from the base addr as stored in the TSS. |
@@ -167,7 +164,7 @@ static int is_at_popf(struct task_struct *child, struct pt_regs *regs) | |||
167 | 164 | ||
168 | static void set_singlestep(struct task_struct *child) | 165 | static void set_singlestep(struct task_struct *child) |
169 | { | 166 | { |
170 | struct pt_regs *regs = get_child_regs(child); | 167 | struct pt_regs *regs = task_pt_regs(child); |
171 | 168 | ||
172 | /* | 169 | /* |
173 | * Always set TIF_SINGLESTEP - this guarantees that | 170 | * Always set TIF_SINGLESTEP - this guarantees that |
@@ -205,7 +202,7 @@ static void clear_singlestep(struct task_struct *child) | |||
205 | 202 | ||
206 | /* But touch TF only if it was set by us.. */ | 203 | /* But touch TF only if it was set by us.. */ |
207 | if (child->ptrace & PT_DTRACE) { | 204 | if (child->ptrace & PT_DTRACE) { |
208 | struct pt_regs *regs = get_child_regs(child); | 205 | struct pt_regs *regs = task_pt_regs(child); |
209 | regs->eflags &= ~TRAP_FLAG; | 206 | regs->eflags &= ~TRAP_FLAG; |
210 | child->ptrace &= ~PT_DTRACE; | 207 | child->ptrace &= ~PT_DTRACE; |
211 | } | 208 | } |
diff --git a/arch/x86_64/kernel/reboot.c b/arch/x86_64/kernel/reboot.c index 47f95687905f..57117b8beb2b 100644 --- a/arch/x86_64/kernel/reboot.c +++ b/arch/x86_64/kernel/reboot.c | |||
@@ -6,6 +6,7 @@ | |||
6 | #include <linux/kernel.h> | 6 | #include <linux/kernel.h> |
7 | #include <linux/ctype.h> | 7 | #include <linux/ctype.h> |
8 | #include <linux/string.h> | 8 | #include <linux/string.h> |
9 | #include <linux/pm.h> | ||
9 | #include <asm/io.h> | 10 | #include <asm/io.h> |
10 | #include <asm/kdebug.h> | 11 | #include <asm/kdebug.h> |
11 | #include <asm/delay.h> | 12 | #include <asm/delay.h> |
@@ -77,6 +78,7 @@ static inline void kb_wait(void) | |||
77 | 78 | ||
78 | void machine_shutdown(void) | 79 | void machine_shutdown(void) |
79 | { | 80 | { |
81 | unsigned long flags; | ||
80 | /* Stop the cpus and apics */ | 82 | /* Stop the cpus and apics */ |
81 | #ifdef CONFIG_SMP | 83 | #ifdef CONFIG_SMP |
82 | int reboot_cpu_id; | 84 | int reboot_cpu_id; |
@@ -98,7 +100,7 @@ void machine_shutdown(void) | |||
98 | smp_send_stop(); | 100 | smp_send_stop(); |
99 | #endif | 101 | #endif |
100 | 102 | ||
101 | local_irq_disable(); | 103 | local_irq_save(flags); |
102 | 104 | ||
103 | #ifndef CONFIG_SMP | 105 | #ifndef CONFIG_SMP |
104 | disable_local_APIC(); | 106 | disable_local_APIC(); |
@@ -106,7 +108,7 @@ void machine_shutdown(void) | |||
106 | 108 | ||
107 | disable_IO_APIC(); | 109 | disable_IO_APIC(); |
108 | 110 | ||
109 | local_irq_enable(); | 111 | local_irq_restore(flags); |
110 | } | 112 | } |
111 | 113 | ||
112 | void machine_emergency_restart(void) | 114 | void machine_emergency_restart(void) |
@@ -120,7 +122,7 @@ void machine_emergency_restart(void) | |||
120 | /* Could also try the reset bit in the Hammer NB */ | 122 | /* Could also try the reset bit in the Hammer NB */ |
121 | switch (reboot_type) { | 123 | switch (reboot_type) { |
122 | case BOOT_KBD: | 124 | case BOOT_KBD: |
123 | for (i=0; i<100; i++) { | 125 | for (i=0; i<10; i++) { |
124 | kb_wait(); | 126 | kb_wait(); |
125 | udelay(50); | 127 | udelay(50); |
126 | outb(0xfe,0x64); /* pulse reset low */ | 128 | outb(0xfe,0x64); /* pulse reset low */ |
@@ -153,10 +155,11 @@ void machine_halt(void) | |||
153 | 155 | ||
154 | void machine_power_off(void) | 156 | void machine_power_off(void) |
155 | { | 157 | { |
156 | if (!reboot_force) { | 158 | if (pm_power_off) { |
157 | machine_shutdown(); | 159 | if (!reboot_force) { |
158 | } | 160 | machine_shutdown(); |
159 | if (pm_power_off) | 161 | } |
160 | pm_power_off(); | 162 | pm_power_off(); |
163 | } | ||
161 | } | 164 | } |
162 | 165 | ||
diff --git a/arch/x86_64/kernel/setup.c b/arch/x86_64/kernel/setup.c index da0bc3e7bdf5..28895c03cb11 100644 --- a/arch/x86_64/kernel/setup.c +++ b/arch/x86_64/kernel/setup.c | |||
@@ -35,6 +35,7 @@ | |||
35 | #include <asm/processor.h> | 35 | #include <asm/processor.h> |
36 | #include <linux/console.h> | 36 | #include <linux/console.h> |
37 | #include <linux/seq_file.h> | 37 | #include <linux/seq_file.h> |
38 | #include <linux/crash_dump.h> | ||
38 | #include <linux/root_dev.h> | 39 | #include <linux/root_dev.h> |
39 | #include <linux/pci.h> | 40 | #include <linux/pci.h> |
40 | #include <linux/acpi.h> | 41 | #include <linux/acpi.h> |
@@ -42,6 +43,9 @@ | |||
42 | #include <linux/edd.h> | 43 | #include <linux/edd.h> |
43 | #include <linux/mmzone.h> | 44 | #include <linux/mmzone.h> |
44 | #include <linux/kexec.h> | 45 | #include <linux/kexec.h> |
46 | #include <linux/cpufreq.h> | ||
47 | #include <linux/dmi.h> | ||
48 | #include <linux/dma-mapping.h> | ||
45 | 49 | ||
46 | #include <asm/mtrr.h> | 50 | #include <asm/mtrr.h> |
47 | #include <asm/uaccess.h> | 51 | #include <asm/uaccess.h> |
@@ -60,6 +64,9 @@ | |||
60 | #include <asm/setup.h> | 64 | #include <asm/setup.h> |
61 | #include <asm/mach_apic.h> | 65 | #include <asm/mach_apic.h> |
62 | #include <asm/numa.h> | 66 | #include <asm/numa.h> |
67 | #include <asm/swiotlb.h> | ||
68 | #include <asm/sections.h> | ||
69 | #include <asm/gart-mapping.h> | ||
63 | 70 | ||
64 | /* | 71 | /* |
65 | * Machine setup.. | 72 | * Machine setup.. |
@@ -84,11 +91,6 @@ int bootloader_type; | |||
84 | 91 | ||
85 | unsigned long saved_video_mode; | 92 | unsigned long saved_video_mode; |
86 | 93 | ||
87 | #ifdef CONFIG_SWIOTLB | ||
88 | int swiotlb; | ||
89 | EXPORT_SYMBOL(swiotlb); | ||
90 | #endif | ||
91 | |||
92 | /* | 94 | /* |
93 | * Setup options | 95 | * Setup options |
94 | */ | 96 | */ |
@@ -103,7 +105,6 @@ struct edid_info edid_info; | |||
103 | struct e820map e820; | 105 | struct e820map e820; |
104 | 106 | ||
105 | extern int root_mountflags; | 107 | extern int root_mountflags; |
106 | extern char _text, _etext, _edata, _end; | ||
107 | 108 | ||
108 | char command_line[COMMAND_LINE_SIZE]; | 109 | char command_line[COMMAND_LINE_SIZE]; |
109 | 110 | ||
@@ -274,10 +275,7 @@ static __init void parse_cmdline_early (char ** cmdline_p) | |||
274 | { | 275 | { |
275 | char c = ' ', *to = command_line, *from = COMMAND_LINE; | 276 | char c = ' ', *to = command_line, *from = COMMAND_LINE; |
276 | int len = 0; | 277 | int len = 0; |
277 | 278 | int userdef = 0; | |
278 | /* Save unparsed command line copy for /proc/cmdline */ | ||
279 | memcpy(saved_command_line, COMMAND_LINE, COMMAND_LINE_SIZE); | ||
280 | saved_command_line[COMMAND_LINE_SIZE-1] = '\0'; | ||
281 | 279 | ||
282 | for (;;) { | 280 | for (;;) { |
283 | if (c != ' ') | 281 | if (c != ' ') |
@@ -345,10 +343,14 @@ static __init void parse_cmdline_early (char ** cmdline_p) | |||
345 | !memcmp(from, "disableapic", 11)) | 343 | !memcmp(from, "disableapic", 11)) |
346 | disable_apic = 1; | 344 | disable_apic = 1; |
347 | 345 | ||
348 | if (!memcmp(from, "noapic", 6)) | 346 | /* Don't confuse with noapictimer */ |
347 | if (!memcmp(from, "noapic", 6) && | ||
348 | (from[6] == ' ' || from[6] == 0)) | ||
349 | skip_ioapic_setup = 1; | 349 | skip_ioapic_setup = 1; |
350 | 350 | ||
351 | if (!memcmp(from, "apic", 4)) { | 351 | /* Make sure to not confuse with apic= */ |
352 | if (!memcmp(from, "apic", 4) && | ||
353 | (from[4] == ' ' || from[4] == 0)) { | ||
352 | skip_ioapic_setup = 0; | 354 | skip_ioapic_setup = 0; |
353 | ioapic_force = 1; | 355 | ioapic_force = 1; |
354 | } | 356 | } |
@@ -356,16 +358,36 @@ static __init void parse_cmdline_early (char ** cmdline_p) | |||
356 | if (!memcmp(from, "mem=", 4)) | 358 | if (!memcmp(from, "mem=", 4)) |
357 | parse_memopt(from+4, &from); | 359 | parse_memopt(from+4, &from); |
358 | 360 | ||
361 | if (!memcmp(from, "memmap=", 7)) { | ||
362 | /* exactmap option is for used defined memory */ | ||
363 | if (!memcmp(from+7, "exactmap", 8)) { | ||
364 | #ifdef CONFIG_CRASH_DUMP | ||
365 | /* If we are doing a crash dump, we | ||
366 | * still need to know the real mem | ||
367 | * size before original memory map is | ||
368 | * reset. | ||
369 | */ | ||
370 | saved_max_pfn = e820_end_of_ram(); | ||
371 | #endif | ||
372 | from += 8+7; | ||
373 | end_pfn_map = 0; | ||
374 | e820.nr_map = 0; | ||
375 | userdef = 1; | ||
376 | } | ||
377 | else { | ||
378 | parse_memmapopt(from+7, &from); | ||
379 | userdef = 1; | ||
380 | } | ||
381 | } | ||
382 | |||
359 | #ifdef CONFIG_NUMA | 383 | #ifdef CONFIG_NUMA |
360 | if (!memcmp(from, "numa=", 5)) | 384 | if (!memcmp(from, "numa=", 5)) |
361 | numa_setup(from+5); | 385 | numa_setup(from+5); |
362 | #endif | 386 | #endif |
363 | 387 | ||
364 | #ifdef CONFIG_GART_IOMMU | ||
365 | if (!memcmp(from,"iommu=",6)) { | 388 | if (!memcmp(from,"iommu=",6)) { |
366 | iommu_setup(from+6); | 389 | iommu_setup(from+6); |
367 | } | 390 | } |
368 | #endif | ||
369 | 391 | ||
370 | if (!memcmp(from,"oops=panic", 10)) | 392 | if (!memcmp(from,"oops=panic", 10)) |
371 | panic_on_oops = 1; | 393 | panic_on_oops = 1; |
@@ -394,6 +416,14 @@ static __init void parse_cmdline_early (char ** cmdline_p) | |||
394 | } | 416 | } |
395 | #endif | 417 | #endif |
396 | 418 | ||
419 | #ifdef CONFIG_PROC_VMCORE | ||
420 | /* elfcorehdr= specifies the location of elf core header | ||
421 | * stored by the crashed kernel. This option will be passed | ||
422 | * by kexec loader to the capture kernel. | ||
423 | */ | ||
424 | else if(!memcmp(from, "elfcorehdr=", 11)) | ||
425 | elfcorehdr_addr = memparse(from+11, &from); | ||
426 | #endif | ||
397 | next_char: | 427 | next_char: |
398 | c = *(from++); | 428 | c = *(from++); |
399 | if (!c) | 429 | if (!c) |
@@ -402,6 +432,10 @@ static __init void parse_cmdline_early (char ** cmdline_p) | |||
402 | break; | 432 | break; |
403 | *(to++) = c; | 433 | *(to++) = c; |
404 | } | 434 | } |
435 | if (userdef) { | ||
436 | printk(KERN_INFO "user-defined physical RAM map:\n"); | ||
437 | e820_print_map("user"); | ||
438 | } | ||
405 | *to = '\0'; | 439 | *to = '\0'; |
406 | *cmdline_p = command_line; | 440 | *cmdline_p = command_line; |
407 | } | 441 | } |
@@ -412,7 +446,6 @@ contig_initmem_init(unsigned long start_pfn, unsigned long end_pfn) | |||
412 | { | 446 | { |
413 | unsigned long bootmap_size, bootmap; | 447 | unsigned long bootmap_size, bootmap; |
414 | 448 | ||
415 | memory_present(0, start_pfn, end_pfn); | ||
416 | bootmap_size = bootmem_bootmap_pages(end_pfn)<<PAGE_SHIFT; | 449 | bootmap_size = bootmem_bootmap_pages(end_pfn)<<PAGE_SHIFT; |
417 | bootmap = find_e820_area(0, end_pfn<<PAGE_SHIFT, bootmap_size); | 450 | bootmap = find_e820_area(0, end_pfn<<PAGE_SHIFT, bootmap_size); |
418 | if (bootmap == -1L) | 451 | if (bootmap == -1L) |
@@ -443,6 +476,8 @@ static unsigned char *k8_nops[ASM_NOP_MAX+1] = { | |||
443 | k8nops + 1 + 2 + 3 + 4 + 5 + 6 + 7, | 476 | k8nops + 1 + 2 + 3 + 4 + 5 + 6 + 7, |
444 | }; | 477 | }; |
445 | 478 | ||
479 | extern char __vsyscall_0; | ||
480 | |||
446 | /* Replace instructions with better alternatives for this CPU type. | 481 | /* Replace instructions with better alternatives for this CPU type. |
447 | 482 | ||
448 | This runs before SMP is initialized to avoid SMP problems with | 483 | This runs before SMP is initialized to avoid SMP problems with |
@@ -454,11 +489,17 @@ void apply_alternatives(void *start, void *end) | |||
454 | struct alt_instr *a; | 489 | struct alt_instr *a; |
455 | int diff, i, k; | 490 | int diff, i, k; |
456 | for (a = start; (void *)a < end; a++) { | 491 | for (a = start; (void *)a < end; a++) { |
492 | u8 *instr; | ||
493 | |||
457 | if (!boot_cpu_has(a->cpuid)) | 494 | if (!boot_cpu_has(a->cpuid)) |
458 | continue; | 495 | continue; |
459 | 496 | ||
460 | BUG_ON(a->replacementlen > a->instrlen); | 497 | BUG_ON(a->replacementlen > a->instrlen); |
461 | __inline_memcpy(a->instr, a->replacement, a->replacementlen); | 498 | instr = a->instr; |
499 | /* vsyscall code is not mapped yet. resolve it manually. */ | ||
500 | if (instr >= (u8 *)VSYSCALL_START && instr < (u8*)VSYSCALL_END) | ||
501 | instr = __va(instr - (u8*)VSYSCALL_START + (u8*)__pa_symbol(&__vsyscall_0)); | ||
502 | __inline_memcpy(instr, a->replacement, a->replacementlen); | ||
462 | diff = a->instrlen - a->replacementlen; | 503 | diff = a->instrlen - a->replacementlen; |
463 | 504 | ||
464 | /* Pad the rest with nops */ | 505 | /* Pad the rest with nops */ |
@@ -466,7 +507,7 @@ void apply_alternatives(void *start, void *end) | |||
466 | k = diff; | 507 | k = diff; |
467 | if (k > ASM_NOP_MAX) | 508 | if (k > ASM_NOP_MAX) |
468 | k = ASM_NOP_MAX; | 509 | k = ASM_NOP_MAX; |
469 | __inline_memcpy(a->instr + i, k8_nops[k], k); | 510 | __inline_memcpy(instr + i, k8_nops[k], k); |
470 | } | 511 | } |
471 | } | 512 | } |
472 | } | 513 | } |
@@ -571,6 +612,8 @@ void __init setup_arch(char **cmdline_p) | |||
571 | 612 | ||
572 | init_memory_mapping(0, (end_pfn_map << PAGE_SHIFT)); | 613 | init_memory_mapping(0, (end_pfn_map << PAGE_SHIFT)); |
573 | 614 | ||
615 | zap_low_mappings(0); | ||
616 | |||
574 | #ifdef CONFIG_ACPI | 617 | #ifdef CONFIG_ACPI |
575 | /* | 618 | /* |
576 | * Initialize the ACPI boot-time table parser (gets the RSDP and SDT). | 619 | * Initialize the ACPI boot-time table parser (gets the RSDP and SDT). |
@@ -657,8 +700,6 @@ void __init setup_arch(char **cmdline_p) | |||
657 | } | 700 | } |
658 | #endif | 701 | #endif |
659 | 702 | ||
660 | sparse_init(); | ||
661 | |||
662 | paging_init(); | 703 | paging_init(); |
663 | 704 | ||
664 | check_ioapic(); | 705 | check_ioapic(); |
@@ -670,6 +711,8 @@ void __init setup_arch(char **cmdline_p) | |||
670 | acpi_boot_init(); | 711 | acpi_boot_init(); |
671 | #endif | 712 | #endif |
672 | 713 | ||
714 | init_cpu_to_node(); | ||
715 | |||
673 | #ifdef CONFIG_X86_LOCAL_APIC | 716 | #ifdef CONFIG_X86_LOCAL_APIC |
674 | /* | 717 | /* |
675 | * get boot-time SMP configuration: | 718 | * get boot-time SMP configuration: |
@@ -793,7 +836,7 @@ static void __init amd_detect_cmp(struct cpuinfo_x86 *c) | |||
793 | #endif | 836 | #endif |
794 | 837 | ||
795 | bits = 0; | 838 | bits = 0; |
796 | while ((1 << bits) < c->x86_num_cores) | 839 | while ((1 << bits) < c->x86_max_cores) |
797 | bits++; | 840 | bits++; |
798 | 841 | ||
799 | /* Low order bits define the core id (index of core in socket) */ | 842 | /* Low order bits define the core id (index of core in socket) */ |
@@ -823,10 +866,10 @@ static void __init amd_detect_cmp(struct cpuinfo_x86 *c) | |||
823 | if (!node_online(node)) | 866 | if (!node_online(node)) |
824 | node = nearby_node(apicid); | 867 | node = nearby_node(apicid); |
825 | } | 868 | } |
826 | cpu_to_node[cpu] = node; | 869 | numa_set_node(cpu, node); |
827 | 870 | ||
828 | printk(KERN_INFO "CPU %d(%d) -> Node %d -> Core %d\n", | 871 | printk(KERN_INFO "CPU %d(%d) -> Node %d -> Core %d\n", |
829 | cpu, c->x86_num_cores, node, cpu_core_id[cpu]); | 872 | cpu, c->x86_max_cores, node, cpu_core_id[cpu]); |
830 | #endif | 873 | #endif |
831 | #endif | 874 | #endif |
832 | } | 875 | } |
@@ -834,7 +877,6 @@ static void __init amd_detect_cmp(struct cpuinfo_x86 *c) | |||
834 | static int __init init_amd(struct cpuinfo_x86 *c) | 877 | static int __init init_amd(struct cpuinfo_x86 *c) |
835 | { | 878 | { |
836 | int r; | 879 | int r; |
837 | int level; | ||
838 | 880 | ||
839 | #ifdef CONFIG_SMP | 881 | #ifdef CONFIG_SMP |
840 | unsigned long value; | 882 | unsigned long value; |
@@ -857,11 +899,6 @@ static int __init init_amd(struct cpuinfo_x86 *c) | |||
857 | 3DNow is IDd by bit 31 in extended CPUID (1*32+31) anyway */ | 899 | 3DNow is IDd by bit 31 in extended CPUID (1*32+31) anyway */ |
858 | clear_bit(0*32+31, &c->x86_capability); | 900 | clear_bit(0*32+31, &c->x86_capability); |
859 | 901 | ||
860 | /* C-stepping K8? */ | ||
861 | level = cpuid_eax(1); | ||
862 | if ((level >= 0x0f48 && level < 0x0f50) || level >= 0x0f58) | ||
863 | set_bit(X86_FEATURE_K8_C, &c->x86_capability); | ||
864 | |||
865 | r = get_model_name(c); | 902 | r = get_model_name(c); |
866 | if (!r) { | 903 | if (!r) { |
867 | switch (c->x86) { | 904 | switch (c->x86) { |
@@ -874,10 +911,14 @@ static int __init init_amd(struct cpuinfo_x86 *c) | |||
874 | } | 911 | } |
875 | display_cacheinfo(c); | 912 | display_cacheinfo(c); |
876 | 913 | ||
914 | /* c->x86_power is 8000_0007 edx. Bit 8 is constant TSC */ | ||
915 | if (c->x86_power & (1<<8)) | ||
916 | set_bit(X86_FEATURE_CONSTANT_TSC, &c->x86_capability); | ||
917 | |||
877 | if (c->extended_cpuid_level >= 0x80000008) { | 918 | if (c->extended_cpuid_level >= 0x80000008) { |
878 | c->x86_num_cores = (cpuid_ecx(0x80000008) & 0xff) + 1; | 919 | c->x86_max_cores = (cpuid_ecx(0x80000008) & 0xff) + 1; |
879 | if (c->x86_num_cores & (c->x86_num_cores - 1)) | 920 | if (c->x86_max_cores & (c->x86_max_cores - 1)) |
880 | c->x86_num_cores = 1; | 921 | c->x86_max_cores = 1; |
881 | 922 | ||
882 | amd_detect_cmp(c); | 923 | amd_detect_cmp(c); |
883 | } | 924 | } |
@@ -889,54 +930,44 @@ static void __cpuinit detect_ht(struct cpuinfo_x86 *c) | |||
889 | { | 930 | { |
890 | #ifdef CONFIG_SMP | 931 | #ifdef CONFIG_SMP |
891 | u32 eax, ebx, ecx, edx; | 932 | u32 eax, ebx, ecx, edx; |
892 | int index_msb, tmp; | 933 | int index_msb, core_bits; |
893 | int cpu = smp_processor_id(); | 934 | int cpu = smp_processor_id(); |
894 | 935 | ||
936 | cpuid(1, &eax, &ebx, &ecx, &edx); | ||
937 | |||
938 | c->apicid = phys_pkg_id(0); | ||
939 | |||
895 | if (!cpu_has(c, X86_FEATURE_HT) || cpu_has(c, X86_FEATURE_CMP_LEGACY)) | 940 | if (!cpu_has(c, X86_FEATURE_HT) || cpu_has(c, X86_FEATURE_CMP_LEGACY)) |
896 | return; | 941 | return; |
897 | 942 | ||
898 | cpuid(1, &eax, &ebx, &ecx, &edx); | ||
899 | smp_num_siblings = (ebx & 0xff0000) >> 16; | 943 | smp_num_siblings = (ebx & 0xff0000) >> 16; |
900 | 944 | ||
901 | if (smp_num_siblings == 1) { | 945 | if (smp_num_siblings == 1) { |
902 | printk(KERN_INFO "CPU: Hyper-Threading is disabled\n"); | 946 | printk(KERN_INFO "CPU: Hyper-Threading is disabled\n"); |
903 | } else if (smp_num_siblings > 1) { | 947 | } else if (smp_num_siblings > 1 ) { |
904 | index_msb = 31; | 948 | |
905 | /* | ||
906 | * At this point we only support two siblings per | ||
907 | * processor package. | ||
908 | */ | ||
909 | if (smp_num_siblings > NR_CPUS) { | 949 | if (smp_num_siblings > NR_CPUS) { |
910 | printk(KERN_WARNING "CPU: Unsupported number of the siblings %d", smp_num_siblings); | 950 | printk(KERN_WARNING "CPU: Unsupported number of the siblings %d", smp_num_siblings); |
911 | smp_num_siblings = 1; | 951 | smp_num_siblings = 1; |
912 | return; | 952 | return; |
913 | } | 953 | } |
914 | tmp = smp_num_siblings; | 954 | |
915 | while ((tmp & 0x80000000 ) == 0) { | 955 | index_msb = get_count_order(smp_num_siblings); |
916 | tmp <<=1 ; | ||
917 | index_msb--; | ||
918 | } | ||
919 | if (smp_num_siblings & (smp_num_siblings - 1)) | ||
920 | index_msb++; | ||
921 | phys_proc_id[cpu] = phys_pkg_id(index_msb); | 956 | phys_proc_id[cpu] = phys_pkg_id(index_msb); |
922 | 957 | ||
923 | printk(KERN_INFO "CPU: Physical Processor ID: %d\n", | 958 | printk(KERN_INFO "CPU: Physical Processor ID: %d\n", |
924 | phys_proc_id[cpu]); | 959 | phys_proc_id[cpu]); |
925 | 960 | ||
926 | smp_num_siblings = smp_num_siblings / c->x86_num_cores; | 961 | smp_num_siblings = smp_num_siblings / c->x86_max_cores; |
927 | 962 | ||
928 | tmp = smp_num_siblings; | 963 | index_msb = get_count_order(smp_num_siblings) ; |
929 | index_msb = 31; | 964 | |
930 | while ((tmp & 0x80000000) == 0) { | 965 | core_bits = get_count_order(c->x86_max_cores); |
931 | tmp <<=1 ; | ||
932 | index_msb--; | ||
933 | } | ||
934 | if (smp_num_siblings & (smp_num_siblings - 1)) | ||
935 | index_msb++; | ||
936 | 966 | ||
937 | cpu_core_id[cpu] = phys_pkg_id(index_msb); | 967 | cpu_core_id[cpu] = phys_pkg_id(index_msb) & |
968 | ((1 << core_bits) - 1); | ||
938 | 969 | ||
939 | if (c->x86_num_cores > 1) | 970 | if (c->x86_max_cores > 1) |
940 | printk(KERN_INFO "CPU: Processor Core ID: %d\n", | 971 | printk(KERN_INFO "CPU: Processor Core ID: %d\n", |
941 | cpu_core_id[cpu]); | 972 | cpu_core_id[cpu]); |
942 | } | 973 | } |
@@ -975,7 +1006,7 @@ static void srat_detect_node(void) | |||
975 | node = apicid_to_node[hard_smp_processor_id()]; | 1006 | node = apicid_to_node[hard_smp_processor_id()]; |
976 | if (node == NUMA_NO_NODE) | 1007 | if (node == NUMA_NO_NODE) |
977 | node = 0; | 1008 | node = 0; |
978 | cpu_to_node[cpu] = node; | 1009 | numa_set_node(cpu, node); |
979 | 1010 | ||
980 | if (acpi_numa > 0) | 1011 | if (acpi_numa > 0) |
981 | printk(KERN_INFO "CPU %d -> Node %d\n", cpu, node); | 1012 | printk(KERN_INFO "CPU %d -> Node %d\n", cpu, node); |
@@ -993,13 +1024,20 @@ static void __cpuinit init_intel(struct cpuinfo_x86 *c) | |||
993 | unsigned eax = cpuid_eax(0x80000008); | 1024 | unsigned eax = cpuid_eax(0x80000008); |
994 | c->x86_virt_bits = (eax >> 8) & 0xff; | 1025 | c->x86_virt_bits = (eax >> 8) & 0xff; |
995 | c->x86_phys_bits = eax & 0xff; | 1026 | c->x86_phys_bits = eax & 0xff; |
1027 | /* CPUID workaround for Intel 0F34 CPU */ | ||
1028 | if (c->x86_vendor == X86_VENDOR_INTEL && | ||
1029 | c->x86 == 0xF && c->x86_model == 0x3 && | ||
1030 | c->x86_mask == 0x4) | ||
1031 | c->x86_phys_bits = 36; | ||
996 | } | 1032 | } |
997 | 1033 | ||
998 | if (c->x86 == 15) | 1034 | if (c->x86 == 15) |
999 | c->x86_cache_alignment = c->x86_clflush_size * 2; | 1035 | c->x86_cache_alignment = c->x86_clflush_size * 2; |
1000 | if (c->x86 >= 15) | 1036 | if ((c->x86 == 0xf && c->x86_model >= 0x03) || |
1037 | (c->x86 == 0x6 && c->x86_model >= 0x0e)) | ||
1001 | set_bit(X86_FEATURE_CONSTANT_TSC, &c->x86_capability); | 1038 | set_bit(X86_FEATURE_CONSTANT_TSC, &c->x86_capability); |
1002 | c->x86_num_cores = intel_num_cpu_cores(c); | 1039 | set_bit(X86_FEATURE_SYNC_RDTSC, &c->x86_capability); |
1040 | c->x86_max_cores = intel_num_cpu_cores(c); | ||
1003 | 1041 | ||
1004 | srat_detect_node(); | 1042 | srat_detect_node(); |
1005 | } | 1043 | } |
@@ -1037,7 +1075,7 @@ void __cpuinit early_identify_cpu(struct cpuinfo_x86 *c) | |||
1037 | c->x86_model_id[0] = '\0'; /* Unset */ | 1075 | c->x86_model_id[0] = '\0'; /* Unset */ |
1038 | c->x86_clflush_size = 64; | 1076 | c->x86_clflush_size = 64; |
1039 | c->x86_cache_alignment = c->x86_clflush_size; | 1077 | c->x86_cache_alignment = c->x86_clflush_size; |
1040 | c->x86_num_cores = 1; | 1078 | c->x86_max_cores = 1; |
1041 | c->extended_cpuid_level = 0; | 1079 | c->extended_cpuid_level = 0; |
1042 | memset(&c->x86_capability, 0, sizeof c->x86_capability); | 1080 | memset(&c->x86_capability, 0, sizeof c->x86_capability); |
1043 | 1081 | ||
@@ -1060,10 +1098,10 @@ void __cpuinit early_identify_cpu(struct cpuinfo_x86 *c) | |||
1060 | c->x86 = (tfms >> 8) & 0xf; | 1098 | c->x86 = (tfms >> 8) & 0xf; |
1061 | c->x86_model = (tfms >> 4) & 0xf; | 1099 | c->x86_model = (tfms >> 4) & 0xf; |
1062 | c->x86_mask = tfms & 0xf; | 1100 | c->x86_mask = tfms & 0xf; |
1063 | if (c->x86 == 0xf) { | 1101 | if (c->x86 == 0xf) |
1064 | c->x86 += (tfms >> 20) & 0xff; | 1102 | c->x86 += (tfms >> 20) & 0xff; |
1103 | if (c->x86 >= 0x6) | ||
1065 | c->x86_model += ((tfms >> 16) & 0xF) << 4; | 1104 | c->x86_model += ((tfms >> 16) & 0xF) << 4; |
1066 | } | ||
1067 | if (c->x86_capability[0] & (1<<19)) | 1105 | if (c->x86_capability[0] & (1<<19)) |
1068 | c->x86_clflush_size = ((misc >> 8) & 0xff) * 8; | 1106 | c->x86_clflush_size = ((misc >> 8) & 0xff) * 8; |
1069 | } else { | 1107 | } else { |
@@ -1197,7 +1235,7 @@ static int show_cpuinfo(struct seq_file *m, void *v) | |||
1197 | NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, | 1235 | NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, |
1198 | NULL, NULL, NULL, "syscall", NULL, NULL, NULL, NULL, | 1236 | NULL, NULL, NULL, "syscall", NULL, NULL, NULL, NULL, |
1199 | NULL, NULL, NULL, NULL, "nx", NULL, "mmxext", NULL, | 1237 | NULL, NULL, NULL, NULL, "nx", NULL, "mmxext", NULL, |
1200 | NULL, "fxsr_opt", NULL, NULL, NULL, "lm", "3dnowext", "3dnow", | 1238 | NULL, "fxsr_opt", "rdtscp", NULL, NULL, "lm", "3dnowext", "3dnow", |
1201 | 1239 | ||
1202 | /* Transmeta-defined */ | 1240 | /* Transmeta-defined */ |
1203 | "recovery", "longrun", NULL, "lrti", NULL, NULL, NULL, NULL, | 1241 | "recovery", "longrun", NULL, "lrti", NULL, NULL, NULL, NULL, |
@@ -1225,7 +1263,7 @@ static int show_cpuinfo(struct seq_file *m, void *v) | |||
1225 | NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, | 1263 | NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, |
1226 | 1264 | ||
1227 | /* AMD-defined (#2) */ | 1265 | /* AMD-defined (#2) */ |
1228 | "lahf_lm", "cmp_legacy", NULL, NULL, NULL, NULL, NULL, NULL, | 1266 | "lahf_lm", "cmp_legacy", "svm", NULL, "cr8_legacy", NULL, NULL, NULL, |
1229 | NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, | 1267 | NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, |
1230 | NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, | 1268 | NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, |
1231 | NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, | 1269 | NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, |
@@ -1236,7 +1274,9 @@ static int show_cpuinfo(struct seq_file *m, void *v) | |||
1236 | "vid", /* voltage id control */ | 1274 | "vid", /* voltage id control */ |
1237 | "ttp", /* thermal trip */ | 1275 | "ttp", /* thermal trip */ |
1238 | "tm", | 1276 | "tm", |
1239 | "stc" | 1277 | "stc", |
1278 | NULL, | ||
1279 | /* nothing */ /* constant_tsc - moved to flags */ | ||
1240 | }; | 1280 | }; |
1241 | 1281 | ||
1242 | 1282 | ||
@@ -1262,8 +1302,11 @@ static int show_cpuinfo(struct seq_file *m, void *v) | |||
1262 | seq_printf(m, "stepping\t: unknown\n"); | 1302 | seq_printf(m, "stepping\t: unknown\n"); |
1263 | 1303 | ||
1264 | if (cpu_has(c,X86_FEATURE_TSC)) { | 1304 | if (cpu_has(c,X86_FEATURE_TSC)) { |
1305 | unsigned int freq = cpufreq_quick_get((unsigned)(c-cpu_data)); | ||
1306 | if (!freq) | ||
1307 | freq = cpu_khz; | ||
1265 | seq_printf(m, "cpu MHz\t\t: %u.%03u\n", | 1308 | seq_printf(m, "cpu MHz\t\t: %u.%03u\n", |
1266 | cpu_khz / 1000, (cpu_khz % 1000)); | 1309 | freq / 1000, (freq % 1000)); |
1267 | } | 1310 | } |
1268 | 1311 | ||
1269 | /* Cache size */ | 1312 | /* Cache size */ |
@@ -1271,13 +1314,12 @@ static int show_cpuinfo(struct seq_file *m, void *v) | |||
1271 | seq_printf(m, "cache size\t: %d KB\n", c->x86_cache_size); | 1314 | seq_printf(m, "cache size\t: %d KB\n", c->x86_cache_size); |
1272 | 1315 | ||
1273 | #ifdef CONFIG_SMP | 1316 | #ifdef CONFIG_SMP |
1274 | if (smp_num_siblings * c->x86_num_cores > 1) { | 1317 | if (smp_num_siblings * c->x86_max_cores > 1) { |
1275 | int cpu = c - cpu_data; | 1318 | int cpu = c - cpu_data; |
1276 | seq_printf(m, "physical id\t: %d\n", phys_proc_id[cpu]); | 1319 | seq_printf(m, "physical id\t: %d\n", phys_proc_id[cpu]); |
1277 | seq_printf(m, "siblings\t: %d\n", | 1320 | seq_printf(m, "siblings\t: %d\n", cpus_weight(cpu_core_map[cpu])); |
1278 | c->x86_num_cores * smp_num_siblings); | ||
1279 | seq_printf(m, "core id\t\t: %d\n", cpu_core_id[cpu]); | 1321 | seq_printf(m, "core id\t\t: %d\n", cpu_core_id[cpu]); |
1280 | seq_printf(m, "cpu cores\t: %d\n", c->x86_num_cores); | 1322 | seq_printf(m, "cpu cores\t: %d\n", c->booted_cores); |
1281 | } | 1323 | } |
1282 | #endif | 1324 | #endif |
1283 | 1325 | ||
@@ -1314,8 +1356,11 @@ static int show_cpuinfo(struct seq_file *m, void *v) | |||
1314 | unsigned i; | 1356 | unsigned i; |
1315 | for (i = 0; i < 32; i++) | 1357 | for (i = 0; i < 32; i++) |
1316 | if (c->x86_power & (1 << i)) { | 1358 | if (c->x86_power & (1 << i)) { |
1317 | if (i < ARRAY_SIZE(x86_power_flags)) | 1359 | if (i < ARRAY_SIZE(x86_power_flags) && |
1318 | seq_printf(m, " %s", x86_power_flags[i]); | 1360 | x86_power_flags[i]) |
1361 | seq_printf(m, "%s%s", | ||
1362 | x86_power_flags[i][0]?" ":"", | ||
1363 | x86_power_flags[i]); | ||
1319 | else | 1364 | else |
1320 | seq_printf(m, " [%d]", i); | 1365 | seq_printf(m, " [%d]", i); |
1321 | } | 1366 | } |
@@ -1347,3 +1392,11 @@ struct seq_operations cpuinfo_op = { | |||
1347 | .stop = c_stop, | 1392 | .stop = c_stop, |
1348 | .show = show_cpuinfo, | 1393 | .show = show_cpuinfo, |
1349 | }; | 1394 | }; |
1395 | |||
1396 | static int __init run_dmi_scan(void) | ||
1397 | { | ||
1398 | dmi_scan_machine(); | ||
1399 | return 0; | ||
1400 | } | ||
1401 | core_initcall(run_dmi_scan); | ||
1402 | |||
diff --git a/arch/x86_64/kernel/setup64.c b/arch/x86_64/kernel/setup64.c index 79190891fbc5..70f1bb808a20 100644 --- a/arch/x86_64/kernel/setup64.c +++ b/arch/x86_64/kernel/setup64.c | |||
@@ -30,14 +30,15 @@ char x86_boot_params[BOOT_PARAM_SIZE] __initdata = {0,}; | |||
30 | 30 | ||
31 | cpumask_t cpu_initialized __cpuinitdata = CPU_MASK_NONE; | 31 | cpumask_t cpu_initialized __cpuinitdata = CPU_MASK_NONE; |
32 | 32 | ||
33 | struct x8664_pda cpu_pda[NR_CPUS] __cacheline_aligned; | 33 | struct x8664_pda *_cpu_pda[NR_CPUS] __read_mostly; |
34 | struct x8664_pda boot_cpu_pda[NR_CPUS] __cacheline_aligned; | ||
34 | 35 | ||
35 | struct desc_ptr idt_descr = { 256 * 16, (unsigned long) idt_table }; | 36 | struct desc_ptr idt_descr = { 256 * 16, (unsigned long) idt_table }; |
36 | 37 | ||
37 | char boot_cpu_stack[IRQSTACKSIZE] __attribute__((section(".bss.page_aligned"))); | 38 | char boot_cpu_stack[IRQSTACKSIZE] __attribute__((section(".bss.page_aligned"))); |
38 | 39 | ||
39 | unsigned long __supported_pte_mask __read_mostly = ~0UL; | 40 | unsigned long __supported_pte_mask __read_mostly = ~0UL; |
40 | static int do_not_nx __initdata = 0; | 41 | static int do_not_nx __cpuinitdata = 0; |
41 | 42 | ||
42 | /* noexec=on|off | 43 | /* noexec=on|off |
43 | Control non executable mappings for 64bit processes. | 44 | Control non executable mappings for 64bit processes. |
@@ -110,18 +111,18 @@ void __init setup_per_cpu_areas(void) | |||
110 | } | 111 | } |
111 | if (!ptr) | 112 | if (!ptr) |
112 | panic("Cannot allocate cpu data for CPU %d\n", i); | 113 | panic("Cannot allocate cpu data for CPU %d\n", i); |
113 | cpu_pda[i].data_offset = ptr - __per_cpu_start; | 114 | cpu_pda(i)->data_offset = ptr - __per_cpu_start; |
114 | memcpy(ptr, __per_cpu_start, __per_cpu_end - __per_cpu_start); | 115 | memcpy(ptr, __per_cpu_start, __per_cpu_end - __per_cpu_start); |
115 | } | 116 | } |
116 | } | 117 | } |
117 | 118 | ||
118 | void pda_init(int cpu) | 119 | void pda_init(int cpu) |
119 | { | 120 | { |
120 | struct x8664_pda *pda = &cpu_pda[cpu]; | 121 | struct x8664_pda *pda = cpu_pda(cpu); |
121 | 122 | ||
122 | /* Setup up data that may be needed in __get_free_pages early */ | 123 | /* Setup up data that may be needed in __get_free_pages early */ |
123 | asm volatile("movl %0,%%fs ; movl %0,%%gs" :: "r" (0)); | 124 | asm volatile("movl %0,%%fs ; movl %0,%%gs" :: "r" (0)); |
124 | wrmsrl(MSR_GS_BASE, cpu_pda + cpu); | 125 | wrmsrl(MSR_GS_BASE, pda); |
125 | 126 | ||
126 | pda->cpunumber = cpu; | 127 | pda->cpunumber = cpu; |
127 | pda->irqcount = -1; | 128 | pda->irqcount = -1; |
@@ -141,12 +142,11 @@ void pda_init(int cpu) | |||
141 | panic("cannot allocate irqstack for cpu %d", cpu); | 142 | panic("cannot allocate irqstack for cpu %d", cpu); |
142 | } | 143 | } |
143 | 144 | ||
144 | asm volatile("movq %0,%%cr3" :: "r" (__pa_symbol(&init_level4_pgt))); | ||
145 | 145 | ||
146 | pda->irqstackptr += IRQSTACKSIZE-64; | 146 | pda->irqstackptr += IRQSTACKSIZE-64; |
147 | } | 147 | } |
148 | 148 | ||
149 | char boot_exception_stacks[N_EXCEPTION_STACKS * EXCEPTION_STKSZ] | 149 | char boot_exception_stacks[(N_EXCEPTION_STACKS - 1) * EXCEPTION_STKSZ + DEBUG_STKSZ] |
150 | __attribute__((section(".bss.page_aligned"))); | 150 | __attribute__((section(".bss.page_aligned"))); |
151 | 151 | ||
152 | /* May not be marked __init: used by software suspend */ | 152 | /* May not be marked __init: used by software suspend */ |
@@ -197,6 +197,7 @@ void __cpuinit cpu_init (void) | |||
197 | /* CPU 0 is initialised in head64.c */ | 197 | /* CPU 0 is initialised in head64.c */ |
198 | if (cpu != 0) { | 198 | if (cpu != 0) { |
199 | pda_init(cpu); | 199 | pda_init(cpu); |
200 | zap_low_mappings(cpu); | ||
200 | } else | 201 | } else |
201 | estacks = boot_exception_stacks; | 202 | estacks = boot_exception_stacks; |
202 | 203 | ||
@@ -213,23 +214,14 @@ void __cpuinit cpu_init (void) | |||
213 | * Initialize the per-CPU GDT with the boot GDT, | 214 | * Initialize the per-CPU GDT with the boot GDT, |
214 | * and set up the GDT descriptor: | 215 | * and set up the GDT descriptor: |
215 | */ | 216 | */ |
216 | if (cpu) { | 217 | if (cpu) |
217 | memcpy(cpu_gdt_table[cpu], cpu_gdt_table[0], GDT_SIZE); | 218 | memcpy(cpu_gdt(cpu), cpu_gdt_table, GDT_SIZE); |
218 | } | ||
219 | 219 | ||
220 | cpu_gdt_descr[cpu].size = GDT_SIZE; | 220 | cpu_gdt_descr[cpu].size = GDT_SIZE; |
221 | cpu_gdt_descr[cpu].address = (unsigned long)cpu_gdt_table[cpu]; | ||
222 | asm volatile("lgdt %0" :: "m" (cpu_gdt_descr[cpu])); | 221 | asm volatile("lgdt %0" :: "m" (cpu_gdt_descr[cpu])); |
223 | asm volatile("lidt %0" :: "m" (idt_descr)); | 222 | asm volatile("lidt %0" :: "m" (idt_descr)); |
224 | 223 | ||
225 | memcpy(me->thread.tls_array, cpu_gdt_table[cpu], GDT_ENTRY_TLS_ENTRIES * 8); | 224 | memset(me->thread.tls_array, 0, GDT_ENTRY_TLS_ENTRIES * 8); |
226 | |||
227 | /* | ||
228 | * Delete NT | ||
229 | */ | ||
230 | |||
231 | asm volatile("pushfq ; popq %%rax ; btr $14,%%rax ; pushq %%rax ; popfq" ::: "eax"); | ||
232 | |||
233 | syscall_init(); | 225 | syscall_init(); |
234 | 226 | ||
235 | wrmsrl(MSR_FS_BASE, 0); | 227 | wrmsrl(MSR_FS_BASE, 0); |
@@ -243,13 +235,27 @@ void __cpuinit cpu_init (void) | |||
243 | */ | 235 | */ |
244 | for (v = 0; v < N_EXCEPTION_STACKS; v++) { | 236 | for (v = 0; v < N_EXCEPTION_STACKS; v++) { |
245 | if (cpu) { | 237 | if (cpu) { |
246 | estacks = (char *)__get_free_pages(GFP_ATOMIC, | 238 | static const unsigned int order[N_EXCEPTION_STACKS] = { |
247 | EXCEPTION_STACK_ORDER); | 239 | [0 ... N_EXCEPTION_STACKS - 1] = EXCEPTION_STACK_ORDER, |
240 | [DEBUG_STACK - 1] = DEBUG_STACK_ORDER | ||
241 | }; | ||
242 | |||
243 | estacks = (char *)__get_free_pages(GFP_ATOMIC, order[v]); | ||
248 | if (!estacks) | 244 | if (!estacks) |
249 | panic("Cannot allocate exception stack %ld %d\n", | 245 | panic("Cannot allocate exception stack %ld %d\n", |
250 | v, cpu); | 246 | v, cpu); |
251 | } | 247 | } |
252 | estacks += EXCEPTION_STKSZ; | 248 | switch (v + 1) { |
249 | #if DEBUG_STKSZ > EXCEPTION_STKSZ | ||
250 | case DEBUG_STACK: | ||
251 | cpu_pda[cpu].debugstack = (unsigned long)estacks; | ||
252 | estacks += DEBUG_STKSZ; | ||
253 | break; | ||
254 | #endif | ||
255 | default: | ||
256 | estacks += EXCEPTION_STKSZ; | ||
257 | break; | ||
258 | } | ||
253 | t->ist[v] = (unsigned long)estacks; | 259 | t->ist[v] = (unsigned long)estacks; |
254 | } | 260 | } |
255 | 261 | ||
diff --git a/arch/x86_64/kernel/signal.c b/arch/x86_64/kernel/signal.c index d642fbf3da29..5876df116c92 100644 --- a/arch/x86_64/kernel/signal.c +++ b/arch/x86_64/kernel/signal.c | |||
@@ -110,6 +110,15 @@ restore_sigcontext(struct pt_regs *regs, struct sigcontext __user *sc, unsigned | |||
110 | COPY(r14); | 110 | COPY(r14); |
111 | COPY(r15); | 111 | COPY(r15); |
112 | 112 | ||
113 | /* Kernel saves and restores only the CS segment register on signals, | ||
114 | * which is the bare minimum needed to allow mixed 32/64-bit code. | ||
115 | * App's signal handler can save/restore other segments if needed. */ | ||
116 | { | ||
117 | unsigned cs; | ||
118 | err |= __get_user(cs, &sc->cs); | ||
119 | regs->cs = cs | 3; /* Force into user mode */ | ||
120 | } | ||
121 | |||
113 | { | 122 | { |
114 | unsigned int tmpflags; | 123 | unsigned int tmpflags; |
115 | err |= __get_user(tmpflags, &sc->eflags); | 124 | err |= __get_user(tmpflags, &sc->eflags); |
@@ -187,6 +196,7 @@ setup_sigcontext(struct sigcontext __user *sc, struct pt_regs *regs, unsigned lo | |||
187 | { | 196 | { |
188 | int err = 0; | 197 | int err = 0; |
189 | 198 | ||
199 | err |= __put_user(regs->cs, &sc->cs); | ||
190 | err |= __put_user(0, &sc->gs); | 200 | err |= __put_user(0, &sc->gs); |
191 | err |= __put_user(0, &sc->fs); | 201 | err |= __put_user(0, &sc->fs); |
192 | 202 | ||
@@ -318,7 +328,14 @@ static int setup_rt_frame(int sig, struct k_sigaction *ka, siginfo_t *info, | |||
318 | 328 | ||
319 | regs->rsp = (unsigned long)frame; | 329 | regs->rsp = (unsigned long)frame; |
320 | 330 | ||
331 | /* Set up the CS register to run signal handlers in 64-bit mode, | ||
332 | even if the handler happens to be interrupting 32-bit code. */ | ||
333 | regs->cs = __USER_CS; | ||
334 | |||
335 | /* This, by contrast, has nothing to do with segment registers - | ||
336 | see include/asm-x86_64/uaccess.h for details. */ | ||
321 | set_fs(USER_DS); | 337 | set_fs(USER_DS); |
338 | |||
322 | regs->eflags &= ~TF_MASK; | 339 | regs->eflags &= ~TF_MASK; |
323 | if (test_thread_flag(TIF_SINGLESTEP)) | 340 | if (test_thread_flag(TIF_SINGLESTEP)) |
324 | ptrace_notify(SIGTRAP); | 341 | ptrace_notify(SIGTRAP); |
diff --git a/arch/x86_64/kernel/smp.c b/arch/x86_64/kernel/smp.c index 9db9dda161b4..19ef012b1f17 100644 --- a/arch/x86_64/kernel/smp.c +++ b/arch/x86_64/kernel/smp.c | |||
@@ -27,8 +27,7 @@ | |||
27 | #include <asm/mmu_context.h> | 27 | #include <asm/mmu_context.h> |
28 | #include <asm/proto.h> | 28 | #include <asm/proto.h> |
29 | #include <asm/apicdef.h> | 29 | #include <asm/apicdef.h> |
30 | 30 | #include <asm/idle.h> | |
31 | #define __cpuinit __init | ||
32 | 31 | ||
33 | /* | 32 | /* |
34 | * Smarter SMP flushing macros. | 33 | * Smarter SMP flushing macros. |
@@ -282,11 +281,6 @@ void flush_tlb_all(void) | |||
282 | on_each_cpu(do_flush_tlb_all, NULL, 1, 1); | 281 | on_each_cpu(do_flush_tlb_all, NULL, 1, 1); |
283 | } | 282 | } |
284 | 283 | ||
285 | void smp_kdb_stop(void) | ||
286 | { | ||
287 | send_IPI_allbutself(KDB_VECTOR); | ||
288 | } | ||
289 | |||
290 | /* | 284 | /* |
291 | * this function sends a 'reschedule' IPI to another CPU. | 285 | * this function sends a 'reschedule' IPI to another CPU. |
292 | * it goes straight through and wastes no time serializing | 286 | * it goes straight through and wastes no time serializing |
@@ -452,13 +446,14 @@ int smp_call_function (void (*func) (void *info), void *info, int nonatomic, | |||
452 | 446 | ||
453 | void smp_stop_cpu(void) | 447 | void smp_stop_cpu(void) |
454 | { | 448 | { |
449 | unsigned long flags; | ||
455 | /* | 450 | /* |
456 | * Remove this CPU: | 451 | * Remove this CPU: |
457 | */ | 452 | */ |
458 | cpu_clear(smp_processor_id(), cpu_online_map); | 453 | cpu_clear(smp_processor_id(), cpu_online_map); |
459 | local_irq_disable(); | 454 | local_irq_save(flags); |
460 | disable_local_APIC(); | 455 | disable_local_APIC(); |
461 | local_irq_enable(); | 456 | local_irq_restore(flags); |
462 | } | 457 | } |
463 | 458 | ||
464 | static void smp_really_stop_cpu(void *dummy) | 459 | static void smp_really_stop_cpu(void *dummy) |
@@ -513,6 +508,7 @@ asmlinkage void smp_call_function_interrupt(void) | |||
513 | /* | 508 | /* |
514 | * At this point the info structure may be out of scope unless wait==1 | 509 | * At this point the info structure may be out of scope unless wait==1 |
515 | */ | 510 | */ |
511 | exit_idle(); | ||
516 | irq_enter(); | 512 | irq_enter(); |
517 | (*func)(info); | 513 | (*func)(info); |
518 | irq_exit(); | 514 | irq_exit(); |
diff --git a/arch/x86_64/kernel/smpboot.c b/arch/x86_64/kernel/smpboot.c index c4e59bbdc187..a28756ef7cef 100644 --- a/arch/x86_64/kernel/smpboot.c +++ b/arch/x86_64/kernel/smpboot.c | |||
@@ -64,6 +64,7 @@ | |||
64 | int smp_num_siblings = 1; | 64 | int smp_num_siblings = 1; |
65 | /* Package ID of each logical CPU */ | 65 | /* Package ID of each logical CPU */ |
66 | u8 phys_proc_id[NR_CPUS] __read_mostly = { [0 ... NR_CPUS-1] = BAD_APICID }; | 66 | u8 phys_proc_id[NR_CPUS] __read_mostly = { [0 ... NR_CPUS-1] = BAD_APICID }; |
67 | /* core ID of each logical CPU */ | ||
67 | u8 cpu_core_id[NR_CPUS] __read_mostly = { [0 ... NR_CPUS-1] = BAD_APICID }; | 68 | u8 cpu_core_id[NR_CPUS] __read_mostly = { [0 ... NR_CPUS-1] = BAD_APICID }; |
68 | 69 | ||
69 | /* Bitmask of currently online CPUs */ | 70 | /* Bitmask of currently online CPUs */ |
@@ -87,7 +88,10 @@ struct cpuinfo_x86 cpu_data[NR_CPUS] __cacheline_aligned; | |||
87 | /* Set when the idlers are all forked */ | 88 | /* Set when the idlers are all forked */ |
88 | int smp_threads_ready; | 89 | int smp_threads_ready; |
89 | 90 | ||
91 | /* representing HT siblings of each logical CPU */ | ||
90 | cpumask_t cpu_sibling_map[NR_CPUS] __read_mostly; | 92 | cpumask_t cpu_sibling_map[NR_CPUS] __read_mostly; |
93 | |||
94 | /* representing HT and core siblings of each logical CPU */ | ||
91 | cpumask_t cpu_core_map[NR_CPUS] __read_mostly; | 95 | cpumask_t cpu_core_map[NR_CPUS] __read_mostly; |
92 | EXPORT_SYMBOL(cpu_core_map); | 96 | EXPORT_SYMBOL(cpu_core_map); |
93 | 97 | ||
@@ -331,7 +335,13 @@ static __cpuinit void sync_tsc(unsigned int master) | |||
331 | 335 | ||
332 | static void __cpuinit tsc_sync_wait(void) | 336 | static void __cpuinit tsc_sync_wait(void) |
333 | { | 337 | { |
334 | if (notscsync || !cpu_has_tsc) | 338 | /* |
339 | * When the CPU has synchronized TSCs assume the BIOS | ||
340 | * or the hardware already synced. Otherwise we could | ||
341 | * mess up a possible perfect synchronization with a | ||
342 | * not-quite-perfect algorithm. | ||
343 | */ | ||
344 | if (notscsync || !cpu_has_tsc || !unsynchronized_tsc()) | ||
335 | return; | 345 | return; |
336 | sync_tsc(0); | 346 | sync_tsc(0); |
337 | } | 347 | } |
@@ -434,30 +444,59 @@ void __cpuinit smp_callin(void) | |||
434 | cpu_set(cpuid, cpu_callin_map); | 444 | cpu_set(cpuid, cpu_callin_map); |
435 | } | 445 | } |
436 | 446 | ||
447 | /* representing cpus for which sibling maps can be computed */ | ||
448 | static cpumask_t cpu_sibling_setup_map; | ||
449 | |||
437 | static inline void set_cpu_sibling_map(int cpu) | 450 | static inline void set_cpu_sibling_map(int cpu) |
438 | { | 451 | { |
439 | int i; | 452 | int i; |
453 | struct cpuinfo_x86 *c = cpu_data; | ||
454 | |||
455 | cpu_set(cpu, cpu_sibling_setup_map); | ||
440 | 456 | ||
441 | if (smp_num_siblings > 1) { | 457 | if (smp_num_siblings > 1) { |
442 | for_each_cpu(i) { | 458 | for_each_cpu_mask(i, cpu_sibling_setup_map) { |
443 | if (cpu_core_id[cpu] == cpu_core_id[i]) { | 459 | if (phys_proc_id[cpu] == phys_proc_id[i] && |
460 | cpu_core_id[cpu] == cpu_core_id[i]) { | ||
444 | cpu_set(i, cpu_sibling_map[cpu]); | 461 | cpu_set(i, cpu_sibling_map[cpu]); |
445 | cpu_set(cpu, cpu_sibling_map[i]); | 462 | cpu_set(cpu, cpu_sibling_map[i]); |
463 | cpu_set(i, cpu_core_map[cpu]); | ||
464 | cpu_set(cpu, cpu_core_map[i]); | ||
446 | } | 465 | } |
447 | } | 466 | } |
448 | } else { | 467 | } else { |
449 | cpu_set(cpu, cpu_sibling_map[cpu]); | 468 | cpu_set(cpu, cpu_sibling_map[cpu]); |
450 | } | 469 | } |
451 | 470 | ||
452 | if (current_cpu_data.x86_num_cores > 1) { | 471 | if (current_cpu_data.x86_max_cores == 1) { |
453 | for_each_cpu(i) { | ||
454 | if (phys_proc_id[cpu] == phys_proc_id[i]) { | ||
455 | cpu_set(i, cpu_core_map[cpu]); | ||
456 | cpu_set(cpu, cpu_core_map[i]); | ||
457 | } | ||
458 | } | ||
459 | } else { | ||
460 | cpu_core_map[cpu] = cpu_sibling_map[cpu]; | 472 | cpu_core_map[cpu] = cpu_sibling_map[cpu]; |
473 | c[cpu].booted_cores = 1; | ||
474 | return; | ||
475 | } | ||
476 | |||
477 | for_each_cpu_mask(i, cpu_sibling_setup_map) { | ||
478 | if (phys_proc_id[cpu] == phys_proc_id[i]) { | ||
479 | cpu_set(i, cpu_core_map[cpu]); | ||
480 | cpu_set(cpu, cpu_core_map[i]); | ||
481 | /* | ||
482 | * Does this new cpu bringup a new core? | ||
483 | */ | ||
484 | if (cpus_weight(cpu_sibling_map[cpu]) == 1) { | ||
485 | /* | ||
486 | * for each core in package, increment | ||
487 | * the booted_cores for this new cpu | ||
488 | */ | ||
489 | if (first_cpu(cpu_sibling_map[i]) == i) | ||
490 | c[cpu].booted_cores++; | ||
491 | /* | ||
492 | * increment the core count for all | ||
493 | * the other cpus in this package | ||
494 | */ | ||
495 | if (i != cpu) | ||
496 | c[i].booted_cores++; | ||
497 | } else if (i != cpu && !c[cpu].booted_cores) | ||
498 | c[cpu].booted_cores = c[i].booted_cores; | ||
499 | } | ||
461 | } | 500 | } |
462 | } | 501 | } |
463 | 502 | ||
@@ -613,6 +652,7 @@ static int __cpuinit wakeup_secondary_via_INIT(int phys_apicid, unsigned int sta | |||
613 | send_status = apic_read(APIC_ICR) & APIC_ICR_BUSY; | 652 | send_status = apic_read(APIC_ICR) & APIC_ICR_BUSY; |
614 | } while (send_status && (timeout++ < 1000)); | 653 | } while (send_status && (timeout++ < 1000)); |
615 | 654 | ||
655 | mb(); | ||
616 | atomic_set(&init_deasserted, 1); | 656 | atomic_set(&init_deasserted, 1); |
617 | 657 | ||
618 | num_starts = 2; | 658 | num_starts = 2; |
@@ -626,7 +666,6 @@ static int __cpuinit wakeup_secondary_via_INIT(int phys_apicid, unsigned int sta | |||
626 | 666 | ||
627 | for (j = 1; j <= num_starts; j++) { | 667 | for (j = 1; j <= num_starts; j++) { |
628 | Dprintk("Sending STARTUP #%d.\n",j); | 668 | Dprintk("Sending STARTUP #%d.\n",j); |
629 | apic_read_around(APIC_SPIV); | ||
630 | apic_write(APIC_ESR, 0); | 669 | apic_write(APIC_ESR, 0); |
631 | apic_read(APIC_ESR); | 670 | apic_read(APIC_ESR); |
632 | Dprintk("After apic_write.\n"); | 671 | Dprintk("After apic_write.\n"); |
@@ -665,7 +704,6 @@ static int __cpuinit wakeup_secondary_via_INIT(int phys_apicid, unsigned int sta | |||
665 | * Due to the Pentium erratum 3AP. | 704 | * Due to the Pentium erratum 3AP. |
666 | */ | 705 | */ |
667 | if (maxlvt > 3) { | 706 | if (maxlvt > 3) { |
668 | apic_read_around(APIC_SPIV); | ||
669 | apic_write(APIC_ESR, 0); | 707 | apic_write(APIC_ESR, 0); |
670 | } | 708 | } |
671 | accept_status = (apic_read(APIC_ESR) & 0xEF); | 709 | accept_status = (apic_read(APIC_ESR) & 0xEF); |
@@ -710,11 +748,35 @@ static int __cpuinit do_boot_cpu(int cpu, int apicid) | |||
710 | }; | 748 | }; |
711 | DECLARE_WORK(work, do_fork_idle, &c_idle); | 749 | DECLARE_WORK(work, do_fork_idle, &c_idle); |
712 | 750 | ||
751 | /* allocate memory for gdts of secondary cpus. Hotplug is considered */ | ||
752 | if (!cpu_gdt_descr[cpu].address && | ||
753 | !(cpu_gdt_descr[cpu].address = get_zeroed_page(GFP_KERNEL))) { | ||
754 | printk(KERN_ERR "Failed to allocate GDT for CPU %d\n", cpu); | ||
755 | return -1; | ||
756 | } | ||
757 | |||
758 | /* Allocate node local memory for AP pdas */ | ||
759 | if (cpu_pda(cpu) == &boot_cpu_pda[cpu]) { | ||
760 | struct x8664_pda *newpda, *pda; | ||
761 | int node = cpu_to_node(cpu); | ||
762 | pda = cpu_pda(cpu); | ||
763 | newpda = kmalloc_node(sizeof (struct x8664_pda), GFP_ATOMIC, | ||
764 | node); | ||
765 | if (newpda) { | ||
766 | memcpy(newpda, pda, sizeof (struct x8664_pda)); | ||
767 | cpu_pda(cpu) = newpda; | ||
768 | } else | ||
769 | printk(KERN_ERR | ||
770 | "Could not allocate node local PDA for CPU %d on node %d\n", | ||
771 | cpu, node); | ||
772 | } | ||
773 | |||
774 | |||
713 | c_idle.idle = get_idle_for_cpu(cpu); | 775 | c_idle.idle = get_idle_for_cpu(cpu); |
714 | 776 | ||
715 | if (c_idle.idle) { | 777 | if (c_idle.idle) { |
716 | c_idle.idle->thread.rsp = (unsigned long) (((struct pt_regs *) | 778 | c_idle.idle->thread.rsp = (unsigned long) (((struct pt_regs *) |
717 | (THREAD_SIZE + (unsigned long) c_idle.idle->thread_info)) - 1); | 779 | (THREAD_SIZE + task_stack_page(c_idle.idle))) - 1); |
718 | init_idle(c_idle.idle, cpu); | 780 | init_idle(c_idle.idle, cpu); |
719 | goto do_rest; | 781 | goto do_rest; |
720 | } | 782 | } |
@@ -745,14 +807,14 @@ static int __cpuinit do_boot_cpu(int cpu, int apicid) | |||
745 | 807 | ||
746 | do_rest: | 808 | do_rest: |
747 | 809 | ||
748 | cpu_pda[cpu].pcurrent = c_idle.idle; | 810 | cpu_pda(cpu)->pcurrent = c_idle.idle; |
749 | 811 | ||
750 | start_rip = setup_trampoline(); | 812 | start_rip = setup_trampoline(); |
751 | 813 | ||
752 | init_rsp = c_idle.idle->thread.rsp; | 814 | init_rsp = c_idle.idle->thread.rsp; |
753 | per_cpu(init_tss,cpu).rsp0 = init_rsp; | 815 | per_cpu(init_tss,cpu).rsp0 = init_rsp; |
754 | initial_code = start_secondary; | 816 | initial_code = start_secondary; |
755 | clear_ti_thread_flag(c_idle.idle->thread_info, TIF_FORK); | 817 | clear_tsk_thread_flag(c_idle.idle, TIF_FORK); |
756 | 818 | ||
757 | printk(KERN_INFO "Booting processor %d/%d APIC 0x%x\n", cpu, | 819 | printk(KERN_INFO "Booting processor %d/%d APIC 0x%x\n", cpu, |
758 | cpus_weight(cpu_present_map), | 820 | cpus_weight(cpu_present_map), |
@@ -778,11 +840,8 @@ do_rest: | |||
778 | /* | 840 | /* |
779 | * Be paranoid about clearing APIC errors. | 841 | * Be paranoid about clearing APIC errors. |
780 | */ | 842 | */ |
781 | if (APIC_INTEGRATED(apic_version[apicid])) { | 843 | apic_write(APIC_ESR, 0); |
782 | apic_read_around(APIC_SPIV); | 844 | apic_read(APIC_ESR); |
783 | apic_write(APIC_ESR, 0); | ||
784 | apic_read(APIC_ESR); | ||
785 | } | ||
786 | 845 | ||
787 | /* | 846 | /* |
788 | * Status is now clean | 847 | * Status is now clean |
@@ -879,6 +938,9 @@ static __init void disable_smp(void) | |||
879 | } | 938 | } |
880 | 939 | ||
881 | #ifdef CONFIG_HOTPLUG_CPU | 940 | #ifdef CONFIG_HOTPLUG_CPU |
941 | |||
942 | int additional_cpus __initdata = -1; | ||
943 | |||
882 | /* | 944 | /* |
883 | * cpu_possible_map should be static, it cannot change as cpu's | 945 | * cpu_possible_map should be static, it cannot change as cpu's |
884 | * are onlined, or offlined. The reason is per-cpu data-structures | 946 | * are onlined, or offlined. The reason is per-cpu data-structures |
@@ -887,14 +949,35 @@ static __init void disable_smp(void) | |||
887 | * cpu_present_map on the other hand can change dynamically. | 949 | * cpu_present_map on the other hand can change dynamically. |
888 | * In case when cpu_hotplug is not compiled, then we resort to current | 950 | * In case when cpu_hotplug is not compiled, then we resort to current |
889 | * behaviour, which is cpu_possible == cpu_present. | 951 | * behaviour, which is cpu_possible == cpu_present. |
890 | * If cpu-hotplug is supported, then we need to preallocate for all | ||
891 | * those NR_CPUS, hence cpu_possible_map represents entire NR_CPUS range. | ||
892 | * - Ashok Raj | 952 | * - Ashok Raj |
953 | * | ||
954 | * Three ways to find out the number of additional hotplug CPUs: | ||
955 | * - If the BIOS specified disabled CPUs in ACPI/mptables use that. | ||
956 | * - The user can overwrite it with additional_cpus=NUM | ||
957 | * - Otherwise don't reserve additional CPUs. | ||
958 | * We do this because additional CPUs waste a lot of memory. | ||
959 | * -AK | ||
893 | */ | 960 | */ |
894 | __init void prefill_possible_map(void) | 961 | __init void prefill_possible_map(void) |
895 | { | 962 | { |
896 | int i; | 963 | int i; |
897 | for (i = 0; i < NR_CPUS; i++) | 964 | int possible; |
965 | |||
966 | if (additional_cpus == -1) { | ||
967 | if (disabled_cpus > 0) | ||
968 | additional_cpus = disabled_cpus; | ||
969 | else | ||
970 | additional_cpus = 0; | ||
971 | } | ||
972 | possible = num_processors + additional_cpus; | ||
973 | if (possible > NR_CPUS) | ||
974 | possible = NR_CPUS; | ||
975 | |||
976 | printk(KERN_INFO "SMP: Allowing %d CPUs, %d hotplug CPUs\n", | ||
977 | possible, | ||
978 | max_t(int, possible - num_processors, 0)); | ||
979 | |||
980 | for (i = 0; i < possible; i++) | ||
898 | cpu_set(i, cpu_possible_map); | 981 | cpu_set(i, cpu_possible_map); |
899 | } | 982 | } |
900 | #endif | 983 | #endif |
@@ -936,7 +1019,7 @@ static int __init smp_sanity_check(unsigned max_cpus) | |||
936 | /* | 1019 | /* |
937 | * If we couldn't find a local APIC, then get out of here now! | 1020 | * If we couldn't find a local APIC, then get out of here now! |
938 | */ | 1021 | */ |
939 | if (APIC_INTEGRATED(apic_version[boot_cpu_id]) && !cpu_has_apic) { | 1022 | if (!cpu_has_apic) { |
940 | printk(KERN_ERR "BIOS bug, local APIC #%d not detected!...\n", | 1023 | printk(KERN_ERR "BIOS bug, local APIC #%d not detected!...\n", |
941 | boot_cpu_id); | 1024 | boot_cpu_id); |
942 | printk(KERN_ERR "... forcing use of dummy APIC emulation. (tell your hw vendor)\n"); | 1025 | printk(KERN_ERR "... forcing use of dummy APIC emulation. (tell your hw vendor)\n"); |
@@ -965,6 +1048,7 @@ void __init smp_prepare_cpus(unsigned int max_cpus) | |||
965 | nmi_watchdog_default(); | 1048 | nmi_watchdog_default(); |
966 | current_cpu_data = boot_cpu_data; | 1049 | current_cpu_data = boot_cpu_data; |
967 | current_thread_info()->cpu = 0; /* needed? */ | 1050 | current_thread_info()->cpu = 0; /* needed? */ |
1051 | set_cpu_sibling_map(0); | ||
968 | 1052 | ||
969 | if (smp_sanity_check(max_cpus) < 0) { | 1053 | if (smp_sanity_check(max_cpus) < 0) { |
970 | printk(KERN_INFO "SMP disabled\n"); | 1054 | printk(KERN_INFO "SMP disabled\n"); |
@@ -1008,8 +1092,6 @@ void __init smp_prepare_boot_cpu(void) | |||
1008 | int me = smp_processor_id(); | 1092 | int me = smp_processor_id(); |
1009 | cpu_set(me, cpu_online_map); | 1093 | cpu_set(me, cpu_online_map); |
1010 | cpu_set(me, cpu_callout_map); | 1094 | cpu_set(me, cpu_callout_map); |
1011 | cpu_set(0, cpu_sibling_map[0]); | ||
1012 | cpu_set(0, cpu_core_map[0]); | ||
1013 | per_cpu(cpu_state, me) = CPU_ONLINE; | 1095 | per_cpu(cpu_state, me) = CPU_ONLINE; |
1014 | } | 1096 | } |
1015 | 1097 | ||
@@ -1062,9 +1144,6 @@ int __cpuinit __cpu_up(unsigned int cpu) | |||
1062 | */ | 1144 | */ |
1063 | void __init smp_cpus_done(unsigned int max_cpus) | 1145 | void __init smp_cpus_done(unsigned int max_cpus) |
1064 | { | 1146 | { |
1065 | #ifndef CONFIG_HOTPLUG_CPU | ||
1066 | zap_low_mappings(); | ||
1067 | #endif | ||
1068 | smp_cleanup_boot(); | 1147 | smp_cleanup_boot(); |
1069 | 1148 | ||
1070 | #ifdef CONFIG_X86_IO_APIC | 1149 | #ifdef CONFIG_X86_IO_APIC |
@@ -1081,15 +1160,24 @@ void __init smp_cpus_done(unsigned int max_cpus) | |||
1081 | static void remove_siblinginfo(int cpu) | 1160 | static void remove_siblinginfo(int cpu) |
1082 | { | 1161 | { |
1083 | int sibling; | 1162 | int sibling; |
1163 | struct cpuinfo_x86 *c = cpu_data; | ||
1084 | 1164 | ||
1165 | for_each_cpu_mask(sibling, cpu_core_map[cpu]) { | ||
1166 | cpu_clear(cpu, cpu_core_map[sibling]); | ||
1167 | /* | ||
1168 | * last thread sibling in this cpu core going down | ||
1169 | */ | ||
1170 | if (cpus_weight(cpu_sibling_map[cpu]) == 1) | ||
1171 | c[sibling].booted_cores--; | ||
1172 | } | ||
1173 | |||
1085 | for_each_cpu_mask(sibling, cpu_sibling_map[cpu]) | 1174 | for_each_cpu_mask(sibling, cpu_sibling_map[cpu]) |
1086 | cpu_clear(cpu, cpu_sibling_map[sibling]); | 1175 | cpu_clear(cpu, cpu_sibling_map[sibling]); |
1087 | for_each_cpu_mask(sibling, cpu_core_map[cpu]) | ||
1088 | cpu_clear(cpu, cpu_core_map[sibling]); | ||
1089 | cpus_clear(cpu_sibling_map[cpu]); | 1176 | cpus_clear(cpu_sibling_map[cpu]); |
1090 | cpus_clear(cpu_core_map[cpu]); | 1177 | cpus_clear(cpu_core_map[cpu]); |
1091 | phys_proc_id[cpu] = BAD_APICID; | 1178 | phys_proc_id[cpu] = BAD_APICID; |
1092 | cpu_core_id[cpu] = BAD_APICID; | 1179 | cpu_core_id[cpu] = BAD_APICID; |
1180 | cpu_clear(cpu, cpu_sibling_setup_map); | ||
1093 | } | 1181 | } |
1094 | 1182 | ||
1095 | void remove_cpu_from_maps(void) | 1183 | void remove_cpu_from_maps(void) |
@@ -1116,7 +1204,7 @@ int __cpu_disable(void) | |||
1116 | if (cpu == 0) | 1204 | if (cpu == 0) |
1117 | return -EBUSY; | 1205 | return -EBUSY; |
1118 | 1206 | ||
1119 | disable_APIC_timer(); | 1207 | clear_local_APIC(); |
1120 | 1208 | ||
1121 | /* | 1209 | /* |
1122 | * HACK: | 1210 | * HACK: |
@@ -1153,6 +1241,12 @@ void __cpu_die(unsigned int cpu) | |||
1153 | printk(KERN_ERR "CPU %u didn't die...\n", cpu); | 1241 | printk(KERN_ERR "CPU %u didn't die...\n", cpu); |
1154 | } | 1242 | } |
1155 | 1243 | ||
1244 | static __init int setup_additional_cpus(char *s) | ||
1245 | { | ||
1246 | return get_option(&s, &additional_cpus); | ||
1247 | } | ||
1248 | __setup("additional_cpus=", setup_additional_cpus); | ||
1249 | |||
1156 | #else /* ... !CONFIG_HOTPLUG_CPU */ | 1250 | #else /* ... !CONFIG_HOTPLUG_CPU */ |
1157 | 1251 | ||
1158 | int __cpu_disable(void) | 1252 | int __cpu_disable(void) |
diff --git a/arch/x86_64/kernel/suspend.c b/arch/x86_64/kernel/suspend.c index fd2bef780882..ecbd34c1093d 100644 --- a/arch/x86_64/kernel/suspend.c +++ b/arch/x86_64/kernel/suspend.c | |||
@@ -120,7 +120,7 @@ void fix_processor_context(void) | |||
120 | 120 | ||
121 | set_tss_desc(cpu,t); /* This just modifies memory; should not be neccessary. But... This is neccessary, because 386 hardware has concept of busy TSS or some similar stupidity. */ | 121 | set_tss_desc(cpu,t); /* This just modifies memory; should not be neccessary. But... This is neccessary, because 386 hardware has concept of busy TSS or some similar stupidity. */ |
122 | 122 | ||
123 | cpu_gdt_table[cpu][GDT_ENTRY_TSS].type = 9; | 123 | cpu_gdt(cpu)[GDT_ENTRY_TSS].type = 9; |
124 | 124 | ||
125 | syscall_init(); /* This sets MSR_*STAR and related */ | 125 | syscall_init(); /* This sets MSR_*STAR and related */ |
126 | load_TR_desc(); /* This does ltr */ | 126 | load_TR_desc(); /* This does ltr */ |
diff --git a/arch/x86_64/kernel/sys_x86_64.c b/arch/x86_64/kernel/sys_x86_64.c index cc7821c68851..6449ea8fe756 100644 --- a/arch/x86_64/kernel/sys_x86_64.c +++ b/arch/x86_64/kernel/sys_x86_64.c | |||
@@ -154,17 +154,3 @@ asmlinkage long sys_uname(struct new_utsname __user * name) | |||
154 | err |= copy_to_user(&name->machine, "i686", 5); | 154 | err |= copy_to_user(&name->machine, "i686", 5); |
155 | return err ? -EFAULT : 0; | 155 | return err ? -EFAULT : 0; |
156 | } | 156 | } |
157 | |||
158 | asmlinkage long sys_time64(long __user * tloc) | ||
159 | { | ||
160 | struct timeval now; | ||
161 | int i; | ||
162 | |||
163 | do_gettimeofday(&now); | ||
164 | i = now.tv_sec; | ||
165 | if (tloc) { | ||
166 | if (put_user(i,tloc)) | ||
167 | i = -EFAULT; | ||
168 | } | ||
169 | return i; | ||
170 | } | ||
diff --git a/arch/x86_64/kernel/syscall.c b/arch/x86_64/kernel/syscall.c index e263685f864c..7c176b3edde0 100644 --- a/arch/x86_64/kernel/syscall.c +++ b/arch/x86_64/kernel/syscall.c | |||
@@ -19,7 +19,7 @@ typedef void (*sys_call_ptr_t)(void); | |||
19 | 19 | ||
20 | extern void sys_ni_syscall(void); | 20 | extern void sys_ni_syscall(void); |
21 | 21 | ||
22 | sys_call_ptr_t sys_call_table[__NR_syscall_max+1] __cacheline_aligned = { | 22 | const sys_call_ptr_t sys_call_table[__NR_syscall_max+1] = { |
23 | /* Smells like a like a compiler bug -- it doesn't work when the & below is removed. */ | 23 | /* Smells like a like a compiler bug -- it doesn't work when the & below is removed. */ |
24 | [0 ... __NR_syscall_max] = &sys_ni_syscall, | 24 | [0 ... __NR_syscall_max] = &sys_ni_syscall, |
25 | #include <asm-x86_64/unistd.h> | 25 | #include <asm-x86_64/unistd.h> |
diff --git a/arch/x86_64/kernel/time.c b/arch/x86_64/kernel/time.c index fdaddc4e5284..f8c47c688443 100644 --- a/arch/x86_64/kernel/time.c +++ b/arch/x86_64/kernel/time.c | |||
@@ -59,7 +59,7 @@ static int notsc __initdata = 0; | |||
59 | unsigned int cpu_khz; /* TSC clocks / usec, not used here */ | 59 | unsigned int cpu_khz; /* TSC clocks / usec, not used here */ |
60 | static unsigned long hpet_period; /* fsecs / HPET clock */ | 60 | static unsigned long hpet_period; /* fsecs / HPET clock */ |
61 | unsigned long hpet_tick; /* HPET clocks / interrupt */ | 61 | unsigned long hpet_tick; /* HPET clocks / interrupt */ |
62 | static int hpet_use_timer; | 62 | static int hpet_use_timer; /* Use counter of hpet for time keeping, otherwise PIT */ |
63 | unsigned long vxtime_hz = PIT_TICK_RATE; | 63 | unsigned long vxtime_hz = PIT_TICK_RATE; |
64 | int report_lost_ticks; /* command line option */ | 64 | int report_lost_ticks; /* command line option */ |
65 | unsigned long long monotonic_base; | 65 | unsigned long long monotonic_base; |
@@ -71,14 +71,6 @@ unsigned long __wall_jiffies __section_wall_jiffies = INITIAL_JIFFIES; | |||
71 | struct timespec __xtime __section_xtime; | 71 | struct timespec __xtime __section_xtime; |
72 | struct timezone __sys_tz __section_sys_tz; | 72 | struct timezone __sys_tz __section_sys_tz; |
73 | 73 | ||
74 | static inline void rdtscll_sync(unsigned long *tsc) | ||
75 | { | ||
76 | #ifdef CONFIG_SMP | ||
77 | sync_core(); | ||
78 | #endif | ||
79 | rdtscll(*tsc); | ||
80 | } | ||
81 | |||
82 | /* | 74 | /* |
83 | * do_gettimeoffset() returns microseconds since last timer interrupt was | 75 | * do_gettimeoffset() returns microseconds since last timer interrupt was |
84 | * triggered by hardware. A memory read of HPET is slower than a register read | 76 | * triggered by hardware. A memory read of HPET is slower than a register read |
@@ -93,7 +85,7 @@ static inline unsigned int do_gettimeoffset_tsc(void) | |||
93 | { | 85 | { |
94 | unsigned long t; | 86 | unsigned long t; |
95 | unsigned long x; | 87 | unsigned long x; |
96 | rdtscll_sync(&t); | 88 | t = get_cycles_sync(); |
97 | if (t < vxtime.last_tsc) t = vxtime.last_tsc; /* hack */ | 89 | if (t < vxtime.last_tsc) t = vxtime.last_tsc; /* hack */ |
98 | x = ((t - vxtime.last_tsc) * vxtime.tsc_quot) >> 32; | 90 | x = ((t - vxtime.last_tsc) * vxtime.tsc_quot) >> 32; |
99 | return x; | 91 | return x; |
@@ -259,8 +251,8 @@ static void set_rtc_mmss(unsigned long nowtime) | |||
259 | #endif | 251 | #endif |
260 | 252 | ||
261 | { | 253 | { |
262 | BIN_TO_BCD(real_seconds); | 254 | BIN_TO_BCD(real_seconds); |
263 | BIN_TO_BCD(real_minutes); | 255 | BIN_TO_BCD(real_minutes); |
264 | CMOS_WRITE(real_seconds, RTC_SECONDS); | 256 | CMOS_WRITE(real_seconds, RTC_SECONDS); |
265 | CMOS_WRITE(real_minutes, RTC_MINUTES); | 257 | CMOS_WRITE(real_minutes, RTC_MINUTES); |
266 | } | 258 | } |
@@ -297,25 +289,21 @@ unsigned long long monotonic_clock(void) | |||
297 | last_offset = vxtime.last; | 289 | last_offset = vxtime.last; |
298 | base = monotonic_base; | 290 | base = monotonic_base; |
299 | this_offset = hpet_readl(HPET_COUNTER); | 291 | this_offset = hpet_readl(HPET_COUNTER); |
300 | |||
301 | } while (read_seqretry(&xtime_lock, seq)); | 292 | } while (read_seqretry(&xtime_lock, seq)); |
302 | offset = (this_offset - last_offset); | 293 | offset = (this_offset - last_offset); |
303 | offset *=(NSEC_PER_SEC/HZ)/hpet_tick; | 294 | offset *=(NSEC_PER_SEC/HZ)/hpet_tick; |
304 | return base + offset; | 295 | return base + offset; |
305 | }else{ | 296 | } else { |
306 | do { | 297 | do { |
307 | seq = read_seqbegin(&xtime_lock); | 298 | seq = read_seqbegin(&xtime_lock); |
308 | 299 | ||
309 | last_offset = vxtime.last_tsc; | 300 | last_offset = vxtime.last_tsc; |
310 | base = monotonic_base; | 301 | base = monotonic_base; |
311 | } while (read_seqretry(&xtime_lock, seq)); | 302 | } while (read_seqretry(&xtime_lock, seq)); |
312 | sync_core(); | 303 | this_offset = get_cycles_sync(); |
313 | rdtscll(this_offset); | ||
314 | offset = (this_offset - last_offset)*1000/cpu_khz; | 304 | offset = (this_offset - last_offset)*1000/cpu_khz; |
315 | return base + offset; | 305 | return base + offset; |
316 | } | 306 | } |
317 | |||
318 | |||
319 | } | 307 | } |
320 | EXPORT_SYMBOL(monotonic_clock); | 308 | EXPORT_SYMBOL(monotonic_clock); |
321 | 309 | ||
@@ -391,7 +379,7 @@ static irqreturn_t timer_interrupt(int irq, void *dev_id, struct pt_regs *regs) | |||
391 | delay = LATCH - 1 - delay; | 379 | delay = LATCH - 1 - delay; |
392 | } | 380 | } |
393 | 381 | ||
394 | rdtscll_sync(&tsc); | 382 | tsc = get_cycles_sync(); |
395 | 383 | ||
396 | if (vxtime.mode == VXTIME_HPET) { | 384 | if (vxtime.mode == VXTIME_HPET) { |
397 | if (offset - vxtime.last > hpet_tick) { | 385 | if (offset - vxtime.last > hpet_tick) { |
@@ -471,6 +459,11 @@ static irqreturn_t timer_interrupt(int irq, void *dev_id, struct pt_regs *regs) | |||
471 | 459 | ||
472 | write_sequnlock(&xtime_lock); | 460 | write_sequnlock(&xtime_lock); |
473 | 461 | ||
462 | #ifdef CONFIG_X86_LOCAL_APIC | ||
463 | if (using_apic_timer) | ||
464 | smp_send_timer_broadcast_ipi(); | ||
465 | #endif | ||
466 | |||
474 | return IRQ_HANDLED; | 467 | return IRQ_HANDLED; |
475 | } | 468 | } |
476 | 469 | ||
@@ -509,10 +502,10 @@ unsigned long long sched_clock(void) | |||
509 | return cycles_2_ns(a); | 502 | return cycles_2_ns(a); |
510 | } | 503 | } |
511 | 504 | ||
512 | unsigned long get_cmos_time(void) | 505 | static unsigned long get_cmos_time(void) |
513 | { | 506 | { |
514 | unsigned int timeout, year, mon, day, hour, min, sec; | 507 | unsigned int timeout = 1000000, year, mon, day, hour, min, sec; |
515 | unsigned char last, this; | 508 | unsigned char uip = 0, this = 0; |
516 | unsigned long flags; | 509 | unsigned long flags; |
517 | 510 | ||
518 | /* | 511 | /* |
@@ -525,45 +518,41 @@ unsigned long get_cmos_time(void) | |||
525 | 518 | ||
526 | spin_lock_irqsave(&rtc_lock, flags); | 519 | spin_lock_irqsave(&rtc_lock, flags); |
527 | 520 | ||
528 | timeout = 1000000; | 521 | while (timeout && (!uip || this)) { |
529 | last = this = 0; | 522 | uip |= this; |
530 | |||
531 | while (timeout && last && !this) { | ||
532 | last = this; | ||
533 | this = CMOS_READ(RTC_FREQ_SELECT) & RTC_UIP; | 523 | this = CMOS_READ(RTC_FREQ_SELECT) & RTC_UIP; |
534 | timeout--; | 524 | timeout--; |
535 | } | 525 | } |
536 | 526 | ||
537 | /* | 527 | /* |
538 | * Here we are safe to assume the registers won't change for a whole second, so | 528 | * Here we are safe to assume the registers won't change for a whole |
539 | * we just go ahead and read them. | 529 | * second, so we just go ahead and read them. |
540 | */ | 530 | */ |
541 | 531 | sec = CMOS_READ(RTC_SECONDS); | |
542 | sec = CMOS_READ(RTC_SECONDS); | 532 | min = CMOS_READ(RTC_MINUTES); |
543 | min = CMOS_READ(RTC_MINUTES); | 533 | hour = CMOS_READ(RTC_HOURS); |
544 | hour = CMOS_READ(RTC_HOURS); | 534 | day = CMOS_READ(RTC_DAY_OF_MONTH); |
545 | day = CMOS_READ(RTC_DAY_OF_MONTH); | 535 | mon = CMOS_READ(RTC_MONTH); |
546 | mon = CMOS_READ(RTC_MONTH); | 536 | year = CMOS_READ(RTC_YEAR); |
547 | year = CMOS_READ(RTC_YEAR); | ||
548 | 537 | ||
549 | spin_unlock_irqrestore(&rtc_lock, flags); | 538 | spin_unlock_irqrestore(&rtc_lock, flags); |
550 | 539 | ||
551 | /* | 540 | /* |
552 | * We know that x86-64 always uses BCD format, no need to check the config | 541 | * We know that x86-64 always uses BCD format, no need to check the |
553 | * register. | 542 | * config register. |
554 | */ | 543 | */ |
555 | 544 | ||
556 | BCD_TO_BIN(sec); | 545 | BCD_TO_BIN(sec); |
557 | BCD_TO_BIN(min); | 546 | BCD_TO_BIN(min); |
558 | BCD_TO_BIN(hour); | 547 | BCD_TO_BIN(hour); |
559 | BCD_TO_BIN(day); | 548 | BCD_TO_BIN(day); |
560 | BCD_TO_BIN(mon); | 549 | BCD_TO_BIN(mon); |
561 | BCD_TO_BIN(year); | 550 | BCD_TO_BIN(year); |
562 | 551 | ||
563 | /* | 552 | /* |
564 | * x86-64 systems only exists since 2002. | 553 | * x86-64 systems only exists since 2002. |
565 | * This will work up to Dec 31, 2100 | 554 | * This will work up to Dec 31, 2100 |
566 | */ | 555 | */ |
567 | year += 2000; | 556 | year += 2000; |
568 | 557 | ||
569 | return mktime(year, mon, day, hour, min, sec); | 558 | return mktime(year, mon, day, hour, min, sec); |
@@ -695,8 +684,7 @@ static unsigned int __init hpet_calibrate_tsc(void) | |||
695 | do { | 684 | do { |
696 | local_irq_disable(); | 685 | local_irq_disable(); |
697 | hpet_now = hpet_readl(HPET_COUNTER); | 686 | hpet_now = hpet_readl(HPET_COUNTER); |
698 | sync_core(); | 687 | tsc_now = get_cycles_sync(); |
699 | rdtscl(tsc_now); | ||
700 | local_irq_restore(flags); | 688 | local_irq_restore(flags); |
701 | } while ((tsc_now - tsc_start) < TICK_COUNT && | 689 | } while ((tsc_now - tsc_start) < TICK_COUNT && |
702 | (hpet_now - hpet_start) < TICK_COUNT); | 690 | (hpet_now - hpet_start) < TICK_COUNT); |
@@ -726,11 +714,9 @@ static unsigned int __init pit_calibrate_tsc(void) | |||
726 | outb(0xb0, 0x43); | 714 | outb(0xb0, 0x43); |
727 | outb((PIT_TICK_RATE / (1000 / 50)) & 0xff, 0x42); | 715 | outb((PIT_TICK_RATE / (1000 / 50)) & 0xff, 0x42); |
728 | outb((PIT_TICK_RATE / (1000 / 50)) >> 8, 0x42); | 716 | outb((PIT_TICK_RATE / (1000 / 50)) >> 8, 0x42); |
729 | rdtscll(start); | 717 | start = get_cycles_sync(); |
730 | sync_core(); | ||
731 | while ((inb(0x61) & 0x20) == 0); | 718 | while ((inb(0x61) & 0x20) == 0); |
732 | sync_core(); | 719 | end = get_cycles_sync(); |
733 | rdtscll(end); | ||
734 | 720 | ||
735 | spin_unlock_irqrestore(&i8253_lock, flags); | 721 | spin_unlock_irqrestore(&i8253_lock, flags); |
736 | 722 | ||
@@ -744,7 +730,7 @@ static __init int late_hpet_init(void) | |||
744 | unsigned int ntimer; | 730 | unsigned int ntimer; |
745 | 731 | ||
746 | if (!vxtime.hpet_address) | 732 | if (!vxtime.hpet_address) |
747 | return -1; | 733 | return -1; |
748 | 734 | ||
749 | memset(&hd, 0, sizeof (hd)); | 735 | memset(&hd, 0, sizeof (hd)); |
750 | 736 | ||
@@ -878,8 +864,6 @@ static struct irqaction irq0 = { | |||
878 | timer_interrupt, SA_INTERRUPT, CPU_MASK_NONE, "timer", NULL, NULL | 864 | timer_interrupt, SA_INTERRUPT, CPU_MASK_NONE, "timer", NULL, NULL |
879 | }; | 865 | }; |
880 | 866 | ||
881 | extern void __init config_acpi_tables(void); | ||
882 | |||
883 | void __init time_init(void) | 867 | void __init time_init(void) |
884 | { | 868 | { |
885 | char *timename; | 869 | char *timename; |
@@ -908,12 +892,14 @@ void __init time_init(void) | |||
908 | if (!hpet_init()) | 892 | if (!hpet_init()) |
909 | vxtime_hz = (1000000000000000L + hpet_period / 2) / | 893 | vxtime_hz = (1000000000000000L + hpet_period / 2) / |
910 | hpet_period; | 894 | hpet_period; |
895 | else | ||
896 | vxtime.hpet_address = 0; | ||
911 | 897 | ||
912 | if (hpet_use_timer) { | 898 | if (hpet_use_timer) { |
913 | cpu_khz = hpet_calibrate_tsc(); | 899 | cpu_khz = hpet_calibrate_tsc(); |
914 | timename = "HPET"; | 900 | timename = "HPET"; |
915 | #ifdef CONFIG_X86_PM_TIMER | 901 | #ifdef CONFIG_X86_PM_TIMER |
916 | } else if (pmtmr_ioport) { | 902 | } else if (pmtmr_ioport && !vxtime.hpet_address) { |
917 | vxtime_hz = PM_TIMER_FREQUENCY; | 903 | vxtime_hz = PM_TIMER_FREQUENCY; |
918 | timename = "PM"; | 904 | timename = "PM"; |
919 | pit_init(); | 905 | pit_init(); |
@@ -932,7 +918,7 @@ void __init time_init(void) | |||
932 | vxtime.mode = VXTIME_TSC; | 918 | vxtime.mode = VXTIME_TSC; |
933 | vxtime.quot = (1000000L << 32) / vxtime_hz; | 919 | vxtime.quot = (1000000L << 32) / vxtime_hz; |
934 | vxtime.tsc_quot = (1000L << 32) / cpu_khz; | 920 | vxtime.tsc_quot = (1000L << 32) / cpu_khz; |
935 | rdtscll_sync(&vxtime.last_tsc); | 921 | vxtime.last_tsc = get_cycles_sync(); |
936 | setup_irq(0, &irq0); | 922 | setup_irq(0, &irq0); |
937 | 923 | ||
938 | set_cyc2ns_scale(cpu_khz); | 924 | set_cyc2ns_scale(cpu_khz); |
@@ -946,7 +932,7 @@ void __init time_init(void) | |||
946 | * Make an educated guess if the TSC is trustworthy and synchronized | 932 | * Make an educated guess if the TSC is trustworthy and synchronized |
947 | * over all CPUs. | 933 | * over all CPUs. |
948 | */ | 934 | */ |
949 | static __init int unsynchronized_tsc(void) | 935 | __init int unsynchronized_tsc(void) |
950 | { | 936 | { |
951 | #ifdef CONFIG_SMP | 937 | #ifdef CONFIG_SMP |
952 | if (oem_force_hpet_timer()) | 938 | if (oem_force_hpet_timer()) |
@@ -957,7 +943,7 @@ static __init int unsynchronized_tsc(void) | |||
957 | return 0; | 943 | return 0; |
958 | #endif | 944 | #endif |
959 | /* Assume multi socket systems are not synchronized */ | 945 | /* Assume multi socket systems are not synchronized */ |
960 | return num_online_cpus() > 1; | 946 | return num_present_cpus() > 1; |
961 | } | 947 | } |
962 | 948 | ||
963 | /* | 949 | /* |
@@ -997,6 +983,10 @@ __setup("report_lost_ticks", time_setup); | |||
997 | static long clock_cmos_diff; | 983 | static long clock_cmos_diff; |
998 | static unsigned long sleep_start; | 984 | static unsigned long sleep_start; |
999 | 985 | ||
986 | /* | ||
987 | * sysfs support for the timer. | ||
988 | */ | ||
989 | |||
1000 | static int timer_suspend(struct sys_device *dev, pm_message_t state) | 990 | static int timer_suspend(struct sys_device *dev, pm_message_t state) |
1001 | { | 991 | { |
1002 | /* | 992 | /* |
@@ -1039,7 +1029,6 @@ static struct sysdev_class timer_sysclass = { | |||
1039 | set_kset_name("timer"), | 1029 | set_kset_name("timer"), |
1040 | }; | 1030 | }; |
1041 | 1031 | ||
1042 | |||
1043 | /* XXX this driverfs stuff should probably go elsewhere later -john */ | 1032 | /* XXX this driverfs stuff should probably go elsewhere later -john */ |
1044 | static struct sys_device device_timer = { | 1033 | static struct sys_device device_timer = { |
1045 | .id = 0, | 1034 | .id = 0, |
@@ -1073,8 +1062,6 @@ device_initcall(time_init_device); | |||
1073 | */ | 1062 | */ |
1074 | #include <linux/rtc.h> | 1063 | #include <linux/rtc.h> |
1075 | 1064 | ||
1076 | extern irqreturn_t rtc_interrupt(int irq, void *dev_id, struct pt_regs *regs); | ||
1077 | |||
1078 | #define DEFAULT_RTC_INT_FREQ 64 | 1065 | #define DEFAULT_RTC_INT_FREQ 64 |
1079 | #define RTC_NUM_INTS 1 | 1066 | #define RTC_NUM_INTS 1 |
1080 | 1067 | ||
@@ -1283,8 +1270,6 @@ irqreturn_t hpet_rtc_interrupt(int irq, void *dev_id, struct pt_regs *regs) | |||
1283 | } | 1270 | } |
1284 | #endif | 1271 | #endif |
1285 | 1272 | ||
1286 | |||
1287 | |||
1288 | static int __init nohpet_setup(char *s) | 1273 | static int __init nohpet_setup(char *s) |
1289 | { | 1274 | { |
1290 | nohpet = 1; | 1275 | nohpet = 1; |
@@ -1301,5 +1286,3 @@ static int __init notsc_setup(char *s) | |||
1301 | } | 1286 | } |
1302 | 1287 | ||
1303 | __setup("notsc", notsc_setup); | 1288 | __setup("notsc", notsc_setup); |
1304 | |||
1305 | |||
diff --git a/arch/x86_64/kernel/trampoline.S b/arch/x86_64/kernel/trampoline.S index 6d9c9a8e7d0e..23a03eb91fc7 100644 --- a/arch/x86_64/kernel/trampoline.S +++ b/arch/x86_64/kernel/trampoline.S | |||
@@ -42,8 +42,15 @@ r_base = . | |||
42 | movl $0xA5A5A5A5, trampoline_data - r_base | 42 | movl $0xA5A5A5A5, trampoline_data - r_base |
43 | # write marker for master knows we're running | 43 | # write marker for master knows we're running |
44 | 44 | ||
45 | lidt idt_48 - r_base # load idt with 0, 0 | 45 | /* |
46 | lgdt gdt_48 - r_base # load gdt with whatever is appropriate | 46 | * GDT tables in non default location kernel can be beyond 16MB and |
47 | * lgdt will not be able to load the address as in real mode default | ||
48 | * operand size is 16bit. Use lgdtl instead to force operand size | ||
49 | * to 32 bit. | ||
50 | */ | ||
51 | |||
52 | lidtl idt_48 - r_base # load idt with 0, 0 | ||
53 | lgdtl gdt_48 - r_base # load gdt with whatever is appropriate | ||
47 | 54 | ||
48 | xor %ax, %ax | 55 | xor %ax, %ax |
49 | inc %ax # protected mode (PE) bit | 56 | inc %ax # protected mode (PE) bit |
diff --git a/arch/x86_64/kernel/traps.c b/arch/x86_64/kernel/traps.c index b5e09e6b5536..8bb0aeda78b9 100644 --- a/arch/x86_64/kernel/traps.c +++ b/arch/x86_64/kernel/traps.c | |||
@@ -70,7 +70,6 @@ asmlinkage void reserved(void); | |||
70 | asmlinkage void alignment_check(void); | 70 | asmlinkage void alignment_check(void); |
71 | asmlinkage void machine_check(void); | 71 | asmlinkage void machine_check(void); |
72 | asmlinkage void spurious_interrupt_bug(void); | 72 | asmlinkage void spurious_interrupt_bug(void); |
73 | asmlinkage void call_debug(void); | ||
74 | 73 | ||
75 | struct notifier_block *die_chain; | 74 | struct notifier_block *die_chain; |
76 | static DEFINE_SPINLOCK(die_notifier_lock); | 75 | static DEFINE_SPINLOCK(die_notifier_lock); |
@@ -121,19 +120,31 @@ int printk_address(unsigned long address) | |||
121 | static unsigned long *in_exception_stack(unsigned cpu, unsigned long stack, | 120 | static unsigned long *in_exception_stack(unsigned cpu, unsigned long stack, |
122 | unsigned *usedp, const char **idp) | 121 | unsigned *usedp, const char **idp) |
123 | { | 122 | { |
124 | static const char ids[N_EXCEPTION_STACKS][8] = { | 123 | static char ids[][8] = { |
125 | [DEBUG_STACK - 1] = "#DB", | 124 | [DEBUG_STACK - 1] = "#DB", |
126 | [NMI_STACK - 1] = "NMI", | 125 | [NMI_STACK - 1] = "NMI", |
127 | [DOUBLEFAULT_STACK - 1] = "#DF", | 126 | [DOUBLEFAULT_STACK - 1] = "#DF", |
128 | [STACKFAULT_STACK - 1] = "#SS", | 127 | [STACKFAULT_STACK - 1] = "#SS", |
129 | [MCE_STACK - 1] = "#MC", | 128 | [MCE_STACK - 1] = "#MC", |
129 | #if DEBUG_STKSZ > EXCEPTION_STKSZ | ||
130 | [N_EXCEPTION_STACKS ... N_EXCEPTION_STACKS + DEBUG_STKSZ / EXCEPTION_STKSZ - 2] = "#DB[?]" | ||
131 | #endif | ||
130 | }; | 132 | }; |
131 | unsigned k; | 133 | unsigned k; |
132 | 134 | ||
133 | for (k = 0; k < N_EXCEPTION_STACKS; k++) { | 135 | for (k = 0; k < N_EXCEPTION_STACKS; k++) { |
134 | unsigned long end; | 136 | unsigned long end; |
135 | 137 | ||
136 | end = per_cpu(init_tss, cpu).ist[k]; | 138 | switch (k + 1) { |
139 | #if DEBUG_STKSZ > EXCEPTION_STKSZ | ||
140 | case DEBUG_STACK: | ||
141 | end = cpu_pda(cpu)->debugstack + DEBUG_STKSZ; | ||
142 | break; | ||
143 | #endif | ||
144 | default: | ||
145 | end = per_cpu(init_tss, cpu).ist[k]; | ||
146 | break; | ||
147 | } | ||
137 | if (stack >= end) | 148 | if (stack >= end) |
138 | continue; | 149 | continue; |
139 | if (stack >= end - EXCEPTION_STKSZ) { | 150 | if (stack >= end - EXCEPTION_STKSZ) { |
@@ -143,6 +154,22 @@ static unsigned long *in_exception_stack(unsigned cpu, unsigned long stack, | |||
143 | *idp = ids[k]; | 154 | *idp = ids[k]; |
144 | return (unsigned long *)end; | 155 | return (unsigned long *)end; |
145 | } | 156 | } |
157 | #if DEBUG_STKSZ > EXCEPTION_STKSZ | ||
158 | if (k == DEBUG_STACK - 1 && stack >= end - DEBUG_STKSZ) { | ||
159 | unsigned j = N_EXCEPTION_STACKS - 1; | ||
160 | |||
161 | do { | ||
162 | ++j; | ||
163 | end -= EXCEPTION_STKSZ; | ||
164 | ids[j][4] = '1' + (j - N_EXCEPTION_STACKS); | ||
165 | } while (stack < end - EXCEPTION_STKSZ); | ||
166 | if (*usedp & (1U << j)) | ||
167 | break; | ||
168 | *usedp |= 1U << j; | ||
169 | *idp = ids[j]; | ||
170 | return (unsigned long *)end; | ||
171 | } | ||
172 | #endif | ||
146 | } | 173 | } |
147 | return NULL; | 174 | return NULL; |
148 | } | 175 | } |
@@ -156,9 +183,8 @@ static unsigned long *in_exception_stack(unsigned cpu, unsigned long stack, | |||
156 | 183 | ||
157 | void show_trace(unsigned long *stack) | 184 | void show_trace(unsigned long *stack) |
158 | { | 185 | { |
159 | unsigned long addr; | ||
160 | const unsigned cpu = safe_smp_processor_id(); | 186 | const unsigned cpu = safe_smp_processor_id(); |
161 | unsigned long *irqstack_end = (unsigned long *)cpu_pda[cpu].irqstackptr; | 187 | unsigned long *irqstack_end = (unsigned long *)cpu_pda(cpu)->irqstackptr; |
162 | int i; | 188 | int i; |
163 | unsigned used = 0; | 189 | unsigned used = 0; |
164 | 190 | ||
@@ -166,8 +192,14 @@ void show_trace(unsigned long *stack) | |||
166 | 192 | ||
167 | #define HANDLE_STACK(cond) \ | 193 | #define HANDLE_STACK(cond) \ |
168 | do while (cond) { \ | 194 | do while (cond) { \ |
169 | addr = *stack++; \ | 195 | unsigned long addr = *stack++; \ |
170 | if (kernel_text_address(addr)) { \ | 196 | if (kernel_text_address(addr)) { \ |
197 | if (i > 50) { \ | ||
198 | printk("\n "); \ | ||
199 | i = 0; \ | ||
200 | } \ | ||
201 | else \ | ||
202 | i += printk(" "); \ | ||
171 | /* \ | 203 | /* \ |
172 | * If the address is either in the text segment of the \ | 204 | * If the address is either in the text segment of the \ |
173 | * kernel, or in the region which contains vmalloc'ed \ | 205 | * kernel, or in the region which contains vmalloc'ed \ |
@@ -177,25 +209,19 @@ void show_trace(unsigned long *stack) | |||
177 | * out the call path that was taken. \ | 209 | * out the call path that was taken. \ |
178 | */ \ | 210 | */ \ |
179 | i += printk_address(addr); \ | 211 | i += printk_address(addr); \ |
180 | if (i > 50) { \ | ||
181 | printk("\n "); \ | ||
182 | i = 0; \ | ||
183 | } \ | ||
184 | else \ | ||
185 | i += printk(" "); \ | ||
186 | } \ | 212 | } \ |
187 | } while (0) | 213 | } while (0) |
188 | 214 | ||
189 | for(i = 0; ; ) { | 215 | for(i = 11; ; ) { |
190 | const char *id; | 216 | const char *id; |
191 | unsigned long *estack_end; | 217 | unsigned long *estack_end; |
192 | estack_end = in_exception_stack(cpu, (unsigned long)stack, | 218 | estack_end = in_exception_stack(cpu, (unsigned long)stack, |
193 | &used, &id); | 219 | &used, &id); |
194 | 220 | ||
195 | if (estack_end) { | 221 | if (estack_end) { |
196 | i += printk(" <%s> ", id); | 222 | i += printk(" <%s>", id); |
197 | HANDLE_STACK (stack < estack_end); | 223 | HANDLE_STACK (stack < estack_end); |
198 | i += printk(" <EOE> "); | 224 | i += printk(" <EOE>"); |
199 | stack = (unsigned long *) estack_end[-2]; | 225 | stack = (unsigned long *) estack_end[-2]; |
200 | continue; | 226 | continue; |
201 | } | 227 | } |
@@ -205,11 +231,11 @@ void show_trace(unsigned long *stack) | |||
205 | (IRQSTACKSIZE - 64) / sizeof(*irqstack); | 231 | (IRQSTACKSIZE - 64) / sizeof(*irqstack); |
206 | 232 | ||
207 | if (stack >= irqstack && stack < irqstack_end) { | 233 | if (stack >= irqstack && stack < irqstack_end) { |
208 | i += printk(" <IRQ> "); | 234 | i += printk(" <IRQ>"); |
209 | HANDLE_STACK (stack < irqstack_end); | 235 | HANDLE_STACK (stack < irqstack_end); |
210 | stack = (unsigned long *) (irqstack_end[-1]); | 236 | stack = (unsigned long *) (irqstack_end[-1]); |
211 | irqstack_end = NULL; | 237 | irqstack_end = NULL; |
212 | i += printk(" <EOI> "); | 238 | i += printk(" <EOI>"); |
213 | continue; | 239 | continue; |
214 | } | 240 | } |
215 | } | 241 | } |
@@ -226,8 +252,8 @@ void show_stack(struct task_struct *tsk, unsigned long * rsp) | |||
226 | unsigned long *stack; | 252 | unsigned long *stack; |
227 | int i; | 253 | int i; |
228 | const int cpu = safe_smp_processor_id(); | 254 | const int cpu = safe_smp_processor_id(); |
229 | unsigned long *irqstack_end = (unsigned long *) (cpu_pda[cpu].irqstackptr); | 255 | unsigned long *irqstack_end = (unsigned long *) (cpu_pda(cpu)->irqstackptr); |
230 | unsigned long *irqstack = (unsigned long *) (cpu_pda[cpu].irqstackptr - IRQSTACKSIZE); | 256 | unsigned long *irqstack = (unsigned long *) (cpu_pda(cpu)->irqstackptr - IRQSTACKSIZE); |
231 | 257 | ||
232 | // debugging aid: "show_stack(NULL, NULL);" prints the | 258 | // debugging aid: "show_stack(NULL, NULL);" prints the |
233 | // back trace for this cpu. | 259 | // back trace for this cpu. |
@@ -275,14 +301,14 @@ void show_registers(struct pt_regs *regs) | |||
275 | int in_kernel = !user_mode(regs); | 301 | int in_kernel = !user_mode(regs); |
276 | unsigned long rsp; | 302 | unsigned long rsp; |
277 | const int cpu = safe_smp_processor_id(); | 303 | const int cpu = safe_smp_processor_id(); |
278 | struct task_struct *cur = cpu_pda[cpu].pcurrent; | 304 | struct task_struct *cur = cpu_pda(cpu)->pcurrent; |
279 | 305 | ||
280 | rsp = regs->rsp; | 306 | rsp = regs->rsp; |
281 | 307 | ||
282 | printk("CPU %d ", cpu); | 308 | printk("CPU %d ", cpu); |
283 | __show_regs(regs); | 309 | __show_regs(regs); |
284 | printk("Process %s (pid: %d, threadinfo %p, task %p)\n", | 310 | printk("Process %s (pid: %d, threadinfo %p, task %p)\n", |
285 | cur->comm, cur->pid, cur->thread_info, cur); | 311 | cur->comm, cur->pid, task_thread_info(cur), cur); |
286 | 312 | ||
287 | /* | 313 | /* |
288 | * When in-kernel, we also print out the stack and code at the | 314 | * When in-kernel, we also print out the stack and code at the |
@@ -314,20 +340,26 @@ bad: | |||
314 | void handle_BUG(struct pt_regs *regs) | 340 | void handle_BUG(struct pt_regs *regs) |
315 | { | 341 | { |
316 | struct bug_frame f; | 342 | struct bug_frame f; |
317 | char tmp; | 343 | long len; |
344 | const char *prefix = ""; | ||
318 | 345 | ||
319 | if (user_mode(regs)) | 346 | if (user_mode(regs)) |
320 | return; | 347 | return; |
321 | if (__copy_from_user(&f, (struct bug_frame *) regs->rip, | 348 | if (__copy_from_user(&f, (const void __user *) regs->rip, |
322 | sizeof(struct bug_frame))) | 349 | sizeof(struct bug_frame))) |
323 | return; | 350 | return; |
324 | if (f.filename >= 0 || | 351 | if (f.filename >= 0 || |
325 | f.ud2[0] != 0x0f || f.ud2[1] != 0x0b) | 352 | f.ud2[0] != 0x0f || f.ud2[1] != 0x0b) |
326 | return; | 353 | return; |
327 | if (__get_user(tmp, (char *)(long)f.filename)) | 354 | len = __strnlen_user((char *)(long)f.filename, PATH_MAX) - 1; |
355 | if (len < 0 || len >= PATH_MAX) | ||
328 | f.filename = (int)(long)"unmapped filename"; | 356 | f.filename = (int)(long)"unmapped filename"; |
357 | else if (len > 50) { | ||
358 | f.filename += len - 50; | ||
359 | prefix = "..."; | ||
360 | } | ||
329 | printk("----------- [cut here ] --------- [please bite here ] ---------\n"); | 361 | printk("----------- [cut here ] --------- [please bite here ] ---------\n"); |
330 | printk(KERN_ALERT "Kernel BUG at %.50s:%d\n", (char *)(long)f.filename, f.line); | 362 | printk(KERN_ALERT "Kernel BUG at %s%.50s:%d\n", prefix, (char *)(long)f.filename, f.line); |
331 | } | 363 | } |
332 | 364 | ||
333 | #ifdef CONFIG_BUG | 365 | #ifdef CONFIG_BUG |
@@ -382,7 +414,7 @@ void __die(const char * str, struct pt_regs * regs, long err) | |||
382 | printk("DEBUG_PAGEALLOC"); | 414 | printk("DEBUG_PAGEALLOC"); |
383 | #endif | 415 | #endif |
384 | printk("\n"); | 416 | printk("\n"); |
385 | notify_die(DIE_OOPS, (char *)str, regs, err, 255, SIGSEGV); | 417 | notify_die(DIE_OOPS, str, regs, err, current->thread.trap_no, SIGSEGV); |
386 | show_registers(regs); | 418 | show_registers(regs); |
387 | /* Executive summary in case the oops scrolled away */ | 419 | /* Executive summary in case the oops scrolled away */ |
388 | printk(KERN_ALERT "RIP "); | 420 | printk(KERN_ALERT "RIP "); |
@@ -399,11 +431,6 @@ void die(const char * str, struct pt_regs * regs, long err) | |||
399 | oops_end(flags); | 431 | oops_end(flags); |
400 | do_exit(SIGSEGV); | 432 | do_exit(SIGSEGV); |
401 | } | 433 | } |
402 | static inline void die_if_kernel(const char * str, struct pt_regs * regs, long err) | ||
403 | { | ||
404 | if (!(regs->eflags & VM_MASK) && (regs->cs == __KERNEL_CS)) | ||
405 | die(str, regs, err); | ||
406 | } | ||
407 | 434 | ||
408 | void die_nmi(char *str, struct pt_regs *regs) | 435 | void die_nmi(char *str, struct pt_regs *regs) |
409 | { | 436 | { |
@@ -426,32 +453,20 @@ static void __kprobes do_trap(int trapnr, int signr, char *str, | |||
426 | struct pt_regs * regs, long error_code, | 453 | struct pt_regs * regs, long error_code, |
427 | siginfo_t *info) | 454 | siginfo_t *info) |
428 | { | 455 | { |
456 | struct task_struct *tsk = current; | ||
457 | |||
429 | conditional_sti(regs); | 458 | conditional_sti(regs); |
430 | 459 | ||
431 | #ifdef CONFIG_CHECKING | 460 | tsk->thread.error_code = error_code; |
432 | { | 461 | tsk->thread.trap_no = trapnr; |
433 | unsigned long gs; | ||
434 | struct x8664_pda *pda = cpu_pda + safe_smp_processor_id(); | ||
435 | rdmsrl(MSR_GS_BASE, gs); | ||
436 | if (gs != (unsigned long)pda) { | ||
437 | wrmsrl(MSR_GS_BASE, pda); | ||
438 | printk("%s: wrong gs %lx expected %p rip %lx\n", str, gs, pda, | ||
439 | regs->rip); | ||
440 | } | ||
441 | } | ||
442 | #endif | ||
443 | 462 | ||
444 | if (user_mode(regs)) { | 463 | if (user_mode(regs)) { |
445 | struct task_struct *tsk = current; | ||
446 | |||
447 | if (exception_trace && unhandled_signal(tsk, signr)) | 464 | if (exception_trace && unhandled_signal(tsk, signr)) |
448 | printk(KERN_INFO | 465 | printk(KERN_INFO |
449 | "%s[%d] trap %s rip:%lx rsp:%lx error:%lx\n", | 466 | "%s[%d] trap %s rip:%lx rsp:%lx error:%lx\n", |
450 | tsk->comm, tsk->pid, str, | 467 | tsk->comm, tsk->pid, str, |
451 | regs->rip,regs->rsp,error_code); | 468 | regs->rip,regs->rsp,error_code); |
452 | 469 | ||
453 | tsk->thread.error_code = error_code; | ||
454 | tsk->thread.trap_no = trapnr; | ||
455 | if (info) | 470 | if (info) |
456 | force_sig_info(signr, info, tsk); | 471 | force_sig_info(signr, info, tsk); |
457 | else | 472 | else |
@@ -498,7 +513,7 @@ asmlinkage void do_##name(struct pt_regs * regs, long error_code) \ | |||
498 | DO_ERROR_INFO( 0, SIGFPE, "divide error", divide_error, FPE_INTDIV, regs->rip) | 513 | DO_ERROR_INFO( 0, SIGFPE, "divide error", divide_error, FPE_INTDIV, regs->rip) |
499 | DO_ERROR( 4, SIGSEGV, "overflow", overflow) | 514 | DO_ERROR( 4, SIGSEGV, "overflow", overflow) |
500 | DO_ERROR( 5, SIGSEGV, "bounds", bounds) | 515 | DO_ERROR( 5, SIGSEGV, "bounds", bounds) |
501 | DO_ERROR_INFO( 6, SIGILL, "invalid operand", invalid_op, ILL_ILLOPN, regs->rip) | 516 | DO_ERROR_INFO( 6, SIGILL, "invalid opcode", invalid_op, ILL_ILLOPN, regs->rip) |
502 | DO_ERROR( 7, SIGSEGV, "device not available", device_not_available) | 517 | DO_ERROR( 7, SIGSEGV, "device not available", device_not_available) |
503 | DO_ERROR( 9, SIGFPE, "coprocessor segment overrun", coprocessor_segment_overrun) | 518 | DO_ERROR( 9, SIGFPE, "coprocessor segment overrun", coprocessor_segment_overrun) |
504 | DO_ERROR(10, SIGSEGV, "invalid TSS", invalid_TSS) | 519 | DO_ERROR(10, SIGSEGV, "invalid TSS", invalid_TSS) |
@@ -506,38 +521,41 @@ DO_ERROR(11, SIGBUS, "segment not present", segment_not_present) | |||
506 | DO_ERROR_INFO(17, SIGBUS, "alignment check", alignment_check, BUS_ADRALN, 0) | 521 | DO_ERROR_INFO(17, SIGBUS, "alignment check", alignment_check, BUS_ADRALN, 0) |
507 | DO_ERROR(18, SIGSEGV, "reserved", reserved) | 522 | DO_ERROR(18, SIGSEGV, "reserved", reserved) |
508 | DO_ERROR(12, SIGBUS, "stack segment", stack_segment) | 523 | DO_ERROR(12, SIGBUS, "stack segment", stack_segment) |
509 | DO_ERROR( 8, SIGSEGV, "double fault", double_fault) | 524 | |
525 | asmlinkage void do_double_fault(struct pt_regs * regs, long error_code) | ||
526 | { | ||
527 | static const char str[] = "double fault"; | ||
528 | struct task_struct *tsk = current; | ||
529 | |||
530 | /* Return not checked because double check cannot be ignored */ | ||
531 | notify_die(DIE_TRAP, str, regs, error_code, 8, SIGSEGV); | ||
532 | |||
533 | tsk->thread.error_code = error_code; | ||
534 | tsk->thread.trap_no = 8; | ||
535 | |||
536 | /* This is always a kernel trap and never fixable (and thus must | ||
537 | never return). */ | ||
538 | for (;;) | ||
539 | die(str, regs, error_code); | ||
540 | } | ||
510 | 541 | ||
511 | asmlinkage void __kprobes do_general_protection(struct pt_regs * regs, | 542 | asmlinkage void __kprobes do_general_protection(struct pt_regs * regs, |
512 | long error_code) | 543 | long error_code) |
513 | { | 544 | { |
545 | struct task_struct *tsk = current; | ||
546 | |||
514 | conditional_sti(regs); | 547 | conditional_sti(regs); |
515 | 548 | ||
516 | #ifdef CONFIG_CHECKING | 549 | tsk->thread.error_code = error_code; |
517 | { | 550 | tsk->thread.trap_no = 13; |
518 | unsigned long gs; | ||
519 | struct x8664_pda *pda = cpu_pda + safe_smp_processor_id(); | ||
520 | rdmsrl(MSR_GS_BASE, gs); | ||
521 | if (gs != (unsigned long)pda) { | ||
522 | wrmsrl(MSR_GS_BASE, pda); | ||
523 | oops_in_progress++; | ||
524 | printk("general protection handler: wrong gs %lx expected %p\n", gs, pda); | ||
525 | oops_in_progress--; | ||
526 | } | ||
527 | } | ||
528 | #endif | ||
529 | 551 | ||
530 | if (user_mode(regs)) { | 552 | if (user_mode(regs)) { |
531 | struct task_struct *tsk = current; | ||
532 | |||
533 | if (exception_trace && unhandled_signal(tsk, SIGSEGV)) | 553 | if (exception_trace && unhandled_signal(tsk, SIGSEGV)) |
534 | printk(KERN_INFO | 554 | printk(KERN_INFO |
535 | "%s[%d] general protection rip:%lx rsp:%lx error:%lx\n", | 555 | "%s[%d] general protection rip:%lx rsp:%lx error:%lx\n", |
536 | tsk->comm, tsk->pid, | 556 | tsk->comm, tsk->pid, |
537 | regs->rip,regs->rsp,error_code); | 557 | regs->rip,regs->rsp,error_code); |
538 | 558 | ||
539 | tsk->thread.error_code = error_code; | ||
540 | tsk->thread.trap_no = 13; | ||
541 | force_sig(SIGSEGV, tsk); | 559 | force_sig(SIGSEGV, tsk); |
542 | return; | 560 | return; |
543 | } | 561 | } |
@@ -600,7 +618,7 @@ asmlinkage void default_do_nmi(struct pt_regs *regs) | |||
600 | reason = get_nmi_reason(); | 618 | reason = get_nmi_reason(); |
601 | 619 | ||
602 | if (!(reason & 0xc0)) { | 620 | if (!(reason & 0xc0)) { |
603 | if (notify_die(DIE_NMI_IPI, "nmi_ipi", regs, reason, 0, SIGINT) | 621 | if (notify_die(DIE_NMI_IPI, "nmi_ipi", regs, reason, 2, SIGINT) |
604 | == NOTIFY_STOP) | 622 | == NOTIFY_STOP) |
605 | return; | 623 | return; |
606 | #ifdef CONFIG_X86_LOCAL_APIC | 624 | #ifdef CONFIG_X86_LOCAL_APIC |
@@ -616,7 +634,7 @@ asmlinkage void default_do_nmi(struct pt_regs *regs) | |||
616 | unknown_nmi_error(reason, regs); | 634 | unknown_nmi_error(reason, regs); |
617 | return; | 635 | return; |
618 | } | 636 | } |
619 | if (notify_die(DIE_NMI, "nmi", regs, reason, 0, SIGINT) == NOTIFY_STOP) | 637 | if (notify_die(DIE_NMI, "nmi", regs, reason, 2, SIGINT) == NOTIFY_STOP) |
620 | return; | 638 | return; |
621 | 639 | ||
622 | /* AK: following checks seem to be broken on modern chipsets. FIXME */ | 640 | /* AK: following checks seem to be broken on modern chipsets. FIXME */ |
@@ -627,6 +645,7 @@ asmlinkage void default_do_nmi(struct pt_regs *regs) | |||
627 | io_check_error(reason, regs); | 645 | io_check_error(reason, regs); |
628 | } | 646 | } |
629 | 647 | ||
648 | /* runs on IST stack. */ | ||
630 | asmlinkage void __kprobes do_int3(struct pt_regs * regs, long error_code) | 649 | asmlinkage void __kprobes do_int3(struct pt_regs * regs, long error_code) |
631 | { | 650 | { |
632 | if (notify_die(DIE_INT3, "int3", regs, error_code, 3, SIGTRAP) == NOTIFY_STOP) { | 651 | if (notify_die(DIE_INT3, "int3", regs, error_code, 3, SIGTRAP) == NOTIFY_STOP) { |
@@ -647,7 +666,7 @@ asmlinkage struct pt_regs *sync_regs(struct pt_regs *eregs) | |||
647 | ; | 666 | ; |
648 | /* Exception from user space */ | 667 | /* Exception from user space */ |
649 | else if (user_mode(eregs)) | 668 | else if (user_mode(eregs)) |
650 | regs = ((struct pt_regs *)current->thread.rsp0) - 1; | 669 | regs = task_pt_regs(current); |
651 | /* Exception from kernel and interrupts are enabled. Move to | 670 | /* Exception from kernel and interrupts are enabled. Move to |
652 | kernel process stack. */ | 671 | kernel process stack. */ |
653 | else if (eregs->eflags & X86_EFLAGS_IF) | 672 | else if (eregs->eflags & X86_EFLAGS_IF) |
@@ -665,19 +684,6 @@ asmlinkage void __kprobes do_debug(struct pt_regs * regs, | |||
665 | struct task_struct *tsk = current; | 684 | struct task_struct *tsk = current; |
666 | siginfo_t info; | 685 | siginfo_t info; |
667 | 686 | ||
668 | #ifdef CONFIG_CHECKING | ||
669 | { | ||
670 | /* RED-PEN interaction with debugger - could destroy gs */ | ||
671 | unsigned long gs; | ||
672 | struct x8664_pda *pda = cpu_pda + safe_smp_processor_id(); | ||
673 | rdmsrl(MSR_GS_BASE, gs); | ||
674 | if (gs != (unsigned long)pda) { | ||
675 | wrmsrl(MSR_GS_BASE, pda); | ||
676 | printk("debug handler: wrong gs %lx expected %p\n", gs, pda); | ||
677 | } | ||
678 | } | ||
679 | #endif | ||
680 | |||
681 | get_debugreg(condition, 6); | 687 | get_debugreg(condition, 6); |
682 | 688 | ||
683 | if (notify_die(DIE_DEBUG, "debug", regs, condition, error_code, | 689 | if (notify_die(DIE_DEBUG, "debug", regs, condition, error_code, |
@@ -724,11 +730,9 @@ asmlinkage void __kprobes do_debug(struct pt_regs * regs, | |||
724 | info.si_signo = SIGTRAP; | 730 | info.si_signo = SIGTRAP; |
725 | info.si_errno = 0; | 731 | info.si_errno = 0; |
726 | info.si_code = TRAP_BRKPT; | 732 | info.si_code = TRAP_BRKPT; |
727 | if (!user_mode(regs)) | 733 | info.si_addr = user_mode(regs) ? (void __user *)regs->rip : NULL; |
728 | goto clear_dr7; | 734 | force_sig_info(SIGTRAP, &info, tsk); |
729 | 735 | ||
730 | info.si_addr = (void __user *)regs->rip; | ||
731 | force_sig_info(SIGTRAP, &info, tsk); | ||
732 | clear_dr7: | 736 | clear_dr7: |
733 | set_debugreg(0UL, 7); | 737 | set_debugreg(0UL, 7); |
734 | return; | 738 | return; |
@@ -738,7 +742,7 @@ clear_TF_reenable: | |||
738 | regs->eflags &= ~TF_MASK; | 742 | regs->eflags &= ~TF_MASK; |
739 | } | 743 | } |
740 | 744 | ||
741 | static int kernel_math_error(struct pt_regs *regs, char *str) | 745 | static int kernel_math_error(struct pt_regs *regs, const char *str, int trapnr) |
742 | { | 746 | { |
743 | const struct exception_table_entry *fixup; | 747 | const struct exception_table_entry *fixup; |
744 | fixup = search_exception_tables(regs->rip); | 748 | fixup = search_exception_tables(regs->rip); |
@@ -746,8 +750,9 @@ static int kernel_math_error(struct pt_regs *regs, char *str) | |||
746 | regs->rip = fixup->fixup; | 750 | regs->rip = fixup->fixup; |
747 | return 1; | 751 | return 1; |
748 | } | 752 | } |
749 | notify_die(DIE_GPF, str, regs, 0, 16, SIGFPE); | 753 | notify_die(DIE_GPF, str, regs, 0, trapnr, SIGFPE); |
750 | /* Illegal floating point operation in the kernel */ | 754 | /* Illegal floating point operation in the kernel */ |
755 | current->thread.trap_no = trapnr; | ||
751 | die(str, regs, 0); | 756 | die(str, regs, 0); |
752 | return 0; | 757 | return 0; |
753 | } | 758 | } |
@@ -766,7 +771,7 @@ asmlinkage void do_coprocessor_error(struct pt_regs *regs) | |||
766 | 771 | ||
767 | conditional_sti(regs); | 772 | conditional_sti(regs); |
768 | if (!user_mode(regs) && | 773 | if (!user_mode(regs) && |
769 | kernel_math_error(regs, "kernel x87 math error")) | 774 | kernel_math_error(regs, "kernel x87 math error", 16)) |
770 | return; | 775 | return; |
771 | 776 | ||
772 | /* | 777 | /* |
@@ -835,7 +840,7 @@ asmlinkage void do_simd_coprocessor_error(struct pt_regs *regs) | |||
835 | 840 | ||
836 | conditional_sti(regs); | 841 | conditional_sti(regs); |
837 | if (!user_mode(regs) && | 842 | if (!user_mode(regs) && |
838 | kernel_math_error(regs, "kernel simd math error")) | 843 | kernel_math_error(regs, "kernel simd math error", 19)) |
839 | return; | 844 | return; |
840 | 845 | ||
841 | /* | 846 | /* |
@@ -888,6 +893,10 @@ asmlinkage void __attribute__((weak)) smp_thermal_interrupt(void) | |||
888 | { | 893 | { |
889 | } | 894 | } |
890 | 895 | ||
896 | asmlinkage void __attribute__((weak)) mce_threshold_interrupt(void) | ||
897 | { | ||
898 | } | ||
899 | |||
891 | /* | 900 | /* |
892 | * 'math_state_restore()' saves the current math information in the | 901 | * 'math_state_restore()' saves the current math information in the |
893 | * old math state array, and gets the new ones from the current task | 902 | * old math state array, and gets the new ones from the current task |
@@ -903,12 +912,7 @@ asmlinkage void math_state_restore(void) | |||
903 | if (!used_math()) | 912 | if (!used_math()) |
904 | init_fpu(me); | 913 | init_fpu(me); |
905 | restore_fpu_checking(&me->thread.i387.fxsave); | 914 | restore_fpu_checking(&me->thread.i387.fxsave); |
906 | me->thread_info->status |= TS_USEDFPU; | 915 | task_thread_info(me)->status |= TS_USEDFPU; |
907 | } | ||
908 | |||
909 | void do_call_debug(struct pt_regs *regs) | ||
910 | { | ||
911 | notify_die(DIE_CALL, "debug call", regs, 0, 255, SIGINT); | ||
912 | } | 916 | } |
913 | 917 | ||
914 | void __init trap_init(void) | 918 | void __init trap_init(void) |
@@ -916,9 +920,9 @@ void __init trap_init(void) | |||
916 | set_intr_gate(0,÷_error); | 920 | set_intr_gate(0,÷_error); |
917 | set_intr_gate_ist(1,&debug,DEBUG_STACK); | 921 | set_intr_gate_ist(1,&debug,DEBUG_STACK); |
918 | set_intr_gate_ist(2,&nmi,NMI_STACK); | 922 | set_intr_gate_ist(2,&nmi,NMI_STACK); |
919 | set_system_gate(3,&int3); | 923 | set_system_gate_ist(3,&int3,DEBUG_STACK); /* int3 can be called from all */ |
920 | set_system_gate(4,&overflow); /* int4-5 can be called from all */ | 924 | set_system_gate(4,&overflow); /* int4 can be called from all */ |
921 | set_system_gate(5,&bounds); | 925 | set_intr_gate(5,&bounds); |
922 | set_intr_gate(6,&invalid_op); | 926 | set_intr_gate(6,&invalid_op); |
923 | set_intr_gate(7,&device_not_available); | 927 | set_intr_gate(7,&device_not_available); |
924 | set_intr_gate_ist(8,&double_fault, DOUBLEFAULT_STACK); | 928 | set_intr_gate_ist(8,&double_fault, DOUBLEFAULT_STACK); |
@@ -940,8 +944,6 @@ void __init trap_init(void) | |||
940 | set_system_gate(IA32_SYSCALL_VECTOR, ia32_syscall); | 944 | set_system_gate(IA32_SYSCALL_VECTOR, ia32_syscall); |
941 | #endif | 945 | #endif |
942 | 946 | ||
943 | set_intr_gate(KDB_VECTOR, call_debug); | ||
944 | |||
945 | /* | 947 | /* |
946 | * Should be a barrier for any external CPU state. | 948 | * Should be a barrier for any external CPU state. |
947 | */ | 949 | */ |
diff --git a/arch/x86_64/kernel/vmlinux.lds.S b/arch/x86_64/kernel/vmlinux.lds.S index 6dd642cad2ef..b0eed1faf740 100644 --- a/arch/x86_64/kernel/vmlinux.lds.S +++ b/arch/x86_64/kernel/vmlinux.lds.S | |||
@@ -8,6 +8,8 @@ | |||
8 | #include <asm/page.h> | 8 | #include <asm/page.h> |
9 | #include <linux/config.h> | 9 | #include <linux/config.h> |
10 | 10 | ||
11 | #undef i386 /* in case the preprocessor is a 32bit one */ | ||
12 | |||
11 | OUTPUT_FORMAT("elf64-x86-64", "elf64-x86-64", "elf64-x86-64") | 13 | OUTPUT_FORMAT("elf64-x86-64", "elf64-x86-64", "elf64-x86-64") |
12 | OUTPUT_ARCH(i386:x86-64) | 14 | OUTPUT_ARCH(i386:x86-64) |
13 | ENTRY(phys_startup_64) | 15 | ENTRY(phys_startup_64) |
@@ -50,7 +52,7 @@ SECTIONS | |||
50 | *(.bss.page_aligned) | 52 | *(.bss.page_aligned) |
51 | *(.bss) | 53 | *(.bss) |
52 | } | 54 | } |
53 | __bss_end = .; | 55 | __bss_stop = .; |
54 | 56 | ||
55 | . = ALIGN(PAGE_SIZE); | 57 | . = ALIGN(PAGE_SIZE); |
56 | . = ALIGN(CONFIG_X86_L1_CACHE_BYTES); | 58 | . = ALIGN(CONFIG_X86_L1_CACHE_BYTES); |
@@ -189,7 +191,7 @@ SECTIONS | |||
189 | /* Sections to be discarded */ | 191 | /* Sections to be discarded */ |
190 | /DISCARD/ : { | 192 | /DISCARD/ : { |
191 | *(.exitcall.exit) | 193 | *(.exitcall.exit) |
192 | #ifndef CONFIG_DEBUG_INFO | 194 | #ifndef CONFIG_UNWIND_INFO |
193 | *(.eh_frame) | 195 | *(.eh_frame) |
194 | #endif | 196 | #endif |
195 | } | 197 | } |
diff --git a/arch/x86_64/kernel/vsmp.c b/arch/x86_64/kernel/vsmp.c new file mode 100644 index 000000000000..92f70c74965f --- /dev/null +++ b/arch/x86_64/kernel/vsmp.c | |||
@@ -0,0 +1,45 @@ | |||
1 | /* | ||
2 | * vSMPowered(tm) systems specific initialization | ||
3 | * Copyright (C) 2005 ScaleMP Inc. | ||
4 | * | ||
5 | * Use of this code is subject to the terms and conditions of the | ||
6 | * GNU general public license version 2. See "COPYING" or | ||
7 | * http://www.gnu.org/licenses/gpl.html | ||
8 | * | ||
9 | * Ravikiran Thirumalai <kiran@scalemp.com>, | ||
10 | * Shai Fultheim <shai@scalemp.com> | ||
11 | */ | ||
12 | |||
13 | #include <linux/init.h> | ||
14 | #include <linux/pci_ids.h> | ||
15 | #include <linux/pci_regs.h> | ||
16 | #include <asm/pci-direct.h> | ||
17 | |||
18 | static int __init vsmp_init(void) | ||
19 | { | ||
20 | void *address; | ||
21 | unsigned int cap, ctl; | ||
22 | |||
23 | /* Check if we are running on a ScaleMP vSMP box */ | ||
24 | if ((read_pci_config_16(0, 0x1f, 0, PCI_VENDOR_ID) != PCI_VENDOR_ID_SCALEMP) || | ||
25 | (read_pci_config_16(0, 0x1f, 0, PCI_DEVICE_ID) != PCI_DEVICE_ID_SCALEMP_VSMP_CTL)) | ||
26 | return 0; | ||
27 | |||
28 | /* set vSMP magic bits to indicate vSMP capable kernel */ | ||
29 | address = ioremap(read_pci_config(0, 0x1f, 0, PCI_BASE_ADDRESS_0), 8); | ||
30 | cap = readl(address); | ||
31 | ctl = readl(address + 4); | ||
32 | printk("vSMP CTL: capabilities:0x%08x control:0x%08x\n", cap, ctl); | ||
33 | if (cap & ctl & (1 << 4)) { | ||
34 | /* Turn on vSMP IRQ fastpath handling (see system.h) */ | ||
35 | ctl &= ~(1 << 4); | ||
36 | writel(ctl, address + 4); | ||
37 | ctl = readl(address + 4); | ||
38 | printk("vSMP CTL: control set to:0x%08x\n", ctl); | ||
39 | } | ||
40 | |||
41 | iounmap(address); | ||
42 | return 0; | ||
43 | } | ||
44 | |||
45 | core_initcall(vsmp_init); | ||
diff --git a/arch/x86_64/kernel/vsyscall.c b/arch/x86_64/kernel/vsyscall.c index 70a0bd16085f..9468fb20b0bc 100644 --- a/arch/x86_64/kernel/vsyscall.c +++ b/arch/x86_64/kernel/vsyscall.c | |||
@@ -35,14 +35,13 @@ | |||
35 | #include <asm/io.h> | 35 | #include <asm/io.h> |
36 | 36 | ||
37 | #define __vsyscall(nr) __attribute__ ((unused,__section__(".vsyscall_" #nr))) | 37 | #define __vsyscall(nr) __attribute__ ((unused,__section__(".vsyscall_" #nr))) |
38 | #define force_inline __attribute__((always_inline)) inline | ||
39 | 38 | ||
40 | int __sysctl_vsyscall __section_sysctl_vsyscall = 1; | 39 | int __sysctl_vsyscall __section_sysctl_vsyscall = 1; |
41 | seqlock_t __xtime_lock __section_xtime_lock = SEQLOCK_UNLOCKED; | 40 | seqlock_t __xtime_lock __section_xtime_lock = SEQLOCK_UNLOCKED; |
42 | 41 | ||
43 | #include <asm/unistd.h> | 42 | #include <asm/unistd.h> |
44 | 43 | ||
45 | static force_inline void timeval_normalize(struct timeval * tv) | 44 | static __always_inline void timeval_normalize(struct timeval * tv) |
46 | { | 45 | { |
47 | time_t __sec; | 46 | time_t __sec; |
48 | 47 | ||
@@ -53,7 +52,7 @@ static force_inline void timeval_normalize(struct timeval * tv) | |||
53 | } | 52 | } |
54 | } | 53 | } |
55 | 54 | ||
56 | static force_inline void do_vgettimeofday(struct timeval * tv) | 55 | static __always_inline void do_vgettimeofday(struct timeval * tv) |
57 | { | 56 | { |
58 | long sequence, t; | 57 | long sequence, t; |
59 | unsigned long sec, usec; | 58 | unsigned long sec, usec; |
@@ -66,8 +65,7 @@ static force_inline void do_vgettimeofday(struct timeval * tv) | |||
66 | (__jiffies - __wall_jiffies) * (1000000 / HZ); | 65 | (__jiffies - __wall_jiffies) * (1000000 / HZ); |
67 | 66 | ||
68 | if (__vxtime.mode != VXTIME_HPET) { | 67 | if (__vxtime.mode != VXTIME_HPET) { |
69 | sync_core(); | 68 | t = get_cycles_sync(); |
70 | rdtscll(t); | ||
71 | if (t < __vxtime.last_tsc) | 69 | if (t < __vxtime.last_tsc) |
72 | t = __vxtime.last_tsc; | 70 | t = __vxtime.last_tsc; |
73 | usec += ((t - __vxtime.last_tsc) * | 71 | usec += ((t - __vxtime.last_tsc) * |
@@ -84,12 +82,12 @@ static force_inline void do_vgettimeofday(struct timeval * tv) | |||
84 | } | 82 | } |
85 | 83 | ||
86 | /* RED-PEN may want to readd seq locking, but then the variable should be write-once. */ | 84 | /* RED-PEN may want to readd seq locking, but then the variable should be write-once. */ |
87 | static force_inline void do_get_tz(struct timezone * tz) | 85 | static __always_inline void do_get_tz(struct timezone * tz) |
88 | { | 86 | { |
89 | *tz = __sys_tz; | 87 | *tz = __sys_tz; |
90 | } | 88 | } |
91 | 89 | ||
92 | static force_inline int gettimeofday(struct timeval *tv, struct timezone *tz) | 90 | static __always_inline int gettimeofday(struct timeval *tv, struct timezone *tz) |
93 | { | 91 | { |
94 | int ret; | 92 | int ret; |
95 | asm volatile("vsysc2: syscall" | 93 | asm volatile("vsysc2: syscall" |
@@ -98,7 +96,7 @@ static force_inline int gettimeofday(struct timeval *tv, struct timezone *tz) | |||
98 | return ret; | 96 | return ret; |
99 | } | 97 | } |
100 | 98 | ||
101 | static force_inline long time_syscall(long *t) | 99 | static __always_inline long time_syscall(long *t) |
102 | { | 100 | { |
103 | long secs; | 101 | long secs; |
104 | asm volatile("vsysc1: syscall" | 102 | asm volatile("vsysc1: syscall" |
diff --git a/arch/x86_64/kernel/x8664_ksyms.c b/arch/x86_64/kernel/x8664_ksyms.c index fd99ddd009bc..b614d54d2ae4 100644 --- a/arch/x86_64/kernel/x8664_ksyms.c +++ b/arch/x86_64/kernel/x8664_ksyms.c | |||
@@ -13,7 +13,6 @@ | |||
13 | #include <linux/string.h> | 13 | #include <linux/string.h> |
14 | #include <linux/syscalls.h> | 14 | #include <linux/syscalls.h> |
15 | #include <linux/tty.h> | 15 | #include <linux/tty.h> |
16 | #include <linux/ioctl32.h> | ||
17 | 16 | ||
18 | #include <asm/semaphore.h> | 17 | #include <asm/semaphore.h> |
19 | #include <asm/processor.h> | 18 | #include <asm/processor.h> |
@@ -45,22 +44,15 @@ extern struct drive_info_struct drive_info; | |||
45 | EXPORT_SYMBOL(drive_info); | 44 | EXPORT_SYMBOL(drive_info); |
46 | #endif | 45 | #endif |
47 | 46 | ||
48 | extern unsigned long get_cmos_time(void); | ||
49 | |||
50 | /* platform dependent support */ | 47 | /* platform dependent support */ |
51 | EXPORT_SYMBOL(boot_cpu_data); | 48 | EXPORT_SYMBOL(boot_cpu_data); |
52 | //EXPORT_SYMBOL(dump_fpu); | 49 | //EXPORT_SYMBOL(dump_fpu); |
53 | EXPORT_SYMBOL(__ioremap); | 50 | EXPORT_SYMBOL(__ioremap); |
54 | EXPORT_SYMBOL(ioremap_nocache); | 51 | EXPORT_SYMBOL(ioremap_nocache); |
55 | EXPORT_SYMBOL(iounmap); | 52 | EXPORT_SYMBOL(iounmap); |
56 | EXPORT_SYMBOL(enable_irq); | ||
57 | EXPORT_SYMBOL(disable_irq); | ||
58 | EXPORT_SYMBOL(disable_irq_nosync); | ||
59 | EXPORT_SYMBOL(probe_irq_mask); | ||
60 | EXPORT_SYMBOL(kernel_thread); | 53 | EXPORT_SYMBOL(kernel_thread); |
61 | EXPORT_SYMBOL(pm_idle); | 54 | EXPORT_SYMBOL(pm_idle); |
62 | EXPORT_SYMBOL(pm_power_off); | 55 | EXPORT_SYMBOL(pm_power_off); |
63 | EXPORT_SYMBOL(get_cmos_time); | ||
64 | 56 | ||
65 | EXPORT_SYMBOL(__down_failed); | 57 | EXPORT_SYMBOL(__down_failed); |
66 | EXPORT_SYMBOL(__down_failed_interruptible); | 58 | EXPORT_SYMBOL(__down_failed_interruptible); |
@@ -84,9 +76,6 @@ EXPORT_SYMBOL(__put_user_2); | |||
84 | EXPORT_SYMBOL(__put_user_4); | 76 | EXPORT_SYMBOL(__put_user_4); |
85 | EXPORT_SYMBOL(__put_user_8); | 77 | EXPORT_SYMBOL(__put_user_8); |
86 | 78 | ||
87 | EXPORT_SYMBOL(strpbrk); | ||
88 | EXPORT_SYMBOL(strstr); | ||
89 | |||
90 | EXPORT_SYMBOL(strncpy_from_user); | 79 | EXPORT_SYMBOL(strncpy_from_user); |
91 | EXPORT_SYMBOL(__strncpy_from_user); | 80 | EXPORT_SYMBOL(__strncpy_from_user); |
92 | EXPORT_SYMBOL(clear_user); | 81 | EXPORT_SYMBOL(clear_user); |
@@ -98,25 +87,18 @@ EXPORT_SYMBOL(copy_in_user); | |||
98 | EXPORT_SYMBOL(strnlen_user); | 87 | EXPORT_SYMBOL(strnlen_user); |
99 | 88 | ||
100 | #ifdef CONFIG_PCI | 89 | #ifdef CONFIG_PCI |
101 | EXPORT_SYMBOL(pci_alloc_consistent); | ||
102 | EXPORT_SYMBOL(pci_free_consistent); | ||
103 | #endif | ||
104 | |||
105 | #ifdef CONFIG_PCI | ||
106 | EXPORT_SYMBOL(pci_mem_start); | 90 | EXPORT_SYMBOL(pci_mem_start); |
107 | #endif | 91 | #endif |
108 | 92 | ||
109 | EXPORT_SYMBOL(copy_page); | 93 | EXPORT_SYMBOL(copy_page); |
110 | EXPORT_SYMBOL(clear_page); | 94 | EXPORT_SYMBOL(clear_page); |
111 | 95 | ||
112 | EXPORT_SYMBOL(cpu_pda); | 96 | EXPORT_SYMBOL(_cpu_pda); |
113 | #ifdef CONFIG_SMP | 97 | #ifdef CONFIG_SMP |
114 | EXPORT_SYMBOL(cpu_data); | 98 | EXPORT_SYMBOL(cpu_data); |
115 | EXPORT_SYMBOL(cpu_online_map); | ||
116 | EXPORT_SYMBOL(__write_lock_failed); | 99 | EXPORT_SYMBOL(__write_lock_failed); |
117 | EXPORT_SYMBOL(__read_lock_failed); | 100 | EXPORT_SYMBOL(__read_lock_failed); |
118 | 101 | ||
119 | EXPORT_SYMBOL(synchronize_irq); | ||
120 | EXPORT_SYMBOL(smp_call_function); | 102 | EXPORT_SYMBOL(smp_call_function); |
121 | EXPORT_SYMBOL(cpu_callout_map); | 103 | EXPORT_SYMBOL(cpu_callout_map); |
122 | #endif | 104 | #endif |
@@ -137,30 +119,17 @@ EXPORT_SYMBOL_GPL(unset_nmi_callback); | |||
137 | #undef memcpy | 119 | #undef memcpy |
138 | #undef memset | 120 | #undef memset |
139 | #undef memmove | 121 | #undef memmove |
140 | #undef memchr | ||
141 | #undef strlen | 122 | #undef strlen |
142 | #undef strncmp | ||
143 | #undef strncpy | ||
144 | #undef strchr | ||
145 | 123 | ||
146 | extern void * memset(void *,int,__kernel_size_t); | 124 | extern void * memset(void *,int,__kernel_size_t); |
147 | extern size_t strlen(const char *); | 125 | extern size_t strlen(const char *); |
148 | extern void * memmove(void * dest,const void *src,size_t count); | 126 | extern void * memmove(void * dest,const void *src,size_t count); |
149 | extern void *memchr(const void *s, int c, size_t n); | ||
150 | extern void * memcpy(void *,const void *,__kernel_size_t); | 127 | extern void * memcpy(void *,const void *,__kernel_size_t); |
151 | extern void * __memcpy(void *,const void *,__kernel_size_t); | 128 | extern void * __memcpy(void *,const void *,__kernel_size_t); |
152 | 129 | ||
153 | EXPORT_SYMBOL(memset); | 130 | EXPORT_SYMBOL(memset); |
154 | EXPORT_SYMBOL(strlen); | 131 | EXPORT_SYMBOL(strlen); |
155 | EXPORT_SYMBOL(memmove); | 132 | EXPORT_SYMBOL(memmove); |
156 | EXPORT_SYMBOL(strncmp); | ||
157 | EXPORT_SYMBOL(strncpy); | ||
158 | EXPORT_SYMBOL(strchr); | ||
159 | EXPORT_SYMBOL(strncat); | ||
160 | EXPORT_SYMBOL(memchr); | ||
161 | EXPORT_SYMBOL(strrchr); | ||
162 | EXPORT_SYMBOL(strnlen); | ||
163 | EXPORT_SYMBOL(memscan); | ||
164 | EXPORT_SYMBOL(memcpy); | 133 | EXPORT_SYMBOL(memcpy); |
165 | EXPORT_SYMBOL(__memcpy); | 134 | EXPORT_SYMBOL(__memcpy); |
166 | 135 | ||
@@ -203,3 +172,6 @@ EXPORT_SYMBOL(flush_tlb_page); | |||
203 | #endif | 172 | #endif |
204 | 173 | ||
205 | EXPORT_SYMBOL(cpu_khz); | 174 | EXPORT_SYMBOL(cpu_khz); |
175 | |||
176 | EXPORT_SYMBOL(load_gs_index); | ||
177 | |||
diff --git a/arch/x86_64/lib/clear_page.S b/arch/x86_64/lib/clear_page.S index 30a9da458c15..43d9fa136180 100644 --- a/arch/x86_64/lib/clear_page.S +++ b/arch/x86_64/lib/clear_page.S | |||
@@ -5,46 +5,8 @@ | |||
5 | .globl clear_page | 5 | .globl clear_page |
6 | .p2align 4 | 6 | .p2align 4 |
7 | clear_page: | 7 | clear_page: |
8 | xorl %eax,%eax | ||
9 | movl $4096/64,%ecx | ||
10 | .p2align 4 | ||
11 | .Lloop: | ||
12 | decl %ecx | ||
13 | #define PUT(x) movq %rax,x*8(%rdi) | ||
14 | movq %rax,(%rdi) | ||
15 | PUT(1) | ||
16 | PUT(2) | ||
17 | PUT(3) | ||
18 | PUT(4) | ||
19 | PUT(5) | ||
20 | PUT(6) | ||
21 | PUT(7) | ||
22 | leaq 64(%rdi),%rdi | ||
23 | jnz .Lloop | ||
24 | nop | ||
25 | ret | ||
26 | clear_page_end: | ||
27 | |||
28 | /* C stepping K8 run faster using the string instructions. | ||
29 | It is also a lot simpler. Use this when possible */ | ||
30 | |||
31 | #include <asm/cpufeature.h> | ||
32 | |||
33 | .section .altinstructions,"a" | ||
34 | .align 8 | ||
35 | .quad clear_page | ||
36 | .quad clear_page_c | ||
37 | .byte X86_FEATURE_K8_C | ||
38 | .byte clear_page_end-clear_page | ||
39 | .byte clear_page_c_end-clear_page_c | ||
40 | .previous | ||
41 | |||
42 | .section .altinstr_replacement,"ax" | ||
43 | clear_page_c: | ||
44 | movl $4096/8,%ecx | 8 | movl $4096/8,%ecx |
45 | xorl %eax,%eax | 9 | xorl %eax,%eax |
46 | rep | 10 | rep |
47 | stosq | 11 | stosq |
48 | ret | 12 | ret |
49 | clear_page_c_end: | ||
50 | .previous | ||
diff --git a/arch/x86_64/lib/copy_page.S b/arch/x86_64/lib/copy_page.S index dd3aa47b6bf5..621a19769406 100644 --- a/arch/x86_64/lib/copy_page.S +++ b/arch/x86_64/lib/copy_page.S | |||
@@ -8,94 +8,7 @@ | |||
8 | .globl copy_page | 8 | .globl copy_page |
9 | .p2align 4 | 9 | .p2align 4 |
10 | copy_page: | 10 | copy_page: |
11 | subq $3*8,%rsp | ||
12 | movq %rbx,(%rsp) | ||
13 | movq %r12,1*8(%rsp) | ||
14 | movq %r13,2*8(%rsp) | ||
15 | |||
16 | movl $(4096/64)-5,%ecx | ||
17 | .p2align 4 | ||
18 | .Loop64: | ||
19 | dec %rcx | ||
20 | |||
21 | movq (%rsi), %rax | ||
22 | movq 8 (%rsi), %rbx | ||
23 | movq 16 (%rsi), %rdx | ||
24 | movq 24 (%rsi), %r8 | ||
25 | movq 32 (%rsi), %r9 | ||
26 | movq 40 (%rsi), %r10 | ||
27 | movq 48 (%rsi), %r11 | ||
28 | movq 56 (%rsi), %r12 | ||
29 | |||
30 | prefetcht0 5*64(%rsi) | ||
31 | |||
32 | movq %rax, (%rdi) | ||
33 | movq %rbx, 8 (%rdi) | ||
34 | movq %rdx, 16 (%rdi) | ||
35 | movq %r8, 24 (%rdi) | ||
36 | movq %r9, 32 (%rdi) | ||
37 | movq %r10, 40 (%rdi) | ||
38 | movq %r11, 48 (%rdi) | ||
39 | movq %r12, 56 (%rdi) | ||
40 | |||
41 | leaq 64 (%rsi), %rsi | ||
42 | leaq 64 (%rdi), %rdi | ||
43 | |||
44 | jnz .Loop64 | ||
45 | |||
46 | movl $5,%ecx | ||
47 | .p2align 4 | ||
48 | .Loop2: | ||
49 | decl %ecx | ||
50 | |||
51 | movq (%rsi), %rax | ||
52 | movq 8 (%rsi), %rbx | ||
53 | movq 16 (%rsi), %rdx | ||
54 | movq 24 (%rsi), %r8 | ||
55 | movq 32 (%rsi), %r9 | ||
56 | movq 40 (%rsi), %r10 | ||
57 | movq 48 (%rsi), %r11 | ||
58 | movq 56 (%rsi), %r12 | ||
59 | |||
60 | movq %rax, (%rdi) | ||
61 | movq %rbx, 8 (%rdi) | ||
62 | movq %rdx, 16 (%rdi) | ||
63 | movq %r8, 24 (%rdi) | ||
64 | movq %r9, 32 (%rdi) | ||
65 | movq %r10, 40 (%rdi) | ||
66 | movq %r11, 48 (%rdi) | ||
67 | movq %r12, 56 (%rdi) | ||
68 | |||
69 | leaq 64(%rdi),%rdi | ||
70 | leaq 64(%rsi),%rsi | ||
71 | |||
72 | jnz .Loop2 | ||
73 | |||
74 | movq (%rsp),%rbx | ||
75 | movq 1*8(%rsp),%r12 | ||
76 | movq 2*8(%rsp),%r13 | ||
77 | addq $3*8,%rsp | ||
78 | ret | ||
79 | |||
80 | /* C stepping K8 run faster using the string copy instructions. | ||
81 | It is also a lot simpler. Use this when possible */ | ||
82 | |||
83 | #include <asm/cpufeature.h> | ||
84 | |||
85 | .section .altinstructions,"a" | ||
86 | .align 8 | ||
87 | .quad copy_page | ||
88 | .quad copy_page_c | ||
89 | .byte X86_FEATURE_K8_C | ||
90 | .byte copy_page_c_end-copy_page_c | ||
91 | .byte copy_page_c_end-copy_page_c | ||
92 | .previous | ||
93 | |||
94 | .section .altinstr_replacement,"ax" | ||
95 | copy_page_c: | ||
96 | movl $4096/8,%ecx | 11 | movl $4096/8,%ecx |
97 | rep | 12 | rep |
98 | movsq | 13 | movsq |
99 | ret | 14 | ret |
100 | copy_page_c_end: | ||
101 | .previous | ||
diff --git a/arch/x86_64/lib/copy_user.S b/arch/x86_64/lib/copy_user.S index dfa358b05c89..79422b6559c3 100644 --- a/arch/x86_64/lib/copy_user.S +++ b/arch/x86_64/lib/copy_user.S | |||
@@ -4,12 +4,9 @@ | |||
4 | * Functions to copy from and to user space. | 4 | * Functions to copy from and to user space. |
5 | */ | 5 | */ |
6 | 6 | ||
7 | #define FIX_ALIGNMENT 1 | ||
8 | |||
9 | #include <asm/current.h> | 7 | #include <asm/current.h> |
10 | #include <asm/asm-offsets.h> | 8 | #include <asm/asm-offsets.h> |
11 | #include <asm/thread_info.h> | 9 | #include <asm/thread_info.h> |
12 | #include <asm/cpufeature.h> | ||
13 | 10 | ||
14 | /* Standard copy_to_user with segment limit checking */ | 11 | /* Standard copy_to_user with segment limit checking */ |
15 | .globl copy_to_user | 12 | .globl copy_to_user |
@@ -21,23 +18,7 @@ copy_to_user: | |||
21 | jc bad_to_user | 18 | jc bad_to_user |
22 | cmpq threadinfo_addr_limit(%rax),%rcx | 19 | cmpq threadinfo_addr_limit(%rax),%rcx |
23 | jae bad_to_user | 20 | jae bad_to_user |
24 | 2: | 21 | jmp copy_user_generic |
25 | .byte 0xe9 /* 32bit jump */ | ||
26 | .long .Lcug-1f | ||
27 | 1: | ||
28 | |||
29 | .section .altinstr_replacement,"ax" | ||
30 | 3: .byte 0xe9 /* replacement jmp with 8 bit immediate */ | ||
31 | .long copy_user_generic_c-1b /* offset */ | ||
32 | .previous | ||
33 | .section .altinstructions,"a" | ||
34 | .align 8 | ||
35 | .quad 2b | ||
36 | .quad 3b | ||
37 | .byte X86_FEATURE_K8_C | ||
38 | .byte 5 | ||
39 | .byte 5 | ||
40 | .previous | ||
41 | 22 | ||
42 | /* Standard copy_from_user with segment limit checking */ | 23 | /* Standard copy_from_user with segment limit checking */ |
43 | .globl copy_from_user | 24 | .globl copy_from_user |
@@ -72,223 +53,44 @@ bad_to_user: | |||
72 | * rsi source | 53 | * rsi source |
73 | * rdx count | 54 | * rdx count |
74 | * | 55 | * |
56 | * Only 4GB of copy is supported. This shouldn't be a problem | ||
57 | * because the kernel normally only writes from/to page sized chunks | ||
58 | * even if user space passed a longer buffer. | ||
59 | * And more would be dangerous because both Intel and AMD have | ||
60 | * errata with rep movsq > 4GB. If someone feels the need to fix | ||
61 | * this please consider this. | ||
62 | * | ||
75 | * Output: | 63 | * Output: |
76 | * eax uncopied bytes or 0 if successful. | 64 | * eax uncopied bytes or 0 if successful. |
77 | */ | 65 | */ |
78 | .globl copy_user_generic | ||
79 | .p2align 4 | ||
80 | copy_user_generic: | ||
81 | .byte 0x66,0x66,0x90 /* 5 byte nop for replacement jump */ | ||
82 | .byte 0x66,0x90 | ||
83 | 1: | ||
84 | .section .altinstr_replacement,"ax" | ||
85 | 2: .byte 0xe9 /* near jump with 32bit immediate */ | ||
86 | .long copy_user_generic_c-1b /* offset */ | ||
87 | .previous | ||
88 | .section .altinstructions,"a" | ||
89 | .align 8 | ||
90 | .quad copy_user_generic | ||
91 | .quad 2b | ||
92 | .byte X86_FEATURE_K8_C | ||
93 | .byte 5 | ||
94 | .byte 5 | ||
95 | .previous | ||
96 | .Lcug: | ||
97 | pushq %rbx | ||
98 | xorl %eax,%eax /*zero for the exception handler */ | ||
99 | |||
100 | #ifdef FIX_ALIGNMENT | ||
101 | /* check for bad alignment of destination */ | ||
102 | movl %edi,%ecx | ||
103 | andl $7,%ecx | ||
104 | jnz .Lbad_alignment | ||
105 | .Lafter_bad_alignment: | ||
106 | #endif | ||
107 | 66 | ||
108 | movq %rdx,%rcx | 67 | .globl copy_user_generic |
109 | 68 | copy_user_generic: | |
110 | movl $64,%ebx | ||
111 | shrq $6,%rdx | ||
112 | decq %rdx | ||
113 | js .Lhandle_tail | ||
114 | |||
115 | .p2align 4 | ||
116 | .Lloop: | ||
117 | .Ls1: movq (%rsi),%r11 | ||
118 | .Ls2: movq 1*8(%rsi),%r8 | ||
119 | .Ls3: movq 2*8(%rsi),%r9 | ||
120 | .Ls4: movq 3*8(%rsi),%r10 | ||
121 | .Ld1: movq %r11,(%rdi) | ||
122 | .Ld2: movq %r8,1*8(%rdi) | ||
123 | .Ld3: movq %r9,2*8(%rdi) | ||
124 | .Ld4: movq %r10,3*8(%rdi) | ||
125 | |||
126 | .Ls5: movq 4*8(%rsi),%r11 | ||
127 | .Ls6: movq 5*8(%rsi),%r8 | ||
128 | .Ls7: movq 6*8(%rsi),%r9 | ||
129 | .Ls8: movq 7*8(%rsi),%r10 | ||
130 | .Ld5: movq %r11,4*8(%rdi) | ||
131 | .Ld6: movq %r8,5*8(%rdi) | ||
132 | .Ld7: movq %r9,6*8(%rdi) | ||
133 | .Ld8: movq %r10,7*8(%rdi) | ||
134 | |||
135 | decq %rdx | ||
136 | |||
137 | leaq 64(%rsi),%rsi | ||
138 | leaq 64(%rdi),%rdi | ||
139 | |||
140 | jns .Lloop | ||
141 | |||
142 | .p2align 4 | ||
143 | .Lhandle_tail: | ||
144 | movl %ecx,%edx | ||
145 | andl $63,%ecx | ||
146 | shrl $3,%ecx | ||
147 | jz .Lhandle_7 | ||
148 | movl $8,%ebx | ||
149 | .p2align 4 | ||
150 | .Lloop_8: | ||
151 | .Ls9: movq (%rsi),%r8 | ||
152 | .Ld9: movq %r8,(%rdi) | ||
153 | decl %ecx | ||
154 | leaq 8(%rdi),%rdi | ||
155 | leaq 8(%rsi),%rsi | ||
156 | jnz .Lloop_8 | ||
157 | |||
158 | .Lhandle_7: | ||
159 | movl %edx,%ecx | ||
160 | andl $7,%ecx | ||
161 | jz .Lende | ||
162 | .p2align 4 | ||
163 | .Lloop_1: | ||
164 | .Ls10: movb (%rsi),%bl | ||
165 | .Ld10: movb %bl,(%rdi) | ||
166 | incq %rdi | ||
167 | incq %rsi | ||
168 | decl %ecx | ||
169 | jnz .Lloop_1 | ||
170 | |||
171 | .Lende: | ||
172 | popq %rbx | ||
173 | ret | ||
174 | |||
175 | #ifdef FIX_ALIGNMENT | ||
176 | /* align destination */ | ||
177 | .p2align 4 | ||
178 | .Lbad_alignment: | ||
179 | movl $8,%r9d | ||
180 | subl %ecx,%r9d | ||
181 | movl %r9d,%ecx | ||
182 | cmpq %r9,%rdx | ||
183 | jz .Lhandle_7 | ||
184 | js .Lhandle_7 | ||
185 | .Lalign_1: | ||
186 | .Ls11: movb (%rsi),%bl | ||
187 | .Ld11: movb %bl,(%rdi) | ||
188 | incq %rsi | ||
189 | incq %rdi | ||
190 | decl %ecx | ||
191 | jnz .Lalign_1 | ||
192 | subq %r9,%rdx | ||
193 | jmp .Lafter_bad_alignment | ||
194 | #endif | ||
195 | |||
196 | /* table sorted by exception address */ | ||
197 | .section __ex_table,"a" | ||
198 | .align 8 | ||
199 | .quad .Ls1,.Ls1e | ||
200 | .quad .Ls2,.Ls2e | ||
201 | .quad .Ls3,.Ls3e | ||
202 | .quad .Ls4,.Ls4e | ||
203 | .quad .Ld1,.Ls1e | ||
204 | .quad .Ld2,.Ls2e | ||
205 | .quad .Ld3,.Ls3e | ||
206 | .quad .Ld4,.Ls4e | ||
207 | .quad .Ls5,.Ls5e | ||
208 | .quad .Ls6,.Ls6e | ||
209 | .quad .Ls7,.Ls7e | ||
210 | .quad .Ls8,.Ls8e | ||
211 | .quad .Ld5,.Ls5e | ||
212 | .quad .Ld6,.Ls6e | ||
213 | .quad .Ld7,.Ls7e | ||
214 | .quad .Ld8,.Ls8e | ||
215 | .quad .Ls9,.Le_quad | ||
216 | .quad .Ld9,.Le_quad | ||
217 | .quad .Ls10,.Le_byte | ||
218 | .quad .Ld10,.Le_byte | ||
219 | #ifdef FIX_ALIGNMENT | ||
220 | .quad .Ls11,.Lzero_rest | ||
221 | .quad .Ld11,.Lzero_rest | ||
222 | #endif | ||
223 | .quad .Le5,.Le_zero | ||
224 | .previous | ||
225 | |||
226 | /* compute 64-offset for main loop. 8 bytes accuracy with error on the | ||
227 | pessimistic side. this is gross. it would be better to fix the | ||
228 | interface. */ | ||
229 | /* eax: zero, ebx: 64 */ | ||
230 | .Ls1e: addl $8,%eax | ||
231 | .Ls2e: addl $8,%eax | ||
232 | .Ls3e: addl $8,%eax | ||
233 | .Ls4e: addl $8,%eax | ||
234 | .Ls5e: addl $8,%eax | ||
235 | .Ls6e: addl $8,%eax | ||
236 | .Ls7e: addl $8,%eax | ||
237 | .Ls8e: addl $8,%eax | ||
238 | addq %rbx,%rdi /* +64 */ | ||
239 | subq %rax,%rdi /* correct destination with computed offset */ | ||
240 | |||
241 | shlq $6,%rdx /* loop counter * 64 (stride length) */ | ||
242 | addq %rax,%rdx /* add offset to loopcnt */ | ||
243 | andl $63,%ecx /* remaining bytes */ | ||
244 | addq %rcx,%rdx /* add them */ | ||
245 | jmp .Lzero_rest | ||
246 | |||
247 | /* exception on quad word loop in tail handling */ | ||
248 | /* ecx: loopcnt/8, %edx: length, rdi: correct */ | ||
249 | .Le_quad: | ||
250 | shll $3,%ecx | ||
251 | andl $7,%edx | ||
252 | addl %ecx,%edx | ||
253 | /* edx: bytes to zero, rdi: dest, eax:zero */ | ||
254 | .Lzero_rest: | ||
255 | movq %rdx,%rcx | ||
256 | .Le_byte: | ||
257 | xorl %eax,%eax | ||
258 | .Le5: rep | ||
259 | stosb | ||
260 | /* when there is another exception while zeroing the rest just return */ | ||
261 | .Le_zero: | ||
262 | movq %rdx,%rax | ||
263 | jmp .Lende | ||
264 | |||
265 | /* C stepping K8 run faster using the string copy instructions. | ||
266 | This is also a lot simpler. Use them when possible. | ||
267 | Patch in jmps to this code instead of copying it fully | ||
268 | to avoid unwanted aliasing in the exception tables. */ | ||
269 | |||
270 | /* rdi destination | ||
271 | * rsi source | ||
272 | * rdx count | ||
273 | * | ||
274 | * Output: | ||
275 | * eax uncopied bytes or 0 if successfull. | ||
276 | */ | ||
277 | copy_user_generic_c: | ||
278 | movl %edx,%ecx | 69 | movl %edx,%ecx |
279 | shrl $3,%ecx | 70 | shrl $3,%ecx |
280 | andl $7,%edx | 71 | andl $7,%edx |
72 | jz 5f | ||
281 | 1: rep | 73 | 1: rep |
282 | movsq | 74 | movsq |
283 | movl %edx,%ecx | 75 | movl %edx,%ecx |
76 | xor %eax,%eax | ||
284 | 2: rep | 77 | 2: rep |
285 | movsb | 78 | movsb |
286 | 4: movl %ecx,%eax | ||
287 | ret | 79 | ret |
80 | /* align here? */ | ||
81 | 5: xorl %eax,%eax | ||
82 | 6: rep movsq | ||
83 | ret | ||
84 | |||
85 | .section .fixup,"ax" | ||
288 | 3: lea (%rdx,%rcx,8),%rax | 86 | 3: lea (%rdx,%rcx,8),%rax |
289 | ret | 87 | ret |
290 | 88 | 4: movl %ecx,%eax | |
89 | ret | ||
90 | .previous | ||
91 | |||
291 | .section __ex_table,"a" | 92 | .section __ex_table,"a" |
292 | .quad 1b,3b | 93 | .quad 1b,3b |
293 | .quad 2b,4b | 94 | .quad 2b,4b |
95 | .quad 6b,4b | ||
294 | .previous | 96 | .previous |
diff --git a/arch/x86_64/lib/delay.c b/arch/x86_64/lib/delay.c index 841bd738a189..03c460cbdd1c 100644 --- a/arch/x86_64/lib/delay.c +++ b/arch/x86_64/lib/delay.c | |||
@@ -39,7 +39,7 @@ void __delay(unsigned long loops) | |||
39 | 39 | ||
40 | inline void __const_udelay(unsigned long xloops) | 40 | inline void __const_udelay(unsigned long xloops) |
41 | { | 41 | { |
42 | __delay(((xloops * cpu_data[raw_smp_processor_id()].loops_per_jiffy) >> 32) * HZ); | 42 | __delay((xloops * HZ * cpu_data[raw_smp_processor_id()].loops_per_jiffy) >> 32); |
43 | } | 43 | } |
44 | 44 | ||
45 | void __udelay(unsigned long usecs) | 45 | void __udelay(unsigned long usecs) |
diff --git a/arch/x86_64/lib/memcpy.S b/arch/x86_64/lib/memcpy.S index c6c46494fef5..92dd80544602 100644 --- a/arch/x86_64/lib/memcpy.S +++ b/arch/x86_64/lib/memcpy.S | |||
@@ -11,6 +11,8 @@ | |||
11 | * | 11 | * |
12 | * Output: | 12 | * Output: |
13 | * rax original destination | 13 | * rax original destination |
14 | * | ||
15 | * TODO: check best memcpy for PSC | ||
14 | */ | 16 | */ |
15 | 17 | ||
16 | .globl __memcpy | 18 | .globl __memcpy |
@@ -18,95 +20,6 @@ | |||
18 | .p2align 4 | 20 | .p2align 4 |
19 | __memcpy: | 21 | __memcpy: |
20 | memcpy: | 22 | memcpy: |
21 | pushq %rbx | ||
22 | movq %rdi,%rax | ||
23 | |||
24 | movl %edx,%ecx | ||
25 | shrl $6,%ecx | ||
26 | jz .Lhandle_tail | ||
27 | |||
28 | .p2align 4 | ||
29 | .Lloop_64: | ||
30 | decl %ecx | ||
31 | |||
32 | movq (%rsi),%r11 | ||
33 | movq 8(%rsi),%r8 | ||
34 | |||
35 | movq %r11,(%rdi) | ||
36 | movq %r8,1*8(%rdi) | ||
37 | |||
38 | movq 2*8(%rsi),%r9 | ||
39 | movq 3*8(%rsi),%r10 | ||
40 | |||
41 | movq %r9,2*8(%rdi) | ||
42 | movq %r10,3*8(%rdi) | ||
43 | |||
44 | movq 4*8(%rsi),%r11 | ||
45 | movq 5*8(%rsi),%r8 | ||
46 | |||
47 | movq %r11,4*8(%rdi) | ||
48 | movq %r8,5*8(%rdi) | ||
49 | |||
50 | movq 6*8(%rsi),%r9 | ||
51 | movq 7*8(%rsi),%r10 | ||
52 | |||
53 | movq %r9,6*8(%rdi) | ||
54 | movq %r10,7*8(%rdi) | ||
55 | |||
56 | leaq 64(%rsi),%rsi | ||
57 | leaq 64(%rdi),%rdi | ||
58 | jnz .Lloop_64 | ||
59 | |||
60 | .Lhandle_tail: | ||
61 | movl %edx,%ecx | ||
62 | andl $63,%ecx | ||
63 | shrl $3,%ecx | ||
64 | jz .Lhandle_7 | ||
65 | .p2align 4 | ||
66 | .Lloop_8: | ||
67 | decl %ecx | ||
68 | movq (%rsi),%r8 | ||
69 | movq %r8,(%rdi) | ||
70 | leaq 8(%rdi),%rdi | ||
71 | leaq 8(%rsi),%rsi | ||
72 | jnz .Lloop_8 | ||
73 | |||
74 | .Lhandle_7: | ||
75 | movl %edx,%ecx | ||
76 | andl $7,%ecx | ||
77 | jz .Lende | ||
78 | .p2align 4 | ||
79 | .Lloop_1: | ||
80 | movb (%rsi),%r8b | ||
81 | movb %r8b,(%rdi) | ||
82 | incq %rdi | ||
83 | incq %rsi | ||
84 | decl %ecx | ||
85 | jnz .Lloop_1 | ||
86 | |||
87 | .Lende: | ||
88 | popq %rbx | ||
89 | ret | ||
90 | .Lfinal: | ||
91 | |||
92 | /* C stepping K8 run faster using the string copy instructions. | ||
93 | It is also a lot simpler. Use this when possible */ | ||
94 | |||
95 | .section .altinstructions,"a" | ||
96 | .align 8 | ||
97 | .quad memcpy | ||
98 | .quad memcpy_c | ||
99 | .byte X86_FEATURE_K8_C | ||
100 | .byte .Lfinal-memcpy | ||
101 | .byte memcpy_c_end-memcpy_c | ||
102 | .previous | ||
103 | |||
104 | .section .altinstr_replacement,"ax" | ||
105 | /* rdi destination | ||
106 | * rsi source | ||
107 | * rdx count | ||
108 | */ | ||
109 | memcpy_c: | ||
110 | movq %rdi,%rax | 23 | movq %rdi,%rax |
111 | movl %edx,%ecx | 24 | movl %edx,%ecx |
112 | shrl $3,%ecx | 25 | shrl $3,%ecx |
@@ -117,5 +30,3 @@ memcpy_c: | |||
117 | rep | 30 | rep |
118 | movsb | 31 | movsb |
119 | ret | 32 | ret |
120 | memcpy_c_end: | ||
121 | .previous | ||
diff --git a/arch/x86_64/lib/memset.S b/arch/x86_64/lib/memset.S index 4b4c40638640..2aa48f24ed1e 100644 --- a/arch/x86_64/lib/memset.S +++ b/arch/x86_64/lib/memset.S | |||
@@ -13,98 +13,6 @@ | |||
13 | .p2align 4 | 13 | .p2align 4 |
14 | memset: | 14 | memset: |
15 | __memset: | 15 | __memset: |
16 | movq %rdi,%r10 | ||
17 | movq %rdx,%r11 | ||
18 | |||
19 | /* expand byte value */ | ||
20 | movzbl %sil,%ecx | ||
21 | movabs $0x0101010101010101,%rax | ||
22 | mul %rcx /* with rax, clobbers rdx */ | ||
23 | |||
24 | /* align dst */ | ||
25 | movl %edi,%r9d | ||
26 | andl $7,%r9d | ||
27 | jnz .Lbad_alignment | ||
28 | .Lafter_bad_alignment: | ||
29 | |||
30 | movl %r11d,%ecx | ||
31 | shrl $6,%ecx | ||
32 | jz .Lhandle_tail | ||
33 | |||
34 | .p2align 4 | ||
35 | .Lloop_64: | ||
36 | decl %ecx | ||
37 | movq %rax,(%rdi) | ||
38 | movq %rax,8(%rdi) | ||
39 | movq %rax,16(%rdi) | ||
40 | movq %rax,24(%rdi) | ||
41 | movq %rax,32(%rdi) | ||
42 | movq %rax,40(%rdi) | ||
43 | movq %rax,48(%rdi) | ||
44 | movq %rax,56(%rdi) | ||
45 | leaq 64(%rdi),%rdi | ||
46 | jnz .Lloop_64 | ||
47 | |||
48 | /* Handle tail in loops. The loops should be faster than hard | ||
49 | to predict jump tables. */ | ||
50 | .p2align 4 | ||
51 | .Lhandle_tail: | ||
52 | movl %r11d,%ecx | ||
53 | andl $63&(~7),%ecx | ||
54 | jz .Lhandle_7 | ||
55 | shrl $3,%ecx | ||
56 | .p2align 4 | ||
57 | .Lloop_8: | ||
58 | decl %ecx | ||
59 | movq %rax,(%rdi) | ||
60 | leaq 8(%rdi),%rdi | ||
61 | jnz .Lloop_8 | ||
62 | |||
63 | .Lhandle_7: | ||
64 | movl %r11d,%ecx | ||
65 | andl $7,%ecx | ||
66 | jz .Lende | ||
67 | .p2align 4 | ||
68 | .Lloop_1: | ||
69 | decl %ecx | ||
70 | movb %al,(%rdi) | ||
71 | leaq 1(%rdi),%rdi | ||
72 | jnz .Lloop_1 | ||
73 | |||
74 | .Lende: | ||
75 | movq %r10,%rax | ||
76 | ret | ||
77 | |||
78 | .Lbad_alignment: | ||
79 | cmpq $7,%r11 | ||
80 | jbe .Lhandle_7 | ||
81 | movq %rax,(%rdi) /* unaligned store */ | ||
82 | movq $8,%r8 | ||
83 | subq %r9,%r8 | ||
84 | addq %r8,%rdi | ||
85 | subq %r8,%r11 | ||
86 | jmp .Lafter_bad_alignment | ||
87 | |||
88 | /* C stepping K8 run faster using the string instructions. | ||
89 | It is also a lot simpler. Use this when possible */ | ||
90 | |||
91 | #include <asm/cpufeature.h> | ||
92 | |||
93 | .section .altinstructions,"a" | ||
94 | .align 8 | ||
95 | .quad memset | ||
96 | .quad memset_c | ||
97 | .byte X86_FEATURE_K8_C | ||
98 | .byte memset_c_end-memset_c | ||
99 | .byte memset_c_end-memset_c | ||
100 | .previous | ||
101 | |||
102 | .section .altinstr_replacement,"ax" | ||
103 | /* rdi destination | ||
104 | * rsi value | ||
105 | * rdx count | ||
106 | */ | ||
107 | memset_c: | ||
108 | movq %rdi,%r9 | 16 | movq %rdi,%r9 |
109 | movl %edx,%r8d | 17 | movl %edx,%r8d |
110 | andl $7,%r8d | 18 | andl $7,%r8d |
@@ -121,5 +29,3 @@ memset_c: | |||
121 | stosb | 29 | stosb |
122 | movq %r9,%rax | 30 | movq %r9,%rax |
123 | ret | 31 | ret |
124 | memset_c_end: | ||
125 | .previous | ||
diff --git a/arch/x86_64/lib/usercopy.c b/arch/x86_64/lib/usercopy.c index db8abba1ad81..9bc2c295818e 100644 --- a/arch/x86_64/lib/usercopy.c +++ b/arch/x86_64/lib/usercopy.c | |||
@@ -109,14 +109,11 @@ unsigned long clear_user(void __user *to, unsigned long n) | |||
109 | * Return 0 on exception, a value greater than N if too long | 109 | * Return 0 on exception, a value greater than N if too long |
110 | */ | 110 | */ |
111 | 111 | ||
112 | long strnlen_user(const char __user *s, long n) | 112 | long __strnlen_user(const char __user *s, long n) |
113 | { | 113 | { |
114 | long res = 0; | 114 | long res = 0; |
115 | char c; | 115 | char c; |
116 | 116 | ||
117 | if (!access_ok(VERIFY_READ, s, n)) | ||
118 | return 0; | ||
119 | |||
120 | while (1) { | 117 | while (1) { |
121 | if (res>n) | 118 | if (res>n) |
122 | return n+1; | 119 | return n+1; |
@@ -129,6 +126,13 @@ long strnlen_user(const char __user *s, long n) | |||
129 | } | 126 | } |
130 | } | 127 | } |
131 | 128 | ||
129 | long strnlen_user(const char __user *s, long n) | ||
130 | { | ||
131 | if (!access_ok(VERIFY_READ, s, n)) | ||
132 | return 0; | ||
133 | return __strnlen_user(s, n); | ||
134 | } | ||
135 | |||
132 | long strlen_user(const char __user *s) | 136 | long strlen_user(const char __user *s) |
133 | { | 137 | { |
134 | long res = 0; | 138 | long res = 0; |
diff --git a/arch/x86_64/mm/Makefile b/arch/x86_64/mm/Makefile index 1d232a87f113..d25ac86fe27a 100644 --- a/arch/x86_64/mm/Makefile +++ b/arch/x86_64/mm/Makefile | |||
@@ -2,7 +2,7 @@ | |||
2 | # Makefile for the linux x86_64-specific parts of the memory manager. | 2 | # Makefile for the linux x86_64-specific parts of the memory manager. |
3 | # | 3 | # |
4 | 4 | ||
5 | obj-y := init.o fault.o ioremap.o extable.o pageattr.o | 5 | obj-y := init.o fault.o ioremap.o extable.o pageattr.o mmap.o |
6 | obj-$(CONFIG_HUGETLB_PAGE) += hugetlbpage.o | 6 | obj-$(CONFIG_HUGETLB_PAGE) += hugetlbpage.o |
7 | obj-$(CONFIG_NUMA) += numa.o | 7 | obj-$(CONFIG_NUMA) += numa.o |
8 | obj-$(CONFIG_K8_NUMA) += k8topology.o | 8 | obj-$(CONFIG_K8_NUMA) += k8topology.o |
diff --git a/arch/x86_64/mm/fault.c b/arch/x86_64/mm/fault.c index b75b872ec154..26eac194064b 100644 --- a/arch/x86_64/mm/fault.c +++ b/arch/x86_64/mm/fault.c | |||
@@ -35,6 +35,13 @@ | |||
35 | #include <asm-generic/sections.h> | 35 | #include <asm-generic/sections.h> |
36 | #include <asm/kdebug.h> | 36 | #include <asm/kdebug.h> |
37 | 37 | ||
38 | /* Page fault error code bits */ | ||
39 | #define PF_PROT (1<<0) /* or no page found */ | ||
40 | #define PF_WRITE (1<<1) | ||
41 | #define PF_USER (1<<2) | ||
42 | #define PF_RSVD (1<<3) | ||
43 | #define PF_INSTR (1<<4) | ||
44 | |||
38 | void bust_spinlocks(int yes) | 45 | void bust_spinlocks(int yes) |
39 | { | 46 | { |
40 | int loglevel_save = console_loglevel; | 47 | int loglevel_save = console_loglevel; |
@@ -68,7 +75,7 @@ static noinline int is_prefetch(struct pt_regs *regs, unsigned long addr, | |||
68 | unsigned char *max_instr; | 75 | unsigned char *max_instr; |
69 | 76 | ||
70 | /* If it was a exec fault ignore */ | 77 | /* If it was a exec fault ignore */ |
71 | if (error_code & (1<<4)) | 78 | if (error_code & PF_INSTR) |
72 | return 0; | 79 | return 0; |
73 | 80 | ||
74 | instr = (unsigned char *)convert_rip_to_linear(current, regs); | 81 | instr = (unsigned char *)convert_rip_to_linear(current, regs); |
@@ -222,17 +229,22 @@ static noinline void pgtable_bad(unsigned long address, struct pt_regs *regs, | |||
222 | unsigned long error_code) | 229 | unsigned long error_code) |
223 | { | 230 | { |
224 | unsigned long flags = oops_begin(); | 231 | unsigned long flags = oops_begin(); |
232 | struct task_struct *tsk; | ||
225 | 233 | ||
226 | printk(KERN_ALERT "%s: Corrupted page table at address %lx\n", | 234 | printk(KERN_ALERT "%s: Corrupted page table at address %lx\n", |
227 | current->comm, address); | 235 | current->comm, address); |
228 | dump_pagetable(address); | 236 | dump_pagetable(address); |
237 | tsk = current; | ||
238 | tsk->thread.cr2 = address; | ||
239 | tsk->thread.trap_no = 14; | ||
240 | tsk->thread.error_code = error_code; | ||
229 | __die("Bad pagetable", regs, error_code); | 241 | __die("Bad pagetable", regs, error_code); |
230 | oops_end(flags); | 242 | oops_end(flags); |
231 | do_exit(SIGKILL); | 243 | do_exit(SIGKILL); |
232 | } | 244 | } |
233 | 245 | ||
234 | /* | 246 | /* |
235 | * Handle a fault on the vmalloc or module mapping area | 247 | * Handle a fault on the vmalloc area |
236 | * | 248 | * |
237 | * This assumes no large pages in there. | 249 | * This assumes no large pages in there. |
238 | */ | 250 | */ |
@@ -278,7 +290,6 @@ static int vmalloc_fault(unsigned long address) | |||
278 | that. */ | 290 | that. */ |
279 | if (!pte_present(*pte) || pte_pfn(*pte) != pte_pfn(*pte_ref)) | 291 | if (!pte_present(*pte) || pte_pfn(*pte) != pte_pfn(*pte_ref)) |
280 | BUG(); | 292 | BUG(); |
281 | __flush_tlb_all(); | ||
282 | return 0; | 293 | return 0; |
283 | } | 294 | } |
284 | 295 | ||
@@ -289,12 +300,6 @@ int exception_trace = 1; | |||
289 | * This routine handles page faults. It determines the address, | 300 | * This routine handles page faults. It determines the address, |
290 | * and the problem, and then passes it off to one of the appropriate | 301 | * and the problem, and then passes it off to one of the appropriate |
291 | * routines. | 302 | * routines. |
292 | * | ||
293 | * error_code: | ||
294 | * bit 0 == 0 means no page found, 1 means protection fault | ||
295 | * bit 1 == 0 means read, 1 means write | ||
296 | * bit 2 == 0 means kernel, 1 means user-mode | ||
297 | * bit 3 == 1 means fault was an instruction fetch | ||
298 | */ | 303 | */ |
299 | asmlinkage void __kprobes do_page_fault(struct pt_regs *regs, | 304 | asmlinkage void __kprobes do_page_fault(struct pt_regs *regs, |
300 | unsigned long error_code) | 305 | unsigned long error_code) |
@@ -308,18 +313,6 @@ asmlinkage void __kprobes do_page_fault(struct pt_regs *regs, | |||
308 | unsigned long flags; | 313 | unsigned long flags; |
309 | siginfo_t info; | 314 | siginfo_t info; |
310 | 315 | ||
311 | #ifdef CONFIG_CHECKING | ||
312 | { | ||
313 | unsigned long gs; | ||
314 | struct x8664_pda *pda = cpu_pda + stack_smp_processor_id(); | ||
315 | rdmsrl(MSR_GS_BASE, gs); | ||
316 | if (gs != (unsigned long)pda) { | ||
317 | wrmsrl(MSR_GS_BASE, pda); | ||
318 | printk("page_fault: wrong gs %lx expected %p\n", gs, pda); | ||
319 | } | ||
320 | } | ||
321 | #endif | ||
322 | |||
323 | /* get the address */ | 316 | /* get the address */ |
324 | __asm__("movq %%cr2,%0":"=r" (address)); | 317 | __asm__("movq %%cr2,%0":"=r" (address)); |
325 | if (notify_die(DIE_PAGE_FAULT, "page fault", regs, error_code, 14, | 318 | if (notify_die(DIE_PAGE_FAULT, "page fault", regs, error_code, 14, |
@@ -349,12 +342,16 @@ asmlinkage void __kprobes do_page_fault(struct pt_regs *regs, | |||
349 | * | 342 | * |
350 | * This verifies that the fault happens in kernel space | 343 | * This verifies that the fault happens in kernel space |
351 | * (error_code & 4) == 0, and that the fault was not a | 344 | * (error_code & 4) == 0, and that the fault was not a |
352 | * protection error (error_code & 1) == 0. | 345 | * protection error (error_code & 9) == 0. |
353 | */ | 346 | */ |
354 | if (unlikely(address >= TASK_SIZE64)) { | 347 | if (unlikely(address >= TASK_SIZE64)) { |
355 | if (!(error_code & 5) && | 348 | /* |
356 | ((address >= VMALLOC_START && address < VMALLOC_END) || | 349 | * Don't check for the module range here: its PML4 |
357 | (address >= MODULES_VADDR && address < MODULES_END))) { | 350 | * is always initialized because it's shared with the main |
351 | * kernel text. Only vmalloc may need PML4 syncups. | ||
352 | */ | ||
353 | if (!(error_code & (PF_RSVD|PF_USER|PF_PROT)) && | ||
354 | ((address >= VMALLOC_START && address < VMALLOC_END))) { | ||
358 | if (vmalloc_fault(address) < 0) | 355 | if (vmalloc_fault(address) < 0) |
359 | goto bad_area_nosemaphore; | 356 | goto bad_area_nosemaphore; |
360 | return; | 357 | return; |
@@ -366,7 +363,7 @@ asmlinkage void __kprobes do_page_fault(struct pt_regs *regs, | |||
366 | goto bad_area_nosemaphore; | 363 | goto bad_area_nosemaphore; |
367 | } | 364 | } |
368 | 365 | ||
369 | if (unlikely(error_code & (1 << 3))) | 366 | if (unlikely(error_code & PF_RSVD)) |
370 | pgtable_bad(address, regs, error_code); | 367 | pgtable_bad(address, regs, error_code); |
371 | 368 | ||
372 | /* | 369 | /* |
@@ -393,7 +390,7 @@ asmlinkage void __kprobes do_page_fault(struct pt_regs *regs, | |||
393 | * thus avoiding the deadlock. | 390 | * thus avoiding the deadlock. |
394 | */ | 391 | */ |
395 | if (!down_read_trylock(&mm->mmap_sem)) { | 392 | if (!down_read_trylock(&mm->mmap_sem)) { |
396 | if ((error_code & 4) == 0 && | 393 | if ((error_code & PF_USER) == 0 && |
397 | !search_exception_tables(regs->rip)) | 394 | !search_exception_tables(regs->rip)) |
398 | goto bad_area_nosemaphore; | 395 | goto bad_area_nosemaphore; |
399 | down_read(&mm->mmap_sem); | 396 | down_read(&mm->mmap_sem); |
@@ -420,17 +417,17 @@ asmlinkage void __kprobes do_page_fault(struct pt_regs *regs, | |||
420 | good_area: | 417 | good_area: |
421 | info.si_code = SEGV_ACCERR; | 418 | info.si_code = SEGV_ACCERR; |
422 | write = 0; | 419 | write = 0; |
423 | switch (error_code & 3) { | 420 | switch (error_code & (PF_PROT|PF_WRITE)) { |
424 | default: /* 3: write, present */ | 421 | default: /* 3: write, present */ |
425 | /* fall through */ | 422 | /* fall through */ |
426 | case 2: /* write, not present */ | 423 | case PF_WRITE: /* write, not present */ |
427 | if (!(vma->vm_flags & VM_WRITE)) | 424 | if (!(vma->vm_flags & VM_WRITE)) |
428 | goto bad_area; | 425 | goto bad_area; |
429 | write++; | 426 | write++; |
430 | break; | 427 | break; |
431 | case 1: /* read, present */ | 428 | case PF_PROT: /* read, present */ |
432 | goto bad_area; | 429 | goto bad_area; |
433 | case 0: /* read, not present */ | 430 | case 0: /* read, not present */ |
434 | if (!(vma->vm_flags & (VM_READ | VM_EXEC))) | 431 | if (!(vma->vm_flags & (VM_READ | VM_EXEC))) |
435 | goto bad_area; | 432 | goto bad_area; |
436 | } | 433 | } |
@@ -465,7 +462,7 @@ bad_area: | |||
465 | 462 | ||
466 | bad_area_nosemaphore: | 463 | bad_area_nosemaphore: |
467 | /* User mode accesses just cause a SIGSEGV */ | 464 | /* User mode accesses just cause a SIGSEGV */ |
468 | if (error_code & 4) { | 465 | if (error_code & PF_USER) { |
469 | if (is_prefetch(regs, address, error_code)) | 466 | if (is_prefetch(regs, address, error_code)) |
470 | return; | 467 | return; |
471 | 468 | ||
@@ -533,6 +530,9 @@ no_context: | |||
533 | printk_address(regs->rip); | 530 | printk_address(regs->rip); |
534 | printk("\n"); | 531 | printk("\n"); |
535 | dump_pagetable(address); | 532 | dump_pagetable(address); |
533 | tsk->thread.cr2 = address; | ||
534 | tsk->thread.trap_no = 14; | ||
535 | tsk->thread.error_code = error_code; | ||
536 | __die("Oops", regs, error_code); | 536 | __die("Oops", regs, error_code); |
537 | /* Executive summary in case the body of the oops scrolled away */ | 537 | /* Executive summary in case the body of the oops scrolled away */ |
538 | printk(KERN_EMERG "CR2: %016lx\n", address); | 538 | printk(KERN_EMERG "CR2: %016lx\n", address); |
@@ -558,7 +558,7 @@ do_sigbus: | |||
558 | up_read(&mm->mmap_sem); | 558 | up_read(&mm->mmap_sem); |
559 | 559 | ||
560 | /* Kernel mode? Handle exceptions or die */ | 560 | /* Kernel mode? Handle exceptions or die */ |
561 | if (!(error_code & 4)) | 561 | if (!(error_code & PF_USER)) |
562 | goto no_context; | 562 | goto no_context; |
563 | 563 | ||
564 | tsk->thread.cr2 = address; | 564 | tsk->thread.cr2 = address; |
@@ -571,3 +571,10 @@ do_sigbus: | |||
571 | force_sig_info(SIGBUS, &info, tsk); | 571 | force_sig_info(SIGBUS, &info, tsk); |
572 | return; | 572 | return; |
573 | } | 573 | } |
574 | |||
575 | static int __init enable_pagefaulttrace(char *str) | ||
576 | { | ||
577 | page_fault_trace = 1; | ||
578 | return 0; | ||
579 | } | ||
580 | __setup("pagefaulttrace", enable_pagefaulttrace); | ||
diff --git a/arch/x86_64/mm/init.c b/arch/x86_64/mm/init.c index e60a1a848de8..7af1742aa958 100644 --- a/arch/x86_64/mm/init.c +++ b/arch/x86_64/mm/init.c | |||
@@ -22,6 +22,10 @@ | |||
22 | #include <linux/pagemap.h> | 22 | #include <linux/pagemap.h> |
23 | #include <linux/bootmem.h> | 23 | #include <linux/bootmem.h> |
24 | #include <linux/proc_fs.h> | 24 | #include <linux/proc_fs.h> |
25 | #include <linux/pci.h> | ||
26 | #include <linux/dma-mapping.h> | ||
27 | #include <linux/module.h> | ||
28 | #include <linux/memory_hotplug.h> | ||
25 | 29 | ||
26 | #include <asm/processor.h> | 30 | #include <asm/processor.h> |
27 | #include <asm/system.h> | 31 | #include <asm/system.h> |
@@ -36,16 +40,18 @@ | |||
36 | #include <asm/mmu_context.h> | 40 | #include <asm/mmu_context.h> |
37 | #include <asm/proto.h> | 41 | #include <asm/proto.h> |
38 | #include <asm/smp.h> | 42 | #include <asm/smp.h> |
43 | #include <asm/sections.h> | ||
44 | #include <asm/dma-mapping.h> | ||
45 | #include <asm/swiotlb.h> | ||
39 | 46 | ||
40 | #ifndef Dprintk | 47 | #ifndef Dprintk |
41 | #define Dprintk(x...) | 48 | #define Dprintk(x...) |
42 | #endif | 49 | #endif |
43 | 50 | ||
44 | #ifdef CONFIG_GART_IOMMU | 51 | struct dma_mapping_ops* dma_ops; |
45 | extern int swiotlb; | 52 | EXPORT_SYMBOL(dma_ops); |
46 | #endif | ||
47 | 53 | ||
48 | extern char _stext[]; | 54 | static unsigned long dma_reserve __initdata; |
49 | 55 | ||
50 | DEFINE_PER_CPU(struct mmu_gather, mmu_gathers); | 56 | DEFINE_PER_CPU(struct mmu_gather, mmu_gathers); |
51 | 57 | ||
@@ -86,9 +92,6 @@ void show_mem(void) | |||
86 | 92 | ||
87 | /* References to section boundaries */ | 93 | /* References to section boundaries */ |
88 | 94 | ||
89 | extern char _text, _etext, _edata, __bss_start, _end[]; | ||
90 | extern char __init_begin, __init_end; | ||
91 | |||
92 | int after_bootmem; | 95 | int after_bootmem; |
93 | 96 | ||
94 | static void *spp_getpage(void) | 97 | static void *spp_getpage(void) |
@@ -179,13 +182,19 @@ static struct temp_map { | |||
179 | {} | 182 | {} |
180 | }; | 183 | }; |
181 | 184 | ||
182 | static __init void *alloc_low_page(int *index, unsigned long *phys) | 185 | static __meminit void *alloc_low_page(int *index, unsigned long *phys) |
183 | { | 186 | { |
184 | struct temp_map *ti; | 187 | struct temp_map *ti; |
185 | int i; | 188 | int i; |
186 | unsigned long pfn = table_end++, paddr; | 189 | unsigned long pfn = table_end++, paddr; |
187 | void *adr; | 190 | void *adr; |
188 | 191 | ||
192 | if (after_bootmem) { | ||
193 | adr = (void *)get_zeroed_page(GFP_ATOMIC); | ||
194 | *phys = __pa(adr); | ||
195 | return adr; | ||
196 | } | ||
197 | |||
189 | if (pfn >= end_pfn) | 198 | if (pfn >= end_pfn) |
190 | panic("alloc_low_page: ran out of memory"); | 199 | panic("alloc_low_page: ran out of memory"); |
191 | for (i = 0; temp_mappings[i].allocated; i++) { | 200 | for (i = 0; temp_mappings[i].allocated; i++) { |
@@ -198,55 +207,86 @@ static __init void *alloc_low_page(int *index, unsigned long *phys) | |||
198 | ti->allocated = 1; | 207 | ti->allocated = 1; |
199 | __flush_tlb(); | 208 | __flush_tlb(); |
200 | adr = ti->address + ((pfn << PAGE_SHIFT) & ~PMD_MASK); | 209 | adr = ti->address + ((pfn << PAGE_SHIFT) & ~PMD_MASK); |
210 | memset(adr, 0, PAGE_SIZE); | ||
201 | *index = i; | 211 | *index = i; |
202 | *phys = pfn * PAGE_SIZE; | 212 | *phys = pfn * PAGE_SIZE; |
203 | return adr; | 213 | return adr; |
204 | } | 214 | } |
205 | 215 | ||
206 | static __init void unmap_low_page(int i) | 216 | static __meminit void unmap_low_page(int i) |
207 | { | 217 | { |
208 | struct temp_map *ti = &temp_mappings[i]; | 218 | struct temp_map *ti; |
219 | |||
220 | if (after_bootmem) | ||
221 | return; | ||
222 | |||
223 | ti = &temp_mappings[i]; | ||
209 | set_pmd(ti->pmd, __pmd(0)); | 224 | set_pmd(ti->pmd, __pmd(0)); |
210 | ti->allocated = 0; | 225 | ti->allocated = 0; |
211 | } | 226 | } |
212 | 227 | ||
213 | static void __init phys_pud_init(pud_t *pud, unsigned long address, unsigned long end) | 228 | static void __meminit |
229 | phys_pmd_init(pmd_t *pmd, unsigned long address, unsigned long end) | ||
230 | { | ||
231 | int i; | ||
232 | |||
233 | for (i = 0; i < PTRS_PER_PMD; pmd++, i++, address += PMD_SIZE) { | ||
234 | unsigned long entry; | ||
235 | |||
236 | if (address > end) { | ||
237 | for (; i < PTRS_PER_PMD; i++, pmd++) | ||
238 | set_pmd(pmd, __pmd(0)); | ||
239 | break; | ||
240 | } | ||
241 | entry = _PAGE_NX|_PAGE_PSE|_KERNPG_TABLE|_PAGE_GLOBAL|address; | ||
242 | entry &= __supported_pte_mask; | ||
243 | set_pmd(pmd, __pmd(entry)); | ||
244 | } | ||
245 | } | ||
246 | |||
247 | static void __meminit | ||
248 | phys_pmd_update(pud_t *pud, unsigned long address, unsigned long end) | ||
249 | { | ||
250 | pmd_t *pmd = pmd_offset(pud, (unsigned long)__va(address)); | ||
251 | |||
252 | if (pmd_none(*pmd)) { | ||
253 | spin_lock(&init_mm.page_table_lock); | ||
254 | phys_pmd_init(pmd, address, end); | ||
255 | spin_unlock(&init_mm.page_table_lock); | ||
256 | __flush_tlb_all(); | ||
257 | } | ||
258 | } | ||
259 | |||
260 | static void __meminit phys_pud_init(pud_t *pud, unsigned long address, unsigned long end) | ||
214 | { | 261 | { |
215 | long i, j; | 262 | long i = pud_index(address); |
216 | 263 | ||
217 | i = pud_index(address); | ||
218 | pud = pud + i; | 264 | pud = pud + i; |
265 | |||
266 | if (after_bootmem && pud_val(*pud)) { | ||
267 | phys_pmd_update(pud, address, end); | ||
268 | return; | ||
269 | } | ||
270 | |||
219 | for (; i < PTRS_PER_PUD; pud++, i++) { | 271 | for (; i < PTRS_PER_PUD; pud++, i++) { |
220 | int map; | 272 | int map; |
221 | unsigned long paddr, pmd_phys; | 273 | unsigned long paddr, pmd_phys; |
222 | pmd_t *pmd; | 274 | pmd_t *pmd; |
223 | 275 | ||
224 | paddr = address + i*PUD_SIZE; | 276 | paddr = (address & PGDIR_MASK) + i*PUD_SIZE; |
225 | if (paddr >= end) { | 277 | if (paddr >= end) |
226 | for (; i < PTRS_PER_PUD; i++, pud++) | ||
227 | set_pud(pud, __pud(0)); | ||
228 | break; | 278 | break; |
229 | } | ||
230 | 279 | ||
231 | if (!e820_mapped(paddr, paddr+PUD_SIZE, 0)) { | 280 | if (!after_bootmem && !e820_mapped(paddr, paddr+PUD_SIZE, 0)) { |
232 | set_pud(pud, __pud(0)); | 281 | set_pud(pud, __pud(0)); |
233 | continue; | 282 | continue; |
234 | } | 283 | } |
235 | 284 | ||
236 | pmd = alloc_low_page(&map, &pmd_phys); | 285 | pmd = alloc_low_page(&map, &pmd_phys); |
286 | spin_lock(&init_mm.page_table_lock); | ||
237 | set_pud(pud, __pud(pmd_phys | _KERNPG_TABLE)); | 287 | set_pud(pud, __pud(pmd_phys | _KERNPG_TABLE)); |
238 | for (j = 0; j < PTRS_PER_PMD; pmd++, j++, paddr += PMD_SIZE) { | 288 | phys_pmd_init(pmd, paddr, end); |
239 | unsigned long pe; | 289 | spin_unlock(&init_mm.page_table_lock); |
240 | |||
241 | if (paddr >= end) { | ||
242 | for (; j < PTRS_PER_PMD; j++, pmd++) | ||
243 | set_pmd(pmd, __pmd(0)); | ||
244 | break; | ||
245 | } | ||
246 | pe = _PAGE_NX|_PAGE_PSE | _KERNPG_TABLE | _PAGE_GLOBAL | paddr; | ||
247 | pe &= __supported_pte_mask; | ||
248 | set_pmd(pmd, __pmd(pe)); | ||
249 | } | ||
250 | unmap_low_page(map); | 290 | unmap_low_page(map); |
251 | } | 291 | } |
252 | __flush_tlb(); | 292 | __flush_tlb(); |
@@ -254,25 +294,32 @@ static void __init phys_pud_init(pud_t *pud, unsigned long address, unsigned lon | |||
254 | 294 | ||
255 | static void __init find_early_table_space(unsigned long end) | 295 | static void __init find_early_table_space(unsigned long end) |
256 | { | 296 | { |
257 | unsigned long puds, pmds, tables; | 297 | unsigned long puds, pmds, tables, start; |
258 | 298 | ||
259 | puds = (end + PUD_SIZE - 1) >> PUD_SHIFT; | 299 | puds = (end + PUD_SIZE - 1) >> PUD_SHIFT; |
260 | pmds = (end + PMD_SIZE - 1) >> PMD_SHIFT; | 300 | pmds = (end + PMD_SIZE - 1) >> PMD_SHIFT; |
261 | tables = round_up(puds * sizeof(pud_t), PAGE_SIZE) + | 301 | tables = round_up(puds * sizeof(pud_t), PAGE_SIZE) + |
262 | round_up(pmds * sizeof(pmd_t), PAGE_SIZE); | 302 | round_up(pmds * sizeof(pmd_t), PAGE_SIZE); |
263 | 303 | ||
264 | table_start = find_e820_area(0x8000, __pa_symbol(&_text), tables); | 304 | /* RED-PEN putting page tables only on node 0 could |
305 | cause a hotspot and fill up ZONE_DMA. The page tables | ||
306 | need roughly 0.5KB per GB. */ | ||
307 | start = 0x8000; | ||
308 | table_start = find_e820_area(start, end, tables); | ||
265 | if (table_start == -1UL) | 309 | if (table_start == -1UL) |
266 | panic("Cannot find space for the kernel page tables"); | 310 | panic("Cannot find space for the kernel page tables"); |
267 | 311 | ||
268 | table_start >>= PAGE_SHIFT; | 312 | table_start >>= PAGE_SHIFT; |
269 | table_end = table_start; | 313 | table_end = table_start; |
314 | |||
315 | early_printk("kernel direct mapping tables up to %lx @ %lx-%lx\n", | ||
316 | end, table_start << PAGE_SHIFT, table_end << PAGE_SHIFT); | ||
270 | } | 317 | } |
271 | 318 | ||
272 | /* Setup the direct mapping of the physical memory at PAGE_OFFSET. | 319 | /* Setup the direct mapping of the physical memory at PAGE_OFFSET. |
273 | This runs before bootmem is initialized and gets pages directly from the | 320 | This runs before bootmem is initialized and gets pages directly from the |
274 | physical memory. To access them they are temporarily mapped. */ | 321 | physical memory. To access them they are temporarily mapped. */ |
275 | void __init init_memory_mapping(unsigned long start, unsigned long end) | 322 | void __meminit init_memory_mapping(unsigned long start, unsigned long end) |
276 | { | 323 | { |
277 | unsigned long next; | 324 | unsigned long next; |
278 | 325 | ||
@@ -284,7 +331,8 @@ void __init init_memory_mapping(unsigned long start, unsigned long end) | |||
284 | * mapped. Unfortunately this is done currently before the nodes are | 331 | * mapped. Unfortunately this is done currently before the nodes are |
285 | * discovered. | 332 | * discovered. |
286 | */ | 333 | */ |
287 | find_early_table_space(end); | 334 | if (!after_bootmem) |
335 | find_early_table_space(end); | ||
288 | 336 | ||
289 | start = (unsigned long)__va(start); | 337 | start = (unsigned long)__va(start); |
290 | end = (unsigned long)__va(end); | 338 | end = (unsigned long)__va(end); |
@@ -292,58 +340,106 @@ void __init init_memory_mapping(unsigned long start, unsigned long end) | |||
292 | for (; start < end; start = next) { | 340 | for (; start < end; start = next) { |
293 | int map; | 341 | int map; |
294 | unsigned long pud_phys; | 342 | unsigned long pud_phys; |
295 | pud_t *pud = alloc_low_page(&map, &pud_phys); | 343 | pgd_t *pgd = pgd_offset_k(start); |
344 | pud_t *pud; | ||
345 | |||
346 | if (after_bootmem) | ||
347 | pud = pud_offset_k(pgd, __PAGE_OFFSET); | ||
348 | else | ||
349 | pud = alloc_low_page(&map, &pud_phys); | ||
350 | |||
296 | next = start + PGDIR_SIZE; | 351 | next = start + PGDIR_SIZE; |
297 | if (next > end) | 352 | if (next > end) |
298 | next = end; | 353 | next = end; |
299 | phys_pud_init(pud, __pa(start), __pa(next)); | 354 | phys_pud_init(pud, __pa(start), __pa(next)); |
300 | set_pgd(pgd_offset_k(start), mk_kernel_pgd(pud_phys)); | 355 | if (!after_bootmem) |
356 | set_pgd(pgd_offset_k(start), mk_kernel_pgd(pud_phys)); | ||
301 | unmap_low_page(map); | 357 | unmap_low_page(map); |
302 | } | 358 | } |
303 | 359 | ||
304 | asm volatile("movq %%cr4,%0" : "=r" (mmu_cr4_features)); | 360 | if (!after_bootmem) |
361 | asm volatile("movq %%cr4,%0" : "=r" (mmu_cr4_features)); | ||
305 | __flush_tlb_all(); | 362 | __flush_tlb_all(); |
306 | early_printk("kernel direct mapping tables upto %lx @ %lx-%lx\n", end, | ||
307 | table_start<<PAGE_SHIFT, | ||
308 | table_end<<PAGE_SHIFT); | ||
309 | } | 363 | } |
310 | 364 | ||
311 | extern struct x8664_pda cpu_pda[NR_CPUS]; | 365 | void __cpuinit zap_low_mappings(int cpu) |
366 | { | ||
367 | if (cpu == 0) { | ||
368 | pgd_t *pgd = pgd_offset_k(0UL); | ||
369 | pgd_clear(pgd); | ||
370 | } else { | ||
371 | /* | ||
372 | * For AP's, zap the low identity mappings by changing the cr3 | ||
373 | * to init_level4_pgt and doing local flush tlb all | ||
374 | */ | ||
375 | asm volatile("movq %0,%%cr3" :: "r" (__pa_symbol(&init_level4_pgt))); | ||
376 | } | ||
377 | __flush_tlb_all(); | ||
378 | } | ||
312 | 379 | ||
313 | /* Assumes all CPUs still execute in init_mm */ | 380 | /* Compute zone sizes for the DMA and DMA32 zones in a node. */ |
314 | void zap_low_mappings(void) | 381 | __init void |
382 | size_zones(unsigned long *z, unsigned long *h, | ||
383 | unsigned long start_pfn, unsigned long end_pfn) | ||
315 | { | 384 | { |
316 | pgd_t *pgd = pgd_offset_k(0UL); | 385 | int i; |
317 | pgd_clear(pgd); | 386 | unsigned long w; |
318 | flush_tlb_all(); | 387 | |
388 | for (i = 0; i < MAX_NR_ZONES; i++) | ||
389 | z[i] = 0; | ||
390 | |||
391 | if (start_pfn < MAX_DMA_PFN) | ||
392 | z[ZONE_DMA] = MAX_DMA_PFN - start_pfn; | ||
393 | if (start_pfn < MAX_DMA32_PFN) { | ||
394 | unsigned long dma32_pfn = MAX_DMA32_PFN; | ||
395 | if (dma32_pfn > end_pfn) | ||
396 | dma32_pfn = end_pfn; | ||
397 | z[ZONE_DMA32] = dma32_pfn - start_pfn; | ||
398 | } | ||
399 | z[ZONE_NORMAL] = end_pfn - start_pfn; | ||
400 | |||
401 | /* Remove lower zones from higher ones. */ | ||
402 | w = 0; | ||
403 | for (i = 0; i < MAX_NR_ZONES; i++) { | ||
404 | if (z[i]) | ||
405 | z[i] -= w; | ||
406 | w += z[i]; | ||
407 | } | ||
408 | |||
409 | /* Compute holes */ | ||
410 | w = start_pfn; | ||
411 | for (i = 0; i < MAX_NR_ZONES; i++) { | ||
412 | unsigned long s = w; | ||
413 | w += z[i]; | ||
414 | h[i] = e820_hole_size(s, w); | ||
415 | } | ||
416 | |||
417 | /* Add the space pace needed for mem_map to the holes too. */ | ||
418 | for (i = 0; i < MAX_NR_ZONES; i++) | ||
419 | h[i] += (z[i] * sizeof(struct page)) / PAGE_SIZE; | ||
420 | |||
421 | /* The 16MB DMA zone has the kernel and other misc mappings. | ||
422 | Account them too */ | ||
423 | if (h[ZONE_DMA]) { | ||
424 | h[ZONE_DMA] += dma_reserve; | ||
425 | if (h[ZONE_DMA] >= z[ZONE_DMA]) { | ||
426 | printk(KERN_WARNING | ||
427 | "Kernel too large and filling up ZONE_DMA?\n"); | ||
428 | h[ZONE_DMA] = z[ZONE_DMA]; | ||
429 | } | ||
430 | } | ||
319 | } | 431 | } |
320 | 432 | ||
321 | #ifndef CONFIG_NUMA | 433 | #ifndef CONFIG_NUMA |
322 | void __init paging_init(void) | 434 | void __init paging_init(void) |
323 | { | 435 | { |
324 | { | 436 | unsigned long zones[MAX_NR_ZONES], holes[MAX_NR_ZONES]; |
325 | unsigned long zones_size[MAX_NR_ZONES]; | 437 | |
326 | unsigned long holes[MAX_NR_ZONES]; | 438 | memory_present(0, 0, end_pfn); |
327 | unsigned int max_dma; | 439 | sparse_init(); |
328 | 440 | size_zones(zones, holes, 0, end_pfn); | |
329 | memset(zones_size, 0, sizeof(zones_size)); | 441 | free_area_init_node(0, NODE_DATA(0), zones, |
330 | memset(holes, 0, sizeof(holes)); | 442 | __pa(PAGE_OFFSET) >> PAGE_SHIFT, holes); |
331 | |||
332 | max_dma = virt_to_phys((char *)MAX_DMA_ADDRESS) >> PAGE_SHIFT; | ||
333 | |||
334 | if (end_pfn < max_dma) { | ||
335 | zones_size[ZONE_DMA] = end_pfn; | ||
336 | holes[ZONE_DMA] = e820_hole_size(0, end_pfn); | ||
337 | } else { | ||
338 | zones_size[ZONE_DMA] = max_dma; | ||
339 | holes[ZONE_DMA] = e820_hole_size(0, max_dma); | ||
340 | zones_size[ZONE_NORMAL] = end_pfn - max_dma; | ||
341 | holes[ZONE_NORMAL] = e820_hole_size(max_dma, end_pfn); | ||
342 | } | ||
343 | free_area_init_node(0, NODE_DATA(0), zones_size, | ||
344 | __pa(PAGE_OFFSET) >> PAGE_SHIFT, holes); | ||
345 | } | ||
346 | return; | ||
347 | } | 443 | } |
348 | #endif | 444 | #endif |
349 | 445 | ||
@@ -381,6 +477,50 @@ void __init clear_kernel_mapping(unsigned long address, unsigned long size) | |||
381 | __flush_tlb_all(); | 477 | __flush_tlb_all(); |
382 | } | 478 | } |
383 | 479 | ||
480 | /* | ||
481 | * Memory hotplug specific functions | ||
482 | * These are only for non-NUMA machines right now. | ||
483 | */ | ||
484 | #ifdef CONFIG_MEMORY_HOTPLUG | ||
485 | |||
486 | void online_page(struct page *page) | ||
487 | { | ||
488 | ClearPageReserved(page); | ||
489 | set_page_count(page, 1); | ||
490 | __free_page(page); | ||
491 | totalram_pages++; | ||
492 | num_physpages++; | ||
493 | } | ||
494 | |||
495 | int add_memory(u64 start, u64 size) | ||
496 | { | ||
497 | struct pglist_data *pgdat = NODE_DATA(0); | ||
498 | struct zone *zone = pgdat->node_zones + MAX_NR_ZONES-2; | ||
499 | unsigned long start_pfn = start >> PAGE_SHIFT; | ||
500 | unsigned long nr_pages = size >> PAGE_SHIFT; | ||
501 | int ret; | ||
502 | |||
503 | ret = __add_pages(zone, start_pfn, nr_pages); | ||
504 | if (ret) | ||
505 | goto error; | ||
506 | |||
507 | init_memory_mapping(start, (start + size -1)); | ||
508 | |||
509 | return ret; | ||
510 | error: | ||
511 | printk("%s: Problem encountered in __add_pages!\n", __func__); | ||
512 | return ret; | ||
513 | } | ||
514 | EXPORT_SYMBOL_GPL(add_memory); | ||
515 | |||
516 | int remove_memory(u64 start, u64 size) | ||
517 | { | ||
518 | return -EINVAL; | ||
519 | } | ||
520 | EXPORT_SYMBOL_GPL(remove_memory); | ||
521 | |||
522 | #endif | ||
523 | |||
384 | static struct kcore_list kcore_mem, kcore_vmalloc, kcore_kernel, kcore_modules, | 524 | static struct kcore_list kcore_mem, kcore_vmalloc, kcore_kernel, kcore_modules, |
385 | kcore_vsyscall; | 525 | kcore_vsyscall; |
386 | 526 | ||
@@ -389,12 +529,9 @@ void __init mem_init(void) | |||
389 | long codesize, reservedpages, datasize, initsize; | 529 | long codesize, reservedpages, datasize, initsize; |
390 | 530 | ||
391 | #ifdef CONFIG_SWIOTLB | 531 | #ifdef CONFIG_SWIOTLB |
392 | if (!iommu_aperture && | 532 | pci_swiotlb_init(); |
393 | (end_pfn >= 0xffffffff>>PAGE_SHIFT || force_iommu)) | ||
394 | swiotlb = 1; | ||
395 | if (swiotlb) | ||
396 | swiotlb_init(); | ||
397 | #endif | 533 | #endif |
534 | no_iommu_init(); | ||
398 | 535 | ||
399 | /* How many end-of-memory variables you have, grandma! */ | 536 | /* How many end-of-memory variables you have, grandma! */ |
400 | max_low_pfn = end_pfn; | 537 | max_low_pfn = end_pfn; |
@@ -438,19 +575,16 @@ void __init mem_init(void) | |||
438 | datasize >> 10, | 575 | datasize >> 10, |
439 | initsize >> 10); | 576 | initsize >> 10); |
440 | 577 | ||
578 | #ifdef CONFIG_SMP | ||
441 | /* | 579 | /* |
442 | * Subtle. SMP is doing its boot stuff late (because it has to | 580 | * Sync boot_level4_pgt mappings with the init_level4_pgt |
443 | * fork idle threads) - but it also needs low mappings for the | 581 | * except for the low identity mappings which are already zapped |
444 | * protected-mode entry to work. We zap these entries only after | 582 | * in init_level4_pgt. This sync-up is essential for AP's bringup |
445 | * the WP-bit has been tested. | ||
446 | */ | 583 | */ |
447 | #ifndef CONFIG_SMP | 584 | memcpy(boot_level4_pgt+1, init_level4_pgt+1, (PTRS_PER_PGD-1)*sizeof(pgd_t)); |
448 | zap_low_mappings(); | ||
449 | #endif | 585 | #endif |
450 | } | 586 | } |
451 | 587 | ||
452 | extern char __initdata_begin[], __initdata_end[]; | ||
453 | |||
454 | void free_initmem(void) | 588 | void free_initmem(void) |
455 | { | 589 | { |
456 | unsigned long addr; | 590 | unsigned long addr; |
@@ -464,13 +598,36 @@ void free_initmem(void) | |||
464 | totalram_pages++; | 598 | totalram_pages++; |
465 | } | 599 | } |
466 | memset(__initdata_begin, 0xba, __initdata_end - __initdata_begin); | 600 | memset(__initdata_begin, 0xba, __initdata_end - __initdata_begin); |
467 | printk ("Freeing unused kernel memory: %luk freed\n", (&__init_end - &__init_begin) >> 10); | 601 | printk ("Freeing unused kernel memory: %luk freed\n", (__init_end - __init_begin) >> 10); |
468 | } | 602 | } |
469 | 603 | ||
604 | #ifdef CONFIG_DEBUG_RODATA | ||
605 | |||
606 | extern char __start_rodata, __end_rodata; | ||
607 | void mark_rodata_ro(void) | ||
608 | { | ||
609 | unsigned long addr = (unsigned long)&__start_rodata; | ||
610 | |||
611 | for (; addr < (unsigned long)&__end_rodata; addr += PAGE_SIZE) | ||
612 | change_page_attr_addr(addr, 1, PAGE_KERNEL_RO); | ||
613 | |||
614 | printk ("Write protecting the kernel read-only data: %luk\n", | ||
615 | (&__end_rodata - &__start_rodata) >> 10); | ||
616 | |||
617 | /* | ||
618 | * change_page_attr_addr() requires a global_flush_tlb() call after it. | ||
619 | * We do this after the printk so that if something went wrong in the | ||
620 | * change, the printk gets out at least to give a better debug hint | ||
621 | * of who is the culprit. | ||
622 | */ | ||
623 | global_flush_tlb(); | ||
624 | } | ||
625 | #endif | ||
626 | |||
470 | #ifdef CONFIG_BLK_DEV_INITRD | 627 | #ifdef CONFIG_BLK_DEV_INITRD |
471 | void free_initrd_mem(unsigned long start, unsigned long end) | 628 | void free_initrd_mem(unsigned long start, unsigned long end) |
472 | { | 629 | { |
473 | if (start < (unsigned long)&_end) | 630 | if (start >= end) |
474 | return; | 631 | return; |
475 | printk ("Freeing initrd memory: %ldk freed\n", (end - start) >> 10); | 632 | printk ("Freeing initrd memory: %ldk freed\n", (end - start) >> 10); |
476 | for (; start < end; start += PAGE_SIZE) { | 633 | for (; start < end; start += PAGE_SIZE) { |
@@ -491,6 +648,8 @@ void __init reserve_bootmem_generic(unsigned long phys, unsigned len) | |||
491 | #else | 648 | #else |
492 | reserve_bootmem(phys, len); | 649 | reserve_bootmem(phys, len); |
493 | #endif | 650 | #endif |
651 | if (phys+len <= MAX_DMA_PFN*PAGE_SIZE) | ||
652 | dma_reserve += len / PAGE_SIZE; | ||
494 | } | 653 | } |
495 | 654 | ||
496 | int kern_addr_valid(unsigned long addr) | 655 | int kern_addr_valid(unsigned long addr) |
@@ -532,10 +691,6 @@ extern int exception_trace, page_fault_trace; | |||
532 | static ctl_table debug_table2[] = { | 691 | static ctl_table debug_table2[] = { |
533 | { 99, "exception-trace", &exception_trace, sizeof(int), 0644, NULL, | 692 | { 99, "exception-trace", &exception_trace, sizeof(int), 0644, NULL, |
534 | proc_dointvec }, | 693 | proc_dointvec }, |
535 | #ifdef CONFIG_CHECKING | ||
536 | { 100, "page-fault-trace", &page_fault_trace, sizeof(int), 0644, NULL, | ||
537 | proc_dointvec }, | ||
538 | #endif | ||
539 | { 0, } | 694 | { 0, } |
540 | }; | 695 | }; |
541 | 696 | ||
diff --git a/arch/x86_64/mm/ioremap.c b/arch/x86_64/mm/ioremap.c index ecf7acb5db9b..ae207064201e 100644 --- a/arch/x86_64/mm/ioremap.c +++ b/arch/x86_64/mm/ioremap.c | |||
@@ -247,9 +247,15 @@ void __iomem *ioremap_nocache (unsigned long phys_addr, unsigned long size) | |||
247 | return __ioremap(phys_addr, size, _PAGE_PCD); | 247 | return __ioremap(phys_addr, size, _PAGE_PCD); |
248 | } | 248 | } |
249 | 249 | ||
250 | /** | ||
251 | * iounmap - Free a IO remapping | ||
252 | * @addr: virtual address from ioremap_* | ||
253 | * | ||
254 | * Caller must ensure there is only one unmapping for the same pointer. | ||
255 | */ | ||
250 | void iounmap(volatile void __iomem *addr) | 256 | void iounmap(volatile void __iomem *addr) |
251 | { | 257 | { |
252 | struct vm_struct *p; | 258 | struct vm_struct *p, *o; |
253 | 259 | ||
254 | if (addr <= high_memory) | 260 | if (addr <= high_memory) |
255 | return; | 261 | return; |
@@ -257,12 +263,31 @@ void iounmap(volatile void __iomem *addr) | |||
257 | addr < phys_to_virt(ISA_END_ADDRESS)) | 263 | addr < phys_to_virt(ISA_END_ADDRESS)) |
258 | return; | 264 | return; |
259 | 265 | ||
260 | write_lock(&vmlist_lock); | 266 | addr = (volatile void __iomem *)(PAGE_MASK & (unsigned long __force)addr); |
261 | p = __remove_vm_area((void *)((unsigned long)addr & PAGE_MASK)); | 267 | /* Use the vm area unlocked, assuming the caller |
262 | if (!p) | 268 | ensures there isn't another iounmap for the same address |
269 | in parallel. Reuse of the virtual address is prevented by | ||
270 | leaving it in the global lists until we're done with it. | ||
271 | cpa takes care of the direct mappings. */ | ||
272 | read_lock(&vmlist_lock); | ||
273 | for (p = vmlist; p; p = p->next) { | ||
274 | if (p->addr == addr) | ||
275 | break; | ||
276 | } | ||
277 | read_unlock(&vmlist_lock); | ||
278 | |||
279 | if (!p) { | ||
263 | printk("iounmap: bad address %p\n", addr); | 280 | printk("iounmap: bad address %p\n", addr); |
264 | else if (p->flags >> 20) | 281 | dump_stack(); |
282 | return; | ||
283 | } | ||
284 | |||
285 | /* Reset the direct mapping. Can block */ | ||
286 | if (p->flags >> 20) | ||
265 | ioremap_change_attr(p->phys_addr, p->size, 0); | 287 | ioremap_change_attr(p->phys_addr, p->size, 0); |
266 | write_unlock(&vmlist_lock); | 288 | |
289 | /* Finally remove it */ | ||
290 | o = remove_vm_area((void *)addr); | ||
291 | BUG_ON(p != o || o == NULL); | ||
267 | kfree(p); | 292 | kfree(p); |
268 | } | 293 | } |
diff --git a/arch/x86_64/mm/k8topology.c b/arch/x86_64/mm/k8topology.c index 65417b040c1b..a5663e0bb01c 100644 --- a/arch/x86_64/mm/k8topology.c +++ b/arch/x86_64/mm/k8topology.c | |||
@@ -108,6 +108,7 @@ int __init k8_scan_nodes(unsigned long start, unsigned long end) | |||
108 | limit >>= 16; | 108 | limit >>= 16; |
109 | limit <<= 24; | 109 | limit <<= 24; |
110 | limit |= (1<<24)-1; | 110 | limit |= (1<<24)-1; |
111 | limit++; | ||
111 | 112 | ||
112 | if (limit > end_pfn << PAGE_SHIFT) | 113 | if (limit > end_pfn << PAGE_SHIFT) |
113 | limit = end_pfn << PAGE_SHIFT; | 114 | limit = end_pfn << PAGE_SHIFT; |
diff --git a/arch/x86_64/mm/mmap.c b/arch/x86_64/mm/mmap.c new file mode 100644 index 000000000000..43e9b99bdf25 --- /dev/null +++ b/arch/x86_64/mm/mmap.c | |||
@@ -0,0 +1,30 @@ | |||
1 | /* Copyright 2005 Andi Kleen, SuSE Labs. | ||
2 | * Licensed under GPL, v.2 | ||
3 | */ | ||
4 | #include <linux/config.h> | ||
5 | #include <linux/mm.h> | ||
6 | #include <linux/sched.h> | ||
7 | #include <linux/random.h> | ||
8 | #include <asm/ia32.h> | ||
9 | |||
10 | /* Notebook: move the mmap code from sys_x86_64.c over here. */ | ||
11 | |||
12 | void arch_pick_mmap_layout(struct mm_struct *mm) | ||
13 | { | ||
14 | #ifdef CONFIG_IA32_EMULATION | ||
15 | if (current_thread_info()->flags & _TIF_IA32) | ||
16 | return ia32_pick_mmap_layout(mm); | ||
17 | #endif | ||
18 | mm->mmap_base = TASK_UNMAPPED_BASE; | ||
19 | if (current->flags & PF_RANDOMIZE) { | ||
20 | /* Add 28bit randomness which is about 40bits of address space | ||
21 | because mmap base has to be page aligned. | ||
22 | or ~1/128 of the total user VM | ||
23 | (total user address space is 47bits) */ | ||
24 | unsigned rnd = get_random_int() & 0xfffffff; | ||
25 | mm->mmap_base += ((unsigned long)rnd) << PAGE_SHIFT; | ||
26 | } | ||
27 | mm->get_unmapped_area = arch_get_unmapped_area; | ||
28 | mm->unmap_area = arch_unmap_area; | ||
29 | } | ||
30 | |||
diff --git a/arch/x86_64/mm/numa.c b/arch/x86_64/mm/numa.c index 214803821001..6ef9f9a76235 100644 --- a/arch/x86_64/mm/numa.c +++ b/arch/x86_64/mm/numa.c | |||
@@ -38,38 +38,59 @@ cpumask_t node_to_cpumask[MAX_NUMNODES] __read_mostly; | |||
38 | 38 | ||
39 | int numa_off __initdata; | 39 | int numa_off __initdata; |
40 | 40 | ||
41 | int __init compute_hash_shift(struct node *nodes, int numnodes) | 41 | |
42 | /* | ||
43 | * Given a shift value, try to populate memnodemap[] | ||
44 | * Returns : | ||
45 | * 1 if OK | ||
46 | * 0 if memnodmap[] too small (of shift too small) | ||
47 | * -1 if node overlap or lost ram (shift too big) | ||
48 | */ | ||
49 | static int __init | ||
50 | populate_memnodemap(const struct node *nodes, int numnodes, int shift) | ||
42 | { | 51 | { |
43 | int i; | 52 | int i; |
44 | int shift = 20; | 53 | int res = -1; |
45 | unsigned long addr,maxend=0; | 54 | unsigned long addr, end; |
46 | |||
47 | for (i = 0; i < numnodes; i++) | ||
48 | if ((nodes[i].start != nodes[i].end) && (nodes[i].end > maxend)) | ||
49 | maxend = nodes[i].end; | ||
50 | 55 | ||
51 | while ((1UL << shift) < (maxend / NODEMAPSIZE)) | 56 | if (shift >= 64) |
52 | shift++; | 57 | return -1; |
53 | 58 | memset(memnodemap, 0xff, sizeof(memnodemap)); | |
54 | printk (KERN_DEBUG"Using %d for the hash shift. Max adder is %lx \n", | ||
55 | shift,maxend); | ||
56 | memset(memnodemap,0xff,sizeof(*memnodemap) * NODEMAPSIZE); | ||
57 | for (i = 0; i < numnodes; i++) { | 59 | for (i = 0; i < numnodes; i++) { |
58 | if (nodes[i].start == nodes[i].end) | 60 | addr = nodes[i].start; |
61 | end = nodes[i].end; | ||
62 | if (addr >= end) | ||
59 | continue; | 63 | continue; |
60 | for (addr = nodes[i].start; | 64 | if ((end >> shift) >= NODEMAPSIZE) |
61 | addr < nodes[i].end; | 65 | return 0; |
62 | addr += (1UL << shift)) { | 66 | do { |
63 | if (memnodemap[addr >> shift] != 0xff) { | 67 | if (memnodemap[addr >> shift] != 0xff) |
64 | printk(KERN_INFO | ||
65 | "Your memory is not aligned you need to rebuild your kernel " | ||
66 | "with a bigger NODEMAPSIZE shift=%d adder=%lu\n", | ||
67 | shift,addr); | ||
68 | return -1; | 68 | return -1; |
69 | } | ||
70 | memnodemap[addr >> shift] = i; | 69 | memnodemap[addr >> shift] = i; |
71 | } | 70 | addr += (1UL << shift); |
71 | } while (addr < end); | ||
72 | res = 1; | ||
72 | } | 73 | } |
74 | return res; | ||
75 | } | ||
76 | |||
77 | int __init compute_hash_shift(struct node *nodes, int numnodes) | ||
78 | { | ||
79 | int shift = 20; | ||
80 | |||
81 | while (populate_memnodemap(nodes, numnodes, shift + 1) >= 0) | ||
82 | shift++; | ||
83 | |||
84 | printk(KERN_DEBUG "NUMA: Using %d for the hash shift.\n", | ||
85 | shift); | ||
86 | |||
87 | if (populate_memnodemap(nodes, numnodes, shift) != 1) { | ||
88 | printk(KERN_INFO | ||
89 | "Your memory is not aligned you need to rebuild your kernel " | ||
90 | "with a bigger NODEMAPSIZE shift=%d\n", | ||
91 | shift); | ||
92 | return -1; | ||
93 | } | ||
73 | return shift; | 94 | return shift; |
74 | } | 95 | } |
75 | 96 | ||
@@ -89,12 +110,11 @@ void __init setup_node_bootmem(int nodeid, unsigned long start, unsigned long en | |||
89 | 110 | ||
90 | start = round_up(start, ZONE_ALIGN); | 111 | start = round_up(start, ZONE_ALIGN); |
91 | 112 | ||
92 | printk("Bootmem setup node %d %016lx-%016lx\n", nodeid, start, end); | 113 | printk(KERN_INFO "Bootmem setup node %d %016lx-%016lx\n", nodeid, start, end); |
93 | 114 | ||
94 | start_pfn = start >> PAGE_SHIFT; | 115 | start_pfn = start >> PAGE_SHIFT; |
95 | end_pfn = end >> PAGE_SHIFT; | 116 | end_pfn = end >> PAGE_SHIFT; |
96 | 117 | ||
97 | memory_present(nodeid, start_pfn, end_pfn); | ||
98 | nodedata_phys = find_e820_area(start, end, pgdat_size); | 118 | nodedata_phys = find_e820_area(start, end, pgdat_size); |
99 | if (nodedata_phys == -1L) | 119 | if (nodedata_phys == -1L) |
100 | panic("Cannot find memory pgdat in node %d\n", nodeid); | 120 | panic("Cannot find memory pgdat in node %d\n", nodeid); |
@@ -132,29 +152,14 @@ void __init setup_node_zones(int nodeid) | |||
132 | unsigned long start_pfn, end_pfn; | 152 | unsigned long start_pfn, end_pfn; |
133 | unsigned long zones[MAX_NR_ZONES]; | 153 | unsigned long zones[MAX_NR_ZONES]; |
134 | unsigned long holes[MAX_NR_ZONES]; | 154 | unsigned long holes[MAX_NR_ZONES]; |
135 | unsigned long dma_end_pfn; | ||
136 | 155 | ||
137 | memset(zones, 0, sizeof(unsigned long) * MAX_NR_ZONES); | 156 | start_pfn = node_start_pfn(nodeid); |
138 | memset(holes, 0, sizeof(unsigned long) * MAX_NR_ZONES); | 157 | end_pfn = node_end_pfn(nodeid); |
139 | 158 | ||
140 | start_pfn = node_start_pfn(nodeid); | 159 | Dprintk(KERN_INFO "Setting up node %d %lx-%lx\n", |
141 | end_pfn = node_end_pfn(nodeid); | 160 | nodeid, start_pfn, end_pfn); |
142 | 161 | ||
143 | Dprintk(KERN_INFO "setting up node %d %lx-%lx\n", nodeid, start_pfn, end_pfn); | 162 | size_zones(zones, holes, start_pfn, end_pfn); |
144 | |||
145 | /* All nodes > 0 have a zero length zone DMA */ | ||
146 | dma_end_pfn = __pa(MAX_DMA_ADDRESS) >> PAGE_SHIFT; | ||
147 | if (start_pfn < dma_end_pfn) { | ||
148 | zones[ZONE_DMA] = dma_end_pfn - start_pfn; | ||
149 | holes[ZONE_DMA] = e820_hole_size(start_pfn, dma_end_pfn); | ||
150 | zones[ZONE_NORMAL] = end_pfn - dma_end_pfn; | ||
151 | holes[ZONE_NORMAL] = e820_hole_size(dma_end_pfn, end_pfn); | ||
152 | |||
153 | } else { | ||
154 | zones[ZONE_NORMAL] = end_pfn - start_pfn; | ||
155 | holes[ZONE_NORMAL] = e820_hole_size(start_pfn, end_pfn); | ||
156 | } | ||
157 | |||
158 | free_area_init_node(nodeid, NODE_DATA(nodeid), zones, | 163 | free_area_init_node(nodeid, NODE_DATA(nodeid), zones, |
159 | start_pfn, holes); | 164 | start_pfn, holes); |
160 | } | 165 | } |
@@ -171,7 +176,7 @@ void __init numa_init_array(void) | |||
171 | for (i = 0; i < NR_CPUS; i++) { | 176 | for (i = 0; i < NR_CPUS; i++) { |
172 | if (cpu_to_node[i] != NUMA_NO_NODE) | 177 | if (cpu_to_node[i] != NUMA_NO_NODE) |
173 | continue; | 178 | continue; |
174 | cpu_to_node[i] = rr; | 179 | numa_set_node(i, rr); |
175 | rr = next_node(rr, node_online_map); | 180 | rr = next_node(rr, node_online_map); |
176 | if (rr == MAX_NUMNODES) | 181 | if (rr == MAX_NUMNODES) |
177 | rr = first_node(node_online_map); | 182 | rr = first_node(node_online_map); |
@@ -195,7 +200,7 @@ static int numa_emulation(unsigned long start_pfn, unsigned long end_pfn) | |||
195 | while ((x << 1) < sz) | 200 | while ((x << 1) < sz) |
196 | x <<= 1; | 201 | x <<= 1; |
197 | if (x < sz/2) | 202 | if (x < sz/2) |
198 | printk("Numa emulation unbalanced. Complain to maintainer\n"); | 203 | printk(KERN_ERR "Numa emulation unbalanced. Complain to maintainer\n"); |
199 | sz = x; | 204 | sz = x; |
200 | } | 205 | } |
201 | 206 | ||
@@ -205,8 +210,6 @@ static int numa_emulation(unsigned long start_pfn, unsigned long end_pfn) | |||
205 | if (i == numa_fake-1) | 210 | if (i == numa_fake-1) |
206 | sz = (end_pfn<<PAGE_SHIFT) - nodes[i].start; | 211 | sz = (end_pfn<<PAGE_SHIFT) - nodes[i].start; |
207 | nodes[i].end = nodes[i].start + sz; | 212 | nodes[i].end = nodes[i].start + sz; |
208 | if (i != numa_fake-1) | ||
209 | nodes[i].end--; | ||
210 | printk(KERN_INFO "Faking node %d at %016Lx-%016Lx (%LuMB)\n", | 213 | printk(KERN_INFO "Faking node %d at %016Lx-%016Lx (%LuMB)\n", |
211 | i, | 214 | i, |
212 | nodes[i].start, nodes[i].end, | 215 | nodes[i].start, nodes[i].end, |
@@ -257,7 +260,7 @@ void __init numa_initmem_init(unsigned long start_pfn, unsigned long end_pfn) | |||
257 | nodes_clear(node_online_map); | 260 | nodes_clear(node_online_map); |
258 | node_set_online(0); | 261 | node_set_online(0); |
259 | for (i = 0; i < NR_CPUS; i++) | 262 | for (i = 0; i < NR_CPUS; i++) |
260 | cpu_to_node[i] = 0; | 263 | numa_set_node(i, 0); |
261 | node_to_cpumask[0] = cpumask_of_cpu(0); | 264 | node_to_cpumask[0] = cpumask_of_cpu(0); |
262 | setup_node_bootmem(0, start_pfn << PAGE_SHIFT, end_pfn << PAGE_SHIFT); | 265 | setup_node_bootmem(0, start_pfn << PAGE_SHIFT, end_pfn << PAGE_SHIFT); |
263 | } | 266 | } |
@@ -267,6 +270,12 @@ __cpuinit void numa_add_cpu(int cpu) | |||
267 | set_bit(cpu, &node_to_cpumask[cpu_to_node(cpu)]); | 270 | set_bit(cpu, &node_to_cpumask[cpu_to_node(cpu)]); |
268 | } | 271 | } |
269 | 272 | ||
273 | void __cpuinit numa_set_node(int cpu, int node) | ||
274 | { | ||
275 | cpu_pda(cpu)->nodenumber = node; | ||
276 | cpu_to_node[cpu] = node; | ||
277 | } | ||
278 | |||
270 | unsigned long __init numa_free_all_bootmem(void) | 279 | unsigned long __init numa_free_all_bootmem(void) |
271 | { | 280 | { |
272 | int i; | 281 | int i; |
@@ -277,9 +286,26 @@ unsigned long __init numa_free_all_bootmem(void) | |||
277 | return pages; | 286 | return pages; |
278 | } | 287 | } |
279 | 288 | ||
289 | #ifdef CONFIG_SPARSEMEM | ||
290 | static void __init arch_sparse_init(void) | ||
291 | { | ||
292 | int i; | ||
293 | |||
294 | for_each_online_node(i) | ||
295 | memory_present(i, node_start_pfn(i), node_end_pfn(i)); | ||
296 | |||
297 | sparse_init(); | ||
298 | } | ||
299 | #else | ||
300 | #define arch_sparse_init() do {} while (0) | ||
301 | #endif | ||
302 | |||
280 | void __init paging_init(void) | 303 | void __init paging_init(void) |
281 | { | 304 | { |
282 | int i; | 305 | int i; |
306 | |||
307 | arch_sparse_init(); | ||
308 | |||
283 | for_each_online_node(i) { | 309 | for_each_online_node(i) { |
284 | setup_node_zones(i); | 310 | setup_node_zones(i); |
285 | } | 311 | } |
@@ -304,8 +330,69 @@ __init int numa_setup(char *opt) | |||
304 | return 1; | 330 | return 1; |
305 | } | 331 | } |
306 | 332 | ||
333 | /* | ||
334 | * Setup early cpu_to_node. | ||
335 | * | ||
336 | * Populate cpu_to_node[] only if x86_cpu_to_apicid[], | ||
337 | * and apicid_to_node[] tables have valid entries for a CPU. | ||
338 | * This means we skip cpu_to_node[] initialisation for NUMA | ||
339 | * emulation and faking node case (when running a kernel compiled | ||
340 | * for NUMA on a non NUMA box), which is OK as cpu_to_node[] | ||
341 | * is already initialized in a round robin manner at numa_init_array, | ||
342 | * prior to this call, and this initialization is good enough | ||
343 | * for the fake NUMA cases. | ||
344 | */ | ||
345 | void __init init_cpu_to_node(void) | ||
346 | { | ||
347 | int i; | ||
348 | for (i = 0; i < NR_CPUS; i++) { | ||
349 | u8 apicid = x86_cpu_to_apicid[i]; | ||
350 | if (apicid == BAD_APICID) | ||
351 | continue; | ||
352 | if (apicid_to_node[apicid] == NUMA_NO_NODE) | ||
353 | continue; | ||
354 | cpu_to_node[i] = apicid_to_node[apicid]; | ||
355 | } | ||
356 | } | ||
357 | |||
307 | EXPORT_SYMBOL(cpu_to_node); | 358 | EXPORT_SYMBOL(cpu_to_node); |
308 | EXPORT_SYMBOL(node_to_cpumask); | 359 | EXPORT_SYMBOL(node_to_cpumask); |
309 | EXPORT_SYMBOL(memnode_shift); | 360 | EXPORT_SYMBOL(memnode_shift); |
310 | EXPORT_SYMBOL(memnodemap); | 361 | EXPORT_SYMBOL(memnodemap); |
311 | EXPORT_SYMBOL(node_data); | 362 | EXPORT_SYMBOL(node_data); |
363 | |||
364 | #ifdef CONFIG_DISCONTIGMEM | ||
365 | /* | ||
366 | * Functions to convert PFNs from/to per node page addresses. | ||
367 | * These are out of line because they are quite big. | ||
368 | * They could be all tuned by pre caching more state. | ||
369 | * Should do that. | ||
370 | */ | ||
371 | |||
372 | /* Requires pfn_valid(pfn) to be true */ | ||
373 | struct page *pfn_to_page(unsigned long pfn) | ||
374 | { | ||
375 | int nid = phys_to_nid(((unsigned long)(pfn)) << PAGE_SHIFT); | ||
376 | return (pfn - node_start_pfn(nid)) + NODE_DATA(nid)->node_mem_map; | ||
377 | } | ||
378 | EXPORT_SYMBOL(pfn_to_page); | ||
379 | |||
380 | unsigned long page_to_pfn(struct page *page) | ||
381 | { | ||
382 | return (long)(((page) - page_zone(page)->zone_mem_map) + | ||
383 | page_zone(page)->zone_start_pfn); | ||
384 | } | ||
385 | EXPORT_SYMBOL(page_to_pfn); | ||
386 | |||
387 | int pfn_valid(unsigned long pfn) | ||
388 | { | ||
389 | unsigned nid; | ||
390 | if (pfn >= num_physpages) | ||
391 | return 0; | ||
392 | nid = pfn_to_nid(pfn); | ||
393 | if (nid == 0xff) | ||
394 | return 0; | ||
395 | return pfn >= node_start_pfn(nid) && (pfn) < node_end_pfn(nid); | ||
396 | } | ||
397 | EXPORT_SYMBOL(pfn_valid); | ||
398 | #endif | ||
diff --git a/arch/x86_64/mm/pageattr.c b/arch/x86_64/mm/pageattr.c index b90e8fe9eeb0..35f1f1aab063 100644 --- a/arch/x86_64/mm/pageattr.c +++ b/arch/x86_64/mm/pageattr.c | |||
@@ -128,6 +128,7 @@ __change_page_attr(unsigned long address, unsigned long pfn, pgprot_t prot, | |||
128 | pte_t *kpte; | 128 | pte_t *kpte; |
129 | struct page *kpte_page; | 129 | struct page *kpte_page; |
130 | unsigned kpte_flags; | 130 | unsigned kpte_flags; |
131 | pgprot_t ref_prot2; | ||
131 | kpte = lookup_address(address); | 132 | kpte = lookup_address(address); |
132 | if (!kpte) return 0; | 133 | if (!kpte) return 0; |
133 | kpte_page = virt_to_page(((unsigned long)kpte) & PAGE_MASK); | 134 | kpte_page = virt_to_page(((unsigned long)kpte) & PAGE_MASK); |
@@ -140,10 +141,14 @@ __change_page_attr(unsigned long address, unsigned long pfn, pgprot_t prot, | |||
140 | * split_large_page will take the reference for this change_page_attr | 141 | * split_large_page will take the reference for this change_page_attr |
141 | * on the split page. | 142 | * on the split page. |
142 | */ | 143 | */ |
143 | struct page *split = split_large_page(address, prot, ref_prot); | 144 | |
145 | struct page *split; | ||
146 | ref_prot2 = __pgprot(pgprot_val(pte_pgprot(*lookup_address(address))) & ~(1<<_PAGE_BIT_PSE)); | ||
147 | |||
148 | split = split_large_page(address, prot, ref_prot2); | ||
144 | if (!split) | 149 | if (!split) |
145 | return -ENOMEM; | 150 | return -ENOMEM; |
146 | set_pte(kpte,mk_pte(split, ref_prot)); | 151 | set_pte(kpte,mk_pte(split, ref_prot2)); |
147 | kpte_page = split; | 152 | kpte_page = split; |
148 | } | 153 | } |
149 | get_page(kpte_page); | 154 | get_page(kpte_page); |
diff --git a/arch/x86_64/mm/srat.c b/arch/x86_64/mm/srat.c index 4b2e844c15a7..8b7f85608fa8 100644 --- a/arch/x86_64/mm/srat.c +++ b/arch/x86_64/mm/srat.c | |||
@@ -17,21 +17,23 @@ | |||
17 | #include <linux/topology.h> | 17 | #include <linux/topology.h> |
18 | #include <asm/proto.h> | 18 | #include <asm/proto.h> |
19 | #include <asm/numa.h> | 19 | #include <asm/numa.h> |
20 | #include <asm/e820.h> | ||
20 | 21 | ||
21 | static struct acpi_table_slit *acpi_slit; | 22 | static struct acpi_table_slit *acpi_slit; |
22 | 23 | ||
23 | static nodemask_t nodes_parsed __initdata; | 24 | static nodemask_t nodes_parsed __initdata; |
24 | static nodemask_t nodes_found __initdata; | 25 | static nodemask_t nodes_found __initdata; |
25 | static struct node nodes[MAX_NUMNODES] __initdata; | 26 | static struct node nodes[MAX_NUMNODES] __initdata; |
26 | static __u8 pxm2node[256] = { [0 ... 255] = 0xff }; | 27 | static u8 pxm2node[256] = { [0 ... 255] = 0xff }; |
27 | 28 | ||
28 | static int node_to_pxm(int n); | 29 | static int node_to_pxm(int n); |
29 | 30 | ||
30 | int pxm_to_node(int pxm) | 31 | int pxm_to_node(int pxm) |
31 | { | 32 | { |
32 | if ((unsigned)pxm >= 256) | 33 | if ((unsigned)pxm >= 256) |
33 | return 0; | 34 | return -1; |
34 | return pxm2node[pxm]; | 35 | /* Extend 0xff to (int)-1 */ |
36 | return (signed char)pxm2node[pxm]; | ||
35 | } | 37 | } |
36 | 38 | ||
37 | static __init int setup_node(int pxm) | 39 | static __init int setup_node(int pxm) |
@@ -71,8 +73,6 @@ static __init void cutoff_node(int i, unsigned long start, unsigned long end) | |||
71 | nd->start = nd->end; | 73 | nd->start = nd->end; |
72 | } | 74 | } |
73 | if (nd->end > end) { | 75 | if (nd->end > end) { |
74 | if (!(end & 0xfff)) | ||
75 | end--; | ||
76 | nd->end = end; | 76 | nd->end = end; |
77 | if (nd->start > nd->end) | 77 | if (nd->start > nd->end) |
78 | nd->start = nd->end; | 78 | nd->start = nd->end; |
@@ -93,9 +93,36 @@ static __init inline int srat_disabled(void) | |||
93 | return numa_off || acpi_numa < 0; | 93 | return numa_off || acpi_numa < 0; |
94 | } | 94 | } |
95 | 95 | ||
96 | /* | ||
97 | * A lot of BIOS fill in 10 (= no distance) everywhere. This messes | ||
98 | * up the NUMA heuristics which wants the local node to have a smaller | ||
99 | * distance than the others. | ||
100 | * Do some quick checks here and only use the SLIT if it passes. | ||
101 | */ | ||
102 | static __init int slit_valid(struct acpi_table_slit *slit) | ||
103 | { | ||
104 | int i, j; | ||
105 | int d = slit->localities; | ||
106 | for (i = 0; i < d; i++) { | ||
107 | for (j = 0; j < d; j++) { | ||
108 | u8 val = slit->entry[d*i + j]; | ||
109 | if (i == j) { | ||
110 | if (val != 10) | ||
111 | return 0; | ||
112 | } else if (val <= 10) | ||
113 | return 0; | ||
114 | } | ||
115 | } | ||
116 | return 1; | ||
117 | } | ||
118 | |||
96 | /* Callback for SLIT parsing */ | 119 | /* Callback for SLIT parsing */ |
97 | void __init acpi_numa_slit_init(struct acpi_table_slit *slit) | 120 | void __init acpi_numa_slit_init(struct acpi_table_slit *slit) |
98 | { | 121 | { |
122 | if (!slit_valid(slit)) { | ||
123 | printk(KERN_INFO "ACPI: SLIT table looks invalid. Not used.\n"); | ||
124 | return; | ||
125 | } | ||
99 | acpi_slit = slit; | 126 | acpi_slit = slit; |
100 | } | 127 | } |
101 | 128 | ||
@@ -166,18 +193,43 @@ acpi_numa_memory_affinity_init(struct acpi_table_memory_affinity *ma) | |||
166 | if (nd->end < end) | 193 | if (nd->end < end) |
167 | nd->end = end; | 194 | nd->end = end; |
168 | } | 195 | } |
169 | if (!(nd->end & 0xfff)) | ||
170 | nd->end--; | ||
171 | printk(KERN_INFO "SRAT: Node %u PXM %u %Lx-%Lx\n", node, pxm, | 196 | printk(KERN_INFO "SRAT: Node %u PXM %u %Lx-%Lx\n", node, pxm, |
172 | nd->start, nd->end); | 197 | nd->start, nd->end); |
173 | } | 198 | } |
174 | 199 | ||
200 | /* Sanity check to catch more bad SRATs (they are amazingly common). | ||
201 | Make sure the PXMs cover all memory. */ | ||
202 | static int nodes_cover_memory(void) | ||
203 | { | ||
204 | int i; | ||
205 | unsigned long pxmram, e820ram; | ||
206 | |||
207 | pxmram = 0; | ||
208 | for_each_node_mask(i, nodes_parsed) { | ||
209 | unsigned long s = nodes[i].start >> PAGE_SHIFT; | ||
210 | unsigned long e = nodes[i].end >> PAGE_SHIFT; | ||
211 | pxmram += e - s; | ||
212 | pxmram -= e820_hole_size(s, e); | ||
213 | } | ||
214 | |||
215 | e820ram = end_pfn - e820_hole_size(0, end_pfn); | ||
216 | if (pxmram < e820ram) { | ||
217 | printk(KERN_ERR | ||
218 | "SRAT: PXMs only cover %luMB of your %luMB e820 RAM. Not used.\n", | ||
219 | (pxmram << PAGE_SHIFT) >> 20, | ||
220 | (e820ram << PAGE_SHIFT) >> 20); | ||
221 | return 0; | ||
222 | } | ||
223 | return 1; | ||
224 | } | ||
225 | |||
175 | void __init acpi_numa_arch_fixup(void) {} | 226 | void __init acpi_numa_arch_fixup(void) {} |
176 | 227 | ||
177 | /* Use the information discovered above to actually set up the nodes. */ | 228 | /* Use the information discovered above to actually set up the nodes. */ |
178 | int __init acpi_scan_nodes(unsigned long start, unsigned long end) | 229 | int __init acpi_scan_nodes(unsigned long start, unsigned long end) |
179 | { | 230 | { |
180 | int i; | 231 | int i; |
232 | |||
181 | if (acpi_numa <= 0) | 233 | if (acpi_numa <= 0) |
182 | return -1; | 234 | return -1; |
183 | 235 | ||
@@ -188,6 +240,11 @@ int __init acpi_scan_nodes(unsigned long start, unsigned long end) | |||
188 | node_clear(i, nodes_parsed); | 240 | node_clear(i, nodes_parsed); |
189 | } | 241 | } |
190 | 242 | ||
243 | if (!nodes_cover_memory()) { | ||
244 | bad_srat(); | ||
245 | return -1; | ||
246 | } | ||
247 | |||
191 | memnode_shift = compute_hash_shift(nodes, nodes_weight(nodes_parsed)); | 248 | memnode_shift = compute_hash_shift(nodes, nodes_weight(nodes_parsed)); |
192 | if (memnode_shift < 0) { | 249 | if (memnode_shift < 0) { |
193 | printk(KERN_ERR | 250 | printk(KERN_ERR |
@@ -203,7 +260,7 @@ int __init acpi_scan_nodes(unsigned long start, unsigned long end) | |||
203 | if (cpu_to_node[i] == NUMA_NO_NODE) | 260 | if (cpu_to_node[i] == NUMA_NO_NODE) |
204 | continue; | 261 | continue; |
205 | if (!node_isset(cpu_to_node[i], nodes_parsed)) | 262 | if (!node_isset(cpu_to_node[i], nodes_parsed)) |
206 | cpu_to_node[i] = NUMA_NO_NODE; | 263 | numa_set_node(i, NUMA_NO_NODE); |
207 | } | 264 | } |
208 | numa_init_array(); | 265 | numa_init_array(); |
209 | return 0; | 266 | return 0; |
diff --git a/arch/x86_64/pci/Makefile b/arch/x86_64/pci/Makefile index bb34e5ef916c..a8f75a2a0f6f 100644 --- a/arch/x86_64/pci/Makefile +++ b/arch/x86_64/pci/Makefile | |||
@@ -11,7 +11,7 @@ obj-y += fixup.o | |||
11 | obj-$(CONFIG_ACPI) += acpi.o | 11 | obj-$(CONFIG_ACPI) += acpi.o |
12 | obj-y += legacy.o irq.o common.o | 12 | obj-y += legacy.o irq.o common.o |
13 | # mmconfig has a 64bit special | 13 | # mmconfig has a 64bit special |
14 | obj-$(CONFIG_PCI_MMCONFIG) += mmconfig.o | 14 | obj-$(CONFIG_PCI_MMCONFIG) += mmconfig.o direct.o |
15 | 15 | ||
16 | obj-$(CONFIG_NUMA) += k8-bus.o | 16 | obj-$(CONFIG_NUMA) += k8-bus.o |
17 | 17 | ||
diff --git a/arch/x86_64/pci/Makefile-BUS b/arch/x86_64/pci/Makefile-BUS deleted file mode 100644 index 4f0c05abd408..000000000000 --- a/arch/x86_64/pci/Makefile-BUS +++ /dev/null | |||
@@ -1,22 +0,0 @@ | |||
1 | # | ||
2 | # Makefile for X86_64 specific PCI routines | ||
3 | # | ||
4 | # Reuse the i386 PCI subsystem | ||
5 | # | ||
6 | CFLAGS += -I arch/i386/pci | ||
7 | |||
8 | obj-y := i386.o | ||
9 | obj-$(CONFIG_PCI_DIRECT)+= direct.o | ||
10 | obj-y += fixup.o | ||
11 | obj-$(CONFIG_ACPI) += acpi.o | ||
12 | obj-y += legacy.o irq.o common.o | ||
13 | # mmconfig has a 64bit special | ||
14 | obj-$(CONFIG_PCI_MMCONFIG) += mmconfig.o | ||
15 | |||
16 | direct-y += ../../i386/pci/direct.o | ||
17 | acpi-y += ../../i386/pci/acpi.o | ||
18 | legacy-y += ../../i386/pci/legacy.o | ||
19 | irq-y += ../../i386/pci/irq.o | ||
20 | common-y += ../../i386/pci/common.o | ||
21 | fixup-y += ../../i386/pci/fixup.o | ||
22 | i386-y += ../../i386/pci/i386.o | ||
diff --git a/arch/x86_64/pci/mmconfig.c b/arch/x86_64/pci/mmconfig.c index a0838c4a94e4..f16c0d57c552 100644 --- a/arch/x86_64/pci/mmconfig.c +++ b/arch/x86_64/pci/mmconfig.c | |||
@@ -8,18 +8,21 @@ | |||
8 | #include <linux/pci.h> | 8 | #include <linux/pci.h> |
9 | #include <linux/init.h> | 9 | #include <linux/init.h> |
10 | #include <linux/acpi.h> | 10 | #include <linux/acpi.h> |
11 | #include <linux/bitmap.h> | ||
11 | #include "pci.h" | 12 | #include "pci.h" |
12 | 13 | ||
13 | #define MMCONFIG_APER_SIZE (256*1024*1024) | 14 | #define MMCONFIG_APER_SIZE (256*1024*1024) |
14 | 15 | ||
16 | static DECLARE_BITMAP(fallback_slots, 32); | ||
17 | |||
15 | /* Static virtual mapping of the MMCONFIG aperture */ | 18 | /* Static virtual mapping of the MMCONFIG aperture */ |
16 | struct mmcfg_virt { | 19 | struct mmcfg_virt { |
17 | struct acpi_table_mcfg_config *cfg; | 20 | struct acpi_table_mcfg_config *cfg; |
18 | char *virt; | 21 | char __iomem *virt; |
19 | }; | 22 | }; |
20 | static struct mmcfg_virt *pci_mmcfg_virt; | 23 | static struct mmcfg_virt *pci_mmcfg_virt; |
21 | 24 | ||
22 | static char *get_virt(unsigned int seg, int bus) | 25 | static char __iomem *get_virt(unsigned int seg, unsigned bus) |
23 | { | 26 | { |
24 | int cfg_num = -1; | 27 | int cfg_num = -1; |
25 | struct acpi_table_mcfg_config *cfg; | 28 | struct acpi_table_mcfg_config *cfg; |
@@ -27,10 +30,9 @@ static char *get_virt(unsigned int seg, int bus) | |||
27 | while (1) { | 30 | while (1) { |
28 | ++cfg_num; | 31 | ++cfg_num; |
29 | if (cfg_num >= pci_mmcfg_config_num) { | 32 | if (cfg_num >= pci_mmcfg_config_num) { |
30 | /* something bad is going on, no cfg table is found. */ | 33 | /* Not found - fall back to type 1. This happens |
31 | /* so we fall back to the old way we used to do this */ | 34 | e.g. on the internal devices of a K8 northbridge. */ |
32 | /* and just rely on the first entry to be correct. */ | 35 | return NULL; |
33 | return pci_mmcfg_virt[0].virt; | ||
34 | } | 36 | } |
35 | cfg = pci_mmcfg_virt[cfg_num].cfg; | 37 | cfg = pci_mmcfg_virt[cfg_num].cfg; |
36 | if (cfg->pci_segment_group_number != seg) | 38 | if (cfg->pci_segment_group_number != seg) |
@@ -41,20 +43,30 @@ static char *get_virt(unsigned int seg, int bus) | |||
41 | } | 43 | } |
42 | } | 44 | } |
43 | 45 | ||
44 | static inline char *pci_dev_base(unsigned int seg, unsigned int bus, unsigned int devfn) | 46 | static char __iomem *pci_dev_base(unsigned int seg, unsigned int bus, unsigned int devfn) |
45 | { | 47 | { |
46 | 48 | char __iomem *addr; | |
47 | return get_virt(seg, bus) + ((bus << 20) | (devfn << 12)); | 49 | if (seg == 0 && bus == 0 && test_bit(PCI_SLOT(devfn), &fallback_slots)) |
50 | return NULL; | ||
51 | addr = get_virt(seg, bus); | ||
52 | if (!addr) | ||
53 | return NULL; | ||
54 | return addr + ((bus << 20) | (devfn << 12)); | ||
48 | } | 55 | } |
49 | 56 | ||
50 | static int pci_mmcfg_read(unsigned int seg, unsigned int bus, | 57 | static int pci_mmcfg_read(unsigned int seg, unsigned int bus, |
51 | unsigned int devfn, int reg, int len, u32 *value) | 58 | unsigned int devfn, int reg, int len, u32 *value) |
52 | { | 59 | { |
53 | char *addr = pci_dev_base(seg, bus, devfn); | 60 | char __iomem *addr; |
54 | 61 | ||
62 | /* Why do we have this when nobody checks it. How about a BUG()!? -AK */ | ||
55 | if (unlikely(!value || (bus > 255) || (devfn > 255) || (reg > 4095))) | 63 | if (unlikely(!value || (bus > 255) || (devfn > 255) || (reg > 4095))) |
56 | return -EINVAL; | 64 | return -EINVAL; |
57 | 65 | ||
66 | addr = pci_dev_base(seg, bus, devfn); | ||
67 | if (!addr) | ||
68 | return pci_conf1_read(seg,bus,devfn,reg,len,value); | ||
69 | |||
58 | switch (len) { | 70 | switch (len) { |
59 | case 1: | 71 | case 1: |
60 | *value = readb(addr + reg); | 72 | *value = readb(addr + reg); |
@@ -73,11 +85,16 @@ static int pci_mmcfg_read(unsigned int seg, unsigned int bus, | |||
73 | static int pci_mmcfg_write(unsigned int seg, unsigned int bus, | 85 | static int pci_mmcfg_write(unsigned int seg, unsigned int bus, |
74 | unsigned int devfn, int reg, int len, u32 value) | 86 | unsigned int devfn, int reg, int len, u32 value) |
75 | { | 87 | { |
76 | char *addr = pci_dev_base(seg, bus, devfn); | 88 | char __iomem *addr; |
77 | 89 | ||
90 | /* Why do we have this when nobody checks it. How about a BUG()!? -AK */ | ||
78 | if (unlikely((bus > 255) || (devfn > 255) || (reg > 4095))) | 91 | if (unlikely((bus > 255) || (devfn > 255) || (reg > 4095))) |
79 | return -EINVAL; | 92 | return -EINVAL; |
80 | 93 | ||
94 | addr = pci_dev_base(seg, bus, devfn); | ||
95 | if (!addr) | ||
96 | return pci_conf1_write(seg,bus,devfn,reg,len,value); | ||
97 | |||
81 | switch (len) { | 98 | switch (len) { |
82 | case 1: | 99 | case 1: |
83 | writeb(value, addr + reg); | 100 | writeb(value, addr + reg); |
@@ -98,6 +115,30 @@ static struct pci_raw_ops pci_mmcfg = { | |||
98 | .write = pci_mmcfg_write, | 115 | .write = pci_mmcfg_write, |
99 | }; | 116 | }; |
100 | 117 | ||
118 | /* K8 systems have some devices (typically in the builtin northbridge) | ||
119 | that are only accessible using type1 | ||
120 | Normally this can be expressed in the MCFG by not listing them | ||
121 | and assigning suitable _SEGs, but this isn't implemented in some BIOS. | ||
122 | Instead try to discover all devices on bus 0 that are unreachable using MM | ||
123 | and fallback for them. | ||
124 | We only do this for bus 0/seg 0 */ | ||
125 | static __init void unreachable_devices(void) | ||
126 | { | ||
127 | int i; | ||
128 | for (i = 0; i < 32; i++) { | ||
129 | u32 val1; | ||
130 | char __iomem *addr; | ||
131 | |||
132 | pci_conf1_read(0, 0, PCI_DEVFN(i,0), 0, 4, &val1); | ||
133 | if (val1 == 0xffffffff) | ||
134 | continue; | ||
135 | addr = pci_dev_base(0, 0, PCI_DEVFN(i, 0)); | ||
136 | if (addr == NULL|| readl(addr) != val1) { | ||
137 | set_bit(i, &fallback_slots); | ||
138 | } | ||
139 | } | ||
140 | } | ||
141 | |||
101 | static int __init pci_mmcfg_init(void) | 142 | static int __init pci_mmcfg_init(void) |
102 | { | 143 | { |
103 | int i; | 144 | int i; |
@@ -128,6 +169,8 @@ static int __init pci_mmcfg_init(void) | |||
128 | printk(KERN_INFO "PCI: Using MMCONFIG at %x\n", pci_mmcfg_config[i].base_address); | 169 | printk(KERN_INFO "PCI: Using MMCONFIG at %x\n", pci_mmcfg_config[i].base_address); |
129 | } | 170 | } |
130 | 171 | ||
172 | unreachable_devices(); | ||
173 | |||
131 | raw_pci_ops = &pci_mmcfg; | 174 | raw_pci_ops = &pci_mmcfg; |
132 | pci_probe = (pci_probe & ~PCI_PROBE_MASK) | PCI_PROBE_MMCONF; | 175 | pci_probe = (pci_probe & ~PCI_PROBE_MASK) | PCI_PROBE_MMCONF; |
133 | 176 | ||