author     Len Brown <len.brown@intel.com>   2005-12-06 17:31:30 -0500
committer  Len Brown <len.brown@intel.com>   2005-12-06 17:31:30 -0500
commit     3d5271f9883cba7b54762bc4fe027d4172f06db7 (patch)
tree       ab8a881a14478598a0c8bda0d26c62cdccfffd6d /arch/x86_64
parent     378b2556f4e09fa6f87ff0cb5c4395ff28257d02 (diff)
parent     9115a6c787596e687df03010d97fccc5e0762506 (diff)
Pull release into acpica branch
Diffstat (limited to 'arch/x86_64')
50 files changed, 1546 insertions(+), 1106 deletions(-)
diff --git a/arch/x86_64/Kconfig b/arch/x86_64/Kconfig
index 21afa69a086d..6ece645e4dbe 100644
--- a/arch/x86_64/Kconfig
+++ b/arch/x86_64/Kconfig
@@ -226,22 +226,42 @@ config SCHED_SMT
 
 source "kernel/Kconfig.preempt"
 
-config K8_NUMA
-	bool "K8 NUMA support"
-	select NUMA
+config NUMA
+	bool "Non Uniform Memory Access (NUMA) Support"
 	depends on SMP
 	help
-	  Enable NUMA (Non Unified Memory Architecture) support for
-	  AMD Opteron Multiprocessor systems. The kernel will try to allocate
-	  memory used by a CPU on the local memory controller of the CPU
-	  and add some more NUMA awareness to the kernel.
-	  This code is recommended on all multiprocessor Opteron systems
-	  and normally doesn't hurt on others.
+	  Enable NUMA (Non Uniform Memory Access) support. The kernel
+	  will try to allocate memory used by a CPU on the local memory
+	  controller of the CPU and add some more NUMA awareness to the kernel.
+	  This code is recommended on all multiprocessor Opteron systems.
+	  If the system is EM64T, you should say N unless your system is EM64T
+	  NUMA.
+
+config K8_NUMA
+	bool "Old style AMD Opteron NUMA detection"
+	depends on NUMA
+	default y
+	help
+	  Enable K8 NUMA node topology detection. You should say Y here if
+	  you have a multi processor AMD K8 system. This uses an old
+	  method to read the NUMA configurtion directly from the builtin
+	  Northbridge of Opteron. It is recommended to use X86_64_ACPI_NUMA
+	  instead, which also takes priority if both are compiled in.
+
+# Dummy CONFIG option to select ACPI_NUMA from drivers/acpi/Kconfig.
+
+config X86_64_ACPI_NUMA
+	bool "ACPI NUMA detection"
+	depends on NUMA
+	select ACPI
+	select ACPI_NUMA
+	default y
+	help
+	  Enable ACPI SRAT based node topology detection.
 
 config NUMA_EMU
-	bool "NUMA emulation support"
-	select NUMA
-	depends on SMP
+	bool "NUMA emulation"
+	depends on NUMA
 	help
 	  Enable NUMA emulation. A flat machine will be split
 	  into virtual nodes when booted with "numa=fake=N", where N is the
@@ -252,9 +272,6 @@ config ARCH_DISCONTIGMEM_ENABLE
 	depends on NUMA
 	default y
 
-config NUMA
-	bool
-	default n
 
 config ARCH_DISCONTIGMEM_ENABLE
 	def_bool y
@@ -374,6 +391,14 @@ config X86_MCE_INTEL
 	  Additional support for intel specific MCE features such as
 	  the thermal monitor.
 
+config X86_MCE_AMD
+	bool "AMD MCE features"
+	depends on X86_MCE && X86_LOCAL_APIC
+	default y
+	help
+	  Additional support for AMD specific MCE features such as
+	  the DRAM Error Threshold.
+
 config PHYSICAL_START
 	hex "Physical address where the kernel is loaded" if EMBEDDED
 	default "0x100000"
@@ -502,7 +527,7 @@ config IA32_EMULATION
 	  left.
 
 config IA32_AOUT
-	bool "IA32 a.out support"
+	tristate "IA32 a.out support"
 	depends on IA32_EMULATION
 	help
 	  Support old a.out binaries in the 32bit emulation.
@@ -532,8 +557,21 @@ source "drivers/firmware/Kconfig"
 
 source fs/Kconfig
 
+menu "Instrumentation Support"
+	depends on EXPERIMENTAL
+
 source "arch/x86_64/oprofile/Kconfig"
 
+config KPROBES
+	bool "Kprobes (EXPERIMENTAL)"
+	help
+	  Kprobes allows you to trap at almost any kernel address and
+	  execute a callback function. register_kprobe() establishes
+	  a probepoint and specifies the callback. Kprobes is useful
+	  for kernel debugging, non-intrusive instrumentation and testing.
+	  If in doubt, say "N".
+endmenu
+
 source "arch/x86_64/Kconfig.debug"
 
 source "security/Kconfig"
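A note on how the new NUMA options interact: K8_NUMA and X86_64_ACPI_NUMA can both be built in, and the help text states that ACPI SRAT detection takes priority. A minimal stand-alone sketch of that fall-through ordering (the stub names and return conventions are illustrative, not the kernel's actual API):

    #include <stdio.h>

    /* Toy model of the priority promised by the help text: ACPI SRAT
     * detection is consulted first, the K8 northbridge probe second. */
    static int acpi_srat_detect(void) { return -1; }       /* pretend: no SRAT table */
    static int k8_northbridge_detect(void) { return 0; }   /* pretend: found */

    int main(void)
    {
        if (acpi_srat_detect() == 0)
            printf("NUMA topology from ACPI SRAT\n");
        else if (k8_northbridge_detect() == 0)
            printf("NUMA topology from the K8 northbridge\n");
        else
            printf("no detection method worked; flat memory layout\n");
        return 0;
    }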
diff --git a/arch/x86_64/Kconfig.debug b/arch/x86_64/Kconfig.debug
index 9cf1410d2f5a..e2c6e64a85ec 100644
--- a/arch/x86_64/Kconfig.debug
+++ b/arch/x86_64/Kconfig.debug
@@ -2,15 +2,6 @@ menu "Kernel hacking"
 
 source "lib/Kconfig.debug"
 
-# !SMP for now because the context switch early causes GPF in segment reloading
-# and the GS base checking does the wrong thing then, causing a hang.
-config CHECKING
-	bool "Additional run-time checks"
-	depends on DEBUG_KERNEL && !SMP
-	help
-	  Enables some internal consistency checks for kernel debugging.
-	  You should normally say N.
-
 config INIT_DEBUG
 	bool "Debug __init statements"
 	depends on DEBUG_KERNEL
@@ -33,16 +24,6 @@ config IOMMU_DEBUG
 	  options. See Documentation/x86_64/boot-options.txt for more
 	  details.
 
-config KPROBES
-	bool "Kprobes"
-	depends on DEBUG_KERNEL
-	help
-	  Kprobes allows you to trap at almost any kernel address and
-	  execute a callback function. register_kprobe() establishes
-	  a probepoint and specifies the callback. Kprobes is useful
-	  for kernel debugging, non-intrusive instrumentation and testing.
-	  If in doubt, say "N".
-
 config IOMMU_LEAK
 	bool "IOMMU leak tracing"
 	depends on DEBUG_KERNEL
diff --git a/arch/x86_64/defconfig b/arch/x86_64/defconfig
index f8db7e500fbf..5d56542fb68f 100644
--- a/arch/x86_64/defconfig
+++ b/arch/x86_64/defconfig
@@ -1,7 +1,7 @@
 #
 # Automatically generated make config: don't edit
-# Linux kernel version: 2.6.13-git11
-# Mon Sep 12 16:16:16 2005
+# Linux kernel version: 2.6.14-git7
+# Sat Nov 5 15:55:50 2005
 #
 CONFIG_X86_64=y
 CONFIG_64BIT=y
@@ -35,7 +35,7 @@ CONFIG_POSIX_MQUEUE=y
 # CONFIG_BSD_PROCESS_ACCT is not set
 CONFIG_SYSCTL=y
 # CONFIG_AUDIT is not set
-# CONFIG_HOTPLUG is not set
+CONFIG_HOTPLUG=y
 CONFIG_KOBJECT_UEVENT=y
 CONFIG_IKCONFIG=y
 CONFIG_IKCONFIG_PROC=y
@@ -93,10 +93,11 @@ CONFIG_PREEMPT_NONE=y
 # CONFIG_PREEMPT_VOLUNTARY is not set
 # CONFIG_PREEMPT is not set
 CONFIG_PREEMPT_BKL=y
+CONFIG_NUMA=y
 CONFIG_K8_NUMA=y
+CONFIG_X86_64_ACPI_NUMA=y
 # CONFIG_NUMA_EMU is not set
 CONFIG_ARCH_DISCONTIGMEM_ENABLE=y
-CONFIG_NUMA=y
 CONFIG_ARCH_DISCONTIGMEM_DEFAULT=y
 CONFIG_ARCH_SPARSEMEM_ENABLE=y
 CONFIG_SELECT_MEMORY_MODEL=y
@@ -107,9 +108,10 @@ CONFIG_DISCONTIGMEM=y
 CONFIG_FLAT_NODE_MEM_MAP=y
 CONFIG_NEED_MULTIPLE_NODES=y
 # CONFIG_SPARSEMEM_STATIC is not set
+CONFIG_SPLIT_PTLOCK_CPUS=4
 CONFIG_HAVE_ARCH_EARLY_PFN_TO_NID=y
-CONFIG_HAVE_DEC_LOCK=y
 CONFIG_NR_CPUS=32
+CONFIG_HOTPLUG_CPU=y
 CONFIG_HPET_TIMER=y
 CONFIG_X86_PM_TIMER=y
 CONFIG_HPET_EMULATE_RTC=y
@@ -117,6 +119,7 @@ CONFIG_GART_IOMMU=y
 CONFIG_SWIOTLB=y
 CONFIG_X86_MCE=y
 CONFIG_X86_MCE_INTEL=y
+CONFIG_X86_MCE_AMD=y
 CONFIG_PHYSICAL_START=0x100000
 # CONFIG_KEXEC is not set
 CONFIG_SECCOMP=y
@@ -136,11 +139,15 @@ CONFIG_PM=y
 # CONFIG_PM_DEBUG is not set
 CONFIG_SOFTWARE_SUSPEND=y
 CONFIG_PM_STD_PARTITION=""
+CONFIG_SUSPEND_SMP=y
 
 #
 # ACPI (Advanced Configuration and Power Interface) Support
 #
 CONFIG_ACPI=y
+CONFIG_ACPI_SLEEP=y
+CONFIG_ACPI_SLEEP_PROC_FS=y
+CONFIG_ACPI_SLEEP_PROC_SLEEP=y
 CONFIG_ACPI_AC=y
 CONFIG_ACPI_BATTERY=y
 CONFIG_ACPI_BUTTON=y
@@ -148,6 +155,7 @@ CONFIG_ACPI_BUTTON=y
 CONFIG_ACPI_HOTKEY=m
 CONFIG_ACPI_FAN=y
 CONFIG_ACPI_PROCESSOR=y
+CONFIG_ACPI_HOTPLUG_CPU=y
 CONFIG_ACPI_THERMAL=y
 CONFIG_ACPI_NUMA=y
 # CONFIG_ACPI_ASUS is not set
@@ -158,7 +166,7 @@ CONFIG_ACPI_BLACKLIST_YEAR=2001
 CONFIG_ACPI_EC=y
 CONFIG_ACPI_POWER=y
 CONFIG_ACPI_SYSTEM=y
-# CONFIG_ACPI_CONTAINER is not set
+CONFIG_ACPI_CONTAINER=y
 
 #
 # CPU Frequency scaling
@@ -293,7 +301,6 @@ CONFIG_IPV6=y
 # Network testing
 #
 # CONFIG_NET_PKTGEN is not set
-# CONFIG_NETFILTER_NETLINK is not set
 # CONFIG_HAMRADIO is not set
 # CONFIG_IRDA is not set
 # CONFIG_BT is not set
@@ -312,6 +319,11 @@ CONFIG_PREVENT_FIRMWARE_BUILD=y
 # CONFIG_DEBUG_DRIVER is not set
 
 #
+# Connector - unified userspace <-> kernelspace linker
+#
+# CONFIG_CONNECTOR is not set
+
+#
 # Memory Technology Devices (MTD)
 #
 # CONFIG_MTD is not set
@@ -354,6 +366,11 @@ CONFIG_IOSCHED_NOOP=y
 # CONFIG_IOSCHED_AS is not set
 CONFIG_IOSCHED_DEADLINE=y
 CONFIG_IOSCHED_CFQ=y
+# CONFIG_DEFAULT_AS is not set
+CONFIG_DEFAULT_DEADLINE=y
+# CONFIG_DEFAULT_CFQ is not set
+# CONFIG_DEFAULT_NOOP is not set
+CONFIG_DEFAULT_IOSCHED="cfq"
 # CONFIG_ATA_OVER_ETH is not set
 
 #
@@ -450,6 +467,7 @@ CONFIG_BLK_DEV_SD=y
 CONFIG_SCSI_SPI_ATTRS=y
 # CONFIG_SCSI_FC_ATTRS is not set
 # CONFIG_SCSI_ISCSI_ATTRS is not set
+# CONFIG_SCSI_SAS_ATTRS is not set
 
 #
 # SCSI low-level drivers
@@ -469,20 +487,24 @@ CONFIG_AIC79XX_DEBUG_MASK=0
 # CONFIG_AIC79XX_REG_PRETTY_PRINT is not set
 # CONFIG_MEGARAID_NEWGEN is not set
 # CONFIG_MEGARAID_LEGACY is not set
+# CONFIG_MEGARAID_SAS is not set
 CONFIG_SCSI_SATA=y
 # CONFIG_SCSI_SATA_AHCI is not set
 # CONFIG_SCSI_SATA_SVW is not set
 CONFIG_SCSI_ATA_PIIX=y
 # CONFIG_SCSI_SATA_MV is not set
-# CONFIG_SCSI_SATA_NV is not set
-# CONFIG_SCSI_SATA_PROMISE is not set
+CONFIG_SCSI_SATA_NV=y
+# CONFIG_SCSI_PDC_ADMA is not set
 # CONFIG_SCSI_SATA_QSTOR is not set
+# CONFIG_SCSI_SATA_PROMISE is not set
 # CONFIG_SCSI_SATA_SX4 is not set
 # CONFIG_SCSI_SATA_SIL is not set
+# CONFIG_SCSI_SATA_SIL24 is not set
 # CONFIG_SCSI_SATA_SIS is not set
 # CONFIG_SCSI_SATA_ULI is not set
 CONFIG_SCSI_SATA_VIA=y
 # CONFIG_SCSI_SATA_VITESSE is not set
+CONFIG_SCSI_SATA_INTEL_COMBINED=y
 # CONFIG_SCSI_BUSLOGIC is not set
 # CONFIG_SCSI_DMX3191D is not set
 # CONFIG_SCSI_EATA is not set
@@ -525,6 +547,7 @@ CONFIG_BLK_DEV_DM=y
 CONFIG_FUSION=y
 CONFIG_FUSION_SPI=y
 # CONFIG_FUSION_FC is not set
+# CONFIG_FUSION_SAS is not set
 CONFIG_FUSION_MAX_SGE=128
 # CONFIG_FUSION_CTL is not set
 
@@ -564,6 +587,7 @@ CONFIG_NET_ETHERNET=y
 CONFIG_MII=y
 # CONFIG_HAPPYMEAL is not set
 # CONFIG_SUNGEM is not set
+# CONFIG_CASSINI is not set
 CONFIG_NET_VENDOR_3COM=y
 CONFIG_VORTEX=y
 # CONFIG_TYPHOON is not set
@@ -740,7 +764,43 @@ CONFIG_LEGACY_PTY_COUNT=256
 #
 # Watchdog Cards
 #
-# CONFIG_WATCHDOG is not set
+CONFIG_WATCHDOG=y
+# CONFIG_WATCHDOG_NOWAYOUT is not set
+
+#
+# Watchdog Device Drivers
+#
+CONFIG_SOFT_WATCHDOG=y
+# CONFIG_ACQUIRE_WDT is not set
+# CONFIG_ADVANTECH_WDT is not set
+# CONFIG_ALIM1535_WDT is not set
+# CONFIG_ALIM7101_WDT is not set
+# CONFIG_SC520_WDT is not set
+# CONFIG_EUROTECH_WDT is not set
+# CONFIG_IB700_WDT is not set
+# CONFIG_IBMASR is not set
+# CONFIG_WAFER_WDT is not set
+# CONFIG_I6300ESB_WDT is not set
+# CONFIG_I8XX_TCO is not set
+# CONFIG_SC1200_WDT is not set
+# CONFIG_60XX_WDT is not set
+# CONFIG_SBC8360_WDT is not set
+# CONFIG_CPU5_WDT is not set
+# CONFIG_W83627HF_WDT is not set
+# CONFIG_W83877F_WDT is not set
+# CONFIG_W83977F_WDT is not set
+# CONFIG_MACHZ_WDT is not set
+
+#
+# PCI-based Watchdog Cards
+#
+# CONFIG_PCIPCWATCHDOG is not set
+# CONFIG_WDTPCI is not set
+
+#
+# USB-based Watchdog Cards
+#
+# CONFIG_USBPCWATCHDOG is not set
 CONFIG_HW_RANDOM=y
 # CONFIG_NVRAM is not set
 CONFIG_RTC=y
@@ -767,6 +827,7 @@ CONFIG_MAX_RAW_DEVS=256
 # TPM devices
 #
 # CONFIG_TCG_TPM is not set
+# CONFIG_TELCLOCK is not set
 
 #
 # I2C support
@@ -783,6 +844,7 @@ CONFIG_MAX_RAW_DEVS=256
 #
 CONFIG_HWMON=y
 # CONFIG_HWMON_VID is not set
+# CONFIG_SENSORS_HDAPS is not set
 # CONFIG_HWMON_DEBUG_CHIP is not set
 
 #
@@ -886,12 +948,15 @@ CONFIG_USB_UHCI_HCD=y
 # USB Device Class drivers
 #
 # CONFIG_OBSOLETE_OSS_USB_DRIVER is not set
-# CONFIG_USB_BLUETOOTH_TTY is not set
 # CONFIG_USB_ACM is not set
 CONFIG_USB_PRINTER=y
 
 #
-# NOTE: USB_STORAGE enables SCSI, and 'SCSI disk support' may also be needed; see USB_STORAGE Help for more information
+# NOTE: USB_STORAGE enables SCSI, and 'SCSI disk support'
+#
+
+#
+# may also be needed; see USB_STORAGE Help for more information
 #
 CONFIG_USB_STORAGE=y
 # CONFIG_USB_STORAGE_DEBUG is not set
@@ -924,6 +989,7 @@ CONFIG_USB_HIDINPUT=y
 # CONFIG_USB_XPAD is not set
 # CONFIG_USB_ATI_REMOTE is not set
 # CONFIG_USB_KEYSPAN_REMOTE is not set
+# CONFIG_USB_APPLETOUCH is not set
 
 #
 # USB Imaging devices
@@ -1005,7 +1071,7 @@ CONFIG_USB_MON=y
 #
 # CONFIG_EDD is not set
 # CONFIG_DELL_RBU is not set
-CONFIG_DCDBAS=m
+# CONFIG_DCDBAS is not set
 
 #
 # File systems
@@ -1037,7 +1103,7 @@ CONFIG_INOTIFY=y
 # CONFIG_QUOTA is not set
 CONFIG_DNOTIFY=y
 CONFIG_AUTOFS_FS=y
-# CONFIG_AUTOFS4_FS is not set
+CONFIG_AUTOFS4_FS=y
 # CONFIG_FUSE_FS is not set
 
 #
@@ -1068,7 +1134,7 @@ CONFIG_TMPFS=y
 CONFIG_HUGETLBFS=y
 CONFIG_HUGETLB_PAGE=y
 CONFIG_RAMFS=y
-# CONFIG_RELAYFS_FS is not set
+CONFIG_RELAYFS_FS=y
 
 #
 # Miscellaneous filesystems
@@ -1186,7 +1252,9 @@ CONFIG_DETECT_SOFTLOCKUP=y
 # CONFIG_DEBUG_KOBJECT is not set
 # CONFIG_DEBUG_INFO is not set
 CONFIG_DEBUG_FS=y
+# CONFIG_DEBUG_VM is not set
 # CONFIG_FRAME_POINTER is not set
+# CONFIG_RCU_TORTURE_TEST is not set
 CONFIG_INIT_DEBUG=y
 # CONFIG_IOMMU_DEBUG is not set
 CONFIG_KPROBES=y
diff --git a/arch/x86_64/ia32/ia32_aout.c b/arch/x86_64/ia32/ia32_aout.c
index 3e6780fa0186..3bf58af98936 100644
--- a/arch/x86_64/ia32/ia32_aout.c
+++ b/arch/x86_64/ia32/ia32_aout.c
@@ -36,9 +36,6 @@
 #undef WARN_OLD
 #undef CORE_DUMP /* probably broken */
 
-extern int ia32_setup_arg_pages(struct linux_binprm *bprm,
-				unsigned long stack_top, int exec_stack);
-
 static int load_aout_binary(struct linux_binprm *, struct pt_regs * regs);
 static int load_aout_library(struct file*);
 
@@ -314,7 +311,6 @@ static int load_aout_binary(struct linux_binprm * bprm, struct pt_regs * regs)
 	current->mm->free_area_cache = TASK_UNMAPPED_BASE;
 	current->mm->cached_hole_size = 0;
 
-	set_mm_counter(current->mm, rss, 0);
 	current->mm->mmap = NULL;
 	compute_creds(bprm);
 	current->flags &= ~PF_FORKNOEXEC;
diff --git a/arch/x86_64/ia32/ia32_binfmt.c b/arch/x86_64/ia32/ia32_binfmt.c
index d9161e395978..830feb272eca 100644
--- a/arch/x86_64/ia32/ia32_binfmt.c
+++ b/arch/x86_64/ia32/ia32_binfmt.c
@@ -335,7 +335,8 @@ static void elf32_init(struct pt_regs *regs)
 	me->thread.es = __USER_DS;
 }
 
-int setup_arg_pages(struct linux_binprm *bprm, unsigned long stack_top, int executable_stack)
+int ia32_setup_arg_pages(struct linux_binprm *bprm, unsigned long stack_top,
+			 int executable_stack)
 {
 	unsigned long stack_base;
 	struct vm_area_struct *mpnt;
@@ -389,6 +390,7 @@ int setup_arg_pages(struct linux_binprm *bprm, unsigned long stack_top, int exec
 
 	return 0;
 }
+EXPORT_SYMBOL(ia32_setup_arg_pages);
 
 static unsigned long
 elf32_map (struct file *filep, unsigned long addr, struct elf_phdr *eppnt, int prot, int type)
diff --git a/arch/x86_64/ia32/ia32_ioctl.c b/arch/x86_64/ia32/ia32_ioctl.c
index 419758f19ca4..e335bd0b637d 100644
--- a/arch/x86_64/ia32/ia32_ioctl.c
+++ b/arch/x86_64/ia32/ia32_ioctl.c
@@ -12,40 +12,11 @@
 #define INCLUDES
 #include <linux/syscalls.h>
 #include "compat_ioctl.c"
-#include <asm/mtrr.h>
 #include <asm/ia32.h>
 
 #define CODE
 #include "compat_ioctl.c"
 
-#ifndef TIOCGDEV
-#define TIOCGDEV _IOR('T',0x32, unsigned int)
-#endif
-static int tiocgdev(unsigned fd, unsigned cmd, unsigned int __user *ptr)
-{
-
-	struct file *file;
-	struct tty_struct *real_tty;
-	int fput_needed, ret;
-
-	file = fget_light(fd, &fput_needed);
-	if (!file)
-		return -EBADF;
-
-	ret = -EINVAL;
-	if (file->f_op->ioctl != tty_ioctl)
-		goto out;
-	real_tty = (struct tty_struct *)file->private_data;
-	if (!real_tty)
-		goto out;
-
-	ret = put_user(new_encode_dev(tty_devnum(real_tty)), ptr);
-
-out:
-	fput_light(file, fput_needed);
-	return ret;
-}
-
 #define RTC_IRQP_READ32		_IOR('p', 0x0b, unsigned int)	/* Read IRQ rate */
 #define RTC_IRQP_SET32		_IOW('p', 0x0c, unsigned int)	/* Set IRQ rate */
 #define RTC_EPOCH_READ32	_IOR('p', 0x0d, unsigned)	/* Read epoch */
@@ -85,90 +56,6 @@ static int rtc32_ioctl(unsigned fd, unsigned cmd, unsigned long arg)
 	return sys_ioctl(fd,cmd,arg);
 }
 
-/* /proc/mtrr ioctls */
-
-
-struct mtrr_sentry32
-{
-    compat_ulong_t base;	/*  Base address     */
-    compat_uint_t size;		/*  Size of region   */
-    compat_uint_t type;		/*  Type of region   */
-};
-
-struct mtrr_gentry32
-{
-    compat_ulong_t regnum;	/*  Register number  */
-    compat_uint_t base;		/*  Base address     */
-    compat_uint_t size;		/*  Size of region   */
-    compat_uint_t type;		/*  Type of region   */
-};
-
-#define MTRR_IOCTL_BASE 'M'
-
-#define MTRRIOC32_ADD_ENTRY        _IOW(MTRR_IOCTL_BASE,  0, struct mtrr_sentry32)
-#define MTRRIOC32_SET_ENTRY        _IOW(MTRR_IOCTL_BASE,  1, struct mtrr_sentry32)
-#define MTRRIOC32_DEL_ENTRY        _IOW(MTRR_IOCTL_BASE,  2, struct mtrr_sentry32)
-#define MTRRIOC32_GET_ENTRY        _IOWR(MTRR_IOCTL_BASE, 3, struct mtrr_gentry32)
-#define MTRRIOC32_KILL_ENTRY       _IOW(MTRR_IOCTL_BASE,  4, struct mtrr_sentry32)
-#define MTRRIOC32_ADD_PAGE_ENTRY   _IOW(MTRR_IOCTL_BASE,  5, struct mtrr_sentry32)
-#define MTRRIOC32_SET_PAGE_ENTRY   _IOW(MTRR_IOCTL_BASE,  6, struct mtrr_sentry32)
-#define MTRRIOC32_DEL_PAGE_ENTRY   _IOW(MTRR_IOCTL_BASE,  7, struct mtrr_sentry32)
-#define MTRRIOC32_GET_PAGE_ENTRY   _IOWR(MTRR_IOCTL_BASE, 8, struct mtrr_gentry32)
-#define MTRRIOC32_KILL_PAGE_ENTRY  _IOW(MTRR_IOCTL_BASE,  9, struct mtrr_sentry32)
-
-
-static int mtrr_ioctl32(unsigned int fd, unsigned int cmd, unsigned long arg)
-{
-	struct mtrr_gentry g;
-	struct mtrr_sentry s;
-	int get = 0, err = 0;
-	struct mtrr_gentry32 __user *g32 = (struct mtrr_gentry32 __user *)arg;
-	mm_segment_t oldfs = get_fs();
-
-	switch (cmd) {
-#define SET(x) case MTRRIOC32_ ## x ## _ENTRY: cmd = MTRRIOC_ ## x ## _ENTRY; break
-#define GET(x) case MTRRIOC32_ ## x ## _ENTRY: cmd = MTRRIOC_ ## x ## _ENTRY; get=1; break
-		SET(ADD);
-		SET(SET);
-		SET(DEL);
-		GET(GET);
-		SET(KILL);
-		SET(ADD_PAGE);
-		SET(SET_PAGE);
-		SET(DEL_PAGE);
-		GET(GET_PAGE);
-		SET(KILL_PAGE);
-	}
-
-	if (get) {
-		err = get_user(g.regnum, &g32->regnum);
-		err |= get_user(g.base, &g32->base);
-		err |= get_user(g.size, &g32->size);
-		err |= get_user(g.type, &g32->type);
-
-		arg = (unsigned long)&g;
-	} else {
-		struct mtrr_sentry32 __user *s32 = (struct mtrr_sentry32 __user *)arg;
-		err = get_user(s.base, &s32->base);
-		err |= get_user(s.size, &s32->size);
-		err |= get_user(s.type, &s32->type);
-
-		arg = (unsigned long)&s;
-	}
-	if (err) return err;
-
-	set_fs(KERNEL_DS);
-	err = sys_ioctl(fd, cmd, arg);
-	set_fs(oldfs);
-
-	if (!err && get) {
-		err = put_user(g.base, &g32->base);
-		err |= put_user(g.size, &g32->size);
-		err |= put_user(g.regnum, &g32->regnum);
-		err |= put_user(g.type, &g32->type);
-	}
-	return err;
-}
 
 #define HANDLE_IOCTL(cmd,handler) { (cmd), (ioctl_trans_handler_t)(handler) },
 #define COMPATIBLE_IOCTL(cmd) HANDLE_IOCTL(cmd,sys_ioctl)
@@ -177,15 +64,8 @@ struct ioctl_trans ioctl_start[] = {
 #include <linux/compat_ioctl.h>
 #define DECLARES
 #include "compat_ioctl.c"
-COMPATIBLE_IOCTL(HDIO_SET_KEEPSETTINGS)
-COMPATIBLE_IOCTL(HDIO_SCAN_HWIF)
-COMPATIBLE_IOCTL(BLKRASET)
-COMPATIBLE_IOCTL(0x4B50)   /* KDGHWCLK - not in the kernel, but don't complain */
-COMPATIBLE_IOCTL(0x4B51)   /* KDSHWCLK - not in the kernel, but don't complain */
-COMPATIBLE_IOCTL(FIOQSIZE)
 
 /* And these ioctls need translation */
-HANDLE_IOCTL(TIOCGDEV, tiocgdev)
 /* realtime device */
 HANDLE_IOCTL(RTC_IRQP_READ, rtc32_ioctl)
 HANDLE_IOCTL(RTC_IRQP_READ32,rtc32_ioctl)
@@ -193,17 +73,6 @@ HANDLE_IOCTL(RTC_IRQP_SET32, rtc32_ioctl)
 HANDLE_IOCTL(RTC_EPOCH_READ32, rtc32_ioctl)
 HANDLE_IOCTL(RTC_EPOCH_SET32, rtc32_ioctl)
 /* take care of sizeof(sizeof()) breakage */
-/* mtrr */
-HANDLE_IOCTL(MTRRIOC32_ADD_ENTRY, mtrr_ioctl32)
-HANDLE_IOCTL(MTRRIOC32_SET_ENTRY, mtrr_ioctl32)
-HANDLE_IOCTL(MTRRIOC32_DEL_ENTRY, mtrr_ioctl32)
-HANDLE_IOCTL(MTRRIOC32_GET_ENTRY, mtrr_ioctl32)
-HANDLE_IOCTL(MTRRIOC32_KILL_ENTRY, mtrr_ioctl32)
-HANDLE_IOCTL(MTRRIOC32_ADD_PAGE_ENTRY, mtrr_ioctl32)
-HANDLE_IOCTL(MTRRIOC32_SET_PAGE_ENTRY, mtrr_ioctl32)
-HANDLE_IOCTL(MTRRIOC32_DEL_PAGE_ENTRY, mtrr_ioctl32)
-HANDLE_IOCTL(MTRRIOC32_GET_PAGE_ENTRY, mtrr_ioctl32)
-HANDLE_IOCTL(MTRRIOC32_KILL_PAGE_ENTRY, mtrr_ioctl32)
 };
 
 int ioctl_table_size = ARRAY_SIZE(ioctl_start);
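Why a thunk like the removed mtrr_ioctl32() has to exist at all: the 32-bit and 64-bit variants of the MTRR ioctl structs have different sizes and field offsets, so the argument cannot be passed through unmodified. A stand-alone illustration of the mismatch (struct definitions are local to this example, modeled on the layouts above):

    #include <stdio.h>
    #include <stdint.h>

    /* In 32-bit userland 'base' is a 32-bit ulong; a 64-bit kernel sees a
     * 64-bit ulong, so every call must be repacked, as mtrr_ioctl32() did
     * before this code moved out of the per-arch file. */
    struct mtrr_sentry32 { uint32_t base; uint32_t size; uint32_t type; };
    struct mtrr_sentry64 { uint64_t base; uint32_t size; uint32_t type; };

    int main(void)
    {
        printf("32-bit sentry: %zu bytes, 64-bit sentry: %zu bytes\n",
               sizeof(struct mtrr_sentry32), sizeof(struct mtrr_sentry64));
        return 0;
    }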
diff --git a/arch/x86_64/ia32/ia32_signal.c b/arch/x86_64/ia32/ia32_signal.c
index 66e2821533db..0903cc1faef2 100644
--- a/arch/x86_64/ia32/ia32_signal.c
+++ b/arch/x86_64/ia32/ia32_signal.c
@@ -425,7 +425,11 @@ get_sigframe(struct k_sigaction *ka, struct pt_regs * regs, size_t frame_size)
 		rsp = (unsigned long) ka->sa.sa_restorer;
 	}
 
-	return (void __user *)((rsp - frame_size) & -8UL);
+	rsp -= frame_size;
+	/* Align the stack pointer according to the i386 ABI,
+	 * i.e. so that on function entry ((sp + 4) & 15) == 0. */
+	rsp = ((rsp + 4) & -16ul) - 4;
+	return (void __user *) rsp;
 }
 
 int ia32_setup_frame(int sig, struct k_sigaction *ka,
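The replacement arithmetic in get_sigframe() can be checked in isolation: for any starting rsp, ((rsp + 4) & -16ul) - 4 is the highest address at or below rsp for which a 32-bit callee sees ((esp + 4) & 15) == 0 on entry, which is what the i386 ABI requires. A quick self-contained check (32-bit arithmetic used for illustration):

    #include <assert.h>
    #include <stdint.h>
    #include <stdio.h>

    int main(void)
    {
        for (uint32_t rsp = 0x1000; rsp < 0x1100; rsp++) {
            uint32_t aligned = ((rsp + 4) & -16u) - 4;
            assert(((aligned + 4) & 15) == 0);  /* ABI entry condition */
            assert(aligned <= rsp);             /* never moves the stack up */
            assert(rsp - aligned < 16);         /* wastes fewer than 16 bytes */
        }
        printf("alignment rule holds for all tested rsp values\n");
        return 0;
    }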
diff --git a/arch/x86_64/kernel/Makefile b/arch/x86_64/kernel/Makefile
index bcdd0a805fe7..fe4cbd1c4b2f 100644
--- a/arch/x86_64/kernel/Makefile
+++ b/arch/x86_64/kernel/Makefile
@@ -11,6 +11,7 @@ obj-y := process.o signal.o entry.o traps.o irq.o \
 
 obj-$(CONFIG_X86_MCE)		+= mce.o
 obj-$(CONFIG_X86_MCE_INTEL)	+= mce_intel.o
+obj-$(CONFIG_X86_MCE_AMD)	+= mce_amd.o
 obj-$(CONFIG_MTRR)		+= ../../i386/kernel/cpu/mtrr/
 obj-$(CONFIG_ACPI)		+= acpi/
 obj-$(CONFIG_X86_MSR)		+= msr.o
@@ -27,7 +28,6 @@ obj-$(CONFIG_CPU_FREQ)		+= cpufreq/
 obj-$(CONFIG_EARLY_PRINTK)	+= early_printk.o
 obj-$(CONFIG_GART_IOMMU)	+= pci-gart.o aperture.o
 obj-$(CONFIG_DUMMY_IOMMU)	+= pci-nommu.o pci-dma.o
-obj-$(CONFIG_SWIOTLB)		+= swiotlb.o
 obj-$(CONFIG_KPROBES)		+= kprobes.o
 obj-$(CONFIG_X86_PM_TIMER)	+= pmtimer.o
 
@@ -41,7 +41,6 @@ CFLAGS_vsyscall.o		:= $(PROFILING) -g0
 bootflag-y			+= ../../i386/kernel/bootflag.o
 cpuid-$(subst m,y,$(CONFIG_X86_CPUID))  += ../../i386/kernel/cpuid.o
 topology-y			+= ../../i386/mach-default/topology.o
-swiotlb-$(CONFIG_SWIOTLB)	+= ../../ia64/lib/swiotlb.o
 microcode-$(subst m,y,$(CONFIG_MICROCODE))  += ../../i386/kernel/microcode.o
 intel_cacheinfo-y		+= ../../i386/kernel/cpu/intel_cacheinfo.o
 quirks-y			+= ../../i386/kernel/quirks.o
diff --git a/arch/x86_64/kernel/aperture.c b/arch/x86_64/kernel/aperture.c
index 962ad4823b6a..c7f4fdd20f05 100644
--- a/arch/x86_64/kernel/aperture.c
+++ b/arch/x86_64/kernel/aperture.c
@@ -196,7 +196,7 @@ static __u32 __init search_agp_bridge(u32 *order, int *valid_agp)
 void __init iommu_hole_init(void)
 {
 	int fix, num;
-	u32 aper_size, aper_alloc = 0, aper_order, last_aper_order = 0;
+	u32 aper_size, aper_alloc = 0, aper_order = 0, last_aper_order = 0;
 	u64 aper_base, last_aper_base = 0;
 	int valid_agp = 0;
 
diff --git a/arch/x86_64/kernel/apic.c b/arch/x86_64/kernel/apic.c
index b6e7715d877f..18691ce4c759 100644
--- a/arch/x86_64/kernel/apic.c
+++ b/arch/x86_64/kernel/apic.c
@@ -833,6 +833,16 @@ int setup_profiling_timer(unsigned int multiplier)
 	return 0;
 }
 
+#ifdef CONFIG_X86_MCE_AMD
+void setup_threshold_lvt(unsigned long lvt_off)
+{
+	unsigned int v = 0;
+	unsigned long reg = (lvt_off << 4) + 0x500;
+	v |= THRESHOLD_APIC_VECTOR;
+	apic_write(reg, v);
+}
+#endif /* CONFIG_X86_MCE_AMD */
+
 #undef APIC_DIVISOR
 
 /*
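The address computed in setup_threshold_lvt() follows from local-APIC register spacing: registers sit 16 bytes apart, so (lvt_off << 4) steps in register-sized units from the 0x500 base used for the AMD threshold LVT entries (the 0x500 constant is taken from the patch itself). A standalone recomputation:

    #include <stdio.h>

    int main(void)
    {
        /* Registers are 16 bytes apart: lvt_off 0 -> 0x500, 1 -> 0x510, ... */
        for (unsigned long lvt_off = 0; lvt_off < 4; lvt_off++)
            printf("lvt_off %lu -> APIC register 0x%lx\n",
                   lvt_off, (lvt_off << 4) + 0x500);
        return 0;
    }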
diff --git a/arch/x86_64/kernel/e820.c b/arch/x86_64/kernel/e820.c
index ab3f87aaff70..17579a1a174b 100644
--- a/arch/x86_64/kernel/e820.c
+++ b/arch/x86_64/kernel/e820.c
@@ -23,8 +23,7 @@
 #include <asm/e820.h>
 #include <asm/proto.h>
 #include <asm/bootsetup.h>
-
-extern char _end[];
+#include <asm/sections.h>
 
 /*
  * PFN of last memory page.
diff --git a/arch/x86_64/kernel/entry.S b/arch/x86_64/kernel/entry.S
index 7937971d1853..9ff42041bb6b 100644
--- a/arch/x86_64/kernel/entry.S
+++ b/arch/x86_64/kernel/entry.S
@@ -612,6 +612,9 @@ retint_kernel:
 ENTRY(thermal_interrupt)
 	apicinterrupt THERMAL_APIC_VECTOR,smp_thermal_interrupt
 
+ENTRY(threshold_interrupt)
+	apicinterrupt THRESHOLD_APIC_VECTOR,mce_threshold_interrupt
+
 #ifdef CONFIG_SMP
 ENTRY(reschedule_interrupt)
 	apicinterrupt RESCHEDULE_VECTOR,smp_reschedule_interrupt
diff --git a/arch/x86_64/kernel/head.S b/arch/x86_64/kernel/head.S
index 4592bf21fcaf..15290968e49d 100644
--- a/arch/x86_64/kernel/head.S
+++ b/arch/x86_64/kernel/head.S
@@ -12,6 +12,7 @@
 
 #include <linux/linkage.h>
 #include <linux/threads.h>
+#include <linux/init.h>
 #include <asm/desc.h>
 #include <asm/segment.h>
 #include <asm/page.h>
@@ -70,7 +71,7 @@ startup_32:
 	movl	%eax, %cr4
 
 	/* Setup early boot stage 4 level pagetables */
-	movl	$(init_level4_pgt - __START_KERNEL_map), %eax
+	movl	$(boot_level4_pgt - __START_KERNEL_map), %eax
 	movl	%eax, %cr3
 
 	/* Setup EFER (Extended Feature Enable Register) */
@@ -113,7 +114,7 @@ startup_64:
 	movq	%rax, %cr4
 
 	/* Setup early boot stage 4 level pagetables. */
-	movq	$(init_level4_pgt - __START_KERNEL_map), %rax
+	movq	$(boot_level4_pgt - __START_KERNEL_map), %rax
 	movq	%rax, %cr3
 
 	/* Check if nx is implemented */
@@ -240,20 +241,10 @@ ljumpvector:
 ENTRY(stext)
 ENTRY(_stext)
 
-	/*
-	 * This default setting generates an ident mapping at address 0x100000
-	 * and a mapping for the kernel that precisely maps virtual address
-	 * 0xffffffff80000000 to physical address 0x000000. (always using
-	 * 2Mbyte large pages provided by PAE mode)
-	 */
 	.org 0x1000
 ENTRY(init_level4_pgt)
-	.quad	0x0000000000002007 + __PHYSICAL_START	/* -> level3_ident_pgt */
-	.fill	255,8,0
-	.quad	0x000000000000a007 + __PHYSICAL_START
-	.fill	254,8,0
-	/* (2^48-(2*1024*1024*1024))/(2^39) = 511 */
-	.quad	0x0000000000003007 + __PHYSICAL_START	/* -> level3_kernel_pgt */
+	/* This gets initialized in x86_64_start_kernel */
+	.fill	512,8,0
 
 	.org 0x2000
 ENTRY(level3_ident_pgt)
@@ -270,26 +261,26 @@ ENTRY(level3_kernel_pgt)
 	.org 0x4000
 ENTRY(level2_ident_pgt)
 	/* 40MB for bootup. */
-	.quad	0x0000000000000183
-	.quad	0x0000000000200183
-	.quad	0x0000000000400183
-	.quad	0x0000000000600183
-	.quad	0x0000000000800183
-	.quad	0x0000000000A00183
-	.quad	0x0000000000C00183
-	.quad	0x0000000000E00183
-	.quad	0x0000000001000183
-	.quad	0x0000000001200183
-	.quad	0x0000000001400183
-	.quad	0x0000000001600183
-	.quad	0x0000000001800183
-	.quad	0x0000000001A00183
-	.quad	0x0000000001C00183
-	.quad	0x0000000001E00183
-	.quad	0x0000000002000183
-	.quad	0x0000000002200183
-	.quad	0x0000000002400183
-	.quad	0x0000000002600183
+	.quad	0x0000000000000083
+	.quad	0x0000000000200083
+	.quad	0x0000000000400083
+	.quad	0x0000000000600083
+	.quad	0x0000000000800083
+	.quad	0x0000000000A00083
+	.quad	0x0000000000C00083
+	.quad	0x0000000000E00083
+	.quad	0x0000000001000083
+	.quad	0x0000000001200083
+	.quad	0x0000000001400083
+	.quad	0x0000000001600083
+	.quad	0x0000000001800083
+	.quad	0x0000000001A00083
+	.quad	0x0000000001C00083
+	.quad	0x0000000001E00083
+	.quad	0x0000000002000083
+	.quad	0x0000000002200083
+	.quad	0x0000000002400083
+	.quad	0x0000000002600083
 	/* Temporary mappings for the super early allocator in arch/x86_64/mm/init.c */
 	.globl temp_boot_pmds
 temp_boot_pmds:
@@ -350,6 +341,24 @@ ENTRY(wakeup_level4_pgt)
 	.quad	0x0000000000003007 + __PHYSICAL_START	/* -> level3_kernel_pgt */
 #endif
 
+#ifndef CONFIG_HOTPLUG_CPU
+	__INITDATA
+#endif
+	/*
+	 * This default setting generates an ident mapping at address 0x100000
+	 * and a mapping for the kernel that precisely maps virtual address
+	 * 0xffffffff80000000 to physical address 0x000000. (always using
+	 * 2Mbyte large pages provided by PAE mode)
+	 */
+	.align PAGE_SIZE
+ENTRY(boot_level4_pgt)
+	.quad	0x0000000000002007 + __PHYSICAL_START	/* -> level3_ident_pgt */
+	.fill	255,8,0
+	.quad	0x000000000000a007 + __PHYSICAL_START
+	.fill	254,8,0
+	/* (2^48-(2*1024*1024*1024))/(2^39) = 511 */
+	.quad	0x0000000000003007 + __PHYSICAL_START	/* -> level3_kernel_pgt */
+
 	.data
 
 	.align 16
diff --git a/arch/x86_64/kernel/head64.c b/arch/x86_64/kernel/head64.c
index cf6ab147a2a5..b675c5add01e 100644
--- a/arch/x86_64/kernel/head64.c
+++ b/arch/x86_64/kernel/head64.c
@@ -19,14 +19,15 @@
 #include <asm/bootsetup.h>
 #include <asm/setup.h>
 #include <asm/desc.h>
+#include <asm/pgtable.h>
+#include <asm/sections.h>
 
 /* Don't add a printk in there. printk relies on the PDA which is not initialized
    yet. */
 static void __init clear_bss(void)
 {
-	extern char __bss_start[], __bss_end[];
 	memset(__bss_start, 0,
-	       (unsigned long) __bss_end - (unsigned long) __bss_start);
+	       (unsigned long) __bss_stop - (unsigned long) __bss_start);
 }
 
 #define NEW_CL_POINTER 0x228   /* Relative to real mode data */
@@ -75,8 +76,6 @@ static void __init setup_boot_cpu_data(void)
 	boot_cpu_data.x86_mask = eax & 0xf;
 }
 
-extern char _end[];
-
 void __init x86_64_start_kernel(char * real_mode_data)
 {
 	char *s;
@@ -86,6 +85,13 @@ void __init x86_64_start_kernel(char * real_mode_data)
 		set_intr_gate(i, early_idt_handler);
 	asm volatile("lidt %0" :: "m" (idt_descr));
 	clear_bss();
+
+	/*
+	 * switch to init_level4_pgt from boot_level4_pgt
+	 */
+	memcpy(init_level4_pgt, boot_level4_pgt, PTRS_PER_PGD*sizeof(pgd_t));
+	asm volatile("movq %0,%%cr3" :: "r" (__pa_symbol(&init_level4_pgt)));
+
 	pda_init(0);
 	copy_bootdata(real_mode_data);
 #ifdef CONFIG_SMP
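Size sanity check for the memcpy added above: on x86-64 a top-level page table holds PTRS_PER_PGD = 512 entries of 8 bytes each, so copying boot_level4_pgt into init_level4_pgt moves exactly one 4 KiB page before %cr3 is rewritten (constants assumed from the x86-64 layout of this era):

    #include <stdio.h>

    int main(void)
    {
        enum { PTRS_PER_PGD = 512, PGD_ENTRY_BYTES = 8 };
        printf("%d entries * %d bytes = %d bytes (one 4 KiB page)\n",
               PTRS_PER_PGD, PGD_ENTRY_BYTES, PTRS_PER_PGD * PGD_ENTRY_BYTES);
        return 0;
    }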
diff --git a/arch/x86_64/kernel/i8259.c b/arch/x86_64/kernel/i8259.c
index b2a238b5a17e..6e5101ad3d1a 100644
--- a/arch/x86_64/kernel/i8259.c
+++ b/arch/x86_64/kernel/i8259.c
@@ -492,9 +492,10 @@ void invalidate_interrupt5(void);
 void invalidate_interrupt6(void);
 void invalidate_interrupt7(void);
 void thermal_interrupt(void);
+void threshold_interrupt(void);
 void i8254_timer_resume(void);
 
-static void setup_timer(void)
+static void setup_timer_hardware(void)
 {
 	outb_p(0x34,0x43);		/* binary, mode 2, LSB/MSB, ch 0 */
 	udelay(10);
@@ -505,17 +506,17 @@ static void setup_timer(void)
 
 static int timer_resume(struct sys_device *dev)
 {
-	setup_timer();
+	setup_timer_hardware();
 	return 0;
 }
 
 void i8254_timer_resume(void)
 {
-	setup_timer();
+	setup_timer_hardware();
 }
 
 static struct sysdev_class timer_sysclass = {
-	set_kset_name("timer"),
+	set_kset_name("timer_pit"),
 	.resume = timer_resume,
 };
 
@@ -580,6 +581,7 @@ void __init init_IRQ(void)
 	set_intr_gate(CALL_FUNCTION_VECTOR, call_function_interrupt);
 #endif
 	set_intr_gate(THERMAL_APIC_VECTOR, thermal_interrupt);
+	set_intr_gate(THRESHOLD_APIC_VECTOR, threshold_interrupt);
 
 #ifdef CONFIG_X86_LOCAL_APIC
 	/* self generated IPI for local APIC timer */
@@ -594,7 +596,7 @@ void __init init_IRQ(void)
 	 * Set the clock to HZ Hz, we already have a valid
 	 * vector now:
 	 */
-	setup_timer();
+	setup_timer_hardware();
 
 	if (!acpi_ioapic)
 		setup_irq(2, &irq2);
diff --git a/arch/x86_64/kernel/io_apic.c b/arch/x86_64/kernel/io_apic.c
index c8eee20cd519..97154ab058b4 100644
--- a/arch/x86_64/kernel/io_apic.c
+++ b/arch/x86_64/kernel/io_apic.c
@@ -57,7 +57,7 @@ int nr_ioapic_registers[MAX_IO_APICS];
  * Rough estimation of how many shared IRQs there are, can
  * be changed anytime.
  */
-#define MAX_PLUS_SHARED_IRQS NR_IRQS
+#define MAX_PLUS_SHARED_IRQS NR_IRQ_VECTORS
 #define PIN_MAP_SIZE (MAX_PLUS_SHARED_IRQS + NR_IRQS)
 
 /*
@@ -85,6 +85,7 @@ int vector_irq[NR_VECTORS] __read_mostly = { [0 ... NR_VECTORS - 1] = -1};
 	int pin;						\
 	struct irq_pin_list *entry = irq_2_pin + irq;		\
 								\
+	BUG_ON(irq >= NR_IRQS);					\
 	for (;;) {						\
 		unsigned int reg;				\
 		pin = entry->pin;				\
@@ -127,6 +128,8 @@ static void set_ioapic_affinity_irq(unsigned int irq, cpumask_t mask)
 }
 #endif
 
+static u8 gsi_2_irq[NR_IRQ_VECTORS] = { [0 ... NR_IRQ_VECTORS-1] = 0xFF };
+
 /*
  * The common case is 1:1 IRQ<->pin mappings. Sometimes there are
  * shared ISA-space IRQs, so we have to support them. We are super
@@ -137,6 +140,7 @@ static void add_pin_to_irq(unsigned int irq, int apic, int pin)
 	static int first_free_entry = NR_IRQS;
 	struct irq_pin_list *entry = irq_2_pin + irq;
 
+	BUG_ON(irq >= NR_IRQS);
 	while (entry->next)
 		entry = irq_2_pin + entry->next;
 
@@ -144,7 +148,7 @@ static void add_pin_to_irq(unsigned int irq, int apic, int pin)
 		entry->next = first_free_entry;
 		entry = irq_2_pin + entry->next;
 		if (++first_free_entry >= PIN_MAP_SIZE)
-			panic("io_apic.c: whoops");
+			panic("io_apic.c: ran out of irq_2_pin entries!");
 	}
 	entry->apic = apic;
 	entry->pin = pin;
@@ -420,6 +424,7 @@ int IO_APIC_get_PCI_irq_vector(int bus, int slot, int pin)
 				best_guess = irq;
 		}
 	}
+	BUG_ON(best_guess >= NR_IRQS);
 	return best_guess;
 }
 
@@ -610,6 +615,64 @@ static inline int irq_trigger(int idx)
 	return MPBIOS_trigger(idx);
 }
 
+static int next_irq = 16;
+
+/*
+ * gsi_irq_sharing -- Name overload!  "irq" can be either a legacy IRQ
+ * in the range 0-15, a linux IRQ in the range 0-223, or a GSI number
+ * from ACPI, which can reach 800 in large boxen.
+ *
+ * Compact the sparse GSI space into a sequential IRQ series and reuse
+ * vectors if possible.
+ */
+int gsi_irq_sharing(int gsi)
+{
+	int i, tries, vector;
+
+	BUG_ON(gsi >= NR_IRQ_VECTORS);
+
+	if (platform_legacy_irq(gsi))
+		return gsi;
+
+	if (gsi_2_irq[gsi] != 0xFF)
+		return (int)gsi_2_irq[gsi];
+
+	tries = NR_IRQS;
+try_again:
+	vector = assign_irq_vector(gsi);
+
+	/*
+	 * Sharing vectors means sharing IRQs, so scan irq_vectors for previous
+	 * use of vector and if found, return that IRQ.  However, we never want
+	 * to share legacy IRQs, which usually have a different trigger mode
+	 * than PCI.
+	 */
+	for (i = 0; i < NR_IRQS; i++)
+		if (IO_APIC_VECTOR(i) == vector)
+			break;
+	if (platform_legacy_irq(i)) {
+		if (--tries >= 0) {
+			IO_APIC_VECTOR(i) = 0;
+			goto try_again;
+		}
+		panic("gsi_irq_sharing: didn't find an IRQ using vector 0x%02X for GSI %d", vector, gsi);
+	}
+	if (i < NR_IRQS) {
+		gsi_2_irq[gsi] = i;
+		printk(KERN_INFO "GSI %d sharing vector 0x%02X and IRQ %d\n",
+			gsi, vector, i);
+		return i;
+	}
+
+	i = next_irq++;
+	BUG_ON(i >= NR_IRQS);
+	gsi_2_irq[gsi] = i;
+	IO_APIC_VECTOR(i) = vector;
+	printk(KERN_INFO "GSI %d assigned vector 0x%02X and IRQ %d\n",
+		gsi, vector, i);
+	return i;
+}
+
 static int pin_2_irq(int idx, int apic, int pin)
 {
 	int irq, i;
@@ -639,6 +702,7 @@ static int pin_2_irq(int idx, int apic, int pin)
 			while (i < apic)
 				irq += nr_ioapic_registers[i++];
 			irq += pin;
+			irq = gsi_irq_sharing(irq);
 			break;
 		}
 		default:
@@ -648,6 +712,7 @@ static int pin_2_irq(int idx, int apic, int pin)
 			break;
 		}
 	}
+	BUG_ON(irq >= NR_IRQS);
 
 	/*
 	 * PCI IRQ command line redirection. Yes, limits are hardcoded.
@@ -663,6 +728,7 @@ static int pin_2_irq(int idx, int apic, int pin)
 			}
 		}
 	}
+	BUG_ON(irq >= NR_IRQS);
 	return irq;
 }
 
@@ -690,8 +756,8 @@ int assign_irq_vector(int irq)
 {
 	static int current_vector = FIRST_DEVICE_VECTOR, offset = 0;
 
-	BUG_ON(irq >= NR_IRQ_VECTORS);
-	if (IO_APIC_VECTOR(irq) > 0)
+	BUG_ON(irq != AUTO_ASSIGN && (unsigned)irq >= NR_IRQ_VECTORS);
+	if (irq != AUTO_ASSIGN && IO_APIC_VECTOR(irq) > 0)
 		return IO_APIC_VECTOR(irq);
 next:
 	current_vector += 8;
@@ -699,9 +765,8 @@ next:
 		goto next;
 
 	if (current_vector >= FIRST_SYSTEM_VECTOR) {
-		offset++;
-		if (!(offset%8))
-			return -ENOSPC;
+		/* If we run out of vectors on large boxen, must share them. */
+		offset = (offset + 1) % 8;
 		current_vector = FIRST_DEVICE_VECTOR + offset;
 	}
 
@@ -1917,6 +1982,7 @@ int io_apic_set_pci_routing (int ioapic, int pin, int irq, int edge_level, int a
 	entry.polarity = active_high_low;
 	entry.mask = 1;					/* Disabled (masked) */
 
+	irq = gsi_irq_sharing(irq);
 	/*
 	 * IRQs < 16 are already in the irq_2_pin[] map
 	 */
diff --git a/arch/x86_64/kernel/kprobes.c b/arch/x86_64/kernel/kprobes.c index df08c43276a0..dddeb678b440 100644 --- a/arch/x86_64/kernel/kprobes.c +++ b/arch/x86_64/kernel/kprobes.c | |||
@@ -34,7 +34,6 @@ | |||
34 | #include <linux/config.h> | 34 | #include <linux/config.h> |
35 | #include <linux/kprobes.h> | 35 | #include <linux/kprobes.h> |
36 | #include <linux/ptrace.h> | 36 | #include <linux/ptrace.h> |
37 | #include <linux/spinlock.h> | ||
38 | #include <linux/string.h> | 37 | #include <linux/string.h> |
39 | #include <linux/slab.h> | 38 | #include <linux/slab.h> |
40 | #include <linux/preempt.h> | 39 | #include <linux/preempt.h> |
@@ -44,17 +43,10 @@ | |||
44 | #include <asm/kdebug.h> | 43 | #include <asm/kdebug.h> |
45 | 44 | ||
46 | static DECLARE_MUTEX(kprobe_mutex); | 45 | static DECLARE_MUTEX(kprobe_mutex); |
47 | |||
48 | static struct kprobe *current_kprobe; | ||
49 | static unsigned long kprobe_status, kprobe_old_rflags, kprobe_saved_rflags; | ||
50 | static struct kprobe *kprobe_prev; | ||
51 | static unsigned long kprobe_status_prev, kprobe_old_rflags_prev, kprobe_saved_rflags_prev; | ||
52 | static struct pt_regs jprobe_saved_regs; | ||
53 | static long *jprobe_saved_rsp; | ||
54 | void jprobe_return_end(void); | 46 | void jprobe_return_end(void); |
55 | 47 | ||
56 | /* copy of the kernel stack at the probe fire time */ | 48 | DEFINE_PER_CPU(struct kprobe *, current_kprobe) = NULL; |
57 | static kprobe_opcode_t jprobes_stack[MAX_STACK_SIZE]; | 49 | DEFINE_PER_CPU(struct kprobe_ctlblk, kprobe_ctlblk); |
58 | 50 | ||
59 | /* | 51 | /* |
60 | * returns non-zero if opcode modifies the interrupt flag. | 52 | * returns non-zero if opcode modifies the interrupt flag. |
@@ -77,9 +69,9 @@ static inline int is_IF_modifier(kprobe_opcode_t *insn) | |||
77 | int __kprobes arch_prepare_kprobe(struct kprobe *p) | 69 | int __kprobes arch_prepare_kprobe(struct kprobe *p) |
78 | { | 70 | { |
79 | /* insn: must be on special executable page on x86_64. */ | 71 | /* insn: must be on special executable page on x86_64. */ |
80 | up(&kprobe_mutex); | ||
81 | p->ainsn.insn = get_insn_slot(); | ||
82 | down(&kprobe_mutex); | 72 | down(&kprobe_mutex); |
73 | p->ainsn.insn = get_insn_slot(); | ||
74 | up(&kprobe_mutex); | ||
83 | if (!p->ainsn.insn) { | 75 | if (!p->ainsn.insn) { |
84 | return -ENOMEM; | 76 | return -ENOMEM; |
85 | } | 77 | } |
@@ -231,34 +223,35 @@ void __kprobes arch_disarm_kprobe(struct kprobe *p) | |||
231 | 223 | ||
232 | void __kprobes arch_remove_kprobe(struct kprobe *p) | 224 | void __kprobes arch_remove_kprobe(struct kprobe *p) |
233 | { | 225 | { |
234 | up(&kprobe_mutex); | ||
235 | free_insn_slot(p->ainsn.insn); | ||
236 | down(&kprobe_mutex); | 226 | down(&kprobe_mutex); |
227 | free_insn_slot(p->ainsn.insn); | ||
228 | up(&kprobe_mutex); | ||
237 | } | 229 | } |
238 | 230 | ||
239 | static inline void save_previous_kprobe(void) | 231 | static inline void save_previous_kprobe(struct kprobe_ctlblk *kcb) |
240 | { | 232 | { |
241 | kprobe_prev = current_kprobe; | 233 | kcb->prev_kprobe.kp = kprobe_running(); |
242 | kprobe_status_prev = kprobe_status; | 234 | kcb->prev_kprobe.status = kcb->kprobe_status; |
243 | kprobe_old_rflags_prev = kprobe_old_rflags; | 235 | kcb->prev_kprobe.old_rflags = kcb->kprobe_old_rflags; |
244 | kprobe_saved_rflags_prev = kprobe_saved_rflags; | 236 | kcb->prev_kprobe.saved_rflags = kcb->kprobe_saved_rflags; |
245 | } | 237 | } |
246 | 238 | ||
247 | static inline void restore_previous_kprobe(void) | 239 | static inline void restore_previous_kprobe(struct kprobe_ctlblk *kcb) |
248 | { | 240 | { |
249 | current_kprobe = kprobe_prev; | 241 | __get_cpu_var(current_kprobe) = kcb->prev_kprobe.kp; |
250 | kprobe_status = kprobe_status_prev; | 242 | kcb->kprobe_status = kcb->prev_kprobe.status; |
251 | kprobe_old_rflags = kprobe_old_rflags_prev; | 243 | kcb->kprobe_old_rflags = kcb->prev_kprobe.old_rflags; |
252 | kprobe_saved_rflags = kprobe_saved_rflags_prev; | 244 | kcb->kprobe_saved_rflags = kcb->prev_kprobe.saved_rflags; |
253 | } | 245 | } |
254 | 246 | ||
255 | static inline void set_current_kprobe(struct kprobe *p, struct pt_regs *regs) | 247 | static inline void set_current_kprobe(struct kprobe *p, struct pt_regs *regs, |
248 | struct kprobe_ctlblk *kcb) | ||
256 | { | 249 | { |
257 | current_kprobe = p; | 250 | __get_cpu_var(current_kprobe) = p; |
258 | kprobe_saved_rflags = kprobe_old_rflags | 251 | kcb->kprobe_saved_rflags = kcb->kprobe_old_rflags |
259 | = (regs->eflags & (TF_MASK | IF_MASK)); | 252 | = (regs->eflags & (TF_MASK | IF_MASK)); |
260 | if (is_IF_modifier(p->ainsn.insn)) | 253 | if (is_IF_modifier(p->ainsn.insn)) |
261 | kprobe_saved_rflags &= ~IF_MASK; | 254 | kcb->kprobe_saved_rflags &= ~IF_MASK; |
262 | } | 255 | } |
263 | 256 | ||
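The rewrite above retires the file-scope kprobe globals (current_kprobe, kprobe_status, the *_prev shadows) in favour of a per-CPU struct kprobe_ctlblk, so probe handling on one CPU can no longer clobber another CPU's state, and a re-entrant hit is handled by a one-deep save/restore into prev_kprobe. A compact userspace model of that save/restore (field names follow the diff; the per-CPU machinery is reduced to plain statics for illustration):

	#include <assert.h>

	struct kprobe { const char *name; };

	struct prev_kprobe {
		struct kprobe *kp;
		unsigned long status, old_rflags, saved_rflags;
	};

	/* One of these exists per CPU in the kernel (DEFINE_PER_CPU). */
	struct kprobe_ctlblk {
		unsigned long kprobe_status, kprobe_old_rflags, kprobe_saved_rflags;
		struct prev_kprobe prev_kprobe;
	};

	static struct kprobe *current_kprobe;   /* also per-CPU in the kernel */

	static void save_previous_kprobe(struct kprobe_ctlblk *kcb)
	{
		kcb->prev_kprobe.kp = current_kprobe;
		kcb->prev_kprobe.status = kcb->kprobe_status;
		kcb->prev_kprobe.old_rflags = kcb->kprobe_old_rflags;
		kcb->prev_kprobe.saved_rflags = kcb->kprobe_saved_rflags;
	}

	static void restore_previous_kprobe(struct kprobe_ctlblk *kcb)
	{
		current_kprobe = kcb->prev_kprobe.kp;
		kcb->kprobe_status = kcb->prev_kprobe.status;
		kcb->kprobe_old_rflags = kcb->prev_kprobe.old_rflags;
		kcb->kprobe_saved_rflags = kcb->prev_kprobe.saved_rflags;
	}

	int main(void)
	{
		struct kprobe_ctlblk kcb = { .kprobe_status = 1 /* HIT_SS */ };
		struct kprobe outer = { "outer" }, inner = { "inner" };

		current_kprobe = &outer;
		save_previous_kprobe(&kcb);     /* nested hit: shelve outer probe */
		current_kprobe = &inner;
		kcb.kprobe_status = 2;          /* KPROBE_REENTER */

		restore_previous_kprobe(&kcb);  /* inner probe done */
		assert(current_kprobe == &outer && kcb.kprobe_status == 1);
		return 0;
	}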
264 | static void __kprobes prepare_singlestep(struct kprobe *p, struct pt_regs *regs) | 257 | static void __kprobes prepare_singlestep(struct kprobe *p, struct pt_regs *regs) |
@@ -272,6 +265,7 @@ static void __kprobes prepare_singlestep(struct kprobe *p, struct pt_regs *regs) | |||
272 | regs->rip = (unsigned long)p->ainsn.insn; | 265 | regs->rip = (unsigned long)p->ainsn.insn; |
273 | } | 266 | } |
274 | 267 | ||
268 | /* Called with kretprobe_lock held */ | ||
275 | void __kprobes arch_prepare_kretprobe(struct kretprobe *rp, | 269 | void __kprobes arch_prepare_kretprobe(struct kretprobe *rp, |
276 | struct pt_regs *regs) | 270 | struct pt_regs *regs) |
277 | { | 271 | { |
@@ -292,32 +286,30 @@ void __kprobes arch_prepare_kretprobe(struct kretprobe *rp, | |||
292 | } | 286 | } |
293 | } | 287 | } |
294 | 288 | ||
295 | /* | ||
296 | * Interrupts are disabled on entry as trap3 is an interrupt gate and they | ||
297 | * remain disabled throughout this function. | ||
298 | */ | ||
299 | int __kprobes kprobe_handler(struct pt_regs *regs) | 289 | int __kprobes kprobe_handler(struct pt_regs *regs) |
300 | { | 290 | { |
301 | struct kprobe *p; | 291 | struct kprobe *p; |
302 | int ret = 0; | 292 | int ret = 0; |
303 | kprobe_opcode_t *addr = (kprobe_opcode_t *)(regs->rip - sizeof(kprobe_opcode_t)); | 293 | kprobe_opcode_t *addr = (kprobe_opcode_t *)(regs->rip - sizeof(kprobe_opcode_t)); |
294 | struct kprobe_ctlblk *kcb; | ||
304 | 295 | ||
305 | /* We're in an interrupt, but this is clear and BUG()-safe. */ | 296 | /* |
297 | * We don't want to be preempted for the entire | ||
298 | * duration of kprobe processing | ||
299 | */ | ||
306 | preempt_disable(); | 300 | preempt_disable(); |
301 | kcb = get_kprobe_ctlblk(); | ||
307 | 302 | ||
308 | /* Check we're not actually recursing */ | 303 | /* Check we're not actually recursing */ |
309 | if (kprobe_running()) { | 304 | if (kprobe_running()) { |
310 | /* We *are* holding lock here, so this is safe. | ||
311 | Disarm the probe we just hit, and ignore it. */ | ||
312 | p = get_kprobe(addr); | 305 | p = get_kprobe(addr); |
313 | if (p) { | 306 | if (p) { |
314 | if (kprobe_status == KPROBE_HIT_SS && | 307 | if (kcb->kprobe_status == KPROBE_HIT_SS && |
315 | *p->ainsn.insn == BREAKPOINT_INSTRUCTION) { | 308 | *p->ainsn.insn == BREAKPOINT_INSTRUCTION) { |
316 | regs->eflags &= ~TF_MASK; | 309 | regs->eflags &= ~TF_MASK; |
317 | regs->eflags |= kprobe_saved_rflags; | 310 | regs->eflags |= kcb->kprobe_saved_rflags; |
318 | unlock_kprobes(); | ||
319 | goto no_kprobe; | 311 | goto no_kprobe; |
320 | } else if (kprobe_status == KPROBE_HIT_SSDONE) { | 312 | } else if (kcb->kprobe_status == KPROBE_HIT_SSDONE) { |
321 | /* TODO: Provide re-entrancy from | 313 | /* TODO: Provide re-entrancy from |
322 | * post_kprobes_handler() and avoid exception | 314 | * post_kprobes_handler() and avoid exception |
323 | * stack corruption while single-stepping on | 315 | * stack corruption while single-stepping on |
@@ -325,6 +317,7 @@ int __kprobes kprobe_handler(struct pt_regs *regs) | |||
325 | */ | 317 | */ |
326 | arch_disarm_kprobe(p); | 318 | arch_disarm_kprobe(p); |
327 | regs->rip = (unsigned long)p->addr; | 319 | regs->rip = (unsigned long)p->addr; |
320 | reset_current_kprobe(); | ||
328 | ret = 1; | 321 | ret = 1; |
329 | } else { | 322 | } else { |
330 | /* We have reentered the kprobe_handler(), since | 323 | /* We have reentered the kprobe_handler(), since |
@@ -334,27 +327,24 @@ int __kprobes kprobe_handler(struct pt_regs *regs) | |||
334 | * of the new probe without calling any user | 327 | * of the new probe without calling any user |
335 | * handlers. | 328 | * handlers. |
336 | */ | 329 | */ |
337 | save_previous_kprobe(); | 330 | save_previous_kprobe(kcb); |
338 | set_current_kprobe(p, regs); | 331 | set_current_kprobe(p, regs, kcb); |
339 | p->nmissed++; | 332 | p->nmissed++; |
340 | prepare_singlestep(p, regs); | 333 | prepare_singlestep(p, regs); |
341 | kprobe_status = KPROBE_REENTER; | 334 | kcb->kprobe_status = KPROBE_REENTER; |
342 | return 1; | 335 | return 1; |
343 | } | 336 | } |
344 | } else { | 337 | } else { |
345 | p = current_kprobe; | 338 | p = __get_cpu_var(current_kprobe); |
346 | if (p->break_handler && p->break_handler(p, regs)) { | 339 | if (p->break_handler && p->break_handler(p, regs)) { |
347 | goto ss_probe; | 340 | goto ss_probe; |
348 | } | 341 | } |
349 | } | 342 | } |
350 | /* If it's not ours, can't be delete race, (we hold lock). */ | ||
351 | goto no_kprobe; | 343 | goto no_kprobe; |
352 | } | 344 | } |
353 | 345 | ||
354 | lock_kprobes(); | ||
355 | p = get_kprobe(addr); | 346 | p = get_kprobe(addr); |
356 | if (!p) { | 347 | if (!p) { |
357 | unlock_kprobes(); | ||
358 | if (*addr != BREAKPOINT_INSTRUCTION) { | 348 | if (*addr != BREAKPOINT_INSTRUCTION) { |
359 | /* | 349 | /* |
360 | * The breakpoint instruction was removed right | 350 | * The breakpoint instruction was removed right |
@@ -372,8 +362,8 @@ int __kprobes kprobe_handler(struct pt_regs *regs) | |||
372 | goto no_kprobe; | 362 | goto no_kprobe; |
373 | } | 363 | } |
374 | 364 | ||
375 | kprobe_status = KPROBE_HIT_ACTIVE; | 365 | set_current_kprobe(p, regs, kcb); |
376 | set_current_kprobe(p, regs); | 366 | kcb->kprobe_status = KPROBE_HIT_ACTIVE; |
377 | 367 | ||
378 | if (p->pre_handler && p->pre_handler(p, regs)) | 368 | if (p->pre_handler && p->pre_handler(p, regs)) |
379 | /* handler has already set things up, so skip ss setup */ | 369 | /* handler has already set things up, so skip ss setup */ |
@@ -381,7 +371,7 @@ int __kprobes kprobe_handler(struct pt_regs *regs) | |||
381 | 371 | ||
382 | ss_probe: | 372 | ss_probe: |
383 | prepare_singlestep(p, regs); | 373 | prepare_singlestep(p, regs); |
384 | kprobe_status = KPROBE_HIT_SS; | 374 | kcb->kprobe_status = KPROBE_HIT_SS; |
385 | return 1; | 375 | return 1; |
386 | 376 | ||
387 | no_kprobe: | 377 | no_kprobe: |
@@ -409,9 +399,10 @@ int __kprobes trampoline_probe_handler(struct kprobe *p, struct pt_regs *regs) | |||
409 | struct kretprobe_instance *ri = NULL; | 399 | struct kretprobe_instance *ri = NULL; |
410 | struct hlist_head *head; | 400 | struct hlist_head *head; |
411 | struct hlist_node *node, *tmp; | 401 | struct hlist_node *node, *tmp; |
412 | unsigned long orig_ret_address = 0; | 402 | unsigned long flags, orig_ret_address = 0; |
413 | unsigned long trampoline_address =(unsigned long)&kretprobe_trampoline; | 403 | unsigned long trampoline_address =(unsigned long)&kretprobe_trampoline; |
414 | 404 | ||
405 | spin_lock_irqsave(&kretprobe_lock, flags); | ||
415 | head = kretprobe_inst_table_head(current); | 406 | head = kretprobe_inst_table_head(current); |
416 | 407 | ||
417 | /* | 408 | /* |
@@ -450,13 +441,14 @@ int __kprobes trampoline_probe_handler(struct kprobe *p, struct pt_regs *regs) | |||
450 | BUG_ON(!orig_ret_address || (orig_ret_address == trampoline_address)); | 441 | BUG_ON(!orig_ret_address || (orig_ret_address == trampoline_address)); |
451 | regs->rip = orig_ret_address; | 442 | regs->rip = orig_ret_address; |
452 | 443 | ||
453 | unlock_kprobes(); | 444 | reset_current_kprobe(); |
445 | spin_unlock_irqrestore(&kretprobe_lock, flags); | ||
454 | preempt_enable_no_resched(); | 446 | preempt_enable_no_resched(); |
455 | 447 | ||
456 | /* | 448 | /* |
457 | * By returning a non-zero value, we are telling | 449 | * By returning a non-zero value, we are telling |
458 | * kprobe_handler() that we have handled unlocking | 450 | * kprobe_handler() that we don't want the post_handler |
459 | * and re-enabling preemption. | 451 | * to run (and have re-enabled preemption) |
460 | */ | 452 | */ |
461 | return 1; | 453 | return 1; |
462 | } | 454 | } |
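trampoline_probe_handler() above runs when a kretprobed function returns into the trampoline: under the new kretprobe_lock it walks the per-task instance list, recovers the original return address that arch_prepare_kretprobe() stashed, and points regs->rip back at it. A toy model of the stash-and-recover idea (one task, a plain LIFO list, no locking, purely illustrative):

	#include <assert.h>
	#include <stdio.h>

	#define TRAMPOLINE 0xdeadbeefUL   /* stand-in for &kretprobe_trampoline */

	struct kretprobe_instance {
		unsigned long ret_addr;
		struct kretprobe_instance *next;
	};

	static struct kretprobe_instance *head;  /* the kernel hashes this per task */

	/* Function entry: steal the return address, plant the trampoline. */
	static void prepare_kretprobe(unsigned long *ret_slot,
				      struct kretprobe_instance *ri)
	{
		ri->ret_addr = *ret_slot;
		ri->next = head;
		head = ri;
		*ret_slot = TRAMPOLINE;
	}

	/* Function return: we arrived via the trampoline; find where to go. */
	static unsigned long trampoline_handler(void)
	{
		unsigned long orig;

		assert(head);                   /* BUG_ON in the kernel */
		orig = head->ret_addr;
		head = head->next;
		assert(orig && orig != TRAMPOLINE);
		return orig;                    /* the kernel sets regs->rip */
	}

	int main(void)
	{
		struct kretprobe_instance ri;
		unsigned long ret_slot = 0x1234;   /* pretend stack slot */

		prepare_kretprobe(&ret_slot, &ri);
		assert(ret_slot == TRAMPOLINE);    /* the return now hits the trampoline */
		printf("resume at %#lx\n", trampoline_handler());
		return 0;
	}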
@@ -483,7 +475,8 @@ int __kprobes trampoline_probe_handler(struct kprobe *p, struct pt_regs *regs) | |||
483 | * that is atop the stack is the address following the copied instruction. | 475 | * that is atop the stack is the address following the copied instruction. |
484 | * We need to make it the address following the original instruction. | 476 | * We need to make it the address following the original instruction. |
485 | */ | 477 | */ |
486 | static void __kprobes resume_execution(struct kprobe *p, struct pt_regs *regs) | 478 | static void __kprobes resume_execution(struct kprobe *p, |
479 | struct pt_regs *regs, struct kprobe_ctlblk *kcb) | ||
487 | { | 480 | { |
488 | unsigned long *tos = (unsigned long *)regs->rsp; | 481 | unsigned long *tos = (unsigned long *)regs->rsp; |
489 | unsigned long next_rip = 0; | 482 | unsigned long next_rip = 0; |
@@ -498,7 +491,7 @@ static void __kprobes resume_execution(struct kprobe *p, struct pt_regs *regs) | |||
498 | switch (*insn) { | 491 | switch (*insn) { |
499 | case 0x9c: /* pushfl */ | 492 | case 0x9c: /* pushfl */ |
500 | *tos &= ~(TF_MASK | IF_MASK); | 493 | *tos &= ~(TF_MASK | IF_MASK); |
501 | *tos |= kprobe_old_rflags; | 494 | *tos |= kcb->kprobe_old_rflags; |
502 | break; | 495 | break; |
503 | case 0xc3: /* ret/lret */ | 496 | case 0xc3: /* ret/lret */ |
504 | case 0xcb: | 497 | case 0xcb: |
@@ -537,30 +530,28 @@ static void __kprobes resume_execution(struct kprobe *p, struct pt_regs *regs) | |||
537 | } | 530 | } |
538 | } | 531 | } |
539 | 532 | ||
540 | /* | ||
541 | * Interrupts are disabled on entry as trap1 is an interrupt gate and they | ||
542 | * remain disabled throughout this function, and we hold the kprobe lock. | ||
543 | */ | ||
544 | int __kprobes post_kprobe_handler(struct pt_regs *regs) | 533 | int __kprobes post_kprobe_handler(struct pt_regs *regs) |
545 | { | 534 | { |
546 | if (!kprobe_running()) | 535 | struct kprobe *cur = kprobe_running(); |
536 | struct kprobe_ctlblk *kcb = get_kprobe_ctlblk(); | ||
537 | |||
538 | if (!cur) | ||
547 | return 0; | 539 | return 0; |
548 | 540 | ||
549 | if ((kprobe_status != KPROBE_REENTER) && current_kprobe->post_handler) { | 541 | if ((kcb->kprobe_status != KPROBE_REENTER) && cur->post_handler) { |
550 | kprobe_status = KPROBE_HIT_SSDONE; | 542 | kcb->kprobe_status = KPROBE_HIT_SSDONE; |
551 | current_kprobe->post_handler(current_kprobe, regs, 0); | 543 | cur->post_handler(cur, regs, 0); |
552 | } | 544 | } |
553 | 545 | ||
554 | resume_execution(current_kprobe, regs); | 546 | resume_execution(cur, regs, kcb); |
555 | regs->eflags |= kprobe_saved_rflags; | 547 | regs->eflags |= kcb->kprobe_saved_rflags; |
556 | 548 | ||
557 | /* Restore the original saved kprobes variables and continue. */ | 549 | /* Restore the original saved kprobes variables and continue. */ |
558 | if (kprobe_status == KPROBE_REENTER) { | 550 | if (kcb->kprobe_status == KPROBE_REENTER) { |
559 | restore_previous_kprobe(); | 551 | restore_previous_kprobe(kcb); |
560 | goto out; | 552 | goto out; |
561 | } else { | ||
562 | unlock_kprobes(); | ||
563 | } | 553 | } |
554 | reset_current_kprobe(); | ||
564 | out: | 555 | out: |
565 | preempt_enable_no_resched(); | 556 | preempt_enable_no_resched(); |
566 | 557 | ||
@@ -575,18 +566,19 @@ out: | |||
575 | return 1; | 566 | return 1; |
576 | } | 567 | } |
577 | 568 | ||
578 | /* Interrupts disabled, kprobe_lock held. */ | ||
579 | int __kprobes kprobe_fault_handler(struct pt_regs *regs, int trapnr) | 569 | int __kprobes kprobe_fault_handler(struct pt_regs *regs, int trapnr) |
580 | { | 570 | { |
581 | if (current_kprobe->fault_handler | 571 | struct kprobe *cur = kprobe_running(); |
582 | && current_kprobe->fault_handler(current_kprobe, regs, trapnr)) | 572 | struct kprobe_ctlblk *kcb = get_kprobe_ctlblk(); |
573 | |||
574 | if (cur->fault_handler && cur->fault_handler(cur, regs, trapnr)) | ||
583 | return 1; | 575 | return 1; |
584 | 576 | ||
585 | if (kprobe_status & KPROBE_HIT_SS) { | 577 | if (kcb->kprobe_status & KPROBE_HIT_SS) { |
586 | resume_execution(current_kprobe, regs); | 578 | resume_execution(cur, regs, kcb); |
587 | regs->eflags |= kprobe_old_rflags; | 579 | regs->eflags |= kcb->kprobe_old_rflags; |
588 | 580 | ||
589 | unlock_kprobes(); | 581 | reset_current_kprobe(); |
590 | preempt_enable_no_resched(); | 582 | preempt_enable_no_resched(); |
591 | } | 583 | } |
592 | return 0; | 584 | return 0; |
@@ -599,39 +591,41 @@ int __kprobes kprobe_exceptions_notify(struct notifier_block *self, | |||
599 | unsigned long val, void *data) | 591 | unsigned long val, void *data) |
600 | { | 592 | { |
601 | struct die_args *args = (struct die_args *)data; | 593 | struct die_args *args = (struct die_args *)data; |
594 | int ret = NOTIFY_DONE; | ||
595 | |||
602 | switch (val) { | 596 | switch (val) { |
603 | case DIE_INT3: | 597 | case DIE_INT3: |
604 | if (kprobe_handler(args->regs)) | 598 | if (kprobe_handler(args->regs)) |
605 | return NOTIFY_STOP; | 599 | ret = NOTIFY_STOP; |
606 | break; | 600 | break; |
607 | case DIE_DEBUG: | 601 | case DIE_DEBUG: |
608 | if (post_kprobe_handler(args->regs)) | 602 | if (post_kprobe_handler(args->regs)) |
609 | return NOTIFY_STOP; | 603 | ret = NOTIFY_STOP; |
610 | break; | 604 | break; |
611 | case DIE_GPF: | 605 | case DIE_GPF: |
612 | if (kprobe_running() && | ||
613 | kprobe_fault_handler(args->regs, args->trapnr)) | ||
614 | return NOTIFY_STOP; | ||
615 | break; | ||
616 | case DIE_PAGE_FAULT: | 606 | case DIE_PAGE_FAULT: |
607 | /* kprobe_running() needs smp_processor_id() */ | ||
608 | preempt_disable(); | ||
617 | if (kprobe_running() && | 609 | if (kprobe_running() && |
618 | kprobe_fault_handler(args->regs, args->trapnr)) | 610 | kprobe_fault_handler(args->regs, args->trapnr)) |
619 | return NOTIFY_STOP; | 611 | ret = NOTIFY_STOP; |
612 | preempt_enable(); | ||
620 | break; | 613 | break; |
621 | default: | 614 | default: |
622 | break; | 615 | break; |
623 | } | 616 | } |
624 | return NOTIFY_DONE; | 617 | return ret; |
625 | } | 618 | } |
626 | 619 | ||
627 | int __kprobes setjmp_pre_handler(struct kprobe *p, struct pt_regs *regs) | 620 | int __kprobes setjmp_pre_handler(struct kprobe *p, struct pt_regs *regs) |
628 | { | 621 | { |
629 | struct jprobe *jp = container_of(p, struct jprobe, kp); | 622 | struct jprobe *jp = container_of(p, struct jprobe, kp); |
630 | unsigned long addr; | 623 | unsigned long addr; |
624 | struct kprobe_ctlblk *kcb = get_kprobe_ctlblk(); | ||
631 | 625 | ||
632 | jprobe_saved_regs = *regs; | 626 | kcb->jprobe_saved_regs = *regs; |
633 | jprobe_saved_rsp = (long *) regs->rsp; | 627 | kcb->jprobe_saved_rsp = (long *) regs->rsp; |
634 | addr = (unsigned long)jprobe_saved_rsp; | 628 | addr = (unsigned long)(kcb->jprobe_saved_rsp); |
635 | /* | 629 | /* |
636 | * As Linus pointed out, gcc assumes that the callee | 630 | * As Linus pointed out, gcc assumes that the callee |
637 | * owns the argument space and could overwrite it, e.g. | 631 | * owns the argument space and could overwrite it, e.g. |
@@ -639,7 +633,8 @@ int __kprobes setjmp_pre_handler(struct kprobe *p, struct pt_regs *regs) | |||
639 | * we also save and restore enough stack bytes to cover | 633 | * we also save and restore enough stack bytes to cover |
640 | * the argument area. | 634 | * the argument area. |
641 | */ | 635 | */ |
642 | memcpy(jprobes_stack, (kprobe_opcode_t *) addr, MIN_STACK_SIZE(addr)); | 636 | memcpy(kcb->jprobes_stack, (kprobe_opcode_t *)addr, |
637 | MIN_STACK_SIZE(addr)); | ||
643 | regs->eflags &= ~IF_MASK; | 638 | regs->eflags &= ~IF_MASK; |
644 | regs->rip = (unsigned long)(jp->entry); | 639 | regs->rip = (unsigned long)(jp->entry); |
645 | return 1; | 640 | return 1; |
@@ -647,36 +642,40 @@ int __kprobes setjmp_pre_handler(struct kprobe *p, struct pt_regs *regs) | |||
647 | 642 | ||
648 | void __kprobes jprobe_return(void) | 643 | void __kprobes jprobe_return(void) |
649 | { | 644 | { |
650 | preempt_enable_no_resched(); | 645 | struct kprobe_ctlblk *kcb = get_kprobe_ctlblk(); |
646 | |||
651 | asm volatile (" xchg %%rbx,%%rsp \n" | 647 | asm volatile (" xchg %%rbx,%%rsp \n" |
652 | " int3 \n" | 648 | " int3 \n" |
653 | " .globl jprobe_return_end \n" | 649 | " .globl jprobe_return_end \n" |
654 | " jprobe_return_end: \n" | 650 | " jprobe_return_end: \n" |
655 | " nop \n"::"b" | 651 | " nop \n"::"b" |
656 | (jprobe_saved_rsp):"memory"); | 652 | (kcb->jprobe_saved_rsp):"memory"); |
657 | } | 653 | } |
658 | 654 | ||
659 | int __kprobes longjmp_break_handler(struct kprobe *p, struct pt_regs *regs) | 655 | int __kprobes longjmp_break_handler(struct kprobe *p, struct pt_regs *regs) |
660 | { | 656 | { |
657 | struct kprobe_ctlblk *kcb = get_kprobe_ctlblk(); | ||
661 | u8 *addr = (u8 *) (regs->rip - 1); | 658 | u8 *addr = (u8 *) (regs->rip - 1); |
662 | unsigned long stack_addr = (unsigned long)jprobe_saved_rsp; | 659 | unsigned long stack_addr = (unsigned long)(kcb->jprobe_saved_rsp); |
663 | struct jprobe *jp = container_of(p, struct jprobe, kp); | 660 | struct jprobe *jp = container_of(p, struct jprobe, kp); |
664 | 661 | ||
665 | if ((addr > (u8 *) jprobe_return) && (addr < (u8 *) jprobe_return_end)) { | 662 | if ((addr > (u8 *) jprobe_return) && (addr < (u8 *) jprobe_return_end)) { |
666 | if ((long *)regs->rsp != jprobe_saved_rsp) { | 663 | if ((long *)regs->rsp != kcb->jprobe_saved_rsp) { |
667 | struct pt_regs *saved_regs = | 664 | struct pt_regs *saved_regs = |
668 | container_of(jprobe_saved_rsp, struct pt_regs, rsp); | 665 | container_of(kcb->jprobe_saved_rsp, |
666 | struct pt_regs, rsp); | ||
669 | printk("current rsp %p does not match saved rsp %p\n", | 667 | printk("current rsp %p does not match saved rsp %p\n", |
670 | (long *)regs->rsp, jprobe_saved_rsp); | 668 | (long *)regs->rsp, kcb->jprobe_saved_rsp); |
671 | printk("Saved registers for jprobe %p\n", jp); | 669 | printk("Saved registers for jprobe %p\n", jp); |
672 | show_registers(saved_regs); | 670 | show_registers(saved_regs); |
673 | printk("Current registers\n"); | 671 | printk("Current registers\n"); |
674 | show_registers(regs); | 672 | show_registers(regs); |
675 | BUG(); | 673 | BUG(); |
676 | } | 674 | } |
677 | *regs = jprobe_saved_regs; | 675 | *regs = kcb->jprobe_saved_regs; |
678 | memcpy((kprobe_opcode_t *) stack_addr, jprobes_stack, | 676 | memcpy((kprobe_opcode_t *) stack_addr, kcb->jprobes_stack, |
679 | MIN_STACK_SIZE(stack_addr)); | 677 | MIN_STACK_SIZE(stack_addr)); |
678 | preempt_enable_no_resched(); | ||
680 | return 1; | 679 | return 1; |
681 | } | 680 | } |
682 | return 0; | 681 | return 0; |
diff --git a/arch/x86_64/kernel/mce.c b/arch/x86_64/kernel/mce.c index 08203b07f4bd..183dc6105429 100644 --- a/arch/x86_64/kernel/mce.c +++ b/arch/x86_64/kernel/mce.c | |||
@@ -37,7 +37,7 @@ static unsigned long bank[NR_BANKS] = { [0 ... NR_BANKS-1] = ~0UL }; | |||
37 | static unsigned long console_logged; | 37 | static unsigned long console_logged; |
38 | static int notify_user; | 38 | static int notify_user; |
39 | static int rip_msr; | 39 | static int rip_msr; |
40 | static int mce_bootlog; | 40 | static int mce_bootlog = 1; |
41 | 41 | ||
42 | /* | 42 | /* |
43 | * Lockless MCE logging infrastructure. | 43 | * Lockless MCE logging infrastructure. |
@@ -54,9 +54,12 @@ void mce_log(struct mce *mce) | |||
54 | { | 54 | { |
55 | unsigned next, entry; | 55 | unsigned next, entry; |
56 | mce->finished = 0; | 56 | mce->finished = 0; |
57 | smp_wmb(); | 57 | wmb(); |
58 | for (;;) { | 58 | for (;;) { |
59 | entry = rcu_dereference(mcelog.next); | 59 | entry = rcu_dereference(mcelog.next); |
60 | /* The rmb forces the compiler to reload next in each | ||
61 | iteration */ | ||
62 | rmb(); | ||
60 | for (;;) { | 63 | for (;;) { |
61 | /* When the buffer fills up discard new entries. Assume | 64 | /* When the buffer fills up discard new entries. Assume |
62 | that the earlier errors are the more interesting. */ | 65 | that the earlier errors are the more interesting. */ |
@@ -69,6 +72,7 @@ void mce_log(struct mce *mce) | |||
69 | entry++; | 72 | entry++; |
70 | continue; | 73 | continue; |
71 | } | 74 | } |
75 | break; | ||
72 | } | 76 | } |
73 | smp_rmb(); | 77 | smp_rmb(); |
74 | next = entry + 1; | 78 | next = entry + 1; |
@@ -76,9 +80,9 @@ void mce_log(struct mce *mce) | |||
76 | break; | 80 | break; |
77 | } | 81 | } |
78 | memcpy(mcelog.entry + entry, mce, sizeof(struct mce)); | 82 | memcpy(mcelog.entry + entry, mce, sizeof(struct mce)); |
79 | smp_wmb(); | 83 | wmb(); |
80 | mcelog.entry[entry].finished = 1; | 84 | mcelog.entry[entry].finished = 1; |
81 | smp_wmb(); | 85 | wmb(); |
82 | 86 | ||
83 | if (!test_and_set_bit(0, &console_logged)) | 87 | if (!test_and_set_bit(0, &console_logged)) |
84 | notify_user = 1; | 88 | notify_user = 1; |
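The barrier rework in mce_log() above makes the lockless protocol explicit: the writer clears ->finished, fills the record, and only sets ->finished to 1 behind a wmb(), so any reader that observes finished == 1 is guaranteed to see the complete record; the added rmb() forces ->next to be reloaded on every retry of the outer loop. The same publish/consume ordering, sketched with C11 atomics rather than the kernel's wmb()/rmb() (single slot, single writer; the kernel additionally serializes slots through the cmpxchg on mcelog.next):

	#include <stdatomic.h>
	#include <string.h>

	struct mce_rec { char data[64]; };

	struct slot {
		struct mce_rec entry;
		atomic_int finished;
	};

	/* Writer: unpublish, fill the record, then publish with release
	 * semantics (the role of the wmb() before "finished = 1" above). */
	static void publish(struct slot *s, const struct mce_rec *m)
	{
		atomic_store_explicit(&s->finished, 0, memory_order_release);
		memcpy(&s->entry, m, sizeof(*m));
		atomic_store_explicit(&s->finished, 1, memory_order_release);
	}

	/* Reader: an acquire load of "finished" (the rmb() side) guarantees
	 * the memcpy above is visible before the entry is touched. */
	static int consume(struct slot *s, struct mce_rec *out)
	{
		if (!atomic_load_explicit(&s->finished, memory_order_acquire))
			return 0;       /* not published yet; caller retries */
		*out = s->entry;
		return 1;
	}

	int main(void)
	{
		struct slot s = { .finished = 0 };
		struct mce_rec in = { "bank4 overflow" }, out;

		publish(&s, &in);
		return !consume(&s, &out);   /* exits 0 on success */
	}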
@@ -343,7 +347,11 @@ static void __cpuinit mce_cpu_quirks(struct cpuinfo_x86 *c) | |||
343 | /* disable GART TBL walk error reporting, which trips off | 347 | /* disable GART TBL walk error reporting, which trips off |
344 | incorrectly with the IOMMU & 3ware & Cerberus. */ | 348 | incorrectly with the IOMMU & 3ware & Cerberus. */ |
345 | clear_bit(10, &bank[4]); | 349 | clear_bit(10, &bank[4]); |
350 | /* Lots of broken BIOSes around that don't clear them | ||
351 | by default and leave crap in there. Don't log. */ | ||
352 | mce_bootlog = 0; | ||
346 | } | 353 | } |
354 | |||
347 | } | 355 | } |
348 | 356 | ||
349 | static void __cpuinit mce_cpu_features(struct cpuinfo_x86 *c) | 357 | static void __cpuinit mce_cpu_features(struct cpuinfo_x86 *c) |
@@ -352,6 +360,9 @@ static void __cpuinit mce_cpu_features(struct cpuinfo_x86 *c) | |||
352 | case X86_VENDOR_INTEL: | 360 | case X86_VENDOR_INTEL: |
353 | mce_intel_feature_init(c); | 361 | mce_intel_feature_init(c); |
354 | break; | 362 | break; |
363 | case X86_VENDOR_AMD: | ||
364 | mce_amd_feature_init(c); | ||
365 | break; | ||
355 | default: | 366 | default: |
356 | break; | 367 | break; |
357 | } | 368 | } |
@@ -491,16 +502,16 @@ static int __init mcheck_disable(char *str) | |||
491 | /* mce=off disables machine check. Note you can reenable it later | 502 | /* mce=off disables machine check. Note you can reenable it later |
492 | using sysfs. | 503 | using sysfs. |
493 | mce=TOLERANCELEVEL (number, see above) | 504 | mce=TOLERANCELEVEL (number, see above) |
494 | mce=bootlog Log MCEs from before booting. Disabled by default to work | 505 | mce=bootlog Log MCEs from before booting. Disabled by default on AMD. |
495 | around buggy BIOS that leave bogus MCEs. */ | 506 | mce=nobootlog Don't log MCEs from before booting. */ |
496 | static int __init mcheck_enable(char *str) | 507 | static int __init mcheck_enable(char *str) |
497 | { | 508 | { |
498 | if (*str == '=') | 509 | if (*str == '=') |
499 | str++; | 510 | str++; |
500 | if (!strcmp(str, "off")) | 511 | if (!strcmp(str, "off")) |
501 | mce_dont_init = 1; | 512 | mce_dont_init = 1; |
502 | else if (!strcmp(str, "bootlog")) | 513 | else if (!strcmp(str, "bootlog") || !strcmp(str,"nobootlog")) |
503 | mce_bootlog = 1; | 514 | mce_bootlog = str[0] == 'b'; |
504 | else if (isdigit(str[0])) | 515 | else if (isdigit(str[0])) |
505 | get_option(&str, &tolerant); | 516 | get_option(&str, &tolerant); |
506 | else | 517 | else |
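The reworked mcheck_enable() above folds the two bootlog spellings into one branch: both "bootlog" and "nobootlog" match, and str[0] == 'b' alone distinguishes them, with the AMD quirk earlier in the file now defaulting to nobootlog. A standalone check of that parsing (the get_option() call is replaced by a trivial stand-in):

	#include <assert.h>
	#include <ctype.h>
	#include <string.h>

	static int mce_dont_init, mce_bootlog = 1, tolerant;

	static void mcheck_enable(const char *str)
	{
		if (*str == '=')
			str++;
		if (!strcmp(str, "off"))
			mce_dont_init = 1;
		else if (!strcmp(str, "bootlog") || !strcmp(str, "nobootlog"))
			mce_bootlog = (str[0] == 'b');   /* 'b'ootlog vs 'n'obootlog */
		else if (isdigit((unsigned char)str[0]))
			tolerant = str[0] - '0';         /* stand-in for get_option() */
	}

	int main(void)
	{
		mcheck_enable("=nobootlog");
		assert(mce_bootlog == 0);
		mcheck_enable("=bootlog");
		assert(mce_bootlog == 1);
		mcheck_enable("=off");
		assert(mce_dont_init == 1);
		return 0;
	}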
diff --git a/arch/x86_64/kernel/mce_amd.c b/arch/x86_64/kernel/mce_amd.c new file mode 100644 index 000000000000..1f76175ace02 --- /dev/null +++ b/arch/x86_64/kernel/mce_amd.c | |||
@@ -0,0 +1,538 @@ | |||
1 | /* | ||
2 | * (c) 2005 Advanced Micro Devices, Inc. | ||
3 | * Your use of this code is subject to the terms and conditions of the | ||
4 | * GNU general public license version 2. See "COPYING" or | ||
5 | * http://www.gnu.org/licenses/gpl.html | ||
6 | * | ||
7 | * Written by Jacob Shin - AMD, Inc. | ||
8 | * | ||
9 | * Support : jacob.shin@amd.com | ||
10 | * | ||
11 | * MC4_MISC0 DRAM ECC Error Threshold available under AMD K8 Rev F. | ||
12 | * MC4_MISC0 exists per physical processor. | ||
13 | * | ||
14 | */ | ||
15 | |||
16 | #include <linux/cpu.h> | ||
17 | #include <linux/errno.h> | ||
18 | #include <linux/init.h> | ||
19 | #include <linux/interrupt.h> | ||
20 | #include <linux/kobject.h> | ||
21 | #include <linux/notifier.h> | ||
22 | #include <linux/sched.h> | ||
23 | #include <linux/smp.h> | ||
24 | #include <linux/sysdev.h> | ||
25 | #include <linux/sysfs.h> | ||
26 | #include <asm/apic.h> | ||
27 | #include <asm/mce.h> | ||
28 | #include <asm/msr.h> | ||
29 | #include <asm/percpu.h> | ||
30 | |||
31 | #define PFX "mce_threshold: " | ||
32 | #define VERSION "version 1.00.9" | ||
33 | #define NR_BANKS 5 | ||
34 | #define THRESHOLD_MAX 0xFFF | ||
35 | #define INT_TYPE_APIC 0x00020000 | ||
36 | #define MASK_VALID_HI 0x80000000 | ||
37 | #define MASK_LVTOFF_HI 0x00F00000 | ||
38 | #define MASK_COUNT_EN_HI 0x00080000 | ||
39 | #define MASK_INT_TYPE_HI 0x00060000 | ||
40 | #define MASK_OVERFLOW_HI 0x00010000 | ||
41 | #define MASK_ERR_COUNT_HI 0x00000FFF | ||
42 | #define MASK_OVERFLOW 0x0001000000000000L | ||
43 | |||
44 | struct threshold_bank { | ||
45 | unsigned int cpu; | ||
46 | u8 bank; | ||
47 | u8 interrupt_enable; | ||
48 | u16 threshold_limit; | ||
49 | struct kobject kobj; | ||
50 | }; | ||
51 | |||
52 | static struct threshold_bank threshold_defaults = { | ||
53 | .interrupt_enable = 0, | ||
54 | .threshold_limit = THRESHOLD_MAX, | ||
55 | }; | ||
56 | |||
57 | #ifdef CONFIG_SMP | ||
58 | static unsigned char shared_bank[NR_BANKS] = { | ||
59 | 0, 0, 0, 0, 1 | ||
60 | }; | ||
61 | #endif | ||
62 | |||
63 | static DEFINE_PER_CPU(unsigned char, bank_map); /* see which banks are on */ | ||
64 | |||
65 | /* | ||
66 | * CPU Initialization | ||
67 | */ | ||
68 | |||
69 | /* must be called with correct cpu affinity */ | ||
70 | static void threshold_restart_bank(struct threshold_bank *b, | ||
71 | int reset, u16 old_limit) | ||
72 | { | ||
73 | u32 mci_misc_hi, mci_misc_lo; | ||
74 | |||
75 | rdmsr(MSR_IA32_MC0_MISC + b->bank * 4, mci_misc_lo, mci_misc_hi); | ||
76 | |||
77 | if (b->threshold_limit < (mci_misc_hi & THRESHOLD_MAX)) | ||
78 | reset = 1; /* limit cannot be lower than err count */ | ||
79 | |||
80 | if (reset) { /* reset err count and overflow bit */ | ||
81 | mci_misc_hi = | ||
82 | (mci_misc_hi & ~(MASK_ERR_COUNT_HI | MASK_OVERFLOW_HI)) | | ||
83 | (THRESHOLD_MAX - b->threshold_limit); | ||
84 | } else if (old_limit) { /* change limit w/o reset */ | ||
85 | int new_count = (mci_misc_hi & THRESHOLD_MAX) + | ||
86 | (old_limit - b->threshold_limit); | ||
87 | mci_misc_hi = (mci_misc_hi & ~MASK_ERR_COUNT_HI) | | ||
88 | (new_count & THRESHOLD_MAX); | ||
89 | } | ||
90 | |||
91 | b->interrupt_enable ? | ||
92 | (mci_misc_hi = (mci_misc_hi & ~MASK_INT_TYPE_HI) | INT_TYPE_APIC) : | ||
93 | (mci_misc_hi &= ~MASK_INT_TYPE_HI); | ||
94 | |||
95 | mci_misc_hi |= MASK_COUNT_EN_HI; | ||
96 | wrmsr(MSR_IA32_MC0_MISC + b->bank * 4, mci_misc_lo, mci_misc_hi); | ||
97 | } | ||
98 | |||
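threshold_restart_bank() above works because the error counter in the high half of the MISC register counts up and raises the threshold interrupt when it passes THRESHOLD_MAX (0xFFF): programming a limit of N means preloading the count field with THRESHOLD_MAX - N, and changing the limit without a reset shifts the live count by (old - new) so errors already seen are preserved. The arithmetic in isolation (a sketch of the register math, not the MSR access):

	#include <assert.h>

	#define THRESHOLD_MAX 0xFFF

	/* Preload value for a fresh limit: the counter overflows (and fires
	 * the APIC interrupt) after exactly "limit" more errors. */
	static unsigned preload(unsigned limit)
	{
		return THRESHOLD_MAX - limit;
	}

	/* Errors seen so far, as show_error_count() reports it. */
	static unsigned error_count(unsigned raw_count, unsigned limit)
	{
		return raw_count - (THRESHOLD_MAX - limit);
	}

	/* Limit change without reset: keep the observed error count. */
	static unsigned rescale(unsigned raw_count, unsigned old_l, unsigned new_l)
	{
		return (raw_count + (old_l - new_l)) & THRESHOLD_MAX;
	}

	int main(void)
	{
		unsigned raw = preload(100) + 7;     /* 7 errors counted */
		assert(error_count(raw, 100) == 7);
		raw = rescale(raw, 100, 50);         /* tighten the limit */
		assert(error_count(raw, 50) == 7);   /* count preserved */
		return 0;
	}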
99 | void __cpuinit mce_amd_feature_init(struct cpuinfo_x86 *c) | ||
100 | { | ||
101 | int bank; | ||
102 | u32 mci_misc_lo, mci_misc_hi; | ||
103 | unsigned int cpu = smp_processor_id(); | ||
104 | |||
105 | for (bank = 0; bank < NR_BANKS; ++bank) { | ||
106 | rdmsr(MSR_IA32_MC0_MISC + bank * 4, mci_misc_lo, mci_misc_hi); | ||
107 | |||
108 | /* !valid, !counter present, bios locked */ | ||
109 | if (!(mci_misc_hi & MASK_VALID_HI) || | ||
110 | !(mci_misc_hi & MASK_VALID_HI >> 1) || | ||
111 | (mci_misc_hi & MASK_VALID_HI >> 2)) | ||
112 | continue; | ||
113 | |||
114 | per_cpu(bank_map, cpu) |= (1 << bank); | ||
115 | |||
116 | #ifdef CONFIG_SMP | ||
117 | if (shared_bank[bank] && cpu_core_id[cpu]) | ||
118 | continue; | ||
119 | #endif | ||
120 | |||
121 | setup_threshold_lvt((mci_misc_hi & MASK_LVTOFF_HI) >> 20); | ||
122 | threshold_defaults.cpu = cpu; | ||
123 | threshold_defaults.bank = bank; | ||
124 | threshold_restart_bank(&threshold_defaults, 0, 0); | ||
125 | } | ||
126 | } | ||
127 | |||
128 | /* | ||
129 | * APIC Interrupt Handler | ||
130 | */ | ||
131 | |||
132 | /* | ||
133 | * The threshold interrupt handler services THRESHOLD_APIC_VECTOR. | ||
134 | * The interrupt fires when error_count reaches threshold_limit. | ||
135 | * The handler simply logs an mcelog entry with a software-defined bank number. | ||
136 | */ | ||
137 | asmlinkage void mce_threshold_interrupt(void) | ||
138 | { | ||
139 | int bank; | ||
140 | struct mce m; | ||
141 | |||
142 | ack_APIC_irq(); | ||
143 | irq_enter(); | ||
144 | |||
145 | memset(&m, 0, sizeof(m)); | ||
146 | rdtscll(m.tsc); | ||
147 | m.cpu = smp_processor_id(); | ||
148 | |||
149 | /* assume first bank caused it */ | ||
150 | for (bank = 0; bank < NR_BANKS; ++bank) { | ||
151 | m.bank = MCE_THRESHOLD_BASE + bank; | ||
152 | rdmsrl(MSR_IA32_MC0_MISC + bank * 4, m.misc); | ||
153 | |||
154 | if (m.misc & MASK_OVERFLOW) { | ||
155 | mce_log(&m); | ||
156 | goto out; | ||
157 | } | ||
158 | } | ||
159 | out: | ||
160 | irq_exit(); | ||
161 | } | ||
162 | |||
163 | /* | ||
164 | * Sysfs Interface | ||
165 | */ | ||
166 | |||
167 | static struct sysdev_class threshold_sysclass = { | ||
168 | set_kset_name("threshold"), | ||
169 | }; | ||
170 | |||
171 | static DEFINE_PER_CPU(struct sys_device, device_threshold); | ||
172 | |||
173 | struct threshold_attr { | ||
174 | struct attribute attr; | ||
175 | ssize_t(*show) (struct threshold_bank *, char *); | ||
176 | ssize_t(*store) (struct threshold_bank *, const char *, size_t count); | ||
177 | }; | ||
178 | |||
179 | static DEFINE_PER_CPU(struct threshold_bank *, threshold_banks[NR_BANKS]); | ||
180 | |||
181 | static cpumask_t affinity_set(unsigned int cpu) | ||
182 | { | ||
183 | cpumask_t oldmask = current->cpus_allowed; | ||
184 | cpumask_t newmask = CPU_MASK_NONE; | ||
185 | cpu_set(cpu, newmask); | ||
186 | set_cpus_allowed(current, newmask); | ||
187 | return oldmask; | ||
188 | } | ||
189 | |||
190 | static void affinity_restore(cpumask_t oldmask) | ||
191 | { | ||
192 | set_cpus_allowed(current, oldmask); | ||
193 | } | ||
194 | |||
195 | #define SHOW_FIELDS(name) \ | ||
196 | static ssize_t show_ ## name(struct threshold_bank * b, char *buf) \ | ||
197 | { \ | ||
198 | return sprintf(buf, "%lx\n", (unsigned long) b->name); \ | ||
199 | } | ||
200 | SHOW_FIELDS(interrupt_enable) | ||
201 | SHOW_FIELDS(threshold_limit) | ||
202 | |||
203 | static ssize_t store_interrupt_enable(struct threshold_bank *b, | ||
204 | const char *buf, size_t count) | ||
205 | { | ||
206 | char *end; | ||
207 | cpumask_t oldmask; | ||
208 | unsigned long new = simple_strtoul(buf, &end, 0); | ||
209 | if (end == buf) | ||
210 | return -EINVAL; | ||
211 | b->interrupt_enable = !!new; | ||
212 | |||
213 | oldmask = affinity_set(b->cpu); | ||
214 | threshold_restart_bank(b, 0, 0); | ||
215 | affinity_restore(oldmask); | ||
216 | |||
217 | return end - buf; | ||
218 | } | ||
219 | |||
220 | static ssize_t store_threshold_limit(struct threshold_bank *b, | ||
221 | const char *buf, size_t count) | ||
222 | { | ||
223 | char *end; | ||
224 | cpumask_t oldmask; | ||
225 | u16 old; | ||
226 | unsigned long new = simple_strtoul(buf, &end, 0); | ||
227 | if (end == buf) | ||
228 | return -EINVAL; | ||
229 | if (new > THRESHOLD_MAX) | ||
230 | new = THRESHOLD_MAX; | ||
231 | if (new < 1) | ||
232 | new = 1; | ||
233 | old = b->threshold_limit; | ||
234 | b->threshold_limit = new; | ||
235 | |||
236 | oldmask = affinity_set(b->cpu); | ||
237 | threshold_restart_bank(b, 0, old); | ||
238 | affinity_restore(oldmask); | ||
239 | |||
240 | return end - buf; | ||
241 | } | ||
242 | |||
243 | static ssize_t show_error_count(struct threshold_bank *b, char *buf) | ||
244 | { | ||
245 | u32 high, low; | ||
246 | cpumask_t oldmask; | ||
247 | oldmask = affinity_set(b->cpu); | ||
248 | rdmsr(MSR_IA32_MC0_MISC + b->bank * 4, low, high); /* ignore low 32 */ | ||
249 | affinity_restore(oldmask); | ||
250 | return sprintf(buf, "%x\n", | ||
251 | (high & 0xFFF) - (THRESHOLD_MAX - b->threshold_limit)); | ||
252 | } | ||
253 | |||
254 | static ssize_t store_error_count(struct threshold_bank *b, | ||
255 | const char *buf, size_t count) | ||
256 | { | ||
257 | cpumask_t oldmask; | ||
258 | oldmask = affinity_set(b->cpu); | ||
259 | threshold_restart_bank(b, 1, 0); | ||
260 | affinity_restore(oldmask); | ||
261 | return 1; | ||
262 | } | ||
263 | |||
264 | #define THRESHOLD_ATTR(_name,_mode,_show,_store) { \ | ||
265 | .attr = {.name = __stringify(_name), .mode = _mode }, \ | ||
266 | .show = _show, \ | ||
267 | .store = _store, \ | ||
268 | }; | ||
269 | |||
270 | #define ATTR_FIELDS(name) \ | ||
271 | static struct threshold_attr name = \ | ||
272 | THRESHOLD_ATTR(name, 0644, show_## name, store_## name) | ||
273 | |||
274 | ATTR_FIELDS(interrupt_enable); | ||
275 | ATTR_FIELDS(threshold_limit); | ||
276 | ATTR_FIELDS(error_count); | ||
277 | |||
278 | static struct attribute *default_attrs[] = { | ||
279 | &interrupt_enable.attr, | ||
280 | &threshold_limit.attr, | ||
281 | &error_count.attr, | ||
282 | NULL | ||
283 | }; | ||
284 | |||
285 | #define to_bank(k) container_of(k,struct threshold_bank,kobj) | ||
286 | #define to_attr(a) container_of(a,struct threshold_attr,attr) | ||
287 | |||
288 | static ssize_t show(struct kobject *kobj, struct attribute *attr, char *buf) | ||
289 | { | ||
290 | struct threshold_bank *b = to_bank(kobj); | ||
291 | struct threshold_attr *a = to_attr(attr); | ||
292 | ssize_t ret; | ||
293 | ret = a->show ? a->show(b, buf) : -EIO; | ||
294 | return ret; | ||
295 | } | ||
296 | |||
297 | static ssize_t store(struct kobject *kobj, struct attribute *attr, | ||
298 | const char *buf, size_t count) | ||
299 | { | ||
300 | struct threshold_bank *b = to_bank(kobj); | ||
301 | struct threshold_attr *a = to_attr(attr); | ||
302 | ssize_t ret; | ||
303 | ret = a->store ? a->store(b, buf, count) : -EIO; | ||
304 | return ret; | ||
305 | } | ||
306 | |||
307 | static struct sysfs_ops threshold_ops = { | ||
308 | .show = show, | ||
309 | .store = store, | ||
310 | }; | ||
311 | |||
312 | static struct kobj_type threshold_ktype = { | ||
313 | .sysfs_ops = &threshold_ops, | ||
314 | .default_attrs = default_attrs, | ||
315 | }; | ||
316 | |||
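The show()/store() pair above is the usual kobject attribute dispatch: sysfs hands the callback only the embedded struct kobject and struct attribute, and the to_bank()/to_attr() container_of() casts recover the enclosing threshold_bank and threshold_attr so the typed per-field handlers can run. The pointer arithmetic behind container_of(), in plain C:

	#include <assert.h>
	#include <stddef.h>

	#define container_of(ptr, type, member) \
		((type *)((char *)(ptr) - offsetof(type, member)))

	struct kobject { int refcount; };

	struct threshold_bank {
		unsigned int cpu;
		struct kobject kobj;     /* embedded, not the first member */
	};

	int main(void)
	{
		struct threshold_bank b = { .cpu = 3 };
		struct kobject *k = &b.kobj;   /* what sysfs gives the callback */

		/* to_bank(k) in the diff above: */
		struct threshold_bank *back =
			container_of(k, struct threshold_bank, kobj);
		assert(back == &b && back->cpu == 3);
		return 0;
	}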
317 | /* Symlinks sibling shared banks to the first core; the first core owns the dir/files. */ | ||
318 | static __cpuinit int threshold_create_bank(unsigned int cpu, int bank) | ||
319 | { | ||
320 | int err = 0; | ||
321 | struct threshold_bank *b = 0; | ||
322 | |||
323 | #ifdef CONFIG_SMP | ||
324 | if (cpu_core_id[cpu] && shared_bank[bank]) { /* symlink */ | ||
325 | char name[16]; | ||
326 | unsigned lcpu = first_cpu(cpu_core_map[cpu]); | ||
327 | if (cpu_core_id[lcpu]) | ||
328 | goto out; /* first core not up yet */ | ||
329 | |||
330 | b = per_cpu(threshold_banks, lcpu)[bank]; | ||
331 | if (!b) | ||
332 | goto out; | ||
333 | sprintf(name, "bank%i", bank); | ||
334 | err = sysfs_create_link(&per_cpu(device_threshold, cpu).kobj, | ||
335 | &b->kobj, name); | ||
336 | if (err) | ||
337 | goto out; | ||
338 | per_cpu(threshold_banks, cpu)[bank] = b; | ||
339 | goto out; | ||
340 | } | ||
341 | #endif | ||
342 | |||
343 | b = kmalloc(sizeof(struct threshold_bank), GFP_KERNEL); | ||
344 | if (!b) { | ||
345 | err = -ENOMEM; | ||
346 | goto out; | ||
347 | } | ||
348 | memset(b, 0, sizeof(struct threshold_bank)); | ||
349 | |||
350 | b->cpu = cpu; | ||
351 | b->bank = bank; | ||
352 | b->interrupt_enable = 0; | ||
353 | b->threshold_limit = THRESHOLD_MAX; | ||
354 | kobject_set_name(&b->kobj, "bank%i", bank); | ||
355 | b->kobj.parent = &per_cpu(device_threshold, cpu).kobj; | ||
356 | b->kobj.ktype = &threshold_ktype; | ||
357 | |||
358 | err = kobject_register(&b->kobj); | ||
359 | if (err) { | ||
360 | kfree(b); | ||
361 | goto out; | ||
362 | } | ||
363 | per_cpu(threshold_banks, cpu)[bank] = b; | ||
364 | out: | ||
365 | return err; | ||
366 | } | ||
367 | |||
368 | /* create dir/files for all valid threshold banks */ | ||
369 | static __cpuinit int threshold_create_device(unsigned int cpu) | ||
370 | { | ||
371 | int bank; | ||
372 | int err = 0; | ||
373 | |||
374 | per_cpu(device_threshold, cpu).id = cpu; | ||
375 | per_cpu(device_threshold, cpu).cls = &threshold_sysclass; | ||
376 | err = sysdev_register(&per_cpu(device_threshold, cpu)); | ||
377 | if (err) | ||
378 | goto out; | ||
379 | |||
380 | for (bank = 0; bank < NR_BANKS; ++bank) { | ||
381 | if (!(per_cpu(bank_map, cpu) & 1 << bank)) | ||
382 | continue; | ||
383 | err = threshold_create_bank(cpu, bank); | ||
384 | if (err) | ||
385 | goto out; | ||
386 | } | ||
387 | out: | ||
388 | return err; | ||
389 | } | ||
390 | |||
391 | #ifdef CONFIG_HOTPLUG_CPU | ||
392 | /* | ||
393 | * Let's be hotplug-friendly. | ||
394 | * On multi-core processors, the first core always takes ownership | ||
395 | * of the shared sysfs dir/files, and the rest of the cores are symlinked to it. | ||
396 | */ | ||
397 | |||
398 | /* cpu hotplug call removes all symlinks before first core dies */ | ||
399 | static __cpuinit void threshold_remove_bank(unsigned int cpu, int bank) | ||
400 | { | ||
401 | struct threshold_bank *b; | ||
402 | char name[16]; | ||
403 | |||
404 | b = per_cpu(threshold_banks, cpu)[bank]; | ||
405 | if (!b) | ||
406 | return; | ||
407 | if (shared_bank[bank] && atomic_read(&b->kobj.kref.refcount) > 2) { | ||
408 | sprintf(name, "bank%i", bank); | ||
409 | sysfs_remove_link(&per_cpu(device_threshold, cpu).kobj, name); | ||
410 | per_cpu(threshold_banks, cpu)[bank] = 0; | ||
411 | } else { | ||
412 | kobject_unregister(&b->kobj); | ||
413 | kfree(per_cpu(threshold_banks, cpu)[bank]); | ||
414 | } | ||
415 | } | ||
416 | |||
417 | static __cpuinit void threshold_remove_device(unsigned int cpu) | ||
418 | { | ||
419 | int bank; | ||
420 | |||
421 | for (bank = 0; bank < NR_BANKS; ++bank) { | ||
422 | if (!(per_cpu(bank_map, cpu) & 1 << bank)) | ||
423 | continue; | ||
424 | threshold_remove_bank(cpu, bank); | ||
425 | } | ||
426 | sysdev_unregister(&per_cpu(device_threshold, cpu)); | ||
427 | } | ||
428 | |||
429 | /* link all existing siblings when first core comes up */ | ||
430 | static __cpuinit int threshold_create_symlinks(unsigned int cpu) | ||
431 | { | ||
432 | int bank, err = 0; | ||
433 | unsigned int lcpu = 0; | ||
434 | |||
435 | if (cpu_core_id[cpu]) | ||
436 | return 0; | ||
437 | for_each_cpu_mask(lcpu, cpu_core_map[cpu]) { | ||
438 | if (lcpu == cpu) | ||
439 | continue; | ||
440 | for (bank = 0; bank < NR_BANKS; ++bank) { | ||
441 | if (!(per_cpu(bank_map, cpu) & 1 << bank)) | ||
442 | continue; | ||
443 | if (!shared_bank[bank]) | ||
444 | continue; | ||
445 | err = threshold_create_bank(lcpu, bank); | ||
446 | } | ||
447 | } | ||
448 | return err; | ||
449 | } | ||
450 | |||
451 | /* remove all symlinks before first core dies. */ | ||
452 | static __cpuinit void threshold_remove_symlinks(unsigned int cpu) | ||
453 | { | ||
454 | int bank; | ||
455 | unsigned int lcpu = 0; | ||
456 | if (cpu_core_id[cpu]) | ||
457 | return; | ||
458 | for_each_cpu_mask(lcpu, cpu_core_map[cpu]) { | ||
459 | if (lcpu == cpu) | ||
460 | continue; | ||
461 | for (bank = 0; bank < NR_BANKS; ++bank) { | ||
462 | if (!(per_cpu(bank_map, cpu) & 1 << bank)) | ||
463 | continue; | ||
464 | if (!shared_bank[bank]) | ||
465 | continue; | ||
466 | threshold_remove_bank(lcpu, bank); | ||
467 | } | ||
468 | } | ||
469 | } | ||
470 | #else /* !CONFIG_HOTPLUG_CPU */ | ||
471 | static __cpuinit void threshold_create_symlinks(unsigned int cpu) | ||
472 | { | ||
473 | } | ||
474 | static __cpuinit void threshold_remove_symlinks(unsigned int cpu) | ||
475 | { | ||
476 | } | ||
477 | static void threshold_remove_device(unsigned int cpu) | ||
478 | { | ||
479 | } | ||
480 | #endif | ||
481 | |||
482 | /* get notified when a cpu comes on/off */ | ||
483 | static __cpuinit int threshold_cpu_callback(struct notifier_block *nfb, | ||
484 | unsigned long action, void *hcpu) | ||
485 | { | ||
486 | /* cpu was unsigned int to begin with */ | ||
487 | unsigned int cpu = (unsigned long)hcpu; | ||
488 | |||
489 | if (cpu >= NR_CPUS) | ||
490 | goto out; | ||
491 | |||
492 | switch (action) { | ||
493 | case CPU_ONLINE: | ||
494 | threshold_create_device(cpu); | ||
495 | threshold_create_symlinks(cpu); | ||
496 | break; | ||
497 | case CPU_DOWN_PREPARE: | ||
498 | threshold_remove_symlinks(cpu); | ||
499 | break; | ||
500 | case CPU_DOWN_FAILED: | ||
501 | threshold_create_symlinks(cpu); | ||
502 | break; | ||
503 | case CPU_DEAD: | ||
504 | threshold_remove_device(cpu); | ||
505 | break; | ||
506 | default: | ||
507 | break; | ||
508 | } | ||
509 | out: | ||
510 | return NOTIFY_OK; | ||
511 | } | ||
512 | |||
513 | static struct notifier_block threshold_cpu_notifier = { | ||
514 | .notifier_call = threshold_cpu_callback, | ||
515 | }; | ||
516 | |||
517 | static __init int threshold_init_device(void) | ||
518 | { | ||
519 | int err; | ||
520 | int lcpu = 0; | ||
521 | |||
522 | err = sysdev_class_register(&threshold_sysclass); | ||
523 | if (err) | ||
524 | goto out; | ||
525 | |||
526 | /* handle CPUs that came online before the notifier was registered */ | ||
527 | for_each_online_cpu(lcpu) { | ||
528 | err = threshold_create_device(lcpu); | ||
529 | if (err) | ||
530 | goto out; | ||
531 | } | ||
532 | register_cpu_notifier(&threshold_cpu_notifier); | ||
533 | |||
534 | out: | ||
535 | return err; | ||
536 | } | ||
537 | |||
538 | device_initcall(threshold_init_device); | ||
diff --git a/arch/x86_64/kernel/mpparse.c b/arch/x86_64/kernel/mpparse.c index f16d38d09daf..1105250bf02c 100644 --- a/arch/x86_64/kernel/mpparse.c +++ b/arch/x86_64/kernel/mpparse.c | |||
@@ -42,7 +42,7 @@ int acpi_found_madt; | |||
42 | * Various Linux-internal data structures created from the | 42 | * Various Linux-internal data structures created from the |
43 | * MP-table. | 43 | * MP-table. |
44 | */ | 44 | */ |
45 | int apic_version [MAX_APICS]; | 45 | unsigned char apic_version [MAX_APICS]; |
46 | unsigned char mp_bus_id_to_type [MAX_MP_BUSSES] = { [0 ... MAX_MP_BUSSES-1] = -1 }; | 46 | unsigned char mp_bus_id_to_type [MAX_MP_BUSSES] = { [0 ... MAX_MP_BUSSES-1] = -1 }; |
47 | int mp_bus_id_to_pci_bus [MAX_MP_BUSSES] = { [0 ... MAX_MP_BUSSES-1] = -1 }; | 47 | int mp_bus_id_to_pci_bus [MAX_MP_BUSSES] = { [0 ... MAX_MP_BUSSES-1] = -1 }; |
48 | 48 | ||
@@ -65,7 +65,9 @@ unsigned long mp_lapic_addr = 0; | |||
65 | /* Processor that is doing the boot up */ | 65 | /* Processor that is doing the boot up */ |
66 | unsigned int boot_cpu_id = -1U; | 66 | unsigned int boot_cpu_id = -1U; |
67 | /* Internal processor count */ | 67 | /* Internal processor count */ |
68 | static unsigned int num_processors = 0; | 68 | unsigned int num_processors __initdata = 0; |
69 | |||
70 | unsigned disabled_cpus __initdata; | ||
69 | 71 | ||
70 | /* Bitmask of physically existing CPUs */ | 72 | /* Bitmask of physically existing CPUs */ |
71 | physid_mask_t phys_cpu_present_map = PHYSID_MASK_NONE; | 73 | physid_mask_t phys_cpu_present_map = PHYSID_MASK_NONE; |
@@ -106,11 +108,14 @@ static int __init mpf_checksum(unsigned char *mp, int len) | |||
106 | 108 | ||
107 | static void __init MP_processor_info (struct mpc_config_processor *m) | 109 | static void __init MP_processor_info (struct mpc_config_processor *m) |
108 | { | 110 | { |
109 | int ver, cpu; | 111 | int cpu; |
112 | unsigned char ver; | ||
110 | static int found_bsp=0; | 113 | static int found_bsp=0; |
111 | 114 | ||
112 | if (!(m->mpc_cpuflag & CPU_ENABLED)) | 115 | if (!(m->mpc_cpuflag & CPU_ENABLED)) { |
116 | disabled_cpus++; | ||
113 | return; | 117 | return; |
118 | } | ||
114 | 119 | ||
115 | printk(KERN_INFO "Processor #%d %d:%d APIC version %d\n", | 120 | printk(KERN_INFO "Processor #%d %d:%d APIC version %d\n", |
116 | m->mpc_apicid, | 121 | m->mpc_apicid, |
@@ -129,12 +134,14 @@ static void __init MP_processor_info (struct mpc_config_processor *m) | |||
129 | } | 134 | } |
130 | 135 | ||
131 | cpu = num_processors++; | 136 | cpu = num_processors++; |
132 | 137 | ||
133 | if (m->mpc_apicid > MAX_APICS) { | 138 | #if MAX_APICS < 255 |
139 | if ((int)m->mpc_apicid > MAX_APICS) { | ||
134 | printk(KERN_ERR "Processor #%d INVALID. (Max ID: %d).\n", | 140 | printk(KERN_ERR "Processor #%d INVALID. (Max ID: %d).\n", |
135 | m->mpc_apicid, MAX_APICS); | 141 | m->mpc_apicid, MAX_APICS); |
136 | return; | 142 | return; |
137 | } | 143 | } |
144 | #endif | ||
138 | ver = m->mpc_apicver; | 145 | ver = m->mpc_apicver; |
139 | 146 | ||
140 | physid_set(m->mpc_apicid, phys_cpu_present_map); | 147 | physid_set(m->mpc_apicid, phys_cpu_present_map); |
@@ -218,7 +225,7 @@ static void __init MP_intsrc_info (struct mpc_config_intsrc *m) | |||
218 | m->mpc_irqtype, m->mpc_irqflag & 3, | 225 | m->mpc_irqtype, m->mpc_irqflag & 3, |
219 | (m->mpc_irqflag >> 2) & 3, m->mpc_srcbus, | 226 | (m->mpc_irqflag >> 2) & 3, m->mpc_srcbus, |
220 | m->mpc_srcbusirq, m->mpc_dstapic, m->mpc_dstirq); | 227 | m->mpc_srcbusirq, m->mpc_dstapic, m->mpc_dstirq); |
221 | if (++mp_irq_entries == MAX_IRQ_SOURCES) | 228 | if (++mp_irq_entries >= MAX_IRQ_SOURCES) |
222 | panic("Max # of irq sources exceeded!!\n"); | 229 | panic("Max # of irq sources exceeded!!\n"); |
223 | } | 230 | } |
224 | 231 | ||
@@ -549,7 +556,7 @@ void __init get_smp_config (void) | |||
549 | * Read the physical hardware table. Anything here will | 556 | * Read the physical hardware table. Anything here will |
550 | * override the defaults. | 557 | * override the defaults. |
551 | */ | 558 | */ |
552 | if (!smp_read_mpc((void *)(unsigned long)mpf->mpf_physptr)) { | 559 | if (!smp_read_mpc(phys_to_virt(mpf->mpf_physptr))) { |
553 | smp_found_config = 0; | 560 | smp_found_config = 0; |
554 | printk(KERN_ERR "BIOS bug, MP table errors detected!...\n"); | 561 | printk(KERN_ERR "BIOS bug, MP table errors detected!...\n"); |
555 | printk(KERN_ERR "... disabling SMP support. (tell your hw vendor)\n"); | 562 | printk(KERN_ERR "... disabling SMP support. (tell your hw vendor)\n"); |
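The get_smp_config() fix above stops casting mpf->mpf_physptr, a physical address, directly to a pointer and routes it through phys_to_virt() instead, which translates via the kernel's direct mapping of physical memory. As a rough model, that translation is a constant offset (PAGE_OFFSET_MODEL below is an illustrative stand-in for the kernel's PAGE_OFFSET; do not rely on the value):

	#include <stdio.h>

	/* Sketch: with a direct map, phys-to-virt is a constant offset. */
	#define PAGE_OFFSET_MODEL 0xffff810000000000UL   /* illustrative */

	static void *phys_to_virt_model(unsigned long paddr)
	{
		return (void *)(paddr + PAGE_OFFSET_MODEL);
	}

	int main(void)
	{
		/* e.g. an MP table pointer like the one in the hunk above */
		unsigned long mpf_physptr = 0xf5a40;
		printf("virt = %p\n", phys_to_virt_model(mpf_physptr));
		return 0;
	}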
diff --git a/arch/x86_64/kernel/pci-gart.c b/arch/x86_64/kernel/pci-gart.c index cf0a0315d586..2e28e855ec3c 100644 --- a/arch/x86_64/kernel/pci-gart.c +++ b/arch/x86_64/kernel/pci-gart.c | |||
@@ -187,7 +187,7 @@ static void flush_gart(struct device *dev) | |||
187 | 187 | ||
188 | /* Allocate DMA memory on node near device */ | 188 | /* Allocate DMA memory on node near device */ |
189 | noinline | 189 | noinline |
190 | static void *dma_alloc_pages(struct device *dev, unsigned gfp, unsigned order) | 190 | static void *dma_alloc_pages(struct device *dev, gfp_t gfp, unsigned order) |
191 | { | 191 | { |
192 | struct page *page; | 192 | struct page *page; |
193 | int node; | 193 | int node; |
@@ -204,7 +204,7 @@ static void *dma_alloc_pages(struct device *dev, unsigned gfp, unsigned order) | |||
204 | */ | 204 | */ |
205 | void * | 205 | void * |
206 | dma_alloc_coherent(struct device *dev, size_t size, dma_addr_t *dma_handle, | 206 | dma_alloc_coherent(struct device *dev, size_t size, dma_addr_t *dma_handle, |
207 | unsigned gfp) | 207 | gfp_t gfp) |
208 | { | 208 | { |
209 | void *memory; | 209 | void *memory; |
210 | unsigned long dma_mask = 0; | 210 | unsigned long dma_mask = 0; |
@@ -220,6 +220,12 @@ dma_alloc_coherent(struct device *dev, size_t size, dma_addr_t *dma_handle, | |||
220 | uses the normal dma_mask for alloc_coherent. */ | 220 | uses the normal dma_mask for alloc_coherent. */ |
221 | dma_mask &= *dev->dma_mask; | 221 | dma_mask &= *dev->dma_mask; |
222 | 222 | ||
223 | /* Why <=? Even when the mask is smaller than 4GB it is often larger | ||
224 | than 16MB and in this case we have a chance of finding fitting memory | ||
225 | in the next higher zone first. If not retry with true GFP_DMA. -AK */ | ||
226 | if (dma_mask <= 0xffffffff) | ||
227 | gfp |= GFP_DMA32; | ||
228 | |||
223 | again: | 229 | again: |
224 | memory = dma_alloc_pages(dev, gfp, get_order(size)); | 230 | memory = dma_alloc_pages(dev, gfp, get_order(size)); |
225 | if (memory == NULL) | 231 | if (memory == NULL) |
@@ -245,7 +251,7 @@ dma_alloc_coherent(struct device *dev, size_t size, dma_addr_t *dma_handle, | |||
245 | } | 251 | } |
246 | 252 | ||
247 | if (!(gfp & GFP_DMA)) { | 253 | if (!(gfp & GFP_DMA)) { |
248 | gfp |= GFP_DMA; | 254 | gfp = (gfp & ~GFP_DMA32) | GFP_DMA; |
249 | goto again; | 255 | goto again; |
250 | } | 256 | } |
251 | return NULL; | 257 | return NULL; |
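The pci-gart change above implements a two-step zone fallback: any mask at or below 4GB first tries GFP_DMA32 (memory under 4GB, usually plentiful), and only when that fails is GFP_DMA32 stripped and the allocation retried with GFP_DMA (the 16MB zone). The control flow on its own, with a stub allocator that pretends ZONE_DMA32 is exhausted (the flag bits are illustrative, not the kernel's):

	#include <stdio.h>

	#define GFP_DMA    0x01u   /* illustrative flag bits */
	#define GFP_DMA32  0x02u

	/* Stub allocator: pretend ZONE_DMA32 is exhausted so we see the retry. */
	static void *alloc_pages_model(unsigned gfp)
	{
		return (gfp & GFP_DMA) ? (void *)0x1000 : NULL;
	}

	static void *dma_alloc_model(unsigned long dma_mask, unsigned gfp)
	{
		void *mem;

		/* Masks <= 4GB try ZONE_DMA32 first: far more likely to have
		 * fitting memory than the tiny 16MB ZONE_DMA. */
		if (dma_mask <= 0xffffffff)
			gfp |= GFP_DMA32;
	again:
		mem = alloc_pages_model(gfp);
		if (!mem) {
			if (!(gfp & GFP_DMA)) {
				/* DMA32 failed: fall back to true GFP_DMA. */
				gfp = (gfp & ~GFP_DMA32) | GFP_DMA;
				goto again;
			}
			return NULL;
		}
		return mem;
	}

	int main(void)
	{
		printf("allocated at %p\n", dma_alloc_model(0xffffffffUL, 0));
		return 0;
	}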
diff --git a/arch/x86_64/kernel/pci-nommu.c b/arch/x86_64/kernel/pci-nommu.c index 67d90b89af0b..5a981dca87ff 100644 --- a/arch/x86_64/kernel/pci-nommu.c +++ b/arch/x86_64/kernel/pci-nommu.c | |||
@@ -24,7 +24,7 @@ EXPORT_SYMBOL(iommu_sac_force); | |||
24 | */ | 24 | */ |
25 | 25 | ||
26 | void *dma_alloc_coherent(struct device *hwdev, size_t size, | 26 | void *dma_alloc_coherent(struct device *hwdev, size_t size, |
27 | dma_addr_t *dma_handle, unsigned gfp) | 27 | dma_addr_t *dma_handle, gfp_t gfp) |
28 | { | 28 | { |
29 | void *ret; | 29 | void *ret; |
30 | u64 mask; | 30 | u64 mask; |
diff --git a/arch/x86_64/kernel/process.c b/arch/x86_64/kernel/process.c index b5a89c0bdf59..7519fc520eb3 100644 --- a/arch/x86_64/kernel/process.c +++ b/arch/x86_64/kernel/process.c | |||
@@ -86,12 +86,22 @@ EXPORT_SYMBOL(enable_hlt); | |||
86 | */ | 86 | */ |
87 | void default_idle(void) | 87 | void default_idle(void) |
88 | { | 88 | { |
89 | local_irq_enable(); | ||
90 | |||
89 | if (!atomic_read(&hlt_counter)) { | 91 | if (!atomic_read(&hlt_counter)) { |
90 | local_irq_disable(); | 92 | clear_thread_flag(TIF_POLLING_NRFLAG); |
91 | if (!need_resched()) | 93 | smp_mb__after_clear_bit(); |
92 | safe_halt(); | 94 | while (!need_resched()) { |
93 | else | 95 | local_irq_disable(); |
94 | local_irq_enable(); | 96 | if (!need_resched()) |
97 | safe_halt(); | ||
98 | else | ||
99 | local_irq_enable(); | ||
100 | } | ||
101 | set_thread_flag(TIF_POLLING_NRFLAG); | ||
102 | } else { | ||
103 | while (!need_resched()) | ||
104 | cpu_relax(); | ||
95 | } | 105 | } |
96 | } | 106 | } |
97 | 107 | ||
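The default_idle() rework above is driven by the TIF_POLLING_NRFLAG contract: while the flag is set, a remote CPU wakes this one just by writing the need_resched flag (the idler is polling it), so no reschedule IPI is sent; before actually halting, the CPU must therefore clear the flag, issue a full barrier, and re-check need_resched, or a wakeup racing with the clear would be slept through. The flag itself is now set once in cpu_idle() further down. A small model of that handshake in C11 atomics (a sketch of the protocol, not kernel code):

	#include <stdatomic.h>
	#include <stdio.h>

	static atomic_int need_resched_flag;
	static atomic_int polling;   /* TIF_POLLING_NRFLAG */
	static atomic_int ipi_sent;

	/* Remote CPU waking the idler. */
	static void resched_remote(void)
	{
		atomic_store(&need_resched_flag, 1);
		/* Only pay for an IPI if the target is not polling the flag. */
		if (!atomic_load(&polling))
			atomic_store(&ipi_sent, 1);
	}

	/* Idle CPU about to halt. */
	static void idle_enter_halt(void)
	{
		atomic_store(&polling, 0);  /* clear_thread_flag() */
		atomic_thread_fence(memory_order_seq_cst); /* smp_mb__after_clear_bit() */
		if (atomic_load(&need_resched_flag)) {
			puts("re-check caught the wakeup, skip halt");
			return;
		}
		puts("halt (an IPI will wake us from here on)");
	}

	int main(void)
	{
		atomic_store(&polling, 1);
		resched_remote();       /* lands while we are still polling */
		idle_enter_halt();      /* must notice it before halting */
		return 0;
	}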
@@ -102,30 +112,16 @@ void default_idle(void) | |||
102 | */ | 112 | */ |
103 | static void poll_idle (void) | 113 | static void poll_idle (void) |
104 | { | 114 | { |
105 | int oldval; | ||
106 | |||
107 | local_irq_enable(); | 115 | local_irq_enable(); |
108 | 116 | ||
109 | /* | 117 | asm volatile( |
110 | * Deal with another CPU just having chosen a thread to | 118 | "2:" |
111 | * run here: | 119 | "testl %0,%1;" |
112 | */ | 120 | "rep; nop;" |
113 | oldval = test_and_clear_thread_flag(TIF_NEED_RESCHED); | 121 | "je 2b;" |
114 | 122 | : : | |
115 | if (!oldval) { | 123 | "i" (_TIF_NEED_RESCHED), |
116 | set_thread_flag(TIF_POLLING_NRFLAG); | 124 | "m" (current_thread_info()->flags)); |
117 | asm volatile( | ||
118 | "2:" | ||
119 | "testl %0,%1;" | ||
120 | "rep; nop;" | ||
121 | "je 2b;" | ||
122 | : : | ||
123 | "i" (_TIF_NEED_RESCHED), | ||
124 | "m" (current_thread_info()->flags)); | ||
125 | clear_thread_flag(TIF_POLLING_NRFLAG); | ||
126 | } else { | ||
127 | set_need_resched(); | ||
128 | } | ||
129 | } | 125 | } |
130 | 126 | ||
131 | void cpu_idle_wait(void) | 127 | void cpu_idle_wait(void) |
@@ -148,7 +144,8 @@ void cpu_idle_wait(void) | |||
148 | do { | 144 | do { |
149 | ssleep(1); | 145 | ssleep(1); |
150 | for_each_online_cpu(cpu) { | 146 | for_each_online_cpu(cpu) { |
151 | if (cpu_isset(cpu, map) && !per_cpu(cpu_idle_state, cpu)) | 147 | if (cpu_isset(cpu, map) && |
148 | !per_cpu(cpu_idle_state, cpu)) | ||
152 | cpu_clear(cpu, map); | 149 | cpu_clear(cpu, map); |
153 | } | 150 | } |
154 | cpus_and(map, map, cpu_online_map); | 151 | cpus_and(map, map, cpu_online_map); |
@@ -187,6 +184,8 @@ static inline void play_dead(void) | |||
187 | */ | 184 | */ |
188 | void cpu_idle (void) | 185 | void cpu_idle (void) |
189 | { | 186 | { |
187 | set_thread_flag(TIF_POLLING_NRFLAG); | ||
188 | |||
190 | /* endless idle loop with no priority at all */ | 189 | /* endless idle loop with no priority at all */ |
191 | while (1) { | 190 | while (1) { |
192 | while (!need_resched()) { | 191 | while (!need_resched()) { |
@@ -204,7 +203,9 @@ void cpu_idle (void) | |||
204 | idle(); | 203 | idle(); |
205 | } | 204 | } |
206 | 205 | ||
206 | preempt_enable_no_resched(); | ||
207 | schedule(); | 207 | schedule(); |
208 | preempt_disable(); | ||
208 | } | 209 | } |
209 | } | 210 | } |
210 | 211 | ||
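
The new preempt_enable_no_resched()/schedule()/preempt_disable() bracket encodes the rule that the idle thread runs with preemption disabled and yields only at this one explicit point; the _no_resched flavor skips the redundant "should I reschedule?" check since schedule() is called on the very next line. Reduced to a compilable outline (the stub primitives below are stand-ins, not kernel behavior):

    /* Stubs so the sketch builds stand-alone. */
    static int ticks;
    static void preempt_disable(void)           { }
    static void preempt_enable_no_resched(void) { }
    static void schedule(void)                  { ticks = 0; }
    static int  need_resched(void)              { return ++ticks > 3; }
    static void do_idle_once(void)              { /* hlt / mwait / poll */ }

    static void cpu_idle_sketch(void)
    {
        preempt_disable();               /* idle is never preempted... */
        for (;;) {
            while (!need_resched())
                do_idle_once();
            preempt_enable_no_resched(); /* ...except through this window */
            schedule();
            preempt_disable();
        }
    }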
@@ -219,15 +220,12 @@ static void mwait_idle(void) | |||
219 | { | 220 | { |
220 | local_irq_enable(); | 221 | local_irq_enable(); |
221 | 222 | ||
222 | if (!need_resched()) { | 223 | while (!need_resched()) { |
223 | set_thread_flag(TIF_POLLING_NRFLAG); | 224 | __monitor((void *)&current_thread_info()->flags, 0, 0); |
224 | do { | 225 | smp_mb(); |
225 | __monitor((void *)&current_thread_info()->flags, 0, 0); | 226 | if (need_resched()) |
226 | if (need_resched()) | 227 | break; |
227 | break; | 228 | __mwait(0, 0); |
228 | __mwait(0, 0); | ||
229 | } while (!need_resched()); | ||
230 | clear_thread_flag(TIF_POLLING_NRFLAG); | ||
231 | } | 229 | } |
232 | } | 230 | } |
233 | 231 | ||
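
mwait_idle() no longer toggles TIF_POLLING_NRFLAG per iteration because cpu_idle() now sets it once for the thread's lifetime, and MONITOR/MWAIT fit the polling model naturally: the waker's store to the armed thread_info->flags cache line is itself the wakeup. Ordering still matters — arm the monitor, barrier, re-check the flag, only then mwait — so a store landing between the check and the mwait still terminates the wait. A ring-0-only sketch with the two instructions written out by hand (these helpers approximate, not reproduce, the kernel's __monitor()/__mwait()):

    /* monitor: rax = address to watch, ecx/edx = extensions/hints */
    static inline void monitor_line(const void *addr)
    {
        asm volatile(".byte 0x0f, 0x01, 0xc8" :: "a"(addr), "c"(0), "d"(0));
    }

    /* mwait: eax = hints, ecx = extensions */
    static inline void mwait_now(void)
    {
        asm volatile(".byte 0x0f, 0x01, 0xc9" :: "a"(0), "c"(0));
    }

    static volatile unsigned long flags_word;   /* the monitored line */
    #define NEED_RESCHED 0x8ul                  /* hypothetical bit */

    static void mwait_idle_sketch(void)
    {
        while (!(flags_word & NEED_RESCHED)) {
            monitor_line((const void *)&flags_word);
            asm volatile("mfence" ::: "memory"); /* the smp_mb() above */
            if (flags_word & NEED_RESCHED)       /* re-check after arming */
                break;
            mwait_now();                         /* any store wakes us */
        }
    }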
@@ -278,7 +276,8 @@ void __show_regs(struct pt_regs * regs) | |||
278 | system_utsname.version); | 276 | system_utsname.version); |
279 | printk("RIP: %04lx:[<%016lx>] ", regs->cs & 0xffff, regs->rip); | 277 | printk("RIP: %04lx:[<%016lx>] ", regs->cs & 0xffff, regs->rip); |
280 | printk_address(regs->rip); | 278 | printk_address(regs->rip); |
281 | printk("\nRSP: %04lx:%016lx EFLAGS: %08lx\n", regs->ss, regs->rsp, regs->eflags); | 279 | printk("\nRSP: %04lx:%016lx EFLAGS: %08lx\n", regs->ss, regs->rsp, |
280 | regs->eflags); | ||
282 | printk("RAX: %016lx RBX: %016lx RCX: %016lx\n", | 281 | printk("RAX: %016lx RBX: %016lx RCX: %016lx\n", |
283 | regs->rax, regs->rbx, regs->rcx); | 282 | regs->rax, regs->rbx, regs->rcx); |
284 | printk("RDX: %016lx RSI: %016lx RDI: %016lx\n", | 283 | printk("RDX: %016lx RSI: %016lx RDI: %016lx\n", |
@@ -352,13 +351,6 @@ void flush_thread(void) | |||
352 | struct task_struct *tsk = current; | 351 | struct task_struct *tsk = current; |
353 | struct thread_info *t = current_thread_info(); | 352 | struct thread_info *t = current_thread_info(); |
354 | 353 | ||
355 | /* | ||
356 | * Remove function-return probe instances associated with this task | ||
357 | * and put them back on the free list. Do not insert an exit probe for | ||
358 | * this function, it will be disabled by kprobe_flush_task if you do. | ||
359 | */ | ||
360 | kprobe_flush_task(tsk); | ||
361 | |||
362 | if (t->flags & _TIF_ABI_PENDING) | 354 | if (t->flags & _TIF_ABI_PENDING) |
363 | t->flags ^= (_TIF_ABI_PENDING | _TIF_IA32); | 355 | t->flags ^= (_TIF_ABI_PENDING | _TIF_IA32); |
364 | 356 | ||
@@ -430,15 +422,14 @@ int copy_thread(int nr, unsigned long clone_flags, unsigned long rsp, | |||
430 | struct pt_regs * childregs; | 422 | struct pt_regs * childregs; |
431 | struct task_struct *me = current; | 423 | struct task_struct *me = current; |
432 | 424 | ||
433 | childregs = ((struct pt_regs *) (THREAD_SIZE + (unsigned long) p->thread_info)) - 1; | 425 | childregs = ((struct pt_regs *) |
434 | 426 | (THREAD_SIZE + (unsigned long) p->thread_info)) - 1; | |
435 | *childregs = *regs; | 427 | *childregs = *regs; |
436 | 428 | ||
437 | childregs->rax = 0; | 429 | childregs->rax = 0; |
438 | childregs->rsp = rsp; | 430 | childregs->rsp = rsp; |
439 | if (rsp == ~0UL) { | 431 | if (rsp == ~0UL) |
440 | childregs->rsp = (unsigned long)childregs; | 432 | childregs->rsp = (unsigned long)childregs; |
441 | } | ||
442 | 433 | ||
443 | p->thread.rsp = (unsigned long) childregs; | 434 | p->thread.rsp = (unsigned long) childregs; |
444 | p->thread.rsp0 = (unsigned long) (childregs+1); | 435 | p->thread.rsp0 = (unsigned long) (childregs+1); |
@@ -460,7 +451,8 @@ int copy_thread(int nr, unsigned long clone_flags, unsigned long rsp, | |||
460 | p->thread.io_bitmap_max = 0; | 451 | p->thread.io_bitmap_max = 0; |
461 | return -ENOMEM; | 452 | return -ENOMEM; |
462 | } | 453 | } |
463 | memcpy(p->thread.io_bitmap_ptr, me->thread.io_bitmap_ptr, IO_BITMAP_BYTES); | 454 | memcpy(p->thread.io_bitmap_ptr, me->thread.io_bitmap_ptr, |
455 | IO_BITMAP_BYTES); | ||
464 | } | 456 | } |
465 | 457 | ||
466 | /* | 458 | /* |
@@ -497,7 +489,8 @@ out: | |||
497 | * - fold all the options into a flag word and test it with a single test. | 489 | * - fold all the options into a flag word and test it with a single test. |
498 | * - could test fs/gs bitsliced | 490 | * - could test fs/gs bitsliced |
499 | */ | 491 | */ |
500 | struct task_struct *__switch_to(struct task_struct *prev_p, struct task_struct *next_p) | 492 | struct task_struct * |
493 | __switch_to(struct task_struct *prev_p, struct task_struct *next_p) | ||
501 | { | 494 | { |
502 | struct thread_struct *prev = &prev_p->thread, | 495 | struct thread_struct *prev = &prev_p->thread, |
503 | *next = &next_p->thread; | 496 | *next = &next_p->thread; |
@@ -568,7 +561,8 @@ struct task_struct *__switch_to(struct task_struct *prev_p, struct task_struct * | |||
568 | prev->userrsp = read_pda(oldrsp); | 561 | prev->userrsp = read_pda(oldrsp); |
569 | write_pda(oldrsp, next->userrsp); | 562 | write_pda(oldrsp, next->userrsp); |
570 | write_pda(pcurrent, next_p); | 563 | write_pda(pcurrent, next_p); |
571 | write_pda(kernelstack, (unsigned long)next_p->thread_info + THREAD_SIZE - PDA_STACKOFFSET); | 564 | write_pda(kernelstack, |
565 | (unsigned long)next_p->thread_info + THREAD_SIZE - PDA_STACKOFFSET); | ||
572 | 566 | ||
573 | /* | 567 | /* |
574 | * Now maybe reload the debug registers | 568 | * Now maybe reload the debug registers |
@@ -649,7 +643,9 @@ asmlinkage long sys_fork(struct pt_regs *regs) | |||
649 | return do_fork(SIGCHLD, regs->rsp, regs, 0, NULL, NULL); | 643 | return do_fork(SIGCHLD, regs->rsp, regs, 0, NULL, NULL); |
650 | } | 644 | } |
651 | 645 | ||
652 | asmlinkage long sys_clone(unsigned long clone_flags, unsigned long newsp, void __user *parent_tid, void __user *child_tid, struct pt_regs *regs) | 646 | asmlinkage long |
647 | sys_clone(unsigned long clone_flags, unsigned long newsp, | ||
648 | void __user *parent_tid, void __user *child_tid, struct pt_regs *regs) | ||
653 | { | 649 | { |
654 | if (!newsp) | 650 | if (!newsp) |
655 | newsp = regs->rsp; | 651 | newsp = regs->rsp; |
@@ -685,7 +681,8 @@ unsigned long get_wchan(struct task_struct *p) | |||
685 | return 0; | 681 | return 0; |
686 | fp = *(u64 *)(p->thread.rsp); | 682 | fp = *(u64 *)(p->thread.rsp); |
687 | do { | 683 | do { |
688 | if (fp < (unsigned long)stack || fp > (unsigned long)stack+THREAD_SIZE) | 684 | if (fp < (unsigned long)stack || |
685 | fp > (unsigned long)stack+THREAD_SIZE) | ||
689 | return 0; | 686 | return 0; |
690 | rip = *(u64 *)(fp+8); | 687 | rip = *(u64 *)(fp+8); |
691 | if (!in_sched_functions(rip)) | 688 | if (!in_sched_functions(rip)) |
@@ -720,8 +717,8 @@ long do_arch_prctl(struct task_struct *task, int code, unsigned long addr) | |||
720 | task->thread.gsindex = 0; | 717 | task->thread.gsindex = 0; |
721 | task->thread.gs = addr; | 718 | task->thread.gs = addr; |
722 | if (doit) { | 719 | if (doit) { |
723 | load_gs_index(0); | 720 | load_gs_index(0); |
724 | ret = checking_wrmsrl(MSR_KERNEL_GS_BASE, addr); | 721 | ret = checking_wrmsrl(MSR_KERNEL_GS_BASE, addr); |
725 | } | 722 | } |
726 | } | 723 | } |
727 | put_cpu(); | 724 | put_cpu(); |
@@ -738,7 +735,7 @@ long do_arch_prctl(struct task_struct *task, int code, unsigned long addr) | |||
738 | set_32bit_tls(task, FS_TLS, addr); | 735 | set_32bit_tls(task, FS_TLS, addr); |
739 | if (doit) { | 736 | if (doit) { |
740 | load_TLS(&task->thread, cpu); | 737 | load_TLS(&task->thread, cpu); |
741 | asm volatile("movl %0,%%fs" :: "r" (FS_TLS_SEL)); | 738 | asm volatile("movl %0,%%fs" :: "r"(FS_TLS_SEL)); |
742 | } | 739 | } |
743 | task->thread.fsindex = FS_TLS_SEL; | 740 | task->thread.fsindex = FS_TLS_SEL; |
744 | task->thread.fs = 0; | 741 | task->thread.fs = 0; |
@@ -748,8 +745,8 @@ long do_arch_prctl(struct task_struct *task, int code, unsigned long addr) | |||
748 | if (doit) { | 745 | if (doit) { |
749 | /* set the selector to 0 to not confuse | 746 | /* set the selector to 0 to not confuse |
750 | __switch_to */ | 747 | __switch_to */ |
751 | asm volatile("movl %0,%%fs" :: "r" (0)); | 748 | asm volatile("movl %0,%%fs" :: "r" (0)); |
752 | ret = checking_wrmsrl(MSR_FS_BASE, addr); | 749 | ret = checking_wrmsrl(MSR_FS_BASE, addr); |
753 | } | 750 | } |
754 | } | 751 | } |
755 | put_cpu(); | 752 | put_cpu(); |
@@ -758,9 +755,9 @@ long do_arch_prctl(struct task_struct *task, int code, unsigned long addr) | |||
758 | unsigned long base; | 755 | unsigned long base; |
759 | if (task->thread.fsindex == FS_TLS_SEL) | 756 | if (task->thread.fsindex == FS_TLS_SEL) |
760 | base = read_32bit_tls(task, FS_TLS); | 757 | base = read_32bit_tls(task, FS_TLS); |
761 | else if (doit) { | 758 | else if (doit) |
762 | rdmsrl(MSR_FS_BASE, base); | 759 | rdmsrl(MSR_FS_BASE, base); |
763 | } else | 760 | else |
764 | base = task->thread.fs; | 761 | base = task->thread.fs; |
765 | ret = put_user(base, (unsigned long __user *)addr); | 762 | ret = put_user(base, (unsigned long __user *)addr); |
766 | break; | 763 | break; |
@@ -769,9 +766,9 @@ long do_arch_prctl(struct task_struct *task, int code, unsigned long addr) | |||
769 | unsigned long base; | 766 | unsigned long base; |
770 | if (task->thread.gsindex == GS_TLS_SEL) | 767 | if (task->thread.gsindex == GS_TLS_SEL) |
771 | base = read_32bit_tls(task, GS_TLS); | 768 | base = read_32bit_tls(task, GS_TLS); |
772 | else if (doit) { | 769 | else if (doit) |
773 | rdmsrl(MSR_KERNEL_GS_BASE, base); | 770 | rdmsrl(MSR_KERNEL_GS_BASE, base); |
774 | } else | 771 | else |
775 | base = task->thread.gs; | 772 | base = task->thread.gs; |
776 | ret = put_user(base, (unsigned long __user *)addr); | 773 | ret = put_user(base, (unsigned long __user *)addr); |
777 | break; | 774 | break; |
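
The ARCH_GET_FS/ARCH_GET_GS cleanups above drop braces from single-statement branches but keep the three-way logic: a base set through a 32-bit TLS slot is read back from the GDT, the current task's live value comes straight from the MSR, and anything else comes from the cached thread.fs/gs. The whole interface is reachable from user space through arch_prctl(2); a small probe (the ARCH_* codes are those from asm/prctl.h):

    #include <stdio.h>
    #include <unistd.h>
    #include <sys/syscall.h>

    #define ARCH_GET_FS 0x1003
    #define ARCH_GET_GS 0x1004

    int main(void)
    {
        unsigned long fs_base = 0, gs_base = 0;

        /* These land in the ARCH_GET_* cases of do_arch_prctl(). */
        syscall(SYS_arch_prctl, ARCH_GET_FS, &fs_base);
        syscall(SYS_arch_prctl, ARCH_GET_GS, &gs_base);
        printf("fs base %#lx, gs base %#lx\n", fs_base, gs_base);
        return 0;
    }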
diff --git a/arch/x86_64/kernel/ptrace.c b/arch/x86_64/kernel/ptrace.c index bbf64b59a21e..a87b6cebe80f 100644 --- a/arch/x86_64/kernel/ptrace.c +++ b/arch/x86_64/kernel/ptrace.c | |||
@@ -313,48 +313,11 @@ static unsigned long getreg(struct task_struct *child, unsigned long regno) | |||
313 | 313 | ||
314 | } | 314 | } |
315 | 315 | ||
316 | asmlinkage long sys_ptrace(long request, long pid, unsigned long addr, long data) | 316 | long arch_ptrace(struct task_struct *child, long request, long addr, long data) |
317 | { | 317 | { |
318 | struct task_struct *child; | ||
319 | long i, ret; | 318 | long i, ret; |
320 | unsigned ui; | 319 | unsigned ui; |
321 | 320 | ||
322 | /* This lock_kernel fixes a subtle race with suid exec */ | ||
323 | lock_kernel(); | ||
324 | ret = -EPERM; | ||
325 | if (request == PTRACE_TRACEME) { | ||
326 | /* are we already being traced? */ | ||
327 | if (current->ptrace & PT_PTRACED) | ||
328 | goto out; | ||
329 | ret = security_ptrace(current->parent, current); | ||
330 | if (ret) | ||
331 | goto out; | ||
332 | /* set the ptrace bit in the process flags. */ | ||
333 | current->ptrace |= PT_PTRACED; | ||
334 | ret = 0; | ||
335 | goto out; | ||
336 | } | ||
337 | ret = -ESRCH; | ||
338 | read_lock(&tasklist_lock); | ||
339 | child = find_task_by_pid(pid); | ||
340 | if (child) | ||
341 | get_task_struct(child); | ||
342 | read_unlock(&tasklist_lock); | ||
343 | if (!child) | ||
344 | goto out; | ||
345 | |||
346 | ret = -EPERM; | ||
347 | if (pid == 1) /* you may not mess with init */ | ||
348 | goto out_tsk; | ||
349 | |||
350 | if (request == PTRACE_ATTACH) { | ||
351 | ret = ptrace_attach(child); | ||
352 | goto out_tsk; | ||
353 | } | ||
354 | ret = ptrace_check_attach(child, request == PTRACE_KILL); | ||
355 | if (ret < 0) | ||
356 | goto out_tsk; | ||
357 | |||
358 | switch (request) { | 321 | switch (request) { |
359 | /* when I and D space are separate, these will need to be fixed. */ | 322 | /* when I and D space are separate, these will need to be fixed. */ |
360 | case PTRACE_PEEKTEXT: /* read word at location addr. */ | 323 | case PTRACE_PEEKTEXT: /* read word at location addr. */ |
@@ -608,10 +571,6 @@ asmlinkage long sys_ptrace(long request, long pid, unsigned long addr, long data | |||
608 | ret = ptrace_request(child, request, addr, data); | 571 | ret = ptrace_request(child, request, addr, data); |
609 | break; | 572 | break; |
610 | } | 573 | } |
611 | out_tsk: | ||
612 | put_task_struct(child); | ||
613 | out: | ||
614 | unlock_kernel(); | ||
615 | return ret; | 574 | return ret; |
616 | } | 575 | } |
617 | 576 | ||
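
This ptrace.c hunk is the x86_64 half of hoisting the request-independent bookkeeping (PTRACE_TRACEME, the pid lookup, the "don't touch init" check, attach, lock_kernel) into a generic sys_ptrace in kernel/ptrace.c; the architecture now provides only arch_ptrace() for the register-level requests and still defers to ptrace_request() for everything else. A toy model of the resulting split, with kernel types reduced to something compilable (the real generic entry point does considerably more):

    #include <stdio.h>

    typedef struct { int pid; } task_t;   /* stand-in for task_struct */

    static long arch_ptrace(task_t *child, long request, long addr, long data)
    {
        (void)addr; (void)data;
        /* PEEKTEXT/PEEKUSR/GETREGS... live here, per architecture */
        printf("arch_ptrace: request %ld for pid %d\n", request, child->pid);
        return 0;
    }

    static long generic_sys_ptrace(long request, int pid, long addr, long data)
    {
        static task_t child = { 42 };     /* toy "pid table" */

        if (pid != child.pid)
            return -3;                    /* -ESRCH */
        /* TRACEME/ATTACH handling and security checks elided */
        return arch_ptrace(&child, request, addr, data);
    }

    int main(void) { return (int)generic_sys_ptrace(12, 42, 0, 0); }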
diff --git a/arch/x86_64/kernel/reboot.c b/arch/x86_64/kernel/reboot.c index 47f95687905f..75235ed2b31b 100644 --- a/arch/x86_64/kernel/reboot.c +++ b/arch/x86_64/kernel/reboot.c | |||
@@ -77,6 +77,7 @@ static inline void kb_wait(void) | |||
77 | 77 | ||
78 | void machine_shutdown(void) | 78 | void machine_shutdown(void) |
79 | { | 79 | { |
80 | unsigned long flags; | ||
80 | /* Stop the cpus and apics */ | 81 | /* Stop the cpus and apics */ |
81 | #ifdef CONFIG_SMP | 82 | #ifdef CONFIG_SMP |
82 | int reboot_cpu_id; | 83 | int reboot_cpu_id; |
@@ -98,7 +99,7 @@ void machine_shutdown(void) | |||
98 | smp_send_stop(); | 99 | smp_send_stop(); |
99 | #endif | 100 | #endif |
100 | 101 | ||
101 | local_irq_disable(); | 102 | local_irq_save(flags); |
102 | 103 | ||
103 | #ifndef CONFIG_SMP | 104 | #ifndef CONFIG_SMP |
104 | disable_local_APIC(); | 105 | disable_local_APIC(); |
@@ -106,7 +107,7 @@ void machine_shutdown(void) | |||
106 | 107 | ||
107 | disable_IO_APIC(); | 108 | disable_IO_APIC(); |
108 | 109 | ||
109 | local_irq_enable(); | 110 | local_irq_restore(flags); |
110 | } | 111 | } |
111 | 112 | ||
112 | void machine_emergency_restart(void) | 113 | void machine_emergency_restart(void) |
@@ -120,7 +121,7 @@ void machine_emergency_restart(void) | |||
120 | /* Could also try the reset bit in the Hammer NB */ | 121 | /* Could also try the reset bit in the Hammer NB */ |
121 | switch (reboot_type) { | 122 | switch (reboot_type) { |
122 | case BOOT_KBD: | 123 | case BOOT_KBD: |
123 | for (i=0; i<100; i++) { | 124 | for (i=0; i<10; i++) { |
124 | kb_wait(); | 125 | kb_wait(); |
125 | udelay(50); | 126 | udelay(50); |
126 | outb(0xfe,0x64); /* pulse reset low */ | 127 | outb(0xfe,0x64); /* pulse reset low */ |
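
Two independent reboot.c fixes share this file: machine_shutdown() switches to local_irq_save()/local_irq_restore() so a caller that arrives with interrupts already off (a panic or kexec path, say) does not get them forced back on behind its back, and the keyboard-controller reset pulse is retried 10 times rather than 100, still ample given the udelay between attempts. The save/restore idiom in miniature, with toy macros in place of the real RFLAGS.IF shuffling:

    /* Toy models of the kernel macros. */
    static int irqs_on = 1;

    #define local_irq_save(f)    do { (f) = irqs_on; irqs_on = 0; } while (0)
    #define local_irq_restore(f) do { irqs_on = (f); } while (0)

    static void tear_down_apics(void) { /* elided */ }

    static void machine_shutdown_sketch(void)
    {
        unsigned long flags;

        local_irq_save(flags);     /* correct whether IRQs were on or off */
        tear_down_apics();
        local_irq_restore(flags);  /* caller's state, not a blanket enable */
    }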
diff --git a/arch/x86_64/kernel/setup.c b/arch/x86_64/kernel/setup.c index 238f73e1a834..750e01dcbdf4 100644 --- a/arch/x86_64/kernel/setup.c +++ b/arch/x86_64/kernel/setup.c | |||
@@ -60,6 +60,7 @@ | |||
60 | #include <asm/setup.h> | 60 | #include <asm/setup.h> |
61 | #include <asm/mach_apic.h> | 61 | #include <asm/mach_apic.h> |
62 | #include <asm/numa.h> | 62 | #include <asm/numa.h> |
63 | #include <asm/sections.h> | ||
63 | 64 | ||
64 | /* | 65 | /* |
65 | * Machine setup.. | 66 | * Machine setup.. |
@@ -103,7 +104,6 @@ struct edid_info edid_info; | |||
103 | struct e820map e820; | 104 | struct e820map e820; |
104 | 105 | ||
105 | extern int root_mountflags; | 106 | extern int root_mountflags; |
106 | extern char _text, _etext, _edata, _end; | ||
107 | 107 | ||
108 | char command_line[COMMAND_LINE_SIZE]; | 108 | char command_line[COMMAND_LINE_SIZE]; |
109 | 109 | ||
@@ -412,7 +412,6 @@ contig_initmem_init(unsigned long start_pfn, unsigned long end_pfn) | |||
412 | { | 412 | { |
413 | unsigned long bootmap_size, bootmap; | 413 | unsigned long bootmap_size, bootmap; |
414 | 414 | ||
415 | memory_present(0, start_pfn, end_pfn); | ||
416 | bootmap_size = bootmem_bootmap_pages(end_pfn)<<PAGE_SHIFT; | 415 | bootmap_size = bootmem_bootmap_pages(end_pfn)<<PAGE_SHIFT; |
417 | bootmap = find_e820_area(0, end_pfn<<PAGE_SHIFT, bootmap_size); | 416 | bootmap = find_e820_area(0, end_pfn<<PAGE_SHIFT, bootmap_size); |
418 | if (bootmap == -1L) | 417 | if (bootmap == -1L) |
@@ -571,6 +570,8 @@ void __init setup_arch(char **cmdline_p) | |||
571 | 570 | ||
572 | init_memory_mapping(0, (end_pfn_map << PAGE_SHIFT)); | 571 | init_memory_mapping(0, (end_pfn_map << PAGE_SHIFT)); |
573 | 572 | ||
573 | zap_low_mappings(0); | ||
574 | |||
574 | #ifdef CONFIG_ACPI | 575 | #ifdef CONFIG_ACPI |
575 | /* | 576 | /* |
576 | * Initialize the ACPI boot-time table parser (gets the RSDP and SDT). | 577 | * Initialize the ACPI boot-time table parser (gets the RSDP and SDT). |
@@ -657,8 +658,6 @@ void __init setup_arch(char **cmdline_p) | |||
657 | } | 658 | } |
658 | #endif | 659 | #endif |
659 | 660 | ||
660 | sparse_init(); | ||
661 | |||
662 | paging_init(); | 661 | paging_init(); |
663 | 662 | ||
664 | check_ioapic(); | 663 | check_ioapic(); |
@@ -793,7 +792,7 @@ static void __init amd_detect_cmp(struct cpuinfo_x86 *c) | |||
793 | #endif | 792 | #endif |
794 | 793 | ||
795 | bits = 0; | 794 | bits = 0; |
796 | while ((1 << bits) < c->x86_num_cores) | 795 | while ((1 << bits) < c->x86_max_cores) |
797 | bits++; | 796 | bits++; |
798 | 797 | ||
799 | /* Low order bits define the core id (index of core in socket) */ | 798 | /* Low order bits define the core id (index of core in socket) */ |
@@ -823,16 +822,14 @@ static void __init amd_detect_cmp(struct cpuinfo_x86 *c) | |||
823 | if (!node_online(node)) | 822 | if (!node_online(node)) |
824 | node = nearby_node(apicid); | 823 | node = nearby_node(apicid); |
825 | } | 824 | } |
826 | cpu_to_node[cpu] = node; | 825 | numa_set_node(cpu, node); |
827 | 826 | ||
828 | printk(KERN_INFO "CPU %d(%d) -> Node %d -> Core %d\n", | 827 | printk(KERN_INFO "CPU %d(%d) -> Node %d -> Core %d\n", |
829 | cpu, c->x86_num_cores, node, cpu_core_id[cpu]); | 828 | cpu, c->x86_max_cores, node, cpu_core_id[cpu]); |
830 | #endif | 829 | #endif |
831 | #endif | 830 | #endif |
832 | } | 831 | } |
833 | 832 | ||
834 | #define HWCR 0xc0010015 | ||
835 | |||
836 | static int __init init_amd(struct cpuinfo_x86 *c) | 833 | static int __init init_amd(struct cpuinfo_x86 *c) |
837 | { | 834 | { |
838 | int r; | 835 | int r; |
@@ -841,14 +838,18 @@ static int __init init_amd(struct cpuinfo_x86 *c) | |||
841 | #ifdef CONFIG_SMP | 838 | #ifdef CONFIG_SMP |
842 | unsigned long value; | 839 | unsigned long value; |
843 | 840 | ||
844 | // Disable TLB flush filter by setting HWCR.FFDIS: | 841 | /* |
845 | // bit 6 of msr C001_0015 | 842 | * Disable TLB flush filter by setting HWCR.FFDIS on K8 |
846 | // | 843 | * bit 6 of msr C001_0015 |
847 | // Errata 63 for SH-B3 steppings | 844 | * |
848 | // Errata 122 for all(?) steppings | 845 | * Errata 63 for SH-B3 steppings |
849 | rdmsrl(HWCR, value); | 846 | * Errata 122 for all steppings (F+ have it disabled by default) |
850 | value |= 1 << 6; | 847 | */ |
851 | wrmsrl(HWCR, value); | 848 | if (c->x86 == 15) { |
849 | rdmsrl(MSR_K8_HWCR, value); | ||
850 | value |= 1 << 6; | ||
851 | wrmsrl(MSR_K8_HWCR, value); | ||
852 | } | ||
852 | #endif | 853 | #endif |
853 | 854 | ||
854 | /* Bit 31 in normal CPUID used for nonstandard 3DNow ID; | 855 | /* Bit 31 in normal CPUID used for nonstandard 3DNow ID; |
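
The TLB-flush-filter workaround becomes a guarded read-modify-write through the named MSR_K8_HWCR constant instead of a blind write through a local #define, and it is now fenced to family 15: per the new comment, later parts ship with the filter already off, and poking a model-specific register on hardware that lacks it risks a fault. The shape of the fix (rdmsrl/wrmsrl here are stand-ins; real MSR access needs ring 0 or /dev/cpu/*/msr):

    #include <stdint.h>

    #define MSR_K8_HWCR 0xc0010015u
    #define HWCR_FFDIS  (1u << 6)        /* TLB flush filter disable */

    static uint64_t fake_msr;            /* stand-in MSR cell */
    static void rdmsrl(uint32_t msr, uint64_t *v) { (void)msr; *v = fake_msr; }
    static void wrmsrl(uint32_t msr, uint64_t v)  { (void)msr; fake_msr = v; }

    static void k8_disable_flush_filter(int family)
    {
        uint64_t value;

        if (family != 15)                /* only K8 needs (or has) this */
            return;
        rdmsrl(MSR_K8_HWCR, &value);     /* read-modify-write, never blind */
        value |= HWCR_FFDIS;
        wrmsrl(MSR_K8_HWCR, value);
    }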
@@ -873,9 +874,9 @@ static int __init init_amd(struct cpuinfo_x86 *c) | |||
873 | display_cacheinfo(c); | 874 | display_cacheinfo(c); |
874 | 875 | ||
875 | if (c->extended_cpuid_level >= 0x80000008) { | 876 | if (c->extended_cpuid_level >= 0x80000008) { |
876 | c->x86_num_cores = (cpuid_ecx(0x80000008) & 0xff) + 1; | 877 | c->x86_max_cores = (cpuid_ecx(0x80000008) & 0xff) + 1; |
877 | if (c->x86_num_cores & (c->x86_num_cores - 1)) | 878 | if (c->x86_max_cores & (c->x86_max_cores - 1)) |
878 | c->x86_num_cores = 1; | 879 | c->x86_max_cores = 1; |
879 | 880 | ||
880 | amd_detect_cmp(c); | 881 | amd_detect_cmp(c); |
881 | } | 882 | } |
@@ -887,54 +888,44 @@ static void __cpuinit detect_ht(struct cpuinfo_x86 *c) | |||
887 | { | 888 | { |
888 | #ifdef CONFIG_SMP | 889 | #ifdef CONFIG_SMP |
889 | u32 eax, ebx, ecx, edx; | 890 | u32 eax, ebx, ecx, edx; |
890 | int index_msb, tmp; | 891 | int index_msb, core_bits; |
891 | int cpu = smp_processor_id(); | 892 | int cpu = smp_processor_id(); |
892 | 893 | ||
894 | cpuid(1, &eax, &ebx, &ecx, &edx); | ||
895 | |||
896 | c->apicid = phys_pkg_id(0); | ||
897 | |||
893 | if (!cpu_has(c, X86_FEATURE_HT) || cpu_has(c, X86_FEATURE_CMP_LEGACY)) | 898 | if (!cpu_has(c, X86_FEATURE_HT) || cpu_has(c, X86_FEATURE_CMP_LEGACY)) |
894 | return; | 899 | return; |
895 | 900 | ||
896 | cpuid(1, &eax, &ebx, &ecx, &edx); | ||
897 | smp_num_siblings = (ebx & 0xff0000) >> 16; | 901 | smp_num_siblings = (ebx & 0xff0000) >> 16; |
898 | 902 | ||
899 | if (smp_num_siblings == 1) { | 903 | if (smp_num_siblings == 1) { |
900 | printk(KERN_INFO "CPU: Hyper-Threading is disabled\n"); | 904 | printk(KERN_INFO "CPU: Hyper-Threading is disabled\n"); |
901 | } else if (smp_num_siblings > 1) { | 905 | } else if (smp_num_siblings > 1 ) { |
902 | index_msb = 31; | 906 | |
903 | /* | ||
904 | * At this point we only support two siblings per | ||
905 | * processor package. | ||
906 | */ | ||
907 | if (smp_num_siblings > NR_CPUS) { | 907 | if (smp_num_siblings > NR_CPUS) { |
908 | printk(KERN_WARNING "CPU: Unsupported number of the siblings %d", smp_num_siblings); | 908 | printk(KERN_WARNING "CPU: Unsupported number of the siblings %d", smp_num_siblings); |
909 | smp_num_siblings = 1; | 909 | smp_num_siblings = 1; |
910 | return; | 910 | return; |
911 | } | 911 | } |
912 | tmp = smp_num_siblings; | 912 | |
913 | while ((tmp & 0x80000000 ) == 0) { | 913 | index_msb = get_count_order(smp_num_siblings); |
914 | tmp <<=1 ; | ||
915 | index_msb--; | ||
916 | } | ||
917 | if (smp_num_siblings & (smp_num_siblings - 1)) | ||
918 | index_msb++; | ||
919 | phys_proc_id[cpu] = phys_pkg_id(index_msb); | 914 | phys_proc_id[cpu] = phys_pkg_id(index_msb); |
920 | 915 | ||
921 | printk(KERN_INFO "CPU: Physical Processor ID: %d\n", | 916 | printk(KERN_INFO "CPU: Physical Processor ID: %d\n", |
922 | phys_proc_id[cpu]); | 917 | phys_proc_id[cpu]); |
923 | 918 | ||
924 | smp_num_siblings = smp_num_siblings / c->x86_num_cores; | 919 | smp_num_siblings = smp_num_siblings / c->x86_max_cores; |
925 | 920 | ||
926 | tmp = smp_num_siblings; | 921 | index_msb = get_count_order(smp_num_siblings) ; |
927 | index_msb = 31; | 922 | |
928 | while ((tmp & 0x80000000) == 0) { | 923 | core_bits = get_count_order(c->x86_max_cores); |
929 | tmp <<=1 ; | ||
930 | index_msb--; | ||
931 | } | ||
932 | if (smp_num_siblings & (smp_num_siblings - 1)) | ||
933 | index_msb++; | ||
934 | 924 | ||
935 | cpu_core_id[cpu] = phys_pkg_id(index_msb); | 925 | cpu_core_id[cpu] = phys_pkg_id(index_msb) & |
926 | ((1 << core_bits) - 1); | ||
936 | 927 | ||
937 | if (c->x86_num_cores > 1) | 928 | if (c->x86_max_cores > 1) |
938 | printk(KERN_INFO "CPU: Processor Core ID: %d\n", | 929 | printk(KERN_INFO "CPU: Processor Core ID: %d\n", |
939 | cpu_core_id[cpu]); | 930 | cpu_core_id[cpu]); |
940 | } | 931 | } |
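
Both hand-rolled shift-until-MSB loops in detect_ht() collapse into get_count_order(), which is ceil(log2(n)) — the number of low APIC-ID bits that n sibling IDs occupy — and the core ID is additionally masked to core_bits so it can no longer absorb package-number bits. The helper reduces to fls() plus a round-up; a stand-alone copy with its edge cases checked:

    #include <assert.h>

    static int fls_(unsigned int x)      /* like the kernel's fls() */
    {
        return x ? 32 - __builtin_clz(x) : 0;
    }

    /* Mirrors the kernel's get_count_order(): ceil(log2(count)). */
    static int get_count_order(unsigned int count)
    {
        int order = fls_(count) - 1;     /* floor(log2(count)) */

        if (count & (count - 1))         /* not a power of two: round up */
            order++;
        return order;
    }

    int main(void)
    {
        assert(get_count_order(1) == 0); /* 1 sibling needs 0 ID bits */
        assert(get_count_order(2) == 1);
        assert(get_count_order(3) == 2); /* rounded up */
        assert(get_count_order(8) == 3);
        return 0;
    }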
@@ -965,16 +956,15 @@ static int __cpuinit intel_num_cpu_cores(struct cpuinfo_x86 *c) | |||
965 | static void srat_detect_node(void) | 956 | static void srat_detect_node(void) |
966 | { | 957 | { |
967 | #ifdef CONFIG_NUMA | 958 | #ifdef CONFIG_NUMA |
968 | unsigned apicid, node; | 959 | unsigned node; |
969 | int cpu = smp_processor_id(); | 960 | int cpu = smp_processor_id(); |
970 | 961 | ||
971 | /* Don't do the funky fallback heuristics the AMD version employs | 962 | /* Don't do the funky fallback heuristics the AMD version employs |
972 | for now. */ | 963 | for now. */ |
973 | apicid = phys_proc_id[cpu]; | 964 | node = apicid_to_node[hard_smp_processor_id()]; |
974 | node = apicid_to_node[apicid]; | ||
975 | if (node == NUMA_NO_NODE) | 965 | if (node == NUMA_NO_NODE) |
976 | node = 0; | 966 | node = 0; |
977 | cpu_to_node[cpu] = node; | 967 | numa_set_node(cpu, node); |
978 | 968 | ||
979 | if (acpi_numa > 0) | 969 | if (acpi_numa > 0) |
980 | printk(KERN_INFO "CPU %d -> Node %d\n", cpu, node); | 970 | printk(KERN_INFO "CPU %d -> Node %d\n", cpu, node); |
@@ -992,13 +982,18 @@ static void __cpuinit init_intel(struct cpuinfo_x86 *c) | |||
992 | unsigned eax = cpuid_eax(0x80000008); | 982 | unsigned eax = cpuid_eax(0x80000008); |
993 | c->x86_virt_bits = (eax >> 8) & 0xff; | 983 | c->x86_virt_bits = (eax >> 8) & 0xff; |
994 | c->x86_phys_bits = eax & 0xff; | 984 | c->x86_phys_bits = eax & 0xff; |
985 | /* CPUID workaround for Intel 0F34 CPU */ | ||
986 | if (c->x86_vendor == X86_VENDOR_INTEL && | ||
987 | c->x86 == 0xF && c->x86_model == 0x3 && | ||
988 | c->x86_mask == 0x4) | ||
989 | c->x86_phys_bits = 36; | ||
995 | } | 990 | } |
996 | 991 | ||
997 | if (c->x86 == 15) | 992 | if (c->x86 == 15) |
998 | c->x86_cache_alignment = c->x86_clflush_size * 2; | 993 | c->x86_cache_alignment = c->x86_clflush_size * 2; |
999 | if (c->x86 >= 15) | 994 | if (c->x86 >= 15) |
1000 | set_bit(X86_FEATURE_CONSTANT_TSC, &c->x86_capability); | 995 | set_bit(X86_FEATURE_CONSTANT_TSC, &c->x86_capability); |
1001 | c->x86_num_cores = intel_num_cpu_cores(c); | 996 | c->x86_max_cores = intel_num_cpu_cores(c); |
1002 | 997 | ||
1003 | srat_detect_node(); | 998 | srat_detect_node(); |
1004 | } | 999 | } |
@@ -1036,7 +1031,7 @@ void __cpuinit early_identify_cpu(struct cpuinfo_x86 *c) | |||
1036 | c->x86_model_id[0] = '\0'; /* Unset */ | 1031 | c->x86_model_id[0] = '\0'; /* Unset */ |
1037 | c->x86_clflush_size = 64; | 1032 | c->x86_clflush_size = 64; |
1038 | c->x86_cache_alignment = c->x86_clflush_size; | 1033 | c->x86_cache_alignment = c->x86_clflush_size; |
1039 | c->x86_num_cores = 1; | 1034 | c->x86_max_cores = 1; |
1040 | c->extended_cpuid_level = 0; | 1035 | c->extended_cpuid_level = 0; |
1041 | memset(&c->x86_capability, 0, sizeof c->x86_capability); | 1036 | memset(&c->x86_capability, 0, sizeof c->x86_capability); |
1042 | 1037 | ||
@@ -1059,10 +1054,10 @@ void __cpuinit early_identify_cpu(struct cpuinfo_x86 *c) | |||
1059 | c->x86 = (tfms >> 8) & 0xf; | 1054 | c->x86 = (tfms >> 8) & 0xf; |
1060 | c->x86_model = (tfms >> 4) & 0xf; | 1055 | c->x86_model = (tfms >> 4) & 0xf; |
1061 | c->x86_mask = tfms & 0xf; | 1056 | c->x86_mask = tfms & 0xf; |
1062 | if (c->x86 == 0xf) { | 1057 | if (c->x86 == 0xf) |
1063 | c->x86 += (tfms >> 20) & 0xff; | 1058 | c->x86 += (tfms >> 20) & 0xff; |
1059 | if (c->x86 >= 0x6) | ||
1064 | c->x86_model += ((tfms >> 16) & 0xF) << 4; | 1060 | c->x86_model += ((tfms >> 16) & 0xF) << 4; |
1065 | } | ||
1066 | if (c->x86_capability[0] & (1<<19)) | 1061 | if (c->x86_capability[0] & (1<<19)) |
1067 | c->x86_clflush_size = ((misc >> 8) & 0xff) * 8; | 1062 | c->x86_clflush_size = ((misc >> 8) & 0xff) * 8; |
1068 | } else { | 1063 | } else { |
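
The restructured signature decode applies the extended-family bits only when the base family is 0xf, but now applies the extended-model bits for any family of 6 or above, which later family-6 Intel parts need. Conveniently, the CPU in the 0F34 workaround a few hunks up walks straight through this math: eax = 0x00000f34 yields family 0xf (no extension), model 0x3, stepping 0x4. The arithmetic, executable:

    #include <assert.h>

    struct sig { unsigned family, model, stepping; };

    /* Mirrors the new decode in early_identify_cpu(). */
    static struct sig decode(unsigned eax)
    {
        struct sig s;

        s.family   = (eax >> 8) & 0xf;
        s.model    = (eax >> 4) & 0xf;
        s.stepping = eax & 0xf;
        if (s.family == 0xf)
            s.family += (eax >> 20) & 0xff;        /* extended family */
        if (s.family >= 0x6)
            s.model += ((eax >> 16) & 0xf) << 4;   /* extended model  */
        return s;
    }

    int main(void)
    {
        struct sig s = decode(0x00000f34);  /* the workaround's CPU */
        assert(s.family == 0xf && s.model == 0x3 && s.stepping == 0x4);
        return 0;
    }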
@@ -1212,7 +1207,7 @@ static int show_cpuinfo(struct seq_file *m, void *v) | |||
1212 | NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, | 1207 | NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, |
1213 | 1208 | ||
1214 | /* Intel-defined (#2) */ | 1209 | /* Intel-defined (#2) */ |
1215 | "pni", NULL, NULL, "monitor", "ds_cpl", NULL, NULL, "est", | 1210 | "pni", NULL, NULL, "monitor", "ds_cpl", "vmx", NULL, "est", |
1216 | "tm2", NULL, "cid", NULL, NULL, "cx16", "xtpr", NULL, | 1211 | "tm2", NULL, "cid", NULL, NULL, "cx16", "xtpr", NULL, |
1217 | NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, | 1212 | NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, |
1218 | NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, | 1213 | NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, |
@@ -1270,13 +1265,12 @@ static int show_cpuinfo(struct seq_file *m, void *v) | |||
1270 | seq_printf(m, "cache size\t: %d KB\n", c->x86_cache_size); | 1265 | seq_printf(m, "cache size\t: %d KB\n", c->x86_cache_size); |
1271 | 1266 | ||
1272 | #ifdef CONFIG_SMP | 1267 | #ifdef CONFIG_SMP |
1273 | if (smp_num_siblings * c->x86_num_cores > 1) { | 1268 | if (smp_num_siblings * c->x86_max_cores > 1) { |
1274 | int cpu = c - cpu_data; | 1269 | int cpu = c - cpu_data; |
1275 | seq_printf(m, "physical id\t: %d\n", phys_proc_id[cpu]); | 1270 | seq_printf(m, "physical id\t: %d\n", phys_proc_id[cpu]); |
1276 | seq_printf(m, "siblings\t: %d\n", | 1271 | seq_printf(m, "siblings\t: %d\n", cpus_weight(cpu_core_map[cpu])); |
1277 | c->x86_num_cores * smp_num_siblings); | ||
1278 | seq_printf(m, "core id\t\t: %d\n", cpu_core_id[cpu]); | 1272 | seq_printf(m, "core id\t\t: %d\n", cpu_core_id[cpu]); |
1279 | seq_printf(m, "cpu cores\t: %d\n", c->x86_num_cores); | 1273 | seq_printf(m, "cpu cores\t: %d\n", c->booted_cores); |
1280 | } | 1274 | } |
1281 | #endif | 1275 | #endif |
1282 | 1276 | ||
diff --git a/arch/x86_64/kernel/setup64.c b/arch/x86_64/kernel/setup64.c index bd33be24a386..06dc354375c3 100644 --- a/arch/x86_64/kernel/setup64.c +++ b/arch/x86_64/kernel/setup64.c | |||
@@ -87,6 +87,10 @@ void __init setup_per_cpu_areas(void) | |||
87 | int i; | 87 | int i; |
88 | unsigned long size; | 88 | unsigned long size; |
89 | 89 | ||
90 | #ifdef CONFIG_HOTPLUG_CPU | ||
91 | prefill_possible_map(); | ||
92 | #endif | ||
93 | |||
90 | /* Copy section for each CPU (we discard the original) */ | 94 | /* Copy section for each CPU (we discard the original) */ |
91 | size = ALIGN(__per_cpu_end - __per_cpu_start, SMP_CACHE_BYTES); | 95 | size = ALIGN(__per_cpu_end - __per_cpu_start, SMP_CACHE_BYTES); |
92 | #ifdef CONFIG_MODULES | 96 | #ifdef CONFIG_MODULES |
@@ -137,7 +141,6 @@ void pda_init(int cpu) | |||
137 | panic("cannot allocate irqstack for cpu %d", cpu); | 141 | panic("cannot allocate irqstack for cpu %d", cpu); |
138 | } | 142 | } |
139 | 143 | ||
140 | asm volatile("movq %0,%%cr3" :: "r" (__pa_symbol(&init_level4_pgt))); | ||
141 | 144 | ||
142 | pda->irqstackptr += IRQSTACKSIZE-64; | 145 | pda->irqstackptr += IRQSTACKSIZE-64; |
143 | } | 146 | } |
@@ -193,6 +196,7 @@ void __cpuinit cpu_init (void) | |||
193 | /* CPU 0 is initialised in head64.c */ | 196 | /* CPU 0 is initialised in head64.c */ |
194 | if (cpu != 0) { | 197 | if (cpu != 0) { |
195 | pda_init(cpu); | 198 | pda_init(cpu); |
199 | zap_low_mappings(cpu); | ||
196 | } else | 200 | } else |
197 | estacks = boot_exception_stacks; | 201 | estacks = boot_exception_stacks; |
198 | 202 | ||
diff --git a/arch/x86_64/kernel/signal.c b/arch/x86_64/kernel/signal.c index d642fbf3da29..5876df116c92 100644 --- a/arch/x86_64/kernel/signal.c +++ b/arch/x86_64/kernel/signal.c | |||
@@ -110,6 +110,15 @@ restore_sigcontext(struct pt_regs *regs, struct sigcontext __user *sc, unsigned | |||
110 | COPY(r14); | 110 | COPY(r14); |
111 | COPY(r15); | 111 | COPY(r15); |
112 | 112 | ||
113 | /* Kernel saves and restores only the CS segment register on signals, | ||
114 | * which is the bare minimum needed to allow mixed 32/64-bit code. | ||
115 | * App's signal handler can save/restore other segments if needed. */ | ||
116 | { | ||
117 | unsigned cs; | ||
118 | err |= __get_user(cs, &sc->cs); | ||
119 | regs->cs = cs | 3; /* Force into user mode */ | ||
120 | } | ||
121 | |||
113 | { | 122 | { |
114 | unsigned int tmpflags; | 123 | unsigned int tmpflags; |
115 | err |= __get_user(tmpflags, &sc->eflags); | 124 | err |= __get_user(tmpflags, &sc->eflags); |
@@ -187,6 +196,7 @@ setup_sigcontext(struct sigcontext __user *sc, struct pt_regs *regs, unsigned lo | |||
187 | { | 196 | { |
188 | int err = 0; | 197 | int err = 0; |
189 | 198 | ||
199 | err |= __put_user(regs->cs, &sc->cs); | ||
190 | err |= __put_user(0, &sc->gs); | 200 | err |= __put_user(0, &sc->gs); |
191 | err |= __put_user(0, &sc->fs); | 201 | err |= __put_user(0, &sc->fs); |
192 | 202 | ||
@@ -318,7 +328,14 @@ static int setup_rt_frame(int sig, struct k_sigaction *ka, siginfo_t *info, | |||
318 | 328 | ||
319 | regs->rsp = (unsigned long)frame; | 329 | regs->rsp = (unsigned long)frame; |
320 | 330 | ||
331 | /* Set up the CS register to run signal handlers in 64-bit mode, | ||
332 | even if the handler happens to be interrupting 32-bit code. */ | ||
333 | regs->cs = __USER_CS; | ||
334 | |||
335 | /* This, by contrast, has nothing to do with segment registers - | ||
336 | see include/asm-x86_64/uaccess.h for details. */ | ||
321 | set_fs(USER_DS); | 337 | set_fs(USER_DS); |
338 | |||
322 | regs->eflags &= ~TF_MASK; | 339 | regs->eflags &= ~TF_MASK; |
323 | if (test_thread_flag(TIF_SINGLESTEP)) | 340 | if (test_thread_flag(TIF_SINGLESTEP)) |
324 | ptrace_notify(SIGTRAP); | 341 | ptrace_notify(SIGTRAP); |
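
Previously the sigcontext carried no CS at all, so a signal interrupting 32-bit compat code resumed with whatever selector the 64-bit handler happened to leave loaded. The patch closes that three ways: setup_rt_frame() forces __USER_CS so the handler always executes 64-bit, setup_sigcontext() saves the interrupted CS, and restore ORs the low two bits (the requested privilege level) to 3 so a forged sigcontext can never name a ring-0 selector. The RPL-forcing step in isolation (selector values below are illustrative):

    #include <assert.h>

    /* The low 2 selector bits are the RPL; OR-ing in 3 pins any value
     * user space hands back to ring 3. */
    static unsigned force_user_rpl(unsigned cs)
    {
        return cs | 3;
    }

    int main(void)
    {
        assert(force_user_rpl(0x33) == 0x33);  /* 64-bit user CS: unchanged */
        assert(force_user_rpl(0x23) == 0x23);  /* 32-bit user CS: unchanged */
        assert(force_user_rpl(0x10) == 0x13);  /* forged kernel CS: defanged */
        return 0;
    }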
diff --git a/arch/x86_64/kernel/smp.c b/arch/x86_64/kernel/smp.c index 9db9dda161b4..cfc3d9dccbd9 100644 --- a/arch/x86_64/kernel/smp.c +++ b/arch/x86_64/kernel/smp.c | |||
@@ -28,8 +28,6 @@ | |||
28 | #include <asm/proto.h> | 28 | #include <asm/proto.h> |
29 | #include <asm/apicdef.h> | 29 | #include <asm/apicdef.h> |
30 | 30 | ||
31 | #define __cpuinit __init | ||
32 | |||
33 | /* | 31 | /* |
34 | * Smarter SMP flushing macros. | 32 | * Smarter SMP flushing macros. |
35 | * c/o Linus Torvalds. | 33 | * c/o Linus Torvalds. |
@@ -452,13 +450,14 @@ int smp_call_function (void (*func) (void *info), void *info, int nonatomic, | |||
452 | 450 | ||
453 | void smp_stop_cpu(void) | 451 | void smp_stop_cpu(void) |
454 | { | 452 | { |
453 | unsigned long flags; | ||
455 | /* | 454 | /* |
456 | * Remove this CPU: | 455 | * Remove this CPU: |
457 | */ | 456 | */ |
458 | cpu_clear(smp_processor_id(), cpu_online_map); | 457 | cpu_clear(smp_processor_id(), cpu_online_map); |
459 | local_irq_disable(); | 458 | local_irq_save(flags); |
460 | disable_local_APIC(); | 459 | disable_local_APIC(); |
461 | local_irq_enable(); | 460 | local_irq_restore(flags); |
462 | } | 461 | } |
463 | 462 | ||
464 | static void smp_really_stop_cpu(void *dummy) | 463 | static void smp_really_stop_cpu(void *dummy) |
diff --git a/arch/x86_64/kernel/smpboot.c b/arch/x86_64/kernel/smpboot.c index e12d7baeb33e..683c33f7b967 100644 --- a/arch/x86_64/kernel/smpboot.c +++ b/arch/x86_64/kernel/smpboot.c | |||
@@ -64,9 +64,8 @@ | |||
64 | int smp_num_siblings = 1; | 64 | int smp_num_siblings = 1; |
65 | /* Package ID of each logical CPU */ | 65 | /* Package ID of each logical CPU */ |
66 | u8 phys_proc_id[NR_CPUS] __read_mostly = { [0 ... NR_CPUS-1] = BAD_APICID }; | 66 | u8 phys_proc_id[NR_CPUS] __read_mostly = { [0 ... NR_CPUS-1] = BAD_APICID }; |
67 | /* core ID of each logical CPU */ | ||
67 | u8 cpu_core_id[NR_CPUS] __read_mostly = { [0 ... NR_CPUS-1] = BAD_APICID }; | 68 | u8 cpu_core_id[NR_CPUS] __read_mostly = { [0 ... NR_CPUS-1] = BAD_APICID }; |
68 | EXPORT_SYMBOL(phys_proc_id); | ||
69 | EXPORT_SYMBOL(cpu_core_id); | ||
70 | 69 | ||
71 | /* Bitmask of currently online CPUs */ | 70 | /* Bitmask of currently online CPUs */ |
72 | cpumask_t cpu_online_map __read_mostly; | 71 | cpumask_t cpu_online_map __read_mostly; |
@@ -89,7 +88,10 @@ struct cpuinfo_x86 cpu_data[NR_CPUS] __cacheline_aligned; | |||
89 | /* Set when the idlers are all forked */ | 88 | /* Set when the idlers are all forked */ |
90 | int smp_threads_ready; | 89 | int smp_threads_ready; |
91 | 90 | ||
91 | /* representing HT siblings of each logical CPU */ | ||
92 | cpumask_t cpu_sibling_map[NR_CPUS] __read_mostly; | 92 | cpumask_t cpu_sibling_map[NR_CPUS] __read_mostly; |
93 | |||
94 | /* representing HT and core siblings of each logical CPU */ | ||
93 | cpumask_t cpu_core_map[NR_CPUS] __read_mostly; | 95 | cpumask_t cpu_core_map[NR_CPUS] __read_mostly; |
94 | EXPORT_SYMBOL(cpu_core_map); | 96 | EXPORT_SYMBOL(cpu_core_map); |
95 | 97 | ||
@@ -436,30 +438,59 @@ void __cpuinit smp_callin(void) | |||
436 | cpu_set(cpuid, cpu_callin_map); | 438 | cpu_set(cpuid, cpu_callin_map); |
437 | } | 439 | } |
438 | 440 | ||
441 | /* representing cpus for which sibling maps can be computed */ | ||
442 | static cpumask_t cpu_sibling_setup_map; | ||
443 | |||
439 | static inline void set_cpu_sibling_map(int cpu) | 444 | static inline void set_cpu_sibling_map(int cpu) |
440 | { | 445 | { |
441 | int i; | 446 | int i; |
447 | struct cpuinfo_x86 *c = cpu_data; | ||
448 | |||
449 | cpu_set(cpu, cpu_sibling_setup_map); | ||
442 | 450 | ||
443 | if (smp_num_siblings > 1) { | 451 | if (smp_num_siblings > 1) { |
444 | for_each_cpu(i) { | 452 | for_each_cpu_mask(i, cpu_sibling_setup_map) { |
445 | if (cpu_core_id[cpu] == cpu_core_id[i]) { | 453 | if (phys_proc_id[cpu] == phys_proc_id[i] && |
454 | cpu_core_id[cpu] == cpu_core_id[i]) { | ||
446 | cpu_set(i, cpu_sibling_map[cpu]); | 455 | cpu_set(i, cpu_sibling_map[cpu]); |
447 | cpu_set(cpu, cpu_sibling_map[i]); | 456 | cpu_set(cpu, cpu_sibling_map[i]); |
457 | cpu_set(i, cpu_core_map[cpu]); | ||
458 | cpu_set(cpu, cpu_core_map[i]); | ||
448 | } | 459 | } |
449 | } | 460 | } |
450 | } else { | 461 | } else { |
451 | cpu_set(cpu, cpu_sibling_map[cpu]); | 462 | cpu_set(cpu, cpu_sibling_map[cpu]); |
452 | } | 463 | } |
453 | 464 | ||
454 | if (current_cpu_data.x86_num_cores > 1) { | 465 | if (current_cpu_data.x86_max_cores == 1) { |
455 | for_each_cpu(i) { | ||
456 | if (phys_proc_id[cpu] == phys_proc_id[i]) { | ||
457 | cpu_set(i, cpu_core_map[cpu]); | ||
458 | cpu_set(cpu, cpu_core_map[i]); | ||
459 | } | ||
460 | } | ||
461 | } else { | ||
462 | cpu_core_map[cpu] = cpu_sibling_map[cpu]; | 466 | cpu_core_map[cpu] = cpu_sibling_map[cpu]; |
467 | c[cpu].booted_cores = 1; | ||
468 | return; | ||
469 | } | ||
470 | |||
471 | for_each_cpu_mask(i, cpu_sibling_setup_map) { | ||
472 | if (phys_proc_id[cpu] == phys_proc_id[i]) { | ||
473 | cpu_set(i, cpu_core_map[cpu]); | ||
474 | cpu_set(cpu, cpu_core_map[i]); | ||
475 | /* | ||
476 | * Does this new cpu bring up a new core? | ||
477 | */ | ||
478 | if (cpus_weight(cpu_sibling_map[cpu]) == 1) { | ||
479 | /* | ||
480 | * for each core in package, increment | ||
481 | * the booted_cores for this new cpu | ||
482 | */ | ||
483 | if (first_cpu(cpu_sibling_map[i]) == i) | ||
484 | c[cpu].booted_cores++; | ||
485 | /* | ||
486 | * increment the core count for all | ||
487 | * the other cpus in this package | ||
488 | */ | ||
489 | if (i != cpu) | ||
490 | c[i].booted_cores++; | ||
491 | } else if (i != cpu && !c[cpu].booted_cores) | ||
492 | c[cpu].booted_cores = c[i].booted_cores; | ||
493 | } | ||
463 | } | 494 | } |
464 | } | 495 | } |
465 | 496 | ||
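
Two things changed in set_cpu_sibling_map(): HT siblings are now keyed on the (phys_proc_id, cpu_core_id) pair, since bare core IDs repeat across packages and could previously fuse threads from different sockets, and a per-cpu booted_cores count is maintained for /proc/cpuinfo's "cpu cores" line. The rule: the first thread of a new core (sibling mask of weight 1) credits one core to every CPU already set up in the package and counts the package's core-leader threads for itself; a later thread of an existing core simply inherits its sibling's count. A toy replay for one package with two 2-thread cores, following the same branches:

    #include <stdio.h>

    static unsigned sibling[4];               /* HT sibling bitmaps   */
    static int booted[4];                     /* booted_cores per cpu */
    static const int core_id[4] = { 0, 0, 1, 1 };

    static int first_cpu(unsigned m) { return __builtin_ctz(m); }
    static int weight(unsigned m)    { return __builtin_popcount(m); }

    int main(void)
    {
        for (int cpu = 0; cpu < 4; cpu++) {   /* online cpu0..cpu3 in order */
            for (int i = 0; i <= cpu; i++)    /* one package in this toy */
                if (core_id[i] == core_id[cpu]) {
                    sibling[cpu] |= 1u << i;
                    sibling[i]   |= 1u << cpu;
                }
            for (int i = 0; i <= cpu; i++) {
                if (weight(sibling[cpu]) == 1) {      /* new core's 1st thread */
                    if (first_cpu(sibling[i]) == i)   /* i leads its core */
                        booted[cpu]++;
                    if (i != cpu)
                        booted[i]++;                  /* package gains a core */
                } else if (i != cpu && !booted[cpu])
                    booted[cpu] = booted[i];          /* inherit the count */
            }
        }
        for (int c = 0; c < 4; c++)
            printf("cpu%d: booted_cores=%d\n", c, booted[c]);  /* all 2 */
        return 0;
    }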
@@ -474,6 +505,7 @@ void __cpuinit start_secondary(void) | |||
474 | * things done here to the most necessary things. | 505 | * things done here to the most necessary things. |
475 | */ | 506 | */ |
476 | cpu_init(); | 507 | cpu_init(); |
508 | preempt_disable(); | ||
477 | smp_callin(); | 509 | smp_callin(); |
478 | 510 | ||
479 | /* otherwise gcc will move up the smp_processor_id before the cpu_init */ | 511 | /* otherwise gcc will move up the smp_processor_id before the cpu_init */ |
@@ -880,6 +912,9 @@ static __init void disable_smp(void) | |||
880 | } | 912 | } |
881 | 913 | ||
882 | #ifdef CONFIG_HOTPLUG_CPU | 914 | #ifdef CONFIG_HOTPLUG_CPU |
915 | |||
916 | int additional_cpus __initdata = -1; | ||
917 | |||
883 | /* | 918 | /* |
884 | * cpu_possible_map should be static, it cannot change as cpu's | 919 | * cpu_possible_map should be static, it cannot change as cpu's |
885 | * are onlined, or offlined. The reason is per-cpu data-structures | 920 | * are onlined, or offlined. The reason is per-cpu data-structures |
@@ -888,14 +923,38 @@ static __init void disable_smp(void) | |||
888 | * cpu_present_map on the other hand can change dynamically. | 923 | * cpu_present_map on the other hand can change dynamically. |
889 | * In case when cpu_hotplug is not compiled, then we resort to current | 924 | * In case when cpu_hotplug is not compiled, then we resort to current |
890 | * behaviour, which is cpu_possible == cpu_present. | 925 | * behaviour, which is cpu_possible == cpu_present. |
891 | * If cpu-hotplug is supported, then we need to preallocate for all | ||
892 | * those NR_CPUS, hence cpu_possible_map represents entire NR_CPUS range. | ||
893 | * - Ashok Raj | 926 | * - Ashok Raj |
927 | * | ||
928 | * Three ways to find out the number of additional hotplug CPUs: | ||
929 | * - If the BIOS specified disabled CPUs in ACPI/mptables use that. | ||
930 | * - otherwise use half of the available CPUs or 2, whichever is more. | ||
931 | * - The user can override it with additional_cpus=NUM | ||
932 | * We do this because additional CPUs waste a lot of memory. | ||
933 | * -AK | ||
894 | */ | 934 | */ |
895 | static void prefill_possible_map(void) | 935 | __init void prefill_possible_map(void) |
896 | { | 936 | { |
897 | int i; | 937 | int i; |
898 | for (i = 0; i < NR_CPUS; i++) | 938 | int possible; |
939 | |||
940 | if (additional_cpus == -1) { | ||
941 | if (disabled_cpus > 0) { | ||
942 | additional_cpus = disabled_cpus; | ||
943 | } else { | ||
944 | additional_cpus = num_processors / 2; | ||
945 | if (additional_cpus == 0) | ||
946 | additional_cpus = 2; | ||
947 | } | ||
948 | } | ||
949 | possible = num_processors + additional_cpus; | ||
950 | if (possible > NR_CPUS) | ||
951 | possible = NR_CPUS; | ||
952 | |||
953 | printk(KERN_INFO "SMP: Allowing %d CPUs, %d hotplug CPUs\n", | ||
954 | possible, | ||
955 | max_t(int, possible - num_processors, 0)); | ||
956 | |||
957 | for (i = 0; i < possible; i++) | ||
899 | cpu_set(i, cpu_possible_map); | 958 | cpu_set(i, cpu_possible_map); |
900 | } | 959 | } |
901 | #endif | 960 | #endif |
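
prefill_possible_map() used to mark all NR_CPUS possible on hotplug kernels, paying per-cpu memory for CPUs that could never arrive. The new sizing: trust the BIOS's disabled-CPU count when there is one, otherwise allow max(num_processors/2, 2) extra, clamp to NR_CPUS, and let additional_cpus=N on the command line override the lot. The arithmetic, reproduced:

    #include <stdio.h>

    #define NR_CPUS 32

    /* Mirror of the new sizing; additional == -1 means "not given on
     * the command line". */
    static int possible_cpus(int num_processors, int disabled, int additional)
    {
        int possible;

        if (additional == -1) {
            if (disabled > 0)
                additional = disabled;
            else {
                additional = num_processors / 2;
                if (additional == 0)
                    additional = 2;
            }
        }
        possible = num_processors + additional;
        return possible > NR_CPUS ? NR_CPUS : possible;
    }

    int main(void)
    {
        printf("%d\n", possible_cpus(4, 0, -1)); /* 4 + 4/2           = 6  */
        printf("%d\n", possible_cpus(1, 0, -1)); /* 1 + max(0,2)      = 3  */
        printf("%d\n", possible_cpus(4, 2, -1)); /* 4 + 2 (BIOS)      = 6  */
        printf("%d\n", possible_cpus(4, 0, 8));  /* additional_cpus=8 = 12 */
        return 0;
    }

Booting with, say, additional_cpus=8 feeds that last case through the new __setup("additional_cpus=", ...) handler further down this file.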
@@ -966,10 +1025,7 @@ void __init smp_prepare_cpus(unsigned int max_cpus) | |||
966 | nmi_watchdog_default(); | 1025 | nmi_watchdog_default(); |
967 | current_cpu_data = boot_cpu_data; | 1026 | current_cpu_data = boot_cpu_data; |
968 | current_thread_info()->cpu = 0; /* needed? */ | 1027 | current_thread_info()->cpu = 0; /* needed? */ |
969 | 1028 | set_cpu_sibling_map(0); | |
970 | #ifdef CONFIG_HOTPLUG_CPU | ||
971 | prefill_possible_map(); | ||
972 | #endif | ||
973 | 1029 | ||
974 | if (smp_sanity_check(max_cpus) < 0) { | 1030 | if (smp_sanity_check(max_cpus) < 0) { |
975 | printk(KERN_INFO "SMP disabled\n"); | 1031 | printk(KERN_INFO "SMP disabled\n"); |
@@ -1013,8 +1069,6 @@ void __init smp_prepare_boot_cpu(void) | |||
1013 | int me = smp_processor_id(); | 1069 | int me = smp_processor_id(); |
1014 | cpu_set(me, cpu_online_map); | 1070 | cpu_set(me, cpu_online_map); |
1015 | cpu_set(me, cpu_callout_map); | 1071 | cpu_set(me, cpu_callout_map); |
1016 | cpu_set(0, cpu_sibling_map[0]); | ||
1017 | cpu_set(0, cpu_core_map[0]); | ||
1018 | per_cpu(cpu_state, me) = CPU_ONLINE; | 1072 | per_cpu(cpu_state, me) = CPU_ONLINE; |
1019 | } | 1073 | } |
1020 | 1074 | ||
@@ -1067,9 +1121,6 @@ int __cpuinit __cpu_up(unsigned int cpu) | |||
1067 | */ | 1121 | */ |
1068 | void __init smp_cpus_done(unsigned int max_cpus) | 1122 | void __init smp_cpus_done(unsigned int max_cpus) |
1069 | { | 1123 | { |
1070 | #ifndef CONFIG_HOTPLUG_CPU | ||
1071 | zap_low_mappings(); | ||
1072 | #endif | ||
1073 | smp_cleanup_boot(); | 1124 | smp_cleanup_boot(); |
1074 | 1125 | ||
1075 | #ifdef CONFIG_X86_IO_APIC | 1126 | #ifdef CONFIG_X86_IO_APIC |
@@ -1086,15 +1137,24 @@ void __init smp_cpus_done(unsigned int max_cpus) | |||
1086 | static void remove_siblinginfo(int cpu) | 1137 | static void remove_siblinginfo(int cpu) |
1087 | { | 1138 | { |
1088 | int sibling; | 1139 | int sibling; |
1140 | struct cpuinfo_x86 *c = cpu_data; | ||
1089 | 1141 | ||
1142 | for_each_cpu_mask(sibling, cpu_core_map[cpu]) { | ||
1143 | cpu_clear(cpu, cpu_core_map[sibling]); | ||
1144 | /* | ||
1145 | * last thread sibling in this cpu core going down | ||
1146 | */ | ||
1147 | if (cpus_weight(cpu_sibling_map[cpu]) == 1) | ||
1148 | c[sibling].booted_cores--; | ||
1149 | } | ||
1150 | |||
1090 | for_each_cpu_mask(sibling, cpu_sibling_map[cpu]) | 1151 | for_each_cpu_mask(sibling, cpu_sibling_map[cpu]) |
1091 | cpu_clear(cpu, cpu_sibling_map[sibling]); | 1152 | cpu_clear(cpu, cpu_sibling_map[sibling]); |
1092 | for_each_cpu_mask(sibling, cpu_core_map[cpu]) | ||
1093 | cpu_clear(cpu, cpu_core_map[sibling]); | ||
1094 | cpus_clear(cpu_sibling_map[cpu]); | 1153 | cpus_clear(cpu_sibling_map[cpu]); |
1095 | cpus_clear(cpu_core_map[cpu]); | 1154 | cpus_clear(cpu_core_map[cpu]); |
1096 | phys_proc_id[cpu] = BAD_APICID; | 1155 | phys_proc_id[cpu] = BAD_APICID; |
1097 | cpu_core_id[cpu] = BAD_APICID; | 1156 | cpu_core_id[cpu] = BAD_APICID; |
1157 | cpu_clear(cpu, cpu_sibling_setup_map); | ||
1098 | } | 1158 | } |
1099 | 1159 | ||
1100 | void remove_cpu_from_maps(void) | 1160 | void remove_cpu_from_maps(void) |
@@ -1158,6 +1218,12 @@ void __cpu_die(unsigned int cpu) | |||
1158 | printk(KERN_ERR "CPU %u didn't die...\n", cpu); | 1218 | printk(KERN_ERR "CPU %u didn't die...\n", cpu); |
1159 | } | 1219 | } |
1160 | 1220 | ||
1221 | static __init int setup_additional_cpus(char *s) | ||
1222 | { | ||
1223 | return get_option(&s, &additional_cpus); | ||
1224 | } | ||
1225 | __setup("additional_cpus=", setup_additional_cpus); | ||
1226 | |||
1161 | #else /* ... !CONFIG_HOTPLUG_CPU */ | 1227 | #else /* ... !CONFIG_HOTPLUG_CPU */ |
1162 | 1228 | ||
1163 | int __cpu_disable(void) | 1229 | int __cpu_disable(void) |
diff --git a/arch/x86_64/kernel/suspend.c b/arch/x86_64/kernel/suspend.c index ebb9abf3ce6d..fd2bef780882 100644 --- a/arch/x86_64/kernel/suspend.c +++ b/arch/x86_64/kernel/suspend.c | |||
@@ -11,6 +11,8 @@ | |||
11 | #include <linux/smp.h> | 11 | #include <linux/smp.h> |
12 | #include <linux/suspend.h> | 12 | #include <linux/suspend.h> |
13 | #include <asm/proto.h> | 13 | #include <asm/proto.h> |
14 | #include <asm/page.h> | ||
15 | #include <asm/pgtable.h> | ||
14 | 16 | ||
15 | struct saved_context saved_context; | 17 | struct saved_context saved_context; |
16 | 18 | ||
@@ -61,13 +63,12 @@ void save_processor_state(void) | |||
61 | __save_processor_state(&saved_context); | 63 | __save_processor_state(&saved_context); |
62 | } | 64 | } |
63 | 65 | ||
64 | static void | 66 | static void do_fpu_end(void) |
65 | do_fpu_end(void) | ||
66 | { | 67 | { |
67 | /* restore FPU regs if necessary */ | 68 | /* |
68 | /* Do it out of line so that gcc does not move cr0 load to some stupid place */ | 69 | * Restore FPU regs if necessary |
69 | kernel_fpu_end(); | 70 | */ |
70 | mxcsr_feature_mask_init(); | 71 | kernel_fpu_end(); |
71 | } | 72 | } |
72 | 73 | ||
73 | void __restore_processor_state(struct saved_context *ctxt) | 74 | void __restore_processor_state(struct saved_context *ctxt) |
@@ -140,4 +141,83 @@ void fix_processor_context(void) | |||
140 | 141 | ||
141 | } | 142 | } |
142 | 143 | ||
144 | #ifdef CONFIG_SOFTWARE_SUSPEND | ||
145 | /* Defined in arch/x86_64/kernel/suspend_asm.S */ | ||
146 | extern int restore_image(void); | ||
147 | |||
148 | pgd_t *temp_level4_pgt; | ||
149 | |||
150 | static int res_phys_pud_init(pud_t *pud, unsigned long address, unsigned long end) | ||
151 | { | ||
152 | long i, j; | ||
153 | |||
154 | i = pud_index(address); | ||
155 | pud = pud + i; | ||
156 | for (; i < PTRS_PER_PUD; pud++, i++) { | ||
157 | unsigned long paddr; | ||
158 | pmd_t *pmd; | ||
159 | |||
160 | paddr = address + i*PUD_SIZE; | ||
161 | if (paddr >= end) | ||
162 | break; | ||
163 | |||
164 | pmd = (pmd_t *)get_safe_page(GFP_ATOMIC); | ||
165 | if (!pmd) | ||
166 | return -ENOMEM; | ||
167 | set_pud(pud, __pud(__pa(pmd) | _KERNPG_TABLE)); | ||
168 | for (j = 0; j < PTRS_PER_PMD; pmd++, j++, paddr += PMD_SIZE) { | ||
169 | unsigned long pe; | ||
170 | |||
171 | if (paddr >= end) | ||
172 | break; | ||
173 | pe = _PAGE_NX | _PAGE_PSE | _KERNPG_TABLE | paddr; | ||
174 | pe &= __supported_pte_mask; | ||
175 | set_pmd(pmd, __pmd(pe)); | ||
176 | } | ||
177 | } | ||
178 | return 0; | ||
179 | } | ||
180 | |||
181 | static int set_up_temporary_mappings(void) | ||
182 | { | ||
183 | unsigned long start, end, next; | ||
184 | int error; | ||
185 | |||
186 | temp_level4_pgt = (pgd_t *)get_safe_page(GFP_ATOMIC); | ||
187 | if (!temp_level4_pgt) | ||
188 | return -ENOMEM; | ||
189 | |||
190 | /* It is safe to reuse the original kernel mapping */ | ||
191 | set_pgd(temp_level4_pgt + pgd_index(__START_KERNEL_map), | ||
192 | init_level4_pgt[pgd_index(__START_KERNEL_map)]); | ||
193 | |||
194 | /* Set up the direct mapping from scratch */ | ||
195 | start = (unsigned long)pfn_to_kaddr(0); | ||
196 | end = (unsigned long)pfn_to_kaddr(end_pfn); | ||
197 | |||
198 | for (; start < end; start = next) { | ||
199 | pud_t *pud = (pud_t *)get_safe_page(GFP_ATOMIC); | ||
200 | if (!pud) | ||
201 | return -ENOMEM; | ||
202 | next = start + PGDIR_SIZE; | ||
203 | if (next > end) | ||
204 | next = end; | ||
205 | if ((error = res_phys_pud_init(pud, __pa(start), __pa(next)))) | ||
206 | return error; | ||
207 | set_pgd(temp_level4_pgt + pgd_index(start), | ||
208 | mk_kernel_pgd(__pa(pud))); | ||
209 | } | ||
210 | return 0; | ||
211 | } | ||
212 | |||
213 | int swsusp_arch_resume(void) | ||
214 | { | ||
215 | int error; | ||
143 | 216 | ||
217 | /* We have got enough memory and from now on we cannot recover */ | ||
218 | if ((error = set_up_temporary_mappings())) | ||
219 | return error; | ||
220 | restore_image(); | ||
221 | return 0; | ||
222 | } | ||
223 | #endif /* CONFIG_SOFTWARE_SUSPEND */ | ||
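
The suspend-to-disk resume path can no longer restore pages while running on init_level4_pgt: the copy loop could overwrite the very page tables it is walking. So swsusp_arch_resume() first builds a throwaway hierarchy out of swsusp-safe pages, reusing the kernel-text PGD slot as-is and rebuilding the direct mapping with 2 MB PSE leaves. The coverage arithmetic behind res_phys_pud_init()'s two nested loops:

    #include <assert.h>

    /* x86_64 4-level paging constants, as the code above assumes. */
    #define PMD_SHIFT   21          /* 2 MB leaf mappings   */
    #define PUD_SHIFT   30          /* 1 GB per pud entry   */
    #define PGDIR_SHIFT 39          /* 512 GB per pgd entry */
    #define PTRS        512

    int main(void)
    {
        /* one pmd page of 512 entries maps 512 * 2MB = 1GB = one pud slot */
        assert(PTRS * (1UL << PMD_SHIFT) == 1UL << PUD_SHIFT);
        /* one pud page maps 512 * 1GB = 512GB = one pgd slot */
        assert(PTRS * (1UL << PUD_SHIFT) == 1UL << PGDIR_SHIFT);
        /* so 4 GB of RAM costs 4 pmd pages under 4 pud entries,
         * all hanging off a single pgd slot */
        assert((4UL << 30) >> PUD_SHIFT == 4);
        return 0;
    }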
diff --git a/arch/x86_64/kernel/suspend_asm.S b/arch/x86_64/kernel/suspend_asm.S index 4d659e97df10..320b6fb00cca 100644 --- a/arch/x86_64/kernel/suspend_asm.S +++ b/arch/x86_64/kernel/suspend_asm.S | |||
@@ -39,12 +39,13 @@ ENTRY(swsusp_arch_suspend) | |||
39 | call swsusp_save | 39 | call swsusp_save |
40 | ret | 40 | ret |
41 | 41 | ||
42 | ENTRY(swsusp_arch_resume) | 42 | ENTRY(restore_image) |
43 | /* set up cr3 */ | 43 | /* switch to temporary page tables */ |
44 | leaq init_level4_pgt(%rip),%rax | 44 | movq $__PAGE_OFFSET, %rdx |
45 | subq $__START_KERNEL_map,%rax | 45 | movq temp_level4_pgt(%rip), %rax |
46 | movq %rax,%cr3 | 46 | subq %rdx, %rax |
47 | 47 | movq %rax, %cr3 | |
48 | /* Flush TLB */ | ||
48 | movq mmu_cr4_features(%rip), %rax | 49 | movq mmu_cr4_features(%rip), %rax |
49 | movq %rax, %rdx | 50 | movq %rax, %rdx |
50 | andq $~(1<<7), %rdx # PGE | 51 | andq $~(1<<7), %rdx # PGE |
@@ -69,6 +70,10 @@ loop: | |||
69 | movq pbe_next(%rdx), %rdx | 70 | movq pbe_next(%rdx), %rdx |
70 | jmp loop | 71 | jmp loop |
71 | done: | 72 | done: |
73 | /* go back to the original page tables */ | ||
74 | leaq init_level4_pgt(%rip), %rax | ||
75 | subq $__START_KERNEL_map, %rax | ||
76 | movq %rax, %cr3 | ||
72 | /* Flush TLB, including "global" things (vmalloc) */ | 77 | /* Flush TLB, including "global" things (vmalloc) */ |
73 | movq mmu_cr4_features(%rip), %rax | 78 | movq mmu_cr4_features(%rip), %rax |
74 | movq %rax, %rdx | 79 | movq %rax, %rdx |
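
restore_image thus runs the copy loop on temp_level4_pgt and only returns to init_level4_pgt once the image is in place; both switches are paired with the global-TLB-flush idiom the assembly spells out, since a bare CR3 write leaves global (PGE) entries alive. The same sequence in C, with hypothetical stand-ins for the ring-0 register accessors:

    #include <stdint.h>

    #define X86_CR4_PGE (1ull << 7)  /* the bit behind "andq $~(1<<7)" */

    /* Stand-ins for privileged register access. */
    static uint64_t cr3, cr4 = X86_CR4_PGE;
    static void write_cr3(uint64_t v)  { cr3 = v; }
    static uint64_t read_cr4(void)     { return cr4; }
    static void write_cr4(uint64_t v)  { cr4 = v; }

    static void switch_pgd_flush_all(uint64_t pgd_phys)
    {
        uint64_t v = read_cr4();

        write_cr4(v & ~X86_CR4_PGE); /* clearing PGE drops global entries */
        write_cr3(pgd_phys);         /* CR3 write flushes the rest */
        write_cr4(v);                /* re-arm global pages */
    }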
diff --git a/arch/x86_64/kernel/sys_x86_64.c b/arch/x86_64/kernel/sys_x86_64.c index cc7821c68851..6449ea8fe756 100644 --- a/arch/x86_64/kernel/sys_x86_64.c +++ b/arch/x86_64/kernel/sys_x86_64.c | |||
@@ -154,17 +154,3 @@ asmlinkage long sys_uname(struct new_utsname __user * name) | |||
154 | err |= copy_to_user(&name->machine, "i686", 5); | 154 | err |= copy_to_user(&name->machine, "i686", 5); |
155 | return err ? -EFAULT : 0; | 155 | return err ? -EFAULT : 0; |
156 | } | 156 | } |
157 | |||
158 | asmlinkage long sys_time64(long __user * tloc) | ||
159 | { | ||
160 | struct timeval now; | ||
161 | int i; | ||
162 | |||
163 | do_gettimeofday(&now); | ||
164 | i = now.tv_sec; | ||
165 | if (tloc) { | ||
166 | if (put_user(i,tloc)) | ||
167 | i = -EFAULT; | ||
168 | } | ||
169 | return i; | ||
170 | } | ||
diff --git a/arch/x86_64/kernel/time.c b/arch/x86_64/kernel/time.c index 2373cb8b8625..fdaddc4e5284 100644 --- a/arch/x86_64/kernel/time.c +++ b/arch/x86_64/kernel/time.c | |||
@@ -42,10 +42,6 @@ | |||
42 | #include <asm/apic.h> | 42 | #include <asm/apic.h> |
43 | #endif | 43 | #endif |
44 | 44 | ||
45 | u64 jiffies_64 = INITIAL_JIFFIES; | ||
46 | |||
47 | EXPORT_SYMBOL(jiffies_64); | ||
48 | |||
49 | #ifdef CONFIG_CPU_FREQ | 45 | #ifdef CONFIG_CPU_FREQ |
50 | static void cpufreq_delayed_get(void); | 46 | static void cpufreq_delayed_get(void); |
51 | #endif | 47 | #endif |
@@ -481,9 +477,9 @@ static irqreturn_t timer_interrupt(int irq, void *dev_id, struct pt_regs *regs) | |||
481 | static unsigned int cyc2ns_scale; | 477 | static unsigned int cyc2ns_scale; |
482 | #define CYC2NS_SCALE_FACTOR 10 /* 2^10, carefully chosen */ | 478 | #define CYC2NS_SCALE_FACTOR 10 /* 2^10, carefully chosen */ |
483 | 479 | ||
484 | static inline void set_cyc2ns_scale(unsigned long cpu_mhz) | 480 | static inline void set_cyc2ns_scale(unsigned long cpu_khz) |
485 | { | 481 | { |
486 | cyc2ns_scale = (1000 << CYC2NS_SCALE_FACTOR)/cpu_mhz; | 482 | cyc2ns_scale = (1000000 << CYC2NS_SCALE_FACTOR)/cpu_khz; |
487 | } | 483 | } |
488 | 484 | ||
489 | static inline unsigned long long cycles_2_ns(unsigned long long cyc) | 485 | static inline unsigned long long cycles_2_ns(unsigned long long cyc) |
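
cyc2ns_scale is nanoseconds-per-cycle in 2^10 fixed point, so ns = (cycles * scale) >> 10; computing it from cpu_khz instead of a pre-truncated MHz figure keeps roughly three more significant digits of the clock rate in the divisor. Worked through:

    #include <stdio.h>

    #define CYC2NS_SCALE_FACTOR 10          /* 2^10 fixed point */

    static unsigned set_cyc2ns_scale(unsigned long cpu_khz)
    {
        return (1000000UL << CYC2NS_SCALE_FACTOR) / cpu_khz;
    }

    static unsigned long long cycles_2_ns(unsigned long long cyc, unsigned s)
    {
        return (cyc * s) >> CYC2NS_SCALE_FACTOR;
    }

    int main(void)
    {
        /* 2 GHz: scale = 1024e6 / 2e6 = 512, so ns = cycles / 2 */
        unsigned s = set_cyc2ns_scale(2000000);
        printf("scale=%u, 1e9 cycles -> %llu ns\n",
               s, cycles_2_ns(1000000000ULL, s));      /* 500000000 */

        /* a 1000999 kHz part: kHz input gives 1022, while a rate first
         * rounded down to 1000 MHz would give 1024 */
        printf("%u vs %u\n", set_cyc2ns_scale(1000999),
               (unsigned)((1000UL << CYC2NS_SCALE_FACTOR) / 1000));
        return 0;
    }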
@@ -655,7 +651,7 @@ static int time_cpufreq_notifier(struct notifier_block *nb, unsigned long val, | |||
655 | vxtime.tsc_quot = (1000L << 32) / cpu_khz; | 651 | vxtime.tsc_quot = (1000L << 32) / cpu_khz; |
656 | } | 652 | } |
657 | 653 | ||
658 | set_cyc2ns_scale(cpu_khz_ref / 1000); | 654 | set_cyc2ns_scale(cpu_khz_ref); |
659 | 655 | ||
660 | return 0; | 656 | return 0; |
661 | } | 657 | } |
@@ -939,7 +935,7 @@ void __init time_init(void) | |||
939 | rdtscll_sync(&vxtime.last_tsc); | 935 | rdtscll_sync(&vxtime.last_tsc); |
940 | setup_irq(0, &irq0); | 936 | setup_irq(0, &irq0); |
941 | 937 | ||
942 | set_cyc2ns_scale(cpu_khz / 1000); | 938 | set_cyc2ns_scale(cpu_khz); |
943 | 939 | ||
944 | #ifndef CONFIG_SMP | 940 | #ifndef CONFIG_SMP |
945 | time_init_gtod(); | 941 | time_init_gtod(); |
@@ -959,9 +955,6 @@ static __init int unsynchronized_tsc(void) | |||
959 | are handled in the OEM check above. */ | 955 | are handled in the OEM check above. */ |
960 | if (boot_cpu_data.x86_vendor == X86_VENDOR_INTEL) | 956 | if (boot_cpu_data.x86_vendor == X86_VENDOR_INTEL) |
961 | return 0; | 957 | return 0; |
962 | /* All in a single socket - should be synchronized */ | ||
963 | if (cpus_weight(cpu_core_map[0]) == num_online_cpus()) | ||
964 | return 0; | ||
965 | #endif | 958 | #endif |
966 | /* Assume multi socket systems are not synchronized */ | 959 | /* Assume multi socket systems are not synchronized */ |
967 | return num_online_cpus() > 1; | 960 | return num_online_cpus() > 1; |
@@ -1096,6 +1089,7 @@ static unsigned long PIE_freq = DEFAULT_RTC_INT_FREQ; | |||
1096 | static unsigned long PIE_count; | 1089 | static unsigned long PIE_count; |
1097 | 1090 | ||
1098 | static unsigned long hpet_rtc_int_freq; /* RTC interrupt frequency */ | 1091 | static unsigned long hpet_rtc_int_freq; /* RTC interrupt frequency */ |
1092 | static unsigned int hpet_t1_cmp; /* cached comparator register */ | ||
1099 | 1093 | ||
1100 | int is_hpet_enabled(void) | 1094 | int is_hpet_enabled(void) |
1101 | { | 1095 | { |
@@ -1132,10 +1126,12 @@ int hpet_rtc_timer_init(void) | |||
1132 | cnt = hpet_readl(HPET_COUNTER); | 1126 | cnt = hpet_readl(HPET_COUNTER); |
1133 | cnt += ((hpet_tick*HZ)/hpet_rtc_int_freq); | 1127 | cnt += ((hpet_tick*HZ)/hpet_rtc_int_freq); |
1134 | hpet_writel(cnt, HPET_T1_CMP); | 1128 | hpet_writel(cnt, HPET_T1_CMP); |
1129 | hpet_t1_cmp = cnt; | ||
1135 | local_irq_restore(flags); | 1130 | local_irq_restore(flags); |
1136 | 1131 | ||
1137 | cfg = hpet_readl(HPET_T1_CFG); | 1132 | cfg = hpet_readl(HPET_T1_CFG); |
1138 | cfg |= HPET_TN_ENABLE | HPET_TN_SETVAL | HPET_TN_32BIT; | 1133 | cfg &= ~HPET_TN_PERIODIC; |
1134 | cfg |= HPET_TN_ENABLE | HPET_TN_32BIT; | ||
1139 | hpet_writel(cfg, HPET_T1_CFG); | 1135 | hpet_writel(cfg, HPET_T1_CFG); |
1140 | 1136 | ||
1141 | return 1; | 1137 | return 1; |
@@ -1145,8 +1141,12 @@ static void hpet_rtc_timer_reinit(void) | |||
1145 | { | 1141 | { |
1146 | unsigned int cfg, cnt; | 1142 | unsigned int cfg, cnt; |
1147 | 1143 | ||
1148 | if (!(PIE_on | AIE_on | UIE_on)) | 1144 | if (unlikely(!(PIE_on | AIE_on | UIE_on))) { |
1145 | cfg = hpet_readl(HPET_T1_CFG); | ||
1146 | cfg &= ~HPET_TN_ENABLE; | ||
1147 | hpet_writel(cfg, HPET_T1_CFG); | ||
1149 | return; | 1148 | return; |
1149 | } | ||
1150 | 1150 | ||
1151 | if (PIE_on && (PIE_freq > DEFAULT_RTC_INT_FREQ)) | 1151 | if (PIE_on && (PIE_freq > DEFAULT_RTC_INT_FREQ)) |
1152 | hpet_rtc_int_freq = PIE_freq; | 1152 | hpet_rtc_int_freq = PIE_freq; |
@@ -1154,15 +1154,10 @@ static void hpet_rtc_timer_reinit(void) | |||
1154 | hpet_rtc_int_freq = DEFAULT_RTC_INT_FREQ; | 1154 | hpet_rtc_int_freq = DEFAULT_RTC_INT_FREQ; |
1155 | 1155 | ||
1156 | /* It is more accurate to use the comparator value than current count.*/ | 1156 | /* It is more accurate to use the comparator value than current count.*/ |
1157 | cnt = hpet_readl(HPET_T1_CMP); | 1157 | cnt = hpet_t1_cmp; |
1158 | cnt += hpet_tick*HZ/hpet_rtc_int_freq; | 1158 | cnt += hpet_tick*HZ/hpet_rtc_int_freq; |
1159 | hpet_writel(cnt, HPET_T1_CMP); | 1159 | hpet_writel(cnt, HPET_T1_CMP); |
1160 | 1160 | hpet_t1_cmp = cnt; | |
1161 | cfg = hpet_readl(HPET_T1_CFG); | ||
1162 | cfg |= HPET_TN_ENABLE | HPET_TN_SETVAL | HPET_TN_32BIT; | ||
1163 | hpet_writel(cfg, HPET_T1_CFG); | ||
1164 | |||
1165 | return; | ||
1166 | } | 1161 | } |
1167 | 1162 | ||
1168 | /* | 1163 | /* |
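
Two HPET RTC-emulation fixes ride together above: timer 1's comparator is mirrored in hpet_t1_cmp so the reprogramming path never reads HPET_T1_CMP back over MMIO (HPET register reads are slow, and the HPET_TN_SETVAL dance on every tick goes away with it), and when the last RTC interrupt source disappears the timer is actually disabled instead of left ticking. The cache-on-write pattern in isolation, with a toy MMIO accessor:

    #include <stdint.h>

    /* Toy MMIO; a real HPET register read can be painfully slow. */
    static uint32_t regs[0x200];
    static void hpet_writel(uint32_t v, int off) { regs[off] = v; }

    #define HPET_T1_CMP 0x128
    static uint32_t hpet_t1_cmp;    /* software mirror of the comparator */

    static void program_next(uint32_t delta)
    {
        /* old: cnt = hpet_readl(HPET_T1_CMP);  -- an MMIO read per tick
         * new: start from the cached value */
        uint32_t cnt = hpet_t1_cmp + delta;

        hpet_writel(cnt, HPET_T1_CMP);
        hpet_t1_cmp = cnt;          /* keep the mirror current on write */
    }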
diff --git a/arch/x86_64/kernel/traps.c b/arch/x86_64/kernel/traps.c index b5e09e6b5536..bf337f493189 100644 --- a/arch/x86_64/kernel/traps.c +++ b/arch/x86_64/kernel/traps.c | |||
@@ -428,19 +428,6 @@ static void __kprobes do_trap(int trapnr, int signr, char *str, | |||
428 | { | 428 | { |
429 | conditional_sti(regs); | 429 | conditional_sti(regs); |
430 | 430 | ||
431 | #ifdef CONFIG_CHECKING | ||
432 | { | ||
433 | unsigned long gs; | ||
434 | struct x8664_pda *pda = cpu_pda + safe_smp_processor_id(); | ||
435 | rdmsrl(MSR_GS_BASE, gs); | ||
436 | if (gs != (unsigned long)pda) { | ||
437 | wrmsrl(MSR_GS_BASE, pda); | ||
438 | printk("%s: wrong gs %lx expected %p rip %lx\n", str, gs, pda, | ||
439 | regs->rip); | ||
440 | } | ||
441 | } | ||
442 | #endif | ||
443 | |||
444 | if (user_mode(regs)) { | 431 | if (user_mode(regs)) { |
445 | struct task_struct *tsk = current; | 432 | struct task_struct *tsk = current; |
446 | 433 | ||
@@ -513,20 +500,6 @@ asmlinkage void __kprobes do_general_protection(struct pt_regs * regs, | |||
513 | { | 500 | { |
514 | conditional_sti(regs); | 501 | conditional_sti(regs); |
515 | 502 | ||
516 | #ifdef CONFIG_CHECKING | ||
517 | { | ||
518 | unsigned long gs; | ||
519 | struct x8664_pda *pda = cpu_pda + safe_smp_processor_id(); | ||
520 | rdmsrl(MSR_GS_BASE, gs); | ||
521 | if (gs != (unsigned long)pda) { | ||
522 | wrmsrl(MSR_GS_BASE, pda); | ||
523 | oops_in_progress++; | ||
524 | printk("general protection handler: wrong gs %lx expected %p\n", gs, pda); | ||
525 | oops_in_progress--; | ||
526 | } | ||
527 | } | ||
528 | #endif | ||
529 | |||
530 | if (user_mode(regs)) { | 503 | if (user_mode(regs)) { |
531 | struct task_struct *tsk = current; | 504 | struct task_struct *tsk = current; |
532 | 505 | ||
@@ -665,19 +638,6 @@ asmlinkage void __kprobes do_debug(struct pt_regs * regs, | |||
665 | struct task_struct *tsk = current; | 638 | struct task_struct *tsk = current; |
666 | siginfo_t info; | 639 | siginfo_t info; |
667 | 640 | ||
668 | #ifdef CONFIG_CHECKING | ||
669 | { | ||
670 | /* RED-PEN interaction with debugger - could destroy gs */ | ||
671 | unsigned long gs; | ||
672 | struct x8664_pda *pda = cpu_pda + safe_smp_processor_id(); | ||
673 | rdmsrl(MSR_GS_BASE, gs); | ||
674 | if (gs != (unsigned long)pda) { | ||
675 | wrmsrl(MSR_GS_BASE, pda); | ||
676 | printk("debug handler: wrong gs %lx expected %p\n", gs, pda); | ||
677 | } | ||
678 | } | ||
679 | #endif | ||
680 | |||
681 | get_debugreg(condition, 6); | 641 | get_debugreg(condition, 6); |
682 | 642 | ||
683 | if (notify_die(DIE_DEBUG, "debug", regs, condition, error_code, | 643 | if (notify_die(DIE_DEBUG, "debug", regs, condition, error_code, |
@@ -888,6 +848,10 @@ asmlinkage void __attribute__((weak)) smp_thermal_interrupt(void) | |||
888 | { | 848 | { |
889 | } | 849 | } |
890 | 850 | ||
851 | asmlinkage void __attribute__((weak)) mce_threshold_interrupt(void) | ||
852 | { | ||
853 | } | ||
854 | |||
891 | /* | 855 | /* |
892 | * 'math_state_restore()' saves the current math information in the | 856 | * 'math_state_restore()' saves the current math information in the |
893 | * old math state array, and gets the new ones from the current task | 857 | * old math state array, and gets the new ones from the current task |
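The empty mce_threshold_interrupt() added above relies on GCC weak linkage: the weak definition satisfies references from generic code, and any strong definition elsewhere (presumably the AMD MCE threshold driver) overrides it at link time. A self-contained sketch of the pattern, with hypothetical names:

/* Weak default: linked in only when nothing stronger exists. */
void __attribute__((weak)) report_event(void)
{
	/* deliberately empty */
}

/* Elsewhere, a normal (strong) definition silently wins:
 *
 *	void report_event(void)
 *	{
 *		handle_it();
 *	}
 */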
diff --git a/arch/x86_64/kernel/vmlinux.lds.S b/arch/x86_64/kernel/vmlinux.lds.S index 6dd642cad2ef..58b19215b4b3 100644 --- a/arch/x86_64/kernel/vmlinux.lds.S +++ b/arch/x86_64/kernel/vmlinux.lds.S | |||
@@ -50,7 +50,7 @@ SECTIONS | |||
50 | *(.bss.page_aligned) | 50 | *(.bss.page_aligned) |
51 | *(.bss) | 51 | *(.bss) |
52 | } | 52 | } |
53 | __bss_end = .; | 53 | __bss_stop = .; |
54 | 54 | ||
55 | . = ALIGN(PAGE_SIZE); | 55 | . = ALIGN(PAGE_SIZE); |
56 | . = ALIGN(CONFIG_X86_L1_CACHE_BYTES); | 56 | . = ALIGN(CONFIG_X86_L1_CACHE_BYTES); |
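__bss_stop (formerly __bss_end) is a linker-script symbol: only its address is meaningful, and C code consumes it as an extern array. The rename presumably matches the name that generic code already expects. A sketch of the usual consumption pattern, assuming an early-boot routine that zeroes .bss:

#include <string.h>

extern char __bss_start[], __bss_stop[];	/* provided by vmlinux.lds.S */

static void clear_bss(void)
{
	/* Only the symbol addresses are defined; there is no storage
	 * behind them beyond the section they delimit. */
	memset(__bss_start, 0, __bss_stop - __bss_start);
}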
diff --git a/arch/x86_64/kernel/x8664_ksyms.c b/arch/x86_64/kernel/x8664_ksyms.c index fd99ddd009bc..4a54221e10bc 100644 --- a/arch/x86_64/kernel/x8664_ksyms.c +++ b/arch/x86_64/kernel/x8664_ksyms.c | |||
@@ -203,3 +203,6 @@ EXPORT_SYMBOL(flush_tlb_page); | |||
203 | #endif | 203 | #endif |
204 | 204 | ||
205 | EXPORT_SYMBOL(cpu_khz); | 205 | EXPORT_SYMBOL(cpu_khz); |
206 | |||
207 | EXPORT_SYMBOL(load_gs_index); | ||
208 | |||
diff --git a/arch/x86_64/lib/bitops.c b/arch/x86_64/lib/bitops.c index a29fb75b33ac..95b6d9639fba 100644 --- a/arch/x86_64/lib/bitops.c +++ b/arch/x86_64/lib/bitops.c | |||
@@ -5,19 +5,23 @@ | |||
5 | #undef find_first_bit | 5 | #undef find_first_bit |
6 | #undef find_next_bit | 6 | #undef find_next_bit |
7 | 7 | ||
8 | /** | 8 | static inline long |
9 | * find_first_zero_bit - find the first zero bit in a memory region | 9 | __find_first_zero_bit(const unsigned long * addr, unsigned long size) |
10 | * @addr: The address to start the search at | ||
11 | * @size: The maximum size to search | ||
12 | * | ||
13 | * Returns the bit-number of the first zero bit, not the number of the byte | ||
14 | * containing a bit. | ||
15 | */ | ||
16 | inline long find_first_zero_bit(const unsigned long * addr, unsigned long size) | ||
17 | { | 10 | { |
18 | long d0, d1, d2; | 11 | long d0, d1, d2; |
19 | long res; | 12 | long res; |
20 | 13 | ||
14 | /* | ||
15 | * We must test the size in words, not in bits, because | ||
16 | * otherwise incoming sizes in the range -63..-1 will not run | ||
17 | * any scasq instructions, and then the flags used by the je | ||
18 | * instruction will have whatever random value was in place | ||
19 | * before. Nobody should call us like that, but | ||
20 | * find_next_zero_bit() does when offset and size are at the | ||
21 | * same word and it fails to find a zero itself. | ||
22 | */ | ||
23 | size += 63; | ||
24 | size >>= 6; | ||
21 | if (!size) | 25 | if (!size) |
22 | return 0; | 26 | return 0; |
23 | asm volatile( | 27 | asm volatile( |
@@ -30,12 +34,30 @@ inline long find_first_zero_bit(const unsigned long * addr, unsigned long size) | |||
30 | " shlq $3,%%rdi\n" | 34 | " shlq $3,%%rdi\n" |
31 | " addq %%rdi,%%rdx" | 35 | " addq %%rdi,%%rdx" |
32 | :"=d" (res), "=&c" (d0), "=&D" (d1), "=&a" (d2) | 36 | :"=d" (res), "=&c" (d0), "=&D" (d1), "=&a" (d2) |
33 | :"0" (0ULL), "1" ((size + 63) >> 6), "2" (addr), "3" (-1ULL), | 37 | :"0" (0ULL), "1" (size), "2" (addr), "3" (-1ULL), |
34 | [addr] "r" (addr) : "memory"); | 38 | [addr] "S" (addr) : "memory"); |
39 | /* | ||
40 | * Any register would do for [addr] above, but GCC tends to | ||
41 | * prefer rbx over rsi, even though rsi is readily available | ||
42 | * and doesn't have to be saved. | ||
43 | */ | ||
35 | return res; | 44 | return res; |
36 | } | 45 | } |
37 | 46 | ||
38 | /** | 47 | /** |
48 | * find_first_zero_bit - find the first zero bit in a memory region | ||
49 | * @addr: The address to start the search at | ||
50 | * @size: The maximum size to search | ||
51 | * | ||
52 | * Returns the bit-number of the first zero bit, not the number of the byte | ||
53 | * containing a bit. | ||
54 | */ | ||
55 | long find_first_zero_bit(const unsigned long * addr, unsigned long size) | ||
56 | { | ||
57 | return __find_first_zero_bit (addr, size); | ||
58 | } | ||
59 | |||
60 | /** | ||
39 | * find_next_zero_bit - find the first zero bit in a memory region | 61 | * find_next_zero_bit - find the first zero bit in a memory region |
40 | * @addr: The address to base the search on | 62 | * @addr: The address to base the search on |
41 | * @offset: The bitnumber to start searching at | 63 | * @offset: The bitnumber to start searching at |
@@ -43,7 +65,7 @@ inline long find_first_zero_bit(const unsigned long * addr, unsigned long size) | |||
43 | */ | 65 | */ |
44 | long find_next_zero_bit (const unsigned long * addr, long size, long offset) | 66 | long find_next_zero_bit (const unsigned long * addr, long size, long offset) |
45 | { | 67 | { |
46 | unsigned long * p = ((unsigned long *) addr) + (offset >> 6); | 68 | const unsigned long * p = addr + (offset >> 6); |
47 | unsigned long set = 0; | 69 | unsigned long set = 0; |
48 | unsigned long res, bit = offset&63; | 70 | unsigned long res, bit = offset&63; |
49 | 71 | ||
@@ -63,8 +85,8 @@ long find_next_zero_bit (const unsigned long * addr, long size, long offset) | |||
63 | /* | 85 | /* |
64 | * No zero yet, search remaining full words for a zero | 86 | * No zero yet, search remaining full words for a zero |
65 | */ | 87 | */ |
66 | res = find_first_zero_bit ((const unsigned long *)p, | 88 | res = __find_first_zero_bit (p, size - 64 * (p - addr)); |
67 | size - 64 * (p - (unsigned long *) addr)); | 89 | |
68 | return (offset + set + res); | 90 | return (offset + set + res); |
69 | } | 91 | } |
70 | 92 | ||
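find_next_zero_bit() splits the starting offset into a word index and a bit index, scans the partial leading word, and only then falls through to __find_first_zero_bit() with the remaining size expressed as size - 64 * (p - addr); making p const lets it be derived from addr without a cast. The index arithmetic, as a small sketch with worked values:

/* Decompose a bit offset into (word, bit) coordinates. */
static void split_offset(long offset, long *word, long *bit)
{
	*word = offset >> 6;	/* e.g. offset 70 -> word 1 */
	*bit  = offset & 63;	/* e.g. offset 70 -> bit  6 */
}
/* If word 1 holds no zero at or above bit 6, the full-word search
 * still has size - 64 bits left to cover. */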
@@ -74,6 +96,19 @@ __find_first_bit(const unsigned long * addr, unsigned long size) | |||
74 | long d0, d1; | 96 | long d0, d1; |
75 | long res; | 97 | long res; |
76 | 98 | ||
99 | /* | ||
100 | * We must test the size in words, not in bits, because | ||
101 | * otherwise incoming sizes in the range -63..-1 will not run | ||
102 | * any scasq instructions, and then the flags used by the jz | ||
103 | * instruction will have whatever random value was in place | ||
104 | * before. Nobody should call us like that, but | ||
105 | * find_next_bit() does when offset and size are at the same | ||
106 | * word and it fails to find a one itself. | ||
107 | */ | ||
108 | size += 63; | ||
109 | size >>= 6; | ||
110 | if (!size) | ||
111 | return 0; | ||
77 | asm volatile( | 112 | asm volatile( |
78 | " repe; scasq\n" | 113 | " repe; scasq\n" |
79 | " jz 1f\n" | 114 | " jz 1f\n" |
@@ -83,8 +118,7 @@ __find_first_bit(const unsigned long * addr, unsigned long size) | |||
83 | " shlq $3,%%rdi\n" | 118 | " shlq $3,%%rdi\n" |
84 | " addq %%rdi,%%rax" | 119 | " addq %%rdi,%%rax" |
85 | :"=a" (res), "=&c" (d0), "=&D" (d1) | 120 | :"=a" (res), "=&c" (d0), "=&D" (d1) |
86 | :"0" (0ULL), | 121 | :"0" (0ULL), "1" (size), "2" (addr), |
87 | "1" ((size + 63) >> 6), "2" (addr), | ||
88 | [addr] "r" (addr) : "memory"); | 122 | [addr] "r" (addr) : "memory"); |
89 | return res; | 123 | return res; |
90 | } | 124 | } |
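The guard added to both scanners converts the size to whole 64-bit words up front because repe scasq takes its count in words: for a bogus bit count in -63..-1, (size + 63) >> 6 is zero, scasq executes zero times, and the following je/jz would act on stale flags. Rounding up and bailing out on zero words makes that case well defined. Just the guard, as a sketch:

/* Convert a bit count into the scasq word count. */
static long bits_to_words(long size)
{
	size += 63;	/* round up to a whole 64-bit word */
	size >>= 6;	/* -63..-1 bits now yields 0 words */
	return size;	/* caller returns 0 early when this is 0 */
}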
diff --git a/arch/x86_64/lib/clear_page.S b/arch/x86_64/lib/clear_page.S index 30a9da458c15..43d9fa136180 100644 --- a/arch/x86_64/lib/clear_page.S +++ b/arch/x86_64/lib/clear_page.S | |||
@@ -5,46 +5,8 @@ | |||
5 | .globl clear_page | 5 | .globl clear_page |
6 | .p2align 4 | 6 | .p2align 4 |
7 | clear_page: | 7 | clear_page: |
8 | xorl %eax,%eax | ||
9 | movl $4096/64,%ecx | ||
10 | .p2align 4 | ||
11 | .Lloop: | ||
12 | decl %ecx | ||
13 | #define PUT(x) movq %rax,x*8(%rdi) | ||
14 | movq %rax,(%rdi) | ||
15 | PUT(1) | ||
16 | PUT(2) | ||
17 | PUT(3) | ||
18 | PUT(4) | ||
19 | PUT(5) | ||
20 | PUT(6) | ||
21 | PUT(7) | ||
22 | leaq 64(%rdi),%rdi | ||
23 | jnz .Lloop | ||
24 | nop | ||
25 | ret | ||
26 | clear_page_end: | ||
27 | |||
28 | /* C stepping K8 run faster using the string instructions. | ||
29 | It is also a lot simpler. Use this when possible */ | ||
30 | |||
31 | #include <asm/cpufeature.h> | ||
32 | |||
33 | .section .altinstructions,"a" | ||
34 | .align 8 | ||
35 | .quad clear_page | ||
36 | .quad clear_page_c | ||
37 | .byte X86_FEATURE_K8_C | ||
38 | .byte clear_page_end-clear_page | ||
39 | .byte clear_page_c_end-clear_page_c | ||
40 | .previous | ||
41 | |||
42 | .section .altinstr_replacement,"ax" | ||
43 | clear_page_c: | ||
44 | movl $4096/8,%ecx | 8 | movl $4096/8,%ecx |
45 | xorl %eax,%eax | 9 | xorl %eax,%eax |
46 | rep | 10 | rep |
47 | stosq | 11 | stosq |
48 | ret | 12 | ret |
49 | clear_page_c_end: | ||
50 | .previous | ||
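clear_page is reduced to the former clear_page_c string variant: with %rax zeroed and %rcx holding 4096/8, rep stosq writes 512 zero qwords through %rdi. The same operation as GNU C inline assembly, as a sketch (the kernel entry point of course stays the assembly above):

/* Zero one 4096-byte page with rep stosq. */
static void clear_page_sketch(void *page)
{
	unsigned long cnt = 4096 / 8;
	void *dst = page;

	asm volatile("rep stosq"
		     : "+c" (cnt), "+D" (dst)	/* rcx = count, rdi = dest */
		     : "a" (0UL)		/* rax = value stored */
		     : "memory");
}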
diff --git a/arch/x86_64/lib/copy_page.S b/arch/x86_64/lib/copy_page.S index dd3aa47b6bf5..621a19769406 100644 --- a/arch/x86_64/lib/copy_page.S +++ b/arch/x86_64/lib/copy_page.S | |||
@@ -8,94 +8,7 @@ | |||
8 | .globl copy_page | 8 | .globl copy_page |
9 | .p2align 4 | 9 | .p2align 4 |
10 | copy_page: | 10 | copy_page: |
11 | subq $3*8,%rsp | ||
12 | movq %rbx,(%rsp) | ||
13 | movq %r12,1*8(%rsp) | ||
14 | movq %r13,2*8(%rsp) | ||
15 | |||
16 | movl $(4096/64)-5,%ecx | ||
17 | .p2align 4 | ||
18 | .Loop64: | ||
19 | dec %rcx | ||
20 | |||
21 | movq (%rsi), %rax | ||
22 | movq 8 (%rsi), %rbx | ||
23 | movq 16 (%rsi), %rdx | ||
24 | movq 24 (%rsi), %r8 | ||
25 | movq 32 (%rsi), %r9 | ||
26 | movq 40 (%rsi), %r10 | ||
27 | movq 48 (%rsi), %r11 | ||
28 | movq 56 (%rsi), %r12 | ||
29 | |||
30 | prefetcht0 5*64(%rsi) | ||
31 | |||
32 | movq %rax, (%rdi) | ||
33 | movq %rbx, 8 (%rdi) | ||
34 | movq %rdx, 16 (%rdi) | ||
35 | movq %r8, 24 (%rdi) | ||
36 | movq %r9, 32 (%rdi) | ||
37 | movq %r10, 40 (%rdi) | ||
38 | movq %r11, 48 (%rdi) | ||
39 | movq %r12, 56 (%rdi) | ||
40 | |||
41 | leaq 64 (%rsi), %rsi | ||
42 | leaq 64 (%rdi), %rdi | ||
43 | |||
44 | jnz .Loop64 | ||
45 | |||
46 | movl $5,%ecx | ||
47 | .p2align 4 | ||
48 | .Loop2: | ||
49 | decl %ecx | ||
50 | |||
51 | movq (%rsi), %rax | ||
52 | movq 8 (%rsi), %rbx | ||
53 | movq 16 (%rsi), %rdx | ||
54 | movq 24 (%rsi), %r8 | ||
55 | movq 32 (%rsi), %r9 | ||
56 | movq 40 (%rsi), %r10 | ||
57 | movq 48 (%rsi), %r11 | ||
58 | movq 56 (%rsi), %r12 | ||
59 | |||
60 | movq %rax, (%rdi) | ||
61 | movq %rbx, 8 (%rdi) | ||
62 | movq %rdx, 16 (%rdi) | ||
63 | movq %r8, 24 (%rdi) | ||
64 | movq %r9, 32 (%rdi) | ||
65 | movq %r10, 40 (%rdi) | ||
66 | movq %r11, 48 (%rdi) | ||
67 | movq %r12, 56 (%rdi) | ||
68 | |||
69 | leaq 64(%rdi),%rdi | ||
70 | leaq 64(%rsi),%rsi | ||
71 | |||
72 | jnz .Loop2 | ||
73 | |||
74 | movq (%rsp),%rbx | ||
75 | movq 1*8(%rsp),%r12 | ||
76 | movq 2*8(%rsp),%r13 | ||
77 | addq $3*8,%rsp | ||
78 | ret | ||
79 | |||
80 | /* C stepping K8 run faster using the string copy instructions. | ||
81 | It is also a lot simpler. Use this when possible */ | ||
82 | |||
83 | #include <asm/cpufeature.h> | ||
84 | |||
85 | .section .altinstructions,"a" | ||
86 | .align 8 | ||
87 | .quad copy_page | ||
88 | .quad copy_page_c | ||
89 | .byte X86_FEATURE_K8_C | ||
90 | .byte copy_page_c_end-copy_page_c | ||
91 | .byte copy_page_c_end-copy_page_c | ||
92 | .previous | ||
93 | |||
94 | .section .altinstr_replacement,"ax" | ||
95 | copy_page_c: | ||
96 | movl $4096/8,%ecx | 11 | movl $4096/8,%ecx |
97 | rep | 12 | rep |
98 | movsq | 13 | movsq |
99 | ret | 14 | ret |
100 | copy_page_c_end: | ||
101 | .previous | ||
diff --git a/arch/x86_64/lib/memcpy.S b/arch/x86_64/lib/memcpy.S index c6c46494fef5..92dd80544602 100644 --- a/arch/x86_64/lib/memcpy.S +++ b/arch/x86_64/lib/memcpy.S | |||
@@ -11,6 +11,8 @@ | |||
11 | * | 11 | * |
12 | * Output: | 12 | * Output: |
13 | * rax original destination | 13 | * rax original destination |
14 | * | ||
15 | * TODO: check best memcpy for PSC | ||
14 | */ | 16 | */ |
15 | 17 | ||
16 | .globl __memcpy | 18 | .globl __memcpy |
@@ -18,95 +20,6 @@ | |||
18 | .p2align 4 | 20 | .p2align 4 |
19 | __memcpy: | 21 | __memcpy: |
20 | memcpy: | 22 | memcpy: |
21 | pushq %rbx | ||
22 | movq %rdi,%rax | ||
23 | |||
24 | movl %edx,%ecx | ||
25 | shrl $6,%ecx | ||
26 | jz .Lhandle_tail | ||
27 | |||
28 | .p2align 4 | ||
29 | .Lloop_64: | ||
30 | decl %ecx | ||
31 | |||
32 | movq (%rsi),%r11 | ||
33 | movq 8(%rsi),%r8 | ||
34 | |||
35 | movq %r11,(%rdi) | ||
36 | movq %r8,1*8(%rdi) | ||
37 | |||
38 | movq 2*8(%rsi),%r9 | ||
39 | movq 3*8(%rsi),%r10 | ||
40 | |||
41 | movq %r9,2*8(%rdi) | ||
42 | movq %r10,3*8(%rdi) | ||
43 | |||
44 | movq 4*8(%rsi),%r11 | ||
45 | movq 5*8(%rsi),%r8 | ||
46 | |||
47 | movq %r11,4*8(%rdi) | ||
48 | movq %r8,5*8(%rdi) | ||
49 | |||
50 | movq 6*8(%rsi),%r9 | ||
51 | movq 7*8(%rsi),%r10 | ||
52 | |||
53 | movq %r9,6*8(%rdi) | ||
54 | movq %r10,7*8(%rdi) | ||
55 | |||
56 | leaq 64(%rsi),%rsi | ||
57 | leaq 64(%rdi),%rdi | ||
58 | jnz .Lloop_64 | ||
59 | |||
60 | .Lhandle_tail: | ||
61 | movl %edx,%ecx | ||
62 | andl $63,%ecx | ||
63 | shrl $3,%ecx | ||
64 | jz .Lhandle_7 | ||
65 | .p2align 4 | ||
66 | .Lloop_8: | ||
67 | decl %ecx | ||
68 | movq (%rsi),%r8 | ||
69 | movq %r8,(%rdi) | ||
70 | leaq 8(%rdi),%rdi | ||
71 | leaq 8(%rsi),%rsi | ||
72 | jnz .Lloop_8 | ||
73 | |||
74 | .Lhandle_7: | ||
75 | movl %edx,%ecx | ||
76 | andl $7,%ecx | ||
77 | jz .Lende | ||
78 | .p2align 4 | ||
79 | .Lloop_1: | ||
80 | movb (%rsi),%r8b | ||
81 | movb %r8b,(%rdi) | ||
82 | incq %rdi | ||
83 | incq %rsi | ||
84 | decl %ecx | ||
85 | jnz .Lloop_1 | ||
86 | |||
87 | .Lende: | ||
88 | popq %rbx | ||
89 | ret | ||
90 | .Lfinal: | ||
91 | |||
92 | /* C stepping K8 run faster using the string copy instructions. | ||
93 | It is also a lot simpler. Use this when possible */ | ||
94 | |||
95 | .section .altinstructions,"a" | ||
96 | .align 8 | ||
97 | .quad memcpy | ||
98 | .quad memcpy_c | ||
99 | .byte X86_FEATURE_K8_C | ||
100 | .byte .Lfinal-memcpy | ||
101 | .byte memcpy_c_end-memcpy_c | ||
102 | .previous | ||
103 | |||
104 | .section .altinstr_replacement,"ax" | ||
105 | /* rdi destination | ||
106 | * rsi source | ||
107 | * rdx count | ||
108 | */ | ||
109 | memcpy_c: | ||
110 | movq %rdi,%rax | 23 | movq %rdi,%rax |
111 | movl %edx,%ecx | 24 | movl %edx,%ecx |
112 | shrl $3,%ecx | 25 | shrl $3,%ecx |
@@ -117,5 +30,3 @@ memcpy_c: | |||
117 | rep | 30 | rep |
118 | movsb | 31 | movsb |
119 | ret | 32 | ret |
120 | memcpy_c_end: | ||
121 | .previous | ||
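What survives of memcpy is the former memcpy_c string variant: save the destination for the return value, copy length/8 qwords with rep movsq, then copy the length%8 byte tail with rep movsb. Its shape in plain C, as a sketch:

/* C rendering of the rep movsq / rep movsb split. */
static void *memcpy_sketch(void *dst, const void *src, unsigned long len)
{
	unsigned long *d8 = dst;
	const unsigned long *s8 = src;
	unsigned long i;

	for (i = 0; i < (len >> 3); i++)	/* rep movsq */
		*d8++ = *s8++;

	for (i = 0; i < (len & 7); i++)		/* rep movsb tail */
		((unsigned char *)d8)[i] = ((const unsigned char *)s8)[i];

	return dst;				/* original destination */
}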
diff --git a/arch/x86_64/lib/memset.S b/arch/x86_64/lib/memset.S index 4b4c40638640..2aa48f24ed1e 100644 --- a/arch/x86_64/lib/memset.S +++ b/arch/x86_64/lib/memset.S | |||
@@ -13,98 +13,6 @@ | |||
13 | .p2align 4 | 13 | .p2align 4 |
14 | memset: | 14 | memset: |
15 | __memset: | 15 | __memset: |
16 | movq %rdi,%r10 | ||
17 | movq %rdx,%r11 | ||
18 | |||
19 | /* expand byte value */ | ||
20 | movzbl %sil,%ecx | ||
21 | movabs $0x0101010101010101,%rax | ||
22 | mul %rcx /* with rax, clobbers rdx */ | ||
23 | |||
24 | /* align dst */ | ||
25 | movl %edi,%r9d | ||
26 | andl $7,%r9d | ||
27 | jnz .Lbad_alignment | ||
28 | .Lafter_bad_alignment: | ||
29 | |||
30 | movl %r11d,%ecx | ||
31 | shrl $6,%ecx | ||
32 | jz .Lhandle_tail | ||
33 | |||
34 | .p2align 4 | ||
35 | .Lloop_64: | ||
36 | decl %ecx | ||
37 | movq %rax,(%rdi) | ||
38 | movq %rax,8(%rdi) | ||
39 | movq %rax,16(%rdi) | ||
40 | movq %rax,24(%rdi) | ||
41 | movq %rax,32(%rdi) | ||
42 | movq %rax,40(%rdi) | ||
43 | movq %rax,48(%rdi) | ||
44 | movq %rax,56(%rdi) | ||
45 | leaq 64(%rdi),%rdi | ||
46 | jnz .Lloop_64 | ||
47 | |||
48 | /* Handle tail in loops. The loops should be faster than hard | ||
49 | to predict jump tables. */ | ||
50 | .p2align 4 | ||
51 | .Lhandle_tail: | ||
52 | movl %r11d,%ecx | ||
53 | andl $63&(~7),%ecx | ||
54 | jz .Lhandle_7 | ||
55 | shrl $3,%ecx | ||
56 | .p2align 4 | ||
57 | .Lloop_8: | ||
58 | decl %ecx | ||
59 | movq %rax,(%rdi) | ||
60 | leaq 8(%rdi),%rdi | ||
61 | jnz .Lloop_8 | ||
62 | |||
63 | .Lhandle_7: | ||
64 | movl %r11d,%ecx | ||
65 | andl $7,%ecx | ||
66 | jz .Lende | ||
67 | .p2align 4 | ||
68 | .Lloop_1: | ||
69 | decl %ecx | ||
70 | movb %al,(%rdi) | ||
71 | leaq 1(%rdi),%rdi | ||
72 | jnz .Lloop_1 | ||
73 | |||
74 | .Lende: | ||
75 | movq %r10,%rax | ||
76 | ret | ||
77 | |||
78 | .Lbad_alignment: | ||
79 | cmpq $7,%r11 | ||
80 | jbe .Lhandle_7 | ||
81 | movq %rax,(%rdi) /* unaligned store */ | ||
82 | movq $8,%r8 | ||
83 | subq %r9,%r8 | ||
84 | addq %r8,%rdi | ||
85 | subq %r8,%r11 | ||
86 | jmp .Lafter_bad_alignment | ||
87 | |||
88 | /* C stepping K8 run faster using the string instructions. | ||
89 | It is also a lot simpler. Use this when possible */ | ||
90 | |||
91 | #include <asm/cpufeature.h> | ||
92 | |||
93 | .section .altinstructions,"a" | ||
94 | .align 8 | ||
95 | .quad memset | ||
96 | .quad memset_c | ||
97 | .byte X86_FEATURE_K8_C | ||
98 | .byte memset_c_end-memset_c | ||
99 | .byte memset_c_end-memset_c | ||
100 | .previous | ||
101 | |||
102 | .section .altinstr_replacement,"ax" | ||
103 | /* rdi destination | ||
104 | * rsi value | ||
105 | * rdx count | ||
106 | */ | ||
107 | memset_c: | ||
108 | movq %rdi,%r9 | 16 | movq %rdi,%r9 |
109 | movl %edx,%r8d | 17 | movl %edx,%r8d |
110 | andl $7,%r8d | 18 | andl $7,%r8d |
@@ -121,5 +29,3 @@ memset_c: | |||
121 | stosb | 29 | stosb |
122 | movq %r9,%rax | 30 | movq %r9,%rax |
123 | ret | 31 | ret |
124 | memset_c_end: | ||
125 | .previous | ||
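One detail of the removed open-coded memset deserves noting before it disappears: the fill byte was broadcast to a full qword by multiplying it with 0x0101010101010101, which replicates the operand into every byte lane of the product. A two-liner, as a sketch:

#include <stdint.h>

/* Broadcast a byte across all 8 lanes of a qword. */
static uint64_t expand_byte(uint8_t c)
{
	return (uint64_t)c * 0x0101010101010101ULL;	/* 0x41 -> 0x4141414141414141 */
}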
diff --git a/arch/x86_64/mm/fault.c b/arch/x86_64/mm/fault.c index b75b872ec154..3a63707a698b 100644 --- a/arch/x86_64/mm/fault.c +++ b/arch/x86_64/mm/fault.c | |||
@@ -308,18 +308,6 @@ asmlinkage void __kprobes do_page_fault(struct pt_regs *regs, | |||
308 | unsigned long flags; | 308 | unsigned long flags; |
309 | siginfo_t info; | 309 | siginfo_t info; |
310 | 310 | ||
311 | #ifdef CONFIG_CHECKING | ||
312 | { | ||
313 | unsigned long gs; | ||
314 | struct x8664_pda *pda = cpu_pda + stack_smp_processor_id(); | ||
315 | rdmsrl(MSR_GS_BASE, gs); | ||
316 | if (gs != (unsigned long)pda) { | ||
317 | wrmsrl(MSR_GS_BASE, pda); | ||
318 | printk("page_fault: wrong gs %lx expected %p\n", gs, pda); | ||
319 | } | ||
320 | } | ||
321 | #endif | ||
322 | |||
323 | /* get the address */ | 311 | /* get the address */ |
324 | __asm__("movq %%cr2,%0":"=r" (address)); | 312 | __asm__("movq %%cr2,%0":"=r" (address)); |
325 | if (notify_die(DIE_PAGE_FAULT, "page fault", regs, error_code, 14, | 313 | if (notify_die(DIE_PAGE_FAULT, "page fault", regs, error_code, 14, |
@@ -571,3 +559,10 @@ do_sigbus: | |||
571 | force_sig_info(SIGBUS, &info, tsk); | 559 | force_sig_info(SIGBUS, &info, tsk); |
572 | return; | 560 | return; |
573 | } | 561 | } |
562 | |||
563 | static int __init enable_pagefaulttrace(char *str) | ||
564 | { | ||
565 | page_fault_trace = 1; | ||
566 | return 0; | ||
567 | } | ||
568 | __setup("pagefaulttrace", enable_pagefaulttrace); | ||
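The new __setup() hook replaces runtime control of page_fault_trace with a boot-time switch; the corresponding CONFIG_CHECKING sysctl entry is dropped from debug_table2 in the mm/init.c hunk further down. Enabling it is a matter of appending the option string to the kernel command line, for example (illustrative boot entry):

	kernel /vmlinuz root=/dev/sda1 pagefaulttrace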
diff --git a/arch/x86_64/mm/init.c b/arch/x86_64/mm/init.c index e60a1a848de8..286f6a624c3a 100644 --- a/arch/x86_64/mm/init.c +++ b/arch/x86_64/mm/init.c | |||
@@ -22,6 +22,7 @@ | |||
22 | #include <linux/pagemap.h> | 22 | #include <linux/pagemap.h> |
23 | #include <linux/bootmem.h> | 23 | #include <linux/bootmem.h> |
24 | #include <linux/proc_fs.h> | 24 | #include <linux/proc_fs.h> |
25 | #include <linux/pci.h> | ||
25 | 26 | ||
26 | #include <asm/processor.h> | 27 | #include <asm/processor.h> |
27 | #include <asm/system.h> | 28 | #include <asm/system.h> |
@@ -36,16 +37,13 @@ | |||
36 | #include <asm/mmu_context.h> | 37 | #include <asm/mmu_context.h> |
37 | #include <asm/proto.h> | 38 | #include <asm/proto.h> |
38 | #include <asm/smp.h> | 39 | #include <asm/smp.h> |
40 | #include <asm/sections.h> | ||
39 | 41 | ||
40 | #ifndef Dprintk | 42 | #ifndef Dprintk |
41 | #define Dprintk(x...) | 43 | #define Dprintk(x...) |
42 | #endif | 44 | #endif |
43 | 45 | ||
44 | #ifdef CONFIG_GART_IOMMU | 46 | static unsigned long dma_reserve __initdata; |
45 | extern int swiotlb; | ||
46 | #endif | ||
47 | |||
48 | extern char _stext[]; | ||
49 | 47 | ||
50 | DEFINE_PER_CPU(struct mmu_gather, mmu_gathers); | 48 | DEFINE_PER_CPU(struct mmu_gather, mmu_gathers); |
51 | 49 | ||
@@ -86,9 +84,6 @@ void show_mem(void) | |||
86 | 84 | ||
87 | /* References to section boundaries */ | 85 | /* References to section boundaries */ |
88 | 86 | ||
89 | extern char _text, _etext, _edata, __bss_start, _end[]; | ||
90 | extern char __init_begin, __init_end; | ||
91 | |||
92 | int after_bootmem; | 87 | int after_bootmem; |
93 | 88 | ||
94 | static void *spp_getpage(void) | 89 | static void *spp_getpage(void) |
@@ -308,42 +303,81 @@ void __init init_memory_mapping(unsigned long start, unsigned long end) | |||
308 | table_end<<PAGE_SHIFT); | 303 | table_end<<PAGE_SHIFT); |
309 | } | 304 | } |
310 | 305 | ||
311 | extern struct x8664_pda cpu_pda[NR_CPUS]; | 306 | void __cpuinit zap_low_mappings(int cpu) |
307 | { | ||
308 | if (cpu == 0) { | ||
309 | pgd_t *pgd = pgd_offset_k(0UL); | ||
310 | pgd_clear(pgd); | ||
311 | } else { | ||
312 | /* | ||
313 | * For AP's, zap the low identity mappings by changing the cr3 | ||
314 | * to init_level4_pgt and doing local flush tlb all | ||
315 | */ | ||
316 | asm volatile("movq %0,%%cr3" :: "r" (__pa_symbol(&init_level4_pgt))); | ||
317 | } | ||
318 | __flush_tlb_all(); | ||
319 | } | ||
312 | 320 | ||
313 | /* Assumes all CPUs still execute in init_mm */ | 321 | /* Compute zone sizes for the DMA and DMA32 zones in a node. */ |
314 | void zap_low_mappings(void) | 322 | __init void |
323 | size_zones(unsigned long *z, unsigned long *h, | ||
324 | unsigned long start_pfn, unsigned long end_pfn) | ||
315 | { | 325 | { |
316 | pgd_t *pgd = pgd_offset_k(0UL); | 326 | int i; |
317 | pgd_clear(pgd); | 327 | unsigned long w; |
318 | flush_tlb_all(); | 328 | |
329 | for (i = 0; i < MAX_NR_ZONES; i++) | ||
330 | z[i] = 0; | ||
331 | |||
332 | if (start_pfn < MAX_DMA_PFN) | ||
333 | z[ZONE_DMA] = MAX_DMA_PFN - start_pfn; | ||
334 | if (start_pfn < MAX_DMA32_PFN) { | ||
335 | unsigned long dma32_pfn = MAX_DMA32_PFN; | ||
336 | if (dma32_pfn > end_pfn) | ||
337 | dma32_pfn = end_pfn; | ||
338 | z[ZONE_DMA32] = dma32_pfn - start_pfn; | ||
339 | } | ||
340 | z[ZONE_NORMAL] = end_pfn - start_pfn; | ||
341 | |||
342 | /* Remove lower zones from higher ones. */ | ||
343 | w = 0; | ||
344 | for (i = 0; i < MAX_NR_ZONES; i++) { | ||
345 | if (z[i]) | ||
346 | z[i] -= w; | ||
347 | w += z[i]; | ||
348 | } | ||
349 | |||
350 | /* Compute holes */ | ||
351 | w = 0; | ||
352 | for (i = 0; i < MAX_NR_ZONES; i++) { | ||
353 | unsigned long s = w; | ||
354 | w += z[i]; | ||
355 | h[i] = e820_hole_size(s, w); | ||
356 | } | ||
357 | |||
358 | /* Add the space needed for mem_map to the holes too. */ | ||
359 | for (i = 0; i < MAX_NR_ZONES; i++) | ||
360 | h[i] += (z[i] * sizeof(struct page)) / PAGE_SIZE; | ||
361 | |||
362 | /* The 16MB DMA zone has the kernel and other misc mappings. | ||
363 | Account them too */ | ||
364 | if (h[ZONE_DMA]) { | ||
365 | h[ZONE_DMA] += dma_reserve; | ||
366 | if (h[ZONE_DMA] >= z[ZONE_DMA]) { | ||
367 | printk(KERN_WARNING | ||
368 | "Kernel too large and filling up ZONE_DMA?\n"); | ||
369 | h[ZONE_DMA] = z[ZONE_DMA]; | ||
370 | } | ||
371 | } | ||
319 | } | 372 | } |
320 | 373 | ||
321 | #ifndef CONFIG_NUMA | 374 | #ifndef CONFIG_NUMA |
322 | void __init paging_init(void) | 375 | void __init paging_init(void) |
323 | { | 376 | { |
324 | { | 377 | unsigned long zones[MAX_NR_ZONES], holes[MAX_NR_ZONES]; |
325 | unsigned long zones_size[MAX_NR_ZONES]; | 378 | size_zones(zones, holes, 0, end_pfn); |
326 | unsigned long holes[MAX_NR_ZONES]; | 379 | free_area_init_node(0, NODE_DATA(0), zones, |
327 | unsigned int max_dma; | 380 | __pa(PAGE_OFFSET) >> PAGE_SHIFT, holes); |
328 | |||
329 | memset(zones_size, 0, sizeof(zones_size)); | ||
330 | memset(holes, 0, sizeof(holes)); | ||
331 | |||
332 | max_dma = virt_to_phys((char *)MAX_DMA_ADDRESS) >> PAGE_SHIFT; | ||
333 | |||
334 | if (end_pfn < max_dma) { | ||
335 | zones_size[ZONE_DMA] = end_pfn; | ||
336 | holes[ZONE_DMA] = e820_hole_size(0, end_pfn); | ||
337 | } else { | ||
338 | zones_size[ZONE_DMA] = max_dma; | ||
339 | holes[ZONE_DMA] = e820_hole_size(0, max_dma); | ||
340 | zones_size[ZONE_NORMAL] = end_pfn - max_dma; | ||
341 | holes[ZONE_NORMAL] = e820_hole_size(max_dma, end_pfn); | ||
342 | } | ||
343 | free_area_init_node(0, NODE_DATA(0), zones_size, | ||
344 | __pa(PAGE_OFFSET) >> PAGE_SHIFT, holes); | ||
345 | } | ||
346 | return; | ||
347 | } | 381 | } |
348 | #endif | 382 | #endif |
349 | 383 | ||
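size_zones() first records cumulative upper bounds (DMA up to 16MB, DMA32 up to 4GB, NORMAL up to end_pfn) and then subtracts each lower zone from the ones above it, leaving per-zone page counts. A worked sketch for a hypothetical 5GB machine with 4KB pages, taking MAX_DMA_PFN and MAX_DMA32_PFN at their conventional 16MB and 4GB boundaries:

unsigned long z[3] = { 0x1000, 0x100000, 0x140000 };	/* cumulative bounds in pfns */
unsigned long w = 0;
int i;

for (i = 0; i < 3; i++) {	/* remove lower zones from higher ones */
	if (z[i])
		z[i] -= w;
	w += z[i];
}
/* result: z = { 0x1000, 0xff000, 0x40000 }
 *   ZONE_DMA 0..16MB, ZONE_DMA32 16MB..4GB, ZONE_NORMAL 4GB..5GB */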
@@ -438,19 +472,16 @@ void __init mem_init(void) | |||
438 | datasize >> 10, | 472 | datasize >> 10, |
439 | initsize >> 10); | 473 | initsize >> 10); |
440 | 474 | ||
475 | #ifdef CONFIG_SMP | ||
441 | /* | 476 | /* |
442 | * Subtle. SMP is doing its boot stuff late (because it has to | 477 | * Sync boot_level4_pgt mappings with the init_level4_pgt |
443 | * fork idle threads) - but it also needs low mappings for the | 478 | * except for the low identity mappings which are already zapped |
444 | * protected-mode entry to work. We zap these entries only after | 479 | * in init_level4_pgt. This sync-up is essential for AP's bringup |
445 | * the WP-bit has been tested. | ||
446 | */ | 480 | */ |
447 | #ifndef CONFIG_SMP | 481 | memcpy(boot_level4_pgt+1, init_level4_pgt+1, (PTRS_PER_PGD-1)*sizeof(pgd_t)); |
448 | zap_low_mappings(); | ||
449 | #endif | 482 | #endif |
450 | } | 483 | } |
451 | 484 | ||
452 | extern char __initdata_begin[], __initdata_end[]; | ||
453 | |||
454 | void free_initmem(void) | 485 | void free_initmem(void) |
455 | { | 486 | { |
456 | unsigned long addr; | 487 | unsigned long addr; |
@@ -464,7 +495,7 @@ void free_initmem(void) | |||
464 | totalram_pages++; | 495 | totalram_pages++; |
465 | } | 496 | } |
466 | memset(__initdata_begin, 0xba, __initdata_end - __initdata_begin); | 497 | memset(__initdata_begin, 0xba, __initdata_end - __initdata_begin); |
467 | printk ("Freeing unused kernel memory: %luk freed\n", (&__init_end - &__init_begin) >> 10); | 498 | printk ("Freeing unused kernel memory: %luk freed\n", (__init_end - __init_begin) >> 10); |
468 | } | 499 | } |
469 | 500 | ||
470 | #ifdef CONFIG_BLK_DEV_INITRD | 501 | #ifdef CONFIG_BLK_DEV_INITRD |
@@ -491,6 +522,8 @@ void __init reserve_bootmem_generic(unsigned long phys, unsigned len) | |||
491 | #else | 522 | #else |
492 | reserve_bootmem(phys, len); | 523 | reserve_bootmem(phys, len); |
493 | #endif | 524 | #endif |
525 | if (phys+len <= MAX_DMA_PFN*PAGE_SIZE) | ||
526 | dma_reserve += len / PAGE_SIZE; | ||
494 | } | 527 | } |
495 | 528 | ||
496 | int kern_addr_valid(unsigned long addr) | 529 | int kern_addr_valid(unsigned long addr) |
@@ -532,10 +565,6 @@ extern int exception_trace, page_fault_trace; | |||
532 | static ctl_table debug_table2[] = { | 565 | static ctl_table debug_table2[] = { |
533 | { 99, "exception-trace", &exception_trace, sizeof(int), 0644, NULL, | 566 | { 99, "exception-trace", &exception_trace, sizeof(int), 0644, NULL, |
534 | proc_dointvec }, | 567 | proc_dointvec }, |
535 | #ifdef CONFIG_CHECKING | ||
536 | { 100, "page-fault-trace", &page_fault_trace, sizeof(int), 0644, NULL, | ||
537 | proc_dointvec }, | ||
538 | #endif | ||
539 | { 0, } | 568 | { 0, } |
540 | }; | 569 | }; |
541 | 570 | ||
diff --git a/arch/x86_64/mm/ioremap.c b/arch/x86_64/mm/ioremap.c index 6972df480d2b..ecf7acb5db9b 100644 --- a/arch/x86_64/mm/ioremap.c +++ b/arch/x86_64/mm/ioremap.c | |||
@@ -60,7 +60,7 @@ static inline int remap_area_pmd(pmd_t * pmd, unsigned long address, unsigned lo | |||
60 | if (address >= end) | 60 | if (address >= end) |
61 | BUG(); | 61 | BUG(); |
62 | do { | 62 | do { |
63 | pte_t * pte = pte_alloc_kernel(&init_mm, pmd, address); | 63 | pte_t * pte = pte_alloc_kernel(pmd, address); |
64 | if (!pte) | 64 | if (!pte) |
65 | return -ENOMEM; | 65 | return -ENOMEM; |
66 | remap_area_pte(pte, address, end - address, address + phys_addr, flags); | 66 | remap_area_pte(pte, address, end - address, address + phys_addr, flags); |
@@ -105,7 +105,6 @@ static int remap_area_pages(unsigned long address, unsigned long phys_addr, | |||
105 | flush_cache_all(); | 105 | flush_cache_all(); |
106 | if (address >= end) | 106 | if (address >= end) |
107 | BUG(); | 107 | BUG(); |
108 | spin_lock(&init_mm.page_table_lock); | ||
109 | do { | 108 | do { |
110 | pud_t *pud; | 109 | pud_t *pud; |
111 | pud = pud_alloc(&init_mm, pgd, address); | 110 | pud = pud_alloc(&init_mm, pgd, address); |
@@ -119,7 +118,6 @@ static int remap_area_pages(unsigned long address, unsigned long phys_addr, | |||
119 | address = (address + PGDIR_SIZE) & PGDIR_MASK; | 118 | address = (address + PGDIR_SIZE) & PGDIR_MASK; |
120 | pgd++; | 119 | pgd++; |
121 | } while (address && (address < end)); | 120 | } while (address && (address < end)); |
122 | spin_unlock(&init_mm.page_table_lock); | ||
123 | flush_tlb_all(); | 121 | flush_tlb_all(); |
124 | return error; | 122 | return error; |
125 | } | 123 | } |
diff --git a/arch/x86_64/mm/k8topology.c b/arch/x86_64/mm/k8topology.c index 65417b040c1b..a5663e0bb01c 100644 --- a/arch/x86_64/mm/k8topology.c +++ b/arch/x86_64/mm/k8topology.c | |||
@@ -108,6 +108,7 @@ int __init k8_scan_nodes(unsigned long start, unsigned long end) | |||
108 | limit >>= 16; | 108 | limit >>= 16; |
109 | limit <<= 24; | 109 | limit <<= 24; |
110 | limit |= (1<<24)-1; | 110 | limit |= (1<<24)-1; |
111 | limit++; | ||
111 | 112 | ||
112 | if (limit > end_pfn << PAGE_SHIFT) | 113 | if (limit > end_pfn << PAGE_SHIFT) |
113 | limit = end_pfn << PAGE_SHIFT; | 114 | limit = end_pfn << PAGE_SHIFT; |
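The one-line limit++ converts the Northbridge limit register into an exclusive end address: the hardware encodes only the top bits of the range, the low 24 bits are implied all-ones, so after they are filled in the value still names the last byte of the node and the increment moves it one past the end. The matching end-- hacks removed from srat.c and the NUMA emulation code below make this exclusive-end convention consistent. Worked through for a node ending at 4GB, as a sketch with a hypothetical raw register value:

unsigned long limit = 0x00ff0000;	/* hypothetical raw register */
limit >>= 16;				/* 0xff */
limit <<= 24;				/* 0xff000000 */
limit |= (1 << 24) - 1;			/* 0xffffffff: last byte of the node */
limit++;				/* 0x100000000: exclusive end (4GB) */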
diff --git a/arch/x86_64/mm/numa.c b/arch/x86_64/mm/numa.c index 80a49d9bd8a7..a828a01739cc 100644 --- a/arch/x86_64/mm/numa.c +++ b/arch/x86_64/mm/numa.c | |||
@@ -38,38 +38,57 @@ cpumask_t node_to_cpumask[MAX_NUMNODES] __read_mostly; | |||
38 | 38 | ||
39 | int numa_off __initdata; | 39 | int numa_off __initdata; |
40 | 40 | ||
41 | int __init compute_hash_shift(struct node *nodes, int numnodes) | 41 | |
42 | /* | ||
43 | * Given a shift value, try to populate memnodemap[] | ||
44 | * Returns : | ||
45 | * Returns: | ||
46 | * 0 if memnodemap[] too small (or shift too small) | ||
47 | * -1 if node overlap or lost ram (shift too big) | ||
48 | */ | ||
49 | static int __init populate_memnodemap( | ||
50 | const struct node *nodes, int numnodes, int shift) | ||
42 | { | 51 | { |
43 | int i; | 52 | int i; |
44 | int shift = 20; | 53 | int res = -1; |
45 | unsigned long addr,maxend=0; | 54 | unsigned long addr, end; |
46 | |||
47 | for (i = 0; i < numnodes; i++) | ||
48 | if ((nodes[i].start != nodes[i].end) && (nodes[i].end > maxend)) | ||
49 | maxend = nodes[i].end; | ||
50 | 55 | ||
51 | while ((1UL << shift) < (maxend / NODEMAPSIZE)) | 56 | memset(memnodemap, 0xff, sizeof(memnodemap)); |
52 | shift++; | ||
53 | |||
54 | printk (KERN_DEBUG"Using %d for the hash shift. Max adder is %lx \n", | ||
55 | shift,maxend); | ||
56 | memset(memnodemap,0xff,sizeof(*memnodemap) * NODEMAPSIZE); | ||
57 | for (i = 0; i < numnodes; i++) { | 57 | for (i = 0; i < numnodes; i++) { |
58 | if (nodes[i].start == nodes[i].end) | 58 | addr = nodes[i].start; |
59 | end = nodes[i].end; | ||
60 | if (addr >= end) | ||
59 | continue; | 61 | continue; |
60 | for (addr = nodes[i].start; | 62 | if ((end >> shift) >= NODEMAPSIZE) |
61 | addr < nodes[i].end; | 63 | return 0; |
62 | addr += (1UL << shift)) { | 64 | do { |
63 | if (memnodemap[addr >> shift] != 0xff) { | 65 | if (memnodemap[addr >> shift] != 0xff) |
64 | printk(KERN_INFO | ||
65 | "Your memory is not aligned you need to rebuild your kernel " | ||
66 | "with a bigger NODEMAPSIZE shift=%d adder=%lu\n", | ||
67 | shift,addr); | ||
68 | return -1; | 66 | return -1; |
69 | } | ||
70 | memnodemap[addr >> shift] = i; | 67 | memnodemap[addr >> shift] = i; |
71 | } | 68 | addr += (1 << shift); |
69 | } while (addr < end); | ||
70 | res = 1; | ||
72 | } | 71 | } |
72 | return res; | ||
73 | } | ||
74 | |||
75 | int __init compute_hash_shift(struct node *nodes, int numnodes) | ||
76 | { | ||
77 | int shift = 20; | ||
78 | |||
79 | while (populate_memnodemap(nodes, numnodes, shift + 1) >= 0) | ||
80 | shift++; | ||
81 | |||
82 | printk(KERN_DEBUG "Using %d for the hash shift.\n", | ||
83 | shift); | ||
84 | |||
85 | if (populate_memnodemap(nodes, numnodes, shift) != 1) { | ||
86 | printk(KERN_INFO | ||
87 | "Your memory is not aligned; you need to rebuild your kernel " | ||
88 | "with a bigger NODEMAPSIZE shift=%d\n", | ||
89 | shift); | ||
90 | return -1; | ||
91 | } | ||
73 | return shift; | 92 | return shift; |
74 | } | 93 | } |
75 | 94 | ||
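compute_hash_shift() now searches for the coarsest usable granule: shift grows while shift + 1 still lets populate_memnodemap() place every node without two nodes colliding in one memnodemap[] slot (the -1 return), and the final shift is then verified to actually succeed. A sketch with two hypothetical nodes:

/* Two hypothetical nodes: [0, 2GB) -> node 0, [2GB, 4GB) -> node 1. */
static int slot_of(unsigned long addr, int shift)
{
	return addr >> shift;		/* index into memnodemap[] */
}
/* slot_of(0x00000000UL, 31) == 0 and slot_of(0x80000000UL, 31) == 1,
 * so shift 31 still resolves both nodes (populate returns 1); at
 * shift 32 both collapse into slot 0 (overlap, -1), and the search
 * settles on 31. */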
@@ -94,7 +113,6 @@ void __init setup_node_bootmem(int nodeid, unsigned long start, unsigned long en | |||
94 | start_pfn = start >> PAGE_SHIFT; | 113 | start_pfn = start >> PAGE_SHIFT; |
95 | end_pfn = end >> PAGE_SHIFT; | 114 | end_pfn = end >> PAGE_SHIFT; |
96 | 115 | ||
97 | memory_present(nodeid, start_pfn, end_pfn); | ||
98 | nodedata_phys = find_e820_area(start, end, pgdat_size); | 116 | nodedata_phys = find_e820_area(start, end, pgdat_size); |
99 | if (nodedata_phys == -1L) | 117 | if (nodedata_phys == -1L) |
100 | panic("Cannot find memory pgdat in node %d\n", nodeid); | 118 | panic("Cannot find memory pgdat in node %d\n", nodeid); |
@@ -132,29 +150,14 @@ void __init setup_node_zones(int nodeid) | |||
132 | unsigned long start_pfn, end_pfn; | 150 | unsigned long start_pfn, end_pfn; |
133 | unsigned long zones[MAX_NR_ZONES]; | 151 | unsigned long zones[MAX_NR_ZONES]; |
134 | unsigned long holes[MAX_NR_ZONES]; | 152 | unsigned long holes[MAX_NR_ZONES]; |
135 | unsigned long dma_end_pfn; | ||
136 | 153 | ||
137 | memset(zones, 0, sizeof(unsigned long) * MAX_NR_ZONES); | 154 | start_pfn = node_start_pfn(nodeid); |
138 | memset(holes, 0, sizeof(unsigned long) * MAX_NR_ZONES); | 155 | end_pfn = node_end_pfn(nodeid); |
139 | 156 | ||
140 | start_pfn = node_start_pfn(nodeid); | 157 | Dprintk(KERN_INFO "setting up node %d %lx-%lx\n", |
141 | end_pfn = node_end_pfn(nodeid); | 158 | nodeid, start_pfn, end_pfn); |
142 | 159 | ||
143 | Dprintk(KERN_INFO "setting up node %d %lx-%lx\n", nodeid, start_pfn, end_pfn); | 160 | size_zones(zones, holes, start_pfn, end_pfn); |
144 | |||
145 | /* All nodes > 0 have a zero length zone DMA */ | ||
146 | dma_end_pfn = __pa(MAX_DMA_ADDRESS) >> PAGE_SHIFT; | ||
147 | if (start_pfn < dma_end_pfn) { | ||
148 | zones[ZONE_DMA] = dma_end_pfn - start_pfn; | ||
149 | holes[ZONE_DMA] = e820_hole_size(start_pfn, dma_end_pfn); | ||
150 | zones[ZONE_NORMAL] = end_pfn - dma_end_pfn; | ||
151 | holes[ZONE_NORMAL] = e820_hole_size(dma_end_pfn, end_pfn); | ||
152 | |||
153 | } else { | ||
154 | zones[ZONE_NORMAL] = end_pfn - start_pfn; | ||
155 | holes[ZONE_NORMAL] = e820_hole_size(start_pfn, end_pfn); | ||
156 | } | ||
157 | |||
158 | free_area_init_node(nodeid, NODE_DATA(nodeid), zones, | 161 | free_area_init_node(nodeid, NODE_DATA(nodeid), zones, |
159 | start_pfn, holes); | 162 | start_pfn, holes); |
160 | } | 163 | } |
@@ -167,18 +170,16 @@ void __init numa_init_array(void) | |||
167 | mapping. To avoid this fill in the mapping for all possible | 170 | mapping. To avoid this fill in the mapping for all possible |
168 | CPUs, as the number of CPUs is not known yet. | 171 | CPUs, as the number of CPUs is not known yet. |
169 | We round robin the existing nodes. */ | 172 | We round robin the existing nodes. */ |
170 | rr = 0; | 173 | rr = first_node(node_online_map); |
171 | for (i = 0; i < NR_CPUS; i++) { | 174 | for (i = 0; i < NR_CPUS; i++) { |
172 | if (cpu_to_node[i] != NUMA_NO_NODE) | 175 | if (cpu_to_node[i] != NUMA_NO_NODE) |
173 | continue; | 176 | continue; |
177 | numa_set_node(i, rr); | ||
174 | rr = next_node(rr, node_online_map); | 178 | rr = next_node(rr, node_online_map); |
175 | if (rr == MAX_NUMNODES) | 179 | if (rr == MAX_NUMNODES) |
176 | rr = first_node(node_online_map); | 180 | rr = first_node(node_online_map); |
177 | cpu_to_node[i] = rr; | ||
178 | rr++; | ||
179 | } | 181 | } |
180 | 182 | ||
181 | set_bit(0, &node_to_cpumask[cpu_to_node(0)]); | ||
182 | } | 183 | } |
183 | 184 | ||
184 | #ifdef CONFIG_NUMA_EMU | 185 | #ifdef CONFIG_NUMA_EMU |
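numa_init_array() now starts its round robin at the first online node and records every assignment through numa_set_node(), which (as the later hunk shows) also stamps the node number into the CPU's PDA. A sketch of the distribution with two online nodes and four CPUs lacking firmware affinity:

int online[2] = { 0, 1 };	/* node_online_map, hypothetical */
int cpu_to_node[4];
int rr = 0, i;			/* rr = first_node(node_online_map) */

for (i = 0; i < 4; i++) {
	cpu_to_node[i] = online[rr];
	rr = (rr + 1) % 2;	/* next_node(), wrapping to first_node() */
}
/* result: CPUs 0..3 land on nodes 0, 1, 0, 1 */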
@@ -207,8 +208,6 @@ static int numa_emulation(unsigned long start_pfn, unsigned long end_pfn) | |||
207 | if (i == numa_fake-1) | 208 | if (i == numa_fake-1) |
208 | sz = (end_pfn<<PAGE_SHIFT) - nodes[i].start; | 209 | sz = (end_pfn<<PAGE_SHIFT) - nodes[i].start; |
209 | nodes[i].end = nodes[i].start + sz; | 210 | nodes[i].end = nodes[i].start + sz; |
210 | if (i != numa_fake-1) | ||
211 | nodes[i].end--; | ||
212 | printk(KERN_INFO "Faking node %d at %016Lx-%016Lx (%LuMB)\n", | 211 | printk(KERN_INFO "Faking node %d at %016Lx-%016Lx (%LuMB)\n", |
213 | i, | 212 | i, |
214 | nodes[i].start, nodes[i].end, | 213 | nodes[i].start, nodes[i].end, |
@@ -259,18 +258,22 @@ void __init numa_initmem_init(unsigned long start_pfn, unsigned long end_pfn) | |||
259 | nodes_clear(node_online_map); | 258 | nodes_clear(node_online_map); |
260 | node_set_online(0); | 259 | node_set_online(0); |
261 | for (i = 0; i < NR_CPUS; i++) | 260 | for (i = 0; i < NR_CPUS; i++) |
262 | cpu_to_node[i] = 0; | 261 | numa_set_node(i, 0); |
263 | node_to_cpumask[0] = cpumask_of_cpu(0); | 262 | node_to_cpumask[0] = cpumask_of_cpu(0); |
264 | setup_node_bootmem(0, start_pfn << PAGE_SHIFT, end_pfn << PAGE_SHIFT); | 263 | setup_node_bootmem(0, start_pfn << PAGE_SHIFT, end_pfn << PAGE_SHIFT); |
265 | } | 264 | } |
266 | 265 | ||
267 | __cpuinit void numa_add_cpu(int cpu) | 266 | __cpuinit void numa_add_cpu(int cpu) |
268 | { | 267 | { |
269 | /* BP is initialized elsewhere */ | 268 | set_bit(cpu, &node_to_cpumask[cpu_to_node(cpu)]); |
270 | if (cpu) | ||
271 | set_bit(cpu, &node_to_cpumask[cpu_to_node(cpu)]); | ||
272 | } | 269 | } |
273 | 270 | ||
271 | void __cpuinit numa_set_node(int cpu, int node) | ||
272 | { | ||
273 | cpu_pda[cpu].nodenumber = node; | ||
274 | cpu_to_node[cpu] = node; | ||
275 | } | ||
276 | |||
274 | unsigned long __init numa_free_all_bootmem(void) | 277 | unsigned long __init numa_free_all_bootmem(void) |
275 | { | 278 | { |
276 | int i; | 279 | int i; |
@@ -281,9 +284,26 @@ unsigned long __init numa_free_all_bootmem(void) | |||
281 | return pages; | 284 | return pages; |
282 | } | 285 | } |
283 | 286 | ||
287 | #ifdef CONFIG_SPARSEMEM | ||
288 | static void __init arch_sparse_init(void) | ||
289 | { | ||
290 | int i; | ||
291 | |||
292 | for_each_online_node(i) | ||
293 | memory_present(i, node_start_pfn(i), node_end_pfn(i)); | ||
294 | |||
295 | sparse_init(); | ||
296 | } | ||
297 | #else | ||
298 | #define arch_sparse_init() do {} while (0) | ||
299 | #endif | ||
300 | |||
284 | void __init paging_init(void) | 301 | void __init paging_init(void) |
285 | { | 302 | { |
286 | int i; | 303 | int i; |
304 | |||
305 | arch_sparse_init(); | ||
306 | |||
287 | for_each_online_node(i) { | 307 | for_each_online_node(i) { |
288 | setup_node_zones(i); | 308 | setup_node_zones(i); |
289 | } | 309 | } |
diff --git a/arch/x86_64/mm/pageattr.c b/arch/x86_64/mm/pageattr.c index 94862e1ec032..b90e8fe9eeb0 100644 --- a/arch/x86_64/mm/pageattr.c +++ b/arch/x86_64/mm/pageattr.c | |||
@@ -220,8 +220,6 @@ void global_flush_tlb(void) | |||
220 | down_read(&init_mm.mmap_sem); | 220 | down_read(&init_mm.mmap_sem); |
221 | df = xchg(&df_list, NULL); | 221 | df = xchg(&df_list, NULL); |
222 | up_read(&init_mm.mmap_sem); | 222 | up_read(&init_mm.mmap_sem); |
223 | if (!df) | ||
224 | return; | ||
225 | flush_map((df && !df->next) ? df->address : 0); | 223 | flush_map((df && !df->next) ? df->address : 0); |
226 | for (; df; df = next_df) { | 224 | for (; df; df = next_df) { |
227 | next_df = df->next; | 225 | next_df = df->next; |
diff --git a/arch/x86_64/mm/srat.c b/arch/x86_64/mm/srat.c index 4b2e844c15a7..33340bd1e328 100644 --- a/arch/x86_64/mm/srat.c +++ b/arch/x86_64/mm/srat.c | |||
@@ -71,8 +71,6 @@ static __init void cutoff_node(int i, unsigned long start, unsigned long end) | |||
71 | nd->start = nd->end; | 71 | nd->start = nd->end; |
72 | } | 72 | } |
73 | if (nd->end > end) { | 73 | if (nd->end > end) { |
74 | if (!(end & 0xfff)) | ||
75 | end--; | ||
76 | nd->end = end; | 74 | nd->end = end; |
77 | if (nd->start > nd->end) | 75 | if (nd->start > nd->end) |
78 | nd->start = nd->end; | 76 | nd->start = nd->end; |
@@ -166,8 +164,6 @@ acpi_numa_memory_affinity_init(struct acpi_table_memory_affinity *ma) | |||
166 | if (nd->end < end) | 164 | if (nd->end < end) |
167 | nd->end = end; | 165 | nd->end = end; |
168 | } | 166 | } |
169 | if (!(nd->end & 0xfff)) | ||
170 | nd->end--; | ||
171 | printk(KERN_INFO "SRAT: Node %u PXM %u %Lx-%Lx\n", node, pxm, | 167 | printk(KERN_INFO "SRAT: Node %u PXM %u %Lx-%Lx\n", node, pxm, |
172 | nd->start, nd->end); | 168 | nd->start, nd->end); |
173 | } | 169 | } |
@@ -203,7 +199,7 @@ int __init acpi_scan_nodes(unsigned long start, unsigned long end) | |||
203 | if (cpu_to_node[i] == NUMA_NO_NODE) | 199 | if (cpu_to_node[i] == NUMA_NO_NODE) |
204 | continue; | 200 | continue; |
205 | if (!node_isset(cpu_to_node[i], nodes_parsed)) | 201 | if (!node_isset(cpu_to_node[i], nodes_parsed)) |
206 | cpu_to_node[i] = NUMA_NO_NODE; | 202 | numa_set_node(i, NUMA_NO_NODE); |
207 | } | 203 | } |
208 | numa_init_array(); | 204 | numa_init_array(); |
209 | return 0; | 205 | return 0; |
diff --git a/arch/x86_64/oprofile/Kconfig b/arch/x86_64/oprofile/Kconfig index 5ade19801b97..d8a84088471a 100644 --- a/arch/x86_64/oprofile/Kconfig +++ b/arch/x86_64/oprofile/Kconfig | |||
@@ -1,7 +1,3 @@ | |||
1 | |||
2 | menu "Profiling support" | ||
3 | depends on EXPERIMENTAL | ||
4 | |||
5 | config PROFILING | 1 | config PROFILING |
6 | bool "Profiling support (EXPERIMENTAL)" | 2 | bool "Profiling support (EXPERIMENTAL)" |
7 | help | 3 | help |
@@ -19,5 +15,3 @@ config OPROFILE | |||
19 | 15 | ||
20 | If unsure, say N. | 16 | If unsure, say N. |
21 | 17 | ||
22 | endmenu | ||
23 | |||