Diffstat (limited to 'arch/x86')
348 files changed, 14999 insertions, 7579 deletions
diff --git a/arch/x86/Kconfig b/arch/x86/Kconfig
index 56f47caf6fa0..25d2c6f7325e 100644
--- a/arch/x86/Kconfig
+++ b/arch/x86/Kconfig
@@ -23,6 +23,7 @@ config X86
 	def_bool y
 	select ARCH_HAS_DEBUG_STRICT_USER_COPY_CHECKS
 	select ARCH_MIGHT_HAVE_PC_PARPORT
+	select ARCH_MIGHT_HAVE_PC_SERIO
 	select HAVE_AOUT if X86_32
 	select HAVE_UNSTABLE_SCHED_CLOCK
 	select ARCH_SUPPORTS_NUMA_BALANCING
@@ -42,6 +43,7 @@ config X86
 	select HAVE_DMA_ATTRS
 	select HAVE_DMA_CONTIGUOUS if !SWIOTLB
 	select HAVE_KRETPROBES
+	select GENERIC_EARLY_IOREMAP
 	select HAVE_OPTPROBES
 	select HAVE_KPROBES_ON_FTRACE
 	select HAVE_FTRACE_MCOUNT_RECORD
@@ -106,9 +108,9 @@ config X86
 	select HAVE_ARCH_SOFT_DIRTY
 	select CLOCKSOURCE_WATCHDOG
 	select GENERIC_CLOCKEVENTS
-	select ARCH_CLOCKSOURCE_DATA if X86_64
+	select ARCH_CLOCKSOURCE_DATA
 	select GENERIC_CLOCKEVENTS_BROADCAST if X86_64 || (X86_32 && X86_LOCAL_APIC)
-	select GENERIC_TIME_VSYSCALL if X86_64
+	select GENERIC_TIME_VSYSCALL
 	select KTIME_SCALAR if X86_32
 	select GENERIC_STRNCPY_FROM_USER
 	select GENERIC_STRNLEN_USER
@@ -125,6 +127,8 @@ config X86
 	select RTC_LIB
 	select HAVE_DEBUG_STACKOVERFLOW
 	select HAVE_IRQ_EXIT_ON_IRQ_STACK if X86_64
+	select HAVE_CC_STACKPROTECTOR
+	select GENERIC_CPU_AUTOPROBE
 	select HAVE_ARCH_AUDITSYSCALL

 config INSTRUCTION_DECODER
@@ -194,9 +198,6 @@ config ARCH_HAS_CPU_RELAX
 config ARCH_HAS_CACHE_LINE_SIZE
 	def_bool y

-config ARCH_HAS_CPU_AUTOPROBE
-	def_bool y
-
 config HAVE_SETUP_PER_CPU_AREA
 	def_bool y

@@ -279,13 +280,13 @@ config SMP
 	bool "Symmetric multi-processing support"
 	---help---
 	  This enables support for systems with more than one CPU. If you have
-	  a system with only one CPU, like most personal computers, say N. If
-	  you have a system with more than one CPU, say Y.
+	  a system with only one CPU, say N. If you have a system with more
+	  than one CPU, say Y.

-	  If you say N here, the kernel will run on single and multiprocessor
+	  If you say N here, the kernel will run on uni- and multiprocessor
 	  machines, but will use only one CPU of a multiprocessor machine. If
 	  you say Y here, the kernel will run on many, but not all,
-	  singleprocessor machines. On a singleprocessor machine, the kernel
+	  uniprocessor machines. On a uniprocessor machine, the kernel
 	  will run faster if you say N here.

 	  Note that if you say Y here and choose architecture "586" or
@@ -345,12 +346,9 @@ config X86_EXTENDED_PLATFORM
 	  for the following (non-PC) 32 bit x86 platforms:
 		Goldfish (Android emulator)
 		AMD Elan
-		NUMAQ (IBM/Sequent)
 		RDC R-321x SoC
 		SGI 320/540 (Visual Workstation)
 		STA2X11-based (e.g. Northville)
-		Summit/EXA (IBM x440)
-		Unisys ES7000 IA32 series
 		Moorestown MID devices

 	  If you have one of these systems, or if you want to build a
@@ -439,42 +437,27 @@ config X86_INTEL_CE
 	  This option compiles in support for the CE4100 SOC for settop
 	  boxes and media devices.

-config X86_WANT_INTEL_MID
+config X86_INTEL_MID
 	bool "Intel MID platform support"
 	depends on X86_32
 	depends on X86_EXTENDED_PLATFORM
-	---help---
-	  Select to build a kernel capable of supporting Intel MID platform
-	  systems which do not have the PCI legacy interfaces (Moorestown,
-	  Medfield). If you are building for a PC class system say N here.
-
-if X86_WANT_INTEL_MID
-
-config X86_INTEL_MID
-	bool
-
-config X86_MDFLD
-	bool "Medfield MID platform"
+	depends on X86_PLATFORM_DEVICES
 	depends on PCI
 	depends on PCI_GOANY
 	depends on X86_IO_APIC
-	select X86_INTEL_MID
 	select SFI
+	select I2C
 	select DW_APB_TIMER
 	select APB_TIMER
-	select I2C
-	select SPI
 	select INTEL_SCU_IPC
-	select X86_PLATFORM_DEVICES
 	select MFD_INTEL_MSIC
 	---help---
-	  Medfield is Intel's Low Power Intel Architecture (LPIA) based Moblin
-	  Internet Device(MID) platform.
-	  Unlike standard x86 PCs, Medfield does not have many legacy devices
-	  nor standard legacy replacement devices/features. e.g. Medfield does
-	  not contain i8259, i8254, HPET, legacy BIOS, most of the io ports.
+	  Select to build a kernel capable of supporting Intel MID (Mobile
+	  Internet Device) platform systems which do not have the PCI legacy
+	  interfaces. If you are building for a PC class system say N here.

-endif
+	  Intel MID platforms are based on an Intel processor and chipset which
+	  consume less power than most of the x86 derivatives.

 config X86_INTEL_LPSS
 	bool "Intel Low Power Subsystem Support"
@@ -503,49 +486,22 @@ config X86_32_NON_STANDARD
 	depends on X86_32 && SMP
 	depends on X86_EXTENDED_PLATFORM
 	---help---
-	  This option compiles in the NUMAQ, Summit, bigsmp, ES7000,
-	  STA2X11, default subarchitectures. It is intended for a generic
-	  binary kernel. If you select them all, kernel will probe it
-	  one by one and will fallback to default.
+	  This option compiles in the bigsmp and STA2X11 default
+	  subarchitectures. It is intended for a generic binary
+	  kernel. If you select them all, kernel will probe it one by
+	  one and will fallback to default.

 # Alphabetically sorted list of Non standard 32 bit platforms

-config X86_NUMAQ
-	bool "NUMAQ (IBM/Sequent)"
-	depends on X86_32_NON_STANDARD
-	depends on PCI
-	select NUMA
-	select X86_MPPARSE
-	---help---
-	  This option is used for getting Linux to run on a NUMAQ (IBM/Sequent)
-	  NUMA multiquad box. This changes the way that processors are
-	  bootstrapped, and uses Clustered Logical APIC addressing mode instead
-	  of Flat Logical. You will need a new lynxer.elf file to flash your
-	  firmware with - send email to <Martin.Bligh@us.ibm.com>.
-
 config X86_SUPPORTS_MEMORY_FAILURE
 	def_bool y
 	# MCE code calls memory_failure():
 	depends on X86_MCE
 	# On 32-bit this adds too big of NODES_SHIFT and we run out of page flags:
-	depends on !X86_NUMAQ
 	# On 32-bit SPARSEMEM adds too big of SECTIONS_WIDTH:
 	depends on X86_64 || !SPARSEMEM
 	select ARCH_SUPPORTS_MEMORY_FAILURE

-config X86_VISWS
-	bool "SGI 320/540 (Visual Workstation)"
-	depends on X86_32 && PCI && X86_MPPARSE && PCI_GODIRECT
-	depends on X86_32_NON_STANDARD
-	---help---
-	  The SGI Visual Workstation series is an IA32-based workstation
-	  based on SGI systems chips with some legacy PC hardware attached.
-
-	  Say Y here to create a kernel to run on the SGI 320 or 540.
-
-	  A kernel compiled for the Visual Workstation will run on general
-	  PCs as well. See <file:Documentation/sgi-visws.txt> for details.
-
 config STA2X11
 	bool "STA2X11 Companion Chip Support"
 	depends on X86_32_NON_STANDARD && PCI
@@ -562,20 +518,6 @@ config STA2X11
 	  option is selected the kernel will still be able to boot on
 	  standard PC machines.

-config X86_SUMMIT
-	bool "Summit/EXA (IBM x440)"
-	depends on X86_32_NON_STANDARD
-	---help---
-	  This option is needed for IBM systems that use the Summit/EXA chipset.
-	  In particular, it is needed for the x440.
-
-config X86_ES7000
-	bool "Unisys ES7000 IA32 series"
-	depends on X86_32_NON_STANDARD && X86_BIGSMP
-	---help---
-	  Support for Unisys ES7000 systems. Say 'Y' here if this kernel is
-	  supposed to run on an IA32-based Unisys ES7000 system.
-
 config X86_32_IRIS
 	tristate "Eurobraille/Iris poweroff module"
 	depends on X86_32
@@ -698,14 +640,6 @@ config MEMTEST
 	  memtest=4, mean do 4 test patterns.
 	  If you are unsure how to answer this question, answer N.

-config X86_SUMMIT_NUMA
-	def_bool y
-	depends on X86_32 && NUMA && X86_32_NON_STANDARD
-
-config X86_CYCLONE_TIMER
-	def_bool y
-	depends on X86_SUMMIT
-
 source "arch/x86/Kconfig.cpu"

 config HPET_TIMER
@@ -747,6 +681,7 @@ config APB_TIMER
 # The code disables itself when not needed.
 config DMI
 	default y
+	select DMI_SCAN_MACHINE_NON_EFI_FALLBACK
 	bool "Enable DMI scanning" if EXPERT
 	---help---
 	  Enabled scanning of DMI to identify machine quirks. Say Y
@@ -833,7 +768,7 @@ config NR_CPUS
 	range 2 8192 if SMP && !MAXSMP && CPUMASK_OFFSTACK && X86_64
 	default "1" if !SMP
 	default "8192" if MAXSMP
-	default "32" if SMP && (X86_NUMAQ || X86_SUMMIT || X86_BIGSMP || X86_ES7000)
+	default "32" if SMP && X86_BIGSMP
 	default "8" if SMP
 	---help---
 	  This allows you to specify the maximum number of CPUs which this
@@ -897,10 +832,6 @@ config X86_IO_APIC
 	def_bool y
 	depends on X86_64 || SMP || X86_32_NON_STANDARD || X86_UP_IOAPIC || PCI_MSI

-config X86_VISWS_APIC
-	def_bool y
-	depends on X86_32 && X86_VISWS
-
 config X86_REROUTE_FOR_BROKEN_BOOT_IRQS
 	bool "Reroute for broken boot IRQs"
 	depends on X86_IO_APIC
@@ -954,7 +885,7 @@ config X86_ANCIENT_MCE
 	depends on X86_32 && X86_MCE
 	---help---
 	  Include support for machine check handling on old Pentium 5 or WinChip
-	  systems. These typically need to be enabled explicitely on the command
+	  systems. These typically need to be enabled explicitly on the command
 	  line.

 config X86_MCE_THRESHOLD
@@ -1065,9 +996,9 @@ config MICROCODE_INTEL
 	  This options enables microcode patch loading support for Intel
 	  processors.

-	  For latest news and information on obtaining all the required
-	  Intel ingredients for this driver, check:
-	  <http://www.urbanmyth.org/microcode/>.
+	  For the current Intel microcode data package go to
+	  <https://downloadcenter.intel.com> and search for
+	  'Linux Processor Microcode Data File'.

 config MICROCODE_AMD
 	bool "AMD microcode loading support"
@@ -1081,10 +1012,6 @@ config MICROCODE_OLD_INTERFACE
 	def_bool y
 	depends on MICROCODE

-config MICROCODE_INTEL_LIB
-	def_bool y
-	depends on MICROCODE_INTEL
-
 config MICROCODE_INTEL_EARLY
 	def_bool n

@@ -1122,13 +1049,11 @@ config X86_CPUID

 choice
 	prompt "High Memory Support"
-	default HIGHMEM64G if X86_NUMAQ
 	default HIGHMEM4G
 	depends on X86_32

 config NOHIGHMEM
 	bool "off"
-	depends on !X86_NUMAQ
 	---help---
 	  Linux can use up to 64 Gigabytes of physical memory on x86 systems.
 	  However, the address space of 32-bit x86 processors is only 4
@@ -1165,7 +1090,6 @@ config NOHIGHMEM

 config HIGHMEM4G
 	bool "4GB"
-	depends on !X86_NUMAQ
 	---help---
 	  Select this if you have a 32-bit processor and between 1 and 4
 	  gigabytes of physical RAM.
@@ -1257,8 +1181,8 @@ config DIRECT_GBPAGES
 config NUMA
 	bool "Numa Memory Allocation and Scheduler Support"
 	depends on SMP
-	depends on X86_64 || (X86_32 && HIGHMEM64G && (X86_NUMAQ || X86_BIGSMP || X86_SUMMIT && ACPI))
-	default y if (X86_NUMAQ || X86_SUMMIT || X86_BIGSMP)
+	depends on X86_64 || (X86_32 && HIGHMEM64G && X86_BIGSMP)
+	default y if X86_BIGSMP
 	---help---
 	  Enable NUMA (Non Uniform Memory Access) support.

@@ -1269,15 +1193,11 @@ config NUMA
 	  For 64-bit this is recommended if the system is Intel Core i7
 	  (or later), AMD Opteron, or EM64T NUMA.

-	  For 32-bit this is only needed on (rare) 32-bit-only platforms
-	  that support NUMA topologies, such as NUMAQ / Summit, or if you
-	  boot a 32-bit kernel on a 64-bit NUMA platform.
+	  For 32-bit this is only needed if you boot a 32-bit
+	  kernel on a 64-bit NUMA platform.

 	  Otherwise, you should say N.

-comment "NUMA (Summit) requires SMP, 64GB highmem support, ACPI"
-	depends on X86_32 && X86_SUMMIT && (!HIGHMEM64G || !ACPI)
-
 config AMD_NUMA
 	def_bool y
 	prompt "Old style AMD Opteron NUMA detection"
@@ -1319,7 +1239,6 @@ config NODES_SHIFT
 	range 1 10
 	default "10" if MAXSMP
 	default "6" if X86_64
-	default "4" if X86_NUMAQ
 	default "3"
 	depends on NEED_MULTIPLE_NODES
 	---help---
@@ -1602,6 +1521,20 @@ config EFI_STUB

 	  See Documentation/efi-stub.txt for more information.

+config EFI_MIXED
+	bool "EFI mixed-mode support"
+	depends on EFI_STUB && X86_64
+	---help---
+	  Enabling this feature allows a 64-bit kernel to be booted
+	  on a 32-bit firmware, provided that your CPU supports 64-bit
+	  mode.
+
+	  Note that it is not possible to boot a mixed-mode enabled
+	  kernel via the EFI boot stub - a bootloader that supports
+	  the EFI handover protocol must be used.
+
+	  If unsure, say N.
+
 config SECCOMP
 	def_bool y
 	prompt "Enable seccomp to safely compute untrusted bytecode"
@@ -1618,22 +1551,6 @@ config SECCOMP

 	  If unsure, say Y. Only embedded should say N here.

-config CC_STACKPROTECTOR
-	bool "Enable -fstack-protector buffer overflow detection"
-	---help---
-	  This option turns on the -fstack-protector GCC feature. This
-	  feature puts, at the beginning of functions, a canary value on
-	  the stack just before the return address, and validates
-	  the value just before actually returning. Stack based buffer
-	  overflows (that need to overwrite this return address) now also
-	  overwrite the canary, which gets detected and the attack is then
-	  neutralized via a kernel panic.
-
-	  This feature requires gcc version 4.2 or above, or a distribution
-	  gcc with the feature backported. Older versions are automatically
-	  detected and for those versions, this configuration option is
-	  ignored. (and a warning is printed during bootup)
-
 source kernel/Kconfig.hz

 config KEXEC
@@ -1729,16 +1646,67 @@ config RELOCATABLE

 	  Note: If CONFIG_RELOCATABLE=y, then the kernel runs from the address
 	  it has been loaded at and the compile time physical address
-	  (CONFIG_PHYSICAL_START) is ignored.
+	  (CONFIG_PHYSICAL_START) is used as the minimum location.

-# Relocation on x86-32 needs some additional build support
+config RANDOMIZE_BASE
+	bool "Randomize the address of the kernel image"
+	depends on RELOCATABLE
+	depends on !HIBERNATION
+	default n
+	---help---
+	  Randomizes the physical and virtual address at which the
+	  kernel image is decompressed, as a security feature that
+	  deters exploit attempts relying on knowledge of the location
+	  of kernel internals.
+
+	  Entropy is generated using the RDRAND instruction if it is
+	  supported. If RDTSC is supported, it is used as well. If
+	  neither RDRAND nor RDTSC are supported, then randomness is
+	  read from the i8254 timer.
+
+	  The kernel will be offset by up to RANDOMIZE_BASE_MAX_OFFSET,
+	  and aligned according to PHYSICAL_ALIGN. Since the kernel is
+	  built using 2GiB addressing, and PHYSICAL_ALIGN must be at a
+	  minimum of 2MiB, only 10 bits of entropy are theoretically
+	  possible. At best, due to page table layouts, 64-bit can use
+	  9 bits of entropy and 32-bit uses 8 bits.
+
+	  If unsure, say N.
+
+config RANDOMIZE_BASE_MAX_OFFSET
+	hex "Maximum kASLR offset allowed" if EXPERT
+	depends on RANDOMIZE_BASE
+	range 0x0 0x20000000 if X86_32
+	default "0x20000000" if X86_32
+	range 0x0 0x40000000 if X86_64
+	default "0x40000000" if X86_64
+	---help---
+	  The lesser of RANDOMIZE_BASE_MAX_OFFSET and available physical
+	  memory is used to determine the maximal offset in bytes that will
+	  be applied to the kernel when kernel Address Space Layout
+	  Randomization (kASLR) is active. This must be a multiple of
+	  PHYSICAL_ALIGN.
+
+	  On 32-bit this is limited to 512MiB by page table layouts. The
+	  default is 512MiB.
+
+	  On 64-bit this is limited by how the kernel fixmap page table is
+	  positioned, so this cannot be larger than 1GiB currently. Without
+	  RANDOMIZE_BASE, there is a 512MiB to 1.5GiB split between kernel
+	  and modules. When RANDOMIZE_BASE_MAX_OFFSET is above 512MiB, the
+	  modules area will shrink to compensate, up to the current maximum
+	  1GiB to 1GiB split. The default is 1GiB.
+
+	  If unsure, leave at the default value.
+
+# Relocation on x86 needs some additional build support
 config X86_NEED_RELOCS
 	def_bool y
-	depends on X86_32 && RELOCATABLE
+	depends on RANDOMIZE_BASE || (X86_32 && RELOCATABLE)

 config PHYSICAL_ALIGN
 	hex "Alignment value to which kernel should be aligned"
-	default "0x1000000"
+	default "0x200000"
 	range 0x2000 0x1000000 if X86_32
 	range 0x200000 0x1000000 if X86_64
 	---help---
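The entropy figures in the RANDOMIZE_BASE help text above follow directly from the defaults: the number of possible load positions is the maximum offset divided by the alignment. A standalone sketch (constants copied from the defaults above) checks the arithmetic:

	#include <stdio.h>

	/* Defaults from the Kconfig entries above. */
	#define PHYSICAL_ALIGN		0x200000UL	/* 2 MiB */
	#define MAX_OFFSET_X86_64	0x40000000UL	/* 1 GiB */
	#define MAX_OFFSET_X86_32	0x20000000UL	/* 512 MiB */

	int main(void)
	{
		unsigned long slots64 = MAX_OFFSET_X86_64 / PHYSICAL_ALIGN;
		unsigned long slots32 = MAX_OFFSET_X86_32 / PHYSICAL_ALIGN;

		/* 512 slots -> 9 bits, 256 slots -> 8 bits, matching the help text. */
		printf("64-bit: %lu slots (9 bits)\n", slots64);
		printf("32-bit: %lu slots (8 bits)\n", slots32);
		return 0;
	}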
@@ -1818,17 +1786,29 @@ config DEBUG_HOTPLUG_CPU0
 	  If unsure, say N.

 config COMPAT_VDSO
-	def_bool y
-	prompt "Compat VDSO support"
+	def_bool n
+	prompt "Disable the 32-bit vDSO (needed for glibc 2.3.3)"
 	depends on X86_32 || IA32_EMULATION
 	---help---
-	  Map the 32-bit VDSO to the predictable old-style address too.
+	  Certain buggy versions of glibc will crash if they are
+	  presented with a 32-bit vDSO that is not mapped at the address
+	  indicated in its segment table.

-	  Say N here if you are running a sufficiently recent glibc
-	  version (2.3.3 or later), to remove the high-mapped
-	  VDSO mapping and to exclusively use the randomized VDSO.
+	  The bug was introduced by f866314b89d56845f55e6f365e18b31ec978ec3a
+	  and fixed by 3b3ddb4f7db98ec9e912ccdf54d35df4aa30e04a and
+	  49ad572a70b8aeb91e57483a11dd1b77e31c4468. Glibc 2.3.3 is
+	  the only released version with the bug, but OpenSUSE 9
+	  contains a buggy "glibc 2.3.2".

-	  If unsure, say Y.
+	  The symptom of the bug is that everything crashes on startup, saying:
+	  dl_main: Assertion `(void *) ph->p_vaddr == _rtld_local._dl_sysinfo_dso' failed!
+
+	  Saying Y here changes the default value of the vdso32 boot
+	  option from 1 to 0, which turns off the 32-bit vDSO entirely.
+	  This works around the glibc bug but hurts performance.
+
+	  If unsure, say N: if you are compiling your own kernel, you
+	  are unlikely to be using a buggy version of glibc.

 config CMDLINE_BOOL
 	bool "Built-in kernel command line"
@@ -2394,6 +2374,14 @@ config X86_DMA_REMAP
 	bool
 	depends on STA2X11

+config IOSF_MBI
+	bool
+	depends on PCI
+	---help---
+	  To be selected by modules requiring access to the Intel OnChip System
+	  Fabric (IOSF) Sideband MailBox Interface (MBI). For MBI platforms
+	  enumerable by PCI.
+
 source "net/Kconfig"

 source "drivers/Kconfig"
diff --git a/arch/x86/Kconfig.cpu b/arch/x86/Kconfig.cpu
index c026cca5602c..6983314c8b37 100644
--- a/arch/x86/Kconfig.cpu
+++ b/arch/x86/Kconfig.cpu
@@ -341,10 +341,6 @@ config X86_USE_3DNOW
 	def_bool y
 	depends on (MCYRIXIII || MK7 || MGEODE_LX) && !UML

-config X86_OOSTORE
-	def_bool y
-	depends on (MWINCHIP3D || MWINCHIPC6) && MTRR
-
 #
 # P6_NOPs are a relatively minor optimization that require a family >=
 # 6 processor, except that it is broken on certain VIA chips.
@@ -363,7 +359,7 @@ config X86_P6_NOP

 config X86_TSC
 	def_bool y
-	depends on ((MWINCHIP3D || MCRUSOE || MEFFICEON || MCYRIXIII || MK7 || MK6 || MPENTIUM4 || MPENTIUMM || MPENTIUMIII || MPENTIUMII || M686 || M586MMX || M586TSC || MK8 || MVIAC3_2 || MVIAC7 || MGEODEGX1 || MGEODE_LX || MCORE2 || MATOM) && !X86_NUMAQ) || X86_64
+	depends on (MWINCHIP3D || MCRUSOE || MEFFICEON || MCYRIXIII || MK7 || MK6 || MPENTIUM4 || MPENTIUMM || MPENTIUMIII || MPENTIUMII || M686 || M586MMX || M586TSC || MK8 || MVIAC3_2 || MVIAC7 || MGEODEGX1 || MGEODE_LX || MCORE2 || MATOM) || X86_64

 config X86_CMPXCHG64
 	def_bool y
diff --git a/arch/x86/Kconfig.debug b/arch/x86/Kconfig.debug
index 0f3621ed1db6..61bd2ad94281 100644
--- a/arch/x86/Kconfig.debug
+++ b/arch/x86/Kconfig.debug
@@ -81,6 +81,15 @@ config X86_PTDUMP
 	  kernel.
 	  If in doubt, say "N"

+config EFI_PGT_DUMP
+	bool "Dump the EFI pagetable"
+	depends on EFI && X86_PTDUMP
+	---help---
+	  Enable this if you want to dump the EFI page table before
+	  enabling virtual mode. This can be used to debug miscellaneous
+	  issues with the mapping of the EFI runtime regions into that
+	  table.
+
 config DEBUG_RODATA
 	bool "Write protect kernel read-only data structures"
 	default y
@@ -184,6 +193,7 @@ config HAVE_MMIOTRACE_SUPPORT
 config X86_DECODER_SELFTEST
 	bool "x86 instruction decoder selftest"
 	depends on DEBUG_KERNEL && KPROBES
+	depends on !COMPILE_TEST
 	---help---
 	  Perform x86 instruction decoder selftests at build time.
 	  This option is useful for checking the sanity of x86 instruction
diff --git a/arch/x86/Makefile b/arch/x86/Makefile
index 57d021507120..3b9348a0c1a4 100644
--- a/arch/x86/Makefile
+++ b/arch/x86/Makefile
@@ -11,6 +11,28 @@ else
         KBUILD_DEFCONFIG := $(ARCH)_defconfig
 endif

+# How to compile the 16-bit code.  Note we always compile for -march=i386;
+# that way we can complain to the user if the CPU is insufficient.
+#
+# The -m16 option is supported by GCC >= 4.9 and clang >= 3.5. For
+# older versions of GCC, we need to play evil and unreliable tricks to
+# attempt to ensure that our asm(".code16gcc") is first in the asm
+# output.
+CODE16GCC_CFLAGS := -m32 -include $(srctree)/arch/x86/boot/code16gcc.h \
+		    $(call cc-option, -fno-toplevel-reorder,\
+		      $(call cc-option, -fno-unit-at-a-time))
+M16_CFLAGS	 := $(call cc-option, -m16, $(CODE16GCC_CFLAGS))
+
+REALMODE_CFLAGS	:= $(M16_CFLAGS) -g -Os -D__KERNEL__ \
+		   -DDISABLE_BRANCH_PROFILING \
+		   -Wall -Wstrict-prototypes -march=i386 -mregparm=3 \
+		   -fno-strict-aliasing -fomit-frame-pointer -fno-pic \
+		   -mno-mmx -mno-sse \
+		   $(call cc-option, -ffreestanding) \
+		   $(call cc-option, -fno-stack-protector) \
+		   $(call cc-option, -mpreferred-stack-boundary=2)
+export REALMODE_CFLAGS
+
 # BITS is used as extension for files which are available in a 32 bit
 # and a 64 bit version to simplify shared Makefiles.
 # e.g.: obj-y += foo_$(BITS).o
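For toolchains that lack -m16, the fallback above relies on gas's .code16gcc directive being emitted before any compiler-generated code, which is what the -include of code16gcc.h plus -fno-toplevel-reorder tries to guarantee. A minimal C illustration of the trick (file and function names here are illustrative, not the actual boot code):

	/* sketch of code16gcc.h: switch gas into 16-bit mode with
	 * 32-bit-style operands; must precede all compiler output,
	 * hence -include plus -fno-toplevel-reorder above. */
	asm(".code16gcc");

	/* build: gcc -m32 -include code16gcc.h -fno-toplevel-reorder -c t.c
	 * The compiler still emits 32-bit operations; the assembler adds
	 * operand- and address-size prefixes so the resulting code can
	 * execute in 16-bit real mode. */
	int add16(int a, int b)
	{
		return a + b;
	}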
@@ -60,8 +82,8 @@ else
         KBUILD_AFLAGS += -m64
         KBUILD_CFLAGS += -m64

-        # Don't autogenerate MMX or SSE instructions
-        KBUILD_CFLAGS += -mno-mmx -mno-sse
+        # Don't autogenerate traditional x87, MMX or SSE instructions
+        KBUILD_CFLAGS += -mno-mmx -mno-sse -mno-80387 -mno-fp-ret-in-387

         # Use -mpreferred-stack-boundary=3 if supported.
         KBUILD_CFLAGS += $(call cc-option,-mpreferred-stack-boundary=3)
@@ -89,13 +111,11 @@ else
         KBUILD_CFLAGS += -maccumulate-outgoing-args
 endif

+# Make sure compiler does not have buggy stack-protector support.
 ifdef CONFIG_CC_STACKPROTECTOR
 	cc_has_sp := $(srctree)/scripts/gcc-x86_$(BITS)-has-stack-protector.sh
-        ifeq ($(shell $(CONFIG_SHELL) $(cc_has_sp) $(CC) $(KBUILD_CPPFLAGS) $(biarch)),y)
-                stackp-y := -fstack-protector
-                KBUILD_CFLAGS += $(stackp-y)
-        else
-                $(warning stack protector enabled but no compiler support)
+        ifneq ($(shell $(CONFIG_SHELL) $(cc_has_sp) $(CC) $(KBUILD_CPPFLAGS) $(biarch)),y)
+                $(warning stack-protector enabled but compiler support broken)
         endif
 endif

@@ -132,6 +152,7 @@ cfi-sections := $(call as-instr,.cfi_sections .debug_frame,-DCONFIG_AS_CFI_SECTIONS=1)

 # does binutils support specific instructions?
 asinstr := $(call as-instr,fxsaveq (%rax),-DCONFIG_AS_FXSAVEQ=1)
+asinstr += $(call as-instr,crc32l %eax$(comma)%eax,-DCONFIG_AS_CRC32=1)
 avx_instr := $(call as-instr,vxorps %ymm0$(comma)%ymm1$(comma)%ymm2,-DCONFIG_AS_AVX=1)
 avx2_instr :=$(call as-instr,vpbroadcastb %xmm0$(comma)%ymm1,-DCONFIG_AS_AVX2=1)

diff --git a/arch/x86/boot/Makefile b/arch/x86/boot/Makefile
index d9c11956fce0..abb9eba61b50 100644
--- a/arch/x86/boot/Makefile
+++ b/arch/x86/boot/Makefile
@@ -20,7 +20,7 @@ targets := vmlinux.bin setup.bin setup.elf bzImage
 targets += fdimage fdimage144 fdimage288 image.iso mtools.conf
 subdir- := compressed

-setup-y		+= a20.o bioscall.o cmdline.o copy.o cpu.o cpucheck.o
+setup-y		+= a20.o bioscall.o cmdline.o copy.o cpu.o cpuflags.o cpucheck.o
 setup-y		+= early_serial_console.o edd.o header.o main.o mca.o memory.o
 setup-y		+= pm.o pmjump.o printf.o regs.o string.o tty.o video.o
 setup-y		+= video-mode.o version.o
@@ -51,20 +51,7 @@ $(obj)/cpustr.h: $(obj)/mkcpustr FORCE

 # ---------------------------------------------------------------------------

-# How to compile the 16-bit code.  Note we always compile for -march=i386,
-# that way we can complain to the user if the CPU is insufficient.
-KBUILD_CFLAGS	:= $(USERINCLUDE) -m32 -g -Os -D_SETUP -D__KERNEL__ \
-		   -DDISABLE_BRANCH_PROFILING \
-		   -Wall -Wstrict-prototypes \
-		   -march=i386 -mregparm=3 \
-		   -include $(srctree)/$(src)/code16gcc.h \
-		   -fno-strict-aliasing -fomit-frame-pointer -fno-pic \
-		   -mno-mmx -mno-sse \
-		   $(call cc-option, -ffreestanding) \
-		   $(call cc-option, -fno-toplevel-reorder,\
-		      $(call cc-option, -fno-unit-at-a-time)) \
-		   $(call cc-option, -fno-stack-protector) \
-		   $(call cc-option, -mpreferred-stack-boundary=2)
+KBUILD_CFLAGS	:= $(USERINCLUDE) $(REALMODE_CFLAGS) -D_SETUP
 KBUILD_AFLAGS	:= $(KBUILD_CFLAGS) -D__ASSEMBLY__
 GCOV_PROFILE := n

@@ -93,7 +80,7 @@ targets += voffset.h
 $(obj)/voffset.h: vmlinux FORCE
 	$(call if_changed,voffset)

-sed-zoffset := -e 's/^\([0-9a-fA-F]*\) . \(startup_32\|startup_64\|efi_pe_entry\|efi_stub_entry\|input_data\|_end\|z_.*\)$$/\#define ZO_\2 0x\1/p'
+sed-zoffset := -e 's/^\([0-9a-fA-F]*\) . \(startup_32\|startup_64\|efi32_stub_entry\|efi64_stub_entry\|efi_pe_entry\|input_data\|_end\|z_.*\)$$/\#define ZO_\2 0x\1/p'

 quiet_cmd_zoffset = ZOFFSET $@
       cmd_zoffset = $(NM) $< | sed -n $(sed-zoffset) > $@
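The sed expression feeds nm output into a generated header of ZO_* offsets consumed by the setup code; the change here simply adds the two new EFI stub entry symbols to the match list. The resulting header looks roughly like this (symbol addresses invented for illustration):

	#define ZO_efi32_stub_entry 0x190
	#define ZO_efi64_stub_entry 0x330
	#define ZO_efi_pe_entry 0x120
	#define ZO_input_data 0x3b0
	#define ZO_startup_32 0x0
	#define ZO_z_extract_offset 0x255000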
diff --git a/arch/x86/boot/bioscall.S b/arch/x86/boot/bioscall.S
index 1dfbf64e52a2..d401b4a262b0 100644
--- a/arch/x86/boot/bioscall.S
+++ b/arch/x86/boot/bioscall.S
@@ -1,6 +1,6 @@
 /* -----------------------------------------------------------------------
  *
- *   Copyright 2009 Intel Corporation; author H. Peter Anvin
+ *   Copyright 2009-2014 Intel Corporation; author H. Peter Anvin
  *
  *   This file is part of the Linux kernel, and is made available under
  *   the terms of the GNU General Public License version 2 or (at your
@@ -13,8 +13,8 @@
  * touching registers they shouldn't be.
  */

-	.code16gcc
-	.text
+	.code16
+	.section ".inittext","ax"
 	.globl	intcall
 	.type	intcall, @function
 intcall:
diff --git a/arch/x86/boot/boot.h b/arch/x86/boot/boot.h
index ef72baeff484..bd49ec61255c 100644
--- a/arch/x86/boot/boot.h
+++ b/arch/x86/boot/boot.h
@@ -26,9 +26,8 @@
 #include <asm/boot.h>
 #include <asm/setup.h>
 #include "bitops.h"
-#include <asm/cpufeature.h>
-#include <asm/processor-flags.h>
 #include "ctype.h"
+#include "cpuflags.h"

 /* Useful macros */
 #define BUILD_BUG_ON(condition) ((void)sizeof(char[1 - 2*!!(condition)]))
@@ -178,14 +177,6 @@ static inline void wrgs32(u32 v, addr_t addr)
 }

 /* Note: these only return true/false, not a signed return value! */
-static inline int memcmp(const void *s1, const void *s2, size_t len)
-{
-	u8 diff;
-	asm("repe; cmpsb; setnz %0"
-	    : "=qm" (diff), "+D" (s1), "+S" (s2), "+c" (len));
-	return diff;
-}
-
 static inline int memcmp_fs(const void *s1, addr_t s2, size_t len)
 {
 	u8 diff;
@@ -229,11 +220,6 @@ void copy_to_fs(addr_t dst, void *src, size_t len);
 void *copy_from_fs(void *dst, addr_t src, size_t len);
 void copy_to_gs(addr_t dst, void *src, size_t len);
 void *copy_from_gs(void *dst, addr_t src, size_t len);
-void *memcpy(void *dst, void *src, size_t len);
-void *memset(void *dst, int c, size_t len);
-
-#define memcpy(d,s,l) __builtin_memcpy(d,s,l)
-#define memset(d,c,l) __builtin_memset(d,c,l)

 /* a20.c */
 int enable_a20(void);
@@ -307,14 +293,7 @@ static inline int cmdline_find_option_bool(const char *option)
 	return __cmdline_find_option_bool(cmd_line_ptr, option);
 }

-
 /* cpu.c, cpucheck.c */
-struct cpu_features {
-	int level;		/* Family, or 64 for x86-64 */
-	int model;
-	u32 flags[NCAPINTS];
-};
-extern struct cpu_features cpu;
 int check_cpu(int *cpu_level_ptr, int *req_level_ptr, u32 **err_flags_ptr);
 int validate_cpu(void);

diff --git a/arch/x86/boot/compressed/Makefile b/arch/x86/boot/compressed/Makefile
index c8a6792e7842..0fcd9133790c 100644
--- a/arch/x86/boot/compressed/Makefile
+++ b/arch/x86/boot/compressed/Makefile
@@ -28,7 +28,7 @@ HOST_EXTRACFLAGS += -I$(srctree)/tools/include

 VMLINUX_OBJS = $(obj)/vmlinux.lds $(obj)/head_$(BITS).o $(obj)/misc.o \
 	$(obj)/string.o $(obj)/cmdline.o $(obj)/early_serial_console.o \
-	$(obj)/piggy.o
+	$(obj)/piggy.o $(obj)/cpuflags.o $(obj)/aslr.o

 $(obj)/eboot.o: KBUILD_CFLAGS += -fshort-wchar -mno-red-zone

diff --git a/arch/x86/boot/compressed/aslr.c b/arch/x86/boot/compressed/aslr.c
new file mode 100644
index 000000000000..4dbf967da50d
--- /dev/null
+++ b/arch/x86/boot/compressed/aslr.c
@@ -0,0 +1,317 @@
+#include "misc.h"
+
+#ifdef CONFIG_RANDOMIZE_BASE
+#include <asm/msr.h>
+#include <asm/archrandom.h>
+#include <asm/e820.h>
+
+#include <generated/compile.h>
+#include <linux/module.h>
+#include <linux/uts.h>
+#include <linux/utsname.h>
+#include <generated/utsrelease.h>
+
+/* Simplified build-specific string for starting entropy. */
+static const char build_str[] = UTS_RELEASE " (" LINUX_COMPILE_BY "@"
+		LINUX_COMPILE_HOST ") (" LINUX_COMPILER ") " UTS_VERSION;
+
+#define I8254_PORT_CONTROL	0x43
+#define I8254_PORT_COUNTER0	0x40
+#define I8254_CMD_READBACK	0xC0
+#define I8254_SELECT_COUNTER0	0x02
+#define I8254_STATUS_NOTREADY	0x40
+static inline u16 i8254(void)
+{
+	u16 status, timer;
+
+	do {
+		outb(I8254_PORT_CONTROL,
+		     I8254_CMD_READBACK | I8254_SELECT_COUNTER0);
+		status = inb(I8254_PORT_COUNTER0);
+		timer  = inb(I8254_PORT_COUNTER0);
+		timer |= inb(I8254_PORT_COUNTER0) << 8;
+	} while (status & I8254_STATUS_NOTREADY);
+
+	return timer;
+}
+
+static unsigned long rotate_xor(unsigned long hash, const void *area,
+				size_t size)
+{
+	size_t i;
+	unsigned long *ptr = (unsigned long *)area;
+
+	for (i = 0; i < size / sizeof(hash); i++) {
+		/* Rotate by odd number of bits and XOR. */
+		hash = (hash << ((sizeof(hash) * 8) - 7)) | (hash >> 7);
+		hash ^= ptr[i];
+	}
+
+	return hash;
+}
+
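rotate_xor() above is a cheap mixing step, not a cryptographic hash: it folds a buffer into a single word, rotating between words so identical values at different offsets influence different bits. Since it has no kernel dependencies it can be exercised in userspace; a const-correct copy with an invented input string:

	#include <stdio.h>

	static unsigned long rotate_xor(unsigned long hash, const void *area,
					size_t size)
	{
		size_t i;
		const unsigned long *ptr = area;

		for (i = 0; i < size / sizeof(hash); i++) {
			/* Rotate by odd number of bits and XOR. */
			hash = (hash << ((sizeof(hash) * 8) - 7)) | (hash >> 7);
			hash ^= ptr[i];
		}
		return hash;
	}

	int main(void)
	{
		/* Stand-in for build_str; any buffer works. */
		const char buf[] = "3.x.y (user@host) (gcc version ...) #1 SMP";

		printf("%#lx\n", rotate_xor(0, buf, sizeof(buf)));
		return 0;
	}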
+/* Attempt to create a simple but unpredictable starting entropy. */
+static unsigned long get_random_boot(void)
+{
+	unsigned long hash = 0;
+
+	hash = rotate_xor(hash, build_str, sizeof(build_str));
+	hash = rotate_xor(hash, real_mode, sizeof(*real_mode));
+
+	return hash;
+}
+
+static unsigned long get_random_long(void)
+{
+#ifdef CONFIG_X86_64
+	const unsigned long mix_const = 0x5d6008cbf3848dd3UL;
+#else
+	const unsigned long mix_const = 0x3f39e593UL;
+#endif
+	unsigned long raw, random = get_random_boot();
+	bool use_i8254 = true;
+
+	debug_putstr("KASLR using");
+
+	if (has_cpuflag(X86_FEATURE_RDRAND)) {
+		debug_putstr(" RDRAND");
+		if (rdrand_long(&raw)) {
+			random ^= raw;
+			use_i8254 = false;
+		}
+	}
+
+	if (has_cpuflag(X86_FEATURE_TSC)) {
+		debug_putstr(" RDTSC");
+		rdtscll(raw);
+
+		random ^= raw;
+		use_i8254 = false;
+	}
+
+	if (use_i8254) {
+		debug_putstr(" i8254");
+		random ^= i8254();
+	}
+
+	/* Circular multiply for better bit diffusion */
+	asm("mul %3"
+	    : "=a" (random), "=d" (raw)
+	    : "a" (random), "rm" (mix_const));
+	random += raw;
+
+	debug_putstr("...\n");
+
+	return random;
+}
+
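The closing asm in get_random_long() widens the multiply: x86 MUL leaves the low half of the double-width product in rAX and the high half in rDX, and adding the two halves folds every input bit back into the result. A portable sketch of the same mixing step for the 64-bit case (assumes a compiler with __int128 support):

	#include <stdint.h>

	/* Equivalent of:
	 *   asm("mul %3" : "=a"(random), "=d"(raw)
	 *                : "a"(random), "rm"(mix_const));
	 *   random += raw;
	 */
	uint64_t circular_multiply(uint64_t random, uint64_t mix_const)
	{
		unsigned __int128 prod = (unsigned __int128)random * mix_const;

		/* low half plus high half of the 128-bit product */
		return (uint64_t)prod + (uint64_t)(prod >> 64);
	}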
+struct mem_vector {
+	unsigned long start;
+	unsigned long size;
+};
+
+#define MEM_AVOID_MAX 5
+static struct mem_vector mem_avoid[MEM_AVOID_MAX];
+
+static bool mem_contains(struct mem_vector *region, struct mem_vector *item)
+{
+	/* Item at least partially before region. */
+	if (item->start < region->start)
+		return false;
+	/* Item at least partially after region. */
+	if (item->start + item->size > region->start + region->size)
+		return false;
+	return true;
+}
+
+static bool mem_overlaps(struct mem_vector *one, struct mem_vector *two)
+{
+	/* Item one is entirely before item two. */
+	if (one->start + one->size <= two->start)
+		return false;
+	/* Item one is entirely after item two. */
+	if (one->start >= two->start + two->size)
+		return false;
+	return true;
+}
+
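Both helpers above treat a region as the half-open interval [start, start + size), so two regions that merely touch do not overlap. A small userspace harness (values invented) pins down that edge case:

	#include <assert.h>
	#include <stdbool.h>

	struct mem_vector {
		unsigned long start;
		unsigned long size;
	};

	static bool mem_overlaps(struct mem_vector *one, struct mem_vector *two)
	{
		if (one->start + one->size <= two->start)
			return false;
		if (one->start >= two->start + two->size)
			return false;
		return true;
	}

	int main(void)
	{
		struct mem_vector a = { .start = 0x1000, .size = 0x1000 };
		struct mem_vector b = { .start = 0x2000, .size = 0x1000 }; /* touches a */
		struct mem_vector c = { .start = 0x1800, .size = 0x1000 }; /* overlaps a */

		assert(!mem_overlaps(&a, &b));	/* end == start: no overlap */
		assert(mem_overlaps(&a, &c));
		return 0;
	}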
+static void mem_avoid_init(unsigned long input, unsigned long input_size,
+			   unsigned long output, unsigned long output_size)
+{
+	u64 initrd_start, initrd_size;
+	u64 cmd_line, cmd_line_size;
+	unsigned long unsafe, unsafe_len;
+	char *ptr;
+
+	/*
+	 * Avoid the region that is unsafe to overlap during
+	 * decompression (see calculations at top of misc.c).
+	 */
+	unsafe_len = (output_size >> 12) + 32768 + 18;
+	unsafe = (unsigned long)input + input_size - unsafe_len;
+	mem_avoid[0].start = unsafe;
+	mem_avoid[0].size = unsafe_len;
+
+	/* Avoid initrd. */
+	initrd_start  = (u64)real_mode->ext_ramdisk_image << 32;
+	initrd_start |= real_mode->hdr.ramdisk_image;
+	initrd_size  = (u64)real_mode->ext_ramdisk_size << 32;
+	initrd_size |= real_mode->hdr.ramdisk_size;
+	mem_avoid[1].start = initrd_start;
+	mem_avoid[1].size = initrd_size;
+
+	/* Avoid kernel command line. */
+	cmd_line  = (u64)real_mode->ext_cmd_line_ptr << 32;
+	cmd_line |= real_mode->hdr.cmd_line_ptr;
+	/* Calculate size of cmd_line. */
+	ptr = (char *)(unsigned long)cmd_line;
+	for (cmd_line_size = 0; ptr[cmd_line_size++]; )
+		;
+	mem_avoid[2].start = cmd_line;
+	mem_avoid[2].size = cmd_line_size;
+
+	/* Avoid heap memory. */
+	mem_avoid[3].start = (unsigned long)free_mem_ptr;
+	mem_avoid[3].size = BOOT_HEAP_SIZE;
+
+	/* Avoid stack memory. */
+	mem_avoid[4].start = (unsigned long)free_mem_end_ptr;
+	mem_avoid[4].size = BOOT_STACK_SIZE;
+}
+
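mem_avoid[0] covers the tail of the compressed image that in-place decompression may still be consuming while output is written; the margin formula mirrors the bound worked out at the top of misc.c, as the comment says. A worked example with an illustrative 24 MiB decompressed kernel:

	#include <stdio.h>

	int main(void)
	{
		/* Illustrative: a 24 MiB decompressed kernel. */
		unsigned long output_size = 24UL << 20;
		unsigned long unsafe_len = (output_size >> 12) + 32768 + 18;

		printf("%lu\n", unsafe_len);	/* 6144 + 32768 + 18 = 38930 bytes */
		return 0;
	}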
+/* Does this memory vector overlap a known avoided area? */
+static bool mem_avoid_overlap(struct mem_vector *img)
+{
+	int i;
+
+	for (i = 0; i < MEM_AVOID_MAX; i++) {
+		if (mem_overlaps(img, &mem_avoid[i]))
+			return true;
+	}
+
+	return false;
+}
+
+static unsigned long slots[CONFIG_RANDOMIZE_BASE_MAX_OFFSET /
+			   CONFIG_PHYSICAL_ALIGN];
+static unsigned long slot_max;
+
+static void slots_append(unsigned long addr)
+{
+	/* Overflowing the slots list should be impossible. */
+	if (slot_max >= CONFIG_RANDOMIZE_BASE_MAX_OFFSET /
+			CONFIG_PHYSICAL_ALIGN)
+		return;
+
+	slots[slot_max++] = addr;
+}
+
+static unsigned long slots_fetch_random(void)
+{
+	/* Handle case of no slots stored. */
+	if (slot_max == 0)
+		return 0;
+
+	return slots[get_random_long() % slot_max];
+}
+
+static void process_e820_entry(struct e820entry *entry,
+			       unsigned long minimum,
+			       unsigned long image_size)
+{
+	struct mem_vector region, img;
+
+	/* Skip non-RAM entries. */
+	if (entry->type != E820_RAM)
+		return;
+
+	/* Ignore entries entirely above our maximum. */
+	if (entry->addr >= CONFIG_RANDOMIZE_BASE_MAX_OFFSET)
+		return;
+
+	/* Ignore entries entirely below our minimum. */
+	if (entry->addr + entry->size < minimum)
+		return;
+
+	region.start = entry->addr;
+	region.size = entry->size;
+
+	/* Potentially raise address to minimum location. */
+	if (region.start < minimum)
+		region.start = minimum;
+
+	/* Potentially raise address to meet alignment requirements. */
+	region.start = ALIGN(region.start, CONFIG_PHYSICAL_ALIGN);
+
+	/* Did we raise the address above the bounds of this e820 region? */
+	if (region.start > entry->addr + entry->size)
+		return;
+
+	/* Reduce size by any delta from the original address. */
+	region.size -= region.start - entry->addr;
+
+	/* Reduce maximum size to fit end of image within maximum limit. */
+	if (region.start + region.size > CONFIG_RANDOMIZE_BASE_MAX_OFFSET)
+		region.size = CONFIG_RANDOMIZE_BASE_MAX_OFFSET - region.start;
+
+	/* Walk each aligned slot and check for avoided areas. */
+	for (img.start = region.start, img.size = image_size ;
+	     mem_contains(&region, &img) ;
+	     img.start += CONFIG_PHYSICAL_ALIGN) {
+		if (mem_avoid_overlap(&img))
+			continue;
+		slots_append(img.start);
+	}
+}
+
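To see the clamping and alignment play out, it helps to run the core of process_e820_entry() on a made-up entry. The sketch below (all numbers illustrative) keeps only the minimum/alignment/fit logic and omits the MAX_OFFSET clamp and the mem_avoid filtering:

	#include <stdio.h>

	#define ALIGN_UP(x, a)	(((x) + (a) - 1) & ~((a) - 1))

	int main(void)
	{
		/* Hypothetical e820 RAM entry [0x100000, 0x1000000),
		 * minimum 0x400000, CONFIG_PHYSICAL_ALIGN 0x200000,
		 * image_size 0x800000. */
		unsigned long addr = 0x100000, size = 0xf00000;
		unsigned long minimum = 0x400000, align = 0x200000;
		unsigned long image_size = 0x800000;
		unsigned long start, end = addr + size;

		start = addr < minimum ? minimum : addr;
		start = ALIGN_UP(start, align);

		/* Every aligned start that keeps the image inside the entry:
		 * prints 0x400000, 0x600000 and 0x800000. */
		for (; start + image_size <= end; start += align)
			printf("slot at %#lx\n", start);
		return 0;
	}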
+static unsigned long find_random_addr(unsigned long minimum,
+				      unsigned long size)
+{
+	int i;
+	unsigned long addr;
+
+	/* Make sure minimum is aligned. */
+	minimum = ALIGN(minimum, CONFIG_PHYSICAL_ALIGN);
+
+	/* Verify potential e820 positions, appending to slots list. */
+	for (i = 0; i < real_mode->e820_entries; i++) {
+		process_e820_entry(&real_mode->e820_map[i], minimum, size);
+	}
+
+	return slots_fetch_random();
+}
+
+unsigned char *choose_kernel_location(unsigned char *input,
+				      unsigned long input_size,
+				      unsigned char *output,
+				      unsigned long output_size)
+{
+	unsigned long choice = (unsigned long)output;
+	unsigned long random;
+
+	if (cmdline_find_option_bool("nokaslr")) {
+		debug_putstr("KASLR disabled...\n");
+		goto out;
+	}
+
+	/* Record the various known unsafe memory ranges. */
+	mem_avoid_init((unsigned long)input, input_size,
+		       (unsigned long)output, output_size);
+
+	/* Walk e820 and find a random address. */
+	random = find_random_addr(choice, output_size);
+	if (!random) {
+		debug_putstr("KASLR could not find suitable E820 region...\n");
+		goto out;
+	}
+
+	/* Always enforce the minimum. */
+	if (random < choice)
+		goto out;
+
+	choice = random;
+out:
+	return (unsigned char *)choice;
+}
+
+#endif /* CONFIG_RANDOMIZE_BASE */
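The file exports a single entry point, choose_kernel_location(). The misc.c side of the series is not part of this section, but the expected shape of the call in the decompressor is along these lines (a sketch, not the verbatim patch):

	/* in decompress_kernel(), before decompression begins (assumed) */
	output = choose_kernel_location(input_data, input_len,
					output, output_len);

If "nokaslr" is on the kernel command line, or no suitable slot survives the e820 walk, the function falls back to the original output address, so randomization degrades gracefully to the non-randomized behaviour.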
diff --git a/arch/x86/boot/compressed/cmdline.c b/arch/x86/boot/compressed/cmdline.c
index bffd73b45b1f..b68e3033e6b9 100644
--- a/arch/x86/boot/compressed/cmdline.c
+++ b/arch/x86/boot/compressed/cmdline.c
@@ -1,6 +1,6 @@
 #include "misc.h"

-#ifdef CONFIG_EARLY_PRINTK
+#if CONFIG_EARLY_PRINTK || CONFIG_RANDOMIZE_BASE

 static unsigned long fs;
 static inline void set_fs(unsigned long seg)
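The guard above switches from #ifdef to #if so one expression can test two options. It works because kconfig defines enabled bool options to 1 and leaves disabled ones undefined, and an undefined macro evaluates to 0 inside #if; the more conventional spelling of the same test would be:

	#if defined(CONFIG_EARLY_PRINTK) || defined(CONFIG_RANDOMIZE_BASE)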
diff --git a/arch/x86/boot/compressed/cpuflags.c b/arch/x86/boot/compressed/cpuflags.c
new file mode 100644
index 000000000000..aa313466118b
--- /dev/null
+++ b/arch/x86/boot/compressed/cpuflags.c
@@ -0,0 +1,12 @@
+#ifdef CONFIG_RANDOMIZE_BASE
+
+#include "../cpuflags.c"
+
+bool has_cpuflag(int flag)
+{
+	get_cpuflags();
+
+	return test_bit(flag, cpu.flags);
+}
+
+#endif
diff --git a/arch/x86/boot/compressed/eboot.c b/arch/x86/boot/compressed/eboot.c
index a7677babf946..4703a6c4b8e3 100644
--- a/arch/x86/boot/compressed/eboot.c
+++ b/arch/x86/boot/compressed/eboot.c
@@ -19,10 +19,272 @@

 static efi_system_table_t *sys_table;

+static struct efi_config *efi_early;
+
+#define efi_call_early(f, ...)						\
+	efi_early->call(efi_early->f, __VA_ARGS__);
+
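efi_call_early() routes a named boot-service call through the firmware-width-agnostic thunk stored in efi_early, so one spelling serves both 32-bit and 64-bit firmware. Expanding one of the uses below by hand (note the macro body already supplies the trailing semicolon):

	status = efi_call_early(allocate_pool, EFI_LOADER_DATA, sz, (void **)&p)
	/* becomes */
	status = efi_early->call(efi_early->allocate_pool, EFI_LOADER_DATA, sz,
				 (void **)&p);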
+#define BOOT_SERVICES(bits)						\
+static void setup_boot_services##bits(struct efi_config *c)		\
+{									\
+	efi_system_table_##bits##_t *table;				\
+	efi_boot_services_##bits##_t *bt;				\
+									\
+	table = (typeof(table))sys_table;				\
+									\
+	c->text_output = table->con_out;				\
+									\
+	bt = (typeof(bt))(unsigned long)(table->boottime);		\
+									\
+	c->allocate_pool = bt->allocate_pool;				\
+	c->allocate_pages = bt->allocate_pages;				\
+	c->get_memory_map = bt->get_memory_map;				\
+	c->free_pool = bt->free_pool;					\
+	c->free_pages = bt->free_pages;					\
+	c->locate_handle = bt->locate_handle;				\
+	c->handle_protocol = bt->handle_protocol;			\
+	c->exit_boot_services = bt->exit_boot_services;			\
+}
+BOOT_SERVICES(32);
+BOOT_SERVICES(64);

-#include "../../../../drivers/firmware/efi/efi-stub-helper.c"
+static void efi_printk(efi_system_table_t *, char *);
+static void efi_char16_printk(efi_system_table_t *, efi_char16_t *);
+
+static efi_status_t
+__file_size32(void *__fh, efi_char16_t *filename_16,
+	      void **handle, u64 *file_sz)
+{
+	efi_file_handle_32_t *h, *fh = __fh;
+	efi_file_info_t *info;
+	efi_status_t status;
+	efi_guid_t info_guid = EFI_FILE_INFO_ID;
+	u32 info_sz;
+
+	status = efi_early->call((unsigned long)fh->open, fh, &h, filename_16,
+				 EFI_FILE_MODE_READ, (u64)0);
+	if (status != EFI_SUCCESS) {
+		efi_printk(sys_table, "Failed to open file: ");
+		efi_char16_printk(sys_table, filename_16);
+		efi_printk(sys_table, "\n");
+		return status;
+	}
+
+	*handle = h;
+
+	info_sz = 0;
+	status = efi_early->call((unsigned long)h->get_info, h, &info_guid,
+				 &info_sz, NULL);
+	if (status != EFI_BUFFER_TOO_SMALL) {
+		efi_printk(sys_table, "Failed to get file info size\n");
+		return status;
+	}
+
+grow:
+	status = efi_call_early(allocate_pool, EFI_LOADER_DATA,
+				info_sz, (void **)&info);
+	if (status != EFI_SUCCESS) {
+		efi_printk(sys_table, "Failed to alloc mem for file info\n");
+		return status;
+	}
+
+	status = efi_early->call((unsigned long)h->get_info, h, &info_guid,
+				 &info_sz, info);
+	if (status == EFI_BUFFER_TOO_SMALL) {
+		efi_call_early(free_pool, info);
+		goto grow;
+	}
+
+	*file_sz = info->file_size;
+	efi_call_early(free_pool, info);
+
+	if (status != EFI_SUCCESS)
+		efi_printk(sys_table, "Failed to get initrd info\n");
+
+	return status;
+}
+
+static efi_status_t
+__file_size64(void *__fh, efi_char16_t *filename_16,
+	      void **handle, u64 *file_sz)
+{
+	efi_file_handle_64_t *h, *fh = __fh;
+	efi_file_info_t *info;
+	efi_status_t status;
+	efi_guid_t info_guid = EFI_FILE_INFO_ID;
+	u64 info_sz;

+	status = efi_early->call((unsigned long)fh->open, fh, &h, filename_16,
+				 EFI_FILE_MODE_READ, (u64)0);
+	if (status != EFI_SUCCESS) {
+		efi_printk(sys_table, "Failed to open file: ");
+		efi_char16_printk(sys_table, filename_16);
+		efi_printk(sys_table, "\n");
+		return status;
+	}

+	*handle = h;
+
+	info_sz = 0;
+	status = efi_early->call((unsigned long)h->get_info, h, &info_guid,
+				 &info_sz, NULL);
+	if (status != EFI_BUFFER_TOO_SMALL) {
+		efi_printk(sys_table, "Failed to get file info size\n");
+		return status;
+	}
+
+grow:
+	status = efi_call_early(allocate_pool, EFI_LOADER_DATA,
+				info_sz, (void **)&info);
+	if (status != EFI_SUCCESS) {
+		efi_printk(sys_table, "Failed to alloc mem for file info\n");
141 | return status; | ||
142 | } | ||
143 | |||
144 | status = efi_early->call((unsigned long)h->get_info, h, &info_guid, | ||
145 | &info_sz, info); | ||
146 | if (status == EFI_BUFFER_TOO_SMALL) { | ||
147 | efi_call_early(free_pool, info); | ||
148 | goto grow; | ||
149 | } | ||
150 | |||
151 | *file_sz = info->file_size; | ||
152 | efi_call_early(free_pool, info); | ||
153 | |||
154 | if (status != EFI_SUCCESS) | ||
155 | efi_printk(sys_table, "Failed to get initrd info\n"); | ||
156 | |||
157 | return status; | ||
158 | } | ||
159 | static efi_status_t | ||
160 | efi_file_size(efi_system_table_t *sys_table, void *__fh, | ||
161 | efi_char16_t *filename_16, void **handle, u64 *file_sz) | ||
162 | { | ||
163 | if (efi_early->is64) | ||
164 | return __file_size64(__fh, filename_16, handle, file_sz); | ||
165 | |||
166 | return __file_size32(__fh, filename_16, handle, file_sz); | ||
167 | } | ||
168 | |||
169 | static inline efi_status_t | ||
170 | efi_file_read(void *handle, unsigned long *size, void *addr) | ||
171 | { | ||
172 | unsigned long func; | ||
173 | |||
174 | if (efi_early->is64) { | ||
175 | efi_file_handle_64_t *fh = handle; | ||
176 | |||
177 | func = (unsigned long)fh->read; | ||
178 | return efi_early->call(func, handle, size, addr); | ||
179 | } else { | ||
180 | efi_file_handle_32_t *fh = handle; | ||
181 | |||
182 | func = (unsigned long)fh->read; | ||
183 | return efi_early->call(func, handle, size, addr); | ||
184 | } | ||
185 | } | ||
186 | |||
187 | static inline efi_status_t efi_file_close(void *handle) | ||
188 | { | ||
189 | if (efi_early->is64) { | ||
190 | efi_file_handle_64_t *fh = handle; | ||
191 | |||
192 | return efi_early->call((unsigned long)fh->close, handle); | ||
193 | } else { | ||
194 | efi_file_handle_32_t *fh = handle; | ||
195 | |||
196 | return efi_early->call((unsigned long)fh->close, handle); | ||
197 | } | ||
198 | } | ||
199 | |||
200 | static inline efi_status_t __open_volume32(void *__image, void **__fh) | ||
201 | { | ||
202 | efi_file_io_interface_t *io; | ||
203 | efi_loaded_image_32_t *image = __image; | ||
204 | efi_file_handle_32_t *fh; | ||
205 | efi_guid_t fs_proto = EFI_FILE_SYSTEM_GUID; | ||
206 | efi_status_t status; | ||
207 | void *handle = (void *)(unsigned long)image->device_handle; | ||
208 | unsigned long func; | ||
209 | |||
210 | status = efi_call_early(handle_protocol, handle, | ||
211 | &fs_proto, (void **)&io); | ||
212 | if (status != EFI_SUCCESS) { | ||
213 | efi_printk(sys_table, "Failed to handle fs_proto\n"); | ||
214 | return status; | ||
215 | } | ||
216 | |||
217 | func = (unsigned long)io->open_volume; | ||
218 | status = efi_early->call(func, io, &fh); | ||
219 | if (status != EFI_SUCCESS) | ||
220 | efi_printk(sys_table, "Failed to open volume\n"); | ||
221 | |||
222 | *__fh = fh; | ||
223 | return status; | ||
224 | } | ||
225 | |||
226 | static inline efi_status_t __open_volume64(void *__image, void **__fh) | ||
227 | { | ||
228 | efi_file_io_interface_t *io; | ||
229 | efi_loaded_image_64_t *image = __image; | ||
230 | efi_file_handle_64_t *fh; | ||
231 | efi_guid_t fs_proto = EFI_FILE_SYSTEM_GUID; | ||
232 | efi_status_t status; | ||
233 | void *handle = (void *)(unsigned long)image->device_handle; | ||
234 | unsigned long func; | ||
235 | |||
236 | status = efi_call_early(handle_protocol, handle, | ||
237 | &fs_proto, (void **)&io); | ||
238 | if (status != EFI_SUCCESS) { | ||
239 | efi_printk(sys_table, "Failed to handle fs_proto\n"); | ||
240 | return status; | ||
241 | } | ||
242 | |||
243 | func = (unsigned long)io->open_volume; | ||
244 | status = efi_early->call(func, io, &fh); | ||
245 | if (status != EFI_SUCCESS) | ||
246 | efi_printk(sys_table, "Failed to open volume\n"); | ||
247 | |||
248 | *__fh = fh; | ||
249 | return status; | ||
250 | } | ||
251 | |||
252 | static inline efi_status_t | ||
253 | efi_open_volume(efi_system_table_t *sys_table, void *__image, void **__fh) | ||
254 | { | ||
255 | if (efi_early->is64) | ||
256 | return __open_volume64(__image, __fh); | ||
257 | |||
258 | return __open_volume32(__image, __fh); | ||
259 | } | ||
260 | |||
261 | static void efi_char16_printk(efi_system_table_t *table, efi_char16_t *str) | ||
262 | { | ||
263 | unsigned long output_string; | ||
264 | size_t offset; | ||
265 | |||
266 | if (efi_early->is64) { | ||
267 | struct efi_simple_text_output_protocol_64 *out; | ||
268 | u64 *func; | ||
269 | |||
270 | offset = offsetof(typeof(*out), output_string); | ||
271 | output_string = efi_early->text_output + offset; | ||
272 | func = (u64 *)output_string; | ||
273 | |||
274 | efi_early->call(*func, efi_early->text_output, str); | ||
275 | } else { | ||
276 | struct efi_simple_text_output_protocol_32 *out; | ||
277 | u32 *func; | ||
278 | |||
279 | offset = offsetof(typeof(*out), output_string); | ||
280 | output_string = efi_early->text_output + offset; | ||
281 | func = (u32 *)output_string; | ||
282 | |||
283 | efi_early->call(*func, efi_early->text_output, str); | ||
284 | } | ||
285 | } | ||
286 | |||
287 | #include "../../../../drivers/firmware/efi/efi-stub-helper.c" | ||
26 | 288 | ||
27 | static void find_bits(unsigned long mask, u8 *pos, u8 *size) | 289 | static void find_bits(unsigned long mask, u8 *pos, u8 *size) |
28 | { | 290 | { |
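
Everything in the hunk above funnels firmware services through efi_early->call() with entry points stored as plain integers, which is what lets one kernel binary drive either 32-bit or 64-bit firmware. A user-space sketch of that dispatch shape (struct cfg, thunk() and call_early() are illustrative stand-ins; the real thunk must also switch calling conventions):

#include <stdarg.h>
#include <stdint.h>
#include <stdio.h>

struct cfg {
	uint64_t free_pool;			/* service entry point  */
	uint64_t (*call)(unsigned long, ...);	/* arch-specific thunk  */
};

static uint64_t my_free_pool(void *buf)
{
	printf("free_pool(%p)\n", buf);
	return 0;
}

static uint64_t thunk(unsigned long f, ...)
{
	va_list ap;
	void *arg;
	uint64_t ret;

	va_start(ap, f);
	arg = va_arg(ap, void *);
	ret = ((uint64_t (*)(void *))f)(arg);	/* forward one argument */
	va_end(ap);
	return ret;
}

/* Mirrors the shape of the efi_call_early() macro above. */
#define call_early(c, f, ...)	(c)->call((c)->f, __VA_ARGS__)

int main(void)
{
	struct cfg c = { (uint64_t)(uintptr_t)my_free_pool, thunk };
	int dummy = 0;

	call_early(&c, free_pool, &dummy);
	return 0;
}
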
@@ -47,105 +309,97 @@ static void find_bits(unsigned long mask, u8 *pos, u8 *size) | |||
47 | *size = len; | 309 | *size = len; |
48 | } | 310 | } |
49 | 311 | ||
50 | static efi_status_t setup_efi_pci(struct boot_params *params) | 312 | static efi_status_t |
313 | __setup_efi_pci32(efi_pci_io_protocol_32 *pci, struct pci_setup_rom **__rom) | ||
51 | { | 314 | { |
52 | efi_pci_io_protocol *pci; | 315 | struct pci_setup_rom *rom = NULL; |
53 | efi_status_t status; | 316 | efi_status_t status; |
54 | void **pci_handle; | 317 | unsigned long size; |
55 | efi_guid_t pci_proto = EFI_PCI_IO_PROTOCOL_GUID; | 318 | uint64_t attributes; |
56 | unsigned long nr_pci, size = 0; | ||
57 | int i; | ||
58 | struct setup_data *data; | ||
59 | |||
60 | data = (struct setup_data *)(unsigned long)params->hdr.setup_data; | ||
61 | 319 | ||
62 | while (data && data->next) | 320 | status = efi_early->call(pci->attributes, pci, |
63 | data = (struct setup_data *)(unsigned long)data->next; | 321 | EfiPciIoAttributeOperationGet, 0, 0, |
322 | &attributes); | ||
323 | if (status != EFI_SUCCESS) | ||
324 | return status; | ||
64 | 325 | ||
65 | status = efi_call_phys5(sys_table->boottime->locate_handle, | 326 | if (!pci->romimage || !pci->romsize) |
66 | EFI_LOCATE_BY_PROTOCOL, &pci_proto, | 327 | return EFI_INVALID_PARAMETER; |
67 | NULL, &size, pci_handle); | ||
68 | 328 | ||
69 | if (status == EFI_BUFFER_TOO_SMALL) { | 329 | size = pci->romsize + sizeof(*rom); |
70 | status = efi_call_phys3(sys_table->boottime->allocate_pool, | ||
71 | EFI_LOADER_DATA, size, &pci_handle); | ||
72 | 330 | ||
73 | if (status != EFI_SUCCESS) | 331 | status = efi_call_early(allocate_pool, EFI_LOADER_DATA, size, &rom); |
74 | return status; | 332 | if (status != EFI_SUCCESS) |
333 | return status; | ||
75 | 334 | ||
76 | status = efi_call_phys5(sys_table->boottime->locate_handle, | 335 | memset(rom, 0, sizeof(*rom)); |
77 | EFI_LOCATE_BY_PROTOCOL, &pci_proto, | ||
78 | NULL, &size, pci_handle); | ||
79 | } | ||
80 | 336 | ||
81 | if (status != EFI_SUCCESS) | 337 | rom->data.type = SETUP_PCI; |
82 | goto free_handle; | 338 | rom->data.len = size - sizeof(struct setup_data); |
339 | rom->data.next = 0; | ||
340 | rom->pcilen = pci->romsize; | ||
341 | *__rom = rom; | ||
83 | 342 | ||
84 | nr_pci = size / sizeof(void *); | 343 | status = efi_early->call(pci->pci.read, pci, EfiPciIoWidthUint16, |
85 | for (i = 0; i < nr_pci; i++) { | 344 | PCI_VENDOR_ID, 1, &(rom->vendor)); |
86 | void *h = pci_handle[i]; | ||
87 | uint64_t attributes; | ||
88 | struct pci_setup_rom *rom; | ||
89 | 345 | ||
90 | status = efi_call_phys3(sys_table->boottime->handle_protocol, | 346 | if (status != EFI_SUCCESS) |
91 | h, &pci_proto, &pci); | 347 | goto free_struct; |
92 | 348 | ||
93 | if (status != EFI_SUCCESS) | 349 | status = efi_early->call(pci->pci.read, pci, EfiPciIoWidthUint16, |
94 | continue; | 350 | PCI_DEVICE_ID, 1, &(rom->devid)); |
95 | 351 | ||
96 | if (!pci) | 352 | if (status != EFI_SUCCESS) |
97 | continue; | 353 | goto free_struct; |
98 | 354 | ||
99 | #ifdef CONFIG_X86_64 | 355 | status = efi_early->call(pci->get_location, pci, &(rom->segment), |
100 | status = efi_call_phys4(pci->attributes, pci, | 356 | &(rom->bus), &(rom->device), &(rom->function)); |
101 | EfiPciIoAttributeOperationGet, 0, | ||
102 | &attributes); | ||
103 | #else | ||
104 | status = efi_call_phys5(pci->attributes, pci, | ||
105 | EfiPciIoAttributeOperationGet, 0, 0, | ||
106 | &attributes); | ||
107 | #endif | ||
108 | if (status != EFI_SUCCESS) | ||
109 | continue; | ||
110 | 357 | ||
111 | if (!pci->romimage || !pci->romsize) | 358 | if (status != EFI_SUCCESS) |
112 | continue; | 359 | goto free_struct; |
113 | 360 | ||
114 | size = pci->romsize + sizeof(*rom); | 361 | memcpy(rom->romdata, pci->romimage, pci->romsize); |
362 | return status; | ||
115 | 363 | ||
116 | status = efi_call_phys3(sys_table->boottime->allocate_pool, | 364 | free_struct: |
117 | EFI_LOADER_DATA, size, &rom); | 365 | efi_call_early(free_pool, rom); |
366 | return status; | ||
367 | } | ||
118 | 368 | ||
119 | if (status != EFI_SUCCESS) | 369 | static efi_status_t |
120 | continue; | 370 | setup_efi_pci32(struct boot_params *params, void **pci_handle, |
371 | unsigned long size) | ||
372 | { | ||
373 | efi_pci_io_protocol_32 *pci = NULL; | ||
374 | efi_guid_t pci_proto = EFI_PCI_IO_PROTOCOL_GUID; | ||
375 | u32 *handles = (u32 *)(unsigned long)pci_handle; | ||
376 | efi_status_t status; | ||
377 | unsigned long nr_pci; | ||
378 | struct setup_data *data; | ||
379 | int i; | ||
121 | 380 | ||
122 | rom->data.type = SETUP_PCI; | 381 | data = (struct setup_data *)(unsigned long)params->hdr.setup_data; |
123 | rom->data.len = size - sizeof(struct setup_data); | ||
124 | rom->data.next = 0; | ||
125 | rom->pcilen = pci->romsize; | ||
126 | 382 | ||
127 | status = efi_call_phys5(pci->pci.read, pci, | 383 | while (data && data->next) |
128 | EfiPciIoWidthUint16, PCI_VENDOR_ID, | 384 | data = (struct setup_data *)(unsigned long)data->next; |
129 | 1, &(rom->vendor)); | ||
130 | 385 | ||
131 | if (status != EFI_SUCCESS) | 386 | nr_pci = size / sizeof(u32); |
132 | goto free_struct; | 387 | for (i = 0; i < nr_pci; i++) { |
388 | struct pci_setup_rom *rom = NULL; | ||
389 | u32 h = handles[i]; | ||
133 | 390 | ||
134 | status = efi_call_phys5(pci->pci.read, pci, | 391 | status = efi_call_early(handle_protocol, h, |
135 | EfiPciIoWidthUint16, PCI_DEVICE_ID, | 392 | &pci_proto, (void **)&pci); |
136 | 1, &(rom->devid)); | ||
137 | 393 | ||
138 | if (status != EFI_SUCCESS) | 394 | if (status != EFI_SUCCESS) |
139 | goto free_struct; | 395 | continue; |
140 | 396 | ||
141 | status = efi_call_phys5(pci->get_location, pci, | 397 | if (!pci) |
142 | &(rom->segment), &(rom->bus), | 398 | continue; |
143 | &(rom->device), &(rom->function)); | ||
144 | 399 | ||
400 | status = __setup_efi_pci32(pci, &rom); | ||
145 | if (status != EFI_SUCCESS) | 401 | if (status != EFI_SUCCESS) |
146 | goto free_struct; | 402 | continue; |
147 | |||
148 | memcpy(rom->romdata, pci->romimage, pci->romsize); | ||
149 | 403 | ||
150 | if (data) | 404 | if (data) |
151 | data->next = (unsigned long)rom; | 405 | data->next = (unsigned long)rom; |
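
Both setup_efi_pci32() and its 64-bit twin end the same way: each ROM image is appended to the boot_params setup_data singly-linked list, whose next field carries a physical address rather than a pointer. Distilled into a standalone sketch (trimmed struct, addresses modeled with uintptr_t):

#include <stdint.h>
#include <stdio.h>

struct setup_data {		/* trimmed-down stand-in            */
	uint64_t next;		/* physical address of next node    */
	uint32_t type;
	uint32_t len;
};

static void append(uint64_t *head, struct setup_data *node)
{
	struct setup_data *data = (struct setup_data *)(uintptr_t)*head;

	node->next = 0;
	if (!data) {		/* empty list: node becomes the head */
		*head = (uint64_t)(uintptr_t)node;
		return;
	}
	while (data->next)	/* walk to the tail                  */
		data = (struct setup_data *)(uintptr_t)data->next;
	data->next = (uint64_t)(uintptr_t)node;
}

int main(void)
{
	uint64_t head = 0;
	struct setup_data a = { 0, 1, 0 }, b = { 0, 2, 0 };

	append(&head, &a);
	append(&head, &b);
	printf("head type %u, then %u\n",
	       ((struct setup_data *)(uintptr_t)head)->type,
	       ((struct setup_data *)(uintptr_t)a.next)->type);
	return 0;
}
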
@@ -154,105 +408,155 @@ static efi_status_t setup_efi_pci(struct boot_params *params) | |||
154 | 408 | ||
155 | data = (struct setup_data *)rom; | 409 | data = (struct setup_data *)rom; |
156 | 410 | ||
157 | continue; | ||
158 | free_struct: | ||
159 | efi_call_phys1(sys_table->boottime->free_pool, rom); | ||
160 | } | 411 | } |
161 | 412 | ||
162 | free_handle: | ||
163 | efi_call_phys1(sys_table->boottime->free_pool, pci_handle); | ||
164 | return status; | 413 | return status; |
165 | } | 414 | } |
166 | 415 | ||
167 | /* | 416 | static efi_status_t |
168 | * See if we have Graphics Output Protocol | 417 | __setup_efi_pci64(efi_pci_io_protocol_64 *pci, struct pci_setup_rom **__rom) |
169 | */ | ||
170 | static efi_status_t setup_gop(struct screen_info *si, efi_guid_t *proto, | ||
171 | unsigned long size) | ||
172 | { | 418 | { |
173 | struct efi_graphics_output_protocol *gop, *first_gop; | 419 | struct pci_setup_rom *rom; |
174 | struct efi_pixel_bitmask pixel_info; | ||
175 | unsigned long nr_gops; | ||
176 | efi_status_t status; | 420 | efi_status_t status; |
177 | void **gop_handle; | 421 | unsigned long size; |
178 | u16 width, height; | 422 | uint64_t attributes; |
179 | u32 fb_base, fb_size; | ||
180 | u32 pixels_per_scan_line; | ||
181 | int pixel_format; | ||
182 | int i; | ||
183 | 423 | ||
184 | status = efi_call_phys3(sys_table->boottime->allocate_pool, | 424 | status = efi_early->call(pci->attributes, pci, |
185 | EFI_LOADER_DATA, size, &gop_handle); | 425 | EfiPciIoAttributeOperationGet, 0, |
426 | &attributes); | ||
186 | if (status != EFI_SUCCESS) | 427 | if (status != EFI_SUCCESS) |
187 | return status; | 428 | return status; |
188 | 429 | ||
189 | status = efi_call_phys5(sys_table->boottime->locate_handle, | 430 | if (!pci->romimage || !pci->romsize) |
190 | EFI_LOCATE_BY_PROTOCOL, proto, | 431 | return EFI_INVALID_PARAMETER; |
191 | NULL, &size, gop_handle); | 432 | |
433 | size = pci->romsize + sizeof(*rom); | ||
434 | |||
435 | status = efi_call_early(allocate_pool, EFI_LOADER_DATA, size, &rom); | ||
192 | if (status != EFI_SUCCESS) | 436 | if (status != EFI_SUCCESS) |
193 | goto free_handle; | 437 | return status; |
194 | 438 | ||
195 | first_gop = NULL; | 439 | rom->data.type = SETUP_PCI; |
440 | rom->data.len = size - sizeof(struct setup_data); | ||
441 | rom->data.next = 0; | ||
442 | rom->pcilen = pci->romsize; | ||
443 | *__rom = rom; | ||
196 | 444 | ||
197 | nr_gops = size / sizeof(void *); | 445 | status = efi_early->call(pci->pci.read, pci, EfiPciIoWidthUint16, |
198 | for (i = 0; i < nr_gops; i++) { | 446 | PCI_VENDOR_ID, 1, &(rom->vendor)); |
199 | struct efi_graphics_output_mode_info *info; | 447 | |
200 | efi_guid_t conout_proto = EFI_CONSOLE_OUT_DEVICE_GUID; | 448 | if (status != EFI_SUCCESS) |
201 | bool conout_found = false; | 449 | goto free_struct; |
202 | void *dummy; | 450 | |
203 | void *h = gop_handle[i]; | 451 | status = efi_early->call(pci->pci.read, pci, EfiPciIoWidthUint16, |
452 | PCI_DEVICE_ID, 1, &(rom->devid)); | ||
453 | |||
454 | if (status != EFI_SUCCESS) | ||
455 | goto free_struct; | ||
456 | |||
457 | status = efi_early->call(pci->get_location, pci, &(rom->segment), | ||
458 | &(rom->bus), &(rom->device), &(rom->function)); | ||
459 | |||
460 | if (status != EFI_SUCCESS) | ||
461 | goto free_struct; | ||
462 | |||
463 | memcpy(rom->romdata, pci->romimage, pci->romsize); | ||
464 | return status; | ||
465 | |||
466 | free_struct: | ||
467 | efi_call_early(free_pool, rom); | ||
468 | return status; | ||
469 | |||
470 | } | ||
471 | |||
472 | static efi_status_t | ||
473 | setup_efi_pci64(struct boot_params *params, void **pci_handle, | ||
474 | unsigned long size) | ||
475 | { | ||
476 | efi_pci_io_protocol_64 *pci = NULL; | ||
477 | efi_guid_t pci_proto = EFI_PCI_IO_PROTOCOL_GUID; | ||
478 | u64 *handles = (u64 *)(unsigned long)pci_handle; | ||
479 | efi_status_t status; | ||
480 | unsigned long nr_pci; | ||
481 | struct setup_data *data; | ||
482 | int i; | ||
483 | |||
484 | data = (struct setup_data *)(unsigned long)params->hdr.setup_data; | ||
485 | |||
486 | while (data && data->next) | ||
487 | data = (struct setup_data *)(unsigned long)data->next; | ||
488 | |||
489 | nr_pci = size / sizeof(u64); | ||
490 | for (i = 0; i < nr_pci; i++) { | ||
491 | struct pci_setup_rom *rom = NULL; | ||
492 | u64 h = handles[i]; | ||
493 | |||
494 | status = efi_call_early(handle_protocol, h, | ||
495 | &pci_proto, (void **)&pci); | ||
204 | 496 | ||
205 | status = efi_call_phys3(sys_table->boottime->handle_protocol, | ||
206 | h, proto, &gop); | ||
207 | if (status != EFI_SUCCESS) | 497 | if (status != EFI_SUCCESS) |
208 | continue; | 498 | continue; |
209 | 499 | ||
210 | status = efi_call_phys3(sys_table->boottime->handle_protocol, | 500 | if (!pci) |
211 | h, &conout_proto, &dummy); | 501 | continue; |
212 | 502 | ||
213 | if (status == EFI_SUCCESS) | 503 | status = __setup_efi_pci64(pci, &rom); |
214 | conout_found = true; | 504 | if (status != EFI_SUCCESS) |
505 | continue; | ||
215 | 506 | ||
216 | status = efi_call_phys4(gop->query_mode, gop, | 507 | if (data) |
217 | gop->mode->mode, &size, &info); | 508 | data->next = (unsigned long)rom; |
218 | if (status == EFI_SUCCESS && (!first_gop || conout_found)) { | 509 | else |
219 | /* | 510 | params->hdr.setup_data = (unsigned long)rom; |
220 | * Systems that use the UEFI Console Splitter may | 511 | |
221 | * provide multiple GOP devices, not all of which are | 512 | data = (struct setup_data *)rom; |
222 | * backed by real hardware. The workaround is to search | ||
223 | * for a GOP implementing the ConOut protocol, and if | ||
224 | * one isn't found, to just fall back to the first GOP. | ||
225 | */ | ||
226 | width = info->horizontal_resolution; | ||
227 | height = info->vertical_resolution; | ||
228 | fb_base = gop->mode->frame_buffer_base; | ||
229 | fb_size = gop->mode->frame_buffer_size; | ||
230 | pixel_format = info->pixel_format; | ||
231 | pixel_info = info->pixel_information; | ||
232 | pixels_per_scan_line = info->pixels_per_scan_line; | ||
233 | 513 | ||
234 | /* | ||
235 | * Once we've found a GOP supporting ConOut, | ||
236 | * don't bother looking any further. | ||
237 | */ | ||
238 | first_gop = gop; | ||
239 | if (conout_found) | ||
240 | break; | ||
241 | } | ||
242 | } | 514 | } |
243 | 515 | ||
244 | /* Did we find any GOPs? */ | 516 | return status; |
245 | if (!first_gop) | 517 | } |
518 | |||
519 | static efi_status_t setup_efi_pci(struct boot_params *params) | ||
520 | { | ||
521 | efi_status_t status; | ||
522 | void **pci_handle = NULL; | ||
523 | efi_guid_t pci_proto = EFI_PCI_IO_PROTOCOL_GUID; | ||
524 | unsigned long size = 0; | ||
525 | |||
526 | status = efi_call_early(locate_handle, | ||
527 | EFI_LOCATE_BY_PROTOCOL, | ||
528 | &pci_proto, NULL, &size, pci_handle); | ||
529 | |||
530 | if (status == EFI_BUFFER_TOO_SMALL) { | ||
531 | status = efi_call_early(allocate_pool, | ||
532 | EFI_LOADER_DATA, | ||
533 | size, (void **)&pci_handle); | ||
534 | |||
535 | if (status != EFI_SUCCESS) | ||
536 | return status; | ||
537 | |||
538 | status = efi_call_early(locate_handle, | ||
539 | EFI_LOCATE_BY_PROTOCOL, &pci_proto, | ||
540 | NULL, &size, pci_handle); | ||
541 | } | ||
542 | |||
543 | if (status != EFI_SUCCESS) | ||
246 | goto free_handle; | 544 | goto free_handle; |
247 | 545 | ||
248 | /* EFI framebuffer */ | 546 | if (efi_early->is64) |
249 | si->orig_video_isVGA = VIDEO_TYPE_EFI; | 547 | status = setup_efi_pci64(params, pci_handle, size); |
548 | else | ||
549 | status = setup_efi_pci32(params, pci_handle, size); | ||
250 | 550 | ||
251 | si->lfb_width = width; | 551 | free_handle: |
252 | si->lfb_height = height; | 552 | efi_call_early(free_pool, pci_handle); |
253 | si->lfb_base = fb_base; | 553 | return status; |
254 | si->pages = 1; | 554 | } |
255 | 555 | ||
556 | static void | ||
557 | setup_pixel_info(struct screen_info *si, u32 pixels_per_scan_line, | ||
558 | struct efi_pixel_bitmask pixel_info, int pixel_format) | ||
559 | { | ||
256 | if (pixel_format == PIXEL_RGB_RESERVED_8BIT_PER_COLOR) { | 560 | if (pixel_format == PIXEL_RGB_RESERVED_8BIT_PER_COLOR) { |
257 | si->lfb_depth = 32; | 561 | si->lfb_depth = 32; |
258 | si->lfb_linelength = pixels_per_scan_line * 4; | 562 | si->lfb_linelength = pixels_per_scan_line * 4; |
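
For the PIXEL_RGB_RESERVED_8BIT_PER_COLOR case visible at this point, setup_pixel_info() reduces to simple arithmetic: four bytes per pixel per scan line, with the callers later computing lfb_size as linelength times height. A cut-down sketch (struct si and the mode numbers are illustrative):

#include <stdint.h>
#include <stdio.h>

struct si {			/* cut-down screen_info stand-in */
	uint16_t lfb_depth;
	uint16_t lfb_linelength;
	uint16_t lfb_height;
	uint32_t lfb_size;
};

/* 32 bits per pixel: a scan line is pixels_per_scan_line * 4 bytes,
 * and the framebuffer spans linelength * height bytes. */
static void fill(struct si *si, uint32_t ppsl, uint16_t height)
{
	si->lfb_depth = 32;
	si->lfb_linelength = ppsl * 4;
	si->lfb_height = height;
	si->lfb_size = (uint32_t)si->lfb_linelength * height;
}

int main(void)
{
	struct si si;

	fill(&si, 1024, 768);			/* hypothetical mode */
	printf("%u bytes\n", si.lfb_size);	/* 3145728 */
	return 0;
}
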
@@ -297,62 +601,319 @@ static efi_status_t setup_gop(struct screen_info *si, efi_guid_t *proto, | |||
297 | si->rsvd_size = 0; | 601 | si->rsvd_size = 0; |
298 | si->rsvd_pos = 0; | 602 | si->rsvd_pos = 0; |
299 | } | 603 | } |
604 | } | ||
605 | |||
606 | static efi_status_t | ||
607 | __gop_query32(struct efi_graphics_output_protocol_32 *gop32, | ||
608 | struct efi_graphics_output_mode_info **info, | ||
609 | unsigned long *size, u32 *fb_base) | ||
610 | { | ||
611 | struct efi_graphics_output_protocol_mode_32 *mode; | ||
612 | efi_status_t status; | ||
613 | unsigned long m; | ||
614 | |||
615 | m = gop32->mode; | ||
616 | mode = (struct efi_graphics_output_protocol_mode_32 *)m; | ||
617 | |||
618 | status = efi_early->call(gop32->query_mode, gop32, | ||
619 | mode->mode, size, info); | ||
620 | if (status != EFI_SUCCESS) | ||
621 | return status; | ||
622 | |||
623 | *fb_base = mode->frame_buffer_base; | ||
624 | return status; | ||
625 | } | ||
626 | |||
627 | static efi_status_t | ||
628 | setup_gop32(struct screen_info *si, efi_guid_t *proto, | ||
629 | unsigned long size, void **gop_handle) | ||
630 | { | ||
631 | struct efi_graphics_output_protocol_32 *gop32, *first_gop; | ||
632 | unsigned long nr_gops; | ||
633 | u16 width, height; | ||
634 | u32 pixels_per_scan_line; | ||
635 | u32 fb_base; | ||
636 | struct efi_pixel_bitmask pixel_info; | ||
637 | int pixel_format; | ||
638 | efi_status_t status; | ||
639 | u32 *handles = (u32 *)(unsigned long)gop_handle; | ||
640 | int i; | ||
641 | |||
642 | first_gop = NULL; | ||
643 | gop32 = NULL; | ||
644 | |||
645 | nr_gops = size / sizeof(u32); | ||
646 | for (i = 0; i < nr_gops; i++) { | ||
647 | struct efi_graphics_output_mode_info *info = NULL; | ||
648 | efi_guid_t conout_proto = EFI_CONSOLE_OUT_DEVICE_GUID; | ||
649 | bool conout_found = false; | ||
650 | void *dummy = NULL; | ||
651 | u32 h = handles[i]; | ||
652 | |||
653 | status = efi_call_early(handle_protocol, h, | ||
654 | proto, (void **)&gop32); | ||
655 | if (status != EFI_SUCCESS) | ||
656 | continue; | ||
657 | |||
658 | status = efi_call_early(handle_protocol, h, | ||
659 | &conout_proto, &dummy); | ||
660 | if (status == EFI_SUCCESS) | ||
661 | conout_found = true; | ||
662 | |||
663 | status = __gop_query32(gop32, &info, &size, &fb_base); | ||
664 | if (status == EFI_SUCCESS && (!first_gop || conout_found)) { | ||
665 | /* | ||
666 | * Systems that use the UEFI Console Splitter may | ||
667 | * provide multiple GOP devices, not all of which are | ||
668 | * backed by real hardware. The workaround is to search | ||
669 | * for a GOP implementing the ConOut protocol, and if | ||
670 | * one isn't found, to just fall back to the first GOP. | ||
671 | */ | ||
672 | width = info->horizontal_resolution; | ||
673 | height = info->vertical_resolution; | ||
674 | pixel_format = info->pixel_format; | ||
675 | pixel_info = info->pixel_information; | ||
676 | pixels_per_scan_line = info->pixels_per_scan_line; | ||
677 | |||
678 | /* | ||
679 | * Once we've found a GOP supporting ConOut, | ||
680 | * don't bother looking any further. | ||
681 | */ | ||
682 | first_gop = gop32; | ||
683 | if (conout_found) | ||
684 | break; | ||
685 | } | ||
686 | } | ||
687 | |||
688 | /* Did we find any GOPs? */ | ||
689 | if (!first_gop) | ||
690 | goto out; | ||
691 | |||
692 | /* EFI framebuffer */ | ||
693 | si->orig_video_isVGA = VIDEO_TYPE_EFI; | ||
694 | |||
695 | si->lfb_width = width; | ||
696 | si->lfb_height = height; | ||
697 | si->lfb_base = fb_base; | ||
698 | si->pages = 1; | ||
699 | |||
700 | setup_pixel_info(si, pixels_per_scan_line, pixel_info, pixel_format); | ||
300 | 701 | ||
301 | si->lfb_size = si->lfb_linelength * si->lfb_height; | 702 | si->lfb_size = si->lfb_linelength * si->lfb_height; |
302 | 703 | ||
303 | si->capabilities |= VIDEO_CAPABILITY_SKIP_QUIRKS; | 704 | si->capabilities |= VIDEO_CAPABILITY_SKIP_QUIRKS; |
705 | out: | ||
706 | return status; | ||
707 | } | ||
304 | 708 | ||
305 | free_handle: | 709 | static efi_status_t |
306 | efi_call_phys1(sys_table->boottime->free_pool, gop_handle); | 710 | __gop_query64(struct efi_graphics_output_protocol_64 *gop64, |
711 | struct efi_graphics_output_mode_info **info, | ||
712 | unsigned long *size, u32 *fb_base) | ||
713 | { | ||
714 | struct efi_graphics_output_protocol_mode_64 *mode; | ||
715 | efi_status_t status; | ||
716 | unsigned long m; | ||
717 | |||
718 | m = gop64->mode; | ||
719 | mode = (struct efi_graphics_output_protocol_mode_64 *)m; | ||
720 | |||
721 | status = efi_early->call(gop64->query_mode, gop64, | ||
722 | mode->mode, size, info); | ||
723 | if (status != EFI_SUCCESS) | ||
724 | return status; | ||
725 | |||
726 | *fb_base = mode->frame_buffer_base; | ||
727 | return status; | ||
728 | } | ||
729 | |||
730 | static efi_status_t | ||
731 | setup_gop64(struct screen_info *si, efi_guid_t *proto, | ||
732 | unsigned long size, void **gop_handle) | ||
733 | { | ||
734 | struct efi_graphics_output_protocol_64 *gop64, *first_gop; | ||
735 | unsigned long nr_gops; | ||
736 | u16 width, height; | ||
737 | u32 pixels_per_scan_line; | ||
738 | u32 fb_base; | ||
739 | struct efi_pixel_bitmask pixel_info; | ||
740 | int pixel_format; | ||
741 | efi_status_t status; | ||
742 | u64 *handles = (u64 *)(unsigned long)gop_handle; | ||
743 | int i; | ||
744 | |||
745 | first_gop = NULL; | ||
746 | gop64 = NULL; | ||
747 | |||
748 | nr_gops = size / sizeof(u64); | ||
749 | for (i = 0; i < nr_gops; i++) { | ||
750 | struct efi_graphics_output_mode_info *info = NULL; | ||
751 | efi_guid_t conout_proto = EFI_CONSOLE_OUT_DEVICE_GUID; | ||
752 | bool conout_found = false; | ||
753 | void *dummy = NULL; | ||
754 | u64 h = handles[i]; | ||
755 | |||
756 | status = efi_call_early(handle_protocol, h, | ||
757 | proto, (void **)&gop64); | ||
758 | if (status != EFI_SUCCESS) | ||
759 | continue; | ||
760 | |||
761 | status = efi_call_early(handle_protocol, h, | ||
762 | &conout_proto, &dummy); | ||
763 | if (status == EFI_SUCCESS) | ||
764 | conout_found = true; | ||
765 | |||
766 | status = __gop_query64(gop64, &info, &size, &fb_base); | ||
767 | if (status == EFI_SUCCESS && (!first_gop || conout_found)) { | ||
768 | /* | ||
769 | * Systems that use the UEFI Console Splitter may | ||
770 | * provide multiple GOP devices, not all of which are | ||
771 | * backed by real hardware. The workaround is to search | ||
772 | * for a GOP implementing the ConOut protocol, and if | ||
773 | * one isn't found, to just fall back to the first GOP. | ||
774 | */ | ||
775 | width = info->horizontal_resolution; | ||
776 | height = info->vertical_resolution; | ||
777 | pixel_format = info->pixel_format; | ||
778 | pixel_info = info->pixel_information; | ||
779 | pixels_per_scan_line = info->pixels_per_scan_line; | ||
780 | |||
781 | /* | ||
782 | * Once we've found a GOP supporting ConOut, | ||
783 | * don't bother looking any further. | ||
784 | */ | ||
785 | first_gop = gop64; | ||
786 | if (conout_found) | ||
787 | break; | ||
788 | } | ||
789 | } | ||
790 | |||
791 | /* Did we find any GOPs? */ | ||
792 | if (!first_gop) | ||
793 | goto out; | ||
794 | |||
795 | /* EFI framebuffer */ | ||
796 | si->orig_video_isVGA = VIDEO_TYPE_EFI; | ||
797 | |||
798 | si->lfb_width = width; | ||
799 | si->lfb_height = height; | ||
800 | si->lfb_base = fb_base; | ||
801 | si->pages = 1; | ||
802 | |||
803 | setup_pixel_info(si, pixels_per_scan_line, pixel_info, pixel_format); | ||
804 | |||
805 | si->lfb_size = si->lfb_linelength * si->lfb_height; | ||
806 | |||
807 | si->capabilities |= VIDEO_CAPABILITY_SKIP_QUIRKS; | ||
808 | out: | ||
307 | return status; | 809 | return status; |
308 | } | 810 | } |
309 | 811 | ||
310 | /* | 812 | /* |
311 | * See if we have Universal Graphics Adapter (UGA) protocol | 813 | * See if we have Graphics Output Protocol |
312 | */ | 814 | */ |
313 | static efi_status_t setup_uga(struct screen_info *si, efi_guid_t *uga_proto, | 815 | static efi_status_t setup_gop(struct screen_info *si, efi_guid_t *proto, |
314 | unsigned long size) | 816 | unsigned long size) |
315 | { | 817 | { |
316 | struct efi_uga_draw_protocol *uga, *first_uga; | ||
317 | unsigned long nr_ugas; | ||
318 | efi_status_t status; | 818 | efi_status_t status; |
319 | u32 width, height; | 819 | void **gop_handle = NULL; |
320 | void **uga_handle = NULL; | ||
321 | int i; | ||
322 | 820 | ||
323 | status = efi_call_phys3(sys_table->boottime->allocate_pool, | 821 | status = efi_call_early(allocate_pool, EFI_LOADER_DATA, |
324 | EFI_LOADER_DATA, size, &uga_handle); | 822 | size, (void **)&gop_handle); |
325 | if (status != EFI_SUCCESS) | 823 | if (status != EFI_SUCCESS) |
326 | return status; | 824 | return status; |
327 | 825 | ||
328 | status = efi_call_phys5(sys_table->boottime->locate_handle, | 826 | status = efi_call_early(locate_handle, |
329 | EFI_LOCATE_BY_PROTOCOL, uga_proto, | 827 | EFI_LOCATE_BY_PROTOCOL, |
330 | NULL, &size, uga_handle); | 828 | proto, NULL, &size, gop_handle); |
331 | if (status != EFI_SUCCESS) | 829 | if (status != EFI_SUCCESS) |
332 | goto free_handle; | 830 | goto free_handle; |
333 | 831 | ||
832 | if (efi_early->is64) | ||
833 | status = setup_gop64(si, proto, size, gop_handle); | ||
834 | else | ||
835 | status = setup_gop32(si, proto, size, gop_handle); | ||
836 | |||
837 | free_handle: | ||
838 | efi_call_early(free_pool, gop_handle); | ||
839 | return status; | ||
840 | } | ||
841 | |||
842 | static efi_status_t | ||
843 | setup_uga32(void **uga_handle, unsigned long size, u32 *width, u32 *height) | ||
844 | { | ||
845 | struct efi_uga_draw_protocol *uga = NULL, *first_uga; | ||
846 | efi_guid_t uga_proto = EFI_UGA_PROTOCOL_GUID; | ||
847 | unsigned long nr_ugas; | ||
848 | u32 *handles = (u32 *)uga_handle; | ||
849 | efi_status_t status; | ||
850 | int i; | ||
851 | |||
334 | first_uga = NULL; | 852 | first_uga = NULL; |
853 | nr_ugas = size / sizeof(u32); | ||
854 | for (i = 0; i < nr_ugas; i++) { | ||
855 | efi_guid_t pciio_proto = EFI_PCI_IO_PROTOCOL_GUID; | ||
856 | u32 w, h, depth, refresh; | ||
857 | void *pciio; | ||
858 | u32 handle = handles[i]; | ||
335 | 859 | ||
336 | nr_ugas = size / sizeof(void *); | 860 | status = efi_call_early(handle_protocol, handle, |
861 | &uga_proto, (void **)&uga); | ||
862 | if (status != EFI_SUCCESS) | ||
863 | continue; | ||
864 | |||
865 | efi_call_early(handle_protocol, handle, &pciio_proto, &pciio); | ||
866 | |||
867 | status = efi_early->call((unsigned long)uga->get_mode, uga, | ||
868 | &w, &h, &depth, &refresh); | ||
869 | if (status == EFI_SUCCESS && (!first_uga || pciio)) { | ||
870 | *width = w; | ||
871 | *height = h; | ||
872 | |||
873 | /* | ||
874 | * Once we've found a UGA supporting PCIIO, | ||
875 | * don't bother looking any further. | ||
876 | */ | ||
877 | if (pciio) | ||
878 | break; | ||
879 | |||
880 | first_uga = uga; | ||
881 | } | ||
882 | } | ||
883 | |||
884 | return status; | ||
885 | } | ||
886 | |||
887 | static efi_status_t | ||
888 | setup_uga64(void **uga_handle, unsigned long size, u32 *width, u32 *height) | ||
889 | { | ||
890 | struct efi_uga_draw_protocol *uga = NULL, *first_uga; | ||
891 | efi_guid_t uga_proto = EFI_UGA_PROTOCOL_GUID; | ||
892 | unsigned long nr_ugas; | ||
893 | u64 *handles = (u64 *)uga_handle; | ||
894 | efi_status_t status; | ||
895 | int i; | ||
896 | |||
897 | first_uga = NULL; | ||
898 | nr_ugas = size / sizeof(u64); | ||
337 | for (i = 0; i < nr_ugas; i++) { | 899 | for (i = 0; i < nr_ugas; i++) { |
338 | efi_guid_t pciio_proto = EFI_PCI_IO_PROTOCOL_GUID; | 900 | efi_guid_t pciio_proto = EFI_PCI_IO_PROTOCOL_GUID; |
339 | void *handle = uga_handle[i]; | ||
340 | u32 w, h, depth, refresh; | 901 | u32 w, h, depth, refresh; |
341 | void *pciio; | 902 | void *pciio; |
903 | u64 handle = handles[i]; | ||
342 | 904 | ||
343 | status = efi_call_phys3(sys_table->boottime->handle_protocol, | 905 | status = efi_call_early(handle_protocol, handle, |
344 | handle, uga_proto, &uga); | 906 | &uga_proto, (void **)&uga); |
345 | if (status != EFI_SUCCESS) | 907 | if (status != EFI_SUCCESS) |
346 | continue; | 908 | continue; |
347 | 909 | ||
348 | efi_call_phys3(sys_table->boottime->handle_protocol, | 910 | efi_call_early(handle_protocol, handle, &pciio_proto, &pciio); |
349 | handle, &pciio_proto, &pciio); | ||
350 | 911 | ||
351 | status = efi_call_phys5(uga->get_mode, uga, &w, &h, | 912 | status = efi_early->call((unsigned long)uga->get_mode, uga, |
352 | &depth, &refresh); | 913 | &w, &h, &depth, &refresh); |
353 | if (status == EFI_SUCCESS && (!first_uga || pciio)) { | 914 | if (status == EFI_SUCCESS && (!first_uga || pciio)) { |
354 | width = w; | 915 | *width = w; |
355 | height = h; | 916 | *height = h; |
356 | 917 | ||
357 | /* | 918 | /* |
358 | * Once we've found a UGA supporting PCIIO, | 919 | * Once we've found a UGA supporting PCIIO, |
@@ -365,7 +926,39 @@ static efi_status_t setup_uga(struct screen_info *si, efi_guid_t *uga_proto, | |||
365 | } | 926 | } |
366 | } | 927 | } |
367 | 928 | ||
368 | if (!first_uga) | 929 | return status; |
930 | } | ||
931 | |||
932 | /* | ||
933 | * See if we have Universal Graphics Adapter (UGA) protocol | ||
934 | */ | ||
935 | static efi_status_t setup_uga(struct screen_info *si, efi_guid_t *uga_proto, | ||
936 | unsigned long size) | ||
937 | { | ||
938 | efi_status_t status; | ||
939 | u32 width, height; | ||
940 | void **uga_handle = NULL; | ||
941 | |||
942 | status = efi_call_early(allocate_pool, EFI_LOADER_DATA, | ||
943 | size, (void **)&uga_handle); | ||
944 | if (status != EFI_SUCCESS) | ||
945 | return status; | ||
946 | |||
947 | status = efi_call_early(locate_handle, | ||
948 | EFI_LOCATE_BY_PROTOCOL, | ||
949 | uga_proto, NULL, &size, uga_handle); | ||
950 | if (status != EFI_SUCCESS) | ||
951 | goto free_handle; | ||
952 | |||
953 | height = 0; | ||
954 | width = 0; | ||
955 | |||
956 | if (efi_early->is64) | ||
957 | status = setup_uga64(uga_handle, size, &width, &height); | ||
958 | else | ||
959 | status = setup_uga32(uga_handle, size, &width, &height); | ||
960 | |||
961 | if (!width && !height) | ||
369 | goto free_handle; | 962 | goto free_handle; |
370 | 963 | ||
371 | /* EFI framebuffer */ | 964 | /* EFI framebuffer */ |
@@ -384,9 +977,8 @@ static efi_status_t setup_uga(struct screen_info *si, efi_guid_t *uga_proto, | |||
384 | si->rsvd_size = 8; | 977 | si->rsvd_size = 8; |
385 | si->rsvd_pos = 24; | 978 | si->rsvd_pos = 24; |
386 | 979 | ||
387 | |||
388 | free_handle: | 980 | free_handle: |
389 | efi_call_phys1(sys_table->boottime->free_pool, uga_handle); | 981 | efi_call_early(free_pool, uga_handle); |
390 | return status; | 982 | return status; |
391 | } | 983 | } |
392 | 984 | ||
@@ -404,29 +996,31 @@ void setup_graphics(struct boot_params *boot_params) | |||
404 | memset(si, 0, sizeof(*si)); | 996 | memset(si, 0, sizeof(*si)); |
405 | 997 | ||
406 | size = 0; | 998 | size = 0; |
407 | status = efi_call_phys5(sys_table->boottime->locate_handle, | 999 | status = efi_call_early(locate_handle, |
408 | EFI_LOCATE_BY_PROTOCOL, &graphics_proto, | 1000 | EFI_LOCATE_BY_PROTOCOL, |
409 | NULL, &size, gop_handle); | 1001 | &graphics_proto, NULL, &size, gop_handle); |
410 | if (status == EFI_BUFFER_TOO_SMALL) | 1002 | if (status == EFI_BUFFER_TOO_SMALL) |
411 | status = setup_gop(si, &graphics_proto, size); | 1003 | status = setup_gop(si, &graphics_proto, size); |
412 | 1004 | ||
413 | if (status != EFI_SUCCESS) { | 1005 | if (status != EFI_SUCCESS) { |
414 | size = 0; | 1006 | size = 0; |
415 | status = efi_call_phys5(sys_table->boottime->locate_handle, | 1007 | status = efi_call_early(locate_handle, |
416 | EFI_LOCATE_BY_PROTOCOL, &uga_proto, | 1008 | EFI_LOCATE_BY_PROTOCOL, |
417 | NULL, &size, uga_handle); | 1009 | &uga_proto, NULL, &size, uga_handle); |
418 | if (status == EFI_BUFFER_TOO_SMALL) | 1010 | if (status == EFI_BUFFER_TOO_SMALL) |
419 | setup_uga(si, &uga_proto, size); | 1011 | setup_uga(si, &uga_proto, size); |
420 | } | 1012 | } |
421 | } | 1013 | } |
422 | 1014 | ||
423 | |||
424 | /* | 1015 | /* |
425 | * Because the x86 boot code expects to be passed a boot_params we | 1016 | * Because the x86 boot code expects to be passed a boot_params we |
426 | * need to create one ourselves (usually the bootloader would create | 1017 | * need to create one ourselves (usually the bootloader would create |
427 | * one for us). | 1018 | * one for us). |
1019 | * | ||
1020 | * The caller is responsible for filling out ->code32_start in the | ||
1021 | * returned boot_params. | ||
428 | */ | 1022 | */ |
429 | struct boot_params *make_boot_params(void *handle, efi_system_table_t *_table) | 1023 | struct boot_params *make_boot_params(struct efi_config *c) |
430 | { | 1024 | { |
431 | struct boot_params *boot_params; | 1025 | struct boot_params *boot_params; |
432 | struct sys_desc_table *sdt; | 1026 | struct sys_desc_table *sdt; |
@@ -434,7 +1028,7 @@ struct boot_params *make_boot_params(void *handle, efi_system_table_t *_table) | |||
434 | struct setup_header *hdr; | 1028 | struct setup_header *hdr; |
435 | struct efi_info *efi; | 1029 | struct efi_info *efi; |
436 | efi_loaded_image_t *image; | 1030 | efi_loaded_image_t *image; |
437 | void *options; | 1031 | void *options, *handle; |
438 | efi_guid_t proto = LOADED_IMAGE_PROTOCOL_GUID; | 1032 | efi_guid_t proto = LOADED_IMAGE_PROTOCOL_GUID; |
439 | int options_size = 0; | 1033 | int options_size = 0; |
440 | efi_status_t status; | 1034 | efi_status_t status; |
@@ -445,14 +1039,21 @@ struct boot_params *make_boot_params(void *handle, efi_system_table_t *_table) | |||
445 | unsigned long ramdisk_addr; | 1039 | unsigned long ramdisk_addr; |
446 | unsigned long ramdisk_size; | 1040 | unsigned long ramdisk_size; |
447 | 1041 | ||
448 | sys_table = _table; | 1042 | efi_early = c; |
1043 | sys_table = (efi_system_table_t *)(unsigned long)efi_early->table; | ||
1044 | handle = (void *)(unsigned long)efi_early->image_handle; | ||
449 | 1045 | ||
450 | /* Check if we were booted by the EFI firmware */ | 1046 | /* Check if we were booted by the EFI firmware */ |
451 | if (sys_table->hdr.signature != EFI_SYSTEM_TABLE_SIGNATURE) | 1047 | if (sys_table->hdr.signature != EFI_SYSTEM_TABLE_SIGNATURE) |
452 | return NULL; | 1048 | return NULL; |
453 | 1049 | ||
454 | status = efi_call_phys3(sys_table->boottime->handle_protocol, | 1050 | if (efi_early->is64) |
455 | handle, &proto, (void *)&image); | 1051 | setup_boot_services64(efi_early); |
1052 | else | ||
1053 | setup_boot_services32(efi_early); | ||
1054 | |||
1055 | status = efi_call_early(handle_protocol, handle, | ||
1056 | &proto, (void *)&image); | ||
456 | if (status != EFI_SUCCESS) { | 1057 | if (status != EFI_SUCCESS) { |
457 | efi_printk(sys_table, "Failed to get handle for LOADED_IMAGE_PROTOCOL\n"); | 1058 | efi_printk(sys_table, "Failed to get handle for LOADED_IMAGE_PROTOCOL\n"); |
458 | return NULL; | 1059 | return NULL; |
@@ -483,8 +1084,6 @@ struct boot_params *make_boot_params(void *handle, efi_system_table_t *_table) | |||
483 | hdr->vid_mode = 0xffff; | 1084 | hdr->vid_mode = 0xffff; |
484 | hdr->boot_flag = 0xAA55; | 1085 | hdr->boot_flag = 0xAA55; |
485 | 1086 | ||
486 | hdr->code32_start = (__u64)(unsigned long)image->image_base; | ||
487 | |||
488 | hdr->type_of_loader = 0x21; | 1087 | hdr->type_of_loader = 0x21; |
489 | 1088 | ||
490 | /* Convert unicode cmdline to ascii */ | 1089 | /* Convert unicode cmdline to ascii */ |
@@ -641,14 +1240,13 @@ static efi_status_t alloc_e820ext(u32 nr_desc, struct setup_data **e820ext, | |||
641 | sizeof(struct e820entry) * nr_desc; | 1240 | sizeof(struct e820entry) * nr_desc; |
642 | 1241 | ||
643 | if (*e820ext) { | 1242 | if (*e820ext) { |
644 | efi_call_phys1(sys_table->boottime->free_pool, *e820ext); | 1243 | efi_call_early(free_pool, *e820ext); |
645 | *e820ext = NULL; | 1244 | *e820ext = NULL; |
646 | *e820ext_size = 0; | 1245 | *e820ext_size = 0; |
647 | } | 1246 | } |
648 | 1247 | ||
649 | status = efi_call_phys3(sys_table->boottime->allocate_pool, | 1248 | status = efi_call_early(allocate_pool, EFI_LOADER_DATA, |
650 | EFI_LOADER_DATA, size, e820ext); | 1249 | size, (void **)e820ext); |
651 | |||
652 | if (status == EFI_SUCCESS) | 1250 | if (status == EFI_SUCCESS) |
653 | *e820ext_size = size; | 1251 | *e820ext_size = size; |
654 | 1252 | ||
@@ -656,12 +1254,13 @@ static efi_status_t alloc_e820ext(u32 nr_desc, struct setup_data **e820ext, | |||
656 | } | 1254 | } |
657 | 1255 | ||
658 | static efi_status_t exit_boot(struct boot_params *boot_params, | 1256 | static efi_status_t exit_boot(struct boot_params *boot_params, |
659 | void *handle) | 1257 | void *handle, bool is64) |
660 | { | 1258 | { |
661 | struct efi_info *efi = &boot_params->efi_info; | 1259 | struct efi_info *efi = &boot_params->efi_info; |
662 | unsigned long map_sz, key, desc_size; | 1260 | unsigned long map_sz, key, desc_size; |
663 | efi_memory_desc_t *mem_map; | 1261 | efi_memory_desc_t *mem_map; |
664 | struct setup_data *e820ext; | 1262 | struct setup_data *e820ext; |
1263 | const char *signature; | ||
665 | __u32 e820ext_size; | 1264 | __u32 e820ext_size; |
666 | __u32 nr_desc, prev_nr_desc; | 1265 | __u32 nr_desc, prev_nr_desc; |
667 | efi_status_t status; | 1266 | efi_status_t status; |
@@ -691,11 +1290,13 @@ get_map: | |||
691 | if (status != EFI_SUCCESS) | 1290 | if (status != EFI_SUCCESS) |
692 | goto free_mem_map; | 1291 | goto free_mem_map; |
693 | 1292 | ||
694 | efi_call_phys1(sys_table->boottime->free_pool, mem_map); | 1293 | efi_call_early(free_pool, mem_map); |
695 | goto get_map; /* Allocated memory, get map again */ | 1294 | goto get_map; /* Allocated memory, get map again */ |
696 | } | 1295 | } |
697 | 1296 | ||
698 | memcpy(&efi->efi_loader_signature, EFI_LOADER_SIGNATURE, sizeof(__u32)); | 1297 | signature = is64 ? EFI64_LOADER_SIGNATURE : EFI32_LOADER_SIGNATURE; |
1298 | memcpy(&efi->efi_loader_signature, signature, sizeof(__u32)); | ||
1299 | |||
699 | efi->efi_systab = (unsigned long)sys_table; | 1300 | efi->efi_systab = (unsigned long)sys_table; |
700 | efi->efi_memdesc_size = desc_size; | 1301 | efi->efi_memdesc_size = desc_size; |
701 | efi->efi_memdesc_version = desc_version; | 1302 | efi->efi_memdesc_version = desc_version; |
@@ -708,8 +1309,7 @@ get_map: | |||
708 | #endif | 1309 | #endif |
709 | 1310 | ||
710 | /* Might as well exit boot services now */ | 1311 | /* Might as well exit boot services now */ |
711 | status = efi_call_phys2(sys_table->boottime->exit_boot_services, | 1312 | status = efi_call_early(exit_boot_services, handle, key); |
712 | handle, key); | ||
713 | if (status != EFI_SUCCESS) { | 1313 | if (status != EFI_SUCCESS) { |
714 | /* | 1314 | /* |
715 | * ExitBootServices() will fail if any of the event | 1315 | * ExitBootServices() will fail if any of the event |
@@ -722,7 +1322,7 @@ get_map: | |||
722 | goto free_mem_map; | 1322 | goto free_mem_map; |
723 | 1323 | ||
724 | called_exit = true; | 1324 | called_exit = true; |
725 | efi_call_phys1(sys_table->boottime->free_pool, mem_map); | 1325 | efi_call_early(free_pool, mem_map); |
726 | goto get_map; | 1326 | goto get_map; |
727 | } | 1327 | } |
728 | 1328 | ||
@@ -736,23 +1336,31 @@ get_map: | |||
736 | return EFI_SUCCESS; | 1336 | return EFI_SUCCESS; |
737 | 1337 | ||
738 | free_mem_map: | 1338 | free_mem_map: |
739 | efi_call_phys1(sys_table->boottime->free_pool, mem_map); | 1339 | efi_call_early(free_pool, mem_map); |
740 | return status; | 1340 | return status; |
741 | } | 1341 | } |
742 | 1342 | ||
743 | |||
744 | /* | 1343 | /* |
745 | * On success we return a pointer to a boot_params structure, and NULL | 1344 | * On success we return a pointer to a boot_params structure, and NULL |
746 | * on failure. | 1345 | * on failure. |
747 | */ | 1346 | */ |
748 | struct boot_params *efi_main(void *handle, efi_system_table_t *_table, | 1347 | struct boot_params *efi_main(struct efi_config *c, |
749 | struct boot_params *boot_params) | 1348 | struct boot_params *boot_params) |
750 | { | 1349 | { |
751 | struct desc_ptr *gdt; | 1350 | struct desc_ptr *gdt = NULL; |
752 | efi_loaded_image_t *image; | 1351 | efi_loaded_image_t *image; |
753 | struct setup_header *hdr = &boot_params->hdr; | 1352 | struct setup_header *hdr = &boot_params->hdr; |
754 | efi_status_t status; | 1353 | efi_status_t status; |
755 | struct desc_struct *desc; | 1354 | struct desc_struct *desc; |
1355 | void *handle; | ||
1356 | efi_system_table_t *_table; | ||
1357 | bool is64; | ||
1358 | |||
1359 | efi_early = c; | ||
1360 | |||
1361 | _table = (efi_system_table_t *)(unsigned long)efi_early->table; | ||
1362 | handle = (void *)(unsigned long)efi_early->image_handle; | ||
1363 | is64 = efi_early->is64; | ||
756 | 1364 | ||
757 | sys_table = _table; | 1365 | sys_table = _table; |
758 | 1366 | ||
@@ -760,13 +1368,17 @@ struct boot_params *efi_main(void *handle, efi_system_table_t *_table, | |||
760 | if (sys_table->hdr.signature != EFI_SYSTEM_TABLE_SIGNATURE) | 1368 | if (sys_table->hdr.signature != EFI_SYSTEM_TABLE_SIGNATURE) |
761 | goto fail; | 1369 | goto fail; |
762 | 1370 | ||
1371 | if (is64) | ||
1372 | setup_boot_services64(efi_early); | ||
1373 | else | ||
1374 | setup_boot_services32(efi_early); | ||
1375 | |||
763 | setup_graphics(boot_params); | 1376 | setup_graphics(boot_params); |
764 | 1377 | ||
765 | setup_efi_pci(boot_params); | 1378 | setup_efi_pci(boot_params); |
766 | 1379 | ||
767 | status = efi_call_phys3(sys_table->boottime->allocate_pool, | 1380 | status = efi_call_early(allocate_pool, EFI_LOADER_DATA, |
768 | EFI_LOADER_DATA, sizeof(*gdt), | 1381 | sizeof(*gdt), (void **)&gdt); |
769 | (void **)&gdt); | ||
770 | if (status != EFI_SUCCESS) { | 1382 | if (status != EFI_SUCCESS) { |
771 | efi_printk(sys_table, "Failed to alloc mem for gdt structure\n"); | 1383 | efi_printk(sys_table, "Failed to alloc mem for gdt structure\n"); |
772 | goto fail; | 1384 | goto fail; |
@@ -797,7 +1409,7 @@ struct boot_params *efi_main(void *handle, efi_system_table_t *_table, | |||
797 | hdr->code32_start = bzimage_addr; | 1409 | hdr->code32_start = bzimage_addr; |
798 | } | 1410 | } |
799 | 1411 | ||
800 | status = exit_boot(boot_params, handle); | 1412 | status = exit_boot(boot_params, handle, is64); |
801 | if (status != EFI_SUCCESS) | 1413 | if (status != EFI_SUCCESS) |
802 | goto fail; | 1414 | goto fail; |
803 | 1415 | ||
diff --git a/arch/x86/boot/compressed/eboot.h b/arch/x86/boot/compressed/eboot.h index 81b6b652b46a..c88c31ecad12 100644 --- a/arch/x86/boot/compressed/eboot.h +++ b/arch/x86/boot/compressed/eboot.h | |||
@@ -37,6 +37,24 @@ struct efi_graphics_output_mode_info { | |||
37 | u32 pixels_per_scan_line; | 37 | u32 pixels_per_scan_line; |
38 | } __packed; | 38 | } __packed; |
39 | 39 | ||
40 | struct efi_graphics_output_protocol_mode_32 { | ||
41 | u32 max_mode; | ||
42 | u32 mode; | ||
43 | u32 info; | ||
44 | u32 size_of_info; | ||
45 | u64 frame_buffer_base; | ||
46 | u32 frame_buffer_size; | ||
47 | } __packed; | ||
48 | |||
49 | struct efi_graphics_output_protocol_mode_64 { | ||
50 | u32 max_mode; | ||
51 | u32 mode; | ||
52 | u64 info; | ||
53 | u64 size_of_info; | ||
54 | u64 frame_buffer_base; | ||
55 | u64 frame_buffer_size; | ||
56 | } __packed; | ||
57 | |||
40 | struct efi_graphics_output_protocol_mode { | 58 | struct efi_graphics_output_protocol_mode { |
41 | u32 max_mode; | 59 | u32 max_mode; |
42 | u32 mode; | 60 | u32 mode; |
@@ -46,6 +64,20 @@ struct efi_graphics_output_protocol_mode { | |||
46 | unsigned long frame_buffer_size; | 64 | unsigned long frame_buffer_size; |
47 | } __packed; | 65 | } __packed; |
48 | 66 | ||
67 | struct efi_graphics_output_protocol_32 { | ||
68 | u32 query_mode; | ||
69 | u32 set_mode; | ||
70 | u32 blt; | ||
71 | u32 mode; | ||
72 | }; | ||
73 | |||
74 | struct efi_graphics_output_protocol_64 { | ||
75 | u64 query_mode; | ||
76 | u64 set_mode; | ||
77 | u64 blt; | ||
78 | u64 mode; | ||
79 | }; | ||
80 | |||
49 | struct efi_graphics_output_protocol { | 81 | struct efi_graphics_output_protocol { |
50 | void *query_mode; | 82 | void *query_mode; |
51 | unsigned long set_mode; | 83 | unsigned long set_mode; |
@@ -53,10 +85,38 @@ struct efi_graphics_output_protocol { | |||
53 | struct efi_graphics_output_protocol_mode *mode; | 85 | struct efi_graphics_output_protocol_mode *mode; |
54 | }; | 86 | }; |
55 | 87 | ||
88 | struct efi_uga_draw_protocol_32 { | ||
89 | u32 get_mode; | ||
90 | u32 set_mode; | ||
91 | u32 blt; | ||
92 | }; | ||
93 | |||
94 | struct efi_uga_draw_protocol_64 { | ||
95 | u64 get_mode; | ||
96 | u64 set_mode; | ||
97 | u64 blt; | ||
98 | }; | ||
99 | |||
56 | struct efi_uga_draw_protocol { | 100 | struct efi_uga_draw_protocol { |
57 | void *get_mode; | 101 | void *get_mode; |
58 | void *set_mode; | 102 | void *set_mode; |
59 | void *blt; | 103 | void *blt; |
60 | }; | 104 | }; |
61 | 105 | ||
106 | struct efi_config { | ||
107 | u64 image_handle; | ||
108 | u64 table; | ||
109 | u64 allocate_pool; | ||
110 | u64 allocate_pages; | ||
111 | u64 get_memory_map; | ||
112 | u64 free_pool; | ||
113 | u64 free_pages; | ||
114 | u64 locate_handle; | ||
115 | u64 handle_protocol; | ||
116 | u64 exit_boot_services; | ||
117 | u64 text_output; | ||
118 | efi_status_t (*call)(unsigned long, ...); | ||
119 | bool is64; | ||
120 | } __packed; | ||
121 | |||
62 | #endif /* BOOT_COMPRESSED_EBOOT_H */ | 122 | #endif /* BOOT_COMPRESSED_EBOOT_H */ |
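
The __packed efi_config layout is ABI, not just convenience: head_32.S below relocates the call member through a hard-coded byte offset (add %esi, 88(%eax)), which holds only while the eleven leading u64 fields span exactly 88 bytes. A compile-time guard for that assumption (standalone mirror of the struct; the efi_status_t typedef is a stand-in):

#include <stdbool.h>
#include <stddef.h>
#include <stdint.h>

typedef unsigned long efi_status_t;	/* stand-in typedef */

struct efi_config {
	uint64_t image_handle;
	uint64_t table;
	uint64_t allocate_pool;
	uint64_t allocate_pages;
	uint64_t get_memory_map;
	uint64_t free_pool;
	uint64_t free_pages;
	uint64_t locate_handle;
	uint64_t handle_protocol;
	uint64_t exit_boot_services;
	uint64_t text_output;
	efi_status_t (*call)(unsigned long, ...);
	bool is64;
} __attribute__((packed));

/* 11 u64 members before ->call: the assembly's hard-coded 88. */
_Static_assert(offsetof(struct efi_config, call) == 88,
	       "efi_config.call moved; fix the head_32.S fixups");

int main(void)
{
	return 0;
}
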
diff --git a/arch/x86/boot/compressed/efi_stub_64.S b/arch/x86/boot/compressed/efi_stub_64.S index cedc60de86eb..7ff3632806b1 100644 --- a/arch/x86/boot/compressed/efi_stub_64.S +++ b/arch/x86/boot/compressed/efi_stub_64.S | |||
@@ -1 +1,30 @@ | |||
1 | #include <asm/segment.h> | ||
2 | #include <asm/msr.h> | ||
3 | #include <asm/processor-flags.h> | ||
4 | |||
1 | #include "../../platform/efi/efi_stub_64.S" | 5 | #include "../../platform/efi/efi_stub_64.S" |
6 | |||
7 | #ifdef CONFIG_EFI_MIXED | ||
8 | .code64 | ||
9 | .text | ||
10 | ENTRY(efi64_thunk) | ||
11 | push %rbp | ||
12 | push %rbx | ||
13 | |||
14 | subq $16, %rsp | ||
15 | leaq efi_exit32(%rip), %rax | ||
16 | movl %eax, 8(%rsp) | ||
17 | leaq efi_gdt64(%rip), %rax | ||
18 | movl %eax, 4(%rsp) | ||
19 | movl %eax, 2(%rax) /* Fixup the gdt base address */ | ||
20 | leaq efi32_boot_gdt(%rip), %rax | ||
21 | movl %eax, (%rsp) | ||
22 | |||
23 | call __efi64_thunk | ||
24 | |||
25 | addq $16, %rsp | ||
26 | pop %rbx | ||
27 | pop %rbp | ||
28 | ret | ||
29 | ENDPROC(efi64_thunk) | ||
30 | #endif /* CONFIG_EFI_MIXED */ | ||
diff --git a/arch/x86/boot/compressed/head_32.S b/arch/x86/boot/compressed/head_32.S index 5d6f6891b188..cbed1407a5cd 100644 --- a/arch/x86/boot/compressed/head_32.S +++ b/arch/x86/boot/compressed/head_32.S | |||
@@ -42,33 +42,56 @@ ENTRY(startup_32) | |||
42 | ENTRY(efi_pe_entry) | 42 | ENTRY(efi_pe_entry) |
43 | add $0x4, %esp | 43 | add $0x4, %esp |
44 | 44 | ||
45 | call 1f | ||
46 | 1: popl %esi | ||
47 | subl $1b, %esi | ||
48 | |||
49 | popl %ecx | ||
50 | movl %ecx, efi32_config(%esi) /* Handle */ | ||
51 | popl %ecx | ||
52 | movl %ecx, efi32_config+8(%esi) /* EFI System table pointer */ | ||
53 | |||
54 | /* Relocate efi_config->call() */ | ||
55 | leal efi32_config(%esi), %eax | ||
56 | add %esi, 88(%eax) | ||
57 | pushl %eax | ||
58 | |||
45 | call make_boot_params | 59 | call make_boot_params |
46 | cmpl $0, %eax | 60 | cmpl $0, %eax |
47 | je 1f | 61 | je fail |
48 | movl 0x4(%esp), %esi | 62 | movl %esi, BP_code32_start(%eax) |
49 | movl (%esp), %ecx | 63 | popl %ecx |
50 | pushl %eax | 64 | pushl %eax |
51 | pushl %esi | ||
52 | pushl %ecx | 65 | pushl %ecx |
53 | sub $0x4, %esp | 66 | jmp 2f /* Skip efi_config initialization */ |
54 | 67 | ||
55 | ENTRY(efi_stub_entry) | 68 | ENTRY(efi32_stub_entry) |
56 | add $0x4, %esp | 69 | add $0x4, %esp |
70 | popl %ecx | ||
71 | popl %edx | ||
72 | |||
73 | call 1f | ||
74 | 1: popl %esi | ||
75 | subl $1b, %esi | ||
76 | |||
77 | movl %ecx, efi32_config(%esi) /* Handle */ | ||
78 | movl %edx, efi32_config+8(%esi) /* EFI System table pointer */ | ||
79 | |||
80 | /* Relocate efi_config->call() */ | ||
81 | leal efi32_config(%esi), %eax | ||
82 | add %esi, 88(%eax) | ||
83 | pushl %eax | ||
84 | 2: | ||
57 | call efi_main | 85 | call efi_main |
58 | cmpl $0, %eax | 86 | cmpl $0, %eax |
59 | movl %eax, %esi | 87 | movl %eax, %esi |
60 | jne 2f | 88 | jne 2f |
61 | 1: | 89 | fail: |
62 | /* EFI init failed, so hang. */ | 90 | /* EFI init failed, so hang. */ |
63 | hlt | 91 | hlt |
64 | jmp 1b | 92 | jmp fail |
65 | 2: | 93 | 2: |
66 | call 3f | 94 | movl BP_code32_start(%esi), %eax |
67 | 3: | ||
68 | popl %eax | ||
69 | subl $3b, %eax | ||
70 | subl BP_pref_address(%esi), %eax | ||
71 | add BP_code32_start(%esi), %eax | ||
72 | leal preferred_addr(%eax), %eax | 95 | leal preferred_addr(%eax), %eax |
73 | jmp *%eax | 96 | jmp *%eax |
74 | 97 | ||
@@ -117,9 +140,11 @@ preferred_addr: | |||
117 | addl %eax, %ebx | 140 | addl %eax, %ebx |
118 | notl %eax | 141 | notl %eax |
119 | andl %eax, %ebx | 142 | andl %eax, %ebx |
120 | #else | 143 | cmpl $LOAD_PHYSICAL_ADDR, %ebx |
121 | movl $LOAD_PHYSICAL_ADDR, %ebx | 144 | jge 1f |
122 | #endif | 145 | #endif |
146 | movl $LOAD_PHYSICAL_ADDR, %ebx | ||
147 | 1: | ||
123 | 148 | ||
124 | /* Target address to relocate to for decompression */ | 149 | /* Target address to relocate to for decompression */ |
125 | addl $z_extract_offset, %ebx | 150 | addl $z_extract_offset, %ebx |
@@ -191,14 +216,23 @@ relocated: | |||
191 | leal boot_heap(%ebx), %eax | 216 | leal boot_heap(%ebx), %eax |
192 | pushl %eax /* heap area */ | 217 | pushl %eax /* heap area */ |
193 | pushl %esi /* real mode pointer */ | 218 | pushl %esi /* real mode pointer */ |
194 | call decompress_kernel | 219 | call decompress_kernel /* returns kernel location in %eax */ |
195 | addl $24, %esp | 220 | addl $24, %esp |
196 | 221 | ||
197 | /* | 222 | /* |
198 | * Jump to the decompressed kernel. | 223 | * Jump to the decompressed kernel. |
199 | */ | 224 | */ |
200 | xorl %ebx, %ebx | 225 | xorl %ebx, %ebx |
201 | jmp *%ebp | 226 | jmp *%eax |
227 | |||
228 | #ifdef CONFIG_EFI_STUB | ||
229 | .data | ||
230 | efi32_config: | ||
231 | .fill 11,8,0 | ||
232 | .long efi_call_phys | ||
233 | .long 0 | ||
234 | .byte 0 | ||
235 | #endif | ||
202 | 236 | ||
203 | /* | 237 | /* |
204 | * Stack and heap for uncompression | 238 | * Stack and heap for uncompression |
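Note: the efi32_config blob emitted in .data above -- eleven zeroed 8-byte slots, efi_call_phys, a padding dword, and a flag byte -- mirrors the efi_config structure this series introduces in eboot.h; the call member lands at offset 88, which is exactly the "88(%eax)" the entry paths relocate. A sketch of the assumed layout (field names recalled from that header, so treat them as an assumption rather than a quote):

    struct efi_config {
            u64 image_handle;
            u64 table;
            u64 allocate_pool;
            u64 allocate_pages;
            u64 get_memory_map;
            u64 free_pool;
            u64 free_pages;
            u64 locate_handle;
            u64 handle_protocol;
            u64 exit_boot_services;
            u64 text_output;                          /* 11 slots == .fill 11,8,0 */
            efi_status_t (*call)(unsigned long, ...); /* offset 88                */
            bool is64;                                /* .byte 0 here; 1 in the
                                                       * 64-bit efi64_config      */
    } __packed;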
diff --git a/arch/x86/boot/compressed/head_64.S b/arch/x86/boot/compressed/head_64.S index c337422b575d..0d558ee899ae 100644 --- a/arch/x86/boot/compressed/head_64.S +++ b/arch/x86/boot/compressed/head_64.S | |||
@@ -94,9 +94,11 @@ ENTRY(startup_32) | |||
94 | addl %eax, %ebx | 94 | addl %eax, %ebx |
95 | notl %eax | 95 | notl %eax |
96 | andl %eax, %ebx | 96 | andl %eax, %ebx |
97 | #else | 97 | cmpl $LOAD_PHYSICAL_ADDR, %ebx |
98 | movl $LOAD_PHYSICAL_ADDR, %ebx | 98 | jge 1f |
99 | #endif | 99 | #endif |
100 | movl $LOAD_PHYSICAL_ADDR, %ebx | ||
101 | 1: | ||
100 | 102 | ||
101 | /* Target address to relocate to for decompression */ | 103 | /* Target address to relocate to for decompression */ |
102 | addl $z_extract_offset, %ebx | 104 | addl $z_extract_offset, %ebx |
@@ -111,7 +113,8 @@ ENTRY(startup_32) | |||
111 | lgdt gdt(%ebp) | 113 | lgdt gdt(%ebp) |
112 | 114 | ||
113 | /* Enable PAE mode */ | 115 | /* Enable PAE mode */ |
114 | movl $(X86_CR4_PAE), %eax | 116 | movl %cr4, %eax |
117 | orl $X86_CR4_PAE, %eax | ||
115 | movl %eax, %cr4 | 118 | movl %eax, %cr4 |
116 | 119 | ||
117 | /* | 120 | /* |
@@ -176,6 +179,13 @@ ENTRY(startup_32) | |||
176 | */ | 179 | */ |
177 | pushl $__KERNEL_CS | 180 | pushl $__KERNEL_CS |
178 | leal startup_64(%ebp), %eax | 181 | leal startup_64(%ebp), %eax |
182 | #ifdef CONFIG_EFI_MIXED | ||
183 | movl efi32_config(%ebp), %ebx | ||
184 | cmp $0, %ebx | ||
185 | jz 1f | ||
186 | leal handover_entry(%ebp), %eax | ||
187 | 1: | ||
188 | #endif | ||
179 | pushl %eax | 189 | pushl %eax |
180 | 190 | ||
181 | /* Enter paged protected Mode, activating Long Mode */ | 191 | /* Enter paged protected Mode, activating Long Mode */ |
@@ -186,6 +196,30 @@ ENTRY(startup_32) | |||
186 | lret | 196 | lret |
187 | ENDPROC(startup_32) | 197 | ENDPROC(startup_32) |
188 | 198 | ||
199 | #ifdef CONFIG_EFI_MIXED | ||
200 | .org 0x190 | ||
201 | ENTRY(efi32_stub_entry) | ||
202 | add $0x4, %esp /* Discard return address */ | ||
203 | popl %ecx | ||
204 | popl %edx | ||
205 | popl %esi | ||
206 | |||
207 | leal (BP_scratch+4)(%esi), %esp | ||
208 | call 1f | ||
209 | 1: pop %ebp | ||
210 | subl $1b, %ebp | ||
211 | |||
212 | movl %ecx, efi32_config(%ebp) | ||
213 | movl %edx, efi32_config+8(%ebp) | ||
214 | sgdtl efi32_boot_gdt(%ebp) | ||
215 | |||
216 | leal efi32_config(%ebp), %eax | ||
217 | movl %eax, efi_config(%ebp) | ||
218 | |||
219 | jmp startup_32 | ||
220 | ENDPROC(efi32_stub_entry) | ||
221 | #endif | ||
222 | |||
189 | .code64 | 223 | .code64 |
190 | .org 0x200 | 224 | .org 0x200 |
191 | ENTRY(startup_64) | 225 | ENTRY(startup_64) |
@@ -207,33 +241,52 @@ ENTRY(startup_64) | |||
207 | jmp preferred_addr | 241 | jmp preferred_addr |
208 | 242 | ||
209 | ENTRY(efi_pe_entry) | 243 | ENTRY(efi_pe_entry) |
210 | mov %rcx, %rdi | 244 | movq %rcx, efi64_config(%rip) /* Handle */ |
211 | mov %rdx, %rsi | 245 | movq %rdx, efi64_config+8(%rip) /* EFI System table pointer */ |
212 | pushq %rdi | 246 | |
213 | pushq %rsi | 247 | leaq efi64_config(%rip), %rax |
248 | movq %rax, efi_config(%rip) | ||
249 | |||
250 | call 1f | ||
251 | 1: popq %rbp | ||
252 | subq $1b, %rbp | ||
253 | |||
254 | /* | ||
255 | * Relocate efi_config->call(). | ||
256 | */ | ||
257 | addq %rbp, efi64_config+88(%rip) | ||
258 | |||
259 | movq %rax, %rdi | ||
214 | call make_boot_params | 260 | call make_boot_params |
215 | cmpq $0,%rax | 261 | cmpq $0,%rax |
216 | je 1f | 262 | je fail |
217 | mov %rax, %rdx | 263 | mov %rax, %rsi |
218 | popq %rsi | 264 | leaq startup_32(%rip), %rax |
219 | popq %rdi | 265 | movl %eax, BP_code32_start(%rsi) |
266 | jmp 2f /* Skip the relocation */ | ||
267 | |||
268 | handover_entry: | ||
269 | call 1f | ||
270 | 1: popq %rbp | ||
271 | subq $1b, %rbp | ||
220 | 272 | ||
221 | ENTRY(efi_stub_entry) | 273 | /* |
274 | * Relocate efi_config->call(). | ||
275 | */ | ||
276 | movq efi_config(%rip), %rax | ||
277 | addq %rbp, 88(%rax) | ||
278 | 2: | ||
279 | movq efi_config(%rip), %rdi | ||
222 | call efi_main | 280 | call efi_main |
223 | movq %rax,%rsi | 281 | movq %rax,%rsi |
224 | cmpq $0,%rax | 282 | cmpq $0,%rax |
225 | jne 2f | 283 | jne 2f |
226 | 1: | 284 | fail: |
227 | /* EFI init failed, so hang. */ | 285 | /* EFI init failed, so hang. */ |
228 | hlt | 286 | hlt |
229 | jmp 1b | 287 | jmp fail |
230 | 2: | 288 | 2: |
231 | call 3f | 289 | movl BP_code32_start(%esi), %eax |
232 | 3: | ||
233 | popq %rax | ||
234 | subq $3b, %rax | ||
235 | subq BP_pref_address(%rsi), %rax | ||
236 | add BP_code32_start(%esi), %eax | ||
237 | leaq preferred_addr(%rax), %rax | 290 | leaq preferred_addr(%rax), %rax |
238 | jmp *%rax | 291 | jmp *%rax |
239 | 292 | ||
@@ -269,9 +322,11 @@ preferred_addr: | |||
269 | addq %rax, %rbp | 322 | addq %rax, %rbp |
270 | notq %rax | 323 | notq %rax |
271 | andq %rax, %rbp | 324 | andq %rax, %rbp |
272 | #else | 325 | cmpq $LOAD_PHYSICAL_ADDR, %rbp |
273 | movq $LOAD_PHYSICAL_ADDR, %rbp | 326 | jge 1f |
274 | #endif | 327 | #endif |
328 | movq $LOAD_PHYSICAL_ADDR, %rbp | ||
329 | 1: | ||
275 | 330 | ||
276 | /* Target address to relocate to for decompression */ | 331 | /* Target address to relocate to for decompression */ |
277 | leaq z_extract_offset(%rbp), %rbx | 332 | leaq z_extract_offset(%rbp), %rbx |
@@ -303,6 +358,20 @@ preferred_addr: | |||
303 | leaq relocated(%rbx), %rax | 358 | leaq relocated(%rbx), %rax |
304 | jmp *%rax | 359 | jmp *%rax |
305 | 360 | ||
361 | #ifdef CONFIG_EFI_STUB | ||
362 | .org 0x390 | ||
363 | ENTRY(efi64_stub_entry) | ||
364 | movq %rdi, efi64_config(%rip) /* Handle */ | ||
365 | movq %rsi, efi64_config+8(%rip) /* EFI System table pointer */ | ||
366 | |||
367 | leaq efi64_config(%rip), %rax | ||
368 | movq %rax, efi_config(%rip) | ||
369 | |||
370 | movq %rdx, %rsi | ||
371 | jmp handover_entry | ||
372 | ENDPROC(efi64_stub_entry) | ||
373 | #endif | ||
374 | |||
306 | .text | 375 | .text |
307 | relocated: | 376 | relocated: |
308 | 377 | ||
@@ -339,13 +408,13 @@ relocated: | |||
339 | movl $z_input_len, %ecx /* input_len */ | 408 | movl $z_input_len, %ecx /* input_len */ |
340 | movq %rbp, %r8 /* output target address */ | 409 | movq %rbp, %r8 /* output target address */ |
341 | movq $z_output_len, %r9 /* decompressed length */ | 410 | movq $z_output_len, %r9 /* decompressed length */ |
342 | call decompress_kernel | 411 | call decompress_kernel /* returns kernel location in %rax */ |
343 | popq %rsi | 412 | popq %rsi |
344 | 413 | ||
345 | /* | 414 | /* |
346 | * Jump to the decompressed kernel. | 415 | * Jump to the decompressed kernel. |
347 | */ | 416 | */ |
348 | jmp *%rbp | 417 | jmp *%rax |
349 | 418 | ||
350 | .code32 | 419 | .code32 |
351 | no_longmode: | 420 | no_longmode: |
@@ -368,6 +437,25 @@ gdt: | |||
368 | .quad 0x0000000000000000 /* TS continued */ | 437 | .quad 0x0000000000000000 /* TS continued */ |
369 | gdt_end: | 438 | gdt_end: |
370 | 439 | ||
440 | #ifdef CONFIG_EFI_STUB | ||
441 | efi_config: | ||
442 | .quad 0 | ||
443 | |||
444 | #ifdef CONFIG_EFI_MIXED | ||
445 | .global efi32_config | ||
446 | efi32_config: | ||
447 | .fill 11,8,0 | ||
448 | .quad efi64_thunk | ||
449 | .byte 0 | ||
450 | #endif | ||
451 | |||
452 | .global efi64_config | ||
453 | efi64_config: | ||
454 | .fill 11,8,0 | ||
455 | .quad efi_call6 | ||
456 | .byte 1 | ||
457 | #endif /* CONFIG_EFI_STUB */ | ||
458 | |||
371 | /* | 459 | /* |
372 | * Stack and heap for uncompression | 460 | * Stack and heap for uncompression |
373 | */ | 461 | */ |
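Note: all three CONFIG_RELOCATABLE sites in these two files change the same way: instead of an #else-only assignment, the aligned load address now falls through into a minimum-address clamp, so a relocatable kernel loaded below LOAD_PHYSICAL_ADDR is still pulled up to it. In rough C terms:

    /* C paraphrase of the clamp added above; ALIGN() stands in for the
     * add/not/and alignment sequence in the assembly. */
    static unsigned long clamp_load_address(unsigned long load_addr,
                                            unsigned long align)
    {
            unsigned long addr = LOAD_PHYSICAL_ADDR;

    #ifdef CONFIG_RELOCATABLE
            addr = ALIGN(load_addr, align);
            if (addr < LOAD_PHYSICAL_ADDR)  /* the new cmp + jge 1f */
                    addr = LOAD_PHYSICAL_ADDR;
    #endif
            return addr;
    }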
diff --git a/arch/x86/boot/compressed/misc.c b/arch/x86/boot/compressed/misc.c index 434f077d2c4d..17684615374b 100644 --- a/arch/x86/boot/compressed/misc.c +++ b/arch/x86/boot/compressed/misc.c | |||
@@ -10,6 +10,7 @@ | |||
10 | */ | 10 | */ |
11 | 11 | ||
12 | #include "misc.h" | 12 | #include "misc.h" |
13 | #include "../string.h" | ||
13 | 14 | ||
14 | /* WARNING!! | 15 | /* WARNING!! |
15 | * This code is compiled with -fPIC and it is relocated dynamically | 16 | * This code is compiled with -fPIC and it is relocated dynamically |
@@ -97,8 +98,14 @@ | |||
97 | */ | 98 | */ |
98 | #define STATIC static | 99 | #define STATIC static |
99 | 100 | ||
100 | #undef memset | ||
101 | #undef memcpy | 101 | #undef memcpy |
102 | |||
103 | /* | ||
104 | * Use the normal definition of memset() from string.c. Headers that | ||
105 | * were already included expect a definition of memset(), and by the | ||
106 | * time we define the memset macro it is too late. | ||
107 | */ | ||
108 | #undef memset | ||
102 | #define memzero(s, n) memset((s), 0, (n)) | 109 | #define memzero(s, n) memset((s), 0, (n)) |
103 | 110 | ||
104 | 111 | ||
@@ -109,17 +116,8 @@ static void error(char *m); | |||
109 | */ | 116 | */ |
110 | struct boot_params *real_mode; /* Pointer to real-mode data */ | 117 | struct boot_params *real_mode; /* Pointer to real-mode data */ |
111 | 118 | ||
112 | void *memset(void *s, int c, size_t n); | 119 | memptr free_mem_ptr; |
113 | void *memcpy(void *dest, const void *src, size_t n); | 120 | memptr free_mem_end_ptr; |
114 | |||
115 | #ifdef CONFIG_X86_64 | ||
116 | #define memptr long | ||
117 | #else | ||
118 | #define memptr unsigned | ||
119 | #endif | ||
120 | |||
121 | static memptr free_mem_ptr; | ||
122 | static memptr free_mem_end_ptr; | ||
123 | 121 | ||
124 | static char *vidmem; | 122 | static char *vidmem; |
125 | static int vidport; | 123 | static int vidport; |
@@ -222,45 +220,6 @@ void __putstr(const char *s) | |||
222 | outb(0xff & (pos >> 1), vidport+1); | 220 | outb(0xff & (pos >> 1), vidport+1); |
223 | } | 221 | } |
224 | 222 | ||
225 | void *memset(void *s, int c, size_t n) | ||
226 | { | ||
227 | int i; | ||
228 | char *ss = s; | ||
229 | |||
230 | for (i = 0; i < n; i++) | ||
231 | ss[i] = c; | ||
232 | return s; | ||
233 | } | ||
234 | #ifdef CONFIG_X86_32 | ||
235 | void *memcpy(void *dest, const void *src, size_t n) | ||
236 | { | ||
237 | int d0, d1, d2; | ||
238 | asm volatile( | ||
239 | "rep ; movsl\n\t" | ||
240 | "movl %4,%%ecx\n\t" | ||
241 | "rep ; movsb\n\t" | ||
242 | : "=&c" (d0), "=&D" (d1), "=&S" (d2) | ||
243 | : "0" (n >> 2), "g" (n & 3), "1" (dest), "2" (src) | ||
244 | : "memory"); | ||
245 | |||
246 | return dest; | ||
247 | } | ||
248 | #else | ||
249 | void *memcpy(void *dest, const void *src, size_t n) | ||
250 | { | ||
251 | long d0, d1, d2; | ||
252 | asm volatile( | ||
253 | "rep ; movsq\n\t" | ||
254 | "movq %4,%%rcx\n\t" | ||
255 | "rep ; movsb\n\t" | ||
256 | : "=&c" (d0), "=&D" (d1), "=&S" (d2) | ||
257 | : "0" (n >> 3), "g" (n & 7), "1" (dest), "2" (src) | ||
258 | : "memory"); | ||
259 | |||
260 | return dest; | ||
261 | } | ||
262 | #endif | ||
263 | |||
264 | static void error(char *x) | 223 | static void error(char *x) |
265 | { | 224 | { |
266 | error_putstr("\n\n"); | 225 | error_putstr("\n\n"); |
@@ -395,7 +354,7 @@ static void parse_elf(void *output) | |||
395 | free(phdrs); | 354 | free(phdrs); |
396 | } | 355 | } |
397 | 356 | ||
398 | asmlinkage void decompress_kernel(void *rmode, memptr heap, | 357 | asmlinkage void *decompress_kernel(void *rmode, memptr heap, |
399 | unsigned char *input_data, | 358 | unsigned char *input_data, |
400 | unsigned long input_len, | 359 | unsigned long input_len, |
401 | unsigned char *output, | 360 | unsigned char *output, |
@@ -422,6 +381,10 @@ asmlinkage void decompress_kernel(void *rmode, memptr heap, | |||
422 | free_mem_ptr = heap; /* Heap */ | 381 | free_mem_ptr = heap; /* Heap */ |
423 | free_mem_end_ptr = heap + BOOT_HEAP_SIZE; | 382 | free_mem_end_ptr = heap + BOOT_HEAP_SIZE; |
424 | 383 | ||
384 | output = choose_kernel_location(input_data, input_len, | ||
385 | output, output_len); | ||
386 | |||
387 | /* Validate memory location choices. */ | ||
425 | if ((unsigned long)output & (MIN_KERNEL_ALIGN - 1)) | 388 | if ((unsigned long)output & (MIN_KERNEL_ALIGN - 1)) |
426 | error("Destination address inappropriately aligned"); | 389 | error("Destination address inappropriately aligned"); |
427 | #ifdef CONFIG_X86_64 | 390 | #ifdef CONFIG_X86_64 |
@@ -441,5 +404,5 @@ asmlinkage void decompress_kernel(void *rmode, memptr heap, | |||
441 | parse_elf(output); | 404 | parse_elf(output); |
442 | handle_relocations(output, output_len); | 405 | handle_relocations(output, output_len); |
443 | debug_putstr("done.\nBooting the kernel.\n"); | 406 | debug_putstr("done.\nBooting the kernel.\n"); |
444 | return; | 407 | return output; |
445 | } | 408 | } |
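Note: with choose_kernel_location() hooked in, the decompression target may move, so decompress_kernel() now reports the final location instead of letting head_32.S/head_64.S assume the address they passed in. The revised contract, condensed (signature as in the hunk above; the parameter truncated by the hunk context is assumed to be output_len):

    asmlinkage void *decompress_kernel(void *rmode, memptr heap,
                                       unsigned char *input_data,
                                       unsigned long input_len,
                                       unsigned char *output,
                                       unsigned long output_len)
    {
            /* may relocate 'output' under CONFIG_RANDOMIZE_BASE;
             * the !RANDOMIZE_BASE stub returns it unchanged */
            output = choose_kernel_location(input_data, input_len,
                                            output, output_len);

            /* ...alignment/overlap checks, decompression, parse_elf(),
             * handle_relocations() as before... */

            return output;  /* head_*.S: jmp *%eax / jmp *%rax */
    }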
diff --git a/arch/x86/boot/compressed/misc.h b/arch/x86/boot/compressed/misc.h index 674019d8e235..24e3e569a13c 100644 --- a/arch/x86/boot/compressed/misc.h +++ b/arch/x86/boot/compressed/misc.h | |||
@@ -23,7 +23,15 @@ | |||
23 | #define BOOT_BOOT_H | 23 | #define BOOT_BOOT_H |
24 | #include "../ctype.h" | 24 | #include "../ctype.h" |
25 | 25 | ||
26 | #ifdef CONFIG_X86_64 | ||
27 | #define memptr long | ||
28 | #else | ||
29 | #define memptr unsigned | ||
30 | #endif | ||
31 | |||
26 | /* misc.c */ | 32 | /* misc.c */ |
33 | extern memptr free_mem_ptr; | ||
34 | extern memptr free_mem_end_ptr; | ||
27 | extern struct boot_params *real_mode; /* Pointer to real-mode data */ | 35 | extern struct boot_params *real_mode; /* Pointer to real-mode data */ |
28 | void __putstr(const char *s); | 36 | void __putstr(const char *s); |
29 | #define error_putstr(__x) __putstr(__x) | 37 | #define error_putstr(__x) __putstr(__x) |
@@ -39,23 +47,40 @@ static inline void debug_putstr(const char *s) | |||
39 | 47 | ||
40 | #endif | 48 | #endif |
41 | 49 | ||
42 | #ifdef CONFIG_EARLY_PRINTK | 50 | #if CONFIG_EARLY_PRINTK || CONFIG_RANDOMIZE_BASE |
43 | |||
44 | /* cmdline.c */ | 51 | /* cmdline.c */ |
45 | int cmdline_find_option(const char *option, char *buffer, int bufsize); | 52 | int cmdline_find_option(const char *option, char *buffer, int bufsize); |
46 | int cmdline_find_option_bool(const char *option); | 53 | int cmdline_find_option_bool(const char *option); |
54 | #endif | ||
47 | 55 | ||
48 | /* early_serial_console.c */ | ||
49 | extern int early_serial_base; | ||
50 | void console_init(void); | ||
51 | 56 | ||
57 | #if CONFIG_RANDOMIZE_BASE | ||
58 | /* aslr.c */ | ||
59 | unsigned char *choose_kernel_location(unsigned char *input, | ||
60 | unsigned long input_size, | ||
61 | unsigned char *output, | ||
62 | unsigned long output_size); | ||
63 | /* cpuflags.c */ | ||
64 | bool has_cpuflag(int flag); | ||
52 | #else | 65 | #else |
66 | static inline | ||
67 | unsigned char *choose_kernel_location(unsigned char *input, | ||
68 | unsigned long input_size, | ||
69 | unsigned char *output, | ||
70 | unsigned long output_size) | ||
71 | { | ||
72 | return output; | ||
73 | } | ||
74 | #endif | ||
53 | 75 | ||
76 | #ifdef CONFIG_EARLY_PRINTK | ||
54 | /* early_serial_console.c */ | 77 | /* early_serial_console.c */ |
78 | extern int early_serial_base; | ||
79 | void console_init(void); | ||
80 | #else | ||
55 | static const int early_serial_base; | 81 | static const int early_serial_base; |
56 | static inline void console_init(void) | 82 | static inline void console_init(void) |
57 | { } | 83 | { } |
58 | |||
59 | #endif | 84 | #endif |
60 | 85 | ||
61 | #endif | 86 | #endif |
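Note: the new guards above use "#if CONFIG_EARLY_PRINTK || CONFIG_RANDOMIZE_BASE" rather than the usual defined() test. That happens to work -- the preprocessor evaluates an undefined identifier as 0, and Kconfig defines enabled bool options to 1 -- but it trips -Wundef. The conventional spelling of the same guard would be:

    #if defined(CONFIG_EARLY_PRINTK) || defined(CONFIG_RANDOMIZE_BASE)
    /* cmdline.c */
    int cmdline_find_option(const char *option, char *buffer, int bufsize);
    int cmdline_find_option_bool(const char *option);
    #endif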
diff --git a/arch/x86/boot/compressed/string.c b/arch/x86/boot/compressed/string.c index ffb9c5c9d748..f3c57e341402 100644 --- a/arch/x86/boot/compressed/string.c +++ b/arch/x86/boot/compressed/string.c | |||
@@ -1,11 +1,45 @@ | |||
1 | #include "misc.h" | 1 | #include "misc.h" |
2 | #include "../string.c" | ||
3 | |||
4 | /* misc.h might pull in string_32.h which has a macro for memcpy. undef that */ | ||
5 | #undef memcpy | ||
2 | 6 | ||
3 | int memcmp(const void *s1, const void *s2, size_t len) | 7 | #ifdef CONFIG_X86_32 |
8 | void *memcpy(void *dest, const void *src, size_t n) | ||
4 | { | 9 | { |
5 | u8 diff; | 10 | int d0, d1, d2; |
6 | asm("repe; cmpsb; setnz %0" | 11 | asm volatile( |
7 | : "=qm" (diff), "+D" (s1), "+S" (s2), "+c" (len)); | 12 | "rep ; movsl\n\t" |
8 | return diff; | 13 | "movl %4,%%ecx\n\t" |
14 | "rep ; movsb\n\t" | ||
15 | : "=&c" (d0), "=&D" (d1), "=&S" (d2) | ||
16 | : "0" (n >> 2), "g" (n & 3), "1" (dest), "2" (src) | ||
17 | : "memory"); | ||
18 | |||
19 | return dest; | ||
9 | } | 20 | } |
21 | #else | ||
22 | void *memcpy(void *dest, const void *src, size_t n) | ||
23 | { | ||
24 | long d0, d1, d2; | ||
25 | asm volatile( | ||
26 | "rep ; movsq\n\t" | ||
27 | "movq %4,%%rcx\n\t" | ||
28 | "rep ; movsb\n\t" | ||
29 | : "=&c" (d0), "=&D" (d1), "=&S" (d2) | ||
30 | : "0" (n >> 3), "g" (n & 7), "1" (dest), "2" (src) | ||
31 | : "memory"); | ||
10 | 32 | ||
11 | #include "../string.c" | 33 | return dest; |
34 | } | ||
35 | #endif | ||
36 | |||
37 | void *memset(void *s, int c, size_t n) | ||
38 | { | ||
39 | int i; | ||
40 | char *ss = s; | ||
41 | |||
42 | for (i = 0; i < n; i++) | ||
43 | ss[i] = c; | ||
44 | return s; | ||
45 | } | ||
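Note: the memcpy implementations moved here from misc.c copy register-width chunks with rep;movs and then the 0-3 (or 0-7) byte tail with rep;movsb. A plain C rendering of the 32-bit variant, for reference only (the assembly versions above are what actually get linked):

    void *memcpy_ref(void *dest, const void *src, size_t n)
    {
            unsigned int *d4 = dest;
            const unsigned int *s4 = src;
            unsigned char *d1;
            const unsigned char *s1;
            size_t i;

            for (i = 0; i < (n >> 2); i++)  /* rep ; movsl */
                    *d4++ = *s4++;

            d1 = (unsigned char *)d4;
            s1 = (const unsigned char *)s4;
            for (i = 0; i < (n & 3); i++)   /* trailing rep ; movsb */
                    *d1++ = *s1++;

            return dest;
    }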
diff --git a/arch/x86/boot/copy.S b/arch/x86/boot/copy.S index 11f272c6f5e9..1eb7d298b47d 100644 --- a/arch/x86/boot/copy.S +++ b/arch/x86/boot/copy.S | |||
@@ -14,7 +14,7 @@ | |||
14 | * Memory copy routines | 14 | * Memory copy routines |
15 | */ | 15 | */ |
16 | 16 | ||
17 | .code16gcc | 17 | .code16 |
18 | .text | 18 | .text |
19 | 19 | ||
20 | GLOBAL(memcpy) | 20 | GLOBAL(memcpy) |
@@ -30,7 +30,7 @@ GLOBAL(memcpy) | |||
30 | rep; movsb | 30 | rep; movsb |
31 | popw %di | 31 | popw %di |
32 | popw %si | 32 | popw %si |
33 | ret | 33 | retl |
34 | ENDPROC(memcpy) | 34 | ENDPROC(memcpy) |
35 | 35 | ||
36 | GLOBAL(memset) | 36 | GLOBAL(memset) |
@@ -45,25 +45,25 @@ GLOBAL(memset) | |||
45 | andw $3, %cx | 45 | andw $3, %cx |
46 | rep; stosb | 46 | rep; stosb |
47 | popw %di | 47 | popw %di |
48 | ret | 48 | retl |
49 | ENDPROC(memset) | 49 | ENDPROC(memset) |
50 | 50 | ||
51 | GLOBAL(copy_from_fs) | 51 | GLOBAL(copy_from_fs) |
52 | pushw %ds | 52 | pushw %ds |
53 | pushw %fs | 53 | pushw %fs |
54 | popw %ds | 54 | popw %ds |
55 | call memcpy | 55 | calll memcpy |
56 | popw %ds | 56 | popw %ds |
57 | ret | 57 | retl |
58 | ENDPROC(copy_from_fs) | 58 | ENDPROC(copy_from_fs) |
59 | 59 | ||
60 | GLOBAL(copy_to_fs) | 60 | GLOBAL(copy_to_fs) |
61 | pushw %es | 61 | pushw %es |
62 | pushw %fs | 62 | pushw %fs |
63 | popw %es | 63 | popw %es |
64 | call memcpy | 64 | calll memcpy |
65 | popw %es | 65 | popw %es |
66 | ret | 66 | retl |
67 | ENDPROC(copy_to_fs) | 67 | ENDPROC(copy_to_fs) |
68 | 68 | ||
69 | #if 0 /* Not currently used, but can be enabled as needed */ | 69 | #if 0 /* Not currently used, but can be enabled as needed */ |
@@ -71,17 +71,17 @@ GLOBAL(copy_from_gs) | |||
71 | pushw %ds | 71 | pushw %ds |
72 | pushw %gs | 72 | pushw %gs |
73 | popw %ds | 73 | popw %ds |
74 | call memcpy | 74 | calll memcpy |
75 | popw %ds | 75 | popw %ds |
76 | ret | 76 | retl |
77 | ENDPROC(copy_from_gs) | 77 | ENDPROC(copy_from_gs) |
78 | 78 | ||
79 | GLOBAL(copy_to_gs) | 79 | GLOBAL(copy_to_gs) |
80 | pushw %es | 80 | pushw %es |
81 | pushw %gs | 81 | pushw %gs |
82 | popw %es | 82 | popw %es |
83 | call memcpy | 83 | calll memcpy |
84 | popw %es | 84 | popw %es |
85 | ret | 85 | retl |
86 | ENDPROC(copy_to_gs) | 86 | ENDPROC(copy_to_gs) |
87 | #endif | 87 | #endif |
diff --git a/arch/x86/boot/cpucheck.c b/arch/x86/boot/cpucheck.c index 4d3ff037201f..1fd7d575092e 100644 --- a/arch/x86/boot/cpucheck.c +++ b/arch/x86/boot/cpucheck.c | |||
@@ -27,9 +27,8 @@ | |||
27 | #include <asm/processor-flags.h> | 27 | #include <asm/processor-flags.h> |
28 | #include <asm/required-features.h> | 28 | #include <asm/required-features.h> |
29 | #include <asm/msr-index.h> | 29 | #include <asm/msr-index.h> |
30 | #include "string.h" | ||
30 | 31 | ||
31 | struct cpu_features cpu; | ||
32 | static u32 cpu_vendor[3]; | ||
33 | static u32 err_flags[NCAPINTS]; | 32 | static u32 err_flags[NCAPINTS]; |
34 | 33 | ||
35 | static const int req_level = CONFIG_X86_MINIMUM_CPU_FAMILY; | 34 | static const int req_level = CONFIG_X86_MINIMUM_CPU_FAMILY; |
@@ -69,92 +68,15 @@ static int is_transmeta(void) | |||
69 | cpu_vendor[2] == A32('M', 'x', '8', '6'); | 68 | cpu_vendor[2] == A32('M', 'x', '8', '6'); |
70 | } | 69 | } |
71 | 70 | ||
72 | static int has_fpu(void) | 71 | static int is_intel(void) |
73 | { | 72 | { |
74 | u16 fcw = -1, fsw = -1; | 73 | return cpu_vendor[0] == A32('G', 'e', 'n', 'u') && |
75 | u32 cr0; | 74 | cpu_vendor[1] == A32('i', 'n', 'e', 'I') && |
76 | 75 | cpu_vendor[2] == A32('n', 't', 'e', 'l'); | |
77 | asm("movl %%cr0,%0" : "=r" (cr0)); | ||
78 | if (cr0 & (X86_CR0_EM|X86_CR0_TS)) { | ||
79 | cr0 &= ~(X86_CR0_EM|X86_CR0_TS); | ||
80 | asm volatile("movl %0,%%cr0" : : "r" (cr0)); | ||
81 | } | ||
82 | |||
83 | asm volatile("fninit ; fnstsw %0 ; fnstcw %1" | ||
84 | : "+m" (fsw), "+m" (fcw)); | ||
85 | |||
86 | return fsw == 0 && (fcw & 0x103f) == 0x003f; | ||
87 | } | ||
88 | |||
89 | static int has_eflag(u32 mask) | ||
90 | { | ||
91 | u32 f0, f1; | ||
92 | |||
93 | asm("pushfl ; " | ||
94 | "pushfl ; " | ||
95 | "popl %0 ; " | ||
96 | "movl %0,%1 ; " | ||
97 | "xorl %2,%1 ; " | ||
98 | "pushl %1 ; " | ||
99 | "popfl ; " | ||
100 | "pushfl ; " | ||
101 | "popl %1 ; " | ||
102 | "popfl" | ||
103 | : "=&r" (f0), "=&r" (f1) | ||
104 | : "ri" (mask)); | ||
105 | |||
106 | return !!((f0^f1) & mask); | ||
107 | } | ||
108 | |||
109 | static void get_flags(void) | ||
110 | { | ||
111 | u32 max_intel_level, max_amd_level; | ||
112 | u32 tfms; | ||
113 | |||
114 | if (has_fpu()) | ||
115 | set_bit(X86_FEATURE_FPU, cpu.flags); | ||
116 | |||
117 | if (has_eflag(X86_EFLAGS_ID)) { | ||
118 | asm("cpuid" | ||
119 | : "=a" (max_intel_level), | ||
120 | "=b" (cpu_vendor[0]), | ||
121 | "=d" (cpu_vendor[1]), | ||
122 | "=c" (cpu_vendor[2]) | ||
123 | : "a" (0)); | ||
124 | |||
125 | if (max_intel_level >= 0x00000001 && | ||
126 | max_intel_level <= 0x0000ffff) { | ||
127 | asm("cpuid" | ||
128 | : "=a" (tfms), | ||
129 | "=c" (cpu.flags[4]), | ||
130 | "=d" (cpu.flags[0]) | ||
131 | : "a" (0x00000001) | ||
132 | : "ebx"); | ||
133 | cpu.level = (tfms >> 8) & 15; | ||
134 | cpu.model = (tfms >> 4) & 15; | ||
135 | if (cpu.level >= 6) | ||
136 | cpu.model += ((tfms >> 16) & 0xf) << 4; | ||
137 | } | ||
138 | |||
139 | asm("cpuid" | ||
140 | : "=a" (max_amd_level) | ||
141 | : "a" (0x80000000) | ||
142 | : "ebx", "ecx", "edx"); | ||
143 | |||
144 | if (max_amd_level >= 0x80000001 && | ||
145 | max_amd_level <= 0x8000ffff) { | ||
146 | u32 eax = 0x80000001; | ||
147 | asm("cpuid" | ||
148 | : "+a" (eax), | ||
149 | "=c" (cpu.flags[6]), | ||
150 | "=d" (cpu.flags[1]) | ||
151 | : : "ebx"); | ||
152 | } | ||
153 | } | ||
154 | } | 76 | } |
155 | 77 | ||
156 | /* Returns a bitmask of which words we have error bits in */ | 78 | /* Returns a bitmask of which words we have error bits in */ |
157 | static int check_flags(void) | 79 | static int check_cpuflags(void) |
158 | { | 80 | { |
159 | u32 err; | 81 | u32 err; |
160 | int i; | 82 | int i; |
@@ -187,8 +109,8 @@ int check_cpu(int *cpu_level_ptr, int *req_level_ptr, u32 **err_flags_ptr) | |||
187 | if (has_eflag(X86_EFLAGS_AC)) | 109 | if (has_eflag(X86_EFLAGS_AC)) |
188 | cpu.level = 4; | 110 | cpu.level = 4; |
189 | 111 | ||
190 | get_flags(); | 112 | get_cpuflags(); |
191 | err = check_flags(); | 113 | err = check_cpuflags(); |
192 | 114 | ||
193 | if (test_bit(X86_FEATURE_LM, cpu.flags)) | 115 | if (test_bit(X86_FEATURE_LM, cpu.flags)) |
194 | cpu.level = 64; | 116 | cpu.level = 64; |
@@ -207,8 +129,8 @@ int check_cpu(int *cpu_level_ptr, int *req_level_ptr, u32 **err_flags_ptr) | |||
207 | eax &= ~(1 << 15); | 129 | eax &= ~(1 << 15); |
208 | asm("wrmsr" : : "a" (eax), "d" (edx), "c" (ecx)); | 130 | asm("wrmsr" : : "a" (eax), "d" (edx), "c" (ecx)); |
209 | 131 | ||
210 | get_flags(); /* Make sure it really did something */ | 132 | get_cpuflags(); /* Make sure it really did something */ |
211 | err = check_flags(); | 133 | err = check_cpuflags(); |
212 | } else if (err == 0x01 && | 134 | } else if (err == 0x01 && |
213 | !(err_flags[0] & ~(1 << X86_FEATURE_CX8)) && | 135 | !(err_flags[0] & ~(1 << X86_FEATURE_CX8)) && |
214 | is_centaur() && cpu.model >= 6) { | 136 | is_centaur() && cpu.model >= 6) { |
@@ -223,7 +145,7 @@ int check_cpu(int *cpu_level_ptr, int *req_level_ptr, u32 **err_flags_ptr) | |||
223 | asm("wrmsr" : : "a" (eax), "d" (edx), "c" (ecx)); | 145 | asm("wrmsr" : : "a" (eax), "d" (edx), "c" (ecx)); |
224 | 146 | ||
225 | set_bit(X86_FEATURE_CX8, cpu.flags); | 147 | set_bit(X86_FEATURE_CX8, cpu.flags); |
226 | err = check_flags(); | 148 | err = check_cpuflags(); |
227 | } else if (err == 0x01 && is_transmeta()) { | 149 | } else if (err == 0x01 && is_transmeta()) { |
228 | /* Transmeta might have masked feature bits in word 0 */ | 150 | /* Transmeta might have masked feature bits in word 0 */ |
229 | 151 | ||
@@ -238,7 +160,20 @@ int check_cpu(int *cpu_level_ptr, int *req_level_ptr, u32 **err_flags_ptr) | |||
238 | : : "ecx", "ebx"); | 160 | : : "ecx", "ebx"); |
239 | asm("wrmsr" : : "a" (eax), "d" (edx), "c" (ecx)); | 161 | asm("wrmsr" : : "a" (eax), "d" (edx), "c" (ecx)); |
240 | 162 | ||
241 | err = check_flags(); | 163 | err = check_cpuflags(); |
164 | } else if (err == 0x01 && | ||
165 | !(err_flags[0] & ~(1 << X86_FEATURE_PAE)) && | ||
166 | is_intel() && cpu.level == 6 && | ||
167 | (cpu.model == 9 || cpu.model == 13)) { | ||
168 | /* PAE is disabled on this Pentium M but can be forced */ | ||
169 | if (cmdline_find_option_bool("forcepae")) { | ||
170 | puts("WARNING: Forcing PAE in CPU flags\n"); | ||
171 | set_bit(X86_FEATURE_PAE, cpu.flags); | ||
172 | err = check_cpuflags(); | ||
173 | } | ||
174 | else { | ||
175 | puts("WARNING: PAE disabled. Use parameter 'forcepae' to enable at your own risk!\n"); | ||
176 | } | ||
242 | } | 177 | } |
243 | 178 | ||
244 | if (err_flags_ptr) | 179 | if (err_flags_ptr) |
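Note: check_cpuflags() is the renamed check_flags(); its body sits outside this hunk, but from memory of cpucheck.c it masks the required-feature words against what get_cpuflags() detected, roughly as below (an assumption, not a quote). That is why "err == 0x01" in the branches above means "only capability word 0 still has missing bits":

    static int check_cpuflags(void)
    {
            u32 err = 0;
            int i;

            for (i = 0; i < NCAPINTS; i++) {
                    err_flags[i] = req_flags[i] & ~cpu.flags[i];
                    if (err_flags[i])
                            err |= 1 << i;  /* one bit per capability word */
            }
            return err;
    }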
diff --git a/arch/x86/boot/cpuflags.c b/arch/x86/boot/cpuflags.c new file mode 100644 index 000000000000..431fa5f84537 --- /dev/null +++ b/arch/x86/boot/cpuflags.c | |||
@@ -0,0 +1,119 @@ | |||
1 | #include <linux/types.h> | ||
2 | #include "bitops.h" | ||
3 | |||
4 | #include <asm/processor-flags.h> | ||
5 | #include <asm/required-features.h> | ||
6 | #include <asm/msr-index.h> | ||
7 | #include "cpuflags.h" | ||
8 | |||
9 | struct cpu_features cpu; | ||
10 | u32 cpu_vendor[3]; | ||
11 | |||
12 | static bool loaded_flags; | ||
13 | |||
14 | static int has_fpu(void) | ||
15 | { | ||
16 | u16 fcw = -1, fsw = -1; | ||
17 | unsigned long cr0; | ||
18 | |||
19 | asm volatile("mov %%cr0,%0" : "=r" (cr0)); | ||
20 | if (cr0 & (X86_CR0_EM|X86_CR0_TS)) { | ||
21 | cr0 &= ~(X86_CR0_EM|X86_CR0_TS); | ||
22 | asm volatile("mov %0,%%cr0" : : "r" (cr0)); | ||
23 | } | ||
24 | |||
25 | asm volatile("fninit ; fnstsw %0 ; fnstcw %1" | ||
26 | : "+m" (fsw), "+m" (fcw)); | ||
27 | |||
28 | return fsw == 0 && (fcw & 0x103f) == 0x003f; | ||
29 | } | ||
30 | |||
31 | /* | ||
32 | * For building the 16-bit code we want to explicitly specify 32-bit | ||
33 | * push/pop operations, rather than just saying 'pushf' or 'popf' and | ||
34 | * letting the compiler choose. But this is also included from the | ||
35 | * compressed/ directory where it may be 64-bit code, and thus needs | ||
36 | * to be 'pushfq' or 'popfq' in that case. | ||
37 | */ | ||
38 | #ifdef __x86_64__ | ||
39 | #define PUSHF "pushfq" | ||
40 | #define POPF "popfq" | ||
41 | #else | ||
42 | #define PUSHF "pushfl" | ||
43 | #define POPF "popfl" | ||
44 | #endif | ||
45 | |||
46 | int has_eflag(unsigned long mask) | ||
47 | { | ||
48 | unsigned long f0, f1; | ||
49 | |||
50 | asm volatile(PUSHF " \n\t" | ||
51 | PUSHF " \n\t" | ||
52 | "pop %0 \n\t" | ||
53 | "mov %0,%1 \n\t" | ||
54 | "xor %2,%1 \n\t" | ||
55 | "push %1 \n\t" | ||
56 | POPF " \n\t" | ||
57 | PUSHF " \n\t" | ||
58 | "pop %1 \n\t" | ||
59 | POPF | ||
60 | : "=&r" (f0), "=&r" (f1) | ||
61 | : "ri" (mask)); | ||
62 | |||
63 | return !!((f0^f1) & mask); | ||
64 | } | ||
65 | |||
66 | /* Handle x86_32 PIC using ebx. */ | ||
67 | #if defined(__i386__) && defined(__PIC__) | ||
68 | # define EBX_REG "=r" | ||
69 | #else | ||
70 | # define EBX_REG "=b" | ||
71 | #endif | ||
72 | |||
73 | static inline void cpuid(u32 id, u32 *a, u32 *b, u32 *c, u32 *d) | ||
74 | { | ||
75 | asm volatile(".ifnc %%ebx,%3 ; movl %%ebx,%3 ; .endif \n\t" | ||
76 | "cpuid \n\t" | ||
77 | ".ifnc %%ebx,%3 ; xchgl %%ebx,%3 ; .endif \n\t" | ||
78 | : "=a" (*a), "=c" (*c), "=d" (*d), EBX_REG (*b) | ||
79 | : "a" (id) | ||
80 | ); | ||
81 | } | ||
82 | |||
83 | void get_cpuflags(void) | ||
84 | { | ||
85 | u32 max_intel_level, max_amd_level; | ||
86 | u32 tfms; | ||
87 | u32 ignored; | ||
88 | |||
89 | if (loaded_flags) | ||
90 | return; | ||
91 | loaded_flags = true; | ||
92 | |||
93 | if (has_fpu()) | ||
94 | set_bit(X86_FEATURE_FPU, cpu.flags); | ||
95 | |||
96 | if (has_eflag(X86_EFLAGS_ID)) { | ||
97 | cpuid(0x0, &max_intel_level, &cpu_vendor[0], &cpu_vendor[2], | ||
98 | &cpu_vendor[1]); | ||
99 | |||
100 | if (max_intel_level >= 0x00000001 && | ||
101 | max_intel_level <= 0x0000ffff) { | ||
102 | cpuid(0x1, &tfms, &ignored, &cpu.flags[4], | ||
103 | &cpu.flags[0]); | ||
104 | cpu.level = (tfms >> 8) & 15; | ||
105 | cpu.model = (tfms >> 4) & 15; | ||
106 | if (cpu.level >= 6) | ||
107 | cpu.model += ((tfms >> 16) & 0xf) << 4; | ||
108 | } | ||
109 | |||
110 | cpuid(0x80000000, &max_amd_level, &ignored, &ignored, | ||
111 | &ignored); | ||
112 | |||
113 | if (max_amd_level >= 0x80000001 && | ||
114 | max_amd_level <= 0x8000ffff) { | ||
115 | cpuid(0x80000001, &ignored, &ignored, &cpu.flags[6], | ||
116 | &cpu.flags[1]); | ||
117 | } | ||
118 | } | ||
119 | } | ||
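Note: the cpuid() wrapper above saves and restores %ebx by hand so the same source assembles whether or not %ebx is reserved as the 32-bit PIC register. Callers just pass the leaf and four result pointers; the vendor-string query in get_cpuflags(), for example, boils down to:

    u32 max_level;
    u32 vendor[3];

    /* leaf 0: EBX, EDX, ECX spell the vendor, e.g. "GenuineIntel" */
    cpuid(0x0, &max_level, &vendor[0], &vendor[2], &vendor[1]);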
diff --git a/arch/x86/boot/cpuflags.h b/arch/x86/boot/cpuflags.h new file mode 100644 index 000000000000..ea97697e51e4 --- /dev/null +++ b/arch/x86/boot/cpuflags.h | |||
@@ -0,0 +1,19 @@ | |||
1 | #ifndef BOOT_CPUFLAGS_H | ||
2 | #define BOOT_CPUFLAGS_H | ||
3 | |||
4 | #include <asm/cpufeature.h> | ||
5 | #include <asm/processor-flags.h> | ||
6 | |||
7 | struct cpu_features { | ||
8 | int level; /* Family, or 64 for x86-64 */ | ||
9 | int model; | ||
10 | u32 flags[NCAPINTS]; | ||
11 | }; | ||
12 | |||
13 | extern struct cpu_features cpu; | ||
14 | extern u32 cpu_vendor[3]; | ||
15 | |||
16 | int has_eflag(unsigned long mask); | ||
17 | void get_cpuflags(void); | ||
18 | |||
19 | #endif | ||
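Note: has_eflag() reports whether a given EFLAGS bit can be toggled, the classic generation probe: AC only flips on a 486 or later, and ID only flips when the CPUID instruction is implemented. check_cpu() uses it exactly that way:

    /* usage sketch mirroring check_cpu() in cpucheck.c */
    cpu.level = 3;                  /* baseline: 386 */
    if (has_eflag(X86_EFLAGS_AC))
            cpu.level = 4;          /* AC exists on 486+ */

    if (has_eflag(X86_EFLAGS_ID))   /* ID toggles iff CPUID works */
            get_cpuflags();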
diff --git a/arch/x86/boot/edd.c b/arch/x86/boot/edd.c index c501a5b466f8..223e42527077 100644 --- a/arch/x86/boot/edd.c +++ b/arch/x86/boot/edd.c | |||
@@ -15,6 +15,7 @@ | |||
15 | 15 | ||
16 | #include "boot.h" | 16 | #include "boot.h" |
17 | #include <linux/edd.h> | 17 | #include <linux/edd.h> |
18 | #include "string.h" | ||
18 | 19 | ||
19 | #if defined(CONFIG_EDD) || defined(CONFIG_EDD_MODULE) | 20 | #if defined(CONFIG_EDD) || defined(CONFIG_EDD_MODULE) |
20 | 21 | ||
diff --git a/arch/x86/boot/header.S b/arch/x86/boot/header.S index 9ec06a1f6d61..0ca9a5c362bc 100644 --- a/arch/x86/boot/header.S +++ b/arch/x86/boot/header.S | |||
@@ -283,7 +283,7 @@ _start: | |||
283 | # Part 2 of the header, from the old setup.S | 283 | # Part 2 of the header, from the old setup.S |
284 | 284 | ||
285 | .ascii "HdrS" # header signature | 285 | .ascii "HdrS" # header signature |
286 | .word 0x020c # header version number (>= 0x0105) | 286 | .word 0x020d # header version number (>= 0x0105) |
287 | # or else old loadlin-1.5 will fail) | 287 | # or else old loadlin-1.5 will fail) |
288 | .globl realmode_swtch | 288 | .globl realmode_swtch |
289 | realmode_swtch: .word 0, 0 # default_switch, SETUPSEG | 289 | realmode_swtch: .word 0, 0 # default_switch, SETUPSEG |
@@ -350,7 +350,7 @@ cmd_line_ptr: .long 0 # (Header version 0x0202 or later) | |||
350 | # can be located anywhere in | 350 | # can be located anywhere in |
351 | # low memory 0x10000 or higher. | 351 | # low memory 0x10000 or higher. |
352 | 352 | ||
353 | ramdisk_max: .long 0x7fffffff | 353 | initrd_addr_max: .long 0x7fffffff |
354 | # (Header version 0x0203 or later) | 354 | # (Header version 0x0203 or later) |
355 | # The highest safe address for | 355 | # The highest safe address for |
356 | # the contents of an initrd | 356 | # the contents of an initrd |
@@ -375,7 +375,8 @@ xloadflags: | |||
375 | # define XLF0 0 | 375 | # define XLF0 0 |
376 | #endif | 376 | #endif |
377 | 377 | ||
378 | #if defined(CONFIG_RELOCATABLE) && defined(CONFIG_X86_64) | 378 | #if defined(CONFIG_RELOCATABLE) && defined(CONFIG_X86_64) && \ |
379 | !defined(CONFIG_EFI_MIXED) | ||
379 | /* kernel/boot_param/ramdisk could be loaded above 4g */ | 380 | /* kernel/boot_param/ramdisk could be loaded above 4g */ |
380 | # define XLF1 XLF_CAN_BE_LOADED_ABOVE_4G | 381 | # define XLF1 XLF_CAN_BE_LOADED_ABOVE_4G |
381 | #else | 382 | #else |
@@ -383,15 +384,26 @@ xloadflags: | |||
383 | #endif | 384 | #endif |
384 | 385 | ||
385 | #ifdef CONFIG_EFI_STUB | 386 | #ifdef CONFIG_EFI_STUB |
386 | # ifdef CONFIG_X86_64 | 387 | # ifdef CONFIG_EFI_MIXED |
387 | # define XLF23 XLF_EFI_HANDOVER_64 /* 64-bit EFI handover ok */ | 388 | # define XLF23 (XLF_EFI_HANDOVER_32|XLF_EFI_HANDOVER_64) |
388 | # else | 389 | # else |
389 | # define XLF23 XLF_EFI_HANDOVER_32 /* 32-bit EFI handover ok */ | 390 | # ifdef CONFIG_X86_64 |
391 | # define XLF23 XLF_EFI_HANDOVER_64 /* 64-bit EFI handover ok */ | ||
392 | # else | ||
393 | # define XLF23 XLF_EFI_HANDOVER_32 /* 32-bit EFI handover ok */ | ||
394 | # endif | ||
390 | # endif | 395 | # endif |
391 | #else | 396 | #else |
392 | # define XLF23 0 | 397 | # define XLF23 0 |
393 | #endif | 398 | #endif |
394 | .word XLF0 | XLF1 | XLF23 | 399 | |
400 | #if defined(CONFIG_X86_64) && defined(CONFIG_EFI) && defined(CONFIG_KEXEC) | ||
401 | # define XLF4 XLF_EFI_KEXEC | ||
402 | #else | ||
403 | # define XLF4 0 | ||
404 | #endif | ||
405 | |||
406 | .word XLF0 | XLF1 | XLF23 | XLF4 | ||
395 | 407 | ||
396 | cmdline_size: .long COMMAND_LINE_SIZE-1 #length of the command line, | 408 | cmdline_size: .long COMMAND_LINE_SIZE-1 #length of the command line, |
397 | #added with boot protocol | 409 | #added with boot protocol |
@@ -419,13 +431,7 @@ pref_address: .quad LOAD_PHYSICAL_ADDR # preferred load addr | |||
419 | #define INIT_SIZE VO_INIT_SIZE | 431 | #define INIT_SIZE VO_INIT_SIZE |
420 | #endif | 432 | #endif |
421 | init_size: .long INIT_SIZE # kernel initialization size | 433 | init_size: .long INIT_SIZE # kernel initialization size |
422 | handover_offset: | 434 | handover_offset: .long 0 # Filled in by build.c |
423 | #ifdef CONFIG_EFI_STUB | ||
424 | .long 0x30 # offset to the handover | ||
425 | # protocol entry point | ||
426 | #else | ||
427 | .long 0 | ||
428 | #endif | ||
429 | 435 | ||
430 | # End of setup header ##################################################### | 436 | # End of setup header ##################################################### |
431 | 437 | ||
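Note: the version bump to 0x020d and the XLF rework are what boot loaders key off: a mixed-mode kernel now advertises both XLF_EFI_HANDOVER_32 and XLF_EFI_HANDOVER_64, plus XLF_EFI_KEXEC where applicable. A hedged sketch of the consumer side, using the real setup_header fields from bootparam.h:

    #include <asm/bootparam.h>      /* struct setup_header, XLF_* flags */

    static int can_handover_64(const struct setup_header *hdr)
    {
            return hdr->version >= 0x020b &&        /* handover protocol era */
                   (hdr->xloadflags & XLF_EFI_HANDOVER_64);
    }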
diff --git a/arch/x86/boot/main.c b/arch/x86/boot/main.c index cf6083d444f4..fd6c9f236996 100644 --- a/arch/x86/boot/main.c +++ b/arch/x86/boot/main.c | |||
@@ -14,6 +14,7 @@ | |||
14 | */ | 14 | */ |
15 | 15 | ||
16 | #include "boot.h" | 16 | #include "boot.h" |
17 | #include "string.h" | ||
17 | 18 | ||
18 | struct boot_params boot_params __attribute__((aligned(16))); | 19 | struct boot_params boot_params __attribute__((aligned(16))); |
19 | 20 | ||
diff --git a/arch/x86/boot/regs.c b/arch/x86/boot/regs.c index 958019b1cfa5..c0fb356a3092 100644 --- a/arch/x86/boot/regs.c +++ b/arch/x86/boot/regs.c | |||
@@ -17,6 +17,7 @@ | |||
17 | */ | 17 | */ |
18 | 18 | ||
19 | #include "boot.h" | 19 | #include "boot.h" |
20 | #include "string.h" | ||
20 | 21 | ||
21 | void initregs(struct biosregs *reg) | 22 | void initregs(struct biosregs *reg) |
22 | { | 23 | { |
diff --git a/arch/x86/boot/string.c b/arch/x86/boot/string.c index 574dedfe2890..5339040ef86e 100644 --- a/arch/x86/boot/string.c +++ b/arch/x86/boot/string.c | |||
@@ -14,6 +14,20 @@ | |||
14 | 14 | ||
15 | #include "boot.h" | 15 | #include "boot.h" |
16 | 16 | ||
17 | /* | ||
18 | * This file gets included in compressed/string.c which might pull in | ||
19 | * string_32.h and which in turn maps memcmp to __builtin_memcmp(). Undo | ||
20 | * that first. | ||
21 | */ | ||
22 | #undef memcmp | ||
23 | int memcmp(const void *s1, const void *s2, size_t len) | ||
24 | { | ||
25 | u8 diff; | ||
26 | asm("repe; cmpsb; setnz %0" | ||
27 | : "=qm" (diff), "+D" (s1), "+S" (s2), "+c" (len)); | ||
28 | return diff; | ||
29 | } | ||
30 | |||
17 | int strcmp(const char *str1, const char *str2) | 31 | int strcmp(const char *str1, const char *str2) |
18 | { | 32 | { |
19 | const unsigned char *s1 = (const unsigned char *)str1; | 33 | const unsigned char *s1 = (const unsigned char *)str1; |
diff --git a/arch/x86/boot/string.h b/arch/x86/boot/string.h new file mode 100644 index 000000000000..725e820602b1 --- /dev/null +++ b/arch/x86/boot/string.h | |||
@@ -0,0 +1,21 @@ | |||
1 | #ifndef BOOT_STRING_H | ||
2 | #define BOOT_STRING_H | ||
3 | |||
4 | /* Undef any of these macros coming from string_32.h. */ | ||
5 | #undef memcpy | ||
6 | #undef memset | ||
7 | #undef memcmp | ||
8 | |||
9 | void *memcpy(void *dst, const void *src, size_t len); | ||
10 | void *memset(void *dst, int c, size_t len); | ||
11 | int memcmp(const void *s1, const void *s2, size_t len); | ||
12 | |||
13 | /* | ||
14 | * Access the builtin versions by default. To use an optimized version, | ||
15 | * "#undef memcpy" in the .c file and link against the right string.c. | ||
16 | */ | ||
17 | #define memcpy(d,s,l) __builtin_memcpy(d,s,l) | ||
18 | #define memset(d,c,l) __builtin_memset(d,c,l) | ||
19 | #define memcmp __builtin_memcmp | ||
20 | |||
21 | #endif /* BOOT_STRING_H */ | ||
diff --git a/arch/x86/boot/tools/build.c b/arch/x86/boot/tools/build.c index 8e15b22391fc..1a2f2121cada 100644 --- a/arch/x86/boot/tools/build.c +++ b/arch/x86/boot/tools/build.c | |||
@@ -53,7 +53,8 @@ int is_big_kernel; | |||
53 | 53 | ||
54 | #define PECOFF_RELOC_RESERVE 0x20 | 54 | #define PECOFF_RELOC_RESERVE 0x20 |
55 | 55 | ||
56 | unsigned long efi_stub_entry; | 56 | unsigned long efi32_stub_entry; |
57 | unsigned long efi64_stub_entry; | ||
57 | unsigned long efi_pe_entry; | 58 | unsigned long efi_pe_entry; |
58 | unsigned long startup_64; | 59 | unsigned long startup_64; |
59 | 60 | ||
@@ -219,6 +220,52 @@ static void update_pecoff_text(unsigned int text_start, unsigned int file_sz) | |||
219 | update_pecoff_section_header(".text", text_start, text_sz); | 220 | update_pecoff_section_header(".text", text_start, text_sz); |
220 | } | 221 | } |
221 | 222 | ||
223 | static int reserve_pecoff_reloc_section(int c) | ||
224 | { | ||
225 | /* Reserve 0x20 bytes for .reloc section */ | ||
226 | memset(buf+c, 0, PECOFF_RELOC_RESERVE); | ||
227 | return PECOFF_RELOC_RESERVE; | ||
228 | } | ||
229 | |||
230 | static void efi_stub_defaults(void) | ||
231 | { | ||
232 | /* Defaults for old kernel */ | ||
233 | #ifdef CONFIG_X86_32 | ||
234 | efi_pe_entry = 0x10; | ||
235 | #else | ||
236 | efi_pe_entry = 0x210; | ||
237 | startup_64 = 0x200; | ||
238 | #endif | ||
239 | } | ||
240 | |||
241 | static void efi_stub_entry_update(void) | ||
242 | { | ||
243 | unsigned long addr = efi32_stub_entry; | ||
244 | |||
245 | #ifdef CONFIG_X86_64 | ||
246 | /* Yes, this is really how we defined it :( */ | ||
247 | addr = efi64_stub_entry - 0x200; | ||
248 | #endif | ||
249 | |||
250 | #ifdef CONFIG_EFI_MIXED | ||
251 | if (efi32_stub_entry != addr) | ||
252 | die("32-bit and 64-bit EFI entry points do not match\n"); | ||
253 | #endif | ||
254 | put_unaligned_le32(addr, &buf[0x264]); | ||
255 | } | ||
256 | |||
257 | #else | ||
258 | |||
259 | static inline void update_pecoff_setup_and_reloc(unsigned int size) {} | ||
260 | static inline void update_pecoff_text(unsigned int text_start, | ||
261 | unsigned int file_sz) {} | ||
262 | static inline void efi_stub_defaults(void) {} | ||
263 | static inline void efi_stub_entry_update(void) {} | ||
264 | |||
265 | static inline int reserve_pecoff_reloc_section(int c) | ||
266 | { | ||
267 | return 0; | ||
268 | } | ||
222 | #endif /* CONFIG_EFI_STUB */ | 269 | #endif /* CONFIG_EFI_STUB */ |
223 | 270 | ||
224 | 271 | ||
@@ -250,7 +297,8 @@ static void parse_zoffset(char *fname) | |||
250 | p = (char *)buf; | 297 | p = (char *)buf; |
251 | 298 | ||
252 | while (p && *p) { | 299 | while (p && *p) { |
253 | PARSE_ZOFS(p, efi_stub_entry); | 300 | PARSE_ZOFS(p, efi32_stub_entry); |
301 | PARSE_ZOFS(p, efi64_stub_entry); | ||
254 | PARSE_ZOFS(p, efi_pe_entry); | 302 | PARSE_ZOFS(p, efi_pe_entry); |
255 | PARSE_ZOFS(p, startup_64); | 303 | PARSE_ZOFS(p, startup_64); |
256 | 304 | ||
@@ -271,15 +319,7 @@ int main(int argc, char ** argv) | |||
271 | void *kernel; | 319 | void *kernel; |
272 | u32 crc = 0xffffffffUL; | 320 | u32 crc = 0xffffffffUL; |
273 | 321 | ||
274 | /* Defaults for old kernel */ | 322 | efi_stub_defaults(); |
275 | #ifdef CONFIG_X86_32 | ||
276 | efi_pe_entry = 0x10; | ||
277 | efi_stub_entry = 0x30; | ||
278 | #else | ||
279 | efi_pe_entry = 0x210; | ||
280 | efi_stub_entry = 0x230; | ||
281 | startup_64 = 0x200; | ||
282 | #endif | ||
283 | 323 | ||
284 | if (argc != 5) | 324 | if (argc != 5) |
285 | usage(); | 325 | usage(); |
@@ -302,11 +342,7 @@ int main(int argc, char ** argv) | |||
302 | die("Boot block hasn't got boot flag (0xAA55)"); | 342 | die("Boot block hasn't got boot flag (0xAA55)"); |
303 | fclose(file); | 343 | fclose(file); |
304 | 344 | ||
305 | #ifdef CONFIG_EFI_STUB | 345 | c += reserve_pecoff_reloc_section(c); |
306 | /* Reserve 0x20 bytes for .reloc section */ | ||
307 | memset(buf+c, 0, PECOFF_RELOC_RESERVE); | ||
308 | c += PECOFF_RELOC_RESERVE; | ||
309 | #endif | ||
310 | 346 | ||
311 | /* Pad unused space with zeros */ | 347 | /* Pad unused space with zeros */ |
312 | setup_sectors = (c + 511) / 512; | 348 | setup_sectors = (c + 511) / 512; |
@@ -315,9 +351,7 @@ int main(int argc, char ** argv) | |||
315 | i = setup_sectors*512; | 351 | i = setup_sectors*512; |
316 | memset(buf+c, 0, i-c); | 352 | memset(buf+c, 0, i-c); |
317 | 353 | ||
318 | #ifdef CONFIG_EFI_STUB | ||
319 | update_pecoff_setup_and_reloc(i); | 354 | update_pecoff_setup_and_reloc(i); |
320 | #endif | ||
321 | 355 | ||
322 | /* Set the default root device */ | 356 | /* Set the default root device */ |
323 | put_unaligned_le16(DEFAULT_ROOT_DEV, &buf[508]); | 357 | put_unaligned_le16(DEFAULT_ROOT_DEV, &buf[508]); |
@@ -342,14 +376,9 @@ int main(int argc, char ** argv) | |||
342 | buf[0x1f1] = setup_sectors-1; | 376 | buf[0x1f1] = setup_sectors-1; |
343 | put_unaligned_le32(sys_size, &buf[0x1f4]); | 377 | put_unaligned_le32(sys_size, &buf[0x1f4]); |
344 | 378 | ||
345 | #ifdef CONFIG_EFI_STUB | ||
346 | update_pecoff_text(setup_sectors * 512, sz + i + ((sys_size * 16) - sz)); | 379 | update_pecoff_text(setup_sectors * 512, sz + i + ((sys_size * 16) - sz)); |
347 | 380 | ||
348 | #ifdef CONFIG_X86_64 /* Yes, this is really how we defined it :( */ | 381 | efi_stub_entry_update(); |
349 | efi_stub_entry -= 0x200; | ||
350 | #endif | ||
351 | put_unaligned_le32(efi_stub_entry, &buf[0x264]); | ||
352 | #endif | ||
353 | 382 | ||
354 | crc = partial_crc32(buf, i, crc); | 383 | crc = partial_crc32(buf, i, crc); |
355 | if (fwrite(buf, 1, i, dest) != i) | 384 | if (fwrite(buf, 1, i, dest) != i) |
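Note: efi_stub_entry_update() stores the chosen entry into buf[0x264], i.e. the handover_offset field that header.S now leaves as zero for build.c to fill. The "-0x200" preserves the documented quirk that 64-bit loaders add 512 to handover_offset, so from the loader's side (a sketch with invented variable names):

    unsigned long entry = bzimage_base + hdr->handover_offset;

    if (loader_is_64bit)
            entry += 0x200; /* the quirk behind 'efi64_stub_entry - 0x200' */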
diff --git a/arch/x86/boot/video-vesa.c b/arch/x86/boot/video-vesa.c index 11e8c6eb80a1..ba3e100654db 100644 --- a/arch/x86/boot/video-vesa.c +++ b/arch/x86/boot/video-vesa.c | |||
@@ -16,6 +16,7 @@ | |||
16 | #include "boot.h" | 16 | #include "boot.h" |
17 | #include "video.h" | 17 | #include "video.h" |
18 | #include "vesa.h" | 18 | #include "vesa.h" |
19 | #include "string.h" | ||
19 | 20 | ||
20 | /* VESA information */ | 21 | /* VESA information */ |
21 | static struct vesa_general_info vginfo; | 22 | static struct vesa_general_info vginfo; |
diff --git a/arch/x86/boot/video.h b/arch/x86/boot/video.h index ff339c5db311..0bb25491262d 100644 --- a/arch/x86/boot/video.h +++ b/arch/x86/boot/video.h | |||
@@ -80,7 +80,7 @@ struct card_info { | |||
80 | u16 xmode_n; /* Size of unprobed mode range */ | 80 | u16 xmode_n; /* Size of unprobed mode range */ |
81 | }; | 81 | }; |
82 | 82 | ||
83 | #define __videocard struct card_info __attribute__((section(".videocards"))) | 83 | #define __videocard struct card_info __attribute__((used,section(".videocards"))) |
84 | extern struct card_info video_cards[], video_cards_end[]; | 84 | extern struct card_info video_cards[], video_cards_end[]; |
85 | 85 | ||
86 | int mode_defined(u16 mode); /* video.c */ | 86 | int mode_defined(u16 mode); /* video.c */ |
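Note: adding "used" to __videocard matters because the card_info objects are never referenced by name; they are only walked via the linker-provided video_cards[]..video_cards_end[] bounds, so the compiler is otherwise entitled to discard them. The registration pattern, condensed:

    /* Instances land in .videocards and are found via section bounds
     * rather than symbol references: */
    static __videocard video_vga = {
            .card_name = "VGA",             /* as in video-vga.c */
    };

    static void walk_videocards(void)       /* illustrative walker */
    {
            struct card_info *card;

            for (card = video_cards; card < video_cards_end; card++)
                    ;                        /* probe each card here */
    }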
diff --git a/arch/x86/configs/i386_defconfig b/arch/x86/configs/i386_defconfig index a7fef2621cc9..619e7f7426c6 100644 --- a/arch/x86/configs/i386_defconfig +++ b/arch/x86/configs/i386_defconfig | |||
@@ -60,7 +60,6 @@ CONFIG_CRASH_DUMP=y | |||
60 | CONFIG_HIBERNATION=y | 60 | CONFIG_HIBERNATION=y |
61 | CONFIG_PM_DEBUG=y | 61 | CONFIG_PM_DEBUG=y |
62 | CONFIG_PM_TRACE_RTC=y | 62 | CONFIG_PM_TRACE_RTC=y |
63 | CONFIG_ACPI_PROCFS=y | ||
64 | CONFIG_ACPI_DOCK=y | 63 | CONFIG_ACPI_DOCK=y |
65 | CONFIG_CPU_FREQ=y | 64 | CONFIG_CPU_FREQ=y |
66 | # CONFIG_CPU_FREQ_STAT is not set | 65 | # CONFIG_CPU_FREQ_STAT is not set |
diff --git a/arch/x86/configs/x86_64_defconfig b/arch/x86/configs/x86_64_defconfig index c1119d4c1281..6181c69b786b 100644 --- a/arch/x86/configs/x86_64_defconfig +++ b/arch/x86/configs/x86_64_defconfig | |||
@@ -58,7 +58,6 @@ CONFIG_CRASH_DUMP=y | |||
58 | CONFIG_HIBERNATION=y | 58 | CONFIG_HIBERNATION=y |
59 | CONFIG_PM_DEBUG=y | 59 | CONFIG_PM_DEBUG=y |
60 | CONFIG_PM_TRACE_RTC=y | 60 | CONFIG_PM_TRACE_RTC=y |
61 | CONFIG_ACPI_PROCFS=y | ||
62 | CONFIG_ACPI_DOCK=y | 61 | CONFIG_ACPI_DOCK=y |
63 | CONFIG_CPU_FREQ=y | 62 | CONFIG_CPU_FREQ=y |
64 | # CONFIG_CPU_FREQ_STAT is not set | 63 | # CONFIG_CPU_FREQ_STAT is not set |
diff --git a/arch/x86/crypto/Makefile b/arch/x86/crypto/Makefile index e0fc24db234a..61d6e281898b 100644 --- a/arch/x86/crypto/Makefile +++ b/arch/x86/crypto/Makefile | |||
@@ -76,8 +76,12 @@ ifeq ($(avx2_supported),yes) | |||
76 | endif | 76 | endif |
77 | 77 | ||
78 | aesni-intel-y := aesni-intel_asm.o aesni-intel_glue.o fpu.o | 78 | aesni-intel-y := aesni-intel_asm.o aesni-intel_glue.o fpu.o |
79 | aesni-intel-$(CONFIG_64BIT) += aesni-intel_avx-x86_64.o | ||
79 | ghash-clmulni-intel-y := ghash-clmulni-intel_asm.o ghash-clmulni-intel_glue.o | 80 | ghash-clmulni-intel-y := ghash-clmulni-intel_asm.o ghash-clmulni-intel_glue.o |
80 | sha1-ssse3-y := sha1_ssse3_asm.o sha1_ssse3_glue.o | 81 | sha1-ssse3-y := sha1_ssse3_asm.o sha1_ssse3_glue.o |
82 | ifeq ($(avx2_supported),yes) | ||
83 | sha1-ssse3-y += sha1_avx2_x86_64_asm.o | ||
84 | endif | ||
81 | crc32c-intel-y := crc32c-intel_glue.o | 85 | crc32c-intel-y := crc32c-intel_glue.o |
82 | crc32c-intel-$(CONFIG_64BIT) += crc32c-pcl-intel-asm_64.o | 86 | crc32c-intel-$(CONFIG_64BIT) += crc32c-pcl-intel-asm_64.o |
83 | crc32-pclmul-y := crc32-pclmul_asm.o crc32-pclmul_glue.o | 87 | crc32-pclmul-y := crc32-pclmul_asm.o crc32-pclmul_glue.o |
diff --git a/arch/x86/crypto/aesni-intel_avx-x86_64.S b/arch/x86/crypto/aesni-intel_avx-x86_64.S new file mode 100644 index 000000000000..522ab68d1c88 --- /dev/null +++ b/arch/x86/crypto/aesni-intel_avx-x86_64.S | |||
@@ -0,0 +1,2811 @@ | |||
1 | ######################################################################## | ||
2 | # Copyright (c) 2013, Intel Corporation | ||
3 | # | ||
4 | # This software is available to you under a choice of one of two | ||
5 | # licenses. You may choose to be licensed under the terms of the GNU | ||
6 | # General Public License (GPL) Version 2, available from the file | ||
7 | # COPYING in the main directory of this source tree, or the | ||
8 | # OpenIB.org BSD license below: | ||
9 | # | ||
10 | # Redistribution and use in source and binary forms, with or without | ||
11 | # modification, are permitted provided that the following conditions are | ||
12 | # met: | ||
13 | # | ||
14 | # * Redistributions of source code must retain the above copyright | ||
15 | # notice, this list of conditions and the following disclaimer. | ||
16 | # | ||
17 | # * Redistributions in binary form must reproduce the above copyright | ||
18 | # notice, this list of conditions and the following disclaimer in the | ||
19 | # documentation and/or other materials provided with the | ||
20 | # distribution. | ||
21 | # | ||
22 | # * Neither the name of the Intel Corporation nor the names of its | ||
23 | # contributors may be used to endorse or promote products derived from | ||
24 | # this software without specific prior written permission. | ||
25 | # | ||
26 | # | ||
27 | # THIS SOFTWARE IS PROVIDED BY INTEL CORPORATION ""AS IS"" AND ANY | ||
28 | # EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE | ||
29 | # IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR | ||
30 | # PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL INTEL CORPORATION OR | ||
31 | # CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, | ||
32 | # EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, | ||
33 | # PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES# LOSS OF USE, DATA, OR | ||
34 | # PROFITS# OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF | ||
35 | # LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING | ||
36 | # NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS | ||
37 | # SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. | ||
38 | ######################################################################## | ||
39 | ## | ||
40 | ## Authors: | ||
41 | ## Erdinc Ozturk <erdinc.ozturk@intel.com> | ||
42 | ## Vinodh Gopal <vinodh.gopal@intel.com> | ||
43 | ## James Guilford <james.guilford@intel.com> | ||
44 | ## Tim Chen <tim.c.chen@linux.intel.com> | ||
45 | ## | ||
46 | ## References: | ||
47 | ## This code was derived and highly optimized from the code described in paper: | ||
48 | ## Vinodh Gopal et. al. Optimized Galois-Counter-Mode Implementation | ||
49 | ## on Intel Architecture Processors. August, 2010 | ||
50 | ## The details of the implementation is explained in: | ||
51 | ## Erdinc Ozturk et. al. Enabling High-Performance Galois-Counter-Mode | ||
52 | ## on Intel Architecture Processors. October, 2012. | ||
53 | ## | ||
54 | ## Assumptions: | ||
55 | ## | ||
56 | ## | ||
57 | ## | ||
58 | ## iv: | ||
59 | ## 0 1 2 3 | ||
60 | ## 0 1 2 3 4 5 6 7 8 9 0 1 2 3 4 5 6 7 8 9 0 1 2 3 4 5 6 7 8 9 0 1 | ||
61 | ## +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+ | ||
62 | ## | Salt (From the SA) | | ||
63 | ## +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+ | ||
64 | ## | Initialization Vector | | ||
65 | ## | (This is the sequence number from IPSec header) | | ||
66 | ## +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+ | ||
67 | ## | 0x1 | | ||
68 | ## +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+ | ||
69 | ## | ||
70 | ## | ||
71 | ## | ||
72 | ## AAD: | ||
73 | ## AAD padded to 128 bits with 0 | ||
74 | ## for example, assume AAD is a u32 vector | ||
75 | ## | ||
76 | ## if AAD is 8 bytes: | ||
77 | ## AAD[3] = {A0, A1}# | ||
78 | ## padded AAD in xmm register = {A1 A0 0 0} | ||
79 | ## | ||
80 | ## 0 1 2 3 | ||
81 | ## 0 1 2 3 4 5 6 7 8 9 0 1 2 3 4 5 6 7 8 9 0 1 2 3 4 5 6 7 8 9 0 1 | ||
82 | ## +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+ | ||
83 | ## | SPI (A1) | | ||
84 | ## +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+ | ||
85 | ## | 32-bit Sequence Number (A0) | | ||
86 | ## +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+ | ||
87 | ## | 0x0 | | ||
88 | ## +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+ | ||
89 | ## | ||
90 | ## AAD Format with 32-bit Sequence Number | ||
91 | ## | ||
92 | ## if AAD is 12 bytes: | ||
93 | ## AAD[3] = {A0, A1, A2}# | ||
94 | ## padded AAD in xmm register = {A2 A1 A0 0} | ||
95 | ## | ||
96 | ## 0 1 2 3 | ||
97 | ## 0 1 2 3 4 5 6 7 8 9 0 1 2 3 4 5 6 7 8 9 0 1 2 3 4 5 6 7 8 9 0 1 | ||
98 | ## +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+ | ||
99 | ## | SPI (A2) | | ||
100 | ## +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+ | ||
101 | ## | 64-bit Extended Sequence Number {A1,A0} | | ||
102 | ## | | | ||
103 | ## +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+ | ||
104 | ## | 0x0 | | ||
105 | ## +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+ | ||
106 | ## | ||
107 | ## AAD Format with 64-bit Extended Sequence Number | ||
108 | ## | ||
109 | ## | ||
110 | ## aadLen: | ||
111 | ## from the definition of the spec, aadLen can only be 8 or 12 bytes. | ||
112 | ## The code additionally supports aadLen of length 16 bytes. | ||
113 | ## | ||
114 | ## TLen: | ||
115 | ## from the definition of the spec, TLen can only be 8, 12 or 16 bytes. | ||
116 | ## | ||
117 | ## poly = x^128 + x^127 + x^126 + x^121 + 1 | ||
118 | ## throughout the code, one-tab and two-tab indentation is used. one tab | ||
119 | ## is for the GHASH part, two tabs for the AES part. | ||
120 | ## | ||
121 | |||
122 | #include <linux/linkage.h> | ||
123 | #include <asm/inst.h> | ||
124 | |||
125 | .data | ||
126 | .align 16 | ||
127 | |||
128 | POLY: .octa 0xC2000000000000000000000000000001 | ||
129 | POLY2: .octa 0xC20000000000000000000001C2000000 | ||
130 | TWOONE: .octa 0x00000001000000000000000000000001 | ||
131 | |||
132 | # order of these constants should not change. | ||
133 | # more specifically, ALL_F should follow SHIFT_MASK, and ZERO should follow ALL_F | ||
134 | |||
135 | SHUF_MASK: .octa 0x000102030405060708090A0B0C0D0E0F | ||
136 | SHIFT_MASK: .octa 0x0f0e0d0c0b0a09080706050403020100 | ||
137 | ALL_F: .octa 0xffffffffffffffffffffffffffffffff | ||
138 | ZERO: .octa 0x00000000000000000000000000000000 | ||
139 | ONE: .octa 0x00000000000000000000000000000001 | ||
140 | ONEf: .octa 0x01000000000000000000000000000000 | ||
141 | |||
142 | .text | ||
143 | |||
144 | |||
145 | ##define the fields of the gcm aes context | ||
146 | #{ | ||
147 | # u8 expanded_keys[16*11] store expanded keys | ||
148 | # u8 shifted_hkey_1[16] store HashKey <<1 mod poly here | ||
149 | # u8 shifted_hkey_2[16] store HashKey^2 <<1 mod poly here | ||
150 | # u8 shifted_hkey_3[16] store HashKey^3 <<1 mod poly here | ||
151 | # u8 shifted_hkey_4[16] store HashKey^4 <<1 mod poly here | ||
152 | # u8 shifted_hkey_5[16] store HashKey^5 <<1 mod poly here | ||
153 | # u8 shifted_hkey_6[16] store HashKey^6 <<1 mod poly here | ||
154 | # u8 shifted_hkey_7[16] store HashKey^7 <<1 mod poly here | ||
155 | # u8 shifted_hkey_8[16] store HashKey^8 <<1 mod poly here | ||
156 | # u8 shifted_hkey_1_k[16] store XOR HashKey <<1 mod poly here (for Karatsuba purposes) | ||
157 | # u8 shifted_hkey_2_k[16] store XOR HashKey^2 <<1 mod poly here (for Karatsuba purposes) | ||
158 | # u8 shifted_hkey_3_k[16] store XOR HashKey^3 <<1 mod poly here (for Karatsuba purposes) | ||
159 | # u8 shifted_hkey_4_k[16] store XOR HashKey^4 <<1 mod poly here (for Karatsuba purposes) | ||
160 | # u8 shifted_hkey_5_k[16] store XOR HashKey^5 <<1 mod poly here (for Karatsuba purposes) | ||
161 | # u8 shifted_hkey_6_k[16] store XOR HashKey^6 <<1 mod poly here (for Karatsuba purposes) | ||
162 | # u8 shifted_hkey_7_k[16] store XOR HashKey^7 <<1 mod poly here (for Karatsuba purposes) | ||
163 | # u8 shifted_hkey_8_k[16] store XOR HashKey^8 <<1 mod poly here (for Karatsuba purposes) | ||
164 | #} gcm_ctx# | ||
165 | |||
166 | HashKey = 16*11 # store HashKey <<1 mod poly here | ||
167 | HashKey_2 = 16*12 # store HashKey^2 <<1 mod poly here | ||
168 | HashKey_3 = 16*13 # store HashKey^3 <<1 mod poly here | ||
169 | HashKey_4 = 16*14 # store HashKey^4 <<1 mod poly here | ||
170 | HashKey_5 = 16*15 # store HashKey^5 <<1 mod poly here | ||
171 | HashKey_6 = 16*16 # store HashKey^6 <<1 mod poly here | ||
172 | HashKey_7 = 16*17 # store HashKey^7 <<1 mod poly here | ||
173 | HashKey_8 = 16*18 # store HashKey^8 <<1 mod poly here | ||
174 | HashKey_k = 16*19 # store XOR of HashKey <<1 mod poly here (for Karatsuba purposes) | ||
175 | HashKey_2_k = 16*20 # store XOR of HashKey^2 <<1 mod poly here (for Karatsuba purposes) | ||
176 | HashKey_3_k = 16*21 # store XOR of HashKey^3 <<1 mod poly here (for Karatsuba purposes) | ||
177 | HashKey_4_k = 16*22 # store XOR of HashKey^4 <<1 mod poly here (for Karatsuba purposes) | ||
178 | HashKey_5_k = 16*23 # store XOR of HashKey^5 <<1 mod poly here (for Karatsuba purposes) | ||
179 | HashKey_6_k = 16*24 # store XOR of HashKey^6 <<1 mod poly here (for Karatsuba purposes) | ||
180 | HashKey_7_k = 16*25 # store XOR of HashKey^7 <<1 mod poly here (for Karatsuba purposes) | ||
181 | HashKey_8_k = 16*26 # store XOR of HashKey^8 <<1 mod poly here (for Karatsuba purposes) | ||
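| # Layout note (derived from the offsets above, a sketch): the expanded AES | ||
| # round keys occupy bytes 0..16*11 of the context, HashKey^i is stored at | ||
| # offset 16*(10+i), and the Karatsuba helper HashKey^i_k at 16*(18+i). | ||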
182 | |||
183 | #define arg1 %rdi | ||
184 | #define arg2 %rsi | ||
185 | #define arg3 %rdx | ||
186 | #define arg4 %rcx | ||
187 | #define arg5 %r8 | ||
188 | #define arg6 %r9 | ||
189 | #define arg7 STACK_OFFSET+8*1(%r14) | ||
190 | #define arg8 STACK_OFFSET+8*2(%r14) | ||
191 | #define arg9 STACK_OFFSET+8*3(%r14) | ||
192 | |||
193 | i = 0 | ||
194 | j = 0 | ||
195 | |||
196 | out_order = 0 | ||
197 | in_order = 1 | ||
198 | DEC = 0 | ||
199 | ENC = 1 | ||
200 | |||
201 | .macro define_reg r n | ||
202 | reg_\r = %xmm\n | ||
203 | .endm | ||
204 | |||
205 | .macro setreg | ||
206 | .altmacro | ||
207 | define_reg i %i | ||
208 | define_reg j %j | ||
209 | .noaltmacro | ||
210 | .endm | ||
211 | |||
212 | # need to push 4 registers onto the stack to maintain STACK_OFFSET; stack arguments are then read at STACK_OFFSET past the saved %rsp (%r14) | ||
213 | STACK_OFFSET = 8*4 | ||
214 | |||
215 | TMP1 = 16*0 # Temporary storage for AAD | ||
216 | TMP2 = 16*1 # Temporary storage for AES State 2 (State 1 is stored in an XMM register) | ||
217 | TMP3 = 16*2 # Temporary storage for AES State 3 | ||
218 | TMP4 = 16*3 # Temporary storage for AES State 4 | ||
219 | TMP5 = 16*4 # Temporary storage for AES State 5 | ||
220 | TMP6 = 16*5 # Temporary storage for AES State 6 | ||
221 | TMP7 = 16*6 # Temporary storage for AES State 7 | ||
222 | TMP8 = 16*7 # Temporary storage for AES State 8 | ||
223 | |||
224 | VARIABLE_OFFSET = 16*8 | ||
225 | |||
226 | ################################ | ||
227 | # Utility Macros | ||
228 | ################################ | ||
229 | |||
230 | # Encryption of a single block | ||
231 | .macro ENCRYPT_SINGLE_BLOCK XMM0 | ||
232 | vpxor (arg1), \XMM0, \XMM0 | ||
233 | i = 1 | ||
234 | setreg | ||
235 | .rep 9 | ||
236 | vaesenc 16*i(arg1), \XMM0, \XMM0 | ||
237 | i = (i+1) | ||
238 | setreg | ||
239 | .endr | ||
240 | vaesenclast 16*10(arg1), \XMM0, \XMM0 | ||
241 | .endm | ||
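| # A scalar sketch of what ENCRYPT_SINGLE_BLOCK computes, assuming the 11 | ||
| # AES-128 round keys rk[0..10] start at arg1 (pseudo-C; helper names are | ||
| # illustrative): | ||
| # | ||
| #   state ^= rk[0];                              /* whitening (vpxor)   */ | ||
| #   for (r = 1; r <= 9; r++) | ||
| #       state = aes_enc_round(state, rk[r]);     /* vaesenc             */ | ||
| #   state = aes_enc_last_round(state, rk[10]);   /* vaesenclast         */ | ||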
242 | |||
243 | #ifdef CONFIG_AS_AVX | ||
244 | ############################################################################### | ||
245 | # GHASH_MUL MACRO to implement: Data*HashKey mod (128,127,126,121,0) | ||
246 | # Input: A and B (128-bits each, bit-reflected) | ||
247 | # Output: C = A*B*x mod poly, (i.e. >>1 ) | ||
248 | # To compute GH = GH*HashKey mod poly, give HK = HashKey<<1 mod poly as input | ||
249 | # GH = GH * HK * x mod poly which is equivalent to GH*HashKey mod poly. | ||
250 | ############################################################################### | ||
251 | .macro GHASH_MUL_AVX GH HK T1 T2 T3 T4 T5 | ||
252 | |||
253 | vpshufd $0b01001110, \GH, \T2 | ||
254 | vpshufd $0b01001110, \HK, \T3 | ||
255 | vpxor \GH , \T2, \T2 # T2 = (a1+a0) | ||
256 | vpxor \HK , \T3, \T3 # T3 = (b1+b0) | ||
257 | |||
258 | vpclmulqdq $0x11, \HK, \GH, \T1 # T1 = a1*b1 | ||
259 | vpclmulqdq $0x00, \HK, \GH, \GH # GH = a0*b0 | ||
260 | vpclmulqdq $0x00, \T3, \T2, \T2 # T2 = (a1+a0)*(b1+b0) | ||
261 | vpxor \GH, \T2,\T2 | ||
262 | vpxor \T1, \T2,\T2 # T2 = a0*b1+a1*b0 | ||
263 | |||
264 | vpslldq $8, \T2,\T3 # shift-L T3 2 DWs | ||
265 | vpsrldq $8, \T2,\T2 # shift-R T2 2 DWs | ||
266 | vpxor \T3, \GH, \GH | ||
267 | vpxor \T2, \T1, \T1 # <T1:GH> = GH x HK | ||
268 | |||
269 | #first phase of the reduction | ||
270 | vpslld $31, \GH, \T2 # packed left shifting << 31 | ||
271 | vpslld $30, \GH, \T3 # packed left shifting << 30 | ||
272 | vpslld $25, \GH, \T4 # packed left shifting << 25 | ||
273 | |||
274 | vpxor \T3, \T2, \T2 # xor the shifted versions | ||
275 | vpxor \T4, \T2, \T2 | ||
276 | |||
277 | vpsrldq $4, \T2, \T5 # shift-R T5 1 DW | ||
278 | |||
279 | vpslldq $12, \T2, \T2 # shift-L T2 3 DWs | ||
280 | vpxor \T2, \GH, \GH # first phase of the reduction complete | ||
281 | |||
282 | #second phase of the reduction | ||
283 | |||
284 | vpsrld $1,\GH, \T2 # packed right shifting >> 1 | ||
285 | vpsrld $2,\GH, \T3 # packed right shifting >> 2 | ||
286 | vpsrld $7,\GH, \T4 # packed right shifting >> 7 | ||
287 | vpxor \T3, \T2, \T2 # xor the shifted versions | ||
288 | vpxor \T4, \T2, \T2 | ||
289 | |||
290 | vpxor \T5, \T2, \T2 | ||
291 | vpxor \T2, \GH, \GH | ||
292 | vpxor \T1, \GH, \GH # the result is in GH | ||
293 | |||
294 | |||
295 | .endm | ||
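| # The Karatsuba identity used above, with A = a1:a0 and B = b1:b0 split | ||
| # into 64-bit halves and all arithmetic carry-less (GF(2)): | ||
| # | ||
| #   A*B = (a1*b1 << 128) ^ (M << 64) ^ (a0*b0) | ||
| #   M   = (a1^a0)*(b1^b0) ^ a1*b1 ^ a0*b0     /* = a1*b0 ^ a0*b1 */ | ||
| # | ||
| # so one 128x128 multiply costs three VPCLMULQDQs instead of four. | ||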
296 | |||
297 | .macro PRECOMPUTE_AVX HK T1 T2 T3 T4 T5 T6 | ||
298 | |||
299 | # HashKey_i_k holds the XOR of the low and high 64-bit halves of HashKey_i | ||
300 | vmovdqa \HK, \T5 | ||
301 | |||
302 | vpshufd $0b01001110, \T5, \T1 | ||
303 | vpxor \T5, \T1, \T1 | ||
304 | vmovdqa \T1, HashKey_k(arg1) | ||
305 | |||
306 | GHASH_MUL_AVX \T5, \HK, \T1, \T3, \T4, \T6, \T2 # T5 = HashKey^2<<1 mod poly | ||
307 | vmovdqa \T5, HashKey_2(arg1) # [HashKey_2] = HashKey^2<<1 mod poly | ||
308 | vpshufd $0b01001110, \T5, \T1 | ||
309 | vpxor \T5, \T1, \T1 | ||
310 | vmovdqa \T1, HashKey_2_k(arg1) | ||
311 | |||
312 | GHASH_MUL_AVX \T5, \HK, \T1, \T3, \T4, \T6, \T2 # T5 = HashKey^3<<1 mod poly | ||
313 | vmovdqa \T5, HashKey_3(arg1) | ||
314 | vpshufd $0b01001110, \T5, \T1 | ||
315 | vpxor \T5, \T1, \T1 | ||
316 | vmovdqa \T1, HashKey_3_k(arg1) | ||
317 | |||
318 | GHASH_MUL_AVX \T5, \HK, \T1, \T3, \T4, \T6, \T2 # T5 = HashKey^4<<1 mod poly | ||
319 | vmovdqa \T5, HashKey_4(arg1) | ||
320 | vpshufd $0b01001110, \T5, \T1 | ||
321 | vpxor \T5, \T1, \T1 | ||
322 | vmovdqa \T1, HashKey_4_k(arg1) | ||
323 | |||
324 | GHASH_MUL_AVX \T5, \HK, \T1, \T3, \T4, \T6, \T2 # T5 = HashKey^5<<1 mod poly | ||
325 | vmovdqa \T5, HashKey_5(arg1) | ||
326 | vpshufd $0b01001110, \T5, \T1 | ||
327 | vpxor \T5, \T1, \T1 | ||
328 | vmovdqa \T1, HashKey_5_k(arg1) | ||
329 | |||
330 | GHASH_MUL_AVX \T5, \HK, \T1, \T3, \T4, \T6, \T2 # T5 = HashKey^6<<1 mod poly | ||
331 | vmovdqa \T5, HashKey_6(arg1) | ||
332 | vpshufd $0b01001110, \T5, \T1 | ||
333 | vpxor \T5, \T1, \T1 | ||
334 | vmovdqa \T1, HashKey_6_k(arg1) | ||
335 | |||
336 | GHASH_MUL_AVX \T5, \HK, \T1, \T3, \T4, \T6, \T2 # T5 = HashKey^7<<1 mod poly | ||
337 | vmovdqa \T5, HashKey_7(arg1) | ||
338 | vpshufd $0b01001110, \T5, \T1 | ||
339 | vpxor \T5, \T1, \T1 | ||
340 | vmovdqa \T1, HashKey_7_k(arg1) | ||
341 | |||
342 | GHASH_MUL_AVX \T5, \HK, \T1, \T3, \T4, \T6, \T2 # T5 = HashKey^8<<1 mod poly | ||
343 | vmovdqa \T5, HashKey_8(arg1) | ||
344 | vpshufd $0b01001110, \T5, \T1 | ||
345 | vpxor \T5, \T1, \T1 | ||
346 | vmovdqa \T1, HashKey_8_k(arg1) | ||
347 | |||
348 | .endm | ||
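| # Pseudo-C sketch of the precomputation above (store() and hi64()/lo64() | ||
| # are illustrative helpers, not real names): | ||
| # | ||
| #   store(HashKey_1_k, hi64(H) ^ lo64(H)); | ||
| #   t = H;                                   /* HashKey<<1 mod poly */ | ||
| #   for (i = 2; i <= 8; i++) { | ||
| #       t = ghash_mul(t, H);                 /* t = HashKey^i <<1 mod poly */ | ||
| #       store(HashKey_i,   t); | ||
| #       store(HashKey_i_k, hi64(t) ^ lo64(t)); /* both halves hold the XOR */ | ||
| #   } | ||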
349 | |||
350 | ## if a = number of total plaintext bytes | ||
351 | ## b = floor(a/16) | ||
352 | ## num_initial_blocks = b mod 8 (see the dispatch sketch below) | ||
353 | ## encrypt the initial num_initial_blocks blocks and apply GHASH to the ciphertext | ||
354 | ## r10, r11, r12, rax are clobbered | ||
355 | ## arg1, arg2, arg3, r14 are used as pointers only, not modified | ||
356 | |||
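| # Dispatch sketch (this is what GCM_ENC_DEC_AVX below computes before | ||
| # invoking this macro): num_initial_blocks = (plaintext_len / 16) & 7 | ||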
357 | .macro INITIAL_BLOCKS_AVX num_initial_blocks T1 T2 T3 T4 T5 CTR XMM1 XMM2 XMM3 XMM4 XMM5 XMM6 XMM7 XMM8 T6 T_key ENC_DEC | ||
358 | i = (8-\num_initial_blocks) | ||
359 | setreg | ||
360 | |||
361 | mov arg6, %r10 # r10 = AAD | ||
362 | mov arg7, %r12 # r12 = aadLen | ||
363 | |||
364 | |||
365 | mov %r12, %r11 | ||
366 | |||
367 | vpxor reg_i, reg_i, reg_i | ||
368 | _get_AAD_loop\@: | ||
369 | vmovd (%r10), \T1 | ||
370 | vpslldq $12, \T1, \T1 | ||
371 | vpsrldq $4, reg_i, reg_i | ||
372 | vpxor \T1, reg_i, reg_i | ||
373 | |||
374 | add $4, %r10 | ||
375 | sub $4, %r12 | ||
376 | jg _get_AAD_loop\@ | ||
377 | |||
378 | |||
379 | cmp $16, %r11 | ||
380 | je _get_AAD_loop2_done\@ | ||
381 | mov $16, %r12 | ||
382 | |||
383 | _get_AAD_loop2\@: | ||
384 | vpsrldq $4, reg_i, reg_i | ||
385 | sub $4, %r12 | ||
386 | cmp %r11, %r12 | ||
387 | jg _get_AAD_loop2\@ | ||
388 | |||
389 | _get_AAD_loop2_done\@: | ||
390 | |||
391 | #byte-reflect the AAD data | ||
392 | vpshufb SHUF_MASK(%rip), reg_i, reg_i | ||
393 | |||
394 | # initialize the data pointer offset as zero | ||
395 | xor %r11, %r11 | ||
396 | |||
397 | # start AES for num_initial_blocks blocks | ||
398 | mov arg5, %rax # rax = *Y0 | ||
399 | vmovdqu (%rax), \CTR # CTR = Y0 | ||
400 | vpshufb SHUF_MASK(%rip), \CTR, \CTR | ||
401 | |||
402 | |||
403 | i = (9-\num_initial_blocks) | ||
404 | setreg | ||
405 | .rep \num_initial_blocks | ||
406 | vpaddd ONE(%rip), \CTR, \CTR # INCR Y0 | ||
407 | vmovdqa \CTR, reg_i | ||
408 | vpshufb SHUF_MASK(%rip), reg_i, reg_i # perform a 16Byte swap | ||
409 | i = (i+1) | ||
410 | setreg | ||
411 | .endr | ||
412 | |||
413 | vmovdqa (arg1), \T_key | ||
414 | i = (9-\num_initial_blocks) | ||
415 | setreg | ||
416 | .rep \num_initial_blocks | ||
417 | vpxor \T_key, reg_i, reg_i | ||
418 | i = (i+1) | ||
419 | setreg | ||
420 | .endr | ||
421 | |||
422 | j = 1 | ||
423 | setreg | ||
424 | .rep 9 | ||
425 | vmovdqa 16*j(arg1), \T_key | ||
426 | i = (9-\num_initial_blocks) | ||
427 | setreg | ||
428 | .rep \num_initial_blocks | ||
429 | vaesenc \T_key, reg_i, reg_i | ||
430 | i = (i+1) | ||
431 | setreg | ||
432 | .endr | ||
433 | |||
434 | j = (j+1) | ||
435 | setreg | ||
436 | .endr | ||
437 | |||
438 | |||
439 | vmovdqa 16*10(arg1), \T_key | ||
440 | i = (9-\num_initial_blocks) | ||
441 | setreg | ||
442 | .rep \num_initial_blocks | ||
443 | vaesenclast \T_key, reg_i, reg_i | ||
444 | i = (i+1) | ||
445 | setreg | ||
446 | .endr | ||
447 | |||
448 | i = (9-\num_initial_blocks) | ||
449 | setreg | ||
450 | .rep \num_initial_blocks | ||
451 | vmovdqu (arg3, %r11), \T1 | ||
452 | vpxor \T1, reg_i, reg_i | ||
453 | vmovdqu reg_i, (arg2 , %r11) # write back ciphertext for num_initial_blocks blocks | ||
454 | add $16, %r11 | ||
455 | .if \ENC_DEC == DEC | ||
456 | vmovdqa \T1, reg_i | ||
457 | .endif | ||
458 | vpshufb SHUF_MASK(%rip), reg_i, reg_i # prepare ciphertext for GHASH computations | ||
459 | i = (i+1) | ||
460 | setreg | ||
461 | .endr | ||
462 | |||
463 | |||
464 | i = (8-\num_initial_blocks) | ||
465 | j = (9-\num_initial_blocks) | ||
466 | setreg | ||
467 | GHASH_MUL_AVX reg_i, \T2, \T1, \T3, \T4, \T5, \T6 | ||
468 | |||
469 | .rep \num_initial_blocks | ||
470 | vpxor reg_i, reg_j, reg_j | ||
471 | GHASH_MUL_AVX reg_j, \T2, \T1, \T3, \T4, \T5, \T6 # apply GHASH on num_initial_blocks blocks | ||
472 | i = (i+1) | ||
473 | j = (j+1) | ||
474 | setreg | ||
475 | .endr | ||
476 | # XMM8 has the combined result here | ||
477 | |||
478 | vmovdqa \XMM8, TMP1(%rsp) | ||
479 | vmovdqa \XMM8, \T3 | ||
480 | |||
481 | cmp $128, %r13 | ||
482 | jl _initial_blocks_done\@ # no need for precomputed constants | ||
483 | |||
484 | ############################################################################### | ||
485 | # prepare and encrypt the next 8 counter blocks | ||
486 | vpaddd ONE(%rip), \CTR, \CTR # INCR Y0 | ||
487 | vmovdqa \CTR, \XMM1 | ||
488 | vpshufb SHUF_MASK(%rip), \XMM1, \XMM1 # perform a 16Byte swap | ||
489 | |||
490 | vpaddd ONE(%rip), \CTR, \CTR # INCR Y0 | ||
491 | vmovdqa \CTR, \XMM2 | ||
492 | vpshufb SHUF_MASK(%rip), \XMM2, \XMM2 # perform a 16Byte swap | ||
493 | |||
494 | vpaddd ONE(%rip), \CTR, \CTR # INCR Y0 | ||
495 | vmovdqa \CTR, \XMM3 | ||
496 | vpshufb SHUF_MASK(%rip), \XMM3, \XMM3 # perform a 16Byte swap | ||
497 | |||
498 | vpaddd ONE(%rip), \CTR, \CTR # INCR Y0 | ||
499 | vmovdqa \CTR, \XMM4 | ||
500 | vpshufb SHUF_MASK(%rip), \XMM4, \XMM4 # perform a 16Byte swap | ||
501 | |||
502 | vpaddd ONE(%rip), \CTR, \CTR # INCR Y0 | ||
503 | vmovdqa \CTR, \XMM5 | ||
504 | vpshufb SHUF_MASK(%rip), \XMM5, \XMM5 # perform a 16Byte swap | ||
505 | |||
506 | vpaddd ONE(%rip), \CTR, \CTR # INCR Y0 | ||
507 | vmovdqa \CTR, \XMM6 | ||
508 | vpshufb SHUF_MASK(%rip), \XMM6, \XMM6 # perform a 16Byte swap | ||
509 | |||
510 | vpaddd ONE(%rip), \CTR, \CTR # INCR Y0 | ||
511 | vmovdqa \CTR, \XMM7 | ||
512 | vpshufb SHUF_MASK(%rip), \XMM7, \XMM7 # perform a 16Byte swap | ||
513 | |||
514 | vpaddd ONE(%rip), \CTR, \CTR # INCR Y0 | ||
515 | vmovdqa \CTR, \XMM8 | ||
516 | vpshufb SHUF_MASK(%rip), \XMM8, \XMM8 # perform a 16Byte swap | ||
517 | |||
518 | vmovdqa (arg1), \T_key | ||
519 | vpxor \T_key, \XMM1, \XMM1 | ||
520 | vpxor \T_key, \XMM2, \XMM2 | ||
521 | vpxor \T_key, \XMM3, \XMM3 | ||
522 | vpxor \T_key, \XMM4, \XMM4 | ||
523 | vpxor \T_key, \XMM5, \XMM5 | ||
524 | vpxor \T_key, \XMM6, \XMM6 | ||
525 | vpxor \T_key, \XMM7, \XMM7 | ||
526 | vpxor \T_key, \XMM8, \XMM8 | ||
527 | |||
528 | i = 1 | ||
529 | setreg | ||
530 | .rep 9 # do 9 rounds | ||
531 | vmovdqa 16*i(arg1), \T_key | ||
532 | vaesenc \T_key, \XMM1, \XMM1 | ||
533 | vaesenc \T_key, \XMM2, \XMM2 | ||
534 | vaesenc \T_key, \XMM3, \XMM3 | ||
535 | vaesenc \T_key, \XMM4, \XMM4 | ||
536 | vaesenc \T_key, \XMM5, \XMM5 | ||
537 | vaesenc \T_key, \XMM6, \XMM6 | ||
538 | vaesenc \T_key, \XMM7, \XMM7 | ||
539 | vaesenc \T_key, \XMM8, \XMM8 | ||
540 | i = (i+1) | ||
541 | setreg | ||
542 | .endr | ||
543 | |||
544 | |||
545 | vmovdqa 16*i(arg1), \T_key | ||
546 | vaesenclast \T_key, \XMM1, \XMM1 | ||
547 | vaesenclast \T_key, \XMM2, \XMM2 | ||
548 | vaesenclast \T_key, \XMM3, \XMM3 | ||
549 | vaesenclast \T_key, \XMM4, \XMM4 | ||
550 | vaesenclast \T_key, \XMM5, \XMM5 | ||
551 | vaesenclast \T_key, \XMM6, \XMM6 | ||
552 | vaesenclast \T_key, \XMM7, \XMM7 | ||
553 | vaesenclast \T_key, \XMM8, \XMM8 | ||
554 | |||
555 | vmovdqu (arg3, %r11), \T1 | ||
556 | vpxor \T1, \XMM1, \XMM1 | ||
557 | vmovdqu \XMM1, (arg2 , %r11) | ||
558 | .if \ENC_DEC == DEC | ||
559 | vmovdqa \T1, \XMM1 | ||
560 | .endif | ||
561 | |||
562 | vmovdqu 16*1(arg3, %r11), \T1 | ||
563 | vpxor \T1, \XMM2, \XMM2 | ||
564 | vmovdqu \XMM2, 16*1(arg2 , %r11) | ||
565 | .if \ENC_DEC == DEC | ||
566 | vmovdqa \T1, \XMM2 | ||
567 | .endif | ||
568 | |||
569 | vmovdqu 16*2(arg3, %r11), \T1 | ||
570 | vpxor \T1, \XMM3, \XMM3 | ||
571 | vmovdqu \XMM3, 16*2(arg2 , %r11) | ||
572 | .if \ENC_DEC == DEC | ||
573 | vmovdqa \T1, \XMM3 | ||
574 | .endif | ||
575 | |||
576 | vmovdqu 16*3(arg3, %r11), \T1 | ||
577 | vpxor \T1, \XMM4, \XMM4 | ||
578 | vmovdqu \XMM4, 16*3(arg2 , %r11) | ||
579 | .if \ENC_DEC == DEC | ||
580 | vmovdqa \T1, \XMM4 | ||
581 | .endif | ||
582 | |||
583 | vmovdqu 16*4(arg3, %r11), \T1 | ||
584 | vpxor \T1, \XMM5, \XMM5 | ||
585 | vmovdqu \XMM5, 16*4(arg2 , %r11) | ||
586 | .if \ENC_DEC == DEC | ||
587 | vmovdqa \T1, \XMM5 | ||
588 | .endif | ||
589 | |||
590 | vmovdqu 16*5(arg3, %r11), \T1 | ||
591 | vpxor \T1, \XMM6, \XMM6 | ||
592 | vmovdqu \XMM6, 16*5(arg2 , %r11) | ||
593 | .if \ENC_DEC == DEC | ||
594 | vmovdqa \T1, \XMM6 | ||
595 | .endif | ||
596 | |||
597 | vmovdqu 16*6(arg3, %r11), \T1 | ||
598 | vpxor \T1, \XMM7, \XMM7 | ||
599 | vmovdqu \XMM7, 16*6(arg2 , %r11) | ||
600 | .if \ENC_DEC == DEC | ||
601 | vmovdqa \T1, \XMM7 | ||
602 | .endif | ||
603 | |||
604 | vmovdqu 16*7(arg3, %r11), \T1 | ||
605 | vpxor \T1, \XMM8, \XMM8 | ||
606 | vmovdqu \XMM8, 16*7(arg2 , %r11) | ||
607 | .if \ENC_DEC == DEC | ||
608 | vmovdqa \T1, \XMM8 | ||
609 | .endif | ||
610 | |||
611 | add $128, %r11 | ||
612 | |||
613 | vpshufb SHUF_MASK(%rip), \XMM1, \XMM1 # perform a 16Byte swap | ||
614 | vpxor TMP1(%rsp), \XMM1, \XMM1 # combine GHASHed value with the corresponding ciphertext | ||
615 | vpshufb SHUF_MASK(%rip), \XMM2, \XMM2 # perform a 16Byte swap | ||
616 | vpshufb SHUF_MASK(%rip), \XMM3, \XMM3 # perform a 16Byte swap | ||
617 | vpshufb SHUF_MASK(%rip), \XMM4, \XMM4 # perform a 16Byte swap | ||
618 | vpshufb SHUF_MASK(%rip), \XMM5, \XMM5 # perform a 16Byte swap | ||
619 | vpshufb SHUF_MASK(%rip), \XMM6, \XMM6 # perform a 16Byte swap | ||
620 | vpshufb SHUF_MASK(%rip), \XMM7, \XMM7 # perform a 16Byte swap | ||
621 | vpshufb SHUF_MASK(%rip), \XMM8, \XMM8 # perform a 16Byte swap | ||
622 | |||
623 | ############################################################################### | ||
624 | |||
625 | _initial_blocks_done\@: | ||
626 | |||
627 | .endm | ||
628 | |||
629 | # encrypt 8 blocks at a time | ||
630 | # ghash the 8 previously encrypted ciphertext blocks | ||
631 | # arg1, arg2, arg3 are used as pointers only, not modified | ||
632 | # r11 is the data offset value | ||
633 | .macro GHASH_8_ENCRYPT_8_PARALLEL_AVX T1 T2 T3 T4 T5 T6 CTR XMM1 XMM2 XMM3 XMM4 XMM5 XMM6 XMM7 XMM8 T7 loop_idx ENC_DEC | ||
634 | |||
635 | vmovdqa \XMM1, \T2 | ||
636 | vmovdqa \XMM2, TMP2(%rsp) | ||
637 | vmovdqa \XMM3, TMP3(%rsp) | ||
638 | vmovdqa \XMM4, TMP4(%rsp) | ||
639 | vmovdqa \XMM5, TMP5(%rsp) | ||
640 | vmovdqa \XMM6, TMP6(%rsp) | ||
641 | vmovdqa \XMM7, TMP7(%rsp) | ||
642 | vmovdqa \XMM8, TMP8(%rsp) | ||
643 | |||
644 | .if \loop_idx == in_order | ||
645 | vpaddd ONE(%rip), \CTR, \XMM1 # INCR CNT | ||
646 | vpaddd ONE(%rip), \XMM1, \XMM2 | ||
647 | vpaddd ONE(%rip), \XMM2, \XMM3 | ||
648 | vpaddd ONE(%rip), \XMM3, \XMM4 | ||
649 | vpaddd ONE(%rip), \XMM4, \XMM5 | ||
650 | vpaddd ONE(%rip), \XMM5, \XMM6 | ||
651 | vpaddd ONE(%rip), \XMM6, \XMM7 | ||
652 | vpaddd ONE(%rip), \XMM7, \XMM8 | ||
653 | vmovdqa \XMM8, \CTR | ||
654 | |||
655 | vpshufb SHUF_MASK(%rip), \XMM1, \XMM1 # perform a 16Byte swap | ||
656 | vpshufb SHUF_MASK(%rip), \XMM2, \XMM2 # perform a 16Byte swap | ||
657 | vpshufb SHUF_MASK(%rip), \XMM3, \XMM3 # perform a 16Byte swap | ||
658 | vpshufb SHUF_MASK(%rip), \XMM4, \XMM4 # perform a 16Byte swap | ||
659 | vpshufb SHUF_MASK(%rip), \XMM5, \XMM5 # perform a 16Byte swap | ||
660 | vpshufb SHUF_MASK(%rip), \XMM6, \XMM6 # perform a 16Byte swap | ||
661 | vpshufb SHUF_MASK(%rip), \XMM7, \XMM7 # perform a 16Byte swap | ||
662 | vpshufb SHUF_MASK(%rip), \XMM8, \XMM8 # perform a 16Byte swap | ||
663 | .else | ||
664 | vpaddd ONEf(%rip), \CTR, \XMM1 # INCR CNT | ||
665 | vpaddd ONEf(%rip), \XMM1, \XMM2 | ||
666 | vpaddd ONEf(%rip), \XMM2, \XMM3 | ||
667 | vpaddd ONEf(%rip), \XMM3, \XMM4 | ||
668 | vpaddd ONEf(%rip), \XMM4, \XMM5 | ||
669 | vpaddd ONEf(%rip), \XMM5, \XMM6 | ||
670 | vpaddd ONEf(%rip), \XMM6, \XMM7 | ||
671 | vpaddd ONEf(%rip), \XMM7, \XMM8 | ||
672 | vmovdqa \XMM8, \CTR | ||
673 | .endif | ||
674 | |||
675 | |||
676 | ####################################################################### | ||
677 | |||
678 | vmovdqu (arg1), \T1 | ||
679 | vpxor \T1, \XMM1, \XMM1 | ||
680 | vpxor \T1, \XMM2, \XMM2 | ||
681 | vpxor \T1, \XMM3, \XMM3 | ||
682 | vpxor \T1, \XMM4, \XMM4 | ||
683 | vpxor \T1, \XMM5, \XMM5 | ||
684 | vpxor \T1, \XMM6, \XMM6 | ||
685 | vpxor \T1, \XMM7, \XMM7 | ||
686 | vpxor \T1, \XMM8, \XMM8 | ||
687 | |||
688 | ####################################################################### | ||
689 | |||
690 | |||
691 | |||
692 | |||
693 | |||
694 | vmovdqu 16*1(arg1), \T1 | ||
695 | vaesenc \T1, \XMM1, \XMM1 | ||
696 | vaesenc \T1, \XMM2, \XMM2 | ||
697 | vaesenc \T1, \XMM3, \XMM3 | ||
698 | vaesenc \T1, \XMM4, \XMM4 | ||
699 | vaesenc \T1, \XMM5, \XMM5 | ||
700 | vaesenc \T1, \XMM6, \XMM6 | ||
701 | vaesenc \T1, \XMM7, \XMM7 | ||
702 | vaesenc \T1, \XMM8, \XMM8 | ||
703 | |||
704 | vmovdqu 16*2(arg1), \T1 | ||
705 | vaesenc \T1, \XMM1, \XMM1 | ||
706 | vaesenc \T1, \XMM2, \XMM2 | ||
707 | vaesenc \T1, \XMM3, \XMM3 | ||
708 | vaesenc \T1, \XMM4, \XMM4 | ||
709 | vaesenc \T1, \XMM5, \XMM5 | ||
710 | vaesenc \T1, \XMM6, \XMM6 | ||
711 | vaesenc \T1, \XMM7, \XMM7 | ||
712 | vaesenc \T1, \XMM8, \XMM8 | ||
713 | |||
714 | |||
715 | ####################################################################### | ||
716 | |||
717 | vmovdqa HashKey_8(arg1), \T5 | ||
718 | vpclmulqdq $0x11, \T5, \T2, \T4 # T4 = a1*b1 | ||
719 | vpclmulqdq $0x00, \T5, \T2, \T7 # T7 = a0*b0 | ||
720 | |||
721 | vpshufd $0b01001110, \T2, \T6 | ||
722 | vpxor \T2, \T6, \T6 | ||
723 | |||
724 | vmovdqa HashKey_8_k(arg1), \T5 | ||
725 | vpclmulqdq $0x00, \T5, \T6, \T6 | ||
726 | |||
727 | vmovdqu 16*3(arg1), \T1 | ||
728 | vaesenc \T1, \XMM1, \XMM1 | ||
729 | vaesenc \T1, \XMM2, \XMM2 | ||
730 | vaesenc \T1, \XMM3, \XMM3 | ||
731 | vaesenc \T1, \XMM4, \XMM4 | ||
732 | vaesenc \T1, \XMM5, \XMM5 | ||
733 | vaesenc \T1, \XMM6, \XMM6 | ||
734 | vaesenc \T1, \XMM7, \XMM7 | ||
735 | vaesenc \T1, \XMM8, \XMM8 | ||
736 | |||
737 | vmovdqa TMP2(%rsp), \T1 | ||
738 | vmovdqa HashKey_7(arg1), \T5 | ||
739 | vpclmulqdq $0x11, \T5, \T1, \T3 | ||
740 | vpxor \T3, \T4, \T4 | ||
741 | vpclmulqdq $0x00, \T5, \T1, \T3 | ||
742 | vpxor \T3, \T7, \T7 | ||
743 | |||
744 | vpshufd $0b01001110, \T1, \T3 | ||
745 | vpxor \T1, \T3, \T3 | ||
746 | vmovdqa HashKey_7_k(arg1), \T5 | ||
747 | vpclmulqdq $0x10, \T5, \T3, \T3 | ||
748 | vpxor \T3, \T6, \T6 | ||
749 | |||
750 | vmovdqu 16*4(arg1), \T1 | ||
751 | vaesenc \T1, \XMM1, \XMM1 | ||
752 | vaesenc \T1, \XMM2, \XMM2 | ||
753 | vaesenc \T1, \XMM3, \XMM3 | ||
754 | vaesenc \T1, \XMM4, \XMM4 | ||
755 | vaesenc \T1, \XMM5, \XMM5 | ||
756 | vaesenc \T1, \XMM6, \XMM6 | ||
757 | vaesenc \T1, \XMM7, \XMM7 | ||
758 | vaesenc \T1, \XMM8, \XMM8 | ||
759 | |||
760 | ####################################################################### | ||
761 | |||
762 | vmovdqa TMP3(%rsp), \T1 | ||
763 | vmovdqa HashKey_6(arg1), \T5 | ||
764 | vpclmulqdq $0x11, \T5, \T1, \T3 | ||
765 | vpxor \T3, \T4, \T4 | ||
766 | vpclmulqdq $0x00, \T5, \T1, \T3 | ||
767 | vpxor \T3, \T7, \T7 | ||
768 | |||
769 | vpshufd $0b01001110, \T1, \T3 | ||
770 | vpxor \T1, \T3, \T3 | ||
771 | vmovdqa HashKey_6_k(arg1), \T5 | ||
772 | vpclmulqdq $0x10, \T5, \T3, \T3 | ||
773 | vpxor \T3, \T6, \T6 | ||
774 | |||
775 | vmovdqu 16*5(arg1), \T1 | ||
776 | vaesenc \T1, \XMM1, \XMM1 | ||
777 | vaesenc \T1, \XMM2, \XMM2 | ||
778 | vaesenc \T1, \XMM3, \XMM3 | ||
779 | vaesenc \T1, \XMM4, \XMM4 | ||
780 | vaesenc \T1, \XMM5, \XMM5 | ||
781 | vaesenc \T1, \XMM6, \XMM6 | ||
782 | vaesenc \T1, \XMM7, \XMM7 | ||
783 | vaesenc \T1, \XMM8, \XMM8 | ||
784 | |||
785 | vmovdqa TMP4(%rsp), \T1 | ||
786 | vmovdqa HashKey_5(arg1), \T5 | ||
787 | vpclmulqdq $0x11, \T5, \T1, \T3 | ||
788 | vpxor \T3, \T4, \T4 | ||
789 | vpclmulqdq $0x00, \T5, \T1, \T3 | ||
790 | vpxor \T3, \T7, \T7 | ||
791 | |||
792 | vpshufd $0b01001110, \T1, \T3 | ||
793 | vpxor \T1, \T3, \T3 | ||
794 | vmovdqa HashKey_5_k(arg1), \T5 | ||
795 | vpclmulqdq $0x10, \T5, \T3, \T3 | ||
796 | vpxor \T3, \T6, \T6 | ||
797 | |||
798 | vmovdqu 16*6(arg1), \T1 | ||
799 | vaesenc \T1, \XMM1, \XMM1 | ||
800 | vaesenc \T1, \XMM2, \XMM2 | ||
801 | vaesenc \T1, \XMM3, \XMM3 | ||
802 | vaesenc \T1, \XMM4, \XMM4 | ||
803 | vaesenc \T1, \XMM5, \XMM5 | ||
804 | vaesenc \T1, \XMM6, \XMM6 | ||
805 | vaesenc \T1, \XMM7, \XMM7 | ||
806 | vaesenc \T1, \XMM8, \XMM8 | ||
807 | |||
808 | |||
809 | vmovdqa TMP5(%rsp), \T1 | ||
810 | vmovdqa HashKey_4(arg1), \T5 | ||
811 | vpclmulqdq $0x11, \T5, \T1, \T3 | ||
812 | vpxor \T3, \T4, \T4 | ||
813 | vpclmulqdq $0x00, \T5, \T1, \T3 | ||
814 | vpxor \T3, \T7, \T7 | ||
815 | |||
816 | vpshufd $0b01001110, \T1, \T3 | ||
817 | vpxor \T1, \T3, \T3 | ||
818 | vmovdqa HashKey_4_k(arg1), \T5 | ||
819 | vpclmulqdq $0x10, \T5, \T3, \T3 | ||
820 | vpxor \T3, \T6, \T6 | ||
821 | |||
822 | vmovdqu 16*7(arg1), \T1 | ||
823 | vaesenc \T1, \XMM1, \XMM1 | ||
824 | vaesenc \T1, \XMM2, \XMM2 | ||
825 | vaesenc \T1, \XMM3, \XMM3 | ||
826 | vaesenc \T1, \XMM4, \XMM4 | ||
827 | vaesenc \T1, \XMM5, \XMM5 | ||
828 | vaesenc \T1, \XMM6, \XMM6 | ||
829 | vaesenc \T1, \XMM7, \XMM7 | ||
830 | vaesenc \T1, \XMM8, \XMM8 | ||
831 | |||
832 | vmovdqa TMP6(%rsp), \T1 | ||
833 | vmovdqa HashKey_3(arg1), \T5 | ||
834 | vpclmulqdq $0x11, \T5, \T1, \T3 | ||
835 | vpxor \T3, \T4, \T4 | ||
836 | vpclmulqdq $0x00, \T5, \T1, \T3 | ||
837 | vpxor \T3, \T7, \T7 | ||
838 | |||
839 | vpshufd $0b01001110, \T1, \T3 | ||
840 | vpxor \T1, \T3, \T3 | ||
841 | vmovdqa HashKey_3_k(arg1), \T5 | ||
842 | vpclmulqdq $0x10, \T5, \T3, \T3 | ||
843 | vpxor \T3, \T6, \T6 | ||
844 | |||
845 | |||
846 | vmovdqu 16*8(arg1), \T1 | ||
847 | vaesenc \T1, \XMM1, \XMM1 | ||
848 | vaesenc \T1, \XMM2, \XMM2 | ||
849 | vaesenc \T1, \XMM3, \XMM3 | ||
850 | vaesenc \T1, \XMM4, \XMM4 | ||
851 | vaesenc \T1, \XMM5, \XMM5 | ||
852 | vaesenc \T1, \XMM6, \XMM6 | ||
853 | vaesenc \T1, \XMM7, \XMM7 | ||
854 | vaesenc \T1, \XMM8, \XMM8 | ||
855 | |||
856 | vmovdqa TMP7(%rsp), \T1 | ||
857 | vmovdqa HashKey_2(arg1), \T5 | ||
858 | vpclmulqdq $0x11, \T5, \T1, \T3 | ||
859 | vpxor \T3, \T4, \T4 | ||
860 | vpclmulqdq $0x00, \T5, \T1, \T3 | ||
861 | vpxor \T3, \T7, \T7 | ||
862 | |||
863 | vpshufd $0b01001110, \T1, \T3 | ||
864 | vpxor \T1, \T3, \T3 | ||
865 | vmovdqa HashKey_2_k(arg1), \T5 | ||
866 | vpclmulqdq $0x10, \T5, \T3, \T3 | ||
867 | vpxor \T3, \T6, \T6 | ||
868 | |||
869 | ####################################################################### | ||
870 | |||
871 | vmovdqu 16*9(arg1), \T5 | ||
872 | vaesenc \T5, \XMM1, \XMM1 | ||
873 | vaesenc \T5, \XMM2, \XMM2 | ||
874 | vaesenc \T5, \XMM3, \XMM3 | ||
875 | vaesenc \T5, \XMM4, \XMM4 | ||
876 | vaesenc \T5, \XMM5, \XMM5 | ||
877 | vaesenc \T5, \XMM6, \XMM6 | ||
878 | vaesenc \T5, \XMM7, \XMM7 | ||
879 | vaesenc \T5, \XMM8, \XMM8 | ||
880 | |||
881 | vmovdqa TMP8(%rsp), \T1 | ||
882 | vmovdqa HashKey(arg1), \T5 | ||
883 | vpclmulqdq $0x11, \T5, \T1, \T3 | ||
884 | vpxor \T3, \T4, \T4 | ||
885 | vpclmulqdq $0x00, \T5, \T1, \T3 | ||
886 | vpxor \T3, \T7, \T7 | ||
887 | |||
888 | vpshufd $0b01001110, \T1, \T3 | ||
889 | vpxor \T1, \T3, \T3 | ||
890 | vmovdqa HashKey_k(arg1), \T5 | ||
891 | vpclmulqdq $0x10, \T5, \T3, \T3 | ||
892 | vpxor \T3, \T6, \T6 | ||
893 | |||
894 | vpxor \T4, \T6, \T6 | ||
895 | vpxor \T7, \T6, \T6 | ||
896 | |||
897 | vmovdqu 16*10(arg1), \T5 | ||
898 | |||
899 | i = 0 | ||
900 | j = 1 | ||
901 | setreg | ||
902 | .rep 8 | ||
903 | vpxor 16*i(arg3, %r11), \T5, \T2 | ||
904 | .if \ENC_DEC == ENC | ||
905 | vaesenclast \T2, reg_j, reg_j | ||
906 | .else | ||
907 | vaesenclast \T2, reg_j, \T3 | ||
908 | vmovdqu 16*i(arg3, %r11), reg_j | ||
909 | vmovdqu \T3, 16*i(arg2, %r11) | ||
910 | .endif | ||
911 | i = (i+1) | ||
912 | j = (j+1) | ||
913 | setreg | ||
914 | .endr | ||
915 | ####################################################################### | ||
916 | |||
917 | |||
918 | vpslldq $8, \T6, \T3 # shift-L T3 2 DWs | ||
919 | vpsrldq $8, \T6, \T6 # shift-R T6 2 DWs | ||
920 | vpxor \T3, \T7, \T7 | ||
921 | vpxor \T4, \T6, \T6 # accumulate the results in T6:T7 | ||
922 | |||
923 | |||
924 | |||
925 | ####################################################################### | ||
926 | #first phase of the reduction | ||
927 | ####################################################################### | ||
928 | vpslld $31, \T7, \T2 # packed left shifting << 31 | ||
929 | vpslld $30, \T7, \T3 # packed left shifting << 30 | ||
930 | vpslld $25, \T7, \T4 # packed left shifting << 25 | ||
931 | |||
932 | vpxor \T3, \T2, \T2 # xor the shifted versions | ||
933 | vpxor \T4, \T2, \T2 | ||
934 | |||
935 | vpsrldq $4, \T2, \T1 # shift-R T1 1 DW | ||
936 | |||
937 | vpslldq $12, \T2, \T2 # shift-L T2 3 DWs | ||
938 | vpxor \T2, \T7, \T7 # first phase of the reduction complete | ||
939 | ####################################################################### | ||
940 | .if \ENC_DEC == ENC | ||
941 | vmovdqu \XMM1, 16*0(arg2,%r11) # Write to the Ciphertext buffer | ||
942 | vmovdqu \XMM2, 16*1(arg2,%r11) # Write to the Ciphertext buffer | ||
943 | vmovdqu \XMM3, 16*2(arg2,%r11) # Write to the Ciphertext buffer | ||
944 | vmovdqu \XMM4, 16*3(arg2,%r11) # Write to the Ciphertext buffer | ||
945 | vmovdqu \XMM5, 16*4(arg2,%r11) # Write to the Ciphertext buffer | ||
946 | vmovdqu \XMM6, 16*5(arg2,%r11) # Write to the Ciphertext buffer | ||
947 | vmovdqu \XMM7, 16*6(arg2,%r11) # Write to the Ciphertext buffer | ||
948 | vmovdqu \XMM8, 16*7(arg2,%r11) # Write to the Ciphertext buffer | ||
949 | .endif | ||
950 | |||
951 | ####################################################################### | ||
952 | #second phase of the reduction | ||
953 | vpsrld $1, \T7, \T2 # packed right shifting >> 1 | ||
954 | vpsrld $2, \T7, \T3 # packed right shifting >> 2 | ||
955 | vpsrld $7, \T7, \T4 # packed right shifting >> 7 | ||
956 | vpxor \T3, \T2, \T2 # xor the shifted versions | ||
957 | vpxor \T4, \T2, \T2 | ||
958 | |||
959 | vpxor \T1, \T2, \T2 | ||
960 | vpxor \T2, \T7, \T7 | ||
961 | vpxor \T7, \T6, \T6 # the result is in T6 | ||
962 | ####################################################################### | ||
963 | |||
964 | vpshufb SHUF_MASK(%rip), \XMM1, \XMM1 # perform a 16Byte swap | ||
965 | vpshufb SHUF_MASK(%rip), \XMM2, \XMM2 # perform a 16Byte swap | ||
966 | vpshufb SHUF_MASK(%rip), \XMM3, \XMM3 # perform a 16Byte swap | ||
967 | vpshufb SHUF_MASK(%rip), \XMM4, \XMM4 # perform a 16Byte swap | ||
968 | vpshufb SHUF_MASK(%rip), \XMM5, \XMM5 # perform a 16Byte swap | ||
969 | vpshufb SHUF_MASK(%rip), \XMM6, \XMM6 # perform a 16Byte swap | ||
970 | vpshufb SHUF_MASK(%rip), \XMM7, \XMM7 # perform a 16Byte swap | ||
971 | vpshufb SHUF_MASK(%rip), \XMM8, \XMM8 # perform a 16Byte swap | ||
972 | |||
973 | |||
974 | vpxor \T6, \XMM1, \XMM1 | ||
975 | |||
976 | |||
977 | |||
978 | .endm | ||
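| # Aggregation identity behind the eight HashKey_* multiplies above: with | ||
| # the running hash Y folded into block 1 and ciphertext blocks C1..C8, | ||
| # | ||
| #   Y' = (...(((Y^C1)*H ^ C2)*H ^ C3)*H ... ^ C8)*H | ||
| #      = (Y^C1)*H^8 ^ C2*H^7 ^ ... ^ C8*H | ||
| # | ||
| # so all eight products are independent and can be interleaved with the | ||
| # AES rounds, then summed. | ||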
979 | |||
980 | |||
981 | # GHASH the last 8 ciphertext blocks. | ||
982 | .macro GHASH_LAST_8_AVX T1 T2 T3 T4 T5 T6 T7 XMM1 XMM2 XMM3 XMM4 XMM5 XMM6 XMM7 XMM8 | ||
983 | |||
984 | ## Karatsuba Method | ||
985 | |||
986 | |||
987 | vpshufd $0b01001110, \XMM1, \T2 | ||
988 | vpxor \XMM1, \T2, \T2 | ||
989 | vmovdqa HashKey_8(arg1), \T5 | ||
990 | vpclmulqdq $0x11, \T5, \XMM1, \T6 | ||
991 | vpclmulqdq $0x00, \T5, \XMM1, \T7 | ||
992 | |||
993 | vmovdqa HashKey_8_k(arg1), \T3 | ||
994 | vpclmulqdq $0x00, \T3, \T2, \XMM1 | ||
995 | |||
996 | ###################### | ||
997 | |||
998 | vpshufd $0b01001110, \XMM2, \T2 | ||
999 | vpxor \XMM2, \T2, \T2 | ||
1000 | vmovdqa HashKey_7(arg1), \T5 | ||
1001 | vpclmulqdq $0x11, \T5, \XMM2, \T4 | ||
1002 | vpxor \T4, \T6, \T6 | ||
1003 | |||
1004 | vpclmulqdq $0x00, \T5, \XMM2, \T4 | ||
1005 | vpxor \T4, \T7, \T7 | ||
1006 | |||
1007 | vmovdqa HashKey_7_k(arg1), \T3 | ||
1008 | vpclmulqdq $0x00, \T3, \T2, \T2 | ||
1009 | vpxor \T2, \XMM1, \XMM1 | ||
1010 | |||
1011 | ###################### | ||
1012 | |||
1013 | vpshufd $0b01001110, \XMM3, \T2 | ||
1014 | vpxor \XMM3, \T2, \T2 | ||
1015 | vmovdqa HashKey_6(arg1), \T5 | ||
1016 | vpclmulqdq $0x11, \T5, \XMM3, \T4 | ||
1017 | vpxor \T4, \T6, \T6 | ||
1018 | |||
1019 | vpclmulqdq $0x00, \T5, \XMM3, \T4 | ||
1020 | vpxor \T4, \T7, \T7 | ||
1021 | |||
1022 | vmovdqa HashKey_6_k(arg1), \T3 | ||
1023 | vpclmulqdq $0x00, \T3, \T2, \T2 | ||
1024 | vpxor \T2, \XMM1, \XMM1 | ||
1025 | |||
1026 | ###################### | ||
1027 | |||
1028 | vpshufd $0b01001110, \XMM4, \T2 | ||
1029 | vpxor \XMM4, \T2, \T2 | ||
1030 | vmovdqa HashKey_5(arg1), \T5 | ||
1031 | vpclmulqdq $0x11, \T5, \XMM4, \T4 | ||
1032 | vpxor \T4, \T6, \T6 | ||
1033 | |||
1034 | vpclmulqdq $0x00, \T5, \XMM4, \T4 | ||
1035 | vpxor \T4, \T7, \T7 | ||
1036 | |||
1037 | vmovdqa HashKey_5_k(arg1), \T3 | ||
1038 | vpclmulqdq $0x00, \T3, \T2, \T2 | ||
1039 | vpxor \T2, \XMM1, \XMM1 | ||
1040 | |||
1041 | ###################### | ||
1042 | |||
1043 | vpshufd $0b01001110, \XMM5, \T2 | ||
1044 | vpxor \XMM5, \T2, \T2 | ||
1045 | vmovdqa HashKey_4(arg1), \T5 | ||
1046 | vpclmulqdq $0x11, \T5, \XMM5, \T4 | ||
1047 | vpxor \T4, \T6, \T6 | ||
1048 | |||
1049 | vpclmulqdq $0x00, \T5, \XMM5, \T4 | ||
1050 | vpxor \T4, \T7, \T7 | ||
1051 | |||
1052 | vmovdqa HashKey_4_k(arg1), \T3 | ||
1053 | vpclmulqdq $0x00, \T3, \T2, \T2 | ||
1054 | vpxor \T2, \XMM1, \XMM1 | ||
1055 | |||
1056 | ###################### | ||
1057 | |||
1058 | vpshufd $0b01001110, \XMM6, \T2 | ||
1059 | vpxor \XMM6, \T2, \T2 | ||
1060 | vmovdqa HashKey_3(arg1), \T5 | ||
1061 | vpclmulqdq $0x11, \T5, \XMM6, \T4 | ||
1062 | vpxor \T4, \T6, \T6 | ||
1063 | |||
1064 | vpclmulqdq $0x00, \T5, \XMM6, \T4 | ||
1065 | vpxor \T4, \T7, \T7 | ||
1066 | |||
1067 | vmovdqa HashKey_3_k(arg1), \T3 | ||
1068 | vpclmulqdq $0x00, \T3, \T2, \T2 | ||
1069 | vpxor \T2, \XMM1, \XMM1 | ||
1070 | |||
1071 | ###################### | ||
1072 | |||
1073 | vpshufd $0b01001110, \XMM7, \T2 | ||
1074 | vpxor \XMM7, \T2, \T2 | ||
1075 | vmovdqa HashKey_2(arg1), \T5 | ||
1076 | vpclmulqdq $0x11, \T5, \XMM7, \T4 | ||
1077 | vpxor \T4, \T6, \T6 | ||
1078 | |||
1079 | vpclmulqdq $0x00, \T5, \XMM7, \T4 | ||
1080 | vpxor \T4, \T7, \T7 | ||
1081 | |||
1082 | vmovdqa HashKey_2_k(arg1), \T3 | ||
1083 | vpclmulqdq $0x00, \T3, \T2, \T2 | ||
1084 | vpxor \T2, \XMM1, \XMM1 | ||
1085 | |||
1086 | ###################### | ||
1087 | |||
1088 | vpshufd $0b01001110, \XMM8, \T2 | ||
1089 | vpxor \XMM8, \T2, \T2 | ||
1090 | vmovdqa HashKey(arg1), \T5 | ||
1091 | vpclmulqdq $0x11, \T5, \XMM8, \T4 | ||
1092 | vpxor \T4, \T6, \T6 | ||
1093 | |||
1094 | vpclmulqdq $0x00, \T5, \XMM8, \T4 | ||
1095 | vpxor \T4, \T7, \T7 | ||
1096 | |||
1097 | vmovdqa HashKey_k(arg1), \T3 | ||
1098 | vpclmulqdq $0x00, \T3, \T2, \T2 | ||
1099 | |||
1100 | vpxor \T2, \XMM1, \XMM1 | ||
1101 | vpxor \T6, \XMM1, \XMM1 | ||
1102 | vpxor \T7, \XMM1, \T2 | ||
1103 | |||
1104 | |||
1105 | |||
1106 | |||
1107 | vpslldq $8, \T2, \T4 | ||
1108 | vpsrldq $8, \T2, \T2 | ||
1109 | |||
1110 | vpxor \T4, \T7, \T7 | ||
1111 | vpxor \T2, \T6, \T6 # <T6:T7> holds the result of | ||
1112 | # the accumulated carry-less multiplications | ||
1113 | |||
1114 | ####################################################################### | ||
1115 | #first phase of the reduction | ||
1116 | vpslld $31, \T7, \T2 # packed left shifting << 31 | ||
1117 | vpslld $30, \T7, \T3 # packed left shifting << 30 | ||
1118 | vpslld $25, \T7, \T4 # packed left shifting << 25 | ||
1119 | |||
1120 | vpxor \T3, \T2, \T2 # xor the shifted versions | ||
1121 | vpxor \T4, \T2, \T2 | ||
1122 | |||
1123 | vpsrldq $4, \T2, \T1 # shift-R T1 1 DW | ||
1124 | |||
1125 | vpslldq $12, \T2, \T2 # shift-L T2 3 DWs | ||
1126 | vpxor \T2, \T7, \T7 # first phase of the reduction complete | ||
1127 | ####################################################################### | ||
1128 | |||
1129 | |||
1130 | #second phase of the reduction | ||
1131 | vpsrld $1, \T7, \T2 # packed right shifting >> 1 | ||
1132 | vpsrld $2, \T7, \T3 # packed right shifting >> 2 | ||
1133 | vpsrld $7, \T7, \T4 # packed right shifting >> 7 | ||
1134 | vpxor \T3, \T2, \T2 # xor the shifted versions | ||
1135 | vpxor \T4, \T2, \T2 | ||
1136 | |||
1137 | vpxor \T1, \T2, \T2 | ||
1138 | vpxor \T2, \T7, \T7 | ||
1139 | vpxor \T7, \T6, \T6 # the result is in T6 | ||
1140 | |||
1141 | .endm | ||
1142 | |||
1143 | |||
1144 | # combined for GCM encrypt and decrypt functions | ||
1145 | # clobbering all xmm registers | ||
1146 | # clobbering r10, r11, r12, r13, r14, r15 | ||
1147 | .macro GCM_ENC_DEC_AVX ENC_DEC | ||
1148 | |||
1149 | # the number of pushes (8 bytes each) must match STACK_OFFSET | ||
1150 | push %r12 | ||
1151 | push %r13 | ||
1152 | push %r14 | ||
1153 | push %r15 | ||
1154 | |||
1155 | mov %rsp, %r14 | ||
1156 | |||
1157 | |||
1158 | |||
1159 | |||
1160 | sub $VARIABLE_OFFSET, %rsp | ||
1161 | and $~63, %rsp # align rsp to 64 bytes | ||
1162 | |||
1163 | |||
1164 | vmovdqu HashKey(arg1), %xmm13 # xmm13 = HashKey | ||
1165 | |||
1166 | mov arg4, %r13 # save the number of bytes of plaintext/ciphertext | ||
1167 | and $-16, %r13 # r13 = r13 - (r13 mod 16) | ||
1168 | |||
1169 | mov %r13, %r12 | ||
1170 | shr $4, %r12 | ||
1171 | and $7, %r12 | ||
1172 | jz _initial_num_blocks_is_0\@ | ||
1173 | |||
1174 | cmp $7, %r12 | ||
1175 | je _initial_num_blocks_is_7\@ | ||
1176 | cmp $6, %r12 | ||
1177 | je _initial_num_blocks_is_6\@ | ||
1178 | cmp $5, %r12 | ||
1179 | je _initial_num_blocks_is_5\@ | ||
1180 | cmp $4, %r12 | ||
1181 | je _initial_num_blocks_is_4\@ | ||
1182 | cmp $3, %r12 | ||
1183 | je _initial_num_blocks_is_3\@ | ||
1184 | cmp $2, %r12 | ||
1185 | je _initial_num_blocks_is_2\@ | ||
1186 | |||
1187 | jmp _initial_num_blocks_is_1\@ | ||
1188 | |||
1189 | _initial_num_blocks_is_7\@: | ||
1190 | INITIAL_BLOCKS_AVX 7, %xmm12, %xmm13, %xmm14, %xmm15, %xmm11, %xmm9, %xmm1, %xmm2, %xmm3, %xmm4, %xmm5, %xmm6, %xmm7, %xmm8, %xmm10, %xmm0, \ENC_DEC | ||
1191 | sub $16*7, %r13 | ||
1192 | jmp _initial_blocks_encrypted\@ | ||
1193 | |||
1194 | _initial_num_blocks_is_6\@: | ||
1195 | INITIAL_BLOCKS_AVX 6, %xmm12, %xmm13, %xmm14, %xmm15, %xmm11, %xmm9, %xmm1, %xmm2, %xmm3, %xmm4, %xmm5, %xmm6, %xmm7, %xmm8, %xmm10, %xmm0, \ENC_DEC | ||
1196 | sub $16*6, %r13 | ||
1197 | jmp _initial_blocks_encrypted\@ | ||
1198 | |||
1199 | _initial_num_blocks_is_5\@: | ||
1200 | INITIAL_BLOCKS_AVX 5, %xmm12, %xmm13, %xmm14, %xmm15, %xmm11, %xmm9, %xmm1, %xmm2, %xmm3, %xmm4, %xmm5, %xmm6, %xmm7, %xmm8, %xmm10, %xmm0, \ENC_DEC | ||
1201 | sub $16*5, %r13 | ||
1202 | jmp _initial_blocks_encrypted\@ | ||
1203 | |||
1204 | _initial_num_blocks_is_4\@: | ||
1205 | INITIAL_BLOCKS_AVX 4, %xmm12, %xmm13, %xmm14, %xmm15, %xmm11, %xmm9, %xmm1, %xmm2, %xmm3, %xmm4, %xmm5, %xmm6, %xmm7, %xmm8, %xmm10, %xmm0, \ENC_DEC | ||
1206 | sub $16*4, %r13 | ||
1207 | jmp _initial_blocks_encrypted\@ | ||
1208 | |||
1209 | _initial_num_blocks_is_3\@: | ||
1210 | INITIAL_BLOCKS_AVX 3, %xmm12, %xmm13, %xmm14, %xmm15, %xmm11, %xmm9, %xmm1, %xmm2, %xmm3, %xmm4, %xmm5, %xmm6, %xmm7, %xmm8, %xmm10, %xmm0, \ENC_DEC | ||
1211 | sub $16*3, %r13 | ||
1212 | jmp _initial_blocks_encrypted\@ | ||
1213 | |||
1214 | _initial_num_blocks_is_2\@: | ||
1215 | INITIAL_BLOCKS_AVX 2, %xmm12, %xmm13, %xmm14, %xmm15, %xmm11, %xmm9, %xmm1, %xmm2, %xmm3, %xmm4, %xmm5, %xmm6, %xmm7, %xmm8, %xmm10, %xmm0, \ENC_DEC | ||
1216 | sub $16*2, %r13 | ||
1217 | jmp _initial_blocks_encrypted\@ | ||
1218 | |||
1219 | _initial_num_blocks_is_1\@: | ||
1220 | INITIAL_BLOCKS_AVX 1, %xmm12, %xmm13, %xmm14, %xmm15, %xmm11, %xmm9, %xmm1, %xmm2, %xmm3, %xmm4, %xmm5, %xmm6, %xmm7, %xmm8, %xmm10, %xmm0, \ENC_DEC | ||
1221 | sub $16*1, %r13 | ||
1222 | jmp _initial_blocks_encrypted\@ | ||
1223 | |||
1224 | _initial_num_blocks_is_0\@: | ||
1225 | INITIAL_BLOCKS_AVX 0, %xmm12, %xmm13, %xmm14, %xmm15, %xmm11, %xmm9, %xmm1, %xmm2, %xmm3, %xmm4, %xmm5, %xmm6, %xmm7, %xmm8, %xmm10, %xmm0, \ENC_DEC | ||
1226 | |||
1227 | |||
1228 | _initial_blocks_encrypted\@: | ||
1229 | cmp $0, %r13 | ||
1230 | je _zero_cipher_left\@ | ||
1231 | |||
1232 | sub $128, %r13 | ||
1233 | je _eight_cipher_left\@ | ||
1234 | |||
1235 | |||
1236 | |||
1237 | |||
1238 | vmovd %xmm9, %r15d | ||
1239 | and $255, %r15d | ||
1240 | vpshufb SHUF_MASK(%rip), %xmm9, %xmm9 | ||
1241 | |||
1242 | |||
1243 | _encrypt_by_8_new\@: | ||
1244 | cmp $(255-8), %r15d | ||
1245 | jg _encrypt_by_8\@ | ||
1246 | |||
1247 | |||
1248 | |||
1249 | add $8, %r15b | ||
1250 | GHASH_8_ENCRYPT_8_PARALLEL_AVX %xmm0, %xmm10, %xmm11, %xmm12, %xmm13, %xmm14, %xmm9, %xmm1, %xmm2, %xmm3, %xmm4, %xmm5, %xmm6, %xmm7, %xmm8, %xmm15, out_order, \ENC_DEC | ||
1251 | add $128, %r11 | ||
1252 | sub $128, %r13 | ||
1253 | jne _encrypt_by_8_new\@ | ||
1254 | |||
1255 | vpshufb SHUF_MASK(%rip), %xmm9, %xmm9 | ||
1256 | jmp _eight_cipher_left\@ | ||
1257 | |||
1258 | _encrypt_by_8\@: | ||
1259 | vpshufb SHUF_MASK(%rip), %xmm9, %xmm9 | ||
1260 | add $8, %r15b | ||
1261 | GHASH_8_ENCRYPT_8_PARALLEL_AVX %xmm0, %xmm10, %xmm11, %xmm12, %xmm13, %xmm14, %xmm9, %xmm1, %xmm2, %xmm3, %xmm4, %xmm5, %xmm6, %xmm7, %xmm8, %xmm15, in_order, \ENC_DEC | ||
1262 | vpshufb SHUF_MASK(%rip), %xmm9, %xmm9 | ||
1263 | add $128, %r11 | ||
1264 | sub $128, %r13 | ||
1265 | jne _encrypt_by_8_new\@ | ||
1266 | |||
1267 | vpshufb SHUF_MASK(%rip), %xmm9, %xmm9 | ||
1268 | |||
1269 | |||
1270 | |||
1271 | |||
1272 | _eight_cipher_left\@: | ||
1273 | GHASH_LAST_8_AVX %xmm0, %xmm10, %xmm11, %xmm12, %xmm13, %xmm14, %xmm15, %xmm1, %xmm2, %xmm3, %xmm4, %xmm5, %xmm6, %xmm7, %xmm8 | ||
1274 | |||
1275 | |||
1276 | _zero_cipher_left\@: | ||
1277 | cmp $16, arg4 | ||
1278 | jl _only_less_than_16\@ | ||
1279 | |||
1280 | mov arg4, %r13 | ||
1281 | and $15, %r13 # r13 = (arg4 mod 16) | ||
1282 | |||
1283 | je _multiple_of_16_bytes\@ | ||
1284 | |||
1285 | # handle the last <16 Byte block separately | ||
1286 | |||
1287 | |||
1288 | vpaddd ONE(%rip), %xmm9, %xmm9 # INCR CNT to get Yn | ||
1289 | vpshufb SHUF_MASK(%rip), %xmm9, %xmm9 | ||
1290 | ENCRYPT_SINGLE_BLOCK %xmm9 # E(K, Yn) | ||
1291 | |||
1292 | sub $16, %r11 | ||
1293 | add %r13, %r11 | ||
1294 | vmovdqu (arg3, %r11), %xmm1 # receive the last <16 Byte block | ||
1295 | |||
1296 | lea SHIFT_MASK+16(%rip), %r12 | ||
1297 | sub %r13, %r12 # adjust the shuffle mask pointer to be | ||
1298 | # able to shift 16-r13 bytes (r13 is the | ||
1299 | # number of bytes in plaintext mod 16) | ||
1300 | vmovdqu (%r12), %xmm2 # get the appropriate shuffle mask | ||
1301 | vpshufb %xmm2, %xmm1, %xmm1 # shift right 16-r13 bytes | ||
1302 | jmp _final_ghash_mul\@ | ||
1303 | |||
1304 | _only_less_than_16\@: | ||
1305 | # check for 0 length | ||
1306 | mov arg4, %r13 | ||
1307 | and $15, %r13 # r13 = (arg4 mod 16) | ||
1308 | |||
1309 | je _multiple_of_16_bytes\@ | ||
1310 | |||
1311 | # handle the last <16 Byte block separately | ||
1312 | |||
1313 | |||
1314 | vpaddd ONE(%rip), %xmm9, %xmm9 # INCR CNT to get Yn | ||
1315 | vpshufb SHUF_MASK(%rip), %xmm9, %xmm9 | ||
1316 | ENCRYPT_SINGLE_BLOCK %xmm9 # E(K, Yn) | ||
1317 | |||
1318 | |||
1319 | lea SHIFT_MASK+16(%rip), %r12 | ||
1320 | sub %r13, %r12 # adjust the shuffle mask pointer to be | ||
1321 | # able to shift 16-r13 bytes (r13 is the | ||
1322 | # number of bytes in plaintext mod 16) | ||
1323 | |||
1324 | _get_last_16_byte_loop\@: | ||
1325 | movb (arg3, %r11), %al | ||
1326 | movb %al, TMP1 (%rsp , %r11) | ||
1327 | add $1, %r11 | ||
1328 | cmp %r13, %r11 | ||
1329 | jne _get_last_16_byte_loop\@ | ||
1330 | |||
1331 | vmovdqu TMP1(%rsp), %xmm1 | ||
1332 | |||
1333 | sub $16, %r11 | ||
1334 | |||
1335 | _final_ghash_mul\@: | ||
1336 | .if \ENC_DEC == DEC | ||
1337 | vmovdqa %xmm1, %xmm2 | ||
1338 | vpxor %xmm1, %xmm9, %xmm9 # Plaintext XOR E(K, Yn) | ||
1339 | vmovdqu ALL_F-SHIFT_MASK(%r12), %xmm1 # get the appropriate mask to | ||
1340 | # mask out top 16-r13 bytes of xmm9 | ||
1341 | vpand %xmm1, %xmm9, %xmm9 # mask out top 16-r13 bytes of xmm9 | ||
1342 | vpand %xmm1, %xmm2, %xmm2 | ||
1343 | vpshufb SHUF_MASK(%rip), %xmm2, %xmm2 | ||
1344 | vpxor %xmm2, %xmm14, %xmm14 | ||
1345 | #GHASH computation for the last <16 Byte block | ||
1346 | GHASH_MUL_AVX %xmm14, %xmm13, %xmm0, %xmm10, %xmm11, %xmm5, %xmm6 | ||
1347 | sub %r13, %r11 | ||
1348 | add $16, %r11 | ||
1349 | .else | ||
1350 | vpxor %xmm1, %xmm9, %xmm9 # Plaintext XOR E(K, Yn) | ||
1351 | vmovdqu ALL_F-SHIFT_MASK(%r12), %xmm1 # get the appropriate mask to | ||
1352 | # mask out top 16-r13 bytes of xmm9 | ||
1353 | vpand %xmm1, %xmm9, %xmm9 # mask out top 16-r13 bytes of xmm9 | ||
1354 | vpshufb SHUF_MASK(%rip), %xmm9, %xmm9 | ||
1355 | vpxor %xmm9, %xmm14, %xmm14 | ||
1356 | #GHASH computation for the last <16 Byte block | ||
1357 | GHASH_MUL_AVX %xmm14, %xmm13, %xmm0, %xmm10, %xmm11, %xmm5, %xmm6 | ||
1358 | sub %r13, %r11 | ||
1359 | add $16, %r11 | ||
1360 | vpshufb SHUF_MASK(%rip), %xmm9, %xmm9 # shuffle xmm9 back to output as ciphertext | ||
1361 | .endif | ||
1362 | |||
1363 | |||
1364 | ############################# | ||
1365 | # output r13 Bytes | ||
1366 | vmovq %xmm9, %rax | ||
1367 | cmp $8, %r13 | ||
1368 | jle _less_than_8_bytes_left\@ | ||
1369 | |||
1370 | mov %rax, (arg2 , %r11) | ||
1371 | add $8, %r11 | ||
1372 | vpsrldq $8, %xmm9, %xmm9 | ||
1373 | vmovq %xmm9, %rax | ||
1374 | sub $8, %r13 | ||
1375 | |||
1376 | _less_than_8_bytes_left\@: | ||
1377 | movb %al, (arg2 , %r11) | ||
1378 | add $1, %r11 | ||
1379 | shr $8, %rax | ||
1380 | sub $1, %r13 | ||
1381 | jne _less_than_8_bytes_left\@ | ||
1382 | ############################# | ||
1383 | |||
1384 | _multiple_of_16_bytes\@: | ||
1385 | mov arg7, %r12 # r12 = aadLen (number of bytes) | ||
1386 | shl $3, %r12 # convert into number of bits | ||
1387 | vmovd %r12d, %xmm15 # len(A) in xmm15 | ||
1388 | |||
1389 | shl $3, arg4 # len(C) in bits (*8) | ||
1390 | vmovq arg4, %xmm1 | ||
1391 | vpslldq $8, %xmm15, %xmm15 # xmm15 = len(A)|| 0x0000000000000000 | ||
1392 | vpxor %xmm1, %xmm15, %xmm15 # xmm15 = len(A)||len(C) | ||
1393 | |||
1394 | vpxor %xmm15, %xmm14, %xmm14 | ||
1395 | GHASH_MUL_AVX %xmm14, %xmm13, %xmm0, %xmm10, %xmm11, %xmm5, %xmm6 # final GHASH computation | ||
1396 | vpshufb SHUF_MASK(%rip), %xmm14, %xmm14 # perform a 16Byte swap | ||
1397 | |||
1398 | mov arg5, %rax # rax = *Y0 | ||
1399 | vmovdqu (%rax), %xmm9 # xmm9 = Y0 | ||
1400 | |||
1401 | ENCRYPT_SINGLE_BLOCK %xmm9 # E(K, Y0) | ||
1402 | |||
1403 | vpxor %xmm14, %xmm9, %xmm9 | ||
1404 | |||
1405 | |||
1406 | |||
1407 | _return_T\@: | ||
1408 | mov arg8, %r10 # r10 = authTag | ||
1409 | mov arg9, %r11 # r11 = auth_tag_len | ||
1410 | |||
1411 | cmp $16, %r11 | ||
1412 | je _T_16\@ | ||
1413 | |||
1414 | cmp $12, %r11 | ||
1415 | je _T_12\@ | ||
1416 | |||
1417 | _T_8\@: | ||
1418 | vmovq %xmm9, %rax | ||
1419 | mov %rax, (%r10) | ||
1420 | jmp _return_T_done\@ | ||
1421 | _T_12\@: | ||
1422 | vmovq %xmm9, %rax | ||
1423 | mov %rax, (%r10) | ||
1424 | vpsrldq $8, %xmm9, %xmm9 | ||
1425 | vmovd %xmm9, %eax | ||
1426 | mov %eax, 8(%r10) | ||
1427 | jmp _return_T_done\@ | ||
1428 | |||
1429 | _T_16\@: | ||
1430 | vmovdqu %xmm9, (%r10) | ||
1431 | |||
1432 | _return_T_done\@: | ||
1433 | mov %r14, %rsp | ||
1434 | |||
1435 | pop %r15 | ||
1436 | pop %r14 | ||
1437 | pop %r13 | ||
1438 | pop %r12 | ||
1439 | .endm | ||
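| # Tag computation performed at the tail of this macro, following GCM: | ||
| # | ||
| #   S   = GHASH(H, AAD || C || len(AAD) || len(C)) | ||
| #   Tag = E(K, Y0) ^ S        /* then truncated to 16, 12 or 8 bytes */ | ||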
1440 | |||
1441 | |||
1442 | ############################################################# | ||
1443 | #void aesni_gcm_precomp_avx_gen2 | ||
1444 | # (gcm_data *my_ctx_data, | ||
1445 | # u8 *hash_subkey)# /* H, the Hash sub key input. Data starts on a 16-byte boundary. */ | ||
1446 | ############################################################# | ||
1447 | ENTRY(aesni_gcm_precomp_avx_gen2) | ||
1448 | # the number of pushes (8 bytes each) must match STACK_OFFSET | ||
1449 | push %r12 | ||
1450 | push %r13 | ||
1451 | push %r14 | ||
1452 | push %r15 | ||
1453 | |||
1454 | mov %rsp, %r14 | ||
1455 | |||
1456 | |||
1457 | |||
1458 | sub $VARIABLE_OFFSET, %rsp | ||
1459 | and $~63, %rsp # align rsp to 64 bytes | ||
1460 | |||
1461 | vmovdqu (arg2), %xmm6 # xmm6 = HashKey | ||
1462 | |||
1463 | vpshufb SHUF_MASK(%rip), %xmm6, %xmm6 | ||
1464 | ############### PRECOMPUTATION of HashKey<<1 mod poly from the HashKey | ||
1465 | vmovdqa %xmm6, %xmm2 | ||
1466 | vpsllq $1, %xmm6, %xmm6 | ||
1467 | vpsrlq $63, %xmm2, %xmm2 | ||
1468 | vmovdqa %xmm2, %xmm1 | ||
1469 | vpslldq $8, %xmm2, %xmm2 | ||
1470 | vpsrldq $8, %xmm1, %xmm1 | ||
1471 | vpor %xmm2, %xmm6, %xmm6 | ||
1472 | #reduction | ||
1473 | vpshufd $0b00100100, %xmm1, %xmm2 | ||
1474 | vpcmpeqd TWOONE(%rip), %xmm2, %xmm2 | ||
1475 | vpand POLY(%rip), %xmm2, %xmm2 | ||
1476 | vpxor %xmm2, %xmm6, %xmm6 # xmm6 holds the HashKey<<1 mod poly | ||
1477 | ####################################################################### | ||
1478 | vmovdqa %xmm6, HashKey(arg1) # store HashKey<<1 mod poly | ||
1479 | |||
1480 | |||
1481 | PRECOMPUTE_AVX %xmm6, %xmm0, %xmm1, %xmm2, %xmm3, %xmm4, %xmm5 | ||
1482 | |||
1483 | mov %r14, %rsp | ||
1484 | |||
1485 | pop %r15 | ||
1486 | pop %r14 | ||
1487 | pop %r13 | ||
1488 | pop %r12 | ||
1489 | ret | ||
1490 | ENDPROC(aesni_gcm_precomp_avx_gen2) | ||
1491 | |||
1492 | ############################################################################### | ||
1493 | #void aesni_gcm_enc_avx_gen2( | ||
1494 | # gcm_data *my_ctx_data, /* aligned to 16 Bytes */ | ||
1495 | # u8 *out, /* Ciphertext output. Encrypt in-place is allowed. */ | ||
1496 | # const u8 *in, /* Plaintext input */ | ||
1497 | # u64 plaintext_len, /* Length of data in Bytes for encryption. */ | ||
1498 | # u8 *iv, /* Pre-counter block j0: 4 byte salt | ||
1499 | # (from Security Association) concatenated with 8 byte | ||
1500 | # Initialisation Vector (from IPSec ESP Payload) | ||
1501 | # concatenated with 0x00000001. 16-byte aligned pointer. */ | ||
1502 | # const u8 *aad, /* Additional Authentication Data (AAD)*/ | ||
1503 | # u64 aad_len, /* Length of AAD in bytes. With RFC4106 this is going to be 8 or 12 Bytes */ | ||
1504 | # u8 *auth_tag, /* Authenticated Tag output. */ | ||
1505 | # u64 auth_tag_len)# /* Authenticated Tag Length in bytes. | ||
1506 | # Valid values are 16 (most likely), 12 or 8. */ | ||
1507 | ############################################################################### | ||
1508 | ENTRY(aesni_gcm_enc_avx_gen2) | ||
1509 | GCM_ENC_DEC_AVX ENC | ||
1510 | ret | ||
1511 | ENDPROC(aesni_gcm_enc_avx_gen2) | ||
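| # Hypothetical C caller (a sketch built from the prototypes above; AES key | ||
| # expansion into the context and CPU-feature checks are assumed to happen | ||
| # elsewhere): | ||
| # | ||
| #   struct gcm_data ctx __attribute__((aligned(16))); | ||
| #   /* ... expand the AES key into ctx ... */ | ||
| #   aesni_gcm_precomp_avx_gen2(&ctx, hash_subkey); | ||
| #   aesni_gcm_enc_avx_gen2(&ctx, out, in, len, iv, aad, aad_len, tag, 16); | ||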
1512 | |||
1513 | ############################################################################### | ||
1514 | #void aesni_gcm_dec_avx_gen2( | ||
1515 | # gcm_data *my_ctx_data, /* aligned to 16 Bytes */ | ||
1516 | # u8 *out, /* Plaintext output. Decrypt in-place is allowed. */ | ||
1517 | # const u8 *in, /* Ciphertext input */ | ||
1518 | # u64 plaintext_len, /* Length of data in Bytes for decryption. */ | ||
1519 | # u8 *iv, /* Pre-counter block j0: 4 byte salt | ||
1520 | # (from Security Association) concatenated with 8 byte | ||
1521 | # Initialisation Vector (from IPSec ESP Payload) | ||
1522 | # concatenated with 0x00000001. 16-byte aligned pointer. */ | ||
1523 | # const u8 *aad, /* Additional Authentication Data (AAD)*/ | ||
1524 | # u64 aad_len, /* Length of AAD in bytes. With RFC4106 this is going to be 8 or 12 Bytes */ | ||
1525 | # u8 *auth_tag, /* Authenticated Tag output. */ | ||
1526 | # u64 auth_tag_len)# /* Authenticated Tag Length in bytes. | ||
1527 | # Valid values are 16 (most likely), 12 or 8. */ | ||
1528 | ############################################################################### | ||
1529 | ENTRY(aesni_gcm_dec_avx_gen2) | ||
1530 | GCM_ENC_DEC_AVX DEC | ||
1531 | ret | ||
1532 | ENDPROC(aesni_gcm_dec_avx_gen2) | ||
1533 | #endif /* CONFIG_AS_AVX */ | ||
1534 | |||
1535 | #ifdef CONFIG_AS_AVX2 | ||
1536 | ############################################################################### | ||
1537 | # GHASH_MUL MACRO to implement: Data*HashKey mod (128,127,126,121,0) | ||
1538 | # Input: A and B (128-bits each, bit-reflected) | ||
1539 | # Output: C = A*B*x mod poly, (i.e. >>1 ) | ||
1540 | # To compute GH = GH*HashKey mod poly, give HK = HashKey<<1 mod poly as input | ||
1541 | # GH = GH * HK * x mod poly which is equivalent to GH*HashKey mod poly. | ||
1542 | ############################################################################### | ||
1543 | .macro GHASH_MUL_AVX2 GH HK T1 T2 T3 T4 T5 | ||
1544 | |||
1545 | vpclmulqdq $0x11,\HK,\GH,\T1 # T1 = a1*b1 | ||
1546 | vpclmulqdq $0x00,\HK,\GH,\T2 # T2 = a0*b0 | ||
1547 | vpclmulqdq $0x01,\HK,\GH,\T3 # T3 = a1*b0 | ||
1548 | vpclmulqdq $0x10,\HK,\GH,\GH # GH = a0*b1 | ||
1549 | vpxor \T3, \GH, \GH | ||
1550 | |||
1551 | |||
1552 | vpsrldq $8 , \GH, \T3 # shift-R GH 2 DWs | ||
1553 | vpslldq $8 , \GH, \GH # shift-L GH 2 DWs | ||
1554 | |||
1555 | vpxor \T3, \T1, \T1 | ||
1556 | vpxor \T2, \GH, \GH | ||
1557 | |||
1558 | ####################################################################### | ||
1559 | #first phase of the reduction | ||
1560 | vmovdqa POLY2(%rip), \T3 | ||
1561 | |||
1562 | vpclmulqdq $0x01, \GH, \T3, \T2 | ||
1563 | vpslldq $8, \T2, \T2 # shift-L T2 2 DWs | ||
1564 | |||
1565 | vpxor \T2, \GH, \GH # first phase of the reduction complete | ||
1566 | ####################################################################### | ||
1567 | #second phase of the reduction | ||
1568 | vpclmulqdq $0x00, \GH, \T3, \T2 | ||
1569 | vpsrldq $4, \T2, \T2 # shift-R T2 1 DW (Shift-R only 1-DW to obtain 2-DWs shift-R) | ||
1570 | |||
1571 | vpclmulqdq $0x10, \GH, \T3, \GH | ||
1572 | vpslldq $4, \GH, \GH # shift-L GH 1 DW (Shift-L 1-DW to obtain result with no shifts) | ||
1573 | |||
1574 | vpxor \T2, \GH, \GH # second phase of the reduction complete | ||
1575 | ####################################################################### | ||
1576 | vpxor \T1, \GH, \GH # the result is in GH | ||
1577 | |||
1578 | |||
1579 | .endm | ||
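| # Design note: unlike GHASH_MUL_AVX above, this variant computes all four | ||
| # 64x64 products with VPCLMULQDQ (schoolbook, no Karatsuba) and then | ||
| # reduces by folding against the precomputed POLY2 constant with further | ||
| # carry-less multiplies, instead of the shift-and-XOR sequence. | ||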
1580 | |||
1581 | .macro PRECOMPUTE_AVX2 HK T1 T2 T3 T4 T5 T6 | ||
1582 | |||
1583 | # precompute HashKey^2 .. HashKey^8, each <<1 mod poly | ||
1584 | vmovdqa \HK, \T5 | ||
1585 | GHASH_MUL_AVX2 \T5, \HK, \T1, \T3, \T4, \T6, \T2 # T5 = HashKey^2<<1 mod poly | ||
1586 | vmovdqa \T5, HashKey_2(arg1) # [HashKey_2] = HashKey^2<<1 mod poly | ||
1587 | |||
1588 | GHASH_MUL_AVX2 \T5, \HK, \T1, \T3, \T4, \T6, \T2 # T5 = HashKey^3<<1 mod poly | ||
1589 | vmovdqa \T5, HashKey_3(arg1) | ||
1590 | |||
1591 | GHASH_MUL_AVX2 \T5, \HK, \T1, \T3, \T4, \T6, \T2 # T5 = HashKey^4<<1 mod poly | ||
1592 | vmovdqa \T5, HashKey_4(arg1) | ||
1593 | |||
1594 | GHASH_MUL_AVX2 \T5, \HK, \T1, \T3, \T4, \T6, \T2 # T5 = HashKey^5<<1 mod poly | ||
1595 | vmovdqa \T5, HashKey_5(arg1) | ||
1596 | |||
1597 | GHASH_MUL_AVX2 \T5, \HK, \T1, \T3, \T4, \T6, \T2 # T5 = HashKey^6<<1 mod poly | ||
1598 | vmovdqa \T5, HashKey_6(arg1) | ||
1599 | |||
1600 | GHASH_MUL_AVX2 \T5, \HK, \T1, \T3, \T4, \T6, \T2 # T5 = HashKey^7<<1 mod poly | ||
1601 | vmovdqa \T5, HashKey_7(arg1) | ||
1602 | |||
1603 | GHASH_MUL_AVX2 \T5, \HK, \T1, \T3, \T4, \T6, \T2 # T5 = HashKey^8<<1 mod poly | ||
1604 | vmovdqa \T5, HashKey_8(arg1) | ||
1605 | |||
1606 | .endm | ||
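| # Note: no HashKey_i_k values are stored here; GHASH_MUL_AVX2 computes | ||
| # all four 64x64 products directly, so the Karatsuba helper terms used | ||
| # by the AVX path above are not needed. | ||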
1607 | |||
1608 | |||
1609 | ## if a = number of total plaintext bytes | ||
1610 | ## b = floor(a/16) | ||
1611 | ## num_initial_blocks = b mod 8 | ||
1612 | ## encrypt the initial num_initial_blocks blocks and apply GHASH to the ciphertext | ||
1613 | ## r10, r11, r12, rax are clobbered | ||
1614 | ## arg1, arg2, arg3, r14 are used as pointers only, not modified | ||
1615 | |||
1616 | .macro INITIAL_BLOCKS_AVX2 num_initial_blocks T1 T2 T3 T4 T5 CTR XMM1 XMM2 XMM3 XMM4 XMM5 XMM6 XMM7 XMM8 T6 T_key ENC_DEC VER | ||
1617 | i = (8-\num_initial_blocks) | ||
1618 | setreg | ||
1619 | |||
1620 | mov arg6, %r10 # r10 = AAD | ||
1621 | mov arg7, %r12 # r12 = aadLen | ||
1622 | |||
1623 | |||
1624 | mov %r12, %r11 | ||
1625 | |||
1626 | vpxor reg_i, reg_i, reg_i | ||
1627 | _get_AAD_loop\@: | ||
1628 | vmovd (%r10), \T1 | ||
1629 | vpslldq $12, \T1, \T1 | ||
1630 | vpsrldq $4, reg_i, reg_i | ||
1631 | vpxor \T1, reg_i, reg_i | ||
1632 | |||
1633 | add $4, %r10 | ||
1634 | sub $4, %r12 | ||
1635 | jg _get_AAD_loop\@ | ||
1636 | |||
1637 | |||
1638 | cmp $16, %r11 | ||
1639 | je _get_AAD_loop2_done\@ | ||
1640 | mov $16, %r12 | ||
1641 | |||
1642 | _get_AAD_loop2\@: | ||
1643 | vpsrldq $4, reg_i, reg_i | ||
1644 | sub $4, %r12 | ||
1645 | cmp %r11, %r12 | ||
1646 | jg _get_AAD_loop2\@ | ||
1647 | |||
1648 | _get_AAD_loop2_done\@: | ||
1649 | |||
1650 | #byte-reflect the AAD data | ||
1651 | vpshufb SHUF_MASK(%rip), reg_i, reg_i | ||
1652 | |||
1653 | # initialize the data pointer offset as zero | ||
1654 | xor %r11, %r11 | ||
1655 | |||
1656 | # start AES for num_initial_blocks blocks | ||
1657 | mov arg5, %rax # rax = *Y0 | ||
1658 | vmovdqu (%rax), \CTR # CTR = Y0 | ||
1659 | vpshufb SHUF_MASK(%rip), \CTR, \CTR | ||
1660 | |||
1661 | |||
1662 | i = (9-\num_initial_blocks) | ||
1663 | setreg | ||
1664 | .rep \num_initial_blocks | ||
1665 | vpaddd ONE(%rip), \CTR, \CTR # INCR Y0 | ||
1666 | vmovdqa \CTR, reg_i | ||
1667 | vpshufb SHUF_MASK(%rip), reg_i, reg_i # perform a 16Byte swap | ||
1668 | i = (i+1) | ||
1669 | setreg | ||
1670 | .endr | ||
1671 | |||
1672 | vmovdqa (arg1), \T_key | ||
1673 | i = (9-\num_initial_blocks) | ||
1674 | setreg | ||
1675 | .rep \num_initial_blocks | ||
1676 | vpxor \T_key, reg_i, reg_i | ||
1677 | i = (i+1) | ||
1678 | setreg | ||
1679 | .endr | ||
1680 | |||
1681 | j = 1 | ||
1682 | setreg | ||
1683 | .rep 9 | ||
1684 | vmovdqa 16*j(arg1), \T_key | ||
1685 | i = (9-\num_initial_blocks) | ||
1686 | setreg | ||
1687 | .rep \num_initial_blocks | ||
1688 | vaesenc \T_key, reg_i, reg_i | ||
1689 | i = (i+1) | ||
1690 | setreg | ||
1691 | .endr | ||
1692 | |||
1693 | j = (j+1) | ||
1694 | setreg | ||
1695 | .endr | ||
1696 | |||
1697 | |||
1698 | vmovdqa 16*10(arg1), \T_key | ||
1699 | i = (9-\num_initial_blocks) | ||
1700 | setreg | ||
1701 | .rep \num_initial_blocks | ||
1702 | vaesenclast \T_key, reg_i, reg_i | ||
1703 | i = (i+1) | ||
1704 | setreg | ||
1705 | .endr | ||
1706 | |||
1707 | i = (9-\num_initial_blocks) | ||
1708 | setreg | ||
1709 | .rep \num_initial_blocks | ||
1710 | vmovdqu (arg3, %r11), \T1 | ||
1711 | vpxor \T1, reg_i, reg_i | ||
1712 | vmovdqu reg_i, (arg2 , %r11) # write back ciphertext for | ||
1713 | # num_initial_blocks blocks | ||
1714 | add $16, %r11 | ||
1715 | .if \ENC_DEC == DEC | ||
1716 | vmovdqa \T1, reg_i | ||
1717 | .endif | ||
1718 | vpshufb SHUF_MASK(%rip), reg_i, reg_i # prepare ciphertext for GHASH computations | ||
1719 | i = (i+1) | ||
1720 | setreg | ||
1721 | .endr | ||
1722 | |||
1723 | |||
1724 | i = (8-\num_initial_blocks) | ||
1725 | j = (9-\num_initial_blocks) | ||
1726 | setreg | ||
1727 | GHASH_MUL_AVX2 reg_i, \T2, \T1, \T3, \T4, \T5, \T6 | ||
1728 | |||
1729 | .rep \num_initial_blocks | ||
1730 | vpxor reg_i, reg_j, reg_j | ||
1731 | GHASH_MUL_AVX2 reg_j, \T2, \T1, \T3, \T4, \T5, \T6 # apply GHASH on num_initial_blocks blocks | ||
1732 | i = (i+1) | ||
1733 | j = (j+1) | ||
1734 | setreg | ||
1735 | .endr | ||
1736 | # XMM8 has the combined result here | ||
1737 | |||
1738 | vmovdqa \XMM8, TMP1(%rsp) | ||
1739 | vmovdqa \XMM8, \T3 | ||
1740 | |||
1741 | cmp $128, %r13 | ||
1742 | jl _initial_blocks_done\@ # no need for precomputed constants | ||
1743 | |||
1744 | ############################################################################### | ||
1745 | # HashKey_i_k holds XORed values of the low and high parts of the HashKey_i | ||
1746 | vpaddd ONE(%rip), \CTR, \CTR # INCR Y0 | ||
1747 | vmovdqa \CTR, \XMM1 | ||
1748 | vpshufb SHUF_MASK(%rip), \XMM1, \XMM1 # perform a 16Byte swap | ||
1749 | |||
1750 | vpaddd ONE(%rip), \CTR, \CTR # INCR Y0 | ||
1751 | vmovdqa \CTR, \XMM2 | ||
1752 | vpshufb SHUF_MASK(%rip), \XMM2, \XMM2 # perform a 16Byte swap | ||
1753 | |||
1754 | vpaddd ONE(%rip), \CTR, \CTR # INCR Y0 | ||
1755 | vmovdqa \CTR, \XMM3 | ||
1756 | vpshufb SHUF_MASK(%rip), \XMM3, \XMM3 # perform a 16Byte swap | ||
1757 | |||
1758 | vpaddd ONE(%rip), \CTR, \CTR # INCR Y0 | ||
1759 | vmovdqa \CTR, \XMM4 | ||
1760 | vpshufb SHUF_MASK(%rip), \XMM4, \XMM4 # perform a 16Byte swap | ||
1761 | |||
1762 | vpaddd ONE(%rip), \CTR, \CTR # INCR Y0 | ||
1763 | vmovdqa \CTR, \XMM5 | ||
1764 | vpshufb SHUF_MASK(%rip), \XMM5, \XMM5 # perform a 16Byte swap | ||
1765 | |||
1766 | vpaddd ONE(%rip), \CTR, \CTR # INCR Y0 | ||
1767 | vmovdqa \CTR, \XMM6 | ||
1768 | vpshufb SHUF_MASK(%rip), \XMM6, \XMM6 # perform a 16Byte swap | ||
1769 | |||
1770 | vpaddd ONE(%rip), \CTR, \CTR # INCR Y0 | ||
1771 | vmovdqa \CTR, \XMM7 | ||
1772 | vpshufb SHUF_MASK(%rip), \XMM7, \XMM7 # perform a 16Byte swap | ||
1773 | |||
1774 | vpaddd ONE(%rip), \CTR, \CTR # INCR Y0 | ||
1775 | vmovdqa \CTR, \XMM8 | ||
1776 | vpshufb SHUF_MASK(%rip), \XMM8, \XMM8 # perform a 16Byte swap | ||
1777 | |||
1778 | vmovdqa (arg1), \T_key | ||
1779 | vpxor \T_key, \XMM1, \XMM1 | ||
1780 | vpxor \T_key, \XMM2, \XMM2 | ||
1781 | vpxor \T_key, \XMM3, \XMM3 | ||
1782 | vpxor \T_key, \XMM4, \XMM4 | ||
1783 | vpxor \T_key, \XMM5, \XMM5 | ||
1784 | vpxor \T_key, \XMM6, \XMM6 | ||
1785 | vpxor \T_key, \XMM7, \XMM7 | ||
1786 | vpxor \T_key, \XMM8, \XMM8 | ||
1787 | |||
1788 | i = 1 | ||
1789 | setreg | ||
1790 | .rep 9 # do 9 rounds | ||
1791 | vmovdqa 16*i(arg1), \T_key | ||
1792 | vaesenc \T_key, \XMM1, \XMM1 | ||
1793 | vaesenc \T_key, \XMM2, \XMM2 | ||
1794 | vaesenc \T_key, \XMM3, \XMM3 | ||
1795 | vaesenc \T_key, \XMM4, \XMM4 | ||
1796 | vaesenc \T_key, \XMM5, \XMM5 | ||
1797 | vaesenc \T_key, \XMM6, \XMM6 | ||
1798 | vaesenc \T_key, \XMM7, \XMM7 | ||
1799 | vaesenc \T_key, \XMM8, \XMM8 | ||
1800 | i = (i+1) | ||
1801 | setreg | ||
1802 | .endr | ||
1803 | |||
1804 | |||
1805 | vmovdqa 16*i(arg1), \T_key | ||
1806 | vaesenclast \T_key, \XMM1, \XMM1 | ||
1807 | vaesenclast \T_key, \XMM2, \XMM2 | ||
1808 | vaesenclast \T_key, \XMM3, \XMM3 | ||
1809 | vaesenclast \T_key, \XMM4, \XMM4 | ||
1810 | vaesenclast \T_key, \XMM5, \XMM5 | ||
1811 | vaesenclast \T_key, \XMM6, \XMM6 | ||
1812 | vaesenclast \T_key, \XMM7, \XMM7 | ||
1813 | vaesenclast \T_key, \XMM8, \XMM8 | ||
1814 | |||
1815 | vmovdqu (arg3, %r11), \T1 | ||
1816 | vpxor \T1, \XMM1, \XMM1 | ||
1817 | vmovdqu \XMM1, (arg2 , %r11) | ||
1818 | .if \ENC_DEC == DEC | ||
1819 | vmovdqa \T1, \XMM1 | ||
1820 | .endif | ||
1821 | |||
1822 | vmovdqu 16*1(arg3, %r11), \T1 | ||
1823 | vpxor \T1, \XMM2, \XMM2 | ||
1824 | vmovdqu \XMM2, 16*1(arg2 , %r11) | ||
1825 | .if \ENC_DEC == DEC | ||
1826 | vmovdqa \T1, \XMM2 | ||
1827 | .endif | ||
1828 | |||
1829 | vmovdqu 16*2(arg3, %r11), \T1 | ||
1830 | vpxor \T1, \XMM3, \XMM3 | ||
1831 | vmovdqu \XMM3, 16*2(arg2 , %r11) | ||
1832 | .if \ENC_DEC == DEC | ||
1833 | vmovdqa \T1, \XMM3 | ||
1834 | .endif | ||
1835 | |||
1836 | vmovdqu 16*3(arg3, %r11), \T1 | ||
1837 | vpxor \T1, \XMM4, \XMM4 | ||
1838 | vmovdqu \XMM4, 16*3(arg2 , %r11) | ||
1839 | .if \ENC_DEC == DEC | ||
1840 | vmovdqa \T1, \XMM4 | ||
1841 | .endif | ||
1842 | |||
1843 | vmovdqu 16*4(arg3, %r11), \T1 | ||
1844 | vpxor \T1, \XMM5, \XMM5 | ||
1845 | vmovdqu \XMM5, 16*4(arg2 , %r11) | ||
1846 | .if \ENC_DEC == DEC | ||
1847 | vmovdqa \T1, \XMM5 | ||
1848 | .endif | ||
1849 | |||
1850 | vmovdqu 16*5(arg3, %r11), \T1 | ||
1851 | vpxor \T1, \XMM6, \XMM6 | ||
1852 | vmovdqu \XMM6, 16*5(arg2 , %r11) | ||
1853 | .if \ENC_DEC == DEC | ||
1854 | vmovdqa \T1, \XMM6 | ||
1855 | .endif | ||
1856 | |||
1857 | vmovdqu 16*6(arg3, %r11), \T1 | ||
1858 | vpxor \T1, \XMM7, \XMM7 | ||
1859 | vmovdqu \XMM7, 16*6(arg2 , %r11) | ||
1860 | .if \ENC_DEC == DEC | ||
1861 | vmovdqa \T1, \XMM7 | ||
1862 | .endif | ||
1863 | |||
1864 | vmovdqu 16*7(arg3, %r11), \T1 | ||
1865 | vpxor \T1, \XMM8, \XMM8 | ||
1866 | vmovdqu \XMM8, 16*7(arg2 , %r11) | ||
1867 | .if \ENC_DEC == DEC | ||
1868 | vmovdqa \T1, \XMM8 | ||
1869 | .endif | ||
1870 | |||
1871 | add $128, %r11 | ||
1872 | |||
1873 | vpshufb SHUF_MASK(%rip), \XMM1, \XMM1 # perform a 16Byte swap | ||
1874 | vpxor TMP1(%rsp), \XMM1, \XMM1 # combine GHASHed value with | ||
1875 | # the corresponding ciphertext | ||
1876 | vpshufb SHUF_MASK(%rip), \XMM2, \XMM2 # perform a 16Byte swap | ||
1877 | vpshufb SHUF_MASK(%rip), \XMM3, \XMM3 # perform a 16Byte swap | ||
1878 | vpshufb SHUF_MASK(%rip), \XMM4, \XMM4 # perform a 16Byte swap | ||
1879 | vpshufb SHUF_MASK(%rip), \XMM5, \XMM5 # perform a 16Byte swap | ||
1880 | vpshufb SHUF_MASK(%rip), \XMM6, \XMM6 # perform a 16Byte swap | ||
1881 | vpshufb SHUF_MASK(%rip), \XMM7, \XMM7 # perform a 16Byte swap | ||
1882 | vpshufb SHUF_MASK(%rip), \XMM8, \XMM8 # perform a 16Byte swap | ||
1883 | |||
1884 | ############################################################################### | ||
1885 | |||
1886 | _initial_blocks_done\@: | ||
1887 | |||
1888 | |||
1889 | .endm | ||
1890 | |||
1891 | |||
1892 | |||
1893 | # encrypt 8 blocks at a time | ||
1894 | # ghash the 8 previously encrypted ciphertext blocks | ||
1895 | # arg1, arg2, arg3 are used as pointers only, not modified | ||
1896 | # r11 is the data offset value | ||
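# Editorial note: the macro below interleaves the AES-128 rounds for
# eight fresh counter blocks with the carry-less multiplies of the eight
# previously produced ciphertext blocks against HashKey_8 .. HashKey, so
# the AES and PCLMUL units can overlap; the per-block products accumulate
# in T4 (high), T7 (low) and T6 (middle) and are reduced once at the end.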
1897 | .macro GHASH_8_ENCRYPT_8_PARALLEL_AVX2 T1 T2 T3 T4 T5 T6 CTR XMM1 XMM2 XMM3 XMM4 XMM5 XMM6 XMM7 XMM8 T7 loop_idx ENC_DEC | ||
1898 | |||
1899 | vmovdqa \XMM1, \T2 | ||
1900 | vmovdqa \XMM2, TMP2(%rsp) | ||
1901 | vmovdqa \XMM3, TMP3(%rsp) | ||
1902 | vmovdqa \XMM4, TMP4(%rsp) | ||
1903 | vmovdqa \XMM5, TMP5(%rsp) | ||
1904 | vmovdqa \XMM6, TMP6(%rsp) | ||
1905 | vmovdqa \XMM7, TMP7(%rsp) | ||
1906 | vmovdqa \XMM8, TMP8(%rsp) | ||
1907 | |||
1908 | .if \loop_idx == in_order | ||
1909 | vpaddd ONE(%rip), \CTR, \XMM1 # INCR CNT | ||
1910 | vpaddd ONE(%rip), \XMM1, \XMM2 | ||
1911 | vpaddd ONE(%rip), \XMM2, \XMM3 | ||
1912 | vpaddd ONE(%rip), \XMM3, \XMM4 | ||
1913 | vpaddd ONE(%rip), \XMM4, \XMM5 | ||
1914 | vpaddd ONE(%rip), \XMM5, \XMM6 | ||
1915 | vpaddd ONE(%rip), \XMM6, \XMM7 | ||
1916 | vpaddd ONE(%rip), \XMM7, \XMM8 | ||
1917 | vmovdqa \XMM8, \CTR | ||
1918 | |||
1919 | vpshufb SHUF_MASK(%rip), \XMM1, \XMM1 # perform a 16Byte swap | ||
1920 | vpshufb SHUF_MASK(%rip), \XMM2, \XMM2 # perform a 16Byte swap | ||
1921 | vpshufb SHUF_MASK(%rip), \XMM3, \XMM3 # perform a 16Byte swap | ||
1922 | vpshufb SHUF_MASK(%rip), \XMM4, \XMM4 # perform a 16Byte swap | ||
1923 | vpshufb SHUF_MASK(%rip), \XMM5, \XMM5 # perform a 16Byte swap | ||
1924 | vpshufb SHUF_MASK(%rip), \XMM6, \XMM6 # perform a 16Byte swap | ||
1925 | vpshufb SHUF_MASK(%rip), \XMM7, \XMM7 # perform a 16Byte swap | ||
1926 | vpshufb SHUF_MASK(%rip), \XMM8, \XMM8 # perform a 16Byte swap | ||
1927 | .else | ||
1928 | vpaddd ONEf(%rip), \CTR, \XMM1 # INCR CNT | ||
1929 | vpaddd ONEf(%rip), \XMM1, \XMM2 | ||
1930 | vpaddd ONEf(%rip), \XMM2, \XMM3 | ||
1931 | vpaddd ONEf(%rip), \XMM3, \XMM4 | ||
1932 | vpaddd ONEf(%rip), \XMM4, \XMM5 | ||
1933 | vpaddd ONEf(%rip), \XMM5, \XMM6 | ||
1934 | vpaddd ONEf(%rip), \XMM6, \XMM7 | ||
1935 | vpaddd ONEf(%rip), \XMM7, \XMM8 | ||
1936 | vmovdqa \XMM8, \CTR | ||
1937 | .endif | ||
1938 | |||
1939 | |||
1940 | ####################################################################### | ||
1941 | |||
1942 | vmovdqu (arg1), \T1 | ||
1943 | vpxor \T1, \XMM1, \XMM1 | ||
1944 | vpxor \T1, \XMM2, \XMM2 | ||
1945 | vpxor \T1, \XMM3, \XMM3 | ||
1946 | vpxor \T1, \XMM4, \XMM4 | ||
1947 | vpxor \T1, \XMM5, \XMM5 | ||
1948 | vpxor \T1, \XMM6, \XMM6 | ||
1949 | vpxor \T1, \XMM7, \XMM7 | ||
1950 | vpxor \T1, \XMM8, \XMM8 | ||
1951 | |||
1952 | ####################################################################### | ||
1953 | |||
1954 | |||
1955 | |||
1956 | |||
1957 | |||
1958 | vmovdqu 16*1(arg1), \T1 | ||
1959 | vaesenc \T1, \XMM1, \XMM1 | ||
1960 | vaesenc \T1, \XMM2, \XMM2 | ||
1961 | vaesenc \T1, \XMM3, \XMM3 | ||
1962 | vaesenc \T1, \XMM4, \XMM4 | ||
1963 | vaesenc \T1, \XMM5, \XMM5 | ||
1964 | vaesenc \T1, \XMM6, \XMM6 | ||
1965 | vaesenc \T1, \XMM7, \XMM7 | ||
1966 | vaesenc \T1, \XMM8, \XMM8 | ||
1967 | |||
1968 | vmovdqu 16*2(arg1), \T1 | ||
1969 | vaesenc \T1, \XMM1, \XMM1 | ||
1970 | vaesenc \T1, \XMM2, \XMM2 | ||
1971 | vaesenc \T1, \XMM3, \XMM3 | ||
1972 | vaesenc \T1, \XMM4, \XMM4 | ||
1973 | vaesenc \T1, \XMM5, \XMM5 | ||
1974 | vaesenc \T1, \XMM6, \XMM6 | ||
1975 | vaesenc \T1, \XMM7, \XMM7 | ||
1976 | vaesenc \T1, \XMM8, \XMM8 | ||
1977 | |||
1978 | |||
1979 | ####################################################################### | ||
1980 | |||
1981 | vmovdqa HashKey_8(arg1), \T5 | ||
1982 | vpclmulqdq $0x11, \T5, \T2, \T4 # T4 = a1*b1 | ||
1983 | vpclmulqdq $0x00, \T5, \T2, \T7 # T7 = a0*b0 | ||
1984 | vpclmulqdq $0x01, \T5, \T2, \T6 # T6 = a1*b0 | ||
1985 | vpclmulqdq $0x10, \T5, \T2, \T5 # T5 = a0*b1 | ||
1986 | vpxor \T5, \T6, \T6 | ||
1987 | |||
1988 | vmovdqu 16*3(arg1), \T1 | ||
1989 | vaesenc \T1, \XMM1, \XMM1 | ||
1990 | vaesenc \T1, \XMM2, \XMM2 | ||
1991 | vaesenc \T1, \XMM3, \XMM3 | ||
1992 | vaesenc \T1, \XMM4, \XMM4 | ||
1993 | vaesenc \T1, \XMM5, \XMM5 | ||
1994 | vaesenc \T1, \XMM6, \XMM6 | ||
1995 | vaesenc \T1, \XMM7, \XMM7 | ||
1996 | vaesenc \T1, \XMM8, \XMM8 | ||
1997 | |||
1998 | vmovdqa TMP2(%rsp), \T1 | ||
1999 | vmovdqa HashKey_7(arg1), \T5 | ||
2000 | vpclmulqdq $0x11, \T5, \T1, \T3 | ||
2001 | vpxor \T3, \T4, \T4 | ||
2002 | |||
2003 | vpclmulqdq $0x00, \T5, \T1, \T3 | ||
2004 | vpxor \T3, \T7, \T7 | ||
2005 | |||
2006 | vpclmulqdq $0x01, \T5, \T1, \T3 | ||
2007 | vpxor \T3, \T6, \T6 | ||
2008 | |||
2009 | vpclmulqdq $0x10, \T5, \T1, \T3 | ||
2010 | vpxor \T3, \T6, \T6 | ||
2011 | |||
2012 | vmovdqu 16*4(arg1), \T1 | ||
2013 | vaesenc \T1, \XMM1, \XMM1 | ||
2014 | vaesenc \T1, \XMM2, \XMM2 | ||
2015 | vaesenc \T1, \XMM3, \XMM3 | ||
2016 | vaesenc \T1, \XMM4, \XMM4 | ||
2017 | vaesenc \T1, \XMM5, \XMM5 | ||
2018 | vaesenc \T1, \XMM6, \XMM6 | ||
2019 | vaesenc \T1, \XMM7, \XMM7 | ||
2020 | vaesenc \T1, \XMM8, \XMM8 | ||
2021 | |||
2022 | ####################################################################### | ||
2023 | |||
2024 | vmovdqa TMP3(%rsp), \T1 | ||
2025 | vmovdqa HashKey_6(arg1), \T5 | ||
2026 | vpclmulqdq $0x11, \T5, \T1, \T3 | ||
2027 | vpxor \T3, \T4, \T4 | ||
2028 | |||
2029 | vpclmulqdq $0x00, \T5, \T1, \T3 | ||
2030 | vpxor \T3, \T7, \T7 | ||
2031 | |||
2032 | vpclmulqdq $0x01, \T5, \T1, \T3 | ||
2033 | vpxor \T3, \T6, \T6 | ||
2034 | |||
2035 | vpclmulqdq $0x10, \T5, \T1, \T3 | ||
2036 | vpxor \T3, \T6, \T6 | ||
2037 | |||
2038 | vmovdqu 16*5(arg1), \T1 | ||
2039 | vaesenc \T1, \XMM1, \XMM1 | ||
2040 | vaesenc \T1, \XMM2, \XMM2 | ||
2041 | vaesenc \T1, \XMM3, \XMM3 | ||
2042 | vaesenc \T1, \XMM4, \XMM4 | ||
2043 | vaesenc \T1, \XMM5, \XMM5 | ||
2044 | vaesenc \T1, \XMM6, \XMM6 | ||
2045 | vaesenc \T1, \XMM7, \XMM7 | ||
2046 | vaesenc \T1, \XMM8, \XMM8 | ||
2047 | |||
2048 | vmovdqa TMP4(%rsp), \T1 | ||
2049 | vmovdqa HashKey_5(arg1), \T5 | ||
2050 | vpclmulqdq $0x11, \T5, \T1, \T3 | ||
2051 | vpxor \T3, \T4, \T4 | ||
2052 | |||
2053 | vpclmulqdq $0x00, \T5, \T1, \T3 | ||
2054 | vpxor \T3, \T7, \T7 | ||
2055 | |||
2056 | vpclmulqdq $0x01, \T5, \T1, \T3 | ||
2057 | vpxor \T3, \T6, \T6 | ||
2058 | |||
2059 | vpclmulqdq $0x10, \T5, \T1, \T3 | ||
2060 | vpxor \T3, \T6, \T6 | ||
2061 | |||
2062 | vmovdqu 16*6(arg1), \T1 | ||
2063 | vaesenc \T1, \XMM1, \XMM1 | ||
2064 | vaesenc \T1, \XMM2, \XMM2 | ||
2065 | vaesenc \T1, \XMM3, \XMM3 | ||
2066 | vaesenc \T1, \XMM4, \XMM4 | ||
2067 | vaesenc \T1, \XMM5, \XMM5 | ||
2068 | vaesenc \T1, \XMM6, \XMM6 | ||
2069 | vaesenc \T1, \XMM7, \XMM7 | ||
2070 | vaesenc \T1, \XMM8, \XMM8 | ||
2071 | |||
2072 | |||
2073 | vmovdqa TMP5(%rsp), \T1 | ||
2074 | vmovdqa HashKey_4(arg1), \T5 | ||
2075 | vpclmulqdq $0x11, \T5, \T1, \T3 | ||
2076 | vpxor \T3, \T4, \T4 | ||
2077 | |||
2078 | vpclmulqdq $0x00, \T5, \T1, \T3 | ||
2079 | vpxor \T3, \T7, \T7 | ||
2080 | |||
2081 | vpclmulqdq $0x01, \T5, \T1, \T3 | ||
2082 | vpxor \T3, \T6, \T6 | ||
2083 | |||
2084 | vpclmulqdq $0x10, \T5, \T1, \T3 | ||
2085 | vpxor \T3, \T6, \T6 | ||
2086 | |||
2087 | vmovdqu 16*7(arg1), \T1 | ||
2088 | vaesenc \T1, \XMM1, \XMM1 | ||
2089 | vaesenc \T1, \XMM2, \XMM2 | ||
2090 | vaesenc \T1, \XMM3, \XMM3 | ||
2091 | vaesenc \T1, \XMM4, \XMM4 | ||
2092 | vaesenc \T1, \XMM5, \XMM5 | ||
2093 | vaesenc \T1, \XMM6, \XMM6 | ||
2094 | vaesenc \T1, \XMM7, \XMM7 | ||
2095 | vaesenc \T1, \XMM8, \XMM8 | ||
2096 | |||
2097 | vmovdqa TMP6(%rsp), \T1 | ||
2098 | vmovdqa HashKey_3(arg1), \T5 | ||
2099 | vpclmulqdq $0x11, \T5, \T1, \T3 | ||
2100 | vpxor \T3, \T4, \T4 | ||
2101 | |||
2102 | vpclmulqdq $0x00, \T5, \T1, \T3 | ||
2103 | vpxor \T3, \T7, \T7 | ||
2104 | |||
2105 | vpclmulqdq $0x01, \T5, \T1, \T3 | ||
2106 | vpxor \T3, \T6, \T6 | ||
2107 | |||
2108 | vpclmulqdq $0x10, \T5, \T1, \T3 | ||
2109 | vpxor \T3, \T6, \T6 | ||
2110 | |||
2111 | vmovdqu 16*8(arg1), \T1 | ||
2112 | vaesenc \T1, \XMM1, \XMM1 | ||
2113 | vaesenc \T1, \XMM2, \XMM2 | ||
2114 | vaesenc \T1, \XMM3, \XMM3 | ||
2115 | vaesenc \T1, \XMM4, \XMM4 | ||
2116 | vaesenc \T1, \XMM5, \XMM5 | ||
2117 | vaesenc \T1, \XMM6, \XMM6 | ||
2118 | vaesenc \T1, \XMM7, \XMM7 | ||
2119 | vaesenc \T1, \XMM8, \XMM8 | ||
2120 | |||
2121 | vmovdqa TMP7(%rsp), \T1 | ||
2122 | vmovdqa HashKey_2(arg1), \T5 | ||
2123 | vpclmulqdq $0x11, \T5, \T1, \T3 | ||
2124 | vpxor \T3, \T4, \T4 | ||
2125 | |||
2126 | vpclmulqdq $0x00, \T5, \T1, \T3 | ||
2127 | vpxor \T3, \T7, \T7 | ||
2128 | |||
2129 | vpclmulqdq $0x01, \T5, \T1, \T3 | ||
2130 | vpxor \T3, \T6, \T6 | ||
2131 | |||
2132 | vpclmulqdq $0x10, \T5, \T1, \T3 | ||
2133 | vpxor \T3, \T6, \T6 | ||
2134 | |||
2135 | |||
2136 | ####################################################################### | ||
2137 | |||
2138 | vmovdqu 16*9(arg1), \T5 | ||
2139 | vaesenc \T5, \XMM1, \XMM1 | ||
2140 | vaesenc \T5, \XMM2, \XMM2 | ||
2141 | vaesenc \T5, \XMM3, \XMM3 | ||
2142 | vaesenc \T5, \XMM4, \XMM4 | ||
2143 | vaesenc \T5, \XMM5, \XMM5 | ||
2144 | vaesenc \T5, \XMM6, \XMM6 | ||
2145 | vaesenc \T5, \XMM7, \XMM7 | ||
2146 | vaesenc \T5, \XMM8, \XMM8 | ||
2147 | |||
2148 | vmovdqa TMP8(%rsp), \T1 | ||
2149 | vmovdqa HashKey(arg1), \T5 | ||
2150 | |||
2151 | vpclmulqdq $0x00, \T5, \T1, \T3 | ||
2152 | vpxor \T3, \T7, \T7 | ||
2153 | |||
2154 | vpclmulqdq $0x01, \T5, \T1, \T3 | ||
2155 | vpxor \T3, \T6, \T6 | ||
2156 | |||
2157 | vpclmulqdq $0x10, \T5, \T1, \T3 | ||
2158 | vpxor \T3, \T6, \T6 | ||
2159 | |||
2160 | vpclmulqdq $0x11, \T5, \T1, \T3 | ||
2161 | vpxor \T3, \T4, \T1 | ||
2162 | |||
2163 | |||
2164 | vmovdqu 16*10(arg1), \T5 | ||
2165 | |||
2166 | i = 0 | ||
2167 | j = 1 | ||
2168 | setreg | ||
2169 | .rep 8 | ||
2170 | vpxor 16*i(arg3, %r11), \T5, \T2 | ||
2171 | .if \ENC_DEC == ENC | ||
2172 | vaesenclast \T2, reg_j, reg_j | ||
2173 | .else | ||
2174 | vaesenclast \T2, reg_j, \T3 | ||
2175 | vmovdqu 16*i(arg3, %r11), reg_j | ||
2176 | vmovdqu \T3, 16*i(arg2, %r11) | ||
2177 | .endif | ||
2178 | i = (i+1) | ||
2179 | j = (j+1) | ||
2180 | setreg | ||
2181 | .endr | ||
2182 | ####################################################################### | ||
2183 | |||
2184 | |||
2185 | vpslldq $8, \T6, \T3 # shift-L T3 2 DWs | ||
2186 | vpsrldq $8, \T6, \T6 # shift-R T6 2 DWs | ||
2187 | vpxor \T3, \T7, \T7 | ||
2188 | vpxor \T6, \T1, \T1 # accumulate the results in T1:T7 | ||
2189 | |||
2190 | |||
2191 | |||
2192 | ####################################################################### | ||
2193 | #first phase of the reduction | ||
2194 | vmovdqa POLY2(%rip), \T3 | ||
2195 | |||
2196 | vpclmulqdq $0x01, \T7, \T3, \T2 | ||
2197 | vpslldq $8, \T2, \T2 # shift-L T2 2 DWs | ||
2198 | |||
2199 | vpxor \T2, \T7, \T7 # first phase of the reduction complete | ||
2200 | ####################################################################### | ||
2201 | .if \ENC_DEC == ENC | ||
2202 | vmovdqu \XMM1, 16*0(arg2,%r11) # Write to the Ciphertext buffer | ||
2203 | vmovdqu \XMM2, 16*1(arg2,%r11) # Write to the Ciphertext buffer | ||
2204 | vmovdqu \XMM3, 16*2(arg2,%r11) # Write to the Ciphertext buffer | ||
2205 | vmovdqu \XMM4, 16*3(arg2,%r11) # Write to the Ciphertext buffer | ||
2206 | vmovdqu \XMM5, 16*4(arg2,%r11) # Write to the Ciphertext buffer | ||
2207 | vmovdqu \XMM6, 16*5(arg2,%r11) # Write to the Ciphertext buffer | ||
2208 | vmovdqu \XMM7, 16*6(arg2,%r11) # Write to the Ciphertext buffer | ||
2209 | vmovdqu \XMM8, 16*7(arg2,%r11) # Write to the Ciphertext buffer | ||
2210 | .endif | ||
2211 | |||
2212 | ####################################################################### | ||
2213 | #second phase of the reduction | ||
2214 | vpclmulqdq $0x00, \T7, \T3, \T2 | ||
2215 | vpsrldq $4, \T2, \T2 # shift-R T2 1 DW (Shift-R only 1-DW to obtain 2-DWs shift-R) | ||
2216 | |||
2217 | vpclmulqdq $0x10, \T7, \T3, \T4 | ||
2218 | vpslldq $4, \T4, \T4 # shift-L T4 1 DW (Shift-L 1-DW to obtain result with no shifts) | ||
2219 | |||
2220 | vpxor \T2, \T4, \T4 # second phase of the reduction complete | ||
2221 | ####################################################################### | ||
2222 | vpxor \T4, \T1, \T1 # the result is in T1 | ||
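# Editorial note on the two-phase reduction above: the 256-bit carry-less
# product accumulated in <T1:T7> is folded back to 128 bits modulo the
# GHASH polynomial x^128 + x^7 + x^2 + x + 1 (bit-reflected, hence the
# POLY2 constant). Phase one folds the lower half upward with one
# multiply by POLY2; phase two finishes the fold with two more
# multiplies, leaving the reduced 128-bit result in T1.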
2223 | |||
2224 | vpshufb SHUF_MASK(%rip), \XMM1, \XMM1 # perform a 16Byte swap | ||
2225 | vpshufb SHUF_MASK(%rip), \XMM2, \XMM2 # perform a 16Byte swap | ||
2226 | vpshufb SHUF_MASK(%rip), \XMM3, \XMM3 # perform a 16Byte swap | ||
2227 | vpshufb SHUF_MASK(%rip), \XMM4, \XMM4 # perform a 16Byte swap | ||
2228 | vpshufb SHUF_MASK(%rip), \XMM5, \XMM5 # perform a 16Byte swap | ||
2229 | vpshufb SHUF_MASK(%rip), \XMM6, \XMM6 # perform a 16Byte swap | ||
2230 | vpshufb SHUF_MASK(%rip), \XMM7, \XMM7 # perform a 16Byte swap | ||
2231 | vpshufb SHUF_MASK(%rip), \XMM8, \XMM8 # perform a 16Byte swap | ||
2232 | |||
2233 | |||
2234 | vpxor \T1, \XMM1, \XMM1 | ||
2235 | |||
2236 | |||
2237 | |||
2238 | .endm | ||
2239 | |||
2240 | |||
2241 | # GHASH the last 8 ciphertext blocks. | ||
2242 | .macro GHASH_LAST_8_AVX2 T1 T2 T3 T4 T5 T6 T7 XMM1 XMM2 XMM3 XMM4 XMM5 XMM6 XMM7 XMM8 | ||
2243 | |||
2244 | ## Karatsuba Method | ||
2245 | |||
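## Editorial note: the Karatsuba method below computes each 128x128
## carry-less product from three 64x64 multiplies instead of four:
## a1*b1, a0*b0 and (a1^a0)*(b1^b0), with the middle term recovered as
## (a1^a0)*(b1^b0) ^ a1*b1 ^ a0*b0. The a1*b1 terms are accumulated in
## T6, the a0*b0 terms in T7 and the cross terms in XMM1, so the middle
## term is formed only once, after all eight blocks.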
2246 | vmovdqa HashKey_8(arg1), \T5 | ||
2247 | |||
2248 | vpshufd $0b01001110, \XMM1, \T2 | ||
2249 | vpshufd $0b01001110, \T5, \T3 | ||
2250 | vpxor \XMM1, \T2, \T2 | ||
2251 | vpxor \T5, \T3, \T3 | ||
2252 | |||
2253 | vpclmulqdq $0x11, \T5, \XMM1, \T6 | ||
2254 | vpclmulqdq $0x00, \T5, \XMM1, \T7 | ||
2255 | |||
2256 | vpclmulqdq $0x00, \T3, \T2, \XMM1 | ||
2257 | |||
2258 | ###################### | ||
2259 | |||
2260 | vmovdqa HashKey_7(arg1), \T5 | ||
2261 | vpshufd $0b01001110, \XMM2, \T2 | ||
2262 | vpshufd $0b01001110, \T5, \T3 | ||
2263 | vpxor \XMM2, \T2, \T2 | ||
2264 | vpxor \T5, \T3, \T3 | ||
2265 | |||
2266 | vpclmulqdq $0x11, \T5, \XMM2, \T4 | ||
2267 | vpxor \T4, \T6, \T6 | ||
2268 | |||
2269 | vpclmulqdq $0x00, \T5, \XMM2, \T4 | ||
2270 | vpxor \T4, \T7, \T7 | ||
2271 | |||
2272 | vpclmulqdq $0x00, \T3, \T2, \T2 | ||
2273 | |||
2274 | vpxor \T2, \XMM1, \XMM1 | ||
2275 | |||
2276 | ###################### | ||
2277 | |||
2278 | vmovdqa HashKey_6(arg1), \T5 | ||
2279 | vpshufd $0b01001110, \XMM3, \T2 | ||
2280 | vpshufd $0b01001110, \T5, \T3 | ||
2281 | vpxor \XMM3, \T2, \T2 | ||
2282 | vpxor \T5, \T3, \T3 | ||
2283 | |||
2284 | vpclmulqdq $0x11, \T5, \XMM3, \T4 | ||
2285 | vpxor \T4, \T6, \T6 | ||
2286 | |||
2287 | vpclmulqdq $0x00, \T5, \XMM3, \T4 | ||
2288 | vpxor \T4, \T7, \T7 | ||
2289 | |||
2290 | vpclmulqdq $0x00, \T3, \T2, \T2 | ||
2291 | |||
2292 | vpxor \T2, \XMM1, \XMM1 | ||
2293 | |||
2294 | ###################### | ||
2295 | |||
2296 | vmovdqa HashKey_5(arg1), \T5 | ||
2297 | vpshufd $0b01001110, \XMM4, \T2 | ||
2298 | vpshufd $0b01001110, \T5, \T3 | ||
2299 | vpxor \XMM4, \T2, \T2 | ||
2300 | vpxor \T5, \T3, \T3 | ||
2301 | |||
2302 | vpclmulqdq $0x11, \T5, \XMM4, \T4 | ||
2303 | vpxor \T4, \T6, \T6 | ||
2304 | |||
2305 | vpclmulqdq $0x00, \T5, \XMM4, \T4 | ||
2306 | vpxor \T4, \T7, \T7 | ||
2307 | |||
2308 | vpclmulqdq $0x00, \T3, \T2, \T2 | ||
2309 | |||
2310 | vpxor \T2, \XMM1, \XMM1 | ||
2311 | |||
2312 | ###################### | ||
2313 | |||
2314 | vmovdqa HashKey_4(arg1), \T5 | ||
2315 | vpshufd $0b01001110, \XMM5, \T2 | ||
2316 | vpshufd $0b01001110, \T5, \T3 | ||
2317 | vpxor \XMM5, \T2, \T2 | ||
2318 | vpxor \T5, \T3, \T3 | ||
2319 | |||
2320 | vpclmulqdq $0x11, \T5, \XMM5, \T4 | ||
2321 | vpxor \T4, \T6, \T6 | ||
2322 | |||
2323 | vpclmulqdq $0x00, \T5, \XMM5, \T4 | ||
2324 | vpxor \T4, \T7, \T7 | ||
2325 | |||
2326 | vpclmulqdq $0x00, \T3, \T2, \T2 | ||
2327 | |||
2328 | vpxor \T2, \XMM1, \XMM1 | ||
2329 | |||
2330 | ###################### | ||
2331 | |||
2332 | vmovdqa HashKey_3(arg1), \T5 | ||
2333 | vpshufd $0b01001110, \XMM6, \T2 | ||
2334 | vpshufd $0b01001110, \T5, \T3 | ||
2335 | vpxor \XMM6, \T2, \T2 | ||
2336 | vpxor \T5, \T3, \T3 | ||
2337 | |||
2338 | vpclmulqdq $0x11, \T5, \XMM6, \T4 | ||
2339 | vpxor \T4, \T6, \T6 | ||
2340 | |||
2341 | vpclmulqdq $0x00, \T5, \XMM6, \T4 | ||
2342 | vpxor \T4, \T7, \T7 | ||
2343 | |||
2344 | vpclmulqdq $0x00, \T3, \T2, \T2 | ||
2345 | |||
2346 | vpxor \T2, \XMM1, \XMM1 | ||
2347 | |||
2348 | ###################### | ||
2349 | |||
2350 | vmovdqa HashKey_2(arg1), \T5 | ||
2351 | vpshufd $0b01001110, \XMM7, \T2 | ||
2352 | vpshufd $0b01001110, \T5, \T3 | ||
2353 | vpxor \XMM7, \T2, \T2 | ||
2354 | vpxor \T5, \T3, \T3 | ||
2355 | |||
2356 | vpclmulqdq $0x11, \T5, \XMM7, \T4 | ||
2357 | vpxor \T4, \T6, \T6 | ||
2358 | |||
2359 | vpclmulqdq $0x00, \T5, \XMM7, \T4 | ||
2360 | vpxor \T4, \T7, \T7 | ||
2361 | |||
2362 | vpclmulqdq $0x00, \T3, \T2, \T2 | ||
2363 | |||
2364 | vpxor \T2, \XMM1, \XMM1 | ||
2365 | |||
2366 | ###################### | ||
2367 | |||
2368 | vmovdqa HashKey(arg1), \T5 | ||
2369 | vpshufd $0b01001110, \XMM8, \T2 | ||
2370 | vpshufd $0b01001110, \T5, \T3 | ||
2371 | vpxor \XMM8, \T2, \T2 | ||
2372 | vpxor \T5, \T3, \T3 | ||
2373 | |||
2374 | vpclmulqdq $0x11, \T5, \XMM8, \T4 | ||
2375 | vpxor \T4, \T6, \T6 | ||
2376 | |||
2377 | vpclmulqdq $0x00, \T5, \XMM8, \T4 | ||
2378 | vpxor \T4, \T7, \T7 | ||
2379 | |||
2380 | vpclmulqdq $0x00, \T3, \T2, \T2 | ||
2381 | |||
2382 | vpxor \T2, \XMM1, \XMM1 | ||
2383 | vpxor \T6, \XMM1, \XMM1 | ||
2384 | vpxor \T7, \XMM1, \T2 | ||
2385 | |||
2386 | |||
2387 | |||
2388 | |||
2389 | vpslldq $8, \T2, \T4 | ||
2390 | vpsrldq $8, \T2, \T2 | ||
2391 | |||
2392 | vpxor \T4, \T7, \T7 | ||
2393 | vpxor \T2, \T6, \T6 # <T6:T7> holds the result of the | ||
2394 | # accumulated carry-less multiplications | ||
2395 | |||
2396 | ####################################################################### | ||
2397 | #first phase of the reduction | ||
2398 | vmovdqa POLY2(%rip), \T3 | ||
2399 | |||
2400 | vpclmulqdq $0x01, \T7, \T3, \T2 | ||
2401 | vpslldq $8, \T2, \T2 # shift-L xmm2 2 DWs | ||
2402 | |||
2403 | vpxor \T2, \T7, \T7 # first phase of the reduction complete | ||
2404 | ####################################################################### | ||
2405 | |||
2406 | |||
2407 | #second phase of the reduction | ||
2408 | vpclmulqdq $0x00, \T7, \T3, \T2 | ||
2409 | vpsrldq $4, \T2, \T2 # shift-R T2 1 DW (Shift-R only 1-DW to obtain 2-DWs shift-R) | ||
2410 | |||
2411 | vpclmulqdq $0x10, \T7, \T3, \T4 | ||
2412 | vpslldq $4, \T4, \T4 # shift-L T4 1 DW (Shift-L 1-DW to obtain result with no shifts) | ||
2413 | |||
2414 | vpxor \T2, \T4, \T4 # second phase of the reduction complete | ||
2415 | ####################################################################### | ||
2416 | vpxor \T4, \T6, \T6 # the result is in T6 | ||
2417 | .endm | ||
2418 | |||
2419 | |||
2420 | |||
2421 | # combined macro for the GCM encrypt and decrypt functions | ||
2422 | # clobbering all xmm registers | ||
2423 | # clobbering r10, r11, r12, r13, r14, r15 | ||
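# Editorial overview (inferred from the code below): arg1 = context,
# arg2 = output, arg3 = input, arg4 = length in bytes, arg5 = Y0/IV,
# arg6/arg7 = AAD and its length, arg8/arg9 = tag buffer and tag length.
# The macro runs (len/16) mod 8 blocks through INITIAL_BLOCKS_AVX2,
# processes the rest eight blocks at a time, handles a trailing partial
# block, then GHASHes len(A)||len(C) and encrypts Y0 to form the tag.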
2424 | .macro GCM_ENC_DEC_AVX2 ENC_DEC | ||
2425 | |||
2426 | #the number of pushes must equal STACK_OFFSET | ||
2427 | push %r12 | ||
2428 | push %r13 | ||
2429 | push %r14 | ||
2430 | push %r15 | ||
2431 | |||
2432 | mov %rsp, %r14 | ||
2433 | |||
2434 | |||
2435 | |||
2436 | |||
2437 | sub $VARIABLE_OFFSET, %rsp | ||
2438 | and $~63, %rsp # align rsp to 64 bytes | ||
2439 | |||
2440 | |||
2441 | vmovdqu HashKey(arg1), %xmm13 # xmm13 = HashKey | ||
2442 | |||
2443 | mov arg4, %r13 # save the number of bytes of plaintext/ciphertext | ||
2444 | and $-16, %r13 # r13 = r13 - (r13 mod 16) | ||
2445 | |||
2446 | mov %r13, %r12 | ||
2447 | shr $4, %r12 | ||
2448 | and $7, %r12 | ||
2449 | jz _initial_num_blocks_is_0\@ | ||
2450 | |||
2451 | cmp $7, %r12 | ||
2452 | je _initial_num_blocks_is_7\@ | ||
2453 | cmp $6, %r12 | ||
2454 | je _initial_num_blocks_is_6\@ | ||
2455 | cmp $5, %r12 | ||
2456 | je _initial_num_blocks_is_5\@ | ||
2457 | cmp $4, %r12 | ||
2458 | je _initial_num_blocks_is_4\@ | ||
2459 | cmp $3, %r12 | ||
2460 | je _initial_num_blocks_is_3\@ | ||
2461 | cmp $2, %r12 | ||
2462 | je _initial_num_blocks_is_2\@ | ||
2463 | |||
2464 | jmp _initial_num_blocks_is_1\@ | ||
2465 | |||
2466 | _initial_num_blocks_is_7\@: | ||
2467 | INITIAL_BLOCKS_AVX2 7, %xmm12, %xmm13, %xmm14, %xmm15, %xmm11, %xmm9, %xmm1, %xmm2, %xmm3, %xmm4, %xmm5, %xmm6, %xmm7, %xmm8, %xmm10, %xmm0, \ENC_DEC | ||
2468 | sub $16*7, %r13 | ||
2469 | jmp _initial_blocks_encrypted\@ | ||
2470 | |||
2471 | _initial_num_blocks_is_6\@: | ||
2472 | INITIAL_BLOCKS_AVX2 6, %xmm12, %xmm13, %xmm14, %xmm15, %xmm11, %xmm9, %xmm1, %xmm2, %xmm3, %xmm4, %xmm5, %xmm6, %xmm7, %xmm8, %xmm10, %xmm0, \ENC_DEC | ||
2473 | sub $16*6, %r13 | ||
2474 | jmp _initial_blocks_encrypted\@ | ||
2475 | |||
2476 | _initial_num_blocks_is_5\@: | ||
2477 | INITIAL_BLOCKS_AVX2 5, %xmm12, %xmm13, %xmm14, %xmm15, %xmm11, %xmm9, %xmm1, %xmm2, %xmm3, %xmm4, %xmm5, %xmm6, %xmm7, %xmm8, %xmm10, %xmm0, \ENC_DEC | ||
2478 | sub $16*5, %r13 | ||
2479 | jmp _initial_blocks_encrypted\@ | ||
2480 | |||
2481 | _initial_num_blocks_is_4\@: | ||
2482 | INITIAL_BLOCKS_AVX2 4, %xmm12, %xmm13, %xmm14, %xmm15, %xmm11, %xmm9, %xmm1, %xmm2, %xmm3, %xmm4, %xmm5, %xmm6, %xmm7, %xmm8, %xmm10, %xmm0, \ENC_DEC | ||
2483 | sub $16*4, %r13 | ||
2484 | jmp _initial_blocks_encrypted\@ | ||
2485 | |||
2486 | _initial_num_blocks_is_3\@: | ||
2487 | INITIAL_BLOCKS_AVX2 3, %xmm12, %xmm13, %xmm14, %xmm15, %xmm11, %xmm9, %xmm1, %xmm2, %xmm3, %xmm4, %xmm5, %xmm6, %xmm7, %xmm8, %xmm10, %xmm0, \ENC_DEC | ||
2488 | sub $16*3, %r13 | ||
2489 | jmp _initial_blocks_encrypted\@ | ||
2490 | |||
2491 | _initial_num_blocks_is_2\@: | ||
2492 | INITIAL_BLOCKS_AVX2 2, %xmm12, %xmm13, %xmm14, %xmm15, %xmm11, %xmm9, %xmm1, %xmm2, %xmm3, %xmm4, %xmm5, %xmm6, %xmm7, %xmm8, %xmm10, %xmm0, \ENC_DEC | ||
2493 | sub $16*2, %r13 | ||
2494 | jmp _initial_blocks_encrypted\@ | ||
2495 | |||
2496 | _initial_num_blocks_is_1\@: | ||
2497 | INITIAL_BLOCKS_AVX2 1, %xmm12, %xmm13, %xmm14, %xmm15, %xmm11, %xmm9, %xmm1, %xmm2, %xmm3, %xmm4, %xmm5, %xmm6, %xmm7, %xmm8, %xmm10, %xmm0, \ENC_DEC | ||
2498 | sub $16*1, %r13 | ||
2499 | jmp _initial_blocks_encrypted\@ | ||
2500 | |||
2501 | _initial_num_blocks_is_0\@: | ||
2502 | INITIAL_BLOCKS_AVX2 0, %xmm12, %xmm13, %xmm14, %xmm15, %xmm11, %xmm9, %xmm1, %xmm2, %xmm3, %xmm4, %xmm5, %xmm6, %xmm7, %xmm8, %xmm10, %xmm0, \ENC_DEC | ||
2503 | |||
2504 | |||
2505 | _initial_blocks_encrypted\@: | ||
2506 | cmp $0, %r13 | ||
2507 | je _zero_cipher_left\@ | ||
2508 | |||
2509 | sub $128, %r13 | ||
2510 | je _eight_cipher_left\@ | ||
2511 | |||
2512 | |||
2513 | |||
2514 | |||
2515 | vmovd %xmm9, %r15d | ||
2516 | and $255, %r15d | ||
2517 | vpshufb SHUF_MASK(%rip), %xmm9, %xmm9 | ||
2518 | |||
2519 | |||
2520 | _encrypt_by_8_new\@: | ||
2521 | cmp $(255-8), %r15d | ||
2522 | jg _encrypt_by_8\@ | ||
2523 | |||
2524 | |||
2525 | |||
2526 | add $8, %r15b | ||
2527 | GHASH_8_ENCRYPT_8_PARALLEL_AVX2 %xmm0, %xmm10, %xmm11, %xmm12, %xmm13, %xmm14, %xmm9, %xmm1, %xmm2, %xmm3, %xmm4, %xmm5, %xmm6, %xmm7, %xmm8, %xmm15, out_order, \ENC_DEC | ||
2528 | add $128, %r11 | ||
2529 | sub $128, %r13 | ||
2530 | jne _encrypt_by_8_new\@ | ||
2531 | |||
2532 | vpshufb SHUF_MASK(%rip), %xmm9, %xmm9 | ||
2533 | jmp _eight_cipher_left\@ | ||
2534 | |||
2535 | _encrypt_by_8\@: | ||
2536 | vpshufb SHUF_MASK(%rip), %xmm9, %xmm9 | ||
2537 | add $8, %r15b | ||
2538 | GHASH_8_ENCRYPT_8_PARALLEL_AVX2 %xmm0, %xmm10, %xmm11, %xmm12, %xmm13, %xmm14, %xmm9, %xmm1, %xmm2, %xmm3, %xmm4, %xmm5, %xmm6, %xmm7, %xmm8, %xmm15, in_order, \ENC_DEC | ||
2539 | vpshufb SHUF_MASK(%rip), %xmm9, %xmm9 | ||
2540 | add $128, %r11 | ||
2541 | sub $128, %r13 | ||
2542 | jne _encrypt_by_8_new\@ | ||
2543 | |||
2544 | vpshufb SHUF_MASK(%rip), %xmm9, %xmm9 | ||
2545 | |||
2546 | |||
2547 | |||
2548 | |||
2549 | _eight_cipher_left\@: | ||
2550 | GHASH_LAST_8_AVX2 %xmm0, %xmm10, %xmm11, %xmm12, %xmm13, %xmm14, %xmm15, %xmm1, %xmm2, %xmm3, %xmm4, %xmm5, %xmm6, %xmm7, %xmm8 | ||
2551 | |||
2552 | |||
2553 | _zero_cipher_left\@: | ||
2554 | cmp $16, arg4 | ||
2555 | jl _only_less_than_16\@ | ||
2556 | |||
2557 | mov arg4, %r13 | ||
2558 | and $15, %r13 # r13 = (arg4 mod 16) | ||
2559 | |||
2560 | je _multiple_of_16_bytes\@ | ||
2561 | |||
2562 | # handle the last <16 Byte block separately | ||
2563 | |||
2564 | |||
2565 | vpaddd ONE(%rip), %xmm9, %xmm9 # INCR CNT to get Yn | ||
2566 | vpshufb SHUF_MASK(%rip), %xmm9, %xmm9 | ||
2567 | ENCRYPT_SINGLE_BLOCK %xmm9 # E(K, Yn) | ||
2568 | |||
2569 | sub $16, %r11 | ||
2570 | add %r13, %r11 | ||
2571 | vmovdqu (arg3, %r11), %xmm1 # receive the last <16 Byte block | ||
2572 | |||
2573 | lea SHIFT_MASK+16(%rip), %r12 | ||
2574 | sub %r13, %r12 # adjust the shuffle mask pointer | ||
2575 | # to be able to shift 16-r13 bytes | ||
2576 | # (r13 is the number of bytes in plaintext mod 16) | ||
2577 | vmovdqu (%r12), %xmm2 # get the appropriate shuffle mask | ||
2578 | vpshufb %xmm2, %xmm1, %xmm1 # shift right 16-r13 bytes | ||
2579 | jmp _final_ghash_mul\@ | ||
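# Editorial example of the partial-block masking used below: with
# r13 = 5 trailing bytes, r12 = SHIFT_MASK+16-5, so the shuffle mask
# loaded from it shifts the block right by 16-5 = 11 bytes, and the
# companion load from ALL_F-SHIFT_MASK(%r12) presumably yields five 0xff
# bytes followed by zeros, keeping the 5 valid bytes and clearing the
# top 11.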
2580 | |||
2581 | _only_less_than_16\@: | ||
2582 | # check for 0 length | ||
2583 | mov arg4, %r13 | ||
2584 | and $15, %r13 # r13 = (arg4 mod 16) | ||
2585 | |||
2586 | je _multiple_of_16_bytes\@ | ||
2587 | |||
2588 | # handle the last <16 Byte block separately | ||
2589 | |||
2590 | |||
2591 | vpaddd ONE(%rip), %xmm9, %xmm9 # INCR CNT to get Yn | ||
2592 | vpshufb SHUF_MASK(%rip), %xmm9, %xmm9 | ||
2593 | ENCRYPT_SINGLE_BLOCK %xmm9 # E(K, Yn) | ||
2594 | |||
2595 | |||
2596 | lea SHIFT_MASK+16(%rip), %r12 | ||
2597 | sub %r13, %r12 # adjust the shuffle mask pointer to be | ||
2598 | # able to shift 16-r13 bytes (r13 is the | ||
2599 | # number of bytes in plaintext mod 16) | ||
2600 | |||
2601 | _get_last_16_byte_loop\@: | ||
2602 | movb (arg3, %r11), %al | ||
2603 | movb %al, TMP1 (%rsp , %r11) | ||
2604 | add $1, %r11 | ||
2605 | cmp %r13, %r11 | ||
2606 | jne _get_last_16_byte_loop\@ | ||
2607 | |||
2608 | vmovdqu TMP1(%rsp), %xmm1 | ||
2609 | |||
2610 | sub $16, %r11 | ||
2611 | |||
2612 | _final_ghash_mul\@: | ||
2613 | .if \ENC_DEC == DEC | ||
2614 | vmovdqa %xmm1, %xmm2 | ||
2615 | vpxor %xmm1, %xmm9, %xmm9 # Plaintext XOR E(K, Yn) | ||
2616 | vmovdqu ALL_F-SHIFT_MASK(%r12), %xmm1 # get the appropriate mask to mask out top 16-r13 bytes of xmm9 | ||
2617 | vpand %xmm1, %xmm9, %xmm9 # mask out top 16-r13 bytes of xmm9 | ||
2618 | vpand %xmm1, %xmm2, %xmm2 | ||
2619 | vpshufb SHUF_MASK(%rip), %xmm2, %xmm2 | ||
2620 | vpxor %xmm2, %xmm14, %xmm14 | ||
2621 | #GHASH computation for the last <16 Byte block | ||
2622 | GHASH_MUL_AVX2 %xmm14, %xmm13, %xmm0, %xmm10, %xmm11, %xmm5, %xmm6 | ||
2623 | sub %r13, %r11 | ||
2624 | add $16, %r11 | ||
2625 | .else | ||
2626 | vpxor %xmm1, %xmm9, %xmm9 # Plaintext XOR E(K, Yn) | ||
2627 | vmovdqu ALL_F-SHIFT_MASK(%r12), %xmm1 # get the appropriate mask to mask out top 16-r13 bytes of xmm9 | ||
2628 | vpand %xmm1, %xmm9, %xmm9 # mask out top 16-r13 bytes of xmm9 | ||
2629 | vpshufb SHUF_MASK(%rip), %xmm9, %xmm9 | ||
2630 | vpxor %xmm9, %xmm14, %xmm14 | ||
2631 | #GHASH computation for the last <16 Byte block | ||
2632 | GHASH_MUL_AVX2 %xmm14, %xmm13, %xmm0, %xmm10, %xmm11, %xmm5, %xmm6 | ||
2633 | sub %r13, %r11 | ||
2634 | add $16, %r11 | ||
2635 | vpshufb SHUF_MASK(%rip), %xmm9, %xmm9 # shuffle xmm9 back to output as ciphertext | ||
2636 | .endif | ||
2637 | |||
2638 | |||
2639 | ############################# | ||
2640 | # output r13 Bytes | ||
2641 | vmovq %xmm9, %rax | ||
2642 | cmp $8, %r13 | ||
2643 | jle _less_than_8_bytes_left\@ | ||
2644 | |||
2645 | mov %rax, (arg2 , %r11) | ||
2646 | add $8, %r11 | ||
2647 | vpsrldq $8, %xmm9, %xmm9 | ||
2648 | vmovq %xmm9, %rax | ||
2649 | sub $8, %r13 | ||
2650 | |||
2651 | _less_than_8_bytes_left\@: | ||
2652 | movb %al, (arg2 , %r11) | ||
2653 | add $1, %r11 | ||
2654 | shr $8, %rax | ||
2655 | sub $1, %r13 | ||
2656 | jne _less_than_8_bytes_left\@ | ||
2657 | ############################# | ||
2658 | |||
2659 | _multiple_of_16_bytes\@: | ||
2660 | mov arg7, %r12 # r12 = aadLen (number of bytes) | ||
2661 | shl $3, %r12 # convert into number of bits | ||
2662 | vmovd %r12d, %xmm15 # len(A) in xmm15 | ||
2663 | |||
2664 | shl $3, arg4 # len(C) in bits (*8) | ||
2665 | vmovq arg4, %xmm1 | ||
2666 | vpslldq $8, %xmm15, %xmm15 # xmm15 = len(A)|| 0x0000000000000000 | ||
2667 | vpxor %xmm1, %xmm15, %xmm15 # xmm15 = len(A)||len(C) | ||
2668 | |||
2669 | vpxor %xmm15, %xmm14, %xmm14 | ||
2670 | GHASH_MUL_AVX2 %xmm14, %xmm13, %xmm0, %xmm10, %xmm11, %xmm5, %xmm6 # final GHASH computation | ||
2671 | vpshufb SHUF_MASK(%rip), %xmm14, %xmm14 # perform a 16Byte swap | ||
2672 | |||
2673 | mov arg5, %rax # rax = *Y0 | ||
2674 | vmovdqu (%rax), %xmm9 # xmm9 = Y0 | ||
2675 | |||
2676 | ENCRYPT_SINGLE_BLOCK %xmm9 # E(K, Y0) | ||
2677 | |||
2678 | vpxor %xmm14, %xmm9, %xmm9 | ||
2679 | |||
2680 | |||
2681 | |||
2682 | _return_T\@: | ||
2683 | mov arg8, %r10 # r10 = authTag | ||
2684 | mov arg9, %r11 # r11 = auth_tag_len | ||
2685 | |||
2686 | cmp $16, %r11 | ||
2687 | je _T_16\@ | ||
2688 | |||
2689 | cmp $12, %r11 | ||
2690 | je _T_12\@ | ||
2691 | |||
2692 | _T_8\@: | ||
2693 | vmovq %xmm9, %rax | ||
2694 | mov %rax, (%r10) | ||
2695 | jmp _return_T_done\@ | ||
2696 | _T_12\@: | ||
2697 | vmovq %xmm9, %rax | ||
2698 | mov %rax, (%r10) | ||
2699 | vpsrldq $8, %xmm9, %xmm9 | ||
2700 | vmovd %xmm9, %eax | ||
2701 | mov %eax, 8(%r10) | ||
2702 | jmp _return_T_done\@ | ||
2703 | |||
2704 | _T_16\@: | ||
2705 | vmovdqu %xmm9, (%r10) | ||
2706 | |||
2707 | _return_T_done\@: | ||
2708 | mov %r14, %rsp | ||
2709 | |||
2710 | pop %r15 | ||
2711 | pop %r14 | ||
2712 | pop %r13 | ||
2713 | pop %r12 | ||
2714 | .endm | ||
2715 | |||
2716 | |||
2717 | ############################################################# | ||
2718 | #void aesni_gcm_precomp_avx_gen4 | ||
2719 | # (gcm_data *my_ctx_data, | ||
2720 | # u8 *hash_subkey)# /* H, the Hash sub key input. | ||
2721 | # Data starts on a 16-byte boundary. */ | ||
2722 | ############################################################# | ||
2723 | ENTRY(aesni_gcm_precomp_avx_gen4) | ||
2724 | #the number of pushes must equal STACK_OFFSET | ||
2725 | push %r12 | ||
2726 | push %r13 | ||
2727 | push %r14 | ||
2728 | push %r15 | ||
2729 | |||
2730 | mov %rsp, %r14 | ||
2731 | |||
2732 | |||
2733 | |||
2734 | sub $VARIABLE_OFFSET, %rsp | ||
2735 | and $~63, %rsp # align rsp to 64 bytes | ||
2736 | |||
2737 | vmovdqu (arg2), %xmm6 # xmm6 = HashKey | ||
2738 | |||
2739 | vpshufb SHUF_MASK(%rip), %xmm6, %xmm6 | ||
2740 | ############### PRECOMPUTATION of HashKey<<1 mod poly from the HashKey | ||
2741 | vmovdqa %xmm6, %xmm2 | ||
2742 | vpsllq $1, %xmm6, %xmm6 | ||
2743 | vpsrlq $63, %xmm2, %xmm2 | ||
2744 | vmovdqa %xmm2, %xmm1 | ||
2745 | vpslldq $8, %xmm2, %xmm2 | ||
2746 | vpsrldq $8, %xmm1, %xmm1 | ||
2747 | vpor %xmm2, %xmm6, %xmm6 | ||
2748 | #reduction | ||
2749 | vpshufd $0b00100100, %xmm1, %xmm2 | ||
2750 | vpcmpeqd TWOONE(%rip), %xmm2, %xmm2 | ||
2751 | vpand POLY(%rip), %xmm2, %xmm2 | ||
2752 | vpxor %xmm2, %xmm6, %xmm6 # xmm6 holds the HashKey<<1 mod poly | ||
2753 | ####################################################################### | ||
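# Editorial note on the sequence above: the byte-swapped hash subkey is
# doubled as a 128-bit value (vpsllq plus the carry moved across the
# 64-bit halves), and if the original top bit was set (detected with the
# TWOONE compare) the POLY constant is xored in; that is, H is multiplied
# by x in GF(2^128) in the bit-reflected representation.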
2754 | vmovdqa %xmm6, HashKey(arg1) # store HashKey<<1 mod poly | ||
2755 | |||
2756 | |||
2757 | PRECOMPUTE_AVX2 %xmm6, %xmm0, %xmm1, %xmm2, %xmm3, %xmm4, %xmm5 | ||
2758 | |||
2759 | mov %r14, %rsp | ||
2760 | |||
2761 | pop %r15 | ||
2762 | pop %r14 | ||
2763 | pop %r13 | ||
2764 | pop %r12 | ||
2765 | ret | ||
2766 | ENDPROC(aesni_gcm_precomp_avx_gen4) | ||
2767 | |||
2768 | |||
2769 | ############################################################################### | ||
2770 | #void aesni_gcm_enc_avx_gen4( | ||
2771 | # gcm_data *my_ctx_data, /* aligned to 16 Bytes */ | ||
2772 | # u8 *out, /* Ciphertext output. Encrypt in-place is allowed. */ | ||
2773 | # const u8 *in, /* Plaintext input */ | ||
2774 | # u64 plaintext_len, /* Length of data in Bytes for encryption. */ | ||
2775 | # u8 *iv, /* Pre-counter block j0: 4 byte salt | ||
2776 | # (from Security Association) concatenated with 8 byte | ||
2777 | # Initialisation Vector (from IPSec ESP Payload) | ||
2778 | # concatenated with 0x00000001. 16-byte aligned pointer. */ | ||
2779 | # const u8 *aad, /* Additional Authentication Data (AAD)*/ | ||
2780 | # u64 aad_len, /* Length of AAD in bytes. With RFC4106 this is going to be 8 or 12 Bytes */ | ||
2781 | # u8 *auth_tag, /* Authenticated Tag output. */ | ||
2782 | # u64 auth_tag_len)# /* Authenticated Tag Length in bytes. | ||
2783 | # Valid values are 16 (most likely), 12 or 8. */ | ||
2784 | ############################################################################### | ||
2785 | ENTRY(aesni_gcm_enc_avx_gen4) | ||
2786 | GCM_ENC_DEC_AVX2 ENC | ||
2787 | ret | ||
2788 | ENDPROC(aesni_gcm_enc_avx_gen4) | ||
2789 | |||
2790 | ############################################################################### | ||
2791 | #void aesni_gcm_dec_avx_gen4( | ||
2792 | # gcm_data *my_ctx_data, /* aligned to 16 Bytes */ | ||
2793 | # u8 *out, /* Plaintext output. Decrypt in-place is allowed. */ | ||
2794 | # const u8 *in, /* Ciphertext input */ | ||
2795 | # u64 plaintext_len, /* Length of data in Bytes for decryption. */ | ||
2796 | # u8 *iv, /* Pre-counter block j0: 4 byte salt | ||
2797 | # (from Security Association) concatenated with 8 byte | ||
2798 | # Initialisation Vector (from IPSec ESP Payload) | ||
2799 | # concatenated with 0x00000001. 16-byte aligned pointer. */ | ||
2800 | # const u8 *aad, /* Additional Authentication Data (AAD)*/ | ||
2801 | # u64 aad_len, /* Length of AAD in bytes. With RFC4106 this is going to be 8 or 12 Bytes */ | ||
2802 | # u8 *auth_tag, /* Authenticated Tag output. */ | ||
2803 | # u64 auth_tag_len)# /* Authenticated Tag Length in bytes. | ||
2804 | # Valid values are 16 (most likely), 12 or 8. */ | ||
2805 | ############################################################################### | ||
2806 | ENTRY(aesni_gcm_dec_avx_gen4) | ||
2807 | GCM_ENC_DEC_AVX2 DEC | ||
2808 | ret | ||
2809 | ENDPROC(aesni_gcm_dec_avx_gen4) | ||
2810 | |||
2811 | #endif /* CONFIG_AS_AVX2 */ | ||
diff --git a/arch/x86/crypto/aesni-intel_glue.c b/arch/x86/crypto/aesni-intel_glue.c index 835488b745ee..948ad0e77741 100644 --- a/arch/x86/crypto/aesni-intel_glue.c +++ b/arch/x86/crypto/aesni-intel_glue.c | |||
@@ -101,6 +101,9 @@ asmlinkage void aesni_cbc_dec(struct crypto_aes_ctx *ctx, u8 *out, | |||
101 | int crypto_fpu_init(void); | 101 | int crypto_fpu_init(void); |
102 | void crypto_fpu_exit(void); | 102 | void crypto_fpu_exit(void); |
103 | 103 | ||
104 | #define AVX_GEN2_OPTSIZE 640 | ||
105 | #define AVX_GEN4_OPTSIZE 4096 | ||
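/* Editorial note: these thresholds appear to be empirical crossover
 * points, in bytes: requests shorter than AVX_GEN2_OPTSIZE use the SSE
 * path and requests shorter than AVX_GEN4_OPTSIZE use the AVX (gen2)
 * path, as the dispatch helpers below show. */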
106 | |||
104 | #ifdef CONFIG_X86_64 | 107 | #ifdef CONFIG_X86_64 |
105 | asmlinkage void aesni_ctr_enc(struct crypto_aes_ctx *ctx, u8 *out, | 108 | asmlinkage void aesni_ctr_enc(struct crypto_aes_ctx *ctx, u8 *out, |
106 | const u8 *in, unsigned int len, u8 *iv); | 109 | const u8 *in, unsigned int len, u8 *iv); |
@@ -150,6 +153,123 @@ asmlinkage void aesni_gcm_dec(void *ctx, u8 *out, | |||
150 | u8 *hash_subkey, const u8 *aad, unsigned long aad_len, | 153 | u8 *hash_subkey, const u8 *aad, unsigned long aad_len, |
151 | u8 *auth_tag, unsigned long auth_tag_len); | 154 | u8 *auth_tag, unsigned long auth_tag_len); |
152 | 155 | ||
156 | |||
157 | #ifdef CONFIG_AS_AVX | ||
158 | /* | ||
159 | * asmlinkage void aesni_gcm_precomp_avx_gen2() | ||
160 | * gcm_data *my_ctx_data, context data | ||
161 | * u8 *hash_subkey, the Hash sub key input. Data starts on a 16-byte boundary. | ||
162 | */ | ||
163 | asmlinkage void aesni_gcm_precomp_avx_gen2(void *my_ctx_data, u8 *hash_subkey); | ||
164 | |||
165 | asmlinkage void aesni_gcm_enc_avx_gen2(void *ctx, u8 *out, | ||
166 | const u8 *in, unsigned long plaintext_len, u8 *iv, | ||
167 | const u8 *aad, unsigned long aad_len, | ||
168 | u8 *auth_tag, unsigned long auth_tag_len); | ||
169 | |||
170 | asmlinkage void aesni_gcm_dec_avx_gen2(void *ctx, u8 *out, | ||
171 | const u8 *in, unsigned long ciphertext_len, u8 *iv, | ||
172 | const u8 *aad, unsigned long aad_len, | ||
173 | u8 *auth_tag, unsigned long auth_tag_len); | ||
174 | |||
175 | static void aesni_gcm_enc_avx(void *ctx, u8 *out, | ||
176 | const u8 *in, unsigned long plaintext_len, u8 *iv, | ||
177 | u8 *hash_subkey, const u8 *aad, unsigned long aad_len, | ||
178 | u8 *auth_tag, unsigned long auth_tag_len) | ||
179 | { | ||
180 | if (plaintext_len < AVX_GEN2_OPTSIZE) { | ||
181 | aesni_gcm_enc(ctx, out, in, plaintext_len, iv, hash_subkey, aad, | ||
182 | aad_len, auth_tag, auth_tag_len); | ||
183 | } else { | ||
184 | aesni_gcm_precomp_avx_gen2(ctx, hash_subkey); | ||
185 | aesni_gcm_enc_avx_gen2(ctx, out, in, plaintext_len, iv, aad, | ||
186 | aad_len, auth_tag, auth_tag_len); | ||
187 | } | ||
188 | } | ||
189 | |||
190 | static void aesni_gcm_dec_avx(void *ctx, u8 *out, | ||
191 | const u8 *in, unsigned long ciphertext_len, u8 *iv, | ||
192 | u8 *hash_subkey, const u8 *aad, unsigned long aad_len, | ||
193 | u8 *auth_tag, unsigned long auth_tag_len) | ||
194 | { | ||
195 | if (ciphertext_len < AVX_GEN2_OPTSIZE) { | ||
196 | aesni_gcm_dec(ctx, out, in, ciphertext_len, iv, hash_subkey, aad, | ||
197 | aad_len, auth_tag, auth_tag_len); | ||
198 | } else { | ||
199 | aesni_gcm_precomp_avx_gen2(ctx, hash_subkey); | ||
200 | aesni_gcm_dec_avx_gen2(ctx, out, in, ciphertext_len, iv, aad, | ||
201 | aad_len, auth_tag, auth_tag_len); | ||
202 | } | ||
203 | } | ||
204 | #endif | ||
205 | |||
206 | #ifdef CONFIG_AS_AVX2 | ||
207 | /* | ||
208 | * asmlinkage void aesni_gcm_precomp_avx_gen4() | ||
209 | * gcm_data *my_ctx_data, context data | ||
210 | * u8 *hash_subkey, the Hash sub key input. Data starts on a 16-byte boundary. | ||
211 | */ | ||
212 | asmlinkage void aesni_gcm_precomp_avx_gen4(void *my_ctx_data, u8 *hash_subkey); | ||
213 | |||
214 | asmlinkage void aesni_gcm_enc_avx_gen4(void *ctx, u8 *out, | ||
215 | const u8 *in, unsigned long plaintext_len, u8 *iv, | ||
216 | const u8 *aad, unsigned long aad_len, | ||
217 | u8 *auth_tag, unsigned long auth_tag_len); | ||
218 | |||
219 | asmlinkage void aesni_gcm_dec_avx_gen4(void *ctx, u8 *out, | ||
220 | const u8 *in, unsigned long ciphertext_len, u8 *iv, | ||
221 | const u8 *aad, unsigned long aad_len, | ||
222 | u8 *auth_tag, unsigned long auth_tag_len); | ||
223 | |||
224 | static void aesni_gcm_enc_avx2(void *ctx, u8 *out, | ||
225 | const u8 *in, unsigned long plaintext_len, u8 *iv, | ||
226 | u8 *hash_subkey, const u8 *aad, unsigned long aad_len, | ||
227 | u8 *auth_tag, unsigned long auth_tag_len) | ||
228 | { | ||
229 | if (plaintext_len < AVX_GEN2_OPTSIZE) { | ||
230 | aesni_gcm_enc(ctx, out, in, plaintext_len, iv, hash_subkey, aad, | ||
231 | aad_len, auth_tag, auth_tag_len); | ||
232 | } else if (plaintext_len < AVX_GEN4_OPTSIZE) { | ||
233 | aesni_gcm_precomp_avx_gen2(ctx, hash_subkey); | ||
234 | aesni_gcm_enc_avx_gen2(ctx, out, in, plaintext_len, iv, aad, | ||
235 | aad_len, auth_tag, auth_tag_len); | ||
236 | } else { | ||
237 | aesni_gcm_precomp_avx_gen4(ctx, hash_subkey); | ||
238 | aesni_gcm_enc_avx_gen4(ctx, out, in, plaintext_len, iv, aad, | ||
239 | aad_len, auth_tag, auth_tag_len); | ||
240 | } | ||
241 | } | ||
242 | |||
243 | static void aesni_gcm_dec_avx2(void *ctx, u8 *out, | ||
244 | const u8 *in, unsigned long ciphertext_len, u8 *iv, | ||
245 | u8 *hash_subkey, const u8 *aad, unsigned long aad_len, | ||
246 | u8 *auth_tag, unsigned long auth_tag_len) | ||
247 | { | ||
248 | if (ciphertext_len < AVX_GEN2_OPTSIZE) { | ||
249 | aesni_gcm_dec(ctx, out, in, ciphertext_len, iv, hash_subkey, | ||
250 | aad, aad_len, auth_tag, auth_tag_len); | ||
251 | } else if (ciphertext_len < AVX_GEN4_OPTSIZE) { | ||
252 | aesni_gcm_precomp_avx_gen2(ctx, hash_subkey); | ||
253 | aesni_gcm_dec_avx_gen2(ctx, out, in, ciphertext_len, iv, aad, | ||
254 | aad_len, auth_tag, auth_tag_len); | ||
255 | } else { | ||
256 | aesni_gcm_precomp_avx_gen4(ctx, hash_subkey); | ||
257 | aesni_gcm_dec_avx_gen4(ctx, out, in, ciphertext_len, iv, aad, | ||
258 | aad_len, auth_tag, auth_tag_len); | ||
259 | } | ||
260 | } | ||
261 | #endif | ||
262 | |||
263 | static void (*aesni_gcm_enc_tfm)(void *ctx, u8 *out, | ||
264 | const u8 *in, unsigned long plaintext_len, u8 *iv, | ||
265 | u8 *hash_subkey, const u8 *aad, unsigned long aad_len, | ||
266 | u8 *auth_tag, unsigned long auth_tag_len); | ||
267 | |||
268 | static void (*aesni_gcm_dec_tfm)(void *ctx, u8 *out, | ||
269 | const u8 *in, unsigned long ciphertext_len, u8 *iv, | ||
270 | u8 *hash_subkey, const u8 *aad, unsigned long aad_len, | ||
271 | u8 *auth_tag, unsigned long auth_tag_len); | ||
272 | |||
153 | static inline struct | 273 | static inline struct |
154 | aesni_rfc4106_gcm_ctx *aesni_rfc4106_gcm_ctx_get(struct crypto_aead *tfm) | 274 | aesni_rfc4106_gcm_ctx *aesni_rfc4106_gcm_ctx_get(struct crypto_aead *tfm) |
155 | { | 275 | { |
@@ -915,7 +1035,7 @@ static int __driver_rfc4106_encrypt(struct aead_request *req) | |||
915 | dst = src; | 1035 | dst = src; |
916 | } | 1036 | } |
917 | 1037 | ||
918 | aesni_gcm_enc(aes_ctx, dst, src, (unsigned long)req->cryptlen, iv, | 1038 | aesni_gcm_enc_tfm(aes_ctx, dst, src, (unsigned long)req->cryptlen, iv, |
919 | ctx->hash_subkey, assoc, (unsigned long)req->assoclen, dst | 1039 | ctx->hash_subkey, assoc, (unsigned long)req->assoclen, dst |
920 | + ((unsigned long)req->cryptlen), auth_tag_len); | 1040 | + ((unsigned long)req->cryptlen), auth_tag_len); |
921 | 1041 | ||
@@ -996,12 +1116,12 @@ static int __driver_rfc4106_decrypt(struct aead_request *req) | |||
996 | dst = src; | 1116 | dst = src; |
997 | } | 1117 | } |
998 | 1118 | ||
999 | aesni_gcm_dec(aes_ctx, dst, src, tempCipherLen, iv, | 1119 | aesni_gcm_dec_tfm(aes_ctx, dst, src, tempCipherLen, iv, |
1000 | ctx->hash_subkey, assoc, (unsigned long)req->assoclen, | 1120 | ctx->hash_subkey, assoc, (unsigned long)req->assoclen, |
1001 | authTag, auth_tag_len); | 1121 | authTag, auth_tag_len); |
1002 | 1122 | ||
1003 | /* Compare generated tag with passed in tag. */ | 1123 | /* Compare generated tag with passed in tag. */ |
1004 | retval = memcmp(src + tempCipherLen, authTag, auth_tag_len) ? | 1124 | retval = crypto_memneq(src + tempCipherLen, authTag, auth_tag_len) ? |
1005 | -EBADMSG : 0; | 1125 | -EBADMSG : 0; |
1006 | 1126 | ||
1007 | if (one_entry_in_sg) { | 1127 | if (one_entry_in_sg) { |
@@ -1353,6 +1473,27 @@ static int __init aesni_init(void) | |||
1353 | 1473 | ||
1354 | if (!x86_match_cpu(aesni_cpu_id)) | 1474 | if (!x86_match_cpu(aesni_cpu_id)) |
1355 | return -ENODEV; | 1475 | return -ENODEV; |
1476 | #ifdef CONFIG_X86_64 | ||
1477 | #ifdef CONFIG_AS_AVX2 | ||
1478 | if (boot_cpu_has(X86_FEATURE_AVX2)) { | ||
1479 | pr_info("AVX2 version of gcm_enc/dec engaged.\n"); | ||
1480 | aesni_gcm_enc_tfm = aesni_gcm_enc_avx2; | ||
1481 | aesni_gcm_dec_tfm = aesni_gcm_dec_avx2; | ||
1482 | } else | ||
1483 | #endif | ||
1484 | #ifdef CONFIG_AS_AVX | ||
1485 | if (boot_cpu_has(X86_FEATURE_AVX)) { | ||
1486 | pr_info("AVX version of gcm_enc/dec engaged.\n"); | ||
1487 | aesni_gcm_enc_tfm = aesni_gcm_enc_avx; | ||
1488 | aesni_gcm_dec_tfm = aesni_gcm_dec_avx; | ||
1489 | } else | ||
1490 | #endif | ||
1491 | { | ||
1492 | pr_info("SSE version of gcm_enc/dec engaged.\n"); | ||
1493 | aesni_gcm_enc_tfm = aesni_gcm_enc; | ||
1494 | aesni_gcm_dec_tfm = aesni_gcm_dec; | ||
1495 | } | ||
1496 | #endif | ||
1356 | 1497 | ||
1357 | err = crypto_fpu_init(); | 1498 | err = crypto_fpu_init(); |
1358 | if (err) | 1499 | if (err) |
diff --git a/arch/x86/crypto/blowfish_glue.c b/arch/x86/crypto/blowfish_glue.c index 50ec333b70e6..8af519ed73d1 100644 --- a/arch/x86/crypto/blowfish_glue.c +++ b/arch/x86/crypto/blowfish_glue.c | |||
@@ -223,9 +223,6 @@ static unsigned int __cbc_decrypt(struct blkcipher_desc *desc, | |||
223 | src -= 1; | 223 | src -= 1; |
224 | dst -= 1; | 224 | dst -= 1; |
225 | } while (nbytes >= bsize * 4); | 225 | } while (nbytes >= bsize * 4); |
226 | |||
227 | if (nbytes < bsize) | ||
228 | goto done; | ||
229 | } | 226 | } |
230 | 227 | ||
231 | /* Handle leftovers */ | 228 | /* Handle leftovers */ |
diff --git a/arch/x86/crypto/cast5_avx_glue.c b/arch/x86/crypto/cast5_avx_glue.c index e6a3700489b9..e57e20ab5e0b 100644 --- a/arch/x86/crypto/cast5_avx_glue.c +++ b/arch/x86/crypto/cast5_avx_glue.c | |||
@@ -203,9 +203,6 @@ static unsigned int __cbc_decrypt(struct blkcipher_desc *desc, | |||
203 | src -= 1; | 203 | src -= 1; |
204 | dst -= 1; | 204 | dst -= 1; |
205 | } while (nbytes >= bsize * CAST5_PARALLEL_BLOCKS); | 205 | } while (nbytes >= bsize * CAST5_PARALLEL_BLOCKS); |
206 | |||
207 | if (nbytes < bsize) | ||
208 | goto done; | ||
209 | } | 206 | } |
210 | 207 | ||
211 | /* Handle leftovers */ | 208 | /* Handle leftovers */ |
diff --git a/arch/x86/crypto/ghash-clmulni-intel_asm.S b/arch/x86/crypto/ghash-clmulni-intel_asm.S index 586f41aac361..185fad49d86f 100644 --- a/arch/x86/crypto/ghash-clmulni-intel_asm.S +++ b/arch/x86/crypto/ghash-clmulni-intel_asm.S | |||
@@ -24,10 +24,6 @@ | |||
24 | .align 16 | 24 | .align 16 |
25 | .Lbswap_mask: | 25 | .Lbswap_mask: |
26 | .octa 0x000102030405060708090a0b0c0d0e0f | 26 | .octa 0x000102030405060708090a0b0c0d0e0f |
27 | .Lpoly: | ||
28 | .octa 0xc2000000000000000000000000000001 | ||
29 | .Ltwo_one: | ||
30 | .octa 0x00000001000000000000000000000001 | ||
31 | 27 | ||
32 | #define DATA %xmm0 | 28 | #define DATA %xmm0 |
33 | #define SHASH %xmm1 | 29 | #define SHASH %xmm1 |
@@ -134,28 +130,3 @@ ENTRY(clmul_ghash_update) | |||
134 | .Lupdate_just_ret: | 130 | .Lupdate_just_ret: |
135 | ret | 131 | ret |
136 | ENDPROC(clmul_ghash_update) | 132 | ENDPROC(clmul_ghash_update) |
137 | |||
138 | /* | ||
139 | * void clmul_ghash_setkey(be128 *shash, const u8 *key); | ||
140 | * | ||
141 | * Calculate hash_key << 1 mod poly | ||
142 | */ | ||
143 | ENTRY(clmul_ghash_setkey) | ||
144 | movaps .Lbswap_mask, BSWAP | ||
145 | movups (%rsi), %xmm0 | ||
146 | PSHUFB_XMM BSWAP %xmm0 | ||
147 | movaps %xmm0, %xmm1 | ||
148 | psllq $1, %xmm0 | ||
149 | psrlq $63, %xmm1 | ||
150 | movaps %xmm1, %xmm2 | ||
151 | pslldq $8, %xmm1 | ||
152 | psrldq $8, %xmm2 | ||
153 | por %xmm1, %xmm0 | ||
154 | # reduction | ||
155 | pshufd $0b00100100, %xmm2, %xmm1 | ||
156 | pcmpeqd .Ltwo_one, %xmm1 | ||
157 | pand .Lpoly, %xmm1 | ||
158 | pxor %xmm1, %xmm0 | ||
159 | movups %xmm0, (%rdi) | ||
160 | ret | ||
161 | ENDPROC(clmul_ghash_setkey) | ||
diff --git a/arch/x86/crypto/ghash-clmulni-intel_glue.c b/arch/x86/crypto/ghash-clmulni-intel_glue.c index 6759dd1135be..d785cf2c529c 100644 --- a/arch/x86/crypto/ghash-clmulni-intel_glue.c +++ b/arch/x86/crypto/ghash-clmulni-intel_glue.c | |||
@@ -30,8 +30,6 @@ void clmul_ghash_mul(char *dst, const be128 *shash); | |||
30 | void clmul_ghash_update(char *dst, const char *src, unsigned int srclen, | 30 | void clmul_ghash_update(char *dst, const char *src, unsigned int srclen, |
31 | const be128 *shash); | 31 | const be128 *shash); |
32 | 32 | ||
33 | void clmul_ghash_setkey(be128 *shash, const u8 *key); | ||
34 | |||
35 | struct ghash_async_ctx { | 33 | struct ghash_async_ctx { |
36 | struct cryptd_ahash *cryptd_tfm; | 34 | struct cryptd_ahash *cryptd_tfm; |
37 | }; | 35 | }; |
@@ -58,13 +56,23 @@ static int ghash_setkey(struct crypto_shash *tfm, | |||
58 | const u8 *key, unsigned int keylen) | 56 | const u8 *key, unsigned int keylen) |
59 | { | 57 | { |
60 | struct ghash_ctx *ctx = crypto_shash_ctx(tfm); | 58 | struct ghash_ctx *ctx = crypto_shash_ctx(tfm); |
59 | be128 *x = (be128 *)key; | ||
60 | u64 a, b; | ||
61 | 61 | ||
62 | if (keylen != GHASH_BLOCK_SIZE) { | 62 | if (keylen != GHASH_BLOCK_SIZE) { |
63 | crypto_shash_set_flags(tfm, CRYPTO_TFM_RES_BAD_KEY_LEN); | 63 | crypto_shash_set_flags(tfm, CRYPTO_TFM_RES_BAD_KEY_LEN); |
64 | return -EINVAL; | 64 | return -EINVAL; |
65 | } | 65 | } |
66 | 66 | ||
67 | clmul_ghash_setkey(&ctx->shash, key); | 67 | /* perform multiplication by 'x' in GF(2^128) */ |
68 | a = be64_to_cpu(x->a); | ||
69 | b = be64_to_cpu(x->b); | ||
70 | |||
71 | ctx->shash.a = (__be64)((b << 1) | (a >> 63)); | ||
72 | ctx->shash.b = (__be64)((a << 1) | (b >> 63)); | ||
73 | |||
74 | if (a >> 63) | ||
75 | ctx->shash.b ^= cpu_to_be64(0xc2); | ||
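/* Editorial note: the shift above doubles the 128-bit subkey (the halves
 * are also swapped into the order the PCLMUL routines expect) and, when
 * the bit shifted out of 'a' is set, folds in 0xc2, the leading byte of
 * the bit-reflected GHASH reduction polynomial 0xC2...01, replacing the
 * removed clmul_ghash_setkey() assembly helper. */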
68 | 76 | ||
69 | return 0; | 77 | return 0; |
70 | } | 78 | } |
diff --git a/arch/x86/crypto/sha1_avx2_x86_64_asm.S b/arch/x86/crypto/sha1_avx2_x86_64_asm.S new file mode 100644 index 000000000000..1cd792db15ef --- /dev/null +++ b/arch/x86/crypto/sha1_avx2_x86_64_asm.S | |||
@@ -0,0 +1,708 @@ | |||
1 | /* | ||
2 | * Implement fast SHA-1 with AVX2 instructions. (x86_64) | ||
3 | * | ||
4 | * This file is provided under a dual BSD/GPLv2 license. When using or | ||
5 | * redistributing this file, you may do so under either license. | ||
6 | * | ||
7 | * GPL LICENSE SUMMARY | ||
8 | * | ||
9 | * Copyright(c) 2014 Intel Corporation. | ||
10 | * | ||
11 | * This program is free software; you can redistribute it and/or modify | ||
12 | * it under the terms of version 2 of the GNU General Public License as | ||
13 | * published by the Free Software Foundation. | ||
14 | * | ||
15 | * This program is distributed in the hope that it will be useful, but | ||
16 | * WITHOUT ANY WARRANTY; without even the implied warranty of | ||
17 | * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU | ||
18 | * General Public License for more details. | ||
19 | * | ||
20 | * Contact Information: | ||
21 | * Ilya Albrekht <ilya.albrekht@intel.com> | ||
22 | * Maxim Locktyukhin <maxim.locktyukhin@intel.com> | ||
23 | * Ronen Zohar <ronen.zohar@intel.com> | ||
24 | * Chandramouli Narayanan <mouli@linux.intel.com> | ||
25 | * | ||
26 | * BSD LICENSE | ||
27 | * | ||
28 | * Copyright(c) 2014 Intel Corporation. | ||
29 | * | ||
30 | * Redistribution and use in source and binary forms, with or without | ||
31 | * modification, are permitted provided that the following conditions | ||
32 | * are met: | ||
33 | * | ||
34 | * Redistributions of source code must retain the above copyright | ||
35 | * notice, this list of conditions and the following disclaimer. | ||
36 | * Redistributions in binary form must reproduce the above copyright | ||
37 | * notice, this list of conditions and the following disclaimer in | ||
38 | * the documentation and/or other materials provided with the | ||
39 | * distribution. | ||
40 | * Neither the name of Intel Corporation nor the names of its | ||
41 | * contributors may be used to endorse or promote products derived | ||
42 | * from this software without specific prior written permission. | ||
43 | * | ||
44 | * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS | ||
45 | * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT | ||
46 | * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR | ||
47 | * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT | ||
48 | * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, | ||
49 | * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT | ||
50 | * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, | ||
51 | * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY | ||
52 | * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT | ||
53 | * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE | ||
54 | * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. | ||
55 | * | ||
56 | */ | ||
57 | |||
58 | /* | ||
59 | * SHA-1 implementation with Intel(R) AVX2 instruction set extensions. | ||
60 | * | ||
61 | * This implementation is based on the previous SSSE3 release: | ||
62 | * Visit http://software.intel.com/en-us/articles/ | ||
63 | * and refer to improving-the-performance-of-the-secure-hash-algorithm-1/ | ||
64 | * | ||
65 | * Updates the 20-byte SHA-1 record in 'hash' for an even number of | ||
66 | * 'num_blocks' consecutive 64-byte blocks | ||
67 | * | ||
68 | * extern "C" void sha1_transform_avx2( | ||
69 | * int *hash, const char* input, size_t num_blocks ); | ||
70 | */ | ||
71 | |||
72 | #include <linux/linkage.h> | ||
73 | |||
74 | #define CTX %rdi /* arg1 */ | ||
75 | #define BUF %rsi /* arg2 */ | ||
76 | #define CNT %rdx /* arg3 */ | ||
77 | |||
78 | #define REG_A %ecx | ||
79 | #define REG_B %esi | ||
80 | #define REG_C %edi | ||
81 | #define REG_D %eax | ||
82 | #define REG_E %edx | ||
83 | #define REG_TB %ebx | ||
84 | #define REG_TA %r12d | ||
85 | #define REG_RA %rcx | ||
86 | #define REG_RB %rsi | ||
87 | #define REG_RC %rdi | ||
88 | #define REG_RD %rax | ||
89 | #define REG_RE %rdx | ||
90 | #define REG_RTA %r12 | ||
91 | #define REG_RTB %rbx | ||
92 | #define REG_T1 %ebp | ||
93 | #define xmm_mov vmovups | ||
94 | #define avx2_zeroupper vzeroupper | ||
95 | #define RND_F1 1 | ||
96 | #define RND_F2 2 | ||
97 | #define RND_F3 3 | ||
98 | |||
99 | .macro REGALLOC | ||
100 | .set A, REG_A | ||
101 | .set B, REG_B | ||
102 | .set C, REG_C | ||
103 | .set D, REG_D | ||
104 | .set E, REG_E | ||
105 | .set TB, REG_TB | ||
106 | .set TA, REG_TA | ||
107 | |||
108 | .set RA, REG_RA | ||
109 | .set RB, REG_RB | ||
110 | .set RC, REG_RC | ||
111 | .set RD, REG_RD | ||
112 | .set RE, REG_RE | ||
113 | |||
114 | .set RTA, REG_RTA | ||
115 | .set RTB, REG_RTB | ||
116 | |||
117 | .set T1, REG_T1 | ||
118 | .endm | ||
119 | |||
120 | #define K_BASE %r8 | ||
121 | #define HASH_PTR %r9 | ||
122 | #define BUFFER_PTR %r10 | ||
123 | #define BUFFER_PTR2 %r13 | ||
124 | #define BUFFER_END %r11 | ||
125 | |||
126 | #define PRECALC_BUF %r14 | ||
127 | #define WK_BUF %r15 | ||
128 | |||
129 | #define W_TMP %xmm0 | ||
130 | #define WY_TMP %ymm0 | ||
131 | #define WY_TMP2 %ymm9 | ||
132 | |||
133 | # AVX2 variables | ||
134 | #define WY0 %ymm3 | ||
135 | #define WY4 %ymm5 | ||
136 | #define WY08 %ymm7 | ||
137 | #define WY12 %ymm8 | ||
138 | #define WY16 %ymm12 | ||
139 | #define WY20 %ymm13 | ||
140 | #define WY24 %ymm14 | ||
141 | #define WY28 %ymm15 | ||
142 | |||
143 | #define YMM_SHUFB_BSWAP %ymm10 | ||
144 | |||
145 | /* | ||
146 | * Keep 2 iterations precalculated at a time: | ||
147 | * - 80 DWORDs per iteration * 2 | ||
148 | */ | ||
149 | #define W_SIZE (80*2*2 +16) | ||
150 | |||
151 | #define WK(t) ((((t) % 80) / 4)*32 + ( (t) % 4)*4 + ((t)/80)*16 )(WK_BUF) | ||
152 | #define PRECALC_WK(t) ((t)*2*2)(PRECALC_BUF) | ||
153 | |||
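
Working the WK() addressing out by hand (my reading of the interleaved layout: each 32-byte slot holds one round quad for both software-pipelined blocks, 16 bytes per block, with (t)/80 selecting the block's lane):

    WK(5)  = ((5 % 80)/4)*32  + (5 % 4)*4  + (5/80)*16  = 32 + 4 + 0  = 36(WK_BUF)
    WK(85) = ((85 % 80)/4)*32 + (85 % 4)*4 + (85/80)*16 = 32 + 4 + 16 = 52(WK_BUF)
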
154 | |||
155 | .macro UPDATE_HASH hash, val | ||
156 | add \hash, \val | ||
157 | mov \val, \hash | ||
158 | .endm | ||
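
In C terms the macro is simply (AT&T operand order, source first):

    /* UPDATE_HASH hash, val:  val += *hash; *hash = val; */

so \val leaves the macro holding the updated digest word, which is why TB can seed B for the second block further down.
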
159 | |||
160 | .macro PRECALC_RESET_WY | ||
161 | .set WY_00, WY0 | ||
162 | .set WY_04, WY4 | ||
163 | .set WY_08, WY08 | ||
164 | .set WY_12, WY12 | ||
165 | .set WY_16, WY16 | ||
166 | .set WY_20, WY20 | ||
167 | .set WY_24, WY24 | ||
168 | .set WY_28, WY28 | ||
169 | .set WY_32, WY_00 | ||
170 | .endm | ||
171 | |||
172 | .macro PRECALC_ROTATE_WY | ||
173 | /* Rotate macros */ | ||
174 | .set WY_32, WY_28 | ||
175 | .set WY_28, WY_24 | ||
176 | .set WY_24, WY_20 | ||
177 | .set WY_20, WY_16 | ||
178 | .set WY_16, WY_12 | ||
179 | .set WY_12, WY_08 | ||
180 | .set WY_08, WY_04 | ||
181 | .set WY_04, WY_00 | ||
182 | .set WY_00, WY_32 | ||
183 | |||
184 | /* Define register aliases */ | ||
185 | .set WY, WY_00 | ||
186 | .set WY_minus_04, WY_04 | ||
187 | .set WY_minus_08, WY_08 | ||
188 | .set WY_minus_12, WY_12 | ||
189 | .set WY_minus_16, WY_16 | ||
190 | .set WY_minus_20, WY_20 | ||
191 | .set WY_minus_24, WY_24 | ||
192 | .set WY_minus_28, WY_28 | ||
193 | .set WY_minus_32, WY | ||
194 | .endm | ||
195 | |||
196 | .macro PRECALC_00_15 | ||
197 | .if (i == 0) # Initialize and rotate registers | ||
198 | PRECALC_RESET_WY | ||
199 | PRECALC_ROTATE_WY | ||
200 | .endif | ||
201 | |||
202 | /* message scheduling pre-compute for rounds 0-15 */ | ||
203 | .if ((i & 7) == 0) | ||
204 | /* | ||
205 | * blended AVX2 and ALU instruction scheduling | ||
206 | * 1 vector iteration per 8 rounds | ||
207 | */ | ||
208 | vmovdqu ((i * 2) + PRECALC_OFFSET)(BUFFER_PTR), W_TMP | ||
209 | .elseif ((i & 7) == 1) | ||
210 | vinsertf128 $1, (((i-1) * 2)+PRECALC_OFFSET)(BUFFER_PTR2),\ | ||
211 | WY_TMP, WY_TMP | ||
212 | .elseif ((i & 7) == 2) | ||
213 | vpshufb YMM_SHUFB_BSWAP, WY_TMP, WY | ||
214 | .elseif ((i & 7) == 4) | ||
215 | vpaddd K_XMM(K_BASE), WY, WY_TMP | ||
216 | .elseif ((i & 7) == 7) | ||
217 | vmovdqu WY_TMP, PRECALC_WK(i&~7) | ||
218 | |||
219 | PRECALC_ROTATE_WY | ||
220 | .endif | ||
221 | .endm | ||
222 | |||
223 | .macro PRECALC_16_31 | ||
224 | /* | ||
225 | * message scheduling pre-compute for rounds 16-31 | ||
226 | * calculating last 32 w[i] values in 8 XMM registers | ||
227 | * pre-calculate K+w[i] values and store to mem | ||
228 | * for later load by ALU add instruction | ||
229 | * | ||
230 | * "brute force" vectorization for rounds 16-31 only | ||
231 | * due to w[i]->w[i-3] dependency | ||
232 | */ | ||
233 | .if ((i & 7) == 0) | ||
234 | /* | ||
235 | * blended AVX2 and ALU instruction scheduling | ||
236 | * 1 vector iteration per 8 rounds | ||
237 | */ | ||
238 | /* w[i-14] */ | ||
239 | vpalignr $8, WY_minus_16, WY_minus_12, WY | ||
240 | vpsrldq $4, WY_minus_04, WY_TMP /* w[i-3] */ | ||
241 | .elseif ((i & 7) == 1) | ||
242 | vpxor WY_minus_08, WY, WY | ||
243 | vpxor WY_minus_16, WY_TMP, WY_TMP | ||
244 | .elseif ((i & 7) == 2) | ||
245 | vpxor WY_TMP, WY, WY | ||
246 | vpslldq $12, WY, WY_TMP2 | ||
247 | .elseif ((i & 7) == 3) | ||
248 | vpslld $1, WY, WY_TMP | ||
249 | vpsrld $31, WY, WY | ||
250 | .elseif ((i & 7) == 4) | ||
251 | vpor WY, WY_TMP, WY_TMP | ||
252 | vpslld $2, WY_TMP2, WY | ||
253 | .elseif ((i & 7) == 5) | ||
254 | vpsrld $30, WY_TMP2, WY_TMP2 | ||
255 | vpxor WY, WY_TMP, WY_TMP | ||
256 | .elseif ((i & 7) == 7) | ||
257 | vpxor WY_TMP2, WY_TMP, WY | ||
258 | vpaddd K_XMM(K_BASE), WY, WY_TMP | ||
259 | vmovdqu WY_TMP, PRECALC_WK(i&~7) | ||
260 | |||
261 | PRECALC_ROTATE_WY | ||
262 | .endif | ||
263 | .endm | ||
264 | |||
265 | .macro PRECALC_32_79 | ||
266 | /* | ||
267 | * in SHA-1 specification: | ||
268 | * w[i] = (w[i-3] ^ w[i-8] ^ w[i-14] ^ w[i-16]) rol 1 | ||
269 | * instead we use the equivalent: | ||
270 | * w[i] = (w[i-6] ^ w[i-16] ^ w[i-28] ^ w[i-32]) rol 2 | ||
271 | * which allows more efficient vectorization, | ||
272 | * since the w[i] => w[i-3] dependency is broken | ||
273 | */ | ||
274 | |||
275 | .if ((i & 7) == 0) | ||
276 | /* | ||
277 | * blended AVX2 and ALU instruction scheduling | ||
278 | * 1 vector iteration per 8 rounds | ||
279 | */ | ||
280 | vpalignr $8, WY_minus_08, WY_minus_04, WY_TMP | ||
281 | .elseif ((i & 7) == 1) | ||
282 | /* W is W_minus_32 before xor */ | ||
283 | vpxor WY_minus_28, WY, WY | ||
284 | .elseif ((i & 7) == 2) | ||
285 | vpxor WY_minus_16, WY_TMP, WY_TMP | ||
286 | .elseif ((i & 7) == 3) | ||
287 | vpxor WY_TMP, WY, WY | ||
288 | .elseif ((i & 7) == 4) | ||
289 | vpslld $2, WY, WY_TMP | ||
290 | .elseif ((i & 7) == 5) | ||
291 | vpsrld $30, WY, WY | ||
292 | vpor WY, WY_TMP, WY | ||
293 | .elseif ((i & 7) == 7) | ||
294 | vpaddd K_XMM(K_BASE), WY, WY_TMP | ||
295 | vmovdqu WY_TMP, PRECALC_WK(i&~7) | ||
296 | |||
297 | PRECALC_ROTATE_WY | ||
298 | .endif | ||
299 | .endm | ||
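
The rewritten recurrence is the standard identity from Intel's SHA-1 paper; a self-contained C check that both schedules agree from t = 32 on (hypothetical test harness, not part of the patch):

    #include <assert.h>
    #include <stdint.h>

    static uint32_t rol32(uint32_t v, int n)
    {
        return (v << n) | (v >> (32 - n));
    }

    /* Extend the schedule far enough to exercise both forms, then verify
     * the rol-2 variant against the FIPS 180 definition. */
    static void check_schedule(const uint32_t msg[16])
    {
        uint32_t w[160];
        int i;

        for (i = 0; i < 16; i++)
            w[i] = msg[i];
        for (i = 16; i < 160; i++)
            w[i] = rol32(w[i-3] ^ w[i-8] ^ w[i-14] ^ w[i-16], 1);
        for (i = 32; i < 160; i++)
            assert(w[i] == rol32(w[i-6] ^ w[i-16] ^ w[i-28] ^ w[i-32], 2));
    }
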
300 | |||
301 | .macro PRECALC r, s | ||
302 | .set i, \r | ||
303 | |||
304 | .if (i < 40) | ||
305 | .set K_XMM, 32*0 | ||
306 | .elseif (i < 80) | ||
307 | .set K_XMM, 32*1 | ||
308 | .elseif (i < 120) | ||
309 | .set K_XMM, 32*2 | ||
310 | .else | ||
311 | .set K_XMM, 32*3 | ||
312 | .endif | ||
313 | |||
314 | .if (i<32) | ||
315 | PRECALC_00_15 \s | ||
316 | .elseif (i<64) | ||
317 | PRECALC_16_31 \s | ||
318 | .elseif (i < 160) | ||
319 | PRECALC_32_79 \s | ||
320 | .endif | ||
321 | .endm | ||
322 | |||
323 | .macro ROTATE_STATE | ||
324 | .set T_REG, E | ||
325 | .set E, D | ||
326 | .set D, C | ||
327 | .set C, B | ||
328 | .set B, TB | ||
329 | .set TB, A | ||
330 | .set A, T_REG | ||
331 | |||
332 | .set T_REG, RE | ||
333 | .set RE, RD | ||
334 | .set RD, RC | ||
335 | .set RC, RB | ||
336 | .set RB, RTB | ||
337 | .set RTB, RA | ||
338 | .set RA, T_REG | ||
339 | .endm | ||
340 | |||
341 | /* Macro relies on saved ROUND_Fx */ | ||
342 | |||
343 | .macro RND_FUN f, r | ||
344 | .if (\f == RND_F1) | ||
345 | ROUND_F1 \r | ||
346 | .elseif (\f == RND_F2) | ||
347 | ROUND_F2 \r | ||
348 | .elseif (\f == RND_F3) | ||
349 | ROUND_F3 \r | ||
350 | .endif | ||
351 | .endm | ||
352 | |||
353 | .macro RR r | ||
354 | .set round_id, (\r % 80) | ||
355 | |||
356 | .if (round_id == 0) /* Precalculate F for first round */ | ||
357 | .set ROUND_FUNC, RND_F1 | ||
358 | mov B, TB | ||
359 | |||
360 | rorx $(32-30), B, B /* b>>>2 */ | ||
361 | andn D, TB, T1 | ||
362 | and C, TB | ||
363 | xor T1, TB | ||
364 | .endif | ||
365 | |||
366 | RND_FUN ROUND_FUNC, \r | ||
367 | ROTATE_STATE | ||
368 | |||
369 | .if (round_id == 18) | ||
370 | .set ROUND_FUNC, RND_F2 | ||
371 | .elseif (round_id == 38) | ||
372 | .set ROUND_FUNC, RND_F3 | ||
373 | .elseif (round_id == 58) | ||
374 | .set ROUND_FUNC, RND_F2 | ||
375 | .endif | ||
376 | |||
377 | .set round_id, ( (\r+1) % 80) | ||
378 | |||
379 | RND_FUN ROUND_FUNC, (\r+1) | ||
380 | ROTATE_STATE | ||
381 | .endm | ||
382 | |||
383 | .macro ROUND_F1 r | ||
384 | add WK(\r), E | ||
385 | |||
386 | andn C, A, T1 /* ~b&d */ | ||
387 | lea (RE,RTB), E /* Add F from the previous round */ | ||
388 | |||
389 | rorx $(32-5), A, TA /* T2 = A >>> 5 */ | ||
390 | rorx $(32-30),A, TB /* b>>>2 for next round */ | ||
391 | |||
392 | PRECALC (\r) /* msg scheduling for next 2 blocks */ | ||
393 | |||
394 | /* | ||
395 | * Calculate F for the next round | ||
396 | * (b & c) ^ andn[b, d] | ||
397 | */ | ||
398 | and B, A /* b&c */ | ||
399 | xor T1, A /* F1 = (b&c) ^ (~b&d) */ | ||
400 | |||
401 | lea (RE,RTA), E /* E += A >>> 5 */ | ||
402 | .endm | ||
403 | |||
404 | .macro ROUND_F2 r | ||
405 | add WK(\r), E | ||
406 | lea (RE,RTB), E /* Add F from the previous round */ | ||
407 | |||
408 | /* Calculate F for the next round */ | ||
409 | rorx $(32-5), A, TA /* T2 = A >>> 5 */ | ||
410 | .if ((round_id) < 79) | ||
411 | rorx $(32-30), A, TB /* b>>>2 for next round */ | ||
412 | .endif | ||
413 | PRECALC (\r) /* msg scheduling for next 2 blocks */ | ||
414 | |||
415 | .if ((round_id) < 79) | ||
416 | xor B, A | ||
417 | .endif | ||
418 | |||
419 | add TA, E /* E += A >>> 5 */ | ||
420 | |||
421 | .if ((round_id) < 79) | ||
422 | xor C, A | ||
423 | .endif | ||
424 | .endm | ||
425 | |||
426 | .macro ROUND_F3 r | ||
427 | add WK(\r), E | ||
428 | PRECALC (\r) /* msg scheduling for next 2 blocks */ | ||
429 | |||
430 | lea (RE,RTB), E /* Add F from the previous round */ | ||
431 | |||
432 | mov B, T1 | ||
433 | or A, T1 | ||
434 | |||
435 | rorx $(32-5), A, TA /* T2 = A >>> 5 */ | ||
436 | rorx $(32-30), A, TB /* b>>>2 for next round */ | ||
437 | |||
438 | /* Calculate F for the next round | ||
439 | * (b and c) or (d and (b or c)) | ||
440 | */ | ||
441 | and C, T1 | ||
442 | and B, A | ||
443 | or T1, A | ||
444 | |||
445 | add TA, E /* E += A >>> 5 */ | ||
446 | |||
447 | .endm | ||
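
The three ROUND_F* macros compute the usual FIPS 180 selection functions, two rounds per RR invocation and with F precalculated one round ahead (hence the switch points at 18/38/58 above). In C, with F1 written the way the BMI1 andn sequence evaluates it:

    #include <stdint.h>

    /* Rounds 0-19: ch(b,c,d); ~b & d maps onto a single 'andn'. */
    static uint32_t f1(uint32_t b, uint32_t c, uint32_t d)
    {
        return (b & c) ^ (~b & d);
    }

    /* Rounds 20-39 and 60-79: parity. */
    static uint32_t f2(uint32_t b, uint32_t c, uint32_t d)
    {
        return b ^ c ^ d;
    }

    /* Rounds 40-59: majority, in the (b & c) | (d & (b | c)) form. */
    static uint32_t f3(uint32_t b, uint32_t c, uint32_t d)
    {
        return (b & c) | (d & (b | c));
    }
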
448 | |||
449 | /* | ||
450 | * macro implements 80 rounds of SHA-1 for multiple blocks with s/w pipelining | ||
451 | */ | ||
452 | .macro SHA1_PIPELINED_MAIN_BODY | ||
453 | |||
454 | REGALLOC | ||
455 | |||
456 | mov (HASH_PTR), A | ||
457 | mov 4(HASH_PTR), B | ||
458 | mov 8(HASH_PTR), C | ||
459 | mov 12(HASH_PTR), D | ||
460 | mov 16(HASH_PTR), E | ||
461 | |||
462 | mov %rsp, PRECALC_BUF | ||
463 | lea (2*4*80+32)(%rsp), WK_BUF | ||
464 | |||
465 | # Precalc WK for first 2 blocks | ||
466 | PRECALC_OFFSET = 0 | ||
467 | .set i, 0 | ||
468 | .rept 160 | ||
469 | PRECALC i | ||
470 | .set i, i + 1 | ||
471 | .endr | ||
472 | PRECALC_OFFSET = 128 | ||
473 | xchg WK_BUF, PRECALC_BUF | ||
474 | |||
475 | .align 32 | ||
476 | _loop: | ||
477 | /* | ||
478 | * The code loops through more than one block; | ||
479 | * we use the K_BASE value as a signal of the last block. | ||
480 | * It is set below by: cmovae K_BASE, BUFFER_PTR | ||
481 | */ | ||
482 | cmp K_BASE, BUFFER_PTR | ||
483 | jne _begin | ||
484 | .align 32 | ||
485 | jmp _end | ||
486 | .align 32 | ||
487 | _begin: | ||
488 | |||
489 | /* | ||
490 | * Do first block | ||
491 | * rounds: 0,2,4,6,8 | ||
492 | */ | ||
493 | .set j, 0 | ||
494 | .rept 5 | ||
495 | RR j | ||
496 | .set j, j+2 | ||
497 | .endr | ||
498 | |||
499 | jmp _loop0 | ||
500 | _loop0: | ||
501 | |||
502 | /* | ||
503 | * rounds: | ||
504 | * 10,12,14,16,18 | ||
505 | * 20,22,24,26,28 | ||
506 | * 30,32,34,36,38 | ||
507 | * 40,42,44,46,48 | ||
508 | * 50,52,54,56,58 | ||
509 | */ | ||
510 | .rept 25 | ||
511 | RR j | ||
512 | .set j, j+2 | ||
513 | .endr | ||
514 | |||
515 | add $(2*64), BUFFER_PTR /* move to next odd-64-byte block */ | ||
516 | cmp BUFFER_END, BUFFER_PTR /* is current block the last one? */ | ||
517 | cmovae K_BASE, BUFFER_PTR /* signal the last iteration smartly */ | ||
518 | |||
519 | /* | ||
520 | * rounds | ||
521 | * 60,62,64,66,68 | ||
522 | * 70,72,74,76,78 | ||
523 | */ | ||
524 | .rept 10 | ||
525 | RR j | ||
526 | .set j, j+2 | ||
527 | .endr | ||
528 | |||
529 | UPDATE_HASH (HASH_PTR), A | ||
530 | UPDATE_HASH 4(HASH_PTR), TB | ||
531 | UPDATE_HASH 8(HASH_PTR), C | ||
532 | UPDATE_HASH 12(HASH_PTR), D | ||
533 | UPDATE_HASH 16(HASH_PTR), E | ||
534 | |||
535 | cmp K_BASE, BUFFER_PTR /* is current block the last one? */ | ||
536 | je _loop | ||
537 | |||
538 | mov TB, B | ||
539 | |||
540 | /* Process second block */ | ||
541 | /* | ||
542 | * rounds | ||
543 | * 0+80, 2+80, 4+80, 6+80, 8+80 | ||
544 | * 10+80,12+80,14+80,16+80,18+80 | ||
545 | */ | ||
546 | |||
547 | .set j, 0 | ||
548 | .rept 10 | ||
549 | RR j+80 | ||
550 | .set j, j+2 | ||
551 | .endr | ||
552 | |||
553 | jmp _loop1 | ||
554 | _loop1: | ||
555 | /* | ||
556 | * rounds | ||
557 | * 20+80,22+80,24+80,26+80,28+80 | ||
558 | * 30+80,32+80,34+80,36+80,38+80 | ||
559 | */ | ||
560 | .rept 10 | ||
561 | RR j+80 | ||
562 | .set j, j+2 | ||
563 | .endr | ||
564 | |||
565 | jmp _loop2 | ||
566 | _loop2: | ||
567 | |||
568 | /* | ||
569 | * rounds | ||
570 | * 40+80,42+80,44+80,46+80,48+80 | ||
571 | * 50+80,52+80,54+80,56+80,58+80 | ||
572 | */ | ||
573 | .rept 10 | ||
574 | RR j+80 | ||
575 | .set j, j+2 | ||
576 | .endr | ||
577 | |||
578 | add $(2*64), BUFFER_PTR2 /* move to next even-64-byte block */ | ||
579 | |||
580 | cmp BUFFER_END, BUFFER_PTR2 /* is current block the last one? */ | ||
581 | cmovae K_BASE, BUFFER_PTR /* signal the last iteration smartly */ | ||
582 | |||
583 | jmp _loop3 | ||
584 | _loop3: | ||
585 | |||
586 | /* | ||
587 | * rounds | ||
588 | * 60+80,62+80,64+80,66+80,68+80 | ||
589 | * 70+80,72+80,74+80,76+80,78+80 | ||
590 | */ | ||
591 | .rept 10 | ||
592 | RR j+80 | ||
593 | .set j, j+2 | ||
594 | .endr | ||
595 | |||
596 | UPDATE_HASH (HASH_PTR), A | ||
597 | UPDATE_HASH 4(HASH_PTR), TB | ||
598 | UPDATE_HASH 8(HASH_PTR), C | ||
599 | UPDATE_HASH 12(HASH_PTR), D | ||
600 | UPDATE_HASH 16(HASH_PTR), E | ||
601 | |||
602 | /* Reset state for AVX2 reg permutation */ | ||
603 | mov A, TA | ||
604 | mov TB, A | ||
605 | mov C, TB | ||
606 | mov E, C | ||
607 | mov D, B | ||
608 | mov TA, D | ||
609 | |||
610 | REGALLOC | ||
611 | |||
612 | xchg WK_BUF, PRECALC_BUF | ||
613 | |||
614 | jmp _loop | ||
615 | |||
616 | .align 32 | ||
617 | _end: | ||
618 | |||
619 | .endm | ||
620 | /* | ||
621 | * macro implements SHA-1 function's body for several 64-byte blocks | ||
622 | * param: function's name | ||
623 | */ | ||
624 | .macro SHA1_VECTOR_ASM name | ||
625 | ENTRY(\name) | ||
626 | |||
627 | push %rbx | ||
628 | push %rbp | ||
629 | push %r12 | ||
630 | push %r13 | ||
631 | push %r14 | ||
632 | push %r15 | ||
633 | |||
634 | RESERVE_STACK = (W_SIZE*4 + 8+24) | ||
635 | |||
636 | /* Align stack */ | ||
637 | mov %rsp, %rbx | ||
638 | and $~(0x20-1), %rsp | ||
639 | push %rbx | ||
640 | sub $RESERVE_STACK, %rsp | ||
641 | |||
642 | avx2_zeroupper | ||
643 | |||
644 | lea K_XMM_AR(%rip), K_BASE | ||
645 | |||
646 | mov CTX, HASH_PTR | ||
647 | mov BUF, BUFFER_PTR | ||
648 | lea 64(BUF), BUFFER_PTR2 | ||
649 | |||
650 | shl $6, CNT /* mul by 64 */ | ||
651 | add BUF, CNT | ||
652 | add $64, CNT | ||
653 | mov CNT, BUFFER_END | ||
654 | |||
655 | cmp BUFFER_END, BUFFER_PTR2 | ||
656 | cmovae K_BASE, BUFFER_PTR2 | ||
657 | |||
658 | xmm_mov BSWAP_SHUFB_CTL(%rip), YMM_SHUFB_BSWAP | ||
659 | |||
660 | SHA1_PIPELINED_MAIN_BODY | ||
661 | |||
662 | avx2_zeroupper | ||
663 | |||
664 | add $RESERVE_STACK, %rsp | ||
665 | pop %rsp | ||
666 | |||
667 | pop %r15 | ||
668 | pop %r14 | ||
669 | pop %r13 | ||
670 | pop %r12 | ||
671 | pop %rbp | ||
672 | pop %rbx | ||
673 | |||
674 | ret | ||
675 | |||
676 | ENDPROC(\name) | ||
677 | .endm | ||
678 | |||
679 | .section .rodata | ||
680 | |||
681 | #define K1 0x5a827999 | ||
682 | #define K2 0x6ed9eba1 | ||
683 | #define K3 0x8f1bbcdc | ||
684 | #define K4 0xca62c1d6 | ||
685 | |||
686 | .align 128 | ||
687 | K_XMM_AR: | ||
688 | .long K1, K1, K1, K1 | ||
689 | .long K1, K1, K1, K1 | ||
690 | .long K2, K2, K2, K2 | ||
691 | .long K2, K2, K2, K2 | ||
692 | .long K3, K3, K3, K3 | ||
693 | .long K3, K3, K3, K3 | ||
694 | .long K4, K4, K4, K4 | ||
695 | .long K4, K4, K4, K4 | ||
696 | |||
697 | BSWAP_SHUFB_CTL: | ||
698 | .long 0x00010203 | ||
699 | .long 0x04050607 | ||
700 | .long 0x08090a0b | ||
701 | .long 0x0c0d0e0f | ||
702 | .long 0x00010203 | ||
703 | .long 0x04050607 | ||
704 | .long 0x08090a0b | ||
705 | .long 0x0c0d0e0f | ||
706 | .text | ||
707 | |||
708 | SHA1_VECTOR_ASM sha1_transform_avx2 | ||
diff --git a/arch/x86/crypto/sha1_ssse3_glue.c b/arch/x86/crypto/sha1_ssse3_glue.c index 4a11a9d72451..74d16ef707c7 100644 --- a/arch/x86/crypto/sha1_ssse3_glue.c +++ b/arch/x86/crypto/sha1_ssse3_glue.c | |||
@@ -10,6 +10,7 @@ | |||
10 | * Copyright (c) Andrew McDonald <andrew@mcdonald.org.uk> | 10 | * Copyright (c) Andrew McDonald <andrew@mcdonald.org.uk> |
11 | * Copyright (c) Jean-Francois Dive <jef@linuxbe.org> | 11 | * Copyright (c) Jean-Francois Dive <jef@linuxbe.org> |
12 | * Copyright (c) Mathias Krause <minipli@googlemail.com> | 12 | * Copyright (c) Mathias Krause <minipli@googlemail.com> |
13 | * Copyright (c) Chandramouli Narayanan <mouli@linux.intel.com> | ||
13 | * | 14 | * |
14 | * This program is free software; you can redistribute it and/or modify it | 15 | * This program is free software; you can redistribute it and/or modify it |
15 | * under the terms of the GNU General Public License as published by the Free | 16 | * under the terms of the GNU General Public License as published by the Free |
@@ -39,6 +40,12 @@ asmlinkage void sha1_transform_ssse3(u32 *digest, const char *data, | |||
39 | asmlinkage void sha1_transform_avx(u32 *digest, const char *data, | 40 | asmlinkage void sha1_transform_avx(u32 *digest, const char *data, |
40 | unsigned int rounds); | 41 | unsigned int rounds); |
41 | #endif | 42 | #endif |
43 | #ifdef CONFIG_AS_AVX2 | ||
44 | #define SHA1_AVX2_BLOCK_OPTSIZE 4 /* AVX2 is optimal from 4 64-byte blocks up */ | ||
45 | |||
46 | asmlinkage void sha1_transform_avx2(u32 *digest, const char *data, | ||
47 | unsigned int rounds); | ||
48 | #endif | ||
42 | 49 | ||
43 | static asmlinkage void (*sha1_transform_asm)(u32 *, const char *, unsigned int); | 50 | static asmlinkage void (*sha1_transform_asm)(u32 *, const char *, unsigned int); |
44 | 51 | ||
@@ -165,6 +172,18 @@ static int sha1_ssse3_import(struct shash_desc *desc, const void *in) | |||
165 | return 0; | 172 | return 0; |
166 | } | 173 | } |
167 | 174 | ||
175 | #ifdef CONFIG_AS_AVX2 | ||
176 | static void sha1_apply_transform_avx2(u32 *digest, const char *data, | ||
177 | unsigned int rounds) | ||
178 | { | ||
179 | /* Select the optimal transform based on data block size */ | ||
180 | if (rounds >= SHA1_AVX2_BLOCK_OPTSIZE) | ||
181 | sha1_transform_avx2(digest, data, rounds); | ||
182 | else | ||
183 | sha1_transform_avx(digest, data, rounds); | ||
184 | } | ||
185 | #endif | ||
186 | |||
168 | static struct shash_alg alg = { | 187 | static struct shash_alg alg = { |
169 | .digestsize = SHA1_DIGEST_SIZE, | 188 | .digestsize = SHA1_DIGEST_SIZE, |
170 | .init = sha1_ssse3_init, | 189 | .init = sha1_ssse3_init, |
@@ -201,27 +220,49 @@ static bool __init avx_usable(void) | |||
201 | 220 | ||
202 | return true; | 221 | return true; |
203 | } | 222 | } |
223 | |||
224 | #ifdef CONFIG_AS_AVX2 | ||
225 | static bool __init avx2_usable(void) | ||
226 | { | ||
227 | if (avx_usable() && cpu_has_avx2 && boot_cpu_has(X86_FEATURE_BMI1) && | ||
228 | boot_cpu_has(X86_FEATURE_BMI2)) | ||
229 | return true; | ||
230 | |||
231 | return false; | ||
232 | } | ||
233 | #endif | ||
204 | #endif | 234 | #endif |
205 | 235 | ||
206 | static int __init sha1_ssse3_mod_init(void) | 236 | static int __init sha1_ssse3_mod_init(void) |
207 | { | 237 | { |
238 | char *algo_name; | ||
239 | |||
208 | /* test for SSSE3 first */ | 240 | /* test for SSSE3 first */ |
209 | if (cpu_has_ssse3) | 241 | if (cpu_has_ssse3) { |
210 | sha1_transform_asm = sha1_transform_ssse3; | 242 | sha1_transform_asm = sha1_transform_ssse3; |
243 | algo_name = "SSSE3"; | ||
244 | } | ||
211 | 245 | ||
212 | #ifdef CONFIG_AS_AVX | 246 | #ifdef CONFIG_AS_AVX |
213 | /* allow AVX to override SSSE3, it's a little faster */ | 247 | /* allow AVX to override SSSE3, it's a little faster */ |
214 | if (avx_usable()) | 248 | if (avx_usable()) { |
215 | sha1_transform_asm = sha1_transform_avx; | 249 | sha1_transform_asm = sha1_transform_avx; |
250 | algo_name = "AVX"; | ||
251 | #ifdef CONFIG_AS_AVX2 | ||
252 | /* allow AVX2 to override AVX, it's a little faster */ | ||
253 | if (avx2_usable()) { | ||
254 | sha1_transform_asm = sha1_apply_transform_avx2; | ||
255 | algo_name = "AVX2"; | ||
256 | } | ||
257 | #endif | ||
258 | } | ||
216 | #endif | 259 | #endif |
217 | 260 | ||
218 | if (sha1_transform_asm) { | 261 | if (sha1_transform_asm) { |
219 | pr_info("Using %s optimized SHA-1 implementation\n", | 262 | pr_info("Using %s optimized SHA-1 implementation\n", algo_name); |
220 | sha1_transform_asm == sha1_transform_ssse3 ? "SSSE3" | ||
221 | : "AVX"); | ||
222 | return crypto_register_shash(&alg); | 263 | return crypto_register_shash(&alg); |
223 | } | 264 | } |
224 | pr_info("Neither AVX nor SSSE3 is available/usable.\n"); | 265 | pr_info("Neither AVX nor AVX2 nor SSSE3 is available/usable.\n"); |
225 | 266 | ||
226 | return -ENODEV; | 267 | return -ENODEV; |
227 | } | 268 | } |
diff --git a/arch/x86/include/asm/Kbuild b/arch/x86/include/asm/Kbuild index 7f669853317a..3ca9762e1649 100644 --- a/arch/x86/include/asm/Kbuild +++ b/arch/x86/include/asm/Kbuild | |||
@@ -5,3 +5,6 @@ genhdr-y += unistd_64.h | |||
5 | genhdr-y += unistd_x32.h | 5 | genhdr-y += unistd_x32.h |
6 | 6 | ||
7 | generic-y += clkdev.h | 7 | generic-y += clkdev.h |
8 | generic-y += early_ioremap.h | ||
9 | generic-y += cputime.h | ||
10 | generic-y += mcs_spinlock.h | ||
diff --git a/arch/x86/include/asm/amd_nb.h b/arch/x86/include/asm/amd_nb.h index a54ee1d054d9..aaac3b2fb746 100644 --- a/arch/x86/include/asm/amd_nb.h +++ b/arch/x86/include/asm/amd_nb.h | |||
@@ -19,7 +19,7 @@ extern int amd_cache_northbridges(void); | |||
19 | extern void amd_flush_garts(void); | 19 | extern void amd_flush_garts(void); |
20 | extern int amd_numa_init(void); | 20 | extern int amd_numa_init(void); |
21 | extern int amd_get_subcaches(int); | 21 | extern int amd_get_subcaches(int); |
22 | extern int amd_set_subcaches(int, int); | 22 | extern int amd_set_subcaches(int, unsigned long); |
23 | 23 | ||
24 | struct amd_l3_cache { | 24 | struct amd_l3_cache { |
25 | unsigned indices; | 25 | unsigned indices; |
diff --git a/arch/x86/include/asm/apic.h b/arch/x86/include/asm/apic.h index 1d2091a226bc..19b0ebafcd3e 100644 --- a/arch/x86/include/asm/apic.h +++ b/arch/x86/include/asm/apic.h | |||
@@ -93,9 +93,6 @@ static inline int is_vsmp_box(void) | |||
93 | return 0; | 93 | return 0; |
94 | } | 94 | } |
95 | #endif | 95 | #endif |
96 | extern void xapic_wait_icr_idle(void); | ||
97 | extern u32 safe_xapic_wait_icr_idle(void); | ||
98 | extern void xapic_icr_write(u32, u32); | ||
99 | extern int setup_profiling_timer(unsigned int); | 96 | extern int setup_profiling_timer(unsigned int); |
100 | 97 | ||
101 | static inline void native_apic_mem_write(u32 reg, u32 v) | 98 | static inline void native_apic_mem_write(u32 reg, u32 v) |
@@ -184,7 +181,6 @@ extern int x2apic_phys; | |||
184 | extern int x2apic_preenabled; | 181 | extern int x2apic_preenabled; |
185 | extern void check_x2apic(void); | 182 | extern void check_x2apic(void); |
186 | extern void enable_x2apic(void); | 183 | extern void enable_x2apic(void); |
187 | extern void x2apic_icr_write(u32 low, u32 id); | ||
188 | static inline int x2apic_enabled(void) | 184 | static inline int x2apic_enabled(void) |
189 | { | 185 | { |
190 | u64 msr; | 186 | u64 msr; |
@@ -221,7 +217,6 @@ static inline void x2apic_force_phys(void) | |||
221 | { | 217 | { |
222 | } | 218 | } |
223 | 219 | ||
224 | #define nox2apic 0 | ||
225 | #define x2apic_preenabled 0 | 220 | #define x2apic_preenabled 0 |
226 | #define x2apic_supported() 0 | 221 | #define x2apic_supported() 0 |
227 | #endif | 222 | #endif |
@@ -351,7 +346,7 @@ struct apic { | |||
351 | int trampoline_phys_low; | 346 | int trampoline_phys_low; |
352 | int trampoline_phys_high; | 347 | int trampoline_phys_high; |
353 | 348 | ||
354 | void (*wait_for_init_deassert)(atomic_t *deassert); | 349 | bool wait_for_init_deassert; |
355 | void (*smp_callin_clear_local_apic)(void); | 350 | void (*smp_callin_clear_local_apic)(void); |
356 | void (*inquire_remote_apic)(int apicid); | 351 | void (*inquire_remote_apic)(int apicid); |
357 | 352 | ||
@@ -517,13 +512,6 @@ extern int default_cpu_present_to_apicid(int mps_cpu); | |||
517 | extern int default_check_phys_apicid_present(int phys_apicid); | 512 | extern int default_check_phys_apicid_present(int phys_apicid); |
518 | #endif | 513 | #endif |
519 | 514 | ||
520 | static inline void default_wait_for_init_deassert(atomic_t *deassert) | ||
521 | { | ||
522 | while (!atomic_read(deassert)) | ||
523 | cpu_relax(); | ||
524 | return; | ||
525 | } | ||
526 | |||
527 | extern void generic_bigsmp_probe(void); | 515 | extern void generic_bigsmp_probe(void); |
528 | 516 | ||
529 | 517 | ||
diff --git a/arch/x86/include/asm/archrandom.h b/arch/x86/include/asm/archrandom.h index 0d9ec770f2f8..69f1366f1aa3 100644 --- a/arch/x86/include/asm/archrandom.h +++ b/arch/x86/include/asm/archrandom.h | |||
@@ -1,7 +1,7 @@ | |||
1 | /* | 1 | /* |
2 | * This file is part of the Linux kernel. | 2 | * This file is part of the Linux kernel. |
3 | * | 3 | * |
4 | * Copyright (c) 2011, Intel Corporation | 4 | * Copyright (c) 2011-2014, Intel Corporation |
5 | * Authors: Fenghua Yu <fenghua.yu@intel.com>, | 5 | * Authors: Fenghua Yu <fenghua.yu@intel.com>, |
6 | * H. Peter Anvin <hpa@linux.intel.com> | 6 | * H. Peter Anvin <hpa@linux.intel.com> |
7 | * | 7 | * |
@@ -31,14 +31,41 @@ | |||
31 | #define RDRAND_RETRY_LOOPS 10 | 31 | #define RDRAND_RETRY_LOOPS 10 |
32 | 32 | ||
33 | #define RDRAND_INT ".byte 0x0f,0xc7,0xf0" | 33 | #define RDRAND_INT ".byte 0x0f,0xc7,0xf0" |
34 | #define RDSEED_INT ".byte 0x0f,0xc7,0xf8" | ||
34 | #ifdef CONFIG_X86_64 | 35 | #ifdef CONFIG_X86_64 |
35 | # define RDRAND_LONG ".byte 0x48,0x0f,0xc7,0xf0" | 36 | # define RDRAND_LONG ".byte 0x48,0x0f,0xc7,0xf0" |
37 | # define RDSEED_LONG ".byte 0x48,0x0f,0xc7,0xf8" | ||
36 | #else | 38 | #else |
37 | # define RDRAND_LONG RDRAND_INT | 39 | # define RDRAND_LONG RDRAND_INT |
40 | # define RDSEED_LONG RDSEED_INT | ||
38 | #endif | 41 | #endif |
39 | 42 | ||
40 | #ifdef CONFIG_ARCH_RANDOM | 43 | #ifdef CONFIG_ARCH_RANDOM |
41 | 44 | ||
45 | /* Instead of arch_get_random_long() when alternatives haven't run. */ | ||
46 | static inline int rdrand_long(unsigned long *v) | ||
47 | { | ||
48 | int ok; | ||
49 | asm volatile("1: " RDRAND_LONG "\n\t" | ||
50 | "jc 2f\n\t" | ||
51 | "decl %0\n\t" | ||
52 | "jnz 1b\n\t" | ||
53 | "2:" | ||
54 | : "=r" (ok), "=a" (*v) | ||
55 | : "0" (RDRAND_RETRY_LOOPS)); | ||
56 | return ok; | ||
57 | } | ||
58 | |||
59 | /* A single attempt at RDSEED */ | ||
60 | static inline bool rdseed_long(unsigned long *v) | ||
61 | { | ||
62 | unsigned char ok; | ||
63 | asm volatile(RDSEED_LONG "\n\t" | ||
64 | "setc %0" | ||
65 | : "=qm" (ok), "=a" (*v)); | ||
66 | return ok; | ||
67 | } | ||
68 | |||
42 | #define GET_RANDOM(name, type, rdrand, nop) \ | 69 | #define GET_RANDOM(name, type, rdrand, nop) \ |
43 | static inline int name(type *v) \ | 70 | static inline int name(type *v) \ |
44 | { \ | 71 | { \ |
@@ -56,18 +83,52 @@ static inline int name(type *v) \ | |||
56 | return ok; \ | 83 | return ok; \ |
57 | } | 84 | } |
58 | 85 | ||
86 | #define GET_SEED(name, type, rdseed, nop) \ | ||
87 | static inline int name(type *v) \ | ||
88 | { \ | ||
89 | unsigned char ok; \ | ||
90 | alternative_io("movb $0, %0\n\t" \ | ||
91 | nop, \ | ||
92 | rdseed "\n\t" \ | ||
93 | "setc %0", \ | ||
94 | X86_FEATURE_RDSEED, \ | ||
95 | ASM_OUTPUT2("=q" (ok), "=a" (*v))); \ | ||
96 | return ok; \ | ||
97 | } | ||
98 | |||
59 | #ifdef CONFIG_X86_64 | 99 | #ifdef CONFIG_X86_64 |
60 | 100 | ||
61 | GET_RANDOM(arch_get_random_long, unsigned long, RDRAND_LONG, ASM_NOP5); | 101 | GET_RANDOM(arch_get_random_long, unsigned long, RDRAND_LONG, ASM_NOP5); |
62 | GET_RANDOM(arch_get_random_int, unsigned int, RDRAND_INT, ASM_NOP4); | 102 | GET_RANDOM(arch_get_random_int, unsigned int, RDRAND_INT, ASM_NOP4); |
63 | 103 | ||
104 | GET_SEED(arch_get_random_seed_long, unsigned long, RDSEED_LONG, ASM_NOP5); | ||
105 | GET_SEED(arch_get_random_seed_int, unsigned int, RDSEED_INT, ASM_NOP4); | ||
106 | |||
64 | #else | 107 | #else |
65 | 108 | ||
66 | GET_RANDOM(arch_get_random_long, unsigned long, RDRAND_LONG, ASM_NOP3); | 109 | GET_RANDOM(arch_get_random_long, unsigned long, RDRAND_LONG, ASM_NOP3); |
67 | GET_RANDOM(arch_get_random_int, unsigned int, RDRAND_INT, ASM_NOP3); | 110 | GET_RANDOM(arch_get_random_int, unsigned int, RDRAND_INT, ASM_NOP3); |
68 | 111 | ||
112 | GET_SEED(arch_get_random_seed_long, unsigned long, RDSEED_LONG, ASM_NOP4); | ||
113 | GET_SEED(arch_get_random_seed_int, unsigned int, RDSEED_INT, ASM_NOP4); | ||
114 | |||
69 | #endif /* CONFIG_X86_64 */ | 115 | #endif /* CONFIG_X86_64 */ |
70 | 116 | ||
117 | #define arch_has_random() static_cpu_has(X86_FEATURE_RDRAND) | ||
118 | #define arch_has_random_seed() static_cpu_has(X86_FEATURE_RDSEED) | ||
119 | |||
120 | #else | ||
121 | |||
122 | static inline int rdrand_long(unsigned long *v) | ||
123 | { | ||
124 | return 0; | ||
125 | } | ||
126 | |||
127 | static inline bool rdseed_long(unsigned long *v) | ||
128 | { | ||
129 | return false; | ||
130 | } | ||
131 | |||
71 | #endif /* CONFIG_ARCH_RANDOM */ | 132 | #endif /* CONFIG_ARCH_RANDOM */ |
72 | 133 | ||
73 | extern void x86_init_rdrand(struct cpuinfo_x86 *c); | 134 | extern void x86_init_rdrand(struct cpuinfo_x86 *c); |
diff --git a/arch/x86/include/asm/barrier.h b/arch/x86/include/asm/barrier.h index c6cd358a1eec..69bbb4845020 100644 --- a/arch/x86/include/asm/barrier.h +++ b/arch/x86/include/asm/barrier.h | |||
@@ -85,19 +85,56 @@ | |||
85 | #else | 85 | #else |
86 | # define smp_rmb() barrier() | 86 | # define smp_rmb() barrier() |
87 | #endif | 87 | #endif |
88 | #ifdef CONFIG_X86_OOSTORE | 88 | #define smp_wmb() barrier() |
89 | # define smp_wmb() wmb() | ||
90 | #else | ||
91 | # define smp_wmb() barrier() | ||
92 | #endif | ||
93 | #define smp_read_barrier_depends() read_barrier_depends() | 89 | #define smp_read_barrier_depends() read_barrier_depends() |
94 | #define set_mb(var, value) do { (void)xchg(&var, value); } while (0) | 90 | #define set_mb(var, value) do { (void)xchg(&var, value); } while (0) |
95 | #else | 91 | #else /* !SMP */ |
96 | #define smp_mb() barrier() | 92 | #define smp_mb() barrier() |
97 | #define smp_rmb() barrier() | 93 | #define smp_rmb() barrier() |
98 | #define smp_wmb() barrier() | 94 | #define smp_wmb() barrier() |
99 | #define smp_read_barrier_depends() do { } while (0) | 95 | #define smp_read_barrier_depends() do { } while (0) |
100 | #define set_mb(var, value) do { var = value; barrier(); } while (0) | 96 | #define set_mb(var, value) do { var = value; barrier(); } while (0) |
97 | #endif /* SMP */ | ||
98 | |||
99 | #if defined(CONFIG_X86_PPRO_FENCE) | ||
100 | |||
101 | /* | ||
102 | * With X86_PPRO_FENCE set, x86 doesn't provide a strong TSO memory | ||
103 | * model and we should fall back to full barriers. | ||
104 | */ | ||
105 | |||
106 | #define smp_store_release(p, v) \ | ||
107 | do { \ | ||
108 | compiletime_assert_atomic_type(*p); \ | ||
109 | smp_mb(); \ | ||
110 | ACCESS_ONCE(*p) = (v); \ | ||
111 | } while (0) | ||
112 | |||
113 | #define smp_load_acquire(p) \ | ||
114 | ({ \ | ||
115 | typeof(*p) ___p1 = ACCESS_ONCE(*p); \ | ||
116 | compiletime_assert_atomic_type(*p); \ | ||
117 | smp_mb(); \ | ||
118 | ___p1; \ | ||
119 | }) | ||
120 | |||
121 | #else /* regular x86 TSO memory ordering */ | ||
122 | |||
123 | #define smp_store_release(p, v) \ | ||
124 | do { \ | ||
125 | compiletime_assert_atomic_type(*p); \ | ||
126 | barrier(); \ | ||
127 | ACCESS_ONCE(*p) = (v); \ | ||
128 | } while (0) | ||
129 | |||
130 | #define smp_load_acquire(p) \ | ||
131 | ({ \ | ||
132 | typeof(*p) ___p1 = ACCESS_ONCE(*p); \ | ||
133 | compiletime_assert_atomic_type(*p); \ | ||
134 | barrier(); \ | ||
135 | ___p1; \ | ||
136 | }) | ||
137 | |||
101 | #endif | 138 | #endif |
102 | 139 | ||
103 | /* | 140 | /* |
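
The new primitives give the usual one-way message-passing guarantee; on regular TSO x86 they compile down to plain stores/loads plus a compiler barrier(), as the second branch above shows. A minimal sketch of the intended pairing (hypothetical producer/consumer, not from this patch):

    static int payload;
    static int flag;

    /* Producer: payload is globally visible before flag reads 1. */
    static void publish(int v)
    {
        payload = v;
        smp_store_release(&flag, 1);
    }

    /* Consumer: observing flag == 1 guarantees seeing the payload. */
    static int consume(int *v)
    {
        if (smp_load_acquire(&flag)) {
            *v = payload;
            return 1;
        }
        return 0;
    }
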
diff --git a/arch/x86/include/asm/bug.h b/arch/x86/include/asm/bug.h index 2f03ff018d36..ba38ebbaced3 100644 --- a/arch/x86/include/asm/bug.h +++ b/arch/x86/include/asm/bug.h | |||
@@ -1,7 +1,6 @@ | |||
1 | #ifndef _ASM_X86_BUG_H | 1 | #ifndef _ASM_X86_BUG_H |
2 | #define _ASM_X86_BUG_H | 2 | #define _ASM_X86_BUG_H |
3 | 3 | ||
4 | #ifdef CONFIG_BUG | ||
5 | #define HAVE_ARCH_BUG | 4 | #define HAVE_ARCH_BUG |
6 | 5 | ||
7 | #ifdef CONFIG_DEBUG_BUGVERBOSE | 6 | #ifdef CONFIG_DEBUG_BUGVERBOSE |
@@ -33,8 +32,6 @@ do { \ | |||
33 | } while (0) | 32 | } while (0) |
34 | #endif | 33 | #endif |
35 | 34 | ||
36 | #endif /* !CONFIG_BUG */ | ||
37 | |||
38 | #include <asm-generic/bug.h> | 35 | #include <asm-generic/bug.h> |
39 | 36 | ||
40 | #endif /* _ASM_X86_BUG_H */ | 37 | #endif /* _ASM_X86_BUG_H */ |
diff --git a/arch/x86/include/asm/clocksource.h b/arch/x86/include/asm/clocksource.h index 16a57f4ed64d..eda81dc0f4ae 100644 --- a/arch/x86/include/asm/clocksource.h +++ b/arch/x86/include/asm/clocksource.h | |||
@@ -3,8 +3,6 @@ | |||
3 | #ifndef _ASM_X86_CLOCKSOURCE_H | 3 | #ifndef _ASM_X86_CLOCKSOURCE_H |
4 | #define _ASM_X86_CLOCKSOURCE_H | 4 | #define _ASM_X86_CLOCKSOURCE_H |
5 | 5 | ||
6 | #ifdef CONFIG_X86_64 | ||
7 | |||
8 | #define VCLOCK_NONE 0 /* No vDSO clock available. */ | 6 | #define VCLOCK_NONE 0 /* No vDSO clock available. */ |
9 | #define VCLOCK_TSC 1 /* vDSO should use vread_tsc. */ | 7 | #define VCLOCK_TSC 1 /* vDSO should use vread_tsc. */ |
10 | #define VCLOCK_HPET 2 /* vDSO should use vread_hpet. */ | 8 | #define VCLOCK_HPET 2 /* vDSO should use vread_hpet. */ |
@@ -14,6 +12,4 @@ struct arch_clocksource_data { | |||
14 | int vclock_mode; | 12 | int vclock_mode; |
15 | }; | 13 | }; |
16 | 14 | ||
17 | #endif /* CONFIG_X86_64 */ | ||
18 | |||
19 | #endif /* _ASM_X86_CLOCKSOURCE_H */ | 15 | #endif /* _ASM_X86_CLOCKSOURCE_H */ |
diff --git a/arch/x86/include/asm/cpufeature.h b/arch/x86/include/asm/cpufeature.h index 89270b4318db..e265ff95d16d 100644 --- a/arch/x86/include/asm/cpufeature.h +++ b/arch/x86/include/asm/cpufeature.h | |||
@@ -37,7 +37,7 @@ | |||
37 | #define X86_FEATURE_PAT (0*32+16) /* Page Attribute Table */ | 37 | #define X86_FEATURE_PAT (0*32+16) /* Page Attribute Table */ |
38 | #define X86_FEATURE_PSE36 (0*32+17) /* 36-bit PSEs */ | 38 | #define X86_FEATURE_PSE36 (0*32+17) /* 36-bit PSEs */ |
39 | #define X86_FEATURE_PN (0*32+18) /* Processor serial number */ | 39 | #define X86_FEATURE_PN (0*32+18) /* Processor serial number */ |
40 | #define X86_FEATURE_CLFLSH (0*32+19) /* "clflush" CLFLUSH instruction */ | 40 | #define X86_FEATURE_CLFLUSH (0*32+19) /* CLFLUSH instruction */ |
41 | #define X86_FEATURE_DS (0*32+21) /* "dts" Debug Store */ | 41 | #define X86_FEATURE_DS (0*32+21) /* "dts" Debug Store */ |
42 | #define X86_FEATURE_ACPI (0*32+22) /* ACPI via MSR */ | 42 | #define X86_FEATURE_ACPI (0*32+22) /* ACPI via MSR */ |
43 | #define X86_FEATURE_MMX (0*32+23) /* Multimedia Extensions */ | 43 | #define X86_FEATURE_MMX (0*32+23) /* Multimedia Extensions */ |
@@ -216,9 +216,15 @@ | |||
216 | #define X86_FEATURE_ERMS (9*32+ 9) /* Enhanced REP MOVSB/STOSB */ | 216 | #define X86_FEATURE_ERMS (9*32+ 9) /* Enhanced REP MOVSB/STOSB */ |
217 | #define X86_FEATURE_INVPCID (9*32+10) /* Invalidate Processor Context ID */ | 217 | #define X86_FEATURE_INVPCID (9*32+10) /* Invalidate Processor Context ID */ |
218 | #define X86_FEATURE_RTM (9*32+11) /* Restricted Transactional Memory */ | 218 | #define X86_FEATURE_RTM (9*32+11) /* Restricted Transactional Memory */ |
219 | #define X86_FEATURE_MPX (9*32+14) /* Memory Protection Extension */ | ||
220 | #define X86_FEATURE_AVX512F (9*32+16) /* AVX-512 Foundation */ | ||
219 | #define X86_FEATURE_RDSEED (9*32+18) /* The RDSEED instruction */ | 221 | #define X86_FEATURE_RDSEED (9*32+18) /* The RDSEED instruction */ |
220 | #define X86_FEATURE_ADX (9*32+19) /* The ADCX and ADOX instructions */ | 222 | #define X86_FEATURE_ADX (9*32+19) /* The ADCX and ADOX instructions */ |
221 | #define X86_FEATURE_SMAP (9*32+20) /* Supervisor Mode Access Prevention */ | 223 | #define X86_FEATURE_SMAP (9*32+20) /* Supervisor Mode Access Prevention */ |
224 | #define X86_FEATURE_CLFLUSHOPT (9*32+23) /* CLFLUSHOPT instruction */ | ||
225 | #define X86_FEATURE_AVX512PF (9*32+26) /* AVX-512 Prefetch */ | ||
226 | #define X86_FEATURE_AVX512ER (9*32+27) /* AVX-512 Exponential and Reciprocal */ | ||
227 | #define X86_FEATURE_AVX512CD (9*32+28) /* AVX-512 Conflict Detection */ | ||
222 | 228 | ||
223 | /* | 229 | /* |
224 | * BUG word(s) | 230 | * BUG word(s) |
@@ -312,7 +318,7 @@ extern const char * const x86_power_flags[32]; | |||
312 | #define cpu_has_pmm_enabled boot_cpu_has(X86_FEATURE_PMM_EN) | 318 | #define cpu_has_pmm_enabled boot_cpu_has(X86_FEATURE_PMM_EN) |
313 | #define cpu_has_ds boot_cpu_has(X86_FEATURE_DS) | 319 | #define cpu_has_ds boot_cpu_has(X86_FEATURE_DS) |
314 | #define cpu_has_pebs boot_cpu_has(X86_FEATURE_PEBS) | 320 | #define cpu_has_pebs boot_cpu_has(X86_FEATURE_PEBS) |
315 | #define cpu_has_clflush boot_cpu_has(X86_FEATURE_CLFLSH) | 321 | #define cpu_has_clflush boot_cpu_has(X86_FEATURE_CLFLUSH) |
316 | #define cpu_has_bts boot_cpu_has(X86_FEATURE_BTS) | 322 | #define cpu_has_bts boot_cpu_has(X86_FEATURE_BTS) |
317 | #define cpu_has_gbpages boot_cpu_has(X86_FEATURE_GBPAGES) | 323 | #define cpu_has_gbpages boot_cpu_has(X86_FEATURE_GBPAGES) |
318 | #define cpu_has_arch_perfmon boot_cpu_has(X86_FEATURE_ARCH_PERFMON) | 324 | #define cpu_has_arch_perfmon boot_cpu_has(X86_FEATURE_ARCH_PERFMON) |
@@ -540,6 +546,13 @@ static __always_inline __pure bool _static_cpu_has_safe(u16 bit) | |||
540 | #define static_cpu_has_bug(bit) static_cpu_has((bit)) | 546 | #define static_cpu_has_bug(bit) static_cpu_has((bit)) |
541 | #define boot_cpu_has_bug(bit) cpu_has_bug(&boot_cpu_data, (bit)) | 547 | #define boot_cpu_has_bug(bit) cpu_has_bug(&boot_cpu_data, (bit)) |
542 | 548 | ||
549 | #define MAX_CPU_FEATURES (NCAPINTS * 32) | ||
550 | #define cpu_have_feature boot_cpu_has | ||
551 | |||
552 | #define CPU_FEATURE_TYPEFMT "x86,ven%04Xfam%04Xmod%04X" | ||
553 | #define CPU_FEATURE_TYPEVAL boot_cpu_data.x86_vendor, boot_cpu_data.x86, \ | ||
554 | boot_cpu_data.x86_model | ||
555 | |||
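
Worked from the format string above: an Intel part (vendor 0), family 6, model 0x3A would autoprobe with the modalias prefix

    "x86,ven0000fam0006mod003A"

which is what GENERIC_CPU_AUTOPROBE matches feature-based module aliases against (the vendor/family/model values here are an illustrative example).
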
543 | #endif /* defined(__KERNEL__) && !defined(__ASSEMBLY__) */ | 556 | #endif /* defined(__KERNEL__) && !defined(__ASSEMBLY__) */ |
544 | 557 | ||
545 | #endif /* _ASM_X86_CPUFEATURE_H */ | 558 | #endif /* _ASM_X86_CPUFEATURE_H */ |
diff --git a/arch/x86/include/asm/cputime.h b/arch/x86/include/asm/cputime.h deleted file mode 100644 index 6d68ad7e0ea3..000000000000 --- a/arch/x86/include/asm/cputime.h +++ /dev/null | |||
@@ -1 +0,0 @@ | |||
1 | #include <asm-generic/cputime.h> | ||
diff --git a/arch/x86/include/asm/dmi.h b/arch/x86/include/asm/dmi.h index fd8f9e2ca35f..535192f6bfad 100644 --- a/arch/x86/include/asm/dmi.h +++ b/arch/x86/include/asm/dmi.h | |||
@@ -13,7 +13,9 @@ static __always_inline __init void *dmi_alloc(unsigned len) | |||
13 | } | 13 | } |
14 | 14 | ||
15 | /* Use early IO mappings for DMI because it's initialized early */ | 15 | /* Use early IO mappings for DMI because it's initialized early */ |
16 | #define dmi_ioremap early_ioremap | 16 | #define dmi_early_remap early_ioremap |
17 | #define dmi_iounmap early_iounmap | 17 | #define dmi_early_unmap early_iounmap |
18 | #define dmi_remap ioremap | ||
19 | #define dmi_unmap iounmap | ||
18 | 20 | ||
19 | #endif /* _ASM_X86_DMI_H */ | 21 | #endif /* _ASM_X86_DMI_H */ |
diff --git a/arch/x86/include/asm/efi.h b/arch/x86/include/asm/efi.h index 65c6e6e3a552..0869434eaf72 100644 --- a/arch/x86/include/asm/efi.h +++ b/arch/x86/include/asm/efi.h | |||
@@ -1,9 +1,29 @@ | |||
1 | #ifndef _ASM_X86_EFI_H | 1 | #ifndef _ASM_X86_EFI_H |
2 | #define _ASM_X86_EFI_H | 2 | #define _ASM_X86_EFI_H |
3 | 3 | ||
4 | /* | ||
5 | * We map the EFI regions needed for runtime services non-contiguously, | ||
6 | * with preserved alignment on virtual addresses starting from -4G down | ||
7 | * for a total max space of 64G. This way, we provide for stable runtime | ||
8 | * services addresses across kernels so that a kexec'd kernel can still | ||
9 | * use them. | ||
10 | * | ||
11 | * This is the main reason why we're doing stable VA mappings for RT | ||
12 | * services. | ||
13 | * | ||
14 | * This flag is used in conjunction with a chicken bit called | ||
15 | * "efi=old_map" which can be used as a fallback to the old runtime | ||
16 | * services mapping method in case there's some b0rkage with a | ||
17 | * particular EFI implementation (haha, it is hard to hold up the | ||
18 | * sarcasm here...). | ||
19 | */ | ||
20 | #define EFI_OLD_MEMMAP EFI_ARCH_1 | ||
21 | |||
22 | #define EFI32_LOADER_SIGNATURE "EL32" | ||
23 | #define EFI64_LOADER_SIGNATURE "EL64" | ||
24 | |||
4 | #ifdef CONFIG_X86_32 | 25 | #ifdef CONFIG_X86_32 |
5 | 26 | ||
6 | #define EFI_LOADER_SIGNATURE "EL32" | ||
7 | 27 | ||
8 | extern unsigned long asmlinkage efi_call_phys(void *, ...); | 28 | extern unsigned long asmlinkage efi_call_phys(void *, ...); |
9 | 29 | ||
@@ -39,8 +59,6 @@ extern unsigned long asmlinkage efi_call_phys(void *, ...); | |||
39 | 59 | ||
40 | #else /* !CONFIG_X86_32 */ | 60 | #else /* !CONFIG_X86_32 */ |
41 | 61 | ||
42 | #define EFI_LOADER_SIGNATURE "EL64" | ||
43 | |||
44 | extern u64 efi_call0(void *fp); | 62 | extern u64 efi_call0(void *fp); |
45 | extern u64 efi_call1(void *fp, u64 arg1); | 63 | extern u64 efi_call1(void *fp, u64 arg1); |
46 | extern u64 efi_call2(void *fp, u64 arg1, u64 arg2); | 64 | extern u64 efi_call2(void *fp, u64 arg1, u64 arg2); |
@@ -69,24 +87,31 @@ extern u64 efi_call6(void *fp, u64 arg1, u64 arg2, u64 arg3, | |||
69 | efi_call6((f), (u64)(a1), (u64)(a2), (u64)(a3), \ | 87 | efi_call6((f), (u64)(a1), (u64)(a2), (u64)(a3), \ |
70 | (u64)(a4), (u64)(a5), (u64)(a6)) | 88 | (u64)(a4), (u64)(a5), (u64)(a6)) |
71 | 89 | ||
90 | #define _efi_call_virtX(x, f, ...) \ | ||
91 | ({ \ | ||
92 | efi_status_t __s; \ | ||
93 | \ | ||
94 | efi_sync_low_kernel_mappings(); \ | ||
95 | preempt_disable(); \ | ||
96 | __s = efi_call##x((void *)efi.systab->runtime->f, __VA_ARGS__); \ | ||
97 | preempt_enable(); \ | ||
98 | __s; \ | ||
99 | }) | ||
100 | |||
72 | #define efi_call_virt0(f) \ | 101 | #define efi_call_virt0(f) \ |
73 | efi_call0((efi.systab->runtime->f)) | 102 | _efi_call_virtX(0, f) |
74 | #define efi_call_virt1(f, a1) \ | 103 | #define efi_call_virt1(f, a1) \ |
75 | efi_call1((efi.systab->runtime->f), (u64)(a1)) | 104 | _efi_call_virtX(1, f, (u64)(a1)) |
76 | #define efi_call_virt2(f, a1, a2) \ | 105 | #define efi_call_virt2(f, a1, a2) \ |
77 | efi_call2((efi.systab->runtime->f), (u64)(a1), (u64)(a2)) | 106 | _efi_call_virtX(2, f, (u64)(a1), (u64)(a2)) |
78 | #define efi_call_virt3(f, a1, a2, a3) \ | 107 | #define efi_call_virt3(f, a1, a2, a3) \ |
79 | efi_call3((efi.systab->runtime->f), (u64)(a1), (u64)(a2), \ | 108 | _efi_call_virtX(3, f, (u64)(a1), (u64)(a2), (u64)(a3)) |
80 | (u64)(a3)) | 109 | #define efi_call_virt4(f, a1, a2, a3, a4) \ |
81 | #define efi_call_virt4(f, a1, a2, a3, a4) \ | 110 | _efi_call_virtX(4, f, (u64)(a1), (u64)(a2), (u64)(a3), (u64)(a4)) |
82 | efi_call4((efi.systab->runtime->f), (u64)(a1), (u64)(a2), \ | 111 | #define efi_call_virt5(f, a1, a2, a3, a4, a5) \ |
83 | (u64)(a3), (u64)(a4)) | 112 | _efi_call_virtX(5, f, (u64)(a1), (u64)(a2), (u64)(a3), (u64)(a4), (u64)(a5)) |
84 | #define efi_call_virt5(f, a1, a2, a3, a4, a5) \ | 113 | #define efi_call_virt6(f, a1, a2, a3, a4, a5, a6) \ |
85 | efi_call5((efi.systab->runtime->f), (u64)(a1), (u64)(a2), \ | 114 | _efi_call_virtX(6, f, (u64)(a1), (u64)(a2), (u64)(a3), (u64)(a4), (u64)(a5), (u64)(a6)) |
86 | (u64)(a3), (u64)(a4), (u64)(a5)) | ||
87 | #define efi_call_virt6(f, a1, a2, a3, a4, a5, a6) \ | ||
88 | efi_call6((efi.systab->runtime->f), (u64)(a1), (u64)(a2), \ | ||
89 | (u64)(a3), (u64)(a4), (u64)(a5), (u64)(a6)) | ||
90 | 115 | ||
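
Expanding the one-argument case by hand (my expansion, for illustration), efi_call_virt1(get_time, tm) now behaves like:

    efi_status_t __s;

    efi_sync_low_kernel_mappings();	/* keep the EFI-owned pgd entries in sync */
    preempt_disable();			/* the runtime call must not migrate */
    __s = efi_call1((void *)efi.systab->runtime->get_time, (u64)(tm));
    preempt_enable();

whereas previously it called straight into efi_call1() with no mapping sync or preemption control around it.
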
91 | extern void __iomem *efi_ioremap(unsigned long addr, unsigned long size, | 116 | extern void __iomem *efi_ioremap(unsigned long addr, unsigned long size, |
92 | u32 type, u64 attribute); | 117 | u32 type, u64 attribute); |
@@ -94,13 +119,33 @@ extern void __iomem *efi_ioremap(unsigned long addr, unsigned long size, | |||
94 | #endif /* CONFIG_X86_32 */ | 119 | #endif /* CONFIG_X86_32 */ |
95 | 120 | ||
96 | extern int add_efi_memmap; | 121 | extern int add_efi_memmap; |
97 | extern unsigned long x86_efi_facility; | 122 | extern struct efi_scratch efi_scratch; |
98 | extern void efi_set_executable(efi_memory_desc_t *md, bool executable); | 123 | extern void efi_set_executable(efi_memory_desc_t *md, bool executable); |
99 | extern int efi_memblock_x86_reserve_range(void); | 124 | extern int efi_memblock_x86_reserve_range(void); |
100 | extern void efi_call_phys_prelog(void); | 125 | extern void efi_call_phys_prelog(void); |
101 | extern void efi_call_phys_epilog(void); | 126 | extern void efi_call_phys_epilog(void); |
102 | extern void efi_unmap_memmap(void); | 127 | extern void efi_unmap_memmap(void); |
103 | extern void efi_memory_uc(u64 addr, unsigned long size); | 128 | extern void efi_memory_uc(u64 addr, unsigned long size); |
129 | extern void __init efi_map_region(efi_memory_desc_t *md); | ||
130 | extern void __init efi_map_region_fixed(efi_memory_desc_t *md); | ||
131 | extern void efi_sync_low_kernel_mappings(void); | ||
132 | extern int efi_setup_page_tables(unsigned long pa_memmap, unsigned num_pages); | ||
133 | extern void efi_cleanup_page_tables(unsigned long pa_memmap, unsigned num_pages); | ||
134 | extern void __init old_map_region(efi_memory_desc_t *md); | ||
135 | extern void __init runtime_code_page_mkexec(void); | ||
136 | extern void __init efi_runtime_mkexec(void); | ||
137 | extern void __init efi_dump_pagetable(void); | ||
138 | extern void __init efi_apply_memmap_quirks(void); | ||
139 | |||
140 | struct efi_setup_data { | ||
141 | u64 fw_vendor; | ||
142 | u64 runtime; | ||
143 | u64 tables; | ||
144 | u64 smbios; | ||
145 | u64 reserved[8]; | ||
146 | }; | ||
147 | |||
148 | extern u64 efi_setup; | ||
104 | 149 | ||
105 | #ifdef CONFIG_EFI | 150 | #ifdef CONFIG_EFI |
106 | 151 | ||
@@ -109,8 +154,40 @@ static inline bool efi_is_native(void) | |||
109 | return IS_ENABLED(CONFIG_X86_64) == efi_enabled(EFI_64BIT); | 154 | return IS_ENABLED(CONFIG_X86_64) == efi_enabled(EFI_64BIT); |
110 | } | 155 | } |
111 | 156 | ||
112 | extern struct console early_efi_console; | 157 | static inline bool efi_runtime_supported(void) |
158 | { | ||
159 | if (efi_is_native()) | ||
160 | return true; | ||
161 | |||
162 | if (IS_ENABLED(CONFIG_EFI_MIXED) && !efi_enabled(EFI_OLD_MEMMAP)) | ||
163 | return true; | ||
164 | |||
165 | return false; | ||
166 | } | ||
113 | 167 | ||
168 | extern struct console early_efi_console; | ||
169 | extern void parse_efi_setup(u64 phys_addr, u32 data_len); | ||
170 | |||
171 | #ifdef CONFIG_EFI_MIXED | ||
172 | extern void efi_thunk_runtime_setup(void); | ||
173 | extern efi_status_t efi_thunk_set_virtual_address_map( | ||
174 | void *phys_set_virtual_address_map, | ||
175 | unsigned long memory_map_size, | ||
176 | unsigned long descriptor_size, | ||
177 | u32 descriptor_version, | ||
178 | efi_memory_desc_t *virtual_map); | ||
179 | #else | ||
180 | static inline void efi_thunk_runtime_setup(void) {} | ||
181 | static inline efi_status_t efi_thunk_set_virtual_address_map( | ||
182 | void *phys_set_virtual_address_map, | ||
183 | unsigned long memory_map_size, | ||
184 | unsigned long descriptor_size, | ||
185 | u32 descriptor_version, | ||
186 | efi_memory_desc_t *virtual_map) | ||
187 | { | ||
188 | return EFI_SUCCESS; | ||
189 | } | ||
190 | #endif /* CONFIG_EFI_MIXED */ | ||
114 | #else | 191 | #else |
115 | /* | 192 | /* |
116 | * If EFI is not configured, have the EFI calls return -ENOSYS. | 193 | * If EFI is not configured, have the EFI calls return -ENOSYS. |
@@ -122,6 +199,7 @@ extern struct console early_efi_console; | |||
122 | #define efi_call4(_f, _a1, _a2, _a3, _a4) (-ENOSYS) | 199 | #define efi_call4(_f, _a1, _a2, _a3, _a4) (-ENOSYS) |
123 | #define efi_call5(_f, _a1, _a2, _a3, _a4, _a5) (-ENOSYS) | 200 | #define efi_call5(_f, _a1, _a2, _a3, _a4, _a5) (-ENOSYS) |
124 | #define efi_call6(_f, _a1, _a2, _a3, _a4, _a5, _a6) (-ENOSYS) | 201 | #define efi_call6(_f, _a1, _a2, _a3, _a4, _a5, _a6) (-ENOSYS) |
202 | static inline void parse_efi_setup(u64 phys_addr, u32 data_len) {} | ||
125 | #endif /* CONFIG_EFI */ | 203 | #endif /* CONFIG_EFI */ |
126 | 204 | ||
127 | #endif /* _ASM_X86_EFI_H */ | 205 | #endif /* _ASM_X86_EFI_H */ |
diff --git a/arch/x86/include/asm/elf.h b/arch/x86/include/asm/elf.h index 9c999c1674fa..2c71182d30ef 100644 --- a/arch/x86/include/asm/elf.h +++ b/arch/x86/include/asm/elf.h | |||
@@ -281,16 +281,12 @@ do { \ | |||
281 | 281 | ||
282 | #define STACK_RND_MASK (0x7ff) | 282 | #define STACK_RND_MASK (0x7ff) |
283 | 283 | ||
284 | #define VDSO_HIGH_BASE (__fix_to_virt(FIX_VDSO)) | ||
285 | |||
286 | #define ARCH_DLINFO ARCH_DLINFO_IA32(vdso_enabled) | 284 | #define ARCH_DLINFO ARCH_DLINFO_IA32(vdso_enabled) |
287 | 285 | ||
288 | /* update AT_VECTOR_SIZE_ARCH if the number of NEW_AUX_ENT entries changes */ | 286 | /* update AT_VECTOR_SIZE_ARCH if the number of NEW_AUX_ENT entries changes */ |
289 | 287 | ||
290 | #else /* CONFIG_X86_32 */ | 288 | #else /* CONFIG_X86_32 */ |
291 | 289 | ||
292 | #define VDSO_HIGH_BASE 0xffffe000U /* CONFIG_COMPAT_VDSO address */ | ||
293 | |||
294 | /* 1GB for 64bit, 8MB for 32bit */ | 290 | /* 1GB for 64bit, 8MB for 32bit */ |
295 | #define STACK_RND_MASK (test_thread_flag(TIF_ADDR32) ? 0x7ff : 0x3fffff) | 291 | #define STACK_RND_MASK (test_thread_flag(TIF_ADDR32) ? 0x7ff : 0x3fffff) |
296 | 292 | ||
diff --git a/arch/x86/include/asm/fixmap.h b/arch/x86/include/asm/fixmap.h index e846225265ed..43f482a0db37 100644 --- a/arch/x86/include/asm/fixmap.h +++ b/arch/x86/include/asm/fixmap.h | |||
@@ -40,15 +40,8 @@ | |||
40 | */ | 40 | */ |
41 | extern unsigned long __FIXADDR_TOP; | 41 | extern unsigned long __FIXADDR_TOP; |
42 | #define FIXADDR_TOP ((unsigned long)__FIXADDR_TOP) | 42 | #define FIXADDR_TOP ((unsigned long)__FIXADDR_TOP) |
43 | |||
44 | #define FIXADDR_USER_START __fix_to_virt(FIX_VDSO) | ||
45 | #define FIXADDR_USER_END __fix_to_virt(FIX_VDSO - 1) | ||
46 | #else | 43 | #else |
47 | #define FIXADDR_TOP (VSYSCALL_END-PAGE_SIZE) | 44 | #define FIXADDR_TOP (VSYSCALL_END-PAGE_SIZE) |
48 | |||
49 | /* Only covers 32bit vsyscalls currently. Need another set for 64bit. */ | ||
50 | #define FIXADDR_USER_START ((unsigned long)VSYSCALL32_VSYSCALL) | ||
51 | #define FIXADDR_USER_END (FIXADDR_USER_START + PAGE_SIZE) | ||
52 | #endif | 45 | #endif |
53 | 46 | ||
54 | 47 | ||
@@ -74,7 +67,6 @@ extern unsigned long __FIXADDR_TOP; | |||
74 | enum fixed_addresses { | 67 | enum fixed_addresses { |
75 | #ifdef CONFIG_X86_32 | 68 | #ifdef CONFIG_X86_32 |
76 | FIX_HOLE, | 69 | FIX_HOLE, |
77 | FIX_VDSO, | ||
78 | #else | 70 | #else |
79 | VSYSCALL_LAST_PAGE, | 71 | VSYSCALL_LAST_PAGE, |
80 | VSYSCALL_FIRST_PAGE = VSYSCALL_LAST_PAGE | 72 | VSYSCALL_FIRST_PAGE = VSYSCALL_LAST_PAGE |
@@ -98,12 +90,6 @@ enum fixed_addresses { | |||
98 | FIX_IO_APIC_BASE_0, | 90 | FIX_IO_APIC_BASE_0, |
99 | FIX_IO_APIC_BASE_END = FIX_IO_APIC_BASE_0 + MAX_IO_APICS - 1, | 91 | FIX_IO_APIC_BASE_END = FIX_IO_APIC_BASE_0 + MAX_IO_APICS - 1, |
100 | #endif | 92 | #endif |
101 | #ifdef CONFIG_X86_VISWS_APIC | ||
102 | FIX_CO_CPU, /* Cobalt timer */ | ||
103 | FIX_CO_APIC, /* Cobalt APIC Redirection Table */ | ||
104 | FIX_LI_PCIA, /* Lithium PCI Bridge A */ | ||
105 | FIX_LI_PCIB, /* Lithium PCI Bridge B */ | ||
106 | #endif | ||
107 | FIX_RO_IDT, /* Virtual mapping for read-only IDT */ | 93 | FIX_RO_IDT, /* Virtual mapping for read-only IDT */ |
108 | #ifdef CONFIG_X86_32 | 94 | #ifdef CONFIG_X86_32 |
109 | FIX_KMAP_BEGIN, /* reserved pte's for temporary kernel mappings */ | 95 | FIX_KMAP_BEGIN, /* reserved pte's for temporary kernel mappings */ |
@@ -175,64 +161,13 @@ static inline void __set_fixmap(enum fixed_addresses idx, | |||
175 | } | 161 | } |
176 | #endif | 162 | #endif |
177 | 163 | ||
178 | #define set_fixmap(idx, phys) \ | 164 | #include <asm-generic/fixmap.h> |
179 | __set_fixmap(idx, phys, PAGE_KERNEL) | ||
180 | |||
181 | /* | ||
182 | * Some hardware wants to get fixmapped without caching. | ||
183 | */ | ||
184 | #define set_fixmap_nocache(idx, phys) \ | ||
185 | __set_fixmap(idx, phys, PAGE_KERNEL_NOCACHE) | ||
186 | |||
187 | #define clear_fixmap(idx) \ | ||
188 | __set_fixmap(idx, 0, __pgprot(0)) | ||
189 | |||
190 | #define __fix_to_virt(x) (FIXADDR_TOP - ((x) << PAGE_SHIFT)) | ||
191 | #define __virt_to_fix(x) ((FIXADDR_TOP - ((x)&PAGE_MASK)) >> PAGE_SHIFT) | ||
192 | |||
193 | extern void __this_fixmap_does_not_exist(void); | ||
194 | |||
195 | /* | ||
196 | * 'index to address' translation. If anyone tries to use the idx | ||
197 | * directly without translation, we catch the bug with a NULL-deference | ||
198 | * kernel oops. Illegal ranges of incoming indices are caught too. | ||
199 | */ | ||
200 | static __always_inline unsigned long fix_to_virt(const unsigned int idx) | ||
201 | { | ||
202 | /* | ||
203 | * this branch gets completely eliminated after inlining, | ||
204 | * except when someone tries to use fixaddr indices in an | ||
205 | * illegal way. (such as mixing up address types or using | ||
206 | * out-of-range indices). | ||
207 | * | ||
208 | * If it doesn't get removed, the linker will complain | ||
209 | * loudly with a reasonably clear error message.. | ||
210 | */ | ||
211 | if (idx >= __end_of_fixed_addresses) | ||
212 | __this_fixmap_does_not_exist(); | ||
213 | |||
214 | return __fix_to_virt(idx); | ||
215 | } | ||
216 | |||
217 | static inline unsigned long virt_to_fix(const unsigned long vaddr) | ||
218 | { | ||
219 | BUG_ON(vaddr >= FIXADDR_TOP || vaddr < FIXADDR_START); | ||
220 | return __virt_to_fix(vaddr); | ||
221 | } | ||
222 | |||
223 | /* Return an pointer with offset calculated */ | ||
224 | static __always_inline unsigned long | ||
225 | __set_fixmap_offset(enum fixed_addresses idx, phys_addr_t phys, pgprot_t flags) | ||
226 | { | ||
227 | __set_fixmap(idx, phys, flags); | ||
228 | return fix_to_virt(idx) + (phys & (PAGE_SIZE - 1)); | ||
229 | } | ||
230 | 165 | ||
231 | #define set_fixmap_offset(idx, phys) \ | 166 | #define __late_set_fixmap(idx, phys, flags) __set_fixmap(idx, phys, flags) |
232 | __set_fixmap_offset(idx, phys, PAGE_KERNEL) | 167 | #define __late_clear_fixmap(idx) __set_fixmap(idx, 0, __pgprot(0)) |
233 | 168 | ||
234 | #define set_fixmap_offset_nocache(idx, phys) \ | 169 | void __early_set_fixmap(enum fixed_addresses idx, |
235 | __set_fixmap_offset(idx, phys, PAGE_KERNEL_NOCACHE) | 170 | phys_addr_t phys, pgprot_t flags); |
236 | 171 | ||
237 | #endif /* !__ASSEMBLY__ */ | 172 | #endif /* !__ASSEMBLY__ */ |
238 | #endif /* _ASM_X86_FIXMAP_H */ | 173 | #endif /* _ASM_X86_FIXMAP_H */ |
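
The fix_to_virt()/virt_to_fix() arithmetic deleted above survives unchanged in asm-generic/fixmap.h: slot N sits N pages below FIXADDR_TOP. A minimal userspace sketch of that translation, with FIXADDR_TOP and PAGE_SHIFT values assumed purely for illustration:

#include <stdio.h>

/* Illustrative values only; the real FIXADDR_TOP is configuration dependent. */
#define PAGE_SHIFT	12
#define PAGE_SIZE	(1UL << PAGE_SHIFT)
#define PAGE_MASK	(~(PAGE_SIZE - 1))
#define FIXADDR_TOP	0xfffff000UL

/* Same arithmetic as __fix_to_virt()/__virt_to_fix(): indices grow downward. */
static unsigned long fix_to_virt(unsigned int idx)
{
	return FIXADDR_TOP - (idx << PAGE_SHIFT);
}

static unsigned int virt_to_fix(unsigned long vaddr)
{
	return (FIXADDR_TOP - (vaddr & PAGE_MASK)) >> PAGE_SHIFT;
}

int main(void)
{
	for (unsigned int idx = 0; idx < 4; idx++)
		printf("idx %u -> %#lx (round trip %u)\n", idx,
		       fix_to_virt(idx), virt_to_fix(fix_to_virt(idx)));
	return 0;
}

The round trip works because the masking in virt_to_fix() only strips the sub-page offset before reversing the subtraction.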
diff --git a/arch/x86/include/asm/floppy.h b/arch/x86/include/asm/floppy.h index d3d74698dce9..1c7eefe32502 100644 --- a/arch/x86/include/asm/floppy.h +++ b/arch/x86/include/asm/floppy.h | |||
@@ -145,10 +145,10 @@ static int fd_request_irq(void) | |||
145 | { | 145 | { |
146 | if (can_use_virtual_dma) | 146 | if (can_use_virtual_dma) |
147 | return request_irq(FLOPPY_IRQ, floppy_hardint, | 147 | return request_irq(FLOPPY_IRQ, floppy_hardint, |
148 | IRQF_DISABLED, "floppy", NULL); | 148 | 0, "floppy", NULL); |
149 | else | 149 | else |
150 | return request_irq(FLOPPY_IRQ, floppy_interrupt, | 150 | return request_irq(FLOPPY_IRQ, floppy_interrupt, |
151 | IRQF_DISABLED, "floppy", NULL); | 151 | 0, "floppy", NULL); |
152 | } | 152 | } |
153 | 153 | ||
154 | static unsigned long dma_mem_alloc(unsigned long size) | 154 | static unsigned long dma_mem_alloc(unsigned long size) |
diff --git a/arch/x86/include/asm/futex.h b/arch/x86/include/asm/futex.h index be27ba1e947a..b4c1f5453436 100644 --- a/arch/x86/include/asm/futex.h +++ b/arch/x86/include/asm/futex.h | |||
@@ -110,26 +110,7 @@ static inline int futex_atomic_op_inuser(int encoded_op, u32 __user *uaddr) | |||
110 | static inline int futex_atomic_cmpxchg_inatomic(u32 *uval, u32 __user *uaddr, | 110 | static inline int futex_atomic_cmpxchg_inatomic(u32 *uval, u32 __user *uaddr, |
111 | u32 oldval, u32 newval) | 111 | u32 oldval, u32 newval) |
112 | { | 112 | { |
113 | int ret = 0; | 113 | return user_atomic_cmpxchg_inatomic(uval, uaddr, oldval, newval); |
114 | |||
115 | if (!access_ok(VERIFY_WRITE, uaddr, sizeof(u32))) | ||
116 | return -EFAULT; | ||
117 | |||
118 | asm volatile("\t" ASM_STAC "\n" | ||
119 | "1:\t" LOCK_PREFIX "cmpxchgl %4, %2\n" | ||
120 | "2:\t" ASM_CLAC "\n" | ||
121 | "\t.section .fixup, \"ax\"\n" | ||
122 | "3:\tmov %3, %0\n" | ||
123 | "\tjmp 2b\n" | ||
124 | "\t.previous\n" | ||
125 | _ASM_EXTABLE(1b, 3b) | ||
126 | : "+r" (ret), "=a" (oldval), "+m" (*uaddr) | ||
127 | : "i" (-EFAULT), "r" (newval), "1" (oldval) | ||
128 | : "memory" | ||
129 | ); | ||
130 | |||
131 | *uval = oldval; | ||
132 | return ret; | ||
133 | } | 114 | } |
134 | 115 | ||
135 | #endif | 116 | #endif |
diff --git a/arch/x86/include/asm/hardirq.h b/arch/x86/include/asm/hardirq.h index ab0ae1aa6d0a..230853da4ec0 100644 --- a/arch/x86/include/asm/hardirq.h +++ b/arch/x86/include/asm/hardirq.h | |||
@@ -33,6 +33,9 @@ typedef struct { | |||
33 | #ifdef CONFIG_X86_MCE_THRESHOLD | 33 | #ifdef CONFIG_X86_MCE_THRESHOLD |
34 | unsigned int irq_threshold_count; | 34 | unsigned int irq_threshold_count; |
35 | #endif | 35 | #endif |
36 | #if IS_ENABLED(CONFIG_HYPERV) || defined(CONFIG_XEN) | ||
37 | unsigned int irq_hv_callback_count; | ||
38 | #endif | ||
36 | } ____cacheline_aligned irq_cpustat_t; | 39 | } ____cacheline_aligned irq_cpustat_t; |
37 | 40 | ||
38 | DECLARE_PER_CPU_SHARED_ALIGNED(irq_cpustat_t, irq_stat); | 41 | DECLARE_PER_CPU_SHARED_ALIGNED(irq_cpustat_t, irq_stat); |
diff --git a/arch/x86/include/asm/hash.h b/arch/x86/include/asm/hash.h new file mode 100644 index 000000000000..e8c58f88b1d4 --- /dev/null +++ b/arch/x86/include/asm/hash.h | |||
@@ -0,0 +1,7 @@ | |||
1 | #ifndef _ASM_X86_HASH_H | ||
2 | #define _ASM_X86_HASH_H | ||
3 | |||
4 | struct fast_hash_ops; | ||
5 | extern void setup_arch_fast_hash(struct fast_hash_ops *ops); | ||
6 | |||
7 | #endif /* _ASM_X86_HASH_H */ | ||
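
The header only forward-declares fast_hash_ops; the structure itself lives with the generic hashing code. A sketch of the override pattern this hook enables, with the callback layout assumed (not guaranteed by this header) and a trivial stand-in where the x86 implementation would install SSE4.2 CRC32-based routines behind a CPU feature check:

#include <stdint.h>

typedef uint32_t u32;

/* Assumed layout: per-flavour hash callbacks that an arch may replace. */
struct fast_hash_ops {
	u32 (*hash)(const void *data, u32 len, u32 seed);
	u32 (*hash2)(const u32 *data, u32 len, u32 seed);
};

static u32 demo_byte_hash(const void *data, u32 len, u32 seed)
{
	/* Placeholder mixer; the real arch hook would use CRC32 insns. */
	const unsigned char *p = data;
	u32 h = seed;

	while (len--)
		h = (h << 5) + h + *p++;
	return h;
}

/* Shape of an arch override: swap in a faster callback when supported. */
void setup_arch_fast_hash(struct fast_hash_ops *ops)
{
	ops->hash = demo_byte_hash;	/* feature-gated in a real kernel */
}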
diff --git a/arch/x86/include/asm/hw_irq.h b/arch/x86/include/asm/hw_irq.h index cba45d99ac1a..a307b7530e54 100644 --- a/arch/x86/include/asm/hw_irq.h +++ b/arch/x86/include/asm/hw_irq.h | |||
@@ -98,7 +98,6 @@ extern void trace_call_function_single_interrupt(void); | |||
98 | #define IO_APIC_IRQ(x) (((x) >= NR_IRQS_LEGACY) || ((1<<(x)) & io_apic_irqs)) | 98 | #define IO_APIC_IRQ(x) (((x) >= NR_IRQS_LEGACY) || ((1<<(x)) & io_apic_irqs)) |
99 | extern unsigned long io_apic_irqs; | 99 | extern unsigned long io_apic_irqs; |
100 | 100 | ||
101 | extern void init_VISWS_APIC_irqs(void); | ||
102 | extern void setup_IO_APIC(void); | 101 | extern void setup_IO_APIC(void); |
103 | extern void disable_IO_APIC(void); | 102 | extern void disable_IO_APIC(void); |
104 | 103 | ||
@@ -191,6 +190,9 @@ extern void (*__initconst interrupt[NR_VECTORS-FIRST_EXTERNAL_VECTOR])(void); | |||
191 | #define trace_interrupt interrupt | 190 | #define trace_interrupt interrupt |
192 | #endif | 191 | #endif |
193 | 192 | ||
193 | #define VECTOR_UNDEFINED -1 | ||
194 | #define VECTOR_RETRIGGERED -2 | ||
195 | |||
194 | typedef int vector_irq_t[NR_VECTORS]; | 196 | typedef int vector_irq_t[NR_VECTORS]; |
195 | DECLARE_PER_CPU(vector_irq_t, vector_irq); | 197 | DECLARE_PER_CPU(vector_irq_t, vector_irq); |
196 | extern void setup_vector_irq(int cpu); | 198 | extern void setup_vector_irq(int cpu); |
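
vector_irq is a per-CPU table mapping hardware interrupt vectors to Linux IRQ numbers; the two new negative sentinels let the hotplug path distinguish a slot that was never assigned from one whose interrupt was retriggered while being migrated. A minimal sketch of walking such a table (NR_VECTORS is 256 on x86; the sample entry is invented):

#include <stdio.h>

#define NR_VECTORS		256
#define VECTOR_UNDEFINED	-1
#define VECTOR_RETRIGGERED	-2

typedef int vector_irq_t[NR_VECTORS];

static void dump_used_vectors(const vector_irq_t vi)
{
	for (int v = 0; v < NR_VECTORS; v++) {
		if (vi[v] == VECTOR_UNDEFINED || vi[v] == VECTOR_RETRIGGERED)
			continue;	/* nothing live in this slot */
		printf("vector %#x -> irq %d\n", v, vi[v]);
	}
}

int main(void)
{
	vector_irq_t vi;

	for (int v = 0; v < NR_VECTORS; v++)
		vi[v] = VECTOR_UNDEFINED;
	vi[0x30] = 9;		/* pretend vector 0x30 carries IRQ 9 */
	dump_used_vectors(vi);
	return 0;
}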
diff --git a/arch/x86/include/asm/intel-mid.h b/arch/x86/include/asm/intel-mid.h index 459769d39263..e34e097b6f9d 100644 --- a/arch/x86/include/asm/intel-mid.h +++ b/arch/x86/include/asm/intel-mid.h | |||
@@ -51,10 +51,41 @@ struct devs_id { | |||
51 | enum intel_mid_cpu_type { | 51 | enum intel_mid_cpu_type { |
52 | /* 1 was Moorestown */ | 52 | /* 1 was Moorestown */ |
53 | INTEL_MID_CPU_CHIP_PENWELL = 2, | 53 | INTEL_MID_CPU_CHIP_PENWELL = 2, |
54 | INTEL_MID_CPU_CHIP_CLOVERVIEW, | ||
55 | INTEL_MID_CPU_CHIP_TANGIER, | ||
54 | }; | 56 | }; |
55 | 57 | ||
56 | extern enum intel_mid_cpu_type __intel_mid_cpu_chip; | 58 | extern enum intel_mid_cpu_type __intel_mid_cpu_chip; |
57 | 59 | ||
60 | /** | ||
61 | * struct intel_mid_ops - Interface between intel-mid & sub archs | ||
62 | * @arch_setup: arch_setup function to re-initialize platform | ||
63 | * structures (x86_init, x86_platform_init) | ||
64 | * | ||
65 | * This structure can be extended if any new interface is required | ||
66 | * between intel-mid & its sub arch files. | ||
67 | */ | ||
68 | struct intel_mid_ops { | ||
69 | void (*arch_setup)(void); | ||
70 | }; | ||
71 | |||
72 | /* Helper APIs for INTEL_MID_OPS_INIT */ | ||
73 | #define DECLARE_INTEL_MID_OPS_INIT(cpuname, cpuid) \ | ||
74 | [cpuid] = get_##cpuname##_ops | ||
75 | |||
76 | /* Maximum number of CPU ops */ | ||
77 | #define MAX_CPU_OPS(a) (sizeof(a)/sizeof(void *)) | ||
78 | |||
79 | /* | ||
80 | * For every new cpu addition, a weak get_<cpuname>_ops() function needs to be | ||
81 | * declared in arch/x86/platform/intel_mid/intel_mid_weak_decls.h. | ||
82 | */ | ||
83 | #define INTEL_MID_OPS_INIT {\ | ||
84 | DECLARE_INTEL_MID_OPS_INIT(penwell, INTEL_MID_CPU_CHIP_PENWELL), \ | ||
85 | DECLARE_INTEL_MID_OPS_INIT(cloverview, INTEL_MID_CPU_CHIP_CLOVERVIEW), \ | ||
86 | DECLARE_INTEL_MID_OPS_INIT(tangier, INTEL_MID_CPU_CHIP_TANGIER) \ | ||
87 | }; | ||
88 | |||
58 | #ifdef CONFIG_X86_INTEL_MID | 89 | #ifdef CONFIG_X86_INTEL_MID |
59 | 90 | ||
60 | static inline enum intel_mid_cpu_type intel_mid_identify_cpu(void) | 91 | static inline enum intel_mid_cpu_type intel_mid_identify_cpu(void) |
@@ -86,8 +117,21 @@ extern enum intel_mid_timer_options intel_mid_timer_options; | |||
86 | * Penwell uses spread spectrum clock, so the freq number is not exactly | 117 | * Penwell uses spread spectrum clock, so the freq number is not exactly |
87 | * the same as reported by MSR based on SDM. | 118 | * the same as reported by MSR based on SDM. |
88 | */ | 119 | */ |
89 | #define PENWELL_FSB_FREQ_83SKU 83200 | 120 | #define FSB_FREQ_83SKU 83200 |
90 | #define PENWELL_FSB_FREQ_100SKU 99840 | 121 | #define FSB_FREQ_100SKU 99840 |
122 | #define FSB_FREQ_133SKU 133000 | ||
123 | |||
124 | #define FSB_FREQ_167SKU 167000 | ||
125 | #define FSB_FREQ_200SKU 200000 | ||
126 | #define FSB_FREQ_267SKU 267000 | ||
127 | #define FSB_FREQ_333SKU 333000 | ||
128 | #define FSB_FREQ_400SKU 400000 | ||
129 | |||
130 | /* Bus Select SoC Fuse value */ | ||
131 | #define BSEL_SOC_FUSE_MASK 0x7 | ||
132 | #define BSEL_SOC_FUSE_001 0x1 /* FSB 133MHz */ | ||
133 | #define BSEL_SOC_FUSE_101 0x5 /* FSB 100MHz */ | ||
134 | #define BSEL_SOC_FUSE_111 0x7 /* FSB 83MHz */ | ||
91 | 135 | ||
92 | #define SFI_MTMR_MAX_NUM 8 | 136 | #define SFI_MTMR_MAX_NUM 8 |
93 | #define SFI_MRTC_MAX 8 | 137 | #define SFI_MRTC_MAX 8 |
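
INTEL_MID_OPS_INIT expands into a designated-initializer array of get_<cpuname>_ops() callbacks indexed by chip id, so platform setup can dispatch on intel_mid_identify_cpu() without a switch statement. A condensed sketch of the pattern, with the scaffolding around the real macro simplified:

#include <stdio.h>

enum chip { CHIP_PENWELL = 2, CHIP_CLOVERVIEW, CHIP_TANGIER };

struct intel_mid_ops {
	void (*arch_setup)(void);
};

static void penwell_setup(void) { puts("penwell arch_setup"); }
static void tangier_setup(void) { puts("tangier arch_setup"); }

static struct intel_mid_ops penwell_ops = { .arch_setup = penwell_setup };
static struct intel_mid_ops tangier_ops = { .arch_setup = tangier_setup };

static void *get_penwell_ops(void) { return &penwell_ops; }
static void *get_tangier_ops(void) { return &tangier_ops; }

/* Same shape as INTEL_MID_OPS_INIT: the chip id indexes a getter table. */
static void *(*const get_ops[])(void) = {
	[CHIP_PENWELL] = get_penwell_ops,
	[CHIP_TANGIER] = get_tangier_ops,
};

int main(void)
{
	enum chip id = CHIP_TANGIER;	/* stand-in for __intel_mid_cpu_chip */
	struct intel_mid_ops *ops = get_ops[id]();

	ops->arch_setup();
	return 0;
}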
diff --git a/arch/x86/include/asm/io.h b/arch/x86/include/asm/io.h index 34f69cb9350a..b8237d8a1e0c 100644 --- a/arch/x86/include/asm/io.h +++ b/arch/x86/include/asm/io.h | |||
@@ -39,6 +39,7 @@ | |||
39 | #include <linux/string.h> | 39 | #include <linux/string.h> |
40 | #include <linux/compiler.h> | 40 | #include <linux/compiler.h> |
41 | #include <asm/page.h> | 41 | #include <asm/page.h> |
42 | #include <asm/early_ioremap.h> | ||
42 | 43 | ||
43 | #define build_mmio_read(name, size, type, reg, barrier) \ | 44 | #define build_mmio_read(name, size, type, reg, barrier) \ |
44 | static inline type name(const volatile void __iomem *addr) \ | 45 | static inline type name(const volatile void __iomem *addr) \ |
@@ -237,7 +238,7 @@ memcpy_toio(volatile void __iomem *dst, const void *src, size_t count) | |||
237 | 238 | ||
238 | static inline void flush_write_buffers(void) | 239 | static inline void flush_write_buffers(void) |
239 | { | 240 | { |
240 | #if defined(CONFIG_X86_OOSTORE) || defined(CONFIG_X86_PPRO_FENCE) | 241 | #if defined(CONFIG_X86_PPRO_FENCE) |
241 | asm volatile("lock; addl $0,0(%%esp)": : :"memory"); | 242 | asm volatile("lock; addl $0,0(%%esp)": : :"memory"); |
242 | #endif | 243 | #endif |
243 | } | 244 | } |
@@ -316,19 +317,6 @@ extern int ioremap_change_attr(unsigned long vaddr, unsigned long size, | |||
316 | unsigned long prot_val); | 317 | unsigned long prot_val); |
317 | extern void __iomem *ioremap_wc(resource_size_t offset, unsigned long size); | 318 | extern void __iomem *ioremap_wc(resource_size_t offset, unsigned long size); |
318 | 319 | ||
319 | /* | ||
320 | * early_ioremap() and early_iounmap() are for temporary early boot-time | ||
321 | * mappings, before the real ioremap() is functional. | ||
322 | * A boot-time mapping is currently limited to at most 16 pages. | ||
323 | */ | ||
324 | extern void early_ioremap_init(void); | ||
325 | extern void early_ioremap_reset(void); | ||
326 | extern void __iomem *early_ioremap(resource_size_t phys_addr, | ||
327 | unsigned long size); | ||
328 | extern void __iomem *early_memremap(resource_size_t phys_addr, | ||
329 | unsigned long size); | ||
330 | extern void early_iounmap(void __iomem *addr, unsigned long size); | ||
331 | extern void fixup_early_ioremap(void); | ||
332 | extern bool is_early_ioremap_ptep(pte_t *ptep); | 320 | extern bool is_early_ioremap_ptep(pte_t *ptep); |
333 | 321 | ||
334 | #ifdef CONFIG_XEN | 322 | #ifdef CONFIG_XEN |
diff --git a/arch/x86/include/asm/iosf_mbi.h b/arch/x86/include/asm/iosf_mbi.h new file mode 100644 index 000000000000..8e71c7941767 --- /dev/null +++ b/arch/x86/include/asm/iosf_mbi.h | |||
@@ -0,0 +1,90 @@ | |||
1 | /* | ||
2 | * iosf_mbi.h: Intel OnChip System Fabric MailBox access support | ||
3 | */ | ||
4 | |||
5 | #ifndef IOSF_MBI_SYMS_H | ||
6 | #define IOSF_MBI_SYMS_H | ||
7 | |||
8 | #define MBI_MCR_OFFSET 0xD0 | ||
9 | #define MBI_MDR_OFFSET 0xD4 | ||
10 | #define MBI_MCRX_OFFSET 0xD8 | ||
11 | |||
12 | #define MBI_RD_MASK 0xFEFFFFFF | ||
13 | #define MBI_WR_MASK 0x01000000 | ||
14 | |||
15 | #define MBI_MASK_HI 0xFFFFFF00 | ||
16 | #define MBI_MASK_LO 0x000000FF | ||
17 | #define MBI_ENABLE 0xF0 | ||
18 | |||
19 | /* Baytrail available units */ | ||
20 | #define BT_MBI_UNIT_AUNIT 0x00 | ||
21 | #define BT_MBI_UNIT_SMC 0x01 | ||
22 | #define BT_MBI_UNIT_CPU 0x02 | ||
23 | #define BT_MBI_UNIT_BUNIT 0x03 | ||
24 | #define BT_MBI_UNIT_PMC 0x04 | ||
25 | #define BT_MBI_UNIT_GFX 0x06 | ||
26 | #define BT_MBI_UNIT_SMI 0x0C | ||
27 | #define BT_MBI_UNIT_USB 0x43 | ||
28 | #define BT_MBI_UNIT_SATA 0xA3 | ||
29 | #define BT_MBI_UNIT_PCIE 0xA6 | ||
30 | |||
31 | /* Baytrail read/write opcodes */ | ||
32 | #define BT_MBI_AUNIT_READ 0x10 | ||
33 | #define BT_MBI_AUNIT_WRITE 0x11 | ||
34 | #define BT_MBI_SMC_READ 0x10 | ||
35 | #define BT_MBI_SMC_WRITE 0x11 | ||
36 | #define BT_MBI_CPU_READ 0x10 | ||
37 | #define BT_MBI_CPU_WRITE 0x11 | ||
38 | #define BT_MBI_BUNIT_READ 0x10 | ||
39 | #define BT_MBI_BUNIT_WRITE 0x11 | ||
40 | #define BT_MBI_PMC_READ 0x06 | ||
41 | #define BT_MBI_PMC_WRITE 0x07 | ||
42 | #define BT_MBI_GFX_READ 0x00 | ||
43 | #define BT_MBI_GFX_WRITE 0x01 | ||
44 | #define BT_MBI_SMIO_READ 0x06 | ||
45 | #define BT_MBI_SMIO_WRITE 0x07 | ||
46 | #define BT_MBI_USB_READ 0x06 | ||
47 | #define BT_MBI_USB_WRITE 0x07 | ||
48 | #define BT_MBI_SATA_READ 0x00 | ||
49 | #define BT_MBI_SATA_WRITE 0x01 | ||
50 | #define BT_MBI_PCIE_READ 0x00 | ||
51 | #define BT_MBI_PCIE_WRITE 0x01 | ||
52 | |||
53 | /** | ||
54 | * iosf_mbi_read() - MailBox Interface read command | ||
55 | * @port: port indicating subunit being accessed | ||
56 | * @opcode: port specific read or write opcode | ||
57 | * @offset: register address offset | ||
58 | * @mdr: register data to be read | ||
59 | * | ||
60 | * Locking is handled by spinlock - cannot sleep. | ||
61 | * Return: Nonzero on error | ||
62 | */ | ||
63 | int iosf_mbi_read(u8 port, u8 opcode, u32 offset, u32 *mdr); | ||
64 | |||
65 | /** | ||
66 | * iosf_mbi_write() - MailBox unmasked write command | ||
67 | * @port: port indicating subunit being accessed | ||
68 | * @opcode: port specific read or write opcode | ||
69 | * @offset: register address offset | ||
70 | * @mdr: register data to be written | ||
71 | * | ||
72 | * Locking is handled by spinlock - cannot sleep. | ||
73 | * Return: Nonzero on error | ||
74 | */ | ||
75 | int iosf_mbi_write(u8 port, u8 opcode, u32 offset, u32 mdr); | ||
76 | |||
77 | /** | ||
78 | * iosf_mbi_modify() - MailBox masked write command | ||
79 | * @port: port indicating subunit being accessed | ||
80 | * @opcode: port specific read or write opcode | ||
81 | * @offset: register address offset | ||
82 | * @mdr: register data being modified | ||
83 | * @mask: mask indicating bits in mdr to be modified | ||
84 | * | ||
85 | * Locking is handled by spinlock - cannot sleep. | ||
86 | * Return: Nonzero on error | ||
87 | */ | ||
88 | int iosf_mbi_modify(u8 port, u8 opcode, u32 offset, u32 mdr, u32 mask); | ||
89 | |||
90 | #endif /* IOSF_MBI_SYMS_H */ | ||
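
A typical caller pairs iosf_mbi_read() with iosf_mbi_modify(), letting the mask argument do the merging of a read-modify-write. A sketch of such a caller, as a trivial module; the register offset is invented for illustration and is not a real BUNIT register:

#include <linux/module.h>
#include <asm/iosf_mbi.h>

#define DEMO_BUNIT_REG	0x3c	/* hypothetical offset, illustration only */

static int __init iosf_demo_init(void)
{
	u32 val;
	int ret;

	ret = iosf_mbi_read(BT_MBI_UNIT_BUNIT, BT_MBI_BUNIT_READ,
			    DEMO_BUNIT_REG, &val);
	if (ret)
		return ret;
	pr_info("BUNIT reg %#x = %#x\n", DEMO_BUNIT_REG, val);

	/* Set bit 0 without disturbing the rest of the register. */
	return iosf_mbi_modify(BT_MBI_UNIT_BUNIT, BT_MBI_BUNIT_WRITE,
			       DEMO_BUNIT_REG, 0x1, 0x1);
}
module_init(iosf_demo_init);

MODULE_LICENSE("GPL");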
diff --git a/arch/x86/include/asm/irq.h b/arch/x86/include/asm/irq.h index 0ea10f27d613..cb6cfcd034cf 100644 --- a/arch/x86/include/asm/irq.h +++ b/arch/x86/include/asm/irq.h | |||
@@ -25,6 +25,7 @@ extern void irq_ctx_init(int cpu); | |||
25 | 25 | ||
26 | #ifdef CONFIG_HOTPLUG_CPU | 26 | #ifdef CONFIG_HOTPLUG_CPU |
27 | #include <linux/cpumask.h> | 27 | #include <linux/cpumask.h> |
28 | extern int check_irq_vectors_for_cpu_disable(void); | ||
28 | extern void fixup_irqs(void); | 29 | extern void fixup_irqs(void); |
29 | extern void irq_force_complete_move(int); | 30 | extern void irq_force_complete_move(int); |
30 | #endif | 31 | #endif |
diff --git a/arch/x86/include/asm/kvm_host.h b/arch/x86/include/asm/kvm_host.h index ae5d7830855c..fcaf9c961265 100644 --- a/arch/x86/include/asm/kvm_host.h +++ b/arch/x86/include/asm/kvm_host.h | |||
@@ -337,6 +337,11 @@ struct kvm_pmu { | |||
337 | u64 reprogram_pmi; | 337 | u64 reprogram_pmi; |
338 | }; | 338 | }; |
339 | 339 | ||
340 | enum { | ||
341 | KVM_DEBUGREG_BP_ENABLED = 1, | ||
342 | KVM_DEBUGREG_WONT_EXIT = 2, | ||
343 | }; | ||
344 | |||
340 | struct kvm_vcpu_arch { | 345 | struct kvm_vcpu_arch { |
341 | /* | 346 | /* |
342 | * rip and regs accesses must go through | 347 | * rip and regs accesses must go through |
@@ -444,7 +449,6 @@ struct kvm_vcpu_arch { | |||
444 | } st; | 449 | } st; |
445 | 450 | ||
446 | u64 last_guest_tsc; | 451 | u64 last_guest_tsc; |
447 | u64 last_kernel_ns; | ||
448 | u64 last_host_tsc; | 452 | u64 last_host_tsc; |
449 | u64 tsc_offset_adjustment; | 453 | u64 tsc_offset_adjustment; |
450 | u64 this_tsc_nsec; | 454 | u64 this_tsc_nsec; |
@@ -464,7 +468,7 @@ struct kvm_vcpu_arch { | |||
464 | struct mtrr_state_type mtrr_state; | 468 | struct mtrr_state_type mtrr_state; |
465 | u32 pat; | 469 | u32 pat; |
466 | 470 | ||
467 | int switch_db_regs; | 471 | unsigned switch_db_regs; |
468 | unsigned long db[KVM_NR_DB_REGS]; | 472 | unsigned long db[KVM_NR_DB_REGS]; |
469 | unsigned long dr6; | 473 | unsigned long dr6; |
470 | unsigned long dr7; | 474 | unsigned long dr7; |
@@ -599,12 +603,15 @@ struct kvm_arch { | |||
599 | bool use_master_clock; | 603 | bool use_master_clock; |
600 | u64 master_kernel_ns; | 604 | u64 master_kernel_ns; |
601 | cycle_t master_cycle_now; | 605 | cycle_t master_cycle_now; |
606 | struct delayed_work kvmclock_update_work; | ||
607 | struct delayed_work kvmclock_sync_work; | ||
602 | 608 | ||
603 | struct kvm_xen_hvm_config xen_hvm_config; | 609 | struct kvm_xen_hvm_config xen_hvm_config; |
604 | 610 | ||
605 | /* fields used by HYPER-V emulation */ | 611 | /* fields used by HYPER-V emulation */ |
606 | u64 hv_guest_os_id; | 612 | u64 hv_guest_os_id; |
607 | u64 hv_hypercall; | 613 | u64 hv_hypercall; |
614 | u64 hv_tsc_page; | ||
608 | 615 | ||
609 | #ifdef CONFIG_KVM_MMU_AUDIT | 616 | #ifdef CONFIG_KVM_MMU_AUDIT |
610 | int audit_point; | 617 | int audit_point; |
@@ -699,6 +706,9 @@ struct kvm_x86_ops { | |||
699 | void (*set_idt)(struct kvm_vcpu *vcpu, struct desc_ptr *dt); | 706 | void (*set_idt)(struct kvm_vcpu *vcpu, struct desc_ptr *dt); |
700 | void (*get_gdt)(struct kvm_vcpu *vcpu, struct desc_ptr *dt); | 707 | void (*get_gdt)(struct kvm_vcpu *vcpu, struct desc_ptr *dt); |
701 | void (*set_gdt)(struct kvm_vcpu *vcpu, struct desc_ptr *dt); | 708 | void (*set_gdt)(struct kvm_vcpu *vcpu, struct desc_ptr *dt); |
709 | u64 (*get_dr6)(struct kvm_vcpu *vcpu); | ||
710 | void (*set_dr6)(struct kvm_vcpu *vcpu, unsigned long value); | ||
711 | void (*sync_dirty_debug_regs)(struct kvm_vcpu *vcpu); | ||
702 | void (*set_dr7)(struct kvm_vcpu *vcpu, unsigned long value); | 712 | void (*set_dr7)(struct kvm_vcpu *vcpu, unsigned long value); |
703 | void (*cache_reg)(struct kvm_vcpu *vcpu, enum kvm_reg reg); | 713 | void (*cache_reg)(struct kvm_vcpu *vcpu, enum kvm_reg reg); |
704 | unsigned long (*get_rflags)(struct kvm_vcpu *vcpu); | 714 | unsigned long (*get_rflags)(struct kvm_vcpu *vcpu); |
@@ -725,8 +735,8 @@ struct kvm_x86_ops { | |||
725 | int (*nmi_allowed)(struct kvm_vcpu *vcpu); | 735 | int (*nmi_allowed)(struct kvm_vcpu *vcpu); |
726 | bool (*get_nmi_mask)(struct kvm_vcpu *vcpu); | 736 | bool (*get_nmi_mask)(struct kvm_vcpu *vcpu); |
727 | void (*set_nmi_mask)(struct kvm_vcpu *vcpu, bool masked); | 737 | void (*set_nmi_mask)(struct kvm_vcpu *vcpu, bool masked); |
728 | int (*enable_nmi_window)(struct kvm_vcpu *vcpu); | 738 | void (*enable_nmi_window)(struct kvm_vcpu *vcpu); |
729 | int (*enable_irq_window)(struct kvm_vcpu *vcpu); | 739 | void (*enable_irq_window)(struct kvm_vcpu *vcpu); |
730 | void (*update_cr8_intercept)(struct kvm_vcpu *vcpu, int tpr, int irr); | 740 | void (*update_cr8_intercept)(struct kvm_vcpu *vcpu, int tpr, int irr); |
731 | int (*vm_has_apicv)(struct kvm *kvm); | 741 | int (*vm_has_apicv)(struct kvm *kvm); |
732 | void (*hwapic_irr_update)(struct kvm_vcpu *vcpu, int max_irr); | 742 | void (*hwapic_irr_update)(struct kvm_vcpu *vcpu, int max_irr); |
@@ -762,6 +772,9 @@ struct kvm_x86_ops { | |||
762 | struct x86_instruction_info *info, | 772 | struct x86_instruction_info *info, |
763 | enum x86_intercept_stage stage); | 773 | enum x86_intercept_stage stage); |
764 | void (*handle_external_intr)(struct kvm_vcpu *vcpu); | 774 | void (*handle_external_intr)(struct kvm_vcpu *vcpu); |
775 | bool (*mpx_supported)(void); | ||
776 | |||
777 | int (*check_nested_events)(struct kvm_vcpu *vcpu, bool external_intr); | ||
765 | }; | 778 | }; |
766 | 779 | ||
767 | struct kvm_arch_async_pf { | 780 | struct kvm_arch_async_pf { |
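
With switch_db_regs widened from int to an unsigned flag word, the new enum bits cover two distinct cases: KVM_DEBUGREG_BP_ENABLED requests that guest debug registers be loaded on entry, while KVM_DEBUGREG_WONT_EXIT marks the guest as owning the hardware registers, so they must be read back afterwards (the job of the new sync_dirty_debug_regs hook). A heavily condensed sketch of that flag handling:

#include <stdio.h>

enum {
	KVM_DEBUGREG_BP_ENABLED = 1,
	KVM_DEBUGREG_WONT_EXIT  = 2,
};

struct vcpu_sketch {
	unsigned switch_db_regs;	/* was int, now a flag mask */
};

static void vcpu_enter_guest_sketch(struct vcpu_sketch *v)
{
	if (v->switch_db_regs & KVM_DEBUGREG_BP_ENABLED)
		puts("load guest db[0..3]/dr6/dr7 into hardware");

	/* ... guest runs ... */

	if (v->switch_db_regs & KVM_DEBUGREG_WONT_EXIT)
		puts("sync dirty hardware debug regs back into the vcpu");
}

int main(void)
{
	struct vcpu_sketch v = { .switch_db_regs = KVM_DEBUGREG_BP_ENABLED };

	vcpu_enter_guest_sketch(&v);
	return 0;
}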
diff --git a/arch/x86/include/asm/kvm_para.h b/arch/x86/include/asm/kvm_para.h index 1df115909758..c7678e43465b 100644 --- a/arch/x86/include/asm/kvm_para.h +++ b/arch/x86/include/asm/kvm_para.h | |||
@@ -85,28 +85,9 @@ static inline long kvm_hypercall4(unsigned int nr, unsigned long p1, | |||
85 | return ret; | 85 | return ret; |
86 | } | 86 | } |
87 | 87 | ||
88 | static inline uint32_t kvm_cpuid_base(void) | ||
89 | { | ||
90 | if (boot_cpu_data.cpuid_level < 0) | ||
91 | return 0; /* So we don't blow up on old processors */ | ||
92 | |||
93 | if (cpu_has_hypervisor) | ||
94 | return hypervisor_cpuid_base("KVMKVMKVM\0\0\0", 0); | ||
95 | |||
96 | return 0; | ||
97 | } | ||
98 | |||
99 | static inline bool kvm_para_available(void) | ||
100 | { | ||
101 | return kvm_cpuid_base() != 0; | ||
102 | } | ||
103 | |||
104 | static inline unsigned int kvm_arch_para_features(void) | ||
105 | { | ||
106 | return cpuid_eax(KVM_CPUID_FEATURES); | ||
107 | } | ||
108 | |||
109 | #ifdef CONFIG_KVM_GUEST | 88 | #ifdef CONFIG_KVM_GUEST |
89 | bool kvm_para_available(void); | ||
90 | unsigned int kvm_arch_para_features(void); | ||
110 | void __init kvm_guest_init(void); | 91 | void __init kvm_guest_init(void); |
111 | void kvm_async_pf_task_wait(u32 token); | 92 | void kvm_async_pf_task_wait(u32 token); |
112 | void kvm_async_pf_task_wake(u32 token); | 93 | void kvm_async_pf_task_wake(u32 token); |
@@ -126,6 +107,16 @@ static inline void kvm_spinlock_init(void) | |||
126 | #define kvm_async_pf_task_wait(T) do {} while(0) | 107 | #define kvm_async_pf_task_wait(T) do {} while(0) |
127 | #define kvm_async_pf_task_wake(T) do {} while(0) | 108 | #define kvm_async_pf_task_wake(T) do {} while(0) |
128 | 109 | ||
110 | static inline bool kvm_para_available(void) | ||
111 | { | ||
112 | return false; | ||
113 | } | ||
114 | |||
115 | static inline unsigned int kvm_arch_para_features(void) | ||
116 | { | ||
117 | return 0; | ||
118 | } | ||
119 | |||
129 | static inline u32 kvm_read_and_reset_pf_reason(void) | 120 | static inline u32 kvm_read_and_reset_pf_reason(void) |
130 | { | 121 | { |
131 | return 0; | 122 | return 0; |
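
The now out-of-line kvm_para_available() still reduces to probing the hypervisor CPUID range at 0x40000000 for the "KVMKVMKVM\0\0\0" signature spread across EBX:ECX:EDX. A userspace sketch of the same probe (x86-only, using the compiler's cpuid.h rather than the kernel helpers, and skipping the cpu_has_hypervisor pre-check):

#include <cpuid.h>
#include <stdio.h>
#include <string.h>

static int kvm_signature_present(void)
{
	unsigned int eax, ebx, ecx, edx;
	char sig[13];

	/* Hypervisor leaves start at 0x40000000. */
	__cpuid(0x40000000, eax, ebx, ecx, edx);
	memcpy(sig + 0, &ebx, 4);
	memcpy(sig + 4, &ecx, 4);
	memcpy(sig + 8, &edx, 4);
	sig[12] = '\0';
	return strcmp(sig, "KVMKVMKVM") == 0;	/* trailing NULs end the string */
}

int main(void)
{
	printf("KVM paravirt interface: %s\n",
	       kvm_signature_present() ? "present" : "absent");
	return 0;
}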
diff --git a/arch/x86/include/asm/mce.h b/arch/x86/include/asm/mce.h index c696a8687567..6e4ce2df87cf 100644 --- a/arch/x86/include/asm/mce.h +++ b/arch/x86/include/asm/mce.h | |||
@@ -118,7 +118,6 @@ extern void mce_register_decode_chain(struct notifier_block *nb); | |||
118 | extern void mce_unregister_decode_chain(struct notifier_block *nb); | 118 | extern void mce_unregister_decode_chain(struct notifier_block *nb); |
119 | 119 | ||
120 | #include <linux/percpu.h> | 120 | #include <linux/percpu.h> |
121 | #include <linux/init.h> | ||
122 | #include <linux/atomic.h> | 121 | #include <linux/atomic.h> |
123 | 122 | ||
124 | extern int mce_p5_enabled; | 123 | extern int mce_p5_enabled; |
diff --git a/arch/x86/include/asm/microcode.h b/arch/x86/include/asm/microcode.h index f98bd6625318..b59827e76529 100644 --- a/arch/x86/include/asm/microcode.h +++ b/arch/x86/include/asm/microcode.h | |||
@@ -1,6 +1,21 @@ | |||
1 | #ifndef _ASM_X86_MICROCODE_H | 1 | #ifndef _ASM_X86_MICROCODE_H |
2 | #define _ASM_X86_MICROCODE_H | 2 | #define _ASM_X86_MICROCODE_H |
3 | 3 | ||
4 | #define native_rdmsr(msr, val1, val2) \ | ||
5 | do { \ | ||
6 | u64 __val = native_read_msr((msr)); \ | ||
7 | (void)((val1) = (u32)__val); \ | ||
8 | (void)((val2) = (u32)(__val >> 32)); \ | ||
9 | } while (0) | ||
10 | |||
11 | #define native_wrmsr(msr, low, high) \ | ||
12 | native_write_msr(msr, low, high) | ||
13 | |||
14 | #define native_wrmsrl(msr, val) \ | ||
15 | native_write_msr((msr), \ | ||
16 | (u32)((u64)(val)), \ | ||
17 | (u32)((u64)(val) >> 32)) | ||
18 | |||
4 | struct cpu_signature { | 19 | struct cpu_signature { |
5 | unsigned int sig; | 20 | unsigned int sig; |
6 | unsigned int pf; | 21 | unsigned int pf; |
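
The new native_rdmsr()/native_wrmsrl() wrappers only split and rejoin the 64-bit MSR value across the EDX:EAX register pair that RDMSR/WRMSR operate on. The arithmetic, runnable as plain C:

#include <stdint.h>
#include <stdio.h>

int main(void)
{
	uint64_t val = 0x123456789abcdef0ULL;	/* pretend MSR contents */

	/* native_rdmsr(): one 64-bit value -> low/high 32-bit halves */
	uint32_t low  = (uint32_t)val;
	uint32_t high = (uint32_t)(val >> 32);

	/* native_wrmsrl(): the reverse composition */
	uint64_t back = ((uint64_t)high << 32) | low;

	printf("low=%#x high=%#x back=%#llx\n", low, high,
	       (unsigned long long)back);
	return 0;
}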
diff --git a/arch/x86/include/asm/microcode_amd.h b/arch/x86/include/asm/microcode_amd.h index 4c019179a57d..b7b10b82d3e5 100644 --- a/arch/x86/include/asm/microcode_amd.h +++ b/arch/x86/include/asm/microcode_amd.h | |||
@@ -61,11 +61,10 @@ extern int __apply_microcode_amd(struct microcode_amd *mc_amd); | |||
61 | extern int apply_microcode_amd(int cpu); | 61 | extern int apply_microcode_amd(int cpu); |
62 | extern enum ucode_state load_microcode_amd(u8 family, const u8 *data, size_t size); | 62 | extern enum ucode_state load_microcode_amd(u8 family, const u8 *data, size_t size); |
63 | 63 | ||
64 | #define PATCH_MAX_SIZE PAGE_SIZE | ||
65 | extern u8 amd_ucode_patch[PATCH_MAX_SIZE]; | ||
66 | |||
64 | #ifdef CONFIG_MICROCODE_AMD_EARLY | 67 | #ifdef CONFIG_MICROCODE_AMD_EARLY |
65 | #ifdef CONFIG_X86_32 | ||
66 | #define MPB_MAX_SIZE PAGE_SIZE | ||
67 | extern u8 amd_bsp_mpb[MPB_MAX_SIZE]; | ||
68 | #endif | ||
69 | extern void __init load_ucode_amd_bsp(void); | 68 | extern void __init load_ucode_amd_bsp(void); |
70 | extern void load_ucode_amd_ap(void); | 69 | extern void load_ucode_amd_ap(void); |
71 | extern int __init save_microcode_in_initrd_amd(void); | 70 | extern int __init save_microcode_in_initrd_amd(void); |
diff --git a/arch/x86/include/asm/mmzone_32.h b/arch/x86/include/asm/mmzone_32.h index 8a9b3e288cb4..1ec990bd7dc0 100644 --- a/arch/x86/include/asm/mmzone_32.h +++ b/arch/x86/include/asm/mmzone_32.h | |||
@@ -11,9 +11,6 @@ | |||
11 | #ifdef CONFIG_NUMA | 11 | #ifdef CONFIG_NUMA |
12 | extern struct pglist_data *node_data[]; | 12 | extern struct pglist_data *node_data[]; |
13 | #define NODE_DATA(nid) (node_data[nid]) | 13 | #define NODE_DATA(nid) (node_data[nid]) |
14 | |||
15 | #include <asm/numaq.h> | ||
16 | |||
17 | #endif /* CONFIG_NUMA */ | 14 | #endif /* CONFIG_NUMA */ |
18 | 15 | ||
19 | #ifdef CONFIG_DISCONTIGMEM | 16 | #ifdef CONFIG_DISCONTIGMEM |
diff --git a/arch/x86/include/asm/mpspec.h b/arch/x86/include/asm/mpspec.h index 3142a94c7b4b..f5a617956735 100644 --- a/arch/x86/include/asm/mpspec.h +++ b/arch/x86/include/asm/mpspec.h | |||
@@ -1,7 +1,6 @@ | |||
1 | #ifndef _ASM_X86_MPSPEC_H | 1 | #ifndef _ASM_X86_MPSPEC_H |
2 | #define _ASM_X86_MPSPEC_H | 2 | #define _ASM_X86_MPSPEC_H |
3 | 3 | ||
4 | #include <linux/init.h> | ||
5 | 4 | ||
6 | #include <asm/mpspec_def.h> | 5 | #include <asm/mpspec_def.h> |
7 | #include <asm/x86_init.h> | 6 | #include <asm/x86_init.h> |
@@ -26,12 +25,6 @@ extern int pic_mode; | |||
26 | 25 | ||
27 | extern unsigned int def_to_bigsmp; | 26 | extern unsigned int def_to_bigsmp; |
28 | 27 | ||
29 | #ifdef CONFIG_X86_NUMAQ | ||
30 | extern int mp_bus_id_to_node[MAX_MP_BUSSES]; | ||
31 | extern int mp_bus_id_to_local[MAX_MP_BUSSES]; | ||
32 | extern int quad_local_to_mp_bus_id [NR_CPUS/4][4]; | ||
33 | #endif | ||
34 | |||
35 | #else /* CONFIG_X86_64: */ | 28 | #else /* CONFIG_X86_64: */ |
36 | 29 | ||
37 | #define MAX_MP_BUSSES 256 | 30 | #define MAX_MP_BUSSES 256 |
diff --git a/arch/x86/include/asm/mshyperv.h b/arch/x86/include/asm/mshyperv.h index cd9c41938b8a..c163215abb9a 100644 --- a/arch/x86/include/asm/mshyperv.h +++ b/arch/x86/include/asm/mshyperv.h | |||
@@ -2,6 +2,7 @@ | |||
2 | #define _ASM_X86_MSHYPER_H | 2 | #define _ASM_X86_MSHYPER_H |
3 | 3 | ||
4 | #include <linux/types.h> | 4 | #include <linux/types.h> |
5 | #include <linux/interrupt.h> | ||
5 | #include <asm/hyperv.h> | 6 | #include <asm/hyperv.h> |
6 | 7 | ||
7 | struct ms_hyperv_info { | 8 | struct ms_hyperv_info { |
@@ -16,6 +17,7 @@ void hyperv_callback_vector(void); | |||
16 | #define trace_hyperv_callback_vector hyperv_callback_vector | 17 | #define trace_hyperv_callback_vector hyperv_callback_vector |
17 | #endif | 18 | #endif |
18 | void hyperv_vector_handler(struct pt_regs *regs); | 19 | void hyperv_vector_handler(struct pt_regs *regs); |
19 | void hv_register_vmbus_handler(int irq, irq_handler_t handler); | 20 | void hv_setup_vmbus_irq(void (*handler)(void)); |
21 | void hv_remove_vmbus_irq(void); | ||
20 | 22 | ||
21 | #endif | 23 | #endif |
diff --git a/arch/x86/include/asm/msr.h b/arch/x86/include/asm/msr.h index e139b13f2a33..de36f22eb0b9 100644 --- a/arch/x86/include/asm/msr.h +++ b/arch/x86/include/asm/msr.h | |||
@@ -214,6 +214,8 @@ do { \ | |||
214 | 214 | ||
215 | struct msr *msrs_alloc(void); | 215 | struct msr *msrs_alloc(void); |
216 | void msrs_free(struct msr *msrs); | 216 | void msrs_free(struct msr *msrs); |
217 | int msr_set_bit(u32 msr, u8 bit); | ||
218 | int msr_clear_bit(u32 msr, u8 bit); | ||
217 | 219 | ||
218 | #ifdef CONFIG_SMP | 220 | #ifdef CONFIG_SMP |
219 | int rdmsr_on_cpu(unsigned int cpu, u32 msr_no, u32 *l, u32 *h); | 221 | int rdmsr_on_cpu(unsigned int cpu, u32 msr_no, u32 *l, u32 *h); |
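
msr_set_bit() and msr_clear_bit() are read-modify-write helpers that write the MSR back only when the bit actually changes. A sketch of that contract over a fake MSR cell; the return convention shown (<0 error, 0 no change, >0 written) is an assumption about the interface, not something this hunk states:

#include <stdint.h>
#include <stdio.h>

static uint64_t fake_msr;	/* stand-in for the real rdmsr/wrmsr target */

static int msr_set_bit_sketch(uint64_t *msr, uint8_t bit)
{
	uint64_t old = *msr, new = old | (1ULL << bit);

	if (new == old)
		return 0;	/* already set, skip the write */
	*msr = new;
	return 1;		/* written */
}

int main(void)
{
	printf("first set:  %d\n", msr_set_bit_sketch(&fake_msr, 3)); /* 1 */
	printf("second set: %d\n", msr_set_bit_sketch(&fake_msr, 3)); /* 0 */
	return 0;
}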
diff --git a/arch/x86/include/asm/mwait.h b/arch/x86/include/asm/mwait.h index 2f366d0ac6b4..1da25a5f96f9 100644 --- a/arch/x86/include/asm/mwait.h +++ b/arch/x86/include/asm/mwait.h | |||
@@ -1,6 +1,8 @@ | |||
1 | #ifndef _ASM_X86_MWAIT_H | 1 | #ifndef _ASM_X86_MWAIT_H |
2 | #define _ASM_X86_MWAIT_H | 2 | #define _ASM_X86_MWAIT_H |
3 | 3 | ||
4 | #include <linux/sched.h> | ||
5 | |||
4 | #define MWAIT_SUBSTATE_MASK 0xf | 6 | #define MWAIT_SUBSTATE_MASK 0xf |
5 | #define MWAIT_CSTATE_MASK 0xf | 7 | #define MWAIT_CSTATE_MASK 0xf |
6 | #define MWAIT_SUBSTATE_SIZE 4 | 8 | #define MWAIT_SUBSTATE_SIZE 4 |
@@ -13,4 +15,45 @@ | |||
13 | 15 | ||
14 | #define MWAIT_ECX_INTERRUPT_BREAK 0x1 | 16 | #define MWAIT_ECX_INTERRUPT_BREAK 0x1 |
15 | 17 | ||
18 | static inline void __monitor(const void *eax, unsigned long ecx, | ||
19 | unsigned long edx) | ||
20 | { | ||
21 | /* "monitor %eax, %ecx, %edx;" */ | ||
22 | asm volatile(".byte 0x0f, 0x01, 0xc8;" | ||
23 | :: "a" (eax), "c" (ecx), "d"(edx)); | ||
24 | } | ||
25 | |||
26 | static inline void __mwait(unsigned long eax, unsigned long ecx) | ||
27 | { | ||
28 | /* "mwait %eax, %ecx;" */ | ||
29 | asm volatile(".byte 0x0f, 0x01, 0xc9;" | ||
30 | :: "a" (eax), "c" (ecx)); | ||
31 | } | ||
32 | |||
33 | /* | ||
34 | * This uses new MONITOR/MWAIT instructions on P4 processors with PNI, | ||
35 | * which can obviate IPI to trigger checking of need_resched. | ||
36 | * We execute MONITOR against need_resched and enter optimized wait state | ||
37 | * through MWAIT. Whenever someone changes need_resched, we would be woken | ||
38 | * up from MWAIT (without an IPI). | ||
39 | * | ||
40 | * New with Core Duo processors, MWAIT can take some hints based on CPU | ||
41 | * capability. | ||
42 | */ | ||
43 | static inline void mwait_idle_with_hints(unsigned long eax, unsigned long ecx) | ||
44 | { | ||
45 | if (!current_set_polling_and_test()) { | ||
46 | if (static_cpu_has(X86_FEATURE_CLFLUSH_MONITOR)) { | ||
47 | mb(); | ||
48 | clflush((void *)¤t_thread_info()->flags); | ||
49 | mb(); | ||
50 | } | ||
51 | |||
52 | __monitor((void *)¤t_thread_info()->flags, 0, 0); | ||
53 | if (!need_resched()) | ||
54 | __mwait(eax, ecx); | ||
55 | } | ||
56 | current_clr_polling(); | ||
57 | } | ||
58 | |||
16 | #endif /* _ASM_X86_MWAIT_H */ | 59 | #endif /* _ASM_X86_MWAIT_H */ |
diff --git a/arch/x86/include/asm/nmi.h b/arch/x86/include/asm/nmi.h index 86f9301903c8..5f2fc4441b11 100644 --- a/arch/x86/include/asm/nmi.h +++ b/arch/x86/include/asm/nmi.h | |||
@@ -1,6 +1,7 @@ | |||
1 | #ifndef _ASM_X86_NMI_H | 1 | #ifndef _ASM_X86_NMI_H |
2 | #define _ASM_X86_NMI_H | 2 | #define _ASM_X86_NMI_H |
3 | 3 | ||
4 | #include <linux/irq_work.h> | ||
4 | #include <linux/pm.h> | 5 | #include <linux/pm.h> |
5 | #include <asm/irq.h> | 6 | #include <asm/irq.h> |
6 | #include <asm/io.h> | 7 | #include <asm/io.h> |
@@ -38,6 +39,8 @@ typedef int (*nmi_handler_t)(unsigned int, struct pt_regs *); | |||
38 | struct nmiaction { | 39 | struct nmiaction { |
39 | struct list_head list; | 40 | struct list_head list; |
40 | nmi_handler_t handler; | 41 | nmi_handler_t handler; |
42 | u64 max_duration; | ||
43 | struct irq_work irq_work; | ||
41 | unsigned long flags; | 44 | unsigned long flags; |
42 | const char *name; | 45 | const char *name; |
43 | }; | 46 | }; |
diff --git a/arch/x86/include/asm/numaq.h b/arch/x86/include/asm/numaq.h deleted file mode 100644 index c3b3c322fd87..000000000000 --- a/arch/x86/include/asm/numaq.h +++ /dev/null | |||
@@ -1,171 +0,0 @@ | |||
1 | /* | ||
2 | * Written by: Patricia Gaughen, IBM Corporation | ||
3 | * | ||
4 | * Copyright (C) 2002, IBM Corp. | ||
5 | * | ||
6 | * All rights reserved. | ||
7 | * | ||
8 | * This program is free software; you can redistribute it and/or modify | ||
9 | * it under the terms of the GNU General Public License as published by | ||
10 | * the Free Software Foundation; either version 2 of the License, or | ||
11 | * (at your option) any later version. | ||
12 | * | ||
13 | * This program is distributed in the hope that it will be useful, but | ||
14 | * WITHOUT ANY WARRANTY; without even the implied warranty of | ||
15 | * MERCHANTABILITY OR FITNESS FOR A PARTICULAR PURPOSE, GOOD TITLE or | ||
16 | * NON INFRINGEMENT. See the GNU General Public License for more | ||
17 | * details. | ||
18 | * | ||
19 | * You should have received a copy of the GNU General Public License | ||
20 | * along with this program; if not, write to the Free Software | ||
21 | * Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA. | ||
22 | * | ||
23 | * Send feedback to <gone@us.ibm.com> | ||
24 | */ | ||
25 | |||
26 | #ifndef _ASM_X86_NUMAQ_H | ||
27 | #define _ASM_X86_NUMAQ_H | ||
28 | |||
29 | #ifdef CONFIG_X86_NUMAQ | ||
30 | |||
31 | extern int found_numaq; | ||
32 | extern int numaq_numa_init(void); | ||
33 | extern int pci_numaq_init(void); | ||
34 | |||
35 | extern void *xquad_portio; | ||
36 | |||
37 | #define XQUAD_PORTIO_BASE 0xfe400000 | ||
38 | #define XQUAD_PORTIO_QUAD 0x40000 /* 256k per quad. */ | ||
39 | #define XQUAD_PORT_ADDR(port, quad) (xquad_portio + (XQUAD_PORTIO_QUAD*quad) + port) | ||
40 | |||
41 | /* | ||
42 | * SYS_CFG_DATA_PRIV_ADDR, struct eachquadmem, and struct sys_cfg_data describe the per-quad system configuration area. | ||
43 | */ | ||
44 | #define SYS_CFG_DATA_PRIV_ADDR 0x0009d000 /* place for scd in private | ||
45 | quad space */ | ||
46 | |||
47 | /* | ||
48 | * Communication area for each processor on lynxer-processor tests. | ||
49 | * | ||
50 | * NOTE: If you change the size of this eachproc structure you need | ||
51 | * to change the definition for EACH_QUAD_SIZE. | ||
52 | */ | ||
53 | struct eachquadmem { | ||
54 | unsigned int priv_mem_start; /* Starting address of this */ | ||
55 | /* quad's private memory. */ | ||
56 | /* This is always 0. */ | ||
57 | /* In MB. */ | ||
58 | unsigned int priv_mem_size; /* Size of this quad's */ | ||
59 | /* private memory. */ | ||
60 | /* In MB. */ | ||
61 | unsigned int low_shrd_mem_strp_start;/* Starting address of this */ | ||
62 | /* quad's low shared block */ | ||
63 | /* (untranslated). */ | ||
64 | /* In MB. */ | ||
65 | unsigned int low_shrd_mem_start; /* Starting address of this */ | ||
66 | /* quad's low shared memory */ | ||
67 | /* (untranslated). */ | ||
68 | /* In MB. */ | ||
69 | unsigned int low_shrd_mem_size; /* Size of this quad's low */ | ||
70 | /* shared memory. */ | ||
71 | /* In MB. */ | ||
72 | unsigned int lmmio_copb_start; /* Starting address of this */ | ||
73 | /* quad's local memory */ | ||
74 | /* mapped I/O in the */ | ||
75 | /* compatibility OPB. */ | ||
76 | /* In MB. */ | ||
77 | unsigned int lmmio_copb_size; /* Size of this quad's local */ | ||
78 | /* memory mapped I/O in the */ | ||
79 | /* compatibility OPB. */ | ||
80 | /* In MB. */ | ||
81 | unsigned int lmmio_nopb_start; /* Starting address of this */ | ||
82 | /* quad's local memory */ | ||
83 | /* mapped I/O in the */ | ||
84 | /* non-compatibility OPB. */ | ||
85 | /* In MB. */ | ||
86 | unsigned int lmmio_nopb_size; /* Size of this quad's local */ | ||
87 | /* memory mapped I/O in the */ | ||
88 | /* non-compatibility OPB. */ | ||
89 | /* In MB. */ | ||
90 | unsigned int io_apic_0_start; /* Starting address of I/O */ | ||
91 | /* APIC 0. */ | ||
92 | unsigned int io_apic_0_sz; /* Size I/O APIC 0. */ | ||
93 | unsigned int io_apic_1_start; /* Starting address of I/O */ | ||
94 | /* APIC 1. */ | ||
95 | unsigned int io_apic_1_sz; /* Size I/O APIC 1. */ | ||
96 | unsigned int hi_shrd_mem_start; /* Starting address of this */ | ||
97 | /* quad's high shared memory.*/ | ||
98 | /* In MB. */ | ||
99 | unsigned int hi_shrd_mem_size; /* Size of this quad's high */ | ||
100 | /* shared memory. */ | ||
101 | /* In MB. */ | ||
102 | unsigned int mps_table_addr; /* Address of this quad's */ | ||
103 | /* MPS tables from BIOS, */ | ||
104 | /* in system space.*/ | ||
105 | unsigned int lcl_MDC_pio_addr; /* Port-I/O address for */ | ||
106 | /* local access of MDC. */ | ||
107 | unsigned int rmt_MDC_mmpio_addr; /* MM-Port-I/O address for */ | ||
108 | /* remote access of MDC. */ | ||
109 | unsigned int mm_port_io_start; /* Starting address of this */ | ||
110 | /* quad's memory mapped Port */ | ||
111 | /* I/O space. */ | ||
112 | unsigned int mm_port_io_size; /* Size of this quad's memory*/ | ||
113 | /* mapped Port I/O space. */ | ||
114 | unsigned int mm_rmt_io_apic_start; /* Starting address of this */ | ||
115 | /* quad's memory mapped */ | ||
116 | /* remote I/O APIC space. */ | ||
117 | unsigned int mm_rmt_io_apic_size; /* Size of this quad's memory*/ | ||
118 | /* mapped remote I/O APIC */ | ||
119 | /* space. */ | ||
120 | unsigned int mm_isa_start; /* Starting address of this */ | ||
121 | /* quad's memory mapped ISA */ | ||
122 | /* space (contains MDC */ | ||
123 | /* memory space). */ | ||
124 | unsigned int mm_isa_size; /* Size of this quad's memory*/ | ||
125 | /* mapped ISA space (contains*/ | ||
126 | /* MDC memory space). */ | ||
127 | unsigned int rmt_qmi_addr; /* Remote addr to access QMI.*/ | ||
128 | unsigned int lcl_qmi_addr; /* Local addr to access QMI. */ | ||
129 | }; | ||
130 | |||
131 | /* | ||
132 | * Note: This structure must NOT be changed unless the multiproc and | ||
133 | * OS are changed to reflect the new structure. | ||
134 | */ | ||
135 | struct sys_cfg_data { | ||
136 | unsigned int quad_id; | ||
137 | unsigned int bsp_proc_id; /* Boot Strap Processor in this quad. */ | ||
138 | unsigned int scd_version; /* Version number of this table. */ | ||
139 | unsigned int first_quad_id; | ||
140 | unsigned int quads_present31_0; /* 1 bit for each quad */ | ||
141 | unsigned int quads_present63_32; /* 1 bit for each quad */ | ||
142 | unsigned int config_flags; | ||
143 | unsigned int boot_flags; | ||
144 | unsigned int csr_start_addr; /* Absolute value (not in MB) */ | ||
145 | unsigned int csr_size; /* Absolute value (not in MB) */ | ||
146 | unsigned int lcl_apic_start_addr; /* Absolute value (not in MB) */ | ||
147 | unsigned int lcl_apic_size; /* Absolute value (not in MB) */ | ||
148 | unsigned int low_shrd_mem_base; /* 0 or 512MB or 1GB */ | ||
149 | unsigned int low_shrd_mem_quad_offset; /* 0,128M,256M,512M,1G */ | ||
150 | /* may not be totally populated */ | ||
151 | unsigned int split_mem_enbl; /* 0 for no low shared memory */ | ||
152 | unsigned int mmio_sz; /* Size of total system memory mapped I/O */ | ||
153 | /* (in MB). */ | ||
154 | unsigned int quad_spin_lock; /* Spare location used for quad */ | ||
155 | /* bringup. */ | ||
156 | unsigned int nonzero55; /* For checksumming. */ | ||
157 | unsigned int nonzeroaa; /* For checksumming. */ | ||
158 | unsigned int scd_magic_number; | ||
159 | unsigned int system_type; | ||
160 | unsigned int checksum; | ||
161 | /* | ||
162 | * memory configuration area for each quad | ||
163 | */ | ||
164 | struct eachquadmem eq[MAX_NUMNODES]; /* indexed by quad id */ | ||
165 | }; | ||
166 | |||
167 | void numaq_tsc_disable(void); | ||
168 | |||
169 | #endif /* CONFIG_X86_NUMAQ */ | ||
170 | #endif /* _ASM_X86_NUMAQ_H */ | ||
171 | |||
diff --git a/arch/x86/include/asm/page.h b/arch/x86/include/asm/page.h index c87892442e53..775873d3be55 100644 --- a/arch/x86/include/asm/page.h +++ b/arch/x86/include/asm/page.h | |||
@@ -71,6 +71,7 @@ extern bool __virt_addr_valid(unsigned long kaddr); | |||
71 | #include <asm-generic/getorder.h> | 71 | #include <asm-generic/getorder.h> |
72 | 72 | ||
73 | #define __HAVE_ARCH_GATE_AREA 1 | 73 | #define __HAVE_ARCH_GATE_AREA 1 |
74 | #define HAVE_ARCH_HUGETLB_UNMAPPED_AREA | ||
74 | 75 | ||
75 | #endif /* __KERNEL__ */ | 76 | #endif /* __KERNEL__ */ |
76 | #endif /* _ASM_X86_PAGE_H */ | 77 | #endif /* _ASM_X86_PAGE_H */ |
diff --git a/arch/x86/include/asm/page_32.h b/arch/x86/include/asm/page_32.h index 4d550d04b609..904f528cc8e8 100644 --- a/arch/x86/include/asm/page_32.h +++ b/arch/x86/include/asm/page_32.h | |||
@@ -5,10 +5,6 @@ | |||
5 | 5 | ||
6 | #ifndef __ASSEMBLY__ | 6 | #ifndef __ASSEMBLY__ |
7 | 7 | ||
8 | #ifdef CONFIG_HUGETLB_PAGE | ||
9 | #define HAVE_ARCH_HUGETLB_UNMAPPED_AREA | ||
10 | #endif | ||
11 | |||
12 | #define __phys_addr_nodebug(x) ((x) - PAGE_OFFSET) | 8 | #define __phys_addr_nodebug(x) ((x) - PAGE_OFFSET) |
13 | #ifdef CONFIG_DEBUG_VIRTUAL | 9 | #ifdef CONFIG_DEBUG_VIRTUAL |
14 | extern unsigned long __phys_addr(unsigned long); | 10 | extern unsigned long __phys_addr(unsigned long); |
diff --git a/arch/x86/include/asm/page_64_types.h b/arch/x86/include/asm/page_64_types.h index 43dcd804ebd5..8de6d9cf3b95 100644 --- a/arch/x86/include/asm/page_64_types.h +++ b/arch/x86/include/asm/page_64_types.h | |||
@@ -39,9 +39,18 @@ | |||
39 | #define __VIRTUAL_MASK_SHIFT 47 | 39 | #define __VIRTUAL_MASK_SHIFT 47 |
40 | 40 | ||
41 | /* | 41 | /* |
42 | * Kernel image size is limited to 512 MB (see level2_kernel_pgt in | 42 | * Kernel image size is limited to 1GiB due to the fixmap living in the |
43 | * arch/x86/kernel/head_64.S), and it is mapped here: | 43 | * next 1GiB (see level2_kernel_pgt in arch/x86/kernel/head_64.S). Use |
44 | * 512MiB by default, leaving 1.5GiB for modules once the page tables | ||
45 | * are fully set up. If kernel ASLR is configured, it can extend the | ||
46 | * kernel page table mapping, reducing the size of the modules area. | ||
44 | */ | 47 | */ |
45 | #define KERNEL_IMAGE_SIZE (512 * 1024 * 1024) | 48 | #define KERNEL_IMAGE_SIZE_DEFAULT (512 * 1024 * 1024) |
49 | #if defined(CONFIG_RANDOMIZE_BASE) && \ | ||
50 | CONFIG_RANDOMIZE_BASE_MAX_OFFSET > KERNEL_IMAGE_SIZE_DEFAULT | ||
51 | #define KERNEL_IMAGE_SIZE CONFIG_RANDOMIZE_BASE_MAX_OFFSET | ||
52 | #else | ||
53 | #define KERNEL_IMAGE_SIZE KERNEL_IMAGE_SIZE_DEFAULT | ||
54 | #endif | ||
46 | 55 | ||
47 | #endif /* _ASM_X86_PAGE_64_DEFS_H */ | 56 | #endif /* _ASM_X86_PAGE_64_DEFS_H */ |
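
The trade-off the new comment describes is zero-sum inside the top 2GiB kernel text mapping: whatever KERNEL_IMAGE_SIZE grows to comes straight out of the modules area. The numbers, as a quick computation (the 2GiB total reflects the standard -2GB kernel mapping and is assumed here):

#include <stdio.h>

#define MiB (1UL << 20)
#define GiB (1UL << 30)

int main(void)
{
	unsigned long total = 2 * GiB;	/* kernel image + modules window */
	unsigned long sizes[] = { 512 * MiB, 1 * GiB };	/* default vs. max ASLR */

	for (int i = 0; i < 2; i++)
		printf("KERNEL_IMAGE_SIZE %4lu MiB -> modules %4lu MiB\n",
		       sizes[i] / MiB, (total - sizes[i]) / MiB);
	return 0;
}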
diff --git a/arch/x86/include/asm/paravirt.h b/arch/x86/include/asm/paravirt.h index 401f350ef71b..cd6e1610e29e 100644 --- a/arch/x86/include/asm/paravirt.h +++ b/arch/x86/include/asm/paravirt.h | |||
@@ -781,9 +781,9 @@ static __always_inline void __ticket_unlock_kick(struct arch_spinlock *lock, | |||
781 | */ | 781 | */ |
782 | #define PV_CALLEE_SAVE_REGS_THUNK(func) \ | 782 | #define PV_CALLEE_SAVE_REGS_THUNK(func) \ |
783 | extern typeof(func) __raw_callee_save_##func; \ | 783 | extern typeof(func) __raw_callee_save_##func; \ |
784 | static void *__##func##__ __used = func; \ | ||
785 | \ | 784 | \ |
786 | asm(".pushsection .text;" \ | 785 | asm(".pushsection .text;" \ |
786 | ".globl __raw_callee_save_" #func " ; " \ | ||
787 | "__raw_callee_save_" #func ": " \ | 787 | "__raw_callee_save_" #func ": " \ |
788 | PV_SAVE_ALL_CALLER_REGS \ | 788 | PV_SAVE_ALL_CALLER_REGS \ |
789 | "call " #func ";" \ | 789 | "call " #func ";" \ |
diff --git a/arch/x86/include/asm/paravirt_types.h b/arch/x86/include/asm/paravirt_types.h index aab8f671b523..7549b8b369e4 100644 --- a/arch/x86/include/asm/paravirt_types.h +++ b/arch/x86/include/asm/paravirt_types.h | |||
@@ -388,10 +388,11 @@ extern struct pv_lock_ops pv_lock_ops; | |||
388 | _paravirt_alt(insn_string, "%c[paravirt_typenum]", "%c[paravirt_clobber]") | 388 | _paravirt_alt(insn_string, "%c[paravirt_typenum]", "%c[paravirt_clobber]") |
389 | 389 | ||
390 | /* Simple instruction patching code. */ | 390 | /* Simple instruction patching code. */ |
391 | #define DEF_NATIVE(ops, name, code) \ | 391 | #define NATIVE_LABEL(a,x,b) "\n\t.globl " a #x "_" #b "\n" a #x "_" #b ":\n\t" |
392 | extern const char start_##ops##_##name[] __visible, \ | 392 | |
393 | end_##ops##_##name[] __visible; \ | 393 | #define DEF_NATIVE(ops, name, code) \ |
394 | asm("start_" #ops "_" #name ": " code "; end_" #ops "_" #name ":") | 394 | __visible extern const char start_##ops##_##name[], end_##ops##_##name[]; \ |
395 | asm(NATIVE_LABEL("start_", ops, name) code NATIVE_LABEL("end_", ops, name)) | ||
395 | 396 | ||
396 | unsigned paravirt_patch_nop(void); | 397 | unsigned paravirt_patch_nop(void); |
397 | unsigned paravirt_patch_ident_32(void *insnbuf, unsigned len); | 398 | unsigned paravirt_patch_ident_32(void *insnbuf, unsigned len); |
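
With NATIVE_LABEL in place, each start/end marker gains an explicit .globl so the symbols survive symbol-visibility handling. Hand-expanding DEF_NATIVE(pv_irq_ops, irq_disable, "cli"), a real use in the paravirt patching code, now yields roughly the following (__visible is the kernel's attribute macro from compiler.h):

__visible extern const char start_pv_irq_ops_irq_disable[],
			    end_pv_irq_ops_irq_disable[];
asm("\n\t.globl start_pv_irq_ops_irq_disable\n"
    "start_pv_irq_ops_irq_disable:\n\t"
    "cli"
    "\n\t.globl end_pv_irq_ops_irq_disable\n"
    "end_pv_irq_ops_irq_disable:\n\t");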
diff --git a/arch/x86/include/asm/pci.h b/arch/x86/include/asm/pci.h index 947b5c417e83..96ae4f4040bb 100644 --- a/arch/x86/include/asm/pci.h +++ b/arch/x86/include/asm/pci.h | |||
@@ -26,11 +26,6 @@ extern int pci_routeirq; | |||
26 | extern int noioapicquirk; | 26 | extern int noioapicquirk; |
27 | extern int noioapicreroute; | 27 | extern int noioapicreroute; |
28 | 28 | ||
29 | /* scan a bus after allocating a pci_sysdata for it */ | ||
30 | extern struct pci_bus *pci_scan_bus_on_node(int busno, struct pci_ops *ops, | ||
31 | int node); | ||
32 | extern struct pci_bus *pci_scan_bus_with_sysdata(int busno); | ||
33 | |||
34 | #ifdef CONFIG_PCI | 29 | #ifdef CONFIG_PCI |
35 | 30 | ||
36 | #ifdef CONFIG_PCI_DOMAINS | 31 | #ifdef CONFIG_PCI_DOMAINS |
@@ -70,7 +65,7 @@ extern unsigned long pci_mem_start; | |||
70 | 65 | ||
71 | extern int pcibios_enabled; | 66 | extern int pcibios_enabled; |
72 | void pcibios_config_init(void); | 67 | void pcibios_config_init(void); |
73 | struct pci_bus *pcibios_scan_root(int bus); | 68 | void pcibios_scan_root(int bus); |
74 | 69 | ||
75 | void pcibios_set_master(struct pci_dev *dev); | 70 | void pcibios_set_master(struct pci_dev *dev); |
76 | void pcibios_penalize_isa_irq(int irq, int active); | 71 | void pcibios_penalize_isa_irq(int irq, int active); |
@@ -104,7 +99,7 @@ extern void pci_iommu_alloc(void); | |||
104 | struct msi_desc; | 99 | struct msi_desc; |
105 | int native_setup_msi_irqs(struct pci_dev *dev, int nvec, int type); | 100 | int native_setup_msi_irqs(struct pci_dev *dev, int nvec, int type); |
106 | void native_teardown_msi_irq(unsigned int irq); | 101 | void native_teardown_msi_irq(unsigned int irq); |
107 | void native_restore_msi_irqs(struct pci_dev *dev, int irq); | 102 | void native_restore_msi_irqs(struct pci_dev *dev); |
108 | int setup_msi_irq(struct pci_dev *dev, struct msi_desc *msidesc, | 103 | int setup_msi_irq(struct pci_dev *dev, struct msi_desc *msidesc, |
109 | unsigned int irq_base, unsigned int irq_offset); | 104 | unsigned int irq_base, unsigned int irq_offset); |
110 | #else | 105 | #else |
@@ -125,7 +120,6 @@ int setup_msi_irq(struct pci_dev *dev, struct msi_desc *msidesc, | |||
125 | 120 | ||
126 | /* generic pci stuff */ | 121 | /* generic pci stuff */ |
127 | #include <asm-generic/pci.h> | 122 | #include <asm-generic/pci.h> |
128 | #define PCIBIOS_MAX_MEM_32 0xffffffff | ||
129 | 123 | ||
130 | #ifdef CONFIG_NUMA | 124 | #ifdef CONFIG_NUMA |
131 | /* Returns the node based on pci bus */ | 125 | /* Returns the node based on pci bus */ |
diff --git a/arch/x86/include/asm/percpu.h b/arch/x86/include/asm/percpu.h index 94220d14d5cc..851bcdc5db04 100644 --- a/arch/x86/include/asm/percpu.h +++ b/arch/x86/include/asm/percpu.h | |||
@@ -52,7 +52,7 @@ | |||
52 | * Compared to the generic __my_cpu_offset version, the following | 52 | * Compared to the generic __my_cpu_offset version, the following |
53 | * saves one instruction and avoids clobbering a temp register. | 53 | * saves one instruction and avoids clobbering a temp register. |
54 | */ | 54 | */ |
55 | #define __this_cpu_ptr(ptr) \ | 55 | #define raw_cpu_ptr(ptr) \ |
56 | ({ \ | 56 | ({ \ |
57 | unsigned long tcp_ptr__; \ | 57 | unsigned long tcp_ptr__; \ |
58 | __verify_pcpu_ptr(ptr); \ | 58 | __verify_pcpu_ptr(ptr); \ |
@@ -362,25 +362,25 @@ do { \ | |||
362 | */ | 362 | */ |
363 | #define this_cpu_read_stable(var) percpu_from_op("mov", var, "p" (&(var))) | 363 | #define this_cpu_read_stable(var) percpu_from_op("mov", var, "p" (&(var))) |
364 | 364 | ||
365 | #define __this_cpu_read_1(pcp) percpu_from_op("mov", (pcp), "m"(pcp)) | 365 | #define raw_cpu_read_1(pcp) percpu_from_op("mov", (pcp), "m"(pcp)) |
366 | #define __this_cpu_read_2(pcp) percpu_from_op("mov", (pcp), "m"(pcp)) | 366 | #define raw_cpu_read_2(pcp) percpu_from_op("mov", (pcp), "m"(pcp)) |
367 | #define __this_cpu_read_4(pcp) percpu_from_op("mov", (pcp), "m"(pcp)) | 367 | #define raw_cpu_read_4(pcp) percpu_from_op("mov", (pcp), "m"(pcp)) |
368 | 368 | ||
369 | #define __this_cpu_write_1(pcp, val) percpu_to_op("mov", (pcp), val) | 369 | #define raw_cpu_write_1(pcp, val) percpu_to_op("mov", (pcp), val) |
370 | #define __this_cpu_write_2(pcp, val) percpu_to_op("mov", (pcp), val) | 370 | #define raw_cpu_write_2(pcp, val) percpu_to_op("mov", (pcp), val) |
371 | #define __this_cpu_write_4(pcp, val) percpu_to_op("mov", (pcp), val) | 371 | #define raw_cpu_write_4(pcp, val) percpu_to_op("mov", (pcp), val) |
372 | #define __this_cpu_add_1(pcp, val) percpu_add_op((pcp), val) | 372 | #define raw_cpu_add_1(pcp, val) percpu_add_op((pcp), val) |
373 | #define __this_cpu_add_2(pcp, val) percpu_add_op((pcp), val) | 373 | #define raw_cpu_add_2(pcp, val) percpu_add_op((pcp), val) |
374 | #define __this_cpu_add_4(pcp, val) percpu_add_op((pcp), val) | 374 | #define raw_cpu_add_4(pcp, val) percpu_add_op((pcp), val) |
375 | #define __this_cpu_and_1(pcp, val) percpu_to_op("and", (pcp), val) | 375 | #define raw_cpu_and_1(pcp, val) percpu_to_op("and", (pcp), val) |
376 | #define __this_cpu_and_2(pcp, val) percpu_to_op("and", (pcp), val) | 376 | #define raw_cpu_and_2(pcp, val) percpu_to_op("and", (pcp), val) |
377 | #define __this_cpu_and_4(pcp, val) percpu_to_op("and", (pcp), val) | 377 | #define raw_cpu_and_4(pcp, val) percpu_to_op("and", (pcp), val) |
378 | #define __this_cpu_or_1(pcp, val) percpu_to_op("or", (pcp), val) | 378 | #define raw_cpu_or_1(pcp, val) percpu_to_op("or", (pcp), val) |
379 | #define __this_cpu_or_2(pcp, val) percpu_to_op("or", (pcp), val) | 379 | #define raw_cpu_or_2(pcp, val) percpu_to_op("or", (pcp), val) |
380 | #define __this_cpu_or_4(pcp, val) percpu_to_op("or", (pcp), val) | 380 | #define raw_cpu_or_4(pcp, val) percpu_to_op("or", (pcp), val) |
381 | #define __this_cpu_xchg_1(pcp, val) percpu_xchg_op(pcp, val) | 381 | #define raw_cpu_xchg_1(pcp, val) percpu_xchg_op(pcp, val) |
382 | #define __this_cpu_xchg_2(pcp, val) percpu_xchg_op(pcp, val) | 382 | #define raw_cpu_xchg_2(pcp, val) percpu_xchg_op(pcp, val) |
383 | #define __this_cpu_xchg_4(pcp, val) percpu_xchg_op(pcp, val) | 383 | #define raw_cpu_xchg_4(pcp, val) percpu_xchg_op(pcp, val) |
384 | 384 | ||
385 | #define this_cpu_read_1(pcp) percpu_from_op("mov", (pcp), "m"(pcp)) | 385 | #define this_cpu_read_1(pcp) percpu_from_op("mov", (pcp), "m"(pcp)) |
386 | #define this_cpu_read_2(pcp) percpu_from_op("mov", (pcp), "m"(pcp)) | 386 | #define this_cpu_read_2(pcp) percpu_from_op("mov", (pcp), "m"(pcp)) |
@@ -401,16 +401,16 @@ do { \ | |||
401 | #define this_cpu_xchg_2(pcp, nval) percpu_xchg_op(pcp, nval) | 401 | #define this_cpu_xchg_2(pcp, nval) percpu_xchg_op(pcp, nval) |
402 | #define this_cpu_xchg_4(pcp, nval) percpu_xchg_op(pcp, nval) | 402 | #define this_cpu_xchg_4(pcp, nval) percpu_xchg_op(pcp, nval) |
403 | 403 | ||
404 | #define __this_cpu_add_return_1(pcp, val) percpu_add_return_op(pcp, val) | 404 | #define raw_cpu_add_return_1(pcp, val) percpu_add_return_op(pcp, val) |
405 | #define __this_cpu_add_return_2(pcp, val) percpu_add_return_op(pcp, val) | 405 | #define raw_cpu_add_return_2(pcp, val) percpu_add_return_op(pcp, val) |
406 | #define __this_cpu_add_return_4(pcp, val) percpu_add_return_op(pcp, val) | 406 | #define raw_cpu_add_return_4(pcp, val) percpu_add_return_op(pcp, val) |
407 | #define __this_cpu_cmpxchg_1(pcp, oval, nval) percpu_cmpxchg_op(pcp, oval, nval) | 407 | #define raw_cpu_cmpxchg_1(pcp, oval, nval) percpu_cmpxchg_op(pcp, oval, nval) |
408 | #define __this_cpu_cmpxchg_2(pcp, oval, nval) percpu_cmpxchg_op(pcp, oval, nval) | 408 | #define raw_cpu_cmpxchg_2(pcp, oval, nval) percpu_cmpxchg_op(pcp, oval, nval) |
409 | #define __this_cpu_cmpxchg_4(pcp, oval, nval) percpu_cmpxchg_op(pcp, oval, nval) | 409 | #define raw_cpu_cmpxchg_4(pcp, oval, nval) percpu_cmpxchg_op(pcp, oval, nval) |
410 | 410 | ||
411 | #define this_cpu_add_return_1(pcp, val) percpu_add_return_op(pcp, val) | 411 | #define this_cpu_add_return_1(pcp, val) percpu_add_return_op(pcp, val) |
412 | #define this_cpu_add_return_2(pcp, val) percpu_add_return_op(pcp, val) | 412 | #define this_cpu_add_return_2(pcp, val) percpu_add_return_op(pcp, val) |
413 | #define this_cpu_add_return_4(pcp, val) percpu_add_return_op(pcp, val) | 413 | #define this_cpu_add_return_4(pcp, val) percpu_add_return_op(pcp, val) |
414 | #define this_cpu_cmpxchg_1(pcp, oval, nval) percpu_cmpxchg_op(pcp, oval, nval) | 414 | #define this_cpu_cmpxchg_1(pcp, oval, nval) percpu_cmpxchg_op(pcp, oval, nval) |
415 | #define this_cpu_cmpxchg_2(pcp, oval, nval) percpu_cmpxchg_op(pcp, oval, nval) | 415 | #define this_cpu_cmpxchg_2(pcp, oval, nval) percpu_cmpxchg_op(pcp, oval, nval) |
416 | #define this_cpu_cmpxchg_4(pcp, oval, nval) percpu_cmpxchg_op(pcp, oval, nval) | 416 | #define this_cpu_cmpxchg_4(pcp, oval, nval) percpu_cmpxchg_op(pcp, oval, nval) |
@@ -427,7 +427,7 @@ do { \ | |||
427 | __ret; \ | 427 | __ret; \ |
428 | }) | 428 | }) |
429 | 429 | ||
430 | #define __this_cpu_cmpxchg_double_4 percpu_cmpxchg8b_double | 430 | #define raw_cpu_cmpxchg_double_4 percpu_cmpxchg8b_double |
431 | #define this_cpu_cmpxchg_double_4 percpu_cmpxchg8b_double | 431 | #define this_cpu_cmpxchg_double_4 percpu_cmpxchg8b_double |
432 | #endif /* CONFIG_X86_CMPXCHG64 */ | 432 | #endif /* CONFIG_X86_CMPXCHG64 */ |
433 | 433 | ||
@@ -436,22 +436,22 @@ do { \ | |||
436 | * 32 bit must fall back to generic operations. | 436 | * 32 bit must fall back to generic operations. |
437 | */ | 437 | */ |
438 | #ifdef CONFIG_X86_64 | 438 | #ifdef CONFIG_X86_64 |
439 | #define __this_cpu_read_8(pcp) percpu_from_op("mov", (pcp), "m"(pcp)) | 439 | #define raw_cpu_read_8(pcp) percpu_from_op("mov", (pcp), "m"(pcp)) |
440 | #define __this_cpu_write_8(pcp, val) percpu_to_op("mov", (pcp), val) | 440 | #define raw_cpu_write_8(pcp, val) percpu_to_op("mov", (pcp), val) |
441 | #define __this_cpu_add_8(pcp, val) percpu_add_op((pcp), val) | 441 | #define raw_cpu_add_8(pcp, val) percpu_add_op((pcp), val) |
442 | #define __this_cpu_and_8(pcp, val) percpu_to_op("and", (pcp), val) | 442 | #define raw_cpu_and_8(pcp, val) percpu_to_op("and", (pcp), val) |
443 | #define __this_cpu_or_8(pcp, val) percpu_to_op("or", (pcp), val) | 443 | #define raw_cpu_or_8(pcp, val) percpu_to_op("or", (pcp), val) |
444 | #define __this_cpu_add_return_8(pcp, val) percpu_add_return_op(pcp, val) | 444 | #define raw_cpu_add_return_8(pcp, val) percpu_add_return_op(pcp, val) |
445 | #define __this_cpu_xchg_8(pcp, nval) percpu_xchg_op(pcp, nval) | 445 | #define raw_cpu_xchg_8(pcp, nval) percpu_xchg_op(pcp, nval) |
446 | #define __this_cpu_cmpxchg_8(pcp, oval, nval) percpu_cmpxchg_op(pcp, oval, nval) | 446 | #define raw_cpu_cmpxchg_8(pcp, oval, nval) percpu_cmpxchg_op(pcp, oval, nval) |
447 | 447 | ||
448 | #define this_cpu_read_8(pcp) percpu_from_op("mov", (pcp), "m"(pcp)) | 448 | #define this_cpu_read_8(pcp) percpu_from_op("mov", (pcp), "m"(pcp)) |
449 | #define this_cpu_write_8(pcp, val) percpu_to_op("mov", (pcp), val) | 449 | #define this_cpu_write_8(pcp, val) percpu_to_op("mov", (pcp), val) |
450 | #define this_cpu_add_8(pcp, val) percpu_add_op((pcp), val) | 450 | #define this_cpu_add_8(pcp, val) percpu_add_op((pcp), val) |
451 | #define this_cpu_and_8(pcp, val) percpu_to_op("and", (pcp), val) | 451 | #define this_cpu_and_8(pcp, val) percpu_to_op("and", (pcp), val) |
452 | #define this_cpu_or_8(pcp, val) percpu_to_op("or", (pcp), val) | 452 | #define this_cpu_or_8(pcp, val) percpu_to_op("or", (pcp), val) |
453 | #define this_cpu_add_return_8(pcp, val) percpu_add_return_op(pcp, val) | 453 | #define this_cpu_add_return_8(pcp, val) percpu_add_return_op(pcp, val) |
454 | #define this_cpu_xchg_8(pcp, nval) percpu_xchg_op(pcp, nval) | 454 | #define this_cpu_xchg_8(pcp, nval) percpu_xchg_op(pcp, nval) |
455 | #define this_cpu_cmpxchg_8(pcp, oval, nval) percpu_cmpxchg_op(pcp, oval, nval) | 455 | #define this_cpu_cmpxchg_8(pcp, oval, nval) percpu_cmpxchg_op(pcp, oval, nval) |
456 | 456 | ||
457 | /* | 457 | /* |
@@ -474,7 +474,7 @@ do { \ | |||
474 | __ret; \ | 474 | __ret; \ |
475 | }) | 475 | }) |
476 | 476 | ||
477 | #define __this_cpu_cmpxchg_double_8 percpu_cmpxchg16b_double | 477 | #define raw_cpu_cmpxchg_double_8 percpu_cmpxchg16b_double |
478 | #define this_cpu_cmpxchg_double_8 percpu_cmpxchg16b_double | 478 | #define this_cpu_cmpxchg_double_8 percpu_cmpxchg16b_double |
479 | 479 | ||
480 | #endif | 480 | #endif |
@@ -495,9 +495,9 @@ static __always_inline int x86_this_cpu_constant_test_bit(unsigned int nr, | |||
495 | unsigned long __percpu *a = (unsigned long *)addr + nr / BITS_PER_LONG; | 495 | unsigned long __percpu *a = (unsigned long *)addr + nr / BITS_PER_LONG; |
496 | 496 | ||
497 | #ifdef CONFIG_X86_64 | 497 | #ifdef CONFIG_X86_64 |
498 | return ((1UL << (nr % BITS_PER_LONG)) & __this_cpu_read_8(*a)) != 0; | 498 | return ((1UL << (nr % BITS_PER_LONG)) & raw_cpu_read_8(*a)) != 0; |
499 | #else | 499 | #else |
500 | return ((1UL << (nr % BITS_PER_LONG)) & __this_cpu_read_4(*a)) != 0; | 500 | return ((1UL << (nr % BITS_PER_LONG)) & raw_cpu_read_4(*a)) != 0; |
501 | #endif | 501 | #endif |
502 | } | 502 | } |
503 | 503 | ||
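[Editor's note] The raw_cpu_read_{4,8} call in x86_this_cpu_constant_test_bit above boils down to ordinary word-and-bit index math. A minimal user-space sketch of that math, with the per-CPU access modelled as a plain array read (all names here are invented for the demo):

    /* Same word/bit split as the kernel helper, minus the percpu segment. */
    #include <stdio.h>
    #include <limits.h>

    #define BITS_PER_LONG (sizeof(unsigned long) * CHAR_BIT)

    static int constant_test_bit(unsigned int nr, const unsigned long *addr)
    {
            /* Pick the word holding bit 'nr', then mask the bit inside it. */
            const unsigned long *word = addr + nr / BITS_PER_LONG;
            return ((1UL << (nr % BITS_PER_LONG)) & *word) != 0;
    }

    int main(void)
    {
            unsigned long bitmap[2] = { 0 };
            bitmap[1] = 1UL << 3;   /* set bit number BITS_PER_LONG + 3 */
            printf("%d\n", constant_test_bit(BITS_PER_LONG + 3, bitmap)); /* 1 */
            printf("%d\n", constant_test_bit(5, bitmap));                 /* 0 */
            return 0;
    }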
diff --git a/arch/x86/include/asm/pgtable-2level.h b/arch/x86/include/asm/pgtable-2level.h index 3bf2dd0cf61f..0d193e234647 100644 --- a/arch/x86/include/asm/pgtable-2level.h +++ b/arch/x86/include/asm/pgtable-2level.h | |||
@@ -55,6 +55,13 @@ static inline pmd_t native_pmdp_get_and_clear(pmd_t *xp) | |||
55 | #define native_pmdp_get_and_clear(xp) native_local_pmdp_get_and_clear(xp) | 55 | #define native_pmdp_get_and_clear(xp) native_local_pmdp_get_and_clear(xp) |
56 | #endif | 56 | #endif |
57 | 57 | ||
58 | /* Bit manipulation helper on pte/pgoff entry */ | ||
59 | static inline unsigned long pte_bitop(unsigned long value, unsigned int rightshift, | ||
60 | unsigned long mask, unsigned int leftshift) | ||
61 | { | ||
62 | return ((value >> rightshift) & mask) << leftshift; | ||
63 | } | ||
64 | |||
58 | #ifdef CONFIG_MEM_SOFT_DIRTY | 65 | #ifdef CONFIG_MEM_SOFT_DIRTY |
59 | 66 | ||
60 | /* | 67 | /* |
@@ -71,31 +78,34 @@ static inline pmd_t native_pmdp_get_and_clear(pmd_t *xp) | |||
71 | #define PTE_FILE_BITS2 (PTE_FILE_SHIFT3 - PTE_FILE_SHIFT2 - 1) | 78 | #define PTE_FILE_BITS2 (PTE_FILE_SHIFT3 - PTE_FILE_SHIFT2 - 1) |
72 | #define PTE_FILE_BITS3 (PTE_FILE_SHIFT4 - PTE_FILE_SHIFT3 - 1) | 79 | #define PTE_FILE_BITS3 (PTE_FILE_SHIFT4 - PTE_FILE_SHIFT3 - 1) |
73 | 80 | ||
74 | #define pte_to_pgoff(pte) \ | 81 | #define PTE_FILE_MASK1 ((1U << PTE_FILE_BITS1) - 1) |
75 | ((((pte).pte_low >> (PTE_FILE_SHIFT1)) \ | 82 | #define PTE_FILE_MASK2 ((1U << PTE_FILE_BITS2) - 1) |
76 | & ((1U << PTE_FILE_BITS1) - 1))) \ | 83 | #define PTE_FILE_MASK3 ((1U << PTE_FILE_BITS3) - 1) |
77 | + ((((pte).pte_low >> (PTE_FILE_SHIFT2)) \ | 84 | |
78 | & ((1U << PTE_FILE_BITS2) - 1)) \ | 85 | #define PTE_FILE_LSHIFT2 (PTE_FILE_BITS1) |
79 | << (PTE_FILE_BITS1)) \ | 86 | #define PTE_FILE_LSHIFT3 (PTE_FILE_BITS1 + PTE_FILE_BITS2) |
80 | + ((((pte).pte_low >> (PTE_FILE_SHIFT3)) \ | 87 | #define PTE_FILE_LSHIFT4 (PTE_FILE_BITS1 + PTE_FILE_BITS2 + PTE_FILE_BITS3) |
81 | & ((1U << PTE_FILE_BITS3) - 1)) \ | 88 | |
82 | << (PTE_FILE_BITS1 + PTE_FILE_BITS2)) \ | 89 | static __always_inline pgoff_t pte_to_pgoff(pte_t pte) |
83 | + ((((pte).pte_low >> (PTE_FILE_SHIFT4))) \ | 90 | { |
84 | << (PTE_FILE_BITS1 + PTE_FILE_BITS2 + PTE_FILE_BITS3)) | 91 | return (pgoff_t) |
85 | 92 | (pte_bitop(pte.pte_low, PTE_FILE_SHIFT1, PTE_FILE_MASK1, 0) + | |
86 | #define pgoff_to_pte(off) \ | 93 | pte_bitop(pte.pte_low, PTE_FILE_SHIFT2, PTE_FILE_MASK2, PTE_FILE_LSHIFT2) + |
87 | ((pte_t) { .pte_low = \ | 94 | pte_bitop(pte.pte_low, PTE_FILE_SHIFT3, PTE_FILE_MASK3, PTE_FILE_LSHIFT3) + |
88 | ((((off)) & ((1U << PTE_FILE_BITS1) - 1)) << PTE_FILE_SHIFT1) \ | 95 | pte_bitop(pte.pte_low, PTE_FILE_SHIFT4, -1UL, PTE_FILE_LSHIFT4)); |
89 | + ((((off) >> PTE_FILE_BITS1) \ | 96 | } |
90 | & ((1U << PTE_FILE_BITS2) - 1)) \ | 97 | |
91 | << PTE_FILE_SHIFT2) \ | 98 | static __always_inline pte_t pgoff_to_pte(pgoff_t off) |
92 | + ((((off) >> (PTE_FILE_BITS1 + PTE_FILE_BITS2)) \ | 99 | { |
93 | & ((1U << PTE_FILE_BITS3) - 1)) \ | 100 | return (pte_t){ |
94 | << PTE_FILE_SHIFT3) \ | 101 | .pte_low = |
95 | + ((((off) >> \ | 102 | pte_bitop(off, 0, PTE_FILE_MASK1, PTE_FILE_SHIFT1) + |
96 | (PTE_FILE_BITS1 + PTE_FILE_BITS2 + PTE_FILE_BITS3))) \ | 103 | pte_bitop(off, PTE_FILE_LSHIFT2, PTE_FILE_MASK2, PTE_FILE_SHIFT2) + |
97 | << PTE_FILE_SHIFT4) \ | 104 | pte_bitop(off, PTE_FILE_LSHIFT3, PTE_FILE_MASK3, PTE_FILE_SHIFT3) + |
98 | + _PAGE_FILE }) | 105 | pte_bitop(off, PTE_FILE_LSHIFT4, -1UL, PTE_FILE_SHIFT4) + |
106 | _PAGE_FILE, | ||
107 | }; | ||
108 | } | ||
99 | 109 | ||
100 | #else /* CONFIG_MEM_SOFT_DIRTY */ | 110 | #else /* CONFIG_MEM_SOFT_DIRTY */ |
101 | 111 | ||
@@ -115,22 +125,30 @@ static inline pmd_t native_pmdp_get_and_clear(pmd_t *xp) | |||
115 | #define PTE_FILE_BITS1 (PTE_FILE_SHIFT2 - PTE_FILE_SHIFT1 - 1) | 125 | #define PTE_FILE_BITS1 (PTE_FILE_SHIFT2 - PTE_FILE_SHIFT1 - 1) |
116 | #define PTE_FILE_BITS2 (PTE_FILE_SHIFT3 - PTE_FILE_SHIFT2 - 1) | 126 | #define PTE_FILE_BITS2 (PTE_FILE_SHIFT3 - PTE_FILE_SHIFT2 - 1) |
117 | 127 | ||
118 | #define pte_to_pgoff(pte) \ | 128 | #define PTE_FILE_MASK1 ((1U << PTE_FILE_BITS1) - 1) |
119 | ((((pte).pte_low >> PTE_FILE_SHIFT1) \ | 129 | #define PTE_FILE_MASK2 ((1U << PTE_FILE_BITS2) - 1) |
120 | & ((1U << PTE_FILE_BITS1) - 1)) \ | 130 | |
121 | + ((((pte).pte_low >> PTE_FILE_SHIFT2) \ | 131 | #define PTE_FILE_LSHIFT2 (PTE_FILE_BITS1) |
122 | & ((1U << PTE_FILE_BITS2) - 1)) << PTE_FILE_BITS1) \ | 132 | #define PTE_FILE_LSHIFT3 (PTE_FILE_BITS1 + PTE_FILE_BITS2) |
123 | + (((pte).pte_low >> PTE_FILE_SHIFT3) \ | 133 | |
124 | << (PTE_FILE_BITS1 + PTE_FILE_BITS2))) | 134 | static __always_inline pgoff_t pte_to_pgoff(pte_t pte) |
125 | 135 | { | |
126 | #define pgoff_to_pte(off) \ | 136 | return (pgoff_t) |
127 | ((pte_t) { .pte_low = \ | 137 | (pte_bitop(pte.pte_low, PTE_FILE_SHIFT1, PTE_FILE_MASK1, 0) + |
128 | (((off) & ((1U << PTE_FILE_BITS1) - 1)) << PTE_FILE_SHIFT1) \ | 138 | pte_bitop(pte.pte_low, PTE_FILE_SHIFT2, PTE_FILE_MASK2, PTE_FILE_LSHIFT2) + |
129 | + ((((off) >> PTE_FILE_BITS1) & ((1U << PTE_FILE_BITS2) - 1)) \ | 139 | pte_bitop(pte.pte_low, PTE_FILE_SHIFT3, -1UL, PTE_FILE_LSHIFT3)); |
130 | << PTE_FILE_SHIFT2) \ | 140 | } |
131 | + (((off) >> (PTE_FILE_BITS1 + PTE_FILE_BITS2)) \ | 141 | |
132 | << PTE_FILE_SHIFT3) \ | 142 | static __always_inline pte_t pgoff_to_pte(pgoff_t off) |
133 | + _PAGE_FILE }) | 143 | { |
144 | return (pte_t){ | ||
145 | .pte_low = | ||
146 | pte_bitop(off, 0, PTE_FILE_MASK1, PTE_FILE_SHIFT1) + | ||
147 | pte_bitop(off, PTE_FILE_LSHIFT2, PTE_FILE_MASK2, PTE_FILE_SHIFT2) + | ||
148 | pte_bitop(off, PTE_FILE_LSHIFT3, -1UL, PTE_FILE_SHIFT3) + | ||
149 | _PAGE_FILE, | ||
150 | }; | ||
151 | } | ||
134 | 152 | ||
135 | #endif /* CONFIG_MEM_SOFT_DIRTY */ | 153 | #endif /* CONFIG_MEM_SOFT_DIRTY */ |
136 | 154 | ||
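[Editor's note] The pte_bitop() helper above replaces the long shift-and-mask macro chains with a single extract-and-place primitive. A self-contained round trip under an invented field layout (the real 2-level PTE shifts differ) shows why the forward and inverse compositions cancel:

    #include <stdio.h>
    #include <assert.h>

    static unsigned long pte_bitop(unsigned long value, unsigned int rightshift,
                                   unsigned long mask, unsigned int leftshift)
    {
            /* Extract the field at 'rightshift' of width 'mask', then
             * place it at 'leftshift' -- exactly the kernel helper. */
            return ((value >> rightshift) & mask) << leftshift;
    }

    int main(void)
    {
            /* Hypothetical layout: a 9-bit offset split into a 3-bit
             * field at bit 1 and a 6-bit field at bit 5 of the "pte". */
            unsigned long off = 0x1ab;
            unsigned long pte = pte_bitop(off, 0, 0x7, 1) +   /* low 3 bits  */
                                pte_bitop(off, 3, 0x3f, 5);   /* next 6 bits */
            unsigned long back = pte_bitop(pte, 1, 0x7, 0) +
                                 pte_bitop(pte, 5, 0x3f, 3);
            assert(back == off);
            printf("off=%#lx pte=%#lx back=%#lx\n", off, pte, back);
            return 0;
    }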
diff --git a/arch/x86/include/asm/pgtable.h b/arch/x86/include/asm/pgtable.h index bbc8b12fa443..b459ddf27d64 100644 --- a/arch/x86/include/asm/pgtable.h +++ b/arch/x86/include/asm/pgtable.h | |||
@@ -15,9 +15,10 @@ | |||
15 | : (prot)) | 15 | : (prot)) |
16 | 16 | ||
17 | #ifndef __ASSEMBLY__ | 17 | #ifndef __ASSEMBLY__ |
18 | |||
19 | #include <asm/x86_init.h> | 18 | #include <asm/x86_init.h> |
20 | 19 | ||
20 | void ptdump_walk_pgd_level(struct seq_file *m, pgd_t *pgd); | ||
21 | |||
21 | /* | 22 | /* |
22 | * ZERO_PAGE is a global shared page that is always zero: used | 23 | * ZERO_PAGE is a global shared page that is always zero: used |
23 | * for zero-mapped memory areas etc.. | 24 | * for zero-mapped memory areas etc.. |
diff --git a/arch/x86/include/asm/pgtable_64_types.h b/arch/x86/include/asm/pgtable_64_types.h index 2d883440cb9a..c883bf726398 100644 --- a/arch/x86/include/asm/pgtable_64_types.h +++ b/arch/x86/include/asm/pgtable_64_types.h | |||
@@ -58,7 +58,7 @@ typedef struct { pteval_t pte; } pte_t; | |||
58 | #define VMALLOC_START _AC(0xffffc90000000000, UL) | 58 | #define VMALLOC_START _AC(0xffffc90000000000, UL) |
59 | #define VMALLOC_END _AC(0xffffe8ffffffffff, UL) | 59 | #define VMALLOC_END _AC(0xffffe8ffffffffff, UL) |
60 | #define VMEMMAP_START _AC(0xffffea0000000000, UL) | 60 | #define VMEMMAP_START _AC(0xffffea0000000000, UL) |
61 | #define MODULES_VADDR _AC(0xffffffffa0000000, UL) | 61 | #define MODULES_VADDR (__START_KERNEL_map + KERNEL_IMAGE_SIZE) |
62 | #define MODULES_END _AC(0xffffffffff000000, UL) | 62 | #define MODULES_END _AC(0xffffffffff000000, UL) |
63 | #define MODULES_LEN (MODULES_END - MODULES_VADDR) | 63 | #define MODULES_LEN (MODULES_END - MODULES_VADDR) |
64 | 64 | ||
diff --git a/arch/x86/include/asm/pgtable_types.h b/arch/x86/include/asm/pgtable_types.h index 0ecac257fb26..eb3d44945133 100644 --- a/arch/x86/include/asm/pgtable_types.h +++ b/arch/x86/include/asm/pgtable_types.h | |||
@@ -121,7 +121,8 @@ | |||
121 | 121 | ||
122 | /* Set of bits not changed in pte_modify */ | 122 | /* Set of bits not changed in pte_modify */ |
123 | #define _PAGE_CHG_MASK (PTE_PFN_MASK | _PAGE_PCD | _PAGE_PWT | \ | 123 | #define _PAGE_CHG_MASK (PTE_PFN_MASK | _PAGE_PCD | _PAGE_PWT | \ |
124 | _PAGE_SPECIAL | _PAGE_ACCESSED | _PAGE_DIRTY) | 124 | _PAGE_SPECIAL | _PAGE_ACCESSED | _PAGE_DIRTY | \ |
125 | _PAGE_SOFT_DIRTY) | ||
125 | #define _HPAGE_CHG_MASK (_PAGE_CHG_MASK | _PAGE_PSE) | 126 | #define _HPAGE_CHG_MASK (_PAGE_CHG_MASK | _PAGE_PSE) |
126 | 127 | ||
127 | #define _PAGE_CACHE_MASK (_PAGE_PCD | _PAGE_PWT) | 128 | #define _PAGE_CACHE_MASK (_PAGE_PCD | _PAGE_PWT) |
@@ -213,13 +214,8 @@ | |||
213 | #ifdef CONFIG_X86_64 | 214 | #ifdef CONFIG_X86_64 |
214 | #define __PAGE_KERNEL_IDENT_LARGE_EXEC __PAGE_KERNEL_LARGE_EXEC | 215 | #define __PAGE_KERNEL_IDENT_LARGE_EXEC __PAGE_KERNEL_LARGE_EXEC |
215 | #else | 216 | #else |
216 | /* | ||
217 | * For PDE_IDENT_ATTR include USER bit. As the PDE and PTE protection | ||
218 | * bits are combined, this will allow the user to access the high address mapped | ||
219 | * VDSO in the presence of CONFIG_COMPAT_VDSO | ||
220 | */ | ||
221 | #define PTE_IDENT_ATTR 0x003 /* PRESENT+RW */ | 217 | #define PTE_IDENT_ATTR 0x003 /* PRESENT+RW */ |
222 | #define PDE_IDENT_ATTR 0x067 /* PRESENT+RW+USER+DIRTY+ACCESSED */ | 218 | #define PDE_IDENT_ATTR 0x063 /* PRESENT+RW+DIRTY+ACCESSED */ |
223 | #define PGD_IDENT_ATTR 0x001 /* PRESENT (no other attributes) */ | 219 | #define PGD_IDENT_ATTR 0x001 /* PRESENT (no other attributes) */ |
224 | #endif | 220 | #endif |
225 | 221 | ||
@@ -381,8 +377,13 @@ static inline void update_page_count(int level, unsigned long pages) { } | |||
381 | * as a pte too. | 377 | * as a pte too. |
382 | */ | 378 | */ |
383 | extern pte_t *lookup_address(unsigned long address, unsigned int *level); | 379 | extern pte_t *lookup_address(unsigned long address, unsigned int *level); |
380 | extern pte_t *lookup_address_in_pgd(pgd_t *pgd, unsigned long address, | ||
381 | unsigned int *level); | ||
384 | extern phys_addr_t slow_virt_to_phys(void *__address); | 382 | extern phys_addr_t slow_virt_to_phys(void *__address); |
385 | 383 | extern int kernel_map_pages_in_pgd(pgd_t *pgd, u64 pfn, unsigned long address, | |
384 | unsigned numpages, unsigned long page_flags); | ||
385 | void kernel_unmap_pages_in_pgd(pgd_t *root, unsigned long address, | ||
386 | unsigned numpages); | ||
386 | #endif /* !__ASSEMBLY__ */ | 387 | #endif /* !__ASSEMBLY__ */ |
387 | 388 | ||
388 | #endif /* _ASM_X86_PGTABLE_DEFS_H */ | 389 | #endif /* _ASM_X86_PGTABLE_DEFS_H */ |
diff --git a/arch/x86/include/asm/preempt.h b/arch/x86/include/asm/preempt.h index c8b051933b1b..7024c12f7bfe 100644 --- a/arch/x86/include/asm/preempt.h +++ b/arch/x86/include/asm/preempt.h | |||
@@ -19,12 +19,12 @@ DECLARE_PER_CPU(int, __preempt_count); | |||
19 | */ | 19 | */ |
20 | static __always_inline int preempt_count(void) | 20 | static __always_inline int preempt_count(void) |
21 | { | 21 | { |
22 | return __this_cpu_read_4(__preempt_count) & ~PREEMPT_NEED_RESCHED; | 22 | return raw_cpu_read_4(__preempt_count) & ~PREEMPT_NEED_RESCHED; |
23 | } | 23 | } |
24 | 24 | ||
25 | static __always_inline void preempt_count_set(int pc) | 25 | static __always_inline void preempt_count_set(int pc) |
26 | { | 26 | { |
27 | __this_cpu_write_4(__preempt_count, pc); | 27 | raw_cpu_write_4(__preempt_count, pc); |
28 | } | 28 | } |
29 | 29 | ||
30 | /* | 30 | /* |
@@ -53,17 +53,17 @@ static __always_inline void preempt_count_set(int pc) | |||
53 | 53 | ||
54 | static __always_inline void set_preempt_need_resched(void) | 54 | static __always_inline void set_preempt_need_resched(void) |
55 | { | 55 | { |
56 | __this_cpu_and_4(__preempt_count, ~PREEMPT_NEED_RESCHED); | 56 | raw_cpu_and_4(__preempt_count, ~PREEMPT_NEED_RESCHED); |
57 | } | 57 | } |
58 | 58 | ||
59 | static __always_inline void clear_preempt_need_resched(void) | 59 | static __always_inline void clear_preempt_need_resched(void) |
60 | { | 60 | { |
61 | __this_cpu_or_4(__preempt_count, PREEMPT_NEED_RESCHED); | 61 | raw_cpu_or_4(__preempt_count, PREEMPT_NEED_RESCHED); |
62 | } | 62 | } |
63 | 63 | ||
64 | static __always_inline bool test_preempt_need_resched(void) | 64 | static __always_inline bool test_preempt_need_resched(void) |
65 | { | 65 | { |
66 | return !(__this_cpu_read_4(__preempt_count) & PREEMPT_NEED_RESCHED); | 66 | return !(raw_cpu_read_4(__preempt_count) & PREEMPT_NEED_RESCHED); |
67 | } | 67 | } |
68 | 68 | ||
69 | /* | 69 | /* |
@@ -72,12 +72,12 @@ static __always_inline bool test_preempt_need_resched(void) | |||
72 | 72 | ||
73 | static __always_inline void __preempt_count_add(int val) | 73 | static __always_inline void __preempt_count_add(int val) |
74 | { | 74 | { |
75 | __this_cpu_add_4(__preempt_count, val); | 75 | raw_cpu_add_4(__preempt_count, val); |
76 | } | 76 | } |
77 | 77 | ||
78 | static __always_inline void __preempt_count_sub(int val) | 78 | static __always_inline void __preempt_count_sub(int val) |
79 | { | 79 | { |
80 | __this_cpu_add_4(__preempt_count, -val); | 80 | raw_cpu_add_4(__preempt_count, -val); |
81 | } | 81 | } |
82 | 82 | ||
83 | /* | 83 | /* |
@@ -95,7 +95,7 @@ static __always_inline bool __preempt_count_dec_and_test(void) | |||
95 | */ | 95 | */ |
96 | static __always_inline bool should_resched(void) | 96 | static __always_inline bool should_resched(void) |
97 | { | 97 | { |
98 | return unlikely(!__this_cpu_read_4(__preempt_count)); | 98 | return unlikely(!raw_cpu_read_4(__preempt_count)); |
99 | } | 99 | } |
100 | 100 | ||
101 | #ifdef CONFIG_PREEMPT | 101 | #ifdef CONFIG_PREEMPT |
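[Editor's note] The inverted sense of PREEMPT_NEED_RESCHED above (set_preempt_need_resched clears the bit, clear_preempt_need_resched sets it) is what lets should_resched() test a single value against zero. A single-threaded model of that encoding, with invented names and the per-CPU machinery stripped out:

    #include <stdio.h>
    #include <stdbool.h>

    #define PREEMPT_NEED_RESCHED 0x80000000u

    /* Bit set (inverted) means: no resched needed. */
    static unsigned int preempt_count = PREEMPT_NEED_RESCHED;

    static void set_need_resched(void)   { preempt_count &= ~PREEMPT_NEED_RESCHED; }
    static void clear_need_resched(void) { preempt_count |=  PREEMPT_NEED_RESCHED; }

    static bool should_resched(void)
    {
            /* Zero means count is 0 *and* the inverted bit is clear,
             * i.e. preemptible and a reschedule is pending. */
            return preempt_count == 0;
    }

    int main(void)
    {
            printf("%d\n", should_resched());  /* 0: nothing pending       */
            set_need_resched();
            printf("%d\n", should_resched());  /* 1: count 0 + resched set */
            clear_need_resched();
            return 0;
    }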
diff --git a/arch/x86/include/asm/processor.h b/arch/x86/include/asm/processor.h index 7b034a4057f9..a4ea02351f4d 100644 --- a/arch/x86/include/asm/processor.h +++ b/arch/x86/include/asm/processor.h | |||
@@ -27,7 +27,6 @@ struct mm_struct; | |||
27 | #include <linux/cache.h> | 27 | #include <linux/cache.h> |
28 | #include <linux/threads.h> | 28 | #include <linux/threads.h> |
29 | #include <linux/math64.h> | 29 | #include <linux/math64.h> |
30 | #include <linux/init.h> | ||
31 | #include <linux/err.h> | 30 | #include <linux/err.h> |
32 | #include <linux/irqflags.h> | 31 | #include <linux/irqflags.h> |
33 | 32 | ||
@@ -72,6 +71,7 @@ extern u16 __read_mostly tlb_lli_4m[NR_INFO]; | |||
72 | extern u16 __read_mostly tlb_lld_4k[NR_INFO]; | 71 | extern u16 __read_mostly tlb_lld_4k[NR_INFO]; |
73 | extern u16 __read_mostly tlb_lld_2m[NR_INFO]; | 72 | extern u16 __read_mostly tlb_lld_2m[NR_INFO]; |
74 | extern u16 __read_mostly tlb_lld_4m[NR_INFO]; | 73 | extern u16 __read_mostly tlb_lld_4m[NR_INFO]; |
74 | extern u16 __read_mostly tlb_lld_1g[NR_INFO]; | ||
75 | extern s8 __read_mostly tlb_flushall_shift; | 75 | extern s8 __read_mostly tlb_flushall_shift; |
76 | 76 | ||
77 | /* | 77 | /* |
@@ -370,6 +370,20 @@ struct ymmh_struct { | |||
370 | u32 ymmh_space[64]; | 370 | u32 ymmh_space[64]; |
371 | }; | 371 | }; |
372 | 372 | ||
373 | /* We don't support LWP yet: */ | ||
374 | struct lwp_struct { | ||
375 | u8 reserved[128]; | ||
376 | }; | ||
377 | |||
378 | struct bndregs_struct { | ||
379 | u64 bndregs[8]; | ||
380 | } __packed; | ||
381 | |||
382 | struct bndcsr_struct { | ||
383 | u64 cfg_reg_u; | ||
384 | u64 status_reg; | ||
385 | } __packed; | ||
386 | |||
373 | struct xsave_hdr_struct { | 387 | struct xsave_hdr_struct { |
374 | u64 xstate_bv; | 388 | u64 xstate_bv; |
375 | u64 reserved1[2]; | 389 | u64 reserved1[2]; |
@@ -380,6 +394,9 @@ struct xsave_struct { | |||
380 | struct i387_fxsave_struct i387; | 394 | struct i387_fxsave_struct i387; |
381 | struct xsave_hdr_struct xsave_hdr; | 395 | struct xsave_hdr_struct xsave_hdr; |
382 | struct ymmh_struct ymmh; | 396 | struct ymmh_struct ymmh; |
397 | struct lwp_struct lwp; | ||
398 | struct bndregs_struct bndregs; | ||
399 | struct bndcsr_struct bndcsr; | ||
383 | /* new processor state extensions will go here */ | 400 | /* new processor state extensions will go here */ |
384 | } __attribute__ ((packed, aligned (64))); | 401 | } __attribute__ ((packed, aligned (64))); |
385 | 402 | ||
@@ -432,6 +449,15 @@ struct stack_canary { | |||
432 | }; | 449 | }; |
433 | DECLARE_PER_CPU_ALIGNED(struct stack_canary, stack_canary); | 450 | DECLARE_PER_CPU_ALIGNED(struct stack_canary, stack_canary); |
434 | #endif | 451 | #endif |
452 | /* | ||
453 | * per-CPU IRQ handling stacks | ||
454 | */ | ||
455 | struct irq_stack { | ||
456 | u32 stack[THREAD_SIZE/sizeof(u32)]; | ||
457 | } __aligned(THREAD_SIZE); | ||
458 | |||
459 | DECLARE_PER_CPU(struct irq_stack *, hardirq_stack); | ||
460 | DECLARE_PER_CPU(struct irq_stack *, softirq_stack); | ||
435 | #endif /* X86_64 */ | 461 | #endif /* X86_64 */ |
436 | 462 | ||
437 | extern unsigned int xstate_size; | 463 | extern unsigned int xstate_size; |
@@ -700,29 +726,6 @@ static inline void sync_core(void) | |||
700 | #endif | 726 | #endif |
701 | } | 727 | } |
702 | 728 | ||
703 | static inline void __monitor(const void *eax, unsigned long ecx, | ||
704 | unsigned long edx) | ||
705 | { | ||
706 | /* "monitor %eax, %ecx, %edx;" */ | ||
707 | asm volatile(".byte 0x0f, 0x01, 0xc8;" | ||
708 | :: "a" (eax), "c" (ecx), "d"(edx)); | ||
709 | } | ||
710 | |||
711 | static inline void __mwait(unsigned long eax, unsigned long ecx) | ||
712 | { | ||
713 | /* "mwait %eax, %ecx;" */ | ||
714 | asm volatile(".byte 0x0f, 0x01, 0xc9;" | ||
715 | :: "a" (eax), "c" (ecx)); | ||
716 | } | ||
717 | |||
718 | static inline void __sti_mwait(unsigned long eax, unsigned long ecx) | ||
719 | { | ||
720 | trace_hardirqs_on(); | ||
721 | /* "mwait %eax, %ecx;" */ | ||
722 | asm volatile("sti; .byte 0x0f, 0x01, 0xc9;" | ||
723 | :: "a" (eax), "c" (ecx)); | ||
724 | } | ||
725 | |||
726 | extern void select_idle_routine(const struct cpuinfo_x86 *c); | 729 | extern void select_idle_routine(const struct cpuinfo_x86 *c); |
727 | extern void init_amd_e400_c1e_mask(void); | 730 | extern void init_amd_e400_c1e_mask(void); |
728 | 731 | ||
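[Editor's note] The new lwp/bndregs/bndcsr members extend the packed xsave area laid out above. A sketch that stacks stub areas the same way and prints their offsets; the real offsets are enumerated by CPUID leaf 0xD at runtime, so treat these struct-derived numbers as illustrative only:

    #include <stdio.h>
    #include <stddef.h>
    #include <stdint.h>

    struct i387_area { uint8_t b[512]; };               /* legacy FXSAVE area */
    struct xsave_hdr { uint64_t xstate_bv, rsvd[7]; };  /* 64-byte header     */
    struct ymmh      { uint32_t ymmh_space[64]; };
    struct lwp       { uint8_t reserved[128]; };
    struct bndregs   { uint64_t bndregs[8]; } __attribute__((packed));
    struct bndcsr    { uint64_t cfg_reg_u, status_reg; } __attribute__((packed));

    struct xsave_demo {
            struct i387_area i387;
            struct xsave_hdr hdr;
            struct ymmh      ymmh;
            struct lwp       lwp;
            struct bndregs   bndregs;
            struct bndcsr    bndcsr;
    } __attribute__((packed, aligned(64)));

    int main(void)
    {
            printf("ymmh at %zu, bndregs at %zu, bndcsr at %zu\n",
                   offsetof(struct xsave_demo, ymmh),
                   offsetof(struct xsave_demo, bndregs),
                   offsetof(struct xsave_demo, bndcsr));
            return 0;
    }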
diff --git a/arch/x86/include/asm/ptrace.h b/arch/x86/include/asm/ptrace.h index 942a08623a1a..14fd6fd75a19 100644 --- a/arch/x86/include/asm/ptrace.h +++ b/arch/x86/include/asm/ptrace.h | |||
@@ -60,7 +60,6 @@ struct pt_regs { | |||
60 | 60 | ||
61 | #endif /* !__i386__ */ | 61 | #endif /* !__i386__ */ |
62 | 62 | ||
63 | #include <linux/init.h> | ||
64 | #ifdef CONFIG_PARAVIRT | 63 | #ifdef CONFIG_PARAVIRT |
65 | #include <asm/paravirt_types.h> | 64 | #include <asm/paravirt_types.h> |
66 | #endif | 65 | #endif |
diff --git a/arch/x86/include/asm/setup.h b/arch/x86/include/asm/setup.h index 59bcf4e22418..9264f04a4c55 100644 --- a/arch/x86/include/asm/setup.h +++ b/arch/x86/include/asm/setup.h | |||
@@ -3,7 +3,6 @@ | |||
3 | 3 | ||
4 | #include <uapi/asm/setup.h> | 4 | #include <uapi/asm/setup.h> |
5 | 5 | ||
6 | |||
7 | #define COMMAND_LINE_SIZE 2048 | 6 | #define COMMAND_LINE_SIZE 2048 |
8 | 7 | ||
9 | #include <linux/linkage.h> | 8 | #include <linux/linkage.h> |
@@ -29,6 +28,8 @@ | |||
29 | #include <asm/bootparam.h> | 28 | #include <asm/bootparam.h> |
30 | #include <asm/x86_init.h> | 29 | #include <asm/x86_init.h> |
31 | 30 | ||
31 | extern u64 relocated_ramdisk; | ||
32 | |||
32 | /* Interrupt control for vSMPowered x86_64 systems */ | 33 | /* Interrupt control for vSMPowered x86_64 systems */ |
33 | #ifdef CONFIG_X86_64 | 34 | #ifdef CONFIG_X86_64 |
34 | void vsmp_init(void); | 35 | void vsmp_init(void); |
@@ -38,12 +39,6 @@ static inline void vsmp_init(void) { } | |||
38 | 39 | ||
39 | void setup_bios_corruption_check(void); | 40 | void setup_bios_corruption_check(void); |
40 | 41 | ||
41 | #ifdef CONFIG_X86_VISWS | ||
42 | extern void visws_early_detect(void); | ||
43 | #else | ||
44 | static inline void visws_early_detect(void) { } | ||
45 | #endif | ||
46 | |||
47 | extern unsigned long saved_video_mode; | 42 | extern unsigned long saved_video_mode; |
48 | 43 | ||
49 | extern void reserve_standard_io_resources(void); | 44 | extern void reserve_standard_io_resources(void); |
diff --git a/arch/x86/include/asm/smp.h b/arch/x86/include/asm/smp.h index 4137890e88e3..8cd27e08e23c 100644 --- a/arch/x86/include/asm/smp.h +++ b/arch/x86/include/asm/smp.h | |||
@@ -2,7 +2,6 @@ | |||
2 | #define _ASM_X86_SMP_H | 2 | #define _ASM_X86_SMP_H |
3 | #ifndef __ASSEMBLY__ | 3 | #ifndef __ASSEMBLY__ |
4 | #include <linux/cpumask.h> | 4 | #include <linux/cpumask.h> |
5 | #include <linux/init.h> | ||
6 | #include <asm/percpu.h> | 5 | #include <asm/percpu.h> |
7 | 6 | ||
8 | /* | 7 | /* |
diff --git a/arch/x86/include/asm/special_insns.h b/arch/x86/include/asm/special_insns.h index 645cad2c95ff..e820c080a4e9 100644 --- a/arch/x86/include/asm/special_insns.h +++ b/arch/x86/include/asm/special_insns.h | |||
@@ -191,6 +191,14 @@ static inline void clflush(volatile void *__p) | |||
191 | asm volatile("clflush %0" : "+m" (*(volatile char __force *)__p)); | 191 | asm volatile("clflush %0" : "+m" (*(volatile char __force *)__p)); |
192 | } | 192 | } |
193 | 193 | ||
194 | static inline void clflushopt(volatile void *__p) | ||
195 | { | ||
196 | alternative_io(".byte " __stringify(NOP_DS_PREFIX) "; clflush %P0", | ||
197 | ".byte 0x66; clflush %P0", | ||
198 | X86_FEATURE_CLFLUSHOPT, | ||
199 | "+m" (*(volatile char __force *)__p)); | ||
200 | } | ||
201 | |||
194 | #define nop() asm volatile ("nop") | 202 | #define nop() asm volatile ("nop") |
195 | 203 | ||
196 | 204 | ||
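[Editor's note] clflushopt is encoded as clflush with a 0x66 operand-size prefix, which is what the alternative_io() above patches in once X86_FEATURE_CLFLUSHOPT is detected. A user-space sketch of the two encodings (GCC inline asm; executing the clflushopt variant on a CPU without the feature raises #UD, so main() only exercises plain clflush):

    #include <stdio.h>

    static inline void demo_clflush(volatile void *p)
    {
            asm volatile("clflush %0" : "+m" (*(volatile char *)p));
    }

    static inline void demo_clflushopt(volatile void *p)
    {
            /* 0x66-prefixed clflush *is* clflushopt. */
            asm volatile(".byte 0x66; clflush %0" : "+m" (*(volatile char *)p));
    }

    int main(void)
    {
            char buf[64] __attribute__((aligned(64))) = "hello";
            demo_clflush(buf);      /* safe on any CPU with CLFLUSH */
            printf("%s\n", buf);
            return 0;
    }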
diff --git a/arch/x86/include/asm/spinlock.h b/arch/x86/include/asm/spinlock.h index bf156ded74b5..0f62f5482d91 100644 --- a/arch/x86/include/asm/spinlock.h +++ b/arch/x86/include/asm/spinlock.h | |||
@@ -26,10 +26,9 @@ | |||
26 | # define LOCK_PTR_REG "D" | 26 | # define LOCK_PTR_REG "D" |
27 | #endif | 27 | #endif |
28 | 28 | ||
29 | #if defined(CONFIG_X86_32) && \ | 29 | #if defined(CONFIG_X86_32) && (defined(CONFIG_X86_PPRO_FENCE)) |
30 | (defined(CONFIG_X86_OOSTORE) || defined(CONFIG_X86_PPRO_FENCE)) | ||
31 | /* | 30 | /* |
32 | * On PPro SMP or if we are using OOSTORE, we use a locked operation to unlock | 31 | * On PPro SMP, we use a locked operation to unlock |
33 | * (PPro errata 66, 92) | 32 | * (PPro errata 66, 92) |
34 | */ | 33 | */ |
35 | # define UNLOCK_LOCK_PREFIX LOCK_PREFIX | 34 | # define UNLOCK_LOCK_PREFIX LOCK_PREFIX |
diff --git a/arch/x86/include/asm/thread_info.h b/arch/x86/include/asm/thread_info.h index 3ba3de457d05..47e5de25ba79 100644 --- a/arch/x86/include/asm/thread_info.h +++ b/arch/x86/include/asm/thread_info.h | |||
@@ -9,6 +9,7 @@ | |||
9 | 9 | ||
10 | #include <linux/compiler.h> | 10 | #include <linux/compiler.h> |
11 | #include <asm/page.h> | 11 | #include <asm/page.h> |
12 | #include <asm/percpu.h> | ||
12 | #include <asm/types.h> | 13 | #include <asm/types.h> |
13 | 14 | ||
14 | /* | 15 | /* |
@@ -32,12 +33,6 @@ struct thread_info { | |||
32 | mm_segment_t addr_limit; | 33 | mm_segment_t addr_limit; |
33 | struct restart_block restart_block; | 34 | struct restart_block restart_block; |
34 | void __user *sysenter_return; | 35 | void __user *sysenter_return; |
35 | #ifdef CONFIG_X86_32 | ||
36 | unsigned long previous_esp; /* ESP of the previous stack in | ||
37 | case of nested (IRQ) stacks | ||
38 | */ | ||
39 | __u8 supervisor_stack[0]; | ||
40 | #endif | ||
41 | unsigned int sig_on_uaccess_error:1; | 36 | unsigned int sig_on_uaccess_error:1; |
42 | unsigned int uaccess_err:1; /* uaccess failed */ | 37 | unsigned int uaccess_err:1; /* uaccess failed */ |
43 | }; | 38 | }; |
@@ -153,9 +148,9 @@ struct thread_info { | |||
153 | #define _TIF_WORK_CTXSW_PREV (_TIF_WORK_CTXSW|_TIF_USER_RETURN_NOTIFY) | 148 | #define _TIF_WORK_CTXSW_PREV (_TIF_WORK_CTXSW|_TIF_USER_RETURN_NOTIFY) |
154 | #define _TIF_WORK_CTXSW_NEXT (_TIF_WORK_CTXSW) | 149 | #define _TIF_WORK_CTXSW_NEXT (_TIF_WORK_CTXSW) |
155 | 150 | ||
156 | #ifdef CONFIG_X86_32 | 151 | #define STACK_WARN (THREAD_SIZE/8) |
152 | #define KERNEL_STACK_OFFSET (5*(BITS_PER_LONG/8)) | ||
157 | 153 | ||
158 | #define STACK_WARN (THREAD_SIZE/8) | ||
159 | /* | 154 | /* |
160 | * macros/functions for gaining access to the thread information structure | 155 | * macros/functions for gaining access to the thread information structure |
161 | * | 156 | * |
@@ -163,40 +158,6 @@ struct thread_info { | |||
163 | */ | 158 | */ |
164 | #ifndef __ASSEMBLY__ | 159 | #ifndef __ASSEMBLY__ |
165 | 160 | ||
166 | |||
167 | /* how to get the current stack pointer from C */ | ||
168 | register unsigned long current_stack_pointer asm("esp") __used; | ||
169 | |||
170 | /* how to get the thread information struct from C */ | ||
171 | static inline struct thread_info *current_thread_info(void) | ||
172 | { | ||
173 | return (struct thread_info *) | ||
174 | (current_stack_pointer & ~(THREAD_SIZE - 1)); | ||
175 | } | ||
176 | |||
177 | #else /* !__ASSEMBLY__ */ | ||
178 | |||
179 | /* how to get the thread information struct from ASM */ | ||
180 | #define GET_THREAD_INFO(reg) \ | ||
181 | movl $-THREAD_SIZE, reg; \ | ||
182 | andl %esp, reg | ||
183 | |||
184 | /* use this one if reg already contains %esp */ | ||
185 | #define GET_THREAD_INFO_WITH_ESP(reg) \ | ||
186 | andl $-THREAD_SIZE, reg | ||
187 | |||
188 | #endif | ||
189 | |||
190 | #else /* X86_32 */ | ||
191 | |||
192 | #include <asm/percpu.h> | ||
193 | #define KERNEL_STACK_OFFSET (5*8) | ||
194 | |||
195 | /* | ||
196 | * macros/functions for gaining access to the thread information structure | ||
197 | * preempt_count needs to be 1 initially, until the scheduler is functional. | ||
198 | */ | ||
199 | #ifndef __ASSEMBLY__ | ||
200 | DECLARE_PER_CPU(unsigned long, kernel_stack); | 161 | DECLARE_PER_CPU(unsigned long, kernel_stack); |
201 | 162 | ||
202 | static inline struct thread_info *current_thread_info(void) | 163 | static inline struct thread_info *current_thread_info(void) |
@@ -211,8 +172,8 @@ static inline struct thread_info *current_thread_info(void) | |||
211 | 172 | ||
212 | /* how to get the thread information struct from ASM */ | 173 | /* how to get the thread information struct from ASM */ |
213 | #define GET_THREAD_INFO(reg) \ | 174 | #define GET_THREAD_INFO(reg) \ |
214 | movq PER_CPU_VAR(kernel_stack),reg ; \ | 175 | _ASM_MOV PER_CPU_VAR(kernel_stack),reg ; \ |
215 | subq $(THREAD_SIZE-KERNEL_STACK_OFFSET),reg | 176 | _ASM_SUB $(THREAD_SIZE-KERNEL_STACK_OFFSET),reg ; |
216 | 177 | ||
217 | /* | 178 | /* |
218 | * Same if PER_CPU_VAR(kernel_stack) is, perhaps with some offset, already in | 179 | * Same if PER_CPU_VAR(kernel_stack) is, perhaps with some offset, already in |
@@ -222,8 +183,6 @@ static inline struct thread_info *current_thread_info(void) | |||
222 | 183 | ||
223 | #endif | 184 | #endif |
224 | 185 | ||
225 | #endif /* !X86_32 */ | ||
226 | |||
227 | /* | 186 | /* |
228 | * Thread-synchronous status. | 187 | * Thread-synchronous status. |
229 | * | 188 | * |
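[Editor's note] The deleted 32-bit current_thread_info() located the thread_info by rounding the stack pointer down to a THREAD_SIZE boundary, the same mask trick GET_THREAD_INFO did in assembly. A tiny model of that alignment trick, with an invented stack-pointer value:

    #include <stdio.h>

    #define THREAD_SIZE 8192UL      /* must be a power of two */

    int main(void)
    {
            unsigned long sp = 0xc12345f0UL;            /* pretend %esp   */
            unsigned long ti = sp & ~(THREAD_SIZE - 1); /* stack base     */
            printf("sp=%#lx -> thread_info=%#lx\n", sp, ti);
            return 0;
    }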
diff --git a/arch/x86/include/asm/timer.h b/arch/x86/include/asm/timer.h index 34baa0eb5d0c..a04eabd43d06 100644 --- a/arch/x86/include/asm/timer.h +++ b/arch/x86/include/asm/timer.h | |||
@@ -1,9 +1,9 @@ | |||
1 | #ifndef _ASM_X86_TIMER_H | 1 | #ifndef _ASM_X86_TIMER_H |
2 | #define _ASM_X86_TIMER_H | 2 | #define _ASM_X86_TIMER_H |
3 | #include <linux/init.h> | ||
4 | #include <linux/pm.h> | 3 | #include <linux/pm.h> |
5 | #include <linux/percpu.h> | 4 | #include <linux/percpu.h> |
6 | #include <linux/interrupt.h> | 5 | #include <linux/interrupt.h> |
6 | #include <linux/math64.h> | ||
7 | 7 | ||
8 | #define TICK_SIZE (tick_nsec / 1000) | 8 | #define TICK_SIZE (tick_nsec / 1000) |
9 | 9 | ||
@@ -12,68 +12,26 @@ extern int recalibrate_cpu_khz(void); | |||
12 | 12 | ||
13 | extern int no_timer_check; | 13 | extern int no_timer_check; |
14 | 14 | ||
15 | /* Accelerators for sched_clock() | 15 | /* |
16 | * convert from cycles(64bits) => nanoseconds (64bits) | 16 | * We use the full linear equation: f(x) = a + b*x, in order to allow |
17 | * basic equation: | 17 | * a continuous function in the face of dynamic freq changes. |
18 | * ns = cycles / (freq / ns_per_sec) | ||
19 | * ns = cycles * (ns_per_sec / freq) | ||
20 | * ns = cycles * (10^9 / (cpu_khz * 10^3)) | ||
21 | * ns = cycles * (10^6 / cpu_khz) | ||
22 | * | 18 | * |
23 | * Then we use scaling math (suggested by george@mvista.com) to get: | 19 | * Continuity means that when our frequency changes our slope (b), we want to |
24 | * ns = cycles * (10^6 * SC / cpu_khz) / SC | 20 | * ensure that: f(t) == f'(t), which gives: a + b*t == a' + b'*t. |
25 | * ns = cycles * cyc2ns_scale / SC | ||
26 | * | 21 | * |
27 | * And since SC is a constant power of two, we can convert the div | 22 | * Without an offset (a) the above would not be possible. |
28 | * into a shift. | ||
29 | * | 23 | * |
30 | * We can use khz divisor instead of mhz to keep a better precision, since | 24 | * See the comment near cycles_2_ns() for details on how we compute (b). |
31 | * cyc2ns_scale is limited to 10^6 * 2^10, which fits in 32 bits. | ||
32 | * (mathieu.desnoyers@polymtl.ca) | ||
33 | * | ||
34 | * -johnstul@us.ibm.com "math is hard, lets go shopping!" | ||
35 | * | ||
36 | * In: | ||
37 | * | ||
38 | * ns = cycles * cyc2ns_scale / SC | ||
39 | * | ||
40 | * Although we may still have enough bits to store the value of ns, | ||
41 | * in some cases, we may not have enough bits to store cycles * cyc2ns_scale, | ||
42 | * leading to an incorrect result. | ||
43 | * | ||
44 | * To avoid this, we can decompose 'cycles' into quotient and remainder | ||
45 | * of division by SC. Then, | ||
46 | * | ||
47 | * ns = (quot * SC + rem) * cyc2ns_scale / SC | ||
48 | * = quot * cyc2ns_scale + (rem * cyc2ns_scale) / SC | ||
49 | * | ||
50 | * - sqazi@google.com | ||
51 | */ | 25 | */ |
52 | 26 | struct cyc2ns_data { | |
53 | DECLARE_PER_CPU(unsigned long, cyc2ns); | 27 | u32 cyc2ns_mul; |
54 | DECLARE_PER_CPU(unsigned long long, cyc2ns_offset); | 28 | u32 cyc2ns_shift; |
55 | 29 | u64 cyc2ns_offset; | |
56 | #define CYC2NS_SCALE_FACTOR 10 /* 2^10, carefully chosen */ | 30 | u32 __count; |
57 | 31 | /* u32 hole */ | |
58 | static inline unsigned long long __cycles_2_ns(unsigned long long cyc) | 32 | }; /* 24 bytes -- do not grow */ |
59 | { | 33 | |
60 | int cpu = smp_processor_id(); | 34 | extern struct cyc2ns_data *cyc2ns_read_begin(void); |
61 | unsigned long long ns = per_cpu(cyc2ns_offset, cpu); | 35 | extern void cyc2ns_read_end(struct cyc2ns_data *); |
62 | ns += mult_frac(cyc, per_cpu(cyc2ns, cpu), | ||
63 | (1UL << CYC2NS_SCALE_FACTOR)); | ||
64 | return ns; | ||
65 | } | ||
66 | |||
67 | static inline unsigned long long cycles_2_ns(unsigned long long cyc) | ||
68 | { | ||
69 | unsigned long long ns; | ||
70 | unsigned long flags; | ||
71 | |||
72 | local_irq_save(flags); | ||
73 | ns = __cycles_2_ns(cyc); | ||
74 | local_irq_restore(flags); | ||
75 | |||
76 | return ns; | ||
77 | } | ||
78 | 36 | ||
79 | #endif /* _ASM_X86_TIMER_H */ | 37 | #endif /* _ASM_X86_TIMER_H */ |
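[Editor's note] The cyc2ns_data above carries the slope b as the fixed-point pair (cyc2ns_mul, cyc2ns_shift) and the intercept a as cyc2ns_offset. A sketch of the conversion under assumed values for a 2.5 GHz TSC, using GCC's 128-bit integers so the multiply cannot overflow for large cycle counts:

    #include <stdio.h>
    #include <stdint.h>

    static uint64_t cycles_to_ns(uint64_t cyc, uint32_t mul, uint32_t shift,
                                 uint64_t offset)
    {
            /* f(cyc) = offset + (cyc * mul) >> shift */
            return offset + (uint64_t)(((unsigned __int128)cyc * mul) >> shift);
    }

    int main(void)
    {
            /* 2.5 GHz -> 0.4 ns/cycle; with shift = 10,
             * mul = 0.4 * 2^10 = 409.6, rounded to 410. */
            uint32_t mul = 410, shift = 10;
            printf("%llu\n", (unsigned long long)
                   cycles_to_ns(2500000000ULL, mul, shift, 0)); /* ~1e9 ns */
            return 0;
    }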
diff --git a/arch/x86/include/asm/tlbflush.h b/arch/x86/include/asm/tlbflush.h index e6d90babc245..04905bfc508b 100644 --- a/arch/x86/include/asm/tlbflush.h +++ b/arch/x86/include/asm/tlbflush.h | |||
@@ -62,7 +62,7 @@ static inline void __flush_tlb_all(void) | |||
62 | 62 | ||
63 | static inline void __flush_tlb_one(unsigned long addr) | 63 | static inline void __flush_tlb_one(unsigned long addr) |
64 | { | 64 | { |
65 | count_vm_event(NR_TLB_LOCAL_FLUSH_ONE); | 65 | count_vm_tlb_event(NR_TLB_LOCAL_FLUSH_ONE); |
66 | __flush_tlb_single(addr); | 66 | __flush_tlb_single(addr); |
67 | } | 67 | } |
68 | 68 | ||
@@ -93,13 +93,13 @@ static inline void __flush_tlb_one(unsigned long addr) | |||
93 | */ | 93 | */ |
94 | static inline void __flush_tlb_up(void) | 94 | static inline void __flush_tlb_up(void) |
95 | { | 95 | { |
96 | count_vm_event(NR_TLB_LOCAL_FLUSH_ALL); | 96 | count_vm_tlb_event(NR_TLB_LOCAL_FLUSH_ALL); |
97 | __flush_tlb(); | 97 | __flush_tlb(); |
98 | } | 98 | } |
99 | 99 | ||
100 | static inline void flush_tlb_all(void) | 100 | static inline void flush_tlb_all(void) |
101 | { | 101 | { |
102 | count_vm_event(NR_TLB_LOCAL_FLUSH_ALL); | 102 | count_vm_tlb_event(NR_TLB_LOCAL_FLUSH_ALL); |
103 | __flush_tlb_all(); | 103 | __flush_tlb_all(); |
104 | } | 104 | } |
105 | 105 | ||
diff --git a/arch/x86/include/asm/topology.h b/arch/x86/include/asm/topology.h index d35f24e231cd..0e8f04f2c26f 100644 --- a/arch/x86/include/asm/topology.h +++ b/arch/x86/include/asm/topology.h | |||
@@ -119,9 +119,10 @@ static inline void setup_node_to_cpumask_map(void) { } | |||
119 | 119 | ||
120 | extern const struct cpumask *cpu_coregroup_mask(int cpu); | 120 | extern const struct cpumask *cpu_coregroup_mask(int cpu); |
121 | 121 | ||
122 | #ifdef ENABLE_TOPO_DEFINES | ||
123 | #define topology_physical_package_id(cpu) (cpu_data(cpu).phys_proc_id) | 122 | #define topology_physical_package_id(cpu) (cpu_data(cpu).phys_proc_id) |
124 | #define topology_core_id(cpu) (cpu_data(cpu).cpu_core_id) | 123 | #define topology_core_id(cpu) (cpu_data(cpu).cpu_core_id) |
124 | |||
125 | #ifdef ENABLE_TOPO_DEFINES | ||
125 | #define topology_core_cpumask(cpu) (per_cpu(cpu_core_map, cpu)) | 126 | #define topology_core_cpumask(cpu) (per_cpu(cpu_core_map, cpu)) |
126 | #define topology_thread_cpumask(cpu) (per_cpu(cpu_sibling_map, cpu)) | 127 | #define topology_thread_cpumask(cpu) (per_cpu(cpu_sibling_map, cpu)) |
127 | #endif | 128 | #endif |
@@ -131,25 +132,7 @@ static inline void arch_fix_phys_package_id(int num, u32 slot) | |||
131 | } | 132 | } |
132 | 133 | ||
133 | struct pci_bus; | 134 | struct pci_bus; |
135 | int x86_pci_root_bus_node(int bus); | ||
134 | void x86_pci_root_bus_resources(int bus, struct list_head *resources); | 136 | void x86_pci_root_bus_resources(int bus, struct list_head *resources); |
135 | 137 | ||
136 | #ifdef CONFIG_SMP | ||
137 | #define mc_capable() ((boot_cpu_data.x86_max_cores > 1) && \ | ||
138 | (cpumask_weight(cpu_core_mask(0)) != nr_cpu_ids)) | ||
139 | #define smt_capable() (smp_num_siblings > 1) | ||
140 | #endif | ||
141 | |||
142 | #ifdef CONFIG_NUMA | ||
143 | extern int get_mp_bus_to_node(int busnum); | ||
144 | extern void set_mp_bus_to_node(int busnum, int node); | ||
145 | #else | ||
146 | static inline int get_mp_bus_to_node(int busnum) | ||
147 | { | ||
148 | return 0; | ||
149 | } | ||
150 | static inline void set_mp_bus_to_node(int busnum, int node) | ||
151 | { | ||
152 | } | ||
153 | #endif | ||
154 | |||
155 | #endif /* _ASM_X86_TOPOLOGY_H */ | 138 | #endif /* _ASM_X86_TOPOLOGY_H */ |
diff --git a/arch/x86/include/asm/tsc.h b/arch/x86/include/asm/tsc.h index 235be70d5bb4..94605c0e9cee 100644 --- a/arch/x86/include/asm/tsc.h +++ b/arch/x86/include/asm/tsc.h | |||
@@ -65,4 +65,7 @@ extern int notsc_setup(char *); | |||
65 | extern void tsc_save_sched_clock_state(void); | 65 | extern void tsc_save_sched_clock_state(void); |
66 | extern void tsc_restore_sched_clock_state(void); | 66 | extern void tsc_restore_sched_clock_state(void); |
67 | 67 | ||
68 | /* MSR based TSC calibration for Intel Atom SoC platforms */ | ||
69 | unsigned long try_msr_calibrate_tsc(void); | ||
70 | |||
68 | #endif /* _ASM_X86_TSC_H */ | 71 | #endif /* _ASM_X86_TSC_H */ |
diff --git a/arch/x86/include/asm/uaccess.h b/arch/x86/include/asm/uaccess.h index 8ec57c07b125..0d592e0a5b84 100644 --- a/arch/x86/include/asm/uaccess.h +++ b/arch/x86/include/asm/uaccess.h | |||
@@ -40,22 +40,30 @@ | |||
40 | /* | 40 | /* |
41 | * Test whether a block of memory is a valid user space address. | 41 | * Test whether a block of memory is a valid user space address. |
42 | * Returns 0 if the range is valid, nonzero otherwise. | 42 | * Returns 0 if the range is valid, nonzero otherwise. |
43 | * | ||
44 | * This is equivalent to the following test: | ||
45 | * (u33)addr + (u33)size > (u33)current->addr_limit.seg (u65 for x86_64) | ||
46 | * | ||
47 | * This needs 33-bit (65-bit for x86_64) arithmetic. We have a carry... | ||
48 | */ | 43 | */ |
44 | static inline bool __chk_range_not_ok(unsigned long addr, unsigned long size, unsigned long limit) | ||
45 | { | ||
46 | /* | ||
47 | * If we have used "sizeof()" for the size, | ||
48 | * we know it won't overflow the limit (but | ||
49 | * it might overflow the 'addr', so it's | ||
50 | * important to subtract the size from the | ||
51 | * limit, not add it to the address). | ||
52 | */ | ||
53 | if (__builtin_constant_p(size)) | ||
54 | return addr > limit - size; | ||
55 | |||
56 | /* Arbitrary sizes? Be careful about overflow */ | ||
57 | addr += size; | ||
58 | if (addr < size) | ||
59 | return true; | ||
60 | return addr > limit; | ||
61 | } | ||
49 | 62 | ||
50 | #define __range_not_ok(addr, size, limit) \ | 63 | #define __range_not_ok(addr, size, limit) \ |
51 | ({ \ | 64 | ({ \ |
52 | unsigned long flag, roksum; \ | ||
53 | __chk_user_ptr(addr); \ | 65 | __chk_user_ptr(addr); \ |
54 | asm("add %3,%1 ; sbb %0,%0 ; cmp %1,%4 ; sbb $0,%0" \ | 66 | __chk_range_not_ok((unsigned long __force)(addr), size, limit); \ |
55 | : "=&r" (flag), "=r" (roksum) \ | ||
56 | : "1" (addr), "g" ((long)(size)), \ | ||
57 | "rm" (limit)); \ | ||
58 | flag; \ | ||
59 | }) | 67 | }) |
60 | 68 | ||
61 | /** | 69 | /** |
@@ -78,7 +86,7 @@ | |||
78 | * this function, memory access functions may still return -EFAULT. | 86 | * this function, memory access functions may still return -EFAULT. |
79 | */ | 87 | */ |
80 | #define access_ok(type, addr, size) \ | 88 | #define access_ok(type, addr, size) \ |
81 | (likely(__range_not_ok(addr, size, user_addr_max()) == 0)) | 89 | likely(!__range_not_ok(addr, size, user_addr_max())) |
82 | 90 | ||
83 | /* | 91 | /* |
84 | * The exception table consists of pairs of addresses relative to the | 92 | * The exception table consists of pairs of addresses relative to the |
@@ -525,6 +533,98 @@ extern __must_check long strnlen_user(const char __user *str, long n); | |||
525 | unsigned long __must_check clear_user(void __user *mem, unsigned long len); | 533 | unsigned long __must_check clear_user(void __user *mem, unsigned long len); |
526 | unsigned long __must_check __clear_user(void __user *mem, unsigned long len); | 534 | unsigned long __must_check __clear_user(void __user *mem, unsigned long len); |
527 | 535 | ||
536 | extern void __cmpxchg_wrong_size(void) | ||
537 | __compiletime_error("Bad argument size for cmpxchg"); | ||
538 | |||
539 | #define __user_atomic_cmpxchg_inatomic(uval, ptr, old, new, size) \ | ||
540 | ({ \ | ||
541 | int __ret = 0; \ | ||
542 | __typeof__(ptr) __uval = (uval); \ | ||
543 | __typeof__(*(ptr)) __old = (old); \ | ||
544 | __typeof__(*(ptr)) __new = (new); \ | ||
545 | switch (size) { \ | ||
546 | case 1: \ | ||
547 | { \ | ||
548 | asm volatile("\t" ASM_STAC "\n" \ | ||
549 | "1:\t" LOCK_PREFIX "cmpxchgb %4, %2\n" \ | ||
550 | "2:\t" ASM_CLAC "\n" \ | ||
551 | "\t.section .fixup, \"ax\"\n" \ | ||
552 | "3:\tmov %3, %0\n" \ | ||
553 | "\tjmp 2b\n" \ | ||
554 | "\t.previous\n" \ | ||
555 | _ASM_EXTABLE(1b, 3b) \ | ||
556 | : "+r" (__ret), "=a" (__old), "+m" (*(ptr)) \ | ||
557 | : "i" (-EFAULT), "q" (__new), "1" (__old) \ | ||
558 | : "memory" \ | ||
559 | ); \ | ||
560 | break; \ | ||
561 | } \ | ||
562 | case 2: \ | ||
563 | { \ | ||
564 | asm volatile("\t" ASM_STAC "\n" \ | ||
565 | "1:\t" LOCK_PREFIX "cmpxchgw %4, %2\n" \ | ||
566 | "2:\t" ASM_CLAC "\n" \ | ||
567 | "\t.section .fixup, \"ax\"\n" \ | ||
568 | "3:\tmov %3, %0\n" \ | ||
569 | "\tjmp 2b\n" \ | ||
570 | "\t.previous\n" \ | ||
571 | _ASM_EXTABLE(1b, 3b) \ | ||
572 | : "+r" (__ret), "=a" (__old), "+m" (*(ptr)) \ | ||
573 | : "i" (-EFAULT), "r" (__new), "1" (__old) \ | ||
574 | : "memory" \ | ||
575 | ); \ | ||
576 | break; \ | ||
577 | } \ | ||
578 | case 4: \ | ||
579 | { \ | ||
580 | asm volatile("\t" ASM_STAC "\n" \ | ||
581 | "1:\t" LOCK_PREFIX "cmpxchgl %4, %2\n" \ | ||
582 | "2:\t" ASM_CLAC "\n" \ | ||
583 | "\t.section .fixup, \"ax\"\n" \ | ||
584 | "3:\tmov %3, %0\n" \ | ||
585 | "\tjmp 2b\n" \ | ||
586 | "\t.previous\n" \ | ||
587 | _ASM_EXTABLE(1b, 3b) \ | ||
588 | : "+r" (__ret), "=a" (__old), "+m" (*(ptr)) \ | ||
589 | : "i" (-EFAULT), "r" (__new), "1" (__old) \ | ||
590 | : "memory" \ | ||
591 | ); \ | ||
592 | break; \ | ||
593 | } \ | ||
594 | case 8: \ | ||
595 | { \ | ||
596 | if (!IS_ENABLED(CONFIG_X86_64)) \ | ||
597 | __cmpxchg_wrong_size(); \ | ||
598 | \ | ||
599 | asm volatile("\t" ASM_STAC "\n" \ | ||
600 | "1:\t" LOCK_PREFIX "cmpxchgq %4, %2\n" \ | ||
601 | "2:\t" ASM_CLAC "\n" \ | ||
602 | "\t.section .fixup, \"ax\"\n" \ | ||
603 | "3:\tmov %3, %0\n" \ | ||
604 | "\tjmp 2b\n" \ | ||
605 | "\t.previous\n" \ | ||
606 | _ASM_EXTABLE(1b, 3b) \ | ||
607 | : "+r" (__ret), "=a" (__old), "+m" (*(ptr)) \ | ||
608 | : "i" (-EFAULT), "r" (__new), "1" (__old) \ | ||
609 | : "memory" \ | ||
610 | ); \ | ||
611 | break; \ | ||
612 | } \ | ||
613 | default: \ | ||
614 | __cmpxchg_wrong_size(); \ | ||
615 | } \ | ||
616 | *__uval = __old; \ | ||
617 | __ret; \ | ||
618 | }) | ||
619 | |||
620 | #define user_atomic_cmpxchg_inatomic(uval, ptr, old, new) \ | ||
621 | ({ \ | ||
622 | access_ok(VERIFY_WRITE, (ptr), sizeof(*(ptr))) ? \ | ||
623 | __user_atomic_cmpxchg_inatomic((uval), (ptr), \ | ||
624 | (old), (new), sizeof(*(ptr))) : \ | ||
625 | -EFAULT; \ | ||
626 | }) | ||
627 | |||
528 | /* | 628 | /* |
529 | * movsl can be slow when source and dest are not both 8-byte aligned | 629 | * movsl can be slow when source and dest are not both 8-byte aligned |
530 | */ | 630 | */ |
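[Editor's note] The asm range check replaced above becomes straightforward in C once the two cases are separated: a compile-time-constant size can never overflow the limit side of the comparison, while a variable size must be checked for wraparound explicitly. A standalone model with a hypothetical 3 GiB limit:

    #include <stdio.h>
    #include <stdbool.h>

    static bool range_not_ok(unsigned long addr, unsigned long size,
                             unsigned long limit)
    {
            /* Constant size: subtract from the limit, not add to addr. */
            if (__builtin_constant_p(size))
                    return addr > limit - size;

            /* Arbitrary size: detect addr + size wrapping past ULONG_MAX. */
            addr += size;
            if (addr < size)
                    return true;
            return addr > limit;
    }

    int main(void)
    {
            unsigned long limit = 0xc0000000UL;   /* hypothetical TASK_SIZE */
            printf("%d\n", range_not_ok(0xb0000000UL, 16, limit));   /* 0: ok         */
            printf("%d\n", range_not_ok(0xbffffff8UL, 16, limit));   /* 1: past limit */
            printf("%d\n", range_not_ok(0xfffffff0UL, 0x20, limit)); /* 1: too high   */
            return 0;
    }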
diff --git a/arch/x86/include/asm/uaccess_64.h b/arch/x86/include/asm/uaccess_64.h index 190413d0de57..12a26b979bf1 100644 --- a/arch/x86/include/asm/uaccess_64.h +++ b/arch/x86/include/asm/uaccess_64.h | |||
@@ -204,13 +204,13 @@ int __copy_in_user(void __user *dst, const void __user *src, unsigned size) | |||
204 | static __must_check __always_inline int | 204 | static __must_check __always_inline int |
205 | __copy_from_user_inatomic(void *dst, const void __user *src, unsigned size) | 205 | __copy_from_user_inatomic(void *dst, const void __user *src, unsigned size) |
206 | { | 206 | { |
207 | return __copy_from_user_nocheck(dst, (__force const void *)src, size); | 207 | return __copy_from_user_nocheck(dst, src, size); |
208 | } | 208 | } |
209 | 209 | ||
210 | static __must_check __always_inline int | 210 | static __must_check __always_inline int |
211 | __copy_to_user_inatomic(void __user *dst, const void *src, unsigned size) | 211 | __copy_to_user_inatomic(void __user *dst, const void *src, unsigned size) |
212 | { | 212 | { |
213 | return __copy_to_user_nocheck((__force void *)dst, src, size); | 213 | return __copy_to_user_nocheck(dst, src, size); |
214 | } | 214 | } |
215 | 215 | ||
216 | extern long __copy_user_nocache(void *dst, const void __user *src, | 216 | extern long __copy_user_nocache(void *dst, const void __user *src, |
diff --git a/arch/x86/include/asm/unistd.h b/arch/x86/include/asm/unistd.h index c2a48139c340..3f556c6a0157 100644 --- a/arch/x86/include/asm/unistd.h +++ b/arch/x86/include/asm/unistd.h | |||
@@ -23,6 +23,9 @@ | |||
23 | # include <asm/unistd_64.h> | 23 | # include <asm/unistd_64.h> |
24 | # include <asm/unistd_64_x32.h> | 24 | # include <asm/unistd_64_x32.h> |
25 | # define __ARCH_WANT_COMPAT_SYS_TIME | 25 | # define __ARCH_WANT_COMPAT_SYS_TIME |
26 | # define __ARCH_WANT_COMPAT_SYS_GETDENTS64 | ||
27 | # define __ARCH_WANT_COMPAT_SYS_PREADV64 | ||
28 | # define __ARCH_WANT_COMPAT_SYS_PWRITEV64 | ||
26 | 29 | ||
27 | # endif | 30 | # endif |
28 | 31 | ||
diff --git a/arch/x86/include/asm/uv/uv.h b/arch/x86/include/asm/uv/uv.h index 6b964a0b86d1..062921ef34e9 100644 --- a/arch/x86/include/asm/uv/uv.h +++ b/arch/x86/include/asm/uv/uv.h | |||
@@ -12,7 +12,6 @@ extern enum uv_system_type get_uv_system_type(void); | |||
12 | extern int is_uv_system(void); | 12 | extern int is_uv_system(void); |
13 | extern void uv_cpu_init(void); | 13 | extern void uv_cpu_init(void); |
14 | extern void uv_nmi_init(void); | 14 | extern void uv_nmi_init(void); |
15 | extern void uv_register_nmi_notifier(void); | ||
16 | extern void uv_system_init(void); | 15 | extern void uv_system_init(void); |
17 | extern const struct cpumask *uv_flush_tlb_others(const struct cpumask *cpumask, | 16 | extern const struct cpumask *uv_flush_tlb_others(const struct cpumask *cpumask, |
18 | struct mm_struct *mm, | 17 | struct mm_struct *mm, |
@@ -26,7 +25,6 @@ static inline enum uv_system_type get_uv_system_type(void) { return UV_NONE; } | |||
26 | static inline int is_uv_system(void) { return 0; } | 25 | static inline int is_uv_system(void) { return 0; } |
27 | static inline void uv_cpu_init(void) { } | 26 | static inline void uv_cpu_init(void) { } |
28 | static inline void uv_system_init(void) { } | 27 | static inline void uv_system_init(void) { } |
29 | static inline void uv_register_nmi_notifier(void) { } | ||
30 | static inline const struct cpumask * | 28 | static inline const struct cpumask * |
31 | uv_flush_tlb_others(const struct cpumask *cpumask, struct mm_struct *mm, | 29 | uv_flush_tlb_others(const struct cpumask *cpumask, struct mm_struct *mm, |
32 | unsigned long start, unsigned long end, unsigned int cpu) | 30 | unsigned long start, unsigned long end, unsigned int cpu) |
diff --git a/arch/x86/include/asm/vdso.h b/arch/x86/include/asm/vdso.h index fddb53d63915..d1dc55404ff1 100644 --- a/arch/x86/include/asm/vdso.h +++ b/arch/x86/include/asm/vdso.h | |||
@@ -1,8 +1,45 @@ | |||
1 | #ifndef _ASM_X86_VDSO_H | 1 | #ifndef _ASM_X86_VDSO_H |
2 | #define _ASM_X86_VDSO_H | 2 | #define _ASM_X86_VDSO_H |
3 | 3 | ||
4 | #include <asm/page_types.h> | ||
5 | #include <linux/linkage.h> | ||
6 | |||
7 | #ifdef __ASSEMBLER__ | ||
8 | |||
9 | #define DEFINE_VDSO_IMAGE(symname, filename) \ | ||
10 | __PAGE_ALIGNED_DATA ; \ | ||
11 | .globl symname##_start, symname##_end ; \ | ||
12 | .align PAGE_SIZE ; \ | ||
13 | symname##_start: ; \ | ||
14 | .incbin filename ; \ | ||
15 | symname##_end: ; \ | ||
16 | .align PAGE_SIZE /* extra data here leaks to userspace. */ ; \ | ||
17 | \ | ||
18 | .previous ; \ | ||
19 | \ | ||
20 | .globl symname##_pages ; \ | ||
21 | .bss ; \ | ||
22 | .align 8 ; \ | ||
23 | .type symname##_pages, @object ; \ | ||
24 | symname##_pages: ; \ | ||
25 | .zero (symname##_end - symname##_start + PAGE_SIZE - 1) / PAGE_SIZE * (BITS_PER_LONG / 8) ; \ | ||
26 | .size symname##_pages, .-symname##_pages | ||
27 | |||
28 | #else | ||
29 | |||
30 | #define DECLARE_VDSO_IMAGE(symname) \ | ||
31 | extern char symname##_start[], symname##_end[]; \ | ||
32 | extern struct page *symname##_pages[] | ||
33 | |||
4 | #if defined CONFIG_X86_32 || defined CONFIG_COMPAT | 34 | #if defined CONFIG_X86_32 || defined CONFIG_COMPAT |
5 | extern const char VDSO32_PRELINK[]; | 35 | |
36 | #include <asm/vdso32.h> | ||
37 | |||
38 | DECLARE_VDSO_IMAGE(vdso32_int80); | ||
39 | #ifdef CONFIG_COMPAT | ||
40 | DECLARE_VDSO_IMAGE(vdso32_syscall); | ||
41 | #endif | ||
42 | DECLARE_VDSO_IMAGE(vdso32_sysenter); | ||
6 | 43 | ||
7 | /* | 44 | /* |
8 | * Given a pointer to the vDSO image, find the pointer to VDSO32_name | 45 | * Given a pointer to the vDSO image, find the pointer to VDSO32_name |
@@ -11,8 +48,7 @@ extern const char VDSO32_PRELINK[]; | |||
11 | #define VDSO32_SYMBOL(base, name) \ | 48 | #define VDSO32_SYMBOL(base, name) \ |
12 | ({ \ | 49 | ({ \ |
13 | extern const char VDSO32_##name[]; \ | 50 | extern const char VDSO32_##name[]; \ |
14 | (void __user *)(VDSO32_##name - VDSO32_PRELINK + \ | 51 | (void __user *)(VDSO32_##name + (unsigned long)(base)); \ |
15 | (unsigned long)(base)); \ | ||
16 | }) | 52 | }) |
17 | #endif | 53 | #endif |
18 | 54 | ||
@@ -23,12 +59,8 @@ extern const char VDSO32_PRELINK[]; | |||
23 | extern void __user __kernel_sigreturn; | 59 | extern void __user __kernel_sigreturn; |
24 | extern void __user __kernel_rt_sigreturn; | 60 | extern void __user __kernel_rt_sigreturn; |
25 | 61 | ||
26 | /* | 62 | void __init patch_vdso32(void *vdso, size_t len); |
27 | * These symbols are defined by vdso32.S to mark the bounds | 63 | |
28 | * of the ELF DSO images included therein. | 64 | #endif /* __ASSEMBLER__ */ |
29 | */ | ||
30 | extern const char vdso32_int80_start, vdso32_int80_end; | ||
31 | extern const char vdso32_syscall_start, vdso32_syscall_end; | ||
32 | extern const char vdso32_sysenter_start, vdso32_sysenter_end; | ||
33 | 65 | ||
34 | #endif /* _ASM_X86_VDSO_H */ | 66 | #endif /* _ASM_X86_VDSO_H */ |
diff --git a/arch/x86/include/asm/vdso32.h b/arch/x86/include/asm/vdso32.h new file mode 100644 index 000000000000..7efb7018406e --- /dev/null +++ b/arch/x86/include/asm/vdso32.h | |||
@@ -0,0 +1,11 @@ | |||
1 | #ifndef _ASM_X86_VDSO32_H | ||
2 | #define _ASM_X86_VDSO32_H | ||
3 | |||
4 | #define VDSO_BASE_PAGE 0 | ||
5 | #define VDSO_VVAR_PAGE 1 | ||
6 | #define VDSO_HPET_PAGE 2 | ||
7 | #define VDSO_PAGES 3 | ||
8 | #define VDSO_PREV_PAGES 2 | ||
9 | #define VDSO_OFFSET(x) ((x) * PAGE_SIZE) | ||
10 | |||
11 | #endif | ||
diff --git a/arch/x86/include/asm/vgtod.h b/arch/x86/include/asm/vgtod.h index 46e24d36b7da..3c3366c2e37f 100644 --- a/arch/x86/include/asm/vgtod.h +++ b/arch/x86/include/asm/vgtod.h | |||
@@ -1,30 +1,73 @@ | |||
1 | #ifndef _ASM_X86_VGTOD_H | 1 | #ifndef _ASM_X86_VGTOD_H |
2 | #define _ASM_X86_VGTOD_H | 2 | #define _ASM_X86_VGTOD_H |
3 | 3 | ||
4 | #include <asm/vsyscall.h> | 4 | #include <linux/compiler.h> |
5 | #include <linux/clocksource.h> | 5 | #include <linux/clocksource.h> |
6 | 6 | ||
7 | #ifdef BUILD_VDSO32_64 | ||
8 | typedef u64 gtod_long_t; | ||
9 | #else | ||
10 | typedef unsigned long gtod_long_t; | ||
11 | #endif | ||
12 | /* | ||
13 | * vsyscall_gtod_data will be accessed by 32 and 64 bit code at the same time | ||
14 | * so be careful when modifying this structure. | ||
15 | */ | ||
7 | struct vsyscall_gtod_data { | 16 | struct vsyscall_gtod_data { |
8 | seqcount_t seq; | 17 | unsigned seq; |
9 | 18 | ||
10 | struct { /* extract of a clocksource struct */ | 19 | int vclock_mode; |
11 | int vclock_mode; | 20 | cycle_t cycle_last; |
12 | cycle_t cycle_last; | 21 | cycle_t mask; |
13 | cycle_t mask; | 22 | u32 mult; |
14 | u32 mult; | 23 | u32 shift; |
15 | u32 shift; | ||
16 | } clock; | ||
17 | 24 | ||
18 | /* open coded 'struct timespec' */ | 25 | /* open coded 'struct timespec' */ |
19 | time_t wall_time_sec; | ||
20 | u64 wall_time_snsec; | 26 | u64 wall_time_snsec; |
27 | gtod_long_t wall_time_sec; | ||
28 | gtod_long_t monotonic_time_sec; | ||
21 | u64 monotonic_time_snsec; | 29 | u64 monotonic_time_snsec; |
22 | time_t monotonic_time_sec; | 30 | gtod_long_t wall_time_coarse_sec; |
31 | gtod_long_t wall_time_coarse_nsec; | ||
32 | gtod_long_t monotonic_time_coarse_sec; | ||
33 | gtod_long_t monotonic_time_coarse_nsec; | ||
23 | 34 | ||
24 | struct timezone sys_tz; | 35 | int tz_minuteswest; |
25 | struct timespec wall_time_coarse; | 36 | int tz_dsttime; |
26 | struct timespec monotonic_time_coarse; | ||
27 | }; | 37 | }; |
28 | extern struct vsyscall_gtod_data vsyscall_gtod_data; | 38 | extern struct vsyscall_gtod_data vsyscall_gtod_data; |
29 | 39 | ||
40 | static inline unsigned gtod_read_begin(const struct vsyscall_gtod_data *s) | ||
41 | { | ||
42 | unsigned ret; | ||
43 | |||
44 | repeat: | ||
45 | ret = ACCESS_ONCE(s->seq); | ||
46 | if (unlikely(ret & 1)) { | ||
47 | cpu_relax(); | ||
48 | goto repeat; | ||
49 | } | ||
50 | smp_rmb(); | ||
51 | return ret; | ||
52 | } | ||
53 | |||
54 | static inline int gtod_read_retry(const struct vsyscall_gtod_data *s, | ||
55 | unsigned start) | ||
56 | { | ||
57 | smp_rmb(); | ||
58 | return unlikely(s->seq != start); | ||
59 | } | ||
60 | |||
61 | static inline void gtod_write_begin(struct vsyscall_gtod_data *s) | ||
62 | { | ||
63 | ++s->seq; | ||
64 | smp_wmb(); | ||
65 | } | ||
66 | |||
67 | static inline void gtod_write_end(struct vsyscall_gtod_data *s) | ||
68 | { | ||
69 | smp_wmb(); | ||
70 | ++s->seq; | ||
71 | } | ||
72 | |||
30 | #endif /* _ASM_X86_VGTOD_H */ | 73 | #endif /* _ASM_X86_VGTOD_H */ |
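[Editor's note] The gtod_read_begin/retry and gtod_write_begin/end helpers above open-code a seqcount: the writer makes seq odd for the duration of the update, and readers spin while seq is odd and retry if it changed across the read. A single-threaded control-flow model (memory barriers elided, names invented):

    #include <stdio.h>

    struct gtod { unsigned seq; unsigned long sec, nsec; };

    static unsigned read_begin(const struct gtod *s)
    {
            unsigned ret;
            while ((ret = s->seq) & 1)
                    ;               /* writer in progress: spin */
            return ret;
    }

    static int read_retry(const struct gtod *s, unsigned start)
    {
            return s->seq != start; /* changed underneath us: retry */
    }

    static void write_time(struct gtod *s, unsigned long sec, unsigned long nsec)
    {
            ++s->seq;               /* odd: readers spin or retry */
            s->sec = sec;
            s->nsec = nsec;
            ++s->seq;               /* even: update is visible    */
    }

    int main(void)
    {
            struct gtod g = { 0 };
            unsigned long sec, nsec;
            unsigned start;

            write_time(&g, 1700000000UL, 500UL);
            do {
                    start = read_begin(&g);
                    sec = g.sec;
                    nsec = g.nsec;
            } while (read_retry(&g, start));
            printf("%lu.%09lu\n", sec, nsec);
            return 0;
    }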
diff --git a/arch/x86/include/asm/visws/cobalt.h b/arch/x86/include/asm/visws/cobalt.h deleted file mode 100644 index 2edb37637ead..000000000000 --- a/arch/x86/include/asm/visws/cobalt.h +++ /dev/null | |||
@@ -1,127 +0,0 @@ | |||
1 | #ifndef _ASM_X86_VISWS_COBALT_H | ||
2 | #define _ASM_X86_VISWS_COBALT_H | ||
3 | |||
4 | #include <asm/fixmap.h> | ||
5 | |||
6 | /* | ||
7 | * Cobalt SGI Visual Workstation system ASIC | ||
8 | */ | ||
9 | |||
10 | #define CO_CPU_NUM_PHYS 0x1e00 | ||
11 | #define CO_CPU_TAB_PHYS (CO_CPU_NUM_PHYS + 2) | ||
12 | |||
13 | #define CO_CPU_MAX 4 | ||
14 | |||
15 | #define CO_CPU_PHYS 0xc2000000 | ||
16 | #define CO_APIC_PHYS 0xc4000000 | ||
17 | |||
18 | /* see set_fixmap() and asm/fixmap.h */ | ||
19 | #define CO_CPU_VADDR (fix_to_virt(FIX_CO_CPU)) | ||
20 | #define CO_APIC_VADDR (fix_to_virt(FIX_CO_APIC)) | ||
21 | |||
22 | /* Cobalt CPU registers -- relative to CO_CPU_VADDR, use co_cpu_*() */ | ||
23 | #define CO_CPU_REV 0x08 | ||
24 | #define CO_CPU_CTRL 0x10 | ||
25 | #define CO_CPU_STAT 0x20 | ||
26 | #define CO_CPU_TIMEVAL 0x30 | ||
27 | |||
28 | /* CO_CPU_CTRL bits */ | ||
29 | #define CO_CTRL_TIMERUN 0x04 /* 0 == disabled */ | ||
30 | #define CO_CTRL_TIMEMASK 0x08 /* 0 == unmasked */ | ||
31 | |||
32 | /* CO_CPU_STATUS bits */ | ||
33 | #define CO_STAT_TIMEINTR 0x02 /* (r) 1 == int pend, (w) 0 == clear */ | ||
34 | |||
35 | /* CO_CPU_TIMEVAL value */ | ||
36 | #define CO_TIME_HZ 100000000 /* Cobalt core rate */ | ||
37 | |||
38 | /* Cobalt APIC registers -- relative to CO_APIC_VADDR, use co_apic_*() */ | ||
39 | #define CO_APIC_HI(n) (((n) * 0x10) + 4) | ||
40 | #define CO_APIC_LO(n) ((n) * 0x10) | ||
41 | #define CO_APIC_ID 0x0ffc | ||
42 | |||
43 | /* CO_APIC_ID bits */ | ||
44 | #define CO_APIC_ENABLE 0x00000100 | ||
45 | |||
46 | /* CO_APIC_LO bits */ | ||
47 | #define CO_APIC_MASK 0x00010000 /* 0 = enabled */ | ||
48 | #define CO_APIC_LEVEL 0x00008000 /* 0 = edge */ | ||
49 | |||
50 | /* | ||
51 | * Where things are physically wired to Cobalt | ||
52 | * #defines with no board _<type>_<rev>_ are common to all (thus far) | ||
53 | */ | ||
54 | #define CO_APIC_IDE0 4 | ||
55 | #define CO_APIC_IDE1 2 /* Only on 320 */ | ||
56 | |||
57 | #define CO_APIC_8259 12 /* serial, floppy, par-l-l */ | ||
58 | |||
59 | /* Lithium PCI Bridge A -- "the one with 82557 Ethernet" */ | ||
60 | #define CO_APIC_PCIA_BASE0 0 /* and 1 */ /* slot 0, line 0 */ | ||
61 | #define CO_APIC_PCIA_BASE123 5 /* and 6 */ /* slot 0, line 1 */ | ||
62 | |||
63 | #define CO_APIC_PIIX4_USB 7 /* this one is weird */ | ||
64 | |||
65 | /* Lithium PCI Bridge B -- "the one with PIIX4" */ | ||
66 | #define CO_APIC_PCIB_BASE0 8 /* and 9-12 *//* slot 0, line 0 */ | ||
67 | #define CO_APIC_PCIB_BASE123 13 /* 14.15 */ /* slot 0, line 1 */ | ||
68 | |||
69 | #define CO_APIC_VIDOUT0 16 | ||
70 | #define CO_APIC_VIDOUT1 17 | ||
71 | #define CO_APIC_VIDIN0 18 | ||
72 | #define CO_APIC_VIDIN1 19 | ||
73 | |||
74 | #define CO_APIC_LI_AUDIO 22 | ||
75 | |||
76 | #define CO_APIC_AS 24 | ||
77 | #define CO_APIC_RE 25 | ||
78 | |||
79 | #define CO_APIC_CPU 28 /* Timer and Cache interrupt */ | ||
80 | #define CO_APIC_NMI 29 | ||
81 | #define CO_APIC_LAST CO_APIC_NMI | ||
82 | |||
83 | /* | ||
84 | * This is how irqs are assigned on the Visual Workstation. | ||
85 | * Legacy devices get irq's 1-15 (system clock is 0 and is CO_APIC_CPU). | ||
86 | * All other devices (including PCI) go to Cobalt and are irq's 16 on up. | ||
87 | */ | ||
88 | #define CO_IRQ_APIC0 16 /* irq of apic entry 0 */ | ||
89 | #define IS_CO_APIC(irq) ((irq) >= CO_IRQ_APIC0) | ||
90 | #define CO_IRQ(apic) (CO_IRQ_APIC0 + (apic)) /* apic ent to irq */ | ||
91 | #define CO_APIC(irq) ((irq) - CO_IRQ_APIC0) /* irq to apic ent */ | ||
92 | #define CO_IRQ_IDE0 14 /* knowledge of... */ | ||
93 | #define CO_IRQ_IDE1 15 /* ... ide driver defaults! */ | ||
94 | #define CO_IRQ_8259 CO_IRQ(CO_APIC_8259) | ||
95 | |||
96 | #ifdef CONFIG_X86_VISWS_APIC | ||
97 | static inline void co_cpu_write(unsigned long reg, unsigned long v) | ||
98 | { | ||
99 | *((volatile unsigned long *)(CO_CPU_VADDR+reg))=v; | ||
100 | } | ||
101 | |||
102 | static inline unsigned long co_cpu_read(unsigned long reg) | ||
103 | { | ||
104 | return *((volatile unsigned long *)(CO_CPU_VADDR+reg)); | ||
105 | } | ||
106 | |||
107 | static inline void co_apic_write(unsigned long reg, unsigned long v) | ||
108 | { | ||
109 | *((volatile unsigned long *)(CO_APIC_VADDR+reg))=v; | ||
110 | } | ||
111 | |||
112 | static inline unsigned long co_apic_read(unsigned long reg) | ||
113 | { | ||
114 | return *((volatile unsigned long *)(CO_APIC_VADDR+reg)); | ||
115 | } | ||
116 | #endif | ||
117 | |||
118 | extern char visws_board_type; | ||
119 | |||
120 | #define VISWS_320 0 | ||
121 | #define VISWS_540 1 | ||
122 | |||
123 | extern char visws_board_rev; | ||
124 | |||
125 | extern int pci_visws_init(void); | ||
126 | |||
127 | #endif /* _ASM_X86_VISWS_COBALT_H */ | ||
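The co_cpu_*()/co_apic_*() accessors above are plain volatile MMIO loads and stores against the fixmap windows. A minimal sketch of how the timer registers fit together -- the function names are hypothetical and HZ is the usual kernel tick rate; only the CO_* constants and the co_cpu_*() helpers come from this header:

static void cobalt_timer_start(void)
{
	/* One interrupt per 1/HZ s: divide the 100 MHz core rate down. */
	co_cpu_write(CO_CPU_TIMEVAL, CO_TIME_HZ / HZ);
	co_cpu_write(CO_CPU_CTRL,
		     co_cpu_read(CO_CPU_CTRL) | CO_CTRL_TIMERUN);
}

static void cobalt_timer_ack(void)
{
	/* Per the CO_STAT_TIMEINTR comment above: (w) 0 == clear. */
	co_cpu_write(CO_CPU_STAT,
		     co_cpu_read(CO_CPU_STAT) & ~CO_STAT_TIMEINTR);
}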
diff --git a/arch/x86/include/asm/visws/lithium.h b/arch/x86/include/asm/visws/lithium.h deleted file mode 100644 index a10d89bc1270..000000000000 --- a/arch/x86/include/asm/visws/lithium.h +++ /dev/null | |||
@@ -1,53 +0,0 @@ | |||
1 | #ifndef _ASM_X86_VISWS_LITHIUM_H | ||
2 | #define _ASM_X86_VISWS_LITHIUM_H | ||
3 | |||
4 | #include <asm/fixmap.h> | ||
5 | |||
6 | /* | ||
7 | * Lithium is the SGI Visual Workstation I/O ASIC | ||
8 | */ | ||
9 | |||
10 | #define LI_PCI_A_PHYS 0xfc000000 /* Enet is dev 3 */ | ||
11 | #define LI_PCI_B_PHYS 0xfd000000 /* PIIX4 is here */ | ||
12 | |||
13 | /* see set_fixmap() and asm/fixmap.h */ | ||
14 | #define LI_PCIA_VADDR (fix_to_virt(FIX_LI_PCIA)) | ||
15 | #define LI_PCIB_VADDR (fix_to_virt(FIX_LI_PCIB)) | ||
16 | |||
17 | /* Non-standard PCI config registers (hence not in linux/pci.h) */ | ||
18 | #define LI_PCI_BUSNUM 0x44 /* lo8: primary, hi8: sub */ | ||
19 | #define LI_PCI_INTEN 0x46 | ||
20 | |||
21 | /* LI_PCI_INTEN bits */ | ||
22 | #define LI_INTA_0 0x0001 | ||
23 | #define LI_INTA_1 0x0002 | ||
24 | #define LI_INTA_2 0x0004 | ||
25 | #define LI_INTA_3 0x0008 | ||
26 | #define LI_INTA_4 0x0010 | ||
27 | #define LI_INTB 0x0020 | ||
28 | #define LI_INTC 0x0040 | ||
29 | #define LI_INTD 0x0080 | ||
30 | |||
31 | /* More special purpose macros... */ | ||
32 | static inline void li_pcia_write16(unsigned long reg, unsigned short v) | ||
33 | { | ||
34 | *((volatile unsigned short *)(LI_PCIA_VADDR+reg))=v; | ||
35 | } | ||
36 | |||
37 | static inline unsigned short li_pcia_read16(unsigned long reg) | ||
38 | { | ||
39 | return *((volatile unsigned short *)(LI_PCIA_VADDR+reg)); | ||
40 | } | ||
41 | |||
42 | static inline void li_pcib_write16(unsigned long reg, unsigned short v) | ||
43 | { | ||
44 | *((volatile unsigned short *)(LI_PCIB_VADDR+reg))=v; | ||
45 | } | ||
46 | |||
47 | static inline unsigned short li_pcib_read16(unsigned long reg) | ||
48 | { | ||
49 | return *((volatile unsigned short *)(LI_PCIB_VADDR+reg)); | ||
50 | } | ||
51 | |||
52 | #endif /* _ASM_X86_VISWS_LITHIUM_H */ | ||
53 | |||
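The li_pcia/li_pcib 16-bit helpers mirror the Cobalt accessors for the two Lithium bridges. A short sketch, with a hypothetical function name, of unmasking one interrupt line through LI_PCI_INTEN:

static void li_pcia_enable_inta0(void)
{
	unsigned short en = li_pcia_read16(LI_PCI_INTEN);

	li_pcia_write16(LI_PCI_INTEN, en | LI_INTA_0);
}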
diff --git a/arch/x86/include/asm/visws/piix4.h b/arch/x86/include/asm/visws/piix4.h deleted file mode 100644 index d0af4d338e7f..000000000000 --- a/arch/x86/include/asm/visws/piix4.h +++ /dev/null | |||
@@ -1,107 +0,0 @@ | |||
1 | #ifndef _ASM_X86_VISWS_PIIX4_H | ||
2 | #define _ASM_X86_VISWS_PIIX4_H | ||
3 | |||
4 | /* | ||
5 | * PIIX4 as used on SGI Visual Workstations | ||
6 | */ | ||
7 | |||
8 | #define PIIX_PM_START 0x0F80 | ||
9 | |||
10 | #define SIO_GPIO_START 0x0FC0 | ||
11 | |||
12 | #define SIO_PM_START 0x0FC8 | ||
13 | |||
14 | #define PMBASE PIIX_PM_START | ||
15 | #define GPIREG0 (PMBASE+0x30) | ||
16 | #define GPIREG(x) (GPIREG0+((x)/8)) | ||
17 | #define GPIBIT(x) (1 << ((x)%8)) | ||
18 | |||
19 | #define PIIX_GPI_BD_ID1 18 | ||
20 | #define PIIX_GPI_BD_ID2 19 | ||
21 | #define PIIX_GPI_BD_ID3 20 | ||
22 | #define PIIX_GPI_BD_ID4 21 | ||
23 | #define PIIX_GPI_BD_REG GPIREG(PIIX_GPI_BD_ID1) | ||
24 | #define PIIX_GPI_BD_MASK (GPIBIT(PIIX_GPI_BD_ID1) | \ | ||
25 | GPIBIT(PIIX_GPI_BD_ID2) | \ | ||
26 | GPIBIT(PIIX_GPI_BD_ID3) | \ | ||
27 | GPIBIT(PIIX_GPI_BD_ID4) ) | ||
28 | |||
29 | #define PIIX_GPI_BD_SHIFT (PIIX_GPI_BD_ID1 % 8) | ||
30 | |||
31 | #define SIO_INDEX 0x2e | ||
32 | #define SIO_DATA 0x2f | ||
33 | |||
34 | #define SIO_DEV_SEL 0x7 | ||
35 | #define SIO_DEV_ENB 0x30 | ||
36 | #define SIO_DEV_MSB 0x60 | ||
37 | #define SIO_DEV_LSB 0x61 | ||
38 | |||
39 | #define SIO_GP_DEV 0x7 | ||
40 | |||
41 | #define SIO_GP_BASE SIO_GPIO_START | ||
42 | #define SIO_GP_MSB (SIO_GP_BASE>>8) | ||
43 | #define SIO_GP_LSB (SIO_GP_BASE&0xff) | ||
44 | |||
45 | #define SIO_GP_DATA1 (SIO_GP_BASE+0) | ||
46 | |||
47 | #define SIO_PM_DEV 0x8 | ||
48 | |||
49 | #define SIO_PM_BASE SIO_PM_START | ||
50 | #define SIO_PM_MSB (SIO_PM_BASE>>8) | ||
51 | #define SIO_PM_LSB (SIO_PM_BASE&0xff) | ||
52 | #define SIO_PM_INDEX (SIO_PM_BASE+0) | ||
53 | #define SIO_PM_DATA (SIO_PM_BASE+1) | ||
54 | |||
55 | #define SIO_PM_FER2 0x1 | ||
56 | |||
57 | #define SIO_PM_GP_EN 0x80 | ||
58 | |||
59 | |||
60 | |||
61 | /* | ||
62 | * This is the dev/reg where generating a config cycle will | ||
63 | * result in a PCI special cycle. | ||
64 | */ | ||
65 | #define SPECIAL_DEV 0xff | ||
66 | #define SPECIAL_REG 0x00 | ||
67 | |||
68 | /* | ||
69 | * PIIX4 needs to see a special cycle with the following data | ||
70 | * to be convinced the processor has gone into the stop grant | ||
71 | * state. PIIX4 insists on seeing this before it will power | ||
72 | * down a system. | ||
73 | */ | ||
74 | #define PIIX_SPECIAL_STOP 0x00120002 | ||
75 | |||
76 | #define PIIX4_RESET_PORT 0xcf9 | ||
77 | #define PIIX4_RESET_VAL 0x6 | ||
78 | |||
79 | #define PMSTS_PORT 0xf80 // 2 bytes PM Status | ||
80 | #define PMEN_PORT 0xf82 // 2 bytes PM Enable | ||
81 | #define PMCNTRL_PORT 0xf84 // 2 bytes PM Control | ||
82 | |||
83 | #define PM_SUSPEND_ENABLE 0x2000 // start sequence to suspend state | ||
84 | |||
85 | /* | ||
86 | * PMSTS and PMEN I/O bit definitions. | ||
87 | * (Bits are the same in both registers) | ||
88 | */ | ||
89 | #define PM_STS_RSM (1<<15) // Resume Status | ||
90 | #define PM_STS_PWRBTNOR (1<<11) // Power Button Override | ||
91 | #define PM_STS_RTC (1<<10) // RTC status | ||
92 | #define PM_STS_PWRBTN (1<<8) // Power Button Pressed? | ||
93 | #define PM_STS_GBL (1<<5) // Global Status | ||
94 | #define PM_STS_BM (1<<4) // Bus Master Status | ||
95 | #define PM_STS_TMROF (1<<0) // Timer Overflow Status. | ||
96 | |||
97 | /* | ||
98 | * Stop clock GPI register | ||
99 | */ | ||
100 | #define PIIX_GPIREG0 (0xf80 + 0x30) | ||
101 | |||
102 | /* | ||
103 | * Stop clock GPI bit in GPIREG0 | ||
104 | */ | ||
105 | #define PIIX_GPI_STPCLK 0x4 // STPCLK signal routed back in | ||
106 | |||
107 | #endif /* _ASM_X86_VISWS_PIIX4_H */ | ||
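GPIREG() and GPIBIT() turn a GPI pin number into an I/O port and a bit mask, and the board-ID pins 18-21 show the intended use. A sketch of reading the strapped board ID -- the function name is hypothetical, inb_p() comes from <asm/io.h>:

#include <asm/io.h>

static int piix4_read_board_id(void)
{
	/* Pins 18-21 land in one GPI byte; mask them out and shift down. */
	return (inb_p(PIIX_GPI_BD_REG) & PIIX_GPI_BD_MASK)
		>> PIIX_GPI_BD_SHIFT;
}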
diff --git a/arch/x86/include/asm/visws/sgivw.h b/arch/x86/include/asm/visws/sgivw.h deleted file mode 100644 index 5fbf63e1003c..000000000000 --- a/arch/x86/include/asm/visws/sgivw.h +++ /dev/null | |||
@@ -1,5 +0,0 @@ | |||
1 | /* | ||
2 | * Frame buffer position and size: | ||
3 | */ | ||
4 | extern unsigned long sgivwfb_mem_phys; | ||
5 | extern unsigned long sgivwfb_mem_size; | ||
diff --git a/arch/x86/include/asm/vmx.h b/arch/x86/include/asm/vmx.h index 966502d4682e..7004d21e6219 100644 --- a/arch/x86/include/asm/vmx.h +++ b/arch/x86/include/asm/vmx.h | |||
@@ -85,6 +85,7 @@ | |||
85 | #define VM_EXIT_SAVE_IA32_EFER 0x00100000 | 85 | #define VM_EXIT_SAVE_IA32_EFER 0x00100000 |
86 | #define VM_EXIT_LOAD_IA32_EFER 0x00200000 | 86 | #define VM_EXIT_LOAD_IA32_EFER 0x00200000 |
87 | #define VM_EXIT_SAVE_VMX_PREEMPTION_TIMER 0x00400000 | 87 | #define VM_EXIT_SAVE_VMX_PREEMPTION_TIMER 0x00400000 |
88 | #define VM_EXIT_CLEAR_BNDCFGS 0x00800000 | ||
88 | 89 | ||
89 | #define VM_EXIT_ALWAYSON_WITHOUT_TRUE_MSR 0x00036dff | 90 | #define VM_EXIT_ALWAYSON_WITHOUT_TRUE_MSR 0x00036dff |
90 | 91 | ||
@@ -95,11 +96,13 @@ | |||
95 | #define VM_ENTRY_LOAD_IA32_PERF_GLOBAL_CTRL 0x00002000 | 96 | #define VM_ENTRY_LOAD_IA32_PERF_GLOBAL_CTRL 0x00002000 |
96 | #define VM_ENTRY_LOAD_IA32_PAT 0x00004000 | 97 | #define VM_ENTRY_LOAD_IA32_PAT 0x00004000 |
97 | #define VM_ENTRY_LOAD_IA32_EFER 0x00008000 | 98 | #define VM_ENTRY_LOAD_IA32_EFER 0x00008000 |
99 | #define VM_ENTRY_LOAD_BNDCFGS 0x00010000 | ||
98 | 100 | ||
99 | #define VM_ENTRY_ALWAYSON_WITHOUT_TRUE_MSR 0x000011ff | 101 | #define VM_ENTRY_ALWAYSON_WITHOUT_TRUE_MSR 0x000011ff |
100 | 102 | ||
101 | #define VMX_MISC_PREEMPTION_TIMER_RATE_MASK 0x0000001f | 103 | #define VMX_MISC_PREEMPTION_TIMER_RATE_MASK 0x0000001f |
102 | #define VMX_MISC_SAVE_EFER_LMA 0x00000020 | 104 | #define VMX_MISC_SAVE_EFER_LMA 0x00000020 |
105 | #define VMX_MISC_ACTIVITY_HLT 0x00000040 | ||
103 | 106 | ||
104 | /* VMCS Encodings */ | 107 | /* VMCS Encodings */ |
105 | enum vmcs_field { | 108 | enum vmcs_field { |
@@ -173,6 +176,8 @@ enum vmcs_field { | |||
173 | GUEST_PDPTR2_HIGH = 0x0000280f, | 176 | GUEST_PDPTR2_HIGH = 0x0000280f, |
174 | GUEST_PDPTR3 = 0x00002810, | 177 | GUEST_PDPTR3 = 0x00002810, |
175 | GUEST_PDPTR3_HIGH = 0x00002811, | 178 | GUEST_PDPTR3_HIGH = 0x00002811, |
179 | GUEST_BNDCFGS = 0x00002812, | ||
180 | GUEST_BNDCFGS_HIGH = 0x00002813, | ||
176 | HOST_IA32_PAT = 0x00002c00, | 181 | HOST_IA32_PAT = 0x00002c00, |
177 | HOST_IA32_PAT_HIGH = 0x00002c01, | 182 | HOST_IA32_PAT_HIGH = 0x00002c01, |
178 | HOST_IA32_EFER = 0x00002c02, | 183 | HOST_IA32_EFER = 0x00002c02, |
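The new VM_ENTRY_LOAD_BNDCFGS / VM_EXIT_CLEAR_BNDCFGS controls are opt-in: a hypervisor may only set them if the allowed-1 half (the high 32 bits) of the matching VMX capability MSR advertises them. A sketch of such a probe, assuming MSR_IA32_VMX_TRUE_ENTRY_CTLS from msr-index.h; KVM's real code routes this through adjust_vmx_controls() rather than an ad-hoc helper like this:

static bool cpu_has_vmx_load_bndcfgs(void)
{
	u64 ctls;

	rdmsrl(MSR_IA32_VMX_TRUE_ENTRY_CTLS, ctls);
	/* High dword: allowed-1 settings of the VM-entry controls. */
	return (ctls >> 32) & VM_ENTRY_LOAD_BNDCFGS;
}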
diff --git a/arch/x86/include/asm/vvar.h b/arch/x86/include/asm/vvar.h index d76ac40da206..081d909bc495 100644 --- a/arch/x86/include/asm/vvar.h +++ b/arch/x86/include/asm/vvar.h | |||
@@ -16,8 +16,8 @@ | |||
16 | * you mess up, the linker will catch it.) | 16 | * you mess up, the linker will catch it.) |
17 | */ | 17 | */ |
18 | 18 | ||
19 | /* Base address of vvars. This is not ABI. */ | 19 | #ifndef _ASM_X86_VVAR_H |
20 | #define VVAR_ADDRESS (-10*1024*1024 - 4096) | 20 | #define _ASM_X86_VVAR_H |
21 | 21 | ||
22 | #if defined(__VVAR_KERNEL_LDS) | 22 | #if defined(__VVAR_KERNEL_LDS) |
23 | 23 | ||
@@ -29,16 +29,35 @@ | |||
29 | 29 | ||
30 | #else | 30 | #else |
31 | 31 | ||
32 | #ifdef BUILD_VDSO32 | ||
33 | |||
34 | #define DECLARE_VVAR(offset, type, name) \ | ||
35 | extern type vvar_ ## name __attribute__((visibility("hidden"))); | ||
36 | |||
37 | #define VVAR(name) (vvar_ ## name) | ||
38 | |||
39 | #else | ||
40 | |||
41 | extern char __vvar_page; | ||
42 | |||
43 | /* Base address of vvars. This is not ABI. */ | ||
44 | #ifdef CONFIG_X86_64 | ||
45 | #define VVAR_ADDRESS (-10*1024*1024 - 4096) | ||
46 | #else | ||
47 | #define VVAR_ADDRESS (&__vvar_page) | ||
48 | #endif | ||
49 | |||
32 | #define DECLARE_VVAR(offset, type, name) \ | 50 | #define DECLARE_VVAR(offset, type, name) \ |
33 | static type const * const vvaraddr_ ## name = \ | 51 | static type const * const vvaraddr_ ## name = \ |
34 | (void *)(VVAR_ADDRESS + (offset)); | 52 | (void *)(VVAR_ADDRESS + (offset)); |
35 | 53 | ||
54 | #define VVAR(name) (*vvaraddr_ ## name) | ||
55 | #endif | ||
56 | |||
36 | #define DEFINE_VVAR(type, name) \ | 57 | #define DEFINE_VVAR(type, name) \ |
37 | type name \ | 58 | type name \ |
38 | __attribute__((section(".vvar_" #name), aligned(16))) __visible | 59 | __attribute__((section(".vvar_" #name), aligned(16))) __visible |
39 | 60 | ||
40 | #define VVAR(name) (*vvaraddr_ ## name) | ||
41 | |||
42 | #endif | 61 | #endif |
43 | 62 | ||
44 | /* DECLARE_VVAR(offset, type, name) */ | 63 | /* DECLARE_VVAR(offset, type, name) */ |
@@ -48,3 +67,5 @@ DECLARE_VVAR(16, int, vgetcpu_mode) | |||
48 | DECLARE_VVAR(128, struct vsyscall_gtod_data, vsyscall_gtod_data) | 67 | DECLARE_VVAR(128, struct vsyscall_gtod_data, vsyscall_gtod_data) |
49 | 68 | ||
50 | #undef DECLARE_VVAR | 69 | #undef DECLARE_VVAR |
70 | |||
71 | #endif | ||
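For reference, here is what the two DECLARE_VVAR() variants expand to for the vsyscall_gtod_data declaration above, derived mechanically from the macros in this hunk. The fixed-address form hands out a pointer at a known offset; the BUILD_VDSO32 form names a hidden symbol that the vDSO linker script must place:

/* Fixed-address variant: VVAR(vsyscall_gtod_data) dereferences this. */
static struct vsyscall_gtod_data const * const vvaraddr_vsyscall_gtod_data =
	(void *)(VVAR_ADDRESS + 128);

/* BUILD_VDSO32 variant: VVAR(vsyscall_gtod_data) is the object itself. */
extern struct vsyscall_gtod_data vvar_vsyscall_gtod_data
	__attribute__((visibility("hidden")));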
diff --git a/arch/x86/include/asm/x86_init.h b/arch/x86/include/asm/x86_init.h index 0f1be11e43d2..e45e4da96bf1 100644 --- a/arch/x86/include/asm/x86_init.h +++ b/arch/x86/include/asm/x86_init.h | |||
@@ -181,7 +181,7 @@ struct x86_msi_ops { | |||
181 | u8 hpet_id); | 181 | u8 hpet_id); |
182 | void (*teardown_msi_irq)(unsigned int irq); | 182 | void (*teardown_msi_irq)(unsigned int irq); |
183 | void (*teardown_msi_irqs)(struct pci_dev *dev); | 183 | void (*teardown_msi_irqs)(struct pci_dev *dev); |
184 | void (*restore_msi_irqs)(struct pci_dev *dev, int irq); | 184 | void (*restore_msi_irqs)(struct pci_dev *dev); |
185 | int (*setup_hpet_msi)(unsigned int irq, unsigned int id); | 185 | int (*setup_hpet_msi)(unsigned int irq, unsigned int id); |
186 | u32 (*msi_mask_irq)(struct msi_desc *desc, u32 mask, u32 flag); | 186 | u32 (*msi_mask_irq)(struct msi_desc *desc, u32 mask, u32 flag); |
187 | u32 (*msix_mask_irq)(struct msi_desc *desc, u32 flag); | 187 | u32 (*msix_mask_irq)(struct msi_desc *desc, u32 flag); |
diff --git a/arch/x86/include/asm/xen/page.h b/arch/x86/include/asm/xen/page.h index b913915e8e63..c949923a5668 100644 --- a/arch/x86/include/asm/xen/page.h +++ b/arch/x86/include/asm/xen/page.h | |||
@@ -49,10 +49,17 @@ extern bool __set_phys_to_machine(unsigned long pfn, unsigned long mfn); | |||
49 | extern unsigned long set_phys_range_identity(unsigned long pfn_s, | 49 | extern unsigned long set_phys_range_identity(unsigned long pfn_s, |
50 | unsigned long pfn_e); | 50 | unsigned long pfn_e); |
51 | 51 | ||
52 | extern int set_foreign_p2m_mapping(struct gnttab_map_grant_ref *map_ops, | ||
53 | struct gnttab_map_grant_ref *kmap_ops, | ||
54 | struct page **pages, unsigned int count); | ||
52 | extern int m2p_add_override(unsigned long mfn, struct page *page, | 55 | extern int m2p_add_override(unsigned long mfn, struct page *page, |
53 | struct gnttab_map_grant_ref *kmap_op); | 56 | struct gnttab_map_grant_ref *kmap_op); |
57 | extern int clear_foreign_p2m_mapping(struct gnttab_unmap_grant_ref *unmap_ops, | ||
58 | struct gnttab_map_grant_ref *kmap_ops, | ||
59 | struct page **pages, unsigned int count); | ||
54 | extern int m2p_remove_override(struct page *page, | 60 | extern int m2p_remove_override(struct page *page, |
55 | struct gnttab_map_grant_ref *kmap_op); | 61 | struct gnttab_map_grant_ref *kmap_op, |
62 | unsigned long mfn); | ||
56 | extern struct page *m2p_find_override(unsigned long mfn); | 63 | extern struct page *m2p_find_override(unsigned long mfn); |
57 | extern unsigned long m2p_find_override_pfn(unsigned long mfn, unsigned long pfn); | 64 | extern unsigned long m2p_find_override_pfn(unsigned long mfn, unsigned long pfn); |
58 | 65 | ||
@@ -121,7 +128,7 @@ static inline unsigned long mfn_to_pfn(unsigned long mfn) | |||
121 | pfn = m2p_find_override_pfn(mfn, ~0); | 128 | pfn = m2p_find_override_pfn(mfn, ~0); |
122 | } | 129 | } |
123 | 130 | ||
124 | /* | 131 | /* |
125 | * pfn is ~0 if there are no entries in the m2p for mfn or if the | 132 | * pfn is ~0 if there are no entries in the m2p for mfn or if the |
126 | * entry doesn't map back to the mfn and m2p_override doesn't have a | 133 | * entry doesn't map back to the mfn and m2p_override doesn't have a |
127 | * valid entry for it. | 134 | * valid entry for it. |
@@ -167,7 +174,12 @@ static inline xpaddr_t machine_to_phys(xmaddr_t machine) | |||
167 | */ | 174 | */ |
168 | static inline unsigned long mfn_to_local_pfn(unsigned long mfn) | 175 | static inline unsigned long mfn_to_local_pfn(unsigned long mfn) |
169 | { | 176 | { |
170 | unsigned long pfn = mfn_to_pfn(mfn); | 177 | unsigned long pfn; |
178 | |||
179 | if (xen_feature(XENFEAT_auto_translated_physmap)) | ||
180 | return mfn; | ||
181 | |||
182 | pfn = mfn_to_pfn(mfn); | ||
171 | if (get_phys_to_machine(pfn) != mfn) | 183 | if (get_phys_to_machine(pfn) != mfn) |
172 | return -1; /* force !pfn_valid() */ | 184 | return -1; /* force !pfn_valid() */ |
173 | return pfn; | 185 | return pfn; |
@@ -222,5 +234,6 @@ void make_lowmem_page_readonly(void *vaddr); | |||
222 | void make_lowmem_page_readwrite(void *vaddr); | 234 | void make_lowmem_page_readwrite(void *vaddr); |
223 | 235 | ||
224 | #define xen_remap(cookie, size) ioremap((cookie), (size)) | 236 | #define xen_remap(cookie, size) ioremap((cookie), (size)) |
237 | #define xen_unmap(cookie) iounmap((cookie)) | ||
225 | 238 | ||
226 | #endif /* _ASM_X86_XEN_PAGE_H */ | 239 | #endif /* _ASM_X86_XEN_PAGE_H */ |
diff --git a/arch/x86/include/asm/xsave.h b/arch/x86/include/asm/xsave.h index 0415cdabb5a6..d949ef28c48b 100644 --- a/arch/x86/include/asm/xsave.h +++ b/arch/x86/include/asm/xsave.h | |||
@@ -6,11 +6,18 @@ | |||
6 | 6 | ||
7 | #define XSTATE_CPUID 0x0000000d | 7 | #define XSTATE_CPUID 0x0000000d |
8 | 8 | ||
9 | #define XSTATE_FP 0x1 | 9 | #define XSTATE_FP 0x1 |
10 | #define XSTATE_SSE 0x2 | 10 | #define XSTATE_SSE 0x2 |
11 | #define XSTATE_YMM 0x4 | 11 | #define XSTATE_YMM 0x4 |
12 | #define XSTATE_BNDREGS 0x8 | ||
13 | #define XSTATE_BNDCSR 0x10 | ||
14 | #define XSTATE_OPMASK 0x20 | ||
15 | #define XSTATE_ZMM_Hi256 0x40 | ||
16 | #define XSTATE_Hi16_ZMM 0x80 | ||
12 | 17 | ||
13 | #define XSTATE_FPSSE (XSTATE_FP | XSTATE_SSE) | 18 | #define XSTATE_FPSSE (XSTATE_FP | XSTATE_SSE) |
19 | /* Bit 63 of XCR0 is reserved for future expansion */ | ||
20 | #define XSTATE_EXTEND_MASK (~(XSTATE_FPSSE | (1ULL << 63))) | ||
14 | 21 | ||
15 | #define FXSAVE_SIZE 512 | 22 | #define FXSAVE_SIZE 512 |
16 | 23 | ||
@@ -20,10 +27,15 @@ | |||
20 | #define XSAVE_YMM_SIZE 256 | 27 | #define XSAVE_YMM_SIZE 256 |
21 | #define XSAVE_YMM_OFFSET (XSAVE_HDR_SIZE + XSAVE_HDR_OFFSET) | 28 | #define XSAVE_YMM_OFFSET (XSAVE_HDR_SIZE + XSAVE_HDR_OFFSET) |
22 | 29 | ||
23 | /* | 30 | /* Supported features which support lazy state saving */ |
24 | * These are the features that the OS can handle currently. | 31 | #define XSTATE_LAZY (XSTATE_FP | XSTATE_SSE | XSTATE_YMM \ |
25 | */ | 32 | | XSTATE_OPMASK | XSTATE_ZMM_Hi256 | XSTATE_Hi16_ZMM) |
26 | #define XCNTXT_MASK (XSTATE_FP | XSTATE_SSE | XSTATE_YMM) | 33 | |
34 | /* Supported features which require eager state saving */ | ||
35 | #define XSTATE_EAGER (XSTATE_BNDREGS | XSTATE_BNDCSR) | ||
36 | |||
37 | /* All currently supported features */ | ||
38 | #define XCNTXT_MASK (XSTATE_LAZY | XSTATE_EAGER) | ||
27 | 39 | ||
28 | #ifdef CONFIG_X86_64 | 40 | #ifdef CONFIG_X86_64 |
29 | #define REX_PREFIX "0x48, " | 41 | #define REX_PREFIX "0x48, " |
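The lazy/eager split exists because the MPX state components (BNDREGS/BNDCSR) must be saved eagerly, while the FPU/SSE/AVX/AVX-512 components can keep using lazy switching. With the bit values above, the masks work out as follows (illustrative arithmetic only, not from the patch):

/* XSTATE_LAZY  = 0x01|0x02|0x04|0x20|0x40|0x80 = 0xe7 */
/* XSTATE_EAGER = 0x08|0x10                     = 0x18 */
/* XCNTXT_MASK  = XSTATE_LAZY | XSTATE_EAGER    = 0xff */
static inline bool xstate_requires_eager(u64 xfeatures)
{
	return (xfeatures & XSTATE_EAGER) != 0;
}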
diff --git a/arch/x86/include/uapi/asm/bootparam.h b/arch/x86/include/uapi/asm/bootparam.h index 9c3733c5f8f7..225b0988043a 100644 --- a/arch/x86/include/uapi/asm/bootparam.h +++ b/arch/x86/include/uapi/asm/bootparam.h | |||
@@ -6,6 +6,7 @@ | |||
6 | #define SETUP_E820_EXT 1 | 6 | #define SETUP_E820_EXT 1 |
7 | #define SETUP_DTB 2 | 7 | #define SETUP_DTB 2 |
8 | #define SETUP_PCI 3 | 8 | #define SETUP_PCI 3 |
9 | #define SETUP_EFI 4 | ||
9 | 10 | ||
10 | /* ram_size flags */ | 11 | /* ram_size flags */ |
11 | #define RAMDISK_IMAGE_START_MASK 0x07FF | 12 | #define RAMDISK_IMAGE_START_MASK 0x07FF |
@@ -23,6 +24,7 @@ | |||
23 | #define XLF_CAN_BE_LOADED_ABOVE_4G (1<<1) | 24 | #define XLF_CAN_BE_LOADED_ABOVE_4G (1<<1) |
24 | #define XLF_EFI_HANDOVER_32 (1<<2) | 25 | #define XLF_EFI_HANDOVER_32 (1<<2) |
25 | #define XLF_EFI_HANDOVER_64 (1<<3) | 26 | #define XLF_EFI_HANDOVER_64 (1<<3) |
27 | #define XLF_EFI_KEXEC (1<<4) | ||
26 | 28 | ||
27 | #ifndef __ASSEMBLY__ | 29 | #ifndef __ASSEMBLY__ |
28 | 30 | ||
diff --git a/arch/x86/include/uapi/asm/hyperv.h b/arch/x86/include/uapi/asm/hyperv.h index b8f1c0176cbc..462efe746d77 100644 --- a/arch/x86/include/uapi/asm/hyperv.h +++ b/arch/x86/include/uapi/asm/hyperv.h | |||
@@ -28,6 +28,9 @@ | |||
28 | /* Partition Reference Counter (HV_X64_MSR_TIME_REF_COUNT) available */ | 28 | /* Partition Reference Counter (HV_X64_MSR_TIME_REF_COUNT) available */ |
29 | #define HV_X64_MSR_TIME_REF_COUNT_AVAILABLE (1 << 1) | 29 | #define HV_X64_MSR_TIME_REF_COUNT_AVAILABLE (1 << 1) |
30 | 30 | ||
31 | /* A partition's reference time stamp counter (TSC) page */ | ||
32 | #define HV_X64_MSR_REFERENCE_TSC 0x40000021 | ||
33 | |||
31 | /* | 34 | /* |
32 | * There is a single feature flag that signifies the presence of the MSR | 35 | * There is a single feature flag that signifies the presence of the MSR |
33 | * that can be used to retrieve both the local APIC Timer frequency as | 36 | * that can be used to retrieve both the local APIC Timer frequency as |
@@ -198,6 +201,9 @@ | |||
198 | #define HV_X64_MSR_APIC_ASSIST_PAGE_ADDRESS_MASK \ | 201 | #define HV_X64_MSR_APIC_ASSIST_PAGE_ADDRESS_MASK \ |
199 | (~((1ull << HV_X64_MSR_APIC_ASSIST_PAGE_ADDRESS_SHIFT) - 1)) | 202 | (~((1ull << HV_X64_MSR_APIC_ASSIST_PAGE_ADDRESS_SHIFT) - 1)) |
200 | 203 | ||
204 | #define HV_X64_MSR_TSC_REFERENCE_ENABLE 0x00000001 | ||
205 | #define HV_X64_MSR_TSC_REFERENCE_ADDRESS_SHIFT 12 | ||
206 | |||
201 | #define HV_PROCESSOR_POWER_STATE_C0 0 | 207 | #define HV_PROCESSOR_POWER_STATE_C0 0 |
202 | #define HV_PROCESSOR_POWER_STATE_C1 1 | 208 | #define HV_PROCESSOR_POWER_STATE_C1 1 |
203 | #define HV_PROCESSOR_POWER_STATE_C2 2 | 209 | #define HV_PROCESSOR_POWER_STATE_C2 2 |
@@ -210,4 +216,11 @@ | |||
210 | #define HV_STATUS_INVALID_ALIGNMENT 4 | 216 | #define HV_STATUS_INVALID_ALIGNMENT 4 |
211 | #define HV_STATUS_INSUFFICIENT_BUFFERS 19 | 217 | #define HV_STATUS_INSUFFICIENT_BUFFERS 19 |
212 | 218 | ||
219 | typedef struct _HV_REFERENCE_TSC_PAGE { | ||
220 | __u32 tsc_sequence; | ||
221 | __u32 res1; | ||
222 | __u64 tsc_scale; | ||
223 | __s64 tsc_offset; | ||
224 | } HV_REFERENCE_TSC_PAGE, *PHV_REFERENCE_TSC_PAGE; | ||
225 | |||
213 | #endif | 226 | #endif |
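The reference TSC page is read with a sequence-count protocol: per the Hyper-V spec, reference time (in 100 ns units) is ((tsc * tsc_scale) >> 64) + tsc_offset, retried while tsc_sequence changes, and a sequence of 0 means the page is invalid and the guest must fall back to HV_X64_MSR_TIME_REF_COUNT. A sketch under those assumptions -- the helper name is hypothetical, mul_u64_u64_shr() comes from <linux/math64.h> and __native_read_tsc() from <asm/msr.h>:

#include <linux/math64.h>
#include <asm/msr.h>

static u64 hv_read_tsc_page(const volatile HV_REFERENCE_TSC_PAGE *tsc_pg)
{
	u32 seq;
	u64 time;

	do {
		seq = tsc_pg->tsc_sequence;
		if (seq == 0)
			return -1ULL;	/* invalid: fall back to the MSR */
		rmb();
		/* 64x64->128 multiply, keep the high half, add the offset. */
		time = mul_u64_u64_shr(__native_read_tsc(),
				       tsc_pg->tsc_scale, 64) +
		       tsc_pg->tsc_offset;
		rmb();
	} while (tsc_pg->tsc_sequence != seq);

	return time;
}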
diff --git a/arch/x86/include/uapi/asm/msr-index.h b/arch/x86/include/uapi/asm/msr-index.h index 37813b5ddc37..c827ace3121b 100644 --- a/arch/x86/include/uapi/asm/msr-index.h +++ b/arch/x86/include/uapi/asm/msr-index.h | |||
@@ -184,6 +184,7 @@ | |||
184 | #define MSR_AMD64_PATCH_LOADER 0xc0010020 | 184 | #define MSR_AMD64_PATCH_LOADER 0xc0010020 |
185 | #define MSR_AMD64_OSVW_ID_LENGTH 0xc0010140 | 185 | #define MSR_AMD64_OSVW_ID_LENGTH 0xc0010140 |
186 | #define MSR_AMD64_OSVW_STATUS 0xc0010141 | 186 | #define MSR_AMD64_OSVW_STATUS 0xc0010141 |
187 | #define MSR_AMD64_LS_CFG 0xc0011020 | ||
187 | #define MSR_AMD64_DC_CFG 0xc0011022 | 188 | #define MSR_AMD64_DC_CFG 0xc0011022 |
188 | #define MSR_AMD64_BU_CFG2 0xc001102a | 189 | #define MSR_AMD64_BU_CFG2 0xc001102a |
189 | #define MSR_AMD64_IBSFETCHCTL 0xc0011030 | 190 | #define MSR_AMD64_IBSFETCHCTL 0xc0011030 |
@@ -294,6 +295,7 @@ | |||
294 | #define MSR_SMI_COUNT 0x00000034 | 295 | #define MSR_SMI_COUNT 0x00000034 |
295 | #define MSR_IA32_FEATURE_CONTROL 0x0000003a | 296 | #define MSR_IA32_FEATURE_CONTROL 0x0000003a |
296 | #define MSR_IA32_TSC_ADJUST 0x0000003b | 297 | #define MSR_IA32_TSC_ADJUST 0x0000003b |
298 | #define MSR_IA32_BNDCFGS 0x00000d90 | ||
297 | 299 | ||
298 | #define FEATURE_CONTROL_LOCKED (1<<0) | 300 | #define FEATURE_CONTROL_LOCKED (1<<0) |
299 | #define FEATURE_CONTROL_VMXON_ENABLED_INSIDE_SMX (1<<1) | 301 | #define FEATURE_CONTROL_VMXON_ENABLED_INSIDE_SMX (1<<1) |
@@ -367,33 +369,58 @@ | |||
367 | #define THERM_LOG_THRESHOLD1 (1 << 9) | 369 | #define THERM_LOG_THRESHOLD1 (1 << 9) |
368 | 370 | ||
369 | /* MISC_ENABLE bits: architectural */ | 371 | /* MISC_ENABLE bits: architectural */ |
370 | #define MSR_IA32_MISC_ENABLE_FAST_STRING (1ULL << 0) | 372 | #define MSR_IA32_MISC_ENABLE_FAST_STRING_BIT 0 |
371 | #define MSR_IA32_MISC_ENABLE_TCC (1ULL << 1) | 373 | #define MSR_IA32_MISC_ENABLE_FAST_STRING (1ULL << MSR_IA32_MISC_ENABLE_FAST_STRING_BIT) |
372 | #define MSR_IA32_MISC_ENABLE_EMON (1ULL << 7) | 374 | #define MSR_IA32_MISC_ENABLE_TCC_BIT 1 |
373 | #define MSR_IA32_MISC_ENABLE_BTS_UNAVAIL (1ULL << 11) | 375 | #define MSR_IA32_MISC_ENABLE_TCC (1ULL << MSR_IA32_MISC_ENABLE_TCC_BIT) |
374 | #define MSR_IA32_MISC_ENABLE_PEBS_UNAVAIL (1ULL << 12) | 376 | #define MSR_IA32_MISC_ENABLE_EMON_BIT 7 |
375 | #define MSR_IA32_MISC_ENABLE_ENHANCED_SPEEDSTEP (1ULL << 16) | 377 | #define MSR_IA32_MISC_ENABLE_EMON (1ULL << MSR_IA32_MISC_ENABLE_EMON_BIT) |
376 | #define MSR_IA32_MISC_ENABLE_MWAIT (1ULL << 18) | 378 | #define MSR_IA32_MISC_ENABLE_BTS_UNAVAIL_BIT 11 |
377 | #define MSR_IA32_MISC_ENABLE_LIMIT_CPUID (1ULL << 22) | 379 | #define MSR_IA32_MISC_ENABLE_BTS_UNAVAIL (1ULL << MSR_IA32_MISC_ENABLE_BTS_UNAVAIL_BIT) |
378 | #define MSR_IA32_MISC_ENABLE_XTPR_DISABLE (1ULL << 23) | 380 | #define MSR_IA32_MISC_ENABLE_PEBS_UNAVAIL_BIT 12 |
379 | #define MSR_IA32_MISC_ENABLE_XD_DISABLE (1ULL << 34) | 381 | #define MSR_IA32_MISC_ENABLE_PEBS_UNAVAIL (1ULL << MSR_IA32_MISC_ENABLE_PEBS_UNAVAIL_BIT) |
382 | #define MSR_IA32_MISC_ENABLE_ENHANCED_SPEEDSTEP_BIT 16 | ||
383 | #define MSR_IA32_MISC_ENABLE_ENHANCED_SPEEDSTEP (1ULL << MSR_IA32_MISC_ENABLE_ENHANCED_SPEEDSTEP_BIT) | ||
384 | #define MSR_IA32_MISC_ENABLE_MWAIT_BIT 18 | ||
385 | #define MSR_IA32_MISC_ENABLE_MWAIT (1ULL << MSR_IA32_MISC_ENABLE_MWAIT_BIT) | ||
386 | #define MSR_IA32_MISC_ENABLE_LIMIT_CPUID_BIT 22 | ||
387 | #define MSR_IA32_MISC_ENABLE_LIMIT_CPUID (1ULL << MSR_IA32_MISC_ENABLE_LIMIT_CPUID_BIT) | ||
388 | #define MSR_IA32_MISC_ENABLE_XTPR_DISABLE_BIT 23 | ||
389 | #define MSR_IA32_MISC_ENABLE_XTPR_DISABLE (1ULL << MSR_IA32_MISC_ENABLE_XTPR_DISABLE_BIT) | ||
390 | #define MSR_IA32_MISC_ENABLE_XD_DISABLE_BIT 34 | ||
391 | #define MSR_IA32_MISC_ENABLE_XD_DISABLE (1ULL << MSR_IA32_MISC_ENABLE_XD_DISABLE_BIT) | ||
380 | 392 | ||
381 | /* MISC_ENABLE bits: model-specific, meaning may vary from core to core */ | 393 | /* MISC_ENABLE bits: model-specific, meaning may vary from core to core */ |
382 | #define MSR_IA32_MISC_ENABLE_X87_COMPAT (1ULL << 2) | 394 | #define MSR_IA32_MISC_ENABLE_X87_COMPAT_BIT 2 |
383 | #define MSR_IA32_MISC_ENABLE_TM1 (1ULL << 3) | 395 | #define MSR_IA32_MISC_ENABLE_X87_COMPAT (1ULL << MSR_IA32_MISC_ENABLE_X87_COMPAT_BIT) |
384 | #define MSR_IA32_MISC_ENABLE_SPLIT_LOCK_DISABLE (1ULL << 4) | 396 | #define MSR_IA32_MISC_ENABLE_TM1_BIT 3 |
385 | #define MSR_IA32_MISC_ENABLE_L3CACHE_DISABLE (1ULL << 6) | 397 | #define MSR_IA32_MISC_ENABLE_TM1 (1ULL << MSR_IA32_MISC_ENABLE_TM1_BIT) |
386 | #define MSR_IA32_MISC_ENABLE_SUPPRESS_LOCK (1ULL << 8) | 398 | #define MSR_IA32_MISC_ENABLE_SPLIT_LOCK_DISABLE_BIT 4 |
387 | #define MSR_IA32_MISC_ENABLE_PREFETCH_DISABLE (1ULL << 9) | 399 | #define MSR_IA32_MISC_ENABLE_SPLIT_LOCK_DISABLE (1ULL << MSR_IA32_MISC_ENABLE_SPLIT_LOCK_DISABLE_BIT) |
388 | #define MSR_IA32_MISC_ENABLE_FERR (1ULL << 10) | 400 | #define MSR_IA32_MISC_ENABLE_L3CACHE_DISABLE_BIT 6 |
389 | #define MSR_IA32_MISC_ENABLE_FERR_MULTIPLEX (1ULL << 10) | 401 | #define MSR_IA32_MISC_ENABLE_L3CACHE_DISABLE (1ULL << MSR_IA32_MISC_ENABLE_L3CACHE_DISABLE_BIT) |
390 | #define MSR_IA32_MISC_ENABLE_TM2 (1ULL << 13) | 402 | #define MSR_IA32_MISC_ENABLE_SUPPRESS_LOCK_BIT 8 |
391 | #define MSR_IA32_MISC_ENABLE_ADJ_PREF_DISABLE (1ULL << 19) | 403 | #define MSR_IA32_MISC_ENABLE_SUPPRESS_LOCK (1ULL << MSR_IA32_MISC_ENABLE_SUPPRESS_LOCK_BIT) |
392 | #define MSR_IA32_MISC_ENABLE_SPEEDSTEP_LOCK (1ULL << 20) | 404 | #define MSR_IA32_MISC_ENABLE_PREFETCH_DISABLE_BIT 9 |
393 | #define MSR_IA32_MISC_ENABLE_L1D_CONTEXT (1ULL << 24) | 405 | #define MSR_IA32_MISC_ENABLE_PREFETCH_DISABLE (1ULL << MSR_IA32_MISC_ENABLE_PREFETCH_DISABLE_BIT) |
394 | #define MSR_IA32_MISC_ENABLE_DCU_PREF_DISABLE (1ULL << 37) | 406 | #define MSR_IA32_MISC_ENABLE_FERR_BIT 10 |
395 | #define MSR_IA32_MISC_ENABLE_TURBO_DISABLE (1ULL << 38) | 407 | #define MSR_IA32_MISC_ENABLE_FERR (1ULL << MSR_IA32_MISC_ENABLE_FERR_BIT) |
396 | #define MSR_IA32_MISC_ENABLE_IP_PREF_DISABLE (1ULL << 39) | 408 | #define MSR_IA32_MISC_ENABLE_FERR_MULTIPLEX_BIT 10 |
409 | #define MSR_IA32_MISC_ENABLE_FERR_MULTIPLEX (1ULL << MSR_IA32_MISC_ENABLE_FERR_MULTIPLEX_BIT) | ||
410 | #define MSR_IA32_MISC_ENABLE_TM2_BIT 13 | ||
411 | #define MSR_IA32_MISC_ENABLE_TM2 (1ULL << MSR_IA32_MISC_ENABLE_TM2_BIT) | ||
412 | #define MSR_IA32_MISC_ENABLE_ADJ_PREF_DISABLE_BIT 19 | ||
413 | #define MSR_IA32_MISC_ENABLE_ADJ_PREF_DISABLE (1ULL << MSR_IA32_MISC_ENABLE_ADJ_PREF_DISABLE_BIT) | ||
414 | #define MSR_IA32_MISC_ENABLE_SPEEDSTEP_LOCK_BIT 20 | ||
415 | #define MSR_IA32_MISC_ENABLE_SPEEDSTEP_LOCK (1ULL << MSR_IA32_MISC_ENABLE_SPEEDSTEP_LOCK_BIT) | ||
416 | #define MSR_IA32_MISC_ENABLE_L1D_CONTEXT_BIT 24 | ||
417 | #define MSR_IA32_MISC_ENABLE_L1D_CONTEXT (1ULL << MSR_IA32_MISC_ENABLE_L1D_CONTEXT_BIT) | ||
418 | #define MSR_IA32_MISC_ENABLE_DCU_PREF_DISABLE_BIT 37 | ||
419 | #define MSR_IA32_MISC_ENABLE_DCU_PREF_DISABLE (1ULL << MSR_IA32_MISC_ENABLE_DCU_PREF_DISABLE_BIT) | ||
420 | #define MSR_IA32_MISC_ENABLE_TURBO_DISABLE_BIT 38 | ||
421 | #define MSR_IA32_MISC_ENABLE_TURBO_DISABLE (1ULL << MSR_IA32_MISC_ENABLE_TURBO_DISABLE_BIT) | ||
422 | #define MSR_IA32_MISC_ENABLE_IP_PREF_DISABLE_BIT 39 | ||
423 | #define MSR_IA32_MISC_ENABLE_IP_PREF_DISABLE (1ULL << MSR_IA32_MISC_ENABLE_IP_PREF_DISABLE_BIT) | ||
397 | 424 | ||
398 | #define MSR_IA32_TSC_DEADLINE 0x000006E0 | 425 | #define MSR_IA32_TSC_DEADLINE 0x000006E0 |
399 | 426 | ||
@@ -527,6 +554,7 @@ | |||
527 | #define MSR_IA32_VMX_TRUE_PROCBASED_CTLS 0x0000048e | 554 | #define MSR_IA32_VMX_TRUE_PROCBASED_CTLS 0x0000048e |
528 | #define MSR_IA32_VMX_TRUE_EXIT_CTLS 0x0000048f | 555 | #define MSR_IA32_VMX_TRUE_EXIT_CTLS 0x0000048f |
529 | #define MSR_IA32_VMX_TRUE_ENTRY_CTLS 0x00000490 | 556 | #define MSR_IA32_VMX_TRUE_ENTRY_CTLS 0x00000490 |
557 | #define MSR_IA32_VMX_VMFUNC 0x00000491 | ||
530 | 558 | ||
531 | /* VMX_BASIC bits and bitmasks */ | 559 | /* VMX_BASIC bits and bitmasks */ |
532 | #define VMX_BASIC_VMCS_SIZE_SHIFT 32 | 560 | #define VMX_BASIC_VMCS_SIZE_SHIFT 32 |
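The new *_BIT companions carry the plain bit position for callers that manipulate MSR bits by number (bit-test helpers and the like), while the existing 1ULL<< masks remain for rdmsr/wrmsr arithmetic. An illustrative check, not from the patch, showing the two forms are interchangeable:

static bool xd_disabled_in_misc_enable(void)
{
	u64 misc;

	rdmsrl(MSR_IA32_MISC_ENABLE, misc);
	/* Equivalent to: !!(misc & MSR_IA32_MISC_ENABLE_XD_DISABLE) */
	return (misc >> MSR_IA32_MISC_ENABLE_XD_DISABLE_BIT) & 1;
}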
diff --git a/arch/x86/include/uapi/asm/sembuf.h b/arch/x86/include/uapi/asm/sembuf.h index ee50c801f7b7..cc2d6a3aeae7 100644 --- a/arch/x86/include/uapi/asm/sembuf.h +++ b/arch/x86/include/uapi/asm/sembuf.h | |||
@@ -13,12 +13,12 @@ | |||
13 | struct semid64_ds { | 13 | struct semid64_ds { |
14 | struct ipc64_perm sem_perm; /* permissions .. see ipc.h */ | 14 | struct ipc64_perm sem_perm; /* permissions .. see ipc.h */ |
15 | __kernel_time_t sem_otime; /* last semop time */ | 15 | __kernel_time_t sem_otime; /* last semop time */ |
16 | unsigned long __unused1; | 16 | __kernel_ulong_t __unused1; |
17 | __kernel_time_t sem_ctime; /* last change time */ | 17 | __kernel_time_t sem_ctime; /* last change time */ |
18 | unsigned long __unused2; | 18 | __kernel_ulong_t __unused2; |
19 | unsigned long sem_nsems; /* no. of semaphores in array */ | 19 | __kernel_ulong_t sem_nsems; /* no. of semaphores in array */ |
20 | unsigned long __unused3; | 20 | __kernel_ulong_t __unused3; |
21 | unsigned long __unused4; | 21 | __kernel_ulong_t __unused4; |
22 | }; | 22 | }; |
23 | 23 | ||
24 | #endif /* _ASM_X86_SEMBUF_H */ | 24 | #endif /* _ASM_X86_SEMBUF_H */ |
diff --git a/arch/x86/include/uapi/asm/stat.h b/arch/x86/include/uapi/asm/stat.h index 7b3ddc348585..bc03eb5d6360 100644 --- a/arch/x86/include/uapi/asm/stat.h +++ b/arch/x86/include/uapi/asm/stat.h | |||
@@ -1,6 +1,8 @@ | |||
1 | #ifndef _ASM_X86_STAT_H | 1 | #ifndef _ASM_X86_STAT_H |
2 | #define _ASM_X86_STAT_H | 2 | #define _ASM_X86_STAT_H |
3 | 3 | ||
4 | #include <asm/posix_types.h> | ||
5 | |||
4 | #define STAT_HAVE_NSEC 1 | 6 | #define STAT_HAVE_NSEC 1 |
5 | 7 | ||
6 | #ifdef __i386__ | 8 | #ifdef __i386__ |
@@ -78,26 +80,26 @@ struct stat64 { | |||
78 | #else /* __i386__ */ | 80 | #else /* __i386__ */ |
79 | 81 | ||
80 | struct stat { | 82 | struct stat { |
81 | unsigned long st_dev; | 83 | __kernel_ulong_t st_dev; |
82 | unsigned long st_ino; | 84 | __kernel_ulong_t st_ino; |
83 | unsigned long st_nlink; | 85 | __kernel_ulong_t st_nlink; |
84 | 86 | ||
85 | unsigned int st_mode; | 87 | unsigned int st_mode; |
86 | unsigned int st_uid; | 88 | unsigned int st_uid; |
87 | unsigned int st_gid; | 89 | unsigned int st_gid; |
88 | unsigned int __pad0; | 90 | unsigned int __pad0; |
89 | unsigned long st_rdev; | 91 | __kernel_ulong_t st_rdev; |
90 | long st_size; | 92 | __kernel_long_t st_size; |
91 | long st_blksize; | 93 | __kernel_long_t st_blksize; |
92 | long st_blocks; /* Number 512-byte blocks allocated. */ | 94 | __kernel_long_t st_blocks; /* Number 512-byte blocks allocated. */ |
93 | 95 | ||
94 | unsigned long st_atime; | 96 | __kernel_ulong_t st_atime; |
95 | unsigned long st_atime_nsec; | 97 | __kernel_ulong_t st_atime_nsec; |
96 | unsigned long st_mtime; | 98 | __kernel_ulong_t st_mtime; |
97 | unsigned long st_mtime_nsec; | 99 | __kernel_ulong_t st_mtime_nsec; |
98 | unsigned long st_ctime; | 100 | __kernel_ulong_t st_ctime; |
99 | unsigned long st_ctime_nsec; | 101 | __kernel_ulong_t st_ctime_nsec; |
100 | long __unused[3]; | 102 | __kernel_long_t __unused[3]; |
101 | }; | 103 | }; |
102 | 104 | ||
103 | /* We don't need to memset the whole thing just to initialize the padding */ | 105 | /* We don't need to memset the whole thing just to initialize the padding */ |
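The switch to __kernel_ulong_t (hence the new asm/posix_types.h include) reads as an x32 ABI fix: on x32 a plain unsigned long is 32-bit, but __kernel_ulong_t stays 64-bit, so struct stat keeps the x86-64 layout. A hypothetical compile-time check for that case:

_Static_assert(sizeof(__kernel_ulong_t) == 8,
	       "x32 must keep the 64-bit x86-64 stat layout");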
diff --git a/arch/x86/kernel/Makefile b/arch/x86/kernel/Makefile index 9b0a34e2cd79..f4d96000d33a 100644 --- a/arch/x86/kernel/Makefile +++ b/arch/x86/kernel/Makefile | |||
@@ -26,13 +26,14 @@ obj-$(CONFIG_IRQ_WORK) += irq_work.o | |||
26 | obj-y += probe_roms.o | 26 | obj-y += probe_roms.o |
27 | obj-$(CONFIG_X86_32) += i386_ksyms_32.o | 27 | obj-$(CONFIG_X86_32) += i386_ksyms_32.o |
28 | obj-$(CONFIG_X86_64) += sys_x86_64.o x8664_ksyms_64.o | 28 | obj-$(CONFIG_X86_64) += sys_x86_64.o x8664_ksyms_64.o |
29 | obj-y += syscall_$(BITS).o | 29 | obj-y += syscall_$(BITS).o vsyscall_gtod.o |
30 | obj-$(CONFIG_X86_64) += vsyscall_64.o | 30 | obj-$(CONFIG_X86_64) += vsyscall_64.o |
31 | obj-$(CONFIG_X86_64) += vsyscall_emu_64.o | 31 | obj-$(CONFIG_X86_64) += vsyscall_emu_64.o |
32 | obj-$(CONFIG_SYSFS) += ksysfs.o | ||
32 | obj-y += bootflag.o e820.o | 33 | obj-y += bootflag.o e820.o |
33 | obj-y += pci-dma.o quirks.o topology.o kdebugfs.o | 34 | obj-y += pci-dma.o quirks.o topology.o kdebugfs.o |
34 | obj-y += alternative.o i8253.o pci-nommu.o hw_breakpoint.o | 35 | obj-y += alternative.o i8253.o pci-nommu.o hw_breakpoint.o |
35 | obj-y += tsc.o io_delay.o rtc.o | 36 | obj-y += tsc.o tsc_msr.o io_delay.o rtc.o |
36 | obj-y += pci-iommu_table.o | 37 | obj-y += pci-iommu_table.o |
37 | obj-y += resource.o | 38 | obj-y += resource.o |
38 | 39 | ||
@@ -91,15 +92,6 @@ obj-$(CONFIG_PARAVIRT_CLOCK) += pvclock.o | |||
91 | 92 | ||
92 | obj-$(CONFIG_PCSPKR_PLATFORM) += pcspeaker.o | 93 | obj-$(CONFIG_PCSPKR_PLATFORM) += pcspeaker.o |
93 | 94 | ||
94 | obj-$(CONFIG_MICROCODE_EARLY) += microcode_core_early.o | ||
95 | obj-$(CONFIG_MICROCODE_INTEL_EARLY) += microcode_intel_early.o | ||
96 | obj-$(CONFIG_MICROCODE_INTEL_LIB) += microcode_intel_lib.o | ||
97 | microcode-y := microcode_core.o | ||
98 | microcode-$(CONFIG_MICROCODE_INTEL) += microcode_intel.o | ||
99 | microcode-$(CONFIG_MICROCODE_AMD) += microcode_amd.o | ||
100 | obj-$(CONFIG_MICROCODE_AMD_EARLY) += microcode_amd_early.o | ||
101 | obj-$(CONFIG_MICROCODE) += microcode.o | ||
102 | |||
103 | obj-$(CONFIG_X86_CHECK_BIOS_CORRUPTION) += check.o | 95 | obj-$(CONFIG_X86_CHECK_BIOS_CORRUPTION) += check.o |
104 | 96 | ||
105 | obj-$(CONFIG_SWIOTLB) += pci-swiotlb.o | 97 | obj-$(CONFIG_SWIOTLB) += pci-swiotlb.o |
@@ -111,6 +103,7 @@ obj-$(CONFIG_EFI) += sysfb_efi.o | |||
111 | 103 | ||
112 | obj-$(CONFIG_PERF_EVENTS) += perf_regs.o | 104 | obj-$(CONFIG_PERF_EVENTS) += perf_regs.o |
113 | obj-$(CONFIG_TRACING) += tracepoint.o | 105 | obj-$(CONFIG_TRACING) += tracepoint.o |
106 | obj-$(CONFIG_IOSF_MBI) += iosf_mbi.o | ||
114 | 107 | ||
115 | ### | 108 | ### |
116 | # 64 bit specific files | 109 | # 64 bit specific files |
diff --git a/arch/x86/kernel/acpi/boot.c b/arch/x86/kernel/acpi/boot.c index 6c0b43bd024b..86281ffb96d6 100644 --- a/arch/x86/kernel/acpi/boot.c +++ b/arch/x86/kernel/acpi/boot.c | |||
@@ -46,7 +46,6 @@ | |||
46 | 46 | ||
47 | #include "sleep.h" /* To include x86_acpi_suspend_lowlevel */ | 47 | #include "sleep.h" /* To include x86_acpi_suspend_lowlevel */ |
48 | static int __initdata acpi_force = 0; | 48 | static int __initdata acpi_force = 0; |
49 | u32 acpi_rsdt_forced; | ||
50 | int acpi_disabled; | 49 | int acpi_disabled; |
51 | EXPORT_SYMBOL(acpi_disabled); | 50 | EXPORT_SYMBOL(acpi_disabled); |
52 | 51 | ||
@@ -54,10 +53,6 @@ EXPORT_SYMBOL(acpi_disabled); | |||
54 | # include <asm/proto.h> | 53 | # include <asm/proto.h> |
55 | #endif /* X86 */ | 54 | #endif /* X86 */ |
56 | 55 | ||
57 | #define BAD_MADT_ENTRY(entry, end) ( \ | ||
58 | (!entry) || (unsigned long)entry + sizeof(*entry) > end || \ | ||
59 | ((struct acpi_subtable_header *)entry)->length < sizeof(*entry)) | ||
60 | |||
61 | #define PREFIX "ACPI: " | 56 | #define PREFIX "ACPI: " |
62 | 57 | ||
63 | int acpi_noirq; /* skip ACPI IRQ initialization */ | 58 | int acpi_noirq; /* skip ACPI IRQ initialization */ |
@@ -614,10 +609,10 @@ static void acpi_map_cpu2node(acpi_handle handle, int cpu, int physid) | |||
614 | int nid; | 609 | int nid; |
615 | 610 | ||
616 | nid = acpi_get_node(handle); | 611 | nid = acpi_get_node(handle); |
617 | if (nid == -1 || !node_online(nid)) | 612 | if (nid != -1) { |
618 | return; | 613 | set_apicid_to_node(physid, nid); |
619 | set_apicid_to_node(physid, nid); | 614 | numa_set_node(cpu, nid); |
620 | numa_set_node(cpu, nid); | 615 | } |
621 | #endif | 616 | #endif |
622 | } | 617 | } |
623 | 618 | ||
@@ -908,10 +903,6 @@ static int __init acpi_parse_madt_lapic_entries(void) | |||
908 | #ifdef CONFIG_X86_IO_APIC | 903 | #ifdef CONFIG_X86_IO_APIC |
909 | #define MP_ISA_BUS 0 | 904 | #define MP_ISA_BUS 0 |
910 | 905 | ||
911 | #ifdef CONFIG_X86_ES7000 | ||
912 | extern int es7000_plat; | ||
913 | #endif | ||
914 | |||
915 | void __init mp_override_legacy_irq(u8 bus_irq, u8 polarity, u8 trigger, u32 gsi) | 906 | void __init mp_override_legacy_irq(u8 bus_irq, u8 polarity, u8 trigger, u32 gsi) |
916 | { | 907 | { |
917 | int ioapic; | 908 | int ioapic; |
@@ -961,14 +952,6 @@ void __init mp_config_acpi_legacy_irqs(void) | |||
961 | set_bit(MP_ISA_BUS, mp_bus_not_pci); | 952 | set_bit(MP_ISA_BUS, mp_bus_not_pci); |
962 | pr_debug("Bus #%d is ISA\n", MP_ISA_BUS); | 953 | pr_debug("Bus #%d is ISA\n", MP_ISA_BUS); |
963 | 954 | ||
964 | #ifdef CONFIG_X86_ES7000 | ||
965 | /* | ||
966 | * Older generations of ES7000 have no legacy identity mappings | ||
967 | */ | ||
968 | if (es7000_plat == 1) | ||
969 | return; | ||
970 | #endif | ||
971 | |||
972 | /* | 955 | /* |
973 | * Use the default configuration for the IRQs 0-15. Unless | 956 | * Use the default configuration for the IRQs 0-15. Unless |
974 | * overridden by (MADT) interrupt source override entries. | 957 | * overridden by (MADT) interrupt source override entries. |
@@ -1034,9 +1017,7 @@ static int mp_config_acpi_gsi(struct device *dev, u32 gsi, int trigger, | |||
1034 | 1017 | ||
1035 | if (!acpi_ioapic) | 1018 | if (!acpi_ioapic) |
1036 | return 0; | 1019 | return 0; |
1037 | if (!dev) | 1020 | if (!dev || !dev_is_pci(dev)) |
1038 | return 0; | ||
1039 | if (dev->bus != &pci_bus_type) | ||
1040 | return 0; | 1021 | return 0; |
1041 | 1022 | ||
1042 | pdev = to_pci_dev(dev); | 1023 | pdev = to_pci_dev(dev); |
@@ -1564,7 +1545,7 @@ static int __init parse_acpi(char *arg) | |||
1564 | } | 1545 | } |
1565 | /* acpi=rsdt use RSDT instead of XSDT */ | 1546 | /* acpi=rsdt use RSDT instead of XSDT */ |
1566 | else if (strcmp(arg, "rsdt") == 0) { | 1547 | else if (strcmp(arg, "rsdt") == 0) { |
1567 | acpi_rsdt_forced = 1; | 1548 | acpi_gbl_do_not_use_xsdt = TRUE; |
1568 | } | 1549 | } |
1569 | /* "acpi=noirq" disables ACPI interrupt routing */ | 1550 | /* "acpi=noirq" disables ACPI interrupt routing */ |
1570 | else if (strcmp(arg, "noirq") == 0) { | 1551 | else if (strcmp(arg, "noirq") == 0) { |
diff --git a/arch/x86/kernel/acpi/cstate.c b/arch/x86/kernel/acpi/cstate.c index d2b7f27781bc..4b28159e0421 100644 --- a/arch/x86/kernel/acpi/cstate.c +++ b/arch/x86/kernel/acpi/cstate.c | |||
@@ -87,7 +87,9 @@ static long acpi_processor_ffh_cstate_probe_cpu(void *_cx) | |||
87 | num_cstate_subtype = edx_part & MWAIT_SUBSTATE_MASK; | 87 | num_cstate_subtype = edx_part & MWAIT_SUBSTATE_MASK; |
88 | 88 | ||
89 | retval = 0; | 89 | retval = 0; |
90 | if (num_cstate_subtype < (cx->address & MWAIT_SUBSTATE_MASK)) { | 90 | /* If the HW does not support any sub-states in this C-state */ |
91 | if (num_cstate_subtype == 0) { | ||
92 | pr_warn(FW_BUG "ACPI MWAIT C-state 0x%x not supported by HW (0x%x)\n", cx->address, edx_part); | ||
91 | retval = -1; | 93 | retval = -1; |
92 | goto out; | 94 | goto out; |
93 | } | 95 | } |
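For context, num_cstate_subtype is derived a few lines above this hunk from CPUID leaf 5, whose EDX packs a 4-bit sub-state count per C-state. A hedged reconstruction of that extraction (the exact code sits outside the hunk):

	cpuid(CPUID_MWAIT_LEAF, &eax, &ebx, &ecx, &edx);
	cstate_type = ((cx->address >> MWAIT_SUBSTATE_SIZE) &
			MWAIT_CSTATE_MASK) + 1;
	edx_part = edx >> (cstate_type * MWAIT_SUBSTATE_SIZE);
	num_cstate_subtype = edx_part & MWAIT_SUBSTATE_MASK;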
@@ -150,29 +152,6 @@ int acpi_processor_ffh_cstate_probe(unsigned int cpu, | |||
150 | } | 152 | } |
151 | EXPORT_SYMBOL_GPL(acpi_processor_ffh_cstate_probe); | 153 | EXPORT_SYMBOL_GPL(acpi_processor_ffh_cstate_probe); |
152 | 154 | ||
153 | /* | ||
154 | * This uses new MONITOR/MWAIT instructions on P4 processors with PNI, | ||
155 | * which can obviate IPI to trigger checking of need_resched. | ||
156 | * We execute MONITOR against need_resched and enter optimized wait state | ||
157 | * through MWAIT. Whenever someone changes need_resched, we would be woken | ||
158 | * up from MWAIT (without an IPI). | ||
159 | * | ||
160 | * New with Core Duo processors, MWAIT can take some hints based on CPU | ||
161 | * capability. | ||
162 | */ | ||
163 | void mwait_idle_with_hints(unsigned long ax, unsigned long cx) | ||
164 | { | ||
165 | if (!need_resched()) { | ||
166 | if (this_cpu_has(X86_FEATURE_CLFLUSH_MONITOR)) | ||
167 | clflush((void *)¤t_thread_info()->flags); | ||
168 | |||
169 | __monitor((void *)¤t_thread_info()->flags, 0, 0); | ||
170 | smp_mb(); | ||
171 | if (!need_resched()) | ||
172 | __mwait(ax, cx); | ||
173 | } | ||
174 | } | ||
175 | |||
176 | void acpi_processor_ffh_cstate_enter(struct acpi_processor_cx *cx) | 155 | void acpi_processor_ffh_cstate_enter(struct acpi_processor_cx *cx) |
177 | { | 156 | { |
178 | unsigned int cpu = smp_processor_id(); | 157 | unsigned int cpu = smp_processor_id(); |
diff --git a/arch/x86/kernel/amd_nb.c b/arch/x86/kernel/amd_nb.c index 59554dca96ec..f04dbb3069b8 100644 --- a/arch/x86/kernel/amd_nb.c +++ b/arch/x86/kernel/amd_nb.c | |||
@@ -22,6 +22,7 @@ const struct pci_device_id amd_nb_misc_ids[] = { | |||
22 | { PCI_DEVICE(PCI_VENDOR_ID_AMD, PCI_DEVICE_ID_AMD_15H_M10H_F3) }, | 22 | { PCI_DEVICE(PCI_VENDOR_ID_AMD, PCI_DEVICE_ID_AMD_15H_M10H_F3) }, |
23 | { PCI_DEVICE(PCI_VENDOR_ID_AMD, PCI_DEVICE_ID_AMD_15H_M30H_NB_F3) }, | 23 | { PCI_DEVICE(PCI_VENDOR_ID_AMD, PCI_DEVICE_ID_AMD_15H_M30H_NB_F3) }, |
24 | { PCI_DEVICE(PCI_VENDOR_ID_AMD, PCI_DEVICE_ID_AMD_16H_NB_F3) }, | 24 | { PCI_DEVICE(PCI_VENDOR_ID_AMD, PCI_DEVICE_ID_AMD_16H_NB_F3) }, |
25 | { PCI_DEVICE(PCI_VENDOR_ID_AMD, PCI_DEVICE_ID_AMD_16H_M30H_NB_F3) }, | ||
25 | {} | 26 | {} |
26 | }; | 27 | }; |
27 | EXPORT_SYMBOL(amd_nb_misc_ids); | 28 | EXPORT_SYMBOL(amd_nb_misc_ids); |
@@ -30,6 +31,7 @@ static const struct pci_device_id amd_nb_link_ids[] = { | |||
30 | { PCI_DEVICE(PCI_VENDOR_ID_AMD, PCI_DEVICE_ID_AMD_15H_NB_F4) }, | 31 | { PCI_DEVICE(PCI_VENDOR_ID_AMD, PCI_DEVICE_ID_AMD_15H_NB_F4) }, |
31 | { PCI_DEVICE(PCI_VENDOR_ID_AMD, PCI_DEVICE_ID_AMD_15H_M30H_NB_F4) }, | 32 | { PCI_DEVICE(PCI_VENDOR_ID_AMD, PCI_DEVICE_ID_AMD_15H_M30H_NB_F4) }, |
32 | { PCI_DEVICE(PCI_VENDOR_ID_AMD, PCI_DEVICE_ID_AMD_16H_NB_F4) }, | 33 | { PCI_DEVICE(PCI_VENDOR_ID_AMD, PCI_DEVICE_ID_AMD_16H_NB_F4) }, |
34 | { PCI_DEVICE(PCI_VENDOR_ID_AMD, PCI_DEVICE_ID_AMD_16H_M30H_NB_F4) }, | ||
33 | {} | 35 | {} |
34 | }; | 36 | }; |
35 | 37 | ||
@@ -179,7 +181,7 @@ int amd_get_subcaches(int cpu) | |||
179 | return (mask >> (4 * cuid)) & 0xf; | 181 | return (mask >> (4 * cuid)) & 0xf; |
180 | } | 182 | } |
181 | 183 | ||
182 | int amd_set_subcaches(int cpu, int mask) | 184 | int amd_set_subcaches(int cpu, unsigned long mask) |
183 | { | 185 | { |
184 | static unsigned int reset, ban; | 186 | static unsigned int reset, ban; |
185 | struct amd_northbridge *nb = node_to_amd_nb(amd_get_nb_id(cpu)); | 187 | struct amd_northbridge *nb = node_to_amd_nb(amd_get_nb_id(cpu)); |
diff --git a/arch/x86/kernel/aperture_64.c b/arch/x86/kernel/aperture_64.c index fd972a3e4cbb..9fa8aa051f54 100644 --- a/arch/x86/kernel/aperture_64.c +++ b/arch/x86/kernel/aperture_64.c | |||
@@ -18,7 +18,6 @@ | |||
18 | #include <linux/pci_ids.h> | 18 | #include <linux/pci_ids.h> |
19 | #include <linux/pci.h> | 19 | #include <linux/pci.h> |
20 | #include <linux/bitops.h> | 20 | #include <linux/bitops.h> |
21 | #include <linux/ioport.h> | ||
22 | #include <linux/suspend.h> | 21 | #include <linux/suspend.h> |
23 | #include <asm/e820.h> | 22 | #include <asm/e820.h> |
24 | #include <asm/io.h> | 23 | #include <asm/io.h> |
@@ -54,18 +53,6 @@ int fallback_aper_force __initdata; | |||
54 | 53 | ||
55 | int fix_aperture __initdata = 1; | 54 | int fix_aperture __initdata = 1; |
56 | 55 | ||
57 | static struct resource gart_resource = { | ||
58 | .name = "GART", | ||
59 | .flags = IORESOURCE_MEM, | ||
60 | }; | ||
61 | |||
62 | static void __init insert_aperture_resource(u32 aper_base, u32 aper_size) | ||
63 | { | ||
64 | gart_resource.start = aper_base; | ||
65 | gart_resource.end = aper_base + aper_size - 1; | ||
66 | insert_resource(&iomem_resource, &gart_resource); | ||
67 | } | ||
68 | |||
69 | /* This code runs before the PCI subsystem is initialized, so just | 56 | /* This code runs before the PCI subsystem is initialized, so just |
70 | access the northbridge directly. */ | 57 | access the northbridge directly. */ |
71 | 58 | ||
@@ -96,7 +83,6 @@ static u32 __init allocate_aperture(void) | |||
96 | memblock_reserve(addr, aper_size); | 83 | memblock_reserve(addr, aper_size); |
97 | printk(KERN_INFO "Mapping aperture over %d KB of RAM @ %lx\n", | 84 | printk(KERN_INFO "Mapping aperture over %d KB of RAM @ %lx\n", |
98 | aper_size >> 10, addr); | 85 | aper_size >> 10, addr); |
99 | insert_aperture_resource((u32)addr, aper_size); | ||
100 | register_nosave_region(addr >> PAGE_SHIFT, | 86 | register_nosave_region(addr >> PAGE_SHIFT, |
101 | (addr+aper_size) >> PAGE_SHIFT); | 87 | (addr+aper_size) >> PAGE_SHIFT); |
102 | 88 | ||
@@ -444,12 +430,8 @@ int __init gart_iommu_hole_init(void) | |||
444 | 430 | ||
445 | out: | 431 | out: |
446 | if (!fix && !fallback_aper_force) { | 432 | if (!fix && !fallback_aper_force) { |
447 | if (last_aper_base) { | 433 | if (last_aper_base) |
448 | unsigned long n = (32 * 1024 * 1024) << last_aper_order; | ||
449 | |||
450 | insert_aperture_resource((u32)last_aper_base, n); | ||
451 | return 1; | 434 | return 1; |
452 | } | ||
453 | return 0; | 435 | return 0; |
454 | } | 436 | } |
455 | 437 | ||
diff --git a/arch/x86/kernel/apic/Makefile b/arch/x86/kernel/apic/Makefile index 0ae0323b1f9c..dcb5b15401ce 100644 --- a/arch/x86/kernel/apic/Makefile +++ b/arch/x86/kernel/apic/Makefile | |||
@@ -18,10 +18,7 @@ obj-y += apic_flat_64.o | |||
18 | endif | 18 | endif |
19 | 19 | ||
20 | # APIC probe will depend on the listing order here | 20 | # APIC probe will depend on the listing order here |
21 | obj-$(CONFIG_X86_NUMAQ) += numaq_32.o | ||
22 | obj-$(CONFIG_X86_SUMMIT) += summit_32.o | ||
23 | obj-$(CONFIG_X86_BIGSMP) += bigsmp_32.o | 21 | obj-$(CONFIG_X86_BIGSMP) += bigsmp_32.o |
24 | obj-$(CONFIG_X86_ES7000) += es7000_32.o | ||
25 | 22 | ||
26 | # For 32bit, probe_32 need to be listed last | 23 | # For 32bit, probe_32 need to be listed last |
27 | obj-$(CONFIG_X86_LOCAL_APIC) += probe_$(BITS).o | 24 | obj-$(CONFIG_X86_LOCAL_APIC) += probe_$(BITS).o |
diff --git a/arch/x86/kernel/apic/apic.c b/arch/x86/kernel/apic/apic.c index d278736bf774..ad28db7e6bde 100644 --- a/arch/x86/kernel/apic/apic.c +++ b/arch/x86/kernel/apic/apic.c | |||
@@ -75,6 +75,13 @@ unsigned int max_physical_apicid; | |||
75 | physid_mask_t phys_cpu_present_map; | 75 | physid_mask_t phys_cpu_present_map; |
76 | 76 | ||
77 | /* | 77 | /* |
78 | * Processor to be disabled specified by kernel parameter | ||
79 | * disable_cpu_apicid=<int>, mostly used for the kdump 2nd kernel to | ||
80 | * avoid undefined behaviour caused by sending INIT from AP to BSP. | ||
81 | */ | ||
82 | static unsigned int disabled_cpu_apicid __read_mostly = BAD_APICID; | ||
83 | |||
84 | /* | ||
78 | * Map cpu index to physical APIC ID | 85 | * Map cpu index to physical APIC ID |
79 | */ | 86 | */ |
80 | DEFINE_EARLY_PER_CPU_READ_MOSTLY(u16, x86_cpu_to_apicid, BAD_APICID); | 87 | DEFINE_EARLY_PER_CPU_READ_MOSTLY(u16, x86_cpu_to_apicid, BAD_APICID); |
@@ -126,6 +133,10 @@ static inline void imcr_apic_to_pic(void) | |||
126 | * +1=force-enable | 133 | * +1=force-enable |
127 | */ | 134 | */ |
128 | static int force_enable_local_apic __initdata; | 135 | static int force_enable_local_apic __initdata; |
136 | |||
137 | /* Control whether x2APIC mode is enabled or not */ | ||
138 | static bool nox2apic __initdata; | ||
139 | |||
129 | /* | 140 | /* |
130 | * APIC command line parameters | 141 | * APIC command line parameters |
131 | */ | 142 | */ |
@@ -155,8 +166,7 @@ int x2apic_mode; | |||
155 | /* x2apic enabled before OS handover */ | 166 | /* x2apic enabled before OS handover */ |
156 | int x2apic_preenabled; | 167 | int x2apic_preenabled; |
157 | static int x2apic_disabled; | 168 | static int x2apic_disabled; |
158 | static int nox2apic; | ||
159 | static __init int setup_nox2apic(char *str) | 169 | static int __init setup_nox2apic(char *str) |
160 | { | 170 | { |
161 | if (x2apic_enabled()) { | 171 | if (x2apic_enabled()) { |
162 | int apicid = native_apic_msr_read(APIC_ID); | 172 | int apicid = native_apic_msr_read(APIC_ID); |
@@ -171,7 +181,7 @@ static __init int setup_nox2apic(char *str) | |||
171 | } else | 181 | } else |
172 | setup_clear_cpu_cap(X86_FEATURE_X2APIC); | 182 | setup_clear_cpu_cap(X86_FEATURE_X2APIC); |
173 | 183 | ||
174 | nox2apic = 1; | 184 | nox2apic = true; |
175 | 185 | ||
176 | return 0; | 186 | return 0; |
177 | } | 187 | } |
@@ -276,8 +286,12 @@ u32 native_safe_apic_wait_icr_idle(void) | |||
276 | 286 | ||
277 | void native_apic_icr_write(u32 low, u32 id) | 287 | void native_apic_icr_write(u32 low, u32 id) |
278 | { | 288 | { |
289 | unsigned long flags; | ||
290 | |||
291 | local_irq_save(flags); | ||
279 | apic_write(APIC_ICR2, SET_APIC_DEST_FIELD(id)); | 292 | apic_write(APIC_ICR2, SET_APIC_DEST_FIELD(id)); |
280 | apic_write(APIC_ICR, low); | 293 | apic_write(APIC_ICR, low); |
294 | local_irq_restore(flags); | ||
281 | } | 295 | } |
282 | 296 | ||
283 | u64 native_apic_icr_read(void) | 297 | u64 native_apic_icr_read(void) |
@@ -1968,7 +1982,7 @@ __visible void smp_trace_spurious_interrupt(struct pt_regs *regs) | |||
1968 | */ | 1982 | */ |
1969 | static inline void __smp_error_interrupt(struct pt_regs *regs) | 1983 | static inline void __smp_error_interrupt(struct pt_regs *regs) |
1970 | { | 1984 | { |
1971 | u32 v0, v1; | 1985 | u32 v; |
1972 | u32 i = 0; | 1986 | u32 i = 0; |
1973 | static const char * const error_interrupt_reason[] = { | 1987 | static const char * const error_interrupt_reason[] = { |
1974 | "Send CS error", /* APIC Error Bit 0 */ | 1988 | "Send CS error", /* APIC Error Bit 0 */ |
@@ -1982,21 +1996,21 @@ static inline void __smp_error_interrupt(struct pt_regs *regs) | |||
1982 | }; | 1996 | }; |
1983 | 1997 | ||
1984 | /* First tickle the hardware, only then report what went on. -- REW */ | 1998 | /* First tickle the hardware, only then report what went on. -- REW */ |
1985 | v0 = apic_read(APIC_ESR); | 1999 | if (lapic_get_maxlvt() > 3) /* Due to the Pentium erratum 3AP. */ |
1986 | apic_write(APIC_ESR, 0); | 2000 | apic_write(APIC_ESR, 0); |
1987 | v1 = apic_read(APIC_ESR); | 2001 | v = apic_read(APIC_ESR); |
1988 | ack_APIC_irq(); | 2002 | ack_APIC_irq(); |
1989 | atomic_inc(&irq_err_count); | 2003 | atomic_inc(&irq_err_count); |
1990 | 2004 | ||
1991 | apic_printk(APIC_DEBUG, KERN_DEBUG "APIC error on CPU%d: %02x(%02x)", | 2005 | apic_printk(APIC_DEBUG, KERN_DEBUG "APIC error on CPU%d: %02x", |
1992 | smp_processor_id(), v0 , v1); | 2006 | smp_processor_id(), v); |
1993 | 2007 | ||
1994 | v1 = v1 & 0xff; | 2008 | v &= 0xff; |
1995 | while (v1) { | 2009 | while (v) { |
1996 | if (v1 & 0x1) | 2010 | if (v & 0x1) |
1997 | apic_printk(APIC_DEBUG, KERN_CONT " : %s", error_interrupt_reason[i]); | 2011 | apic_printk(APIC_DEBUG, KERN_CONT " : %s", error_interrupt_reason[i]); |
1998 | i++; | 2012 | i++; |
1999 | v1 >>= 1; | 2013 | v >>= 1; |
2000 | } | 2014 | } |
2001 | 2015 | ||
2002 | apic_printk(APIC_DEBUG, KERN_CONT "\n"); | 2016 | apic_printk(APIC_DEBUG, KERN_CONT "\n"); |
@@ -2115,6 +2129,38 @@ int generic_processor_info(int apicid, int version) | |||
2115 | phys_cpu_present_map); | 2129 | phys_cpu_present_map); |
2116 | 2130 | ||
2117 | /* | 2131 | /* |
2132 | * boot_cpu_physical_apicid is designed to have the apicid | ||
2133 | * returned by read_apic_id(), i.e, the apicid of the | ||
2134 | * currently booting-up processor. However, on some platforms, | ||
2135 | * it is temporarily modified by the apicid reported as BSP | ||
2136 | * through MP table. Concretely: | ||
2137 | * | ||
2138 | * - arch/x86/kernel/mpparse.c: MP_processor_info() | ||
2139 | * - arch/x86/mm/amdtopology.c: amd_numa_init() | ||
2140 | * | ||
2141 | * This function is executed with the modified | ||
2142 | * boot_cpu_physical_apicid. So, disabled_cpu_apicid kernel | ||
2143 | * parameter doesn't work to disable APs on kdump 2nd kernel. | ||
2144 | * | ||
2145 | * Since fixing handling of boot_cpu_physical_apicid requires | ||
2146 | * another discussion and tests on each platform, we leave it | ||
2147 | * for now and here we use read_apic_id() directly in this | ||
2148 | * function, generic_processor_info(). | ||
2149 | */ | ||
2150 | if (disabled_cpu_apicid != BAD_APICID && | ||
2151 | disabled_cpu_apicid != read_apic_id() && | ||
2152 | disabled_cpu_apicid == apicid) { | ||
2153 | int thiscpu = num_processors + disabled_cpus; | ||
2154 | |||
2155 | pr_warning("APIC: Disabling requested cpu." | ||
2156 | " Processor %d/0x%x ignored.\n", | ||
2157 | thiscpu, apicid); | ||
2158 | |||
2159 | disabled_cpus++; | ||
2160 | return -ENODEV; | ||
2161 | } | ||
2162 | |||
2163 | /* | ||
2118 | * If boot cpu has not been detected yet, then only allow up to | 2164 | * If boot cpu has not been detected yet, then only allow up to |
2119 | * nr_cpu_ids - 1 processors and keep one slot free for boot cpu | 2165 | * nr_cpu_ids - 1 processors and keep one slot free for boot cpu |
2120 | */ | 2166 | */ |
@@ -2592,3 +2638,12 @@ static int __init lapic_insert_resource(void) | |||
2592 | * that is using request_resource | 2638 | * that is using request_resource |
2593 | */ | 2639 | */ |
2594 | late_initcall(lapic_insert_resource); | 2640 | late_initcall(lapic_insert_resource); |
2641 | |||
2642 | static int __init apic_set_disabled_cpu_apicid(char *arg) | ||
2643 | { | ||
2644 | if (!arg || !get_option(&arg, &disabled_cpu_apicid)) | ||
2645 | return -EINVAL; | ||
2646 | |||
2647 | return 0; | ||
2648 | } | ||
2649 | early_param("disable_cpu_apicid", apic_set_disabled_cpu_apicid); | ||
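In the kdump scenario the comment block above describes, the capture kernel is booted with the first kernel's boot CPU excluded, e.g. with something like

	disable_cpu_apicid=0

appended to the crash kernel's command line (the APIC ID 0 here is a placeholder for the real BSP's ID), so the second kernel never sends INIT to the halted BSP.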
diff --git a/arch/x86/kernel/apic/apic_flat_64.c b/arch/x86/kernel/apic/apic_flat_64.c index 00c77cf78e9e..7c1b29479513 100644 --- a/arch/x86/kernel/apic/apic_flat_64.c +++ b/arch/x86/kernel/apic/apic_flat_64.c | |||
@@ -14,16 +14,13 @@ | |||
14 | #include <linux/string.h> | 14 | #include <linux/string.h> |
15 | #include <linux/kernel.h> | 15 | #include <linux/kernel.h> |
16 | #include <linux/ctype.h> | 16 | #include <linux/ctype.h> |
17 | #include <linux/init.h> | ||
18 | #include <linux/hardirq.h> | 17 | #include <linux/hardirq.h> |
19 | #include <linux/module.h> | 18 | #include <linux/module.h> |
20 | #include <asm/smp.h> | 19 | #include <asm/smp.h> |
21 | #include <asm/apic.h> | 20 | #include <asm/apic.h> |
22 | #include <asm/ipi.h> | 21 | #include <asm/ipi.h> |
23 | 22 | ||
24 | #ifdef CONFIG_ACPI | 23 | #include <linux/acpi.h> |
25 | #include <acpi/acpi_bus.h> | ||
26 | #endif | ||
27 | 24 | ||
28 | static struct apic apic_physflat; | 25 | static struct apic apic_physflat; |
29 | static struct apic apic_flat; | 26 | static struct apic apic_flat; |
@@ -201,7 +198,7 @@ static struct apic apic_flat = { | |||
201 | 198 | ||
202 | .trampoline_phys_low = DEFAULT_TRAMPOLINE_PHYS_LOW, | 199 | .trampoline_phys_low = DEFAULT_TRAMPOLINE_PHYS_LOW, |
203 | .trampoline_phys_high = DEFAULT_TRAMPOLINE_PHYS_HIGH, | 200 | .trampoline_phys_high = DEFAULT_TRAMPOLINE_PHYS_HIGH, |
204 | .wait_for_init_deassert = NULL, | 201 | .wait_for_init_deassert = false, |
205 | .smp_callin_clear_local_apic = NULL, | 202 | .smp_callin_clear_local_apic = NULL, |
206 | .inquire_remote_apic = default_inquire_remote_apic, | 203 | .inquire_remote_apic = default_inquire_remote_apic, |
207 | 204 | ||
@@ -317,7 +314,7 @@ static struct apic apic_physflat = { | |||
317 | 314 | ||
318 | .trampoline_phys_low = DEFAULT_TRAMPOLINE_PHYS_LOW, | 315 | .trampoline_phys_low = DEFAULT_TRAMPOLINE_PHYS_LOW, |
319 | .trampoline_phys_high = DEFAULT_TRAMPOLINE_PHYS_HIGH, | 316 | .trampoline_phys_high = DEFAULT_TRAMPOLINE_PHYS_HIGH, |
320 | .wait_for_init_deassert = NULL, | 317 | .wait_for_init_deassert = false, |
321 | .smp_callin_clear_local_apic = NULL, | 318 | .smp_callin_clear_local_apic = NULL, |
322 | .inquire_remote_apic = default_inquire_remote_apic, | 319 | .inquire_remote_apic = default_inquire_remote_apic, |
323 | 320 | ||
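This hunk, and the matching ones in the other APIC drivers below, turn .wait_for_init_deassert from a per-driver callback into a plain bool: every driver that set the callback used the same spin-wait, so the boot path can keep one shared copy gated on the flag. A sketch of the consumer side, assuming the existing init_deasserted atomic (the exact smpboot.c call site may differ):

	static atomic_t init_deasserted;

	static void generic_wait_for_init_deassert(void)
	{
		/* Spin until the BSP deasserts INIT for this secondary CPU. */
		while (!atomic_read(&init_deasserted))
			cpu_relax();
	}

	/* In the secondary-CPU callin path: */
	if (apic->wait_for_init_deassert)	/* now a bool, not a function pointer */
		generic_wait_for_init_deassert();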
diff --git a/arch/x86/kernel/apic/apic_noop.c b/arch/x86/kernel/apic/apic_noop.c index e145f28b4099..8c7c98249c20 100644 --- a/arch/x86/kernel/apic/apic_noop.c +++ b/arch/x86/kernel/apic/apic_noop.c | |||
@@ -15,7 +15,6 @@ | |||
15 | #include <linux/string.h> | 15 | #include <linux/string.h> |
16 | #include <linux/kernel.h> | 16 | #include <linux/kernel.h> |
17 | #include <linux/ctype.h> | 17 | #include <linux/ctype.h> |
18 | #include <linux/init.h> | ||
19 | #include <linux/errno.h> | 18 | #include <linux/errno.h> |
20 | #include <asm/fixmap.h> | 19 | #include <asm/fixmap.h> |
21 | #include <asm/mpspec.h> | 20 | #include <asm/mpspec.h> |
@@ -173,8 +172,7 @@ struct apic apic_noop = { | |||
173 | .trampoline_phys_low = DEFAULT_TRAMPOLINE_PHYS_LOW, | 172 | .trampoline_phys_low = DEFAULT_TRAMPOLINE_PHYS_LOW, |
174 | .trampoline_phys_high = DEFAULT_TRAMPOLINE_PHYS_HIGH, | 173 | .trampoline_phys_high = DEFAULT_TRAMPOLINE_PHYS_HIGH, |
175 | 174 | ||
176 | .wait_for_init_deassert = NULL, | 175 | .wait_for_init_deassert = false, |
177 | |||
178 | .smp_callin_clear_local_apic = NULL, | 176 | .smp_callin_clear_local_apic = NULL, |
179 | .inquire_remote_apic = NULL, | 177 | .inquire_remote_apic = NULL, |
180 | 178 | ||
diff --git a/arch/x86/kernel/apic/apic_numachip.c b/arch/x86/kernel/apic/apic_numachip.c index 3e67f9e3d7ef..a5b45df8bc88 100644 --- a/arch/x86/kernel/apic/apic_numachip.c +++ b/arch/x86/kernel/apic/apic_numachip.c | |||
@@ -248,7 +248,7 @@ static const struct apic apic_numachip __refconst = { | |||
248 | .wakeup_secondary_cpu = numachip_wakeup_secondary, | 248 | .wakeup_secondary_cpu = numachip_wakeup_secondary, |
249 | .trampoline_phys_low = DEFAULT_TRAMPOLINE_PHYS_LOW, | 249 | .trampoline_phys_low = DEFAULT_TRAMPOLINE_PHYS_LOW, |
250 | .trampoline_phys_high = DEFAULT_TRAMPOLINE_PHYS_HIGH, | 250 | .trampoline_phys_high = DEFAULT_TRAMPOLINE_PHYS_HIGH, |
251 | .wait_for_init_deassert = NULL, | 251 | .wait_for_init_deassert = false, |
252 | .smp_callin_clear_local_apic = NULL, | 252 | .smp_callin_clear_local_apic = NULL, |
253 | .inquire_remote_apic = NULL, /* REMRD not supported */ | 253 | .inquire_remote_apic = NULL, /* REMRD not supported */ |
254 | 254 | ||
diff --git a/arch/x86/kernel/apic/bigsmp_32.c b/arch/x86/kernel/apic/bigsmp_32.c index d50e3640d5ae..e4840aa7a255 100644 --- a/arch/x86/kernel/apic/bigsmp_32.c +++ b/arch/x86/kernel/apic/bigsmp_32.c | |||
@@ -199,8 +199,7 @@ static struct apic apic_bigsmp = { | |||
199 | .trampoline_phys_low = DEFAULT_TRAMPOLINE_PHYS_LOW, | 199 | .trampoline_phys_low = DEFAULT_TRAMPOLINE_PHYS_LOW, |
200 | .trampoline_phys_high = DEFAULT_TRAMPOLINE_PHYS_HIGH, | 200 | .trampoline_phys_high = DEFAULT_TRAMPOLINE_PHYS_HIGH, |
201 | 201 | ||
202 | .wait_for_init_deassert = default_wait_for_init_deassert, | 202 | .wait_for_init_deassert = true, |
203 | |||
204 | .smp_callin_clear_local_apic = NULL, | 203 | .smp_callin_clear_local_apic = NULL, |
205 | .inquire_remote_apic = default_inquire_remote_apic, | 204 | .inquire_remote_apic = default_inquire_remote_apic, |
206 | 205 | ||
diff --git a/arch/x86/kernel/apic/es7000_32.c b/arch/x86/kernel/apic/es7000_32.c deleted file mode 100644 index c55224731b2d..000000000000 --- a/arch/x86/kernel/apic/es7000_32.c +++ /dev/null | |||
@@ -1,746 +0,0 @@ | |||
1 | /* | ||
2 | * Written by: Garry Forsgren, Unisys Corporation | ||
3 | * Natalie Protasevich, Unisys Corporation | ||
4 | * | ||
5 | * This file contains the code to configure and interface | ||
6 | * with Unisys ES7000 series hardware system manager. | ||
7 | * | ||
8 | * Copyright (c) 2003 Unisys Corporation. | ||
9 | * Copyright (C) 2009, Red Hat, Inc., Ingo Molnar | ||
10 | * | ||
11 | * All Rights Reserved. | ||
12 | * | ||
13 | * This program is free software; you can redistribute it and/or modify it | ||
14 | * under the terms of version 2 of the GNU General Public License as | ||
15 | * published by the Free Software Foundation. | ||
16 | * | ||
17 | * This program is distributed in the hope that it would be useful, but | ||
18 | * WITHOUT ANY WARRANTY; without even the implied warranty of | ||
19 | * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. | ||
20 | * | ||
21 | * You should have received a copy of the GNU General Public License along | ||
22 | * with this program; if not, write the Free Software Foundation, Inc., 59 | ||
23 | * Temple Place - Suite 330, Boston MA 02111-1307, USA. | ||
24 | * | ||
25 | * Contact information: Unisys Corporation, Township Line & Union Meeting | ||
26 | * Roads-A, Unisys Way, Blue Bell, Pennsylvania, 19424, or: | ||
27 | * | ||
28 | * http://www.unisys.com | ||
29 | */ | ||
30 | |||
31 | #define pr_fmt(fmt) KBUILD_MODNAME ": " fmt | ||
32 | |||
33 | #include <linux/notifier.h> | ||
34 | #include <linux/spinlock.h> | ||
35 | #include <linux/cpumask.h> | ||
36 | #include <linux/threads.h> | ||
37 | #include <linux/kernel.h> | ||
38 | #include <linux/module.h> | ||
39 | #include <linux/reboot.h> | ||
40 | #include <linux/string.h> | ||
41 | #include <linux/types.h> | ||
42 | #include <linux/errno.h> | ||
43 | #include <linux/acpi.h> | ||
44 | #include <linux/init.h> | ||
45 | #include <linux/gfp.h> | ||
46 | #include <linux/nmi.h> | ||
47 | #include <linux/smp.h> | ||
48 | #include <linux/io.h> | ||
49 | |||
50 | #include <asm/apicdef.h> | ||
51 | #include <linux/atomic.h> | ||
52 | #include <asm/fixmap.h> | ||
53 | #include <asm/mpspec.h> | ||
54 | #include <asm/setup.h> | ||
55 | #include <asm/apic.h> | ||
56 | #include <asm/ipi.h> | ||
57 | |||
58 | /* | ||
59 | * ES7000 chipsets | ||
60 | */ | ||
61 | |||
62 | #define NON_UNISYS 0 | ||
63 | #define ES7000_CLASSIC 1 | ||
64 | #define ES7000_ZORRO 2 | ||
65 | |||
66 | #define MIP_REG 1 | ||
67 | #define MIP_PSAI_REG 4 | ||
68 | |||
69 | #define MIP_BUSY 1 | ||
70 | #define MIP_SPIN 0xf0000 | ||
71 | #define MIP_VALID 0x0100000000000000ULL | ||
72 | #define MIP_SW_APIC 0x1020b | ||
73 | |||
74 | #define MIP_PORT(val) ((val >> 32) & 0xffff) | ||
75 | |||
76 | #define MIP_RD_LO(val) (val & 0xffffffff) | ||
77 | |||
78 | struct mip_reg { | ||
79 | unsigned long long off_0x00; | ||
80 | unsigned long long off_0x08; | ||
81 | unsigned long long off_0x10; | ||
82 | unsigned long long off_0x18; | ||
83 | unsigned long long off_0x20; | ||
84 | unsigned long long off_0x28; | ||
85 | unsigned long long off_0x30; | ||
86 | unsigned long long off_0x38; | ||
87 | }; | ||
88 | |||
89 | struct mip_reg_info { | ||
90 | unsigned long long mip_info; | ||
91 | unsigned long long delivery_info; | ||
92 | unsigned long long host_reg; | ||
93 | unsigned long long mip_reg; | ||
94 | }; | ||
95 | |||
96 | struct psai { | ||
97 | unsigned long long entry_type; | ||
98 | unsigned long long addr; | ||
99 | unsigned long long bep_addr; | ||
100 | }; | ||
101 | |||
102 | #ifdef CONFIG_ACPI | ||
103 | |||
104 | struct es7000_oem_table { | ||
105 | struct acpi_table_header Header; | ||
106 | u32 OEMTableAddr; | ||
107 | u32 OEMTableSize; | ||
108 | }; | ||
109 | |||
110 | static unsigned long oem_addrX; | ||
111 | static unsigned long oem_size; | ||
112 | |||
113 | #endif | ||
114 | |||
115 | /* | ||
116 | * ES7000 Globals | ||
117 | */ | ||
118 | |||
119 | static volatile unsigned long *psai; | ||
120 | static struct mip_reg *mip_reg; | ||
121 | static struct mip_reg *host_reg; | ||
122 | static int mip_port; | ||
123 | static unsigned long mip_addr; | ||
124 | static unsigned long host_addr; | ||
125 | |||
126 | int es7000_plat; | ||
127 | |||
128 | /* | ||
129 | * GSI override for ES7000 platforms. | ||
130 | */ | ||
131 | |||
132 | |||
133 | static int wakeup_secondary_cpu_via_mip(int cpu, unsigned long eip) | ||
134 | { | ||
135 | unsigned long vect = 0, psaival = 0; | ||
136 | |||
137 | if (psai == NULL) | ||
138 | return -1; | ||
139 | |||
140 | vect = ((unsigned long)__pa(eip)/0x1000) << 16; | ||
141 | psaival = (0x1000000 | vect | cpu); | ||
142 | |||
143 | while (*psai & 0x1000000) | ||
144 | ; | ||
145 | |||
146 | *psai = psaival; | ||
147 | |||
148 | return 0; | ||
149 | } | ||
150 | |||
151 | static int es7000_apic_is_cluster(void) | ||
152 | { | ||
153 | /* MPENTIUMIII */ | ||
154 | if (boot_cpu_data.x86 == 6 && | ||
155 | (boot_cpu_data.x86_model >= 7 && boot_cpu_data.x86_model <= 11)) | ||
156 | return 1; | ||
157 | |||
158 | return 0; | ||
159 | } | ||
160 | |||
161 | static void setup_unisys(void) | ||
162 | { | ||
163 | /* | ||
164 | * Determine the generation of the ES7000 currently running. | ||
165 | * | ||
166 | * es7000_plat = 1 if the machine is a 5xx ES7000 box | ||
167 | * es7000_plat = 2 if the machine is a x86_64 ES7000 box | ||
168 | * | ||
169 | */ | ||
170 | if (!(boot_cpu_data.x86 <= 15 && boot_cpu_data.x86_model <= 2)) | ||
171 | es7000_plat = ES7000_ZORRO; | ||
172 | else | ||
173 | es7000_plat = ES7000_CLASSIC; | ||
174 | } | ||
175 | |||
176 | /* | ||
177 | * Parse the OEM Table: | ||
178 | */ | ||
179 | static int parse_unisys_oem(char *oemptr) | ||
180 | { | ||
181 | int i; | ||
182 | int success = 0; | ||
183 | unsigned char type, size; | ||
184 | unsigned long val; | ||
185 | char *tp = NULL; | ||
186 | struct psai *psaip = NULL; | ||
187 | struct mip_reg_info *mi; | ||
188 | struct mip_reg *host, *mip; | ||
189 | |||
190 | tp = oemptr; | ||
191 | |||
192 | tp += 8; | ||
193 | |||
194 | for (i = 0; i <= 6; i++) { | ||
195 | type = *tp++; | ||
196 | size = *tp++; | ||
197 | tp -= 2; | ||
198 | switch (type) { | ||
199 | case MIP_REG: | ||
200 | mi = (struct mip_reg_info *)tp; | ||
201 | val = MIP_RD_LO(mi->host_reg); | ||
202 | host_addr = val; | ||
203 | host = (struct mip_reg *)val; | ||
204 | host_reg = __va(host); | ||
205 | val = MIP_RD_LO(mi->mip_reg); | ||
206 | mip_port = MIP_PORT(mi->mip_info); | ||
207 | mip_addr = val; | ||
208 | mip = (struct mip_reg *)val; | ||
209 | mip_reg = __va(mip); | ||
210 | pr_debug("host_reg = 0x%lx\n", | ||
211 | (unsigned long)host_reg); | ||
212 | pr_debug("mip_reg = 0x%lx\n", | ||
213 | (unsigned long)mip_reg); | ||
214 | success++; | ||
215 | break; | ||
216 | case MIP_PSAI_REG: | ||
217 | psaip = (struct psai *)tp; | ||
218 | if (tp != NULL) { | ||
219 | if (psaip->addr) | ||
220 | psai = __va(psaip->addr); | ||
221 | else | ||
222 | psai = NULL; | ||
223 | success++; | ||
224 | } | ||
225 | break; | ||
226 | default: | ||
227 | break; | ||
228 | } | ||
229 | tp += size; | ||
230 | } | ||
231 | |||
232 | if (success < 2) | ||
233 | es7000_plat = NON_UNISYS; | ||
234 | else | ||
235 | setup_unisys(); | ||
236 | |||
237 | return es7000_plat; | ||
238 | } | ||
239 | |||
240 | #ifdef CONFIG_ACPI | ||
241 | static int __init find_unisys_acpi_oem_table(unsigned long *oem_addr) | ||
242 | { | ||
243 | struct acpi_table_header *header = NULL; | ||
244 | struct es7000_oem_table *table; | ||
245 | acpi_size tbl_size; | ||
246 | acpi_status ret; | ||
247 | int i = 0; | ||
248 | |||
249 | for (;;) { | ||
250 | ret = acpi_get_table_with_size("OEM1", i++, &header, &tbl_size); | ||
251 | if (!ACPI_SUCCESS(ret)) | ||
252 | return -1; | ||
253 | |||
254 | if (!memcmp((char *) &header->oem_id, "UNISYS", 6)) | ||
255 | break; | ||
256 | |||
257 | early_acpi_os_unmap_memory(header, tbl_size); | ||
258 | } | ||
259 | |||
260 | table = (void *)header; | ||
261 | |||
262 | oem_addrX = table->OEMTableAddr; | ||
263 | oem_size = table->OEMTableSize; | ||
264 | |||
265 | early_acpi_os_unmap_memory(header, tbl_size); | ||
266 | |||
267 | *oem_addr = (unsigned long)__acpi_map_table(oem_addrX, oem_size); | ||
268 | |||
269 | return 0; | ||
270 | } | ||
271 | |||
272 | static void __init unmap_unisys_acpi_oem_table(unsigned long oem_addr) | ||
273 | { | ||
274 | if (!oem_addr) | ||
275 | return; | ||
276 | |||
277 | __acpi_unmap_table((char *)oem_addr, oem_size); | ||
278 | } | ||
279 | |||
280 | static int es7000_check_dsdt(void) | ||
281 | { | ||
282 | struct acpi_table_header header; | ||
283 | |||
284 | if (ACPI_SUCCESS(acpi_get_table_header(ACPI_SIG_DSDT, 0, &header)) && | ||
285 | !strncmp(header.oem_id, "UNISYS", 6)) | ||
286 | return 1; | ||
287 | return 0; | ||
288 | } | ||
289 | |||
290 | static int es7000_acpi_ret; | ||
291 | |||
292 | /* Hook from generic ACPI tables.c */ | ||
293 | static int __init es7000_acpi_madt_oem_check(char *oem_id, char *oem_table_id) | ||
294 | { | ||
295 | unsigned long oem_addr = 0; | ||
296 | int check_dsdt; | ||
297 | int ret = 0; | ||
298 | |||
299 | /* check the DSDT first to avoid clearing the fixmap for oem_addr */ | ||
300 | check_dsdt = es7000_check_dsdt(); | ||
301 | |||
302 | if (!find_unisys_acpi_oem_table(&oem_addr)) { | ||
303 | if (check_dsdt) { | ||
304 | ret = parse_unisys_oem((char *)oem_addr); | ||
305 | } else { | ||
306 | setup_unisys(); | ||
307 | ret = 1; | ||
308 | } | ||
309 | /* | ||
310 | * we need to unmap it | ||
311 | */ | ||
312 | unmap_unisys_acpi_oem_table(oem_addr); | ||
313 | } | ||
314 | |||
315 | es7000_acpi_ret = ret; | ||
316 | |||
317 | return ret && !es7000_apic_is_cluster(); | ||
318 | } | ||
319 | |||
320 | static int es7000_acpi_madt_oem_check_cluster(char *oem_id, char *oem_table_id) | ||
321 | { | ||
322 | int ret = es7000_acpi_ret; | ||
323 | |||
324 | return ret && es7000_apic_is_cluster(); | ||
325 | } | ||
326 | |||
327 | #else /* !CONFIG_ACPI: */ | ||
328 | static int es7000_acpi_madt_oem_check(char *oem_id, char *oem_table_id) | ||
329 | { | ||
330 | return 0; | ||
331 | } | ||
332 | |||
333 | static int es7000_acpi_madt_oem_check_cluster(char *oem_id, char *oem_table_id) | ||
334 | { | ||
335 | return 0; | ||
336 | } | ||
337 | #endif /* !CONFIG_ACPI */ | ||
338 | |||
339 | static void es7000_spin(int n) | ||
340 | { | ||
341 | int i = 0; | ||
342 | |||
343 | while (i++ < n) | ||
344 | rep_nop(); | ||
345 | } | ||
346 | |||
347 | static int es7000_mip_write(struct mip_reg *mip_reg) | ||
348 | { | ||
349 | int status = 0; | ||
350 | int spin; | ||
351 | |||
352 | spin = MIP_SPIN; | ||
353 | while ((host_reg->off_0x38 & MIP_VALID) != 0) { | ||
354 | if (--spin <= 0) { | ||
355 | WARN(1, "Timeout waiting for Host Valid Flag\n"); | ||
356 | return -1; | ||
357 | } | ||
358 | es7000_spin(MIP_SPIN); | ||
359 | } | ||
360 | |||
361 | memcpy(host_reg, mip_reg, sizeof(struct mip_reg)); | ||
362 | outb(1, mip_port); | ||
363 | |||
364 | spin = MIP_SPIN; | ||
365 | |||
366 | while ((mip_reg->off_0x38 & MIP_VALID) == 0) { | ||
367 | if (--spin <= 0) { | ||
368 | WARN(1, "Timeout waiting for MIP Valid Flag\n"); | ||
369 | return -1; | ||
370 | } | ||
371 | es7000_spin(MIP_SPIN); | ||
372 | } | ||
373 | |||
374 | status = (mip_reg->off_0x00 & 0xffff0000000000ULL) >> 48; | ||
375 | mip_reg->off_0x38 &= ~MIP_VALID; | ||
376 | |||
377 | return status; | ||
378 | } | ||
379 | |||
380 | static void es7000_enable_apic_mode(void) | ||
381 | { | ||
382 | struct mip_reg es7000_mip_reg; | ||
383 | int mip_status; | ||
384 | |||
385 | if (!es7000_plat) | ||
386 | return; | ||
387 | |||
388 | pr_info("Enabling APIC mode.\n"); | ||
389 | memset(&es7000_mip_reg, 0, sizeof(struct mip_reg)); | ||
390 | es7000_mip_reg.off_0x00 = MIP_SW_APIC; | ||
391 | es7000_mip_reg.off_0x38 = MIP_VALID; | ||
392 | |||
393 | while ((mip_status = es7000_mip_write(&es7000_mip_reg)) != 0) | ||
394 | WARN(1, "Command failed, status = %x\n", mip_status); | ||
395 | } | ||
396 | |||
397 | static void es7000_wait_for_init_deassert(atomic_t *deassert) | ||
398 | { | ||
399 | while (!atomic_read(deassert)) | ||
400 | cpu_relax(); | ||
401 | } | ||
402 | |||
403 | static unsigned int es7000_get_apic_id(unsigned long x) | ||
404 | { | ||
405 | return (x >> 24) & 0xFF; | ||
406 | } | ||
407 | |||
408 | static void es7000_send_IPI_mask(const struct cpumask *mask, int vector) | ||
409 | { | ||
410 | default_send_IPI_mask_sequence_phys(mask, vector); | ||
411 | } | ||
412 | |||
413 | static void es7000_send_IPI_allbutself(int vector) | ||
414 | { | ||
415 | default_send_IPI_mask_allbutself_phys(cpu_online_mask, vector); | ||
416 | } | ||
417 | |||
418 | static void es7000_send_IPI_all(int vector) | ||
419 | { | ||
420 | es7000_send_IPI_mask(cpu_online_mask, vector); | ||
421 | } | ||
422 | |||
423 | static int es7000_apic_id_registered(void) | ||
424 | { | ||
425 | return 1; | ||
426 | } | ||
427 | |||
428 | static const struct cpumask *target_cpus_cluster(void) | ||
429 | { | ||
430 | return cpu_all_mask; | ||
431 | } | ||
432 | |||
433 | static const struct cpumask *es7000_target_cpus(void) | ||
434 | { | ||
435 | return cpumask_of(smp_processor_id()); | ||
436 | } | ||
437 | |||
438 | static unsigned long es7000_check_apicid_used(physid_mask_t *map, int apicid) | ||
439 | { | ||
440 | return 0; | ||
441 | } | ||
442 | |||
443 | static unsigned long es7000_check_apicid_present(int bit) | ||
444 | { | ||
445 | return physid_isset(bit, phys_cpu_present_map); | ||
446 | } | ||
447 | |||
448 | static int es7000_early_logical_apicid(int cpu) | ||
449 | { | ||
450 | /* on es7000, logical apicid is the same as physical */ | ||
451 | return early_per_cpu(x86_bios_cpu_apicid, cpu); | ||
452 | } | ||
453 | |||
454 | static unsigned long calculate_ldr(int cpu) | ||
455 | { | ||
456 | unsigned long id = per_cpu(x86_bios_cpu_apicid, cpu); | ||
457 | |||
458 | return SET_APIC_LOGICAL_ID(id); | ||
459 | } | ||
460 | |||
461 | /* | ||
462 | * Set up the logical destination ID. | ||
463 | * | ||
464 | * Intel recommends to set DFR, LdR and TPR before enabling | ||
465 | * an APIC. See e.g. "AP-388 82489DX User's Manual" (Intel | ||
466 | * document number 292116). So here it goes... | ||
467 | */ | ||
468 | static void es7000_init_apic_ldr_cluster(void) | ||
469 | { | ||
470 | unsigned long val; | ||
471 | int cpu = smp_processor_id(); | ||
472 | |||
473 | apic_write(APIC_DFR, APIC_DFR_CLUSTER); | ||
474 | val = calculate_ldr(cpu); | ||
475 | apic_write(APIC_LDR, val); | ||
476 | } | ||
477 | |||
478 | static void es7000_init_apic_ldr(void) | ||
479 | { | ||
480 | unsigned long val; | ||
481 | int cpu = smp_processor_id(); | ||
482 | |||
483 | apic_write(APIC_DFR, APIC_DFR_FLAT); | ||
484 | val = calculate_ldr(cpu); | ||
485 | apic_write(APIC_LDR, val); | ||
486 | } | ||
487 | |||
488 | static void es7000_setup_apic_routing(void) | ||
489 | { | ||
490 | int apic = per_cpu(x86_bios_cpu_apicid, smp_processor_id()); | ||
491 | |||
492 | pr_info("Enabling APIC mode: %s. Using %d I/O APICs, target cpus %lx\n", | ||
493 | (apic_version[apic] == 0x14) ? | ||
494 | "Physical Cluster" : "Logical Cluster", | ||
495 | nr_ioapics, cpumask_bits(es7000_target_cpus())[0]); | ||
496 | } | ||
497 | |||
498 | static int es7000_cpu_present_to_apicid(int mps_cpu) | ||
499 | { | ||
500 | if (!mps_cpu) | ||
501 | return boot_cpu_physical_apicid; | ||
502 | else if (mps_cpu < nr_cpu_ids) | ||
503 | return per_cpu(x86_bios_cpu_apicid, mps_cpu); | ||
504 | else | ||
505 | return BAD_APICID; | ||
506 | } | ||
507 | |||
508 | static int cpu_id; | ||
509 | |||
510 | static void es7000_apicid_to_cpu_present(int phys_apicid, physid_mask_t *retmap) | ||
511 | { | ||
512 | physid_set_mask_of_physid(cpu_id, retmap); | ||
513 | ++cpu_id; | ||
514 | } | ||
515 | |||
516 | static void es7000_ioapic_phys_id_map(physid_mask_t *phys_map, physid_mask_t *retmap) | ||
517 | { | ||
518 | /* For clustered we don't have a good way to do this yet - hack */ | ||
519 | physids_promote(0xFFL, retmap); | ||
520 | } | ||
521 | |||
522 | static int es7000_check_phys_apicid_present(int cpu_physical_apicid) | ||
523 | { | ||
524 | boot_cpu_physical_apicid = read_apic_id(); | ||
525 | return 1; | ||
526 | } | ||
527 | |||
528 | static inline int | ||
529 | es7000_cpu_mask_to_apicid(const struct cpumask *cpumask, unsigned int *dest_id) | ||
530 | { | ||
531 | unsigned int round = 0; | ||
532 | unsigned int cpu, uninitialized_var(apicid); | ||
533 | |||
534 | /* | ||
535 | * The cpus in the mask must all be in the same APIC cluster. | ||
536 | */ | ||
537 | for_each_cpu_and(cpu, cpumask, cpu_online_mask) { | ||
538 | int new_apicid = early_per_cpu(x86_cpu_to_logical_apicid, cpu); | ||
539 | |||
540 | if (round && APIC_CLUSTER(apicid) != APIC_CLUSTER(new_apicid)) { | ||
541 | WARN(1, "Not a valid mask!"); | ||
542 | |||
543 | return -EINVAL; | ||
544 | } | ||
545 | apicid |= new_apicid; | ||
546 | round++; | ||
547 | } | ||
548 | if (!round) | ||
549 | return -EINVAL; | ||
550 | *dest_id = apicid; | ||
551 | return 0; | ||
552 | } | ||
553 | |||
554 | static int | ||
555 | es7000_cpu_mask_to_apicid_and(const struct cpumask *inmask, | ||
556 | const struct cpumask *andmask, | ||
557 | unsigned int *apicid) | ||
558 | { | ||
559 | cpumask_var_t cpumask; | ||
560 | *apicid = early_per_cpu(x86_cpu_to_logical_apicid, 0); | ||
561 | |||
562 | if (!alloc_cpumask_var(&cpumask, GFP_ATOMIC)) | ||
563 | return 0; | ||
564 | |||
565 | cpumask_and(cpumask, inmask, andmask); | ||
566 | es7000_cpu_mask_to_apicid(cpumask, apicid); | ||
567 | |||
568 | free_cpumask_var(cpumask); | ||
569 | |||
570 | return 0; | ||
571 | } | ||
572 | |||
573 | static int es7000_phys_pkg_id(int cpuid_apic, int index_msb) | ||
574 | { | ||
575 | return cpuid_apic >> index_msb; | ||
576 | } | ||
577 | |||
578 | static int probe_es7000(void) | ||
579 | { | ||
580 | /* probed later in mptable/ACPI hooks */ | ||
581 | return 0; | ||
582 | } | ||
583 | |||
584 | static int es7000_mps_ret; | ||
585 | static int es7000_mps_oem_check(struct mpc_table *mpc, char *oem, | ||
586 | char *productid) | ||
587 | { | ||
588 | int ret = 0; | ||
589 | |||
590 | if (mpc->oemptr) { | ||
591 | struct mpc_oemtable *oem_table = | ||
592 | (struct mpc_oemtable *)mpc->oemptr; | ||
593 | |||
594 | if (!strncmp(oem, "UNISYS", 6)) | ||
595 | ret = parse_unisys_oem((char *)oem_table); | ||
596 | } | ||
597 | |||
598 | es7000_mps_ret = ret; | ||
599 | |||
600 | return ret && !es7000_apic_is_cluster(); | ||
601 | } | ||
602 | |||
603 | static int es7000_mps_oem_check_cluster(struct mpc_table *mpc, char *oem, | ||
604 | char *productid) | ||
605 | { | ||
606 | int ret = es7000_mps_ret; | ||
607 | |||
608 | return ret && es7000_apic_is_cluster(); | ||
609 | } | ||
610 | |||
611 | /* We've been warned by a false positive warning. Use __refdata to keep it calm. */ | ||
612 | static struct apic __refdata apic_es7000_cluster = { | ||
613 | |||
614 | .name = "es7000", | ||
615 | .probe = probe_es7000, | ||
616 | .acpi_madt_oem_check = es7000_acpi_madt_oem_check_cluster, | ||
617 | .apic_id_valid = default_apic_id_valid, | ||
618 | .apic_id_registered = es7000_apic_id_registered, | ||
619 | |||
620 | .irq_delivery_mode = dest_LowestPrio, | ||
621 | /* logical delivery broadcast to all procs: */ | ||
622 | .irq_dest_mode = 1, | ||
623 | |||
624 | .target_cpus = target_cpus_cluster, | ||
625 | .disable_esr = 1, | ||
626 | .dest_logical = 0, | ||
627 | .check_apicid_used = es7000_check_apicid_used, | ||
628 | .check_apicid_present = es7000_check_apicid_present, | ||
629 | |||
630 | .vector_allocation_domain = flat_vector_allocation_domain, | ||
631 | .init_apic_ldr = es7000_init_apic_ldr_cluster, | ||
632 | |||
633 | .ioapic_phys_id_map = es7000_ioapic_phys_id_map, | ||
634 | .setup_apic_routing = es7000_setup_apic_routing, | ||
635 | .multi_timer_check = NULL, | ||
636 | .cpu_present_to_apicid = es7000_cpu_present_to_apicid, | ||
637 | .apicid_to_cpu_present = es7000_apicid_to_cpu_present, | ||
638 | .setup_portio_remap = NULL, | ||
639 | .check_phys_apicid_present = es7000_check_phys_apicid_present, | ||
640 | .enable_apic_mode = es7000_enable_apic_mode, | ||
641 | .phys_pkg_id = es7000_phys_pkg_id, | ||
642 | .mps_oem_check = es7000_mps_oem_check_cluster, | ||
643 | |||
644 | .get_apic_id = es7000_get_apic_id, | ||
645 | .set_apic_id = NULL, | ||
646 | .apic_id_mask = 0xFF << 24, | ||
647 | |||
648 | .cpu_mask_to_apicid_and = es7000_cpu_mask_to_apicid_and, | ||
649 | |||
650 | .send_IPI_mask = es7000_send_IPI_mask, | ||
651 | .send_IPI_mask_allbutself = NULL, | ||
652 | .send_IPI_allbutself = es7000_send_IPI_allbutself, | ||
653 | .send_IPI_all = es7000_send_IPI_all, | ||
654 | .send_IPI_self = default_send_IPI_self, | ||
655 | |||
656 | .wakeup_secondary_cpu = wakeup_secondary_cpu_via_mip, | ||
657 | |||
658 | .trampoline_phys_low = 0x467, | ||
659 | .trampoline_phys_high = 0x469, | ||
660 | |||
661 | .wait_for_init_deassert = NULL, | ||
662 | |||
663 | /* Nothing to do for most platforms, since cleared by the INIT cycle: */ | ||
664 | .smp_callin_clear_local_apic = NULL, | ||
665 | .inquire_remote_apic = default_inquire_remote_apic, | ||
666 | |||
667 | .read = native_apic_mem_read, | ||
668 | .write = native_apic_mem_write, | ||
669 | .eoi_write = native_apic_mem_write, | ||
670 | .icr_read = native_apic_icr_read, | ||
671 | .icr_write = native_apic_icr_write, | ||
672 | .wait_icr_idle = native_apic_wait_icr_idle, | ||
673 | .safe_wait_icr_idle = native_safe_apic_wait_icr_idle, | ||
674 | |||
675 | .x86_32_early_logical_apicid = es7000_early_logical_apicid, | ||
676 | }; | ||
677 | |||
678 | static struct apic __refdata apic_es7000 = { | ||
679 | |||
680 | .name = "es7000", | ||
681 | .probe = probe_es7000, | ||
682 | .acpi_madt_oem_check = es7000_acpi_madt_oem_check, | ||
683 | .apic_id_valid = default_apic_id_valid, | ||
684 | .apic_id_registered = es7000_apic_id_registered, | ||
685 | |||
686 | .irq_delivery_mode = dest_Fixed, | ||
687 | /* phys delivery to target CPUs: */ | ||
688 | .irq_dest_mode = 0, | ||
689 | |||
690 | .target_cpus = es7000_target_cpus, | ||
691 | .disable_esr = 1, | ||
692 | .dest_logical = 0, | ||
693 | .check_apicid_used = es7000_check_apicid_used, | ||
694 | .check_apicid_present = es7000_check_apicid_present, | ||
695 | |||
696 | .vector_allocation_domain = flat_vector_allocation_domain, | ||
697 | .init_apic_ldr = es7000_init_apic_ldr, | ||
698 | |||
699 | .ioapic_phys_id_map = es7000_ioapic_phys_id_map, | ||
700 | .setup_apic_routing = es7000_setup_apic_routing, | ||
701 | .multi_timer_check = NULL, | ||
702 | .cpu_present_to_apicid = es7000_cpu_present_to_apicid, | ||
703 | .apicid_to_cpu_present = es7000_apicid_to_cpu_present, | ||
704 | .setup_portio_remap = NULL, | ||
705 | .check_phys_apicid_present = es7000_check_phys_apicid_present, | ||
706 | .enable_apic_mode = es7000_enable_apic_mode, | ||
707 | .phys_pkg_id = es7000_phys_pkg_id, | ||
708 | .mps_oem_check = es7000_mps_oem_check, | ||
709 | |||
710 | .get_apic_id = es7000_get_apic_id, | ||
711 | .set_apic_id = NULL, | ||
712 | .apic_id_mask = 0xFF << 24, | ||
713 | |||
714 | .cpu_mask_to_apicid_and = es7000_cpu_mask_to_apicid_and, | ||
715 | |||
716 | .send_IPI_mask = es7000_send_IPI_mask, | ||
717 | .send_IPI_mask_allbutself = NULL, | ||
718 | .send_IPI_allbutself = es7000_send_IPI_allbutself, | ||
719 | .send_IPI_all = es7000_send_IPI_all, | ||
720 | .send_IPI_self = default_send_IPI_self, | ||
721 | |||
722 | .trampoline_phys_low = 0x467, | ||
723 | .trampoline_phys_high = 0x469, | ||
724 | |||
725 | .wait_for_init_deassert = es7000_wait_for_init_deassert, | ||
726 | |||
727 | /* Nothing to do for most platforms, since cleared by the INIT cycle: */ | ||
728 | .smp_callin_clear_local_apic = NULL, | ||
729 | .inquire_remote_apic = default_inquire_remote_apic, | ||
730 | |||
731 | .read = native_apic_mem_read, | ||
732 | .write = native_apic_mem_write, | ||
733 | .eoi_write = native_apic_mem_write, | ||
734 | .icr_read = native_apic_icr_read, | ||
735 | .icr_write = native_apic_icr_write, | ||
736 | .wait_icr_idle = native_apic_wait_icr_idle, | ||
737 | .safe_wait_icr_idle = native_safe_apic_wait_icr_idle, | ||
738 | |||
739 | .x86_32_early_logical_apicid = es7000_early_logical_apicid, | ||
740 | }; | ||
741 | |||
742 | /* | ||
743 | * Need to check for es7000 followed by es7000_cluster, so this order | ||
744 | * in apic_drivers is important. | ||
745 | */ | ||
746 | apic_drivers(apic_es7000, apic_es7000_cluster); | ||
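For reference, the logical-destination setup the removed driver performed: SET_APIC_LOGICAL_ID(x) expands to ((x) << 24) in apicdef.h, so calculate_ldr() placed the BIOS APIC id in the top byte of the LDR; with APIC_DFR_CLUSTER the upper bits of that byte select the cluster. A worked example (the id value is illustrative):

	/* BIOS APIC id 0x03 -> LDR value 0x03000000 */
	unsigned long ldr = SET_APIC_LOGICAL_ID(0x03);

	apic_write(APIC_DFR, APIC_DFR_CLUSTER);	/* cluster addressing */
	apic_write(APIC_LDR, ldr);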
diff --git a/arch/x86/kernel/apic/io_apic.c b/arch/x86/kernel/apic/io_apic.c index e63a5bd2a78f..6ad4658de705 100644 --- a/arch/x86/kernel/apic/io_apic.c +++ b/arch/x86/kernel/apic/io_apic.c | |||
@@ -37,9 +37,6 @@ | |||
37 | #include <linux/kthread.h> | 37 | #include <linux/kthread.h> |
38 | #include <linux/jiffies.h> /* time_after() */ | 38 | #include <linux/jiffies.h> /* time_after() */ |
39 | #include <linux/slab.h> | 39 | #include <linux/slab.h> |
40 | #ifdef CONFIG_ACPI | ||
41 | #include <acpi/acpi_bus.h> | ||
42 | #endif | ||
43 | #include <linux/bootmem.h> | 40 | #include <linux/bootmem.h> |
44 | #include <linux/dmar.h> | 41 | #include <linux/dmar.h> |
45 | #include <linux/hpet.h> | 42 | #include <linux/hpet.h> |
@@ -1142,9 +1139,10 @@ next: | |||
1142 | if (test_bit(vector, used_vectors)) | 1139 | if (test_bit(vector, used_vectors)) |
1143 | goto next; | 1140 | goto next; |
1144 | 1141 | ||
1145 | for_each_cpu_and(new_cpu, tmp_mask, cpu_online_mask) | 1142 | for_each_cpu_and(new_cpu, tmp_mask, cpu_online_mask) { |
1146 | if (per_cpu(vector_irq, new_cpu)[vector] != -1) | 1143 | if (per_cpu(vector_irq, new_cpu)[vector] > VECTOR_UNDEFINED) |
1147 | goto next; | 1144 | goto next; |
1145 | } | ||
1148 | /* Found one! */ | 1146 | /* Found one! */ |
1149 | current_vector = vector; | 1147 | current_vector = vector; |
1150 | current_offset = offset; | 1148 | current_offset = offset; |
@@ -1183,7 +1181,7 @@ static void __clear_irq_vector(int irq, struct irq_cfg *cfg) | |||
1183 | 1181 | ||
1184 | vector = cfg->vector; | 1182 | vector = cfg->vector; |
1185 | for_each_cpu_and(cpu, cfg->domain, cpu_online_mask) | 1183 | for_each_cpu_and(cpu, cfg->domain, cpu_online_mask) |
1186 | per_cpu(vector_irq, cpu)[vector] = -1; | 1184 | per_cpu(vector_irq, cpu)[vector] = VECTOR_UNDEFINED; |
1187 | 1185 | ||
1188 | cfg->vector = 0; | 1186 | cfg->vector = 0; |
1189 | cpumask_clear(cfg->domain); | 1187 | cpumask_clear(cfg->domain); |
@@ -1191,11 +1189,10 @@ static void __clear_irq_vector(int irq, struct irq_cfg *cfg) | |||
1191 | if (likely(!cfg->move_in_progress)) | 1189 | if (likely(!cfg->move_in_progress)) |
1192 | return; | 1190 | return; |
1193 | for_each_cpu_and(cpu, cfg->old_domain, cpu_online_mask) { | 1191 | for_each_cpu_and(cpu, cfg->old_domain, cpu_online_mask) { |
1194 | for (vector = FIRST_EXTERNAL_VECTOR; vector < NR_VECTORS; | 1192 | for (vector = FIRST_EXTERNAL_VECTOR; vector < NR_VECTORS; vector++) { |
1195 | vector++) { | ||
1196 | if (per_cpu(vector_irq, cpu)[vector] != irq) | 1193 | if (per_cpu(vector_irq, cpu)[vector] != irq) |
1197 | continue; | 1194 | continue; |
1198 | per_cpu(vector_irq, cpu)[vector] = -1; | 1195 | per_cpu(vector_irq, cpu)[vector] = VECTOR_UNDEFINED; |
1199 | break; | 1196 | break; |
1200 | } | 1197 | } |
1201 | } | 1198 | } |
@@ -1228,12 +1225,12 @@ void __setup_vector_irq(int cpu) | |||
1228 | /* Mark the free vectors */ | 1225 | /* Mark the free vectors */ |
1229 | for (vector = 0; vector < NR_VECTORS; ++vector) { | 1226 | for (vector = 0; vector < NR_VECTORS; ++vector) { |
1230 | irq = per_cpu(vector_irq, cpu)[vector]; | 1227 | irq = per_cpu(vector_irq, cpu)[vector]; |
1231 | if (irq < 0) | 1228 | if (irq <= VECTOR_UNDEFINED) |
1232 | continue; | 1229 | continue; |
1233 | 1230 | ||
1234 | cfg = irq_cfg(irq); | 1231 | cfg = irq_cfg(irq); |
1235 | if (!cpumask_test_cpu(cpu, cfg->domain)) | 1232 | if (!cpumask_test_cpu(cpu, cfg->domain)) |
1236 | per_cpu(vector_irq, cpu)[vector] = -1; | 1233 | per_cpu(vector_irq, cpu)[vector] = VECTOR_UNDEFINED; |
1237 | } | 1234 | } |
1238 | raw_spin_unlock(&vector_lock); | 1235 | raw_spin_unlock(&vector_lock); |
1239 | } | 1236 | } |
@@ -2202,13 +2199,13 @@ asmlinkage void smp_irq_move_cleanup_interrupt(void) | |||
2202 | 2199 | ||
2203 | me = smp_processor_id(); | 2200 | me = smp_processor_id(); |
2204 | for (vector = FIRST_EXTERNAL_VECTOR; vector < NR_VECTORS; vector++) { | 2201 | for (vector = FIRST_EXTERNAL_VECTOR; vector < NR_VECTORS; vector++) { |
2205 | unsigned int irq; | 2202 | int irq; |
2206 | unsigned int irr; | 2203 | unsigned int irr; |
2207 | struct irq_desc *desc; | 2204 | struct irq_desc *desc; |
2208 | struct irq_cfg *cfg; | 2205 | struct irq_cfg *cfg; |
2209 | irq = __this_cpu_read(vector_irq[vector]); | 2206 | irq = __this_cpu_read(vector_irq[vector]); |
2210 | 2207 | ||
2211 | if (irq == -1) | 2208 | if (irq <= VECTOR_UNDEFINED) |
2212 | continue; | 2209 | continue; |
2213 | 2210 | ||
2214 | desc = irq_to_desc(irq); | 2211 | desc = irq_to_desc(irq); |
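The io_apic.c hunks above replace the bare -1 sentinel with VECTOR_UNDEFINED so a second negative marker can be told apart. A sketch of the scheme the comparisons imply (the second constant and exact values are assumptions consistent with the "<= VECTOR_UNDEFINED" tests):

	/* Per-cpu vector_irq[] entries: a valid irq number (>= 0) or a marker. */
	#define VECTOR_UNDEFINED	(-1)	/* slot was never assigned        */
	#define VECTOR_RETRIGGERED	(-2)	/* freed; an interrupt may still
						   need to be retriggered        */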
diff --git a/arch/x86/kernel/apic/ipi.c b/arch/x86/kernel/apic/ipi.c index 7434d8556d09..62071569bd50 100644 --- a/arch/x86/kernel/apic/ipi.c +++ b/arch/x86/kernel/apic/ipi.c | |||
@@ -1,6 +1,5 @@ | |||
1 | #include <linux/cpumask.h> | 1 | #include <linux/cpumask.h> |
2 | #include <linux/interrupt.h> | 2 | #include <linux/interrupt.h> |
3 | #include <linux/init.h> | ||
4 | 3 | ||
5 | #include <linux/mm.h> | 4 | #include <linux/mm.h> |
6 | #include <linux/delay.h> | 5 | #include <linux/delay.h> |
diff --git a/arch/x86/kernel/apic/numaq_32.c b/arch/x86/kernel/apic/numaq_32.c deleted file mode 100644 index 1e42e8f305ee..000000000000 --- a/arch/x86/kernel/apic/numaq_32.c +++ /dev/null | |||
@@ -1,525 +0,0 @@ | |||
1 | /* | ||
2 | * Written by: Patricia Gaughen, IBM Corporation | ||
3 | * | ||
4 | * Copyright (C) 2002, IBM Corp. | ||
5 | * Copyright (C) 2009, Red Hat, Inc., Ingo Molnar | ||
6 | * | ||
7 | * All rights reserved. | ||
8 | * | ||
9 | * This program is free software; you can redistribute it and/or modify | ||
10 | * it under the terms of the GNU General Public License as published by | ||
11 | * the Free Software Foundation; either version 2 of the License, or | ||
12 | * (at your option) any later version. | ||
13 | * | ||
14 | * This program is distributed in the hope that it will be useful, but | ||
15 | * WITHOUT ANY WARRANTY; without even the implied warranty of | ||
16 | * MERCHANTABILITY OR FITNESS FOR A PARTICULAR PURPOSE, GOOD TITLE or | ||
17 | * NON INFRINGEMENT. See the GNU General Public License for more | ||
18 | * details. | ||
19 | * | ||
20 | * You should have received a copy of the GNU General Public License | ||
21 | * along with this program; if not, write to the Free Software | ||
22 | * Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA. | ||
23 | * | ||
24 | * Send feedback to <gone@us.ibm.com> | ||
25 | */ | ||
26 | #include <linux/nodemask.h> | ||
27 | #include <linux/topology.h> | ||
28 | #include <linux/bootmem.h> | ||
29 | #include <linux/memblock.h> | ||
30 | #include <linux/threads.h> | ||
31 | #include <linux/cpumask.h> | ||
32 | #include <linux/kernel.h> | ||
33 | #include <linux/mmzone.h> | ||
34 | #include <linux/module.h> | ||
35 | #include <linux/string.h> | ||
36 | #include <linux/init.h> | ||
37 | #include <linux/numa.h> | ||
38 | #include <linux/smp.h> | ||
39 | #include <linux/io.h> | ||
40 | #include <linux/mm.h> | ||
41 | |||
42 | #include <asm/processor.h> | ||
43 | #include <asm/fixmap.h> | ||
44 | #include <asm/mpspec.h> | ||
45 | #include <asm/numaq.h> | ||
46 | #include <asm/setup.h> | ||
47 | #include <asm/apic.h> | ||
48 | #include <asm/e820.h> | ||
49 | #include <asm/ipi.h> | ||
50 | |||
51 | int found_numaq; | ||
52 | |||
53 | /* | ||
54 | * Have to match translation table entries to main table entries by counter | ||
55 | * hence the mpc_record variable .... can't see a less disgusting way of | ||
56 | * doing this .... | ||
57 | */ | ||
58 | struct mpc_trans { | ||
59 | unsigned char mpc_type; | ||
60 | unsigned char trans_len; | ||
61 | unsigned char trans_type; | ||
62 | unsigned char trans_quad; | ||
63 | unsigned char trans_global; | ||
64 | unsigned char trans_local; | ||
65 | unsigned short trans_reserved; | ||
66 | }; | ||
67 | |||
68 | static int mpc_record; | ||
69 | |||
70 | static struct mpc_trans *translation_table[MAX_MPC_ENTRY]; | ||
71 | |||
72 | int mp_bus_id_to_node[MAX_MP_BUSSES]; | ||
73 | int mp_bus_id_to_local[MAX_MP_BUSSES]; | ||
74 | int quad_local_to_mp_bus_id[NR_CPUS/4][4]; | ||
75 | |||
76 | |||
77 | static inline void numaq_register_node(int node, struct sys_cfg_data *scd) | ||
78 | { | ||
79 | struct eachquadmem *eq = scd->eq + node; | ||
80 | u64 start = (u64)(eq->hi_shrd_mem_start - eq->priv_mem_size) << 20; | ||
81 | u64 end = (u64)(eq->hi_shrd_mem_start + eq->hi_shrd_mem_size) << 20; | ||
82 | int ret; | ||
83 | |||
84 | node_set(node, numa_nodes_parsed); | ||
85 | ret = numa_add_memblk(node, start, end); | ||
86 | BUG_ON(ret < 0); | ||
87 | } | ||
88 | |||
89 | /* | ||
90 | * Function: smp_dump_qct() | ||
91 | * | ||
92 | * Description: gets memory layout from the quad config table. This | ||
93 | * function also updates numa_nodes_parsed with the nodes (quads) present. | ||
94 | */ | ||
95 | static void __init smp_dump_qct(void) | ||
96 | { | ||
97 | struct sys_cfg_data *scd; | ||
98 | int node; | ||
99 | |||
100 | scd = (void *)__va(SYS_CFG_DATA_PRIV_ADDR); | ||
101 | |||
102 | for_each_node(node) { | ||
103 | if (scd->quads_present31_0 & (1 << node)) | ||
104 | numaq_register_node(node, scd); | ||
105 | } | ||
106 | } | ||
107 | |||
108 | void numaq_tsc_disable(void) | ||
109 | { | ||
110 | if (!found_numaq) | ||
111 | return; | ||
112 | |||
113 | if (num_online_nodes() > 1) { | ||
114 | printk(KERN_DEBUG "NUMAQ: disabling TSC\n"); | ||
115 | setup_clear_cpu_cap(X86_FEATURE_TSC); | ||
116 | } | ||
117 | } | ||
118 | |||
119 | static void __init numaq_tsc_init(void) | ||
120 | { | ||
121 | numaq_tsc_disable(); | ||
122 | } | ||
123 | |||
124 | static inline int generate_logical_apicid(int quad, int phys_apicid) | ||
125 | { | ||
126 | return (quad << 4) + (phys_apicid ? phys_apicid << 1 : 1); | ||
127 | } | ||
128 | |||
129 | /* x86_quirks member */ | ||
130 | static int mpc_apic_id(struct mpc_cpu *m) | ||
131 | { | ||
132 | int quad = translation_table[mpc_record]->trans_quad; | ||
133 | int logical_apicid = generate_logical_apicid(quad, m->apicid); | ||
134 | |||
135 | printk(KERN_DEBUG | ||
136 | "Processor #%d %u:%u APIC version %d (quad %d, apic %d)\n", | ||
137 | m->apicid, (m->cpufeature & CPU_FAMILY_MASK) >> 8, | ||
138 | (m->cpufeature & CPU_MODEL_MASK) >> 4, | ||
139 | m->apicver, quad, logical_apicid); | ||
140 | |||
141 | return logical_apicid; | ||
142 | } | ||
143 | |||
144 | /* x86_quirks member */ | ||
145 | static void mpc_oem_bus_info(struct mpc_bus *m, char *name) | ||
146 | { | ||
147 | int quad = translation_table[mpc_record]->trans_quad; | ||
148 | int local = translation_table[mpc_record]->trans_local; | ||
149 | |||
150 | mp_bus_id_to_node[m->busid] = quad; | ||
151 | mp_bus_id_to_local[m->busid] = local; | ||
152 | |||
153 | printk(KERN_INFO "Bus #%d is %s (node %d)\n", m->busid, name, quad); | ||
154 | } | ||
155 | |||
156 | /* x86_quirks member */ | ||
157 | static void mpc_oem_pci_bus(struct mpc_bus *m) | ||
158 | { | ||
159 | int quad = translation_table[mpc_record]->trans_quad; | ||
160 | int local = translation_table[mpc_record]->trans_local; | ||
161 | |||
162 | quad_local_to_mp_bus_id[quad][local] = m->busid; | ||
163 | } | ||
164 | |||
165 | /* | ||
166 | * Called from mpparse code. | ||
167 | * mode = 0: prescan | ||
168 | * mode = 1: one mpc entry scanned | ||
169 | */ | ||
170 | static void numaq_mpc_record(unsigned int mode) | ||
171 | { | ||
172 | if (!mode) | ||
173 | mpc_record = 0; | ||
174 | else | ||
175 | mpc_record++; | ||
176 | } | ||
177 | |||
178 | static void __init MP_translation_info(struct mpc_trans *m) | ||
179 | { | ||
180 | printk(KERN_INFO | ||
181 | "Translation: record %d, type %d, quad %d, global %d, local %d\n", | ||
182 | mpc_record, m->trans_type, m->trans_quad, m->trans_global, | ||
183 | m->trans_local); | ||
184 | |||
185 | if (mpc_record >= MAX_MPC_ENTRY) | ||
186 | printk(KERN_ERR "MAX_MPC_ENTRY exceeded!\n"); | ||
187 | else | ||
188 | translation_table[mpc_record] = m; /* stash this for later */ | ||
189 | |||
190 | if (m->trans_quad < MAX_NUMNODES && !node_online(m->trans_quad)) | ||
191 | node_set_online(m->trans_quad); | ||
192 | } | ||
193 | |||
194 | static int __init mpf_checksum(unsigned char *mp, int len) | ||
195 | { | ||
196 | int sum = 0; | ||
197 | |||
198 | while (len--) | ||
199 | sum += *mp++; | ||
200 | |||
201 | return sum & 0xFF; | ||
202 | } | ||
203 | |||
204 | /* | ||
205 | * Read/parse the MPC oem tables | ||
206 | */ | ||
207 | static void __init smp_read_mpc_oem(struct mpc_table *mpc) | ||
208 | { | ||
209 | struct mpc_oemtable *oemtable = (void *)(long)mpc->oemptr; | ||
210 | int count = sizeof(*oemtable); /* the header size */ | ||
211 | unsigned char *oemptr = ((unsigned char *)oemtable) + count; | ||
212 | |||
213 | mpc_record = 0; | ||
214 | printk(KERN_INFO | ||
215 | "Found an OEM MPC table at %8p - parsing it...\n", oemtable); | ||
216 | |||
217 | if (memcmp(oemtable->signature, MPC_OEM_SIGNATURE, 4)) { | ||
218 | printk(KERN_WARNING | ||
219 | "SMP mpc oemtable: bad signature [%c%c%c%c]!\n", | ||
220 | oemtable->signature[0], oemtable->signature[1], | ||
221 | oemtable->signature[2], oemtable->signature[3]); | ||
222 | return; | ||
223 | } | ||
224 | |||
225 | if (mpf_checksum((unsigned char *)oemtable, oemtable->length)) { | ||
226 | printk(KERN_WARNING "SMP oem mptable: checksum error!\n"); | ||
227 | return; | ||
228 | } | ||
229 | |||
230 | while (count < oemtable->length) { | ||
231 | switch (*oemptr) { | ||
232 | case MP_TRANSLATION: | ||
233 | { | ||
234 | struct mpc_trans *m = (void *)oemptr; | ||
235 | |||
236 | MP_translation_info(m); | ||
237 | oemptr += sizeof(*m); | ||
238 | count += sizeof(*m); | ||
239 | ++mpc_record; | ||
240 | break; | ||
241 | } | ||
242 | default: | ||
243 | printk(KERN_WARNING | ||
244 | "Unrecognised OEM table entry type! - %d\n", | ||
245 | (int)*oemptr); | ||
246 | return; | ||
247 | } | ||
248 | } | ||
249 | } | ||
250 | |||
251 | static __init void early_check_numaq(void) | ||
252 | { | ||
253 | /* | ||
254 | * get boot-time SMP configuration: | ||
255 | */ | ||
256 | if (smp_found_config) | ||
257 | early_get_smp_config(); | ||
258 | |||
259 | if (found_numaq) { | ||
260 | x86_init.mpparse.mpc_record = numaq_mpc_record; | ||
261 | x86_init.mpparse.setup_ioapic_ids = x86_init_noop; | ||
262 | x86_init.mpparse.mpc_apic_id = mpc_apic_id; | ||
263 | x86_init.mpparse.smp_read_mpc_oem = smp_read_mpc_oem; | ||
264 | x86_init.mpparse.mpc_oem_pci_bus = mpc_oem_pci_bus; | ||
265 | x86_init.mpparse.mpc_oem_bus_info = mpc_oem_bus_info; | ||
266 | x86_init.timers.tsc_pre_init = numaq_tsc_init; | ||
267 | x86_init.pci.init = pci_numaq_init; | ||
268 | } | ||
269 | } | ||
270 | |||
271 | int __init numaq_numa_init(void) | ||
272 | { | ||
273 | early_check_numaq(); | ||
274 | if (!found_numaq) | ||
275 | return -ENOENT; | ||
276 | smp_dump_qct(); | ||
277 | |||
278 | return 0; | ||
279 | } | ||
280 | |||
281 | #define NUMAQ_APIC_DFR_VALUE (APIC_DFR_CLUSTER) | ||
282 | |||
283 | static inline unsigned int numaq_get_apic_id(unsigned long x) | ||
284 | { | ||
285 | return (x >> 24) & 0x0F; | ||
286 | } | ||
287 | |||
288 | static inline void numaq_send_IPI_mask(const struct cpumask *mask, int vector) | ||
289 | { | ||
290 | default_send_IPI_mask_sequence_logical(mask, vector); | ||
291 | } | ||
292 | |||
293 | static inline void numaq_send_IPI_allbutself(int vector) | ||
294 | { | ||
295 | default_send_IPI_mask_allbutself_logical(cpu_online_mask, vector); | ||
296 | } | ||
297 | |||
298 | static inline void numaq_send_IPI_all(int vector) | ||
299 | { | ||
300 | numaq_send_IPI_mask(cpu_online_mask, vector); | ||
301 | } | ||
302 | |||
303 | #define NUMAQ_TRAMPOLINE_PHYS_LOW (0x8) | ||
304 | #define NUMAQ_TRAMPOLINE_PHYS_HIGH (0xa) | ||
305 | |||
306 | /* | ||
307 | * Because we use NMIs rather than the INIT-STARTUP sequence to | ||
308 | * bootstrap the CPUs, the APIC may be in a weird state. Kick it: | ||
309 | */ | ||
310 | static inline void numaq_smp_callin_clear_local_apic(void) | ||
311 | { | ||
312 | clear_local_APIC(); | ||
313 | } | ||
314 | |||
315 | static inline const struct cpumask *numaq_target_cpus(void) | ||
316 | { | ||
317 | return cpu_all_mask; | ||
318 | } | ||
319 | |||
320 | static unsigned long numaq_check_apicid_used(physid_mask_t *map, int apicid) | ||
321 | { | ||
322 | return physid_isset(apicid, *map); | ||
323 | } | ||
324 | |||
325 | static inline unsigned long numaq_check_apicid_present(int bit) | ||
326 | { | ||
327 | return physid_isset(bit, phys_cpu_present_map); | ||
328 | } | ||
329 | |||
330 | static inline int numaq_apic_id_registered(void) | ||
331 | { | ||
332 | return 1; | ||
333 | } | ||
334 | |||
335 | static inline void numaq_init_apic_ldr(void) | ||
336 | { | ||
337 | /* Already done in NUMA-Q firmware */ | ||
338 | } | ||
339 | |||
340 | static inline void numaq_setup_apic_routing(void) | ||
341 | { | ||
342 | printk(KERN_INFO | ||
343 | "Enabling APIC mode: NUMA-Q. Using %d I/O APICs\n", | ||
344 | nr_ioapics); | ||
345 | } | ||
346 | |||
347 | /* | ||
348 | * Skip adding the timer int on secondary nodes, which causes | ||
349 | * a small but painful rift in the time-space continuum. | ||
350 | */ | ||
351 | static inline int numaq_multi_timer_check(int apic, int irq) | ||
352 | { | ||
353 | return apic != 0 && irq == 0; | ||
354 | } | ||
355 | |||
356 | static inline void numaq_ioapic_phys_id_map(physid_mask_t *phys_map, physid_mask_t *retmap) | ||
357 | { | ||
358 | /* We don't have a good way to do this yet - hack */ | ||
359 | return physids_promote(0xFUL, retmap); | ||
360 | } | ||
361 | |||
362 | /* | ||
363 | * Supporting over 60 cpus on NUMA-Q requires a locality-dependent | ||
364 | * cpu to APIC ID relation to properly interact with the intelligent | ||
365 | * mode of the cluster controller. | ||
366 | */ | ||
367 | static inline int numaq_cpu_present_to_apicid(int mps_cpu) | ||
368 | { | ||
369 | if (mps_cpu < 60) | ||
370 | return ((mps_cpu >> 2) << 4) | (1 << (mps_cpu & 0x3)); | ||
371 | else | ||
372 | return BAD_APICID; | ||
373 | } | ||
374 | |||
375 | static inline int numaq_apicid_to_node(int logical_apicid) | ||
376 | { | ||
377 | return logical_apicid >> 4; | ||
378 | } | ||
379 | |||
380 | static int numaq_numa_cpu_node(int cpu) | ||
381 | { | ||
382 | int logical_apicid = early_per_cpu(x86_cpu_to_logical_apicid, cpu); | ||
383 | |||
384 | if (logical_apicid != BAD_APICID) | ||
385 | return numaq_apicid_to_node(logical_apicid); | ||
386 | return NUMA_NO_NODE; | ||
387 | } | ||
388 | |||
389 | static void numaq_apicid_to_cpu_present(int logical_apicid, physid_mask_t *retmap) | ||
390 | { | ||
391 | int node = numaq_apicid_to_node(logical_apicid); | ||
392 | int cpu = __ffs(logical_apicid & 0xf); | ||
393 | |||
394 | physid_set_mask_of_physid(cpu + 4*node, retmap); | ||
395 | } | ||
396 | |||
397 | /* Where the IO area was mapped on multiquad, always 0 otherwise */ | ||
398 | void *xquad_portio; | ||
399 | |||
400 | static inline int numaq_check_phys_apicid_present(int phys_apicid) | ||
401 | { | ||
402 | return 1; | ||
403 | } | ||
404 | |||
405 | /* | ||
406 | * We use physical apicids here, not logical, so just return the default | ||
407 | * physical broadcast to stop people from breaking us | ||
408 | */ | ||
409 | static int | ||
410 | numaq_cpu_mask_to_apicid_and(const struct cpumask *cpumask, | ||
411 | const struct cpumask *andmask, | ||
412 | unsigned int *apicid) | ||
413 | { | ||
414 | *apicid = 0x0F; | ||
415 | return 0; | ||
416 | } | ||
417 | |||
418 | /* No NUMA-Q box has a HT CPU, but it can't hurt to use the default code. */ | ||
419 | static inline int numaq_phys_pkg_id(int cpuid_apic, int index_msb) | ||
420 | { | ||
421 | return cpuid_apic >> index_msb; | ||
422 | } | ||
423 | |||
424 | static int | ||
425 | numaq_mps_oem_check(struct mpc_table *mpc, char *oem, char *productid) | ||
426 | { | ||
427 | if (strncmp(oem, "IBM NUMA", 8)) | ||
428 | printk(KERN_ERR "Warning! Not a NUMA-Q system!\n"); | ||
429 | else | ||
430 | found_numaq = 1; | ||
431 | |||
432 | return found_numaq; | ||
433 | } | ||
434 | |||
435 | static int probe_numaq(void) | ||
436 | { | ||
437 | /* already know from get_memcfg_numaq() */ | ||
438 | return found_numaq; | ||
439 | } | ||
440 | |||
441 | static void numaq_setup_portio_remap(void) | ||
442 | { | ||
443 | int num_quads = num_online_nodes(); | ||
444 | |||
445 | if (num_quads <= 1) | ||
446 | return; | ||
447 | |||
448 | printk(KERN_INFO | ||
449 | "Remapping cross-quad port I/O for %d quads\n", num_quads); | ||
450 | |||
451 | xquad_portio = ioremap(XQUAD_PORTIO_BASE, num_quads*XQUAD_PORTIO_QUAD); | ||
452 | |||
453 | printk(KERN_INFO | ||
454 | "xquad_portio vaddr 0x%08lx, len %08lx\n", | ||
455 | (u_long) xquad_portio, (u_long) num_quads*XQUAD_PORTIO_QUAD); | ||
456 | } | ||
457 | |||
458 | /* Use __refdata to keep the false-positive warning quiet. */ | ||
459 | static struct apic __refdata apic_numaq = { | ||
460 | |||
461 | .name = "NUMAQ", | ||
462 | .probe = probe_numaq, | ||
463 | .acpi_madt_oem_check = NULL, | ||
464 | .apic_id_valid = default_apic_id_valid, | ||
465 | .apic_id_registered = numaq_apic_id_registered, | ||
466 | |||
467 | .irq_delivery_mode = dest_LowestPrio, | ||
468 | /* physical delivery on LOCAL quad: */ | ||
469 | .irq_dest_mode = 0, | ||
470 | |||
471 | .target_cpus = numaq_target_cpus, | ||
472 | .disable_esr = 1, | ||
473 | .dest_logical = APIC_DEST_LOGICAL, | ||
474 | .check_apicid_used = numaq_check_apicid_used, | ||
475 | .check_apicid_present = numaq_check_apicid_present, | ||
476 | |||
477 | .vector_allocation_domain = flat_vector_allocation_domain, | ||
478 | .init_apic_ldr = numaq_init_apic_ldr, | ||
479 | |||
480 | .ioapic_phys_id_map = numaq_ioapic_phys_id_map, | ||
481 | .setup_apic_routing = numaq_setup_apic_routing, | ||
482 | .multi_timer_check = numaq_multi_timer_check, | ||
483 | .cpu_present_to_apicid = numaq_cpu_present_to_apicid, | ||
484 | .apicid_to_cpu_present = numaq_apicid_to_cpu_present, | ||
485 | .setup_portio_remap = numaq_setup_portio_remap, | ||
486 | .check_phys_apicid_present = numaq_check_phys_apicid_present, | ||
487 | .enable_apic_mode = NULL, | ||
488 | .phys_pkg_id = numaq_phys_pkg_id, | ||
489 | .mps_oem_check = numaq_mps_oem_check, | ||
490 | |||
491 | .get_apic_id = numaq_get_apic_id, | ||
492 | .set_apic_id = NULL, | ||
493 | .apic_id_mask = 0x0F << 24, | ||
494 | |||
495 | .cpu_mask_to_apicid_and = numaq_cpu_mask_to_apicid_and, | ||
496 | |||
497 | .send_IPI_mask = numaq_send_IPI_mask, | ||
498 | .send_IPI_mask_allbutself = NULL, | ||
499 | .send_IPI_allbutself = numaq_send_IPI_allbutself, | ||
500 | .send_IPI_all = numaq_send_IPI_all, | ||
501 | .send_IPI_self = default_send_IPI_self, | ||
502 | |||
503 | .wakeup_secondary_cpu = wakeup_secondary_cpu_via_nmi, | ||
504 | .trampoline_phys_low = NUMAQ_TRAMPOLINE_PHYS_LOW, | ||
505 | .trampoline_phys_high = NUMAQ_TRAMPOLINE_PHYS_HIGH, | ||
506 | |||
507 | /* We don't do anything here because we use NMI's to boot instead */ | ||
508 | .wait_for_init_deassert = NULL, | ||
509 | |||
510 | .smp_callin_clear_local_apic = numaq_smp_callin_clear_local_apic, | ||
511 | .inquire_remote_apic = NULL, | ||
512 | |||
513 | .read = native_apic_mem_read, | ||
514 | .write = native_apic_mem_write, | ||
515 | .eoi_write = native_apic_mem_write, | ||
516 | .icr_read = native_apic_icr_read, | ||
517 | .icr_write = native_apic_icr_write, | ||
518 | .wait_icr_idle = native_apic_wait_icr_idle, | ||
519 | .safe_wait_icr_idle = native_safe_apic_wait_icr_idle, | ||
520 | |||
521 | .x86_32_early_logical_apicid = noop_x86_32_early_logical_apicid, | ||
522 | .x86_32_numa_cpu_node = numaq_numa_cpu_node, | ||
523 | }; | ||
524 | |||
525 | apic_driver(apic_numaq); | ||
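For reference, the cpu-to-APIC-id packing the removed NUMA-Q driver relied on (numaq_cpu_present_to_apicid above): each quad carries four CPUs, the quad number fills the high nibble, and a one-hot bit for the local CPU fills the low nibble. A small self-checking sketch of the same arithmetic:

	#include <assert.h>

	static int numaq_example_apicid(int mps_cpu)
	{
		return ((mps_cpu >> 2) << 4) | (1 << (mps_cpu & 0x3));
	}

	int main(void)
	{
		assert(numaq_example_apicid(0) == 0x01);	/* quad 0, CPU 0 */
		assert(numaq_example_apicid(3) == 0x08);	/* quad 0, CPU 3 */
		assert(numaq_example_apicid(5) == 0x12);	/* quad 1, CPU 1 */
		return 0;
	}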
diff --git a/arch/x86/kernel/apic/probe_32.c b/arch/x86/kernel/apic/probe_32.c index eb35ef9ee63f..cceb352c968c 100644 --- a/arch/x86/kernel/apic/probe_32.c +++ b/arch/x86/kernel/apic/probe_32.c | |||
@@ -119,8 +119,7 @@ static struct apic apic_default = { | |||
119 | .trampoline_phys_low = DEFAULT_TRAMPOLINE_PHYS_LOW, | 119 | .trampoline_phys_low = DEFAULT_TRAMPOLINE_PHYS_LOW, |
120 | .trampoline_phys_high = DEFAULT_TRAMPOLINE_PHYS_HIGH, | 120 | .trampoline_phys_high = DEFAULT_TRAMPOLINE_PHYS_HIGH, |
121 | 121 | ||
122 | .wait_for_init_deassert = default_wait_for_init_deassert, | 122 | .wait_for_init_deassert = true, |
123 | |||
124 | .smp_callin_clear_local_apic = NULL, | 123 | .smp_callin_clear_local_apic = NULL, |
125 | .inquire_remote_apic = default_inquire_remote_apic, | 124 | .inquire_remote_apic = default_inquire_remote_apic, |
126 | 125 | ||
diff --git a/arch/x86/kernel/apic/summit_32.c b/arch/x86/kernel/apic/summit_32.c deleted file mode 100644 index 77c95c0e1bf7..000000000000 --- a/arch/x86/kernel/apic/summit_32.c +++ /dev/null | |||
@@ -1,552 +0,0 @@ | |||
1 | /* | ||
2 | * IBM Summit-Specific Code | ||
3 | * | ||
4 | * Written By: Matthew Dobson, IBM Corporation | ||
5 | * | ||
6 | * Copyright (c) 2003 IBM Corp. | ||
7 | * | ||
8 | * All rights reserved. | ||
9 | * | ||
10 | * This program is free software; you can redistribute it and/or modify | ||
11 | * it under the terms of the GNU General Public License as published by | ||
12 | * the Free Software Foundation; either version 2 of the License, or (at | ||
13 | * your option) any later version. | ||
14 | * | ||
15 | * This program is distributed in the hope that it will be useful, but | ||
16 | * WITHOUT ANY WARRANTY; without even the implied warranty of | ||
17 | * MERCHANTABILITY OR FITNESS FOR A PARTICULAR PURPOSE, GOOD TITLE or | ||
18 | * NON INFRINGEMENT. See the GNU General Public License for more | ||
19 | * details. | ||
20 | * | ||
21 | * You should have received a copy of the GNU General Public License | ||
22 | * along with this program; if not, write to the Free Software | ||
23 | * Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA. | ||
24 | * | ||
25 | * Send feedback to <colpatch@us.ibm.com> | ||
26 | * | ||
27 | */ | ||
28 | |||
29 | #define pr_fmt(fmt) "summit: %s: " fmt, __func__ | ||
30 | |||
31 | #include <linux/mm.h> | ||
32 | #include <linux/init.h> | ||
33 | #include <asm/io.h> | ||
34 | #include <asm/bios_ebda.h> | ||
35 | |||
36 | /* | ||
37 | * APIC driver for the IBM "Summit" chipset. | ||
38 | */ | ||
39 | #include <linux/threads.h> | ||
40 | #include <linux/cpumask.h> | ||
41 | #include <asm/mpspec.h> | ||
42 | #include <asm/apic.h> | ||
43 | #include <asm/smp.h> | ||
44 | #include <asm/fixmap.h> | ||
45 | #include <asm/apicdef.h> | ||
46 | #include <asm/ipi.h> | ||
47 | #include <linux/kernel.h> | ||
48 | #include <linux/string.h> | ||
49 | #include <linux/gfp.h> | ||
50 | #include <linux/smp.h> | ||
51 | |||
52 | static unsigned summit_get_apic_id(unsigned long x) | ||
53 | { | ||
54 | return (x >> 24) & 0xFF; | ||
55 | } | ||
56 | |||
57 | static inline void summit_send_IPI_mask(const struct cpumask *mask, int vector) | ||
58 | { | ||
59 | default_send_IPI_mask_sequence_logical(mask, vector); | ||
60 | } | ||
61 | |||
62 | static void summit_send_IPI_allbutself(int vector) | ||
63 | { | ||
64 | default_send_IPI_mask_allbutself_logical(cpu_online_mask, vector); | ||
65 | } | ||
66 | |||
67 | static void summit_send_IPI_all(int vector) | ||
68 | { | ||
69 | summit_send_IPI_mask(cpu_online_mask, vector); | ||
70 | } | ||
71 | |||
72 | #include <asm/tsc.h> | ||
73 | |||
74 | extern int use_cyclone; | ||
75 | |||
76 | #ifdef CONFIG_X86_SUMMIT_NUMA | ||
77 | static void setup_summit(void); | ||
78 | #else | ||
79 | static inline void setup_summit(void) {} | ||
80 | #endif | ||
81 | |||
82 | static int summit_mps_oem_check(struct mpc_table *mpc, char *oem, | ||
83 | char *productid) | ||
84 | { | ||
85 | if (!strncmp(oem, "IBM ENSW", 8) && | ||
86 | (!strncmp(productid, "VIGIL SMP", 9) | ||
87 | || !strncmp(productid, "EXA", 3) | ||
88 | || !strncmp(productid, "RUTHLESS SMP", 12))){ | ||
89 | mark_tsc_unstable("Summit based system"); | ||
90 | use_cyclone = 1; /*enable cyclone-timer*/ | ||
91 | setup_summit(); | ||
92 | return 1; | ||
93 | } | ||
94 | return 0; | ||
95 | } | ||
96 | |||
97 | /* Hook from generic ACPI tables.c */ | ||
98 | static int summit_acpi_madt_oem_check(char *oem_id, char *oem_table_id) | ||
99 | { | ||
100 | if (!strncmp(oem_id, "IBM", 3) && | ||
101 | (!strncmp(oem_table_id, "SERVIGIL", 8) | ||
102 | || !strncmp(oem_table_id, "EXA", 3))){ | ||
103 | mark_tsc_unstable("Summit based system"); | ||
104 | use_cyclone = 1; /*enable cyclone-timer*/ | ||
105 | setup_summit(); | ||
106 | return 1; | ||
107 | } | ||
108 | return 0; | ||
109 | } | ||
110 | |||
111 | struct rio_table_hdr { | ||
112 | unsigned char version; /* Version number of this data structure */ | ||
113 | /* Version 3 adds chassis_num & WP_index */ | ||
114 | unsigned char num_scal_dev; /* # of Scalability devices (Twisters for Vigil) */ | ||
115 | unsigned char num_rio_dev; /* # of RIO I/O devices (Cyclones and Winnipegs) */ | ||
116 | } __attribute__((packed)); | ||
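/*
 * Illustrative note (not in the original file): this packed header is
 * exactly 3 bytes - version, num_scal_dev, num_rio_dev - which is why
 * build_detail_arrays() below starts parsing the detail records at
 * rio_table_hdr + 3.
 */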
117 | |||
118 | struct scal_detail { | ||
119 | unsigned char node_id; /* Scalability Node ID */ | ||
120 | unsigned long CBAR; /* Address of 1MB register space */ | ||
121 | unsigned char port0node; /* Node ID port connected to: 0xFF=None */ | ||
122 | unsigned char port0port; /* Port num port connected to: 0,1,2, or 0xFF=None */ | ||
123 | unsigned char port1node; /* Node ID port connected to: 0xFF = None */ | ||
124 | unsigned char port1port; /* Port num port connected to: 0,1,2, or 0xFF=None */ | ||
125 | unsigned char port2node; /* Node ID port connected to: 0xFF = None */ | ||
126 | unsigned char port2port; /* Port num port connected to: 0,1,2, or 0xFF=None */ | ||
127 | unsigned char chassis_num; /* 1 based Chassis number (1 = boot node) */ | ||
128 | } __attribute__((packed)); | ||
129 | |||
130 | struct rio_detail { | ||
131 | unsigned char node_id; /* RIO Node ID */ | ||
132 | unsigned long BBAR; /* Address of 1MB register space */ | ||
133 | unsigned char type; /* Type of device */ | ||
134 | unsigned char owner_id; /* For WPEG: Node ID of Cyclone that owns this WPEG*/ | ||
135 | /* For CYC: Node ID of Twister that owns this CYC */ | ||
136 | unsigned char port0node; /* Node ID port connected to: 0xFF=None */ | ||
137 | unsigned char port0port; /* Port num port connected to: 0,1,2, or 0xFF=None */ | ||
138 | unsigned char port1node; /* Node ID port connected to: 0xFF=None */ | ||
139 | unsigned char port1port; /* Port num port connected to: 0,1,2, or 0xFF=None */ | ||
140 | unsigned char first_slot; /* For WPEG: Lowest slot number below this WPEG */ | ||
141 | /* For CYC: 0 */ | ||
142 | unsigned char status; /* For WPEG: Bit 0 = 1 : the XAPIC is used */ | ||
143 | /* = 0 : the XAPIC is not used, i.e.: */ | ||
144 | /* ints fwded to another XAPIC */ | ||
145 | /* Bits1:7 Reserved */ | ||
146 | /* For CYC: Bits0:7 Reserved */ | ||
147 | unsigned char WP_index; /* For WPEG: WPEG instance index - lower ones have */ | ||
148 | /* lower slot numbers/PCI bus numbers */ | ||
149 | /* For CYC: No meaning */ | ||
150 | unsigned char chassis_num; /* 1 based Chassis number */ | ||
151 | /* For LookOut WPEGs this field indicates the */ | ||
152 | /* Expansion Chassis #, enumerated from Boot */ | ||
153 | /* Node WPEG external port, then Boot Node CYC */ | ||
154 | /* external port, then Next Vigil chassis WPEG */ | ||
155 | /* external port, etc. */ | ||
156 | /* Shared Lookouts have only 1 chassis number (the */ | ||
157 | /* first one assigned) */ | ||
158 | } __attribute__((packed)); | ||
159 | |||
160 | |||
161 | typedef enum { | ||
162 | CompatTwister = 0, /* Compatibility Twister */ | ||
163 | AltTwister = 1, /* Alternate Twister of internal 8-way */ | ||
164 | CompatCyclone = 2, /* Compatibility Cyclone */ | ||
165 | AltCyclone = 3, /* Alternate Cyclone of internal 8-way */ | ||
166 | CompatWPEG = 4, /* Compatibility WPEG */ | ||
167 | AltWPEG = 5, /* Second Planar WPEG */ | ||
168 | LookOutAWPEG = 6, /* LookOut WPEG */ | ||
169 | LookOutBWPEG = 7, /* LookOut WPEG */ | ||
170 | } node_type; | ||
171 | |||
172 | static inline int is_WPEG(struct rio_detail *rio){ | ||
173 | return (rio->type == CompatWPEG || rio->type == AltWPEG || | ||
174 | rio->type == LookOutAWPEG || rio->type == LookOutBWPEG); | ||
175 | } | ||
176 | |||
177 | #define SUMMIT_APIC_DFR_VALUE (APIC_DFR_CLUSTER) | ||
178 | |||
179 | static const struct cpumask *summit_target_cpus(void) | ||
180 | { | ||
181 | /* CPU_MASK_ALL (0xff) has undefined behaviour with | ||
182 | * dest_LowestPrio mode logical clustered apic interrupt routing | ||
183 | * Just start on cpu 0. IRQ balancing will spread load | ||
184 | */ | ||
185 | return cpumask_of(0); | ||
186 | } | ||
187 | |||
188 | static unsigned long summit_check_apicid_used(physid_mask_t *map, int apicid) | ||
189 | { | ||
190 | return 0; | ||
191 | } | ||
192 | |||
193 | /* we don't use the phys_cpu_present_map to indicate apicid presence */ | ||
194 | static unsigned long summit_check_apicid_present(int bit) | ||
195 | { | ||
196 | return 1; | ||
197 | } | ||
198 | |||
199 | static int summit_early_logical_apicid(int cpu) | ||
200 | { | ||
201 | int count = 0; | ||
202 | u8 my_id = early_per_cpu(x86_cpu_to_apicid, cpu); | ||
203 | u8 my_cluster = APIC_CLUSTER(my_id); | ||
204 | #ifdef CONFIG_SMP | ||
205 | u8 lid; | ||
206 | int i; | ||
207 | |||
208 | /* Create logical APIC IDs by counting CPUs already in cluster. */ | ||
209 | for (count = 0, i = nr_cpu_ids; --i >= 0; ) { | ||
210 | lid = early_per_cpu(x86_cpu_to_logical_apicid, i); | ||
211 | if (lid != BAD_APICID && APIC_CLUSTER(lid) == my_cluster) | ||
212 | ++count; | ||
213 | } | ||
214 | #endif | ||
215 | /* We only have a 4-bit-wide bitmap in cluster mode. If a deranged | ||
216 | * BIOS puts 5 CPUs in one APIC cluster, we're hosed. */ | ||
217 | BUG_ON(count >= XAPIC_DEST_CPUS_SHIFT); | ||
218 | return my_cluster | (1UL << count); | ||
219 | } | ||
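/*
 * Worked example (illustrative, assuming APIC_CLUSTER() masks the upper
 * nibble as in xAPIC cluster mode): with physical APIC IDs 0x10, 0x11
 * and 0x20 on cpus 0-2, the counting loop above assigns:
 *
 *   cpu0: cluster 0x10, 0 earlier CPUs there -> 0x10 | (1 << 0) = 0x11
 *   cpu1: cluster 0x10, 1 earlier CPU  there -> 0x10 | (1 << 1) = 0x12
 *   cpu2: cluster 0x20, 0 earlier CPUs there -> 0x20 | (1 << 0) = 0x21
 *
 * The low nibble is a one-hot bitmap, hence the BUG_ON() once a
 * cluster would need a fifth bit.
 */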
220 | |||
221 | static void summit_init_apic_ldr(void) | ||
222 | { | ||
223 | int cpu = smp_processor_id(); | ||
224 | unsigned long id = early_per_cpu(x86_cpu_to_logical_apicid, cpu); | ||
225 | unsigned long val; | ||
226 | |||
227 | apic_write(APIC_DFR, SUMMIT_APIC_DFR_VALUE); | ||
228 | val = apic_read(APIC_LDR) & ~APIC_LDR_MASK; | ||
229 | val |= SET_APIC_LOGICAL_ID(id); | ||
230 | apic_write(APIC_LDR, val); | ||
231 | } | ||
232 | |||
233 | static int summit_apic_id_registered(void) | ||
234 | { | ||
235 | return 1; | ||
236 | } | ||
237 | |||
238 | static void summit_setup_apic_routing(void) | ||
239 | { | ||
240 | pr_info("Enabling APIC mode: Summit. Using %d I/O APICs\n", | ||
241 | nr_ioapics); | ||
242 | } | ||
243 | |||
244 | static int summit_cpu_present_to_apicid(int mps_cpu) | ||
245 | { | ||
246 | if (mps_cpu < nr_cpu_ids) | ||
247 | return (int)per_cpu(x86_bios_cpu_apicid, mps_cpu); | ||
248 | else | ||
249 | return BAD_APICID; | ||
250 | } | ||
251 | |||
252 | static void summit_ioapic_phys_id_map(physid_mask_t *phys_id_map, physid_mask_t *retmap) | ||
253 | { | ||
254 | /* For clustered we don't have a good way to do this yet - hack */ | ||
255 | physids_promote(0x0FL, retmap); | ||
256 | } | ||
257 | |||
258 | static void summit_apicid_to_cpu_present(int apicid, physid_mask_t *retmap) | ||
259 | { | ||
260 | physid_set_mask_of_physid(0, retmap); | ||
261 | } | ||
262 | |||
263 | static int summit_check_phys_apicid_present(int physical_apicid) | ||
264 | { | ||
265 | return 1; | ||
266 | } | ||
267 | |||
268 | static inline int | ||
269 | summit_cpu_mask_to_apicid(const struct cpumask *cpumask, unsigned int *dest_id) | ||
270 | { | ||
271 | unsigned int round = 0; | ||
272 | unsigned int cpu, apicid = 0; | ||
273 | |||
274 | /* | ||
275 | * The CPUs in the mask must all be in the same APIC cluster. | ||
276 | */ | ||
277 | for_each_cpu_and(cpu, cpumask, cpu_online_mask) { | ||
278 | int new_apicid = early_per_cpu(x86_cpu_to_logical_apicid, cpu); | ||
279 | |||
280 | if (round && APIC_CLUSTER(apicid) != APIC_CLUSTER(new_apicid)) { | ||
281 | pr_err("Not a valid mask!\n"); | ||
282 | return -EINVAL; | ||
283 | } | ||
284 | apicid |= new_apicid; | ||
285 | round++; | ||
286 | } | ||
287 | if (!round) | ||
288 | return -EINVAL; | ||
289 | *dest_id = apicid; | ||
290 | return 0; | ||
291 | } | ||
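/*
 * Worked example (illustrative): for logical IDs 0x11 and 0x12, both in
 * cluster 0x10, the loop above yields *dest_id = 0x11 | 0x12 = 0x13, a
 * multi-CPU destination within one cluster. Adding 0x21 (cluster 0x20)
 * to the mask would trip the cluster check and return -EINVAL.
 */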
292 | |||
293 | static int | ||
294 | summit_cpu_mask_to_apicid_and(const struct cpumask *inmask, | ||
295 | const struct cpumask *andmask, | ||
296 | unsigned int *apicid) | ||
297 | { | ||
298 | cpumask_var_t cpumask; | ||
299 | *apicid = early_per_cpu(x86_cpu_to_logical_apicid, 0); | ||
300 | |||
301 | if (!alloc_cpumask_var(&cpumask, GFP_ATOMIC)) | ||
302 | return 0; | ||
303 | |||
304 | cpumask_and(cpumask, inmask, andmask); | ||
305 | summit_cpu_mask_to_apicid(cpumask, apicid); | ||
306 | |||
307 | free_cpumask_var(cpumask); | ||
308 | |||
309 | return 0; | ||
310 | } | ||
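/*
 * Note (descriptive, added for clarity): *apicid is pre-seeded with CPU
 * 0's logical ID above, so if the GFP_ATOMIC cpumask allocation fails
 * the function still returns 0 with a usable single-CPU destination.
 */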
311 | |||
312 | /* | ||
313 | * cpuid returns the value latched in the HW at reset, not the APIC ID | ||
314 | * register's value. For any box whose BIOS changes APIC IDs, like | ||
315 | * clustered APIC systems, we must use hard_smp_processor_id. | ||
316 | * | ||
317 | * See Intel's IA-32 Software Developer's Manual, Vol. 2, under CPUID. | ||
318 | */ | ||
319 | static int summit_phys_pkg_id(int cpuid_apic, int index_msb) | ||
320 | { | ||
321 | return hard_smp_processor_id() >> index_msb; | ||
322 | } | ||
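/*
 * Worked example (illustrative): with index_msb = 4, a CPU whose
 * hardware APIC ID is 0x23 reports physical package 0x23 >> 4 = 2.
 */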
323 | |||
324 | static int probe_summit(void) | ||
325 | { | ||
326 | /* probed later in mptable/ACPI hooks */ | ||
327 | return 0; | ||
328 | } | ||
329 | |||
330 | #ifdef CONFIG_X86_SUMMIT_NUMA | ||
331 | static struct rio_table_hdr *rio_table_hdr; | ||
332 | static struct scal_detail *scal_devs[MAX_NUMNODES]; | ||
333 | static struct rio_detail *rio_devs[MAX_NUMNODES*4]; | ||
334 | |||
335 | #ifndef CONFIG_X86_NUMAQ | ||
336 | static int mp_bus_id_to_node[MAX_MP_BUSSES]; | ||
337 | #endif | ||
338 | |||
339 | static int setup_pci_node_map_for_wpeg(int wpeg_num, int last_bus) | ||
340 | { | ||
341 | int twister = 0, node = 0; | ||
342 | int i, bus, num_buses; | ||
343 | |||
344 | for (i = 0; i < rio_table_hdr->num_rio_dev; i++) { | ||
345 | if (rio_devs[i]->node_id == rio_devs[wpeg_num]->owner_id) { | ||
346 | twister = rio_devs[i]->owner_id; | ||
347 | break; | ||
348 | } | ||
349 | } | ||
350 | if (i == rio_table_hdr->num_rio_dev) { | ||
351 | pr_err("Couldn't find owner Cyclone for Winnipeg!\n"); | ||
352 | return last_bus; | ||
353 | } | ||
354 | |||
355 | for (i = 0; i < rio_table_hdr->num_scal_dev; i++) { | ||
356 | if (scal_devs[i]->node_id == twister) { | ||
357 | node = scal_devs[i]->node_id; | ||
358 | break; | ||
359 | } | ||
360 | } | ||
361 | if (i == rio_table_hdr->num_scal_dev) { | ||
362 | pr_err("Couldn't find owner Twister for Cyclone!\n"); | ||
363 | return last_bus; | ||
364 | } | ||
365 | |||
366 | switch (rio_devs[wpeg_num]->type) { | ||
367 | case CompatWPEG: | ||
368 | /* | ||
369 | * The Compatibility Winnipeg controls the 2 legacy buses, | ||
370 | * the 66MHz PCI bus [2 slots] and the 2 "extra" buses in case | ||
371 | * a PCI-PCI bridge card is used in either slot: total 5 buses. | ||
372 | */ | ||
373 | num_buses = 5; | ||
374 | break; | ||
375 | case AltWPEG: | ||
376 | /* | ||
377 | * The Alternate Winnipeg controls the 2 133MHz buses [1 slot | ||
378 | * each], their 2 "extra" buses, the 100MHz bus [2 slots] and | ||
379 | * the "extra" buses for each of those slots: total 7 buses. | ||
380 | */ | ||
381 | num_buses = 7; | ||
382 | break; | ||
383 | case LookOutAWPEG: | ||
384 | case LookOutBWPEG: | ||
385 | /* | ||
386 | * A Lookout Winnipeg controls 3 100MHz buses [2 slots each] | ||
387 | * & the "extra" buses for each of those slots: total 9 buses. | ||
388 | */ | ||
389 | num_buses = 9; | ||
390 | break; | ||
391 | default: | ||
392 | pr_info("Unsupported Winnipeg type!\n"); | ||
393 | return last_bus; | ||
394 | } | ||
395 | |||
396 | for (bus = last_bus; bus < last_bus + num_buses; bus++) | ||
397 | mp_bus_id_to_node[bus] = node; | ||
398 | return bus; | ||
399 | } | ||
400 | |||
401 | static int build_detail_arrays(void) | ||
402 | { | ||
403 | unsigned long ptr; | ||
404 | int i, scal_detail_size, rio_detail_size; | ||
405 | |||
406 | if (rio_table_hdr->num_scal_dev > MAX_NUMNODES) { | ||
407 | pr_warn("MAX_NUMNODES too low! Defined as %d, but system has %d nodes\n", | ||
408 | MAX_NUMNODES, rio_table_hdr->num_scal_dev); | ||
409 | return 0; | ||
410 | } | ||
411 | |||
412 | switch (rio_table_hdr->version) { | ||
413 | default: | ||
414 | pr_warn("Invalid Rio Grande Table Version: %d\n", | ||
415 | rio_table_hdr->version); | ||
416 | return 0; | ||
417 | case 2: | ||
418 | scal_detail_size = 11; | ||
419 | rio_detail_size = 13; | ||
420 | break; | ||
421 | case 3: | ||
422 | scal_detail_size = 12; | ||
423 | rio_detail_size = 15; | ||
424 | break; | ||
425 | } | ||
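/*
 * Note (descriptive, added for clarity): the hard-coded sizes describe
 * the packed records laid down by the BIOS, not sizeof() of the structs
 * above - per the header comment, version 2 records lack the
 * chassis_num and WP_index fields that version 3 added.
 */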
426 | |||
427 | ptr = (unsigned long)rio_table_hdr + 3; | ||
428 | for (i = 0; i < rio_table_hdr->num_scal_dev; i++, ptr += scal_detail_size) | ||
429 | scal_devs[i] = (struct scal_detail *)ptr; | ||
430 | |||
431 | for (i = 0; i < rio_table_hdr->num_rio_dev; i++, ptr += rio_detail_size) | ||
432 | rio_devs[i] = (struct rio_detail *)ptr; | ||
433 | |||
434 | return 1; | ||
435 | } | ||
436 | |||
437 | void setup_summit(void) | ||
438 | { | ||
439 | unsigned long ptr; | ||
440 | unsigned short offset; | ||
441 | int i, next_wpeg, next_bus = 0; | ||
442 | |||
443 | /* The pointer to the EBDA is stored in the word @ phys 0x40E (40:0E) */ | ||
444 | ptr = get_bios_ebda(); | ||
445 | ptr = (unsigned long)phys_to_virt(ptr); | ||
446 | |||
447 | rio_table_hdr = NULL; | ||
448 | offset = 0x180; | ||
449 | while (offset) { | ||
450 | /* The block id is stored in the 2nd word */ | ||
451 | if (*((unsigned short *)(ptr + offset + 2)) == 0x4752) { | ||
452 | /* set the pointer past the offset & block id */ | ||
453 | rio_table_hdr = (struct rio_table_hdr *)(ptr + offset + 4); | ||
454 | break; | ||
455 | } | ||
456 | /* The next offset is stored in the 1st word. 0 means no more */ | ||
457 | offset = *((unsigned short *)(ptr + offset)); | ||
458 | } | ||
459 | if (!rio_table_hdr) { | ||
460 | pr_err("Unable to locate Rio Grande Table in EBDA - bailing!\n"); | ||
461 | return; | ||
462 | } | ||
463 | |||
464 | if (!build_detail_arrays()) | ||
465 | return; | ||
466 | |||
467 | /* The first Winnipeg we're looking for has an index of 0 */ | ||
468 | next_wpeg = 0; | ||
469 | do { | ||
470 | for (i = 0; i < rio_table_hdr->num_rio_dev; i++) { | ||
471 | if (is_WPEG(rio_devs[i]) && rio_devs[i]->WP_index == next_wpeg) { | ||
472 | /* It's the Winnipeg we're looking for! */ | ||
473 | next_bus = setup_pci_node_map_for_wpeg(i, next_bus); | ||
474 | next_wpeg++; | ||
475 | break; | ||
476 | } | ||
477 | } | ||
478 | /* | ||
479 | * If we go through all Rio devices and don't find one with | ||
480 | * the next index, it means we've found all the Winnipegs, | ||
481 | * and thus all the PCI buses. | ||
482 | */ | ||
483 | if (i == rio_table_hdr->num_rio_dev) | ||
484 | next_wpeg = 0; | ||
485 | } while (next_wpeg != 0); | ||
486 | } | ||
487 | #endif | ||
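/*
 * Illustrative note on the scan in setup_summit() above (layout inferred
 * from the code): each EBDA block starts with a u16 link word (offset of
 * the next block; 0 terminates the chain) followed by a u16 block id.
 * 0x4752 is the little-endian word for the ASCII signature "RG" (Rio
 * Grande), and the table header proper begins 4 bytes in, past the link
 * word and the id.
 */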
488 | |||
489 | static struct apic apic_summit = { | ||
490 | |||
491 | .name = "summit", | ||
492 | .probe = probe_summit, | ||
493 | .acpi_madt_oem_check = summit_acpi_madt_oem_check, | ||
494 | .apic_id_valid = default_apic_id_valid, | ||
495 | .apic_id_registered = summit_apic_id_registered, | ||
496 | |||
497 | .irq_delivery_mode = dest_LowestPrio, | ||
498 | /* logical delivery broadcast to all CPUs: */ | ||
499 | .irq_dest_mode = 1, | ||
500 | |||
501 | .target_cpus = summit_target_cpus, | ||
502 | .disable_esr = 1, | ||
503 | .dest_logical = APIC_DEST_LOGICAL, | ||
504 | .check_apicid_used = summit_check_apicid_used, | ||
505 | .check_apicid_present = summit_check_apicid_present, | ||
506 | |||
507 | .vector_allocation_domain = flat_vector_allocation_domain, | ||
508 | .init_apic_ldr = summit_init_apic_ldr, | ||
509 | |||
510 | .ioapic_phys_id_map = summit_ioapic_phys_id_map, | ||
511 | .setup_apic_routing = summit_setup_apic_routing, | ||
512 | .multi_timer_check = NULL, | ||
513 | .cpu_present_to_apicid = summit_cpu_present_to_apicid, | ||
514 | .apicid_to_cpu_present = summit_apicid_to_cpu_present, | ||
515 | .setup_portio_remap = NULL, | ||
516 | .check_phys_apicid_present = summit_check_phys_apicid_present, | ||
517 | .enable_apic_mode = NULL, | ||
518 | .phys_pkg_id = summit_phys_pkg_id, | ||
519 | .mps_oem_check = summit_mps_oem_check, | ||
520 | |||
521 | .get_apic_id = summit_get_apic_id, | ||
522 | .set_apic_id = NULL, | ||
523 | .apic_id_mask = 0xFF << 24, | ||
524 | |||
525 | .cpu_mask_to_apicid_and = summit_cpu_mask_to_apicid_and, | ||
526 | |||
527 | .send_IPI_mask = summit_send_IPI_mask, | ||
528 | .send_IPI_mask_allbutself = NULL, | ||
529 | .send_IPI_allbutself = summit_send_IPI_allbutself, | ||
530 | .send_IPI_all = summit_send_IPI_all, | ||
531 | .send_IPI_self = default_send_IPI_self, | ||
532 | |||
533 | .trampoline_phys_low = DEFAULT_TRAMPOLINE_PHYS_LOW, | ||
534 | .trampoline_phys_high = DEFAULT_TRAMPOLINE_PHYS_HIGH, | ||
535 | |||
536 | .wait_for_init_deassert = default_wait_for_init_deassert, | ||
537 | |||
538 | .smp_callin_clear_local_apic = NULL, | ||
539 | .inquire_remote_apic = default_inquire_remote_apic, | ||
540 | |||
541 | .read = native_apic_mem_read, | ||
542 | .write = native_apic_mem_write, | ||
543 | .eoi_write = native_apic_mem_write, | ||
544 | .icr_read = native_apic_icr_read, | ||
545 | .icr_write = native_apic_icr_write, | ||
546 | .wait_icr_idle = native_apic_wait_icr_idle, | ||
547 | .safe_wait_icr_idle = native_safe_apic_wait_icr_idle, | ||
548 | |||
549 | .x86_32_early_logical_apicid = summit_early_logical_apicid, | ||
550 | }; | ||
551 | |||
552 | apic_driver(apic_summit); | ||
diff --git a/arch/x86/kernel/apic/x2apic_cluster.c b/arch/x86/kernel/apic/x2apic_cluster.c index 140e29db478d..e66766bf1641 100644 --- a/arch/x86/kernel/apic/x2apic_cluster.c +++ b/arch/x86/kernel/apic/x2apic_cluster.c | |||
@@ -3,7 +3,6 @@ | |||
3 | #include <linux/string.h> | 3 | #include <linux/string.h> |
4 | #include <linux/kernel.h> | 4 | #include <linux/kernel.h> |
5 | #include <linux/ctype.h> | 5 | #include <linux/ctype.h> |
6 | #include <linux/init.h> | ||
7 | #include <linux/dmar.h> | 6 | #include <linux/dmar.h> |
8 | #include <linux/cpu.h> | 7 | #include <linux/cpu.h> |
9 | 8 | ||
@@ -280,7 +279,7 @@ static struct apic apic_x2apic_cluster = { | |||
280 | 279 | ||
281 | .trampoline_phys_low = DEFAULT_TRAMPOLINE_PHYS_LOW, | 280 | .trampoline_phys_low = DEFAULT_TRAMPOLINE_PHYS_LOW, |
282 | .trampoline_phys_high = DEFAULT_TRAMPOLINE_PHYS_HIGH, | 281 | .trampoline_phys_high = DEFAULT_TRAMPOLINE_PHYS_HIGH, |
283 | .wait_for_init_deassert = NULL, | 282 | .wait_for_init_deassert = false, |
284 | .smp_callin_clear_local_apic = NULL, | 283 | .smp_callin_clear_local_apic = NULL, |
285 | .inquire_remote_apic = NULL, | 284 | .inquire_remote_apic = NULL, |
286 | 285 | ||
diff --git a/arch/x86/kernel/apic/x2apic_phys.c b/arch/x86/kernel/apic/x2apic_phys.c index 562a76d433c8..6d600ebf6c12 100644 --- a/arch/x86/kernel/apic/x2apic_phys.c +++ b/arch/x86/kernel/apic/x2apic_phys.c | |||
@@ -3,7 +3,6 @@ | |||
3 | #include <linux/string.h> | 3 | #include <linux/string.h> |
4 | #include <linux/kernel.h> | 4 | #include <linux/kernel.h> |
5 | #include <linux/ctype.h> | 5 | #include <linux/ctype.h> |
6 | #include <linux/init.h> | ||
7 | #include <linux/dmar.h> | 6 | #include <linux/dmar.h> |
8 | 7 | ||
9 | #include <asm/smp.h> | 8 | #include <asm/smp.h> |
@@ -134,7 +133,7 @@ static struct apic apic_x2apic_phys = { | |||
134 | 133 | ||
135 | .trampoline_phys_low = DEFAULT_TRAMPOLINE_PHYS_LOW, | 134 | .trampoline_phys_low = DEFAULT_TRAMPOLINE_PHYS_LOW, |
136 | .trampoline_phys_high = DEFAULT_TRAMPOLINE_PHYS_HIGH, | 135 | .trampoline_phys_high = DEFAULT_TRAMPOLINE_PHYS_HIGH, |
137 | .wait_for_init_deassert = NULL, | 136 | .wait_for_init_deassert = false, |
138 | .smp_callin_clear_local_apic = NULL, | 137 | .smp_callin_clear_local_apic = NULL, |
139 | .inquire_remote_apic = NULL, | 138 | .inquire_remote_apic = NULL, |
140 | 139 | ||
diff --git a/arch/x86/kernel/apic/x2apic_uv_x.c b/arch/x86/kernel/apic/x2apic_uv_x.c index ad0dc0428baf..7834389ba5be 100644 --- a/arch/x86/kernel/apic/x2apic_uv_x.c +++ b/arch/x86/kernel/apic/x2apic_uv_x.c | |||
@@ -396,7 +396,7 @@ static struct apic __refdata apic_x2apic_uv_x = { | |||
396 | .wakeup_secondary_cpu = uv_wakeup_secondary, | 396 | .wakeup_secondary_cpu = uv_wakeup_secondary, |
397 | .trampoline_phys_low = DEFAULT_TRAMPOLINE_PHYS_LOW, | 397 | .trampoline_phys_low = DEFAULT_TRAMPOLINE_PHYS_LOW, |
398 | .trampoline_phys_high = DEFAULT_TRAMPOLINE_PHYS_HIGH, | 398 | .trampoline_phys_high = DEFAULT_TRAMPOLINE_PHYS_HIGH, |
399 | .wait_for_init_deassert = NULL, | 399 | .wait_for_init_deassert = false, |
400 | .smp_callin_clear_local_apic = NULL, | 400 | .smp_callin_clear_local_apic = NULL, |
401 | .inquire_remote_apic = NULL, | 401 | .inquire_remote_apic = NULL, |
402 | 402 | ||
@@ -980,7 +980,6 @@ void __init uv_system_init(void) | |||
980 | uv_nmi_setup(); | 980 | uv_nmi_setup(); |
981 | uv_cpu_init(); | 981 | uv_cpu_init(); |
982 | uv_scir_register_cpu_notifier(); | 982 | uv_scir_register_cpu_notifier(); |
983 | uv_register_nmi_notifier(); | ||
984 | proc_mkdir("sgi_uv", NULL); | 983 | proc_mkdir("sgi_uv", NULL); |
985 | 984 | ||
986 | /* register Legacy VGA I/O redirection handler */ | 985 | /* register Legacy VGA I/O redirection handler */ |
diff --git a/arch/x86/kernel/check.c b/arch/x86/kernel/check.c index e2dbcb7dabdd..83a7995625a6 100644 --- a/arch/x86/kernel/check.c +++ b/arch/x86/kernel/check.c | |||
@@ -91,7 +91,7 @@ void __init setup_bios_corruption_check(void) | |||
91 | 91 | ||
92 | corruption_check_size = round_up(corruption_check_size, PAGE_SIZE); | 92 | corruption_check_size = round_up(corruption_check_size, PAGE_SIZE); |
93 | 93 | ||
94 | for_each_free_mem_range(i, MAX_NUMNODES, &start, &end, NULL) { | 94 | for_each_free_mem_range(i, NUMA_NO_NODE, &start, &end, NULL) { |
95 | start = clamp_t(phys_addr_t, round_up(start, PAGE_SIZE), | 95 | start = clamp_t(phys_addr_t, round_up(start, PAGE_SIZE), |
96 | PAGE_SIZE, corruption_check_size); | 96 | PAGE_SIZE, corruption_check_size); |
97 | end = clamp_t(phys_addr_t, round_down(end, PAGE_SIZE), | 97 | end = clamp_t(phys_addr_t, round_down(end, PAGE_SIZE), |
diff --git a/arch/x86/kernel/cpu/Makefile b/arch/x86/kernel/cpu/Makefile index 47b56a7e99cb..7fd54f09b011 100644 --- a/arch/x86/kernel/cpu/Makefile +++ b/arch/x86/kernel/cpu/Makefile | |||
@@ -36,12 +36,13 @@ obj-$(CONFIG_CPU_SUP_AMD) += perf_event_amd_iommu.o | |||
36 | endif | 36 | endif |
37 | obj-$(CONFIG_CPU_SUP_INTEL) += perf_event_p6.o perf_event_knc.o perf_event_p4.o | 37 | obj-$(CONFIG_CPU_SUP_INTEL) += perf_event_p6.o perf_event_knc.o perf_event_p4.o |
38 | obj-$(CONFIG_CPU_SUP_INTEL) += perf_event_intel_lbr.o perf_event_intel_ds.o perf_event_intel.o | 38 | obj-$(CONFIG_CPU_SUP_INTEL) += perf_event_intel_lbr.o perf_event_intel_ds.o perf_event_intel.o |
39 | obj-$(CONFIG_CPU_SUP_INTEL) += perf_event_intel_uncore.o | 39 | obj-$(CONFIG_CPU_SUP_INTEL) += perf_event_intel_uncore.o perf_event_intel_rapl.o |
40 | endif | 40 | endif |
41 | 41 | ||
42 | 42 | ||
43 | obj-$(CONFIG_X86_MCE) += mcheck/ | 43 | obj-$(CONFIG_X86_MCE) += mcheck/ |
44 | obj-$(CONFIG_MTRR) += mtrr/ | 44 | obj-$(CONFIG_MTRR) += mtrr/ |
45 | obj-$(CONFIG_MICROCODE) += microcode/ | ||
45 | 46 | ||
46 | obj-$(CONFIG_X86_LOCAL_APIC) += perfctr-watchdog.o perf_event_amd_ibs.o | 47 | obj-$(CONFIG_X86_LOCAL_APIC) += perfctr-watchdog.o perf_event_amd_ibs.o |
47 | 48 | ||
diff --git a/arch/x86/kernel/cpu/amd.c b/arch/x86/kernel/cpu/amd.c index bca023bdd6b2..ce8b8ff0e0ef 100644 --- a/arch/x86/kernel/cpu/amd.c +++ b/arch/x86/kernel/cpu/amd.c | |||
@@ -1,5 +1,4 @@ | |||
1 | #include <linux/export.h> | 1 | #include <linux/export.h> |
2 | #include <linux/init.h> | ||
3 | #include <linux/bitops.h> | 2 | #include <linux/bitops.h> |
4 | #include <linux/elf.h> | 3 | #include <linux/elf.h> |
5 | #include <linux/mm.h> | 4 | #include <linux/mm.h> |
@@ -219,7 +218,7 @@ static void amd_k7_smp_check(struct cpuinfo_x86 *c) | |||
219 | */ | 218 | */ |
220 | WARN_ONCE(1, "WARNING: This combination of AMD" | 219 | WARN_ONCE(1, "WARNING: This combination of AMD" |
221 | " processors is not suitable for SMP.\n"); | 220 | " processors is not suitable for SMP.\n"); |
222 | add_taint(TAINT_UNSAFE_SMP, LOCKDEP_NOW_UNRELIABLE); | 221 | add_taint(TAINT_CPU_OUT_OF_SPEC, LOCKDEP_NOW_UNRELIABLE); |
223 | } | 222 | } |
224 | 223 | ||
225 | static void init_amd_k7(struct cpuinfo_x86 *c) | 224 | static void init_amd_k7(struct cpuinfo_x86 *c) |
@@ -234,9 +233,7 @@ static void init_amd_k7(struct cpuinfo_x86 *c) | |||
234 | if (c->x86_model >= 6 && c->x86_model <= 10) { | 233 | if (c->x86_model >= 6 && c->x86_model <= 10) { |
235 | if (!cpu_has(c, X86_FEATURE_XMM)) { | 234 | if (!cpu_has(c, X86_FEATURE_XMM)) { |
236 | printk(KERN_INFO "Enabling disabled K7/SSE Support.\n"); | 235 | printk(KERN_INFO "Enabling disabled K7/SSE Support.\n"); |
237 | rdmsr(MSR_K7_HWCR, l, h); | 236 | msr_clear_bit(MSR_K7_HWCR, 15); |
238 | l &= ~0x00008000; | ||
239 | wrmsr(MSR_K7_HWCR, l, h); | ||
240 | set_cpu_cap(c, X86_FEATURE_XMM); | 237 | set_cpu_cap(c, X86_FEATURE_XMM); |
241 | } | 238 | } |
242 | } | 239 | } |
@@ -487,7 +484,7 @@ static void early_init_amd(struct cpuinfo_x86 *c) | |||
487 | set_cpu_cap(c, X86_FEATURE_CONSTANT_TSC); | 484 | set_cpu_cap(c, X86_FEATURE_CONSTANT_TSC); |
488 | set_cpu_cap(c, X86_FEATURE_NONSTOP_TSC); | 485 | set_cpu_cap(c, X86_FEATURE_NONSTOP_TSC); |
489 | if (!check_tsc_unstable()) | 486 | if (!check_tsc_unstable()) |
490 | sched_clock_stable = 1; | 487 | set_sched_clock_stable(); |
491 | } | 488 | } |
492 | 489 | ||
493 | #ifdef CONFIG_X86_64 | 490 | #ifdef CONFIG_X86_64 |
@@ -508,6 +505,10 @@ static void early_init_amd(struct cpuinfo_x86 *c) | |||
508 | set_cpu_cap(c, X86_FEATURE_EXTD_APICID); | 505 | set_cpu_cap(c, X86_FEATURE_EXTD_APICID); |
509 | } | 506 | } |
510 | #endif | 507 | #endif |
508 | |||
509 | /* F16h erratum 793, CVE-2013-6885 */ | ||
510 | if (c->x86 == 0x16 && c->x86_model <= 0xf) | ||
511 | msr_set_bit(MSR_AMD64_LS_CFG, 15); | ||
511 | } | 512 | } |
512 | 513 | ||
513 | static const int amd_erratum_383[]; | 514 | static const int amd_erratum_383[]; |
@@ -527,11 +528,8 @@ static void init_amd(struct cpuinfo_x86 *c) | |||
527 | * Errata 63 for SH-B3 steppings | 528 | * Errata 63 for SH-B3 steppings |
528 | * Errata 122 for all steppings (F+ have it disabled by default) | 529 | * Errata 122 for all steppings (F+ have it disabled by default) |
529 | */ | 530 | */ |
530 | if (c->x86 == 0xf) { | 531 | if (c->x86 == 0xf) |
531 | rdmsrl(MSR_K7_HWCR, value); | 532 | msr_set_bit(MSR_K7_HWCR, 6); |
532 | value |= 1 << 6; | ||
533 | wrmsrl(MSR_K7_HWCR, value); | ||
534 | } | ||
535 | #endif | 533 | #endif |
536 | 534 | ||
537 | early_init_amd(c); | 535 | early_init_amd(c); |
@@ -614,14 +612,11 @@ static void init_amd(struct cpuinfo_x86 *c) | |||
614 | (c->x86_model >= 0x10) && (c->x86_model <= 0x1f) && | 612 | (c->x86_model >= 0x10) && (c->x86_model <= 0x1f) && |
615 | !cpu_has(c, X86_FEATURE_TOPOEXT)) { | 613 | !cpu_has(c, X86_FEATURE_TOPOEXT)) { |
616 | 614 | ||
617 | if (!rdmsrl_safe(0xc0011005, &value)) { | 615 | if (msr_set_bit(0xc0011005, 54) > 0) { |
618 | value |= 1ULL << 54; | ||
619 | wrmsrl_safe(0xc0011005, value); | ||
620 | rdmsrl(0xc0011005, value); | 616 | rdmsrl(0xc0011005, value); |
621 | if (value & (1ULL << 54)) { | 617 | if (value & BIT_64(54)) { |
622 | set_cpu_cap(c, X86_FEATURE_TOPOEXT); | 618 | set_cpu_cap(c, X86_FEATURE_TOPOEXT); |
623 | printk(KERN_INFO FW_INFO "CPU: Re-enabling " | 619 | pr_info(FW_INFO "CPU: Re-enabling disabled Topology Extensions Support.\n"); |
624 | "disabled Topology Extensions Support\n"); | ||
625 | } | 620 | } |
626 | } | 621 | } |
627 | } | 622 | } |
@@ -700,19 +695,12 @@ static void init_amd(struct cpuinfo_x86 *c) | |||
700 | * Disable GART TLB Walk Errors on Fam10h. We do this here | 695 | * Disable GART TLB Walk Errors on Fam10h. We do this here |
701 | * because this is always needed when GART is enabled, even in a | 696 | * because this is always needed when GART is enabled, even in a |
702 | * kernel which has no MCE support built in. | 697 | * kernel which has no MCE support built in. |
703 | * BIOS should disable GartTlbWlk Errors themself. If | 698 | * BIOS should disable GartTlbWlk Errors already. If |
704 | * it doesn't do it here as suggested by the BKDG. | 699 | * it doesn't, do it here as suggested by the BKDG. |
705 | * | 700 | * |
706 | * Fixes: https://bugzilla.kernel.org/show_bug.cgi?id=33012 | 701 | * Fixes: https://bugzilla.kernel.org/show_bug.cgi?id=33012 |
707 | */ | 702 | */ |
708 | u64 mask; | 703 | msr_set_bit(MSR_AMD64_MCx_MASK(4), 10); |
709 | int err; | ||
710 | |||
711 | err = rdmsrl_safe(MSR_AMD64_MCx_MASK(4), &mask); | ||
712 | if (err == 0) { | ||
713 | mask |= (1 << 10); | ||
714 | wrmsrl_safe(MSR_AMD64_MCx_MASK(4), mask); | ||
715 | } | ||
716 | 704 | ||
717 | /* | 705 | /* |
718 | * On family 10h BIOS may not have properly enabled WC+ support, | 706 | * On family 10h BIOS may not have properly enabled WC+ support, |
@@ -724,10 +712,7 @@ static void init_amd(struct cpuinfo_x86 *c) | |||
724 | * NOTE: we want to use the _safe accessors so as not to #GP kvm | 712 | * NOTE: we want to use the _safe accessors so as not to #GP kvm |
725 | * guests on older kvm hosts. | 713 | * guests on older kvm hosts. |
726 | */ | 714 | */ |
727 | 715 | msr_clear_bit(MSR_AMD64_BU_CFG2, 24); | |
728 | rdmsrl_safe(MSR_AMD64_BU_CFG2, &value); | ||
729 | value &= ~(1ULL << 24); | ||
730 | wrmsrl_safe(MSR_AMD64_BU_CFG2, value); | ||
731 | 716 | ||
732 | if (cpu_has_amd_erratum(c, amd_erratum_383)) | 717 | if (cpu_has_amd_erratum(c, amd_erratum_383)) |
733 | set_cpu_bug(c, X86_BUG_AMD_TLB_MMATCH); | 718 | set_cpu_bug(c, X86_BUG_AMD_TLB_MMATCH); |
@@ -758,10 +743,7 @@ static unsigned int amd_size_cache(struct cpuinfo_x86 *c, unsigned int size) | |||
758 | 743 | ||
759 | static void cpu_set_tlb_flushall_shift(struct cpuinfo_x86 *c) | 744 | static void cpu_set_tlb_flushall_shift(struct cpuinfo_x86 *c) |
760 | { | 745 | { |
761 | tlb_flushall_shift = 5; | 746 | tlb_flushall_shift = 6; |
762 | |||
763 | if (c->x86 <= 0x11) | ||
764 | tlb_flushall_shift = 4; | ||
765 | } | 747 | } |
766 | 748 | ||
767 | static void cpu_detect_tlb_amd(struct cpuinfo_x86 *c) | 749 | static void cpu_detect_tlb_amd(struct cpuinfo_x86 *c) |
@@ -790,14 +772,10 @@ static void cpu_detect_tlb_amd(struct cpuinfo_x86 *c) | |||
790 | } | 772 | } |
791 | 773 | ||
792 | /* Handle DTLB 2M and 4M sizes, fall back to L1 if L2 is disabled */ | 774 | /* Handle DTLB 2M and 4M sizes, fall back to L1 if L2 is disabled */ |
793 | if (!((eax >> 16) & mask)) { | 775 | if (!((eax >> 16) & mask)) |
794 | u32 a, b, c, d; | 776 | tlb_lld_2m[ENTRIES] = (cpuid_eax(0x80000005) >> 16) & 0xff; |
795 | 777 | else | |
796 | cpuid(0x80000005, &a, &b, &c, &d); | ||
797 | tlb_lld_2m[ENTRIES] = (a >> 16) & 0xff; | ||
798 | } else { | ||
799 | tlb_lld_2m[ENTRIES] = (eax >> 16) & mask; | 778 | tlb_lld_2m[ENTRIES] = (eax >> 16) & mask; |
800 | } | ||
801 | 779 | ||
802 | /* a 4M entry uses two 2M entries */ | 780 | /* a 4M entry uses two 2M entries */ |
803 | tlb_lld_4m[ENTRIES] = tlb_lld_2m[ENTRIES] >> 1; | 781 | tlb_lld_4m[ENTRIES] = tlb_lld_2m[ENTRIES] >> 1; |
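The amd.c hunks above convert open-coded rdmsr/modify/wrmsr sequences to
the msr_set_bit()/msr_clear_bit() helpers. A minimal sketch of the
assumed contract (negative on MSR access error, 0 if the bit already had
the requested value, positive if it was actually flipped) - an
illustrative model only, not the kernel implementation:

	static int msr_bit_model(u64 *msr, int bit, bool set)
	{
		u64 old = *msr, mask = BIT_64(bit);

		*msr = set ? (*msr | mask) : (*msr & ~mask);
		return *msr != old;	/* 1 = changed, 0 = no change needed */
	}

This is why the TOPOEXT hunk re-reads MSR 0xc0011005 only when
msr_set_bit() returns a positive value.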
diff --git a/arch/x86/kernel/cpu/centaur.c b/arch/x86/kernel/cpu/centaur.c index 8d5652dc99dd..d8fba5c15fbd 100644 --- a/arch/x86/kernel/cpu/centaur.c +++ b/arch/x86/kernel/cpu/centaur.c | |||
@@ -1,6 +1,5 @@ | |||
1 | #include <linux/bitops.h> | 1 | #include <linux/bitops.h> |
2 | #include <linux/kernel.h> | 2 | #include <linux/kernel.h> |
3 | #include <linux/init.h> | ||
4 | 3 | ||
5 | #include <asm/processor.h> | 4 | #include <asm/processor.h> |
6 | #include <asm/e820.h> | 5 | #include <asm/e820.h> |
@@ -9,236 +8,6 @@ | |||
9 | 8 | ||
10 | #include "cpu.h" | 9 | #include "cpu.h" |
11 | 10 | ||
12 | #ifdef CONFIG_X86_OOSTORE | ||
13 | |||
14 | static u32 power2(u32 x) | ||
15 | { | ||
16 | u32 s = 1; | ||
17 | |||
18 | while (s <= x) | ||
19 | s <<= 1; | ||
20 | |||
21 | return s >>= 1; | ||
22 | } | ||
23 | |||
24 | |||
25 | /* | ||
26 | * Set up an actual MCR | ||
27 | */ | ||
28 | static void centaur_mcr_insert(int reg, u32 base, u32 size, int key) | ||
29 | { | ||
30 | u32 lo, hi; | ||
31 | |||
32 | hi = base & ~0xFFF; | ||
33 | lo = ~(size-1); /* Size is a power of 2 so this makes a mask */ | ||
34 | lo &= ~0xFFF; /* Remove the ctrl value bits */ | ||
35 | lo |= key; /* Attribute we wish to set */ | ||
36 | wrmsr(reg+MSR_IDT_MCR0, lo, hi); | ||
37 | mtrr_centaur_report_mcr(reg, lo, hi); /* Tell the mtrr driver */ | ||
38 | } | ||
39 | |||
40 | /* | ||
41 | * Figure what we can cover with MCR's | ||
42 | * | ||
43 | * Shortcut: We know you can't put 4Gig of RAM on a winchip | ||
44 | */ | ||
45 | static u32 ramtop(void) | ||
46 | { | ||
47 | u32 clip = 0xFFFFFFFFUL; | ||
48 | u32 top = 0; | ||
49 | int i; | ||
50 | |||
51 | for (i = 0; i < e820.nr_map; i++) { | ||
52 | unsigned long start, end; | ||
53 | |||
54 | if (e820.map[i].addr > 0xFFFFFFFFUL) | ||
55 | continue; | ||
56 | /* | ||
57 | * Don't MCR over reserved space. Ignore the ISA hole | ||
58 | * we frob around that catastrophe already | ||
59 | */ | ||
60 | if (e820.map[i].type == E820_RESERVED) { | ||
61 | if (e820.map[i].addr >= 0x100000UL && | ||
62 | e820.map[i].addr < clip) | ||
63 | clip = e820.map[i].addr; | ||
64 | continue; | ||
65 | } | ||
66 | start = e820.map[i].addr; | ||
67 | end = e820.map[i].addr + e820.map[i].size; | ||
68 | if (start >= end) | ||
69 | continue; | ||
70 | if (end > top) | ||
71 | top = end; | ||
72 | } | ||
73 | /* | ||
74 | * Everything below 'top' should be RAM except for the ISA hole. | ||
75 | * Because of the limited MCR's we want to map NV/ACPI into our | ||
76 | * MCR range for gunk in RAM | ||
77 | * | ||
78 | * Clip might cause us to MCR insufficient RAM but that is an | ||
79 | * acceptable failure mode and should only bite obscure boxes with | ||
80 | * a VESA hole at 15Mb | ||
81 | * | ||
82 | * The second case Clip sometimes kicks in is when the EBDA is marked | ||
83 | * as reserved. Again we fail safe with reasonable results | ||
84 | */ | ||
85 | if (top > clip) | ||
86 | top = clip; | ||
87 | |||
88 | return top; | ||
89 | } | ||
90 | |||
91 | /* | ||
92 | * Compute a set of MCR's to give maximum coverage | ||
93 | */ | ||
94 | static int centaur_mcr_compute(int nr, int key) | ||
95 | { | ||
96 | u32 mem = ramtop(); | ||
97 | u32 root = power2(mem); | ||
98 | u32 base = root; | ||
99 | u32 top = root; | ||
100 | u32 floor = 0; | ||
101 | int ct = 0; | ||
102 | |||
103 | while (ct < nr) { | ||
104 | u32 fspace = 0; | ||
105 | u32 high; | ||
106 | u32 low; | ||
107 | |||
108 | /* | ||
109 | * Find the largest block we will fill going upwards | ||
110 | */ | ||
111 | high = power2(mem-top); | ||
112 | |||
113 | /* | ||
114 | * Find the largest block we will fill going downwards | ||
115 | */ | ||
116 | low = base/2; | ||
117 | |||
118 | /* | ||
119 | * Don't fill below 1Mb going downwards as there | ||
120 | * is an ISA hole in the way. | ||
121 | */ | ||
122 | if (base <= 1024*1024) | ||
123 | low = 0; | ||
124 | |||
125 | /* | ||
126 | * See how much space we could cover by filling below | ||
127 | * the ISA hole | ||
128 | */ | ||
129 | |||
130 | if (floor == 0) | ||
131 | fspace = 512*1024; | ||
132 | else if (floor == 512*1024) | ||
133 | fspace = 128*1024; | ||
134 | |||
135 | /* And forget ROM space */ | ||
136 | |||
137 | /* | ||
138 | * Now install the largest coverage we get | ||
139 | */ | ||
140 | if (fspace > high && fspace > low) { | ||
141 | centaur_mcr_insert(ct, floor, fspace, key); | ||
142 | floor += fspace; | ||
143 | } else if (high > low) { | ||
144 | centaur_mcr_insert(ct, top, high, key); | ||
145 | top += high; | ||
146 | } else if (low > 0) { | ||
147 | base -= low; | ||
148 | centaur_mcr_insert(ct, base, low, key); | ||
149 | } else | ||
150 | break; | ||
151 | ct++; | ||
152 | } | ||
153 | /* | ||
154 | * We loaded ct values. We now need to set the mask. The caller | ||
155 | * must do this bit. | ||
156 | */ | ||
157 | return ct; | ||
158 | } | ||
159 | |||
160 | static void centaur_create_optimal_mcr(void) | ||
161 | { | ||
162 | int used; | ||
163 | int i; | ||
164 | |||
165 | /* | ||
166 | * Allocate up to 6 mcrs to mark as much of ram as possible | ||
167 | * as write combining and weak write ordered. | ||
168 | * | ||
169 | * To experiment with: Linux never uses stack operations for | ||
170 | * mmio spaces so we could globally enable stack operation wc | ||
171 | * | ||
172 | * Load the registers with type 31 - full write combining, all | ||
173 | * writes weakly ordered. | ||
174 | */ | ||
175 | used = centaur_mcr_compute(6, 31); | ||
176 | |||
177 | /* | ||
178 | * Wipe unused MCRs | ||
179 | */ | ||
180 | for (i = used; i < 8; i++) | ||
181 | wrmsr(MSR_IDT_MCR0+i, 0, 0); | ||
182 | } | ||
183 | |||
184 | static void winchip2_create_optimal_mcr(void) | ||
185 | { | ||
186 | u32 lo, hi; | ||
187 | int used; | ||
188 | int i; | ||
189 | |||
190 | /* | ||
191 | * Allocate up to 6 mcrs to mark as much of ram as possible | ||
192 | * as write combining, weak store ordered. | ||
193 | * | ||
194 | * Load the registers with type 25 | ||
195 | * 8 - weak write ordering | ||
196 | * 16 - weak read ordering | ||
197 | * 1 - write combining | ||
198 | */ | ||
199 | used = centaur_mcr_compute(6, 25); | ||
200 | |||
201 | /* | ||
202 | * Mark the registers we are using. | ||
203 | */ | ||
204 | rdmsr(MSR_IDT_MCR_CTRL, lo, hi); | ||
205 | for (i = 0; i < used; i++) | ||
206 | lo |= 1<<(9+i); | ||
207 | wrmsr(MSR_IDT_MCR_CTRL, lo, hi); | ||
208 | |||
209 | /* | ||
210 | * Wipe unused MCRs | ||
211 | */ | ||
212 | |||
213 | for (i = used; i < 8; i++) | ||
214 | wrmsr(MSR_IDT_MCR0+i, 0, 0); | ||
215 | } | ||
216 | |||
217 | /* | ||
218 | * Handle the MCR key on the Winchip 2. | ||
219 | */ | ||
220 | static void winchip2_unprotect_mcr(void) | ||
221 | { | ||
222 | u32 lo, hi; | ||
223 | u32 key; | ||
224 | |||
225 | rdmsr(MSR_IDT_MCR_CTRL, lo, hi); | ||
226 | lo &= ~0x1C0; /* blank bits 8-6 */ | ||
227 | key = (lo>>17) & 7; | ||
228 | lo |= key<<6; /* replace with unlock key */ | ||
229 | wrmsr(MSR_IDT_MCR_CTRL, lo, hi); | ||
230 | } | ||
231 | |||
232 | static void winchip2_protect_mcr(void) | ||
233 | { | ||
234 | u32 lo, hi; | ||
235 | |||
236 | rdmsr(MSR_IDT_MCR_CTRL, lo, hi); | ||
237 | lo &= ~0x1C0; /* blank bits 8-6 */ | ||
238 | wrmsr(MSR_IDT_MCR_CTRL, lo, hi); | ||
239 | } | ||
240 | #endif /* CONFIG_X86_OOSTORE */ | ||
241 | |||
242 | #define ACE_PRESENT (1 << 6) | 11 | #define ACE_PRESENT (1 << 6) |
243 | #define ACE_ENABLED (1 << 7) | 12 | #define ACE_ENABLED (1 << 7) |
244 | #define ACE_FCR (1 << 28) /* MSR_VIA_FCR */ | 13 | #define ACE_FCR (1 << 28) /* MSR_VIA_FCR */ |
@@ -363,20 +132,6 @@ static void init_centaur(struct cpuinfo_x86 *c) | |||
363 | fcr_clr = DPDC; | 132 | fcr_clr = DPDC; |
364 | printk(KERN_NOTICE "Disabling bugged TSC.\n"); | 133 | printk(KERN_NOTICE "Disabling bugged TSC.\n"); |
365 | clear_cpu_cap(c, X86_FEATURE_TSC); | 134 | clear_cpu_cap(c, X86_FEATURE_TSC); |
366 | #ifdef CONFIG_X86_OOSTORE | ||
367 | centaur_create_optimal_mcr(); | ||
368 | /* | ||
369 | * Enable: | ||
370 | * write combining on non-stack, non-string | ||
371 | * write combining on string, all types | ||
372 | * weak write ordering | ||
373 | * | ||
374 | * The C6 original lacks weak read order | ||
375 | * | ||
376 | * Note 0x120 is write only on Winchip 1 | ||
377 | */ | ||
378 | wrmsr(MSR_IDT_MCR_CTRL, 0x01F0001F, 0); | ||
379 | #endif | ||
380 | break; | 135 | break; |
381 | case 8: | 136 | case 8: |
382 | switch (c->x86_mask) { | 137 | switch (c->x86_mask) { |
@@ -393,40 +148,12 @@ static void init_centaur(struct cpuinfo_x86 *c) | |||
393 | fcr_set = ECX8|DSMC|DTLOCK|EMMX|EBRPRED|ERETSTK| | 148 | fcr_set = ECX8|DSMC|DTLOCK|EMMX|EBRPRED|ERETSTK| |
394 | E2MMX|EAMD3D; | 149 | E2MMX|EAMD3D; |
395 | fcr_clr = DPDC; | 150 | fcr_clr = DPDC; |
396 | #ifdef CONFIG_X86_OOSTORE | ||
397 | winchip2_unprotect_mcr(); | ||
398 | winchip2_create_optimal_mcr(); | ||
399 | rdmsr(MSR_IDT_MCR_CTRL, lo, hi); | ||
400 | /* | ||
401 | * Enable: | ||
402 | * write combining on non-stack, non-string | ||
403 | * write combining on string, all types | ||
404 | * weak write ordering | ||
405 | */ | ||
406 | lo |= 31; | ||
407 | wrmsr(MSR_IDT_MCR_CTRL, lo, hi); | ||
408 | winchip2_protect_mcr(); | ||
409 | #endif | ||
410 | break; | 151 | break; |
411 | case 9: | 152 | case 9: |
412 | name = "3"; | 153 | name = "3"; |
413 | fcr_set = ECX8|DSMC|DTLOCK|EMMX|EBRPRED|ERETSTK| | 154 | fcr_set = ECX8|DSMC|DTLOCK|EMMX|EBRPRED|ERETSTK| |
414 | E2MMX|EAMD3D; | 155 | E2MMX|EAMD3D; |
415 | fcr_clr = DPDC; | 156 | fcr_clr = DPDC; |
416 | #ifdef CONFIG_X86_OOSTORE | ||
417 | winchip2_unprotect_mcr(); | ||
418 | winchip2_create_optimal_mcr(); | ||
419 | rdmsr(MSR_IDT_MCR_CTRL, lo, hi); | ||
420 | /* | ||
421 | * Enable: | ||
422 | * write combining on non-stack, non-string | ||
423 | * write combining on string, all types | ||
424 | * weak write ordering | ||
425 | */ | ||
426 | lo |= 31; | ||
427 | wrmsr(MSR_IDT_MCR_CTRL, lo, hi); | ||
428 | winchip2_protect_mcr(); | ||
429 | #endif | ||
430 | break; | 157 | break; |
431 | default: | 158 | default: |
432 | name = "??"; | 159 | name = "??"; |
diff --git a/arch/x86/kernel/cpu/common.c b/arch/x86/kernel/cpu/common.c index 6abc172b8258..a135239badb7 100644 --- a/arch/x86/kernel/cpu/common.c +++ b/arch/x86/kernel/cpu/common.c | |||
@@ -284,8 +284,13 @@ static __always_inline void setup_smap(struct cpuinfo_x86 *c) | |||
284 | raw_local_save_flags(eflags); | 284 | raw_local_save_flags(eflags); |
285 | BUG_ON(eflags & X86_EFLAGS_AC); | 285 | BUG_ON(eflags & X86_EFLAGS_AC); |
286 | 286 | ||
287 | if (cpu_has(c, X86_FEATURE_SMAP)) | 287 | if (cpu_has(c, X86_FEATURE_SMAP)) { |
288 | #ifdef CONFIG_X86_SMAP | ||
288 | set_in_cr4(X86_CR4_SMAP); | 289 | set_in_cr4(X86_CR4_SMAP); |
290 | #else | ||
291 | clear_in_cr4(X86_CR4_SMAP); | ||
292 | #endif | ||
293 | } | ||
289 | } | 294 | } |
290 | 295 | ||
291 | /* | 296 | /* |
@@ -472,6 +477,7 @@ u16 __read_mostly tlb_lli_4m[NR_INFO]; | |||
472 | u16 __read_mostly tlb_lld_4k[NR_INFO]; | 477 | u16 __read_mostly tlb_lld_4k[NR_INFO]; |
473 | u16 __read_mostly tlb_lld_2m[NR_INFO]; | 478 | u16 __read_mostly tlb_lld_2m[NR_INFO]; |
474 | u16 __read_mostly tlb_lld_4m[NR_INFO]; | 479 | u16 __read_mostly tlb_lld_4m[NR_INFO]; |
480 | u16 __read_mostly tlb_lld_1g[NR_INFO]; | ||
475 | 481 | ||
476 | /* | 482 | /* |
477 | * tlb_flushall_shift shows the balance point in replacing cr3 write | 483 | * tlb_flushall_shift shows the balance point in replacing cr3 write |
@@ -486,13 +492,13 @@ void cpu_detect_tlb(struct cpuinfo_x86 *c) | |||
486 | if (this_cpu->c_detect_tlb) | 492 | if (this_cpu->c_detect_tlb) |
487 | this_cpu->c_detect_tlb(c); | 493 | this_cpu->c_detect_tlb(c); |
488 | 494 | ||
489 | printk(KERN_INFO "Last level iTLB entries: 4KB %d, 2MB %d, 4MB %d\n" \ | 495 | printk(KERN_INFO "Last level iTLB entries: 4KB %d, 2MB %d, 4MB %d\n" |
490 | "Last level dTLB entries: 4KB %d, 2MB %d, 4MB %d\n" \ | 496 | "Last level dTLB entries: 4KB %d, 2MB %d, 4MB %d, 1GB %d\n" |
491 | "tlb_flushall_shift: %d\n", | 497 | "tlb_flushall_shift: %d\n", |
492 | tlb_lli_4k[ENTRIES], tlb_lli_2m[ENTRIES], | 498 | tlb_lli_4k[ENTRIES], tlb_lli_2m[ENTRIES], |
493 | tlb_lli_4m[ENTRIES], tlb_lld_4k[ENTRIES], | 499 | tlb_lli_4m[ENTRIES], tlb_lld_4k[ENTRIES], |
494 | tlb_lld_2m[ENTRIES], tlb_lld_4m[ENTRIES], | 500 | tlb_lld_2m[ENTRIES], tlb_lld_4m[ENTRIES], |
495 | tlb_flushall_shift); | 501 | tlb_lld_1g[ENTRIES], tlb_flushall_shift); |
496 | } | 502 | } |
497 | 503 | ||
498 | void detect_ht(struct cpuinfo_x86 *c) | 504 | void detect_ht(struct cpuinfo_x86 *c) |
@@ -1019,7 +1025,8 @@ __setup("show_msr=", setup_show_msr); | |||
1019 | 1025 | ||
1020 | static __init int setup_noclflush(char *arg) | 1026 | static __init int setup_noclflush(char *arg) |
1021 | { | 1027 | { |
1022 | setup_clear_cpu_cap(X86_FEATURE_CLFLSH); | 1028 | setup_clear_cpu_cap(X86_FEATURE_CLFLUSH); |
1029 | setup_clear_cpu_cap(X86_FEATURE_CLFLUSHOPT); | ||
1023 | return 1; | 1030 | return 1; |
1024 | } | 1031 | } |
1025 | __setup("noclflush", setup_noclflush); | 1032 | __setup("noclflush", setup_noclflush); |
@@ -1072,6 +1079,10 @@ static __init int setup_disablecpuid(char *arg) | |||
1072 | } | 1079 | } |
1073 | __setup("clearcpuid=", setup_disablecpuid); | 1080 | __setup("clearcpuid=", setup_disablecpuid); |
1074 | 1081 | ||
1082 | DEFINE_PER_CPU(unsigned long, kernel_stack) = | ||
1083 | (unsigned long)&init_thread_union - KERNEL_STACK_OFFSET + THREAD_SIZE; | ||
1084 | EXPORT_PER_CPU_SYMBOL(kernel_stack); | ||
1085 | |||
1075 | #ifdef CONFIG_X86_64 | 1086 | #ifdef CONFIG_X86_64 |
1076 | struct desc_ptr idt_descr = { NR_VECTORS * 16 - 1, (unsigned long) idt_table }; | 1087 | struct desc_ptr idt_descr = { NR_VECTORS * 16 - 1, (unsigned long) idt_table }; |
1077 | struct desc_ptr debug_idt_descr = { NR_VECTORS * 16 - 1, | 1088 | struct desc_ptr debug_idt_descr = { NR_VECTORS * 16 - 1, |
@@ -1088,10 +1099,6 @@ DEFINE_PER_CPU(struct task_struct *, current_task) ____cacheline_aligned = | |||
1088 | &init_task; | 1099 | &init_task; |
1089 | EXPORT_PER_CPU_SYMBOL(current_task); | 1100 | EXPORT_PER_CPU_SYMBOL(current_task); |
1090 | 1101 | ||
1091 | DEFINE_PER_CPU(unsigned long, kernel_stack) = | ||
1092 | (unsigned long)&init_thread_union - KERNEL_STACK_OFFSET + THREAD_SIZE; | ||
1093 | EXPORT_PER_CPU_SYMBOL(kernel_stack); | ||
1094 | |||
1095 | DEFINE_PER_CPU(char *, irq_stack_ptr) = | 1102 | DEFINE_PER_CPU(char *, irq_stack_ptr) = |
1096 | init_per_cpu_var(irq_stack_union.irq_stack) + IRQ_STACK_SIZE - 64; | 1103 | init_per_cpu_var(irq_stack_union.irq_stack) + IRQ_STACK_SIZE - 64; |
1097 | 1104 | ||
diff --git a/arch/x86/kernel/cpu/cyrix.c b/arch/x86/kernel/cpu/cyrix.c index d0969c75ab54..aaf152e79637 100644 --- a/arch/x86/kernel/cpu/cyrix.c +++ b/arch/x86/kernel/cpu/cyrix.c | |||
@@ -1,4 +1,3 @@ | |||
1 | #include <linux/init.h> | ||
2 | #include <linux/bitops.h> | 1 | #include <linux/bitops.h> |
3 | #include <linux/delay.h> | 2 | #include <linux/delay.h> |
4 | #include <linux/pci.h> | 3 | #include <linux/pci.h> |
diff --git a/arch/x86/kernel/cpu/intel.c b/arch/x86/kernel/cpu/intel.c index ea04b342c026..a80029035bf2 100644 --- a/arch/x86/kernel/cpu/intel.c +++ b/arch/x86/kernel/cpu/intel.c | |||
@@ -1,4 +1,3 @@ | |||
1 | #include <linux/init.h> | ||
2 | #include <linux/kernel.h> | 1 | #include <linux/kernel.h> |
3 | 2 | ||
4 | #include <linux/string.h> | 3 | #include <linux/string.h> |
@@ -32,11 +31,8 @@ static void early_init_intel(struct cpuinfo_x86 *c) | |||
32 | 31 | ||
33 | /* Unmask CPUID levels if masked: */ | 32 | /* Unmask CPUID levels if masked: */ |
34 | if (c->x86 > 6 || (c->x86 == 6 && c->x86_model >= 0xd)) { | 33 | if (c->x86 > 6 || (c->x86 == 6 && c->x86_model >= 0xd)) { |
35 | rdmsrl(MSR_IA32_MISC_ENABLE, misc_enable); | 34 | if (msr_clear_bit(MSR_IA32_MISC_ENABLE, |
36 | 35 | MSR_IA32_MISC_ENABLE_LIMIT_CPUID_BIT) > 0) { | |
37 | if (misc_enable & MSR_IA32_MISC_ENABLE_LIMIT_CPUID) { | ||
38 | misc_enable &= ~MSR_IA32_MISC_ENABLE_LIMIT_CPUID; | ||
39 | wrmsrl(MSR_IA32_MISC_ENABLE, misc_enable); | ||
40 | c->cpuid_level = cpuid_eax(0); | 36 | c->cpuid_level = cpuid_eax(0); |
41 | get_cpu_cap(c); | 37 | get_cpu_cap(c); |
42 | } | 38 | } |
@@ -93,7 +89,7 @@ static void early_init_intel(struct cpuinfo_x86 *c) | |||
93 | set_cpu_cap(c, X86_FEATURE_CONSTANT_TSC); | 89 | set_cpu_cap(c, X86_FEATURE_CONSTANT_TSC); |
94 | set_cpu_cap(c, X86_FEATURE_NONSTOP_TSC); | 90 | set_cpu_cap(c, X86_FEATURE_NONSTOP_TSC); |
95 | if (!check_tsc_unstable()) | 91 | if (!check_tsc_unstable()) |
96 | sched_clock_stable = 1; | 92 | set_sched_clock_stable(); |
97 | } | 93 | } |
98 | 94 | ||
99 | /* Penwell and Cloverview have the TSC which doesn't sleep on S3 */ | 95 | /* Penwell and Cloverview have the TSC which doesn't sleep on S3 */ |
@@ -130,16 +126,10 @@ static void early_init_intel(struct cpuinfo_x86 *c) | |||
130 | * Ingo Molnar reported a Pentium D (model 6) and a Xeon | 126 | * Ingo Molnar reported a Pentium D (model 6) and a Xeon |
131 | * (model 2) with the same problem. | 127 | * (model 2) with the same problem. |
132 | */ | 128 | */ |
133 | if (c->x86 == 15) { | 129 | if (c->x86 == 15) |
134 | rdmsrl(MSR_IA32_MISC_ENABLE, misc_enable); | 130 | if (msr_clear_bit(MSR_IA32_MISC_ENABLE, |
135 | 131 | MSR_IA32_MISC_ENABLE_FAST_STRING_BIT) > 0) | |
136 | if (misc_enable & MSR_IA32_MISC_ENABLE_FAST_STRING) { | 132 | pr_info("kmemcheck: Disabling fast string operations\n"); |
137 | printk(KERN_INFO "kmemcheck: Disabling fast string operations\n"); | ||
138 | |||
139 | misc_enable &= ~MSR_IA32_MISC_ENABLE_FAST_STRING; | ||
140 | wrmsrl(MSR_IA32_MISC_ENABLE, misc_enable); | ||
141 | } | ||
142 | } | ||
143 | #endif | 133 | #endif |
144 | 134 | ||
145 | /* | 135 | /* |
@@ -196,10 +186,16 @@ static void intel_smp_check(struct cpuinfo_x86 *c) | |||
196 | } | 186 | } |
197 | } | 187 | } |
198 | 188 | ||
199 | static void intel_workarounds(struct cpuinfo_x86 *c) | 189 | static int forcepae; |
190 | static int __init forcepae_setup(char *__unused) | ||
200 | { | 191 | { |
201 | unsigned long lo, hi; | 192 | forcepae = 1; |
193 | return 1; | ||
194 | } | ||
195 | __setup("forcepae", forcepae_setup); | ||
202 | 196 | ||
197 | static void intel_workarounds(struct cpuinfo_x86 *c) | ||
198 | { | ||
203 | #ifdef CONFIG_X86_F00F_BUG | 199 | #ifdef CONFIG_X86_F00F_BUG |
204 | /* | 200 | /* |
205 | * All current models of Pentium and Pentium with MMX technology CPUs | 201 | * All current models of Pentium and Pentium with MMX technology CPUs |
@@ -226,16 +222,26 @@ static void intel_workarounds(struct cpuinfo_x86 *c) | |||
226 | clear_cpu_cap(c, X86_FEATURE_SEP); | 222 | clear_cpu_cap(c, X86_FEATURE_SEP); |
227 | 223 | ||
228 | /* | 224 | /* |
225 | * PAE CPUID issue: many Pentium M report no PAE but may have a | ||
226 | * functionally usable PAE implementation. | ||
227 | * Forcefully enable PAE if kernel parameter "forcepae" is present. | ||
228 | */ | ||
229 | if (forcepae) { | ||
230 | printk(KERN_WARNING "PAE forced!\n"); | ||
231 | set_cpu_cap(c, X86_FEATURE_PAE); | ||
232 | add_taint(TAINT_CPU_OUT_OF_SPEC, LOCKDEP_NOW_UNRELIABLE); | ||
233 | } | ||
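/*
 * Usage note (illustrative): booting such a Pentium M with "forcepae"
 * on the kernel command line sets X86_FEATURE_PAE here and taints the
 * kernel, since the CPU is being run outside its advertised spec.
 */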
234 | |||
235 | /* | ||
229 | * P4 Xeon errata 037 workaround. | 236 | * P4 Xeon errata 037 workaround. |
230 | * Hardware prefetcher may cause stale data to be loaded into the cache. | 237 | * Hardware prefetcher may cause stale data to be loaded into the cache. |
231 | */ | 238 | */ |
232 | if ((c->x86 == 15) && (c->x86_model == 1) && (c->x86_mask == 1)) { | 239 | if ((c->x86 == 15) && (c->x86_model == 1) && (c->x86_mask == 1)) { |
233 | rdmsr(MSR_IA32_MISC_ENABLE, lo, hi); | 240 | if (msr_set_bit(MSR_IA32_MISC_ENABLE, |
234 | if ((lo & MSR_IA32_MISC_ENABLE_PREFETCH_DISABLE) == 0) { | 241 | MSR_IA32_MISC_ENABLE_PREFETCH_DISABLE_BIT) |
235 | printk (KERN_INFO "CPU: C0 stepping P4 Xeon detected.\n"); | 242 | > 0) { |
236 | printk (KERN_INFO "CPU: Disabling hardware prefetching (Errata 037)\n"); | 243 | pr_info("CPU: C0 stepping P4 Xeon detected.\n"); |
237 | lo |= MSR_IA32_MISC_ENABLE_PREFETCH_DISABLE; | 244 | pr_info("CPU: Disabling hardware prefetching (Errata 037)\n"); |
238 | wrmsr(MSR_IA32_MISC_ENABLE, lo, hi); | ||
239 | } | 245 | } |
240 | } | 246 | } |
241 | 247 | ||
@@ -268,10 +274,6 @@ static void intel_workarounds(struct cpuinfo_x86 *c) | |||
268 | } | 274 | } |
269 | #endif | 275 | #endif |
270 | 276 | ||
271 | #ifdef CONFIG_X86_NUMAQ | ||
272 | numaq_tsc_disable(); | ||
273 | #endif | ||
274 | |||
275 | intel_smp_check(c); | 277 | intel_smp_check(c); |
276 | } | 278 | } |
277 | #else | 279 | #else |
@@ -506,6 +508,7 @@ static unsigned int intel_size_cache(struct cpuinfo_x86 *c, unsigned int size) | |||
506 | #define TLB_DATA0_2M_4M 0x23 | 508 | #define TLB_DATA0_2M_4M 0x23 |
507 | 509 | ||
508 | #define STLB_4K 0x41 | 510 | #define STLB_4K 0x41 |
511 | #define STLB_4K_2M 0x42 | ||
509 | 512 | ||
510 | static const struct _tlb_table intel_tlb_table[] = { | 513 | static const struct _tlb_table intel_tlb_table[] = { |
511 | { 0x01, TLB_INST_4K, 32, " TLB_INST 4 KByte pages, 4-way set associative" }, | 514 | { 0x01, TLB_INST_4K, 32, " TLB_INST 4 KByte pages, 4-way set associative" }, |
@@ -526,13 +529,20 @@ static const struct _tlb_table intel_tlb_table[] = { | |||
526 | { 0x5b, TLB_DATA_4K_4M, 64, " TLB_DATA 4 KByte and 4 MByte pages" }, | 529 | { 0x5b, TLB_DATA_4K_4M, 64, " TLB_DATA 4 KByte and 4 MByte pages" }, |
527 | { 0x5c, TLB_DATA_4K_4M, 128, " TLB_DATA 4 KByte and 4 MByte pages" }, | 530 | { 0x5c, TLB_DATA_4K_4M, 128, " TLB_DATA 4 KByte and 4 MByte pages" }, |
528 | { 0x5d, TLB_DATA_4K_4M, 256, " TLB_DATA 4 KByte and 4 MByte pages" }, | 531 | { 0x5d, TLB_DATA_4K_4M, 256, " TLB_DATA 4 KByte and 4 MByte pages" }, |
532 | { 0x61, TLB_INST_4K, 48, " TLB_INST 4 KByte pages, full associative" }, | ||
533 | { 0x63, TLB_DATA_1G, 4, " TLB_DATA 1 GByte pages, 4-way set associative" }, | ||
534 | { 0x76, TLB_INST_2M_4M, 8, " TLB_INST 2-MByte or 4-MByte pages, fully associative" }, | ||
529 | { 0xb0, TLB_INST_4K, 128, " TLB_INST 4 KByte pages, 4-way set associative" }, | 535 | { 0xb0, TLB_INST_4K, 128, " TLB_INST 4 KByte pages, 4-way set associative" }, |
530 | { 0xb1, TLB_INST_2M_4M, 4, " TLB_INST 2M pages, 4-way, 8 entries or 4M pages, 4-way entries" }, | 536 | { 0xb1, TLB_INST_2M_4M, 4, " TLB_INST 2M pages, 4-way, 8 entries or 4M pages, 4-way entries" }, |
531 | { 0xb2, TLB_INST_4K, 64, " TLB_INST 4KByte pages, 4-way set associative" }, | 537 | { 0xb2, TLB_INST_4K, 64, " TLB_INST 4KByte pages, 4-way set associative" }, |
532 | { 0xb3, TLB_DATA_4K, 128, " TLB_DATA 4 KByte pages, 4-way set associative" }, | 538 | { 0xb3, TLB_DATA_4K, 128, " TLB_DATA 4 KByte pages, 4-way set associative" }, |
533 | { 0xb4, TLB_DATA_4K, 256, " TLB_DATA 4 KByte pages, 4-way associative" }, | 539 | { 0xb4, TLB_DATA_4K, 256, " TLB_DATA 4 KByte pages, 4-way associative" }, |
540 | { 0xb5, TLB_INST_4K, 64, " TLB_INST 4 KByte pages, 8-way set associative" }, | ||
541 | { 0xb6, TLB_INST_4K, 128, " TLB_INST 4 KByte pages, 8-way set associative" }, | ||
534 | { 0xba, TLB_DATA_4K, 64, " TLB_DATA 4 KByte pages, 4-way associative" }, | 542 | { 0xba, TLB_DATA_4K, 64, " TLB_DATA 4 KByte pages, 4-way associative" }, |
535 | { 0xc0, TLB_DATA_4K_4M, 8, " TLB_DATA 4 KByte and 4 MByte pages, 4-way associative" }, | 543 | { 0xc0, TLB_DATA_4K_4M, 8, " TLB_DATA 4 KByte and 4 MByte pages, 4-way associative" }, |
544 | { 0xc1, STLB_4K_2M, 1024, " STLB 4 KByte and 2 MByte pages, 8-way associative" }, | ||
545 | { 0xc2, TLB_DATA_2M_4M, 16, " DTLB 2 MByte/4MByte pages, 4-way associative" }, | ||
536 | { 0xca, STLB_4K, 512, " STLB 4 KByte pages, 4-way associative" }, | 546 | { 0xca, STLB_4K, 512, " STLB 4 KByte pages, 4-way associative" }, |
537 | { 0x00, 0, 0 } | 547 | { 0x00, 0, 0 } |
538 | }; | 548 | }; |
@@ -558,6 +568,20 @@ static void intel_tlb_lookup(const unsigned char desc) | |||
558 | if (tlb_lld_4k[ENTRIES] < intel_tlb_table[k].entries) | 568 | if (tlb_lld_4k[ENTRIES] < intel_tlb_table[k].entries) |
559 | tlb_lld_4k[ENTRIES] = intel_tlb_table[k].entries; | 569 | tlb_lld_4k[ENTRIES] = intel_tlb_table[k].entries; |
560 | break; | 570 | break; |
571 | case STLB_4K_2M: | ||
572 | if (tlb_lli_4k[ENTRIES] < intel_tlb_table[k].entries) | ||
573 | tlb_lli_4k[ENTRIES] = intel_tlb_table[k].entries; | ||
574 | if (tlb_lld_4k[ENTRIES] < intel_tlb_table[k].entries) | ||
575 | tlb_lld_4k[ENTRIES] = intel_tlb_table[k].entries; | ||
576 | if (tlb_lli_2m[ENTRIES] < intel_tlb_table[k].entries) | ||
577 | tlb_lli_2m[ENTRIES] = intel_tlb_table[k].entries; | ||
578 | if (tlb_lld_2m[ENTRIES] < intel_tlb_table[k].entries) | ||
579 | tlb_lld_2m[ENTRIES] = intel_tlb_table[k].entries; | ||
580 | if (tlb_lli_4m[ENTRIES] < intel_tlb_table[k].entries) | ||
581 | tlb_lli_4m[ENTRIES] = intel_tlb_table[k].entries; | ||
582 | if (tlb_lld_4m[ENTRIES] < intel_tlb_table[k].entries) | ||
583 | tlb_lld_4m[ENTRIES] = intel_tlb_table[k].entries; | ||
584 | break; | ||
561 | case TLB_INST_ALL: | 585 | case TLB_INST_ALL: |
562 | if (tlb_lli_4k[ENTRIES] < intel_tlb_table[k].entries) | 586 | if (tlb_lli_4k[ENTRIES] < intel_tlb_table[k].entries) |
563 | tlb_lli_4k[ENTRIES] = intel_tlb_table[k].entries; | 587 | tlb_lli_4k[ENTRIES] = intel_tlb_table[k].entries; |
@@ -603,6 +627,10 @@ static void intel_tlb_lookup(const unsigned char desc) | |||
603 | if (tlb_lld_4m[ENTRIES] < intel_tlb_table[k].entries) | 627 | if (tlb_lld_4m[ENTRIES] < intel_tlb_table[k].entries) |
604 | tlb_lld_4m[ENTRIES] = intel_tlb_table[k].entries; | 628 | tlb_lld_4m[ENTRIES] = intel_tlb_table[k].entries; |
605 | break; | 629 | break; |
630 | case TLB_DATA_1G: | ||
631 | if (tlb_lld_1g[ENTRIES] < intel_tlb_table[k].entries) | ||
632 | tlb_lld_1g[ENTRIES] = intel_tlb_table[k].entries; | ||
633 | break; | ||
606 | } | 634 | } |
607 | } | 635 | } |
608 | 636 | ||
@@ -615,21 +643,17 @@ static void intel_tlb_flushall_shift_set(struct cpuinfo_x86 *c) | |||
615 | case 0x61d: /* six-core 45 nm xeon "Dunnington" */ | 643 | case 0x61d: /* six-core 45 nm xeon "Dunnington" */ |
616 | tlb_flushall_shift = -1; | 644 | tlb_flushall_shift = -1; |
617 | break; | 645 | break; |
646 | case 0x63a: /* Ivybridge */ | ||
647 | tlb_flushall_shift = 2; | ||
648 | break; | ||
618 | case 0x61a: /* 45 nm nehalem, "Bloomfield" */ | 649 | case 0x61a: /* 45 nm nehalem, "Bloomfield" */ |
619 | case 0x61e: /* 45 nm nehalem, "Lynnfield" */ | 650 | case 0x61e: /* 45 nm nehalem, "Lynnfield" */ |
620 | case 0x625: /* 32 nm nehalem, "Clarkdale" */ | 651 | case 0x625: /* 32 nm nehalem, "Clarkdale" */ |
621 | case 0x62c: /* 32 nm nehalem, "Gulftown" */ | 652 | case 0x62c: /* 32 nm nehalem, "Gulftown" */ |
622 | case 0x62e: /* 45 nm nehalem-ex, "Beckton" */ | 653 | case 0x62e: /* 45 nm nehalem-ex, "Beckton" */ |
623 | case 0x62f: /* 32 nm Xeon E7 */ | 654 | case 0x62f: /* 32 nm Xeon E7 */ |
624 | tlb_flushall_shift = 6; | ||
625 | break; | ||
626 | case 0x62a: /* SandyBridge */ | 655 | case 0x62a: /* SandyBridge */ |
627 | case 0x62d: /* SandyBridge, "Romley-EP" */ | 656 | case 0x62d: /* SandyBridge, "Romley-EP" */ |
628 | tlb_flushall_shift = 5; | ||
629 | break; | ||
630 | case 0x63a: /* Ivybridge */ | ||
631 | tlb_flushall_shift = 1; | ||
632 | break; | ||
633 | default: | 657 | default: |
634 | tlb_flushall_shift = 6; | 658 | tlb_flushall_shift = 6; |
635 | } | 659 | } |
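tlb_flushall_shift controls how eagerly flush_tlb_mm_range() abandons per-page invalidation for a full TLB flush: a negative value disables ranged flushing entirely, and a larger shift lowers the cutoff. A minimal sketch of the decision under those assumptions (the function name and the entry-count parameter are illustrative, not the kernel's exact code):

	/* Sketch: choose between a ranged and a full TLB flush. */
	static int use_full_flush(unsigned long nr_pages,
				  unsigned long act_entries, int shift)
	{
		if (shift < 0)			/* ranged flushing never pays off */
			return 1;
		return nr_pages > (act_entries >> shift);
	}

Raising the Ivybridge value from 1 to 2 thus moves the cutoff from half of the active entries down to a quarter, preferring the full flush sooner.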
diff --git a/arch/x86/kernel/cpu/intel_cacheinfo.c b/arch/x86/kernel/cpu/intel_cacheinfo.c index 0641113e2965..a952e9c85b6f 100644 --- a/arch/x86/kernel/cpu/intel_cacheinfo.c +++ b/arch/x86/kernel/cpu/intel_cacheinfo.c | |||
@@ -1225,21 +1225,24 @@ static struct notifier_block cacheinfo_cpu_notifier = { | |||
1225 | 1225 | ||
1226 | static int __init cache_sysfs_init(void) | 1226 | static int __init cache_sysfs_init(void) |
1227 | { | 1227 | { |
1228 | int i; | 1228 | int i, err = 0; |
1229 | 1229 | ||
1230 | if (num_cache_leaves == 0) | 1230 | if (num_cache_leaves == 0) |
1231 | return 0; | 1231 | return 0; |
1232 | 1232 | ||
1233 | cpu_notifier_register_begin(); | ||
1233 | for_each_online_cpu(i) { | 1234 | for_each_online_cpu(i) { |
1234 | int err; | ||
1235 | struct device *dev = get_cpu_device(i); | 1235 | struct device *dev = get_cpu_device(i); |
1236 | 1236 | ||
1237 | err = cache_add_dev(dev); | 1237 | err = cache_add_dev(dev); |
1238 | if (err) | 1238 | if (err) |
1239 | return err; | 1239 | goto out; |
1240 | } | 1240 | } |
1241 | register_hotcpu_notifier(&cacheinfo_cpu_notifier); | 1241 | __register_hotcpu_notifier(&cacheinfo_cpu_notifier); |
1242 | return 0; | 1242 | |
1243 | out: | ||
1244 | cpu_notifier_register_done(); | ||
1245 | return err; | ||
1243 | } | 1246 | } |
1244 | 1247 | ||
1245 | device_initcall(cache_sysfs_init); | 1248 | device_initcall(cache_sysfs_init); |
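The conversion above is the pattern repeated throughout this series: cpu_notifier_register_begin()/cpu_notifier_register_done() bracket both the walk over online CPUs and the notifier registration, closing the race with CPU hotplug that the bare register_hotcpu_notifier() call left open. A minimal sketch of the converted shape (example_add_dev and example_notifier are placeholder names):

	static int __init example_init(void)
	{
		int cpu, err = 0;

		cpu_notifier_register_begin();	/* holds off CPU hotplug */
		for_each_online_cpu(cpu) {
			err = example_add_dev(get_cpu_device(cpu));
			if (err)
				goto out;
		}
		/* double-underscore variant: registration lock already held */
		__register_hotcpu_notifier(&example_notifier);
	out:
		cpu_notifier_register_done();
		return err;
	}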
diff --git a/arch/x86/kernel/cpu/match.c b/arch/x86/kernel/cpu/match.c index 36565373af87..afa9f0d487ea 100644 --- a/arch/x86/kernel/cpu/match.c +++ b/arch/x86/kernel/cpu/match.c | |||
@@ -47,45 +47,3 @@ const struct x86_cpu_id *x86_match_cpu(const struct x86_cpu_id *match) | |||
47 | return NULL; | 47 | return NULL; |
48 | } | 48 | } |
49 | EXPORT_SYMBOL(x86_match_cpu); | 49 | EXPORT_SYMBOL(x86_match_cpu); |
50 | |||
51 | ssize_t arch_print_cpu_modalias(struct device *dev, | ||
52 | struct device_attribute *attr, | ||
53 | char *bufptr) | ||
54 | { | ||
55 | int size = PAGE_SIZE; | ||
56 | int i, n; | ||
57 | char *buf = bufptr; | ||
58 | |||
59 | n = snprintf(buf, size, "x86cpu:vendor:%04X:family:%04X:" | ||
60 | "model:%04X:feature:", | ||
61 | boot_cpu_data.x86_vendor, | ||
62 | boot_cpu_data.x86, | ||
63 | boot_cpu_data.x86_model); | ||
64 | size -= n; | ||
65 | buf += n; | ||
66 | size -= 1; | ||
67 | for (i = 0; i < NCAPINTS*32; i++) { | ||
68 | if (boot_cpu_has(i)) { | ||
69 | n = snprintf(buf, size, ",%04X", i); | ||
70 | if (n >= size) { | ||
71 | WARN(1, "x86 features overflow page\n"); | ||
72 | break; | ||
73 | } | ||
74 | size -= n; | ||
75 | buf += n; | ||
76 | } | ||
77 | } | ||
78 | *buf++ = '\n'; | ||
79 | return buf - bufptr; | ||
80 | } | ||
81 | |||
82 | int arch_cpu_uevent(struct device *dev, struct kobj_uevent_env *env) | ||
83 | { | ||
84 | char *buf = kzalloc(PAGE_SIZE, GFP_KERNEL); | ||
85 | if (buf) { | ||
86 | arch_print_cpu_modalias(NULL, NULL, buf); | ||
87 | add_uevent_var(env, "MODALIAS=%s", buf); | ||
88 | kfree(buf); | ||
89 | } | ||
90 | return 0; | ||
91 | } | ||
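The two helpers deleted here move to the generic CPU driver; the modalias format itself is unchanged. For reference, a self-contained userspace sketch of the string the removed code emitted (the vendor/family/model values are examples only):

	#include <stdio.h>

	int main(void)
	{
		unsigned int vendor = 0x0000, family = 0x0006, model = 0x003a;

		/* one ",%04X" entry follows per set feature bit */
		printf("x86cpu:vendor:%04X:family:%04X:model:%04X:feature:,%04X\n",
		       vendor, family, model, 0x0001);
		return 0;
	}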
diff --git a/arch/x86/kernel/cpu/mcheck/mce-apei.c b/arch/x86/kernel/cpu/mcheck/mce-apei.c index de8b60a53f69..a1aef9533154 100644 --- a/arch/x86/kernel/cpu/mcheck/mce-apei.c +++ b/arch/x86/kernel/cpu/mcheck/mce-apei.c | |||
@@ -33,22 +33,28 @@ | |||
33 | #include <linux/acpi.h> | 33 | #include <linux/acpi.h> |
34 | #include <linux/cper.h> | 34 | #include <linux/cper.h> |
35 | #include <acpi/apei.h> | 35 | #include <acpi/apei.h> |
36 | #include <acpi/ghes.h> | ||
36 | #include <asm/mce.h> | 37 | #include <asm/mce.h> |
37 | 38 | ||
38 | #include "mce-internal.h" | 39 | #include "mce-internal.h" |
39 | 40 | ||
40 | void apei_mce_report_mem_error(int corrected, struct cper_sec_mem_err *mem_err) | 41 | void apei_mce_report_mem_error(int severity, struct cper_sec_mem_err *mem_err) |
41 | { | 42 | { |
42 | struct mce m; | 43 | struct mce m; |
43 | 44 | ||
44 | /* Only corrected MC is reported */ | 45 | if (!(mem_err->validation_bits & CPER_MEM_VALID_PA)) |
45 | if (!corrected || !(mem_err->validation_bits & CPER_MEM_VALID_PA)) | ||
46 | return; | 46 | return; |
47 | 47 | ||
48 | mce_setup(&m); | 48 | mce_setup(&m); |
49 | m.bank = 1; | 49 | m.bank = 1; |
50 | /* Fake a memory read corrected error with unknown channel */ | 50 | /* Fake a memory read error with unknown channel */ |
51 | m.status = MCI_STATUS_VAL | MCI_STATUS_EN | MCI_STATUS_ADDRV | 0x9f; | 51 | m.status = MCI_STATUS_VAL | MCI_STATUS_EN | MCI_STATUS_ADDRV | 0x9f; |
52 | |||
53 | if (severity >= GHES_SEV_RECOVERABLE) | ||
54 | m.status |= MCI_STATUS_UC; | ||
55 | if (severity >= GHES_SEV_PANIC) | ||
56 | m.status |= MCI_STATUS_PCC; | ||
57 | |||
52 | m.addr = mem_err->physical_addr; | 58 | m.addr = mem_err->physical_addr; |
53 | mce_log(&m); | 59 | mce_log(&m); |
54 | mce_notify_irq(); | 60 | mce_notify_irq(); |
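With the new severity argument the faked machine check is no longer limited to corrected errors; summarizing the status bits set by the code above:

	corrected:    MCI_STATUS_VAL | MCI_STATUS_EN | MCI_STATUS_ADDRV
	recoverable:  the above | MCI_STATUS_UC
	panic:        the above | MCI_STATUS_UC | MCI_STATUS_PCC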
diff --git a/arch/x86/kernel/cpu/mcheck/mce.c b/arch/x86/kernel/cpu/mcheck/mce.c index b3218cdee95f..eeee23ff75ef 100644 --- a/arch/x86/kernel/cpu/mcheck/mce.c +++ b/arch/x86/kernel/cpu/mcheck/mce.c | |||
@@ -89,6 +89,9 @@ static DECLARE_WAIT_QUEUE_HEAD(mce_chrdev_wait); | |||
89 | static DEFINE_PER_CPU(struct mce, mces_seen); | 89 | static DEFINE_PER_CPU(struct mce, mces_seen); |
90 | static int cpu_missing; | 90 | static int cpu_missing; |
91 | 91 | ||
92 | /* CMCI storm detection filter */ | ||
93 | static DEFINE_PER_CPU(unsigned long, mce_polled_error); | ||
94 | |||
92 | /* | 95 | /* |
93 | * MCA banks polled by the period polling timer for corrected events. | 96 | * MCA banks polled by the period polling timer for corrected events. |
94 | * With Intel CMCI, this only has MCA banks which do not support CMCI (if any). | 97 | * With Intel CMCI, this only has MCA banks which do not support CMCI (if any). |
@@ -595,6 +598,7 @@ void machine_check_poll(enum mcp_flags flags, mce_banks_t *b) | |||
595 | { | 598 | { |
596 | struct mce m; | 599 | struct mce m; |
597 | int i; | 600 | int i; |
601 | unsigned long *v; | ||
598 | 602 | ||
599 | this_cpu_inc(mce_poll_count); | 603 | this_cpu_inc(mce_poll_count); |
600 | 604 | ||
@@ -614,6 +618,8 @@ void machine_check_poll(enum mcp_flags flags, mce_banks_t *b) | |||
614 | if (!(m.status & MCI_STATUS_VAL)) | 618 | if (!(m.status & MCI_STATUS_VAL)) |
615 | continue; | 619 | continue; |
616 | 620 | ||
621 | v = &get_cpu_var(mce_polled_error); | ||
622 | set_bit(0, v); | ||
617 | /* | 623 | /* |
618 | * Uncorrected or signalled events are handled by the exception | 624 | * Uncorrected or signalled events are handled by the exception |
619 | * handler when it is enabled, so don't process those here. | 625 | * handler when it is enabled, so don't process those here. |
@@ -1278,10 +1284,18 @@ static unsigned long mce_adjust_timer_default(unsigned long interval) | |||
1278 | static unsigned long (*mce_adjust_timer)(unsigned long interval) = | 1284 | static unsigned long (*mce_adjust_timer)(unsigned long interval) = |
1279 | mce_adjust_timer_default; | 1285 | mce_adjust_timer_default; |
1280 | 1286 | ||
1287 | static int cmc_error_seen(void) | ||
1288 | { | ||
1289 | unsigned long *v = &__get_cpu_var(mce_polled_error); | ||
1290 | |||
1291 | return test_and_clear_bit(0, v); | ||
1292 | } | ||
1293 | |||
1281 | static void mce_timer_fn(unsigned long data) | 1294 | static void mce_timer_fn(unsigned long data) |
1282 | { | 1295 | { |
1283 | struct timer_list *t = &__get_cpu_var(mce_timer); | 1296 | struct timer_list *t = &__get_cpu_var(mce_timer); |
1284 | unsigned long iv; | 1297 | unsigned long iv; |
1298 | int notify; | ||
1285 | 1299 | ||
1286 | WARN_ON(smp_processor_id() != data); | 1300 | WARN_ON(smp_processor_id() != data); |
1287 | 1301 | ||
@@ -1296,7 +1310,9 @@ static void mce_timer_fn(unsigned long data) | |||
1296 | * polling interval, otherwise increase the polling interval. | 1310 | * polling interval, otherwise increase the polling interval. |
1297 | */ | 1311 | */ |
1298 | iv = __this_cpu_read(mce_next_interval); | 1312 | iv = __this_cpu_read(mce_next_interval); |
1299 | if (mce_notify_irq()) { | 1313 | notify = mce_notify_irq(); |
1314 | notify |= cmc_error_seen(); | ||
1315 | if (notify) { | ||
1300 | iv = max(iv / 2, (unsigned long) HZ/100); | 1316 | iv = max(iv / 2, (unsigned long) HZ/100); |
1301 | } else { | 1317 | } else { |
1302 | iv = min(iv * 2, round_jiffies_relative(check_interval * HZ)); | 1318 | iv = min(iv * 2, round_jiffies_relative(check_interval * HZ)); |
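The timer halves its interval whenever mce_notify_irq() or the new CMCI poll filter reports activity, and otherwise backs off exponentially. A standalone sketch of that update rule (hz and max_iv are parameters here; the real code clamps against check_interval * HZ):

	/* Sketch: adaptive MCE polling interval. */
	static unsigned long next_interval(unsigned long iv, int notify,
					   unsigned long hz, unsigned long max_iv)
	{
		if (notify)				/* activity seen: poll faster */
			return iv / 2 > hz / 100 ? iv / 2 : hz / 100;
		return iv * 2 < max_iv ? iv * 2 : max_iv; /* quiet: back off */
	}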
@@ -1638,15 +1654,15 @@ static void __mcheck_cpu_init_vendor(struct cpuinfo_x86 *c) | |||
1638 | 1654 | ||
1639 | static void mce_start_timer(unsigned int cpu, struct timer_list *t) | 1655 | static void mce_start_timer(unsigned int cpu, struct timer_list *t) |
1640 | { | 1656 | { |
1641 | unsigned long iv = mce_adjust_timer(check_interval * HZ); | 1657 | unsigned long iv = check_interval * HZ; |
1642 | |||
1643 | __this_cpu_write(mce_next_interval, iv); | ||
1644 | 1658 | ||
1645 | if (mca_cfg.ignore_ce || !iv) | 1659 | if (mca_cfg.ignore_ce || !iv) |
1646 | return; | 1660 | return; |
1647 | 1661 | ||
1662 | per_cpu(mce_next_interval, cpu) = iv; | ||
1663 | |||
1648 | t->expires = round_jiffies(jiffies + iv); | 1664 | t->expires = round_jiffies(jiffies + iv); |
1649 | add_timer_on(t, smp_processor_id()); | 1665 | add_timer_on(t, cpu); |
1650 | } | 1666 | } |
1651 | 1667 | ||
1652 | static void __mcheck_cpu_init_timer(void) | 1668 | static void __mcheck_cpu_init_timer(void) |
@@ -2272,8 +2288,10 @@ static int mce_device_create(unsigned int cpu) | |||
2272 | dev->release = &mce_device_release; | 2288 | dev->release = &mce_device_release; |
2273 | 2289 | ||
2274 | err = device_register(dev); | 2290 | err = device_register(dev); |
2275 | if (err) | 2291 | if (err) { |
2292 | put_device(dev); | ||
2276 | return err; | 2293 | return err; |
2294 | } | ||
2277 | 2295 | ||
2278 | for (i = 0; mce_device_attrs[i]; i++) { | 2296 | for (i = 0; mce_device_attrs[i]; i++) { |
2279 | err = device_create_file(dev, mce_device_attrs[i]); | 2297 | err = device_create_file(dev, mce_device_attrs[i]); |
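The added put_device() follows the driver-core rule that a failed device_register() still leaves the caller holding a reference, which must be dropped through the refcount rather than by freeing the device directly, so that the release callback runs. The canonical shape:

	err = device_register(dev);
	if (err) {
		put_device(dev);	/* final ref drop invokes ->release() */
		return err;
	}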
@@ -2432,14 +2450,18 @@ static __init int mcheck_init_device(void) | |||
2432 | if (err) | 2450 | if (err) |
2433 | return err; | 2451 | return err; |
2434 | 2452 | ||
2453 | cpu_notifier_register_begin(); | ||
2435 | for_each_online_cpu(i) { | 2454 | for_each_online_cpu(i) { |
2436 | err = mce_device_create(i); | 2455 | err = mce_device_create(i); |
2437 | if (err) | 2456 | if (err) { |
2457 | cpu_notifier_register_done(); | ||
2438 | return err; | 2458 | return err; |
2459 | } | ||
2439 | } | 2460 | } |
2440 | 2461 | ||
2441 | register_syscore_ops(&mce_syscore_ops); | 2462 | register_syscore_ops(&mce_syscore_ops); |
2442 | register_hotcpu_notifier(&mce_cpu_notifier); | 2463 | __register_hotcpu_notifier(&mce_cpu_notifier); |
2464 | cpu_notifier_register_done(); | ||
2443 | 2465 | ||
2444 | /* register character device /dev/mcelog */ | 2466 | /* register character device /dev/mcelog */ |
2445 | misc_register(&mce_chrdev_device); | 2467 | misc_register(&mce_chrdev_device); |
diff --git a/arch/x86/kernel/cpu/mcheck/mce_intel.c b/arch/x86/kernel/cpu/mcheck/mce_intel.c index 4cfe0458ca66..3bdb95ae8c43 100644 --- a/arch/x86/kernel/cpu/mcheck/mce_intel.c +++ b/arch/x86/kernel/cpu/mcheck/mce_intel.c | |||
@@ -6,10 +6,10 @@ | |||
6 | */ | 6 | */ |
7 | 7 | ||
8 | #include <linux/gfp.h> | 8 | #include <linux/gfp.h> |
9 | #include <linux/init.h> | ||
10 | #include <linux/interrupt.h> | 9 | #include <linux/interrupt.h> |
11 | #include <linux/percpu.h> | 10 | #include <linux/percpu.h> |
12 | #include <linux/sched.h> | 11 | #include <linux/sched.h> |
12 | #include <linux/cpumask.h> | ||
13 | #include <asm/apic.h> | 13 | #include <asm/apic.h> |
14 | #include <asm/processor.h> | 14 | #include <asm/processor.h> |
15 | #include <asm/msr.h> | 15 | #include <asm/msr.h> |
@@ -138,6 +138,22 @@ unsigned long mce_intel_adjust_timer(unsigned long interval) | |||
138 | } | 138 | } |
139 | } | 139 | } |
140 | 140 | ||
141 | static void cmci_storm_disable_banks(void) | ||
142 | { | ||
143 | unsigned long flags, *owned; | ||
144 | int bank; | ||
145 | u64 val; | ||
146 | |||
147 | raw_spin_lock_irqsave(&cmci_discover_lock, flags); | ||
148 | owned = __get_cpu_var(mce_banks_owned); | ||
149 | for_each_set_bit(bank, owned, MAX_NR_BANKS) { | ||
150 | rdmsrl(MSR_IA32_MCx_CTL2(bank), val); | ||
151 | val &= ~MCI_CTL2_CMCI_EN; | ||
152 | wrmsrl(MSR_IA32_MCx_CTL2(bank), val); | ||
153 | } | ||
154 | raw_spin_unlock_irqrestore(&cmci_discover_lock, flags); | ||
155 | } | ||
156 | |||
141 | static bool cmci_storm_detect(void) | 157 | static bool cmci_storm_detect(void) |
142 | { | 158 | { |
143 | unsigned int cnt = __this_cpu_read(cmci_storm_cnt); | 159 | unsigned int cnt = __this_cpu_read(cmci_storm_cnt); |
@@ -159,7 +175,7 @@ static bool cmci_storm_detect(void) | |||
159 | if (cnt <= CMCI_STORM_THRESHOLD) | 175 | if (cnt <= CMCI_STORM_THRESHOLD) |
160 | return false; | 176 | return false; |
161 | 177 | ||
162 | cmci_clear(); | 178 | cmci_storm_disable_banks(); |
163 | __this_cpu_write(cmci_storm_state, CMCI_STORM_ACTIVE); | 179 | __this_cpu_write(cmci_storm_state, CMCI_STORM_ACTIVE); |
164 | r = atomic_add_return(1, &cmci_storm_on_cpus); | 180 | r = atomic_add_return(1, &cmci_storm_on_cpus); |
165 | mce_timer_kick(CMCI_POLL_INTERVAL); | 181 | mce_timer_kick(CMCI_POLL_INTERVAL); |
diff --git a/arch/x86/kernel/cpu/mcheck/p5.c b/arch/x86/kernel/cpu/mcheck/p5.c index 1c044b1ccc59..a3042989398c 100644 --- a/arch/x86/kernel/cpu/mcheck/p5.c +++ b/arch/x86/kernel/cpu/mcheck/p5.c | |||
@@ -5,7 +5,6 @@ | |||
5 | #include <linux/interrupt.h> | 5 | #include <linux/interrupt.h> |
6 | #include <linux/kernel.h> | 6 | #include <linux/kernel.h> |
7 | #include <linux/types.h> | 7 | #include <linux/types.h> |
8 | #include <linux/init.h> | ||
9 | #include <linux/smp.h> | 8 | #include <linux/smp.h> |
10 | 9 | ||
11 | #include <asm/processor.h> | 10 | #include <asm/processor.h> |
diff --git a/arch/x86/kernel/cpu/mcheck/therm_throt.c b/arch/x86/kernel/cpu/mcheck/therm_throt.c index 3eec7de76efb..d921b7ee6595 100644 --- a/arch/x86/kernel/cpu/mcheck/therm_throt.c +++ b/arch/x86/kernel/cpu/mcheck/therm_throt.c | |||
@@ -271,9 +271,6 @@ static void thermal_throttle_remove_dev(struct device *dev) | |||
271 | sysfs_remove_group(&dev->kobj, &thermal_attr_group); | 271 | sysfs_remove_group(&dev->kobj, &thermal_attr_group); |
272 | } | 272 | } |
273 | 273 | ||
274 | /* Mutex protecting device creation against CPU hotplug: */ | ||
275 | static DEFINE_MUTEX(therm_cpu_lock); | ||
276 | |||
277 | /* Get notified when a cpu comes on/off. Be hotplug friendly. */ | 274 | /* Get notified when a cpu comes on/off. Be hotplug friendly. */ |
278 | static int | 275 | static int |
279 | thermal_throttle_cpu_callback(struct notifier_block *nfb, | 276 | thermal_throttle_cpu_callback(struct notifier_block *nfb, |
@@ -289,18 +286,14 @@ thermal_throttle_cpu_callback(struct notifier_block *nfb, | |||
289 | switch (action) { | 286 | switch (action) { |
290 | case CPU_UP_PREPARE: | 287 | case CPU_UP_PREPARE: |
291 | case CPU_UP_PREPARE_FROZEN: | 288 | case CPU_UP_PREPARE_FROZEN: |
292 | mutex_lock(&therm_cpu_lock); | ||
293 | err = thermal_throttle_add_dev(dev, cpu); | 289 | err = thermal_throttle_add_dev(dev, cpu); |
294 | mutex_unlock(&therm_cpu_lock); | ||
295 | WARN_ON(err); | 290 | WARN_ON(err); |
296 | break; | 291 | break; |
297 | case CPU_UP_CANCELED: | 292 | case CPU_UP_CANCELED: |
298 | case CPU_UP_CANCELED_FROZEN: | 293 | case CPU_UP_CANCELED_FROZEN: |
299 | case CPU_DEAD: | 294 | case CPU_DEAD: |
300 | case CPU_DEAD_FROZEN: | 295 | case CPU_DEAD_FROZEN: |
301 | mutex_lock(&therm_cpu_lock); | ||
302 | thermal_throttle_remove_dev(dev); | 296 | thermal_throttle_remove_dev(dev); |
303 | mutex_unlock(&therm_cpu_lock); | ||
304 | break; | 297 | break; |
305 | } | 298 | } |
306 | return notifier_from_errno(err); | 299 | return notifier_from_errno(err); |
@@ -319,19 +312,16 @@ static __init int thermal_throttle_init_device(void) | |||
319 | if (!atomic_read(&therm_throt_en)) | 312 | if (!atomic_read(&therm_throt_en)) |
320 | return 0; | 313 | return 0; |
321 | 314 | ||
322 | register_hotcpu_notifier(&thermal_throttle_cpu_notifier); | 315 | cpu_notifier_register_begin(); |
323 | 316 | ||
324 | #ifdef CONFIG_HOTPLUG_CPU | ||
325 | mutex_lock(&therm_cpu_lock); | ||
326 | #endif | ||
327 | /* connect live CPUs to sysfs */ | 317 | /* connect live CPUs to sysfs */ |
328 | for_each_online_cpu(cpu) { | 318 | for_each_online_cpu(cpu) { |
329 | err = thermal_throttle_add_dev(get_cpu_device(cpu), cpu); | 319 | err = thermal_throttle_add_dev(get_cpu_device(cpu), cpu); |
330 | WARN_ON(err); | 320 | WARN_ON(err); |
331 | } | 321 | } |
332 | #ifdef CONFIG_HOTPLUG_CPU | 322 | |
333 | mutex_unlock(&therm_cpu_lock); | 323 | __register_hotcpu_notifier(&thermal_throttle_cpu_notifier); |
334 | #endif | 324 | cpu_notifier_register_done(); |
335 | 325 | ||
336 | return 0; | 326 | return 0; |
337 | } | 327 | } |
diff --git a/arch/x86/kernel/cpu/mcheck/winchip.c b/arch/x86/kernel/cpu/mcheck/winchip.c index e9a701aecaa1..7dc5564d0cdf 100644 --- a/arch/x86/kernel/cpu/mcheck/winchip.c +++ b/arch/x86/kernel/cpu/mcheck/winchip.c | |||
@@ -5,7 +5,6 @@ | |||
5 | #include <linux/interrupt.h> | 5 | #include <linux/interrupt.h> |
6 | #include <linux/kernel.h> | 6 | #include <linux/kernel.h> |
7 | #include <linux/types.h> | 7 | #include <linux/types.h> |
8 | #include <linux/init.h> | ||
9 | 8 | ||
10 | #include <asm/processor.h> | 9 | #include <asm/processor.h> |
11 | #include <asm/mce.h> | 10 | #include <asm/mce.h> |
diff --git a/arch/x86/kernel/cpu/microcode/Makefile b/arch/x86/kernel/cpu/microcode/Makefile new file mode 100644 index 000000000000..285c85427c32 --- /dev/null +++ b/arch/x86/kernel/cpu/microcode/Makefile | |||
@@ -0,0 +1,7 @@ | |||
1 | microcode-y := core.o | ||
2 | obj-$(CONFIG_MICROCODE) += microcode.o | ||
3 | microcode-$(CONFIG_MICROCODE_INTEL) += intel.o intel_lib.o | ||
4 | microcode-$(CONFIG_MICROCODE_AMD) += amd.o | ||
5 | obj-$(CONFIG_MICROCODE_EARLY) += core_early.o | ||
6 | obj-$(CONFIG_MICROCODE_INTEL_EARLY) += intel_early.o | ||
7 | obj-$(CONFIG_MICROCODE_AMD_EARLY) += amd_early.o | ||
diff --git a/arch/x86/kernel/microcode_amd.c b/arch/x86/kernel/cpu/microcode/amd.c index c3d4cc972eca..8fffd845e22b 100644 --- a/arch/x86/kernel/microcode_amd.c +++ b/arch/x86/kernel/cpu/microcode/amd.c | |||
@@ -182,10 +182,10 @@ int __apply_microcode_amd(struct microcode_amd *mc_amd) | |||
182 | { | 182 | { |
183 | u32 rev, dummy; | 183 | u32 rev, dummy; |
184 | 184 | ||
185 | wrmsrl(MSR_AMD64_PATCH_LOADER, (u64)(long)&mc_amd->hdr.data_code); | 185 | native_wrmsrl(MSR_AMD64_PATCH_LOADER, (u64)(long)&mc_amd->hdr.data_code); |
186 | 186 | ||
187 | /* verify patch application was successful */ | 187 | /* verify patch application was successful */ |
188 | rdmsr(MSR_AMD64_PATCH_LEVEL, rev, dummy); | 188 | native_rdmsr(MSR_AMD64_PATCH_LEVEL, rev, dummy); |
189 | if (rev != mc_amd->hdr.patch_id) | 189 | if (rev != mc_amd->hdr.patch_id) |
190 | return -1; | 190 | return -1; |
191 | 191 | ||
@@ -332,6 +332,9 @@ static int verify_and_add_patch(u8 family, u8 *fw, unsigned int leftover) | |||
332 | patch->patch_id = mc_hdr->patch_id; | 332 | patch->patch_id = mc_hdr->patch_id; |
333 | patch->equiv_cpu = proc_id; | 333 | patch->equiv_cpu = proc_id; |
334 | 334 | ||
335 | pr_debug("%s: Added patch_id: 0x%08x, proc_id: 0x%04x\n", | ||
336 | __func__, patch->patch_id, proc_id); | ||
337 | |||
335 | /* ... and add to cache. */ | 338 | /* ... and add to cache. */ |
336 | update_cache(patch); | 339 | update_cache(patch); |
337 | 340 | ||
@@ -390,9 +393,9 @@ enum ucode_state load_microcode_amd(u8 family, const u8 *data, size_t size) | |||
390 | if (cpu_data(smp_processor_id()).cpu_index == boot_cpu_data.cpu_index) { | 393 | if (cpu_data(smp_processor_id()).cpu_index == boot_cpu_data.cpu_index) { |
391 | struct ucode_patch *p = find_patch(smp_processor_id()); | 394 | struct ucode_patch *p = find_patch(smp_processor_id()); |
392 | if (p) { | 395 | if (p) { |
393 | memset(amd_bsp_mpb, 0, MPB_MAX_SIZE); | 396 | memset(amd_ucode_patch, 0, PATCH_MAX_SIZE); |
394 | memcpy(amd_bsp_mpb, p->data, min_t(u32, ksize(p->data), | 397 | memcpy(amd_ucode_patch, p->data, min_t(u32, ksize(p->data), |
395 | MPB_MAX_SIZE)); | 398 | PATCH_MAX_SIZE)); |
396 | } | 399 | } |
397 | } | 400 | } |
398 | #endif | 401 | #endif |
@@ -430,7 +433,7 @@ static enum ucode_state request_microcode_amd(int cpu, struct device *device, | |||
430 | if (c->x86 >= 0x15) | 433 | if (c->x86 >= 0x15) |
431 | snprintf(fw_name, sizeof(fw_name), "amd-ucode/microcode_amd_fam%.2xh.bin", c->x86); | 434 | snprintf(fw_name, sizeof(fw_name), "amd-ucode/microcode_amd_fam%.2xh.bin", c->x86); |
432 | 435 | ||
433 | if (request_firmware(&fw, (const char *)fw_name, device)) { | 436 | if (request_firmware_direct(&fw, (const char *)fw_name, device)) { |
434 | pr_debug("failed to load file %s\n", fw_name); | 437 | pr_debug("failed to load file %s\n", fw_name); |
435 | goto out; | 438 | goto out; |
436 | } | 439 | } |
diff --git a/arch/x86/kernel/microcode_amd_early.c b/arch/x86/kernel/cpu/microcode/amd_early.c index 6073104ccaa3..617a9e284245 100644 --- a/arch/x86/kernel/microcode_amd_early.c +++ b/arch/x86/kernel/cpu/microcode/amd_early.c | |||
@@ -2,6 +2,7 @@ | |||
2 | * Copyright (C) 2013 Advanced Micro Devices, Inc. | 2 | * Copyright (C) 2013 Advanced Micro Devices, Inc. |
3 | * | 3 | * |
4 | * Author: Jacob Shin <jacob.shin@amd.com> | 4 | * Author: Jacob Shin <jacob.shin@amd.com> |
5 | * Fixes: Borislav Petkov <bp@suse.de> | ||
5 | * | 6 | * |
6 | * This program is free software; you can redistribute it and/or modify | 7 | * This program is free software; you can redistribute it and/or modify |
7 | * it under the terms of the GNU General Public License version 2 as | 8 | * it under the terms of the GNU General Public License version 2 as |
@@ -15,10 +16,18 @@ | |||
15 | #include <asm/setup.h> | 16 | #include <asm/setup.h> |
16 | #include <asm/microcode_amd.h> | 17 | #include <asm/microcode_amd.h> |
17 | 18 | ||
18 | static bool ucode_loaded; | 19 | /* |
20 | * This points to the current valid container of microcode patches which we will | ||
21 | * save from the initrd before jettisoning its contents. | ||
22 | */ | ||
23 | static u8 *container; | ||
24 | static size_t container_size; | ||
25 | |||
19 | static u32 ucode_new_rev; | 26 | static u32 ucode_new_rev; |
20 | static unsigned long ucode_offset; | 27 | u8 amd_ucode_patch[PATCH_MAX_SIZE]; |
21 | static size_t ucode_size; | 28 | static u16 this_equiv_id; |
29 | |||
30 | struct cpio_data ucode_cpio; | ||
22 | 31 | ||
23 | /* | 32 | /* |
24 | * Microcode patch container file is prepended to the initrd in cpio format. | 33 | * Microcode patch container file is prepended to the initrd in cpio format. |
@@ -32,9 +41,6 @@ static struct cpio_data __init find_ucode_in_initrd(void) | |||
32 | char *path; | 41 | char *path; |
33 | void *start; | 42 | void *start; |
34 | size_t size; | 43 | size_t size; |
35 | unsigned long *uoffset; | ||
36 | size_t *usize; | ||
37 | struct cpio_data cd; | ||
38 | 44 | ||
39 | #ifdef CONFIG_X86_32 | 45 | #ifdef CONFIG_X86_32 |
40 | struct boot_params *p; | 46 | struct boot_params *p; |
@@ -47,30 +53,50 @@ static struct cpio_data __init find_ucode_in_initrd(void) | |||
47 | path = (char *)__pa_nodebug(ucode_path); | 53 | path = (char *)__pa_nodebug(ucode_path); |
48 | start = (void *)p->hdr.ramdisk_image; | 54 | start = (void *)p->hdr.ramdisk_image; |
49 | size = p->hdr.ramdisk_size; | 55 | size = p->hdr.ramdisk_size; |
50 | uoffset = (unsigned long *)__pa_nodebug(&ucode_offset); | ||
51 | usize = (size_t *)__pa_nodebug(&ucode_size); | ||
52 | #else | 56 | #else |
53 | path = ucode_path; | 57 | path = ucode_path; |
54 | start = (void *)(boot_params.hdr.ramdisk_image + PAGE_OFFSET); | 58 | start = (void *)(boot_params.hdr.ramdisk_image + PAGE_OFFSET); |
55 | size = boot_params.hdr.ramdisk_size; | 59 | size = boot_params.hdr.ramdisk_size; |
56 | uoffset = &ucode_offset; | ||
57 | usize = &ucode_size; | ||
58 | #endif | 60 | #endif |
59 | 61 | ||
60 | cd = find_cpio_data(path, start, size, &offset); | 62 | return find_cpio_data(path, start, size, &offset); |
61 | if (!cd.data) | 63 | } |
62 | return cd; | ||
63 | 64 | ||
64 | if (*(u32 *)cd.data != UCODE_MAGIC) { | 65 | static size_t compute_container_size(u8 *data, u32 total_size) |
65 | cd.data = NULL; | 66 | { |
66 | cd.size = 0; | 67 | size_t size = 0; |
67 | return cd; | 68 | u32 *header = (u32 *)data; |
68 | } | 69 | |
70 | if (header[0] != UCODE_MAGIC || | ||
71 | header[1] != UCODE_EQUIV_CPU_TABLE_TYPE || /* type */ | ||
72 | header[2] == 0) /* size */ | ||
73 | return size; | ||
74 | |||
75 | size = header[2] + CONTAINER_HDR_SZ; | ||
76 | total_size -= size; | ||
77 | data += size; | ||
69 | 78 | ||
70 | *uoffset = (u8 *)cd.data - (u8 *)start; | 79 | while (total_size) { |
71 | *usize = cd.size; | 80 | u16 patch_size; |
81 | |||
82 | header = (u32 *)data; | ||
83 | |||
84 | if (header[0] != UCODE_UCODE_TYPE) | ||
85 | break; | ||
86 | |||
87 | /* | ||
88 | * Sanity-check patch size. | ||
89 | */ | ||
90 | patch_size = header[1]; | ||
91 | if (patch_size > PATCH_MAX_SIZE) | ||
92 | break; | ||
93 | |||
94 | size += patch_size + SECTION_HDR_SIZE; | ||
95 | data += patch_size + SECTION_HDR_SIZE; | ||
96 | total_size -= patch_size + SECTION_HDR_SIZE; | ||
97 | } | ||
72 | 98 | ||
73 | return cd; | 99 | return size; |
74 | } | 100 | } |
75 | 101 | ||
76 | /* | 102 | /* |
@@ -85,23 +111,22 @@ static struct cpio_data __init find_ucode_in_initrd(void) | |||
85 | static void apply_ucode_in_initrd(void *ucode, size_t size) | 111 | static void apply_ucode_in_initrd(void *ucode, size_t size) |
86 | { | 112 | { |
87 | struct equiv_cpu_entry *eq; | 113 | struct equiv_cpu_entry *eq; |
114 | size_t *cont_sz; | ||
88 | u32 *header; | 115 | u32 *header; |
89 | u8 *data; | 116 | u8 *data, **cont; |
90 | u16 eq_id = 0; | 117 | u16 eq_id = 0; |
91 | int offset, left; | 118 | int offset, left; |
92 | u32 rev, eax; | 119 | u32 rev, eax, ebx, ecx, edx; |
93 | u32 *new_rev; | 120 | u32 *new_rev; |
94 | unsigned long *uoffset; | ||
95 | size_t *usize; | ||
96 | 121 | ||
97 | #ifdef CONFIG_X86_32 | 122 | #ifdef CONFIG_X86_32 |
98 | new_rev = (u32 *)__pa_nodebug(&ucode_new_rev); | 123 | new_rev = (u32 *)__pa_nodebug(&ucode_new_rev); |
99 | uoffset = (unsigned long *)__pa_nodebug(&ucode_offset); | 124 | cont_sz = (size_t *)__pa_nodebug(&container_size); |
100 | usize = (size_t *)__pa_nodebug(&ucode_size); | 125 | cont = (u8 **)__pa_nodebug(&container); |
101 | #else | 126 | #else |
102 | new_rev = &ucode_new_rev; | 127 | new_rev = &ucode_new_rev; |
103 | uoffset = &ucode_offset; | 128 | cont_sz = &container_size; |
104 | usize = &ucode_size; | 129 | cont = &container; |
105 | #endif | 130 | #endif |
106 | 131 | ||
107 | data = ucode; | 132 | data = ucode; |
@@ -109,23 +134,37 @@ static void apply_ucode_in_initrd(void *ucode, size_t size) | |||
109 | header = (u32 *)data; | 134 | header = (u32 *)data; |
110 | 135 | ||
111 | /* find equiv cpu table */ | 136 | /* find equiv cpu table */ |
112 | 137 | if (header[0] != UCODE_MAGIC || | |
113 | if (header[1] != UCODE_EQUIV_CPU_TABLE_TYPE || /* type */ | 138 | header[1] != UCODE_EQUIV_CPU_TABLE_TYPE || /* type */ |
114 | header[2] == 0) /* size */ | 139 | header[2] == 0) /* size */ |
115 | return; | 140 | return; |
116 | 141 | ||
117 | eax = cpuid_eax(0x00000001); | 142 | eax = 0x00000001; |
143 | ecx = 0; | ||
144 | native_cpuid(&eax, &ebx, &ecx, &edx); | ||
118 | 145 | ||
119 | while (left > 0) { | 146 | while (left > 0) { |
120 | eq = (struct equiv_cpu_entry *)(data + CONTAINER_HDR_SZ); | 147 | eq = (struct equiv_cpu_entry *)(data + CONTAINER_HDR_SZ); |
121 | 148 | ||
149 | *cont = data; | ||
150 | |||
151 | /* Advance past the container header */ | ||
122 | offset = header[2] + CONTAINER_HDR_SZ; | 152 | offset = header[2] + CONTAINER_HDR_SZ; |
123 | data += offset; | 153 | data += offset; |
124 | left -= offset; | 154 | left -= offset; |
125 | 155 | ||
126 | eq_id = find_equiv_id(eq, eax); | 156 | eq_id = find_equiv_id(eq, eax); |
127 | if (eq_id) | 157 | if (eq_id) { |
158 | this_equiv_id = eq_id; | ||
159 | *cont_sz = compute_container_size(*cont, left + offset); | ||
160 | |||
161 | /* | ||
162 | * truncate how much we need to iterate over in the | ||
163 | * ucode update loop below | ||
164 | */ | ||
165 | left = *cont_sz - offset; | ||
128 | break; | 166 | break; |
167 | } | ||
129 | 168 | ||
130 | /* | 169 | /* |
131 | * support multiple container files appended together. if this | 170 | * support multiple container files appended together. if this |
@@ -145,19 +184,18 @@ static void apply_ucode_in_initrd(void *ucode, size_t size) | |||
145 | 184 | ||
146 | /* mark where the next microcode container file starts */ | 185 | /* mark where the next microcode container file starts */ |
147 | offset = data - (u8 *)ucode; | 186 | offset = data - (u8 *)ucode; |
148 | *uoffset += offset; | ||
149 | *usize -= offset; | ||
150 | ucode = data; | 187 | ucode = data; |
151 | } | 188 | } |
152 | 189 | ||
153 | if (!eq_id) { | 190 | if (!eq_id) { |
154 | *usize = 0; | 191 | *cont = NULL; |
192 | *cont_sz = 0; | ||
155 | return; | 193 | return; |
156 | } | 194 | } |
157 | 195 | ||
158 | /* find ucode and update if needed */ | 196 | /* find ucode and update if needed */ |
159 | 197 | ||
160 | rdmsr(MSR_AMD64_PATCH_LEVEL, rev, eax); | 198 | native_rdmsr(MSR_AMD64_PATCH_LEVEL, rev, eax); |
161 | 199 | ||
162 | while (left > 0) { | 200 | while (left > 0) { |
163 | struct microcode_amd *mc; | 201 | struct microcode_amd *mc; |
@@ -168,134 +206,190 @@ static void apply_ucode_in_initrd(void *ucode, size_t size) | |||
168 | break; | 206 | break; |
169 | 207 | ||
170 | mc = (struct microcode_amd *)(data + SECTION_HDR_SIZE); | 208 | mc = (struct microcode_amd *)(data + SECTION_HDR_SIZE); |
171 | if (eq_id == mc->hdr.processor_rev_id && rev < mc->hdr.patch_id) | 209 | |
172 | if (__apply_microcode_amd(mc) == 0) { | 210 | if (eq_id == mc->hdr.processor_rev_id && rev < mc->hdr.patch_id) { |
211 | |||
212 | if (!__apply_microcode_amd(mc)) { | ||
173 | rev = mc->hdr.patch_id; | 213 | rev = mc->hdr.patch_id; |
174 | *new_rev = rev; | 214 | *new_rev = rev; |
215 | |||
216 | /* save ucode patch */ | ||
217 | memcpy(amd_ucode_patch, mc, | ||
218 | min_t(u32, header[1], PATCH_MAX_SIZE)); | ||
175 | } | 219 | } |
220 | } | ||
176 | 221 | ||
177 | offset = header[1] + SECTION_HDR_SIZE; | 222 | offset = header[1] + SECTION_HDR_SIZE; |
178 | data += offset; | 223 | data += offset; |
179 | left -= offset; | 224 | left -= offset; |
180 | } | 225 | } |
181 | |||
182 | /* mark where this microcode container file ends */ | ||
183 | offset = *usize - (data - (u8 *)ucode); | ||
184 | *usize -= offset; | ||
185 | |||
186 | if (!(*new_rev)) | ||
187 | *usize = 0; | ||
188 | } | 226 | } |
189 | 227 | ||
190 | void __init load_ucode_amd_bsp(void) | 228 | void __init load_ucode_amd_bsp(void) |
191 | { | 229 | { |
192 | struct cpio_data cd = find_ucode_in_initrd(); | 230 | struct cpio_data cp; |
193 | if (!cd.data) | 231 | void **data; |
232 | size_t *size; | ||
233 | |||
234 | #ifdef CONFIG_X86_32 | ||
235 | data = (void **)__pa_nodebug(&ucode_cpio.data); | ||
236 | size = (size_t *)__pa_nodebug(&ucode_cpio.size); | ||
237 | #else | ||
238 | data = &ucode_cpio.data; | ||
239 | size = &ucode_cpio.size; | ||
240 | #endif | ||
241 | |||
242 | cp = find_ucode_in_initrd(); | ||
243 | if (!cp.data) | ||
194 | return; | 244 | return; |
195 | 245 | ||
196 | apply_ucode_in_initrd(cd.data, cd.size); | 246 | *data = cp.data; |
247 | *size = cp.size; | ||
248 | |||
249 | apply_ucode_in_initrd(cp.data, cp.size); | ||
197 | } | 250 | } |
198 | 251 | ||
199 | #ifdef CONFIG_X86_32 | 252 | #ifdef CONFIG_X86_32 |
200 | u8 amd_bsp_mpb[MPB_MAX_SIZE]; | ||
201 | |||
202 | /* | 253 | /* |
203 | * On 32-bit, since AP's early load occurs before paging is turned on, we | 254 | * On 32-bit, since AP's early load occurs before paging is turned on, we |
204 | * cannot traverse cpu_equiv_table and pcache in kernel heap memory. So during | 255 | * cannot traverse cpu_equiv_table and pcache in kernel heap memory. So during |
205 | * cold boot, AP will apply_ucode_in_initrd() just like the BSP. During | 256 | * cold boot, AP will apply_ucode_in_initrd() just like the BSP. During |
206 | * save_microcode_in_initrd_amd() BSP's patch is copied to amd_bsp_mpb, which | 257 | * save_microcode_in_initrd_amd() BSP's patch is copied to amd_ucode_patch, |
207 | * is used upon resume from suspend. | 258 | * which is used upon resume from suspend. |
208 | */ | 259 | */ |
209 | void load_ucode_amd_ap(void) | 260 | void load_ucode_amd_ap(void) |
210 | { | 261 | { |
211 | struct microcode_amd *mc; | 262 | struct microcode_amd *mc; |
212 | unsigned long *initrd; | ||
213 | unsigned long *uoffset; | ||
214 | size_t *usize; | 263 | size_t *usize; |
215 | void *ucode; | 264 | void **ucode; |
216 | 265 | ||
217 | mc = (struct microcode_amd *)__pa(amd_bsp_mpb); | 266 | mc = (struct microcode_amd *)__pa(amd_ucode_patch); |
218 | if (mc->hdr.patch_id && mc->hdr.processor_rev_id) { | 267 | if (mc->hdr.patch_id && mc->hdr.processor_rev_id) { |
219 | __apply_microcode_amd(mc); | 268 | __apply_microcode_amd(mc); |
220 | return; | 269 | return; |
221 | } | 270 | } |
222 | 271 | ||
223 | initrd = (unsigned long *)__pa(&initrd_start); | 272 | ucode = (void *)__pa_nodebug(&container); |
224 | uoffset = (unsigned long *)__pa(&ucode_offset); | 273 | usize = (size_t *)__pa_nodebug(&container_size); |
225 | usize = (size_t *)__pa(&ucode_size); | ||
226 | 274 | ||
227 | if (!*usize || !*initrd) | 275 | if (!*ucode || !*usize) |
228 | return; | 276 | return; |
229 | 277 | ||
230 | ucode = (void *)((unsigned long)__pa(*initrd) + *uoffset); | 278 | apply_ucode_in_initrd(*ucode, *usize); |
231 | apply_ucode_in_initrd(ucode, *usize); | ||
232 | } | 279 | } |
233 | 280 | ||
234 | static void __init collect_cpu_sig_on_bsp(void *arg) | 281 | static void __init collect_cpu_sig_on_bsp(void *arg) |
235 | { | 282 | { |
236 | unsigned int cpu = smp_processor_id(); | 283 | unsigned int cpu = smp_processor_id(); |
237 | struct ucode_cpu_info *uci = ucode_cpu_info + cpu; | 284 | struct ucode_cpu_info *uci = ucode_cpu_info + cpu; |
285 | |||
238 | uci->cpu_sig.sig = cpuid_eax(0x00000001); | 286 | uci->cpu_sig.sig = cpuid_eax(0x00000001); |
239 | } | 287 | } |
288 | |||
289 | static void __init get_bsp_sig(void) | ||
290 | { | ||
291 | unsigned int bsp = boot_cpu_data.cpu_index; | ||
292 | struct ucode_cpu_info *uci = ucode_cpu_info + bsp; | ||
293 | |||
294 | if (!uci->cpu_sig.sig) | ||
295 | smp_call_function_single(bsp, collect_cpu_sig_on_bsp, NULL, 1); | ||
296 | } | ||
240 | #else | 297 | #else |
241 | void load_ucode_amd_ap(void) | 298 | void load_ucode_amd_ap(void) |
242 | { | 299 | { |
243 | unsigned int cpu = smp_processor_id(); | 300 | unsigned int cpu = smp_processor_id(); |
244 | struct ucode_cpu_info *uci = ucode_cpu_info + cpu; | 301 | struct ucode_cpu_info *uci = ucode_cpu_info + cpu; |
302 | struct equiv_cpu_entry *eq; | ||
303 | struct microcode_amd *mc; | ||
245 | u32 rev, eax; | 304 | u32 rev, eax; |
305 | u16 eq_id; | ||
306 | |||
307 | /* Exit if called on the BSP. */ | ||
308 | if (!cpu) | ||
309 | return; | ||
310 | |||
311 | if (!container) | ||
312 | return; | ||
246 | 313 | ||
247 | rdmsr(MSR_AMD64_PATCH_LEVEL, rev, eax); | 314 | rdmsr(MSR_AMD64_PATCH_LEVEL, rev, eax); |
248 | eax = cpuid_eax(0x00000001); | ||
249 | 315 | ||
250 | uci->cpu_sig.rev = rev; | 316 | uci->cpu_sig.rev = rev; |
251 | uci->cpu_sig.sig = eax; | 317 | uci->cpu_sig.sig = eax; |
252 | 318 | ||
253 | if (cpu && !ucode_loaded) { | 319 | eax = cpuid_eax(0x00000001); |
254 | void *ucode; | 320 | eq = (struct equiv_cpu_entry *)(container + CONTAINER_HDR_SZ); |
321 | |||
322 | eq_id = find_equiv_id(eq, eax); | ||
323 | if (!eq_id) | ||
324 | return; | ||
255 | 325 | ||
256 | if (!ucode_size || !initrd_start) | 326 | if (eq_id == this_equiv_id) { |
257 | return; | 327 | mc = (struct microcode_amd *)amd_ucode_patch; |
328 | |||
329 | if (mc && rev < mc->hdr.patch_id) { | ||
330 | if (!__apply_microcode_amd(mc)) | ||
331 | ucode_new_rev = mc->hdr.patch_id; | ||
332 | } | ||
258 | 333 | ||
259 | ucode = (void *)(initrd_start + ucode_offset); | 334 | } else { |
260 | eax = ((eax >> 8) & 0xf) + ((eax >> 20) & 0xff); | 335 | if (!ucode_cpio.data) |
261 | if (load_microcode_amd(eax, ucode, ucode_size) != UCODE_OK) | ||
262 | return; | 336 | return; |
263 | 337 | ||
264 | ucode_loaded = true; | 338 | /* |
339 | * The AP has a different equivalence ID from the BSP; this looks | ||
340 | * like mixed-stepping silicon, so go through the ucode blob anew. | ||
341 | */ | ||
342 | apply_ucode_in_initrd(ucode_cpio.data, ucode_cpio.size); | ||
265 | } | 343 | } |
266 | |||
267 | apply_microcode_amd(cpu); | ||
268 | } | 344 | } |
269 | #endif | 345 | #endif |
270 | 346 | ||
271 | int __init save_microcode_in_initrd_amd(void) | 347 | int __init save_microcode_in_initrd_amd(void) |
272 | { | 348 | { |
349 | unsigned long cont; | ||
273 | enum ucode_state ret; | 350 | enum ucode_state ret; |
274 | void *ucode; | ||
275 | u32 eax; | 351 | u32 eax; |
276 | 352 | ||
277 | #ifdef CONFIG_X86_32 | 353 | if (!container) |
278 | unsigned int bsp = boot_cpu_data.cpu_index; | 354 | return -EINVAL; |
279 | struct ucode_cpu_info *uci = ucode_cpu_info + bsp; | ||
280 | 355 | ||
281 | if (!uci->cpu_sig.sig) | 356 | #ifdef CONFIG_X86_32 |
282 | smp_call_function_single(bsp, collect_cpu_sig_on_bsp, NULL, 1); | 357 | get_bsp_sig(); |
358 | cont = (unsigned long)container; | ||
359 | #else | ||
360 | /* | ||
361 | * We need the physical address of the container for both bitness since | ||
362 | * boot_params.hdr.ramdisk_image is a physical address. | ||
363 | */ | ||
364 | cont = __pa(container); | ||
283 | #endif | 365 | #endif |
366 | |||
367 | /* | ||
368 | * Take into account the fact that the ramdisk might get relocated and | ||
369 | * therefore we need to recompute the container's position in virtual | ||
370 | * memory space. | ||
371 | */ | ||
372 | if (relocated_ramdisk) | ||
373 | container = (u8 *)(__va(relocated_ramdisk) + | ||
374 | (cont - boot_params.hdr.ramdisk_image)); | ||
375 | |||
284 | if (ucode_new_rev) | 376 | if (ucode_new_rev) |
285 | pr_info("microcode: updated early to new patch_level=0x%08x\n", | 377 | pr_info("microcode: updated early to new patch_level=0x%08x\n", |
286 | ucode_new_rev); | 378 | ucode_new_rev); |
287 | 379 | ||
288 | if (ucode_loaded || !ucode_size || !initrd_start) | ||
289 | return 0; | ||
290 | |||
291 | ucode = (void *)(initrd_start + ucode_offset); | ||
292 | eax = cpuid_eax(0x00000001); | 380 | eax = cpuid_eax(0x00000001); |
293 | eax = ((eax >> 8) & 0xf) + ((eax >> 20) & 0xff); | 381 | eax = ((eax >> 8) & 0xf) + ((eax >> 20) & 0xff); |
294 | 382 | ||
295 | ret = load_microcode_amd(eax, ucode, ucode_size); | 383 | ret = load_microcode_amd(eax, container, container_size); |
296 | if (ret != UCODE_OK) | 384 | if (ret != UCODE_OK) |
297 | return -EINVAL; | 385 | return -EINVAL; |
298 | 386 | ||
299 | ucode_loaded = true; | 387 | /* |
388 | * The initrd is about to be freed. Stash the patches for the current | ||
389 | * family and switch to the patch cache for cpu hotplug etc. later. | ||
390 | */ | ||
391 | container = NULL; | ||
392 | container_size = 0; | ||
393 | |||
300 | return 0; | 394 | return 0; |
301 | } | 395 | } |
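compute_container_size() above walks one container: a container header plus the equivalence-CPU table, followed by sections whose headers announce type and patch size. A self-contained sketch of the same walk (the constants mirror the kernel's definitions as understood here; the per-family maximum patch size is passed in, and well-formed input is assumed):

	#include <stddef.h>
	#include <stdint.h>

	#define UCODE_MAGIC			0x00414d44	/* "DMA\0" on disk */
	#define UCODE_EQUIV_CPU_TABLE_TYPE	0x00000000
	#define UCODE_UCODE_TYPE		0x00000001
	#define CONTAINER_HDR_SZ		12
	#define SECTION_HDR_SIZE		8

	static size_t container_size(const uint8_t *data, uint32_t total,
				     uint32_t patch_max)
	{
		const uint32_t *hdr = (const uint32_t *)data;
		size_t size;

		if (hdr[0] != UCODE_MAGIC ||
		    hdr[1] != UCODE_EQUIV_CPU_TABLE_TYPE || hdr[2] == 0)
			return 0;

		size   = hdr[2] + CONTAINER_HDR_SZ;	/* equivalence table */
		data  += size;
		total -= size;

		while (total) {
			uint32_t psize;

			hdr = (const uint32_t *)data;
			if (hdr[0] != UCODE_UCODE_TYPE)
				break;
			psize = hdr[1];
			if (psize > patch_max)		/* sanity-check patch size */
				break;
			size  += psize + SECTION_HDR_SIZE;
			data  += psize + SECTION_HDR_SIZE;
			total -= psize + SECTION_HDR_SIZE;
		}
		return size;
	}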
diff --git a/arch/x86/kernel/microcode_core.c b/arch/x86/kernel/cpu/microcode/core.c index 15c987698b0f..15c987698b0f 100644 --- a/arch/x86/kernel/microcode_core.c +++ b/arch/x86/kernel/cpu/microcode/core.c | |||
diff --git a/arch/x86/kernel/microcode_core_early.c b/arch/x86/kernel/cpu/microcode/core_early.c index be7f8514f577..be7f8514f577 100644 --- a/arch/x86/kernel/microcode_core_early.c +++ b/arch/x86/kernel/cpu/microcode/core_early.c | |||
diff --git a/arch/x86/kernel/microcode_intel.c b/arch/x86/kernel/cpu/microcode/intel.c index 5fb2cebf556b..a276fa75d9b5 100644 --- a/arch/x86/kernel/microcode_intel.c +++ b/arch/x86/kernel/cpu/microcode/intel.c | |||
@@ -278,7 +278,7 @@ static enum ucode_state request_microcode_fw(int cpu, struct device *device, | |||
278 | sprintf(name, "intel-ucode/%02x-%02x-%02x", | 278 | sprintf(name, "intel-ucode/%02x-%02x-%02x", |
279 | c->x86, c->x86_model, c->x86_mask); | 279 | c->x86, c->x86_model, c->x86_mask); |
280 | 280 | ||
281 | if (request_firmware(&firmware, name, device)) { | 281 | if (request_firmware_direct(&firmware, name, device)) { |
282 | pr_debug("data file %s load failed\n", name); | 282 | pr_debug("data file %s load failed\n", name); |
283 | return UCODE_NFOUND; | 283 | return UCODE_NFOUND; |
284 | } | 284 | } |
diff --git a/arch/x86/kernel/microcode_intel_early.c b/arch/x86/kernel/cpu/microcode/intel_early.c index 1575deb2e636..18f739129e72 100644 --- a/arch/x86/kernel/microcode_intel_early.c +++ b/arch/x86/kernel/cpu/microcode/intel_early.c | |||
@@ -365,16 +365,6 @@ out: | |||
365 | return state; | 365 | return state; |
366 | } | 366 | } |
367 | 367 | ||
368 | #define native_rdmsr(msr, val1, val2) \ | ||
369 | do { \ | ||
370 | u64 __val = native_read_msr((msr)); \ | ||
371 | (void)((val1) = (u32)__val); \ | ||
372 | (void)((val2) = (u32)(__val >> 32)); \ | ||
373 | } while (0) | ||
374 | |||
375 | #define native_wrmsr(msr, low, high) \ | ||
376 | native_write_msr(msr, low, high); | ||
377 | |||
378 | static int collect_cpu_info_early(struct ucode_cpu_info *uci) | 368 | static int collect_cpu_info_early(struct ucode_cpu_info *uci) |
379 | { | 369 | { |
380 | unsigned int val[2]; | 370 | unsigned int val[2]; |
diff --git a/arch/x86/kernel/microcode_intel_lib.c b/arch/x86/kernel/cpu/microcode/intel_lib.c index ce69320d0179..ce69320d0179 100644 --- a/arch/x86/kernel/microcode_intel_lib.c +++ b/arch/x86/kernel/cpu/microcode/intel_lib.c | |||
diff --git a/arch/x86/kernel/cpu/mshyperv.c b/arch/x86/kernel/cpu/mshyperv.c index 9f7ca266864a..76f98fe5b35c 100644 --- a/arch/x86/kernel/cpu/mshyperv.c +++ b/arch/x86/kernel/cpu/mshyperv.c | |||
@@ -17,6 +17,7 @@ | |||
17 | #include <linux/hardirq.h> | 17 | #include <linux/hardirq.h> |
18 | #include <linux/efi.h> | 18 | #include <linux/efi.h> |
19 | #include <linux/interrupt.h> | 19 | #include <linux/interrupt.h> |
20 | #include <linux/irq.h> | ||
20 | #include <asm/processor.h> | 21 | #include <asm/processor.h> |
21 | #include <asm/hypervisor.h> | 22 | #include <asm/hypervisor.h> |
22 | #include <asm/hyperv.h> | 23 | #include <asm/hyperv.h> |
@@ -26,10 +27,50 @@ | |||
26 | #include <asm/irq_regs.h> | 27 | #include <asm/irq_regs.h> |
27 | #include <asm/i8259.h> | 28 | #include <asm/i8259.h> |
28 | #include <asm/apic.h> | 29 | #include <asm/apic.h> |
30 | #include <asm/timer.h> | ||
29 | 31 | ||
30 | struct ms_hyperv_info ms_hyperv; | 32 | struct ms_hyperv_info ms_hyperv; |
31 | EXPORT_SYMBOL_GPL(ms_hyperv); | 33 | EXPORT_SYMBOL_GPL(ms_hyperv); |
32 | 34 | ||
35 | #if IS_ENABLED(CONFIG_HYPERV) | ||
36 | static void (*vmbus_handler)(void); | ||
37 | |||
38 | void hyperv_vector_handler(struct pt_regs *regs) | ||
39 | { | ||
40 | struct pt_regs *old_regs = set_irq_regs(regs); | ||
41 | |||
42 | irq_enter(); | ||
43 | exit_idle(); | ||
44 | |||
45 | inc_irq_stat(irq_hv_callback_count); | ||
46 | if (vmbus_handler) | ||
47 | vmbus_handler(); | ||
48 | |||
49 | irq_exit(); | ||
50 | set_irq_regs(old_regs); | ||
51 | } | ||
52 | |||
53 | void hv_setup_vmbus_irq(void (*handler)(void)) | ||
54 | { | ||
55 | vmbus_handler = handler; | ||
56 | /* | ||
57 | * Set up the IDT for the hypervisor callback. Prevent reallocation | ||
58 | * at module reload. | ||
59 | */ | ||
60 | if (!test_bit(HYPERVISOR_CALLBACK_VECTOR, used_vectors)) | ||
61 | alloc_intr_gate(HYPERVISOR_CALLBACK_VECTOR, | ||
62 | hyperv_callback_vector); | ||
63 | } | ||
64 | |||
65 | void hv_remove_vmbus_irq(void) | ||
66 | { | ||
67 | /* We have no way to deallocate the interrupt gate */ | ||
68 | vmbus_handler = NULL; | ||
69 | } | ||
70 | EXPORT_SYMBOL_GPL(hv_setup_vmbus_irq); | ||
71 | EXPORT_SYMBOL_GPL(hv_remove_vmbus_irq); | ||
72 | #endif | ||
73 | |||
33 | static uint32_t __init ms_hyperv_platform(void) | 74 | static uint32_t __init ms_hyperv_platform(void) |
34 | { | 75 | { |
35 | u32 eax; | 76 | u32 eax; |
@@ -105,6 +146,11 @@ static void __init ms_hyperv_init_platform(void) | |||
105 | 146 | ||
106 | if (ms_hyperv.features & HV_X64_MSR_TIME_REF_COUNT_AVAILABLE) | 147 | if (ms_hyperv.features & HV_X64_MSR_TIME_REF_COUNT_AVAILABLE) |
107 | clocksource_register_hz(&hyperv_cs, NSEC_PER_SEC/100); | 148 | clocksource_register_hz(&hyperv_cs, NSEC_PER_SEC/100); |
149 | |||
150 | #ifdef CONFIG_X86_IO_APIC | ||
151 | no_timer_check = 1; | ||
152 | #endif | ||
153 | |||
108 | } | 154 | } |
109 | 155 | ||
110 | const __refconst struct hypervisor_x86 x86_hyper_ms_hyperv = { | 156 | const __refconst struct hypervisor_x86 x86_hyper_ms_hyperv = { |
@@ -113,41 +159,3 @@ const __refconst struct hypervisor_x86 x86_hyper_ms_hyperv = { | |||
113 | .init_platform = ms_hyperv_init_platform, | 159 | .init_platform = ms_hyperv_init_platform, |
114 | }; | 160 | }; |
115 | EXPORT_SYMBOL(x86_hyper_ms_hyperv); | 161 | EXPORT_SYMBOL(x86_hyper_ms_hyperv); |
116 | |||
117 | #if IS_ENABLED(CONFIG_HYPERV) | ||
118 | static int vmbus_irq = -1; | ||
119 | static irq_handler_t vmbus_isr; | ||
120 | |||
121 | void hv_register_vmbus_handler(int irq, irq_handler_t handler) | ||
122 | { | ||
123 | /* | ||
124 | * Setup the IDT for hypervisor callback. | ||
125 | */ | ||
126 | alloc_intr_gate(HYPERVISOR_CALLBACK_VECTOR, hyperv_callback_vector); | ||
127 | |||
128 | vmbus_irq = irq; | ||
129 | vmbus_isr = handler; | ||
130 | } | ||
131 | |||
132 | void hyperv_vector_handler(struct pt_regs *regs) | ||
133 | { | ||
134 | struct pt_regs *old_regs = set_irq_regs(regs); | ||
135 | struct irq_desc *desc; | ||
136 | |||
137 | irq_enter(); | ||
138 | exit_idle(); | ||
139 | |||
140 | desc = irq_to_desc(vmbus_irq); | ||
141 | |||
142 | if (desc) | ||
143 | generic_handle_irq_desc(vmbus_irq, desc); | ||
144 | |||
145 | irq_exit(); | ||
146 | set_irq_regs(old_regs); | ||
147 | } | ||
148 | #else | ||
149 | void hv_register_vmbus_handler(int irq, irq_handler_t handler) | ||
150 | { | ||
151 | } | ||
152 | #endif | ||
153 | EXPORT_SYMBOL_GPL(hv_register_vmbus_handler); | ||
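The replaced interface required the vmbus driver to hand over a Linux irq number that was then routed back through irq_to_desc(); the new one takes a plain callback. From the vmbus driver's side, usage would look roughly like this (the function names below are placeholders, not code from the tree):

	static void vmbus_isr(void)
	{
		/* scan the event page and schedule channel work */
	}

	static int __init vmbus_drv_init(void)
	{
		hv_setup_vmbus_irq(vmbus_isr);	/* allocates the IDT gate once */
		return 0;
	}

	static void __exit vmbus_drv_exit(void)
	{
		hv_remove_vmbus_irq();		/* the gate itself stays allocated */
	}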
diff --git a/arch/x86/kernel/cpu/mtrr/generic.c b/arch/x86/kernel/cpu/mtrr/generic.c index ce2d0a2c3e4f..0e25a1bc5ab5 100644 --- a/arch/x86/kernel/cpu/mtrr/generic.c +++ b/arch/x86/kernel/cpu/mtrr/generic.c | |||
@@ -683,7 +683,7 @@ static void prepare_set(void) __acquires(set_atomicity_lock) | |||
683 | } | 683 | } |
684 | 684 | ||
685 | /* Flush all TLBs via a mov %cr3, %reg; mov %reg, %cr3 */ | 685 | /* Flush all TLBs via a mov %cr3, %reg; mov %reg, %cr3 */ |
686 | count_vm_event(NR_TLB_LOCAL_FLUSH_ALL); | 686 | count_vm_tlb_event(NR_TLB_LOCAL_FLUSH_ALL); |
687 | __flush_tlb(); | 687 | __flush_tlb(); |
688 | 688 | ||
689 | /* Save MTRR state */ | 689 | /* Save MTRR state */ |
@@ -697,7 +697,7 @@ static void prepare_set(void) __acquires(set_atomicity_lock) | |||
697 | static void post_set(void) __releases(set_atomicity_lock) | 697 | static void post_set(void) __releases(set_atomicity_lock) |
698 | { | 698 | { |
699 | /* Flush TLBs (no need to flush caches - they are disabled) */ | 699 | /* Flush TLBs (no need to flush caches - they are disabled) */ |
700 | count_vm_event(NR_TLB_LOCAL_FLUSH_ALL); | 700 | count_vm_tlb_event(NR_TLB_LOCAL_FLUSH_ALL); |
701 | __flush_tlb(); | 701 | __flush_tlb(); |
702 | 702 | ||
703 | /* Intel (P6) standard MTRRs */ | 703 | /* Intel (P6) standard MTRRs */ |
diff --git a/arch/x86/kernel/cpu/perf_event.c b/arch/x86/kernel/cpu/perf_event.c index 8e132931614d..ae407f7226c8 100644 --- a/arch/x86/kernel/cpu/perf_event.c +++ b/arch/x86/kernel/cpu/perf_event.c | |||
@@ -892,7 +892,6 @@ static void x86_pmu_enable(struct pmu *pmu) | |||
892 | * hw_perf_group_sched_in() or x86_pmu_enable() | 892 | * hw_perf_group_sched_in() or x86_pmu_enable() |
893 | * | 893 | * |
894 | * step1: save events moving to new counters | 894 | * step1: save events moving to new counters |
895 | * step2: reprogram moved events into new counters | ||
896 | */ | 895 | */ |
897 | for (i = 0; i < n_running; i++) { | 896 | for (i = 0; i < n_running; i++) { |
898 | event = cpuc->event_list[i]; | 897 | event = cpuc->event_list[i]; |
@@ -918,6 +917,9 @@ static void x86_pmu_enable(struct pmu *pmu) | |||
918 | x86_pmu_stop(event, PERF_EF_UPDATE); | 917 | x86_pmu_stop(event, PERF_EF_UPDATE); |
919 | } | 918 | } |
920 | 919 | ||
920 | /* | ||
921 | * step2: reprogram moved events into new counters | ||
922 | */ | ||
921 | for (i = 0; i < cpuc->n_events; i++) { | 923 | for (i = 0; i < cpuc->n_events; i++) { |
922 | event = cpuc->event_list[i]; | 924 | event = cpuc->event_list[i]; |
923 | hwc = &event->hw; | 925 | hwc = &event->hw; |
@@ -1043,7 +1045,7 @@ static int x86_pmu_add(struct perf_event *event, int flags) | |||
1043 | /* | 1045 | /* |
1044 | * If group events scheduling transaction was started, | 1046 | * If group events scheduling transaction was started, |
1045 | * skip the schedulability test here, it will be performed | 1047 | * skip the schedulability test here, it will be performed |
1046 | * at commit time (->commit_txn) as a whole | 1048 | * at commit time (->commit_txn) as a whole. |
1047 | */ | 1049 | */ |
1048 | if (cpuc->group_flag & PERF_EVENT_TXN) | 1050 | if (cpuc->group_flag & PERF_EVENT_TXN) |
1049 | goto done_collect; | 1051 | goto done_collect; |
@@ -1058,6 +1060,10 @@ static int x86_pmu_add(struct perf_event *event, int flags) | |||
1058 | memcpy(cpuc->assign, assign, n*sizeof(int)); | 1060 | memcpy(cpuc->assign, assign, n*sizeof(int)); |
1059 | 1061 | ||
1060 | done_collect: | 1062 | done_collect: |
1063 | /* | ||
1064 | * Commit the collect_events() state. See x86_pmu_del() and | ||
1065 | * x86_pmu_*_txn(). | ||
1066 | */ | ||
1061 | cpuc->n_events = n; | 1067 | cpuc->n_events = n; |
1062 | cpuc->n_added += n - n0; | 1068 | cpuc->n_added += n - n0; |
1063 | cpuc->n_txn += n - n0; | 1069 | cpuc->n_txn += n - n0; |
@@ -1183,25 +1189,38 @@ static void x86_pmu_del(struct perf_event *event, int flags) | |||
1183 | * If we're called during a txn, we don't need to do anything. | 1189 | * If we're called during a txn, we don't need to do anything. |
1184 | * The events never got scheduled and ->cancel_txn will truncate | 1190 | * The events never got scheduled and ->cancel_txn will truncate |
1185 | * the event_list. | 1191 | * the event_list. |
1192 | * | ||
1193 | * XXX assumes any ->del() called during a TXN will only be on | ||
1194 | * an event added during that same TXN. | ||
1186 | */ | 1195 | */ |
1187 | if (cpuc->group_flag & PERF_EVENT_TXN) | 1196 | if (cpuc->group_flag & PERF_EVENT_TXN) |
1188 | return; | 1197 | return; |
1189 | 1198 | ||
1199 | /* | ||
1200 | * Not a TXN, therefore cleanup properly. | ||
1201 | */ | ||
1190 | x86_pmu_stop(event, PERF_EF_UPDATE); | 1202 | x86_pmu_stop(event, PERF_EF_UPDATE); |
1191 | 1203 | ||
1192 | for (i = 0; i < cpuc->n_events; i++) { | 1204 | for (i = 0; i < cpuc->n_events; i++) { |
1193 | if (event == cpuc->event_list[i]) { | 1205 | if (event == cpuc->event_list[i]) |
1206 | break; | ||
1207 | } | ||
1194 | 1208 | ||
1195 | if (x86_pmu.put_event_constraints) | 1209 | if (WARN_ON_ONCE(i == cpuc->n_events)) /* called ->del() without ->add() ? */ |
1196 | x86_pmu.put_event_constraints(cpuc, event); | 1210 | return; |
1197 | 1211 | ||
1198 | while (++i < cpuc->n_events) | 1212 | /* If we have a newly added event; make sure to decrease n_added. */ |
1199 | cpuc->event_list[i-1] = cpuc->event_list[i]; | 1213 | if (i >= cpuc->n_events - cpuc->n_added) |
1214 | --cpuc->n_added; | ||
1215 | |||
1216 | if (x86_pmu.put_event_constraints) | ||
1217 | x86_pmu.put_event_constraints(cpuc, event); | ||
1218 | |||
1219 | /* Delete the array entry. */ | ||
1220 | while (++i < cpuc->n_events) | ||
1221 | cpuc->event_list[i-1] = cpuc->event_list[i]; | ||
1222 | --cpuc->n_events; | ||
1200 | 1223 | ||
1201 | --cpuc->n_events; | ||
1202 | break; | ||
1203 | } | ||
1204 | } | ||
1205 | perf_event_update_userpage(event); | 1224 | perf_event_update_userpage(event); |
1206 | } | 1225 | } |
1207 | 1226 | ||
@@ -1521,6 +1540,8 @@ static int __init init_hw_perf_events(void) | |||
1521 | 1540 | ||
1522 | pr_cont("%s PMU driver.\n", x86_pmu.name); | 1541 | pr_cont("%s PMU driver.\n", x86_pmu.name); |
1523 | 1542 | ||
1543 | x86_pmu.attr_rdpmc = 1; /* enable userspace RDPMC usage by default */ | ||
1544 | |||
1524 | for (quirk = x86_pmu.quirks; quirk; quirk = quirk->next) | 1545 | for (quirk = x86_pmu.quirks; quirk; quirk = quirk->next) |
1525 | quirk->func(); | 1546 | quirk->func(); |
1526 | 1547 | ||
@@ -1534,7 +1555,6 @@ static int __init init_hw_perf_events(void) | |||
1534 | __EVENT_CONSTRAINT(0, (1ULL << x86_pmu.num_counters) - 1, | 1555 | __EVENT_CONSTRAINT(0, (1ULL << x86_pmu.num_counters) - 1, |
1535 | 0, x86_pmu.num_counters, 0, 0); | 1556 | 0, x86_pmu.num_counters, 0, 0); |
1536 | 1557 | ||
1537 | x86_pmu.attr_rdpmc = 1; /* enable userspace RDPMC usage by default */ | ||
1538 | x86_pmu_format_group.attrs = x86_pmu.format_attrs; | 1558 | x86_pmu_format_group.attrs = x86_pmu.format_attrs; |
1539 | 1559 | ||
1540 | if (x86_pmu.event_attrs) | 1560 | if (x86_pmu.event_attrs) |
@@ -1594,7 +1614,8 @@ static void x86_pmu_cancel_txn(struct pmu *pmu) | |||
1594 | { | 1614 | { |
1595 | __this_cpu_and(cpu_hw_events.group_flag, ~PERF_EVENT_TXN); | 1615 | __this_cpu_and(cpu_hw_events.group_flag, ~PERF_EVENT_TXN); |
1596 | /* | 1616 | /* |
1597 | * Truncate the collected events. | 1617 | * Truncate collected array by the number of events added in this |
1618 | * transaction. See x86_pmu_add() and x86_pmu_*_txn(). | ||
1598 | */ | 1619 | */ |
1599 | __this_cpu_sub(cpu_hw_events.n_added, __this_cpu_read(cpu_hw_events.n_txn)); | 1620 | __this_cpu_sub(cpu_hw_events.n_added, __this_cpu_read(cpu_hw_events.n_txn)); |
1600 | __this_cpu_sub(cpu_hw_events.n_events, __this_cpu_read(cpu_hw_events.n_txn)); | 1621 | __this_cpu_sub(cpu_hw_events.n_events, __this_cpu_read(cpu_hw_events.n_txn)); |
@@ -1605,6 +1626,8 @@ static void x86_pmu_cancel_txn(struct pmu *pmu) | |||
1605 | * Commit group events scheduling transaction | 1626 | * Commit group events scheduling transaction |
1606 | * Perform the group schedulability test as a whole | 1627 | * Perform the group schedulability test as a whole |
1607 | * Return 0 if success | 1628 | * Return 0 if success |
1629 | * | ||
1630 | * Does not cancel the transaction on failure; expects the caller to do this. | ||
1608 | */ | 1631 | */ |
1609 | static int x86_pmu_commit_txn(struct pmu *pmu) | 1632 | static int x86_pmu_commit_txn(struct pmu *pmu) |
1610 | { | 1633 | { |
@@ -1820,9 +1843,12 @@ static ssize_t set_attr_rdpmc(struct device *cdev, | |||
1820 | if (ret) | 1843 | if (ret) |
1821 | return ret; | 1844 | return ret; |
1822 | 1845 | ||
1846 | if (x86_pmu.attr_rdpmc_broken) | ||
1847 | return -ENOTSUPP; | ||
1848 | |||
1823 | if (!!val != !!x86_pmu.attr_rdpmc) { | 1849 | if (!!val != !!x86_pmu.attr_rdpmc) { |
1824 | x86_pmu.attr_rdpmc = !!val; | 1850 | x86_pmu.attr_rdpmc = !!val; |
1825 | smp_call_function(change_rdpmc, (void *)val, 1); | 1851 | on_each_cpu(change_rdpmc, (void *)val, 1); |
1826 | } | 1852 | } |
1827 | 1853 | ||
1828 | return count; | 1854 | return count; |
@@ -1883,21 +1909,27 @@ static struct pmu pmu = { | |||
1883 | 1909 | ||
1884 | void arch_perf_update_userpage(struct perf_event_mmap_page *userpg, u64 now) | 1910 | void arch_perf_update_userpage(struct perf_event_mmap_page *userpg, u64 now) |
1885 | { | 1911 | { |
1912 | struct cyc2ns_data *data; | ||
1913 | |||
1886 | userpg->cap_user_time = 0; | 1914 | userpg->cap_user_time = 0; |
1887 | userpg->cap_user_time_zero = 0; | 1915 | userpg->cap_user_time_zero = 0; |
1888 | userpg->cap_user_rdpmc = x86_pmu.attr_rdpmc; | 1916 | userpg->cap_user_rdpmc = x86_pmu.attr_rdpmc; |
1889 | userpg->pmc_width = x86_pmu.cntval_bits; | 1917 | userpg->pmc_width = x86_pmu.cntval_bits; |
1890 | 1918 | ||
1891 | if (!sched_clock_stable) | 1919 | if (!sched_clock_stable()) |
1892 | return; | 1920 | return; |
1893 | 1921 | ||
1922 | data = cyc2ns_read_begin(); | ||
1923 | |||
1894 | userpg->cap_user_time = 1; | 1924 | userpg->cap_user_time = 1; |
1895 | userpg->time_mult = this_cpu_read(cyc2ns); | 1925 | userpg->time_mult = data->cyc2ns_mul; |
1896 | userpg->time_shift = CYC2NS_SCALE_FACTOR; | 1926 | userpg->time_shift = data->cyc2ns_shift; |
1897 | userpg->time_offset = this_cpu_read(cyc2ns_offset) - now; | 1927 | userpg->time_offset = data->cyc2ns_offset - now; |
1898 | 1928 | ||
1899 | userpg->cap_user_time_zero = 1; | 1929 | userpg->cap_user_time_zero = 1; |
1900 | userpg->time_zero = this_cpu_read(cyc2ns_offset); | 1930 | userpg->time_zero = data->cyc2ns_offset; |
1931 | |||
1932 | cyc2ns_read_end(data); | ||
1901 | } | 1933 | } |
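For reference, a minimal userspace sketch of consuming the fields filled in above, following the cap_user_time_zero recipe documented in include/uapi/linux/perf_event.h (the helper name is ours, not part of the kernel):

	#include <stdint.h>
	#include <linux/perf_event.h>

	/* Convert a raw TSC value to perf time via the mmap'ed userpage.
	 * Only valid while pc->cap_user_time_zero is set. */
	static uint64_t tsc_to_perf_time(uint64_t cyc,
					 const volatile struct perf_event_mmap_page *pc)
	{
		uint64_t quot = cyc >> pc->time_shift;
		uint64_t rem  = cyc & (((uint64_t)1 << pc->time_shift) - 1);

		return pc->time_zero + quot * pc->time_mult +
		       ((rem * pc->time_mult) >> pc->time_shift);
	}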
1902 | 1934 | ||
1903 | /* | 1935 | /* |
diff --git a/arch/x86/kernel/cpu/perf_event.h b/arch/x86/kernel/cpu/perf_event.h index c1a861829d81..3b2f9bdd974b 100644 --- a/arch/x86/kernel/cpu/perf_event.h +++ b/arch/x86/kernel/cpu/perf_event.h | |||
@@ -130,9 +130,11 @@ struct cpu_hw_events { | |||
130 | unsigned long running[BITS_TO_LONGS(X86_PMC_IDX_MAX)]; | 130 | unsigned long running[BITS_TO_LONGS(X86_PMC_IDX_MAX)]; |
131 | int enabled; | 131 | int enabled; |
132 | 132 | ||
133 | int n_events; | 133 | int n_events; /* the # of events in the below arrays */ |
134 | int n_added; | 134 | int n_added; /* the # of events added last to the below |
135 | int n_txn; | 135 | arrays; they haven't been enabled yet */ |
136 | int n_txn; /* the # of events added to the below arrays | ||
137 | in the current transaction */ | ||
136 | int assign[X86_PMC_IDX_MAX]; /* event to counter assignment */ | 138 | int assign[X86_PMC_IDX_MAX]; /* event to counter assignment */ |
137 | u64 tags[X86_PMC_IDX_MAX]; | 139 | u64 tags[X86_PMC_IDX_MAX]; |
138 | struct perf_event *event_list[X86_PMC_IDX_MAX]; /* in enabled order */ | 140 | struct perf_event *event_list[X86_PMC_IDX_MAX]; /* in enabled order */ |
@@ -409,6 +411,7 @@ struct x86_pmu { | |||
409 | /* | 411 | /* |
410 | * sysfs attrs | 412 | * sysfs attrs |
411 | */ | 413 | */ |
414 | int attr_rdpmc_broken; | ||
412 | int attr_rdpmc; | 415 | int attr_rdpmc; |
413 | struct attribute **format_attrs; | 416 | struct attribute **format_attrs; |
414 | struct attribute **event_attrs; | 417 | struct attribute **event_attrs; |
diff --git a/arch/x86/kernel/cpu/perf_event_amd_ibs.c b/arch/x86/kernel/cpu/perf_event_amd_ibs.c index 4b8e4d3cd6ea..4c36bbe3173a 100644 --- a/arch/x86/kernel/cpu/perf_event_amd_ibs.c +++ b/arch/x86/kernel/cpu/perf_event_amd_ibs.c | |||
@@ -926,13 +926,13 @@ static __init int amd_ibs_init(void) | |||
926 | goto out; | 926 | goto out; |
927 | 927 | ||
928 | perf_ibs_pm_init(); | 928 | perf_ibs_pm_init(); |
929 | get_online_cpus(); | 929 | cpu_notifier_register_begin(); |
930 | ibs_caps = caps; | 930 | ibs_caps = caps; |
931 | /* make ibs_caps visible to other cpus: */ | 931 | /* make ibs_caps visible to other cpus: */ |
932 | smp_mb(); | 932 | smp_mb(); |
933 | perf_cpu_notifier(perf_ibs_cpu_notifier); | ||
934 | smp_call_function(setup_APIC_ibs, NULL, 1); | 933 | smp_call_function(setup_APIC_ibs, NULL, 1); |
935 | put_online_cpus(); | 934 | __perf_cpu_notifier(perf_ibs_cpu_notifier); |
935 | cpu_notifier_register_done(); | ||
936 | 936 | ||
937 | ret = perf_event_ibs_init(); | 937 | ret = perf_event_ibs_init(); |
938 | out: | 938 | out: |
diff --git a/arch/x86/kernel/cpu/perf_event_amd_uncore.c b/arch/x86/kernel/cpu/perf_event_amd_uncore.c index 754291adec33..3bbdf4cd38b9 100644 --- a/arch/x86/kernel/cpu/perf_event_amd_uncore.c +++ b/arch/x86/kernel/cpu/perf_event_amd_uncore.c | |||
@@ -531,15 +531,16 @@ static int __init amd_uncore_init(void) | |||
531 | if (ret) | 531 | if (ret) |
532 | return -ENODEV; | 532 | return -ENODEV; |
533 | 533 | ||
534 | get_online_cpus(); | 534 | cpu_notifier_register_begin(); |
535 | |||
535 | /* init cpus already online before registering for hotplug notifier */ | 536 | /* init cpus already online before registering for hotplug notifier */ |
536 | for_each_online_cpu(cpu) { | 537 | for_each_online_cpu(cpu) { |
537 | amd_uncore_cpu_up_prepare(cpu); | 538 | amd_uncore_cpu_up_prepare(cpu); |
538 | smp_call_function_single(cpu, init_cpu_already_online, NULL, 1); | 539 | smp_call_function_single(cpu, init_cpu_already_online, NULL, 1); |
539 | } | 540 | } |
540 | 541 | ||
541 | register_cpu_notifier(&amd_uncore_cpu_notifier_block); | 542 | __register_cpu_notifier(&amd_uncore_cpu_notifier_block); |
542 | put_online_cpus(); | 543 | cpu_notifier_register_done(); |
543 | 544 | ||
544 | return 0; | 545 | return 0; |
545 | } | 546 | } |
diff --git a/arch/x86/kernel/cpu/perf_event_intel.c b/arch/x86/kernel/cpu/perf_event_intel.c index 0fa4f242f050..aa333d966886 100644 --- a/arch/x86/kernel/cpu/perf_event_intel.c +++ b/arch/x86/kernel/cpu/perf_event_intel.c | |||
@@ -1361,10 +1361,8 @@ static int intel_pmu_handle_irq(struct pt_regs *regs) | |||
1361 | intel_pmu_disable_all(); | 1361 | intel_pmu_disable_all(); |
1362 | handled = intel_pmu_drain_bts_buffer(); | 1362 | handled = intel_pmu_drain_bts_buffer(); |
1363 | status = intel_pmu_get_status(); | 1363 | status = intel_pmu_get_status(); |
1364 | if (!status) { | 1364 | if (!status) |
1365 | intel_pmu_enable_all(0); | 1365 | goto done; |
1366 | return handled; | ||
1367 | } | ||
1368 | 1366 | ||
1369 | loops = 0; | 1367 | loops = 0; |
1370 | again: | 1368 | again: |
@@ -2310,10 +2308,7 @@ __init int intel_pmu_init(void) | |||
2310 | if (version > 1) | 2308 | if (version > 1) |
2311 | x86_pmu.num_counters_fixed = max((int)edx.split.num_counters_fixed, 3); | 2309 | x86_pmu.num_counters_fixed = max((int)edx.split.num_counters_fixed, 3); |
2312 | 2310 | ||
2313 | /* | 2311 | if (boot_cpu_has(X86_FEATURE_PDCM)) { |
2314 | * v2 and above have a perf capabilities MSR | ||
2315 | */ | ||
2316 | if (version > 1) { | ||
2317 | u64 capabilities; | 2312 | u64 capabilities; |
2318 | 2313 | ||
2319 | rdmsrl(MSR_IA32_PERF_CAPABILITIES, capabilities); | 2314 | rdmsrl(MSR_IA32_PERF_CAPABILITIES, capabilities); |
diff --git a/arch/x86/kernel/cpu/perf_event_intel_rapl.c b/arch/x86/kernel/cpu/perf_event_intel_rapl.c new file mode 100644 index 000000000000..059218ed5208 --- /dev/null +++ b/arch/x86/kernel/cpu/perf_event_intel_rapl.c | |||
@@ -0,0 +1,680 @@ | |||
1 | /* | ||
2 | * perf_event_intel_rapl.c: support Intel RAPL energy consumption counters | ||
3 | * Copyright (C) 2013 Google, Inc., Stephane Eranian | ||
4 | * | ||
5 | * Intel RAPL interface is specified in the IA-32 Manual Vol3b | ||
6 | * section 14.7.1 (September 2013) | ||
7 | * | ||
8 | * RAPL provides more controls than just reporting energy consumption; | ||
9 | * however, here we only expose the 3 free-running energy consumption | ||
10 | * counters (pp0, pkg, dram). | ||
11 | * | ||
12 | * Each of those counters increments in a power unit defined by the | ||
13 | * RAPL_POWER_UNIT MSR. On SandyBridge, this unit is 1/(2^16) Joules | ||
14 | * but it can vary. | ||
15 | * | ||
16 | * Counter to rapl event mappings: | ||
17 | * | ||
18 | * pp0 counter: consumption of all physical cores (power plane 0) | ||
19 | * event: rapl_energy_cores | ||
20 | * perf code: 0x1 | ||
21 | * | ||
22 | * pkg counter: consumption of the whole processor package | ||
23 | * event: rapl_energy_pkg | ||
24 | * perf code: 0x2 | ||
25 | * | ||
26 | * dram counter: consumption of the dram domain (servers only) | ||
27 | * event: rapl_energy_dram | ||
28 | * perf code: 0x3 | ||
29 | * | ||
30 | * gpu counter: consumption of the builtin-gpu domain (client only) | ||
31 | * event: rapl_energy_gpu | ||
32 | * perf code: 0x4 | ||
33 | * | ||
34 | * We manage those counters as free running (read-only). They may be | ||
35 | * used simultaneously by other tools, such as turbostat. | ||
36 | * | ||
37 | * The events only support system-wide mode counting. There is no | ||
38 | * sampling support because it does not make sense and is not | ||
39 | * supported by the RAPL hardware. | ||
40 | * | ||
41 | * Because we want to avoid floating-point operations in the kernel, | ||
42 | * the events are all reported in fixed point arithmetic (32.32). | ||
43 | * Tools must adjust the counts to convert them to Watts using | ||
44 | * the duration of the measurement. Tools may use a function such as | ||
45 | * ldexp(raw_count, -32); | ||
46 | */ | ||
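As a concrete illustration of the 32.32 fixed-point convention described above, a tool could convert a raw count to Joules like this (a standalone sketch; the raw value is made up):

	#include <math.h>
	#include <stdint.h>
	#include <stdio.h>

	int main(void)
	{
		uint64_t raw = 0x180000000ULL;		/* hypothetical 32.32 count */
		double joules = ldexp((double)raw, -32);/* == raw * 2^-32 */

		printf("%.1f Joules\n", joules);	/* prints 1.5 */
		return 0;
	}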
47 | #include <linux/module.h> | ||
48 | #include <linux/slab.h> | ||
49 | #include <linux/perf_event.h> | ||
50 | #include <asm/cpu_device_id.h> | ||
51 | #include "perf_event.h" | ||
52 | |||
53 | /* | ||
54 | * RAPL energy status counters | ||
55 | */ | ||
56 | #define RAPL_IDX_PP0_NRG_STAT 0 /* all cores */ | ||
57 | #define INTEL_RAPL_PP0 0x1 /* pseudo-encoding */ | ||
58 | #define RAPL_IDX_PKG_NRG_STAT 1 /* entire package */ | ||
59 | #define INTEL_RAPL_PKG 0x2 /* pseudo-encoding */ | ||
60 | #define RAPL_IDX_RAM_NRG_STAT 2 /* DRAM */ | ||
61 | #define INTEL_RAPL_RAM 0x3 /* pseudo-encoding */ | ||
62 | #define RAPL_IDX_PP1_NRG_STAT 3 /* gpu */ | ||
63 | #define INTEL_RAPL_PP1 0x4 /* pseudo-encoding */ | ||
64 | |||
65 | /* Clients have PP0, PKG */ | ||
66 | #define RAPL_IDX_CLN (1<<RAPL_IDX_PP0_NRG_STAT|\ | ||
67 | 1<<RAPL_IDX_PKG_NRG_STAT|\ | ||
68 | 1<<RAPL_IDX_PP1_NRG_STAT) | ||
69 | |||
70 | /* Servers have PP0, PKG, RAM */ | ||
71 | #define RAPL_IDX_SRV (1<<RAPL_IDX_PP0_NRG_STAT|\ | ||
72 | 1<<RAPL_IDX_PKG_NRG_STAT|\ | ||
73 | 1<<RAPL_IDX_RAM_NRG_STAT) | ||
74 | |||
75 | /* | ||
76 | * event code: LSB 8 bits, passed in attr->config | ||
77 | * any other bit is reserved | ||
78 | */ | ||
79 | #define RAPL_EVENT_MASK 0xFFULL | ||
80 | |||
81 | #define DEFINE_RAPL_FORMAT_ATTR(_var, _name, _format) \ | ||
82 | static ssize_t __rapl_##_var##_show(struct kobject *kobj, \ | ||
83 | struct kobj_attribute *attr, \ | ||
84 | char *page) \ | ||
85 | { \ | ||
86 | BUILD_BUG_ON(sizeof(_format) >= PAGE_SIZE); \ | ||
87 | return sprintf(page, _format "\n"); \ | ||
88 | } \ | ||
89 | static struct kobj_attribute format_attr_##_var = \ | ||
90 | __ATTR(_name, 0444, __rapl_##_var##_show, NULL) | ||
91 | |||
92 | #define RAPL_EVENT_DESC(_name, _config) \ | ||
93 | { \ | ||
94 | .attr = __ATTR(_name, 0444, rapl_event_show, NULL), \ | ||
95 | .config = _config, \ | ||
96 | } | ||
97 | |||
98 | #define RAPL_CNTR_WIDTH 32 /* 32-bit rapl counters */ | ||
99 | |||
100 | struct rapl_pmu { | ||
101 | spinlock_t lock; | ||
102 | int hw_unit; /* 1/2^hw_unit Joule */ | ||
103 | int n_active; /* number of active events */ | ||
104 | struct list_head active_list; | ||
105 | struct pmu *pmu; /* pointer to rapl_pmu_class */ | ||
106 | ktime_t timer_interval; /* in ktime_t unit */ | ||
107 | struct hrtimer hrtimer; | ||
108 | }; | ||
109 | |||
110 | static struct pmu rapl_pmu_class; | ||
111 | static cpumask_t rapl_cpu_mask; | ||
112 | static int rapl_cntr_mask; | ||
113 | |||
114 | static DEFINE_PER_CPU(struct rapl_pmu *, rapl_pmu); | ||
115 | static DEFINE_PER_CPU(struct rapl_pmu *, rapl_pmu_to_free); | ||
116 | |||
117 | static inline u64 rapl_read_counter(struct perf_event *event) | ||
118 | { | ||
119 | u64 raw; | ||
120 | rdmsrl(event->hw.event_base, raw); | ||
121 | return raw; | ||
122 | } | ||
123 | |||
124 | static inline u64 rapl_scale(u64 v) | ||
125 | { | ||
126 | /* | ||
127 | * scale delta to smallest unit (1/2^32) | ||
128 | * users must then scale back: count * 1/(2^32) to get Joules | ||
129 | * or use ldexp(count, -32). | ||
130 | * Watts = Joules/Time delta | ||
131 | */ | ||
132 | return v << (32 - __get_cpu_var(rapl_pmu)->hw_unit); | ||
133 | } | ||
134 | |||
135 | static u64 rapl_event_update(struct perf_event *event) | ||
136 | { | ||
137 | struct hw_perf_event *hwc = &event->hw; | ||
138 | u64 prev_raw_count, new_raw_count; | ||
139 | s64 delta, sdelta; | ||
140 | int shift = RAPL_CNTR_WIDTH; | ||
141 | |||
142 | again: | ||
143 | prev_raw_count = local64_read(&hwc->prev_count); | ||
144 | rdmsrl(event->hw.event_base, new_raw_count); | ||
145 | |||
146 | if (local64_cmpxchg(&hwc->prev_count, prev_raw_count, | ||
147 | new_raw_count) != prev_raw_count) { | ||
148 | cpu_relax(); | ||
149 | goto again; | ||
150 | } | ||
151 | |||
152 | /* | ||
153 | * Now we have the new raw value and have updated the prev | ||
154 | * timestamp already. We can now calculate the elapsed delta | ||
155 | * (event-)time and add that to the generic event. | ||
156 | * | ||
157 | * Careful, not all hw sign-extends above the physical width | ||
158 | * of the count. | ||
159 | */ | ||
160 | delta = (new_raw_count << shift) - (prev_raw_count << shift); | ||
161 | delta >>= shift; | ||
162 | |||
163 | sdelta = rapl_scale(delta); | ||
164 | |||
165 | local64_add(sdelta, &event->count); | ||
166 | |||
167 | return new_raw_count; | ||
168 | } | ||
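The shift dance in rapl_event_update() makes wraparound of a counter narrower than 64 bits come out right; a standalone sanity check of the same arithmetic, assuming the 32-bit RAPL width:

	#include <stdint.h>
	#include <stdio.h>

	int main(void)
	{
		int shift = 32;				/* RAPL_CNTR_WIDTH */
		uint64_t prev = 0xFFFFFFF0ULL;		/* just before a wrap */
		uint64_t curr = 0x00000010ULL;		/* just after the wrap */
		int64_t delta = (curr << shift) - (prev << shift);

		delta >>= shift;			/* sign-correct 32-bit delta */
		printf("delta = %lld\n", (long long)delta);	/* prints 32 */
		return 0;
	}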
169 | |||
170 | static void rapl_start_hrtimer(struct rapl_pmu *pmu) | ||
171 | { | ||
172 | __hrtimer_start_range_ns(&pmu->hrtimer, | ||
173 | pmu->timer_interval, 0, | ||
174 | HRTIMER_MODE_REL_PINNED, 0); | ||
175 | } | ||
176 | |||
177 | static void rapl_stop_hrtimer(struct rapl_pmu *pmu) | ||
178 | { | ||
179 | hrtimer_cancel(&pmu->hrtimer); | ||
180 | } | ||
181 | |||
182 | static enum hrtimer_restart rapl_hrtimer_handle(struct hrtimer *hrtimer) | ||
183 | { | ||
184 | struct rapl_pmu *pmu = __get_cpu_var(rapl_pmu); | ||
185 | struct perf_event *event; | ||
186 | unsigned long flags; | ||
187 | |||
188 | if (!pmu->n_active) | ||
189 | return HRTIMER_NORESTART; | ||
190 | |||
191 | spin_lock_irqsave(&pmu->lock, flags); | ||
192 | |||
193 | list_for_each_entry(event, &pmu->active_list, active_entry) { | ||
194 | rapl_event_update(event); | ||
195 | } | ||
196 | |||
197 | spin_unlock_irqrestore(&pmu->lock, flags); | ||
198 | |||
199 | hrtimer_forward_now(hrtimer, pmu->timer_interval); | ||
200 | |||
201 | return HRTIMER_RESTART; | ||
202 | } | ||
203 | |||
204 | static void rapl_hrtimer_init(struct rapl_pmu *pmu) | ||
205 | { | ||
206 | struct hrtimer *hr = &pmu->hrtimer; | ||
207 | |||
208 | hrtimer_init(hr, CLOCK_MONOTONIC, HRTIMER_MODE_REL); | ||
209 | hr->function = rapl_hrtimer_handle; | ||
210 | } | ||
211 | |||
212 | static void __rapl_pmu_event_start(struct rapl_pmu *pmu, | ||
213 | struct perf_event *event) | ||
214 | { | ||
215 | if (WARN_ON_ONCE(!(event->hw.state & PERF_HES_STOPPED))) | ||
216 | return; | ||
217 | |||
218 | event->hw.state = 0; | ||
219 | |||
220 | list_add_tail(&event->active_entry, &pmu->active_list); | ||
221 | |||
222 | local64_set(&event->hw.prev_count, rapl_read_counter(event)); | ||
223 | |||
224 | pmu->n_active++; | ||
225 | if (pmu->n_active == 1) | ||
226 | rapl_start_hrtimer(pmu); | ||
227 | } | ||
228 | |||
229 | static void rapl_pmu_event_start(struct perf_event *event, int mode) | ||
230 | { | ||
231 | struct rapl_pmu *pmu = __get_cpu_var(rapl_pmu); | ||
232 | unsigned long flags; | ||
233 | |||
234 | spin_lock_irqsave(&pmu->lock, flags); | ||
235 | __rapl_pmu_event_start(pmu, event); | ||
236 | spin_unlock_irqrestore(&pmu->lock, flags); | ||
237 | } | ||
238 | |||
239 | static void rapl_pmu_event_stop(struct perf_event *event, int mode) | ||
240 | { | ||
241 | struct rapl_pmu *pmu = __get_cpu_var(rapl_pmu); | ||
242 | struct hw_perf_event *hwc = &event->hw; | ||
243 | unsigned long flags; | ||
244 | |||
245 | spin_lock_irqsave(&pmu->lock, flags); | ||
246 | |||
247 | /* mark event as deactivated and stopped */ | ||
248 | if (!(hwc->state & PERF_HES_STOPPED)) { | ||
249 | WARN_ON_ONCE(pmu->n_active <= 0); | ||
250 | pmu->n_active--; | ||
251 | if (pmu->n_active == 0) | ||
252 | rapl_stop_hrtimer(pmu); | ||
253 | |||
254 | list_del(&event->active_entry); | ||
255 | |||
256 | WARN_ON_ONCE(hwc->state & PERF_HES_STOPPED); | ||
257 | hwc->state |= PERF_HES_STOPPED; | ||
258 | } | ||
259 | |||
260 | /* check if update of sw counter is necessary */ | ||
261 | if ((mode & PERF_EF_UPDATE) && !(hwc->state & PERF_HES_UPTODATE)) { | ||
262 | /* | ||
263 | * Drain the remaining delta count out of an event | ||
264 | * that we are disabling: | ||
265 | */ | ||
266 | rapl_event_update(event); | ||
267 | hwc->state |= PERF_HES_UPTODATE; | ||
268 | } | ||
269 | |||
270 | spin_unlock_irqrestore(&pmu->lock, flags); | ||
271 | } | ||
272 | |||
273 | static int rapl_pmu_event_add(struct perf_event *event, int mode) | ||
274 | { | ||
275 | struct rapl_pmu *pmu = __get_cpu_var(rapl_pmu); | ||
276 | struct hw_perf_event *hwc = &event->hw; | ||
277 | unsigned long flags; | ||
278 | |||
279 | spin_lock_irqsave(&pmu->lock, flags); | ||
280 | |||
281 | hwc->state = PERF_HES_UPTODATE | PERF_HES_STOPPED; | ||
282 | |||
283 | if (mode & PERF_EF_START) | ||
284 | __rapl_pmu_event_start(pmu, event); | ||
285 | |||
286 | spin_unlock_irqrestore(&pmu->lock, flags); | ||
287 | |||
288 | return 0; | ||
289 | } | ||
290 | |||
291 | static void rapl_pmu_event_del(struct perf_event *event, int flags) | ||
292 | { | ||
293 | rapl_pmu_event_stop(event, PERF_EF_UPDATE); | ||
294 | } | ||
295 | |||
296 | static int rapl_pmu_event_init(struct perf_event *event) | ||
297 | { | ||
298 | u64 cfg = event->attr.config & RAPL_EVENT_MASK; | ||
299 | int bit, msr, ret = 0; | ||
300 | |||
301 | /* only look at RAPL events */ | ||
302 | if (event->attr.type != rapl_pmu_class.type) | ||
303 | return -ENOENT; | ||
304 | |||
305 | /* check only supported bits are set */ | ||
306 | if (event->attr.config & ~RAPL_EVENT_MASK) | ||
307 | return -EINVAL; | ||
308 | |||
309 | /* | ||
310 | * check event is known (determines counter) | ||
311 | */ | ||
312 | switch (cfg) { | ||
313 | case INTEL_RAPL_PP0: | ||
314 | bit = RAPL_IDX_PP0_NRG_STAT; | ||
315 | msr = MSR_PP0_ENERGY_STATUS; | ||
316 | break; | ||
317 | case INTEL_RAPL_PKG: | ||
318 | bit = RAPL_IDX_PKG_NRG_STAT; | ||
319 | msr = MSR_PKG_ENERGY_STATUS; | ||
320 | break; | ||
321 | case INTEL_RAPL_RAM: | ||
322 | bit = RAPL_IDX_RAM_NRG_STAT; | ||
323 | msr = MSR_DRAM_ENERGY_STATUS; | ||
324 | break; | ||
325 | case INTEL_RAPL_PP1: | ||
326 | bit = RAPL_IDX_PP1_NRG_STAT; | ||
327 | msr = MSR_PP1_ENERGY_STATUS; | ||
328 | break; | ||
329 | default: | ||
330 | return -EINVAL; | ||
331 | } | ||
332 | /* check event supported */ | ||
333 | if (!(rapl_cntr_mask & (1 << bit))) | ||
334 | return -EINVAL; | ||
335 | |||
336 | /* unsupported modes and filters */ | ||
337 | if (event->attr.exclude_user || | ||
338 | event->attr.exclude_kernel || | ||
339 | event->attr.exclude_hv || | ||
340 | event->attr.exclude_idle || | ||
341 | event->attr.exclude_host || | ||
342 | event->attr.exclude_guest || | ||
343 | event->attr.sample_period) /* no sampling */ | ||
344 | return -EINVAL; | ||
345 | |||
346 | /* must be done before validate_group */ | ||
347 | event->hw.event_base = msr; | ||
348 | event->hw.config = cfg; | ||
349 | event->hw.idx = bit; | ||
350 | |||
351 | return ret; | ||
352 | } | ||
353 | |||
354 | static void rapl_pmu_event_read(struct perf_event *event) | ||
355 | { | ||
356 | rapl_event_update(event); | ||
357 | } | ||
358 | |||
359 | static ssize_t rapl_get_attr_cpumask(struct device *dev, | ||
360 | struct device_attribute *attr, char *buf) | ||
361 | { | ||
362 | int n = cpulist_scnprintf(buf, PAGE_SIZE - 2, &rapl_cpu_mask); | ||
363 | |||
364 | buf[n++] = '\n'; | ||
365 | buf[n] = '\0'; | ||
366 | return n; | ||
367 | } | ||
368 | |||
369 | static DEVICE_ATTR(cpumask, S_IRUGO, rapl_get_attr_cpumask, NULL); | ||
370 | |||
371 | static struct attribute *rapl_pmu_attrs[] = { | ||
372 | &dev_attr_cpumask.attr, | ||
373 | NULL, | ||
374 | }; | ||
375 | |||
376 | static struct attribute_group rapl_pmu_attr_group = { | ||
377 | .attrs = rapl_pmu_attrs, | ||
378 | }; | ||
379 | |||
380 | EVENT_ATTR_STR(energy-cores, rapl_cores, "event=0x01"); | ||
381 | EVENT_ATTR_STR(energy-pkg , rapl_pkg, "event=0x02"); | ||
382 | EVENT_ATTR_STR(energy-ram , rapl_ram, "event=0x03"); | ||
383 | EVENT_ATTR_STR(energy-gpu , rapl_gpu, "event=0x04"); | ||
384 | |||
385 | EVENT_ATTR_STR(energy-cores.unit, rapl_cores_unit, "Joules"); | ||
386 | EVENT_ATTR_STR(energy-pkg.unit , rapl_pkg_unit, "Joules"); | ||
387 | EVENT_ATTR_STR(energy-ram.unit , rapl_ram_unit, "Joules"); | ||
388 | EVENT_ATTR_STR(energy-gpu.unit , rapl_gpu_unit, "Joules"); | ||
389 | |||
390 | /* | ||
391 | * we compute in 0.23 nJ increments regardless of the MSR unit | ||
392 | */ | ||
393 | EVENT_ATTR_STR(energy-cores.scale, rapl_cores_scale, "2.3283064365386962890625e-10"); | ||
394 | EVENT_ATTR_STR(energy-pkg.scale, rapl_pkg_scale, "2.3283064365386962890625e-10"); | ||
395 | EVENT_ATTR_STR(energy-ram.scale, rapl_ram_scale, "2.3283064365386962890625e-10"); | ||
396 | EVENT_ATTR_STR(energy-gpu.scale, rapl_gpu_scale, "2.3283064365386962890625e-10"); | ||
397 | |||
398 | static struct attribute *rapl_events_srv_attr[] = { | ||
399 | EVENT_PTR(rapl_cores), | ||
400 | EVENT_PTR(rapl_pkg), | ||
401 | EVENT_PTR(rapl_ram), | ||
402 | |||
403 | EVENT_PTR(rapl_cores_unit), | ||
404 | EVENT_PTR(rapl_pkg_unit), | ||
405 | EVENT_PTR(rapl_ram_unit), | ||
406 | |||
407 | EVENT_PTR(rapl_cores_scale), | ||
408 | EVENT_PTR(rapl_pkg_scale), | ||
409 | EVENT_PTR(rapl_ram_scale), | ||
410 | NULL, | ||
411 | }; | ||
412 | |||
413 | static struct attribute *rapl_events_cln_attr[] = { | ||
414 | EVENT_PTR(rapl_cores), | ||
415 | EVENT_PTR(rapl_pkg), | ||
416 | EVENT_PTR(rapl_gpu), | ||
417 | |||
418 | EVENT_PTR(rapl_cores_unit), | ||
419 | EVENT_PTR(rapl_pkg_unit), | ||
420 | EVENT_PTR(rapl_gpu_unit), | ||
421 | |||
422 | EVENT_PTR(rapl_cores_scale), | ||
423 | EVENT_PTR(rapl_pkg_scale), | ||
424 | EVENT_PTR(rapl_gpu_scale), | ||
425 | NULL, | ||
426 | }; | ||
427 | |||
428 | static struct attribute_group rapl_pmu_events_group = { | ||
429 | .name = "events", | ||
430 | .attrs = NULL, /* patched at runtime */ | ||
431 | }; | ||
432 | |||
433 | DEFINE_RAPL_FORMAT_ATTR(event, event, "config:0-7"); | ||
434 | static struct attribute *rapl_formats_attr[] = { | ||
435 | &format_attr_event.attr, | ||
436 | NULL, | ||
437 | }; | ||
438 | |||
439 | static struct attribute_group rapl_pmu_format_group = { | ||
440 | .name = "format", | ||
441 | .attrs = rapl_formats_attr, | ||
442 | }; | ||
443 | |||
444 | const struct attribute_group *rapl_attr_groups[] = { | ||
445 | &rapl_pmu_attr_group, | ||
446 | &rapl_pmu_format_group, | ||
447 | &rapl_pmu_events_group, | ||
448 | NULL, | ||
449 | }; | ||
450 | |||
451 | static struct pmu rapl_pmu_class = { | ||
452 | .attr_groups = rapl_attr_groups, | ||
453 | .task_ctx_nr = perf_invalid_context, /* system-wide only */ | ||
454 | .event_init = rapl_pmu_event_init, | ||
455 | .add = rapl_pmu_event_add, /* must have */ | ||
456 | .del = rapl_pmu_event_del, /* must have */ | ||
457 | .start = rapl_pmu_event_start, | ||
458 | .stop = rapl_pmu_event_stop, | ||
459 | .read = rapl_pmu_event_read, | ||
460 | }; | ||
461 | |||
462 | static void rapl_cpu_exit(int cpu) | ||
463 | { | ||
464 | struct rapl_pmu *pmu = per_cpu(rapl_pmu, cpu); | ||
465 | int i, phys_id = topology_physical_package_id(cpu); | ||
466 | int target = -1; | ||
467 | |||
468 | /* find a new cpu on same package */ | ||
469 | for_each_online_cpu(i) { | ||
470 | if (i == cpu) | ||
471 | continue; | ||
472 | if (phys_id == topology_physical_package_id(i)) { | ||
473 | target = i; | ||
474 | break; | ||
475 | } | ||
476 | } | ||
477 | /* | ||
478 | * clear cpu from cpumask | ||
479 | * if it was set in the cpumask and some cpu remains on the package, | ||
480 | * then move to the new cpu | ||
481 | */ | ||
482 | if (cpumask_test_and_clear_cpu(cpu, &rapl_cpu_mask) && target >= 0) | ||
483 | cpumask_set_cpu(target, &rapl_cpu_mask); | ||
484 | |||
485 | WARN_ON(cpumask_empty(&rapl_cpu_mask)); | ||
486 | /* | ||
487 | * migrate events and context to new cpu | ||
488 | */ | ||
489 | if (target >= 0) | ||
490 | perf_pmu_migrate_context(pmu->pmu, cpu, target); | ||
491 | |||
492 | /* cancel overflow polling timer for CPU */ | ||
493 | rapl_stop_hrtimer(pmu); | ||
494 | } | ||
495 | |||
496 | static void rapl_cpu_init(int cpu) | ||
497 | { | ||
498 | int i, phys_id = topology_physical_package_id(cpu); | ||
499 | |||
500 | /* check if phys_id is already covered */ | ||
501 | for_each_cpu(i, &rapl_cpu_mask) { | ||
502 | if (phys_id == topology_physical_package_id(i)) | ||
503 | return; | ||
504 | } | ||
505 | /* was not found, so add it */ | ||
506 | cpumask_set_cpu(cpu, &rapl_cpu_mask); | ||
507 | } | ||
508 | |||
509 | static int rapl_cpu_prepare(int cpu) | ||
510 | { | ||
511 | struct rapl_pmu *pmu = per_cpu(rapl_pmu, cpu); | ||
512 | int phys_id = topology_physical_package_id(cpu); | ||
513 | u64 ms; | ||
514 | |||
515 | if (pmu) | ||
516 | return 0; | ||
517 | |||
518 | if (phys_id < 0) | ||
519 | return -1; | ||
520 | |||
521 | pmu = kzalloc_node(sizeof(*pmu), GFP_KERNEL, cpu_to_node(cpu)); | ||
522 | if (!pmu) | ||
523 | return -1; | ||
524 | |||
525 | spin_lock_init(&pmu->lock); | ||
526 | |||
527 | INIT_LIST_HEAD(&pmu->active_list); | ||
528 | |||
529 | /* | ||
530 | * grab power unit as: 1/2^unit Joules | ||
531 | * | ||
532 | * we cache in local PMU instance | ||
533 | */ | ||
534 | rdmsrl(MSR_RAPL_POWER_UNIT, pmu->hw_unit); | ||
535 | pmu->hw_unit = (pmu->hw_unit >> 8) & 0x1FULL; | ||
536 | pmu->pmu = &rapl_pmu_class; | ||
537 | |||
538 | /* | ||
539 | * use reference of 200W for scaling the timeout | ||
540 | * to avoid missing counter overflows. | ||
541 | * 200W = 200 Joules/sec | ||
542 | * divide the interval by 2 to avoid lockstep (hence the 2 * 100) | ||
543 | * if the hw unit is 32, then we use 2 ms (1/200/2 s, rounded down) | ||
544 | */ | ||
545 | if (pmu->hw_unit < 32) | ||
546 | ms = (1000 / (2 * 100)) * (1ULL << (32 - pmu->hw_unit - 1)); | ||
547 | else | ||
548 | ms = 2; | ||
549 | |||
550 | pmu->timer_interval = ms_to_ktime(ms); | ||
551 | |||
552 | rapl_hrtimer_init(pmu); | ||
553 | |||
554 | /* set RAPL pmu for this cpu for now */ | ||
555 | per_cpu(rapl_pmu, cpu) = pmu; | ||
556 | per_cpu(rapl_pmu_to_free, cpu) = NULL; | ||
557 | |||
558 | return 0; | ||
559 | } | ||
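To see the timeout heuristic above in numbers, plug in the SandyBridge unit mentioned in the header comment (hw_unit == 16, i.e. 1/2^16 Joules); a worked example, not measured data:

	#include <stdio.h>

	int main(void)
	{
		unsigned int hw_unit = 16;	/* assumed: 1/2^16 J units */
		unsigned long long ms;

		/* a 32-bit counter overflows after 2^(32 - hw_unit) Joules;
		 * at the 200 W reference that is 65536 J / 200 J/s ~= 328 s,
		 * and the interval is halved to avoid lockstep */
		ms = (1000 / (2 * 100)) * (1ULL << (32 - hw_unit - 1));
		printf("%llu ms\n", ms);	/* prints 163840, ~164 s */
		return 0;
	}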
560 | |||
561 | static void rapl_cpu_kfree(int cpu) | ||
562 | { | ||
563 | struct rapl_pmu *pmu = per_cpu(rapl_pmu_to_free, cpu); | ||
564 | |||
565 | kfree(pmu); | ||
566 | |||
567 | per_cpu(rapl_pmu_to_free, cpu) = NULL; | ||
568 | } | ||
569 | |||
570 | static int rapl_cpu_dying(int cpu) | ||
571 | { | ||
572 | struct rapl_pmu *pmu = per_cpu(rapl_pmu, cpu); | ||
573 | |||
574 | if (!pmu) | ||
575 | return 0; | ||
576 | |||
577 | per_cpu(rapl_pmu, cpu) = NULL; | ||
578 | |||
579 | per_cpu(rapl_pmu_to_free, cpu) = pmu; | ||
580 | |||
581 | return 0; | ||
582 | } | ||
583 | |||
584 | static int rapl_cpu_notifier(struct notifier_block *self, | ||
585 | unsigned long action, void *hcpu) | ||
586 | { | ||
587 | unsigned int cpu = (long)hcpu; | ||
588 | |||
589 | switch (action & ~CPU_TASKS_FROZEN) { | ||
590 | case CPU_UP_PREPARE: | ||
591 | rapl_cpu_prepare(cpu); | ||
592 | break; | ||
593 | case CPU_STARTING: | ||
594 | rapl_cpu_init(cpu); | ||
595 | break; | ||
596 | case CPU_UP_CANCELED: | ||
597 | case CPU_DYING: | ||
598 | rapl_cpu_dying(cpu); | ||
599 | break; | ||
600 | case CPU_ONLINE: | ||
601 | case CPU_DEAD: | ||
602 | rapl_cpu_kfree(cpu); | ||
603 | break; | ||
604 | case CPU_DOWN_PREPARE: | ||
605 | rapl_cpu_exit(cpu); | ||
606 | break; | ||
607 | default: | ||
608 | break; | ||
609 | } | ||
610 | |||
611 | return NOTIFY_OK; | ||
612 | } | ||
613 | |||
614 | static const struct x86_cpu_id rapl_cpu_match[] = { | ||
615 | [0] = { .vendor = X86_VENDOR_INTEL, .family = 6 }, | ||
616 | [1] = {}, | ||
617 | }; | ||
618 | |||
619 | static int __init rapl_pmu_init(void) | ||
620 | { | ||
621 | struct rapl_pmu *pmu; | ||
622 | int cpu, ret; | ||
623 | |||
624 | /* | ||
625 | * check for Intel processor family 6 | ||
626 | */ | ||
627 | if (!x86_match_cpu(rapl_cpu_match)) | ||
628 | return 0; | ||
629 | |||
630 | /* check supported CPU */ | ||
631 | switch (boot_cpu_data.x86_model) { | ||
632 | case 42: /* Sandy Bridge */ | ||
633 | case 58: /* Ivy Bridge */ | ||
634 | case 60: /* Haswell */ | ||
635 | case 69: /* Haswell-Celeron */ | ||
636 | rapl_cntr_mask = RAPL_IDX_CLN; | ||
637 | rapl_pmu_events_group.attrs = rapl_events_cln_attr; | ||
638 | break; | ||
639 | case 45: /* Sandy Bridge-EP */ | ||
640 | case 62: /* IvyTown */ | ||
641 | rapl_cntr_mask = RAPL_IDX_SRV; | ||
642 | rapl_pmu_events_group.attrs = rapl_events_srv_attr; | ||
643 | break; | ||
644 | |||
645 | default: | ||
646 | /* unsupported */ | ||
647 | return 0; | ||
648 | } | ||
649 | |||
650 | cpu_notifier_register_begin(); | ||
651 | |||
652 | for_each_online_cpu(cpu) { | ||
653 | rapl_cpu_prepare(cpu); | ||
654 | rapl_cpu_init(cpu); | ||
655 | } | ||
656 | |||
657 | __perf_cpu_notifier(rapl_cpu_notifier); | ||
658 | |||
659 | ret = perf_pmu_register(&rapl_pmu_class, "power", -1); | ||
660 | if (WARN_ON(ret)) { | ||
661 | pr_info("RAPL PMU detected, registration failed (%d), RAPL PMU disabled\n", ret); | ||
662 | cpu_notifier_register_done(); | ||
663 | return -1; | ||
664 | } | ||
665 | |||
666 | pmu = __get_cpu_var(rapl_pmu); | ||
667 | |||
668 | pr_info("RAPL PMU detected, hw unit 2^-%d Joules," | ||
669 | " API unit is 2^-32 Joules," | ||
670 | " %d fixed counters" | ||
671 | " %llu ms ovfl timer\n", | ||
672 | pmu->hw_unit, | ||
673 | hweight32(rapl_cntr_mask), | ||
674 | ktime_to_ms(pmu->timer_interval)); | ||
675 | |||
676 | cpu_notifier_register_done(); | ||
677 | |||
678 | return 0; | ||
679 | } | ||
680 | device_initcall(rapl_pmu_init); | ||
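A hedged sketch of how a tool might count package energy system-wide via this PMU. The dynamic PMU type is read from sysfs (the path is assumed to be the standard /sys/bus/event_source/devices/power/type for a PMU registered as "power"), and error handling is minimal:

	#include <math.h>
	#include <stdint.h>
	#include <stdio.h>
	#include <string.h>
	#include <unistd.h>
	#include <sys/syscall.h>
	#include <linux/perf_event.h>

	int main(void)
	{
		struct perf_event_attr attr;
		uint64_t count;
		int type, fd;
		FILE *f = fopen("/sys/bus/event_source/devices/power/type", "r");

		if (!f || fscanf(f, "%d", &type) != 1)
			return 1;
		fclose(f);

		memset(&attr, 0, sizeof(attr));
		attr.type = type;
		attr.size = sizeof(attr);
		attr.config = 0x2;	/* rapl_energy_pkg, per the table above */

		/* system-wide on cpu 0: RAPL events are per-package counts */
		fd = syscall(__NR_perf_event_open, &attr, -1, 0, -1, 0);
		if (fd < 0)
			return 1;

		sleep(1);
		if (read(fd, &count, sizeof(count)) != sizeof(count))
			return 1;
		printf("~%.3f Joules over 1s\n", ldexp((double)count, -32));
		return 0;
	}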
diff --git a/arch/x86/kernel/cpu/perf_event_intel_uncore.c b/arch/x86/kernel/cpu/perf_event_intel_uncore.c index 29c248799ced..65bbbea38b9c 100644 --- a/arch/x86/kernel/cpu/perf_event_intel_uncore.c +++ b/arch/x86/kernel/cpu/perf_event_intel_uncore.c | |||
@@ -66,6 +66,47 @@ DEFINE_UNCORE_FORMAT_ATTR(mask_vnw, mask_vnw, "config2:3-4"); | |||
66 | DEFINE_UNCORE_FORMAT_ATTR(mask0, mask0, "config2:0-31"); | 66 | DEFINE_UNCORE_FORMAT_ATTR(mask0, mask0, "config2:0-31"); |
67 | DEFINE_UNCORE_FORMAT_ATTR(mask1, mask1, "config2:32-63"); | 67 | DEFINE_UNCORE_FORMAT_ATTR(mask1, mask1, "config2:32-63"); |
68 | 68 | ||
69 | static void uncore_pmu_start_hrtimer(struct intel_uncore_box *box); | ||
70 | static void uncore_pmu_cancel_hrtimer(struct intel_uncore_box *box); | ||
71 | static void uncore_perf_event_update(struct intel_uncore_box *box, struct perf_event *event); | ||
72 | static void uncore_pmu_event_read(struct perf_event *event); | ||
73 | |||
74 | static struct intel_uncore_pmu *uncore_event_to_pmu(struct perf_event *event) | ||
75 | { | ||
76 | return container_of(event->pmu, struct intel_uncore_pmu, pmu); | ||
77 | } | ||
78 | |||
79 | static struct intel_uncore_box * | ||
80 | uncore_pmu_to_box(struct intel_uncore_pmu *pmu, int cpu) | ||
81 | { | ||
82 | struct intel_uncore_box *box; | ||
83 | |||
84 | box = *per_cpu_ptr(pmu->box, cpu); | ||
85 | if (box) | ||
86 | return box; | ||
87 | |||
88 | raw_spin_lock(&uncore_box_lock); | ||
89 | list_for_each_entry(box, &pmu->box_list, list) { | ||
90 | if (box->phys_id == topology_physical_package_id(cpu)) { | ||
91 | atomic_inc(&box->refcnt); | ||
92 | *per_cpu_ptr(pmu->box, cpu) = box; | ||
93 | break; | ||
94 | } | ||
95 | } | ||
96 | raw_spin_unlock(&uncore_box_lock); | ||
97 | |||
98 | return *per_cpu_ptr(pmu->box, cpu); | ||
99 | } | ||
100 | |||
101 | static struct intel_uncore_box *uncore_event_to_box(struct perf_event *event) | ||
102 | { | ||
103 | /* | ||
104 | * perf core schedules events on the basis of cpu; uncore events are | ||
105 | * collected by one of the cpus inside a physical package. | ||
106 | */ | ||
107 | return uncore_pmu_to_box(uncore_event_to_pmu(event), smp_processor_id()); | ||
108 | } | ||
109 | |||
69 | static u64 uncore_msr_read_counter(struct intel_uncore_box *box, struct perf_event *event) | 110 | static u64 uncore_msr_read_counter(struct intel_uncore_box *box, struct perf_event *event) |
70 | { | 111 | { |
71 | u64 count; | 112 | u64 count; |
@@ -501,8 +542,11 @@ static struct extra_reg snbep_uncore_cbox_extra_regs[] = { | |||
501 | SNBEP_CBO_EVENT_EXTRA_REG(SNBEP_CBO_PMON_CTL_TID_EN, | 542 | SNBEP_CBO_EVENT_EXTRA_REG(SNBEP_CBO_PMON_CTL_TID_EN, |
502 | SNBEP_CBO_PMON_CTL_TID_EN, 0x1), | 543 | SNBEP_CBO_PMON_CTL_TID_EN, 0x1), |
503 | SNBEP_CBO_EVENT_EXTRA_REG(0x0334, 0xffff, 0x4), | 544 | SNBEP_CBO_EVENT_EXTRA_REG(0x0334, 0xffff, 0x4), |
545 | SNBEP_CBO_EVENT_EXTRA_REG(0x4334, 0xffff, 0x6), | ||
504 | SNBEP_CBO_EVENT_EXTRA_REG(0x0534, 0xffff, 0x4), | 546 | SNBEP_CBO_EVENT_EXTRA_REG(0x0534, 0xffff, 0x4), |
547 | SNBEP_CBO_EVENT_EXTRA_REG(0x4534, 0xffff, 0x6), | ||
505 | SNBEP_CBO_EVENT_EXTRA_REG(0x0934, 0xffff, 0x4), | 548 | SNBEP_CBO_EVENT_EXTRA_REG(0x0934, 0xffff, 0x4), |
549 | SNBEP_CBO_EVENT_EXTRA_REG(0x4934, 0xffff, 0x6), | ||
506 | SNBEP_CBO_EVENT_EXTRA_REG(0x4134, 0xffff, 0x6), | 550 | SNBEP_CBO_EVENT_EXTRA_REG(0x4134, 0xffff, 0x6), |
507 | SNBEP_CBO_EVENT_EXTRA_REG(0x0135, 0xffff, 0x8), | 551 | SNBEP_CBO_EVENT_EXTRA_REG(0x0135, 0xffff, 0x8), |
508 | SNBEP_CBO_EVENT_EXTRA_REG(0x0335, 0xffff, 0x8), | 552 | SNBEP_CBO_EVENT_EXTRA_REG(0x0335, 0xffff, 0x8), |
@@ -1178,10 +1222,15 @@ static struct extra_reg ivt_uncore_cbox_extra_regs[] = { | |||
1178 | SNBEP_CBO_EVENT_EXTRA_REG(SNBEP_CBO_PMON_CTL_TID_EN, | 1222 | SNBEP_CBO_EVENT_EXTRA_REG(SNBEP_CBO_PMON_CTL_TID_EN, |
1179 | SNBEP_CBO_PMON_CTL_TID_EN, 0x1), | 1223 | SNBEP_CBO_PMON_CTL_TID_EN, 0x1), |
1180 | SNBEP_CBO_EVENT_EXTRA_REG(0x1031, 0x10ff, 0x2), | 1224 | SNBEP_CBO_EVENT_EXTRA_REG(0x1031, 0x10ff, 0x2), |
1225 | SNBEP_CBO_EVENT_EXTRA_REG(0x1134, 0xffff, 0x4), | ||
1226 | SNBEP_CBO_EVENT_EXTRA_REG(0x4134, 0xffff, 0xc), | ||
1227 | SNBEP_CBO_EVENT_EXTRA_REG(0x5134, 0xffff, 0xc), | ||
1181 | SNBEP_CBO_EVENT_EXTRA_REG(0x0334, 0xffff, 0x4), | 1228 | SNBEP_CBO_EVENT_EXTRA_REG(0x0334, 0xffff, 0x4), |
1229 | SNBEP_CBO_EVENT_EXTRA_REG(0x4334, 0xffff, 0xc), | ||
1182 | SNBEP_CBO_EVENT_EXTRA_REG(0x0534, 0xffff, 0x4), | 1230 | SNBEP_CBO_EVENT_EXTRA_REG(0x0534, 0xffff, 0x4), |
1231 | SNBEP_CBO_EVENT_EXTRA_REG(0x4534, 0xffff, 0xc), | ||
1183 | SNBEP_CBO_EVENT_EXTRA_REG(0x0934, 0xffff, 0x4), | 1232 | SNBEP_CBO_EVENT_EXTRA_REG(0x0934, 0xffff, 0x4), |
1184 | SNBEP_CBO_EVENT_EXTRA_REG(0x4134, 0xffff, 0xc), | 1233 | SNBEP_CBO_EVENT_EXTRA_REG(0x4934, 0xffff, 0xc), |
1185 | SNBEP_CBO_EVENT_EXTRA_REG(0x0135, 0xffff, 0x10), | 1234 | SNBEP_CBO_EVENT_EXTRA_REG(0x0135, 0xffff, 0x10), |
1186 | SNBEP_CBO_EVENT_EXTRA_REG(0x0335, 0xffff, 0x10), | 1235 | SNBEP_CBO_EVENT_EXTRA_REG(0x0335, 0xffff, 0x10), |
1187 | SNBEP_CBO_EVENT_EXTRA_REG(0x2135, 0xffff, 0x10), | 1236 | SNBEP_CBO_EVENT_EXTRA_REG(0x2135, 0xffff, 0x10), |
@@ -1631,6 +1680,349 @@ static struct intel_uncore_type *snb_msr_uncores[] = { | |||
1631 | &snb_uncore_cbox, | 1680 | &snb_uncore_cbox, |
1632 | NULL, | 1681 | NULL, |
1633 | }; | 1682 | }; |
1683 | |||
1684 | enum { | ||
1685 | SNB_PCI_UNCORE_IMC, | ||
1686 | }; | ||
1687 | |||
1688 | static struct uncore_event_desc snb_uncore_imc_events[] = { | ||
1689 | INTEL_UNCORE_EVENT_DESC(data_reads, "event=0x01"), | ||
1690 | INTEL_UNCORE_EVENT_DESC(data_reads.scale, "6.103515625e-5"), | ||
1691 | INTEL_UNCORE_EVENT_DESC(data_reads.unit, "MiB"), | ||
1692 | |||
1693 | INTEL_UNCORE_EVENT_DESC(data_writes, "event=0x02"), | ||
1694 | INTEL_UNCORE_EVENT_DESC(data_writes.scale, "6.103515625e-5"), | ||
1695 | INTEL_UNCORE_EVENT_DESC(data_writes.unit, "MiB"), | ||
1696 | |||
1697 | { /* end: all zeroes */ }, | ||
1698 | }; | ||
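The 6.103515625e-5 scale above is consistent with each IMC count covering one 64-byte cache line (our reading; the source does not state it): 64 B divided by 2^20 B per MiB. A quick check:

	#include <stdio.h>

	int main(void)
	{
		/* assumption: one data_reads/data_writes count == one 64-byte line */
		double mib_per_count = 64.0 / (1 << 20);

		printf("%.13e MiB per count\n", mib_per_count);	/* 6.1035156250000e-05 */
		return 0;
	}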
1699 | |||
1700 | #define SNB_UNCORE_PCI_IMC_EVENT_MASK 0xff | ||
1701 | #define SNB_UNCORE_PCI_IMC_BAR_OFFSET 0x48 | ||
1702 | |||
1703 | /* page size multiple covering all config regs */ | ||
1704 | #define SNB_UNCORE_PCI_IMC_MAP_SIZE 0x6000 | ||
1705 | |||
1706 | #define SNB_UNCORE_PCI_IMC_DATA_READS 0x1 | ||
1707 | #define SNB_UNCORE_PCI_IMC_DATA_READS_BASE 0x5050 | ||
1708 | #define SNB_UNCORE_PCI_IMC_DATA_WRITES 0x2 | ||
1709 | #define SNB_UNCORE_PCI_IMC_DATA_WRITES_BASE 0x5054 | ||
1710 | #define SNB_UNCORE_PCI_IMC_CTR_BASE SNB_UNCORE_PCI_IMC_DATA_READS_BASE | ||
1711 | |||
1712 | static struct attribute *snb_uncore_imc_formats_attr[] = { | ||
1713 | &format_attr_event.attr, | ||
1714 | NULL, | ||
1715 | }; | ||
1716 | |||
1717 | static struct attribute_group snb_uncore_imc_format_group = { | ||
1718 | .name = "format", | ||
1719 | .attrs = snb_uncore_imc_formats_attr, | ||
1720 | }; | ||
1721 | |||
1722 | static void snb_uncore_imc_init_box(struct intel_uncore_box *box) | ||
1723 | { | ||
1724 | struct pci_dev *pdev = box->pci_dev; | ||
1725 | int where = SNB_UNCORE_PCI_IMC_BAR_OFFSET; | ||
1726 | resource_size_t addr; | ||
1727 | u32 pci_dword; | ||
1728 | |||
1729 | pci_read_config_dword(pdev, where, &pci_dword); | ||
1730 | addr = pci_dword; | ||
1731 | |||
1732 | #ifdef CONFIG_PHYS_ADDR_T_64BIT | ||
1733 | pci_read_config_dword(pdev, where + 4, &pci_dword); | ||
1734 | addr |= ((resource_size_t)pci_dword << 32); | ||
1735 | #endif | ||
1736 | |||
1737 | addr &= ~(PAGE_SIZE - 1); | ||
1738 | |||
1739 | box->io_addr = ioremap(addr, SNB_UNCORE_PCI_IMC_MAP_SIZE); | ||
1740 | box->hrtimer_duration = UNCORE_SNB_IMC_HRTIMER_INTERVAL; | ||
1741 | } | ||
1742 | |||
1743 | static void snb_uncore_imc_enable_box(struct intel_uncore_box *box) | ||
1744 | {} | ||
1745 | |||
1746 | static void snb_uncore_imc_disable_box(struct intel_uncore_box *box) | ||
1747 | {} | ||
1748 | |||
1749 | static void snb_uncore_imc_enable_event(struct intel_uncore_box *box, struct perf_event *event) | ||
1750 | {} | ||
1751 | |||
1752 | static void snb_uncore_imc_disable_event(struct intel_uncore_box *box, struct perf_event *event) | ||
1753 | {} | ||
1754 | |||
1755 | static u64 snb_uncore_imc_read_counter(struct intel_uncore_box *box, struct perf_event *event) | ||
1756 | { | ||
1757 | struct hw_perf_event *hwc = &event->hw; | ||
1758 | |||
1759 | return (u64)*(unsigned int *)(box->io_addr + hwc->event_base); | ||
1760 | } | ||
1761 | |||
1762 | /* | ||
1763 | * custom event_init() function because we define our own fixed, | ||
1764 | * free-running counters and do not want to conflict with generic | ||
1765 | * uncore logic. This also simplifies processing. | ||
1766 | */ | ||
1767 | static int snb_uncore_imc_event_init(struct perf_event *event) | ||
1768 | { | ||
1769 | struct intel_uncore_pmu *pmu; | ||
1770 | struct intel_uncore_box *box; | ||
1771 | struct hw_perf_event *hwc = &event->hw; | ||
1772 | u64 cfg = event->attr.config & SNB_UNCORE_PCI_IMC_EVENT_MASK; | ||
1773 | int idx, base; | ||
1774 | |||
1775 | if (event->attr.type != event->pmu->type) | ||
1776 | return -ENOENT; | ||
1777 | |||
1778 | pmu = uncore_event_to_pmu(event); | ||
1779 | /* no device found for this pmu */ | ||
1780 | if (pmu->func_id < 0) | ||
1781 | return -ENOENT; | ||
1782 | |||
1783 | /* Sampling not supported yet */ | ||
1784 | if (hwc->sample_period) | ||
1785 | return -EINVAL; | ||
1786 | |||
1787 | /* unsupported modes and filters */ | ||
1788 | if (event->attr.exclude_user || | ||
1789 | event->attr.exclude_kernel || | ||
1790 | event->attr.exclude_hv || | ||
1791 | event->attr.exclude_idle || | ||
1792 | event->attr.exclude_host || | ||
1793 | event->attr.exclude_guest || | ||
1794 | event->attr.sample_period) /* no sampling */ | ||
1795 | return -EINVAL; | ||
1796 | |||
1797 | /* | ||
1798 | * Place all uncore events for a particular physical package | ||
1799 | * onto a single cpu | ||
1800 | */ | ||
1801 | if (event->cpu < 0) | ||
1802 | return -EINVAL; | ||
1803 | |||
1804 | /* check only supported bits are set */ | ||
1805 | if (event->attr.config & ~SNB_UNCORE_PCI_IMC_EVENT_MASK) | ||
1806 | return -EINVAL; | ||
1807 | |||
1808 | box = uncore_pmu_to_box(pmu, event->cpu); | ||
1809 | if (!box || box->cpu < 0) | ||
1810 | return -EINVAL; | ||
1811 | |||
1812 | event->cpu = box->cpu; | ||
1813 | |||
1814 | event->hw.idx = -1; | ||
1815 | event->hw.last_tag = ~0ULL; | ||
1816 | event->hw.extra_reg.idx = EXTRA_REG_NONE; | ||
1817 | event->hw.branch_reg.idx = EXTRA_REG_NONE; | ||
1818 | /* | ||
1819 | * check event is known (whitelist, determines counter) | ||
1820 | */ | ||
1821 | switch (cfg) { | ||
1822 | case SNB_UNCORE_PCI_IMC_DATA_READS: | ||
1823 | base = SNB_UNCORE_PCI_IMC_DATA_READS_BASE; | ||
1824 | idx = UNCORE_PMC_IDX_FIXED; | ||
1825 | break; | ||
1826 | case SNB_UNCORE_PCI_IMC_DATA_WRITES: | ||
1827 | base = SNB_UNCORE_PCI_IMC_DATA_WRITES_BASE; | ||
1828 | idx = UNCORE_PMC_IDX_FIXED + 1; | ||
1829 | break; | ||
1830 | default: | ||
1831 | return -EINVAL; | ||
1832 | } | ||
1833 | |||
1834 | /* must be done before validate_group */ | ||
1835 | event->hw.event_base = base; | ||
1836 | event->hw.config = cfg; | ||
1837 | event->hw.idx = idx; | ||
1838 | |||
1839 | /* no group validation needed, we have free running counters */ | ||
1840 | |||
1841 | return 0; | ||
1842 | } | ||
1843 | |||
1844 | static int snb_uncore_imc_hw_config(struct intel_uncore_box *box, struct perf_event *event) | ||
1845 | { | ||
1846 | return 0; | ||
1847 | } | ||
1848 | |||
1849 | static void snb_uncore_imc_event_start(struct perf_event *event, int flags) | ||
1850 | { | ||
1851 | struct intel_uncore_box *box = uncore_event_to_box(event); | ||
1852 | u64 count; | ||
1853 | |||
1854 | if (WARN_ON_ONCE(!(event->hw.state & PERF_HES_STOPPED))) | ||
1855 | return; | ||
1856 | |||
1857 | event->hw.state = 0; | ||
1858 | box->n_active++; | ||
1859 | |||
1860 | list_add_tail(&event->active_entry, &box->active_list); | ||
1861 | |||
1862 | count = snb_uncore_imc_read_counter(box, event); | ||
1863 | local64_set(&event->hw.prev_count, count); | ||
1864 | |||
1865 | if (box->n_active == 1) | ||
1866 | uncore_pmu_start_hrtimer(box); | ||
1867 | } | ||
1868 | |||
1869 | static void snb_uncore_imc_event_stop(struct perf_event *event, int flags) | ||
1870 | { | ||
1871 | struct intel_uncore_box *box = uncore_event_to_box(event); | ||
1872 | struct hw_perf_event *hwc = &event->hw; | ||
1873 | |||
1874 | if (!(hwc->state & PERF_HES_STOPPED)) { | ||
1875 | box->n_active--; | ||
1876 | |||
1877 | WARN_ON_ONCE(hwc->state & PERF_HES_STOPPED); | ||
1878 | hwc->state |= PERF_HES_STOPPED; | ||
1879 | |||
1880 | list_del(&event->active_entry); | ||
1881 | |||
1882 | if (box->n_active == 0) | ||
1883 | uncore_pmu_cancel_hrtimer(box); | ||
1884 | } | ||
1885 | |||
1886 | if ((flags & PERF_EF_UPDATE) && !(hwc->state & PERF_HES_UPTODATE)) { | ||
1887 | /* | ||
1888 | * Drain the remaining delta count out of an event | ||
1889 | * that we are disabling: | ||
1890 | */ | ||
1891 | uncore_perf_event_update(box, event); | ||
1892 | hwc->state |= PERF_HES_UPTODATE; | ||
1893 | } | ||
1894 | } | ||
1895 | |||
1896 | static int snb_uncore_imc_event_add(struct perf_event *event, int flags) | ||
1897 | { | ||
1898 | struct intel_uncore_box *box = uncore_event_to_box(event); | ||
1899 | struct hw_perf_event *hwc = &event->hw; | ||
1900 | |||
1901 | if (!box) | ||
1902 | return -ENODEV; | ||
1903 | |||
1904 | hwc->state = PERF_HES_UPTODATE | PERF_HES_STOPPED; | ||
1905 | if (!(flags & PERF_EF_START)) | ||
1906 | hwc->state |= PERF_HES_ARCH; | ||
1907 | |||
1908 | snb_uncore_imc_event_start(event, 0); | ||
1909 | |||
1910 | box->n_events++; | ||
1911 | |||
1912 | return 0; | ||
1913 | } | ||
1914 | |||
1915 | static void snb_uncore_imc_event_del(struct perf_event *event, int flags) | ||
1916 | { | ||
1917 | struct intel_uncore_box *box = uncore_event_to_box(event); | ||
1918 | int i; | ||
1919 | |||
1920 | snb_uncore_imc_event_stop(event, PERF_EF_UPDATE); | ||
1921 | |||
1922 | for (i = 0; i < box->n_events; i++) { | ||
1923 | if (event == box->event_list[i]) { | ||
1924 | --box->n_events; | ||
1925 | break; | ||
1926 | } | ||
1927 | } | ||
1928 | } | ||
1929 | |||
1930 | static int snb_pci2phy_map_init(int devid) | ||
1931 | { | ||
1932 | struct pci_dev *dev = NULL; | ||
1933 | int bus; | ||
1934 | |||
1935 | dev = pci_get_device(PCI_VENDOR_ID_INTEL, devid, dev); | ||
1936 | if (!dev) | ||
1937 | return -ENOTTY; | ||
1938 | |||
1939 | bus = dev->bus->number; | ||
1940 | |||
1941 | pcibus_to_physid[bus] = 0; | ||
1942 | |||
1943 | pci_dev_put(dev); | ||
1944 | |||
1945 | return 0; | ||
1946 | } | ||
1947 | |||
1948 | static struct pmu snb_uncore_imc_pmu = { | ||
1949 | .task_ctx_nr = perf_invalid_context, | ||
1950 | .event_init = snb_uncore_imc_event_init, | ||
1951 | .add = snb_uncore_imc_event_add, | ||
1952 | .del = snb_uncore_imc_event_del, | ||
1953 | .start = snb_uncore_imc_event_start, | ||
1954 | .stop = snb_uncore_imc_event_stop, | ||
1955 | .read = uncore_pmu_event_read, | ||
1956 | }; | ||
1957 | |||
1958 | static struct intel_uncore_ops snb_uncore_imc_ops = { | ||
1959 | .init_box = snb_uncore_imc_init_box, | ||
1960 | .enable_box = snb_uncore_imc_enable_box, | ||
1961 | .disable_box = snb_uncore_imc_disable_box, | ||
1962 | .disable_event = snb_uncore_imc_disable_event, | ||
1963 | .enable_event = snb_uncore_imc_enable_event, | ||
1964 | .hw_config = snb_uncore_imc_hw_config, | ||
1965 | .read_counter = snb_uncore_imc_read_counter, | ||
1966 | }; | ||
1967 | |||
1968 | static struct intel_uncore_type snb_uncore_imc = { | ||
1969 | .name = "imc", | ||
1970 | .num_counters = 2, | ||
1971 | .num_boxes = 1, | ||
1972 | .fixed_ctr_bits = 32, | ||
1973 | .fixed_ctr = SNB_UNCORE_PCI_IMC_CTR_BASE, | ||
1974 | .event_descs = snb_uncore_imc_events, | ||
1975 | .format_group = &snb_uncore_imc_format_group, | ||
1976 | .perf_ctr = SNB_UNCORE_PCI_IMC_DATA_READS_BASE, | ||
1977 | .event_mask = SNB_UNCORE_PCI_IMC_EVENT_MASK, | ||
1978 | .ops = &snb_uncore_imc_ops, | ||
1979 | .pmu = &snb_uncore_imc_pmu, | ||
1980 | }; | ||
1981 | |||
1982 | static struct intel_uncore_type *snb_pci_uncores[] = { | ||
1983 | [SNB_PCI_UNCORE_IMC] = &snb_uncore_imc, | ||
1984 | NULL, | ||
1985 | }; | ||
1986 | |||
1987 | static DEFINE_PCI_DEVICE_TABLE(snb_uncore_pci_ids) = { | ||
1988 | { /* IMC */ | ||
1989 | PCI_DEVICE(PCI_VENDOR_ID_INTEL, PCI_DEVICE_ID_INTEL_SNB_IMC), | ||
1990 | .driver_data = UNCORE_PCI_DEV_DATA(SNB_PCI_UNCORE_IMC, 0), | ||
1991 | }, | ||
1992 | { /* end: all zeroes */ }, | ||
1993 | }; | ||
1994 | |||
1995 | static DEFINE_PCI_DEVICE_TABLE(ivb_uncore_pci_ids) = { | ||
1996 | { /* IMC */ | ||
1997 | PCI_DEVICE(PCI_VENDOR_ID_INTEL, PCI_DEVICE_ID_INTEL_IVB_IMC), | ||
1998 | .driver_data = UNCORE_PCI_DEV_DATA(SNB_PCI_UNCORE_IMC, 0), | ||
1999 | }, | ||
2000 | { /* end: all zeroes */ }, | ||
2001 | }; | ||
2002 | |||
2003 | static DEFINE_PCI_DEVICE_TABLE(hsw_uncore_pci_ids) = { | ||
2004 | { /* IMC */ | ||
2005 | PCI_DEVICE(PCI_VENDOR_ID_INTEL, PCI_DEVICE_ID_INTEL_HSW_IMC), | ||
2006 | .driver_data = UNCORE_PCI_DEV_DATA(SNB_PCI_UNCORE_IMC, 0), | ||
2007 | }, | ||
2008 | { /* end: all zeroes */ }, | ||
2009 | }; | ||
2010 | |||
2011 | static struct pci_driver snb_uncore_pci_driver = { | ||
2012 | .name = "snb_uncore", | ||
2013 | .id_table = snb_uncore_pci_ids, | ||
2014 | }; | ||
2015 | |||
2016 | static struct pci_driver ivb_uncore_pci_driver = { | ||
2017 | .name = "ivb_uncore", | ||
2018 | .id_table = ivb_uncore_pci_ids, | ||
2019 | }; | ||
2020 | |||
2021 | static struct pci_driver hsw_uncore_pci_driver = { | ||
2022 | .name = "hsw_uncore", | ||
2023 | .id_table = hsw_uncore_pci_ids, | ||
2024 | }; | ||
2025 | |||
1634 | /* end of Sandy Bridge uncore support */ | 2026 | /* end of Sandy Bridge uncore support */ |
1635 | 2027 | ||
1636 | /* Nehalem uncore support */ | 2028 | /* Nehalem uncore support */ |
@@ -2781,6 +3173,7 @@ again: | |||
2781 | static enum hrtimer_restart uncore_pmu_hrtimer(struct hrtimer *hrtimer) | 3173 | static enum hrtimer_restart uncore_pmu_hrtimer(struct hrtimer *hrtimer) |
2782 | { | 3174 | { |
2783 | struct intel_uncore_box *box; | 3175 | struct intel_uncore_box *box; |
3176 | struct perf_event *event; | ||
2784 | unsigned long flags; | 3177 | unsigned long flags; |
2785 | int bit; | 3178 | int bit; |
2786 | 3179 | ||
@@ -2793,19 +3186,27 @@ static enum hrtimer_restart uncore_pmu_hrtimer(struct hrtimer *hrtimer) | |||
2793 | */ | 3186 | */ |
2794 | local_irq_save(flags); | 3187 | local_irq_save(flags); |
2795 | 3188 | ||
3189 | /* | ||
3190 | * handle boxes with an active event list as opposed to active | ||
3191 | * counters | ||
3192 | */ | ||
3193 | list_for_each_entry(event, &box->active_list, active_entry) { | ||
3194 | uncore_perf_event_update(box, event); | ||
3195 | } | ||
3196 | |||
2796 | for_each_set_bit(bit, box->active_mask, UNCORE_PMC_IDX_MAX) | 3197 | for_each_set_bit(bit, box->active_mask, UNCORE_PMC_IDX_MAX) |
2797 | uncore_perf_event_update(box, box->events[bit]); | 3198 | uncore_perf_event_update(box, box->events[bit]); |
2798 | 3199 | ||
2799 | local_irq_restore(flags); | 3200 | local_irq_restore(flags); |
2800 | 3201 | ||
2801 | hrtimer_forward_now(hrtimer, ns_to_ktime(UNCORE_PMU_HRTIMER_INTERVAL)); | 3202 | hrtimer_forward_now(hrtimer, ns_to_ktime(box->hrtimer_duration)); |
2802 | return HRTIMER_RESTART; | 3203 | return HRTIMER_RESTART; |
2803 | } | 3204 | } |
2804 | 3205 | ||
2805 | static void uncore_pmu_start_hrtimer(struct intel_uncore_box *box) | 3206 | static void uncore_pmu_start_hrtimer(struct intel_uncore_box *box) |
2806 | { | 3207 | { |
2807 | __hrtimer_start_range_ns(&box->hrtimer, | 3208 | __hrtimer_start_range_ns(&box->hrtimer, |
2808 | ns_to_ktime(UNCORE_PMU_HRTIMER_INTERVAL), 0, | 3209 | ns_to_ktime(box->hrtimer_duration), 0, |
2809 | HRTIMER_MODE_REL_PINNED, 0); | 3210 | HRTIMER_MODE_REL_PINNED, 0); |
2810 | } | 3211 | } |
2811 | 3212 | ||
@@ -2839,43 +3240,12 @@ static struct intel_uncore_box *uncore_alloc_box(struct intel_uncore_type *type, | |||
2839 | box->cpu = -1; | 3240 | box->cpu = -1; |
2840 | box->phys_id = -1; | 3241 | box->phys_id = -1; |
2841 | 3242 | ||
2842 | return box; | 3243 | /* set default hrtimer timeout */ |
2843 | } | 3244 | box->hrtimer_duration = UNCORE_PMU_HRTIMER_INTERVAL; |
2844 | |||
2845 | static struct intel_uncore_box * | ||
2846 | uncore_pmu_to_box(struct intel_uncore_pmu *pmu, int cpu) | ||
2847 | { | ||
2848 | struct intel_uncore_box *box; | ||
2849 | |||
2850 | box = *per_cpu_ptr(pmu->box, cpu); | ||
2851 | if (box) | ||
2852 | return box; | ||
2853 | |||
2854 | raw_spin_lock(&uncore_box_lock); | ||
2855 | list_for_each_entry(box, &pmu->box_list, list) { | ||
2856 | if (box->phys_id == topology_physical_package_id(cpu)) { | ||
2857 | atomic_inc(&box->refcnt); | ||
2858 | *per_cpu_ptr(pmu->box, cpu) = box; | ||
2859 | break; | ||
2860 | } | ||
2861 | } | ||
2862 | raw_spin_unlock(&uncore_box_lock); | ||
2863 | |||
2864 | return *per_cpu_ptr(pmu->box, cpu); | ||
2865 | } | ||
2866 | 3245 | ||
2867 | static struct intel_uncore_pmu *uncore_event_to_pmu(struct perf_event *event) | 3246 | INIT_LIST_HEAD(&box->active_list); |
2868 | { | ||
2869 | return container_of(event->pmu, struct intel_uncore_pmu, pmu); | ||
2870 | } | ||
2871 | 3247 | ||
2872 | static struct intel_uncore_box *uncore_event_to_box(struct perf_event *event) | 3248 | return box; |
2873 | { | ||
2874 | /* | ||
2875 | * perf core schedules event on the basis of cpu, uncore events are | ||
2876 | * collected by one of the cpus inside a physical package. | ||
2877 | */ | ||
2878 | return uncore_pmu_to_box(uncore_event_to_pmu(event), smp_processor_id()); | ||
2879 | } | 3249 | } |
2880 | 3250 | ||
2881 | static int | 3251 | static int |
@@ -3271,16 +3641,21 @@ static int __init uncore_pmu_register(struct intel_uncore_pmu *pmu) | |||
3271 | { | 3641 | { |
3272 | int ret; | 3642 | int ret; |
3273 | 3643 | ||
3274 | pmu->pmu = (struct pmu) { | 3644 | if (!pmu->type->pmu) { |
3275 | .attr_groups = pmu->type->attr_groups, | 3645 | pmu->pmu = (struct pmu) { |
3276 | .task_ctx_nr = perf_invalid_context, | 3646 | .attr_groups = pmu->type->attr_groups, |
3277 | .event_init = uncore_pmu_event_init, | 3647 | .task_ctx_nr = perf_invalid_context, |
3278 | .add = uncore_pmu_event_add, | 3648 | .event_init = uncore_pmu_event_init, |
3279 | .del = uncore_pmu_event_del, | 3649 | .add = uncore_pmu_event_add, |
3280 | .start = uncore_pmu_event_start, | 3650 | .del = uncore_pmu_event_del, |
3281 | .stop = uncore_pmu_event_stop, | 3651 | .start = uncore_pmu_event_start, |
3282 | .read = uncore_pmu_event_read, | 3652 | .stop = uncore_pmu_event_stop, |
3283 | }; | 3653 | .read = uncore_pmu_event_read, |
3654 | }; | ||
3655 | } else { | ||
3656 | pmu->pmu = *pmu->type->pmu; | ||
3657 | pmu->pmu.attr_groups = pmu->type->attr_groups; | ||
3658 | } | ||
3284 | 3659 | ||
3285 | if (pmu->type->num_boxes == 1) { | 3660 | if (pmu->type->num_boxes == 1) { |
3286 | if (strlen(pmu->type->name) > 0) | 3661 | if (strlen(pmu->type->name) > 0) |
@@ -3326,6 +3701,8 @@ static int __init uncore_type_init(struct intel_uncore_type *type) | |||
3326 | if (!pmus) | 3701 | if (!pmus) |
3327 | return -ENOMEM; | 3702 | return -ENOMEM; |
3328 | 3703 | ||
3704 | type->pmus = pmus; | ||
3705 | |||
3329 | type->unconstrainted = (struct event_constraint) | 3706 | type->unconstrainted = (struct event_constraint) |
3330 | __EVENT_CONSTRAINT(0, (1ULL << type->num_counters) - 1, | 3707 | __EVENT_CONSTRAINT(0, (1ULL << type->num_counters) - 1, |
3331 | 0, type->num_counters, 0, 0); | 3708 | 0, type->num_counters, 0, 0); |
@@ -3361,7 +3738,6 @@ static int __init uncore_type_init(struct intel_uncore_type *type) | |||
3361 | } | 3738 | } |
3362 | 3739 | ||
3363 | type->pmu_group = &uncore_pmu_attr_group; | 3740 | type->pmu_group = &uncore_pmu_attr_group; |
3364 | type->pmus = pmus; | ||
3365 | return 0; | 3741 | return 0; |
3366 | fail: | 3742 | fail: |
3367 | uncore_type_exit(type); | 3743 | uncore_type_exit(type); |
@@ -3493,6 +3869,28 @@ static int __init uncore_pci_init(void) | |||
3493 | pci_uncores = ivt_pci_uncores; | 3869 | pci_uncores = ivt_pci_uncores; |
3494 | uncore_pci_driver = &ivt_uncore_pci_driver; | 3870 | uncore_pci_driver = &ivt_uncore_pci_driver; |
3495 | break; | 3871 | break; |
3872 | case 42: /* Sandy Bridge */ | ||
3873 | ret = snb_pci2phy_map_init(PCI_DEVICE_ID_INTEL_SNB_IMC); | ||
3874 | if (ret) | ||
3875 | return ret; | ||
3876 | pci_uncores = snb_pci_uncores; | ||
3877 | uncore_pci_driver = &snb_uncore_pci_driver; | ||
3878 | break; | ||
3879 | case 58: /* Ivy Bridge */ | ||
3880 | ret = snb_pci2phy_map_init(PCI_DEVICE_ID_INTEL_IVB_IMC); | ||
3881 | if (ret) | ||
3882 | return ret; | ||
3883 | pci_uncores = snb_pci_uncores; | ||
3884 | uncore_pci_driver = &ivb_uncore_pci_driver; | ||
3885 | break; | ||
3886 | case 60: /* Haswell */ | ||
3887 | case 69: /* Haswell Celeron */ | ||
3888 | ret = snb_pci2phy_map_init(PCI_DEVICE_ID_INTEL_HSW_IMC); | ||
3889 | if (ret) | ||
3890 | return ret; | ||
3891 | pci_uncores = snb_pci_uncores; | ||
3892 | uncore_pci_driver = &hsw_uncore_pci_driver; | ||
3893 | break; | ||
3496 | default: | 3894 | default: |
3497 | return 0; | 3895 | return 0; |
3498 | } | 3896 | } |
@@ -3764,7 +4162,7 @@ static void __init uncore_cpu_setup(void *dummy) | |||
3764 | 4162 | ||
3765 | static int __init uncore_cpu_init(void) | 4163 | static int __init uncore_cpu_init(void) |
3766 | { | 4164 | { |
3767 | int ret, cpu, max_cores; | 4165 | int ret, max_cores; |
3768 | 4166 | ||
3769 | max_cores = boot_cpu_data.x86_max_cores; | 4167 | max_cores = boot_cpu_data.x86_max_cores; |
3770 | switch (boot_cpu_data.x86_model) { | 4168 | switch (boot_cpu_data.x86_model) { |
@@ -3808,29 +4206,6 @@ static int __init uncore_cpu_init(void) | |||
3808 | if (ret) | 4206 | if (ret) |
3809 | return ret; | 4207 | return ret; |
3810 | 4208 | ||
3811 | get_online_cpus(); | ||
3812 | |||
3813 | for_each_online_cpu(cpu) { | ||
3814 | int i, phys_id = topology_physical_package_id(cpu); | ||
3815 | |||
3816 | for_each_cpu(i, &uncore_cpu_mask) { | ||
3817 | if (phys_id == topology_physical_package_id(i)) { | ||
3818 | phys_id = -1; | ||
3819 | break; | ||
3820 | } | ||
3821 | } | ||
3822 | if (phys_id < 0) | ||
3823 | continue; | ||
3824 | |||
3825 | uncore_cpu_prepare(cpu, phys_id); | ||
3826 | uncore_event_init_cpu(cpu); | ||
3827 | } | ||
3828 | on_each_cpu(uncore_cpu_setup, NULL, 1); | ||
3829 | |||
3830 | register_cpu_notifier(&uncore_cpu_nb); | ||
3831 | |||
3832 | put_online_cpus(); | ||
3833 | |||
3834 | return 0; | 4209 | return 0; |
3835 | } | 4210 | } |
3836 | 4211 | ||
@@ -3859,6 +4234,41 @@ static int __init uncore_pmus_register(void) | |||
3859 | return 0; | 4234 | return 0; |
3860 | } | 4235 | } |
3861 | 4236 | ||
4237 | static void __init uncore_cpumask_init(void) | ||
4238 | { | ||
4239 | int cpu; | ||
4240 | |||
4241 | /* | ||
4242 | * only invoke once from msr or pci init code | ||
4243 | */ | ||
4244 | if (!cpumask_empty(&uncore_cpu_mask)) | ||
4245 | return; | ||
4246 | |||
4247 | cpu_notifier_register_begin(); | ||
4248 | |||
4249 | for_each_online_cpu(cpu) { | ||
4250 | int i, phys_id = topology_physical_package_id(cpu); | ||
4251 | |||
4252 | for_each_cpu(i, &uncore_cpu_mask) { | ||
4253 | if (phys_id == topology_physical_package_id(i)) { | ||
4254 | phys_id = -1; | ||
4255 | break; | ||
4256 | } | ||
4257 | } | ||
4258 | if (phys_id < 0) | ||
4259 | continue; | ||
4260 | |||
4261 | uncore_cpu_prepare(cpu, phys_id); | ||
4262 | uncore_event_init_cpu(cpu); | ||
4263 | } | ||
4264 | on_each_cpu(uncore_cpu_setup, NULL, 1); | ||
4265 | |||
4266 | __register_cpu_notifier(&uncore_cpu_nb); | ||
4267 | |||
4268 | cpu_notifier_register_done(); | ||
4269 | } | ||
4270 | |||
4271 | |||
3862 | static int __init intel_uncore_init(void) | 4272 | static int __init intel_uncore_init(void) |
3863 | { | 4273 | { |
3864 | int ret; | 4274 | int ret; |
@@ -3877,6 +4287,7 @@ static int __init intel_uncore_init(void) | |||
3877 | uncore_pci_exit(); | 4287 | uncore_pci_exit(); |
3878 | goto fail; | 4288 | goto fail; |
3879 | } | 4289 | } |
4290 | uncore_cpumask_init(); | ||
3880 | 4291 | ||
3881 | uncore_pmus_register(); | 4292 | uncore_pmus_register(); |
3882 | return 0; | 4293 | return 0; |
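The uncore_cpumask_init() function added above is one instance of a registration scheme that recurs throughout this series (the cpuid.c and hpet.c hunks below use it too): the walk over online CPUs and the notifier registration both happen inside a single cpu_notifier_register_begin()/cpu_notifier_register_done() section, so no CPU can be hotplugged between the two steps. A minimal sketch of the pattern; my_cpu_callback and my_setup_cpu are hypothetical names, not from this patch:

    #include <linux/cpu.h>

    static int my_cpu_callback(struct notifier_block *nb,
                               unsigned long action, void *hcpu)
    {
            /* handle CPU_ONLINE/CPU_DEAD etc. for cpu = (long)hcpu */
            return NOTIFY_OK;
    }

    static struct notifier_block my_cpu_nb = {
            .notifier_call = my_cpu_callback,
    };

    static int __init my_init(void)
    {
            int cpu;

            cpu_notifier_register_begin();
            for_each_online_cpu(cpu)
                    my_setup_cpu(cpu);      /* hypothetical per-CPU setup */
            /* double-underscore variant: the registration lock is already held */
            __register_cpu_notifier(&my_cpu_nb);
            cpu_notifier_register_done();
            return 0;
    }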
diff --git a/arch/x86/kernel/cpu/perf_event_intel_uncore.h b/arch/x86/kernel/cpu/perf_event_intel_uncore.h index a80ab71a883d..90236f0c94a9 100644 --- a/arch/x86/kernel/cpu/perf_event_intel_uncore.h +++ b/arch/x86/kernel/cpu/perf_event_intel_uncore.h | |||
@@ -6,6 +6,7 @@ | |||
6 | 6 | ||
7 | #define UNCORE_PMU_NAME_LEN 32 | 7 | #define UNCORE_PMU_NAME_LEN 32 |
8 | #define UNCORE_PMU_HRTIMER_INTERVAL (60LL * NSEC_PER_SEC) | 8 | #define UNCORE_PMU_HRTIMER_INTERVAL (60LL * NSEC_PER_SEC) |
9 | #define UNCORE_SNB_IMC_HRTIMER_INTERVAL (5ULL * NSEC_PER_SEC) | ||
9 | 10 | ||
10 | #define UNCORE_FIXED_EVENT 0xff | 11 | #define UNCORE_FIXED_EVENT 0xff |
11 | #define UNCORE_PMC_IDX_MAX_GENERIC 8 | 12 | #define UNCORE_PMC_IDX_MAX_GENERIC 8 |
@@ -440,6 +441,7 @@ struct intel_uncore_type { | |||
440 | struct intel_uncore_ops *ops; | 441 | struct intel_uncore_ops *ops; |
441 | struct uncore_event_desc *event_descs; | 442 | struct uncore_event_desc *event_descs; |
442 | const struct attribute_group *attr_groups[4]; | 443 | const struct attribute_group *attr_groups[4]; |
444 | struct pmu *pmu; /* for custom pmu ops */ | ||
443 | }; | 445 | }; |
444 | 446 | ||
445 | #define pmu_group attr_groups[0] | 447 | #define pmu_group attr_groups[0] |
@@ -488,8 +490,11 @@ struct intel_uncore_box { | |||
488 | u64 tags[UNCORE_PMC_IDX_MAX]; | 490 | u64 tags[UNCORE_PMC_IDX_MAX]; |
489 | struct pci_dev *pci_dev; | 491 | struct pci_dev *pci_dev; |
490 | struct intel_uncore_pmu *pmu; | 492 | struct intel_uncore_pmu *pmu; |
493 | u64 hrtimer_duration; /* hrtimer timeout for this box */ | ||
491 | struct hrtimer hrtimer; | 494 | struct hrtimer hrtimer; |
492 | struct list_head list; | 495 | struct list_head list; |
496 | struct list_head active_list; | ||
497 | void *io_addr; | ||
493 | struct intel_uncore_extra_reg shared_regs[0]; | 498 | struct intel_uncore_extra_reg shared_regs[0]; |
494 | }; | 499 | }; |
495 | 500 | ||
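The new hrtimer_duration field makes the counter-refresh period a per-box property instead of the fixed 60-second UNCORE_PMU_HRTIMER_INTERVAL; the 5-second UNCORE_SNB_IMC_HRTIMER_INTERVAL added alongside it is presumably there for the client IMC's free-running counters, which wrap much faster. A sketch of how an init_box hook could apply it (snb_uncore_imc_init_box is an assumed name, not quoted from this patch):

    static void snb_uncore_imc_init_box(struct intel_uncore_box *box)
    {
            /* fast-wrapping free-running counters need a shorter refresh */
            box->hrtimer_duration = UNCORE_SNB_IMC_HRTIMER_INTERVAL;
    }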
diff --git a/arch/x86/kernel/cpu/perf_event_p4.c b/arch/x86/kernel/cpu/perf_event_p4.c index 3486e6660357..5d466b7d8609 100644 --- a/arch/x86/kernel/cpu/perf_event_p4.c +++ b/arch/x86/kernel/cpu/perf_event_p4.c | |||
@@ -1257,7 +1257,24 @@ again: | |||
1257 | pass++; | 1257 | pass++; |
1258 | goto again; | 1258 | goto again; |
1259 | } | 1259 | } |
1260 | 1260 | /* | |
1261 | * Perf does test runs to see if a whole group can be assigned | ||
1262 | * together successfully. There can be multiple rounds of this. | ||
1263 | * Unfortunately, p4_pmu_swap_config_ts touches the hwc->config | ||
1264 | * bits, such that the next round of group assignments will | ||
1265 | * cause the above p4_should_swap_ts to pass instead of fail. | ||
1266 | * This leads to counters exclusive to thread0 being used by | ||
1267 | * thread1. | ||
1268 | * | ||
1269 | * Solve this with a cheap hack: reset the idx back to -1 to | ||
1270 | * force a new lookup (p4_next_cntr) to get the right counter | ||
1271 | * for the right thread. | ||
1272 | * | ||
1273 | * This probably doesn't comply with the general spirit of how | ||
1274 | * perf wants to work, but P4 is special. :-( | ||
1275 | */ | ||
1276 | if (p4_should_swap_ts(hwc->config, cpu)) | ||
1277 | hwc->idx = -1; | ||
1261 | p4_pmu_swap_config_ts(hwc, cpu); | 1278 | p4_pmu_swap_config_ts(hwc, cpu); |
1262 | if (assign) | 1279 | if (assign) |
1263 | assign[i] = cntr_idx; | 1280 | assign[i] = cntr_idx; |
@@ -1322,6 +1339,7 @@ static __initconst const struct x86_pmu p4_pmu = { | |||
1322 | __init int p4_pmu_init(void) | 1339 | __init int p4_pmu_init(void) |
1323 | { | 1340 | { |
1324 | unsigned int low, high; | 1341 | unsigned int low, high; |
1342 | int i, reg; | ||
1325 | 1343 | ||
1326 | /* If we get stripped -- indexing fails */ | 1344 | /* If we get stripped -- indexing fails */ |
1327 | BUILD_BUG_ON(ARCH_P4_MAX_CCCR > INTEL_PMC_MAX_GENERIC); | 1345 | BUILD_BUG_ON(ARCH_P4_MAX_CCCR > INTEL_PMC_MAX_GENERIC); |
@@ -1340,5 +1358,19 @@ __init int p4_pmu_init(void) | |||
1340 | 1358 | ||
1341 | x86_pmu = p4_pmu; | 1359 | x86_pmu = p4_pmu; |
1342 | 1360 | ||
1361 | /* | ||
1362 | * Even though the counters are configured to interrupt a particular | ||
1363 | * logical processor when an overflow happens, testing has shown that | ||
1364 | * on kdump kernels (which uses a single cpu), thread1's counter | ||
1365 | * continues to run and will report an NMI on thread0. Due to the | ||
1366 | * overflow bug, this leads to a stream of unknown NMIs. | ||
1367 | * | ||
1368 | * Solve this by zeroing out the registers to mimic a reset. | ||
1369 | */ | ||
1370 | for (i = 0; i < x86_pmu.num_counters; i++) { | ||
1371 | reg = x86_pmu_config_addr(i); | ||
1372 | wrmsrl_safe(reg, 0ULL); | ||
1373 | } | ||
1374 | |||
1343 | return 0; | 1375 | return 0; |
1344 | } | 1376 | } |
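A note on the reset loop just added: wrmsrl_safe() is the fault-tolerant MSR write. Unlike wrmsrl(), it returns an error instead of taking a #GP fault when a register is not implemented, which is why the loop can blindly walk every config address reported by x86_pmu.num_counters. Distilled to its core:

    /* zero every counter-config MSR to mimic a power-on reset */
    for (i = 0; i < x86_pmu.num_counters; i++)
            wrmsrl_safe(x86_pmu_config_addr(i), 0ULL);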
diff --git a/arch/x86/kernel/cpu/perf_event_p6.c b/arch/x86/kernel/cpu/perf_event_p6.c index b1e2fe115323..7c1a0c07b607 100644 --- a/arch/x86/kernel/cpu/perf_event_p6.c +++ b/arch/x86/kernel/cpu/perf_event_p6.c | |||
@@ -231,31 +231,49 @@ static __initconst const struct x86_pmu p6_pmu = { | |||
231 | 231 | ||
232 | }; | 232 | }; |
233 | 233 | ||
234 | static __init void p6_pmu_rdpmc_quirk(void) | ||
235 | { | ||
236 | if (boot_cpu_data.x86_mask < 9) { | ||
237 | /* | ||
238 | * PPro erratum 26; fixed in stepping 9 and above. | ||
239 | */ | ||
240 | pr_warn("Userspace RDPMC support disabled due to a CPU erratum\n"); | ||
241 | x86_pmu.attr_rdpmc_broken = 1; | ||
242 | x86_pmu.attr_rdpmc = 0; | ||
243 | } | ||
244 | } | ||
245 | |||
234 | __init int p6_pmu_init(void) | 246 | __init int p6_pmu_init(void) |
235 | { | 247 | { |
248 | x86_pmu = p6_pmu; | ||
249 | |||
236 | switch (boot_cpu_data.x86_model) { | 250 | switch (boot_cpu_data.x86_model) { |
237 | case 1: | 251 | case 1: /* Pentium Pro */ |
238 | case 3: /* Pentium Pro */ | 252 | x86_add_quirk(p6_pmu_rdpmc_quirk); |
239 | case 5: | 253 | break; |
240 | case 6: /* Pentium II */ | 254 | |
241 | case 7: | 255 | case 3: /* Pentium II - Klamath */ |
242 | case 8: | 256 | case 5: /* Pentium II - Deschutes */ |
243 | case 11: /* Pentium III */ | 257 | case 6: /* Pentium II - Mendocino */ |
244 | case 9: | ||
245 | case 13: | ||
246 | /* Pentium M */ | ||
247 | break; | 258 | break; |
259 | |||
260 | case 7: /* Pentium III - Katmai */ | ||
261 | case 8: /* Pentium III - Coppermine */ | ||
262 | case 10: /* Pentium III Xeon */ | ||
263 | case 11: /* Pentium III - Tualatin */ | ||
264 | break; | ||
265 | |||
266 | case 9: /* Pentium M - Banias */ | ||
267 | case 13: /* Pentium M - Dothan */ | ||
268 | break; | ||
269 | |||
248 | default: | 270 | default: |
249 | pr_cont("unsupported p6 CPU model %d ", | 271 | pr_cont("unsupported p6 CPU model %d ", boot_cpu_data.x86_model); |
250 | boot_cpu_data.x86_model); | ||
251 | return -ENODEV; | 272 | return -ENODEV; |
252 | } | 273 | } |
253 | 274 | ||
254 | x86_pmu = p6_pmu; | ||
255 | |||
256 | memcpy(hw_cache_event_ids, p6_hw_cache_event_ids, | 275 | memcpy(hw_cache_event_ids, p6_hw_cache_event_ids, |
257 | sizeof(hw_cache_event_ids)); | 276 | sizeof(hw_cache_event_ids)); |
258 | 277 | ||
259 | |||
260 | return 0; | 278 | return 0; |
261 | } | 279 | } |
diff --git a/arch/x86/kernel/cpu/rdrand.c b/arch/x86/kernel/cpu/rdrand.c index 88db010845cb..384df5105fbc 100644 --- a/arch/x86/kernel/cpu/rdrand.c +++ b/arch/x86/kernel/cpu/rdrand.c | |||
@@ -31,20 +31,6 @@ static int __init x86_rdrand_setup(char *s) | |||
31 | } | 31 | } |
32 | __setup("nordrand", x86_rdrand_setup); | 32 | __setup("nordrand", x86_rdrand_setup); |
33 | 33 | ||
34 | /* We can't use arch_get_random_long() here since alternatives haven't run */ | ||
35 | static inline int rdrand_long(unsigned long *v) | ||
36 | { | ||
37 | int ok; | ||
38 | asm volatile("1: " RDRAND_LONG "\n\t" | ||
39 | "jc 2f\n\t" | ||
40 | "decl %0\n\t" | ||
41 | "jnz 1b\n\t" | ||
42 | "2:" | ||
43 | : "=r" (ok), "=a" (*v) | ||
44 | : "0" (RDRAND_RETRY_LOOPS)); | ||
45 | return ok; | ||
46 | } | ||
47 | |||
48 | /* | 34 | /* |
49 | * Force a reseed cycle; we are architecturally guaranteed a reseed | 35 | * Force a reseed cycle; we are architecturally guaranteed a reseed |
50 | * after no more than 512 128-bit chunks of random data. This also | 36 | * after no more than 512 128-bit chunks of random data. This also |
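The removed helper open-coded the RDRAND retry protocol: the instruction sets the carry flag when it returns valid random data, and a short bounded retry loop covers the rare transient failure. A standalone sketch of the same protocol using the rdrand mnemonic directly (assumes a toolchain that accepts it; 10 mirrors the kernel's RDRAND_RETRY_LOOPS):

    static inline int rdrand_long(unsigned long *v)
    {
            unsigned char ok;
            int retries = 10;

            do {
                    /* CF=1 (ok != 0) means *v holds valid random data */
                    asm volatile("rdrand %0; setc %1"
                                 : "=r" (*v), "=qm" (ok));
            } while (!ok && --retries);

            return ok;
    }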
diff --git a/arch/x86/kernel/cpu/transmeta.c b/arch/x86/kernel/cpu/transmeta.c index aa0430d69b90..3fa0e5ad86b4 100644 --- a/arch/x86/kernel/cpu/transmeta.c +++ b/arch/x86/kernel/cpu/transmeta.c | |||
@@ -1,6 +1,5 @@ | |||
1 | #include <linux/kernel.h> | 1 | #include <linux/kernel.h> |
2 | #include <linux/mm.h> | 2 | #include <linux/mm.h> |
3 | #include <linux/init.h> | ||
4 | #include <asm/processor.h> | 3 | #include <asm/processor.h> |
5 | #include <asm/msr.h> | 4 | #include <asm/msr.h> |
6 | #include "cpu.h" | 5 | #include "cpu.h" |
diff --git a/arch/x86/kernel/cpu/umc.c b/arch/x86/kernel/cpu/umc.c index 75c5ad5d35cc..ef9c2a0078bd 100644 --- a/arch/x86/kernel/cpu/umc.c +++ b/arch/x86/kernel/cpu/umc.c | |||
@@ -1,5 +1,4 @@ | |||
1 | #include <linux/kernel.h> | 1 | #include <linux/kernel.h> |
2 | #include <linux/init.h> | ||
3 | #include <asm/processor.h> | 2 | #include <asm/processor.h> |
4 | #include "cpu.h" | 3 | #include "cpu.h" |
5 | 4 | ||
diff --git a/arch/x86/kernel/cpuid.c b/arch/x86/kernel/cpuid.c index 7d9481c743f8..3225ae6c5180 100644 --- a/arch/x86/kernel/cpuid.c +++ b/arch/x86/kernel/cpuid.c | |||
@@ -198,14 +198,15 @@ static int __init cpuid_init(void) | |||
198 | goto out_chrdev; | 198 | goto out_chrdev; |
199 | } | 199 | } |
200 | cpuid_class->devnode = cpuid_devnode; | 200 | cpuid_class->devnode = cpuid_devnode; |
201 | get_online_cpus(); | 201 | |
202 | cpu_notifier_register_begin(); | ||
202 | for_each_online_cpu(i) { | 203 | for_each_online_cpu(i) { |
203 | err = cpuid_device_create(i); | 204 | err = cpuid_device_create(i); |
204 | if (err != 0) | 205 | if (err != 0) |
205 | goto out_class; | 206 | goto out_class; |
206 | } | 207 | } |
207 | register_hotcpu_notifier(&cpuid_class_cpu_notifier); | 208 | __register_hotcpu_notifier(&cpuid_class_cpu_notifier); |
208 | put_online_cpus(); | 209 | cpu_notifier_register_done(); |
209 | 210 | ||
210 | err = 0; | 211 | err = 0; |
211 | goto out; | 212 | goto out; |
@@ -215,7 +216,7 @@ out_class: | |||
215 | for_each_online_cpu(i) { | 216 | for_each_online_cpu(i) { |
216 | cpuid_device_destroy(i); | 217 | cpuid_device_destroy(i); |
217 | } | 218 | } |
218 | put_online_cpus(); | 219 | cpu_notifier_register_done(); |
219 | class_destroy(cpuid_class); | 220 | class_destroy(cpuid_class); |
220 | out_chrdev: | 221 | out_chrdev: |
221 | __unregister_chrdev(CPUID_MAJOR, 0, NR_CPUS, "cpu/cpuid"); | 222 | __unregister_chrdev(CPUID_MAJOR, 0, NR_CPUS, "cpu/cpuid"); |
@@ -227,13 +228,13 @@ static void __exit cpuid_exit(void) | |||
227 | { | 228 | { |
228 | int cpu = 0; | 229 | int cpu = 0; |
229 | 230 | ||
230 | get_online_cpus(); | 231 | cpu_notifier_register_begin(); |
231 | for_each_online_cpu(cpu) | 232 | for_each_online_cpu(cpu) |
232 | cpuid_device_destroy(cpu); | 233 | cpuid_device_destroy(cpu); |
233 | class_destroy(cpuid_class); | 234 | class_destroy(cpuid_class); |
234 | __unregister_chrdev(CPUID_MAJOR, 0, NR_CPUS, "cpu/cpuid"); | 235 | __unregister_chrdev(CPUID_MAJOR, 0, NR_CPUS, "cpu/cpuid"); |
235 | unregister_hotcpu_notifier(&cpuid_class_cpu_notifier); | 236 | __unregister_hotcpu_notifier(&cpuid_class_cpu_notifier); |
236 | put_online_cpus(); | 237 | cpu_notifier_register_done(); |
237 | } | 238 | } |
238 | 239 | ||
239 | module_init(cpuid_init); | 240 | module_init(cpuid_init); |
diff --git a/arch/x86/kernel/crash.c b/arch/x86/kernel/crash.c index 18677a90d6a3..507de8066594 100644 --- a/arch/x86/kernel/crash.c +++ b/arch/x86/kernel/crash.c | |||
@@ -7,7 +7,6 @@ | |||
7 | * | 7 | * |
8 | */ | 8 | */ |
9 | 9 | ||
10 | #include <linux/init.h> | ||
11 | #include <linux/types.h> | 10 | #include <linux/types.h> |
12 | #include <linux/kernel.h> | 11 | #include <linux/kernel.h> |
13 | #include <linux/smp.h> | 12 | #include <linux/smp.h> |
@@ -58,9 +57,7 @@ static void kdump_nmi_callback(int cpu, struct pt_regs *regs) | |||
58 | { | 57 | { |
59 | #ifdef CONFIG_X86_32 | 58 | #ifdef CONFIG_X86_32 |
60 | struct pt_regs fixed_regs; | 59 | struct pt_regs fixed_regs; |
61 | #endif | ||
62 | 60 | ||
63 | #ifdef CONFIG_X86_32 | ||
64 | if (!user_mode_vm(regs)) { | 61 | if (!user_mode_vm(regs)) { |
65 | crash_fixup_ss_esp(&fixed_regs, regs); | 62 | crash_fixup_ss_esp(&fixed_regs, regs); |
66 | regs = &fixed_regs; | 63 | regs = &fixed_regs; |
diff --git a/arch/x86/kernel/doublefault.c b/arch/x86/kernel/doublefault.c index 5d3fe8d36e4a..f6dfd9334b67 100644 --- a/arch/x86/kernel/doublefault.c +++ b/arch/x86/kernel/doublefault.c | |||
@@ -1,6 +1,5 @@ | |||
1 | #include <linux/mm.h> | 1 | #include <linux/mm.h> |
2 | #include <linux/sched.h> | 2 | #include <linux/sched.h> |
3 | #include <linux/init.h> | ||
4 | #include <linux/init_task.h> | 3 | #include <linux/init_task.h> |
5 | #include <linux/fs.h> | 4 | #include <linux/fs.h> |
6 | 5 | ||
diff --git a/arch/x86/kernel/dumpstack_32.c b/arch/x86/kernel/dumpstack_32.c index f2a1770ca176..5abd4cd4230c 100644 --- a/arch/x86/kernel/dumpstack_32.c +++ b/arch/x86/kernel/dumpstack_32.c | |||
@@ -16,12 +16,35 @@ | |||
16 | 16 | ||
17 | #include <asm/stacktrace.h> | 17 | #include <asm/stacktrace.h> |
18 | 18 | ||
19 | static void *is_irq_stack(void *p, void *irq) | ||
20 | { | ||
21 | if (p < irq || p >= (irq + THREAD_SIZE)) | ||
22 | return NULL; | ||
23 | return irq + THREAD_SIZE; | ||
24 | } | ||
25 | |||
26 | |||
27 | static void *is_hardirq_stack(unsigned long *stack, int cpu) | ||
28 | { | ||
29 | void *irq = per_cpu(hardirq_stack, cpu); | ||
30 | |||
31 | return is_irq_stack(stack, irq); | ||
32 | } | ||
33 | |||
34 | static void *is_softirq_stack(unsigned long *stack, int cpu) | ||
35 | { | ||
36 | void *irq = per_cpu(softirq_stack, cpu); | ||
37 | |||
38 | return is_irq_stack(stack, irq); | ||
39 | } | ||
19 | 40 | ||
20 | void dump_trace(struct task_struct *task, struct pt_regs *regs, | 41 | void dump_trace(struct task_struct *task, struct pt_regs *regs, |
21 | unsigned long *stack, unsigned long bp, | 42 | unsigned long *stack, unsigned long bp, |
22 | const struct stacktrace_ops *ops, void *data) | 43 | const struct stacktrace_ops *ops, void *data) |
23 | { | 44 | { |
45 | const unsigned cpu = get_cpu(); | ||
24 | int graph = 0; | 46 | int graph = 0; |
47 | u32 *prev_esp; | ||
25 | 48 | ||
26 | if (!task) | 49 | if (!task) |
27 | task = current; | 50 | task = current; |
@@ -30,7 +53,7 @@ void dump_trace(struct task_struct *task, struct pt_regs *regs, | |||
30 | unsigned long dummy; | 53 | unsigned long dummy; |
31 | 54 | ||
32 | stack = &dummy; | 55 | stack = &dummy; |
33 | if (task && task != current) | 56 | if (task != current) |
34 | stack = (unsigned long *)task->thread.sp; | 57 | stack = (unsigned long *)task->thread.sp; |
35 | } | 58 | } |
36 | 59 | ||
@@ -39,18 +62,31 @@ void dump_trace(struct task_struct *task, struct pt_regs *regs, | |||
39 | 62 | ||
40 | for (;;) { | 63 | for (;;) { |
41 | struct thread_info *context; | 64 | struct thread_info *context; |
65 | void *end_stack; | ||
66 | |||
67 | end_stack = is_hardirq_stack(stack, cpu); | ||
68 | if (!end_stack) | ||
69 | end_stack = is_softirq_stack(stack, cpu); | ||
42 | 70 | ||
43 | context = (struct thread_info *) | 71 | context = task_thread_info(task); |
44 | ((unsigned long)stack & (~(THREAD_SIZE - 1))); | 72 | bp = ops->walk_stack(context, stack, bp, ops, data, |
45 | bp = ops->walk_stack(context, stack, bp, ops, data, NULL, &graph); | 73 | end_stack, &graph); |
46 | 74 | ||
47 | stack = (unsigned long *)context->previous_esp; | 75 | /* Stop if not on irq stack */ |
76 | if (!end_stack) | ||
77 | break; | ||
78 | |||
79 | /* The previous esp is saved on the bottom of the stack */ | ||
80 | prev_esp = (u32 *)(end_stack - THREAD_SIZE); | ||
81 | stack = (unsigned long *)*prev_esp; | ||
48 | if (!stack) | 82 | if (!stack) |
49 | break; | 83 | break; |
84 | |||
50 | if (ops->stack(data, "IRQ") < 0) | 85 | if (ops->stack(data, "IRQ") < 0) |
51 | break; | 86 | break; |
52 | touch_nmi_watchdog(); | 87 | touch_nmi_watchdog(); |
53 | } | 88 | } |
89 | put_cpu(); | ||
54 | } | 90 | } |
55 | EXPORT_SYMBOL(dump_trace); | 91 | EXPORT_SYMBOL(dump_trace); |
56 | 92 | ||
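The linkage this unwinder follows is established when the kernel switches onto an interrupt stack: with thread_info no longer assumed to sit at the base of every stack, the switch code stores the interrupted stack pointer in the first word of the irq stack, and dump_trace() reads it back as prev_esp above. A sketch of that invariant (illustrative, not quoted from this patch):

    static void save_stack_link(int cpu, u32 current_esp)
    {
            /* the irq stack's lowest word holds the interrupted esp */
            u32 *prev_esp = (u32 *)per_cpu(hardirq_stack, cpu);

            *prev_esp = current_esp;        /* read back by dump_trace() */
    }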
diff --git a/arch/x86/kernel/dumpstack_64.c b/arch/x86/kernel/dumpstack_64.c index addb207dab92..1abcb50b48ae 100644 --- a/arch/x86/kernel/dumpstack_64.c +++ b/arch/x86/kernel/dumpstack_64.c | |||
@@ -104,6 +104,44 @@ in_irq_stack(unsigned long *stack, unsigned long *irq_stack, | |||
104 | return (stack >= irq_stack && stack < irq_stack_end); | 104 | return (stack >= irq_stack && stack < irq_stack_end); |
105 | } | 105 | } |
106 | 106 | ||
107 | static const unsigned long irq_stack_size = | ||
108 | (IRQ_STACK_SIZE - 64) / sizeof(unsigned long); | ||
109 | |||
110 | enum stack_type { | ||
111 | STACK_IS_UNKNOWN, | ||
112 | STACK_IS_NORMAL, | ||
113 | STACK_IS_EXCEPTION, | ||
114 | STACK_IS_IRQ, | ||
115 | }; | ||
116 | |||
117 | static enum stack_type | ||
118 | analyze_stack(int cpu, struct task_struct *task, unsigned long *stack, | ||
119 | unsigned long **stack_end, unsigned long *irq_stack, | ||
120 | unsigned *used, char **id) | ||
121 | { | ||
122 | unsigned long addr; | ||
123 | |||
124 | addr = ((unsigned long)stack & (~(THREAD_SIZE - 1))); | ||
125 | if ((unsigned long)task_stack_page(task) == addr) | ||
126 | return STACK_IS_NORMAL; | ||
127 | |||
128 | *stack_end = in_exception_stack(cpu, (unsigned long)stack, | ||
129 | used, id); | ||
130 | if (*stack_end) | ||
131 | return STACK_IS_EXCEPTION; | ||
132 | |||
133 | if (!irq_stack) | ||
134 | return STACK_IS_NORMAL; | ||
135 | |||
136 | *stack_end = irq_stack; | ||
137 | irq_stack = irq_stack - irq_stack_size; | ||
138 | |||
139 | if (in_irq_stack(stack, irq_stack, *stack_end)) | ||
140 | return STACK_IS_IRQ; | ||
141 | |||
142 | return STACK_IS_UNKNOWN; | ||
143 | } | ||
144 | |||
107 | /* | 145 | /* |
108 | * x86-64 can have up to three kernel stacks: | 146 | * x86-64 can have up to three kernel stacks: |
109 | * process stack | 147 | * process stack |
@@ -116,12 +154,12 @@ void dump_trace(struct task_struct *task, struct pt_regs *regs, | |||
116 | const struct stacktrace_ops *ops, void *data) | 154 | const struct stacktrace_ops *ops, void *data) |
117 | { | 155 | { |
118 | const unsigned cpu = get_cpu(); | 156 | const unsigned cpu = get_cpu(); |
119 | unsigned long *irq_stack_end = | ||
120 | (unsigned long *)per_cpu(irq_stack_ptr, cpu); | ||
121 | unsigned used = 0; | ||
122 | struct thread_info *tinfo; | 157 | struct thread_info *tinfo; |
123 | int graph = 0; | 158 | unsigned long *irq_stack = (unsigned long *)per_cpu(irq_stack_ptr, cpu); |
124 | unsigned long dummy; | 159 | unsigned long dummy; |
160 | unsigned used = 0; | ||
161 | int graph = 0; | ||
162 | int done = 0; | ||
125 | 163 | ||
126 | if (!task) | 164 | if (!task) |
127 | task = current; | 165 | task = current; |
@@ -143,49 +181,61 @@ void dump_trace(struct task_struct *task, struct pt_regs *regs, | |||
143 | * exceptions | 181 | * exceptions |
144 | */ | 182 | */ |
145 | tinfo = task_thread_info(task); | 183 | tinfo = task_thread_info(task); |
146 | for (;;) { | 184 | while (!done) { |
185 | unsigned long *stack_end; | ||
186 | enum stack_type stype; | ||
147 | char *id; | 187 | char *id; |
148 | unsigned long *estack_end; | ||
149 | estack_end = in_exception_stack(cpu, (unsigned long)stack, | ||
150 | &used, &id); | ||
151 | 188 | ||
152 | if (estack_end) { | 189 | stype = analyze_stack(cpu, task, stack, &stack_end, |
190 | irq_stack, &used, &id); | ||
191 | |||
192 | /* Default finish unless specified to continue */ | ||
193 | done = 1; | ||
194 | |||
195 | switch (stype) { | ||
196 | |||
197 | /* Break out early if we are on the thread stack */ | ||
198 | case STACK_IS_NORMAL: | ||
199 | break; | ||
200 | |||
201 | case STACK_IS_EXCEPTION: | ||
202 | |||
153 | if (ops->stack(data, id) < 0) | 203 | if (ops->stack(data, id) < 0) |
154 | break; | 204 | break; |
155 | 205 | ||
156 | bp = ops->walk_stack(tinfo, stack, bp, ops, | 206 | bp = ops->walk_stack(tinfo, stack, bp, ops, |
157 | data, estack_end, &graph); | 207 | data, stack_end, &graph); |
158 | ops->stack(data, "<EOE>"); | 208 | ops->stack(data, "<EOE>"); |
159 | /* | 209 | /* |
160 | * We link to the next stack via the | 210 | * We link to the next stack via the |
161 | * second-to-last pointer (index -2 to end) in the | 211 | * second-to-last pointer (index -2 to end) in the |
162 | * exception stack: | 212 | * exception stack: |
163 | */ | 213 | */ |
164 | stack = (unsigned long *) estack_end[-2]; | 214 | stack = (unsigned long *) stack_end[-2]; |
165 | continue; | 215 | done = 0; |
166 | } | 216 | break; |
167 | if (irq_stack_end) { | 217 | |
168 | unsigned long *irq_stack; | 218 | case STACK_IS_IRQ: |
169 | irq_stack = irq_stack_end - | 219 | |
170 | (IRQ_STACK_SIZE - 64) / sizeof(*irq_stack); | 220 | if (ops->stack(data, "IRQ") < 0) |
171 | 221 | break; | |
172 | if (in_irq_stack(stack, irq_stack, irq_stack_end)) { | 222 | bp = ops->walk_stack(tinfo, stack, bp, |
173 | if (ops->stack(data, "IRQ") < 0) | 223 | ops, data, stack_end, &graph); |
174 | break; | 224 | /* |
175 | bp = ops->walk_stack(tinfo, stack, bp, | 225 | * We link to the next stack (which would be |
176 | ops, data, irq_stack_end, &graph); | 226 | * the process stack normally) the last |
177 | /* | 227 | * pointer (index -1 to end) in the IRQ stack: |
178 | * We link to the next stack (which would be | 228 | */ |
179 | * the process stack normally) the last | 229 | stack = (unsigned long *) (stack_end[-1]); |
180 | * pointer (index -1 to end) in the IRQ stack: | 230 | irq_stack = NULL; |
181 | */ | 231 | ops->stack(data, "EOI"); |
182 | stack = (unsigned long *) (irq_stack_end[-1]); | 232 | done = 0; |
183 | irq_stack_end = NULL; | 233 | break; |
184 | ops->stack(data, "EOI"); | 234 | |
185 | continue; | 235 | case STACK_IS_UNKNOWN: |
186 | } | 236 | ops->stack(data, "UNK"); |
237 | break; | ||
187 | } | 238 | } |
188 | break; | ||
189 | } | 239 | } |
190 | 240 | ||
191 | /* | 241 | /* |
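Distilled, the rewritten 64-bit walk is a small state machine: analyze_stack() classifies where the current pointer lives, one stack is walked per iteration, and the link words stored at the end of exception and IRQ stacks hand the walk on to the next stack. Condensed from the hunk above:

    while (!done) {
            done = 1;                       /* stop unless a link is followed */
            switch (analyze_stack(cpu, task, stack, &stack_end,
                                  irq_stack, &used, &id)) {
            case STACK_IS_NORMAL:           /* thread stack: final walk follows */
                    break;
            case STACK_IS_EXCEPTION:        /* link word at stack_end[-2] */
                    stack = (unsigned long *)stack_end[-2];
                    done = 0;
                    break;
            case STACK_IS_IRQ:              /* link word at stack_end[-1] */
                    stack = (unsigned long *)stack_end[-1];
                    irq_stack = NULL;       /* the IRQ stack is visited once */
                    done = 0;
                    break;
            case STACK_IS_UNKNOWN:          /* corrupted pointer: flag and stop */
                    break;
            }
    }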
diff --git a/arch/x86/kernel/e820.c b/arch/x86/kernel/e820.c index 174da5fc5a7b..988c00a1f60d 100644 --- a/arch/x86/kernel/e820.c +++ b/arch/x86/kernel/e820.c | |||
@@ -1120,7 +1120,7 @@ void __init memblock_find_dma_reserve(void) | |||
1120 | nr_pages += end_pfn - start_pfn; | 1120 | nr_pages += end_pfn - start_pfn; |
1121 | } | 1121 | } |
1122 | 1122 | ||
1123 | for_each_free_mem_range(u, MAX_NUMNODES, &start, &end, NULL) { | 1123 | for_each_free_mem_range(u, NUMA_NO_NODE, &start, &end, NULL) { |
1124 | start_pfn = min_t(unsigned long, PFN_UP(start), MAX_DMA_PFN); | 1124 | start_pfn = min_t(unsigned long, PFN_UP(start), MAX_DMA_PFN); |
1125 | end_pfn = min_t(unsigned long, PFN_DOWN(end), MAX_DMA_PFN); | 1125 | end_pfn = min_t(unsigned long, PFN_DOWN(end), MAX_DMA_PFN); |
1126 | if (start_pfn < end_pfn) | 1126 | if (start_pfn < end_pfn) |
diff --git a/arch/x86/kernel/early-quirks.c b/arch/x86/kernel/early-quirks.c index bc4a088f9023..b0cc3809723d 100644 --- a/arch/x86/kernel/early-quirks.c +++ b/arch/x86/kernel/early-quirks.c | |||
@@ -203,18 +203,15 @@ static void __init intel_remapping_check(int num, int slot, int func) | |||
203 | revision = read_pci_config_byte(num, slot, func, PCI_REVISION_ID); | 203 | revision = read_pci_config_byte(num, slot, func, PCI_REVISION_ID); |
204 | 204 | ||
205 | /* | 205 | /* |
206 | * Revision 13 of all triggering device ids in this quirk has | 206 | * Revision <= 13 of all triggering device ids in this quirk |
207 | * a problem draining interrupts when irq remapping is enabled, | 207 | * has a problem draining interrupts when irq remapping is |
208 | * and should be flagged as broken. Additionally revisions 0x12 | 208 | * enabled, and should be flagged as broken. Additionally |
209 | * and 0x22 of device id 0x3405 has this problem. | 209 | * revision 0x22 of device id 0x3405 has this problem. |
210 | */ | 210 | */ |
211 | if (revision == 0x13) | 211 | if (revision <= 0x13) |
212 | set_irq_remapping_broken(); | 212 | set_irq_remapping_broken(); |
213 | else if ((device == 0x3405) && | 213 | else if (device == 0x3405 && revision == 0x22) |
214 | ((revision == 0x12) || | ||
215 | (revision == 0x22))) | ||
216 | set_irq_remapping_broken(); | 214 | set_irq_remapping_broken(); |
217 | |||
218 | } | 215 | } |
219 | 216 | ||
220 | /* | 217 | /* |
@@ -228,7 +225,7 @@ static void __init intel_remapping_check(int num, int slot, int func) | |||
228 | * | 225 | * |
229 | * And yes, so far on current devices the base addr is always under 4G. | 226 | * And yes, so far on current devices the base addr is always under 4G. |
230 | */ | 227 | */ |
231 | static u32 __init intel_stolen_base(int num, int slot, int func) | 228 | static u32 __init intel_stolen_base(int num, int slot, int func, size_t stolen_size) |
232 | { | 229 | { |
233 | u32 base; | 230 | u32 base; |
234 | 231 | ||
@@ -247,6 +244,114 @@ static u32 __init intel_stolen_base(int num, int slot, int func) | |||
247 | #define MB(x) (KB (KB (x))) | 244 | #define MB(x) (KB (KB (x))) |
248 | #define GB(x) (MB (KB (x))) | 245 | #define GB(x) (MB (KB (x))) |
249 | 246 | ||
247 | static size_t __init i830_tseg_size(void) | ||
248 | { | ||
249 | u8 tmp = read_pci_config_byte(0, 0, 0, I830_ESMRAMC); | ||
250 | |||
251 | if (!(tmp & TSEG_ENABLE)) | ||
252 | return 0; | ||
253 | |||
254 | if (tmp & I830_TSEG_SIZE_1M) | ||
255 | return MB(1); | ||
256 | else | ||
257 | return KB(512); | ||
258 | } | ||
259 | |||
260 | static size_t __init i845_tseg_size(void) | ||
261 | { | ||
262 | u8 tmp = read_pci_config_byte(0, 0, 0, I845_ESMRAMC); | ||
263 | |||
264 | if (!(tmp & TSEG_ENABLE)) | ||
265 | return 0; | ||
266 | |||
267 | switch (tmp & I845_TSEG_SIZE_MASK) { | ||
268 | case I845_TSEG_SIZE_512K: | ||
269 | return KB(512); | ||
270 | case I845_TSEG_SIZE_1M: | ||
271 | return MB(1); | ||
272 | default: | ||
273 | WARN_ON(1); | ||
274 | return 0; | ||
275 | } | ||
276 | } | ||
277 | |||
278 | static size_t __init i85x_tseg_size(void) | ||
279 | { | ||
280 | u8 tmp = read_pci_config_byte(0, 0, 0, I85X_ESMRAMC); | ||
281 | |||
282 | if (!(tmp & TSEG_ENABLE)) | ||
283 | return 0; | ||
284 | |||
285 | return MB(1); | ||
286 | } | ||
287 | |||
288 | static size_t __init i830_mem_size(void) | ||
289 | { | ||
290 | return read_pci_config_byte(0, 0, 0, I830_DRB3) * MB(32); | ||
291 | } | ||
292 | |||
293 | static size_t __init i85x_mem_size(void) | ||
294 | { | ||
295 | return read_pci_config_byte(0, 0, 1, I85X_DRB3) * MB(32); | ||
296 | } | ||
297 | |||
298 | /* | ||
299 | * On 830/845/85x the stolen memory base isn't available in any | ||
300 | * register. We need to calculate it as TOM-TSEG_SIZE-stolen_size. | ||
301 | */ | ||
302 | static u32 __init i830_stolen_base(int num, int slot, int func, size_t stolen_size) | ||
303 | { | ||
304 | return i830_mem_size() - i830_tseg_size() - stolen_size; | ||
305 | } | ||
306 | |||
307 | static u32 __init i845_stolen_base(int num, int slot, int func, size_t stolen_size) | ||
308 | { | ||
309 | return i830_mem_size() - i845_tseg_size() - stolen_size; | ||
310 | } | ||
311 | |||
312 | static u32 __init i85x_stolen_base(int num, int slot, int func, size_t stolen_size) | ||
313 | { | ||
314 | return i85x_mem_size() - i85x_tseg_size() - stolen_size; | ||
315 | } | ||
316 | |||
317 | static u32 __init i865_stolen_base(int num, int slot, int func, size_t stolen_size) | ||
318 | { | ||
319 | /* | ||
320 | * FIXME is the graphics stolen memory region | ||
321 | * always at TOUD? Ie. is it always the last | ||
322 | * one to be allocated by the BIOS? | ||
323 | */ | ||
324 | return read_pci_config_16(0, 0, 0, I865_TOUD) << 16; | ||
325 | } | ||
326 | |||
327 | static size_t __init i830_stolen_size(int num, int slot, int func) | ||
328 | { | ||
329 | size_t stolen_size; | ||
330 | u16 gmch_ctrl; | ||
331 | |||
332 | gmch_ctrl = read_pci_config_16(0, 0, 0, I830_GMCH_CTRL); | ||
333 | |||
334 | switch (gmch_ctrl & I830_GMCH_GMS_MASK) { | ||
335 | case I830_GMCH_GMS_STOLEN_512: | ||
336 | stolen_size = KB(512); | ||
337 | break; | ||
338 | case I830_GMCH_GMS_STOLEN_1024: | ||
339 | stolen_size = MB(1); | ||
340 | break; | ||
341 | case I830_GMCH_GMS_STOLEN_8192: | ||
342 | stolen_size = MB(8); | ||
343 | break; | ||
344 | case I830_GMCH_GMS_LOCAL: | ||
345 | /* local memory isn't part of the normal address space */ | ||
346 | stolen_size = 0; | ||
347 | break; | ||
348 | default: | ||
349 | return 0; | ||
350 | } | ||
351 | |||
352 | return stolen_size; | ||
353 | } | ||
354 | |||
250 | static size_t __init gen3_stolen_size(int num, int slot, int func) | 355 | static size_t __init gen3_stolen_size(int num, int slot, int func) |
251 | { | 356 | { |
252 | size_t stolen_size; | 357 | size_t stolen_size; |
@@ -313,7 +418,7 @@ static size_t __init gen6_stolen_size(int num, int slot, int func) | |||
313 | return gmch_ctrl << 25; /* 32 MB units */ | 418 | return gmch_ctrl << 25; /* 32 MB units */ |
314 | } | 419 | } |
315 | 420 | ||
316 | static inline size_t gen8_stolen_size(int num, int slot, int func) | 421 | static size_t gen8_stolen_size(int num, int slot, int func) |
317 | { | 422 | { |
318 | u16 gmch_ctrl; | 423 | u16 gmch_ctrl; |
319 | 424 | ||
@@ -323,31 +428,74 @@ static inline size_t gen8_stolen_size(int num, int slot, int func) | |||
323 | return gmch_ctrl << 25; /* 32 MB units */ | 428 | return gmch_ctrl << 25; /* 32 MB units */ |
324 | } | 429 | } |
325 | 430 | ||
326 | typedef size_t (*stolen_size_fn)(int num, int slot, int func); | 431 | |
432 | struct intel_stolen_funcs { | ||
433 | size_t (*size)(int num, int slot, int func); | ||
434 | u32 (*base)(int num, int slot, int func, size_t size); | ||
435 | }; | ||
436 | |||
437 | static const struct intel_stolen_funcs i830_stolen_funcs = { | ||
438 | .base = i830_stolen_base, | ||
439 | .size = i830_stolen_size, | ||
440 | }; | ||
441 | |||
442 | static const struct intel_stolen_funcs i845_stolen_funcs = { | ||
443 | .base = i845_stolen_base, | ||
444 | .size = i830_stolen_size, | ||
445 | }; | ||
446 | |||
447 | static const struct intel_stolen_funcs i85x_stolen_funcs = { | ||
448 | .base = i85x_stolen_base, | ||
449 | .size = gen3_stolen_size, | ||
450 | }; | ||
451 | |||
452 | static const struct intel_stolen_funcs i865_stolen_funcs = { | ||
453 | .base = i865_stolen_base, | ||
454 | .size = gen3_stolen_size, | ||
455 | }; | ||
456 | |||
457 | static const struct intel_stolen_funcs gen3_stolen_funcs = { | ||
458 | .base = intel_stolen_base, | ||
459 | .size = gen3_stolen_size, | ||
460 | }; | ||
461 | |||
462 | static const struct intel_stolen_funcs gen6_stolen_funcs = { | ||
463 | .base = intel_stolen_base, | ||
464 | .size = gen6_stolen_size, | ||
465 | }; | ||
466 | |||
467 | static const struct intel_stolen_funcs gen8_stolen_funcs = { | ||
468 | .base = intel_stolen_base, | ||
469 | .size = gen8_stolen_size, | ||
470 | }; | ||
327 | 471 | ||
328 | static struct pci_device_id intel_stolen_ids[] __initdata = { | 472 | static struct pci_device_id intel_stolen_ids[] __initdata = { |
329 | INTEL_I915G_IDS(gen3_stolen_size), | 473 | INTEL_I830_IDS(&i830_stolen_funcs), |
330 | INTEL_I915GM_IDS(gen3_stolen_size), | 474 | INTEL_I845G_IDS(&i845_stolen_funcs), |
331 | INTEL_I945G_IDS(gen3_stolen_size), | 475 | INTEL_I85X_IDS(&i85x_stolen_funcs), |
332 | INTEL_I945GM_IDS(gen3_stolen_size), | 476 | INTEL_I865G_IDS(&i865_stolen_funcs), |
333 | INTEL_VLV_M_IDS(gen6_stolen_size), | 477 | INTEL_I915G_IDS(&gen3_stolen_funcs), |
334 | INTEL_VLV_D_IDS(gen6_stolen_size), | 478 | INTEL_I915GM_IDS(&gen3_stolen_funcs), |
335 | INTEL_PINEVIEW_IDS(gen3_stolen_size), | 479 | INTEL_I945G_IDS(&gen3_stolen_funcs), |
336 | INTEL_I965G_IDS(gen3_stolen_size), | 480 | INTEL_I945GM_IDS(&gen3_stolen_funcs), |
337 | INTEL_G33_IDS(gen3_stolen_size), | 481 | INTEL_VLV_M_IDS(&gen6_stolen_funcs), |
338 | INTEL_I965GM_IDS(gen3_stolen_size), | 482 | INTEL_VLV_D_IDS(&gen6_stolen_funcs), |
339 | INTEL_GM45_IDS(gen3_stolen_size), | 483 | INTEL_PINEVIEW_IDS(&gen3_stolen_funcs), |
340 | INTEL_G45_IDS(gen3_stolen_size), | 484 | INTEL_I965G_IDS(&gen3_stolen_funcs), |
341 | INTEL_IRONLAKE_D_IDS(gen3_stolen_size), | 485 | INTEL_G33_IDS(&gen3_stolen_funcs), |
342 | INTEL_IRONLAKE_M_IDS(gen3_stolen_size), | 486 | INTEL_I965GM_IDS(&gen3_stolen_funcs), |
343 | INTEL_SNB_D_IDS(gen6_stolen_size), | 487 | INTEL_GM45_IDS(&gen3_stolen_funcs), |
344 | INTEL_SNB_M_IDS(gen6_stolen_size), | 488 | INTEL_G45_IDS(&gen3_stolen_funcs), |
345 | INTEL_IVB_M_IDS(gen6_stolen_size), | 489 | INTEL_IRONLAKE_D_IDS(&gen3_stolen_funcs), |
346 | INTEL_IVB_D_IDS(gen6_stolen_size), | 490 | INTEL_IRONLAKE_M_IDS(&gen3_stolen_funcs), |
347 | INTEL_HSW_D_IDS(gen6_stolen_size), | 491 | INTEL_SNB_D_IDS(&gen6_stolen_funcs), |
348 | INTEL_HSW_M_IDS(gen6_stolen_size), | 492 | INTEL_SNB_M_IDS(&gen6_stolen_funcs), |
349 | INTEL_BDW_M_IDS(gen8_stolen_size), | 493 | INTEL_IVB_M_IDS(&gen6_stolen_funcs), |
350 | INTEL_BDW_D_IDS(gen8_stolen_size) | 494 | INTEL_IVB_D_IDS(&gen6_stolen_funcs), |
495 | INTEL_HSW_D_IDS(&gen6_stolen_funcs), | ||
496 | INTEL_HSW_M_IDS(&gen6_stolen_funcs), | ||
497 | INTEL_BDW_M_IDS(&gen8_stolen_funcs), | ||
498 | INTEL_BDW_D_IDS(&gen8_stolen_funcs) | ||
351 | }; | 499 | }; |
352 | 500 | ||
353 | static void __init intel_graphics_stolen(int num, int slot, int func) | 501 | static void __init intel_graphics_stolen(int num, int slot, int func) |
@@ -364,11 +512,13 @@ static void __init intel_graphics_stolen(int num, int slot, int func) | |||
364 | 512 | ||
365 | for (i = 0; i < ARRAY_SIZE(intel_stolen_ids); i++) { | 513 | for (i = 0; i < ARRAY_SIZE(intel_stolen_ids); i++) { |
366 | if (intel_stolen_ids[i].device == device) { | 514 | if (intel_stolen_ids[i].device == device) { |
367 | stolen_size_fn stolen_size = | 515 | const struct intel_stolen_funcs *stolen_funcs = |
368 | (stolen_size_fn)intel_stolen_ids[i].driver_data; | 516 | (const struct intel_stolen_funcs *)intel_stolen_ids[i].driver_data; |
369 | size = stolen_size(num, slot, func); | 517 | size = stolen_funcs->size(num, slot, func); |
370 | start = intel_stolen_base(num, slot, func); | 518 | start = stolen_funcs->base(num, slot, func, size); |
371 | if (size && start) { | 519 | if (size && start) { |
520 | printk(KERN_INFO "Reserving Intel graphics stolen memory at 0x%x-0x%x\n", | ||
521 | start, start + (u32)size - 1); | ||
372 | /* Mark this space as reserved */ | 522 | /* Mark this space as reserved */ |
373 | e820_add_region(start, size, E820_RESERVED); | 523 | e820_add_region(start, size, E820_RESERVED); |
374 | sanitize_e820_map(e820.map, | 524 | sanitize_e820_map(e820.map, |
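On the pre-915 chipsets handled by the new i830/i845/i85x helpers, no register exposes the stolen-memory base, so it has to be derived as TOM - TSEG - stolen size. A worked example with assumed register contents (values are illustrative only):

    /*
     * DRB3 reads 8:          TOM    = 8 * 32MB = 0x10000000 (256MB)
     * TSEG enabled, 1MB:     TSEG   = 0x00100000
     * GMCH_CTRL says 8MB:    stolen = 0x00800000
     *
     * base = TOM - TSEG - stolen
     *      = 0x10000000 - 0x00100000 - 0x00800000
     *      = 0x0f700000
     */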
diff --git a/arch/x86/kernel/ftrace.c b/arch/x86/kernel/ftrace.c index d4bdd253fea7..52819e816f87 100644 --- a/arch/x86/kernel/ftrace.c +++ b/arch/x86/kernel/ftrace.c | |||
@@ -77,8 +77,7 @@ within(unsigned long addr, unsigned long start, unsigned long end) | |||
77 | return addr >= start && addr < end; | 77 | return addr >= start && addr < end; |
78 | } | 78 | } |
79 | 79 | ||
80 | static int | 80 | static unsigned long text_ip_addr(unsigned long ip) |
81 | do_ftrace_mod_code(unsigned long ip, const void *new_code) | ||
82 | { | 81 | { |
83 | /* | 82 | /* |
84 | * On x86_64, kernel text mappings are mapped read-only with | 83 | * On x86_64, kernel text mappings are mapped read-only with |
@@ -91,7 +90,7 @@ do_ftrace_mod_code(unsigned long ip, const void *new_code) | |||
91 | if (within(ip, (unsigned long)_text, (unsigned long)_etext)) | 90 | if (within(ip, (unsigned long)_text, (unsigned long)_etext)) |
92 | ip = (unsigned long)__va(__pa_symbol(ip)); | 91 | ip = (unsigned long)__va(__pa_symbol(ip)); |
93 | 92 | ||
94 | return probe_kernel_write((void *)ip, new_code, MCOUNT_INSN_SIZE); | 93 | return ip; |
95 | } | 94 | } |
96 | 95 | ||
97 | static const unsigned char *ftrace_nop_replace(void) | 96 | static const unsigned char *ftrace_nop_replace(void) |
@@ -123,8 +122,10 @@ ftrace_modify_code_direct(unsigned long ip, unsigned const char *old_code, | |||
123 | if (memcmp(replaced, old_code, MCOUNT_INSN_SIZE) != 0) | 122 | if (memcmp(replaced, old_code, MCOUNT_INSN_SIZE) != 0) |
124 | return -EINVAL; | 123 | return -EINVAL; |
125 | 124 | ||
125 | ip = text_ip_addr(ip); | ||
126 | |||
126 | /* replace the text with the new text */ | 127 | /* replace the text with the new text */ |
127 | if (do_ftrace_mod_code(ip, new_code)) | 128 | if (probe_kernel_write((void *)ip, new_code, MCOUNT_INSN_SIZE)) |
128 | return -EPERM; | 129 | return -EPERM; |
129 | 130 | ||
130 | sync_core(); | 131 | sync_core(); |
@@ -221,37 +222,51 @@ int ftrace_modify_call(struct dyn_ftrace *rec, unsigned long old_addr, | |||
221 | return -EINVAL; | 222 | return -EINVAL; |
222 | } | 223 | } |
223 | 224 | ||
224 | int ftrace_update_ftrace_func(ftrace_func_t func) | 225 | static unsigned long ftrace_update_func; |
226 | |||
227 | static int update_ftrace_func(unsigned long ip, void *new) | ||
225 | { | 228 | { |
226 | unsigned long ip = (unsigned long)(&ftrace_call); | 229 | unsigned char old[MCOUNT_INSN_SIZE]; |
227 | unsigned char old[MCOUNT_INSN_SIZE], *new; | ||
228 | int ret; | 230 | int ret; |
229 | 231 | ||
230 | memcpy(old, &ftrace_call, MCOUNT_INSN_SIZE); | 232 | memcpy(old, (void *)ip, MCOUNT_INSN_SIZE); |
231 | new = ftrace_call_replace(ip, (unsigned long)func); | 233 | |
234 | ftrace_update_func = ip; | ||
235 | /* Make sure the breakpoint handler sees the ftrace_update_func update */ | ||
236 | smp_wmb(); | ||
232 | 237 | ||
233 | /* See comment above by declaration of modifying_ftrace_code */ | 238 | /* See comment above by declaration of modifying_ftrace_code */ |
234 | atomic_inc(&modifying_ftrace_code); | 239 | atomic_inc(&modifying_ftrace_code); |
235 | 240 | ||
236 | ret = ftrace_modify_code(ip, old, new); | 241 | ret = ftrace_modify_code(ip, old, new); |
237 | 242 | ||
243 | atomic_dec(&modifying_ftrace_code); | ||
244 | |||
245 | return ret; | ||
246 | } | ||
247 | |||
248 | int ftrace_update_ftrace_func(ftrace_func_t func) | ||
249 | { | ||
250 | unsigned long ip = (unsigned long)(&ftrace_call); | ||
251 | unsigned char *new; | ||
252 | int ret; | ||
253 | |||
254 | new = ftrace_call_replace(ip, (unsigned long)func); | ||
255 | ret = update_ftrace_func(ip, new); | ||
256 | |||
238 | /* Also update the regs callback function */ | 257 | /* Also update the regs callback function */ |
239 | if (!ret) { | 258 | if (!ret) { |
240 | ip = (unsigned long)(&ftrace_regs_call); | 259 | ip = (unsigned long)(&ftrace_regs_call); |
241 | memcpy(old, &ftrace_regs_call, MCOUNT_INSN_SIZE); | ||
242 | new = ftrace_call_replace(ip, (unsigned long)func); | 260 | new = ftrace_call_replace(ip, (unsigned long)func); |
243 | ret = ftrace_modify_code(ip, old, new); | 261 | ret = update_ftrace_func(ip, new); |
244 | } | 262 | } |
245 | 263 | ||
246 | atomic_dec(&modifying_ftrace_code); | ||
247 | |||
248 | return ret; | 264 | return ret; |
249 | } | 265 | } |
250 | 266 | ||
251 | static int is_ftrace_caller(unsigned long ip) | 267 | static int is_ftrace_caller(unsigned long ip) |
252 | { | 268 | { |
253 | if (ip == (unsigned long)(&ftrace_call) || | 269 | if (ip == ftrace_update_func) |
254 | ip == (unsigned long)(&ftrace_regs_call)) | ||
255 | return 1; | 270 | return 1; |
256 | 271 | ||
257 | return 0; | 272 | return 0; |
@@ -293,7 +308,10 @@ static int ftrace_write(unsigned long ip, const char *val, int size) | |||
293 | if (within(ip, (unsigned long)_text, (unsigned long)_etext)) | 308 | if (within(ip, (unsigned long)_text, (unsigned long)_etext)) |
294 | ip = (unsigned long)__va(__pa_symbol(ip)); | 309 | ip = (unsigned long)__va(__pa_symbol(ip)); |
295 | 310 | ||
296 | return probe_kernel_write((void *)ip, val, size); | 311 | if (probe_kernel_write((void *)ip, val, size)) |
312 | return -EPERM; | ||
313 | |||
314 | return 0; | ||
297 | } | 315 | } |
298 | 316 | ||
299 | static int add_break(unsigned long ip, const char *old) | 317 | static int add_break(unsigned long ip, const char *old) |
@@ -308,10 +326,7 @@ static int add_break(unsigned long ip, const char *old) | |||
308 | if (memcmp(replaced, old, MCOUNT_INSN_SIZE) != 0) | 326 | if (memcmp(replaced, old, MCOUNT_INSN_SIZE) != 0) |
309 | return -EINVAL; | 327 | return -EINVAL; |
310 | 328 | ||
311 | if (ftrace_write(ip, &brk, 1)) | 329 | return ftrace_write(ip, &brk, 1); |
312 | return -EPERM; | ||
313 | |||
314 | return 0; | ||
315 | } | 330 | } |
316 | 331 | ||
317 | static int add_brk_on_call(struct dyn_ftrace *rec, unsigned long addr) | 332 | static int add_brk_on_call(struct dyn_ftrace *rec, unsigned long addr) |
@@ -410,7 +425,7 @@ static int remove_breakpoint(struct dyn_ftrace *rec) | |||
410 | 425 | ||
411 | /* If this does not have a breakpoint, we are done */ | 426 | /* If this does not have a breakpoint, we are done */ |
412 | if (ins[0] != brk) | 427 | if (ins[0] != brk) |
413 | return -1; | 428 | return 0; |
414 | 429 | ||
415 | nop = ftrace_nop_replace(); | 430 | nop = ftrace_nop_replace(); |
416 | 431 | ||
@@ -440,7 +455,7 @@ static int remove_breakpoint(struct dyn_ftrace *rec) | |||
440 | } | 455 | } |
441 | 456 | ||
442 | update: | 457 | update: |
443 | return probe_kernel_write((void *)ip, &nop[0], 1); | 458 | return ftrace_write(ip, nop, 1); |
444 | } | 459 | } |
445 | 460 | ||
446 | static int add_update_code(unsigned long ip, unsigned const char *new) | 461 | static int add_update_code(unsigned long ip, unsigned const char *new) |
@@ -448,9 +463,7 @@ static int add_update_code(unsigned long ip, unsigned const char *new) | |||
448 | /* skip breakpoint */ | 463 | /* skip breakpoint */ |
449 | ip++; | 464 | ip++; |
450 | new++; | 465 | new++; |
451 | if (ftrace_write(ip, new, MCOUNT_INSN_SIZE - 1)) | 466 | return ftrace_write(ip, new, MCOUNT_INSN_SIZE - 1); |
452 | return -EPERM; | ||
453 | return 0; | ||
454 | } | 467 | } |
455 | 468 | ||
456 | static int add_update_call(struct dyn_ftrace *rec, unsigned long addr) | 469 | static int add_update_call(struct dyn_ftrace *rec, unsigned long addr) |
@@ -505,10 +518,7 @@ static int finish_update_call(struct dyn_ftrace *rec, unsigned long addr) | |||
505 | 518 | ||
506 | new = ftrace_call_replace(ip, addr); | 519 | new = ftrace_call_replace(ip, addr); |
507 | 520 | ||
508 | if (ftrace_write(ip, new, 1)) | 521 | return ftrace_write(ip, new, 1); |
509 | return -EPERM; | ||
510 | |||
511 | return 0; | ||
512 | } | 522 | } |
513 | 523 | ||
514 | static int finish_update_nop(struct dyn_ftrace *rec) | 524 | static int finish_update_nop(struct dyn_ftrace *rec) |
@@ -518,9 +528,7 @@ static int finish_update_nop(struct dyn_ftrace *rec) | |||
518 | 528 | ||
519 | new = ftrace_nop_replace(); | 529 | new = ftrace_nop_replace(); |
520 | 530 | ||
521 | if (ftrace_write(ip, new, 1)) | 531 | return ftrace_write(ip, new, 1); |
522 | return -EPERM; | ||
523 | return 0; | ||
524 | } | 532 | } |
525 | 533 | ||
526 | static int finish_update(struct dyn_ftrace *rec, int enable) | 534 | static int finish_update(struct dyn_ftrace *rec, int enable) |
@@ -617,8 +625,14 @@ void ftrace_replace_code(int enable) | |||
617 | printk(KERN_WARNING "Failed on %s (%d):\n", report, count); | 625 | printk(KERN_WARNING "Failed on %s (%d):\n", report, count); |
618 | for_ftrace_rec_iter(iter) { | 626 | for_ftrace_rec_iter(iter) { |
619 | rec = ftrace_rec_iter_record(iter); | 627 | rec = ftrace_rec_iter_record(iter); |
620 | remove_breakpoint(rec); | 628 | /* |
629 | * Breakpoints are handled only when this function is in | ||
630 | * progress. The system could not work with them. | ||
631 | */ | ||
632 | if (remove_breakpoint(rec)) | ||
633 | BUG(); | ||
621 | } | 634 | } |
635 | run_sync(); | ||
622 | } | 636 | } |
623 | 637 | ||
624 | static int | 638 | static int |
@@ -640,16 +654,19 @@ ftrace_modify_code(unsigned long ip, unsigned const char *old_code, | |||
640 | run_sync(); | 654 | run_sync(); |
641 | 655 | ||
642 | ret = ftrace_write(ip, new_code, 1); | 656 | ret = ftrace_write(ip, new_code, 1); |
643 | if (ret) { | 657 | /* |
644 | ret = -EPERM; | 658 | * The breakpoint is handled only while this function is in progress. |
645 | goto out; | 659 | * The system cannot work if we fail to remove it. |
646 | } | 660 | */ |
647 | run_sync(); | 661 | BUG_ON(ret); |
648 | out: | 662 | out: |
663 | run_sync(); | ||
649 | return ret; | 664 | return ret; |
650 | 665 | ||
651 | fail_update: | 666 | fail_update: |
652 | probe_kernel_write((void *)ip, &old_code[0], 1); | 667 | /* Here too, the system cannot work with the breakpoint left behind */ |
668 | if (ftrace_write(ip, old_code, 1)) | ||
669 | BUG(); | ||
653 | goto out; | 670 | goto out; |
654 | } | 671 | } |
655 | 672 | ||
@@ -663,11 +680,8 @@ void arch_ftrace_update_code(int command) | |||
663 | atomic_dec(&modifying_ftrace_code); | 680 | atomic_dec(&modifying_ftrace_code); |
664 | } | 681 | } |
665 | 682 | ||
666 | int __init ftrace_dyn_arch_init(void *data) | 683 | int __init ftrace_dyn_arch_init(void) |
667 | { | 684 | { |
668 | /* The return code is returned via data */ | ||
669 | *(unsigned long *)data = 0; | ||
670 | |||
671 | return 0; | 685 | return 0; |
672 | } | 686 | } |
673 | #endif | 687 | #endif |
@@ -677,45 +691,41 @@ int __init ftrace_dyn_arch_init(void *data) | |||
677 | #ifdef CONFIG_DYNAMIC_FTRACE | 691 | #ifdef CONFIG_DYNAMIC_FTRACE |
678 | extern void ftrace_graph_call(void); | 692 | extern void ftrace_graph_call(void); |
679 | 693 | ||
680 | static int ftrace_mod_jmp(unsigned long ip, | 694 | static unsigned char *ftrace_jmp_replace(unsigned long ip, unsigned long addr) |
681 | int old_offset, int new_offset) | ||
682 | { | 695 | { |
683 | unsigned char code[MCOUNT_INSN_SIZE]; | 696 | static union ftrace_code_union calc; |
684 | 697 | ||
685 | if (probe_kernel_read(code, (void *)ip, MCOUNT_INSN_SIZE)) | 698 | /* Jmp not a call (ignore the .e8) */ |
686 | return -EFAULT; | 699 | calc.e8 = 0xe9; |
700 | calc.offset = ftrace_calc_offset(ip + MCOUNT_INSN_SIZE, addr); | ||
687 | 701 | ||
688 | if (code[0] != 0xe9 || old_offset != *(int *)(&code[1])) | 702 | /* |
689 | return -EINVAL; | 703 | * ftrace's external locks synchronize access to the static variable. |
704 | */ | ||
705 | return calc.code; | ||
706 | } | ||
690 | 707 | ||
691 | *(int *)(&code[1]) = new_offset; | 708 | static int ftrace_mod_jmp(unsigned long ip, void *func) |
709 | { | ||
710 | unsigned char *new; | ||
692 | 711 | ||
693 | if (do_ftrace_mod_code(ip, &code)) | 712 | new = ftrace_jmp_replace(ip, (unsigned long)func); |
694 | return -EPERM; | ||
695 | 713 | ||
696 | return 0; | 714 | return update_ftrace_func(ip, new); |
697 | } | 715 | } |
698 | 716 | ||
699 | int ftrace_enable_ftrace_graph_caller(void) | 717 | int ftrace_enable_ftrace_graph_caller(void) |
700 | { | 718 | { |
701 | unsigned long ip = (unsigned long)(&ftrace_graph_call); | 719 | unsigned long ip = (unsigned long)(&ftrace_graph_call); |
702 | int old_offset, new_offset; | ||
703 | |||
704 | old_offset = (unsigned long)(&ftrace_stub) - (ip + MCOUNT_INSN_SIZE); | ||
705 | new_offset = (unsigned long)(&ftrace_graph_caller) - (ip + MCOUNT_INSN_SIZE); | ||
706 | 720 | ||
707 | return ftrace_mod_jmp(ip, old_offset, new_offset); | 721 | return ftrace_mod_jmp(ip, &ftrace_graph_caller); |
708 | } | 722 | } |
709 | 723 | ||
710 | int ftrace_disable_ftrace_graph_caller(void) | 724 | int ftrace_disable_ftrace_graph_caller(void) |
711 | { | 725 | { |
712 | unsigned long ip = (unsigned long)(&ftrace_graph_call); | 726 | unsigned long ip = (unsigned long)(&ftrace_graph_call); |
713 | int old_offset, new_offset; | ||
714 | |||
715 | old_offset = (unsigned long)(&ftrace_graph_caller) - (ip + MCOUNT_INSN_SIZE); | ||
716 | new_offset = (unsigned long)(&ftrace_stub) - (ip + MCOUNT_INSN_SIZE); | ||
717 | 727 | ||
718 | return ftrace_mod_jmp(ip, old_offset, new_offset); | 728 | return ftrace_mod_jmp(ip, &ftrace_stub); |
719 | } | 729 | } |
720 | 730 | ||
721 | #endif /* !CONFIG_DYNAMIC_FTRACE */ | 731 | #endif /* !CONFIG_DYNAMIC_FTRACE */ |
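For context, the helpers consolidated above implement x86 ftrace's breakpoint-based live patching: the first byte of a call site is replaced with int3 so that any CPU racing through the half-modified instruction traps harmlessly, and run_sync() (an IPI round that serializes every CPU) fences each step. Condensed from ftrace_modify_code() as touched above, with error handling omitted:

    /* patch one 5-byte call site from old to new, safely against NMIs */
    add_break(ip, old);             /* 1: first byte -> int3 (0xcc)       */
    run_sync();                     /* every CPU now traps on the site    */
    add_update_code(ip, new);       /* 2: rewrite bytes 2..5 past the brk */
    run_sync();
    ftrace_write(ip, new, 1);       /* 3: restore first byte of new insn  */
    run_sync();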
diff --git a/arch/x86/kernel/head_32.S b/arch/x86/kernel/head_32.S index 81ba27679f18..f36bd42d6f0c 100644 --- a/arch/x86/kernel/head_32.S +++ b/arch/x86/kernel/head_32.S | |||
@@ -544,6 +544,10 @@ ENDPROC(early_idt_handlers) | |||
544 | /* This is global to keep gas from relaxing the jumps */ | 544 | /* This is global to keep gas from relaxing the jumps */ |
545 | ENTRY(early_idt_handler) | 545 | ENTRY(early_idt_handler) |
546 | cld | 546 | cld |
547 | |||
548 | cmpl $2,(%esp) # X86_TRAP_NMI | ||
549 | je is_nmi # Ignore NMI | ||
550 | |||
547 | cmpl $2,%ss:early_recursion_flag | 551 | cmpl $2,%ss:early_recursion_flag |
548 | je hlt_loop | 552 | je hlt_loop |
549 | incl %ss:early_recursion_flag | 553 | incl %ss:early_recursion_flag |
@@ -594,8 +598,9 @@ ex_entry: | |||
594 | pop %edx | 598 | pop %edx |
595 | pop %ecx | 599 | pop %ecx |
596 | pop %eax | 600 | pop %eax |
597 | addl $8,%esp /* drop vector number and error code */ | ||
598 | decl %ss:early_recursion_flag | 601 | decl %ss:early_recursion_flag |
602 | is_nmi: | ||
603 | addl $8,%esp /* drop vector number and error code */ | ||
599 | iret | 604 | iret |
600 | ENDPROC(early_idt_handler) | 605 | ENDPROC(early_idt_handler) |
601 | 606 | ||
diff --git a/arch/x86/kernel/head_64.S b/arch/x86/kernel/head_64.S index e1aabdb314c8..a468c0a65c42 100644 --- a/arch/x86/kernel/head_64.S +++ b/arch/x86/kernel/head_64.S | |||
@@ -343,6 +343,9 @@ early_idt_handlers: | |||
343 | ENTRY(early_idt_handler) | 343 | ENTRY(early_idt_handler) |
344 | cld | 344 | cld |
345 | 345 | ||
346 | cmpl $2,(%rsp) # X86_TRAP_NMI | ||
347 | je is_nmi # Ignore NMI | ||
348 | |||
346 | cmpl $2,early_recursion_flag(%rip) | 349 | cmpl $2,early_recursion_flag(%rip) |
347 | jz 1f | 350 | jz 1f |
348 | incl early_recursion_flag(%rip) | 351 | incl early_recursion_flag(%rip) |
@@ -405,8 +408,9 @@ ENTRY(early_idt_handler) | |||
405 | popq %rdx | 408 | popq %rdx |
406 | popq %rcx | 409 | popq %rcx |
407 | popq %rax | 410 | popq %rax |
408 | addq $16,%rsp # drop vector number and error code | ||
409 | decl early_recursion_flag(%rip) | 411 | decl early_recursion_flag(%rip) |
412 | is_nmi: | ||
413 | addq $16,%rsp # drop vector number and error code | ||
410 | INTERRUPT_RETURN | 414 | INTERRUPT_RETURN |
411 | ENDPROC(early_idt_handler) | 415 | ENDPROC(early_idt_handler) |
412 | 416 | ||
diff --git a/arch/x86/kernel/hpet.c b/arch/x86/kernel/hpet.c index da85a8e830a1..8d80ae011603 100644 --- a/arch/x86/kernel/hpet.c +++ b/arch/x86/kernel/hpet.c | |||
@@ -521,7 +521,7 @@ static int hpet_setup_irq(struct hpet_dev *dev) | |||
521 | { | 521 | { |
522 | 522 | ||
523 | if (request_irq(dev->irq, hpet_interrupt_handler, | 523 | if (request_irq(dev->irq, hpet_interrupt_handler, |
524 | IRQF_TIMER | IRQF_DISABLED | IRQF_NOBALANCING, | 524 | IRQF_TIMER | IRQF_NOBALANCING, |
525 | dev->name, dev)) | 525 | dev->name, dev)) |
526 | return -1; | 526 | return -1; |
527 | 527 | ||
@@ -699,7 +699,7 @@ static int hpet_cpuhp_notify(struct notifier_block *n, | |||
699 | /* FIXME: add schedule_work_on() */ | 699 | /* FIXME: add schedule_work_on() */ |
700 | schedule_delayed_work_on(cpu, &work.work, 0); | 700 | schedule_delayed_work_on(cpu, &work.work, 0); |
701 | wait_for_completion(&work.complete); | 701 | wait_for_completion(&work.complete); |
702 | destroy_timer_on_stack(&work.work.timer); | 702 | destroy_delayed_work_on_stack(&work.work); |
703 | break; | 703 | break; |
704 | case CPU_DEAD: | 704 | case CPU_DEAD: |
705 | if (hdev) { | 705 | if (hdev) { |
@@ -752,9 +752,7 @@ static struct clocksource clocksource_hpet = { | |||
752 | .mask = HPET_MASK, | 752 | .mask = HPET_MASK, |
753 | .flags = CLOCK_SOURCE_IS_CONTINUOUS, | 753 | .flags = CLOCK_SOURCE_IS_CONTINUOUS, |
754 | .resume = hpet_resume_counter, | 754 | .resume = hpet_resume_counter, |
755 | #ifdef CONFIG_X86_64 | ||
756 | .archdata = { .vclock_mode = VCLOCK_HPET }, | 755 | .archdata = { .vclock_mode = VCLOCK_HPET }, |
757 | #endif | ||
758 | }; | 756 | }; |
759 | 757 | ||
760 | static int hpet_clocksource_register(void) | 758 | static int hpet_clocksource_register(void) |
@@ -943,12 +941,14 @@ static __init int hpet_late_init(void) | |||
943 | if (boot_cpu_has(X86_FEATURE_ARAT)) | 941 | if (boot_cpu_has(X86_FEATURE_ARAT)) |
944 | return 0; | 942 | return 0; |
945 | 943 | ||
944 | cpu_notifier_register_begin(); | ||
946 | for_each_online_cpu(cpu) { | 945 | for_each_online_cpu(cpu) { |
947 | hpet_cpuhp_notify(NULL, CPU_ONLINE, (void *)(long)cpu); | 946 | hpet_cpuhp_notify(NULL, CPU_ONLINE, (void *)(long)cpu); |
948 | } | 947 | } |
949 | 948 | ||
950 | /* This notifier should be called after workqueue is ready */ | 949 | /* This notifier should be called after workqueue is ready */ |
951 | hotcpu_notifier(hpet_cpuhp_notify, -20); | 950 | __hotcpu_notifier(hpet_cpuhp_notify, -20); |
951 | cpu_notifier_register_done(); | ||
952 | 952 | ||
953 | return 0; | 953 | return 0; |
954 | } | 954 | } |
diff --git a/arch/x86/kernel/hw_breakpoint.c b/arch/x86/kernel/hw_breakpoint.c index f66ff162dce8..a67b47c31314 100644 --- a/arch/x86/kernel/hw_breakpoint.c +++ b/arch/x86/kernel/hw_breakpoint.c | |||
@@ -38,7 +38,6 @@ | |||
38 | #include <linux/kernel.h> | 38 | #include <linux/kernel.h> |
39 | #include <linux/module.h> | 39 | #include <linux/module.h> |
40 | #include <linux/sched.h> | 40 | #include <linux/sched.h> |
41 | #include <linux/init.h> | ||
42 | #include <linux/smp.h> | 41 | #include <linux/smp.h> |
43 | 42 | ||
44 | #include <asm/hw_breakpoint.h> | 43 | #include <asm/hw_breakpoint.h> |
diff --git a/arch/x86/kernel/i387.c b/arch/x86/kernel/i387.c index e8368c6dd2a2..d5dd80814419 100644 --- a/arch/x86/kernel/i387.c +++ b/arch/x86/kernel/i387.c | |||
@@ -86,10 +86,19 @@ EXPORT_SYMBOL(__kernel_fpu_begin); | |||
86 | 86 | ||
87 | void __kernel_fpu_end(void) | 87 | void __kernel_fpu_end(void) |
88 | { | 88 | { |
89 | if (use_eager_fpu()) | 89 | if (use_eager_fpu()) { |
90 | math_state_restore(); | 90 | /* |
91 | else | 91 | * For eager fpu, most of the time, tsk_used_math() is true. |
92 | * Restore the user math as we are done with the kernel usage. | ||
93 | * In a few instances, during thread exit, signal handling etc., | ||
94 | * tsk_used_math() is false. Those few places will take proper | ||
95 | * actions, so we don't need to restore the math here. | ||
96 | */ | ||
97 | if (likely(tsk_used_math(current))) | ||
98 | math_state_restore(); | ||
99 | } else { | ||
92 | stts(); | 100 | stts(); |
101 | } | ||
93 | } | 102 | } |
94 | EXPORT_SYMBOL(__kernel_fpu_end); | 103 | EXPORT_SYMBOL(__kernel_fpu_end); |
95 | 104 | ||
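The __kernel_fpu_end() fix matters to every user of the kernel-FPU API: under eager FPU the function must restore the user's math state, but only when the task actually owns one. For reference, the usual calling pattern (kernel_fpu_begin()/kernel_fpu_end() are the preempt-safe wrappers around the __-variants shown above; do_simd_work is hypothetical):

    kernel_fpu_begin();
    /* SSE/AVX work; the user's FP state is preserved around this region */
    do_simd_work();
    kernel_fpu_end();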
diff --git a/arch/x86/kernel/iosf_mbi.c b/arch/x86/kernel/iosf_mbi.c new file mode 100644 index 000000000000..c3aae6672843 --- /dev/null +++ b/arch/x86/kernel/iosf_mbi.c | |||
@@ -0,0 +1,226 @@ | |||
1 | /* | ||
2 | * IOSF-SB MailBox Interface Driver | ||
3 | * Copyright (c) 2013, Intel Corporation. | ||
4 | * | ||
5 | * This program is free software; you can redistribute it and/or modify it | ||
6 | * under the terms and conditions of the GNU General Public License, | ||
7 | * version 2, as published by the Free Software Foundation. | ||
8 | * | ||
9 | * This program is distributed in the hope it will be useful, but WITHOUT | ||
10 | * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or | ||
11 | * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for | ||
12 | * more details. | ||
13 | * | ||
14 | * | ||
15 | * The IOSF-SB is a fabric bus available on Atom-based SoCs that uses a | ||
16 | * mailbox interface (MBI) to communicate with multiple devices. This | ||
17 | * driver implements access to this interface for those platforms that can | ||
18 | * enumerate the device using PCI. | ||
19 | */ | ||
20 | |||
21 | #include <linux/module.h> | ||
22 | #include <linux/init.h> | ||
23 | #include <linux/spinlock.h> | ||
24 | #include <linux/pci.h> | ||
25 | |||
26 | #include <asm/iosf_mbi.h> | ||
27 | |||
28 | static DEFINE_SPINLOCK(iosf_mbi_lock); | ||
29 | |||
30 | static inline u32 iosf_mbi_form_mcr(u8 op, u8 port, u8 offset) | ||
31 | { | ||
32 | return (op << 24) | (port << 16) | (offset << 8) | MBI_ENABLE; | ||
33 | } | ||
34 | |||
35 | static struct pci_dev *mbi_pdev; /* one mbi device */ | ||
36 | |||
37 | static int iosf_mbi_pci_read_mdr(u32 mcrx, u32 mcr, u32 *mdr) | ||
38 | { | ||
39 | int result; | ||
40 | |||
41 | if (!mbi_pdev) | ||
42 | return -ENODEV; | ||
43 | |||
44 | if (mcrx) { | ||
45 | result = pci_write_config_dword(mbi_pdev, MBI_MCRX_OFFSET, | ||
46 | mcrx); | ||
47 | if (result < 0) | ||
48 | goto fail_read; | ||
49 | } | ||
50 | |||
51 | result = pci_write_config_dword(mbi_pdev, MBI_MCR_OFFSET, mcr); | ||
52 | if (result < 0) | ||
53 | goto fail_read; | ||
54 | |||
55 | result = pci_read_config_dword(mbi_pdev, MBI_MDR_OFFSET, mdr); | ||
56 | if (result < 0) | ||
57 | goto fail_read; | ||
58 | |||
59 | return 0; | ||
60 | |||
61 | fail_read: | ||
62 | dev_err(&mbi_pdev->dev, "PCI config access failed with %d\n", result); | ||
63 | return result; | ||
64 | } | ||
65 | |||
66 | static int iosf_mbi_pci_write_mdr(u32 mcrx, u32 mcr, u32 mdr) | ||
67 | { | ||
68 | int result; | ||
69 | |||
70 | if (!mbi_pdev) | ||
71 | return -ENODEV; | ||
72 | |||
73 | result = pci_write_config_dword(mbi_pdev, MBI_MDR_OFFSET, mdr); | ||
74 | if (result < 0) | ||
75 | goto fail_write; | ||
76 | |||
77 | if (mcrx) { | ||
78 | result = pci_write_config_dword(mbi_pdev, MBI_MCRX_OFFSET, | ||
79 | mcrx); | ||
80 | if (result < 0) | ||
81 | goto fail_write; | ||
82 | } | ||
83 | |||
84 | result = pci_write_config_dword(mbi_pdev, MBI_MCR_OFFSET, mcr); | ||
85 | if (result < 0) | ||
86 | goto fail_write; | ||
87 | |||
88 | return 0; | ||
89 | |||
90 | fail_write: | ||
91 | dev_err(&mbi_pdev->dev, "PCI config access failed with %d\n", result); | ||
92 | return result; | ||
93 | } | ||
94 | |||
95 | int iosf_mbi_read(u8 port, u8 opcode, u32 offset, u32 *mdr) | ||
96 | { | ||
97 | u32 mcr, mcrx; | ||
98 | unsigned long flags; | ||
99 | int ret; | ||
100 | |||
101 | /* Access to the GFX unit is handled by GPU code */ | ||
102 | if (port == BT_MBI_UNIT_GFX) { | ||
103 | WARN_ON(1); | ||
104 | return -EPERM; | ||
105 | } | ||
106 | |||
107 | mcr = iosf_mbi_form_mcr(opcode, port, offset & MBI_MASK_LO); | ||
108 | mcrx = offset & MBI_MASK_HI; | ||
109 | |||
110 | spin_lock_irqsave(&iosf_mbi_lock, flags); | ||
111 | ret = iosf_mbi_pci_read_mdr(mcrx, mcr, mdr); | ||
112 | spin_unlock_irqrestore(&iosf_mbi_lock, flags); | ||
113 | |||
114 | return ret; | ||
115 | } | ||
116 | EXPORT_SYMBOL(iosf_mbi_read); | ||
117 | |||
118 | int iosf_mbi_write(u8 port, u8 opcode, u32 offset, u32 mdr) | ||
119 | { | ||
120 | u32 mcr, mcrx; | ||
121 | unsigned long flags; | ||
122 | int ret; | ||
123 | |||
124 | /* Access to the GFX unit is handled by GPU code */ | ||
125 | if (port == BT_MBI_UNIT_GFX) { | ||
126 | WARN_ON(1); | ||
127 | return -EPERM; | ||
128 | } | ||
129 | |||
130 | mcr = iosf_mbi_form_mcr(opcode, port, offset & MBI_MASK_LO); | ||
131 | mcrx = offset & MBI_MASK_HI; | ||
132 | |||
133 | spin_lock_irqsave(&iosf_mbi_lock, flags); | ||
134 | ret = iosf_mbi_pci_write_mdr(mcrx, mcr, mdr); | ||
135 | spin_unlock_irqrestore(&iosf_mbi_lock, flags); | ||
136 | |||
137 | return ret; | ||
138 | } | ||
139 | EXPORT_SYMBOL(iosf_mbi_write); | ||
140 | |||
141 | int iosf_mbi_modify(u8 port, u8 opcode, u32 offset, u32 mdr, u32 mask) | ||
142 | { | ||
143 | u32 mcr, mcrx; | ||
144 | u32 value; | ||
145 | unsigned long flags; | ||
146 | int ret; | ||
147 | |||
148 | /* Access to the GFX unit is handled by GPU code */ | ||
149 | if (port == BT_MBI_UNIT_GFX) { | ||
150 | WARN_ON(1); | ||
151 | return -EPERM; | ||
152 | } | ||
153 | |||
154 | mcr = iosf_mbi_form_mcr(opcode, port, offset & MBI_MASK_LO); | ||
155 | mcrx = offset & MBI_MASK_HI; | ||
156 | |||
157 | spin_lock_irqsave(&iosf_mbi_lock, flags); | ||
158 | |||
159 | /* Read current mdr value */ | ||
160 | ret = iosf_mbi_pci_read_mdr(mcrx, mcr & MBI_RD_MASK, &value); | ||
161 | if (ret < 0) { | ||
162 | spin_unlock_irqrestore(&iosf_mbi_lock, flags); | ||
163 | return ret; | ||
164 | } | ||
165 | |||
166 | /* Apply mask */ | ||
167 | value &= ~mask; | ||
168 | mdr &= mask; | ||
169 | value |= mdr; | ||
170 | |||
171 | /* Write back */ | ||
172 | ret = iosf_mbi_pci_write_mdr(mcrx, mcr | MBI_WR_MASK, value); | ||
173 | |||
174 | spin_unlock_irqrestore(&iosf_mbi_lock, flags); | ||
175 | |||
176 | return ret; | ||
177 | } | ||
178 | EXPORT_SYMBOL(iosf_mbi_modify); | ||
179 | |||
180 | static int iosf_mbi_probe(struct pci_dev *pdev, | ||
181 | const struct pci_device_id *unused) | ||
182 | { | ||
183 | int ret; | ||
184 | |||
185 | ret = pci_enable_device(pdev); | ||
186 | if (ret < 0) { | ||
187 | dev_err(&pdev->dev, "error: could not enable device\n"); | ||
188 | return ret; | ||
189 | } | ||
190 | |||
191 | mbi_pdev = pci_dev_get(pdev); | ||
192 | return 0; | ||
193 | } | ||
194 | |||
195 | static DEFINE_PCI_DEVICE_TABLE(iosf_mbi_pci_ids) = { | ||
196 | { PCI_DEVICE(PCI_VENDOR_ID_INTEL, 0x0F00) }, | ||
197 | { 0, }, | ||
198 | }; | ||
199 | MODULE_DEVICE_TABLE(pci, iosf_mbi_pci_ids); | ||
200 | |||
201 | static struct pci_driver iosf_mbi_pci_driver = { | ||
202 | .name = "iosf_mbi_pci", | ||
203 | .probe = iosf_mbi_probe, | ||
204 | .id_table = iosf_mbi_pci_ids, | ||
205 | }; | ||
206 | |||
207 | static int __init iosf_mbi_init(void) | ||
208 | { | ||
209 | return pci_register_driver(&iosf_mbi_pci_driver); | ||
210 | } | ||
211 | |||
212 | static void __exit iosf_mbi_exit(void) | ||
213 | { | ||
214 | pci_unregister_driver(&iosf_mbi_pci_driver); | ||
215 | if (mbi_pdev) { | ||
216 | pci_dev_put(mbi_pdev); | ||
217 | mbi_pdev = NULL; | ||
218 | } | ||
219 | } | ||
220 | |||
221 | module_init(iosf_mbi_init); | ||
222 | module_exit(iosf_mbi_exit); | ||
223 | |||
224 | MODULE_AUTHOR("David E. Box <david.e.box@linux.intel.com>"); | ||
225 | MODULE_DESCRIPTION("IOSF Mailbox Interface accessor"); | ||
226 | MODULE_LICENSE("GPL v2"); | ||
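A sketch of how a platform driver might call the new accessors, assuming the BT_MBI_UNIT_PMC port and BT_MBI_PMC_READ opcode from the accompanying asm/iosf_mbi.h header; the 0x24 offset is a made-up placeholder, not a real register. Note the read-modify-write semantics of iosf_mbi_modify(): bits set in mask are taken from mdr, and all other bits of the register are preserved within the same spinlock-protected transaction.

	u32 val;
	int ret;

	/* read a 32-bit register behind the PMC port */
	ret = iosf_mbi_read(BT_MBI_UNIT_PMC, BT_MBI_PMC_READ, 0x24, &val);
	if (ret)
		return ret;

	/* set bit 0 without disturbing the other bits */
	ret = iosf_mbi_modify(BT_MBI_UNIT_PMC, BT_MBI_PMC_READ, 0x24, 0x1, 0x1);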
diff --git a/arch/x86/kernel/irq.c b/arch/x86/kernel/irq.c index 22d0687e7fda..283a76a9cc40 100644 --- a/arch/x86/kernel/irq.c +++ b/arch/x86/kernel/irq.c | |||
@@ -125,6 +125,12 @@ int arch_show_interrupts(struct seq_file *p, int prec) | |||
125 | seq_printf(p, "%10u ", per_cpu(mce_poll_count, j)); | 125 | seq_printf(p, "%10u ", per_cpu(mce_poll_count, j)); |
126 | seq_printf(p, " Machine check polls\n"); | 126 | seq_printf(p, " Machine check polls\n"); |
127 | #endif | 127 | #endif |
128 | #if IS_ENABLED(CONFIG_HYPERV) || defined(CONFIG_XEN) | ||
129 | seq_printf(p, "%*s: ", prec, "HYP"); | ||
130 | for_each_online_cpu(j) | ||
131 | seq_printf(p, "%10u ", irq_stats(j)->irq_hv_callback_count); | ||
132 | seq_printf(p, " Hypervisor callback interrupts\n"); | ||
133 | #endif | ||
128 | seq_printf(p, "%*s: %10u\n", prec, "ERR", atomic_read(&irq_err_count)); | 134 | seq_printf(p, "%*s: %10u\n", prec, "ERR", atomic_read(&irq_err_count)); |
129 | #if defined(CONFIG_X86_IO_APIC) | 135 | #if defined(CONFIG_X86_IO_APIC) |
130 | seq_printf(p, "%*s: %10u\n", prec, "MIS", atomic_read(&irq_mis_count)); | 136 | seq_printf(p, "%*s: %10u\n", prec, "MIS", atomic_read(&irq_mis_count)); |
@@ -193,9 +199,13 @@ __visible unsigned int __irq_entry do_IRQ(struct pt_regs *regs) | |||
193 | if (!handle_irq(irq, regs)) { | 199 | if (!handle_irq(irq, regs)) { |
194 | ack_APIC_irq(); | 200 | ack_APIC_irq(); |
195 | 201 | ||
196 | if (printk_ratelimit()) | 202 | if (irq != VECTOR_RETRIGGERED) { |
197 | pr_emerg("%s: %d.%d No irq handler for vector (irq %d)\n", | 203 | pr_emerg_ratelimited("%s: %d.%d No irq handler for vector (irq %d)\n", |
198 | __func__, smp_processor_id(), vector, irq); | 204 | __func__, smp_processor_id(), |
205 | vector, irq); | ||
206 | } else { | ||
207 | __this_cpu_write(vector_irq[vector], VECTOR_UNDEFINED); | ||
208 | } | ||
199 | } | 209 | } |
200 | 210 | ||
201 | irq_exit(); | 211 | irq_exit(); |
@@ -262,6 +272,83 @@ __visible void smp_trace_x86_platform_ipi(struct pt_regs *regs) | |||
262 | EXPORT_SYMBOL_GPL(vector_used_by_percpu_irq); | 272 | EXPORT_SYMBOL_GPL(vector_used_by_percpu_irq); |
263 | 273 | ||
264 | #ifdef CONFIG_HOTPLUG_CPU | 274 | #ifdef CONFIG_HOTPLUG_CPU |
275 | |||
276 | /* These two declarations are only used in check_irq_vectors_for_cpu_disable() | ||
277 | * below, which is protected by stop_machine(). Putting them on the stack | ||
278 | * results in a stack frame overflow. Dynamically allocating could result in a | ||
279 | * failure, so declare these two cpumasks as global. | ||
280 | */ | ||
281 | static struct cpumask affinity_new, online_new; | ||
282 | |||
283 | /* | ||
284 | * This cpu is going to be removed and its vectors migrated to the remaining | ||
285 | * online cpus. Check to see if there are enough vectors in the remaining cpus. | ||
286 | * This function is protected by stop_machine(). | ||
287 | */ | ||
288 | int check_irq_vectors_for_cpu_disable(void) | ||
289 | { | ||
290 | int irq, cpu; | ||
291 | unsigned int this_cpu, vector, this_count, count; | ||
292 | struct irq_desc *desc; | ||
293 | struct irq_data *data; | ||
294 | |||
295 | this_cpu = smp_processor_id(); | ||
296 | cpumask_copy(&online_new, cpu_online_mask); | ||
297 | cpu_clear(this_cpu, online_new); | ||
298 | |||
299 | this_count = 0; | ||
300 | for (vector = FIRST_EXTERNAL_VECTOR; vector < NR_VECTORS; vector++) { | ||
301 | irq = __this_cpu_read(vector_irq[vector]); | ||
302 | if (irq >= 0) { | ||
303 | desc = irq_to_desc(irq); | ||
304 | data = irq_desc_get_irq_data(desc); | ||
305 | cpumask_copy(&affinity_new, data->affinity); | ||
306 | cpu_clear(this_cpu, affinity_new); | ||
307 | |||
308 | /* Do not count inactive or per-cpu irqs. */ | ||
309 | if (!irq_has_action(irq) || irqd_is_per_cpu(data)) | ||
310 | continue; | ||
311 | |||
312 | /* | ||
313 | * A single irq may be mapped to multiple | ||
314 | * cpus' vector_irq[] (for example IOAPIC cluster | ||
315 | * mode). In this case we have two | ||
316 | * possibilities: | ||
317 | * | ||
318 | * 1) the resulting affinity mask is empty; that is, | ||
319 | * the down'd cpu is the last cpu in the irq's | ||
320 | * affinity mask, or | ||
321 | * | ||
322 | * 2) the resulting affinity mask is no longer | ||
323 | * a subset of the online cpus but the affinity | ||
324 | * mask is not zero; that is, the down'd cpu is the | ||
325 | * last online cpu in a user set affinity mask. | ||
326 | */ | ||
327 | if (cpumask_empty(&affinity_new) || | ||
328 | !cpumask_subset(&affinity_new, &online_new)) | ||
329 | this_count++; | ||
330 | } | ||
331 | } | ||
332 | |||
333 | count = 0; | ||
334 | for_each_online_cpu(cpu) { | ||
335 | if (cpu == this_cpu) | ||
336 | continue; | ||
337 | for (vector = FIRST_EXTERNAL_VECTOR; vector < NR_VECTORS; | ||
338 | vector++) { | ||
339 | if (per_cpu(vector_irq, cpu)[vector] < 0) | ||
340 | count++; | ||
341 | } | ||
342 | } | ||
343 | |||
344 | if (count < this_count) { | ||
345 | pr_warn("CPU %d disable failed: CPU has %u vectors assigned and there are only %u available.\n", | ||
346 | this_cpu, this_count, count); | ||
347 | return -ERANGE; | ||
348 | } | ||
349 | return 0; | ||
350 | } | ||
351 | |||
265 | /* A cpu has been removed from cpu_online_mask. Reset irq affinities. */ | 352 | /* A cpu has been removed from cpu_online_mask. Reset irq affinities. */ |
266 | void fixup_irqs(void) | 353 | void fixup_irqs(void) |
267 | { | 354 | { |
@@ -344,7 +431,7 @@ void fixup_irqs(void) | |||
344 | for (vector = FIRST_EXTERNAL_VECTOR; vector < NR_VECTORS; vector++) { | 431 | for (vector = FIRST_EXTERNAL_VECTOR; vector < NR_VECTORS; vector++) { |
345 | unsigned int irr; | 432 | unsigned int irr; |
346 | 433 | ||
347 | if (__this_cpu_read(vector_irq[vector]) < 0) | 434 | if (__this_cpu_read(vector_irq[vector]) <= VECTOR_UNDEFINED) |
348 | continue; | 435 | continue; |
349 | 436 | ||
350 | irr = apic_read(APIC_IRR + (vector / 32 * 0x10)); | 437 | irr = apic_read(APIC_IRR + (vector / 32 * 0x10)); |
@@ -355,11 +442,14 @@ void fixup_irqs(void) | |||
355 | data = irq_desc_get_irq_data(desc); | 442 | data = irq_desc_get_irq_data(desc); |
356 | chip = irq_data_get_irq_chip(data); | 443 | chip = irq_data_get_irq_chip(data); |
357 | raw_spin_lock(&desc->lock); | 444 | raw_spin_lock(&desc->lock); |
358 | if (chip->irq_retrigger) | 445 | if (chip->irq_retrigger) { |
359 | chip->irq_retrigger(data); | 446 | chip->irq_retrigger(data); |
447 | __this_cpu_write(vector_irq[vector], VECTOR_RETRIGGERED); | ||
448 | } | ||
360 | raw_spin_unlock(&desc->lock); | 449 | raw_spin_unlock(&desc->lock); |
361 | } | 450 | } |
362 | __this_cpu_write(vector_irq[vector], -1); | 451 | if (__this_cpu_read(vector_irq[vector]) != VECTOR_RETRIGGERED) |
452 | __this_cpu_write(vector_irq[vector], VECTOR_UNDEFINED); | ||
363 | } | 453 | } |
364 | } | 454 | } |
365 | #endif | 455 | #endif |
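The bare -1 slot value is replaced by named sentinels so a vector that was retriggered during CPU offlining can be told apart from one that is simply free. This series defines them in asm/hw_irq.h roughly as:

#define VECTOR_UNDEFINED	-1	/* slot is free */
#define VECTOR_RETRIGGERED	-2	/* retriggered while the CPU went down */

which is why fixup_irqs() now skips anything <= VECTOR_UNDEFINED (free or retriggered) and do_IRQ() quietly clears a VECTOR_RETRIGGERED slot instead of logging a spurious "No irq handler" message.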
diff --git a/arch/x86/kernel/irq_32.c b/arch/x86/kernel/irq_32.c index d7fcbedc9c43..63ce838e5a54 100644 --- a/arch/x86/kernel/irq_32.c +++ b/arch/x86/kernel/irq_32.c | |||
@@ -55,16 +55,8 @@ static inline int check_stack_overflow(void) { return 0; } | |||
55 | static inline void print_stack_overflow(void) { } | 55 | static inline void print_stack_overflow(void) { } |
56 | #endif | 56 | #endif |
57 | 57 | ||
58 | /* | 58 | DEFINE_PER_CPU(struct irq_stack *, hardirq_stack); |
59 | * per-CPU IRQ handling contexts (thread information and stack) | 59 | DEFINE_PER_CPU(struct irq_stack *, softirq_stack); |
60 | */ | ||
61 | union irq_ctx { | ||
62 | struct thread_info tinfo; | ||
63 | u32 stack[THREAD_SIZE/sizeof(u32)]; | ||
64 | } __attribute__((aligned(THREAD_SIZE))); | ||
65 | |||
66 | static DEFINE_PER_CPU(union irq_ctx *, hardirq_ctx); | ||
67 | static DEFINE_PER_CPU(union irq_ctx *, softirq_ctx); | ||
68 | 60 | ||
69 | static void call_on_stack(void *func, void *stack) | 61 | static void call_on_stack(void *func, void *stack) |
70 | { | 62 | { |
@@ -77,14 +69,26 @@ static void call_on_stack(void *func, void *stack) | |||
77 | : "memory", "cc", "edx", "ecx", "eax"); | 69 | : "memory", "cc", "edx", "ecx", "eax"); |
78 | } | 70 | } |
79 | 71 | ||
72 | /* how to get the current stack pointer from C */ | ||
73 | #define current_stack_pointer ({ \ | ||
74 | unsigned long sp; \ | ||
75 | asm("mov %%esp,%0" : "=g" (sp)); \ | ||
76 | sp; \ | ||
77 | }) | ||
78 | |||
79 | static inline void *current_stack(void) | ||
80 | { | ||
81 | return (void *)(current_stack_pointer & ~(THREAD_SIZE - 1)); | ||
82 | } | ||
83 | |||
80 | static inline int | 84 | static inline int |
81 | execute_on_irq_stack(int overflow, struct irq_desc *desc, int irq) | 85 | execute_on_irq_stack(int overflow, struct irq_desc *desc, int irq) |
82 | { | 86 | { |
83 | union irq_ctx *curctx, *irqctx; | 87 | struct irq_stack *curstk, *irqstk; |
84 | u32 *isp, arg1, arg2; | 88 | u32 *isp, *prev_esp, arg1, arg2; |
85 | 89 | ||
86 | curctx = (union irq_ctx *) current_thread_info(); | 90 | curstk = (struct irq_stack *) current_stack(); |
87 | irqctx = __this_cpu_read(hardirq_ctx); | 91 | irqstk = __this_cpu_read(hardirq_stack); |
88 | 92 | ||
89 | /* | 93 | /* |
90 | * this is where we switch to the IRQ stack. However, if we are | 94 | * this is where we switch to the IRQ stack. However, if we are |
@@ -92,13 +96,14 @@ execute_on_irq_stack(int overflow, struct irq_desc *desc, int irq) | |||
92 | * handler) we can't do that and just have to keep using the | 96 | * handler) we can't do that and just have to keep using the |
93 | * current stack (which is the irq stack already after all) | 97 | * current stack (which is the irq stack already after all) |
94 | */ | 98 | */ |
95 | if (unlikely(curctx == irqctx)) | 99 | if (unlikely(curstk == irqstk)) |
96 | return 0; | 100 | return 0; |
97 | 101 | ||
98 | /* build the stack frame on the IRQ stack */ | 102 | isp = (u32 *) ((char *)irqstk + sizeof(*irqstk)); |
99 | isp = (u32 *) ((char *)irqctx + sizeof(*irqctx)); | 103 | |
100 | irqctx->tinfo.task = curctx->tinfo.task; | 104 | /* Save the next esp at the bottom of the stack */ |
101 | irqctx->tinfo.previous_esp = current_stack_pointer; | 105 | prev_esp = (u32 *)irqstk; |
106 | *prev_esp = current_stack_pointer; | ||
102 | 107 | ||
103 | if (unlikely(overflow)) | 108 | if (unlikely(overflow)) |
104 | call_on_stack(print_stack_overflow, isp); | 109 | call_on_stack(print_stack_overflow, isp); |
@@ -118,46 +123,40 @@ execute_on_irq_stack(int overflow, struct irq_desc *desc, int irq) | |||
118 | */ | 123 | */ |
119 | void irq_ctx_init(int cpu) | 124 | void irq_ctx_init(int cpu) |
120 | { | 125 | { |
121 | union irq_ctx *irqctx; | 126 | struct irq_stack *irqstk; |
122 | 127 | ||
123 | if (per_cpu(hardirq_ctx, cpu)) | 128 | if (per_cpu(hardirq_stack, cpu)) |
124 | return; | 129 | return; |
125 | 130 | ||
126 | irqctx = page_address(alloc_pages_node(cpu_to_node(cpu), | 131 | irqstk = page_address(alloc_pages_node(cpu_to_node(cpu), |
127 | THREADINFO_GFP, | 132 | THREADINFO_GFP, |
128 | THREAD_SIZE_ORDER)); | 133 | THREAD_SIZE_ORDER)); |
129 | memset(&irqctx->tinfo, 0, sizeof(struct thread_info)); | 134 | per_cpu(hardirq_stack, cpu) = irqstk; |
130 | irqctx->tinfo.cpu = cpu; | ||
131 | irqctx->tinfo.addr_limit = MAKE_MM_SEG(0); | ||
132 | |||
133 | per_cpu(hardirq_ctx, cpu) = irqctx; | ||
134 | 135 | ||
135 | irqctx = page_address(alloc_pages_node(cpu_to_node(cpu), | 136 | irqstk = page_address(alloc_pages_node(cpu_to_node(cpu), |
136 | THREADINFO_GFP, | 137 | THREADINFO_GFP, |
137 | THREAD_SIZE_ORDER)); | 138 | THREAD_SIZE_ORDER)); |
138 | memset(&irqctx->tinfo, 0, sizeof(struct thread_info)); | 139 | per_cpu(softirq_stack, cpu) = irqstk; |
139 | irqctx->tinfo.cpu = cpu; | ||
140 | irqctx->tinfo.addr_limit = MAKE_MM_SEG(0); | ||
141 | |||
142 | per_cpu(softirq_ctx, cpu) = irqctx; | ||
143 | 140 | ||
144 | printk(KERN_DEBUG "CPU %u irqstacks, hard=%p soft=%p\n", | 141 | printk(KERN_DEBUG "CPU %u irqstacks, hard=%p soft=%p\n", |
145 | cpu, per_cpu(hardirq_ctx, cpu), per_cpu(softirq_ctx, cpu)); | 142 | cpu, per_cpu(hardirq_stack, cpu), per_cpu(softirq_stack, cpu)); |
146 | } | 143 | } |
147 | 144 | ||
148 | void do_softirq_own_stack(void) | 145 | void do_softirq_own_stack(void) |
149 | { | 146 | { |
150 | struct thread_info *curctx; | 147 | struct thread_info *curstk; |
151 | union irq_ctx *irqctx; | 148 | struct irq_stack *irqstk; |
152 | u32 *isp; | 149 | u32 *isp, *prev_esp; |
153 | 150 | ||
154 | curctx = current_thread_info(); | 151 | curstk = current_stack(); |
155 | irqctx = __this_cpu_read(softirq_ctx); | 152 | irqstk = __this_cpu_read(softirq_stack); |
156 | irqctx->tinfo.task = curctx->task; | ||
157 | irqctx->tinfo.previous_esp = current_stack_pointer; | ||
158 | 153 | ||
159 | /* build the stack frame on the softirq stack */ | 154 | /* build the stack frame on the softirq stack */ |
160 | isp = (u32 *) ((char *)irqctx + sizeof(*irqctx)); | 155 | isp = (u32 *) ((char *)irqstk + sizeof(*irqstk)); |
156 | |||
157 | /* Push the previous esp onto the stack */ | ||
158 | prev_esp = (u32 *)irqstk; | ||
159 | *prev_esp = current_stack_pointer; | ||
161 | 160 | ||
162 | call_on_stack(__do_softirq, isp); | 161 | call_on_stack(__do_softirq, isp); |
163 | } | 162 | } |
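The union irq_ctx that carried a throwaway thread_info copy is gone: each interrupt stack is now a plain THREAD_SIZE page described by struct irq_stack, and the previous stack pointer is saved in the first word of that page rather than in thread_info->previous_esp. The structure this series introduces in asm/processor.h looks roughly like:

struct irq_stack {
	u32 stack[THREAD_SIZE / sizeof(u32)];
} __aligned(THREAD_SIZE);

The THREAD_SIZE alignment is what lets current_stack() recover the base of whichever stack is live by simply masking the low bits off %esp.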
diff --git a/arch/x86/kernel/irqinit.c b/arch/x86/kernel/irqinit.c index a2a1fbc594ff..7f50156542fb 100644 --- a/arch/x86/kernel/irqinit.c +++ b/arch/x86/kernel/irqinit.c | |||
@@ -52,7 +52,7 @@ static struct irqaction irq2 = { | |||
52 | }; | 52 | }; |
53 | 53 | ||
54 | DEFINE_PER_CPU(vector_irq_t, vector_irq) = { | 54 | DEFINE_PER_CPU(vector_irq_t, vector_irq) = { |
55 | [0 ... NR_VECTORS - 1] = -1, | 55 | [0 ... NR_VECTORS - 1] = VECTOR_UNDEFINED, |
56 | }; | 56 | }; |
57 | 57 | ||
58 | int vector_used_by_percpu_irq(unsigned int vector) | 58 | int vector_used_by_percpu_irq(unsigned int vector) |
@@ -60,7 +60,7 @@ int vector_used_by_percpu_irq(unsigned int vector) | |||
60 | int cpu; | 60 | int cpu; |
61 | 61 | ||
62 | for_each_online_cpu(cpu) { | 62 | for_each_online_cpu(cpu) { |
63 | if (per_cpu(vector_irq, cpu)[vector] != -1) | 63 | if (per_cpu(vector_irq, cpu)[vector] > VECTOR_UNDEFINED) |
64 | return 1; | 64 | return 1; |
65 | } | 65 | } |
66 | 66 | ||
diff --git a/arch/x86/kernel/kgdb.c b/arch/x86/kernel/kgdb.c index 836f8322960e..7ec1d5f8d283 100644 --- a/arch/x86/kernel/kgdb.c +++ b/arch/x86/kernel/kgdb.c | |||
@@ -39,7 +39,6 @@ | |||
39 | #include <linux/sched.h> | 39 | #include <linux/sched.h> |
40 | #include <linux/delay.h> | 40 | #include <linux/delay.h> |
41 | #include <linux/kgdb.h> | 41 | #include <linux/kgdb.h> |
42 | #include <linux/init.h> | ||
43 | #include <linux/smp.h> | 42 | #include <linux/smp.h> |
44 | #include <linux/nmi.h> | 43 | #include <linux/nmi.h> |
45 | #include <linux/hw_breakpoint.h> | 44 | #include <linux/hw_breakpoint.h> |
diff --git a/arch/x86/kernel/ksysfs.c b/arch/x86/kernel/ksysfs.c new file mode 100644 index 000000000000..c2bedaea11f7 --- /dev/null +++ b/arch/x86/kernel/ksysfs.c | |||
@@ -0,0 +1,340 @@ | |||
1 | /* | ||
2 | * Architecture specific sysfs attributes in /sys/kernel | ||
3 | * | ||
4 | * Copyright (C) 2007, Intel Corp. | ||
5 | * Huang Ying <ying.huang@intel.com> | ||
6 | * Copyright (C) 2013 Red Hat, Inc. | ||
7 | * Dave Young <dyoung@redhat.com> | ||
8 | * | ||
9 | * This file is released under the GPLv2 | ||
10 | */ | ||
11 | |||
12 | #include <linux/kobject.h> | ||
13 | #include <linux/string.h> | ||
14 | #include <linux/sysfs.h> | ||
15 | #include <linux/init.h> | ||
16 | #include <linux/stat.h> | ||
17 | #include <linux/slab.h> | ||
18 | #include <linux/mm.h> | ||
19 | |||
20 | #include <asm/io.h> | ||
21 | #include <asm/setup.h> | ||
22 | |||
23 | static ssize_t version_show(struct kobject *kobj, | ||
24 | struct kobj_attribute *attr, char *buf) | ||
25 | { | ||
26 | return sprintf(buf, "0x%04x\n", boot_params.hdr.version); | ||
27 | } | ||
28 | |||
29 | static struct kobj_attribute boot_params_version_attr = __ATTR_RO(version); | ||
30 | |||
31 | static ssize_t boot_params_data_read(struct file *fp, struct kobject *kobj, | ||
32 | struct bin_attribute *bin_attr, | ||
33 | char *buf, loff_t off, size_t count) | ||
34 | { | ||
35 | memcpy(buf, (void *)&boot_params + off, count); | ||
36 | return count; | ||
37 | } | ||
38 | |||
39 | static struct bin_attribute boot_params_data_attr = { | ||
40 | .attr = { | ||
41 | .name = "data", | ||
42 | .mode = S_IRUGO, | ||
43 | }, | ||
44 | .read = boot_params_data_read, | ||
45 | .size = sizeof(boot_params), | ||
46 | }; | ||
47 | |||
48 | static struct attribute *boot_params_version_attrs[] = { | ||
49 | &boot_params_version_attr.attr, | ||
50 | NULL, | ||
51 | }; | ||
52 | |||
53 | static struct bin_attribute *boot_params_data_attrs[] = { | ||
54 | &boot_params_data_attr, | ||
55 | NULL, | ||
56 | }; | ||
57 | |||
58 | static struct attribute_group boot_params_attr_group = { | ||
59 | .attrs = boot_params_version_attrs, | ||
60 | .bin_attrs = boot_params_data_attrs, | ||
61 | }; | ||
62 | |||
63 | static int kobj_to_setup_data_nr(struct kobject *kobj, int *nr) | ||
64 | { | ||
65 | const char *name; | ||
66 | |||
67 | name = kobject_name(kobj); | ||
68 | return kstrtoint(name, 10, nr); | ||
69 | } | ||
70 | |||
71 | static int get_setup_data_paddr(int nr, u64 *paddr) | ||
72 | { | ||
73 | int i = 0; | ||
74 | struct setup_data *data; | ||
75 | u64 pa_data = boot_params.hdr.setup_data; | ||
76 | |||
77 | while (pa_data) { | ||
78 | if (nr == i) { | ||
79 | *paddr = pa_data; | ||
80 | return 0; | ||
81 | } | ||
82 | data = ioremap_cache(pa_data, sizeof(*data)); | ||
83 | if (!data) | ||
84 | return -ENOMEM; | ||
85 | |||
86 | pa_data = data->next; | ||
87 | iounmap(data); | ||
88 | i++; | ||
89 | } | ||
90 | return -EINVAL; | ||
91 | } | ||
92 | |||
93 | static int __init get_setup_data_size(int nr, size_t *size) | ||
94 | { | ||
95 | int i = 0; | ||
96 | struct setup_data *data; | ||
97 | u64 pa_data = boot_params.hdr.setup_data; | ||
98 | |||
99 | while (pa_data) { | ||
100 | data = ioremap_cache(pa_data, sizeof(*data)); | ||
101 | if (!data) | ||
102 | return -ENOMEM; | ||
103 | if (nr == i) { | ||
104 | *size = data->len; | ||
105 | iounmap(data); | ||
106 | return 0; | ||
107 | } | ||
108 | |||
109 | pa_data = data->next; | ||
110 | iounmap(data); | ||
111 | i++; | ||
112 | } | ||
113 | return -EINVAL; | ||
114 | } | ||
115 | |||
116 | static ssize_t type_show(struct kobject *kobj, | ||
117 | struct kobj_attribute *attr, char *buf) | ||
118 | { | ||
119 | int nr, ret; | ||
120 | u64 paddr; | ||
121 | struct setup_data *data; | ||
122 | |||
123 | ret = kobj_to_setup_data_nr(kobj, &nr); | ||
124 | if (ret) | ||
125 | return ret; | ||
126 | |||
127 | ret = get_setup_data_paddr(nr, &paddr); | ||
128 | if (ret) | ||
129 | return ret; | ||
130 | data = ioremap_cache(paddr, sizeof(*data)); | ||
131 | if (!data) | ||
132 | return -ENOMEM; | ||
133 | |||
134 | ret = sprintf(buf, "0x%x\n", data->type); | ||
135 | iounmap(data); | ||
136 | return ret; | ||
137 | } | ||
138 | |||
139 | static ssize_t setup_data_data_read(struct file *fp, | ||
140 | struct kobject *kobj, | ||
141 | struct bin_attribute *bin_attr, | ||
142 | char *buf, | ||
143 | loff_t off, size_t count) | ||
144 | { | ||
145 | int nr, ret = 0; | ||
146 | u64 paddr; | ||
147 | struct setup_data *data; | ||
148 | void *p; | ||
149 | |||
150 | ret = kobj_to_setup_data_nr(kobj, &nr); | ||
151 | if (ret) | ||
152 | return ret; | ||
153 | |||
154 | ret = get_setup_data_paddr(nr, &paddr); | ||
155 | if (ret) | ||
156 | return ret; | ||
157 | data = ioremap_cache(paddr, sizeof(*data)); | ||
158 | if (!data) | ||
159 | return -ENOMEM; | ||
160 | |||
161 | if (off > data->len) { | ||
162 | ret = -EINVAL; | ||
163 | goto out; | ||
164 | } | ||
165 | |||
166 | if (count > data->len - off) | ||
167 | count = data->len - off; | ||
168 | |||
169 | if (!count) | ||
170 | goto out; | ||
171 | |||
172 | ret = count; | ||
173 | p = ioremap_cache(paddr + sizeof(*data), data->len); | ||
174 | if (!p) { | ||
175 | ret = -ENOMEM; | ||
176 | goto out; | ||
177 | } | ||
178 | memcpy(buf, p + off, count); | ||
179 | iounmap(p); | ||
180 | out: | ||
181 | iounmap(data); | ||
182 | return ret; | ||
183 | } | ||
184 | |||
185 | static struct kobj_attribute type_attr = __ATTR_RO(type); | ||
186 | |||
187 | static struct bin_attribute data_attr = { | ||
188 | .attr = { | ||
189 | .name = "data", | ||
190 | .mode = S_IRUGO, | ||
191 | }, | ||
192 | .read = setup_data_data_read, | ||
193 | }; | ||
194 | |||
195 | static struct attribute *setup_data_type_attrs[] = { | ||
196 | &type_attr.attr, | ||
197 | NULL, | ||
198 | }; | ||
199 | |||
200 | static struct bin_attribute *setup_data_data_attrs[] = { | ||
201 | &data_attr, | ||
202 | NULL, | ||
203 | }; | ||
204 | |||
205 | static struct attribute_group setup_data_attr_group = { | ||
206 | .attrs = setup_data_type_attrs, | ||
207 | .bin_attrs = setup_data_data_attrs, | ||
208 | }; | ||
209 | |||
210 | static int __init create_setup_data_node(struct kobject *parent, | ||
211 | struct kobject **kobjp, int nr) | ||
212 | { | ||
213 | int ret = 0; | ||
214 | size_t size; | ||
215 | struct kobject *kobj; | ||
216 | char name[16]; /* should be enough for setup_data node numbers */ | ||
217 | snprintf(name, 16, "%d", nr); | ||
218 | |||
219 | kobj = kobject_create_and_add(name, parent); | ||
220 | if (!kobj) | ||
221 | return -ENOMEM; | ||
222 | |||
223 | ret = get_setup_data_size(nr, &size); | ||
224 | if (ret) | ||
225 | goto out_kobj; | ||
226 | |||
227 | data_attr.size = size; | ||
228 | ret = sysfs_create_group(kobj, &setup_data_attr_group); | ||
229 | if (ret) | ||
230 | goto out_kobj; | ||
231 | *kobjp = kobj; | ||
232 | |||
233 | return 0; | ||
234 | out_kobj: | ||
235 | kobject_put(kobj); | ||
236 | return ret; | ||
237 | } | ||
238 | |||
239 | static void __init cleanup_setup_data_node(struct kobject *kobj) | ||
240 | { | ||
241 | sysfs_remove_group(kobj, &setup_data_attr_group); | ||
242 | kobject_put(kobj); | ||
243 | } | ||
244 | |||
245 | static int __init get_setup_data_total_num(u64 pa_data, int *nr) | ||
246 | { | ||
247 | int ret = 0; | ||
248 | struct setup_data *data; | ||
249 | |||
250 | *nr = 0; | ||
251 | while (pa_data) { | ||
252 | *nr += 1; | ||
253 | data = ioremap_cache(pa_data, sizeof(*data)); | ||
254 | if (!data) { | ||
255 | ret = -ENOMEM; | ||
256 | goto out; | ||
257 | } | ||
258 | pa_data = data->next; | ||
259 | iounmap(data); | ||
260 | } | ||
261 | |||
262 | out: | ||
263 | return ret; | ||
264 | } | ||
265 | |||
266 | static int __init create_setup_data_nodes(struct kobject *parent) | ||
267 | { | ||
268 | struct kobject *setup_data_kobj, **kobjp; | ||
269 | u64 pa_data; | ||
270 | int i, j, nr, ret = 0; | ||
271 | |||
272 | pa_data = boot_params.hdr.setup_data; | ||
273 | if (!pa_data) | ||
274 | return 0; | ||
275 | |||
276 | setup_data_kobj = kobject_create_and_add("setup_data", parent); | ||
277 | if (!setup_data_kobj) { | ||
278 | ret = -ENOMEM; | ||
279 | goto out; | ||
280 | } | ||
281 | |||
282 | ret = get_setup_data_total_num(pa_data, &nr); | ||
283 | if (ret) | ||
284 | goto out_setup_data_kobj; | ||
285 | |||
286 | kobjp = kmalloc(sizeof(*kobjp) * nr, GFP_KERNEL); | ||
287 | if (!kobjp) { | ||
288 | ret = -ENOMEM; | ||
289 | goto out_setup_data_kobj; | ||
290 | } | ||
291 | |||
292 | for (i = 0; i < nr; i++) { | ||
293 | ret = create_setup_data_node(setup_data_kobj, kobjp + i, i); | ||
294 | if (ret) | ||
295 | goto out_clean_nodes; | ||
296 | } | ||
297 | |||
298 | kfree(kobjp); | ||
299 | return 0; | ||
300 | |||
301 | out_clean_nodes: | ||
302 | for (j = i - 1; j >= 0; j--) | ||
303 | cleanup_setup_data_node(*(kobjp + j)); | ||
304 | kfree(kobjp); | ||
305 | out_setup_data_kobj: | ||
306 | kobject_put(setup_data_kobj); | ||
307 | out: | ||
308 | return ret; | ||
309 | } | ||
310 | |||
311 | static int __init boot_params_ksysfs_init(void) | ||
312 | { | ||
313 | int ret; | ||
314 | struct kobject *boot_params_kobj; | ||
315 | |||
316 | boot_params_kobj = kobject_create_and_add("boot_params", | ||
317 | kernel_kobj); | ||
318 | if (!boot_params_kobj) { | ||
319 | ret = -ENOMEM; | ||
320 | goto out; | ||
321 | } | ||
322 | |||
323 | ret = sysfs_create_group(boot_params_kobj, &boot_params_attr_group); | ||
324 | if (ret) | ||
325 | goto out_boot_params_kobj; | ||
326 | |||
327 | ret = create_setup_data_nodes(boot_params_kobj); | ||
328 | if (ret) | ||
329 | goto out_create_group; | ||
330 | |||
331 | return 0; | ||
332 | out_create_group: | ||
333 | sysfs_remove_group(boot_params_kobj, &boot_params_attr_group); | ||
334 | out_boot_params_kobj: | ||
335 | kobject_put(boot_params_kobj); | ||
336 | out: | ||
337 | return ret; | ||
338 | } | ||
339 | |||
340 | arch_initcall(boot_params_ksysfs_init); | ||
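The resulting layout under /sys/kernel is one boot_params directory exposing the boot protocol version and the raw struct boot_params, plus one numbered subdirectory per entry in the setup_data linked list, along these lines (the version value shown is illustrative):

/sys/kernel/boot_params/version           # e.g. "0x020c"
/sys/kernel/boot_params/data              # raw struct boot_params, read-only
/sys/kernel/boot_params/setup_data/0/type # type of the first setup_data entry
/sys/kernel/boot_params/setup_data/0/data # payload of the first entry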
diff --git a/arch/x86/kernel/kvm.c b/arch/x86/kernel/kvm.c index 6dd802c6d780..0331cb389d68 100644 --- a/arch/x86/kernel/kvm.c +++ b/arch/x86/kernel/kvm.c | |||
@@ -417,7 +417,6 @@ void kvm_disable_steal_time(void) | |||
417 | #ifdef CONFIG_SMP | 417 | #ifdef CONFIG_SMP |
418 | static void __init kvm_smp_prepare_boot_cpu(void) | 418 | static void __init kvm_smp_prepare_boot_cpu(void) |
419 | { | 419 | { |
420 | WARN_ON(kvm_register_clock("primary cpu clock")); | ||
421 | kvm_guest_cpu_init(); | 420 | kvm_guest_cpu_init(); |
422 | native_smp_prepare_boot_cpu(); | 421 | native_smp_prepare_boot_cpu(); |
423 | kvm_spinlock_init(); | 422 | kvm_spinlock_init(); |
@@ -500,6 +499,38 @@ void __init kvm_guest_init(void) | |||
500 | #endif | 499 | #endif |
501 | } | 500 | } |
502 | 501 | ||
502 | static noinline uint32_t __kvm_cpuid_base(void) | ||
503 | { | ||
504 | if (boot_cpu_data.cpuid_level < 0) | ||
505 | return 0; /* So we don't blow up on old processors */ | ||
506 | |||
507 | if (cpu_has_hypervisor) | ||
508 | return hypervisor_cpuid_base("KVMKVMKVM\0\0\0", 0); | ||
509 | |||
510 | return 0; | ||
511 | } | ||
512 | |||
513 | static inline uint32_t kvm_cpuid_base(void) | ||
514 | { | ||
515 | static int kvm_cpuid_base = -1; | ||
516 | |||
517 | if (kvm_cpuid_base == -1) | ||
518 | kvm_cpuid_base = __kvm_cpuid_base(); | ||
519 | |||
520 | return kvm_cpuid_base; | ||
521 | } | ||
522 | |||
523 | bool kvm_para_available(void) | ||
524 | { | ||
525 | return kvm_cpuid_base() != 0; | ||
526 | } | ||
527 | EXPORT_SYMBOL_GPL(kvm_para_available); | ||
528 | |||
529 | unsigned int kvm_arch_para_features(void) | ||
530 | { | ||
531 | return cpuid_eax(kvm_cpuid_base() | KVM_CPUID_FEATURES); | ||
532 | } | ||
533 | |||
503 | static uint32_t __init kvm_detect(void) | 534 | static uint32_t __init kvm_detect(void) |
504 | { | 535 | { |
505 | return kvm_cpuid_base(); | 536 | return kvm_cpuid_base(); |
@@ -673,7 +704,7 @@ static cpumask_t waiting_cpus; | |||
673 | /* Track spinlock on which a cpu is waiting */ | 704 | /* Track spinlock on which a cpu is waiting */ |
674 | static DEFINE_PER_CPU(struct kvm_lock_waiting, klock_waiting); | 705 | static DEFINE_PER_CPU(struct kvm_lock_waiting, klock_waiting); |
675 | 706 | ||
676 | static void kvm_lock_spinning(struct arch_spinlock *lock, __ticket_t want) | 707 | __visible void kvm_lock_spinning(struct arch_spinlock *lock, __ticket_t want) |
677 | { | 708 | { |
678 | struct kvm_lock_waiting *w; | 709 | struct kvm_lock_waiting *w; |
679 | int cpu; | 710 | int cpu; |
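The kvm.c changes above move kvm_para_available() and kvm_arch_para_features() out of line so that the CPUID base is probed once and cached in kvm_cpuid_base(). A typical consumer keeps using the existing kvm_para_has_feature() wrapper; roughly:

	if (kvm_para_available() &&
	    kvm_para_has_feature(KVM_FEATURE_STEAL_TIME)) {
		/* enable paravirtual steal-time accounting */
	}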
diff --git a/arch/x86/kernel/kvmclock.c b/arch/x86/kernel/kvmclock.c index e6041094ff26..d9156ceecdff 100644 --- a/arch/x86/kernel/kvmclock.c +++ b/arch/x86/kernel/kvmclock.c | |||
@@ -242,7 +242,7 @@ void __init kvmclock_init(void) | |||
242 | hv_clock = __va(mem); | 242 | hv_clock = __va(mem); |
243 | memset(hv_clock, 0, size); | 243 | memset(hv_clock, 0, size); |
244 | 244 | ||
245 | if (kvm_register_clock("boot clock")) { | 245 | if (kvm_register_clock("primary cpu clock")) { |
246 | hv_clock = NULL; | 246 | hv_clock = NULL; |
247 | memblock_free(mem, size); | 247 | memblock_free(mem, size); |
248 | return; | 248 | return; |
diff --git a/arch/x86/kernel/ldt.c b/arch/x86/kernel/ldt.c index ebc987398923..af1d14a9ebda 100644 --- a/arch/x86/kernel/ldt.c +++ b/arch/x86/kernel/ldt.c | |||
@@ -229,6 +229,17 @@ static int write_ldt(void __user *ptr, unsigned long bytecount, int oldmode) | |||
229 | } | 229 | } |
230 | } | 230 | } |
231 | 231 | ||
232 | /* | ||
233 | * On x86-64 we do not support 16-bit segments due to | ||
234 | * IRET leaking the high bits of the kernel stack address. | ||
235 | */ | ||
236 | #ifdef CONFIG_X86_64 | ||
237 | if (!ldt_info.seg_32bit) { | ||
238 | error = -EINVAL; | ||
239 | goto out_unlock; | ||
240 | } | ||
241 | #endif | ||
242 | |||
232 | fill_ldt(&ldt, &ldt_info); | 243 | fill_ldt(&ldt, &ldt_info); |
233 | if (oldmode) | 244 | if (oldmode) |
234 | ldt.avl = 0; | 245 | ldt.avl = 0; |
diff --git a/arch/x86/kernel/machine_kexec_32.c b/arch/x86/kernel/machine_kexec_32.c index 5b19e4d78b00..1667b1de8d5d 100644 --- a/arch/x86/kernel/machine_kexec_32.c +++ b/arch/x86/kernel/machine_kexec_32.c | |||
@@ -9,7 +9,6 @@ | |||
9 | #include <linux/mm.h> | 9 | #include <linux/mm.h> |
10 | #include <linux/kexec.h> | 10 | #include <linux/kexec.h> |
11 | #include <linux/delay.h> | 11 | #include <linux/delay.h> |
12 | #include <linux/init.h> | ||
13 | #include <linux/numa.h> | 12 | #include <linux/numa.h> |
14 | #include <linux/ftrace.h> | 13 | #include <linux/ftrace.h> |
15 | #include <linux/suspend.h> | 14 | #include <linux/suspend.h> |
diff --git a/arch/x86/kernel/machine_kexec_64.c b/arch/x86/kernel/machine_kexec_64.c index 4eabc160696f..679cef0791cd 100644 --- a/arch/x86/kernel/machine_kexec_64.c +++ b/arch/x86/kernel/machine_kexec_64.c | |||
@@ -279,5 +279,7 @@ void arch_crash_save_vmcoreinfo(void) | |||
279 | VMCOREINFO_SYMBOL(node_data); | 279 | VMCOREINFO_SYMBOL(node_data); |
280 | VMCOREINFO_LENGTH(node_data, MAX_NUMNODES); | 280 | VMCOREINFO_LENGTH(node_data, MAX_NUMNODES); |
281 | #endif | 281 | #endif |
282 | vmcoreinfo_append_str("KERNELOFFSET=%lx\n", | ||
283 | (unsigned long)&_text - __START_KERNEL); | ||
282 | } | 284 | } |
283 | 285 | ||
diff --git a/arch/x86/kernel/module.c b/arch/x86/kernel/module.c index 18be189368bb..e69f9882bf95 100644 --- a/arch/x86/kernel/module.c +++ b/arch/x86/kernel/module.c | |||
@@ -28,6 +28,7 @@ | |||
28 | #include <linux/mm.h> | 28 | #include <linux/mm.h> |
29 | #include <linux/gfp.h> | 29 | #include <linux/gfp.h> |
30 | #include <linux/jump_label.h> | 30 | #include <linux/jump_label.h> |
31 | #include <linux/random.h> | ||
31 | 32 | ||
32 | #include <asm/page.h> | 33 | #include <asm/page.h> |
33 | #include <asm/pgtable.h> | 34 | #include <asm/pgtable.h> |
@@ -43,13 +44,52 @@ do { \ | |||
43 | } while (0) | 44 | } while (0) |
44 | #endif | 45 | #endif |
45 | 46 | ||
47 | #ifdef CONFIG_RANDOMIZE_BASE | ||
48 | static unsigned long module_load_offset; | ||
49 | static int randomize_modules = 1; | ||
50 | |||
51 | /* Mutex protects the module_load_offset. */ | ||
52 | static DEFINE_MUTEX(module_kaslr_mutex); | ||
53 | |||
54 | static int __init parse_nokaslr(char *p) | ||
55 | { | ||
56 | randomize_modules = 0; | ||
57 | return 0; | ||
58 | } | ||
59 | early_param("nokaslr", parse_nokaslr); | ||
60 | |||
61 | static unsigned long int get_module_load_offset(void) | ||
62 | { | ||
63 | if (randomize_modules) { | ||
64 | mutex_lock(&module_kaslr_mutex); | ||
65 | /* | ||
66 | * Calculate the module_load_offset the first time this | ||
67 | * code is called. Once calculated it stays the same until | ||
68 | * reboot. | ||
69 | */ | ||
70 | if (module_load_offset == 0) | ||
71 | module_load_offset = | ||
72 | (get_random_int() % 1024 + 1) * PAGE_SIZE; | ||
73 | mutex_unlock(&module_kaslr_mutex); | ||
74 | } | ||
75 | return module_load_offset; | ||
76 | } | ||
77 | #else | ||
78 | static unsigned long int get_module_load_offset(void) | ||
79 | { | ||
80 | return 0; | ||
81 | } | ||
82 | #endif | ||
83 | |||
46 | void *module_alloc(unsigned long size) | 84 | void *module_alloc(unsigned long size) |
47 | { | 85 | { |
48 | if (PAGE_ALIGN(size) > MODULES_LEN) | 86 | if (PAGE_ALIGN(size) > MODULES_LEN) |
49 | return NULL; | 87 | return NULL; |
50 | return __vmalloc_node_range(size, 1, MODULES_VADDR, MODULES_END, | 88 | return __vmalloc_node_range(size, 1, |
51 | GFP_KERNEL | __GFP_HIGHMEM, PAGE_KERNEL_EXEC, | 89 | MODULES_VADDR + get_module_load_offset(), |
52 | NUMA_NO_NODE, __builtin_return_address(0)); | 90 | MODULES_END, GFP_KERNEL | __GFP_HIGHMEM, |
91 | PAGE_KERNEL_EXEC, NUMA_NO_NODE, | ||
92 | __builtin_return_address(0)); | ||
53 | } | 93 | } |
54 | 94 | ||
55 | #ifdef CONFIG_X86_32 | 95 | #ifdef CONFIG_X86_32 |
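With CONFIG_RANDOMIZE_BASE, the start of the module area is therefore shifted by (get_random_int() % 1024 + 1) * PAGE_SIZE, a per-boot offset drawn uniformly from PAGE_SIZE up to 1024 * PAGE_SIZE (4 KiB to 4 MiB with 4 KiB pages), computed on the first module load and reused for every later one. As a worked example, if get_random_int() returned 4097, the offset would be (4097 % 1024 + 1) * 4096 = 2 * 4096 = 8192 bytes.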
diff --git a/arch/x86/kernel/msr.c b/arch/x86/kernel/msr.c index 05266b5aae22..c9603ac80de5 100644 --- a/arch/x86/kernel/msr.c +++ b/arch/x86/kernel/msr.c | |||
@@ -259,14 +259,15 @@ static int __init msr_init(void) | |||
259 | goto out_chrdev; | 259 | goto out_chrdev; |
260 | } | 260 | } |
261 | msr_class->devnode = msr_devnode; | 261 | msr_class->devnode = msr_devnode; |
262 | get_online_cpus(); | 262 | |
263 | cpu_notifier_register_begin(); | ||
263 | for_each_online_cpu(i) { | 264 | for_each_online_cpu(i) { |
264 | err = msr_device_create(i); | 265 | err = msr_device_create(i); |
265 | if (err != 0) | 266 | if (err != 0) |
266 | goto out_class; | 267 | goto out_class; |
267 | } | 268 | } |
268 | register_hotcpu_notifier(&msr_class_cpu_notifier); | 269 | __register_hotcpu_notifier(&msr_class_cpu_notifier); |
269 | put_online_cpus(); | 270 | cpu_notifier_register_done(); |
270 | 271 | ||
271 | err = 0; | 272 | err = 0; |
272 | goto out; | 273 | goto out; |
@@ -275,7 +276,7 @@ out_class: | |||
275 | i = 0; | 276 | i = 0; |
276 | for_each_online_cpu(i) | 277 | for_each_online_cpu(i) |
277 | msr_device_destroy(i); | 278 | msr_device_destroy(i); |
278 | put_online_cpus(); | 279 | cpu_notifier_register_done(); |
279 | class_destroy(msr_class); | 280 | class_destroy(msr_class); |
280 | out_chrdev: | 281 | out_chrdev: |
281 | __unregister_chrdev(MSR_MAJOR, 0, NR_CPUS, "cpu/msr"); | 282 | __unregister_chrdev(MSR_MAJOR, 0, NR_CPUS, "cpu/msr"); |
@@ -286,13 +287,14 @@ out: | |||
286 | static void __exit msr_exit(void) | 287 | static void __exit msr_exit(void) |
287 | { | 288 | { |
288 | int cpu = 0; | 289 | int cpu = 0; |
289 | get_online_cpus(); | 290 | |
291 | cpu_notifier_register_begin(); | ||
290 | for_each_online_cpu(cpu) | 292 | for_each_online_cpu(cpu) |
291 | msr_device_destroy(cpu); | 293 | msr_device_destroy(cpu); |
292 | class_destroy(msr_class); | 294 | class_destroy(msr_class); |
293 | __unregister_chrdev(MSR_MAJOR, 0, NR_CPUS, "cpu/msr"); | 295 | __unregister_chrdev(MSR_MAJOR, 0, NR_CPUS, "cpu/msr"); |
294 | unregister_hotcpu_notifier(&msr_class_cpu_notifier); | 296 | __unregister_hotcpu_notifier(&msr_class_cpu_notifier); |
295 | put_online_cpus(); | 297 | cpu_notifier_register_done(); |
296 | } | 298 | } |
297 | 299 | ||
298 | module_init(msr_init); | 300 | module_init(msr_init); |
diff --git a/arch/x86/kernel/nmi.c b/arch/x86/kernel/nmi.c index 6fcb49ce50a1..b4872b999a71 100644 --- a/arch/x86/kernel/nmi.c +++ b/arch/x86/kernel/nmi.c | |||
@@ -87,6 +87,7 @@ __setup("unknown_nmi_panic", setup_unknown_nmi_panic); | |||
87 | #define nmi_to_desc(type) (&nmi_desc[type]) | 87 | #define nmi_to_desc(type) (&nmi_desc[type]) |
88 | 88 | ||
89 | static u64 nmi_longest_ns = 1 * NSEC_PER_MSEC; | 89 | static u64 nmi_longest_ns = 1 * NSEC_PER_MSEC; |
90 | |||
90 | static int __init nmi_warning_debugfs(void) | 91 | static int __init nmi_warning_debugfs(void) |
91 | { | 92 | { |
92 | debugfs_create_u64("nmi_longest_ns", 0644, | 93 | debugfs_create_u64("nmi_longest_ns", 0644, |
@@ -95,6 +96,20 @@ static int __init nmi_warning_debugfs(void) | |||
95 | } | 96 | } |
96 | fs_initcall(nmi_warning_debugfs); | 97 | fs_initcall(nmi_warning_debugfs); |
97 | 98 | ||
99 | static void nmi_max_handler(struct irq_work *w) | ||
100 | { | ||
101 | struct nmiaction *a = container_of(w, struct nmiaction, irq_work); | ||
102 | int remainder_ns, decimal_msecs; | ||
103 | u64 whole_msecs = ACCESS_ONCE(a->max_duration); | ||
104 | |||
105 | remainder_ns = do_div(whole_msecs, (1000 * 1000)); | ||
106 | decimal_msecs = remainder_ns / 1000; | ||
107 | |||
108 | printk_ratelimited(KERN_INFO | ||
109 | "INFO: NMI handler (%ps) took too long to run: %lld.%03d msecs\n", | ||
110 | a->handler, whole_msecs, decimal_msecs); | ||
111 | } | ||
112 | |||
98 | static int __kprobes nmi_handle(unsigned int type, struct pt_regs *regs, bool b2b) | 113 | static int __kprobes nmi_handle(unsigned int type, struct pt_regs *regs, bool b2b) |
99 | { | 114 | { |
100 | struct nmi_desc *desc = nmi_to_desc(type); | 115 | struct nmi_desc *desc = nmi_to_desc(type); |
@@ -110,26 +125,20 @@ static int __kprobes nmi_handle(unsigned int type, struct pt_regs *regs, bool b2 | |||
110 | * to handle those situations. | 125 | * to handle those situations. |
111 | */ | 126 | */ |
112 | list_for_each_entry_rcu(a, &desc->head, list) { | 127 | list_for_each_entry_rcu(a, &desc->head, list) { |
113 | u64 before, delta, whole_msecs; | 128 | int thishandled; |
114 | int remainder_ns, decimal_msecs, thishandled; | 129 | u64 delta; |
115 | 130 | ||
116 | before = sched_clock(); | 131 | delta = sched_clock(); |
117 | thishandled = a->handler(type, regs); | 132 | thishandled = a->handler(type, regs); |
118 | handled += thishandled; | 133 | handled += thishandled; |
119 | delta = sched_clock() - before; | 134 | delta = sched_clock() - delta; |
120 | trace_nmi_handler(a->handler, (int)delta, thishandled); | 135 | trace_nmi_handler(a->handler, (int)delta, thishandled); |
121 | 136 | ||
122 | if (delta < nmi_longest_ns) | 137 | if (delta < nmi_longest_ns || delta < a->max_duration) |
123 | continue; | 138 | continue; |
124 | 139 | ||
125 | nmi_longest_ns = delta; | 140 | a->max_duration = delta; |
126 | whole_msecs = delta; | 141 | irq_work_queue(&a->irq_work); |
127 | remainder_ns = do_div(whole_msecs, (1000 * 1000)); | ||
128 | decimal_msecs = remainder_ns / 1000; | ||
129 | printk_ratelimited(KERN_INFO | ||
130 | "INFO: NMI handler (%ps) took too long to run: " | ||
131 | "%lld.%03d msecs\n", a->handler, whole_msecs, | ||
132 | decimal_msecs); | ||
133 | } | 142 | } |
134 | 143 | ||
135 | rcu_read_unlock(); | 144 | rcu_read_unlock(); |
@@ -146,6 +155,8 @@ int __register_nmi_handler(unsigned int type, struct nmiaction *action) | |||
146 | if (!action->handler) | 155 | if (!action->handler) |
147 | return -EINVAL; | 156 | return -EINVAL; |
148 | 157 | ||
158 | init_irq_work(&action->irq_work, nmi_max_handler); | ||
159 | |||
149 | spin_lock_irqsave(&desc->lock, flags); | 160 | spin_lock_irqsave(&desc->lock, flags); |
150 | 161 | ||
151 | /* | 162 | /* |
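Calling printk() from NMI context can deadlock on the console lock, so the duration report is deferred: the NMI path only records max_duration and queues an irq_work, and nmi_max_handler() prints from ordinary IRQ context later. A minimal sketch of the same deferral pattern in isolation, with hypothetical report_work/report_fn names:

	static struct irq_work report_work;

	static void report_fn(struct irq_work *w)
	{
		pr_info("deferred report\n");	/* safe: runs in IRQ context */
	}

	/* once, at init time */
	init_irq_work(&report_work, report_fn);

	/* from NMI context: queue only, never printk directly */
	irq_work_queue(&report_work);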
diff --git a/arch/x86/kernel/pci-calgary_64.c b/arch/x86/kernel/pci-calgary_64.c index 299d49302e7d..0497f719977d 100644 --- a/arch/x86/kernel/pci-calgary_64.c +++ b/arch/x86/kernel/pci-calgary_64.c | |||
@@ -1207,23 +1207,31 @@ error: | |||
1207 | return ret; | 1207 | return ret; |
1208 | } | 1208 | } |
1209 | 1209 | ||
1210 | static inline int __init determine_tce_table_size(u64 ram) | 1210 | static inline int __init determine_tce_table_size(void) |
1211 | { | 1211 | { |
1212 | int ret; | 1212 | int ret; |
1213 | 1213 | ||
1214 | if (specified_table_size != TCE_TABLE_SIZE_UNSPECIFIED) | 1214 | if (specified_table_size != TCE_TABLE_SIZE_UNSPECIFIED) |
1215 | return specified_table_size; | 1215 | return specified_table_size; |
1216 | 1216 | ||
1217 | /* | 1217 | if (is_kdump_kernel() && saved_max_pfn) { |
1218 | * Table sizes are from 0 to 7 (TCE_TABLE_SIZE_64K to | 1218 | /* |
1219 | * TCE_TABLE_SIZE_8M). Table size 0 has 8K entries and each | 1219 | * Table sizes are from 0 to 7 (TCE_TABLE_SIZE_64K to |
1220 | * larger table size has twice as many entries, so shift the | 1220 | * TCE_TABLE_SIZE_8M). Table size 0 has 8K entries and each |
1221 | * max ram address by 13 to divide by 8K and then look at the | 1221 | * larger table size has twice as many entries, so shift the |
1222 | * order of the result to choose between 0-7. | 1222 | * max ram address by 13 to divide by 8K and then look at the |
1223 | */ | 1223 | * order of the result to choose between 0-7. |
1224 | ret = get_order(ram >> 13); | 1224 | */ |
1225 | if (ret > TCE_TABLE_SIZE_8M) | 1225 | ret = get_order((saved_max_pfn * PAGE_SIZE) >> 13); |
1226 | if (ret > TCE_TABLE_SIZE_8M) | ||
1227 | ret = TCE_TABLE_SIZE_8M; | ||
1228 | } else { | ||
1229 | /* | ||
1230 | * Use 8M by default (suggested by Muli) if it's not | ||
1231 | * kdump kernel and saved_max_pfn isn't set. | ||
1232 | */ | ||
1226 | ret = TCE_TABLE_SIZE_8M; | 1233 | ret = TCE_TABLE_SIZE_8M; |
1234 | } | ||
1227 | 1235 | ||
1228 | return ret; | 1236 | return ret; |
1229 | } | 1237 | } |
@@ -1418,8 +1426,7 @@ int __init detect_calgary(void) | |||
1418 | return -ENOMEM; | 1426 | return -ENOMEM; |
1419 | } | 1427 | } |
1420 | 1428 | ||
1421 | specified_table_size = determine_tce_table_size((is_kdump_kernel() ? | 1429 | specified_table_size = determine_tce_table_size(); |
1422 | saved_max_pfn : max_pfn) * PAGE_SIZE); | ||
1423 | 1430 | ||
1424 | for (bus = 0; bus < MAX_PHB_BUS_NUM; bus++) { | 1431 | for (bus = 0; bus < MAX_PHB_BUS_NUM; bus++) { |
1425 | struct calgary_bus_info *info = &bus_info[bus]; | 1432 | struct calgary_bus_info *info = &bus_info[bus]; |
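As a worked example of the sizing math that determine_tce_table_size() keeps for the kdump case: with saved_max_pfn covering 8 GiB of RAM, (saved_max_pfn * PAGE_SIZE) >> 13 is 2^33 >> 13 = 2^20, and get_order(2^20) is 8 (256 four-KiB pages), which exceeds TCE_TABLE_SIZE_8M (7) and is clamped to the 8M table; smaller memory sizes land proportionally lower in the 0-7 range.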
diff --git a/arch/x86/kernel/pci-dma.c b/arch/x86/kernel/pci-dma.c index 872079a67e4d..f7d0672481fd 100644 --- a/arch/x86/kernel/pci-dma.c +++ b/arch/x86/kernel/pci-dma.c | |||
@@ -100,8 +100,10 @@ void *dma_generic_alloc_coherent(struct device *dev, size_t size, | |||
100 | flag |= __GFP_ZERO; | 100 | flag |= __GFP_ZERO; |
101 | again: | 101 | again: |
102 | page = NULL; | 102 | page = NULL; |
103 | if (!(flag & GFP_ATOMIC)) | 103 | /* CMA can be used only in the context which permits sleeping */ |
104 | if (flag & __GFP_WAIT) | ||
104 | page = dma_alloc_from_contiguous(dev, count, get_order(size)); | 105 | page = dma_alloc_from_contiguous(dev, count, get_order(size)); |
106 | /* fallback */ | ||
105 | if (!page) | 107 | if (!page) |
106 | page = alloc_pages_node(dev_to_node(dev), flag, get_order(size)); | 108 | page = alloc_pages_node(dev_to_node(dev), flag, get_order(size)); |
107 | if (!page) | 109 | if (!page) |
diff --git a/arch/x86/kernel/pci-nommu.c b/arch/x86/kernel/pci-nommu.c index 871be4a84c7d..da15918d1c81 100644 --- a/arch/x86/kernel/pci-nommu.c +++ b/arch/x86/kernel/pci-nommu.c | |||
@@ -3,7 +3,6 @@ | |||
3 | #include <linux/dma-mapping.h> | 3 | #include <linux/dma-mapping.h> |
4 | #include <linux/scatterlist.h> | 4 | #include <linux/scatterlist.h> |
5 | #include <linux/string.h> | 5 | #include <linux/string.h> |
6 | #include <linux/init.h> | ||
7 | #include <linux/gfp.h> | 6 | #include <linux/gfp.h> |
8 | #include <linux/pci.h> | 7 | #include <linux/pci.h> |
9 | #include <linux/mm.h> | 8 | #include <linux/mm.h> |
diff --git a/arch/x86/kernel/process.c b/arch/x86/kernel/process.c index 3fb8d95ab8b5..4505e2a950d8 100644 --- a/arch/x86/kernel/process.c +++ b/arch/x86/kernel/process.c | |||
@@ -298,10 +298,7 @@ void arch_cpu_idle_dead(void) | |||
298 | */ | 298 | */ |
299 | void arch_cpu_idle(void) | 299 | void arch_cpu_idle(void) |
300 | { | 300 | { |
301 | if (cpuidle_idle_call()) | 301 | x86_idle(); |
302 | x86_idle(); | ||
303 | else | ||
304 | local_irq_enable(); | ||
305 | } | 302 | } |
306 | 303 | ||
307 | /* | 304 | /* |
diff --git a/arch/x86/kernel/process_32.c b/arch/x86/kernel/process_32.c index 6f1236c29c4b..7bc86bbe7485 100644 --- a/arch/x86/kernel/process_32.c +++ b/arch/x86/kernel/process_32.c | |||
@@ -24,7 +24,6 @@ | |||
24 | #include <linux/interrupt.h> | 24 | #include <linux/interrupt.h> |
25 | #include <linux/delay.h> | 25 | #include <linux/delay.h> |
26 | #include <linux/reboot.h> | 26 | #include <linux/reboot.h> |
27 | #include <linux/init.h> | ||
28 | #include <linux/mc146818rtc.h> | 27 | #include <linux/mc146818rtc.h> |
29 | #include <linux/module.h> | 28 | #include <linux/module.h> |
30 | #include <linux/kallsyms.h> | 29 | #include <linux/kallsyms.h> |
@@ -315,6 +314,10 @@ __switch_to(struct task_struct *prev_p, struct task_struct *next_p) | |||
315 | */ | 314 | */ |
316 | arch_end_context_switch(next_p); | 315 | arch_end_context_switch(next_p); |
317 | 316 | ||
317 | this_cpu_write(kernel_stack, | ||
318 | (unsigned long)task_stack_page(next_p) + | ||
319 | THREAD_SIZE - KERNEL_STACK_OFFSET); | ||
320 | |||
318 | /* | 321 | /* |
319 | * Restore %gs if needed (which is common) | 322 | * Restore %gs if needed (which is common) |
320 | */ | 323 | */ |
diff --git a/arch/x86/kernel/ptrace.c b/arch/x86/kernel/ptrace.c index 7461f50d5bb1..678c0ada3b3c 100644 --- a/arch/x86/kernel/ptrace.c +++ b/arch/x86/kernel/ptrace.c | |||
@@ -184,14 +184,14 @@ unsigned long kernel_stack_pointer(struct pt_regs *regs) | |||
184 | { | 184 | { |
185 | unsigned long context = (unsigned long)regs & ~(THREAD_SIZE - 1); | 185 | unsigned long context = (unsigned long)regs & ~(THREAD_SIZE - 1); |
186 | unsigned long sp = (unsigned long)®s->sp; | 186 | unsigned long sp = (unsigned long)®s->sp; |
187 | struct thread_info *tinfo; | 187 | u32 *prev_esp; |
188 | 188 | ||
189 | if (context == (sp & ~(THREAD_SIZE - 1))) | 189 | if (context == (sp & ~(THREAD_SIZE - 1))) |
190 | return sp; | 190 | return sp; |
191 | 191 | ||
192 | tinfo = (struct thread_info *)context; | 192 | prev_esp = (u32 *)(context); |
193 | if (tinfo->previous_esp) | 193 | if (*prev_esp) |
194 | return tinfo->previous_esp; | 194 | return (unsigned long)*prev_esp; |
195 | 195 | ||
196 | return (unsigned long)regs; | 196 | return (unsigned long)regs; |
197 | } | 197 | } |
diff --git a/arch/x86/kernel/quirks.c b/arch/x86/kernel/quirks.c index 04ee1e2e4c02..ff898bbf579d 100644 --- a/arch/x86/kernel/quirks.c +++ b/arch/x86/kernel/quirks.c | |||
@@ -529,7 +529,7 @@ static void quirk_amd_nb_node(struct pci_dev *dev) | |||
529 | return; | 529 | return; |
530 | 530 | ||
531 | pci_read_config_dword(nb_ht, 0x60, &val); | 531 | pci_read_config_dword(nb_ht, 0x60, &val); |
532 | node = val & 7; | 532 | node = pcibus_to_node(dev->bus) | (val & 7); |
533 | /* | 533 | /* |
534 | * Some hardware may return an invalid node ID, | 534 | * Some hardware may return an invalid node ID, |
535 | * so check it first: | 535 | * so check it first: |
@@ -571,3 +571,40 @@ DECLARE_PCI_FIXUP_FINAL(PCI_VENDOR_ID_AMD, PCI_DEVICE_ID_AMD_15H_NB_F5, | |||
571 | quirk_amd_nb_node); | 571 | quirk_amd_nb_node); |
572 | 572 | ||
573 | #endif | 573 | #endif |
574 | |||
575 | #ifdef CONFIG_PCI | ||
576 | /* | ||
577 | * Processor does not ensure DRAM scrub read/write sequence | ||
578 | * is atomic wrt accesses to CC6 save state area. Therefore | ||
579 | * if a concurrent scrub read/write access is to the same address | ||
580 | * the entry may appear as if it is not written. This quirk | ||
581 | * applies to Fam16h models 00h-0Fh | ||
582 | * | ||
583 | * See "Revision Guide" for AMD F16h models 00h-0fh, | ||
584 | * document 51810 rev. 3.04, Nov 2013 | ||
585 | */ | ||
586 | static void amd_disable_seq_and_redirect_scrub(struct pci_dev *dev) | ||
587 | { | ||
588 | u32 val; | ||
589 | |||
590 | /* | ||
591 | * Suggested workaround: | ||
592 | * set D18F3x58[4:0] = 00h and set D18F3x5C[0] = 0b | ||
593 | */ | ||
594 | pci_read_config_dword(dev, 0x58, &val); | ||
595 | if (val & 0x1F) { | ||
596 | val &= ~(0x1F); | ||
597 | pci_write_config_dword(dev, 0x58, val); | ||
598 | } | ||
599 | |||
600 | pci_read_config_dword(dev, 0x5C, &val); | ||
601 | if (val & BIT(0)) { | ||
602 | val &= ~BIT(0); | ||
603 | pci_write_config_dword(dev, 0x5c, val); | ||
604 | } | ||
605 | } | ||
606 | |||
607 | DECLARE_PCI_FIXUP_FINAL(PCI_VENDOR_ID_AMD, PCI_DEVICE_ID_AMD_16H_NB_F3, | ||
608 | amd_disable_seq_and_redirect_scrub); | ||
609 | |||
610 | #endif | ||
diff --git a/arch/x86/kernel/reboot.c b/arch/x86/kernel/reboot.c index c752cb43e52f..654b46574b91 100644 --- a/arch/x86/kernel/reboot.c +++ b/arch/x86/kernel/reboot.c | |||
@@ -464,9 +464,12 @@ void __attribute__((weak)) mach_reboot_fixups(void) | |||
464 | * 2) If still alive, write to the keyboard controller | 464 | * 2) If still alive, write to the keyboard controller |
465 | * 3) If still alive, write to the ACPI reboot register again | 465 | * 3) If still alive, write to the ACPI reboot register again |
466 | * 4) If still alive, write to the keyboard controller again | 466 | * 4) If still alive, write to the keyboard controller again |
467 | * 5) If still alive, call the EFI runtime service to reboot | ||
468 | * 6) If still alive, write to the PCI IO port 0xCF9 to reboot | ||
469 | * 7) If still alive, inform BIOS to do a proper reboot | ||
467 | * | 470 | * |
468 | * If the machine is still alive at this stage, it gives up. We default to | 471 | * If the machine is still alive at this stage, it gives up. We default to |
469 | * following the same pattern, except that if we're still alive after (4) we'll | 472 | * following the same pattern, except that if we're still alive after (7) we'll |
470 | * try to force a triple fault and then cycle between hitting the keyboard | 473 | * try to force a triple fault and then cycle between hitting the keyboard |
471 | * controller and doing that | 474 | * controller and doing that |
472 | */ | 475 | */ |
@@ -502,7 +505,7 @@ static void native_machine_emergency_restart(void) | |||
502 | attempt = 1; | 505 | attempt = 1; |
503 | reboot_type = BOOT_ACPI; | 506 | reboot_type = BOOT_ACPI; |
504 | } else { | 507 | } else { |
505 | reboot_type = BOOT_TRIPLE; | 508 | reboot_type = BOOT_EFI; |
506 | } | 509 | } |
507 | break; | 510 | break; |
508 | 511 | ||
@@ -510,13 +513,15 @@ static void native_machine_emergency_restart(void) | |||
510 | load_idt(&no_idt); | 513 | load_idt(&no_idt); |
511 | __asm__ __volatile__("int3"); | 514 | __asm__ __volatile__("int3"); |
512 | 515 | ||
516 | /* We're probably dead after this, but... */ | ||
513 | reboot_type = BOOT_KBD; | 517 | reboot_type = BOOT_KBD; |
514 | break; | 518 | break; |
515 | 519 | ||
516 | case BOOT_BIOS: | 520 | case BOOT_BIOS: |
517 | machine_real_restart(MRR_BIOS); | 521 | machine_real_restart(MRR_BIOS); |
518 | 522 | ||
519 | reboot_type = BOOT_KBD; | 523 | /* We're probably dead after this, but... */ |
524 | reboot_type = BOOT_TRIPLE; | ||
520 | break; | 525 | break; |
521 | 526 | ||
522 | case BOOT_ACPI: | 527 | case BOOT_ACPI: |
@@ -530,7 +535,7 @@ static void native_machine_emergency_restart(void) | |||
530 | EFI_RESET_WARM : | 535 | EFI_RESET_WARM : |
531 | EFI_RESET_COLD, | 536 | EFI_RESET_COLD, |
532 | EFI_SUCCESS, 0, NULL); | 537 | EFI_SUCCESS, 0, NULL); |
533 | reboot_type = BOOT_KBD; | 538 | reboot_type = BOOT_CF9_COND; |
534 | break; | 539 | break; |
535 | 540 | ||
536 | case BOOT_CF9: | 541 | case BOOT_CF9: |
@@ -548,7 +553,7 @@ static void native_machine_emergency_restart(void) | |||
548 | outb(cf9|reboot_code, 0xcf9); | 553 | outb(cf9|reboot_code, 0xcf9); |
549 | udelay(50); | 554 | udelay(50); |
550 | } | 555 | } |
551 | reboot_type = BOOT_KBD; | 556 | reboot_type = BOOT_BIOS; |
552 | break; | 557 | break; |
553 | } | 558 | } |
554 | } | 559 | } |
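For readers tracing the new fallback order above (ACPI → keyboard → EFI → port 0xCF9 → BIOS → triple fault, each case arming the next method): a minimal, compilable sketch of that retry state machine. The try_method() helper is a hypothetical stand-in for the kernel's actual reset paths, and the second keyboard-controller pass and reboot= overrides are elided.

#include <stdio.h>

enum boot_method { M_ACPI, M_KBD, M_EFI, M_CF9_COND, M_BIOS, M_TRIPLE };

/* Hypothetical stand-in for the real reset attempts. */
static void try_method(const char *name)
{
        printf("trying %s reset\n", name);
}

int main(void)
{
        enum boot_method m = M_ACPI;
        int spins = 0;

        while (spins++ < 8) {   /* real hardware never survives this long */
                switch (m) {
                case M_ACPI:     try_method("ACPI");         m = M_KBD;      break;
                case M_KBD:      try_method("keyboard");     m = M_EFI;      break;
                case M_EFI:      try_method("EFI runtime");  m = M_CF9_COND; break;
                case M_CF9_COND: try_method("port 0xCF9");   m = M_BIOS;     break;
                case M_BIOS:     try_method("BIOS");         m = M_TRIPLE;   break;
                case M_TRIPLE:   try_method("triple fault"); m = M_KBD;      break;
                }
        }
        return 0;
}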
diff --git a/arch/x86/kernel/setup.c b/arch/x86/kernel/setup.c index cb233bc9dee3..09c76d265550 100644 --- a/arch/x86/kernel/setup.c +++ b/arch/x86/kernel/setup.c | |||
@@ -295,6 +295,8 @@ static void __init reserve_brk(void) | |||
295 | _brk_start = 0; | 295 | _brk_start = 0; |
296 | } | 296 | } |
297 | 297 | ||
298 | u64 relocated_ramdisk; | ||
299 | |||
298 | #ifdef CONFIG_BLK_DEV_INITRD | 300 | #ifdef CONFIG_BLK_DEV_INITRD |
299 | 301 | ||
300 | static u64 __init get_ramdisk_image(void) | 302 | static u64 __init get_ramdisk_image(void) |
@@ -321,25 +323,24 @@ static void __init relocate_initrd(void) | |||
321 | u64 ramdisk_image = get_ramdisk_image(); | 323 | u64 ramdisk_image = get_ramdisk_image(); |
322 | u64 ramdisk_size = get_ramdisk_size(); | 324 | u64 ramdisk_size = get_ramdisk_size(); |
323 | u64 area_size = PAGE_ALIGN(ramdisk_size); | 325 | u64 area_size = PAGE_ALIGN(ramdisk_size); |
324 | u64 ramdisk_here; | ||
325 | unsigned long slop, clen, mapaddr; | 326 | unsigned long slop, clen, mapaddr; |
326 | char *p, *q; | 327 | char *p, *q; |
327 | 328 | ||
328 | /* We need to move the initrd down into directly mapped mem */ | 329 | /* We need to move the initrd down into directly mapped mem */ |
329 | ramdisk_here = memblock_find_in_range(0, PFN_PHYS(max_pfn_mapped), | 330 | relocated_ramdisk = memblock_find_in_range(0, PFN_PHYS(max_pfn_mapped), |
330 | area_size, PAGE_SIZE); | 331 | area_size, PAGE_SIZE); |
331 | 332 | ||
332 | if (!ramdisk_here) | 333 | if (!relocated_ramdisk) |
333 | panic("Cannot find place for new RAMDISK of size %lld\n", | 334 | panic("Cannot find place for new RAMDISK of size %lld\n", |
334 | ramdisk_size); | 335 | ramdisk_size); |
335 | 336 | ||
336 | /* Note: this includes all the mem currently occupied by | 337 | /* Note: this includes all the mem currently occupied by |
337 | the initrd, we rely on that fact to keep the data intact. */ | 338 | the initrd, we rely on that fact to keep the data intact. */ |
338 | memblock_reserve(ramdisk_here, area_size); | 339 | memblock_reserve(relocated_ramdisk, area_size); |
339 | initrd_start = ramdisk_here + PAGE_OFFSET; | 340 | initrd_start = relocated_ramdisk + PAGE_OFFSET; |
340 | initrd_end = initrd_start + ramdisk_size; | 341 | initrd_end = initrd_start + ramdisk_size; |
341 | printk(KERN_INFO "Allocated new RAMDISK: [mem %#010llx-%#010llx]\n", | 342 | printk(KERN_INFO "Allocated new RAMDISK: [mem %#010llx-%#010llx]\n", |
342 | ramdisk_here, ramdisk_here + ramdisk_size - 1); | 343 | relocated_ramdisk, relocated_ramdisk + ramdisk_size - 1); |
343 | 344 | ||
344 | q = (char *)initrd_start; | 345 | q = (char *)initrd_start; |
345 | 346 | ||
@@ -363,7 +364,7 @@ static void __init relocate_initrd(void) | |||
363 | printk(KERN_INFO "Move RAMDISK from [mem %#010llx-%#010llx] to" | 364 | printk(KERN_INFO "Move RAMDISK from [mem %#010llx-%#010llx] to" |
364 | " [mem %#010llx-%#010llx]\n", | 365 | " [mem %#010llx-%#010llx]\n", |
365 | ramdisk_image, ramdisk_image + ramdisk_size - 1, | 366 | ramdisk_image, ramdisk_image + ramdisk_size - 1, |
366 | ramdisk_here, ramdisk_here + ramdisk_size - 1); | 367 | relocated_ramdisk, relocated_ramdisk + ramdisk_size - 1); |
367 | } | 368 | } |
368 | 369 | ||
369 | static void __init early_reserve_initrd(void) | 370 | static void __init early_reserve_initrd(void) |
@@ -447,6 +448,9 @@ static void __init parse_setup_data(void) | |||
447 | case SETUP_DTB: | 448 | case SETUP_DTB: |
448 | add_dtb(pa_data); | 449 | add_dtb(pa_data); |
449 | break; | 450 | break; |
451 | case SETUP_EFI: | ||
452 | parse_efi_setup(pa_data, data_len); | ||
453 | break; | ||
450 | default: | 454 | default: |
451 | break; | 455 | break; |
452 | } | 456 | } |
@@ -824,6 +828,20 @@ static void __init trim_low_memory_range(void) | |||
824 | } | 828 | } |
825 | 829 | ||
826 | /* | 830 | /* |
831 | * Dump out kernel offset information on panic. | ||
832 | */ | ||
833 | static int | ||
834 | dump_kernel_offset(struct notifier_block *self, unsigned long v, void *p) | ||
835 | { | ||
836 | pr_emerg("Kernel Offset: 0x%lx from 0x%lx " | ||
837 | "(relocation range: 0x%lx-0x%lx)\n", | ||
838 | (unsigned long)&_text - __START_KERNEL, __START_KERNEL, | ||
839 | __START_KERNEL_map, MODULES_VADDR-1); | ||
840 | |||
841 | return 0; | ||
842 | } | ||
843 | |||
844 | /* | ||
827 | * Determine if we were loaded by an EFI loader. If so, then we have also been | 845 | * Determine if we were loaded by an EFI loader. If so, then we have also been |
828 | * passed the efi memmap, systab, etc., so we should use these data structures | 846 | * passed the efi memmap, systab, etc., so we should use these data structures |
829 | * for initialization. Note, the efi init code path is determined by the | 847 | * for initialization. Note, the efi init code path is determined by the |
@@ -851,7 +869,6 @@ void __init setup_arch(char **cmdline_p) | |||
851 | 869 | ||
852 | #ifdef CONFIG_X86_32 | 870 | #ifdef CONFIG_X86_32 |
853 | memcpy(&boot_cpu_data, &new_cpu_data, sizeof(new_cpu_data)); | 871 | memcpy(&boot_cpu_data, &new_cpu_data, sizeof(new_cpu_data)); |
854 | visws_early_detect(); | ||
855 | 872 | ||
856 | /* | 873 | /* |
857 | * copy kernel address range established so far and switch | 874 | * copy kernel address range established so far and switch |
@@ -908,11 +925,11 @@ void __init setup_arch(char **cmdline_p) | |||
908 | #ifdef CONFIG_EFI | 925 | #ifdef CONFIG_EFI |
909 | if (!strncmp((char *)&boot_params.efi_info.efi_loader_signature, | 926 | if (!strncmp((char *)&boot_params.efi_info.efi_loader_signature, |
910 | "EL32", 4)) { | 927 | "EL32", 4)) { |
911 | set_bit(EFI_BOOT, &x86_efi_facility); | 928 | set_bit(EFI_BOOT, &efi.flags); |
912 | } else if (!strncmp((char *)&boot_params.efi_info.efi_loader_signature, | 929 | } else if (!strncmp((char *)&boot_params.efi_info.efi_loader_signature, |
913 | "EL64", 4)) { | 930 | "EL64", 4)) { |
914 | set_bit(EFI_BOOT, &x86_efi_facility); | 931 | set_bit(EFI_BOOT, &efi.flags); |
915 | set_bit(EFI_64BIT, &x86_efi_facility); | 932 | set_bit(EFI_64BIT, &efi.flags); |
916 | } | 933 | } |
917 | 934 | ||
918 | if (efi_enabled(EFI_BOOT)) | 935 | if (efi_enabled(EFI_BOOT)) |
@@ -924,8 +941,6 @@ void __init setup_arch(char **cmdline_p) | |||
924 | iomem_resource.end = (1ULL << boot_cpu_data.x86_phys_bits) - 1; | 941 | iomem_resource.end = (1ULL << boot_cpu_data.x86_phys_bits) - 1; |
925 | setup_memory_map(); | 942 | setup_memory_map(); |
926 | parse_setup_data(); | 943 | parse_setup_data(); |
927 | /* update the e820_saved too */ | ||
928 | e820_reserve_setup_data(); | ||
929 | 944 | ||
930 | copy_edd(); | 945 | copy_edd(); |
931 | 946 | ||
@@ -987,6 +1002,8 @@ void __init setup_arch(char **cmdline_p) | |||
987 | early_dump_pci_devices(); | 1002 | early_dump_pci_devices(); |
988 | #endif | 1003 | #endif |
989 | 1004 | ||
1005 | /* update the e820_saved too */ | ||
1006 | e820_reserve_setup_data(); | ||
990 | finish_e820_parsing(); | 1007 | finish_e820_parsing(); |
991 | 1008 | ||
992 | if (efi_enabled(EFI_BOOT)) | 1009 | if (efi_enabled(EFI_BOOT)) |
@@ -1221,14 +1238,8 @@ void __init setup_arch(char **cmdline_p) | |||
1221 | register_refined_jiffies(CLOCK_TICK_RATE); | 1238 | register_refined_jiffies(CLOCK_TICK_RATE); |
1222 | 1239 | ||
1223 | #ifdef CONFIG_EFI | 1240 | #ifdef CONFIG_EFI |
1224 | /* Once setup is done above, unmap the EFI memory map on | 1241 | if (efi_enabled(EFI_BOOT)) |
1225 | * mismatched firmware/kernel architectures since there is no | 1242 | efi_apply_memmap_quirks(); |
1226 | * support for runtime services. | ||
1227 | */ | ||
1228 | if (efi_enabled(EFI_BOOT) && !efi_is_native()) { | ||
1229 | pr_info("efi: Setup done, disabling due to 32/64-bit mismatch\n"); | ||
1230 | efi_unmap_memmap(); | ||
1231 | } | ||
1232 | #endif | 1243 | #endif |
1233 | } | 1244 | } |
1234 | 1245 | ||
@@ -1248,3 +1259,15 @@ void __init i386_reserve_resources(void) | |||
1248 | } | 1259 | } |
1249 | 1260 | ||
1250 | #endif /* CONFIG_X86_32 */ | 1261 | #endif /* CONFIG_X86_32 */ |
1262 | |||
1263 | static struct notifier_block kernel_offset_notifier = { | ||
1264 | .notifier_call = dump_kernel_offset | ||
1265 | }; | ||
1266 | |||
1267 | static int __init register_kernel_offset_dumper(void) | ||
1268 | { | ||
1269 | atomic_notifier_chain_register(&panic_notifier_list, | ||
1270 | &kernel_offset_notifier); | ||
1271 | return 0; | ||
1272 | } | ||
1273 | __initcall(register_kernel_offset_dumper); | ||
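register_kernel_offset_dumper() above is an instance of the generic panic-notifier idiom. A minimal sketch of the same pattern for any late diagnostic hook; my_panic_cb and my_panic_nb are illustrative names, not kernel symbols.

#include <linux/init.h>
#include <linux/kernel.h>
#include <linux/notifier.h>

static int my_panic_cb(struct notifier_block *self, unsigned long ev, void *p)
{
        /* Runs in panic context: keep it simple, never sleep here. */
        pr_emerg("my diagnostic state: event=%lu\n", ev);
        return NOTIFY_DONE;
}

static struct notifier_block my_panic_nb = {
        .notifier_call = my_panic_cb,
};

static int __init my_panic_init(void)
{
        atomic_notifier_chain_register(&panic_notifier_list, &my_panic_nb);
        return 0;
}
__initcall(my_panic_init);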
diff --git a/arch/x86/kernel/smpboot.c b/arch/x86/kernel/smpboot.c index 85dc05a3aa02..34826934d4a7 100644 --- a/arch/x86/kernel/smpboot.c +++ b/arch/x86/kernel/smpboot.c | |||
@@ -122,8 +122,9 @@ static void smp_callin(void) | |||
122 | * Since CPU0 is not woken up by INIT, it doesn't wait for the IPI. | 122 | * Since CPU0 is not woken up by INIT, it doesn't wait for the IPI. |
123 | */ | 123 | */ |
124 | cpuid = smp_processor_id(); | 124 | cpuid = smp_processor_id(); |
125 | if (apic->wait_for_init_deassert && cpuid != 0) | 125 | if (apic->wait_for_init_deassert && cpuid) |
126 | apic->wait_for_init_deassert(&init_deasserted); | 126 | while (!atomic_read(&init_deasserted)) |
127 | cpu_relax(); | ||
127 | 128 | ||
128 | /* | 129 | /* |
129 | * (This works even if the APIC is not enabled.) | 130 | * (This works even if the APIC is not enabled.) |
@@ -701,11 +702,15 @@ wakeup_cpu_via_init_nmi(int cpu, unsigned long start_ip, int apicid, | |||
701 | int id; | 702 | int id; |
702 | int boot_error; | 703 | int boot_error; |
703 | 704 | ||
705 | preempt_disable(); | ||
706 | |||
704 | /* | 707 | /* |
705 | * Wake up AP by INIT, INIT, STARTUP sequence. | 708 | * Wake up AP by INIT, INIT, STARTUP sequence. |
706 | */ | 709 | */ |
707 | if (cpu) | 710 | if (cpu) { |
708 | return wakeup_secondary_cpu_via_init(apicid, start_ip); | 711 | boot_error = wakeup_secondary_cpu_via_init(apicid, start_ip); |
712 | goto out; | ||
713 | } | ||
709 | 714 | ||
710 | /* | 715 | /* |
711 | * Wake up BSP by nmi. | 716 | * Wake up BSP by nmi. |
@@ -725,6 +730,9 @@ wakeup_cpu_via_init_nmi(int cpu, unsigned long start_ip, int apicid, | |||
725 | boot_error = wakeup_secondary_cpu_via_nmi(id, start_ip); | 730 | boot_error = wakeup_secondary_cpu_via_nmi(id, start_ip); |
726 | } | 731 | } |
727 | 732 | ||
733 | out: | ||
734 | preempt_enable(); | ||
735 | |||
728 | return boot_error; | 736 | return boot_error; |
729 | } | 737 | } |
730 | 738 | ||
@@ -758,10 +766,10 @@ static int do_boot_cpu(int apicid, int cpu, struct task_struct *idle) | |||
758 | #else | 766 | #else |
759 | clear_tsk_thread_flag(idle, TIF_FORK); | 767 | clear_tsk_thread_flag(idle, TIF_FORK); |
760 | initial_gs = per_cpu_offset(cpu); | 768 | initial_gs = per_cpu_offset(cpu); |
769 | #endif | ||
761 | per_cpu(kernel_stack, cpu) = | 770 | per_cpu(kernel_stack, cpu) = |
762 | (unsigned long)task_stack_page(idle) - | 771 | (unsigned long)task_stack_page(idle) - |
763 | KERNEL_STACK_OFFSET + THREAD_SIZE; | 772 | KERNEL_STACK_OFFSET + THREAD_SIZE; |
764 | #endif | ||
765 | early_gdt_descr.address = (unsigned long)get_cpu_gdt_table(cpu); | 773 | early_gdt_descr.address = (unsigned long)get_cpu_gdt_table(cpu); |
766 | initial_code = (unsigned long)start_secondary; | 774 | initial_code = (unsigned long)start_secondary; |
767 | stack_start = idle->thread.sp; | 775 | stack_start = idle->thread.sp; |
@@ -1312,6 +1320,12 @@ void cpu_disable_common(void) | |||
1312 | 1320 | ||
1313 | int native_cpu_disable(void) | 1321 | int native_cpu_disable(void) |
1314 | { | 1322 | { |
1323 | int ret; | ||
1324 | |||
1325 | ret = check_irq_vectors_for_cpu_disable(); | ||
1326 | if (ret) | ||
1327 | return ret; | ||
1328 | |||
1315 | clear_local_APIC(); | 1329 | clear_local_APIC(); |
1316 | 1330 | ||
1317 | cpu_disable_common(); | 1331 | cpu_disable_common(); |
@@ -1373,7 +1387,7 @@ static inline void mwait_play_dead(void) | |||
1373 | 1387 | ||
1374 | if (!this_cpu_has(X86_FEATURE_MWAIT)) | 1388 | if (!this_cpu_has(X86_FEATURE_MWAIT)) |
1375 | return; | 1389 | return; |
1376 | if (!this_cpu_has(X86_FEATURE_CLFLSH)) | 1390 | if (!this_cpu_has(X86_FEATURE_CLFLUSH)) |
1377 | return; | 1391 | return; |
1378 | if (__this_cpu_read(cpu_info.cpuid_level) < CPUID_MWAIT_LEAF) | 1392 | if (__this_cpu_read(cpu_info.cpuid_level) < CPUID_MWAIT_LEAF) |
1379 | return; | 1393 | return; |
@@ -1417,7 +1431,9 @@ static inline void mwait_play_dead(void) | |||
1417 | * The WBINVD is insufficient due to the spurious-wakeup | 1431 | * The WBINVD is insufficient due to the spurious-wakeup |
1418 | * case where we return around the loop. | 1432 | * case where we return around the loop. |
1419 | */ | 1433 | */ |
1434 | mb(); | ||
1420 | clflush(mwait_ptr); | 1435 | clflush(mwait_ptr); |
1436 | mb(); | ||
1421 | __monitor(mwait_ptr, 0, 0); | 1437 | __monitor(mwait_ptr, 0, 0); |
1422 | mb(); | 1438 | mb(); |
1423 | __mwait(eax, 0); | 1439 | __mwait(eax, 0); |
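The two mb() calls added around clflush() above matter because CLFLUSH is not ordered with respect to ordinary loads and stores; only a fence (MFENCE, which mb() emits on x86) orders it. A commented sketch of the resulting offline-CPU loop (kernel context, not standalone; eax and mwait_ptr come from the surrounding function):

for (;;) {
        mb();                        /* prior stores reach the line before the flush */
        clflush(mwait_ptr);          /* evict it so MONITOR arms on a clean line     */
        mb();                        /* the flush completes before MONITOR           */
        __monitor(mwait_ptr, 0, 0);  /* arm the monitor on the cache line            */
        mb();
        __mwait(eax, 0);             /* sleep until the line is written              */
        /* spurious wakeups fall through and re-arm the monitor */
}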
diff --git a/arch/x86/kernel/time.c b/arch/x86/kernel/time.c index 24d3c91e9812..bf7ef5ce29df 100644 --- a/arch/x86/kernel/time.c +++ b/arch/x86/kernel/time.c | |||
@@ -23,7 +23,7 @@ | |||
23 | #include <asm/time.h> | 23 | #include <asm/time.h> |
24 | 24 | ||
25 | #ifdef CONFIG_X86_64 | 25 | #ifdef CONFIG_X86_64 |
26 | DEFINE_VVAR(volatile unsigned long, jiffies) = INITIAL_JIFFIES; | 26 | __visible DEFINE_VVAR(volatile unsigned long, jiffies) = INITIAL_JIFFIES; |
27 | #endif | 27 | #endif |
28 | 28 | ||
29 | unsigned long profile_pc(struct pt_regs *regs) | 29 | unsigned long profile_pc(struct pt_regs *regs) |
@@ -62,7 +62,7 @@ static irqreturn_t timer_interrupt(int irq, void *dev_id) | |||
62 | 62 | ||
63 | static struct irqaction irq0 = { | 63 | static struct irqaction irq0 = { |
64 | .handler = timer_interrupt, | 64 | .handler = timer_interrupt, |
65 | .flags = IRQF_DISABLED | IRQF_NOBALANCING | IRQF_IRQPOLL | IRQF_TIMER, | 65 | .flags = IRQF_NOBALANCING | IRQF_IRQPOLL | IRQF_TIMER, |
66 | .name = "timer" | 66 | .name = "timer" |
67 | }; | 67 | }; |
68 | 68 | ||
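Dropping IRQF_DISABLED above changes nothing at runtime: the flag had long been a no-op, since genirq runs all handlers with interrupts disabled anyway. For context, a sketch of how this static irqaction gets wired up at early boot, roughly following the x86 time code (the handler body is illustrative; setup_irq() is used because this runs before request_irq() is available):

#include <linux/interrupt.h>

static irqreturn_t timer_interrupt(int irq, void *dev_id)
{
        /* Kick the registered global clock event device. */
        global_clock_event->event_handler(global_clock_event);
        return IRQ_HANDLED;
}

static struct irqaction irq0 = {
        .handler = timer_interrupt,
        .flags   = IRQF_NOBALANCING | IRQF_IRQPOLL | IRQF_TIMER,
        .name    = "timer",
};

void __init setup_default_timer_irq(void)
{
        setup_irq(0, &irq0);
}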
diff --git a/arch/x86/kernel/traps.c b/arch/x86/kernel/traps.c index b857ed890b4c..57409f6b8c62 100644 --- a/arch/x86/kernel/traps.c +++ b/arch/x86/kernel/traps.c | |||
@@ -211,21 +211,17 @@ dotraplinkage void do_##name(struct pt_regs *regs, long error_code) \ | |||
211 | exception_exit(prev_state); \ | 211 | exception_exit(prev_state); \ |
212 | } | 212 | } |
213 | 213 | ||
214 | DO_ERROR_INFO(X86_TRAP_DE, SIGFPE, "divide error", divide_error, FPE_INTDIV, | 214 | DO_ERROR_INFO(X86_TRAP_DE, SIGFPE, "divide error", divide_error, FPE_INTDIV, regs->ip ) |
215 | regs->ip) | 215 | DO_ERROR (X86_TRAP_OF, SIGSEGV, "overflow", overflow ) |
216 | DO_ERROR(X86_TRAP_OF, SIGSEGV, "overflow", overflow) | 216 | DO_ERROR (X86_TRAP_BR, SIGSEGV, "bounds", bounds ) |
217 | DO_ERROR(X86_TRAP_BR, SIGSEGV, "bounds", bounds) | 217 | DO_ERROR_INFO(X86_TRAP_UD, SIGILL, "invalid opcode", invalid_op, ILL_ILLOPN, regs->ip ) |
218 | DO_ERROR_INFO(X86_TRAP_UD, SIGILL, "invalid opcode", invalid_op, ILL_ILLOPN, | 218 | DO_ERROR (X86_TRAP_OLD_MF, SIGFPE, "coprocessor segment overrun", coprocessor_segment_overrun ) |
219 | regs->ip) | 219 | DO_ERROR (X86_TRAP_TS, SIGSEGV, "invalid TSS", invalid_TSS ) |
220 | DO_ERROR(X86_TRAP_OLD_MF, SIGFPE, "coprocessor segment overrun", | 220 | DO_ERROR (X86_TRAP_NP, SIGBUS, "segment not present", segment_not_present ) |
221 | coprocessor_segment_overrun) | ||
222 | DO_ERROR(X86_TRAP_TS, SIGSEGV, "invalid TSS", invalid_TSS) | ||
223 | DO_ERROR(X86_TRAP_NP, SIGBUS, "segment not present", segment_not_present) | ||
224 | #ifdef CONFIG_X86_32 | 221 | #ifdef CONFIG_X86_32 |
225 | DO_ERROR(X86_TRAP_SS, SIGBUS, "stack segment", stack_segment) | 222 | DO_ERROR (X86_TRAP_SS, SIGBUS, "stack segment", stack_segment ) |
226 | #endif | 223 | #endif |
227 | DO_ERROR_INFO(X86_TRAP_AC, SIGBUS, "alignment check", alignment_check, | 224 | DO_ERROR_INFO(X86_TRAP_AC, SIGBUS, "alignment check", alignment_check, BUS_ADRALN, 0 ) |
228 | BUS_ADRALN, 0) | ||
229 | 225 | ||
230 | #ifdef CONFIG_X86_64 | 226 | #ifdef CONFIG_X86_64 |
231 | /* Runs on IST stack */ | 227 | /* Runs on IST stack */ |
diff --git a/arch/x86/kernel/tsc.c b/arch/x86/kernel/tsc.c index 930e5d48f560..57e5ce126d5a 100644 --- a/arch/x86/kernel/tsc.c +++ b/arch/x86/kernel/tsc.c | |||
@@ -11,6 +11,7 @@ | |||
11 | #include <linux/clocksource.h> | 11 | #include <linux/clocksource.h> |
12 | #include <linux/percpu.h> | 12 | #include <linux/percpu.h> |
13 | #include <linux/timex.h> | 13 | #include <linux/timex.h> |
14 | #include <linux/static_key.h> | ||
14 | 15 | ||
15 | #include <asm/hpet.h> | 16 | #include <asm/hpet.h> |
16 | #include <asm/timer.h> | 17 | #include <asm/timer.h> |
@@ -37,13 +38,244 @@ static int __read_mostly tsc_unstable; | |||
37 | erroneous rdtsc usage on !cpu_has_tsc processors */ | 38 | erroneous rdtsc usage on !cpu_has_tsc processors */ |
38 | static int __read_mostly tsc_disabled = -1; | 39 | static int __read_mostly tsc_disabled = -1; |
39 | 40 | ||
41 | static struct static_key __use_tsc = STATIC_KEY_INIT; | ||
42 | |||
40 | int tsc_clocksource_reliable; | 43 | int tsc_clocksource_reliable; |
44 | |||
45 | /* | ||
46 | * Use a ring-buffer-like data structure, where a writer advances the head by | ||
47 | * writing a new data entry and a reader advances the tail when it observes a | ||
48 | * new entry. | ||
49 | * | ||
50 | * Writers are made to wait on readers until there's space to write a new | ||
51 | * entry. | ||
52 | * | ||
53 | * This means that we can always use an {offset, mul} pair to compute a ns | ||
54 | * value that is 'roughly' in the right direction, even if we're writing a new | ||
55 | * {offset, mul} pair during the clock read. | ||
56 | * | ||
57 | * The downside is that we can no longer guarantee strict monotonicity | ||
58 | * (assuming the TSC was monotonic to begin with): while we compute the | ||
59 | * intersection point of the two clock slopes and make sure the time is | ||
60 | * continuous at the point of switching, we can no longer guarantee a reader is | ||
61 | * strictly before or after the switch point. | ||
62 | * | ||
63 | * It does mean a reader no longer needs to disable IRQs in order to avoid | ||
64 | * CPU-Freq updates messing with its readings, and similarly an NMI reader will | ||
65 | * no longer run the risk of hitting half-written state. | ||
66 | */ | ||
67 | |||
68 | struct cyc2ns { | ||
69 | struct cyc2ns_data data[2]; /* 0 + 2*24 = 48 */ | ||
70 | struct cyc2ns_data *head; /* 48 + 8 = 56 */ | ||
71 | struct cyc2ns_data *tail; /* 56 + 8 = 64 */ | ||
72 | }; /* exactly fits one cacheline */ | ||
73 | |||
74 | static DEFINE_PER_CPU_ALIGNED(struct cyc2ns, cyc2ns); | ||
75 | |||
76 | struct cyc2ns_data *cyc2ns_read_begin(void) | ||
77 | { | ||
78 | struct cyc2ns_data *head; | ||
79 | |||
80 | preempt_disable(); | ||
81 | |||
82 | head = this_cpu_read(cyc2ns.head); | ||
83 | /* | ||
84 | * Ensure we observe the entry when we observe the pointer to it. | ||
85 | * Matches the smp_wmb() in cyc2ns_write_end(). | ||
86 | */ | ||
87 | smp_read_barrier_depends(); | ||
88 | head->__count++; | ||
89 | barrier(); | ||
90 | |||
91 | return head; | ||
92 | } | ||
93 | |||
94 | void cyc2ns_read_end(struct cyc2ns_data *head) | ||
95 | { | ||
96 | barrier(); | ||
97 | /* | ||
98 | * If we're the outermost nested read, update the tail pointer | ||
99 | * when we're done. This notifies possible pending writers | ||
100 | * that we've observed the head pointer and that the other | ||
101 | * entry is now free. | ||
102 | */ | ||
103 | if (!--head->__count) { | ||
104 | /* | ||
105 | * x86-TSO does not reorder writes with older reads; | ||
106 | * therefore once this write becomes visible to another | ||
107 | * cpu, we must be finished reading the cyc2ns_data. | ||
108 | * | ||
109 | * matches with cyc2ns_write_begin(). | ||
110 | */ | ||
111 | this_cpu_write(cyc2ns.tail, head); | ||
112 | } | ||
113 | preempt_enable(); | ||
114 | } | ||
115 | |||
116 | /* | ||
117 | * Begin writing a new @data entry for @cpu. | ||
118 | * | ||
119 | * Assumes some sort of write-side lock; currently 'provided' by the assumption | ||
120 | * that cpufreq will call its notifiers sequentially. | ||
121 | */ | ||
122 | static struct cyc2ns_data *cyc2ns_write_begin(int cpu) | ||
123 | { | ||
124 | struct cyc2ns *c2n = &per_cpu(cyc2ns, cpu); | ||
125 | struct cyc2ns_data *data = c2n->data; | ||
126 | |||
127 | if (data == c2n->head) | ||
128 | data++; | ||
129 | |||
130 | /* XXX send an IPI to @cpu in order to guarantee a read? */ | ||
131 | |||
132 | /* | ||
133 | * When we observe the tail write from cyc2ns_read_end(), | ||
134 | * the CPU must be done with that entry and it's safe | ||
135 | * to start writing to it. | ||
136 | */ | ||
137 | while (c2n->tail == data) | ||
138 | cpu_relax(); | ||
139 | |||
140 | return data; | ||
141 | } | ||
142 | |||
143 | static void cyc2ns_write_end(int cpu, struct cyc2ns_data *data) | ||
144 | { | ||
145 | struct cyc2ns *c2n = &per_cpu(cyc2ns, cpu); | ||
146 | |||
147 | /* | ||
148 | * Ensure the @data writes are visible before we publish the | ||
149 | * entry. Matches the data dependency in cyc2ns_read_begin(). | ||
150 | */ | ||
151 | smp_wmb(); | ||
152 | |||
153 | ACCESS_ONCE(c2n->head) = data; | ||
154 | } | ||
155 | |||
156 | /* | ||
157 | * Accelerators for sched_clock() | ||
158 | * convert from cycles(64bits) => nanoseconds (64bits) | ||
159 | * basic equation: | ||
160 | * ns = cycles / (freq / ns_per_sec) | ||
161 | * ns = cycles * (ns_per_sec / freq) | ||
162 | * ns = cycles * (10^9 / (cpu_khz * 10^3)) | ||
163 | * ns = cycles * (10^6 / cpu_khz) | ||
164 | * | ||
165 | * Then we use scaling math (suggested by george@mvista.com) to get: | ||
166 | * ns = cycles * (10^6 * SC / cpu_khz) / SC | ||
167 | * ns = cycles * cyc2ns_scale / SC | ||
168 | * | ||
169 | * And since SC is a constant power of two, we can convert the div | ||
170 | * into a shift. | ||
171 | * | ||
172 | * We can use khz divisor instead of mhz to keep a better precision, since | ||
173 | * cyc2ns_scale is limited to 10^6 * 2^10, which fits in 32 bits. | ||
174 | * (mathieu.desnoyers@polymtl.ca) | ||
175 | * | ||
176 | * -johnstul@us.ibm.com "math is hard, lets go shopping!" | ||
177 | */ | ||
178 | |||
179 | #define CYC2NS_SCALE_FACTOR 10 /* 2^10, carefully chosen */ | ||
180 | |||
181 | static void cyc2ns_data_init(struct cyc2ns_data *data) | ||
182 | { | ||
183 | data->cyc2ns_mul = 0; | ||
184 | data->cyc2ns_shift = CYC2NS_SCALE_FACTOR; | ||
185 | data->cyc2ns_offset = 0; | ||
186 | data->__count = 0; | ||
187 | } | ||
188 | |||
189 | static void cyc2ns_init(int cpu) | ||
190 | { | ||
191 | struct cyc2ns *c2n = &per_cpu(cyc2ns, cpu); | ||
192 | |||
193 | cyc2ns_data_init(&c2n->data[0]); | ||
194 | cyc2ns_data_init(&c2n->data[1]); | ||
195 | |||
196 | c2n->head = c2n->data; | ||
197 | c2n->tail = c2n->data; | ||
198 | } | ||
199 | |||
200 | static inline unsigned long long cycles_2_ns(unsigned long long cyc) | ||
201 | { | ||
202 | struct cyc2ns_data *data, *tail; | ||
203 | unsigned long long ns; | ||
204 | |||
205 | /* | ||
206 | * See cyc2ns_read_*() for details; replicated in order to avoid | ||
207 | * an extra few instructions that came with the abstraction. | ||
208 | * Notably, it allows us to only do the __count and tail update | ||
209 | * dance when it's actually needed. | ||
210 | */ | ||
211 | |||
212 | preempt_disable_notrace(); | ||
213 | data = this_cpu_read(cyc2ns.head); | ||
214 | tail = this_cpu_read(cyc2ns.tail); | ||
215 | |||
216 | if (likely(data == tail)) { | ||
217 | ns = data->cyc2ns_offset; | ||
218 | ns += mul_u64_u32_shr(cyc, data->cyc2ns_mul, CYC2NS_SCALE_FACTOR); | ||
219 | } else { | ||
220 | data->__count++; | ||
221 | |||
222 | barrier(); | ||
223 | |||
224 | ns = data->cyc2ns_offset; | ||
225 | ns += mul_u64_u32_shr(cyc, data->cyc2ns_mul, CYC2NS_SCALE_FACTOR); | ||
226 | |||
227 | barrier(); | ||
228 | |||
229 | if (!--data->__count) | ||
230 | this_cpu_write(cyc2ns.tail, data); | ||
231 | } | ||
232 | preempt_enable_notrace(); | ||
233 | |||
234 | return ns; | ||
235 | } | ||
236 | |||
237 | /* XXX surely we already have this someplace in the kernel?! */ | ||
238 | #define DIV_ROUND(n, d) (((n) + ((d) / 2)) / (d)) | ||
239 | |||
240 | static void set_cyc2ns_scale(unsigned long cpu_khz, int cpu) | ||
241 | { | ||
242 | unsigned long long tsc_now, ns_now; | ||
243 | struct cyc2ns_data *data; | ||
244 | unsigned long flags; | ||
245 | |||
246 | local_irq_save(flags); | ||
247 | sched_clock_idle_sleep_event(); | ||
248 | |||
249 | if (!cpu_khz) | ||
250 | goto done; | ||
251 | |||
252 | data = cyc2ns_write_begin(cpu); | ||
253 | |||
254 | rdtscll(tsc_now); | ||
255 | ns_now = cycles_2_ns(tsc_now); | ||
256 | |||
257 | /* | ||
258 | * Compute a new multiplier as per the above comment and ensure our | ||
259 | * time function is continuous; see the comment near struct | ||
260 | * cyc2ns_data. | ||
261 | */ | ||
262 | data->cyc2ns_mul = DIV_ROUND(NSEC_PER_MSEC << CYC2NS_SCALE_FACTOR, cpu_khz); | ||
263 | data->cyc2ns_shift = CYC2NS_SCALE_FACTOR; | ||
264 | data->cyc2ns_offset = ns_now - | ||
265 | mul_u64_u32_shr(tsc_now, data->cyc2ns_mul, CYC2NS_SCALE_FACTOR); | ||
266 | |||
267 | cyc2ns_write_end(cpu, data); | ||
268 | |||
269 | done: | ||
270 | sched_clock_idle_wakeup_event(0); | ||
271 | local_irq_restore(flags); | ||
272 | } | ||
41 | /* | 273 | /* |
42 | * Scheduler clock - returns current time in nanosec units. | 274 | * Scheduler clock - returns current time in nanosec units. |
43 | */ | 275 | */ |
44 | u64 native_sched_clock(void) | 276 | u64 native_sched_clock(void) |
45 | { | 277 | { |
46 | u64 this_offset; | 278 | u64 tsc_now; |
47 | 279 | ||
48 | /* | 280 | /* |
49 | * Fall back to jiffies if there's no TSC available: | 281 | * Fall back to jiffies if there's no TSC available: |
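A standalone worked example of the scaling math from the set_cyc2ns_scale()/cycles_2_ns() code in the hunk above, for a hypothetical 2 GHz TSC. It uses the unsigned __int128 GCC/Clang extension in place of the kernel's mul_u64_u32_shr() helper:

#include <stdio.h>
#include <stdint.h>

#define CYC2NS_SCALE_FACTOR 10
#define NSEC_PER_MSEC 1000000ULL
#define DIV_ROUND(n, d) (((n) + ((d) / 2)) / (d))

/* (cyc * mul) >> shift, widened through 128 bits for simplicity. */
static uint64_t cycles_to_ns(uint64_t cyc, uint32_t mul)
{
        return (uint64_t)(((unsigned __int128)cyc * mul) >> CYC2NS_SCALE_FACTOR);
}

int main(void)
{
        uint64_t cpu_khz = 2000000;     /* hypothetical 2 GHz TSC */
        uint32_t mul = DIV_ROUND(NSEC_PER_MSEC << CYC2NS_SCALE_FACTOR, cpu_khz);

        printf("cyc2ns_mul = %u\n", mul);   /* 1024000000 / 2000000 = 512 */
        printf("1e9 cycles = %llu ns\n",    /* 512e9 >> 10 = 5e8 ns = 0.5 s */
               (unsigned long long)cycles_to_ns(1000000000ULL, mul));
        return 0;
}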
@@ -53,16 +285,16 @@ u64 native_sched_clock(void) | |||
53 | * very important for it to be as fast as the platform | 285 | * very important for it to be as fast as the platform |
54 | * can achieve it. ) | 286 | * can achieve it. ) |
55 | */ | 287 | */ |
56 | if (unlikely(tsc_disabled)) { | 288 | if (!static_key_false(&__use_tsc)) { |
57 | /* No locking but a rare wrong value is not a big deal: */ | 289 | /* No locking but a rare wrong value is not a big deal: */ |
58 | return (jiffies_64 - INITIAL_JIFFIES) * (1000000000 / HZ); | 290 | return (jiffies_64 - INITIAL_JIFFIES) * (1000000000 / HZ); |
59 | } | 291 | } |
60 | 292 | ||
61 | /* read the Time Stamp Counter: */ | 293 | /* read the Time Stamp Counter: */ |
62 | rdtscll(this_offset); | 294 | rdtscll(tsc_now); |
63 | 295 | ||
64 | /* return the value in ns */ | 296 | /* return the value in ns */ |
65 | return __cycles_2_ns(this_offset); | 297 | return cycles_2_ns(tsc_now); |
66 | } | 298 | } |
67 | 299 | ||
68 | /* We need to define a real function for sched_clock, to override the | 300 | /* We need to define a real function for sched_clock, to override the |
@@ -419,6 +651,13 @@ unsigned long native_calibrate_tsc(void) | |||
419 | unsigned long flags, latch, ms, fast_calibrate; | 651 | unsigned long flags, latch, ms, fast_calibrate; |
420 | int hpet = is_hpet_enabled(), i, loopmin; | 652 | int hpet = is_hpet_enabled(), i, loopmin; |
421 | 653 | ||
654 | /* Calibrate TSC using MSR for Intel Atom SoCs */ | ||
655 | local_irq_save(flags); | ||
656 | fast_calibrate = try_msr_calibrate_tsc(); | ||
657 | local_irq_restore(flags); | ||
658 | if (fast_calibrate) | ||
659 | return fast_calibrate; | ||
660 | |||
422 | local_irq_save(flags); | 661 | local_irq_save(flags); |
423 | fast_calibrate = quick_pit_calibrate(); | 662 | fast_calibrate = quick_pit_calibrate(); |
424 | local_irq_restore(flags); | 663 | local_irq_restore(flags); |
@@ -589,61 +828,11 @@ int recalibrate_cpu_khz(void) | |||
589 | EXPORT_SYMBOL(recalibrate_cpu_khz); | 828 | EXPORT_SYMBOL(recalibrate_cpu_khz); |
590 | 829 | ||
591 | 830 | ||
592 | /* Accelerators for sched_clock() | ||
593 | * convert from cycles(64bits) => nanoseconds (64bits) | ||
594 | * basic equation: | ||
595 | * ns = cycles / (freq / ns_per_sec) | ||
596 | * ns = cycles * (ns_per_sec / freq) | ||
597 | * ns = cycles * (10^9 / (cpu_khz * 10^3)) | ||
598 | * ns = cycles * (10^6 / cpu_khz) | ||
599 | * | ||
600 | * Then we use scaling math (suggested by george@mvista.com) to get: | ||
601 | * ns = cycles * (10^6 * SC / cpu_khz) / SC | ||
602 | * ns = cycles * cyc2ns_scale / SC | ||
603 | * | ||
604 | * And since SC is a constant power of two, we can convert the div | ||
605 | * into a shift. | ||
606 | * | ||
607 | * We can use khz divisor instead of mhz to keep a better precision, since | ||
608 | * cyc2ns_scale is limited to 10^6 * 2^10, which fits in 32 bits. | ||
609 | * (mathieu.desnoyers@polymtl.ca) | ||
610 | * | ||
611 | * -johnstul@us.ibm.com "math is hard, lets go shopping!" | ||
612 | */ | ||
613 | |||
614 | DEFINE_PER_CPU(unsigned long, cyc2ns); | ||
615 | DEFINE_PER_CPU(unsigned long long, cyc2ns_offset); | ||
616 | |||
617 | static void set_cyc2ns_scale(unsigned long cpu_khz, int cpu) | ||
618 | { | ||
619 | unsigned long long tsc_now, ns_now, *offset; | ||
620 | unsigned long flags, *scale; | ||
621 | |||
622 | local_irq_save(flags); | ||
623 | sched_clock_idle_sleep_event(); | ||
624 | |||
625 | scale = &per_cpu(cyc2ns, cpu); | ||
626 | offset = &per_cpu(cyc2ns_offset, cpu); | ||
627 | |||
628 | rdtscll(tsc_now); | ||
629 | ns_now = __cycles_2_ns(tsc_now); | ||
630 | |||
631 | if (cpu_khz) { | ||
632 | *scale = ((NSEC_PER_MSEC << CYC2NS_SCALE_FACTOR) + | ||
633 | cpu_khz / 2) / cpu_khz; | ||
634 | *offset = ns_now - mult_frac(tsc_now, *scale, | ||
635 | (1UL << CYC2NS_SCALE_FACTOR)); | ||
636 | } | ||
637 | |||
638 | sched_clock_idle_wakeup_event(0); | ||
639 | local_irq_restore(flags); | ||
640 | } | ||
641 | |||
642 | static unsigned long long cyc2ns_suspend; | 831 | static unsigned long long cyc2ns_suspend; |
643 | 832 | ||
644 | void tsc_save_sched_clock_state(void) | 833 | void tsc_save_sched_clock_state(void) |
645 | { | 834 | { |
646 | if (!sched_clock_stable) | 835 | if (!sched_clock_stable()) |
647 | return; | 836 | return; |
648 | 837 | ||
649 | cyc2ns_suspend = sched_clock(); | 838 | cyc2ns_suspend = sched_clock(); |
@@ -663,16 +852,26 @@ void tsc_restore_sched_clock_state(void) | |||
663 | unsigned long flags; | 852 | unsigned long flags; |
664 | int cpu; | 853 | int cpu; |
665 | 854 | ||
666 | if (!sched_clock_stable) | 855 | if (!sched_clock_stable()) |
667 | return; | 856 | return; |
668 | 857 | ||
669 | local_irq_save(flags); | 858 | local_irq_save(flags); |
670 | 859 | ||
671 | __this_cpu_write(cyc2ns_offset, 0); | 860 | /* |
861 | * We're coming out of suspend, there's no concurrency yet; don't |
862 | * bother being nice about the RCU stuff, just write to both | ||
863 | * data fields. | ||
864 | */ | ||
865 | |||
866 | this_cpu_write(cyc2ns.data[0].cyc2ns_offset, 0); | ||
867 | this_cpu_write(cyc2ns.data[1].cyc2ns_offset, 0); | ||
868 | |||
672 | offset = cyc2ns_suspend - sched_clock(); | 869 | offset = cyc2ns_suspend - sched_clock(); |
673 | 870 | ||
674 | for_each_possible_cpu(cpu) | 871 | for_each_possible_cpu(cpu) { |
675 | per_cpu(cyc2ns_offset, cpu) = offset; | 872 | per_cpu(cyc2ns.data[0].cyc2ns_offset, cpu) = offset; |
873 | per_cpu(cyc2ns.data[1].cyc2ns_offset, cpu) = offset; | ||
874 | } | ||
676 | 875 | ||
677 | local_irq_restore(flags); | 876 | local_irq_restore(flags); |
678 | } | 877 | } |
@@ -715,8 +914,7 @@ static int time_cpufreq_notifier(struct notifier_block *nb, unsigned long val, | |||
715 | tsc_khz_ref = tsc_khz; | 914 | tsc_khz_ref = tsc_khz; |
716 | } | 915 | } |
717 | if ((val == CPUFREQ_PRECHANGE && freq->old < freq->new) || | 916 | if ((val == CPUFREQ_PRECHANGE && freq->old < freq->new) || |
718 | (val == CPUFREQ_POSTCHANGE && freq->old > freq->new) || | 917 | (val == CPUFREQ_POSTCHANGE && freq->old > freq->new)) { |
719 | (val == CPUFREQ_RESUMECHANGE)) { | ||
720 | *lpj = cpufreq_scale(loops_per_jiffy_ref, ref_freq, freq->new); | 918 | *lpj = cpufreq_scale(loops_per_jiffy_ref, ref_freq, freq->new); |
721 | 919 | ||
722 | tsc_khz = cpufreq_scale(tsc_khz_ref, ref_freq, freq->new); | 920 | tsc_khz = cpufreq_scale(tsc_khz_ref, ref_freq, freq->new); |
@@ -786,16 +984,14 @@ static struct clocksource clocksource_tsc = { | |||
786 | .mask = CLOCKSOURCE_MASK(64), | 984 | .mask = CLOCKSOURCE_MASK(64), |
787 | .flags = CLOCK_SOURCE_IS_CONTINUOUS | | 985 | .flags = CLOCK_SOURCE_IS_CONTINUOUS | |
788 | CLOCK_SOURCE_MUST_VERIFY, | 986 | CLOCK_SOURCE_MUST_VERIFY, |
789 | #ifdef CONFIG_X86_64 | ||
790 | .archdata = { .vclock_mode = VCLOCK_TSC }, | 987 | .archdata = { .vclock_mode = VCLOCK_TSC }, |
791 | #endif | ||
792 | }; | 988 | }; |
793 | 989 | ||
794 | void mark_tsc_unstable(char *reason) | 990 | void mark_tsc_unstable(char *reason) |
795 | { | 991 | { |
796 | if (!tsc_unstable) { | 992 | if (!tsc_unstable) { |
797 | tsc_unstable = 1; | 993 | tsc_unstable = 1; |
798 | sched_clock_stable = 0; | 994 | clear_sched_clock_stable(); |
799 | disable_sched_clock_irqtime(); | 995 | disable_sched_clock_irqtime(); |
800 | pr_info("Marking TSC unstable due to %s\n", reason); | 996 | pr_info("Marking TSC unstable due to %s\n", reason); |
801 | /* Change only the rating, when not registered */ | 997 | /* Change only the rating, when not registered */ |
@@ -995,14 +1191,18 @@ void __init tsc_init(void) | |||
995 | * speed as the bootup CPU. (cpufreq notifiers will fix this | 1191 | * speed as the bootup CPU. (cpufreq notifiers will fix this |
996 | * up if their speed diverges) | 1192 | * up if their speed diverges) |
997 | */ | 1193 | */ |
998 | for_each_possible_cpu(cpu) | 1194 | for_each_possible_cpu(cpu) { |
1195 | cyc2ns_init(cpu); | ||
999 | set_cyc2ns_scale(cpu_khz, cpu); | 1196 | set_cyc2ns_scale(cpu_khz, cpu); |
1197 | } | ||
1000 | 1198 | ||
1001 | if (tsc_disabled > 0) | 1199 | if (tsc_disabled > 0) |
1002 | return; | 1200 | return; |
1003 | 1201 | ||
1004 | /* now allow native_sched_clock() to use rdtsc */ | 1202 | /* now allow native_sched_clock() to use rdtsc */ |
1203 | |||
1005 | tsc_disabled = 0; | 1204 | tsc_disabled = 0; |
1205 | static_key_slow_inc(&__use_tsc); | ||
1006 | 1206 | ||
1007 | if (!no_sched_irq_time) | 1207 | if (!no_sched_irq_time) |
1008 | enable_sched_clock_irqtime(); | 1208 | enable_sched_clock_irqtime(); |
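The __use_tsc changes in this file replace a load-and-branch on tsc_disabled with a static key: the read side compiles down to a patchable NOP and costs nothing until static_key_slow_inc() flips it at boot. A sketch of the pattern under that assumption; the jiffies_clock()/tsc_clock()/tsc_is_usable() helpers are illustrative, not kernel symbols.

#include <linux/init.h>
#include <linux/static_key.h>
#include <linux/types.h>

extern u64 jiffies_clock(void);  /* illustrative slow fallback */
extern u64 tsc_clock(void);      /* illustrative fast path     */
extern bool tsc_is_usable(void); /* illustrative probe         */

static struct static_key use_fast_clock = STATIC_KEY_INIT; /* false */

u64 read_clock(void)
{
        /* A NOP that jumps out of line only once the key is enabled. */
        if (!static_key_false(&use_fast_clock))
                return jiffies_clock();

        return tsc_clock();
}

static int __init clock_init(void)
{
        if (tsc_is_usable())
                static_key_slow_inc(&use_fast_clock); /* patch the branch in */
        return 0;
}
__initcall(clock_init);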
diff --git a/arch/x86/kernel/tsc_msr.c b/arch/x86/kernel/tsc_msr.c new file mode 100644 index 000000000000..92ae6acac8a7 --- /dev/null +++ b/arch/x86/kernel/tsc_msr.c | |||
@@ -0,0 +1,127 @@ | |||
1 | /* | ||
2 | * tsc_msr.c - MSR based TSC calibration on Intel Atom SoC platforms. | ||
3 | * | ||
4 | * TSC in Intel Atom SoC runs at a constant rate which can be figured | ||
5 | * by this formula: | ||
6 | * <maximum core-clock to bus-clock ratio> * <maximum resolved frequency> | ||
7 | * See Intel 64 and IA-32 System Programming Guide sections 16.12 and 30.11.5 | ||
8 | * for details. | ||
9 | * Notably, some Intel Atom SoCs don't have a PIT (i8254) or HPET, so MSR- | ||
10 | * based calibration is the only option. | ||
11 | * | ||
12 | * | ||
13 | * Copyright (C) 2013 Intel Corporation | ||
14 | * Author: Bin Gao <bin.gao@intel.com> | ||
15 | * | ||
16 | * This file is released under the GPLv2. | ||
17 | */ | ||
18 | |||
19 | #include <linux/kernel.h> | ||
20 | #include <asm/processor.h> | ||
21 | #include <asm/setup.h> | ||
22 | #include <asm/apic.h> | ||
23 | #include <asm/param.h> | ||
24 | |||
25 | /* CPU reference clock frequency: in KHz */ | ||
26 | #define FREQ_83 83200 | ||
27 | #define FREQ_100 99840 | ||
28 | #define FREQ_133 133200 | ||
29 | #define FREQ_166 166400 | ||
30 | |||
31 | #define MAX_NUM_FREQS 8 | ||
32 | |||
33 | /* | ||
34 | * According to Intel 64 and IA-32 System Programming Guide, | ||
35 | * if MSR_PERF_STAT[31] is set, the maximum resolved bus ratio can be | ||
36 | * read in MSR_PLATFORM_ID[12:8], otherwise in MSR_PERF_STAT[44:40]. | ||
37 | * Unfortunately some Intel Atom SoCs aren't quite compliant with this, | ||
38 | * so we need to differentiate SoC families manually. This is what the | ||
39 | * field msr_plat does. | ||
40 | */ | ||
41 | struct freq_desc { | ||
42 | u8 x86_family; /* CPU family */ | ||
43 | u8 x86_model; /* model */ | ||
44 | u8 msr_plat; /* 1: use MSR_PLATFORM_INFO, 0: MSR_IA32_PERF_STATUS */ | ||
45 | u32 freqs[MAX_NUM_FREQS]; | ||
46 | }; | ||
47 | |||
48 | static struct freq_desc freq_desc_tables[] = { | ||
49 | /* PNW */ | ||
50 | { 6, 0x27, 0, { 0, 0, 0, 0, 0, FREQ_100, 0, FREQ_83 } }, | ||
51 | /* CLV+ */ | ||
52 | { 6, 0x35, 0, { 0, FREQ_133, 0, 0, 0, FREQ_100, 0, FREQ_83 } }, | ||
53 | /* TNG */ | ||
54 | { 6, 0x4a, 1, { 0, FREQ_100, FREQ_133, 0, 0, 0, 0, 0 } }, | ||
55 | /* VLV2 */ | ||
56 | { 6, 0x37, 1, { FREQ_83, FREQ_100, FREQ_133, FREQ_166, 0, 0, 0, 0 } }, | ||
57 | /* ANN */ | ||
58 | { 6, 0x5a, 1, { FREQ_83, FREQ_100, FREQ_133, FREQ_100, 0, 0, 0, 0 } }, | ||
59 | }; | ||
60 | |||
61 | static int match_cpu(u8 family, u8 model) | ||
62 | { | ||
63 | int i; | ||
64 | |||
65 | for (i = 0; i < ARRAY_SIZE(freq_desc_tables); i++) { | ||
66 | if ((family == freq_desc_tables[i].x86_family) && | ||
67 | (model == freq_desc_tables[i].x86_model)) | ||
68 | return i; | ||
69 | } | ||
70 | |||
71 | return -1; | ||
72 | } | ||
73 | |||
74 | /* Map CPU reference clock freq ID (0-7) to CPU reference clock freq (kHz) */ | ||
75 | #define id_to_freq(cpu_index, freq_id) \ | ||
76 | (freq_desc_tables[cpu_index].freqs[freq_id]) | ||
77 | |||
78 | /* | ||
79 | * Do MSR calibration only for known/supported CPUs. | ||
80 | * | ||
81 | * Returns the calibration value or 0 if MSR calibration failed. | ||
82 | */ | ||
83 | unsigned long try_msr_calibrate_tsc(void) | ||
84 | { | ||
85 | u32 lo, hi, ratio, freq_id, freq; | ||
86 | unsigned long res; | ||
87 | int cpu_index; | ||
88 | |||
89 | cpu_index = match_cpu(boot_cpu_data.x86, boot_cpu_data.x86_model); | ||
90 | if (cpu_index < 0) | ||
91 | return 0; | ||
92 | |||
93 | if (freq_desc_tables[cpu_index].msr_plat) { | ||
94 | rdmsr(MSR_PLATFORM_INFO, lo, hi); | ||
95 | ratio = (lo >> 8) & 0x1f; | ||
96 | } else { | ||
97 | rdmsr(MSR_IA32_PERF_STATUS, lo, hi); | ||
98 | ratio = (hi >> 8) & 0x1f; | ||
99 | } | ||
100 | pr_info("Maximum core-clock to bus-clock ratio: 0x%x\n", ratio); | ||
101 | |||
102 | if (!ratio) | ||
103 | goto fail; | ||
104 | |||
105 | /* Get FSB FREQ ID */ | ||
106 | rdmsr(MSR_FSB_FREQ, lo, hi); | ||
107 | freq_id = lo & 0x7; | ||
108 | freq = id_to_freq(cpu_index, freq_id); | ||
109 | pr_info("Resolved frequency ID: %u, frequency: %u KHz\n", | ||
110 | freq_id, freq); | ||
111 | if (!freq) | ||
112 | goto fail; | ||
113 | |||
114 | /* TSC frequency = maximum resolved freq * maximum resolved bus ratio */ | ||
115 | res = freq * ratio; | ||
116 | pr_info("TSC runs at %lu KHz\n", res); | ||
117 | |||
118 | #ifdef CONFIG_X86_LOCAL_APIC | ||
119 | lapic_timer_frequency = (freq * 1000) / HZ; | ||
120 | pr_info("lapic_timer_frequency = %d\n", lapic_timer_frequency); | ||
121 | #endif | ||
122 | return res; | ||
123 | |||
124 | fail: | ||
125 | pr_warn("Fast TSC calibration using MSR failed\n"); | ||
126 | return 0; | ||
127 | } | ||
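A worked example of the calibration arithmetic in try_msr_calibrate_tsc() above, with illustrative register values for a Valleyview-class part (family 6, model 0x37): a bus ratio of 16 from MSR_PLATFORM_INFO[12:8], and FSB freq ID 1, which the VLV2 table row maps to FREQ_100 (99840 kHz).

#include <stdio.h>

int main(void)
{
        unsigned int ratio = 16;    /* illustrative MSR_PLATFORM_INFO[12:8] */
        unsigned int freq  = 99840; /* FREQ_100, in kHz                     */
        unsigned int hz    = 100;   /* illustrative CONFIG_HZ               */

        printf("TSC: %u kHz\n", freq * ratio);      /* 1597440 ~ 1.6 GHz */
        printf("lapic_timer_frequency: %u\n",
               freq * 1000 / hz);                   /* 998400            */
        return 0;
}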
diff --git a/arch/x86/kernel/tsc_sync.c b/arch/x86/kernel/tsc_sync.c index adfdf56a3714..26488487bc61 100644 --- a/arch/x86/kernel/tsc_sync.c +++ b/arch/x86/kernel/tsc_sync.c | |||
@@ -16,7 +16,6 @@ | |||
16 | */ | 16 | */ |
17 | #include <linux/spinlock.h> | 17 | #include <linux/spinlock.h> |
18 | #include <linux/kernel.h> | 18 | #include <linux/kernel.h> |
19 | #include <linux/init.h> | ||
20 | #include <linux/smp.h> | 19 | #include <linux/smp.h> |
21 | #include <linux/nmi.h> | 20 | #include <linux/nmi.h> |
22 | #include <asm/tsc.h> | 21 | #include <asm/tsc.h> |
diff --git a/arch/x86/kernel/vmlinux.lds.S b/arch/x86/kernel/vmlinux.lds.S index da6b35a98260..49edf2dd3613 100644 --- a/arch/x86/kernel/vmlinux.lds.S +++ b/arch/x86/kernel/vmlinux.lds.S | |||
@@ -147,7 +147,6 @@ SECTIONS | |||
147 | _edata = .; | 147 | _edata = .; |
148 | } :data | 148 | } :data |
149 | 149 | ||
150 | #ifdef CONFIG_X86_64 | ||
151 | 150 | ||
152 | . = ALIGN(PAGE_SIZE); | 151 | . = ALIGN(PAGE_SIZE); |
153 | __vvar_page = .; | 152 | __vvar_page = .; |
@@ -165,12 +164,15 @@ SECTIONS | |||
165 | #undef __VVAR_KERNEL_LDS | 164 | #undef __VVAR_KERNEL_LDS |
166 | #undef EMIT_VVAR | 165 | #undef EMIT_VVAR |
167 | 166 | ||
167 | /* | ||
168 | * Pad the rest of the page with zeros. Otherwise the loader | ||
169 | * can leave garbage here. | ||
170 | */ | ||
171 | . = __vvar_beginning_hack + PAGE_SIZE; | ||
168 | } :data | 172 | } :data |
169 | 173 | ||
170 | . = ALIGN(__vvar_page + PAGE_SIZE, PAGE_SIZE); | 174 | . = ALIGN(__vvar_page + PAGE_SIZE, PAGE_SIZE); |
171 | 175 | ||
172 | #endif /* CONFIG_X86_64 */ | ||
173 | |||
174 | /* Init code and data - will be freed after init */ | 176 | /* Init code and data - will be freed after init */ |
175 | . = ALIGN(PAGE_SIZE); | 177 | . = ALIGN(PAGE_SIZE); |
176 | .init.begin : AT(ADDR(.init.begin) - LOAD_OFFSET) { | 178 | .init.begin : AT(ADDR(.init.begin) - LOAD_OFFSET) { |
diff --git a/arch/x86/kernel/vsmp_64.c b/arch/x86/kernel/vsmp_64.c index 992f890283e9..f6584a90aba3 100644 --- a/arch/x86/kernel/vsmp_64.c +++ b/arch/x86/kernel/vsmp_64.c | |||
@@ -33,7 +33,7 @@ | |||
33 | * and vice versa. | 33 | * and vice versa. |
34 | */ | 34 | */ |
35 | 35 | ||
36 | static unsigned long vsmp_save_fl(void) | 36 | asmlinkage unsigned long vsmp_save_fl(void) |
37 | { | 37 | { |
38 | unsigned long flags = native_save_fl(); | 38 | unsigned long flags = native_save_fl(); |
39 | 39 | ||
@@ -43,7 +43,7 @@ static unsigned long vsmp_save_fl(void) | |||
43 | } | 43 | } |
44 | PV_CALLEE_SAVE_REGS_THUNK(vsmp_save_fl); | 44 | PV_CALLEE_SAVE_REGS_THUNK(vsmp_save_fl); |
45 | 45 | ||
46 | static void vsmp_restore_fl(unsigned long flags) | 46 | __visible void vsmp_restore_fl(unsigned long flags) |
47 | { | 47 | { |
48 | if (flags & X86_EFLAGS_IF) | 48 | if (flags & X86_EFLAGS_IF) |
49 | flags &= ~X86_EFLAGS_AC; | 49 | flags &= ~X86_EFLAGS_AC; |
@@ -53,7 +53,7 @@ static void vsmp_restore_fl(unsigned long flags) | |||
53 | } | 53 | } |
54 | PV_CALLEE_SAVE_REGS_THUNK(vsmp_restore_fl); | 54 | PV_CALLEE_SAVE_REGS_THUNK(vsmp_restore_fl); |
55 | 55 | ||
56 | static void vsmp_irq_disable(void) | 56 | asmlinkage void vsmp_irq_disable(void) |
57 | { | 57 | { |
58 | unsigned long flags = native_save_fl(); | 58 | unsigned long flags = native_save_fl(); |
59 | 59 | ||
@@ -61,7 +61,7 @@ static void vsmp_irq_disable(void) | |||
61 | } | 61 | } |
62 | PV_CALLEE_SAVE_REGS_THUNK(vsmp_irq_disable); | 62 | PV_CALLEE_SAVE_REGS_THUNK(vsmp_irq_disable); |
63 | 63 | ||
64 | static void vsmp_irq_enable(void) | 64 | asmlinkage void vsmp_irq_enable(void) |
65 | { | 65 | { |
66 | unsigned long flags = native_save_fl(); | 66 | unsigned long flags = native_save_fl(); |
67 | 67 | ||
diff --git a/arch/x86/kernel/vsyscall_64.c b/arch/x86/kernel/vsyscall_64.c index 1f96f9347ed9..8b3b3eb3cead 100644 --- a/arch/x86/kernel/vsyscall_64.c +++ b/arch/x86/kernel/vsyscall_64.c | |||
@@ -47,14 +47,12 @@ | |||
47 | #include <asm/segment.h> | 47 | #include <asm/segment.h> |
48 | #include <asm/desc.h> | 48 | #include <asm/desc.h> |
49 | #include <asm/topology.h> | 49 | #include <asm/topology.h> |
50 | #include <asm/vgtod.h> | ||
51 | #include <asm/traps.h> | 50 | #include <asm/traps.h> |
52 | 51 | ||
53 | #define CREATE_TRACE_POINTS | 52 | #define CREATE_TRACE_POINTS |
54 | #include "vsyscall_trace.h" | 53 | #include "vsyscall_trace.h" |
55 | 54 | ||
56 | DEFINE_VVAR(int, vgetcpu_mode); | 55 | DEFINE_VVAR(int, vgetcpu_mode); |
57 | DEFINE_VVAR(struct vsyscall_gtod_data, vsyscall_gtod_data); | ||
58 | 56 | ||
59 | static enum { EMULATE, NATIVE, NONE } vsyscall_mode = EMULATE; | 57 | static enum { EMULATE, NATIVE, NONE } vsyscall_mode = EMULATE; |
60 | 58 | ||
@@ -77,48 +75,6 @@ static int __init vsyscall_setup(char *str) | |||
77 | } | 75 | } |
78 | early_param("vsyscall", vsyscall_setup); | 76 | early_param("vsyscall", vsyscall_setup); |
79 | 77 | ||
80 | void update_vsyscall_tz(void) | ||
81 | { | ||
82 | vsyscall_gtod_data.sys_tz = sys_tz; | ||
83 | } | ||
84 | |||
85 | void update_vsyscall(struct timekeeper *tk) | ||
86 | { | ||
87 | struct vsyscall_gtod_data *vdata = &vsyscall_gtod_data; | ||
88 | |||
89 | write_seqcount_begin(&vdata->seq); | ||
90 | |||
91 | /* copy vsyscall data */ | ||
92 | vdata->clock.vclock_mode = tk->clock->archdata.vclock_mode; | ||
93 | vdata->clock.cycle_last = tk->clock->cycle_last; | ||
94 | vdata->clock.mask = tk->clock->mask; | ||
95 | vdata->clock.mult = tk->mult; | ||
96 | vdata->clock.shift = tk->shift; | ||
97 | |||
98 | vdata->wall_time_sec = tk->xtime_sec; | ||
99 | vdata->wall_time_snsec = tk->xtime_nsec; | ||
100 | |||
101 | vdata->monotonic_time_sec = tk->xtime_sec | ||
102 | + tk->wall_to_monotonic.tv_sec; | ||
103 | vdata->monotonic_time_snsec = tk->xtime_nsec | ||
104 | + (tk->wall_to_monotonic.tv_nsec | ||
105 | << tk->shift); | ||
106 | while (vdata->monotonic_time_snsec >= | ||
107 | (((u64)NSEC_PER_SEC) << tk->shift)) { | ||
108 | vdata->monotonic_time_snsec -= | ||
109 | ((u64)NSEC_PER_SEC) << tk->shift; | ||
110 | vdata->monotonic_time_sec++; | ||
111 | } | ||
112 | |||
113 | vdata->wall_time_coarse.tv_sec = tk->xtime_sec; | ||
114 | vdata->wall_time_coarse.tv_nsec = (long)(tk->xtime_nsec >> tk->shift); | ||
115 | |||
116 | vdata->monotonic_time_coarse = timespec_add(vdata->wall_time_coarse, | ||
117 | tk->wall_to_monotonic); | ||
118 | |||
119 | write_seqcount_end(&vdata->seq); | ||
120 | } | ||
121 | |||
122 | static void warn_bad_vsyscall(const char *level, struct pt_regs *regs, | 78 | static void warn_bad_vsyscall(const char *level, struct pt_regs *regs, |
123 | const char *message) | 79 | const char *message) |
124 | { | 80 | { |
@@ -374,7 +330,6 @@ void __init map_vsyscall(void) | |||
374 | { | 330 | { |
375 | extern char __vsyscall_page; | 331 | extern char __vsyscall_page; |
376 | unsigned long physaddr_vsyscall = __pa_symbol(&__vsyscall_page); | 332 | unsigned long physaddr_vsyscall = __pa_symbol(&__vsyscall_page); |
377 | extern char __vvar_page; | ||
378 | unsigned long physaddr_vvar_page = __pa_symbol(&__vvar_page); | 333 | unsigned long physaddr_vvar_page = __pa_symbol(&__vvar_page); |
379 | 334 | ||
380 | __set_fixmap(VSYSCALL_FIRST_PAGE, physaddr_vsyscall, | 335 | __set_fixmap(VSYSCALL_FIRST_PAGE, physaddr_vsyscall, |
@@ -393,9 +348,13 @@ static int __init vsyscall_init(void) | |||
393 | { | 348 | { |
394 | BUG_ON(VSYSCALL_ADDR(0) != __fix_to_virt(VSYSCALL_FIRST_PAGE)); | 349 | BUG_ON(VSYSCALL_ADDR(0) != __fix_to_virt(VSYSCALL_FIRST_PAGE)); |
395 | 350 | ||
351 | cpu_notifier_register_begin(); | ||
352 | |||
396 | on_each_cpu(cpu_vsyscall_init, NULL, 1); | 353 | on_each_cpu(cpu_vsyscall_init, NULL, 1); |
397 | /* notifier priority > KVM */ | 354 | /* notifier priority > KVM */ |
398 | hotcpu_notifier(cpu_vsyscall_notifier, 30); | 355 | __hotcpu_notifier(cpu_vsyscall_notifier, 30); |
356 | |||
357 | cpu_notifier_register_done(); | ||
399 | 358 | ||
400 | return 0; | 359 | return 0; |
401 | } | 360 | } |
diff --git a/arch/x86/kernel/vsyscall_gtod.c b/arch/x86/kernel/vsyscall_gtod.c new file mode 100644 index 000000000000..f9c6e56e14b5 --- /dev/null +++ b/arch/x86/kernel/vsyscall_gtod.c | |||
@@ -0,0 +1,69 @@ | |||
1 | /* | ||
2 | * Copyright (C) 2001 Andrea Arcangeli <andrea@suse.de> SuSE | ||
3 | * Copyright 2003 Andi Kleen, SuSE Labs. | ||
4 | * | ||
5 | * Modified for x86 32 bit architecture by | ||
6 | * Stefani Seibold <stefani@seibold.net> | ||
7 | * sponsored by Rohde & Schwarz GmbH & Co. KG Munich/Germany | ||
8 | * | ||
9 | * Thanks to hpa@transmeta.com for some useful hints. | ||
10 | * Special thanks to Ingo Molnar for his early experience with | ||
11 | * a different vsyscall implementation for Linux/IA32 and for the name. | ||
12 | * | ||
13 | */ | ||
14 | |||
15 | #include <linux/timekeeper_internal.h> | ||
16 | #include <asm/vgtod.h> | ||
17 | #include <asm/vvar.h> | ||
18 | |||
19 | DEFINE_VVAR(struct vsyscall_gtod_data, vsyscall_gtod_data); | ||
20 | |||
21 | void update_vsyscall_tz(void) | ||
22 | { | ||
23 | vsyscall_gtod_data.tz_minuteswest = sys_tz.tz_minuteswest; | ||
24 | vsyscall_gtod_data.tz_dsttime = sys_tz.tz_dsttime; | ||
25 | } | ||
26 | |||
27 | void update_vsyscall(struct timekeeper *tk) | ||
28 | { | ||
29 | struct vsyscall_gtod_data *vdata = &vsyscall_gtod_data; | ||
30 | |||
31 | gtod_write_begin(vdata); | ||
32 | |||
33 | /* copy vsyscall data */ | ||
34 | vdata->vclock_mode = tk->clock->archdata.vclock_mode; | ||
35 | vdata->cycle_last = tk->clock->cycle_last; | ||
36 | vdata->mask = tk->clock->mask; | ||
37 | vdata->mult = tk->mult; | ||
38 | vdata->shift = tk->shift; | ||
39 | |||
40 | vdata->wall_time_sec = tk->xtime_sec; | ||
41 | vdata->wall_time_snsec = tk->xtime_nsec; | ||
42 | |||
43 | vdata->monotonic_time_sec = tk->xtime_sec | ||
44 | + tk->wall_to_monotonic.tv_sec; | ||
45 | vdata->monotonic_time_snsec = tk->xtime_nsec | ||
46 | + (tk->wall_to_monotonic.tv_nsec | ||
47 | << tk->shift); | ||
48 | while (vdata->monotonic_time_snsec >= | ||
49 | (((u64)NSEC_PER_SEC) << tk->shift)) { | ||
50 | vdata->monotonic_time_snsec -= | ||
51 | ((u64)NSEC_PER_SEC) << tk->shift; | ||
52 | vdata->monotonic_time_sec++; | ||
53 | } | ||
54 | |||
55 | vdata->wall_time_coarse_sec = tk->xtime_sec; | ||
56 | vdata->wall_time_coarse_nsec = (long)(tk->xtime_nsec >> tk->shift); | ||
57 | |||
58 | vdata->monotonic_time_coarse_sec = | ||
59 | vdata->wall_time_coarse_sec + tk->wall_to_monotonic.tv_sec; | ||
60 | vdata->monotonic_time_coarse_nsec = | ||
61 | vdata->wall_time_coarse_nsec + tk->wall_to_monotonic.tv_nsec; | ||
62 | |||
63 | while (vdata->monotonic_time_coarse_nsec >= NSEC_PER_SEC) { | ||
64 | vdata->monotonic_time_coarse_nsec -= NSEC_PER_SEC; | ||
65 | vdata->monotonic_time_coarse_sec++; | ||
66 | } | ||
67 | |||
68 | gtod_write_end(vdata); | ||
69 | } | ||
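A standalone sketch of the shifted-nanosecond normalization loop update_vsyscall() uses above: monotonic time is kept as (seconds, nanoseconds << shift), and whole seconds are carried out of the fractional part. Values are illustrative.

#include <stdio.h>
#include <stdint.h>

#define NSEC_PER_SEC 1000000000ULL

int main(void)
{
        unsigned int shift = 8;                    /* illustrative clock shift */
        uint64_t sec = 100;
        uint64_t snsec = 1700000000ULL << shift;   /* 1.7 s of shifted ns      */

        while (snsec >= (NSEC_PER_SEC << shift)) {
                snsec -= NSEC_PER_SEC << shift;
                sec++;
        }
        printf("sec=%llu nsec=%llu\n",             /* sec=101 nsec=700000000   */
               (unsigned long long)sec,
               (unsigned long long)(snsec >> shift));
        return 0;
}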
diff --git a/arch/x86/kernel/x86_init.c b/arch/x86/kernel/x86_init.c index 021783b1f46a..e48b674639cc 100644 --- a/arch/x86/kernel/x86_init.c +++ b/arch/x86/kernel/x86_init.c | |||
@@ -136,9 +136,9 @@ void arch_teardown_msi_irq(unsigned int irq) | |||
136 | x86_msi.teardown_msi_irq(irq); | 136 | x86_msi.teardown_msi_irq(irq); |
137 | } | 137 | } |
138 | 138 | ||
139 | void arch_restore_msi_irqs(struct pci_dev *dev, int irq) | 139 | void arch_restore_msi_irqs(struct pci_dev *dev) |
140 | { | 140 | { |
141 | x86_msi.restore_msi_irqs(dev, irq); | 141 | x86_msi.restore_msi_irqs(dev); |
142 | } | 142 | } |
143 | u32 arch_msi_mask_irq(struct msi_desc *desc, u32 mask, u32 flag) | 143 | u32 arch_msi_mask_irq(struct msi_desc *desc, u32 mask, u32 flag) |
144 | { | 144 | { |
diff --git a/arch/x86/kernel/xsave.c b/arch/x86/kernel/xsave.c index 422fd8223470..a4b451c6addf 100644 --- a/arch/x86/kernel/xsave.c +++ b/arch/x86/kernel/xsave.c | |||
@@ -562,6 +562,16 @@ static void __init xstate_enable_boot_cpu(void) | |||
562 | if (cpu_has_xsaveopt && eagerfpu != DISABLE) | 562 | if (cpu_has_xsaveopt && eagerfpu != DISABLE) |
563 | eagerfpu = ENABLE; | 563 | eagerfpu = ENABLE; |
564 | 564 | ||
565 | if (pcntxt_mask & XSTATE_EAGER) { | ||
566 | if (eagerfpu == DISABLE) { | ||
567 | pr_err("eagerfpu not present, disabling some xstate features: 0x%llx\n", | ||
568 | pcntxt_mask & XSTATE_EAGER); | ||
569 | pcntxt_mask &= ~XSTATE_EAGER; | ||
570 | } else { | ||
571 | eagerfpu = ENABLE; | ||
572 | } | ||
573 | } | ||
574 | |||
565 | pr_info("enabled xstate_bv 0x%llx, cntxt size 0x%x\n", | 575 | pr_info("enabled xstate_bv 0x%llx, cntxt size 0x%x\n", |
566 | pcntxt_mask, xstate_size); | 576 | pcntxt_mask, xstate_size); |
567 | } | 577 | } |
diff --git a/arch/x86/kvm/Kconfig b/arch/x86/kvm/Kconfig index b89c5db2b832..287e4c85fff9 100644 --- a/arch/x86/kvm/Kconfig +++ b/arch/x86/kvm/Kconfig | |||
@@ -80,7 +80,7 @@ config KVM_MMU_AUDIT | |||
80 | depends on KVM && TRACEPOINTS | 80 | depends on KVM && TRACEPOINTS |
81 | ---help--- | 81 | ---help--- |
82 | This option adds a R/W kVM module parameter 'mmu_audit', which allows | 82 | This option adds a R/W kVM module parameter 'mmu_audit', which allows |
83 | audit KVM MMU at runtime. | 83 | auditing of KVM MMU events at runtime. |
84 | 84 | ||
85 | config KVM_DEVICE_ASSIGNMENT | 85 | config KVM_DEVICE_ASSIGNMENT |
86 | bool "KVM legacy PCI device assignment support" | 86 | bool "KVM legacy PCI device assignment support" |
diff --git a/arch/x86/kvm/cpuid.c b/arch/x86/kvm/cpuid.c index c6976257eff5..bea60671ef8a 100644 --- a/arch/x86/kvm/cpuid.c +++ b/arch/x86/kvm/cpuid.c | |||
@@ -28,7 +28,7 @@ static u32 xstate_required_size(u64 xstate_bv) | |||
28 | int feature_bit = 0; | 28 | int feature_bit = 0; |
29 | u32 ret = XSAVE_HDR_SIZE + XSAVE_HDR_OFFSET; | 29 | u32 ret = XSAVE_HDR_SIZE + XSAVE_HDR_OFFSET; |
30 | 30 | ||
31 | xstate_bv &= ~XSTATE_FPSSE; | 31 | xstate_bv &= XSTATE_EXTEND_MASK; |
32 | while (xstate_bv) { | 32 | while (xstate_bv) { |
33 | if (xstate_bv & 0x1) { | 33 | if (xstate_bv & 0x1) { |
34 | u32 eax, ebx, ecx, edx; | 34 | u32 eax, ebx, ecx, edx; |
@@ -43,6 +43,16 @@ static u32 xstate_required_size(u64 xstate_bv) | |||
43 | return ret; | 43 | return ret; |
44 | } | 44 | } |
45 | 45 | ||
46 | u64 kvm_supported_xcr0(void) | ||
47 | { | ||
48 | u64 xcr0 = KVM_SUPPORTED_XCR0 & host_xcr0; | ||
49 | |||
50 | if (!kvm_x86_ops->mpx_supported()) | ||
51 | xcr0 &= ~(XSTATE_BNDREGS | XSTATE_BNDCSR); | ||
52 | |||
53 | return xcr0; | ||
54 | } | ||
55 | |||
46 | void kvm_update_cpuid(struct kvm_vcpu *vcpu) | 56 | void kvm_update_cpuid(struct kvm_vcpu *vcpu) |
47 | { | 57 | { |
48 | struct kvm_cpuid_entry2 *best; | 58 | struct kvm_cpuid_entry2 *best; |
@@ -73,9 +83,9 @@ void kvm_update_cpuid(struct kvm_vcpu *vcpu) | |||
73 | } else { | 83 | } else { |
74 | vcpu->arch.guest_supported_xcr0 = | 84 | vcpu->arch.guest_supported_xcr0 = |
75 | (best->eax | ((u64)best->edx << 32)) & | 85 | (best->eax | ((u64)best->edx << 32)) & |
76 | host_xcr0 & KVM_SUPPORTED_XCR0; | 86 | kvm_supported_xcr0(); |
77 | vcpu->arch.guest_xstate_size = | 87 | vcpu->arch.guest_xstate_size = best->ebx = |
78 | xstate_required_size(vcpu->arch.guest_supported_xcr0); | 88 | xstate_required_size(vcpu->arch.xcr0); |
79 | } | 89 | } |
80 | 90 | ||
81 | kvm_pmu_cpuid_update(vcpu); | 91 | kvm_pmu_cpuid_update(vcpu); |
@@ -210,13 +220,6 @@ static void do_cpuid_1_ent(struct kvm_cpuid_entry2 *entry, u32 function, | |||
210 | entry->flags = 0; | 220 | entry->flags = 0; |
211 | } | 221 | } |
212 | 222 | ||
213 | static bool supported_xcr0_bit(unsigned bit) | ||
214 | { | ||
215 | u64 mask = ((u64)1 << bit); | ||
216 | |||
217 | return mask & KVM_SUPPORTED_XCR0 & host_xcr0; | ||
218 | } | ||
219 | |||
220 | #define F(x) bit(X86_FEATURE_##x) | 223 | #define F(x) bit(X86_FEATURE_##x) |
221 | 224 | ||
222 | static int __do_cpuid_ent_emulated(struct kvm_cpuid_entry2 *entry, | 225 | static int __do_cpuid_ent_emulated(struct kvm_cpuid_entry2 *entry, |
@@ -256,6 +259,7 @@ static inline int __do_cpuid_ent(struct kvm_cpuid_entry2 *entry, u32 function, | |||
256 | #endif | 259 | #endif |
257 | unsigned f_rdtscp = kvm_x86_ops->rdtscp_supported() ? F(RDTSCP) : 0; | 260 | unsigned f_rdtscp = kvm_x86_ops->rdtscp_supported() ? F(RDTSCP) : 0; |
258 | unsigned f_invpcid = kvm_x86_ops->invpcid_supported() ? F(INVPCID) : 0; | 261 | unsigned f_invpcid = kvm_x86_ops->invpcid_supported() ? F(INVPCID) : 0; |
262 | unsigned f_mpx = kvm_x86_ops->mpx_supported() ? F(MPX) : 0; | ||
259 | 263 | ||
260 | /* cpuid 1.edx */ | 264 | /* cpuid 1.edx */ |
261 | const u32 kvm_supported_word0_x86_features = | 265 | const u32 kvm_supported_word0_x86_features = |
@@ -263,7 +267,7 @@ static inline int __do_cpuid_ent(struct kvm_cpuid_entry2 *entry, u32 function, | |||
263 | F(TSC) | F(MSR) | F(PAE) | F(MCE) | | 267 | F(TSC) | F(MSR) | F(PAE) | F(MCE) | |
264 | F(CX8) | F(APIC) | 0 /* Reserved */ | F(SEP) | | 268 | F(CX8) | F(APIC) | 0 /* Reserved */ | F(SEP) | |
265 | F(MTRR) | F(PGE) | F(MCA) | F(CMOV) | | 269 | F(MTRR) | F(PGE) | F(MCA) | F(CMOV) | |
266 | F(PAT) | F(PSE36) | 0 /* PSN */ | F(CLFLSH) | | 270 | F(PAT) | F(PSE36) | 0 /* PSN */ | F(CLFLUSH) | |
267 | 0 /* Reserved, DS, ACPI */ | F(MMX) | | 271 | 0 /* Reserved, DS, ACPI */ | F(MMX) | |
268 | F(FXSR) | F(XMM) | F(XMM2) | F(SELFSNOOP) | | 272 | F(FXSR) | F(XMM) | F(XMM2) | F(SELFSNOOP) | |
269 | 0 /* HTT, TM, Reserved, PBE */; | 273 | 0 /* HTT, TM, Reserved, PBE */; |
@@ -303,7 +307,8 @@ static inline int __do_cpuid_ent(struct kvm_cpuid_entry2 *entry, u32 function, | |||
303 | /* cpuid 7.0.ebx */ | 307 | /* cpuid 7.0.ebx */ |
304 | const u32 kvm_supported_word9_x86_features = | 308 | const u32 kvm_supported_word9_x86_features = |
305 | F(FSGSBASE) | F(BMI1) | F(HLE) | F(AVX2) | F(SMEP) | | 309 | F(FSGSBASE) | F(BMI1) | F(HLE) | F(AVX2) | F(SMEP) | |
306 | F(BMI2) | F(ERMS) | f_invpcid | F(RTM); | 310 | F(BMI2) | F(ERMS) | f_invpcid | F(RTM) | f_mpx | F(RDSEED) | |
311 | F(ADX); | ||
307 | 312 | ||
308 | /* all calls to cpuid_count() should be made on the same cpu */ | 313 | /* all calls to cpuid_count() should be made on the same cpu */ |
309 | get_cpu(); | 314 | get_cpu(); |
@@ -436,16 +441,18 @@ static inline int __do_cpuid_ent(struct kvm_cpuid_entry2 *entry, u32 function, | |||
436 | } | 441 | } |
437 | case 0xd: { | 442 | case 0xd: { |
438 | int idx, i; | 443 | int idx, i; |
444 | u64 supported = kvm_supported_xcr0(); | ||
439 | 445 | ||
440 | entry->eax &= host_xcr0 & KVM_SUPPORTED_XCR0; | 446 | entry->eax &= supported; |
441 | entry->edx &= (host_xcr0 & KVM_SUPPORTED_XCR0) >> 32; | 447 | entry->edx &= supported >> 32; |
442 | entry->flags |= KVM_CPUID_FLAG_SIGNIFCANT_INDEX; | 448 | entry->flags |= KVM_CPUID_FLAG_SIGNIFCANT_INDEX; |
443 | for (idx = 1, i = 1; idx < 64; ++idx) { | 449 | for (idx = 1, i = 1; idx < 64; ++idx) { |
450 | u64 mask = ((u64)1 << idx); | ||
444 | if (*nent >= maxnent) | 451 | if (*nent >= maxnent) |
445 | goto out; | 452 | goto out; |
446 | 453 | ||
447 | do_cpuid_1_ent(&entry[i], function, idx); | 454 | do_cpuid_1_ent(&entry[i], function, idx); |
448 | if (entry[i].eax == 0 || !supported_xcr0_bit(idx)) | 455 | if (entry[i].eax == 0 || !(supported & mask)) |
449 | continue; | 456 | continue; |
450 | entry[i].flags |= | 457 | entry[i].flags |= |
451 | KVM_CPUID_FLAG_SIGNIFCANT_INDEX; | 458 | KVM_CPUID_FLAG_SIGNIFCANT_INDEX; |
diff --git a/arch/x86/kvm/cpuid.h b/arch/x86/kvm/cpuid.h index f1e4895174b2..a2a1bb7ed8c1 100644 --- a/arch/x86/kvm/cpuid.h +++ b/arch/x86/kvm/cpuid.h | |||
@@ -72,4 +72,12 @@ static inline bool guest_cpuid_has_pcid(struct kvm_vcpu *vcpu) | |||
72 | return best && (best->ecx & bit(X86_FEATURE_PCID)); | 72 | return best && (best->ecx & bit(X86_FEATURE_PCID)); |
73 | } | 73 | } |
74 | 74 | ||
75 | static inline bool guest_cpuid_has_x2apic(struct kvm_vcpu *vcpu) | ||
76 | { | ||
77 | struct kvm_cpuid_entry2 *best; | ||
78 | |||
79 | best = kvm_find_cpuid_entry(vcpu, 1, 0); | ||
80 | return best && (best->ecx & bit(X86_FEATURE_X2APIC)); | ||
81 | } | ||
82 | |||
75 | #endif | 83 | #endif |
diff --git a/arch/x86/kvm/emulate.c b/arch/x86/kvm/emulate.c index 07ffca0a89e9..205b17eed93c 100644 --- a/arch/x86/kvm/emulate.c +++ b/arch/x86/kvm/emulate.c | |||
@@ -3668,6 +3668,10 @@ static const struct gprefix pfx_vmovntpx = { | |||
3668 | I(0, em_mov), N, N, N, | 3668 | I(0, em_mov), N, N, N, |
3669 | }; | 3669 | }; |
3670 | 3670 | ||
3671 | static const struct gprefix pfx_0f_28_0f_29 = { | ||
3672 | I(Aligned, em_mov), I(Aligned, em_mov), N, N, | ||
3673 | }; | ||
3674 | |||
3671 | static const struct escape escape_d9 = { { | 3675 | static const struct escape escape_d9 = { { |
3672 | N, N, N, N, N, N, N, I(DstMem, em_fnstcw), | 3676 | N, N, N, N, N, N, N, I(DstMem, em_fnstcw), |
3673 | }, { | 3677 | }, { |
@@ -3870,7 +3874,9 @@ static const struct opcode twobyte_table[256] = { | |||
3870 | IIP(ModRM | SrcMem | Priv | Op3264, em_cr_write, cr_write, check_cr_write), | 3874 | IIP(ModRM | SrcMem | Priv | Op3264, em_cr_write, cr_write, check_cr_write), |
3871 | IIP(ModRM | SrcMem | Priv | Op3264, em_dr_write, dr_write, check_dr_write), | 3875 | IIP(ModRM | SrcMem | Priv | Op3264, em_dr_write, dr_write, check_dr_write), |
3872 | N, N, N, N, | 3876 | N, N, N, N, |
3873 | N, N, N, GP(ModRM | DstMem | SrcReg | Sse | Mov | Aligned, &pfx_vmovntpx), | 3877 | GP(ModRM | DstReg | SrcMem | Mov | Sse, &pfx_0f_28_0f_29), |
3878 | GP(ModRM | DstMem | SrcReg | Mov | Sse, &pfx_0f_28_0f_29), | ||
3879 | N, GP(ModRM | DstMem | SrcReg | Sse | Mov | Aligned, &pfx_vmovntpx), | ||
3874 | N, N, N, N, | 3880 | N, N, N, N, |
3875 | /* 0x30 - 0x3F */ | 3881 | /* 0x30 - 0x3F */ |
3876 | II(ImplicitOps | Priv, em_wrmsr, wrmsr), | 3882 | II(ImplicitOps | Priv, em_wrmsr, wrmsr), |
diff --git a/arch/x86/kvm/i8254.c b/arch/x86/kvm/i8254.c index 412a5aa0ef94..518d86471b76 100644 --- a/arch/x86/kvm/i8254.c +++ b/arch/x86/kvm/i8254.c | |||
@@ -37,6 +37,7 @@ | |||
37 | 37 | ||
38 | #include "irq.h" | 38 | #include "irq.h" |
39 | #include "i8254.h" | 39 | #include "i8254.h" |
40 | #include "x86.h" | ||
40 | 41 | ||
41 | #ifndef CONFIG_X86_64 | 42 | #ifndef CONFIG_X86_64 |
42 | #define mod_64(x, y) ((x) - (y) * div64_u64(x, y)) | 43 | #define mod_64(x, y) ((x) - (y) * div64_u64(x, y)) |
@@ -349,6 +350,23 @@ static void create_pit_timer(struct kvm *kvm, u32 val, int is_period) | |||
349 | atomic_set(&ps->pending, 0); | 350 | atomic_set(&ps->pending, 0); |
350 | ps->irq_ack = 1; | 351 | ps->irq_ack = 1; |
351 | 352 | ||
353 | /* | ||
354 | * Do not allow the guest to program periodic timers with a small | ||
355 | * interval, since the hrtimers are not throttled by the host | ||
356 | * scheduler. | ||
357 | */ | ||
358 | if (ps->is_periodic) { | ||
359 | s64 min_period = min_timer_period_us * 1000LL; | ||
360 | |||
361 | if (ps->period < min_period) { | ||
362 | pr_info_ratelimited( | ||
363 | "kvm: requested %lld ns " | ||
364 | "i8254 timer period limited to %lld ns\n", | ||
365 | ps->period, min_period); | ||
366 | ps->period = min_period; | ||
367 | } | ||
368 | } | ||
369 | |||
352 | hrtimer_start(&ps->timer, ktime_add_ns(ktime_get(), interval), | 370 | hrtimer_start(&ps->timer, ktime_add_ns(ktime_get(), interval), |
353 | HRTIMER_MODE_ABS); | 371 | HRTIMER_MODE_ABS); |
354 | } | 372 | } |
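
The clamp added above boils down to a floor on the programmed period. A self-contained sketch of the same guard (the helper name is illustrative, not the kernel's; min_us corresponds to the min_timer_period_us parameter now shared via x86.h):

/* Clamp a guest-requested periodic-timer interval to a host-enforced
 * floor, mirroring the i8254 check above. */
static inline long long clamp_timer_period_ns(long long requested_ns,
					      long long min_us)
{
	long long min_ns = min_us * 1000LL;

	return requested_ns < min_ns ? min_ns : requested_ns;
}
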
diff --git a/arch/x86/kvm/lapic.c b/arch/x86/kvm/lapic.c index 775702f649ca..9736529ade08 100644 --- a/arch/x86/kvm/lapic.c +++ b/arch/x86/kvm/lapic.c | |||
@@ -71,9 +71,6 @@ | |||
71 | #define VEC_POS(v) ((v) & (32 - 1)) | 71 | #define VEC_POS(v) ((v) & (32 - 1)) |
72 | #define REG_POS(v) (((v) >> 5) << 4) | 72 | #define REG_POS(v) (((v) >> 5) << 4) |
73 | 73 | ||
74 | static unsigned int min_timer_period_us = 500; | ||
75 | module_param(min_timer_period_us, uint, S_IRUGO | S_IWUSR); | ||
76 | |||
77 | static inline void apic_set_reg(struct kvm_lapic *apic, int reg_off, u32 val) | 74 | static inline void apic_set_reg(struct kvm_lapic *apic, int reg_off, u32 val) |
78 | { | 75 | { |
79 | *((u32 *) (apic->regs + reg_off)) = val; | 76 | *((u32 *) (apic->regs + reg_off)) = val; |
@@ -435,7 +432,7 @@ static bool pv_eoi_get_pending(struct kvm_vcpu *vcpu) | |||
435 | u8 val; | 432 | u8 val; |
436 | if (pv_eoi_get_user(vcpu, &val) < 0) | 433 | if (pv_eoi_get_user(vcpu, &val) < 0) |
437 | apic_debug("Can't read EOI MSR value: 0x%llx\n", | 434 | apic_debug("Can't read EOI MSR value: 0x%llx\n", |
438 | (unsigned long long)vcpi->arch.pv_eoi.msr_val); | 435 | (unsigned long long)vcpu->arch.pv_eoi.msr_val); |
439 | return val & 0x1; | 436 | return val & 0x1; |
440 | } | 437 | } |
441 | 438 | ||
@@ -443,7 +440,7 @@ static void pv_eoi_set_pending(struct kvm_vcpu *vcpu) | |||
443 | { | 440 | { |
444 | if (pv_eoi_put_user(vcpu, KVM_PV_EOI_ENABLED) < 0) { | 441 | if (pv_eoi_put_user(vcpu, KVM_PV_EOI_ENABLED) < 0) { |
445 | apic_debug("Can't set EOI MSR value: 0x%llx\n", | 442 | apic_debug("Can't set EOI MSR value: 0x%llx\n", |
446 | (unsigned long long)vcpi->arch.pv_eoi.msr_val); | 443 | (unsigned long long)vcpu->arch.pv_eoi.msr_val); |
447 | return; | 444 | return; |
448 | } | 445 | } |
449 | __set_bit(KVM_APIC_PV_EOI_PENDING, &vcpu->arch.apic_attention); | 446 | __set_bit(KVM_APIC_PV_EOI_PENDING, &vcpu->arch.apic_attention); |
@@ -453,7 +450,7 @@ static void pv_eoi_clr_pending(struct kvm_vcpu *vcpu) | |||
453 | { | 450 | { |
454 | if (pv_eoi_put_user(vcpu, KVM_PV_EOI_DISABLED) < 0) { | 451 | if (pv_eoi_put_user(vcpu, KVM_PV_EOI_DISABLED) < 0) { |
455 | apic_debug("Can't clear EOI MSR value: 0x%llx\n", | 452 | apic_debug("Can't clear EOI MSR value: 0x%llx\n", |
456 | (unsigned long long)vcpi->arch.pv_eoi.msr_val); | 453 | (unsigned long long)vcpu->arch.pv_eoi.msr_val); |
457 | return; | 454 | return; |
458 | } | 455 | } |
459 | __clear_bit(KVM_APIC_PV_EOI_PENDING, &vcpu->arch.apic_attention); | 456 | __clear_bit(KVM_APIC_PV_EOI_PENDING, &vcpu->arch.apic_attention); |
diff --git a/arch/x86/kvm/lapic.h b/arch/x86/kvm/lapic.h index c8b0d0d2da5c..6a11845fd8b9 100644 --- a/arch/x86/kvm/lapic.h +++ b/arch/x86/kvm/lapic.h | |||
@@ -65,7 +65,7 @@ bool kvm_irq_delivery_to_apic_fast(struct kvm *kvm, struct kvm_lapic *src, | |||
65 | struct kvm_lapic_irq *irq, int *r, unsigned long *dest_map); | 65 | struct kvm_lapic_irq *irq, int *r, unsigned long *dest_map); |
66 | 66 | ||
67 | u64 kvm_get_apic_base(struct kvm_vcpu *vcpu); | 67 | u64 kvm_get_apic_base(struct kvm_vcpu *vcpu); |
68 | void kvm_set_apic_base(struct kvm_vcpu *vcpu, u64 data); | 68 | int kvm_set_apic_base(struct kvm_vcpu *vcpu, struct msr_data *msr_info); |
69 | void kvm_apic_post_state_restore(struct kvm_vcpu *vcpu, | 69 | void kvm_apic_post_state_restore(struct kvm_vcpu *vcpu, |
70 | struct kvm_lapic_state *s); | 70 | struct kvm_lapic_state *s); |
71 | int kvm_lapic_find_highest_irr(struct kvm_vcpu *vcpu); | 71 | int kvm_lapic_find_highest_irr(struct kvm_vcpu *vcpu); |
diff --git a/arch/x86/kvm/mmu.c b/arch/x86/kvm/mmu.c index 40772ef0f2b1..f5704d9e5ddc 100644 --- a/arch/x86/kvm/mmu.c +++ b/arch/x86/kvm/mmu.c | |||
@@ -2659,6 +2659,9 @@ static int __direct_map(struct kvm_vcpu *vcpu, gpa_t v, int write, | |||
2659 | int emulate = 0; | 2659 | int emulate = 0; |
2660 | gfn_t pseudo_gfn; | 2660 | gfn_t pseudo_gfn; |
2661 | 2661 | ||
2662 | if (!VALID_PAGE(vcpu->arch.mmu.root_hpa)) | ||
2663 | return 0; | ||
2664 | |||
2662 | for_each_shadow_entry(vcpu, (u64)gfn << PAGE_SHIFT, iterator) { | 2665 | for_each_shadow_entry(vcpu, (u64)gfn << PAGE_SHIFT, iterator) { |
2663 | if (iterator.level == level) { | 2666 | if (iterator.level == level) { |
2664 | mmu_set_spte(vcpu, iterator.sptep, ACC_ALL, | 2667 | mmu_set_spte(vcpu, iterator.sptep, ACC_ALL, |
@@ -2669,6 +2672,7 @@ static int __direct_map(struct kvm_vcpu *vcpu, gpa_t v, int write, | |||
2669 | break; | 2672 | break; |
2670 | } | 2673 | } |
2671 | 2674 | ||
2675 | drop_large_spte(vcpu, iterator.sptep); | ||
2672 | if (!is_shadow_present_pte(*iterator.sptep)) { | 2676 | if (!is_shadow_present_pte(*iterator.sptep)) { |
2673 | u64 base_addr = iterator.addr; | 2677 | u64 base_addr = iterator.addr; |
2674 | 2678 | ||
@@ -2829,6 +2833,9 @@ static bool fast_page_fault(struct kvm_vcpu *vcpu, gva_t gva, int level, | |||
2829 | bool ret = false; | 2833 | bool ret = false; |
2830 | u64 spte = 0ull; | 2834 | u64 spte = 0ull; |
2831 | 2835 | ||
2836 | if (!VALID_PAGE(vcpu->arch.mmu.root_hpa)) | ||
2837 | return false; | ||
2838 | |||
2832 | if (!page_fault_can_be_fast(error_code)) | 2839 | if (!page_fault_can_be_fast(error_code)) |
2833 | return false; | 2840 | return false; |
2834 | 2841 | ||
@@ -3224,6 +3231,9 @@ static u64 walk_shadow_page_get_mmio_spte(struct kvm_vcpu *vcpu, u64 addr) | |||
3224 | struct kvm_shadow_walk_iterator iterator; | 3231 | struct kvm_shadow_walk_iterator iterator; |
3225 | u64 spte = 0ull; | 3232 | u64 spte = 0ull; |
3226 | 3233 | ||
3234 | if (!VALID_PAGE(vcpu->arch.mmu.root_hpa)) | ||
3235 | return spte; | ||
3236 | |||
3227 | walk_shadow_page_lockless_begin(vcpu); | 3237 | walk_shadow_page_lockless_begin(vcpu); |
3228 | for_each_shadow_entry_lockless(vcpu, addr, iterator, spte) | 3238 | for_each_shadow_entry_lockless(vcpu, addr, iterator, spte) |
3229 | if (!is_shadow_present_pte(spte)) | 3239 | if (!is_shadow_present_pte(spte)) |
@@ -3319,7 +3329,7 @@ static int kvm_arch_setup_async_pf(struct kvm_vcpu *vcpu, gva_t gva, gfn_t gfn) | |||
3319 | arch.direct_map = vcpu->arch.mmu.direct_map; | 3329 | arch.direct_map = vcpu->arch.mmu.direct_map; |
3320 | arch.cr3 = vcpu->arch.mmu.get_cr3(vcpu); | 3330 | arch.cr3 = vcpu->arch.mmu.get_cr3(vcpu); |
3321 | 3331 | ||
3322 | return kvm_setup_async_pf(vcpu, gva, gfn, &arch); | 3332 | return kvm_setup_async_pf(vcpu, gva, gfn_to_hva(vcpu->kvm, gfn), &arch); |
3323 | } | 3333 | } |
3324 | 3334 | ||
3325 | static bool can_do_async_pf(struct kvm_vcpu *vcpu) | 3335 | static bool can_do_async_pf(struct kvm_vcpu *vcpu) |
@@ -4510,6 +4520,9 @@ int kvm_mmu_get_spte_hierarchy(struct kvm_vcpu *vcpu, u64 addr, u64 sptes[4]) | |||
4510 | u64 spte; | 4520 | u64 spte; |
4511 | int nr_sptes = 0; | 4521 | int nr_sptes = 0; |
4512 | 4522 | ||
4523 | if (!VALID_PAGE(vcpu->arch.mmu.root_hpa)) | ||
4524 | return nr_sptes; | ||
4525 | |||
4513 | walk_shadow_page_lockless_begin(vcpu); | 4526 | walk_shadow_page_lockless_begin(vcpu); |
4514 | for_each_shadow_entry_lockless(vcpu, addr, iterator, spte) { | 4527 | for_each_shadow_entry_lockless(vcpu, addr, iterator, spte) { |
4515 | sptes[iterator.level-1] = spte; | 4528 | sptes[iterator.level-1] = spte; |
diff --git a/arch/x86/kvm/paging_tmpl.h b/arch/x86/kvm/paging_tmpl.h index ad75d77999d0..b1e6c1bf68d3 100644 --- a/arch/x86/kvm/paging_tmpl.h +++ b/arch/x86/kvm/paging_tmpl.h | |||
@@ -569,6 +569,9 @@ static int FNAME(fetch)(struct kvm_vcpu *vcpu, gva_t addr, | |||
569 | if (FNAME(gpte_changed)(vcpu, gw, top_level)) | 569 | if (FNAME(gpte_changed)(vcpu, gw, top_level)) |
570 | goto out_gpte_changed; | 570 | goto out_gpte_changed; |
571 | 571 | ||
572 | if (!VALID_PAGE(vcpu->arch.mmu.root_hpa)) | ||
573 | goto out_gpte_changed; | ||
574 | |||
572 | for (shadow_walk_init(&it, vcpu, addr); | 575 | for (shadow_walk_init(&it, vcpu, addr); |
573 | shadow_walk_okay(&it) && it.level > gw->level; | 576 | shadow_walk_okay(&it) && it.level > gw->level; |
574 | shadow_walk_next(&it)) { | 577 | shadow_walk_next(&it)) { |
@@ -820,6 +823,11 @@ static void FNAME(invlpg)(struct kvm_vcpu *vcpu, gva_t gva) | |||
820 | */ | 823 | */ |
821 | mmu_topup_memory_caches(vcpu); | 824 | mmu_topup_memory_caches(vcpu); |
822 | 825 | ||
826 | if (!VALID_PAGE(vcpu->arch.mmu.root_hpa)) { | ||
827 | WARN_ON(1); | ||
828 | return; | ||
829 | } | ||
830 | |||
823 | spin_lock(&vcpu->kvm->mmu_lock); | 831 | spin_lock(&vcpu->kvm->mmu_lock); |
824 | for_each_shadow_entry(vcpu, gva, iterator) { | 832 | for_each_shadow_entry(vcpu, gva, iterator) { |
825 | level = iterator.level; | 833 | level = iterator.level; |
@@ -905,7 +913,8 @@ static gpa_t FNAME(gva_to_gpa_nested)(struct kvm_vcpu *vcpu, gva_t vaddr, | |||
905 | * and kvm_mmu_notifier_invalidate_range_start detects the mapping page isn't | 913 | * and kvm_mmu_notifier_invalidate_range_start detects the mapping page isn't |
906 | * used by the guest, then TLBs are not flushed, so the guest is allowed to access the | 914 | * used by the guest, then TLBs are not flushed, so the guest is allowed to access the |
907 | * freed pages. | 915 | * freed pages. |
908 | * And we increase kvm->tlbs_dirty to delay tlbs flush in this case. | 916 | * We set tlbs_dirty to let the notifier know this change and delay the flush |
917 | * until such a case actually happens. | ||
909 | */ | 918 | */ |
910 | static int FNAME(sync_page)(struct kvm_vcpu *vcpu, struct kvm_mmu_page *sp) | 919 | static int FNAME(sync_page)(struct kvm_vcpu *vcpu, struct kvm_mmu_page *sp) |
911 | { | 920 | { |
@@ -934,7 +943,7 @@ static int FNAME(sync_page)(struct kvm_vcpu *vcpu, struct kvm_mmu_page *sp) | |||
934 | return -EINVAL; | 943 | return -EINVAL; |
935 | 944 | ||
936 | if (FNAME(prefetch_invalid_gpte)(vcpu, sp, &sp->spt[i], gpte)) { | 945 | if (FNAME(prefetch_invalid_gpte)(vcpu, sp, &sp->spt[i], gpte)) { |
937 | vcpu->kvm->tlbs_dirty++; | 946 | vcpu->kvm->tlbs_dirty = true; |
938 | continue; | 947 | continue; |
939 | } | 948 | } |
940 | 949 | ||
@@ -949,7 +958,7 @@ static int FNAME(sync_page)(struct kvm_vcpu *vcpu, struct kvm_mmu_page *sp) | |||
949 | 958 | ||
950 | if (gfn != sp->gfns[i]) { | 959 | if (gfn != sp->gfns[i]) { |
951 | drop_spte(vcpu->kvm, &sp->spt[i]); | 960 | drop_spte(vcpu->kvm, &sp->spt[i]); |
952 | vcpu->kvm->tlbs_dirty++; | 961 | vcpu->kvm->tlbs_dirty = true; |
953 | continue; | 962 | continue; |
954 | } | 963 | } |
955 | 964 | ||
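
The int-to-bool switch works because tlbs_dirty is effectively a flag here, as the rewritten comment above notes. A minimal model of the deferred-flush contract (names illustrative, flush primitive stubbed):

#include <stdbool.h>

static bool tlbs_dirty;

static void flush_remote_tlbs(void) { /* stands in for the IPI-based flush */ }

/* sync_page() path: drop a stale spte without flushing, just mark. */
static void drop_stale_spte(void)
{
	tlbs_dirty = true;	/* was: tlbs_dirty++ */
}

/* mmu-notifier invalidate path: one flush covers every mark since the last. */
static void notifier_invalidate(void)
{
	if (tlbs_dirty) {
		flush_remote_tlbs();
		tlbs_dirty = false;
	}
}
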
diff --git a/arch/x86/kvm/svm.c b/arch/x86/kvm/svm.c index c7168a5cff1b..7f4f9c2badae 100644 --- a/arch/x86/kvm/svm.c +++ b/arch/x86/kvm/svm.c | |||
@@ -34,6 +34,7 @@ | |||
34 | #include <asm/perf_event.h> | 34 | #include <asm/perf_event.h> |
35 | #include <asm/tlbflush.h> | 35 | #include <asm/tlbflush.h> |
36 | #include <asm/desc.h> | 36 | #include <asm/desc.h> |
37 | #include <asm/debugreg.h> | ||
37 | #include <asm/kvm_para.h> | 38 | #include <asm/kvm_para.h> |
38 | 39 | ||
39 | #include <asm/virtext.h> | 40 | #include <asm/virtext.h> |
@@ -303,20 +304,35 @@ static inline bool is_cr_intercept(struct vcpu_svm *svm, int bit) | |||
303 | return vmcb->control.intercept_cr & (1U << bit); | 304 | return vmcb->control.intercept_cr & (1U << bit); |
304 | } | 305 | } |
305 | 306 | ||
306 | static inline void set_dr_intercept(struct vcpu_svm *svm, int bit) | 307 | static inline void set_dr_intercepts(struct vcpu_svm *svm) |
307 | { | 308 | { |
308 | struct vmcb *vmcb = get_host_vmcb(svm); | 309 | struct vmcb *vmcb = get_host_vmcb(svm); |
309 | 310 | ||
310 | vmcb->control.intercept_dr |= (1U << bit); | 311 | vmcb->control.intercept_dr = (1 << INTERCEPT_DR0_READ) |
312 | | (1 << INTERCEPT_DR1_READ) | ||
313 | | (1 << INTERCEPT_DR2_READ) | ||
314 | | (1 << INTERCEPT_DR3_READ) | ||
315 | | (1 << INTERCEPT_DR4_READ) | ||
316 | | (1 << INTERCEPT_DR5_READ) | ||
317 | | (1 << INTERCEPT_DR6_READ) | ||
318 | | (1 << INTERCEPT_DR7_READ) | ||
319 | | (1 << INTERCEPT_DR0_WRITE) | ||
320 | | (1 << INTERCEPT_DR1_WRITE) | ||
321 | | (1 << INTERCEPT_DR2_WRITE) | ||
322 | | (1 << INTERCEPT_DR3_WRITE) | ||
323 | | (1 << INTERCEPT_DR4_WRITE) | ||
324 | | (1 << INTERCEPT_DR5_WRITE) | ||
325 | | (1 << INTERCEPT_DR6_WRITE) | ||
326 | | (1 << INTERCEPT_DR7_WRITE); | ||
311 | 327 | ||
312 | recalc_intercepts(svm); | 328 | recalc_intercepts(svm); |
313 | } | 329 | } |
314 | 330 | ||
315 | static inline void clr_dr_intercept(struct vcpu_svm *svm, int bit) | 331 | static inline void clr_dr_intercepts(struct vcpu_svm *svm) |
316 | { | 332 | { |
317 | struct vmcb *vmcb = get_host_vmcb(svm); | 333 | struct vmcb *vmcb = get_host_vmcb(svm); |
318 | 334 | ||
319 | vmcb->control.intercept_dr &= ~(1U << bit); | 335 | vmcb->control.intercept_dr = 0; |
320 | 336 | ||
321 | recalc_intercepts(svm); | 337 | recalc_intercepts(svm); |
322 | } | 338 | } |
@@ -1080,23 +1096,7 @@ static void init_vmcb(struct vcpu_svm *svm) | |||
1080 | set_cr_intercept(svm, INTERCEPT_CR4_WRITE); | 1096 | set_cr_intercept(svm, INTERCEPT_CR4_WRITE); |
1081 | set_cr_intercept(svm, INTERCEPT_CR8_WRITE); | 1097 | set_cr_intercept(svm, INTERCEPT_CR8_WRITE); |
1082 | 1098 | ||
1083 | set_dr_intercept(svm, INTERCEPT_DR0_READ); | 1099 | set_dr_intercepts(svm); |
1084 | set_dr_intercept(svm, INTERCEPT_DR1_READ); | ||
1085 | set_dr_intercept(svm, INTERCEPT_DR2_READ); | ||
1086 | set_dr_intercept(svm, INTERCEPT_DR3_READ); | ||
1087 | set_dr_intercept(svm, INTERCEPT_DR4_READ); | ||
1088 | set_dr_intercept(svm, INTERCEPT_DR5_READ); | ||
1089 | set_dr_intercept(svm, INTERCEPT_DR6_READ); | ||
1090 | set_dr_intercept(svm, INTERCEPT_DR7_READ); | ||
1091 | |||
1092 | set_dr_intercept(svm, INTERCEPT_DR0_WRITE); | ||
1093 | set_dr_intercept(svm, INTERCEPT_DR1_WRITE); | ||
1094 | set_dr_intercept(svm, INTERCEPT_DR2_WRITE); | ||
1095 | set_dr_intercept(svm, INTERCEPT_DR3_WRITE); | ||
1096 | set_dr_intercept(svm, INTERCEPT_DR4_WRITE); | ||
1097 | set_dr_intercept(svm, INTERCEPT_DR5_WRITE); | ||
1098 | set_dr_intercept(svm, INTERCEPT_DR6_WRITE); | ||
1099 | set_dr_intercept(svm, INTERCEPT_DR7_WRITE); | ||
1100 | 1100 | ||
1101 | set_exception_intercept(svm, PF_VECTOR); | 1101 | set_exception_intercept(svm, PF_VECTOR); |
1102 | set_exception_intercept(svm, UD_VECTOR); | 1102 | set_exception_intercept(svm, UD_VECTOR); |
@@ -1671,6 +1671,34 @@ static void new_asid(struct vcpu_svm *svm, struct svm_cpu_data *sd) | |||
1671 | mark_dirty(svm->vmcb, VMCB_ASID); | 1671 | mark_dirty(svm->vmcb, VMCB_ASID); |
1672 | } | 1672 | } |
1673 | 1673 | ||
1674 | static u64 svm_get_dr6(struct kvm_vcpu *vcpu) | ||
1675 | { | ||
1676 | return to_svm(vcpu)->vmcb->save.dr6; | ||
1677 | } | ||
1678 | |||
1679 | static void svm_set_dr6(struct kvm_vcpu *vcpu, unsigned long value) | ||
1680 | { | ||
1681 | struct vcpu_svm *svm = to_svm(vcpu); | ||
1682 | |||
1683 | svm->vmcb->save.dr6 = value; | ||
1684 | mark_dirty(svm->vmcb, VMCB_DR); | ||
1685 | } | ||
1686 | |||
1687 | static void svm_sync_dirty_debug_regs(struct kvm_vcpu *vcpu) | ||
1688 | { | ||
1689 | struct vcpu_svm *svm = to_svm(vcpu); | ||
1690 | |||
1691 | get_debugreg(vcpu->arch.db[0], 0); | ||
1692 | get_debugreg(vcpu->arch.db[1], 1); | ||
1693 | get_debugreg(vcpu->arch.db[2], 2); | ||
1694 | get_debugreg(vcpu->arch.db[3], 3); | ||
1695 | vcpu->arch.dr6 = svm_get_dr6(vcpu); | ||
1696 | vcpu->arch.dr7 = svm->vmcb->save.dr7; | ||
1697 | |||
1698 | vcpu->arch.switch_db_regs &= ~KVM_DEBUGREG_WONT_EXIT; | ||
1699 | set_dr_intercepts(svm); | ||
1700 | } | ||
1701 | |||
1674 | static void svm_set_dr7(struct kvm_vcpu *vcpu, unsigned long value) | 1702 | static void svm_set_dr7(struct kvm_vcpu *vcpu, unsigned long value) |
1675 | { | 1703 | { |
1676 | struct vcpu_svm *svm = to_svm(vcpu); | 1704 | struct vcpu_svm *svm = to_svm(vcpu); |
@@ -2829,6 +2857,7 @@ static int iret_interception(struct vcpu_svm *svm) | |||
2829 | clr_intercept(svm, INTERCEPT_IRET); | 2857 | clr_intercept(svm, INTERCEPT_IRET); |
2830 | svm->vcpu.arch.hflags |= HF_IRET_MASK; | 2858 | svm->vcpu.arch.hflags |= HF_IRET_MASK; |
2831 | svm->nmi_iret_rip = kvm_rip_read(&svm->vcpu); | 2859 | svm->nmi_iret_rip = kvm_rip_read(&svm->vcpu); |
2860 | kvm_make_request(KVM_REQ_EVENT, &svm->vcpu); | ||
2832 | return 1; | 2861 | return 1; |
2833 | } | 2862 | } |
2834 | 2863 | ||
@@ -2961,6 +2990,17 @@ static int dr_interception(struct vcpu_svm *svm) | |||
2961 | unsigned long val; | 2990 | unsigned long val; |
2962 | int err; | 2991 | int err; |
2963 | 2992 | ||
2993 | if (svm->vcpu.guest_debug == 0) { | ||
2994 | /* | ||
2995 | * No more DR vmexits; force a reload of the debug registers | ||
2996 | * and reenter on this instruction. The next vmexit will | ||
2997 | * retrieve the full state of the debug registers. | ||
2998 | */ | ||
2999 | clr_dr_intercepts(svm); | ||
3000 | svm->vcpu.arch.switch_db_regs |= KVM_DEBUGREG_WONT_EXIT; | ||
3001 | return 1; | ||
3002 | } | ||
3003 | |||
2964 | if (!boot_cpu_has(X86_FEATURE_DECODEASSISTS)) | 3004 | if (!boot_cpu_has(X86_FEATURE_DECODEASSISTS)) |
2965 | return emulate_on_interception(svm); | 3005 | return emulate_on_interception(svm); |
2966 | 3006 | ||
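
Together with svm_sync_dirty_debug_regs() earlier in this file, the hunk above implements lazy debug-register switching: the first DR access while the host isn't debugging drops the intercepts, and the next vmexit syncs the registers back and re-arms them. A self-contained sketch of that two-state machine (names are illustrative, not the kernel's):

#include <stdbool.h>

enum dr_state { DR_INTERCEPTED, DR_PASSTHROUGH };

/* First DR access with guest_debug == 0: stop intercepting
 * (clr_dr_intercepts) and let the guest own the DRs. */
static enum dr_state on_dr_access(enum dr_state s, bool host_debugging)
{
	if (s == DR_INTERCEPTED && !host_debugging)
		return DR_PASSTHROUGH;	/* KVM_DEBUGREG_WONT_EXIT set */
	return s;
}

/* Next vmexit: read the DRs back (sync_dirty_debug_regs) and re-arm
 * the intercepts so idle vcpus don't keep guest DRs loaded. */
static enum dr_state on_vmexit(enum dr_state s)
{
	return s == DR_PASSTHROUGH ? DR_INTERCEPTED : s;
}
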
@@ -2989,10 +3029,8 @@ static int cr8_write_interception(struct vcpu_svm *svm) | |||
2989 | u8 cr8_prev = kvm_get_cr8(&svm->vcpu); | 3029 | u8 cr8_prev = kvm_get_cr8(&svm->vcpu); |
2990 | /* instruction emulation calls kvm_set_cr8() */ | 3030 | /* instruction emulation calls kvm_set_cr8() */ |
2991 | r = cr_interception(svm); | 3031 | r = cr_interception(svm); |
2992 | if (irqchip_in_kernel(svm->vcpu.kvm)) { | 3032 | if (irqchip_in_kernel(svm->vcpu.kvm)) |
2993 | clr_cr_intercept(svm, INTERCEPT_CR8_WRITE); | ||
2994 | return r; | 3033 | return r; |
2995 | } | ||
2996 | if (cr8_prev <= kvm_get_cr8(&svm->vcpu)) | 3034 | if (cr8_prev <= kvm_get_cr8(&svm->vcpu)) |
2997 | return r; | 3035 | return r; |
2998 | kvm_run->exit_reason = KVM_EXIT_SET_TPR; | 3036 | kvm_run->exit_reason = KVM_EXIT_SET_TPR; |
@@ -3554,6 +3592,8 @@ static void update_cr8_intercept(struct kvm_vcpu *vcpu, int tpr, int irr) | |||
3554 | if (is_guest_mode(vcpu) && (vcpu->arch.hflags & HF_VINTR_MASK)) | 3592 | if (is_guest_mode(vcpu) && (vcpu->arch.hflags & HF_VINTR_MASK)) |
3555 | return; | 3593 | return; |
3556 | 3594 | ||
3595 | clr_cr_intercept(svm, INTERCEPT_CR8_WRITE); | ||
3596 | |||
3557 | if (irr == -1) | 3597 | if (irr == -1) |
3558 | return; | 3598 | return; |
3559 | 3599 | ||
@@ -3636,7 +3676,7 @@ static int svm_interrupt_allowed(struct kvm_vcpu *vcpu) | |||
3636 | return ret; | 3676 | return ret; |
3637 | } | 3677 | } |
3638 | 3678 | ||
3639 | static int enable_irq_window(struct kvm_vcpu *vcpu) | 3679 | static void enable_irq_window(struct kvm_vcpu *vcpu) |
3640 | { | 3680 | { |
3641 | struct vcpu_svm *svm = to_svm(vcpu); | 3681 | struct vcpu_svm *svm = to_svm(vcpu); |
3642 | 3682 | ||
@@ -3650,16 +3690,15 @@ static int enable_irq_window(struct kvm_vcpu *vcpu) | |||
3650 | svm_set_vintr(svm); | 3690 | svm_set_vintr(svm); |
3651 | svm_inject_irq(svm, 0x0); | 3691 | svm_inject_irq(svm, 0x0); |
3652 | } | 3692 | } |
3653 | return 0; | ||
3654 | } | 3693 | } |
3655 | 3694 | ||
3656 | static int enable_nmi_window(struct kvm_vcpu *vcpu) | 3695 | static void enable_nmi_window(struct kvm_vcpu *vcpu) |
3657 | { | 3696 | { |
3658 | struct vcpu_svm *svm = to_svm(vcpu); | 3697 | struct vcpu_svm *svm = to_svm(vcpu); |
3659 | 3698 | ||
3660 | if ((svm->vcpu.arch.hflags & (HF_NMI_MASK | HF_IRET_MASK)) | 3699 | if ((svm->vcpu.arch.hflags & (HF_NMI_MASK | HF_IRET_MASK)) |
3661 | == HF_NMI_MASK) | 3700 | == HF_NMI_MASK) |
3662 | return 0; /* IRET will cause a vm exit */ | 3701 | return; /* IRET will cause a vm exit */ |
3663 | 3702 | ||
3664 | /* | 3703 | /* |
3665 | * Something prevents NMI from being injected. Single step over possible | 3704 |
@@ -3668,7 +3707,6 @@ static int enable_nmi_window(struct kvm_vcpu *vcpu) | |||
3668 | svm->nmi_singlestep = true; | 3707 | svm->nmi_singlestep = true; |
3669 | svm->vmcb->save.rflags |= (X86_EFLAGS_TF | X86_EFLAGS_RF); | 3708 | svm->vmcb->save.rflags |= (X86_EFLAGS_TF | X86_EFLAGS_RF); |
3670 | update_db_bp_intercept(vcpu); | 3709 | update_db_bp_intercept(vcpu); |
3671 | return 0; | ||
3672 | } | 3710 | } |
3673 | 3711 | ||
3674 | static int svm_set_tss_addr(struct kvm *kvm, unsigned int addr) | 3712 | static int svm_set_tss_addr(struct kvm *kvm, unsigned int addr) |
@@ -4051,6 +4089,11 @@ static bool svm_invpcid_supported(void) | |||
4051 | return false; | 4089 | return false; |
4052 | } | 4090 | } |
4053 | 4091 | ||
4092 | static bool svm_mpx_supported(void) | ||
4093 | { | ||
4094 | return false; | ||
4095 | } | ||
4096 | |||
4054 | static bool svm_has_wbinvd_exit(void) | 4097 | static bool svm_has_wbinvd_exit(void) |
4055 | { | 4098 | { |
4056 | return true; | 4099 | return true; |
@@ -4286,7 +4329,10 @@ static struct kvm_x86_ops svm_x86_ops = { | |||
4286 | .set_idt = svm_set_idt, | 4329 | .set_idt = svm_set_idt, |
4287 | .get_gdt = svm_get_gdt, | 4330 | .get_gdt = svm_get_gdt, |
4288 | .set_gdt = svm_set_gdt, | 4331 | .set_gdt = svm_set_gdt, |
4332 | .get_dr6 = svm_get_dr6, | ||
4333 | .set_dr6 = svm_set_dr6, | ||
4289 | .set_dr7 = svm_set_dr7, | 4334 | .set_dr7 = svm_set_dr7, |
4335 | .sync_dirty_debug_regs = svm_sync_dirty_debug_regs, | ||
4290 | .cache_reg = svm_cache_reg, | 4336 | .cache_reg = svm_cache_reg, |
4291 | .get_rflags = svm_get_rflags, | 4337 | .get_rflags = svm_get_rflags, |
4292 | .set_rflags = svm_set_rflags, | 4338 | .set_rflags = svm_set_rflags, |
@@ -4330,6 +4376,7 @@ static struct kvm_x86_ops svm_x86_ops = { | |||
4330 | 4376 | ||
4331 | .rdtscp_supported = svm_rdtscp_supported, | 4377 | .rdtscp_supported = svm_rdtscp_supported, |
4332 | .invpcid_supported = svm_invpcid_supported, | 4378 | .invpcid_supported = svm_invpcid_supported, |
4379 | .mpx_supported = svm_mpx_supported, | ||
4333 | 4380 | ||
4334 | .set_supported_cpuid = svm_set_supported_cpuid, | 4381 | .set_supported_cpuid = svm_set_supported_cpuid, |
4335 | 4382 | ||
diff --git a/arch/x86/kvm/vmx.c b/arch/x86/kvm/vmx.c index da7837e1349d..1320e0f8e611 100644 --- a/arch/x86/kvm/vmx.c +++ b/arch/x86/kvm/vmx.c | |||
@@ -31,6 +31,7 @@ | |||
31 | #include <linux/ftrace_event.h> | 31 | #include <linux/ftrace_event.h> |
32 | #include <linux/slab.h> | 32 | #include <linux/slab.h> |
33 | #include <linux/tboot.h> | 33 | #include <linux/tboot.h> |
34 | #include <linux/hrtimer.h> | ||
34 | #include "kvm_cache_regs.h" | 35 | #include "kvm_cache_regs.h" |
35 | #include "x86.h" | 36 | #include "x86.h" |
36 | 37 | ||
@@ -42,6 +43,7 @@ | |||
42 | #include <asm/i387.h> | 43 | #include <asm/i387.h> |
43 | #include <asm/xcr.h> | 44 | #include <asm/xcr.h> |
44 | #include <asm/perf_event.h> | 45 | #include <asm/perf_event.h> |
46 | #include <asm/debugreg.h> | ||
45 | #include <asm/kexec.h> | 47 | #include <asm/kexec.h> |
46 | 48 | ||
47 | #include "trace.h" | 49 | #include "trace.h" |
@@ -110,6 +112,8 @@ module_param(nested, bool, S_IRUGO); | |||
110 | 112 | ||
111 | #define RMODE_GUEST_OWNED_EFLAGS_BITS (~(X86_EFLAGS_IOPL | X86_EFLAGS_VM)) | 113 | #define RMODE_GUEST_OWNED_EFLAGS_BITS (~(X86_EFLAGS_IOPL | X86_EFLAGS_VM)) |
112 | 114 | ||
115 | #define VMX_MISC_EMULATED_PREEMPTION_TIMER_RATE 5 | ||
116 | |||
113 | /* | 117 | /* |
114 | * These 2 parameters are used to config the controls for Pause-Loop Exiting: | 118 | * These 2 parameters are used to config the controls for Pause-Loop Exiting: |
115 | * ple_gap: upper bound on the amount of time between two successive | 119 | * ple_gap: upper bound on the amount of time between two successive |
@@ -202,6 +206,7 @@ struct __packed vmcs12 { | |||
202 | u64 guest_pdptr1; | 206 | u64 guest_pdptr1; |
203 | u64 guest_pdptr2; | 207 | u64 guest_pdptr2; |
204 | u64 guest_pdptr3; | 208 | u64 guest_pdptr3; |
209 | u64 guest_bndcfgs; | ||
205 | u64 host_ia32_pat; | 210 | u64 host_ia32_pat; |
206 | u64 host_ia32_efer; | 211 | u64 host_ia32_efer; |
207 | u64 host_ia32_perf_global_ctrl; | 212 | u64 host_ia32_perf_global_ctrl; |
@@ -374,6 +379,9 @@ struct nested_vmx { | |||
374 | */ | 379 | */ |
375 | struct page *apic_access_page; | 380 | struct page *apic_access_page; |
376 | u64 msr_ia32_feature_control; | 381 | u64 msr_ia32_feature_control; |
382 | |||
383 | struct hrtimer preemption_timer; | ||
384 | bool preemption_timer_expired; | ||
377 | }; | 385 | }; |
378 | 386 | ||
379 | #define POSTED_INTR_ON 0 | 387 | #define POSTED_INTR_ON 0 |
@@ -418,6 +426,8 @@ struct vcpu_vmx { | |||
418 | u64 msr_host_kernel_gs_base; | 426 | u64 msr_host_kernel_gs_base; |
419 | u64 msr_guest_kernel_gs_base; | 427 | u64 msr_guest_kernel_gs_base; |
420 | #endif | 428 | #endif |
429 | u32 vm_entry_controls_shadow; | ||
430 | u32 vm_exit_controls_shadow; | ||
421 | /* | 431 | /* |
422 | * loaded_vmcs points to the VMCS currently used in this vcpu. For a | 432 | * loaded_vmcs points to the VMCS currently used in this vcpu. For a |
423 | * non-nested (L1) guest, it always points to vmcs01. For a nested | 433 | * non-nested (L1) guest, it always points to vmcs01. For a nested |
@@ -439,6 +449,7 @@ struct vcpu_vmx { | |||
439 | #endif | 449 | #endif |
440 | int gs_ldt_reload_needed; | 450 | int gs_ldt_reload_needed; |
441 | int fs_reload_needed; | 451 | int fs_reload_needed; |
452 | u64 msr_host_bndcfgs; | ||
442 | } host_state; | 453 | } host_state; |
443 | struct { | 454 | struct { |
444 | int vm86_active; | 455 | int vm86_active; |
@@ -531,6 +542,7 @@ static const unsigned long shadow_read_write_fields[] = { | |||
531 | GUEST_CS_LIMIT, | 542 | GUEST_CS_LIMIT, |
532 | GUEST_CS_BASE, | 543 | GUEST_CS_BASE, |
533 | GUEST_ES_BASE, | 544 | GUEST_ES_BASE, |
545 | GUEST_BNDCFGS, | ||
534 | CR0_GUEST_HOST_MASK, | 546 | CR0_GUEST_HOST_MASK, |
535 | CR0_READ_SHADOW, | 547 | CR0_READ_SHADOW, |
536 | CR4_READ_SHADOW, | 548 | CR4_READ_SHADOW, |
@@ -586,6 +598,7 @@ static const unsigned short vmcs_field_to_offset_table[] = { | |||
586 | FIELD64(GUEST_PDPTR1, guest_pdptr1), | 598 | FIELD64(GUEST_PDPTR1, guest_pdptr1), |
587 | FIELD64(GUEST_PDPTR2, guest_pdptr2), | 599 | FIELD64(GUEST_PDPTR2, guest_pdptr2), |
588 | FIELD64(GUEST_PDPTR3, guest_pdptr3), | 600 | FIELD64(GUEST_PDPTR3, guest_pdptr3), |
601 | FIELD64(GUEST_BNDCFGS, guest_bndcfgs), | ||
589 | FIELD64(HOST_IA32_PAT, host_ia32_pat), | 602 | FIELD64(HOST_IA32_PAT, host_ia32_pat), |
590 | FIELD64(HOST_IA32_EFER, host_ia32_efer), | 603 | FIELD64(HOST_IA32_EFER, host_ia32_efer), |
591 | FIELD64(HOST_IA32_PERF_GLOBAL_CTRL, host_ia32_perf_global_ctrl), | 604 | FIELD64(HOST_IA32_PERF_GLOBAL_CTRL, host_ia32_perf_global_ctrl), |
@@ -716,6 +729,7 @@ static unsigned long nested_ept_get_cr3(struct kvm_vcpu *vcpu); | |||
716 | static u64 construct_eptp(unsigned long root_hpa); | 729 | static u64 construct_eptp(unsigned long root_hpa); |
717 | static void kvm_cpu_vmxon(u64 addr); | 730 | static void kvm_cpu_vmxon(u64 addr); |
718 | static void kvm_cpu_vmxoff(void); | 731 | static void kvm_cpu_vmxoff(void); |
732 | static bool vmx_mpx_supported(void); | ||
719 | static int vmx_set_tss_addr(struct kvm *kvm, unsigned int addr); | 733 | static int vmx_set_tss_addr(struct kvm *kvm, unsigned int addr); |
720 | static void vmx_set_segment(struct kvm_vcpu *vcpu, | 734 | static void vmx_set_segment(struct kvm_vcpu *vcpu, |
721 | struct kvm_segment *var, int seg); | 735 | struct kvm_segment *var, int seg); |
@@ -726,6 +740,7 @@ static u32 vmx_segment_access_rights(struct kvm_segment *var); | |||
726 | static void vmx_sync_pir_to_irr_dummy(struct kvm_vcpu *vcpu); | 740 | static void vmx_sync_pir_to_irr_dummy(struct kvm_vcpu *vcpu); |
727 | static void copy_vmcs12_to_shadow(struct vcpu_vmx *vmx); | 741 | static void copy_vmcs12_to_shadow(struct vcpu_vmx *vmx); |
728 | static void copy_shadow_to_vmcs12(struct vcpu_vmx *vmx); | 742 | static void copy_shadow_to_vmcs12(struct vcpu_vmx *vmx); |
743 | static bool vmx_mpx_supported(void); | ||
729 | 744 | ||
730 | static DEFINE_PER_CPU(struct vmcs *, vmxarea); | 745 | static DEFINE_PER_CPU(struct vmcs *, vmxarea); |
731 | static DEFINE_PER_CPU(struct vmcs *, current_vmcs); | 746 | static DEFINE_PER_CPU(struct vmcs *, current_vmcs); |
@@ -1045,6 +1060,12 @@ static inline bool nested_cpu_has_virtual_nmis(struct vmcs12 *vmcs12) | |||
1045 | return vmcs12->pin_based_vm_exec_control & PIN_BASED_VIRTUAL_NMIS; | 1060 | return vmcs12->pin_based_vm_exec_control & PIN_BASED_VIRTUAL_NMIS; |
1046 | } | 1061 | } |
1047 | 1062 | ||
1063 | static inline bool nested_cpu_has_preemption_timer(struct vmcs12 *vmcs12) | ||
1064 | { | ||
1065 | return vmcs12->pin_based_vm_exec_control & | ||
1066 | PIN_BASED_VMX_PREEMPTION_TIMER; | ||
1067 | } | ||
1068 | |||
1048 | static inline int nested_cpu_has_ept(struct vmcs12 *vmcs12) | 1069 | static inline int nested_cpu_has_ept(struct vmcs12 *vmcs12) |
1049 | { | 1070 | { |
1050 | return nested_cpu_has2(vmcs12, SECONDARY_EXEC_ENABLE_EPT); | 1071 | return nested_cpu_has2(vmcs12, SECONDARY_EXEC_ENABLE_EPT); |
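
The emulated rate of 5 defined above means the nested preemption timer is modeled as counting down once every 2^5 TSC cycles. A sketch of the value-to-nanoseconds conversion the hrtimer-based emulation needs (a standalone restatement under that assumption; the tsc_khz parameter stands in for the vcpu's virtual TSC frequency):

#define EMULATED_PREEMPTION_TIMER_RATE 5

/* delay = value * 2^RATE TSC cycles, converted at tsc_khz:
 * cycles / (khz * 1000) seconds  ==  cycles * 1000000 / khz ns. */
static inline unsigned long long
preemption_timer_ns(unsigned int value, unsigned long tsc_khz)
{
	unsigned long long cycles =
		(unsigned long long)value << EMULATED_PREEMPTION_TIMER_RATE;

	return cycles * 1000000ULL / tsc_khz;
}
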
@@ -1056,7 +1077,9 @@ static inline bool is_exception(u32 intr_info) | |||
1056 | == (INTR_TYPE_HARD_EXCEPTION | INTR_INFO_VALID_MASK); | 1077 | == (INTR_TYPE_HARD_EXCEPTION | INTR_INFO_VALID_MASK); |
1057 | } | 1078 | } |
1058 | 1079 | ||
1059 | static void nested_vmx_vmexit(struct kvm_vcpu *vcpu); | 1080 | static void nested_vmx_vmexit(struct kvm_vcpu *vcpu, u32 exit_reason, |
1081 | u32 exit_intr_info, | ||
1082 | unsigned long exit_qualification); | ||
1060 | static void nested_vmx_entry_failure(struct kvm_vcpu *vcpu, | 1083 | static void nested_vmx_entry_failure(struct kvm_vcpu *vcpu, |
1061 | struct vmcs12 *vmcs12, | 1084 | struct vmcs12 *vmcs12, |
1062 | u32 reason, unsigned long qualification); | 1085 | u32 reason, unsigned long qualification); |
@@ -1326,6 +1349,62 @@ static void vmcs_set_bits(unsigned long field, u32 mask) | |||
1326 | vmcs_writel(field, vmcs_readl(field) | mask); | 1349 | vmcs_writel(field, vmcs_readl(field) | mask); |
1327 | } | 1350 | } |
1328 | 1351 | ||
1352 | static inline void vm_entry_controls_init(struct vcpu_vmx *vmx, u32 val) | ||
1353 | { | ||
1354 | vmcs_write32(VM_ENTRY_CONTROLS, val); | ||
1355 | vmx->vm_entry_controls_shadow = val; | ||
1356 | } | ||
1357 | |||
1358 | static inline void vm_entry_controls_set(struct vcpu_vmx *vmx, u32 val) | ||
1359 | { | ||
1360 | if (vmx->vm_entry_controls_shadow != val) | ||
1361 | vm_entry_controls_init(vmx, val); | ||
1362 | } | ||
1363 | |||
1364 | static inline u32 vm_entry_controls_get(struct vcpu_vmx *vmx) | ||
1365 | { | ||
1366 | return vmx->vm_entry_controls_shadow; | ||
1367 | } | ||
1368 | |||
1369 | |||
1370 | static inline void vm_entry_controls_setbit(struct vcpu_vmx *vmx, u32 val) | ||
1371 | { | ||
1372 | vm_entry_controls_set(vmx, vm_entry_controls_get(vmx) | val); | ||
1373 | } | ||
1374 | |||
1375 | static inline void vm_entry_controls_clearbit(struct vcpu_vmx *vmx, u32 val) | ||
1376 | { | ||
1377 | vm_entry_controls_set(vmx, vm_entry_controls_get(vmx) & ~val); | ||
1378 | } | ||
1379 | |||
1380 | static inline void vm_exit_controls_init(struct vcpu_vmx *vmx, u32 val) | ||
1381 | { | ||
1382 | vmcs_write32(VM_EXIT_CONTROLS, val); | ||
1383 | vmx->vm_exit_controls_shadow = val; | ||
1384 | } | ||
1385 | |||
1386 | static inline void vm_exit_controls_set(struct vcpu_vmx *vmx, u32 val) | ||
1387 | { | ||
1388 | if (vmx->vm_exit_controls_shadow != val) | ||
1389 | vm_exit_controls_init(vmx, val); | ||
1390 | } | ||
1391 | |||
1392 | static inline u32 vm_exit_controls_get(struct vcpu_vmx *vmx) | ||
1393 | { | ||
1394 | return vmx->vm_exit_controls_shadow; | ||
1395 | } | ||
1396 | |||
1397 | |||
1398 | static inline void vm_exit_controls_setbit(struct vcpu_vmx *vmx, u32 val) | ||
1399 | { | ||
1400 | vm_exit_controls_set(vmx, vm_exit_controls_get(vmx) | val); | ||
1401 | } | ||
1402 | |||
1403 | static inline void vm_exit_controls_clearbit(struct vcpu_vmx *vmx, u32 val) | ||
1404 | { | ||
1405 | vm_exit_controls_set(vmx, vm_exit_controls_get(vmx) & ~val); | ||
1406 | } | ||
1407 | |||
1329 | static void vmx_segment_cache_clear(struct vcpu_vmx *vmx) | 1408 | static void vmx_segment_cache_clear(struct vcpu_vmx *vmx) |
1330 | { | 1409 | { |
1331 | vmx->segment_cache.bitmask = 0; | 1410 | vmx->segment_cache.bitmask = 0; |
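
The vm_entry_controls_*/vm_exit_controls_* accessors added above are a write-through cache: the control fields live in the VMCS, where every read costs a VMREAD, so the per-vcpu shadow makes the read-modify-write toggles in the hunks below free whenever the value doesn't change. A stripped-down model of the pattern (VMWRITE stubbed, names illustrative):

static unsigned int shadow;	/* cached copy of the control field */

static void vmcs_write32_stub(unsigned int val) { (void)val; /* real code: VMWRITE */ }

static void controls_set(unsigned int val)
{
	if (shadow != val) {		/* elide redundant VMWRITEs */
		vmcs_write32_stub(val);
		shadow = val;
	}
}

static void controls_setbit(unsigned int bit)   { controls_set(shadow | bit); }
static void controls_clearbit(unsigned int bit) { controls_set(shadow & ~bit); }
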
@@ -1410,11 +1489,11 @@ static void update_exception_bitmap(struct kvm_vcpu *vcpu) | |||
1410 | vmcs_write32(EXCEPTION_BITMAP, eb); | 1489 | vmcs_write32(EXCEPTION_BITMAP, eb); |
1411 | } | 1490 | } |
1412 | 1491 | ||
1413 | static void clear_atomic_switch_msr_special(unsigned long entry, | 1492 | static void clear_atomic_switch_msr_special(struct vcpu_vmx *vmx, |
1414 | unsigned long exit) | 1493 | unsigned long entry, unsigned long exit) |
1415 | { | 1494 | { |
1416 | vmcs_clear_bits(VM_ENTRY_CONTROLS, entry); | 1495 | vm_entry_controls_clearbit(vmx, entry); |
1417 | vmcs_clear_bits(VM_EXIT_CONTROLS, exit); | 1496 | vm_exit_controls_clearbit(vmx, exit); |
1418 | } | 1497 | } |
1419 | 1498 | ||
1420 | static void clear_atomic_switch_msr(struct vcpu_vmx *vmx, unsigned msr) | 1499 | static void clear_atomic_switch_msr(struct vcpu_vmx *vmx, unsigned msr) |
@@ -1425,14 +1504,15 @@ static void clear_atomic_switch_msr(struct vcpu_vmx *vmx, unsigned msr) | |||
1425 | switch (msr) { | 1504 | switch (msr) { |
1426 | case MSR_EFER: | 1505 | case MSR_EFER: |
1427 | if (cpu_has_load_ia32_efer) { | 1506 | if (cpu_has_load_ia32_efer) { |
1428 | clear_atomic_switch_msr_special(VM_ENTRY_LOAD_IA32_EFER, | 1507 | clear_atomic_switch_msr_special(vmx, |
1508 | VM_ENTRY_LOAD_IA32_EFER, | ||
1429 | VM_EXIT_LOAD_IA32_EFER); | 1509 | VM_EXIT_LOAD_IA32_EFER); |
1430 | return; | 1510 | return; |
1431 | } | 1511 | } |
1432 | break; | 1512 | break; |
1433 | case MSR_CORE_PERF_GLOBAL_CTRL: | 1513 | case MSR_CORE_PERF_GLOBAL_CTRL: |
1434 | if (cpu_has_load_perf_global_ctrl) { | 1514 | if (cpu_has_load_perf_global_ctrl) { |
1435 | clear_atomic_switch_msr_special( | 1515 | clear_atomic_switch_msr_special(vmx, |
1436 | VM_ENTRY_LOAD_IA32_PERF_GLOBAL_CTRL, | 1516 | VM_ENTRY_LOAD_IA32_PERF_GLOBAL_CTRL, |
1437 | VM_EXIT_LOAD_IA32_PERF_GLOBAL_CTRL); | 1517 | VM_EXIT_LOAD_IA32_PERF_GLOBAL_CTRL); |
1438 | return; | 1518 | return; |
@@ -1453,14 +1533,15 @@ static void clear_atomic_switch_msr(struct vcpu_vmx *vmx, unsigned msr) | |||
1453 | vmcs_write32(VM_EXIT_MSR_LOAD_COUNT, m->nr); | 1533 | vmcs_write32(VM_EXIT_MSR_LOAD_COUNT, m->nr); |
1454 | } | 1534 | } |
1455 | 1535 | ||
1456 | static void add_atomic_switch_msr_special(unsigned long entry, | 1536 | static void add_atomic_switch_msr_special(struct vcpu_vmx *vmx, |
1457 | unsigned long exit, unsigned long guest_val_vmcs, | 1537 | unsigned long entry, unsigned long exit, |
1458 | unsigned long host_val_vmcs, u64 guest_val, u64 host_val) | 1538 | unsigned long guest_val_vmcs, unsigned long host_val_vmcs, |
1539 | u64 guest_val, u64 host_val) | ||
1459 | { | 1540 | { |
1460 | vmcs_write64(guest_val_vmcs, guest_val); | 1541 | vmcs_write64(guest_val_vmcs, guest_val); |
1461 | vmcs_write64(host_val_vmcs, host_val); | 1542 | vmcs_write64(host_val_vmcs, host_val); |
1462 | vmcs_set_bits(VM_ENTRY_CONTROLS, entry); | 1543 | vm_entry_controls_setbit(vmx, entry); |
1463 | vmcs_set_bits(VM_EXIT_CONTROLS, exit); | 1544 | vm_exit_controls_setbit(vmx, exit); |
1464 | } | 1545 | } |
1465 | 1546 | ||
1466 | static void add_atomic_switch_msr(struct vcpu_vmx *vmx, unsigned msr, | 1547 | static void add_atomic_switch_msr(struct vcpu_vmx *vmx, unsigned msr, |
@@ -1472,7 +1553,8 @@ static void add_atomic_switch_msr(struct vcpu_vmx *vmx, unsigned msr, | |||
1472 | switch (msr) { | 1553 | switch (msr) { |
1473 | case MSR_EFER: | 1554 | case MSR_EFER: |
1474 | if (cpu_has_load_ia32_efer) { | 1555 | if (cpu_has_load_ia32_efer) { |
1475 | add_atomic_switch_msr_special(VM_ENTRY_LOAD_IA32_EFER, | 1556 | add_atomic_switch_msr_special(vmx, |
1557 | VM_ENTRY_LOAD_IA32_EFER, | ||
1476 | VM_EXIT_LOAD_IA32_EFER, | 1558 | VM_EXIT_LOAD_IA32_EFER, |
1477 | GUEST_IA32_EFER, | 1559 | GUEST_IA32_EFER, |
1478 | HOST_IA32_EFER, | 1560 | HOST_IA32_EFER, |
@@ -1482,7 +1564,7 @@ static void add_atomic_switch_msr(struct vcpu_vmx *vmx, unsigned msr, | |||
1482 | break; | 1564 | break; |
1483 | case MSR_CORE_PERF_GLOBAL_CTRL: | 1565 | case MSR_CORE_PERF_GLOBAL_CTRL: |
1484 | if (cpu_has_load_perf_global_ctrl) { | 1566 | if (cpu_has_load_perf_global_ctrl) { |
1485 | add_atomic_switch_msr_special( | 1567 | add_atomic_switch_msr_special(vmx, |
1486 | VM_ENTRY_LOAD_IA32_PERF_GLOBAL_CTRL, | 1568 | VM_ENTRY_LOAD_IA32_PERF_GLOBAL_CTRL, |
1487 | VM_EXIT_LOAD_IA32_PERF_GLOBAL_CTRL, | 1569 | VM_EXIT_LOAD_IA32_PERF_GLOBAL_CTRL, |
1488 | GUEST_IA32_PERF_GLOBAL_CTRL, | 1570 | GUEST_IA32_PERF_GLOBAL_CTRL, |
@@ -1647,6 +1729,8 @@ static void vmx_save_host_state(struct kvm_vcpu *vcpu) | |||
1647 | if (is_long_mode(&vmx->vcpu)) | 1729 | if (is_long_mode(&vmx->vcpu)) |
1648 | wrmsrl(MSR_KERNEL_GS_BASE, vmx->msr_guest_kernel_gs_base); | 1730 | wrmsrl(MSR_KERNEL_GS_BASE, vmx->msr_guest_kernel_gs_base); |
1649 | #endif | 1731 | #endif |
1732 | if (boot_cpu_has(X86_FEATURE_MPX)) | ||
1733 | rdmsrl(MSR_IA32_BNDCFGS, vmx->host_state.msr_host_bndcfgs); | ||
1650 | for (i = 0; i < vmx->save_nmsrs; ++i) | 1734 | for (i = 0; i < vmx->save_nmsrs; ++i) |
1651 | kvm_set_shared_msr(vmx->guest_msrs[i].index, | 1735 | kvm_set_shared_msr(vmx->guest_msrs[i].index, |
1652 | vmx->guest_msrs[i].data, | 1736 | vmx->guest_msrs[i].data, |
@@ -1684,6 +1768,8 @@ static void __vmx_load_host_state(struct vcpu_vmx *vmx) | |||
1684 | #ifdef CONFIG_X86_64 | 1768 | #ifdef CONFIG_X86_64 |
1685 | wrmsrl(MSR_KERNEL_GS_BASE, vmx->msr_host_kernel_gs_base); | 1769 | wrmsrl(MSR_KERNEL_GS_BASE, vmx->msr_host_kernel_gs_base); |
1686 | #endif | 1770 | #endif |
1771 | if (vmx->host_state.msr_host_bndcfgs) | ||
1772 | wrmsrl(MSR_IA32_BNDCFGS, vmx->host_state.msr_host_bndcfgs); | ||
1687 | /* | 1773 | /* |
1688 | * If the FPU is not active (through the host task or | 1774 | * If the FPU is not active (through the host task or |
1689 | * the guest vcpu), then restore the cr0.TS bit. | 1775 | * the guest vcpu), then restore the cr0.TS bit. |
@@ -1906,7 +1992,9 @@ static int nested_vmx_check_exception(struct kvm_vcpu *vcpu, unsigned nr) | |||
1906 | if (!(vmcs12->exception_bitmap & (1u << nr))) | 1992 | if (!(vmcs12->exception_bitmap & (1u << nr))) |
1907 | return 0; | 1993 | return 0; |
1908 | 1994 | ||
1909 | nested_vmx_vmexit(vcpu); | 1995 | nested_vmx_vmexit(vcpu, to_vmx(vcpu)->exit_reason, |
1996 | vmcs_read32(VM_EXIT_INTR_INFO), | ||
1997 | vmcs_readl(EXIT_QUALIFICATION)); | ||
1910 | return 1; | 1998 | return 1; |
1911 | } | 1999 | } |
1912 | 2000 | ||
@@ -2183,9 +2271,9 @@ static __init void nested_vmx_setup_ctls_msrs(void) | |||
2183 | */ | 2271 | */ |
2184 | nested_vmx_pinbased_ctls_low |= PIN_BASED_ALWAYSON_WITHOUT_TRUE_MSR; | 2272 | nested_vmx_pinbased_ctls_low |= PIN_BASED_ALWAYSON_WITHOUT_TRUE_MSR; |
2185 | nested_vmx_pinbased_ctls_high &= PIN_BASED_EXT_INTR_MASK | | 2273 | nested_vmx_pinbased_ctls_high &= PIN_BASED_EXT_INTR_MASK | |
2186 | PIN_BASED_NMI_EXITING | PIN_BASED_VIRTUAL_NMIS | | 2274 | PIN_BASED_NMI_EXITING | PIN_BASED_VIRTUAL_NMIS; |
2275 | nested_vmx_pinbased_ctls_high |= PIN_BASED_ALWAYSON_WITHOUT_TRUE_MSR | | ||
2187 | PIN_BASED_VMX_PREEMPTION_TIMER; | 2276 | PIN_BASED_VMX_PREEMPTION_TIMER; |
2188 | nested_vmx_pinbased_ctls_high |= PIN_BASED_ALWAYSON_WITHOUT_TRUE_MSR; | ||
2189 | 2277 | ||
2190 | /* | 2278 | /* |
2191 | * Exit controls | 2279 | * Exit controls |
@@ -2200,15 +2288,12 @@ static __init void nested_vmx_setup_ctls_msrs(void) | |||
2200 | #ifdef CONFIG_X86_64 | 2288 | #ifdef CONFIG_X86_64 |
2201 | VM_EXIT_HOST_ADDR_SPACE_SIZE | | 2289 | VM_EXIT_HOST_ADDR_SPACE_SIZE | |
2202 | #endif | 2290 | #endif |
2203 | VM_EXIT_LOAD_IA32_PAT | VM_EXIT_SAVE_IA32_PAT | | 2291 | VM_EXIT_LOAD_IA32_PAT | VM_EXIT_SAVE_IA32_PAT; |
2292 | nested_vmx_exit_ctls_high |= VM_EXIT_ALWAYSON_WITHOUT_TRUE_MSR | | ||
2293 | VM_EXIT_LOAD_IA32_EFER | VM_EXIT_SAVE_IA32_EFER | | ||
2204 | VM_EXIT_SAVE_VMX_PREEMPTION_TIMER; | 2294 | VM_EXIT_SAVE_VMX_PREEMPTION_TIMER; |
2205 | if (!(nested_vmx_pinbased_ctls_high & PIN_BASED_VMX_PREEMPTION_TIMER) || | 2295 | if (vmx_mpx_supported()) |
2206 | !(nested_vmx_exit_ctls_high & VM_EXIT_SAVE_VMX_PREEMPTION_TIMER)) { | 2296 | nested_vmx_exit_ctls_high |= VM_EXIT_CLEAR_BNDCFGS; |
2207 | nested_vmx_exit_ctls_high &= ~VM_EXIT_SAVE_VMX_PREEMPTION_TIMER; | ||
2208 | nested_vmx_pinbased_ctls_high &= ~PIN_BASED_VMX_PREEMPTION_TIMER; | ||
2209 | } | ||
2210 | nested_vmx_exit_ctls_high |= (VM_EXIT_ALWAYSON_WITHOUT_TRUE_MSR | | ||
2211 | VM_EXIT_LOAD_IA32_EFER | VM_EXIT_SAVE_IA32_EFER); | ||
2212 | 2297 | ||
2213 | /* entry controls */ | 2298 | /* entry controls */ |
2214 | rdmsr(MSR_IA32_VMX_ENTRY_CTLS, | 2299 | rdmsr(MSR_IA32_VMX_ENTRY_CTLS, |
@@ -2222,6 +2307,8 @@ static __init void nested_vmx_setup_ctls_msrs(void) | |||
2222 | VM_ENTRY_LOAD_IA32_PAT; | 2307 | VM_ENTRY_LOAD_IA32_PAT; |
2223 | nested_vmx_entry_ctls_high |= (VM_ENTRY_ALWAYSON_WITHOUT_TRUE_MSR | | 2308 | nested_vmx_entry_ctls_high |= (VM_ENTRY_ALWAYSON_WITHOUT_TRUE_MSR | |
2224 | VM_ENTRY_LOAD_IA32_EFER); | 2309 | VM_ENTRY_LOAD_IA32_EFER); |
2310 | if (vmx_mpx_supported()) | ||
2311 | nested_vmx_entry_ctls_high |= VM_ENTRY_LOAD_BNDCFGS; | ||
2225 | 2312 | ||
2226 | /* cpu-based controls */ | 2313 | /* cpu-based controls */ |
2227 | rdmsr(MSR_IA32_VMX_PROCBASED_CTLS, | 2314 | rdmsr(MSR_IA32_VMX_PROCBASED_CTLS, |
@@ -2277,8 +2364,9 @@ static __init void nested_vmx_setup_ctls_msrs(void) | |||
2277 | 2364 | ||
2278 | /* miscellaneous data */ | 2365 | /* miscellaneous data */ |
2279 | rdmsr(MSR_IA32_VMX_MISC, nested_vmx_misc_low, nested_vmx_misc_high); | 2366 | rdmsr(MSR_IA32_VMX_MISC, nested_vmx_misc_low, nested_vmx_misc_high); |
2280 | nested_vmx_misc_low &= VMX_MISC_PREEMPTION_TIMER_RATE_MASK | | 2367 | nested_vmx_misc_low &= VMX_MISC_SAVE_EFER_LMA; |
2281 | VMX_MISC_SAVE_EFER_LMA; | 2368 | nested_vmx_misc_low |= VMX_MISC_EMULATED_PREEMPTION_TIMER_RATE | |
2369 | VMX_MISC_ACTIVITY_HLT; | ||
2282 | nested_vmx_misc_high = 0; | 2370 | nested_vmx_misc_high = 0; |
2283 | } | 2371 | } |
2284 | 2372 | ||
@@ -2295,32 +2383,10 @@ static inline u64 vmx_control_msr(u32 low, u32 high) | |||
2295 | return low | ((u64)high << 32); | 2383 | return low | ((u64)high << 32); |
2296 | } | 2384 | } |
2297 | 2385 | ||
2298 | /* | 2386 | /* Returns 0 on success, non-0 otherwise. */ |
2299 | * If we allow our guest to use VMX instructions (i.e., nested VMX), we should | ||
2300 | * also let it use VMX-specific MSRs. | ||
2301 | * vmx_get_vmx_msr() and vmx_set_vmx_msr() return 1 when we handled a | ||
2302 | * VMX-specific MSR, or 0 when we haven't (and the caller should handle it | ||
2303 | * like all other MSRs). | ||
2304 | */ | ||
2305 | static int vmx_get_vmx_msr(struct kvm_vcpu *vcpu, u32 msr_index, u64 *pdata) | 2387 | static int vmx_get_vmx_msr(struct kvm_vcpu *vcpu, u32 msr_index, u64 *pdata) |
2306 | { | 2388 | { |
2307 | if (!nested_vmx_allowed(vcpu) && msr_index >= MSR_IA32_VMX_BASIC && | ||
2308 | msr_index <= MSR_IA32_VMX_TRUE_ENTRY_CTLS) { | ||
2309 | /* | ||
2310 | * According to the spec, processors which do not support VMX | ||
2311 | * should throw a #GP(0) when VMX capability MSRs are read. | ||
2312 | */ | ||
2313 | kvm_queue_exception_e(vcpu, GP_VECTOR, 0); | ||
2314 | return 1; | ||
2315 | } | ||
2316 | |||
2317 | switch (msr_index) { | 2389 | switch (msr_index) { |
2318 | case MSR_IA32_FEATURE_CONTROL: | ||
2319 | if (nested_vmx_allowed(vcpu)) { | ||
2320 | *pdata = to_vmx(vcpu)->nested.msr_ia32_feature_control; | ||
2321 | break; | ||
2322 | } | ||
2323 | return 0; | ||
2324 | case MSR_IA32_VMX_BASIC: | 2390 | case MSR_IA32_VMX_BASIC: |
2325 | /* | 2391 | /* |
2326 | * This MSR reports some information about VMX support. We | 2392 | * This MSR reports some information about VMX support. We |
@@ -2387,34 +2453,9 @@ static int vmx_get_vmx_msr(struct kvm_vcpu *vcpu, u32 msr_index, u64 *pdata) | |||
2387 | *pdata = nested_vmx_ept_caps; | 2453 | *pdata = nested_vmx_ept_caps; |
2388 | break; | 2454 | break; |
2389 | default: | 2455 | default: |
2390 | return 0; | ||
2391 | } | ||
2392 | |||
2393 | return 1; | ||
2394 | } | ||
2395 | |||
2396 | static int vmx_set_vmx_msr(struct kvm_vcpu *vcpu, struct msr_data *msr_info) | ||
2397 | { | ||
2398 | u32 msr_index = msr_info->index; | ||
2399 | u64 data = msr_info->data; | ||
2400 | bool host_initialized = msr_info->host_initiated; | ||
2401 | |||
2402 | if (!nested_vmx_allowed(vcpu)) | ||
2403 | return 0; | ||
2404 | |||
2405 | if (msr_index == MSR_IA32_FEATURE_CONTROL) { | ||
2406 | if (!host_initialized && | ||
2407 | to_vmx(vcpu)->nested.msr_ia32_feature_control | ||
2408 | & FEATURE_CONTROL_LOCKED) | ||
2409 | return 0; | ||
2410 | to_vmx(vcpu)->nested.msr_ia32_feature_control = data; | ||
2411 | return 1; | 2456 | return 1; |
2412 | } | 2457 | } |
2413 | 2458 | ||
2414 | /* | ||
2415 | * No need to treat VMX capability MSRs specially: If we don't handle | ||
2416 | * them, handle_wrmsr will #GP(0), which is correct (they are readonly) | ||
2417 | */ | ||
2418 | return 0; | 2459 | return 0; |
2419 | } | 2460 | } |
2420 | 2461 | ||
@@ -2460,13 +2501,25 @@ static int vmx_get_msr(struct kvm_vcpu *vcpu, u32 msr_index, u64 *pdata) | |||
2460 | case MSR_IA32_SYSENTER_ESP: | 2501 | case MSR_IA32_SYSENTER_ESP: |
2461 | data = vmcs_readl(GUEST_SYSENTER_ESP); | 2502 | data = vmcs_readl(GUEST_SYSENTER_ESP); |
2462 | break; | 2503 | break; |
2504 | case MSR_IA32_BNDCFGS: | ||
2505 | if (!vmx_mpx_supported()) | ||
2506 | return 1; | ||
2507 | data = vmcs_read64(GUEST_BNDCFGS); | ||
2508 | break; | ||
2509 | case MSR_IA32_FEATURE_CONTROL: | ||
2510 | if (!nested_vmx_allowed(vcpu)) | ||
2511 | return 1; | ||
2512 | data = to_vmx(vcpu)->nested.msr_ia32_feature_control; | ||
2513 | break; | ||
2514 | case MSR_IA32_VMX_BASIC ... MSR_IA32_VMX_VMFUNC: | ||
2515 | if (!nested_vmx_allowed(vcpu)) | ||
2516 | return 1; | ||
2517 | return vmx_get_vmx_msr(vcpu, msr_index, pdata); | ||
2463 | case MSR_TSC_AUX: | 2518 | case MSR_TSC_AUX: |
2464 | if (!to_vmx(vcpu)->rdtscp_enabled) | 2519 | if (!to_vmx(vcpu)->rdtscp_enabled) |
2465 | return 1; | 2520 | return 1; |
2466 | /* Otherwise falls through */ | 2521 | /* Otherwise falls through */ |
2467 | default: | 2522 | default: |
2468 | if (vmx_get_vmx_msr(vcpu, msr_index, pdata)) | ||
2469 | return 0; | ||
2470 | msr = find_msr_entry(to_vmx(vcpu), msr_index); | 2523 | msr = find_msr_entry(to_vmx(vcpu), msr_index); |
2471 | if (msr) { | 2524 | if (msr) { |
2472 | data = msr->data; | 2525 | data = msr->data; |
@@ -2479,6 +2532,8 @@ static int vmx_get_msr(struct kvm_vcpu *vcpu, u32 msr_index, u64 *pdata) | |||
2479 | return 0; | 2532 | return 0; |
2480 | } | 2533 | } |
2481 | 2534 | ||
2535 | static void vmx_leave_nested(struct kvm_vcpu *vcpu); | ||
2536 | |||
2482 | /* | 2537 | /* |
2483 | * Writes msr value into the appropriate "register". | 2538 |
2484 | * Returns 0 on success, non-0 otherwise. | 2539 | * Returns 0 on success, non-0 otherwise. |
@@ -2519,6 +2574,11 @@ static int vmx_set_msr(struct kvm_vcpu *vcpu, struct msr_data *msr_info) | |||
2519 | case MSR_IA32_SYSENTER_ESP: | 2574 | case MSR_IA32_SYSENTER_ESP: |
2520 | vmcs_writel(GUEST_SYSENTER_ESP, data); | 2575 | vmcs_writel(GUEST_SYSENTER_ESP, data); |
2521 | break; | 2576 | break; |
2577 | case MSR_IA32_BNDCFGS: | ||
2578 | if (!vmx_mpx_supported()) | ||
2579 | return 1; | ||
2580 | vmcs_write64(GUEST_BNDCFGS, data); | ||
2581 | break; | ||
2522 | case MSR_IA32_TSC: | 2582 | case MSR_IA32_TSC: |
2523 | kvm_write_tsc(vcpu, msr_info); | 2583 | kvm_write_tsc(vcpu, msr_info); |
2524 | break; | 2584 | break; |
@@ -2533,6 +2593,17 @@ static int vmx_set_msr(struct kvm_vcpu *vcpu, struct msr_data *msr_info) | |||
2533 | case MSR_IA32_TSC_ADJUST: | 2593 | case MSR_IA32_TSC_ADJUST: |
2534 | ret = kvm_set_msr_common(vcpu, msr_info); | 2594 | ret = kvm_set_msr_common(vcpu, msr_info); |
2535 | break; | 2595 | break; |
2596 | case MSR_IA32_FEATURE_CONTROL: | ||
2597 | if (!nested_vmx_allowed(vcpu) || | ||
2598 | (to_vmx(vcpu)->nested.msr_ia32_feature_control & | ||
2599 | FEATURE_CONTROL_LOCKED && !msr_info->host_initiated)) | ||
2600 | return 1; | ||
2601 | vmx->nested.msr_ia32_feature_control = data; | ||
2602 | if (msr_info->host_initiated && data == 0) | ||
2603 | vmx_leave_nested(vcpu); | ||
2604 | break; | ||
2605 | case MSR_IA32_VMX_BASIC ... MSR_IA32_VMX_VMFUNC: | ||
2606 | return 1; /* they are read-only */ | ||
2536 | case MSR_TSC_AUX: | 2607 | case MSR_TSC_AUX: |
2537 | if (!vmx->rdtscp_enabled) | 2608 | if (!vmx->rdtscp_enabled) |
2538 | return 1; | 2609 | return 1; |
@@ -2541,8 +2612,6 @@ static int vmx_set_msr(struct kvm_vcpu *vcpu, struct msr_data *msr_info) | |||
2541 | return 1; | 2612 | return 1; |
2542 | /* Otherwise falls through */ | 2613 | /* Otherwise falls through */ |
2543 | default: | 2614 | default: |
2544 | if (vmx_set_vmx_msr(vcpu, msr_info)) | ||
2545 | break; | ||
2546 | msr = find_msr_entry(vmx, msr_index); | 2615 | msr = find_msr_entry(vmx, msr_index); |
2547 | if (msr) { | 2616 | if (msr) { |
2548 | msr->data = data; | 2617 | msr->data = data; |
@@ -2795,12 +2864,12 @@ static __init int setup_vmcs_config(struct vmcs_config *vmcs_conf) | |||
2795 | vmx_capability.ept, vmx_capability.vpid); | 2864 | vmx_capability.ept, vmx_capability.vpid); |
2796 | } | 2865 | } |
2797 | 2866 | ||
2798 | min = 0; | 2867 | min = VM_EXIT_SAVE_DEBUG_CONTROLS; |
2799 | #ifdef CONFIG_X86_64 | 2868 | #ifdef CONFIG_X86_64 |
2800 | min |= VM_EXIT_HOST_ADDR_SPACE_SIZE; | 2869 | min |= VM_EXIT_HOST_ADDR_SPACE_SIZE; |
2801 | #endif | 2870 | #endif |
2802 | opt = VM_EXIT_SAVE_IA32_PAT | VM_EXIT_LOAD_IA32_PAT | | 2871 | opt = VM_EXIT_SAVE_IA32_PAT | VM_EXIT_LOAD_IA32_PAT | |
2803 | VM_EXIT_ACK_INTR_ON_EXIT; | 2872 | VM_EXIT_ACK_INTR_ON_EXIT | VM_EXIT_CLEAR_BNDCFGS; |
2804 | if (adjust_vmx_controls(min, opt, MSR_IA32_VMX_EXIT_CTLS, | 2873 | if (adjust_vmx_controls(min, opt, MSR_IA32_VMX_EXIT_CTLS, |
2805 | &_vmexit_control) < 0) | 2874 | &_vmexit_control) < 0) |
2806 | return -EIO; | 2875 | return -EIO; |
@@ -2816,8 +2885,8 @@ static __init int setup_vmcs_config(struct vmcs_config *vmcs_conf) | |||
2816 | !(_vmexit_control & VM_EXIT_ACK_INTR_ON_EXIT)) | 2885 | !(_vmexit_control & VM_EXIT_ACK_INTR_ON_EXIT)) |
2817 | _pin_based_exec_control &= ~PIN_BASED_POSTED_INTR; | 2886 | _pin_based_exec_control &= ~PIN_BASED_POSTED_INTR; |
2818 | 2887 | ||
2819 | min = 0; | 2888 | min = VM_ENTRY_LOAD_DEBUG_CONTROLS; |
2820 | opt = VM_ENTRY_LOAD_IA32_PAT; | 2889 | opt = VM_ENTRY_LOAD_IA32_PAT | VM_ENTRY_LOAD_BNDCFGS; |
2821 | if (adjust_vmx_controls(min, opt, MSR_IA32_VMX_ENTRY_CTLS, | 2890 | if (adjust_vmx_controls(min, opt, MSR_IA32_VMX_ENTRY_CTLS, |
2822 | &_vmentry_control) < 0) | 2891 | &_vmentry_control) < 0) |
2823 | return -EIO; | 2892 | return -EIO; |
@@ -3182,14 +3251,10 @@ static void vmx_set_efer(struct kvm_vcpu *vcpu, u64 efer) | |||
3182 | vmx_load_host_state(to_vmx(vcpu)); | 3251 | vmx_load_host_state(to_vmx(vcpu)); |
3183 | vcpu->arch.efer = efer; | 3252 | vcpu->arch.efer = efer; |
3184 | if (efer & EFER_LMA) { | 3253 | if (efer & EFER_LMA) { |
3185 | vmcs_write32(VM_ENTRY_CONTROLS, | 3254 | vm_entry_controls_setbit(to_vmx(vcpu), VM_ENTRY_IA32E_MODE); |
3186 | vmcs_read32(VM_ENTRY_CONTROLS) | | ||
3187 | VM_ENTRY_IA32E_MODE); | ||
3188 | msr->data = efer; | 3255 | msr->data = efer; |
3189 | } else { | 3256 | } else { |
3190 | vmcs_write32(VM_ENTRY_CONTROLS, | 3257 | vm_entry_controls_clearbit(to_vmx(vcpu), VM_ENTRY_IA32E_MODE); |
3191 | vmcs_read32(VM_ENTRY_CONTROLS) & | ||
3192 | ~VM_ENTRY_IA32E_MODE); | ||
3193 | 3258 | ||
3194 | msr->data = efer & ~EFER_LME; | 3259 | msr->data = efer & ~EFER_LME; |
3195 | } | 3260 | } |
@@ -3217,9 +3282,7 @@ static void enter_lmode(struct kvm_vcpu *vcpu) | |||
3217 | 3282 | ||
3218 | static void exit_lmode(struct kvm_vcpu *vcpu) | 3283 | static void exit_lmode(struct kvm_vcpu *vcpu) |
3219 | { | 3284 | { |
3220 | vmcs_write32(VM_ENTRY_CONTROLS, | 3285 | vm_entry_controls_clearbit(to_vmx(vcpu), VM_ENTRY_IA32E_MODE); |
3221 | vmcs_read32(VM_ENTRY_CONTROLS) | ||
3222 | & ~VM_ENTRY_IA32E_MODE); | ||
3223 | vmx_set_efer(vcpu, vcpu->arch.efer & ~EFER_LMA); | 3286 | vmx_set_efer(vcpu, vcpu->arch.efer & ~EFER_LMA); |
3224 | } | 3287 | } |
3225 | 3288 | ||
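The hunks above replace open-coded vmcs_read32()/vmcs_write32() read-modify-write sequences on VM_ENTRY_CONTROLS with vm_entry_controls_setbit()/vm_entry_controls_clearbit(). The usual point of such helpers is to shadow the control word in the vCPU structure so reads cost nothing and redundant VMWRITEs are skipped; a hedged sketch of what the helper family might look like (vmcs_write32() stubbed out, the field encoding illustrative):

#include <stdint.h>
#include <stdio.h>

#define VM_ENTRY_CONTROLS 0x4012u          /* illustrative field encoding */

static void vmcs_write32(uint32_t field, uint32_t val) /* stub for VMWRITE */
{
        printf("VMWRITE %#x <- %#x\n", field, val);
}

struct vcpu_vmx {
        uint32_t vm_entry_controls_shadow;
};

static void vm_entry_controls_init(struct vcpu_vmx *vmx, uint32_t val)
{
        vmcs_write32(VM_ENTRY_CONTROLS, val);
        vmx->vm_entry_controls_shadow = val;
}

static void vm_entry_controls_set(struct vcpu_vmx *vmx, uint32_t val)
{
        if (vmx->vm_entry_controls_shadow != val)   /* skip no-op VMWRITEs */
                vm_entry_controls_init(vmx, val);
}

static uint32_t vm_entry_controls_get(struct vcpu_vmx *vmx)
{
        return vmx->vm_entry_controls_shadow;       /* no VMREAD needed */
}

static void vm_entry_controls_setbit(struct vcpu_vmx *vmx, uint32_t bit)
{
        vm_entry_controls_set(vmx, vm_entry_controls_get(vmx) | bit);
}

static void vm_entry_controls_clearbit(struct vcpu_vmx *vmx, uint32_t bit)
{
        vm_entry_controls_set(vmx, vm_entry_controls_get(vmx) & ~bit);
}

int main(void)
{
        struct vcpu_vmx vmx;

        vm_entry_controls_init(&vmx, 0);
        vm_entry_controls_setbit(&vmx, 1u << 9);    /* one VMWRITE */
        vm_entry_controls_setbit(&vmx, 1u << 9);    /* absorbed by the shadow */
        return 0;
}

This also explains the vm_entry_controls_init()/vm_exit_controls_init() calls that appear later in nested_vmx_vmexit(): whenever the VMCS is rewritten behind the helpers' backs, the shadow must be resynchronized from a fresh VMREAD.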
@@ -4192,6 +4255,10 @@ static u32 vmx_pin_based_exec_ctrl(struct vcpu_vmx *vmx) | |||
4192 | static u32 vmx_exec_control(struct vcpu_vmx *vmx) | 4255 | static u32 vmx_exec_control(struct vcpu_vmx *vmx) |
4193 | { | 4256 | { |
4194 | u32 exec_control = vmcs_config.cpu_based_exec_ctrl; | 4257 | u32 exec_control = vmcs_config.cpu_based_exec_ctrl; |
4258 | |||
4259 | if (vmx->vcpu.arch.switch_db_regs & KVM_DEBUGREG_WONT_EXIT) | ||
4260 | exec_control &= ~CPU_BASED_MOV_DR_EXITING; | ||
4261 | |||
4195 | if (!vm_need_tpr_shadow(vmx->vcpu.kvm)) { | 4262 | if (!vm_need_tpr_shadow(vmx->vcpu.kvm)) { |
4196 | exec_control &= ~CPU_BASED_TPR_SHADOW; | 4263 | exec_control &= ~CPU_BASED_TPR_SHADOW; |
4197 | #ifdef CONFIG_X86_64 | 4264 | #ifdef CONFIG_X86_64 |
@@ -4346,10 +4413,11 @@ static int vmx_vcpu_setup(struct vcpu_vmx *vmx) | |||
4346 | ++vmx->nmsrs; | 4413 | ++vmx->nmsrs; |
4347 | } | 4414 | } |
4348 | 4415 | ||
4349 | vmcs_write32(VM_EXIT_CONTROLS, vmcs_config.vmexit_ctrl); | 4416 | |
4417 | vm_exit_controls_init(vmx, vmcs_config.vmexit_ctrl); | ||
4350 | 4418 | ||
4351 | /* 22.2.1, 20.8.1 */ | 4419 | /* 22.2.1, 20.8.1 */ |
4352 | vmcs_write32(VM_ENTRY_CONTROLS, vmcs_config.vmentry_ctrl); | 4420 | vm_entry_controls_init(vmx, vmcs_config.vmentry_ctrl); |
4353 | 4421 | ||
4354 | vmcs_writel(CR0_GUEST_HOST_MASK, ~0UL); | 4422 | vmcs_writel(CR0_GUEST_HOST_MASK, ~0UL); |
4355 | set_cr4_guest_host_mask(vmx); | 4423 | set_cr4_guest_host_mask(vmx); |
@@ -4360,7 +4428,7 @@ static int vmx_vcpu_setup(struct vcpu_vmx *vmx) | |||
4360 | static void vmx_vcpu_reset(struct kvm_vcpu *vcpu) | 4428 | static void vmx_vcpu_reset(struct kvm_vcpu *vcpu) |
4361 | { | 4429 | { |
4362 | struct vcpu_vmx *vmx = to_vmx(vcpu); | 4430 | struct vcpu_vmx *vmx = to_vmx(vcpu); |
4363 | u64 msr; | 4431 | struct msr_data apic_base_msr; |
4364 | 4432 | ||
4365 | vmx->rmode.vm86_active = 0; | 4433 | vmx->rmode.vm86_active = 0; |
4366 | 4434 | ||
@@ -4368,10 +4436,11 @@ static void vmx_vcpu_reset(struct kvm_vcpu *vcpu) | |||
4368 | 4436 | ||
4369 | vmx->vcpu.arch.regs[VCPU_REGS_RDX] = get_rdx_init_val(); | 4437 | vmx->vcpu.arch.regs[VCPU_REGS_RDX] = get_rdx_init_val(); |
4370 | kvm_set_cr8(&vmx->vcpu, 0); | 4438 | kvm_set_cr8(&vmx->vcpu, 0); |
4371 | msr = 0xfee00000 | MSR_IA32_APICBASE_ENABLE; | 4439 | apic_base_msr.data = 0xfee00000 | MSR_IA32_APICBASE_ENABLE; |
4372 | if (kvm_vcpu_is_bsp(&vmx->vcpu)) | 4440 | if (kvm_vcpu_is_bsp(&vmx->vcpu)) |
4373 | msr |= MSR_IA32_APICBASE_BSP; | 4441 | apic_base_msr.data |= MSR_IA32_APICBASE_BSP; |
4374 | kvm_set_apic_base(&vmx->vcpu, msr); | 4442 | apic_base_msr.host_initiated = true; |
4443 | kvm_set_apic_base(&vmx->vcpu, &apic_base_msr); | ||
4375 | 4444 | ||
4376 | vmx_segment_cache_clear(vmx); | 4445 | vmx_segment_cache_clear(vmx); |
4377 | 4446 | ||
@@ -4463,39 +4532,28 @@ static bool nested_exit_on_nmi(struct kvm_vcpu *vcpu) | |||
4463 | PIN_BASED_NMI_EXITING; | 4532 | PIN_BASED_NMI_EXITING; |
4464 | } | 4533 | } |
4465 | 4534 | ||
4466 | static int enable_irq_window(struct kvm_vcpu *vcpu) | 4535 | static void enable_irq_window(struct kvm_vcpu *vcpu) |
4467 | { | 4536 | { |
4468 | u32 cpu_based_vm_exec_control; | 4537 | u32 cpu_based_vm_exec_control; |
4469 | 4538 | ||
4470 | if (is_guest_mode(vcpu) && nested_exit_on_intr(vcpu)) | ||
4471 | /* | ||
4472 | * We get here if vmx_interrupt_allowed() said we can't | ||
4473 | * inject to L1 now because L2 must run. The caller will have | ||
4474 | * to make L2 exit right after entry, so we can inject to L1 | ||
4475 | * more promptly. | ||
4476 | */ | ||
4477 | return -EBUSY; | ||
4478 | |||
4479 | cpu_based_vm_exec_control = vmcs_read32(CPU_BASED_VM_EXEC_CONTROL); | 4539 | cpu_based_vm_exec_control = vmcs_read32(CPU_BASED_VM_EXEC_CONTROL); |
4480 | cpu_based_vm_exec_control |= CPU_BASED_VIRTUAL_INTR_PENDING; | 4540 | cpu_based_vm_exec_control |= CPU_BASED_VIRTUAL_INTR_PENDING; |
4481 | vmcs_write32(CPU_BASED_VM_EXEC_CONTROL, cpu_based_vm_exec_control); | 4541 | vmcs_write32(CPU_BASED_VM_EXEC_CONTROL, cpu_based_vm_exec_control); |
4482 | return 0; | ||
4483 | } | 4542 | } |
4484 | 4543 | ||
4485 | static int enable_nmi_window(struct kvm_vcpu *vcpu) | 4544 | static void enable_nmi_window(struct kvm_vcpu *vcpu) |
4486 | { | 4545 | { |
4487 | u32 cpu_based_vm_exec_control; | 4546 | u32 cpu_based_vm_exec_control; |
4488 | 4547 | ||
4489 | if (!cpu_has_virtual_nmis()) | 4548 | if (!cpu_has_virtual_nmis() || |
4490 | return enable_irq_window(vcpu); | 4549 | vmcs_read32(GUEST_INTERRUPTIBILITY_INFO) & GUEST_INTR_STATE_STI) { |
4491 | 4550 | enable_irq_window(vcpu); | |
4492 | if (vmcs_read32(GUEST_INTERRUPTIBILITY_INFO) & GUEST_INTR_STATE_STI) | 4551 | return; |
4493 | return enable_irq_window(vcpu); | 4552 | } |
4494 | 4553 | ||
4495 | cpu_based_vm_exec_control = vmcs_read32(CPU_BASED_VM_EXEC_CONTROL); | 4554 | cpu_based_vm_exec_control = vmcs_read32(CPU_BASED_VM_EXEC_CONTROL); |
4496 | cpu_based_vm_exec_control |= CPU_BASED_VIRTUAL_NMI_PENDING; | 4555 | cpu_based_vm_exec_control |= CPU_BASED_VIRTUAL_NMI_PENDING; |
4497 | vmcs_write32(CPU_BASED_VM_EXEC_CONTROL, cpu_based_vm_exec_control); | 4556 | vmcs_write32(CPU_BASED_VM_EXEC_CONTROL, cpu_based_vm_exec_control); |
4498 | return 0; | ||
4499 | } | 4557 | } |
4500 | 4558 | ||
4501 | static void vmx_inject_irq(struct kvm_vcpu *vcpu) | 4559 | static void vmx_inject_irq(struct kvm_vcpu *vcpu) |
@@ -4587,25 +4645,8 @@ static void vmx_set_nmi_mask(struct kvm_vcpu *vcpu, bool masked) | |||
4587 | 4645 | ||
4588 | static int vmx_nmi_allowed(struct kvm_vcpu *vcpu) | 4646 | static int vmx_nmi_allowed(struct kvm_vcpu *vcpu) |
4589 | { | 4647 | { |
4590 | if (is_guest_mode(vcpu)) { | 4648 | if (to_vmx(vcpu)->nested.nested_run_pending) |
4591 | struct vmcs12 *vmcs12 = get_vmcs12(vcpu); | 4649 | return 0; |
4592 | |||
4593 | if (to_vmx(vcpu)->nested.nested_run_pending) | ||
4594 | return 0; | ||
4595 | if (nested_exit_on_nmi(vcpu)) { | ||
4596 | nested_vmx_vmexit(vcpu); | ||
4597 | vmcs12->vm_exit_reason = EXIT_REASON_EXCEPTION_NMI; | ||
4598 | vmcs12->vm_exit_intr_info = NMI_VECTOR | | ||
4599 | INTR_TYPE_NMI_INTR | INTR_INFO_VALID_MASK; | ||
4600 | /* | ||
4601 | * The NMI-triggered VM exit counts as injection: | ||
4602 | * clear this one and block further NMIs. | ||
4603 | */ | ||
4604 | vcpu->arch.nmi_pending = 0; | ||
4605 | vmx_set_nmi_mask(vcpu, true); | ||
4606 | return 0; | ||
4607 | } | ||
4608 | } | ||
4609 | 4650 | ||
4610 | if (!cpu_has_virtual_nmis() && to_vmx(vcpu)->soft_vnmi_blocked) | 4651 | if (!cpu_has_virtual_nmis() && to_vmx(vcpu)->soft_vnmi_blocked) |
4611 | return 0; | 4652 | return 0; |
@@ -4617,23 +4658,8 @@ static int vmx_nmi_allowed(struct kvm_vcpu *vcpu) | |||
4617 | 4658 | ||
4618 | static int vmx_interrupt_allowed(struct kvm_vcpu *vcpu) | 4659 | static int vmx_interrupt_allowed(struct kvm_vcpu *vcpu) |
4619 | { | 4660 | { |
4620 | if (is_guest_mode(vcpu)) { | 4661 | return (!to_vmx(vcpu)->nested.nested_run_pending && |
4621 | struct vmcs12 *vmcs12 = get_vmcs12(vcpu); | 4662 | vmcs_readl(GUEST_RFLAGS) & X86_EFLAGS_IF) && |
4622 | |||
4623 | if (to_vmx(vcpu)->nested.nested_run_pending) | ||
4624 | return 0; | ||
4625 | if (nested_exit_on_intr(vcpu)) { | ||
4626 | nested_vmx_vmexit(vcpu); | ||
4627 | vmcs12->vm_exit_reason = | ||
4628 | EXIT_REASON_EXTERNAL_INTERRUPT; | ||
4629 | vmcs12->vm_exit_intr_info = 0; | ||
4630 | /* | ||
4631 | * fall through to normal code, but now in L1, not L2 | ||
4632 | */ | ||
4633 | } | ||
4634 | } | ||
4635 | |||
4636 | return (vmcs_readl(GUEST_RFLAGS) & X86_EFLAGS_IF) && | ||
4637 | !(vmcs_read32(GUEST_INTERRUPTIBILITY_INFO) & | 4663 | !(vmcs_read32(GUEST_INTERRUPTIBILITY_INFO) & |
4638 | (GUEST_INTR_STATE_STI | GUEST_INTR_STATE_MOV_SS)); | 4664 | (GUEST_INTR_STATE_STI | GUEST_INTR_STATE_MOV_SS)); |
4639 | } | 4665 | } |
@@ -4812,7 +4838,8 @@ static int handle_exception(struct kvm_vcpu *vcpu) | |||
4812 | dr6 = vmcs_readl(EXIT_QUALIFICATION); | 4838 | dr6 = vmcs_readl(EXIT_QUALIFICATION); |
4813 | if (!(vcpu->guest_debug & | 4839 | if (!(vcpu->guest_debug & |
4814 | (KVM_GUESTDBG_SINGLESTEP | KVM_GUESTDBG_USE_HW_BP))) { | 4840 | (KVM_GUESTDBG_SINGLESTEP | KVM_GUESTDBG_USE_HW_BP))) { |
4815 | vcpu->arch.dr6 = dr6 | DR6_FIXED_1; | 4841 | vcpu->arch.dr6 &= ~15; |
4842 | vcpu->arch.dr6 |= dr6; | ||
4816 | kvm_queue_exception(vcpu, DB_VECTOR); | 4843 | kvm_queue_exception(vcpu, DB_VECTOR); |
4817 | return 1; | 4844 | return 1; |
4818 | } | 4845 | } |
@@ -5075,19 +5102,66 @@ static int handle_dr(struct kvm_vcpu *vcpu) | |||
5075 | } | 5102 | } |
5076 | } | 5103 | } |
5077 | 5104 | ||
5105 | if (vcpu->guest_debug == 0) { | ||
5106 | u32 cpu_based_vm_exec_control; | ||
5107 | |||
5108 | cpu_based_vm_exec_control = vmcs_read32(CPU_BASED_VM_EXEC_CONTROL); | ||
5109 | cpu_based_vm_exec_control &= ~CPU_BASED_MOV_DR_EXITING; | ||
5110 | vmcs_write32(CPU_BASED_VM_EXEC_CONTROL, cpu_based_vm_exec_control); | ||
5111 | |||
5112 | /* | ||
5113 | * No more DR vmexits; force a reload of the debug registers | ||
5114 | * and reenter on this instruction. The next vmexit will | ||
5115 | * retrieve the full state of the debug registers. | ||
5116 | */ | ||
5117 | vcpu->arch.switch_db_regs |= KVM_DEBUGREG_WONT_EXIT; | ||
5118 | return 1; | ||
5119 | } | ||
5120 | |||
5078 | exit_qualification = vmcs_readl(EXIT_QUALIFICATION); | 5121 | exit_qualification = vmcs_readl(EXIT_QUALIFICATION); |
5079 | dr = exit_qualification & DEBUG_REG_ACCESS_NUM; | 5122 | dr = exit_qualification & DEBUG_REG_ACCESS_NUM; |
5080 | reg = DEBUG_REG_ACCESS_REG(exit_qualification); | 5123 | reg = DEBUG_REG_ACCESS_REG(exit_qualification); |
5081 | if (exit_qualification & TYPE_MOV_FROM_DR) { | 5124 | if (exit_qualification & TYPE_MOV_FROM_DR) { |
5082 | unsigned long val; | 5125 | unsigned long val; |
5083 | if (!kvm_get_dr(vcpu, dr, &val)) | 5126 | |
5084 | kvm_register_write(vcpu, reg, val); | 5127 | if (kvm_get_dr(vcpu, dr, &val)) |
5128 | return 1; | ||
5129 | kvm_register_write(vcpu, reg, val); | ||
5085 | } else | 5130 | } else |
5086 | kvm_set_dr(vcpu, dr, vcpu->arch.regs[reg]); | 5131 | if (kvm_set_dr(vcpu, dr, vcpu->arch.regs[reg])) |
5132 | return 1; | ||
5133 | |||
5087 | skip_emulated_instruction(vcpu); | 5134 | skip_emulated_instruction(vcpu); |
5088 | return 1; | 5135 | return 1; |
5089 | } | 5136 | } |
5090 | 5137 | ||
5138 | static u64 vmx_get_dr6(struct kvm_vcpu *vcpu) | ||
5139 | { | ||
5140 | return vcpu->arch.dr6; | ||
5141 | } | ||
5142 | |||
5143 | static void vmx_set_dr6(struct kvm_vcpu *vcpu, unsigned long val) | ||
5144 | { | ||
5145 | } | ||
5146 | |||
5147 | static void vmx_sync_dirty_debug_regs(struct kvm_vcpu *vcpu) | ||
5148 | { | ||
5149 | u32 cpu_based_vm_exec_control; | ||
5150 | |||
5151 | get_debugreg(vcpu->arch.db[0], 0); | ||
5152 | get_debugreg(vcpu->arch.db[1], 1); | ||
5153 | get_debugreg(vcpu->arch.db[2], 2); | ||
5154 | get_debugreg(vcpu->arch.db[3], 3); | ||
5155 | get_debugreg(vcpu->arch.dr6, 6); | ||
5156 | vcpu->arch.dr7 = vmcs_readl(GUEST_DR7); | ||
5157 | |||
5158 | vcpu->arch.switch_db_regs &= ~KVM_DEBUGREG_WONT_EXIT; | ||
5159 | |||
5160 | cpu_based_vm_exec_control = vmcs_read32(CPU_BASED_VM_EXEC_CONTROL); | ||
5161 | cpu_based_vm_exec_control |= CPU_BASED_MOV_DR_EXITING; | ||
5162 | vmcs_write32(CPU_BASED_VM_EXEC_CONTROL, cpu_based_vm_exec_control); | ||
5163 | } | ||
5164 | |||
5091 | static void vmx_set_dr7(struct kvm_vcpu *vcpu, unsigned long val) | 5165 | static void vmx_set_dr7(struct kvm_vcpu *vcpu, unsigned long val) |
5092 | { | 5166 | { |
5093 | vmcs_writel(GUEST_DR7, val); | 5167 | vmcs_writel(GUEST_DR7, val); |
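Taken together, the handle_dr() early-out and the new vmx_sync_dirty_debug_regs() implement lazy debug-register handling: on the first MOV DR exit while userspace is not debugging, KVM stops intercepting debug-register accesses and lets the guest own DR0-DR7 on the hardware; on the next VM exit the dirty state is pulled back into the vCPU and interception is rearmed. A stripped-down sketch of the round trip, with a plain bitfield standing in for the VMCS plumbing (KVM_DEBUGREG_WONT_EXIT's value is an assumption):

#include <stdint.h>

#define CPU_BASED_MOV_DR_EXITING (1u << 23)   /* real VMX control bit */
#define KVM_DEBUGREG_WONT_EXIT   (1u << 1)    /* assumed flag value */

struct vcpu {
        uint32_t exec_controls;               /* shadow of CPU_BASED_VM_EXEC_CONTROL */
        uint32_t switch_db_regs;
        uint64_t db[4], dr6, dr7;
};

/* First MOV DR exit with vcpu->guest_debug == 0: stop exiting and re-enter
 * on the same instruction; the guest now works on the hardware registers. */
static void handle_first_mov_dr(struct vcpu *v)
{
        v->exec_controls &= ~CPU_BASED_MOV_DR_EXITING;
        v->switch_db_regs |= KVM_DEBUGREG_WONT_EXIT;
}

/* On the next VM exit: retrieve the now-dirty hardware state and rearm
 * interception, so the intercept cost is paid only while it is useful. */
static void sync_dirty_debug_regs(struct vcpu *v, const uint64_t hw_db[4],
                                  uint64_t hw_dr6, uint64_t hw_dr7)
{
        int i;

        for (i = 0; i < 4; i++)
                v->db[i] = hw_db[i];
        v->dr6 = hw_dr6;
        v->dr7 = hw_dr7;
        v->switch_db_regs &= ~KVM_DEBUGREG_WONT_EXIT;
        v->exec_controls |= CPU_BASED_MOV_DR_EXITING;
}

The matching piece is the vmx_exec_control() hunk earlier in this diff, which keeps CPU_BASED_MOV_DR_EXITING cleared while KVM_DEBUGREG_WONT_EXIT is set, so the two views stay consistent across VMCS rebuilds.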
@@ -5687,6 +5761,18 @@ static void nested_vmx_failValid(struct kvm_vcpu *vcpu, | |||
5687 | */ | 5761 | */ |
5688 | } | 5762 | } |
5689 | 5763 | ||
5764 | static enum hrtimer_restart vmx_preemption_timer_fn(struct hrtimer *timer) | ||
5765 | { | ||
5766 | struct vcpu_vmx *vmx = | ||
5767 | container_of(timer, struct vcpu_vmx, nested.preemption_timer); | ||
5768 | |||
5769 | vmx->nested.preemption_timer_expired = true; | ||
5770 | kvm_make_request(KVM_REQ_EVENT, &vmx->vcpu); | ||
5771 | kvm_vcpu_kick(&vmx->vcpu); | ||
5772 | |||
5773 | return HRTIMER_NORESTART; | ||
5774 | } | ||
5775 | |||
5690 | /* | 5776 | /* |
5691 | * Emulate the VMXON instruction. | 5777 | * Emulate the VMXON instruction. |
5692 | * Currently, we just remember that VMX is active, and do not save or even | 5778 | * Currently, we just remember that VMX is active, and do not save or even |
@@ -5751,6 +5837,10 @@ static int handle_vmon(struct kvm_vcpu *vcpu) | |||
5751 | INIT_LIST_HEAD(&(vmx->nested.vmcs02_pool)); | 5837 | INIT_LIST_HEAD(&(vmx->nested.vmcs02_pool)); |
5752 | vmx->nested.vmcs02_num = 0; | 5838 | vmx->nested.vmcs02_num = 0; |
5753 | 5839 | ||
5840 | hrtimer_init(&vmx->nested.preemption_timer, CLOCK_MONOTONIC, | ||
5841 | HRTIMER_MODE_REL); | ||
5842 | vmx->nested.preemption_timer.function = vmx_preemption_timer_fn; | ||
5843 | |||
5754 | vmx->nested.vmxon = true; | 5844 | vmx->nested.vmxon = true; |
5755 | 5845 | ||
5756 | skip_emulated_instruction(vcpu); | 5846 | skip_emulated_instruction(vcpu); |
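vmx_preemption_timer_fn() and the hrtimer_init() call above are the heart of the fully emulated preemption timer: the hrtimer fires in hardirq context, where it cannot touch the VMCS, so it only records the expiry, raises KVM_REQ_EVENT and kicks the vCPU out of guest mode; the actual nested VM exit is synthesized later from vmx_check_nested_events(). HRTIMER_NORESTART makes it one-shot: it is re-armed on every L2 entry (vmx_start_preemption_timer() below) and cancelled when the remaining time is saved on L2 exit. The lifecycle, reduced to the three kernel hrtimer calls involved (kernel context; callback body elided):

#include <linux/hrtimer.h>
#include <linux/ktime.h>

static enum hrtimer_restart demo_timer_fn(struct hrtimer *timer)
{
        /* In KVM: set preemption_timer_expired, kvm_make_request(),
         * kvm_vcpu_kick(). Never touch the VMCS from here. */
        return HRTIMER_NORESTART;             /* one-shot */
}

static void demo_lifecycle(struct hrtimer *t, u64 timeout_ns)
{
        hrtimer_init(t, CLOCK_MONOTONIC, HRTIMER_MODE_REL);   /* once, at VMXON */
        t->function = demo_timer_fn;

        /* on each L2 entry */
        hrtimer_start(t, ns_to_ktime(timeout_ns), HRTIMER_MODE_REL);

        /* on L2 exit, after reading hrtimer_get_remaining() */
        hrtimer_cancel(t);
}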
@@ -6460,11 +6550,8 @@ static bool nested_vmx_exit_handled_io(struct kvm_vcpu *vcpu, | |||
6460 | int size; | 6550 | int size; |
6461 | u8 b; | 6551 | u8 b; |
6462 | 6552 | ||
6463 | if (nested_cpu_has(vmcs12, CPU_BASED_UNCOND_IO_EXITING)) | ||
6464 | return 1; | ||
6465 | |||
6466 | if (!nested_cpu_has(vmcs12, CPU_BASED_USE_IO_BITMAPS)) | 6553 | if (!nested_cpu_has(vmcs12, CPU_BASED_USE_IO_BITMAPS)) |
6467 | return 0; | 6554 | return nested_cpu_has(vmcs12, CPU_BASED_UNCOND_IO_EXITING); |
6468 | 6555 | ||
6469 | exit_qualification = vmcs_readl(EXIT_QUALIFICATION); | 6556 | exit_qualification = vmcs_readl(EXIT_QUALIFICATION); |
6470 | 6557 | ||
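The simplified prologue of nested_vmx_exit_handled_io() folds two early returns into one: when L1 does not use I/O bitmaps, the architecture says I/O exiting is governed solely by CPU_BASED_UNCOND_IO_EXITING, so that bit is the whole answer and the bitmap walk below is skipped. Condensed to a standalone decision function (bitmap lookup elided; the bit positions are the real VMX encodings):

#include <stdbool.h>
#include <stdint.h>

#define CPU_BASED_UNCOND_IO_EXITING (1u << 24)
#define CPU_BASED_USE_IO_BITMAPS    (1u << 25)

/* Should an I/O instruction in L2 cause a VM exit to L1? */
static bool nested_io_exit(uint32_t cpu_based_ctls, bool port_bit_in_bitmap)
{
        if (!(cpu_based_ctls & CPU_BASED_USE_IO_BITMAPS))
                return (cpu_based_ctls & CPU_BASED_UNCOND_IO_EXITING) != 0;
        return port_bit_in_bitmap;   /* bitmaps override the unconditional bit */
}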
@@ -6628,6 +6715,13 @@ static bool nested_vmx_exit_handled(struct kvm_vcpu *vcpu) | |||
6628 | struct vmcs12 *vmcs12 = get_vmcs12(vcpu); | 6715 | struct vmcs12 *vmcs12 = get_vmcs12(vcpu); |
6629 | u32 exit_reason = vmx->exit_reason; | 6716 | u32 exit_reason = vmx->exit_reason; |
6630 | 6717 | ||
6718 | trace_kvm_nested_vmexit(kvm_rip_read(vcpu), exit_reason, | ||
6719 | vmcs_readl(EXIT_QUALIFICATION), | ||
6720 | vmx->idt_vectoring_info, | ||
6721 | intr_info, | ||
6722 | vmcs_read32(VM_EXIT_INTR_ERROR_CODE), | ||
6723 | KVM_ISA_VMX); | ||
6724 | |||
6631 | if (vmx->nested.nested_run_pending) | 6725 | if (vmx->nested.nested_run_pending) |
6632 | return 0; | 6726 | return 0; |
6633 | 6727 | ||
@@ -6644,7 +6738,7 @@ static bool nested_vmx_exit_handled(struct kvm_vcpu *vcpu) | |||
6644 | else if (is_page_fault(intr_info)) | 6738 | else if (is_page_fault(intr_info)) |
6645 | return enable_ept; | 6739 | return enable_ept; |
6646 | else if (is_no_device(intr_info) && | 6740 | else if (is_no_device(intr_info) && |
6647 | !(nested_read_cr0(vmcs12) & X86_CR0_TS)) | 6741 | !(vmcs12->guest_cr0 & X86_CR0_TS)) |
6648 | return 0; | 6742 | return 0; |
6649 | return vmcs12->exception_bitmap & | 6743 | return vmcs12->exception_bitmap & |
6650 | (1u << (intr_info & INTR_INFO_VECTOR_MASK)); | 6744 | (1u << (intr_info & INTR_INFO_VECTOR_MASK)); |
@@ -6723,9 +6817,6 @@ static bool nested_vmx_exit_handled(struct kvm_vcpu *vcpu) | |||
6723 | * table is L0's fault. | 6817 | * table is L0's fault. |
6724 | */ | 6818 | */ |
6725 | return 0; | 6819 | return 0; |
6726 | case EXIT_REASON_PREEMPTION_TIMER: | ||
6727 | return vmcs12->pin_based_vm_exec_control & | ||
6728 | PIN_BASED_VMX_PREEMPTION_TIMER; | ||
6729 | case EXIT_REASON_WBINVD: | 6820 | case EXIT_REASON_WBINVD: |
6730 | return nested_cpu_has2(vmcs12, SECONDARY_EXEC_WBINVD_EXITING); | 6821 | return nested_cpu_has2(vmcs12, SECONDARY_EXEC_WBINVD_EXITING); |
6731 | case EXIT_REASON_XSETBV: | 6822 | case EXIT_REASON_XSETBV: |
@@ -6741,27 +6832,6 @@ static void vmx_get_exit_info(struct kvm_vcpu *vcpu, u64 *info1, u64 *info2) | |||
6741 | *info2 = vmcs_read32(VM_EXIT_INTR_INFO); | 6832 | *info2 = vmcs_read32(VM_EXIT_INTR_INFO); |
6742 | } | 6833 | } |
6743 | 6834 | ||
6744 | static void nested_adjust_preemption_timer(struct kvm_vcpu *vcpu) | ||
6745 | { | ||
6746 | u64 delta_tsc_l1; | ||
6747 | u32 preempt_val_l1, preempt_val_l2, preempt_scale; | ||
6748 | |||
6749 | if (!(get_vmcs12(vcpu)->pin_based_vm_exec_control & | ||
6750 | PIN_BASED_VMX_PREEMPTION_TIMER)) | ||
6751 | return; | ||
6752 | preempt_scale = native_read_msr(MSR_IA32_VMX_MISC) & | ||
6753 | MSR_IA32_VMX_MISC_PREEMPTION_TIMER_SCALE; | ||
6754 | preempt_val_l2 = vmcs_read32(VMX_PREEMPTION_TIMER_VALUE); | ||
6755 | delta_tsc_l1 = vmx_read_l1_tsc(vcpu, native_read_tsc()) | ||
6756 | - vcpu->arch.last_guest_tsc; | ||
6757 | preempt_val_l1 = delta_tsc_l1 >> preempt_scale; | ||
6758 | if (preempt_val_l2 <= preempt_val_l1) | ||
6759 | preempt_val_l2 = 0; | ||
6760 | else | ||
6761 | preempt_val_l2 -= preempt_val_l1; | ||
6762 | vmcs_write32(VMX_PREEMPTION_TIMER_VALUE, preempt_val_l2); | ||
6763 | } | ||
6764 | |||
6765 | /* | 6835 | /* |
6766 | * The guest has exited. See if we can fix it or if we need userspace | 6836 | * The guest has exited. See if we can fix it or if we need userspace |
6767 | * assistance. | 6837 | * assistance. |
@@ -6777,7 +6847,9 @@ static int vmx_handle_exit(struct kvm_vcpu *vcpu) | |||
6777 | return handle_invalid_guest_state(vcpu); | 6847 | return handle_invalid_guest_state(vcpu); |
6778 | 6848 | ||
6779 | if (is_guest_mode(vcpu) && nested_vmx_exit_handled(vcpu)) { | 6849 | if (is_guest_mode(vcpu) && nested_vmx_exit_handled(vcpu)) { |
6780 | nested_vmx_vmexit(vcpu); | 6850 | nested_vmx_vmexit(vcpu, exit_reason, |
6851 | vmcs_read32(VM_EXIT_INTR_INFO), | ||
6852 | vmcs_readl(EXIT_QUALIFICATION)); | ||
6781 | return 1; | 6853 | return 1; |
6782 | } | 6854 | } |
6783 | 6855 | ||
@@ -7006,6 +7078,12 @@ static void vmx_handle_external_intr(struct kvm_vcpu *vcpu) | |||
7006 | local_irq_enable(); | 7078 | local_irq_enable(); |
7007 | } | 7079 | } |
7008 | 7080 | ||
7081 | static bool vmx_mpx_supported(void) | ||
7082 | { | ||
7083 | return (vmcs_config.vmexit_ctrl & VM_EXIT_CLEAR_BNDCFGS) && | ||
7084 | (vmcs_config.vmentry_ctrl & VM_ENTRY_LOAD_BNDCFGS); | ||
7085 | } | ||
7086 | |||
7009 | static void vmx_recover_nmi_blocking(struct vcpu_vmx *vmx) | 7087 | static void vmx_recover_nmi_blocking(struct vcpu_vmx *vmx) |
7010 | { | 7088 | { |
7011 | u32 exit_intr_info; | 7089 | u32 exit_intr_info; |
@@ -7172,8 +7250,6 @@ static void __noclone vmx_vcpu_run(struct kvm_vcpu *vcpu) | |||
7172 | atomic_switch_perf_msrs(vmx); | 7250 | atomic_switch_perf_msrs(vmx); |
7173 | debugctlmsr = get_debugctlmsr(); | 7251 | debugctlmsr = get_debugctlmsr(); |
7174 | 7252 | ||
7175 | if (is_guest_mode(vcpu) && !vmx->nested.nested_run_pending) | ||
7176 | nested_adjust_preemption_timer(vcpu); | ||
7177 | vmx->__launched = vmx->loaded_vmcs->launched; | 7253 | vmx->__launched = vmx->loaded_vmcs->launched; |
7178 | asm( | 7254 | asm( |
7179 | /* Store host registers */ | 7255 | /* Store host registers */ |
@@ -7332,8 +7408,8 @@ static void vmx_free_vcpu(struct kvm_vcpu *vcpu) | |||
7332 | struct vcpu_vmx *vmx = to_vmx(vcpu); | 7408 | struct vcpu_vmx *vmx = to_vmx(vcpu); |
7333 | 7409 | ||
7334 | free_vpid(vmx); | 7410 | free_vpid(vmx); |
7335 | free_nested(vmx); | ||
7336 | free_loaded_vmcs(vmx->loaded_vmcs); | 7411 | free_loaded_vmcs(vmx->loaded_vmcs); |
7412 | free_nested(vmx); | ||
7337 | kfree(vmx->guest_msrs); | 7413 | kfree(vmx->guest_msrs); |
7338 | kvm_vcpu_uninit(vcpu); | 7414 | kvm_vcpu_uninit(vcpu); |
7339 | kmem_cache_free(kvm_vcpu_cache, vmx); | 7415 | kmem_cache_free(kvm_vcpu_cache, vmx); |
@@ -7518,15 +7594,14 @@ static void vmx_set_supported_cpuid(u32 func, struct kvm_cpuid_entry2 *entry) | |||
7518 | static void nested_ept_inject_page_fault(struct kvm_vcpu *vcpu, | 7594 | static void nested_ept_inject_page_fault(struct kvm_vcpu *vcpu, |
7519 | struct x86_exception *fault) | 7595 | struct x86_exception *fault) |
7520 | { | 7596 | { |
7521 | struct vmcs12 *vmcs12; | 7597 | struct vmcs12 *vmcs12 = get_vmcs12(vcpu); |
7522 | nested_vmx_vmexit(vcpu); | 7598 | u32 exit_reason; |
7523 | vmcs12 = get_vmcs12(vcpu); | ||
7524 | 7599 | ||
7525 | if (fault->error_code & PFERR_RSVD_MASK) | 7600 | if (fault->error_code & PFERR_RSVD_MASK) |
7526 | vmcs12->vm_exit_reason = EXIT_REASON_EPT_MISCONFIG; | 7601 | exit_reason = EXIT_REASON_EPT_MISCONFIG; |
7527 | else | 7602 | else |
7528 | vmcs12->vm_exit_reason = EXIT_REASON_EPT_VIOLATION; | 7603 | exit_reason = EXIT_REASON_EPT_VIOLATION; |
7529 | vmcs12->exit_qualification = vcpu->arch.exit_qualification; | 7604 | nested_vmx_vmexit(vcpu, exit_reason, 0, vcpu->arch.exit_qualification); |
7530 | vmcs12->guest_physical_address = fault->address; | 7605 | vmcs12->guest_physical_address = fault->address; |
7531 | } | 7606 | } |
7532 | 7607 | ||
@@ -7564,11 +7639,35 @@ static void vmx_inject_page_fault_nested(struct kvm_vcpu *vcpu, | |||
7564 | 7639 | ||
7565 | /* TODO: also check PFEC_MATCH/MASK, not just EB.PF. */ | 7640 | /* TODO: also check PFEC_MATCH/MASK, not just EB.PF. */ |
7566 | if (vmcs12->exception_bitmap & (1u << PF_VECTOR)) | 7641 | if (vmcs12->exception_bitmap & (1u << PF_VECTOR)) |
7567 | nested_vmx_vmexit(vcpu); | 7642 | nested_vmx_vmexit(vcpu, to_vmx(vcpu)->exit_reason, |
7643 | vmcs_read32(VM_EXIT_INTR_INFO), | ||
7644 | vmcs_readl(EXIT_QUALIFICATION)); | ||
7568 | else | 7645 | else |
7569 | kvm_inject_page_fault(vcpu, fault); | 7646 | kvm_inject_page_fault(vcpu, fault); |
7570 | } | 7647 | } |
7571 | 7648 | ||
7649 | static void vmx_start_preemption_timer(struct kvm_vcpu *vcpu) | ||
7650 | { | ||
7651 | u64 preemption_timeout = get_vmcs12(vcpu)->vmx_preemption_timer_value; | ||
7652 | struct vcpu_vmx *vmx = to_vmx(vcpu); | ||
7653 | |||
7654 | if (vcpu->arch.virtual_tsc_khz == 0) | ||
7655 | return; | ||
7656 | |||
7657 | /* Make sure short timeouts reliably trigger an immediate vmexit. | ||
7658 | * hrtimer_start does not guarantee this. */ | ||
7659 | if (preemption_timeout <= 1) { | ||
7660 | vmx_preemption_timer_fn(&vmx->nested.preemption_timer); | ||
7661 | return; | ||
7662 | } | ||
7663 | |||
7664 | preemption_timeout <<= VMX_MISC_EMULATED_PREEMPTION_TIMER_RATE; | ||
7665 | preemption_timeout *= 1000000; | ||
7666 | do_div(preemption_timeout, vcpu->arch.virtual_tsc_khz); | ||
7667 | hrtimer_start(&vmx->nested.preemption_timer, | ||
7668 | ns_to_ktime(preemption_timeout), HRTIMER_MODE_REL); | ||
7669 | } | ||
7670 | |||
7572 | /* | 7671 | /* |
7573 | * prepare_vmcs02 is called when the L1 guest hypervisor runs its nested | 7672 | * prepare_vmcs02 is called when the L1 guest hypervisor runs its nested |
7574 | * L2 guest. L1 has a vmcs for L2 (vmcs12), and this function "merges" it | 7673 | * L2 guest. L1 has a vmcs for L2 (vmcs12), and this function "merges" it |
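vmx_start_preemption_timer() has to translate the value L1 programmed, which architecturally counts in units of 2^rate TSC cycles (rate from MSR_IA32_VMX_MISC bits 4:0, emulated here by the constant VMX_MISC_EMULATED_PREEMPTION_TIMER_RATE), into nanoseconds for the hrtimer: ns = (value << rate) * 1000000 / virtual_tsc_khz. A self-contained check of that arithmetic, assuming the emulated rate is 5 (the constant's value is an assumption here):

#include <inttypes.h>
#include <stdint.h>
#include <stdio.h>

#define EMULATED_PREEMPTION_TIMER_RATE 5   /* assumed kernel constant */

/* L1's timer value (units of 2^rate TSC cycles) -> hrtimer nanoseconds. */
static uint64_t preemption_ticks_to_ns(uint64_t value, uint32_t virtual_tsc_khz)
{
        uint64_t ns = value << EMULATED_PREEMPTION_TIMER_RATE; /* TSC cycles */

        ns *= 1000000;                  /* cycles * 1e6 ...        */
        return ns / virtual_tsc_khz;    /* ... / kHz = nanoseconds */
}

int main(void)
{
        /* 1000 ticks, rate 5, 2 GHz guest TSC: 32000 cycles = 16000 ns. */
        printf("%" PRIu64 " ns\n", preemption_ticks_to_ns(1000, 2000000));
        return 0;
}

The virtual_tsc_khz == 0 early return above covers guests without a known TSC frequency, where no meaningful conversion exists, and the value <= 1 special case fires the callback synchronously because, as the comment notes, hrtimer_start() does not guarantee an immediate expiry for near-zero timeouts.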
@@ -7582,7 +7681,6 @@ static void prepare_vmcs02(struct kvm_vcpu *vcpu, struct vmcs12 *vmcs12) | |||
7582 | { | 7681 | { |
7583 | struct vcpu_vmx *vmx = to_vmx(vcpu); | 7682 | struct vcpu_vmx *vmx = to_vmx(vcpu); |
7584 | u32 exec_control; | 7683 | u32 exec_control; |
7585 | u32 exit_control; | ||
7586 | 7684 | ||
7587 | vmcs_write16(GUEST_ES_SELECTOR, vmcs12->guest_es_selector); | 7685 | vmcs_write16(GUEST_ES_SELECTOR, vmcs12->guest_es_selector); |
7588 | vmcs_write16(GUEST_CS_SELECTOR, vmcs12->guest_cs_selector); | 7686 | vmcs_write16(GUEST_CS_SELECTOR, vmcs12->guest_cs_selector); |
@@ -7640,13 +7738,14 @@ static void prepare_vmcs02(struct kvm_vcpu *vcpu, struct vmcs12 *vmcs12) | |||
7640 | 7738 | ||
7641 | vmcs_write64(VMCS_LINK_POINTER, -1ull); | 7739 | vmcs_write64(VMCS_LINK_POINTER, -1ull); |
7642 | 7740 | ||
7643 | vmcs_write32(PIN_BASED_VM_EXEC_CONTROL, | 7741 | exec_control = vmcs12->pin_based_vm_exec_control; |
7644 | (vmcs_config.pin_based_exec_ctrl | | 7742 | exec_control |= vmcs_config.pin_based_exec_ctrl; |
7645 | vmcs12->pin_based_vm_exec_control)); | 7743 | exec_control &= ~PIN_BASED_VMX_PREEMPTION_TIMER; |
7744 | vmcs_write32(PIN_BASED_VM_EXEC_CONTROL, exec_control); | ||
7646 | 7745 | ||
7647 | if (vmcs12->pin_based_vm_exec_control & PIN_BASED_VMX_PREEMPTION_TIMER) | 7746 | vmx->nested.preemption_timer_expired = false; |
7648 | vmcs_write32(VMX_PREEMPTION_TIMER_VALUE, | 7747 | if (nested_cpu_has_preemption_timer(vmcs12)) |
7649 | vmcs12->vmx_preemption_timer_value); | 7748 | vmx_start_preemption_timer(vcpu); |
7650 | 7749 | ||
7651 | /* | 7750 | /* |
7652 | * Whether page-faults are trapped is determined by a combination of | 7751 | * Whether page-faults are trapped is determined by a combination of |
@@ -7674,7 +7773,7 @@ static void prepare_vmcs02(struct kvm_vcpu *vcpu, struct vmcs12 *vmcs12) | |||
7674 | enable_ept ? vmcs12->page_fault_error_code_match : 0); | 7773 | enable_ept ? vmcs12->page_fault_error_code_match : 0); |
7675 | 7774 | ||
7676 | if (cpu_has_secondary_exec_ctrls()) { | 7775 | if (cpu_has_secondary_exec_ctrls()) { |
7677 | u32 exec_control = vmx_secondary_exec_control(vmx); | 7776 | exec_control = vmx_secondary_exec_control(vmx); |
7678 | if (!vmx->rdtscp_enabled) | 7777 | if (!vmx->rdtscp_enabled) |
7679 | exec_control &= ~SECONDARY_EXEC_RDTSCP; | 7778 | exec_control &= ~SECONDARY_EXEC_RDTSCP; |
7680 | /* Take the following fields only from vmcs12 */ | 7779 | /* Take the following fields only from vmcs12 */ |
@@ -7706,6 +7805,11 @@ static void prepare_vmcs02(struct kvm_vcpu *vcpu, struct vmcs12 *vmcs12) | |||
7706 | else | 7805 | else |
7707 | vmcs_write64(APIC_ACCESS_ADDR, | 7806 | vmcs_write64(APIC_ACCESS_ADDR, |
7708 | page_to_phys(vmx->nested.apic_access_page)); | 7807 | page_to_phys(vmx->nested.apic_access_page)); |
7808 | } else if (vm_need_virtualize_apic_accesses(vmx->vcpu.kvm)) { | ||
7809 | exec_control |= | ||
7810 | SECONDARY_EXEC_VIRTUALIZE_APIC_ACCESSES; | ||
7811 | vmcs_write64(APIC_ACCESS_ADDR, | ||
7812 | page_to_phys(vcpu->kvm->arch.apic_access_page)); | ||
7709 | } | 7813 | } |
7710 | 7814 | ||
7711 | vmcs_write32(SECONDARY_VM_EXEC_CONTROL, exec_control); | 7815 | vmcs_write32(SECONDARY_VM_EXEC_CONTROL, exec_control); |
@@ -7756,15 +7860,12 @@ static void prepare_vmcs02(struct kvm_vcpu *vcpu, struct vmcs12 *vmcs12) | |||
7756 | * we should use its exit controls. Note that VM_EXIT_LOAD_IA32_EFER | 7860 | * we should use its exit controls. Note that VM_EXIT_LOAD_IA32_EFER |
7757 | * bits are further modified by vmx_set_efer() below. | 7861 | * bits are further modified by vmx_set_efer() below. |
7758 | */ | 7862 | */ |
7759 | exit_control = vmcs_config.vmexit_ctrl; | 7863 | vmcs_write32(VM_EXIT_CONTROLS, vmcs_config.vmexit_ctrl); |
7760 | if (vmcs12->pin_based_vm_exec_control & PIN_BASED_VMX_PREEMPTION_TIMER) | ||
7761 | exit_control |= VM_EXIT_SAVE_VMX_PREEMPTION_TIMER; | ||
7762 | vmcs_write32(VM_EXIT_CONTROLS, exit_control); | ||
7763 | 7864 | ||
7764 | /* vmcs12's VM_ENTRY_LOAD_IA32_EFER and VM_ENTRY_IA32E_MODE are | 7865 | /* vmcs12's VM_ENTRY_LOAD_IA32_EFER and VM_ENTRY_IA32E_MODE are |
7765 | * emulated by vmx_set_efer(), below. | 7866 | * emulated by vmx_set_efer(), below. |
7766 | */ | 7867 | */ |
7767 | vmcs_write32(VM_ENTRY_CONTROLS, | 7868 | vm_entry_controls_init(vmx, |
7768 | (vmcs12->vm_entry_controls & ~VM_ENTRY_LOAD_IA32_EFER & | 7869 | (vmcs12->vm_entry_controls & ~VM_ENTRY_LOAD_IA32_EFER & |
7769 | ~VM_ENTRY_IA32E_MODE) | | 7870 | ~VM_ENTRY_IA32E_MODE) | |
7770 | (vmcs_config.vmentry_ctrl & ~VM_ENTRY_IA32E_MODE)); | 7871 | (vmcs_config.vmentry_ctrl & ~VM_ENTRY_IA32E_MODE)); |
@@ -7778,6 +7879,9 @@ static void prepare_vmcs02(struct kvm_vcpu *vcpu, struct vmcs12 *vmcs12) | |||
7778 | 7879 | ||
7779 | set_cr4_guest_host_mask(vmx); | 7880 | set_cr4_guest_host_mask(vmx); |
7780 | 7881 | ||
7882 | if (vmcs12->vm_entry_controls & VM_ENTRY_LOAD_BNDCFGS) | ||
7883 | vmcs_write64(GUEST_BNDCFGS, vmcs12->guest_bndcfgs); | ||
7884 | |||
7781 | if (vmcs12->cpu_based_vm_exec_control & CPU_BASED_USE_TSC_OFFSETING) | 7885 | if (vmcs12->cpu_based_vm_exec_control & CPU_BASED_USE_TSC_OFFSETING) |
7782 | vmcs_write64(TSC_OFFSET, | 7886 | vmcs_write64(TSC_OFFSET, |
7783 | vmx->nested.vmcs01_tsc_offset + vmcs12->tsc_offset); | 7887 | vmx->nested.vmcs01_tsc_offset + vmcs12->tsc_offset); |
@@ -7882,7 +7986,8 @@ static int nested_vmx_run(struct kvm_vcpu *vcpu, bool launch) | |||
7882 | return 1; | 7986 | return 1; |
7883 | } | 7987 | } |
7884 | 7988 | ||
7885 | if (vmcs12->guest_activity_state != GUEST_ACTIVITY_ACTIVE) { | 7989 | if (vmcs12->guest_activity_state != GUEST_ACTIVITY_ACTIVE && |
7990 | vmcs12->guest_activity_state != GUEST_ACTIVITY_HLT) { | ||
7886 | nested_vmx_failValid(vcpu, VMXERR_ENTRY_INVALID_CONTROL_FIELD); | 7991 | nested_vmx_failValid(vcpu, VMXERR_ENTRY_INVALID_CONTROL_FIELD); |
7887 | return 1; | 7992 | return 1; |
7888 | } | 7993 | } |
@@ -7994,8 +8099,6 @@ static int nested_vmx_run(struct kvm_vcpu *vcpu, bool launch) | |||
7994 | 8099 | ||
7995 | enter_guest_mode(vcpu); | 8100 | enter_guest_mode(vcpu); |
7996 | 8101 | ||
7997 | vmx->nested.nested_run_pending = 1; | ||
7998 | |||
7999 | vmx->nested.vmcs01_tsc_offset = vmcs_read64(TSC_OFFSET); | 8102 | vmx->nested.vmcs01_tsc_offset = vmcs_read64(TSC_OFFSET); |
8000 | 8103 | ||
8001 | cpu = get_cpu(); | 8104 | cpu = get_cpu(); |
@@ -8011,6 +8114,11 @@ static int nested_vmx_run(struct kvm_vcpu *vcpu, bool launch) | |||
8011 | 8114 | ||
8012 | prepare_vmcs02(vcpu, vmcs12); | 8115 | prepare_vmcs02(vcpu, vmcs12); |
8013 | 8116 | ||
8117 | if (vmcs12->guest_activity_state == GUEST_ACTIVITY_HLT) | ||
8118 | return kvm_emulate_halt(vcpu); | ||
8119 | |||
8120 | vmx->nested.nested_run_pending = 1; | ||
8121 | |||
8014 | /* | 8122 | /* |
8015 | * Note no nested_vmx_succeed or nested_vmx_fail here. At this point | 8123 | * Note no nested_vmx_succeed or nested_vmx_fail here. At this point |
8016 | * we are no longer running L1, and VMLAUNCH/VMRESUME has not yet | 8124 | * we are no longer running L1, and VMLAUNCH/VMRESUME has not yet |
@@ -8099,6 +8207,58 @@ static void vmcs12_save_pending_event(struct kvm_vcpu *vcpu, | |||
8099 | } | 8207 | } |
8100 | } | 8208 | } |
8101 | 8209 | ||
8210 | static int vmx_check_nested_events(struct kvm_vcpu *vcpu, bool external_intr) | ||
8211 | { | ||
8212 | struct vcpu_vmx *vmx = to_vmx(vcpu); | ||
8213 | |||
8214 | if (nested_cpu_has_preemption_timer(get_vmcs12(vcpu)) && | ||
8215 | vmx->nested.preemption_timer_expired) { | ||
8216 | if (vmx->nested.nested_run_pending) | ||
8217 | return -EBUSY; | ||
8218 | nested_vmx_vmexit(vcpu, EXIT_REASON_PREEMPTION_TIMER, 0, 0); | ||
8219 | return 0; | ||
8220 | } | ||
8221 | |||
8222 | if (vcpu->arch.nmi_pending && nested_exit_on_nmi(vcpu)) { | ||
8223 | if (vmx->nested.nested_run_pending || | ||
8224 | vcpu->arch.interrupt.pending) | ||
8225 | return -EBUSY; | ||
8226 | nested_vmx_vmexit(vcpu, EXIT_REASON_EXCEPTION_NMI, | ||
8227 | NMI_VECTOR | INTR_TYPE_NMI_INTR | | ||
8228 | INTR_INFO_VALID_MASK, 0); | ||
8229 | /* | ||
8230 | * The NMI-triggered VM exit counts as injection: | ||
8231 | * clear this one and block further NMIs. | ||
8232 | */ | ||
8233 | vcpu->arch.nmi_pending = 0; | ||
8234 | vmx_set_nmi_mask(vcpu, true); | ||
8235 | return 0; | ||
8236 | } | ||
8237 | |||
8238 | if ((kvm_cpu_has_interrupt(vcpu) || external_intr) && | ||
8239 | nested_exit_on_intr(vcpu)) { | ||
8240 | if (vmx->nested.nested_run_pending) | ||
8241 | return -EBUSY; | ||
8242 | nested_vmx_vmexit(vcpu, EXIT_REASON_EXTERNAL_INTERRUPT, 0, 0); | ||
8243 | } | ||
8244 | |||
8245 | return 0; | ||
8246 | } | ||
8247 | |||
8248 | static u32 vmx_get_preemption_timer_value(struct kvm_vcpu *vcpu) | ||
8249 | { | ||
8250 | ktime_t remaining = | ||
8251 | hrtimer_get_remaining(&to_vmx(vcpu)->nested.preemption_timer); | ||
8252 | u64 value; | ||
8253 | |||
8254 | if (ktime_to_ns(remaining) <= 0) | ||
8255 | return 0; | ||
8256 | |||
8257 | value = ktime_to_ns(remaining) * vcpu->arch.virtual_tsc_khz; | ||
8258 | do_div(value, 1000000); | ||
8259 | return value >> VMX_MISC_EMULATED_PREEMPTION_TIMER_RATE; | ||
8260 | } | ||
8261 | |||
8102 | /* | 8262 | /* |
8103 | * prepare_vmcs12 is part of what we need to do when the nested L2 guest exits | 8263 | * prepare_vmcs12 is part of what we need to do when the nested L2 guest exits |
8104 | * and we want to prepare to run its L1 parent. L1 keeps a vmcs for L2 (vmcs12), | 8264 | * and we want to prepare to run its L1 parent. L1 keeps a vmcs for L2 (vmcs12), |
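vmx_check_nested_events() centralizes what the deleted open-coded blocks in vmx_nmi_allowed()/vmx_interrupt_allowed() used to do, with a fixed priority: expired preemption timer first, then NMI, then external interrupt, each deferred with -EBUSY while a nested entry is still pending. vmx_get_preemption_timer_value() is the exact inverse of the start-side scaling, so the value L1 reads back after VM_EXIT_SAVE_VMX_PREEMPTION_TIMER round-trips. A self-contained check (same assumed rate of 5 as above):

#include <inttypes.h>
#include <stdint.h>
#include <stdio.h>

#define EMULATED_PREEMPTION_TIMER_RATE 5   /* assumed kernel constant */

/* Remaining hrtimer nanoseconds -> the 2^rate-TSC-cycle units L1 expects. */
static uint32_t remaining_ns_to_ticks(int64_t remaining_ns,
                                      uint32_t virtual_tsc_khz)
{
        uint64_t value;

        if (remaining_ns <= 0)
                return 0;                                  /* already expired */
        value = (uint64_t)remaining_ns * virtual_tsc_khz / 1000000;
        return (uint32_t)(value >> EMULATED_PREEMPTION_TIMER_RATE);
}

int main(void)
{
        /* 16000 ns left on a 2 GHz guest TSC round-trips to 1000 ticks. */
        printf("%" PRIu32 "\n", remaining_ns_to_ticks(16000, 2000000));
        return 0;
}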
@@ -8110,7 +8270,9 @@ static void vmcs12_save_pending_event(struct kvm_vcpu *vcpu, | |||
8110 | * exit-information fields only. Other fields are modified by L1 with VMWRITE, | 8270 | * exit-information fields only. Other fields are modified by L1 with VMWRITE, |
8111 | * which already writes to vmcs12 directly. | 8271 | * which already writes to vmcs12 directly. |
8112 | */ | 8272 | */ |
8113 | static void prepare_vmcs12(struct kvm_vcpu *vcpu, struct vmcs12 *vmcs12) | 8273 | static void prepare_vmcs12(struct kvm_vcpu *vcpu, struct vmcs12 *vmcs12, |
8274 | u32 exit_reason, u32 exit_intr_info, | ||
8275 | unsigned long exit_qualification) | ||
8114 | { | 8276 | { |
8115 | /* update guest state fields: */ | 8277 | /* update guest state fields: */ |
8116 | vmcs12->guest_cr0 = vmcs12_guest_cr0(vcpu, vmcs12); | 8278 | vmcs12->guest_cr0 = vmcs12_guest_cr0(vcpu, vmcs12); |
@@ -8162,11 +8324,18 @@ static void prepare_vmcs12(struct kvm_vcpu *vcpu, struct vmcs12 *vmcs12) | |||
8162 | vmcs_read32(GUEST_INTERRUPTIBILITY_INFO); | 8324 | vmcs_read32(GUEST_INTERRUPTIBILITY_INFO); |
8163 | vmcs12->guest_pending_dbg_exceptions = | 8325 | vmcs12->guest_pending_dbg_exceptions = |
8164 | vmcs_readl(GUEST_PENDING_DBG_EXCEPTIONS); | 8326 | vmcs_readl(GUEST_PENDING_DBG_EXCEPTIONS); |
8327 | if (vcpu->arch.mp_state == KVM_MP_STATE_HALTED) | ||
8328 | vmcs12->guest_activity_state = GUEST_ACTIVITY_HLT; | ||
8329 | else | ||
8330 | vmcs12->guest_activity_state = GUEST_ACTIVITY_ACTIVE; | ||
8165 | 8331 | ||
8166 | if ((vmcs12->pin_based_vm_exec_control & PIN_BASED_VMX_PREEMPTION_TIMER) && | 8332 | if (nested_cpu_has_preemption_timer(vmcs12)) { |
8167 | (vmcs12->vm_exit_controls & VM_EXIT_SAVE_VMX_PREEMPTION_TIMER)) | 8333 | if (vmcs12->vm_exit_controls & |
8168 | vmcs12->vmx_preemption_timer_value = | 8334 | VM_EXIT_SAVE_VMX_PREEMPTION_TIMER) |
8169 | vmcs_read32(VMX_PREEMPTION_TIMER_VALUE); | 8335 | vmcs12->vmx_preemption_timer_value = |
8336 | vmx_get_preemption_timer_value(vcpu); | ||
8337 | hrtimer_cancel(&to_vmx(vcpu)->nested.preemption_timer); | ||
8338 | } | ||
8170 | 8339 | ||
8171 | /* | 8340 | /* |
8172 | * In some cases (usually, nested EPT), L2 is allowed to change its | 8341 | * In some cases (usually, nested EPT), L2 is allowed to change its |
@@ -8186,7 +8355,7 @@ static void prepare_vmcs12(struct kvm_vcpu *vcpu, struct vmcs12 *vmcs12) | |||
8186 | 8355 | ||
8187 | vmcs12->vm_entry_controls = | 8356 | vmcs12->vm_entry_controls = |
8188 | (vmcs12->vm_entry_controls & ~VM_ENTRY_IA32E_MODE) | | 8357 | (vmcs12->vm_entry_controls & ~VM_ENTRY_IA32E_MODE) | |
8189 | (vmcs_read32(VM_ENTRY_CONTROLS) & VM_ENTRY_IA32E_MODE); | 8358 | (vm_entry_controls_get(to_vmx(vcpu)) & VM_ENTRY_IA32E_MODE); |
8190 | 8359 | ||
8191 | /* TODO: These cannot have changed unless we have MSR bitmaps and | 8360 | /* TODO: These cannot have changed unless we have MSR bitmaps and |
8192 | * the relevant bit asks not to trap the change */ | 8361 | * the relevant bit asks not to trap the change */ |
@@ -8198,13 +8367,15 @@ static void prepare_vmcs12(struct kvm_vcpu *vcpu, struct vmcs12 *vmcs12) | |||
8198 | vmcs12->guest_sysenter_cs = vmcs_read32(GUEST_SYSENTER_CS); | 8367 | vmcs12->guest_sysenter_cs = vmcs_read32(GUEST_SYSENTER_CS); |
8199 | vmcs12->guest_sysenter_esp = vmcs_readl(GUEST_SYSENTER_ESP); | 8368 | vmcs12->guest_sysenter_esp = vmcs_readl(GUEST_SYSENTER_ESP); |
8200 | vmcs12->guest_sysenter_eip = vmcs_readl(GUEST_SYSENTER_EIP); | 8369 | vmcs12->guest_sysenter_eip = vmcs_readl(GUEST_SYSENTER_EIP); |
8370 | if (vmx_mpx_supported()) | ||
8371 | vmcs12->guest_bndcfgs = vmcs_read64(GUEST_BNDCFGS); | ||
8201 | 8372 | ||
8202 | /* update exit information fields: */ | 8373 | /* update exit information fields: */ |
8203 | 8374 | ||
8204 | vmcs12->vm_exit_reason = to_vmx(vcpu)->exit_reason; | 8375 | vmcs12->vm_exit_reason = exit_reason; |
8205 | vmcs12->exit_qualification = vmcs_readl(EXIT_QUALIFICATION); | 8376 | vmcs12->exit_qualification = exit_qualification; |
8206 | 8377 | ||
8207 | vmcs12->vm_exit_intr_info = vmcs_read32(VM_EXIT_INTR_INFO); | 8378 | vmcs12->vm_exit_intr_info = exit_intr_info; |
8208 | if ((vmcs12->vm_exit_intr_info & | 8379 | if ((vmcs12->vm_exit_intr_info & |
8209 | (INTR_INFO_VALID_MASK | INTR_INFO_DELIVER_CODE_MASK)) == | 8380 | (INTR_INFO_VALID_MASK | INTR_INFO_DELIVER_CODE_MASK)) == |
8210 | (INTR_INFO_VALID_MASK | INTR_INFO_DELIVER_CODE_MASK)) | 8381 | (INTR_INFO_VALID_MASK | INTR_INFO_DELIVER_CODE_MASK)) |
@@ -8307,6 +8478,10 @@ static void load_vmcs12_host_state(struct kvm_vcpu *vcpu, | |||
8307 | vmcs_writel(GUEST_IDTR_BASE, vmcs12->host_idtr_base); | 8478 | vmcs_writel(GUEST_IDTR_BASE, vmcs12->host_idtr_base); |
8308 | vmcs_writel(GUEST_GDTR_BASE, vmcs12->host_gdtr_base); | 8479 | vmcs_writel(GUEST_GDTR_BASE, vmcs12->host_gdtr_base); |
8309 | 8480 | ||
8481 | /* If not VM_EXIT_CLEAR_BNDCFGS, the L2 value propagates to L1. */ | ||
8482 | if (vmcs12->vm_exit_controls & VM_EXIT_CLEAR_BNDCFGS) | ||
8483 | vmcs_write64(GUEST_BNDCFGS, 0); | ||
8484 | |||
8310 | if (vmcs12->vm_exit_controls & VM_EXIT_LOAD_IA32_PAT) { | 8485 | if (vmcs12->vm_exit_controls & VM_EXIT_LOAD_IA32_PAT) { |
8311 | vmcs_write64(GUEST_IA32_PAT, vmcs12->host_ia32_pat); | 8486 | vmcs_write64(GUEST_IA32_PAT, vmcs12->host_ia32_pat); |
8312 | vcpu->arch.pat = vmcs12->host_ia32_pat; | 8487 | vcpu->arch.pat = vmcs12->host_ia32_pat; |
@@ -8370,7 +8545,9 @@ static void load_vmcs12_host_state(struct kvm_vcpu *vcpu, | |||
8370 | * and modify vmcs12 to make it see what it would expect to see there if | 8545 | * and modify vmcs12 to make it see what it would expect to see there if |
8371 | * L2 was its real guest. Must only be called when in L2 (is_guest_mode()) | 8546 | * L2 was its real guest. Must only be called when in L2 (is_guest_mode()) |
8372 | */ | 8547 | */ |
8373 | static void nested_vmx_vmexit(struct kvm_vcpu *vcpu) | 8548 | static void nested_vmx_vmexit(struct kvm_vcpu *vcpu, u32 exit_reason, |
8549 | u32 exit_intr_info, | ||
8550 | unsigned long exit_qualification) | ||
8374 | { | 8551 | { |
8375 | struct vcpu_vmx *vmx = to_vmx(vcpu); | 8552 | struct vcpu_vmx *vmx = to_vmx(vcpu); |
8376 | int cpu; | 8553 | int cpu; |
@@ -8380,7 +8557,15 @@ static void nested_vmx_vmexit(struct kvm_vcpu *vcpu) | |||
8380 | WARN_ON_ONCE(vmx->nested.nested_run_pending); | 8557 | WARN_ON_ONCE(vmx->nested.nested_run_pending); |
8381 | 8558 | ||
8382 | leave_guest_mode(vcpu); | 8559 | leave_guest_mode(vcpu); |
8383 | prepare_vmcs12(vcpu, vmcs12); | 8560 | prepare_vmcs12(vcpu, vmcs12, exit_reason, exit_intr_info, |
8561 | exit_qualification); | ||
8562 | |||
8563 | trace_kvm_nested_vmexit_inject(vmcs12->vm_exit_reason, | ||
8564 | vmcs12->exit_qualification, | ||
8565 | vmcs12->idt_vectoring_info_field, | ||
8566 | vmcs12->vm_exit_intr_info, | ||
8567 | vmcs12->vm_exit_intr_error_code, | ||
8568 | KVM_ISA_VMX); | ||
8384 | 8569 | ||
8385 | cpu = get_cpu(); | 8570 | cpu = get_cpu(); |
8386 | vmx->loaded_vmcs = &vmx->vmcs01; | 8571 | vmx->loaded_vmcs = &vmx->vmcs01; |
@@ -8389,6 +8574,8 @@ static void nested_vmx_vmexit(struct kvm_vcpu *vcpu) | |||
8389 | vcpu->cpu = cpu; | 8574 | vcpu->cpu = cpu; |
8390 | put_cpu(); | 8575 | put_cpu(); |
8391 | 8576 | ||
8577 | vm_entry_controls_init(vmx, vmcs_read32(VM_ENTRY_CONTROLS)); | ||
8578 | vm_exit_controls_init(vmx, vmcs_read32(VM_EXIT_CONTROLS)); | ||
8392 | vmx_segment_cache_clear(vmx); | 8579 | vmx_segment_cache_clear(vmx); |
8393 | 8580 | ||
8394 | /* if no vmcs02 cache requested, remove the one we used */ | 8581 | /* if no vmcs02 cache requested, remove the one we used */ |
@@ -8421,6 +8608,19 @@ static void nested_vmx_vmexit(struct kvm_vcpu *vcpu) | |||
8421 | nested_vmx_succeed(vcpu); | 8608 | nested_vmx_succeed(vcpu); |
8422 | if (enable_shadow_vmcs) | 8609 | if (enable_shadow_vmcs) |
8423 | vmx->nested.sync_shadow_vmcs = true; | 8610 | vmx->nested.sync_shadow_vmcs = true; |
8611 | |||
8612 | /* in case we halted in L2 */ | ||
8613 | vcpu->arch.mp_state = KVM_MP_STATE_RUNNABLE; | ||
8614 | } | ||
8615 | |||
8616 | /* | ||
8617 | * Forcibly leave nested mode in order to be able to reset the VCPU later on. | ||
8618 | */ | ||
8619 | static void vmx_leave_nested(struct kvm_vcpu *vcpu) | ||
8620 | { | ||
8621 | if (is_guest_mode(vcpu)) | ||
8622 | nested_vmx_vmexit(vcpu, -1, 0, 0); | ||
8623 | free_nested(to_vmx(vcpu)); | ||
8424 | } | 8624 | } |
8425 | 8625 | ||
8426 | /* | 8626 | /* |
@@ -8486,7 +8686,10 @@ static struct kvm_x86_ops vmx_x86_ops = { | |||
8486 | .set_idt = vmx_set_idt, | 8686 | .set_idt = vmx_set_idt, |
8487 | .get_gdt = vmx_get_gdt, | 8687 | .get_gdt = vmx_get_gdt, |
8488 | .set_gdt = vmx_set_gdt, | 8688 | .set_gdt = vmx_set_gdt, |
8689 | .get_dr6 = vmx_get_dr6, | ||
8690 | .set_dr6 = vmx_set_dr6, | ||
8489 | .set_dr7 = vmx_set_dr7, | 8691 | .set_dr7 = vmx_set_dr7, |
8692 | .sync_dirty_debug_regs = vmx_sync_dirty_debug_regs, | ||
8490 | .cache_reg = vmx_cache_reg, | 8693 | .cache_reg = vmx_cache_reg, |
8491 | .get_rflags = vmx_get_rflags, | 8694 | .get_rflags = vmx_get_rflags, |
8492 | .set_rflags = vmx_set_rflags, | 8695 | .set_rflags = vmx_set_rflags, |
@@ -8548,6 +8751,9 @@ static struct kvm_x86_ops vmx_x86_ops = { | |||
8548 | 8751 | ||
8549 | .check_intercept = vmx_check_intercept, | 8752 | .check_intercept = vmx_check_intercept, |
8550 | .handle_external_intr = vmx_handle_external_intr, | 8753 | .handle_external_intr = vmx_handle_external_intr, |
8754 | .mpx_supported = vmx_mpx_supported, | ||
8755 | |||
8756 | .check_nested_events = vmx_check_nested_events, | ||
8551 | }; | 8757 | }; |
8552 | 8758 | ||
8553 | static int __init vmx_init(void) | 8759 | static int __init vmx_init(void) |
@@ -8635,6 +8841,8 @@ static int __init vmx_init(void) | |||
8635 | vmx_disable_intercept_for_msr(MSR_IA32_SYSENTER_CS, false); | 8841 | vmx_disable_intercept_for_msr(MSR_IA32_SYSENTER_CS, false); |
8636 | vmx_disable_intercept_for_msr(MSR_IA32_SYSENTER_ESP, false); | 8842 | vmx_disable_intercept_for_msr(MSR_IA32_SYSENTER_ESP, false); |
8637 | vmx_disable_intercept_for_msr(MSR_IA32_SYSENTER_EIP, false); | 8843 | vmx_disable_intercept_for_msr(MSR_IA32_SYSENTER_EIP, false); |
8844 | vmx_disable_intercept_for_msr(MSR_IA32_BNDCFGS, true); | ||
8845 | |||
8638 | memcpy(vmx_msr_bitmap_legacy_x2apic, | 8846 | memcpy(vmx_msr_bitmap_legacy_x2apic, |
8639 | vmx_msr_bitmap_legacy, PAGE_SIZE); | 8847 | vmx_msr_bitmap_legacy, PAGE_SIZE); |
8640 | memcpy(vmx_msr_bitmap_longmode_x2apic, | 8848 | memcpy(vmx_msr_bitmap_longmode_x2apic, |
diff --git a/arch/x86/kvm/x86.c b/arch/x86/kvm/x86.c index 5d004da1e35d..9d1b5cd4d34c 100644 --- a/arch/x86/kvm/x86.c +++ b/arch/x86/kvm/x86.c | |||
@@ -94,6 +94,9 @@ EXPORT_SYMBOL_GPL(kvm_x86_ops); | |||
94 | static bool ignore_msrs = 0; | 94 | static bool ignore_msrs = 0; |
95 | module_param(ignore_msrs, bool, S_IRUGO | S_IWUSR); | 95 | module_param(ignore_msrs, bool, S_IRUGO | S_IWUSR); |
96 | 96 | ||
97 | unsigned int min_timer_period_us = 500; | ||
98 | module_param(min_timer_period_us, uint, S_IRUGO | S_IWUSR); | ||
99 | |||
97 | bool kvm_has_tsc_control; | 100 | bool kvm_has_tsc_control; |
98 | EXPORT_SYMBOL_GPL(kvm_has_tsc_control); | 101 | EXPORT_SYMBOL_GPL(kvm_has_tsc_control); |
99 | u32 kvm_max_guest_tsc_khz; | 102 | u32 kvm_max_guest_tsc_khz; |
@@ -254,10 +257,26 @@ u64 kvm_get_apic_base(struct kvm_vcpu *vcpu) | |||
254 | } | 257 | } |
255 | EXPORT_SYMBOL_GPL(kvm_get_apic_base); | 258 | EXPORT_SYMBOL_GPL(kvm_get_apic_base); |
256 | 259 | ||
257 | void kvm_set_apic_base(struct kvm_vcpu *vcpu, u64 data) | 260 | int kvm_set_apic_base(struct kvm_vcpu *vcpu, struct msr_data *msr_info) |
258 | { | 261 | { |
259 | /* TODO: reserve bits check */ | 262 | u64 old_state = vcpu->arch.apic_base & |
260 | kvm_lapic_set_base(vcpu, data); | 263 | (MSR_IA32_APICBASE_ENABLE | X2APIC_ENABLE); |
264 | u64 new_state = msr_info->data & | ||
265 | (MSR_IA32_APICBASE_ENABLE | X2APIC_ENABLE); | ||
266 | u64 reserved_bits = ((~0ULL) << cpuid_maxphyaddr(vcpu)) | | ||
267 | 0x2ff | (guest_cpuid_has_x2apic(vcpu) ? 0 : X2APIC_ENABLE); | ||
268 | |||
269 | if (!msr_info->host_initiated && | ||
270 | ((msr_info->data & reserved_bits) != 0 || | ||
271 | new_state == X2APIC_ENABLE || | ||
272 | (new_state == MSR_IA32_APICBASE_ENABLE && | ||
273 | old_state == (MSR_IA32_APICBASE_ENABLE | X2APIC_ENABLE)) || | ||
274 | (new_state == (MSR_IA32_APICBASE_ENABLE | X2APIC_ENABLE) && | ||
275 | old_state == 0))) | ||
276 | return 1; | ||
277 | |||
278 | kvm_lapic_set_base(vcpu, msr_info->data); | ||
279 | return 0; | ||
261 | } | 280 | } |
262 | EXPORT_SYMBOL_GPL(kvm_set_apic_base); | 281 | EXPORT_SYMBOL_GPL(kvm_set_apic_base); |
263 | 282 | ||
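The rewritten kvm_set_apic_base() now validates instead of blindly applying: reserved bits (everything above the guest's MAXPHYADDR, the low 0x2ff mask, and X2APIC_ENABLE when CPUID does not advertise x2APIC) are refused, and the (ENABLE, EXTD) pair may only move along the architectural path disabled <-> xAPIC -> x2APIC -> disabled. Host-initiated writes skip the checks so migration can restore any state. A compact sketch of just the transition rule (bit positions per the SDM):

#include <stdbool.h>
#include <stdint.h>
#include <stdio.h>

#define APIC_ENABLE (1u << 11)   /* MSR_IA32_APICBASE_ENABLE */
#define X2APIC      (1u << 10)   /* X2APIC_ENABLE (EXTD) */

/* Legal: disabled <-> xAPIC, xAPIC -> x2APIC, x2APIC -> disabled.
 * Illegal: EXTD without ENABLE, disabled -> x2APIC, x2APIC -> xAPIC. */
static bool apic_state_change_ok(uint32_t old_state, uint32_t new_state)
{
        if (new_state == X2APIC)
                return false;                      /* EXTD without ENABLE */
        if (new_state == APIC_ENABLE &&
            old_state == (APIC_ENABLE | X2APIC))
                return false;                      /* x2APIC -> xAPIC */
        if (new_state == (APIC_ENABLE | X2APIC) && old_state == 0)
                return false;                      /* disabled -> x2APIC */
        return true;
}

int main(void)
{
        printf("%d\n", apic_state_change_ok(APIC_ENABLE,
                                            APIC_ENABLE | X2APIC));    /* 1 */
        printf("%d\n", apic_state_change_ok(0, APIC_ENABLE | X2APIC)); /* 0 */
        return 0;
}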
@@ -576,13 +595,13 @@ static void kvm_put_guest_xcr0(struct kvm_vcpu *vcpu) | |||
576 | 595 | ||
577 | int __kvm_set_xcr(struct kvm_vcpu *vcpu, u32 index, u64 xcr) | 596 | int __kvm_set_xcr(struct kvm_vcpu *vcpu, u32 index, u64 xcr) |
578 | { | 597 | { |
579 | u64 xcr0; | 598 | u64 xcr0 = xcr; |
599 | u64 old_xcr0 = vcpu->arch.xcr0; | ||
580 | u64 valid_bits; | 600 | u64 valid_bits; |
581 | 601 | ||
582 | /* Only support XCR_XFEATURE_ENABLED_MASK(xcr0) now */ | 602 | /* Only support XCR_XFEATURE_ENABLED_MASK(xcr0) now */ |
583 | if (index != XCR_XFEATURE_ENABLED_MASK) | 603 | if (index != XCR_XFEATURE_ENABLED_MASK) |
584 | return 1; | 604 | return 1; |
585 | xcr0 = xcr; | ||
586 | if (!(xcr0 & XSTATE_FP)) | 605 | if (!(xcr0 & XSTATE_FP)) |
587 | return 1; | 606 | return 1; |
588 | if ((xcr0 & XSTATE_YMM) && !(xcr0 & XSTATE_SSE)) | 607 | if ((xcr0 & XSTATE_YMM) && !(xcr0 & XSTATE_SSE)) |
@@ -597,8 +616,14 @@ int __kvm_set_xcr(struct kvm_vcpu *vcpu, u32 index, u64 xcr) | |||
597 | if (xcr0 & ~valid_bits) | 616 | if (xcr0 & ~valid_bits) |
598 | return 1; | 617 | return 1; |
599 | 618 | ||
619 | if ((!(xcr0 & XSTATE_BNDREGS)) != (!(xcr0 & XSTATE_BNDCSR))) | ||
620 | return 1; | ||
621 | |||
600 | kvm_put_guest_xcr0(vcpu); | 622 | kvm_put_guest_xcr0(vcpu); |
601 | vcpu->arch.xcr0 = xcr0; | 623 | vcpu->arch.xcr0 = xcr0; |
624 | |||
625 | if ((xcr0 ^ old_xcr0) & XSTATE_EXTEND_MASK) | ||
626 | kvm_update_cpuid(vcpu); | ||
602 | return 0; | 627 | return 0; |
603 | } | 628 | } |
604 | 629 | ||
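__kvm_set_xcr() picks up two refinements above: the new (!(xcr0 & XSTATE_BNDREGS)) != (!(xcr0 & XSTATE_BNDCSR)) test rejects XCR0 values that enable only one of the two MPX state components (architecturally they must be set or cleared together), and kvm_update_cpuid() is re-run when any XSTATE_EXTEND_MASK bit flips, since CPUID leaf 0xD depends on the enabled components. The pairing test is just an XOR on the two bits' truthiness; as a standalone predicate (XCR0 bit positions per the SDM):

#include <stdbool.h>
#include <stdint.h>

#define XSTATE_BNDREGS (1ull << 3)
#define XSTATE_BNDCSR  (1ull << 4)

/* MPX's bound registers and config/status state must toggle together. */
static bool mpx_bits_consistent(uint64_t xcr0)
{
        return !(xcr0 & XSTATE_BNDREGS) == !(xcr0 & XSTATE_BNDCSR);
}

/* mpx_bits_consistent(0) and mpx_bits_consistent(XSTATE_BNDREGS | XSTATE_BNDCSR)
 * hold; either bit alone is rejected. */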
@@ -719,6 +744,12 @@ unsigned long kvm_get_cr8(struct kvm_vcpu *vcpu) | |||
719 | } | 744 | } |
720 | EXPORT_SYMBOL_GPL(kvm_get_cr8); | 745 | EXPORT_SYMBOL_GPL(kvm_get_cr8); |
721 | 746 | ||
747 | static void kvm_update_dr6(struct kvm_vcpu *vcpu) | ||
748 | { | ||
749 | if (!(vcpu->guest_debug & KVM_GUESTDBG_USE_HW_BP)) | ||
750 | kvm_x86_ops->set_dr6(vcpu, vcpu->arch.dr6); | ||
751 | } | ||
752 | |||
722 | static void kvm_update_dr7(struct kvm_vcpu *vcpu) | 753 | static void kvm_update_dr7(struct kvm_vcpu *vcpu) |
723 | { | 754 | { |
724 | unsigned long dr7; | 755 | unsigned long dr7; |
@@ -728,7 +759,9 @@ static void kvm_update_dr7(struct kvm_vcpu *vcpu) | |||
728 | else | 759 | else |
729 | dr7 = vcpu->arch.dr7; | 760 | dr7 = vcpu->arch.dr7; |
730 | kvm_x86_ops->set_dr7(vcpu, dr7); | 761 | kvm_x86_ops->set_dr7(vcpu, dr7); |
731 | vcpu->arch.switch_db_regs = (dr7 & DR7_BP_EN_MASK); | 762 | vcpu->arch.switch_db_regs &= ~KVM_DEBUGREG_BP_ENABLED; |
763 | if (dr7 & DR7_BP_EN_MASK) | ||
764 | vcpu->arch.switch_db_regs |= KVM_DEBUGREG_BP_ENABLED; | ||
732 | } | 765 | } |
733 | 766 | ||
734 | static int __kvm_set_dr(struct kvm_vcpu *vcpu, int dr, unsigned long val) | 767 | static int __kvm_set_dr(struct kvm_vcpu *vcpu, int dr, unsigned long val) |
@@ -747,6 +780,7 @@ static int __kvm_set_dr(struct kvm_vcpu *vcpu, int dr, unsigned long val) | |||
747 | if (val & 0xffffffff00000000ULL) | 780 | if (val & 0xffffffff00000000ULL) |
748 | return -1; /* #GP */ | 781 | return -1; /* #GP */ |
749 | vcpu->arch.dr6 = (val & DR6_VOLATILE) | DR6_FIXED_1; | 782 | vcpu->arch.dr6 = (val & DR6_VOLATILE) | DR6_FIXED_1; |
783 | kvm_update_dr6(vcpu); | ||
750 | break; | 784 | break; |
751 | case 5: | 785 | case 5: |
752 | if (kvm_read_cr4_bits(vcpu, X86_CR4_DE)) | 786 | if (kvm_read_cr4_bits(vcpu, X86_CR4_DE)) |
@@ -788,7 +822,10 @@ static int _kvm_get_dr(struct kvm_vcpu *vcpu, int dr, unsigned long *val) | |||
788 | return 1; | 822 | return 1; |
789 | /* fall through */ | 823 | /* fall through */ |
790 | case 6: | 824 | case 6: |
791 | *val = vcpu->arch.dr6; | 825 | if (vcpu->guest_debug & KVM_GUESTDBG_USE_HW_BP) |
826 | *val = vcpu->arch.dr6; | ||
827 | else | ||
828 | *val = kvm_x86_ops->get_dr6(vcpu); | ||
792 | break; | 829 | break; |
793 | case 5: | 830 | case 5: |
794 | if (kvm_read_cr4_bits(vcpu, X86_CR4_DE)) | 831 | if (kvm_read_cr4_bits(vcpu, X86_CR4_DE)) |
@@ -836,11 +873,12 @@ EXPORT_SYMBOL_GPL(kvm_rdpmc); | |||
836 | * kvm-specific. Those are put in the beginning of the list. | 873 | * kvm-specific. Those are put in the beginning of the list. |
837 | */ | 874 | */ |
838 | 875 | ||
839 | #define KVM_SAVE_MSRS_BEGIN 10 | 876 | #define KVM_SAVE_MSRS_BEGIN 12 |
840 | static u32 msrs_to_save[] = { | 877 | static u32 msrs_to_save[] = { |
841 | MSR_KVM_SYSTEM_TIME, MSR_KVM_WALL_CLOCK, | 878 | MSR_KVM_SYSTEM_TIME, MSR_KVM_WALL_CLOCK, |
842 | MSR_KVM_SYSTEM_TIME_NEW, MSR_KVM_WALL_CLOCK_NEW, | 879 | MSR_KVM_SYSTEM_TIME_NEW, MSR_KVM_WALL_CLOCK_NEW, |
843 | HV_X64_MSR_GUEST_OS_ID, HV_X64_MSR_HYPERCALL, | 880 | HV_X64_MSR_GUEST_OS_ID, HV_X64_MSR_HYPERCALL, |
881 | HV_X64_MSR_TIME_REF_COUNT, HV_X64_MSR_REFERENCE_TSC, | ||
844 | HV_X64_MSR_APIC_ASSIST_PAGE, MSR_KVM_ASYNC_PF_EN, MSR_KVM_STEAL_TIME, | 882 | HV_X64_MSR_APIC_ASSIST_PAGE, MSR_KVM_ASYNC_PF_EN, MSR_KVM_STEAL_TIME, |
845 | MSR_KVM_PV_EOI_EN, | 883 | MSR_KVM_PV_EOI_EN, |
846 | MSR_IA32_SYSENTER_CS, MSR_IA32_SYSENTER_ESP, MSR_IA32_SYSENTER_EIP, | 884 | MSR_IA32_SYSENTER_CS, MSR_IA32_SYSENTER_ESP, MSR_IA32_SYSENTER_EIP, |
@@ -849,7 +887,7 @@ static u32 msrs_to_save[] = { | |||
849 | MSR_CSTAR, MSR_KERNEL_GS_BASE, MSR_SYSCALL_MASK, MSR_LSTAR, | 887 | MSR_CSTAR, MSR_KERNEL_GS_BASE, MSR_SYSCALL_MASK, MSR_LSTAR, |
850 | #endif | 888 | #endif |
851 | MSR_IA32_TSC, MSR_IA32_CR_PAT, MSR_VM_HSAVE_PA, | 889 | MSR_IA32_TSC, MSR_IA32_CR_PAT, MSR_VM_HSAVE_PA, |
852 | MSR_IA32_FEATURE_CONTROL | 890 | MSR_IA32_FEATURE_CONTROL, MSR_IA32_BNDCFGS |
853 | }; | 891 | }; |
854 | 892 | ||
855 | static unsigned num_msrs_to_save; | 893 | static unsigned num_msrs_to_save; |
@@ -1275,8 +1313,6 @@ void kvm_write_tsc(struct kvm_vcpu *vcpu, struct msr_data *msr) | |||
1275 | kvm->arch.last_tsc_write = data; | 1313 | kvm->arch.last_tsc_write = data; |
1276 | kvm->arch.last_tsc_khz = vcpu->arch.virtual_tsc_khz; | 1314 | kvm->arch.last_tsc_khz = vcpu->arch.virtual_tsc_khz; |
1277 | 1315 | ||
1278 | /* Reset of TSC must disable overshoot protection below */ | ||
1279 | vcpu->arch.hv_clock.tsc_timestamp = 0; | ||
1280 | vcpu->arch.last_guest_tsc = data; | 1316 | vcpu->arch.last_guest_tsc = data; |
1281 | 1317 | ||
1282 | /* Keep track of which generation this VCPU has synchronized to */ | 1318 | /* Keep track of which generation this VCPU has synchronized to */ |
@@ -1484,7 +1520,7 @@ static int kvm_guest_time_update(struct kvm_vcpu *v) | |||
1484 | unsigned long flags, this_tsc_khz; | 1520 | unsigned long flags, this_tsc_khz; |
1485 | struct kvm_vcpu_arch *vcpu = &v->arch; | 1521 | struct kvm_vcpu_arch *vcpu = &v->arch; |
1486 | struct kvm_arch *ka = &v->kvm->arch; | 1522 | struct kvm_arch *ka = &v->kvm->arch; |
1487 | s64 kernel_ns, max_kernel_ns; | 1523 | s64 kernel_ns; |
1488 | u64 tsc_timestamp, host_tsc; | 1524 | u64 tsc_timestamp, host_tsc; |
1489 | struct pvclock_vcpu_time_info guest_hv_clock; | 1525 | struct pvclock_vcpu_time_info guest_hv_clock; |
1490 | u8 pvclock_flags; | 1526 | u8 pvclock_flags; |
@@ -1543,37 +1579,6 @@ static int kvm_guest_time_update(struct kvm_vcpu *v) | |||
1543 | if (!vcpu->pv_time_enabled) | 1579 | if (!vcpu->pv_time_enabled) |
1544 | return 0; | 1580 | return 0; |
1545 | 1581 | ||
1546 | /* | ||
1547 | * Time as measured by the TSC may go backwards when resetting the base | ||
1548 | * tsc_timestamp. The reason for this is that the TSC resolution is | ||
1549 | * higher than the resolution of the other clock scales. Thus, many | ||
1550 | * possible measurements of the TSC correspond to one measurement of any | |||
1551 | * other clock, and so a spread of values is possible. This is not a | ||
1552 | * problem for the computation of the nanosecond clock; with TSC rates | ||
1553 | * around 1 GHz, there can only be a few cycles which correspond to one | |||
1554 | * nanosecond value, and any path through this code will inevitably | ||
1555 | * take longer than that. However, with the kernel_ns value itself, | ||
1556 | * the precision may be much lower, down to HZ granularity. If the | ||
1557 | * first sampling of TSC against kernel_ns ends in the low part of the | ||
1558 | * range, and the second in the high end of the range, we can get: | ||
1559 | * | ||
1560 | * (TSC - offset_low) * S + kns_old > (TSC - offset_high) * S + kns_new | ||
1561 | * | ||
1562 | * As the sampling errors potentially range in the thousands of cycles, | ||
1563 | * it is possible such a time value has already been observed by the | ||
1564 | * guest. To protect against this, we must compute the system time as | ||
1565 | * observed by the guest and ensure the new system time is greater. | ||
1566 | */ | ||
1567 | max_kernel_ns = 0; | ||
1568 | if (vcpu->hv_clock.tsc_timestamp) { | ||
1569 | max_kernel_ns = vcpu->last_guest_tsc - | ||
1570 | vcpu->hv_clock.tsc_timestamp; | ||
1571 | max_kernel_ns = pvclock_scale_delta(max_kernel_ns, | ||
1572 | vcpu->hv_clock.tsc_to_system_mul, | ||
1573 | vcpu->hv_clock.tsc_shift); | ||
1574 | max_kernel_ns += vcpu->last_kernel_ns; | ||
1575 | } | ||
1576 | |||
1577 | if (unlikely(vcpu->hw_tsc_khz != this_tsc_khz)) { | 1582 | if (unlikely(vcpu->hw_tsc_khz != this_tsc_khz)) { |
1578 | kvm_get_time_scale(NSEC_PER_SEC / 1000, this_tsc_khz, | 1583 | kvm_get_time_scale(NSEC_PER_SEC / 1000, this_tsc_khz, |
1579 | &vcpu->hv_clock.tsc_shift, | 1584 | &vcpu->hv_clock.tsc_shift, |
@@ -1581,18 +1586,9 @@ static int kvm_guest_time_update(struct kvm_vcpu *v) | |||
1581 | vcpu->hw_tsc_khz = this_tsc_khz; | 1586 | vcpu->hw_tsc_khz = this_tsc_khz; |
1582 | } | 1587 | } |
1583 | 1588 | ||
1584 | /* with a master <monotonic time, tsc value> tuple, | ||
1585 | * pvclock clock reads always increase at the (scaled) rate | ||
1586 | * of guest TSC - no need to deal with sampling errors. | ||
1587 | */ | ||
1588 | if (!use_master_clock) { | ||
1589 | if (max_kernel_ns > kernel_ns) | ||
1590 | kernel_ns = max_kernel_ns; | ||
1591 | } | ||
1592 | /* With all the info we got, fill in the values */ | 1589 | /* With all the info we got, fill in the values */ |
1593 | vcpu->hv_clock.tsc_timestamp = tsc_timestamp; | 1590 | vcpu->hv_clock.tsc_timestamp = tsc_timestamp; |
1594 | vcpu->hv_clock.system_time = kernel_ns + v->kvm->arch.kvmclock_offset; | 1591 | vcpu->hv_clock.system_time = kernel_ns + v->kvm->arch.kvmclock_offset; |
1595 | vcpu->last_kernel_ns = kernel_ns; | ||
1596 | vcpu->last_guest_tsc = tsc_timestamp; | 1592 | vcpu->last_guest_tsc = tsc_timestamp; |
1597 | 1593 | ||
1598 | /* | 1594 | /* |
@@ -1634,14 +1630,21 @@ static int kvm_guest_time_update(struct kvm_vcpu *v) | |||
1634 | * the others. | 1630 | * the others. |
1635 | * | 1631 | * |
1636 | * So in those cases, request a kvmclock update for all vcpus. | 1632 | * So in those cases, request a kvmclock update for all vcpus. |
1637 | * The worst case for a remote vcpu to update its kvmclock | 1633 | * We need to rate-limit these requests though, as they can |
1638 | * is then bounded by maximum nohz sleep latency. | 1634 | * considerably slow guests that have a large number of vcpus. |
1635 | * The time for a remote vcpu to update its kvmclock is bound | ||
1636 | * by the delay we use to rate-limit the updates. | ||
1639 | */ | 1637 | */ |
1640 | 1638 | ||
1641 | static void kvm_gen_kvmclock_update(struct kvm_vcpu *v) | 1639 | #define KVMCLOCK_UPDATE_DELAY msecs_to_jiffies(100) |
1640 | |||
1641 | static void kvmclock_update_fn(struct work_struct *work) | ||
1642 | { | 1642 | { |
1643 | int i; | 1643 | int i; |
1644 | struct kvm *kvm = v->kvm; | 1644 | struct delayed_work *dwork = to_delayed_work(work); |
1645 | struct kvm_arch *ka = container_of(dwork, struct kvm_arch, | ||
1646 | kvmclock_update_work); | ||
1647 | struct kvm *kvm = container_of(ka, struct kvm, arch); | ||
1645 | struct kvm_vcpu *vcpu; | 1648 | struct kvm_vcpu *vcpu; |
1646 | 1649 | ||
1647 | kvm_for_each_vcpu(i, vcpu, kvm) { | 1650 | kvm_for_each_vcpu(i, vcpu, kvm) { |
@@ -1650,6 +1653,29 @@ static void kvm_gen_kvmclock_update(struct kvm_vcpu *v) | |||
1650 | } | 1653 | } |
1651 | } | 1654 | } |
1652 | 1655 | ||
1656 | static void kvm_gen_kvmclock_update(struct kvm_vcpu *v) | ||
1657 | { | ||
1658 | struct kvm *kvm = v->kvm; | ||
1659 | |||
1660 | set_bit(KVM_REQ_CLOCK_UPDATE, &v->requests); | ||
1661 | schedule_delayed_work(&kvm->arch.kvmclock_update_work, | ||
1662 | KVMCLOCK_UPDATE_DELAY); | ||
1663 | } | ||
1664 | |||
1665 | #define KVMCLOCK_SYNC_PERIOD (300 * HZ) | ||
1666 | |||
1667 | static void kvmclock_sync_fn(struct work_struct *work) | ||
1668 | { | ||
1669 | struct delayed_work *dwork = to_delayed_work(work); | ||
1670 | struct kvm_arch *ka = container_of(dwork, struct kvm_arch, | ||
1671 | kvmclock_sync_work); | ||
1672 | struct kvm *kvm = container_of(ka, struct kvm, arch); | ||
1673 | |||
1674 | schedule_delayed_work(&kvm->arch.kvmclock_update_work, 0); | ||
1675 | schedule_delayed_work(&kvm->arch.kvmclock_sync_work, | ||
1676 | KVMCLOCK_SYNC_PERIOD); | ||
1677 | } | ||
1678 | |||
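The two delayed works added above implement the rate limit described in the rewritten comment: any number of kvm_gen_kvmclock_update() calls within a 100 ms window collapse into one fan-out, because schedule_delayed_work() is a no-op while the work is already pending. A hedged userspace sketch of that coalescing pattern (timestamps instead of a workqueue; all names are illustrative):

#include <stdbool.h>
#include <stdio.h>
#include <time.h>

#define UPDATE_DELAY_NS (100 * 1000 * 1000ULL)

static unsigned long long pending_since;   /* 0 = no update scheduled */

static unsigned long long now_ns(void)
{
	struct timespec ts;
	clock_gettime(CLOCK_MONOTONIC, &ts);
	return ts.tv_sec * 1000000000ULL + ts.tv_nsec;
}

/* Called for every request; only the first in a window arms the "timer". */
static void request_clock_update(void)
{
	if (!pending_since)
		pending_since = now_ns();
}

static void update_all_vcpus(void)
{
	printf("fan out KVM_REQ_CLOCK_UPDATE to every vcpu\n");
}

/* Polled from a main loop; fires at most once per armed window. */
static bool maybe_run_update(void)
{
	if (pending_since && now_ns() - pending_since >= UPDATE_DELAY_NS) {
		pending_since = 0;
		update_all_vcpus();
		return true;
	}
	return false;
}

int main(void)
{
	request_clock_update();
	request_clock_update();   /* coalesced into the pending window */
	while (!maybe_run_update())
		;                 /* busy-wait only for the demo */
	return 0;
}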
1653 | static bool msr_mtrr_valid(unsigned msr) | 1679 | static bool msr_mtrr_valid(unsigned msr) |
1654 | { | 1680 | { |
1655 | switch (msr) { | 1681 | switch (msr) { |
@@ -1826,6 +1852,8 @@ static bool kvm_hv_msr_partition_wide(u32 msr) | |||
1826 | switch (msr) { | 1852 | switch (msr) { |
1827 | case HV_X64_MSR_GUEST_OS_ID: | 1853 | case HV_X64_MSR_GUEST_OS_ID: |
1828 | case HV_X64_MSR_HYPERCALL: | 1854 | case HV_X64_MSR_HYPERCALL: |
1855 | case HV_X64_MSR_REFERENCE_TSC: | ||
1856 | case HV_X64_MSR_TIME_REF_COUNT: | ||
1829 | r = true; | 1857 | r = true; |
1830 | break; | 1858 | break; |
1831 | } | 1859 | } |
@@ -1865,6 +1893,21 @@ static int set_msr_hyperv_pw(struct kvm_vcpu *vcpu, u32 msr, u64 data) | |||
1865 | if (__copy_to_user((void __user *)addr, instructions, 4)) | 1893 | if (__copy_to_user((void __user *)addr, instructions, 4)) |
1866 | return 1; | 1894 | return 1; |
1867 | kvm->arch.hv_hypercall = data; | 1895 | kvm->arch.hv_hypercall = data; |
1896 | mark_page_dirty(kvm, gfn); | ||
1897 | break; | ||
1898 | } | ||
1899 | case HV_X64_MSR_REFERENCE_TSC: { | ||
1900 | u64 gfn; | ||
1901 | HV_REFERENCE_TSC_PAGE tsc_ref; | ||
1902 | memset(&tsc_ref, 0, sizeof(tsc_ref)); | ||
1903 | kvm->arch.hv_tsc_page = data; | ||
1904 | if (!(data & HV_X64_MSR_TSC_REFERENCE_ENABLE)) | ||
1905 | break; | ||
1906 | gfn = data >> HV_X64_MSR_TSC_REFERENCE_ADDRESS_SHIFT; | ||
1907 | if (kvm_write_guest(kvm, data, | ||
1908 | &tsc_ref, sizeof(tsc_ref))) | ||
1909 | return 1; | ||
1910 | mark_page_dirty(kvm, gfn); | ||
1868 | break; | 1911 | break; |
1869 | } | 1912 | } |
1870 | default: | 1913 | default: |
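For the new HV_X64_MSR_REFERENCE_TSC case: the MSR value packs an enable bit and the guest page frame number of the reference TSC page, which is zero-filled on enable. A small sketch of unpacking such a value (the bit layout shown is an assumption inferred from the shifts used above, and the constants are stand-ins for the HV_X64_* macros):

#include <stdint.h>
#include <stdio.h>

#define TSC_REF_ENABLE        0x1ULL   /* assumed layout: bit 0 = enable */
#define TSC_REF_ADDRESS_SHIFT 12       /* assumed: gfn in bits 63:12 */

int main(void)
{
	uint64_t data = 0xfeed1001ULL;     /* illustrative wrmsr payload */

	if (!(data & TSC_REF_ENABLE))
		return 0;                  /* page disabled: nothing to map */
	printf("zero-fill guest page at gfn 0x%llx\n",
	       (unsigned long long)(data >> TSC_REF_ADDRESS_SHIFT));
	return 0;
}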
@@ -1879,19 +1922,21 @@ static int set_msr_hyperv(struct kvm_vcpu *vcpu, u32 msr, u64 data) | |||
1879 | { | 1922 | { |
1880 | switch (msr) { | 1923 | switch (msr) { |
1881 | case HV_X64_MSR_APIC_ASSIST_PAGE: { | 1924 | case HV_X64_MSR_APIC_ASSIST_PAGE: { |
1925 | u64 gfn; | ||
1882 | unsigned long addr; | 1926 | unsigned long addr; |
1883 | 1927 | ||
1884 | if (!(data & HV_X64_MSR_APIC_ASSIST_PAGE_ENABLE)) { | 1928 | if (!(data & HV_X64_MSR_APIC_ASSIST_PAGE_ENABLE)) { |
1885 | vcpu->arch.hv_vapic = data; | 1929 | vcpu->arch.hv_vapic = data; |
1886 | break; | 1930 | break; |
1887 | } | 1931 | } |
1888 | addr = gfn_to_hva(vcpu->kvm, data >> | 1932 | gfn = data >> HV_X64_MSR_APIC_ASSIST_PAGE_ADDRESS_SHIFT; |
1889 | HV_X64_MSR_APIC_ASSIST_PAGE_ADDRESS_SHIFT); | 1933 | addr = gfn_to_hva(vcpu->kvm, gfn); |
1890 | if (kvm_is_error_hva(addr)) | 1934 | if (kvm_is_error_hva(addr)) |
1891 | return 1; | 1935 | return 1; |
1892 | if (__clear_user((void __user *)addr, PAGE_SIZE)) | 1936 | if (__clear_user((void __user *)addr, PAGE_SIZE)) |
1893 | return 1; | 1937 | return 1; |
1894 | vcpu->arch.hv_vapic = data; | 1938 | vcpu->arch.hv_vapic = data; |
1939 | mark_page_dirty(vcpu->kvm, gfn); | ||
1895 | break; | 1940 | break; |
1896 | } | 1941 | } |
1897 | case HV_X64_MSR_EOI: | 1942 | case HV_X64_MSR_EOI: |
@@ -2017,8 +2062,7 @@ int kvm_set_msr_common(struct kvm_vcpu *vcpu, struct msr_data *msr_info) | |||
2017 | case 0x200 ... 0x2ff: | 2062 | case 0x200 ... 0x2ff: |
2018 | return set_msr_mtrr(vcpu, msr, data); | 2063 | return set_msr_mtrr(vcpu, msr, data); |
2019 | case MSR_IA32_APICBASE: | 2064 | case MSR_IA32_APICBASE: |
2020 | kvm_set_apic_base(vcpu, data); | 2065 | return kvm_set_apic_base(vcpu, msr_info); |
2021 | break; | ||
2022 | case APIC_BASE_MSR ... APIC_BASE_MSR + 0x3ff: | 2066 | case APIC_BASE_MSR ... APIC_BASE_MSR + 0x3ff: |
2023 | return kvm_x2apic_msr_write(vcpu, msr, data); | 2067 | return kvm_x2apic_msr_write(vcpu, msr, data); |
2024 | case MSR_IA32_TSCDEADLINE: | 2068 | case MSR_IA32_TSCDEADLINE: |
@@ -2291,6 +2335,14 @@ static int get_msr_hyperv_pw(struct kvm_vcpu *vcpu, u32 msr, u64 *pdata) | |||
2291 | case HV_X64_MSR_HYPERCALL: | 2335 | case HV_X64_MSR_HYPERCALL: |
2292 | data = kvm->arch.hv_hypercall; | 2336 | data = kvm->arch.hv_hypercall; |
2293 | break; | 2337 | break; |
2338 | case HV_X64_MSR_TIME_REF_COUNT: { | ||
2339 | data = | ||
2340 | div_u64(get_kernel_ns() + kvm->arch.kvmclock_offset, 100); | ||
2341 | break; | ||
2342 | } | ||
2343 | case HV_X64_MSR_REFERENCE_TSC: | ||
2344 | data = kvm->arch.hv_tsc_page; | ||
2345 | break; | ||
2294 | default: | 2346 | default: |
2295 | vcpu_unimpl(vcpu, "Hyper-V unhandled rdmsr: 0x%x\n", msr); | 2347 | vcpu_unimpl(vcpu, "Hyper-V unhandled rdmsr: 0x%x\n", msr); |
2296 | return 1; | 2348 | return 1; |
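The new HV_X64_MSR_TIME_REF_COUNT read divides kernel nanoseconds (adjusted by the per-VM kvmclock offset) by 100, since Hyper-V's reference counter is defined in 100 ns units. A quick arithmetic check with made-up values:

#include <stdint.h>
#include <stdio.h>

int main(void)
{
	uint64_t kernel_ns = 1500000123ULL;      /* host monotonic time */
	int64_t kvmclock_offset = -500000000LL;  /* per-VM clock offset */

	/* Hyper-V's reference counter ticks once per 100 ns. */
	uint64_t ref = (kernel_ns + kvmclock_offset) / 100;
	printf("%llu ticks\n", (unsigned long long)ref);  /* 10000001 */
	return 0;
}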
@@ -2308,9 +2360,12 @@ static int get_msr_hyperv(struct kvm_vcpu *vcpu, u32 msr, u64 *pdata) | |||
2308 | case HV_X64_MSR_VP_INDEX: { | 2360 | case HV_X64_MSR_VP_INDEX: { |
2309 | int r; | 2361 | int r; |
2310 | struct kvm_vcpu *v; | 2362 | struct kvm_vcpu *v; |
2311 | kvm_for_each_vcpu(r, v, vcpu->kvm) | 2363 | kvm_for_each_vcpu(r, v, vcpu->kvm) { |
2312 | if (v == vcpu) | 2364 | if (v == vcpu) { |
2313 | data = r; | 2365 | data = r; |
2366 | break; | ||
2367 | } | ||
2368 | } | ||
2314 | break; | 2369 | break; |
2315 | } | 2370 | } |
2316 | case HV_X64_MSR_EOI: | 2371 | case HV_X64_MSR_EOI: |
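The added break in the HV_X64_MSR_VP_INDEX read simply stops the vcpu scan once the calling vcpu is found; the result is unchanged, the remaining iterations were just wasted work. The idiom in miniature (illustrative data):

#include <stdio.h>

int main(void)
{
	const char *vcpus[] = { "v0", "v1", "v2", "v3" };
	const char *self = vcpus[2];
	unsigned r, data = 0;

	for (r = 0; r < 4; r++) {
		if (vcpus[r] == self) {
			data = r;
			break;          /* no need to scan the rest */
		}
	}
	printf("VP_INDEX = %u\n", data);  /* 2 */
	return 0;
}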
@@ -2601,6 +2656,8 @@ int kvm_dev_ioctl_check_extension(long ext) | |||
2601 | case KVM_CAP_GET_TSC_KHZ: | 2656 | case KVM_CAP_GET_TSC_KHZ: |
2602 | case KVM_CAP_KVMCLOCK_CTRL: | 2657 | case KVM_CAP_KVMCLOCK_CTRL: |
2603 | case KVM_CAP_READONLY_MEM: | 2658 | case KVM_CAP_READONLY_MEM: |
2659 | case KVM_CAP_HYPERV_TIME: | ||
2660 | case KVM_CAP_IOAPIC_POLARITY_IGNORED: | ||
2604 | #ifdef CONFIG_KVM_DEVICE_ASSIGNMENT | 2661 | #ifdef CONFIG_KVM_DEVICE_ASSIGNMENT |
2605 | case KVM_CAP_ASSIGN_DEV_IRQ: | 2662 | case KVM_CAP_ASSIGN_DEV_IRQ: |
2606 | case KVM_CAP_PCI_2_3: | 2663 | case KVM_CAP_PCI_2_3: |
@@ -2972,8 +3029,11 @@ static int kvm_vcpu_ioctl_x86_set_vcpu_events(struct kvm_vcpu *vcpu, | |||
2972 | static void kvm_vcpu_ioctl_x86_get_debugregs(struct kvm_vcpu *vcpu, | 3029 | static void kvm_vcpu_ioctl_x86_get_debugregs(struct kvm_vcpu *vcpu, |
2973 | struct kvm_debugregs *dbgregs) | 3030 | struct kvm_debugregs *dbgregs) |
2974 | { | 3031 | { |
3032 | unsigned long val; | ||
3033 | |||
2975 | memcpy(dbgregs->db, vcpu->arch.db, sizeof(vcpu->arch.db)); | 3034 | memcpy(dbgregs->db, vcpu->arch.db, sizeof(vcpu->arch.db)); |
2976 | dbgregs->dr6 = vcpu->arch.dr6; | 3035 | _kvm_get_dr(vcpu, 6, &val); |
3036 | dbgregs->dr6 = val; | ||
2977 | dbgregs->dr7 = vcpu->arch.dr7; | 3037 | dbgregs->dr7 = vcpu->arch.dr7; |
2978 | dbgregs->flags = 0; | 3038 | dbgregs->flags = 0; |
2979 | memset(&dbgregs->reserved, 0, sizeof(dbgregs->reserved)); | 3039 | memset(&dbgregs->reserved, 0, sizeof(dbgregs->reserved)); |
@@ -2987,7 +3047,9 @@ static int kvm_vcpu_ioctl_x86_set_debugregs(struct kvm_vcpu *vcpu, | |||
2987 | 3047 | ||
2988 | memcpy(vcpu->arch.db, dbgregs->db, sizeof(vcpu->arch.db)); | 3048 | memcpy(vcpu->arch.db, dbgregs->db, sizeof(vcpu->arch.db)); |
2989 | vcpu->arch.dr6 = dbgregs->dr6; | 3049 | vcpu->arch.dr6 = dbgregs->dr6; |
3050 | kvm_update_dr6(vcpu); | ||
2990 | vcpu->arch.dr7 = dbgregs->dr7; | 3051 | vcpu->arch.dr7 = dbgregs->dr7; |
3052 | kvm_update_dr7(vcpu); | ||
2991 | 3053 | ||
2992 | return 0; | 3054 | return 0; |
2993 | } | 3055 | } |
@@ -3022,9 +3084,7 @@ static int kvm_vcpu_ioctl_x86_set_xsave(struct kvm_vcpu *vcpu, | |||
3022 | * CPUID leaf 0xD, index 0, EDX:EAX. This is for compatibility | 3084 | * CPUID leaf 0xD, index 0, EDX:EAX. This is for compatibility |
3023 | * with old userspace. | 3085 | * with old userspace. |
3024 | */ | 3086 | */ |
3025 | if (xstate_bv & ~KVM_SUPPORTED_XCR0) | 3087 | if (xstate_bv & ~kvm_supported_xcr0()) |
3026 | return -EINVAL; | ||
3027 | if (xstate_bv & ~host_xcr0) | ||
3028 | return -EINVAL; | 3088 | return -EINVAL; |
3029 | memcpy(&vcpu->arch.guest_fpu.state->xsave, | 3089 | memcpy(&vcpu->arch.guest_fpu.state->xsave, |
3030 | guest_xsave->region, vcpu->arch.guest_xstate_size); | 3090 | guest_xsave->region, vcpu->arch.guest_xstate_size); |
@@ -3877,6 +3937,23 @@ static void kvm_init_msr_list(void) | |||
3877 | for (i = j = KVM_SAVE_MSRS_BEGIN; i < ARRAY_SIZE(msrs_to_save); i++) { | 3937 | for (i = j = KVM_SAVE_MSRS_BEGIN; i < ARRAY_SIZE(msrs_to_save); i++) { |
3878 | if (rdmsr_safe(msrs_to_save[i], &dummy[0], &dummy[1]) < 0) | 3938 | if (rdmsr_safe(msrs_to_save[i], &dummy[0], &dummy[1]) < 0) |
3879 | continue; | 3939 | continue; |
3940 | |||
3941 | /* | ||
3942 | * Even MSRs that are valid in the host may not be exposed | ||
3943 | * to the guests in some cases. We could work around this | ||
3944 | * in VMX with the generic MSR save/load machinery, but it | ||
3945 | * is not really worthwhile since it will only | ||
3946 | * happen with nested virtualization. | ||
3947 | */ | ||
3948 | switch (msrs_to_save[i]) { | ||
3949 | case MSR_IA32_BNDCFGS: | ||
3950 | if (!kvm_x86_ops->mpx_supported()) | ||
3951 | continue; | ||
3952 | break; | ||
3953 | default: | ||
3954 | break; | ||
3955 | } | ||
3956 | |||
3880 | if (j < i) | 3957 | if (j < i) |
3881 | msrs_to_save[j] = msrs_to_save[i]; | 3958 | msrs_to_save[j] = msrs_to_save[i]; |
3882 | j++; | 3959 | j++; |
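The new filter in kvm_init_msr_list() plugs into the existing two-index compaction loop: entries that survive the rdmsr probe and the per-MSR switch are copied down over rejected slots, and j ends up as the new array length. A self-contained sketch of that idiom (the predicate here is a stand-in):

#include <stdio.h>

static int keep(unsigned m) { return m % 2 == 0; }  /* stand-in predicate */

int main(void)
{
	unsigned msrs[] = { 10, 11, 12, 13, 14 };
	unsigned i, j, n = sizeof(msrs) / sizeof(msrs[0]);

	for (i = j = 0; i < n; i++) {
		if (!keep(msrs[i]))
			continue;       /* skip entries the host can't expose */
		if (j < i)
			msrs[j] = msrs[i];
		j++;
	}
	for (i = 0; i < j; i++)         /* j is the new length */
		printf("%u ", msrs[i]);
	printf("\n");                   /* prints: 10 12 14 */
	return 0;
}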
@@ -4373,6 +4450,7 @@ static int emulator_cmpxchg_emulated(struct x86_emulate_ctxt *ctxt, | |||
4373 | if (!exchanged) | 4450 | if (!exchanged) |
4374 | return X86EMUL_CMPXCHG_FAILED; | 4451 | return X86EMUL_CMPXCHG_FAILED; |
4375 | 4452 | ||
4453 | mark_page_dirty(vcpu->kvm, gpa >> PAGE_SHIFT); | ||
4376 | kvm_mmu_pte_write(vcpu, gpa, new, bytes); | 4454 | kvm_mmu_pte_write(vcpu, gpa, new, bytes); |
4377 | 4455 | ||
4378 | return X86EMUL_CONTINUE; | 4456 | return X86EMUL_CONTINUE; |
@@ -5344,7 +5422,8 @@ static void kvm_timer_init(void) | |||
5344 | int cpu; | 5422 | int cpu; |
5345 | 5423 | ||
5346 | max_tsc_khz = tsc_khz; | 5424 | max_tsc_khz = tsc_khz; |
5347 | register_hotcpu_notifier(&kvmclock_cpu_notifier_block); | 5425 | |
5426 | cpu_notifier_register_begin(); | ||
5348 | if (!boot_cpu_has(X86_FEATURE_CONSTANT_TSC)) { | 5427 | if (!boot_cpu_has(X86_FEATURE_CONSTANT_TSC)) { |
5349 | #ifdef CONFIG_CPU_FREQ | 5428 | #ifdef CONFIG_CPU_FREQ |
5350 | struct cpufreq_policy policy; | 5429 | struct cpufreq_policy policy; |
@@ -5361,6 +5440,10 @@ static void kvm_timer_init(void) | |||
5361 | pr_debug("kvm: max_tsc_khz = %ld\n", max_tsc_khz); | 5440 | pr_debug("kvm: max_tsc_khz = %ld\n", max_tsc_khz); |
5362 | for_each_online_cpu(cpu) | 5441 | for_each_online_cpu(cpu) |
5363 | smp_call_function_single(cpu, tsc_khz_changed, NULL, 1); | 5442 | smp_call_function_single(cpu, tsc_khz_changed, NULL, 1); |
5443 | |||
5444 | __register_hotcpu_notifier(&kvmclock_cpu_notifier_block); | ||
5445 | cpu_notifier_register_done(); | ||
5446 | |||
5364 | } | 5447 | } |
5365 | 5448 | ||
5366 | static DEFINE_PER_CPU(struct kvm_vcpu *, current_vcpu); | 5449 | static DEFINE_PER_CPU(struct kvm_vcpu *, current_vcpu); |
@@ -5516,9 +5599,10 @@ int kvm_arch_init(void *opaque) | |||
5516 | goto out_free_percpu; | 5599 | goto out_free_percpu; |
5517 | 5600 | ||
5518 | kvm_set_mmio_spte_mask(); | 5601 | kvm_set_mmio_spte_mask(); |
5519 | kvm_init_msr_list(); | ||
5520 | 5602 | ||
5521 | kvm_x86_ops = ops; | 5603 | kvm_x86_ops = ops; |
5604 | kvm_init_msr_list(); | ||
5605 | |||
5522 | kvm_mmu_set_mask_ptes(PT_USER_MASK, PT_ACCESSED_MASK, | 5606 | kvm_mmu_set_mask_ptes(PT_USER_MASK, PT_ACCESSED_MASK, |
5523 | PT_DIRTY_MASK, PT64_NX_MASK, 0); | 5607 | PT_DIRTY_MASK, PT64_NX_MASK, 0); |
5524 | 5608 | ||
@@ -5761,8 +5845,10 @@ static void update_cr8_intercept(struct kvm_vcpu *vcpu) | |||
5761 | kvm_x86_ops->update_cr8_intercept(vcpu, tpr, max_irr); | 5845 | kvm_x86_ops->update_cr8_intercept(vcpu, tpr, max_irr); |
5762 | } | 5846 | } |
5763 | 5847 | ||
5764 | static void inject_pending_event(struct kvm_vcpu *vcpu) | 5848 | static int inject_pending_event(struct kvm_vcpu *vcpu, bool req_int_win) |
5765 | { | 5849 | { |
5850 | int r; | ||
5851 | |||
5766 | /* try to reinject previous events if any */ | 5852 | /* try to reinject previous events if any */ |
5767 | if (vcpu->arch.exception.pending) { | 5853 | if (vcpu->arch.exception.pending) { |
5768 | trace_kvm_inj_exception(vcpu->arch.exception.nr, | 5854 | trace_kvm_inj_exception(vcpu->arch.exception.nr, |
@@ -5772,17 +5858,23 @@ static void inject_pending_event(struct kvm_vcpu *vcpu) | |||
5772 | vcpu->arch.exception.has_error_code, | 5858 | vcpu->arch.exception.has_error_code, |
5773 | vcpu->arch.exception.error_code, | 5859 | vcpu->arch.exception.error_code, |
5774 | vcpu->arch.exception.reinject); | 5860 | vcpu->arch.exception.reinject); |
5775 | return; | 5861 | return 0; |
5776 | } | 5862 | } |
5777 | 5863 | ||
5778 | if (vcpu->arch.nmi_injected) { | 5864 | if (vcpu->arch.nmi_injected) { |
5779 | kvm_x86_ops->set_nmi(vcpu); | 5865 | kvm_x86_ops->set_nmi(vcpu); |
5780 | return; | 5866 | return 0; |
5781 | } | 5867 | } |
5782 | 5868 | ||
5783 | if (vcpu->arch.interrupt.pending) { | 5869 | if (vcpu->arch.interrupt.pending) { |
5784 | kvm_x86_ops->set_irq(vcpu); | 5870 | kvm_x86_ops->set_irq(vcpu); |
5785 | return; | 5871 | return 0; |
5872 | } | ||
5873 | |||
5874 | if (is_guest_mode(vcpu) && kvm_x86_ops->check_nested_events) { | ||
5875 | r = kvm_x86_ops->check_nested_events(vcpu, req_int_win); | ||
5876 | if (r != 0) | ||
5877 | return r; | ||
5786 | } | 5878 | } |
5787 | 5879 | ||
5788 | /* try to inject new event if pending */ | 5880 | /* try to inject new event if pending */ |
@@ -5799,6 +5891,7 @@ static void inject_pending_event(struct kvm_vcpu *vcpu) | |||
5799 | kvm_x86_ops->set_irq(vcpu); | 5891 | kvm_x86_ops->set_irq(vcpu); |
5800 | } | 5892 | } |
5801 | } | 5893 | } |
5894 | return 0; | ||
5802 | } | 5895 | } |
5803 | 5896 | ||
5804 | static void process_nmi(struct kvm_vcpu *vcpu) | 5897 | static void process_nmi(struct kvm_vcpu *vcpu) |
@@ -5834,6 +5927,11 @@ static void vcpu_scan_ioapic(struct kvm_vcpu *vcpu) | |||
5834 | kvm_apic_update_tmr(vcpu, tmr); | 5927 | kvm_apic_update_tmr(vcpu, tmr); |
5835 | } | 5928 | } |
5836 | 5929 | ||
5930 | /* | ||
5931 | * Returns 1 to let __vcpu_run() continue the guest execution loop without | ||
5932 | * exiting to userspace. Otherwise, the value will be returned to | ||
5933 | * userspace. | ||
5934 | */ | ||
5837 | static int vcpu_enter_guest(struct kvm_vcpu *vcpu) | 5935 | static int vcpu_enter_guest(struct kvm_vcpu *vcpu) |
5838 | { | 5936 | { |
5839 | int r; | 5937 | int r; |
@@ -5898,15 +5996,13 @@ static int vcpu_enter_guest(struct kvm_vcpu *vcpu) | |||
5898 | goto out; | 5996 | goto out; |
5899 | } | 5997 | } |
5900 | 5998 | ||
5901 | inject_pending_event(vcpu); | 5999 | if (inject_pending_event(vcpu, req_int_win) != 0) |
5902 | 6000 | req_immediate_exit = true; | |
5903 | /* enable NMI/IRQ window open exits if needed */ | 6001 | /* enable NMI/IRQ window open exits if needed */ |
5904 | if (vcpu->arch.nmi_pending) | 6002 | else if (vcpu->arch.nmi_pending) |
5905 | req_immediate_exit = | 6003 | kvm_x86_ops->enable_nmi_window(vcpu); |
5906 | kvm_x86_ops->enable_nmi_window(vcpu) != 0; | ||
5907 | else if (kvm_cpu_has_injectable_intr(vcpu) || req_int_win) | 6004 | else if (kvm_cpu_has_injectable_intr(vcpu) || req_int_win) |
5908 | req_immediate_exit = | 6005 | kvm_x86_ops->enable_irq_window(vcpu); |
5909 | kvm_x86_ops->enable_irq_window(vcpu) != 0; | ||
5910 | 6006 | ||
5911 | if (kvm_lapic_enabled(vcpu)) { | 6007 | if (kvm_lapic_enabled(vcpu)) { |
5912 | /* | 6008 | /* |
@@ -5966,12 +6062,28 @@ static int vcpu_enter_guest(struct kvm_vcpu *vcpu) | |||
5966 | set_debugreg(vcpu->arch.eff_db[1], 1); | 6062 | set_debugreg(vcpu->arch.eff_db[1], 1); |
5967 | set_debugreg(vcpu->arch.eff_db[2], 2); | 6063 | set_debugreg(vcpu->arch.eff_db[2], 2); |
5968 | set_debugreg(vcpu->arch.eff_db[3], 3); | 6064 | set_debugreg(vcpu->arch.eff_db[3], 3); |
6065 | set_debugreg(vcpu->arch.dr6, 6); | ||
5969 | } | 6066 | } |
5970 | 6067 | ||
5971 | trace_kvm_entry(vcpu->vcpu_id); | 6068 | trace_kvm_entry(vcpu->vcpu_id); |
5972 | kvm_x86_ops->run(vcpu); | 6069 | kvm_x86_ops->run(vcpu); |
5973 | 6070 | ||
5974 | /* | 6071 | /* |
6072 | * Do this here before restoring debug registers on the host. And | ||
6073 | * since we do this before handling the vmexit, a DR access vmexit | ||
6074 | * can (a) read the correct value of the debug registers, (b) set | ||
6075 | * KVM_DEBUGREG_WONT_EXIT again. | ||
6076 | */ | ||
6077 | if (unlikely(vcpu->arch.switch_db_regs & KVM_DEBUGREG_WONT_EXIT)) { | ||
6078 | int i; | ||
6079 | |||
6080 | WARN_ON(vcpu->guest_debug & KVM_GUESTDBG_USE_HW_BP); | ||
6081 | kvm_x86_ops->sync_dirty_debug_regs(vcpu); | ||
6082 | for (i = 0; i < KVM_NR_DB_REGS; i++) | ||
6083 | vcpu->arch.eff_db[i] = vcpu->arch.db[i]; | ||
6084 | } | ||
6085 | |||
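The block added after kvm_x86_ops->run() handles the case where the guest was given direct access to the debug registers (KVM_DEBUGREG_WONT_EXIT): their current hardware values must be read back into the vcpu's shadow copies before the host restores its own debug state below. A toy model of that read-back (names mirror the kernel's, but the "hardware" is a plain array):

#include <stdio.h>

#define NR_DB_REGS 4

/* Toy model: hardware DR registers the guest may have written directly. */
static unsigned long hw_db[NR_DB_REGS] = { 0x1000, 0, 0, 0x2000 };

struct vcpu {
	unsigned long db[NR_DB_REGS];     /* shadow copies */
	unsigned long eff_db[NR_DB_REGS]; /* values loaded before entry */
};

/* Pull the guest's writes out of "hardware" before the host restores its
 * own debug state, mirroring sync_dirty_debug_regs() plus the eff_db copy. */
static void sync_dirty_debug_regs(struct vcpu *v)
{
	int i;

	for (i = 0; i < NR_DB_REGS; i++) {
		v->db[i] = hw_db[i];      /* read DRi back */
		v->eff_db[i] = v->db[i];  /* next guest entry reloads these */
	}
}

int main(void)
{
	struct vcpu v = { { 0 }, { 0 } };

	sync_dirty_debug_regs(&v);
	printf("db0=0x%lx db3=0x%lx\n", v.db[0], v.db[3]);
	return 0;
}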
6086 | /* | ||
5975 | * If the guest has used debug registers, at least dr7 | 6087 | * If the guest has used debug registers, at least dr7 |
5976 | * will be disabled while returning to the host. | 6088 | * will be disabled while returning to the host. |
5977 | * If we don't have active breakpoints in the host, we don't | 6089 | * If we don't have active breakpoints in the host, we don't |
@@ -6089,7 +6201,7 @@ static int __vcpu_run(struct kvm_vcpu *vcpu) | |||
6089 | } | 6201 | } |
6090 | if (need_resched()) { | 6202 | if (need_resched()) { |
6091 | srcu_read_unlock(&kvm->srcu, vcpu->srcu_idx); | 6203 | srcu_read_unlock(&kvm->srcu, vcpu->srcu_idx); |
6092 | kvm_resched(vcpu); | 6204 | cond_resched(); |
6093 | vcpu->srcu_idx = srcu_read_lock(&kvm->srcu); | 6205 | vcpu->srcu_idx = srcu_read_lock(&kvm->srcu); |
6094 | } | 6206 | } |
6095 | } | 6207 | } |
@@ -6160,7 +6272,7 @@ static int complete_emulated_mmio(struct kvm_vcpu *vcpu) | |||
6160 | frag->len -= len; | 6272 | frag->len -= len; |
6161 | } | 6273 | } |
6162 | 6274 | ||
6163 | if (vcpu->mmio_cur_fragment == vcpu->mmio_nr_fragments) { | 6275 | if (vcpu->mmio_cur_fragment >= vcpu->mmio_nr_fragments) { |
6164 | vcpu->mmio_needed = 0; | 6276 | vcpu->mmio_needed = 0; |
6165 | 6277 | ||
6166 | /* FIXME: return into emulator if single-stepping. */ | 6278 | /* FIXME: return into emulator if single-stepping. */ |
@@ -6401,6 +6513,7 @@ EXPORT_SYMBOL_GPL(kvm_task_switch); | |||
6401 | int kvm_arch_vcpu_ioctl_set_sregs(struct kvm_vcpu *vcpu, | 6513 | int kvm_arch_vcpu_ioctl_set_sregs(struct kvm_vcpu *vcpu, |
6402 | struct kvm_sregs *sregs) | 6514 | struct kvm_sregs *sregs) |
6403 | { | 6515 | { |
6516 | struct msr_data apic_base_msr; | ||
6404 | int mmu_reset_needed = 0; | 6517 | int mmu_reset_needed = 0; |
6405 | int pending_vec, max_bits, idx; | 6518 | int pending_vec, max_bits, idx; |
6406 | struct desc_ptr dt; | 6519 | struct desc_ptr dt; |
@@ -6424,7 +6537,9 @@ int kvm_arch_vcpu_ioctl_set_sregs(struct kvm_vcpu *vcpu, | |||
6424 | 6537 | ||
6425 | mmu_reset_needed |= vcpu->arch.efer != sregs->efer; | 6538 | mmu_reset_needed |= vcpu->arch.efer != sregs->efer; |
6426 | kvm_x86_ops->set_efer(vcpu, sregs->efer); | 6539 | kvm_x86_ops->set_efer(vcpu, sregs->efer); |
6427 | kvm_set_apic_base(vcpu, sregs->apic_base); | 6540 | apic_base_msr.data = sregs->apic_base; |
6541 | apic_base_msr.host_initiated = true; | ||
6542 | kvm_set_apic_base(vcpu, &apic_base_msr); | ||
6428 | 6543 | ||
6429 | mmu_reset_needed |= kvm_read_cr0(vcpu) != sregs->cr0; | 6544 | mmu_reset_needed |= kvm_read_cr0(vcpu) != sregs->cr0; |
6430 | kvm_x86_ops->set_cr0(vcpu, sregs->cr0); | 6545 | kvm_x86_ops->set_cr0(vcpu, sregs->cr0); |
@@ -6682,6 +6797,7 @@ int kvm_arch_vcpu_postcreate(struct kvm_vcpu *vcpu) | |||
6682 | { | 6797 | { |
6683 | int r; | 6798 | int r; |
6684 | struct msr_data msr; | 6799 | struct msr_data msr; |
6800 | struct kvm *kvm = vcpu->kvm; | ||
6685 | 6801 | ||
6686 | r = vcpu_load(vcpu); | 6802 | r = vcpu_load(vcpu); |
6687 | if (r) | 6803 | if (r) |
@@ -6692,6 +6808,9 @@ int kvm_arch_vcpu_postcreate(struct kvm_vcpu *vcpu) | |||
6692 | kvm_write_tsc(vcpu, &msr); | 6808 | kvm_write_tsc(vcpu, &msr); |
6693 | vcpu_put(vcpu); | 6809 | vcpu_put(vcpu); |
6694 | 6810 | ||
6811 | schedule_delayed_work(&kvm->arch.kvmclock_sync_work, | ||
6812 | KVMCLOCK_SYNC_PERIOD); | ||
6813 | |||
6695 | return r; | 6814 | return r; |
6696 | } | 6815 | } |
6697 | 6816 | ||
@@ -6717,6 +6836,7 @@ void kvm_vcpu_reset(struct kvm_vcpu *vcpu) | |||
6717 | 6836 | ||
6718 | memset(vcpu->arch.db, 0, sizeof(vcpu->arch.db)); | 6837 | memset(vcpu->arch.db, 0, sizeof(vcpu->arch.db)); |
6719 | vcpu->arch.dr6 = DR6_FIXED_1; | 6838 | vcpu->arch.dr6 = DR6_FIXED_1; |
6839 | kvm_update_dr6(vcpu); | ||
6720 | vcpu->arch.dr7 = DR7_FIXED_1; | 6840 | vcpu->arch.dr7 = DR7_FIXED_1; |
6721 | kvm_update_dr7(vcpu); | 6841 | kvm_update_dr7(vcpu); |
6722 | 6842 | ||
@@ -6983,6 +7103,9 @@ int kvm_arch_init_vm(struct kvm *kvm, unsigned long type) | |||
6983 | 7103 | ||
6984 | pvclock_update_vm_gtod_copy(kvm); | 7104 | pvclock_update_vm_gtod_copy(kvm); |
6985 | 7105 | ||
7106 | INIT_DELAYED_WORK(&kvm->arch.kvmclock_update_work, kvmclock_update_fn); | ||
7107 | INIT_DELAYED_WORK(&kvm->arch.kvmclock_sync_work, kvmclock_sync_fn); | ||
7108 | |||
6986 | return 0; | 7109 | return 0; |
6987 | } | 7110 | } |
6988 | 7111 | ||
@@ -7020,6 +7143,8 @@ static void kvm_free_vcpus(struct kvm *kvm) | |||
7020 | 7143 | ||
7021 | void kvm_arch_sync_events(struct kvm *kvm) | 7144 | void kvm_arch_sync_events(struct kvm *kvm) |
7022 | { | 7145 | { |
7146 | cancel_delayed_work_sync(&kvm->arch.kvmclock_sync_work); | ||
7147 | cancel_delayed_work_sync(&kvm->arch.kvmclock_update_work); | ||
7023 | kvm_free_all_assigned_devices(kvm); | 7148 | kvm_free_all_assigned_devices(kvm); |
7024 | kvm_free_pit(kvm); | 7149 | kvm_free_pit(kvm); |
7025 | } | 7150 | } |
@@ -7218,6 +7343,9 @@ void kvm_arch_flush_shadow_memslot(struct kvm *kvm, | |||
7218 | 7343 | ||
7219 | int kvm_arch_vcpu_runnable(struct kvm_vcpu *vcpu) | 7344 | int kvm_arch_vcpu_runnable(struct kvm_vcpu *vcpu) |
7220 | { | 7345 | { |
7346 | if (is_guest_mode(vcpu) && kvm_x86_ops->check_nested_events) | ||
7347 | kvm_x86_ops->check_nested_events(vcpu, false); | ||
7348 | |||
7221 | return (vcpu->arch.mp_state == KVM_MP_STATE_RUNNABLE && | 7349 | return (vcpu->arch.mp_state == KVM_MP_STATE_RUNNABLE && |
7222 | !vcpu->arch.apf.halted) | 7350 | !vcpu->arch.apf.halted) |
7223 | || !list_empty_careful(&vcpu->async_pf.done) | 7351 | || !list_empty_careful(&vcpu->async_pf.done) |
diff --git a/arch/x86/kvm/x86.h b/arch/x86/kvm/x86.h index 587fb9ede436..8c97bac9a895 100644 --- a/arch/x86/kvm/x86.h +++ b/arch/x86/kvm/x86.h | |||
@@ -122,8 +122,13 @@ int kvm_write_guest_virt_system(struct x86_emulate_ctxt *ctxt, | |||
122 | gva_t addr, void *val, unsigned int bytes, | 122 | gva_t addr, void *val, unsigned int bytes, |
123 | struct x86_exception *exception); | 123 | struct x86_exception *exception); |
124 | 124 | ||
125 | #define KVM_SUPPORTED_XCR0 (XSTATE_FP | XSTATE_SSE | XSTATE_YMM) | 125 | #define KVM_SUPPORTED_XCR0 (XSTATE_FP | XSTATE_SSE | XSTATE_YMM \ |
126 | | XSTATE_BNDREGS | XSTATE_BNDCSR) | ||
126 | extern u64 host_xcr0; | 127 | extern u64 host_xcr0; |
127 | 128 | ||
129 | extern u64 kvm_supported_xcr0(void); | ||
130 | |||
131 | extern unsigned int min_timer_period_us; | ||
132 | |||
128 | extern struct static_key kvm_no_apic_vcpu; | 133 | extern struct static_key kvm_no_apic_vcpu; |
129 | #endif | 134 | #endif |
diff --git a/arch/x86/lguest/boot.c b/arch/x86/lguest/boot.c index bdf8532494fe..ad1fb5f53925 100644 --- a/arch/x86/lguest/boot.c +++ b/arch/x86/lguest/boot.c | |||
@@ -233,13 +233,13 @@ static void lguest_end_context_switch(struct task_struct *next) | |||
233 | * flags word contains all kind of stuff, but in practice Linux only cares | 233 | * flags word contains all kind of stuff, but in practice Linux only cares |
234 | * about the interrupt flag. Our "save_flags()" just returns that. | 234 | * about the interrupt flag. Our "save_flags()" just returns that. |
235 | */ | 235 | */ |
236 | static unsigned long save_fl(void) | 236 | asmlinkage unsigned long lguest_save_fl(void) |
237 | { | 237 | { |
238 | return lguest_data.irq_enabled; | 238 | return lguest_data.irq_enabled; |
239 | } | 239 | } |
240 | 240 | ||
241 | /* Interrupts go off... */ | 241 | /* Interrupts go off... */ |
242 | static void irq_disable(void) | 242 | asmlinkage void lguest_irq_disable(void) |
243 | { | 243 | { |
244 | lguest_data.irq_enabled = 0; | 244 | lguest_data.irq_enabled = 0; |
245 | } | 245 | } |
@@ -253,8 +253,8 @@ static void irq_disable(void) | |||
253 | * PV_CALLEE_SAVE_REGS_THUNK(), which pushes %eax onto the stack, calls the | 253 | * PV_CALLEE_SAVE_REGS_THUNK(), which pushes %eax onto the stack, calls the |
254 | * C function, then restores it. | 254 | * C function, then restores it. |
255 | */ | 255 | */ |
256 | PV_CALLEE_SAVE_REGS_THUNK(save_fl); | 256 | PV_CALLEE_SAVE_REGS_THUNK(lguest_save_fl); |
257 | PV_CALLEE_SAVE_REGS_THUNK(irq_disable); | 257 | PV_CALLEE_SAVE_REGS_THUNK(lguest_irq_disable); |
258 | /*:*/ | 258 | /*:*/ |
259 | 259 | ||
260 | /* These are in i386_head.S */ | 260 | /* These are in i386_head.S */ |
@@ -1291,9 +1291,9 @@ __init void lguest_init(void) | |||
1291 | */ | 1291 | */ |
1292 | 1292 | ||
1293 | /* Interrupt-related operations */ | 1293 | /* Interrupt-related operations */ |
1294 | pv_irq_ops.save_fl = PV_CALLEE_SAVE(save_fl); | 1294 | pv_irq_ops.save_fl = PV_CALLEE_SAVE(lguest_save_fl); |
1295 | pv_irq_ops.restore_fl = __PV_IS_CALLEE_SAVE(lg_restore_fl); | 1295 | pv_irq_ops.restore_fl = __PV_IS_CALLEE_SAVE(lg_restore_fl); |
1296 | pv_irq_ops.irq_disable = PV_CALLEE_SAVE(irq_disable); | 1296 | pv_irq_ops.irq_disable = PV_CALLEE_SAVE(lguest_irq_disable); |
1297 | pv_irq_ops.irq_enable = __PV_IS_CALLEE_SAVE(lg_irq_enable); | 1297 | pv_irq_ops.irq_enable = __PV_IS_CALLEE_SAVE(lg_irq_enable); |
1298 | pv_irq_ops.safe_halt = lguest_safe_halt; | 1298 | pv_irq_ops.safe_halt = lguest_safe_halt; |
1299 | 1299 | ||
diff --git a/arch/x86/lib/Makefile b/arch/x86/lib/Makefile index 992d63bb154f..eabcb6e6a900 100644 --- a/arch/x86/lib/Makefile +++ b/arch/x86/lib/Makefile | |||
@@ -24,7 +24,7 @@ lib-$(CONFIG_SMP) += rwlock.o | |||
24 | lib-$(CONFIG_RWSEM_XCHGADD_ALGORITHM) += rwsem.o | 24 | lib-$(CONFIG_RWSEM_XCHGADD_ALGORITHM) += rwsem.o |
25 | lib-$(CONFIG_INSTRUCTION_DECODER) += insn.o inat.o | 25 | lib-$(CONFIG_INSTRUCTION_DECODER) += insn.o inat.o |
26 | 26 | ||
27 | obj-y += msr.o msr-reg.o msr-reg-export.o | 27 | obj-y += msr.o msr-reg.o msr-reg-export.o hash.o |
28 | 28 | ||
29 | ifeq ($(CONFIG_X86_32),y) | 29 | ifeq ($(CONFIG_X86_32),y) |
30 | obj-y += atomic64_32.o | 30 | obj-y += atomic64_32.o |
diff --git a/arch/x86/lib/copy_user_64.S b/arch/x86/lib/copy_user_64.S index a30ca15be21c..dee945d55594 100644 --- a/arch/x86/lib/copy_user_64.S +++ b/arch/x86/lib/copy_user_64.S | |||
@@ -186,7 +186,7 @@ ENTRY(copy_user_generic_unrolled) | |||
186 | 30: shll $6,%ecx | 186 | 30: shll $6,%ecx |
187 | addl %ecx,%edx | 187 | addl %ecx,%edx |
188 | jmp 60f | 188 | jmp 60f |
189 | 40: lea (%rdx,%rcx,8),%rdx | 189 | 40: leal (%rdx,%rcx,8),%edx |
190 | jmp 60f | 190 | jmp 60f |
191 | 50: movl %ecx,%edx | 191 | 50: movl %ecx,%edx |
192 | 60: jmp copy_user_handle_tail /* ecx is zerorest also */ | 192 | 60: jmp copy_user_handle_tail /* ecx is zerorest also */ |
@@ -236,8 +236,6 @@ ENDPROC(copy_user_generic_unrolled) | |||
236 | ENTRY(copy_user_generic_string) | 236 | ENTRY(copy_user_generic_string) |
237 | CFI_STARTPROC | 237 | CFI_STARTPROC |
238 | ASM_STAC | 238 | ASM_STAC |
239 | andl %edx,%edx | ||
240 | jz 4f | ||
241 | cmpl $8,%edx | 239 | cmpl $8,%edx |
242 | jb 2f /* less than 8 bytes, go to byte copy loop */ | 240 | jb 2f /* less than 8 bytes, go to byte copy loop */ |
243 | ALIGN_DESTINATION | 241 | ALIGN_DESTINATION |
@@ -249,12 +247,12 @@ ENTRY(copy_user_generic_string) | |||
249 | 2: movl %edx,%ecx | 247 | 2: movl %edx,%ecx |
250 | 3: rep | 248 | 3: rep |
251 | movsb | 249 | movsb |
252 | 4: xorl %eax,%eax | 250 | xorl %eax,%eax |
253 | ASM_CLAC | 251 | ASM_CLAC |
254 | ret | 252 | ret |
255 | 253 | ||
256 | .section .fixup,"ax" | 254 | .section .fixup,"ax" |
257 | 11: lea (%rdx,%rcx,8),%rcx | 255 | 11: leal (%rdx,%rcx,8),%ecx |
258 | 12: movl %ecx,%edx /* ecx is zerorest also */ | 256 | 12: movl %ecx,%edx /* ecx is zerorest also */ |
259 | jmp copy_user_handle_tail | 257 | jmp copy_user_handle_tail |
260 | .previous | 258 | .previous |
@@ -279,12 +277,10 @@ ENDPROC(copy_user_generic_string) | |||
279 | ENTRY(copy_user_enhanced_fast_string) | 277 | ENTRY(copy_user_enhanced_fast_string) |
280 | CFI_STARTPROC | 278 | CFI_STARTPROC |
281 | ASM_STAC | 279 | ASM_STAC |
282 | andl %edx,%edx | ||
283 | jz 2f | ||
284 | movl %edx,%ecx | 280 | movl %edx,%ecx |
285 | 1: rep | 281 | 1: rep |
286 | movsb | 282 | movsb |
287 | 2: xorl %eax,%eax | 283 | xorl %eax,%eax |
288 | ASM_CLAC | 284 | ASM_CLAC |
289 | ret | 285 | ret |
290 | 286 | ||
diff --git a/arch/x86/lib/delay.c b/arch/x86/lib/delay.c index 7c3bee636e2f..39d6a3db0b96 100644 --- a/arch/x86/lib/delay.c +++ b/arch/x86/lib/delay.c | |||
@@ -16,7 +16,6 @@ | |||
16 | #include <linux/timex.h> | 16 | #include <linux/timex.h> |
17 | #include <linux/preempt.h> | 17 | #include <linux/preempt.h> |
18 | #include <linux/delay.h> | 18 | #include <linux/delay.h> |
19 | #include <linux/init.h> | ||
20 | 19 | ||
21 | #include <asm/processor.h> | 20 | #include <asm/processor.h> |
22 | #include <asm/delay.h> | 21 | #include <asm/delay.h> |
diff --git a/arch/x86/lib/hash.c b/arch/x86/lib/hash.c new file mode 100644 index 000000000000..ff4fa51a5b1f --- /dev/null +++ b/arch/x86/lib/hash.c | |||
@@ -0,0 +1,92 @@ | |||
1 | /* | ||
2 | * Some portions derived from code covered by the following notice: | ||
3 | * | ||
4 | * Copyright (c) 2010-2013 Intel Corporation. All rights reserved. | ||
5 | * All rights reserved. | ||
6 | * | ||
7 | * Redistribution and use in source and binary forms, with or without | ||
8 | * modification, are permitted provided that the following conditions | ||
9 | * are met: | ||
10 | * | ||
11 | * * Redistributions of source code must retain the above copyright | ||
12 | * notice, this list of conditions and the following disclaimer. | ||
13 | * * Redistributions in binary form must reproduce the above copyright | ||
14 | * notice, this list of conditions and the following disclaimer in | ||
15 | * the documentation and/or other materials provided with the | ||
16 | * distribution. | ||
17 | * * Neither the name of Intel Corporation nor the names of its | ||
18 | * contributors may be used to endorse or promote products derived | ||
19 | * from this software without specific prior written permission. | ||
20 | * | ||
21 | * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS | ||
22 | * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT | ||
23 | * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR | ||
24 | * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT | ||
25 | * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, | ||
26 | * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT | ||
27 | * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, | ||
28 | * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY | ||
29 | * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT | ||
30 | * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE | ||
31 | * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. | ||
32 | */ | ||
33 | |||
34 | #include <linux/hash.h> | ||
35 | #include <linux/init.h> | ||
36 | |||
37 | #include <asm/processor.h> | ||
38 | #include <asm/cpufeature.h> | ||
39 | #include <asm/hash.h> | ||
40 | |||
41 | static inline u32 crc32_u32(u32 crc, u32 val) | ||
42 | { | ||
43 | #ifdef CONFIG_AS_CRC32 | ||
44 | asm ("crc32l %1,%0\n" : "+r" (crc) : "rm" (val)); | ||
45 | #else | ||
46 | asm (".byte 0xf2, 0x0f, 0x38, 0xf1, 0xc1" : "+a" (crc) : "c" (val)); | ||
47 | #endif | ||
48 | return crc; | ||
49 | } | ||
50 | |||
51 | static u32 intel_crc4_2_hash(const void *data, u32 len, u32 seed) | ||
52 | { | ||
53 | const u32 *p32 = (const u32 *) data; | ||
54 | u32 i, tmp = 0; | ||
55 | |||
56 | for (i = 0; i < len / 4; i++) | ||
57 | seed = crc32_u32(seed, *p32++); | ||
58 | |||
59 | switch (len & 3) { | ||
60 | case 3: | ||
61 | tmp |= *((const u8 *) p32 + 2) << 16; | ||
62 | /* fallthrough */ | ||
63 | case 2: | ||
64 | tmp |= *((const u8 *) p32 + 1) << 8; | ||
65 | /* fallthrough */ | ||
66 | case 1: | ||
67 | tmp |= *((const u8 *) p32); | ||
68 | seed = crc32_u32(seed, tmp); | ||
69 | break; | ||
70 | } | ||
71 | |||
72 | return seed; | ||
73 | } | ||
74 | |||
75 | static u32 intel_crc4_2_hash2(const u32 *data, u32 len, u32 seed) | ||
76 | { | ||
77 | const u32 *p32 = (const u32 *) data; | ||
78 | u32 i; | ||
79 | |||
80 | for (i = 0; i < len; i++) | ||
81 | seed = crc32_u32(seed, *p32++); | ||
82 | |||
83 | return seed; | ||
84 | } | ||
85 | |||
86 | void __init setup_arch_fast_hash(struct fast_hash_ops *ops) | ||
87 | { | ||
88 | if (cpu_has_xmm4_2) { | ||
89 | ops->hash = intel_crc4_2_hash; | ||
90 | ops->hash2 = intel_crc4_2_hash2; | ||
91 | } | ||
92 | } | ||
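The SSE4.2 path above consumes the input a 32-bit word at a time and packs the trailing 1-3 bytes into one final word. A userspace analogue that runs on any CPU, substituting a bitwise CRC32C step (reflected polynomial 0x82F63B78, the same function the crc32l instruction computes) for the hardware instruction; the walk and tail handling follow the code above, but this is a sketch, not the kernel code:

#include <stdint.h>
#include <stdio.h>
#include <string.h>

static uint32_t crc32c_u32(uint32_t crc, uint32_t val)
{
	int i;

	crc ^= val;
	for (i = 0; i < 32; i++)        /* bit-by-bit CRC32C */
		crc = (crc >> 1) ^ (0x82F63B78 & -(crc & 1));
	return crc;
}

static uint32_t hash_bytes(const void *data, uint32_t len, uint32_t seed)
{
	const uint32_t *p32 = data;
	uint32_t i, tmp = 0;

	for (i = 0; i < len / 4; i++) {
		uint32_t w;
		memcpy(&w, p32++, 4);   /* avoid unaligned dereference */
		seed = crc32c_u32(seed, w);
	}
	switch (len & 3) {              /* pack the 1-3 byte tail */
	case 3: tmp |= ((const uint8_t *)p32)[2] << 16; /* fallthrough */
	case 2: tmp |= ((const uint8_t *)p32)[1] << 8;  /* fallthrough */
	case 1: tmp |= ((const uint8_t *)p32)[0];
		seed = crc32c_u32(seed, tmp);
	}
	return seed;
}

int main(void)
{
	const char s[] = "abcdefg";     /* 7 bytes: one word + 3-byte tail */
	printf("0x%08x\n", hash_bytes(s, 7, 0));
	return 0;
}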
diff --git a/arch/x86/lib/memcpy_32.c b/arch/x86/lib/memcpy_32.c index e78761d6b7f8..a404b4b75533 100644 --- a/arch/x86/lib/memcpy_32.c +++ b/arch/x86/lib/memcpy_32.c | |||
@@ -4,7 +4,7 @@ | |||
4 | #undef memcpy | 4 | #undef memcpy |
5 | #undef memset | 5 | #undef memset |
6 | 6 | ||
7 | void *memcpy(void *to, const void *from, size_t n) | 7 | __visible void *memcpy(void *to, const void *from, size_t n) |
8 | { | 8 | { |
9 | #ifdef CONFIG_X86_USE_3DNOW | 9 | #ifdef CONFIG_X86_USE_3DNOW |
10 | return __memcpy3d(to, from, n); | 10 | return __memcpy3d(to, from, n); |
@@ -14,13 +14,13 @@ void *memcpy(void *to, const void *from, size_t n) | |||
14 | } | 14 | } |
15 | EXPORT_SYMBOL(memcpy); | 15 | EXPORT_SYMBOL(memcpy); |
16 | 16 | ||
17 | void *memset(void *s, int c, size_t count) | 17 | __visible void *memset(void *s, int c, size_t count) |
18 | { | 18 | { |
19 | return __memset(s, c, count); | 19 | return __memset(s, c, count); |
20 | } | 20 | } |
21 | EXPORT_SYMBOL(memset); | 21 | EXPORT_SYMBOL(memset); |
22 | 22 | ||
23 | void *memmove(void *dest, const void *src, size_t n) | 23 | __visible void *memmove(void *dest, const void *src, size_t n) |
24 | { | 24 | { |
25 | int d0,d1,d2,d3,d4,d5; | 25 | int d0,d1,d2,d3,d4,d5; |
26 | char *ret = dest; | 26 | char *ret = dest; |
diff --git a/arch/x86/lib/msr.c b/arch/x86/lib/msr.c index 8f8eebdca7d4..db9db446b71a 100644 --- a/arch/x86/lib/msr.c +++ b/arch/x86/lib/msr.c | |||
@@ -8,7 +8,7 @@ struct msr *msrs_alloc(void) | |||
8 | 8 | ||
9 | msrs = alloc_percpu(struct msr); | 9 | msrs = alloc_percpu(struct msr); |
10 | if (!msrs) { | 10 | if (!msrs) { |
11 | pr_warning("%s: error allocating msrs\n", __func__); | 11 | pr_warn("%s: error allocating msrs\n", __func__); |
12 | return NULL; | 12 | return NULL; |
13 | } | 13 | } |
14 | 14 | ||
@@ -21,3 +21,90 @@ void msrs_free(struct msr *msrs) | |||
21 | free_percpu(msrs); | 21 | free_percpu(msrs); |
22 | } | 22 | } |
23 | EXPORT_SYMBOL(msrs_free); | 23 | EXPORT_SYMBOL(msrs_free); |
24 | |||
25 | /** | ||
26 | * Read an MSR with error handling | ||
27 | * | ||
28 | * @msr: MSR to read | ||
29 | * @m: value to read into | ||
30 | * | ||
31 | * The read data is stored in @m only on success; on failure the output | ||
32 | * argument @m is left unchanged. | ||
33 | * | ||
34 | */ | ||
35 | int msr_read(u32 msr, struct msr *m) | ||
36 | { | ||
37 | int err; | ||
38 | u64 val; | ||
39 | |||
40 | err = rdmsrl_safe(msr, &val); | ||
41 | if (!err) | ||
42 | m->q = val; | ||
43 | |||
44 | return err; | ||
45 | } | ||
46 | |||
47 | /** | ||
48 | * Write an MSR with error handling | ||
49 | * | ||
50 | * @msr: MSR to write | ||
51 | * @m: value to write | ||
52 | */ | ||
53 | int msr_write(u32 msr, struct msr *m) | ||
54 | { | ||
55 | return wrmsrl_safe(msr, m->q); | ||
56 | } | ||
57 | |||
58 | static inline int __flip_bit(u32 msr, u8 bit, bool set) | ||
59 | { | ||
60 | struct msr m, m1; | ||
61 | int err = -EINVAL; | ||
62 | |||
63 | if (bit > 63) | ||
64 | return err; | ||
65 | |||
66 | err = msr_read(msr, &m); | ||
67 | if (err) | ||
68 | return err; | ||
69 | |||
70 | m1 = m; | ||
71 | if (set) | ||
72 | m1.q |= BIT_64(bit); | ||
73 | else | ||
74 | m1.q &= ~BIT_64(bit); | ||
75 | |||
76 | if (m1.q == m.q) | ||
77 | return 0; | ||
78 | |||
79 | err = msr_write(msr, &m1); | ||
80 | if (err) | ||
81 | return err; | ||
82 | |||
83 | return 1; | ||
84 | } | ||
85 | |||
86 | /** | ||
87 | * Set @bit in a MSR @msr. | ||
88 | * | ||
89 | * Retval: | ||
90 | * < 0: An error was encountered. | ||
91 | * = 0: Bit was already set. | ||
92 | * > 0: Hardware accepted the MSR write. | ||
93 | */ | ||
94 | int msr_set_bit(u32 msr, u8 bit) | ||
95 | { | ||
96 | return __flip_bit(msr, bit, true); | ||
97 | } | ||
98 | |||
99 | /** | ||
100 | * Clear @bit in a MSR @msr. | ||
101 | * | ||
102 | * Retval: | ||
103 | * < 0: An error was encountered. | ||
104 | * = 0: Bit was already cleared. | ||
105 | * > 0: Hardware accepted the MSR write. | ||
106 | */ | ||
107 | int msr_clear_bit(u32 msr, u8 bit) | ||
108 | { | ||
109 | return __flip_bit(msr, bit, false); | ||
110 | } | ||
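The helpers above share a tri-state contract: negative on error, 0 when the bit already had the requested value (so no write is issued), positive when a write went out to the hardware. A hedged sketch of the same read-modify-write logic against a plain variable instead of a real MSR:

#include <stdint.h>
#include <stdio.h>

static uint64_t fake_msr = 0x2;         /* stand-in for the MSR contents */

static int flip_bit(uint8_t bit, int set)
{
	uint64_t old, new;

	if (bit > 63)
		return -1;              /* -EINVAL in the kernel version */
	old = fake_msr;
	new = set ? old | (1ULL << bit) : old & ~(1ULL << bit);
	if (new == old)
		return 0;               /* nothing to write back */
	fake_msr = new;
	return 1;                       /* write accepted */
}

int main(void)
{
	printf("%d\n", flip_bit(1, 1));  /* 0: bit already set */
	printf("%d\n", flip_bit(3, 1));  /* 1: bit newly set */
	printf("%d\n", flip_bit(64, 1)); /* -1: out of range */
	return 0;
}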
diff --git a/arch/x86/lib/x86-opcode-map.txt b/arch/x86/lib/x86-opcode-map.txt index 533a85e3a07e..1a2be7c6895d 100644 --- a/arch/x86/lib/x86-opcode-map.txt +++ b/arch/x86/lib/x86-opcode-map.txt | |||
@@ -346,8 +346,8 @@ AVXcode: 1 | |||
346 | 17: vmovhps Mq,Vq (v1) | vmovhpd Mq,Vq (66),(v1) | 346 | 17: vmovhps Mq,Vq (v1) | vmovhpd Mq,Vq (66),(v1) |
347 | 18: Grp16 (1A) | 347 | 18: Grp16 (1A) |
348 | 19: | 348 | 19: |
349 | 1a: | 349 | 1a: BNDCL Ev,Gv | BNDCU Ev,Gv | BNDMOV Gv,Ev | BNDLDX Gv,Ev,Gv |
350 | 1b: | 350 | 1b: BNDCN Ev,Gv | BNDMOV Ev,Gv | BNDMK Gv,Ev | BNDSTX Ev,GV,Gv |
351 | 1c: | 351 | 1c: |
352 | 1d: | 352 | 1d: |
353 | 1e: | 353 | 1e: |
diff --git a/arch/x86/math-emu/errors.c b/arch/x86/math-emu/errors.c index 59d353d2c599..a5449089cd9f 100644 --- a/arch/x86/math-emu/errors.c +++ b/arch/x86/math-emu/errors.c | |||
@@ -330,11 +330,6 @@ asmlinkage void FPU_exception(int n) | |||
330 | 330 | ||
331 | RE_ENTRANT_CHECK_OFF; | 331 | RE_ENTRANT_CHECK_OFF; |
332 | if ((~control_word & n & CW_Exceptions) || (n == EX_INTERNAL)) { | 332 | if ((~control_word & n & CW_Exceptions) || (n == EX_INTERNAL)) { |
333 | #ifdef PRINT_MESSAGES | ||
334 | /* My message from the sponsor */ | ||
335 | printk(FPU_VERSION " " __DATE__ " (C) W. Metzenthen.\n"); | ||
336 | #endif /* PRINT_MESSAGES */ | ||
337 | |||
338 | /* Get a name string for error reporting */ | 333 | /* Get a name string for error reporting */ |
339 | for (i = 0; exception_names[i].type; i++) | 334 | for (i = 0; exception_names[i].type; i++) |
340 | if ((exception_names[i].type & n) == | 335 | if ((exception_names[i].type & n) == |
diff --git a/arch/x86/mm/dump_pagetables.c b/arch/x86/mm/dump_pagetables.c index 0002a3a33081..20621d753d5f 100644 --- a/arch/x86/mm/dump_pagetables.c +++ b/arch/x86/mm/dump_pagetables.c | |||
@@ -30,6 +30,7 @@ struct pg_state { | |||
30 | unsigned long start_address; | 30 | unsigned long start_address; |
31 | unsigned long current_address; | 31 | unsigned long current_address; |
32 | const struct addr_marker *marker; | 32 | const struct addr_marker *marker; |
33 | bool to_dmesg; | ||
33 | }; | 34 | }; |
34 | 35 | ||
35 | struct addr_marker { | 36 | struct addr_marker { |
@@ -88,10 +89,28 @@ static struct addr_marker address_markers[] = { | |||
88 | #define PUD_LEVEL_MULT (PTRS_PER_PMD * PMD_LEVEL_MULT) | 89 | #define PUD_LEVEL_MULT (PTRS_PER_PMD * PMD_LEVEL_MULT) |
89 | #define PGD_LEVEL_MULT (PTRS_PER_PUD * PUD_LEVEL_MULT) | 90 | #define PGD_LEVEL_MULT (PTRS_PER_PUD * PUD_LEVEL_MULT) |
90 | 91 | ||
92 | #define pt_dump_seq_printf(m, to_dmesg, fmt, args...) \ | ||
93 | ({ \ | ||
94 | if (to_dmesg) \ | ||
95 | printk(KERN_INFO fmt, ##args); \ | ||
96 | else \ | ||
97 | if (m) \ | ||
98 | seq_printf(m, fmt, ##args); \ | ||
99 | }) | ||
100 | |||
101 | #define pt_dump_cont_printf(m, to_dmesg, fmt, args...) \ | ||
102 | ({ \ | ||
103 | if (to_dmesg) \ | ||
104 | printk(KERN_CONT fmt, ##args); \ | ||
105 | else \ | ||
106 | if (m) \ | ||
107 | seq_printf(m, fmt, ##args); \ | ||
108 | }) | ||
109 | |||
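The two macros added above pick an output sink at each call site: printk() when dumping to dmesg, seq_printf() when backing the debugfs file (note the NULL check, since no seq_file exists on the dmesg path). A userspace analogue of the dual-sink pattern, with stderr standing in for dmesg and a FILE * for the seq_file:

#include <stdarg.h>
#include <stdbool.h>
#include <stdio.h>

static void pt_dump_printf(FILE *m, bool to_dmesg, const char *fmt, ...)
{
	va_list ap;

	va_start(ap, fmt);
	if (to_dmesg)
		vfprintf(stderr, fmt, ap);
	else if (m)                     /* the seq_file may be NULL */
		vfprintf(m, fmt, ap);
	va_end(ap);
}

int main(void)
{
	pt_dump_printf(NULL, true, "---[ %s ]---\n", "Kernel Space");
	pt_dump_printf(stdout, false, "0x%016lx-0x%016lx\n", 0L, 4096L);
	return 0;
}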
91 | /* | 110 | /* |
92 | * Print a readable form of a pgprot_t to the seq_file | 111 | * Print a readable form of a pgprot_t to the seq_file |
93 | */ | 112 | */ |
94 | static void printk_prot(struct seq_file *m, pgprot_t prot, int level) | 113 | static void printk_prot(struct seq_file *m, pgprot_t prot, int level, bool dmsg) |
95 | { | 114 | { |
96 | pgprotval_t pr = pgprot_val(prot); | 115 | pgprotval_t pr = pgprot_val(prot); |
97 | static const char * const level_name[] = | 116 | static const char * const level_name[] = |
@@ -99,47 +118,47 @@ static void printk_prot(struct seq_file *m, pgprot_t prot, int level) | |||
99 | 118 | ||
100 | if (!pgprot_val(prot)) { | 119 | if (!pgprot_val(prot)) { |
101 | /* Not present */ | 120 | /* Not present */ |
102 | seq_printf(m, " "); | 121 | pt_dump_cont_printf(m, dmsg, " "); |
103 | } else { | 122 | } else { |
104 | if (pr & _PAGE_USER) | 123 | if (pr & _PAGE_USER) |
105 | seq_printf(m, "USR "); | 124 | pt_dump_cont_printf(m, dmsg, "USR "); |
106 | else | 125 | else |
107 | seq_printf(m, " "); | 126 | pt_dump_cont_printf(m, dmsg, " "); |
108 | if (pr & _PAGE_RW) | 127 | if (pr & _PAGE_RW) |
109 | seq_printf(m, "RW "); | 128 | pt_dump_cont_printf(m, dmsg, "RW "); |
110 | else | 129 | else |
111 | seq_printf(m, "ro "); | 130 | pt_dump_cont_printf(m, dmsg, "ro "); |
112 | if (pr & _PAGE_PWT) | 131 | if (pr & _PAGE_PWT) |
113 | seq_printf(m, "PWT "); | 132 | pt_dump_cont_printf(m, dmsg, "PWT "); |
114 | else | 133 | else |
115 | seq_printf(m, " "); | 134 | pt_dump_cont_printf(m, dmsg, " "); |
116 | if (pr & _PAGE_PCD) | 135 | if (pr & _PAGE_PCD) |
117 | seq_printf(m, "PCD "); | 136 | pt_dump_cont_printf(m, dmsg, "PCD "); |
118 | else | 137 | else |
119 | seq_printf(m, " "); | 138 | pt_dump_cont_printf(m, dmsg, " "); |
120 | 139 | ||
121 | /* Bit 9 has a different meaning on level 3 vs 4 */ | 140 | /* Bit 9 has a different meaning on level 3 vs 4 */ |
122 | if (level <= 3) { | 141 | if (level <= 3) { |
123 | if (pr & _PAGE_PSE) | 142 | if (pr & _PAGE_PSE) |
124 | seq_printf(m, "PSE "); | 143 | pt_dump_cont_printf(m, dmsg, "PSE "); |
125 | else | 144 | else |
126 | seq_printf(m, " "); | 145 | pt_dump_cont_printf(m, dmsg, " "); |
127 | } else { | 146 | } else { |
128 | if (pr & _PAGE_PAT) | 147 | if (pr & _PAGE_PAT) |
129 | seq_printf(m, "pat "); | 148 | pt_dump_cont_printf(m, dmsg, "pat "); |
130 | else | 149 | else |
131 | seq_printf(m, " "); | 150 | pt_dump_cont_printf(m, dmsg, " "); |
132 | } | 151 | } |
133 | if (pr & _PAGE_GLOBAL) | 152 | if (pr & _PAGE_GLOBAL) |
134 | seq_printf(m, "GLB "); | 153 | pt_dump_cont_printf(m, dmsg, "GLB "); |
135 | else | 154 | else |
136 | seq_printf(m, " "); | 155 | pt_dump_cont_printf(m, dmsg, " "); |
137 | if (pr & _PAGE_NX) | 156 | if (pr & _PAGE_NX) |
138 | seq_printf(m, "NX "); | 157 | pt_dump_cont_printf(m, dmsg, "NX "); |
139 | else | 158 | else |
140 | seq_printf(m, "x "); | 159 | pt_dump_cont_printf(m, dmsg, "x "); |
141 | } | 160 | } |
142 | seq_printf(m, "%s\n", level_name[level]); | 161 | pt_dump_cont_printf(m, dmsg, "%s\n", level_name[level]); |
143 | } | 162 | } |
144 | 163 | ||
145 | /* | 164 | /* |
@@ -178,7 +197,8 @@ static void note_page(struct seq_file *m, struct pg_state *st, | |||
178 | st->current_prot = new_prot; | 197 | st->current_prot = new_prot; |
179 | st->level = level; | 198 | st->level = level; |
180 | st->marker = address_markers; | 199 | st->marker = address_markers; |
181 | seq_printf(m, "---[ %s ]---\n", st->marker->name); | 200 | pt_dump_seq_printf(m, st->to_dmesg, "---[ %s ]---\n", |
201 | st->marker->name); | ||
182 | } else if (prot != cur || level != st->level || | 202 | } else if (prot != cur || level != st->level || |
183 | st->current_address >= st->marker[1].start_address) { | 203 | st->current_address >= st->marker[1].start_address) { |
184 | const char *unit = units; | 204 | const char *unit = units; |
@@ -188,17 +208,17 @@ static void note_page(struct seq_file *m, struct pg_state *st, | |||
188 | /* | 208 | /* |
189 | * Now print the actual finished series | 209 | * Now print the actual finished series |
190 | */ | 210 | */ |
191 | seq_printf(m, "0x%0*lx-0x%0*lx ", | 211 | pt_dump_seq_printf(m, st->to_dmesg, "0x%0*lx-0x%0*lx ", |
192 | width, st->start_address, | 212 | width, st->start_address, |
193 | width, st->current_address); | 213 | width, st->current_address); |
194 | 214 | ||
195 | delta = (st->current_address - st->start_address) >> 10; | 215 | delta = (st->current_address - st->start_address) >> 10; |
196 | while (!(delta & 1023) && unit[1]) { | 216 | while (!(delta & 1023) && unit[1]) { |
197 | delta >>= 10; | 217 | delta >>= 10; |
198 | unit++; | 218 | unit++; |
199 | } | 219 | } |
200 | seq_printf(m, "%9lu%c ", delta, *unit); | 220 | pt_dump_cont_printf(m, st->to_dmesg, "%9lu%c ", delta, *unit); |
201 | printk_prot(m, st->current_prot, st->level); | 221 | printk_prot(m, st->current_prot, st->level, st->to_dmesg); |
202 | 222 | ||
203 | /* | 223 | /* |
204 | * We print markers for special areas of address space, | 224 | * We print markers for special areas of address space, |
@@ -207,7 +227,8 @@ static void note_page(struct seq_file *m, struct pg_state *st, | |||
207 | */ | 227 | */ |
208 | if (st->current_address >= st->marker[1].start_address) { | 228 | if (st->current_address >= st->marker[1].start_address) { |
209 | st->marker++; | 229 | st->marker++; |
210 | seq_printf(m, "---[ %s ]---\n", st->marker->name); | 230 | pt_dump_seq_printf(m, st->to_dmesg, "---[ %s ]---\n", |
231 | st->marker->name); | ||
211 | } | 232 | } |
212 | 233 | ||
213 | st->start_address = st->current_address; | 234 | st->start_address = st->current_address; |
@@ -296,7 +317,7 @@ static void walk_pud_level(struct seq_file *m, struct pg_state *st, pgd_t addr, | |||
296 | #define pgd_none(a) pud_none(__pud(pgd_val(a))) | 317 | #define pgd_none(a) pud_none(__pud(pgd_val(a))) |
297 | #endif | 318 | #endif |
298 | 319 | ||
299 | static void walk_pgd_level(struct seq_file *m) | 320 | void ptdump_walk_pgd_level(struct seq_file *m, pgd_t *pgd) |
300 | { | 321 | { |
301 | #ifdef CONFIG_X86_64 | 322 | #ifdef CONFIG_X86_64 |
302 | pgd_t *start = (pgd_t *) &init_level4_pgt; | 323 | pgd_t *start = (pgd_t *) &init_level4_pgt; |
@@ -304,9 +325,12 @@ static void walk_pgd_level(struct seq_file *m) | |||
304 | pgd_t *start = swapper_pg_dir; | 325 | pgd_t *start = swapper_pg_dir; |
305 | #endif | 326 | #endif |
306 | int i; | 327 | int i; |
307 | struct pg_state st; | 328 | struct pg_state st = {}; |
308 | 329 | ||
309 | memset(&st, 0, sizeof(st)); | 330 | if (pgd) { |
331 | start = pgd; | ||
332 | st.to_dmesg = true; | ||
333 | } | ||
310 | 334 | ||
311 | for (i = 0; i < PTRS_PER_PGD; i++) { | 335 | for (i = 0; i < PTRS_PER_PGD; i++) { |
312 | st.current_address = normalize_addr(i * PGD_LEVEL_MULT); | 336 | st.current_address = normalize_addr(i * PGD_LEVEL_MULT); |
@@ -331,7 +355,7 @@ static void walk_pgd_level(struct seq_file *m) | |||
331 | 355 | ||
332 | static int ptdump_show(struct seq_file *m, void *v) | 356 | static int ptdump_show(struct seq_file *m, void *v) |
333 | { | 357 | { |
334 | walk_pgd_level(m); | 358 | ptdump_walk_pgd_level(m, NULL); |
335 | return 0; | 359 | return 0; |
336 | } | 360 | } |
337 | 361 | ||
diff --git a/arch/x86/mm/fault.c b/arch/x86/mm/fault.c index 9d591c895803..8e5722992677 100644 --- a/arch/x86/mm/fault.c +++ b/arch/x86/mm/fault.c | |||
@@ -584,8 +584,13 @@ show_fault_oops(struct pt_regs *regs, unsigned long error_code, | |||
584 | 584 | ||
585 | if (error_code & PF_INSTR) { | 585 | if (error_code & PF_INSTR) { |
586 | unsigned int level; | 586 | unsigned int level; |
587 | pgd_t *pgd; | ||
588 | pte_t *pte; | ||
587 | 589 | ||
588 | pte_t *pte = lookup_address(address, &level); | 590 | pgd = __va(read_cr3() & PHYSICAL_PAGE_MASK); |
591 | pgd += pgd_index(address); | ||
592 | |||
593 | pte = lookup_address_in_pgd(pgd, address, &level); | ||
589 | 594 | ||
590 | if (pte && pte_present(*pte) && !pte_exec(*pte)) | 595 | if (pte && pte_present(*pte) && !pte_exec(*pte)) |
591 | printk(nx_warning, from_kuid(&init_user_ns, current_uid())); | 596 | printk(nx_warning, from_kuid(&init_user_ns, current_uid())); |
@@ -1001,6 +1006,12 @@ static int fault_in_kernel_space(unsigned long address) | |||
1001 | 1006 | ||
1002 | static inline bool smap_violation(int error_code, struct pt_regs *regs) | 1007 | static inline bool smap_violation(int error_code, struct pt_regs *regs) |
1003 | { | 1008 | { |
1009 | if (!IS_ENABLED(CONFIG_X86_SMAP)) | ||
1010 | return false; | ||
1011 | |||
1012 | if (!static_cpu_has(X86_FEATURE_SMAP)) | ||
1013 | return false; | ||
1014 | |||
1004 | if (error_code & PF_USER) | 1015 | if (error_code & PF_USER) |
1005 | return false; | 1016 | return false; |
1006 | 1017 | ||
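The two early returns added to smap_violation() layer the checks: compile-time config first (so the rest folds away when SMAP support is built out), then the runtime CPU feature, then the actual fault conditions; this replaces the static_cpu_has() wrapper at the call site below. A sketch of the layered-guard shape (the final condition is illustrative, not the exact EFLAGS.AC test):

#include <stdbool.h>
#include <stdio.h>

#define SMAP_ENABLED 1            /* stand-in for IS_ENABLED(CONFIG_X86_SMAP) */

static bool cpu_has_smap(void)    /* stand-in for static_cpu_has() */
{
	return true;
}

static bool smap_violation(bool user_mode, bool eflags_ac)
{
	if (!SMAP_ENABLED)
		return false;     /* support compiled out: never a violation */
	if (!cpu_has_smap())
		return false;     /* CPU lacks the feature */
	if (user_mode)
		return false;     /* SMAP only traps kernel-mode accesses */
	return !eflags_ac;        /* illustrative: AC clear => unsanctioned */
}

int main(void)
{
	printf("violation: %d\n", smap_violation(false, false));  /* 1 */
	return 0;
}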
@@ -1014,13 +1025,17 @@ static inline bool smap_violation(int error_code, struct pt_regs *regs) | |||
1014 | * This routine handles page faults. It determines the address, | 1025 | * This routine handles page faults. It determines the address, |
1015 | * and the problem, and then passes it off to one of the appropriate | 1026 | * and the problem, and then passes it off to one of the appropriate |
1016 | * routines. | 1027 | * routines. |
1028 | * | ||
1029 | * This function must have noinline because both callers | ||
1030 | * {,trace_}do_page_fault() have notrace on. Having this an actual function | ||
1031 | * guarantees there's a function trace entry. | ||
1017 | */ | 1032 | */ |
1018 | static void __kprobes | 1033 | static void __kprobes noinline |
1019 | __do_page_fault(struct pt_regs *regs, unsigned long error_code) | 1034 | __do_page_fault(struct pt_regs *regs, unsigned long error_code, |
1035 | unsigned long address) | ||
1020 | { | 1036 | { |
1021 | struct vm_area_struct *vma; | 1037 | struct vm_area_struct *vma; |
1022 | struct task_struct *tsk; | 1038 | struct task_struct *tsk; |
1023 | unsigned long address; | ||
1024 | struct mm_struct *mm; | 1039 | struct mm_struct *mm; |
1025 | int fault; | 1040 | int fault; |
1026 | unsigned int flags = FAULT_FLAG_ALLOW_RETRY | FAULT_FLAG_KILLABLE; | 1041 | unsigned int flags = FAULT_FLAG_ALLOW_RETRY | FAULT_FLAG_KILLABLE; |
@@ -1028,9 +1043,6 @@ __do_page_fault(struct pt_regs *regs, unsigned long error_code) | |||
1028 | tsk = current; | 1043 | tsk = current; |
1029 | mm = tsk->mm; | 1044 | mm = tsk->mm; |
1030 | 1045 | ||
1031 | /* Get the faulting address: */ | ||
1032 | address = read_cr2(); | ||
1033 | |||
1034 | /* | 1046 | /* |
1035 | * Detect and handle instructions that would cause a page fault for | 1047 | * Detect and handle instructions that would cause a page fault for |
1036 | * both a tracked kernel page and a userspace page. | 1048 | * both a tracked kernel page and a userspace page. |
@@ -1087,11 +1099,9 @@ __do_page_fault(struct pt_regs *regs, unsigned long error_code) | |||
1087 | if (unlikely(error_code & PF_RSVD)) | 1099 | if (unlikely(error_code & PF_RSVD)) |
1088 | pgtable_bad(regs, error_code, address); | 1100 | pgtable_bad(regs, error_code, address); |
1089 | 1101 | ||
1090 | if (static_cpu_has(X86_FEATURE_SMAP)) { | 1102 | if (unlikely(smap_violation(error_code, regs))) { |
1091 | if (unlikely(smap_violation(error_code, regs))) { | 1103 | bad_area_nosemaphore(regs, error_code, address); |
1092 | bad_area_nosemaphore(regs, error_code, address); | 1104 | return; |
1093 | return; | ||
1094 | } | ||
1095 | } | 1105 | } |
1096 | 1106 | ||
1097 | /* | 1107 | /* |
@@ -1244,32 +1254,50 @@ good_area: | |||
1244 | up_read(&mm->mmap_sem); | 1254 | up_read(&mm->mmap_sem); |
1245 | } | 1255 | } |
1246 | 1256 | ||
1247 | dotraplinkage void __kprobes | 1257 | dotraplinkage void __kprobes notrace |
1248 | do_page_fault(struct pt_regs *regs, unsigned long error_code) | 1258 | do_page_fault(struct pt_regs *regs, unsigned long error_code) |
1249 | { | 1259 | { |
1260 | unsigned long address = read_cr2(); /* Get the faulting address */ | ||
1250 | enum ctx_state prev_state; | 1261 | enum ctx_state prev_state; |
1251 | 1262 | ||
1263 | /* | ||
1264 | * We must have this function tagged with __kprobes, notrace and call | ||
1265 | * read_cr2() before calling anything else. To avoid calling any kind | ||
1266 | * of tracing machinery before we've observed the CR2 value. | ||
1267 | * | ||
1268 | * exception_{enter,exit}() contain all sorts of tracepoints. | ||
1269 | */ | ||
1270 | |||
1252 | prev_state = exception_enter(); | 1271 | prev_state = exception_enter(); |
1253 | __do_page_fault(regs, error_code); | 1272 | __do_page_fault(regs, error_code, address); |
1254 | exception_exit(prev_state); | 1273 | exception_exit(prev_state); |
1255 | } | 1274 | } |
1256 | 1275 | ||
1257 | static void trace_page_fault_entries(struct pt_regs *regs, | 1276 | #ifdef CONFIG_TRACING |
1277 | static void trace_page_fault_entries(unsigned long address, struct pt_regs *regs, | ||
1258 | unsigned long error_code) | 1278 | unsigned long error_code) |
1259 | { | 1279 | { |
1260 | if (user_mode(regs)) | 1280 | if (user_mode(regs)) |
1261 | trace_page_fault_user(read_cr2(), regs, error_code); | 1281 | trace_page_fault_user(address, regs, error_code); |
1262 | else | 1282 | else |
1263 | trace_page_fault_kernel(read_cr2(), regs, error_code); | 1283 | trace_page_fault_kernel(address, regs, error_code); |
1264 | } | 1284 | } |
1265 | 1285 | ||
1266 | dotraplinkage void __kprobes | 1286 | dotraplinkage void __kprobes notrace |
1267 | trace_do_page_fault(struct pt_regs *regs, unsigned long error_code) | 1287 | trace_do_page_fault(struct pt_regs *regs, unsigned long error_code) |
1268 | { | 1288 | { |
1289 | /* | ||
1290 | * The exception_enter and tracepoint processing could | ||
1291 | * trigger another page fault (user space callchain | ||
1292 | * reading) and destroy the original cr2 value, so read | ||
1293 | * the faulting address now. | ||
1294 | */ | ||
1295 | unsigned long address = read_cr2(); | ||
1269 | enum ctx_state prev_state; | 1296 | enum ctx_state prev_state; |
1270 | 1297 | ||
1271 | prev_state = exception_enter(); | 1298 | prev_state = exception_enter(); |
1272 | trace_page_fault_entries(regs, error_code); | 1299 | trace_page_fault_entries(address, regs, error_code); |
1273 | __do_page_fault(regs, error_code); | 1300 | __do_page_fault(regs, error_code, address); |
1274 | exception_exit(prev_state); | 1301 | exception_exit(prev_state); |
1275 | } | 1302 | } |
1303 | #endif /* CONFIG_TRACING */ | ||
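Both fault entry points now sample CR2 before exception_enter() or any tracepoint runs, because code on those paths can itself take a page fault (e.g. while reading a user callchain) and overwrite CR2. A toy illustration of the clobber hazard, with a global standing in for the register:

#include <stdint.h>
#include <stdio.h>

static uintptr_t fake_cr2;                   /* stands in for %cr2 */

static void nested_fault(void) { fake_cr2 = 0xdeadbeef; }

static void handle_fault(void)
{
	uintptr_t address = fake_cr2;        /* read first, like read_cr2() */

	nested_fault();                      /* tracing path faults... */
	printf("handled 0x%lx (cr2 now 0x%lx)\n",
	       (unsigned long)address, (unsigned long)fake_cr2);
}

int main(void)
{
	fake_cr2 = 0x1000;                   /* the original fault address */
	handle_fault();                      /* still sees 0x1000 */
	return 0;
}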
diff --git a/arch/x86/mm/gup.c b/arch/x86/mm/gup.c index 0596e8e0cc19..207d9aef662d 100644 --- a/arch/x86/mm/gup.c +++ b/arch/x86/mm/gup.c | |||
@@ -108,8 +108,8 @@ static noinline int gup_pte_range(pmd_t pmd, unsigned long addr, | |||
108 | 108 | ||
109 | static inline void get_head_page_multiple(struct page *page, int nr) | 109 | static inline void get_head_page_multiple(struct page *page, int nr) |
110 | { | 110 | { |
111 | VM_BUG_ON(page != compound_head(page)); | 111 | VM_BUG_ON_PAGE(page != compound_head(page), page); |
112 | VM_BUG_ON(page_count(page) == 0); | 112 | VM_BUG_ON_PAGE(page_count(page) == 0, page); |
113 | atomic_add(nr, &page->_count); | 113 | atomic_add(nr, &page->_count); |
114 | SetPageReferenced(page); | 114 | SetPageReferenced(page); |
115 | } | 115 | } |
@@ -135,7 +135,7 @@ static noinline int gup_huge_pmd(pmd_t pmd, unsigned long addr, | |||
135 | head = pte_page(pte); | 135 | head = pte_page(pte); |
136 | page = head + ((addr & ~PMD_MASK) >> PAGE_SHIFT); | 136 | page = head + ((addr & ~PMD_MASK) >> PAGE_SHIFT); |
137 | do { | 137 | do { |
138 | VM_BUG_ON(compound_head(page) != head); | 138 | VM_BUG_ON_PAGE(compound_head(page) != head, page); |
139 | pages[*nr] = page; | 139 | pages[*nr] = page; |
140 | if (PageTail(page)) | 140 | if (PageTail(page)) |
141 | get_huge_page_tail(page); | 141 | get_huge_page_tail(page); |
@@ -212,7 +212,7 @@ static noinline int gup_huge_pud(pud_t pud, unsigned long addr, | |||
212 | head = pte_page(pte); | 212 | head = pte_page(pte); |
213 | page = head + ((addr & ~PUD_MASK) >> PAGE_SHIFT); | 213 | page = head + ((addr & ~PUD_MASK) >> PAGE_SHIFT); |
214 | do { | 214 | do { |
215 | VM_BUG_ON(compound_head(page) != head); | 215 | VM_BUG_ON_PAGE(compound_head(page) != head, page); |
216 | pages[*nr] = page; | 216 | pages[*nr] = page; |
217 | if (PageTail(page)) | 217 | if (PageTail(page)) |
218 | get_huge_page_tail(page); | 218 | get_huge_page_tail(page); |
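VM_BUG_ON_PAGE() is the same assertion as VM_BUG_ON() but dumps the offending struct page before oopsing, which is why the gup.c conversions pass the page they are checking. Under CONFIG_DEBUG_VM it behaves roughly like the sketch below (hedged: the exact dump_page() signature has varied across releases; the macro is a no-op without CONFIG_DEBUG_VM):

/* Approximate expansion under CONFIG_DEBUG_VM. */
#define VM_BUG_ON_PAGE(cond, page)					\
	do {								\
		if (unlikely(cond)) {					\
			dump_page(page); /* flags, refcount, mapcount, mapping */ \
			BUG();						\
		}							\
	} while (0)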
diff --git a/arch/x86/mm/hugetlbpage.c b/arch/x86/mm/hugetlbpage.c index 9d980d88b747..8c9f647ff9e1 100644 --- a/arch/x86/mm/hugetlbpage.c +++ b/arch/x86/mm/hugetlbpage.c | |||
@@ -87,9 +87,7 @@ int pmd_huge_support(void) | |||
87 | } | 87 | } |
88 | #endif | 88 | #endif |
89 | 89 | ||
90 | /* x86_64 also uses this file */ | 90 | #ifdef CONFIG_HUGETLB_PAGE |
91 | |||
92 | #ifdef HAVE_ARCH_HUGETLB_UNMAPPED_AREA | ||
93 | static unsigned long hugetlb_get_unmapped_area_bottomup(struct file *file, | 91 | static unsigned long hugetlb_get_unmapped_area_bottomup(struct file *file, |
94 | unsigned long addr, unsigned long len, | 92 | unsigned long addr, unsigned long len, |
95 | unsigned long pgoff, unsigned long flags) | 93 | unsigned long pgoff, unsigned long flags) |
@@ -99,7 +97,7 @@ static unsigned long hugetlb_get_unmapped_area_bottomup(struct file *file, | |||
99 | 97 | ||
100 | info.flags = 0; | 98 | info.flags = 0; |
101 | info.length = len; | 99 | info.length = len; |
102 | info.low_limit = TASK_UNMAPPED_BASE; | 100 | info.low_limit = current->mm->mmap_legacy_base; |
103 | info.high_limit = TASK_SIZE; | 101 | info.high_limit = TASK_SIZE; |
104 | info.align_mask = PAGE_MASK & ~huge_page_mask(h); | 102 | info.align_mask = PAGE_MASK & ~huge_page_mask(h); |
105 | info.align_offset = 0; | 103 | info.align_offset = 0; |
@@ -172,8 +170,7 @@ hugetlb_get_unmapped_area(struct file *file, unsigned long addr, | |||
172 | return hugetlb_get_unmapped_area_topdown(file, addr, len, | 170 | return hugetlb_get_unmapped_area_topdown(file, addr, len, |
173 | pgoff, flags); | 171 | pgoff, flags); |
174 | } | 172 | } |
175 | 173 | #endif /* CONFIG_HUGETLB_PAGE */ | |
176 | #endif /*HAVE_ARCH_HUGETLB_UNMAPPED_AREA*/ | ||
177 | 174 | ||
178 | #ifdef CONFIG_X86_64 | 175 | #ifdef CONFIG_X86_64 |
179 | static __init int setup_hugepagesz(char *opt) | 176 | static __init int setup_hugepagesz(char *opt) |
diff --git a/arch/x86/mm/init_32.c b/arch/x86/mm/init_32.c index 4287f1ffba7e..e39504878aec 100644 --- a/arch/x86/mm/init_32.c +++ b/arch/x86/mm/init_32.c | |||
@@ -665,7 +665,7 @@ void __init initmem_init(void) | |||
665 | high_memory = (void *) __va(max_low_pfn * PAGE_SIZE - 1) + 1; | 665 | high_memory = (void *) __va(max_low_pfn * PAGE_SIZE - 1) + 1; |
666 | #endif | 666 | #endif |
667 | 667 | ||
668 | memblock_set_node(0, (phys_addr_t)ULLONG_MAX, 0); | 668 | memblock_set_node(0, (phys_addr_t)ULLONG_MAX, &memblock.memory, 0); |
669 | sparse_memory_present_with_active_regions(0); | 669 | sparse_memory_present_with_active_regions(0); |
670 | 670 | ||
671 | #ifdef CONFIG_FLATMEM | 671 | #ifdef CONFIG_FLATMEM |
@@ -806,6 +806,9 @@ void __init mem_init(void) | |||
806 | BUILD_BUG_ON(VMALLOC_START >= VMALLOC_END); | 806 | BUILD_BUG_ON(VMALLOC_START >= VMALLOC_END); |
807 | #undef high_memory | 807 | #undef high_memory |
808 | #undef __FIXADDR_TOP | 808 | #undef __FIXADDR_TOP |
809 | #ifdef CONFIG_RANDOMIZE_BASE | ||
810 | BUILD_BUG_ON(CONFIG_RANDOMIZE_BASE_MAX_OFFSET > KERNEL_IMAGE_SIZE); | ||
811 | #endif | ||
809 | 812 | ||
810 | #ifdef CONFIG_HIGHMEM | 813 | #ifdef CONFIG_HIGHMEM |
811 | BUG_ON(PKMAP_BASE + LAST_PKMAP*PAGE_SIZE > FIXADDR_START); | 814 | BUG_ON(PKMAP_BASE + LAST_PKMAP*PAGE_SIZE > FIXADDR_START); |
diff --git a/arch/x86/mm/init_64.c b/arch/x86/mm/init_64.c index 104d56a9245f..f35c66c5959a 100644 --- a/arch/x86/mm/init_64.c +++ b/arch/x86/mm/init_64.c | |||
@@ -643,7 +643,7 @@ kernel_physical_mapping_init(unsigned long start, | |||
643 | #ifndef CONFIG_NUMA | 643 | #ifndef CONFIG_NUMA |
644 | void __init initmem_init(void) | 644 | void __init initmem_init(void) |
645 | { | 645 | { |
646 | memblock_set_node(0, (phys_addr_t)ULLONG_MAX, 0); | 646 | memblock_set_node(0, (phys_addr_t)ULLONG_MAX, &memblock.memory, 0); |
647 | } | 647 | } |
648 | #endif | 648 | #endif |
649 | 649 | ||
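Both initmem_init() hunks reflect the same interface change: memblock_set_node() now takes the memblock_type to operate on, so node ids can be attached to either region array. A hedged sketch of the two call shapes (base, size and nid are hypothetical):

/* Tag all known memory as node 0 (non-NUMA boot), as above: */
memblock_set_node(0, (phys_addr_t)ULLONG_MAX, &memblock.memory, 0);

/* Tag a reserved range with its owning node (used by the hotplug
 * bookkeeping added to numa.c further down): */
memblock_set_node(base, size, &memblock.reserved, nid);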
diff --git a/arch/x86/mm/ioremap.c b/arch/x86/mm/ioremap.c index 799580cabc78..597ac155c91c 100644 --- a/arch/x86/mm/ioremap.c +++ b/arch/x86/mm/ioremap.c | |||
@@ -328,17 +328,6 @@ void unxlate_dev_mem_ptr(unsigned long phys, void *addr) | |||
328 | return; | 328 | return; |
329 | } | 329 | } |
330 | 330 | ||
331 | static int __initdata early_ioremap_debug; | ||
332 | |||
333 | static int __init early_ioremap_debug_setup(char *str) | ||
334 | { | ||
335 | early_ioremap_debug = 1; | ||
336 | |||
337 | return 0; | ||
338 | } | ||
339 | early_param("early_ioremap_debug", early_ioremap_debug_setup); | ||
340 | |||
341 | static __initdata int after_paging_init; | ||
342 | static pte_t bm_pte[PAGE_SIZE/sizeof(pte_t)] __page_aligned_bss; | 331 | static pte_t bm_pte[PAGE_SIZE/sizeof(pte_t)] __page_aligned_bss; |
343 | 332 | ||
344 | static inline pmd_t * __init early_ioremap_pmd(unsigned long addr) | 333 | static inline pmd_t * __init early_ioremap_pmd(unsigned long addr) |
@@ -362,18 +351,11 @@ bool __init is_early_ioremap_ptep(pte_t *ptep) | |||
362 | return ptep >= &bm_pte[0] && ptep < &bm_pte[PAGE_SIZE/sizeof(pte_t)]; | 351 | return ptep >= &bm_pte[0] && ptep < &bm_pte[PAGE_SIZE/sizeof(pte_t)]; |
363 | } | 352 | } |
364 | 353 | ||
365 | static unsigned long slot_virt[FIX_BTMAPS_SLOTS] __initdata; | ||
366 | |||
367 | void __init early_ioremap_init(void) | 354 | void __init early_ioremap_init(void) |
368 | { | 355 | { |
369 | pmd_t *pmd; | 356 | pmd_t *pmd; |
370 | int i; | ||
371 | 357 | ||
372 | if (early_ioremap_debug) | 358 | early_ioremap_setup(); |
373 | printk(KERN_INFO "early_ioremap_init()\n"); | ||
374 | |||
375 | for (i = 0; i < FIX_BTMAPS_SLOTS; i++) | ||
376 | slot_virt[i] = __fix_to_virt(FIX_BTMAP_BEGIN - NR_FIX_BTMAPS*i); | ||
377 | 359 | ||
378 | pmd = early_ioremap_pmd(fix_to_virt(FIX_BTMAP_BEGIN)); | 360 | pmd = early_ioremap_pmd(fix_to_virt(FIX_BTMAP_BEGIN)); |
379 | memset(bm_pte, 0, sizeof(bm_pte)); | 361 | memset(bm_pte, 0, sizeof(bm_pte)); |
@@ -402,13 +384,8 @@ void __init early_ioremap_init(void) | |||
402 | } | 384 | } |
403 | } | 385 | } |
404 | 386 | ||
405 | void __init early_ioremap_reset(void) | 387 | void __init __early_set_fixmap(enum fixed_addresses idx, |
406 | { | 388 | phys_addr_t phys, pgprot_t flags) |
407 | after_paging_init = 1; | ||
408 | } | ||
409 | |||
410 | static void __init __early_set_fixmap(enum fixed_addresses idx, | ||
411 | phys_addr_t phys, pgprot_t flags) | ||
412 | { | 389 | { |
413 | unsigned long addr = __fix_to_virt(idx); | 390 | unsigned long addr = __fix_to_virt(idx); |
414 | pte_t *pte; | 391 | pte_t *pte; |
@@ -425,198 +402,3 @@ static void __init __early_set_fixmap(enum fixed_addresses idx, | |||
425 | pte_clear(&init_mm, addr, pte); | 402 | pte_clear(&init_mm, addr, pte); |
426 | __flush_tlb_one(addr); | 403 | __flush_tlb_one(addr); |
427 | } | 404 | } |
428 | |||
429 | static inline void __init early_set_fixmap(enum fixed_addresses idx, | ||
430 | phys_addr_t phys, pgprot_t prot) | ||
431 | { | ||
432 | if (after_paging_init) | ||
433 | __set_fixmap(idx, phys, prot); | ||
434 | else | ||
435 | __early_set_fixmap(idx, phys, prot); | ||
436 | } | ||
437 | |||
438 | static inline void __init early_clear_fixmap(enum fixed_addresses idx) | ||
439 | { | ||
440 | if (after_paging_init) | ||
441 | clear_fixmap(idx); | ||
442 | else | ||
443 | __early_set_fixmap(idx, 0, __pgprot(0)); | ||
444 | } | ||
445 | |||
446 | static void __iomem *prev_map[FIX_BTMAPS_SLOTS] __initdata; | ||
447 | static unsigned long prev_size[FIX_BTMAPS_SLOTS] __initdata; | ||
448 | |||
449 | void __init fixup_early_ioremap(void) | ||
450 | { | ||
451 | int i; | ||
452 | |||
453 | for (i = 0; i < FIX_BTMAPS_SLOTS; i++) { | ||
454 | if (prev_map[i]) { | ||
455 | WARN_ON(1); | ||
456 | break; | ||
457 | } | ||
458 | } | ||
459 | |||
460 | early_ioremap_init(); | ||
461 | } | ||
462 | |||
463 | static int __init check_early_ioremap_leak(void) | ||
464 | { | ||
465 | int count = 0; | ||
466 | int i; | ||
467 | |||
468 | for (i = 0; i < FIX_BTMAPS_SLOTS; i++) | ||
469 | if (prev_map[i]) | ||
470 | count++; | ||
471 | |||
472 | if (!count) | ||
473 | return 0; | ||
474 | WARN(1, KERN_WARNING | ||
475 | "Debug warning: early ioremap leak of %d areas detected.\n", | ||
476 | count); | ||
477 | printk(KERN_WARNING | ||
478 | "please boot with early_ioremap_debug and report the dmesg.\n"); | ||
479 | |||
480 | return 1; | ||
481 | } | ||
482 | late_initcall(check_early_ioremap_leak); | ||
483 | |||
484 | static void __init __iomem * | ||
485 | __early_ioremap(resource_size_t phys_addr, unsigned long size, pgprot_t prot) | ||
486 | { | ||
487 | unsigned long offset; | ||
488 | resource_size_t last_addr; | ||
489 | unsigned int nrpages; | ||
490 | enum fixed_addresses idx; | ||
491 | int i, slot; | ||
492 | |||
493 | WARN_ON(system_state != SYSTEM_BOOTING); | ||
494 | |||
495 | slot = -1; | ||
496 | for (i = 0; i < FIX_BTMAPS_SLOTS; i++) { | ||
497 | if (!prev_map[i]) { | ||
498 | slot = i; | ||
499 | break; | ||
500 | } | ||
501 | } | ||
502 | |||
503 | if (slot < 0) { | ||
504 | printk(KERN_INFO "%s(%08llx, %08lx) not found slot\n", | ||
505 | __func__, (u64)phys_addr, size); | ||
506 | WARN_ON(1); | ||
507 | return NULL; | ||
508 | } | ||
509 | |||
510 | if (early_ioremap_debug) { | ||
511 | printk(KERN_INFO "%s(%08llx, %08lx) [%d] => ", | ||
512 | __func__, (u64)phys_addr, size, slot); | ||
513 | dump_stack(); | ||
514 | } | ||
515 | |||
516 | /* Don't allow wraparound or zero size */ | ||
517 | last_addr = phys_addr + size - 1; | ||
518 | if (!size || last_addr < phys_addr) { | ||
519 | WARN_ON(1); | ||
520 | return NULL; | ||
521 | } | ||
522 | |||
523 | prev_size[slot] = size; | ||
524 | /* | ||
525 | * Mappings have to be page-aligned | ||
526 | */ | ||
527 | offset = phys_addr & ~PAGE_MASK; | ||
528 | phys_addr &= PAGE_MASK; | ||
529 | size = PAGE_ALIGN(last_addr + 1) - phys_addr; | ||
530 | |||
531 | /* | ||
532 | * Mappings have to fit in the FIX_BTMAP area. | ||
533 | */ | ||
534 | nrpages = size >> PAGE_SHIFT; | ||
535 | if (nrpages > NR_FIX_BTMAPS) { | ||
536 | WARN_ON(1); | ||
537 | return NULL; | ||
538 | } | ||
539 | |||
540 | /* | ||
541 | * Ok, go for it.. | ||
542 | */ | ||
543 | idx = FIX_BTMAP_BEGIN - NR_FIX_BTMAPS*slot; | ||
544 | while (nrpages > 0) { | ||
545 | early_set_fixmap(idx, phys_addr, prot); | ||
546 | phys_addr += PAGE_SIZE; | ||
547 | --idx; | ||
548 | --nrpages; | ||
549 | } | ||
550 | if (early_ioremap_debug) | ||
551 | printk(KERN_CONT "%08lx + %08lx\n", offset, slot_virt[slot]); | ||
552 | |||
553 | prev_map[slot] = (void __iomem *)(offset + slot_virt[slot]); | ||
554 | return prev_map[slot]; | ||
555 | } | ||
556 | |||
557 | /* Remap an IO device */ | ||
558 | void __init __iomem * | ||
559 | early_ioremap(resource_size_t phys_addr, unsigned long size) | ||
560 | { | ||
561 | return __early_ioremap(phys_addr, size, PAGE_KERNEL_IO); | ||
562 | } | ||
563 | |||
564 | /* Remap memory */ | ||
565 | void __init __iomem * | ||
566 | early_memremap(resource_size_t phys_addr, unsigned long size) | ||
567 | { | ||
568 | return __early_ioremap(phys_addr, size, PAGE_KERNEL); | ||
569 | } | ||
570 | |||
571 | void __init early_iounmap(void __iomem *addr, unsigned long size) | ||
572 | { | ||
573 | unsigned long virt_addr; | ||
574 | unsigned long offset; | ||
575 | unsigned int nrpages; | ||
576 | enum fixed_addresses idx; | ||
577 | int i, slot; | ||
578 | |||
579 | slot = -1; | ||
580 | for (i = 0; i < FIX_BTMAPS_SLOTS; i++) { | ||
581 | if (prev_map[i] == addr) { | ||
582 | slot = i; | ||
583 | break; | ||
584 | } | ||
585 | } | ||
586 | |||
587 | if (slot < 0) { | ||
588 | printk(KERN_INFO "early_iounmap(%p, %08lx) not found slot\n", | ||
589 | addr, size); | ||
590 | WARN_ON(1); | ||
591 | return; | ||
592 | } | ||
593 | |||
594 | if (prev_size[slot] != size) { | ||
595 | printk(KERN_INFO "early_iounmap(%p, %08lx) [%d] size not consistent %08lx\n", | ||
596 | addr, size, slot, prev_size[slot]); | ||
597 | WARN_ON(1); | ||
598 | return; | ||
599 | } | ||
600 | |||
601 | if (early_ioremap_debug) { | ||
602 | printk(KERN_INFO "early_iounmap(%p, %08lx) [%d]\n", addr, | ||
603 | size, slot); | ||
604 | dump_stack(); | ||
605 | } | ||
606 | |||
607 | virt_addr = (unsigned long)addr; | ||
608 | if (virt_addr < fix_to_virt(FIX_BTMAP_BEGIN)) { | ||
609 | WARN_ON(1); | ||
610 | return; | ||
611 | } | ||
612 | offset = virt_addr & ~PAGE_MASK; | ||
613 | nrpages = PAGE_ALIGN(offset + size) >> PAGE_SHIFT; | ||
614 | |||
615 | idx = FIX_BTMAP_BEGIN - NR_FIX_BTMAPS*slot; | ||
616 | while (nrpages > 0) { | ||
617 | early_clear_fixmap(idx); | ||
618 | --idx; | ||
619 | --nrpages; | ||
620 | } | ||
621 | prev_map[slot] = NULL; | ||
622 | } | ||
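All of the removed slot bookkeeping, leak checking and debug-parameter code now lives in the generic early-ioremap implementation; the arch keeps only the fixmap plumbing (early_ioremap_pmd(), __early_set_fixmap()) and calls early_ioremap_setup() to register its slots. The boot-time API is unchanged; a hedged usage sketch with a hypothetical table address:

/* Hedged sketch of typical boot-time use; table_phys/table_len are
 * hypothetical. The mapping borrows one of the FIX_BTMAPS_SLOTS
 * temporary fixmap slots, so it must be released with early_iounmap(). */
void __init parse_boot_table(phys_addr_t table_phys, unsigned long table_len)
{
	void __iomem *p = early_memremap(table_phys, table_len);

	if (!p)
		return;
	/* ... read the table through p ... */
	early_iounmap(p, table_len);
}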
diff --git a/arch/x86/mm/kmemcheck/kmemcheck.c b/arch/x86/mm/kmemcheck/kmemcheck.c index d87dd6d042d6..dd89a13f1051 100644 --- a/arch/x86/mm/kmemcheck/kmemcheck.c +++ b/arch/x86/mm/kmemcheck/kmemcheck.c | |||
@@ -78,10 +78,16 @@ early_initcall(kmemcheck_init); | |||
78 | */ | 78 | */ |
79 | static int __init param_kmemcheck(char *str) | 79 | static int __init param_kmemcheck(char *str) |
80 | { | 80 | { |
81 | int val; | ||
82 | int ret; | ||
83 | |||
81 | if (!str) | 84 | if (!str) |
82 | return -EINVAL; | 85 | return -EINVAL; |
83 | 86 | ||
84 | sscanf(str, "%d", &kmemcheck_enabled); | 87 | ret = kstrtoint(str, 0, &val); |
88 | if (ret) | ||
89 | return ret; | ||
90 | kmemcheck_enabled = val; | ||
85 | return 0; | 91 | return 0; |
86 | } | 92 | } |
87 | 93 | ||
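The kstrtoint() conversion is stricter than the sscanf() it replaces: it rejects trailing garbage, detects overflow, and the destination is only written on success, so a malformed "kmemcheck=" argument can no longer leave kmemcheck_enabled half-parsed. The same pattern for a hypothetical boot parameter:

static int my_feature_enabled;		/* hypothetical knob */

static int __init param_my_feature(char *str)
{
	int val, ret;

	if (!str)
		return -EINVAL;

	ret = kstrtoint(str, 0, &val);	/* base 0: accepts 0x.., 0.., decimal */
	if (ret)
		return ret;		/* -EINVAL/-ERANGE, nothing written */
	my_feature_enabled = val;
	return 0;
}
early_param("my_feature", param_my_feature);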
diff --git a/arch/x86/mm/kmmio.c b/arch/x86/mm/kmmio.c index e5d5e2ce9f77..637ab34ed632 100644 --- a/arch/x86/mm/kmmio.c +++ b/arch/x86/mm/kmmio.c | |||
@@ -11,7 +11,6 @@ | |||
11 | #include <linux/rculist.h> | 11 | #include <linux/rculist.h> |
12 | #include <linux/spinlock.h> | 12 | #include <linux/spinlock.h> |
13 | #include <linux/hash.h> | 13 | #include <linux/hash.h> |
14 | #include <linux/init.h> | ||
15 | #include <linux/module.h> | 14 | #include <linux/module.h> |
16 | #include <linux/kernel.h> | 15 | #include <linux/kernel.h> |
17 | #include <linux/uaccess.h> | 16 | #include <linux/uaccess.h> |
diff --git a/arch/x86/mm/memtest.c b/arch/x86/mm/memtest.c index 8dabbed409ee..1e9da795767a 100644 --- a/arch/x86/mm/memtest.c +++ b/arch/x86/mm/memtest.c | |||
@@ -74,7 +74,7 @@ static void __init do_one_pass(u64 pattern, u64 start, u64 end) | |||
74 | u64 i; | 74 | u64 i; |
75 | phys_addr_t this_start, this_end; | 75 | phys_addr_t this_start, this_end; |
76 | 76 | ||
77 | for_each_free_mem_range(i, MAX_NUMNODES, &this_start, &this_end, NULL) { | 77 | for_each_free_mem_range(i, NUMA_NO_NODE, &this_start, &this_end, NULL) { |
78 | this_start = clamp_t(phys_addr_t, this_start, start, end); | 78 | this_start = clamp_t(phys_addr_t, this_start, start, end); |
79 | this_end = clamp_t(phys_addr_t, this_end, start, end); | 79 | this_end = clamp_t(phys_addr_t, this_end, start, end); |
80 | if (this_start < this_end) { | 80 | if (this_start < this_end) { |
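NUMA_NO_NODE is the documented "any node" wildcard for the memblock iterators; MAX_NUMNODES meant the same thing but was being deprecated. A hedged sketch of the iterator the memtest loop is built on:

/* Walk every free (in memory, not reserved) range on any node. */
phys_addr_t this_start, this_end;
u64 i;

for_each_free_mem_range(i, NUMA_NO_NODE, &this_start, &this_end, NULL)
	pr_debug("free range: [%pa, %pa)\n", &this_start, &this_end);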
diff --git a/arch/x86/mm/numa.c b/arch/x86/mm/numa.c index 24aec58d6afd..1d045f9c390f 100644 --- a/arch/x86/mm/numa.c +++ b/arch/x86/mm/numa.c | |||
@@ -211,9 +211,13 @@ static void __init setup_node_data(int nid, u64 start, u64 end) | |||
211 | */ | 211 | */ |
212 | nd_pa = memblock_alloc_nid(nd_size, SMP_CACHE_BYTES, nid); | 212 | nd_pa = memblock_alloc_nid(nd_size, SMP_CACHE_BYTES, nid); |
213 | if (!nd_pa) { | 213 | if (!nd_pa) { |
214 | pr_err("Cannot find %zu bytes in node %d\n", | 214 | nd_pa = __memblock_alloc_base(nd_size, SMP_CACHE_BYTES, |
215 | nd_size, nid); | 215 | MEMBLOCK_ALLOC_ACCESSIBLE); |
216 | return; | 216 | if (!nd_pa) { |
217 | pr_err("Cannot find %zu bytes in node %d\n", | ||
218 | nd_size, nid); | ||
219 | return; | ||
220 | } | ||
217 | } | 221 | } |
218 | nd = __va(nd_pa); | 222 | nd = __va(nd_pa); |
219 | 223 | ||
@@ -487,7 +491,8 @@ static int __init numa_register_memblks(struct numa_meminfo *mi) | |||
487 | 491 | ||
488 | for (i = 0; i < mi->nr_blks; i++) { | 492 | for (i = 0; i < mi->nr_blks; i++) { |
489 | struct numa_memblk *mb = &mi->blk[i]; | 493 | struct numa_memblk *mb = &mi->blk[i]; |
490 | memblock_set_node(mb->start, mb->end - mb->start, mb->nid); | 494 | memblock_set_node(mb->start, mb->end - mb->start, |
495 | &memblock.memory, mb->nid); | ||
491 | } | 496 | } |
492 | 497 | ||
493 | /* | 498 | /* |
@@ -549,6 +554,41 @@ static void __init numa_init_array(void) | |||
549 | } | 554 | } |
550 | } | 555 | } |
551 | 556 | ||
557 | static void __init numa_clear_kernel_node_hotplug(void) | ||
558 | { | ||
559 | int i, nid; | ||
560 | nodemask_t numa_kernel_nodes = NODE_MASK_NONE; | ||
561 | unsigned long start, end; | ||
562 | struct memblock_type *type = &memblock.reserved; | ||
563 | |||
564 | /* | ||
565 | * At this point, all memory regions reserved by memblock are | ||
566 | * used by the kernel. Setting the nid in memblock.reserved | ||
567 | * therefore marks all the nodes the kernel resides in. | ||
568 | */ | ||
569 | for (i = 0; i < numa_meminfo.nr_blks; i++) { | ||
570 | struct numa_memblk *mb = &numa_meminfo.blk[i]; | ||
571 | memblock_set_node(mb->start, mb->end - mb->start, | ||
572 | &memblock.reserved, mb->nid); | ||
573 | } | ||
574 | |||
575 | /* Mark all kernel nodes. */ | ||
576 | for (i = 0; i < type->cnt; i++) | ||
577 | node_set(type->regions[i].nid, numa_kernel_nodes); | ||
578 | |||
579 | /* Clear MEMBLOCK_HOTPLUG flag for memory in kernel nodes. */ | ||
580 | for (i = 0; i < numa_meminfo.nr_blks; i++) { | ||
581 | nid = numa_meminfo.blk[i].nid; | ||
582 | if (!node_isset(nid, numa_kernel_nodes)) | ||
583 | continue; | ||
584 | |||
585 | start = numa_meminfo.blk[i].start; | ||
586 | end = numa_meminfo.blk[i].end; | ||
587 | |||
588 | memblock_clear_hotplug(start, end - start); | ||
589 | } | ||
590 | } | ||
591 | |||
552 | static int __init numa_init(int (*init_func)(void)) | 592 | static int __init numa_init(int (*init_func)(void)) |
553 | { | 593 | { |
554 | int i; | 594 | int i; |
@@ -561,7 +601,12 @@ static int __init numa_init(int (*init_func)(void)) | |||
561 | nodes_clear(node_possible_map); | 601 | nodes_clear(node_possible_map); |
562 | nodes_clear(node_online_map); | 602 | nodes_clear(node_online_map); |
563 | memset(&numa_meminfo, 0, sizeof(numa_meminfo)); | 603 | memset(&numa_meminfo, 0, sizeof(numa_meminfo)); |
564 | WARN_ON(memblock_set_node(0, ULLONG_MAX, MAX_NUMNODES)); | 604 | WARN_ON(memblock_set_node(0, ULLONG_MAX, &memblock.memory, |
605 | MAX_NUMNODES)); | ||
606 | WARN_ON(memblock_set_node(0, ULLONG_MAX, &memblock.reserved, | ||
607 | MAX_NUMNODES)); | ||
608 | /* In case that parsing SRAT failed. */ | ||
609 | WARN_ON(memblock_clear_hotplug(0, ULLONG_MAX)); | ||
565 | numa_reset_distance(); | 610 | numa_reset_distance(); |
566 | 611 | ||
567 | ret = init_func(); | 612 | ret = init_func(); |
@@ -597,6 +642,16 @@ static int __init numa_init(int (*init_func)(void)) | |||
597 | numa_clear_node(i); | 642 | numa_clear_node(i); |
598 | } | 643 | } |
599 | numa_init_array(); | 644 | numa_init_array(); |
645 | |||
646 | /* | ||
647 | * Very early during boot the kernel has to use some memory, e.g. | ||
648 | * for loading the kernel image. We cannot prevent this, so any | ||
649 | * node the kernel resides in must be made non-hotpluggable. | ||
650 | * | ||
651 | * By the time we get here, numa_init() can no longer fail. | ||
652 | */ | ||
653 | numa_clear_kernel_node_hotplug(); | ||
654 | |||
600 | return 0; | 655 | return 0; |
601 | } | 656 | } |
602 | 657 | ||
@@ -632,10 +687,6 @@ static int __init dummy_numa_init(void) | |||
632 | void __init x86_numa_init(void) | 687 | void __init x86_numa_init(void) |
633 | { | 688 | { |
634 | if (!numa_off) { | 689 | if (!numa_off) { |
635 | #ifdef CONFIG_X86_NUMAQ | ||
636 | if (!numa_init(numaq_numa_init)) | ||
637 | return; | ||
638 | #endif | ||
639 | #ifdef CONFIG_ACPI_NUMA | 690 | #ifdef CONFIG_ACPI_NUMA |
640 | if (!numa_init(x86_acpi_numa_init)) | 691 | if (!numa_init(x86_acpi_numa_init)) |
641 | return; | 692 | return; |
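Taken together, the numa.c changes implement "the kernel's own nodes are never hotpluggable": reserved regions are tagged with node ids, every node owning a reservation is collected into a mask, and MEMBLOCK_HOTPLUG is cleared for memory on those nodes. A condensed restatement of numa_clear_kernel_node_hotplug():

nodemask_t kernel_nodes = NODE_MASK_NONE;
struct memblock_type *rsv = &memblock.reserved;
int i;

for (i = 0; i < rsv->cnt; i++)			/* nodes owning any reservation */
	node_set(rsv->regions[i].nid, kernel_nodes);

for (i = 0; i < numa_meminfo.nr_blks; i++) {
	struct numa_memblk *mb = &numa_meminfo.blk[i];

	if (node_isset(mb->nid, kernel_nodes))	/* kernel lives here: pin it */
		memblock_clear_hotplug(mb->start, mb->end - mb->start);
}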
diff --git a/arch/x86/mm/numa_32.c b/arch/x86/mm/numa_32.c index 0342d27ca798..47b6436e41c2 100644 --- a/arch/x86/mm/numa_32.c +++ b/arch/x86/mm/numa_32.c | |||
@@ -52,6 +52,8 @@ void memory_present(int nid, unsigned long start, unsigned long end) | |||
52 | nid, start, end); | 52 | nid, start, end); |
53 | printk(KERN_DEBUG " Setting physnode_map array to node %d for pfns:\n", nid); | 53 | printk(KERN_DEBUG " Setting physnode_map array to node %d for pfns:\n", nid); |
54 | printk(KERN_DEBUG " "); | 54 | printk(KERN_DEBUG " "); |
55 | start = round_down(start, PAGES_PER_SECTION); | ||
56 | end = round_up(end, PAGES_PER_SECTION); | ||
55 | for (pfn = start; pfn < end; pfn += PAGES_PER_SECTION) { | 57 | for (pfn = start; pfn < end; pfn += PAGES_PER_SECTION) { |
56 | physnode_map[pfn / PAGES_PER_SECTION] = nid; | 58 | physnode_map[pfn / PAGES_PER_SECTION] = nid; |
57 | printk(KERN_CONT "%lx ", pfn); | 59 | printk(KERN_CONT "%lx ", pfn); |
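The rounding fixes an indexing bug: physnode_map[] has one entry per memory section, so a start or end that is not section-aligned must be widened to cover the partial sections at either edge. With a hypothetical PAGES_PER_SECTION of 0x4000:

/* Hypothetical values showing the effect of the new rounding: */
unsigned long start = round_down(0x12345, 0x4000);	/* -> 0x10000 */
unsigned long end   = round_up(0x2abcd, 0x4000);	/* -> 0x2c000 */

/* The loop then steps through whole sections: 0x10000, 0x14000, ... */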
diff --git a/arch/x86/mm/pageattr-test.c b/arch/x86/mm/pageattr-test.c index d0b1773d9d2e..461bc8289024 100644 --- a/arch/x86/mm/pageattr-test.c +++ b/arch/x86/mm/pageattr-test.c | |||
@@ -8,7 +8,6 @@ | |||
8 | #include <linux/kthread.h> | 8 | #include <linux/kthread.h> |
9 | #include <linux/random.h> | 9 | #include <linux/random.h> |
10 | #include <linux/kernel.h> | 10 | #include <linux/kernel.h> |
11 | #include <linux/init.h> | ||
12 | #include <linux/mm.h> | 11 | #include <linux/mm.h> |
13 | 12 | ||
14 | #include <asm/cacheflush.h> | 13 | #include <asm/cacheflush.h> |
diff --git a/arch/x86/mm/pageattr.c b/arch/x86/mm/pageattr.c index bb32480c2d71..ae242a7c11c7 100644 --- a/arch/x86/mm/pageattr.c +++ b/arch/x86/mm/pageattr.c | |||
@@ -30,6 +30,7 @@ | |||
30 | */ | 30 | */ |
31 | struct cpa_data { | 31 | struct cpa_data { |
32 | unsigned long *vaddr; | 32 | unsigned long *vaddr; |
33 | pgd_t *pgd; | ||
33 | pgprot_t mask_set; | 34 | pgprot_t mask_set; |
34 | pgprot_t mask_clr; | 35 | pgprot_t mask_clr; |
35 | int numpages; | 36 | int numpages; |
@@ -125,8 +126,8 @@ within(unsigned long addr, unsigned long start, unsigned long end) | |||
125 | * @vaddr: virtual start address | 126 | * @vaddr: virtual start address |
126 | * @size: number of bytes to flush | 127 | * @size: number of bytes to flush |
127 | * | 128 | * |
128 | * clflush is an unordered instruction which needs fencing with mfence | 129 | * clflushopt is an unordered instruction which needs fencing with mfence or |
129 | * to avoid ordering issues. | 130 | * sfence to avoid ordering issues. |
130 | */ | 131 | */ |
131 | void clflush_cache_range(void *vaddr, unsigned int size) | 132 | void clflush_cache_range(void *vaddr, unsigned int size) |
132 | { | 133 | { |
@@ -135,11 +136,11 @@ void clflush_cache_range(void *vaddr, unsigned int size) | |||
135 | mb(); | 136 | mb(); |
136 | 137 | ||
137 | for (; vaddr < vend; vaddr += boot_cpu_data.x86_clflush_size) | 138 | for (; vaddr < vend; vaddr += boot_cpu_data.x86_clflush_size) |
138 | clflush(vaddr); | 139 | clflushopt(vaddr); |
139 | /* | 140 | /* |
140 | * Flush any possible final partial cacheline: | 141 | * Flush any possible final partial cacheline: |
141 | */ | 142 | */ |
142 | clflush(vend); | 143 | clflushopt(vend); |
143 | 144 | ||
144 | mb(); | 145 | mb(); |
145 | } | 146 | } |
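clflushopt, like clflush, is weakly ordered and needs explicit fencing (the updated comment notes sfence also suffices for clflushopt), so the mb() pair bracketing the loop stays. On CPUs without the instruction, clflushopt() degrades to plain clflush via the kernel's alternatives patching, making the shared fencing the conservative common denominator. The pattern as a standalone sketch:

static void flush_buffer(void *vaddr, unsigned int size)
{
	void *vend = vaddr + size - 1;

	mb();					/* order prior stores vs. the flushes */
	for (; vaddr < vend; vaddr += boot_cpu_data.x86_clflush_size)
		clflushopt(vaddr);
	clflushopt(vend);			/* catch a final partial cacheline */
	mb();					/* flushes complete before later accesses */
}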
@@ -323,16 +324,12 @@ static inline pgprot_t static_protections(pgprot_t prot, unsigned long address, | |||
323 | } | 324 | } |
324 | 325 | ||
325 | /* | 326 | /* |
326 | * Lookup the page table entry for a virtual address. Return a pointer | 327 | * Lookup the page table entry for a virtual address in a specific pgd. |
327 | * to the entry and the level of the mapping. | 328 | * Return a pointer to the entry and the level of the mapping. |
328 | * | ||
329 | * Note: We return pud and pmd either when the entry is marked large | ||
330 | * or when the present bit is not set. Otherwise we would return a | ||
331 | * pointer to a nonexisting mapping. | ||
332 | */ | 329 | */ |
333 | pte_t *lookup_address(unsigned long address, unsigned int *level) | 330 | pte_t *lookup_address_in_pgd(pgd_t *pgd, unsigned long address, |
331 | unsigned int *level) | ||
334 | { | 332 | { |
335 | pgd_t *pgd = pgd_offset_k(address); | ||
336 | pud_t *pud; | 333 | pud_t *pud; |
337 | pmd_t *pmd; | 334 | pmd_t *pmd; |
338 | 335 | ||
@@ -361,8 +358,31 @@ pte_t *lookup_address(unsigned long address, unsigned int *level) | |||
361 | 358 | ||
362 | return pte_offset_kernel(pmd, address); | 359 | return pte_offset_kernel(pmd, address); |
363 | } | 360 | } |
361 | |||
362 | /* | ||
363 | * Lookup the page table entry for a virtual address. Return a pointer | ||
364 | * to the entry and the level of the mapping. | ||
365 | * | ||
366 | * Note: We return pud and pmd either when the entry is marked large | ||
367 | * or when the present bit is not set. Otherwise we would return a | ||
368 | * pointer to a nonexisting mapping. | ||
369 | */ | ||
370 | pte_t *lookup_address(unsigned long address, unsigned int *level) | ||
371 | { | ||
372 | return lookup_address_in_pgd(pgd_offset_k(address), address, level); | ||
373 | } | ||
364 | EXPORT_SYMBOL_GPL(lookup_address); | 374 | EXPORT_SYMBOL_GPL(lookup_address); |
365 | 375 | ||
376 | static pte_t *_lookup_address_cpa(struct cpa_data *cpa, unsigned long address, | ||
377 | unsigned int *level) | ||
378 | { | ||
379 | if (cpa->pgd) | ||
380 | return lookup_address_in_pgd(cpa->pgd + pgd_index(address), | ||
381 | address, level); | ||
382 | |||
383 | return lookup_address(address, level); | ||
384 | } | ||
385 | |||
366 | /* | 386 | /* |
367 | * This is necessary because __pa() does not work on some | 387 | * This is necessary because __pa() does not work on some |
368 | * kinds of memory, like vmalloc() or the alloc_remap() | 388 | * kinds of memory, like vmalloc() or the alloc_remap() |
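Splitting lookup_address() into a pgd-parameterized core is what lets the new CPA paths operate on a non-kernel page table. A hedged usage sketch (alt_pgd is a hypothetical alternate hierarchy; note the first argument is the pgd *entry*, as in both callers above):

static void show_mapping_level(pgd_t *alt_pgd, unsigned long addr)
{
	unsigned int level;
	pte_t *pte;

	pte = lookup_address_in_pgd(alt_pgd + pgd_index(addr), addr, &level);
	if (pte && level == PG_LEVEL_2M)
		pr_info("%lx: covered by a 2M mapping\n", addr);
}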
@@ -437,7 +457,7 @@ try_preserve_large_page(pte_t *kpte, unsigned long address, | |||
437 | * Check for races, another CPU might have split this page | 457 | * Check for races, another CPU might have split this page |
438 | * up already: | 458 | * up already: |
439 | */ | 459 | */ |
440 | tmp = lookup_address(address, &level); | 460 | tmp = _lookup_address_cpa(cpa, address, &level); |
441 | if (tmp != kpte) | 461 | if (tmp != kpte) |
442 | goto out_unlock; | 462 | goto out_unlock; |
443 | 463 | ||
@@ -543,7 +563,8 @@ out_unlock: | |||
543 | } | 563 | } |
544 | 564 | ||
545 | static int | 565 | static int |
546 | __split_large_page(pte_t *kpte, unsigned long address, struct page *base) | 566 | __split_large_page(struct cpa_data *cpa, pte_t *kpte, unsigned long address, |
567 | struct page *base) | ||
547 | { | 568 | { |
548 | pte_t *pbase = (pte_t *)page_address(base); | 569 | pte_t *pbase = (pte_t *)page_address(base); |
549 | unsigned long pfn, pfninc = 1; | 570 | unsigned long pfn, pfninc = 1; |
@@ -556,7 +577,7 @@ __split_large_page(pte_t *kpte, unsigned long address, struct page *base) | |||
556 | * Check for races, another CPU might have split this page | 577 | * Check for races, another CPU might have split this page |
557 | * up for us already: | 578 | * up for us already: |
558 | */ | 579 | */ |
559 | tmp = lookup_address(address, &level); | 580 | tmp = _lookup_address_cpa(cpa, address, &level); |
560 | if (tmp != kpte) { | 581 | if (tmp != kpte) { |
561 | spin_unlock(&pgd_lock); | 582 | spin_unlock(&pgd_lock); |
562 | return 1; | 583 | return 1; |
@@ -632,7 +653,8 @@ __split_large_page(pte_t *kpte, unsigned long address, struct page *base) | |||
632 | return 0; | 653 | return 0; |
633 | } | 654 | } |
634 | 655 | ||
635 | static int split_large_page(pte_t *kpte, unsigned long address) | 656 | static int split_large_page(struct cpa_data *cpa, pte_t *kpte, |
657 | unsigned long address) | ||
636 | { | 658 | { |
637 | struct page *base; | 659 | struct page *base; |
638 | 660 | ||
@@ -644,15 +666,402 @@ static int split_large_page(pte_t *kpte, unsigned long address) | |||
644 | if (!base) | 666 | if (!base) |
645 | return -ENOMEM; | 667 | return -ENOMEM; |
646 | 668 | ||
647 | if (__split_large_page(kpte, address, base)) | 669 | if (__split_large_page(cpa, kpte, address, base)) |
648 | __free_page(base); | 670 | __free_page(base); |
649 | 671 | ||
650 | return 0; | 672 | return 0; |
651 | } | 673 | } |
652 | 674 | ||
675 | static bool try_to_free_pte_page(pte_t *pte) | ||
676 | { | ||
677 | int i; | ||
678 | |||
679 | for (i = 0; i < PTRS_PER_PTE; i++) | ||
680 | if (!pte_none(pte[i])) | ||
681 | return false; | ||
682 | |||
683 | free_page((unsigned long)pte); | ||
684 | return true; | ||
685 | } | ||
686 | |||
687 | static bool try_to_free_pmd_page(pmd_t *pmd) | ||
688 | { | ||
689 | int i; | ||
690 | |||
691 | for (i = 0; i < PTRS_PER_PMD; i++) | ||
692 | if (!pmd_none(pmd[i])) | ||
693 | return false; | ||
694 | |||
695 | free_page((unsigned long)pmd); | ||
696 | return true; | ||
697 | } | ||
698 | |||
699 | static bool try_to_free_pud_page(pud_t *pud) | ||
700 | { | ||
701 | int i; | ||
702 | |||
703 | for (i = 0; i < PTRS_PER_PUD; i++) | ||
704 | if (!pud_none(pud[i])) | ||
705 | return false; | ||
706 | |||
707 | free_page((unsigned long)pud); | ||
708 | return true; | ||
709 | } | ||
710 | |||
711 | static bool unmap_pte_range(pmd_t *pmd, unsigned long start, unsigned long end) | ||
712 | { | ||
713 | pte_t *pte = pte_offset_kernel(pmd, start); | ||
714 | |||
715 | while (start < end) { | ||
716 | set_pte(pte, __pte(0)); | ||
717 | |||
718 | start += PAGE_SIZE; | ||
719 | pte++; | ||
720 | } | ||
721 | |||
722 | if (try_to_free_pte_page((pte_t *)pmd_page_vaddr(*pmd))) { | ||
723 | pmd_clear(pmd); | ||
724 | return true; | ||
725 | } | ||
726 | return false; | ||
727 | } | ||
728 | |||
729 | static void __unmap_pmd_range(pud_t *pud, pmd_t *pmd, | ||
730 | unsigned long start, unsigned long end) | ||
731 | { | ||
732 | if (unmap_pte_range(pmd, start, end)) | ||
733 | if (try_to_free_pmd_page((pmd_t *)pud_page_vaddr(*pud))) | ||
734 | pud_clear(pud); | ||
735 | } | ||
736 | |||
737 | static void unmap_pmd_range(pud_t *pud, unsigned long start, unsigned long end) | ||
738 | { | ||
739 | pmd_t *pmd = pmd_offset(pud, start); | ||
740 | |||
741 | /* | ||
742 | * Not on a 2MB page boundary? | ||
743 | */ | ||
744 | if (start & (PMD_SIZE - 1)) { | ||
745 | unsigned long next_page = (start + PMD_SIZE) & PMD_MASK; | ||
746 | unsigned long pre_end = min_t(unsigned long, end, next_page); | ||
747 | |||
748 | __unmap_pmd_range(pud, pmd, start, pre_end); | ||
749 | |||
750 | start = pre_end; | ||
751 | pmd++; | ||
752 | } | ||
753 | |||
754 | /* | ||
755 | * Try to unmap in 2M chunks. | ||
756 | */ | ||
757 | while (end - start >= PMD_SIZE) { | ||
758 | if (pmd_large(*pmd)) | ||
759 | pmd_clear(pmd); | ||
760 | else | ||
761 | __unmap_pmd_range(pud, pmd, start, start + PMD_SIZE); | ||
762 | |||
763 | start += PMD_SIZE; | ||
764 | pmd++; | ||
765 | } | ||
766 | |||
767 | /* | ||
768 | * 4K leftovers? | ||
769 | */ | ||
770 | if (start < end) | ||
771 | return __unmap_pmd_range(pud, pmd, start, end); | ||
772 | |||
773 | /* | ||
774 | * Try again to free the PMD page if we haven't succeeded above. | ||
775 | */ | ||
776 | if (!pud_none(*pud)) | ||
777 | if (try_to_free_pmd_page((pmd_t *)pud_page_vaddr(*pud))) | ||
778 | pud_clear(pud); | ||
779 | } | ||
780 | |||
781 | static void unmap_pud_range(pgd_t *pgd, unsigned long start, unsigned long end) | ||
782 | { | ||
783 | pud_t *pud = pud_offset(pgd, start); | ||
784 | |||
785 | /* | ||
786 | * Not on a GB page boundary? | ||
787 | */ | ||
788 | if (start & (PUD_SIZE - 1)) { | ||
789 | unsigned long next_page = (start + PUD_SIZE) & PUD_MASK; | ||
790 | unsigned long pre_end = min_t(unsigned long, end, next_page); | ||
791 | |||
792 | unmap_pmd_range(pud, start, pre_end); | ||
793 | |||
794 | start = pre_end; | ||
795 | pud++; | ||
796 | } | ||
797 | |||
798 | /* | ||
799 | * Try to unmap in 1G chunks. | ||
800 | */ | ||
801 | while (end - start >= PUD_SIZE) { | ||
802 | |||
803 | if (pud_large(*pud)) | ||
804 | pud_clear(pud); | ||
805 | else | ||
806 | unmap_pmd_range(pud, start, start + PUD_SIZE); | ||
807 | |||
808 | start += PUD_SIZE; | ||
809 | pud++; | ||
810 | } | ||
811 | |||
812 | /* | ||
813 | * 2M leftovers? | ||
814 | */ | ||
815 | if (start < end) | ||
816 | unmap_pmd_range(pud, start, end); | ||
817 | |||
818 | /* | ||
819 | * No need to try to free the PUD page because we'll free it in | ||
820 | * populate_pgd's error path | ||
821 | */ | ||
822 | } | ||
823 | |||
824 | static void unmap_pgd_range(pgd_t *root, unsigned long addr, unsigned long end) | ||
825 | { | ||
826 | pgd_t *pgd_entry = root + pgd_index(addr); | ||
827 | |||
828 | unmap_pud_range(pgd_entry, addr, end); | ||
829 | |||
830 | if (try_to_free_pud_page((pud_t *)pgd_page_vaddr(*pgd_entry))) | ||
831 | pgd_clear(pgd_entry); | ||
832 | } | ||
833 | |||
834 | static int alloc_pte_page(pmd_t *pmd) | ||
835 | { | ||
836 | pte_t *pte = (pte_t *)get_zeroed_page(GFP_KERNEL | __GFP_NOTRACK); | ||
837 | if (!pte) | ||
838 | return -1; | ||
839 | |||
840 | set_pmd(pmd, __pmd(__pa(pte) | _KERNPG_TABLE)); | ||
841 | return 0; | ||
842 | } | ||
843 | |||
844 | static int alloc_pmd_page(pud_t *pud) | ||
845 | { | ||
846 | pmd_t *pmd = (pmd_t *)get_zeroed_page(GFP_KERNEL | __GFP_NOTRACK); | ||
847 | if (!pmd) | ||
848 | return -1; | ||
849 | |||
850 | set_pud(pud, __pud(__pa(pmd) | _KERNPG_TABLE)); | ||
851 | return 0; | ||
852 | } | ||
853 | |||
854 | static void populate_pte(struct cpa_data *cpa, | ||
855 | unsigned long start, unsigned long end, | ||
856 | unsigned num_pages, pmd_t *pmd, pgprot_t pgprot) | ||
857 | { | ||
858 | pte_t *pte; | ||
859 | |||
860 | pte = pte_offset_kernel(pmd, start); | ||
861 | |||
862 | while (num_pages-- && start < end) { | ||
863 | |||
864 | /* deal with the NX bit */ | ||
865 | if (!(pgprot_val(pgprot) & _PAGE_NX)) | ||
866 | cpa->pfn &= ~_PAGE_NX; | ||
867 | |||
868 | set_pte(pte, pfn_pte(cpa->pfn >> PAGE_SHIFT, pgprot)); | ||
869 | |||
870 | start += PAGE_SIZE; | ||
871 | cpa->pfn += PAGE_SIZE; | ||
872 | pte++; | ||
873 | } | ||
874 | } | ||
875 | |||
876 | static int populate_pmd(struct cpa_data *cpa, | ||
877 | unsigned long start, unsigned long end, | ||
878 | unsigned num_pages, pud_t *pud, pgprot_t pgprot) | ||
879 | { | ||
880 | unsigned int cur_pages = 0; | ||
881 | pmd_t *pmd; | ||
882 | |||
883 | /* | ||
884 | * Not on a 2M boundary? | ||
885 | */ | ||
886 | if (start & (PMD_SIZE - 1)) { | ||
887 | unsigned long pre_end = start + (num_pages << PAGE_SHIFT); | ||
888 | unsigned long next_page = (start + PMD_SIZE) & PMD_MASK; | ||
889 | |||
890 | pre_end = min_t(unsigned long, pre_end, next_page); | ||
891 | cur_pages = (pre_end - start) >> PAGE_SHIFT; | ||
892 | cur_pages = min_t(unsigned int, num_pages, cur_pages); | ||
893 | |||
894 | /* | ||
895 | * Need a PTE page? | ||
896 | */ | ||
897 | pmd = pmd_offset(pud, start); | ||
898 | if (pmd_none(*pmd)) | ||
899 | if (alloc_pte_page(pmd)) | ||
900 | return -1; | ||
901 | |||
902 | populate_pte(cpa, start, pre_end, cur_pages, pmd, pgprot); | ||
903 | |||
904 | start = pre_end; | ||
905 | } | ||
906 | |||
907 | /* | ||
908 | * We mapped them all? | ||
909 | */ | ||
910 | if (num_pages == cur_pages) | ||
911 | return cur_pages; | ||
912 | |||
913 | while (end - start >= PMD_SIZE) { | ||
914 | |||
915 | /* | ||
916 | * We cannot use a 1G page so allocate a PMD page if needed. | ||
917 | */ | ||
918 | if (pud_none(*pud)) | ||
919 | if (alloc_pmd_page(pud)) | ||
920 | return -1; | ||
921 | |||
922 | pmd = pmd_offset(pud, start); | ||
923 | |||
924 | set_pmd(pmd, __pmd(cpa->pfn | _PAGE_PSE | massage_pgprot(pgprot))); | ||
925 | |||
926 | start += PMD_SIZE; | ||
927 | cpa->pfn += PMD_SIZE; | ||
928 | cur_pages += PMD_SIZE >> PAGE_SHIFT; | ||
929 | } | ||
930 | |||
931 | /* | ||
932 | * Map trailing 4K pages. | ||
933 | */ | ||
934 | if (start < end) { | ||
935 | pmd = pmd_offset(pud, start); | ||
936 | if (pmd_none(*pmd)) | ||
937 | if (alloc_pte_page(pmd)) | ||
938 | return -1; | ||
939 | |||
940 | populate_pte(cpa, start, end, num_pages - cur_pages, | ||
941 | pmd, pgprot); | ||
942 | } | ||
943 | return num_pages; | ||
944 | } | ||
945 | |||
946 | static int populate_pud(struct cpa_data *cpa, unsigned long start, pgd_t *pgd, | ||
947 | pgprot_t pgprot) | ||
948 | { | ||
949 | pud_t *pud; | ||
950 | unsigned long end; | ||
951 | int cur_pages = 0; | ||
952 | |||
953 | end = start + (cpa->numpages << PAGE_SHIFT); | ||
954 | |||
955 | /* | ||
956 | * Not on a GB page boundary? => map everything up to it with | ||
957 | * smaller pages. | ||
958 | */ | ||
959 | if (start & (PUD_SIZE - 1)) { | ||
960 | unsigned long pre_end; | ||
961 | unsigned long next_page = (start + PUD_SIZE) & PUD_MASK; | ||
962 | |||
963 | pre_end = min_t(unsigned long, end, next_page); | ||
964 | cur_pages = (pre_end - start) >> PAGE_SHIFT; | ||
965 | cur_pages = min_t(int, (int)cpa->numpages, cur_pages); | ||
966 | |||
967 | pud = pud_offset(pgd, start); | ||
968 | |||
969 | /* | ||
970 | * Need a PMD page? | ||
971 | */ | ||
972 | if (pud_none(*pud)) | ||
973 | if (alloc_pmd_page(pud)) | ||
974 | return -1; | ||
975 | |||
976 | cur_pages = populate_pmd(cpa, start, pre_end, cur_pages, | ||
977 | pud, pgprot); | ||
978 | if (cur_pages < 0) | ||
979 | return cur_pages; | ||
980 | |||
981 | start = pre_end; | ||
982 | } | ||
983 | |||
984 | /* We mapped them all? */ | ||
985 | if (cpa->numpages == cur_pages) | ||
986 | return cur_pages; | ||
987 | |||
988 | pud = pud_offset(pgd, start); | ||
989 | |||
990 | /* | ||
991 | * Map everything starting from the GB boundary, possibly with 1G pages | ||
992 | */ | ||
993 | while (end - start >= PUD_SIZE) { | ||
994 | set_pud(pud, __pud(cpa->pfn | _PAGE_PSE | massage_pgprot(pgprot))); | ||
995 | |||
996 | start += PUD_SIZE; | ||
997 | cpa->pfn += PUD_SIZE; | ||
998 | cur_pages += PUD_SIZE >> PAGE_SHIFT; | ||
999 | pud++; | ||
1000 | } | ||
1001 | |||
1002 | /* Map trailing leftover */ | ||
1003 | if (start < end) { | ||
1004 | int tmp; | ||
1005 | |||
1006 | pud = pud_offset(pgd, start); | ||
1007 | if (pud_none(*pud)) | ||
1008 | if (alloc_pmd_page(pud)) | ||
1009 | return -1; | ||
1010 | |||
1011 | tmp = populate_pmd(cpa, start, end, cpa->numpages - cur_pages, | ||
1012 | pud, pgprot); | ||
1013 | if (tmp < 0) | ||
1014 | return cur_pages; | ||
1015 | |||
1016 | cur_pages += tmp; | ||
1017 | } | ||
1018 | return cur_pages; | ||
1019 | } | ||
1020 | |||
1021 | /* | ||
1022 | * Restrictions that apply to the kernel page table do not necessarily | ||
1023 | * hold when mapping into an alternate PGD. | ||
1024 | */ | ||
1025 | static int populate_pgd(struct cpa_data *cpa, unsigned long addr) | ||
1026 | { | ||
1027 | pgprot_t pgprot = __pgprot(_KERNPG_TABLE); | ||
1028 | pud_t *pud = NULL; /* shut up gcc */ | ||
1029 | pgd_t *pgd_entry; | ||
1030 | int ret; | ||
1031 | |||
1032 | pgd_entry = cpa->pgd + pgd_index(addr); | ||
1033 | |||
1034 | /* | ||
1035 | * Allocate a PUD page and hand it down for mapping. | ||
1036 | */ | ||
1037 | if (pgd_none(*pgd_entry)) { | ||
1038 | pud = (pud_t *)get_zeroed_page(GFP_KERNEL | __GFP_NOTRACK); | ||
1039 | if (!pud) | ||
1040 | return -1; | ||
1041 | |||
1042 | set_pgd(pgd_entry, __pgd(__pa(pud) | _KERNPG_TABLE)); | ||
1043 | } | ||
1044 | |||
1045 | pgprot_val(pgprot) &= ~pgprot_val(cpa->mask_clr); | ||
1046 | pgprot_val(pgprot) |= pgprot_val(cpa->mask_set); | ||
1047 | |||
1048 | ret = populate_pud(cpa, addr, pgd_entry, pgprot); | ||
1049 | if (ret < 0) { | ||
1050 | unmap_pgd_range(cpa->pgd, addr, | ||
1051 | addr + (cpa->numpages << PAGE_SHIFT)); | ||
1052 | return ret; | ||
1053 | } | ||
1054 | |||
1055 | cpa->numpages = ret; | ||
1056 | return 0; | ||
1057 | } | ||
1058 | |||
653 | static int __cpa_process_fault(struct cpa_data *cpa, unsigned long vaddr, | 1059 | static int __cpa_process_fault(struct cpa_data *cpa, unsigned long vaddr, |
654 | int primary) | 1060 | int primary) |
655 | { | 1061 | { |
1062 | if (cpa->pgd) | ||
1063 | return populate_pgd(cpa, vaddr); | ||
1064 | |||
656 | /* | 1065 | /* |
657 | * Ignore all non primary paths. | 1066 | * Ignore all non primary paths. |
658 | */ | 1067 | */ |
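All of populate_pte/pmd/pud above follow one allocate-on-demand shape: if the upper-level entry is empty, allocate a zeroed table page, link it with _KERNPG_TABLE, then descend; on failure populate_pgd() unwinds with unmap_pgd_range(). The core step, condensed:

/* Condensed form of the allocate-on-demand step used by the
 * populate_*() helpers. */
static int ensure_pte_table(pmd_t *pmd)
{
	pte_t *pte;

	if (!pmd_none(*pmd))
		return 0;			/* table already present */

	pte = (pte_t *)get_zeroed_page(GFP_KERNEL | __GFP_NOTRACK);
	if (!pte)
		return -ENOMEM;

	set_pmd(pmd, __pmd(__pa(pte) | _KERNPG_TABLE));
	return 0;
}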
@@ -697,7 +1106,7 @@ static int __change_page_attr(struct cpa_data *cpa, int primary) | |||
697 | else | 1106 | else |
698 | address = *cpa->vaddr; | 1107 | address = *cpa->vaddr; |
699 | repeat: | 1108 | repeat: |
700 | kpte = lookup_address(address, &level); | 1109 | kpte = _lookup_address_cpa(cpa, address, &level); |
701 | if (!kpte) | 1110 | if (!kpte) |
702 | return __cpa_process_fault(cpa, address, primary); | 1111 | return __cpa_process_fault(cpa, address, primary); |
703 | 1112 | ||
@@ -761,7 +1170,7 @@ repeat: | |||
761 | /* | 1170 | /* |
762 | * We have to split the large page: | 1171 | * We have to split the large page: |
763 | */ | 1172 | */ |
764 | err = split_large_page(kpte, address); | 1173 | err = split_large_page(cpa, kpte, address); |
765 | if (!err) { | 1174 | if (!err) { |
766 | /* | 1175 | /* |
767 | * Do a global flush tlb after splitting the large page | 1176 | * Do a global flush tlb after splitting the large page |
@@ -910,6 +1319,8 @@ static int change_page_attr_set_clr(unsigned long *addr, int numpages, | |||
910 | int ret, cache, checkalias; | 1319 | int ret, cache, checkalias; |
911 | unsigned long baddr = 0; | 1320 | unsigned long baddr = 0; |
912 | 1321 | ||
1322 | memset(&cpa, 0, sizeof(cpa)); | ||
1323 | |||
913 | /* | 1324 | /* |
914 | * Check, if we are requested to change a not supported | 1325 | * Check, if we are requested to change a not supported |
915 | * feature: | 1326 | * feature: |
@@ -982,10 +1393,10 @@ static int change_page_attr_set_clr(unsigned long *addr, int numpages, | |||
982 | cache = cache_attr(mask_set); | 1393 | cache = cache_attr(mask_set); |
983 | 1394 | ||
984 | /* | 1395 | /* |
985 | * On success we use clflush, when the CPU supports it to | 1396 | * On success we use CLFLUSH, when the CPU supports it to |
986 | * avoid the wbindv. If the CPU does not support it and in the | 1397 | * avoid the WBINVD. If the CPU does not support it and in the |
987 | * error case we fall back to cpa_flush_all (which uses | 1398 | * error case we fall back to cpa_flush_all (which uses |
988 | * wbindv): | 1399 | * WBINVD): |
989 | */ | 1400 | */ |
990 | if (!ret && cpu_has_clflush) { | 1401 | if (!ret && cpu_has_clflush) { |
991 | if (cpa.flags & (CPA_PAGES_ARRAY | CPA_ARRAY)) { | 1402 | if (cpa.flags & (CPA_PAGES_ARRAY | CPA_ARRAY)) { |
@@ -1356,6 +1767,7 @@ static int __set_pages_p(struct page *page, int numpages) | |||
1356 | { | 1767 | { |
1357 | unsigned long tempaddr = (unsigned long) page_address(page); | 1768 | unsigned long tempaddr = (unsigned long) page_address(page); |
1358 | struct cpa_data cpa = { .vaddr = &tempaddr, | 1769 | struct cpa_data cpa = { .vaddr = &tempaddr, |
1770 | .pgd = NULL, | ||
1359 | .numpages = numpages, | 1771 | .numpages = numpages, |
1360 | .mask_set = __pgprot(_PAGE_PRESENT | _PAGE_RW), | 1772 | .mask_set = __pgprot(_PAGE_PRESENT | _PAGE_RW), |
1361 | .mask_clr = __pgprot(0), | 1773 | .mask_clr = __pgprot(0), |
@@ -1374,6 +1786,7 @@ static int __set_pages_np(struct page *page, int numpages) | |||
1374 | { | 1786 | { |
1375 | unsigned long tempaddr = (unsigned long) page_address(page); | 1787 | unsigned long tempaddr = (unsigned long) page_address(page); |
1376 | struct cpa_data cpa = { .vaddr = &tempaddr, | 1788 | struct cpa_data cpa = { .vaddr = &tempaddr, |
1789 | .pgd = NULL, | ||
1377 | .numpages = numpages, | 1790 | .numpages = numpages, |
1378 | .mask_set = __pgprot(0), | 1791 | .mask_set = __pgprot(0), |
1379 | .mask_clr = __pgprot(_PAGE_PRESENT | _PAGE_RW), | 1792 | .mask_clr = __pgprot(_PAGE_PRESENT | _PAGE_RW), |
@@ -1434,6 +1847,42 @@ bool kernel_page_present(struct page *page) | |||
1434 | 1847 | ||
1435 | #endif /* CONFIG_DEBUG_PAGEALLOC */ | 1848 | #endif /* CONFIG_DEBUG_PAGEALLOC */ |
1436 | 1849 | ||
1850 | int kernel_map_pages_in_pgd(pgd_t *pgd, u64 pfn, unsigned long address, | ||
1851 | unsigned numpages, unsigned long page_flags) | ||
1852 | { | ||
1853 | int retval = -EINVAL; | ||
1854 | |||
1855 | struct cpa_data cpa = { | ||
1856 | .vaddr = &address, | ||
1857 | .pfn = pfn, | ||
1858 | .pgd = pgd, | ||
1859 | .numpages = numpages, | ||
1860 | .mask_set = __pgprot(0), | ||
1861 | .mask_clr = __pgprot(0), | ||
1862 | .flags = 0, | ||
1863 | }; | ||
1864 | |||
1865 | if (!(__supported_pte_mask & _PAGE_NX)) | ||
1866 | goto out; | ||
1867 | |||
1868 | if (!(page_flags & _PAGE_NX)) | ||
1869 | cpa.mask_clr = __pgprot(_PAGE_NX); | ||
1870 | |||
1871 | cpa.mask_set = __pgprot(_PAGE_PRESENT | page_flags); | ||
1872 | |||
1873 | retval = __change_page_attr_set_clr(&cpa, 0); | ||
1874 | __flush_tlb_all(); | ||
1875 | |||
1876 | out: | ||
1877 | return retval; | ||
1878 | } | ||
1879 | |||
1880 | void kernel_unmap_pages_in_pgd(pgd_t *root, unsigned long address, | ||
1881 | unsigned numpages) | ||
1882 | { | ||
1883 | unmap_pgd_range(root, address, address + (numpages << PAGE_SHIFT)); | ||
1884 | } | ||
1885 | |||
1437 | /* | 1886 | /* |
1438 | * The testcases use internal knowledge of the implementation that shouldn't | 1887 | * The testcases use internal knowledge of the implementation that shouldn't |
1439 | * be exposed to the rest of the kernel. Include these directly here. | 1888 | * be exposed to the rest of the kernel. Include these directly here. |
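kernel_map_pages_in_pgd() is the external entry point for building mappings in an alternate hierarchy (an EFI virtual-mode page table is the kind of user it is shaped for). One subtlety visible in the diff: populate_pte() computes pfn_pte(cpa->pfn >> PAGE_SHIFT, ...) and advances cpa->pfn by PAGE_SIZE, so despite its name the pfn argument is consumed in physical-address units. A hedged caller sketch:

/* Hypothetical caller mapping one firmware region into a private pgd.
 * 'pfn' is advanced in PAGE_SIZE units internally, so a physical
 * address is what round-trips correctly here. Omitting _PAGE_NX from
 * page_flags yields an executable mapping (NX is cleared). */
int map_runtime_region(pgd_t *alt_pgd, phys_addr_t phys,
		       unsigned long virt, unsigned npages)
{
	return kernel_map_pages_in_pgd(alt_pgd, phys, virt, npages, _PAGE_RW);
}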
diff --git a/arch/x86/mm/pgtable_32.c b/arch/x86/mm/pgtable_32.c index a69bcb8c7621..4dd8cf652579 100644 --- a/arch/x86/mm/pgtable_32.c +++ b/arch/x86/mm/pgtable_32.c | |||
@@ -127,7 +127,7 @@ static int __init parse_reservetop(char *arg) | |||
127 | 127 | ||
128 | address = memparse(arg, &arg); | 128 | address = memparse(arg, &arg); |
129 | reserve_top_address(address); | 129 | reserve_top_address(address); |
130 | fixup_early_ioremap(); | 130 | early_ioremap_init(); |
131 | return 0; | 131 | return 0; |
132 | } | 132 | } |
133 | early_param("reservetop", parse_reservetop); | 133 | early_param("reservetop", parse_reservetop); |
diff --git a/arch/x86/mm/srat.c b/arch/x86/mm/srat.c index 266ca912f62e..66338a60aa6e 100644 --- a/arch/x86/mm/srat.c +++ b/arch/x86/mm/srat.c | |||
@@ -42,15 +42,31 @@ static __init inline int srat_disabled(void) | |||
42 | return acpi_numa < 0; | 42 | return acpi_numa < 0; |
43 | } | 43 | } |
44 | 44 | ||
45 | /* Callback for SLIT parsing */ | 45 | /* |
46 | * Callback for SLIT parsing. pxm_to_node() returns NUMA_NO_NODE for | ||
47 | * I/O localities since SRAT does not list them. I/O localities are | ||
48 | * not supported at this point. | ||
49 | */ | ||
46 | void __init acpi_numa_slit_init(struct acpi_table_slit *slit) | 50 | void __init acpi_numa_slit_init(struct acpi_table_slit *slit) |
47 | { | 51 | { |
48 | int i, j; | 52 | int i, j; |
49 | 53 | ||
50 | for (i = 0; i < slit->locality_count; i++) | 54 | for (i = 0; i < slit->locality_count; i++) { |
51 | for (j = 0; j < slit->locality_count; j++) | 55 | const int from_node = pxm_to_node(i); |
52 | numa_set_distance(pxm_to_node(i), pxm_to_node(j), | 56 | |
57 | if (from_node == NUMA_NO_NODE) | ||
58 | continue; | ||
59 | |||
60 | for (j = 0; j < slit->locality_count; j++) { | ||
61 | const int to_node = pxm_to_node(j); | ||
62 | |||
63 | if (to_node == NUMA_NO_NODE) | ||
64 | continue; | ||
65 | |||
66 | numa_set_distance(from_node, to_node, | ||
53 | slit->entry[slit->locality_count * i + j]); | 67 | slit->entry[slit->locality_count * i + j]); |
68 | } | ||
69 | } | ||
54 | } | 70 | } |
55 | 71 | ||
56 | /* Callback for Proximity Domain -> x2APIC mapping */ | 72 | /* Callback for Proximity Domain -> x2APIC mapping */ |
@@ -181,6 +197,11 @@ acpi_numa_memory_affinity_init(struct acpi_srat_mem_affinity *ma) | |||
181 | (unsigned long long) start, (unsigned long long) end - 1, | 197 | (unsigned long long) start, (unsigned long long) end - 1, |
182 | hotpluggable ? " hotplug" : ""); | 198 | hotpluggable ? " hotplug" : ""); |
183 | 199 | ||
200 | /* Mark hotplug range in memblock. */ | ||
201 | if (hotpluggable && memblock_mark_hotplug(start, ma->length)) | ||
202 | pr_warn("SRAT: Failed to mark hotplug range [mem %#010Lx-%#010Lx] in memblock\n", | ||
203 | (unsigned long long)start, (unsigned long long)end - 1); | ||
204 | |||
184 | return 0; | 205 | return 0; |
185 | out_err_bad_srat: | 206 | out_err_bad_srat: |
186 | bad_srat(); | 207 | bad_srat(); |
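The SLIT is a flattened N x N matrix of one-byte distances, so the distance from locality i to locality j lives at entry[N*i + j]; skipping NUMA_NO_NODE keeps I/O-only proximity domains (which have no node) out of the node distance table. The indexing, isolated:

static u8 slit_distance(const struct acpi_table_slit *slit, int i, int j)
{
	return slit->entry[slit->locality_count * i + j];
}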
diff --git a/arch/x86/mm/tlb.c b/arch/x86/mm/tlb.c index ae699b3bbac8..dd8dda167a24 100644 --- a/arch/x86/mm/tlb.c +++ b/arch/x86/mm/tlb.c | |||
@@ -103,7 +103,7 @@ static void flush_tlb_func(void *info) | |||
103 | if (f->flush_mm != this_cpu_read(cpu_tlbstate.active_mm)) | 103 | if (f->flush_mm != this_cpu_read(cpu_tlbstate.active_mm)) |
104 | return; | 104 | return; |
105 | 105 | ||
106 | count_vm_event(NR_TLB_REMOTE_FLUSH_RECEIVED); | 106 | count_vm_tlb_event(NR_TLB_REMOTE_FLUSH_RECEIVED); |
107 | if (this_cpu_read(cpu_tlbstate.state) == TLBSTATE_OK) { | 107 | if (this_cpu_read(cpu_tlbstate.state) == TLBSTATE_OK) { |
108 | if (f->flush_end == TLB_FLUSH_ALL) | 108 | if (f->flush_end == TLB_FLUSH_ALL) |
109 | local_flush_tlb(); | 109 | local_flush_tlb(); |
@@ -131,7 +131,7 @@ void native_flush_tlb_others(const struct cpumask *cpumask, | |||
131 | info.flush_start = start; | 131 | info.flush_start = start; |
132 | info.flush_end = end; | 132 | info.flush_end = end; |
133 | 133 | ||
134 | count_vm_event(NR_TLB_REMOTE_FLUSH); | 134 | count_vm_tlb_event(NR_TLB_REMOTE_FLUSH); |
135 | if (is_uv_system()) { | 135 | if (is_uv_system()) { |
136 | unsigned int cpu; | 136 | unsigned int cpu; |
137 | 137 | ||
@@ -151,44 +151,19 @@ void flush_tlb_current_task(void) | |||
151 | 151 | ||
152 | preempt_disable(); | 152 | preempt_disable(); |
153 | 153 | ||
154 | count_vm_event(NR_TLB_LOCAL_FLUSH_ALL); | 154 | count_vm_tlb_event(NR_TLB_LOCAL_FLUSH_ALL); |
155 | local_flush_tlb(); | 155 | local_flush_tlb(); |
156 | if (cpumask_any_but(mm_cpumask(mm), smp_processor_id()) < nr_cpu_ids) | 156 | if (cpumask_any_but(mm_cpumask(mm), smp_processor_id()) < nr_cpu_ids) |
157 | flush_tlb_others(mm_cpumask(mm), mm, 0UL, TLB_FLUSH_ALL); | 157 | flush_tlb_others(mm_cpumask(mm), mm, 0UL, TLB_FLUSH_ALL); |
158 | preempt_enable(); | 158 | preempt_enable(); |
159 | } | 159 | } |
160 | 160 | ||
161 | /* | ||
162 | * It can find out the THP large page, or | ||
163 | * HUGETLB page in tlb_flush when THP disabled | ||
164 | */ | ||
165 | static inline unsigned long has_large_page(struct mm_struct *mm, | ||
166 | unsigned long start, unsigned long end) | ||
167 | { | ||
168 | pgd_t *pgd; | ||
169 | pud_t *pud; | ||
170 | pmd_t *pmd; | ||
171 | unsigned long addr = ALIGN(start, HPAGE_SIZE); | ||
172 | for (; addr < end; addr += HPAGE_SIZE) { | ||
173 | pgd = pgd_offset(mm, addr); | ||
174 | if (likely(!pgd_none(*pgd))) { | ||
175 | pud = pud_offset(pgd, addr); | ||
176 | if (likely(!pud_none(*pud))) { | ||
177 | pmd = pmd_offset(pud, addr); | ||
178 | if (likely(!pmd_none(*pmd))) | ||
179 | if (pmd_large(*pmd)) | ||
180 | return addr; | ||
181 | } | ||
182 | } | ||
183 | } | ||
184 | return 0; | ||
185 | } | ||
186 | |||
187 | void flush_tlb_mm_range(struct mm_struct *mm, unsigned long start, | 161 | void flush_tlb_mm_range(struct mm_struct *mm, unsigned long start, |
188 | unsigned long end, unsigned long vmflag) | 162 | unsigned long end, unsigned long vmflag) |
189 | { | 163 | { |
190 | unsigned long addr; | 164 | unsigned long addr; |
191 | unsigned act_entries, tlb_entries = 0; | 165 | unsigned act_entries, tlb_entries = 0; |
166 | unsigned long nr_base_pages; | ||
192 | 167 | ||
193 | preempt_disable(); | 168 | preempt_disable(); |
194 | if (current->active_mm != mm) | 169 | if (current->active_mm != mm) |
@@ -210,21 +185,20 @@ void flush_tlb_mm_range(struct mm_struct *mm, unsigned long start, | |||
210 | tlb_entries = tlb_lli_4k[ENTRIES]; | 185 | tlb_entries = tlb_lli_4k[ENTRIES]; |
211 | else | 186 | else |
212 | tlb_entries = tlb_lld_4k[ENTRIES]; | 187 | tlb_entries = tlb_lld_4k[ENTRIES]; |
188 | |||
213 | /* Assume all TLB entries were occupied by this task */ | 189 | /* Assume all TLB entries were occupied by this task */ |
214 | act_entries = mm->total_vm > tlb_entries ? tlb_entries : mm->total_vm; | 190 | act_entries = tlb_entries >> tlb_flushall_shift; |
191 | act_entries = mm->total_vm > act_entries ? act_entries : mm->total_vm; | ||
192 | nr_base_pages = (end - start) >> PAGE_SHIFT; | ||
215 | 193 | ||
216 | /* tlb_flushall_shift sets the balance point; details in the commit log */ | 194 | /* tlb_flushall_shift sets the balance point; details in the commit log */ |
217 | if ((end - start) >> PAGE_SHIFT > act_entries >> tlb_flushall_shift) { | 195 | if (nr_base_pages > act_entries) { |
218 | count_vm_event(NR_TLB_LOCAL_FLUSH_ALL); | 196 | count_vm_tlb_event(NR_TLB_LOCAL_FLUSH_ALL); |
219 | local_flush_tlb(); | 197 | local_flush_tlb(); |
220 | } else { | 198 | } else { |
221 | if (has_large_page(mm, start, end)) { | ||
222 | local_flush_tlb(); | ||
223 | goto flush_all; | ||
224 | } | ||
225 | /* flush range by one by one 'invlpg' */ | 199 | /* flush range by one by one 'invlpg' */ |
226 | for (addr = start; addr < end; addr += PAGE_SIZE) { | 200 | for (addr = start; addr < end; addr += PAGE_SIZE) { |
227 | count_vm_event(NR_TLB_LOCAL_FLUSH_ONE); | 201 | count_vm_tlb_event(NR_TLB_LOCAL_FLUSH_ONE); |
228 | __flush_tlb_single(addr); | 202 | __flush_tlb_single(addr); |
229 | } | 203 | } |
230 | 204 | ||
@@ -262,7 +236,7 @@ void flush_tlb_page(struct vm_area_struct *vma, unsigned long start) | |||
262 | 236 | ||
263 | static void do_flush_tlb_all(void *info) | 237 | static void do_flush_tlb_all(void *info) |
264 | { | 238 | { |
265 | count_vm_event(NR_TLB_REMOTE_FLUSH_RECEIVED); | 239 | count_vm_tlb_event(NR_TLB_REMOTE_FLUSH_RECEIVED); |
266 | __flush_tlb_all(); | 240 | __flush_tlb_all(); |
267 | if (this_cpu_read(cpu_tlbstate.state) == TLBSTATE_LAZY) | 241 | if (this_cpu_read(cpu_tlbstate.state) == TLBSTATE_LAZY) |
268 | leave_mm(smp_processor_id()); | 242 | leave_mm(smp_processor_id()); |
@@ -270,7 +244,7 @@ static void do_flush_tlb_all(void *info) | |||
270 | 244 | ||
271 | void flush_tlb_all(void) | 245 | void flush_tlb_all(void) |
272 | { | 246 | { |
273 | count_vm_event(NR_TLB_REMOTE_FLUSH); | 247 | count_vm_tlb_event(NR_TLB_REMOTE_FLUSH); |
274 | on_each_cpu(do_flush_tlb_all, NULL, 1); | 248 | on_each_cpu(do_flush_tlb_all, NULL, 1); |
275 | } | 249 | } |
276 | 250 | ||
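The rewritten heuristic compares the number of 4K pages in the range against an estimate of how much of the TLB the task can usefully occupy (tlb_entries scaled down by tlb_flushall_shift, capped by total_vm); larger ranges get one cheap full flush instead of many invlpg operations. The decision, condensed (variables as in flush_tlb_mm_range() above):

act_entries = tlb_entries >> tlb_flushall_shift;
act_entries = mm->total_vm > act_entries ? act_entries : mm->total_vm;
nr_base_pages = (end - start) >> PAGE_SHIFT;

if (nr_base_pages > act_entries) {
	local_flush_tlb();			/* one full flush is cheaper */
} else {
	for (addr = start; addr < end; addr += PAGE_SIZE)
		__flush_tlb_single(addr);	/* targeted invlpg per page */
}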
diff --git a/arch/x86/net/bpf_jit.S b/arch/x86/net/bpf_jit.S index 877b9a1b2152..01495755701b 100644 --- a/arch/x86/net/bpf_jit.S +++ b/arch/x86/net/bpf_jit.S | |||
@@ -140,7 +140,7 @@ bpf_slow_path_byte_msh: | |||
140 | push %r9; \ | 140 | push %r9; \ |
141 | push SKBDATA; \ | 141 | push SKBDATA; \ |
142 | /* rsi already has offset */ \ | 142 | /* rsi already has offset */ \ |
143 | mov $SIZE,%ecx; /* size */ \ | 143 | mov $SIZE,%edx; /* size */ \ |
144 | call bpf_internal_load_pointer_neg_helper; \ | 144 | call bpf_internal_load_pointer_neg_helper; \ |
145 | test %rax,%rax; \ | 145 | test %rax,%rax; \ |
146 | pop SKBDATA; \ | 146 | pop SKBDATA; \ |
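The %ecx to %edx change is an ABI fix: under the SysV x86-64 calling convention the helper's three arguments arrive in %rdi, %rsi and %rdx, so loading the size into %ecx left the third parameter uninitialized. The helper's C prototype makes the register assignment explicit:

/* Prototype from net/core/filter.c: skb -> %rdi, k -> %esi,
 * size -> %edx under the SysV x86-64 ABI, hence "mov $SIZE,%edx". */
void *bpf_internal_load_pointer_neg_helper(const struct sk_buff *skb,
					   int k, unsigned int size);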
diff --git a/arch/x86/net/bpf_jit_comp.c b/arch/x86/net/bpf_jit_comp.c index 4ed75dd81d05..dc017735bb91 100644 --- a/arch/x86/net/bpf_jit_comp.c +++ b/arch/x86/net/bpf_jit_comp.c | |||
@@ -553,13 +553,13 @@ void bpf_jit_compile(struct sk_filter *fp) | |||
553 | } | 553 | } |
554 | break; | 554 | break; |
555 | case BPF_S_ANC_RXHASH: | 555 | case BPF_S_ANC_RXHASH: |
556 | BUILD_BUG_ON(FIELD_SIZEOF(struct sk_buff, rxhash) != 4); | 556 | BUILD_BUG_ON(FIELD_SIZEOF(struct sk_buff, hash) != 4); |
557 | if (is_imm8(offsetof(struct sk_buff, rxhash))) { | 557 | if (is_imm8(offsetof(struct sk_buff, hash))) { |
558 | /* mov off8(%rdi),%eax */ | 558 | /* mov off8(%rdi),%eax */ |
559 | EMIT3(0x8b, 0x47, offsetof(struct sk_buff, rxhash)); | 559 | EMIT3(0x8b, 0x47, offsetof(struct sk_buff, hash)); |
560 | } else { | 560 | } else { |
561 | EMIT2(0x8b, 0x87); | 561 | EMIT2(0x8b, 0x87); |
562 | EMIT(offsetof(struct sk_buff, rxhash), 4); | 562 | EMIT(offsetof(struct sk_buff, hash), 4); |
563 | } | 563 | } |
564 | break; | 564 | break; |
565 | case BPF_S_ANC_QUEUE: | 565 | case BPF_S_ANC_QUEUE: |
@@ -772,6 +772,7 @@ cond_branch: f_offset = addrs[i + filter[i].jf] - addrs[i]; | |||
772 | bpf_flush_icache(header, image + proglen); | 772 | bpf_flush_icache(header, image + proglen); |
773 | set_memory_ro((unsigned long)header, header->pages); | 773 | set_memory_ro((unsigned long)header, header->pages); |
774 | fp->bpf_func = (void *)image; | 774 | fp->bpf_func = (void *)image; |
775 | fp->jited = 1; | ||
775 | } | 776 | } |
776 | out: | 777 | out: |
777 | kfree(addrs); | 778 | kfree(addrs); |
@@ -791,7 +792,7 @@ static void bpf_jit_free_deferred(struct work_struct *work) | |||
791 | 792 | ||
792 | void bpf_jit_free(struct sk_filter *fp) | 793 | void bpf_jit_free(struct sk_filter *fp) |
793 | { | 794 | { |
794 | if (fp->bpf_func != sk_run_filter) { | 795 | if (fp->jited) { |
795 | INIT_WORK(&fp->work, bpf_jit_free_deferred); | 796 | INIT_WORK(&fp->work, bpf_jit_free_deferred); |
796 | schedule_work(&fp->work); | 797 | schedule_work(&fp->work); |
797 | } else { | 798 | } else { |
diff --git a/arch/x86/oprofile/nmi_int.c b/arch/x86/oprofile/nmi_int.c index 6890d8498e0b..379e8bd0deea 100644 --- a/arch/x86/oprofile/nmi_int.c +++ b/arch/x86/oprofile/nmi_int.c | |||
@@ -494,14 +494,19 @@ static int nmi_setup(void) | |||
494 | if (err) | 494 | if (err) |
495 | goto fail; | 495 | goto fail; |
496 | 496 | ||
497 | cpu_notifier_register_begin(); | ||
498 | |||
499 | /* Use get/put_online_cpus() to protect 'nmi_enabled' */ | ||
497 | get_online_cpus(); | 500 | get_online_cpus(); |
498 | register_cpu_notifier(&oprofile_cpu_nb); | ||
499 | nmi_enabled = 1; | 501 | nmi_enabled = 1; |
500 | /* make nmi_enabled visible to the nmi handler: */ | 502 | /* make nmi_enabled visible to the nmi handler: */ |
501 | smp_mb(); | 503 | smp_mb(); |
502 | on_each_cpu(nmi_cpu_setup, NULL, 1); | 504 | on_each_cpu(nmi_cpu_setup, NULL, 1); |
505 | __register_cpu_notifier(&oprofile_cpu_nb); | ||
503 | put_online_cpus(); | 506 | put_online_cpus(); |
504 | 507 | ||
508 | cpu_notifier_register_done(); | ||
509 | |||
505 | return 0; | 510 | return 0; |
506 | fail: | 511 | fail: |
507 | free_msrs(); | 512 | free_msrs(); |
@@ -512,12 +517,18 @@ static void nmi_shutdown(void) | |||
512 | { | 517 | { |
513 | struct op_msrs *msrs; | 518 | struct op_msrs *msrs; |
514 | 519 | ||
520 | cpu_notifier_register_begin(); | ||
521 | |||
522 | /* Use get/put_online_cpus() to protect 'nmi_enabled' & 'ctr_running' */ | ||
515 | get_online_cpus(); | 523 | get_online_cpus(); |
516 | unregister_cpu_notifier(&oprofile_cpu_nb); | ||
517 | on_each_cpu(nmi_cpu_shutdown, NULL, 1); | 524 | on_each_cpu(nmi_cpu_shutdown, NULL, 1); |
518 | nmi_enabled = 0; | 525 | nmi_enabled = 0; |
519 | ctr_running = 0; | 526 | ctr_running = 0; |
527 | __unregister_cpu_notifier(&oprofile_cpu_nb); | ||
520 | put_online_cpus(); | 528 | put_online_cpus(); |
529 | |||
530 | cpu_notifier_register_done(); | ||
531 | |||
521 | /* make variables visible to the nmi handler: */ | 532 | /* make variables visible to the nmi handler: */ |
522 | smp_mb(); | 533 | smp_mb(); |
523 | unregister_nmi_handler(NMI_LOCAL, "oprofile"); | 534 | unregister_nmi_handler(NMI_LOCAL, "oprofile"); |
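The nmi_setup()/nmi_shutdown() hunks switch to the cpu_notifier_register_begin()/cpu_notifier_register_done() bracket: the __register/__unregister variants must be called with that lock already held, and get/put_online_cpus() still protects the per-CPU state inside it. The pattern, condensed; setup_state() is a placeholder for the on_each_cpu() work:

    /* Sketch of the registration bracket used in both hunks above. */
    cpu_notifier_register_begin();
    get_online_cpus();
    setup_state();                              /* e.g. on_each_cpu(...) */
    __register_cpu_notifier(&oprofile_cpu_nb);  /* lock already held */
    put_online_cpus();
    cpu_notifier_register_done();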
diff --git a/arch/x86/pci/Makefile b/arch/x86/pci/Makefile index e063eed0f912..5c6fc3577a49 100644 --- a/arch/x86/pci/Makefile +++ b/arch/x86/pci/Makefile | |||
@@ -13,9 +13,6 @@ obj-y += legacy.o irq.o | |||
13 | 13 | ||
14 | obj-$(CONFIG_STA2X11) += sta2x11-fixup.o | 14 | obj-$(CONFIG_STA2X11) += sta2x11-fixup.o |
15 | 15 | ||
16 | obj-$(CONFIG_X86_VISWS) += visws.o | ||
17 | |||
18 | obj-$(CONFIG_X86_NUMAQ) += numaq_32.o | ||
19 | obj-$(CONFIG_X86_NUMACHIP) += numachip.o | 16 | obj-$(CONFIG_X86_NUMACHIP) += numachip.o |
20 | 17 | ||
21 | obj-$(CONFIG_X86_INTEL_MID) += intel_mid_pci.o | 18 | obj-$(CONFIG_X86_INTEL_MID) += intel_mid_pci.o |
diff --git a/arch/x86/pci/acpi.c b/arch/x86/pci/acpi.c index 4f25ec077552..01edac6c5e18 100644 --- a/arch/x86/pci/acpi.c +++ b/arch/x86/pci/acpi.c | |||
@@ -218,9 +218,8 @@ static void teardown_mcfg_map(struct pci_root_info *info) | |||
218 | } | 218 | } |
219 | #endif | 219 | #endif |
220 | 220 | ||
221 | static acpi_status | 221 | static acpi_status resource_to_addr(struct acpi_resource *resource, |
222 | resource_to_addr(struct acpi_resource *resource, | 222 | struct acpi_resource_address64 *addr) |
223 | struct acpi_resource_address64 *addr) | ||
224 | { | 223 | { |
225 | acpi_status status; | 224 | acpi_status status; |
226 | struct acpi_resource_memory24 *memory24; | 225 | struct acpi_resource_memory24 *memory24; |
@@ -265,8 +264,7 @@ resource_to_addr(struct acpi_resource *resource, | |||
265 | return AE_ERROR; | 264 | return AE_ERROR; |
266 | } | 265 | } |
267 | 266 | ||
268 | static acpi_status | 267 | static acpi_status count_resource(struct acpi_resource *acpi_res, void *data) |
269 | count_resource(struct acpi_resource *acpi_res, void *data) | ||
270 | { | 268 | { |
271 | struct pci_root_info *info = data; | 269 | struct pci_root_info *info = data; |
272 | struct acpi_resource_address64 addr; | 270 | struct acpi_resource_address64 addr; |
@@ -278,8 +276,7 @@ count_resource(struct acpi_resource *acpi_res, void *data) | |||
278 | return AE_OK; | 276 | return AE_OK; |
279 | } | 277 | } |
280 | 278 | ||
281 | static acpi_status | 279 | static acpi_status setup_resource(struct acpi_resource *acpi_res, void *data) |
282 | setup_resource(struct acpi_resource *acpi_res, void *data) | ||
283 | { | 280 | { |
284 | struct pci_root_info *info = data; | 281 | struct pci_root_info *info = data; |
285 | struct resource *res; | 282 | struct resource *res; |
@@ -435,9 +432,9 @@ static void release_pci_root_info(struct pci_host_bridge *bridge) | |||
435 | __release_pci_root_info(info); | 432 | __release_pci_root_info(info); |
436 | } | 433 | } |
437 | 434 | ||
438 | static void | 435 | static void probe_pci_root_info(struct pci_root_info *info, |
439 | probe_pci_root_info(struct pci_root_info *info, struct acpi_device *device, | 436 | struct acpi_device *device, |
440 | int busnum, int domain) | 437 | int busnum, int domain) |
441 | { | 438 | { |
442 | size_t size; | 439 | size_t size; |
443 | 440 | ||
@@ -473,16 +470,13 @@ probe_pci_root_info(struct pci_root_info *info, struct acpi_device *device, | |||
473 | struct pci_bus *pci_acpi_scan_root(struct acpi_pci_root *root) | 470 | struct pci_bus *pci_acpi_scan_root(struct acpi_pci_root *root) |
474 | { | 471 | { |
475 | struct acpi_device *device = root->device; | 472 | struct acpi_device *device = root->device; |
476 | struct pci_root_info *info = NULL; | 473 | struct pci_root_info *info; |
477 | int domain = root->segment; | 474 | int domain = root->segment; |
478 | int busnum = root->secondary.start; | 475 | int busnum = root->secondary.start; |
479 | LIST_HEAD(resources); | 476 | LIST_HEAD(resources); |
480 | struct pci_bus *bus = NULL; | 477 | struct pci_bus *bus; |
481 | struct pci_sysdata *sd; | 478 | struct pci_sysdata *sd; |
482 | int node; | 479 | int node; |
483 | #ifdef CONFIG_ACPI_NUMA | ||
484 | int pxm; | ||
485 | #endif | ||
486 | 480 | ||
487 | if (pci_ignore_seg) | 481 | if (pci_ignore_seg) |
488 | domain = 0; | 482 | domain = 0; |
@@ -494,19 +488,12 @@ struct pci_bus *pci_acpi_scan_root(struct acpi_pci_root *root) | |||
494 | return NULL; | 488 | return NULL; |
495 | } | 489 | } |
496 | 490 | ||
497 | node = -1; | 491 | node = acpi_get_node(device->handle); |
498 | #ifdef CONFIG_ACPI_NUMA | 492 | if (node == NUMA_NO_NODE) |
499 | pxm = acpi_get_pxm(device->handle); | 493 | node = x86_pci_root_bus_node(busnum); |
500 | if (pxm >= 0) | ||
501 | node = pxm_to_node(pxm); | ||
502 | if (node != -1) | ||
503 | set_mp_bus_to_node(busnum, node); | ||
504 | else | ||
505 | #endif | ||
506 | node = get_mp_bus_to_node(busnum); | ||
507 | 494 | ||
508 | if (node != -1 && !node_online(node)) | 495 | if (node != NUMA_NO_NODE && !node_online(node)) |
509 | node = -1; | 496 | node = NUMA_NO_NODE; |
510 | 497 | ||
511 | info = kzalloc(sizeof(*info), GFP_KERNEL); | 498 | info = kzalloc(sizeof(*info), GFP_KERNEL); |
512 | if (!info) { | 499 | if (!info) { |
@@ -519,15 +506,12 @@ struct pci_bus *pci_acpi_scan_root(struct acpi_pci_root *root) | |||
519 | sd->domain = domain; | 506 | sd->domain = domain; |
520 | sd->node = node; | 507 | sd->node = node; |
521 | sd->companion = device; | 508 | sd->companion = device; |
522 | /* | 509 | |
523 | * Maybe the desired pci bus has been already scanned. In such case | ||
524 | * it is unnecessary to scan the pci bus with the given domain,busnum. | ||
525 | */ | ||
526 | bus = pci_find_bus(domain, busnum); | 510 | bus = pci_find_bus(domain, busnum); |
527 | if (bus) { | 511 | if (bus) { |
528 | /* | 512 | /* |
529 | * If the desired bus exits, the content of bus->sysdata will | 513 | * If the desired bus has been scanned already, replace |
530 | * be replaced by sd. | 514 | * its bus->sysdata. |
531 | */ | 515 | */ |
532 | memcpy(bus->sysdata, sd, sizeof(*sd)); | 516 | memcpy(bus->sysdata, sd, sizeof(*sd)); |
533 | kfree(info); | 517 | kfree(info); |
@@ -572,15 +556,8 @@ struct pci_bus *pci_acpi_scan_root(struct acpi_pci_root *root) | |||
572 | pcie_bus_configure_settings(child); | 556 | pcie_bus_configure_settings(child); |
573 | } | 557 | } |
574 | 558 | ||
575 | if (bus && node != -1) { | 559 | if (bus && node != NUMA_NO_NODE) |
576 | #ifdef CONFIG_ACPI_NUMA | ||
577 | if (pxm >= 0) | ||
578 | dev_printk(KERN_DEBUG, &bus->dev, | ||
579 | "on NUMA node %d (pxm %d)\n", node, pxm); | ||
580 | #else | ||
581 | dev_printk(KERN_DEBUG, &bus->dev, "on NUMA node %d\n", node); | 560 | dev_printk(KERN_DEBUG, &bus->dev, "on NUMA node %d\n", node); |
582 | #endif | ||
583 | } | ||
584 | 561 | ||
585 | return bus; | 562 | return bus; |
586 | } | 563 | } |
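The pci_acpi_scan_root() hunks replace the open-coded pxm lookup with acpi_get_node(), which returns NUMA_NO_NODE on failure, so the CONFIG_ACPI_NUMA #ifdef blocks disappear. The resulting node resolution, condensed into one illustrative helper (pci_root_node is not a kernel symbol; the logic mirrors the diff):

    /* Sketch of the node lookup after the change; names from the diff. */
    static int pci_root_node(struct acpi_device *device, int busnum)
    {
            int node = acpi_get_node(device->handle);

            if (node == NUMA_NO_NODE)
                    node = x86_pci_root_bus_node(busnum);
            if (node != NUMA_NO_NODE && !node_online(node))
                    node = NUMA_NO_NODE;
            return node;
    }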
diff --git a/arch/x86/pci/amd_bus.c b/arch/x86/pci/amd_bus.c index a48be98e9ded..e88f4c53d7f6 100644 --- a/arch/x86/pci/amd_bus.c +++ b/arch/x86/pci/amd_bus.c | |||
@@ -44,15 +44,6 @@ static struct pci_root_info __init *find_pci_root_info(int node, int link) | |||
44 | return NULL; | 44 | return NULL; |
45 | } | 45 | } |
46 | 46 | ||
47 | static void __init set_mp_bus_range_to_node(int min_bus, int max_bus, int node) | ||
48 | { | ||
49 | #ifdef CONFIG_NUMA | ||
50 | int j; | ||
51 | |||
52 | for (j = min_bus; j <= max_bus; j++) | ||
53 | set_mp_bus_to_node(j, node); | ||
54 | #endif | ||
55 | } | ||
56 | /** | 47 | /** |
57 | * early_fill_mp_bus_info() | 48 | * early_fill_mp_bus_info() |
58 | * called before pcibios_scan_root and pci_scan_bus | 49 | * called before pcibios_scan_root and pci_scan_bus |
@@ -117,7 +108,6 @@ static int __init early_fill_mp_bus_info(void) | |||
117 | min_bus = (reg >> 16) & 0xff; | 108 | min_bus = (reg >> 16) & 0xff; |
118 | max_bus = (reg >> 24) & 0xff; | 109 | max_bus = (reg >> 24) & 0xff; |
119 | node = (reg >> 4) & 0x07; | 110 | node = (reg >> 4) & 0x07; |
120 | set_mp_bus_range_to_node(min_bus, max_bus, node); | ||
121 | link = (reg >> 8) & 0x03; | 111 | link = (reg >> 8) & 0x03; |
122 | 112 | ||
123 | info = alloc_pci_root_info(min_bus, max_bus, node, link); | 113 | info = alloc_pci_root_info(min_bus, max_bus, node, link); |
@@ -380,10 +370,13 @@ static int __init pci_io_ecs_init(void) | |||
380 | if (early_pci_allowed()) | 370 | if (early_pci_allowed()) |
381 | pci_enable_pci_io_ecs(); | 371 | pci_enable_pci_io_ecs(); |
382 | 372 | ||
383 | register_cpu_notifier(&amd_cpu_notifier); | 373 | cpu_notifier_register_begin(); |
384 | for_each_online_cpu(cpu) | 374 | for_each_online_cpu(cpu) |
385 | amd_cpu_notify(&amd_cpu_notifier, (unsigned long)CPU_ONLINE, | 375 | amd_cpu_notify(&amd_cpu_notifier, (unsigned long)CPU_ONLINE, |
386 | (void *)(long)cpu); | 376 | (void *)(long)cpu); |
377 | __register_cpu_notifier(&amd_cpu_notifier); | ||
378 | cpu_notifier_register_done(); | ||
379 | |||
387 | pci_probe |= PCI_HAS_IO_ECS; | 380 | pci_probe |= PCI_HAS_IO_ECS; |
388 | 381 | ||
389 | return 0; | 382 | return 0; |
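pci_io_ecs_init() adopts the same bracket as the oprofile hunks, and the ordering inside it matters: CPU_ONLINE is replayed for already-online CPUs before __register_cpu_notifier(), all under cpu_notifier_register_begin()/done(), so no hotplug event can slip in between the replay and the registration. Condensed from the hunk above:

    /* Sketch of the race-free bring-up ordering used above. */
    cpu_notifier_register_begin();
    for_each_online_cpu(cpu)
            amd_cpu_notify(&amd_cpu_notifier, (unsigned long)CPU_ONLINE,
                           (void *)(long)cpu);
    __register_cpu_notifier(&amd_cpu_notifier); /* no window for new CPUs */
    cpu_notifier_register_done();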
diff --git a/arch/x86/pci/bus_numa.c b/arch/x86/pci/bus_numa.c index c2735feb2508..f3a2cfc14125 100644 --- a/arch/x86/pci/bus_numa.c +++ b/arch/x86/pci/bus_numa.c | |||
@@ -10,9 +10,6 @@ static struct pci_root_info *x86_find_pci_root_info(int bus) | |||
10 | { | 10 | { |
11 | struct pci_root_info *info; | 11 | struct pci_root_info *info; |
12 | 12 | ||
13 | if (list_empty(&pci_root_infos)) | ||
14 | return NULL; | ||
15 | |||
16 | list_for_each_entry(info, &pci_root_infos, list) | 13 | list_for_each_entry(info, &pci_root_infos, list) |
17 | if (info->busn.start == bus) | 14 | if (info->busn.start == bus) |
18 | return info; | 15 | return info; |
@@ -20,6 +17,16 @@ static struct pci_root_info *x86_find_pci_root_info(int bus) | |||
20 | return NULL; | 17 | return NULL; |
21 | } | 18 | } |
22 | 19 | ||
20 | int x86_pci_root_bus_node(int bus) | ||
21 | { | ||
22 | struct pci_root_info *info = x86_find_pci_root_info(bus); | ||
23 | |||
24 | if (!info) | ||
25 | return NUMA_NO_NODE; | ||
26 | |||
27 | return info->node; | ||
28 | } | ||
29 | |||
23 | void x86_pci_root_bus_resources(int bus, struct list_head *resources) | 30 | void x86_pci_root_bus_resources(int bus, struct list_head *resources) |
24 | { | 31 | { |
25 | struct pci_root_info *info = x86_find_pci_root_info(bus); | 32 | struct pci_root_info *info = x86_find_pci_root_info(bus); |
diff --git a/arch/x86/pci/common.c b/arch/x86/pci/common.c index 981c2dbd72cc..059a76c29739 100644 --- a/arch/x86/pci/common.c +++ b/arch/x86/pci/common.c | |||
@@ -456,19 +456,25 @@ void __init dmi_check_pciprobe(void) | |||
456 | dmi_check_system(pciprobe_dmi_table); | 456 | dmi_check_system(pciprobe_dmi_table); |
457 | } | 457 | } |
458 | 458 | ||
459 | struct pci_bus *pcibios_scan_root(int busnum) | 459 | void pcibios_scan_root(int busnum) |
460 | { | 460 | { |
461 | struct pci_bus *bus = NULL; | 461 | struct pci_bus *bus; |
462 | struct pci_sysdata *sd; | ||
463 | LIST_HEAD(resources); | ||
462 | 464 | ||
463 | while ((bus = pci_find_next_bus(bus)) != NULL) { | 465 | sd = kzalloc(sizeof(*sd), GFP_KERNEL); |
464 | if (bus->number == busnum) { | 466 | if (!sd) { |
465 | /* Already scanned */ | 467 | printk(KERN_ERR "PCI: OOM, skipping PCI bus %02x\n", busnum); |
466 | return bus; | 468 | return; |
467 | } | 469 | } |
470 | sd->node = x86_pci_root_bus_node(busnum); | ||
471 | x86_pci_root_bus_resources(busnum, &resources); | ||
472 | printk(KERN_DEBUG "PCI: Probing PCI hardware (bus %02x)\n", busnum); | ||
473 | bus = pci_scan_root_bus(NULL, busnum, &pci_root_ops, sd, &resources); | ||
474 | if (!bus) { | ||
475 | pci_free_resource_list(&resources); | ||
476 | kfree(sd); | ||
468 | } | 477 | } |
469 | |||
470 | return pci_scan_bus_on_node(busnum, &pci_root_ops, | ||
471 | get_mp_bus_to_node(busnum)); | ||
472 | } | 478 | } |
473 | 479 | ||
474 | void __init pcibios_set_cache_line_size(void) | 480 | void __init pcibios_set_cache_line_size(void) |
@@ -561,7 +567,6 @@ char * __init pcibios_setup(char *str) | |||
561 | pci_probe |= PCI_PROBE_NOEARLY; | 567 | pci_probe |= PCI_PROBE_NOEARLY; |
562 | return NULL; | 568 | return NULL; |
563 | } | 569 | } |
564 | #ifndef CONFIG_X86_VISWS | ||
565 | else if (!strcmp(str, "usepirqmask")) { | 570 | else if (!strcmp(str, "usepirqmask")) { |
566 | pci_probe |= PCI_USE_PIRQ_MASK; | 571 | pci_probe |= PCI_USE_PIRQ_MASK; |
567 | return NULL; | 572 | return NULL; |
@@ -571,9 +576,7 @@ char * __init pcibios_setup(char *str) | |||
571 | } else if (!strncmp(str, "lastbus=", 8)) { | 576 | } else if (!strncmp(str, "lastbus=", 8)) { |
572 | pcibios_last_bus = simple_strtol(str+8, NULL, 0); | 577 | pcibios_last_bus = simple_strtol(str+8, NULL, 0); |
573 | return NULL; | 578 | return NULL; |
574 | } | 579 | } else if (!strcmp(str, "rom")) { |
575 | #endif | ||
576 | else if (!strcmp(str, "rom")) { | ||
577 | pci_probe |= PCI_ASSIGN_ROMS; | 580 | pci_probe |= PCI_ASSIGN_ROMS; |
578 | return NULL; | 581 | return NULL; |
579 | } else if (!strcmp(str, "norom")) { | 582 | } else if (!strcmp(str, "norom")) { |
@@ -677,105 +680,3 @@ int pci_ext_cfg_avail(void) | |||
677 | else | 680 | else |
678 | return 0; | 681 | return 0; |
679 | } | 682 | } |
680 | |||
681 | struct pci_bus *pci_scan_bus_on_node(int busno, struct pci_ops *ops, int node) | ||
682 | { | ||
683 | LIST_HEAD(resources); | ||
684 | struct pci_bus *bus = NULL; | ||
685 | struct pci_sysdata *sd; | ||
686 | |||
687 | /* | ||
688 | * Allocate per-root-bus (not per bus) arch-specific data. | ||
689 | * TODO: leak; this memory is never freed. | ||
690 | * It's arguable whether it's worth the trouble to care. | ||
691 | */ | ||
692 | sd = kzalloc(sizeof(*sd), GFP_KERNEL); | ||
693 | if (!sd) { | ||
694 | printk(KERN_ERR "PCI: OOM, skipping PCI bus %02x\n", busno); | ||
695 | return NULL; | ||
696 | } | ||
697 | sd->node = node; | ||
698 | x86_pci_root_bus_resources(busno, &resources); | ||
699 | printk(KERN_DEBUG "PCI: Probing PCI hardware (bus %02x)\n", busno); | ||
700 | bus = pci_scan_root_bus(NULL, busno, ops, sd, &resources); | ||
701 | if (!bus) { | ||
702 | pci_free_resource_list(&resources); | ||
703 | kfree(sd); | ||
704 | } | ||
705 | |||
706 | return bus; | ||
707 | } | ||
708 | |||
709 | struct pci_bus *pci_scan_bus_with_sysdata(int busno) | ||
710 | { | ||
711 | return pci_scan_bus_on_node(busno, &pci_root_ops, -1); | ||
712 | } | ||
713 | |||
714 | /* | ||
715 | * NUMA info for PCI busses | ||
716 | * | ||
717 | * Early arch code is responsible for filling in reasonable values here. | ||
718 | * A node id of "-1" means "use current node". In other words, if a bus | ||
719 | * has a -1 node id, it's not tightly coupled to any particular chunk | ||
720 | * of memory (as is the case on some Nehalem systems). | ||
721 | */ | ||
722 | #ifdef CONFIG_NUMA | ||
723 | |||
724 | #define BUS_NR 256 | ||
725 | |||
726 | #ifdef CONFIG_X86_64 | ||
727 | |||
728 | static int mp_bus_to_node[BUS_NR] = { | ||
729 | [0 ... BUS_NR - 1] = -1 | ||
730 | }; | ||
731 | |||
732 | void set_mp_bus_to_node(int busnum, int node) | ||
733 | { | ||
734 | if (busnum >= 0 && busnum < BUS_NR) | ||
735 | mp_bus_to_node[busnum] = node; | ||
736 | } | ||
737 | |||
738 | int get_mp_bus_to_node(int busnum) | ||
739 | { | ||
740 | int node = -1; | ||
741 | |||
742 | if (busnum < 0 || busnum > (BUS_NR - 1)) | ||
743 | return node; | ||
744 | |||
745 | node = mp_bus_to_node[busnum]; | ||
746 | |||
747 | /* | ||
748 | * let numa_node_id decide it later in dma_alloc_pages | ||
749 | * if there is no RAM on that node | ||
750 | */ | ||
751 | if (node != -1 && !node_online(node)) | ||
752 | node = -1; | ||
753 | |||
754 | return node; | ||
755 | } | ||
756 | |||
757 | #else /* CONFIG_X86_32 */ | ||
758 | |||
759 | static int mp_bus_to_node[BUS_NR] = { | ||
760 | [0 ... BUS_NR - 1] = -1 | ||
761 | }; | ||
762 | |||
763 | void set_mp_bus_to_node(int busnum, int node) | ||
764 | { | ||
765 | if (busnum >= 0 && busnum < BUS_NR) | ||
766 | mp_bus_to_node[busnum] = (unsigned char) node; | ||
767 | } | ||
768 | |||
769 | int get_mp_bus_to_node(int busnum) | ||
770 | { | ||
771 | int node; | ||
772 | |||
773 | if (busnum < 0 || busnum > (BUS_NR - 1)) | ||
774 | return 0; | ||
775 | node = mp_bus_to_node[busnum]; | ||
776 | return node; | ||
777 | } | ||
778 | |||
779 | #endif /* CONFIG_X86_32 */ | ||
780 | |||
781 | #endif /* CONFIG_NUMA */ | ||
diff --git a/arch/x86/pci/fixup.c b/arch/x86/pci/fixup.c index b046e070e088..94ae9ae9574f 100644 --- a/arch/x86/pci/fixup.c +++ b/arch/x86/pci/fixup.c | |||
@@ -5,7 +5,6 @@ | |||
5 | #include <linux/delay.h> | 5 | #include <linux/delay.h> |
6 | #include <linux/dmi.h> | 6 | #include <linux/dmi.h> |
7 | #include <linux/pci.h> | 7 | #include <linux/pci.h> |
8 | #include <linux/init.h> | ||
9 | #include <linux/vgaarb.h> | 8 | #include <linux/vgaarb.h> |
10 | #include <asm/pci_x86.h> | 9 | #include <asm/pci_x86.h> |
11 | 10 | ||
@@ -26,9 +25,9 @@ static void pci_fixup_i450nx(struct pci_dev *d) | |||
26 | dev_dbg(&d->dev, "i450NX PXB %d: %02x/%02x/%02x\n", pxb, busno, | 25 | dev_dbg(&d->dev, "i450NX PXB %d: %02x/%02x/%02x\n", pxb, busno, |
27 | suba, subb); | 26 | suba, subb); |
28 | if (busno) | 27 | if (busno) |
29 | pci_scan_bus_with_sysdata(busno); /* Bus A */ | 28 | pcibios_scan_root(busno); /* Bus A */ |
30 | if (suba < subb) | 29 | if (suba < subb) |
31 | pci_scan_bus_with_sysdata(suba+1); /* Bus B */ | 30 | pcibios_scan_root(suba+1); /* Bus B */ |
32 | } | 31 | } |
33 | pcibios_last_bus = -1; | 32 | pcibios_last_bus = -1; |
34 | } | 33 | } |
@@ -43,7 +42,7 @@ static void pci_fixup_i450gx(struct pci_dev *d) | |||
43 | u8 busno; | 42 | u8 busno; |
44 | pci_read_config_byte(d, 0x4a, &busno); | 43 | pci_read_config_byte(d, 0x4a, &busno); |
45 | dev_info(&d->dev, "i440KX/GX host bridge; secondary bus %02x\n", busno); | 44 | dev_info(&d->dev, "i440KX/GX host bridge; secondary bus %02x\n", busno); |
46 | pci_scan_bus_with_sysdata(busno); | 45 | pcibios_scan_root(busno); |
47 | pcibios_last_bus = -1; | 46 | pcibios_last_bus = -1; |
48 | } | 47 | } |
49 | DECLARE_PCI_FIXUP_HEADER(PCI_VENDOR_ID_INTEL, PCI_DEVICE_ID_INTEL_82454GX, pci_fixup_i450gx); | 48 | DECLARE_PCI_FIXUP_HEADER(PCI_VENDOR_ID_INTEL, PCI_DEVICE_ID_INTEL_82454GX, pci_fixup_i450gx); |
@@ -314,9 +313,10 @@ DECLARE_PCI_FIXUP_FINAL(PCI_VENDOR_ID_INTEL, PCI_DEVICE_ID_INTEL_MCH_PC1, pcie_r | |||
314 | * IORESOURCE_ROM_SHADOW is used to associate the boot video | 313 | * IORESOURCE_ROM_SHADOW is used to associate the boot video |
315 | * card with this copy. On laptops this copy has to be used since | 314 | * card with this copy. On laptops this copy has to be used since |
316 | * the main ROM may be compressed or combined with another image. | 315 | * the main ROM may be compressed or combined with another image. |
317 | * See pci_map_rom() for use of this flag. IORESOURCE_ROM_SHADOW | 316 | * See pci_map_rom() for use of this flag. Before marking the device |
318 | * is marked here since the boot video device will be the only enabled | 317 | * with IORESOURCE_ROM_SHADOW, check whether a vga_default_device is already |
319 | * video device at this point. | 318 | * set by either arch code or VGA arbitration; if so, only apply the fixup |
319 | * to this already-determined primary video card. | ||
320 | */ | 320 | */ |
321 | 321 | ||
322 | static void pci_fixup_video(struct pci_dev *pdev) | 322 | static void pci_fixup_video(struct pci_dev *pdev) |
@@ -347,12 +347,13 @@ static void pci_fixup_video(struct pci_dev *pdev) | |||
347 | } | 347 | } |
348 | bus = bus->parent; | 348 | bus = bus->parent; |
349 | } | 349 | } |
350 | pci_read_config_word(pdev, PCI_COMMAND, &config); | 350 | if (!vga_default_device() || pdev == vga_default_device()) { |
351 | if (config & (PCI_COMMAND_IO | PCI_COMMAND_MEMORY)) { | 351 | pci_read_config_word(pdev, PCI_COMMAND, &config); |
352 | pdev->resource[PCI_ROM_RESOURCE].flags |= IORESOURCE_ROM_SHADOW; | 352 | if (config & (PCI_COMMAND_IO | PCI_COMMAND_MEMORY)) { |
353 | dev_printk(KERN_DEBUG, &pdev->dev, "Boot video device\n"); | 353 | pdev->resource[PCI_ROM_RESOURCE].flags |= IORESOURCE_ROM_SHADOW; |
354 | if (!vga_default_device()) | 354 | dev_printk(KERN_DEBUG, &pdev->dev, "Boot video device\n"); |
355 | vga_set_default_device(pdev); | 355 | vga_set_default_device(pdev); |
356 | } | ||
356 | } | 357 | } |
357 | } | 358 | } |
358 | DECLARE_PCI_FIXUP_CLASS_FINAL(PCI_ANY_ID, PCI_ANY_ID, | 359 | DECLARE_PCI_FIXUP_CLASS_FINAL(PCI_ANY_ID, PCI_ANY_ID, |
diff --git a/arch/x86/pci/intel_mid_pci.c b/arch/x86/pci/intel_mid_pci.c index 51384ca727ad..84b9d672843d 100644 --- a/arch/x86/pci/intel_mid_pci.c +++ b/arch/x86/pci/intel_mid_pci.c | |||
@@ -31,6 +31,7 @@ | |||
31 | #include <asm/pci_x86.h> | 31 | #include <asm/pci_x86.h> |
32 | #include <asm/hw_irq.h> | 32 | #include <asm/hw_irq.h> |
33 | #include <asm/io_apic.h> | 33 | #include <asm/io_apic.h> |
34 | #include <asm/intel-mid.h> | ||
34 | 35 | ||
35 | #define PCIE_CAP_OFFSET 0x100 | 36 | #define PCIE_CAP_OFFSET 0x100 |
36 | 37 | ||
@@ -219,7 +220,10 @@ static int intel_mid_pci_irq_enable(struct pci_dev *dev) | |||
219 | irq_attr.ioapic = mp_find_ioapic(dev->irq); | 220 | irq_attr.ioapic = mp_find_ioapic(dev->irq); |
220 | irq_attr.ioapic_pin = dev->irq; | 221 | irq_attr.ioapic_pin = dev->irq; |
221 | irq_attr.trigger = 1; /* level */ | 222 | irq_attr.trigger = 1; /* level */ |
222 | irq_attr.polarity = 1; /* active low */ | 223 | if (intel_mid_identify_cpu() == INTEL_MID_CPU_CHIP_TANGIER) |
224 | irq_attr.polarity = 0; /* active high */ | ||
225 | else | ||
226 | irq_attr.polarity = 1; /* active low */ | ||
223 | io_apic_set_pci_routing(&dev->dev, dev->irq, &irq_attr); | 227 | io_apic_set_pci_routing(&dev->dev, dev->irq, &irq_attr); |
224 | 228 | ||
225 | return 0; | 229 | return 0; |
diff --git a/arch/x86/pci/irq.c b/arch/x86/pci/irq.c index 372e9b8989b3..84112f55dd7a 100644 --- a/arch/x86/pci/irq.c +++ b/arch/x86/pci/irq.c | |||
@@ -136,13 +136,9 @@ static void __init pirq_peer_trick(void) | |||
136 | busmap[e->bus] = 1; | 136 | busmap[e->bus] = 1; |
137 | } | 137 | } |
138 | for (i = 1; i < 256; i++) { | 138 | for (i = 1; i < 256; i++) { |
139 | int node; | ||
140 | if (!busmap[i] || pci_find_bus(0, i)) | 139 | if (!busmap[i] || pci_find_bus(0, i)) |
141 | continue; | 140 | continue; |
142 | node = get_mp_bus_to_node(i); | 141 | pcibios_scan_root(i); |
143 | if (pci_scan_bus_on_node(i, &pci_root_ops, node)) | ||
144 | printk(KERN_INFO "PCI: Discovered primary peer " | ||
145 | "bus %02x [IRQ]\n", i); | ||
146 | } | 142 | } |
147 | pcibios_last_bus = -1; | 143 | pcibios_last_bus = -1; |
148 | } | 144 | } |
diff --git a/arch/x86/pci/legacy.c b/arch/x86/pci/legacy.c index 4db96fb1c232..5b662c0faf8c 100644 --- a/arch/x86/pci/legacy.c +++ b/arch/x86/pci/legacy.c | |||
@@ -37,19 +37,17 @@ int __init pci_legacy_init(void) | |||
37 | void pcibios_scan_specific_bus(int busn) | 37 | void pcibios_scan_specific_bus(int busn) |
38 | { | 38 | { |
39 | int devfn; | 39 | int devfn; |
40 | long node; | ||
41 | u32 l; | 40 | u32 l; |
42 | 41 | ||
43 | if (pci_find_bus(0, busn)) | 42 | if (pci_find_bus(0, busn)) |
44 | return; | 43 | return; |
45 | 44 | ||
46 | node = get_mp_bus_to_node(busn); | ||
47 | for (devfn = 0; devfn < 256; devfn += 8) { | 45 | for (devfn = 0; devfn < 256; devfn += 8) { |
48 | if (!raw_pci_read(0, busn, devfn, PCI_VENDOR_ID, 2, &l) && | 46 | if (!raw_pci_read(0, busn, devfn, PCI_VENDOR_ID, 2, &l) && |
49 | l != 0x0000 && l != 0xffff) { | 47 | l != 0x0000 && l != 0xffff) { |
50 | DBG("Found device at %02x:%02x [%04x]\n", busn, devfn, l); | 48 | DBG("Found device at %02x:%02x [%04x]\n", busn, devfn, l); |
51 | printk(KERN_INFO "PCI: Discovered peer bus %02x\n", busn); | 49 | printk(KERN_INFO "PCI: Discovered peer bus %02x\n", busn); |
52 | pci_scan_bus_on_node(busn, &pci_root_ops, node); | 50 | pcibios_scan_root(busn); |
53 | return; | 51 | return; |
54 | } | 52 | } |
55 | } | 53 | } |
diff --git a/arch/x86/pci/mmconfig-shared.c b/arch/x86/pci/mmconfig-shared.c index 082e88129712..248642f4bab7 100644 --- a/arch/x86/pci/mmconfig-shared.c +++ b/arch/x86/pci/mmconfig-shared.c | |||
@@ -12,7 +12,6 @@ | |||
12 | 12 | ||
13 | #include <linux/pci.h> | 13 | #include <linux/pci.h> |
14 | #include <linux/init.h> | 14 | #include <linux/init.h> |
15 | #include <linux/acpi.h> | ||
16 | #include <linux/sfi_acpi.h> | 15 | #include <linux/sfi_acpi.h> |
17 | #include <linux/bitmap.h> | 16 | #include <linux/bitmap.h> |
18 | #include <linux/dmi.h> | 17 | #include <linux/dmi.h> |
diff --git a/arch/x86/pci/mmconfig_32.c b/arch/x86/pci/mmconfig_32.c index 5c90975cdf0f..43984bc1665a 100644 --- a/arch/x86/pci/mmconfig_32.c +++ b/arch/x86/pci/mmconfig_32.c | |||
@@ -14,7 +14,6 @@ | |||
14 | #include <linux/rcupdate.h> | 14 | #include <linux/rcupdate.h> |
15 | #include <asm/e820.h> | 15 | #include <asm/e820.h> |
16 | #include <asm/pci_x86.h> | 16 | #include <asm/pci_x86.h> |
17 | #include <acpi/acpi.h> | ||
18 | 17 | ||
19 | /* Assume systems with more busses have correct MCFG */ | 18 | /* Assume systems with more busses have correct MCFG */ |
20 | #define mmcfg_virt_addr ((void __iomem *) fix_to_virt(FIX_PCIE_MCFG)) | 19 | #define mmcfg_virt_addr ((void __iomem *) fix_to_virt(FIX_PCIE_MCFG)) |
diff --git a/arch/x86/pci/numaq_32.c b/arch/x86/pci/numaq_32.c deleted file mode 100644 index 72c229f9ebcf..000000000000 --- a/arch/x86/pci/numaq_32.c +++ /dev/null | |||
@@ -1,165 +0,0 @@ | |||
1 | /* | ||
2 | * numaq_32.c - Low-level PCI access for NUMA-Q machines | ||
3 | */ | ||
4 | |||
5 | #include <linux/pci.h> | ||
6 | #include <linux/init.h> | ||
7 | #include <linux/nodemask.h> | ||
8 | #include <asm/apic.h> | ||
9 | #include <asm/mpspec.h> | ||
10 | #include <asm/pci_x86.h> | ||
11 | #include <asm/numaq.h> | ||
12 | |||
13 | #define BUS2QUAD(global) (mp_bus_id_to_node[global]) | ||
14 | |||
15 | #define BUS2LOCAL(global) (mp_bus_id_to_local[global]) | ||
16 | |||
17 | #define QUADLOCAL2BUS(quad,local) (quad_local_to_mp_bus_id[quad][local]) | ||
18 | |||
19 | #define PCI_CONF1_MQ_ADDRESS(bus, devfn, reg) \ | ||
20 | (0x80000000 | (BUS2LOCAL(bus) << 16) | (devfn << 8) | (reg & ~3)) | ||
21 | |||
22 | static void write_cf8(unsigned bus, unsigned devfn, unsigned reg) | ||
23 | { | ||
24 | unsigned val = PCI_CONF1_MQ_ADDRESS(bus, devfn, reg); | ||
25 | if (xquad_portio) | ||
26 | writel(val, XQUAD_PORT_ADDR(0xcf8, BUS2QUAD(bus))); | ||
27 | else | ||
28 | outl(val, 0xCF8); | ||
29 | } | ||
30 | |||
31 | static int pci_conf1_mq_read(unsigned int seg, unsigned int bus, | ||
32 | unsigned int devfn, int reg, int len, u32 *value) | ||
33 | { | ||
34 | unsigned long flags; | ||
35 | void *adr __iomem = XQUAD_PORT_ADDR(0xcfc, BUS2QUAD(bus)); | ||
36 | |||
37 | WARN_ON(seg); | ||
38 | if (!value || (bus >= MAX_MP_BUSSES) || (devfn > 255) || (reg > 255)) | ||
39 | return -EINVAL; | ||
40 | |||
41 | raw_spin_lock_irqsave(&pci_config_lock, flags); | ||
42 | |||
43 | write_cf8(bus, devfn, reg); | ||
44 | |||
45 | switch (len) { | ||
46 | case 1: | ||
47 | if (xquad_portio) | ||
48 | *value = readb(adr + (reg & 3)); | ||
49 | else | ||
50 | *value = inb(0xCFC + (reg & 3)); | ||
51 | break; | ||
52 | case 2: | ||
53 | if (xquad_portio) | ||
54 | *value = readw(adr + (reg & 2)); | ||
55 | else | ||
56 | *value = inw(0xCFC + (reg & 2)); | ||
57 | break; | ||
58 | case 4: | ||
59 | if (xquad_portio) | ||
60 | *value = readl(adr); | ||
61 | else | ||
62 | *value = inl(0xCFC); | ||
63 | break; | ||
64 | } | ||
65 | |||
66 | raw_spin_unlock_irqrestore(&pci_config_lock, flags); | ||
67 | |||
68 | return 0; | ||
69 | } | ||
70 | |||
71 | static int pci_conf1_mq_write(unsigned int seg, unsigned int bus, | ||
72 | unsigned int devfn, int reg, int len, u32 value) | ||
73 | { | ||
74 | unsigned long flags; | ||
75 | void *adr __iomem = XQUAD_PORT_ADDR(0xcfc, BUS2QUAD(bus)); | ||
76 | |||
77 | WARN_ON(seg); | ||
78 | if ((bus >= MAX_MP_BUSSES) || (devfn > 255) || (reg > 255)) | ||
79 | return -EINVAL; | ||
80 | |||
81 | raw_spin_lock_irqsave(&pci_config_lock, flags); | ||
82 | |||
83 | write_cf8(bus, devfn, reg); | ||
84 | |||
85 | switch (len) { | ||
86 | case 1: | ||
87 | if (xquad_portio) | ||
88 | writeb(value, adr + (reg & 3)); | ||
89 | else | ||
90 | outb((u8)value, 0xCFC + (reg & 3)); | ||
91 | break; | ||
92 | case 2: | ||
93 | if (xquad_portio) | ||
94 | writew(value, adr + (reg & 2)); | ||
95 | else | ||
96 | outw((u16)value, 0xCFC + (reg & 2)); | ||
97 | break; | ||
98 | case 4: | ||
99 | if (xquad_portio) | ||
100 | writel(value, adr + reg); | ||
101 | else | ||
102 | outl((u32)value, 0xCFC); | ||
103 | break; | ||
104 | } | ||
105 | |||
106 | raw_spin_unlock_irqrestore(&pci_config_lock, flags); | ||
107 | |||
108 | return 0; | ||
109 | } | ||
110 | |||
111 | #undef PCI_CONF1_MQ_ADDRESS | ||
112 | |||
113 | static const struct pci_raw_ops pci_direct_conf1_mq = { | ||
114 | .read = pci_conf1_mq_read, | ||
115 | .write = pci_conf1_mq_write | ||
116 | }; | ||
117 | |||
118 | |||
119 | static void pci_fixup_i450nx(struct pci_dev *d) | ||
120 | { | ||
121 | /* | ||
122 | * i450NX -- Find and scan all secondary buses on all PXB's. | ||
123 | */ | ||
124 | int pxb, reg; | ||
125 | u8 busno, suba, subb; | ||
126 | int quad = BUS2QUAD(d->bus->number); | ||
127 | |||
128 | dev_info(&d->dev, "searching for i450NX host bridges\n"); | ||
129 | reg = 0xd0; | ||
130 | for(pxb=0; pxb<2; pxb++) { | ||
131 | pci_read_config_byte(d, reg++, &busno); | ||
132 | pci_read_config_byte(d, reg++, &suba); | ||
133 | pci_read_config_byte(d, reg++, &subb); | ||
134 | dev_dbg(&d->dev, "i450NX PXB %d: %02x/%02x/%02x\n", | ||
135 | pxb, busno, suba, subb); | ||
136 | if (busno) { | ||
137 | /* Bus A */ | ||
138 | pci_scan_bus_with_sysdata(QUADLOCAL2BUS(quad, busno)); | ||
139 | } | ||
140 | if (suba < subb) { | ||
141 | /* Bus B */ | ||
142 | pci_scan_bus_with_sysdata(QUADLOCAL2BUS(quad, suba+1)); | ||
143 | } | ||
144 | } | ||
145 | pcibios_last_bus = -1; | ||
146 | } | ||
147 | DECLARE_PCI_FIXUP_HEADER(PCI_VENDOR_ID_INTEL, PCI_DEVICE_ID_INTEL_82451NX, pci_fixup_i450nx); | ||
148 | |||
149 | int __init pci_numaq_init(void) | ||
150 | { | ||
151 | int quad; | ||
152 | |||
153 | raw_pci_ops = &pci_direct_conf1_mq; | ||
154 | |||
155 | pcibios_scan_root(0); | ||
156 | if (num_online_nodes() > 1) | ||
157 | for_each_online_node(quad) { | ||
158 | if (quad == 0) | ||
159 | continue; | ||
160 | printk("Scanning PCI bus %d for quad %d\n", | ||
161 | QUADLOCAL2BUS(quad,0), quad); | ||
162 | pci_scan_bus_with_sysdata(QUADLOCAL2BUS(quad, 0)); | ||
163 | } | ||
164 | return 0; | ||
165 | } | ||
diff --git a/arch/x86/pci/visws.c b/arch/x86/pci/visws.c deleted file mode 100644 index 3e6d2a6db866..000000000000 --- a/arch/x86/pci/visws.c +++ /dev/null | |||
@@ -1,87 +0,0 @@ | |||
1 | /* | ||
2 | * Low-Level PCI Support for SGI Visual Workstation | ||
3 | * | ||
4 | * (c) 1999--2000 Martin Mares <mj@ucw.cz> | ||
5 | */ | ||
6 | |||
7 | #include <linux/kernel.h> | ||
8 | #include <linux/pci.h> | ||
9 | #include <linux/init.h> | ||
10 | |||
11 | #include <asm/setup.h> | ||
12 | #include <asm/pci_x86.h> | ||
13 | #include <asm/visws/cobalt.h> | ||
14 | #include <asm/visws/lithium.h> | ||
15 | |||
16 | static int pci_visws_enable_irq(struct pci_dev *dev) { return 0; } | ||
17 | static void pci_visws_disable_irq(struct pci_dev *dev) { } | ||
18 | |||
19 | /* int (*pcibios_enable_irq)(struct pci_dev *dev) = &pci_visws_enable_irq; */ | ||
20 | /* void (*pcibios_disable_irq)(struct pci_dev *dev) = &pci_visws_disable_irq; */ | ||
21 | |||
22 | /* void __init pcibios_penalize_isa_irq(int irq, int active) {} */ | ||
23 | |||
24 | |||
25 | unsigned int pci_bus0, pci_bus1; | ||
26 | |||
27 | static int __init visws_map_irq(const struct pci_dev *dev, u8 slot, u8 pin) | ||
28 | { | ||
29 | int irq, bus = dev->bus->number; | ||
30 | |||
31 | pin--; | ||
32 | |||
33 | /* Nothing useful at PIIX4 pin 1 */ | ||
34 | if (bus == pci_bus0 && slot == 4 && pin == 0) | ||
35 | return -1; | ||
36 | |||
37 | /* PIIX4 USB is on Bus 0, Slot 4, Line 3 */ | ||
38 | if (bus == pci_bus0 && slot == 4 && pin == 3) { | ||
39 | irq = CO_IRQ(CO_APIC_PIIX4_USB); | ||
40 | goto out; | ||
41 | } | ||
42 | |||
43 | /* First pin spread down 1 APIC entry per slot */ | ||
44 | if (pin == 0) { | ||
45 | irq = CO_IRQ((bus == pci_bus0 ? CO_APIC_PCIB_BASE0 : | ||
46 | CO_APIC_PCIA_BASE0) + slot); | ||
47 | goto out; | ||
48 | } | ||
49 | |||
50 | /* lines 1-3 from any slot are shared in this twirly pattern */ | ||
51 | if (bus == pci_bus1) { | ||
52 | /* lines 1-3 from devices 0 1 rotate over 2 apic entries */ | ||
53 | irq = CO_IRQ(CO_APIC_PCIA_BASE123 + ((slot + (pin - 1)) % 2)); | ||
54 | } else { /* bus == pci_bus0 */ | ||
55 | /* lines 1-3 from devices 0-3 rotate over 3 apic entries */ | ||
56 | if (slot == 0) | ||
57 | slot = 3; /* same pattern */ | ||
58 | irq = CO_IRQ(CO_APIC_PCIA_BASE123 + ((3 - slot) + (pin - 1) % 3)); | ||
59 | } | ||
60 | out: | ||
61 | printk(KERN_DEBUG "PCI: Bus %d Slot %d Line %d -> IRQ %d\n", bus, slot, pin, irq); | ||
62 | return irq; | ||
63 | } | ||
64 | |||
65 | int __init pci_visws_init(void) | ||
66 | { | ||
67 | pcibios_enable_irq = &pci_visws_enable_irq; | ||
68 | pcibios_disable_irq = &pci_visws_disable_irq; | ||
69 | |||
70 | /* The VISWS supports configuration access type 1 only */ | ||
71 | pci_probe = (pci_probe | PCI_PROBE_CONF1) & | ||
72 | ~(PCI_PROBE_BIOS | PCI_PROBE_CONF2); | ||
73 | |||
74 | pci_bus0 = li_pcib_read16(LI_PCI_BUSNUM) & 0xff; | ||
75 | pci_bus1 = li_pcia_read16(LI_PCI_BUSNUM) & 0xff; | ||
76 | |||
77 | printk(KERN_INFO "PCI: Lithium bridge A bus: %u, " | ||
78 | "bridge B (PIIX4) bus: %u\n", pci_bus1, pci_bus0); | ||
79 | |||
80 | raw_pci_ops = &pci_direct_conf1; | ||
81 | pci_scan_bus_with_sysdata(pci_bus0); | ||
82 | pci_scan_bus_with_sysdata(pci_bus1); | ||
83 | pci_fixup_irqs(pci_common_swizzle, visws_map_irq); | ||
84 | pcibios_resource_survey(); | ||
85 | /* Request bus scan */ | ||
86 | return 1; | ||
87 | } | ||
diff --git a/arch/x86/pci/xen.c b/arch/x86/pci/xen.c index 5eee4959785d..905956f16465 100644 --- a/arch/x86/pci/xen.c +++ b/arch/x86/pci/xen.c | |||
@@ -178,6 +178,7 @@ static int xen_setup_msi_irqs(struct pci_dev *dev, int nvec, int type) | |||
178 | i = 0; | 178 | i = 0; |
179 | list_for_each_entry(msidesc, &dev->msi_list, list) { | 179 | list_for_each_entry(msidesc, &dev->msi_list, list) { |
180 | irq = xen_bind_pirq_msi_to_irq(dev, msidesc, v[i], | 180 | irq = xen_bind_pirq_msi_to_irq(dev, msidesc, v[i], |
181 | (type == PCI_CAP_ID_MSI) ? nvec : 1, | ||
181 | (type == PCI_CAP_ID_MSIX) ? | 182 | (type == PCI_CAP_ID_MSIX) ? |
182 | "pcifront-msi-x" : | 183 | "pcifront-msi-x" : |
183 | "pcifront-msi", | 184 | "pcifront-msi", |
@@ -245,6 +246,7 @@ static int xen_hvm_setup_msi_irqs(struct pci_dev *dev, int nvec, int type) | |||
245 | "xen: msi already bound to pirq=%d\n", pirq); | 246 | "xen: msi already bound to pirq=%d\n", pirq); |
246 | } | 247 | } |
247 | irq = xen_bind_pirq_msi_to_irq(dev, msidesc, pirq, | 248 | irq = xen_bind_pirq_msi_to_irq(dev, msidesc, pirq, |
249 | (type == PCI_CAP_ID_MSI) ? nvec : 1, | ||
248 | (type == PCI_CAP_ID_MSIX) ? | 250 | (type == PCI_CAP_ID_MSIX) ? |
249 | "msi-x" : "msi", | 251 | "msi-x" : "msi", |
250 | DOMID_SELF); | 252 | DOMID_SELF); |
@@ -269,9 +271,6 @@ static int xen_initdom_setup_msi_irqs(struct pci_dev *dev, int nvec, int type) | |||
269 | int ret = 0; | 271 | int ret = 0; |
270 | struct msi_desc *msidesc; | 272 | struct msi_desc *msidesc; |
271 | 273 | ||
272 | if (type == PCI_CAP_ID_MSI && nvec > 1) | ||
273 | return 1; | ||
274 | |||
275 | list_for_each_entry(msidesc, &dev->msi_list, list) { | 274 | list_for_each_entry(msidesc, &dev->msi_list, list) { |
276 | struct physdev_map_pirq map_irq; | 275 | struct physdev_map_pirq map_irq; |
277 | domid_t domid; | 276 | domid_t domid; |
@@ -291,7 +290,10 @@ static int xen_initdom_setup_msi_irqs(struct pci_dev *dev, int nvec, int type) | |||
291 | (pci_domain_nr(dev->bus) << 16); | 290 | (pci_domain_nr(dev->bus) << 16); |
292 | map_irq.devfn = dev->devfn; | 291 | map_irq.devfn = dev->devfn; |
293 | 292 | ||
294 | if (type == PCI_CAP_ID_MSIX) { | 293 | if (type == PCI_CAP_ID_MSI && nvec > 1) { |
294 | map_irq.type = MAP_PIRQ_TYPE_MULTI_MSI; | ||
295 | map_irq.entry_nr = nvec; | ||
296 | } else if (type == PCI_CAP_ID_MSIX) { | ||
295 | int pos; | 297 | int pos; |
296 | u32 table_offset, bir; | 298 | u32 table_offset, bir; |
297 | 299 | ||
@@ -308,6 +310,16 @@ static int xen_initdom_setup_msi_irqs(struct pci_dev *dev, int nvec, int type) | |||
308 | if (pci_seg_supported) | 310 | if (pci_seg_supported) |
309 | ret = HYPERVISOR_physdev_op(PHYSDEVOP_map_pirq, | 311 | ret = HYPERVISOR_physdev_op(PHYSDEVOP_map_pirq, |
310 | &map_irq); | 312 | &map_irq); |
313 | if (type == PCI_CAP_ID_MSI && nvec > 1 && ret) { | ||
314 | /* | ||
315 | * If MAP_PIRQ_TYPE_MULTI_MSI is not available, | ||
316 | * there's nothing else we can do in this case. | ||
317 | * Just set ret > 0 so the driver can retry | ||
318 | * with a single MSI. | ||
319 | */ | ||
320 | ret = 1; | ||
321 | goto out; | ||
322 | } | ||
311 | if (ret == -EINVAL && !pci_domain_nr(dev->bus)) { | 323 | if (ret == -EINVAL && !pci_domain_nr(dev->bus)) { |
312 | map_irq.type = MAP_PIRQ_TYPE_MSI; | 324 | map_irq.type = MAP_PIRQ_TYPE_MSI; |
313 | map_irq.index = -1; | 325 | map_irq.index = -1; |
@@ -324,11 +336,10 @@ static int xen_initdom_setup_msi_irqs(struct pci_dev *dev, int nvec, int type) | |||
324 | goto out; | 336 | goto out; |
325 | } | 337 | } |
326 | 338 | ||
327 | ret = xen_bind_pirq_msi_to_irq(dev, msidesc, | 339 | ret = xen_bind_pirq_msi_to_irq(dev, msidesc, map_irq.pirq, |
328 | map_irq.pirq, | 340 | (type == PCI_CAP_ID_MSI) ? nvec : 1, |
329 | (type == PCI_CAP_ID_MSIX) ? | 341 | (type == PCI_CAP_ID_MSIX) ? "msi-x" : "msi", |
330 | "msi-x" : "msi", | 342 | domid); |
331 | domid); | ||
332 | if (ret < 0) | 343 | if (ret < 0) |
333 | goto out; | 344 | goto out; |
334 | } | 345 | } |
@@ -337,7 +348,7 @@ out: | |||
337 | return ret; | 348 | return ret; |
338 | } | 349 | } |
339 | 350 | ||
340 | static void xen_initdom_restore_msi_irqs(struct pci_dev *dev, int irq) | 351 | static void xen_initdom_restore_msi_irqs(struct pci_dev *dev) |
341 | { | 352 | { |
342 | int ret = 0; | 353 | int ret = 0; |
343 | 354 | ||
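The xen.c hunks teach the initdom path about multi-vector MSI: instead of bailing out early for nvec > 1, the map_pirq hypercall is issued with MAP_PIRQ_TYPE_MULTI_MSI, and a failure is turned into ret = 1 so that, per the comment in the hunk, the driver can retry with a single MSI. A condensed view of that fallback, with names taken from the diff:

    /* Sketch of the multi-MSI request and single-MSI fallback above. */
    if (type == PCI_CAP_ID_MSI && nvec > 1) {
            map_irq.type = MAP_PIRQ_TYPE_MULTI_MSI;
            map_irq.entry_nr = nvec;
    }
    ret = HYPERVISOR_physdev_op(PHYSDEVOP_map_pirq, &map_irq);
    if (type == PCI_CAP_ID_MSI && nvec > 1 && ret)
            return 1;       /* caller retries with a single MSI */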
diff --git a/arch/x86/platform/Makefile b/arch/x86/platform/Makefile index 20342d4c82ce..85afde1fa3e5 100644 --- a/arch/x86/platform/Makefile +++ b/arch/x86/platform/Makefile | |||
@@ -9,5 +9,4 @@ obj-y += olpc/ | |||
9 | obj-y += scx200/ | 9 | obj-y += scx200/ |
10 | obj-y += sfi/ | 10 | obj-y += sfi/ |
11 | obj-y += ts5500/ | 11 | obj-y += ts5500/ |
12 | obj-y += visws/ | ||
13 | obj-y += uv/ | 12 | obj-y += uv/ |
diff --git a/arch/x86/platform/efi/Makefile b/arch/x86/platform/efi/Makefile index b7b0b35c1981..d51045afcaaf 100644 --- a/arch/x86/platform/efi/Makefile +++ b/arch/x86/platform/efi/Makefile | |||
@@ -1,3 +1,4 @@ | |||
1 | obj-$(CONFIG_EFI) += efi.o efi_$(BITS).o efi_stub_$(BITS).o | 1 | obj-$(CONFIG_EFI) += efi.o efi_$(BITS).o efi_stub_$(BITS).o |
2 | obj-$(CONFIG_ACPI_BGRT) += efi-bgrt.o | 2 | obj-$(CONFIG_ACPI_BGRT) += efi-bgrt.o |
3 | obj-$(CONFIG_EARLY_PRINTK_EFI) += early_printk.o | 3 | obj-$(CONFIG_EARLY_PRINTK_EFI) += early_printk.o |
4 | obj-$(CONFIG_EFI_MIXED) += efi_thunk_$(BITS).o | ||
diff --git a/arch/x86/platform/efi/efi-bgrt.c b/arch/x86/platform/efi/efi-bgrt.c index 7145ec63c520..f15103dff4b4 100644 --- a/arch/x86/platform/efi/efi-bgrt.c +++ b/arch/x86/platform/efi/efi-bgrt.c | |||
@@ -42,14 +42,15 @@ void __init efi_bgrt_init(void) | |||
42 | 42 | ||
43 | if (bgrt_tab->header.length < sizeof(*bgrt_tab)) | 43 | if (bgrt_tab->header.length < sizeof(*bgrt_tab)) |
44 | return; | 44 | return; |
45 | if (bgrt_tab->version != 1) | 45 | if (bgrt_tab->version != 1 || bgrt_tab->status != 1) |
46 | return; | 46 | return; |
47 | if (bgrt_tab->image_type != 0 || !bgrt_tab->image_address) | 47 | if (bgrt_tab->image_type != 0 || !bgrt_tab->image_address) |
48 | return; | 48 | return; |
49 | 49 | ||
50 | image = efi_lookup_mapped_addr(bgrt_tab->image_address); | 50 | image = efi_lookup_mapped_addr(bgrt_tab->image_address); |
51 | if (!image) { | 51 | if (!image) { |
52 | image = ioremap(bgrt_tab->image_address, sizeof(bmp_header)); | 52 | image = early_memremap(bgrt_tab->image_address, |
53 | sizeof(bmp_header)); | ||
53 | ioremapped = true; | 54 | ioremapped = true; |
54 | if (!image) | 55 | if (!image) |
55 | return; | 56 | return; |
@@ -57,7 +58,7 @@ void __init efi_bgrt_init(void) | |||
57 | 58 | ||
58 | memcpy_fromio(&bmp_header, image, sizeof(bmp_header)); | 59 | memcpy_fromio(&bmp_header, image, sizeof(bmp_header)); |
59 | if (ioremapped) | 60 | if (ioremapped) |
60 | iounmap(image); | 61 | early_iounmap(image, sizeof(bmp_header)); |
61 | bgrt_image_size = bmp_header.size; | 62 | bgrt_image_size = bmp_header.size; |
62 | 63 | ||
63 | bgrt_image = kmalloc(bgrt_image_size, GFP_KERNEL); | 64 | bgrt_image = kmalloc(bgrt_image_size, GFP_KERNEL); |
@@ -65,7 +66,8 @@ void __init efi_bgrt_init(void) | |||
65 | return; | 66 | return; |
66 | 67 | ||
67 | if (ioremapped) { | 68 | if (ioremapped) { |
68 | image = ioremap(bgrt_tab->image_address, bmp_header.size); | 69 | image = early_memremap(bgrt_tab->image_address, |
70 | bmp_header.size); | ||
69 | if (!image) { | 71 | if (!image) { |
70 | kfree(bgrt_image); | 72 | kfree(bgrt_image); |
71 | bgrt_image = NULL; | 73 | bgrt_image = NULL; |
@@ -75,5 +77,5 @@ void __init efi_bgrt_init(void) | |||
75 | 77 | ||
76 | memcpy_fromio(bgrt_image, image, bgrt_image_size); | 78 | memcpy_fromio(bgrt_image, image, bgrt_image_size); |
77 | if (ioremapped) | 79 | if (ioremapped) |
78 | iounmap(image); | 80 | early_iounmap(image, bmp_header.size); |
79 | } | 81 | } |
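efi_bgrt_init() runs at early boot, where early_memremap()/early_iounmap() is the appropriate mapping interface, so the fallback mapping above drops ioremap()/iounmap(); note that the unmap length must match the map length, which is why the second unmap passes bmp_header.size. The pattern as a standalone sketch (peek_firmware_blob is an illustrative name, not a kernel symbol):

    /* Sketch of the boot-time map/copy/unmap pattern used above. */
    static void __init peek_firmware_blob(phys_addr_t phys, size_t len,
                                          void *out)
    {
            void *p = early_memremap(phys, len);

            if (!p)
                    return;
            memcpy(out, p, len);        /* copy out while mapped */
            early_iounmap(p, len);      /* length must match the map */
    }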
diff --git a/arch/x86/platform/efi/efi.c b/arch/x86/platform/efi/efi.c index cceb813044ef..3781dd39e8bd 100644 --- a/arch/x86/platform/efi/efi.c +++ b/arch/x86/platform/efi/efi.c | |||
@@ -12,6 +12,8 @@ | |||
12 | * Bibo Mao <bibo.mao@intel.com> | 12 | * Bibo Mao <bibo.mao@intel.com> |
13 | * Chandramouli Narayanan <mouli@linux.intel.com> | 13 | * Chandramouli Narayanan <mouli@linux.intel.com> |
14 | * Huang Ying <ying.huang@intel.com> | 14 | * Huang Ying <ying.huang@intel.com> |
15 | * Copyright (C) 2013 SuSE Labs | ||
16 | * Borislav Petkov <bp@suse.de> - runtime services VA mapping | ||
15 | * | 17 | * |
16 | * Copied from efi_32.c to eliminate the duplicated code between EFI | 18 | * Copied from efi_32.c to eliminate the duplicated code between EFI |
17 | * 32/64 support code. --ying 2007-10-26 | 19 | * 32/64 support code. --ying 2007-10-26 |
@@ -50,8 +52,9 @@ | |||
50 | #include <asm/tlbflush.h> | 52 | #include <asm/tlbflush.h> |
51 | #include <asm/x86_init.h> | 53 | #include <asm/x86_init.h> |
52 | #include <asm/rtc.h> | 54 | #include <asm/rtc.h> |
55 | #include <asm/uv/uv.h> | ||
53 | 56 | ||
54 | #define EFI_DEBUG 1 | 57 | #define EFI_DEBUG |
55 | 58 | ||
56 | #define EFI_MIN_RESERVE 5120 | 59 | #define EFI_MIN_RESERVE 5120 |
57 | 60 | ||
@@ -65,25 +68,16 @@ struct efi_memory_map memmap; | |||
65 | static struct efi efi_phys __initdata; | 68 | static struct efi efi_phys __initdata; |
66 | static efi_system_table_t efi_systab __initdata; | 69 | static efi_system_table_t efi_systab __initdata; |
67 | 70 | ||
68 | unsigned long x86_efi_facility; | 71 | static efi_config_table_type_t arch_tables[] __initdata = { |
69 | |||
70 | static __initdata efi_config_table_type_t arch_tables[] = { | ||
71 | #ifdef CONFIG_X86_UV | 72 | #ifdef CONFIG_X86_UV |
72 | {UV_SYSTEM_TABLE_GUID, "UVsystab", &efi.uv_systab}, | 73 | {UV_SYSTEM_TABLE_GUID, "UVsystab", &efi.uv_systab}, |
73 | #endif | 74 | #endif |
74 | {NULL_GUID, NULL, NULL}, | 75 | {NULL_GUID, NULL, NULL}, |
75 | }; | 76 | }; |
76 | 77 | ||
77 | /* | 78 | u64 efi_setup; /* efi setup_data physical address */ |
78 | * Returns 1 if 'facility' is enabled, 0 otherwise. | ||
79 | */ | ||
80 | int efi_enabled(int facility) | ||
81 | { | ||
82 | return test_bit(facility, &x86_efi_facility) != 0; | ||
83 | } | ||
84 | EXPORT_SYMBOL(efi_enabled); | ||
85 | 79 | ||
86 | static bool __initdata disable_runtime = false; | 80 | static bool disable_runtime __initdata = false; |
87 | static int __init setup_noefi(char *arg) | 81 | static int __init setup_noefi(char *arg) |
88 | { | 82 | { |
89 | disable_runtime = true; | 83 | disable_runtime = true; |
@@ -110,7 +104,6 @@ static int __init setup_storage_paranoia(char *arg) | |||
110 | } | 104 | } |
111 | early_param("efi_no_storage_paranoia", setup_storage_paranoia); | 105 | early_param("efi_no_storage_paranoia", setup_storage_paranoia); |
112 | 106 | ||
113 | |||
114 | static efi_status_t virt_efi_get_time(efi_time_t *tm, efi_time_cap_t *tc) | 107 | static efi_status_t virt_efi_get_time(efi_time_t *tm, efi_time_cap_t *tc) |
115 | { | 108 | { |
116 | unsigned long flags; | 109 | unsigned long flags; |
@@ -253,27 +246,12 @@ static efi_status_t __init phys_efi_set_virtual_address_map( | |||
253 | return status; | 246 | return status; |
254 | } | 247 | } |
255 | 248 | ||
256 | static efi_status_t __init phys_efi_get_time(efi_time_t *tm, | ||
257 | efi_time_cap_t *tc) | ||
258 | { | ||
259 | unsigned long flags; | ||
260 | efi_status_t status; | ||
261 | |||
262 | spin_lock_irqsave(&rtc_lock, flags); | ||
263 | efi_call_phys_prelog(); | ||
264 | status = efi_call_phys2(efi_phys.get_time, virt_to_phys(tm), | ||
265 | virt_to_phys(tc)); | ||
266 | efi_call_phys_epilog(); | ||
267 | spin_unlock_irqrestore(&rtc_lock, flags); | ||
268 | return status; | ||
269 | } | ||
270 | |||
271 | int efi_set_rtc_mmss(const struct timespec *now) | 249 | int efi_set_rtc_mmss(const struct timespec *now) |
272 | { | 250 | { |
273 | unsigned long nowtime = now->tv_sec; | 251 | unsigned long nowtime = now->tv_sec; |
274 | efi_status_t status; | 252 | efi_status_t status; |
275 | efi_time_t eft; | 253 | efi_time_t eft; |
276 | efi_time_cap_t cap; | 254 | efi_time_cap_t cap; |
277 | struct rtc_time tm; | 255 | struct rtc_time tm; |
278 | 256 | ||
279 | status = efi.get_time(&eft, &cap); | 257 | status = efi.get_time(&eft, &cap); |
@@ -291,9 +269,8 @@ int efi_set_rtc_mmss(const struct timespec *now) | |||
291 | eft.second = tm.tm_sec; | 269 | eft.second = tm.tm_sec; |
292 | eft.nanosecond = 0; | 270 | eft.nanosecond = 0; |
293 | } else { | 271 | } else { |
294 | printk(KERN_ERR | 272 | pr_err("%s: Invalid EFI RTC value: write of %lx to EFI RTC failed\n", |
295 | "%s: Invalid EFI RTC value: write of %lx to EFI RTC failed\n", | 273 | __func__, nowtime); |
296 | __FUNCTION__, nowtime); | ||
297 | return -1; | 274 | return -1; |
298 | } | 275 | } |
299 | 276 | ||
@@ -398,9 +375,9 @@ int __init efi_memblock_x86_reserve_range(void) | |||
398 | return 0; | 375 | return 0; |
399 | } | 376 | } |
400 | 377 | ||
401 | #if EFI_DEBUG | ||
402 | static void __init print_efi_memmap(void) | 378 | static void __init print_efi_memmap(void) |
403 | { | 379 | { |
380 | #ifdef EFI_DEBUG | ||
404 | efi_memory_desc_t *md; | 381 | efi_memory_desc_t *md; |
405 | void *p; | 382 | void *p; |
406 | int i; | 383 | int i; |
@@ -409,14 +386,13 @@ static void __init print_efi_memmap(void) | |||
409 | p < memmap.map_end; | 386 | p < memmap.map_end; |
410 | p += memmap.desc_size, i++) { | 387 | p += memmap.desc_size, i++) { |
411 | md = p; | 388 | md = p; |
412 | pr_info("mem%02u: type=%u, attr=0x%llx, " | 389 | pr_info("mem%02u: type=%u, attr=0x%llx, range=[0x%016llx-0x%016llx) (%lluMB)\n", |
413 | "range=[0x%016llx-0x%016llx) (%lluMB)\n", | ||
414 | i, md->type, md->attribute, md->phys_addr, | 390 | i, md->type, md->attribute, md->phys_addr, |
415 | md->phys_addr + (md->num_pages << EFI_PAGE_SHIFT), | 391 | md->phys_addr + (md->num_pages << EFI_PAGE_SHIFT), |
416 | (md->num_pages >> (20 - EFI_PAGE_SHIFT))); | 392 | (md->num_pages >> (20 - EFI_PAGE_SHIFT))); |
417 | } | 393 | } |
418 | } | ||
419 | #endif /* EFI_DEBUG */ | 394 | #endif /* EFI_DEBUG */ |
395 | } | ||
420 | 396 | ||
421 | void __init efi_reserve_boot_services(void) | 397 | void __init efi_reserve_boot_services(void) |
422 | { | 398 | { |
@@ -436,15 +412,14 @@ void __init efi_reserve_boot_services(void) | |||
436 | * - Not within any part of the kernel | 412 | * - Not within any part of the kernel |
437 | * - Not the bios reserved area | 413 | * - Not the bios reserved area |
438 | */ | 414 | */ |
439 | if ((start+size >= __pa_symbol(_text) | 415 | if ((start + size > __pa_symbol(_text) |
440 | && start <= __pa_symbol(_end)) || | 416 | && start <= __pa_symbol(_end)) || |
441 | !e820_all_mapped(start, start+size, E820_RAM) || | 417 | !e820_all_mapped(start, start+size, E820_RAM) || |
442 | memblock_is_region_reserved(start, size)) { | 418 | memblock_is_region_reserved(start, size)) { |
443 | /* Could not reserve, skip it */ | 419 | /* Could not reserve, skip it */ |
444 | md->num_pages = 0; | 420 | md->num_pages = 0; |
445 | memblock_dbg("Could not reserve boot range " | 421 | memblock_dbg("Could not reserve boot range [0x%010llx-0x%010llx]\n", |
446 | "[0x%010llx-0x%010llx]\n", | 422 | start, start+size-1); |
447 | start, start+size-1); | ||
448 | } else | 423 | } else |
449 | memblock_reserve(start, size); | 424 | memblock_reserve(start, size); |
450 | } | 425 | } |
@@ -452,7 +427,7 @@ void __init efi_reserve_boot_services(void) | |||
452 | 427 | ||
453 | void __init efi_unmap_memmap(void) | 428 | void __init efi_unmap_memmap(void) |
454 | { | 429 | { |
455 | clear_bit(EFI_MEMMAP, &x86_efi_facility); | 430 | clear_bit(EFI_MEMMAP, &efi.flags); |
456 | if (memmap.map) { | 431 | if (memmap.map) { |
457 | early_iounmap(memmap.map, memmap.nr_map * memmap.desc_size); | 432 | early_iounmap(memmap.map, memmap.nr_map * memmap.desc_size); |
458 | memmap.map = NULL; | 433 | memmap.map = NULL; |
@@ -463,9 +438,6 @@ void __init efi_free_boot_services(void) | |||
463 | { | 438 | { |
464 | void *p; | 439 | void *p; |
465 | 440 | ||
466 | if (!efi_is_native()) | ||
467 | return; | ||
468 | |||
469 | for (p = memmap.map; p < memmap.map_end; p += memmap.desc_size) { | 441 | for (p = memmap.map; p < memmap.map_end; p += memmap.desc_size) { |
470 | efi_memory_desc_t *md = p; | 442 | efi_memory_desc_t *md = p; |
471 | unsigned long long start = md->phys_addr; | 443 | unsigned long long start = md->phys_addr; |
@@ -489,18 +461,27 @@ static int __init efi_systab_init(void *phys) | |||
489 | { | 461 | { |
490 | if (efi_enabled(EFI_64BIT)) { | 462 | if (efi_enabled(EFI_64BIT)) { |
491 | efi_system_table_64_t *systab64; | 463 | efi_system_table_64_t *systab64; |
464 | struct efi_setup_data *data = NULL; | ||
492 | u64 tmp = 0; | 465 | u64 tmp = 0; |
493 | 466 | ||
467 | if (efi_setup) { | ||
468 | data = early_memremap(efi_setup, sizeof(*data)); | ||
469 | if (!data) | ||
470 | return -ENOMEM; | ||
471 | } | ||
494 | systab64 = early_ioremap((unsigned long)phys, | 472 | systab64 = early_ioremap((unsigned long)phys, |
495 | sizeof(*systab64)); | 473 | sizeof(*systab64)); |
496 | if (systab64 == NULL) { | 474 | if (systab64 == NULL) { |
497 | pr_err("Couldn't map the system table!\n"); | 475 | pr_err("Couldn't map the system table!\n"); |
476 | if (data) | ||
477 | early_iounmap(data, sizeof(*data)); | ||
498 | return -ENOMEM; | 478 | return -ENOMEM; |
499 | } | 479 | } |
500 | 480 | ||
501 | efi_systab.hdr = systab64->hdr; | 481 | efi_systab.hdr = systab64->hdr; |
502 | efi_systab.fw_vendor = systab64->fw_vendor; | 482 | efi_systab.fw_vendor = data ? (unsigned long)data->fw_vendor : |
503 | tmp |= systab64->fw_vendor; | 483 | systab64->fw_vendor; |
484 | tmp |= data ? data->fw_vendor : systab64->fw_vendor; | ||
504 | efi_systab.fw_revision = systab64->fw_revision; | 485 | efi_systab.fw_revision = systab64->fw_revision; |
505 | efi_systab.con_in_handle = systab64->con_in_handle; | 486 | efi_systab.con_in_handle = systab64->con_in_handle; |
506 | tmp |= systab64->con_in_handle; | 487 | tmp |= systab64->con_in_handle; |
@@ -514,15 +495,20 @@ static int __init efi_systab_init(void *phys) | |||
514 | tmp |= systab64->stderr_handle; | 495 | tmp |= systab64->stderr_handle; |
515 | efi_systab.stderr = systab64->stderr; | 496 | efi_systab.stderr = systab64->stderr; |
516 | tmp |= systab64->stderr; | 497 | tmp |= systab64->stderr; |
517 | efi_systab.runtime = (void *)(unsigned long)systab64->runtime; | 498 | efi_systab.runtime = data ? |
518 | tmp |= systab64->runtime; | 499 | (void *)(unsigned long)data->runtime : |
500 | (void *)(unsigned long)systab64->runtime; | ||
501 | tmp |= data ? data->runtime : systab64->runtime; | ||
519 | efi_systab.boottime = (void *)(unsigned long)systab64->boottime; | 502 | efi_systab.boottime = (void *)(unsigned long)systab64->boottime; |
520 | tmp |= systab64->boottime; | 503 | tmp |= systab64->boottime; |
521 | efi_systab.nr_tables = systab64->nr_tables; | 504 | efi_systab.nr_tables = systab64->nr_tables; |
522 | efi_systab.tables = systab64->tables; | 505 | efi_systab.tables = data ? (unsigned long)data->tables : |
523 | tmp |= systab64->tables; | 506 | systab64->tables; |
507 | tmp |= data ? data->tables : systab64->tables; | ||
524 | 508 | ||
525 | early_iounmap(systab64, sizeof(*systab64)); | 509 | early_iounmap(systab64, sizeof(*systab64)); |
510 | if (data) | ||
511 | early_iounmap(data, sizeof(*data)); | ||
526 | #ifdef CONFIG_X86_32 | 512 | #ifdef CONFIG_X86_32 |
527 | if (tmp >> 32) { | 513 | if (tmp >> 32) { |
528 | pr_err("EFI data located above 4GB, disabling EFI.\n"); | 514 | pr_err("EFI data located above 4GB, disabling EFI.\n"); |
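The tmp accumulator in the hunk above is a compact 4GB sanity check: every 64-bit firmware value is OR-ed into tmp, so a single test of the high word catches any pointer a 32-bit kernel could not address. A minimal sketch of the pattern, using field names from the hunk:

	u64 tmp = 0;

	tmp |= systab64->fw_vendor;	/* OR in every 64-bit firmware pointer */
	tmp |= systab64->runtime;
	tmp |= systab64->tables;

	if (tmp >> 32)			/* X86_32 only: one test covers them all */
		return -EINVAL;		/* some EFI data lives above 4GB */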
@@ -566,45 +552,82 @@ static int __init efi_systab_init(void *phys) | |||
566 | return -EINVAL; | 552 | return -EINVAL; |
567 | } | 553 | } |
568 | if ((efi.systab->hdr.revision >> 16) == 0) | 554 | if ((efi.systab->hdr.revision >> 16) == 0) |
569 | pr_err("Warning: System table version " | 555 | pr_err("Warning: System table version %d.%02d, expected 1.00 or greater!\n", |
570 | "%d.%02d, expected 1.00 or greater!\n", | ||
571 | efi.systab->hdr.revision >> 16, | 556 | efi.systab->hdr.revision >> 16, |
572 | efi.systab->hdr.revision & 0xffff); | 557 | efi.systab->hdr.revision & 0xffff); |
573 | 558 | ||
559 | set_bit(EFI_SYSTEM_TABLES, &efi.flags); | ||
560 | |||
574 | return 0; | 561 | return 0; |
575 | } | 562 | } |
576 | 563 | ||
577 | static int __init efi_runtime_init(void) | 564 | static int __init efi_runtime_init32(void) |
578 | { | 565 | { |
579 | efi_runtime_services_t *runtime; | 566 | efi_runtime_services_32_t *runtime; |
567 | |||
568 | runtime = early_ioremap((unsigned long)efi.systab->runtime, | ||
569 | sizeof(efi_runtime_services_32_t)); | ||
570 | if (!runtime) { | ||
571 | pr_err("Could not map the runtime service table!\n"); | ||
572 | return -ENOMEM; | ||
573 | } | ||
580 | 574 | ||
581 | /* | 575 | /* |
582 | * Check out the runtime services table. We need to map | 576 | * We will only need *early* access to the following |
583 | * the runtime services table so that we can grab the physical | 577 | * EFI runtime service before set_virtual_address_map |
584 | * address of several of the EFI runtime functions, needed to | 578 | * is invoked. |
585 | * set the firmware into virtual mode. | ||
586 | */ | 579 | */ |
580 | efi_phys.set_virtual_address_map = | ||
581 | (efi_set_virtual_address_map_t *) | ||
582 | (unsigned long)runtime->set_virtual_address_map; | ||
583 | early_iounmap(runtime, sizeof(efi_runtime_services_32_t)); | ||
584 | |||
585 | return 0; | ||
586 | } | ||
587 | |||
588 | static int __init efi_runtime_init64(void) | ||
589 | { | ||
590 | efi_runtime_services_64_t *runtime; | ||
591 | |||
587 | runtime = early_ioremap((unsigned long)efi.systab->runtime, | 592 | runtime = early_ioremap((unsigned long)efi.systab->runtime, |
588 | sizeof(efi_runtime_services_t)); | 593 | sizeof(efi_runtime_services_64_t)); |
589 | if (!runtime) { | 594 | if (!runtime) { |
590 | pr_err("Could not map the runtime service table!\n"); | 595 | pr_err("Could not map the runtime service table!\n"); |
591 | return -ENOMEM; | 596 | return -ENOMEM; |
592 | } | 597 | } |
598 | |||
593 | /* | 599 | /* |
594 | * We will only need *early* access to the following | 600 | * We will only need *early* access to the following |
595 | * two EFI runtime services before set_virtual_address_map | 601 | * EFI runtime service before set_virtual_address_map |
596 | * is invoked. | 602 | * is invoked. |
597 | */ | 603 | */ |
598 | efi_phys.get_time = (efi_get_time_t *)runtime->get_time; | ||
599 | efi_phys.set_virtual_address_map = | 604 | efi_phys.set_virtual_address_map = |
600 | (efi_set_virtual_address_map_t *) | 605 | (efi_set_virtual_address_map_t *) |
601 | runtime->set_virtual_address_map; | 606 | (unsigned long)runtime->set_virtual_address_map; |
607 | early_iounmap(runtime, sizeof(efi_runtime_services_64_t)); | ||
608 | |||
609 | return 0; | ||
610 | } | ||
611 | |||
612 | static int __init efi_runtime_init(void) | ||
613 | { | ||
614 | int rv; | ||
615 | |||
602 | /* | 616 | /* |
603 | * Make efi_get_time can be called before entering | 617 | * Check out the runtime services table. We need to map |
604 | * virtual mode. | 618 | * the runtime services table so that we can grab the physical |
619 | * address of several of the EFI runtime functions, needed to | ||
620 | * set the firmware into virtual mode. | ||
605 | */ | 621 | */ |
606 | efi.get_time = phys_efi_get_time; | 622 | if (efi_enabled(EFI_64BIT)) |
607 | early_iounmap(runtime, sizeof(efi_runtime_services_t)); | 623 | rv = efi_runtime_init64(); |
624 | else | ||
625 | rv = efi_runtime_init32(); | ||
626 | |||
627 | if (rv) | ||
628 | return rv; | ||
629 | |||
630 | set_bit(EFI_RUNTIME_SERVICES, &efi.flags); | ||
608 | 631 | ||
609 | return 0; | 632 | return 0; |
610 | } | 633 | } |
@@ -623,9 +646,67 @@ static int __init efi_memmap_init(void) | |||
623 | if (add_efi_memmap) | 646 | if (add_efi_memmap) |
624 | do_add_efi_memmap(); | 647 | do_add_efi_memmap(); |
625 | 648 | ||
649 | set_bit(EFI_MEMMAP, &efi.flags); | ||
650 | |||
626 | return 0; | 651 | return 0; |
627 | } | 652 | } |
628 | 653 | ||
654 | /* | ||
655 | * A number of config table entries get remapped to virtual addresses | ||
656 | * after entering EFI virtual mode. However, the kexec kernel requires | ||
657 | * their physical addresses, so we pass them via setup_data and | ||
658 | * correct those entries to their respective physical addresses here. | ||
659 | * | ||
660 | * Currently this only handles SMBIOS, which is necessary for some | ||
661 | * firmware implementations. | ||
662 | */ | ||
663 | static int __init efi_reuse_config(u64 tables, int nr_tables) | ||
664 | { | ||
665 | int i, sz, ret = 0; | ||
666 | void *p, *tablep; | ||
667 | struct efi_setup_data *data; | ||
668 | |||
669 | if (!efi_setup) | ||
670 | return 0; | ||
671 | |||
672 | if (!efi_enabled(EFI_64BIT)) | ||
673 | return 0; | ||
674 | |||
675 | data = early_memremap(efi_setup, sizeof(*data)); | ||
676 | if (!data) { | ||
677 | ret = -ENOMEM; | ||
678 | goto out; | ||
679 | } | ||
680 | |||
681 | if (!data->smbios) | ||
682 | goto out_memremap; | ||
683 | |||
684 | sz = sizeof(efi_config_table_64_t); | ||
685 | |||
686 | p = tablep = early_memremap(tables, nr_tables * sz); | ||
687 | if (!p) { | ||
688 | pr_err("Could not map Configuration table!\n"); | ||
689 | ret = -ENOMEM; | ||
690 | goto out_memremap; | ||
691 | } | ||
692 | |||
693 | for (i = 0; i < efi.systab->nr_tables; i++) { | ||
694 | efi_guid_t guid; | ||
695 | |||
696 | guid = ((efi_config_table_64_t *)p)->guid; | ||
697 | |||
698 | if (!efi_guidcmp(guid, SMBIOS_TABLE_GUID)) | ||
699 | ((efi_config_table_64_t *)p)->table = data->smbios; | ||
700 | p += sz; | ||
701 | } | ||
702 | early_iounmap(tablep, nr_tables * sz); | ||
703 | |||
704 | out_memremap: | ||
705 | early_iounmap(data, sizeof(*data)); | ||
706 | out: | ||
707 | return ret; | ||
708 | } | ||
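The layout of struct efi_setup_data is defined outside this hunk; the following is a sketch consistent with the fields dereferenced above (fw_vendor, runtime, tables, smbios) — the reserved tail is an assumption:

	struct efi_setup_data {
		u64 fw_vendor;		/* physical address of the firmware vendor string */
		u64 runtime;		/* physical address of the runtime services table */
		u64 tables;		/* physical address of the config table array */
		u64 smbios;		/* physical address of the SMBIOS table */
		u64 reserved[8];	/* assumed padding for future use */
	};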
709 | |||
629 | void __init efi_init(void) | 710 | void __init efi_init(void) |
630 | { | 711 | { |
631 | efi_char16_t *c16; | 712 | efi_char16_t *c16; |
@@ -649,7 +730,11 @@ void __init efi_init(void) | |||
649 | if (efi_systab_init(efi_phys.systab)) | 730 | if (efi_systab_init(efi_phys.systab)) |
650 | return; | 731 | return; |
651 | 732 | ||
652 | set_bit(EFI_SYSTEM_TABLES, &x86_efi_facility); | 733 | set_bit(EFI_SYSTEM_TABLES, &efi.flags); |
734 | |||
735 | efi.config_table = (unsigned long)efi.systab->tables; | ||
736 | efi.fw_vendor = (unsigned long)efi.systab->fw_vendor; | ||
737 | efi.runtime = (unsigned long)efi.systab->runtime; | ||
653 | 738 | ||
654 | /* | 739 | /* |
655 | * Show what we know for posterity | 740 | * Show what we know for posterity |
@@ -667,32 +752,29 @@ void __init efi_init(void) | |||
667 | efi.systab->hdr.revision >> 16, | 752 | efi.systab->hdr.revision >> 16, |
668 | efi.systab->hdr.revision & 0xffff, vendor); | 753 | efi.systab->hdr.revision & 0xffff, vendor); |
669 | 754 | ||
670 | if (efi_config_init(arch_tables)) | 755 | if (efi_reuse_config(efi.systab->tables, efi.systab->nr_tables)) |
671 | return; | 756 | return; |
672 | 757 | ||
673 | set_bit(EFI_CONFIG_TABLES, &x86_efi_facility); | 758 | if (efi_config_init(arch_tables)) |
759 | return; | ||
674 | 760 | ||
675 | /* | 761 | /* |
676 | * Note: We currently don't support runtime services on an EFI | 762 | * Note: We currently don't support runtime services on an EFI |
677 | * that doesn't match the kernel 32/64-bit mode. | 763 | * that doesn't match the kernel 32/64-bit mode. |
678 | */ | 764 | */ |
679 | 765 | ||
680 | if (!efi_is_native()) | 766 | if (!efi_runtime_supported()) |
681 | pr_info("No EFI runtime due to 32/64-bit mismatch with kernel\n"); | 767 | pr_info("No EFI runtime due to 32/64-bit mismatch with kernel\n"); |
682 | else { | 768 | else { |
683 | if (disable_runtime || efi_runtime_init()) | 769 | if (disable_runtime || efi_runtime_init()) |
684 | return; | 770 | return; |
685 | set_bit(EFI_RUNTIME_SERVICES, &x86_efi_facility); | ||
686 | } | 771 | } |
687 | |||
688 | if (efi_memmap_init()) | 772 | if (efi_memmap_init()) |
689 | return; | 773 | return; |
690 | 774 | ||
691 | set_bit(EFI_MEMMAP, &x86_efi_facility); | 775 | set_bit(EFI_MEMMAP, &efi.flags); |
692 | 776 | ||
693 | #if EFI_DEBUG | ||
694 | print_efi_memmap(); | 777 | print_efi_memmap(); |
695 | #endif | ||
696 | } | 778 | } |
697 | 779 | ||
698 | void __init efi_late_init(void) | 780 | void __init efi_late_init(void) |
@@ -715,7 +797,7 @@ void __init efi_set_executable(efi_memory_desc_t *md, bool executable) | |||
715 | set_memory_nx(addr, npages); | 797 | set_memory_nx(addr, npages); |
716 | } | 798 | } |
717 | 799 | ||
718 | static void __init runtime_code_page_mkexec(void) | 800 | void __init runtime_code_page_mkexec(void) |
719 | { | 801 | { |
720 | efi_memory_desc_t *md; | 802 | efi_memory_desc_t *md; |
721 | void *p; | 803 | void *p; |
@@ -741,36 +823,54 @@ void efi_memory_uc(u64 addr, unsigned long size) | |||
741 | set_memory_uc(addr, npages); | 823 | set_memory_uc(addr, npages); |
742 | } | 824 | } |
743 | 825 | ||
744 | /* | 826 | void __init old_map_region(efi_memory_desc_t *md) |
745 | * This function will switch the EFI runtime services to virtual mode. | ||
746 | * Essentially, look through the EFI memmap and map every region that | ||
747 | * has the runtime attribute bit set in its memory descriptor and update | ||
748 | * that memory descriptor with the virtual address obtained from ioremap(). | ||
749 | * This enables the runtime services to be called without having to | ||
750 | * thunk back into physical mode for every invocation. | ||
751 | */ | ||
752 | void __init efi_enter_virtual_mode(void) | ||
753 | { | 827 | { |
754 | efi_memory_desc_t *md, *prev_md = NULL; | 828 | u64 start_pfn, end_pfn, end; |
755 | efi_status_t status; | ||
756 | unsigned long size; | 829 | unsigned long size; |
757 | u64 end, systab, start_pfn, end_pfn; | 830 | void *va; |
758 | void *p, *va, *new_memmap = NULL; | ||
759 | int count = 0; | ||
760 | 831 | ||
761 | efi.systab = NULL; | 832 | start_pfn = PFN_DOWN(md->phys_addr); |
833 | size = md->num_pages << PAGE_SHIFT; | ||
834 | end = md->phys_addr + size; | ||
835 | end_pfn = PFN_UP(end); | ||
762 | 836 | ||
763 | /* | 837 | if (pfn_range_is_mapped(start_pfn, end_pfn)) { |
764 | * We don't do virtual mode, since we don't do runtime services, on | 838 | va = __va(md->phys_addr); |
765 | * non-native EFI | ||
766 | */ | ||
767 | 839 | ||
768 | if (!efi_is_native()) { | 840 | if (!(md->attribute & EFI_MEMORY_WB)) |
769 | efi_unmap_memmap(); | 841 | efi_memory_uc((u64)(unsigned long)va, size); |
770 | return; | 842 | } else |
771 | } | 843 | va = efi_ioremap(md->phys_addr, size, |
844 | md->type, md->attribute); | ||
845 | |||
846 | md->virt_addr = (u64) (unsigned long) va; | ||
847 | if (!va) | ||
848 | pr_err("ioremap of 0x%llX failed!\n", | ||
849 | (unsigned long long)md->phys_addr); | ||
850 | } | ||
851 | |||
852 | static void native_runtime_setup(void) | ||
853 | { | ||
854 | efi.get_time = virt_efi_get_time; | ||
855 | efi.set_time = virt_efi_set_time; | ||
856 | efi.get_wakeup_time = virt_efi_get_wakeup_time; | ||
857 | efi.set_wakeup_time = virt_efi_set_wakeup_time; | ||
858 | efi.get_variable = virt_efi_get_variable; | ||
859 | efi.get_next_variable = virt_efi_get_next_variable; | ||
860 | efi.set_variable = virt_efi_set_variable; | ||
861 | efi.get_next_high_mono_count = virt_efi_get_next_high_mono_count; | ||
862 | efi.reset_system = virt_efi_reset_system; | ||
863 | efi.query_variable_info = virt_efi_query_variable_info; | ||
864 | efi.update_capsule = virt_efi_update_capsule; | ||
865 | efi.query_capsule_caps = virt_efi_query_capsule_caps; | ||
866 | } | ||
867 | |||
868 | /* Merge contiguous regions of the same type and attribute */ | ||
869 | static void __init efi_merge_regions(void) | ||
870 | { | ||
871 | void *p; | ||
872 | efi_memory_desc_t *md, *prev_md = NULL; | ||
772 | 873 | ||
773 | /* Merge contiguous regions of the same type and attribute */ | ||
774 | for (p = memmap.map; p < memmap.map_end; p += memmap.desc_size) { | 874 | for (p = memmap.map; p < memmap.map_end; p += memmap.desc_size) { |
775 | u64 prev_size; | 875 | u64 prev_size; |
776 | md = p; | 876 | md = p; |
@@ -796,6 +896,84 @@ void __init efi_enter_virtual_mode(void) | |||
796 | } | 896 | } |
797 | prev_md = md; | 897 | prev_md = md; |
798 | } | 898 | } |
899 | } | ||
900 | |||
901 | static void __init get_systab_virt_addr(efi_memory_desc_t *md) | ||
902 | { | ||
903 | unsigned long size; | ||
904 | u64 end, systab; | ||
905 | |||
906 | size = md->num_pages << EFI_PAGE_SHIFT; | ||
907 | end = md->phys_addr + size; | ||
908 | systab = (u64)(unsigned long)efi_phys.systab; | ||
909 | if (md->phys_addr <= systab && systab < end) { | ||
910 | systab += md->virt_addr - md->phys_addr; | ||
911 | efi.systab = (efi_system_table_t *)(unsigned long)systab; | ||
912 | } | ||
913 | } | ||
914 | |||
915 | static void __init save_runtime_map(void) | ||
916 | { | ||
917 | #ifdef CONFIG_KEXEC | ||
918 | efi_memory_desc_t *md; | ||
919 | void *tmp, *p, *q = NULL; | ||
920 | int count = 0; | ||
921 | |||
922 | for (p = memmap.map; p < memmap.map_end; p += memmap.desc_size) { | ||
923 | md = p; | ||
924 | |||
925 | if (!(md->attribute & EFI_MEMORY_RUNTIME) || | ||
926 | (md->type == EFI_BOOT_SERVICES_CODE) || | ||
927 | (md->type == EFI_BOOT_SERVICES_DATA)) | ||
928 | continue; | ||
929 | tmp = krealloc(q, (count + 1) * memmap.desc_size, GFP_KERNEL); | ||
930 | if (!tmp) | ||
931 | goto out; | ||
932 | q = tmp; | ||
933 | |||
934 | memcpy(q + count * memmap.desc_size, md, memmap.desc_size); | ||
935 | count++; | ||
936 | } | ||
937 | |||
938 | efi_runtime_map_setup(q, count, memmap.desc_size); | ||
939 | return; | ||
940 | |||
941 | out: | ||
942 | kfree(q); | ||
943 | pr_err("Error saving runtime map, efi runtime on kexec non-functional!!\n"); | ||
944 | #endif | ||
945 | } | ||
946 | |||
947 | static void *realloc_pages(void *old_memmap, int old_shift) | ||
948 | { | ||
949 | void *ret; | ||
950 | |||
951 | ret = (void *)__get_free_pages(GFP_KERNEL, old_shift + 1); | ||
952 | if (!ret) | ||
953 | goto out; | ||
954 | |||
955 | /* | ||
956 | * A first-time allocation doesn't have anything to copy. | ||
957 | */ | ||
958 | if (!old_memmap) | ||
959 | return ret; | ||
960 | |||
961 | memcpy(ret, old_memmap, PAGE_SIZE << old_shift); | ||
962 | |||
963 | out: | ||
964 | free_pages((unsigned long)old_memmap, old_shift); | ||
965 | return ret; | ||
966 | } | ||
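realloc_pages() grows the buffer one order at a time so that efi_map_regions() below can append descriptors without knowing the final count up front. A worked trace, assuming 4K pages:

	/* pg_shift == 0: __get_free_pages(GFP_KERNEL, 1) returns 2 pages (8K),
	 *                but the caller credits only PAGE_SIZE << 0 = 4K to 'left';
	 * pg_shift == 1: the next call returns 4 pages (16K), 'left' += 8K;
	 * pg_shift == 2: 8 pages (32K), 'left' += 16K, and so on.
	 * The accounting is conservative, so the buffer never overruns.
	 */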
967 | |||
968 | /* | ||
969 | * Map the efi memory ranges of the runtime services and update new_mmap with | ||
970 | * virtual addresses. | ||
971 | */ | ||
972 | static void * __init efi_map_regions(int *count, int *pg_shift) | ||
973 | { | ||
974 | void *p, *new_memmap = NULL; | ||
975 | unsigned long left = 0; | ||
976 | efi_memory_desc_t *md; | ||
799 | 977 | ||
800 | for (p = memmap.map; p < memmap.map_end; p += memmap.desc_size) { | 978 | for (p = memmap.map; p < memmap.map_end; p += memmap.desc_size) { |
801 | md = p; | 979 | md = p; |
@@ -807,52 +985,150 @@ void __init efi_enter_virtual_mode(void) | |||
807 | continue; | 985 | continue; |
808 | } | 986 | } |
809 | 987 | ||
810 | size = md->num_pages << EFI_PAGE_SHIFT; | 988 | efi_map_region(md); |
811 | end = md->phys_addr + size; | 989 | get_systab_virt_addr(md); |
812 | 990 | ||
813 | start_pfn = PFN_DOWN(md->phys_addr); | 991 | if (left < memmap.desc_size) { |
814 | end_pfn = PFN_UP(end); | 992 | new_memmap = realloc_pages(new_memmap, *pg_shift); |
815 | if (pfn_range_is_mapped(start_pfn, end_pfn)) { | 993 | if (!new_memmap) |
816 | va = __va(md->phys_addr); | 994 | return NULL; |
817 | 995 | ||
818 | if (!(md->attribute & EFI_MEMORY_WB)) | 996 | left += PAGE_SIZE << *pg_shift; |
819 | efi_memory_uc((u64)(unsigned long)va, size); | 997 | (*pg_shift)++; |
820 | } else | ||
821 | va = efi_ioremap(md->phys_addr, size, | ||
822 | md->type, md->attribute); | ||
823 | |||
824 | md->virt_addr = (u64) (unsigned long) va; | ||
825 | |||
826 | if (!va) { | ||
827 | pr_err("ioremap of 0x%llX failed!\n", | ||
828 | (unsigned long long)md->phys_addr); | ||
829 | continue; | ||
830 | } | 998 | } |
831 | 999 | ||
832 | systab = (u64) (unsigned long) efi_phys.systab; | 1000 | memcpy(new_memmap + (*count * memmap.desc_size), md, |
833 | if (md->phys_addr <= systab && systab < end) { | ||
834 | systab += md->virt_addr - md->phys_addr; | ||
835 | efi.systab = (efi_system_table_t *) (unsigned long) systab; | ||
836 | } | ||
837 | new_memmap = krealloc(new_memmap, | ||
838 | (count + 1) * memmap.desc_size, | ||
839 | GFP_KERNEL); | ||
840 | memcpy(new_memmap + (count * memmap.desc_size), md, | ||
841 | memmap.desc_size); | 1001 | memmap.desc_size); |
842 | count++; | 1002 | |
1003 | left -= memmap.desc_size; | ||
1004 | (*count)++; | ||
1005 | } | ||
1006 | |||
1007 | return new_memmap; | ||
1008 | } | ||
1009 | |||
1010 | static void __init kexec_enter_virtual_mode(void) | ||
1011 | { | ||
1012 | #ifdef CONFIG_KEXEC | ||
1013 | efi_memory_desc_t *md; | ||
1014 | void *p; | ||
1015 | |||
1016 | efi.systab = NULL; | ||
1017 | |||
1018 | /* | ||
1019 | * We don't do virtual mode, since we don't do runtime services, on | ||
1020 | * non-native EFI | ||
1021 | */ | ||
1022 | if (!efi_is_native()) { | ||
1023 | efi_unmap_memmap(); | ||
1024 | return; | ||
1025 | } | ||
1026 | |||
1027 | /* | ||
1028 | * Map the EFI regions which were passed via setup_data. The virt_addr is | ||
1029 | * a fixed address which was used in the first kernel of a kexec boot. | ||
1030 | */ | ||
1031 | for (p = memmap.map; p < memmap.map_end; p += memmap.desc_size) { | ||
1032 | md = p; | ||
1033 | efi_map_region_fixed(md); /* FIXME: add error handling */ | ||
1034 | get_systab_virt_addr(md); | ||
1035 | } | ||
1036 | |||
1037 | save_runtime_map(); | ||
1038 | |||
1039 | BUG_ON(!efi.systab); | ||
1040 | |||
1041 | efi_sync_low_kernel_mappings(); | ||
1042 | |||
1043 | /* | ||
1044 | * Now that EFI is in virtual mode, update the function | ||
1045 | * pointers in the runtime service table to the new virtual addresses. | ||
1046 | * | ||
1047 | * Call EFI services through wrapper functions. | ||
1048 | */ | ||
1049 | efi.runtime_version = efi_systab.hdr.revision; | ||
1050 | |||
1051 | native_runtime_setup(); | ||
1052 | |||
1053 | efi.set_virtual_address_map = NULL; | ||
1054 | |||
1055 | if (efi_enabled(EFI_OLD_MEMMAP) && (__supported_pte_mask & _PAGE_NX)) | ||
1056 | runtime_code_page_mkexec(); | ||
1057 | |||
1058 | /* clean DUMMY object */ | ||
1059 | efi.set_variable(efi_dummy_name, &EFI_DUMMY_GUID, | ||
1060 | EFI_VARIABLE_NON_VOLATILE | | ||
1061 | EFI_VARIABLE_BOOTSERVICE_ACCESS | | ||
1062 | EFI_VARIABLE_RUNTIME_ACCESS, | ||
1063 | 0, NULL); | ||
1064 | #endif | ||
1065 | } | ||
1066 | |||
1067 | /* | ||
1068 | * This function will switch the EFI runtime services to virtual mode. | ||
1069 | * Essentially, we look through the EFI memmap and map every region that | ||
1070 | * has the runtime attribute bit set in its memory descriptor into the | ||
1071 | * ->trampoline_pgd page table using a top-down VA allocation scheme. | ||
1072 | * | ||
1073 | * The old method, which updated each memory descriptor with the | ||
1074 | * virtual address obtained from ioremap(), is still supported when the | ||
1075 | * kernel is booted with efi=old_map on its command line. That old | ||
1076 | * method likewise enabled the runtime services to be called without | ||
1077 | * having to thunk back into physical mode for every invocation. | ||
1078 | * | ||
1079 | * The new method does a pagetable switch in a preemption-safe manner | ||
1080 | * so that we're in a different address space when calling a runtime | ||
1081 | * function. For passing function arguments we copy the PGDs of the | ||
1082 | * kernel page table into ->trampoline_pgd prior to each call. | ||
1083 | * | ||
1084 | * For a kexec boot specifically, the EFI runtime maps of the previous | ||
1085 | * kernel should be passed in via setup_data; the runtime ranges will | ||
1086 | * then be mapped to the same virtual addresses as in the first kernel, | ||
1087 | * see kexec_enter_virtual_mode(). | ||
1088 | */ | ||
1089 | static void __init __efi_enter_virtual_mode(void) | ||
1090 | { | ||
1091 | int count = 0, pg_shift = 0; | ||
1092 | void *new_memmap = NULL; | ||
1093 | efi_status_t status; | ||
1094 | |||
1095 | efi.systab = NULL; | ||
1096 | |||
1097 | efi_merge_regions(); | ||
1098 | new_memmap = efi_map_regions(&count, &pg_shift); | ||
1099 | if (!new_memmap) { | ||
1100 | pr_err("Error reallocating memory, EFI runtime non-functional!\n"); | ||
1101 | return; | ||
843 | } | 1102 | } |
844 | 1103 | ||
1104 | save_runtime_map(); | ||
1105 | |||
845 | BUG_ON(!efi.systab); | 1106 | BUG_ON(!efi.systab); |
846 | 1107 | ||
847 | status = phys_efi_set_virtual_address_map( | 1108 | if (efi_setup_page_tables(__pa(new_memmap), 1 << pg_shift)) |
848 | memmap.desc_size * count, | 1109 | return; |
849 | memmap.desc_size, | 1110 | |
850 | memmap.desc_version, | 1111 | efi_sync_low_kernel_mappings(); |
851 | (efi_memory_desc_t *)__pa(new_memmap)); | 1112 | efi_dump_pagetable(); |
1113 | |||
1114 | if (efi_is_native()) { | ||
1115 | status = phys_efi_set_virtual_address_map( | ||
1116 | memmap.desc_size * count, | ||
1117 | memmap.desc_size, | ||
1118 | memmap.desc_version, | ||
1119 | (efi_memory_desc_t *)__pa(new_memmap)); | ||
1120 | } else { | ||
1121 | status = efi_thunk_set_virtual_address_map( | ||
1122 | efi_phys.set_virtual_address_map, | ||
1123 | memmap.desc_size * count, | ||
1124 | memmap.desc_size, | ||
1125 | memmap.desc_version, | ||
1126 | (efi_memory_desc_t *)__pa(new_memmap)); | ||
1127 | } | ||
852 | 1128 | ||
853 | if (status != EFI_SUCCESS) { | 1129 | if (status != EFI_SUCCESS) { |
854 | pr_alert("Unable to switch EFI into virtual mode " | 1130 | pr_alert("Unable to switch EFI into virtual mode (status=%lx)!\n", |
855 | "(status=%lx)!\n", status); | 1131 | status); |
856 | panic("EFI call to SetVirtualAddressMap() failed!"); | 1132 | panic("EFI call to SetVirtualAddressMap() failed!"); |
857 | } | 1133 | } |
858 | 1134 | ||
@@ -863,23 +1139,43 @@ void __init efi_enter_virtual_mode(void) | |||
863 | * Call EFI services through wrapper functions. | 1139 | * Call EFI services through wrapper functions. |
864 | */ | 1140 | */ |
865 | efi.runtime_version = efi_systab.hdr.revision; | 1141 | efi.runtime_version = efi_systab.hdr.revision; |
866 | efi.get_time = virt_efi_get_time; | 1142 | |
867 | efi.set_time = virt_efi_set_time; | 1143 | if (efi_is_native()) |
868 | efi.get_wakeup_time = virt_efi_get_wakeup_time; | 1144 | native_runtime_setup(); |
869 | efi.set_wakeup_time = virt_efi_set_wakeup_time; | 1145 | else |
870 | efi.get_variable = virt_efi_get_variable; | 1146 | efi_thunk_runtime_setup(); |
871 | efi.get_next_variable = virt_efi_get_next_variable; | 1147 | |
872 | efi.set_variable = virt_efi_set_variable; | ||
873 | efi.get_next_high_mono_count = virt_efi_get_next_high_mono_count; | ||
874 | efi.reset_system = virt_efi_reset_system; | ||
875 | efi.set_virtual_address_map = NULL; | 1148 | efi.set_virtual_address_map = NULL; |
876 | efi.query_variable_info = virt_efi_query_variable_info; | ||
877 | efi.update_capsule = virt_efi_update_capsule; | ||
878 | efi.query_capsule_caps = virt_efi_query_capsule_caps; | ||
879 | if (__supported_pte_mask & _PAGE_NX) | ||
880 | runtime_code_page_mkexec(); | ||
881 | 1149 | ||
882 | kfree(new_memmap); | 1150 | efi_runtime_mkexec(); |
1151 | |||
1152 | /* | ||
1153 | * We mapped the descriptor array into the EFI pagetable above but we're | ||
1154 | * not unmapping it here. Here's why: | ||
1155 | * | ||
1156 | * We're copying select PGDs from the kernel page table to the EFI page | ||
1157 | * table and when we do so and make changes to those PGDs like unmapping | ||
1158 | * stuff from them, those changes appear in the kernel page table and we | ||
1159 | * go boom. | ||
1160 | * | ||
1161 | * From setup_real_mode(): | ||
1162 | * | ||
1163 | * ... | ||
1164 | * trampoline_pgd[0] = init_level4_pgt[pgd_index(__PAGE_OFFSET)].pgd; | ||
1165 | * | ||
1166 | * In this particular case, our allocation is in PGD 0 of the EFI page | ||
1167 | * table, but we've copied that PGD from PGD[272] of the kernel page table: | ||
1168 | * | ||
1169 | * pgd_index(__PAGE_OFFSET = 0xffff880000000000) = 272 | ||
1170 | * | ||
1171 | * where the direct memory mapping in kernel space is. | ||
1172 | * | ||
1173 | * new_memmap's VA comes from that direct mapping, and thus clearing it | ||
1174 | * here would clear it in the kernel page table too. | ||
1175 | * | ||
1176 | * efi_cleanup_page_tables(__pa(new_memmap), 1 << pg_shift); | ||
1177 | */ | ||
1178 | free_pages((unsigned long)new_memmap, pg_shift); | ||
883 | 1179 | ||
884 | /* clean DUMMY object */ | 1180 | /* clean DUMMY object */ |
885 | efi.set_variable(efi_dummy_name, &EFI_DUMMY_GUID, | 1181 | efi.set_variable(efi_dummy_name, &EFI_DUMMY_GUID, |
@@ -889,6 +1185,14 @@ void __init efi_enter_virtual_mode(void) | |||
889 | 0, NULL); | 1185 | 0, NULL); |
890 | } | 1186 | } |
891 | 1187 | ||
1188 | void __init efi_enter_virtual_mode(void) | ||
1189 | { | ||
1190 | if (efi_setup) | ||
1191 | kexec_enter_virtual_mode(); | ||
1192 | else | ||
1193 | __efi_enter_virtual_mode(); | ||
1194 | } | ||
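The PGD[272] figure in the comment above falls straight out of x86-64 page table geometry (PGDIR_SHIFT = 39, PTRS_PER_PGD = 512):

	/* pgd_index(va) = (va >> PGDIR_SHIFT) & (PTRS_PER_PGD - 1) */
	(0xffff880000000000UL >> 39) & 511	/* = 272, the direct-mapping PGD */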
1195 | |||
892 | /* | 1196 | /* |
893 | * Convenience functions to obtain memory types and attributes | 1197 | * Convenience functions to obtain memory types and attributes |
894 | */ | 1198 | */ |
@@ -926,9 +1230,8 @@ u64 efi_mem_attributes(unsigned long phys_addr) | |||
926 | } | 1230 | } |
927 | 1231 | ||
928 | /* | 1232 | /* |
929 | * Some firmware has serious problems when using more than 50% of the EFI | 1233 | * Some firmware implementations refuse to boot if there's insufficient space |
930 | * variable store, i.e. it triggers bugs that can brick machines. Ensure that | 1234 | * in the variable store. Ensure that we never use more than a safe limit. |
931 | * we never use more than this safe limit. | ||
932 | * | 1235 | * |
933 | * Return EFI_SUCCESS if it is safe to write 'size' bytes to the variable | 1236 | * Return EFI_SUCCESS if it is safe to write 'size' bytes to the variable |
934 | * store. | 1237 | * store. |
@@ -947,10 +1250,9 @@ efi_status_t efi_query_variable_store(u32 attributes, unsigned long size) | |||
947 | return status; | 1250 | return status; |
948 | 1251 | ||
949 | /* | 1252 | /* |
950 | * Some firmware implementations refuse to boot if there's insufficient | 1253 | * We account for that by refusing the write if permitting it would |
951 | * space in the variable store. We account for that by refusing the | 1254 | * reduce the available space to under 5KB. This figure was provided by |
952 | * write if permitting it would reduce the available space to under | 1255 | * Samsung, so should be safe. |
953 | * 5KB. This figure was provided by Samsung, so should be safe. | ||
954 | */ | 1256 | */ |
955 | if ((remaining_size - size < EFI_MIN_RESERVE) && | 1257 | if ((remaining_size - size < EFI_MIN_RESERVE) && |
956 | !efi_no_storage_paranoia) { | 1258 | !efi_no_storage_paranoia) { |
@@ -1006,3 +1308,34 @@ efi_status_t efi_query_variable_store(u32 attributes, unsigned long size) | |||
1006 | return EFI_SUCCESS; | 1308 | return EFI_SUCCESS; |
1007 | } | 1309 | } |
1008 | EXPORT_SYMBOL_GPL(efi_query_variable_store); | 1310 | EXPORT_SYMBOL_GPL(efi_query_variable_store); |
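A worked example of the reserve check above, using the 5KB EFI_MIN_RESERVE figure from the comment: with 6KB remaining in the variable store, a 2KB write would leave 4KB free and is refused, while a 1KB write leaving 5KB goes through; when efi_no_storage_paranoia is set, the check is skipped entirely.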
1311 | |||
1312 | static int __init parse_efi_cmdline(char *str) | ||
1313 | { | ||
1314 | if (*str == '=') | ||
1315 | str++; | ||
1316 | |||
1317 | if (!strncmp(str, "old_map", 7)) | ||
1318 | set_bit(EFI_OLD_MEMMAP, &efi.flags); | ||
1319 | |||
1320 | return 0; | ||
1321 | } | ||
1322 | early_param("efi", parse_efi_cmdline); | ||
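As a usage note, the legacy ioremap()-based mapping described in the __efi_enter_virtual_mode() comment is selected by booting the kernel with:

	efi=old_map

which sets EFI_OLD_MEMMAP; efi_apply_memmap_quirks() below forces the same bit on SGI UV systems, which do not support the new pagetable mapping yet.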
1323 | |||
1324 | void __init efi_apply_memmap_quirks(void) | ||
1325 | { | ||
1326 | /* | ||
1327 | * Once setup is done earlier, unmap the EFI memory map on mismatched | ||
1328 | * firmware/kernel architectures since there is no support for runtime | ||
1329 | * services. | ||
1330 | */ | ||
1331 | if (!efi_runtime_supported()) { | ||
1332 | pr_info("efi: Setup done, disabling due to 32/64-bit mismatch\n"); | ||
1333 | efi_unmap_memmap(); | ||
1334 | } | ||
1335 | |||
1336 | /* | ||
1337 | * UV doesn't support the new EFI pagetable mapping yet. | ||
1338 | */ | ||
1339 | if (is_uv_system()) | ||
1340 | set_bit(EFI_OLD_MEMMAP, &efi.flags); | ||
1341 | } | ||
diff --git a/arch/x86/platform/efi/efi_32.c b/arch/x86/platform/efi/efi_32.c index 40e446941dd7..9ee3491e31fb 100644 --- a/arch/x86/platform/efi/efi_32.c +++ b/arch/x86/platform/efi/efi_32.c | |||
@@ -37,9 +37,24 @@ | |||
37 | * claim the EFI runtime service handler exclusively and to duplicate a memory | 37 | * claim the EFI runtime service handler exclusively and to duplicate a memory |
38 | * region in low memory space, say 0 - 3G. | 38 | * region in low memory space, say 0 - 3G. |
39 | */ | 39 | */ |
40 | |||
41 | static unsigned long efi_rt_eflags; | 40 | static unsigned long efi_rt_eflags; |
42 | 41 | ||
42 | void efi_sync_low_kernel_mappings(void) {} | ||
43 | void __init efi_dump_pagetable(void) {} | ||
44 | int efi_setup_page_tables(unsigned long pa_memmap, unsigned num_pages) | ||
45 | { | ||
46 | return 0; | ||
47 | } | ||
48 | void efi_cleanup_page_tables(unsigned long pa_memmap, unsigned num_pages) {} | ||
49 | |||
50 | void __init efi_map_region(efi_memory_desc_t *md) | ||
51 | { | ||
52 | old_map_region(md); | ||
53 | } | ||
54 | |||
55 | void __init efi_map_region_fixed(efi_memory_desc_t *md) {} | ||
56 | void __init parse_efi_setup(u64 phys_addr, u32 data_len) {} | ||
57 | |||
43 | void efi_call_phys_prelog(void) | 58 | void efi_call_phys_prelog(void) |
44 | { | 59 | { |
45 | struct desc_ptr gdt_descr; | 60 | struct desc_ptr gdt_descr; |
@@ -67,3 +82,9 @@ void efi_call_phys_epilog(void) | |||
67 | 82 | ||
68 | local_irq_restore(efi_rt_eflags); | 83 | local_irq_restore(efi_rt_eflags); |
69 | } | 84 | } |
85 | |||
86 | void __init efi_runtime_mkexec(void) | ||
87 | { | ||
88 | if (__supported_pte_mask & _PAGE_NX) | ||
89 | runtime_code_page_mkexec(); | ||
90 | } | ||
diff --git a/arch/x86/platform/efi/efi_64.c b/arch/x86/platform/efi/efi_64.c index 39a0e7f1f0a3..290d397e1dd9 100644 --- a/arch/x86/platform/efi/efi_64.c +++ b/arch/x86/platform/efi/efi_64.c | |||
@@ -38,10 +38,30 @@ | |||
38 | #include <asm/efi.h> | 38 | #include <asm/efi.h> |
39 | #include <asm/cacheflush.h> | 39 | #include <asm/cacheflush.h> |
40 | #include <asm/fixmap.h> | 40 | #include <asm/fixmap.h> |
41 | #include <asm/realmode.h> | ||
42 | #include <asm/time.h> | ||
41 | 43 | ||
42 | static pgd_t *save_pgd __initdata; | 44 | static pgd_t *save_pgd __initdata; |
43 | static unsigned long efi_flags __initdata; | 45 | static unsigned long efi_flags __initdata; |
44 | 46 | ||
47 | /* | ||
48 | * We allocate runtime services regions top-down, starting from -4G, i.e. | ||
49 | * 0xffff_ffff_0000_0000, and limit the EFI VA mapping space to 64G. | ||
50 | */ | ||
51 | static u64 efi_va = -4 * (1UL << 30); | ||
52 | #define EFI_VA_END (-68 * (1UL << 30)) | ||
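The window implied by these two constants checks out: the allocator starts at -4G = 0xffff_ffff_0000_0000 and may descend to EFI_VA_END = -68G = 0xffff_ffef_0000_0000, a span of 0x10_0000_0000 bytes, i.e. exactly the 64G of EFI VA mapping space the comment names.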
53 | |||
54 | /* | ||
55 | * Scratch space used for switching the pagetable in the EFI stub | ||
56 | */ | ||
57 | struct efi_scratch { | ||
58 | u64 r15; | ||
59 | u64 prev_cr3; | ||
60 | pgd_t *efi_pgt; | ||
61 | bool use_pgd; | ||
62 | u64 phys_stack; | ||
63 | } __packed; | ||
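Because the struct is __packed, its byte offsets line up with the raw efi_scratch+N accesses in the assembly stubs later in this series:

	/* r15        @ efi_scratch+0
	 * prev_cr3   @ efi_scratch+8
	 * efi_pgt    @ efi_scratch+16  (SWITCH_PGT loads this into %cr3)
	 * use_pgd    @ efi_scratch+24  (cmpb $0, efi_scratch+24(%rip))
	 * phys_stack @ efi_scratch+25  (efi64_thunk's 1:1 mapped stack)
	 */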
64 | |||
45 | static void __init early_code_mapping_set_exec(int executable) | 65 | static void __init early_code_mapping_set_exec(int executable) |
46 | { | 66 | { |
47 | efi_memory_desc_t *md; | 67 | efi_memory_desc_t *md; |
@@ -65,6 +85,9 @@ void __init efi_call_phys_prelog(void) | |||
65 | int pgd; | 85 | int pgd; |
66 | int n_pgds; | 86 | int n_pgds; |
67 | 87 | ||
88 | if (!efi_enabled(EFI_OLD_MEMMAP)) | ||
89 | return; | ||
90 | |||
68 | early_code_mapping_set_exec(1); | 91 | early_code_mapping_set_exec(1); |
69 | local_irq_save(efi_flags); | 92 | local_irq_save(efi_flags); |
70 | 93 | ||
@@ -86,6 +109,10 @@ void __init efi_call_phys_epilog(void) | |||
86 | */ | 109 | */ |
87 | int pgd; | 110 | int pgd; |
88 | int n_pgds = DIV_ROUND_UP((max_pfn << PAGE_SHIFT) , PGDIR_SIZE); | 111 | int n_pgds = DIV_ROUND_UP((max_pfn << PAGE_SHIFT) , PGDIR_SIZE); |
112 | |||
113 | if (!efi_enabled(EFI_OLD_MEMMAP)) | ||
114 | return; | ||
115 | |||
89 | for (pgd = 0; pgd < n_pgds; pgd++) | 116 | for (pgd = 0; pgd < n_pgds; pgd++) |
90 | set_pgd(pgd_offset_k(pgd * PGDIR_SIZE), save_pgd[pgd]); | 117 | set_pgd(pgd_offset_k(pgd * PGDIR_SIZE), save_pgd[pgd]); |
91 | kfree(save_pgd); | 118 | kfree(save_pgd); |
@@ -94,6 +121,158 @@ void __init efi_call_phys_epilog(void) | |||
94 | early_code_mapping_set_exec(0); | 121 | early_code_mapping_set_exec(0); |
95 | } | 122 | } |
96 | 123 | ||
124 | /* | ||
125 | * Add low kernel mappings for passing arguments to EFI functions. | ||
126 | */ | ||
127 | void efi_sync_low_kernel_mappings(void) | ||
128 | { | ||
129 | unsigned num_pgds; | ||
130 | pgd_t *pgd = (pgd_t *)__va(real_mode_header->trampoline_pgd); | ||
131 | |||
132 | if (efi_enabled(EFI_OLD_MEMMAP)) | ||
133 | return; | ||
134 | |||
135 | num_pgds = pgd_index(MODULES_END - 1) - pgd_index(PAGE_OFFSET); | ||
136 | |||
137 | memcpy(pgd + pgd_index(PAGE_OFFSET), | ||
138 | init_mm.pgd + pgd_index(PAGE_OFFSET), | ||
139 | sizeof(pgd_t) * num_pgds); | ||
140 | } | ||
141 | |||
142 | int efi_setup_page_tables(unsigned long pa_memmap, unsigned num_pages) | ||
143 | { | ||
144 | unsigned long text; | ||
145 | struct page *page; | ||
146 | unsigned npages; | ||
147 | pgd_t *pgd; | ||
148 | |||
149 | if (efi_enabled(EFI_OLD_MEMMAP)) | ||
150 | return 0; | ||
151 | |||
152 | efi_scratch.efi_pgt = (pgd_t *)(unsigned long)real_mode_header->trampoline_pgd; | ||
153 | pgd = __va(efi_scratch.efi_pgt); | ||
154 | |||
155 | /* | ||
156 | * It can happen that the physical address of new_memmap lands in memory | ||
157 | * which is not mapped in the EFI page table. Therefore we need to go | ||
158 | * and ident-map those pages containing the map before calling | ||
159 | * phys_efi_set_virtual_address_map(). | ||
160 | */ | ||
161 | if (kernel_map_pages_in_pgd(pgd, pa_memmap, pa_memmap, num_pages, _PAGE_NX)) { | ||
162 | pr_err("Error ident-mapping new memmap (0x%lx)!\n", pa_memmap); | ||
163 | return 1; | ||
164 | } | ||
165 | |||
166 | efi_scratch.use_pgd = true; | ||
167 | |||
168 | /* | ||
169 | * When making calls to the firmware everything needs to be 1:1 | ||
170 | * mapped and addressable with 32-bit pointers. Map the kernel | ||
171 | * text and allocate a new stack because we can't rely on the | ||
172 | * stack pointer being < 4GB. | ||
173 | */ | ||
174 | if (!IS_ENABLED(CONFIG_EFI_MIXED)) | ||
175 | return 0; | ||
176 | |||
177 | page = alloc_page(GFP_KERNEL|__GFP_DMA32); | ||
178 | if (!page) | ||
179 | panic("Unable to allocate EFI runtime stack < 4GB\n"); | ||
180 | |||
181 | efi_scratch.phys_stack = virt_to_phys(page_address(page)); | ||
182 | efi_scratch.phys_stack += PAGE_SIZE; /* stack grows down */ | ||
183 | |||
184 | npages = (_end - _text) >> PAGE_SHIFT; | ||
185 | text = __pa(_text); | ||
186 | |||
187 | if (kernel_map_pages_in_pgd(pgd, text >> PAGE_SHIFT, text, npages, 0)) { | ||
188 | pr_err("Failed to map kernel text 1:1\n"); | ||
189 | return 1; | ||
190 | } | ||
191 | |||
192 | return 0; | ||
193 | } | ||
194 | |||
195 | void efi_cleanup_page_tables(unsigned long pa_memmap, unsigned num_pages) | ||
196 | { | ||
197 | pgd_t *pgd = (pgd_t *)__va(real_mode_header->trampoline_pgd); | ||
198 | |||
199 | kernel_unmap_pages_in_pgd(pgd, pa_memmap, num_pages); | ||
200 | } | ||
201 | |||
202 | static void __init __map_region(efi_memory_desc_t *md, u64 va) | ||
203 | { | ||
204 | pgd_t *pgd = (pgd_t *)__va(real_mode_header->trampoline_pgd); | ||
205 | unsigned long pf = 0; | ||
206 | |||
207 | if (!(md->attribute & EFI_MEMORY_WB)) | ||
208 | pf |= _PAGE_PCD; | ||
209 | |||
210 | if (kernel_map_pages_in_pgd(pgd, md->phys_addr, va, md->num_pages, pf)) | ||
211 | pr_warn("Error mapping PA 0x%llx -> VA 0x%llx!\n", | ||
212 | md->phys_addr, va); | ||
213 | } | ||
214 | |||
215 | void __init efi_map_region(efi_memory_desc_t *md) | ||
216 | { | ||
217 | unsigned long size = md->num_pages << PAGE_SHIFT; | ||
218 | u64 pa = md->phys_addr; | ||
219 | |||
220 | if (efi_enabled(EFI_OLD_MEMMAP)) | ||
221 | return old_map_region(md); | ||
222 | |||
223 | /* | ||
224 | * Make sure the 1:1 mappings are present as a catch-all for b0rked | ||
225 | * firmware which doesn't update all internal pointers after switching | ||
226 | * to virtual mode and would otherwise crap on us. | ||
227 | */ | ||
228 | __map_region(md, md->phys_addr); | ||
229 | |||
230 | /* | ||
231 | * Enforce the 1:1 mapping as the default virtual address when | ||
232 | * booting in EFI mixed mode, because even though we may be | ||
233 | * running a 64-bit kernel, the firmware may only be 32-bit. | ||
234 | */ | ||
235 | if (!efi_is_native() && IS_ENABLED(CONFIG_EFI_MIXED)) { | ||
236 | md->virt_addr = md->phys_addr; | ||
237 | return; | ||
238 | } | ||
239 | |||
240 | efi_va -= size; | ||
241 | |||
242 | /* Is PA 2M-aligned? */ | ||
243 | if (!(pa & (PMD_SIZE - 1))) { | ||
244 | efi_va &= PMD_MASK; | ||
245 | } else { | ||
246 | u64 pa_offset = pa & (PMD_SIZE - 1); | ||
247 | u64 prev_va = efi_va; | ||
248 | |||
249 | /* get us the same offset within this 2M page */ | ||
250 | efi_va = (efi_va & PMD_MASK) + pa_offset; | ||
251 | |||
252 | if (efi_va > prev_va) | ||
253 | efi_va -= PMD_SIZE; | ||
254 | } | ||
255 | |||
256 | if (efi_va < EFI_VA_END) { | ||
257 | pr_warn(FW_WARN "VA address range overflow!\n"); | ||
258 | return; | ||
259 | } | ||
260 | |||
261 | /* Do the VA map */ | ||
262 | __map_region(md, efi_va); | ||
263 | md->virt_addr = efi_va; | ||
264 | } | ||
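A worked example of the unaligned branch above, with a hypothetical physical address: for pa = 0x40345000, pa_offset = pa & (PMD_SIZE - 1) = 0x145000, so efi_va is rounded down to a 2M boundary and given the same 0x145000 offset; if that adjustment moved efi_va upward, one extra PMD_SIZE is subtracted. PA and VA then share their offset within a 2M page, which keeps 2M large-page mappings possible.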
265 | |||
266 | /* | ||
267 | * The kexec kernel uses efi_map_region_fixed to map EFI runtime memory ranges. | ||
268 | * md->virt_addr is the original virtual address which had been mapped in the | ||
269 | * first kernel of the kexec boot. | ||
270 | */ | ||
271 | void __init efi_map_region_fixed(efi_memory_desc_t *md) | ||
272 | { | ||
273 | __map_region(md, md->virt_addr); | ||
274 | } | ||
275 | |||
97 | void __iomem *__init efi_ioremap(unsigned long phys_addr, unsigned long size, | 276 | void __iomem *__init efi_ioremap(unsigned long phys_addr, unsigned long size, |
98 | u32 type, u64 attribute) | 277 | u32 type, u64 attribute) |
99 | { | 278 | { |
@@ -113,3 +292,313 @@ void __iomem *__init efi_ioremap(unsigned long phys_addr, unsigned long size, | |||
113 | 292 | ||
114 | return (void __iomem *)__va(phys_addr); | 293 | return (void __iomem *)__va(phys_addr); |
115 | } | 294 | } |
295 | |||
296 | void __init parse_efi_setup(u64 phys_addr, u32 data_len) | ||
297 | { | ||
298 | efi_setup = phys_addr + sizeof(struct setup_data); | ||
299 | } | ||
300 | |||
301 | void __init efi_runtime_mkexec(void) | ||
302 | { | ||
303 | if (!efi_enabled(EFI_OLD_MEMMAP)) | ||
304 | return; | ||
305 | |||
306 | if (__supported_pte_mask & _PAGE_NX) | ||
307 | runtime_code_page_mkexec(); | ||
308 | } | ||
309 | |||
310 | void __init efi_dump_pagetable(void) | ||
311 | { | ||
312 | #ifdef CONFIG_EFI_PGT_DUMP | ||
313 | pgd_t *pgd = (pgd_t *)__va(real_mode_header->trampoline_pgd); | ||
314 | |||
315 | ptdump_walk_pgd_level(NULL, pgd); | ||
316 | #endif | ||
317 | } | ||
318 | |||
319 | #ifdef CONFIG_EFI_MIXED | ||
320 | extern efi_status_t efi64_thunk(u32, ...); | ||
321 | |||
322 | #define runtime_service32(func) \ | ||
323 | ({ \ | ||
324 | u32 table = (u32)(unsigned long)efi.systab; \ | ||
325 | u32 *rt, *___f; \ | ||
326 | \ | ||
327 | rt = (u32 *)(table + offsetof(efi_system_table_32_t, runtime)); \ | ||
328 | ___f = (u32 *)(*rt + offsetof(efi_runtime_services_32_t, func)); \ | ||
329 | *___f; \ | ||
330 | }) | ||
331 | |||
332 | /* | ||
333 | * Switch to the EFI page tables early so that we can access the 1:1 | ||
334 | * runtime services mappings which are not mapped in any other page | ||
335 | * tables. This function must be called before runtime_service32(). | ||
336 | * | ||
337 | * Also, disable interrupts because the IDT points to 64-bit handlers, | ||
338 | * which aren't going to function correctly when we switch to 32-bit. | ||
339 | */ | ||
340 | #define efi_thunk(f, ...) \ | ||
341 | ({ \ | ||
342 | efi_status_t __s; \ | ||
343 | unsigned long flags; \ | ||
344 | u32 func; \ | ||
345 | \ | ||
346 | efi_sync_low_kernel_mappings(); \ | ||
347 | local_irq_save(flags); \ | ||
348 | \ | ||
349 | efi_scratch.prev_cr3 = read_cr3(); \ | ||
350 | write_cr3((unsigned long)efi_scratch.efi_pgt); \ | ||
351 | __flush_tlb_all(); \ | ||
352 | \ | ||
353 | func = runtime_service32(f); \ | ||
354 | __s = efi64_thunk(func, __VA_ARGS__); \ | ||
355 | \ | ||
356 | write_cr3(efi_scratch.prev_cr3); \ | ||
357 | __flush_tlb_all(); \ | ||
358 | local_irq_restore(flags); \ | ||
359 | \ | ||
360 | __s; \ | ||
361 | }) | ||
362 | |||
363 | efi_status_t efi_thunk_set_virtual_address_map( | ||
364 | void *phys_set_virtual_address_map, | ||
365 | unsigned long memory_map_size, | ||
366 | unsigned long descriptor_size, | ||
367 | u32 descriptor_version, | ||
368 | efi_memory_desc_t *virtual_map) | ||
369 | { | ||
370 | efi_status_t status; | ||
371 | unsigned long flags; | ||
372 | u32 func; | ||
373 | |||
374 | efi_sync_low_kernel_mappings(); | ||
375 | local_irq_save(flags); | ||
376 | |||
377 | efi_scratch.prev_cr3 = read_cr3(); | ||
378 | write_cr3((unsigned long)efi_scratch.efi_pgt); | ||
379 | __flush_tlb_all(); | ||
380 | |||
381 | func = (u32)(unsigned long)phys_set_virtual_address_map; | ||
382 | status = efi64_thunk(func, memory_map_size, descriptor_size, | ||
383 | descriptor_version, virtual_map); | ||
384 | |||
385 | write_cr3(efi_scratch.prev_cr3); | ||
386 | __flush_tlb_all(); | ||
387 | local_irq_restore(flags); | ||
388 | |||
389 | return status; | ||
390 | } | ||
391 | |||
392 | static efi_status_t efi_thunk_get_time(efi_time_t *tm, efi_time_cap_t *tc) | ||
393 | { | ||
394 | efi_status_t status; | ||
395 | u32 phys_tm, phys_tc; | ||
396 | |||
397 | spin_lock(&rtc_lock); | ||
398 | |||
399 | phys_tm = virt_to_phys(tm); | ||
400 | phys_tc = virt_to_phys(tc); | ||
401 | |||
402 | status = efi_thunk(get_time, phys_tm, phys_tc); | ||
403 | |||
404 | spin_unlock(&rtc_lock); | ||
405 | |||
406 | return status; | ||
407 | } | ||
408 | |||
409 | static efi_status_t efi_thunk_set_time(efi_time_t *tm) | ||
410 | { | ||
411 | efi_status_t status; | ||
412 | u32 phys_tm; | ||
413 | |||
414 | spin_lock(&rtc_lock); | ||
415 | |||
416 | phys_tm = virt_to_phys(tm); | ||
417 | |||
418 | status = efi_thunk(set_time, phys_tm); | ||
419 | |||
420 | spin_unlock(&rtc_lock); | ||
421 | |||
422 | return status; | ||
423 | } | ||
424 | |||
425 | static efi_status_t | ||
426 | efi_thunk_get_wakeup_time(efi_bool_t *enabled, efi_bool_t *pending, | ||
427 | efi_time_t *tm) | ||
428 | { | ||
429 | efi_status_t status; | ||
430 | u32 phys_enabled, phys_pending, phys_tm; | ||
431 | |||
432 | spin_lock(&rtc_lock); | ||
433 | |||
434 | phys_enabled = virt_to_phys(enabled); | ||
435 | phys_pending = virt_to_phys(pending); | ||
436 | phys_tm = virt_to_phys(tm); | ||
437 | |||
438 | status = efi_thunk(get_wakeup_time, phys_enabled, | ||
439 | phys_pending, phys_tm); | ||
440 | |||
441 | spin_unlock(&rtc_lock); | ||
442 | |||
443 | return status; | ||
444 | } | ||
445 | |||
446 | static efi_status_t | ||
447 | efi_thunk_set_wakeup_time(efi_bool_t enabled, efi_time_t *tm) | ||
448 | { | ||
449 | efi_status_t status; | ||
450 | u32 phys_tm; | ||
451 | |||
452 | spin_lock(&rtc_lock); | ||
453 | |||
454 | phys_tm = virt_to_phys(tm); | ||
455 | |||
456 | status = efi_thunk(set_wakeup_time, enabled, phys_tm); | ||
457 | |||
458 | spin_unlock(&rtc_lock); | ||
459 | |||
460 | return status; | ||
461 | } | ||
462 | |||
463 | |||
464 | static efi_status_t | ||
465 | efi_thunk_get_variable(efi_char16_t *name, efi_guid_t *vendor, | ||
466 | u32 *attr, unsigned long *data_size, void *data) | ||
467 | { | ||
468 | efi_status_t status; | ||
469 | u32 phys_name, phys_vendor, phys_attr; | ||
470 | u32 phys_data_size, phys_data; | ||
471 | |||
472 | phys_data_size = virt_to_phys(data_size); | ||
473 | phys_vendor = virt_to_phys(vendor); | ||
474 | phys_name = virt_to_phys(name); | ||
475 | phys_attr = virt_to_phys(attr); | ||
476 | phys_data = virt_to_phys(data); | ||
477 | |||
478 | status = efi_thunk(get_variable, phys_name, phys_vendor, | ||
479 | phys_attr, phys_data_size, phys_data); | ||
480 | |||
481 | return status; | ||
482 | } | ||
483 | |||
484 | static efi_status_t | ||
485 | efi_thunk_set_variable(efi_char16_t *name, efi_guid_t *vendor, | ||
486 | u32 attr, unsigned long data_size, void *data) | ||
487 | { | ||
488 | u32 phys_name, phys_vendor, phys_data; | ||
489 | efi_status_t status; | ||
490 | |||
491 | phys_name = virt_to_phys(name); | ||
492 | phys_vendor = virt_to_phys(vendor); | ||
493 | phys_data = virt_to_phys(data); | ||
494 | |||
495 | /* If data_size is > sizeof(u32) we've got problems */ | ||
496 | status = efi_thunk(set_variable, phys_name, phys_vendor, | ||
497 | attr, data_size, phys_data); | ||
498 | |||
499 | return status; | ||
500 | } | ||
501 | |||
502 | static efi_status_t | ||
503 | efi_thunk_get_next_variable(unsigned long *name_size, | ||
504 | efi_char16_t *name, | ||
505 | efi_guid_t *vendor) | ||
506 | { | ||
507 | efi_status_t status; | ||
508 | u32 phys_name_size, phys_name, phys_vendor; | ||
509 | |||
510 | phys_name_size = virt_to_phys(name_size); | ||
511 | phys_vendor = virt_to_phys(vendor); | ||
512 | phys_name = virt_to_phys(name); | ||
513 | |||
514 | status = efi_thunk(get_next_variable, phys_name_size, | ||
515 | phys_name, phys_vendor); | ||
516 | |||
517 | return status; | ||
518 | } | ||
519 | |||
520 | static efi_status_t | ||
521 | efi_thunk_get_next_high_mono_count(u32 *count) | ||
522 | { | ||
523 | efi_status_t status; | ||
524 | u32 phys_count; | ||
525 | |||
526 | phys_count = virt_to_phys(count); | ||
527 | status = efi_thunk(get_next_high_mono_count, phys_count); | ||
528 | |||
529 | return status; | ||
530 | } | ||
531 | |||
532 | static void | ||
533 | efi_thunk_reset_system(int reset_type, efi_status_t status, | ||
534 | unsigned long data_size, efi_char16_t *data) | ||
535 | { | ||
536 | u32 phys_data; | ||
537 | |||
538 | phys_data = virt_to_phys(data); | ||
539 | |||
540 | efi_thunk(reset_system, reset_type, status, data_size, phys_data); | ||
541 | } | ||
542 | |||
543 | static efi_status_t | ||
544 | efi_thunk_update_capsule(efi_capsule_header_t **capsules, | ||
545 | unsigned long count, unsigned long sg_list) | ||
546 | { | ||
547 | /* | ||
548 | * To properly support this function we would need to repackage | ||
549 | * 'capsules' because the firmware doesn't understand 64-bit | ||
550 | * pointers. | ||
551 | */ | ||
552 | return EFI_UNSUPPORTED; | ||
553 | } | ||
554 | |||
555 | static efi_status_t | ||
556 | efi_thunk_query_variable_info(u32 attr, u64 *storage_space, | ||
557 | u64 *remaining_space, | ||
558 | u64 *max_variable_size) | ||
559 | { | ||
560 | efi_status_t status; | ||
561 | u32 phys_storage, phys_remaining, phys_max; | ||
562 | |||
563 | if (efi.runtime_version < EFI_2_00_SYSTEM_TABLE_REVISION) | ||
564 | return EFI_UNSUPPORTED; | ||
565 | |||
566 | phys_storage = virt_to_phys(storage_space); | ||
567 | phys_remaining = virt_to_phys(remaining_space); | ||
568 | phys_max = virt_to_phys(max_variable_size); | ||
569 | |||
570 | status = efi_thunk(query_variable_info, attr, phys_storage, | ||
571 | phys_remaining, phys_max); | ||
572 | |||
573 | return status; | ||
574 | } | ||
575 | |||
576 | static efi_status_t | ||
577 | efi_thunk_query_capsule_caps(efi_capsule_header_t **capsules, | ||
578 | unsigned long count, u64 *max_size, | ||
579 | int *reset_type) | ||
580 | { | ||
581 | /* | ||
582 | * To properly support this function we would need to repackage | ||
583 | * 'capsules' because the firmware doesn't understand 64-bit | ||
584 | * pointers. | ||
585 | */ | ||
586 | return EFI_UNSUPPORTED; | ||
587 | } | ||
588 | |||
589 | void efi_thunk_runtime_setup(void) | ||
590 | { | ||
591 | efi.get_time = efi_thunk_get_time; | ||
592 | efi.set_time = efi_thunk_set_time; | ||
593 | efi.get_wakeup_time = efi_thunk_get_wakeup_time; | ||
594 | efi.set_wakeup_time = efi_thunk_set_wakeup_time; | ||
595 | efi.get_variable = efi_thunk_get_variable; | ||
596 | efi.get_next_variable = efi_thunk_get_next_variable; | ||
597 | efi.set_variable = efi_thunk_set_variable; | ||
598 | efi.get_next_high_mono_count = efi_thunk_get_next_high_mono_count; | ||
599 | efi.reset_system = efi_thunk_reset_system; | ||
600 | efi.query_variable_info = efi_thunk_query_variable_info; | ||
601 | efi.update_capsule = efi_thunk_update_capsule; | ||
602 | efi.query_capsule_caps = efi_thunk_query_capsule_caps; | ||
603 | } | ||
604 | #endif /* CONFIG_EFI_MIXED */ | ||
diff --git a/arch/x86/platform/efi/efi_stub_64.S b/arch/x86/platform/efi/efi_stub_64.S index 4c07ccab8146..e0984ef0374b 100644 --- a/arch/x86/platform/efi/efi_stub_64.S +++ b/arch/x86/platform/efi/efi_stub_64.S | |||
@@ -7,6 +7,10 @@ | |||
7 | */ | 7 | */ |
8 | 8 | ||
9 | #include <linux/linkage.h> | 9 | #include <linux/linkage.h> |
10 | #include <asm/segment.h> | ||
11 | #include <asm/msr.h> | ||
12 | #include <asm/processor-flags.h> | ||
13 | #include <asm/page_types.h> | ||
10 | 14 | ||
11 | #define SAVE_XMM \ | 15 | #define SAVE_XMM \ |
12 | mov %rsp, %rax; \ | 16 | mov %rsp, %rax; \ |
@@ -34,10 +38,47 @@ | |||
34 | mov %rsi, %cr0; \ | 38 | mov %rsi, %cr0; \ |
35 | mov (%rsp), %rsp | 39 | mov (%rsp), %rsp |
36 | 40 | ||
41 | /* stolen from gcc */ | ||
42 | .macro FLUSH_TLB_ALL | ||
43 | movq %r15, efi_scratch(%rip) | ||
44 | movq %r14, efi_scratch+8(%rip) | ||
45 | movq %cr4, %r15 | ||
46 | movq %r15, %r14 | ||
47 | andb $0x7f, %r14b | ||
48 | movq %r14, %cr4 | ||
49 | movq %r15, %cr4 | ||
50 | movq efi_scratch+8(%rip), %r14 | ||
51 | movq efi_scratch(%rip), %r15 | ||
52 | .endm | ||
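What FLUSH_TLB_ALL does, sketched in C for clarity — bit 7 of CR4 is X86_CR4_PGE, and toggling it invalidates all TLB entries, global ones included (helper names are the read_cr4()/write_cr4() of this kernel era):

	unsigned long cr4 = read_cr4();

	write_cr4(cr4 & ~(1UL << 7));	/* andb $0x7f, %r14b: clear PGE */
	write_cr4(cr4);			/* restore PGE -> full global TLB flush */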
53 | |||
54 | .macro SWITCH_PGT | ||
55 | cmpb $0, efi_scratch+24(%rip) | ||
56 | je 1f | ||
57 | movq %r15, efi_scratch(%rip) # r15 | ||
58 | # save previous CR3 | ||
59 | movq %cr3, %r15 | ||
60 | movq %r15, efi_scratch+8(%rip) # prev_cr3 | ||
61 | movq efi_scratch+16(%rip), %r15 # EFI pgt | ||
62 | movq %r15, %cr3 | ||
63 | 1: | ||
64 | .endm | ||
65 | |||
66 | .macro RESTORE_PGT | ||
67 | cmpb $0, efi_scratch+24(%rip) | ||
68 | je 2f | ||
69 | movq efi_scratch+8(%rip), %r15 | ||
70 | movq %r15, %cr3 | ||
71 | movq efi_scratch(%rip), %r15 | ||
72 | FLUSH_TLB_ALL | ||
73 | 2: | ||
74 | .endm | ||
75 | |||
37 | ENTRY(efi_call0) | 76 | ENTRY(efi_call0) |
38 | SAVE_XMM | 77 | SAVE_XMM |
39 | subq $32, %rsp | 78 | subq $32, %rsp |
79 | SWITCH_PGT | ||
40 | call *%rdi | 80 | call *%rdi |
81 | RESTORE_PGT | ||
41 | addq $32, %rsp | 82 | addq $32, %rsp |
42 | RESTORE_XMM | 83 | RESTORE_XMM |
43 | ret | 84 | ret |
@@ -47,7 +88,9 @@ ENTRY(efi_call1) | |||
47 | SAVE_XMM | 88 | SAVE_XMM |
48 | subq $32, %rsp | 89 | subq $32, %rsp |
49 | mov %rsi, %rcx | 90 | mov %rsi, %rcx |
91 | SWITCH_PGT | ||
50 | call *%rdi | 92 | call *%rdi |
93 | RESTORE_PGT | ||
51 | addq $32, %rsp | 94 | addq $32, %rsp |
52 | RESTORE_XMM | 95 | RESTORE_XMM |
53 | ret | 96 | ret |
@@ -57,7 +100,9 @@ ENTRY(efi_call2) | |||
57 | SAVE_XMM | 100 | SAVE_XMM |
58 | subq $32, %rsp | 101 | subq $32, %rsp |
59 | mov %rsi, %rcx | 102 | mov %rsi, %rcx |
103 | SWITCH_PGT | ||
60 | call *%rdi | 104 | call *%rdi |
105 | RESTORE_PGT | ||
61 | addq $32, %rsp | 106 | addq $32, %rsp |
62 | RESTORE_XMM | 107 | RESTORE_XMM |
63 | ret | 108 | ret |
@@ -68,7 +113,9 @@ ENTRY(efi_call3) | |||
68 | subq $32, %rsp | 113 | subq $32, %rsp |
69 | mov %rcx, %r8 | 114 | mov %rcx, %r8 |
70 | mov %rsi, %rcx | 115 | mov %rsi, %rcx |
116 | SWITCH_PGT | ||
71 | call *%rdi | 117 | call *%rdi |
118 | RESTORE_PGT | ||
72 | addq $32, %rsp | 119 | addq $32, %rsp |
73 | RESTORE_XMM | 120 | RESTORE_XMM |
74 | ret | 121 | ret |
@@ -80,7 +127,9 @@ ENTRY(efi_call4) | |||
80 | mov %r8, %r9 | 127 | mov %r8, %r9 |
81 | mov %rcx, %r8 | 128 | mov %rcx, %r8 |
82 | mov %rsi, %rcx | 129 | mov %rsi, %rcx |
130 | SWITCH_PGT | ||
83 | call *%rdi | 131 | call *%rdi |
132 | RESTORE_PGT | ||
84 | addq $32, %rsp | 133 | addq $32, %rsp |
85 | RESTORE_XMM | 134 | RESTORE_XMM |
86 | ret | 135 | ret |
@@ -93,7 +142,9 @@ ENTRY(efi_call5) | |||
93 | mov %r8, %r9 | 142 | mov %r8, %r9 |
94 | mov %rcx, %r8 | 143 | mov %rcx, %r8 |
95 | mov %rsi, %rcx | 144 | mov %rsi, %rcx |
145 | SWITCH_PGT | ||
96 | call *%rdi | 146 | call *%rdi |
147 | RESTORE_PGT | ||
97 | addq $48, %rsp | 148 | addq $48, %rsp |
98 | RESTORE_XMM | 149 | RESTORE_XMM |
99 | ret | 150 | ret |
@@ -109,8 +160,177 @@ ENTRY(efi_call6) | |||
109 | mov %r8, %r9 | 160 | mov %r8, %r9 |
110 | mov %rcx, %r8 | 161 | mov %rcx, %r8 |
111 | mov %rsi, %rcx | 162 | mov %rsi, %rcx |
163 | SWITCH_PGT | ||
112 | call *%rdi | 164 | call *%rdi |
165 | RESTORE_PGT | ||
113 | addq $48, %rsp | 166 | addq $48, %rsp |
114 | RESTORE_XMM | 167 | RESTORE_XMM |
115 | ret | 168 | ret |
116 | ENDPROC(efi_call6) | 169 | ENDPROC(efi_call6) |
170 | |||
171 | #ifdef CONFIG_EFI_MIXED | ||
172 | |||
173 | /* | ||
174 | * We run this function from the 1:1 mapping. | ||
175 | * | ||
176 | * This function must be invoked with a 1:1 mapped stack. | ||
177 | */ | ||
178 | ENTRY(__efi64_thunk) | ||
179 | movl %ds, %eax | ||
180 | push %rax | ||
181 | movl %es, %eax | ||
182 | push %rax | ||
183 | movl %ss, %eax | ||
184 | push %rax | ||
185 | |||
186 | subq $32, %rsp | ||
187 | movl %esi, 0x0(%rsp) | ||
188 | movl %edx, 0x4(%rsp) | ||
189 | movl %ecx, 0x8(%rsp) | ||
190 | movq %r8, %rsi | ||
191 | movl %esi, 0xc(%rsp) | ||
192 | movq %r9, %rsi | ||
193 | movl %esi, 0x10(%rsp) | ||
194 | |||
195 | sgdt save_gdt(%rip) | ||
196 | |||
197 | leaq 1f(%rip), %rbx | ||
198 | movq %rbx, func_rt_ptr(%rip) | ||
199 | |||
200 | /* Switch to gdt with 32-bit segments */ | ||
201 | movl 64(%rsp), %eax | ||
202 | lgdt (%rax) | ||
203 | |||
204 | leaq efi_enter32(%rip), %rax | ||
205 | pushq $__KERNEL_CS | ||
206 | pushq %rax | ||
207 | lretq | ||
208 | |||
209 | 1: addq $32, %rsp | ||
210 | |||
211 | lgdt save_gdt(%rip) | ||
212 | |||
213 | pop %rbx | ||
214 | movl %ebx, %ss | ||
215 | pop %rbx | ||
216 | movl %ebx, %es | ||
217 | pop %rbx | ||
218 | movl %ebx, %ds | ||
219 | |||
220 | /* | ||
221 | * Convert 32-bit status code into 64-bit. | ||
222 | */ | ||
223 | test %rax, %rax | ||
224 | jz 1f | ||
225 | movl %eax, %ecx | ||
226 | andl $0x0fffffff, %ecx | ||
227 | andl $0xf0000000, %eax | ||
228 | shl $32, %rax | ||
229 | or %rcx, %rax | ||
230 | 1: | ||
231 | ret | ||
232 | ENDPROC(__efi64_thunk) | ||
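The status fix-up before the final ret can be read as the following C: it moves the top nibble of a 32-bit EFI_STATUS into bits 60-63 of the 64-bit encoding, so e.g. 0x80000003 (EFI_UNSUPPORTED) becomes 0x8000000000000003, while EFI_SUCCESS (0) is passed through by the jz:

	u64 status64 = status32;

	if (status64)
		status64 = ((u64)(status32 & 0xf0000000) << 32) |
			   (status32 & 0x0fffffff);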
233 | |||
234 | ENTRY(efi_exit32) | ||
235 | movq func_rt_ptr(%rip), %rax | ||
236 | push %rax | ||
237 | mov %rdi, %rax | ||
238 | ret | ||
239 | ENDPROC(efi_exit32) | ||
240 | |||
241 | .code32 | ||
242 | /* | ||
243 | * EFI service pointer must be in %edi. | ||
244 | * | ||
245 | * The stack should represent the 32-bit calling convention. | ||
246 | */ | ||
247 | ENTRY(efi_enter32) | ||
248 | movl $__KERNEL_DS, %eax | ||
249 | movl %eax, %ds | ||
250 | movl %eax, %es | ||
251 | movl %eax, %ss | ||
252 | |||
253 | /* Reload pgtables */ | ||
254 | movl %cr3, %eax | ||
255 | movl %eax, %cr3 | ||
256 | |||
257 | /* Disable paging */ | ||
258 | movl %cr0, %eax | ||
259 | btrl $X86_CR0_PG_BIT, %eax | ||
260 | movl %eax, %cr0 | ||
261 | |||
262 | /* Disable long mode via EFER */ | ||
263 | movl $MSR_EFER, %ecx | ||
264 | rdmsr | ||
265 | btrl $_EFER_LME, %eax | ||
266 | wrmsr | ||
267 | |||
268 | call *%edi | ||
269 | |||
270 | /* We must preserve return value */ | ||
271 | movl %eax, %edi | ||
272 | |||
273 | /* | ||
274 | * Some firmware will return with interrupts enabled. Be sure to | ||
275 | * disable them before we switch GDTs. | ||
276 | */ | ||
277 | cli | ||
278 | |||
279 | movl 68(%esp), %eax | ||
280 | movl %eax, 2(%eax) | ||
281 | lgdtl (%eax) | ||
282 | |||
283 | movl %cr4, %eax | ||
284 | btsl $(X86_CR4_PAE_BIT), %eax | ||
285 | movl %eax, %cr4 | ||
286 | |||
287 | movl %cr3, %eax | ||
288 | movl %eax, %cr3 | ||
289 | |||
290 | movl $MSR_EFER, %ecx | ||
291 | rdmsr | ||
292 | btsl $_EFER_LME, %eax | ||
293 | wrmsr | ||
294 | |||
295 | xorl %eax, %eax | ||
296 | lldt %ax | ||
297 | |||
298 | movl 72(%esp), %eax | ||
299 | pushl $__KERNEL_CS | ||
300 | pushl %eax | ||
301 | |||
302 | /* Enable paging */ | ||
303 | movl %cr0, %eax | ||
304 | btsl $X86_CR0_PG_BIT, %eax | ||
305 | movl %eax, %cr0 | ||
306 | lret | ||
307 | ENDPROC(efi_enter32) | ||
308 | |||
309 | .data | ||
310 | .balign 8 | ||
311 | .global efi32_boot_gdt | ||
312 | efi32_boot_gdt: .word 0 | ||
313 | .quad 0 | ||
314 | |||
315 | save_gdt: .word 0 | ||
316 | .quad 0 | ||
317 | func_rt_ptr: .quad 0 | ||
318 | |||
319 | .global efi_gdt64 | ||
320 | efi_gdt64: | ||
321 | .word efi_gdt64_end - efi_gdt64 | ||
322 | .long 0 /* Filled out by user */ | ||
323 | .word 0 | ||
324 | .quad 0x0000000000000000 /* NULL descriptor */ | ||
325 | .quad 0x00af9a000000ffff /* __KERNEL_CS */ | ||
326 | .quad 0x00cf92000000ffff /* __KERNEL_DS */ | ||
327 | .quad 0x0080890000000000 /* TS descriptor */ | ||
328 | .quad 0x0000000000000000 /* TS continued */ | ||
329 | efi_gdt64_end: | ||
330 | #endif /* CONFIG_EFI_MIXED */ | ||
331 | |||
332 | .data | ||
333 | ENTRY(efi_scratch) | ||
334 | .fill 3,8,0 | ||
335 | .byte 0 | ||
336 | .quad 0 | ||
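The status-code conversion at the end of __efi64_thunk above is worth reading as plain C. A minimal userspace sketch, assuming only that EFI keeps the error/warning class in the top nibble of a 32-bit EFI_STATUS; the function name is illustrative, not kernel API:

#include <stdint.h>
#include <stdio.h>

/* Widen a 32-bit EFI_STATUS: keep the 28-bit payload and move the class
 * nibble from bits 31:28 up to bits 63:60, mirroring the andl/shl/or
 * sequence above. Zero (EFI_SUCCESS) passes through unchanged. */
static uint64_t efi32_status_to_64(uint32_t status)
{
	uint64_t code  = status & 0x0fffffff;
	uint64_t class = ((uint64_t)status & 0xf0000000) << 32;

	return status ? (class | code) : 0;
}

int main(void)
{
	/* 0x80000003 (32-bit EFI_UNSUPPORTED) -> 0x8000000000000003 */
	printf("%#llx\n", (unsigned long long)efi32_status_to_64(0x80000003u));
	return 0;
}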
diff --git a/arch/x86/platform/efi/efi_thunk_64.S b/arch/x86/platform/efi/efi_thunk_64.S new file mode 100644 index 000000000000..8806fa73e6e6 --- /dev/null +++ b/arch/x86/platform/efi/efi_thunk_64.S | |||
@@ -0,0 +1,65 @@ | |||
1 | /* | ||
2 | * Copyright (C) 2014 Intel Corporation; author Matt Fleming | ||
3 | */ | ||
4 | |||
5 | #include <linux/linkage.h> | ||
6 | #include <asm/page_types.h> | ||
7 | |||
8 | .text | ||
9 | .code64 | ||
10 | ENTRY(efi64_thunk) | ||
11 | push %rbp | ||
12 | push %rbx | ||
13 | |||
14 | /* | ||
15 | * Switch to 1:1 mapped 32-bit stack pointer. | ||
16 | */ | ||
17 | movq %rsp, efi_saved_sp(%rip) | ||
18 | movq efi_scratch+25(%rip), %rsp | ||
19 | |||
20 | /* | ||
21 | * Calculate the physical address of the kernel text. | ||
22 | */ | ||
23 | movq $__START_KERNEL_map, %rax | ||
24 | subq phys_base(%rip), %rax | ||
25 | |||
26 | /* | ||
27 | * Push some physical addresses onto the stack. This is easier | ||
28 | * to do now in a code64 section while the assembler can address | ||
29 | * 64-bit values. Note that all the addresses on the stack are | ||
30 | * 32-bit. | ||
31 | */ | ||
32 | subq $16, %rsp | ||
33 | leaq efi_exit32(%rip), %rbx | ||
34 | subq %rax, %rbx | ||
35 | movl %ebx, 8(%rsp) | ||
36 | leaq efi_gdt64(%rip), %rbx | ||
37 | subq %rax, %rbx | ||
38 | movl %ebx, 2(%ebx) | ||
39 | movl %ebx, 4(%rsp) | ||
40 | leaq efi_gdt32(%rip), %rbx | ||
41 | subq %rax, %rbx | ||
42 | movl %ebx, 2(%ebx) | ||
43 | movl %ebx, (%rsp) | ||
44 | |||
45 | leaq __efi64_thunk(%rip), %rbx | ||
46 | subq %rax, %rbx | ||
47 | call *%rbx | ||
48 | |||
49 | movq efi_saved_sp(%rip), %rsp | ||
50 | pop %rbx | ||
51 | pop %rbp | ||
52 | retq | ||
53 | ENDPROC(efi64_thunk) | ||
54 | |||
55 | .data | ||
56 | efi_gdt32: | ||
57 | .word efi_gdt32_end - efi_gdt32 | ||
58 | .long 0 /* Filled out above */ | ||
59 | .word 0 | ||
60 | .quad 0x0000000000000000 /* NULL descriptor */ | ||
61 | .quad 0x00cf9a000000ffff /* __KERNEL_CS */ | ||
62 | .quad 0x00cf93000000ffff /* __KERNEL_DS */ | ||
63 | efi_gdt32_end: | ||
64 | |||
65 | efi_saved_sp: .quad 0 | ||
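The descriptor quadwords in efi_gdt32 and efi_gdt64 follow the standard x86 GDT layout. A hedged sketch that unpacks the __KERNEL_CS entry from efi_gdt32 above; the field extraction follows the architectural layout and everything else is illustrative:

#include <stdint.h>
#include <stdio.h>

/* One 8-byte descriptor: 16+4 bits of limit, 24+8 bits of base, an
 * access byte in bits 47:40, and a flag nibble in bits 55:52. */
int main(void)
{
	uint64_t d = 0x00cf9a000000ffffull;	/* __KERNEL_CS in efi_gdt32 */

	uint32_t limit  = (uint32_t)(d & 0xffff) | ((uint32_t)((d >> 48) & 0xf) << 16);
	uint32_t base   = (uint32_t)((d >> 16) & 0xffffff) | ((uint32_t)((d >> 56) & 0xff) << 24);
	unsigned access = (unsigned)((d >> 40) & 0xff);	/* 0x9a: present, ring 0, exec/read */
	unsigned flags  = (unsigned)((d >> 52) & 0xf);	/* 0xc: 4 KiB granularity, 32-bit */

	printf("base=%#x limit=%#x access=%#x flags=%#x\n", base, limit, access, flags);
	return 0;
}

The 64-bit code entry in efi_gdt64 (0x00af9a...) differs only in the flag nibble, 0xa instead of 0xc: the L bit is set and D is clear, which is what makes it a long-mode segment.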
diff --git a/arch/x86/platform/intel-mid/Makefile b/arch/x86/platform/intel-mid/Makefile index 01cc29ea5ff7..0a8ee703b9fa 100644 --- a/arch/x86/platform/intel-mid/Makefile +++ b/arch/x86/platform/intel-mid/Makefile | |||
@@ -1,6 +1,6 @@ | |||
1 | obj-$(CONFIG_X86_INTEL_MID) += intel-mid.o | 1 | obj-$(CONFIG_X86_INTEL_MID) += intel-mid.o intel_mid_vrtc.o mfld.o mrfl.o |
2 | obj-$(CONFIG_X86_INTEL_MID) += intel_mid_vrtc.o | ||
3 | obj-$(CONFIG_EARLY_PRINTK_INTEL_MID) += early_printk_intel_mid.o | 2 | obj-$(CONFIG_EARLY_PRINTK_INTEL_MID) += early_printk_intel_mid.o |
3 | |||
4 | # SFI specific code | 4 | # SFI specific code |
5 | ifdef CONFIG_X86_INTEL_MID | 5 | ifdef CONFIG_X86_INTEL_MID |
6 | obj-$(CONFIG_SFI) += sfi.o device_libs/ | 6 | obj-$(CONFIG_SFI) += sfi.o device_libs/ |
diff --git a/arch/x86/platform/intel-mid/device_libs/platform_emc1403.c b/arch/x86/platform/intel-mid/device_libs/platform_emc1403.c index 0d942c1d26d5..69a783689d21 100644 --- a/arch/x86/platform/intel-mid/device_libs/platform_emc1403.c +++ b/arch/x86/platform/intel-mid/device_libs/platform_emc1403.c | |||
@@ -22,7 +22,9 @@ static void __init *emc1403_platform_data(void *info) | |||
22 | int intr = get_gpio_by_name("thermal_int"); | 22 | int intr = get_gpio_by_name("thermal_int"); |
23 | int intr2nd = get_gpio_by_name("thermal_alert"); | 23 | int intr2nd = get_gpio_by_name("thermal_alert"); |
24 | 24 | ||
25 | if (intr == -1 || intr2nd == -1) | 25 | if (intr < 0) |
26 | return NULL; | ||
27 | if (intr2nd < 0) | ||
26 | return NULL; | 28 | return NULL; |
27 | 29 | ||
28 | i2c_info->irq = intr + INTEL_MID_IRQ_OFFSET; | 30 | i2c_info->irq = intr + INTEL_MID_IRQ_OFFSET; |
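The hunk above is one instance of a pattern repeated across the device_libs below: get_gpio_by_name() now returns -EINVAL instead of the magic -1, so callers test for any negative value. A self-contained sketch of the idiom; lookup_gpio is a hypothetical stand-in:

#include <errno.h>
#include <stdio.h>

/* Hypothetical lookup: returns a pin number on success, -EINVAL otherwise. */
static int lookup_gpio(const char *name)
{
	return (name && name[0] == 't') ? 42 : -EINVAL;
}

int main(void)
{
	int intr = lookup_gpio("thermal_int");

	/* Test for any negative errno, not for one magic value. */
	if (intr < 0) {
		fprintf(stderr, "no such pin: %d\n", intr);
		return 1;
	}
	printf("pin %d\n", intr);
	return 0;
}

Splitting the combined "intr == -1 || intr2nd == -1" test into two independent "< 0" checks also keeps each failure path distinct and tolerates other errno values in the future.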
diff --git a/arch/x86/platform/intel-mid/device_libs/platform_gpio_keys.c b/arch/x86/platform/intel-mid/device_libs/platform_gpio_keys.c index a013a4834bbe..dccae6b0413f 100644 --- a/arch/x86/platform/intel-mid/device_libs/platform_gpio_keys.c +++ b/arch/x86/platform/intel-mid/device_libs/platform_gpio_keys.c | |||
@@ -66,7 +66,7 @@ static int __init pb_keys_init(void) | |||
66 | gb[i].gpio = get_gpio_by_name(gb[i].desc); | 66 | gb[i].gpio = get_gpio_by_name(gb[i].desc); |
67 | pr_debug("info[%2d]: name = %s, gpio = %d\n", i, gb[i].desc, | 67 | pr_debug("info[%2d]: name = %s, gpio = %d\n", i, gb[i].desc, |
68 | gb[i].gpio); | 68 | gb[i].gpio); |
69 | if (gb[i].gpio == -1) | 69 | if (gb[i].gpio < 0) |
70 | continue; | 70 | continue; |
71 | 71 | ||
72 | if (i != good) | 72 | if (i != good) |
diff --git a/arch/x86/platform/intel-mid/device_libs/platform_ipc.h b/arch/x86/platform/intel-mid/device_libs/platform_ipc.h index 8f568dd79605..79bb09d4f718 100644 --- a/arch/x86/platform/intel-mid/device_libs/platform_ipc.h +++ b/arch/x86/platform/intel-mid/device_libs/platform_ipc.h | |||
@@ -12,6 +12,7 @@ | |||
12 | #ifndef _PLATFORM_IPC_H_ | 12 | #ifndef _PLATFORM_IPC_H_ |
13 | #define _PLATFORM_IPC_H_ | 13 | #define _PLATFORM_IPC_H_ |
14 | 14 | ||
15 | extern void __init ipc_device_handler(struct sfi_device_table_entry *pentry, | 15 | void __init |
16 | struct devs_id *dev) __attribute__((weak)); | 16 | ipc_device_handler(struct sfi_device_table_entry *pentry, struct devs_id *dev); |
17 | |||
17 | #endif | 18 | #endif |
diff --git a/arch/x86/platform/intel-mid/device_libs/platform_lis331.c b/arch/x86/platform/intel-mid/device_libs/platform_lis331.c index 15278c11f714..54226de7541a 100644 --- a/arch/x86/platform/intel-mid/device_libs/platform_lis331.c +++ b/arch/x86/platform/intel-mid/device_libs/platform_lis331.c | |||
@@ -21,7 +21,9 @@ static void __init *lis331dl_platform_data(void *info) | |||
21 | int intr = get_gpio_by_name("accel_int"); | 21 | int intr = get_gpio_by_name("accel_int"); |
22 | int intr2nd = get_gpio_by_name("accel_2"); | 22 | int intr2nd = get_gpio_by_name("accel_2"); |
23 | 23 | ||
24 | if (intr == -1 || intr2nd == -1) | 24 | if (intr < 0) |
25 | return NULL; | ||
26 | if (intr2nd < 0) | ||
25 | return NULL; | 27 | return NULL; |
26 | 28 | ||
27 | i2c_info->irq = intr + INTEL_MID_IRQ_OFFSET; | 29 | i2c_info->irq = intr + INTEL_MID_IRQ_OFFSET; |
diff --git a/arch/x86/platform/intel-mid/device_libs/platform_max7315.c b/arch/x86/platform/intel-mid/device_libs/platform_max7315.c index 94ade10024ae..2c8acbc1e9ad 100644 --- a/arch/x86/platform/intel-mid/device_libs/platform_max7315.c +++ b/arch/x86/platform/intel-mid/device_libs/platform_max7315.c | |||
@@ -48,7 +48,7 @@ static void __init *max7315_platform_data(void *info) | |||
48 | gpio_base = get_gpio_by_name(base_pin_name); | 48 | gpio_base = get_gpio_by_name(base_pin_name); |
49 | intr = get_gpio_by_name(intr_pin_name); | 49 | intr = get_gpio_by_name(intr_pin_name); |
50 | 50 | ||
51 | if (gpio_base == -1) | 51 | if (gpio_base < 0) |
52 | return NULL; | 52 | return NULL; |
53 | max7315->gpio_base = gpio_base; | 53 | max7315->gpio_base = gpio_base; |
54 | if (intr != -1) { | 54 | if (intr != -1) { |
diff --git a/arch/x86/platform/intel-mid/device_libs/platform_mpu3050.c b/arch/x86/platform/intel-mid/device_libs/platform_mpu3050.c index dd28d63c84fb..cfe9a47a1e87 100644 --- a/arch/x86/platform/intel-mid/device_libs/platform_mpu3050.c +++ b/arch/x86/platform/intel-mid/device_libs/platform_mpu3050.c | |||
@@ -19,7 +19,7 @@ static void *mpu3050_platform_data(void *info) | |||
19 | struct i2c_board_info *i2c_info = info; | 19 | struct i2c_board_info *i2c_info = info; |
20 | int intr = get_gpio_by_name("mpu3050_int"); | 20 | int intr = get_gpio_by_name("mpu3050_int"); |
21 | 21 | ||
22 | if (intr == -1) | 22 | if (intr < 0) |
23 | return NULL; | 23 | return NULL; |
24 | 24 | ||
25 | i2c_info->irq = intr + INTEL_MID_IRQ_OFFSET; | 25 | i2c_info->irq = intr + INTEL_MID_IRQ_OFFSET; |
diff --git a/arch/x86/platform/intel-mid/device_libs/platform_msic.h b/arch/x86/platform/intel-mid/device_libs/platform_msic.h index 917eb56d77da..b7be1d041da2 100644 --- a/arch/x86/platform/intel-mid/device_libs/platform_msic.h +++ b/arch/x86/platform/intel-mid/device_libs/platform_msic.h | |||
@@ -14,6 +14,6 @@ | |||
14 | 14 | ||
15 | extern struct intel_msic_platform_data msic_pdata; | 15 | extern struct intel_msic_platform_data msic_pdata; |
16 | 16 | ||
17 | extern void *msic_generic_platform_data(void *info, | 17 | void *msic_generic_platform_data(void *info, enum intel_msic_block block); |
18 | enum intel_msic_block block) __attribute__((weak)); | 18 | |
19 | #endif | 19 | #endif |
diff --git a/arch/x86/platform/intel-mid/device_libs/platform_pmic_gpio.c b/arch/x86/platform/intel-mid/device_libs/platform_pmic_gpio.c index d87182a09263..65c2a9a19db4 100644 --- a/arch/x86/platform/intel-mid/device_libs/platform_pmic_gpio.c +++ b/arch/x86/platform/intel-mid/device_libs/platform_pmic_gpio.c | |||
@@ -26,7 +26,7 @@ static void __init *pmic_gpio_platform_data(void *info) | |||
26 | static struct intel_pmic_gpio_platform_data pmic_gpio_pdata; | 26 | static struct intel_pmic_gpio_platform_data pmic_gpio_pdata; |
27 | int gpio_base = get_gpio_by_name("pmic_gpio_base"); | 27 | int gpio_base = get_gpio_by_name("pmic_gpio_base"); |
28 | 28 | ||
29 | if (gpio_base == -1) | 29 | if (gpio_base < 0) |
30 | gpio_base = 64; | 30 | gpio_base = 64; |
31 | pmic_gpio_pdata.gpio_base = gpio_base; | 31 | pmic_gpio_pdata.gpio_base = gpio_base; |
32 | pmic_gpio_pdata.irq_base = gpio_base + INTEL_MID_IRQ_OFFSET; | 32 | pmic_gpio_pdata.irq_base = gpio_base + INTEL_MID_IRQ_OFFSET; |
diff --git a/arch/x86/platform/intel-mid/device_libs/platform_tca6416.c b/arch/x86/platform/intel-mid/device_libs/platform_tca6416.c index 22881c9a6737..33be0b3be6e1 100644 --- a/arch/x86/platform/intel-mid/device_libs/platform_tca6416.c +++ b/arch/x86/platform/intel-mid/device_libs/platform_tca6416.c | |||
@@ -34,10 +34,10 @@ static void *tca6416_platform_data(void *info) | |||
34 | gpio_base = get_gpio_by_name(base_pin_name); | 34 | gpio_base = get_gpio_by_name(base_pin_name); |
35 | intr = get_gpio_by_name(intr_pin_name); | 35 | intr = get_gpio_by_name(intr_pin_name); |
36 | 36 | ||
37 | if (gpio_base == -1) | 37 | if (gpio_base < 0) |
38 | return NULL; | 38 | return NULL; |
39 | tca6416.gpio_base = gpio_base; | 39 | tca6416.gpio_base = gpio_base; |
40 | if (intr != -1) { | 40 | if (intr >= 0) { |
41 | i2c_info->irq = intr + INTEL_MID_IRQ_OFFSET; | 41 | i2c_info->irq = intr + INTEL_MID_IRQ_OFFSET; |
42 | tca6416.irq_base = gpio_base + INTEL_MID_IRQ_OFFSET; | 42 | tca6416.irq_base = gpio_base + INTEL_MID_IRQ_OFFSET; |
43 | } else { | 43 | } else { |
diff --git a/arch/x86/platform/intel-mid/early_printk_intel_mid.c b/arch/x86/platform/intel-mid/early_printk_intel_mid.c index 4f702f554f6e..e0bd082a80e0 100644 --- a/arch/x86/platform/intel-mid/early_printk_intel_mid.c +++ b/arch/x86/platform/intel-mid/early_printk_intel_mid.c | |||
@@ -22,7 +22,6 @@ | |||
22 | #include <linux/console.h> | 22 | #include <linux/console.h> |
23 | #include <linux/kernel.h> | 23 | #include <linux/kernel.h> |
24 | #include <linux/delay.h> | 24 | #include <linux/delay.h> |
25 | #include <linux/init.h> | ||
26 | #include <linux/io.h> | 25 | #include <linux/io.h> |
27 | 26 | ||
28 | #include <asm/fixmap.h> | 27 | #include <asm/fixmap.h> |
diff --git a/arch/x86/platform/intel-mid/intel-mid.c b/arch/x86/platform/intel-mid/intel-mid.c index f90e290f689f..1bbedc4b0f88 100644 --- a/arch/x86/platform/intel-mid/intel-mid.c +++ b/arch/x86/platform/intel-mid/intel-mid.c | |||
@@ -35,6 +35,8 @@ | |||
35 | #include <asm/apb_timer.h> | 35 | #include <asm/apb_timer.h> |
36 | #include <asm/reboot.h> | 36 | #include <asm/reboot.h> |
37 | 37 | ||
38 | #include "intel_mid_weak_decls.h" | ||
39 | |||
38 | /* | 40 | /* |
39 | * the clockevent devices on Moorestown/Medfield can be APBT or LAPIC clock, | 41 | * the clockevent devices on Moorestown/Medfield can be APBT or LAPIC clock, |
40 | * cmdline option x86_intel_mid_timer can be used to override the configuration | 42 | * cmdline option x86_intel_mid_timer can be used to override the configuration |
@@ -58,12 +60,16 @@ | |||
58 | 60 | ||
59 | enum intel_mid_timer_options intel_mid_timer_options; | 61 | enum intel_mid_timer_options intel_mid_timer_options; |
60 | 62 | ||
63 | /* intel_mid_ops to store sub arch ops */ | ||
64 | struct intel_mid_ops *intel_mid_ops; | ||
65 | /* getter functions for sub arch ops */ | ||
66 | static void *(*get_intel_mid_ops[])(void) = INTEL_MID_OPS_INIT; | ||
61 | enum intel_mid_cpu_type __intel_mid_cpu_chip; | 67 | enum intel_mid_cpu_type __intel_mid_cpu_chip; |
62 | EXPORT_SYMBOL_GPL(__intel_mid_cpu_chip); | 68 | EXPORT_SYMBOL_GPL(__intel_mid_cpu_chip); |
63 | 69 | ||
64 | static void intel_mid_power_off(void) | 70 | static void intel_mid_power_off(void) |
65 | { | 71 | { |
66 | } | 72 | }; |
67 | 73 | ||
68 | static void intel_mid_reboot(void) | 74 | static void intel_mid_reboot(void) |
69 | { | 75 | { |
@@ -72,32 +78,6 @@ static void intel_mid_reboot(void) | |||
72 | 78 | ||
73 | static unsigned long __init intel_mid_calibrate_tsc(void) | 79 | static unsigned long __init intel_mid_calibrate_tsc(void) |
74 | { | 80 | { |
75 | unsigned long fast_calibrate; | ||
76 | u32 lo, hi, ratio, fsb; | ||
77 | |||
78 | rdmsr(MSR_IA32_PERF_STATUS, lo, hi); | ||
79 | pr_debug("IA32 perf status is 0x%x, 0x%0x\n", lo, hi); | ||
80 | ratio = (hi >> 8) & 0x1f; | ||
81 | pr_debug("ratio is %d\n", ratio); | ||
82 | if (!ratio) { | ||
83 | pr_err("read a zero ratio, should be incorrect!\n"); | ||
84 | pr_err("force tsc ratio to 16 ...\n"); | ||
85 | ratio = 16; | ||
86 | } | ||
87 | rdmsr(MSR_FSB_FREQ, lo, hi); | ||
88 | if ((lo & 0x7) == 0x7) | ||
89 | fsb = PENWELL_FSB_FREQ_83SKU; | ||
90 | else | ||
91 | fsb = PENWELL_FSB_FREQ_100SKU; | ||
92 | fast_calibrate = ratio * fsb; | ||
93 | pr_debug("read penwell tsc %lu khz\n", fast_calibrate); | ||
94 | lapic_timer_frequency = fsb * 1000 / HZ; | ||
95 | /* mark tsc clocksource as reliable */ | ||
96 | set_cpu_cap(&boot_cpu_data, X86_FEATURE_TSC_RELIABLE); | ||
97 | |||
98 | if (fast_calibrate) | ||
99 | return fast_calibrate; | ||
100 | |||
101 | return 0; | 81 | return 0; |
102 | } | 82 | } |
103 | 83 | ||
@@ -125,13 +105,37 @@ static void __init intel_mid_time_init(void) | |||
125 | 105 | ||
126 | static void intel_mid_arch_setup(void) | 106 | static void intel_mid_arch_setup(void) |
127 | { | 107 | { |
128 | if (boot_cpu_data.x86 == 6 && boot_cpu_data.x86_model == 0x27) | 108 | if (boot_cpu_data.x86 != 6) { |
129 | __intel_mid_cpu_chip = INTEL_MID_CPU_CHIP_PENWELL; | ||
130 | else { | ||
131 | pr_err("Unknown Intel MID CPU (%d:%d), default to Penwell\n", | 109 | pr_err("Unknown Intel MID CPU (%d:%d), default to Penwell\n", |
132 | boot_cpu_data.x86, boot_cpu_data.x86_model); | 110 | boot_cpu_data.x86, boot_cpu_data.x86_model); |
133 | __intel_mid_cpu_chip = INTEL_MID_CPU_CHIP_PENWELL; | 111 | __intel_mid_cpu_chip = INTEL_MID_CPU_CHIP_PENWELL; |
112 | goto out; | ||
134 | } | 113 | } |
114 | |||
115 | switch (boot_cpu_data.x86_model) { | ||
116 | case 0x35: | ||
117 | __intel_mid_cpu_chip = INTEL_MID_CPU_CHIP_CLOVERVIEW; | ||
118 | break; | ||
119 | case 0x3C: | ||
120 | case 0x4A: | ||
121 | __intel_mid_cpu_chip = INTEL_MID_CPU_CHIP_TANGIER; | ||
122 | break; | ||
123 | case 0x27: | ||
124 | default: | ||
125 | __intel_mid_cpu_chip = INTEL_MID_CPU_CHIP_PENWELL; | ||
126 | break; | ||
127 | } | ||
128 | |||
129 | if (__intel_mid_cpu_chip < MAX_CPU_OPS(get_intel_mid_ops)) | ||
130 | intel_mid_ops = get_intel_mid_ops[__intel_mid_cpu_chip](); | ||
131 | else { | ||
132 | intel_mid_ops = get_intel_mid_ops[INTEL_MID_CPU_CHIP_PENWELL](); | ||
133 | pr_info("ARCH: Unknown SoC, assuming PENWELL!\n"); | ||
134 | } | ||
135 | |||
136 | out: | ||
137 | if (intel_mid_ops->arch_setup) | ||
138 | intel_mid_ops->arch_setup(); | ||
135 | } | 139 | } |
136 | 140 | ||
137 | /* MID systems don't have i8042 controller */ | 141 | /* MID systems don't have i8042 controller */ |
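The new sub-arch plumbing in intel_mid_arch_setup() reduces to a table of getter functions indexed by the detected chip, with a bounds check and a Penwell fallback. A self-contained sketch under those assumptions; names mirror the patch but the bodies are illustrative:

#include <stdio.h>

enum chip { CHIP_PENWELL, CHIP_CLOVERVIEW, CHIP_TANGIER };

struct mid_ops { void (*arch_setup)(void); };

static void penwell_setup(void) { puts("penwell arch_setup"); }
static void tangier_setup(void) { puts("tangier arch_setup"); }

static struct mid_ops penwell_ops = { .arch_setup = penwell_setup };
static struct mid_ops tangier_ops = { .arch_setup = tangier_setup };

static void *get_penwell_ops(void)    { return &penwell_ops; }
static void *get_cloverview_ops(void) { return &penwell_ops; } /* shares Penwell ops */
static void *get_tangier_ops(void)    { return &tangier_ops; }

/* Getter table indexed by chip id, in the spirit of INTEL_MID_OPS_INIT. */
static void *(*get_ops[])(void) = {
	get_penwell_ops, get_cloverview_ops, get_tangier_ops,
};

#define NUM_OPS (sizeof(get_ops) / sizeof(get_ops[0]))

int main(void)
{
	unsigned int chip = CHIP_TANGIER;	/* pretend this was detected */
	struct mid_ops *ops;

	/* Bounds-check the chip id and fall back to Penwell, as the patch does. */
	ops = (chip < NUM_OPS) ? get_ops[chip]() : get_penwell_ops();
	if (ops->arch_setup)
		ops->arch_setup();
	return 0;
}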
diff --git a/arch/x86/platform/intel-mid/intel_mid_weak_decls.h b/arch/x86/platform/intel-mid/intel_mid_weak_decls.h new file mode 100644 index 000000000000..46aa25c8ce06 --- /dev/null +++ b/arch/x86/platform/intel-mid/intel_mid_weak_decls.h | |||
@@ -0,0 +1,19 @@ | |||
1 | /* | ||
2 | * intel_mid_weak_decls.h: Weak declarations used by intel-mid.c | ||
3 | * | ||
4 | * (C) Copyright 2013 Intel Corporation | ||
5 | * | ||
6 | * This program is free software; you can redistribute it and/or | ||
7 | * modify it under the terms of the GNU General Public License | ||
8 | * as published by the Free Software Foundation; version 2 | ||
9 | * of the License. | ||
10 | */ | ||
11 | |||
12 | |||
13 | /* __attribute__((weak)) makes these declarations overridable */ | ||
14 | /* For every CPU addition a new get_<cpuname>_ops interface needs | ||
15 | * to be added. | ||
16 | */ | ||
17 | extern void *get_penwell_ops(void) __attribute__((weak)); | ||
18 | extern void *get_cloverview_ops(void) __attribute__((weak)); | ||
19 | extern void *get_tangier_ops(void) __attribute__((weak)); | ||
diff --git a/arch/x86/platform/intel-mid/mfld.c b/arch/x86/platform/intel-mid/mfld.c new file mode 100644 index 000000000000..23381d2174ae --- /dev/null +++ b/arch/x86/platform/intel-mid/mfld.c | |||
@@ -0,0 +1,75 @@ | |||
1 | /* | ||
2 | * mfld.c: Intel Medfield platform setup code | ||
3 | * | ||
4 | * (C) Copyright 2013 Intel Corporation | ||
5 | * | ||
6 | * This program is free software; you can redistribute it and/or | ||
7 | * modify it under the terms of the GNU General Public License | ||
8 | * as published by the Free Software Foundation; version 2 | ||
9 | * of the License. | ||
10 | */ | ||
11 | |||
12 | #include <linux/init.h> | ||
13 | |||
14 | #include <asm/apic.h> | ||
15 | #include <asm/intel-mid.h> | ||
16 | #include <asm/intel_mid_vrtc.h> | ||
17 | |||
18 | #include "intel_mid_weak_decls.h" | ||
19 | |||
20 | static void penwell_arch_setup(void); | ||
21 | /* penwell arch ops */ | ||
22 | static struct intel_mid_ops penwell_ops = { | ||
23 | .arch_setup = penwell_arch_setup, | ||
24 | }; | ||
25 | |||
26 | static void mfld_power_off(void) | ||
27 | { | ||
28 | } | ||
29 | |||
30 | static unsigned long __init mfld_calibrate_tsc(void) | ||
31 | { | ||
32 | unsigned long fast_calibrate; | ||
33 | u32 lo, hi, ratio, fsb; | ||
34 | |||
35 | rdmsr(MSR_IA32_PERF_STATUS, lo, hi); | ||
36 | pr_debug("IA32 perf status is 0x%x, 0x%0x\n", lo, hi); | ||
37 | ratio = (hi >> 8) & 0x1f; | ||
38 | pr_debug("ratio is %d\n", ratio); | ||
39 | if (!ratio) { | ||
40 | pr_err("read a zero ratio, should be incorrect!\n"); | ||
41 | pr_err("force tsc ratio to 16 ...\n"); | ||
42 | ratio = 16; | ||
43 | } | ||
44 | rdmsr(MSR_FSB_FREQ, lo, hi); | ||
45 | if ((lo & 0x7) == 0x7) | ||
46 | fsb = FSB_FREQ_83SKU; | ||
47 | else | ||
48 | fsb = FSB_FREQ_100SKU; | ||
49 | fast_calibrate = ratio * fsb; | ||
50 | pr_debug("read penwell tsc %lu khz\n", fast_calibrate); | ||
51 | lapic_timer_frequency = fsb * 1000 / HZ; | ||
52 | /* mark tsc clocksource as reliable */ | ||
53 | set_cpu_cap(&boot_cpu_data, X86_FEATURE_TSC_RELIABLE); | ||
54 | |||
55 | if (fast_calibrate) | ||
56 | return fast_calibrate; | ||
57 | |||
58 | return 0; | ||
59 | } | ||
60 | |||
61 | static void __init penwell_arch_setup(void) | ||
62 | { | ||
63 | x86_platform.calibrate_tsc = mfld_calibrate_tsc; | ||
64 | pm_power_off = mfld_power_off; | ||
65 | } | ||
66 | |||
67 | void *get_penwell_ops(void) | ||
68 | { | ||
69 | return &penwell_ops; | ||
70 | } | ||
71 | |||
72 | void *get_cloverview_ops(void) | ||
73 | { | ||
74 | return &penwell_ops; | ||
75 | } | ||
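The calibration that moved here from intel-mid.c is straightforward arithmetic: TSC kHz = bus ratio x FSB kHz, and the LAPIC timer ticks FSB-Hz/HZ times per jiffy. A sketch with illustrative constants; the real FSB_FREQ_*SKU values and HZ come from kernel headers:

#include <stdio.h>

int main(void)
{
	unsigned int ratio   = 16;	/* (hi >> 8) & 0x1f from MSR_IA32_PERF_STATUS, per the code above */
	unsigned int fsb_khz = 100000;	/* assumed 100 MHz SKU */
	unsigned int hz      = 100;	/* assumed CONFIG_HZ */

	unsigned long tsc_khz = (unsigned long)ratio * fsb_khz;
	unsigned int lapic_per_jiffy = fsb_khz * 1000 / hz;

	/* 16 * 100000 kHz = 1.6 GHz TSC; 1,000,000 LAPIC ticks per jiffy. */
	printf("tsc %lu kHz, lapic %u ticks/jiffy\n", tsc_khz, lapic_per_jiffy);
	return 0;
}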
diff --git a/arch/x86/platform/intel-mid/mrfl.c b/arch/x86/platform/intel-mid/mrfl.c new file mode 100644 index 000000000000..aaca91753d32 --- /dev/null +++ b/arch/x86/platform/intel-mid/mrfl.c | |||
@@ -0,0 +1,103 @@ | |||
1 | /* | ||
2 | * mrfl.c: Intel Merrifield platform specific setup code | ||
3 | * | ||
4 | * (C) Copyright 2013 Intel Corporation | ||
5 | * | ||
6 | * This program is free software; you can redistribute it and/or | ||
7 | * modify it under the terms of the GNU General Public License | ||
8 | * as published by the Free Software Foundation; version 2 | ||
9 | * of the License. | ||
10 | */ | ||
11 | |||
12 | #include <linux/init.h> | ||
13 | |||
14 | #include <asm/apic.h> | ||
15 | #include <asm/intel-mid.h> | ||
16 | |||
17 | #include "intel_mid_weak_decls.h" | ||
18 | |||
19 | static unsigned long __init tangier_calibrate_tsc(void) | ||
20 | { | ||
21 | unsigned long fast_calibrate; | ||
22 | u32 lo, hi, ratio, fsb, bus_freq; | ||
23 | |||
24 | /* *********************** */ | ||
25 | /* Compute TSC:Ratio * FSB */ | ||
26 | /* *********************** */ | ||
27 | |||
28 | /* Compute Ratio */ | ||
29 | rdmsr(MSR_PLATFORM_INFO, lo, hi); | ||
30 | pr_debug("IA32 PLATFORM_INFO is 0x%x : %x\n", hi, lo); | ||
31 | |||
32 | ratio = (lo >> 8) & 0xFF; | ||
33 | pr_debug("ratio is %d\n", ratio); | ||
34 | if (!ratio) { | ||
35 | pr_err("Read a zero ratio, force tsc ratio to 4 ...\n"); | ||
36 | ratio = 4; | ||
37 | } | ||
38 | |||
39 | /* Compute FSB */ | ||
40 | rdmsr(MSR_FSB_FREQ, lo, hi); | ||
41 | pr_debug("Actual FSB frequency detected by SOC 0x%x : %x\n", | ||
42 | hi, lo); | ||
43 | |||
44 | bus_freq = lo & 0x7; | ||
45 | pr_debug("bus_freq = 0x%x\n", bus_freq); | ||
46 | |||
47 | if (bus_freq == 0) | ||
48 | fsb = FSB_FREQ_100SKU; | ||
49 | else if (bus_freq == 1) | ||
50 | fsb = FSB_FREQ_100SKU; | ||
51 | else if (bus_freq == 2) | ||
52 | fsb = FSB_FREQ_133SKU; | ||
53 | else if (bus_freq == 3) | ||
54 | fsb = FSB_FREQ_167SKU; | ||
55 | else if (bus_freq == 4) | ||
56 | fsb = FSB_FREQ_83SKU; | ||
57 | else if (bus_freq == 5) | ||
58 | fsb = FSB_FREQ_400SKU; | ||
59 | else if (bus_freq == 6) | ||
60 | fsb = FSB_FREQ_267SKU; | ||
61 | else if (bus_freq == 7) | ||
62 | fsb = FSB_FREQ_333SKU; | ||
63 | else { | ||
64 | BUG(); | ||
65 | pr_err("Invalid bus_freq! Setting to minimal value!\n"); | ||
66 | fsb = FSB_FREQ_100SKU; | ||
67 | } | ||
68 | |||
69 | /* TSC = FSB Freq * Resolved HFM Ratio */ | ||
70 | fast_calibrate = ratio * fsb; | ||
71 | pr_debug("calculate tangier tsc %lu KHz\n", fast_calibrate); | ||
72 | |||
73 | /* ************************************ */ | ||
74 | /* Calculate Local APIC Timer Frequency */ | ||
75 | /* ************************************ */ | ||
76 | lapic_timer_frequency = (fsb * 1000) / HZ; | ||
77 | |||
78 | pr_debug("Setting lapic_timer_frequency = %d\n", | ||
79 | lapic_timer_frequency); | ||
80 | |||
81 | /* mark tsc clocksource as reliable */ | ||
82 | set_cpu_cap(&boot_cpu_data, X86_FEATURE_TSC_RELIABLE); | ||
83 | |||
84 | if (fast_calibrate) | ||
85 | return fast_calibrate; | ||
86 | |||
87 | return 0; | ||
88 | } | ||
89 | |||
90 | static void __init tangier_arch_setup(void) | ||
91 | { | ||
92 | x86_platform.calibrate_tsc = tangier_calibrate_tsc; | ||
93 | } | ||
94 | |||
95 | /* tangier arch ops */ | ||
96 | static struct intel_mid_ops tangier_ops = { | ||
97 | .arch_setup = tangier_arch_setup, | ||
98 | }; | ||
99 | |||
100 | void *get_tangier_ops(void) | ||
101 | { | ||
102 | return &tangier_ops; | ||
103 | } | ||
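The if/else ladder over bus_freq decodes a 3-bit MSR field; the same decode fits a lookup table. A sketch with illustrative kHz stand-ins for the FSB_FREQ_*SKU constants (note codes 0 and 1 both select the 100 MHz SKU, as above):

#include <stdio.h>

/* Index = MSR_FSB_FREQ & 0x7; values are stand-ins, not kernel constants. */
static const unsigned int fsb_khz_by_code[8] = {
	100000, 100000, 133000, 167000, 83000, 400000, 267000, 333000,
};

int main(void)
{
	unsigned int bus_freq = 2;	/* pretend MSR_FSB_FREQ & 0x7 read 2 */

	printf("fsb = %u kHz\n", fsb_khz_by_code[bus_freq & 0x7]);
	return 0;
}

Since the field is masked to three bits, the ladder's BUG() branch is unreachable, which a table makes explicit.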
diff --git a/arch/x86/platform/intel-mid/sfi.c b/arch/x86/platform/intel-mid/sfi.c index c84c1ca396bf..994c40bd7cb7 100644 --- a/arch/x86/platform/intel-mid/sfi.c +++ b/arch/x86/platform/intel-mid/sfi.c | |||
@@ -224,7 +224,7 @@ int get_gpio_by_name(const char *name) | |||
224 | if (!strncmp(name, pentry->pin_name, SFI_NAME_LEN)) | 224 | if (!strncmp(name, pentry->pin_name, SFI_NAME_LEN)) |
225 | return pentry->pin_no; | 225 | return pentry->pin_no; |
226 | } | 226 | } |
227 | return -1; | 227 | return -EINVAL; |
228 | } | 228 | } |
229 | 229 | ||
230 | void __init intel_scu_device_register(struct platform_device *pdev) | 230 | void __init intel_scu_device_register(struct platform_device *pdev) |
@@ -250,7 +250,7 @@ static void __init intel_scu_spi_device_register(struct spi_board_info *sdev) | |||
250 | sdev->modalias); | 250 | sdev->modalias); |
251 | return; | 251 | return; |
252 | } | 252 | } |
253 | memcpy(new_dev, sdev, sizeof(*sdev)); | 253 | *new_dev = *sdev; |
254 | 254 | ||
255 | spi_devs[spi_next_dev++] = new_dev; | 255 | spi_devs[spi_next_dev++] = new_dev; |
256 | } | 256 | } |
@@ -271,7 +271,7 @@ static void __init intel_scu_i2c_device_register(int bus, | |||
271 | idev->type); | 271 | idev->type); |
272 | return; | 272 | return; |
273 | } | 273 | } |
274 | memcpy(new_dev, idev, sizeof(*idev)); | 274 | *new_dev = *idev; |
275 | 275 | ||
276 | i2c_bus[i2c_next_dev] = bus; | 276 | i2c_bus[i2c_next_dev] = bus; |
277 | i2c_devs[i2c_next_dev++] = new_dev; | 277 | i2c_devs[i2c_next_dev++] = new_dev; |
@@ -337,6 +337,8 @@ static void __init sfi_handle_ipc_dev(struct sfi_device_table_entry *pentry, | |||
337 | pr_debug("IPC bus, name = %16.16s, irq = 0x%2x\n", | 337 | pr_debug("IPC bus, name = %16.16s, irq = 0x%2x\n", |
338 | pentry->name, pentry->irq); | 338 | pentry->name, pentry->irq); |
339 | pdata = intel_mid_sfi_get_pdata(dev, pentry); | 339 | pdata = intel_mid_sfi_get_pdata(dev, pentry); |
340 | if (IS_ERR(pdata)) | ||
341 | return; | ||
340 | 342 | ||
341 | pdev = platform_device_alloc(pentry->name, 0); | 343 | pdev = platform_device_alloc(pentry->name, 0); |
342 | if (pdev == NULL) { | 344 | if (pdev == NULL) { |
@@ -370,6 +372,8 @@ static void __init sfi_handle_spi_dev(struct sfi_device_table_entry *pentry, | |||
370 | spi_info.chip_select); | 372 | spi_info.chip_select); |
371 | 373 | ||
372 | pdata = intel_mid_sfi_get_pdata(dev, &spi_info); | 374 | pdata = intel_mid_sfi_get_pdata(dev, &spi_info); |
375 | if (IS_ERR(pdata)) | ||
376 | return; | ||
373 | 377 | ||
374 | spi_info.platform_data = pdata; | 378 | spi_info.platform_data = pdata; |
375 | if (dev->delay) | 379 | if (dev->delay) |
@@ -395,6 +399,8 @@ static void __init sfi_handle_i2c_dev(struct sfi_device_table_entry *pentry, | |||
395 | i2c_info.addr); | 399 | i2c_info.addr); |
396 | pdata = intel_mid_sfi_get_pdata(dev, &i2c_info); | 400 | pdata = intel_mid_sfi_get_pdata(dev, &i2c_info); |
397 | i2c_info.platform_data = pdata; | 401 | i2c_info.platform_data = pdata; |
402 | if (IS_ERR(pdata)) | ||
403 | return; | ||
398 | 404 | ||
399 | if (dev->delay) | 405 | if (dev->delay) |
400 | intel_scu_i2c_device_register(pentry->host_num, &i2c_info); | 406 | intel_scu_i2c_device_register(pentry->host_num, &i2c_info); |
@@ -443,13 +449,35 @@ static int __init sfi_parse_devs(struct sfi_table_header *table) | |||
443 | * so we have to enable them one by one here | 449 | * so we have to enable them one by one here |
444 | */ | 450 | */ |
445 | ioapic = mp_find_ioapic(irq); | 451 | ioapic = mp_find_ioapic(irq); |
446 | irq_attr.ioapic = ioapic; | 452 | if (ioapic >= 0) { |
447 | irq_attr.ioapic_pin = irq; | 453 | irq_attr.ioapic = ioapic; |
448 | irq_attr.trigger = 1; | 454 | irq_attr.ioapic_pin = irq; |
449 | irq_attr.polarity = 1; | 455 | irq_attr.trigger = 1; |
450 | io_apic_set_pci_routing(NULL, irq, &irq_attr); | 456 | if (intel_mid_identify_cpu() == |
451 | } else | 457 | INTEL_MID_CPU_CHIP_TANGIER) { |
458 | if (!strncmp(pentry->name, | ||
459 | "r69001-ts-i2c", 13)) | ||
460 | /* active low */ | ||
461 | irq_attr.polarity = 1; | ||
462 | else if (!strncmp(pentry->name, | ||
463 | "synaptics_3202", 14)) | ||
464 | /* active low */ | ||
465 | irq_attr.polarity = 1; | ||
466 | else if (irq == 41) | ||
467 | /* fast_int_1 */ | ||
468 | irq_attr.polarity = 1; | ||
469 | else | ||
470 | /* active high */ | ||
471 | irq_attr.polarity = 0; | ||
472 | } else { | ||
473 | /* PNW and CLV go with active low */ | ||
474 | irq_attr.polarity = 1; | ||
475 | } | ||
476 | io_apic_set_pci_routing(NULL, irq, &irq_attr); | ||
477 | } | ||
478 | } else { | ||
452 | irq = 0; /* No irq */ | 479 | irq = 0; /* No irq */ |
480 | } | ||
453 | 481 | ||
454 | dev = get_device_id(pentry->type, pentry->name); | 482 | dev = get_device_id(pentry->type, pentry->name); |
455 | 483 | ||
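The added IS_ERR() checks lean on the kernel's ERR_PTR convention: errno values are encoded in the top page of the address space, so intel_mid_sfi_get_pdata() can return either platform data or an error through one pointer. A userspace approximation; MAX_ERRNO and the helpers mimic include/linux/err.h:

#include <stdio.h>

#define MAX_ERRNO 4095

static void *err_ptr(long err)     { return (void *)err; }
static int   is_err(const void *p) { return (unsigned long)p >= (unsigned long)-MAX_ERRNO; }

int main(void)
{
	void *pdata = err_ptr(-22);	/* -EINVAL */

	if (is_err(pdata)) {
		printf("pdata carries error %ld\n", (long)pdata);
		return 1;
	}
	return 0;
}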
diff --git a/arch/x86/platform/iris/iris.c b/arch/x86/platform/iris/iris.c index e6cb80f620af..4d171e8640ef 100644 --- a/arch/x86/platform/iris/iris.c +++ b/arch/x86/platform/iris/iris.c | |||
@@ -27,7 +27,6 @@ | |||
27 | #include <linux/kernel.h> | 27 | #include <linux/kernel.h> |
28 | #include <linux/errno.h> | 28 | #include <linux/errno.h> |
29 | #include <linux/delay.h> | 29 | #include <linux/delay.h> |
30 | #include <linux/init.h> | ||
31 | #include <linux/pm.h> | 30 | #include <linux/pm.h> |
32 | #include <asm/io.h> | 31 | #include <asm/io.h> |
33 | 32 | ||
diff --git a/arch/x86/platform/olpc/olpc-xo15-sci.c b/arch/x86/platform/olpc/olpc-xo15-sci.c index 649a12befba9..08e350e757dc 100644 --- a/arch/x86/platform/olpc/olpc-xo15-sci.c +++ b/arch/x86/platform/olpc/olpc-xo15-sci.c | |||
@@ -15,8 +15,7 @@ | |||
15 | #include <linux/power_supply.h> | 15 | #include <linux/power_supply.h> |
16 | #include <linux/olpc-ec.h> | 16 | #include <linux/olpc-ec.h> |
17 | 17 | ||
18 | #include <acpi/acpi_bus.h> | 18 | #include <linux/acpi.h> |
19 | #include <acpi/acpi_drivers.h> | ||
20 | #include <asm/olpc.h> | 19 | #include <asm/olpc.h> |
21 | 20 | ||
22 | #define DRV_NAME "olpc-xo15-sci" | 21 | #define DRV_NAME "olpc-xo15-sci" |
diff --git a/arch/x86/platform/ts5500/ts5500.c b/arch/x86/platform/ts5500/ts5500.c index 39febb214e8c..9471b9456f25 100644 --- a/arch/x86/platform/ts5500/ts5500.c +++ b/arch/x86/platform/ts5500/ts5500.c | |||
@@ -88,7 +88,7 @@ struct ts5500_sbc { | |||
88 | static const struct { | 88 | static const struct { |
89 | const char * const string; | 89 | const char * const string; |
90 | const ssize_t offset; | 90 | const ssize_t offset; |
91 | } ts5500_signatures[] __initdata = { | 91 | } ts5500_signatures[] __initconst = { |
92 | { "TS-5x00 AMD Elan", 0xb14 }, | 92 | { "TS-5x00 AMD Elan", 0xb14 }, |
93 | }; | 93 | }; |
94 | 94 | ||
diff --git a/arch/x86/platform/uv/tlb_uv.c b/arch/x86/platform/uv/tlb_uv.c index efe4d7220397..dfe605ac1bcd 100644 --- a/arch/x86/platform/uv/tlb_uv.c +++ b/arch/x86/platform/uv/tlb_uv.c | |||
@@ -433,15 +433,49 @@ static void reset_with_ipi(struct pnmask *distribution, struct bau_control *bcp) | |||
433 | return; | 433 | return; |
434 | } | 434 | } |
435 | 435 | ||
436 | static inline unsigned long cycles_2_us(unsigned long long cyc) | 436 | /* |
437 | * Not to be confused with cycles_2_ns() from tsc.c; this gives a relative | ||
438 | * number, not an absolute. It converts a duration in cycles to a duration in | ||
439 | * ns. | ||
440 | */ | ||
441 | static inline unsigned long long cycles_2_ns(unsigned long long cyc) | ||
437 | { | 442 | { |
443 | struct cyc2ns_data *data = cyc2ns_read_begin(); | ||
438 | unsigned long long ns; | 444 | unsigned long long ns; |
439 | unsigned long us; | ||
440 | int cpu = smp_processor_id(); | ||
441 | 445 | ||
442 | ns = (cyc * per_cpu(cyc2ns, cpu)) >> CYC2NS_SCALE_FACTOR; | 446 | ns = mul_u64_u32_shr(cyc, data->cyc2ns_mul, data->cyc2ns_shift); |
443 | us = ns / 1000; | 447 | |
444 | return us; | 448 | cyc2ns_read_end(data); |
449 | return ns; | ||
450 | } | ||
451 | |||
452 | /* | ||
453 | * The reverse of the above; converts a duration in ns to a duration in cycles. | ||
454 | */ | ||
455 | static inline unsigned long long ns_2_cycles(unsigned long long ns) | ||
456 | { | ||
457 | struct cyc2ns_data *data = cyc2ns_read_begin(); | ||
458 | unsigned long long cyc; | ||
459 | |||
460 | cyc = (ns << data->cyc2ns_shift) / data->cyc2ns_mul; | ||
461 | |||
462 | cyc2ns_read_end(data); | ||
463 | return cyc; | ||
464 | } | ||
465 | |||
466 | static inline unsigned long cycles_2_us(unsigned long long cyc) | ||
467 | { | ||
468 | return cycles_2_ns(cyc) / NSEC_PER_USEC; | ||
469 | } | ||
470 | |||
471 | static inline cycles_t sec_2_cycles(unsigned long sec) | ||
472 | { | ||
473 | return ns_2_cycles(sec * NSEC_PER_SEC); | ||
474 | } | ||
475 | |||
476 | static inline unsigned long long usec_2_cycles(unsigned long usec) | ||
477 | { | ||
478 | return ns_2_cycles(usec * NSEC_PER_USEC); | ||
445 | } | 479 | } |
446 | 480 | ||
447 | /* | 481 | /* |
@@ -668,16 +702,6 @@ static int wait_completion(struct bau_desc *bau_desc, | |||
668 | bcp, try); | 702 | bcp, try); |
669 | } | 703 | } |
670 | 704 | ||
671 | static inline cycles_t sec_2_cycles(unsigned long sec) | ||
672 | { | ||
673 | unsigned long ns; | ||
674 | cycles_t cyc; | ||
675 | |||
676 | ns = sec * 1000000000; | ||
677 | cyc = (ns << CYC2NS_SCALE_FACTOR)/(per_cpu(cyc2ns, smp_processor_id())); | ||
678 | return cyc; | ||
679 | } | ||
680 | |||
681 | /* | 705 | /* |
682 | * Our retries are blocked by all destination sw ack resources being | 706 | * Our retries are blocked by all destination sw ack resources being |
683 | * in use, and a timeout is pending. In that case hardware immediately | 707 | * in use, and a timeout is pending. In that case hardware immediately |
@@ -1327,16 +1351,6 @@ static void ptc_seq_stop(struct seq_file *file, void *data) | |||
1327 | { | 1351 | { |
1328 | } | 1352 | } |
1329 | 1353 | ||
1330 | static inline unsigned long long usec_2_cycles(unsigned long microsec) | ||
1331 | { | ||
1332 | unsigned long ns; | ||
1333 | unsigned long long cyc; | ||
1334 | |||
1335 | ns = microsec * 1000; | ||
1336 | cyc = (ns << CYC2NS_SCALE_FACTOR)/(per_cpu(cyc2ns, smp_processor_id())); | ||
1337 | return cyc; | ||
1338 | } | ||
1339 | |||
1340 | /* | 1354 | /* |
1341 | * Display the statistics thru /proc/sgi_uv/ptc_statistics | 1355 | * Display the statistics thru /proc/sgi_uv/ptc_statistics |
1342 | * 'data' points to the cpu number | 1356 | * 'data' points to the cpu number |
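The new cycles_2_ns() is fixed-point arithmetic: a calibrated multiplier/shift pair approximates nanoseconds per cycle, replacing the old per-cpu cyc2ns scale. A sketch assuming a 2.5 GHz TSC and GCC's 128-bit integers for the widening multiply; the kernel's mul_u64_u32_shr() behaves equivalently:

#include <stdint.h>
#include <stdio.h>

/* ns = (cyc * mul) >> shift, with mul / 2^shift ~= ns per cycle. */
static uint64_t mul_shift(uint64_t a, uint32_t mul, unsigned int shift)
{
	return (uint64_t)(((unsigned __int128)a * mul) >> shift);
}

int main(void)
{
	/* 2.5 GHz TSC: 0.4 ns/cycle, scaled by 2^32. */
	uint32_t mul = (uint32_t)(0.4 * 4294967296.0);
	uint64_t cyc = 2500000000ull;		/* one second's worth of cycles */

	printf("%llu ns\n", (unsigned long long)mul_shift(cyc, mul, 32));
	return 0;
}

ns_2_cycles() simply inverts the scaling with (ns << shift) / mul, and the sec/usec helpers are now thin wrappers over those two conversions.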
diff --git a/arch/x86/platform/uv/uv_nmi.c b/arch/x86/platform/uv/uv_nmi.c index 8eeccba73130..be27da60dc8f 100644 --- a/arch/x86/platform/uv/uv_nmi.c +++ b/arch/x86/platform/uv/uv_nmi.c | |||
@@ -74,7 +74,6 @@ static atomic_t uv_in_nmi; | |||
74 | static atomic_t uv_nmi_cpu = ATOMIC_INIT(-1); | 74 | static atomic_t uv_nmi_cpu = ATOMIC_INIT(-1); |
75 | static atomic_t uv_nmi_cpus_in_nmi = ATOMIC_INIT(-1); | 75 | static atomic_t uv_nmi_cpus_in_nmi = ATOMIC_INIT(-1); |
76 | static atomic_t uv_nmi_slave_continue; | 76 | static atomic_t uv_nmi_slave_continue; |
77 | static atomic_t uv_nmi_kexec_failed; | ||
78 | static cpumask_var_t uv_nmi_cpu_mask; | 77 | static cpumask_var_t uv_nmi_cpu_mask; |
79 | 78 | ||
80 | /* Values for uv_nmi_slave_continue */ | 79 | /* Values for uv_nmi_slave_continue */ |
@@ -149,7 +148,8 @@ module_param_named(retry_count, uv_nmi_retry_count, int, 0644); | |||
149 | * "dump" - dump process stack for each cpu | 148 | * "dump" - dump process stack for each cpu |
150 | * "ips" - dump IP info for each cpu | 149 | * "ips" - dump IP info for each cpu |
151 | * "kdump" - do crash dump | 150 | * "kdump" - do crash dump |
152 | * "kdb" - enter KDB/KGDB (default) | 151 | * "kdb" - enter KDB (default) |
152 | * "kgdb" - enter KGDB | ||
153 | */ | 153 | */ |
154 | static char uv_nmi_action[8] = "kdb"; | 154 | static char uv_nmi_action[8] = "kdb"; |
155 | module_param_string(action, uv_nmi_action, sizeof(uv_nmi_action), 0644); | 155 | module_param_string(action, uv_nmi_action, sizeof(uv_nmi_action), 0644); |
@@ -504,6 +504,7 @@ static void uv_nmi_touch_watchdogs(void) | |||
504 | } | 504 | } |
505 | 505 | ||
506 | #if defined(CONFIG_KEXEC) | 506 | #if defined(CONFIG_KEXEC) |
507 | static atomic_t uv_nmi_kexec_failed; | ||
507 | static void uv_nmi_kdump(int cpu, int master, struct pt_regs *regs) | 508 | static void uv_nmi_kdump(int cpu, int master, struct pt_regs *regs) |
508 | { | 509 | { |
509 | /* Call crash to dump system state */ | 510 | /* Call crash to dump system state */ |
@@ -537,18 +538,45 @@ static inline void uv_nmi_kdump(int cpu, int master, struct pt_regs *regs) | |||
537 | } | 538 | } |
538 | #endif /* !CONFIG_KEXEC */ | 539 | #endif /* !CONFIG_KEXEC */ |
539 | 540 | ||
541 | #ifdef CONFIG_KGDB | ||
540 | #ifdef CONFIG_KGDB_KDB | 542 | #ifdef CONFIG_KGDB_KDB |
541 | /* Call KDB from NMI handler */ | 543 | static inline int uv_nmi_kdb_reason(void) |
542 | static void uv_call_kdb(int cpu, struct pt_regs *regs, int master) | ||
543 | { | 544 | { |
544 | int ret; | 545 | return KDB_REASON_SYSTEM_NMI; |
546 | } | ||
547 | #else /* !CONFIG_KGDB_KDB */ | ||
548 | static inline int uv_nmi_kdb_reason(void) | ||
549 | { | ||
550 | /* Ensure the user is expecting to attach a gdb remote */ | ||
551 | if (uv_nmi_action_is("kgdb")) | ||
552 | return 0; | ||
553 | |||
554 | pr_err("UV: NMI error: KDB is not enabled in this kernel\n"); | ||
555 | return -1; | ||
556 | } | ||
557 | #endif /* CONFIG_KGDB_KDB */ | ||
545 | 558 | ||
559 | /* | ||
560 | * Call KGDB/KDB from NMI handler | ||
561 | * | ||
562 | * Note that if both KGDB and KDB are configured, then the action of 'kgdb' or | ||
563 | * 'kdb' has no effect on which is used. See the KGDB documentation for further | ||
564 | * information. | ||
565 | */ | ||
566 | static void uv_call_kgdb_kdb(int cpu, struct pt_regs *regs, int master) | ||
567 | { | ||
546 | if (master) { | 568 | if (master) { |
569 | int reason = uv_nmi_kdb_reason(); | ||
570 | int ret; | ||
571 | |||
572 | if (reason < 0) | ||
573 | return; | ||
574 | |||
547 | /* call KGDB NMI handler as MASTER */ | 575 | /* call KGDB NMI handler as MASTER */ |
548 | ret = kgdb_nmicallin(cpu, X86_TRAP_NMI, regs, | 576 | ret = kgdb_nmicallin(cpu, X86_TRAP_NMI, regs, reason, |
549 | &uv_nmi_slave_continue); | 577 | &uv_nmi_slave_continue); |
550 | if (ret) { | 578 | if (ret) { |
551 | pr_alert("KDB returned error, is kgdboc set?\n"); | 579 | pr_alert("KGDB returned error, is kgdboc set?\n"); |
552 | atomic_set(&uv_nmi_slave_continue, SLAVE_EXIT); | 580 | atomic_set(&uv_nmi_slave_continue, SLAVE_EXIT); |
553 | } | 581 | } |
554 | } else { | 582 | } else { |
@@ -567,12 +595,12 @@ static void uv_call_kdb(int cpu, struct pt_regs *regs, int master) | |||
567 | uv_nmi_sync_exit(master); | 595 | uv_nmi_sync_exit(master); |
568 | } | 596 | } |
569 | 597 | ||
570 | #else /* !CONFIG_KGDB_KDB */ | 598 | #else /* !CONFIG_KGDB */ |
571 | static inline void uv_call_kdb(int cpu, struct pt_regs *regs, int master) | 599 | static inline void uv_call_kgdb_kdb(int cpu, struct pt_regs *regs, int master) |
572 | { | 600 | { |
573 | pr_err("UV: NMI error: KGDB/KDB is not enabled in this kernel\n"); | 601 | pr_err("UV: NMI error: KGDB is not enabled in this kernel\n"); |
574 | } | 602 | } |
575 | #endif /* !CONFIG_KGDB_KDB */ | 603 | #endif /* !CONFIG_KGDB */ |
576 | 604 | ||
577 | /* | 605 | /* |
578 | * UV NMI handler | 606 | * UV NMI handler |
@@ -606,9 +634,9 @@ int uv_handle_nmi(unsigned int reason, struct pt_regs *regs) | |||
606 | if (uv_nmi_action_is("ips") || uv_nmi_action_is("dump")) | 634 | if (uv_nmi_action_is("ips") || uv_nmi_action_is("dump")) |
607 | uv_nmi_dump_state(cpu, regs, master); | 635 | uv_nmi_dump_state(cpu, regs, master); |
608 | 636 | ||
609 | /* Call KDB if enabled */ | 637 | /* Call KGDB/KDB if enabled */ |
610 | else if (uv_nmi_action_is("kdb")) | 638 | else if (uv_nmi_action_is("kdb") || uv_nmi_action_is("kgdb")) |
611 | uv_call_kdb(cpu, regs, master); | 639 | uv_call_kgdb_kdb(cpu, regs, master); |
612 | 640 | ||
613 | /* Clear per_cpu "in nmi" flag */ | 641 | /* Clear per_cpu "in nmi" flag */ |
614 | atomic_set(&uv_cpu_nmi.state, UV_NMI_STATE_OUT); | 642 | atomic_set(&uv_cpu_nmi.state, UV_NMI_STATE_OUT); |
@@ -634,7 +662,7 @@ int uv_handle_nmi(unsigned int reason, struct pt_regs *regs) | |||
634 | /* | 662 | /* |
635 | * NMI handler for pulling in CPUs when perf events are grabbing our NMI | 663 | * NMI handler for pulling in CPUs when perf events are grabbing our NMI |
636 | */ | 664 | */ |
637 | int uv_handle_nmi_ping(unsigned int reason, struct pt_regs *regs) | 665 | static int uv_handle_nmi_ping(unsigned int reason, struct pt_regs *regs) |
638 | { | 666 | { |
639 | int ret; | 667 | int ret; |
640 | 668 | ||
@@ -651,7 +679,7 @@ int uv_handle_nmi_ping(unsigned int reason, struct pt_regs *regs) | |||
651 | return ret; | 679 | return ret; |
652 | } | 680 | } |
653 | 681 | ||
654 | void uv_register_nmi_notifier(void) | 682 | static void uv_register_nmi_notifier(void) |
655 | { | 683 | { |
656 | if (register_nmi_handler(NMI_UNKNOWN, uv_handle_nmi, 0, "uv")) | 684 | if (register_nmi_handler(NMI_UNKNOWN, uv_handle_nmi, 0, "uv")) |
657 | pr_warn("UV: NMI handler failed to register\n"); | 685 | pr_warn("UV: NMI handler failed to register\n"); |
@@ -695,6 +723,5 @@ void uv_nmi_setup(void) | |||
695 | uv_hub_nmi_per(cpu) = uv_hub_nmi_list[nid]; | 723 | uv_hub_nmi_per(cpu) = uv_hub_nmi_list[nid]; |
696 | } | 724 | } |
697 | BUG_ON(!alloc_cpumask_var(&uv_nmi_cpu_mask, GFP_KERNEL)); | 725 | BUG_ON(!alloc_cpumask_var(&uv_nmi_cpu_mask, GFP_KERNEL)); |
726 | uv_register_nmi_notifier(); | ||
698 | } | 727 | } |
699 | |||
700 | |||
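The NMI action handling is string dispatch on a writable module parameter (the "action" parameter registered above). A sketch of the pattern; action_is approximates uv_nmi_action_is, and the comparison length is an assumption:

#include <stdio.h>
#include <string.h>

static char action[8] = "kdb";	/* like uv_nmi_action, default "kdb" */

static int action_is(const char *s)
{
	return strncmp(action, s, sizeof(action)) == 0;
}

int main(void)
{
	if (action_is("kdb") || action_is("kgdb"))
		puts("would enter the kernel debugger");
	else if (action_is("ips") || action_is("dump"))
		puts("would dump per-cpu state");
	return 0;
}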
diff --git a/arch/x86/platform/visws/Makefile b/arch/x86/platform/visws/Makefile deleted file mode 100644 index 91bc17ab2fd5..000000000000 --- a/arch/x86/platform/visws/Makefile +++ /dev/null | |||
@@ -1 +0,0 @@ | |||
1 | obj-$(CONFIG_X86_VISWS) += visws_quirks.o | ||
diff --git a/arch/x86/platform/visws/visws_quirks.c b/arch/x86/platform/visws/visws_quirks.c deleted file mode 100644 index 94d8a39332ec..000000000000 --- a/arch/x86/platform/visws/visws_quirks.c +++ /dev/null | |||
@@ -1,608 +0,0 @@ | |||
1 | /* | ||
2 | * SGI Visual Workstation support and quirks, unmaintained. | ||
3 | * | ||
4 | * Split out from setup.c by davej@suse.de | ||
5 | * | ||
6 | * Copyright (C) 1999 Bent Hagemark, Ingo Molnar | ||
7 | * | ||
8 | * SGI Visual Workstation interrupt controller | ||
9 | * | ||
10 | * The Cobalt system ASIC in the Visual Workstation contains a "Cobalt" APIC | ||
11 | * which serves as the main interrupt controller in the system. Non-legacy | ||
12 | * hardware in the system uses this controller directly. Legacy devices | ||
13 | * are connected to the PIIX4 which in turn has its 8259(s) connected to | ||
14 | * a of the Cobalt APIC entry. | ||
15 | * | ||
16 | * 09/02/2000 - Updated for 2.4 by jbarnes@sgi.com | ||
17 | * | ||
18 | * 25/11/2002 - Updated for 2.5 by Andrey Panin <pazke@orbita1.ru> | ||
19 | */ | ||
20 | #include <linux/interrupt.h> | ||
21 | #include <linux/module.h> | ||
22 | #include <linux/init.h> | ||
23 | #include <linux/smp.h> | ||
24 | |||
25 | #include <asm/visws/cobalt.h> | ||
26 | #include <asm/visws/piix4.h> | ||
27 | #include <asm/io_apic.h> | ||
28 | #include <asm/fixmap.h> | ||
29 | #include <asm/reboot.h> | ||
30 | #include <asm/setup.h> | ||
31 | #include <asm/apic.h> | ||
32 | #include <asm/e820.h> | ||
33 | #include <asm/time.h> | ||
34 | #include <asm/io.h> | ||
35 | |||
36 | #include <linux/kernel_stat.h> | ||
37 | |||
38 | #include <asm/i8259.h> | ||
39 | #include <asm/irq_vectors.h> | ||
40 | #include <asm/visws/lithium.h> | ||
41 | |||
42 | #include <linux/sched.h> | ||
43 | #include <linux/kernel.h> | ||
44 | #include <linux/pci.h> | ||
45 | #include <linux/pci_ids.h> | ||
46 | |||
47 | extern int no_broadcast; | ||
48 | |||
49 | char visws_board_type = -1; | ||
50 | char visws_board_rev = -1; | ||
51 | |||
52 | static void __init visws_time_init(void) | ||
53 | { | ||
54 | printk(KERN_INFO "Starting Cobalt Timer system clock\n"); | ||
55 | |||
56 | /* Set the countdown value */ | ||
57 | co_cpu_write(CO_CPU_TIMEVAL, CO_TIME_HZ/HZ); | ||
58 | |||
59 | /* Start the timer */ | ||
60 | co_cpu_write(CO_CPU_CTRL, co_cpu_read(CO_CPU_CTRL) | CO_CTRL_TIMERUN); | ||
61 | |||
62 | /* Enable (unmask) the timer interrupt */ | ||
63 | co_cpu_write(CO_CPU_CTRL, co_cpu_read(CO_CPU_CTRL) & ~CO_CTRL_TIMEMASK); | ||
64 | |||
65 | setup_default_timer_irq(); | ||
66 | } | ||
67 | |||
68 | /* Replaces the default init_ISA_irqs in the generic setup */ | ||
69 | static void __init visws_pre_intr_init(void); | ||
70 | |||
71 | /* Quirk for machine specific memory setup. */ | ||
72 | |||
73 | #define MB (1024 * 1024) | ||
74 | |||
75 | unsigned long sgivwfb_mem_phys; | ||
76 | unsigned long sgivwfb_mem_size; | ||
77 | EXPORT_SYMBOL(sgivwfb_mem_phys); | ||
78 | EXPORT_SYMBOL(sgivwfb_mem_size); | ||
79 | |||
80 | long long mem_size __initdata = 0; | ||
81 | |||
82 | static char * __init visws_memory_setup(void) | ||
83 | { | ||
84 | long long gfx_mem_size = 8 * MB; | ||
85 | |||
86 | mem_size = boot_params.alt_mem_k; | ||
87 | |||
88 | if (!mem_size) { | ||
89 | printk(KERN_WARNING "Bootloader didn't set memory size, upgrade it !\n"); | ||
90 | mem_size = 128 * MB; | ||
91 | } | ||
92 | |||
93 | /* | ||
94 | * this hardcodes the graphics memory to 8 MB | ||
95 | * it really should be sized dynamically (or at least | ||
96 | * set as a boot param) | ||
97 | */ | ||
98 | if (!sgivwfb_mem_size) { | ||
99 | printk(KERN_WARNING "Defaulting to 8 MB framebuffer size\n"); | ||
100 | sgivwfb_mem_size = 8 * MB; | ||
101 | } | ||
102 | |||
103 | /* | ||
104 | * Trim to nearest MB | ||
105 | */ | ||
106 | sgivwfb_mem_size &= ~((1 << 20) - 1); | ||
107 | sgivwfb_mem_phys = mem_size - gfx_mem_size; | ||
108 | |||
109 | e820_add_region(0, LOWMEMSIZE(), E820_RAM); | ||
110 | e820_add_region(HIGH_MEMORY, mem_size - sgivwfb_mem_size - HIGH_MEMORY, E820_RAM); | ||
111 | e820_add_region(sgivwfb_mem_phys, sgivwfb_mem_size, E820_RESERVED); | ||
112 | |||
113 | return "PROM"; | ||
114 | } | ||
115 | |||
116 | static void visws_machine_emergency_restart(void) | ||
117 | { | ||
118 | /* | ||
119 | * Visual Workstations restart after this | ||
120 | * register is poked on the PIIX4 | ||
121 | */ | ||
122 | outb(PIIX4_RESET_VAL, PIIX4_RESET_PORT); | ||
123 | } | ||
124 | |||
125 | static void visws_machine_power_off(void) | ||
126 | { | ||
127 | unsigned short pm_status; | ||
128 | /* extern unsigned int pci_bus0; */ | ||
129 | |||
130 | while ((pm_status = inw(PMSTS_PORT)) & 0x100) | ||
131 | outw(pm_status, PMSTS_PORT); | ||
132 | |||
133 | outw(PM_SUSPEND_ENABLE, PMCNTRL_PORT); | ||
134 | |||
135 | mdelay(10); | ||
136 | |||
137 | #define PCI_CONF1_ADDRESS(bus, devfn, reg) \ | ||
138 | (0x80000000 | (bus << 16) | (devfn << 8) | (reg & ~3)) | ||
139 | |||
140 | /* outl(PCI_CONF1_ADDRESS(pci_bus0, SPECIAL_DEV, SPECIAL_REG), 0xCF8); */ | ||
141 | outl(PIIX_SPECIAL_STOP, 0xCFC); | ||
142 | } | ||
143 | |||
144 | static void __init visws_get_smp_config(unsigned int early) | ||
145 | { | ||
146 | } | ||
147 | |||
148 | /* | ||
149 | * The Visual Workstation is Intel MP compliant in the hardware | ||
150 | * sense, but it doesn't have a BIOS(-configuration table). | ||
151 | * No problem for Linux. | ||
152 | */ | ||
153 | |||
154 | static void __init MP_processor_info(struct mpc_cpu *m) | ||
155 | { | ||
156 | int ver, logical_apicid; | ||
157 | physid_mask_t apic_cpus; | ||
158 | |||
159 | if (!(m->cpuflag & CPU_ENABLED)) | ||
160 | return; | ||
161 | |||
162 | logical_apicid = m->apicid; | ||
163 | printk(KERN_INFO "%sCPU #%d %u:%u APIC version %d\n", | ||
164 | m->cpuflag & CPU_BOOTPROCESSOR ? "Bootup " : "", | ||
165 | m->apicid, (m->cpufeature & CPU_FAMILY_MASK) >> 8, | ||
166 | (m->cpufeature & CPU_MODEL_MASK) >> 4, m->apicver); | ||
167 | |||
168 | if (m->cpuflag & CPU_BOOTPROCESSOR) | ||
169 | boot_cpu_physical_apicid = m->apicid; | ||
170 | |||
171 | ver = m->apicver; | ||
172 | if ((ver >= 0x14 && m->apicid >= 0xff) || m->apicid >= 0xf) { | ||
173 | printk(KERN_ERR "Processor #%d INVALID. (Max ID: %d).\n", | ||
174 | m->apicid, MAX_LOCAL_APIC); | ||
175 | return; | ||
176 | } | ||
177 | |||
178 | apic->apicid_to_cpu_present(m->apicid, &apic_cpus); | ||
179 | physids_or(phys_cpu_present_map, phys_cpu_present_map, apic_cpus); | ||
180 | /* | ||
181 | * Validate version | ||
182 | */ | ||
183 | if (ver == 0x0) { | ||
184 | printk(KERN_ERR "BIOS bug, APIC version is 0 for CPU#%d! " | ||
185 | "fixing up to 0x10. (tell your hw vendor)\n", | ||
186 | m->apicid); | ||
187 | ver = 0x10; | ||
188 | } | ||
189 | apic_version[m->apicid] = ver; | ||
190 | } | ||
191 | |||
192 | static void __init visws_find_smp_config(void) | ||
193 | { | ||
194 | struct mpc_cpu *mp = phys_to_virt(CO_CPU_TAB_PHYS); | ||
195 | unsigned short ncpus = readw(phys_to_virt(CO_CPU_NUM_PHYS)); | ||
196 | |||
197 | if (ncpus > CO_CPU_MAX) { | ||
198 | printk(KERN_WARNING "find_visws_smp: got cpu count of %d at %p\n", | ||
199 | ncpus, mp); | ||
200 | |||
201 | ncpus = CO_CPU_MAX; | ||
202 | } | ||
203 | |||
204 | if (ncpus > setup_max_cpus) | ||
205 | ncpus = setup_max_cpus; | ||
206 | |||
207 | #ifdef CONFIG_X86_LOCAL_APIC | ||
208 | smp_found_config = 1; | ||
209 | #endif | ||
210 | while (ncpus--) | ||
211 | MP_processor_info(mp++); | ||
212 | |||
213 | mp_lapic_addr = APIC_DEFAULT_PHYS_BASE; | ||
214 | } | ||
215 | |||
216 | static void visws_trap_init(void); | ||
217 | |||
218 | void __init visws_early_detect(void) | ||
219 | { | ||
220 | int raw; | ||
221 | |||
222 | visws_board_type = (char)(inb_p(PIIX_GPI_BD_REG) & PIIX_GPI_BD_REG) | ||
223 | >> PIIX_GPI_BD_SHIFT; | ||
224 | |||
225 | if (visws_board_type < 0) | ||
226 | return; | ||
227 | |||
228 | /* | ||
229 | * Override the default platform setup functions | ||
230 | */ | ||
231 | x86_init.resources.memory_setup = visws_memory_setup; | ||
232 | x86_init.mpparse.get_smp_config = visws_get_smp_config; | ||
233 | x86_init.mpparse.find_smp_config = visws_find_smp_config; | ||
234 | x86_init.irqs.pre_vector_init = visws_pre_intr_init; | ||
235 | x86_init.irqs.trap_init = visws_trap_init; | ||
236 | x86_init.timers.timer_init = visws_time_init; | ||
237 | x86_init.pci.init = pci_visws_init; | ||
238 | x86_init.pci.init_irq = x86_init_noop; | ||
239 | |||
240 | /* | ||
241 | * Install reboot quirks: | ||
242 | */ | ||
243 | pm_power_off = visws_machine_power_off; | ||
244 | machine_ops.emergency_restart = visws_machine_emergency_restart; | ||
245 | |||
246 | /* | ||
247 | * Do not use broadcast IPIs: | ||
248 | */ | ||
249 | no_broadcast = 0; | ||
250 | |||
251 | #ifdef CONFIG_X86_IO_APIC | ||
252 | /* | ||
253 | * Turn off IO-APIC detection and initialization: | ||
254 | */ | ||
255 | skip_ioapic_setup = 1; | ||
256 | #endif | ||
257 | |||
258 | /* | ||
259 | * Get Board rev. | ||
260 | * First, we have to initialize the 307 part to allow us access | ||
261 | * to the GPIO registers. Let's map them at 0x0fc0 which is right | ||
262 | * after the PIIX4 PM section. | ||
263 | */ | ||
264 | outb_p(SIO_DEV_SEL, SIO_INDEX); | ||
265 | outb_p(SIO_GP_DEV, SIO_DATA); /* Talk to GPIO regs. */ | ||
266 | |||
267 | outb_p(SIO_DEV_MSB, SIO_INDEX); | ||
268 | outb_p(SIO_GP_MSB, SIO_DATA); /* MSB of GPIO base address */ | ||
269 | |||
270 | outb_p(SIO_DEV_LSB, SIO_INDEX); | ||
271 | outb_p(SIO_GP_LSB, SIO_DATA); /* LSB of GPIO base address */ | ||
272 | |||
273 | outb_p(SIO_DEV_ENB, SIO_INDEX); | ||
274 | outb_p(1, SIO_DATA); /* Enable GPIO registers. */ | ||
275 | |||
276 | /* | ||
277 | * Now, we have to map the power management section to write | ||
278 | * a bit which enables access to the GPIO registers. | ||
279 | * What lunatic came up with this shit? | ||
280 | */ | ||
281 | outb_p(SIO_DEV_SEL, SIO_INDEX); | ||
282 | outb_p(SIO_PM_DEV, SIO_DATA); /* Talk to GPIO regs. */ | ||
283 | |||
284 | outb_p(SIO_DEV_MSB, SIO_INDEX); | ||
285 | outb_p(SIO_PM_MSB, SIO_DATA); /* MSB of PM base address */ | ||
286 | |||
287 | outb_p(SIO_DEV_LSB, SIO_INDEX); | ||
288 | outb_p(SIO_PM_LSB, SIO_DATA); /* LSB of PM base address */ | ||
289 | |||
290 | outb_p(SIO_DEV_ENB, SIO_INDEX); | ||
291 | outb_p(1, SIO_DATA); /* Enable PM registers. */ | ||
292 | |||
293 | /* | ||
294 | * Now, write the PM register which enables the GPIO registers. | ||
295 | */ | ||
296 | outb_p(SIO_PM_FER2, SIO_PM_INDEX); | ||
297 | outb_p(SIO_PM_GP_EN, SIO_PM_DATA); | ||
298 | |||
299 | /* | ||
300 | * Now, initialize the GPIO registers. | ||
301 | * We want them all to be inputs which is the | ||
302 | * power on default, so let's leave them alone. | ||
303 | * So, let's just read the board rev! | ||
304 | */ | ||
305 | raw = inb_p(SIO_GP_DATA1); | ||
306 | raw &= 0x7f; /* 7 bits of valid board revision ID. */ | ||
307 | |||
308 | if (visws_board_type == VISWS_320) { | ||
309 | if (raw < 0x6) { | ||
310 | visws_board_rev = 4; | ||
311 | } else if (raw < 0xc) { | ||
312 | visws_board_rev = 5; | ||
313 | } else { | ||
314 | visws_board_rev = 6; | ||
315 | } | ||
316 | } else if (visws_board_type == VISWS_540) { | ||
317 | visws_board_rev = 2; | ||
318 | } else { | ||
319 | visws_board_rev = raw; | ||
320 | } | ||
321 | |||
322 | printk(KERN_INFO "Silicon Graphics Visual Workstation %s (rev %d) detected\n", | ||
323 | (visws_board_type == VISWS_320 ? "320" : | ||
324 | (visws_board_type == VISWS_540 ? "540" : | ||
325 | "unknown")), visws_board_rev); | ||
326 | } | ||
327 | |||
328 | #define A01234 (LI_INTA_0 | LI_INTA_1 | LI_INTA_2 | LI_INTA_3 | LI_INTA_4) | ||
329 | #define BCD (LI_INTB | LI_INTC | LI_INTD) | ||
330 | #define ALLDEVS (A01234 | BCD) | ||
331 | |||
332 | static __init void lithium_init(void) | ||
333 | { | ||
334 | set_fixmap(FIX_LI_PCIA, LI_PCI_A_PHYS); | ||
335 | set_fixmap(FIX_LI_PCIB, LI_PCI_B_PHYS); | ||
336 | |||
337 | if ((li_pcia_read16(PCI_VENDOR_ID) != PCI_VENDOR_ID_SGI) || | ||
338 | (li_pcia_read16(PCI_DEVICE_ID) != PCI_DEVICE_ID_SGI_LITHIUM)) { | ||
339 | printk(KERN_EMERG "Lithium hostbridge %c not found\n", 'A'); | ||
340 | /* panic("This machine is not SGI Visual Workstation 320/540"); */ | ||
341 | } | ||
342 | |||
343 | if ((li_pcib_read16(PCI_VENDOR_ID) != PCI_VENDOR_ID_SGI) || | ||
344 | (li_pcib_read16(PCI_DEVICE_ID) != PCI_DEVICE_ID_SGI_LITHIUM)) { | ||
345 | printk(KERN_EMERG "Lithium hostbridge %c not found\n", 'B'); | ||
346 | /* panic("This machine is not SGI Visual Workstation 320/540"); */ | ||
347 | } | ||
348 | |||
349 | li_pcia_write16(LI_PCI_INTEN, ALLDEVS); | ||
350 | li_pcib_write16(LI_PCI_INTEN, ALLDEVS); | ||
351 | } | ||
352 | |||
353 | static __init void cobalt_init(void) | ||
354 | { | ||
355 | /* | ||
356 | * On normal SMP PC this is used only with SMP, but we have to | ||
357 | * use it and set it up here to start the Cobalt clock | ||
358 | */ | ||
359 | set_fixmap(FIX_APIC_BASE, APIC_DEFAULT_PHYS_BASE); | ||
360 | setup_local_APIC(); | ||
361 | printk(KERN_INFO "Local APIC Version %#x, ID %#x\n", | ||
362 | (unsigned int)apic_read(APIC_LVR), | ||
363 | (unsigned int)apic_read(APIC_ID)); | ||
364 | |||
365 | set_fixmap(FIX_CO_CPU, CO_CPU_PHYS); | ||
366 | set_fixmap(FIX_CO_APIC, CO_APIC_PHYS); | ||
367 | printk(KERN_INFO "Cobalt Revision %#lx, APIC ID %#lx\n", | ||
368 | co_cpu_read(CO_CPU_REV), co_apic_read(CO_APIC_ID)); | ||
369 | |||
370 | /* Enable Cobalt APIC being careful to NOT change the ID! */ | ||
371 | co_apic_write(CO_APIC_ID, co_apic_read(CO_APIC_ID) | CO_APIC_ENABLE); | ||
372 | |||
373 | printk(KERN_INFO "Cobalt APIC enabled: ID reg %#lx\n", | ||
374 | co_apic_read(CO_APIC_ID)); | ||
375 | } | ||
376 | |||
377 | static void __init visws_trap_init(void) | ||
378 | { | ||
379 | lithium_init(); | ||
380 | cobalt_init(); | ||
381 | } | ||
382 | |||
383 | /* | ||
384 | * IRQ controller / APIC support: | ||
385 | */ | ||
386 | |||
387 | static DEFINE_SPINLOCK(cobalt_lock); | ||
388 | |||
389 | /* | ||
390 | * Set the given Cobalt APIC Redirection Table entry to point | ||
391 | * to the given IDT vector/index. | ||
392 | */ | ||
393 | static inline void co_apic_set(int entry, int irq) | ||
394 | { | ||
395 | co_apic_write(CO_APIC_LO(entry), CO_APIC_LEVEL | (irq + FIRST_EXTERNAL_VECTOR)); | ||
396 | co_apic_write(CO_APIC_HI(entry), 0); | ||
397 | } | ||
398 | |||
399 | /* | ||
400 | * Cobalt (IO)-APIC functions to handle PCI devices. | ||
401 | */ | ||
402 | static inline int co_apic_ide0_hack(void) | ||
403 | { | ||
404 | extern char visws_board_type; | ||
405 | extern char visws_board_rev; | ||
406 | |||
407 | if (visws_board_type == VISWS_320 && visws_board_rev == 5) | ||
408 | return 5; | ||
409 | return CO_APIC_IDE0; | ||
410 | } | ||
411 | |||
412 | static int is_co_apic(unsigned int irq) | ||
413 | { | ||
414 | if (IS_CO_APIC(irq)) | ||
415 | return CO_APIC(irq); | ||
416 | |||
417 | switch (irq) { | ||
418 | case 0: return CO_APIC_CPU; | ||
419 | case CO_IRQ_IDE0: return co_apic_ide0_hack(); | ||
420 | case CO_IRQ_IDE1: return CO_APIC_IDE1; | ||
421 | default: return -1; | ||
422 | } | ||
423 | } | ||
424 | |||
425 | |||
426 | /* | ||
427 | * This is the SGI Cobalt (IO-)APIC: | ||
428 | */ | ||
429 | static void enable_cobalt_irq(struct irq_data *data) | ||
430 | { | ||
431 | co_apic_set(is_co_apic(data->irq), data->irq); | ||
432 | } | ||
433 | |||
434 | static void disable_cobalt_irq(struct irq_data *data) | ||
435 | { | ||
436 | int entry = is_co_apic(data->irq); | ||
437 | |||
438 | co_apic_write(CO_APIC_LO(entry), CO_APIC_MASK); | ||
439 | co_apic_read(CO_APIC_LO(entry)); | ||
440 | } | ||
441 | |||
442 | static void ack_cobalt_irq(struct irq_data *data) | ||
443 | { | ||
444 | unsigned long flags; | ||
445 | |||
446 | spin_lock_irqsave(&cobalt_lock, flags); | ||
447 | disable_cobalt_irq(data); | ||
448 | apic_write(APIC_EOI, APIC_EOI_ACK); | ||
449 | spin_unlock_irqrestore(&cobalt_lock, flags); | ||
450 | } | ||
451 | |||
452 | static struct irq_chip cobalt_irq_type = { | ||
453 | .name = "Cobalt-APIC", | ||
454 | .irq_enable = enable_cobalt_irq, | ||
455 | .irq_disable = disable_cobalt_irq, | ||
456 | .irq_ack = ack_cobalt_irq, | ||
457 | }; | ||
458 | |||
459 | |||
460 | /* | ||
461 | * This is the PIIX4-based 8259 that is wired up indirectly to Cobalt | ||
462 | * -- not the manner expected by the code in i8259.c. | ||
463 | * | ||
464 | * there is a 'master' physical interrupt source that gets sent to | ||
465 | * the CPU. But in the chipset there are various 'virtual' interrupts | ||
466 | * waiting to be handled. We represent this to Linux through a 'master' | ||
467 | * interrupt controller type, and through a special virtual interrupt- | ||
468 | * controller. Device drivers only see the virtual interrupt sources. | ||
469 | */ | ||
470 | static unsigned int startup_piix4_master_irq(struct irq_data *data) | ||
471 | { | ||
472 | legacy_pic->init(0); | ||
473 | enable_cobalt_irq(data); | ||
474 | return 0; | ||
475 | } | ||
476 | |||
477 | static struct irq_chip piix4_master_irq_type = { | ||
478 | .name = "PIIX4-master", | ||
479 | .irq_startup = startup_piix4_master_irq, | ||
480 | .irq_ack = ack_cobalt_irq, | ||
481 | }; | ||
482 | |||
483 | static void pii4_mask(struct irq_data *data) { } | ||
484 | |||
485 | static struct irq_chip piix4_virtual_irq_type = { | ||
486 | .name = "PIIX4-virtual", | ||
487 | .irq_mask = pii4_mask, | ||
488 | }; | ||
489 | |||
490 | /* | ||
491 | * PIIX4-8259 master/virtual functions to handle interrupt requests | ||
492 | * from legacy devices: floppy, parallel, serial, rtc. | ||
493 | * | ||
494 | * None of these get Cobalt APIC entries, nor do they have IDT | ||
495 | * entries. These interrupts are purely virtual and distributed from | ||
496 | * the 'master' interrupt source: CO_IRQ_8259. | ||
497 | * | ||
498 | * When the 8259 interrupts, its handler figures out which of these | ||
499 | * devices is interrupting and dispatches to its handler. | ||
500 | * | ||
501 | * CAREFUL: devices see the 'virtual' interrupt only. Thus disable/ | ||
502 | * enable_irq gets the right irq. This 'master' irq is never directly | ||
503 | * manipulated by any driver. | ||
504 | */ | ||
505 | static irqreturn_t piix4_master_intr(int irq, void *dev_id) | ||
506 | { | ||
507 | unsigned long flags; | ||
508 | int realirq; | ||
509 | |||
510 | raw_spin_lock_irqsave(&i8259A_lock, flags); | ||
511 | |||
512 | /* Find out what's interrupting in the PIIX4 master 8259 */ | ||
513 | outb(0x0c, 0x20); /* OCW3 Poll command */ | ||
514 | realirq = inb(0x20); | ||
515 | |||
516 | /* | ||
517 | * Bit 7 == 0 means invalid/spurious | ||
518 | */ | ||
519 | if (unlikely(!(realirq & 0x80))) | ||
520 | goto out_unlock; | ||
521 | |||
522 | realirq &= 7; | ||
523 | |||
524 | if (unlikely(realirq == 2)) { | ||
525 | outb(0x0c, 0xa0); | ||
526 | realirq = inb(0xa0); | ||
527 | |||
528 | if (unlikely(!(realirq & 0x80))) | ||
529 | goto out_unlock; | ||
530 | |||
531 | realirq = (realirq & 7) + 8; | ||
532 | } | ||
533 | |||
534 | /* mask and ack interrupt */ | ||
535 | cached_irq_mask |= 1 << realirq; | ||
536 | if (unlikely(realirq > 7)) { | ||
537 | inb(0xa1); | ||
538 | outb(cached_slave_mask, 0xa1); | ||
539 | outb(0x60 + (realirq & 7), 0xa0); | ||
540 | outb(0x60 + 2, 0x20); | ||
541 | } else { | ||
542 | inb(0x21); | ||
543 | outb(cached_master_mask, 0x21); | ||
544 | outb(0x60 + realirq, 0x20); | ||
545 | } | ||
546 | |||
547 | raw_spin_unlock_irqrestore(&i8259A_lock, flags); | ||
548 | |||
549 | /* | ||
550 | * handle this 'virtual interrupt' as a Cobalt one now. | ||
551 | */ | ||
552 | generic_handle_irq(realirq); | ||
553 | |||
554 | return IRQ_HANDLED; | ||
555 | |||
556 | out_unlock: | ||
557 | raw_spin_unlock_irqrestore(&i8259A_lock, flags); | ||
558 | return IRQ_NONE; | ||
559 | } | ||
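The handler above drives the 8259 in poll mode: OCW3 command 0x0c asks the PIC for its highest-priority pending level, the returned byte is valid only when bit 7 is set, and level 2 means the cascade, so the slave PIC at 0xa0 must be polled the same way. A standalone sketch of that decode with port I/O stubbed out (inb()/outb() here are fakes so the sketch runs anywhere):

        #include <stdio.h>

        /* Fake port I/O; on real hardware these are the x86 IN/OUT insns.
         * The fake master PIC reports IRQ level 3 pending (0x80 | 3). */
        static unsigned char poll_byte[2] = { 0x83, 0x00 };
        static unsigned char inb(unsigned short port) { return poll_byte[port == 0xa0]; }
        static void outb(unsigned char val, unsigned short port) { (void)val; (void)port; }

        /* Decode one poll cycle: returns the pending IRQ (0-15) or -1 if spurious. */
        static int i8259_poll(void)
        {
                int irq;

                outb(0x0c, 0x20);           /* OCW3 poll command to the master PIC */
                irq = inb(0x20);            /* bit 7 = valid, bits 0-2 = IRQ level */
                if (!(irq & 0x80))
                        return -1;
                irq &= 7;

                if (irq == 2) {             /* level 2 is the cascade to the slave */
                        outb(0x0c, 0xa0);
                        irq = inb(0xa0);
                        if (!(irq & 0x80))
                                return -1;
                        irq = (irq & 7) + 8;  /* slave levels map to IRQ 8-15 */
                }
                return irq;
        }

        int main(void)
        {
                printf("pending IRQ: %d\n", i8259_poll());
                return 0;
        }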
560 | |||
561 | static struct irqaction master_action = { | ||
562 | .handler = piix4_master_intr, | ||
563 | .name = "PIIX4-8259", | ||
564 | .flags = IRQF_NO_THREAD, | ||
565 | }; | ||
566 | |||
567 | static struct irqaction cascade_action = { | ||
568 | .handler = no_action, | ||
569 | .name = "cascade", | ||
570 | .flags = IRQF_NO_THREAD, | ||
571 | }; | ||
572 | |||
573 | static inline void set_piix4_virtual_irq_type(void) | ||
574 | { | ||
575 | piix4_virtual_irq_type.irq_enable = i8259A_chip.irq_unmask; | ||
576 | piix4_virtual_irq_type.irq_disable = i8259A_chip.irq_mask; | ||
577 | piix4_virtual_irq_type.irq_unmask = i8259A_chip.irq_unmask; | ||
578 | } | ||
579 | |||
580 | static void __init visws_pre_intr_init(void) | ||
581 | { | ||
582 | int i; | ||
583 | |||
584 | set_piix4_virtual_irq_type(); | ||
585 | |||
586 | for (i = 0; i < CO_IRQ_APIC0 + CO_APIC_LAST + 1; i++) { | ||
587 | struct irq_chip *chip = NULL; | ||
588 | |||
589 | if (i == 0) | ||
590 | chip = &cobalt_irq_type; | ||
591 | else if (i == CO_IRQ_IDE0) | ||
592 | chip = &cobalt_irq_type; | ||
593 | else if (i == CO_IRQ_IDE1) | ||
594 | chip = &cobalt_irq_type; | ||
595 | else if (i == CO_IRQ_8259) | ||
596 | chip = &piix4_master_irq_type; | ||
597 | else if (i < CO_IRQ_APIC0) | ||
598 | chip = &piix4_virtual_irq_type; | ||
599 | else if (IS_CO_APIC(i)) | ||
600 | chip = &cobalt_irq_type; | ||
601 | |||
602 | if (chip) | ||
603 | irq_set_chip(i, chip); | ||
604 | } | ||
605 | |||
606 | setup_irq(CO_IRQ_8259, &master_action); | ||
607 | setup_irq(2, &cascade_action); | ||
608 | } | ||
diff --git a/arch/x86/realmode/init.c b/arch/x86/realmode/init.c index a44f457e70a1..bad628a620c4 100644 --- a/arch/x86/realmode/init.c +++ b/arch/x86/realmode/init.c | |||
@@ -29,12 +29,10 @@ void __init reserve_real_mode(void) | |||
29 | void __init setup_real_mode(void) | 29 | void __init setup_real_mode(void) |
30 | { | 30 | { |
31 | u16 real_mode_seg; | 31 | u16 real_mode_seg; |
32 | u32 *rel; | 32 | const u32 *rel; |
33 | u32 count; | 33 | u32 count; |
34 | u32 *ptr; | ||
35 | u16 *seg; | ||
36 | int i; | ||
37 | unsigned char *base; | 34 | unsigned char *base; |
35 | unsigned long phys_base; | ||
38 | struct trampoline_header *trampoline_header; | 36 | struct trampoline_header *trampoline_header; |
39 | size_t size = PAGE_ALIGN(real_mode_blob_end - real_mode_blob); | 37 | size_t size = PAGE_ALIGN(real_mode_blob_end - real_mode_blob); |
40 | #ifdef CONFIG_X86_64 | 38 | #ifdef CONFIG_X86_64 |
@@ -46,23 +44,23 @@ void __init setup_real_mode(void) | |||
46 | 44 | ||
47 | memcpy(base, real_mode_blob, size); | 45 | memcpy(base, real_mode_blob, size); |
48 | 46 | ||
49 | real_mode_seg = __pa(base) >> 4; | 47 | phys_base = __pa(base); |
48 | real_mode_seg = phys_base >> 4; | ||
49 | |||
50 | rel = (u32 *) real_mode_relocs; | 50 | rel = (u32 *) real_mode_relocs; |
51 | 51 | ||
52 | /* 16-bit segment relocations. */ | 52 | /* 16-bit segment relocations. */ |
53 | count = rel[0]; | 53 | count = *rel++; |
54 | rel = &rel[1]; | 54 | while (count--) { |
55 | for (i = 0; i < count; i++) { | 55 | u16 *seg = (u16 *) (base + *rel++); |
56 | seg = (u16 *) (base + rel[i]); | ||
57 | *seg = real_mode_seg; | 56 | *seg = real_mode_seg; |
58 | } | 57 | } |
59 | 58 | ||
60 | /* 32-bit linear relocations. */ | 59 | /* 32-bit linear relocations. */ |
61 | count = rel[i]; | 60 | count = *rel++; |
62 | rel = &rel[i + 1]; | 61 | while (count--) { |
63 | for (i = 0; i < count; i++) { | 62 | u32 *ptr = (u32 *) (base + *rel++); |
64 | ptr = (u32 *) (base + rel[i]); | 63 | *ptr += phys_base; |
65 | *ptr += __pa(base); | ||
66 | } | 64 | } |
67 | 65 | ||
68 | /* Must be performed *after* relocation. */ | 66 | /* Must be performed *after* relocation. */ |
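The rewritten loop consumes a simple blob: a u32 count followed by that many u32 offsets for the 16-bit segment fixups, then a second count-plus-offsets run for the 32-bit linear fixups. A toy, self-contained walker over the same layout (values invented for illustration):

        #include <stdint.h>
        #include <stdio.h>
        #include <string.h>

        int main(void)
        {
                uint8_t base[32] = {0};           /* stand-in for the copied blob  */
                uint32_t relocs[] = { 1, 4,       /* one 16-bit seg fixup at +4    */
                                      1, 8 };     /* one 32-bit linear fixup at +8 */
                const uint32_t *rel = relocs;
                uint32_t count, out;
                unsigned long phys_base = 0x99000;  /* assumed 16-byte-aligned load addr */
                uint16_t seg = phys_base >> 4;

                count = *rel++;
                while (count--) {                 /* patch real-mode segment values */
                        memcpy(base + *rel++, &seg, sizeof(seg));
                }
                count = *rel++;
                while (count--) {                 /* rebase 32-bit linear addresses */
                        uint32_t v;
                        memcpy(&v, base + *rel, sizeof(v));
                        v += phys_base;
                        memcpy(base + *rel++, &v, sizeof(v));
                }

                memcpy(&out, base + 8, sizeof(out));
                printf("seg=%#x linear=%#lx\n", seg, (unsigned long)out);
                return 0;
        }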
diff --git a/arch/x86/realmode/rm/Makefile b/arch/x86/realmode/rm/Makefile index 9cac82588cbc..3497f14e4dea 100644 --- a/arch/x86/realmode/rm/Makefile +++ b/arch/x86/realmode/rm/Makefile | |||
@@ -64,20 +64,7 @@ $(obj)/realmode.relocs: $(obj)/realmode.elf FORCE | |||
64 | 64 | ||
65 | # --------------------------------------------------------------------------- | 65 | # --------------------------------------------------------------------------- |
66 | 66 | ||
67 | # How to compile the 16-bit code. Note we always compile for -march=i386, | 67 | KBUILD_CFLAGS := $(LINUXINCLUDE) $(REALMODE_CFLAGS) -D_SETUP -D_WAKEUP \ |
68 | # that way we can complain to the user if the CPU is insufficient. | 68 | -I$(srctree)/arch/x86/boot |
69 | KBUILD_CFLAGS := $(LINUXINCLUDE) -m32 -g -Os -D_SETUP -D__KERNEL__ -D_WAKEUP \ | ||
70 | -I$(srctree)/arch/x86/boot \ | ||
71 | -DDISABLE_BRANCH_PROFILING \ | ||
72 | -Wall -Wstrict-prototypes \ | ||
73 | -march=i386 -mregparm=3 \ | ||
74 | -include $(srctree)/$(src)/../../boot/code16gcc.h \ | ||
75 | -fno-strict-aliasing -fomit-frame-pointer -fno-pic \ | ||
76 | -mno-mmx -mno-sse \ | ||
77 | $(call cc-option, -ffreestanding) \ | ||
78 | $(call cc-option, -fno-toplevel-reorder,\ | ||
79 | $(call cc-option, -fno-unit-at-a-time)) \ | ||
80 | $(call cc-option, -fno-stack-protector) \ | ||
81 | $(call cc-option, -mpreferred-stack-boundary=2) | ||
82 | KBUILD_AFLAGS := $(KBUILD_CFLAGS) -D__ASSEMBLY__ | 69 | KBUILD_AFLAGS := $(KBUILD_CFLAGS) -D__ASSEMBLY__ |
83 | GCOV_PROFILE := n | 70 | GCOV_PROFILE := n |
diff --git a/arch/x86/realmode/rm/reboot.S b/arch/x86/realmode/rm/reboot.S index f932ea61d1c8..d66c607bdc58 100644 --- a/arch/x86/realmode/rm/reboot.S +++ b/arch/x86/realmode/rm/reboot.S | |||
@@ -1,5 +1,4 @@ | |||
1 | #include <linux/linkage.h> | 1 | #include <linux/linkage.h> |
2 | #include <linux/init.h> | ||
3 | #include <asm/segment.h> | 2 | #include <asm/segment.h> |
4 | #include <asm/page_types.h> | 3 | #include <asm/page_types.h> |
5 | #include <asm/processor-flags.h> | 4 | #include <asm/processor-flags.h> |
diff --git a/arch/x86/realmode/rm/trampoline_32.S b/arch/x86/realmode/rm/trampoline_32.S index c1b2791183e7..48ddd76bc4c3 100644 --- a/arch/x86/realmode/rm/trampoline_32.S +++ b/arch/x86/realmode/rm/trampoline_32.S | |||
@@ -20,7 +20,6 @@ | |||
20 | */ | 20 | */ |
21 | 21 | ||
22 | #include <linux/linkage.h> | 22 | #include <linux/linkage.h> |
23 | #include <linux/init.h> | ||
24 | #include <asm/segment.h> | 23 | #include <asm/segment.h> |
25 | #include <asm/page_types.h> | 24 | #include <asm/page_types.h> |
26 | #include "realmode.h" | 25 | #include "realmode.h" |
diff --git a/arch/x86/realmode/rm/trampoline_64.S b/arch/x86/realmode/rm/trampoline_64.S index bb360dc39d21..dac7b20d2f9d 100644 --- a/arch/x86/realmode/rm/trampoline_64.S +++ b/arch/x86/realmode/rm/trampoline_64.S | |||
@@ -25,7 +25,6 @@ | |||
25 | */ | 25 | */ |
26 | 26 | ||
27 | #include <linux/linkage.h> | 27 | #include <linux/linkage.h> |
28 | #include <linux/init.h> | ||
29 | #include <asm/pgtable_types.h> | 28 | #include <asm/pgtable_types.h> |
30 | #include <asm/page_types.h> | 29 | #include <asm/page_types.h> |
31 | #include <asm/msr.h> | 30 | #include <asm/msr.h> |
diff --git a/arch/x86/syscalls/syscall_32.tbl b/arch/x86/syscalls/syscall_32.tbl index aabfb8380a1c..96bc506ac6de 100644 --- a/arch/x86/syscalls/syscall_32.tbl +++ b/arch/x86/syscalls/syscall_32.tbl | |||
@@ -357,3 +357,5 @@ | |||
357 | 348 i386 process_vm_writev sys_process_vm_writev compat_sys_process_vm_writev | 357 | 348 i386 process_vm_writev sys_process_vm_writev compat_sys_process_vm_writev |
358 | 349 i386 kcmp sys_kcmp | 358 | 349 i386 kcmp sys_kcmp |
359 | 350 i386 finit_module sys_finit_module | 359 | 350 i386 finit_module sys_finit_module |
360 | 351 i386 sched_setattr sys_sched_setattr | ||
361 | 352 i386 sched_getattr sys_sched_getattr | ||
diff --git a/arch/x86/syscalls/syscall_64.tbl b/arch/x86/syscalls/syscall_64.tbl index 38ae65dfd14f..04376ac3d9ef 100644 --- a/arch/x86/syscalls/syscall_64.tbl +++ b/arch/x86/syscalls/syscall_64.tbl | |||
@@ -320,6 +320,9 @@ | |||
320 | 311 64 process_vm_writev sys_process_vm_writev | 320 | 311 64 process_vm_writev sys_process_vm_writev |
321 | 312 common kcmp sys_kcmp | 321 | 312 common kcmp sys_kcmp |
322 | 313 common finit_module sys_finit_module | 322 | 313 common finit_module sys_finit_module |
323 | 314 common sched_setattr sys_sched_setattr | ||
324 | 315 common sched_getattr sys_sched_getattr | ||
325 | 316 common renameat2 sys_renameat2 | ||
323 | 326 | ||
324 | # | 327 | # |
325 | # x32-specific system call numbers start at 512 to avoid cache impact | 328 | # x32-specific system call numbers start at 512 to avoid cache impact |
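Until libc grows wrappers, the new entries can be exercised through syscall(2) with the numbers from the tables above (314/315 on x86_64, 351/352 on i386). A sketch for x86_64 — the struct sched_attr layout here is assumed from the sched_setattr interface, so treat it as illustrative:

        #define _GNU_SOURCE
        #include <stdint.h>
        #include <stdio.h>
        #include <unistd.h>

        struct sched_attr {               /* layout assumed for illustration */
                uint32_t size;
                uint32_t sched_policy;
                uint64_t sched_flags;
                int32_t  sched_nice;
                uint32_t sched_priority;
                uint64_t sched_runtime;
                uint64_t sched_deadline;
                uint64_t sched_period;
        };

        int main(void)
        {
                struct sched_attr attr = { .size = sizeof(attr) };
                /* 315 = sched_getattr on x86_64 per the table above; the args
                 * are (pid, attr, size, flags), pid 0 meaning this thread. */
                long ret = syscall(315, 0, &attr, sizeof(attr), 0);

                if (ret == 0)
                        printf("policy=%u nice=%d\n", attr.sched_policy, attr.sched_nice);
                return 0;
        }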
diff --git a/arch/x86/tools/relocs.c b/arch/x86/tools/relocs.c index f7bab68a4b83..bbb1d2259ecf 100644 --- a/arch/x86/tools/relocs.c +++ b/arch/x86/tools/relocs.c | |||
@@ -69,8 +69,8 @@ static const char * const sym_regex_kernel[S_NSYMTYPES] = { | |||
69 | "__per_cpu_load|" | 69 | "__per_cpu_load|" |
70 | "init_per_cpu__.*|" | 70 | "init_per_cpu__.*|" |
71 | "__end_rodata_hpage_align|" | 71 | "__end_rodata_hpage_align|" |
72 | "__vvar_page|" | ||
73 | #endif | 72 | #endif |
73 | "__vvar_page|" | ||
74 | "_end)$" | 74 | "_end)$" |
75 | }; | 75 | }; |
76 | 76 | ||
@@ -722,15 +722,25 @@ static void percpu_init(void) | |||
722 | 722 | ||
723 | /* | 723 | /* |
724 | * Check to see if a symbol lies in the .data..percpu section. | 724 | * Check to see if a symbol lies in the .data..percpu section. |
725 | * For some as yet not understood reason the "__init_begin" | 725 | * |
726 | * symbol which immediately preceeds the .data..percpu section | 726 | * The linker incorrectly associates some symbols with the |
727 | * also shows up as it it were part of it so we do an explict | 727 | * .data..percpu section so we also need to check the symbol |
728 | * check for that symbol name and ignore it. | 728 | * name to make sure that we classify the symbol correctly. |
729 | * | ||
730 | * The GNU linker incorrectly associates: | ||
731 | * __init_begin | ||
732 | * __per_cpu_load | ||
733 | * | ||
734 | * The "gold" linker incorrectly associates: | ||
735 | * init_per_cpu__irq_stack_union | ||
736 | * init_per_cpu__gdt_page | ||
729 | */ | 737 | */ |
730 | static int is_percpu_sym(ElfW(Sym) *sym, const char *symname) | 738 | static int is_percpu_sym(ElfW(Sym) *sym, const char *symname) |
731 | { | 739 | { |
732 | return (sym->st_shndx == per_cpu_shndx) && | 740 | return (sym->st_shndx == per_cpu_shndx) && |
733 | strcmp(symname, "__init_begin"); | 741 | strcmp(symname, "__init_begin") && |
742 | strcmp(symname, "__per_cpu_load") && | ||
743 | strncmp(symname, "init_per_cpu_", 13); | ||
734 | } | 744 | } |
735 | 745 | ||
736 | 746 | ||
@@ -1015,6 +1025,29 @@ static void emit_relocs(int as_text, int use_real_mode) | |||
1015 | } | 1025 | } |
1016 | } | 1026 | } |
1017 | 1027 | ||
1028 | /* | ||
1029 | * As an aid to debugging problems with different linkers | ||
1030 | * print summary information about the relocs. | ||
1031 | * Since different linkers tend to emit the sections in | ||
1032 | * different orders we use the section names in the output. | ||
1033 | */ | ||
1034 | static int do_reloc_info(struct section *sec, Elf_Rel *rel, ElfW(Sym) *sym, | ||
1035 | const char *symname) | ||
1036 | { | ||
1037 | printf("%s\t%s\t%s\t%s\n", | ||
1038 | sec_name(sec->shdr.sh_info), | ||
1039 | rel_type(ELF_R_TYPE(rel->r_info)), | ||
1040 | symname, | ||
1041 | sec_name(sym->st_shndx)); | ||
1042 | return 0; | ||
1043 | } | ||
1044 | |||
1045 | static void print_reloc_info(void) | ||
1046 | { | ||
1047 | printf("reloc section\treloc type\tsymbol\tsymbol section\n"); | ||
1048 | walk_relocs(do_reloc_info); | ||
1049 | } | ||
1050 | |||
1018 | #if ELF_BITS == 64 | 1051 | #if ELF_BITS == 64 |
1019 | # define process process_64 | 1052 | # define process process_64 |
1020 | #else | 1053 | #else |
@@ -1022,7 +1055,8 @@ static void emit_relocs(int as_text, int use_real_mode) | |||
1022 | #endif | 1055 | #endif |
1023 | 1056 | ||
1024 | void process(FILE *fp, int use_real_mode, int as_text, | 1057 | void process(FILE *fp, int use_real_mode, int as_text, |
1025 | int show_absolute_syms, int show_absolute_relocs) | 1058 | int show_absolute_syms, int show_absolute_relocs, |
1059 | int show_reloc_info) | ||
1026 | { | 1060 | { |
1027 | regex_init(use_real_mode); | 1061 | regex_init(use_real_mode); |
1028 | read_ehdr(fp); | 1062 | read_ehdr(fp); |
@@ -1040,5 +1074,9 @@ void process(FILE *fp, int use_real_mode, int as_text, | |||
1040 | print_absolute_relocs(); | 1074 | print_absolute_relocs(); |
1041 | return; | 1075 | return; |
1042 | } | 1076 | } |
1077 | if (show_reloc_info) { | ||
1078 | print_reloc_info(); | ||
1079 | return; | ||
1080 | } | ||
1043 | emit_relocs(as_text, use_real_mode); | 1081 | emit_relocs(as_text, use_real_mode); |
1044 | } | 1082 | } |
diff --git a/arch/x86/tools/relocs.h b/arch/x86/tools/relocs.h index 07cdb1eca4fa..f59590645b68 100644 --- a/arch/x86/tools/relocs.h +++ b/arch/x86/tools/relocs.h | |||
@@ -29,8 +29,9 @@ enum symtype { | |||
29 | }; | 29 | }; |
30 | 30 | ||
31 | void process_32(FILE *fp, int use_real_mode, int as_text, | 31 | void process_32(FILE *fp, int use_real_mode, int as_text, |
32 | int show_absolute_syms, int show_absolute_relocs); | 32 | int show_absolute_syms, int show_absolute_relocs, |
33 | int show_reloc_info); | ||
33 | void process_64(FILE *fp, int use_real_mode, int as_text, | 34 | void process_64(FILE *fp, int use_real_mode, int as_text, |
34 | int show_absolute_syms, int show_absolute_relocs); | 35 | int show_absolute_syms, int show_absolute_relocs, |
35 | 36 | int show_reloc_info); | |
36 | #endif /* RELOCS_H */ | 37 | #endif /* RELOCS_H */ |
diff --git a/arch/x86/tools/relocs_common.c b/arch/x86/tools/relocs_common.c index 44d396823a53..acab636bcb34 100644 --- a/arch/x86/tools/relocs_common.c +++ b/arch/x86/tools/relocs_common.c | |||
@@ -11,12 +11,13 @@ void die(char *fmt, ...) | |||
11 | 11 | ||
12 | static void usage(void) | 12 | static void usage(void) |
13 | { | 13 | { |
14 | die("relocs [--abs-syms|--abs-relocs|--text|--realmode] vmlinux\n"); | 14 | die("relocs [--abs-syms|--abs-relocs|--reloc-info|--text|--realmode]" \ |
15 | " vmlinux\n"); | ||
15 | } | 16 | } |
16 | 17 | ||
17 | int main(int argc, char **argv) | 18 | int main(int argc, char **argv) |
18 | { | 19 | { |
19 | int show_absolute_syms, show_absolute_relocs; | 20 | int show_absolute_syms, show_absolute_relocs, show_reloc_info; |
20 | int as_text, use_real_mode; | 21 | int as_text, use_real_mode; |
21 | const char *fname; | 22 | const char *fname; |
22 | FILE *fp; | 23 | FILE *fp; |
@@ -25,6 +26,7 @@ int main(int argc, char **argv) | |||
25 | 26 | ||
26 | show_absolute_syms = 0; | 27 | show_absolute_syms = 0; |
27 | show_absolute_relocs = 0; | 28 | show_absolute_relocs = 0; |
29 | show_reloc_info = 0; | ||
28 | as_text = 0; | 30 | as_text = 0; |
29 | use_real_mode = 0; | 31 | use_real_mode = 0; |
30 | fname = NULL; | 32 | fname = NULL; |
@@ -39,6 +41,10 @@ int main(int argc, char **argv) | |||
39 | show_absolute_relocs = 1; | 41 | show_absolute_relocs = 1; |
40 | continue; | 42 | continue; |
41 | } | 43 | } |
44 | if (strcmp(arg, "--reloc-info") == 0) { | ||
45 | show_reloc_info = 1; | ||
46 | continue; | ||
47 | } | ||
42 | if (strcmp(arg, "--text") == 0) { | 48 | if (strcmp(arg, "--text") == 0) { |
43 | as_text = 1; | 49 | as_text = 1; |
44 | continue; | 50 | continue; |
@@ -67,10 +73,12 @@ int main(int argc, char **argv) | |||
67 | rewind(fp); | 73 | rewind(fp); |
68 | if (e_ident[EI_CLASS] == ELFCLASS64) | 74 | if (e_ident[EI_CLASS] == ELFCLASS64) |
69 | process_64(fp, use_real_mode, as_text, | 75 | process_64(fp, use_real_mode, as_text, |
70 | show_absolute_syms, show_absolute_relocs); | 76 | show_absolute_syms, show_absolute_relocs, |
77 | show_reloc_info); | ||
71 | else | 78 | else |
72 | process_32(fp, use_real_mode, as_text, | 79 | process_32(fp, use_real_mode, as_text, |
73 | show_absolute_syms, show_absolute_relocs); | 80 | show_absolute_syms, show_absolute_relocs, |
81 | show_reloc_info); | ||
74 | fclose(fp); | 82 | fclose(fp); |
75 | return 0; | 83 | return 0; |
76 | } | 84 | } |
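Since walk_relocs() just drives a per-relocation callback, variants of do_reloc_info() are cheap to add. A hypothetical callback that reports only relocations against absolute symbols, reusing the helpers visible above (a fragment meant to live in relocs.c, not a standalone program):

        /* Sketch: like do_reloc_info() above, but filtered to SHN_ABS symbols. */
        static int do_abs_reloc_info(struct section *sec, Elf_Rel *rel,
                                     ElfW(Sym) *sym, const char *symname)
        {
                if (sym->st_shndx == SHN_ABS)   /* absolute symbols only */
                        printf("%s\t%s\t%s\n",
                               sec_name(sec->shdr.sh_info),
                               rel_type(ELF_R_TYPE(rel->r_info)),
                               symname);
                return 0;
        }

        /* ...and then simply: walk_relocs(do_abs_reloc_info); */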
diff --git a/arch/x86/um/asm/barrier.h b/arch/x86/um/asm/barrier.h index 7d01b8c56c00..cc04e67bfd05 100644 --- a/arch/x86/um/asm/barrier.h +++ b/arch/x86/um/asm/barrier.h | |||
@@ -40,11 +40,7 @@ | |||
40 | #define smp_rmb() barrier() | 40 | #define smp_rmb() barrier() |
41 | #endif /* CONFIG_X86_PPRO_FENCE */ | 41 | #endif /* CONFIG_X86_PPRO_FENCE */ |
42 | 42 | ||
43 | #ifdef CONFIG_X86_OOSTORE | ||
44 | #define smp_wmb() wmb() | ||
45 | #else /* CONFIG_X86_OOSTORE */ | ||
46 | #define smp_wmb() barrier() | 43 | #define smp_wmb() barrier() |
47 | #endif /* CONFIG_X86_OOSTORE */ | ||
48 | 44 | ||
49 | #define smp_read_barrier_depends() read_barrier_depends() | 45 | #define smp_read_barrier_depends() read_barrier_depends() |
50 | #define set_mb(var, value) do { (void)xchg(&var, value); } while (0) | 46 | #define set_mb(var, value) do { (void)xchg(&var, value); } while (0) |
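Dropping the OOSTORE case leaves smp_wmb() as a plain compiler barrier, which is sound because x86 stores are not reordered against other stores; only the compiler has to be restrained. A minimal sketch of that idiom:

        /* Classic compiler barrier: empty asm with a "memory" clobber. */
        #define barrier() __asm__ __volatile__("" ::: "memory")

        int data, flag;

        void publish(int v)
        {
                data = v;
                barrier();   /* compiler may not sink the data store below this */
                flag = 1;    /* CPU store order is already guaranteed on x86    */
        }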
diff --git a/arch/x86/vdso/Makefile b/arch/x86/vdso/Makefile index fd14be1d1472..c580d1210ffe 100644 --- a/arch/x86/vdso/Makefile +++ b/arch/x86/vdso/Makefile | |||
@@ -2,6 +2,8 @@ | |||
2 | # Building vDSO images for x86. | 2 | # Building vDSO images for x86. |
3 | # | 3 | # |
4 | 4 | ||
5 | KBUILD_CFLAGS += $(DISABLE_LTO) | ||
6 | |||
5 | VDSO64-$(CONFIG_X86_64) := y | 7 | VDSO64-$(CONFIG_X86_64) := y |
6 | VDSOX32-$(CONFIG_X86_X32_ABI) := y | 8 | VDSOX32-$(CONFIG_X86_X32_ABI) := y |
7 | VDSO32-$(CONFIG_X86_32) := y | 9 | VDSO32-$(CONFIG_X86_32) := y |
@@ -21,7 +23,8 @@ vobjs-$(VDSOX32-y) += $(vobjx32s-compat) | |||
21 | vobj64s := $(filter-out $(vobjx32s-compat),$(vobjs-y)) | 23 | vobj64s := $(filter-out $(vobjx32s-compat),$(vobjs-y)) |
22 | 24 | ||
23 | # files to link into kernel | 25 | # files to link into kernel |
24 | obj-$(VDSO64-y) += vma.o vdso.o | 26 | obj-y += vma.o |
27 | obj-$(VDSO64-y) += vdso.o | ||
25 | obj-$(VDSOX32-y) += vdsox32.o | 28 | obj-$(VDSOX32-y) += vdsox32.o |
26 | obj-$(VDSO32-y) += vdso32.o vdso32-setup.o | 29 | obj-$(VDSO32-y) += vdso32.o vdso32-setup.o |
27 | 30 | ||
@@ -35,7 +38,8 @@ export CPPFLAGS_vdso.lds += -P -C | |||
35 | 38 | ||
36 | VDSO_LDFLAGS_vdso.lds = -m64 -Wl,-soname=linux-vdso.so.1 \ | 39 | VDSO_LDFLAGS_vdso.lds = -m64 -Wl,-soname=linux-vdso.so.1 \ |
37 | -Wl,--no-undefined \ | 40 | -Wl,--no-undefined \ |
38 | -Wl,-z,max-page-size=4096 -Wl,-z,common-page-size=4096 | 41 | -Wl,-z,max-page-size=4096 -Wl,-z,common-page-size=4096 \ |
42 | $(DISABLE_LTO) | ||
39 | 43 | ||
40 | $(obj)/vdso.o: $(src)/vdso.S $(obj)/vdso.so | 44 | $(obj)/vdso.o: $(src)/vdso.S $(obj)/vdso.so |
41 | 45 | ||
@@ -127,7 +131,7 @@ vdso32.so-$(VDSO32-y) += sysenter | |||
127 | vdso32-images = $(vdso32.so-y:%=vdso32-%.so) | 131 | vdso32-images = $(vdso32.so-y:%=vdso32-%.so) |
128 | 132 | ||
129 | CPPFLAGS_vdso32.lds = $(CPPFLAGS_vdso.lds) | 133 | CPPFLAGS_vdso32.lds = $(CPPFLAGS_vdso.lds) |
130 | VDSO_LDFLAGS_vdso32.lds = -m32 -Wl,-soname=linux-gate.so.1 | 134 | VDSO_LDFLAGS_vdso32.lds = -m32 -Wl,-m,elf_i386 -Wl,-soname=linux-gate.so.1 |
131 | 135 | ||
132 | # This makes sure the $(obj) subdirectory exists even though vdso32/ | 136 | # This makes sure the $(obj) subdirectory exists even though vdso32/ |
133 | # is not a kbuild sub-make subdirectory. | 137 | # is not a kbuild sub-make subdirectory. |
@@ -135,7 +139,7 @@ override obj-dirs = $(dir $(obj)) $(obj)/vdso32/ | |||
135 | 139 | ||
136 | targets += vdso32/vdso32.lds | 140 | targets += vdso32/vdso32.lds |
137 | targets += $(vdso32-images) $(vdso32-images:=.dbg) | 141 | targets += $(vdso32-images) $(vdso32-images:=.dbg) |
138 | targets += vdso32/note.o $(vdso32.so-y:%=vdso32/%.o) | 142 | targets += vdso32/note.o vdso32/vclock_gettime.o $(vdso32.so-y:%=vdso32/%.o) |
139 | 143 | ||
140 | extra-y += $(vdso32-images) | 144 | extra-y += $(vdso32-images) |
141 | 145 | ||
@@ -145,8 +149,19 @@ KBUILD_AFLAGS_32 := $(filter-out -m64,$(KBUILD_AFLAGS)) | |||
145 | $(vdso32-images:%=$(obj)/%.dbg): KBUILD_AFLAGS = $(KBUILD_AFLAGS_32) | 149 | $(vdso32-images:%=$(obj)/%.dbg): KBUILD_AFLAGS = $(KBUILD_AFLAGS_32) |
146 | $(vdso32-images:%=$(obj)/%.dbg): asflags-$(CONFIG_X86_64) += -m32 | 150 | $(vdso32-images:%=$(obj)/%.dbg): asflags-$(CONFIG_X86_64) += -m32 |
147 | 151 | ||
152 | KBUILD_CFLAGS_32 := $(filter-out -m64,$(KBUILD_CFLAGS)) | ||
153 | KBUILD_CFLAGS_32 := $(filter-out -mcmodel=kernel,$(KBUILD_CFLAGS_32)) | ||
154 | KBUILD_CFLAGS_32 := $(filter-out -fno-pic,$(KBUILD_CFLAGS_32)) | ||
155 | KBUILD_CFLAGS_32 := $(filter-out -mfentry,$(KBUILD_CFLAGS_32)) | ||
156 | KBUILD_CFLAGS_32 += -m32 -msoft-float -mregparm=0 -fpic | ||
157 | KBUILD_CFLAGS_32 += $(call cc-option, -fno-stack-protector) | ||
158 | KBUILD_CFLAGS_32 += $(call cc-option, -foptimize-sibling-calls) | ||
159 | KBUILD_CFLAGS_32 += -fno-omit-frame-pointer | ||
160 | $(vdso32-images:%=$(obj)/%.dbg): KBUILD_CFLAGS = $(KBUILD_CFLAGS_32) | ||
161 | |||
148 | $(vdso32-images:%=$(obj)/%.dbg): $(obj)/vdso32-%.so.dbg: FORCE \ | 162 | $(vdso32-images:%=$(obj)/%.dbg): $(obj)/vdso32-%.so.dbg: FORCE \ |
149 | $(obj)/vdso32/vdso32.lds \ | 163 | $(obj)/vdso32/vdso32.lds \ |
164 | $(obj)/vdso32/vclock_gettime.o \ | ||
150 | $(obj)/vdso32/note.o \ | 165 | $(obj)/vdso32/note.o \ |
151 | $(obj)/vdso32/%.o | 166 | $(obj)/vdso32/%.o |
152 | $(call if_changed,vdso) | 167 | $(call if_changed,vdso) |
@@ -181,7 +196,8 @@ quiet_cmd_vdso = VDSO $@ | |||
181 | -Wl,-T,$(filter %.lds,$^) $(filter %.o,$^) && \ | 196 | -Wl,-T,$(filter %.lds,$^) $(filter %.o,$^) && \ |
182 | sh $(srctree)/$(src)/checkundef.sh '$(NM)' '$@' | 197 | sh $(srctree)/$(src)/checkundef.sh '$(NM)' '$@' |
183 | 198 | ||
184 | VDSO_LDFLAGS = -fPIC -shared $(call cc-ldoption, -Wl$(comma)--hash-style=sysv) | 199 | VDSO_LDFLAGS = -fPIC -shared $(call cc-ldoption, -Wl$(comma)--hash-style=sysv) \ |
200 | $(LTO_CFLAGS) | ||
185 | GCOV_PROFILE := n | 201 | GCOV_PROFILE := n |
186 | 202 | ||
187 | # | 203 | # |
diff --git a/arch/x86/vdso/vclock_gettime.c b/arch/x86/vdso/vclock_gettime.c index eb5d7a56f8d4..16d686171e9a 100644 --- a/arch/x86/vdso/vclock_gettime.c +++ b/arch/x86/vdso/vclock_gettime.c | |||
@@ -4,6 +4,9 @@ | |||
4 | * | 4 | * |
5 | * Fast user context implementation of clock_gettime, gettimeofday, and time. | 5 | * Fast user context implementation of clock_gettime, gettimeofday, and time. |
6 | * | 6 | * |
7 | * 32 Bit compat layer by Stefani Seibold <stefani@seibold.net> | ||
8 | * sponsored by Rohde & Schwarz GmbH & Co. KG Munich/Germany | ||
9 | * | ||
7 | * The code should have no internal unresolved relocations. | 10 | * The code should have no internal unresolved relocations. |
8 | * Check with readelf after changing. | 11 | * Check with readelf after changing. |
9 | */ | 12 | */ |
@@ -11,56 +14,55 @@ | |||
11 | /* Disable profiling for userspace code: */ | 14 | /* Disable profiling for userspace code: */ |
12 | #define DISABLE_BRANCH_PROFILING | 15 | #define DISABLE_BRANCH_PROFILING |
13 | 16 | ||
14 | #include <linux/kernel.h> | 17 | #include <uapi/linux/time.h> |
15 | #include <linux/posix-timers.h> | ||
16 | #include <linux/time.h> | ||
17 | #include <linux/string.h> | ||
18 | #include <asm/vsyscall.h> | ||
19 | #include <asm/fixmap.h> | ||
20 | #include <asm/vgtod.h> | 18 | #include <asm/vgtod.h> |
21 | #include <asm/timex.h> | ||
22 | #include <asm/hpet.h> | 19 | #include <asm/hpet.h> |
20 | #include <asm/vvar.h> | ||
23 | #include <asm/unistd.h> | 21 | #include <asm/unistd.h> |
24 | #include <asm/io.h> | 22 | #include <asm/msr.h> |
25 | #include <asm/pvclock.h> | 23 | #include <linux/math64.h> |
24 | #include <linux/time.h> | ||
26 | 25 | ||
27 | #define gtod (&VVAR(vsyscall_gtod_data)) | 26 | #define gtod (&VVAR(vsyscall_gtod_data)) |
28 | 27 | ||
29 | notrace static cycle_t vread_tsc(void) | 28 | extern int __vdso_clock_gettime(clockid_t clock, struct timespec *ts); |
29 | extern int __vdso_gettimeofday(struct timeval *tv, struct timezone *tz); | ||
30 | extern time_t __vdso_time(time_t *t); | ||
31 | |||
32 | #ifdef CONFIG_HPET_TIMER | ||
33 | static inline u32 read_hpet_counter(const volatile void *addr) | ||
30 | { | 34 | { |
31 | cycle_t ret; | 35 | return *(const volatile u32 *) (addr + HPET_COUNTER); |
32 | u64 last; | 36 | } |
37 | #endif | ||
33 | 38 | ||
34 | /* | 39 | #ifndef BUILD_VDSO32 |
35 | * Empirically, a fence (of type that depends on the CPU) | ||
36 | * before rdtsc is enough to ensure that rdtsc is ordered | ||
37 | * with respect to loads. The various CPU manuals are unclear | ||
38 | * as to whether rdtsc can be reordered with later loads, | ||
39 | * but no one has ever seen it happen. | ||
40 | */ | ||
41 | rdtsc_barrier(); | ||
42 | ret = (cycle_t)vget_cycles(); | ||
43 | 40 | ||
44 | last = VVAR(vsyscall_gtod_data).clock.cycle_last; | 41 | #include <linux/kernel.h> |
42 | #include <asm/vsyscall.h> | ||
43 | #include <asm/fixmap.h> | ||
44 | #include <asm/pvclock.h> | ||
45 | 45 | ||
46 | if (likely(ret >= last)) | 46 | static notrace cycle_t vread_hpet(void) |
47 | return ret; | 47 | { |
48 | return read_hpet_counter((const void *)fix_to_virt(VSYSCALL_HPET)); | ||
49 | } | ||
48 | 50 | ||
49 | /* | 51 | notrace static long vdso_fallback_gettime(long clock, struct timespec *ts) |
50 | * GCC likes to generate cmov here, but this branch is extremely | 52 | { |
51 | * predictable (it's just a funciton of time and the likely is | 53 | long ret; |
52 | * very likely) and there's a data dependence, so force GCC | 54 | asm("syscall" : "=a" (ret) : |
53 | * to generate a branch instead. I don't barrier() because | 55 | "0" (__NR_clock_gettime), "D" (clock), "S" (ts) : "memory"); |
54 | * we don't actually need a barrier, and if this function | 56 | return ret; |
55 | * ever gets inlined it will generate worse code. | ||
56 | */ | ||
57 | asm volatile (""); | ||
58 | return last; | ||
59 | } | 57 | } |
60 | 58 | ||
61 | static notrace cycle_t vread_hpet(void) | 59 | notrace static long vdso_fallback_gtod(struct timeval *tv, struct timezone *tz) |
62 | { | 60 | { |
63 | return readl((const void __iomem *)fix_to_virt(VSYSCALL_HPET) + HPET_COUNTER); | 61 | long ret; |
62 | |||
63 | asm("syscall" : "=a" (ret) : | ||
64 | "0" (__NR_gettimeofday), "D" (tv), "S" (tz) : "memory"); | ||
65 | return ret; | ||
64 | } | 66 | } |
65 | 67 | ||
66 | #ifdef CONFIG_PARAVIRT_CLOCK | 68 | #ifdef CONFIG_PARAVIRT_CLOCK |
@@ -124,7 +126,7 @@ static notrace cycle_t vread_pvclock(int *mode) | |||
124 | *mode = VCLOCK_NONE; | 126 | *mode = VCLOCK_NONE; |
125 | 127 | ||
126 | /* refer to tsc.c read_tsc() comment for rationale */ | 128 | /* refer to tsc.c read_tsc() comment for rationale */ |
127 | last = VVAR(vsyscall_gtod_data).clock.cycle_last; | 129 | last = gtod->cycle_last; |
128 | 130 | ||
129 | if (likely(ret >= last)) | 131 | if (likely(ret >= last)) |
130 | return ret; | 132 | return ret; |
@@ -133,11 +135,30 @@ static notrace cycle_t vread_pvclock(int *mode) | |||
133 | } | 135 | } |
134 | #endif | 136 | #endif |
135 | 137 | ||
138 | #else | ||
139 | |||
140 | extern u8 hpet_page | ||
141 | __attribute__((visibility("hidden"))); | ||
142 | |||
143 | #ifdef CONFIG_HPET_TIMER | ||
144 | static notrace cycle_t vread_hpet(void) | ||
145 | { | ||
146 | return read_hpet_counter((const void *)(&hpet_page)); | ||
147 | } | ||
148 | #endif | ||
149 | |||
136 | notrace static long vdso_fallback_gettime(long clock, struct timespec *ts) | 150 | notrace static long vdso_fallback_gettime(long clock, struct timespec *ts) |
137 | { | 151 | { |
138 | long ret; | 152 | long ret; |
139 | asm("syscall" : "=a" (ret) : | 153 | |
140 | "0" (__NR_clock_gettime),"D" (clock), "S" (ts) : "memory"); | 154 | asm( |
155 | "mov %%ebx, %%edx \n" | ||
156 | "mov %2, %%ebx \n" | ||
157 | "call VDSO32_vsyscall \n" | ||
158 | "mov %%edx, %%ebx \n" | ||
159 | : "=a" (ret) | ||
160 | : "0" (__NR_clock_gettime), "g" (clock), "c" (ts) | ||
161 | : "memory", "edx"); | ||
141 | return ret; | 162 | return ret; |
142 | } | 163 | } |
143 | 164 | ||
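The register shuffle in these 32-bit fallbacks exists because the vDSO is PIC and on i386 %ebx doubles as the GOT pointer, so it cannot appear as an inline-asm operand even though the kernel ABI wants the first argument there. A hypothetical i386-only variant using int $0x80 directly shows the same dance:

        /* i386-only sketch; mirrors the %ebx-through-%edx shuffle above. */
        static long gettime_fallback_int80(long nr, long clock, void *ts)
        {
                long ret;
                asm volatile("mov %%ebx, %%edx \n"  /* stash the GOT pointer    */
                             "mov %2, %%ebx    \n"  /* arg0 (clockid) into %ebx */
                             "int $0x80        \n"
                             "mov %%edx, %%ebx \n"  /* restore the GOT pointer  */
                             : "=a" (ret)
                             : "0" (nr), "g" (clock), "c" (ts)
                             : "memory", "edx");
                return ret;
        }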
@@ -145,28 +166,79 @@ notrace static long vdso_fallback_gtod(struct timeval *tv, struct timezone *tz) | |||
145 | { | 166 | { |
146 | long ret; | 167 | long ret; |
147 | 168 | ||
148 | asm("syscall" : "=a" (ret) : | 169 | asm( |
149 | "0" (__NR_gettimeofday), "D" (tv), "S" (tz) : "memory"); | 170 | "mov %%ebx, %%edx \n" |
171 | "mov %2, %%ebx \n" | ||
172 | "call VDSO32_vsyscall \n" | ||
173 | "mov %%edx, %%ebx \n" | ||
174 | : "=a" (ret) | ||
175 | : "0" (__NR_gettimeofday), "g" (tv), "c" (tz) | ||
176 | : "memory", "edx"); | ||
150 | return ret; | 177 | return ret; |
151 | } | 178 | } |
152 | 179 | ||
180 | #ifdef CONFIG_PARAVIRT_CLOCK | ||
181 | |||
182 | static notrace cycle_t vread_pvclock(int *mode) | ||
183 | { | ||
184 | *mode = VCLOCK_NONE; | ||
185 | return 0; | ||
186 | } | ||
187 | #endif | ||
188 | |||
189 | #endif | ||
190 | |||
191 | notrace static cycle_t vread_tsc(void) | ||
192 | { | ||
193 | cycle_t ret; | ||
194 | u64 last; | ||
195 | |||
196 | /* | ||
197 | * Empirically, a fence (of type that depends on the CPU) | ||
198 | * before rdtsc is enough to ensure that rdtsc is ordered | ||
199 | * with respect to loads. The various CPU manuals are unclear | ||
200 | * as to whether rdtsc can be reordered with later loads, | ||
201 | * but no one has ever seen it happen. | ||
202 | */ | ||
203 | rdtsc_barrier(); | ||
204 | ret = (cycle_t)__native_read_tsc(); | ||
205 | |||
206 | last = gtod->cycle_last; | ||
207 | |||
208 | if (likely(ret >= last)) | ||
209 | return ret; | ||
210 | |||
211 | /* | ||
212 | * GCC likes to generate cmov here, but this branch is extremely | ||
213 | * predictable (it's just a function of time and the likely is | ||
214 | * very likely) and there's a data dependence, so force GCC | ||
215 | * to generate a branch instead. I don't barrier() because | ||
216 | * we don't actually need a barrier, and if this function | ||
217 | * ever gets inlined it will generate worse code. | ||
218 | */ | ||
219 | asm volatile (""); | ||
220 | return last; | ||
221 | } | ||
153 | 222 | ||
154 | notrace static inline u64 vgetsns(int *mode) | 223 | notrace static inline u64 vgetsns(int *mode) |
155 | { | 224 | { |
156 | long v; | 225 | u64 v; |
157 | cycles_t cycles; | 226 | cycles_t cycles; |
158 | if (gtod->clock.vclock_mode == VCLOCK_TSC) | 227 | |
228 | if (gtod->vclock_mode == VCLOCK_TSC) | ||
159 | cycles = vread_tsc(); | 229 | cycles = vread_tsc(); |
160 | else if (gtod->clock.vclock_mode == VCLOCK_HPET) | 230 | #ifdef CONFIG_HPET_TIMER |
231 | else if (gtod->vclock_mode == VCLOCK_HPET) | ||
161 | cycles = vread_hpet(); | 232 | cycles = vread_hpet(); |
233 | #endif | ||
162 | #ifdef CONFIG_PARAVIRT_CLOCK | 234 | #ifdef CONFIG_PARAVIRT_CLOCK |
163 | else if (gtod->clock.vclock_mode == VCLOCK_PVCLOCK) | 235 | else if (gtod->vclock_mode == VCLOCK_PVCLOCK) |
164 | cycles = vread_pvclock(mode); | 236 | cycles = vread_pvclock(mode); |
165 | #endif | 237 | #endif |
166 | else | 238 | else |
167 | return 0; | 239 | return 0; |
168 | v = (cycles - gtod->clock.cycle_last) & gtod->clock.mask; | 240 | v = (cycles - gtod->cycle_last) & gtod->mask; |
169 | return v * gtod->clock.mult; | 241 | return v * gtod->mult; |
170 | } | 242 | } |
171 | 243 | ||
172 | /* Code size doesn't matter (vdso is 4k anyway) and this is faster. */ | 244 | /* Code size doesn't matter (vdso is 4k anyway) and this is faster. */ |
@@ -176,106 +248,102 @@ notrace static int __always_inline do_realtime(struct timespec *ts) | |||
176 | u64 ns; | 248 | u64 ns; |
177 | int mode; | 249 | int mode; |
178 | 250 | ||
179 | ts->tv_nsec = 0; | ||
180 | do { | 251 | do { |
181 | seq = raw_read_seqcount_begin(&gtod->seq); | 252 | seq = gtod_read_begin(gtod); |
182 | mode = gtod->clock.vclock_mode; | 253 | mode = gtod->vclock_mode; |
183 | ts->tv_sec = gtod->wall_time_sec; | 254 | ts->tv_sec = gtod->wall_time_sec; |
184 | ns = gtod->wall_time_snsec; | 255 | ns = gtod->wall_time_snsec; |
185 | ns += vgetsns(&mode); | 256 | ns += vgetsns(&mode); |
186 | ns >>= gtod->clock.shift; | 257 | ns >>= gtod->shift; |
187 | } while (unlikely(read_seqcount_retry(&gtod->seq, seq))); | 258 | } while (unlikely(gtod_read_retry(gtod, seq))); |
259 | |||
260 | ts->tv_sec += __iter_div_u64_rem(ns, NSEC_PER_SEC, &ns); | ||
261 | ts->tv_nsec = ns; | ||
188 | 262 | ||
189 | timespec_add_ns(ts, ns); | ||
190 | return mode; | 263 | return mode; |
191 | } | 264 | } |
192 | 265 | ||
193 | notrace static int do_monotonic(struct timespec *ts) | 266 | notrace static int __always_inline do_monotonic(struct timespec *ts) |
194 | { | 267 | { |
195 | unsigned long seq; | 268 | unsigned long seq; |
196 | u64 ns; | 269 | u64 ns; |
197 | int mode; | 270 | int mode; |
198 | 271 | ||
199 | ts->tv_nsec = 0; | ||
200 | do { | 272 | do { |
201 | seq = raw_read_seqcount_begin(&gtod->seq); | 273 | seq = gtod_read_begin(gtod); |
202 | mode = gtod->clock.vclock_mode; | 274 | mode = gtod->vclock_mode; |
203 | ts->tv_sec = gtod->monotonic_time_sec; | 275 | ts->tv_sec = gtod->monotonic_time_sec; |
204 | ns = gtod->monotonic_time_snsec; | 276 | ns = gtod->monotonic_time_snsec; |
205 | ns += vgetsns(&mode); | 277 | ns += vgetsns(&mode); |
206 | ns >>= gtod->clock.shift; | 278 | ns >>= gtod->shift; |
207 | } while (unlikely(read_seqcount_retry(&gtod->seq, seq))); | 279 | } while (unlikely(gtod_read_retry(gtod, seq))); |
208 | timespec_add_ns(ts, ns); | 280 | |
281 | ts->tv_sec += __iter_div_u64_rem(ns, NSEC_PER_SEC, &ns); | ||
282 | ts->tv_nsec = ns; | ||
209 | 283 | ||
210 | return mode; | 284 | return mode; |
211 | } | 285 | } |
212 | 286 | ||
213 | notrace static int do_realtime_coarse(struct timespec *ts) | 287 | notrace static void do_realtime_coarse(struct timespec *ts) |
214 | { | 288 | { |
215 | unsigned long seq; | 289 | unsigned long seq; |
216 | do { | 290 | do { |
217 | seq = raw_read_seqcount_begin(&gtod->seq); | 291 | seq = gtod_read_begin(gtod); |
218 | ts->tv_sec = gtod->wall_time_coarse.tv_sec; | 292 | ts->tv_sec = gtod->wall_time_coarse_sec; |
219 | ts->tv_nsec = gtod->wall_time_coarse.tv_nsec; | 293 | ts->tv_nsec = gtod->wall_time_coarse_nsec; |
220 | } while (unlikely(read_seqcount_retry(&gtod->seq, seq))); | 294 | } while (unlikely(gtod_read_retry(gtod, seq))); |
221 | return 0; | ||
222 | } | 295 | } |
223 | 296 | ||
224 | notrace static int do_monotonic_coarse(struct timespec *ts) | 297 | notrace static void do_monotonic_coarse(struct timespec *ts) |
225 | { | 298 | { |
226 | unsigned long seq; | 299 | unsigned long seq; |
227 | do { | 300 | do { |
228 | seq = raw_read_seqcount_begin(&gtod->seq); | 301 | seq = gtod_read_begin(gtod); |
229 | ts->tv_sec = gtod->monotonic_time_coarse.tv_sec; | 302 | ts->tv_sec = gtod->monotonic_time_coarse_sec; |
230 | ts->tv_nsec = gtod->monotonic_time_coarse.tv_nsec; | 303 | ts->tv_nsec = gtod->monotonic_time_coarse_nsec; |
231 | } while (unlikely(read_seqcount_retry(&gtod->seq, seq))); | 304 | } while (unlikely(gtod_read_retry(gtod, seq))); |
232 | |||
233 | return 0; | ||
234 | } | 305 | } |
235 | 306 | ||
236 | notrace int __vdso_clock_gettime(clockid_t clock, struct timespec *ts) | 307 | notrace int __vdso_clock_gettime(clockid_t clock, struct timespec *ts) |
237 | { | 308 | { |
238 | int ret = VCLOCK_NONE; | ||
239 | |||
240 | switch (clock) { | 309 | switch (clock) { |
241 | case CLOCK_REALTIME: | 310 | case CLOCK_REALTIME: |
242 | ret = do_realtime(ts); | 311 | if (do_realtime(ts) == VCLOCK_NONE) |
312 | goto fallback; | ||
243 | break; | 313 | break; |
244 | case CLOCK_MONOTONIC: | 314 | case CLOCK_MONOTONIC: |
245 | ret = do_monotonic(ts); | 315 | if (do_monotonic(ts) == VCLOCK_NONE) |
316 | goto fallback; | ||
246 | break; | 317 | break; |
247 | case CLOCK_REALTIME_COARSE: | 318 | case CLOCK_REALTIME_COARSE: |
248 | return do_realtime_coarse(ts); | 319 | do_realtime_coarse(ts); |
320 | break; | ||
249 | case CLOCK_MONOTONIC_COARSE: | 321 | case CLOCK_MONOTONIC_COARSE: |
250 | return do_monotonic_coarse(ts); | 322 | do_monotonic_coarse(ts); |
323 | break; | ||
324 | default: | ||
325 | goto fallback; | ||
251 | } | 326 | } |
252 | 327 | ||
253 | if (ret == VCLOCK_NONE) | ||
254 | return vdso_fallback_gettime(clock, ts); | ||
255 | return 0; | 328 | return 0; |
329 | fallback: | ||
330 | return vdso_fallback_gettime(clock, ts); | ||
256 | } | 331 | } |
257 | int clock_gettime(clockid_t, struct timespec *) | 332 | int clock_gettime(clockid_t, struct timespec *) |
258 | __attribute__((weak, alias("__vdso_clock_gettime"))); | 333 | __attribute__((weak, alias("__vdso_clock_gettime"))); |
259 | 334 | ||
260 | notrace int __vdso_gettimeofday(struct timeval *tv, struct timezone *tz) | 335 | notrace int __vdso_gettimeofday(struct timeval *tv, struct timezone *tz) |
261 | { | 336 | { |
262 | long ret = VCLOCK_NONE; | ||
263 | |||
264 | if (likely(tv != NULL)) { | 337 | if (likely(tv != NULL)) { |
265 | BUILD_BUG_ON(offsetof(struct timeval, tv_usec) != | 338 | if (unlikely(do_realtime((struct timespec *)tv) == VCLOCK_NONE)) |
266 | offsetof(struct timespec, tv_nsec) || | 339 | return vdso_fallback_gtod(tv, tz); |
267 | sizeof(*tv) != sizeof(struct timespec)); | ||
268 | ret = do_realtime((struct timespec *)tv); | ||
269 | tv->tv_usec /= 1000; | 340 | tv->tv_usec /= 1000; |
270 | } | 341 | } |
271 | if (unlikely(tz != NULL)) { | 342 | if (unlikely(tz != NULL)) { |
272 | /* Avoid memcpy. Some old compilers fail to inline it */ | 343 | tz->tz_minuteswest = gtod->tz_minuteswest; |
273 | tz->tz_minuteswest = gtod->sys_tz.tz_minuteswest; | 344 | tz->tz_dsttime = gtod->tz_dsttime; |
274 | tz->tz_dsttime = gtod->sys_tz.tz_dsttime; | ||
275 | } | 345 | } |
276 | 346 | ||
277 | if (ret == VCLOCK_NONE) | ||
278 | return vdso_fallback_gtod(tv, tz); | ||
279 | return 0; | 347 | return 0; |
280 | } | 348 | } |
281 | int gettimeofday(struct timeval *, struct timezone *) | 349 | int gettimeofday(struct timeval *, struct timezone *) |
@@ -287,8 +355,8 @@ int gettimeofday(struct timeval *, struct timezone *) | |||
287 | */ | 355 | */ |
288 | notrace time_t __vdso_time(time_t *t) | 356 | notrace time_t __vdso_time(time_t *t) |
289 | { | 357 | { |
290 | /* This is atomic on x86_64 so we don't need any locks. */ | 358 | /* This is atomic on x86 so we don't need any locks. */ |
291 | time_t result = ACCESS_ONCE(VVAR(vsyscall_gtod_data).wall_time_sec); | 359 | time_t result = ACCESS_ONCE(gtod->wall_time_sec); |
292 | 360 | ||
293 | if (t) | 361 | if (t) |
294 | *t = result; | 362 | *t = result; |
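The conversion in vgetsns()/do_realtime() is the standard clocksource scaling: shifted nanoseconds accumulate as (cycles - cycle_last) * mult, the sum is shifted down, and __iter_div_u64_rem() folds whole seconds out. A worked, self-contained example with invented mult/shift values:

        #include <stdint.h>
        #include <stdio.h>

        #define NSEC_PER_SEC 1000000000ULL

        int main(void)
        {
                uint64_t cycle_last = 1000, now = 4000;  /* toy TSC readings        */
                uint64_t mult = 5368709, shift = 24;     /* ~0.32 ns/cycle, invented */
                uint64_t wall_sec = 100, wall_snsec = 7ULL << shift;

                /* scale delta cycles to shifted ns, then shift down */
                uint64_t ns = wall_snsec + (now - cycle_last) * mult;
                ns >>= shift;

                /* the same reduction __iter_div_u64_rem() performs */
                wall_sec += ns / NSEC_PER_SEC;
                ns %= NSEC_PER_SEC;

                printf("tv_sec=%llu tv_nsec=%llu\n",
                       (unsigned long long)wall_sec, (unsigned long long)ns);
                return 0;
        }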
diff --git a/arch/x86/vdso/vdso-layout.lds.S b/arch/x86/vdso/vdso-layout.lds.S index 634a2cf62046..2e263f367b13 100644 --- a/arch/x86/vdso/vdso-layout.lds.S +++ b/arch/x86/vdso/vdso-layout.lds.S | |||
@@ -6,7 +6,25 @@ | |||
6 | 6 | ||
7 | SECTIONS | 7 | SECTIONS |
8 | { | 8 | { |
9 | . = VDSO_PRELINK + SIZEOF_HEADERS; | 9 | #ifdef BUILD_VDSO32 |
10 | #include <asm/vdso32.h> | ||
11 | |||
12 | .hpet_sect : { | ||
13 | hpet_page = . - VDSO_OFFSET(VDSO_HPET_PAGE); | ||
14 | } :text :hpet_sect | ||
15 | |||
16 | .vvar_sect : { | ||
17 | vvar = . - VDSO_OFFSET(VDSO_VVAR_PAGE); | ||
18 | |||
19 | /* Place all vvars at the offsets in asm/vvar.h. */ | ||
20 | #define EMIT_VVAR(name, offset) vvar_ ## name = vvar + offset; | ||
21 | #define __VVAR_KERNEL_LDS | ||
22 | #include <asm/vvar.h> | ||
23 | #undef __VVAR_KERNEL_LDS | ||
24 | #undef EMIT_VVAR | ||
25 | } :text :vvar_sect | ||
26 | #endif | ||
27 | . = SIZEOF_HEADERS; | ||
10 | 28 | ||
11 | .hash : { *(.hash) } :text | 29 | .hash : { *(.hash) } :text |
12 | .gnu.hash : { *(.gnu.hash) } | 30 | .gnu.hash : { *(.gnu.hash) } |
@@ -44,6 +62,11 @@ SECTIONS | |||
44 | . = ALIGN(0x100); | 62 | . = ALIGN(0x100); |
45 | 63 | ||
46 | .text : { *(.text*) } :text =0x90909090 | 64 | .text : { *(.text*) } :text =0x90909090 |
65 | |||
66 | /DISCARD/ : { | ||
67 | *(.discard) | ||
68 | *(.discard.*) | ||
69 | } | ||
47 | } | 70 | } |
48 | 71 | ||
49 | /* | 72 | /* |
@@ -61,4 +84,8 @@ PHDRS | |||
61 | dynamic PT_DYNAMIC FLAGS(4); /* PF_R */ | 84 | dynamic PT_DYNAMIC FLAGS(4); /* PF_R */ |
62 | note PT_NOTE FLAGS(4); /* PF_R */ | 85 | note PT_NOTE FLAGS(4); /* PF_R */ |
63 | eh_frame_hdr PT_GNU_EH_FRAME; | 86 | eh_frame_hdr PT_GNU_EH_FRAME; |
87 | #ifdef BUILD_VDSO32 | ||
88 | vvar_sect PT_NULL FLAGS(4); /* PF_R */ | ||
89 | hpet_sect PT_NULL FLAGS(4); /* PF_R */ | ||
90 | #endif | ||
64 | } | 91 | } |
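The EMIT_VVAR dance above is the usual X-macro pattern: asm/vvar.h lists each vvar once as EMIT_VVAR(name, offset), and every includer defines the macro to produce whatever it needs — here, linker symbols at fixed offsets. A self-contained C sketch of the same pattern with a made-up vvar list:

        #include <stdio.h>

        /* Stand-in for asm/vvar.h: one EMIT_VVAR() per variable. */
        #define VVAR_LIST \
                EMIT_VVAR(jiffies, 0x00) \
                EMIT_VVAR(vsyscall_gtod_data, 0x80)

        struct vvar_desc { const char *name; unsigned offset; };

        /* One possible expansion: build an offset table. */
        #define EMIT_VVAR(name, offset) { #name, offset },
        static const struct vvar_desc vvars[] = { VVAR_LIST };
        #undef EMIT_VVAR

        int main(void)
        {
                for (unsigned i = 0; i < sizeof(vvars) / sizeof(vvars[0]); i++)
                        printf("%s @ +%#x\n", vvars[i].name, vvars[i].offset);
                return 0;
        }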
diff --git a/arch/x86/vdso/vdso.S b/arch/x86/vdso/vdso.S index 01f5e3b4613c..be3f23b09af5 100644 --- a/arch/x86/vdso/vdso.S +++ b/arch/x86/vdso/vdso.S | |||
@@ -1,22 +1,3 @@ | |||
1 | #include <asm/page_types.h> | 1 | #include <asm/vdso.h> |
2 | #include <linux/linkage.h> | ||
3 | #include <linux/init.h> | ||
4 | 2 | ||
5 | __PAGE_ALIGNED_DATA | 3 | DEFINE_VDSO_IMAGE(vdso, "arch/x86/vdso/vdso.so") |
6 | |||
7 | .globl vdso_start, vdso_end | ||
8 | .align PAGE_SIZE | ||
9 | vdso_start: | ||
10 | .incbin "arch/x86/vdso/vdso.so" | ||
11 | vdso_end: | ||
12 | .align PAGE_SIZE /* extra data here leaks to userspace. */ | ||
13 | |||
14 | .previous | ||
15 | |||
16 | .globl vdso_pages | ||
17 | .bss | ||
18 | .align 8 | ||
19 | .type vdso_pages, @object | ||
20 | vdso_pages: | ||
21 | .zero (vdso_end - vdso_start + PAGE_SIZE - 1) / PAGE_SIZE * 8 | ||
22 | .size vdso_pages, .-vdso_pages | ||
diff --git a/arch/x86/vdso/vdso32-setup.c b/arch/x86/vdso/vdso32-setup.c index d6bfb876cfb0..00348980a3a6 100644 --- a/arch/x86/vdso/vdso32-setup.c +++ b/arch/x86/vdso/vdso32-setup.c | |||
@@ -16,6 +16,7 @@ | |||
16 | #include <linux/mm.h> | 16 | #include <linux/mm.h> |
17 | #include <linux/err.h> | 17 | #include <linux/err.h> |
18 | #include <linux/module.h> | 18 | #include <linux/module.h> |
19 | #include <linux/slab.h> | ||
19 | 20 | ||
20 | #include <asm/cpufeature.h> | 21 | #include <asm/cpufeature.h> |
21 | #include <asm/msr.h> | 22 | #include <asm/msr.h> |
@@ -25,17 +26,14 @@ | |||
25 | #include <asm/tlbflush.h> | 26 | #include <asm/tlbflush.h> |
26 | #include <asm/vdso.h> | 27 | #include <asm/vdso.h> |
27 | #include <asm/proto.h> | 28 | #include <asm/proto.h> |
28 | 29 | #include <asm/fixmap.h> | |
29 | enum { | 30 | #include <asm/hpet.h> |
30 | VDSO_DISABLED = 0, | 31 | #include <asm/vvar.h> |
31 | VDSO_ENABLED = 1, | ||
32 | VDSO_COMPAT = 2, | ||
33 | }; | ||
34 | 32 | ||
35 | #ifdef CONFIG_COMPAT_VDSO | 33 | #ifdef CONFIG_COMPAT_VDSO |
36 | #define VDSO_DEFAULT VDSO_COMPAT | 34 | #define VDSO_DEFAULT 0 |
37 | #else | 35 | #else |
38 | #define VDSO_DEFAULT VDSO_ENABLED | 36 | #define VDSO_DEFAULT 1 |
39 | #endif | 37 | #endif |
40 | 38 | ||
41 | #ifdef CONFIG_X86_64 | 39 | #ifdef CONFIG_X86_64 |
@@ -44,13 +42,6 @@ enum { | |||
44 | #endif | 42 | #endif |
45 | 43 | ||
46 | /* | 44 | /* |
47 | * This is the difference between the prelinked addresses in the vDSO images | ||
48 | * and the VDSO_HIGH_BASE address where CONFIG_COMPAT_VDSO places the vDSO | ||
49 | * in the user address space. | ||
50 | */ | ||
51 | #define VDSO_ADDR_ADJUST (VDSO_HIGH_BASE - (unsigned long)VDSO32_PRELINK) | ||
52 | |||
53 | /* | ||
54 | * Should the kernel map a VDSO page into processes and pass its | 45 | * Should the kernel map a VDSO page into processes and pass its |
55 | * address down to glibc upon exec()? | 46 | * address down to glibc upon exec()? |
56 | */ | 47 | */ |
@@ -60,6 +51,9 @@ static int __init vdso_setup(char *s) | |||
60 | { | 51 | { |
61 | vdso_enabled = simple_strtoul(s, NULL, 0); | 52 | vdso_enabled = simple_strtoul(s, NULL, 0); |
62 | 53 | ||
54 | if (vdso_enabled > 1) | ||
55 | pr_warn("vdso32 values other than 0 and 1 are no longer allowed; vdso disabled\n"); | ||
56 | |||
63 | return 1; | 57 | return 1; |
64 | } | 58 | } |
65 | 59 | ||
@@ -76,124 +70,8 @@ __setup_param("vdso=", vdso32_setup, vdso_setup, 0); | |||
76 | EXPORT_SYMBOL_GPL(vdso_enabled); | 70 | EXPORT_SYMBOL_GPL(vdso_enabled); |
77 | #endif | 71 | #endif |
78 | 72 | ||
79 | static __init void reloc_symtab(Elf32_Ehdr *ehdr, | 73 | static struct page **vdso32_pages; |
80 | unsigned offset, unsigned size) | 74 | static unsigned vdso32_size; |
81 | { | ||
82 | Elf32_Sym *sym = (void *)ehdr + offset; | ||
83 | unsigned nsym = size / sizeof(*sym); | ||
84 | unsigned i; | ||
85 | |||
86 | for(i = 0; i < nsym; i++, sym++) { | ||
87 | if (sym->st_shndx == SHN_UNDEF || | ||
88 | sym->st_shndx == SHN_ABS) | ||
89 | continue; /* skip */ | ||
90 | |||
91 | if (sym->st_shndx > SHN_LORESERVE) { | ||
92 | printk(KERN_INFO "VDSO: unexpected st_shndx %x\n", | ||
93 | sym->st_shndx); | ||
94 | continue; | ||
95 | } | ||
96 | |||
97 | switch(ELF_ST_TYPE(sym->st_info)) { | ||
98 | case STT_OBJECT: | ||
99 | case STT_FUNC: | ||
100 | case STT_SECTION: | ||
101 | case STT_FILE: | ||
102 | sym->st_value += VDSO_ADDR_ADJUST; | ||
103 | } | ||
104 | } | ||
105 | } | ||
106 | |||
107 | static __init void reloc_dyn(Elf32_Ehdr *ehdr, unsigned offset) | ||
108 | { | ||
109 | Elf32_Dyn *dyn = (void *)ehdr + offset; | ||
110 | |||
111 | for(; dyn->d_tag != DT_NULL; dyn++) | ||
112 | switch(dyn->d_tag) { | ||
113 | case DT_PLTGOT: | ||
114 | case DT_HASH: | ||
115 | case DT_STRTAB: | ||
116 | case DT_SYMTAB: | ||
117 | case DT_RELA: | ||
118 | case DT_INIT: | ||
119 | case DT_FINI: | ||
120 | case DT_REL: | ||
121 | case DT_DEBUG: | ||
122 | case DT_JMPREL: | ||
123 | case DT_VERSYM: | ||
124 | case DT_VERDEF: | ||
125 | case DT_VERNEED: | ||
126 | case DT_ADDRRNGLO ... DT_ADDRRNGHI: | ||
127 | /* definitely pointers needing relocation */ | ||
128 | dyn->d_un.d_ptr += VDSO_ADDR_ADJUST; | ||
129 | break; | ||
130 | |||
131 | case DT_ENCODING ... OLD_DT_LOOS-1: | ||
132 | case DT_LOOS ... DT_HIOS-1: | ||
133 | /* Tags above DT_ENCODING are pointers if | ||
134 | they're even */ | ||
135 | if (dyn->d_tag >= DT_ENCODING && | ||
136 | (dyn->d_tag & 1) == 0) | ||
137 | dyn->d_un.d_ptr += VDSO_ADDR_ADJUST; | ||
138 | break; | ||
139 | |||
140 | case DT_VERDEFNUM: | ||
141 | case DT_VERNEEDNUM: | ||
142 | case DT_FLAGS_1: | ||
143 | case DT_RELACOUNT: | ||
144 | case DT_RELCOUNT: | ||
145 | case DT_VALRNGLO ... DT_VALRNGHI: | ||
146 | /* definitely not pointers */ | ||
147 | break; | ||
148 | |||
149 | case OLD_DT_LOOS ... DT_LOOS-1: | ||
150 | case DT_HIOS ... DT_VALRNGLO-1: | ||
151 | default: | ||
152 | if (dyn->d_tag > DT_ENCODING) | ||
153 | printk(KERN_INFO "VDSO: unexpected DT_tag %x\n", | ||
154 | dyn->d_tag); | ||
155 | break; | ||
156 | } | ||
157 | } | ||
158 | |||
159 | static __init void relocate_vdso(Elf32_Ehdr *ehdr) | ||
160 | { | ||
161 | Elf32_Phdr *phdr; | ||
162 | Elf32_Shdr *shdr; | ||
163 | int i; | ||
164 | |||
165 | BUG_ON(memcmp(ehdr->e_ident, ELFMAG, SELFMAG) != 0 || | ||
166 | !elf_check_arch_ia32(ehdr) || | ||
167 | ehdr->e_type != ET_DYN); | ||
168 | |||
169 | ehdr->e_entry += VDSO_ADDR_ADJUST; | ||
170 | |||
171 | /* rebase phdrs */ | ||
172 | phdr = (void *)ehdr + ehdr->e_phoff; | ||
173 | for (i = 0; i < ehdr->e_phnum; i++) { | ||
174 | phdr[i].p_vaddr += VDSO_ADDR_ADJUST; | ||
175 | |||
176 | /* relocate dynamic stuff */ | ||
177 | if (phdr[i].p_type == PT_DYNAMIC) | ||
178 | reloc_dyn(ehdr, phdr[i].p_offset); | ||
179 | } | ||
180 | |||
181 | /* rebase sections */ | ||
182 | shdr = (void *)ehdr + ehdr->e_shoff; | ||
183 | for(i = 0; i < ehdr->e_shnum; i++) { | ||
184 | if (!(shdr[i].sh_flags & SHF_ALLOC)) | ||
185 | continue; | ||
186 | |||
187 | shdr[i].sh_addr += VDSO_ADDR_ADJUST; | ||
188 | |||
189 | if (shdr[i].sh_type == SHT_SYMTAB || | ||
190 | shdr[i].sh_type == SHT_DYNSYM) | ||
191 | reloc_symtab(ehdr, shdr[i].sh_offset, | ||
192 | shdr[i].sh_size); | ||
193 | } | ||
194 | } | ||
195 | |||
196 | static struct page *vdso32_pages[1]; | ||
197 | 75 | ||
198 | #ifdef CONFIG_X86_64 | 76 | #ifdef CONFIG_X86_64 |
199 | 77 | ||
@@ -212,12 +90,6 @@ void syscall32_cpu_init(void) | |||
212 | wrmsrl(MSR_CSTAR, ia32_cstar_target); | 90 | wrmsrl(MSR_CSTAR, ia32_cstar_target); |
213 | } | 91 | } |
214 | 92 | ||
215 | #define compat_uses_vma 1 | ||
216 | |||
217 | static inline void map_compat_vdso(int map) | ||
218 | { | ||
219 | } | ||
220 | |||
221 | #else /* CONFIG_X86_32 */ | 93 | #else /* CONFIG_X86_32 */ |
222 | 94 | ||
223 | #define vdso32_sysenter() (boot_cpu_has(X86_FEATURE_SEP)) | 95 | #define vdso32_sysenter() (boot_cpu_has(X86_FEATURE_SEP)) |
@@ -241,64 +113,36 @@ void enable_sep_cpu(void) | |||
241 | put_cpu(); | 113 | put_cpu(); |
242 | } | 114 | } |
243 | 115 | ||
244 | static struct vm_area_struct gate_vma; | ||
245 | |||
246 | static int __init gate_vma_init(void) | ||
247 | { | ||
248 | gate_vma.vm_mm = NULL; | ||
249 | gate_vma.vm_start = FIXADDR_USER_START; | ||
250 | gate_vma.vm_end = FIXADDR_USER_END; | ||
251 | gate_vma.vm_flags = VM_READ | VM_MAYREAD | VM_EXEC | VM_MAYEXEC; | ||
252 | gate_vma.vm_page_prot = __P101; | ||
253 | |||
254 | return 0; | ||
255 | } | ||
256 | |||
257 | #define compat_uses_vma 0 | ||
258 | |||
259 | static void map_compat_vdso(int map) | ||
260 | { | ||
261 | static int vdso_mapped; | ||
262 | |||
263 | if (map == vdso_mapped) | ||
264 | return; | ||
265 | |||
266 | vdso_mapped = map; | ||
267 | |||
268 | __set_fixmap(FIX_VDSO, page_to_pfn(vdso32_pages[0]) << PAGE_SHIFT, | ||
269 | map ? PAGE_READONLY_EXEC : PAGE_NONE); | ||
270 | |||
271 | /* flush stray tlbs */ | ||
272 | flush_tlb_all(); | ||
273 | } | ||
274 | |||
275 | #endif /* CONFIG_X86_64 */ | 116 | #endif /* CONFIG_X86_64 */ |
276 | 117 | ||
277 | int __init sysenter_setup(void) | 118 | int __init sysenter_setup(void) |
278 | { | 119 | { |
279 | void *syscall_page = (void *)get_zeroed_page(GFP_ATOMIC); | 120 | char *vdso32_start, *vdso32_end; |
280 | const void *vsyscall; | 121 | int npages, i; |
281 | size_t vsyscall_len; | ||
282 | |||
283 | vdso32_pages[0] = virt_to_page(syscall_page); | ||
284 | |||
285 | #ifdef CONFIG_X86_32 | ||
286 | gate_vma_init(); | ||
287 | #endif | ||
288 | 122 | ||
123 | #ifdef CONFIG_COMPAT | ||
289 | if (vdso32_syscall()) { | 124 | if (vdso32_syscall()) { |
290 | vsyscall = &vdso32_syscall_start; | 125 | vdso32_start = vdso32_syscall_start; |
291 | vsyscall_len = &vdso32_syscall_end - &vdso32_syscall_start; | 126 | vdso32_end = vdso32_syscall_end; |
292 | } else if (vdso32_sysenter()){ | 127 | vdso32_pages = vdso32_syscall_pages; |
293 | vsyscall = &vdso32_sysenter_start; | 128 | } else |
294 | vsyscall_len = &vdso32_sysenter_end - &vdso32_sysenter_start; | 129 | #endif |
130 | if (vdso32_sysenter()) { | ||
131 | vdso32_start = vdso32_sysenter_start; | ||
132 | vdso32_end = vdso32_sysenter_end; | ||
133 | vdso32_pages = vdso32_sysenter_pages; | ||
295 | } else { | 134 | } else { |
296 | vsyscall = &vdso32_int80_start; | 135 | vdso32_start = vdso32_int80_start; |
297 | vsyscall_len = &vdso32_int80_end - &vdso32_int80_start; | 136 | vdso32_end = vdso32_int80_end; |
137 | vdso32_pages = vdso32_int80_pages; | ||
298 | } | 138 | } |
299 | 139 | ||
300 | memcpy(syscall_page, vsyscall, vsyscall_len); | 140 | npages = ((vdso32_end - vdso32_start) + PAGE_SIZE - 1) / PAGE_SIZE; |
301 | relocate_vdso(syscall_page); | 141 | vdso32_size = npages << PAGE_SHIFT; |
142 | for (i = 0; i < npages; i++) | ||
143 | vdso32_pages[i] = virt_to_page(vdso32_start + i*PAGE_SIZE); | ||
144 | |||
145 | patch_vdso32(vdso32_start, vdso32_size); | ||
302 | 146 | ||
303 | return 0; | 147 | return 0; |
304 | } | 148 | } |
@@ -309,48 +153,73 @@ int arch_setup_additional_pages(struct linux_binprm *bprm, int uses_interp) | |||
309 | struct mm_struct *mm = current->mm; | 153 | struct mm_struct *mm = current->mm; |
310 | unsigned long addr; | 154 | unsigned long addr; |
311 | int ret = 0; | 155 | int ret = 0; |
312 | bool compat; | 156 | struct vm_area_struct *vma; |
313 | 157 | ||
314 | #ifdef CONFIG_X86_X32_ABI | 158 | #ifdef CONFIG_X86_X32_ABI |
315 | if (test_thread_flag(TIF_X32)) | 159 | if (test_thread_flag(TIF_X32)) |
316 | return x32_setup_additional_pages(bprm, uses_interp); | 160 | return x32_setup_additional_pages(bprm, uses_interp); |
317 | #endif | 161 | #endif |
318 | 162 | ||
319 | if (vdso_enabled == VDSO_DISABLED) | 163 | if (vdso_enabled != 1) /* Other values all mean "disabled" */ |
320 | return 0; | 164 | return 0; |
321 | 165 | ||
322 | down_write(&mm->mmap_sem); | 166 | down_write(&mm->mmap_sem); |
323 | 167 | ||
324 | /* Test compat mode once here, in case someone | 168 | addr = get_unmapped_area(NULL, 0, vdso32_size + VDSO_OFFSET(VDSO_PREV_PAGES), 0, 0); |
325 | changes it via sysctl */ | 169 | if (IS_ERR_VALUE(addr)) { |
326 | compat = (vdso_enabled == VDSO_COMPAT); | 170 | ret = addr; |
171 | goto up_fail; | ||
172 | } | ||
173 | |||
174 | addr += VDSO_OFFSET(VDSO_PREV_PAGES); | ||
327 | 175 | ||
328 | map_compat_vdso(compat); | 176 | current->mm->context.vdso = (void *)addr; |
329 | 177 | ||
330 | if (compat) | 178 | /* |
331 | addr = VDSO_HIGH_BASE; | 179 | * MAYWRITE to allow gdb to COW and set breakpoints |
332 | else { | 180 | */ |
333 | addr = get_unmapped_area(NULL, 0, PAGE_SIZE, 0, 0); | 181 | ret = install_special_mapping(mm, |
334 | if (IS_ERR_VALUE(addr)) { | 182 | addr, |
335 | ret = addr; | 183 | vdso32_size, |
336 | goto up_fail; | 184 | VM_READ|VM_EXEC| |
337 | } | 185 | VM_MAYREAD|VM_MAYWRITE|VM_MAYEXEC, |
186 | vdso32_pages); | ||
187 | |||
188 | if (ret) | ||
189 | goto up_fail; | ||
190 | |||
191 | vma = _install_special_mapping(mm, | ||
192 | addr - VDSO_OFFSET(VDSO_PREV_PAGES), | ||
193 | VDSO_OFFSET(VDSO_PREV_PAGES), | ||
194 | VM_READ, | ||
195 | NULL); | ||
196 | |||
197 | if (IS_ERR(vma)) { | ||
198 | ret = PTR_ERR(vma); | ||
199 | goto up_fail; | ||
338 | } | 200 | } |
339 | 201 | ||
340 | current->mm->context.vdso = (void *)addr; | 202 | ret = remap_pfn_range(vma, |
203 | addr - VDSO_OFFSET(VDSO_VVAR_PAGE), | ||
204 | __pa_symbol(&__vvar_page) >> PAGE_SHIFT, | ||
205 | PAGE_SIZE, | ||
206 | PAGE_READONLY); | ||
207 | |||
208 | if (ret) | ||
209 | goto up_fail; | ||
341 | 210 | ||
342 | if (compat_uses_vma || !compat) { | 211 | #ifdef CONFIG_HPET_TIMER |
343 | /* | 212 | if (hpet_address) { |
344 | * MAYWRITE to allow gdb to COW and set breakpoints | 213 | ret = io_remap_pfn_range(vma, |
345 | */ | 214 | addr - VDSO_OFFSET(VDSO_HPET_PAGE), |
346 | ret = install_special_mapping(mm, addr, PAGE_SIZE, | 215 | hpet_address >> PAGE_SHIFT, |
347 | VM_READ|VM_EXEC| | 216 | PAGE_SIZE, |
348 | VM_MAYREAD|VM_MAYWRITE|VM_MAYEXEC, | 217 | pgprot_noncached(PAGE_READONLY)); |
349 | vdso32_pages); | ||
350 | 218 | ||
351 | if (ret) | 219 | if (ret) |
352 | goto up_fail; | 220 | goto up_fail; |
353 | } | 221 | } |
222 | #endif | ||
354 | 223 | ||
355 | current_thread_info()->sysenter_return = | 224 | current_thread_info()->sysenter_return = |
356 | VDSO32_SYMBOL(addr, SYSENTER_RETURN); | 225 | VDSO32_SYMBOL(addr, SYSENTER_RETURN); |
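The address arithmetic above produces a fixed layout: the vDSO code lands at addr, and the read-only data pages sit immediately below it at constant offsets the vDSO code can reach. Roughly, as inferred from the VDSO_OFFSET() calls in this hunk (a sketch, not an authoritative map):

/*
 * [addr - VDSO_OFFSET(VDSO_PREV_PAGES), addr)  read-only VM_READ area:
 *     addr - VDSO_OFFSET(VDSO_HPET_PAGE)       HPET MMIO page (if hpet_address)
 *     addr - VDSO_OFFSET(VDSO_VVAR_PAGE)       __vvar_page (kernel time data)
 * [addr, addr + vdso32_size)                   vDSO code pages (RX, COW-able
 *                                              for gdb breakpoints)
 */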
@@ -411,20 +280,12 @@ const char *arch_vma_name(struct vm_area_struct *vma) | |||
411 | 280 | ||
412 | struct vm_area_struct *get_gate_vma(struct mm_struct *mm) | 281 | struct vm_area_struct *get_gate_vma(struct mm_struct *mm) |
413 | { | 282 | { |
414 | /* | ||
415 | * Check to see if the corresponding task was created in compat vdso | ||
416 | * mode. | ||
417 | */ | ||
418 | if (mm && mm->context.vdso == (void *)VDSO_HIGH_BASE) | ||
419 | return &gate_vma; | ||
420 | return NULL; | 283 | return NULL; |
421 | } | 284 | } |
422 | 285 | ||
423 | int in_gate_area(struct mm_struct *mm, unsigned long addr) | 286 | int in_gate_area(struct mm_struct *mm, unsigned long addr) |
424 | { | 287 | { |
425 | const struct vm_area_struct *vma = get_gate_vma(mm); | 288 | return 0; |
426 | |||
427 | return vma && addr >= vma->vm_start && addr < vma->vm_end; | ||
428 | } | 289 | } |
429 | 290 | ||
430 | int in_gate_area_no_mm(unsigned long addr) | 291 | int in_gate_area_no_mm(unsigned long addr) |
diff --git a/arch/x86/vdso/vdso32.S b/arch/x86/vdso/vdso32.S index 2ce5f82c333b..018bcd9f97b4 100644 --- a/arch/x86/vdso/vdso32.S +++ b/arch/x86/vdso/vdso32.S | |||
@@ -1,22 +1,9 @@ | |||
1 | #include <linux/init.h> | 1 | #include <asm/vdso.h> |
2 | 2 | ||
3 | __INITDATA | 3 | DEFINE_VDSO_IMAGE(vdso32_int80, "arch/x86/vdso/vdso32-int80.so") |
4 | 4 | ||
5 | .globl vdso32_int80_start, vdso32_int80_end | ||
6 | vdso32_int80_start: | ||
7 | .incbin "arch/x86/vdso/vdso32-int80.so" | ||
8 | vdso32_int80_end: | ||
9 | |||
10 | .globl vdso32_syscall_start, vdso32_syscall_end | ||
11 | vdso32_syscall_start: | ||
12 | #ifdef CONFIG_COMPAT | 5 | #ifdef CONFIG_COMPAT |
13 | .incbin "arch/x86/vdso/vdso32-syscall.so" | 6 | DEFINE_VDSO_IMAGE(vdso32_syscall, "arch/x86/vdso/vdso32-syscall.so") |
14 | #endif | 7 | #endif |
15 | vdso32_syscall_end: | ||
16 | |||
17 | .globl vdso32_sysenter_start, vdso32_sysenter_end | ||
18 | vdso32_sysenter_start: | ||
19 | .incbin "arch/x86/vdso/vdso32-sysenter.so" | ||
20 | vdso32_sysenter_end: | ||
21 | 8 | ||
22 | __FINIT | 9 | DEFINE_VDSO_IMAGE(vdso32_sysenter, "arch/x86/vdso/vdso32-sysenter.so") |
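DEFINE_VDSO_IMAGE() centralizes the .incbin boilerplate this file used to open-code (and the page-array boilerplate removed from vdsox32.S below). Its C-side companion, used in vma.c further down, plausibly declares the symbols the assembly emits; a reconstruction from the extern declarations this patch deletes (hedged - the real macros live in asm/vdso.h):

/* Hypothetical shape, inferred from the removed declarations:
 * <image>_start/<image>_end bound the embedded .so and
 * <image>_pages is the array of struct page pointers. */
#define DECLARE_VDSO_IMAGE(image)                       \
        extern char image##_start[], image##_end[];     \
        extern struct page *image##_pages[]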
diff --git a/arch/x86/vdso/vdso32/vclock_gettime.c b/arch/x86/vdso/vdso32/vclock_gettime.c new file mode 100644 index 000000000000..175cc72c0f68 --- /dev/null +++ b/arch/x86/vdso/vdso32/vclock_gettime.c | |||
@@ -0,0 +1,30 @@ | |||
1 | #define BUILD_VDSO32 | ||
2 | |||
3 | #ifndef CONFIG_CC_OPTIMIZE_FOR_SIZE | ||
4 | #undef CONFIG_OPTIMIZE_INLINING | ||
5 | #endif | ||
6 | |||
7 | #undef CONFIG_X86_PPRO_FENCE | ||
8 | |||
9 | #ifdef CONFIG_X86_64 | ||
10 | |||
11 | /* | ||
12 | * In the case of a 32-bit vDSO for a 64-bit kernel, fake a 32-bit | ||
13 | * kernel configuration. | ||
14 | */ | ||
15 | #undef CONFIG_64BIT | ||
16 | #undef CONFIG_X86_64 | ||
17 | #undef CONFIG_ILLEGAL_POINTER_VALUE | ||
18 | #undef CONFIG_SPARSEMEM_VMEMMAP | ||
19 | #undef CONFIG_NR_CPUS | ||
20 | |||
21 | #define CONFIG_X86_32 1 | ||
22 | #define CONFIG_PAGE_OFFSET 0 | ||
23 | #define CONFIG_ILLEGAL_POINTER_VALUE 0 | ||
24 | #define CONFIG_NR_CPUS 1 | ||
25 | |||
26 | #define BUILD_VDSO32_64 | ||
27 | |||
28 | #endif | ||
29 | |||
30 | #include "../vclock_gettime.c" | ||
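The #undef/#define block makes the shared ../vclock_gettime.c compile as if for a 32-bit kernel even during a 64-bit build, so the 32-bit vDSO gets 32-bit types and code paths. Illustrative effect only (not a quote from the shared file):

/* With CONFIG_X86_64 undefined above, guards in the shared source
 * such as the following now select their 32-bit branch: */
#ifdef CONFIG_X86_64
        /* 64-bit path: assumes 64-bit pointers and layouts */
#else
        /* 32-bit path: what a 32-bit vDSO must execute */
#endif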
diff --git a/arch/x86/vdso/vdso32/vdso32.lds.S b/arch/x86/vdso/vdso32/vdso32.lds.S index 976124bb5f92..aadb8b9994cd 100644 --- a/arch/x86/vdso/vdso32/vdso32.lds.S +++ b/arch/x86/vdso/vdso32/vdso32.lds.S | |||
@@ -8,7 +8,11 @@ | |||
8 | * values visible using the asm-x86/vdso.h macros from the kernel proper. | 8 | * values visible using the asm-x86/vdso.h macros from the kernel proper. |
9 | */ | 9 | */ |
10 | 10 | ||
11 | #include <asm/page.h> | ||
12 | |||
13 | #define BUILD_VDSO32 | ||
11 | #define VDSO_PRELINK 0 | 14 | #define VDSO_PRELINK 0 |
15 | |||
12 | #include "../vdso-layout.lds.S" | 16 | #include "../vdso-layout.lds.S" |
13 | 17 | ||
14 | /* The ELF entry point can be used to set the AT_SYSINFO value. */ | 18 | /* The ELF entry point can be used to set the AT_SYSINFO value. */ |
@@ -19,6 +23,13 @@ ENTRY(__kernel_vsyscall); | |||
19 | */ | 23 | */ |
20 | VERSION | 24 | VERSION |
21 | { | 25 | { |
26 | LINUX_2.6 { | ||
27 | global: | ||
28 | __vdso_clock_gettime; | ||
29 | __vdso_gettimeofday; | ||
30 | __vdso_time; | ||
31 | }; | ||
32 | |||
22 | LINUX_2.5 { | 33 | LINUX_2.5 { |
23 | global: | 34 | global: |
24 | __kernel_vsyscall; | 35 | __kernel_vsyscall; |
@@ -31,7 +42,9 @@ VERSION | |||
31 | /* | 42 | /* |
32 | * Symbols we define here called VDSO* get their values into vdso32-syms.h. | 43 | * Symbols we define here called VDSO* get their values into vdso32-syms.h. |
33 | */ | 44 | */ |
34 | VDSO32_PRELINK = VDSO_PRELINK; | ||
35 | VDSO32_vsyscall = __kernel_vsyscall; | 45 | VDSO32_vsyscall = __kernel_vsyscall; |
36 | VDSO32_sigreturn = __kernel_sigreturn; | 46 | VDSO32_sigreturn = __kernel_sigreturn; |
37 | VDSO32_rt_sigreturn = __kernel_rt_sigreturn; | 47 | VDSO32_rt_sigreturn = __kernel_rt_sigreturn; |
48 | VDSO32_clock_gettime = clock_gettime; | ||
49 | VDSO32_gettimeofday = gettimeofday; | ||
50 | VDSO32_time = time; | ||
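With __vdso_clock_gettime now exported under LINUX_2.6, 32-bit userspace can read the clock without entering the kernel. A userspace sketch (illustration only; it leans on the C library exposing the already-mapped vDSO, soname "linux-gate.so.1" on 32-bit, to dlopen with RTLD_NOLOAD - a fully robust reader would instead parse the ELF image at getauxval(AT_SYSINFO_EHDR); build with -ldl):

#define _GNU_SOURCE
#include <stdio.h>
#include <time.h>
#include <dlfcn.h>

typedef int (*vgettime_t)(clockid_t, struct timespec *);

int main(void)
{
        /* RTLD_NOLOAD: only succeed if the vDSO is already mapped. */
        void *vdso = dlopen("linux-gate.so.1", RTLD_LAZY | RTLD_NOLOAD);
        vgettime_t vgt;
        struct timespec ts;

        if (!vdso)
                return 1;
        vgt = (vgettime_t)dlsym(vdso, "__vdso_clock_gettime");
        if (vgt && vgt(CLOCK_MONOTONIC, &ts) == 0)
                printf("%ld.%09ld\n", (long)ts.tv_sec, ts.tv_nsec);
        return 0;
}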
diff --git a/arch/x86/vdso/vdsox32.S b/arch/x86/vdso/vdsox32.S index d6b9a7f42a8a..f4aa34e7f370 100644 --- a/arch/x86/vdso/vdsox32.S +++ b/arch/x86/vdso/vdsox32.S | |||
@@ -1,22 +1,3 @@ | |||
1 | #include <asm/page_types.h> | 1 | #include <asm/vdso.h> |
2 | #include <linux/linkage.h> | ||
3 | #include <linux/init.h> | ||
4 | 2 | ||
5 | __PAGE_ALIGNED_DATA | 3 | DEFINE_VDSO_IMAGE(vdsox32, "arch/x86/vdso/vdsox32.so") |
6 | |||
7 | .globl vdsox32_start, vdsox32_end | ||
8 | .align PAGE_SIZE | ||
9 | vdsox32_start: | ||
10 | .incbin "arch/x86/vdso/vdsox32.so" | ||
11 | vdsox32_end: | ||
12 | .align PAGE_SIZE /* extra data here leaks to userspace. */ | ||
13 | |||
14 | .previous | ||
15 | |||
16 | .globl vdsox32_pages | ||
17 | .bss | ||
18 | .align 8 | ||
19 | .type vdsox32_pages, @object | ||
20 | vdsox32_pages: | ||
21 | .zero (vdsox32_end - vdsox32_start + PAGE_SIZE - 1) / PAGE_SIZE * 8 | ||
22 | .size vdsox32_pages, .-vdsox32_pages | ||
diff --git a/arch/x86/vdso/vma.c b/arch/x86/vdso/vma.c index 431e87544411..1ad102613127 100644 --- a/arch/x86/vdso/vma.c +++ b/arch/x86/vdso/vma.c | |||
@@ -16,20 +16,22 @@ | |||
16 | #include <asm/vdso.h> | 16 | #include <asm/vdso.h> |
17 | #include <asm/page.h> | 17 | #include <asm/page.h> |
18 | 18 | ||
19 | #if defined(CONFIG_X86_64) | ||
19 | unsigned int __read_mostly vdso_enabled = 1; | 20 | unsigned int __read_mostly vdso_enabled = 1; |
20 | 21 | ||
21 | extern char vdso_start[], vdso_end[]; | 22 | DECLARE_VDSO_IMAGE(vdso); |
22 | extern unsigned short vdso_sync_cpuid; | 23 | extern unsigned short vdso_sync_cpuid; |
23 | |||
24 | extern struct page *vdso_pages[]; | ||
25 | static unsigned vdso_size; | 24 | static unsigned vdso_size; |
26 | 25 | ||
27 | #ifdef CONFIG_X86_X32_ABI | 26 | #ifdef CONFIG_X86_X32_ABI |
28 | extern char vdsox32_start[], vdsox32_end[]; | 27 | DECLARE_VDSO_IMAGE(vdsox32); |
29 | extern struct page *vdsox32_pages[]; | ||
30 | static unsigned vdsox32_size; | 28 | static unsigned vdsox32_size; |
29 | #endif | ||
30 | #endif | ||
31 | 31 | ||
32 | static void __init patch_vdsox32(void *vdso, size_t len) | 32 | #if defined(CONFIG_X86_32) || defined(CONFIG_X86_X32_ABI) || \ |
33 | defined(CONFIG_COMPAT) | ||
34 | void __init patch_vdso32(void *vdso, size_t len) | ||
33 | { | 35 | { |
34 | Elf32_Ehdr *hdr = vdso; | 36 | Elf32_Ehdr *hdr = vdso; |
35 | Elf32_Shdr *sechdrs, *alt_sec = 0; | 37 | Elf32_Shdr *sechdrs, *alt_sec = 0; |
@@ -52,7 +54,7 @@ static void __init patch_vdsox32(void *vdso, size_t len) | |||
52 | } | 54 | } |
53 | 55 | ||
54 | /* If we get here, it's probably a bug. */ | 56 | /* If we get here, it's probably a bug. */ |
55 | pr_warning("patch_vdsox32: .altinstructions not found\n"); | 57 | pr_warning("patch_vdso32: .altinstructions not found\n"); |
56 | return; /* nothing to patch */ | 58 | return; /* nothing to patch */ |
57 | 59 | ||
58 | found: | 60 | found: |
@@ -61,6 +63,7 @@ found: | |||
61 | } | 63 | } |
62 | #endif | 64 | #endif |
63 | 65 | ||
66 | #if defined(CONFIG_X86_64) | ||
64 | static void __init patch_vdso64(void *vdso, size_t len) | 67 | static void __init patch_vdso64(void *vdso, size_t len) |
65 | { | 68 | { |
66 | Elf64_Ehdr *hdr = vdso; | 69 | Elf64_Ehdr *hdr = vdso; |
@@ -104,7 +107,7 @@ static int __init init_vdso(void) | |||
104 | vdso_pages[i] = virt_to_page(vdso_start + i*PAGE_SIZE); | 107 | vdso_pages[i] = virt_to_page(vdso_start + i*PAGE_SIZE); |
105 | 108 | ||
106 | #ifdef CONFIG_X86_X32_ABI | 109 | #ifdef CONFIG_X86_X32_ABI |
107 | patch_vdsox32(vdsox32_start, vdsox32_end - vdsox32_start); | 110 | patch_vdso32(vdsox32_start, vdsox32_end - vdsox32_start); |
108 | npages = (vdsox32_end - vdsox32_start + PAGE_SIZE - 1) / PAGE_SIZE; | 111 | npages = (vdsox32_end - vdsox32_start + PAGE_SIZE - 1) / PAGE_SIZE; |
109 | vdsox32_size = npages << PAGE_SHIFT; | 112 | vdsox32_size = npages << PAGE_SHIFT; |
110 | for (i = 0; i < npages; i++) | 113 | for (i = 0; i < npages; i++) |
@@ -204,3 +207,4 @@ static __init int vdso_setup(char *s) | |||
204 | return 0; | 207 | return 0; |
205 | } | 208 | } |
206 | __setup("vdso=", vdso_setup); | 209 | __setup("vdso=", vdso_setup); |
210 | #endif | ||
diff --git a/arch/x86/xen/Kconfig b/arch/x86/xen/Kconfig index 1a3c76505649..e88fda867a33 100644 --- a/arch/x86/xen/Kconfig +++ b/arch/x86/xen/Kconfig | |||
@@ -7,7 +7,7 @@ config XEN | |||
7 | depends on PARAVIRT | 7 | depends on PARAVIRT |
8 | select PARAVIRT_CLOCK | 8 | select PARAVIRT_CLOCK |
9 | select XEN_HAVE_PVMMU | 9 | select XEN_HAVE_PVMMU |
10 | depends on X86_64 || (X86_32 && X86_PAE && !X86_VISWS) | 10 | depends on X86_64 || (X86_32 && X86_PAE) |
11 | depends on X86_TSC | 11 | depends on X86_TSC |
12 | help | 12 | help |
13 | This is the Linux Xen port. Enabling this will allow the | 13 | This is the Linux Xen port. Enabling this will allow the |
@@ -19,11 +19,6 @@ config XEN_DOM0 | |||
19 | depends on XEN && PCI_XEN && SWIOTLB_XEN | 19 | depends on XEN && PCI_XEN && SWIOTLB_XEN |
20 | depends on X86_LOCAL_APIC && X86_IO_APIC && ACPI && PCI | 20 | depends on X86_LOCAL_APIC && X86_IO_APIC && ACPI && PCI |
21 | 21 | ||
22 | # Dummy symbol since people have come to rely on the PRIVILEGED_GUEST | ||
23 | # name in tools. | ||
24 | config XEN_PRIVILEGED_GUEST | ||
25 | def_bool XEN_DOM0 | ||
26 | |||
27 | config XEN_PVHVM | 22 | config XEN_PVHVM |
28 | def_bool y | 23 | def_bool y |
29 | depends on XEN && PCI && X86_LOCAL_APIC | 24 | depends on XEN && PCI && X86_LOCAL_APIC |
@@ -51,3 +46,7 @@ config XEN_DEBUG_FS | |||
51 | Enable statistics output and various tuning options in debugfs. | 46 | Enable statistics output and various tuning options in debugfs. |
52 | Enabling this option may incur a significant performance overhead. | 47 | Enabling this option may incur a significant performance overhead. |
53 | 48 | ||
49 | config XEN_PVH | ||
50 | bool "Support for running as a PVH guest" | ||
51 | depends on X86_64 && XEN && XEN_PVHVM | ||
52 | default n | ||
diff --git a/arch/x86/xen/enlighten.c b/arch/x86/xen/enlighten.c index fa6ade76ef3f..201d09a7c46b 100644 --- a/arch/x86/xen/enlighten.c +++ b/arch/x86/xen/enlighten.c | |||
@@ -262,8 +262,9 @@ static void __init xen_banner(void) | |||
262 | struct xen_extraversion extra; | 262 | struct xen_extraversion extra; |
263 | HYPERVISOR_xen_version(XENVER_extraversion, &extra); | 263 | HYPERVISOR_xen_version(XENVER_extraversion, &extra); |
264 | 264 | ||
265 | printk(KERN_INFO "Booting paravirtualized kernel on %s\n", | 265 | pr_info("Booting paravirtualized kernel %son %s\n", |
266 | pv_info.name); | 266 | xen_feature(XENFEAT_auto_translated_physmap) ? |
267 | "with PVH extensions " : "", pv_info.name); | ||
267 | printk(KERN_INFO "Xen version: %d.%d%s%s\n", | 268 | printk(KERN_INFO "Xen version: %d.%d%s%s\n", |
268 | version >> 16, version & 0xffff, extra.extraversion, | 269 | version >> 16, version & 0xffff, extra.extraversion, |
269 | xen_feature(XENFEAT_mmu_pt_update_preserve_ad) ? " (preserve-AD)" : ""); | 270 | xen_feature(XENFEAT_mmu_pt_update_preserve_ad) ? " (preserve-AD)" : ""); |
@@ -433,7 +434,7 @@ static void __init xen_init_cpuid_mask(void) | |||
433 | 434 | ||
434 | ax = 1; | 435 | ax = 1; |
435 | cx = 0; | 436 | cx = 0; |
436 | xen_cpuid(&ax, &bx, &cx, &dx); | 437 | cpuid(1, &ax, &bx, &cx, &dx); |
437 | 438 | ||
438 | xsave_mask = | 439 | xsave_mask = |
439 | (1 << (X86_FEATURE_XSAVE % 32)) | | 440 | (1 << (X86_FEATURE_XSAVE % 32)) | |
@@ -1142,8 +1143,9 @@ void xen_setup_vcpu_info_placement(void) | |||
1142 | xen_vcpu_setup(cpu); | 1143 | xen_vcpu_setup(cpu); |
1143 | 1144 | ||
1144 | /* xen_vcpu_setup managed to place the vcpu_info within the | 1145 | /* xen_vcpu_setup managed to place the vcpu_info within the |
1145 | percpu area for all cpus, so make use of it */ | 1146 | * percpu area for all cpus, so make use of it. Note that for |
1146 | if (have_vcpu_info_placement) { | 1147 | * PVH we want to use the native IRQ mechanism. */
1148 | if (have_vcpu_info_placement && !xen_pvh_domain()) { | ||
1147 | pv_irq_ops.save_fl = __PV_IS_CALLEE_SAVE(xen_save_fl_direct); | 1149 | pv_irq_ops.save_fl = __PV_IS_CALLEE_SAVE(xen_save_fl_direct); |
1148 | pv_irq_ops.restore_fl = __PV_IS_CALLEE_SAVE(xen_restore_fl_direct); | 1150 | pv_irq_ops.restore_fl = __PV_IS_CALLEE_SAVE(xen_restore_fl_direct); |
1149 | pv_irq_ops.irq_disable = __PV_IS_CALLEE_SAVE(xen_irq_disable_direct); | 1151 | pv_irq_ops.irq_disable = __PV_IS_CALLEE_SAVE(xen_irq_disable_direct); |
@@ -1407,9 +1409,49 @@ static void __init xen_boot_params_init_edd(void) | |||
1407 | * Set up the GDT and segment registers for -fstack-protector. Until | 1409 | * Set up the GDT and segment registers for -fstack-protector. Until |
1408 | * we do this, we have to be careful not to call any stack-protected | 1410 | * we do this, we have to be careful not to call any stack-protected |
1409 | * function, which is most of the kernel. | 1411 | * function, which is most of the kernel. |
1412 | * | ||
1413 | * Note that it is __ref because the only caller of this after init | ||
1414 | * is PVH which is not going to use xen_load_gdt_boot or other | ||
1415 | * __init functions. | ||
1410 | */ | 1416 | */ |
1411 | static void __init xen_setup_stackprotector(void) | 1417 | static void __ref xen_setup_gdt(int cpu) |
1412 | { | 1418 | { |
1419 | if (xen_feature(XENFEAT_auto_translated_physmap)) { | ||
1420 | #ifdef CONFIG_X86_64 | ||
1421 | unsigned long dummy; | ||
1422 | |||
1423 | load_percpu_segment(cpu); /* We need to access per-cpu area */ | ||
1424 | switch_to_new_gdt(cpu); /* GDT and GS set */ | ||
1425 | |||
1426 | /* We are switching from the Xen-provided GDT to our HVM mode | ||
1427 | * GDT. The new GDT has __KERNEL_CS with CS.L = 1 | ||
1428 | * and we are jumping to reload it. | ||
1429 | */ | ||
1430 | asm volatile ("pushq %0\n" | ||
1431 | "leaq 1f(%%rip),%0\n" | ||
1432 | "pushq %0\n" | ||
1433 | "lretq\n" | ||
1434 | "1:\n" | ||
1435 | : "=&r" (dummy) : "0" (__KERNEL_CS)); | ||
1436 | |||
1437 | /* | ||
1438 | * While not needed, we also set the %es, %ds, and %fs | ||
1439 | * to zero. We don't care about %ss as it is NULL. | ||
1440 | * Strictly speaking this is not needed as Xen zeros those | ||
1441 | * out (and also MSR_FS_BASE, MSR_GS_BASE, MSR_KERNEL_GS_BASE) | ||
1442 | * | ||
1443 | * Linux zeros them in cpu_init() and in secondary_startup_64 | ||
1444 | * (for BSP). | ||
1445 | */ | ||
1446 | loadsegment(es, 0); | ||
1447 | loadsegment(ds, 0); | ||
1448 | loadsegment(fs, 0); | ||
1449 | #else | ||
1450 | /* PVH: TODO Implement. */ | ||
1451 | BUG(); | ||
1452 | #endif | ||
1453 | return; /* PVH does not need any PV GDT ops. */ | ||
1454 | } | ||
1413 | pv_cpu_ops.write_gdt_entry = xen_write_gdt_entry_boot; | 1455 | pv_cpu_ops.write_gdt_entry = xen_write_gdt_entry_boot; |
1414 | pv_cpu_ops.load_gdt = xen_load_gdt_boot; | 1456 | pv_cpu_ops.load_gdt = xen_load_gdt_boot; |
1415 | 1457 | ||
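The pushq/lretq sequence in the hunk above is the conventional way to reload %cs on x86-64, where mov-to-%cs and far jumps with immediate selectors are unavailable; lretq pops the new RIP and then the new CS off the stack. The same trick in isolation (a sketch mirroring the inline asm above, not patch code):

/* Reload %cs with new_cs on x86-64.  lretq pops RIP first, then CS,
 * so push CS first and the continuation address second. */
static inline void reload_cs(unsigned long new_cs)
{
        unsigned long tmp;

        asm volatile("pushq %0\n\t"             /* CS */
                     "leaq 1f(%%rip), %0\n\t"
                     "pushq %0\n\t"             /* RIP = label 1 */
                     "lretq\n"
                     "1:"
                     : "=&r" (tmp) : "0" (new_cs) : "memory");
}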
@@ -1420,6 +1462,58 @@ static void __init xen_setup_stackprotector(void) | |||
1420 | pv_cpu_ops.load_gdt = xen_load_gdt; | 1462 | pv_cpu_ops.load_gdt = xen_load_gdt; |
1421 | } | 1463 | } |
1422 | 1464 | ||
1465 | /* | ||
1466 | * A PV guest starts with default flags that are not set for PVH, set them | ||
1467 | * here asap. | ||
1468 | */ | ||
1469 | static void xen_pvh_set_cr_flags(int cpu) | ||
1470 | { | ||
1471 | |||
1472 | /* Some of these are set up in 'secondary_startup_64'. The others: | ||
1473 | * X86_CR0_TS, X86_CR0_PE, X86_CR0_ET are set by Xen for HVM guests | ||
1474 | * (with which PVH shares codepaths), while X86_CR0_PG is for PVH. */ | ||
1475 | write_cr0(read_cr0() | X86_CR0_MP | X86_CR0_NE | X86_CR0_WP | X86_CR0_AM); | ||
1476 | |||
1477 | if (!cpu) | ||
1478 | return; | ||
1479 | /* | ||
1480 | * For BSP, PSE PGE are set in probe_page_size_mask(), for APs | ||
1481 | * set them here. For all, OSFXSR OSXMMEXCPT are set in fpu_init. | ||
1482 | */ | ||
1483 | if (cpu_has_pse) | ||
1484 | set_in_cr4(X86_CR4_PSE); | ||
1485 | |||
1486 | if (cpu_has_pge) | ||
1487 | set_in_cr4(X86_CR4_PGE); | ||
1488 | } | ||
1489 | |||
1490 | /* | ||
1491 | * Note that it is __ref because the only caller of this after init | ||
1492 | * is PVH which is not going to use xen_load_gdt_boot or other | ||
1493 | * __init functions. | ||
1494 | */ | ||
1495 | void __ref xen_pvh_secondary_vcpu_init(int cpu) | ||
1496 | { | ||
1497 | xen_setup_gdt(cpu); | ||
1498 | xen_pvh_set_cr_flags(cpu); | ||
1499 | } | ||
1500 | |||
1501 | static void __init xen_pvh_early_guest_init(void) | ||
1502 | { | ||
1503 | if (!xen_feature(XENFEAT_auto_translated_physmap)) | ||
1504 | return; | ||
1505 | |||
1506 | if (!xen_feature(XENFEAT_hvm_callback_vector)) | ||
1507 | return; | ||
1508 | |||
1509 | xen_have_vector_callback = 1; | ||
1510 | xen_pvh_set_cr_flags(0); | ||
1511 | |||
1512 | #ifdef CONFIG_X86_32 | ||
1513 | BUG(); /* PVH: Implement proper support. */ | ||
1514 | #endif | ||
1515 | } | ||
1516 | |||
1423 | /* First C function to be called on Xen boot */ | 1517 | /* First C function to be called on Xen boot */ |
1424 | asmlinkage void __init xen_start_kernel(void) | 1518 | asmlinkage void __init xen_start_kernel(void) |
1425 | { | 1519 | { |
@@ -1431,13 +1525,16 @@ asmlinkage void __init xen_start_kernel(void) | |||
1431 | 1525 | ||
1432 | xen_domain_type = XEN_PV_DOMAIN; | 1526 | xen_domain_type = XEN_PV_DOMAIN; |
1433 | 1527 | ||
1528 | xen_setup_features(); | ||
1529 | xen_pvh_early_guest_init(); | ||
1434 | xen_setup_machphys_mapping(); | 1530 | xen_setup_machphys_mapping(); |
1435 | 1531 | ||
1436 | /* Install Xen paravirt ops */ | 1532 | /* Install Xen paravirt ops */ |
1437 | pv_info = xen_info; | 1533 | pv_info = xen_info; |
1438 | pv_init_ops = xen_init_ops; | 1534 | pv_init_ops = xen_init_ops; |
1439 | pv_cpu_ops = xen_cpu_ops; | ||
1440 | pv_apic_ops = xen_apic_ops; | 1535 | pv_apic_ops = xen_apic_ops; |
1536 | if (!xen_pvh_domain()) | ||
1537 | pv_cpu_ops = xen_cpu_ops; | ||
1441 | 1538 | ||
1442 | x86_init.resources.memory_setup = xen_memory_setup; | 1539 | x86_init.resources.memory_setup = xen_memory_setup; |
1443 | x86_init.oem.arch_setup = xen_arch_setup; | 1540 | x86_init.oem.arch_setup = xen_arch_setup; |
@@ -1469,17 +1566,14 @@ asmlinkage void __init xen_start_kernel(void) | |||
1469 | /* Work out if we support NX */ | 1566 | /* Work out if we support NX */ |
1470 | x86_configure_nx(); | 1567 | x86_configure_nx(); |
1471 | 1568 | ||
1472 | xen_setup_features(); | ||
1473 | |||
1474 | /* Get mfn list */ | 1569 | /* Get mfn list */ |
1475 | if (!xen_feature(XENFEAT_auto_translated_physmap)) | 1570 | xen_build_dynamic_phys_to_machine(); |
1476 | xen_build_dynamic_phys_to_machine(); | ||
1477 | 1571 | ||
1478 | /* | 1572 | /* |
1479 | * Set up kernel GDT and segment registers, mainly so that | 1573 | * Set up kernel GDT and segment registers, mainly so that |
1480 | * -fstack-protector code can be executed. | 1574 | * -fstack-protector code can be executed. |
1481 | */ | 1575 | */ |
1482 | xen_setup_stackprotector(); | 1576 | xen_setup_gdt(0); |
1483 | 1577 | ||
1484 | xen_init_irq_ops(); | 1578 | xen_init_irq_ops(); |
1485 | xen_init_cpuid_mask(); | 1579 | xen_init_cpuid_mask(); |
@@ -1548,14 +1642,18 @@ asmlinkage void __init xen_start_kernel(void) | |||
1548 | /* set the limit of our address space */ | 1642 | /* set the limit of our address space */ |
1549 | xen_reserve_top(); | 1643 | xen_reserve_top(); |
1550 | 1644 | ||
1551 | /* We used to do this in xen_arch_setup, but that is too late on AMD | 1645 | /* PVH: runs at default kernel iopl of 0 */ |
1552 | * were early_cpu_init (run before ->arch_setup()) calls early_amd_init | 1646 | if (!xen_pvh_domain()) { |
1553 | * which pokes 0xcf8 port. | 1647 | /* |
1554 | */ | 1648 | * We used to do this in xen_arch_setup, but that is too late |
1555 | set_iopl.iopl = 1; | 1649 | * on AMD where early_cpu_init (run before ->arch_setup()) calls
1556 | rc = HYPERVISOR_physdev_op(PHYSDEVOP_set_iopl, &set_iopl); | 1650 | * early_amd_init which pokes 0xcf8 port. |
1557 | if (rc != 0) | 1651 | */ |
1558 | xen_raw_printk("physdev_op failed %d\n", rc); | 1652 | set_iopl.iopl = 1; |
1653 | rc = HYPERVISOR_physdev_op(PHYSDEVOP_set_iopl, &set_iopl); | ||
1654 | if (rc != 0) | ||
1655 | xen_raw_printk("physdev_op failed %d\n", rc); | ||
1656 | } | ||
1559 | 1657 | ||
1560 | #ifdef CONFIG_X86_32 | 1658 | #ifdef CONFIG_X86_32 |
1561 | /* set up basic CPUID stuff */ | 1659 | /* set up basic CPUID stuff */ |
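Taken together, the enlighten.c hunks split early boot along one feature test. A condensed view of the PVH decision points this file now carries (paraphrase, not patch code):

/*
 *   if (xen_feature(XENFEAT_auto_translated_physmap)) {
 *           // PVH: native GDT reload (xen_setup_gdt), pv_cpu_ops
 *           // left native, default kernel iopl of 0, HVM-style
 *           // event-channel callback vector
 *   } else {
 *           // classic PV: xen_cpu_ops installed, PV GDT hypercalls,
 *           // PHYSDEVOP_set_iopl, PV irq ops
 *   }
 */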
diff --git a/arch/x86/xen/grant-table.c b/arch/x86/xen/grant-table.c index 3a5f55d51907..c98583588580 100644 --- a/arch/x86/xen/grant-table.c +++ b/arch/x86/xen/grant-table.c | |||
@@ -125,3 +125,67 @@ void arch_gnttab_unmap(void *shared, unsigned long nr_gframes) | |||
125 | apply_to_page_range(&init_mm, (unsigned long)shared, | 125 | apply_to_page_range(&init_mm, (unsigned long)shared, |
126 | PAGE_SIZE * nr_gframes, unmap_pte_fn, NULL); | 126 | PAGE_SIZE * nr_gframes, unmap_pte_fn, NULL); |
127 | } | 127 | } |
128 | #ifdef CONFIG_XEN_PVH | ||
129 | #include <xen/balloon.h> | ||
130 | #include <xen/events.h> | ||
131 | #include <xen/xen.h> | ||
132 | #include <linux/slab.h> | ||
133 | static int __init xlated_setup_gnttab_pages(void) | ||
134 | { | ||
135 | struct page **pages; | ||
136 | xen_pfn_t *pfns; | ||
137 | int rc; | ||
138 | unsigned int i; | ||
139 | unsigned long nr_grant_frames = gnttab_max_grant_frames(); | ||
140 | |||
141 | BUG_ON(nr_grant_frames == 0); | ||
142 | pages = kcalloc(nr_grant_frames, sizeof(pages[0]), GFP_KERNEL); | ||
143 | if (!pages) | ||
144 | return -ENOMEM; | ||
145 | |||
146 | pfns = kcalloc(nr_grant_frames, sizeof(pfns[0]), GFP_KERNEL); | ||
147 | if (!pfns) { | ||
148 | kfree(pages); | ||
149 | return -ENOMEM; | ||
150 | } | ||
151 | rc = alloc_xenballooned_pages(nr_grant_frames, pages, 0 /* lowmem */); | ||
152 | if (rc) { | ||
153 | pr_warn("%s Couldn't balloon alloc %ld pfns rc:%d\n", __func__, | ||
154 | nr_grant_frames, rc); | ||
155 | kfree(pages); | ||
156 | kfree(pfns); | ||
157 | return rc; | ||
158 | } | ||
159 | for (i = 0; i < nr_grant_frames; i++) | ||
160 | pfns[i] = page_to_pfn(pages[i]); | ||
161 | |||
162 | rc = arch_gnttab_map_shared(pfns, nr_grant_frames, nr_grant_frames, | ||
163 | &xen_auto_xlat_grant_frames.vaddr); | ||
164 | |||
165 | if (rc) { | ||
166 | pr_warn("%s Couldn't map %ld pfns rc:%d\n", __func__, | ||
167 | nr_grant_frames, rc); | ||
168 | free_xenballooned_pages(nr_grant_frames, pages); | ||
169 | kfree(pages); | ||
170 | kfree(pfns); | ||
171 | return rc; | ||
172 | } | ||
173 | kfree(pages); | ||
174 | |||
175 | xen_auto_xlat_grant_frames.pfn = pfns; | ||
176 | xen_auto_xlat_grant_frames.count = nr_grant_frames; | ||
177 | |||
178 | return 0; | ||
179 | } | ||
180 | |||
181 | static int __init xen_pvh_gnttab_setup(void) | ||
182 | { | ||
183 | if (!xen_pvh_domain()) | ||
184 | return -ENODEV; | ||
185 | |||
186 | return xlated_setup_gnttab_pages(); | ||
187 | } | ||
188 | /* Call it _before_ __gnttab_init as we need to initialize the | ||
189 | * xen_auto_xlat_grant_frames first. */ | ||
190 | core_initcall(xen_pvh_gnttab_setup); | ||
191 | #endif | ||
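core_initcall() matters here because initcalls run strictly in level order; a compressed view of the ordering the comment above relies on (levels per include/linux/init.h; not patch code):

/*
 *   core_initcall(xen_pvh_gnttab_setup);  // level 1: balloon out
 *                                         // frames and populate
 *                                         // xen_auto_xlat_grant_frames
 *   ...
 *   the generic grant-table init runs at a later initcall level and
 *   finds the auto-translated frames already registered.
 */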
diff --git a/arch/x86/xen/irq.c b/arch/x86/xen/irq.c index 0da7f863056f..08f763de26fe 100644 --- a/arch/x86/xen/irq.c +++ b/arch/x86/xen/irq.c | |||
@@ -5,6 +5,7 @@ | |||
5 | #include <xen/interface/xen.h> | 5 | #include <xen/interface/xen.h> |
6 | #include <xen/interface/sched.h> | 6 | #include <xen/interface/sched.h> |
7 | #include <xen/interface/vcpu.h> | 7 | #include <xen/interface/vcpu.h> |
8 | #include <xen/features.h> | ||
8 | #include <xen/events.h> | 9 | #include <xen/events.h> |
9 | 10 | ||
10 | #include <asm/xen/hypercall.h> | 11 | #include <asm/xen/hypercall.h> |
@@ -22,7 +23,7 @@ void xen_force_evtchn_callback(void) | |||
22 | (void)HYPERVISOR_xen_version(0, NULL); | 23 | (void)HYPERVISOR_xen_version(0, NULL); |
23 | } | 24 | } |
24 | 25 | ||
25 | static unsigned long xen_save_fl(void) | 26 | asmlinkage unsigned long xen_save_fl(void) |
26 | { | 27 | { |
27 | struct vcpu_info *vcpu; | 28 | struct vcpu_info *vcpu; |
28 | unsigned long flags; | 29 | unsigned long flags; |
@@ -40,7 +41,7 @@ static unsigned long xen_save_fl(void) | |||
40 | } | 41 | } |
41 | PV_CALLEE_SAVE_REGS_THUNK(xen_save_fl); | 42 | PV_CALLEE_SAVE_REGS_THUNK(xen_save_fl); |
42 | 43 | ||
43 | static void xen_restore_fl(unsigned long flags) | 44 | __visible void xen_restore_fl(unsigned long flags) |
44 | { | 45 | { |
45 | struct vcpu_info *vcpu; | 46 | struct vcpu_info *vcpu; |
46 | 47 | ||
@@ -62,7 +63,7 @@ static void xen_restore_fl(unsigned long flags) | |||
62 | } | 63 | } |
63 | PV_CALLEE_SAVE_REGS_THUNK(xen_restore_fl); | 64 | PV_CALLEE_SAVE_REGS_THUNK(xen_restore_fl); |
64 | 65 | ||
65 | static void xen_irq_disable(void) | 66 | asmlinkage void xen_irq_disable(void) |
66 | { | 67 | { |
67 | /* There's a one instruction preempt window here. We need to | 68 | /* There's a one instruction preempt window here. We need to |
68 | make sure we don't switch CPUs between getting the vcpu | 69
@@ -73,7 +74,7 @@ static void xen_irq_disable(void) | |||
73 | } | 74 | } |
74 | PV_CALLEE_SAVE_REGS_THUNK(xen_irq_disable); | 75 | PV_CALLEE_SAVE_REGS_THUNK(xen_irq_disable); |
75 | 76 | ||
76 | static void xen_irq_enable(void) | 77 | asmlinkage void xen_irq_enable(void) |
77 | { | 78 | { |
78 | struct vcpu_info *vcpu; | 79 | struct vcpu_info *vcpu; |
79 | 80 | ||
@@ -128,6 +129,8 @@ static const struct pv_irq_ops xen_irq_ops __initconst = { | |||
128 | 129 | ||
129 | void __init xen_init_irq_ops(void) | 130 | void __init xen_init_irq_ops(void) |
130 | { | 131 | { |
131 | pv_irq_ops = xen_irq_ops; | 132 | /* For PVH we use default pv_irq_ops settings. */ |
133 | if (!xen_feature(XENFEAT_hvm_callback_vector)) | ||
134 | pv_irq_ops = xen_irq_ops; | ||
132 | x86_init.irqs.intr_init = xen_init_IRQ; | 135 | x86_init.irqs.intr_init = xen_init_IRQ; |
133 | } | 136 | } |
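Dropping 'static' and adding asmlinkage/__visible on these handlers is what lets PV_CALLEE_SAVE_REGS_THUNK() keep working: the macro emits an assembly thunk that calls the C function by symbol name. A simplified picture (sketch, not the real expansion):

/*
 * Simplified view of PV_CALLEE_SAVE_REGS_THUNK(xen_save_fl):
 *
 *   __raw_callee_save_xen_save_fl:
 *           push caller-clobbered registers
 *           call xen_save_fl        <-- references the C symbol
 *           pop the saved registers
 *           ret
 *
 * A static function can be inlined away or given a custom calling
 * convention by the compiler (notably under LTO); asmlinkage and
 * __visible pin the symbol and its standard C ABI for the asm side.
 */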
diff --git a/arch/x86/xen/mmu.c b/arch/x86/xen/mmu.c index ce563be09cc1..86e02eabb640 100644 --- a/arch/x86/xen/mmu.c +++ b/arch/x86/xen/mmu.c | |||
@@ -431,7 +431,7 @@ static pteval_t iomap_pte(pteval_t val) | |||
431 | return val; | 431 | return val; |
432 | } | 432 | } |
433 | 433 | ||
434 | static pteval_t xen_pte_val(pte_t pte) | 434 | __visible pteval_t xen_pte_val(pte_t pte) |
435 | { | 435 | { |
436 | pteval_t pteval = pte.pte; | 436 | pteval_t pteval = pte.pte; |
437 | #if 0 | 437 | #if 0 |
@@ -448,7 +448,7 @@ static pteval_t xen_pte_val(pte_t pte) | |||
448 | } | 448 | } |
449 | PV_CALLEE_SAVE_REGS_THUNK(xen_pte_val); | 449 | PV_CALLEE_SAVE_REGS_THUNK(xen_pte_val); |
450 | 450 | ||
451 | static pgdval_t xen_pgd_val(pgd_t pgd) | 451 | __visible pgdval_t xen_pgd_val(pgd_t pgd) |
452 | { | 452 | { |
453 | return pte_mfn_to_pfn(pgd.pgd); | 453 | return pte_mfn_to_pfn(pgd.pgd); |
454 | } | 454 | } |
@@ -479,7 +479,7 @@ void xen_set_pat(u64 pat) | |||
479 | WARN_ON(pat != 0x0007010600070106ull); | 479 | WARN_ON(pat != 0x0007010600070106ull); |
480 | } | 480 | } |
481 | 481 | ||
482 | static pte_t xen_make_pte(pteval_t pte) | 482 | __visible pte_t xen_make_pte(pteval_t pte) |
483 | { | 483 | { |
484 | phys_addr_t addr = (pte & PTE_PFN_MASK); | 484 | phys_addr_t addr = (pte & PTE_PFN_MASK); |
485 | #if 0 | 485 | #if 0 |
@@ -514,14 +514,14 @@ static pte_t xen_make_pte(pteval_t pte) | |||
514 | } | 514 | } |
515 | PV_CALLEE_SAVE_REGS_THUNK(xen_make_pte); | 515 | PV_CALLEE_SAVE_REGS_THUNK(xen_make_pte); |
516 | 516 | ||
517 | static pgd_t xen_make_pgd(pgdval_t pgd) | 517 | __visible pgd_t xen_make_pgd(pgdval_t pgd) |
518 | { | 518 | { |
519 | pgd = pte_pfn_to_mfn(pgd); | 519 | pgd = pte_pfn_to_mfn(pgd); |
520 | return native_make_pgd(pgd); | 520 | return native_make_pgd(pgd); |
521 | } | 521 | } |
522 | PV_CALLEE_SAVE_REGS_THUNK(xen_make_pgd); | 522 | PV_CALLEE_SAVE_REGS_THUNK(xen_make_pgd); |
523 | 523 | ||
524 | static pmdval_t xen_pmd_val(pmd_t pmd) | 524 | __visible pmdval_t xen_pmd_val(pmd_t pmd) |
525 | { | 525 | { |
526 | return pte_mfn_to_pfn(pmd.pmd); | 526 | return pte_mfn_to_pfn(pmd.pmd); |
527 | } | 527 | } |
@@ -580,7 +580,7 @@ static void xen_pmd_clear(pmd_t *pmdp) | |||
580 | } | 580 | } |
581 | #endif /* CONFIG_X86_PAE */ | 581 | #endif /* CONFIG_X86_PAE */ |
582 | 582 | ||
583 | static pmd_t xen_make_pmd(pmdval_t pmd) | 583 | __visible pmd_t xen_make_pmd(pmdval_t pmd) |
584 | { | 584 | { |
585 | pmd = pte_pfn_to_mfn(pmd); | 585 | pmd = pte_pfn_to_mfn(pmd); |
586 | return native_make_pmd(pmd); | 586 | return native_make_pmd(pmd); |
@@ -588,13 +588,13 @@ static pmd_t xen_make_pmd(pmdval_t pmd) | |||
588 | PV_CALLEE_SAVE_REGS_THUNK(xen_make_pmd); | 588 | PV_CALLEE_SAVE_REGS_THUNK(xen_make_pmd); |
589 | 589 | ||
590 | #if PAGETABLE_LEVELS == 4 | 590 | #if PAGETABLE_LEVELS == 4 |
591 | static pudval_t xen_pud_val(pud_t pud) | 591 | __visible pudval_t xen_pud_val(pud_t pud) |
592 | { | 592 | { |
593 | return pte_mfn_to_pfn(pud.pud); | 593 | return pte_mfn_to_pfn(pud.pud); |
594 | } | 594 | } |
595 | PV_CALLEE_SAVE_REGS_THUNK(xen_pud_val); | 595 | PV_CALLEE_SAVE_REGS_THUNK(xen_pud_val); |
596 | 596 | ||
597 | static pud_t xen_make_pud(pudval_t pud) | 597 | __visible pud_t xen_make_pud(pudval_t pud) |
598 | { | 598 | { |
599 | pud = pte_pfn_to_mfn(pud); | 599 | pud = pte_pfn_to_mfn(pud); |
600 | 600 | ||
@@ -1198,44 +1198,40 @@ static void __init xen_cleanhighmap(unsigned long vaddr, | |||
1198 | * instead of somewhere later and be confusing. */ | 1198 | * instead of somewhere later and be confusing. */ |
1199 | xen_mc_flush(); | 1199 | xen_mc_flush(); |
1200 | } | 1200 | } |
1201 | #endif | 1201 | static void __init xen_pagetable_p2m_copy(void) |
1202 | static void __init xen_pagetable_init(void) | ||
1203 | { | 1202 | { |
1204 | #ifdef CONFIG_X86_64 | ||
1205 | unsigned long size; | 1203 | unsigned long size; |
1206 | unsigned long addr; | 1204 | unsigned long addr; |
1207 | #endif | 1205 | unsigned long new_mfn_list; |
1208 | paging_init(); | 1206 | |
1209 | xen_setup_shared_info(); | 1207 | if (xen_feature(XENFEAT_auto_translated_physmap)) |
1210 | #ifdef CONFIG_X86_64 | 1208 | return; |
1211 | if (!xen_feature(XENFEAT_auto_translated_physmap)) { | 1209 | |
1212 | unsigned long new_mfn_list; | 1210 | size = PAGE_ALIGN(xen_start_info->nr_pages * sizeof(unsigned long)); |
1213 | 1211 | ||
1214 | size = PAGE_ALIGN(xen_start_info->nr_pages * sizeof(unsigned long)); | 1212 | new_mfn_list = xen_revector_p2m_tree(); |
1215 | 1213 | /* No memory or already called. */ | |
1216 | /* On 32-bit, we get zero so this never gets executed. */ | 1214 | if (!new_mfn_list || new_mfn_list == xen_start_info->mfn_list) |
1217 | new_mfn_list = xen_revector_p2m_tree(); | 1215 | return; |
1218 | if (new_mfn_list && new_mfn_list != xen_start_info->mfn_list) { | 1216 | |
1219 | /* using __ka address and sticking INVALID_P2M_ENTRY! */ | 1217 | /* using __ka address and sticking INVALID_P2M_ENTRY! */ |
1220 | memset((void *)xen_start_info->mfn_list, 0xff, size); | 1218 | memset((void *)xen_start_info->mfn_list, 0xff, size); |
1221 | 1219 | ||
1222 | /* We should be in __ka space. */ | 1220 | /* We should be in __ka space. */ |
1223 | BUG_ON(xen_start_info->mfn_list < __START_KERNEL_map); | 1221 | BUG_ON(xen_start_info->mfn_list < __START_KERNEL_map); |
1224 | addr = xen_start_info->mfn_list; | 1222 | addr = xen_start_info->mfn_list; |
1225 | /* We round up to the PMD, which means that if anybody at this stage is | 1223 | /* We round up to the PMD, which means that if anybody at this stage is
1226 | * using the __ka address of xen_start_info or xen_start_info->shared_info | 1224 | * using the __ka address of xen_start_info or xen_start_info->shared_info |
1227 | * they are going to crash. Fortunately we have already revectored | 1225 | * they are going to crash. Fortunately we have already revectored
1228 | * in xen_setup_kernel_pagetable and in xen_setup_shared_info. */ | 1226 | * in xen_setup_kernel_pagetable and in xen_setup_shared_info. */ |
1229 | size = roundup(size, PMD_SIZE); | 1227 | size = roundup(size, PMD_SIZE); |
1230 | xen_cleanhighmap(addr, addr + size); | 1228 | xen_cleanhighmap(addr, addr + size); |
1231 | 1229 | ||
1232 | size = PAGE_ALIGN(xen_start_info->nr_pages * sizeof(unsigned long)); | 1230 | size = PAGE_ALIGN(xen_start_info->nr_pages * sizeof(unsigned long)); |
1233 | memblock_free(__pa(xen_start_info->mfn_list), size); | 1231 | memblock_free(__pa(xen_start_info->mfn_list), size); |
1234 | /* And revector! Bye bye old array */ | 1232 | /* And revector! Bye bye old array */ |
1235 | xen_start_info->mfn_list = new_mfn_list; | 1233 | xen_start_info->mfn_list = new_mfn_list; |
1236 | } else | 1234 | |
1237 | goto skip; | ||
1238 | } | ||
1239 | /* At this stage, cleanup_highmap has already cleaned __ka space | 1235 | /* At this stage, cleanup_highmap has already cleaned __ka space |
1240 | * from _brk_limit way up to the max_pfn_mapped (which is the end of | 1236 | * from _brk_limit way up to the max_pfn_mapped (which is the end of |
1241 | * the ramdisk). We continue on, erasing PMD entries that point to page | 1237 | * the ramdisk). We continue on, erasing PMD entries that point to page |
@@ -1255,7 +1251,15 @@ static void __init xen_pagetable_init(void) | |||
1255 | * anything at this stage. */ | 1251 | * anything at this stage. */ |
1256 | xen_cleanhighmap(MODULES_VADDR, roundup(MODULES_VADDR, PUD_SIZE) - 1); | 1252 | xen_cleanhighmap(MODULES_VADDR, roundup(MODULES_VADDR, PUD_SIZE) - 1); |
1257 | #endif | 1253 | #endif |
1258 | skip: | 1254 | } |
1255 | #endif | ||
1256 | |||
1257 | static void __init xen_pagetable_init(void) | ||
1258 | { | ||
1259 | paging_init(); | ||
1260 | xen_setup_shared_info(); | ||
1261 | #ifdef CONFIG_X86_64 | ||
1262 | xen_pagetable_p2m_copy(); | ||
1259 | #endif | 1263 | #endif |
1260 | xen_post_allocator_init(); | 1264 | xen_post_allocator_init(); |
1261 | } | 1265 | } |
@@ -1753,6 +1757,10 @@ static void set_page_prot_flags(void *addr, pgprot_t prot, unsigned long flags) | |||
1753 | unsigned long pfn = __pa(addr) >> PAGE_SHIFT; | 1757 | unsigned long pfn = __pa(addr) >> PAGE_SHIFT; |
1754 | pte_t pte = pfn_pte(pfn, prot); | 1758 | pte_t pte = pfn_pte(pfn, prot); |
1755 | 1759 | ||
1760 | /* For PVH no need to set R/O or R/W to pin them or unpin them. */ | ||
1761 | if (xen_feature(XENFEAT_auto_translated_physmap)) | ||
1762 | return; | ||
1763 | |||
1756 | if (HYPERVISOR_update_va_mapping((unsigned long)addr, pte, flags)) | 1764 | if (HYPERVISOR_update_va_mapping((unsigned long)addr, pte, flags)) |
1757 | BUG(); | 1765 | BUG(); |
1758 | } | 1766 | } |
@@ -1863,6 +1871,7 @@ static void __init check_pt_base(unsigned long *pt_base, unsigned long *pt_end, | |||
1863 | * but that's enough to get __va working. We need to fill in the rest | 1871 | * but that's enough to get __va working. We need to fill in the rest |
1864 | * of the physical mapping once some sort of allocator has been set | 1872 | * of the physical mapping once some sort of allocator has been set |
1865 | * up. | 1873 | * up. |
1874 | * NOTE: for PVH, the page tables are native. | ||
1866 | */ | 1875 | */ |
1867 | void __init xen_setup_kernel_pagetable(pgd_t *pgd, unsigned long max_pfn) | 1876 | void __init xen_setup_kernel_pagetable(pgd_t *pgd, unsigned long max_pfn) |
1868 | { | 1877 | { |
@@ -1884,17 +1893,18 @@ void __init xen_setup_kernel_pagetable(pgd_t *pgd, unsigned long max_pfn) | |||
1884 | /* Zap identity mapping */ | 1893 | /* Zap identity mapping */ |
1885 | init_level4_pgt[0] = __pgd(0); | 1894 | init_level4_pgt[0] = __pgd(0); |
1886 | 1895 | ||
1887 | /* Pre-constructed entries are in pfn, so convert to mfn */ | 1896 | if (!xen_feature(XENFEAT_auto_translated_physmap)) { |
1888 | /* L4[272] -> level3_ident_pgt | 1897 | /* Pre-constructed entries are in pfn, so convert to mfn */ |
1889 | * L4[511] -> level3_kernel_pgt */ | 1898 | /* L4[272] -> level3_ident_pgt |
1890 | convert_pfn_mfn(init_level4_pgt); | 1899 | * L4[511] -> level3_kernel_pgt */ |
1891 | 1900 | convert_pfn_mfn(init_level4_pgt); | |
1892 | /* L3_i[0] -> level2_ident_pgt */ | 1901 | |
1893 | convert_pfn_mfn(level3_ident_pgt); | 1902 | /* L3_i[0] -> level2_ident_pgt */ |
1894 | /* L3_k[510] -> level2_kernel_pgt | 1903 | convert_pfn_mfn(level3_ident_pgt); |
1895 | * L3_i[511] -> level2_fixmap_pgt */ | 1904 | /* L3_k[510] -> level2_kernel_pgt |
1896 | convert_pfn_mfn(level3_kernel_pgt); | 1905 | * L3_i[511] -> level2_fixmap_pgt */ |
1897 | 1906 | convert_pfn_mfn(level3_kernel_pgt); | |
1907 | } | ||
1898 | /* We get [511][511] and have Xen's version of level2_kernel_pgt */ | 1908 | /* We get [511][511] and have Xen's version of level2_kernel_pgt */ |
1899 | l3 = m2v(pgd[pgd_index(__START_KERNEL_map)].pgd); | 1909 | l3 = m2v(pgd[pgd_index(__START_KERNEL_map)].pgd); |
1900 | l2 = m2v(l3[pud_index(__START_KERNEL_map)].pud); | 1910 | l2 = m2v(l3[pud_index(__START_KERNEL_map)].pud); |
@@ -1918,31 +1928,33 @@ void __init xen_setup_kernel_pagetable(pgd_t *pgd, unsigned long max_pfn) | |||
1918 | copy_page(level2_fixmap_pgt, l2); | 1928 | copy_page(level2_fixmap_pgt, l2); |
1919 | /* Note that we don't do anything with level1_fixmap_pgt which | 1929 | /* Note that we don't do anything with level1_fixmap_pgt which |
1920 | * we don't need. */ | 1930 | * we don't need. */ |
1931 | if (!xen_feature(XENFEAT_auto_translated_physmap)) { | ||
1932 | /* Make pagetable pieces RO */ | ||
1933 | set_page_prot(init_level4_pgt, PAGE_KERNEL_RO); | ||
1934 | set_page_prot(level3_ident_pgt, PAGE_KERNEL_RO); | ||
1935 | set_page_prot(level3_kernel_pgt, PAGE_KERNEL_RO); | ||
1936 | set_page_prot(level3_user_vsyscall, PAGE_KERNEL_RO); | ||
1937 | set_page_prot(level2_ident_pgt, PAGE_KERNEL_RO); | ||
1938 | set_page_prot(level2_kernel_pgt, PAGE_KERNEL_RO); | ||
1939 | set_page_prot(level2_fixmap_pgt, PAGE_KERNEL_RO); | ||
1940 | |||
1941 | /* Pin down new L4 */ | ||
1942 | pin_pagetable_pfn(MMUEXT_PIN_L4_TABLE, | ||
1943 | PFN_DOWN(__pa_symbol(init_level4_pgt))); | ||
1944 | |||
1945 | /* Unpin Xen-provided one */ | ||
1946 | pin_pagetable_pfn(MMUEXT_UNPIN_TABLE, PFN_DOWN(__pa(pgd))); | ||
1921 | 1947 | ||
1922 | /* Make pagetable pieces RO */ | 1948 | /* |
1923 | set_page_prot(init_level4_pgt, PAGE_KERNEL_RO); | 1949 | * At this stage there can be no user pgd, and no page |
1924 | set_page_prot(level3_ident_pgt, PAGE_KERNEL_RO); | 1950 | * structure to attach it to, so make sure we just set kernel |
1925 | set_page_prot(level3_kernel_pgt, PAGE_KERNEL_RO); | 1951 | * pgd. |
1926 | set_page_prot(level3_user_vsyscall, PAGE_KERNEL_RO); | 1952 | */ |
1927 | set_page_prot(level2_ident_pgt, PAGE_KERNEL_RO); | 1953 | xen_mc_batch(); |
1928 | set_page_prot(level2_kernel_pgt, PAGE_KERNEL_RO); | 1954 | __xen_write_cr3(true, __pa(init_level4_pgt)); |
1929 | set_page_prot(level2_fixmap_pgt, PAGE_KERNEL_RO); | 1955 | xen_mc_issue(PARAVIRT_LAZY_CPU); |
1930 | 1956 | } else | |
1931 | /* Pin down new L4 */ | 1957 | native_write_cr3(__pa(init_level4_pgt)); |
1932 | pin_pagetable_pfn(MMUEXT_PIN_L4_TABLE, | ||
1933 | PFN_DOWN(__pa_symbol(init_level4_pgt))); | ||
1934 | |||
1935 | /* Unpin Xen-provided one */ | ||
1936 | pin_pagetable_pfn(MMUEXT_UNPIN_TABLE, PFN_DOWN(__pa(pgd))); | ||
1937 | |||
1938 | /* | ||
1939 | * At this stage there can be no user pgd, and no page | ||
1940 | * structure to attach it to, so make sure we just set kernel | ||
1941 | * pgd. | ||
1942 | */ | ||
1943 | xen_mc_batch(); | ||
1944 | __xen_write_cr3(true, __pa(init_level4_pgt)); | ||
1945 | xen_mc_issue(PARAVIRT_LAZY_CPU); | ||
1946 | 1958 | ||
1947 | /* We can't that easily rip out L3 and L2, as the Xen pagetables are | 1959 | /* We can't that easily rip out L3 and L2, as the Xen pagetables are |
1948 | * set out this way: [L4], [L1], [L2], [L3], [L1], [L1] ... for | 1960 | * set out this way: [L4], [L1], [L2], [L3], [L1], [L1] ... for |
@@ -2046,7 +2058,6 @@ static void xen_set_fixmap(unsigned idx, phys_addr_t phys, pgprot_t prot) | |||
2046 | case FIX_RO_IDT: | 2058 | case FIX_RO_IDT: |
2047 | #ifdef CONFIG_X86_32 | 2059 | #ifdef CONFIG_X86_32 |
2048 | case FIX_WP_TEST: | 2060 | case FIX_WP_TEST: |
2049 | case FIX_VDSO: | ||
2050 | # ifdef CONFIG_HIGHMEM | 2061 | # ifdef CONFIG_HIGHMEM |
2051 | case FIX_KMAP_BEGIN ... FIX_KMAP_END: | 2062 | case FIX_KMAP_BEGIN ... FIX_KMAP_END: |
2052 | # endif | 2063 | # endif |
@@ -2103,6 +2114,9 @@ static void xen_set_fixmap(unsigned idx, phys_addr_t phys, pgprot_t prot) | |||
2103 | 2114 | ||
2104 | static void __init xen_post_allocator_init(void) | 2115 | static void __init xen_post_allocator_init(void) |
2105 | { | 2116 | { |
2117 | if (xen_feature(XENFEAT_auto_translated_physmap)) | ||
2118 | return; | ||
2119 | |||
2106 | pv_mmu_ops.set_pte = xen_set_pte; | 2120 | pv_mmu_ops.set_pte = xen_set_pte; |
2107 | pv_mmu_ops.set_pmd = xen_set_pmd; | 2121 | pv_mmu_ops.set_pmd = xen_set_pmd; |
2108 | pv_mmu_ops.set_pud = xen_set_pud; | 2122 | pv_mmu_ops.set_pud = xen_set_pud; |
@@ -2207,6 +2221,15 @@ static const struct pv_mmu_ops xen_mmu_ops __initconst = { | |||
2207 | void __init xen_init_mmu_ops(void) | 2221 | void __init xen_init_mmu_ops(void) |
2208 | { | 2222 | { |
2209 | x86_init.paging.pagetable_init = xen_pagetable_init; | 2223 | x86_init.paging.pagetable_init = xen_pagetable_init; |
2224 | |||
2225 | /* Optimization - we can use the HVM one but it has no idea which | ||
2226 | * VCPUs are descheduled - which means that it will needlessly IPI | ||
2227 | * them. Xen knows, so let it do the job. | ||
2228 | */ | ||
2229 | if (xen_feature(XENFEAT_auto_translated_physmap)) { | ||
2230 | pv_mmu_ops.flush_tlb_others = xen_flush_tlb_others; | ||
2231 | return; | ||
2232 | } | ||
2210 | pv_mmu_ops = xen_mmu_ops; | 2233 | pv_mmu_ops = xen_mmu_ops; |
2211 | 2234 | ||
2212 | memset(dummy_mapping, 0xff, PAGE_SIZE); | 2235 | memset(dummy_mapping, 0xff, PAGE_SIZE); |
diff --git a/arch/x86/xen/p2m.c b/arch/x86/xen/p2m.c index 2ae8699e8767..85e5d78c9874 100644 --- a/arch/x86/xen/p2m.c +++ b/arch/x86/xen/p2m.c | |||
@@ -280,6 +280,9 @@ void __ref xen_build_mfn_list_list(void) | |||
280 | { | 280 | { |
281 | unsigned long pfn; | 281 | unsigned long pfn; |
282 | 282 | ||
283 | if (xen_feature(XENFEAT_auto_translated_physmap)) | ||
284 | return; | ||
285 | |||
283 | /* Pre-initialize p2m_top_mfn to be completely missing */ | 286 | /* Pre-initialize p2m_top_mfn to be completely missing */ |
284 | if (p2m_top_mfn == NULL) { | 287 | if (p2m_top_mfn == NULL) { |
285 | p2m_mid_missing_mfn = extend_brk(PAGE_SIZE, PAGE_SIZE); | 288 | p2m_mid_missing_mfn = extend_brk(PAGE_SIZE, PAGE_SIZE); |
@@ -336,6 +339,9 @@ void __ref xen_build_mfn_list_list(void) | |||
336 | 339 | ||
337 | void xen_setup_mfn_list_list(void) | 340 | void xen_setup_mfn_list_list(void) |
338 | { | 341 | { |
342 | if (xen_feature(XENFEAT_auto_translated_physmap)) | ||
343 | return; | ||
344 | |||
339 | BUG_ON(HYPERVISOR_shared_info == &xen_dummy_shared_info); | 345 | BUG_ON(HYPERVISOR_shared_info == &xen_dummy_shared_info); |
340 | 346 | ||
341 | HYPERVISOR_shared_info->arch.pfn_to_mfn_frame_list_list = | 347 | HYPERVISOR_shared_info->arch.pfn_to_mfn_frame_list_list = |
@@ -346,10 +352,15 @@ void xen_setup_mfn_list_list(void) | |||
346 | /* Set up p2m_top to point to the domain-builder provided p2m pages */ | 352 | /* Set up p2m_top to point to the domain-builder provided p2m pages */ |
347 | void __init xen_build_dynamic_phys_to_machine(void) | 353 | void __init xen_build_dynamic_phys_to_machine(void) |
348 | { | 354 | { |
349 | unsigned long *mfn_list = (unsigned long *)xen_start_info->mfn_list; | 355 | unsigned long *mfn_list; |
350 | unsigned long max_pfn = min(MAX_DOMAIN_PAGES, xen_start_info->nr_pages); | 356 | unsigned long max_pfn; |
351 | unsigned long pfn; | 357 | unsigned long pfn; |
352 | 358 | ||
359 | if (xen_feature(XENFEAT_auto_translated_physmap)) | ||
360 | return; | ||
361 | |||
362 | mfn_list = (unsigned long *)xen_start_info->mfn_list; | ||
363 | max_pfn = min(MAX_DOMAIN_PAGES, xen_start_info->nr_pages); | ||
353 | xen_max_p2m_pfn = max_pfn; | 364 | xen_max_p2m_pfn = max_pfn; |
354 | 365 | ||
355 | p2m_missing = extend_brk(PAGE_SIZE, PAGE_SIZE); | 366 | p2m_missing = extend_brk(PAGE_SIZE, PAGE_SIZE); |
@@ -870,6 +881,65 @@ static unsigned long mfn_hash(unsigned long mfn) | |||
870 | return hash_long(mfn, M2P_OVERRIDE_HASH_SHIFT); | 881 | return hash_long(mfn, M2P_OVERRIDE_HASH_SHIFT); |
871 | } | 882 | } |
872 | 883 | ||
884 | int set_foreign_p2m_mapping(struct gnttab_map_grant_ref *map_ops, | ||
885 | struct gnttab_map_grant_ref *kmap_ops, | ||
886 | struct page **pages, unsigned int count) | ||
887 | { | ||
888 | int i, ret = 0; | ||
889 | bool lazy = false; | ||
890 | pte_t *pte; | ||
891 | |||
892 | if (xen_feature(XENFEAT_auto_translated_physmap)) | ||
893 | return 0; | ||
894 | |||
895 | if (kmap_ops && | ||
896 | !in_interrupt() && | ||
897 | paravirt_get_lazy_mode() == PARAVIRT_LAZY_NONE) { | ||
898 | arch_enter_lazy_mmu_mode(); | ||
899 | lazy = true; | ||
900 | } | ||
901 | |||
902 | for (i = 0; i < count; i++) { | ||
903 | unsigned long mfn, pfn; | ||
904 | |||
905 | /* Do not add to override if the map failed. */ | ||
906 | if (map_ops[i].status) | ||
907 | continue; | ||
908 | |||
909 | if (map_ops[i].flags & GNTMAP_contains_pte) { | ||
910 | pte = (pte_t *) (mfn_to_virt(PFN_DOWN(map_ops[i].host_addr)) + | ||
911 | (map_ops[i].host_addr & ~PAGE_MASK)); | ||
912 | mfn = pte_mfn(*pte); | ||
913 | } else { | ||
914 | mfn = PFN_DOWN(map_ops[i].dev_bus_addr); | ||
915 | } | ||
916 | pfn = page_to_pfn(pages[i]); | ||
917 | |||
918 | WARN_ON(PagePrivate(pages[i])); | ||
919 | SetPagePrivate(pages[i]); | ||
920 | set_page_private(pages[i], mfn); | ||
921 | pages[i]->index = pfn_to_mfn(pfn); | ||
922 | |||
923 | if (unlikely(!set_phys_to_machine(pfn, FOREIGN_FRAME(mfn)))) { | ||
924 | ret = -ENOMEM; | ||
925 | goto out; | ||
926 | } | ||
927 | |||
928 | if (kmap_ops) { | ||
929 | ret = m2p_add_override(mfn, pages[i], &kmap_ops[i]); | ||
930 | if (ret) | ||
931 | goto out; | ||
932 | } | ||
933 | } | ||
934 | |||
935 | out: | ||
936 | if (lazy) | ||
937 | arch_leave_lazy_mmu_mode(); | ||
938 | |||
939 | return ret; | ||
940 | } | ||
941 | EXPORT_SYMBOL_GPL(set_foreign_p2m_mapping); | ||
942 | |||
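A hypothetical caller sketch for the new batched helper (fragment with illustrative names; the real consumer is the generic grant-table map path): after the batched map hypercall, record the resulting foreign MFNs in the p2m so later pfn-to-mfn translation works for these pages.

static int map_foreign_grants(struct gnttab_map_grant_ref *map,
                              struct page **pages, unsigned int count)
{
        int ret = HYPERVISOR_grant_table_op(GNTTABOP_map_grant_ref,
                                            map, count);
        if (ret)
                return ret;
        /* Entries with map[i].status != 0 are skipped internally. */
        return set_foreign_p2m_mapping(map, NULL, pages, count);
}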
873 | /* Add an MFN override for a particular page */ | 943 | /* Add an MFN override for a particular page */ |
874 | int m2p_add_override(unsigned long mfn, struct page *page, | 944 | int m2p_add_override(unsigned long mfn, struct page *page, |
875 | struct gnttab_map_grant_ref *kmap_op) | 945 | struct gnttab_map_grant_ref *kmap_op) |
@@ -888,13 +958,6 @@ int m2p_add_override(unsigned long mfn, struct page *page, | |||
888 | "m2p_add_override: pfn %lx not mapped", pfn)) | 958 | "m2p_add_override: pfn %lx not mapped", pfn)) |
889 | return -EINVAL; | 959 | return -EINVAL; |
890 | } | 960 | } |
891 | WARN_ON(PagePrivate(page)); | ||
892 | SetPagePrivate(page); | ||
893 | set_page_private(page, mfn); | ||
894 | page->index = pfn_to_mfn(pfn); | ||
895 | |||
896 | if (unlikely(!set_phys_to_machine(pfn, FOREIGN_FRAME(mfn)))) | ||
897 | return -ENOMEM; | ||
898 | 961 | ||
899 | if (kmap_op != NULL) { | 962 | if (kmap_op != NULL) { |
900 | if (!PageHighMem(page)) { | 963 | if (!PageHighMem(page)) { |
@@ -932,20 +995,62 @@ int m2p_add_override(unsigned long mfn, struct page *page, | |||
932 | return 0; | 995 | return 0; |
933 | } | 996 | } |
934 | EXPORT_SYMBOL_GPL(m2p_add_override); | 997 | EXPORT_SYMBOL_GPL(m2p_add_override); |
998 | |||
999 | int clear_foreign_p2m_mapping(struct gnttab_unmap_grant_ref *unmap_ops, | ||
1000 | struct gnttab_map_grant_ref *kmap_ops, | ||
1001 | struct page **pages, unsigned int count) | ||
1002 | { | ||
1003 | int i, ret = 0; | ||
1004 | bool lazy = false; | ||
1005 | |||
1006 | if (xen_feature(XENFEAT_auto_translated_physmap)) | ||
1007 | return 0; | ||
1008 | |||
1009 | if (kmap_ops && | ||
1010 | !in_interrupt() && | ||
1011 | paravirt_get_lazy_mode() == PARAVIRT_LAZY_NONE) { | ||
1012 | arch_enter_lazy_mmu_mode(); | ||
1013 | lazy = true; | ||
1014 | } | ||
1015 | |||
1016 | for (i = 0; i < count; i++) { | ||
1017 | unsigned long mfn = get_phys_to_machine(page_to_pfn(pages[i])); | ||
1018 | unsigned long pfn = page_to_pfn(pages[i]); | ||
1019 | |||
1020 | if (mfn == INVALID_P2M_ENTRY || !(mfn & FOREIGN_FRAME_BIT)) { | ||
1021 | ret = -EINVAL; | ||
1022 | goto out; | ||
1023 | } | ||
1024 | |||
1025 | set_page_private(pages[i], INVALID_P2M_ENTRY); | ||
1026 | WARN_ON(!PagePrivate(pages[i])); | ||
1027 | ClearPagePrivate(pages[i]); | ||
1028 | set_phys_to_machine(pfn, pages[i]->index); | ||
1029 | |||
1030 | if (kmap_ops) | ||
1031 | ret = m2p_remove_override(pages[i], &kmap_ops[i], mfn); | ||
1032 | if (ret) | ||
1033 | goto out; | ||
1034 | } | ||
1035 | |||
1036 | out: | ||
1037 | if (lazy) | ||
1038 | arch_leave_lazy_mmu_mode(); | ||
1039 | return ret; | ||
1040 | } | ||
1041 | EXPORT_SYMBOL_GPL(clear_foreign_p2m_mapping); | ||
1042 | |||
935 | int m2p_remove_override(struct page *page, | 1043 | int m2p_remove_override(struct page *page, |
936 | struct gnttab_map_grant_ref *kmap_op) | 1044 | struct gnttab_map_grant_ref *kmap_op, |
1045 | unsigned long mfn) | ||
937 | { | 1046 | { |
938 | unsigned long flags; | 1047 | unsigned long flags; |
939 | unsigned long mfn; | ||
940 | unsigned long pfn; | 1048 | unsigned long pfn; |
941 | unsigned long uninitialized_var(address); | 1049 | unsigned long uninitialized_var(address); |
942 | unsigned level; | 1050 | unsigned level; |
943 | pte_t *ptep = NULL; | 1051 | pte_t *ptep = NULL; |
944 | 1052 | ||
945 | pfn = page_to_pfn(page); | 1053 | pfn = page_to_pfn(page); |
946 | mfn = get_phys_to_machine(pfn); | ||
947 | if (mfn == INVALID_P2M_ENTRY || !(mfn & FOREIGN_FRAME_BIT)) | ||
948 | return -EINVAL; | ||
949 | 1054 | ||
950 | if (!PageHighMem(page)) { | 1055 | if (!PageHighMem(page)) { |
951 | address = (unsigned long)__va(pfn << PAGE_SHIFT); | 1056 | address = (unsigned long)__va(pfn << PAGE_SHIFT); |
@@ -959,10 +1064,7 @@ int m2p_remove_override(struct page *page, | |||
959 | spin_lock_irqsave(&m2p_override_lock, flags); | 1064 | spin_lock_irqsave(&m2p_override_lock, flags); |
960 | list_del(&page->lru); | 1065 | list_del(&page->lru); |
961 | spin_unlock_irqrestore(&m2p_override_lock, flags); | 1066 | spin_unlock_irqrestore(&m2p_override_lock, flags); |
962 | WARN_ON(!PagePrivate(page)); | ||
963 | ClearPagePrivate(page); | ||
964 | 1067 | ||
965 | set_phys_to_machine(pfn, page->index); | ||
966 | if (kmap_op != NULL) { | 1068 | if (kmap_op != NULL) { |
967 | if (!PageHighMem(page)) { | 1069 | if (!PageHighMem(page)) { |
968 | struct multicall_space mcs; | 1070 | struct multicall_space mcs; |
diff --git a/arch/x86/xen/platform-pci-unplug.c b/arch/x86/xen/platform-pci-unplug.c index 0a7852483ffe..a8261716d58d 100644 --- a/arch/x86/xen/platform-pci-unplug.c +++ b/arch/x86/xen/platform-pci-unplug.c | |||
@@ -30,10 +30,9 @@ | |||
30 | #define XEN_PLATFORM_ERR_PROTOCOL -2 | 30 | #define XEN_PLATFORM_ERR_PROTOCOL -2 |
31 | #define XEN_PLATFORM_ERR_BLACKLIST -3 | 31 | #define XEN_PLATFORM_ERR_BLACKLIST -3 |
32 | 32 | ||
33 | /* store the value of xen_emul_unplug after the unplug is done */ | ||
34 | int xen_platform_pci_unplug; | ||
35 | EXPORT_SYMBOL_GPL(xen_platform_pci_unplug); | ||
36 | #ifdef CONFIG_XEN_PVHVM | 33 | #ifdef CONFIG_XEN_PVHVM |
34 | /* store the value of xen_emul_unplug after the unplug is done */ | ||
35 | static int xen_platform_pci_unplug; | ||
37 | static int xen_emul_unplug; | 36 | static int xen_emul_unplug; |
38 | 37 | ||
39 | static int check_platform_magic(void) | 38 | static int check_platform_magic(void) |
@@ -69,6 +68,80 @@ static int check_platform_magic(void) | |||
69 | return 0; | 68 | return 0; |
70 | } | 69 | } |
71 | 70 | ||
71 | bool xen_has_pv_devices() | ||
72 | { | ||
73 | if (!xen_domain()) | ||
74 | return false; | ||
75 | |||
76 | /* PV domains always have them. */ | ||
77 | if (xen_pv_domain()) | ||
78 | return true; | ||
79 | |||
80 | /* And the user has xen_platform_pci=0 set in the guest config, as | ||
81 | * the driver did not modify the value. */ | ||
82 | if (xen_platform_pci_unplug == 0) | ||
83 | return false; | ||
84 | |||
85 | if (xen_platform_pci_unplug & XEN_UNPLUG_NEVER) | ||
86 | return false; | ||
87 | |||
88 | if (xen_platform_pci_unplug & XEN_UNPLUG_ALL) | ||
89 | return true; | ||
90 | |||
91 | /* This is an odd one - we are going to run legacy | ||
92 | * and PV drivers at the same time. */ | ||
93 | if (xen_platform_pci_unplug & XEN_UNPLUG_UNNECESSARY) | ||
94 | return true; | ||
95 | |||
96 | /* And the caller has to follow with xen_pv_{disk,nic}_devices | ||
97 | * to be certain which driver can load. */ | ||
98 | return false; | ||
99 | } | ||
100 | EXPORT_SYMBOL_GPL(xen_has_pv_devices); | ||
101 | |||
102 | static bool __xen_has_pv_device(int state) | ||
103 | { | ||
104 | /* HVM domains might or might not */ | ||
105 | if (xen_hvm_domain() && (xen_platform_pci_unplug & state)) | ||
106 | return true; | ||
107 | |||
108 | return xen_has_pv_devices(); | ||
109 | } | ||
110 | |||
111 | bool xen_has_pv_nic_devices(void) | ||
112 | { | ||
113 | return __xen_has_pv_device(XEN_UNPLUG_ALL_NICS | XEN_UNPLUG_ALL); | ||
114 | } | ||
115 | EXPORT_SYMBOL_GPL(xen_has_pv_nic_devices); | ||
116 | |||
117 | bool xen_has_pv_disk_devices(void) | ||
118 | { | ||
119 | return __xen_has_pv_device(XEN_UNPLUG_ALL_IDE_DISKS | | ||
120 | XEN_UNPLUG_AUX_IDE_DISKS | XEN_UNPLUG_ALL); | ||
121 | } | ||
122 | EXPORT_SYMBOL_GPL(xen_has_pv_disk_devices); | ||
123 | |||
124 | /* | ||
125 | * This one is odd - it determines whether you want to run PV _and_ | ||
126 | * legacy (IDE) drivers together. This combination is only possible | ||
127 | * under HVM. | ||
128 | */ | ||
129 | bool xen_has_pv_and_legacy_disk_devices(void) | ||
130 | { | ||
131 | if (!xen_domain()) | ||
132 | return false; | ||
133 | |||
134 | /* N.B. This is only ever used in HVM mode */ | ||
135 | if (xen_pv_domain()) | ||
136 | return false; | ||
137 | |||
138 | if (xen_platform_pci_unplug & XEN_UNPLUG_UNNECESSARY) | ||
139 | return true; | ||
140 | |||
141 | return false; | ||
142 | } | ||
143 | EXPORT_SYMBOL_GPL(xen_has_pv_and_legacy_disk_devices); | ||
144 | |||
72 | void xen_unplug_emulated_devices(void) | 145 | void xen_unplug_emulated_devices(void) |
73 | { | 146 | { |
74 | int r; | 147 | int r; |
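The new xen_has_pv_* predicates give frontend drivers one place to ask whether PV devices are usable in the current domain, instead of each driver open-coding checks against xen_platform_pci_unplug. Below is a minimal sketch of how a disk frontend might consume them; the init function, its name, and the header location are illustrative assumptions, not the in-tree xen-blkfront code:

#include <linux/errno.h>
#include <linux/init.h>
#include <linux/module.h>
#include <xen/platform_pci.h>	/* assumed declaration site of the predicates */

static int __init example_blkfront_init(void)
{
	/* Bail out when neither PV-only nor mixed PV/legacy disk
	 * operation is possible in this domain. */
	if (!xen_has_pv_disk_devices())
		return -ENODEV;

	/* ... register the xenbus frontend driver here ... */
	return 0;
}
module_init(example_blkfront_init);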
diff --git a/arch/x86/xen/setup.c b/arch/x86/xen/setup.c index 68c054f59de6..0982233b9b84 100644 --- a/arch/x86/xen/setup.c +++ b/arch/x86/xen/setup.c | |||
@@ -27,6 +27,7 @@ | |||
27 | #include <xen/interface/memory.h> | 27 | #include <xen/interface/memory.h> |
28 | #include <xen/interface/physdev.h> | 28 | #include <xen/interface/physdev.h> |
29 | #include <xen/features.h> | 29 | #include <xen/features.h> |
30 | #include "mmu.h" | ||
30 | #include "xen-ops.h" | 31 | #include "xen-ops.h" |
31 | #include "vdso.h" | 32 | #include "vdso.h" |
32 | 33 | ||
@@ -34,7 +35,7 @@ | |||
34 | extern const char xen_hypervisor_callback[]; | 35 | extern const char xen_hypervisor_callback[]; |
35 | extern const char xen_failsafe_callback[]; | 36 | extern const char xen_failsafe_callback[]; |
36 | #ifdef CONFIG_X86_64 | 37 | #ifdef CONFIG_X86_64 |
37 | extern const char nmi[]; | 38 | extern asmlinkage void nmi(void); |
38 | #endif | 39 | #endif |
39 | extern void xen_sysenter_target(void); | 40 | extern void xen_sysenter_target(void); |
40 | extern void xen_syscall_target(void); | 41 | extern void xen_syscall_target(void); |
@@ -81,6 +82,9 @@ static void __init xen_add_extra_mem(u64 start, u64 size) | |||
81 | 82 | ||
82 | memblock_reserve(start, size); | 83 | memblock_reserve(start, size); |
83 | 84 | ||
85 | if (xen_feature(XENFEAT_auto_translated_physmap)) | ||
86 | return; | ||
87 | |||
84 | xen_max_p2m_pfn = PFN_DOWN(start + size); | 88 | xen_max_p2m_pfn = PFN_DOWN(start + size); |
85 | for (pfn = PFN_DOWN(start); pfn < xen_max_p2m_pfn; pfn++) { | 89 | for (pfn = PFN_DOWN(start); pfn < xen_max_p2m_pfn; pfn++) { |
86 | unsigned long mfn = pfn_to_mfn(pfn); | 90 | unsigned long mfn = pfn_to_mfn(pfn); |
@@ -103,6 +107,7 @@ static unsigned long __init xen_do_chunk(unsigned long start, | |||
103 | .domid = DOMID_SELF | 107 | .domid = DOMID_SELF |
104 | }; | 108 | }; |
105 | unsigned long len = 0; | 109 | unsigned long len = 0; |
110 | int xlated_phys = xen_feature(XENFEAT_auto_translated_physmap); | ||
106 | unsigned long pfn; | 111 | unsigned long pfn; |
107 | int ret; | 112 | int ret; |
108 | 113 | ||
@@ -116,7 +121,7 @@ static unsigned long __init xen_do_chunk(unsigned long start, | |||
116 | continue; | 121 | continue; |
117 | frame = mfn; | 122 | frame = mfn; |
118 | } else { | 123 | } else { |
119 | if (mfn != INVALID_P2M_ENTRY) | 124 | if (!xlated_phys && mfn != INVALID_P2M_ENTRY) |
120 | continue; | 125 | continue; |
121 | frame = pfn; | 126 | frame = pfn; |
122 | } | 127 | } |
@@ -154,6 +159,13 @@ static unsigned long __init xen_do_chunk(unsigned long start, | |||
154 | static unsigned long __init xen_release_chunk(unsigned long start, | 159 | static unsigned long __init xen_release_chunk(unsigned long start, |
155 | unsigned long end) | 160 | unsigned long end) |
156 | { | 161 | { |
162 | /* | ||
163 | * Xen already ballooned out the non-RAM E820 regions for us | ||
164 | * and set them up properly in EPT. | ||
165 | */ | ||
166 | if (xen_feature(XENFEAT_auto_translated_physmap)) | ||
167 | return end - start; | ||
168 | |||
157 | return xen_do_chunk(start, end, true); | 169 | return xen_do_chunk(start, end, true); |
158 | } | 170 | } |
159 | 171 | ||
@@ -222,7 +234,13 @@ static void __init xen_set_identity_and_release_chunk( | |||
222 | * (except for the ISA region which must be 1:1 mapped) to | 234 | * (except for the ISA region which must be 1:1 mapped) to |
223 | * release the refcounts (in Xen) on the original frames. | 235 | * release the refcounts (in Xen) on the original frames. |
224 | */ | 236 | */ |
225 | for (pfn = start_pfn; pfn <= max_pfn_mapped && pfn < end_pfn; pfn++) { | 237 | |
238 | /* | ||
239 | * PVH E820 matches the hypervisor's P2M, which means we need to | ||
240 | * account for the proper values of *release and *identity. | ||
241 | */ | ||
242 | for (pfn = start_pfn; !xen_feature(XENFEAT_auto_translated_physmap) && | ||
243 | pfn <= max_pfn_mapped && pfn < end_pfn; pfn++) { | ||
226 | pte_t pte = __pte_ma(0); | 244 | pte_t pte = __pte_ma(0); |
227 | 245 | ||
228 | if (pfn < PFN_UP(ISA_END_ADDRESS)) | 246 | if (pfn < PFN_UP(ISA_END_ADDRESS)) |
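Folding the feature test into the loop condition makes the whole loop a no-op for auto-translated (PVH) guests while keeping a single loop statement. It is equivalent to hoisting the check out of the loop, as this sketch shows (the loop body is elided):

/* Equivalent form with the PVH check hoisted out of the loop
 * (a sketch; the in-tree code keeps it in the loop condition): */
if (!xen_feature(XENFEAT_auto_translated_physmap)) {
	for (pfn = start_pfn; pfn <= max_pfn_mapped && pfn < end_pfn; pfn++) {
		pte_t pte = __pte_ma(0);

		if (pfn < PFN_UP(ISA_END_ADDRESS))
			pte = mfn_pte(pfn, PAGE_KERNEL_IO);
		/* ... update the pte and p2m entry as before ... */
	}
}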
@@ -559,20 +577,17 @@ void xen_enable_syscall(void) | |||
559 | void xen_enable_nmi(void) | 577 | void xen_enable_nmi(void) |
560 | { | 578 | { |
561 | #ifdef CONFIG_X86_64 | 579 | #ifdef CONFIG_X86_64 |
562 | if (register_callback(CALLBACKTYPE_nmi, nmi)) | 580 | if (register_callback(CALLBACKTYPE_nmi, (char *)nmi)) |
563 | BUG(); | 581 | BUG(); |
564 | #endif | 582 | #endif |
565 | } | 583 | } |
566 | void __init xen_arch_setup(void) | 584 | void __init xen_pvmmu_arch_setup(void) |
567 | { | 585 | { |
568 | xen_panic_handler_init(); | ||
569 | |||
570 | HYPERVISOR_vm_assist(VMASST_CMD_enable, VMASST_TYPE_4gb_segments); | 586 | HYPERVISOR_vm_assist(VMASST_CMD_enable, VMASST_TYPE_4gb_segments); |
571 | HYPERVISOR_vm_assist(VMASST_CMD_enable, VMASST_TYPE_writable_pagetables); | 587 | HYPERVISOR_vm_assist(VMASST_CMD_enable, VMASST_TYPE_writable_pagetables); |
572 | 588 | ||
573 | if (!xen_feature(XENFEAT_auto_translated_physmap)) | 589 | HYPERVISOR_vm_assist(VMASST_CMD_enable, |
574 | HYPERVISOR_vm_assist(VMASST_CMD_enable, | 590 | VMASST_TYPE_pae_extended_cr3); |
575 | VMASST_TYPE_pae_extended_cr3); | ||
576 | 591 | ||
577 | if (register_callback(CALLBACKTYPE_event, xen_hypervisor_callback) || | 592 | if (register_callback(CALLBACKTYPE_event, xen_hypervisor_callback) || |
578 | register_callback(CALLBACKTYPE_failsafe, xen_failsafe_callback)) | 593 | register_callback(CALLBACKTYPE_failsafe, xen_failsafe_callback)) |
@@ -581,6 +596,15 @@ void __init xen_arch_setup(void) | |||
581 | xen_enable_sysenter(); | 596 | xen_enable_sysenter(); |
582 | xen_enable_syscall(); | 597 | xen_enable_syscall(); |
583 | xen_enable_nmi(); | 598 | xen_enable_nmi(); |
599 | } | ||
600 | |||
601 | /* This function is not called for HVM domains */ | ||
602 | void __init xen_arch_setup(void) | ||
603 | { | ||
604 | xen_panic_handler_init(); | ||
605 | if (!xen_feature(XENFEAT_auto_translated_physmap)) | ||
606 | xen_pvmmu_arch_setup(); | ||
607 | |||
584 | #ifdef CONFIG_ACPI | 608 | #ifdef CONFIG_ACPI |
585 | if (!(xen_start_info->flags & SIF_INITDOMAIN)) { | 609 | if (!(xen_start_info->flags & SIF_INITDOMAIN)) { |
586 | printk(KERN_INFO "ACPI in unprivileged domain disabled\n"); | 610 | printk(KERN_INFO "ACPI in unprivileged domain disabled\n"); |
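One small setup.c cleanup above is worth spelling out: declaring the assembly NMI entry point as a function (extern asmlinkage void nmi(void)) instead of a char array matches how the symbol is actually defined, at the cost of a cast where register_callback() expects a pointer. A sketch of the pattern, with register_callback() treated as an opaque helper and the wrapper name illustrative:

/* Sketch: an assembly-defined entry point declared as a C function.
 * The cast keeps the register_callback() prototype happy; 'nmi' is
 * defined in assembly, not in C. */
extern asmlinkage void nmi(void);

static void example_enable_nmi(void)
{
	if (register_callback(CALLBACKTYPE_nmi, (char *)nmi))
		BUG();
}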
diff --git a/arch/x86/xen/smp.c b/arch/x86/xen/smp.c index c36b325abd83..a18eadd8bb40 100644 --- a/arch/x86/xen/smp.c +++ b/arch/x86/xen/smp.c | |||
@@ -73,9 +73,11 @@ static void cpu_bringup(void) | |||
73 | touch_softlockup_watchdog(); | 73 | touch_softlockup_watchdog(); |
74 | preempt_disable(); | 74 | preempt_disable(); |
75 | 75 | ||
76 | xen_enable_sysenter(); | 76 | /* PVH runs in ring 0 and allows us to do native syscalls. Yay! */ |
77 | xen_enable_syscall(); | 77 | if (!xen_feature(XENFEAT_supervisor_mode_kernel)) { |
78 | 78 | xen_enable_sysenter(); | |
79 | xen_enable_syscall(); | ||
80 | } | ||
79 | cpu = smp_processor_id(); | 81 | cpu = smp_processor_id(); |
80 | smp_store_cpu_info(cpu); | 82 | smp_store_cpu_info(cpu); |
81 | cpu_data(cpu).x86_max_cores = 1; | 83 | cpu_data(cpu).x86_max_cores = 1; |
@@ -97,8 +99,14 @@ static void cpu_bringup(void) | |||
97 | wmb(); /* make sure everything is out */ | 99 | wmb(); /* make sure everything is out */ |
98 | } | 100 | } |
99 | 101 | ||
100 | static void cpu_bringup_and_idle(void) | 102 | /* Note: cpu parameter is only relevant for PVH */ |
103 | static void cpu_bringup_and_idle(int cpu) | ||
101 | { | 104 | { |
105 | #ifdef CONFIG_X86_64 | ||
106 | if (xen_feature(XENFEAT_auto_translated_physmap) && | ||
107 | xen_feature(XENFEAT_supervisor_mode_kernel)) | ||
108 | xen_pvh_secondary_vcpu_init(cpu); | ||
109 | #endif | ||
102 | cpu_bringup(); | 110 | cpu_bringup(); |
103 | cpu_startup_entry(CPUHP_ONLINE); | 111 | cpu_startup_entry(CPUHP_ONLINE); |
104 | } | 112 | } |
@@ -274,9 +282,10 @@ static void __init xen_smp_prepare_boot_cpu(void) | |||
274 | native_smp_prepare_boot_cpu(); | 282 | native_smp_prepare_boot_cpu(); |
275 | 283 | ||
276 | if (xen_pv_domain()) { | 284 | if (xen_pv_domain()) { |
277 | /* We've switched to the "real" per-cpu gdt, so make sure the | 285 | if (!xen_feature(XENFEAT_writable_page_tables)) |
278 | old memory can be recycled */ | 286 | /* We've switched to the "real" per-cpu gdt, so make |
279 | make_lowmem_page_readwrite(xen_initial_gdt); | 287 | * sure the old memory can be recycled. */ |
288 | make_lowmem_page_readwrite(xen_initial_gdt); | ||
280 | 289 | ||
281 | #ifdef CONFIG_X86_32 | 290 | #ifdef CONFIG_X86_32 |
282 | /* | 291 | /* |
@@ -360,22 +369,21 @@ cpu_initialize_context(unsigned int cpu, struct task_struct *idle) | |||
360 | 369 | ||
361 | gdt = get_cpu_gdt_table(cpu); | 370 | gdt = get_cpu_gdt_table(cpu); |
362 | 371 | ||
363 | ctxt->flags = VGCF_IN_KERNEL; | ||
364 | ctxt->user_regs.ss = __KERNEL_DS; | ||
365 | #ifdef CONFIG_X86_32 | 372 | #ifdef CONFIG_X86_32 |
373 | /* Note: PVH is not yet supported on x86_32. */ | ||
366 | ctxt->user_regs.fs = __KERNEL_PERCPU; | 374 | ctxt->user_regs.fs = __KERNEL_PERCPU; |
367 | ctxt->user_regs.gs = __KERNEL_STACK_CANARY; | 375 | ctxt->user_regs.gs = __KERNEL_STACK_CANARY; |
368 | #else | ||
369 | ctxt->gs_base_kernel = per_cpu_offset(cpu); | ||
370 | #endif | 376 | #endif |
371 | ctxt->user_regs.eip = (unsigned long)cpu_bringup_and_idle; | 377 | ctxt->user_regs.eip = (unsigned long)cpu_bringup_and_idle; |
372 | 378 | ||
373 | memset(&ctxt->fpu_ctxt, 0, sizeof(ctxt->fpu_ctxt)); | 379 | memset(&ctxt->fpu_ctxt, 0, sizeof(ctxt->fpu_ctxt)); |
374 | 380 | ||
375 | { | 381 | if (!xen_feature(XENFEAT_auto_translated_physmap)) { |
382 | ctxt->flags = VGCF_IN_KERNEL; | ||
376 | ctxt->user_regs.eflags = 0x1000; /* IOPL_RING1 */ | 383 | ctxt->user_regs.eflags = 0x1000; /* IOPL_RING1 */ |
377 | ctxt->user_regs.ds = __USER_DS; | 384 | ctxt->user_regs.ds = __USER_DS; |
378 | ctxt->user_regs.es = __USER_DS; | 385 | ctxt->user_regs.es = __USER_DS; |
386 | ctxt->user_regs.ss = __KERNEL_DS; | ||
379 | 387 | ||
380 | xen_copy_trap_info(ctxt->trap_ctxt); | 388 | xen_copy_trap_info(ctxt->trap_ctxt); |
381 | 389 | ||
@@ -396,18 +404,27 @@ cpu_initialize_context(unsigned int cpu, struct task_struct *idle) | |||
396 | #ifdef CONFIG_X86_32 | 404 | #ifdef CONFIG_X86_32 |
397 | ctxt->event_callback_cs = __KERNEL_CS; | 405 | ctxt->event_callback_cs = __KERNEL_CS; |
398 | ctxt->failsafe_callback_cs = __KERNEL_CS; | 406 | ctxt->failsafe_callback_cs = __KERNEL_CS; |
407 | #else | ||
408 | ctxt->gs_base_kernel = per_cpu_offset(cpu); | ||
399 | #endif | 409 | #endif |
400 | ctxt->event_callback_eip = | 410 | ctxt->event_callback_eip = |
401 | (unsigned long)xen_hypervisor_callback; | 411 | (unsigned long)xen_hypervisor_callback; |
402 | ctxt->failsafe_callback_eip = | 412 | ctxt->failsafe_callback_eip = |
403 | (unsigned long)xen_failsafe_callback; | 413 | (unsigned long)xen_failsafe_callback; |
414 | ctxt->user_regs.cs = __KERNEL_CS; | ||
415 | per_cpu(xen_cr3, cpu) = __pa(swapper_pg_dir); | ||
416 | #ifdef CONFIG_X86_32 | ||
404 | } | 417 | } |
405 | ctxt->user_regs.cs = __KERNEL_CS; | 418 | #else |
419 | } else | ||
420 | /* N.B. The user_regs.eip (cpu_bringup_and_idle) is called with | ||
421 | * %rdi holding the cpu number - which means we are passing | ||
422 | * the cpu as the first parameter. Subtle! | ||
423 | */ | ||
424 | ctxt->user_regs.rdi = cpu; | ||
425 | #endif | ||
406 | ctxt->user_regs.esp = idle->thread.sp0 - sizeof(struct pt_regs); | 426 | ctxt->user_regs.esp = idle->thread.sp0 - sizeof(struct pt_regs); |
407 | |||
408 | per_cpu(xen_cr3, cpu) = __pa(swapper_pg_dir); | ||
409 | ctxt->ctrlreg[3] = xen_pfn_to_cr3(virt_to_mfn(swapper_pg_dir)); | 427 | ctxt->ctrlreg[3] = xen_pfn_to_cr3(virt_to_mfn(swapper_pg_dir)); |
410 | |||
411 | if (HYPERVISOR_vcpu_op(VCPUOP_initialise, cpu, ctxt)) | 428 | if (HYPERVISOR_vcpu_op(VCPUOP_initialise, cpu, ctxt)) |
412 | BUG(); | 429 | BUG(); |
413 | 430 | ||
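The %rdi detail above is the whole PVH bring-up contract: the hypervisor starts the new vCPU at user_regs.eip, and under the x86-64 SysV ABI the first integer argument travels in %rdi, so preloading it with the cpu number lets the entry point be an ordinary C function. A condensed sketch of just that contract (the helper name is illustrative):

/* Sketch of the PVH vCPU start contract: eip is the C entry point,
 * rdi becomes its first argument per the x86-64 SysV calling
 * convention. */
static void example_prepare_pvh_vcpu(struct vcpu_guest_context *ctxt,
				     int cpu)
{
	ctxt->user_regs.eip = (unsigned long)cpu_bringup_and_idle;
	ctxt->user_regs.rdi = cpu;	/* first parameter of the entry */
}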
diff --git a/arch/x86/xen/spinlock.c b/arch/x86/xen/spinlock.c index 0e36cde12f7e..4d3acc34a998 100644 --- a/arch/x86/xen/spinlock.c +++ b/arch/x86/xen/spinlock.c | |||
@@ -106,7 +106,7 @@ static DEFINE_PER_CPU(struct xen_lock_waiting, lock_waiting); | |||
106 | static cpumask_t waiting_cpus; | 106 | static cpumask_t waiting_cpus; |
107 | 107 | ||
108 | static bool xen_pvspin = true; | 108 | static bool xen_pvspin = true; |
109 | static void xen_lock_spinning(struct arch_spinlock *lock, __ticket_t want) | 109 | __visible void xen_lock_spinning(struct arch_spinlock *lock, __ticket_t want) |
110 | { | 110 | { |
111 | int irq = __this_cpu_read(lock_kicker_irq); | 111 | int irq = __this_cpu_read(lock_kicker_irq); |
112 | struct xen_lock_waiting *w = &__get_cpu_var(lock_waiting); | 112 | struct xen_lock_waiting *w = &__get_cpu_var(lock_waiting); |
@@ -183,7 +183,7 @@ static void xen_lock_spinning(struct arch_spinlock *lock, __ticket_t want) | |||
183 | 183 | ||
184 | local_irq_save(flags); | 184 | local_irq_save(flags); |
185 | 185 | ||
186 | kstat_incr_irqs_this_cpu(irq, irq_to_desc(irq)); | 186 | kstat_incr_irq_this_cpu(irq); |
187 | out: | 187 | out: |
188 | cpumask_clear_cpu(cpu, &waiting_cpus); | 188 | cpumask_clear_cpu(cpu, &waiting_cpus); |
189 | w->lock = NULL; | 189 | w->lock = NULL; |
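The spinlock change swaps an open-coded descriptor lookup for the new kstat_incr_irq_this_cpu() helper; conceptually the helper is just a thin wrapper, roughly as sketched here (an assumption about its shape, not the in-tree definition):

/* Rough shape of the new helper (a sketch): */
static inline void kstat_incr_irq_this_cpu_sketch(unsigned int irq)
{
	kstat_incr_irqs_this_cpu(irq, irq_to_desc(irq));
}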
diff --git a/arch/x86/xen/time.c b/arch/x86/xen/time.c index 12a1ca707b94..7b78f88c1707 100644 --- a/arch/x86/xen/time.c +++ b/arch/x86/xen/time.c | |||
@@ -446,6 +446,7 @@ void xen_setup_timer(int cpu) | |||
446 | IRQF_PERCPU|IRQF_NOBALANCING|IRQF_TIMER| | 446 | IRQF_PERCPU|IRQF_NOBALANCING|IRQF_TIMER| |
447 | IRQF_FORCE_RESUME, | 447 | IRQF_FORCE_RESUME, |
448 | name, NULL); | 448 | name, NULL); |
449 | (void)xen_set_irq_priority(irq, XEN_IRQ_PRIORITY_MAX); | ||
449 | 450 | ||
450 | memcpy(evt, xen_clockevent, sizeof(*evt)); | 451 | memcpy(evt, xen_clockevent, sizeof(*evt)); |
451 | 452 | ||
diff --git a/arch/x86/xen/xen-head.S b/arch/x86/xen/xen-head.S index 7faed5869e5b..485b69585540 100644 --- a/arch/x86/xen/xen-head.S +++ b/arch/x86/xen/xen-head.S | |||
@@ -11,8 +11,28 @@ | |||
11 | #include <asm/page_types.h> | 11 | #include <asm/page_types.h> |
12 | 12 | ||
13 | #include <xen/interface/elfnote.h> | 13 | #include <xen/interface/elfnote.h> |
14 | #include <xen/interface/features.h> | ||
14 | #include <asm/xen/interface.h> | 15 | #include <asm/xen/interface.h> |
15 | 16 | ||
17 | #ifdef CONFIG_XEN_PVH | ||
18 | #define PVH_FEATURES_STR "|writable_descriptor_tables|auto_translated_physmap|supervisor_mode_kernel" | ||
19 | /* Note the lack of 'hvm_callback_vector'. Older hypervisors will | ||
20 | * balk at this being part of XEN_ELFNOTE_FEATURES, so we put it in | ||
21 | * XEN_ELFNOTE_SUPPORTED_FEATURES, which older hypervisors will ignore. | ||
22 | */ | ||
23 | #define PVH_FEATURES ((1 << XENFEAT_writable_page_tables) | \ | ||
24 | (1 << XENFEAT_auto_translated_physmap) | \ | ||
25 | (1 << XENFEAT_supervisor_mode_kernel) | \ | ||
26 | (1 << XENFEAT_hvm_callback_vector)) | ||
27 | /* The XENFEAT_writable_page_tables is not strictly necessary, as we set | ||
28 | * that up regardless of whether this CONFIG option is enabled, but it | ||
29 | * clarifies what the right flags need to be. | ||
30 | */ | ||
31 | #else | ||
32 | #define PVH_FEATURES_STR "" | ||
33 | #define PVH_FEATURES (0) | ||
34 | #endif | ||
35 | |||
16 | __INIT | 36 | __INIT |
17 | ENTRY(startup_xen) | 37 | ENTRY(startup_xen) |
18 | cld | 38 | cld |
@@ -95,7 +115,10 @@ NEXT_HYPERCALL(arch_6) | |||
95 | #endif | 115 | #endif |
96 | ELFNOTE(Xen, XEN_ELFNOTE_ENTRY, _ASM_PTR startup_xen) | 116 | ELFNOTE(Xen, XEN_ELFNOTE_ENTRY, _ASM_PTR startup_xen) |
97 | ELFNOTE(Xen, XEN_ELFNOTE_HYPERCALL_PAGE, _ASM_PTR hypercall_page) | 117 | ELFNOTE(Xen, XEN_ELFNOTE_HYPERCALL_PAGE, _ASM_PTR hypercall_page) |
98 | ELFNOTE(Xen, XEN_ELFNOTE_FEATURES, .asciz "!writable_page_tables|pae_pgdir_above_4gb") | 118 | ELFNOTE(Xen, XEN_ELFNOTE_FEATURES, .ascii "!writable_page_tables|pae_pgdir_above_4gb"; .asciz PVH_FEATURES_STR) |
119 | ELFNOTE(Xen, XEN_ELFNOTE_SUPPORTED_FEATURES, .long (PVH_FEATURES) | | ||
120 | (1 << XENFEAT_writable_page_tables) | | ||
121 | (1 << XENFEAT_dom0)) | ||
99 | ELFNOTE(Xen, XEN_ELFNOTE_PAE_MODE, .asciz "yes") | 122 | ELFNOTE(Xen, XEN_ELFNOTE_PAE_MODE, .asciz "yes") |
100 | ELFNOTE(Xen, XEN_ELFNOTE_LOADER, .asciz "generic") | 123 | ELFNOTE(Xen, XEN_ELFNOTE_LOADER, .asciz "generic") |
101 | ELFNOTE(Xen, XEN_ELFNOTE_L1_MFN_VALID, | 124 | ELFNOTE(Xen, XEN_ELFNOTE_L1_MFN_VALID, |
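The ELF note arithmetic above builds a plain bitmask: each XENFEAT_* constant is a bit position, and the hypervisor compares the kernel's advertised features against what it can provide. A tiny standalone sketch of that mask construction; the concrete bit positions are assumptions modeled on xen/interface/features.h, not taken from this diff:

#include <stdio.h>

/* Assumed bit positions (treat as illustrative): */
enum {
	FEAT_writable_page_tables    = 0,
	FEAT_auto_translated_physmap = 2,
	FEAT_supervisor_mode_kernel  = 3,
	FEAT_hvm_callback_vector     = 8,
	FEAT_dom0                    = 11,
};

int main(void)
{
	/* PVH_FEATURES, per the #define in the hunk above. */
	unsigned long pvh = (1UL << FEAT_writable_page_tables) |
			    (1UL << FEAT_auto_translated_physmap) |
			    (1UL << FEAT_supervisor_mode_kernel) |
			    (1UL << FEAT_hvm_callback_vector);
	/* SUPPORTED_FEATURES adds writable page tables and dom0. */
	unsigned long supported = pvh |
			    (1UL << FEAT_writable_page_tables) |
			    (1UL << FEAT_dom0);

	printf("SUPPORTED_FEATURES word: 0x%lx\n", supported);
	return 0;
}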
diff --git a/arch/x86/xen/xen-ops.h b/arch/x86/xen/xen-ops.h index 95f8c6142328..1cb6f4c37300 100644 --- a/arch/x86/xen/xen-ops.h +++ b/arch/x86/xen/xen-ops.h | |||
@@ -123,4 +123,5 @@ __visible void xen_adjust_exception_frame(void); | |||
123 | 123 | ||
124 | extern int xen_panic_handler_init(void); | 124 | extern int xen_panic_handler_init(void); |
125 | 125 | ||
126 | void xen_pvh_secondary_vcpu_init(int cpu); | ||
126 | #endif /* XEN_OPS_H */ | 127 | #endif /* XEN_OPS_H */ |